xref: /llvm-project/mlir/lib/Dialect/GPU/Transforms/ParallelLoopMapper.cpp (revision 76e79b0bef6c547e74b0c5e5900e41b44eb2a2f8)
1 //===- ParallelLoopMapper.cpp - Utilities for mapping parallel loops to GPU =//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements utilities to generate mappings for parallel loops to
10 // GPU devices.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "mlir/Dialect/GPU/Transforms/Passes.h"
15 
16 #include "mlir/Dialect/Func/IR/FuncOps.h"
17 #include "mlir/Dialect/GPU/IR/GPUDialect.h"
18 #include "mlir/Dialect/GPU/Transforms/ParallelLoopMapper.h"
19 #include "mlir/Dialect/SCF/IR/SCF.h"
20 #include "mlir/IR/AffineMap.h"
21 
22 namespace mlir {
23 #define GEN_PASS_DEF_GPUMAPPARALLELLOOPSPASS
24 #include "mlir/Dialect/GPU/Transforms/Passes.h.inc"
25 } // namespace mlir
26 
27 namespace mlir {
28 
29 using scf::ParallelOp;
30 
getMappingAttrName()31 StringRef gpu::getMappingAttrName() { return "mapping"; }
32 
33 LogicalResult
setMappingAttr(ParallelOp ploopOp,ArrayRef<ParallelLoopDimMappingAttr> mapping)34 gpu::setMappingAttr(ParallelOp ploopOp,
35                     ArrayRef<ParallelLoopDimMappingAttr> mapping) {
36   // Verify that each processor is mapped to only once.
37   llvm::DenseSet<gpu::Processor> specifiedMappings;
38   for (auto dimAttr : mapping) {
39     gpu::Processor processor = dimAttr.getProcessor();
40     if (processor != gpu::Processor::Sequential &&
41         specifiedMappings.count(processor))
42       return ploopOp.emitError(
43           "invalid mapping multiple loops to same processor");
44     specifiedMappings.insert(processor);
45   }
46   ArrayRef<Attribute> mappingAsAttrs(mapping.data(), mapping.size());
47   ploopOp->setAttr(getMappingAttrName(),
48                    ArrayAttr::get(ploopOp.getContext(), mappingAsAttrs));
49   return success();
50 }
51 
52 namespace gpu {
namespace {
/// Levels of the GPU compute hierarchy that successive nesting depths of
/// parallel loops are mapped to: first the grid (gpu blocks), then the
/// block (gpu threads), then sequential once both hardware levels are used.
enum MappingLevel { MapGrid = 0, MapBlock = 1, Sequential = 2 };
} // namespace

/// Number of hardware id dimensions (x, y, z) available at each level.
static constexpr int kNumHardwareIds = 3;
58 
59 /// Bounded increment on MappingLevel. Increments to the next
60 /// level unless Sequential was already reached.
operator ++(MappingLevel & mappingLevel)61 static MappingLevel &operator++(MappingLevel &mappingLevel) {
62   if (mappingLevel < Sequential) {
63     mappingLevel = static_cast<MappingLevel>(mappingLevel + 1);
64   }
65   return mappingLevel;
66 }
67 
68 /// Computed the hardware id to use for a given mapping level. Will
69 /// assign x,y and z hardware ids for the first 3 dimensions and use
70 /// sequential after.
71 /// TODO: Make this use x for the inner-most loop that is
72 /// distributed to map to x, the next innermost to y and the next innermost to
73 /// z.
getHardwareIdForMapping(MappingLevel level,int dimension)74 static Processor getHardwareIdForMapping(MappingLevel level, int dimension) {
75 
76   if (dimension >= kNumHardwareIds || level == Sequential)
77     return Processor::Sequential;
78   switch (level) {
79   case MapGrid:
80     switch (dimension) {
81     case 0:
82       return Processor::BlockX;
83     case 1:
84       return Processor::BlockY;
85     case 2:
86       return Processor::BlockZ;
87     default:
88       return Processor::Sequential;
89     }
90     break;
91   case MapBlock:
92     switch (dimension) {
93     case 0:
94       return Processor::ThreadX;
95     case 1:
96       return Processor::ThreadY;
97     case 2:
98       return Processor::ThreadZ;
99     default:
100       return Processor::Sequential;
101     }
102   default:;
103   }
104   return Processor::Sequential;
105 }
106 
107 /// Add mapping information to the given parallel loop. Do not add
108 /// mapping information if the loop already has it. Also, don't
109 /// start a mapping at a nested loop.
mapParallelOp(ParallelOp parallelOp,MappingLevel mappingLevel=MapGrid)110 static void mapParallelOp(ParallelOp parallelOp,
111                           MappingLevel mappingLevel = MapGrid) {
112   // Do not try to add a mapping to already mapped loops or nested loops.
113   if (parallelOp->getAttr(getMappingAttrName()) ||
114       ((mappingLevel == MapGrid) && parallelOp->getParentOfType<ParallelOp>()))
115     return;
116 
117   MLIRContext *ctx = parallelOp.getContext();
118   Builder b(ctx);
119   SmallVector<ParallelLoopDimMappingAttr, 4> attrs;
120   attrs.reserve(parallelOp.getNumLoops());
121   for (int i = 0, e = parallelOp.getNumLoops(); i < e; ++i) {
122     attrs.push_back(b.getAttr<ParallelLoopDimMappingAttr>(
123         getHardwareIdForMapping(mappingLevel, i), b.getDimIdentityMap(),
124         b.getDimIdentityMap()));
125   }
126   (void)setMappingAttr(parallelOp, attrs);
127   ++mappingLevel;
128   // Parallel loop operations are immediately nested, so do not use
129   // walk but just iterate over the operations.
130   for (Operation &op : *parallelOp.getBody()) {
131     if (ParallelOp nested = dyn_cast<ParallelOp>(op))
132       mapParallelOp(nested, mappingLevel);
133   }
134 }
135 
136 namespace {
137 struct GpuMapParallelLoopsPass
138     : public impl::GpuMapParallelLoopsPassBase<GpuMapParallelLoopsPass> {
runOnOperationmlir::gpu::__anon7c5d29460211::GpuMapParallelLoopsPass139   void runOnOperation() override {
140     for (Region &region : getOperation()->getRegions()) {
141       region.walk([](ParallelOp parallelOp) { mapParallelOp(parallelOp); });
142     }
143   }
144 };
145 
146 } // namespace
147 } // namespace gpu
148 } // namespace mlir
149 
150 std::unique_ptr<mlir::OperationPass<mlir::func::FuncOp>>
createGpuMapParallelLoopsPass()151 mlir::createGpuMapParallelLoopsPass() {
152   return std::make_unique<gpu::GpuMapParallelLoopsPass>();
153 }
154