//===- ParallelLoopMapper.cpp - Utilities for mapping parallel loops to GPU =//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements utilities to generate mappings for parallel loops to
// GPU devices.
//
//===----------------------------------------------------------------------===//
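
// Note (illustrative sketch, not normative): the utilities below attach a
// `mapping` ArrayAttr to `scf.parallel` ops, with one ParallelLoopDimMapping
// entry per induction variable, each holding a processor, a map and a bound.
// For a 2-D loop mapped at the grid level, the printed form looks roughly
// like the following (the exact attribute syntax depends on the MLIR
// version):
//
//   scf.parallel (%i, %j) = (%lb0, %lb1) to (%ub0, %ub1) step (%s0, %s1) {
//     ...
//   } {mapping = [{processor = 0, map = affine_map<(d0) -> (d0)>,
//                  bound = affine_map<(d0) -> (d0)>},
//                 {processor = 1, map = affine_map<(d0) -> (d0)>,
//                  bound = affine_map<(d0) -> (d0)>}]}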

#include "mlir/Dialect/GPU/ParallelLoopMapper.h"

#include "PassDetail.h"
#include "mlir/Dialect/GPU/GPUDialect.h"
#include "mlir/Dialect/GPU/Passes.h"
#include "mlir/Dialect/SCF/SCF.h"
#include "mlir/IR/AffineMap.h"
#include "mlir/Pass/Pass.h"

#include "mlir/Dialect/GPU/ParallelLoopMapperAttr.cpp.inc"
#include "mlir/Dialect/GPU/ParallelLoopMapperEnums.cpp.inc"

namespace mlir {

using scf::ParallelOp;

StringRef gpu::getMappingAttrName() { return "mapping"; }

gpu::ParallelLoopDimMapping
gpu::getParallelLoopDimMappingAttr(Processor processor, AffineMap map,
                                   AffineMap bound) {
  MLIRContext *context = map.getContext();
  OpBuilder builder(context);
  return ParallelLoopDimMapping::get(
      ProcessorAttr::get(builder.getContext(), processor),
      AffineMapAttr::get(map), AffineMapAttr::get(bound), context);
}

LogicalResult gpu::setMappingAttr(ParallelOp ploopOp,
                                  ArrayRef<ParallelLoopDimMapping> mapping) {
  // Verify that each processor is mapped to at most once.
  llvm::DenseSet<gpu::Processor> specifiedMappings;
  for (auto dimAttr : mapping) {
    gpu::Processor processor = getProcessor(dimAttr);
    if (processor != gpu::Processor::Sequential &&
        specifiedMappings.count(processor))
      return ploopOp.emitError(
          "invalid mapping multiple loops to same processor");
    // Remember this processor so a later duplicate triggers the error above.
    specifiedMappings.insert(processor);
  }
  ArrayRef<Attribute> mappingAsAttrs(mapping.data(), mapping.size());
  ploopOp->setAttr(getMappingAttrName(),
                   ArrayAttr::get(ploopOp.getContext(), mappingAsAttrs));
  return success();
}
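
// Illustrative usage sketch (hypothetical caller, not part of this file): a
// transform that wants a custom mapping can build the per-dimension
// attributes itself instead of relying on the greedy pass below, e.g.
//
//   Builder b(ploop.getContext());
//   SmallVector<gpu::ParallelLoopDimMapping, 2> mapping = {
//       gpu::getParallelLoopDimMappingAttr(gpu::Processor::BlockX,
//                                          b.getDimIdentityMap(),
//                                          b.getDimIdentityMap()),
//       gpu::getParallelLoopDimMappingAttr(gpu::Processor::ThreadX,
//                                          b.getDimIdentityMap(),
//                                          b.getDimIdentityMap())};
//   if (failed(gpu::setMappingAttr(ploop, mapping)))
//     return failure();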

namespace gpu {
namespace {
enum MappingLevel { MapGrid = 0, MapBlock = 1, Sequential = 2 };
} // namespace

static constexpr int kNumHardwareIds = 3;

/// Bounded increment on MappingLevel. Increments to the next
/// level unless Sequential was already reached.
static MappingLevel &operator++(MappingLevel &mappingLevel) {
  if (mappingLevel < Sequential) {
    mappingLevel = static_cast<MappingLevel>(mappingLevel + 1);
  }
  return mappingLevel;
}

/// Computes the hardware id to use for a given mapping level. Assigns
/// x, y and z hardware ids to the first 3 dimensions and maps any further
/// dimension to sequential.
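/// For example, (MapGrid, 1) yields Processor::BlockY and (MapBlock, 0)
/// yields Processor::ThreadX; any dimension >= kNumHardwareIds falls back to
/// Processor::Sequential.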
/// TODO: Use x for the innermost distributed loop, y for the next innermost,
/// and z for the one after that.
static Processor getHardwareIdForMapping(MappingLevel level, int dimension) {

  if (dimension >= kNumHardwareIds || level == Sequential)
    return Processor::Sequential;
  switch (level) {
  case MapGrid:
    switch (dimension) {
    case 0:
      return Processor::BlockX;
    case 1:
      return Processor::BlockY;
    case 2:
      return Processor::BlockZ;
    default:
      return Processor::Sequential;
    }
    break;
  case MapBlock:
    switch (dimension) {
    case 0:
      return Processor::ThreadX;
    case 1:
      return Processor::ThreadY;
    case 2:
      return Processor::ThreadZ;
    default:
      return Processor::Sequential;
    }
  default:;
  }
  return Processor::Sequential;
}

/// Add mapping information to the given parallel loop. Do not add
/// mapping information if the loop already has it. Also, don't
/// start a mapping at a nested loop.
static void mapParallelOp(ParallelOp parallelOp,
                          MappingLevel mappingLevel = MapGrid) {
  // Do not try to add a mapping to already mapped loops or nested loops.
  if (parallelOp->getAttr(getMappingAttrName()) ||
      ((mappingLevel == MapGrid) && parallelOp->getParentOfType<ParallelOp>()))
    return;

  MLIRContext *ctx = parallelOp.getContext();
  Builder b(ctx);
  SmallVector<ParallelLoopDimMapping, 4> attrs;
  attrs.reserve(parallelOp.getNumLoops());
  for (int i = 0, e = parallelOp.getNumLoops(); i < e; ++i) {
    attrs.push_back(getParallelLoopDimMappingAttr(
        getHardwareIdForMapping(mappingLevel, i), b.getDimIdentityMap(),
        b.getDimIdentityMap()));
  }
  (void)setMappingAttr(parallelOp, attrs);
  ++mappingLevel;
  // Parallel loop operations are immediately nested, so do not use
  // walk but just iterate over the operations.
  for (Operation &op : *parallelOp.getBody()) {
    if (ParallelOp nested = dyn_cast<ParallelOp>(op))
      mapParallelOp(nested, mappingLevel);
  }
}
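
// Illustrative sketch of the greedy mapping above: each level of an
// immediately nested scf.parallel nest is assigned the next MappingLevel,
// so for
//
//   scf.parallel (%i, %j) ... {        // mapped to BlockX, BlockY
//     scf.parallel (%k) ... {          // mapped to ThreadX
//       scf.parallel (%l) ... { ... }  // mapped to Sequential
//     }
//   }
//
// dimensions beyond the three hardware ids, and any loop nested deeper than
// the block level, fall back to Processor::Sequential.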

namespace {
struct GpuMapParallelLoopsPass
    : public GpuMapParallelLoopsPassBase<GpuMapParallelLoopsPass> {
  void runOnOperation() override {
    for (Region &region : getOperation()->getRegions()) {
      region.walk([](ParallelOp parallelOp) { mapParallelOp(parallelOp); });
    }
  }
};

} // namespace
} // namespace gpu
} // namespace mlir

std::unique_ptr<mlir::OperationPass<mlir::func::FuncOp>>
mlir::createGpuMapParallelLoopsPass() {
  return std::make_unique<gpu::GpuMapParallelLoopsPass>();
}
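
// Usage note (sketch): this pass is typically registered under the
// `gpu-map-parallel-loops` flag and run before converting scf.parallel loops
// to gpu.launch, e.g.
//
//   mlir-opt --gpu-map-parallel-loops --convert-parallel-loops-to-gpu ...
//
// Exact pass names and pipelines may differ between MLIR versions.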