xref: /llvm-project/mlir/lib/Dialect/GPU/Transforms/ParallelLoopMapper.cpp (revision c25b20c0f6c13d68dbc2e185764082d61ae4a132)
1 //===- ParallelLoopMapper.cpp - Utilities for mapping parallel loops to GPU =//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements utilities to generate mappings for parallel loops to
10 // GPU devices.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "mlir/Dialect/GPU/ParallelLoopMapper.h"
15 
16 #include "mlir/Dialect/GPU/GPUDialect.h"
17 #include "mlir/Dialect/GPU/Passes.h"
18 #include "mlir/Dialect/SCF/SCF.h"
19 #include "mlir/IR/AffineMap.h"
20 #include "mlir/Pass/Pass.h"
21 
22 using namespace mlir;
23 using namespace mlir::gpu;
24 using namespace mlir::scf;
25 
26 #include "mlir/Dialect/GPU/ParallelLoopMapperEnums.cpp.inc"
27 namespace mlir {
28 
29 #include "mlir/Dialect/GPU/ParallelLoopMapperAttr.cpp.inc"
30 namespace gpu {
31 
32 StringRef getMappingAttrName() { return "mapping"; }
33 
34 ParallelLoopDimMapping getParallelLoopDimMappingAttr(Processor processor,
35                                                      AffineMap map,
36                                                      AffineMap bound) {
37   MLIRContext *context = map.getContext();
38   OpBuilder builder(context);
39   return ParallelLoopDimMapping::get(
40       builder.getI64IntegerAttr(static_cast<int32_t>(processor)),
41       AffineMapAttr::get(map), AffineMapAttr::get(bound), context);
42 }
43 
44 LogicalResult setMappingAttr(scf::ParallelOp ploopOp,
45                              ArrayRef<ParallelLoopDimMapping> mapping) {
46   // Verify that each processor is mapped to only once.
47   llvm::DenseSet<gpu::Processor> specifiedMappings;
48   for (auto dimAttr : mapping) {
49     gpu::Processor processor = getProcessor(dimAttr);
50     if (processor != gpu::Processor::Sequential &&
51         specifiedMappings.count(processor))
52       return ploopOp.emitError(
53           "invalid mapping multiple loops to same processor");
54   }
55   ArrayRef<Attribute> mappingAsAttrs(mapping.data(), mapping.size());
56   ploopOp.setAttr(getMappingAttrName(),
57                   ArrayAttr::get(mappingAsAttrs, ploopOp.getContext()));
58   return success();
59 }
60 } // namespace gpu
61 } // namespace mlir
62 
namespace {

/// Levels of the GPU compute hierarchy that successive nesting depths of a
/// parallel loop nest are mapped onto: first the grid (blocks), then the
/// block (threads), then sequential execution for anything deeper.
enum MappingLevel { MapGrid = 0, MapBlock = 1, Sequential = 2 };

/// Number of hardware ids (x, y, z) available at each non-sequential level.
static constexpr int kNumHardwareIds = 3;

} // namespace
70 
71 /// Bounded increment on MappingLevel. Increments to the next
72 /// level unless Sequential was already reached.
73 MappingLevel &operator++(MappingLevel &mappingLevel) {
74   if (mappingLevel < Sequential) {
75     mappingLevel = static_cast<MappingLevel>(mappingLevel + 1);
76   }
77   return mappingLevel;
78 }
79 
80 /// Computed the hardware id to use for a given mapping level. Will
81 /// assign x,y and z hardware ids for the first 3 dimensions and use
82 /// sequential after.
83 /// TODO(ravishankarm/herhut) : Make this use x for the inner-most loop that is
84 /// distributed to map to x, the next innermost to y and the next innermost to
85 /// z.
86 static gpu::Processor getHardwareIdForMapping(MappingLevel level,
87                                               int dimension) {
88 
89   if (dimension >= kNumHardwareIds || level == Sequential)
90     return Processor::Sequential;
91   switch (level) {
92   case MapGrid:
93     switch (dimension) {
94     case 0:
95       return Processor::BlockX;
96     case 1:
97       return Processor::BlockY;
98     case 2:
99       return Processor::BlockZ;
100     default:
101       return Processor::Sequential;
102     }
103     break;
104   case MapBlock:
105     switch (dimension) {
106     case 0:
107       return Processor::ThreadX;
108     case 1:
109       return Processor::ThreadY;
110     case 2:
111       return Processor::ThreadZ;
112     default:
113       return Processor::Sequential;
114     }
115   default:;
116   }
117   return Processor::Sequential;
118 }
119 
120 /// Add mapping information to the given parallel loop. Do not add
121 /// mapping information if the loop already has it. Also, don't
122 /// start a mapping at a nested loop.
123 static void mapParallelOp(ParallelOp parallelOp,
124                           MappingLevel mappingLevel = MapGrid) {
125   // Do not try to add a mapping to already mapped loops or nested loops.
126   if (parallelOp.getAttr(getMappingAttrName()) ||
127       ((mappingLevel == MapGrid) && parallelOp.getParentOfType<ParallelOp>()))
128     return;
129 
130   MLIRContext *ctx = parallelOp.getContext();
131   Builder b(ctx);
132   SmallVector<ParallelLoopDimMapping, 4> attrs;
133   attrs.reserve(parallelOp.getNumLoops());
134   for (int i = 0, e = parallelOp.getNumLoops(); i < e; ++i) {
135     attrs.push_back(getParallelLoopDimMappingAttr(
136         getHardwareIdForMapping(mappingLevel, i), b.getDimIdentityMap(),
137         b.getDimIdentityMap()));
138   }
139   setMappingAttr(parallelOp, attrs);
140   ++mappingLevel;
141   // Parallel loop operations are immediately nested, so do not use
142   // walk but just iterate over the operations.
143   for (Operation &op : *parallelOp.getBody()) {
144     if (ParallelOp nested = dyn_cast<ParallelOp>(op))
145       mapParallelOp(nested, mappingLevel);
146   }
147 }
148 
149 void mlir::greedilyMapParallelLoopsToGPU(Region &region) {
150   region.walk([](ParallelOp parallelOp) { mapParallelOp(parallelOp); });
151 }
152