1 //===- ParallelLoopMapper.cpp - Utilities for mapping parallel loops to GPU =// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements utilities to generate mappings for parallel loops to 10 // GPU devices. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "mlir/Dialect/GPU/ParallelLoopMapper.h" 15 16 #include "PassDetail.h" 17 #include "mlir/Dialect/GPU/GPUDialect.h" 18 #include "mlir/Dialect/GPU/Passes.h" 19 #include "mlir/Dialect/SCF/SCF.h" 20 #include "mlir/IR/AffineMap.h" 21 #include "mlir/Pass/Pass.h" 22 23 #include "mlir/Dialect/GPU/ParallelLoopMapperAttr.cpp.inc" 24 #include "mlir/Dialect/GPU/ParallelLoopMapperEnums.cpp.inc" 25 26 namespace mlir { 27 28 using scf::ParallelOp; 29 30 StringRef gpu::getMappingAttrName() { return "mapping"; } 31 32 gpu::ParallelLoopDimMapping 33 gpu::getParallelLoopDimMappingAttr(Processor processor, AffineMap map, 34 AffineMap bound) { 35 MLIRContext *context = map.getContext(); 36 OpBuilder builder(context); 37 return ParallelLoopDimMapping::get( 38 ProcessorAttr::get(builder.getContext(), processor), 39 AffineMapAttr::get(map), AffineMapAttr::get(bound), context); 40 } 41 42 LogicalResult gpu::setMappingAttr(ParallelOp ploopOp, 43 ArrayRef<ParallelLoopDimMapping> mapping) { 44 // Verify that each processor is mapped to only once. 45 llvm::DenseSet<gpu::Processor> specifiedMappings; 46 for (auto dimAttr : mapping) { 47 gpu::Processor processor = getProcessor(dimAttr); 48 if (processor != gpu::Processor::Sequential && 49 specifiedMappings.count(processor)) 50 return ploopOp.emitError( 51 "invalid mapping multiple loops to same processor"); 52 } 53 ArrayRef<Attribute> mappingAsAttrs(mapping.data(), mapping.size()); 54 ploopOp->setAttr(getMappingAttrName(), 55 ArrayAttr::get(ploopOp.getContext(), mappingAsAttrs)); 56 return success(); 57 } 58 59 namespace gpu { 60 namespace { 61 enum MappingLevel { MapGrid = 0, MapBlock = 1, Sequential = 2 }; 62 } // namespace 63 64 static constexpr int kNumHardwareIds = 3; 65 66 /// Bounded increment on MappingLevel. Increments to the next 67 /// level unless Sequential was already reached. 68 static MappingLevel &operator++(MappingLevel &mappingLevel) { 69 if (mappingLevel < Sequential) { 70 mappingLevel = static_cast<MappingLevel>(mappingLevel + 1); 71 } 72 return mappingLevel; 73 } 74 75 /// Computed the hardware id to use for a given mapping level. Will 76 /// assign x,y and z hardware ids for the first 3 dimensions and use 77 /// sequential after. 78 /// TODO: Make this use x for the inner-most loop that is 79 /// distributed to map to x, the next innermost to y and the next innermost to 80 /// z. 81 static Processor getHardwareIdForMapping(MappingLevel level, int dimension) { 82 83 if (dimension >= kNumHardwareIds || level == Sequential) 84 return Processor::Sequential; 85 switch (level) { 86 case MapGrid: 87 switch (dimension) { 88 case 0: 89 return Processor::BlockX; 90 case 1: 91 return Processor::BlockY; 92 case 2: 93 return Processor::BlockZ; 94 default: 95 return Processor::Sequential; 96 } 97 break; 98 case MapBlock: 99 switch (dimension) { 100 case 0: 101 return Processor::ThreadX; 102 case 1: 103 return Processor::ThreadY; 104 case 2: 105 return Processor::ThreadZ; 106 default: 107 return Processor::Sequential; 108 } 109 default:; 110 } 111 return Processor::Sequential; 112 } 113 114 /// Add mapping information to the given parallel loop. Do not add 115 /// mapping information if the loop already has it. Also, don't 116 /// start a mapping at a nested loop. 117 static void mapParallelOp(ParallelOp parallelOp, 118 MappingLevel mappingLevel = MapGrid) { 119 // Do not try to add a mapping to already mapped loops or nested loops. 120 if (parallelOp->getAttr(getMappingAttrName()) || 121 ((mappingLevel == MapGrid) && parallelOp->getParentOfType<ParallelOp>())) 122 return; 123 124 MLIRContext *ctx = parallelOp.getContext(); 125 Builder b(ctx); 126 SmallVector<ParallelLoopDimMapping, 4> attrs; 127 attrs.reserve(parallelOp.getNumLoops()); 128 for (int i = 0, e = parallelOp.getNumLoops(); i < e; ++i) { 129 attrs.push_back(getParallelLoopDimMappingAttr( 130 getHardwareIdForMapping(mappingLevel, i), b.getDimIdentityMap(), 131 b.getDimIdentityMap())); 132 } 133 (void)setMappingAttr(parallelOp, attrs); 134 ++mappingLevel; 135 // Parallel loop operations are immediately nested, so do not use 136 // walk but just iterate over the operations. 137 for (Operation &op : *parallelOp.getBody()) { 138 if (ParallelOp nested = dyn_cast<ParallelOp>(op)) 139 mapParallelOp(nested, mappingLevel); 140 } 141 } 142 143 namespace { 144 struct GpuMapParallelLoopsPass 145 : public GpuMapParallelLoopsPassBase<GpuMapParallelLoopsPass> { 146 void runOnOperation() override { 147 for (Region ®ion : getOperation()->getRegions()) { 148 region.walk([](ParallelOp parallelOp) { mapParallelOp(parallelOp); }); 149 } 150 } 151 }; 152 153 } // namespace 154 } // namespace gpu 155 } // namespace mlir 156 157 std::unique_ptr<mlir::OperationPass<mlir::func::FuncOp>> 158 mlir::createGpuMapParallelLoopsPass() { 159 return std::make_unique<gpu::GpuMapParallelLoopsPass>(); 160 } 161