1 //===- ParallelLoopMapper.cpp - Utilities for mapping parallel loops to GPU =// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements utilities to generate mappings for parallel loops to 10 // GPU devices. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "mlir/Dialect/GPU/Transforms/Passes.h" 15 16 #include "mlir/Dialect/Func/IR/FuncOps.h" 17 #include "mlir/Dialect/GPU/IR/GPUDialect.h" 18 #include "mlir/Dialect/GPU/Transforms/ParallelLoopMapper.h" 19 #include "mlir/Dialect/SCF/IR/SCF.h" 20 #include "mlir/IR/AffineMap.h" 21 22 namespace mlir { 23 #define GEN_PASS_DEF_GPUMAPPARALLELLOOPSPASS 24 #include "mlir/Dialect/GPU/Transforms/Passes.h.inc" 25 } // namespace mlir 26 27 namespace mlir { 28 29 using scf::ParallelOp; 30 31 StringRef gpu::getMappingAttrName() { return "mapping"; } 32 33 LogicalResult 34 gpu::setMappingAttr(ParallelOp ploopOp, 35 ArrayRef<ParallelLoopDimMappingAttr> mapping) { 36 // Verify that each processor is mapped to only once. 37 llvm::DenseSet<gpu::Processor> specifiedMappings; 38 for (auto dimAttr : mapping) { 39 gpu::Processor processor = dimAttr.getProcessor(); 40 if (processor != gpu::Processor::Sequential && 41 specifiedMappings.count(processor)) 42 return ploopOp.emitError( 43 "invalid mapping multiple loops to same processor"); 44 } 45 ArrayRef<Attribute> mappingAsAttrs(mapping.data(), mapping.size()); 46 ploopOp->setAttr(getMappingAttrName(), 47 ArrayAttr::get(ploopOp.getContext(), mappingAsAttrs)); 48 return success(); 49 } 50 51 namespace gpu { 52 namespace { 53 enum MappingLevel { MapGrid = 0, MapBlock = 1, Sequential = 2 }; 54 } // namespace 55 56 static constexpr int kNumHardwareIds = 3; 57 58 /// Bounded increment on MappingLevel. Increments to the next 59 /// level unless Sequential was already reached. 60 static MappingLevel &operator++(MappingLevel &mappingLevel) { 61 if (mappingLevel < Sequential) { 62 mappingLevel = static_cast<MappingLevel>(mappingLevel + 1); 63 } 64 return mappingLevel; 65 } 66 67 /// Computed the hardware id to use for a given mapping level. Will 68 /// assign x,y and z hardware ids for the first 3 dimensions and use 69 /// sequential after. 70 /// TODO: Make this use x for the inner-most loop that is 71 /// distributed to map to x, the next innermost to y and the next innermost to 72 /// z. 73 static Processor getHardwareIdForMapping(MappingLevel level, int dimension) { 74 75 if (dimension >= kNumHardwareIds || level == Sequential) 76 return Processor::Sequential; 77 switch (level) { 78 case MapGrid: 79 switch (dimension) { 80 case 0: 81 return Processor::BlockX; 82 case 1: 83 return Processor::BlockY; 84 case 2: 85 return Processor::BlockZ; 86 default: 87 return Processor::Sequential; 88 } 89 break; 90 case MapBlock: 91 switch (dimension) { 92 case 0: 93 return Processor::ThreadX; 94 case 1: 95 return Processor::ThreadY; 96 case 2: 97 return Processor::ThreadZ; 98 default: 99 return Processor::Sequential; 100 } 101 default:; 102 } 103 return Processor::Sequential; 104 } 105 106 /// Add mapping information to the given parallel loop. Do not add 107 /// mapping information if the loop already has it. Also, don't 108 /// start a mapping at a nested loop. 109 static void mapParallelOp(ParallelOp parallelOp, 110 MappingLevel mappingLevel = MapGrid) { 111 // Do not try to add a mapping to already mapped loops or nested loops. 112 if (parallelOp->getAttr(getMappingAttrName()) || 113 ((mappingLevel == MapGrid) && parallelOp->getParentOfType<ParallelOp>())) 114 return; 115 116 MLIRContext *ctx = parallelOp.getContext(); 117 Builder b(ctx); 118 SmallVector<ParallelLoopDimMappingAttr, 4> attrs; 119 attrs.reserve(parallelOp.getNumLoops()); 120 for (int i = 0, e = parallelOp.getNumLoops(); i < e; ++i) { 121 attrs.push_back(b.getAttr<ParallelLoopDimMappingAttr>( 122 getHardwareIdForMapping(mappingLevel, i), b.getDimIdentityMap(), 123 b.getDimIdentityMap())); 124 } 125 (void)setMappingAttr(parallelOp, attrs); 126 ++mappingLevel; 127 // Parallel loop operations are immediately nested, so do not use 128 // walk but just iterate over the operations. 129 for (Operation &op : *parallelOp.getBody()) { 130 if (ParallelOp nested = dyn_cast<ParallelOp>(op)) 131 mapParallelOp(nested, mappingLevel); 132 } 133 } 134 135 namespace { 136 struct GpuMapParallelLoopsPass 137 : public impl::GpuMapParallelLoopsPassBase<GpuMapParallelLoopsPass> { 138 void runOnOperation() override { 139 for (Region ®ion : getOperation()->getRegions()) { 140 region.walk([](ParallelOp parallelOp) { mapParallelOp(parallelOp); }); 141 } 142 } 143 }; 144 145 } // namespace 146 } // namespace gpu 147 } // namespace mlir 148 149 std::unique_ptr<mlir::OperationPass<mlir::func::FuncOp>> 150 mlir::createGpuMapParallelLoopsPass() { 151 return std::make_unique<gpu::GpuMapParallelLoopsPass>(); 152 } 153