//===- ParallelLoopMapper.cpp - Utilities for mapping parallel loops to GPU =//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements utilities to generate mappings for parallel loops to
// GPU devices.
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/GPU/ParallelLoopMapper.h"

#include "mlir/Dialect/GPU/GPUDialect.h"
#include "mlir/Dialect/GPU/Passes.h"
#include "mlir/Dialect/SCF/SCF.h"
#include "mlir/IR/AffineMap.h"
#include "mlir/Pass/Pass.h"

using namespace mlir;
using namespace mlir::gpu;
using namespace mlir::scf;

#include "mlir/Dialect/GPU/ParallelLoopMapperAttr.cpp.inc"
#include "mlir/Dialect/GPU/ParallelLoopMapperEnums.cpp.inc"
namespace mlir {
namespace gpu {

StringRef getMappingAttrName() { return "mapping"; }

ParallelLoopDimMapping getParallelLoopDimMappingAttr(Processor processor,
                                                     AffineMap map,
                                                     AffineMap bound) {
  MLIRContext *context = map.getContext();
  OpBuilder builder(context);
  return ParallelLoopDimMapping::get(
      builder.getI64IntegerAttr(static_cast<int32_t>(processor)),
      AffineMapAttr::get(map), AffineMapAttr::get(bound), context);
}

LogicalResult setMappingAttr(scf::ParallelOp ploopOp,
                             ArrayRef<ParallelLoopDimMapping> mapping) {
  // Verify that each processor is mapped to at most once.
  llvm::DenseSet<gpu::Processor> specifiedMappings;
  for (auto dimAttr : mapping) {
    gpu::Processor processor = getProcessor(dimAttr);
    if (processor != gpu::Processor::Sequential &&
        specifiedMappings.count(processor))
      return ploopOp.emitError(
          "invalid mapping multiple loops to same processor");
    // Record the processor so a later duplicate triggers the error above.
    specifiedMappings.insert(processor);
  }
  ArrayRef<Attribute> mappingAsAttrs(mapping.data(), mapping.size());
  ploopOp->setAttr(getMappingAttrName(),
                   ArrayAttr::get(ploopOp.getContext(), mappingAsAttrs));
  return success();
}
} // namespace gpu
} // namespace mlir

namespace {

enum MappingLevel { MapGrid = 0, MapBlock = 1, Sequential = 2 };

static constexpr int kNumHardwareIds = 3;

} // namespace

/// Bounded increment on MappingLevel. Increments to the next
/// level unless Sequential was already reached.
MappingLevel &operator++(MappingLevel &mappingLevel) {
  if (mappingLevel < Sequential) {
    mappingLevel = static_cast<MappingLevel>(mappingLevel + 1);
  }
  return mappingLevel;
}

/// Computes the hardware id to use for a given mapping level. Assigns the
/// x, y and z hardware ids to the first 3 dimensions and uses sequential
/// after that.
/// TODO: Use x for the innermost distributed loop, y for the next innermost,
/// and z for the one after that.
static gpu::Processor getHardwareIdForMapping(MappingLevel level,
                                              int dimension) {

  if (dimension >= kNumHardwareIds || level == Sequential)
    return Processor::Sequential;
  switch (level) {
  case MapGrid:
    switch (dimension) {
    case 0:
      return Processor::BlockX;
    case 1:
      return Processor::BlockY;
    case 2:
      return Processor::BlockZ;
    default:
      return Processor::Sequential;
    }
    break;
  case MapBlock:
    switch (dimension) {
    case 0:
      return Processor::ThreadX;
    case 1:
      return Processor::ThreadY;
    case 2:
      return Processor::ThreadZ;
    default:
      return Processor::Sequential;
    }
  default:;
  }
  return Processor::Sequential;
}

/// Add mapping information to the given parallel loop. Do not add
/// mapping information if the loop already has it. Also, don't
/// start a mapping at a nested loop.
static void mapParallelOp(ParallelOp parallelOp,
                          MappingLevel mappingLevel = MapGrid) {
  // Do not try to add a mapping to already mapped loops or nested loops.
  if (parallelOp->getAttr(getMappingAttrName()) ||
      ((mappingLevel == MapGrid) && parallelOp->getParentOfType<ParallelOp>()))
    return;

  MLIRContext *ctx = parallelOp.getContext();
  Builder b(ctx);
  SmallVector<ParallelLoopDimMapping, 4> attrs;
  attrs.reserve(parallelOp.getNumLoops());
  for (int i = 0, e = parallelOp.getNumLoops(); i < e; ++i) {
    attrs.push_back(getParallelLoopDimMappingAttr(
        getHardwareIdForMapping(mappingLevel, i), b.getDimIdentityMap(),
        b.getDimIdentityMap()));
  }
  (void)setMappingAttr(parallelOp, attrs);
  ++mappingLevel;
  // Parallel loop operations are immediately nested, so do not use
  // walk but just iterate over the operations.
  for (Operation &op : *parallelOp.getBody()) {
    if (ParallelOp nested = dyn_cast<ParallelOp>(op))
      mapParallelOp(nested, mappingLevel);
  }
}

void mlir::greedilyMapParallelSCFToGPU(Region &region) {
  region.walk([](ParallelOp parallelOp) { mapParallelOp(parallelOp); });
}
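
// For illustration, a rough sketch of what greedilyMapParallelSCFToGPU
// produces for a two-deep scf.parallel nest: the outer loop gets grid (block)
// processors and the inner loop gets block (thread) processors, each
// dimension annotated with identity map/bound affine maps. The IR below is
// approximate; the exact attribute ordering and the integer value of each
// processor case come from the generated Processor enum.
//
//   scf.parallel (%i, %j) = (%c0, %c0) to (%c4, %c4) step (%c1, %c1) {
//     scf.parallel (%k) = (%c0) to (%c4) step (%c1) {
//       ...
//     } {mapping = [{bound = affine_map<(d0) -> (d0)>,
//                    map = affine_map<(d0) -> (d0)>,
//                    processor = 3 : i64}]}   // thread x
//   } {mapping = [{bound = affine_map<(d0) -> (d0)>,
//                  map = affine_map<(d0) -> (d0)>,
//                  processor = 0 : i64},      // block x
//                 {bound = affine_map<(d0) -> (d0)>,
//                  map = affine_map<(d0) -> (d0)>,
//                  processor = 1 : i64}]}     // block y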