//===- ParallelLoopMapper.cpp - Utilities for mapping parallel loops to GPU =//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements utilities to generate mappings for parallel loops to
// GPU devices.
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/GPU/ParallelLoopMapper.h"

#include "mlir/Dialect/GPU/GPUDialect.h"
#include "mlir/Dialect/GPU/Passes.h"
#include "mlir/Dialect/SCF/SCF.h"
#include "mlir/IR/AffineMap.h"
#include "mlir/Pass/Pass.h"

using namespace mlir;
using namespace mlir::gpu;
using namespace mlir::scf;

#include "mlir/Dialect/GPU/ParallelLoopMapperEnums.cpp.inc"
namespace mlir {

#include "mlir/Dialect/GPU/ParallelLoopMapperAttr.cpp.inc"
namespace gpu {

StringRef getMappingAttrName() { return "mapping"; }

ParallelLoopDimMapping getParallelLoopDimMappingAttr(Processor processor,
                                                     AffineMap map,
                                                     AffineMap bound) {
  MLIRContext *context = map.getContext();
  OpBuilder builder(context);
  return ParallelLoopDimMapping::get(
      builder.getI64IntegerAttr(static_cast<int32_t>(processor)),
      AffineMapAttr::get(map), AffineMapAttr::get(bound), context);
}

LogicalResult setMappingAttr(scf::ParallelOp ploopOp,
                             ArrayRef<ParallelLoopDimMapping> mapping) {
  // Verify that each processor is mapped to only once.
  llvm::DenseSet<gpu::Processor> specifiedMappings;
  for (auto dimAttr : mapping) {
    gpu::Processor processor = getProcessor(dimAttr);
    if (processor != gpu::Processor::Sequential &&
        specifiedMappings.count(processor))
      return ploopOp.emitError(
          "invalid mapping multiple loops to same processor");
    // Record the processor so a later duplicate is detected.
    specifiedMappings.insert(processor);
  }
  ArrayRef<Attribute> mappingAsAttrs(mapping.data(), mapping.size());
  ploopOp.setAttr(getMappingAttrName(),
                  ArrayAttr::get(mappingAsAttrs, ploopOp.getContext()));
  return success();
}
} // namespace gpu
} // namespace mlir

namespace {

enum MappingLevel { MapGrid = 0, MapBlock = 1, Sequential = 2 };

static constexpr int kNumHardwareIds = 3;

} // namespace

/// Bounded increment on MappingLevel. Increments to the next
/// level unless Sequential was already reached.
MappingLevel &operator++(MappingLevel &mappingLevel) {
  if (mappingLevel < Sequential) {
    mappingLevel = static_cast<MappingLevel>(mappingLevel + 1);
  }
  return mappingLevel;
}

/// Computes the hardware id to use for a given mapping level. Assigns x, y
/// and z hardware ids to the first 3 dimensions and maps all remaining
/// dimensions to sequential.
/// TODO(ravishankarm/herhut): Make this map the innermost distributed loop to
/// x, the next innermost to y and the next innermost to z.
static gpu::Processor getHardwareIdForMapping(MappingLevel level,
                                              int dimension) {

  if (dimension >= kNumHardwareIds || level == Sequential)
    return Processor::Sequential;
  switch (level) {
  case MapGrid:
    switch (dimension) {
    case 0:
      return Processor::BlockX;
    case 1:
      return Processor::BlockY;
    case 2:
      return Processor::BlockZ;
    default:
      return Processor::Sequential;
    }
    break;
  case MapBlock:
    switch (dimension) {
    case 0:
      return Processor::ThreadX;
    case 1:
      return Processor::ThreadY;
    case 2:
      return Processor::ThreadZ;
    default:
      return Processor::Sequential;
    }
  default:;
  }
  return Processor::Sequential;
}

/// Add mapping information to the given parallel loop. Do not add
/// mapping information if the loop already has it. Also, don't
/// start a mapping at a nested loop.
static void mapParallelOp(ParallelOp parallelOp,
                          MappingLevel mappingLevel = MapGrid) {
  // Do not try to add a mapping to already mapped loops or nested loops.
  if (parallelOp.getAttr(getMappingAttrName()) ||
      ((mappingLevel == MapGrid) && parallelOp.getParentOfType<ParallelOp>()))
    return;

  MLIRContext *ctx = parallelOp.getContext();
  Builder b(ctx);
  SmallVector<ParallelLoopDimMapping, 4> attrs;
  attrs.reserve(parallelOp.getNumLoops());
  for (int i = 0, e = parallelOp.getNumLoops(); i < e; ++i) {
    attrs.push_back(getParallelLoopDimMappingAttr(
        getHardwareIdForMapping(mappingLevel, i), b.getDimIdentityMap(),
        b.getDimIdentityMap()));
  }
  setMappingAttr(parallelOp, attrs);
  ++mappingLevel;
  // Parallel loop operations are immediately nested, so do not use
  // walk but just iterate over the operations.
  for (Operation &op : *parallelOp.getBody()) {
    if (ParallelOp nested = dyn_cast<ParallelOp>(op))
      mapParallelOp(nested, mappingLevel);
  }
}

void mlir::greedilyMapParallelSCFToGPU(Region &region) {
  region.walk([](ParallelOp parallelOp) { mapParallelOp(parallelOp); });
}
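
// Illustrative note (a sketch derived from the helpers above, not verified
// test output; the exact printed attribute syntax may differ): running
// greedilyMapParallelSCFToGPU over a region containing
//
//   scf.parallel (%i, %j) = ... {          // outermost 2-d loop
//     scf.parallel (%ii, %jj) = ... {...}  // immediately nested loop
//   }
//
// is expected to annotate the outer loop roughly as
//
//   mapping = [{processor = BlockX, map = (d0) -> (d0), bound = (d0) -> (d0)},
//              {processor = BlockY, map = (d0) -> (d0), bound = (d0) -> (d0)}]
//
// and the inner loop with the corresponding ThreadX/ThreadY entries, where
// the processor field is actually stored as the i64 value of the
// gpu::Processor enum case and the maps come from getDimIdentityMap().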