//===- ParallelLoopMapper.cpp - Utilities for mapping parallel loops to GPU =//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements utilities to generate mappings for parallel loops to
// GPU devices.
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/GPU/ParallelLoopMapper.h"

#include "PassDetail.h"
#include "mlir/Dialect/GPU/GPUDialect.h"
#include "mlir/Dialect/GPU/Passes.h"
#include "mlir/Dialect/SCF/SCF.h"
#include "mlir/IR/AffineMap.h"

namespace mlir {

using scf::ParallelOp;

StringRef gpu::getMappingAttrName() { return "mapping"; }

LogicalResult
gpu::setMappingAttr(ParallelOp ploopOp,
                    ArrayRef<ParallelLoopDimMappingAttr> mapping) {
  // Verify that each processor is mapped to at most once.
  llvm::DenseSet<gpu::Processor> specifiedMappings;
  for (auto dimAttr : mapping) {
    gpu::Processor processor = dimAttr.getProcessor();
    if (processor != gpu::Processor::Sequential &&
        specifiedMappings.count(processor))
      return ploopOp.emitError(
          "invalid mapping multiple loops to same processor");
    specifiedMappings.insert(processor);
  }
  ArrayRef<Attribute> mappingAsAttrs(mapping.data(), mapping.size());
  ploopOp->setAttr(getMappingAttrName(),
                   ArrayAttr::get(ploopOp.getContext(), mappingAsAttrs));
  return success();
}

namespace gpu {
namespace {
enum MappingLevel { MapGrid = 0, MapBlock = 1, Sequential = 2 };
} // namespace

static constexpr int kNumHardwareIds = 3;

/// Bounded increment on MappingLevel. Increments to the next
/// level unless Sequential was already reached.
static MappingLevel &operator++(MappingLevel &mappingLevel) {
  if (mappingLevel < Sequential) {
    mappingLevel = static_cast<MappingLevel>(mappingLevel + 1);
  }
  return mappingLevel;
}

/// Computes the hardware id to use for a given mapping level. Assigns the
/// x, y and z hardware ids to the first 3 dimensions and maps all remaining
/// dimensions to sequential.
/// TODO: Make the innermost distributed loop map to x, the next innermost to
/// y and the next innermost to z.
static Processor getHardwareIdForMapping(MappingLevel level, int dimension) {
  if (dimension >= kNumHardwareIds || level == Sequential)
    return Processor::Sequential;
  switch (level) {
  case MapGrid:
    switch (dimension) {
    case 0:
      return Processor::BlockX;
    case 1:
      return Processor::BlockY;
    case 2:
      return Processor::BlockZ;
    default:
      return Processor::Sequential;
    }
    break;
  case MapBlock:
    switch (dimension) {
    case 0:
      return Processor::ThreadX;
    case 1:
      return Processor::ThreadY;
    case 2:
      return Processor::ThreadZ;
    default:
      return Processor::Sequential;
    }
  default:;
  }
  return Processor::Sequential;
}
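
// As an illustrative sketch of the intended result (the IR names are
// hypothetical and the printed attribute form is abbreviated), a loop nest
// such as
//
//   scf.parallel (%i, %j) = (%c0, %c0) to (%n, %m) step (%c1, %c1) {
//     scf.parallel (%k) = (%c0) to (%p) step (%c1) { ... }
//   }
//
// would be annotated so that the outer loop maps %i and %j to block_x and
// block_y (grid level), the inner loop maps %k to thread_x (block level),
// and any deeper nesting is marked sequential.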

/// Add mapping information to the given parallel loop. Do not add
/// mapping information if the loop already has it. Also, don't
/// start a mapping at a nested loop.
static void mapParallelOp(ParallelOp parallelOp,
                          MappingLevel mappingLevel = MapGrid) {
  // Do not try to add a mapping to already mapped loops or nested loops.
  if (parallelOp->getAttr(getMappingAttrName()) ||
      ((mappingLevel == MapGrid) && parallelOp->getParentOfType<ParallelOp>()))
    return;

  MLIRContext *ctx = parallelOp.getContext();
  Builder b(ctx);
  SmallVector<ParallelLoopDimMappingAttr, 4> attrs;
  attrs.reserve(parallelOp.getNumLoops());
  for (int i = 0, e = parallelOp.getNumLoops(); i < e; ++i) {
    attrs.push_back(b.getAttr<ParallelLoopDimMappingAttr>(
        getHardwareIdForMapping(mappingLevel, i), b.getDimIdentityMap(),
        b.getDimIdentityMap()));
  }
  (void)setMappingAttr(parallelOp, attrs);
  ++mappingLevel;
  // Parallel loop operations are immediately nested, so do not use
  // walk but just iterate over the operations.
  for (Operation &op : *parallelOp.getBody()) {
    if (ParallelOp nested = dyn_cast<ParallelOp>(op))
      mapParallelOp(nested, mappingLevel);
  }
}

namespace {
struct GpuMapParallelLoopsPass
    : public GpuMapParallelLoopsPassBase<GpuMapParallelLoopsPass> {
  void runOnOperation() override {
    for (Region &region : getOperation()->getRegions()) {
      region.walk([](ParallelOp parallelOp) { mapParallelOp(parallelOp); });
    }
  }
};

} // namespace
} // namespace gpu
} // namespace mlir

std::unique_ptr<mlir::OperationPass<mlir::func::FuncOp>>
mlir::createGpuMapParallelLoopsPass() {
  return std::make_unique<gpu::GpuMapParallelLoopsPass>();
}
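
// A minimal usage sketch (not part of this file; `context` and `module` are
// placeholders for a caller's MLIRContext and ModuleOp): the pass is normally
// scheduled on each function before lowering to the GPU dialect, e.g.
//
//   mlir::PassManager pm(&context);
//   pm.addNestedPass<mlir::func::FuncOp>(
//       mlir::createGpuMapParallelLoopsPass());
//   if (failed(pm.run(module)))
//     llvm::errs() << "failed to map parallel loops\n";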