1 //===- SCFToGPUPass.cpp - Convert a loop nest to a GPU kernel -----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "mlir/Conversion/SCFToGPU/SCFToGPUPass.h" 10 11 #include "mlir/Conversion/SCFToGPU/SCFToGPU.h" 12 #include "mlir/Dialect/Affine/IR/AffineOps.h" 13 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h" 14 #include "mlir/Dialect/Complex/IR/Complex.h" 15 #include "mlir/Dialect/GPU/IR/GPUDialect.h" 16 #include "mlir/Dialect/SCF/IR/SCF.h" 17 #include "mlir/Pass/Pass.h" 18 #include "mlir/Transforms/DialectConversion.h" 19 #include "llvm/ADT/ArrayRef.h" 20 #include "llvm/Support/CommandLine.h" 21 22 namespace mlir { 23 #define GEN_PASS_DEF_CONVERTAFFINEFORTOGPU 24 #define GEN_PASS_DEF_CONVERTPARALLELLOOPTOGPU 25 #include "mlir/Conversion/Passes.h.inc" 26 } // namespace mlir 27 28 using namespace mlir; 29 using namespace mlir::scf; 30 31 namespace { 32 // A pass that traverses top-level loops in the function and converts them to 33 // GPU launch operations. Nested launches are not allowed, so this does not 34 // walk the function recursively to avoid considering nested loops. 35 struct ForLoopMapper : public impl::ConvertAffineForToGPUBase<ForLoopMapper> { 36 ForLoopMapper() = default; 37 ForLoopMapper(unsigned numBlockDims, unsigned numThreadDims) { 38 this->numBlockDims = numBlockDims; 39 this->numThreadDims = numThreadDims; 40 } 41 42 void runOnOperation() override { 43 for (Operation &op : 44 llvm::make_early_inc_range(getOperation().getBody().getOps())) { 45 if (auto forOp = dyn_cast<AffineForOp>(&op)) { 46 if (failed(convertAffineLoopNestToGPULaunch(forOp, numBlockDims, 47 numThreadDims))) 48 signalPassFailure(); 49 } 50 } 51 } 52 }; 53 54 struct ParallelLoopToGpuPass 55 : public impl::ConvertParallelLoopToGpuBase<ParallelLoopToGpuPass> { 56 void runOnOperation() override { 57 RewritePatternSet patterns(&getContext()); 58 populateParallelLoopToGPUPatterns(patterns); 59 ConversionTarget target(getContext()); 60 target.markUnknownOpDynamicallyLegal([](Operation *) { return true; }); 61 configureParallelLoopToGPULegality(target); 62 if (failed(applyPartialConversion(getOperation(), target, 63 std::move(patterns)))) 64 signalPassFailure(); 65 finalizeParallelLoopToGPUConversion(getOperation()); 66 } 67 }; 68 69 } // namespace 70 71 std::unique_ptr<InterfacePass<FunctionOpInterface>> 72 mlir::createAffineForToGPUPass(unsigned numBlockDims, unsigned numThreadDims) { 73 return std::make_unique<ForLoopMapper>(numBlockDims, numThreadDims); 74 } 75 std::unique_ptr<InterfacePass<FunctionOpInterface>> 76 mlir::createAffineForToGPUPass() { 77 return std::make_unique<ForLoopMapper>(); 78 } 79 80 std::unique_ptr<Pass> mlir::createParallelLoopToGpuPass() { 81 return std::make_unique<ParallelLoopToGpuPass>(); 82 } 83