xref: /llvm-project/mlir/lib/Conversion/SCFToGPU/SCFToGPUPass.cpp (revision 2bcd1927dd9f3ffc12b990bb132995a4c68f998e)
1 //===- SCFToGPUPass.cpp - Convert a loop nest to a GPU kernel -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "mlir/Conversion/SCFToGPU/SCFToGPUPass.h"
10 #include "../PassDetail.h"
11 #include "mlir/Conversion/SCFToGPU/SCFToGPU.h"
12 #include "mlir/Dialect/Affine/IR/AffineOps.h"
13 #include "mlir/Dialect/GPU/GPUDialect.h"
14 #include "mlir/Dialect/SCF/SCF.h"
15 #include "mlir/Dialect/StandardOps/IR/Ops.h"
16 #include "mlir/Transforms/DialectConversion.h"
17 
18 #include "llvm/ADT/ArrayRef.h"
19 #include "llvm/Support/CommandLine.h"
20 
// NOTE(review): neither macro is referenced in the visible portion of this
// file; presumably they are pass names kept for registration or are leftovers
// -- confirm before relying on or removing them.
21 #define PASS_NAME "convert-affine-for-to-gpu"
22 #define LOOPOP_TO_GPU_PASS_NAME "convert-loop-op-to-gpu"
23 
24 using namespace mlir;
25 using namespace mlir::scf;
26 
27 namespace {
28 // A pass that traverses top-level loops in the function and converts them to
29 // GPU launch operations.  Nested launches are not allowed, so this does not
30 // walk the function recursively to avoid considering nested loops.
31 struct ForLoopMapper : public ConvertAffineForToGPUBase<ForLoopMapper> {
32   ForLoopMapper() = default;
33   ForLoopMapper(unsigned numBlockDims, unsigned numThreadDims) {
34     this->numBlockDims = numBlockDims;
35     this->numThreadDims = numThreadDims;
36   }
37 
38   void runOnFunction() override {
39     for (Operation &op : llvm::make_early_inc_range(getFunction().getOps())) {
40       if (auto forOp = dyn_cast<AffineForOp>(&op)) {
41         if (failed(convertAffineLoopNestToGPULaunch(forOp, numBlockDims,
42                                                     numThreadDims)))
43           signalPassFailure();
44       }
45     }
46   }
47 };
48 
49 struct ParallelLoopToGpuPass
50     : public ConvertParallelLoopToGpuBase<ParallelLoopToGpuPass> {
51   void runOnOperation() override {
52     OwningRewritePatternList patterns;
53     populateParallelLoopToGPUPatterns(patterns, &getContext());
54     ConversionTarget target(getContext());
55     target.addLegalDialect<StandardOpsDialect>();
56     target.addLegalDialect<AffineDialect>();
57     target.addLegalDialect<gpu::GPUDialect>();
58     target.addLegalDialect<scf::SCFDialect>();
59     target.addIllegalOp<scf::ParallelOp>();
60     if (failed(applyPartialConversion(getOperation(), target, patterns)))
61       signalPassFailure();
62   }
63 };
64 
65 } // namespace
66 
67 std::unique_ptr<OperationPass<FuncOp>>
68 mlir::createAffineForToGPUPass(unsigned numBlockDims, unsigned numThreadDims) {
69   return std::make_unique<ForLoopMapper>(numBlockDims, numThreadDims);
70 }
71 std::unique_ptr<OperationPass<FuncOp>> mlir::createAffineForToGPUPass() {
72   return std::make_unique<ForLoopMapper>();
73 }
74 
75 std::unique_ptr<Pass> mlir::createParallelLoopToGpuPass() {
76   return std::make_unique<ParallelLoopToGpuPass>();
77 }
78