xref: /llvm-project/mlir/lib/Conversion/SCFToGPU/SCFToGPUPass.cpp (revision 2be8af8f0e0780901213b6fd3013a5268ddc3359)
1 //===- SCFToGPUPass.cpp - Convert a loop nest to a GPU kernel -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "mlir/Conversion/SCFToGPU/SCFToGPUPass.h"
10 
11 #include "mlir/Conversion/SCFToGPU/SCFToGPU.h"
12 #include "mlir/Dialect/Affine/IR/AffineOps.h"
13 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
14 #include "mlir/Dialect/Complex/IR/Complex.h"
15 #include "mlir/Dialect/GPU/IR/GPUDialect.h"
16 #include "mlir/Dialect/SCF/IR/SCF.h"
17 #include "mlir/Pass/Pass.h"
18 #include "mlir/Transforms/DialectConversion.h"
19 #include "llvm/ADT/ArrayRef.h"
20 #include "llvm/Support/CommandLine.h"
21 
22 namespace mlir {
23 #define GEN_PASS_DEF_CONVERTAFFINEFORTOGPUPASS
24 #define GEN_PASS_DEF_CONVERTPARALLELLOOPTOGPUPASS
25 #include "mlir/Conversion/Passes.h.inc"
26 } // namespace mlir
27 
28 using namespace mlir;
29 using namespace mlir::scf;
30 
31 namespace {
32 // A pass that traverses top-level loops in the function and converts them to
33 // GPU launch operations.  Nested launches are not allowed, so this does not
34 // walk the function recursively to avoid considering nested loops.
35 struct ForLoopMapper
36     : public impl::ConvertAffineForToGPUPassBase<ForLoopMapper> {
37   ForLoopMapper() = default;
38   ForLoopMapper(unsigned numBlockDims, unsigned numThreadDims) {
39     this->numBlockDims = numBlockDims;
40     this->numThreadDims = numThreadDims;
41   }
42 
43   void runOnOperation() override {
44     for (Operation &op :
45          llvm::make_early_inc_range(getOperation().getBody().getOps())) {
46       if (auto forOp = dyn_cast<AffineForOp>(&op)) {
47         if (failed(convertAffineLoopNestToGPULaunch(forOp, numBlockDims,
48                                                     numThreadDims)))
49           signalPassFailure();
50       }
51     }
52   }
53 };
54 
55 struct ParallelLoopToGpuPass
56     : public impl::ConvertParallelLoopToGpuPassBase<ParallelLoopToGpuPass> {
57   using ConvertParallelLoopToGpuPassBase::ConvertParallelLoopToGpuPassBase;
58 
59   void runOnOperation() override {
60     RewritePatternSet patterns(&getContext());
61     populateParallelLoopToGPUPatterns(patterns);
62     ConversionTarget target(getContext());
63     target.markUnknownOpDynamicallyLegal([](Operation *) { return true; });
64     configureParallelLoopToGPULegality(target);
65     if (failed(applyPartialConversion(getOperation(), target,
66                                       std::move(patterns))))
67       signalPassFailure();
68     finalizeParallelLoopToGPUConversion(getOperation());
69   }
70 };
71 
72 } // namespace
73 
74 std::unique_ptr<InterfacePass<FunctionOpInterface>>
75 mlir::createAffineForToGPUPass(unsigned numBlockDims, unsigned numThreadDims) {
76   return std::make_unique<ForLoopMapper>(numBlockDims, numThreadDims);
77 }
78 std::unique_ptr<InterfacePass<FunctionOpInterface>>
79 mlir::createAffineForToGPUPass() {
80   return std::make_unique<ForLoopMapper>();
81 }
82