xref: /llvm-project/mlir/lib/Conversion/SCFToGPU/SCFToGPUPass.cpp (revision dc4e913be9c3d1c37f66348d4b5047a107499b53)
1 //===- SCFToGPUPass.cpp - Convert a loop nest to a GPU kernel -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "mlir/Conversion/SCFToGPU/SCFToGPUPass.h"
10 #include "../PassDetail.h"
11 #include "mlir/Conversion/SCFToGPU/SCFToGPU.h"
12 #include "mlir/Dialect/Affine/IR/AffineOps.h"
13 #include "mlir/Dialect/Complex/IR/Complex.h"
14 #include "mlir/Dialect/GPU/GPUDialect.h"
15 #include "mlir/Dialect/SCF/SCF.h"
16 #include "mlir/Dialect/StandardOps/IR/Ops.h"
17 #include "mlir/Transforms/DialectConversion.h"
18 
19 #include "llvm/ADT/ArrayRef.h"
20 #include "llvm/Support/CommandLine.h"
21 
22 using namespace mlir;
23 using namespace mlir::scf;
24 
25 namespace {
26 // A pass that traverses top-level loops in the function and converts them to
27 // GPU launch operations.  Nested launches are not allowed, so this does not
28 // walk the function recursively to avoid considering nested loops.
29 struct ForLoopMapper : public ConvertAffineForToGPUBase<ForLoopMapper> {
30   ForLoopMapper() = default;
31   ForLoopMapper(unsigned numBlockDims, unsigned numThreadDims) {
32     this->numBlockDims = numBlockDims;
33     this->numThreadDims = numThreadDims;
34   }
35 
36   void runOnFunction() override {
37     for (Operation &op : llvm::make_early_inc_range(getFunction().getOps())) {
38       if (auto forOp = dyn_cast<AffineForOp>(&op)) {
39         if (failed(convertAffineLoopNestToGPULaunch(forOp, numBlockDims,
40                                                     numThreadDims)))
41           signalPassFailure();
42       }
43     }
44   }
45 };
46 
47 struct ParallelLoopToGpuPass
48     : public ConvertParallelLoopToGpuBase<ParallelLoopToGpuPass> {
49   void runOnOperation() override {
50     RewritePatternSet patterns(&getContext());
51     populateParallelLoopToGPUPatterns(patterns);
52     ConversionTarget target(getContext());
53     target.markUnknownOpDynamicallyLegal([](Operation *) { return true; });
54     configureParallelLoopToGPULegality(target);
55     if (failed(applyPartialConversion(getOperation(), target,
56                                       std::move(patterns))))
57       signalPassFailure();
58   }
59 };
60 
61 } // namespace
62 
63 std::unique_ptr<OperationPass<FuncOp>>
64 mlir::createAffineForToGPUPass(unsigned numBlockDims, unsigned numThreadDims) {
65   return std::make_unique<ForLoopMapper>(numBlockDims, numThreadDims);
66 }
67 std::unique_ptr<OperationPass<FuncOp>> mlir::createAffineForToGPUPass() {
68   return std::make_unique<ForLoopMapper>();
69 }
70 
71 std::unique_ptr<Pass> mlir::createParallelLoopToGpuPass() {
72   return std::make_unique<ParallelLoopToGpuPass>();
73 }
74