xref: /llvm-project/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h (revision 599c73990532333e62edf8ba19a5302b543f976f)
1 //===- GPUOpsLowering.h - GPU FuncOp / ReturnOp lowering -------*- C++ -*--===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #ifndef MLIR_CONVERSION_GPUCOMMON_GPUOPSLOWERING_H_
9 #define MLIR_CONVERSION_GPUCOMMON_GPUOPSLOWERING_H_
10 
11 #include "mlir/Conversion/LLVMCommon/Pattern.h"
12 #include "mlir/Dialect/GPU/IR/GPUDialect.h"
13 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
14 
15 namespace mlir {
16 
17 //===----------------------------------------------------------------------===//
18 // Helper Functions
19 //===----------------------------------------------------------------------===//
20 
/// Find or create an external function declaration in the given module.
///
/// Looks up a function named `name` in `moduleOp` and returns it if present;
/// otherwise builds a new `llvm.func` declaration of type `type` using `b`,
/// attached to location `loc`, and returns that.
LLVM::LLVMFuncOp getOrDefineFunction(gpu::GPUModuleOp moduleOp, Location loc,
                                     OpBuilder &b, StringRef name,
                                     LLVM::LLVMFunctionType type);
25 
/// Create a global that contains the given string. If a global with the same
/// string already exists in the module, return that global.
///
/// `llvmI8` is the converted i8 element type used for the character array.
/// The global's symbol name is derived from `namePrefix` (uniqued against
/// existing symbols). An `alignment` of 0 and `addrSpace` of 0 request the
/// defaults.
LLVM::GlobalOp getOrCreateStringConstant(OpBuilder &b, Location loc,
                                         gpu::GPUModuleOp moduleOp, Type llvmI8,
                                         StringRef namePrefix, StringRef str,
                                         uint64_t alignment = 0,
                                         unsigned addrSpace = 0);
33 
34 //===----------------------------------------------------------------------===//
35 // Lowering Patterns
36 //===----------------------------------------------------------------------===//
37 
/// Lowering for gpu.dynamic.shared.memory to the LLVM dialect. The pattern
/// first creates a 0-sized global array symbol, as LLVM expects, then
/// constructs a memref descriptor with these values and returns it.
struct GPUDynamicSharedMemoryOpLowering
    : public ConvertOpToLLVMPattern<gpu::DynamicSharedMemoryOp> {
  using ConvertOpToLLVMPattern<
      gpu::DynamicSharedMemoryOp>::ConvertOpToLLVMPattern;
  /// \p alignmentBit requests an alignment for the generated global; 0 means
  /// use the default.
  GPUDynamicSharedMemoryOpLowering(const LLVMTypeConverter &converter,
                                   unsigned alignmentBit = 0)
      : ConvertOpToLLVMPattern<gpu::DynamicSharedMemoryOp>(converter),
        alignmentBit(alignmentBit) {}

  LogicalResult
  matchAndRewrite(gpu::DynamicSharedMemoryOp op, OpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override;

private:
  // Requested alignment, presumably expressed in bits given the name; 0
  // means default. NOTE(review): confirm the unit against the .cpp
  // implementation.
  unsigned alignmentBit;
};
58 
/// Options that control how GPUFuncOpLowering rewrites gpu.func ops.
struct GPUFuncOpLoweringOptions {
  /// The address space to use for `alloca`s in private memory.
  unsigned allocaAddrSpace;
  /// The address space to use for declaring workgroup memory.
  unsigned workgroupAddrSpace;

  /// The attribute name to use instead of `gpu.kernel`. Null if no attribute
  /// should be used.
  StringAttr kernelAttributeName;
  /// The attribute name to use to set the block size. Null if no attribute
  /// should be used.
  StringAttr kernelBlockSizeAttributeName;

  /// The calling convention to use for kernel functions.
  LLVM::CConv kernelCallingConvention = LLVM::CConv::C;
  /// The calling convention to use for non-kernel functions.
  LLVM::CConv nonKernelCallingConvention = LLVM::CConv::C;

  /// Whether to encode workgroup attributions as additional arguments instead
  /// of a global variable.
  bool encodeWorkgroupAttributionsAsArguments = false;
};
81 
/// Lowering of gpu.func to llvm.func, configured by GPUFuncOpLoweringOptions.
struct GPUFuncOpLowering : ConvertOpToLLVMPattern<gpu::GPUFuncOp> {
  /// Copies every field out of `options`, so the options object need not
  /// outlive the pattern.
  GPUFuncOpLowering(const LLVMTypeConverter &converter,
                    const GPUFuncOpLoweringOptions &options)
      : ConvertOpToLLVMPattern<gpu::GPUFuncOp>(converter),
        allocaAddrSpace(options.allocaAddrSpace),
        workgroupAddrSpace(options.workgroupAddrSpace),
        kernelAttributeName(options.kernelAttributeName),
        kernelBlockSizeAttributeName(options.kernelBlockSizeAttributeName),
        kernelCallingConvention(options.kernelCallingConvention),
        nonKernelCallingConvention(options.nonKernelCallingConvention),
        encodeWorkgroupAttributionsAsArguments(
            options.encodeWorkgroupAttributionsAsArguments) {}

  LogicalResult
  matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override;

private:
  /// The address space to use for `alloca`s in private memory.
  unsigned allocaAddrSpace;
  /// The address space to use for declaring workgroup memory.
  unsigned workgroupAddrSpace;

  /// The attribute name to use instead of `gpu.kernel`. Null if no attribute
  /// should be used.
  StringAttr kernelAttributeName;
  /// The attribute name to use to set the block size. Null if no attribute
  /// should be used.
  StringAttr kernelBlockSizeAttributeName;

  /// The calling convention to use for kernel functions.
  LLVM::CConv kernelCallingConvention;
  /// The calling convention to use for non-kernel functions.
  LLVM::CConv nonKernelCallingConvention;

  /// Whether to encode workgroup attributions as additional arguments instead
  /// of a global variable.
  bool encodeWorkgroupAttributionsAsArguments;
};
121 
/// The lowering of gpu.printf to a call to HIP hostcalls.
///
/// Simplifies llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp, as we don't have
/// to deal with %s (even if there were first-class strings in MLIR, they're not
/// legal input to gpu.printf) or non-constant format strings.
struct GPUPrintfOpToHIPLowering : public ConvertOpToLLVMPattern<gpu::PrintfOp> {
  using ConvertOpToLLVMPattern<gpu::PrintfOp>::ConvertOpToLLVMPattern;

  LogicalResult
  matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override;
};
134 
/// The lowering of gpu.printf to a call to an external printf() function.
///
/// This pass will add a declaration of printf() to the GPUModule if needed
/// and separate out the format strings into global constants. For some
/// runtimes, such as OpenCL on AMD, this is sufficient setup, as the compiler
/// will lower printf calls to appropriate device-side code.
struct GPUPrintfOpToLLVMCallLowering
    : public ConvertOpToLLVMPattern<gpu::PrintfOp> {
  /// \p addressSpace selects the address space used by the lowering;
  /// 0 is the default (generic) address space.
  GPUPrintfOpToLLVMCallLowering(const LLVMTypeConverter &converter,
                                int addressSpace = 0)
      : ConvertOpToLLVMPattern<gpu::PrintfOp>(converter),
        addressSpace(addressSpace) {}

  LogicalResult
  matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override;

private:
  // Address space for the lowering; presumably where the format-string
  // globals are placed. NOTE(review): confirm against the .cpp
  // implementation.
  int addressSpace;
};
155 
/// Lowering of gpu.printf to a call to the vprintf standard library function.
struct GPUPrintfOpToVPrintfLowering
    : public ConvertOpToLLVMPattern<gpu::PrintfOp> {
  using ConvertOpToLLVMPattern<gpu::PrintfOp>::ConvertOpToLLVMPattern;

  LogicalResult
  matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override;
};
165 
/// Lowering of gpu.return to the LLVM dialect.
struct GPUReturnOpLowering : public ConvertOpToLLVMPattern<gpu::ReturnOp> {
  using ConvertOpToLLVMPattern<gpu::ReturnOp>::ConvertOpToLLVMPattern;

  LogicalResult
  matchAndRewrite(gpu::ReturnOp op, OpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override;
};
173 
namespace impl {
/// Unrolls op if it's operating on vectors.
///
/// Shared non-template implementation backing the ScalarizeVectorOpLowering
/// pattern below. `operands` are the already-converted operands and
/// `converter` supplies the LLVM type conversions.
LogicalResult scalarizeVectorOp(Operation *op, ValueRange operands,
                                ConversionPatternRewriter &rewriter,
                                const LLVMTypeConverter &converter);
} // namespace impl
180 
181 /// Rewriting that unrolls SourceOp to scalars if it's operating on vectors.
182 template <typename SourceOp>
183 struct ScalarizeVectorOpLowering : public ConvertOpToLLVMPattern<SourceOp> {
184 public:
185   using ConvertOpToLLVMPattern<SourceOp>::ConvertOpToLLVMPattern;
186 
187   LogicalResult
188   matchAndRewrite(SourceOp op, typename SourceOp::Adaptor adaptor,
189                   ConversionPatternRewriter &rewriter) const override {
190     return impl::scalarizeVectorOp(op, adaptor.getOperands(), rewriter,
191                                    *this->getTypeConverter());
192   }
193 };
194 } // namespace mlir
195 
196 #endif // MLIR_CONVERSION_GPUCOMMON_GPUOPSLOWERING_H_
197