//===- GPUOpsLowering.h - GPU FuncOp / ReturnOp lowering -------*- C++ -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef MLIR_CONVERSION_GPUCOMMON_GPUOPSLOWERING_H_
#define MLIR_CONVERSION_GPUCOMMON_GPUOPSLOWERING_H_

#include "mlir/Conversion/LLVMCommon/Pattern.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"

namespace mlir {

//===----------------------------------------------------------------------===//
// Helper Functions
//===----------------------------------------------------------------------===//

/// Find or create an external function declaration in the given module.
LLVM::LLVMFuncOp getOrDefineFunction(gpu::GPUModuleOp moduleOp, Location loc,
                                     OpBuilder &b, StringRef name,
                                     LLVM::LLVMFunctionType type);

/// Create a global that contains the given string. If a global with the same
/// string already exists in the module, return that global.
LLVM::GlobalOp getOrCreateStringConstant(OpBuilder &b, Location loc,
                                         gpu::GPUModuleOp moduleOp, Type llvmI8,
                                         StringRef namePrefix, StringRef str,
                                         uint64_t alignment = 0,
                                         unsigned addrSpace = 0);

//===----------------------------------------------------------------------===//
// Lowering Patterns
//===----------------------------------------------------------------------===//

/// Lowering for gpu.dynamic.shared.memory to the LLVM dialect. The pattern
/// first creates a 0-sized global array symbol, mirroring what LLVM expects.
/// It then constructs a memref descriptor from these values and returns it.
struct GPUDynamicSharedMemoryOpLowering
    : public ConvertOpToLLVMPattern<gpu::DynamicSharedMemoryOp> {
  using ConvertOpToLLVMPattern<
      gpu::DynamicSharedMemoryOp>::ConvertOpToLLVMPattern;
  GPUDynamicSharedMemoryOpLowering(const LLVMTypeConverter &converter,
                                   unsigned alignmentBit = 0)
      : ConvertOpToLLVMPattern<gpu::DynamicSharedMemoryOp>(converter),
        alignmentBit(alignmentBit) {}

  LogicalResult
  matchAndRewrite(gpu::DynamicSharedMemoryOp op, OpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override;

private:
  /// The alignment, in bits, of the generated global symbol.
  unsigned alignmentBit;
};

struct GPUFuncOpLoweringOptions {
  /// The address space to use for `alloca`s in private memory.
  unsigned allocaAddrSpace;
  /// The address space to use for declaring workgroup memory.
  unsigned workgroupAddrSpace;

  /// The attribute name to use instead of `gpu.kernel`. Null if no attribute
  /// should be used.
  StringAttr kernelAttributeName;
  /// The attribute name to use to set the block size. Null if no attribute
  /// should be used.
  StringAttr kernelBlockSizeAttributeName;

  /// The calling convention to use for kernel functions.
  LLVM::CConv kernelCallingConvention = LLVM::CConv::C;
  /// The calling convention to use for non-kernel functions.
  LLVM::CConv nonKernelCallingConvention = LLVM::CConv::C;

  /// Whether to encode workgroup attributions as additional arguments instead
  /// of a global variable.
  bool encodeWorkgroupAttributionsAsArguments = false;
};
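
// Example (illustrative sketch, not part of the API in this header): a target
// lowering might populate these options along the following lines. The
// address spaces and attribute name follow AMDGPU/ROCDL conventions
// (private memory = address space 5, workgroup/LDS memory = 3), and
// `patterns` and `typeConverter` are assumed to be in scope; take the real
// values from the actual target lowering rather than from this comment.
//
//   GPUFuncOpLoweringOptions options;
//   options.allocaAddrSpace = 5;
//   options.workgroupAddrSpace = 3;
//   options.kernelAttributeName =
//       StringAttr::get(&typeConverter.getContext(), "rocdl.kernel");
//   patterns.add<GPUFuncOpLowering>(typeConverter, options);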

struct GPUFuncOpLowering : ConvertOpToLLVMPattern<gpu::GPUFuncOp> {
  GPUFuncOpLowering(const LLVMTypeConverter &converter,
                    const GPUFuncOpLoweringOptions &options)
      : ConvertOpToLLVMPattern<gpu::GPUFuncOp>(converter),
        allocaAddrSpace(options.allocaAddrSpace),
        workgroupAddrSpace(options.workgroupAddrSpace),
        kernelAttributeName(options.kernelAttributeName),
        kernelBlockSizeAttributeName(options.kernelBlockSizeAttributeName),
        kernelCallingConvention(options.kernelCallingConvention),
        nonKernelCallingConvention(options.nonKernelCallingConvention),
        encodeWorkgroupAttributionsAsArguments(
            options.encodeWorkgroupAttributionsAsArguments) {}

  LogicalResult
  matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override;

private:
  /// The address space to use for `alloca`s in private memory.
  unsigned allocaAddrSpace;
  /// The address space to use for declaring workgroup memory.
  unsigned workgroupAddrSpace;

  /// The attribute name to use instead of `gpu.kernel`. Null if no attribute
  /// should be used.
  StringAttr kernelAttributeName;
  /// The attribute name to use to set the block size. Null if no attribute
  /// should be used.
  StringAttr kernelBlockSizeAttributeName;

  /// The calling convention to use for kernel functions.
  LLVM::CConv kernelCallingConvention;
  /// The calling convention to use for non-kernel functions.
  LLVM::CConv nonKernelCallingConvention;

  /// Whether to encode workgroup attributions as additional arguments instead
  /// of a global variable.
  bool encodeWorkgroupAttributionsAsArguments;
};

/// The lowering of gpu.printf to calls to HIP's hostcall mechanism.
///
/// Simplifies llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp, as we don't have
/// to deal with %s (even if there were first-class strings in MLIR, they're
/// not legal input to gpu.printf) or non-constant format strings.
struct GPUPrintfOpToHIPLowering : public ConvertOpToLLVMPattern<gpu::PrintfOp> {
  using ConvertOpToLLVMPattern<gpu::PrintfOp>::ConvertOpToLLVMPattern;

  LogicalResult
  matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override;
};

/// The lowering of gpu.printf to a call to an external printf() function.
///
/// This pass will add a declaration of printf() to the GPUModule if needed
/// and separate out the format strings into global constants. For some
/// runtimes, such as OpenCL on AMD, this is sufficient setup, as the compiler
/// will lower printf calls to appropriate device-side code.
struct GPUPrintfOpToLLVMCallLowering
    : public ConvertOpToLLVMPattern<gpu::PrintfOp> {
  GPUPrintfOpToLLVMCallLowering(const LLVMTypeConverter &converter,
                                int addressSpace = 0)
      : ConvertOpToLLVMPattern<gpu::PrintfOp>(converter),
        addressSpace(addressSpace) {}

  LogicalResult
  matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override;

private:
  int addressSpace;
};
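
// Example (sketch): conceptually, GPUPrintfOpToLLVMCallLowering rewrites
//
//   gpu.printf "Hello %d\n" %val : i32
//
// into a module-level format-string constant plus a call to an external
// printf declaration, roughly:
//
//   llvm.mlir.global internal constant @printfFormat_0("Hello %d\0A\00")
//   llvm.func @printf(!llvm.ptr, ...) -> i32
//   ...
//   %fmt = llvm.mlir.addressof @printfFormat_0 : !llvm.ptr
//   %0 = llvm.call @printf(%fmt, %val) vararg(!llvm.func<i32 (ptr, ...)>)
//       : (!llvm.ptr, i32) -> i32
//
// The exact global naming, address spaces, and call syntax vary by target and
// MLIR version; treat this as illustrative only.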

/// Lowering of gpu.printf to a call to the vprintf standard library function.
struct GPUPrintfOpToVPrintfLowering
    : public ConvertOpToLLVMPattern<gpu::PrintfOp> {
  using ConvertOpToLLVMPattern<gpu::PrintfOp>::ConvertOpToLLVMPattern;

  LogicalResult
  matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override;
};

struct GPUReturnOpLowering : public ConvertOpToLLVMPattern<gpu::ReturnOp> {
  using ConvertOpToLLVMPattern<gpu::ReturnOp>::ConvertOpToLLVMPattern;

  LogicalResult
  matchAndRewrite(gpu::ReturnOp op, OpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override;
};

namespace impl {
/// Unrolls op if it's operating on vectors.
LogicalResult scalarizeVectorOp(Operation *op, ValueRange operands,
                                ConversionPatternRewriter &rewriter,
                                const LLVMTypeConverter &converter);
} // namespace impl

/// Rewriting that unrolls SourceOp to scalars if it's operating on vectors.
template <typename SourceOp>
struct ScalarizeVectorOpLowering : public ConvertOpToLLVMPattern<SourceOp> {
public:
  using ConvertOpToLLVMPattern<SourceOp>::ConvertOpToLLVMPattern;

  LogicalResult
  matchAndRewrite(SourceOp op, typename SourceOp::Adaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override {
    return impl::scalarizeVectorOp(op, adaptor.getOperands(), rewriter,
                                   *this->getTypeConverter());
  }
};
} // namespace mlir

#endif // MLIR_CONVERSION_GPUCOMMON_GPUOPSLOWERING_H_