1 //===- Passes.h - Sparse tensor pipeline entry points -----------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This header file defines prototypes of all sparse tensor pipelines. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #ifndef MLIR_DIALECT_SPARSETENSOR_PIPELINES_PASSES_H_ 14 #define MLIR_DIALECT_SPARSETENSOR_PIPELINES_PASSES_H_ 15 16 #include "mlir/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.h" 17 #include "mlir/Dialect/SparseTensor/Transforms/Passes.h" 18 #include "mlir/Pass/PassOptions.h" 19 20 using namespace mlir::detail; 21 using namespace llvm::cl; 22 23 namespace mlir { 24 namespace sparse_tensor { 25 26 /// Options for the "sparsifier" pipeline. So far this only contains 27 /// a subset of the options that can be set for the underlying passes, 28 /// because it must be manually kept in sync with the tablegen files 29 /// for those passes. 30 struct SparsifierOptions : public PassPipelineOptions<SparsifierOptions> { 31 // These options must be kept in sync with `SparsificationBase`. 32 // TODO(57514): These options are duplicated in Passes.td. 33 PassOptions::Option<mlir::SparseParallelizationStrategy> parallelization{ 34 *this, "parallelization-strategy", 35 ::llvm::cl::desc("Set the parallelization strategy"), 36 ::llvm::cl::init(mlir::SparseParallelizationStrategy::kNone), 37 llvm::cl::values( 38 clEnumValN(mlir::SparseParallelizationStrategy::kNone, "none", 39 "Turn off sparse parallelization."), 40 clEnumValN(mlir::SparseParallelizationStrategy::kDenseOuterLoop, 41 "dense-outer-loop", 42 "Enable dense outer loop sparse parallelization."), 43 clEnumValN(mlir::SparseParallelizationStrategy::kAnyStorageOuterLoop, 44 "any-storage-outer-loop", 45 "Enable sparse parallelization regardless of storage for " 46 "the outer loop."), 47 clEnumValN(mlir::SparseParallelizationStrategy::kDenseAnyLoop, 48 "dense-any-loop", 49 "Enable dense parallelization for any loop."), 50 clEnumValN( 51 mlir::SparseParallelizationStrategy::kAnyStorageAnyLoop, 52 "any-storage-any-loop", 53 "Enable sparse parallelization for any storage and loop."))}; 54 PassOptions::Option<mlir::SparseEmitStrategy> emitStrategy{ 55 *this, "sparse-emit-strategy", 56 ::llvm::cl::desc( 57 "Emit functional code or interfaces (to debug) for sparse loops"), 58 ::llvm::cl::init(mlir::SparseEmitStrategy::kFunctional), 59 llvm::cl::values( 60 clEnumValN(mlir::SparseEmitStrategy::kFunctional, "functional", 61 "Emit functional code (with scf.for/while)."), 62 clEnumValN(mlir::SparseEmitStrategy::kSparseIterator, 63 "sparse-iterator", 64 "Emit (experimental) loops (with sparse.iterate)."), 65 clEnumValN( 66 mlir::SparseEmitStrategy::kDebugInterface, "debug-interface", 67 "Emit non-functional but easy-to-read interfaces to debug."))}; 68 69 PassOptions::Option<bool> enableRuntimeLibrary{ 70 *this, "enable-runtime-library", 71 desc("Enable runtime library for manipulating sparse tensors"), 72 init(true)}; 73 74 PassOptions::Option<bool> testBufferizationAnalysisOnly{ 75 *this, "test-bufferization-analysis-only", 76 desc("Run only the inplacability analysis"), init(false)}; 77 78 PassOptions::Option<bool> enableBufferInitialization{ 79 *this, "enable-buffer-initialization", 80 desc("Enable zero-initialization of memory buffers"), init(false)}; 81 82 // TODO: Delete the option, it should also be false after switching to 83 // buffer-deallocation-pass 84 PassOptions::Option<bool> createSparseDeallocs{ 85 *this, "create-sparse-deallocs", 86 desc("Specify if the temporary buffers created by the sparse " 87 "compiler should be deallocated. For compatibility with core " 88 "bufferization passes. " 89 "This option is only used when enable-runtime-library=false."), 90 init(true)}; 91 92 PassOptions::Option<int32_t> vectorLength{ 93 *this, "vl", desc("Set the vector length (0 disables vectorization)"), 94 init(0)}; 95 96 // These options must be kept in sync with the `ConvertVectorToLLVM` 97 // (defined in include/mlir/Dialect/SparseTensor/Pipelines/Passes.h). 98 PassOptions::Option<bool> reassociateFPReductions{ 99 *this, "reassociate-fp-reductions", 100 desc("Allows llvm to reassociate floating-point reductions for speed"), 101 init(false)}; 102 PassOptions::Option<bool> force32BitVectorIndices{ 103 *this, "enable-index-optimizations", 104 desc("Allows compiler to assume indices fit in 32-bit if that yields " 105 "faster code"), 106 init(true)}; 107 PassOptions::Option<bool> amx{ 108 *this, "enable-amx", 109 desc("Enables the use of AMX dialect while lowering the vector dialect"), 110 init(false)}; 111 PassOptions::Option<bool> armNeon{ 112 *this, "enable-arm-neon", 113 desc("Enables the use of ArmNeon dialect while lowering the vector " 114 "dialect"), 115 init(false)}; 116 PassOptions::Option<bool> armSVE{ 117 *this, "enable-arm-sve", 118 desc("Enables the use of ArmSVE dialect while lowering the vector " 119 "dialect"), 120 init(false)}; 121 PassOptions::Option<bool> x86Vector{ 122 *this, "enable-x86vector", 123 desc("Enables the use of X86Vector dialect while lowering the vector " 124 "dialect"), 125 init(false)}; 126 127 /// These options are used to enable GPU code generation. 128 PassOptions::Option<std::string> gpuTriple{*this, "gpu-triple", 129 desc("GPU target triple")}; 130 PassOptions::Option<std::string> gpuChip{*this, "gpu-chip", 131 desc("GPU target architecture")}; 132 PassOptions::Option<std::string> gpuFeatures{*this, "gpu-features", 133 desc("GPU target features")}; 134 /// For NVIDIA GPUs there are 3 compilation format options: 135 /// 1. `isa`: the compiler generates PTX and the driver JITs the PTX. 136 /// 2. `bin`: generates a CUBIN object for `chip=gpuChip`. 137 /// 3. `fatbin`: generates a fat binary with a CUBIN object for `gpuChip` and 138 /// also embeds the PTX in the fat binary. 139 /// Notes: 140 /// Option 1 adds a significant runtime performance hit, however, tests are 141 /// more likely to pass with this option. 142 /// Option 2 is better for execution time as there is no JIT; however, the 143 /// program will fail if there's an architecture mismatch between `gpuChip` 144 /// and the GPU running the program. 145 /// Option 3 is the best compromise between options 1 and 2 as it can JIT in 146 /// case of an architecture mismatch between `gpuChip` and the running 147 /// architecture. However, it's only possible to JIT to a higher CC than 148 /// `gpuChip`. 149 PassOptions::Option<std::string> gpuFormat{ 150 *this, "gpu-format", desc("GPU compilation format"), init("fatbin")}; 151 152 /// This option is used to enable GPU library generation. 153 PassOptions::Option<bool> enableGPULibgen{ 154 *this, "enable-gpu-libgen", 155 desc("Enables GPU acceleration by means of direct library calls (like " 156 "cuSPARSE)")}; 157 158 /// Projects out the options for `createSparsificationPass`. 159 SparsificationOptions sparsificationOptions() const { 160 return SparsificationOptions(parallelization, emitStrategy, 161 enableRuntimeLibrary); 162 } 163 164 /// Projects out the options for `createConvertVectorToLLVMPass`. 165 ConvertVectorToLLVMPassOptions convertVectorToLLVMOptions() const { 166 ConvertVectorToLLVMPassOptions opts{}; 167 opts.reassociateFPReductions = reassociateFPReductions; 168 opts.force32BitVectorIndices = force32BitVectorIndices; 169 opts.armNeon = armNeon; 170 opts.armSVE = armSVE; 171 opts.amx = amx; 172 opts.x86Vector = x86Vector; 173 return opts; 174 } 175 }; 176 177 //===----------------------------------------------------------------------===// 178 // Building and Registering. 179 //===----------------------------------------------------------------------===// 180 181 /// Adds the "sparsifier" pipeline to the `OpPassManager`. This 182 /// is the standard pipeline for taking sparsity-agnostic IR using 183 /// the sparse-tensor type and lowering it to LLVM IR with concrete 184 /// representations and algorithms for sparse tensors. 185 void buildSparsifier(OpPassManager &pm, const SparsifierOptions &options); 186 187 /// Registers all pipelines for the `sparse_tensor` dialect. At present, 188 /// this includes only "sparsifier". 189 void registerSparseTensorPipelines(); 190 191 } // namespace sparse_tensor 192 } // namespace mlir 193 194 #endif // MLIR_DIALECT_SPARSETENSOR_PIPELINES_PASSES_H_ 195