1 //===- ParallelLoopCollapsing.cpp - Pass collapsing parallel loop indices -===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "mlir/Dialect/SCF/Transforms/Passes.h" 10 11 #include "mlir/Dialect/SCF/IR/SCF.h" 12 #include "mlir/Dialect/SCF/Utils/Utils.h" 13 #include "mlir/Transforms/RegionUtils.h" 14 #include "llvm/ADT/SmallSet.h" 15 #include "llvm/Support/CommandLine.h" 16 #include "llvm/Support/Debug.h" 17 18 namespace mlir { 19 #define GEN_PASS_DEF_TESTSCFPARALLELLOOPCOLLAPSING 20 #include "mlir/Dialect/SCF/Transforms/Passes.h.inc" 21 } // namespace mlir 22 23 #define DEBUG_TYPE "parallel-loop-collapsing" 24 25 using namespace mlir; 26 27 namespace { 28 struct TestSCFParallelLoopCollapsing 29 : public impl::TestSCFParallelLoopCollapsingBase< 30 TestSCFParallelLoopCollapsing> { 31 void runOnOperation() override { 32 Operation *module = getOperation(); 33 34 // The common case for GPU dialect will be simplifying the ParallelOp to 3 35 // arguments, so we do that here to simplify things. 36 llvm::SmallVector<std::vector<unsigned>, 3> combinedLoops; 37 38 // Gather the input args into the format required by 39 // `collapseParallelLoops`. 40 if (!clCollapsedIndices0.empty()) 41 combinedLoops.push_back(clCollapsedIndices0); 42 if (!clCollapsedIndices1.empty()) { 43 if (clCollapsedIndices0.empty()) { 44 llvm::errs() 45 << "collapsed-indices-1 specified but not collapsed-indices-0"; 46 signalPassFailure(); 47 return; 48 } 49 combinedLoops.push_back(clCollapsedIndices1); 50 } 51 if (!clCollapsedIndices2.empty()) { 52 if (clCollapsedIndices1.empty()) { 53 llvm::errs() 54 << "collapsed-indices-2 specified but not collapsed-indices-1"; 55 signalPassFailure(); 56 return; 57 } 58 combinedLoops.push_back(clCollapsedIndices2); 59 } 60 61 if (combinedLoops.empty()) { 62 llvm::errs() << "No collapsed-indices were specified. This pass is only " 63 "for testing and does not automatically collapse all " 64 "parallel loops or similar."; 65 signalPassFailure(); 66 return; 67 } 68 69 // Confirm that the specified loops are [0,N) by testing that N values exist 70 // with the maximum value being N-1. 71 llvm::SmallSet<unsigned, 8> flattenedCombinedLoops; 72 unsigned maxCollapsedIndex = 0; 73 for (auto &loops : combinedLoops) { 74 for (auto &loop : loops) { 75 flattenedCombinedLoops.insert(loop); 76 maxCollapsedIndex = std::max(maxCollapsedIndex, loop); 77 } 78 } 79 80 if (maxCollapsedIndex != flattenedCombinedLoops.size() - 1 || 81 !flattenedCombinedLoops.contains(maxCollapsedIndex)) { 82 llvm::errs() 83 << "collapsed-indices arguments must include all values [0,N)."; 84 signalPassFailure(); 85 return; 86 } 87 88 // Only apply the transformation on parallel loops where the specified 89 // transformation is valid, but do NOT early abort in the case of invalid 90 // loops. 91 module->walk([&](scf::ParallelOp op) { 92 if (flattenedCombinedLoops.size() != op.getNumLoops()) { 93 op.emitOpError("has ") 94 << op.getNumLoops() 95 << " iter args while this limited functionality testing pass was " 96 "configured only for loops with exactly " 97 << flattenedCombinedLoops.size() << " iter args."; 98 return; 99 } 100 collapseParallelLoops(op, combinedLoops); 101 }); 102 } 103 }; 104 } // namespace 105 106 std::unique_ptr<Pass> mlir::createTestSCFParallelLoopCollapsingPass() { 107 return std::make_unique<TestSCFParallelLoopCollapsing>(); 108 } 109