13f170eb1SMichael Kruse //===------ ManualOptimizer.cpp -------------------------------------------===// 23f170eb1SMichael Kruse // 33f170eb1SMichael Kruse // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 43f170eb1SMichael Kruse // See https://llvm.org/LICENSE.txt for license information. 53f170eb1SMichael Kruse // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 63f170eb1SMichael Kruse // 73f170eb1SMichael Kruse //===----------------------------------------------------------------------===// 83f170eb1SMichael Kruse // 93f170eb1SMichael Kruse // Handle pragma/metadata-directed transformations. 103f170eb1SMichael Kruse // 113f170eb1SMichael Kruse //===----------------------------------------------------------------------===// 123f170eb1SMichael Kruse 133f170eb1SMichael Kruse #include "polly/ManualOptimizer.h" 14e470f926SMichael Kruse #include "polly/DependenceInfo.h" 15e470f926SMichael Kruse #include "polly/Options.h" 163f170eb1SMichael Kruse #include "polly/ScheduleTreeTransform.h" 173f170eb1SMichael Kruse #include "polly/Support/ScopHelper.h" 183f170eb1SMichael Kruse #include "llvm/ADT/StringRef.h" 193f170eb1SMichael Kruse #include "llvm/Analysis/LoopInfo.h" 20e470f926SMichael Kruse #include "llvm/Analysis/OptimizationRemarkEmitter.h" 213f170eb1SMichael Kruse #include "llvm/IR/Metadata.h" 2228667787SMichael Kruse #include "llvm/Transforms/Utils/LoopUtils.h" 23ccdc271aSKazu Hirata #include <optional> 243f170eb1SMichael Kruse 25601d7eabSKarthika Devi C #include "polly/Support/PollyDebug.h" 263f170eb1SMichael Kruse #define DEBUG_TYPE "polly-opt-manual" 273f170eb1SMichael Kruse 283f170eb1SMichael Kruse using namespace polly; 293f170eb1SMichael Kruse using namespace llvm; 303f170eb1SMichael Kruse 313f170eb1SMichael Kruse namespace { 32e470f926SMichael Kruse 33e470f926SMichael Kruse static cl::opt<bool> IgnoreDepcheck( 34e470f926SMichael Kruse "polly-pragma-ignore-depcheck", 35e470f926SMichael Kruse cl::desc("Skip the dependency check for pragma-based transformations"), 3636c7d79dSFangrui Song cl::cat(PollyCategory)); 37e470f926SMichael Kruse 3828667787SMichael Kruse /// Same as llvm::hasUnrollTransformation(), but takes a LoopID as argument 3928667787SMichael Kruse /// instead of a Loop. 4028667787SMichael Kruse static TransformationMode hasUnrollTransformation(MDNode *LoopID) { 4128667787SMichael Kruse if (getBooleanLoopAttribute(LoopID, "llvm.loop.unroll.disable")) 4228667787SMichael Kruse return TM_SuppressedByUser; 433f170eb1SMichael Kruse 44ccdc271aSKazu Hirata std::optional<int> Count = 4528667787SMichael Kruse getOptionalIntLoopAttribute(LoopID, "llvm.loop.unroll.count"); 4694460f51SKazu Hirata if (Count) 4753e5cd4dSFangrui Song return *Count == 1 ? TM_SuppressedByUser : TM_ForcedByUser; 483f170eb1SMichael Kruse 4928667787SMichael Kruse if (getBooleanLoopAttribute(LoopID, "llvm.loop.unroll.enable")) 5028667787SMichael Kruse return TM_ForcedByUser; 513f170eb1SMichael Kruse 5228667787SMichael Kruse if (getBooleanLoopAttribute(LoopID, "llvm.loop.unroll.full")) 5328667787SMichael Kruse return TM_ForcedByUser; 543f170eb1SMichael Kruse 5528667787SMichael Kruse if (hasDisableAllTransformsHint(LoopID)) 5628667787SMichael Kruse return TM_Disable; 5728667787SMichael Kruse 5828667787SMichael Kruse return TM_Unspecified; 593f170eb1SMichael Kruse } 603f170eb1SMichael Kruse 61e470f926SMichael Kruse // Return the first DebugLoc in the list. 62e470f926SMichael Kruse static DebugLoc findFirstDebugLoc(MDNode *MD) { 63e470f926SMichael Kruse if (MD) { 64e470f926SMichael Kruse for (const MDOperand &X : drop_begin(MD->operands(), 1)) { 65e470f926SMichael Kruse Metadata *A = X.get(); 66e470f926SMichael Kruse if (!isa<DILocation>(A)) 67e470f926SMichael Kruse continue; 68e470f926SMichael Kruse return cast<DILocation>(A); 69e470f926SMichael Kruse } 70e470f926SMichael Kruse } 71e470f926SMichael Kruse 72e470f926SMichael Kruse return {}; 73e470f926SMichael Kruse } 74e470f926SMichael Kruse 75e470f926SMichael Kruse static DebugLoc findTransformationDebugLoc(MDNode *LoopMD, StringRef Name) { 76e470f926SMichael Kruse // First find dedicated transformation location 77e470f926SMichael Kruse // (such as the location of #pragma clang loop) 78e470f926SMichael Kruse MDNode *MD = findOptionMDForLoopID(LoopMD, Name); 79e470f926SMichael Kruse if (DebugLoc K = findFirstDebugLoc(MD)) 80e470f926SMichael Kruse return K; 81e470f926SMichael Kruse 82e470f926SMichael Kruse // Otherwise, fall back to the location of the loop itself 83e470f926SMichael Kruse return findFirstDebugLoc(LoopMD); 84e470f926SMichael Kruse } 85e470f926SMichael Kruse 863f170eb1SMichael Kruse /// Apply full or partial unrolling. 873f170eb1SMichael Kruse static isl::schedule applyLoopUnroll(MDNode *LoopMD, 883f170eb1SMichael Kruse isl::schedule_node BandToUnroll) { 8928667787SMichael Kruse TransformationMode UnrollMode = ::hasUnrollTransformation(LoopMD); 9028667787SMichael Kruse if (UnrollMode & TM_Disable) 9128667787SMichael Kruse return {}; 9228667787SMichael Kruse 937c7978a1Spatacca assert(!BandToUnroll.is_null()); 943f170eb1SMichael Kruse // TODO: Isl's codegen also supports unrolling by isl_ast_build via 953f170eb1SMichael Kruse // isl_schedule_node_band_set_ast_build_options({ unroll[x] }) which would be 963f170eb1SMichael Kruse // more efficient because the content duplication is delayed. However, the 973f170eb1SMichael Kruse // unrolled loop could be input of another loop transformation which expects 983f170eb1SMichael Kruse // the explicit schedule nodes. That is, we would need this explicit expansion 993f170eb1SMichael Kruse // anyway and using the ISL codegen option is a compile-time optimization. 10030c67587SKazu Hirata int64_t Factor = 10130c67587SKazu Hirata getOptionalIntLoopAttribute(LoopMD, "llvm.loop.unroll.count").value_or(0); 10228667787SMichael Kruse bool Full = getBooleanLoopAttribute(LoopMD, "llvm.loop.unroll.full"); 1033f170eb1SMichael Kruse assert((!Full || !(Factor > 0)) && 1043f170eb1SMichael Kruse "Cannot unroll fully and partially at the same time"); 1053f170eb1SMichael Kruse 1063f170eb1SMichael Kruse if (Full) 1073f170eb1SMichael Kruse return applyFullUnroll(BandToUnroll); 1083f170eb1SMichael Kruse 1093f170eb1SMichael Kruse if (Factor > 0) 1103f170eb1SMichael Kruse return applyPartialUnroll(BandToUnroll, Factor); 1113f170eb1SMichael Kruse 11228667787SMichael Kruse // For heuristic unrolling, fall back to LLVM's LoopUnroll pass. 11328667787SMichael Kruse return {}; 1143f170eb1SMichael Kruse } 1153f170eb1SMichael Kruse 116e470f926SMichael Kruse static isl::schedule applyLoopFission(MDNode *LoopMD, 117e470f926SMichael Kruse isl::schedule_node BandToFission) { 118e470f926SMichael Kruse // TODO: Make it possible to selectively fission substatements. 119e470f926SMichael Kruse // TODO: Apply followup loop properties. 120e470f926SMichael Kruse // TODO: Instead of fission every statement, find the maximum set that does 121e470f926SMichael Kruse // not cause a dependency violation. 122e470f926SMichael Kruse return applyMaxFission(BandToFission); 123e470f926SMichael Kruse } 124e470f926SMichael Kruse 1253f170eb1SMichael Kruse // Return the properties from a LoopID. Scalar properties are ignored. 1263f170eb1SMichael Kruse static auto getLoopMDProps(MDNode *LoopMD) { 1273f170eb1SMichael Kruse return map_range( 1283f170eb1SMichael Kruse make_filter_range( 1293f170eb1SMichael Kruse drop_begin(LoopMD->operands(), 1), 1303f170eb1SMichael Kruse [](const MDOperand &MDOp) { return isa<MDNode>(MDOp.get()); }), 1313f170eb1SMichael Kruse [](const MDOperand &MDOp) { return cast<MDNode>(MDOp.get()); }); 1323f170eb1SMichael Kruse } 1333f170eb1SMichael Kruse 1343f170eb1SMichael Kruse /// Recursively visit all nodes in a schedule, loop for loop-transformations 1353f170eb1SMichael Kruse /// metadata and apply the first encountered. 136bd93df93SMichael Kruse class SearchTransformVisitor final 1373f170eb1SMichael Kruse : public RecursiveScheduleTreeVisitor<SearchTransformVisitor> { 1383f170eb1SMichael Kruse private: 1393f170eb1SMichael Kruse using BaseTy = RecursiveScheduleTreeVisitor<SearchTransformVisitor>; 1403f170eb1SMichael Kruse BaseTy &getBase() { return *this; } 1413f170eb1SMichael Kruse const BaseTy &getBase() const { return *this; } 1423f170eb1SMichael Kruse 143e470f926SMichael Kruse polly::Scop *S; 144e470f926SMichael Kruse const Dependences *D; 145e470f926SMichael Kruse OptimizationRemarkEmitter *ORE; 146e470f926SMichael Kruse 1473f170eb1SMichael Kruse // Set after a transformation is applied. Recursive search must be aborted 1483f170eb1SMichael Kruse // once this happens to ensure that any new followup transformation is 1493f170eb1SMichael Kruse // transformed in innermost-first order. 1503f170eb1SMichael Kruse isl::schedule Result; 1513f170eb1SMichael Kruse 152*5aafc6d5SChristian Clauss /// Check whether a schedule after a transformation is legal. Return the old 153e470f926SMichael Kruse /// schedule without the transformation. 154e470f926SMichael Kruse isl::schedule 155e470f926SMichael Kruse checkDependencyViolation(llvm::MDNode *LoopMD, llvm::Value *CodeRegion, 156e470f926SMichael Kruse const isl::schedule_node &OrigBand, 157e470f926SMichael Kruse StringRef DebugLocAttr, StringRef TransPrefix, 158e470f926SMichael Kruse StringRef RemarkName, StringRef TransformationName) { 159e470f926SMichael Kruse if (D->isValidSchedule(*S, Result)) 160e470f926SMichael Kruse return Result; 161e470f926SMichael Kruse 162e470f926SMichael Kruse LLVMContext &Ctx = LoopMD->getContext(); 163601d7eabSKarthika Devi C POLLY_DEBUG(dbgs() << "Dependency violation detected\n"); 164e470f926SMichael Kruse 165e470f926SMichael Kruse DebugLoc TransformLoc = findTransformationDebugLoc(LoopMD, DebugLocAttr); 166e470f926SMichael Kruse 167e470f926SMichael Kruse if (IgnoreDepcheck) { 168601d7eabSKarthika Devi C POLLY_DEBUG(dbgs() << "Still accepting transformation due to " 169e470f926SMichael Kruse "-polly-pragma-ignore-depcheck\n"); 170e470f926SMichael Kruse if (ORE) { 171e470f926SMichael Kruse ORE->emit( 172e470f926SMichael Kruse OptimizationRemark(DEBUG_TYPE, RemarkName, TransformLoc, CodeRegion) 173e470f926SMichael Kruse << (Twine("Could not verify dependencies for ") + 174e470f926SMichael Kruse TransformationName + 175e470f926SMichael Kruse "; still applying because of -polly-pragma-ignore-depcheck") 176e470f926SMichael Kruse .str()); 177e470f926SMichael Kruse } 178e470f926SMichael Kruse return Result; 179e470f926SMichael Kruse } 180e470f926SMichael Kruse 181601d7eabSKarthika Devi C POLLY_DEBUG(dbgs() << "Rolling back transformation\n"); 182e470f926SMichael Kruse 183e470f926SMichael Kruse if (ORE) { 184e470f926SMichael Kruse ORE->emit(DiagnosticInfoOptimizationFailure(DEBUG_TYPE, RemarkName, 185e470f926SMichael Kruse TransformLoc, CodeRegion) 186e470f926SMichael Kruse << (Twine("not applying ") + TransformationName + 187e470f926SMichael Kruse ": cannot ensure semantic equivalence due to possible " 188e470f926SMichael Kruse "dependency violations") 189e470f926SMichael Kruse .str()); 190e470f926SMichael Kruse } 191e470f926SMichael Kruse 192e470f926SMichael Kruse // If illegal, revert and remove the transformation to not risk re-trying 193ea540bc2SGabriel Ravier // indefinitely. 194e470f926SMichael Kruse MDNode *NewLoopMD = 195e470f926SMichael Kruse makePostTransformationMetadata(Ctx, LoopMD, {TransPrefix}, {}); 196e470f926SMichael Kruse BandAttr *Attr = getBandAttr(OrigBand); 197e470f926SMichael Kruse Attr->Metadata = NewLoopMD; 198e470f926SMichael Kruse 199e470f926SMichael Kruse // Roll back old schedule. 200e470f926SMichael Kruse return OrigBand.get_schedule(); 201e470f926SMichael Kruse } 202e470f926SMichael Kruse 2033f170eb1SMichael Kruse public: 204e470f926SMichael Kruse SearchTransformVisitor(polly::Scop *S, const Dependences *D, 205e470f926SMichael Kruse OptimizationRemarkEmitter *ORE) 206e470f926SMichael Kruse : S(S), D(D), ORE(ORE) {} 207e470f926SMichael Kruse 208e470f926SMichael Kruse static isl::schedule applyOneTransformation(polly::Scop *S, 209e470f926SMichael Kruse const Dependences *D, 210e470f926SMichael Kruse OptimizationRemarkEmitter *ORE, 211e470f926SMichael Kruse const isl::schedule &Sched) { 212e470f926SMichael Kruse SearchTransformVisitor Transformer(S, D, ORE); 2133f170eb1SMichael Kruse Transformer.visit(Sched); 2143f170eb1SMichael Kruse return Transformer.Result; 2153f170eb1SMichael Kruse } 2163f170eb1SMichael Kruse 217c62d9a5cSMichael Kruse void visitBand(isl::schedule_node_band Band) { 2183f170eb1SMichael Kruse // Transform inner loops first (depth-first search). 2193f170eb1SMichael Kruse getBase().visitBand(Band); 2207c7978a1Spatacca if (!Result.is_null()) 2213f170eb1SMichael Kruse return; 2223f170eb1SMichael Kruse 2233f170eb1SMichael Kruse // Since it is (currently) not possible to have a BandAttr marker that is 2243f170eb1SMichael Kruse // specific to each loop in a band, we only support single-loop bands. 2253f170eb1SMichael Kruse if (isl_schedule_node_band_n_member(Band.get()) != 1) 2263f170eb1SMichael Kruse return; 2273f170eb1SMichael Kruse 2283f170eb1SMichael Kruse BandAttr *Attr = getBandAttr(Band); 2293f170eb1SMichael Kruse if (!Attr) { 2303f170eb1SMichael Kruse // Band has no attribute. 2313f170eb1SMichael Kruse return; 2323f170eb1SMichael Kruse } 2333f170eb1SMichael Kruse 234e470f926SMichael Kruse // CodeRegion used but ORE to determine code hotness. 235e470f926SMichael Kruse // TODO: Works only for original loop; for transformed loops, should track 236e470f926SMichael Kruse // where the loop's body code comes from. 237e470f926SMichael Kruse Loop *Loop = Attr->OriginalLoop; 238e470f926SMichael Kruse Value *CodeRegion = nullptr; 239e470f926SMichael Kruse if (Loop) 240e470f926SMichael Kruse CodeRegion = Loop->getHeader(); 241e470f926SMichael Kruse 2423f170eb1SMichael Kruse MDNode *LoopMD = Attr->Metadata; 2433f170eb1SMichael Kruse if (!LoopMD) 2443f170eb1SMichael Kruse return; 2453f170eb1SMichael Kruse 2463f170eb1SMichael Kruse // Iterate over loop properties to find the first transformation. 2473f170eb1SMichael Kruse // FIXME: If there are more than one transformation in the LoopMD (making 2483f170eb1SMichael Kruse // the order of transformations ambiguous), all others are silently ignored. 2493f170eb1SMichael Kruse for (MDNode *MD : getLoopMDProps(LoopMD)) { 2503f170eb1SMichael Kruse auto *NameMD = dyn_cast<MDString>(MD->getOperand(0).get()); 2513f170eb1SMichael Kruse if (!NameMD) 2523f170eb1SMichael Kruse continue; 2533f170eb1SMichael Kruse StringRef AttrName = NameMD->getString(); 2543f170eb1SMichael Kruse 25528667787SMichael Kruse // Honor transformation order; transform the first transformation in the 25628667787SMichael Kruse // list first. 25728667787SMichael Kruse if (AttrName == "llvm.loop.unroll.enable" || 25828667787SMichael Kruse AttrName == "llvm.loop.unroll.count" || 25928667787SMichael Kruse AttrName == "llvm.loop.unroll.full") { 2603f170eb1SMichael Kruse Result = applyLoopUnroll(LoopMD, Band); 2617c7978a1Spatacca if (!Result.is_null()) 26228667787SMichael Kruse return; 263e470f926SMichael Kruse } else if (AttrName == "llvm.loop.distribute.enable") { 264e470f926SMichael Kruse Result = applyLoopFission(LoopMD, Band); 265e470f926SMichael Kruse if (!Result.is_null()) 266e470f926SMichael Kruse Result = checkDependencyViolation( 267e470f926SMichael Kruse LoopMD, CodeRegion, Band, "llvm.loop.distribute.loc", 268e470f926SMichael Kruse "llvm.loop.distribute.", "FailedRequestedFission", 269e470f926SMichael Kruse "loop fission/distribution"); 270e470f926SMichael Kruse if (!Result.is_null()) 271e470f926SMichael Kruse return; 2723f170eb1SMichael Kruse } 2733f170eb1SMichael Kruse 27428667787SMichael Kruse // not a loop transformation; look for next property 2753f170eb1SMichael Kruse } 2763f170eb1SMichael Kruse } 2773f170eb1SMichael Kruse 278c62d9a5cSMichael Kruse void visitNode(isl::schedule_node Other) { 2797c7978a1Spatacca if (!Result.is_null()) 2803f170eb1SMichael Kruse return; 2813f170eb1SMichael Kruse getBase().visitNode(Other); 2823f170eb1SMichael Kruse } 2833f170eb1SMichael Kruse }; 2843f170eb1SMichael Kruse 2853f170eb1SMichael Kruse } // namespace 2863f170eb1SMichael Kruse 287e470f926SMichael Kruse isl::schedule 288e470f926SMichael Kruse polly::applyManualTransformations(Scop *S, isl::schedule Sched, 289e470f926SMichael Kruse const Dependences &D, 290e470f926SMichael Kruse OptimizationRemarkEmitter *ORE) { 2913f170eb1SMichael Kruse // Search the loop nest for transformations until fixpoint. 2923f170eb1SMichael Kruse while (true) { 2933f170eb1SMichael Kruse isl::schedule Result = 294e470f926SMichael Kruse SearchTransformVisitor::applyOneTransformation(S, &D, ORE, Sched); 2957c7978a1Spatacca if (Result.is_null()) { 2963f170eb1SMichael Kruse // No (more) transformation has been found. 2973f170eb1SMichael Kruse break; 2983f170eb1SMichael Kruse } 2993f170eb1SMichael Kruse 3003f170eb1SMichael Kruse // Use transformed schedule and look for more transformations. 3013f170eb1SMichael Kruse Sched = Result; 3023f170eb1SMichael Kruse } 3033f170eb1SMichael Kruse 3043f170eb1SMichael Kruse return Sched; 3053f170eb1SMichael Kruse } 306