Lines Matching +full:node +full:- +full:version

1 //===- ScheduleOptimizer.cpp - Calculate an optimized schedule ------------===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
15 // parallelism and tileability and minimizes data-dependence distances. The
16 // algorithm used is a modified version of the ``Pluto'' algorithm:
23 // 2) A set of post-scheduling transformations is applied on the schedule tree.
27 // - Tiling of the innermost tilable bands
28 // - Prevectorization - The choice of a possible outer loop that is strip-mined
29 // to the innermost level to enable inner-loop
31 // - Some optimizations for spatial locality are also planned.
40 // http://www.grosser.es/#pub-polyhedral-AST-generation
46 //===----------------------------------------------------------------------===//
73 #define DEBUG_TYPE "polly-opt-isl"
76 OptimizeDeps("polly-opt-optimize-only",
81 SimplifyDeps("polly-opt-simplify-deps",
86 "polly-opt-max-constant-term",
87 cl::desc("The maximal constant term allowed (-1 is unlimited)"), cl::Hidden,
91 "polly-opt-max-coefficient",
92 cl::desc("The maximal coefficient allowed (-1 is unlimited)"), cl::Hidden,
96 MaximizeBandDepth("polly-opt-maximize-bands",
101 ScheduleComputeOut("polly-schedule-computeout",
108 GreedyFusion("polly-loopfusion-greedy",
113 "polly-opt-outer-coincidence",
119 "polly-prevect-width",
121 "The number of loop iterations to strip-mine for pre-vectorization"),
124 static cl::opt<bool> FirstLevelTiling("polly-tiling",
129 "polly-default-tile-size",
131 " --polly-tile-sizes)"),
135 FirstLevelTileSizes("polly-tile-sizes",
137 "with --polly-default-tile-size"),
141 SecondLevelTiling("polly-2nd-level-tiling",
146 "polly-2nd-level-default-tile-size",
147 cl::desc("The default 2nd-level tile size (if not enough were provided by"
148 " --polly-2nd-level-tile-sizes)"),
152 SecondLevelTileSizes("polly-2nd-level-tile-sizes",
154 "with --polly-default-tile-size"),
158 static cl::opt<bool> RegisterTiling("polly-register-tiling",
163 "polly-register-tiling-default-tile-size",
165 " --polly-register-tile-sizes)"),
169 RegisterTileSizes("polly-register-tile-sizes",
171 "with --polly-register-tile-size"),
175 "polly-pragma-based-opts",
176 cl::desc("Apply user-directed transformation from metadata"),
179 static cl::opt<bool> EnableReschedule("polly-reschedule",
184 PMBasedOpts("polly-pattern-matching-based-opts",
189 EnablePostopts("polly-postopts",
190 cl::desc("Apply post-rescheduling optimizations such as "
191 "tiling (requires -polly-reschedule)"),
195 "polly-optimized-scops",
196 cl::desc("Polly - Dump polyhedral description of Scops optimized with "
197 "the isl scheduling optimizer and the set of post-scheduling "
224 STATISTIC(PrevectOpts, "Number of strip-mining for prevectorization applied");
250 /// - Pattern-based optimizations
251 /// - Tiling
252 /// - Prevectorization
264 /// This function takes a node in an (possibly already optimized) schedule
266 /// node and its descendants. The transformations applied include:
268 /// - Pattern-based optimizations
269 /// - Tiling
270 /// - Prevectorization
272 /// @param Node The schedule object post-transformations will be applied to.
276 optimizeScheduleNode(isl::schedule_node Node,
292 /// @param Node A schedule node band, which is a parent of a band node,
295 static isl::schedule_node isolateFullPartialTiles(isl::schedule_node Node,
299 /// Check if this node is a band node we want to tile.
304 /// @param Node The node to check.
305 static bool isTileableBandNode(isl::schedule_node Node);
307 /// Check if this node is a band node we want to transform using pattern
314 /// @param Node The node to check.
315 static bool isPMOptimizableBandNode(isl::schedule_node Node);
317 /// Pre-vectorizes one scheduling dimension of a schedule band.
326 /// | A[i,j] -> [i,j]
348 static isl::schedule_node prevectSchedBand(isl::schedule_node Node,
354 /// We are looking for an innermost band node and apply the following
357 /// - Tile the band
358 /// - if the band is tileable
359 /// - if the band has more than one loop dimension
361 /// - Prevectorize the schedule of the band (or the point loop in case of
363 /// - if vectorization is enabled
365 /// @param Node The schedule node to (possibly) optimize.
368 static isl_schedule_node *optimizeBand(isl_schedule_node *Node, void *User);
372 /// @param Node The schedule node to (possibly) optimize.
373 static isl::schedule_node applyTileBandOpt(isl::schedule_node Node);
377 /// @param Node The schedule node to (possibly) prevectorize.
378 static isl::schedule_node applyPrevectBandOpt(isl::schedule_node Node);
382 ScheduleTreeOptimizer::isolateFullPartialTiles(isl::schedule_node Node,
384 assert(isl_schedule_node_get_type(Node.get()) == isl_schedule_node_band);
385 Node = Node.child(0).child(0);
386 isl::union_map SchedRelUMap = Node.get_prefix_schedule_relation();
392 Node = Node.parent().parent();
395 Node.as<isl::schedule_node_band>().set_ast_build_options(Options);
401 isl::schedule_node Node = visitChildren(Band);
404 if (!Node.first_child().isa<isl::schedule_node_leaf>())
405 return Node;
413 isl::schedule_node Node, unsigned DimToVectorize, int VectorWidth) {
414 assert(isl_schedule_node_get_type(Node.get()) == isl_schedule_node_band);
416 auto Space = isl::manage(isl_schedule_node_band_get_space(Node.get()));
421 Node = isl::manage(
422 isl_schedule_node_band_split(Node.release(), DimToVectorize));
423 Node = Node.child(0);
425 if (DimToVectorize < ScheduleDimensions - 1)
426 Node = isl::manage(isl_schedule_node_band_split(Node.release(), 1));
427 Space = isl::manage(isl_schedule_node_band_get_space(Node.get()));
429 Sizes = Sizes.set_val(0, isl::val(Node.ctx(), VectorWidth));
430 Node =
431 isl::manage(isl_schedule_node_band_tile(Node.release(), Sizes.release()));
432 Node = isolateFullPartialTiles(Node, VectorWidth);
433 Node = Node.child(0);
436 Node = Node.as<isl::schedule_node_band>().set_ast_build_options(
437 isl::union_set(Node.ctx(), "{ unroll[x]: 1 = 0 }"));
441 Node = isl::manage(isl_schedule_node_band_sink(Node.release()));
445 Node = SimdMarkerInserter.visit(Node);
448 return Node.parent();
451 static bool isSimpleInnermostBand(const isl::schedule_node &Node) {
452 assert(isl_schedule_node_get_type(Node.get()) == isl_schedule_node_band);
453 assert(isl_schedule_node_n_children(Node.get()) == 1);
455 auto ChildType = isl_schedule_node_get_type(Node.child(0).get());
463 auto Sequence = Node.child(0);
477 /// Check if this node is a band node, which has only one child.
479 /// @param Node The node to check.
480 static bool isOneTimeParentBandNode(isl::schedule_node Node) {
481 if (isl_schedule_node_get_type(Node.get()) != isl_schedule_node_band)
484 if (isl_schedule_node_n_children(Node.get()) != 1)
490 bool ScheduleTreeOptimizer::isTileableBandNode(isl::schedule_node Node) {
491 if (!isOneTimeParentBandNode(Node))
494 if (!isl_schedule_node_band_get_permutable(Node.get()))
497 auto Space = isl::manage(isl_schedule_node_band_get_space(Node.get()));
502 return isSimpleInnermostBand(Node);
505 bool ScheduleTreeOptimizer::isPMOptimizableBandNode(isl::schedule_node Node) {
506 if (!isOneTimeParentBandNode(Node))
509 return Node.child(0).isa<isl::schedule_node_leaf>();
513 ScheduleTreeOptimizer::applyTileBandOpt(isl::schedule_node Node) {
515 Node = tileNode(Node, "1st level tiling", FirstLevelTileSizes,
521 Node = tileNode(Node, "2nd level tiling", SecondLevelTileSizes,
527 Node =
528 applyRegisterTiling(Node, RegisterTileSizes, RegisterDefaultTileSize);
532 return Node;
536 ScheduleTreeOptimizer::applyPrevectBandOpt(isl::schedule_node Node) {
537 auto Space = isl::manage(isl_schedule_node_band_get_space(Node.get()));
540 for (int i = Dims - 1; i >= 0; i--)
541 if (Node.as<isl::schedule_node_band>().member_get_coincident(i)) {
542 Node = prevectSchedBand(Node, i, PrevectorWidth);
546 return Node;
556 isl::schedule_node Node = isl::manage(NodeArg);
558 if (OAI->PatternOpts && isPMOptimizableBandNode(Node)) {
560 tryOptimizeMatMulPattern(Node, OAI->TTI, OAI->D);
563 OAI->DepsChanged = true;
568 if (!isTileableBandNode(Node))
569 return Node.release();
571 if (OAI->Postopts)
572 Node = applyTileBandOpt(Node);
574 if (OAI->Prevect) {
577 Node = applyPrevectBandOpt(Node);
580 return Node.release();
592 isl::schedule_node Node, const OptimizerAdditionalInfoTy *OAI) {
593 Node = isl::manage(isl_schedule_node_map_descendant_bottom_up(
594 Node.release(), optimizeBand,
596 return Node;
609 // (e.g., #stride-one accesses)
610 // FIXME: A schedule tree whose union_map-conversion is identical to the
668 /// @param Version The version of the schedule tree that is analyzed.
673 static void walkScheduleTreeForStatistics(isl::schedule Schedule, int Version) {
680 [](__isl_keep isl_schedule_node *nodeptr, void *user) -> isl_bool {
681 isl::schedule_node Node = isl::manage_copy(nodeptr);
682 int Version = *static_cast<int *>(user);
684 switch (isl_schedule_node_get_type(Node.get())) {
686 NumBands[Version]++;
687 if (isl_schedule_node_band_get_permutable(Node.get()) ==
689 NumPermutable[Version]++;
691 int CountMembers = isl_schedule_node_band_n_member(Node.get());
692 NumBandMembers[Version] += CountMembers;
694 if (Node.as<isl::schedule_node_band>().member_get_coincident(i))
695 NumCoincident[Version]++;
701 NumFilters[Version]++;
705 NumExtension[Version]++;
714 &Version);
756 // TODO: Detect disabled heuristics and no user-directed transformation
775 // post-rescheduling optimizations (tiling, pattern-based, prevectorization)
824 << "warning: Option -polly-opt-simplify-deps should either be 'yes' "
840 << "warning: Option -polly-opt-maximize-bands should either be 'yes'"
851 errs() << "warning: Option -polly-opt-outer-coincidence should either be "
901 // Apply post-rescheduling optimizations (if enabled) and/or prevectorization.
912 POLLY_DEBUG(printSchedule(dbgs(), Schedule, "After post-optimizations"));
941 [this](Dependences::AnalysisLevel) -> const Dependences & {
1002 INITIALIZE_PASS_BEGIN(IslScheduleOptimizerWrapperPass, "polly-opt-isl",
1003 "Polly - Optimize schedule of SCoP", false, false);
1008 INITIALIZE_PASS_END(IslScheduleOptimizerWrapperPass, "polly-opt-isl",
1009 "Polly - Optimize schedule of SCoP", false, false)
1016 auto GetDeps = [&Deps](Dependences::AnalysisLevel) -> const Dependences & {
1028 *OS << "Printing analysis 'Polly - Optimize schedule of SCoP' for region: '"
1049 //===----------------------------------------------------------------------===//
1092 "polly-print-opt-isl",
1093 "Polly - Print optimizer schedule of SCoP", false, false);
1096 "polly-print-opt-isl",
1097 "Polly - Print optimizer schedule of SCoP", false, false)