1 /* 2 * Copyright 2015 Sven Verdoolaege 3 * 4 * Use of this software is governed by the MIT license 5 * 6 * Written by Sven Verdoolaege 7 */ 8 9 #include "isl_map_private.h" 10 11 #include <isl/id.h> 12 #include <isl/schedule_node.h> 13 #include <isl/union_set.h> 14 15 #include "isl_mat_private.h" 16 #include "isl_scheduler_clustering.h" 17 #include "isl_scheduler_scc.h" 18 #include "isl_seq.h" 19 #include "isl_tarjan.h" 20 21 /* Initialize the clustering data structure "c" from "graph". 22 * 23 * In particular, allocate memory, extract the SCCs from "graph" 24 * into c->scc, initialize scc_cluster and construct 25 * a band of schedule rows for each SCC. 26 * Within each SCC, there is only one SCC by definition. 27 * Each SCC initially belongs to a cluster containing only that SCC. 28 */ 29 static isl_stat clustering_init(isl_ctx *ctx, struct isl_clustering *c, 30 struct isl_sched_graph *graph) 31 { 32 int i; 33 34 c->n = graph->scc; 35 c->scc = isl_calloc_array(ctx, struct isl_sched_graph, c->n); 36 c->cluster = isl_calloc_array(ctx, struct isl_sched_graph, c->n); 37 c->scc_cluster = isl_calloc_array(ctx, int, c->n); 38 c->scc_node = isl_calloc_array(ctx, int, c->n); 39 c->scc_in_merge = isl_calloc_array(ctx, int, c->n); 40 if (!c->scc || !c->cluster || 41 !c->scc_cluster || !c->scc_node || !c->scc_in_merge) 42 return isl_stat_error; 43 44 for (i = 0; i < c->n; ++i) { 45 if (isl_sched_graph_extract_sub_graph(ctx, graph, 46 &isl_sched_node_scc_exactly, 47 &isl_sched_edge_scc_exactly, 48 i, &c->scc[i]) < 0) 49 return isl_stat_error; 50 c->scc[i].scc = 1; 51 if (isl_sched_graph_compute_maxvar(&c->scc[i]) < 0) 52 return isl_stat_error; 53 if (isl_schedule_node_compute_wcc_band(ctx, &c->scc[i]) < 0) 54 return isl_stat_error; 55 c->scc_cluster[i] = i; 56 } 57 58 return isl_stat_ok; 59 } 60 61 /* Free all memory allocated for "c". 62 */ 63 static void clustering_free(isl_ctx *ctx, struct isl_clustering *c) 64 { 65 int i; 66 67 if (c->scc) 68 for (i = 0; i < c->n; ++i) 69 isl_sched_graph_free(ctx, &c->scc[i]); 70 free(c->scc); 71 if (c->cluster) 72 for (i = 0; i < c->n; ++i) 73 isl_sched_graph_free(ctx, &c->cluster[i]); 74 free(c->cluster); 75 free(c->scc_cluster); 76 free(c->scc_node); 77 free(c->scc_in_merge); 78 } 79 80 /* Should we refrain from merging the cluster in "graph" with 81 * any other cluster? 82 * In particular, is its current schedule band empty and incomplete. 83 */ 84 static int bad_cluster(struct isl_sched_graph *graph) 85 { 86 return graph->n_row < graph->maxvar && 87 graph->n_total_row == graph->band_start; 88 } 89 90 /* Is "edge" a proximity edge with a non-empty dependence relation? 91 */ 92 static isl_bool is_non_empty_proximity(struct isl_sched_edge *edge) 93 { 94 if (!isl_sched_edge_is_proximity(edge)) 95 return isl_bool_false; 96 return isl_bool_not(isl_map_plain_is_empty(edge->map)); 97 } 98 99 /* Return the index of an edge in "graph" that can be used to merge 100 * two clusters in "c". 101 * Return graph->n_edge if no such edge can be found. 102 * Return -1 on error. 103 * 104 * In particular, return a proximity edge between two clusters 105 * that is not marked "no_merge" and such that neither of the 106 * two clusters has an incomplete, empty band. 107 * 108 * If there are multiple such edges, then try and find the most 109 * appropriate edge to use for merging. In particular, pick the edge 110 * with the greatest weight. If there are multiple of those, 111 * then pick one with the shortest distance between 112 * the two cluster representatives. 113 */ 114 static int find_proximity(struct isl_sched_graph *graph, 115 struct isl_clustering *c) 116 { 117 int i, best = graph->n_edge, best_dist, best_weight; 118 119 for (i = 0; i < graph->n_edge; ++i) { 120 struct isl_sched_edge *edge = &graph->edge[i]; 121 int dist, weight; 122 isl_bool prox; 123 124 prox = is_non_empty_proximity(edge); 125 if (prox < 0) 126 return -1; 127 if (!prox) 128 continue; 129 if (edge->no_merge) 130 continue; 131 if (bad_cluster(&c->scc[edge->src->scc]) || 132 bad_cluster(&c->scc[edge->dst->scc])) 133 continue; 134 dist = c->scc_cluster[edge->dst->scc] - 135 c->scc_cluster[edge->src->scc]; 136 if (dist == 0) 137 continue; 138 weight = edge->weight; 139 if (best < graph->n_edge) { 140 if (best_weight > weight) 141 continue; 142 if (best_weight == weight && best_dist <= dist) 143 continue; 144 } 145 best = i; 146 best_dist = dist; 147 best_weight = weight; 148 } 149 150 return best; 151 } 152 153 /* Internal data structure used in mark_merge_sccs. 154 * 155 * "graph" is the dependence graph in which a strongly connected 156 * component is constructed. 157 * "scc_cluster" maps each SCC index to the cluster to which it belongs. 158 * "src" and "dst" are the indices of the nodes that are being merged. 159 */ 160 struct isl_mark_merge_sccs_data { 161 struct isl_sched_graph *graph; 162 int *scc_cluster; 163 int src; 164 int dst; 165 }; 166 167 /* Check whether the cluster containing node "i" depends on the cluster 168 * containing node "j". If "i" and "j" belong to the same cluster, 169 * then they are taken to depend on each other to ensure that 170 * the resulting strongly connected component consists of complete 171 * clusters. Furthermore, if "i" and "j" are the two nodes that 172 * are being merged, then they are taken to depend on each other as well. 173 * Otherwise, check if there is a (conditional) validity dependence 174 * from node[j] to node[i], forcing node[i] to follow node[j]. 175 */ 176 static isl_bool cluster_follows(int i, int j, void *user) 177 { 178 struct isl_mark_merge_sccs_data *data = user; 179 struct isl_sched_graph *graph = data->graph; 180 int *scc_cluster = data->scc_cluster; 181 182 if (data->src == i && data->dst == j) 183 return isl_bool_true; 184 if (data->src == j && data->dst == i) 185 return isl_bool_true; 186 if (scc_cluster[graph->node[i].scc] == scc_cluster[graph->node[j].scc]) 187 return isl_bool_true; 188 189 return isl_sched_graph_has_validity_edge(graph, &graph->node[j], 190 &graph->node[i]); 191 } 192 193 /* Mark all SCCs that belong to either of the two clusters in "c" 194 * connected by the edge in "graph" with index "edge", or to any 195 * of the intermediate clusters. 196 * The marking is recorded in c->scc_in_merge. 197 * 198 * The given edge has been selected for merging two clusters, 199 * meaning that there is at least a proximity edge between the two nodes. 200 * However, there may also be (indirect) validity dependences 201 * between the two nodes. When merging the two clusters, all clusters 202 * containing one or more of the intermediate nodes along the 203 * indirect validity dependences need to be merged in as well. 204 * 205 * First collect all such nodes by computing the strongly connected 206 * component (SCC) containing the two nodes connected by the edge, where 207 * the two nodes are considered to depend on each other to make 208 * sure they end up in the same SCC. Similarly, each node is considered 209 * to depend on every other node in the same cluster to ensure 210 * that the SCC consists of complete clusters. 211 * 212 * Then the original SCCs that contain any of these nodes are marked 213 * in c->scc_in_merge. 214 */ 215 static isl_stat mark_merge_sccs(isl_ctx *ctx, struct isl_sched_graph *graph, 216 int edge, struct isl_clustering *c) 217 { 218 struct isl_mark_merge_sccs_data data; 219 struct isl_tarjan_graph *g; 220 int i; 221 222 for (i = 0; i < c->n; ++i) 223 c->scc_in_merge[i] = 0; 224 225 data.graph = graph; 226 data.scc_cluster = c->scc_cluster; 227 data.src = graph->edge[edge].src - graph->node; 228 data.dst = graph->edge[edge].dst - graph->node; 229 230 g = isl_tarjan_graph_component(ctx, graph->n, data.dst, 231 &cluster_follows, &data); 232 if (!g) 233 goto error; 234 235 i = g->op; 236 if (i < 3) 237 isl_die(ctx, isl_error_internal, 238 "expecting at least two nodes in component", 239 goto error); 240 if (g->order[--i] != -1) 241 isl_die(ctx, isl_error_internal, 242 "expecting end of component marker", goto error); 243 244 for (--i; i >= 0 && g->order[i] != -1; --i) { 245 int scc = graph->node[g->order[i]].scc; 246 c->scc_in_merge[scc] = 1; 247 } 248 249 isl_tarjan_graph_free(g); 250 return isl_stat_ok; 251 error: 252 isl_tarjan_graph_free(g); 253 return isl_stat_error; 254 } 255 256 /* Construct the identifier "cluster_i". 257 */ 258 static __isl_give isl_id *cluster_id(isl_ctx *ctx, int i) 259 { 260 char name[40]; 261 262 snprintf(name, sizeof(name), "cluster_%d", i); 263 return isl_id_alloc(ctx, name, NULL); 264 } 265 266 /* Construct the space of the cluster with index "i" containing 267 * the strongly connected component "scc". 268 * 269 * In particular, construct a space called cluster_i with dimension equal 270 * to the number of schedule rows in the current band of "scc". 271 */ 272 static __isl_give isl_space *cluster_space(struct isl_sched_graph *scc, int i) 273 { 274 int nvar; 275 isl_space *space; 276 isl_id *id; 277 278 nvar = scc->n_total_row - scc->band_start; 279 space = isl_space_copy(scc->node[0].space); 280 space = isl_space_params(space); 281 space = isl_space_set_from_params(space); 282 space = isl_space_add_dims(space, isl_dim_set, nvar); 283 id = cluster_id(isl_space_get_ctx(space), i); 284 space = isl_space_set_tuple_id(space, isl_dim_set, id); 285 286 return space; 287 } 288 289 /* Collect the domain of the graph for merging clusters. 290 * 291 * In particular, for each cluster with first SCC "i", construct 292 * a set in the space called cluster_i with dimension equal 293 * to the number of schedule rows in the current band of the cluster. 294 */ 295 static __isl_give isl_union_set *collect_domain(isl_ctx *ctx, 296 struct isl_sched_graph *graph, struct isl_clustering *c) 297 { 298 int i; 299 isl_space *space; 300 isl_union_set *domain; 301 302 space = isl_space_params_alloc(ctx, 0); 303 domain = isl_union_set_empty(space); 304 305 for (i = 0; i < graph->scc; ++i) { 306 isl_space *space; 307 308 if (!c->scc_in_merge[i]) 309 continue; 310 if (c->scc_cluster[i] != i) 311 continue; 312 space = cluster_space(&c->scc[i], i); 313 domain = isl_union_set_add_set(domain, isl_set_universe(space)); 314 } 315 316 return domain; 317 } 318 319 /* Construct a map from the original instances to the corresponding 320 * cluster instance in the current bands of the clusters in "c". 321 */ 322 static __isl_give isl_union_map *collect_cluster_map(isl_ctx *ctx, 323 struct isl_sched_graph *graph, struct isl_clustering *c) 324 { 325 int i, j; 326 isl_space *space; 327 isl_union_map *cluster_map; 328 329 space = isl_space_params_alloc(ctx, 0); 330 cluster_map = isl_union_map_empty(space); 331 for (i = 0; i < graph->scc; ++i) { 332 int start, n; 333 isl_id *id; 334 335 if (!c->scc_in_merge[i]) 336 continue; 337 338 id = cluster_id(ctx, c->scc_cluster[i]); 339 start = c->scc[i].band_start; 340 n = c->scc[i].n_total_row - start; 341 for (j = 0; j < c->scc[i].n; ++j) { 342 isl_multi_aff *ma; 343 isl_map *map; 344 struct isl_sched_node *node = &c->scc[i].node[j]; 345 346 ma = isl_sched_node_extract_partial_schedule_multi_aff( 347 node, start, n); 348 ma = isl_multi_aff_set_tuple_id(ma, isl_dim_out, 349 isl_id_copy(id)); 350 map = isl_map_from_multi_aff(ma); 351 cluster_map = isl_union_map_add_map(cluster_map, map); 352 } 353 isl_id_free(id); 354 } 355 356 return cluster_map; 357 } 358 359 /* Add "umap" to the schedule constraints "sc" of all types of "edge" 360 * that are not isl_edge_condition or isl_edge_conditional_validity. 361 */ 362 static __isl_give isl_schedule_constraints *add_non_conditional_constraints( 363 struct isl_sched_edge *edge, __isl_keep isl_union_map *umap, 364 __isl_take isl_schedule_constraints *sc) 365 { 366 enum isl_edge_type t; 367 368 if (!sc) 369 return NULL; 370 371 for (t = isl_edge_first; t <= isl_edge_last; ++t) { 372 if (t == isl_edge_condition || 373 t == isl_edge_conditional_validity) 374 continue; 375 if (!isl_sched_edge_has_type(edge, t)) 376 continue; 377 sc = isl_schedule_constraints_add(sc, t, 378 isl_union_map_copy(umap)); 379 } 380 381 return sc; 382 } 383 384 /* Add schedule constraints of types isl_edge_condition and 385 * isl_edge_conditional_validity to "sc" by applying "umap" to 386 * the domains of the wrapped relations in domain and range 387 * of the corresponding tagged constraints of "edge". 388 */ 389 static __isl_give isl_schedule_constraints *add_conditional_constraints( 390 struct isl_sched_edge *edge, __isl_keep isl_union_map *umap, 391 __isl_take isl_schedule_constraints *sc) 392 { 393 enum isl_edge_type t; 394 isl_union_map *tagged; 395 396 for (t = isl_edge_condition; t <= isl_edge_conditional_validity; ++t) { 397 if (!isl_sched_edge_has_type(edge, t)) 398 continue; 399 if (t == isl_edge_condition) 400 tagged = isl_union_map_copy(edge->tagged_condition); 401 else 402 tagged = isl_union_map_copy(edge->tagged_validity); 403 tagged = isl_union_map_zip(tagged); 404 tagged = isl_union_map_apply_domain(tagged, 405 isl_union_map_copy(umap)); 406 tagged = isl_union_map_zip(tagged); 407 sc = isl_schedule_constraints_add(sc, t, tagged); 408 if (!sc) 409 return NULL; 410 } 411 412 return sc; 413 } 414 415 /* Given a mapping "cluster_map" from the original instances to 416 * the cluster instances, add schedule constraints on the clusters 417 * to "sc" corresponding to the original constraints represented by "edge". 418 * 419 * For non-tagged dependence constraints, the cluster constraints 420 * are obtained by applying "cluster_map" to the edge->map. 421 * 422 * For tagged dependence constraints, "cluster_map" needs to be applied 423 * to the domains of the wrapped relations in domain and range 424 * of the tagged dependence constraints. Pick out the mappings 425 * from these domains from "cluster_map" and construct their product. 426 * This mapping can then be applied to the pair of domains. 427 */ 428 static __isl_give isl_schedule_constraints *collect_edge_constraints( 429 struct isl_sched_edge *edge, __isl_keep isl_union_map *cluster_map, 430 __isl_take isl_schedule_constraints *sc) 431 { 432 isl_union_map *umap; 433 isl_space *space; 434 isl_union_set *uset; 435 isl_union_map *umap1, *umap2; 436 437 if (!sc) 438 return NULL; 439 440 umap = isl_union_map_from_map(isl_map_copy(edge->map)); 441 umap = isl_union_map_apply_domain(umap, 442 isl_union_map_copy(cluster_map)); 443 umap = isl_union_map_apply_range(umap, 444 isl_union_map_copy(cluster_map)); 445 sc = add_non_conditional_constraints(edge, umap, sc); 446 isl_union_map_free(umap); 447 448 if (!sc || 449 (!isl_sched_edge_is_condition(edge) && 450 !isl_sched_edge_is_conditional_validity(edge))) 451 return sc; 452 453 space = isl_space_domain(isl_map_get_space(edge->map)); 454 uset = isl_union_set_from_set(isl_set_universe(space)); 455 umap1 = isl_union_map_copy(cluster_map); 456 umap1 = isl_union_map_intersect_domain(umap1, uset); 457 space = isl_space_range(isl_map_get_space(edge->map)); 458 uset = isl_union_set_from_set(isl_set_universe(space)); 459 umap2 = isl_union_map_copy(cluster_map); 460 umap2 = isl_union_map_intersect_domain(umap2, uset); 461 umap = isl_union_map_product(umap1, umap2); 462 463 sc = add_conditional_constraints(edge, umap, sc); 464 465 isl_union_map_free(umap); 466 return sc; 467 } 468 469 /* Given a mapping "cluster_map" from the original instances to 470 * the cluster instances, add schedule constraints on the clusters 471 * to "sc" corresponding to all edges in "graph" between nodes that 472 * belong to SCCs that are marked for merging in "scc_in_merge". 473 */ 474 static __isl_give isl_schedule_constraints *collect_constraints( 475 struct isl_sched_graph *graph, int *scc_in_merge, 476 __isl_keep isl_union_map *cluster_map, 477 __isl_take isl_schedule_constraints *sc) 478 { 479 int i; 480 481 for (i = 0; i < graph->n_edge; ++i) { 482 struct isl_sched_edge *edge = &graph->edge[i]; 483 484 if (!scc_in_merge[edge->src->scc]) 485 continue; 486 if (!scc_in_merge[edge->dst->scc]) 487 continue; 488 sc = collect_edge_constraints(edge, cluster_map, sc); 489 } 490 491 return sc; 492 } 493 494 /* Construct a dependence graph for scheduling clusters with respect 495 * to each other and store the result in "merge_graph". 496 * In particular, the nodes of the graph correspond to the schedule 497 * dimensions of the current bands of those clusters that have been 498 * marked for merging in "c". 499 * 500 * First construct an isl_schedule_constraints object for this domain 501 * by transforming the edges in "graph" to the domain. 502 * Then initialize a dependence graph for scheduling from these 503 * constraints. 504 */ 505 static isl_stat init_merge_graph(isl_ctx *ctx, struct isl_sched_graph *graph, 506 struct isl_clustering *c, struct isl_sched_graph *merge_graph) 507 { 508 isl_union_set *domain; 509 isl_union_map *cluster_map; 510 isl_schedule_constraints *sc; 511 isl_stat r; 512 513 domain = collect_domain(ctx, graph, c); 514 sc = isl_schedule_constraints_on_domain(domain); 515 if (!sc) 516 return isl_stat_error; 517 cluster_map = collect_cluster_map(ctx, graph, c); 518 sc = collect_constraints(graph, c->scc_in_merge, cluster_map, sc); 519 isl_union_map_free(cluster_map); 520 521 r = isl_sched_graph_init(merge_graph, sc); 522 523 isl_schedule_constraints_free(sc); 524 525 return r; 526 } 527 528 /* Compute the maximal number of remaining schedule rows that still need 529 * to be computed for the nodes that belong to clusters with the maximal 530 * dimension for the current band (i.e., the band that is to be merged). 531 * Only clusters that are about to be merged are considered. 532 * "maxvar" is the maximal dimension for the current band. 533 * "c" contains information about the clusters. 534 * 535 * Return the maximal number of remaining schedule rows or 536 * isl_size_error on error. 537 */ 538 static isl_size compute_maxvar_max_slack(int maxvar, struct isl_clustering *c) 539 { 540 int i, j; 541 int max_slack; 542 543 max_slack = 0; 544 for (i = 0; i < c->n; ++i) { 545 int nvar; 546 struct isl_sched_graph *scc; 547 548 if (!c->scc_in_merge[i]) 549 continue; 550 scc = &c->scc[i]; 551 nvar = scc->n_total_row - scc->band_start; 552 if (nvar != maxvar) 553 continue; 554 for (j = 0; j < scc->n; ++j) { 555 struct isl_sched_node *node = &scc->node[j]; 556 int slack; 557 558 if (isl_sched_node_update_vmap(node) < 0) 559 return isl_size_error; 560 slack = node->nvar - node->rank; 561 if (slack > max_slack) 562 max_slack = slack; 563 } 564 } 565 566 return max_slack; 567 } 568 569 /* If there are any clusters where the dimension of the current band 570 * (i.e., the band that is to be merged) is smaller than "maxvar" and 571 * if there are any nodes in such a cluster where the number 572 * of remaining schedule rows that still need to be computed 573 * is greater than "max_slack", then return the smallest current band 574 * dimension of all these clusters. Otherwise return the original value 575 * of "maxvar". Return isl_size_error in case of any error. 576 * Only clusters that are about to be merged are considered. 577 * "c" contains information about the clusters. 578 */ 579 static isl_size limit_maxvar_to_slack(int maxvar, int max_slack, 580 struct isl_clustering *c) 581 { 582 int i, j; 583 584 for (i = 0; i < c->n; ++i) { 585 int nvar; 586 struct isl_sched_graph *scc; 587 588 if (!c->scc_in_merge[i]) 589 continue; 590 scc = &c->scc[i]; 591 nvar = scc->n_total_row - scc->band_start; 592 if (nvar >= maxvar) 593 continue; 594 for (j = 0; j < scc->n; ++j) { 595 struct isl_sched_node *node = &scc->node[j]; 596 int slack; 597 598 if (isl_sched_node_update_vmap(node) < 0) 599 return isl_size_error; 600 slack = node->nvar - node->rank; 601 if (slack > max_slack) { 602 maxvar = nvar; 603 break; 604 } 605 } 606 } 607 608 return maxvar; 609 } 610 611 /* Adjust merge_graph->maxvar based on the number of remaining schedule rows 612 * that still need to be computed. In particular, if there is a node 613 * in a cluster where the dimension of the current band is smaller 614 * than merge_graph->maxvar, but the number of remaining schedule rows 615 * is greater than that of any node in a cluster with the maximal 616 * dimension for the current band (i.e., merge_graph->maxvar), 617 * then adjust merge_graph->maxvar to the (smallest) current band dimension 618 * of those clusters. Without this adjustment, the total number of 619 * schedule dimensions would be increased, resulting in a skewed view 620 * of the number of coincident dimensions. 621 * "c" contains information about the clusters. 622 * 623 * If the maximize_band_depth option is set and merge_graph->maxvar is reduced, 624 * then there is no point in attempting any merge since it will be rejected 625 * anyway. Set merge_graph->maxvar to zero in such cases. 626 */ 627 static isl_stat adjust_maxvar_to_slack(isl_ctx *ctx, 628 struct isl_sched_graph *merge_graph, struct isl_clustering *c) 629 { 630 isl_size max_slack, maxvar; 631 632 max_slack = compute_maxvar_max_slack(merge_graph->maxvar, c); 633 if (max_slack < 0) 634 return isl_stat_error; 635 maxvar = limit_maxvar_to_slack(merge_graph->maxvar, max_slack, c); 636 if (maxvar < 0) 637 return isl_stat_error; 638 639 if (maxvar < merge_graph->maxvar) { 640 if (isl_options_get_schedule_maximize_band_depth(ctx)) 641 merge_graph->maxvar = 0; 642 else 643 merge_graph->maxvar = maxvar; 644 } 645 646 return isl_stat_ok; 647 } 648 649 /* Return the number of coincident dimensions in the current band of "graph", 650 * where the nodes of "graph" are assumed to be scheduled by a single band. 651 */ 652 static int get_n_coincident(struct isl_sched_graph *graph) 653 { 654 int i; 655 656 for (i = graph->band_start; i < graph->n_total_row; ++i) 657 if (!graph->node[0].coincident[i]) 658 break; 659 660 return i - graph->band_start; 661 } 662 663 /* Should the clusters be merged based on the cluster schedule 664 * in the current (and only) band of "merge_graph", given that 665 * coincidence should be maximized? 666 * 667 * If the number of coincident schedule dimensions in the merged band 668 * would be less than the maximal number of coincident schedule dimensions 669 * in any of the merged clusters, then the clusters should not be merged. 670 */ 671 static isl_bool ok_to_merge_coincident(struct isl_clustering *c, 672 struct isl_sched_graph *merge_graph) 673 { 674 int i; 675 int n_coincident; 676 int max_coincident; 677 678 max_coincident = 0; 679 for (i = 0; i < c->n; ++i) { 680 if (!c->scc_in_merge[i]) 681 continue; 682 n_coincident = get_n_coincident(&c->scc[i]); 683 if (n_coincident > max_coincident) 684 max_coincident = n_coincident; 685 } 686 687 n_coincident = get_n_coincident(merge_graph); 688 689 return isl_bool_ok(n_coincident >= max_coincident); 690 } 691 692 /* Return the transformation on "node" expressed by the current (and only) 693 * band of "merge_graph" applied to the clusters in "c". 694 * 695 * First find the representation of "node" in its SCC in "c" and 696 * extract the transformation expressed by the current band. 697 * Then extract the transformation applied by "merge_graph" 698 * to the cluster to which this SCC belongs. 699 * Combine the two to obtain the complete transformation on the node. 700 * 701 * Note that the range of the first transformation is an anonymous space, 702 * while the domain of the second is named "cluster_X". The range 703 * of the former therefore needs to be adjusted before the two 704 * can be combined. 705 */ 706 static __isl_give isl_map *extract_node_transformation(isl_ctx *ctx, 707 struct isl_sched_node *node, struct isl_clustering *c, 708 struct isl_sched_graph *merge_graph) 709 { 710 struct isl_sched_node *scc_node, *cluster_node; 711 int start, n; 712 isl_id *id; 713 isl_space *space; 714 isl_multi_aff *ma, *ma2; 715 716 scc_node = isl_sched_graph_find_node(ctx, &c->scc[node->scc], 717 node->space); 718 if (scc_node && !isl_sched_graph_is_node(&c->scc[node->scc], scc_node)) 719 isl_die(ctx, isl_error_internal, "unable to find node", 720 return NULL); 721 start = c->scc[node->scc].band_start; 722 n = c->scc[node->scc].n_total_row - start; 723 ma = isl_sched_node_extract_partial_schedule_multi_aff(scc_node, 724 start, n); 725 space = cluster_space(&c->scc[node->scc], c->scc_cluster[node->scc]); 726 cluster_node = isl_sched_graph_find_node(ctx, merge_graph, space); 727 if (cluster_node && !isl_sched_graph_is_node(merge_graph, cluster_node)) 728 isl_die(ctx, isl_error_internal, "unable to find cluster", 729 space = isl_space_free(space)); 730 id = isl_space_get_tuple_id(space, isl_dim_set); 731 ma = isl_multi_aff_set_tuple_id(ma, isl_dim_out, id); 732 isl_space_free(space); 733 n = merge_graph->n_total_row; 734 ma2 = isl_sched_node_extract_partial_schedule_multi_aff(cluster_node, 735 0, n); 736 ma = isl_multi_aff_pullback_multi_aff(ma2, ma); 737 738 return isl_map_from_multi_aff(ma); 739 } 740 741 /* Give a set of distances "set", are they bounded by a small constant 742 * in direction "pos"? 743 * In practice, check if they are bounded by 2 by checking that there 744 * are no elements with a value greater than or equal to 3 or 745 * smaller than or equal to -3. 746 */ 747 static isl_bool distance_is_bounded(__isl_keep isl_set *set, int pos) 748 { 749 isl_bool bounded; 750 isl_set *test; 751 752 if (!set) 753 return isl_bool_error; 754 755 test = isl_set_copy(set); 756 test = isl_set_lower_bound_si(test, isl_dim_set, pos, 3); 757 bounded = isl_set_is_empty(test); 758 isl_set_free(test); 759 760 if (bounded < 0 || !bounded) 761 return bounded; 762 763 test = isl_set_copy(set); 764 test = isl_set_upper_bound_si(test, isl_dim_set, pos, -3); 765 bounded = isl_set_is_empty(test); 766 isl_set_free(test); 767 768 return bounded; 769 } 770 771 /* Does the set "set" have a fixed (but possible parametric) value 772 * at dimension "pos"? 773 */ 774 static isl_bool has_single_value(__isl_keep isl_set *set, int pos) 775 { 776 isl_size n; 777 isl_bool single; 778 779 n = isl_set_dim(set, isl_dim_set); 780 if (n < 0) 781 return isl_bool_error; 782 set = isl_set_copy(set); 783 set = isl_set_project_out(set, isl_dim_set, pos + 1, n - (pos + 1)); 784 set = isl_set_project_out(set, isl_dim_set, 0, pos); 785 single = isl_set_is_singleton(set); 786 isl_set_free(set); 787 788 return single; 789 } 790 791 /* Does "map" have a fixed (but possible parametric) value 792 * at dimension "pos" of either its domain or its range? 793 */ 794 static isl_bool has_singular_src_or_dst(__isl_keep isl_map *map, int pos) 795 { 796 isl_set *set; 797 isl_bool single; 798 799 set = isl_map_domain(isl_map_copy(map)); 800 single = has_single_value(set, pos); 801 isl_set_free(set); 802 803 if (single < 0 || single) 804 return single; 805 806 set = isl_map_range(isl_map_copy(map)); 807 single = has_single_value(set, pos); 808 isl_set_free(set); 809 810 return single; 811 } 812 813 /* Does the edge "edge" from "graph" have bounded dependence distances 814 * in the merged graph "merge_graph" of a selection of clusters in "c"? 815 * 816 * Extract the complete transformations of the source and destination 817 * nodes of the edge, apply them to the edge constraints and 818 * compute the differences. Finally, check if these differences are bounded 819 * in each direction. 820 * 821 * If the dimension of the band is greater than the number of 822 * dimensions that can be expected to be optimized by the edge 823 * (based on its weight), then also allow the differences to be unbounded 824 * in the remaining dimensions, but only if either the source or 825 * the destination has a fixed value in that direction. 826 * This allows a statement that produces values that are used by 827 * several instances of another statement to be merged with that 828 * other statement. 829 * However, merging such clusters will introduce an inherently 830 * large proximity distance inside the merged cluster, meaning 831 * that proximity distances will no longer be optimized in 832 * subsequent merges. These merges are therefore only allowed 833 * after all other possible merges have been tried. 834 * The first time such a merge is encountered, the weight of the edge 835 * is replaced by a negative weight. The second time (i.e., after 836 * all merges over edges with a non-negative weight have been tried), 837 * the merge is allowed. 838 */ 839 static isl_bool has_bounded_distances(isl_ctx *ctx, struct isl_sched_edge *edge, 840 struct isl_sched_graph *graph, struct isl_clustering *c, 841 struct isl_sched_graph *merge_graph) 842 { 843 int i, n_slack; 844 isl_size n; 845 isl_bool bounded; 846 isl_map *map, *t; 847 isl_set *dist; 848 849 map = isl_map_copy(edge->map); 850 t = extract_node_transformation(ctx, edge->src, c, merge_graph); 851 map = isl_map_apply_domain(map, t); 852 t = extract_node_transformation(ctx, edge->dst, c, merge_graph); 853 map = isl_map_apply_range(map, t); 854 dist = isl_map_deltas(isl_map_copy(map)); 855 856 bounded = isl_bool_true; 857 n = isl_set_dim(dist, isl_dim_set); 858 if (n < 0) 859 goto error; 860 n_slack = n - edge->weight; 861 if (edge->weight < 0) 862 n_slack -= graph->max_weight + 1; 863 for (i = 0; i < n; ++i) { 864 isl_bool bounded_i, singular_i; 865 866 bounded_i = distance_is_bounded(dist, i); 867 if (bounded_i < 0) 868 goto error; 869 if (bounded_i) 870 continue; 871 if (edge->weight >= 0) 872 bounded = isl_bool_false; 873 n_slack--; 874 if (n_slack < 0) 875 break; 876 singular_i = has_singular_src_or_dst(map, i); 877 if (singular_i < 0) 878 goto error; 879 if (singular_i) 880 continue; 881 bounded = isl_bool_false; 882 break; 883 } 884 if (!bounded && i >= n && edge->weight >= 0) 885 edge->weight -= graph->max_weight + 1; 886 isl_map_free(map); 887 isl_set_free(dist); 888 889 return bounded; 890 error: 891 isl_map_free(map); 892 isl_set_free(dist); 893 return isl_bool_error; 894 } 895 896 /* Should the clusters be merged based on the cluster schedule 897 * in the current (and only) band of "merge_graph"? 898 * "graph" is the original dependence graph, while "c" records 899 * which SCCs are involved in the latest merge. 900 * 901 * In particular, is there at least one proximity constraint 902 * that is optimized by the merge? 903 * 904 * A proximity constraint is considered to be optimized 905 * if the dependence distances are small. 906 */ 907 static isl_bool ok_to_merge_proximity(isl_ctx *ctx, 908 struct isl_sched_graph *graph, struct isl_clustering *c, 909 struct isl_sched_graph *merge_graph) 910 { 911 int i; 912 913 for (i = 0; i < graph->n_edge; ++i) { 914 struct isl_sched_edge *edge = &graph->edge[i]; 915 isl_bool bounded; 916 917 if (!isl_sched_edge_is_proximity(edge)) 918 continue; 919 if (!c->scc_in_merge[edge->src->scc]) 920 continue; 921 if (!c->scc_in_merge[edge->dst->scc]) 922 continue; 923 if (c->scc_cluster[edge->dst->scc] == 924 c->scc_cluster[edge->src->scc]) 925 continue; 926 bounded = has_bounded_distances(ctx, edge, graph, c, 927 merge_graph); 928 if (bounded < 0 || bounded) 929 return bounded; 930 } 931 932 return isl_bool_false; 933 } 934 935 /* Should the clusters be merged based on the cluster schedule 936 * in the current (and only) band of "merge_graph"? 937 * "graph" is the original dependence graph, while "c" records 938 * which SCCs are involved in the latest merge. 939 * 940 * If the current band is empty, then the clusters should not be merged. 941 * 942 * If the band depth should be maximized and the merge schedule 943 * is incomplete (meaning that the dimension of some of the schedule 944 * bands in the original schedule will be reduced), then the clusters 945 * should not be merged. 946 * 947 * If the schedule_maximize_coincidence option is set, then check that 948 * the number of coincident schedule dimensions is not reduced. 949 * 950 * Finally, only allow the merge if at least one proximity 951 * constraint is optimized. 952 */ 953 static isl_bool ok_to_merge(isl_ctx *ctx, struct isl_sched_graph *graph, 954 struct isl_clustering *c, struct isl_sched_graph *merge_graph) 955 { 956 if (merge_graph->n_total_row == merge_graph->band_start) 957 return isl_bool_false; 958 959 if (isl_options_get_schedule_maximize_band_depth(ctx) && 960 merge_graph->n_total_row < merge_graph->maxvar) 961 return isl_bool_false; 962 963 if (isl_options_get_schedule_maximize_coincidence(ctx)) { 964 isl_bool ok; 965 966 ok = ok_to_merge_coincident(c, merge_graph); 967 if (ok < 0 || !ok) 968 return ok; 969 } 970 971 return ok_to_merge_proximity(ctx, graph, c, merge_graph); 972 } 973 974 /* Apply the schedule in "t_node" to the "n" rows starting at "first" 975 * of the schedule in "node" and return the result. 976 * 977 * That is, essentially compute 978 * 979 * T * N(first:first+n-1) 980 * 981 * taking into account the constant term and the parameter coefficients 982 * in "t_node". 983 */ 984 static __isl_give isl_mat *node_transformation(isl_ctx *ctx, 985 struct isl_sched_node *t_node, struct isl_sched_node *node, 986 int first, int n) 987 { 988 int i, j; 989 isl_mat *t; 990 isl_size n_row, n_col; 991 int n_param, n_var; 992 993 n_param = node->nparam; 994 n_var = node->nvar; 995 n_row = isl_mat_rows(t_node->sched); 996 n_col = isl_mat_cols(node->sched); 997 if (n_row < 0 || n_col < 0) 998 return NULL; 999 t = isl_mat_alloc(ctx, n_row, n_col); 1000 if (!t) 1001 return NULL; 1002 for (i = 0; i < n_row; ++i) { 1003 isl_seq_cpy(t->row[i], t_node->sched->row[i], 1 + n_param); 1004 isl_seq_clr(t->row[i] + 1 + n_param, n_var); 1005 for (j = 0; j < n; ++j) 1006 isl_seq_addmul(t->row[i], 1007 t_node->sched->row[i][1 + n_param + j], 1008 node->sched->row[first + j], 1009 1 + n_param + n_var); 1010 } 1011 return t; 1012 } 1013 1014 /* Apply the cluster schedule in "t_node" to the current band 1015 * schedule of the nodes in "graph". 1016 * 1017 * In particular, replace the rows starting at band_start 1018 * by the result of applying the cluster schedule in "t_node" 1019 * to the original rows. 1020 * 1021 * The coincidence of the schedule is determined by the coincidence 1022 * of the cluster schedule. 1023 */ 1024 static isl_stat transform(isl_ctx *ctx, struct isl_sched_graph *graph, 1025 struct isl_sched_node *t_node) 1026 { 1027 int i, j; 1028 isl_size n_new; 1029 int start, n; 1030 1031 start = graph->band_start; 1032 n = graph->n_total_row - start; 1033 1034 n_new = isl_mat_rows(t_node->sched); 1035 if (n_new < 0) 1036 return isl_stat_error; 1037 for (i = 0; i < graph->n; ++i) { 1038 struct isl_sched_node *node = &graph->node[i]; 1039 isl_mat *t; 1040 1041 t = node_transformation(ctx, t_node, node, start, n); 1042 node->sched = isl_mat_drop_rows(node->sched, start, n); 1043 node->sched = isl_mat_concat(node->sched, t); 1044 node->sched_map = isl_map_free(node->sched_map); 1045 if (!node->sched) 1046 return isl_stat_error; 1047 for (j = 0; j < n_new; ++j) 1048 node->coincident[start + j] = t_node->coincident[j]; 1049 } 1050 graph->n_total_row -= n; 1051 graph->n_row -= n; 1052 graph->n_total_row += n_new; 1053 graph->n_row += n_new; 1054 1055 return isl_stat_ok; 1056 } 1057 1058 /* Merge the clusters marked for merging in "c" into a single 1059 * cluster using the cluster schedule in the current band of "merge_graph". 1060 * The representative SCC for the new cluster is the SCC with 1061 * the smallest index. 1062 * 1063 * The current band schedule of each SCC in the new cluster is obtained 1064 * by applying the schedule of the corresponding original cluster 1065 * to the original band schedule. 1066 * All SCCs in the new cluster have the same number of schedule rows. 1067 */ 1068 static isl_stat merge(isl_ctx *ctx, struct isl_clustering *c, 1069 struct isl_sched_graph *merge_graph) 1070 { 1071 int i; 1072 int cluster = -1; 1073 isl_space *space; 1074 1075 for (i = 0; i < c->n; ++i) { 1076 struct isl_sched_node *node; 1077 1078 if (!c->scc_in_merge[i]) 1079 continue; 1080 if (cluster < 0) 1081 cluster = i; 1082 space = cluster_space(&c->scc[i], c->scc_cluster[i]); 1083 node = isl_sched_graph_find_node(ctx, merge_graph, space); 1084 isl_space_free(space); 1085 if (!node) 1086 return isl_stat_error; 1087 if (!isl_sched_graph_is_node(merge_graph, node)) 1088 isl_die(ctx, isl_error_internal, 1089 "unable to find cluster", 1090 return isl_stat_error); 1091 if (transform(ctx, &c->scc[i], node) < 0) 1092 return isl_stat_error; 1093 c->scc_cluster[i] = cluster; 1094 } 1095 1096 return isl_stat_ok; 1097 } 1098 1099 /* Try and merge the clusters of SCCs marked in c->scc_in_merge 1100 * by scheduling the current cluster bands with respect to each other. 1101 * 1102 * Construct a dependence graph with a space for each cluster and 1103 * with the coordinates of each space corresponding to the schedule 1104 * dimensions of the current band of that cluster. 1105 * Construct a cluster schedule in this cluster dependence graph and 1106 * apply it to the current cluster bands if it is applicable 1107 * according to ok_to_merge. 1108 * 1109 * If the number of remaining schedule dimensions in a cluster 1110 * with a non-maximal current schedule dimension is greater than 1111 * the number of remaining schedule dimensions in clusters 1112 * with a maximal current schedule dimension, then restrict 1113 * the number of rows to be computed in the cluster schedule 1114 * to the minimal such non-maximal current schedule dimension. 1115 * Do this by adjusting merge_graph.maxvar. 1116 * 1117 * Return isl_bool_true if the clusters have effectively been merged 1118 * into a single cluster. 1119 * 1120 * Note that since the standard scheduling algorithm minimizes the maximal 1121 * distance over proximity constraints, the proximity constraints between 1122 * the merged clusters may not be optimized any further than what is 1123 * sufficient to bring the distances within the limits of the internal 1124 * proximity constraints inside the individual clusters. 1125 * It may therefore make sense to perform an additional translation step 1126 * to bring the clusters closer to each other, while maintaining 1127 * the linear part of the merging schedule found using the standard 1128 * scheduling algorithm. 1129 */ 1130 static isl_bool try_merge(isl_ctx *ctx, struct isl_sched_graph *graph, 1131 struct isl_clustering *c) 1132 { 1133 struct isl_sched_graph merge_graph = { 0 }; 1134 isl_bool merged; 1135 1136 if (init_merge_graph(ctx, graph, c, &merge_graph) < 0) 1137 goto error; 1138 1139 if (isl_sched_graph_compute_maxvar(&merge_graph) < 0) 1140 goto error; 1141 if (adjust_maxvar_to_slack(ctx, &merge_graph,c) < 0) 1142 goto error; 1143 if (isl_schedule_node_compute_wcc_band(ctx, &merge_graph) < 0) 1144 goto error; 1145 merged = ok_to_merge(ctx, graph, c, &merge_graph); 1146 if (merged && merge(ctx, c, &merge_graph) < 0) 1147 goto error; 1148 1149 isl_sched_graph_free(ctx, &merge_graph); 1150 return merged; 1151 error: 1152 isl_sched_graph_free(ctx, &merge_graph); 1153 return isl_bool_error; 1154 } 1155 1156 /* Is there any edge marked "no_merge" between two SCCs that are 1157 * about to be merged (i.e., that are set in "scc_in_merge")? 1158 * "merge_edge" is the proximity edge along which the clusters of SCCs 1159 * are going to be merged. 1160 * 1161 * If there is any edge between two SCCs with a negative weight, 1162 * while the weight of "merge_edge" is non-negative, then this 1163 * means that the edge was postponed. "merge_edge" should then 1164 * also be postponed since merging along the edge with negative weight should 1165 * be postponed until all edges with non-negative weight have been tried. 1166 * Replace the weight of "merge_edge" by a negative weight as well and 1167 * tell the caller not to attempt a merge. 1168 */ 1169 static int any_no_merge(struct isl_sched_graph *graph, int *scc_in_merge, 1170 struct isl_sched_edge *merge_edge) 1171 { 1172 int i; 1173 1174 for (i = 0; i < graph->n_edge; ++i) { 1175 struct isl_sched_edge *edge = &graph->edge[i]; 1176 1177 if (!scc_in_merge[edge->src->scc]) 1178 continue; 1179 if (!scc_in_merge[edge->dst->scc]) 1180 continue; 1181 if (edge->no_merge) 1182 return 1; 1183 if (merge_edge->weight >= 0 && edge->weight < 0) { 1184 merge_edge->weight -= graph->max_weight + 1; 1185 return 1; 1186 } 1187 } 1188 1189 return 0; 1190 } 1191 1192 /* Merge the two clusters in "c" connected by the edge in "graph" 1193 * with index "edge" into a single cluster. 1194 * If it turns out to be impossible to merge these two clusters, 1195 * then mark the edge as "no_merge" such that it will not be 1196 * considered again. 1197 * 1198 * First mark all SCCs that need to be merged. This includes the SCCs 1199 * in the two clusters, but it may also include the SCCs 1200 * of intermediate clusters. 1201 * If there is already a no_merge edge between any pair of such SCCs, 1202 * then simply mark the current edge as no_merge as well. 1203 * Likewise, if any of those edges was postponed by has_bounded_distances, 1204 * then postpone the current edge as well. 1205 * Otherwise, try and merge the clusters and mark "edge" as "no_merge" 1206 * if the clusters did not end up getting merged, unless the non-merge 1207 * is due to the fact that the edge was postponed. This postponement 1208 * can be recognized by a change in weight (from non-negative to negative). 1209 */ 1210 static isl_stat merge_clusters_along_edge(isl_ctx *ctx, 1211 struct isl_sched_graph *graph, int edge, struct isl_clustering *c) 1212 { 1213 isl_bool merged; 1214 int edge_weight = graph->edge[edge].weight; 1215 1216 if (mark_merge_sccs(ctx, graph, edge, c) < 0) 1217 return isl_stat_error; 1218 1219 if (any_no_merge(graph, c->scc_in_merge, &graph->edge[edge])) 1220 merged = isl_bool_false; 1221 else 1222 merged = try_merge(ctx, graph, c); 1223 if (merged < 0) 1224 return isl_stat_error; 1225 if (!merged && edge_weight == graph->edge[edge].weight) 1226 graph->edge[edge].no_merge = 1; 1227 1228 return isl_stat_ok; 1229 } 1230 1231 /* Does "node" belong to the cluster identified by "cluster"? 1232 */ 1233 static int node_cluster_exactly(struct isl_sched_node *node, int cluster) 1234 { 1235 return node->cluster == cluster; 1236 } 1237 1238 /* Does "edge" connect two nodes belonging to the cluster 1239 * identified by "cluster"? 1240 */ 1241 static int edge_cluster_exactly(struct isl_sched_edge *edge, int cluster) 1242 { 1243 return edge->src->cluster == cluster && edge->dst->cluster == cluster; 1244 } 1245 1246 /* Swap the schedule of "node1" and "node2". 1247 * Both nodes have been derived from the same node in a common parent graph. 1248 * Since the "coincident" field is shared with that node 1249 * in the parent graph, there is no need to also swap this field. 1250 */ 1251 static void swap_sched(struct isl_sched_node *node1, 1252 struct isl_sched_node *node2) 1253 { 1254 isl_mat *sched; 1255 isl_map *sched_map; 1256 1257 sched = node1->sched; 1258 node1->sched = node2->sched; 1259 node2->sched = sched; 1260 1261 sched_map = node1->sched_map; 1262 node1->sched_map = node2->sched_map; 1263 node2->sched_map = sched_map; 1264 } 1265 1266 /* Copy the current band schedule from the SCCs that form the cluster 1267 * with index "pos" to the actual cluster at position "pos". 1268 * By construction, the index of the first SCC that belongs to the cluster 1269 * is also "pos". 1270 * 1271 * The order of the nodes inside both the SCCs and the cluster 1272 * is assumed to be same as the order in the original "graph". 1273 * 1274 * Since the SCC graphs will no longer be used after this function, 1275 * the schedules are actually swapped rather than copied. 1276 */ 1277 static isl_stat copy_partial(struct isl_sched_graph *graph, 1278 struct isl_clustering *c, int pos) 1279 { 1280 int i, j; 1281 1282 c->cluster[pos].n_total_row = c->scc[pos].n_total_row; 1283 c->cluster[pos].n_row = c->scc[pos].n_row; 1284 c->cluster[pos].maxvar = c->scc[pos].maxvar; 1285 j = 0; 1286 for (i = 0; i < graph->n; ++i) { 1287 int k; 1288 int s; 1289 1290 if (graph->node[i].cluster != pos) 1291 continue; 1292 s = graph->node[i].scc; 1293 k = c->scc_node[s]++; 1294 swap_sched(&c->cluster[pos].node[j], &c->scc[s].node[k]); 1295 if (c->scc[s].maxvar > c->cluster[pos].maxvar) 1296 c->cluster[pos].maxvar = c->scc[s].maxvar; 1297 ++j; 1298 } 1299 1300 return isl_stat_ok; 1301 } 1302 1303 /* Is there a (conditional) validity dependence from node[j] to node[i], 1304 * forcing node[i] to follow node[j] or do the nodes belong to the same 1305 * cluster? 1306 */ 1307 static isl_bool node_follows_strong_or_same_cluster(int i, int j, void *user) 1308 { 1309 struct isl_sched_graph *graph = user; 1310 1311 if (graph->node[i].cluster == graph->node[j].cluster) 1312 return isl_bool_true; 1313 return isl_sched_graph_has_validity_edge(graph, &graph->node[j], 1314 &graph->node[i]); 1315 } 1316 1317 /* Extract the merged clusters of SCCs in "graph", sort them, and 1318 * store them in c->clusters. Update c->scc_cluster accordingly. 1319 * 1320 * First keep track of the cluster containing the SCC to which a node 1321 * belongs in the node itself. 1322 * Then extract the clusters into c->clusters, copying the current 1323 * band schedule from the SCCs that belong to the cluster. 1324 * Do this only once per cluster. 1325 * 1326 * Finally, topologically sort the clusters and update c->scc_cluster 1327 * to match the new scc numbering. While the SCCs were originally 1328 * sorted already, some SCCs that depend on some other SCCs may 1329 * have been merged with SCCs that appear before these other SCCs. 1330 * A reordering may therefore be required. 1331 */ 1332 static isl_stat extract_clusters(isl_ctx *ctx, struct isl_sched_graph *graph, 1333 struct isl_clustering *c) 1334 { 1335 int i; 1336 1337 for (i = 0; i < graph->n; ++i) 1338 graph->node[i].cluster = c->scc_cluster[graph->node[i].scc]; 1339 1340 for (i = 0; i < graph->scc; ++i) { 1341 if (c->scc_cluster[i] != i) 1342 continue; 1343 if (isl_sched_graph_extract_sub_graph(ctx, graph, 1344 &node_cluster_exactly, 1345 &edge_cluster_exactly, i, &c->cluster[i]) < 0) 1346 return isl_stat_error; 1347 c->cluster[i].src_scc = -1; 1348 c->cluster[i].dst_scc = -1; 1349 if (copy_partial(graph, c, i) < 0) 1350 return isl_stat_error; 1351 } 1352 1353 if (isl_sched_graph_detect_ccs(ctx, graph, 1354 &node_follows_strong_or_same_cluster) < 0) 1355 return isl_stat_error; 1356 for (i = 0; i < graph->n; ++i) 1357 c->scc_cluster[graph->node[i].scc] = graph->node[i].cluster; 1358 1359 return isl_stat_ok; 1360 } 1361 1362 /* Compute weights on the proximity edges of "graph" that can 1363 * be used by find_proximity to find the most appropriate 1364 * proximity edge to use to merge two clusters in "c". 1365 * The weights are also used by has_bounded_distances to determine 1366 * whether the merge should be allowed. 1367 * Store the maximum of the computed weights in graph->max_weight. 1368 * 1369 * The computed weight is a measure for the number of remaining schedule 1370 * dimensions that can still be completely aligned. 1371 * In particular, compute the number of equalities between 1372 * input dimensions and output dimensions in the proximity constraints. 1373 * The directions that are already handled by outer schedule bands 1374 * are projected out prior to determining this number. 1375 * 1376 * Edges that will never be considered by find_proximity are ignored. 1377 */ 1378 static isl_stat compute_weights(struct isl_sched_graph *graph, 1379 struct isl_clustering *c) 1380 { 1381 int i; 1382 1383 graph->max_weight = 0; 1384 1385 for (i = 0; i < graph->n_edge; ++i) { 1386 struct isl_sched_edge *edge = &graph->edge[i]; 1387 struct isl_sched_node *src = edge->src; 1388 struct isl_sched_node *dst = edge->dst; 1389 isl_basic_map *hull; 1390 isl_bool prox; 1391 isl_size n_in, n_out, n; 1392 1393 prox = is_non_empty_proximity(edge); 1394 if (prox < 0) 1395 return isl_stat_error; 1396 if (!prox) 1397 continue; 1398 if (bad_cluster(&c->scc[edge->src->scc]) || 1399 bad_cluster(&c->scc[edge->dst->scc])) 1400 continue; 1401 if (c->scc_cluster[edge->dst->scc] == 1402 c->scc_cluster[edge->src->scc]) 1403 continue; 1404 1405 hull = isl_map_affine_hull(isl_map_copy(edge->map)); 1406 hull = isl_basic_map_transform_dims(hull, isl_dim_in, 0, 1407 isl_mat_copy(src->vmap)); 1408 hull = isl_basic_map_transform_dims(hull, isl_dim_out, 0, 1409 isl_mat_copy(dst->vmap)); 1410 hull = isl_basic_map_project_out(hull, 1411 isl_dim_in, 0, src->rank); 1412 hull = isl_basic_map_project_out(hull, 1413 isl_dim_out, 0, dst->rank); 1414 hull = isl_basic_map_remove_divs(hull); 1415 n_in = isl_basic_map_dim(hull, isl_dim_in); 1416 n_out = isl_basic_map_dim(hull, isl_dim_out); 1417 if (n_in < 0 || n_out < 0) 1418 hull = isl_basic_map_free(hull); 1419 hull = isl_basic_map_drop_constraints_not_involving_dims(hull, 1420 isl_dim_in, 0, n_in); 1421 hull = isl_basic_map_drop_constraints_not_involving_dims(hull, 1422 isl_dim_out, 0, n_out); 1423 n = isl_basic_map_n_equality(hull); 1424 isl_basic_map_free(hull); 1425 if (n < 0) 1426 return isl_stat_error; 1427 edge->weight = n; 1428 1429 if (edge->weight > graph->max_weight) 1430 graph->max_weight = edge->weight; 1431 } 1432 1433 return isl_stat_ok; 1434 } 1435 1436 /* Call isl_schedule_node_compute_finish_band on each of the clusters in "c" and 1437 * update "node" to arrange for them to be executed in an order 1438 * possibly involving set nodes that generalizes the topological order 1439 * determined by the scc fields of the nodes in "graph". 1440 * 1441 * Note that at this stage, there are graph->scc clusters and 1442 * their positions in c->cluster are determined by the values 1443 * of c->scc_cluster. 1444 * 1445 * Construct an isl_scc_graph and perform the decomposition 1446 * using this graph. 1447 */ 1448 static __isl_give isl_schedule_node *finish_bands_decompose( 1449 __isl_take isl_schedule_node *node, struct isl_sched_graph *graph, 1450 struct isl_clustering *c) 1451 { 1452 isl_ctx *ctx; 1453 struct isl_scc_graph *scc_graph; 1454 1455 ctx = isl_schedule_node_get_ctx(node); 1456 1457 scc_graph = isl_scc_graph_from_sched_graph(ctx, graph, c); 1458 node = isl_scc_graph_decompose(scc_graph, node); 1459 isl_scc_graph_free(scc_graph); 1460 1461 return node; 1462 } 1463 1464 /* Call isl_schedule_node_compute_finish_band on each of the clusters in "c" 1465 * in their topological order. This order is determined by the scc 1466 * fields of the nodes in "graph". 1467 * Combine the results in a sequence expressing the topological order. 1468 * 1469 * If there is only one cluster left, then there is no need to introduce 1470 * a sequence node. Also, in this case, the cluster necessarily contains 1471 * the SCC at position 0 in the original graph and is therefore also 1472 * stored in the first cluster of "c". 1473 * 1474 * If there are more than two clusters left, then some subsets of the clusters 1475 * may still be independent of each other. These could then still 1476 * be reordered with respect to each other. Call finish_bands_decompose 1477 * to try and construct an ordering involving set and sequence nodes 1478 * that generalizes the topological order. 1479 * Note that at the outermost level there can be no independent components 1480 * because isl_schedule_node_compute_wcc_clustering is called 1481 * on a (weakly) connected component. 1482 */ 1483 static __isl_give isl_schedule_node *finish_bands_clustering( 1484 __isl_take isl_schedule_node *node, struct isl_sched_graph *graph, 1485 struct isl_clustering *c) 1486 { 1487 int i; 1488 isl_ctx *ctx; 1489 isl_union_set_list *filters; 1490 1491 if (graph->scc == 1) 1492 return isl_schedule_node_compute_finish_band(node, 1493 &c->cluster[0], 0); 1494 if (graph->scc > 2) 1495 return finish_bands_decompose(node, graph, c); 1496 1497 ctx = isl_schedule_node_get_ctx(node); 1498 1499 filters = isl_sched_graph_extract_sccs(ctx, graph); 1500 node = isl_schedule_node_insert_sequence(node, filters); 1501 1502 for (i = 0; i < graph->scc; ++i) { 1503 int j = c->scc_cluster[i]; 1504 node = isl_schedule_node_grandchild(node, i, 0); 1505 node = isl_schedule_node_compute_finish_band(node, 1506 &c->cluster[j], 0); 1507 node = isl_schedule_node_grandparent(node); 1508 } 1509 1510 return node; 1511 } 1512 1513 /* Compute a schedule for a connected dependence graph by first considering 1514 * each strongly connected component (SCC) in the graph separately and then 1515 * incrementally combining them into clusters. 1516 * Return the updated schedule node. 1517 * 1518 * Initially, each cluster consists of a single SCC, each with its 1519 * own band schedule. The algorithm then tries to merge pairs 1520 * of clusters along a proximity edge until no more suitable 1521 * proximity edges can be found. During this merging, the schedule 1522 * is maintained in the individual SCCs. 1523 * After the merging is completed, the full resulting clusters 1524 * are extracted and in finish_bands_clustering, 1525 * isl_schedule_node_compute_finish_band is called on each of them to integrate 1526 * the band into "node" and to continue the computation. 1527 * 1528 * compute_weights initializes the weights that are used by find_proximity. 1529 */ 1530 __isl_give isl_schedule_node *isl_schedule_node_compute_wcc_clustering( 1531 __isl_take isl_schedule_node *node, struct isl_sched_graph *graph) 1532 { 1533 isl_ctx *ctx; 1534 struct isl_clustering c; 1535 int i; 1536 1537 ctx = isl_schedule_node_get_ctx(node); 1538 1539 if (clustering_init(ctx, &c, graph) < 0) 1540 goto error; 1541 1542 if (compute_weights(graph, &c) < 0) 1543 goto error; 1544 1545 for (;;) { 1546 i = find_proximity(graph, &c); 1547 if (i < 0) 1548 goto error; 1549 if (i >= graph->n_edge) 1550 break; 1551 if (merge_clusters_along_edge(ctx, graph, i, &c) < 0) 1552 goto error; 1553 } 1554 1555 if (extract_clusters(ctx, graph, &c) < 0) 1556 goto error; 1557 1558 node = finish_bands_clustering(node, graph, &c); 1559 1560 clustering_free(ctx, &c); 1561 return node; 1562 error: 1563 clustering_free(ctx, &c); 1564 return isl_schedule_node_free(node); 1565 } 1566