xref: /llvm-project/polly/lib/External/isl/isl_scheduler_clustering.c (revision a749e09e184b2b0b6dde71af01c82dd427b3e3e2)
1 /*
2  * Copyright 2015      Sven Verdoolaege
3  *
4  * Use of this software is governed by the MIT license
5  *
6  * Written by Sven Verdoolaege
7  */
8 
9 #include "isl_map_private.h"
10 
11 #include <isl/id.h>
12 #include <isl/schedule_node.h>
13 #include <isl/union_set.h>
14 
15 #include "isl_mat_private.h"
16 #include "isl_scheduler_clustering.h"
17 #include "isl_scheduler_scc.h"
18 #include "isl_seq.h"
19 #include "isl_tarjan.h"
20 
21 /* Initialize the clustering data structure "c" from "graph".
22  *
23  * In particular, allocate memory, extract the SCCs from "graph"
24  * into c->scc, initialize scc_cluster and construct
25  * a band of schedule rows for each SCC.
26  * Within each SCC, there is only one SCC by definition.
27  * Each SCC initially belongs to a cluster containing only that SCC.
28  */
clustering_init(isl_ctx * ctx,struct isl_clustering * c,struct isl_sched_graph * graph)29 static isl_stat clustering_init(isl_ctx *ctx, struct isl_clustering *c,
30 	struct isl_sched_graph *graph)
31 {
32 	int i;
33 
34 	c->n = graph->scc;
35 	c->scc = isl_calloc_array(ctx, struct isl_sched_graph, c->n);
36 	c->cluster = isl_calloc_array(ctx, struct isl_sched_graph, c->n);
37 	c->scc_cluster = isl_calloc_array(ctx, int, c->n);
38 	c->scc_node = isl_calloc_array(ctx, int, c->n);
39 	c->scc_in_merge = isl_calloc_array(ctx, int, c->n);
40 	if (!c->scc || !c->cluster ||
41 	    !c->scc_cluster || !c->scc_node || !c->scc_in_merge)
42 		return isl_stat_error;
43 
44 	for (i = 0; i < c->n; ++i) {
45 		if (isl_sched_graph_extract_sub_graph(ctx, graph,
46 					&isl_sched_node_scc_exactly,
47 					&isl_sched_edge_scc_exactly,
48 					i, &c->scc[i]) < 0)
49 			return isl_stat_error;
50 		c->scc[i].scc = 1;
51 		if (isl_sched_graph_compute_maxvar(&c->scc[i]) < 0)
52 			return isl_stat_error;
53 		if (isl_schedule_node_compute_wcc_band(ctx, &c->scc[i]) < 0)
54 			return isl_stat_error;
55 		c->scc_cluster[i] = i;
56 	}
57 
58 	return isl_stat_ok;
59 }
60 
61 /* Free all memory allocated for "c".
62  */
clustering_free(isl_ctx * ctx,struct isl_clustering * c)63 static void clustering_free(isl_ctx *ctx, struct isl_clustering *c)
64 {
65 	int i;
66 
67 	if (c->scc)
68 		for (i = 0; i < c->n; ++i)
69 			isl_sched_graph_free(ctx, &c->scc[i]);
70 	free(c->scc);
71 	if (c->cluster)
72 		for (i = 0; i < c->n; ++i)
73 			isl_sched_graph_free(ctx, &c->cluster[i]);
74 	free(c->cluster);
75 	free(c->scc_cluster);
76 	free(c->scc_node);
77 	free(c->scc_in_merge);
78 }
79 
80 /* Should we refrain from merging the cluster in "graph" with
81  * any other cluster?
82  * In particular, is its current schedule band empty and incomplete.
83  */
bad_cluster(struct isl_sched_graph * graph)84 static int bad_cluster(struct isl_sched_graph *graph)
85 {
86 	return graph->n_row < graph->maxvar &&
87 		graph->n_total_row == graph->band_start;
88 }
89 
90 /* Is "edge" a proximity edge with a non-empty dependence relation?
91  */
is_non_empty_proximity(struct isl_sched_edge * edge)92 static isl_bool is_non_empty_proximity(struct isl_sched_edge *edge)
93 {
94 	if (!isl_sched_edge_is_proximity(edge))
95 		return isl_bool_false;
96 	return isl_bool_not(isl_map_plain_is_empty(edge->map));
97 }
98 
99 /* Return the index of an edge in "graph" that can be used to merge
100  * two clusters in "c".
101  * Return graph->n_edge if no such edge can be found.
102  * Return -1 on error.
103  *
104  * In particular, return a proximity edge between two clusters
105  * that is not marked "no_merge" and such that neither of the
106  * two clusters has an incomplete, empty band.
107  *
108  * If there are multiple such edges, then try and find the most
109  * appropriate edge to use for merging.  In particular, pick the edge
110  * with the greatest weight.  If there are multiple of those,
111  * then pick one with the shortest distance between
112  * the two cluster representatives.
113  */
find_proximity(struct isl_sched_graph * graph,struct isl_clustering * c)114 static int find_proximity(struct isl_sched_graph *graph,
115 	struct isl_clustering *c)
116 {
117 	int i, best = graph->n_edge, best_dist, best_weight;
118 
119 	for (i = 0; i < graph->n_edge; ++i) {
120 		struct isl_sched_edge *edge = &graph->edge[i];
121 		int dist, weight;
122 		isl_bool prox;
123 
124 		prox = is_non_empty_proximity(edge);
125 		if (prox < 0)
126 			return -1;
127 		if (!prox)
128 			continue;
129 		if (edge->no_merge)
130 			continue;
131 		if (bad_cluster(&c->scc[edge->src->scc]) ||
132 		    bad_cluster(&c->scc[edge->dst->scc]))
133 			continue;
134 		dist = c->scc_cluster[edge->dst->scc] -
135 			c->scc_cluster[edge->src->scc];
136 		if (dist == 0)
137 			continue;
138 		weight = edge->weight;
139 		if (best < graph->n_edge) {
140 			if (best_weight > weight)
141 				continue;
142 			if (best_weight == weight && best_dist <= dist)
143 				continue;
144 		}
145 		best = i;
146 		best_dist = dist;
147 		best_weight = weight;
148 	}
149 
150 	return best;
151 }
152 
153 /* Internal data structure used in mark_merge_sccs.
154  *
155  * "graph" is the dependence graph in which a strongly connected
156  * component is constructed.
157  * "scc_cluster" maps each SCC index to the cluster to which it belongs.
158  * "src" and "dst" are the indices of the nodes that are being merged.
159  */
160 struct isl_mark_merge_sccs_data {
161 	struct isl_sched_graph *graph;
162 	int *scc_cluster;
163 	int src;
164 	int dst;
165 };
166 
167 /* Check whether the cluster containing node "i" depends on the cluster
168  * containing node "j".  If "i" and "j" belong to the same cluster,
169  * then they are taken to depend on each other to ensure that
170  * the resulting strongly connected component consists of complete
171  * clusters.  Furthermore, if "i" and "j" are the two nodes that
172  * are being merged, then they are taken to depend on each other as well.
173  * Otherwise, check if there is a (conditional) validity dependence
174  * from node[j] to node[i], forcing node[i] to follow node[j].
175  */
cluster_follows(int i,int j,void * user)176 static isl_bool cluster_follows(int i, int j, void *user)
177 {
178 	struct isl_mark_merge_sccs_data *data = user;
179 	struct isl_sched_graph *graph = data->graph;
180 	int *scc_cluster = data->scc_cluster;
181 
182 	if (data->src == i && data->dst == j)
183 		return isl_bool_true;
184 	if (data->src == j && data->dst == i)
185 		return isl_bool_true;
186 	if (scc_cluster[graph->node[i].scc] == scc_cluster[graph->node[j].scc])
187 		return isl_bool_true;
188 
189 	return isl_sched_graph_has_validity_edge(graph, &graph->node[j],
190 							&graph->node[i]);
191 }
192 
193 /* Mark all SCCs that belong to either of the two clusters in "c"
194  * connected by the edge in "graph" with index "edge", or to any
195  * of the intermediate clusters.
196  * The marking is recorded in c->scc_in_merge.
197  *
198  * The given edge has been selected for merging two clusters,
199  * meaning that there is at least a proximity edge between the two nodes.
200  * However, there may also be (indirect) validity dependences
201  * between the two nodes.  When merging the two clusters, all clusters
202  * containing one or more of the intermediate nodes along the
203  * indirect validity dependences need to be merged in as well.
204  *
205  * First collect all such nodes by computing the strongly connected
206  * component (SCC) containing the two nodes connected by the edge, where
207  * the two nodes are considered to depend on each other to make
208  * sure they end up in the same SCC.  Similarly, each node is considered
209  * to depend on every other node in the same cluster to ensure
210  * that the SCC consists of complete clusters.
211  *
212  * Then the original SCCs that contain any of these nodes are marked
213  * in c->scc_in_merge.
214  */
mark_merge_sccs(isl_ctx * ctx,struct isl_sched_graph * graph,int edge,struct isl_clustering * c)215 static isl_stat mark_merge_sccs(isl_ctx *ctx, struct isl_sched_graph *graph,
216 	int edge, struct isl_clustering *c)
217 {
218 	struct isl_mark_merge_sccs_data data;
219 	struct isl_tarjan_graph *g;
220 	int i;
221 
222 	for (i = 0; i < c->n; ++i)
223 		c->scc_in_merge[i] = 0;
224 
225 	data.graph = graph;
226 	data.scc_cluster = c->scc_cluster;
227 	data.src = graph->edge[edge].src - graph->node;
228 	data.dst = graph->edge[edge].dst - graph->node;
229 
230 	g = isl_tarjan_graph_component(ctx, graph->n, data.dst,
231 					&cluster_follows, &data);
232 	if (!g)
233 		goto error;
234 
235 	i = g->op;
236 	if (i < 3)
237 		isl_die(ctx, isl_error_internal,
238 			"expecting at least two nodes in component",
239 			goto error);
240 	if (g->order[--i] != -1)
241 		isl_die(ctx, isl_error_internal,
242 			"expecting end of component marker", goto error);
243 
244 	for (--i; i >= 0 && g->order[i] != -1; --i) {
245 		int scc = graph->node[g->order[i]].scc;
246 		c->scc_in_merge[scc] = 1;
247 	}
248 
249 	isl_tarjan_graph_free(g);
250 	return isl_stat_ok;
251 error:
252 	isl_tarjan_graph_free(g);
253 	return isl_stat_error;
254 }
255 
256 /* Construct the identifier "cluster_i".
257  */
cluster_id(isl_ctx * ctx,int i)258 static __isl_give isl_id *cluster_id(isl_ctx *ctx, int i)
259 {
260 	char name[40];
261 
262 	snprintf(name, sizeof(name), "cluster_%d", i);
263 	return isl_id_alloc(ctx, name, NULL);
264 }
265 
266 /* Construct the space of the cluster with index "i" containing
267  * the strongly connected component "scc".
268  *
269  * In particular, construct a space called cluster_i with dimension equal
270  * to the number of schedule rows in the current band of "scc".
271  */
cluster_space(struct isl_sched_graph * scc,int i)272 static __isl_give isl_space *cluster_space(struct isl_sched_graph *scc, int i)
273 {
274 	int nvar;
275 	isl_space *space;
276 	isl_id *id;
277 
278 	nvar = scc->n_total_row - scc->band_start;
279 	space = isl_space_copy(scc->node[0].space);
280 	space = isl_space_params(space);
281 	space = isl_space_set_from_params(space);
282 	space = isl_space_add_dims(space, isl_dim_set, nvar);
283 	id = cluster_id(isl_space_get_ctx(space), i);
284 	space = isl_space_set_tuple_id(space, isl_dim_set, id);
285 
286 	return space;
287 }
288 
289 /* Collect the domain of the graph for merging clusters.
290  *
291  * In particular, for each cluster with first SCC "i", construct
292  * a set in the space called cluster_i with dimension equal
293  * to the number of schedule rows in the current band of the cluster.
294  */
collect_domain(isl_ctx * ctx,struct isl_sched_graph * graph,struct isl_clustering * c)295 static __isl_give isl_union_set *collect_domain(isl_ctx *ctx,
296 	struct isl_sched_graph *graph, struct isl_clustering *c)
297 {
298 	int i;
299 	isl_space *space;
300 	isl_union_set *domain;
301 
302 	space = isl_space_params_alloc(ctx, 0);
303 	domain = isl_union_set_empty(space);
304 
305 	for (i = 0; i < graph->scc; ++i) {
306 		isl_space *space;
307 
308 		if (!c->scc_in_merge[i])
309 			continue;
310 		if (c->scc_cluster[i] != i)
311 			continue;
312 		space = cluster_space(&c->scc[i], i);
313 		domain = isl_union_set_add_set(domain, isl_set_universe(space));
314 	}
315 
316 	return domain;
317 }
318 
319 /* Construct a map from the original instances to the corresponding
320  * cluster instance in the current bands of the clusters in "c".
321  */
collect_cluster_map(isl_ctx * ctx,struct isl_sched_graph * graph,struct isl_clustering * c)322 static __isl_give isl_union_map *collect_cluster_map(isl_ctx *ctx,
323 	struct isl_sched_graph *graph, struct isl_clustering *c)
324 {
325 	int i, j;
326 	isl_space *space;
327 	isl_union_map *cluster_map;
328 
329 	space = isl_space_params_alloc(ctx, 0);
330 	cluster_map = isl_union_map_empty(space);
331 	for (i = 0; i < graph->scc; ++i) {
332 		int start, n;
333 		isl_id *id;
334 
335 		if (!c->scc_in_merge[i])
336 			continue;
337 
338 		id = cluster_id(ctx, c->scc_cluster[i]);
339 		start = c->scc[i].band_start;
340 		n = c->scc[i].n_total_row - start;
341 		for (j = 0; j < c->scc[i].n; ++j) {
342 			isl_multi_aff *ma;
343 			isl_map *map;
344 			struct isl_sched_node *node = &c->scc[i].node[j];
345 
346 			ma = isl_sched_node_extract_partial_schedule_multi_aff(
347 								node, start, n);
348 			ma = isl_multi_aff_set_tuple_id(ma, isl_dim_out,
349 							    isl_id_copy(id));
350 			map = isl_map_from_multi_aff(ma);
351 			cluster_map = isl_union_map_add_map(cluster_map, map);
352 		}
353 		isl_id_free(id);
354 	}
355 
356 	return cluster_map;
357 }
358 
359 /* Add "umap" to the schedule constraints "sc" of all types of "edge"
360  * that are not isl_edge_condition or isl_edge_conditional_validity.
361  */
add_non_conditional_constraints(struct isl_sched_edge * edge,__isl_keep isl_union_map * umap,__isl_take isl_schedule_constraints * sc)362 static __isl_give isl_schedule_constraints *add_non_conditional_constraints(
363 	struct isl_sched_edge *edge, __isl_keep isl_union_map *umap,
364 	__isl_take isl_schedule_constraints *sc)
365 {
366 	enum isl_edge_type t;
367 
368 	if (!sc)
369 		return NULL;
370 
371 	for (t = isl_edge_first; t <= isl_edge_last; ++t) {
372 		if (t == isl_edge_condition ||
373 		    t == isl_edge_conditional_validity)
374 			continue;
375 		if (!isl_sched_edge_has_type(edge, t))
376 			continue;
377 		sc = isl_schedule_constraints_add(sc, t,
378 						    isl_union_map_copy(umap));
379 	}
380 
381 	return sc;
382 }
383 
384 /* Add schedule constraints of types isl_edge_condition and
385  * isl_edge_conditional_validity to "sc" by applying "umap" to
386  * the domains of the wrapped relations in domain and range
387  * of the corresponding tagged constraints of "edge".
388  */
add_conditional_constraints(struct isl_sched_edge * edge,__isl_keep isl_union_map * umap,__isl_take isl_schedule_constraints * sc)389 static __isl_give isl_schedule_constraints *add_conditional_constraints(
390 	struct isl_sched_edge *edge, __isl_keep isl_union_map *umap,
391 	__isl_take isl_schedule_constraints *sc)
392 {
393 	enum isl_edge_type t;
394 	isl_union_map *tagged;
395 
396 	for (t = isl_edge_condition; t <= isl_edge_conditional_validity; ++t) {
397 		if (!isl_sched_edge_has_type(edge, t))
398 			continue;
399 		if (t == isl_edge_condition)
400 			tagged = isl_union_map_copy(edge->tagged_condition);
401 		else
402 			tagged = isl_union_map_copy(edge->tagged_validity);
403 		tagged = isl_union_map_zip(tagged);
404 		tagged = isl_union_map_apply_domain(tagged,
405 					isl_union_map_copy(umap));
406 		tagged = isl_union_map_zip(tagged);
407 		sc = isl_schedule_constraints_add(sc, t, tagged);
408 		if (!sc)
409 			return NULL;
410 	}
411 
412 	return sc;
413 }
414 
415 /* Given a mapping "cluster_map" from the original instances to
416  * the cluster instances, add schedule constraints on the clusters
417  * to "sc" corresponding to the original constraints represented by "edge".
418  *
419  * For non-tagged dependence constraints, the cluster constraints
420  * are obtained by applying "cluster_map" to the edge->map.
421  *
422  * For tagged dependence constraints, "cluster_map" needs to be applied
423  * to the domains of the wrapped relations in domain and range
424  * of the tagged dependence constraints.  Pick out the mappings
425  * from these domains from "cluster_map" and construct their product.
426  * This mapping can then be applied to the pair of domains.
427  */
collect_edge_constraints(struct isl_sched_edge * edge,__isl_keep isl_union_map * cluster_map,__isl_take isl_schedule_constraints * sc)428 static __isl_give isl_schedule_constraints *collect_edge_constraints(
429 	struct isl_sched_edge *edge, __isl_keep isl_union_map *cluster_map,
430 	__isl_take isl_schedule_constraints *sc)
431 {
432 	isl_union_map *umap;
433 	isl_space *space;
434 	isl_union_set *uset;
435 	isl_union_map *umap1, *umap2;
436 
437 	if (!sc)
438 		return NULL;
439 
440 	umap = isl_union_map_from_map(isl_map_copy(edge->map));
441 	umap = isl_union_map_apply_domain(umap,
442 				isl_union_map_copy(cluster_map));
443 	umap = isl_union_map_apply_range(umap,
444 				isl_union_map_copy(cluster_map));
445 	sc = add_non_conditional_constraints(edge, umap, sc);
446 	isl_union_map_free(umap);
447 
448 	if (!sc ||
449 	    (!isl_sched_edge_is_condition(edge) &&
450 	     !isl_sched_edge_is_conditional_validity(edge)))
451 		return sc;
452 
453 	space = isl_space_domain(isl_map_get_space(edge->map));
454 	uset = isl_union_set_from_set(isl_set_universe(space));
455 	umap1 = isl_union_map_copy(cluster_map);
456 	umap1 = isl_union_map_intersect_domain(umap1, uset);
457 	space = isl_space_range(isl_map_get_space(edge->map));
458 	uset = isl_union_set_from_set(isl_set_universe(space));
459 	umap2 = isl_union_map_copy(cluster_map);
460 	umap2 = isl_union_map_intersect_domain(umap2, uset);
461 	umap = isl_union_map_product(umap1, umap2);
462 
463 	sc = add_conditional_constraints(edge, umap, sc);
464 
465 	isl_union_map_free(umap);
466 	return sc;
467 }
468 
469 /* Given a mapping "cluster_map" from the original instances to
470  * the cluster instances, add schedule constraints on the clusters
471  * to "sc" corresponding to all edges in "graph" between nodes that
472  * belong to SCCs that are marked for merging in "scc_in_merge".
473  */
collect_constraints(struct isl_sched_graph * graph,int * scc_in_merge,__isl_keep isl_union_map * cluster_map,__isl_take isl_schedule_constraints * sc)474 static __isl_give isl_schedule_constraints *collect_constraints(
475 	struct isl_sched_graph *graph, int *scc_in_merge,
476 	__isl_keep isl_union_map *cluster_map,
477 	__isl_take isl_schedule_constraints *sc)
478 {
479 	int i;
480 
481 	for (i = 0; i < graph->n_edge; ++i) {
482 		struct isl_sched_edge *edge = &graph->edge[i];
483 
484 		if (!scc_in_merge[edge->src->scc])
485 			continue;
486 		if (!scc_in_merge[edge->dst->scc])
487 			continue;
488 		sc = collect_edge_constraints(edge, cluster_map, sc);
489 	}
490 
491 	return sc;
492 }
493 
494 /* Construct a dependence graph for scheduling clusters with respect
495  * to each other and store the result in "merge_graph".
496  * In particular, the nodes of the graph correspond to the schedule
497  * dimensions of the current bands of those clusters that have been
498  * marked for merging in "c".
499  *
500  * First construct an isl_schedule_constraints object for this domain
501  * by transforming the edges in "graph" to the domain.
502  * Then initialize a dependence graph for scheduling from these
503  * constraints.
504  */
init_merge_graph(isl_ctx * ctx,struct isl_sched_graph * graph,struct isl_clustering * c,struct isl_sched_graph * merge_graph)505 static isl_stat init_merge_graph(isl_ctx *ctx, struct isl_sched_graph *graph,
506 	struct isl_clustering *c, struct isl_sched_graph *merge_graph)
507 {
508 	isl_union_set *domain;
509 	isl_union_map *cluster_map;
510 	isl_schedule_constraints *sc;
511 	isl_stat r;
512 
513 	domain = collect_domain(ctx, graph, c);
514 	sc = isl_schedule_constraints_on_domain(domain);
515 	if (!sc)
516 		return isl_stat_error;
517 	cluster_map = collect_cluster_map(ctx, graph, c);
518 	sc = collect_constraints(graph, c->scc_in_merge, cluster_map, sc);
519 	isl_union_map_free(cluster_map);
520 
521 	r = isl_sched_graph_init(merge_graph, sc);
522 
523 	isl_schedule_constraints_free(sc);
524 
525 	return r;
526 }
527 
528 /* Compute the maximal number of remaining schedule rows that still need
529  * to be computed for the nodes that belong to clusters with the maximal
530  * dimension for the current band (i.e., the band that is to be merged).
531  * Only clusters that are about to be merged are considered.
532  * "maxvar" is the maximal dimension for the current band.
533  * "c" contains information about the clusters.
534  *
535  * Return the maximal number of remaining schedule rows or
536  * isl_size_error on error.
537  */
compute_maxvar_max_slack(int maxvar,struct isl_clustering * c)538 static isl_size compute_maxvar_max_slack(int maxvar, struct isl_clustering *c)
539 {
540 	int i, j;
541 	int max_slack;
542 
543 	max_slack = 0;
544 	for (i = 0; i < c->n; ++i) {
545 		int nvar;
546 		struct isl_sched_graph *scc;
547 
548 		if (!c->scc_in_merge[i])
549 			continue;
550 		scc = &c->scc[i];
551 		nvar = scc->n_total_row - scc->band_start;
552 		if (nvar != maxvar)
553 			continue;
554 		for (j = 0; j < scc->n; ++j) {
555 			struct isl_sched_node *node = &scc->node[j];
556 			int slack;
557 
558 			if (isl_sched_node_update_vmap(node) < 0)
559 				return isl_size_error;
560 			slack = node->nvar - node->rank;
561 			if (slack > max_slack)
562 				max_slack = slack;
563 		}
564 	}
565 
566 	return max_slack;
567 }
568 
569 /* If there are any clusters where the dimension of the current band
570  * (i.e., the band that is to be merged) is smaller than "maxvar" and
571  * if there are any nodes in such a cluster where the number
572  * of remaining schedule rows that still need to be computed
573  * is greater than "max_slack", then return the smallest current band
574  * dimension of all these clusters.  Otherwise return the original value
575  * of "maxvar".  Return isl_size_error in case of any error.
576  * Only clusters that are about to be merged are considered.
577  * "c" contains information about the clusters.
578  */
limit_maxvar_to_slack(int maxvar,int max_slack,struct isl_clustering * c)579 static isl_size limit_maxvar_to_slack(int maxvar, int max_slack,
580 	struct isl_clustering *c)
581 {
582 	int i, j;
583 
584 	for (i = 0; i < c->n; ++i) {
585 		int nvar;
586 		struct isl_sched_graph *scc;
587 
588 		if (!c->scc_in_merge[i])
589 			continue;
590 		scc = &c->scc[i];
591 		nvar = scc->n_total_row - scc->band_start;
592 		if (nvar >= maxvar)
593 			continue;
594 		for (j = 0; j < scc->n; ++j) {
595 			struct isl_sched_node *node = &scc->node[j];
596 			int slack;
597 
598 			if (isl_sched_node_update_vmap(node) < 0)
599 				return isl_size_error;
600 			slack = node->nvar - node->rank;
601 			if (slack > max_slack) {
602 				maxvar = nvar;
603 				break;
604 			}
605 		}
606 	}
607 
608 	return maxvar;
609 }
610 
611 /* Adjust merge_graph->maxvar based on the number of remaining schedule rows
612  * that still need to be computed.  In particular, if there is a node
613  * in a cluster where the dimension of the current band is smaller
614  * than merge_graph->maxvar, but the number of remaining schedule rows
615  * is greater than that of any node in a cluster with the maximal
616  * dimension for the current band (i.e., merge_graph->maxvar),
617  * then adjust merge_graph->maxvar to the (smallest) current band dimension
618  * of those clusters.  Without this adjustment, the total number of
619  * schedule dimensions would be increased, resulting in a skewed view
620  * of the number of coincident dimensions.
621  * "c" contains information about the clusters.
622  *
623  * If the maximize_band_depth option is set and merge_graph->maxvar is reduced,
624  * then there is no point in attempting any merge since it will be rejected
625  * anyway.  Set merge_graph->maxvar to zero in such cases.
626  */
adjust_maxvar_to_slack(isl_ctx * ctx,struct isl_sched_graph * merge_graph,struct isl_clustering * c)627 static isl_stat adjust_maxvar_to_slack(isl_ctx *ctx,
628 	struct isl_sched_graph *merge_graph, struct isl_clustering *c)
629 {
630 	isl_size max_slack, maxvar;
631 
632 	max_slack = compute_maxvar_max_slack(merge_graph->maxvar, c);
633 	if (max_slack < 0)
634 		return isl_stat_error;
635 	maxvar = limit_maxvar_to_slack(merge_graph->maxvar, max_slack, c);
636 	if (maxvar < 0)
637 		return isl_stat_error;
638 
639 	if (maxvar < merge_graph->maxvar) {
640 		if (isl_options_get_schedule_maximize_band_depth(ctx))
641 			merge_graph->maxvar = 0;
642 		else
643 			merge_graph->maxvar = maxvar;
644 	}
645 
646 	return isl_stat_ok;
647 }
648 
649 /* Return the number of coincident dimensions in the current band of "graph",
650  * where the nodes of "graph" are assumed to be scheduled by a single band.
651  */
get_n_coincident(struct isl_sched_graph * graph)652 static int get_n_coincident(struct isl_sched_graph *graph)
653 {
654 	int i;
655 
656 	for (i = graph->band_start; i < graph->n_total_row; ++i)
657 		if (!graph->node[0].coincident[i])
658 			break;
659 
660 	return i - graph->band_start;
661 }
662 
663 /* Should the clusters be merged based on the cluster schedule
664  * in the current (and only) band of "merge_graph", given that
665  * coincidence should be maximized?
666  *
667  * If the number of coincident schedule dimensions in the merged band
668  * would be less than the maximal number of coincident schedule dimensions
669  * in any of the merged clusters, then the clusters should not be merged.
670  */
ok_to_merge_coincident(struct isl_clustering * c,struct isl_sched_graph * merge_graph)671 static isl_bool ok_to_merge_coincident(struct isl_clustering *c,
672 	struct isl_sched_graph *merge_graph)
673 {
674 	int i;
675 	int n_coincident;
676 	int max_coincident;
677 
678 	max_coincident = 0;
679 	for (i = 0; i < c->n; ++i) {
680 		if (!c->scc_in_merge[i])
681 			continue;
682 		n_coincident = get_n_coincident(&c->scc[i]);
683 		if (n_coincident > max_coincident)
684 			max_coincident = n_coincident;
685 	}
686 
687 	n_coincident = get_n_coincident(merge_graph);
688 
689 	return isl_bool_ok(n_coincident >= max_coincident);
690 }
691 
692 /* Return the transformation on "node" expressed by the current (and only)
693  * band of "merge_graph" applied to the clusters in "c".
694  *
695  * First find the representation of "node" in its SCC in "c" and
696  * extract the transformation expressed by the current band.
697  * Then extract the transformation applied by "merge_graph"
698  * to the cluster to which this SCC belongs.
699  * Combine the two to obtain the complete transformation on the node.
700  *
701  * Note that the range of the first transformation is an anonymous space,
702  * while the domain of the second is named "cluster_X".  The range
703  * of the former therefore needs to be adjusted before the two
704  * can be combined.
705  */
extract_node_transformation(isl_ctx * ctx,struct isl_sched_node * node,struct isl_clustering * c,struct isl_sched_graph * merge_graph)706 static __isl_give isl_map *extract_node_transformation(isl_ctx *ctx,
707 	struct isl_sched_node *node, struct isl_clustering *c,
708 	struct isl_sched_graph *merge_graph)
709 {
710 	struct isl_sched_node *scc_node, *cluster_node;
711 	int start, n;
712 	isl_id *id;
713 	isl_space *space;
714 	isl_multi_aff *ma, *ma2;
715 
716 	scc_node = isl_sched_graph_find_node(ctx, &c->scc[node->scc],
717 						node->space);
718 	if (scc_node && !isl_sched_graph_is_node(&c->scc[node->scc], scc_node))
719 		isl_die(ctx, isl_error_internal, "unable to find node",
720 			return NULL);
721 	start = c->scc[node->scc].band_start;
722 	n = c->scc[node->scc].n_total_row - start;
723 	ma = isl_sched_node_extract_partial_schedule_multi_aff(scc_node,
724 								start, n);
725 	space = cluster_space(&c->scc[node->scc], c->scc_cluster[node->scc]);
726 	cluster_node = isl_sched_graph_find_node(ctx, merge_graph, space);
727 	if (cluster_node && !isl_sched_graph_is_node(merge_graph, cluster_node))
728 		isl_die(ctx, isl_error_internal, "unable to find cluster",
729 			space = isl_space_free(space));
730 	id = isl_space_get_tuple_id(space, isl_dim_set);
731 	ma = isl_multi_aff_set_tuple_id(ma, isl_dim_out, id);
732 	isl_space_free(space);
733 	n = merge_graph->n_total_row;
734 	ma2 = isl_sched_node_extract_partial_schedule_multi_aff(cluster_node,
735 								0, n);
736 	ma = isl_multi_aff_pullback_multi_aff(ma2, ma);
737 
738 	return isl_map_from_multi_aff(ma);
739 }
740 
741 /* Give a set of distances "set", are they bounded by a small constant
742  * in direction "pos"?
743  * In practice, check if they are bounded by 2 by checking that there
744  * are no elements with a value greater than or equal to 3 or
745  * smaller than or equal to -3.
746  */
distance_is_bounded(__isl_keep isl_set * set,int pos)747 static isl_bool distance_is_bounded(__isl_keep isl_set *set, int pos)
748 {
749 	isl_bool bounded;
750 	isl_set *test;
751 
752 	if (!set)
753 		return isl_bool_error;
754 
755 	test = isl_set_copy(set);
756 	test = isl_set_lower_bound_si(test, isl_dim_set, pos, 3);
757 	bounded = isl_set_is_empty(test);
758 	isl_set_free(test);
759 
760 	if (bounded < 0 || !bounded)
761 		return bounded;
762 
763 	test = isl_set_copy(set);
764 	test = isl_set_upper_bound_si(test, isl_dim_set, pos, -3);
765 	bounded = isl_set_is_empty(test);
766 	isl_set_free(test);
767 
768 	return bounded;
769 }
770 
771 /* Does the set "set" have a fixed (but possible parametric) value
772  * at dimension "pos"?
773  */
has_single_value(__isl_keep isl_set * set,int pos)774 static isl_bool has_single_value(__isl_keep isl_set *set, int pos)
775 {
776 	isl_size n;
777 	isl_bool single;
778 
779 	n = isl_set_dim(set, isl_dim_set);
780 	if (n < 0)
781 		return isl_bool_error;
782 	set = isl_set_copy(set);
783 	set = isl_set_project_out(set, isl_dim_set, pos + 1, n - (pos + 1));
784 	set = isl_set_project_out(set, isl_dim_set, 0, pos);
785 	single = isl_set_is_singleton(set);
786 	isl_set_free(set);
787 
788 	return single;
789 }
790 
791 /* Does "map" have a fixed (but possible parametric) value
792  * at dimension "pos" of either its domain or its range?
793  */
has_singular_src_or_dst(__isl_keep isl_map * map,int pos)794 static isl_bool has_singular_src_or_dst(__isl_keep isl_map *map, int pos)
795 {
796 	isl_set *set;
797 	isl_bool single;
798 
799 	set = isl_map_domain(isl_map_copy(map));
800 	single = has_single_value(set, pos);
801 	isl_set_free(set);
802 
803 	if (single < 0 || single)
804 		return single;
805 
806 	set = isl_map_range(isl_map_copy(map));
807 	single = has_single_value(set, pos);
808 	isl_set_free(set);
809 
810 	return single;
811 }
812 
813 /* Does the edge "edge" from "graph" have bounded dependence distances
814  * in the merged graph "merge_graph" of a selection of clusters in "c"?
815  *
816  * Extract the complete transformations of the source and destination
817  * nodes of the edge, apply them to the edge constraints and
818  * compute the differences.  Finally, check if these differences are bounded
819  * in each direction.
820  *
821  * If the dimension of the band is greater than the number of
822  * dimensions that can be expected to be optimized by the edge
823  * (based on its weight), then also allow the differences to be unbounded
824  * in the remaining dimensions, but only if either the source or
825  * the destination has a fixed value in that direction.
826  * This allows a statement that produces values that are used by
827  * several instances of another statement to be merged with that
828  * other statement.
829  * However, merging such clusters will introduce an inherently
830  * large proximity distance inside the merged cluster, meaning
831  * that proximity distances will no longer be optimized in
832  * subsequent merges.  These merges are therefore only allowed
833  * after all other possible merges have been tried.
834  * The first time such a merge is encountered, the weight of the edge
835  * is replaced by a negative weight.  The second time (i.e., after
836  * all merges over edges with a non-negative weight have been tried),
837  * the merge is allowed.
838  */
has_bounded_distances(isl_ctx * ctx,struct isl_sched_edge * edge,struct isl_sched_graph * graph,struct isl_clustering * c,struct isl_sched_graph * merge_graph)839 static isl_bool has_bounded_distances(isl_ctx *ctx, struct isl_sched_edge *edge,
840 	struct isl_sched_graph *graph, struct isl_clustering *c,
841 	struct isl_sched_graph *merge_graph)
842 {
843 	int i, n_slack;
844 	isl_size n;
845 	isl_bool bounded;
846 	isl_map *map, *t;
847 	isl_set *dist;
848 
849 	map = isl_map_copy(edge->map);
850 	t = extract_node_transformation(ctx, edge->src, c, merge_graph);
851 	map = isl_map_apply_domain(map, t);
852 	t = extract_node_transformation(ctx, edge->dst, c, merge_graph);
853 	map = isl_map_apply_range(map, t);
854 	dist = isl_map_deltas(isl_map_copy(map));
855 
856 	bounded = isl_bool_true;
857 	n = isl_set_dim(dist, isl_dim_set);
858 	if (n < 0)
859 		goto error;
860 	n_slack = n - edge->weight;
861 	if (edge->weight < 0)
862 		n_slack -= graph->max_weight + 1;
863 	for (i = 0; i < n; ++i) {
864 		isl_bool bounded_i, singular_i;
865 
866 		bounded_i = distance_is_bounded(dist, i);
867 		if (bounded_i < 0)
868 			goto error;
869 		if (bounded_i)
870 			continue;
871 		if (edge->weight >= 0)
872 			bounded = isl_bool_false;
873 		n_slack--;
874 		if (n_slack < 0)
875 			break;
876 		singular_i = has_singular_src_or_dst(map, i);
877 		if (singular_i < 0)
878 			goto error;
879 		if (singular_i)
880 			continue;
881 		bounded = isl_bool_false;
882 		break;
883 	}
884 	if (!bounded && i >= n && edge->weight >= 0)
885 		edge->weight -= graph->max_weight + 1;
886 	isl_map_free(map);
887 	isl_set_free(dist);
888 
889 	return bounded;
890 error:
891 	isl_map_free(map);
892 	isl_set_free(dist);
893 	return isl_bool_error;
894 }
895 
896 /* Should the clusters be merged based on the cluster schedule
897  * in the current (and only) band of "merge_graph"?
898  * "graph" is the original dependence graph, while "c" records
899  * which SCCs are involved in the latest merge.
900  *
901  * In particular, is there at least one proximity constraint
902  * that is optimized by the merge?
903  *
904  * A proximity constraint is considered to be optimized
905  * if the dependence distances are small.
906  */
ok_to_merge_proximity(isl_ctx * ctx,struct isl_sched_graph * graph,struct isl_clustering * c,struct isl_sched_graph * merge_graph)907 static isl_bool ok_to_merge_proximity(isl_ctx *ctx,
908 	struct isl_sched_graph *graph, struct isl_clustering *c,
909 	struct isl_sched_graph *merge_graph)
910 {
911 	int i;
912 
913 	for (i = 0; i < graph->n_edge; ++i) {
914 		struct isl_sched_edge *edge = &graph->edge[i];
915 		isl_bool bounded;
916 
917 		if (!isl_sched_edge_is_proximity(edge))
918 			continue;
919 		if (!c->scc_in_merge[edge->src->scc])
920 			continue;
921 		if (!c->scc_in_merge[edge->dst->scc])
922 			continue;
923 		if (c->scc_cluster[edge->dst->scc] ==
924 		    c->scc_cluster[edge->src->scc])
925 			continue;
926 		bounded = has_bounded_distances(ctx, edge, graph, c,
927 						merge_graph);
928 		if (bounded < 0 || bounded)
929 			return bounded;
930 	}
931 
932 	return isl_bool_false;
933 }
934 
935 /* Should the clusters be merged based on the cluster schedule
936  * in the current (and only) band of "merge_graph"?
937  * "graph" is the original dependence graph, while "c" records
938  * which SCCs are involved in the latest merge.
939  *
940  * If the current band is empty, then the clusters should not be merged.
941  *
942  * If the band depth should be maximized and the merge schedule
943  * is incomplete (meaning that the dimension of some of the schedule
944  * bands in the original schedule will be reduced), then the clusters
945  * should not be merged.
946  *
947  * If the schedule_maximize_coincidence option is set, then check that
948  * the number of coincident schedule dimensions is not reduced.
949  *
950  * Finally, only allow the merge if at least one proximity
951  * constraint is optimized.
952  */
ok_to_merge(isl_ctx * ctx,struct isl_sched_graph * graph,struct isl_clustering * c,struct isl_sched_graph * merge_graph)953 static isl_bool ok_to_merge(isl_ctx *ctx, struct isl_sched_graph *graph,
954 	struct isl_clustering *c, struct isl_sched_graph *merge_graph)
955 {
956 	if (merge_graph->n_total_row == merge_graph->band_start)
957 		return isl_bool_false;
958 
959 	if (isl_options_get_schedule_maximize_band_depth(ctx) &&
960 	    merge_graph->n_total_row < merge_graph->maxvar)
961 		return isl_bool_false;
962 
963 	if (isl_options_get_schedule_maximize_coincidence(ctx)) {
964 		isl_bool ok;
965 
966 		ok = ok_to_merge_coincident(c, merge_graph);
967 		if (ok < 0 || !ok)
968 			return ok;
969 	}
970 
971 	return ok_to_merge_proximity(ctx, graph, c, merge_graph);
972 }
973 
974 /* Apply the schedule in "t_node" to the "n" rows starting at "first"
975  * of the schedule in "node" and return the result.
976  *
977  * That is, essentially compute
978  *
979  *	T * N(first:first+n-1)
980  *
981  * taking into account the constant term and the parameter coefficients
982  * in "t_node".
983  */
node_transformation(isl_ctx * ctx,struct isl_sched_node * t_node,struct isl_sched_node * node,int first,int n)984 static __isl_give isl_mat *node_transformation(isl_ctx *ctx,
985 	struct isl_sched_node *t_node, struct isl_sched_node *node,
986 	int first, int n)
987 {
988 	int i, j;
989 	isl_mat *t;
990 	isl_size n_row, n_col;
991 	int n_param, n_var;
992 
993 	n_param = node->nparam;
994 	n_var = node->nvar;
995 	n_row = isl_mat_rows(t_node->sched);
996 	n_col = isl_mat_cols(node->sched);
997 	if (n_row < 0 || n_col < 0)
998 		return NULL;
999 	t = isl_mat_alloc(ctx, n_row, n_col);
1000 	if (!t)
1001 		return NULL;
1002 	for (i = 0; i < n_row; ++i) {
1003 		isl_seq_cpy(t->row[i], t_node->sched->row[i], 1 + n_param);
1004 		isl_seq_clr(t->row[i] + 1 + n_param, n_var);
1005 		for (j = 0; j < n; ++j)
1006 			isl_seq_addmul(t->row[i],
1007 					t_node->sched->row[i][1 + n_param + j],
1008 					node->sched->row[first + j],
1009 					1 + n_param + n_var);
1010 	}
1011 	return t;
1012 }
1013 
1014 /* Apply the cluster schedule in "t_node" to the current band
1015  * schedule of the nodes in "graph".
1016  *
1017  * In particular, replace the rows starting at band_start
1018  * by the result of applying the cluster schedule in "t_node"
1019  * to the original rows.
1020  *
1021  * The coincidence of the schedule is determined by the coincidence
1022  * of the cluster schedule.
1023  */
transform(isl_ctx * ctx,struct isl_sched_graph * graph,struct isl_sched_node * t_node)1024 static isl_stat transform(isl_ctx *ctx, struct isl_sched_graph *graph,
1025 	struct isl_sched_node *t_node)
1026 {
1027 	int i, j;
1028 	isl_size n_new;
1029 	int start, n;
1030 
1031 	start = graph->band_start;
1032 	n = graph->n_total_row - start;
1033 
1034 	n_new = isl_mat_rows(t_node->sched);
1035 	if (n_new < 0)
1036 		return isl_stat_error;
1037 	for (i = 0; i < graph->n; ++i) {
1038 		struct isl_sched_node *node = &graph->node[i];
1039 		isl_mat *t;
1040 
1041 		t = node_transformation(ctx, t_node, node, start, n);
1042 		node->sched = isl_mat_drop_rows(node->sched, start, n);
1043 		node->sched = isl_mat_concat(node->sched, t);
1044 		node->sched_map = isl_map_free(node->sched_map);
1045 		if (!node->sched)
1046 			return isl_stat_error;
1047 		for (j = 0; j < n_new; ++j)
1048 			node->coincident[start + j] = t_node->coincident[j];
1049 	}
1050 	graph->n_total_row -= n;
1051 	graph->n_row -= n;
1052 	graph->n_total_row += n_new;
1053 	graph->n_row += n_new;
1054 
1055 	return isl_stat_ok;
1056 }
1057 
1058 /* Merge the clusters marked for merging in "c" into a single
1059  * cluster using the cluster schedule in the current band of "merge_graph".
1060  * The representative SCC for the new cluster is the SCC with
1061  * the smallest index.
1062  *
1063  * The current band schedule of each SCC in the new cluster is obtained
1064  * by applying the schedule of the corresponding original cluster
1065  * to the original band schedule.
1066  * All SCCs in the new cluster have the same number of schedule rows.
1067  */
merge(isl_ctx * ctx,struct isl_clustering * c,struct isl_sched_graph * merge_graph)1068 static isl_stat merge(isl_ctx *ctx, struct isl_clustering *c,
1069 	struct isl_sched_graph *merge_graph)
1070 {
1071 	int i;
1072 	int cluster = -1;
1073 	isl_space *space;
1074 
1075 	for (i = 0; i < c->n; ++i) {
1076 		struct isl_sched_node *node;
1077 
1078 		if (!c->scc_in_merge[i])
1079 			continue;
1080 		if (cluster < 0)
1081 			cluster = i;
1082 		space = cluster_space(&c->scc[i], c->scc_cluster[i]);
1083 		node = isl_sched_graph_find_node(ctx, merge_graph, space);
1084 		isl_space_free(space);
1085 		if (!node)
1086 			return isl_stat_error;
1087 		if (!isl_sched_graph_is_node(merge_graph, node))
1088 			isl_die(ctx, isl_error_internal,
1089 				"unable to find cluster",
1090 				return isl_stat_error);
1091 		if (transform(ctx, &c->scc[i], node) < 0)
1092 			return isl_stat_error;
1093 		c->scc_cluster[i] = cluster;
1094 	}
1095 
1096 	return isl_stat_ok;
1097 }
1098 
1099 /* Try and merge the clusters of SCCs marked in c->scc_in_merge
1100  * by scheduling the current cluster bands with respect to each other.
1101  *
1102  * Construct a dependence graph with a space for each cluster and
1103  * with the coordinates of each space corresponding to the schedule
1104  * dimensions of the current band of that cluster.
1105  * Construct a cluster schedule in this cluster dependence graph and
1106  * apply it to the current cluster bands if it is applicable
1107  * according to ok_to_merge.
1108  *
1109  * If the number of remaining schedule dimensions in a cluster
1110  * with a non-maximal current schedule dimension is greater than
1111  * the number of remaining schedule dimensions in clusters
1112  * with a maximal current schedule dimension, then restrict
1113  * the number of rows to be computed in the cluster schedule
1114  * to the minimal such non-maximal current schedule dimension.
1115  * Do this by adjusting merge_graph.maxvar.
1116  *
1117  * Return isl_bool_true if the clusters have effectively been merged
1118  * into a single cluster.
1119  *
1120  * Note that since the standard scheduling algorithm minimizes the maximal
1121  * distance over proximity constraints, the proximity constraints between
1122  * the merged clusters may not be optimized any further than what is
1123  * sufficient to bring the distances within the limits of the internal
1124  * proximity constraints inside the individual clusters.
1125  * It may therefore make sense to perform an additional translation step
1126  * to bring the clusters closer to each other, while maintaining
1127  * the linear part of the merging schedule found using the standard
1128  * scheduling algorithm.
1129  */
try_merge(isl_ctx * ctx,struct isl_sched_graph * graph,struct isl_clustering * c)1130 static isl_bool try_merge(isl_ctx *ctx, struct isl_sched_graph *graph,
1131 	struct isl_clustering *c)
1132 {
1133 	struct isl_sched_graph merge_graph = { 0 };
1134 	isl_bool merged;
1135 
1136 	if (init_merge_graph(ctx, graph, c, &merge_graph) < 0)
1137 		goto error;
1138 
1139 	if (isl_sched_graph_compute_maxvar(&merge_graph) < 0)
1140 		goto error;
1141 	if (adjust_maxvar_to_slack(ctx, &merge_graph,c) < 0)
1142 		goto error;
1143 	if (isl_schedule_node_compute_wcc_band(ctx, &merge_graph) < 0)
1144 		goto error;
1145 	merged = ok_to_merge(ctx, graph, c, &merge_graph);
1146 	if (merged && merge(ctx, c, &merge_graph) < 0)
1147 		goto error;
1148 
1149 	isl_sched_graph_free(ctx, &merge_graph);
1150 	return merged;
1151 error:
1152 	isl_sched_graph_free(ctx, &merge_graph);
1153 	return isl_bool_error;
1154 }
1155 
1156 /* Is there any edge marked "no_merge" between two SCCs that are
1157  * about to be merged (i.e., that are set in "scc_in_merge")?
1158  * "merge_edge" is the proximity edge along which the clusters of SCCs
1159  * are going to be merged.
1160  *
1161  * If there is any edge between two SCCs with a negative weight,
1162  * while the weight of "merge_edge" is non-negative, then this
1163  * means that the edge was postponed.  "merge_edge" should then
1164  * also be postponed since merging along the edge with negative weight should
1165  * be postponed until all edges with non-negative weight have been tried.
1166  * Replace the weight of "merge_edge" by a negative weight as well and
1167  * tell the caller not to attempt a merge.
1168  */
any_no_merge(struct isl_sched_graph * graph,int * scc_in_merge,struct isl_sched_edge * merge_edge)1169 static int any_no_merge(struct isl_sched_graph *graph, int *scc_in_merge,
1170 	struct isl_sched_edge *merge_edge)
1171 {
1172 	int i;
1173 
1174 	for (i = 0; i < graph->n_edge; ++i) {
1175 		struct isl_sched_edge *edge = &graph->edge[i];
1176 
1177 		if (!scc_in_merge[edge->src->scc])
1178 			continue;
1179 		if (!scc_in_merge[edge->dst->scc])
1180 			continue;
1181 		if (edge->no_merge)
1182 			return 1;
1183 		if (merge_edge->weight >= 0 && edge->weight < 0) {
1184 			merge_edge->weight -= graph->max_weight + 1;
1185 			return 1;
1186 		}
1187 	}
1188 
1189 	return 0;
1190 }
1191 
1192 /* Merge the two clusters in "c" connected by the edge in "graph"
1193  * with index "edge" into a single cluster.
1194  * If it turns out to be impossible to merge these two clusters,
1195  * then mark the edge as "no_merge" such that it will not be
1196  * considered again.
1197  *
1198  * First mark all SCCs that need to be merged.  This includes the SCCs
1199  * in the two clusters, but it may also include the SCCs
1200  * of intermediate clusters.
1201  * If there is already a no_merge edge between any pair of such SCCs,
1202  * then simply mark the current edge as no_merge as well.
1203  * Likewise, if any of those edges was postponed by has_bounded_distances,
1204  * then postpone the current edge as well.
1205  * Otherwise, try and merge the clusters and mark "edge" as "no_merge"
1206  * if the clusters did not end up getting merged, unless the non-merge
1207  * is due to the fact that the edge was postponed.  This postponement
1208  * can be recognized by a change in weight (from non-negative to negative).
1209  */
merge_clusters_along_edge(isl_ctx * ctx,struct isl_sched_graph * graph,int edge,struct isl_clustering * c)1210 static isl_stat merge_clusters_along_edge(isl_ctx *ctx,
1211 	struct isl_sched_graph *graph, int edge, struct isl_clustering *c)
1212 {
1213 	isl_bool merged;
1214 	int edge_weight = graph->edge[edge].weight;
1215 
1216 	if (mark_merge_sccs(ctx, graph, edge, c) < 0)
1217 		return isl_stat_error;
1218 
1219 	if (any_no_merge(graph, c->scc_in_merge, &graph->edge[edge]))
1220 		merged = isl_bool_false;
1221 	else
1222 		merged = try_merge(ctx, graph, c);
1223 	if (merged < 0)
1224 		return isl_stat_error;
1225 	if (!merged && edge_weight == graph->edge[edge].weight)
1226 		graph->edge[edge].no_merge = 1;
1227 
1228 	return isl_stat_ok;
1229 }
1230 
1231 /* Does "node" belong to the cluster identified by "cluster"?
1232  */
node_cluster_exactly(struct isl_sched_node * node,int cluster)1233 static int node_cluster_exactly(struct isl_sched_node *node, int cluster)
1234 {
1235 	return node->cluster == cluster;
1236 }
1237 
1238 /* Does "edge" connect two nodes belonging to the cluster
1239  * identified by "cluster"?
1240  */
edge_cluster_exactly(struct isl_sched_edge * edge,int cluster)1241 static int edge_cluster_exactly(struct isl_sched_edge *edge, int cluster)
1242 {
1243 	return edge->src->cluster == cluster && edge->dst->cluster == cluster;
1244 }
1245 
1246 /* Swap the schedule of "node1" and "node2".
1247  * Both nodes have been derived from the same node in a common parent graph.
1248  * Since the "coincident" field is shared with that node
1249  * in the parent graph, there is no need to also swap this field.
1250  */
swap_sched(struct isl_sched_node * node1,struct isl_sched_node * node2)1251 static void swap_sched(struct isl_sched_node *node1,
1252 	struct isl_sched_node *node2)
1253 {
1254 	isl_mat *sched;
1255 	isl_map *sched_map;
1256 
1257 	sched = node1->sched;
1258 	node1->sched = node2->sched;
1259 	node2->sched = sched;
1260 
1261 	sched_map = node1->sched_map;
1262 	node1->sched_map = node2->sched_map;
1263 	node2->sched_map = sched_map;
1264 }
1265 
1266 /* Copy the current band schedule from the SCCs that form the cluster
1267  * with index "pos" to the actual cluster at position "pos".
1268  * By construction, the index of the first SCC that belongs to the cluster
1269  * is also "pos".
1270  *
1271  * The order of the nodes inside both the SCCs and the cluster
1272  * is assumed to be same as the order in the original "graph".
1273  *
1274  * Since the SCC graphs will no longer be used after this function,
1275  * the schedules are actually swapped rather than copied.
1276  */
copy_partial(struct isl_sched_graph * graph,struct isl_clustering * c,int pos)1277 static isl_stat copy_partial(struct isl_sched_graph *graph,
1278 	struct isl_clustering *c, int pos)
1279 {
1280 	int i, j;
1281 
1282 	c->cluster[pos].n_total_row = c->scc[pos].n_total_row;
1283 	c->cluster[pos].n_row = c->scc[pos].n_row;
1284 	c->cluster[pos].maxvar = c->scc[pos].maxvar;
1285 	j = 0;
1286 	for (i = 0; i < graph->n; ++i) {
1287 		int k;
1288 		int s;
1289 
1290 		if (graph->node[i].cluster != pos)
1291 			continue;
1292 		s = graph->node[i].scc;
1293 		k = c->scc_node[s]++;
1294 		swap_sched(&c->cluster[pos].node[j], &c->scc[s].node[k]);
1295 		if (c->scc[s].maxvar > c->cluster[pos].maxvar)
1296 			c->cluster[pos].maxvar = c->scc[s].maxvar;
1297 		++j;
1298 	}
1299 
1300 	return isl_stat_ok;
1301 }
1302 
1303 /* Is there a (conditional) validity dependence from node[j] to node[i],
1304  * forcing node[i] to follow node[j] or do the nodes belong to the same
1305  * cluster?
1306  */
node_follows_strong_or_same_cluster(int i,int j,void * user)1307 static isl_bool node_follows_strong_or_same_cluster(int i, int j, void *user)
1308 {
1309 	struct isl_sched_graph *graph = user;
1310 
1311 	if (graph->node[i].cluster == graph->node[j].cluster)
1312 		return isl_bool_true;
1313 	return isl_sched_graph_has_validity_edge(graph, &graph->node[j],
1314 							&graph->node[i]);
1315 }
1316 
1317 /* Extract the merged clusters of SCCs in "graph", sort them, and
1318  * store them in c->clusters.  Update c->scc_cluster accordingly.
1319  *
1320  * First keep track of the cluster containing the SCC to which a node
1321  * belongs in the node itself.
1322  * Then extract the clusters into c->clusters, copying the current
1323  * band schedule from the SCCs that belong to the cluster.
1324  * Do this only once per cluster.
1325  *
1326  * Finally, topologically sort the clusters and update c->scc_cluster
1327  * to match the new scc numbering.  While the SCCs were originally
1328  * sorted already, some SCCs that depend on some other SCCs may
1329  * have been merged with SCCs that appear before these other SCCs.
1330  * A reordering may therefore be required.
1331  */
extract_clusters(isl_ctx * ctx,struct isl_sched_graph * graph,struct isl_clustering * c)1332 static isl_stat extract_clusters(isl_ctx *ctx, struct isl_sched_graph *graph,
1333 	struct isl_clustering *c)
1334 {
1335 	int i;
1336 
1337 	for (i = 0; i < graph->n; ++i)
1338 		graph->node[i].cluster = c->scc_cluster[graph->node[i].scc];
1339 
1340 	for (i = 0; i < graph->scc; ++i) {
1341 		if (c->scc_cluster[i] != i)
1342 			continue;
1343 		if (isl_sched_graph_extract_sub_graph(ctx, graph,
1344 				&node_cluster_exactly,
1345 				&edge_cluster_exactly, i, &c->cluster[i]) < 0)
1346 			return isl_stat_error;
1347 		c->cluster[i].src_scc = -1;
1348 		c->cluster[i].dst_scc = -1;
1349 		if (copy_partial(graph, c, i) < 0)
1350 			return isl_stat_error;
1351 	}
1352 
1353 	if (isl_sched_graph_detect_ccs(ctx, graph,
1354 				&node_follows_strong_or_same_cluster) < 0)
1355 		return isl_stat_error;
1356 	for (i = 0; i < graph->n; ++i)
1357 		c->scc_cluster[graph->node[i].scc] = graph->node[i].cluster;
1358 
1359 	return isl_stat_ok;
1360 }
1361 
1362 /* Compute weights on the proximity edges of "graph" that can
1363  * be used by find_proximity to find the most appropriate
1364  * proximity edge to use to merge two clusters in "c".
1365  * The weights are also used by has_bounded_distances to determine
1366  * whether the merge should be allowed.
1367  * Store the maximum of the computed weights in graph->max_weight.
1368  *
1369  * The computed weight is a measure for the number of remaining schedule
1370  * dimensions that can still be completely aligned.
1371  * In particular, compute the number of equalities between
1372  * input dimensions and output dimensions in the proximity constraints.
1373  * The directions that are already handled by outer schedule bands
1374  * are projected out prior to determining this number.
1375  *
1376  * Edges that will never be considered by find_proximity are ignored.
1377  */
compute_weights(struct isl_sched_graph * graph,struct isl_clustering * c)1378 static isl_stat compute_weights(struct isl_sched_graph *graph,
1379 	struct isl_clustering *c)
1380 {
1381 	int i;
1382 
1383 	graph->max_weight = 0;
1384 
1385 	for (i = 0; i < graph->n_edge; ++i) {
1386 		struct isl_sched_edge *edge = &graph->edge[i];
1387 		struct isl_sched_node *src = edge->src;
1388 		struct isl_sched_node *dst = edge->dst;
1389 		isl_basic_map *hull;
1390 		isl_bool prox;
1391 		isl_size n_in, n_out, n;
1392 
1393 		prox = is_non_empty_proximity(edge);
1394 		if (prox < 0)
1395 			return isl_stat_error;
1396 		if (!prox)
1397 			continue;
1398 		if (bad_cluster(&c->scc[edge->src->scc]) ||
1399 		    bad_cluster(&c->scc[edge->dst->scc]))
1400 			continue;
1401 		if (c->scc_cluster[edge->dst->scc] ==
1402 		    c->scc_cluster[edge->src->scc])
1403 			continue;
1404 
1405 		hull = isl_map_affine_hull(isl_map_copy(edge->map));
1406 		hull = isl_basic_map_transform_dims(hull, isl_dim_in, 0,
1407 						    isl_mat_copy(src->vmap));
1408 		hull = isl_basic_map_transform_dims(hull, isl_dim_out, 0,
1409 						    isl_mat_copy(dst->vmap));
1410 		hull = isl_basic_map_project_out(hull,
1411 						isl_dim_in, 0, src->rank);
1412 		hull = isl_basic_map_project_out(hull,
1413 						isl_dim_out, 0, dst->rank);
1414 		hull = isl_basic_map_remove_divs(hull);
1415 		n_in = isl_basic_map_dim(hull, isl_dim_in);
1416 		n_out = isl_basic_map_dim(hull, isl_dim_out);
1417 		if (n_in < 0 || n_out < 0)
1418 			hull = isl_basic_map_free(hull);
1419 		hull = isl_basic_map_drop_constraints_not_involving_dims(hull,
1420 							isl_dim_in, 0, n_in);
1421 		hull = isl_basic_map_drop_constraints_not_involving_dims(hull,
1422 							isl_dim_out, 0, n_out);
1423 		n = isl_basic_map_n_equality(hull);
1424 		isl_basic_map_free(hull);
1425 		if (n < 0)
1426 			return isl_stat_error;
1427 		edge->weight = n;
1428 
1429 		if (edge->weight > graph->max_weight)
1430 			graph->max_weight = edge->weight;
1431 	}
1432 
1433 	return isl_stat_ok;
1434 }
1435 
1436 /* Call isl_schedule_node_compute_finish_band on each of the clusters in "c" and
1437  * update "node" to arrange for them to be executed in an order
1438  * possibly involving set nodes that generalizes the topological order
1439  * determined by the scc fields of the nodes in "graph".
1440  *
1441  * Note that at this stage, there are graph->scc clusters and
1442  * their positions in c->cluster are determined by the values
1443  * of c->scc_cluster.
1444  *
1445  * Construct an isl_scc_graph and perform the decomposition
1446  * using this graph.
1447  */
finish_bands_decompose(__isl_take isl_schedule_node * node,struct isl_sched_graph * graph,struct isl_clustering * c)1448 static __isl_give isl_schedule_node *finish_bands_decompose(
1449 	__isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
1450 	struct isl_clustering *c)
1451 {
1452 	isl_ctx *ctx;
1453 	struct isl_scc_graph *scc_graph;
1454 
1455 	ctx = isl_schedule_node_get_ctx(node);
1456 
1457 	scc_graph = isl_scc_graph_from_sched_graph(ctx, graph, c);
1458 	node = isl_scc_graph_decompose(scc_graph, node);
1459 	isl_scc_graph_free(scc_graph);
1460 
1461 	return node;
1462 }
1463 
1464 /* Call isl_schedule_node_compute_finish_band on each of the clusters in "c"
1465  * in their topological order.  This order is determined by the scc
1466  * fields of the nodes in "graph".
1467  * Combine the results in a sequence expressing the topological order.
1468  *
1469  * If there is only one cluster left, then there is no need to introduce
1470  * a sequence node.  Also, in this case, the cluster necessarily contains
1471  * the SCC at position 0 in the original graph and is therefore also
1472  * stored in the first cluster of "c".
1473  *
1474  * If there are more than two clusters left, then some subsets of the clusters
1475  * may still be independent of each other.  These could then still
1476  * be reordered with respect to each other.  Call finish_bands_decompose
1477  * to try and construct an ordering involving set and sequence nodes
1478  * that generalizes the topological order.
1479  * Note that at the outermost level there can be no independent components
1480  * because isl_schedule_node_compute_wcc_clustering is called
1481  * on a (weakly) connected component.
1482  */
finish_bands_clustering(__isl_take isl_schedule_node * node,struct isl_sched_graph * graph,struct isl_clustering * c)1483 static __isl_give isl_schedule_node *finish_bands_clustering(
1484 	__isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
1485 	struct isl_clustering *c)
1486 {
1487 	int i;
1488 	isl_ctx *ctx;
1489 	isl_union_set_list *filters;
1490 
1491 	if (graph->scc == 1)
1492 		return isl_schedule_node_compute_finish_band(node,
1493 							&c->cluster[0], 0);
1494 	if (graph->scc > 2)
1495 		return finish_bands_decompose(node, graph, c);
1496 
1497 	ctx = isl_schedule_node_get_ctx(node);
1498 
1499 	filters = isl_sched_graph_extract_sccs(ctx, graph);
1500 	node = isl_schedule_node_insert_sequence(node, filters);
1501 
1502 	for (i = 0; i < graph->scc; ++i) {
1503 		int j = c->scc_cluster[i];
1504 		node = isl_schedule_node_grandchild(node, i, 0);
1505 		node = isl_schedule_node_compute_finish_band(node,
1506 							&c->cluster[j], 0);
1507 		node = isl_schedule_node_grandparent(node);
1508 	}
1509 
1510 	return node;
1511 }
1512 
1513 /* Compute a schedule for a connected dependence graph by first considering
1514  * each strongly connected component (SCC) in the graph separately and then
1515  * incrementally combining them into clusters.
1516  * Return the updated schedule node.
1517  *
1518  * Initially, each cluster consists of a single SCC, each with its
1519  * own band schedule.  The algorithm then tries to merge pairs
1520  * of clusters along a proximity edge until no more suitable
1521  * proximity edges can be found.  During this merging, the schedule
1522  * is maintained in the individual SCCs.
1523  * After the merging is completed, the full resulting clusters
1524  * are extracted and in finish_bands_clustering,
1525  * isl_schedule_node_compute_finish_band is called on each of them to integrate
1526  * the band into "node" and to continue the computation.
1527  *
1528  * compute_weights initializes the weights that are used by find_proximity.
1529  */
isl_schedule_node_compute_wcc_clustering(__isl_take isl_schedule_node * node,struct isl_sched_graph * graph)1530 __isl_give isl_schedule_node *isl_schedule_node_compute_wcc_clustering(
1531 	__isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
1532 {
1533 	isl_ctx *ctx;
1534 	struct isl_clustering c;
1535 	int i;
1536 
1537 	ctx = isl_schedule_node_get_ctx(node);
1538 
1539 	if (clustering_init(ctx, &c, graph) < 0)
1540 		goto error;
1541 
1542 	if (compute_weights(graph, &c) < 0)
1543 		goto error;
1544 
1545 	for (;;) {
1546 		i = find_proximity(graph, &c);
1547 		if (i < 0)
1548 			goto error;
1549 		if (i >= graph->n_edge)
1550 			break;
1551 		if (merge_clusters_along_edge(ctx, graph, i, &c) < 0)
1552 			goto error;
1553 	}
1554 
1555 	if (extract_clusters(ctx, graph, &c) < 0)
1556 		goto error;
1557 
1558 	node = finish_bands_clustering(node, graph, &c);
1559 
1560 	clustering_free(ctx, &c);
1561 	return node;
1562 error:
1563 	clustering_free(ctx, &c);
1564 	return isl_schedule_node_free(node);
1565 }
1566