xref: /llvm-project/polly/lib/External/isl/isl_scheduler.c (revision a749e09e184b2b0b6dde71af01c82dd427b3e3e2)
1 /*
2  * Copyright 2011      INRIA Saclay
3  * Copyright 2012-2014 Ecole Normale Superieure
4  * Copyright 2015-2016 Sven Verdoolaege
5  * Copyright 2016      INRIA Paris
6  * Copyright 2017      Sven Verdoolaege
7  *
8  * Use of this software is governed by the MIT license
9  *
10  * Written by Sven Verdoolaege, INRIA Saclay - Ile-de-France,
11  * Parc Club Orsay Universite, ZAC des vignes, 4 rue Jacques Monod,
12  * 91893 Orsay, France
13  * and Ecole Normale Superieure, 45 rue d'Ulm, 75230 Paris, France
14  * and Centre de Recherche Inria de Paris, 2 rue Simone Iff - Voie DQ12,
15  * CS 42112, 75589 Paris Cedex 12, France
16  */
17 
18 #include <isl_ctx_private.h>
19 #include <isl_map_private.h>
20 #include <isl_space_private.h>
21 #include <isl_aff_private.h>
22 #include <isl/hash.h>
23 #include <isl/id.h>
24 #include <isl/constraint.h>
25 #include <isl/schedule.h>
26 #include <isl_schedule_constraints.h>
27 #include <isl/schedule_node.h>
28 #include <isl_mat_private.h>
29 #include <isl_vec_private.h>
30 #include <isl/set.h>
31 #include <isl_union_set_private.h>
32 #include <isl_seq.h>
33 #include <isl_tab.h>
34 #include <isl_dim_map.h>
35 #include <isl/map_to_basic_set.h>
36 #include <isl_sort.h>
37 #include <isl_options_private.h>
38 #include <isl_tarjan.h>
39 #include <isl_morph.h>
40 #include <isl/ilp.h>
41 #include <isl_val_private.h>
42 
43 #include "isl_scheduler.h"
44 #include "isl_scheduler_clustering.h"
45 
46 /*
47  * The scheduling algorithm implemented in this file was inspired by
48  * Bondhugula et al., "Automatic Transformations for Communication-Minimized
49  * Parallelization and Locality Optimization in the Polyhedral Model".
50  *
51  * For a detailed description of the variant implemented in isl,
52  * see Verdoolaege and Janssens, "Scheduling for PPCG" (2017).
53  */
54 
55 
node_has_tuples(const void * entry,const void * val)56 static isl_bool node_has_tuples(const void *entry, const void *val)
57 {
58 	struct isl_sched_node *node = (struct isl_sched_node *)entry;
59 	isl_space *space = (isl_space *) val;
60 
61 	return isl_space_has_equal_tuples(node->space, space);
62 }
63 
isl_sched_node_scc_exactly(struct isl_sched_node * node,int scc)64 int isl_sched_node_scc_exactly(struct isl_sched_node *node, int scc)
65 {
66 	return node->scc == scc;
67 }
68 
node_scc_at_most(struct isl_sched_node * node,int scc)69 static int node_scc_at_most(struct isl_sched_node *node, int scc)
70 {
71 	return node->scc <= scc;
72 }
73 
node_scc_at_least(struct isl_sched_node * node,int scc)74 static int node_scc_at_least(struct isl_sched_node *node, int scc)
75 {
76 	return node->scc >= scc;
77 }
78 
79 /* Is "edge" marked as being of type "type"?
80  */
isl_sched_edge_has_type(struct isl_sched_edge * edge,enum isl_edge_type type)81 int isl_sched_edge_has_type(struct isl_sched_edge *edge,
82 	enum isl_edge_type type)
83 {
84 	return ISL_FL_ISSET(edge->types, 1 << type);
85 }
86 
87 /* Mark "edge" as being of type "type".
88  */
set_type(struct isl_sched_edge * edge,enum isl_edge_type type)89 static void set_type(struct isl_sched_edge *edge, enum isl_edge_type type)
90 {
91 	ISL_FL_SET(edge->types, 1 << type);
92 }
93 
94 /* No longer mark "edge" as being of type "type"?
95  */
clear_type(struct isl_sched_edge * edge,enum isl_edge_type type)96 static void clear_type(struct isl_sched_edge *edge, enum isl_edge_type type)
97 {
98 	ISL_FL_CLR(edge->types, 1 << type);
99 }
100 
101 /* Is "edge" marked as a validity edge?
102  */
is_validity(struct isl_sched_edge * edge)103 static int is_validity(struct isl_sched_edge *edge)
104 {
105 	return isl_sched_edge_has_type(edge, isl_edge_validity);
106 }
107 
108 /* Mark "edge" as a validity edge.
109  */
set_validity(struct isl_sched_edge * edge)110 static void set_validity(struct isl_sched_edge *edge)
111 {
112 	set_type(edge, isl_edge_validity);
113 }
114 
115 /* Is "edge" marked as a proximity edge?
116  */
isl_sched_edge_is_proximity(struct isl_sched_edge * edge)117 int isl_sched_edge_is_proximity(struct isl_sched_edge *edge)
118 {
119 	return isl_sched_edge_has_type(edge, isl_edge_proximity);
120 }
121 
122 /* Is "edge" marked as a local edge?
123  */
is_local(struct isl_sched_edge * edge)124 static int is_local(struct isl_sched_edge *edge)
125 {
126 	return isl_sched_edge_has_type(edge, isl_edge_local);
127 }
128 
129 /* Mark "edge" as a local edge.
130  */
set_local(struct isl_sched_edge * edge)131 static void set_local(struct isl_sched_edge *edge)
132 {
133 	set_type(edge, isl_edge_local);
134 }
135 
136 /* No longer mark "edge" as a local edge.
137  */
clear_local(struct isl_sched_edge * edge)138 static void clear_local(struct isl_sched_edge *edge)
139 {
140 	clear_type(edge, isl_edge_local);
141 }
142 
143 /* Is "edge" marked as a coincidence edge?
144  */
is_coincidence(struct isl_sched_edge * edge)145 static int is_coincidence(struct isl_sched_edge *edge)
146 {
147 	return isl_sched_edge_has_type(edge, isl_edge_coincidence);
148 }
149 
150 /* Is "edge" marked as a condition edge?
151  */
isl_sched_edge_is_condition(struct isl_sched_edge * edge)152 int isl_sched_edge_is_condition(struct isl_sched_edge *edge)
153 {
154 	return isl_sched_edge_has_type(edge, isl_edge_condition);
155 }
156 
157 /* Is "edge" marked as a conditional validity edge?
158  */
isl_sched_edge_is_conditional_validity(struct isl_sched_edge * edge)159 int isl_sched_edge_is_conditional_validity(struct isl_sched_edge *edge)
160 {
161 	return isl_sched_edge_has_type(edge, isl_edge_conditional_validity);
162 }
163 
164 /* Is "edge" of a type that can appear multiple times between
165  * the same pair of nodes?
166  *
167  * Condition edges and conditional validity edges may have tagged
168  * dependence relations, in which case an edge is added for each
169  * pair of tags.
170  */
is_multi_edge_type(struct isl_sched_edge * edge)171 static int is_multi_edge_type(struct isl_sched_edge *edge)
172 {
173 	return isl_sched_edge_is_condition(edge) ||
174 		isl_sched_edge_is_conditional_validity(edge);
175 }
176 
177 /* Initialize node_table based on the list of nodes.
178  */
graph_init_table(isl_ctx * ctx,struct isl_sched_graph * graph)179 static int graph_init_table(isl_ctx *ctx, struct isl_sched_graph *graph)
180 {
181 	int i;
182 
183 	graph->node_table = isl_hash_table_alloc(ctx, graph->n);
184 	if (!graph->node_table)
185 		return -1;
186 
187 	for (i = 0; i < graph->n; ++i) {
188 		struct isl_hash_table_entry *entry;
189 		uint32_t hash;
190 
191 		hash = isl_space_get_tuple_hash(graph->node[i].space);
192 		entry = isl_hash_table_find(ctx, graph->node_table, hash,
193 					    &node_has_tuples,
194 					    graph->node[i].space, 1);
195 		if (!entry)
196 			return -1;
197 		entry->data = &graph->node[i];
198 	}
199 
200 	return 0;
201 }
202 
203 /* Return a pointer to the node that lives within the given space,
204  * an invalid node if there is no such node, or NULL in case of error.
205  */
isl_sched_graph_find_node(isl_ctx * ctx,struct isl_sched_graph * graph,__isl_keep isl_space * space)206 struct isl_sched_node *isl_sched_graph_find_node(isl_ctx *ctx,
207 	struct isl_sched_graph *graph, __isl_keep isl_space *space)
208 {
209 	struct isl_hash_table_entry *entry;
210 	uint32_t hash;
211 
212 	if (!space)
213 		return NULL;
214 
215 	hash = isl_space_get_tuple_hash(space);
216 	entry = isl_hash_table_find(ctx, graph->node_table, hash,
217 				    &node_has_tuples, space, 0);
218 	if (!entry)
219 		return NULL;
220 	if (entry == isl_hash_table_entry_none)
221 		return graph->node + graph->n;
222 
223 	return entry->data;
224 }
225 
226 /* Is "node" a node in "graph"?
227  */
isl_sched_graph_is_node(struct isl_sched_graph * graph,struct isl_sched_node * node)228 int isl_sched_graph_is_node(struct isl_sched_graph *graph,
229 	struct isl_sched_node *node)
230 {
231 	return node && node >= &graph->node[0] && node < &graph->node[graph->n];
232 }
233 
edge_has_src_and_dst(const void * entry,const void * val)234 static isl_bool edge_has_src_and_dst(const void *entry, const void *val)
235 {
236 	const struct isl_sched_edge *edge = entry;
237 	const struct isl_sched_edge *temp = val;
238 
239 	return isl_bool_ok(edge->src == temp->src && edge->dst == temp->dst);
240 }
241 
242 /* Add the given edge to graph->edge_table[type].
243  */
graph_edge_table_add(isl_ctx * ctx,struct isl_sched_graph * graph,enum isl_edge_type type,struct isl_sched_edge * edge)244 static isl_stat graph_edge_table_add(isl_ctx *ctx,
245 	struct isl_sched_graph *graph, enum isl_edge_type type,
246 	struct isl_sched_edge *edge)
247 {
248 	struct isl_hash_table_entry *entry;
249 	uint32_t hash;
250 
251 	hash = isl_hash_init();
252 	hash = isl_hash_builtin(hash, edge->src);
253 	hash = isl_hash_builtin(hash, edge->dst);
254 	entry = isl_hash_table_find(ctx, graph->edge_table[type], hash,
255 				    &edge_has_src_and_dst, edge, 1);
256 	if (!entry)
257 		return isl_stat_error;
258 	entry->data = edge;
259 
260 	return isl_stat_ok;
261 }
262 
263 /* Add "edge" to all relevant edge tables.
264  * That is, for every type of the edge, add it to the corresponding table.
265  */
graph_edge_tables_add(isl_ctx * ctx,struct isl_sched_graph * graph,struct isl_sched_edge * edge)266 static isl_stat graph_edge_tables_add(isl_ctx *ctx,
267 	struct isl_sched_graph *graph, struct isl_sched_edge *edge)
268 {
269 	enum isl_edge_type t;
270 
271 	for (t = isl_edge_first; t <= isl_edge_last; ++t) {
272 		if (!isl_sched_edge_has_type(edge, t))
273 			continue;
274 		if (graph_edge_table_add(ctx, graph, t, edge) < 0)
275 			return isl_stat_error;
276 	}
277 
278 	return isl_stat_ok;
279 }
280 
281 /* Allocate the edge_tables based on the maximal number of edges of
282  * each type.
283  */
graph_init_edge_tables(isl_ctx * ctx,struct isl_sched_graph * graph)284 static int graph_init_edge_tables(isl_ctx *ctx, struct isl_sched_graph *graph)
285 {
286 	int i;
287 
288 	for (i = 0; i <= isl_edge_last; ++i) {
289 		graph->edge_table[i] = isl_hash_table_alloc(ctx,
290 							    graph->max_edge[i]);
291 		if (!graph->edge_table[i])
292 			return -1;
293 	}
294 
295 	return 0;
296 }
297 
298 /* If graph->edge_table[type] contains an edge from the given source
299  * to the given destination, then return the hash table entry of this edge.
300  * Otherwise, return NULL.
301  */
graph_find_edge_entry(struct isl_sched_graph * graph,enum isl_edge_type type,struct isl_sched_node * src,struct isl_sched_node * dst)302 static struct isl_hash_table_entry *graph_find_edge_entry(
303 	struct isl_sched_graph *graph,
304 	enum isl_edge_type type,
305 	struct isl_sched_node *src, struct isl_sched_node *dst)
306 {
307 	isl_ctx *ctx = isl_space_get_ctx(src->space);
308 	uint32_t hash;
309 	struct isl_sched_edge temp = { .src = src, .dst = dst };
310 
311 	hash = isl_hash_init();
312 	hash = isl_hash_builtin(hash, temp.src);
313 	hash = isl_hash_builtin(hash, temp.dst);
314 	return isl_hash_table_find(ctx, graph->edge_table[type], hash,
315 				    &edge_has_src_and_dst, &temp, 0);
316 }
317 
318 
319 /* If graph->edge_table[type] contains an edge from the given source
320  * to the given destination, then return this edge.
321  * Return "none" if no such edge can be found.
322  * Return NULL on error.
323  */
graph_find_edge(struct isl_sched_graph * graph,enum isl_edge_type type,struct isl_sched_node * src,struct isl_sched_node * dst,struct isl_sched_edge * none)324 static struct isl_sched_edge *graph_find_edge(struct isl_sched_graph *graph,
325 	enum isl_edge_type type,
326 	struct isl_sched_node *src, struct isl_sched_node *dst,
327 	struct isl_sched_edge *none)
328 {
329 	struct isl_hash_table_entry *entry;
330 
331 	entry = graph_find_edge_entry(graph, type, src, dst);
332 	if (!entry)
333 		return NULL;
334 	if (entry == isl_hash_table_entry_none)
335 		return none;
336 
337 	return entry->data;
338 }
339 
340 /* Check whether the dependence graph has an edge of the given type
341  * between the given two nodes.
342  */
graph_has_edge(struct isl_sched_graph * graph,enum isl_edge_type type,struct isl_sched_node * src,struct isl_sched_node * dst)343 static isl_bool graph_has_edge(struct isl_sched_graph *graph,
344 	enum isl_edge_type type,
345 	struct isl_sched_node *src, struct isl_sched_node *dst)
346 {
347 	struct isl_sched_edge dummy;
348 	struct isl_sched_edge *edge;
349 	isl_bool empty;
350 
351 	edge = graph_find_edge(graph, type, src, dst, &dummy);
352 	if (!edge)
353 		return isl_bool_error;
354 	if (edge == &dummy)
355 		return isl_bool_false;
356 
357 	empty = isl_map_plain_is_empty(edge->map);
358 
359 	return isl_bool_not(empty);
360 }
361 
362 /* Look for any edge with the same src, dst and map fields as "model".
363  *
364  * Return the matching edge if one can be found.
365  * Return "model" if no matching edge is found.
366  * Return NULL on error.
367  */
graph_find_matching_edge(struct isl_sched_graph * graph,struct isl_sched_edge * model)368 static struct isl_sched_edge *graph_find_matching_edge(
369 	struct isl_sched_graph *graph, struct isl_sched_edge *model)
370 {
371 	enum isl_edge_type i;
372 	struct isl_sched_edge *edge;
373 
374 	for (i = isl_edge_first; i <= isl_edge_last; ++i) {
375 		int is_equal;
376 
377 		edge = graph_find_edge(graph, i, model->src, model->dst, model);
378 		if (!edge)
379 			return NULL;
380 		if (edge == model)
381 			continue;
382 		is_equal = isl_map_plain_is_equal(model->map, edge->map);
383 		if (is_equal < 0)
384 			return NULL;
385 		if (is_equal)
386 			return edge;
387 	}
388 
389 	return model;
390 }
391 
392 /* Remove the given edge from all the edge_tables that refer to it.
393  */
graph_remove_edge(struct isl_sched_graph * graph,struct isl_sched_edge * edge)394 static isl_stat graph_remove_edge(struct isl_sched_graph *graph,
395 	struct isl_sched_edge *edge)
396 {
397 	isl_ctx *ctx = isl_map_get_ctx(edge->map);
398 	enum isl_edge_type i;
399 
400 	for (i = isl_edge_first; i <= isl_edge_last; ++i) {
401 		struct isl_hash_table_entry *entry;
402 
403 		entry = graph_find_edge_entry(graph, i, edge->src, edge->dst);
404 		if (!entry)
405 			return isl_stat_error;
406 		if (entry == isl_hash_table_entry_none)
407 			continue;
408 		if (entry->data != edge)
409 			continue;
410 		isl_hash_table_remove(ctx, graph->edge_table[i], entry);
411 	}
412 
413 	return isl_stat_ok;
414 }
415 
416 /* Check whether the dependence graph has any edge
417  * between the given two nodes.
418  */
graph_has_any_edge(struct isl_sched_graph * graph,struct isl_sched_node * src,struct isl_sched_node * dst)419 static isl_bool graph_has_any_edge(struct isl_sched_graph *graph,
420 	struct isl_sched_node *src, struct isl_sched_node *dst)
421 {
422 	enum isl_edge_type i;
423 	isl_bool r;
424 
425 	for (i = isl_edge_first; i <= isl_edge_last; ++i) {
426 		r = graph_has_edge(graph, i, src, dst);
427 		if (r < 0 || r)
428 			return r;
429 	}
430 
431 	return r;
432 }
433 
434 /* Check whether the dependence graph has a validity edge
435  * between the given two nodes.
436  *
437  * Conditional validity edges are essentially validity edges that
438  * can be ignored if the corresponding condition edges are iteration private.
439  * Here, we are only checking for the presence of validity
440  * edges, so we need to consider the conditional validity edges too.
441  * In particular, this function is used during the detection
442  * of strongly connected components and we cannot ignore
443  * conditional validity edges during this detection.
444  */
isl_sched_graph_has_validity_edge(struct isl_sched_graph * graph,struct isl_sched_node * src,struct isl_sched_node * dst)445 isl_bool isl_sched_graph_has_validity_edge(struct isl_sched_graph *graph,
446 	struct isl_sched_node *src, struct isl_sched_node *dst)
447 {
448 	isl_bool r;
449 
450 	r = graph_has_edge(graph, isl_edge_validity, src, dst);
451 	if (r < 0 || r)
452 		return r;
453 
454 	return graph_has_edge(graph, isl_edge_conditional_validity, src, dst);
455 }
456 
457 /* Perform all the required memory allocations for a schedule graph "graph"
458  * with "n_node" nodes and "n_edge" edge and initialize the corresponding
459  * fields.
460  */
graph_alloc(isl_ctx * ctx,struct isl_sched_graph * graph,int n_node,int n_edge)461 static isl_stat graph_alloc(isl_ctx *ctx, struct isl_sched_graph *graph,
462 	int n_node, int n_edge)
463 {
464 	int i;
465 
466 	graph->n = n_node;
467 	graph->n_edge = n_edge;
468 	graph->node = isl_calloc_array(ctx, struct isl_sched_node, graph->n);
469 	graph->sorted = isl_calloc_array(ctx, int, graph->n);
470 	graph->region = isl_alloc_array(ctx,
471 					struct isl_trivial_region, graph->n);
472 	graph->edge = isl_calloc_array(ctx,
473 					struct isl_sched_edge, graph->n_edge);
474 
475 	graph->intra_hmap = isl_map_to_basic_set_alloc(ctx, 2 * n_edge);
476 	graph->intra_hmap_param = isl_map_to_basic_set_alloc(ctx, 2 * n_edge);
477 	graph->inter_hmap = isl_map_to_basic_set_alloc(ctx, 2 * n_edge);
478 
479 	if (!graph->node || !graph->region || (graph->n_edge && !graph->edge) ||
480 	    !graph->sorted)
481 		return isl_stat_error;
482 
483 	for(i = 0; i < graph->n; ++i)
484 		graph->sorted[i] = i;
485 
486 	return isl_stat_ok;
487 }
488 
489 /* Free the memory associated to node "node" in "graph".
490  * The "coincident" field is shared by nodes in a graph and its subgraph.
491  * It therefore only needs to be freed for the original dependence graph,
492  * i.e., one that is not the result of splitting.
493  */
clear_node(struct isl_sched_graph * graph,struct isl_sched_node * node)494 static void clear_node(struct isl_sched_graph *graph,
495 	struct isl_sched_node *node)
496 {
497 	isl_space_free(node->space);
498 	isl_set_free(node->hull);
499 	isl_multi_aff_free(node->compress);
500 	isl_pw_multi_aff_free(node->decompress);
501 	isl_mat_free(node->sched);
502 	isl_map_free(node->sched_map);
503 	isl_mat_free(node->indep);
504 	isl_mat_free(node->vmap);
505 	if (graph->root == graph)
506 		free(node->coincident);
507 	isl_multi_val_free(node->sizes);
508 	isl_basic_set_free(node->bounds);
509 	isl_vec_free(node->max);
510 }
511 
isl_sched_graph_free(isl_ctx * ctx,struct isl_sched_graph * graph)512 void isl_sched_graph_free(isl_ctx *ctx, struct isl_sched_graph *graph)
513 {
514 	int i;
515 
516 	isl_map_to_basic_set_free(graph->intra_hmap);
517 	isl_map_to_basic_set_free(graph->intra_hmap_param);
518 	isl_map_to_basic_set_free(graph->inter_hmap);
519 
520 	if (graph->node)
521 		for (i = 0; i < graph->n; ++i)
522 			clear_node(graph, &graph->node[i]);
523 	free(graph->node);
524 	free(graph->sorted);
525 	if (graph->edge)
526 		for (i = 0; i < graph->n_edge; ++i) {
527 			isl_map_free(graph->edge[i].map);
528 			isl_union_map_free(graph->edge[i].tagged_condition);
529 			isl_union_map_free(graph->edge[i].tagged_validity);
530 		}
531 	free(graph->edge);
532 	free(graph->region);
533 	for (i = 0; i <= isl_edge_last; ++i)
534 		isl_hash_table_free(ctx, graph->edge_table[i]);
535 	isl_hash_table_free(ctx, graph->node_table);
536 	isl_basic_set_free(graph->lp);
537 }
538 
539 /* For each "set" on which this function is called, increment
540  * graph->n by one and update graph->maxvar.
541  */
init_n_maxvar(__isl_take isl_set * set,void * user)542 static isl_stat init_n_maxvar(__isl_take isl_set *set, void *user)
543 {
544 	struct isl_sched_graph *graph = user;
545 	isl_size nvar = isl_set_dim(set, isl_dim_set);
546 
547 	graph->n++;
548 	if (nvar > graph->maxvar)
549 		graph->maxvar = nvar;
550 
551 	isl_set_free(set);
552 
553 	if (nvar < 0)
554 		return isl_stat_error;
555 	return isl_stat_ok;
556 }
557 
558 /* Compute the number of rows that should be allocated for the schedule.
559  * In particular, we need one row for each variable or one row
560  * for each basic map in the dependences.
561  * Note that it is practically impossible to exhaust both
562  * the number of dependences and the number of variables.
563  */
compute_max_row(struct isl_sched_graph * graph,__isl_keep isl_schedule_constraints * sc)564 static isl_stat compute_max_row(struct isl_sched_graph *graph,
565 	__isl_keep isl_schedule_constraints *sc)
566 {
567 	int n_edge;
568 	isl_stat r;
569 	isl_union_set *domain;
570 
571 	graph->n = 0;
572 	graph->maxvar = 0;
573 	domain = isl_schedule_constraints_get_domain(sc);
574 	r = isl_union_set_foreach_set(domain, &init_n_maxvar, graph);
575 	isl_union_set_free(domain);
576 	if (r < 0)
577 		return isl_stat_error;
578 	n_edge = isl_schedule_constraints_n_basic_map(sc);
579 	if (n_edge < 0)
580 		return isl_stat_error;
581 	graph->max_row = n_edge + graph->maxvar;
582 
583 	return isl_stat_ok;
584 }
585 
586 /* Does "bset" have any defining equalities for its set variables?
587  */
has_any_defining_equality(__isl_keep isl_basic_set * bset)588 static isl_bool has_any_defining_equality(__isl_keep isl_basic_set *bset)
589 {
590 	int i;
591 	isl_size n;
592 
593 	n = isl_basic_set_dim(bset, isl_dim_set);
594 	if (n < 0)
595 		return isl_bool_error;
596 
597 	for (i = 0; i < n; ++i) {
598 		isl_bool has;
599 
600 		has = isl_basic_set_has_defining_equality(bset, isl_dim_set, i,
601 							NULL);
602 		if (has < 0 || has)
603 			return has;
604 	}
605 
606 	return isl_bool_false;
607 }
608 
609 /* Set the entries of node->max to the value of the schedule_max_coefficient
610  * option, if set.
611  */
set_max_coefficient(isl_ctx * ctx,struct isl_sched_node * node)612 static isl_stat set_max_coefficient(isl_ctx *ctx, struct isl_sched_node *node)
613 {
614 	int max;
615 
616 	max = isl_options_get_schedule_max_coefficient(ctx);
617 	if (max == -1)
618 		return isl_stat_ok;
619 
620 	node->max = isl_vec_alloc(ctx, node->nvar);
621 	node->max = isl_vec_set_si(node->max, max);
622 	if (!node->max)
623 		return isl_stat_error;
624 
625 	return isl_stat_ok;
626 }
627 
628 /* Set the entries of node->max to the minimum of the schedule_max_coefficient
629  * option (if set) and half of the minimum of the sizes in the other
630  * dimensions.  Round up when computing the half such that
631  * if the minimum of the sizes is one, half of the size is taken to be one
632  * rather than zero.
633  * If the global minimum is unbounded (i.e., if both
634  * the schedule_max_coefficient is not set and the sizes in the other
635  * dimensions are unbounded), then store a negative value.
636  * If the schedule coefficient is close to the size of the instance set
637  * in another dimension, then the schedule may represent a loop
638  * coalescing transformation (especially if the coefficient
639  * in that other dimension is one).  Forcing the coefficient to be
640  * smaller than or equal to half the minimal size should avoid this
641  * situation.
642  */
compute_max_coefficient(isl_ctx * ctx,struct isl_sched_node * node)643 static isl_stat compute_max_coefficient(isl_ctx *ctx,
644 	struct isl_sched_node *node)
645 {
646 	int max;
647 	int i, j;
648 	isl_vec *v;
649 
650 	max = isl_options_get_schedule_max_coefficient(ctx);
651 	v = isl_vec_alloc(ctx, node->nvar);
652 	if (!v)
653 		return isl_stat_error;
654 
655 	for (i = 0; i < node->nvar; ++i) {
656 		isl_int_set_si(v->el[i], max);
657 		isl_int_mul_si(v->el[i], v->el[i], 2);
658 	}
659 
660 	for (i = 0; i < node->nvar; ++i) {
661 		isl_val *size;
662 
663 		size = isl_multi_val_get_val(node->sizes, i);
664 		if (!size)
665 			goto error;
666 		if (!isl_val_is_int(size)) {
667 			isl_val_free(size);
668 			continue;
669 		}
670 		for (j = 0; j < node->nvar; ++j) {
671 			if (j == i)
672 				continue;
673 			if (isl_int_is_neg(v->el[j]) ||
674 			    isl_int_gt(v->el[j], size->n))
675 				isl_int_set(v->el[j], size->n);
676 		}
677 		isl_val_free(size);
678 	}
679 
680 	for (i = 0; i < node->nvar; ++i)
681 		isl_int_cdiv_q_ui(v->el[i], v->el[i], 2);
682 
683 	node->max = v;
684 	return isl_stat_ok;
685 error:
686 	isl_vec_free(v);
687 	return isl_stat_error;
688 }
689 
690 /* Construct an identifier for node "node", which will represent "set".
691  * The name of the identifier is either "compressed" or
692  * "compressed_<name>", with <name> the name of the space of "set".
693  * The user pointer of the identifier points to "node".
694  */
construct_compressed_id(__isl_keep isl_set * set,struct isl_sched_node * node)695 static __isl_give isl_id *construct_compressed_id(__isl_keep isl_set *set,
696 	struct isl_sched_node *node)
697 {
698 	isl_bool has_name;
699 	isl_ctx *ctx;
700 	isl_id *id;
701 	isl_printer *p;
702 	const char *name;
703 	char *id_name;
704 
705 	has_name = isl_set_has_tuple_name(set);
706 	if (has_name < 0)
707 		return NULL;
708 
709 	ctx = isl_set_get_ctx(set);
710 	if (!has_name)
711 		return isl_id_alloc(ctx, "compressed", node);
712 
713 	p = isl_printer_to_str(ctx);
714 	name = isl_set_get_tuple_name(set);
715 	p = isl_printer_print_str(p, "compressed_");
716 	p = isl_printer_print_str(p, name);
717 	id_name = isl_printer_get_str(p);
718 	isl_printer_free(p);
719 
720 	id = isl_id_alloc(ctx, id_name, node);
721 	free(id_name);
722 
723 	return id;
724 }
725 
726 /* Construct a map that isolates the variable in position "pos" in "set".
727  *
728  * That is, construct
729  *
730  *	[i_0, ..., i_pos-1, i_pos+1, ...] -> [i_pos]
731  */
isolate(__isl_take isl_set * set,int pos)732 static __isl_give isl_map *isolate(__isl_take isl_set *set, int pos)
733 {
734 	isl_map *map;
735 
736 	map = isl_set_project_onto_map(set, isl_dim_set, pos, 1);
737 	map = isl_map_project_out(map, isl_dim_in, pos, 1);
738 	return map;
739 }
740 
741 /* Compute and return the size of "set" in dimension "dim".
742  * The size is taken to be the difference in values for that variable
743  * for fixed values of the other variables.
744  * This assumes that "set" is convex.
745  * In particular, the variable is first isolated from the other variables
746  * in the range of a map
747  *
748  *	[i_0, ..., i_dim-1, i_dim+1, ...] -> [i_dim]
749  *
750  * and then duplicated
751  *
752  *	[i_0, ..., i_dim-1, i_dim+1, ...] -> [[i_dim] -> [i_dim']]
753  *
754  * The shared variables are then projected out and the maximal value
755  * of i_dim' - i_dim is computed.
756  */
compute_size(__isl_take isl_set * set,int dim)757 static __isl_give isl_val *compute_size(__isl_take isl_set *set, int dim)
758 {
759 	isl_map *map;
760 	isl_local_space *ls;
761 	isl_aff *obj;
762 	isl_val *v;
763 
764 	map = isolate(set, dim);
765 	map = isl_map_range_product(map, isl_map_copy(map));
766 	map = isl_set_unwrap(isl_map_range(map));
767 	set = isl_map_deltas(map);
768 	ls = isl_local_space_from_space(isl_set_get_space(set));
769 	obj = isl_aff_var_on_domain(ls, isl_dim_set, 0);
770 	v = isl_set_max_val(set, obj);
771 	isl_aff_free(obj);
772 	isl_set_free(set);
773 
774 	return v;
775 }
776 
777 /* Perform a compression on "node" where "hull" represents the constraints
778  * that were used to derive the compression, while "compress" and
779  * "decompress" map the original space to the compressed space and
780  * vice versa.
781  *
782  * If "node" was not compressed already, then simply store
783  * the compression information.
784  * Otherwise the "original" space is actually the result
785  * of a previous compression, which is then combined
786  * with the present compression.
787  *
788  * The dimensionality of the compressed domain is also adjusted.
789  * Other information, such as the sizes and the maximal coefficient values,
790  * has not been computed yet and therefore does not need to be adjusted.
791  */
compress_node(struct isl_sched_node * node,__isl_take isl_set * hull,__isl_take isl_multi_aff * compress,__isl_take isl_pw_multi_aff * decompress)792 static isl_stat compress_node(struct isl_sched_node *node,
793 	__isl_take isl_set *hull, __isl_take isl_multi_aff *compress,
794 	__isl_take isl_pw_multi_aff *decompress)
795 {
796 	node->nvar = isl_multi_aff_dim(compress, isl_dim_out);
797 	if (!node->compressed) {
798 		node->compressed = 1;
799 		node->hull = hull;
800 		node->compress = compress;
801 		node->decompress = decompress;
802 	} else {
803 		hull = isl_set_preimage_multi_aff(hull,
804 					    isl_multi_aff_copy(node->compress));
805 		node->hull = isl_set_intersect(node->hull, hull);
806 		node->compress = isl_multi_aff_pullback_multi_aff(
807 						compress, node->compress);
808 		node->decompress = isl_pw_multi_aff_pullback_pw_multi_aff(
809 						node->decompress, decompress);
810 	}
811 
812 	if (!node->hull || !node->compress || !node->decompress)
813 		return isl_stat_error;
814 
815 	return isl_stat_ok;
816 }
817 
818 /* Given that dimension "pos" in "set" has a fixed value
819  * in terms of the other dimensions, (further) compress "node"
820  * by projecting out this dimension.
821  * "set" may be the result of a previous compression.
822  * "uncompressed" is the original domain (without compression).
823  *
824  * The compression function simply projects out the dimension.
825  * The decompression function adds back the dimension
826  * in the right position as an expression of the other dimensions
827  * derived from "set".
828  * As in extract_node, the compressed space has an identifier
829  * that references "node" such that each compressed space is unique and
830  * such that the node can be recovered from the compressed space.
831  *
832  * The constraint removed through the compression is added to the "hull"
833  * such that only edges that relate to the original domains
834  * are taken into account.
835  * In particular, it is obtained by composing compression and decompression and
836  * taking the relation among the variables in the range.
837  */
project_out_fixed(struct isl_sched_node * node,__isl_keep isl_set * uncompressed,__isl_take isl_set * set,int pos)838 static isl_stat project_out_fixed(struct isl_sched_node *node,
839 	__isl_keep isl_set *uncompressed, __isl_take isl_set *set, int pos)
840 {
841 	isl_id *id;
842 	isl_space *space;
843 	isl_set *domain;
844 	isl_map *map;
845 	isl_multi_aff *compress;
846 	isl_pw_multi_aff *decompress, *pma;
847 	isl_multi_pw_aff *mpa;
848 	isl_set *hull;
849 
850 	map = isolate(isl_set_copy(set), pos);
851 	pma = isl_pw_multi_aff_from_map(map);
852 	domain = isl_pw_multi_aff_domain(isl_pw_multi_aff_copy(pma));
853 	pma = isl_pw_multi_aff_gist(pma, domain);
854 	space = isl_pw_multi_aff_get_domain_space(pma);
855 	mpa = isl_multi_pw_aff_identity(isl_space_map_from_set(space));
856 	mpa = isl_multi_pw_aff_range_splice(mpa, pos,
857 				    isl_multi_pw_aff_from_pw_multi_aff(pma));
858 	decompress = isl_pw_multi_aff_from_multi_pw_aff(mpa);
859 	space = isl_set_get_space(set);
860 	compress = isl_multi_aff_project_out_map(space, isl_dim_set, pos, 1);
861 	id = construct_compressed_id(uncompressed, node);
862 	compress = isl_multi_aff_set_tuple_id(compress, isl_dim_out, id);
863 	space = isl_space_reverse(isl_multi_aff_get_space(compress));
864 	decompress = isl_pw_multi_aff_reset_space(decompress, space);
865 	pma = isl_pw_multi_aff_pullback_multi_aff(
866 	    isl_pw_multi_aff_copy(decompress), isl_multi_aff_copy(compress));
867 	hull = isl_map_range(isl_map_from_pw_multi_aff(pma));
868 
869 	isl_set_free(set);
870 
871 	return compress_node(node, hull, compress, decompress);
872 }
873 
874 /* Compute the size of the compressed domain in each dimension and
875  * store the results in node->sizes.
876  * "uncompressed" is the original domain (without compression).
877  *
878  * First compress the domain if needed and then compute the size
879  * in each direction.
880  * If the domain is not convex, then the sizes are computed
881  * on a convex superset in order to avoid picking up sizes
882  * that are valid for the individual disjuncts, but not for
883  * the domain as a whole.
884  *
885  * If any of the sizes turns out to be zero, then this means
886  * that this dimension has a fixed value in terms of
887  * the other dimensions.  Perform an (extra) compression
888  * to remove this dimension.
889  */
compute_sizes(struct isl_sched_node * node,__isl_keep isl_set * uncompressed)890 static isl_stat compute_sizes(struct isl_sched_node *node,
891 	__isl_keep isl_set *uncompressed)
892 {
893 	int j;
894 	isl_size n;
895 	isl_multi_val *mv;
896 	isl_set *set = isl_set_copy(uncompressed);
897 
898 	if (node->compressed)
899 		set = isl_set_preimage_pw_multi_aff(set,
900 				    isl_pw_multi_aff_copy(node->decompress));
901 	set = isl_set_from_basic_set(isl_set_simple_hull(set));
902 	mv = isl_multi_val_zero(isl_set_get_space(set));
903 	n = isl_set_dim(set, isl_dim_set);
904 	if (n < 0)
905 		mv = isl_multi_val_free(mv);
906 	for (j = 0; j < n; ++j) {
907 		isl_bool is_zero;
908 		isl_val *v;
909 
910 		v = compute_size(isl_set_copy(set), j);
911 		is_zero = isl_val_is_zero(v);
912 		mv = isl_multi_val_set_val(mv, j, v);
913 		if (is_zero >= 0 && is_zero) {
914 			isl_multi_val_free(mv);
915 			if (project_out_fixed(node, uncompressed, set, j) < 0)
916 				return isl_stat_error;
917 			return compute_sizes(node, uncompressed);
918 		}
919 	}
920 	node->sizes = mv;
921 	isl_set_free(set);
922 	if (!node->sizes)
923 		return isl_stat_error;
924 	return isl_stat_ok;
925 }
926 
927 /* Compute the size of the instance set "set" of "node", after compression,
928  * as well as bounds on the corresponding coefficients, if needed.
929  *
930  * The sizes are needed when the schedule_treat_coalescing option is set.
931  * The bounds are needed when the schedule_treat_coalescing option or
932  * the schedule_max_coefficient option is set.
933  *
934  * If the schedule_treat_coalescing option is not set, then at most
935  * the bounds need to be set and this is done in set_max_coefficient.
936  * Otherwise, compute the size of the compressed domain
937  * in each direction and store the results in node->size.
938  * Finally, set the bounds on the coefficients based on the sizes
939  * and the schedule_max_coefficient option in compute_max_coefficient.
940  */
compute_sizes_and_max(isl_ctx * ctx,struct isl_sched_node * node,__isl_take isl_set * set)941 static isl_stat compute_sizes_and_max(isl_ctx *ctx, struct isl_sched_node *node,
942 	__isl_take isl_set *set)
943 {
944 	isl_stat r;
945 
946 	if (!isl_options_get_schedule_treat_coalescing(ctx)) {
947 		isl_set_free(set);
948 		return set_max_coefficient(ctx, node);
949 	}
950 
951 	r = compute_sizes(node, set);
952 	isl_set_free(set);
953 	if (r < 0)
954 		return isl_stat_error;
955 	return compute_max_coefficient(ctx, node);
956 }
957 
958 /* Add a new node to the graph representing the given instance set.
959  * "nvar" is the (possibly compressed) number of variables and
960  * may be smaller than then number of set variables in "set"
961  * if "compressed" is set.
962  * If "compressed" is set, then "hull" represents the constraints
963  * that were used to derive the compression, while "compress" and
964  * "decompress" map the original space to the compressed space and
965  * vice versa.
966  * If "compressed" is not set, then "hull", "compress" and "decompress"
967  * should be NULL.
968  *
969  * Compute the size of the instance set and bounds on the coefficients,
970  * if needed.
971  */
add_node(struct isl_sched_graph * graph,__isl_take isl_set * set,int nvar,int compressed,__isl_take isl_set * hull,__isl_take isl_multi_aff * compress,__isl_take isl_pw_multi_aff * decompress)972 static isl_stat add_node(struct isl_sched_graph *graph,
973 	__isl_take isl_set *set, int nvar, int compressed,
974 	__isl_take isl_set *hull, __isl_take isl_multi_aff *compress,
975 	__isl_take isl_pw_multi_aff *decompress)
976 {
977 	isl_size nparam;
978 	isl_ctx *ctx;
979 	isl_mat *sched;
980 	isl_space *space;
981 	int *coincident;
982 	struct isl_sched_node *node;
983 
984 	nparam = isl_set_dim(set, isl_dim_param);
985 	if (nparam < 0)
986 		goto error;
987 
988 	ctx = isl_set_get_ctx(set);
989 	if (!ctx->opt->schedule_parametric)
990 		nparam = 0;
991 	sched = isl_mat_alloc(ctx, 0, 1 + nparam + nvar);
992 	node = &graph->node[graph->n];
993 	graph->n++;
994 	space = isl_set_get_space(set);
995 	node->space = space;
996 	node->nvar = nvar;
997 	node->nparam = nparam;
998 	node->sched = sched;
999 	node->sched_map = NULL;
1000 	coincident = isl_calloc_array(ctx, int, graph->max_row);
1001 	node->coincident = coincident;
1002 	node->compressed = compressed;
1003 	node->hull = hull;
1004 	node->compress = compress;
1005 	node->decompress = decompress;
1006 	if (compute_sizes_and_max(ctx, node, set) < 0)
1007 		return isl_stat_error;
1008 
1009 	if (!space || !sched || (graph->max_row && !coincident))
1010 		return isl_stat_error;
1011 	if (compressed && (!hull || !compress || !decompress))
1012 		return isl_stat_error;
1013 
1014 	return isl_stat_ok;
1015 error:
1016 	isl_set_free(set);
1017 	isl_set_free(hull);
1018 	isl_multi_aff_free(compress);
1019 	isl_pw_multi_aff_free(decompress);
1020 	return isl_stat_error;
1021 }
1022 
1023 /* Add a new node to the graph representing the given set.
1024  *
1025  * If any of the set variables is defined by an equality, then
1026  * we perform variable compression such that we can perform
1027  * the scheduling on the compressed domain.
1028  * In this case, an identifier is used that references the new node
1029  * such that each compressed space is unique and
1030  * such that the node can be recovered from the compressed space.
1031  */
extract_node(__isl_take isl_set * set,void * user)1032 static isl_stat extract_node(__isl_take isl_set *set, void *user)
1033 {
1034 	isl_size nvar;
1035 	isl_bool has_equality;
1036 	isl_id *id;
1037 	isl_basic_set *hull;
1038 	isl_set *hull_set;
1039 	isl_morph *morph;
1040 	isl_multi_aff *compress, *decompress_ma;
1041 	isl_pw_multi_aff *decompress;
1042 	struct isl_sched_graph *graph = user;
1043 
1044 	hull = isl_set_affine_hull(isl_set_copy(set));
1045 	hull = isl_basic_set_remove_divs(hull);
1046 	nvar = isl_set_dim(set, isl_dim_set);
1047 	has_equality = has_any_defining_equality(hull);
1048 
1049 	if (nvar < 0 || has_equality < 0)
1050 		goto error;
1051 	if (!has_equality) {
1052 		isl_basic_set_free(hull);
1053 		return add_node(graph, set, nvar, 0, NULL, NULL, NULL);
1054 	}
1055 
1056 	id = construct_compressed_id(set, &graph->node[graph->n]);
1057 	morph = isl_basic_set_variable_compression_with_id(hull, id);
1058 	isl_id_free(id);
1059 	nvar = isl_morph_ran_dim(morph, isl_dim_set);
1060 	if (nvar < 0)
1061 		set = isl_set_free(set);
1062 	compress = isl_morph_get_var_multi_aff(morph);
1063 	morph = isl_morph_inverse(morph);
1064 	decompress_ma = isl_morph_get_var_multi_aff(morph);
1065 	decompress = isl_pw_multi_aff_from_multi_aff(decompress_ma);
1066 	isl_morph_free(morph);
1067 
1068 	hull_set = isl_set_from_basic_set(hull);
1069 	return add_node(graph, set, nvar, 1, hull_set, compress, decompress);
1070 error:
1071 	isl_basic_set_free(hull);
1072 	isl_set_free(set);
1073 	return isl_stat_error;
1074 }
1075 
1076 struct isl_extract_edge_data {
1077 	enum isl_edge_type type;
1078 	struct isl_sched_graph *graph;
1079 };
1080 
1081 /* Merge edge2 into edge1, freeing the contents of edge2.
1082  * Return 0 on success and -1 on failure.
1083  *
1084  * edge1 and edge2 are assumed to have the same value for the map field.
1085  */
merge_edge(struct isl_sched_edge * edge1,struct isl_sched_edge * edge2)1086 static int merge_edge(struct isl_sched_edge *edge1,
1087 	struct isl_sched_edge *edge2)
1088 {
1089 	edge1->types |= edge2->types;
1090 	isl_map_free(edge2->map);
1091 
1092 	if (isl_sched_edge_is_condition(edge2)) {
1093 		if (!edge1->tagged_condition)
1094 			edge1->tagged_condition = edge2->tagged_condition;
1095 		else
1096 			edge1->tagged_condition =
1097 				isl_union_map_union(edge1->tagged_condition,
1098 						    edge2->tagged_condition);
1099 	}
1100 
1101 	if (isl_sched_edge_is_conditional_validity(edge2)) {
1102 		if (!edge1->tagged_validity)
1103 			edge1->tagged_validity = edge2->tagged_validity;
1104 		else
1105 			edge1->tagged_validity =
1106 				isl_union_map_union(edge1->tagged_validity,
1107 						    edge2->tagged_validity);
1108 	}
1109 
1110 	if (isl_sched_edge_is_condition(edge2) && !edge1->tagged_condition)
1111 		return -1;
1112 	if (isl_sched_edge_is_conditional_validity(edge2) &&
1113 	    !edge1->tagged_validity)
1114 		return -1;
1115 
1116 	return 0;
1117 }
1118 
1119 /* Insert dummy tags in domain and range of "map".
1120  *
1121  * In particular, if "map" is of the form
1122  *
1123  *	A -> B
1124  *
1125  * then return
1126  *
1127  *	[A -> dummy_tag] -> [B -> dummy_tag]
1128  *
1129  * where the dummy_tags are identical and equal to any dummy tags
1130  * introduced by any other call to this function.
1131  */
insert_dummy_tags(__isl_take isl_map * map)1132 static __isl_give isl_map *insert_dummy_tags(__isl_take isl_map *map)
1133 {
1134 	static char dummy;
1135 	isl_ctx *ctx;
1136 	isl_id *id;
1137 	isl_space *space;
1138 	isl_set *domain, *range;
1139 
1140 	ctx = isl_map_get_ctx(map);
1141 
1142 	id = isl_id_alloc(ctx, NULL, &dummy);
1143 	space = isl_space_params(isl_map_get_space(map));
1144 	space = isl_space_set_from_params(space);
1145 	space = isl_space_set_tuple_id(space, isl_dim_set, id);
1146 	space = isl_space_map_from_set(space);
1147 
1148 	domain = isl_map_wrap(map);
1149 	range = isl_map_wrap(isl_map_universe(space));
1150 	map = isl_map_from_domain_and_range(domain, range);
1151 	map = isl_map_zip(map);
1152 
1153 	return map;
1154 }
1155 
1156 /* Given that at least one of "src" or "dst" is compressed, return
1157  * a map between the spaces of these nodes restricted to the affine
1158  * hull that was used in the compression.
1159  */
extract_hull(struct isl_sched_node * src,struct isl_sched_node * dst)1160 static __isl_give isl_map *extract_hull(struct isl_sched_node *src,
1161 	struct isl_sched_node *dst)
1162 {
1163 	isl_set *dom, *ran;
1164 
1165 	if (src->compressed)
1166 		dom = isl_set_copy(src->hull);
1167 	else
1168 		dom = isl_set_universe(isl_space_copy(src->space));
1169 	if (dst->compressed)
1170 		ran = isl_set_copy(dst->hull);
1171 	else
1172 		ran = isl_set_universe(isl_space_copy(dst->space));
1173 
1174 	return isl_map_from_domain_and_range(dom, ran);
1175 }
1176 
1177 /* Intersect the domains of the nested relations in domain and range
1178  * of "tagged" with "map".
1179  */
map_intersect_domains(__isl_take isl_map * tagged,__isl_keep isl_map * map)1180 static __isl_give isl_map *map_intersect_domains(__isl_take isl_map *tagged,
1181 	__isl_keep isl_map *map)
1182 {
1183 	isl_set *set;
1184 
1185 	tagged = isl_map_zip(tagged);
1186 	set = isl_map_wrap(isl_map_copy(map));
1187 	tagged = isl_map_intersect_domain(tagged, set);
1188 	tagged = isl_map_zip(tagged);
1189 	return tagged;
1190 }
1191 
1192 /* Return a pointer to the node that lives in the domain space of "map",
1193  * an invalid node if there is no such node, or NULL in case of error.
1194  */
find_domain_node(isl_ctx * ctx,struct isl_sched_graph * graph,__isl_keep isl_map * map)1195 static struct isl_sched_node *find_domain_node(isl_ctx *ctx,
1196 	struct isl_sched_graph *graph, __isl_keep isl_map *map)
1197 {
1198 	struct isl_sched_node *node;
1199 	isl_space *space;
1200 
1201 	space = isl_space_domain(isl_map_get_space(map));
1202 	node = isl_sched_graph_find_node(ctx, graph, space);
1203 	isl_space_free(space);
1204 
1205 	return node;
1206 }
1207 
1208 /* Return a pointer to the node that lives in the range space of "map",
1209  * an invalid node if there is no such node, or NULL in case of error.
1210  */
find_range_node(isl_ctx * ctx,struct isl_sched_graph * graph,__isl_keep isl_map * map)1211 static struct isl_sched_node *find_range_node(isl_ctx *ctx,
1212 	struct isl_sched_graph *graph, __isl_keep isl_map *map)
1213 {
1214 	struct isl_sched_node *node;
1215 	isl_space *space;
1216 
1217 	space = isl_space_range(isl_map_get_space(map));
1218 	node = isl_sched_graph_find_node(ctx, graph, space);
1219 	isl_space_free(space);
1220 
1221 	return node;
1222 }
1223 
1224 /* Refrain from adding a new edge based on "map".
1225  * Instead, just free the map.
1226  * "tagged" is either a copy of "map" with additional tags or NULL.
1227  */
skip_edge(__isl_take isl_map * map,__isl_take isl_map * tagged)1228 static isl_stat skip_edge(__isl_take isl_map *map, __isl_take isl_map *tagged)
1229 {
1230 	isl_map_free(map);
1231 	isl_map_free(tagged);
1232 
1233 	return isl_stat_ok;
1234 }
1235 
1236 /* Add a new edge to the graph based on the given map
1237  * and add it to data->graph->edge_table[data->type].
1238  * If a dependence relation of a given type happens to be identical
1239  * to one of the dependence relations of a type that was added before,
1240  * then we don't create a new edge, but instead mark the original edge
1241  * as also representing a dependence of the current type.
1242  *
1243  * Edges of type isl_edge_condition or isl_edge_conditional_validity
1244  * may be specified as "tagged" dependence relations.  That is, "map"
1245  * may contain elements (i -> a) -> (j -> b), where i -> j denotes
1246  * the dependence on iterations and a and b are tags.
1247  * edge->map is set to the relation containing the elements i -> j,
1248  * while edge->tagged_condition and edge->tagged_validity contain
1249  * the union of all the "map" relations
1250  * for which extract_edge is called that result in the same edge->map.
1251  *
1252  * If the source or the destination node is compressed, then
1253  * intersect both "map" and "tagged" with the constraints that
1254  * were used to construct the compression.
1255  * This ensures that there are no schedule constraints defined
1256  * outside of these domains, while the scheduler no longer has
1257  * any control over those outside parts.
1258  */
extract_edge(__isl_take isl_map * map,void * user)1259 static isl_stat extract_edge(__isl_take isl_map *map, void *user)
1260 {
1261 	isl_bool empty;
1262 	isl_ctx *ctx = isl_map_get_ctx(map);
1263 	struct isl_extract_edge_data *data = user;
1264 	struct isl_sched_graph *graph = data->graph;
1265 	struct isl_sched_node *src, *dst;
1266 	struct isl_sched_edge *edge;
1267 	isl_map *tagged = NULL;
1268 
1269 	if (data->type == isl_edge_condition ||
1270 	    data->type == isl_edge_conditional_validity) {
1271 		if (isl_map_can_zip(map)) {
1272 			tagged = isl_map_copy(map);
1273 			map = isl_set_unwrap(isl_map_domain(isl_map_zip(map)));
1274 		} else {
1275 			tagged = insert_dummy_tags(isl_map_copy(map));
1276 		}
1277 	}
1278 
1279 	src = find_domain_node(ctx, graph, map);
1280 	dst = find_range_node(ctx, graph, map);
1281 
1282 	if (!src || !dst)
1283 		goto error;
1284 	if (!isl_sched_graph_is_node(graph, src) ||
1285 	    !isl_sched_graph_is_node(graph, dst))
1286 		return skip_edge(map, tagged);
1287 
1288 	if (src->compressed || dst->compressed) {
1289 		isl_map *hull;
1290 		hull = extract_hull(src, dst);
1291 		if (tagged)
1292 			tagged = map_intersect_domains(tagged, hull);
1293 		map = isl_map_intersect(map, hull);
1294 	}
1295 
1296 	empty = isl_map_plain_is_empty(map);
1297 	if (empty < 0)
1298 		goto error;
1299 	if (empty)
1300 		return skip_edge(map, tagged);
1301 
1302 	graph->edge[graph->n_edge].src = src;
1303 	graph->edge[graph->n_edge].dst = dst;
1304 	graph->edge[graph->n_edge].map = map;
1305 	graph->edge[graph->n_edge].types = 0;
1306 	graph->edge[graph->n_edge].tagged_condition = NULL;
1307 	graph->edge[graph->n_edge].tagged_validity = NULL;
1308 	set_type(&graph->edge[graph->n_edge], data->type);
1309 	if (data->type == isl_edge_condition)
1310 		graph->edge[graph->n_edge].tagged_condition =
1311 					isl_union_map_from_map(tagged);
1312 	if (data->type == isl_edge_conditional_validity)
1313 		graph->edge[graph->n_edge].tagged_validity =
1314 					isl_union_map_from_map(tagged);
1315 
1316 	edge = graph_find_matching_edge(graph, &graph->edge[graph->n_edge]);
1317 	if (!edge) {
1318 		graph->n_edge++;
1319 		return isl_stat_error;
1320 	}
1321 	if (edge == &graph->edge[graph->n_edge])
1322 		return graph_edge_table_add(ctx, graph, data->type,
1323 				    &graph->edge[graph->n_edge++]);
1324 
1325 	if (merge_edge(edge, &graph->edge[graph->n_edge]) < 0)
1326 		return isl_stat_error;
1327 
1328 	return graph_edge_table_add(ctx, graph, data->type, edge);
1329 error:
1330 	isl_map_free(map);
1331 	isl_map_free(tagged);
1332 	return isl_stat_error;
1333 }
1334 
1335 /* Initialize the schedule graph "graph" from the schedule constraints "sc".
1336  *
1337  * The context is included in the domain before the nodes of
1338  * the graphs are extracted in order to be able to exploit
1339  * any possible additional equalities.
1340  * Note that this intersection is only performed locally here.
1341  */
isl_sched_graph_init(struct isl_sched_graph * graph,__isl_keep isl_schedule_constraints * sc)1342 isl_stat isl_sched_graph_init(struct isl_sched_graph *graph,
1343 	__isl_keep isl_schedule_constraints *sc)
1344 {
1345 	isl_ctx *ctx;
1346 	isl_union_set *domain;
1347 	isl_union_map *c;
1348 	struct isl_extract_edge_data data;
1349 	enum isl_edge_type i;
1350 	isl_stat r;
1351 	isl_size n;
1352 
1353 	if (!sc)
1354 		return isl_stat_error;
1355 
1356 	ctx = isl_schedule_constraints_get_ctx(sc);
1357 
1358 	domain = isl_schedule_constraints_get_domain(sc);
1359 	n = isl_union_set_n_set(domain);
1360 	graph->n = n;
1361 	isl_union_set_free(domain);
1362 	if (n < 0)
1363 		return isl_stat_error;
1364 
1365 	n = isl_schedule_constraints_n_map(sc);
1366 	if (n < 0 || graph_alloc(ctx, graph, graph->n, n) < 0)
1367 		return isl_stat_error;
1368 
1369 	if (compute_max_row(graph, sc) < 0)
1370 		return isl_stat_error;
1371 	graph->root = graph;
1372 	graph->n = 0;
1373 	domain = isl_schedule_constraints_get_domain(sc);
1374 	domain = isl_union_set_intersect_params(domain,
1375 				    isl_schedule_constraints_get_context(sc));
1376 	r = isl_union_set_foreach_set(domain, &extract_node, graph);
1377 	isl_union_set_free(domain);
1378 	if (r < 0)
1379 		return isl_stat_error;
1380 	if (graph_init_table(ctx, graph) < 0)
1381 		return isl_stat_error;
1382 	for (i = isl_edge_first; i <= isl_edge_last; ++i) {
1383 		isl_size n;
1384 
1385 		c = isl_schedule_constraints_get(sc, i);
1386 		n = isl_union_map_n_map(c);
1387 		graph->max_edge[i] = n;
1388 		isl_union_map_free(c);
1389 		if (n < 0)
1390 			return isl_stat_error;
1391 	}
1392 	if (graph_init_edge_tables(ctx, graph) < 0)
1393 		return isl_stat_error;
1394 	graph->n_edge = 0;
1395 	data.graph = graph;
1396 	for (i = isl_edge_first; i <= isl_edge_last; ++i) {
1397 		isl_stat r;
1398 
1399 		data.type = i;
1400 		c = isl_schedule_constraints_get(sc, i);
1401 		r = isl_union_map_foreach_map(c, &extract_edge, &data);
1402 		isl_union_map_free(c);
1403 		if (r < 0)
1404 			return isl_stat_error;
1405 	}
1406 
1407 	return isl_stat_ok;
1408 }
1409 
1410 /* Check whether there is any dependence from node[j] to node[i]
1411  * or from node[i] to node[j].
1412  */
node_follows_weak(int i,int j,void * user)1413 static isl_bool node_follows_weak(int i, int j, void *user)
1414 {
1415 	isl_bool f;
1416 	struct isl_sched_graph *graph = user;
1417 
1418 	f = graph_has_any_edge(graph, &graph->node[j], &graph->node[i]);
1419 	if (f < 0 || f)
1420 		return f;
1421 	return graph_has_any_edge(graph, &graph->node[i], &graph->node[j]);
1422 }
1423 
1424 /* Check whether there is a (conditional) validity dependence from node[j]
1425  * to node[i], forcing node[i] to follow node[j].
1426  */
node_follows_strong(int i,int j,void * user)1427 static isl_bool node_follows_strong(int i, int j, void *user)
1428 {
1429 	struct isl_sched_graph *graph = user;
1430 
1431 	return isl_sched_graph_has_validity_edge(graph, &graph->node[j],
1432 							&graph->node[i]);
1433 }
1434 
1435 /* Use Tarjan's algorithm for computing the strongly connected components
1436  * in the dependence graph only considering those edges defined by "follows".
1437  */
isl_sched_graph_detect_ccs(isl_ctx * ctx,struct isl_sched_graph * graph,isl_bool (* follows)(int i,int j,void * user))1438 isl_stat isl_sched_graph_detect_ccs(isl_ctx *ctx,
1439 	struct isl_sched_graph *graph,
1440 	isl_bool (*follows)(int i, int j, void *user))
1441 {
1442 	int i, n;
1443 	struct isl_tarjan_graph *g = NULL;
1444 
1445 	g = isl_tarjan_graph_init(ctx, graph->n, follows, graph);
1446 	if (!g)
1447 		return isl_stat_error;
1448 
1449 	graph->scc = 0;
1450 	i = 0;
1451 	n = graph->n;
1452 	while (n) {
1453 		while (g->order[i] != -1) {
1454 			graph->node[g->order[i]].scc = graph->scc;
1455 			--n;
1456 			++i;
1457 		}
1458 		++i;
1459 		graph->scc++;
1460 	}
1461 
1462 	isl_tarjan_graph_free(g);
1463 
1464 	return isl_stat_ok;
1465 }
1466 
1467 /* Apply Tarjan's algorithm to detect the strongly connected components
1468  * in the dependence graph.
1469  * Only consider the (conditional) validity dependences and clear "weak".
1470  */
detect_sccs(isl_ctx * ctx,struct isl_sched_graph * graph)1471 static isl_stat detect_sccs(isl_ctx *ctx, struct isl_sched_graph *graph)
1472 {
1473 	graph->weak = 0;
1474 	return isl_sched_graph_detect_ccs(ctx, graph, &node_follows_strong);
1475 }
1476 
1477 /* Apply Tarjan's algorithm to detect the (weakly) connected components
1478  * in the dependence graph.
1479  * Consider all dependences and set "weak".
1480  */
detect_wccs(isl_ctx * ctx,struct isl_sched_graph * graph)1481 static isl_stat detect_wccs(isl_ctx *ctx, struct isl_sched_graph *graph)
1482 {
1483 	graph->weak = 1;
1484 	return isl_sched_graph_detect_ccs(ctx, graph, &node_follows_weak);
1485 }
1486 
cmp_scc(const void * a,const void * b,void * data)1487 static int cmp_scc(const void *a, const void *b, void *data)
1488 {
1489 	struct isl_sched_graph *graph = data;
1490 	const int *i1 = a;
1491 	const int *i2 = b;
1492 
1493 	return graph->node[*i1].scc - graph->node[*i2].scc;
1494 }
1495 
1496 /* Sort the elements of graph->sorted according to the corresponding SCCs.
1497  */
sort_sccs(struct isl_sched_graph * graph)1498 static int sort_sccs(struct isl_sched_graph *graph)
1499 {
1500 	return isl_sort(graph->sorted, graph->n, sizeof(int), &cmp_scc, graph);
1501 }
1502 
1503 /* Return a non-parametric set in the compressed space of "node" that is
1504  * bounded by the size in each direction
1505  *
1506  *	{ [x] : -S_i <= x_i <= S_i }
1507  *
1508  * If S_i is infinity in direction i, then there are no constraints
1509  * in that direction.
1510  *
1511  * Cache the result in node->bounds.
1512  */
get_size_bounds(struct isl_sched_node * node)1513 static __isl_give isl_basic_set *get_size_bounds(struct isl_sched_node *node)
1514 {
1515 	isl_space *space;
1516 	isl_basic_set *bounds;
1517 	int i;
1518 
1519 	if (node->bounds)
1520 		return isl_basic_set_copy(node->bounds);
1521 
1522 	if (node->compressed)
1523 		space = isl_pw_multi_aff_get_domain_space(node->decompress);
1524 	else
1525 		space = isl_space_copy(node->space);
1526 	space = isl_space_drop_all_params(space);
1527 	bounds = isl_basic_set_universe(space);
1528 
1529 	for (i = 0; i < node->nvar; ++i) {
1530 		isl_val *size;
1531 
1532 		size = isl_multi_val_get_val(node->sizes, i);
1533 		if (!size)
1534 			return isl_basic_set_free(bounds);
1535 		if (!isl_val_is_int(size)) {
1536 			isl_val_free(size);
1537 			continue;
1538 		}
1539 		bounds = isl_basic_set_upper_bound_val(bounds, isl_dim_set, i,
1540 							isl_val_copy(size));
1541 		bounds = isl_basic_set_lower_bound_val(bounds, isl_dim_set, i,
1542 							isl_val_neg(size));
1543 	}
1544 
1545 	node->bounds = isl_basic_set_copy(bounds);
1546 	return bounds;
1547 }
1548 
1549 /* Compress the dependence relation "map", if needed, i.e.,
1550  * when the source node "src" and/or the destination node "dst"
1551  * has been compressed.
1552  */
compress(__isl_take isl_map * map,struct isl_sched_node * src,struct isl_sched_node * dst)1553 static __isl_give isl_map *compress(__isl_take isl_map *map,
1554 	struct isl_sched_node *src, struct isl_sched_node *dst)
1555 {
1556 	if (src->compressed)
1557 		map = isl_map_preimage_domain_pw_multi_aff(map,
1558 					isl_pw_multi_aff_copy(src->decompress));
1559 	if (dst->compressed)
1560 		map = isl_map_preimage_range_pw_multi_aff(map,
1561 					isl_pw_multi_aff_copy(dst->decompress));
1562 	return map;
1563 }
1564 
1565 /* Drop some constraints from "delta" that could be exploited
1566  * to construct loop coalescing schedules.
1567  * In particular, drop those constraint that bound the difference
1568  * to the size of the domain.
1569  * First project out the parameters to improve the effectiveness.
1570  */
drop_coalescing_constraints(__isl_take isl_set * delta,struct isl_sched_node * node)1571 static __isl_give isl_set *drop_coalescing_constraints(
1572 	__isl_take isl_set *delta, struct isl_sched_node *node)
1573 {
1574 	isl_size nparam;
1575 	isl_basic_set *bounds;
1576 
1577 	nparam = isl_set_dim(delta, isl_dim_param);
1578 	if (nparam < 0)
1579 		return isl_set_free(delta);
1580 
1581 	bounds = get_size_bounds(node);
1582 
1583 	delta = isl_set_project_out(delta, isl_dim_param, 0, nparam);
1584 	delta = isl_set_remove_divs(delta);
1585 	delta = isl_set_plain_gist_basic_set(delta, bounds);
1586 	return delta;
1587 }
1588 
1589 /* Given a dependence relation R from "node" to itself,
1590  * construct the set of coefficients of valid constraints for elements
1591  * in that dependence relation.
1592  * In particular, the result contains tuples of coefficients
1593  * c_0, c_n, c_x such that
1594  *
1595  *	c_0 + c_n n + c_x y - c_x x >= 0 for each (x,y) in R
1596  *
1597  * or, equivalently,
1598  *
1599  *	c_0 + c_n n + c_x d >= 0 for each d in delta R = { y - x | (x,y) in R }
1600  *
1601  * We choose here to compute the dual of delta R.
1602  * Alternatively, we could have computed the dual of R, resulting
1603  * in a set of tuples c_0, c_n, c_x, c_y, and then
1604  * plugged in (c_0, c_n, c_x, -c_x).
1605  *
1606  * If "need_param" is set, then the resulting coefficients effectively
1607  * include coefficients for the parameters c_n.  Otherwise, they may
1608  * have been projected out already.
1609  * Since the constraints may be different for these two cases,
1610  * they are stored in separate caches.
1611  * In particular, if no parameter coefficients are required and
1612  * the schedule_treat_coalescing option is set, then the parameters
1613  * are projected out and some constraints that could be exploited
1614  * to construct coalescing schedules are removed before the dual
1615  * is computed.
1616  *
1617  * If "node" has been compressed, then the dependence relation
1618  * is also compressed before the set of coefficients is computed.
1619  */
intra_coefficients(struct isl_sched_graph * graph,struct isl_sched_node * node,__isl_take isl_map * map,int need_param)1620 static __isl_give isl_basic_set *intra_coefficients(
1621 	struct isl_sched_graph *graph, struct isl_sched_node *node,
1622 	__isl_take isl_map *map, int need_param)
1623 {
1624 	isl_ctx *ctx;
1625 	isl_set *delta;
1626 	isl_map *key;
1627 	isl_basic_set *coef;
1628 	isl_maybe_isl_basic_set m;
1629 	isl_map_to_basic_set **hmap = &graph->intra_hmap;
1630 	int treat;
1631 
1632 	if (!map)
1633 		return NULL;
1634 
1635 	ctx = isl_map_get_ctx(map);
1636 	treat = !need_param && isl_options_get_schedule_treat_coalescing(ctx);
1637 	if (!treat)
1638 		hmap = &graph->intra_hmap_param;
1639 	m = isl_map_to_basic_set_try_get(*hmap, map);
1640 	if (m.valid < 0 || m.valid) {
1641 		isl_map_free(map);
1642 		return m.value;
1643 	}
1644 
1645 	key = isl_map_copy(map);
1646 	map = compress(map, node, node);
1647 	delta = isl_map_deltas(map);
1648 	if (treat)
1649 		delta = drop_coalescing_constraints(delta, node);
1650 	delta = isl_set_remove_divs(delta);
1651 	coef = isl_set_coefficients(delta);
1652 	*hmap = isl_map_to_basic_set_set(*hmap, key, isl_basic_set_copy(coef));
1653 
1654 	return coef;
1655 }
1656 
1657 /* Given a dependence relation R, construct the set of coefficients
1658  * of valid constraints for elements in that dependence relation.
1659  * In particular, the result contains tuples of coefficients
1660  * c_0, c_n, c_x, c_y such that
1661  *
1662  *	c_0 + c_n n + c_x x + c_y y >= 0 for each (x,y) in R
1663  *
1664  * If the source or destination nodes of "edge" have been compressed,
1665  * then the dependence relation is also compressed before
1666  * the set of coefficients is computed.
1667  */
inter_coefficients(struct isl_sched_graph * graph,struct isl_sched_edge * edge,__isl_take isl_map * map)1668 static __isl_give isl_basic_set *inter_coefficients(
1669 	struct isl_sched_graph *graph, struct isl_sched_edge *edge,
1670 	__isl_take isl_map *map)
1671 {
1672 	isl_set *set;
1673 	isl_map *key;
1674 	isl_basic_set *coef;
1675 	isl_maybe_isl_basic_set m;
1676 
1677 	m = isl_map_to_basic_set_try_get(graph->inter_hmap, map);
1678 	if (m.valid < 0 || m.valid) {
1679 		isl_map_free(map);
1680 		return m.value;
1681 	}
1682 
1683 	key = isl_map_copy(map);
1684 	map = compress(map, edge->src, edge->dst);
1685 	set = isl_map_wrap(isl_map_remove_divs(map));
1686 	coef = isl_set_coefficients(set);
1687 	graph->inter_hmap = isl_map_to_basic_set_set(graph->inter_hmap, key,
1688 					isl_basic_set_copy(coef));
1689 
1690 	return coef;
1691 }
1692 
1693 /* Return the position of the coefficients of the variables in
1694  * the coefficients constraints "coef".
1695  *
1696  * The space of "coef" is of the form
1697  *
1698  *	{ coefficients[[cst, params] -> S] }
1699  *
1700  * Return the position of S.
1701  */
coef_var_offset(__isl_keep isl_basic_set * coef)1702 static isl_size coef_var_offset(__isl_keep isl_basic_set *coef)
1703 {
1704 	isl_size offset;
1705 	isl_space *space;
1706 
1707 	space = isl_space_unwrap(isl_basic_set_get_space(coef));
1708 	offset = isl_space_dim(space, isl_dim_in);
1709 	isl_space_free(space);
1710 
1711 	return offset;
1712 }
1713 
1714 /* Return the offset of the coefficient of the constant term of "node"
1715  * within the (I)LP.
1716  *
1717  * Within each node, the coefficients have the following order:
1718  *	- positive and negative parts of c_i_x
1719  *	- c_i_n (if parametric)
1720  *	- c_i_0
1721  */
node_cst_coef_offset(struct isl_sched_node * node)1722 static int node_cst_coef_offset(struct isl_sched_node *node)
1723 {
1724 	return node->start + 2 * node->nvar + node->nparam;
1725 }
1726 
1727 /* Return the offset of the coefficients of the parameters of "node"
1728  * within the (I)LP.
1729  *
1730  * Within each node, the coefficients have the following order:
1731  *	- positive and negative parts of c_i_x
1732  *	- c_i_n (if parametric)
1733  *	- c_i_0
1734  */
node_par_coef_offset(struct isl_sched_node * node)1735 static int node_par_coef_offset(struct isl_sched_node *node)
1736 {
1737 	return node->start + 2 * node->nvar;
1738 }
1739 
1740 /* Return the offset of the coefficients of the variables of "node"
1741  * within the (I)LP.
1742  *
1743  * Within each node, the coefficients have the following order:
1744  *	- positive and negative parts of c_i_x
1745  *	- c_i_n (if parametric)
1746  *	- c_i_0
1747  */
node_var_coef_offset(struct isl_sched_node * node)1748 static int node_var_coef_offset(struct isl_sched_node *node)
1749 {
1750 	return node->start;
1751 }
1752 
1753 /* Return the position of the pair of variables encoding
1754  * coefficient "i" of "node".
1755  *
1756  * The order of these variable pairs is the opposite of
1757  * that of the coefficients, with 2 variables per coefficient.
1758  */
node_var_coef_pos(struct isl_sched_node * node,int i)1759 static int node_var_coef_pos(struct isl_sched_node *node, int i)
1760 {
1761 	return node_var_coef_offset(node) + 2 * (node->nvar - 1 - i);
1762 }
1763 
1764 /* Construct an isl_dim_map for mapping constraints on coefficients
1765  * for "node" to the corresponding positions in graph->lp.
1766  * "offset" is the offset of the coefficients for the variables
1767  * in the input constraints.
1768  * "s" is the sign of the mapping.
1769  *
1770  * The input constraints are given in terms of the coefficients
1771  * (c_0, c_x) or (c_0, c_n, c_x).
1772  * The mapping produced by this function essentially plugs in
1773  * (0, c_i_x^+ - c_i_x^-) if s = 1 and
1774  * (0, -c_i_x^+ + c_i_x^-) if s = -1 or
1775  * (0, 0, c_i_x^+ - c_i_x^-) if s = 1 and
1776  * (0, 0, -c_i_x^+ + c_i_x^-) if s = -1.
1777  * In graph->lp, the c_i_x^- appear before their c_i_x^+ counterpart.
1778  * Furthermore, the order of these pairs is the opposite of that
1779  * of the corresponding coefficients.
1780  *
1781  * The caller can extend the mapping to also map the other coefficients
1782  * (and therefore not plug in 0).
1783  */
intra_dim_map(isl_ctx * ctx,struct isl_sched_graph * graph,struct isl_sched_node * node,int offset,int s)1784 static __isl_give isl_dim_map *intra_dim_map(isl_ctx *ctx,
1785 	struct isl_sched_graph *graph, struct isl_sched_node *node,
1786 	int offset, int s)
1787 {
1788 	int pos;
1789 	isl_size total;
1790 	isl_dim_map *dim_map;
1791 
1792 	total = isl_basic_set_dim(graph->lp, isl_dim_all);
1793 	if (!node || total < 0)
1794 		return NULL;
1795 
1796 	pos = node_var_coef_pos(node, 0);
1797 	dim_map = isl_dim_map_alloc(ctx, total);
1798 	isl_dim_map_range(dim_map, pos, -2, offset, 1, node->nvar, -s);
1799 	isl_dim_map_range(dim_map, pos + 1, -2, offset, 1, node->nvar, s);
1800 
1801 	return dim_map;
1802 }
1803 
1804 /* Construct an isl_dim_map for mapping constraints on coefficients
1805  * for "src" (node i) and "dst" (node j) to the corresponding positions
1806  * in graph->lp.
1807  * "offset" is the offset of the coefficients for the variables of "src"
1808  * in the input constraints.
1809  * "s" is the sign of the mapping.
1810  *
1811  * The input constraints are given in terms of the coefficients
1812  * (c_0, c_n, c_x, c_y).
1813  * The mapping produced by this function essentially plugs in
1814  * (c_j_0 - c_i_0, c_j_n - c_i_n,
1815  *  -(c_i_x^+ - c_i_x^-), c_j_x^+ - c_j_x^-) if s = 1 and
1816  * (-c_j_0 + c_i_0, -c_j_n + c_i_n,
1817  *  c_i_x^+ - c_i_x^-, -(c_j_x^+ - c_j_x^-)) if s = -1.
1818  * In graph->lp, the c_*^- appear before their c_*^+ counterpart.
1819  * Furthermore, the order of these pairs is the opposite of that
1820  * of the corresponding coefficients.
1821  *
1822  * The caller can further extend the mapping.
1823  */
inter_dim_map(isl_ctx * ctx,struct isl_sched_graph * graph,struct isl_sched_node * src,struct isl_sched_node * dst,int offset,int s)1824 static __isl_give isl_dim_map *inter_dim_map(isl_ctx *ctx,
1825 	struct isl_sched_graph *graph, struct isl_sched_node *src,
1826 	struct isl_sched_node *dst, int offset, int s)
1827 {
1828 	int pos;
1829 	isl_size total;
1830 	isl_dim_map *dim_map;
1831 
1832 	total = isl_basic_set_dim(graph->lp, isl_dim_all);
1833 	if (!src || !dst || total < 0)
1834 		return NULL;
1835 
1836 	dim_map = isl_dim_map_alloc(ctx, total);
1837 
1838 	pos = node_cst_coef_offset(dst);
1839 	isl_dim_map_range(dim_map, pos, 0, 0, 0, 1, s);
1840 	pos = node_par_coef_offset(dst);
1841 	isl_dim_map_range(dim_map, pos, 1, 1, 1, dst->nparam, s);
1842 	pos = node_var_coef_pos(dst, 0);
1843 	isl_dim_map_range(dim_map, pos, -2, offset + src->nvar, 1,
1844 			  dst->nvar, -s);
1845 	isl_dim_map_range(dim_map, pos + 1, -2, offset + src->nvar, 1,
1846 			  dst->nvar, s);
1847 
1848 	pos = node_cst_coef_offset(src);
1849 	isl_dim_map_range(dim_map, pos, 0, 0, 0, 1, -s);
1850 	pos = node_par_coef_offset(src);
1851 	isl_dim_map_range(dim_map, pos, 1, 1, 1, src->nparam, -s);
1852 	pos = node_var_coef_pos(src, 0);
1853 	isl_dim_map_range(dim_map, pos, -2, offset, 1, src->nvar, s);
1854 	isl_dim_map_range(dim_map, pos + 1, -2, offset, 1, src->nvar, -s);
1855 
1856 	return dim_map;
1857 }
1858 
1859 /* Add the constraints from "src" to "dst" using "dim_map",
1860  * after making sure there is enough room in "dst" for the extra constraints.
1861  */
add_constraints_dim_map(__isl_take isl_basic_set * dst,__isl_take isl_basic_set * src,__isl_take isl_dim_map * dim_map)1862 static __isl_give isl_basic_set *add_constraints_dim_map(
1863 	__isl_take isl_basic_set *dst, __isl_take isl_basic_set *src,
1864 	__isl_take isl_dim_map *dim_map)
1865 {
1866 	isl_size n_eq, n_ineq;
1867 
1868 	n_eq = isl_basic_set_n_equality(src);
1869 	n_ineq = isl_basic_set_n_inequality(src);
1870 	if (n_eq < 0 || n_ineq < 0)
1871 		dst = isl_basic_set_free(dst);
1872 	dst = isl_basic_set_extend_constraints(dst, n_eq, n_ineq);
1873 	dst = isl_basic_set_add_constraints_dim_map(dst, src, dim_map);
1874 	return dst;
1875 }
1876 
1877 /* Add constraints to graph->lp that force validity for the given
1878  * dependence from a node i to itself.
1879  * That is, add constraints that enforce
1880  *
1881  *	(c_i_0 + c_i_n n + c_i_x y) - (c_i_0 + c_i_n n + c_i_x x)
1882  *	= c_i_x (y - x) >= 0
1883  *
1884  * for each (x,y) in R.
1885  * We obtain general constraints on coefficients (c_0, c_x)
1886  * of valid constraints for (y - x) and then plug in (0, c_i_x^+ - c_i_x^-),
1887  * where c_i_x = c_i_x^+ - c_i_x^-, with c_i_x^+ and c_i_x^- non-negative.
1888  * In graph->lp, the c_i_x^- appear before their c_i_x^+ counterpart.
1889  * Note that the result of intra_coefficients may also contain
1890  * parameter coefficients c_n, in which case 0 is plugged in for them as well.
1891  */
add_intra_validity_constraints(struct isl_sched_graph * graph,struct isl_sched_edge * edge)1892 static isl_stat add_intra_validity_constraints(struct isl_sched_graph *graph,
1893 	struct isl_sched_edge *edge)
1894 {
1895 	isl_size offset;
1896 	isl_map *map = isl_map_copy(edge->map);
1897 	isl_ctx *ctx = isl_map_get_ctx(map);
1898 	isl_dim_map *dim_map;
1899 	isl_basic_set *coef;
1900 	struct isl_sched_node *node = edge->src;
1901 
1902 	coef = intra_coefficients(graph, node, map, 0);
1903 
1904 	offset = coef_var_offset(coef);
1905 	if (offset < 0)
1906 		coef = isl_basic_set_free(coef);
1907 	if (!coef)
1908 		return isl_stat_error;
1909 
1910 	dim_map = intra_dim_map(ctx, graph, node, offset, 1);
1911 	graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);
1912 
1913 	return isl_stat_ok;
1914 }
1915 
1916 /* Add constraints to graph->lp that force validity for the given
1917  * dependence from node i to node j.
1918  * That is, add constraints that enforce
1919  *
1920  *	(c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) >= 0
1921  *
1922  * for each (x,y) in R.
1923  * We obtain general constraints on coefficients (c_0, c_n, c_x, c_y)
1924  * of valid constraints for R and then plug in
1925  * (c_j_0 - c_i_0, c_j_n - c_i_n, -(c_i_x^+ - c_i_x^-), c_j_x^+ - c_j_x^-),
1926  * where c_* = c_*^+ - c_*^-, with c_*^+ and c_*^- non-negative.
1927  * In graph->lp, the c_*^- appear before their c_*^+ counterpart.
1928  */
add_inter_validity_constraints(struct isl_sched_graph * graph,struct isl_sched_edge * edge)1929 static isl_stat add_inter_validity_constraints(struct isl_sched_graph *graph,
1930 	struct isl_sched_edge *edge)
1931 {
1932 	isl_size offset;
1933 	isl_map *map;
1934 	isl_ctx *ctx;
1935 	isl_dim_map *dim_map;
1936 	isl_basic_set *coef;
1937 	struct isl_sched_node *src = edge->src;
1938 	struct isl_sched_node *dst = edge->dst;
1939 
1940 	if (!graph->lp)
1941 		return isl_stat_error;
1942 
1943 	map = isl_map_copy(edge->map);
1944 	ctx = isl_map_get_ctx(map);
1945 	coef = inter_coefficients(graph, edge, map);
1946 
1947 	offset = coef_var_offset(coef);
1948 	if (offset < 0)
1949 		coef = isl_basic_set_free(coef);
1950 	if (!coef)
1951 		return isl_stat_error;
1952 
1953 	dim_map = inter_dim_map(ctx, graph, src, dst, offset, 1);
1954 
1955 	edge->start = graph->lp->n_ineq;
1956 	graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);
1957 	if (!graph->lp)
1958 		return isl_stat_error;
1959 	edge->end = graph->lp->n_ineq;
1960 
1961 	return isl_stat_ok;
1962 }
1963 
1964 /* Add constraints to graph->lp that bound the dependence distance for the given
1965  * dependence from a node i to itself.
1966  * If s = 1, we add the constraint
1967  *
1968  *	c_i_x (y - x) <= m_0 + m_n n
1969  *
1970  * or
1971  *
1972  *	-c_i_x (y - x) + m_0 + m_n n >= 0
1973  *
1974  * for each (x,y) in R.
1975  * If s = -1, we add the constraint
1976  *
1977  *	-c_i_x (y - x) <= m_0 + m_n n
1978  *
1979  * or
1980  *
1981  *	c_i_x (y - x) + m_0 + m_n n >= 0
1982  *
1983  * for each (x,y) in R.
1984  * We obtain general constraints on coefficients (c_0, c_n, c_x)
1985  * of valid constraints for (y - x) and then plug in (m_0, m_n, -s * c_i_x),
1986  * with each coefficient (except m_0) represented as a pair of non-negative
1987  * coefficients.
1988  *
1989  *
1990  * If "local" is set, then we add constraints
1991  *
1992  *	c_i_x (y - x) <= 0
1993  *
1994  * or
1995  *
1996  *	-c_i_x (y - x) <= 0
1997  *
1998  * instead, forcing the dependence distance to be (less than or) equal to 0.
1999  * That is, we plug in (0, 0, -s * c_i_x),
2000  * intra_coefficients is not required to have c_n in its result when
2001  * "local" is set.  If they are missing, then (0, -s * c_i_x) is plugged in.
2002  * Note that dependences marked local are treated as validity constraints
2003  * by add_all_validity_constraints and therefore also have
2004  * their distances bounded by 0 from below.
2005  */
add_intra_proximity_constraints(struct isl_sched_graph * graph,struct isl_sched_edge * edge,int s,int local)2006 static isl_stat add_intra_proximity_constraints(struct isl_sched_graph *graph,
2007 	struct isl_sched_edge *edge, int s, int local)
2008 {
2009 	isl_size offset;
2010 	isl_size nparam;
2011 	isl_map *map = isl_map_copy(edge->map);
2012 	isl_ctx *ctx = isl_map_get_ctx(map);
2013 	isl_dim_map *dim_map;
2014 	isl_basic_set *coef;
2015 	struct isl_sched_node *node = edge->src;
2016 
2017 	coef = intra_coefficients(graph, node, map, !local);
2018 	nparam = isl_space_dim(node->space, isl_dim_param);
2019 
2020 	offset = coef_var_offset(coef);
2021 	if (nparam < 0 || offset < 0)
2022 		coef = isl_basic_set_free(coef);
2023 	if (!coef)
2024 		return isl_stat_error;
2025 
2026 	dim_map = intra_dim_map(ctx, graph, node, offset, -s);
2027 
2028 	if (!local) {
2029 		isl_dim_map_range(dim_map, 1, 0, 0, 0, 1, 1);
2030 		isl_dim_map_range(dim_map, 4, 2, 1, 1, nparam, -1);
2031 		isl_dim_map_range(dim_map, 5, 2, 1, 1, nparam, 1);
2032 	}
2033 	graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);
2034 
2035 	return isl_stat_ok;
2036 }
2037 
2038 /* Add constraints to graph->lp that bound the dependence distance for the given
2039  * dependence from node i to node j.
2040  * If s = 1, we add the constraint
2041  *
2042  *	(c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x)
2043  *		<= m_0 + m_n n
2044  *
2045  * or
2046  *
2047  *	-(c_j_0 + c_j_n n + c_j_x y) + (c_i_0 + c_i_n n + c_i_x x) +
2048  *		m_0 + m_n n >= 0
2049  *
2050  * for each (x,y) in R.
2051  * If s = -1, we add the constraint
2052  *
2053  *	-((c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x))
2054  *		<= m_0 + m_n n
2055  *
2056  * or
2057  *
2058  *	(c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) +
2059  *		m_0 + m_n n >= 0
2060  *
2061  * for each (x,y) in R.
2062  * We obtain general constraints on coefficients (c_0, c_n, c_x, c_y)
2063  * of valid constraints for R and then plug in
2064  * (m_0 - s*c_j_0 + s*c_i_0, m_n - s*c_j_n + s*c_i_n,
2065  *  s*c_i_x, -s*c_j_x)
2066  * with each coefficient (except m_0, c_*_0 and c_*_n)
2067  * represented as a pair of non-negative coefficients.
2068  *
2069  *
2070  * If "local" is set (and s = 1), then we add constraints
2071  *
2072  *	(c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) <= 0
2073  *
2074  * or
2075  *
2076  *	-((c_j_0 + c_j_n n + c_j_x y) + (c_i_0 + c_i_n n + c_i_x x)) >= 0
2077  *
2078  * instead, forcing the dependence distance to be (less than or) equal to 0.
2079  * That is, we plug in
2080  * (-s*c_j_0 + s*c_i_0, -s*c_j_n + s*c_i_n, s*c_i_x, -s*c_j_x).
2081  * Note that dependences marked local are treated as validity constraints
2082  * by add_all_validity_constraints and therefore also have
2083  * their distances bounded by 0 from below.
2084  */
add_inter_proximity_constraints(struct isl_sched_graph * graph,struct isl_sched_edge * edge,int s,int local)2085 static isl_stat add_inter_proximity_constraints(struct isl_sched_graph *graph,
2086 	struct isl_sched_edge *edge, int s, int local)
2087 {
2088 	isl_size offset;
2089 	isl_size nparam;
2090 	isl_map *map = isl_map_copy(edge->map);
2091 	isl_ctx *ctx = isl_map_get_ctx(map);
2092 	isl_dim_map *dim_map;
2093 	isl_basic_set *coef;
2094 	struct isl_sched_node *src = edge->src;
2095 	struct isl_sched_node *dst = edge->dst;
2096 
2097 	coef = inter_coefficients(graph, edge, map);
2098 	nparam = isl_space_dim(src->space, isl_dim_param);
2099 
2100 	offset = coef_var_offset(coef);
2101 	if (nparam < 0 || offset < 0)
2102 		coef = isl_basic_set_free(coef);
2103 	if (!coef)
2104 		return isl_stat_error;
2105 
2106 	dim_map = inter_dim_map(ctx, graph, src, dst, offset, -s);
2107 
2108 	if (!local) {
2109 		isl_dim_map_range(dim_map, 1, 0, 0, 0, 1, 1);
2110 		isl_dim_map_range(dim_map, 4, 2, 1, 1, nparam, -1);
2111 		isl_dim_map_range(dim_map, 5, 2, 1, 1, nparam, 1);
2112 	}
2113 
2114 	graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);
2115 
2116 	return isl_stat_ok;
2117 }
2118 
2119 /* Should the distance over "edge" be forced to zero?
2120  * That is, is it marked as a local edge?
2121  * If "use_coincidence" is set, then coincidence edges are treated
2122  * as local edges.
2123  */
force_zero(struct isl_sched_edge * edge,int use_coincidence)2124 static int force_zero(struct isl_sched_edge *edge, int use_coincidence)
2125 {
2126 	return is_local(edge) || (use_coincidence && is_coincidence(edge));
2127 }
2128 
2129 /* Add all validity constraints to graph->lp.
2130  *
2131  * An edge that is forced to be local needs to have its dependence
2132  * distances equal to zero.  We take care of bounding them by 0 from below
2133  * here.  add_all_proximity_constraints takes care of bounding them by 0
2134  * from above.
2135  *
2136  * If "use_coincidence" is set, then we treat coincidence edges as local edges.
2137  * Otherwise, we ignore them.
2138  */
add_all_validity_constraints(struct isl_sched_graph * graph,int use_coincidence)2139 static int add_all_validity_constraints(struct isl_sched_graph *graph,
2140 	int use_coincidence)
2141 {
2142 	int i;
2143 
2144 	for (i = 0; i < graph->n_edge; ++i) {
2145 		struct isl_sched_edge *edge = &graph->edge[i];
2146 		int zero;
2147 
2148 		zero = force_zero(edge, use_coincidence);
2149 		if (!is_validity(edge) && !zero)
2150 			continue;
2151 		if (edge->src != edge->dst)
2152 			continue;
2153 		if (add_intra_validity_constraints(graph, edge) < 0)
2154 			return -1;
2155 	}
2156 
2157 	for (i = 0; i < graph->n_edge; ++i) {
2158 		struct isl_sched_edge *edge = &graph->edge[i];
2159 		int zero;
2160 
2161 		zero = force_zero(edge, use_coincidence);
2162 		if (!is_validity(edge) && !zero)
2163 			continue;
2164 		if (edge->src == edge->dst)
2165 			continue;
2166 		if (add_inter_validity_constraints(graph, edge) < 0)
2167 			return -1;
2168 	}
2169 
2170 	return 0;
2171 }
2172 
2173 /* Add constraints to graph->lp that bound the dependence distance
2174  * for all dependence relations.
2175  * If a given proximity dependence is identical to a validity
2176  * dependence, then the dependence distance is already bounded
2177  * from below (by zero), so we only need to bound the distance
2178  * from above.  (This includes the case of "local" dependences
2179  * which are treated as validity dependence by add_all_validity_constraints.)
2180  * Otherwise, we need to bound the distance both from above and from below.
2181  *
2182  * If "use_coincidence" is set, then we treat coincidence edges as local edges.
2183  * Otherwise, we ignore them.
2184  */
add_all_proximity_constraints(struct isl_sched_graph * graph,int use_coincidence)2185 static int add_all_proximity_constraints(struct isl_sched_graph *graph,
2186 	int use_coincidence)
2187 {
2188 	int i;
2189 
2190 	for (i = 0; i < graph->n_edge; ++i) {
2191 		struct isl_sched_edge *edge = &graph->edge[i];
2192 		int zero;
2193 
2194 		zero = force_zero(edge, use_coincidence);
2195 		if (!isl_sched_edge_is_proximity(edge) && !zero)
2196 			continue;
2197 		if (edge->src == edge->dst &&
2198 		    add_intra_proximity_constraints(graph, edge, 1, zero) < 0)
2199 			return -1;
2200 		if (edge->src != edge->dst &&
2201 		    add_inter_proximity_constraints(graph, edge, 1, zero) < 0)
2202 			return -1;
2203 		if (is_validity(edge) || zero)
2204 			continue;
2205 		if (edge->src == edge->dst &&
2206 		    add_intra_proximity_constraints(graph, edge, -1, 0) < 0)
2207 			return -1;
2208 		if (edge->src != edge->dst &&
2209 		    add_inter_proximity_constraints(graph, edge, -1, 0) < 0)
2210 			return -1;
2211 	}
2212 
2213 	return 0;
2214 }
2215 
2216 /* Normalize the rows of "indep" such that all rows are lexicographically
2217  * positive and such that each row contains as many final zeros as possible,
2218  * given the choice for the previous rows.
2219  * Do this by performing elementary row operations.
2220  */
normalize_independent(__isl_take isl_mat * indep)2221 static __isl_give isl_mat *normalize_independent(__isl_take isl_mat *indep)
2222 {
2223 	indep = isl_mat_reverse_gauss(indep);
2224 	indep = isl_mat_lexnonneg_rows(indep);
2225 	return indep;
2226 }
2227 
2228 /* Extract the linear part of the current schedule for node "node".
2229  */
extract_linear_schedule(struct isl_sched_node * node)2230 static __isl_give isl_mat *extract_linear_schedule(struct isl_sched_node *node)
2231 {
2232 	isl_size n_row = isl_mat_rows(node->sched);
2233 
2234 	if (n_row < 0)
2235 		return NULL;
2236 	return isl_mat_sub_alloc(node->sched, 0, n_row,
2237 			      1 + node->nparam, node->nvar);
2238 }
2239 
2240 /* Compute a basis for the rows in the linear part of the schedule
2241  * and extend this basis to a full basis.  The remaining rows
2242  * can then be used to force linear independence from the rows
2243  * in the schedule.
2244  *
2245  * In particular, given the schedule rows S, we compute
2246  *
2247  *	S   = H Q
2248  *	S U = H
2249  *
2250  * with H the Hermite normal form of S.  That is, all but the
2251  * first rank columns of H are zero and so each row in S is
2252  * a linear combination of the first rank rows of Q.
2253  * The matrix Q can be used as a variable transformation
2254  * that isolates the directions of S in the first rank rows.
2255  * Transposing S U = H yields
2256  *
2257  *	U^T S^T = H^T
2258  *
2259  * with all but the first rank rows of H^T zero.
2260  * The last rows of U^T are therefore linear combinations
2261  * of schedule coefficients that are all zero on schedule
2262  * coefficients that are linearly dependent on the rows of S.
2263  * At least one of these combinations is non-zero on
2264  * linearly independent schedule coefficients.
2265  * The rows are normalized to involve as few of the last
2266  * coefficients as possible and to have a positive initial value.
2267  */
isl_sched_node_update_vmap(struct isl_sched_node * node)2268 isl_stat isl_sched_node_update_vmap(struct isl_sched_node *node)
2269 {
2270 	isl_mat *H, *U, *Q;
2271 
2272 	H = extract_linear_schedule(node);
2273 
2274 	H = isl_mat_left_hermite(H, 0, &U, &Q);
2275 	isl_mat_free(node->indep);
2276 	isl_mat_free(node->vmap);
2277 	node->vmap = Q;
2278 	node->indep = isl_mat_transpose(U);
2279 	node->rank = isl_mat_initial_non_zero_cols(H);
2280 	node->indep = isl_mat_drop_rows(node->indep, 0, node->rank);
2281 	node->indep = normalize_independent(node->indep);
2282 	isl_mat_free(H);
2283 
2284 	if (!node->indep || !node->vmap || node->rank < 0)
2285 		return isl_stat_error;
2286 	return isl_stat_ok;
2287 }
2288 
2289 /* Is "edge" marked as a validity or a conditional validity edge?
2290  */
is_any_validity(struct isl_sched_edge * edge)2291 static int is_any_validity(struct isl_sched_edge *edge)
2292 {
2293 	return is_validity(edge) ||
2294 		isl_sched_edge_is_conditional_validity(edge);
2295 }
2296 
2297 /* How many times should we count the constraints in "edge"?
2298  *
2299  * We count as follows
2300  * validity		-> 1 (>= 0)
2301  * validity+proximity	-> 2 (>= 0 and upper bound)
2302  * proximity		-> 2 (lower and upper bound)
2303  * local(+any)		-> 2 (>= 0 and <= 0)
2304  *
2305  * If an edge is only marked conditional_validity then it counts
2306  * as zero since it is only checked afterwards.
2307  *
2308  * If "use_coincidence" is set, then we treat coincidence edges as local edges.
2309  * Otherwise, we ignore them.
2310  */
edge_multiplicity(struct isl_sched_edge * edge,int use_coincidence)2311 static int edge_multiplicity(struct isl_sched_edge *edge, int use_coincidence)
2312 {
2313 	if (isl_sched_edge_is_proximity(edge) ||
2314 	    force_zero(edge, use_coincidence))
2315 		return 2;
2316 	if (is_validity(edge))
2317 		return 1;
2318 	return 0;
2319 }
2320 
2321 /* How many times should the constraints in "edge" be counted
2322  * as a parametric intra-node constraint?
2323  *
2324  * Only proximity edges that are not forced zero need
2325  * coefficient constraints that include coefficients for parameters.
2326  * If the edge is also a validity edge, then only
2327  * an upper bound is introduced.  Otherwise, both lower and upper bounds
2328  * are introduced.
2329  */
parametric_intra_edge_multiplicity(struct isl_sched_edge * edge,int use_coincidence)2330 static int parametric_intra_edge_multiplicity(struct isl_sched_edge *edge,
2331 	int use_coincidence)
2332 {
2333 	if (edge->src != edge->dst)
2334 		return 0;
2335 	if (!isl_sched_edge_is_proximity(edge))
2336 		return 0;
2337 	if (force_zero(edge, use_coincidence))
2338 		return 0;
2339 	if (is_validity(edge))
2340 		return 1;
2341 	else
2342 		return 2;
2343 }
2344 
2345 /* Add "f" times the number of equality and inequality constraints of "bset"
2346  * to "n_eq" and "n_ineq" and free "bset".
2347  */
update_count(__isl_take isl_basic_set * bset,int f,int * n_eq,int * n_ineq)2348 static isl_stat update_count(__isl_take isl_basic_set *bset,
2349 	int f, int *n_eq, int *n_ineq)
2350 {
2351 	isl_size eq, ineq;
2352 
2353 	eq = isl_basic_set_n_equality(bset);
2354 	ineq = isl_basic_set_n_inequality(bset);
2355 	isl_basic_set_free(bset);
2356 
2357 	if (eq < 0 || ineq < 0)
2358 		return isl_stat_error;
2359 
2360 	*n_eq += eq;
2361 	*n_ineq += ineq;
2362 
2363 	return isl_stat_ok;
2364 }
2365 
2366 /* Count the number of equality and inequality constraints
2367  * that will be added for the given map.
2368  *
2369  * The edges that require parameter coefficients are counted separately.
2370  *
2371  * "use_coincidence" is set if we should take into account coincidence edges.
2372  */
count_map_constraints(struct isl_sched_graph * graph,struct isl_sched_edge * edge,__isl_take isl_map * map,int * n_eq,int * n_ineq,int use_coincidence)2373 static isl_stat count_map_constraints(struct isl_sched_graph *graph,
2374 	struct isl_sched_edge *edge, __isl_take isl_map *map,
2375 	int *n_eq, int *n_ineq, int use_coincidence)
2376 {
2377 	isl_map *copy;
2378 	isl_basic_set *coef;
2379 	int f = edge_multiplicity(edge, use_coincidence);
2380 	int fp = parametric_intra_edge_multiplicity(edge, use_coincidence);
2381 
2382 	if (f == 0) {
2383 		isl_map_free(map);
2384 		return isl_stat_ok;
2385 	}
2386 
2387 	if (edge->src != edge->dst) {
2388 		coef = inter_coefficients(graph, edge, map);
2389 		return update_count(coef, f, n_eq, n_ineq);
2390 	}
2391 
2392 	if (fp > 0) {
2393 		copy = isl_map_copy(map);
2394 		coef = intra_coefficients(graph, edge->src, copy, 1);
2395 		if (update_count(coef, fp, n_eq, n_ineq) < 0)
2396 			goto error;
2397 	}
2398 
2399 	if (f > fp) {
2400 		copy = isl_map_copy(map);
2401 		coef = intra_coefficients(graph, edge->src, copy, 0);
2402 		if (update_count(coef, f - fp, n_eq, n_ineq) < 0)
2403 			goto error;
2404 	}
2405 
2406 	isl_map_free(map);
2407 	return isl_stat_ok;
2408 error:
2409 	isl_map_free(map);
2410 	return isl_stat_error;
2411 }
2412 
2413 /* Count the number of equality and inequality constraints
2414  * that will be added to the main lp problem.
2415  * We count as follows
2416  * validity		-> 1 (>= 0)
2417  * validity+proximity	-> 2 (>= 0 and upper bound)
2418  * proximity		-> 2 (lower and upper bound)
2419  * local(+any)		-> 2 (>= 0 and <= 0)
2420  *
2421  * If "use_coincidence" is set, then we treat coincidence edges as local edges.
2422  * Otherwise, we ignore them.
2423  */
count_constraints(struct isl_sched_graph * graph,int * n_eq,int * n_ineq,int use_coincidence)2424 static int count_constraints(struct isl_sched_graph *graph,
2425 	int *n_eq, int *n_ineq, int use_coincidence)
2426 {
2427 	int i;
2428 
2429 	*n_eq = *n_ineq = 0;
2430 	for (i = 0; i < graph->n_edge; ++i) {
2431 		struct isl_sched_edge *edge = &graph->edge[i];
2432 		isl_map *map = isl_map_copy(edge->map);
2433 
2434 		if (count_map_constraints(graph, edge, map, n_eq, n_ineq,
2435 					    use_coincidence) < 0)
2436 			return -1;
2437 	}
2438 
2439 	return 0;
2440 }
2441 
2442 /* Count the number of constraints that will be added by
2443  * add_bound_constant_constraints to bound the values of the constant terms
2444  * and increment *n_eq and *n_ineq accordingly.
2445  *
2446  * In practice, add_bound_constant_constraints only adds inequalities.
2447  */
count_bound_constant_constraints(isl_ctx * ctx,struct isl_sched_graph * graph,int * n_eq,int * n_ineq)2448 static isl_stat count_bound_constant_constraints(isl_ctx *ctx,
2449 	struct isl_sched_graph *graph, int *n_eq, int *n_ineq)
2450 {
2451 	if (isl_options_get_schedule_max_constant_term(ctx) == -1)
2452 		return isl_stat_ok;
2453 
2454 	*n_ineq += graph->n;
2455 
2456 	return isl_stat_ok;
2457 }
2458 
2459 /* Add constraints to bound the values of the constant terms in the schedule,
2460  * if requested by the user.
2461  *
2462  * The maximal value of the constant terms is defined by the option
2463  * "schedule_max_constant_term".
2464  */
add_bound_constant_constraints(isl_ctx * ctx,struct isl_sched_graph * graph)2465 static isl_stat add_bound_constant_constraints(isl_ctx *ctx,
2466 	struct isl_sched_graph *graph)
2467 {
2468 	int i, k;
2469 	int max;
2470 	isl_size total;
2471 
2472 	max = isl_options_get_schedule_max_constant_term(ctx);
2473 	if (max == -1)
2474 		return isl_stat_ok;
2475 
2476 	total = isl_basic_set_dim(graph->lp, isl_dim_set);
2477 	if (total < 0)
2478 		return isl_stat_error;
2479 
2480 	for (i = 0; i < graph->n; ++i) {
2481 		struct isl_sched_node *node = &graph->node[i];
2482 		int pos;
2483 
2484 		k = isl_basic_set_alloc_inequality(graph->lp);
2485 		if (k < 0)
2486 			return isl_stat_error;
2487 		isl_seq_clr(graph->lp->ineq[k], 1 + total);
2488 		pos = node_cst_coef_offset(node);
2489 		isl_int_set_si(graph->lp->ineq[k][1 + pos], -1);
2490 		isl_int_set_si(graph->lp->ineq[k][0], max);
2491 	}
2492 
2493 	return isl_stat_ok;
2494 }
2495 
2496 /* Count the number of constraints that will be added by
2497  * add_bound_coefficient_constraints and increment *n_eq and *n_ineq
2498  * accordingly.
2499  *
2500  * In practice, add_bound_coefficient_constraints only adds inequalities.
2501  */
count_bound_coefficient_constraints(isl_ctx * ctx,struct isl_sched_graph * graph,int * n_eq,int * n_ineq)2502 static int count_bound_coefficient_constraints(isl_ctx *ctx,
2503 	struct isl_sched_graph *graph, int *n_eq, int *n_ineq)
2504 {
2505 	int i;
2506 
2507 	if (isl_options_get_schedule_max_coefficient(ctx) == -1 &&
2508 	    !isl_options_get_schedule_treat_coalescing(ctx))
2509 		return 0;
2510 
2511 	for (i = 0; i < graph->n; ++i)
2512 		*n_ineq += graph->node[i].nparam + 2 * graph->node[i].nvar;
2513 
2514 	return 0;
2515 }
2516 
2517 /* Add constraints to graph->lp that bound the values of
2518  * the parameter schedule coefficients of "node" to "max" and
2519  * the variable schedule coefficients to the corresponding entry
2520  * in node->max.
2521  * In either case, a negative value means that no bound needs to be imposed.
2522  *
2523  * For parameter coefficients, this amounts to adding a constraint
2524  *
2525  *	c_n <= max
2526  *
2527  * i.e.,
2528  *
2529  *	-c_n + max >= 0
2530  *
2531  * The variables coefficients are, however, not represented directly.
2532  * Instead, the variable coefficients c_x are written as differences
2533  * c_x = c_x^+ - c_x^-.
2534  * That is,
2535  *
2536  *	-max_i <= c_x_i <= max_i
2537  *
2538  * is encoded as
2539  *
2540  *	-max_i <= c_x_i^+ - c_x_i^- <= max_i
2541  *
2542  * or
2543  *
2544  *	-(c_x_i^+ - c_x_i^-) + max_i >= 0
2545  *	c_x_i^+ - c_x_i^- + max_i >= 0
2546  */
node_add_coefficient_constraints(isl_ctx * ctx,struct isl_sched_graph * graph,struct isl_sched_node * node,int max)2547 static isl_stat node_add_coefficient_constraints(isl_ctx *ctx,
2548 	struct isl_sched_graph *graph, struct isl_sched_node *node, int max)
2549 {
2550 	int i, j, k;
2551 	isl_size total;
2552 	isl_vec *ineq;
2553 
2554 	total = isl_basic_set_dim(graph->lp, isl_dim_set);
2555 	if (total < 0)
2556 		return isl_stat_error;
2557 
2558 	for (j = 0; j < node->nparam; ++j) {
2559 		int dim;
2560 
2561 		if (max < 0)
2562 			continue;
2563 
2564 		k = isl_basic_set_alloc_inequality(graph->lp);
2565 		if (k < 0)
2566 			return isl_stat_error;
2567 		dim = 1 + node_par_coef_offset(node) + j;
2568 		isl_seq_clr(graph->lp->ineq[k], 1 + total);
2569 		isl_int_set_si(graph->lp->ineq[k][dim], -1);
2570 		isl_int_set_si(graph->lp->ineq[k][0], max);
2571 	}
2572 
2573 	ineq = isl_vec_alloc(ctx, 1 + total);
2574 	ineq = isl_vec_clr(ineq);
2575 	if (!ineq)
2576 		return isl_stat_error;
2577 	for (i = 0; i < node->nvar; ++i) {
2578 		int pos = 1 + node_var_coef_pos(node, i);
2579 
2580 		if (isl_int_is_neg(node->max->el[i]))
2581 			continue;
2582 
2583 		isl_int_set_si(ineq->el[pos], 1);
2584 		isl_int_set_si(ineq->el[pos + 1], -1);
2585 		isl_int_set(ineq->el[0], node->max->el[i]);
2586 
2587 		k = isl_basic_set_alloc_inequality(graph->lp);
2588 		if (k < 0)
2589 			goto error;
2590 		isl_seq_cpy(graph->lp->ineq[k], ineq->el, 1 + total);
2591 
2592 		isl_seq_neg(ineq->el + pos, ineq->el + pos, 2);
2593 		k = isl_basic_set_alloc_inequality(graph->lp);
2594 		if (k < 0)
2595 			goto error;
2596 		isl_seq_cpy(graph->lp->ineq[k], ineq->el, 1 + total);
2597 
2598 		isl_seq_clr(ineq->el + pos, 2);
2599 	}
2600 	isl_vec_free(ineq);
2601 
2602 	return isl_stat_ok;
2603 error:
2604 	isl_vec_free(ineq);
2605 	return isl_stat_error;
2606 }
2607 
2608 /* Add constraints that bound the values of the variable and parameter
2609  * coefficients of the schedule.
2610  *
2611  * The maximal value of the coefficients is defined by the option
2612  * 'schedule_max_coefficient' and the entries in node->max.
2613  * These latter entries are only set if either the schedule_max_coefficient
2614  * option or the schedule_treat_coalescing option is set.
2615  */
add_bound_coefficient_constraints(isl_ctx * ctx,struct isl_sched_graph * graph)2616 static isl_stat add_bound_coefficient_constraints(isl_ctx *ctx,
2617 	struct isl_sched_graph *graph)
2618 {
2619 	int i;
2620 	int max;
2621 
2622 	max = isl_options_get_schedule_max_coefficient(ctx);
2623 
2624 	if (max == -1 && !isl_options_get_schedule_treat_coalescing(ctx))
2625 		return isl_stat_ok;
2626 
2627 	for (i = 0; i < graph->n; ++i) {
2628 		struct isl_sched_node *node = &graph->node[i];
2629 
2630 		if (node_add_coefficient_constraints(ctx, graph, node, max) < 0)
2631 			return isl_stat_error;
2632 	}
2633 
2634 	return isl_stat_ok;
2635 }
2636 
2637 /* Add a constraint to graph->lp that equates the value at position
2638  * "sum_pos" to the sum of the "n" values starting at "first".
2639  */
add_sum_constraint(struct isl_sched_graph * graph,int sum_pos,int first,int n)2640 static isl_stat add_sum_constraint(struct isl_sched_graph *graph,
2641 	int sum_pos, int first, int n)
2642 {
2643 	int i, k;
2644 	isl_size total;
2645 
2646 	total = isl_basic_set_dim(graph->lp, isl_dim_set);
2647 	if (total < 0)
2648 		return isl_stat_error;
2649 
2650 	k = isl_basic_set_alloc_equality(graph->lp);
2651 	if (k < 0)
2652 		return isl_stat_error;
2653 	isl_seq_clr(graph->lp->eq[k], 1 + total);
2654 	isl_int_set_si(graph->lp->eq[k][1 + sum_pos], -1);
2655 	for (i = 0; i < n; ++i)
2656 		isl_int_set_si(graph->lp->eq[k][1 + first + i], 1);
2657 
2658 	return isl_stat_ok;
2659 }
2660 
2661 /* Add a constraint to graph->lp that equates the value at position
2662  * "sum_pos" to the sum of the parameter coefficients of all nodes.
2663  */
add_param_sum_constraint(struct isl_sched_graph * graph,int sum_pos)2664 static isl_stat add_param_sum_constraint(struct isl_sched_graph *graph,
2665 	int sum_pos)
2666 {
2667 	int i, j, k;
2668 	isl_size total;
2669 
2670 	total = isl_basic_set_dim(graph->lp, isl_dim_set);
2671 	if (total < 0)
2672 		return isl_stat_error;
2673 
2674 	k = isl_basic_set_alloc_equality(graph->lp);
2675 	if (k < 0)
2676 		return isl_stat_error;
2677 	isl_seq_clr(graph->lp->eq[k], 1 + total);
2678 	isl_int_set_si(graph->lp->eq[k][1 + sum_pos], -1);
2679 	for (i = 0; i < graph->n; ++i) {
2680 		int pos = 1 + node_par_coef_offset(&graph->node[i]);
2681 
2682 		for (j = 0; j < graph->node[i].nparam; ++j)
2683 			isl_int_set_si(graph->lp->eq[k][pos + j], 1);
2684 	}
2685 
2686 	return isl_stat_ok;
2687 }
2688 
2689 /* Add a constraint to graph->lp that equates the value at position
2690  * "sum_pos" to the sum of the variable coefficients of all nodes.
2691  */
add_var_sum_constraint(struct isl_sched_graph * graph,int sum_pos)2692 static isl_stat add_var_sum_constraint(struct isl_sched_graph *graph,
2693 	int sum_pos)
2694 {
2695 	int i, j, k;
2696 	isl_size total;
2697 
2698 	total = isl_basic_set_dim(graph->lp, isl_dim_set);
2699 	if (total < 0)
2700 		return isl_stat_error;
2701 
2702 	k = isl_basic_set_alloc_equality(graph->lp);
2703 	if (k < 0)
2704 		return isl_stat_error;
2705 	isl_seq_clr(graph->lp->eq[k], 1 + total);
2706 	isl_int_set_si(graph->lp->eq[k][1 + sum_pos], -1);
2707 	for (i = 0; i < graph->n; ++i) {
2708 		struct isl_sched_node *node = &graph->node[i];
2709 		int pos = 1 + node_var_coef_offset(node);
2710 
2711 		for (j = 0; j < 2 * node->nvar; ++j)
2712 			isl_int_set_si(graph->lp->eq[k][pos + j], 1);
2713 	}
2714 
2715 	return isl_stat_ok;
2716 }
2717 
2718 /* Construct an ILP problem for finding schedule coefficients
2719  * that result in non-negative, but small dependence distances
2720  * over all dependences.
2721  * In particular, the dependence distances over proximity edges
2722  * are bounded by m_0 + m_n n and we compute schedule coefficients
2723  * with small values (preferably zero) of m_n and m_0.
2724  *
2725  * All variables of the ILP are non-negative.  The actual coefficients
2726  * may be negative, so each coefficient is represented as the difference
2727  * of two non-negative variables.  The negative part always appears
2728  * immediately before the positive part.
2729  * Other than that, the variables have the following order
2730  *
2731  *	- sum of positive and negative parts of m_n coefficients
2732  *	- m_0
2733  *	- sum of all c_n coefficients
2734  *		(unconstrained when computing non-parametric schedules)
2735  *	- sum of positive and negative parts of all c_x coefficients
2736  *	- positive and negative parts of m_n coefficients
2737  *	- for each node
2738  *		- positive and negative parts of c_i_x, in opposite order
2739  *		- c_i_n (if parametric)
2740  *		- c_i_0
2741  *
2742  * The constraints are those from the edges plus two or three equalities
2743  * to express the sums.
2744  *
2745  * If "use_coincidence" is set, then we treat coincidence edges as local edges.
2746  * Otherwise, we ignore them.
2747  */
setup_lp(isl_ctx * ctx,struct isl_sched_graph * graph,int use_coincidence)2748 static isl_stat setup_lp(isl_ctx *ctx, struct isl_sched_graph *graph,
2749 	int use_coincidence)
2750 {
2751 	int i;
2752 	isl_size nparam;
2753 	unsigned total;
2754 	isl_space *space;
2755 	int parametric;
2756 	int param_pos;
2757 	int n_eq, n_ineq;
2758 
2759 	parametric = ctx->opt->schedule_parametric;
2760 	nparam = isl_space_dim(graph->node[0].space, isl_dim_param);
2761 	if (nparam < 0)
2762 		return isl_stat_error;
2763 	param_pos = 4;
2764 	total = param_pos + 2 * nparam;
2765 	for (i = 0; i < graph->n; ++i) {
2766 		struct isl_sched_node *node = &graph->node[graph->sorted[i]];
2767 		if (isl_sched_node_update_vmap(node) < 0)
2768 			return isl_stat_error;
2769 		node->start = total;
2770 		total += 1 + node->nparam + 2 * node->nvar;
2771 	}
2772 
2773 	if (count_constraints(graph, &n_eq, &n_ineq, use_coincidence) < 0)
2774 		return isl_stat_error;
2775 	if (count_bound_constant_constraints(ctx, graph, &n_eq, &n_ineq) < 0)
2776 		return isl_stat_error;
2777 	if (count_bound_coefficient_constraints(ctx, graph, &n_eq, &n_ineq) < 0)
2778 		return isl_stat_error;
2779 
2780 	space = isl_space_set_alloc(ctx, 0, total);
2781 	isl_basic_set_free(graph->lp);
2782 	n_eq += 2 + parametric;
2783 
2784 	graph->lp = isl_basic_set_alloc_space(space, 0, n_eq, n_ineq);
2785 
2786 	if (add_sum_constraint(graph, 0, param_pos, 2 * nparam) < 0)
2787 		return isl_stat_error;
2788 	if (parametric && add_param_sum_constraint(graph, 2) < 0)
2789 		return isl_stat_error;
2790 	if (add_var_sum_constraint(graph, 3) < 0)
2791 		return isl_stat_error;
2792 	if (add_bound_constant_constraints(ctx, graph) < 0)
2793 		return isl_stat_error;
2794 	if (add_bound_coefficient_constraints(ctx, graph) < 0)
2795 		return isl_stat_error;
2796 	if (add_all_validity_constraints(graph, use_coincidence) < 0)
2797 		return isl_stat_error;
2798 	if (add_all_proximity_constraints(graph, use_coincidence) < 0)
2799 		return isl_stat_error;
2800 
2801 	return isl_stat_ok;
2802 }
2803 
2804 /* Analyze the conflicting constraint found by
2805  * isl_tab_basic_set_non_trivial_lexmin.  If it corresponds to the validity
2806  * constraint of one of the edges between distinct nodes, living, moreover
2807  * in distinct SCCs, then record the source and sink SCC as this may
2808  * be a good place to cut between SCCs.
2809  */
check_conflict(int con,void * user)2810 static int check_conflict(int con, void *user)
2811 {
2812 	int i;
2813 	struct isl_sched_graph *graph = user;
2814 
2815 	if (graph->src_scc >= 0)
2816 		return 0;
2817 
2818 	con -= graph->lp->n_eq;
2819 
2820 	if (con >= graph->lp->n_ineq)
2821 		return 0;
2822 
2823 	for (i = 0; i < graph->n_edge; ++i) {
2824 		if (!is_validity(&graph->edge[i]))
2825 			continue;
2826 		if (graph->edge[i].src == graph->edge[i].dst)
2827 			continue;
2828 		if (graph->edge[i].src->scc == graph->edge[i].dst->scc)
2829 			continue;
2830 		if (graph->edge[i].start > con)
2831 			continue;
2832 		if (graph->edge[i].end <= con)
2833 			continue;
2834 		graph->src_scc = graph->edge[i].src->scc;
2835 		graph->dst_scc = graph->edge[i].dst->scc;
2836 	}
2837 
2838 	return 0;
2839 }
2840 
2841 /* Check whether the next schedule row of the given node needs to be
2842  * non-trivial.  Lower-dimensional domains may have some trivial rows,
2843  * but as soon as the number of remaining required non-trivial rows
2844  * is as large as the number or remaining rows to be computed,
2845  * all remaining rows need to be non-trivial.
2846  */
needs_row(struct isl_sched_graph * graph,struct isl_sched_node * node)2847 static int needs_row(struct isl_sched_graph *graph, struct isl_sched_node *node)
2848 {
2849 	return node->nvar - node->rank >= graph->maxvar - graph->n_row;
2850 }
2851 
2852 /* Construct a non-triviality region with triviality directions
2853  * corresponding to the rows of "indep".
2854  * The rows of "indep" are expressed in terms of the schedule coefficients c_i,
2855  * while the triviality directions are expressed in terms of
2856  * pairs of non-negative variables c^+_i - c^-_i, with c^-_i appearing
2857  * before c^+_i.  Furthermore,
2858  * the pairs of non-negative variables representing the coefficients
2859  * are stored in the opposite order.
2860  */
construct_trivial(__isl_keep isl_mat * indep)2861 static __isl_give isl_mat *construct_trivial(__isl_keep isl_mat *indep)
2862 {
2863 	isl_ctx *ctx;
2864 	isl_mat *mat;
2865 	int i, j;
2866 	isl_size n, n_var;
2867 
2868 	n = isl_mat_rows(indep);
2869 	n_var = isl_mat_cols(indep);
2870 	if (n < 0 || n_var < 0)
2871 		return NULL;
2872 
2873 	ctx = isl_mat_get_ctx(indep);
2874 	mat = isl_mat_alloc(ctx, n, 2 * n_var);
2875 	if (!mat)
2876 		return NULL;
2877 	for (i = 0; i < n; ++i) {
2878 		for (j = 0; j < n_var; ++j) {
2879 			int nj = n_var - 1 - j;
2880 			isl_int_neg(mat->row[i][2 * nj], indep->row[i][j]);
2881 			isl_int_set(mat->row[i][2 * nj + 1], indep->row[i][j]);
2882 		}
2883 	}
2884 
2885 	return mat;
2886 }
2887 
2888 /* Solve the ILP problem constructed in setup_lp.
2889  * For each node such that all the remaining rows of its schedule
2890  * need to be non-trivial, we construct a non-triviality region.
2891  * This region imposes that the next row is independent of previous rows.
2892  * In particular, the non-triviality region enforces that at least
2893  * one of the linear combinations in the rows of node->indep is non-zero.
2894  */
solve_lp(isl_ctx * ctx,struct isl_sched_graph * graph)2895 static __isl_give isl_vec *solve_lp(isl_ctx *ctx, struct isl_sched_graph *graph)
2896 {
2897 	int i;
2898 	isl_vec *sol;
2899 	isl_basic_set *lp;
2900 
2901 	for (i = 0; i < graph->n; ++i) {
2902 		struct isl_sched_node *node = &graph->node[i];
2903 		isl_mat *trivial;
2904 
2905 		graph->region[i].pos = node_var_coef_offset(node);
2906 		if (needs_row(graph, node))
2907 			trivial = construct_trivial(node->indep);
2908 		else
2909 			trivial = isl_mat_zero(ctx, 0, 0);
2910 		graph->region[i].trivial = trivial;
2911 	}
2912 	lp = isl_basic_set_copy(graph->lp);
2913 	sol = isl_tab_basic_set_non_trivial_lexmin(lp, 2, graph->n,
2914 				       graph->region, &check_conflict, graph);
2915 	for (i = 0; i < graph->n; ++i)
2916 		isl_mat_free(graph->region[i].trivial);
2917 	return sol;
2918 }
2919 
2920 /* Extract the coefficients for the variables of "node" from "sol".
2921  *
2922  * Each schedule coefficient c_i_x is represented as the difference
2923  * between two non-negative variables c_i_x^+ - c_i_x^-.
2924  * The c_i_x^- appear before their c_i_x^+ counterpart.
2925  * Furthermore, the order of these pairs is the opposite of that
2926  * of the corresponding coefficients.
2927  *
2928  * Return c_i_x = c_i_x^+ - c_i_x^-
2929  */
extract_var_coef(struct isl_sched_node * node,__isl_keep isl_vec * sol)2930 static __isl_give isl_vec *extract_var_coef(struct isl_sched_node *node,
2931 	__isl_keep isl_vec *sol)
2932 {
2933 	int i;
2934 	int pos;
2935 	isl_vec *csol;
2936 
2937 	if (!sol)
2938 		return NULL;
2939 	csol = isl_vec_alloc(isl_vec_get_ctx(sol), node->nvar);
2940 	if (!csol)
2941 		return NULL;
2942 
2943 	pos = 1 + node_var_coef_offset(node);
2944 	for (i = 0; i < node->nvar; ++i)
2945 		isl_int_sub(csol->el[node->nvar - 1 - i],
2946 			    sol->el[pos + 2 * i + 1], sol->el[pos + 2 * i]);
2947 
2948 	return csol;
2949 }
2950 
2951 /* Update the schedules of all nodes based on the given solution
2952  * of the LP problem.
2953  * The new row is added to the current band.
2954  * All possibly negative coefficients are encoded as a difference
2955  * of two non-negative variables, so we need to perform the subtraction
2956  * here.
2957  *
2958  * If coincident is set, then the caller guarantees that the new
2959  * row satisfies the coincidence constraints.
2960  */
update_schedule(struct isl_sched_graph * graph,__isl_take isl_vec * sol,int coincident)2961 static int update_schedule(struct isl_sched_graph *graph,
2962 	__isl_take isl_vec *sol, int coincident)
2963 {
2964 	int i, j;
2965 	isl_vec *csol = NULL;
2966 
2967 	if (!sol)
2968 		goto error;
2969 	if (sol->size == 0)
2970 		isl_die(sol->ctx, isl_error_internal,
2971 			"no solution found", goto error);
2972 	if (graph->n_total_row >= graph->max_row)
2973 		isl_die(sol->ctx, isl_error_internal,
2974 			"too many schedule rows", goto error);
2975 
2976 	for (i = 0; i < graph->n; ++i) {
2977 		struct isl_sched_node *node = &graph->node[i];
2978 		int pos;
2979 		isl_size row = isl_mat_rows(node->sched);
2980 
2981 		isl_vec_free(csol);
2982 		csol = extract_var_coef(node, sol);
2983 		if (row < 0 || !csol)
2984 			goto error;
2985 
2986 		isl_map_free(node->sched_map);
2987 		node->sched_map = NULL;
2988 		node->sched = isl_mat_add_rows(node->sched, 1);
2989 		if (!node->sched)
2990 			goto error;
2991 		pos = node_cst_coef_offset(node);
2992 		node->sched = isl_mat_set_element(node->sched,
2993 					row, 0, sol->el[1 + pos]);
2994 		pos = node_par_coef_offset(node);
2995 		for (j = 0; j < node->nparam; ++j)
2996 			node->sched = isl_mat_set_element(node->sched,
2997 					row, 1 + j, sol->el[1 + pos + j]);
2998 		for (j = 0; j < node->nvar; ++j)
2999 			node->sched = isl_mat_set_element(node->sched,
3000 					row, 1 + node->nparam + j, csol->el[j]);
3001 		node->coincident[graph->n_total_row] = coincident;
3002 	}
3003 	isl_vec_free(sol);
3004 	isl_vec_free(csol);
3005 
3006 	graph->n_row++;
3007 	graph->n_total_row++;
3008 
3009 	return 0;
3010 error:
3011 	isl_vec_free(sol);
3012 	isl_vec_free(csol);
3013 	return -1;
3014 }
3015 
3016 /* Convert row "row" of node->sched into an isl_aff living in "ls"
3017  * and return this isl_aff.
3018  */
extract_schedule_row(__isl_take isl_local_space * ls,struct isl_sched_node * node,int row)3019 static __isl_give isl_aff *extract_schedule_row(__isl_take isl_local_space *ls,
3020 	struct isl_sched_node *node, int row)
3021 {
3022 	int j;
3023 	isl_int v;
3024 	isl_aff *aff;
3025 
3026 	isl_int_init(v);
3027 
3028 	aff = isl_aff_zero_on_domain(ls);
3029 	if (isl_mat_get_element(node->sched, row, 0, &v) < 0)
3030 		goto error;
3031 	aff = isl_aff_set_constant(aff, v);
3032 	for (j = 0; j < node->nparam; ++j) {
3033 		if (isl_mat_get_element(node->sched, row, 1 + j, &v) < 0)
3034 			goto error;
3035 		aff = isl_aff_set_coefficient(aff, isl_dim_param, j, v);
3036 	}
3037 	for (j = 0; j < node->nvar; ++j) {
3038 		if (isl_mat_get_element(node->sched, row,
3039 					1 + node->nparam + j, &v) < 0)
3040 			goto error;
3041 		aff = isl_aff_set_coefficient(aff, isl_dim_in, j, v);
3042 	}
3043 
3044 	isl_int_clear(v);
3045 
3046 	return aff;
3047 error:
3048 	isl_int_clear(v);
3049 	isl_aff_free(aff);
3050 	return NULL;
3051 }
3052 
3053 /* Convert the "n" rows starting at "first" of node->sched into a multi_aff
3054  * and return this multi_aff.
3055  *
3056  * The result is defined over the uncompressed node domain.
3057  */
isl_sched_node_extract_partial_schedule_multi_aff(struct isl_sched_node * node,int first,int n)3058 __isl_give isl_multi_aff *isl_sched_node_extract_partial_schedule_multi_aff(
3059 	struct isl_sched_node *node, int first, int n)
3060 {
3061 	int i;
3062 	isl_space *space;
3063 	isl_local_space *ls;
3064 	isl_aff *aff;
3065 	isl_multi_aff *ma;
3066 	isl_size nrow;
3067 
3068 	if (!node)
3069 		return NULL;
3070 	nrow = isl_mat_rows(node->sched);
3071 	if (nrow < 0)
3072 		return NULL;
3073 	if (node->compressed)
3074 		space = isl_pw_multi_aff_get_domain_space(node->decompress);
3075 	else
3076 		space = isl_space_copy(node->space);
3077 	ls = isl_local_space_from_space(isl_space_copy(space));
3078 	space = isl_space_from_domain(space);
3079 	space = isl_space_add_dims(space, isl_dim_out, n);
3080 	ma = isl_multi_aff_zero(space);
3081 
3082 	for (i = first; i < first + n; ++i) {
3083 		aff = extract_schedule_row(isl_local_space_copy(ls), node, i);
3084 		ma = isl_multi_aff_set_aff(ma, i - first, aff);
3085 	}
3086 
3087 	isl_local_space_free(ls);
3088 
3089 	if (node->compressed)
3090 		ma = isl_multi_aff_pullback_multi_aff(ma,
3091 					isl_multi_aff_copy(node->compress));
3092 
3093 	return ma;
3094 }
3095 
3096 /* Convert node->sched into a multi_aff and return this multi_aff.
3097  *
3098  * The result is defined over the uncompressed node domain.
3099  */
node_extract_schedule_multi_aff(struct isl_sched_node * node)3100 static __isl_give isl_multi_aff *node_extract_schedule_multi_aff(
3101 	struct isl_sched_node *node)
3102 {
3103 	isl_size nrow;
3104 
3105 	nrow = isl_mat_rows(node->sched);
3106 	if (nrow < 0)
3107 		return NULL;
3108 	return isl_sched_node_extract_partial_schedule_multi_aff(node, 0, nrow);
3109 }
3110 
3111 /* Convert node->sched into a map and return this map.
3112  *
3113  * The result is cached in node->sched_map, which needs to be released
3114  * whenever node->sched is updated.
3115  * It is defined over the uncompressed node domain.
3116  */
node_extract_schedule(struct isl_sched_node * node)3117 static __isl_give isl_map *node_extract_schedule(struct isl_sched_node *node)
3118 {
3119 	if (!node->sched_map) {
3120 		isl_multi_aff *ma;
3121 
3122 		ma = node_extract_schedule_multi_aff(node);
3123 		node->sched_map = isl_map_from_multi_aff(ma);
3124 	}
3125 
3126 	return isl_map_copy(node->sched_map);
3127 }
3128 
3129 /* Construct a map that can be used to update a dependence relation
3130  * based on the current schedule.
3131  * That is, construct a map expressing that source and sink
3132  * are executed within the same iteration of the current schedule.
3133  * This map can then be intersected with the dependence relation.
3134  * This is not the most efficient way, but this shouldn't be a critical
3135  * operation.
3136  */
specializer(struct isl_sched_node * src,struct isl_sched_node * dst)3137 static __isl_give isl_map *specializer(struct isl_sched_node *src,
3138 	struct isl_sched_node *dst)
3139 {
3140 	isl_map *src_sched, *dst_sched;
3141 
3142 	src_sched = node_extract_schedule(src);
3143 	dst_sched = node_extract_schedule(dst);
3144 	return isl_map_apply_range(src_sched, isl_map_reverse(dst_sched));
3145 }
3146 
3147 /* Intersect the domains of the nested relations in domain and range
3148  * of "umap" with "map".
3149  */
intersect_domains(__isl_take isl_union_map * umap,__isl_keep isl_map * map)3150 static __isl_give isl_union_map *intersect_domains(
3151 	__isl_take isl_union_map *umap, __isl_keep isl_map *map)
3152 {
3153 	isl_union_set *uset;
3154 
3155 	umap = isl_union_map_zip(umap);
3156 	uset = isl_union_set_from_set(isl_map_wrap(isl_map_copy(map)));
3157 	umap = isl_union_map_intersect_domain(umap, uset);
3158 	umap = isl_union_map_zip(umap);
3159 	return umap;
3160 }
3161 
3162 /* Update the dependence relation of the given edge based
3163  * on the current schedule.
3164  * If the dependence is carried completely by the current schedule, then
3165  * it is removed from the edge_tables.  It is kept in the list of edges
3166  * as otherwise all edge_tables would have to be recomputed.
3167  *
3168  * If the edge is of a type that can appear multiple times
3169  * between the same pair of nodes, then it is added to
3170  * the edge table (again).  This prevents the situation
3171  * where none of these edges is referenced from the edge table
3172  * because the one that was referenced turned out to be empty and
3173  * was therefore removed from the table.
3174  */
update_edge(isl_ctx * ctx,struct isl_sched_graph * graph,struct isl_sched_edge * edge)3175 static isl_stat update_edge(isl_ctx *ctx, struct isl_sched_graph *graph,
3176 	struct isl_sched_edge *edge)
3177 {
3178 	int empty;
3179 	isl_map *id;
3180 
3181 	id = specializer(edge->src, edge->dst);
3182 	edge->map = isl_map_intersect(edge->map, isl_map_copy(id));
3183 	if (!edge->map)
3184 		goto error;
3185 
3186 	if (edge->tagged_condition) {
3187 		edge->tagged_condition =
3188 			intersect_domains(edge->tagged_condition, id);
3189 		if (!edge->tagged_condition)
3190 			goto error;
3191 	}
3192 	if (edge->tagged_validity) {
3193 		edge->tagged_validity =
3194 			intersect_domains(edge->tagged_validity, id);
3195 		if (!edge->tagged_validity)
3196 			goto error;
3197 	}
3198 
3199 	empty = isl_map_plain_is_empty(edge->map);
3200 	if (empty < 0)
3201 		goto error;
3202 	if (empty) {
3203 		if (graph_remove_edge(graph, edge) < 0)
3204 			goto error;
3205 	} else if (is_multi_edge_type(edge)) {
3206 		if (graph_edge_tables_add(ctx, graph, edge) < 0)
3207 			goto error;
3208 	}
3209 
3210 	isl_map_free(id);
3211 	return isl_stat_ok;
3212 error:
3213 	isl_map_free(id);
3214 	return isl_stat_error;
3215 }
3216 
3217 /* Does the domain of "umap" intersect "uset"?
3218  */
domain_intersects(__isl_keep isl_union_map * umap,__isl_keep isl_union_set * uset)3219 static int domain_intersects(__isl_keep isl_union_map *umap,
3220 	__isl_keep isl_union_set *uset)
3221 {
3222 	int empty;
3223 
3224 	umap = isl_union_map_copy(umap);
3225 	umap = isl_union_map_intersect_domain(umap, isl_union_set_copy(uset));
3226 	empty = isl_union_map_is_empty(umap);
3227 	isl_union_map_free(umap);
3228 
3229 	return empty < 0 ? -1 : !empty;
3230 }
3231 
3232 /* Does the range of "umap" intersect "uset"?
3233  */
range_intersects(__isl_keep isl_union_map * umap,__isl_keep isl_union_set * uset)3234 static int range_intersects(__isl_keep isl_union_map *umap,
3235 	__isl_keep isl_union_set *uset)
3236 {
3237 	int empty;
3238 
3239 	umap = isl_union_map_copy(umap);
3240 	umap = isl_union_map_intersect_range(umap, isl_union_set_copy(uset));
3241 	empty = isl_union_map_is_empty(umap);
3242 	isl_union_map_free(umap);
3243 
3244 	return empty < 0 ? -1 : !empty;
3245 }
3246 
3247 /* Are the condition dependences of "edge" local with respect to
3248  * the current schedule?
3249  *
3250  * That is, are domain and range of the condition dependences mapped
3251  * to the same point?
3252  *
3253  * In other words, is the condition false?
3254  */
is_condition_false(struct isl_sched_edge * edge)3255 static int is_condition_false(struct isl_sched_edge *edge)
3256 {
3257 	isl_union_map *umap;
3258 	isl_map *map, *sched, *test;
3259 	int empty, local;
3260 
3261 	empty = isl_union_map_is_empty(edge->tagged_condition);
3262 	if (empty < 0 || empty)
3263 		return empty;
3264 
3265 	umap = isl_union_map_copy(edge->tagged_condition);
3266 	umap = isl_union_map_zip(umap);
3267 	umap = isl_union_set_unwrap(isl_union_map_domain(umap));
3268 	map = isl_map_from_union_map(umap);
3269 
3270 	sched = node_extract_schedule(edge->src);
3271 	map = isl_map_apply_domain(map, sched);
3272 	sched = node_extract_schedule(edge->dst);
3273 	map = isl_map_apply_range(map, sched);
3274 
3275 	test = isl_map_identity(isl_map_get_space(map));
3276 	local = isl_map_is_subset(map, test);
3277 	isl_map_free(map);
3278 	isl_map_free(test);
3279 
3280 	return local;
3281 }
3282 
3283 /* For each conditional validity constraint that is adjacent
3284  * to a condition with domain in condition_source or range in condition_sink,
3285  * turn it into an unconditional validity constraint.
3286  */
unconditionalize_adjacent_validity(struct isl_sched_graph * graph,__isl_take isl_union_set * condition_source,__isl_take isl_union_set * condition_sink)3287 static int unconditionalize_adjacent_validity(struct isl_sched_graph *graph,
3288 	__isl_take isl_union_set *condition_source,
3289 	__isl_take isl_union_set *condition_sink)
3290 {
3291 	int i;
3292 
3293 	condition_source = isl_union_set_coalesce(condition_source);
3294 	condition_sink = isl_union_set_coalesce(condition_sink);
3295 
3296 	for (i = 0; i < graph->n_edge; ++i) {
3297 		int adjacent;
3298 		isl_union_map *validity;
3299 
3300 		if (!isl_sched_edge_is_conditional_validity(&graph->edge[i]))
3301 			continue;
3302 		if (is_validity(&graph->edge[i]))
3303 			continue;
3304 
3305 		validity = graph->edge[i].tagged_validity;
3306 		adjacent = domain_intersects(validity, condition_sink);
3307 		if (adjacent >= 0 && !adjacent)
3308 			adjacent = range_intersects(validity, condition_source);
3309 		if (adjacent < 0)
3310 			goto error;
3311 		if (!adjacent)
3312 			continue;
3313 
3314 		set_validity(&graph->edge[i]);
3315 	}
3316 
3317 	isl_union_set_free(condition_source);
3318 	isl_union_set_free(condition_sink);
3319 	return 0;
3320 error:
3321 	isl_union_set_free(condition_source);
3322 	isl_union_set_free(condition_sink);
3323 	return -1;
3324 }
3325 
3326 /* Update the dependence relations of all edges based on the current schedule
3327  * and enforce conditional validity constraints that are adjacent
3328  * to satisfied condition constraints.
3329  *
3330  * First check if any of the condition constraints are satisfied
3331  * (i.e., not local to the outer schedule) and keep track of
3332  * their domain and range.
3333  * Then update all dependence relations (which removes the non-local
3334  * constraints).
3335  * Finally, if any condition constraints turned out to be satisfied,
3336  * then turn all adjacent conditional validity constraints into
3337  * unconditional validity constraints.
3338  */
update_edges(isl_ctx * ctx,struct isl_sched_graph * graph)3339 static int update_edges(isl_ctx *ctx, struct isl_sched_graph *graph)
3340 {
3341 	int i;
3342 	int any = 0;
3343 	isl_union_set *source, *sink;
3344 
3345 	source = isl_union_set_empty(isl_space_params_alloc(ctx, 0));
3346 	sink = isl_union_set_empty(isl_space_params_alloc(ctx, 0));
3347 	for (i = 0; i < graph->n_edge; ++i) {
3348 		int local;
3349 		isl_union_set *uset;
3350 		isl_union_map *umap;
3351 
3352 		if (!isl_sched_edge_is_condition(&graph->edge[i]))
3353 			continue;
3354 		if (is_local(&graph->edge[i]))
3355 			continue;
3356 		local = is_condition_false(&graph->edge[i]);
3357 		if (local < 0)
3358 			goto error;
3359 		if (local)
3360 			continue;
3361 
3362 		any = 1;
3363 
3364 		umap = isl_union_map_copy(graph->edge[i].tagged_condition);
3365 		uset = isl_union_map_domain(umap);
3366 		source = isl_union_set_union(source, uset);
3367 
3368 		umap = isl_union_map_copy(graph->edge[i].tagged_condition);
3369 		uset = isl_union_map_range(umap);
3370 		sink = isl_union_set_union(sink, uset);
3371 	}
3372 
3373 	for (i = 0; i < graph->n_edge; ++i) {
3374 		if (update_edge(ctx, graph, &graph->edge[i]) < 0)
3375 			goto error;
3376 	}
3377 
3378 	if (any)
3379 		return unconditionalize_adjacent_validity(graph, source, sink);
3380 
3381 	isl_union_set_free(source);
3382 	isl_union_set_free(sink);
3383 	return 0;
3384 error:
3385 	isl_union_set_free(source);
3386 	isl_union_set_free(sink);
3387 	return -1;
3388 }
3389 
next_band(struct isl_sched_graph * graph)3390 static void next_band(struct isl_sched_graph *graph)
3391 {
3392 	graph->band_start = graph->n_total_row;
3393 }
3394 
3395 /* Return the union of the universe domains of the nodes in "graph"
3396  * that satisfy "pred".
3397  */
isl_sched_graph_domain(isl_ctx * ctx,struct isl_sched_graph * graph,int (* pred)(struct isl_sched_node * node,int data),int data)3398 static __isl_give isl_union_set *isl_sched_graph_domain(isl_ctx *ctx,
3399 	struct isl_sched_graph *graph,
3400 	int (*pred)(struct isl_sched_node *node, int data), int data)
3401 {
3402 	int i;
3403 	isl_set *set;
3404 	isl_union_set *dom;
3405 
3406 	for (i = 0; i < graph->n; ++i)
3407 		if (pred(&graph->node[i], data))
3408 			break;
3409 
3410 	if (i >= graph->n)
3411 		isl_die(ctx, isl_error_internal,
3412 			"empty component", return NULL);
3413 
3414 	set = isl_set_universe(isl_space_copy(graph->node[i].space));
3415 	dom = isl_union_set_from_set(set);
3416 
3417 	for (i = i + 1; i < graph->n; ++i) {
3418 		if (!pred(&graph->node[i], data))
3419 			continue;
3420 		set = isl_set_universe(isl_space_copy(graph->node[i].space));
3421 		dom = isl_union_set_union(dom, isl_union_set_from_set(set));
3422 	}
3423 
3424 	return dom;
3425 }
3426 
3427 /* Return a union of universe domains corresponding to the nodes
3428  * in the SCC with index "scc".
3429  */
isl_sched_graph_extract_scc(isl_ctx * ctx,struct isl_sched_graph * graph,int scc)3430 __isl_give isl_union_set *isl_sched_graph_extract_scc(isl_ctx *ctx,
3431 	struct isl_sched_graph *graph, int scc)
3432 {
3433 	return isl_sched_graph_domain(ctx, graph,
3434 					&isl_sched_node_scc_exactly, scc);
3435 }
3436 
3437 /* Return a list of unions of universe domains, where each element
3438  * in the list corresponds to an SCC (or WCC) indexed by node->scc.
3439  */
isl_sched_graph_extract_sccs(isl_ctx * ctx,struct isl_sched_graph * graph)3440 __isl_give isl_union_set_list *isl_sched_graph_extract_sccs(isl_ctx *ctx,
3441 	struct isl_sched_graph *graph)
3442 {
3443 	int i;
3444 	isl_union_set_list *filters;
3445 
3446 	filters = isl_union_set_list_alloc(ctx, graph->scc);
3447 	for (i = 0; i < graph->scc; ++i) {
3448 		isl_union_set *dom;
3449 
3450 		dom = isl_sched_graph_extract_scc(ctx, graph, i);
3451 		filters = isl_union_set_list_add(filters, dom);
3452 	}
3453 
3454 	return filters;
3455 }
3456 
3457 /* Return a list of two unions of universe domains, one for the SCCs up
3458  * to and including graph->src_scc and another for the other SCCs.
3459  */
extract_split(isl_ctx * ctx,struct isl_sched_graph * graph)3460 static __isl_give isl_union_set_list *extract_split(isl_ctx *ctx,
3461 	struct isl_sched_graph *graph)
3462 {
3463 	isl_union_set *dom;
3464 	isl_union_set_list *filters;
3465 
3466 	filters = isl_union_set_list_alloc(ctx, 2);
3467 	dom = isl_sched_graph_domain(ctx, graph,
3468 					&node_scc_at_most, graph->src_scc);
3469 	filters = isl_union_set_list_add(filters, dom);
3470 	dom = isl_sched_graph_domain(ctx, graph,
3471 					&node_scc_at_least, graph->src_scc + 1);
3472 	filters = isl_union_set_list_add(filters, dom);
3473 
3474 	return filters;
3475 }
3476 
3477 /* Copy nodes that satisfy node_pred from the src dependence graph
3478  * to the dst dependence graph.
3479  */
copy_nodes(struct isl_sched_graph * dst,struct isl_sched_graph * src,int (* node_pred)(struct isl_sched_node * node,int data),int data)3480 static isl_stat copy_nodes(struct isl_sched_graph *dst,
3481 	struct isl_sched_graph *src,
3482 	int (*node_pred)(struct isl_sched_node *node, int data), int data)
3483 {
3484 	int i;
3485 
3486 	dst->n = 0;
3487 	for (i = 0; i < src->n; ++i) {
3488 		int j;
3489 
3490 		if (!node_pred(&src->node[i], data))
3491 			continue;
3492 
3493 		j = dst->n;
3494 		dst->node[j].space = isl_space_copy(src->node[i].space);
3495 		dst->node[j].compressed = src->node[i].compressed;
3496 		dst->node[j].hull = isl_set_copy(src->node[i].hull);
3497 		dst->node[j].compress =
3498 			isl_multi_aff_copy(src->node[i].compress);
3499 		dst->node[j].decompress =
3500 			isl_pw_multi_aff_copy(src->node[i].decompress);
3501 		dst->node[j].nvar = src->node[i].nvar;
3502 		dst->node[j].nparam = src->node[i].nparam;
3503 		dst->node[j].sched = isl_mat_copy(src->node[i].sched);
3504 		dst->node[j].sched_map = isl_map_copy(src->node[i].sched_map);
3505 		dst->node[j].coincident = src->node[i].coincident;
3506 		dst->node[j].sizes = isl_multi_val_copy(src->node[i].sizes);
3507 		dst->node[j].bounds = isl_basic_set_copy(src->node[i].bounds);
3508 		dst->node[j].max = isl_vec_copy(src->node[i].max);
3509 		dst->n++;
3510 
3511 		if (!dst->node[j].space || !dst->node[j].sched)
3512 			return isl_stat_error;
3513 		if (dst->node[j].compressed &&
3514 		    (!dst->node[j].hull || !dst->node[j].compress ||
3515 		     !dst->node[j].decompress))
3516 			return isl_stat_error;
3517 	}
3518 
3519 	return isl_stat_ok;
3520 }
3521 
3522 /* Copy non-empty edges that satisfy edge_pred from the src dependence graph
3523  * to the dst dependence graph.
3524  * If the source or destination node of the edge is not in the destination
3525  * graph, then it must be a backward proximity edge and it should simply
3526  * be ignored.
3527  */
copy_edges(isl_ctx * ctx,struct isl_sched_graph * dst,struct isl_sched_graph * src,int (* edge_pred)(struct isl_sched_edge * edge,int data),int data)3528 static isl_stat copy_edges(isl_ctx *ctx, struct isl_sched_graph *dst,
3529 	struct isl_sched_graph *src,
3530 	int (*edge_pred)(struct isl_sched_edge *edge, int data), int data)
3531 {
3532 	int i;
3533 
3534 	dst->n_edge = 0;
3535 	for (i = 0; i < src->n_edge; ++i) {
3536 		struct isl_sched_edge *edge = &src->edge[i];
3537 		isl_map *map;
3538 		isl_union_map *tagged_condition;
3539 		isl_union_map *tagged_validity;
3540 		struct isl_sched_node *dst_src, *dst_dst;
3541 
3542 		if (!edge_pred(edge, data))
3543 			continue;
3544 
3545 		if (isl_map_plain_is_empty(edge->map))
3546 			continue;
3547 
3548 		dst_src = isl_sched_graph_find_node(ctx, dst, edge->src->space);
3549 		dst_dst = isl_sched_graph_find_node(ctx, dst, edge->dst->space);
3550 		if (!dst_src || !dst_dst)
3551 			return isl_stat_error;
3552 		if (!isl_sched_graph_is_node(dst, dst_src) ||
3553 		    !isl_sched_graph_is_node(dst, dst_dst)) {
3554 			if (is_validity(edge) ||
3555 			    isl_sched_edge_is_conditional_validity(edge))
3556 				isl_die(ctx, isl_error_internal,
3557 					"backward (conditional) validity edge",
3558 					return isl_stat_error);
3559 			continue;
3560 		}
3561 
3562 		map = isl_map_copy(edge->map);
3563 		tagged_condition = isl_union_map_copy(edge->tagged_condition);
3564 		tagged_validity = isl_union_map_copy(edge->tagged_validity);
3565 
3566 		dst->edge[dst->n_edge].src = dst_src;
3567 		dst->edge[dst->n_edge].dst = dst_dst;
3568 		dst->edge[dst->n_edge].map = map;
3569 		dst->edge[dst->n_edge].tagged_condition = tagged_condition;
3570 		dst->edge[dst->n_edge].tagged_validity = tagged_validity;
3571 		dst->edge[dst->n_edge].types = edge->types;
3572 		dst->n_edge++;
3573 
3574 		if (edge->tagged_condition && !tagged_condition)
3575 			return isl_stat_error;
3576 		if (edge->tagged_validity && !tagged_validity)
3577 			return isl_stat_error;
3578 
3579 		if (graph_edge_tables_add(ctx, dst,
3580 					    &dst->edge[dst->n_edge - 1]) < 0)
3581 			return isl_stat_error;
3582 	}
3583 
3584 	return isl_stat_ok;
3585 }
3586 
3587 /* Compute the maximal number of variables over all nodes.
3588  * This is the maximal number of linearly independent schedule
3589  * rows that we need to compute.
3590  * Just in case we end up in a part of the dependence graph
3591  * with only lower-dimensional domains, we make sure we will
3592  * compute the required amount of extra linearly independent rows.
3593  */
isl_sched_graph_compute_maxvar(struct isl_sched_graph * graph)3594 isl_stat isl_sched_graph_compute_maxvar(struct isl_sched_graph *graph)
3595 {
3596 	int i;
3597 
3598 	graph->maxvar = 0;
3599 	for (i = 0; i < graph->n; ++i) {
3600 		struct isl_sched_node *node = &graph->node[i];
3601 		int nvar;
3602 
3603 		if (isl_sched_node_update_vmap(node) < 0)
3604 			return isl_stat_error;
3605 		nvar = node->nvar + graph->n_row - node->rank;
3606 		if (nvar > graph->maxvar)
3607 			graph->maxvar = nvar;
3608 	}
3609 
3610 	return isl_stat_ok;
3611 }
3612 
3613 /* Extract the subgraph of "graph" that consists of the nodes satisfying
3614  * "node_pred" and the edges satisfying "edge_pred" and store
3615  * the result in "sub".
3616  */
isl_sched_graph_extract_sub_graph(isl_ctx * ctx,struct isl_sched_graph * graph,int (* node_pred)(struct isl_sched_node * node,int data),int (* edge_pred)(struct isl_sched_edge * edge,int data),int data,struct isl_sched_graph * sub)3617 isl_stat isl_sched_graph_extract_sub_graph(isl_ctx *ctx,
3618 	struct isl_sched_graph *graph,
3619 	int (*node_pred)(struct isl_sched_node *node, int data),
3620 	int (*edge_pred)(struct isl_sched_edge *edge, int data),
3621 	int data, struct isl_sched_graph *sub)
3622 {
3623 	int i, n = 0, n_edge = 0;
3624 	int t;
3625 
3626 	for (i = 0; i < graph->n; ++i)
3627 		if (node_pred(&graph->node[i], data))
3628 			++n;
3629 	for (i = 0; i < graph->n_edge; ++i)
3630 		if (edge_pred(&graph->edge[i], data))
3631 			++n_edge;
3632 	if (graph_alloc(ctx, sub, n, n_edge) < 0)
3633 		return isl_stat_error;
3634 	sub->root = graph->root;
3635 	if (copy_nodes(sub, graph, node_pred, data) < 0)
3636 		return isl_stat_error;
3637 	if (graph_init_table(ctx, sub) < 0)
3638 		return isl_stat_error;
3639 	for (t = 0; t <= isl_edge_last; ++t)
3640 		sub->max_edge[t] = graph->max_edge[t];
3641 	if (graph_init_edge_tables(ctx, sub) < 0)
3642 		return isl_stat_error;
3643 	if (copy_edges(ctx, sub, graph, edge_pred, data) < 0)
3644 		return isl_stat_error;
3645 	sub->n_row = graph->n_row;
3646 	sub->max_row = graph->max_row;
3647 	sub->n_total_row = graph->n_total_row;
3648 	sub->band_start = graph->band_start;
3649 
3650 	return isl_stat_ok;
3651 }
3652 
3653 static __isl_give isl_schedule_node *compute_schedule(isl_schedule_node *node,
3654 	struct isl_sched_graph *graph);
3655 static __isl_give isl_schedule_node *compute_schedule_wcc(
3656 	isl_schedule_node *node, struct isl_sched_graph *graph);
3657 
3658 /* Compute a schedule for a subgraph of "graph".  In particular, for
3659  * the graph composed of nodes that satisfy node_pred and edges that
3660  * that satisfy edge_pred.
3661  * If the subgraph is known to consist of a single component, then wcc should
3662  * be set and then we call compute_schedule_wcc on the constructed subgraph.
3663  * Otherwise, we call compute_schedule, which will check whether the subgraph
3664  * is connected.
3665  *
3666  * The schedule is inserted at "node" and the updated schedule node
3667  * is returned.
3668  */
compute_sub_schedule(__isl_take isl_schedule_node * node,isl_ctx * ctx,struct isl_sched_graph * graph,int (* node_pred)(struct isl_sched_node * node,int data),int (* edge_pred)(struct isl_sched_edge * edge,int data),int data,int wcc)3669 static __isl_give isl_schedule_node *compute_sub_schedule(
3670 	__isl_take isl_schedule_node *node, isl_ctx *ctx,
3671 	struct isl_sched_graph *graph,
3672 	int (*node_pred)(struct isl_sched_node *node, int data),
3673 	int (*edge_pred)(struct isl_sched_edge *edge, int data),
3674 	int data, int wcc)
3675 {
3676 	struct isl_sched_graph split = { 0 };
3677 
3678 	if (isl_sched_graph_extract_sub_graph(ctx, graph, node_pred, edge_pred,
3679 						data, &split) < 0)
3680 		goto error;
3681 
3682 	if (wcc)
3683 		node = compute_schedule_wcc(node, &split);
3684 	else
3685 		node = compute_schedule(node, &split);
3686 
3687 	isl_sched_graph_free(ctx, &split);
3688 	return node;
3689 error:
3690 	isl_sched_graph_free(ctx, &split);
3691 	return isl_schedule_node_free(node);
3692 }
3693 
isl_sched_edge_scc_exactly(struct isl_sched_edge * edge,int scc)3694 int isl_sched_edge_scc_exactly(struct isl_sched_edge *edge, int scc)
3695 {
3696 	return edge->src->scc == scc && edge->dst->scc == scc;
3697 }
3698 
edge_dst_scc_at_most(struct isl_sched_edge * edge,int scc)3699 static int edge_dst_scc_at_most(struct isl_sched_edge *edge, int scc)
3700 {
3701 	return edge->dst->scc <= scc;
3702 }
3703 
edge_src_scc_at_least(struct isl_sched_edge * edge,int scc)3704 static int edge_src_scc_at_least(struct isl_sched_edge *edge, int scc)
3705 {
3706 	return edge->src->scc >= scc;
3707 }
3708 
3709 /* Reset the current band by dropping all its schedule rows.
3710  */
reset_band(struct isl_sched_graph * graph)3711 static isl_stat reset_band(struct isl_sched_graph *graph)
3712 {
3713 	int i;
3714 	int drop;
3715 
3716 	drop = graph->n_total_row - graph->band_start;
3717 	graph->n_total_row -= drop;
3718 	graph->n_row -= drop;
3719 
3720 	for (i = 0; i < graph->n; ++i) {
3721 		struct isl_sched_node *node = &graph->node[i];
3722 
3723 		isl_map_free(node->sched_map);
3724 		node->sched_map = NULL;
3725 
3726 		node->sched = isl_mat_drop_rows(node->sched,
3727 						graph->band_start, drop);
3728 
3729 		if (!node->sched)
3730 			return isl_stat_error;
3731 	}
3732 
3733 	return isl_stat_ok;
3734 }
3735 
3736 /* Split the current graph into two parts and compute a schedule for each
3737  * part individually.  In particular, one part consists of all SCCs up
3738  * to and including graph->src_scc, while the other part contains the other
3739  * SCCs.  The split is enforced by a sequence node inserted at position "node"
3740  * in the schedule tree.  Return the updated schedule node.
3741  * If either of these two parts consists of a sequence, then it is spliced
3742  * into the sequence containing the two parts.
3743  *
3744  * The current band is reset. It would be possible to reuse
3745  * the previously computed rows as the first rows in the next
3746  * band, but recomputing them may result in better rows as we are looking
3747  * at a smaller part of the dependence graph.
3748  */
compute_split_schedule(__isl_take isl_schedule_node * node,struct isl_sched_graph * graph)3749 static __isl_give isl_schedule_node *compute_split_schedule(
3750 	__isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
3751 {
3752 	isl_ctx *ctx;
3753 	isl_union_set_list *filters;
3754 
3755 	if (!node)
3756 		return NULL;
3757 
3758 	if (reset_band(graph) < 0)
3759 		return isl_schedule_node_free(node);
3760 
3761 	next_band(graph);
3762 
3763 	ctx = isl_schedule_node_get_ctx(node);
3764 	filters = extract_split(ctx, graph);
3765 	node = isl_schedule_node_insert_sequence(node, filters);
3766 	node = isl_schedule_node_grandchild(node, 1, 0);
3767 
3768 	node = compute_sub_schedule(node, ctx, graph,
3769 				&node_scc_at_least, &edge_src_scc_at_least,
3770 				graph->src_scc + 1, 0);
3771 	node = isl_schedule_node_grandparent(node);
3772 	node = isl_schedule_node_grandchild(node, 0, 0);
3773 	node = compute_sub_schedule(node, ctx, graph,
3774 				&node_scc_at_most, &edge_dst_scc_at_most,
3775 				graph->src_scc, 0);
3776 	node = isl_schedule_node_grandparent(node);
3777 
3778 	node = isl_schedule_node_sequence_splice_children(node);
3779 
3780 	return node;
3781 }
3782 
3783 /* Insert a band node at position "node" in the schedule tree corresponding
3784  * to the current band in "graph".  Mark the band node permutable
3785  * if "permutable" is set.
3786  * The partial schedules and the coincidence property are extracted
3787  * from the graph nodes.
3788  * Return the updated schedule node.
3789  */
insert_current_band(__isl_take isl_schedule_node * node,struct isl_sched_graph * graph,int permutable)3790 static __isl_give isl_schedule_node *insert_current_band(
3791 	__isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
3792 	int permutable)
3793 {
3794 	int i;
3795 	int start, end, n;
3796 	isl_multi_aff *ma;
3797 	isl_multi_pw_aff *mpa;
3798 	isl_multi_union_pw_aff *mupa;
3799 
3800 	if (!node)
3801 		return NULL;
3802 
3803 	if (graph->n < 1)
3804 		isl_die(isl_schedule_node_get_ctx(node), isl_error_internal,
3805 			"graph should have at least one node",
3806 			return isl_schedule_node_free(node));
3807 
3808 	start = graph->band_start;
3809 	end = graph->n_total_row;
3810 	n = end - start;
3811 
3812 	ma = isl_sched_node_extract_partial_schedule_multi_aff(&graph->node[0],
3813 								start, n);
3814 	mpa = isl_multi_pw_aff_from_multi_aff(ma);
3815 	mupa = isl_multi_union_pw_aff_from_multi_pw_aff(mpa);
3816 
3817 	for (i = 1; i < graph->n; ++i) {
3818 		isl_multi_union_pw_aff *mupa_i;
3819 
3820 		ma = isl_sched_node_extract_partial_schedule_multi_aff(
3821 						&graph->node[i], start, n);
3822 		mpa = isl_multi_pw_aff_from_multi_aff(ma);
3823 		mupa_i = isl_multi_union_pw_aff_from_multi_pw_aff(mpa);
3824 		mupa = isl_multi_union_pw_aff_union_add(mupa, mupa_i);
3825 	}
3826 	node = isl_schedule_node_insert_partial_schedule(node, mupa);
3827 
3828 	for (i = 0; i < n; ++i)
3829 		node = isl_schedule_node_band_member_set_coincident(node, i,
3830 					graph->node[0].coincident[start + i]);
3831 	node = isl_schedule_node_band_set_permutable(node, permutable);
3832 
3833 	return node;
3834 }
3835 
3836 /* Update the dependence relations based on the current schedule,
3837  * add the current band to "node" and then continue with the computation
3838  * of the next band.
3839  * Return the updated schedule node.
3840  */
compute_next_band(__isl_take isl_schedule_node * node,struct isl_sched_graph * graph,int permutable)3841 static __isl_give isl_schedule_node *compute_next_band(
3842 	__isl_take isl_schedule_node *node,
3843 	struct isl_sched_graph *graph, int permutable)
3844 {
3845 	isl_ctx *ctx;
3846 
3847 	if (!node)
3848 		return NULL;
3849 
3850 	ctx = isl_schedule_node_get_ctx(node);
3851 	if (update_edges(ctx, graph) < 0)
3852 		return isl_schedule_node_free(node);
3853 	node = insert_current_band(node, graph, permutable);
3854 	next_band(graph);
3855 
3856 	node = isl_schedule_node_child(node, 0);
3857 	node = compute_schedule(node, graph);
3858 	node = isl_schedule_node_parent(node);
3859 
3860 	return node;
3861 }
3862 
3863 /* Add the constraints "coef" derived from an edge from "node" to itself
3864  * to graph->lp in order to respect the dependences and to try and carry them.
3865  * "pos" is the sequence number of the edge that needs to be carried.
3866  * "coef" represents general constraints on coefficients (c_0, c_x)
3867  * of valid constraints for (y - x) with x and y instances of the node.
3868  *
3869  * The constraints added to graph->lp need to enforce
3870  *
3871  *	(c_j_0 + c_j_x y) - (c_j_0 + c_j_x x)
3872  *	= c_j_x (y - x) >= e_i
3873  *
3874  * for each (x,y) in the dependence relation of the edge.
3875  * That is, (-e_i, c_j_x) needs to be plugged in for (c_0, c_x),
3876  * taking into account that each coefficient in c_j_x is represented
3877  * as a pair of non-negative coefficients.
3878  */
add_intra_constraints(struct isl_sched_graph * graph,struct isl_sched_node * node,__isl_take isl_basic_set * coef,int pos)3879 static isl_stat add_intra_constraints(struct isl_sched_graph *graph,
3880 	struct isl_sched_node *node, __isl_take isl_basic_set *coef, int pos)
3881 {
3882 	isl_size offset;
3883 	isl_ctx *ctx;
3884 	isl_dim_map *dim_map;
3885 
3886 	offset = coef_var_offset(coef);
3887 	if (offset < 0)
3888 		coef = isl_basic_set_free(coef);
3889 	if (!coef)
3890 		return isl_stat_error;
3891 
3892 	ctx = isl_basic_set_get_ctx(coef);
3893 	dim_map = intra_dim_map(ctx, graph, node, offset, 1);
3894 	isl_dim_map_range(dim_map, 3 + pos, 0, 0, 0, 1, -1);
3895 	graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);
3896 
3897 	return isl_stat_ok;
3898 }
3899 
3900 /* Add the constraints "coef" derived from an edge from "src" to "dst"
3901  * to graph->lp in order to respect the dependences and to try and carry them.
3902  * "pos" is the sequence number of the edge that needs to be carried or
3903  * -1 if no attempt should be made to carry the dependences.
3904  * "coef" represents general constraints on coefficients (c_0, c_n, c_x, c_y)
3905  * of valid constraints for (x, y) with x and y instances of "src" and "dst".
3906  *
3907  * The constraints added to graph->lp need to enforce
3908  *
3909  *	(c_k_0 + c_k_n n + c_k_x y) - (c_j_0 + c_j_n n + c_j_x x) >= e_i
3910  *
3911  * for each (x,y) in the dependence relation of the edge or
3912  *
3913  *	(c_k_0 + c_k_n n + c_k_x y) - (c_j_0 + c_j_n n + c_j_x x) >= 0
3914  *
3915  * if pos is -1.
3916  * That is,
3917  * (-e_i + c_k_0 - c_j_0, c_k_n - c_j_n, -c_j_x, c_k_x)
3918  * or
3919  * (c_k_0 - c_j_0, c_k_n - c_j_n, -c_j_x, c_k_x)
3920  * needs to be plugged in for (c_0, c_n, c_x, c_y),
3921  * taking into account that each coefficient in c_j_x and c_k_x is represented
3922  * as a pair of non-negative coefficients.
3923  */
add_inter_constraints(struct isl_sched_graph * graph,struct isl_sched_node * src,struct isl_sched_node * dst,__isl_take isl_basic_set * coef,int pos)3924 static isl_stat add_inter_constraints(struct isl_sched_graph *graph,
3925 	struct isl_sched_node *src, struct isl_sched_node *dst,
3926 	__isl_take isl_basic_set *coef, int pos)
3927 {
3928 	isl_size offset;
3929 	isl_ctx *ctx;
3930 	isl_dim_map *dim_map;
3931 
3932 	offset = coef_var_offset(coef);
3933 	if (offset < 0)
3934 		coef = isl_basic_set_free(coef);
3935 	if (!coef)
3936 		return isl_stat_error;
3937 
3938 	ctx = isl_basic_set_get_ctx(coef);
3939 	dim_map = inter_dim_map(ctx, graph, src, dst, offset, 1);
3940 	if (pos >= 0)
3941 		isl_dim_map_range(dim_map, 3 + pos, 0, 0, 0, 1, -1);
3942 	graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);
3943 
3944 	return isl_stat_ok;
3945 }
3946 
3947 /* Data structure for keeping track of the data needed
3948  * to exploit non-trivial lineality spaces.
3949  *
3950  * "any_non_trivial" is true if there are any non-trivial lineality spaces.
3951  * If "any_non_trivial" is not true, then "equivalent" and "mask" may be NULL.
3952  * "equivalent" connects instances to other instances on the same line(s).
3953  * "mask" contains the domain spaces of "equivalent".
3954  * Any instance set not in "mask" does not have a non-trivial lineality space.
3955  */
3956 struct isl_exploit_lineality_data {
3957 	isl_bool any_non_trivial;
3958 	isl_union_map *equivalent;
3959 	isl_union_set *mask;
3960 };
3961 
3962 /* Data structure collecting information used during the construction
3963  * of an LP for carrying dependences.
3964  *
3965  * "intra" is a sequence of coefficient constraints for intra-node edges.
3966  * "inter" is a sequence of coefficient constraints for inter-node edges.
3967  * "lineality" contains data used to exploit non-trivial lineality spaces.
3968  */
3969 struct isl_carry {
3970 	isl_basic_set_list *intra;
3971 	isl_basic_set_list *inter;
3972 	struct isl_exploit_lineality_data lineality;
3973 };
3974 
3975 /* Free all the data stored in "carry".
3976  */
isl_carry_clear(struct isl_carry * carry)3977 static void isl_carry_clear(struct isl_carry *carry)
3978 {
3979 	isl_basic_set_list_free(carry->intra);
3980 	isl_basic_set_list_free(carry->inter);
3981 	isl_union_map_free(carry->lineality.equivalent);
3982 	isl_union_set_free(carry->lineality.mask);
3983 }
3984 
3985 /* Return a pointer to the node in "graph" that lives in "space".
3986  * If the requested node has been compressed, then "space"
3987  * corresponds to the compressed space.
3988  * The graph is assumed to have such a node.
3989  * Return NULL in case of error.
3990  *
3991  * First try and see if "space" is the space of an uncompressed node.
3992  * If so, return that node.
3993  * Otherwise, "space" was constructed by construct_compressed_id and
3994  * contains a user pointer pointing to the node in the tuple id.
3995  * However, this node belongs to the original dependence graph.
3996  * If "graph" is a subgraph of this original dependence graph,
3997  * then the node with the same space still needs to be looked up
3998  * in the current graph.
3999  */
graph_find_compressed_node(isl_ctx * ctx,struct isl_sched_graph * graph,__isl_keep isl_space * space)4000 static struct isl_sched_node *graph_find_compressed_node(isl_ctx *ctx,
4001 	struct isl_sched_graph *graph, __isl_keep isl_space *space)
4002 {
4003 	isl_id *id;
4004 	struct isl_sched_node *node;
4005 
4006 	if (!space)
4007 		return NULL;
4008 
4009 	node = isl_sched_graph_find_node(ctx, graph, space);
4010 	if (!node)
4011 		return NULL;
4012 	if (isl_sched_graph_is_node(graph, node))
4013 		return node;
4014 
4015 	id = isl_space_get_tuple_id(space, isl_dim_set);
4016 	node = isl_id_get_user(id);
4017 	isl_id_free(id);
4018 
4019 	if (!node)
4020 		return NULL;
4021 
4022 	if (!isl_sched_graph_is_node(graph->root, node))
4023 		isl_die(ctx, isl_error_internal,
4024 			"space points to invalid node", return NULL);
4025 	if (graph != graph->root)
4026 		node = isl_sched_graph_find_node(ctx, graph, node->space);
4027 	if (!isl_sched_graph_is_node(graph, node))
4028 		isl_die(ctx, isl_error_internal,
4029 			"unable to find node", return NULL);
4030 
4031 	return node;
4032 }
4033 
4034 /* Internal data structure for add_all_constraints.
4035  *
4036  * "graph" is the schedule constraint graph for which an LP problem
4037  * is being constructed.
4038  * "carry_inter" indicates whether inter-node edges should be carried.
4039  * "pos" is the position of the next edge that needs to be carried.
4040  */
4041 struct isl_add_all_constraints_data {
4042 	isl_ctx *ctx;
4043 	struct isl_sched_graph *graph;
4044 	int carry_inter;
4045 	int pos;
4046 };
4047 
4048 /* Add the constraints "coef" derived from an edge from a node to itself
4049  * to data->graph->lp in order to respect the dependences and
4050  * to try and carry them.
4051  *
4052  * The space of "coef" is of the form
4053  *
4054  *	coefficients[[c_cst] -> S[c_x]]
4055  *
4056  * with S[c_x] the (compressed) space of the node.
4057  * Extract the node from the space and call add_intra_constraints.
4058  */
lp_add_intra(__isl_take isl_basic_set * coef,void * user)4059 static isl_stat lp_add_intra(__isl_take isl_basic_set *coef, void *user)
4060 {
4061 	struct isl_add_all_constraints_data *data = user;
4062 	isl_space *space;
4063 	struct isl_sched_node *node;
4064 
4065 	space = isl_basic_set_get_space(coef);
4066 	space = isl_space_range(isl_space_unwrap(space));
4067 	node = graph_find_compressed_node(data->ctx, data->graph, space);
4068 	isl_space_free(space);
4069 	return add_intra_constraints(data->graph, node, coef, data->pos++);
4070 }
4071 
4072 /* Add the constraints "coef" derived from an edge from a node j
4073  * to a node k to data->graph->lp in order to respect the dependences and
4074  * to try and carry them (provided data->carry_inter is set).
4075  *
4076  * The space of "coef" is of the form
4077  *
4078  *	coefficients[[c_cst, c_n] -> [S_j[c_x] -> S_k[c_y]]]
4079  *
4080  * with S_j[c_x] and S_k[c_y] the (compressed) spaces of the nodes.
4081  * Extract the nodes from the space and call add_inter_constraints.
4082  */
lp_add_inter(__isl_take isl_basic_set * coef,void * user)4083 static isl_stat lp_add_inter(__isl_take isl_basic_set *coef, void *user)
4084 {
4085 	struct isl_add_all_constraints_data *data = user;
4086 	isl_space *space, *dom;
4087 	struct isl_sched_node *src, *dst;
4088 	int pos;
4089 
4090 	space = isl_basic_set_get_space(coef);
4091 	space = isl_space_unwrap(isl_space_range(isl_space_unwrap(space)));
4092 	dom = isl_space_domain(isl_space_copy(space));
4093 	src = graph_find_compressed_node(data->ctx, data->graph, dom);
4094 	isl_space_free(dom);
4095 	space = isl_space_range(space);
4096 	dst = graph_find_compressed_node(data->ctx, data->graph, space);
4097 	isl_space_free(space);
4098 
4099 	pos = data->carry_inter ? data->pos++ : -1;
4100 	return add_inter_constraints(data->graph, src, dst, coef, pos);
4101 }
4102 
4103 /* Add constraints to graph->lp that force all (conditional) validity
4104  * dependences to be respected and attempt to carry them.
4105  * "intra" is the sequence of coefficient constraints for intra-node edges.
4106  * "inter" is the sequence of coefficient constraints for inter-node edges.
4107  * "carry_inter" indicates whether inter-node edges should be carried or
4108  * only respected.
4109  */
add_all_constraints(isl_ctx * ctx,struct isl_sched_graph * graph,__isl_keep isl_basic_set_list * intra,__isl_keep isl_basic_set_list * inter,int carry_inter)4110 static isl_stat add_all_constraints(isl_ctx *ctx, struct isl_sched_graph *graph,
4111 	__isl_keep isl_basic_set_list *intra,
4112 	__isl_keep isl_basic_set_list *inter, int carry_inter)
4113 {
4114 	struct isl_add_all_constraints_data data = { ctx, graph, carry_inter };
4115 
4116 	data.pos = 0;
4117 	if (isl_basic_set_list_foreach(intra, &lp_add_intra, &data) < 0)
4118 		return isl_stat_error;
4119 	if (isl_basic_set_list_foreach(inter, &lp_add_inter, &data) < 0)
4120 		return isl_stat_error;
4121 	return isl_stat_ok;
4122 }
4123 
4124 /* Internal data structure for count_all_constraints
4125  * for keeping track of the number of equality and inequality constraints.
4126  */
4127 struct isl_sched_count {
4128 	int n_eq;
4129 	int n_ineq;
4130 };
4131 
4132 /* Add the number of equality and inequality constraints of "bset"
4133  * to data->n_eq and data->n_ineq.
4134  */
bset_update_count(__isl_take isl_basic_set * bset,void * user)4135 static isl_stat bset_update_count(__isl_take isl_basic_set *bset, void *user)
4136 {
4137 	struct isl_sched_count *data = user;
4138 
4139 	return update_count(bset, 1, &data->n_eq, &data->n_ineq);
4140 }
4141 
4142 /* Count the number of equality and inequality constraints
4143  * that will be added to the carry_lp problem.
4144  * We count each edge exactly once.
4145  * "intra" is the sequence of coefficient constraints for intra-node edges.
4146  * "inter" is the sequence of coefficient constraints for inter-node edges.
4147  */
count_all_constraints(__isl_keep isl_basic_set_list * intra,__isl_keep isl_basic_set_list * inter,int * n_eq,int * n_ineq)4148 static isl_stat count_all_constraints(__isl_keep isl_basic_set_list *intra,
4149 	__isl_keep isl_basic_set_list *inter, int *n_eq, int *n_ineq)
4150 {
4151 	struct isl_sched_count data;
4152 
4153 	data.n_eq = data.n_ineq = 0;
4154 	if (isl_basic_set_list_foreach(inter, &bset_update_count, &data) < 0)
4155 		return isl_stat_error;
4156 	if (isl_basic_set_list_foreach(intra, &bset_update_count, &data) < 0)
4157 		return isl_stat_error;
4158 
4159 	*n_eq = data.n_eq;
4160 	*n_ineq = data.n_ineq;
4161 
4162 	return isl_stat_ok;
4163 }
4164 
4165 /* Construct an LP problem for finding schedule coefficients
4166  * such that the schedule carries as many validity dependences as possible.
4167  * In particular, for each dependence i, we bound the dependence distance
4168  * from below by e_i, with 0 <= e_i <= 1 and then maximize the sum
4169  * of all e_i's.  Dependences with e_i = 0 in the solution are simply
4170  * respected, while those with e_i > 0 (in practice e_i = 1) are carried.
4171  * "intra" is the sequence of coefficient constraints for intra-node edges.
4172  * "inter" is the sequence of coefficient constraints for inter-node edges.
4173  * "n_edge" is the total number of edges.
4174  * "carry_inter" indicates whether inter-node edges should be carried or
4175  * only respected.  That is, if "carry_inter" is not set, then
4176  * no e_i variables are introduced for the inter-node edges.
4177  *
4178  * All variables of the LP are non-negative.  The actual coefficients
4179  * may be negative, so each coefficient is represented as the difference
4180  * of two non-negative variables.  The negative part always appears
4181  * immediately before the positive part.
4182  * Other than that, the variables have the following order
4183  *
4184  *	- sum of (1 - e_i) over all edges
4185  *	- sum of all c_n coefficients
4186  *		(unconstrained when computing non-parametric schedules)
4187  *	- sum of positive and negative parts of all c_x coefficients
4188  *	- for each edge
4189  *		- e_i
4190  *	- for each node
4191  *		- positive and negative parts of c_i_x, in opposite order
4192  *		- c_i_n (if parametric)
4193  *		- c_i_0
4194  *
4195  * The constraints are those from the (validity) edges plus three equalities
4196  * to express the sums and n_edge inequalities to express e_i <= 1.
4197  */
setup_carry_lp(isl_ctx * ctx,struct isl_sched_graph * graph,int n_edge,__isl_keep isl_basic_set_list * intra,__isl_keep isl_basic_set_list * inter,int carry_inter)4198 static isl_stat setup_carry_lp(isl_ctx *ctx, struct isl_sched_graph *graph,
4199 	int n_edge, __isl_keep isl_basic_set_list *intra,
4200 	__isl_keep isl_basic_set_list *inter, int carry_inter)
4201 {
4202 	int i;
4203 	int k;
4204 	isl_space *space;
4205 	unsigned total;
4206 	int n_eq, n_ineq;
4207 
4208 	total = 3 + n_edge;
4209 	for (i = 0; i < graph->n; ++i) {
4210 		struct isl_sched_node *node = &graph->node[graph->sorted[i]];
4211 		node->start = total;
4212 		total += 1 + node->nparam + 2 * node->nvar;
4213 	}
4214 
4215 	if (count_all_constraints(intra, inter, &n_eq, &n_ineq) < 0)
4216 		return isl_stat_error;
4217 
4218 	space = isl_space_set_alloc(ctx, 0, total);
4219 	isl_basic_set_free(graph->lp);
4220 	n_eq += 3;
4221 	n_ineq += n_edge;
4222 	graph->lp = isl_basic_set_alloc_space(space, 0, n_eq, n_ineq);
4223 	graph->lp = isl_basic_set_set_rational(graph->lp);
4224 
4225 	k = isl_basic_set_alloc_equality(graph->lp);
4226 	if (k < 0)
4227 		return isl_stat_error;
4228 	isl_seq_clr(graph->lp->eq[k], 1 + total);
4229 	isl_int_set_si(graph->lp->eq[k][0], -n_edge);
4230 	isl_int_set_si(graph->lp->eq[k][1], 1);
4231 	for (i = 0; i < n_edge; ++i)
4232 		isl_int_set_si(graph->lp->eq[k][4 + i], 1);
4233 
4234 	if (add_param_sum_constraint(graph, 1) < 0)
4235 		return isl_stat_error;
4236 	if (add_var_sum_constraint(graph, 2) < 0)
4237 		return isl_stat_error;
4238 
4239 	for (i = 0; i < n_edge; ++i) {
4240 		k = isl_basic_set_alloc_inequality(graph->lp);
4241 		if (k < 0)
4242 			return isl_stat_error;
4243 		isl_seq_clr(graph->lp->ineq[k], 1 + total);
4244 		isl_int_set_si(graph->lp->ineq[k][4 + i], -1);
4245 		isl_int_set_si(graph->lp->ineq[k][0], 1);
4246 	}
4247 
4248 	if (add_all_constraints(ctx, graph, intra, inter, carry_inter) < 0)
4249 		return isl_stat_error;
4250 
4251 	return isl_stat_ok;
4252 }
4253 
4254 static __isl_give isl_schedule_node *compute_component_schedule(
4255 	__isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
4256 	int wcc);
4257 
4258 /* If the schedule_split_scaled option is set and if the linear
4259  * parts of the scheduling rows for all nodes in the graphs have
4260  * a non-trivial common divisor, then remove this
4261  * common divisor from the linear part.
4262  * Otherwise, insert a band node directly and continue with
4263  * the construction of the schedule.
4264  *
4265  * If a non-trivial common divisor is found, then
4266  * the linear part is reduced and the remainder is ignored.
4267  * The pieces of the graph that are assigned different remainders
4268  * form (groups of) strongly connected components within
4269  * the scaled down band.  If needed, they can therefore
4270  * be ordered along this remainder in a sequence node.
4271  * However, this ordering is not enforced here in order to allow
4272  * the scheduler to combine some of the strongly connected components.
4273  */
split_scaled(__isl_take isl_schedule_node * node,struct isl_sched_graph * graph)4274 static __isl_give isl_schedule_node *split_scaled(
4275 	__isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
4276 {
4277 	int i;
4278 	int row;
4279 	isl_ctx *ctx;
4280 	isl_int gcd, gcd_i;
4281 	isl_size n_row;
4282 
4283 	if (!node)
4284 		return NULL;
4285 
4286 	ctx = isl_schedule_node_get_ctx(node);
4287 	if (!ctx->opt->schedule_split_scaled)
4288 		return compute_next_band(node, graph, 0);
4289 	if (graph->n <= 1)
4290 		return compute_next_band(node, graph, 0);
4291 	n_row = isl_mat_rows(graph->node[0].sched);
4292 	if (n_row < 0)
4293 		return isl_schedule_node_free(node);
4294 
4295 	isl_int_init(gcd);
4296 	isl_int_init(gcd_i);
4297 
4298 	isl_int_set_si(gcd, 0);
4299 
4300 	row = n_row - 1;
4301 
4302 	for (i = 0; i < graph->n; ++i) {
4303 		struct isl_sched_node *node = &graph->node[i];
4304 		isl_size cols = isl_mat_cols(node->sched);
4305 
4306 		if (cols < 0)
4307 			break;
4308 		isl_seq_gcd(node->sched->row[row] + 1, cols - 1, &gcd_i);
4309 		isl_int_gcd(gcd, gcd, gcd_i);
4310 	}
4311 
4312 	isl_int_clear(gcd_i);
4313 	if (i < graph->n)
4314 		goto error;
4315 
4316 	if (isl_int_cmp_si(gcd, 1) <= 0) {
4317 		isl_int_clear(gcd);
4318 		return compute_next_band(node, graph, 0);
4319 	}
4320 
4321 	for (i = 0; i < graph->n; ++i) {
4322 		struct isl_sched_node *node = &graph->node[i];
4323 
4324 		isl_int_fdiv_q(node->sched->row[row][0],
4325 			       node->sched->row[row][0], gcd);
4326 		isl_int_mul(node->sched->row[row][0],
4327 			    node->sched->row[row][0], gcd);
4328 		node->sched = isl_mat_scale_down_row(node->sched, row, gcd);
4329 		if (!node->sched)
4330 			goto error;
4331 	}
4332 
4333 	isl_int_clear(gcd);
4334 
4335 	return compute_next_band(node, graph, 0);
4336 error:
4337 	isl_int_clear(gcd);
4338 	return isl_schedule_node_free(node);
4339 }
4340 
4341 /* Is the schedule row "sol" trivial on node "node"?
4342  * That is, is the solution zero on the dimensions linearly independent of
4343  * the previously found solutions?
4344  * Return 1 if the solution is trivial, 0 if it is not and -1 on error.
4345  *
4346  * Each coefficient is represented as the difference between
4347  * two non-negative values in "sol".
4348  * We construct the schedule row s and check if it is linearly
4349  * independent of previously computed schedule rows
4350  * by computing T s, with T the linear combinations that are zero
4351  * on linearly dependent schedule rows.
4352  * If the result consists of all zeros, then the solution is trivial.
4353  */
is_trivial(struct isl_sched_node * node,__isl_keep isl_vec * sol)4354 static int is_trivial(struct isl_sched_node *node, __isl_keep isl_vec *sol)
4355 {
4356 	int trivial;
4357 	isl_vec *node_sol;
4358 
4359 	if (!sol)
4360 		return -1;
4361 	if (node->nvar == node->rank)
4362 		return 0;
4363 
4364 	node_sol = extract_var_coef(node, sol);
4365 	node_sol = isl_mat_vec_product(isl_mat_copy(node->indep), node_sol);
4366 	if (!node_sol)
4367 		return -1;
4368 
4369 	trivial = isl_seq_first_non_zero(node_sol->el,
4370 					node->nvar - node->rank) == -1;
4371 
4372 	isl_vec_free(node_sol);
4373 
4374 	return trivial;
4375 }
4376 
4377 /* Is the schedule row "sol" trivial on any node where it should
4378  * not be trivial?
4379  * Return 1 if any solution is trivial, 0 if they are not and -1 on error.
4380  */
is_any_trivial(struct isl_sched_graph * graph,__isl_keep isl_vec * sol)4381 static int is_any_trivial(struct isl_sched_graph *graph,
4382 	__isl_keep isl_vec *sol)
4383 {
4384 	int i;
4385 
4386 	for (i = 0; i < graph->n; ++i) {
4387 		struct isl_sched_node *node = &graph->node[i];
4388 		int trivial;
4389 
4390 		if (!needs_row(graph, node))
4391 			continue;
4392 		trivial = is_trivial(node, sol);
4393 		if (trivial < 0 || trivial)
4394 			return trivial;
4395 	}
4396 
4397 	return 0;
4398 }
4399 
4400 /* Does the schedule represented by "sol" perform loop coalescing on "node"?
4401  * If so, return the position of the coalesced dimension.
4402  * Otherwise, return node->nvar or -1 on error.
4403  *
4404  * In particular, look for pairs of coefficients c_i and c_j such that
4405  * |c_j/c_i| > ceil(size_i/2), i.e., |c_j| > |c_i * ceil(size_i/2)|.
4406  * If any such pair is found, then return i.
4407  * If size_i is infinity, then no check on c_i needs to be performed.
4408  */
find_node_coalescing(struct isl_sched_node * node,__isl_keep isl_vec * sol)4409 static int find_node_coalescing(struct isl_sched_node *node,
4410 	__isl_keep isl_vec *sol)
4411 {
4412 	int i, j;
4413 	isl_int max;
4414 	isl_vec *csol;
4415 
4416 	if (node->nvar <= 1)
4417 		return node->nvar;
4418 
4419 	csol = extract_var_coef(node, sol);
4420 	if (!csol)
4421 		return -1;
4422 	isl_int_init(max);
4423 	for (i = 0; i < node->nvar; ++i) {
4424 		isl_val *v;
4425 
4426 		if (isl_int_is_zero(csol->el[i]))
4427 			continue;
4428 		v = isl_multi_val_get_val(node->sizes, i);
4429 		if (!v)
4430 			goto error;
4431 		if (!isl_val_is_int(v)) {
4432 			isl_val_free(v);
4433 			continue;
4434 		}
4435 		v = isl_val_div_ui(v, 2);
4436 		v = isl_val_ceil(v);
4437 		if (!v)
4438 			goto error;
4439 		isl_int_mul(max, v->n, csol->el[i]);
4440 		isl_val_free(v);
4441 
4442 		for (j = 0; j < node->nvar; ++j) {
4443 			if (j == i)
4444 				continue;
4445 			if (isl_int_abs_gt(csol->el[j], max))
4446 				break;
4447 		}
4448 		if (j < node->nvar)
4449 			break;
4450 	}
4451 
4452 	isl_int_clear(max);
4453 	isl_vec_free(csol);
4454 	return i;
4455 error:
4456 	isl_int_clear(max);
4457 	isl_vec_free(csol);
4458 	return -1;
4459 }
4460 
4461 /* Force the schedule coefficient at position "pos" of "node" to be zero
4462  * in "tl".
4463  * The coefficient is encoded as the difference between two non-negative
4464  * variables.  Force these two variables to have the same value.
4465  */
zero_out_node_coef(__isl_take isl_tab_lexmin * tl,struct isl_sched_node * node,int pos)4466 static __isl_give isl_tab_lexmin *zero_out_node_coef(
4467 	__isl_take isl_tab_lexmin *tl, struct isl_sched_node *node, int pos)
4468 {
4469 	int dim;
4470 	isl_ctx *ctx;
4471 	isl_vec *eq;
4472 
4473 	ctx = isl_space_get_ctx(node->space);
4474 	dim = isl_tab_lexmin_dim(tl);
4475 	if (dim < 0)
4476 		return isl_tab_lexmin_free(tl);
4477 	eq = isl_vec_alloc(ctx, 1 + dim);
4478 	eq = isl_vec_clr(eq);
4479 	if (!eq)
4480 		return isl_tab_lexmin_free(tl);
4481 
4482 	pos = 1 + node_var_coef_pos(node, pos);
4483 	isl_int_set_si(eq->el[pos], 1);
4484 	isl_int_set_si(eq->el[pos + 1], -1);
4485 	tl = isl_tab_lexmin_add_eq(tl, eq->el);
4486 	isl_vec_free(eq);
4487 
4488 	return tl;
4489 }
4490 
4491 /* Return the lexicographically smallest rational point in the basic set
4492  * from which "tl" was constructed, double checking that this input set
4493  * was not empty.
4494  */
non_empty_solution(__isl_keep isl_tab_lexmin * tl)4495 static __isl_give isl_vec *non_empty_solution(__isl_keep isl_tab_lexmin *tl)
4496 {
4497 	isl_vec *sol;
4498 
4499 	sol = isl_tab_lexmin_get_solution(tl);
4500 	if (!sol)
4501 		return NULL;
4502 	if (sol->size == 0)
4503 		isl_die(isl_vec_get_ctx(sol), isl_error_internal,
4504 			"error in schedule construction",
4505 			return isl_vec_free(sol));
4506 	return sol;
4507 }
4508 
4509 /* Does the solution "sol" of the LP problem constructed by setup_carry_lp
4510  * carry any of the "n_edge" groups of dependences?
4511  * The value in the first position is the sum of (1 - e_i) over all "n_edge"
4512  * edges, with 0 <= e_i <= 1 equal to 1 when the dependences represented
4513  * by the edge are carried by the solution.
4514  * If the sum of the (1 - e_i) is smaller than "n_edge" then at least
4515  * one of those is carried.
4516  *
4517  * Note that despite the fact that the problem is solved using a rational
4518  * solver, the solution is guaranteed to be integral.
4519  * Specifically, the dependence distance lower bounds e_i (and therefore
4520  * also their sum) are integers.  See Lemma 5 of [1].
4521  *
4522  * Any potential denominator of the sum is cleared by this function.
4523  * The denominator is not relevant for any of the other elements
4524  * in the solution.
4525  *
4526  * [1] P. Feautrier, Some Efficient Solutions to the Affine Scheduling
4527  *     Problem, Part II: Multi-Dimensional Time.
4528  *     In Intl. Journal of Parallel Programming, 1992.
4529  */
carries_dependences(__isl_keep isl_vec * sol,int n_edge)4530 static int carries_dependences(__isl_keep isl_vec *sol, int n_edge)
4531 {
4532 	isl_int_divexact(sol->el[1], sol->el[1], sol->el[0]);
4533 	isl_int_set_si(sol->el[0], 1);
4534 	return isl_int_cmp_si(sol->el[1], n_edge) < 0;
4535 }
4536 
4537 /* Return the lexicographically smallest rational point in "lp",
4538  * assuming that all variables are non-negative and performing some
4539  * additional sanity checks.
4540  * If "want_integral" is set, then compute the lexicographically smallest
4541  * integer point instead.
4542  * In particular, "lp" should not be empty by construction.
4543  * Double check that this is the case.
4544  * If dependences are not carried for any of the "n_edge" edges,
4545  * then return an empty vector.
4546  *
4547  * If the schedule_treat_coalescing option is set and
4548  * if the computed schedule performs loop coalescing on a given node,
4549  * i.e., if it is of the form
4550  *
4551  *	c_i i + c_j j + ...
4552  *
4553  * with |c_j/c_i| >= size_i, then force the coefficient c_i to be zero
4554  * to cut out this solution.  Repeat this process until no more loop
4555  * coalescing occurs or until no more dependences can be carried.
4556  * In the latter case, revert to the previously computed solution.
4557  *
4558  * If the caller requests an integral solution and if coalescing should
4559  * be treated, then perform the coalescing treatment first as
4560  * an integral solution computed before coalescing treatment
4561  * would carry the same number of edges and would therefore probably
4562  * also be coalescing.
4563  *
4564  * To allow the coalescing treatment to be performed first,
4565  * the initial solution is allowed to be rational and it is only
4566  * cut out (if needed) in the next iteration, if no coalescing measures
4567  * were taken.
4568  */
non_neg_lexmin(struct isl_sched_graph * graph,__isl_take isl_basic_set * lp,int n_edge,int want_integral)4569 static __isl_give isl_vec *non_neg_lexmin(struct isl_sched_graph *graph,
4570 	__isl_take isl_basic_set *lp, int n_edge, int want_integral)
4571 {
4572 	int i, pos, cut;
4573 	isl_ctx *ctx;
4574 	isl_tab_lexmin *tl;
4575 	isl_vec *sol = NULL, *prev;
4576 	int treat_coalescing;
4577 	int try_again;
4578 
4579 	if (!lp)
4580 		return NULL;
4581 	ctx = isl_basic_set_get_ctx(lp);
4582 	treat_coalescing = isl_options_get_schedule_treat_coalescing(ctx);
4583 	tl = isl_tab_lexmin_from_basic_set(lp);
4584 
4585 	cut = 0;
4586 	do {
4587 		int integral;
4588 
4589 		try_again = 0;
4590 		if (cut)
4591 			tl = isl_tab_lexmin_cut_to_integer(tl);
4592 		prev = sol;
4593 		sol = non_empty_solution(tl);
4594 		if (!sol)
4595 			goto error;
4596 
4597 		integral = isl_int_is_one(sol->el[0]);
4598 		if (!carries_dependences(sol, n_edge)) {
4599 			if (!prev)
4600 				prev = isl_vec_alloc(ctx, 0);
4601 			isl_vec_free(sol);
4602 			sol = prev;
4603 			break;
4604 		}
4605 		prev = isl_vec_free(prev);
4606 		cut = want_integral && !integral;
4607 		if (cut)
4608 			try_again = 1;
4609 		if (!treat_coalescing)
4610 			continue;
4611 		for (i = 0; i < graph->n; ++i) {
4612 			struct isl_sched_node *node = &graph->node[i];
4613 
4614 			pos = find_node_coalescing(node, sol);
4615 			if (pos < 0)
4616 				goto error;
4617 			if (pos < node->nvar)
4618 				break;
4619 		}
4620 		if (i < graph->n) {
4621 			try_again = 1;
4622 			tl = zero_out_node_coef(tl, &graph->node[i], pos);
4623 			cut = 0;
4624 		}
4625 	} while (try_again);
4626 
4627 	isl_tab_lexmin_free(tl);
4628 
4629 	return sol;
4630 error:
4631 	isl_tab_lexmin_free(tl);
4632 	isl_vec_free(prev);
4633 	isl_vec_free(sol);
4634 	return NULL;
4635 }
4636 
4637 /* If "edge" is an edge from a node to itself, then add the corresponding
4638  * dependence relation to "umap".
4639  * If "node" has been compressed, then the dependence relation
4640  * is also compressed first.
4641  */
add_intra(__isl_take isl_union_map * umap,struct isl_sched_edge * edge)4642 static __isl_give isl_union_map *add_intra(__isl_take isl_union_map *umap,
4643 	struct isl_sched_edge *edge)
4644 {
4645 	isl_map *map;
4646 	struct isl_sched_node *node = edge->src;
4647 
4648 	if (edge->src != edge->dst)
4649 		return umap;
4650 
4651 	map = isl_map_copy(edge->map);
4652 	map = compress(map, node, node);
4653 	umap = isl_union_map_add_map(umap, map);
4654 	return umap;
4655 }
4656 
4657 /* If "edge" is an edge from a node to another node, then add the corresponding
4658  * dependence relation to "umap".
4659  * If the source or destination nodes of "edge" have been compressed,
4660  * then the dependence relation is also compressed first.
4661  */
add_inter(__isl_take isl_union_map * umap,struct isl_sched_edge * edge)4662 static __isl_give isl_union_map *add_inter(__isl_take isl_union_map *umap,
4663 	struct isl_sched_edge *edge)
4664 {
4665 	isl_map *map;
4666 
4667 	if (edge->src == edge->dst)
4668 		return umap;
4669 
4670 	map = isl_map_copy(edge->map);
4671 	map = compress(map, edge->src, edge->dst);
4672 	umap = isl_union_map_add_map(umap, map);
4673 	return umap;
4674 }
4675 
4676 /* Internal data structure used by union_drop_coalescing_constraints
4677  * to collect bounds on all relevant statements.
4678  *
4679  * "graph" is the schedule constraint graph for which an LP problem
4680  * is being constructed.
4681  * "bounds" collects the bounds.
4682  */
4683 struct isl_collect_bounds_data {
4684 	isl_ctx *ctx;
4685 	struct isl_sched_graph *graph;
4686 	isl_union_set *bounds;
4687 };
4688 
4689 /* Add the size bounds for the node with instance deltas in "set"
4690  * to data->bounds.
4691  */
collect_bounds(__isl_take isl_set * set,void * user)4692 static isl_stat collect_bounds(__isl_take isl_set *set, void *user)
4693 {
4694 	struct isl_collect_bounds_data *data = user;
4695 	struct isl_sched_node *node;
4696 	isl_space *space;
4697 	isl_set *bounds;
4698 
4699 	space = isl_set_get_space(set);
4700 	isl_set_free(set);
4701 
4702 	node = graph_find_compressed_node(data->ctx, data->graph, space);
4703 	isl_space_free(space);
4704 
4705 	bounds = isl_set_from_basic_set(get_size_bounds(node));
4706 	data->bounds = isl_union_set_add_set(data->bounds, bounds);
4707 
4708 	return isl_stat_ok;
4709 }
4710 
4711 /* Drop some constraints from "delta" that could be exploited
4712  * to construct loop coalescing schedules.
4713  * In particular, drop those constraint that bound the difference
4714  * to the size of the domain.
4715  * Do this for each set/node in "delta" separately.
4716  * The parameters are assumed to have been projected out by the caller.
4717  */
union_drop_coalescing_constraints(isl_ctx * ctx,struct isl_sched_graph * graph,__isl_take isl_union_set * delta)4718 static __isl_give isl_union_set *union_drop_coalescing_constraints(isl_ctx *ctx,
4719 	struct isl_sched_graph *graph, __isl_take isl_union_set *delta)
4720 {
4721 	struct isl_collect_bounds_data data = { ctx, graph };
4722 
4723 	data.bounds = isl_union_set_empty(isl_space_params_alloc(ctx, 0));
4724 	if (isl_union_set_foreach_set(delta, &collect_bounds, &data) < 0)
4725 		data.bounds = isl_union_set_free(data.bounds);
4726 	delta = isl_union_set_plain_gist(delta, data.bounds);
4727 
4728 	return delta;
4729 }
4730 
4731 /* Given a non-trivial lineality space "lineality", add the corresponding
4732  * universe set to data->mask and add a map from elements to
4733  * other elements along the lines in "lineality" to data->equivalent.
4734  * If this is the first time this function gets called
4735  * (data->any_non_trivial is still false), then set data->any_non_trivial and
4736  * initialize data->mask and data->equivalent.
4737  *
4738  * In particular, if the lineality space is defined by equality constraints
4739  *
4740  *	E x = 0
4741  *
4742  * then construct an affine mapping
4743  *
4744  *	f : x -> E x
4745  *
4746  * and compute the equivalence relation of having the same image under f:
4747  *
4748  *	{ x -> x' : E x = E x' }
4749  */
add_non_trivial_lineality(__isl_take isl_basic_set * lineality,struct isl_exploit_lineality_data * data)4750 static isl_stat add_non_trivial_lineality(__isl_take isl_basic_set *lineality,
4751 	struct isl_exploit_lineality_data *data)
4752 {
4753 	isl_mat *eq;
4754 	isl_space *space;
4755 	isl_set *univ;
4756 	isl_multi_aff *ma;
4757 	isl_multi_pw_aff *mpa;
4758 	isl_map *map;
4759 	isl_size n;
4760 
4761 	if (isl_basic_set_check_no_locals(lineality) < 0)
4762 		goto error;
4763 
4764 	space = isl_basic_set_get_space(lineality);
4765 	if (!data->any_non_trivial) {
4766 		data->equivalent = isl_union_map_empty(isl_space_copy(space));
4767 		data->mask = isl_union_set_empty(isl_space_copy(space));
4768 	}
4769 	data->any_non_trivial = isl_bool_true;
4770 
4771 	univ = isl_set_universe(isl_space_copy(space));
4772 	data->mask = isl_union_set_add_set(data->mask, univ);
4773 
4774 	eq = isl_basic_set_extract_equalities(lineality);
4775 	n = isl_mat_rows(eq);
4776 	if (n < 0)
4777 		space = isl_space_free(space);
4778 	eq = isl_mat_insert_zero_rows(eq, 0, 1);
4779 	eq = isl_mat_set_element_si(eq, 0, 0, 1);
4780 	space = isl_space_from_domain(space);
4781 	space = isl_space_add_dims(space, isl_dim_out, n);
4782 	ma = isl_multi_aff_from_aff_mat(space, eq);
4783 	mpa = isl_multi_pw_aff_from_multi_aff(ma);
4784 	map = isl_multi_pw_aff_eq_map(mpa, isl_multi_pw_aff_copy(mpa));
4785 	data->equivalent = isl_union_map_add_map(data->equivalent, map);
4786 
4787 	isl_basic_set_free(lineality);
4788 	return isl_stat_ok;
4789 error:
4790 	isl_basic_set_free(lineality);
4791 	return isl_stat_error;
4792 }
4793 
4794 /* Check if the lineality space "set" is non-trivial (i.e., is not just
4795  * the origin or, in other words, satisfies a number of equality constraints
4796  * that is smaller than the dimension of the set).
4797  * If so, extend data->mask and data->equivalent accordingly.
4798  *
4799  * The input should not have any local variables already, but
4800  * isl_set_remove_divs is called to make sure it does not.
4801  */
add_lineality(__isl_take isl_set * set,void * user)4802 static isl_stat add_lineality(__isl_take isl_set *set, void *user)
4803 {
4804 	struct isl_exploit_lineality_data *data = user;
4805 	isl_basic_set *hull;
4806 	isl_size dim;
4807 	isl_size n_eq;
4808 
4809 	set = isl_set_remove_divs(set);
4810 	hull = isl_set_unshifted_simple_hull(set);
4811 	dim = isl_basic_set_dim(hull, isl_dim_set);
4812 	n_eq = isl_basic_set_n_equality(hull);
4813 	if (dim < 0 || n_eq < 0)
4814 		goto error;
4815 	if (dim != n_eq)
4816 		return add_non_trivial_lineality(hull, data);
4817 	isl_basic_set_free(hull);
4818 	return isl_stat_ok;
4819 error:
4820 	isl_basic_set_free(hull);
4821 	return isl_stat_error;
4822 }
4823 
4824 /* Check if the difference set on intra-node schedule constraints "intra"
4825  * has any non-trivial lineality space.
4826  * If so, then extend the difference set to a difference set
4827  * on equivalent elements.  That is, if "intra" is
4828  *
4829  *	{ y - x : (x,y) \in V }
4830  *
4831  * and elements are equivalent if they have the same image under f,
4832  * then return
4833  *
4834  *	{ y' - x' : (x,y) \in V and f(x) = f(x') and f(y) = f(y') }
4835  *
4836  * or, since f is linear,
4837  *
4838  *	{ y' - x' : (x,y) \in V and f(y - x) = f(y' - x') }
4839  *
4840  * The results of the search for non-trivial lineality spaces is stored
4841  * in "data".
4842  */
exploit_intra_lineality(__isl_take isl_union_set * intra,struct isl_exploit_lineality_data * data)4843 static __isl_give isl_union_set *exploit_intra_lineality(
4844 	__isl_take isl_union_set *intra,
4845 	struct isl_exploit_lineality_data *data)
4846 {
4847 	isl_union_set *lineality;
4848 	isl_union_set *uset;
4849 
4850 	data->any_non_trivial = isl_bool_false;
4851 	lineality = isl_union_set_copy(intra);
4852 	lineality = isl_union_set_combined_lineality_space(lineality);
4853 	if (isl_union_set_foreach_set(lineality, &add_lineality, data) < 0)
4854 		data->any_non_trivial = isl_bool_error;
4855 	isl_union_set_free(lineality);
4856 
4857 	if (data->any_non_trivial < 0)
4858 		return isl_union_set_free(intra);
4859 	if (!data->any_non_trivial)
4860 		return intra;
4861 
4862 	uset = isl_union_set_copy(intra);
4863 	intra = isl_union_set_subtract(intra, isl_union_set_copy(data->mask));
4864 	uset = isl_union_set_apply(uset, isl_union_map_copy(data->equivalent));
4865 	intra = isl_union_set_union(intra, uset);
4866 
4867 	intra = isl_union_set_remove_divs(intra);
4868 
4869 	return intra;
4870 }
4871 
4872 /* If the difference set on intra-node schedule constraints was found to have
4873  * any non-trivial lineality space by exploit_intra_lineality,
4874  * as recorded in "data", then extend the inter-node
4875  * schedule constraints "inter" to schedule constraints on equivalent elements.
4876  * That is, if "inter" is V and
4877  * elements are equivalent if they have the same image under f, then return
4878  *
4879  *	{ (x', y') : (x,y) \in V and f(x) = f(x') and f(y) = f(y') }
4880  */
exploit_inter_lineality(__isl_take isl_union_map * inter,struct isl_exploit_lineality_data * data)4881 static __isl_give isl_union_map *exploit_inter_lineality(
4882 	__isl_take isl_union_map *inter,
4883 	struct isl_exploit_lineality_data *data)
4884 {
4885 	isl_union_map *umap;
4886 
4887 	if (data->any_non_trivial < 0)
4888 		return isl_union_map_free(inter);
4889 	if (!data->any_non_trivial)
4890 		return inter;
4891 
4892 	umap = isl_union_map_copy(inter);
4893 	inter = isl_union_map_subtract_range(inter,
4894 				isl_union_set_copy(data->mask));
4895 	umap = isl_union_map_apply_range(umap,
4896 				isl_union_map_copy(data->equivalent));
4897 	inter = isl_union_map_union(inter, umap);
4898 	umap = isl_union_map_copy(inter);
4899 	inter = isl_union_map_subtract_domain(inter,
4900 				isl_union_set_copy(data->mask));
4901 	umap = isl_union_map_apply_range(isl_union_map_copy(data->equivalent),
4902 				umap);
4903 	inter = isl_union_map_union(inter, umap);
4904 
4905 	inter = isl_union_map_remove_divs(inter);
4906 
4907 	return inter;
4908 }
4909 
4910 /* For each (conditional) validity edge in "graph",
4911  * add the corresponding dependence relation using "add"
4912  * to a collection of dependence relations and return the result.
4913  * If "coincidence" is set, then coincidence edges are considered as well.
4914  */
collect_validity(struct isl_sched_graph * graph,__isl_give isl_union_map * (* add)(__isl_take isl_union_map * umap,struct isl_sched_edge * edge),int coincidence)4915 static __isl_give isl_union_map *collect_validity(struct isl_sched_graph *graph,
4916 	__isl_give isl_union_map *(*add)(__isl_take isl_union_map *umap,
4917 		struct isl_sched_edge *edge), int coincidence)
4918 {
4919 	int i;
4920 	isl_space *space;
4921 	isl_union_map *umap;
4922 
4923 	space = isl_space_copy(graph->node[0].space);
4924 	umap = isl_union_map_empty(space);
4925 
4926 	for (i = 0; i < graph->n_edge; ++i) {
4927 		struct isl_sched_edge *edge = &graph->edge[i];
4928 
4929 		if (!is_any_validity(edge) &&
4930 		    (!coincidence || !is_coincidence(edge)))
4931 			continue;
4932 
4933 		umap = add(umap, edge);
4934 	}
4935 
4936 	return umap;
4937 }
4938 
4939 /* For each dependence relation on a (conditional) validity edge
4940  * from a node to itself,
4941  * construct the set of coefficients of valid constraints for elements
4942  * in that dependence relation and collect the results.
4943  * If "coincidence" is set, then coincidence edges are considered as well.
4944  *
4945  * In particular, for each dependence relation R, constraints
4946  * on coefficients (c_0, c_x) are constructed such that
4947  *
4948  *	c_0 + c_x d >= 0 for each d in delta R = { y - x | (x,y) in R }
4949  *
4950  * If the schedule_treat_coalescing option is set, then some constraints
4951  * that could be exploited to construct coalescing schedules
4952  * are removed before the dual is computed, but after the parameters
4953  * have been projected out.
4954  * The entire computation is essentially the same as that performed
4955  * by intra_coefficients, except that it operates on multiple
4956  * edges together and that the parameters are always projected out.
4957  *
4958  * Additionally, exploit any non-trivial lineality space
4959  * in the difference set after removing coalescing constraints and
4960  * store the results of the non-trivial lineality space detection in "data".
4961  * The procedure is currently run unconditionally, but it is unlikely
4962  * to find any non-trivial lineality spaces if no coalescing constraints
4963  * have been removed.
4964  *
4965  * Note that if a dependence relation is a union of basic maps,
4966  * then each basic map needs to be treated individually as it may only
4967  * be possible to carry the dependences expressed by some of those
4968  * basic maps and not all of them.
4969  * The collected validity constraints are therefore not coalesced and
4970  * it is assumed that they are not coalesced automatically.
4971  * Duplicate basic maps can be removed, however.
4972  * In particular, if the same basic map appears as a disjunct
4973  * in multiple edges, then it only needs to be carried once.
4974  */
collect_intra_validity(isl_ctx * ctx,struct isl_sched_graph * graph,int coincidence,struct isl_exploit_lineality_data * data)4975 static __isl_give isl_basic_set_list *collect_intra_validity(isl_ctx *ctx,
4976 	struct isl_sched_graph *graph, int coincidence,
4977 	struct isl_exploit_lineality_data *data)
4978 {
4979 	isl_union_map *intra;
4980 	isl_union_set *delta;
4981 	isl_basic_set_list *list;
4982 
4983 	intra = collect_validity(graph, &add_intra, coincidence);
4984 	delta = isl_union_map_deltas(intra);
4985 	delta = isl_union_set_project_out_all_params(delta);
4986 	delta = isl_union_set_remove_divs(delta);
4987 	if (isl_options_get_schedule_treat_coalescing(ctx))
4988 		delta = union_drop_coalescing_constraints(ctx, graph, delta);
4989 	delta = exploit_intra_lineality(delta, data);
4990 	list = isl_union_set_get_basic_set_list(delta);
4991 	isl_union_set_free(delta);
4992 
4993 	return isl_basic_set_list_coefficients(list);
4994 }
4995 
4996 /* For each dependence relation on a (conditional) validity edge
4997  * from a node to some other node,
4998  * construct the set of coefficients of valid constraints for elements
4999  * in that dependence relation and collect the results.
5000  * If "coincidence" is set, then coincidence edges are considered as well.
5001  *
5002  * In particular, for each dependence relation R, constraints
5003  * on coefficients (c_0, c_n, c_x, c_y) are constructed such that
5004  *
5005  *	c_0 + c_n n + c_x x + c_y y >= 0 for each (x,y) in R
5006  *
5007  * This computation is essentially the same as that performed
5008  * by inter_coefficients, except that it operates on multiple
5009  * edges together.
5010  *
5011  * Additionally, exploit any non-trivial lineality space
5012  * that may have been discovered by collect_intra_validity
5013  * (as stored in "data").
5014  *
5015  * Note that if a dependence relation is a union of basic maps,
5016  * then each basic map needs to be treated individually as it may only
5017  * be possible to carry the dependences expressed by some of those
5018  * basic maps and not all of them.
5019  * The collected validity constraints are therefore not coalesced and
5020  * it is assumed that they are not coalesced automatically.
5021  * Duplicate basic maps can be removed, however.
5022  * In particular, if the same basic map appears as a disjunct
5023  * in multiple edges, then it only needs to be carried once.
5024  */
collect_inter_validity(struct isl_sched_graph * graph,int coincidence,struct isl_exploit_lineality_data * data)5025 static __isl_give isl_basic_set_list *collect_inter_validity(
5026 	struct isl_sched_graph *graph, int coincidence,
5027 	struct isl_exploit_lineality_data *data)
5028 {
5029 	isl_union_map *inter;
5030 	isl_union_set *wrap;
5031 	isl_basic_set_list *list;
5032 
5033 	inter = collect_validity(graph, &add_inter, coincidence);
5034 	inter = exploit_inter_lineality(inter, data);
5035 	inter = isl_union_map_remove_divs(inter);
5036 	wrap = isl_union_map_wrap(inter);
5037 	list = isl_union_set_get_basic_set_list(wrap);
5038 	isl_union_set_free(wrap);
5039 	return isl_basic_set_list_coefficients(list);
5040 }
5041 
5042 /* Construct an LP problem for finding schedule coefficients
5043  * such that the schedule carries as many of the "n_edge" groups of
5044  * dependences as possible based on the corresponding coefficient
5045  * constraints and return the lexicographically smallest non-trivial solution.
5046  * "intra" is the sequence of coefficient constraints for intra-node edges.
5047  * "inter" is the sequence of coefficient constraints for inter-node edges.
5048  * If "want_integral" is set, then compute an integral solution
5049  * for the coefficients rather than using the numerators
5050  * of a rational solution.
5051  * "carry_inter" indicates whether inter-node edges should be carried or
5052  * only respected.
5053  *
5054  * If none of the "n_edge" groups can be carried
5055  * then return an empty vector.
5056  */
compute_carrying_sol_coef(isl_ctx * ctx,struct isl_sched_graph * graph,int n_edge,__isl_keep isl_basic_set_list * intra,__isl_keep isl_basic_set_list * inter,int want_integral,int carry_inter)5057 static __isl_give isl_vec *compute_carrying_sol_coef(isl_ctx *ctx,
5058 	struct isl_sched_graph *graph, int n_edge,
5059 	__isl_keep isl_basic_set_list *intra,
5060 	__isl_keep isl_basic_set_list *inter, int want_integral,
5061 	int carry_inter)
5062 {
5063 	isl_basic_set *lp;
5064 
5065 	if (setup_carry_lp(ctx, graph, n_edge, intra, inter, carry_inter) < 0)
5066 		return NULL;
5067 
5068 	lp = isl_basic_set_copy(graph->lp);
5069 	return non_neg_lexmin(graph, lp, n_edge, want_integral);
5070 }
5071 
5072 /* Construct an LP problem for finding schedule coefficients
5073  * such that the schedule carries as many of the validity dependences
5074  * as possible and
5075  * return the lexicographically smallest non-trivial solution.
5076  * If "fallback" is set, then the carrying is performed as a fallback
5077  * for the Pluto-like scheduler.
5078  * If "coincidence" is set, then try and carry coincidence edges as well.
5079  *
5080  * The variable "n_edge" stores the number of groups that should be carried.
5081  * If none of the "n_edge" groups can be carried
5082  * then return an empty vector.
5083  * If, moreover, "n_edge" is zero, then the LP problem does not even
5084  * need to be constructed.
5085  *
5086  * If a fallback solution is being computed, then compute an integral solution
5087  * for the coefficients rather than using the numerators
5088  * of a rational solution.
5089  *
5090  * If a fallback solution is being computed, if there are any intra-node
5091  * dependences, and if requested by the user, then first try
5092  * to only carry those intra-node dependences.
5093  * If this fails to carry any dependences, then try again
5094  * with the inter-node dependences included.
5095  */
compute_carrying_sol(isl_ctx * ctx,struct isl_sched_graph * graph,int fallback,int coincidence)5096 static __isl_give isl_vec *compute_carrying_sol(isl_ctx *ctx,
5097 	struct isl_sched_graph *graph, int fallback, int coincidence)
5098 {
5099 	isl_size n_intra, n_inter;
5100 	int n_edge;
5101 	struct isl_carry carry = { 0 };
5102 	isl_vec *sol;
5103 
5104 	carry.intra = collect_intra_validity(ctx, graph, coincidence,
5105 						&carry.lineality);
5106 	carry.inter = collect_inter_validity(graph, coincidence,
5107 						&carry.lineality);
5108 	n_intra = isl_basic_set_list_n_basic_set(carry.intra);
5109 	n_inter = isl_basic_set_list_n_basic_set(carry.inter);
5110 	if (n_intra < 0 || n_inter < 0)
5111 		goto error;
5112 
5113 	if (fallback && n_intra > 0 &&
5114 	    isl_options_get_schedule_carry_self_first(ctx)) {
5115 		sol = compute_carrying_sol_coef(ctx, graph, n_intra,
5116 				carry.intra, carry.inter, fallback, 0);
5117 		if (!sol || sol->size != 0 || n_inter == 0) {
5118 			isl_carry_clear(&carry);
5119 			return sol;
5120 		}
5121 		isl_vec_free(sol);
5122 	}
5123 
5124 	n_edge = n_intra + n_inter;
5125 	if (n_edge == 0) {
5126 		isl_carry_clear(&carry);
5127 		return isl_vec_alloc(ctx, 0);
5128 	}
5129 
5130 	sol = compute_carrying_sol_coef(ctx, graph, n_edge,
5131 				carry.intra, carry.inter, fallback, 1);
5132 	isl_carry_clear(&carry);
5133 	return sol;
5134 error:
5135 	isl_carry_clear(&carry);
5136 	return NULL;
5137 }
5138 
5139 /* Construct a schedule row for each node such that as many validity dependences
5140  * as possible are carried and then continue with the next band.
5141  * If "fallback" is set, then the carrying is performed as a fallback
5142  * for the Pluto-like scheduler.
5143  * If "coincidence" is set, then try and carry coincidence edges as well.
5144  *
5145  * If there are no validity dependences, then no dependence can be carried and
5146  * the procedure is guaranteed to fail.  If there is more than one component,
5147  * then try computing a schedule on each component separately
5148  * to prevent or at least postpone this failure.
5149  *
5150  * If a schedule row is computed, then check that dependences are carried
5151  * for at least one of the edges.
5152  *
5153  * If the computed schedule row turns out to be trivial on one or
5154  * more nodes where it should not be trivial, then we throw it away
5155  * and try again on each component separately.
5156  *
5157  * If there is only one component, then we accept the schedule row anyway,
5158  * but we do not consider it as a complete row and therefore do not
5159  * increment graph->n_row.  Note that the ranks of the nodes that
5160  * do get a non-trivial schedule part will get updated regardless and
5161  * graph->maxvar is computed based on these ranks.  The test for
5162  * whether more schedule rows are required in compute_schedule_wcc
5163  * is therefore not affected.
5164  *
5165  * Insert a band corresponding to the schedule row at position "node"
5166  * of the schedule tree and continue with the construction of the schedule.
5167  * This insertion and the continued construction is performed by split_scaled
5168  * after optionally checking for non-trivial common divisors.
5169  */
carry(__isl_take isl_schedule_node * node,struct isl_sched_graph * graph,int fallback,int coincidence)5170 static __isl_give isl_schedule_node *carry(__isl_take isl_schedule_node *node,
5171 	struct isl_sched_graph *graph, int fallback, int coincidence)
5172 {
5173 	int trivial;
5174 	isl_ctx *ctx;
5175 	isl_vec *sol;
5176 
5177 	if (!node)
5178 		return NULL;
5179 
5180 	ctx = isl_schedule_node_get_ctx(node);
5181 	sol = compute_carrying_sol(ctx, graph, fallback, coincidence);
5182 	if (!sol)
5183 		return isl_schedule_node_free(node);
5184 	if (sol->size == 0) {
5185 		isl_vec_free(sol);
5186 		if (graph->scc > 1)
5187 			return compute_component_schedule(node, graph, 1);
5188 		isl_die(ctx, isl_error_unknown, "unable to carry dependences",
5189 			return isl_schedule_node_free(node));
5190 	}
5191 
5192 	trivial = is_any_trivial(graph, sol);
5193 	if (trivial < 0) {
5194 		sol = isl_vec_free(sol);
5195 	} else if (trivial && graph->scc > 1) {
5196 		isl_vec_free(sol);
5197 		return compute_component_schedule(node, graph, 1);
5198 	}
5199 
5200 	if (update_schedule(graph, sol, 0) < 0)
5201 		return isl_schedule_node_free(node);
5202 	if (trivial)
5203 		graph->n_row--;
5204 
5205 	return split_scaled(node, graph);
5206 }
5207 
5208 /* Construct a schedule row for each node such that as many validity dependences
5209  * as possible are carried and then continue with the next band.
5210  * Do so as a fallback for the Pluto-like scheduler.
5211  * If "coincidence" is set, then try and carry coincidence edges as well.
5212  */
carry_fallback(__isl_take isl_schedule_node * node,struct isl_sched_graph * graph,int coincidence)5213 static __isl_give isl_schedule_node *carry_fallback(
5214 	__isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
5215 	int coincidence)
5216 {
5217 	return carry(node, graph, 1, coincidence);
5218 }
5219 
5220 /* Construct a schedule row for each node such that as many validity dependences
5221  * as possible are carried and then continue with the next band.
5222  * Do so for the case where the Feautrier scheduler was selected
5223  * by the user.
5224  */
carry_feautrier(__isl_take isl_schedule_node * node,struct isl_sched_graph * graph)5225 static __isl_give isl_schedule_node *carry_feautrier(
5226 	__isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
5227 {
5228 	return carry(node, graph, 0, 0);
5229 }
5230 
5231 /* Construct a schedule row for each node such that as many validity dependences
5232  * as possible are carried and then continue with the next band.
5233  * Do so as a fallback for the Pluto-like scheduler.
5234  */
carry_dependences(__isl_take isl_schedule_node * node,struct isl_sched_graph * graph)5235 static __isl_give isl_schedule_node *carry_dependences(
5236 	__isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
5237 {
5238 	return carry_fallback(node, graph, 0);
5239 }
5240 
5241 /* Construct a schedule row for each node such that as many validity or
5242  * coincidence dependences as possible are carried and
5243  * then continue with the next band.
5244  * Do so as a fallback for the Pluto-like scheduler.
5245  */
carry_coincidence(__isl_take isl_schedule_node * node,struct isl_sched_graph * graph)5246 static __isl_give isl_schedule_node *carry_coincidence(
5247 	__isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
5248 {
5249 	return carry_fallback(node, graph, 1);
5250 }
5251 
5252 /* Topologically sort statements mapped to the same schedule iteration
5253  * and add insert a sequence node in front of "node"
5254  * corresponding to this order.
5255  * If "initialized" is set, then it may be assumed that
5256  * isl_sched_graph_compute_maxvar
5257  * has been called on the current band.  Otherwise, call
5258  * isl_sched_graph_compute_maxvar if and before carry_dependences gets called.
5259  *
5260  * If it turns out to be impossible to sort the statements apart,
5261  * because different dependences impose different orderings
5262  * on the statements, then we extend the schedule such that
5263  * it carries at least one more dependence.
5264  */
sort_statements(__isl_take isl_schedule_node * node,struct isl_sched_graph * graph,int initialized)5265 static __isl_give isl_schedule_node *sort_statements(
5266 	__isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
5267 	int initialized)
5268 {
5269 	isl_ctx *ctx;
5270 	isl_union_set_list *filters;
5271 
5272 	if (!node)
5273 		return NULL;
5274 
5275 	ctx = isl_schedule_node_get_ctx(node);
5276 	if (graph->n < 1)
5277 		isl_die(ctx, isl_error_internal,
5278 			"graph should have at least one node",
5279 			return isl_schedule_node_free(node));
5280 
5281 	if (graph->n == 1)
5282 		return node;
5283 
5284 	if (update_edges(ctx, graph) < 0)
5285 		return isl_schedule_node_free(node);
5286 
5287 	if (graph->n_edge == 0)
5288 		return node;
5289 
5290 	if (detect_sccs(ctx, graph) < 0)
5291 		return isl_schedule_node_free(node);
5292 
5293 	next_band(graph);
5294 	if (graph->scc < graph->n) {
5295 		if (!initialized && isl_sched_graph_compute_maxvar(graph) < 0)
5296 			return isl_schedule_node_free(node);
5297 		return carry_dependences(node, graph);
5298 	}
5299 
5300 	filters = isl_sched_graph_extract_sccs(ctx, graph);
5301 	node = isl_schedule_node_insert_sequence(node, filters);
5302 
5303 	return node;
5304 }
5305 
5306 /* Are there any (non-empty) (conditional) validity edges in the graph?
5307  */
has_validity_edges(struct isl_sched_graph * graph)5308 static int has_validity_edges(struct isl_sched_graph *graph)
5309 {
5310 	int i;
5311 
5312 	for (i = 0; i < graph->n_edge; ++i) {
5313 		int empty;
5314 
5315 		empty = isl_map_plain_is_empty(graph->edge[i].map);
5316 		if (empty < 0)
5317 			return -1;
5318 		if (empty)
5319 			continue;
5320 		if (is_any_validity(&graph->edge[i]))
5321 			return 1;
5322 	}
5323 
5324 	return 0;
5325 }
5326 
5327 /* Should we apply a Feautrier step?
5328  * That is, did the user request the Feautrier algorithm and are
5329  * there any validity dependences (left)?
5330  */
need_feautrier_step(isl_ctx * ctx,struct isl_sched_graph * graph)5331 static int need_feautrier_step(isl_ctx *ctx, struct isl_sched_graph *graph)
5332 {
5333 	if (ctx->opt->schedule_algorithm != ISL_SCHEDULE_ALGORITHM_FEAUTRIER)
5334 		return 0;
5335 
5336 	return has_validity_edges(graph);
5337 }
5338 
5339 /* Compute a schedule for a connected dependence graph using Feautrier's
5340  * multi-dimensional scheduling algorithm and return the updated schedule node.
5341  *
5342  * The original algorithm is described in [1].
5343  * The main idea is to minimize the number of scheduling dimensions, by
5344  * trying to satisfy as many dependences as possible per scheduling dimension.
5345  *
5346  * [1] P. Feautrier, Some Efficient Solutions to the Affine Scheduling
5347  *     Problem, Part II: Multi-Dimensional Time.
5348  *     In Intl. Journal of Parallel Programming, 1992.
5349  */
compute_schedule_wcc_feautrier(isl_schedule_node * node,struct isl_sched_graph * graph)5350 static __isl_give isl_schedule_node *compute_schedule_wcc_feautrier(
5351 	isl_schedule_node *node, struct isl_sched_graph *graph)
5352 {
5353 	return carry_feautrier(node, graph);
5354 }
5355 
5356 /* Turn off the "local" bit on all (condition) edges.
5357  */
clear_local_edges(struct isl_sched_graph * graph)5358 static void clear_local_edges(struct isl_sched_graph *graph)
5359 {
5360 	int i;
5361 
5362 	for (i = 0; i < graph->n_edge; ++i)
5363 		if (isl_sched_edge_is_condition(&graph->edge[i]))
5364 			clear_local(&graph->edge[i]);
5365 }
5366 
5367 /* Does "graph" have both condition and conditional validity edges?
5368  */
need_condition_check(struct isl_sched_graph * graph)5369 static int need_condition_check(struct isl_sched_graph *graph)
5370 {
5371 	int i;
5372 	int any_condition = 0;
5373 	int any_conditional_validity = 0;
5374 
5375 	for (i = 0; i < graph->n_edge; ++i) {
5376 		if (isl_sched_edge_is_condition(&graph->edge[i]))
5377 			any_condition = 1;
5378 		if (isl_sched_edge_is_conditional_validity(&graph->edge[i]))
5379 			any_conditional_validity = 1;
5380 	}
5381 
5382 	return any_condition && any_conditional_validity;
5383 }
5384 
5385 /* Does "graph" contain any coincidence edge?
5386  */
has_any_coincidence(struct isl_sched_graph * graph)5387 static int has_any_coincidence(struct isl_sched_graph *graph)
5388 {
5389 	int i;
5390 
5391 	for (i = 0; i < graph->n_edge; ++i)
5392 		if (is_coincidence(&graph->edge[i]))
5393 			return 1;
5394 
5395 	return 0;
5396 }
5397 
5398 /* Extract the final schedule row as a map with the iteration domain
5399  * of "node" as domain.
5400  */
final_row(struct isl_sched_node * node)5401 static __isl_give isl_map *final_row(struct isl_sched_node *node)
5402 {
5403 	isl_multi_aff *ma;
5404 	isl_size n_row;
5405 
5406 	n_row = isl_mat_rows(node->sched);
5407 	if (n_row < 0)
5408 		return NULL;
5409 	ma = isl_sched_node_extract_partial_schedule_multi_aff(node,
5410 								n_row - 1, 1);
5411 	return isl_map_from_multi_aff(ma);
5412 }
5413 
5414 /* Is the conditional validity dependence in the edge with index "edge_index"
5415  * violated by the latest (i.e., final) row of the schedule?
5416  * That is, is i scheduled after j
5417  * for any conditional validity dependence i -> j?
5418  */
is_violated(struct isl_sched_graph * graph,int edge_index)5419 static int is_violated(struct isl_sched_graph *graph, int edge_index)
5420 {
5421 	isl_map *src_sched, *dst_sched, *map;
5422 	struct isl_sched_edge *edge = &graph->edge[edge_index];
5423 	int empty;
5424 
5425 	src_sched = final_row(edge->src);
5426 	dst_sched = final_row(edge->dst);
5427 	map = isl_map_copy(edge->map);
5428 	map = isl_map_apply_domain(map, src_sched);
5429 	map = isl_map_apply_range(map, dst_sched);
5430 	map = isl_map_order_gt(map, isl_dim_in, 0, isl_dim_out, 0);
5431 	empty = isl_map_is_empty(map);
5432 	isl_map_free(map);
5433 
5434 	if (empty < 0)
5435 		return -1;
5436 
5437 	return !empty;
5438 }
5439 
5440 /* Does "graph" have any satisfied condition edges that
5441  * are adjacent to the conditional validity constraint with
5442  * domain "conditional_source" and range "conditional_sink"?
5443  *
5444  * A satisfied condition is one that is not local.
5445  * If a condition was forced to be local already (i.e., marked as local)
5446  * then there is no need to check if it is in fact local.
5447  *
5448  * Additionally, mark all adjacent condition edges found as local.
5449  */
has_adjacent_true_conditions(struct isl_sched_graph * graph,__isl_keep isl_union_set * conditional_source,__isl_keep isl_union_set * conditional_sink)5450 static int has_adjacent_true_conditions(struct isl_sched_graph *graph,
5451 	__isl_keep isl_union_set *conditional_source,
5452 	__isl_keep isl_union_set *conditional_sink)
5453 {
5454 	int i;
5455 	int any = 0;
5456 
5457 	for (i = 0; i < graph->n_edge; ++i) {
5458 		int adjacent, local;
5459 		isl_union_map *condition;
5460 
5461 		if (!isl_sched_edge_is_condition(&graph->edge[i]))
5462 			continue;
5463 		if (is_local(&graph->edge[i]))
5464 			continue;
5465 
5466 		condition = graph->edge[i].tagged_condition;
5467 		adjacent = domain_intersects(condition, conditional_sink);
5468 		if (adjacent >= 0 && !adjacent)
5469 			adjacent = range_intersects(condition,
5470 							conditional_source);
5471 		if (adjacent < 0)
5472 			return -1;
5473 		if (!adjacent)
5474 			continue;
5475 
5476 		set_local(&graph->edge[i]);
5477 
5478 		local = is_condition_false(&graph->edge[i]);
5479 		if (local < 0)
5480 			return -1;
5481 		if (!local)
5482 			any = 1;
5483 	}
5484 
5485 	return any;
5486 }
5487 
5488 /* Are there any violated conditional validity dependences with
5489  * adjacent condition dependences that are not local with respect
5490  * to the current schedule?
5491  * That is, is the conditional validity constraint violated?
5492  *
5493  * Additionally, mark all those adjacent condition dependences as local.
5494  * We also mark those adjacent condition dependences that were not marked
5495  * as local before, but just happened to be local already.  This ensures
5496  * that they remain local if the schedule is recomputed.
5497  *
5498  * We first collect domain and range of all violated conditional validity
5499  * dependences and then check if there are any adjacent non-local
5500  * condition dependences.
5501  */
has_violated_conditional_constraint(isl_ctx * ctx,struct isl_sched_graph * graph)5502 static int has_violated_conditional_constraint(isl_ctx *ctx,
5503 	struct isl_sched_graph *graph)
5504 {
5505 	int i;
5506 	int any = 0;
5507 	isl_union_set *source, *sink;
5508 
5509 	source = isl_union_set_empty(isl_space_params_alloc(ctx, 0));
5510 	sink = isl_union_set_empty(isl_space_params_alloc(ctx, 0));
5511 	for (i = 0; i < graph->n_edge; ++i) {
5512 		isl_union_set *uset;
5513 		isl_union_map *umap;
5514 		int violated;
5515 
5516 		if (!isl_sched_edge_is_conditional_validity(&graph->edge[i]))
5517 			continue;
5518 
5519 		violated = is_violated(graph, i);
5520 		if (violated < 0)
5521 			goto error;
5522 		if (!violated)
5523 			continue;
5524 
5525 		any = 1;
5526 
5527 		umap = isl_union_map_copy(graph->edge[i].tagged_validity);
5528 		uset = isl_union_map_domain(umap);
5529 		source = isl_union_set_union(source, uset);
5530 		source = isl_union_set_coalesce(source);
5531 
5532 		umap = isl_union_map_copy(graph->edge[i].tagged_validity);
5533 		uset = isl_union_map_range(umap);
5534 		sink = isl_union_set_union(sink, uset);
5535 		sink = isl_union_set_coalesce(sink);
5536 	}
5537 
5538 	if (any)
5539 		any = has_adjacent_true_conditions(graph, source, sink);
5540 
5541 	isl_union_set_free(source);
5542 	isl_union_set_free(sink);
5543 	return any;
5544 error:
5545 	isl_union_set_free(source);
5546 	isl_union_set_free(sink);
5547 	return -1;
5548 }
5549 
5550 /* Examine the current band (the rows between graph->band_start and
5551  * graph->n_total_row), deciding whether to drop it or add it to "node"
5552  * and then continue with the computation of the next band, if any.
5553  * If "initialized" is set, then it may be assumed that
5554  * isl_sched_graph_compute_maxvar
5555  * has been called on the current band.  Otherwise, call
5556  * isl_sched_graph_compute_maxvar if and before carry_dependences gets called.
5557  *
5558  * The caller keeps looking for a new row as long as
5559  * graph->n_row < graph->maxvar.  If the latest attempt to find
5560  * such a row failed (i.e., we still have graph->n_row < graph->maxvar),
5561  * then we either
5562  * - split between SCCs and start over (assuming we found an interesting
5563  *	pair of SCCs between which to split)
5564  * - continue with the next band (assuming the current band has at least
5565  *	one row)
5566  * - if there is more than one SCC left, then split along all SCCs
5567  * - if outer coincidence needs to be enforced, then try to carry as many
5568  *	validity or coincidence dependences as possible and
5569  *	continue with the next band
5570  * - try to carry as many validity dependences as possible and
5571  *	continue with the next band
5572  * In each case, we first insert a band node in the schedule tree
5573  * if any rows have been computed.
5574  *
5575  * If the caller managed to complete the schedule and the current band
5576  * is empty, then finish off by topologically
5577  * sorting the statements based on the remaining dependences.
5578  * If, on the other hand, the current band has at least one row,
5579  * then continue with the next band.  Note that this next band
5580  * will necessarily be empty, but the graph may still be split up
5581  * into weakly connected components before arriving back here.
5582  */
isl_schedule_node_compute_finish_band(__isl_take isl_schedule_node * node,struct isl_sched_graph * graph,int initialized)5583 __isl_give isl_schedule_node *isl_schedule_node_compute_finish_band(
5584 	__isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
5585 	int initialized)
5586 {
5587 	int empty;
5588 
5589 	if (!node)
5590 		return NULL;
5591 
5592 	empty = graph->n_total_row == graph->band_start;
5593 	if (graph->n_row < graph->maxvar) {
5594 		isl_ctx *ctx;
5595 
5596 		ctx = isl_schedule_node_get_ctx(node);
5597 		if (!ctx->opt->schedule_maximize_band_depth && !empty)
5598 			return compute_next_band(node, graph, 1);
5599 		if (graph->src_scc >= 0)
5600 			return compute_split_schedule(node, graph);
5601 		if (!empty)
5602 			return compute_next_band(node, graph, 1);
5603 		if (graph->scc > 1)
5604 			return compute_component_schedule(node, graph, 1);
5605 		if (!initialized && isl_sched_graph_compute_maxvar(graph) < 0)
5606 			return isl_schedule_node_free(node);
5607 		if (isl_options_get_schedule_outer_coincidence(ctx))
5608 			return carry_coincidence(node, graph);
5609 		return carry_dependences(node, graph);
5610 	}
5611 
5612 	if (!empty)
5613 		return compute_next_band(node, graph, 1);
5614 	return sort_statements(node, graph, initialized);
5615 }
5616 
5617 /* Construct a band of schedule rows for a connected dependence graph.
5618  * The caller is responsible for determining the strongly connected
5619  * components and calling isl_sched_graph_compute_maxvar first.
5620  *
5621  * We try to find a sequence of as many schedule rows as possible that result
5622  * in non-negative dependence distances (independent of the previous rows
5623  * in the sequence, i.e., such that the sequence is tilable), with as
5624  * many of the initial rows as possible satisfying the coincidence constraints.
5625  * The computation stops if we can't find any more rows or if we have found
5626  * all the rows we wanted to find.
5627  *
5628  * If ctx->opt->schedule_outer_coincidence is set, then we force the
5629  * outermost dimension to satisfy the coincidence constraints.  If this
5630  * turns out to be impossible, we fall back on the general scheme above
5631  * and try to carry as many dependences as possible.
5632  *
5633  * If "graph" contains both condition and conditional validity dependences,
5634  * then we need to check that that the conditional schedule constraint
5635  * is satisfied, i.e., there are no violated conditional validity dependences
5636  * that are adjacent to any non-local condition dependences.
5637  * If there are, then we mark all those adjacent condition dependences
5638  * as local and recompute the current band.  Those dependences that
5639  * are marked local will then be forced to be local.
5640  * The initial computation is performed with no dependences marked as local.
5641  * If we are lucky, then there will be no violated conditional validity
5642  * dependences adjacent to any non-local condition dependences.
5643  * Otherwise, we mark some additional condition dependences as local and
5644  * recompute.  We continue this process until there are no violations left or
5645  * until we are no longer able to compute a schedule.
5646  * Since there are only a finite number of dependences,
5647  * there will only be a finite number of iterations.
5648  */
isl_schedule_node_compute_wcc_band(isl_ctx * ctx,struct isl_sched_graph * graph)5649 isl_stat isl_schedule_node_compute_wcc_band(isl_ctx *ctx,
5650 	struct isl_sched_graph *graph)
5651 {
5652 	int has_coincidence;
5653 	int use_coincidence;
5654 	int force_coincidence = 0;
5655 	int check_conditional;
5656 
5657 	if (sort_sccs(graph) < 0)
5658 		return isl_stat_error;
5659 
5660 	clear_local_edges(graph);
5661 	check_conditional = need_condition_check(graph);
5662 	has_coincidence = has_any_coincidence(graph);
5663 
5664 	if (ctx->opt->schedule_outer_coincidence)
5665 		force_coincidence = 1;
5666 
5667 	use_coincidence = has_coincidence;
5668 	while (graph->n_row < graph->maxvar) {
5669 		isl_vec *sol;
5670 		int violated;
5671 		int coincident;
5672 
5673 		graph->src_scc = -1;
5674 		graph->dst_scc = -1;
5675 
5676 		if (setup_lp(ctx, graph, use_coincidence) < 0)
5677 			return isl_stat_error;
5678 		sol = solve_lp(ctx, graph);
5679 		if (!sol)
5680 			return isl_stat_error;
5681 		if (sol->size == 0) {
5682 			int empty = graph->n_total_row == graph->band_start;
5683 
5684 			isl_vec_free(sol);
5685 			if (use_coincidence && (!force_coincidence || !empty)) {
5686 				use_coincidence = 0;
5687 				continue;
5688 			}
5689 			return isl_stat_ok;
5690 		}
5691 		coincident = !has_coincidence || use_coincidence;
5692 		if (update_schedule(graph, sol, coincident) < 0)
5693 			return isl_stat_error;
5694 
5695 		if (!check_conditional)
5696 			continue;
5697 		violated = has_violated_conditional_constraint(ctx, graph);
5698 		if (violated < 0)
5699 			return isl_stat_error;
5700 		if (!violated)
5701 			continue;
5702 		if (reset_band(graph) < 0)
5703 			return isl_stat_error;
5704 		use_coincidence = has_coincidence;
5705 	}
5706 
5707 	return isl_stat_ok;
5708 }
5709 
5710 /* Compute a schedule for a connected dependence graph by considering
5711  * the graph as a whole and return the updated schedule node.
5712  *
5713  * The actual schedule rows of the current band are computed by
5714  * isl_schedule_node_compute_wcc_band.  isl_schedule_node_compute_finish_band
5715  * takes care of integrating the band into "node" and continuing
5716  * the computation.
5717  */
compute_schedule_wcc_whole(__isl_take isl_schedule_node * node,struct isl_sched_graph * graph)5718 static __isl_give isl_schedule_node *compute_schedule_wcc_whole(
5719 	__isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
5720 {
5721 	isl_ctx *ctx;
5722 
5723 	if (!node)
5724 		return NULL;
5725 
5726 	ctx = isl_schedule_node_get_ctx(node);
5727 	if (isl_schedule_node_compute_wcc_band(ctx, graph) < 0)
5728 		return isl_schedule_node_free(node);
5729 
5730 	return isl_schedule_node_compute_finish_band(node, graph, 1);
5731 }
5732 
5733 /* Compute a schedule for a connected dependence graph and return
5734  * the updated schedule node.
5735  *
5736  * If Feautrier's algorithm is selected, we first recursively try to satisfy
5737  * as many validity dependences as possible. When all validity dependences
5738  * are satisfied we extend the schedule to a full-dimensional schedule.
5739  *
5740  * Call compute_schedule_wcc_whole or isl_schedule_node_compute_wcc_clustering
5741  * depending on whether the user has selected the option to try and
5742  * compute a schedule for the entire (weakly connected) component first.
5743  * If there is only a single strongly connected component (SCC), then
5744  * there is no point in trying to combine SCCs
5745  * in isl_schedule_node_compute_wcc_clustering, so compute_schedule_wcc_whole
5746  * is called instead.
5747  */
compute_schedule_wcc(__isl_take isl_schedule_node * node,struct isl_sched_graph * graph)5748 static __isl_give isl_schedule_node *compute_schedule_wcc(
5749 	__isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
5750 {
5751 	isl_ctx *ctx;
5752 
5753 	if (!node)
5754 		return NULL;
5755 
5756 	ctx = isl_schedule_node_get_ctx(node);
5757 	if (detect_sccs(ctx, graph) < 0)
5758 		return isl_schedule_node_free(node);
5759 
5760 	if (isl_sched_graph_compute_maxvar(graph) < 0)
5761 		return isl_schedule_node_free(node);
5762 
5763 	if (need_feautrier_step(ctx, graph))
5764 		return compute_schedule_wcc_feautrier(node, graph);
5765 
5766 	if (graph->scc <= 1 || isl_options_get_schedule_whole_component(ctx))
5767 		return compute_schedule_wcc_whole(node, graph);
5768 	else
5769 		return isl_schedule_node_compute_wcc_clustering(node, graph);
5770 }
5771 
5772 /* Compute a schedule for each group of nodes identified by node->scc
5773  * separately and then combine them in a sequence node (or as set node
5774  * if graph->weak is set) inserted at position "node" of the schedule tree.
5775  * Return the updated schedule node.
5776  *
5777  * If "wcc" is set then each of the groups belongs to a single
5778  * weakly connected component in the dependence graph so that
5779  * there is no need for compute_sub_schedule to look for weakly
5780  * connected components.
5781  *
5782  * If a set node would be introduced and if the number of components
5783  * is equal to the number of nodes, then check if the schedule
5784  * is already complete.  If so, a redundant set node would be introduced
5785  * (without any further descendants) stating that the statements
5786  * can be executed in arbitrary order, which is also expressed
5787  * by the absence of any node.  Refrain from inserting any nodes
5788  * in this case and simply return.
5789  */
compute_component_schedule(__isl_take isl_schedule_node * node,struct isl_sched_graph * graph,int wcc)5790 static __isl_give isl_schedule_node *compute_component_schedule(
5791 	__isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
5792 	int wcc)
5793 {
5794 	int component;
5795 	isl_ctx *ctx;
5796 	isl_union_set_list *filters;
5797 
5798 	if (!node)
5799 		return NULL;
5800 
5801 	if (graph->weak && graph->scc == graph->n) {
5802 		if (isl_sched_graph_compute_maxvar(graph) < 0)
5803 			return isl_schedule_node_free(node);
5804 		if (graph->n_row >= graph->maxvar)
5805 			return node;
5806 	}
5807 
5808 	ctx = isl_schedule_node_get_ctx(node);
5809 	filters = isl_sched_graph_extract_sccs(ctx, graph);
5810 	if (graph->weak)
5811 		node = isl_schedule_node_insert_set(node, filters);
5812 	else
5813 		node = isl_schedule_node_insert_sequence(node, filters);
5814 
5815 	for (component = 0; component < graph->scc; ++component) {
5816 		node = isl_schedule_node_grandchild(node, component, 0);
5817 		node = compute_sub_schedule(node, ctx, graph,
5818 				    &isl_sched_node_scc_exactly,
5819 				    &isl_sched_edge_scc_exactly,
5820 				    component, wcc);
5821 		node = isl_schedule_node_grandparent(node);
5822 	}
5823 
5824 	return node;
5825 }
5826 
5827 /* Compute a schedule for the given dependence graph and insert it at "node".
5828  * Return the updated schedule node.
5829  *
5830  * We first check if the graph is connected (through validity and conditional
5831  * validity dependences) and, if not, compute a schedule
5832  * for each component separately.
5833  * If the schedule_serialize_sccs option is set, then we check for strongly
5834  * connected components instead and compute a separate schedule for
5835  * each such strongly connected component.
5836  */
compute_schedule(isl_schedule_node * node,struct isl_sched_graph * graph)5837 static __isl_give isl_schedule_node *compute_schedule(isl_schedule_node *node,
5838 	struct isl_sched_graph *graph)
5839 {
5840 	isl_ctx *ctx;
5841 
5842 	if (!node)
5843 		return NULL;
5844 
5845 	ctx = isl_schedule_node_get_ctx(node);
5846 	if (isl_options_get_schedule_serialize_sccs(ctx)) {
5847 		if (detect_sccs(ctx, graph) < 0)
5848 			return isl_schedule_node_free(node);
5849 	} else {
5850 		if (detect_wccs(ctx, graph) < 0)
5851 			return isl_schedule_node_free(node);
5852 	}
5853 
5854 	if (graph->scc > 1)
5855 		return compute_component_schedule(node, graph, 1);
5856 
5857 	return compute_schedule_wcc(node, graph);
5858 }
5859 
5860 /* Compute a schedule on sc->domain that respects the given schedule
5861  * constraints.
5862  *
5863  * In particular, the schedule respects all the validity dependences.
5864  * If the default isl scheduling algorithm is used, it tries to minimize
5865  * the dependence distances over the proximity dependences.
5866  * If Feautrier's scheduling algorithm is used, the proximity dependence
5867  * distances are only minimized during the extension to a full-dimensional
5868  * schedule.
5869  *
5870  * If there are any condition and conditional validity dependences,
5871  * then the conditional validity dependences may be violated inside
5872  * a tilable band, provided they have no adjacent non-local
5873  * condition dependences.
5874  */
isl_schedule_constraints_compute_schedule(__isl_take isl_schedule_constraints * sc)5875 __isl_give isl_schedule *isl_schedule_constraints_compute_schedule(
5876 	__isl_take isl_schedule_constraints *sc)
5877 {
5878 	isl_ctx *ctx = isl_schedule_constraints_get_ctx(sc);
5879 	struct isl_sched_graph graph = { 0 };
5880 	isl_schedule *sched;
5881 	isl_schedule_node *node;
5882 	isl_union_set *domain;
5883 	isl_size n;
5884 
5885 	sc = isl_schedule_constraints_align_params(sc);
5886 
5887 	domain = isl_schedule_constraints_get_domain(sc);
5888 	n = isl_union_set_n_set(domain);
5889 	if (n == 0) {
5890 		isl_schedule_constraints_free(sc);
5891 		return isl_schedule_from_domain(domain);
5892 	}
5893 
5894 	if (n < 0 || isl_sched_graph_init(&graph, sc) < 0)
5895 		domain = isl_union_set_free(domain);
5896 
5897 	node = isl_schedule_node_from_domain(domain);
5898 	node = isl_schedule_node_child(node, 0);
5899 	if (graph.n > 0)
5900 		node = compute_schedule(node, &graph);
5901 	sched = isl_schedule_node_get_schedule(node);
5902 	isl_schedule_node_free(node);
5903 
5904 	isl_sched_graph_free(ctx, &graph);
5905 	isl_schedule_constraints_free(sc);
5906 
5907 	return sched;
5908 }
5909 
5910 /* Compute a schedule for the given union of domains that respects
5911  * all the validity dependences and minimizes
5912  * the dependence distances over the proximity dependences.
5913  *
5914  * This function is kept for backward compatibility.
5915  */
isl_union_set_compute_schedule(__isl_take isl_union_set * domain,__isl_take isl_union_map * validity,__isl_take isl_union_map * proximity)5916 __isl_give isl_schedule *isl_union_set_compute_schedule(
5917 	__isl_take isl_union_set *domain,
5918 	__isl_take isl_union_map *validity,
5919 	__isl_take isl_union_map *proximity)
5920 {
5921 	isl_schedule_constraints *sc;
5922 
5923 	sc = isl_schedule_constraints_on_domain(domain);
5924 	sc = isl_schedule_constraints_set_validity(sc, validity);
5925 	sc = isl_schedule_constraints_set_proximity(sc, proximity);
5926 
5927 	return isl_schedule_constraints_compute_schedule(sc);
5928 }
5929