xref: /netbsd-src/external/mit/isl/dist/isl_scheduler.c (revision 5971e316fdea024efff6be8f03536623db06833e)
1 /*
2  * Copyright 2011      INRIA Saclay
3  * Copyright 2012-2014 Ecole Normale Superieure
4  * Copyright 2015-2016 Sven Verdoolaege
5  * Copyright 2016      INRIA Paris
6  * Copyright 2017      Sven Verdoolaege
7  *
8  * Use of this software is governed by the MIT license
9  *
10  * Written by Sven Verdoolaege, INRIA Saclay - Ile-de-France,
11  * Parc Club Orsay Universite, ZAC des vignes, 4 rue Jacques Monod,
12  * 91893 Orsay, France
13  * and Ecole Normale Superieure, 45 rue d'Ulm, 75230 Paris, France
14  * and Centre de Recherche Inria de Paris, 2 rue Simone Iff - Voie DQ12,
15  * CS 42112, 75589 Paris Cedex 12, France
16  */
17 
18 #include <isl_ctx_private.h>
19 #include <isl_map_private.h>
20 #include <isl_space_private.h>
21 #include <isl_aff_private.h>
22 #include <isl/hash.h>
23 #include <isl/id.h>
24 #include <isl/constraint.h>
25 #include <isl/schedule.h>
26 #include <isl_schedule_constraints.h>
27 #include <isl/schedule_node.h>
28 #include <isl_mat_private.h>
29 #include <isl_vec_private.h>
30 #include <isl/set.h>
31 #include <isl_union_set_private.h>
32 #include <isl_seq.h>
33 #include <isl_tab.h>
34 #include <isl_dim_map.h>
35 #include <isl/map_to_basic_set.h>
36 #include <isl_sort.h>
37 #include <isl_options_private.h>
38 #include <isl_tarjan.h>
39 #include <isl_morph.h>
40 #include <isl/ilp.h>
41 #include <isl_val_private.h>
42 
43 #include "isl_scheduler.h"
44 #include "isl_scheduler_clustering.h"
45 
46 /*
47  * The scheduling algorithm implemented in this file was inspired by
48  * Bondhugula et al., "Automatic Transformations for Communication-Minimized
49  * Parallelization and Locality Optimization in the Polyhedral Model".
50  *
51  * For a detailed description of the variant implemented in isl,
52  * see Verdoolaege and Janssens, "Scheduling for PPCG" (2017).
53  */
54 
55 
node_has_tuples(const void * entry,const void * val)56 static isl_bool node_has_tuples(const void *entry, const void *val)
57 {
58 	struct isl_sched_node *node = (struct isl_sched_node *)entry;
59 	isl_space *space = (isl_space *) val;
60 
61 	return isl_space_has_equal_tuples(node->space, space);
62 }
63 
isl_sched_node_scc_exactly(struct isl_sched_node * node,int scc)64 int isl_sched_node_scc_exactly(struct isl_sched_node *node, int scc)
65 {
66 	return node->scc == scc;
67 }
68 
node_scc_at_most(struct isl_sched_node * node,int scc)69 static int node_scc_at_most(struct isl_sched_node *node, int scc)
70 {
71 	return node->scc <= scc;
72 }
73 
node_scc_at_least(struct isl_sched_node * node,int scc)74 static int node_scc_at_least(struct isl_sched_node *node, int scc)
75 {
76 	return node->scc >= scc;
77 }
78 
79 /* Is "edge" marked as being of type "type"?
80  */
isl_sched_edge_has_type(struct isl_sched_edge * edge,enum isl_edge_type type)81 int isl_sched_edge_has_type(struct isl_sched_edge *edge,
82 	enum isl_edge_type type)
83 {
84 	return ISL_FL_ISSET(edge->types, 1 << type);
85 }
86 
87 /* Mark "edge" as being of type "type".
88  */
set_type(struct isl_sched_edge * edge,enum isl_edge_type type)89 static void set_type(struct isl_sched_edge *edge, enum isl_edge_type type)
90 {
91 	ISL_FL_SET(edge->types, 1 << type);
92 }
93 
94 /* No longer mark "edge" as being of type "type"?
95  */
clear_type(struct isl_sched_edge * edge,enum isl_edge_type type)96 static void clear_type(struct isl_sched_edge *edge, enum isl_edge_type type)
97 {
98 	ISL_FL_CLR(edge->types, 1 << type);
99 }
100 
101 /* Is "edge" marked as a validity edge?
102  */
is_validity(struct isl_sched_edge * edge)103 static int is_validity(struct isl_sched_edge *edge)
104 {
105 	return isl_sched_edge_has_type(edge, isl_edge_validity);
106 }
107 
108 /* Mark "edge" as a validity edge.
109  */
set_validity(struct isl_sched_edge * edge)110 static void set_validity(struct isl_sched_edge *edge)
111 {
112 	set_type(edge, isl_edge_validity);
113 }
114 
115 /* Is "edge" marked as a proximity edge?
116  */
isl_sched_edge_is_proximity(struct isl_sched_edge * edge)117 int isl_sched_edge_is_proximity(struct isl_sched_edge *edge)
118 {
119 	return isl_sched_edge_has_type(edge, isl_edge_proximity);
120 }
121 
122 /* Is "edge" marked as a local edge?
123  */
is_local(struct isl_sched_edge * edge)124 static int is_local(struct isl_sched_edge *edge)
125 {
126 	return isl_sched_edge_has_type(edge, isl_edge_local);
127 }
128 
129 /* Mark "edge" as a local edge.
130  */
set_local(struct isl_sched_edge * edge)131 static void set_local(struct isl_sched_edge *edge)
132 {
133 	set_type(edge, isl_edge_local);
134 }
135 
136 /* No longer mark "edge" as a local edge.
137  */
clear_local(struct isl_sched_edge * edge)138 static void clear_local(struct isl_sched_edge *edge)
139 {
140 	clear_type(edge, isl_edge_local);
141 }
142 
143 /* Is "edge" marked as a coincidence edge?
144  */
is_coincidence(struct isl_sched_edge * edge)145 static int is_coincidence(struct isl_sched_edge *edge)
146 {
147 	return isl_sched_edge_has_type(edge, isl_edge_coincidence);
148 }
149 
150 /* Is "edge" marked as a condition edge?
151  */
isl_sched_edge_is_condition(struct isl_sched_edge * edge)152 int isl_sched_edge_is_condition(struct isl_sched_edge *edge)
153 {
154 	return isl_sched_edge_has_type(edge, isl_edge_condition);
155 }
156 
157 /* Is "edge" marked as a conditional validity edge?
158  */
isl_sched_edge_is_conditional_validity(struct isl_sched_edge * edge)159 int isl_sched_edge_is_conditional_validity(struct isl_sched_edge *edge)
160 {
161 	return isl_sched_edge_has_type(edge, isl_edge_conditional_validity);
162 }
163 
164 /* Is "edge" of a type that can appear multiple times between
165  * the same pair of nodes?
166  *
167  * Condition edges and conditional validity edges may have tagged
168  * dependence relations, in which case an edge is added for each
169  * pair of tags.
170  */
is_multi_edge_type(struct isl_sched_edge * edge)171 static int is_multi_edge_type(struct isl_sched_edge *edge)
172 {
173 	return isl_sched_edge_is_condition(edge) ||
174 		isl_sched_edge_is_conditional_validity(edge);
175 }
176 
177 /* Initialize node_table based on the list of nodes.
178  */
graph_init_table(isl_ctx * ctx,struct isl_sched_graph * graph)179 static int graph_init_table(isl_ctx *ctx, struct isl_sched_graph *graph)
180 {
181 	int i;
182 
183 	graph->node_table = isl_hash_table_alloc(ctx, graph->n);
184 	if (!graph->node_table)
185 		return -1;
186 
187 	for (i = 0; i < graph->n; ++i) {
188 		struct isl_hash_table_entry *entry;
189 		uint32_t hash;
190 
191 		hash = isl_space_get_tuple_hash(graph->node[i].space);
192 		entry = isl_hash_table_find(ctx, graph->node_table, hash,
193 					    &node_has_tuples,
194 					    graph->node[i].space, 1);
195 		if (!entry)
196 			return -1;
197 		entry->data = &graph->node[i];
198 	}
199 
200 	return 0;
201 }
202 
203 /* Return a pointer to the node that lives within the given space,
204  * an invalid node if there is no such node, or NULL in case of error.
205  */
isl_sched_graph_find_node(isl_ctx * ctx,struct isl_sched_graph * graph,__isl_keep isl_space * space)206 struct isl_sched_node *isl_sched_graph_find_node(isl_ctx *ctx,
207 	struct isl_sched_graph *graph, __isl_keep isl_space *space)
208 {
209 	struct isl_hash_table_entry *entry;
210 	uint32_t hash;
211 
212 	if (!space)
213 		return NULL;
214 
215 	hash = isl_space_get_tuple_hash(space);
216 	entry = isl_hash_table_find(ctx, graph->node_table, hash,
217 				    &node_has_tuples, space, 0);
218 	if (!entry)
219 		return NULL;
220 	if (entry == isl_hash_table_entry_none)
221 		return graph->node + graph->n;
222 
223 	return entry->data;
224 }
225 
226 /* Is "node" a node in "graph"?
227  */
isl_sched_graph_is_node(struct isl_sched_graph * graph,struct isl_sched_node * node)228 int isl_sched_graph_is_node(struct isl_sched_graph *graph,
229 	struct isl_sched_node *node)
230 {
231 	return node && node >= &graph->node[0] && node < &graph->node[graph->n];
232 }
233 
edge_has_src_and_dst(const void * entry,const void * val)234 static isl_bool edge_has_src_and_dst(const void *entry, const void *val)
235 {
236 	const struct isl_sched_edge *edge = entry;
237 	const struct isl_sched_edge *temp = val;
238 
239 	return isl_bool_ok(edge->src == temp->src && edge->dst == temp->dst);
240 }
241 
242 /* Add the given edge to graph->edge_table[type].
243  */
graph_edge_table_add(isl_ctx * ctx,struct isl_sched_graph * graph,enum isl_edge_type type,struct isl_sched_edge * edge)244 static isl_stat graph_edge_table_add(isl_ctx *ctx,
245 	struct isl_sched_graph *graph, enum isl_edge_type type,
246 	struct isl_sched_edge *edge)
247 {
248 	struct isl_hash_table_entry *entry;
249 	uint32_t hash;
250 
251 	hash = isl_hash_init();
252 	hash = isl_hash_builtin(hash, edge->src);
253 	hash = isl_hash_builtin(hash, edge->dst);
254 	entry = isl_hash_table_find(ctx, graph->edge_table[type], hash,
255 				    &edge_has_src_and_dst, edge, 1);
256 	if (!entry)
257 		return isl_stat_error;
258 	entry->data = edge;
259 
260 	return isl_stat_ok;
261 }
262 
263 /* Add "edge" to all relevant edge tables.
264  * That is, for every type of the edge, add it to the corresponding table.
265  */
graph_edge_tables_add(isl_ctx * ctx,struct isl_sched_graph * graph,struct isl_sched_edge * edge)266 static isl_stat graph_edge_tables_add(isl_ctx *ctx,
267 	struct isl_sched_graph *graph, struct isl_sched_edge *edge)
268 {
269 	enum isl_edge_type t;
270 
271 	for (t = isl_edge_first; t <= isl_edge_last; ++t) {
272 		if (!isl_sched_edge_has_type(edge, t))
273 			continue;
274 		if (graph_edge_table_add(ctx, graph, t, edge) < 0)
275 			return isl_stat_error;
276 	}
277 
278 	return isl_stat_ok;
279 }
280 
281 /* Allocate the edge_tables based on the maximal number of edges of
282  * each type.
283  */
graph_init_edge_tables(isl_ctx * ctx,struct isl_sched_graph * graph)284 static int graph_init_edge_tables(isl_ctx *ctx, struct isl_sched_graph *graph)
285 {
286 	int i;
287 
288 	for (i = 0; i <= isl_edge_last; ++i) {
289 		graph->edge_table[i] = isl_hash_table_alloc(ctx,
290 							    graph->max_edge[i]);
291 		if (!graph->edge_table[i])
292 			return -1;
293 	}
294 
295 	return 0;
296 }
297 
298 /* If graph->edge_table[type] contains an edge from the given source
299  * to the given destination, then return the hash table entry of this edge.
300  * Otherwise, return NULL.
301  */
graph_find_edge_entry(struct isl_sched_graph * graph,enum isl_edge_type type,struct isl_sched_node * src,struct isl_sched_node * dst)302 static struct isl_hash_table_entry *graph_find_edge_entry(
303 	struct isl_sched_graph *graph,
304 	enum isl_edge_type type,
305 	struct isl_sched_node *src, struct isl_sched_node *dst)
306 {
307 	isl_ctx *ctx = isl_space_get_ctx(src->space);
308 	uint32_t hash;
309 	struct isl_sched_edge temp = { .src = src, .dst = dst };
310 
311 	hash = isl_hash_init();
312 	hash = isl_hash_builtin(hash, temp.src);
313 	hash = isl_hash_builtin(hash, temp.dst);
314 	return isl_hash_table_find(ctx, graph->edge_table[type], hash,
315 				    &edge_has_src_and_dst, &temp, 0);
316 }
317 
318 
319 /* If graph->edge_table[type] contains an edge from the given source
320  * to the given destination, then return this edge.
321  * Return "none" if no such edge can be found.
322  * Return NULL on error.
323  */
graph_find_edge(struct isl_sched_graph * graph,enum isl_edge_type type,struct isl_sched_node * src,struct isl_sched_node * dst,struct isl_sched_edge * none)324 static struct isl_sched_edge *graph_find_edge(struct isl_sched_graph *graph,
325 	enum isl_edge_type type,
326 	struct isl_sched_node *src, struct isl_sched_node *dst,
327 	struct isl_sched_edge *none)
328 {
329 	struct isl_hash_table_entry *entry;
330 
331 	entry = graph_find_edge_entry(graph, type, src, dst);
332 	if (!entry)
333 		return NULL;
334 	if (entry == isl_hash_table_entry_none)
335 		return none;
336 
337 	return entry->data;
338 }
339 
340 /* Check whether the dependence graph has an edge of the given type
341  * between the given two nodes.
342  */
graph_has_edge(struct isl_sched_graph * graph,enum isl_edge_type type,struct isl_sched_node * src,struct isl_sched_node * dst)343 static isl_bool graph_has_edge(struct isl_sched_graph *graph,
344 	enum isl_edge_type type,
345 	struct isl_sched_node *src, struct isl_sched_node *dst)
346 {
347 	struct isl_sched_edge dummy;
348 	struct isl_sched_edge *edge;
349 	isl_bool empty;
350 
351 	edge = graph_find_edge(graph, type, src, dst, &dummy);
352 	if (!edge)
353 		return isl_bool_error;
354 	if (edge == &dummy)
355 		return isl_bool_false;
356 
357 	empty = isl_map_plain_is_empty(edge->map);
358 
359 	return isl_bool_not(empty);
360 }
361 
362 /* Look for any edge with the same src, dst and map fields as "model".
363  *
364  * Return the matching edge if one can be found.
365  * Return "model" if no matching edge is found.
366  * Return NULL on error.
367  */
graph_find_matching_edge(struct isl_sched_graph * graph,struct isl_sched_edge * model)368 static struct isl_sched_edge *graph_find_matching_edge(
369 	struct isl_sched_graph *graph, struct isl_sched_edge *model)
370 {
371 	enum isl_edge_type i;
372 	struct isl_sched_edge *edge;
373 
374 	for (i = isl_edge_first; i <= isl_edge_last; ++i) {
375 		int is_equal;
376 
377 		edge = graph_find_edge(graph, i, model->src, model->dst, model);
378 		if (!edge)
379 			return NULL;
380 		if (edge == model)
381 			continue;
382 		is_equal = isl_map_plain_is_equal(model->map, edge->map);
383 		if (is_equal < 0)
384 			return NULL;
385 		if (is_equal)
386 			return edge;
387 	}
388 
389 	return model;
390 }
391 
392 /* Remove the given edge from all the edge_tables that refer to it.
393  */
graph_remove_edge(struct isl_sched_graph * graph,struct isl_sched_edge * edge)394 static isl_stat graph_remove_edge(struct isl_sched_graph *graph,
395 	struct isl_sched_edge *edge)
396 {
397 	isl_ctx *ctx = isl_map_get_ctx(edge->map);
398 	enum isl_edge_type i;
399 
400 	for (i = isl_edge_first; i <= isl_edge_last; ++i) {
401 		struct isl_hash_table_entry *entry;
402 
403 		entry = graph_find_edge_entry(graph, i, edge->src, edge->dst);
404 		if (!entry)
405 			return isl_stat_error;
406 		if (entry == isl_hash_table_entry_none)
407 			continue;
408 		if (entry->data != edge)
409 			continue;
410 		isl_hash_table_remove(ctx, graph->edge_table[i], entry);
411 	}
412 
413 	return isl_stat_ok;
414 }
415 
416 /* Check whether the dependence graph has any edge
417  * between the given two nodes.
418  */
graph_has_any_edge(struct isl_sched_graph * graph,struct isl_sched_node * src,struct isl_sched_node * dst)419 static isl_bool graph_has_any_edge(struct isl_sched_graph *graph,
420 	struct isl_sched_node *src, struct isl_sched_node *dst)
421 {
422 	enum isl_edge_type i;
423 	isl_bool r;
424 
425 	for (i = isl_edge_first; i <= isl_edge_last; ++i) {
426 		r = graph_has_edge(graph, i, src, dst);
427 		if (r < 0 || r)
428 			return r;
429 	}
430 
431 	return r;
432 }
433 
434 /* Check whether the dependence graph has a validity edge
435  * between the given two nodes.
436  *
437  * Conditional validity edges are essentially validity edges that
438  * can be ignored if the corresponding condition edges are iteration private.
439  * Here, we are only checking for the presence of validity
440  * edges, so we need to consider the conditional validity edges too.
441  * In particular, this function is used during the detection
442  * of strongly connected components and we cannot ignore
443  * conditional validity edges during this detection.
444  */
isl_sched_graph_has_validity_edge(struct isl_sched_graph * graph,struct isl_sched_node * src,struct isl_sched_node * dst)445 isl_bool isl_sched_graph_has_validity_edge(struct isl_sched_graph *graph,
446 	struct isl_sched_node *src, struct isl_sched_node *dst)
447 {
448 	isl_bool r;
449 
450 	r = graph_has_edge(graph, isl_edge_validity, src, dst);
451 	if (r < 0 || r)
452 		return r;
453 
454 	return graph_has_edge(graph, isl_edge_conditional_validity, src, dst);
455 }
456 
457 /* Perform all the required memory allocations for a schedule graph "graph"
458  * with "n_node" nodes and "n_edge" edge and initialize the corresponding
459  * fields.
460  */
graph_alloc(isl_ctx * ctx,struct isl_sched_graph * graph,int n_node,int n_edge)461 static isl_stat graph_alloc(isl_ctx *ctx, struct isl_sched_graph *graph,
462 	int n_node, int n_edge)
463 {
464 	int i;
465 
466 	graph->n = n_node;
467 	graph->n_edge = n_edge;
468 	graph->node = isl_calloc_array(ctx, struct isl_sched_node, graph->n);
469 	graph->sorted = isl_calloc_array(ctx, int, graph->n);
470 	graph->region = isl_alloc_array(ctx,
471 					struct isl_trivial_region, graph->n);
472 	graph->edge = isl_calloc_array(ctx,
473 					struct isl_sched_edge, graph->n_edge);
474 
475 	graph->intra_hmap = isl_map_to_basic_set_alloc(ctx, 2 * n_edge);
476 	graph->intra_hmap_param = isl_map_to_basic_set_alloc(ctx, 2 * n_edge);
477 	graph->inter_hmap = isl_map_to_basic_set_alloc(ctx, 2 * n_edge);
478 
479 	if (!graph->node || !graph->region || (graph->n_edge && !graph->edge) ||
480 	    !graph->sorted)
481 		return isl_stat_error;
482 
483 	for(i = 0; i < graph->n; ++i)
484 		graph->sorted[i] = i;
485 
486 	return isl_stat_ok;
487 }
488 
489 /* Free the memory associated to node "node" in "graph".
490  * The "coincident" field is shared by nodes in a graph and its subgraph.
491  * It therefore only needs to be freed for the original dependence graph,
492  * i.e., one that is not the result of splitting.
493  */
clear_node(struct isl_sched_graph * graph,struct isl_sched_node * node)494 static void clear_node(struct isl_sched_graph *graph,
495 	struct isl_sched_node *node)
496 {
497 	isl_space_free(node->space);
498 	isl_set_free(node->hull);
499 	isl_multi_aff_free(node->compress);
500 	isl_pw_multi_aff_free(node->decompress);
501 	isl_mat_free(node->sched);
502 	isl_map_free(node->sched_map);
503 	isl_mat_free(node->indep);
504 	isl_mat_free(node->vmap);
505 	if (graph->root == graph)
506 		free(node->coincident);
507 	isl_multi_val_free(node->sizes);
508 	isl_basic_set_free(node->bounds);
509 	isl_vec_free(node->max);
510 }
511 
isl_sched_graph_free(isl_ctx * ctx,struct isl_sched_graph * graph)512 void isl_sched_graph_free(isl_ctx *ctx, struct isl_sched_graph *graph)
513 {
514 	int i;
515 
516 	isl_map_to_basic_set_free(graph->intra_hmap);
517 	isl_map_to_basic_set_free(graph->intra_hmap_param);
518 	isl_map_to_basic_set_free(graph->inter_hmap);
519 
520 	if (graph->node)
521 		for (i = 0; i < graph->n; ++i)
522 			clear_node(graph, &graph->node[i]);
523 	free(graph->node);
524 	free(graph->sorted);
525 	if (graph->edge)
526 		for (i = 0; i < graph->n_edge; ++i) {
527 			isl_map_free(graph->edge[i].map);
528 			isl_union_map_free(graph->edge[i].tagged_condition);
529 			isl_union_map_free(graph->edge[i].tagged_validity);
530 		}
531 	free(graph->edge);
532 	free(graph->region);
533 	for (i = 0; i <= isl_edge_last; ++i)
534 		isl_hash_table_free(ctx, graph->edge_table[i]);
535 	isl_hash_table_free(ctx, graph->node_table);
536 	isl_basic_set_free(graph->lp);
537 }
538 
539 /* For each "set" on which this function is called, increment
540  * graph->n by one and update graph->maxvar.
541  */
init_n_maxvar(__isl_take isl_set * set,void * user)542 static isl_stat init_n_maxvar(__isl_take isl_set *set, void *user)
543 {
544 	struct isl_sched_graph *graph = user;
545 	isl_size nvar = isl_set_dim(set, isl_dim_set);
546 
547 	graph->n++;
548 	if (nvar > graph->maxvar)
549 		graph->maxvar = nvar;
550 
551 	isl_set_free(set);
552 
553 	if (nvar < 0)
554 		return isl_stat_error;
555 	return isl_stat_ok;
556 }
557 
558 /* Compute the number of rows that should be allocated for the schedule.
559  * In particular, we need one row for each variable or one row
560  * for each basic map in the dependences.
561  * Note that it is practically impossible to exhaust both
562  * the number of dependences and the number of variables.
563  */
compute_max_row(struct isl_sched_graph * graph,__isl_keep isl_schedule_constraints * sc)564 static isl_stat compute_max_row(struct isl_sched_graph *graph,
565 	__isl_keep isl_schedule_constraints *sc)
566 {
567 	int n_edge;
568 	isl_stat r;
569 	isl_union_set *domain;
570 
571 	graph->n = 0;
572 	graph->maxvar = 0;
573 	domain = isl_schedule_constraints_get_domain(sc);
574 	r = isl_union_set_foreach_set(domain, &init_n_maxvar, graph);
575 	isl_union_set_free(domain);
576 	if (r < 0)
577 		return isl_stat_error;
578 	n_edge = isl_schedule_constraints_n_basic_map(sc);
579 	if (n_edge < 0)
580 		return isl_stat_error;
581 	graph->max_row = n_edge + graph->maxvar;
582 
583 	return isl_stat_ok;
584 }
585 
586 /* Does "bset" have any defining equalities for its set variables?
587  */
has_any_defining_equality(__isl_keep isl_basic_set * bset)588 static isl_bool has_any_defining_equality(__isl_keep isl_basic_set *bset)
589 {
590 	int i;
591 	isl_size n;
592 
593 	n = isl_basic_set_dim(bset, isl_dim_set);
594 	if (n < 0)
595 		return isl_bool_error;
596 
597 	for (i = 0; i < n; ++i) {
598 		isl_bool has;
599 
600 		has = isl_basic_set_has_defining_equality(bset, isl_dim_set, i,
601 							NULL);
602 		if (has < 0 || has)
603 			return has;
604 	}
605 
606 	return isl_bool_false;
607 }
608 
609 /* Set the entries of node->max to the value of the schedule_max_coefficient
610  * option, if set.
611  */
set_max_coefficient(isl_ctx * ctx,struct isl_sched_node * node)612 static isl_stat set_max_coefficient(isl_ctx *ctx, struct isl_sched_node *node)
613 {
614 	int max;
615 
616 	max = isl_options_get_schedule_max_coefficient(ctx);
617 	if (max == -1)
618 		return isl_stat_ok;
619 
620 	node->max = isl_vec_alloc(ctx, node->nvar);
621 	node->max = isl_vec_set_si(node->max, max);
622 	if (!node->max)
623 		return isl_stat_error;
624 
625 	return isl_stat_ok;
626 }
627 
628 /* Set the entries of node->max to the minimum of the schedule_max_coefficient
629  * option (if set) and half of the minimum of the sizes in the other
630  * dimensions.  Round up when computing the half such that
631  * if the minimum of the sizes is one, half of the size is taken to be one
632  * rather than zero.
633  * If the global minimum is unbounded (i.e., if both
634  * the schedule_max_coefficient is not set and the sizes in the other
635  * dimensions are unbounded), then store a negative value.
636  * If the schedule coefficient is close to the size of the instance set
637  * in another dimension, then the schedule may represent a loop
638  * coalescing transformation (especially if the coefficient
639  * in that other dimension is one).  Forcing the coefficient to be
640  * smaller than or equal to half the minimal size should avoid this
641  * situation.
642  */
compute_max_coefficient(isl_ctx * ctx,struct isl_sched_node * node)643 static isl_stat compute_max_coefficient(isl_ctx *ctx,
644 	struct isl_sched_node *node)
645 {
646 	int max;
647 	int i, j;
648 	isl_vec *v;
649 
650 	max = isl_options_get_schedule_max_coefficient(ctx);
651 	v = isl_vec_alloc(ctx, node->nvar);
652 	if (!v)
653 		return isl_stat_error;
654 
655 	for (i = 0; i < node->nvar; ++i) {
656 		isl_int_set_si(v->el[i], max);
657 		isl_int_mul_si(v->el[i], v->el[i], 2);
658 	}
659 
660 	for (i = 0; i < node->nvar; ++i) {
661 		isl_val *size;
662 
663 		size = isl_multi_val_get_val(node->sizes, i);
664 		if (!size)
665 			goto error;
666 		if (!isl_val_is_int(size)) {
667 			isl_val_free(size);
668 			continue;
669 		}
670 		for (j = 0; j < node->nvar; ++j) {
671 			if (j == i)
672 				continue;
673 			if (isl_int_is_neg(v->el[j]) ||
674 			    isl_int_gt(v->el[j], size->n))
675 				isl_int_set(v->el[j], size->n);
676 		}
677 		isl_val_free(size);
678 	}
679 
680 	for (i = 0; i < node->nvar; ++i)
681 		isl_int_cdiv_q_ui(v->el[i], v->el[i], 2);
682 
683 	node->max = v;
684 	return isl_stat_ok;
685 error:
686 	isl_vec_free(v);
687 	return isl_stat_error;
688 }
689 
690 /* Construct an identifier for node "node", which will represent "set".
691  * The name of the identifier is either "compressed" or
692  * "compressed_<name>", with <name> the name of the space of "set".
693  * The user pointer of the identifier points to "node".
694  */
construct_compressed_id(__isl_keep isl_set * set,struct isl_sched_node * node)695 static __isl_give isl_id *construct_compressed_id(__isl_keep isl_set *set,
696 	struct isl_sched_node *node)
697 {
698 	isl_bool has_name;
699 	isl_ctx *ctx;
700 	isl_id *id;
701 	isl_printer *p;
702 	const char *name;
703 	char *id_name;
704 
705 	has_name = isl_set_has_tuple_name(set);
706 	if (has_name < 0)
707 		return NULL;
708 
709 	ctx = isl_set_get_ctx(set);
710 	if (!has_name)
711 		return isl_id_alloc(ctx, "compressed", node);
712 
713 	p = isl_printer_to_str(ctx);
714 	name = isl_set_get_tuple_name(set);
715 	p = isl_printer_print_str(p, "compressed_");
716 	p = isl_printer_print_str(p, name);
717 	id_name = isl_printer_get_str(p);
718 	isl_printer_free(p);
719 
720 	id = isl_id_alloc(ctx, id_name, node);
721 	free(id_name);
722 
723 	return id;
724 }
725 
726 /* Construct a map that isolates the variable in position "pos" in "set".
727  *
728  * That is, construct
729  *
730  *	[i_0, ..., i_pos-1, i_pos+1, ...] -> [i_pos]
731  */
isolate(__isl_take isl_set * set,int pos)732 static __isl_give isl_map *isolate(__isl_take isl_set *set, int pos)
733 {
734 	isl_map *map;
735 
736 	map = isl_set_project_onto_map(set, isl_dim_set, pos, 1);
737 	map = isl_map_project_out(map, isl_dim_in, pos, 1);
738 	return map;
739 }
740 
741 /* Compute and return the size of "set" in dimension "dim".
742  * The size is taken to be the difference in values for that variable
743  * for fixed values of the other variables.
744  * This assumes that "set" is convex.
745  * In particular, the variable is first isolated from the other variables
746  * in the range of a map
747  *
748  *	[i_0, ..., i_dim-1, i_dim+1, ...] -> [i_dim]
749  *
750  * and then duplicated
751  *
752  *	[i_0, ..., i_dim-1, i_dim+1, ...] -> [[i_dim] -> [i_dim']]
753  *
754  * The shared variables are then projected out and the maximal value
755  * of i_dim' - i_dim is computed.
756  */
compute_size(__isl_take isl_set * set,int dim)757 static __isl_give isl_val *compute_size(__isl_take isl_set *set, int dim)
758 {
759 	isl_map *map;
760 	isl_local_space *ls;
761 	isl_aff *obj;
762 	isl_val *v;
763 
764 	map = isolate(set, dim);
765 	map = isl_map_range_product(map, isl_map_copy(map));
766 	map = isl_set_unwrap(isl_map_range(map));
767 	set = isl_map_deltas(map);
768 	ls = isl_local_space_from_space(isl_set_get_space(set));
769 	obj = isl_aff_var_on_domain(ls, isl_dim_set, 0);
770 	v = isl_set_max_val(set, obj);
771 	isl_aff_free(obj);
772 	isl_set_free(set);
773 
774 	return v;
775 }
776 
777 /* Perform a compression on "node" where "hull" represents the constraints
778  * that were used to derive the compression, while "compress" and
779  * "decompress" map the original space to the compressed space and
780  * vice versa.
781  *
782  * If "node" was not compressed already, then simply store
783  * the compression information.
784  * Otherwise the "original" space is actually the result
785  * of a previous compression, which is then combined
786  * with the present compression.
787  *
788  * The dimensionality of the compressed domain is also adjusted.
789  * Other information, such as the sizes and the maximal coefficient values,
790  * has not been computed yet and therefore does not need to be adjusted.
791  */
compress_node(struct isl_sched_node * node,__isl_take isl_set * hull,__isl_take isl_multi_aff * compress,__isl_take isl_pw_multi_aff * decompress)792 static isl_stat compress_node(struct isl_sched_node *node,
793 	__isl_take isl_set *hull, __isl_take isl_multi_aff *compress,
794 	__isl_take isl_pw_multi_aff *decompress)
795 {
796 	node->nvar = isl_multi_aff_dim(compress, isl_dim_out);
797 	if (!node->compressed) {
798 		node->compressed = 1;
799 		node->hull = hull;
800 		node->compress = compress;
801 		node->decompress = decompress;
802 	} else {
803 		hull = isl_set_preimage_multi_aff(hull,
804 					    isl_multi_aff_copy(node->compress));
805 		node->hull = isl_set_intersect(node->hull, hull);
806 		node->compress = isl_multi_aff_pullback_multi_aff(
807 						compress, node->compress);
808 		node->decompress = isl_pw_multi_aff_pullback_pw_multi_aff(
809 						node->decompress, decompress);
810 	}
811 
812 	if (!node->hull || !node->compress || !node->decompress)
813 		return isl_stat_error;
814 
815 	return isl_stat_ok;
816 }
817 
818 /* Given that dimension "pos" in "set" has a fixed value
819  * in terms of the other dimensions, (further) compress "node"
820  * by projecting out this dimension.
821  * "set" may be the result of a previous compression.
822  * "uncompressed" is the original domain (without compression).
823  *
824  * The compression function simply projects out the dimension.
825  * The decompression function adds back the dimension
826  * in the right position as an expression of the other dimensions
827  * derived from "set".
828  * As in extract_node, the compressed space has an identifier
829  * that references "node" such that each compressed space is unique and
830  * such that the node can be recovered from the compressed space.
831  *
832  * The constraint removed through the compression is added to the "hull"
833  * such that only edges that relate to the original domains
834  * are taken into account.
835  * In particular, it is obtained by composing compression and decompression and
836  * taking the relation among the variables in the range.
837  */
project_out_fixed(struct isl_sched_node * node,__isl_keep isl_set * uncompressed,__isl_take isl_set * set,int pos)838 static isl_stat project_out_fixed(struct isl_sched_node *node,
839 	__isl_keep isl_set *uncompressed, __isl_take isl_set *set, int pos)
840 {
841 	isl_id *id;
842 	isl_space *space;
843 	isl_set *domain;
844 	isl_map *map;
845 	isl_multi_aff *compress;
846 	isl_pw_multi_aff *decompress, *pma;
847 	isl_multi_pw_aff *mpa;
848 	isl_set *hull;
849 
850 	map = isolate(isl_set_copy(set), pos);
851 	pma = isl_pw_multi_aff_from_map(map);
852 	domain = isl_pw_multi_aff_domain(isl_pw_multi_aff_copy(pma));
853 	pma = isl_pw_multi_aff_gist(pma, domain);
854 	space = isl_pw_multi_aff_get_domain_space(pma);
855 	mpa = isl_multi_pw_aff_identity(isl_space_map_from_set(space));
856 	mpa = isl_multi_pw_aff_range_splice(mpa, pos,
857 				    isl_multi_pw_aff_from_pw_multi_aff(pma));
858 	decompress = isl_pw_multi_aff_from_multi_pw_aff(mpa);
859 	space = isl_set_get_space(set);
860 	compress = isl_multi_aff_project_out_map(space, isl_dim_set, pos, 1);
861 	id = construct_compressed_id(uncompressed, node);
862 	compress = isl_multi_aff_set_tuple_id(compress, isl_dim_out, id);
863 	space = isl_space_reverse(isl_multi_aff_get_space(compress));
864 	decompress = isl_pw_multi_aff_reset_space(decompress, space);
865 	pma = isl_pw_multi_aff_pullback_multi_aff(
866 	    isl_pw_multi_aff_copy(decompress), isl_multi_aff_copy(compress));
867 	hull = isl_map_range(isl_map_from_pw_multi_aff(pma));
868 
869 	isl_set_free(set);
870 
871 	return compress_node(node, hull, compress, decompress);
872 }
873 
874 /* Compute the size of the compressed domain in each dimension and
875  * store the results in node->sizes.
876  * "uncompressed" is the original domain (without compression).
877  *
878  * First compress the domain if needed and then compute the size
879  * in each direction.
880  * If the domain is not convex, then the sizes are computed
881  * on a convex superset in order to avoid picking up sizes
882  * that are valid for the individual disjuncts, but not for
883  * the domain as a whole.
884  *
885  * If any of the sizes turns out to be zero, then this means
886  * that this dimension has a fixed value in terms of
887  * the other dimensions.  Perform an (extra) compression
888  * to remove this dimension.
889  */
compute_sizes(struct isl_sched_node * node,__isl_keep isl_set * uncompressed)890 static isl_stat compute_sizes(struct isl_sched_node *node,
891 	__isl_keep isl_set *uncompressed)
892 {
893 	int j;
894 	isl_size n;
895 	isl_multi_val *mv;
896 	isl_set *set = isl_set_copy(uncompressed);
897 
898 	if (node->compressed)
899 		set = isl_set_preimage_pw_multi_aff(set,
900 				    isl_pw_multi_aff_copy(node->decompress));
901 	set = isl_set_from_basic_set(isl_set_simple_hull(set));
902 	mv = isl_multi_val_zero(isl_set_get_space(set));
903 	n = isl_set_dim(set, isl_dim_set);
904 	if (n < 0)
905 		mv = isl_multi_val_free(mv);
906 	for (j = 0; j < n; ++j) {
907 		isl_bool is_zero;
908 		isl_val *v;
909 
910 		v = compute_size(isl_set_copy(set), j);
911 		is_zero = isl_val_is_zero(v);
912 		mv = isl_multi_val_set_val(mv, j, v);
913 		if (is_zero >= 0 && is_zero) {
914 			isl_multi_val_free(mv);
915 			if (project_out_fixed(node, uncompressed, set, j) < 0)
916 				return isl_stat_error;
917 			return compute_sizes(node, uncompressed);
918 		}
919 	}
920 	node->sizes = mv;
921 	isl_set_free(set);
922 	if (!node->sizes)
923 		return isl_stat_error;
924 	return isl_stat_ok;
925 }
926 
927 /* Compute the size of the instance set "set" of "node", after compression,
928  * as well as bounds on the corresponding coefficients, if needed.
929  *
930  * The sizes are needed when the schedule_treat_coalescing option is set.
931  * The bounds are needed when the schedule_treat_coalescing option or
932  * the schedule_max_coefficient option is set.
933  *
934  * If the schedule_treat_coalescing option is not set, then at most
935  * the bounds need to be set and this is done in set_max_coefficient.
936  * Otherwise, compute the size of the compressed domain
937  * in each direction and store the results in node->size.
938  * Finally, set the bounds on the coefficients based on the sizes
939  * and the schedule_max_coefficient option in compute_max_coefficient.
940  */
compute_sizes_and_max(isl_ctx * ctx,struct isl_sched_node * node,__isl_take isl_set * set)941 static isl_stat compute_sizes_and_max(isl_ctx *ctx, struct isl_sched_node *node,
942 	__isl_take isl_set *set)
943 {
944 	isl_stat r;
945 
946 	if (!isl_options_get_schedule_treat_coalescing(ctx)) {
947 		isl_set_free(set);
948 		return set_max_coefficient(ctx, node);
949 	}
950 
951 	r = compute_sizes(node, set);
952 	isl_set_free(set);
953 	if (r < 0)
954 		return isl_stat_error;
955 	return compute_max_coefficient(ctx, node);
956 }
957 
958 /* Add a new node to the graph representing the given instance set.
959  * "nvar" is the (possibly compressed) number of variables and
960  * may be smaller than then number of set variables in "set"
961  * if "compressed" is set.
962  * If "compressed" is set, then "hull" represents the constraints
963  * that were used to derive the compression, while "compress" and
964  * "decompress" map the original space to the compressed space and
965  * vice versa.
966  * If "compressed" is not set, then "hull", "compress" and "decompress"
967  * should be NULL.
968  *
969  * Compute the size of the instance set and bounds on the coefficients,
970  * if needed.
971  */
add_node(struct isl_sched_graph * graph,__isl_take isl_set * set,int nvar,int compressed,__isl_take isl_set * hull,__isl_take isl_multi_aff * compress,__isl_take isl_pw_multi_aff * decompress)972 static isl_stat add_node(struct isl_sched_graph *graph,
973 	__isl_take isl_set *set, int nvar, int compressed,
974 	__isl_take isl_set *hull, __isl_take isl_multi_aff *compress,
975 	__isl_take isl_pw_multi_aff *decompress)
976 {
977 	isl_size nparam;
978 	isl_ctx *ctx;
979 	isl_mat *sched;
980 	isl_space *space;
981 	int *coincident;
982 	struct isl_sched_node *node;
983 
984 	nparam = isl_set_dim(set, isl_dim_param);
985 	if (nparam < 0)
986 		goto error;
987 
988 	ctx = isl_set_get_ctx(set);
989 	if (!ctx->opt->schedule_parametric)
990 		nparam = 0;
991 	sched = isl_mat_alloc(ctx, 0, 1 + nparam + nvar);
992 	node = &graph->node[graph->n];
993 	graph->n++;
994 	space = isl_set_get_space(set);
995 	node->space = space;
996 	node->nvar = nvar;
997 	node->nparam = nparam;
998 	node->sched = sched;
999 	node->sched_map = NULL;
1000 	coincident = isl_calloc_array(ctx, int, graph->max_row);
1001 	node->coincident = coincident;
1002 	node->compressed = compressed;
1003 	node->hull = hull;
1004 	node->compress = compress;
1005 	node->decompress = decompress;
1006 	if (compute_sizes_and_max(ctx, node, set) < 0)
1007 		return isl_stat_error;
1008 
1009 	if (!space || !sched || (graph->max_row && !coincident))
1010 		return isl_stat_error;
1011 	if (compressed && (!hull || !compress || !decompress))
1012 		return isl_stat_error;
1013 
1014 	return isl_stat_ok;
1015 error:
1016 	isl_set_free(set);
1017 	isl_set_free(hull);
1018 	isl_multi_aff_free(compress);
1019 	isl_pw_multi_aff_free(decompress);
1020 	return isl_stat_error;
1021 }
1022 
1023 /* Add a new node to the graph representing the given set.
1024  *
1025  * If any of the set variables is defined by an equality, then
1026  * we perform variable compression such that we can perform
1027  * the scheduling on the compressed domain.
1028  * In this case, an identifier is used that references the new node
1029  * such that each compressed space is unique and
1030  * such that the node can be recovered from the compressed space.
1031  */
extract_node(__isl_take isl_set * set,void * user)1032 static isl_stat extract_node(__isl_take isl_set *set, void *user)
1033 {
1034 	isl_size nvar;
1035 	isl_bool has_equality;
1036 	isl_id *id;
1037 	isl_basic_set *hull;
1038 	isl_set *hull_set;
1039 	isl_morph *morph;
1040 	isl_multi_aff *compress, *decompress_ma;
1041 	isl_pw_multi_aff *decompress;
1042 	struct isl_sched_graph *graph = user;
1043 
1044 	hull = isl_set_affine_hull(isl_set_copy(set));
1045 	hull = isl_basic_set_remove_divs(hull);
1046 	nvar = isl_set_dim(set, isl_dim_set);
1047 	has_equality = has_any_defining_equality(hull);
1048 
1049 	if (nvar < 0 || has_equality < 0)
1050 		goto error;
1051 	if (!has_equality) {
1052 		isl_basic_set_free(hull);
1053 		return add_node(graph, set, nvar, 0, NULL, NULL, NULL);
1054 	}
1055 
1056 	id = construct_compressed_id(set, &graph->node[graph->n]);
1057 	morph = isl_basic_set_variable_compression_with_id(hull, id);
1058 	isl_id_free(id);
1059 	nvar = isl_morph_ran_dim(morph, isl_dim_set);
1060 	if (nvar < 0)
1061 		set = isl_set_free(set);
1062 	compress = isl_morph_get_var_multi_aff(morph);
1063 	morph = isl_morph_inverse(morph);
1064 	decompress_ma = isl_morph_get_var_multi_aff(morph);
1065 	decompress = isl_pw_multi_aff_from_multi_aff(decompress_ma);
1066 	isl_morph_free(morph);
1067 
1068 	hull_set = isl_set_from_basic_set(hull);
1069 	return add_node(graph, set, nvar, 1, hull_set, compress, decompress);
1070 error:
1071 	isl_basic_set_free(hull);
1072 	isl_set_free(set);
1073 	return isl_stat_error;
1074 }
1075 
1076 struct isl_extract_edge_data {
1077 	isl_schedule_constraints *sc;
1078 	enum isl_edge_type type;
1079 	struct isl_sched_graph *graph;
1080 };
1081 
1082 /* Merge edge2 into edge1, freeing the contents of edge2.
1083  * Return 0 on success and -1 on failure.
1084  *
1085  * edge1 and edge2 are assumed to have the same value for the map field.
1086  */
merge_edge(struct isl_sched_edge * edge1,struct isl_sched_edge * edge2)1087 static int merge_edge(struct isl_sched_edge *edge1,
1088 	struct isl_sched_edge *edge2)
1089 {
1090 	edge1->types |= edge2->types;
1091 	isl_map_free(edge2->map);
1092 
1093 	if (isl_sched_edge_is_condition(edge2)) {
1094 		if (!edge1->tagged_condition)
1095 			edge1->tagged_condition = edge2->tagged_condition;
1096 		else
1097 			edge1->tagged_condition =
1098 				isl_union_map_union(edge1->tagged_condition,
1099 						    edge2->tagged_condition);
1100 	}
1101 
1102 	if (isl_sched_edge_is_conditional_validity(edge2)) {
1103 		if (!edge1->tagged_validity)
1104 			edge1->tagged_validity = edge2->tagged_validity;
1105 		else
1106 			edge1->tagged_validity =
1107 				isl_union_map_union(edge1->tagged_validity,
1108 						    edge2->tagged_validity);
1109 	}
1110 
1111 	if (isl_sched_edge_is_condition(edge2) && !edge1->tagged_condition)
1112 		return -1;
1113 	if (isl_sched_edge_is_conditional_validity(edge2) &&
1114 	    !edge1->tagged_validity)
1115 		return -1;
1116 
1117 	return 0;
1118 }
1119 
1120 /* Insert dummy tags in domain and range of "map".
1121  *
1122  * In particular, if "map" is of the form
1123  *
1124  *	A -> B
1125  *
1126  * then return
1127  *
1128  *	[A -> dummy_tag] -> [B -> dummy_tag]
1129  *
1130  * where the dummy_tags are identical and equal to any dummy tags
1131  * introduced by any other call to this function.
1132  */
insert_dummy_tags(__isl_take isl_map * map)1133 static __isl_give isl_map *insert_dummy_tags(__isl_take isl_map *map)
1134 {
1135 	static char dummy;
1136 	isl_ctx *ctx;
1137 	isl_id *id;
1138 	isl_space *space;
1139 	isl_set *domain, *range;
1140 
1141 	ctx = isl_map_get_ctx(map);
1142 
1143 	id = isl_id_alloc(ctx, NULL, &dummy);
1144 	space = isl_space_params(isl_map_get_space(map));
1145 	space = isl_space_set_from_params(space);
1146 	space = isl_space_set_tuple_id(space, isl_dim_set, id);
1147 	space = isl_space_map_from_set(space);
1148 
1149 	domain = isl_map_wrap(map);
1150 	range = isl_map_wrap(isl_map_universe(space));
1151 	map = isl_map_from_domain_and_range(domain, range);
1152 	map = isl_map_zip(map);
1153 
1154 	return map;
1155 }
1156 
1157 /* Given that at least one of "src" or "dst" is compressed, return
1158  * a map between the spaces of these nodes restricted to the affine
1159  * hull that was used in the compression.
1160  */
extract_hull(struct isl_sched_node * src,struct isl_sched_node * dst)1161 static __isl_give isl_map *extract_hull(struct isl_sched_node *src,
1162 	struct isl_sched_node *dst)
1163 {
1164 	isl_set *dom, *ran;
1165 
1166 	if (src->compressed)
1167 		dom = isl_set_copy(src->hull);
1168 	else
1169 		dom = isl_set_universe(isl_space_copy(src->space));
1170 	if (dst->compressed)
1171 		ran = isl_set_copy(dst->hull);
1172 	else
1173 		ran = isl_set_universe(isl_space_copy(dst->space));
1174 
1175 	return isl_map_from_domain_and_range(dom, ran);
1176 }
1177 
1178 /* Intersect the domains of the nested relations in domain and range
1179  * of "tagged" with "map".
1180  */
map_intersect_domains(__isl_take isl_map * tagged,__isl_keep isl_map * map)1181 static __isl_give isl_map *map_intersect_domains(__isl_take isl_map *tagged,
1182 	__isl_keep isl_map *map)
1183 {
1184 	isl_set *set;
1185 
1186 	tagged = isl_map_zip(tagged);
1187 	set = isl_map_wrap(isl_map_copy(map));
1188 	tagged = isl_map_intersect_domain(tagged, set);
1189 	tagged = isl_map_zip(tagged);
1190 	return tagged;
1191 }
1192 
1193 /* Return a pointer to the node that lives in the domain space of "map",
1194  * an invalid node if there is no such node, or NULL in case of error.
1195  */
find_domain_node(isl_ctx * ctx,struct isl_sched_graph * graph,__isl_keep isl_map * map)1196 static struct isl_sched_node *find_domain_node(isl_ctx *ctx,
1197 	struct isl_sched_graph *graph, __isl_keep isl_map *map)
1198 {
1199 	struct isl_sched_node *node;
1200 	isl_space *space;
1201 
1202 	space = isl_space_domain(isl_map_get_space(map));
1203 	node = isl_sched_graph_find_node(ctx, graph, space);
1204 	isl_space_free(space);
1205 
1206 	return node;
1207 }
1208 
1209 /* Return a pointer to the node that lives in the range space of "map",
1210  * an invalid node if there is no such node, or NULL in case of error.
1211  */
find_range_node(isl_ctx * ctx,struct isl_sched_graph * graph,__isl_keep isl_map * map)1212 static struct isl_sched_node *find_range_node(isl_ctx *ctx,
1213 	struct isl_sched_graph *graph, __isl_keep isl_map *map)
1214 {
1215 	struct isl_sched_node *node;
1216 	isl_space *space;
1217 
1218 	space = isl_space_range(isl_map_get_space(map));
1219 	node = isl_sched_graph_find_node(ctx, graph, space);
1220 	isl_space_free(space);
1221 
1222 	return node;
1223 }
1224 
1225 /* Refrain from adding a new edge based on "map".
1226  * Instead, just free the map.
1227  * "tagged" is either a copy of "map" with additional tags or NULL.
1228  */
skip_edge(__isl_take isl_map * map,__isl_take isl_map * tagged)1229 static isl_stat skip_edge(__isl_take isl_map *map, __isl_take isl_map *tagged)
1230 {
1231 	isl_map_free(map);
1232 	isl_map_free(tagged);
1233 
1234 	return isl_stat_ok;
1235 }
1236 
1237 /* Add a new edge to the graph based on the given map
1238  * and add it to data->graph->edge_table[data->type].
1239  * If a dependence relation of a given type happens to be identical
1240  * to one of the dependence relations of a type that was added before,
1241  * then we don't create a new edge, but instead mark the original edge
1242  * as also representing a dependence of the current type.
1243  *
1244  * Edges of type isl_edge_condition or isl_edge_conditional_validity
1245  * may be specified as "tagged" dependence relations.  That is, "map"
1246  * may contain elements (i -> a) -> (j -> b), where i -> j denotes
1247  * the dependence on iterations and a and b are tags.
1248  * edge->map is set to the relation containing the elements i -> j,
1249  * while edge->tagged_condition and edge->tagged_validity contain
1250  * the union of all the "map" relations
1251  * for which extract_edge is called that result in the same edge->map.
1252  *
1253  * Compute the gist with respect to the context.
1254  * This may remove some constraints on the parameters or
1255  * eliminate some parts of the dependence relation
1256  * that are not relevant on the context.
1257  *
1258  * If the source or the destination node is compressed, then
1259  * intersect both "map" and "tagged" with the constraints that
1260  * were used to construct the compression.
1261  * This ensures that there are no schedule constraints defined
1262  * outside of these domains, while the scheduler no longer has
1263  * any control over those outside parts.
1264  */
extract_edge(__isl_take isl_map * map,void * user)1265 static isl_stat extract_edge(__isl_take isl_map *map, void *user)
1266 {
1267 	isl_bool empty;
1268 	isl_ctx *ctx = isl_map_get_ctx(map);
1269 	struct isl_extract_edge_data *data = user;
1270 	struct isl_sched_graph *graph = data->graph;
1271 	struct isl_sched_node *src, *dst;
1272 	struct isl_sched_edge *edge;
1273 	isl_set *context;
1274 	isl_map *tagged = NULL;
1275 	isl_schedule_constraints *sc = data->sc;
1276 
1277 	if (data->type == isl_edge_condition ||
1278 	    data->type == isl_edge_conditional_validity) {
1279 		if (isl_map_can_zip(map)) {
1280 			tagged = isl_map_copy(map);
1281 			map = isl_set_unwrap(isl_map_domain(isl_map_zip(map)));
1282 		} else {
1283 			tagged = insert_dummy_tags(isl_map_copy(map));
1284 		}
1285 	}
1286 
1287 	src = find_domain_node(ctx, graph, map);
1288 	dst = find_range_node(ctx, graph, map);
1289 
1290 	if (!src || !dst)
1291 		goto error;
1292 	if (!isl_sched_graph_is_node(graph, src) ||
1293 	    !isl_sched_graph_is_node(graph, dst))
1294 		return skip_edge(map, tagged);
1295 
1296 	context = isl_schedule_constraints_get_context(sc);
1297 	map = isl_map_gist_params(map, context);
1298 
1299 	if (src->compressed || dst->compressed) {
1300 		isl_map *hull;
1301 		hull = extract_hull(src, dst);
1302 		if (tagged)
1303 			tagged = map_intersect_domains(tagged, hull);
1304 		map = isl_map_intersect(map, hull);
1305 	}
1306 
1307 	empty = isl_map_plain_is_empty(map);
1308 	if (empty < 0)
1309 		goto error;
1310 	if (empty)
1311 		return skip_edge(map, tagged);
1312 
1313 	graph->edge[graph->n_edge].src = src;
1314 	graph->edge[graph->n_edge].dst = dst;
1315 	graph->edge[graph->n_edge].map = map;
1316 	graph->edge[graph->n_edge].types = 0;
1317 	graph->edge[graph->n_edge].tagged_condition = NULL;
1318 	graph->edge[graph->n_edge].tagged_validity = NULL;
1319 	set_type(&graph->edge[graph->n_edge], data->type);
1320 	if (data->type == isl_edge_condition)
1321 		graph->edge[graph->n_edge].tagged_condition =
1322 					isl_union_map_from_map(tagged);
1323 	if (data->type == isl_edge_conditional_validity)
1324 		graph->edge[graph->n_edge].tagged_validity =
1325 					isl_union_map_from_map(tagged);
1326 
1327 	edge = graph_find_matching_edge(graph, &graph->edge[graph->n_edge]);
1328 	if (!edge) {
1329 		graph->n_edge++;
1330 		return isl_stat_error;
1331 	}
1332 	if (edge == &graph->edge[graph->n_edge])
1333 		return graph_edge_table_add(ctx, graph, data->type,
1334 				    &graph->edge[graph->n_edge++]);
1335 
1336 	if (merge_edge(edge, &graph->edge[graph->n_edge]) < 0)
1337 		return isl_stat_error;
1338 
1339 	return graph_edge_table_add(ctx, graph, data->type, edge);
1340 error:
1341 	isl_map_free(map);
1342 	isl_map_free(tagged);
1343 	return isl_stat_error;
1344 }
1345 
1346 /* Initialize the schedule graph "graph" from the schedule constraints "sc".
1347  *
1348  * The context is included in the domain before the nodes of
1349  * the graphs are extracted in order to be able to exploit
1350  * any possible additional equalities.
1351  * Note that this intersection is only performed locally here.
1352  */
isl_sched_graph_init(struct isl_sched_graph * graph,__isl_keep isl_schedule_constraints * sc)1353 isl_stat isl_sched_graph_init(struct isl_sched_graph *graph,
1354 	__isl_keep isl_schedule_constraints *sc)
1355 {
1356 	isl_ctx *ctx;
1357 	isl_union_set *domain;
1358 	isl_union_map *c;
1359 	struct isl_extract_edge_data data = { sc };
1360 	enum isl_edge_type i;
1361 	isl_stat r;
1362 	isl_size n;
1363 
1364 	if (!sc)
1365 		return isl_stat_error;
1366 
1367 	ctx = isl_schedule_constraints_get_ctx(sc);
1368 
1369 	domain = isl_schedule_constraints_get_domain(sc);
1370 	n = isl_union_set_n_set(domain);
1371 	graph->n = n;
1372 	isl_union_set_free(domain);
1373 	if (n < 0)
1374 		return isl_stat_error;
1375 
1376 	n = isl_schedule_constraints_n_map(sc);
1377 	if (n < 0 || graph_alloc(ctx, graph, graph->n, n) < 0)
1378 		return isl_stat_error;
1379 
1380 	if (compute_max_row(graph, sc) < 0)
1381 		return isl_stat_error;
1382 	graph->root = graph;
1383 	graph->n = 0;
1384 	domain = isl_schedule_constraints_get_domain(sc);
1385 	domain = isl_union_set_intersect_params(domain,
1386 				    isl_schedule_constraints_get_context(sc));
1387 	r = isl_union_set_foreach_set(domain, &extract_node, graph);
1388 	isl_union_set_free(domain);
1389 	if (r < 0)
1390 		return isl_stat_error;
1391 	if (graph_init_table(ctx, graph) < 0)
1392 		return isl_stat_error;
1393 	for (i = isl_edge_first; i <= isl_edge_last; ++i) {
1394 		isl_size n;
1395 
1396 		c = isl_schedule_constraints_get(sc, i);
1397 		n = isl_union_map_n_map(c);
1398 		graph->max_edge[i] = n;
1399 		isl_union_map_free(c);
1400 		if (n < 0)
1401 			return isl_stat_error;
1402 	}
1403 	if (graph_init_edge_tables(ctx, graph) < 0)
1404 		return isl_stat_error;
1405 	graph->n_edge = 0;
1406 	data.graph = graph;
1407 	for (i = isl_edge_first; i <= isl_edge_last; ++i) {
1408 		isl_stat r;
1409 
1410 		data.type = i;
1411 		c = isl_schedule_constraints_get(sc, i);
1412 		r = isl_union_map_foreach_map(c, &extract_edge, &data);
1413 		isl_union_map_free(c);
1414 		if (r < 0)
1415 			return isl_stat_error;
1416 	}
1417 
1418 	return isl_stat_ok;
1419 }
1420 
1421 /* Check whether there is any dependence from node[j] to node[i]
1422  * or from node[i] to node[j].
1423  */
node_follows_weak(int i,int j,void * user)1424 static isl_bool node_follows_weak(int i, int j, void *user)
1425 {
1426 	isl_bool f;
1427 	struct isl_sched_graph *graph = user;
1428 
1429 	f = graph_has_any_edge(graph, &graph->node[j], &graph->node[i]);
1430 	if (f < 0 || f)
1431 		return f;
1432 	return graph_has_any_edge(graph, &graph->node[i], &graph->node[j]);
1433 }
1434 
1435 /* Check whether there is a (conditional) validity dependence from node[j]
1436  * to node[i], forcing node[i] to follow node[j].
1437  */
node_follows_strong(int i,int j,void * user)1438 static isl_bool node_follows_strong(int i, int j, void *user)
1439 {
1440 	struct isl_sched_graph *graph = user;
1441 
1442 	return isl_sched_graph_has_validity_edge(graph, &graph->node[j],
1443 							&graph->node[i]);
1444 }
1445 
1446 /* Use Tarjan's algorithm for computing the strongly connected components
1447  * in the dependence graph only considering those edges defined by "follows".
1448  */
isl_sched_graph_detect_ccs(isl_ctx * ctx,struct isl_sched_graph * graph,isl_bool (* follows)(int i,int j,void * user))1449 isl_stat isl_sched_graph_detect_ccs(isl_ctx *ctx,
1450 	struct isl_sched_graph *graph,
1451 	isl_bool (*follows)(int i, int j, void *user))
1452 {
1453 	int i, n;
1454 	struct isl_tarjan_graph *g = NULL;
1455 
1456 	g = isl_tarjan_graph_init(ctx, graph->n, follows, graph);
1457 	if (!g)
1458 		return isl_stat_error;
1459 
1460 	graph->scc = 0;
1461 	i = 0;
1462 	n = graph->n;
1463 	while (n) {
1464 		while (g->order[i] != -1) {
1465 			graph->node[g->order[i]].scc = graph->scc;
1466 			--n;
1467 			++i;
1468 		}
1469 		++i;
1470 		graph->scc++;
1471 	}
1472 
1473 	isl_tarjan_graph_free(g);
1474 
1475 	return isl_stat_ok;
1476 }
1477 
1478 /* Apply Tarjan's algorithm to detect the strongly connected components
1479  * in the dependence graph.
1480  * Only consider the (conditional) validity dependences and clear "weak".
1481  */
detect_sccs(isl_ctx * ctx,struct isl_sched_graph * graph)1482 static isl_stat detect_sccs(isl_ctx *ctx, struct isl_sched_graph *graph)
1483 {
1484 	graph->weak = 0;
1485 	return isl_sched_graph_detect_ccs(ctx, graph, &node_follows_strong);
1486 }
1487 
1488 /* Apply Tarjan's algorithm to detect the (weakly) connected components
1489  * in the dependence graph.
1490  * Consider all dependences and set "weak".
1491  */
detect_wccs(isl_ctx * ctx,struct isl_sched_graph * graph)1492 static isl_stat detect_wccs(isl_ctx *ctx, struct isl_sched_graph *graph)
1493 {
1494 	graph->weak = 1;
1495 	return isl_sched_graph_detect_ccs(ctx, graph, &node_follows_weak);
1496 }
1497 
cmp_scc(const void * a,const void * b,void * data)1498 static int cmp_scc(const void *a, const void *b, void *data)
1499 {
1500 	struct isl_sched_graph *graph = data;
1501 	const int *i1 = a;
1502 	const int *i2 = b;
1503 
1504 	return graph->node[*i1].scc - graph->node[*i2].scc;
1505 }
1506 
1507 /* Sort the elements of graph->sorted according to the corresponding SCCs.
1508  */
sort_sccs(struct isl_sched_graph * graph)1509 static int sort_sccs(struct isl_sched_graph *graph)
1510 {
1511 	return isl_sort(graph->sorted, graph->n, sizeof(int), &cmp_scc, graph);
1512 }
1513 
1514 /* Return a non-parametric set in the compressed space of "node" that is
1515  * bounded by the size in each direction
1516  *
1517  *	{ [x] : -S_i <= x_i <= S_i }
1518  *
1519  * If S_i is infinity in direction i, then there are no constraints
1520  * in that direction.
1521  *
1522  * Cache the result in node->bounds.
1523  */
get_size_bounds(struct isl_sched_node * node)1524 static __isl_give isl_basic_set *get_size_bounds(struct isl_sched_node *node)
1525 {
1526 	isl_space *space;
1527 	isl_basic_set *bounds;
1528 	int i;
1529 
1530 	if (node->bounds)
1531 		return isl_basic_set_copy(node->bounds);
1532 
1533 	if (node->compressed)
1534 		space = isl_pw_multi_aff_get_domain_space(node->decompress);
1535 	else
1536 		space = isl_space_copy(node->space);
1537 	space = isl_space_drop_all_params(space);
1538 	bounds = isl_basic_set_universe(space);
1539 
1540 	for (i = 0; i < node->nvar; ++i) {
1541 		isl_val *size;
1542 
1543 		size = isl_multi_val_get_val(node->sizes, i);
1544 		if (!size)
1545 			return isl_basic_set_free(bounds);
1546 		if (!isl_val_is_int(size)) {
1547 			isl_val_free(size);
1548 			continue;
1549 		}
1550 		bounds = isl_basic_set_upper_bound_val(bounds, isl_dim_set, i,
1551 							isl_val_copy(size));
1552 		bounds = isl_basic_set_lower_bound_val(bounds, isl_dim_set, i,
1553 							isl_val_neg(size));
1554 	}
1555 
1556 	node->bounds = isl_basic_set_copy(bounds);
1557 	return bounds;
1558 }
1559 
1560 /* Compress the dependence relation "map", if needed, i.e.,
1561  * when the source node "src" and/or the destination node "dst"
1562  * has been compressed.
1563  */
compress(__isl_take isl_map * map,struct isl_sched_node * src,struct isl_sched_node * dst)1564 static __isl_give isl_map *compress(__isl_take isl_map *map,
1565 	struct isl_sched_node *src, struct isl_sched_node *dst)
1566 {
1567 	if (src->compressed)
1568 		map = isl_map_preimage_domain_pw_multi_aff(map,
1569 					isl_pw_multi_aff_copy(src->decompress));
1570 	if (dst->compressed)
1571 		map = isl_map_preimage_range_pw_multi_aff(map,
1572 					isl_pw_multi_aff_copy(dst->decompress));
1573 	return map;
1574 }
1575 
1576 /* Drop some constraints from "delta" that could be exploited
1577  * to construct loop coalescing schedules.
1578  * In particular, drop those constraint that bound the difference
1579  * to the size of the domain.
1580  * First project out the parameters to improve the effectiveness.
1581  */
drop_coalescing_constraints(__isl_take isl_set * delta,struct isl_sched_node * node)1582 static __isl_give isl_set *drop_coalescing_constraints(
1583 	__isl_take isl_set *delta, struct isl_sched_node *node)
1584 {
1585 	isl_size nparam;
1586 	isl_basic_set *bounds;
1587 
1588 	nparam = isl_set_dim(delta, isl_dim_param);
1589 	if (nparam < 0)
1590 		return isl_set_free(delta);
1591 
1592 	bounds = get_size_bounds(node);
1593 
1594 	delta = isl_set_project_out(delta, isl_dim_param, 0, nparam);
1595 	delta = isl_set_remove_divs(delta);
1596 	delta = isl_set_plain_gist_basic_set(delta, bounds);
1597 	return delta;
1598 }
1599 
1600 /* Given a dependence relation R from "node" to itself,
1601  * construct the set of coefficients of valid constraints for elements
1602  * in that dependence relation.
1603  * In particular, the result contains tuples of coefficients
1604  * c_0, c_n, c_x such that
1605  *
1606  *	c_0 + c_n n + c_x y - c_x x >= 0 for each (x,y) in R
1607  *
1608  * or, equivalently,
1609  *
1610  *	c_0 + c_n n + c_x d >= 0 for each d in delta R = { y - x | (x,y) in R }
1611  *
1612  * We choose here to compute the dual of delta R.
1613  * Alternatively, we could have computed the dual of R, resulting
1614  * in a set of tuples c_0, c_n, c_x, c_y, and then
1615  * plugged in (c_0, c_n, c_x, -c_x).
1616  *
1617  * If "need_param" is set, then the resulting coefficients effectively
1618  * include coefficients for the parameters c_n.  Otherwise, they may
1619  * have been projected out already.
1620  * Since the constraints may be different for these two cases,
1621  * they are stored in separate caches.
1622  * In particular, if no parameter coefficients are required and
1623  * the schedule_treat_coalescing option is set, then the parameters
1624  * are projected out and some constraints that could be exploited
1625  * to construct coalescing schedules are removed before the dual
1626  * is computed.
1627  *
1628  * If "node" has been compressed, then the dependence relation
1629  * is also compressed before the set of coefficients is computed.
1630  */
intra_coefficients(struct isl_sched_graph * graph,struct isl_sched_node * node,__isl_take isl_map * map,int need_param)1631 static __isl_give isl_basic_set *intra_coefficients(
1632 	struct isl_sched_graph *graph, struct isl_sched_node *node,
1633 	__isl_take isl_map *map, int need_param)
1634 {
1635 	isl_ctx *ctx;
1636 	isl_set *delta;
1637 	isl_map *key;
1638 	isl_basic_set *coef;
1639 	isl_maybe_isl_basic_set m;
1640 	isl_map_to_basic_set **hmap = &graph->intra_hmap;
1641 	int treat;
1642 
1643 	if (!map)
1644 		return NULL;
1645 
1646 	ctx = isl_map_get_ctx(map);
1647 	treat = !need_param && isl_options_get_schedule_treat_coalescing(ctx);
1648 	if (!treat)
1649 		hmap = &graph->intra_hmap_param;
1650 	m = isl_map_to_basic_set_try_get(*hmap, map);
1651 	if (m.valid < 0 || m.valid) {
1652 		isl_map_free(map);
1653 		return m.value;
1654 	}
1655 
1656 	key = isl_map_copy(map);
1657 	map = compress(map, node, node);
1658 	delta = isl_map_deltas(map);
1659 	if (treat)
1660 		delta = drop_coalescing_constraints(delta, node);
1661 	delta = isl_set_remove_divs(delta);
1662 	coef = isl_set_coefficients(delta);
1663 	*hmap = isl_map_to_basic_set_set(*hmap, key, isl_basic_set_copy(coef));
1664 
1665 	return coef;
1666 }
1667 
1668 /* Given a dependence relation R, construct the set of coefficients
1669  * of valid constraints for elements in that dependence relation.
1670  * In particular, the result contains tuples of coefficients
1671  * c_0, c_n, c_x, c_y such that
1672  *
1673  *	c_0 + c_n n + c_x x + c_y y >= 0 for each (x,y) in R
1674  *
1675  * If the source or destination nodes of "edge" have been compressed,
1676  * then the dependence relation is also compressed before
1677  * the set of coefficients is computed.
1678  */
inter_coefficients(struct isl_sched_graph * graph,struct isl_sched_edge * edge,__isl_take isl_map * map)1679 static __isl_give isl_basic_set *inter_coefficients(
1680 	struct isl_sched_graph *graph, struct isl_sched_edge *edge,
1681 	__isl_take isl_map *map)
1682 {
1683 	isl_set *set;
1684 	isl_map *key;
1685 	isl_basic_set *coef;
1686 	isl_maybe_isl_basic_set m;
1687 
1688 	m = isl_map_to_basic_set_try_get(graph->inter_hmap, map);
1689 	if (m.valid < 0 || m.valid) {
1690 		isl_map_free(map);
1691 		return m.value;
1692 	}
1693 
1694 	key = isl_map_copy(map);
1695 	map = compress(map, edge->src, edge->dst);
1696 	set = isl_map_wrap(isl_map_remove_divs(map));
1697 	coef = isl_set_coefficients(set);
1698 	graph->inter_hmap = isl_map_to_basic_set_set(graph->inter_hmap, key,
1699 					isl_basic_set_copy(coef));
1700 
1701 	return coef;
1702 }
1703 
1704 /* Return the position of the coefficients of the variables in
1705  * the coefficients constraints "coef".
1706  *
1707  * The space of "coef" is of the form
1708  *
1709  *	{ coefficients[[cst, params] -> S] }
1710  *
1711  * Return the position of S.
1712  */
coef_var_offset(__isl_keep isl_basic_set * coef)1713 static isl_size coef_var_offset(__isl_keep isl_basic_set *coef)
1714 {
1715 	isl_size offset;
1716 	isl_space *space;
1717 
1718 	space = isl_space_unwrap(isl_basic_set_get_space(coef));
1719 	offset = isl_space_dim(space, isl_dim_in);
1720 	isl_space_free(space);
1721 
1722 	return offset;
1723 }
1724 
1725 /* Return the offset of the coefficient of the constant term of "node"
1726  * within the (I)LP.
1727  *
1728  * Within each node, the coefficients have the following order:
1729  *	- positive and negative parts of c_i_x
1730  *	- c_i_n (if parametric)
1731  *	- c_i_0
1732  */
node_cst_coef_offset(struct isl_sched_node * node)1733 static int node_cst_coef_offset(struct isl_sched_node *node)
1734 {
1735 	return node->start + 2 * node->nvar + node->nparam;
1736 }
1737 
1738 /* Return the offset of the coefficients of the parameters of "node"
1739  * within the (I)LP.
1740  *
1741  * Within each node, the coefficients have the following order:
1742  *	- positive and negative parts of c_i_x
1743  *	- c_i_n (if parametric)
1744  *	- c_i_0
1745  */
node_par_coef_offset(struct isl_sched_node * node)1746 static int node_par_coef_offset(struct isl_sched_node *node)
1747 {
1748 	return node->start + 2 * node->nvar;
1749 }
1750 
1751 /* Return the offset of the coefficients of the variables of "node"
1752  * within the (I)LP.
1753  *
1754  * Within each node, the coefficients have the following order:
1755  *	- positive and negative parts of c_i_x
1756  *	- c_i_n (if parametric)
1757  *	- c_i_0
1758  */
node_var_coef_offset(struct isl_sched_node * node)1759 static int node_var_coef_offset(struct isl_sched_node *node)
1760 {
1761 	return node->start;
1762 }
1763 
1764 /* Return the position of the pair of variables encoding
1765  * coefficient "i" of "node".
1766  *
1767  * The order of these variable pairs is the opposite of
1768  * that of the coefficients, with 2 variables per coefficient.
1769  */
node_var_coef_pos(struct isl_sched_node * node,int i)1770 static int node_var_coef_pos(struct isl_sched_node *node, int i)
1771 {
1772 	return node_var_coef_offset(node) + 2 * (node->nvar - 1 - i);
1773 }
1774 
1775 /* Construct an isl_dim_map for mapping constraints on coefficients
1776  * for "node" to the corresponding positions in graph->lp.
1777  * "offset" is the offset of the coefficients for the variables
1778  * in the input constraints.
1779  * "s" is the sign of the mapping.
1780  *
1781  * The input constraints are given in terms of the coefficients
1782  * (c_0, c_x) or (c_0, c_n, c_x).
1783  * The mapping produced by this function essentially plugs in
1784  * (0, c_i_x^+ - c_i_x^-) if s = 1 and
1785  * (0, -c_i_x^+ + c_i_x^-) if s = -1 or
1786  * (0, 0, c_i_x^+ - c_i_x^-) if s = 1 and
1787  * (0, 0, -c_i_x^+ + c_i_x^-) if s = -1.
1788  * In graph->lp, the c_i_x^- appear before their c_i_x^+ counterpart.
1789  * Furthermore, the order of these pairs is the opposite of that
1790  * of the corresponding coefficients.
1791  *
1792  * The caller can extend the mapping to also map the other coefficients
1793  * (and therefore not plug in 0).
1794  */
intra_dim_map(isl_ctx * ctx,struct isl_sched_graph * graph,struct isl_sched_node * node,int offset,int s)1795 static __isl_give isl_dim_map *intra_dim_map(isl_ctx *ctx,
1796 	struct isl_sched_graph *graph, struct isl_sched_node *node,
1797 	int offset, int s)
1798 {
1799 	int pos;
1800 	isl_size total;
1801 	isl_dim_map *dim_map;
1802 
1803 	total = isl_basic_set_dim(graph->lp, isl_dim_all);
1804 	if (!node || total < 0)
1805 		return NULL;
1806 
1807 	pos = node_var_coef_pos(node, 0);
1808 	dim_map = isl_dim_map_alloc(ctx, total);
1809 	isl_dim_map_range(dim_map, pos, -2, offset, 1, node->nvar, -s);
1810 	isl_dim_map_range(dim_map, pos + 1, -2, offset, 1, node->nvar, s);
1811 
1812 	return dim_map;
1813 }
1814 
1815 /* Construct an isl_dim_map for mapping constraints on coefficients
1816  * for "src" (node i) and "dst" (node j) to the corresponding positions
1817  * in graph->lp.
1818  * "offset" is the offset of the coefficients for the variables of "src"
1819  * in the input constraints.
1820  * "s" is the sign of the mapping.
1821  *
1822  * The input constraints are given in terms of the coefficients
1823  * (c_0, c_n, c_x, c_y).
1824  * The mapping produced by this function essentially plugs in
1825  * (c_j_0 - c_i_0, c_j_n - c_i_n,
1826  *  -(c_i_x^+ - c_i_x^-), c_j_x^+ - c_j_x^-) if s = 1 and
1827  * (-c_j_0 + c_i_0, -c_j_n + c_i_n,
1828  *  c_i_x^+ - c_i_x^-, -(c_j_x^+ - c_j_x^-)) if s = -1.
1829  * In graph->lp, the c_*^- appear before their c_*^+ counterpart.
1830  * Furthermore, the order of these pairs is the opposite of that
1831  * of the corresponding coefficients.
1832  *
1833  * The caller can further extend the mapping.
1834  */
inter_dim_map(isl_ctx * ctx,struct isl_sched_graph * graph,struct isl_sched_node * src,struct isl_sched_node * dst,int offset,int s)1835 static __isl_give isl_dim_map *inter_dim_map(isl_ctx *ctx,
1836 	struct isl_sched_graph *graph, struct isl_sched_node *src,
1837 	struct isl_sched_node *dst, int offset, int s)
1838 {
1839 	int pos;
1840 	isl_size total;
1841 	isl_dim_map *dim_map;
1842 
1843 	total = isl_basic_set_dim(graph->lp, isl_dim_all);
1844 	if (!src || !dst || total < 0)
1845 		return NULL;
1846 
1847 	dim_map = isl_dim_map_alloc(ctx, total);
1848 
1849 	pos = node_cst_coef_offset(dst);
1850 	isl_dim_map_range(dim_map, pos, 0, 0, 0, 1, s);
1851 	pos = node_par_coef_offset(dst);
1852 	isl_dim_map_range(dim_map, pos, 1, 1, 1, dst->nparam, s);
1853 	pos = node_var_coef_pos(dst, 0);
1854 	isl_dim_map_range(dim_map, pos, -2, offset + src->nvar, 1,
1855 			  dst->nvar, -s);
1856 	isl_dim_map_range(dim_map, pos + 1, -2, offset + src->nvar, 1,
1857 			  dst->nvar, s);
1858 
1859 	pos = node_cst_coef_offset(src);
1860 	isl_dim_map_range(dim_map, pos, 0, 0, 0, 1, -s);
1861 	pos = node_par_coef_offset(src);
1862 	isl_dim_map_range(dim_map, pos, 1, 1, 1, src->nparam, -s);
1863 	pos = node_var_coef_pos(src, 0);
1864 	isl_dim_map_range(dim_map, pos, -2, offset, 1, src->nvar, s);
1865 	isl_dim_map_range(dim_map, pos + 1, -2, offset, 1, src->nvar, -s);
1866 
1867 	return dim_map;
1868 }
1869 
1870 /* Add the constraints from "src" to "dst" using "dim_map",
1871  * after making sure there is enough room in "dst" for the extra constraints.
1872  */
add_constraints_dim_map(__isl_take isl_basic_set * dst,__isl_take isl_basic_set * src,__isl_take isl_dim_map * dim_map)1873 static __isl_give isl_basic_set *add_constraints_dim_map(
1874 	__isl_take isl_basic_set *dst, __isl_take isl_basic_set *src,
1875 	__isl_take isl_dim_map *dim_map)
1876 {
1877 	isl_size n_eq, n_ineq;
1878 
1879 	n_eq = isl_basic_set_n_equality(src);
1880 	n_ineq = isl_basic_set_n_inequality(src);
1881 	if (n_eq < 0 || n_ineq < 0)
1882 		dst = isl_basic_set_free(dst);
1883 	dst = isl_basic_set_extend_constraints(dst, n_eq, n_ineq);
1884 	dst = isl_basic_set_add_constraints_dim_map(dst, src, dim_map);
1885 	return dst;
1886 }
1887 
1888 /* Add constraints to graph->lp that force validity for the given
1889  * dependence from a node i to itself.
1890  * That is, add constraints that enforce
1891  *
1892  *	(c_i_0 + c_i_n n + c_i_x y) - (c_i_0 + c_i_n n + c_i_x x)
1893  *	= c_i_x (y - x) >= 0
1894  *
1895  * for each (x,y) in R.
1896  * We obtain general constraints on coefficients (c_0, c_x)
1897  * of valid constraints for (y - x) and then plug in (0, c_i_x^+ - c_i_x^-),
1898  * where c_i_x = c_i_x^+ - c_i_x^-, with c_i_x^+ and c_i_x^- non-negative.
1899  * In graph->lp, the c_i_x^- appear before their c_i_x^+ counterpart.
1900  * Note that the result of intra_coefficients may also contain
1901  * parameter coefficients c_n, in which case 0 is plugged in for them as well.
1902  */
add_intra_validity_constraints(struct isl_sched_graph * graph,struct isl_sched_edge * edge)1903 static isl_stat add_intra_validity_constraints(struct isl_sched_graph *graph,
1904 	struct isl_sched_edge *edge)
1905 {
1906 	isl_size offset;
1907 	isl_map *map = isl_map_copy(edge->map);
1908 	isl_ctx *ctx = isl_map_get_ctx(map);
1909 	isl_dim_map *dim_map;
1910 	isl_basic_set *coef;
1911 	struct isl_sched_node *node = edge->src;
1912 
1913 	coef = intra_coefficients(graph, node, map, 0);
1914 
1915 	offset = coef_var_offset(coef);
1916 	if (offset < 0)
1917 		coef = isl_basic_set_free(coef);
1918 	if (!coef)
1919 		return isl_stat_error;
1920 
1921 	dim_map = intra_dim_map(ctx, graph, node, offset, 1);
1922 	graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);
1923 
1924 	return isl_stat_ok;
1925 }
1926 
1927 /* Add constraints to graph->lp that force validity for the given
1928  * dependence from node i to node j.
1929  * That is, add constraints that enforce
1930  *
1931  *	(c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) >= 0
1932  *
1933  * for each (x,y) in R.
1934  * We obtain general constraints on coefficients (c_0, c_n, c_x, c_y)
1935  * of valid constraints for R and then plug in
1936  * (c_j_0 - c_i_0, c_j_n - c_i_n, -(c_i_x^+ - c_i_x^-), c_j_x^+ - c_j_x^-),
1937  * where c_* = c_*^+ - c_*^-, with c_*^+ and c_*^- non-negative.
1938  * In graph->lp, the c_*^- appear before their c_*^+ counterpart.
1939  */
add_inter_validity_constraints(struct isl_sched_graph * graph,struct isl_sched_edge * edge)1940 static isl_stat add_inter_validity_constraints(struct isl_sched_graph *graph,
1941 	struct isl_sched_edge *edge)
1942 {
1943 	isl_size offset;
1944 	isl_map *map;
1945 	isl_ctx *ctx;
1946 	isl_dim_map *dim_map;
1947 	isl_basic_set *coef;
1948 	struct isl_sched_node *src = edge->src;
1949 	struct isl_sched_node *dst = edge->dst;
1950 
1951 	if (!graph->lp)
1952 		return isl_stat_error;
1953 
1954 	map = isl_map_copy(edge->map);
1955 	ctx = isl_map_get_ctx(map);
1956 	coef = inter_coefficients(graph, edge, map);
1957 
1958 	offset = coef_var_offset(coef);
1959 	if (offset < 0)
1960 		coef = isl_basic_set_free(coef);
1961 	if (!coef)
1962 		return isl_stat_error;
1963 
1964 	dim_map = inter_dim_map(ctx, graph, src, dst, offset, 1);
1965 
1966 	edge->start = graph->lp->n_ineq;
1967 	graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);
1968 	if (!graph->lp)
1969 		return isl_stat_error;
1970 	edge->end = graph->lp->n_ineq;
1971 
1972 	return isl_stat_ok;
1973 }
1974 
1975 /* Add constraints to graph->lp that bound the dependence distance for the given
1976  * dependence from a node i to itself.
1977  * If s = 1, we add the constraint
1978  *
1979  *	c_i_x (y - x) <= m_0 + m_n n
1980  *
1981  * or
1982  *
1983  *	-c_i_x (y - x) + m_0 + m_n n >= 0
1984  *
1985  * for each (x,y) in R.
1986  * If s = -1, we add the constraint
1987  *
1988  *	-c_i_x (y - x) <= m_0 + m_n n
1989  *
1990  * or
1991  *
1992  *	c_i_x (y - x) + m_0 + m_n n >= 0
1993  *
1994  * for each (x,y) in R.
1995  * We obtain general constraints on coefficients (c_0, c_n, c_x)
1996  * of valid constraints for (y - x) and then plug in (m_0, m_n, -s * c_i_x),
1997  * with each coefficient (except m_0) represented as a pair of non-negative
1998  * coefficients.
1999  *
2000  *
2001  * If "local" is set, then we add constraints
2002  *
2003  *	c_i_x (y - x) <= 0
2004  *
2005  * or
2006  *
2007  *	-c_i_x (y - x) <= 0
2008  *
2009  * instead, forcing the dependence distance to be (less than or) equal to 0.
2010  * That is, we plug in (0, 0, -s * c_i_x),
2011  * intra_coefficients is not required to have c_n in its result when
2012  * "local" is set.  If they are missing, then (0, -s * c_i_x) is plugged in.
2013  * Note that dependences marked local are treated as validity constraints
2014  * by add_all_validity_constraints and therefore also have
2015  * their distances bounded by 0 from below.
2016  */
add_intra_proximity_constraints(struct isl_sched_graph * graph,struct isl_sched_edge * edge,int s,int local)2017 static isl_stat add_intra_proximity_constraints(struct isl_sched_graph *graph,
2018 	struct isl_sched_edge *edge, int s, int local)
2019 {
2020 	isl_size offset;
2021 	isl_size nparam;
2022 	isl_map *map = isl_map_copy(edge->map);
2023 	isl_ctx *ctx = isl_map_get_ctx(map);
2024 	isl_dim_map *dim_map;
2025 	isl_basic_set *coef;
2026 	struct isl_sched_node *node = edge->src;
2027 
2028 	coef = intra_coefficients(graph, node, map, !local);
2029 	nparam = isl_space_dim(node->space, isl_dim_param);
2030 
2031 	offset = coef_var_offset(coef);
2032 	if (nparam < 0 || offset < 0)
2033 		coef = isl_basic_set_free(coef);
2034 	if (!coef)
2035 		return isl_stat_error;
2036 
2037 	dim_map = intra_dim_map(ctx, graph, node, offset, -s);
2038 
2039 	if (!local) {
2040 		isl_dim_map_range(dim_map, 1, 0, 0, 0, 1, 1);
2041 		isl_dim_map_range(dim_map, 4, 2, 1, 1, nparam, -1);
2042 		isl_dim_map_range(dim_map, 5, 2, 1, 1, nparam, 1);
2043 	}
2044 	graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);
2045 
2046 	return isl_stat_ok;
2047 }
2048 
2049 /* Add constraints to graph->lp that bound the dependence distance for the given
2050  * dependence from node i to node j.
2051  * If s = 1, we add the constraint
2052  *
2053  *	(c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x)
2054  *		<= m_0 + m_n n
2055  *
2056  * or
2057  *
2058  *	-(c_j_0 + c_j_n n + c_j_x y) + (c_i_0 + c_i_n n + c_i_x x) +
2059  *		m_0 + m_n n >= 0
2060  *
2061  * for each (x,y) in R.
2062  * If s = -1, we add the constraint
2063  *
2064  *	-((c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x))
2065  *		<= m_0 + m_n n
2066  *
2067  * or
2068  *
2069  *	(c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) +
2070  *		m_0 + m_n n >= 0
2071  *
2072  * for each (x,y) in R.
2073  * We obtain general constraints on coefficients (c_0, c_n, c_x, c_y)
2074  * of valid constraints for R and then plug in
2075  * (m_0 - s*c_j_0 + s*c_i_0, m_n - s*c_j_n + s*c_i_n,
2076  *  s*c_i_x, -s*c_j_x)
2077  * with each coefficient (except m_0, c_*_0 and c_*_n)
2078  * represented as a pair of non-negative coefficients.
2079  *
2080  *
2081  * If "local" is set (and s = 1), then we add constraints
2082  *
2083  *	(c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) <= 0
2084  *
2085  * or
2086  *
2087  *	-((c_j_0 + c_j_n n + c_j_x y) + (c_i_0 + c_i_n n + c_i_x x)) >= 0
2088  *
2089  * instead, forcing the dependence distance to be (less than or) equal to 0.
2090  * That is, we plug in
2091  * (-s*c_j_0 + s*c_i_0, -s*c_j_n + s*c_i_n, s*c_i_x, -s*c_j_x).
2092  * Note that dependences marked local are treated as validity constraints
2093  * by add_all_validity_constraints and therefore also have
2094  * their distances bounded by 0 from below.
2095  */
add_inter_proximity_constraints(struct isl_sched_graph * graph,struct isl_sched_edge * edge,int s,int local)2096 static isl_stat add_inter_proximity_constraints(struct isl_sched_graph *graph,
2097 	struct isl_sched_edge *edge, int s, int local)
2098 {
2099 	isl_size offset;
2100 	isl_size nparam;
2101 	isl_map *map = isl_map_copy(edge->map);
2102 	isl_ctx *ctx = isl_map_get_ctx(map);
2103 	isl_dim_map *dim_map;
2104 	isl_basic_set *coef;
2105 	struct isl_sched_node *src = edge->src;
2106 	struct isl_sched_node *dst = edge->dst;
2107 
2108 	coef = inter_coefficients(graph, edge, map);
2109 	nparam = isl_space_dim(src->space, isl_dim_param);
2110 
2111 	offset = coef_var_offset(coef);
2112 	if (nparam < 0 || offset < 0)
2113 		coef = isl_basic_set_free(coef);
2114 	if (!coef)
2115 		return isl_stat_error;
2116 
2117 	dim_map = inter_dim_map(ctx, graph, src, dst, offset, -s);
2118 
2119 	if (!local) {
2120 		isl_dim_map_range(dim_map, 1, 0, 0, 0, 1, 1);
2121 		isl_dim_map_range(dim_map, 4, 2, 1, 1, nparam, -1);
2122 		isl_dim_map_range(dim_map, 5, 2, 1, 1, nparam, 1);
2123 	}
2124 
2125 	graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);
2126 
2127 	return isl_stat_ok;
2128 }
2129 
2130 /* Should the distance over "edge" be forced to zero?
2131  * That is, is it marked as a local edge?
2132  * If "use_coincidence" is set, then coincidence edges are treated
2133  * as local edges.
2134  */
force_zero(struct isl_sched_edge * edge,int use_coincidence)2135 static int force_zero(struct isl_sched_edge *edge, int use_coincidence)
2136 {
2137 	return is_local(edge) || (use_coincidence && is_coincidence(edge));
2138 }
2139 
2140 /* Add all validity constraints to graph->lp.
2141  *
2142  * An edge that is forced to be local needs to have its dependence
2143  * distances equal to zero.  We take care of bounding them by 0 from below
2144  * here.  add_all_proximity_constraints takes care of bounding them by 0
2145  * from above.
2146  *
2147  * If "use_coincidence" is set, then we treat coincidence edges as local edges.
2148  * Otherwise, we ignore them.
2149  */
add_all_validity_constraints(struct isl_sched_graph * graph,int use_coincidence)2150 static int add_all_validity_constraints(struct isl_sched_graph *graph,
2151 	int use_coincidence)
2152 {
2153 	int i;
2154 
2155 	for (i = 0; i < graph->n_edge; ++i) {
2156 		struct isl_sched_edge *edge = &graph->edge[i];
2157 		int zero;
2158 
2159 		zero = force_zero(edge, use_coincidence);
2160 		if (!is_validity(edge) && !zero)
2161 			continue;
2162 		if (edge->src != edge->dst)
2163 			continue;
2164 		if (add_intra_validity_constraints(graph, edge) < 0)
2165 			return -1;
2166 	}
2167 
2168 	for (i = 0; i < graph->n_edge; ++i) {
2169 		struct isl_sched_edge *edge = &graph->edge[i];
2170 		int zero;
2171 
2172 		zero = force_zero(edge, use_coincidence);
2173 		if (!is_validity(edge) && !zero)
2174 			continue;
2175 		if (edge->src == edge->dst)
2176 			continue;
2177 		if (add_inter_validity_constraints(graph, edge) < 0)
2178 			return -1;
2179 	}
2180 
2181 	return 0;
2182 }
2183 
2184 /* Add constraints to graph->lp that bound the dependence distance
2185  * for all dependence relations.
2186  * If a given proximity dependence is identical to a validity
2187  * dependence, then the dependence distance is already bounded
2188  * from below (by zero), so we only need to bound the distance
2189  * from above.  (This includes the case of "local" dependences
2190  * which are treated as validity dependence by add_all_validity_constraints.)
2191  * Otherwise, we need to bound the distance both from above and from below.
2192  *
2193  * If "use_coincidence" is set, then we treat coincidence edges as local edges.
2194  * Otherwise, we ignore them.
2195  */
add_all_proximity_constraints(struct isl_sched_graph * graph,int use_coincidence)2196 static int add_all_proximity_constraints(struct isl_sched_graph *graph,
2197 	int use_coincidence)
2198 {
2199 	int i;
2200 
2201 	for (i = 0; i < graph->n_edge; ++i) {
2202 		struct isl_sched_edge *edge = &graph->edge[i];
2203 		int zero;
2204 
2205 		zero = force_zero(edge, use_coincidence);
2206 		if (!isl_sched_edge_is_proximity(edge) && !zero)
2207 			continue;
2208 		if (edge->src == edge->dst &&
2209 		    add_intra_proximity_constraints(graph, edge, 1, zero) < 0)
2210 			return -1;
2211 		if (edge->src != edge->dst &&
2212 		    add_inter_proximity_constraints(graph, edge, 1, zero) < 0)
2213 			return -1;
2214 		if (is_validity(edge) || zero)
2215 			continue;
2216 		if (edge->src == edge->dst &&
2217 		    add_intra_proximity_constraints(graph, edge, -1, 0) < 0)
2218 			return -1;
2219 		if (edge->src != edge->dst &&
2220 		    add_inter_proximity_constraints(graph, edge, -1, 0) < 0)
2221 			return -1;
2222 	}
2223 
2224 	return 0;
2225 }
2226 
2227 /* Normalize the rows of "indep" such that all rows are lexicographically
2228  * positive and such that each row contains as many final zeros as possible,
2229  * given the choice for the previous rows.
2230  * Do this by performing elementary row operations.
2231  */
normalize_independent(__isl_take isl_mat * indep)2232 static __isl_give isl_mat *normalize_independent(__isl_take isl_mat *indep)
2233 {
2234 	indep = isl_mat_reverse_gauss(indep);
2235 	indep = isl_mat_lexnonneg_rows(indep);
2236 	return indep;
2237 }
2238 
2239 /* Extract the linear part of the current schedule for node "node".
2240  */
extract_linear_schedule(struct isl_sched_node * node)2241 static __isl_give isl_mat *extract_linear_schedule(struct isl_sched_node *node)
2242 {
2243 	isl_size n_row = isl_mat_rows(node->sched);
2244 
2245 	if (n_row < 0)
2246 		return NULL;
2247 	return isl_mat_sub_alloc(node->sched, 0, n_row,
2248 			      1 + node->nparam, node->nvar);
2249 }
2250 
2251 /* Compute a basis for the rows in the linear part of the schedule
2252  * and extend this basis to a full basis.  The remaining rows
2253  * can then be used to force linear independence from the rows
2254  * in the schedule.
2255  *
2256  * In particular, given the schedule rows S, we compute
2257  *
2258  *	S   = H Q
2259  *	S U = H
2260  *
2261  * with H the Hermite normal form of S.  That is, all but the
2262  * first rank columns of H are zero and so each row in S is
2263  * a linear combination of the first rank rows of Q.
2264  * The matrix Q can be used as a variable transformation
2265  * that isolates the directions of S in the first rank rows.
2266  * Transposing S U = H yields
2267  *
2268  *	U^T S^T = H^T
2269  *
2270  * with all but the first rank rows of H^T zero.
2271  * The last rows of U^T are therefore linear combinations
2272  * of schedule coefficients that are all zero on schedule
2273  * coefficients that are linearly dependent on the rows of S.
2274  * At least one of these combinations is non-zero on
2275  * linearly independent schedule coefficients.
2276  * The rows are normalized to involve as few of the last
2277  * coefficients as possible and to have a positive initial value.
2278  */
isl_sched_node_update_vmap(struct isl_sched_node * node)2279 isl_stat isl_sched_node_update_vmap(struct isl_sched_node *node)
2280 {
2281 	isl_mat *H, *U, *Q;
2282 
2283 	H = extract_linear_schedule(node);
2284 
2285 	H = isl_mat_left_hermite(H, 0, &U, &Q);
2286 	isl_mat_free(node->indep);
2287 	isl_mat_free(node->vmap);
2288 	node->vmap = Q;
2289 	node->indep = isl_mat_transpose(U);
2290 	node->rank = isl_mat_initial_non_zero_cols(H);
2291 	node->indep = isl_mat_drop_rows(node->indep, 0, node->rank);
2292 	node->indep = normalize_independent(node->indep);
2293 	isl_mat_free(H);
2294 
2295 	if (!node->indep || !node->vmap || node->rank < 0)
2296 		return isl_stat_error;
2297 	return isl_stat_ok;
2298 }
2299 
2300 /* Is "edge" marked as a validity or a conditional validity edge?
2301  */
is_any_validity(struct isl_sched_edge * edge)2302 static int is_any_validity(struct isl_sched_edge *edge)
2303 {
2304 	return is_validity(edge) ||
2305 		isl_sched_edge_is_conditional_validity(edge);
2306 }
2307 
2308 /* How many times should we count the constraints in "edge"?
2309  *
2310  * We count as follows
2311  * validity		-> 1 (>= 0)
2312  * validity+proximity	-> 2 (>= 0 and upper bound)
2313  * proximity		-> 2 (lower and upper bound)
2314  * local(+any)		-> 2 (>= 0 and <= 0)
2315  *
2316  * If an edge is only marked conditional_validity then it counts
2317  * as zero since it is only checked afterwards.
2318  *
2319  * If "use_coincidence" is set, then we treat coincidence edges as local edges.
2320  * Otherwise, we ignore them.
2321  */
edge_multiplicity(struct isl_sched_edge * edge,int use_coincidence)2322 static int edge_multiplicity(struct isl_sched_edge *edge, int use_coincidence)
2323 {
2324 	if (isl_sched_edge_is_proximity(edge) ||
2325 	    force_zero(edge, use_coincidence))
2326 		return 2;
2327 	if (is_validity(edge))
2328 		return 1;
2329 	return 0;
2330 }
2331 
2332 /* How many times should the constraints in "edge" be counted
2333  * as a parametric intra-node constraint?
2334  *
2335  * Only proximity edges that are not forced zero need
2336  * coefficient constraints that include coefficients for parameters.
2337  * If the edge is also a validity edge, then only
2338  * an upper bound is introduced.  Otherwise, both lower and upper bounds
2339  * are introduced.
2340  */
parametric_intra_edge_multiplicity(struct isl_sched_edge * edge,int use_coincidence)2341 static int parametric_intra_edge_multiplicity(struct isl_sched_edge *edge,
2342 	int use_coincidence)
2343 {
2344 	if (edge->src != edge->dst)
2345 		return 0;
2346 	if (!isl_sched_edge_is_proximity(edge))
2347 		return 0;
2348 	if (force_zero(edge, use_coincidence))
2349 		return 0;
2350 	if (is_validity(edge))
2351 		return 1;
2352 	else
2353 		return 2;
2354 }
2355 
2356 /* Add "f" times the number of equality and inequality constraints of "bset"
2357  * to "n_eq" and "n_ineq" and free "bset".
2358  */
update_count(__isl_take isl_basic_set * bset,int f,int * n_eq,int * n_ineq)2359 static isl_stat update_count(__isl_take isl_basic_set *bset,
2360 	int f, int *n_eq, int *n_ineq)
2361 {
2362 	isl_size eq, ineq;
2363 
2364 	eq = isl_basic_set_n_equality(bset);
2365 	ineq = isl_basic_set_n_inequality(bset);
2366 	isl_basic_set_free(bset);
2367 
2368 	if (eq < 0 || ineq < 0)
2369 		return isl_stat_error;
2370 
2371 	*n_eq += eq;
2372 	*n_ineq += ineq;
2373 
2374 	return isl_stat_ok;
2375 }
2376 
2377 /* Count the number of equality and inequality constraints
2378  * that will be added for the given map.
2379  *
2380  * The edges that require parameter coefficients are counted separately.
2381  *
2382  * "use_coincidence" is set if we should take into account coincidence edges.
2383  */
count_map_constraints(struct isl_sched_graph * graph,struct isl_sched_edge * edge,__isl_take isl_map * map,int * n_eq,int * n_ineq,int use_coincidence)2384 static isl_stat count_map_constraints(struct isl_sched_graph *graph,
2385 	struct isl_sched_edge *edge, __isl_take isl_map *map,
2386 	int *n_eq, int *n_ineq, int use_coincidence)
2387 {
2388 	isl_map *copy;
2389 	isl_basic_set *coef;
2390 	int f = edge_multiplicity(edge, use_coincidence);
2391 	int fp = parametric_intra_edge_multiplicity(edge, use_coincidence);
2392 
2393 	if (f == 0) {
2394 		isl_map_free(map);
2395 		return isl_stat_ok;
2396 	}
2397 
2398 	if (edge->src != edge->dst) {
2399 		coef = inter_coefficients(graph, edge, map);
2400 		return update_count(coef, f, n_eq, n_ineq);
2401 	}
2402 
2403 	if (fp > 0) {
2404 		copy = isl_map_copy(map);
2405 		coef = intra_coefficients(graph, edge->src, copy, 1);
2406 		if (update_count(coef, fp, n_eq, n_ineq) < 0)
2407 			goto error;
2408 	}
2409 
2410 	if (f > fp) {
2411 		copy = isl_map_copy(map);
2412 		coef = intra_coefficients(graph, edge->src, copy, 0);
2413 		if (update_count(coef, f - fp, n_eq, n_ineq) < 0)
2414 			goto error;
2415 	}
2416 
2417 	isl_map_free(map);
2418 	return isl_stat_ok;
2419 error:
2420 	isl_map_free(map);
2421 	return isl_stat_error;
2422 }
2423 
2424 /* Count the number of equality and inequality constraints
2425  * that will be added to the main lp problem.
2426  * We count as follows
2427  * validity		-> 1 (>= 0)
2428  * validity+proximity	-> 2 (>= 0 and upper bound)
2429  * proximity		-> 2 (lower and upper bound)
2430  * local(+any)		-> 2 (>= 0 and <= 0)
2431  *
2432  * If "use_coincidence" is set, then we treat coincidence edges as local edges.
2433  * Otherwise, we ignore them.
2434  */
count_constraints(struct isl_sched_graph * graph,int * n_eq,int * n_ineq,int use_coincidence)2435 static int count_constraints(struct isl_sched_graph *graph,
2436 	int *n_eq, int *n_ineq, int use_coincidence)
2437 {
2438 	int i;
2439 
2440 	*n_eq = *n_ineq = 0;
2441 	for (i = 0; i < graph->n_edge; ++i) {
2442 		struct isl_sched_edge *edge = &graph->edge[i];
2443 		isl_map *map = isl_map_copy(edge->map);
2444 
2445 		if (count_map_constraints(graph, edge, map, n_eq, n_ineq,
2446 					    use_coincidence) < 0)
2447 			return -1;
2448 	}
2449 
2450 	return 0;
2451 }
2452 
2453 /* Count the number of constraints that will be added by
2454  * add_bound_constant_constraints to bound the values of the constant terms
2455  * and increment *n_eq and *n_ineq accordingly.
2456  *
2457  * In practice, add_bound_constant_constraints only adds inequalities.
2458  */
count_bound_constant_constraints(isl_ctx * ctx,struct isl_sched_graph * graph,int * n_eq,int * n_ineq)2459 static isl_stat count_bound_constant_constraints(isl_ctx *ctx,
2460 	struct isl_sched_graph *graph, int *n_eq, int *n_ineq)
2461 {
2462 	if (isl_options_get_schedule_max_constant_term(ctx) == -1)
2463 		return isl_stat_ok;
2464 
2465 	*n_ineq += graph->n;
2466 
2467 	return isl_stat_ok;
2468 }
2469 
2470 /* Add constraints to bound the values of the constant terms in the schedule,
2471  * if requested by the user.
2472  *
2473  * The maximal value of the constant terms is defined by the option
2474  * "schedule_max_constant_term".
2475  */
add_bound_constant_constraints(isl_ctx * ctx,struct isl_sched_graph * graph)2476 static isl_stat add_bound_constant_constraints(isl_ctx *ctx,
2477 	struct isl_sched_graph *graph)
2478 {
2479 	int i, k;
2480 	int max;
2481 	isl_size total;
2482 
2483 	max = isl_options_get_schedule_max_constant_term(ctx);
2484 	if (max == -1)
2485 		return isl_stat_ok;
2486 
2487 	total = isl_basic_set_dim(graph->lp, isl_dim_set);
2488 	if (total < 0)
2489 		return isl_stat_error;
2490 
2491 	for (i = 0; i < graph->n; ++i) {
2492 		struct isl_sched_node *node = &graph->node[i];
2493 		int pos;
2494 
2495 		k = isl_basic_set_alloc_inequality(graph->lp);
2496 		if (k < 0)
2497 			return isl_stat_error;
2498 		isl_seq_clr(graph->lp->ineq[k], 1 + total);
2499 		pos = node_cst_coef_offset(node);
2500 		isl_int_set_si(graph->lp->ineq[k][1 + pos], -1);
2501 		isl_int_set_si(graph->lp->ineq[k][0], max);
2502 	}
2503 
2504 	return isl_stat_ok;
2505 }
2506 
2507 /* Count the number of constraints that will be added by
2508  * add_bound_coefficient_constraints and increment *n_eq and *n_ineq
2509  * accordingly.
2510  *
2511  * In practice, add_bound_coefficient_constraints only adds inequalities.
2512  */
count_bound_coefficient_constraints(isl_ctx * ctx,struct isl_sched_graph * graph,int * n_eq,int * n_ineq)2513 static int count_bound_coefficient_constraints(isl_ctx *ctx,
2514 	struct isl_sched_graph *graph, int *n_eq, int *n_ineq)
2515 {
2516 	int i;
2517 
2518 	if (isl_options_get_schedule_max_coefficient(ctx) == -1 &&
2519 	    !isl_options_get_schedule_treat_coalescing(ctx))
2520 		return 0;
2521 
2522 	for (i = 0; i < graph->n; ++i)
2523 		*n_ineq += graph->node[i].nparam + 2 * graph->node[i].nvar;
2524 
2525 	return 0;
2526 }
2527 
2528 /* Add constraints to graph->lp that bound the values of
2529  * the parameter schedule coefficients of "node" to "max" and
2530  * the variable schedule coefficients to the corresponding entry
2531  * in node->max.
2532  * In either case, a negative value means that no bound needs to be imposed.
2533  *
2534  * For parameter coefficients, this amounts to adding a constraint
2535  *
2536  *	c_n <= max
2537  *
2538  * i.e.,
2539  *
2540  *	-c_n + max >= 0
2541  *
2542  * The variables coefficients are, however, not represented directly.
2543  * Instead, the variable coefficients c_x are written as differences
2544  * c_x = c_x^+ - c_x^-.
2545  * That is,
2546  *
2547  *	-max_i <= c_x_i <= max_i
2548  *
2549  * is encoded as
2550  *
2551  *	-max_i <= c_x_i^+ - c_x_i^- <= max_i
2552  *
2553  * or
2554  *
2555  *	-(c_x_i^+ - c_x_i^-) + max_i >= 0
2556  *	c_x_i^+ - c_x_i^- + max_i >= 0
2557  */
node_add_coefficient_constraints(isl_ctx * ctx,struct isl_sched_graph * graph,struct isl_sched_node * node,int max)2558 static isl_stat node_add_coefficient_constraints(isl_ctx *ctx,
2559 	struct isl_sched_graph *graph, struct isl_sched_node *node, int max)
2560 {
2561 	int i, j, k;
2562 	isl_size total;
2563 	isl_vec *ineq;
2564 
2565 	total = isl_basic_set_dim(graph->lp, isl_dim_set);
2566 	if (total < 0)
2567 		return isl_stat_error;
2568 
2569 	for (j = 0; j < node->nparam; ++j) {
2570 		int dim;
2571 
2572 		if (max < 0)
2573 			continue;
2574 
2575 		k = isl_basic_set_alloc_inequality(graph->lp);
2576 		if (k < 0)
2577 			return isl_stat_error;
2578 		dim = 1 + node_par_coef_offset(node) + j;
2579 		isl_seq_clr(graph->lp->ineq[k], 1 + total);
2580 		isl_int_set_si(graph->lp->ineq[k][dim], -1);
2581 		isl_int_set_si(graph->lp->ineq[k][0], max);
2582 	}
2583 
2584 	ineq = isl_vec_alloc(ctx, 1 + total);
2585 	ineq = isl_vec_clr(ineq);
2586 	if (!ineq)
2587 		return isl_stat_error;
2588 	for (i = 0; i < node->nvar; ++i) {
2589 		int pos = 1 + node_var_coef_pos(node, i);
2590 
2591 		if (isl_int_is_neg(node->max->el[i]))
2592 			continue;
2593 
2594 		isl_int_set_si(ineq->el[pos], 1);
2595 		isl_int_set_si(ineq->el[pos + 1], -1);
2596 		isl_int_set(ineq->el[0], node->max->el[i]);
2597 
2598 		k = isl_basic_set_alloc_inequality(graph->lp);
2599 		if (k < 0)
2600 			goto error;
2601 		isl_seq_cpy(graph->lp->ineq[k], ineq->el, 1 + total);
2602 
2603 		isl_seq_neg(ineq->el + pos, ineq->el + pos, 2);
2604 		k = isl_basic_set_alloc_inequality(graph->lp);
2605 		if (k < 0)
2606 			goto error;
2607 		isl_seq_cpy(graph->lp->ineq[k], ineq->el, 1 + total);
2608 
2609 		isl_seq_clr(ineq->el + pos, 2);
2610 	}
2611 	isl_vec_free(ineq);
2612 
2613 	return isl_stat_ok;
2614 error:
2615 	isl_vec_free(ineq);
2616 	return isl_stat_error;
2617 }
2618 
2619 /* Add constraints that bound the values of the variable and parameter
2620  * coefficients of the schedule.
2621  *
2622  * The maximal value of the coefficients is defined by the option
2623  * 'schedule_max_coefficient' and the entries in node->max.
2624  * These latter entries are only set if either the schedule_max_coefficient
2625  * option or the schedule_treat_coalescing option is set.
2626  */
add_bound_coefficient_constraints(isl_ctx * ctx,struct isl_sched_graph * graph)2627 static isl_stat add_bound_coefficient_constraints(isl_ctx *ctx,
2628 	struct isl_sched_graph *graph)
2629 {
2630 	int i;
2631 	int max;
2632 
2633 	max = isl_options_get_schedule_max_coefficient(ctx);
2634 
2635 	if (max == -1 && !isl_options_get_schedule_treat_coalescing(ctx))
2636 		return isl_stat_ok;
2637 
2638 	for (i = 0; i < graph->n; ++i) {
2639 		struct isl_sched_node *node = &graph->node[i];
2640 
2641 		if (node_add_coefficient_constraints(ctx, graph, node, max) < 0)
2642 			return isl_stat_error;
2643 	}
2644 
2645 	return isl_stat_ok;
2646 }
2647 
2648 /* Add a constraint to graph->lp that equates the value at position
2649  * "sum_pos" to the sum of the "n" values starting at "first".
2650  */
add_sum_constraint(struct isl_sched_graph * graph,int sum_pos,int first,int n)2651 static isl_stat add_sum_constraint(struct isl_sched_graph *graph,
2652 	int sum_pos, int first, int n)
2653 {
2654 	int i, k;
2655 	isl_size total;
2656 
2657 	total = isl_basic_set_dim(graph->lp, isl_dim_set);
2658 	if (total < 0)
2659 		return isl_stat_error;
2660 
2661 	k = isl_basic_set_alloc_equality(graph->lp);
2662 	if (k < 0)
2663 		return isl_stat_error;
2664 	isl_seq_clr(graph->lp->eq[k], 1 + total);
2665 	isl_int_set_si(graph->lp->eq[k][1 + sum_pos], -1);
2666 	for (i = 0; i < n; ++i)
2667 		isl_int_set_si(graph->lp->eq[k][1 + first + i], 1);
2668 
2669 	return isl_stat_ok;
2670 }
2671 
2672 /* Add a constraint to graph->lp that equates the value at position
2673  * "sum_pos" to the sum of the parameter coefficients of all nodes.
2674  */
add_param_sum_constraint(struct isl_sched_graph * graph,int sum_pos)2675 static isl_stat add_param_sum_constraint(struct isl_sched_graph *graph,
2676 	int sum_pos)
2677 {
2678 	int i, j, k;
2679 	isl_size total;
2680 
2681 	total = isl_basic_set_dim(graph->lp, isl_dim_set);
2682 	if (total < 0)
2683 		return isl_stat_error;
2684 
2685 	k = isl_basic_set_alloc_equality(graph->lp);
2686 	if (k < 0)
2687 		return isl_stat_error;
2688 	isl_seq_clr(graph->lp->eq[k], 1 + total);
2689 	isl_int_set_si(graph->lp->eq[k][1 + sum_pos], -1);
2690 	for (i = 0; i < graph->n; ++i) {
2691 		int pos = 1 + node_par_coef_offset(&graph->node[i]);
2692 
2693 		for (j = 0; j < graph->node[i].nparam; ++j)
2694 			isl_int_set_si(graph->lp->eq[k][pos + j], 1);
2695 	}
2696 
2697 	return isl_stat_ok;
2698 }
2699 
2700 /* Add a constraint to graph->lp that equates the value at position
2701  * "sum_pos" to the sum of the variable coefficients of all nodes.
2702  */
add_var_sum_constraint(struct isl_sched_graph * graph,int sum_pos)2703 static isl_stat add_var_sum_constraint(struct isl_sched_graph *graph,
2704 	int sum_pos)
2705 {
2706 	int i, j, k;
2707 	isl_size total;
2708 
2709 	total = isl_basic_set_dim(graph->lp, isl_dim_set);
2710 	if (total < 0)
2711 		return isl_stat_error;
2712 
2713 	k = isl_basic_set_alloc_equality(graph->lp);
2714 	if (k < 0)
2715 		return isl_stat_error;
2716 	isl_seq_clr(graph->lp->eq[k], 1 + total);
2717 	isl_int_set_si(graph->lp->eq[k][1 + sum_pos], -1);
2718 	for (i = 0; i < graph->n; ++i) {
2719 		struct isl_sched_node *node = &graph->node[i];
2720 		int pos = 1 + node_var_coef_offset(node);
2721 
2722 		for (j = 0; j < 2 * node->nvar; ++j)
2723 			isl_int_set_si(graph->lp->eq[k][pos + j], 1);
2724 	}
2725 
2726 	return isl_stat_ok;
2727 }
2728 
2729 /* Construct an ILP problem for finding schedule coefficients
2730  * that result in non-negative, but small dependence distances
2731  * over all dependences.
2732  * In particular, the dependence distances over proximity edges
2733  * are bounded by m_0 + m_n n and we compute schedule coefficients
2734  * with small values (preferably zero) of m_n and m_0.
2735  *
2736  * All variables of the ILP are non-negative.  The actual coefficients
2737  * may be negative, so each coefficient is represented as the difference
2738  * of two non-negative variables.  The negative part always appears
2739  * immediately before the positive part.
2740  * Other than that, the variables have the following order
2741  *
2742  *	- sum of positive and negative parts of m_n coefficients
2743  *	- m_0
2744  *	- sum of all c_n coefficients
2745  *		(unconstrained when computing non-parametric schedules)
2746  *	- sum of positive and negative parts of all c_x coefficients
2747  *	- positive and negative parts of m_n coefficients
2748  *	- for each node
2749  *		- positive and negative parts of c_i_x, in opposite order
2750  *		- c_i_n (if parametric)
2751  *		- c_i_0
2752  *
2753  * The constraints are those from the edges plus two or three equalities
2754  * to express the sums.
2755  *
2756  * If "use_coincidence" is set, then we treat coincidence edges as local edges.
2757  * Otherwise, we ignore them.
2758  */
setup_lp(isl_ctx * ctx,struct isl_sched_graph * graph,int use_coincidence)2759 static isl_stat setup_lp(isl_ctx *ctx, struct isl_sched_graph *graph,
2760 	int use_coincidence)
2761 {
2762 	int i;
2763 	isl_size nparam;
2764 	unsigned total;
2765 	isl_space *space;
2766 	int parametric;
2767 	int param_pos;
2768 	int n_eq, n_ineq;
2769 
2770 	parametric = ctx->opt->schedule_parametric;
2771 	nparam = isl_space_dim(graph->node[0].space, isl_dim_param);
2772 	if (nparam < 0)
2773 		return isl_stat_error;
2774 	param_pos = 4;
2775 	total = param_pos + 2 * nparam;
2776 	for (i = 0; i < graph->n; ++i) {
2777 		struct isl_sched_node *node = &graph->node[graph->sorted[i]];
2778 		if (isl_sched_node_update_vmap(node) < 0)
2779 			return isl_stat_error;
2780 		node->start = total;
2781 		total += 1 + node->nparam + 2 * node->nvar;
2782 	}
2783 
2784 	if (count_constraints(graph, &n_eq, &n_ineq, use_coincidence) < 0)
2785 		return isl_stat_error;
2786 	if (count_bound_constant_constraints(ctx, graph, &n_eq, &n_ineq) < 0)
2787 		return isl_stat_error;
2788 	if (count_bound_coefficient_constraints(ctx, graph, &n_eq, &n_ineq) < 0)
2789 		return isl_stat_error;
2790 
2791 	space = isl_space_set_alloc(ctx, 0, total);
2792 	isl_basic_set_free(graph->lp);
2793 	n_eq += 2 + parametric;
2794 
2795 	graph->lp = isl_basic_set_alloc_space(space, 0, n_eq, n_ineq);
2796 
2797 	if (add_sum_constraint(graph, 0, param_pos, 2 * nparam) < 0)
2798 		return isl_stat_error;
2799 	if (parametric && add_param_sum_constraint(graph, 2) < 0)
2800 		return isl_stat_error;
2801 	if (add_var_sum_constraint(graph, 3) < 0)
2802 		return isl_stat_error;
2803 	if (add_bound_constant_constraints(ctx, graph) < 0)
2804 		return isl_stat_error;
2805 	if (add_bound_coefficient_constraints(ctx, graph) < 0)
2806 		return isl_stat_error;
2807 	if (add_all_validity_constraints(graph, use_coincidence) < 0)
2808 		return isl_stat_error;
2809 	if (add_all_proximity_constraints(graph, use_coincidence) < 0)
2810 		return isl_stat_error;
2811 
2812 	return isl_stat_ok;
2813 }
2814 
2815 /* Analyze the conflicting constraint found by
2816  * isl_tab_basic_set_non_trivial_lexmin.  If it corresponds to the validity
2817  * constraint of one of the edges between distinct nodes, living, moreover
2818  * in distinct SCCs, then record the source and sink SCC as this may
2819  * be a good place to cut between SCCs.
2820  */
check_conflict(int con,void * user)2821 static int check_conflict(int con, void *user)
2822 {
2823 	int i;
2824 	struct isl_sched_graph *graph = user;
2825 
2826 	if (graph->src_scc >= 0)
2827 		return 0;
2828 
2829 	con -= graph->lp->n_eq;
2830 
2831 	if (con >= graph->lp->n_ineq)
2832 		return 0;
2833 
2834 	for (i = 0; i < graph->n_edge; ++i) {
2835 		if (!is_validity(&graph->edge[i]))
2836 			continue;
2837 		if (graph->edge[i].src == graph->edge[i].dst)
2838 			continue;
2839 		if (graph->edge[i].src->scc == graph->edge[i].dst->scc)
2840 			continue;
2841 		if (graph->edge[i].start > con)
2842 			continue;
2843 		if (graph->edge[i].end <= con)
2844 			continue;
2845 		graph->src_scc = graph->edge[i].src->scc;
2846 		graph->dst_scc = graph->edge[i].dst->scc;
2847 	}
2848 
2849 	return 0;
2850 }
2851 
2852 /* Check whether the next schedule row of the given node needs to be
2853  * non-trivial.  Lower-dimensional domains may have some trivial rows,
2854  * but as soon as the number of remaining required non-trivial rows
2855  * is as large as the number or remaining rows to be computed,
2856  * all remaining rows need to be non-trivial.
2857  */
needs_row(struct isl_sched_graph * graph,struct isl_sched_node * node)2858 static int needs_row(struct isl_sched_graph *graph, struct isl_sched_node *node)
2859 {
2860 	return node->nvar - node->rank >= graph->maxvar - graph->n_row;
2861 }
2862 
2863 /* Construct a non-triviality region with triviality directions
2864  * corresponding to the rows of "indep".
2865  * The rows of "indep" are expressed in terms of the schedule coefficients c_i,
2866  * while the triviality directions are expressed in terms of
2867  * pairs of non-negative variables c^+_i - c^-_i, with c^-_i appearing
2868  * before c^+_i.  Furthermore,
2869  * the pairs of non-negative variables representing the coefficients
2870  * are stored in the opposite order.
2871  */
construct_trivial(__isl_keep isl_mat * indep)2872 static __isl_give isl_mat *construct_trivial(__isl_keep isl_mat *indep)
2873 {
2874 	isl_ctx *ctx;
2875 	isl_mat *mat;
2876 	int i, j;
2877 	isl_size n, n_var;
2878 
2879 	n = isl_mat_rows(indep);
2880 	n_var = isl_mat_cols(indep);
2881 	if (n < 0 || n_var < 0)
2882 		return NULL;
2883 
2884 	ctx = isl_mat_get_ctx(indep);
2885 	mat = isl_mat_alloc(ctx, n, 2 * n_var);
2886 	if (!mat)
2887 		return NULL;
2888 	for (i = 0; i < n; ++i) {
2889 		for (j = 0; j < n_var; ++j) {
2890 			int nj = n_var - 1 - j;
2891 			isl_int_neg(mat->row[i][2 * nj], indep->row[i][j]);
2892 			isl_int_set(mat->row[i][2 * nj + 1], indep->row[i][j]);
2893 		}
2894 	}
2895 
2896 	return mat;
2897 }
2898 
2899 /* Solve the ILP problem constructed in setup_lp.
2900  * For each node such that all the remaining rows of its schedule
2901  * need to be non-trivial, we construct a non-triviality region.
2902  * This region imposes that the next row is independent of previous rows.
2903  * In particular, the non-triviality region enforces that at least
2904  * one of the linear combinations in the rows of node->indep is non-zero.
2905  */
solve_lp(isl_ctx * ctx,struct isl_sched_graph * graph)2906 static __isl_give isl_vec *solve_lp(isl_ctx *ctx, struct isl_sched_graph *graph)
2907 {
2908 	int i;
2909 	isl_vec *sol;
2910 	isl_basic_set *lp;
2911 
2912 	for (i = 0; i < graph->n; ++i) {
2913 		struct isl_sched_node *node = &graph->node[i];
2914 		isl_mat *trivial;
2915 
2916 		graph->region[i].pos = node_var_coef_offset(node);
2917 		if (needs_row(graph, node))
2918 			trivial = construct_trivial(node->indep);
2919 		else
2920 			trivial = isl_mat_zero(ctx, 0, 0);
2921 		graph->region[i].trivial = trivial;
2922 	}
2923 	lp = isl_basic_set_copy(graph->lp);
2924 	sol = isl_tab_basic_set_non_trivial_lexmin(lp, 2, graph->n,
2925 				       graph->region, &check_conflict, graph);
2926 	for (i = 0; i < graph->n; ++i)
2927 		isl_mat_free(graph->region[i].trivial);
2928 	return sol;
2929 }
2930 
2931 /* Extract the coefficients for the variables of "node" from "sol".
2932  *
2933  * Each schedule coefficient c_i_x is represented as the difference
2934  * between two non-negative variables c_i_x^+ - c_i_x^-.
2935  * The c_i_x^- appear before their c_i_x^+ counterpart.
2936  * Furthermore, the order of these pairs is the opposite of that
2937  * of the corresponding coefficients.
2938  *
2939  * Return c_i_x = c_i_x^+ - c_i_x^-
2940  */
extract_var_coef(struct isl_sched_node * node,__isl_keep isl_vec * sol)2941 static __isl_give isl_vec *extract_var_coef(struct isl_sched_node *node,
2942 	__isl_keep isl_vec *sol)
2943 {
2944 	int i;
2945 	int pos;
2946 	isl_vec *csol;
2947 
2948 	if (!sol)
2949 		return NULL;
2950 	csol = isl_vec_alloc(isl_vec_get_ctx(sol), node->nvar);
2951 	if (!csol)
2952 		return NULL;
2953 
2954 	pos = 1 + node_var_coef_offset(node);
2955 	for (i = 0; i < node->nvar; ++i)
2956 		isl_int_sub(csol->el[node->nvar - 1 - i],
2957 			    sol->el[pos + 2 * i + 1], sol->el[pos + 2 * i]);
2958 
2959 	return csol;
2960 }
2961 
2962 /* Update the schedules of all nodes based on the given solution
2963  * of the LP problem.
2964  * The new row is added to the current band.
2965  * All possibly negative coefficients are encoded as a difference
2966  * of two non-negative variables, so we need to perform the subtraction
2967  * here.
2968  *
2969  * If coincident is set, then the caller guarantees that the new
2970  * row satisfies the coincidence constraints.
2971  */
update_schedule(struct isl_sched_graph * graph,__isl_take isl_vec * sol,int coincident)2972 static int update_schedule(struct isl_sched_graph *graph,
2973 	__isl_take isl_vec *sol, int coincident)
2974 {
2975 	int i, j;
2976 	isl_vec *csol = NULL;
2977 
2978 	if (!sol)
2979 		goto error;
2980 	if (sol->size == 0)
2981 		isl_die(sol->ctx, isl_error_internal,
2982 			"no solution found", goto error);
2983 	if (graph->n_total_row >= graph->max_row)
2984 		isl_die(sol->ctx, isl_error_internal,
2985 			"too many schedule rows", goto error);
2986 
2987 	for (i = 0; i < graph->n; ++i) {
2988 		struct isl_sched_node *node = &graph->node[i];
2989 		int pos;
2990 		isl_size row = isl_mat_rows(node->sched);
2991 
2992 		isl_vec_free(csol);
2993 		csol = extract_var_coef(node, sol);
2994 		if (row < 0 || !csol)
2995 			goto error;
2996 
2997 		isl_map_free(node->sched_map);
2998 		node->sched_map = NULL;
2999 		node->sched = isl_mat_add_rows(node->sched, 1);
3000 		if (!node->sched)
3001 			goto error;
3002 		pos = node_cst_coef_offset(node);
3003 		node->sched = isl_mat_set_element(node->sched,
3004 					row, 0, sol->el[1 + pos]);
3005 		pos = node_par_coef_offset(node);
3006 		for (j = 0; j < node->nparam; ++j)
3007 			node->sched = isl_mat_set_element(node->sched,
3008 					row, 1 + j, sol->el[1 + pos + j]);
3009 		for (j = 0; j < node->nvar; ++j)
3010 			node->sched = isl_mat_set_element(node->sched,
3011 					row, 1 + node->nparam + j, csol->el[j]);
3012 		node->coincident[graph->n_total_row] = coincident;
3013 	}
3014 	isl_vec_free(sol);
3015 	isl_vec_free(csol);
3016 
3017 	graph->n_row++;
3018 	graph->n_total_row++;
3019 
3020 	return 0;
3021 error:
3022 	isl_vec_free(sol);
3023 	isl_vec_free(csol);
3024 	return -1;
3025 }
3026 
3027 /* Convert row "row" of node->sched into an isl_aff living in "ls"
3028  * and return this isl_aff.
3029  */
extract_schedule_row(__isl_take isl_local_space * ls,struct isl_sched_node * node,int row)3030 static __isl_give isl_aff *extract_schedule_row(__isl_take isl_local_space *ls,
3031 	struct isl_sched_node *node, int row)
3032 {
3033 	int j;
3034 	isl_int v;
3035 	isl_aff *aff;
3036 
3037 	isl_int_init(v);
3038 
3039 	aff = isl_aff_zero_on_domain(ls);
3040 	if (isl_mat_get_element(node->sched, row, 0, &v) < 0)
3041 		goto error;
3042 	aff = isl_aff_set_constant(aff, v);
3043 	for (j = 0; j < node->nparam; ++j) {
3044 		if (isl_mat_get_element(node->sched, row, 1 + j, &v) < 0)
3045 			goto error;
3046 		aff = isl_aff_set_coefficient(aff, isl_dim_param, j, v);
3047 	}
3048 	for (j = 0; j < node->nvar; ++j) {
3049 		if (isl_mat_get_element(node->sched, row,
3050 					1 + node->nparam + j, &v) < 0)
3051 			goto error;
3052 		aff = isl_aff_set_coefficient(aff, isl_dim_in, j, v);
3053 	}
3054 
3055 	isl_int_clear(v);
3056 
3057 	return aff;
3058 error:
3059 	isl_int_clear(v);
3060 	isl_aff_free(aff);
3061 	return NULL;
3062 }
3063 
3064 /* Convert the "n" rows starting at "first" of node->sched into a multi_aff
3065  * and return this multi_aff.
3066  *
3067  * The result is defined over the uncompressed node domain.
3068  */
isl_sched_node_extract_partial_schedule_multi_aff(struct isl_sched_node * node,int first,int n)3069 __isl_give isl_multi_aff *isl_sched_node_extract_partial_schedule_multi_aff(
3070 	struct isl_sched_node *node, int first, int n)
3071 {
3072 	int i;
3073 	isl_space *space;
3074 	isl_local_space *ls;
3075 	isl_aff *aff;
3076 	isl_multi_aff *ma;
3077 	isl_size nrow;
3078 
3079 	if (!node)
3080 		return NULL;
3081 	nrow = isl_mat_rows(node->sched);
3082 	if (nrow < 0)
3083 		return NULL;
3084 	if (node->compressed)
3085 		space = isl_pw_multi_aff_get_domain_space(node->decompress);
3086 	else
3087 		space = isl_space_copy(node->space);
3088 	ls = isl_local_space_from_space(isl_space_copy(space));
3089 	space = isl_space_from_domain(space);
3090 	space = isl_space_add_dims(space, isl_dim_out, n);
3091 	ma = isl_multi_aff_zero(space);
3092 
3093 	for (i = first; i < first + n; ++i) {
3094 		aff = extract_schedule_row(isl_local_space_copy(ls), node, i);
3095 		ma = isl_multi_aff_set_aff(ma, i - first, aff);
3096 	}
3097 
3098 	isl_local_space_free(ls);
3099 
3100 	if (node->compressed)
3101 		ma = isl_multi_aff_pullback_multi_aff(ma,
3102 					isl_multi_aff_copy(node->compress));
3103 
3104 	return ma;
3105 }
3106 
3107 /* Convert node->sched into a multi_aff and return this multi_aff.
3108  *
3109  * The result is defined over the uncompressed node domain.
3110  */
node_extract_schedule_multi_aff(struct isl_sched_node * node)3111 static __isl_give isl_multi_aff *node_extract_schedule_multi_aff(
3112 	struct isl_sched_node *node)
3113 {
3114 	isl_size nrow;
3115 
3116 	nrow = isl_mat_rows(node->sched);
3117 	if (nrow < 0)
3118 		return NULL;
3119 	return isl_sched_node_extract_partial_schedule_multi_aff(node, 0, nrow);
3120 }
3121 
3122 /* Convert node->sched into a map and return this map.
3123  *
3124  * The result is cached in node->sched_map, which needs to be released
3125  * whenever node->sched is updated.
3126  * It is defined over the uncompressed node domain.
3127  */
node_extract_schedule(struct isl_sched_node * node)3128 static __isl_give isl_map *node_extract_schedule(struct isl_sched_node *node)
3129 {
3130 	if (!node->sched_map) {
3131 		isl_multi_aff *ma;
3132 
3133 		ma = node_extract_schedule_multi_aff(node);
3134 		node->sched_map = isl_map_from_multi_aff(ma);
3135 	}
3136 
3137 	return isl_map_copy(node->sched_map);
3138 }
3139 
3140 /* Construct a map that can be used to update a dependence relation
3141  * based on the current schedule.
3142  * That is, construct a map expressing that source and sink
3143  * are executed within the same iteration of the current schedule.
3144  * This map can then be intersected with the dependence relation.
3145  * This is not the most efficient way, but this shouldn't be a critical
3146  * operation.
3147  */
specializer(struct isl_sched_node * src,struct isl_sched_node * dst)3148 static __isl_give isl_map *specializer(struct isl_sched_node *src,
3149 	struct isl_sched_node *dst)
3150 {
3151 	isl_map *src_sched, *dst_sched;
3152 
3153 	src_sched = node_extract_schedule(src);
3154 	dst_sched = node_extract_schedule(dst);
3155 	return isl_map_apply_range(src_sched, isl_map_reverse(dst_sched));
3156 }
3157 
3158 /* Intersect the domains of the nested relations in domain and range
3159  * of "umap" with "map".
3160  */
intersect_domains(__isl_take isl_union_map * umap,__isl_keep isl_map * map)3161 static __isl_give isl_union_map *intersect_domains(
3162 	__isl_take isl_union_map *umap, __isl_keep isl_map *map)
3163 {
3164 	isl_union_set *uset;
3165 
3166 	umap = isl_union_map_zip(umap);
3167 	uset = isl_union_set_from_set(isl_map_wrap(isl_map_copy(map)));
3168 	umap = isl_union_map_intersect_domain(umap, uset);
3169 	umap = isl_union_map_zip(umap);
3170 	return umap;
3171 }
3172 
3173 /* Update the dependence relation of the given edge based
3174  * on the current schedule.
3175  * If the dependence is carried completely by the current schedule, then
3176  * it is removed from the edge_tables.  It is kept in the list of edges
3177  * as otherwise all edge_tables would have to be recomputed.
3178  *
3179  * If the edge is of a type that can appear multiple times
3180  * between the same pair of nodes, then it is added to
3181  * the edge table (again).  This prevents the situation
3182  * where none of these edges is referenced from the edge table
3183  * because the one that was referenced turned out to be empty and
3184  * was therefore removed from the table.
3185  */
update_edge(isl_ctx * ctx,struct isl_sched_graph * graph,struct isl_sched_edge * edge)3186 static isl_stat update_edge(isl_ctx *ctx, struct isl_sched_graph *graph,
3187 	struct isl_sched_edge *edge)
3188 {
3189 	int empty;
3190 	isl_map *id;
3191 
3192 	id = specializer(edge->src, edge->dst);
3193 	edge->map = isl_map_intersect(edge->map, isl_map_copy(id));
3194 	if (!edge->map)
3195 		goto error;
3196 
3197 	if (edge->tagged_condition) {
3198 		edge->tagged_condition =
3199 			intersect_domains(edge->tagged_condition, id);
3200 		if (!edge->tagged_condition)
3201 			goto error;
3202 	}
3203 	if (edge->tagged_validity) {
3204 		edge->tagged_validity =
3205 			intersect_domains(edge->tagged_validity, id);
3206 		if (!edge->tagged_validity)
3207 			goto error;
3208 	}
3209 
3210 	empty = isl_map_plain_is_empty(edge->map);
3211 	if (empty < 0)
3212 		goto error;
3213 	if (empty) {
3214 		if (graph_remove_edge(graph, edge) < 0)
3215 			goto error;
3216 	} else if (is_multi_edge_type(edge)) {
3217 		if (graph_edge_tables_add(ctx, graph, edge) < 0)
3218 			goto error;
3219 	}
3220 
3221 	isl_map_free(id);
3222 	return isl_stat_ok;
3223 error:
3224 	isl_map_free(id);
3225 	return isl_stat_error;
3226 }
3227 
3228 /* Does the domain of "umap" intersect "uset"?
3229  */
domain_intersects(__isl_keep isl_union_map * umap,__isl_keep isl_union_set * uset)3230 static int domain_intersects(__isl_keep isl_union_map *umap,
3231 	__isl_keep isl_union_set *uset)
3232 {
3233 	int empty;
3234 
3235 	umap = isl_union_map_copy(umap);
3236 	umap = isl_union_map_intersect_domain(umap, isl_union_set_copy(uset));
3237 	empty = isl_union_map_is_empty(umap);
3238 	isl_union_map_free(umap);
3239 
3240 	return empty < 0 ? -1 : !empty;
3241 }
3242 
3243 /* Does the range of "umap" intersect "uset"?
3244  */
range_intersects(__isl_keep isl_union_map * umap,__isl_keep isl_union_set * uset)3245 static int range_intersects(__isl_keep isl_union_map *umap,
3246 	__isl_keep isl_union_set *uset)
3247 {
3248 	int empty;
3249 
3250 	umap = isl_union_map_copy(umap);
3251 	umap = isl_union_map_intersect_range(umap, isl_union_set_copy(uset));
3252 	empty = isl_union_map_is_empty(umap);
3253 	isl_union_map_free(umap);
3254 
3255 	return empty < 0 ? -1 : !empty;
3256 }
3257 
3258 /* Are the condition dependences of "edge" local with respect to
3259  * the current schedule?
3260  *
3261  * That is, are domain and range of the condition dependences mapped
3262  * to the same point?
3263  *
3264  * In other words, is the condition false?
3265  */
is_condition_false(struct isl_sched_edge * edge)3266 static int is_condition_false(struct isl_sched_edge *edge)
3267 {
3268 	isl_union_map *umap;
3269 	isl_map *map, *sched, *test;
3270 	int empty, local;
3271 
3272 	empty = isl_union_map_is_empty(edge->tagged_condition);
3273 	if (empty < 0 || empty)
3274 		return empty;
3275 
3276 	umap = isl_union_map_copy(edge->tagged_condition);
3277 	umap = isl_union_map_zip(umap);
3278 	umap = isl_union_set_unwrap(isl_union_map_domain(umap));
3279 	map = isl_map_from_union_map(umap);
3280 
3281 	sched = node_extract_schedule(edge->src);
3282 	map = isl_map_apply_domain(map, sched);
3283 	sched = node_extract_schedule(edge->dst);
3284 	map = isl_map_apply_range(map, sched);
3285 
3286 	test = isl_map_identity(isl_map_get_space(map));
3287 	local = isl_map_is_subset(map, test);
3288 	isl_map_free(map);
3289 	isl_map_free(test);
3290 
3291 	return local;
3292 }
3293 
3294 /* For each conditional validity constraint that is adjacent
3295  * to a condition with domain in condition_source or range in condition_sink,
3296  * turn it into an unconditional validity constraint.
3297  */
unconditionalize_adjacent_validity(struct isl_sched_graph * graph,__isl_take isl_union_set * condition_source,__isl_take isl_union_set * condition_sink)3298 static int unconditionalize_adjacent_validity(struct isl_sched_graph *graph,
3299 	__isl_take isl_union_set *condition_source,
3300 	__isl_take isl_union_set *condition_sink)
3301 {
3302 	int i;
3303 
3304 	condition_source = isl_union_set_coalesce(condition_source);
3305 	condition_sink = isl_union_set_coalesce(condition_sink);
3306 
3307 	for (i = 0; i < graph->n_edge; ++i) {
3308 		int adjacent;
3309 		isl_union_map *validity;
3310 
3311 		if (!isl_sched_edge_is_conditional_validity(&graph->edge[i]))
3312 			continue;
3313 		if (is_validity(&graph->edge[i]))
3314 			continue;
3315 
3316 		validity = graph->edge[i].tagged_validity;
3317 		adjacent = domain_intersects(validity, condition_sink);
3318 		if (adjacent >= 0 && !adjacent)
3319 			adjacent = range_intersects(validity, condition_source);
3320 		if (adjacent < 0)
3321 			goto error;
3322 		if (!adjacent)
3323 			continue;
3324 
3325 		set_validity(&graph->edge[i]);
3326 	}
3327 
3328 	isl_union_set_free(condition_source);
3329 	isl_union_set_free(condition_sink);
3330 	return 0;
3331 error:
3332 	isl_union_set_free(condition_source);
3333 	isl_union_set_free(condition_sink);
3334 	return -1;
3335 }
3336 
3337 /* Update the dependence relations of all edges based on the current schedule
3338  * and enforce conditional validity constraints that are adjacent
3339  * to satisfied condition constraints.
3340  *
3341  * First check if any of the condition constraints are satisfied
3342  * (i.e., not local to the outer schedule) and keep track of
3343  * their domain and range.
3344  * Then update all dependence relations (which removes the non-local
3345  * constraints).
3346  * Finally, if any condition constraints turned out to be satisfied,
3347  * then turn all adjacent conditional validity constraints into
3348  * unconditional validity constraints.
3349  */
update_edges(isl_ctx * ctx,struct isl_sched_graph * graph)3350 static int update_edges(isl_ctx *ctx, struct isl_sched_graph *graph)
3351 {
3352 	int i;
3353 	int any = 0;
3354 	isl_union_set *source, *sink;
3355 
3356 	source = isl_union_set_empty(isl_space_params_alloc(ctx, 0));
3357 	sink = isl_union_set_empty(isl_space_params_alloc(ctx, 0));
3358 	for (i = 0; i < graph->n_edge; ++i) {
3359 		int local;
3360 		isl_union_set *uset;
3361 		isl_union_map *umap;
3362 
3363 		if (!isl_sched_edge_is_condition(&graph->edge[i]))
3364 			continue;
3365 		if (is_local(&graph->edge[i]))
3366 			continue;
3367 		local = is_condition_false(&graph->edge[i]);
3368 		if (local < 0)
3369 			goto error;
3370 		if (local)
3371 			continue;
3372 
3373 		any = 1;
3374 
3375 		umap = isl_union_map_copy(graph->edge[i].tagged_condition);
3376 		uset = isl_union_map_domain(umap);
3377 		source = isl_union_set_union(source, uset);
3378 
3379 		umap = isl_union_map_copy(graph->edge[i].tagged_condition);
3380 		uset = isl_union_map_range(umap);
3381 		sink = isl_union_set_union(sink, uset);
3382 	}
3383 
3384 	for (i = 0; i < graph->n_edge; ++i) {
3385 		if (update_edge(ctx, graph, &graph->edge[i]) < 0)
3386 			goto error;
3387 	}
3388 
3389 	if (any)
3390 		return unconditionalize_adjacent_validity(graph, source, sink);
3391 
3392 	isl_union_set_free(source);
3393 	isl_union_set_free(sink);
3394 	return 0;
3395 error:
3396 	isl_union_set_free(source);
3397 	isl_union_set_free(sink);
3398 	return -1;
3399 }
3400 
next_band(struct isl_sched_graph * graph)3401 static void next_band(struct isl_sched_graph *graph)
3402 {
3403 	graph->band_start = graph->n_total_row;
3404 }
3405 
3406 /* Return the union of the universe domains of the nodes in "graph"
3407  * that satisfy "pred".
3408  */
isl_sched_graph_domain(isl_ctx * ctx,struct isl_sched_graph * graph,int (* pred)(struct isl_sched_node * node,int data),int data)3409 static __isl_give isl_union_set *isl_sched_graph_domain(isl_ctx *ctx,
3410 	struct isl_sched_graph *graph,
3411 	int (*pred)(struct isl_sched_node *node, int data), int data)
3412 {
3413 	int i;
3414 	isl_set *set;
3415 	isl_union_set *dom;
3416 
3417 	for (i = 0; i < graph->n; ++i)
3418 		if (pred(&graph->node[i], data))
3419 			break;
3420 
3421 	if (i >= graph->n)
3422 		isl_die(ctx, isl_error_internal,
3423 			"empty component", return NULL);
3424 
3425 	set = isl_set_universe(isl_space_copy(graph->node[i].space));
3426 	dom = isl_union_set_from_set(set);
3427 
3428 	for (i = i + 1; i < graph->n; ++i) {
3429 		if (!pred(&graph->node[i], data))
3430 			continue;
3431 		set = isl_set_universe(isl_space_copy(graph->node[i].space));
3432 		dom = isl_union_set_union(dom, isl_union_set_from_set(set));
3433 	}
3434 
3435 	return dom;
3436 }
3437 
3438 /* Return a union of universe domains corresponding to the nodes
3439  * in the SCC with index "scc".
3440  */
isl_sched_graph_extract_scc(isl_ctx * ctx,struct isl_sched_graph * graph,int scc)3441 __isl_give isl_union_set *isl_sched_graph_extract_scc(isl_ctx *ctx,
3442 	struct isl_sched_graph *graph, int scc)
3443 {
3444 	return isl_sched_graph_domain(ctx, graph,
3445 					&isl_sched_node_scc_exactly, scc);
3446 }
3447 
3448 /* Return a list of unions of universe domains, where each element
3449  * in the list corresponds to an SCC (or WCC) indexed by node->scc.
3450  */
isl_sched_graph_extract_sccs(isl_ctx * ctx,struct isl_sched_graph * graph)3451 __isl_give isl_union_set_list *isl_sched_graph_extract_sccs(isl_ctx *ctx,
3452 	struct isl_sched_graph *graph)
3453 {
3454 	int i;
3455 	isl_union_set_list *filters;
3456 
3457 	filters = isl_union_set_list_alloc(ctx, graph->scc);
3458 	for (i = 0; i < graph->scc; ++i) {
3459 		isl_union_set *dom;
3460 
3461 		dom = isl_sched_graph_extract_scc(ctx, graph, i);
3462 		filters = isl_union_set_list_add(filters, dom);
3463 	}
3464 
3465 	return filters;
3466 }
3467 
3468 /* Return a list of two unions of universe domains, one for the SCCs up
3469  * to and including graph->src_scc and another for the other SCCs.
3470  */
extract_split(isl_ctx * ctx,struct isl_sched_graph * graph)3471 static __isl_give isl_union_set_list *extract_split(isl_ctx *ctx,
3472 	struct isl_sched_graph *graph)
3473 {
3474 	isl_union_set *dom;
3475 	isl_union_set_list *filters;
3476 
3477 	filters = isl_union_set_list_alloc(ctx, 2);
3478 	dom = isl_sched_graph_domain(ctx, graph,
3479 					&node_scc_at_most, graph->src_scc);
3480 	filters = isl_union_set_list_add(filters, dom);
3481 	dom = isl_sched_graph_domain(ctx, graph,
3482 					&node_scc_at_least, graph->src_scc + 1);
3483 	filters = isl_union_set_list_add(filters, dom);
3484 
3485 	return filters;
3486 }
3487 
3488 /* Copy nodes that satisfy node_pred from the src dependence graph
3489  * to the dst dependence graph.
3490  */
copy_nodes(struct isl_sched_graph * dst,struct isl_sched_graph * src,int (* node_pred)(struct isl_sched_node * node,int data),int data)3491 static isl_stat copy_nodes(struct isl_sched_graph *dst,
3492 	struct isl_sched_graph *src,
3493 	int (*node_pred)(struct isl_sched_node *node, int data), int data)
3494 {
3495 	int i;
3496 
3497 	dst->n = 0;
3498 	for (i = 0; i < src->n; ++i) {
3499 		int j;
3500 
3501 		if (!node_pred(&src->node[i], data))
3502 			continue;
3503 
3504 		j = dst->n;
3505 		dst->node[j].space = isl_space_copy(src->node[i].space);
3506 		dst->node[j].compressed = src->node[i].compressed;
3507 		dst->node[j].hull = isl_set_copy(src->node[i].hull);
3508 		dst->node[j].compress =
3509 			isl_multi_aff_copy(src->node[i].compress);
3510 		dst->node[j].decompress =
3511 			isl_pw_multi_aff_copy(src->node[i].decompress);
3512 		dst->node[j].nvar = src->node[i].nvar;
3513 		dst->node[j].nparam = src->node[i].nparam;
3514 		dst->node[j].sched = isl_mat_copy(src->node[i].sched);
3515 		dst->node[j].sched_map = isl_map_copy(src->node[i].sched_map);
3516 		dst->node[j].coincident = src->node[i].coincident;
3517 		dst->node[j].sizes = isl_multi_val_copy(src->node[i].sizes);
3518 		dst->node[j].bounds = isl_basic_set_copy(src->node[i].bounds);
3519 		dst->node[j].max = isl_vec_copy(src->node[i].max);
3520 		dst->n++;
3521 
3522 		if (!dst->node[j].space || !dst->node[j].sched)
3523 			return isl_stat_error;
3524 		if (dst->node[j].compressed &&
3525 		    (!dst->node[j].hull || !dst->node[j].compress ||
3526 		     !dst->node[j].decompress))
3527 			return isl_stat_error;
3528 	}
3529 
3530 	return isl_stat_ok;
3531 }
3532 
3533 /* Copy non-empty edges that satisfy edge_pred from the src dependence graph
3534  * to the dst dependence graph.
3535  * If the source or destination node of the edge is not in the destination
3536  * graph, then it must be a backward proximity edge and it should simply
3537  * be ignored.
3538  */
copy_edges(isl_ctx * ctx,struct isl_sched_graph * dst,struct isl_sched_graph * src,int (* edge_pred)(struct isl_sched_edge * edge,int data),int data)3539 static isl_stat copy_edges(isl_ctx *ctx, struct isl_sched_graph *dst,
3540 	struct isl_sched_graph *src,
3541 	int (*edge_pred)(struct isl_sched_edge *edge, int data), int data)
3542 {
3543 	int i;
3544 
3545 	dst->n_edge = 0;
3546 	for (i = 0; i < src->n_edge; ++i) {
3547 		struct isl_sched_edge *edge = &src->edge[i];
3548 		isl_map *map;
3549 		isl_union_map *tagged_condition;
3550 		isl_union_map *tagged_validity;
3551 		struct isl_sched_node *dst_src, *dst_dst;
3552 
3553 		if (!edge_pred(edge, data))
3554 			continue;
3555 
3556 		if (isl_map_plain_is_empty(edge->map))
3557 			continue;
3558 
3559 		dst_src = isl_sched_graph_find_node(ctx, dst, edge->src->space);
3560 		dst_dst = isl_sched_graph_find_node(ctx, dst, edge->dst->space);
3561 		if (!dst_src || !dst_dst)
3562 			return isl_stat_error;
3563 		if (!isl_sched_graph_is_node(dst, dst_src) ||
3564 		    !isl_sched_graph_is_node(dst, dst_dst)) {
3565 			if (is_validity(edge) ||
3566 			    isl_sched_edge_is_conditional_validity(edge))
3567 				isl_die(ctx, isl_error_internal,
3568 					"backward (conditional) validity edge",
3569 					return isl_stat_error);
3570 			continue;
3571 		}
3572 
3573 		map = isl_map_copy(edge->map);
3574 		tagged_condition = isl_union_map_copy(edge->tagged_condition);
3575 		tagged_validity = isl_union_map_copy(edge->tagged_validity);
3576 
3577 		dst->edge[dst->n_edge].src = dst_src;
3578 		dst->edge[dst->n_edge].dst = dst_dst;
3579 		dst->edge[dst->n_edge].map = map;
3580 		dst->edge[dst->n_edge].tagged_condition = tagged_condition;
3581 		dst->edge[dst->n_edge].tagged_validity = tagged_validity;
3582 		dst->edge[dst->n_edge].types = edge->types;
3583 		dst->n_edge++;
3584 
3585 		if (edge->tagged_condition && !tagged_condition)
3586 			return isl_stat_error;
3587 		if (edge->tagged_validity && !tagged_validity)
3588 			return isl_stat_error;
3589 
3590 		if (graph_edge_tables_add(ctx, dst,
3591 					    &dst->edge[dst->n_edge - 1]) < 0)
3592 			return isl_stat_error;
3593 	}
3594 
3595 	return isl_stat_ok;
3596 }
3597 
3598 /* Compute the maximal number of variables over all nodes.
3599  * This is the maximal number of linearly independent schedule
3600  * rows that we need to compute.
3601  * Just in case we end up in a part of the dependence graph
3602  * with only lower-dimensional domains, we make sure we will
3603  * compute the required amount of extra linearly independent rows.
3604  */
isl_sched_graph_compute_maxvar(struct isl_sched_graph * graph)3605 isl_stat isl_sched_graph_compute_maxvar(struct isl_sched_graph *graph)
3606 {
3607 	int i;
3608 
3609 	graph->maxvar = 0;
3610 	for (i = 0; i < graph->n; ++i) {
3611 		struct isl_sched_node *node = &graph->node[i];
3612 		int nvar;
3613 
3614 		if (isl_sched_node_update_vmap(node) < 0)
3615 			return isl_stat_error;
3616 		nvar = node->nvar + graph->n_row - node->rank;
3617 		if (nvar > graph->maxvar)
3618 			graph->maxvar = nvar;
3619 	}
3620 
3621 	return isl_stat_ok;
3622 }
3623 
3624 /* Extract the subgraph of "graph" that consists of the nodes satisfying
3625  * "node_pred" and the edges satisfying "edge_pred" and store
3626  * the result in "sub".
3627  */
isl_sched_graph_extract_sub_graph(isl_ctx * ctx,struct isl_sched_graph * graph,int (* node_pred)(struct isl_sched_node * node,int data),int (* edge_pred)(struct isl_sched_edge * edge,int data),int data,struct isl_sched_graph * sub)3628 isl_stat isl_sched_graph_extract_sub_graph(isl_ctx *ctx,
3629 	struct isl_sched_graph *graph,
3630 	int (*node_pred)(struct isl_sched_node *node, int data),
3631 	int (*edge_pred)(struct isl_sched_edge *edge, int data),
3632 	int data, struct isl_sched_graph *sub)
3633 {
3634 	int i, n = 0, n_edge = 0;
3635 	int t;
3636 
3637 	for (i = 0; i < graph->n; ++i)
3638 		if (node_pred(&graph->node[i], data))
3639 			++n;
3640 	for (i = 0; i < graph->n_edge; ++i)
3641 		if (edge_pred(&graph->edge[i], data))
3642 			++n_edge;
3643 	if (graph_alloc(ctx, sub, n, n_edge) < 0)
3644 		return isl_stat_error;
3645 	sub->root = graph->root;
3646 	if (copy_nodes(sub, graph, node_pred, data) < 0)
3647 		return isl_stat_error;
3648 	if (graph_init_table(ctx, sub) < 0)
3649 		return isl_stat_error;
3650 	for (t = 0; t <= isl_edge_last; ++t)
3651 		sub->max_edge[t] = graph->max_edge[t];
3652 	if (graph_init_edge_tables(ctx, sub) < 0)
3653 		return isl_stat_error;
3654 	if (copy_edges(ctx, sub, graph, edge_pred, data) < 0)
3655 		return isl_stat_error;
3656 	sub->n_row = graph->n_row;
3657 	sub->max_row = graph->max_row;
3658 	sub->n_total_row = graph->n_total_row;
3659 	sub->band_start = graph->band_start;
3660 
3661 	return isl_stat_ok;
3662 }
3663 
3664 static __isl_give isl_schedule_node *compute_schedule(isl_schedule_node *node,
3665 	struct isl_sched_graph *graph);
3666 static __isl_give isl_schedule_node *compute_schedule_wcc(
3667 	isl_schedule_node *node, struct isl_sched_graph *graph);
3668 
3669 /* Compute a schedule for a subgraph of "graph".  In particular, for
3670  * the graph composed of nodes that satisfy node_pred and edges that
3671  * that satisfy edge_pred.
3672  * If the subgraph is known to consist of a single component, then wcc should
3673  * be set and then we call compute_schedule_wcc on the constructed subgraph.
3674  * Otherwise, we call compute_schedule, which will check whether the subgraph
3675  * is connected.
3676  *
3677  * The schedule is inserted at "node" and the updated schedule node
3678  * is returned.
3679  */
compute_sub_schedule(__isl_take isl_schedule_node * node,isl_ctx * ctx,struct isl_sched_graph * graph,int (* node_pred)(struct isl_sched_node * node,int data),int (* edge_pred)(struct isl_sched_edge * edge,int data),int data,int wcc)3680 static __isl_give isl_schedule_node *compute_sub_schedule(
3681 	__isl_take isl_schedule_node *node, isl_ctx *ctx,
3682 	struct isl_sched_graph *graph,
3683 	int (*node_pred)(struct isl_sched_node *node, int data),
3684 	int (*edge_pred)(struct isl_sched_edge *edge, int data),
3685 	int data, int wcc)
3686 {
3687 	struct isl_sched_graph split = { 0 };
3688 
3689 	if (isl_sched_graph_extract_sub_graph(ctx, graph, node_pred, edge_pred,
3690 						data, &split) < 0)
3691 		goto error;
3692 
3693 	if (wcc)
3694 		node = compute_schedule_wcc(node, &split);
3695 	else
3696 		node = compute_schedule(node, &split);
3697 
3698 	isl_sched_graph_free(ctx, &split);
3699 	return node;
3700 error:
3701 	isl_sched_graph_free(ctx, &split);
3702 	return isl_schedule_node_free(node);
3703 }
3704 
isl_sched_edge_scc_exactly(struct isl_sched_edge * edge,int scc)3705 int isl_sched_edge_scc_exactly(struct isl_sched_edge *edge, int scc)
3706 {
3707 	return edge->src->scc == scc && edge->dst->scc == scc;
3708 }
3709 
edge_dst_scc_at_most(struct isl_sched_edge * edge,int scc)3710 static int edge_dst_scc_at_most(struct isl_sched_edge *edge, int scc)
3711 {
3712 	return edge->dst->scc <= scc;
3713 }
3714 
edge_src_scc_at_least(struct isl_sched_edge * edge,int scc)3715 static int edge_src_scc_at_least(struct isl_sched_edge *edge, int scc)
3716 {
3717 	return edge->src->scc >= scc;
3718 }
3719 
3720 /* Reset the current band by dropping all its schedule rows.
3721  */
reset_band(struct isl_sched_graph * graph)3722 static isl_stat reset_band(struct isl_sched_graph *graph)
3723 {
3724 	int i;
3725 	int drop;
3726 
3727 	drop = graph->n_total_row - graph->band_start;
3728 	graph->n_total_row -= drop;
3729 	graph->n_row -= drop;
3730 
3731 	for (i = 0; i < graph->n; ++i) {
3732 		struct isl_sched_node *node = &graph->node[i];
3733 
3734 		isl_map_free(node->sched_map);
3735 		node->sched_map = NULL;
3736 
3737 		node->sched = isl_mat_drop_rows(node->sched,
3738 						graph->band_start, drop);
3739 
3740 		if (!node->sched)
3741 			return isl_stat_error;
3742 	}
3743 
3744 	return isl_stat_ok;
3745 }
3746 
3747 /* Split the current graph into two parts and compute a schedule for each
3748  * part individually.  In particular, one part consists of all SCCs up
3749  * to and including graph->src_scc, while the other part contains the other
3750  * SCCs.  The split is enforced by a sequence node inserted at position "node"
3751  * in the schedule tree.  Return the updated schedule node.
3752  * If either of these two parts consists of a sequence, then it is spliced
3753  * into the sequence containing the two parts.
3754  *
3755  * The current band is reset. It would be possible to reuse
3756  * the previously computed rows as the first rows in the next
3757  * band, but recomputing them may result in better rows as we are looking
3758  * at a smaller part of the dependence graph.
3759  */
compute_split_schedule(__isl_take isl_schedule_node * node,struct isl_sched_graph * graph)3760 static __isl_give isl_schedule_node *compute_split_schedule(
3761 	__isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
3762 {
3763 	isl_ctx *ctx;
3764 	isl_union_set_list *filters;
3765 
3766 	if (!node)
3767 		return NULL;
3768 
3769 	if (reset_band(graph) < 0)
3770 		return isl_schedule_node_free(node);
3771 
3772 	next_band(graph);
3773 
3774 	ctx = isl_schedule_node_get_ctx(node);
3775 	filters = extract_split(ctx, graph);
3776 	node = isl_schedule_node_insert_sequence(node, filters);
3777 	node = isl_schedule_node_grandchild(node, 1, 0);
3778 
3779 	node = compute_sub_schedule(node, ctx, graph,
3780 				&node_scc_at_least, &edge_src_scc_at_least,
3781 				graph->src_scc + 1, 0);
3782 	node = isl_schedule_node_grandparent(node);
3783 	node = isl_schedule_node_grandchild(node, 0, 0);
3784 	node = compute_sub_schedule(node, ctx, graph,
3785 				&node_scc_at_most, &edge_dst_scc_at_most,
3786 				graph->src_scc, 0);
3787 	node = isl_schedule_node_grandparent(node);
3788 
3789 	node = isl_schedule_node_sequence_splice_children(node);
3790 
3791 	return node;
3792 }
3793 
3794 /* Insert a band node at position "node" in the schedule tree corresponding
3795  * to the current band in "graph".  Mark the band node permutable
3796  * if "permutable" is set.
3797  * The partial schedules and the coincidence property are extracted
3798  * from the graph nodes.
3799  * Return the updated schedule node.
3800  */
insert_current_band(__isl_take isl_schedule_node * node,struct isl_sched_graph * graph,int permutable)3801 static __isl_give isl_schedule_node *insert_current_band(
3802 	__isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
3803 	int permutable)
3804 {
3805 	int i;
3806 	int start, end, n;
3807 	isl_multi_aff *ma;
3808 	isl_multi_pw_aff *mpa;
3809 	isl_multi_union_pw_aff *mupa;
3810 
3811 	if (!node)
3812 		return NULL;
3813 
3814 	if (graph->n < 1)
3815 		isl_die(isl_schedule_node_get_ctx(node), isl_error_internal,
3816 			"graph should have at least one node",
3817 			return isl_schedule_node_free(node));
3818 
3819 	start = graph->band_start;
3820 	end = graph->n_total_row;
3821 	n = end - start;
3822 
3823 	ma = isl_sched_node_extract_partial_schedule_multi_aff(&graph->node[0],
3824 								start, n);
3825 	mpa = isl_multi_pw_aff_from_multi_aff(ma);
3826 	mupa = isl_multi_union_pw_aff_from_multi_pw_aff(mpa);
3827 
3828 	for (i = 1; i < graph->n; ++i) {
3829 		isl_multi_union_pw_aff *mupa_i;
3830 
3831 		ma = isl_sched_node_extract_partial_schedule_multi_aff(
3832 						&graph->node[i], start, n);
3833 		mpa = isl_multi_pw_aff_from_multi_aff(ma);
3834 		mupa_i = isl_multi_union_pw_aff_from_multi_pw_aff(mpa);
3835 		mupa = isl_multi_union_pw_aff_union_add(mupa, mupa_i);
3836 	}
3837 	node = isl_schedule_node_insert_partial_schedule(node, mupa);
3838 
3839 	for (i = 0; i < n; ++i)
3840 		node = isl_schedule_node_band_member_set_coincident(node, i,
3841 					graph->node[0].coincident[start + i]);
3842 	node = isl_schedule_node_band_set_permutable(node, permutable);
3843 
3844 	return node;
3845 }
3846 
3847 /* Update the dependence relations based on the current schedule,
3848  * add the current band to "node" and then continue with the computation
3849  * of the next band.
3850  * Return the updated schedule node.
3851  */
compute_next_band(__isl_take isl_schedule_node * node,struct isl_sched_graph * graph,int permutable)3852 static __isl_give isl_schedule_node *compute_next_band(
3853 	__isl_take isl_schedule_node *node,
3854 	struct isl_sched_graph *graph, int permutable)
3855 {
3856 	isl_ctx *ctx;
3857 
3858 	if (!node)
3859 		return NULL;
3860 
3861 	ctx = isl_schedule_node_get_ctx(node);
3862 	if (update_edges(ctx, graph) < 0)
3863 		return isl_schedule_node_free(node);
3864 	node = insert_current_band(node, graph, permutable);
3865 	next_band(graph);
3866 
3867 	node = isl_schedule_node_child(node, 0);
3868 	node = compute_schedule(node, graph);
3869 	node = isl_schedule_node_parent(node);
3870 
3871 	return node;
3872 }
3873 
3874 /* Add the constraints "coef" derived from an edge from "node" to itself
3875  * to graph->lp in order to respect the dependences and to try and carry them.
3876  * "pos" is the sequence number of the edge that needs to be carried.
3877  * "coef" represents general constraints on coefficients (c_0, c_x)
3878  * of valid constraints for (y - x) with x and y instances of the node.
3879  *
3880  * The constraints added to graph->lp need to enforce
3881  *
3882  *	(c_j_0 + c_j_x y) - (c_j_0 + c_j_x x)
3883  *	= c_j_x (y - x) >= e_i
3884  *
3885  * for each (x,y) in the dependence relation of the edge.
3886  * That is, (-e_i, c_j_x) needs to be plugged in for (c_0, c_x),
3887  * taking into account that each coefficient in c_j_x is represented
3888  * as a pair of non-negative coefficients.
3889  */
add_intra_constraints(struct isl_sched_graph * graph,struct isl_sched_node * node,__isl_take isl_basic_set * coef,int pos)3890 static isl_stat add_intra_constraints(struct isl_sched_graph *graph,
3891 	struct isl_sched_node *node, __isl_take isl_basic_set *coef, int pos)
3892 {
3893 	isl_size offset;
3894 	isl_ctx *ctx;
3895 	isl_dim_map *dim_map;
3896 
3897 	offset = coef_var_offset(coef);
3898 	if (offset < 0)
3899 		coef = isl_basic_set_free(coef);
3900 	if (!coef)
3901 		return isl_stat_error;
3902 
3903 	ctx = isl_basic_set_get_ctx(coef);
3904 	dim_map = intra_dim_map(ctx, graph, node, offset, 1);
3905 	isl_dim_map_range(dim_map, 3 + pos, 0, 0, 0, 1, -1);
3906 	graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);
3907 
3908 	return isl_stat_ok;
3909 }
3910 
3911 /* Add the constraints "coef" derived from an edge from "src" to "dst"
3912  * to graph->lp in order to respect the dependences and to try and carry them.
3913  * "pos" is the sequence number of the edge that needs to be carried or
3914  * -1 if no attempt should be made to carry the dependences.
3915  * "coef" represents general constraints on coefficients (c_0, c_n, c_x, c_y)
3916  * of valid constraints for (x, y) with x and y instances of "src" and "dst".
3917  *
3918  * The constraints added to graph->lp need to enforce
3919  *
3920  *	(c_k_0 + c_k_n n + c_k_x y) - (c_j_0 + c_j_n n + c_j_x x) >= e_i
3921  *
3922  * for each (x,y) in the dependence relation of the edge or
3923  *
3924  *	(c_k_0 + c_k_n n + c_k_x y) - (c_j_0 + c_j_n n + c_j_x x) >= 0
3925  *
3926  * if pos is -1.
3927  * That is,
3928  * (-e_i + c_k_0 - c_j_0, c_k_n - c_j_n, -c_j_x, c_k_x)
3929  * or
3930  * (c_k_0 - c_j_0, c_k_n - c_j_n, -c_j_x, c_k_x)
3931  * needs to be plugged in for (c_0, c_n, c_x, c_y),
3932  * taking into account that each coefficient in c_j_x and c_k_x is represented
3933  * as a pair of non-negative coefficients.
3934  */
add_inter_constraints(struct isl_sched_graph * graph,struct isl_sched_node * src,struct isl_sched_node * dst,__isl_take isl_basic_set * coef,int pos)3935 static isl_stat add_inter_constraints(struct isl_sched_graph *graph,
3936 	struct isl_sched_node *src, struct isl_sched_node *dst,
3937 	__isl_take isl_basic_set *coef, int pos)
3938 {
3939 	isl_size offset;
3940 	isl_ctx *ctx;
3941 	isl_dim_map *dim_map;
3942 
3943 	offset = coef_var_offset(coef);
3944 	if (offset < 0)
3945 		coef = isl_basic_set_free(coef);
3946 	if (!coef)
3947 		return isl_stat_error;
3948 
3949 	ctx = isl_basic_set_get_ctx(coef);
3950 	dim_map = inter_dim_map(ctx, graph, src, dst, offset, 1);
3951 	if (pos >= 0)
3952 		isl_dim_map_range(dim_map, 3 + pos, 0, 0, 0, 1, -1);
3953 	graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);
3954 
3955 	return isl_stat_ok;
3956 }
3957 
3958 /* Data structure for keeping track of the data needed
3959  * to exploit non-trivial lineality spaces.
3960  *
3961  * "any_non_trivial" is true if there are any non-trivial lineality spaces.
3962  * If "any_non_trivial" is not true, then "equivalent" and "mask" may be NULL.
3963  * "equivalent" connects instances to other instances on the same line(s).
3964  * "mask" contains the domain spaces of "equivalent".
3965  * Any instance set not in "mask" does not have a non-trivial lineality space.
3966  */
3967 struct isl_exploit_lineality_data {
3968 	isl_bool any_non_trivial;
3969 	isl_union_map *equivalent;
3970 	isl_union_set *mask;
3971 };
3972 
3973 /* Data structure collecting information used during the construction
3974  * of an LP for carrying dependences.
3975  *
3976  * "intra" is a sequence of coefficient constraints for intra-node edges.
3977  * "inter" is a sequence of coefficient constraints for inter-node edges.
3978  * "lineality" contains data used to exploit non-trivial lineality spaces.
3979  */
3980 struct isl_carry {
3981 	isl_basic_set_list *intra;
3982 	isl_basic_set_list *inter;
3983 	struct isl_exploit_lineality_data lineality;
3984 };
3985 
3986 /* Free all the data stored in "carry".
3987  */
isl_carry_clear(struct isl_carry * carry)3988 static void isl_carry_clear(struct isl_carry *carry)
3989 {
3990 	isl_basic_set_list_free(carry->intra);
3991 	isl_basic_set_list_free(carry->inter);
3992 	isl_union_map_free(carry->lineality.equivalent);
3993 	isl_union_set_free(carry->lineality.mask);
3994 }
3995 
3996 /* Return a pointer to the node in "graph" that lives in "space".
3997  * If the requested node has been compressed, then "space"
3998  * corresponds to the compressed space.
3999  * The graph is assumed to have such a node.
4000  * Return NULL in case of error.
4001  *
4002  * First try and see if "space" is the space of an uncompressed node.
4003  * If so, return that node.
4004  * Otherwise, "space" was constructed by construct_compressed_id and
4005  * contains a user pointer pointing to the node in the tuple id.
4006  * However, this node belongs to the original dependence graph.
4007  * If "graph" is a subgraph of this original dependence graph,
4008  * then the node with the same space still needs to be looked up
4009  * in the current graph.
4010  */
graph_find_compressed_node(isl_ctx * ctx,struct isl_sched_graph * graph,__isl_keep isl_space * space)4011 static struct isl_sched_node *graph_find_compressed_node(isl_ctx *ctx,
4012 	struct isl_sched_graph *graph, __isl_keep isl_space *space)
4013 {
4014 	isl_id *id;
4015 	struct isl_sched_node *node;
4016 
4017 	if (!space)
4018 		return NULL;
4019 
4020 	node = isl_sched_graph_find_node(ctx, graph, space);
4021 	if (!node)
4022 		return NULL;
4023 	if (isl_sched_graph_is_node(graph, node))
4024 		return node;
4025 
4026 	id = isl_space_get_tuple_id(space, isl_dim_set);
4027 	node = isl_id_get_user(id);
4028 	isl_id_free(id);
4029 
4030 	if (!node)
4031 		return NULL;
4032 
4033 	if (!isl_sched_graph_is_node(graph->root, node))
4034 		isl_die(ctx, isl_error_internal,
4035 			"space points to invalid node", return NULL);
4036 	if (graph != graph->root)
4037 		node = isl_sched_graph_find_node(ctx, graph, node->space);
4038 	if (!isl_sched_graph_is_node(graph, node))
4039 		isl_die(ctx, isl_error_internal,
4040 			"unable to find node", return NULL);
4041 
4042 	return node;
4043 }
4044 
4045 /* Internal data structure for add_all_constraints.
4046  *
4047  * "graph" is the schedule constraint graph for which an LP problem
4048  * is being constructed.
4049  * "carry_inter" indicates whether inter-node edges should be carried.
4050  * "pos" is the position of the next edge that needs to be carried.
4051  */
4052 struct isl_add_all_constraints_data {
4053 	isl_ctx *ctx;
4054 	struct isl_sched_graph *graph;
4055 	int carry_inter;
4056 	int pos;
4057 };
4058 
4059 /* Add the constraints "coef" derived from an edge from a node to itself
4060  * to data->graph->lp in order to respect the dependences and
4061  * to try and carry them.
4062  *
4063  * The space of "coef" is of the form
4064  *
4065  *	coefficients[[c_cst] -> S[c_x]]
4066  *
4067  * with S[c_x] the (compressed) space of the node.
4068  * Extract the node from the space and call add_intra_constraints.
4069  */
lp_add_intra(__isl_take isl_basic_set * coef,void * user)4070 static isl_stat lp_add_intra(__isl_take isl_basic_set *coef, void *user)
4071 {
4072 	struct isl_add_all_constraints_data *data = user;
4073 	isl_space *space;
4074 	struct isl_sched_node *node;
4075 
4076 	space = isl_basic_set_get_space(coef);
4077 	space = isl_space_range(isl_space_unwrap(space));
4078 	node = graph_find_compressed_node(data->ctx, data->graph, space);
4079 	isl_space_free(space);
4080 	return add_intra_constraints(data->graph, node, coef, data->pos++);
4081 }
4082 
4083 /* Add the constraints "coef" derived from an edge from a node j
4084  * to a node k to data->graph->lp in order to respect the dependences and
4085  * to try and carry them (provided data->carry_inter is set).
4086  *
4087  * The space of "coef" is of the form
4088  *
4089  *	coefficients[[c_cst, c_n] -> [S_j[c_x] -> S_k[c_y]]]
4090  *
4091  * with S_j[c_x] and S_k[c_y] the (compressed) spaces of the nodes.
4092  * Extract the nodes from the space and call add_inter_constraints.
4093  */
lp_add_inter(__isl_take isl_basic_set * coef,void * user)4094 static isl_stat lp_add_inter(__isl_take isl_basic_set *coef, void *user)
4095 {
4096 	struct isl_add_all_constraints_data *data = user;
4097 	isl_space *space, *dom;
4098 	struct isl_sched_node *src, *dst;
4099 	int pos;
4100 
4101 	space = isl_basic_set_get_space(coef);
4102 	space = isl_space_unwrap(isl_space_range(isl_space_unwrap(space)));
4103 	dom = isl_space_domain(isl_space_copy(space));
4104 	src = graph_find_compressed_node(data->ctx, data->graph, dom);
4105 	isl_space_free(dom);
4106 	space = isl_space_range(space);
4107 	dst = graph_find_compressed_node(data->ctx, data->graph, space);
4108 	isl_space_free(space);
4109 
4110 	pos = data->carry_inter ? data->pos++ : -1;
4111 	return add_inter_constraints(data->graph, src, dst, coef, pos);
4112 }
4113 
4114 /* Add constraints to graph->lp that force all (conditional) validity
4115  * dependences to be respected and attempt to carry them.
4116  * "intra" is the sequence of coefficient constraints for intra-node edges.
4117  * "inter" is the sequence of coefficient constraints for inter-node edges.
4118  * "carry_inter" indicates whether inter-node edges should be carried or
4119  * only respected.
4120  */
add_all_constraints(isl_ctx * ctx,struct isl_sched_graph * graph,__isl_keep isl_basic_set_list * intra,__isl_keep isl_basic_set_list * inter,int carry_inter)4121 static isl_stat add_all_constraints(isl_ctx *ctx, struct isl_sched_graph *graph,
4122 	__isl_keep isl_basic_set_list *intra,
4123 	__isl_keep isl_basic_set_list *inter, int carry_inter)
4124 {
4125 	struct isl_add_all_constraints_data data = { ctx, graph, carry_inter };
4126 
4127 	data.pos = 0;
4128 	if (isl_basic_set_list_foreach(intra, &lp_add_intra, &data) < 0)
4129 		return isl_stat_error;
4130 	if (isl_basic_set_list_foreach(inter, &lp_add_inter, &data) < 0)
4131 		return isl_stat_error;
4132 	return isl_stat_ok;
4133 }
4134 
4135 /* Internal data structure for count_all_constraints
4136  * for keeping track of the number of equality and inequality constraints.
4137  */
4138 struct isl_sched_count {
4139 	int n_eq;
4140 	int n_ineq;
4141 };
4142 
4143 /* Add the number of equality and inequality constraints of "bset"
4144  * to data->n_eq and data->n_ineq.
4145  */
bset_update_count(__isl_take isl_basic_set * bset,void * user)4146 static isl_stat bset_update_count(__isl_take isl_basic_set *bset, void *user)
4147 {
4148 	struct isl_sched_count *data = user;
4149 
4150 	return update_count(bset, 1, &data->n_eq, &data->n_ineq);
4151 }
4152 
4153 /* Count the number of equality and inequality constraints
4154  * that will be added to the carry_lp problem.
4155  * We count each edge exactly once.
4156  * "intra" is the sequence of coefficient constraints for intra-node edges.
4157  * "inter" is the sequence of coefficient constraints for inter-node edges.
4158  */
count_all_constraints(__isl_keep isl_basic_set_list * intra,__isl_keep isl_basic_set_list * inter,int * n_eq,int * n_ineq)4159 static isl_stat count_all_constraints(__isl_keep isl_basic_set_list *intra,
4160 	__isl_keep isl_basic_set_list *inter, int *n_eq, int *n_ineq)
4161 {
4162 	struct isl_sched_count data;
4163 
4164 	data.n_eq = data.n_ineq = 0;
4165 	if (isl_basic_set_list_foreach(inter, &bset_update_count, &data) < 0)
4166 		return isl_stat_error;
4167 	if (isl_basic_set_list_foreach(intra, &bset_update_count, &data) < 0)
4168 		return isl_stat_error;
4169 
4170 	*n_eq = data.n_eq;
4171 	*n_ineq = data.n_ineq;
4172 
4173 	return isl_stat_ok;
4174 }
4175 
4176 /* Construct an LP problem for finding schedule coefficients
4177  * such that the schedule carries as many validity dependences as possible.
4178  * In particular, for each dependence i, we bound the dependence distance
4179  * from below by e_i, with 0 <= e_i <= 1 and then maximize the sum
4180  * of all e_i's.  Dependences with e_i = 0 in the solution are simply
4181  * respected, while those with e_i > 0 (in practice e_i = 1) are carried.
4182  * "intra" is the sequence of coefficient constraints for intra-node edges.
4183  * "inter" is the sequence of coefficient constraints for inter-node edges.
4184  * "n_edge" is the total number of edges.
4185  * "carry_inter" indicates whether inter-node edges should be carried or
4186  * only respected.  That is, if "carry_inter" is not set, then
4187  * no e_i variables are introduced for the inter-node edges.
4188  *
4189  * All variables of the LP are non-negative.  The actual coefficients
4190  * may be negative, so each coefficient is represented as the difference
4191  * of two non-negative variables.  The negative part always appears
4192  * immediately before the positive part.
4193  * Other than that, the variables have the following order
4194  *
4195  *	- sum of (1 - e_i) over all edges
4196  *	- sum of all c_n coefficients
4197  *		(unconstrained when computing non-parametric schedules)
4198  *	- sum of positive and negative parts of all c_x coefficients
4199  *	- for each edge
4200  *		- e_i
4201  *	- for each node
4202  *		- positive and negative parts of c_i_x, in opposite order
4203  *		- c_i_n (if parametric)
4204  *		- c_i_0
4205  *
4206  * The constraints are those from the (validity) edges plus three equalities
4207  * to express the sums and n_edge inequalities to express e_i <= 1.
4208  */
setup_carry_lp(isl_ctx * ctx,struct isl_sched_graph * graph,int n_edge,__isl_keep isl_basic_set_list * intra,__isl_keep isl_basic_set_list * inter,int carry_inter)4209 static isl_stat setup_carry_lp(isl_ctx *ctx, struct isl_sched_graph *graph,
4210 	int n_edge, __isl_keep isl_basic_set_list *intra,
4211 	__isl_keep isl_basic_set_list *inter, int carry_inter)
4212 {
4213 	int i;
4214 	int k;
4215 	isl_space *space;
4216 	unsigned total;
4217 	int n_eq, n_ineq;
4218 
4219 	total = 3 + n_edge;
4220 	for (i = 0; i < graph->n; ++i) {
4221 		struct isl_sched_node *node = &graph->node[graph->sorted[i]];
4222 		node->start = total;
4223 		total += 1 + node->nparam + 2 * node->nvar;
4224 	}
4225 
4226 	if (count_all_constraints(intra, inter, &n_eq, &n_ineq) < 0)
4227 		return isl_stat_error;
4228 
4229 	space = isl_space_set_alloc(ctx, 0, total);
4230 	isl_basic_set_free(graph->lp);
4231 	n_eq += 3;
4232 	n_ineq += n_edge;
4233 	graph->lp = isl_basic_set_alloc_space(space, 0, n_eq, n_ineq);
4234 	graph->lp = isl_basic_set_set_rational(graph->lp);
4235 
4236 	k = isl_basic_set_alloc_equality(graph->lp);
4237 	if (k < 0)
4238 		return isl_stat_error;
4239 	isl_seq_clr(graph->lp->eq[k], 1 + total);
4240 	isl_int_set_si(graph->lp->eq[k][0], -n_edge);
4241 	isl_int_set_si(graph->lp->eq[k][1], 1);
4242 	for (i = 0; i < n_edge; ++i)
4243 		isl_int_set_si(graph->lp->eq[k][4 + i], 1);
4244 
4245 	if (add_param_sum_constraint(graph, 1) < 0)
4246 		return isl_stat_error;
4247 	if (add_var_sum_constraint(graph, 2) < 0)
4248 		return isl_stat_error;
4249 
4250 	for (i = 0; i < n_edge; ++i) {
4251 		k = isl_basic_set_alloc_inequality(graph->lp);
4252 		if (k < 0)
4253 			return isl_stat_error;
4254 		isl_seq_clr(graph->lp->ineq[k], 1 + total);
4255 		isl_int_set_si(graph->lp->ineq[k][4 + i], -1);
4256 		isl_int_set_si(graph->lp->ineq[k][0], 1);
4257 	}
4258 
4259 	if (add_all_constraints(ctx, graph, intra, inter, carry_inter) < 0)
4260 		return isl_stat_error;
4261 
4262 	return isl_stat_ok;
4263 }
4264 
4265 static __isl_give isl_schedule_node *compute_component_schedule(
4266 	__isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
4267 	int wcc);
4268 
4269 /* If the schedule_split_scaled option is set and if the linear
4270  * parts of the scheduling rows for all nodes in the graphs have
4271  * a non-trivial common divisor, then remove this
4272  * common divisor from the linear part.
4273  * Otherwise, insert a band node directly and continue with
4274  * the construction of the schedule.
4275  *
4276  * If a non-trivial common divisor is found, then
4277  * the linear part is reduced and the remainder is ignored.
4278  * The pieces of the graph that are assigned different remainders
4279  * form (groups of) strongly connected components within
4280  * the scaled down band.  If needed, they can therefore
4281  * be ordered along this remainder in a sequence node.
4282  * However, this ordering is not enforced here in order to allow
4283  * the scheduler to combine some of the strongly connected components.
4284  */
split_scaled(__isl_take isl_schedule_node * node,struct isl_sched_graph * graph)4285 static __isl_give isl_schedule_node *split_scaled(
4286 	__isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
4287 {
4288 	int i;
4289 	int row;
4290 	isl_ctx *ctx;
4291 	isl_int gcd, gcd_i;
4292 	isl_size n_row;
4293 
4294 	if (!node)
4295 		return NULL;
4296 
4297 	ctx = isl_schedule_node_get_ctx(node);
4298 	if (!ctx->opt->schedule_split_scaled)
4299 		return compute_next_band(node, graph, 0);
4300 	if (graph->n <= 1)
4301 		return compute_next_band(node, graph, 0);
4302 	n_row = isl_mat_rows(graph->node[0].sched);
4303 	if (n_row < 0)
4304 		return isl_schedule_node_free(node);
4305 
4306 	isl_int_init(gcd);
4307 	isl_int_init(gcd_i);
4308 
4309 	isl_int_set_si(gcd, 0);
4310 
4311 	row = n_row - 1;
4312 
4313 	for (i = 0; i < graph->n; ++i) {
4314 		struct isl_sched_node *node = &graph->node[i];
4315 		isl_size cols = isl_mat_cols(node->sched);
4316 
4317 		if (cols < 0)
4318 			break;
4319 		isl_seq_gcd(node->sched->row[row] + 1, cols - 1, &gcd_i);
4320 		isl_int_gcd(gcd, gcd, gcd_i);
4321 	}
4322 
4323 	isl_int_clear(gcd_i);
4324 	if (i < graph->n)
4325 		goto error;
4326 
4327 	if (isl_int_cmp_si(gcd, 1) <= 0) {
4328 		isl_int_clear(gcd);
4329 		return compute_next_band(node, graph, 0);
4330 	}
4331 
4332 	for (i = 0; i < graph->n; ++i) {
4333 		struct isl_sched_node *node = &graph->node[i];
4334 
4335 		isl_int_fdiv_q(node->sched->row[row][0],
4336 			       node->sched->row[row][0], gcd);
4337 		isl_int_mul(node->sched->row[row][0],
4338 			    node->sched->row[row][0], gcd);
4339 		node->sched = isl_mat_scale_down_row(node->sched, row, gcd);
4340 		if (!node->sched)
4341 			goto error;
4342 	}
4343 
4344 	isl_int_clear(gcd);
4345 
4346 	return compute_next_band(node, graph, 0);
4347 error:
4348 	isl_int_clear(gcd);
4349 	return isl_schedule_node_free(node);
4350 }
4351 
4352 /* Is the schedule row "sol" trivial on node "node"?
4353  * That is, is the solution zero on the dimensions linearly independent of
4354  * the previously found solutions?
4355  * Return 1 if the solution is trivial, 0 if it is not and -1 on error.
4356  *
4357  * Each coefficient is represented as the difference between
4358  * two non-negative values in "sol".
4359  * We construct the schedule row s and check if it is linearly
4360  * independent of previously computed schedule rows
4361  * by computing T s, with T the linear combinations that are zero
4362  * on linearly dependent schedule rows.
4363  * If the result consists of all zeros, then the solution is trivial.
4364  */
is_trivial(struct isl_sched_node * node,__isl_keep isl_vec * sol)4365 static int is_trivial(struct isl_sched_node *node, __isl_keep isl_vec *sol)
4366 {
4367 	int trivial;
4368 	isl_vec *node_sol;
4369 
4370 	if (!sol)
4371 		return -1;
4372 	if (node->nvar == node->rank)
4373 		return 0;
4374 
4375 	node_sol = extract_var_coef(node, sol);
4376 	node_sol = isl_mat_vec_product(isl_mat_copy(node->indep), node_sol);
4377 	if (!node_sol)
4378 		return -1;
4379 
4380 	trivial = isl_seq_first_non_zero(node_sol->el,
4381 					node->nvar - node->rank) == -1;
4382 
4383 	isl_vec_free(node_sol);
4384 
4385 	return trivial;
4386 }
4387 
4388 /* Is the schedule row "sol" trivial on any node where it should
4389  * not be trivial?
4390  * Return 1 if any solution is trivial, 0 if they are not and -1 on error.
4391  */
is_any_trivial(struct isl_sched_graph * graph,__isl_keep isl_vec * sol)4392 static int is_any_trivial(struct isl_sched_graph *graph,
4393 	__isl_keep isl_vec *sol)
4394 {
4395 	int i;
4396 
4397 	for (i = 0; i < graph->n; ++i) {
4398 		struct isl_sched_node *node = &graph->node[i];
4399 		int trivial;
4400 
4401 		if (!needs_row(graph, node))
4402 			continue;
4403 		trivial = is_trivial(node, sol);
4404 		if (trivial < 0 || trivial)
4405 			return trivial;
4406 	}
4407 
4408 	return 0;
4409 }
4410 
4411 /* Does the schedule represented by "sol" perform loop coalescing on "node"?
4412  * If so, return the position of the coalesced dimension.
4413  * Otherwise, return node->nvar or -1 on error.
4414  *
4415  * In particular, look for pairs of coefficients c_i and c_j such that
4416  * |c_j/c_i| > ceil(size_i/2), i.e., |c_j| > |c_i * ceil(size_i/2)|.
4417  * If any such pair is found, then return i.
4418  * If size_i is infinity, then no check on c_i needs to be performed.
4419  */
find_node_coalescing(struct isl_sched_node * node,__isl_keep isl_vec * sol)4420 static int find_node_coalescing(struct isl_sched_node *node,
4421 	__isl_keep isl_vec *sol)
4422 {
4423 	int i, j;
4424 	isl_int max;
4425 	isl_vec *csol;
4426 
4427 	if (node->nvar <= 1)
4428 		return node->nvar;
4429 
4430 	csol = extract_var_coef(node, sol);
4431 	if (!csol)
4432 		return -1;
4433 	isl_int_init(max);
4434 	for (i = 0; i < node->nvar; ++i) {
4435 		isl_val *v;
4436 
4437 		if (isl_int_is_zero(csol->el[i]))
4438 			continue;
4439 		v = isl_multi_val_get_val(node->sizes, i);
4440 		if (!v)
4441 			goto error;
4442 		if (!isl_val_is_int(v)) {
4443 			isl_val_free(v);
4444 			continue;
4445 		}
4446 		v = isl_val_div_ui(v, 2);
4447 		v = isl_val_ceil(v);
4448 		if (!v)
4449 			goto error;
4450 		isl_int_mul(max, v->n, csol->el[i]);
4451 		isl_val_free(v);
4452 
4453 		for (j = 0; j < node->nvar; ++j) {
4454 			if (j == i)
4455 				continue;
4456 			if (isl_int_abs_gt(csol->el[j], max))
4457 				break;
4458 		}
4459 		if (j < node->nvar)
4460 			break;
4461 	}
4462 
4463 	isl_int_clear(max);
4464 	isl_vec_free(csol);
4465 	return i;
4466 error:
4467 	isl_int_clear(max);
4468 	isl_vec_free(csol);
4469 	return -1;
4470 }
4471 
4472 /* Force the schedule coefficient at position "pos" of "node" to be zero
4473  * in "tl".
4474  * The coefficient is encoded as the difference between two non-negative
4475  * variables.  Force these two variables to have the same value.
4476  */
zero_out_node_coef(__isl_take isl_tab_lexmin * tl,struct isl_sched_node * node,int pos)4477 static __isl_give isl_tab_lexmin *zero_out_node_coef(
4478 	__isl_take isl_tab_lexmin *tl, struct isl_sched_node *node, int pos)
4479 {
4480 	int dim;
4481 	isl_ctx *ctx;
4482 	isl_vec *eq;
4483 
4484 	ctx = isl_space_get_ctx(node->space);
4485 	dim = isl_tab_lexmin_dim(tl);
4486 	if (dim < 0)
4487 		return isl_tab_lexmin_free(tl);
4488 	eq = isl_vec_alloc(ctx, 1 + dim);
4489 	eq = isl_vec_clr(eq);
4490 	if (!eq)
4491 		return isl_tab_lexmin_free(tl);
4492 
4493 	pos = 1 + node_var_coef_pos(node, pos);
4494 	isl_int_set_si(eq->el[pos], 1);
4495 	isl_int_set_si(eq->el[pos + 1], -1);
4496 	tl = isl_tab_lexmin_add_eq(tl, eq->el);
4497 	isl_vec_free(eq);
4498 
4499 	return tl;
4500 }
4501 
4502 /* Return the lexicographically smallest rational point in the basic set
4503  * from which "tl" was constructed, double checking that this input set
4504  * was not empty.
4505  */
non_empty_solution(__isl_keep isl_tab_lexmin * tl)4506 static __isl_give isl_vec *non_empty_solution(__isl_keep isl_tab_lexmin *tl)
4507 {
4508 	isl_vec *sol;
4509 
4510 	sol = isl_tab_lexmin_get_solution(tl);
4511 	if (!sol)
4512 		return NULL;
4513 	if (sol->size == 0)
4514 		isl_die(isl_vec_get_ctx(sol), isl_error_internal,
4515 			"error in schedule construction",
4516 			return isl_vec_free(sol));
4517 	return sol;
4518 }
4519 
4520 /* Does the solution "sol" of the LP problem constructed by setup_carry_lp
4521  * carry any of the "n_edge" groups of dependences?
4522  * The value in the first position is the sum of (1 - e_i) over all "n_edge"
4523  * edges, with 0 <= e_i <= 1 equal to 1 when the dependences represented
4524  * by the edge are carried by the solution.
4525  * If the sum of the (1 - e_i) is smaller than "n_edge" then at least
4526  * one of those is carried.
4527  *
4528  * Note that despite the fact that the problem is solved using a rational
4529  * solver, the solution is guaranteed to be integral.
4530  * Specifically, the dependence distance lower bounds e_i (and therefore
4531  * also their sum) are integers.  See Lemma 5 of [1].
4532  *
4533  * Any potential denominator of the sum is cleared by this function.
4534  * The denominator is not relevant for any of the other elements
4535  * in the solution.
4536  *
4537  * [1] P. Feautrier, Some Efficient Solutions to the Affine Scheduling
4538  *     Problem, Part II: Multi-Dimensional Time.
4539  *     In Intl. Journal of Parallel Programming, 1992.
4540  */
carries_dependences(__isl_keep isl_vec * sol,int n_edge)4541 static int carries_dependences(__isl_keep isl_vec *sol, int n_edge)
4542 {
4543 	isl_int_divexact(sol->el[1], sol->el[1], sol->el[0]);
4544 	isl_int_set_si(sol->el[0], 1);
4545 	return isl_int_cmp_si(sol->el[1], n_edge) < 0;
4546 }
4547 
4548 /* Return the lexicographically smallest rational point in "lp",
4549  * assuming that all variables are non-negative and performing some
4550  * additional sanity checks.
4551  * If "want_integral" is set, then compute the lexicographically smallest
4552  * integer point instead.
4553  * In particular, "lp" should not be empty by construction.
4554  * Double check that this is the case.
4555  * If dependences are not carried for any of the "n_edge" edges,
4556  * then return an empty vector.
4557  *
4558  * If the schedule_treat_coalescing option is set and
4559  * if the computed schedule performs loop coalescing on a given node,
4560  * i.e., if it is of the form
4561  *
4562  *	c_i i + c_j j + ...
4563  *
4564  * with |c_j/c_i| >= size_i, then force the coefficient c_i to be zero
4565  * to cut out this solution.  Repeat this process until no more loop
4566  * coalescing occurs or until no more dependences can be carried.
4567  * In the latter case, revert to the previously computed solution.
4568  *
4569  * If the caller requests an integral solution and if coalescing should
4570  * be treated, then perform the coalescing treatment first as
4571  * an integral solution computed before coalescing treatment
4572  * would carry the same number of edges and would therefore probably
4573  * also be coalescing.
4574  *
4575  * To allow the coalescing treatment to be performed first,
4576  * the initial solution is allowed to be rational and it is only
4577  * cut out (if needed) in the next iteration, if no coalescing measures
4578  * were taken.
4579  */
non_neg_lexmin(struct isl_sched_graph * graph,__isl_take isl_basic_set * lp,int n_edge,int want_integral)4580 static __isl_give isl_vec *non_neg_lexmin(struct isl_sched_graph *graph,
4581 	__isl_take isl_basic_set *lp, int n_edge, int want_integral)
4582 {
4583 	int i, pos, cut;
4584 	isl_ctx *ctx;
4585 	isl_tab_lexmin *tl;
4586 	isl_vec *sol = NULL, *prev;
4587 	int treat_coalescing;
4588 	int try_again;
4589 
4590 	if (!lp)
4591 		return NULL;
4592 	ctx = isl_basic_set_get_ctx(lp);
4593 	treat_coalescing = isl_options_get_schedule_treat_coalescing(ctx);
4594 	tl = isl_tab_lexmin_from_basic_set(lp);
4595 
4596 	cut = 0;
4597 	do {
4598 		int integral;
4599 
4600 		try_again = 0;
4601 		if (cut)
4602 			tl = isl_tab_lexmin_cut_to_integer(tl);
4603 		prev = sol;
4604 		sol = non_empty_solution(tl);
4605 		if (!sol)
4606 			goto error;
4607 
4608 		integral = isl_int_is_one(sol->el[0]);
4609 		if (!carries_dependences(sol, n_edge)) {
4610 			if (!prev)
4611 				prev = isl_vec_alloc(ctx, 0);
4612 			isl_vec_free(sol);
4613 			sol = prev;
4614 			break;
4615 		}
4616 		prev = isl_vec_free(prev);
4617 		cut = want_integral && !integral;
4618 		if (cut)
4619 			try_again = 1;
4620 		if (!treat_coalescing)
4621 			continue;
4622 		for (i = 0; i < graph->n; ++i) {
4623 			struct isl_sched_node *node = &graph->node[i];
4624 
4625 			pos = find_node_coalescing(node, sol);
4626 			if (pos < 0)
4627 				goto error;
4628 			if (pos < node->nvar)
4629 				break;
4630 		}
4631 		if (i < graph->n) {
4632 			try_again = 1;
4633 			tl = zero_out_node_coef(tl, &graph->node[i], pos);
4634 			cut = 0;
4635 		}
4636 	} while (try_again);
4637 
4638 	isl_tab_lexmin_free(tl);
4639 
4640 	return sol;
4641 error:
4642 	isl_tab_lexmin_free(tl);
4643 	isl_vec_free(prev);
4644 	isl_vec_free(sol);
4645 	return NULL;
4646 }
4647 
4648 /* If "edge" is an edge from a node to itself, then add the corresponding
4649  * dependence relation to "umap".
4650  * If "node" has been compressed, then the dependence relation
4651  * is also compressed first.
4652  */
add_intra(__isl_take isl_union_map * umap,struct isl_sched_edge * edge)4653 static __isl_give isl_union_map *add_intra(__isl_take isl_union_map *umap,
4654 	struct isl_sched_edge *edge)
4655 {
4656 	isl_map *map;
4657 	struct isl_sched_node *node = edge->src;
4658 
4659 	if (edge->src != edge->dst)
4660 		return umap;
4661 
4662 	map = isl_map_copy(edge->map);
4663 	map = compress(map, node, node);
4664 	umap = isl_union_map_add_map(umap, map);
4665 	return umap;
4666 }
4667 
4668 /* If "edge" is an edge from a node to another node, then add the corresponding
4669  * dependence relation to "umap".
4670  * If the source or destination nodes of "edge" have been compressed,
4671  * then the dependence relation is also compressed first.
4672  */
add_inter(__isl_take isl_union_map * umap,struct isl_sched_edge * edge)4673 static __isl_give isl_union_map *add_inter(__isl_take isl_union_map *umap,
4674 	struct isl_sched_edge *edge)
4675 {
4676 	isl_map *map;
4677 
4678 	if (edge->src == edge->dst)
4679 		return umap;
4680 
4681 	map = isl_map_copy(edge->map);
4682 	map = compress(map, edge->src, edge->dst);
4683 	umap = isl_union_map_add_map(umap, map);
4684 	return umap;
4685 }
4686 
4687 /* Internal data structure used by union_drop_coalescing_constraints
4688  * to collect bounds on all relevant statements.
4689  *
4690  * "graph" is the schedule constraint graph for which an LP problem
4691  * is being constructed.
4692  * "bounds" collects the bounds.
4693  */
4694 struct isl_collect_bounds_data {
4695 	isl_ctx *ctx;
4696 	struct isl_sched_graph *graph;
4697 	isl_union_set *bounds;
4698 };
4699 
4700 /* Add the size bounds for the node with instance deltas in "set"
4701  * to data->bounds.
4702  */
collect_bounds(__isl_take isl_set * set,void * user)4703 static isl_stat collect_bounds(__isl_take isl_set *set, void *user)
4704 {
4705 	struct isl_collect_bounds_data *data = user;
4706 	struct isl_sched_node *node;
4707 	isl_space *space;
4708 	isl_set *bounds;
4709 
4710 	space = isl_set_get_space(set);
4711 	isl_set_free(set);
4712 
4713 	node = graph_find_compressed_node(data->ctx, data->graph, space);
4714 	isl_space_free(space);
4715 
4716 	bounds = isl_set_from_basic_set(get_size_bounds(node));
4717 	data->bounds = isl_union_set_add_set(data->bounds, bounds);
4718 
4719 	return isl_stat_ok;
4720 }
4721 
4722 /* Drop some constraints from "delta" that could be exploited
4723  * to construct loop coalescing schedules.
4724  * In particular, drop those constraint that bound the difference
4725  * to the size of the domain.
4726  * Do this for each set/node in "delta" separately.
4727  * The parameters are assumed to have been projected out by the caller.
4728  */
union_drop_coalescing_constraints(isl_ctx * ctx,struct isl_sched_graph * graph,__isl_take isl_union_set * delta)4729 static __isl_give isl_union_set *union_drop_coalescing_constraints(isl_ctx *ctx,
4730 	struct isl_sched_graph *graph, __isl_take isl_union_set *delta)
4731 {
4732 	struct isl_collect_bounds_data data = { ctx, graph };
4733 
4734 	data.bounds = isl_union_set_empty(isl_space_params_alloc(ctx, 0));
4735 	if (isl_union_set_foreach_set(delta, &collect_bounds, &data) < 0)
4736 		data.bounds = isl_union_set_free(data.bounds);
4737 	delta = isl_union_set_plain_gist(delta, data.bounds);
4738 
4739 	return delta;
4740 }
4741 
4742 /* Given a non-trivial lineality space "lineality", add the corresponding
4743  * universe set to data->mask and add a map from elements to
4744  * other elements along the lines in "lineality" to data->equivalent.
4745  * If this is the first time this function gets called
4746  * (data->any_non_trivial is still false), then set data->any_non_trivial and
4747  * initialize data->mask and data->equivalent.
4748  *
4749  * In particular, if the lineality space is defined by equality constraints
4750  *
4751  *	E x = 0
4752  *
4753  * then construct an affine mapping
4754  *
4755  *	f : x -> E x
4756  *
4757  * and compute the equivalence relation of having the same image under f:
4758  *
4759  *	{ x -> x' : E x = E x' }
4760  */
add_non_trivial_lineality(__isl_take isl_basic_set * lineality,struct isl_exploit_lineality_data * data)4761 static isl_stat add_non_trivial_lineality(__isl_take isl_basic_set *lineality,
4762 	struct isl_exploit_lineality_data *data)
4763 {
4764 	isl_mat *eq;
4765 	isl_space *space;
4766 	isl_set *univ;
4767 	isl_multi_aff *ma;
4768 	isl_multi_pw_aff *mpa;
4769 	isl_map *map;
4770 	isl_size n;
4771 
4772 	if (isl_basic_set_check_no_locals(lineality) < 0)
4773 		goto error;
4774 
4775 	space = isl_basic_set_get_space(lineality);
4776 	if (!data->any_non_trivial) {
4777 		data->equivalent = isl_union_map_empty(isl_space_copy(space));
4778 		data->mask = isl_union_set_empty(isl_space_copy(space));
4779 	}
4780 	data->any_non_trivial = isl_bool_true;
4781 
4782 	univ = isl_set_universe(isl_space_copy(space));
4783 	data->mask = isl_union_set_add_set(data->mask, univ);
4784 
4785 	eq = isl_basic_set_extract_equalities(lineality);
4786 	n = isl_mat_rows(eq);
4787 	if (n < 0)
4788 		space = isl_space_free(space);
4789 	eq = isl_mat_insert_zero_rows(eq, 0, 1);
4790 	eq = isl_mat_set_element_si(eq, 0, 0, 1);
4791 	space = isl_space_from_domain(space);
4792 	space = isl_space_add_dims(space, isl_dim_out, n);
4793 	ma = isl_multi_aff_from_aff_mat(space, eq);
4794 	mpa = isl_multi_pw_aff_from_multi_aff(ma);
4795 	map = isl_multi_pw_aff_eq_map(mpa, isl_multi_pw_aff_copy(mpa));
4796 	data->equivalent = isl_union_map_add_map(data->equivalent, map);
4797 
4798 	isl_basic_set_free(lineality);
4799 	return isl_stat_ok;
4800 error:
4801 	isl_basic_set_free(lineality);
4802 	return isl_stat_error;
4803 }
4804 
4805 /* Check if the lineality space "set" is non-trivial (i.e., is not just
4806  * the origin or, in other words, satisfies a number of equality constraints
4807  * that is smaller than the dimension of the set).
4808  * If so, extend data->mask and data->equivalent accordingly.
4809  *
4810  * The input should not have any local variables already, but
4811  * isl_set_remove_divs is called to make sure it does not.
4812  */
add_lineality(__isl_take isl_set * set,void * user)4813 static isl_stat add_lineality(__isl_take isl_set *set, void *user)
4814 {
4815 	struct isl_exploit_lineality_data *data = user;
4816 	isl_basic_set *hull;
4817 	isl_size dim;
4818 	isl_size n_eq;
4819 
4820 	set = isl_set_remove_divs(set);
4821 	hull = isl_set_unshifted_simple_hull(set);
4822 	dim = isl_basic_set_dim(hull, isl_dim_set);
4823 	n_eq = isl_basic_set_n_equality(hull);
4824 	if (dim < 0 || n_eq < 0)
4825 		goto error;
4826 	if (dim != n_eq)
4827 		return add_non_trivial_lineality(hull, data);
4828 	isl_basic_set_free(hull);
4829 	return isl_stat_ok;
4830 error:
4831 	isl_basic_set_free(hull);
4832 	return isl_stat_error;
4833 }
4834 
4835 /* Check if the difference set on intra-node schedule constraints "intra"
4836  * has any non-trivial lineality space.
4837  * If so, then extend the difference set to a difference set
4838  * on equivalent elements.  That is, if "intra" is
4839  *
4840  *	{ y - x : (x,y) \in V }
4841  *
4842  * and elements are equivalent if they have the same image under f,
4843  * then return
4844  *
4845  *	{ y' - x' : (x,y) \in V and f(x) = f(x') and f(y) = f(y') }
4846  *
4847  * or, since f is linear,
4848  *
4849  *	{ y' - x' : (x,y) \in V and f(y - x) = f(y' - x') }
4850  *
4851  * The results of the search for non-trivial lineality spaces is stored
4852  * in "data".
4853  */
exploit_intra_lineality(__isl_take isl_union_set * intra,struct isl_exploit_lineality_data * data)4854 static __isl_give isl_union_set *exploit_intra_lineality(
4855 	__isl_take isl_union_set *intra,
4856 	struct isl_exploit_lineality_data *data)
4857 {
4858 	isl_union_set *lineality;
4859 	isl_union_set *uset;
4860 
4861 	data->any_non_trivial = isl_bool_false;
4862 	lineality = isl_union_set_copy(intra);
4863 	lineality = isl_union_set_combined_lineality_space(lineality);
4864 	if (isl_union_set_foreach_set(lineality, &add_lineality, data) < 0)
4865 		data->any_non_trivial = isl_bool_error;
4866 	isl_union_set_free(lineality);
4867 
4868 	if (data->any_non_trivial < 0)
4869 		return isl_union_set_free(intra);
4870 	if (!data->any_non_trivial)
4871 		return intra;
4872 
4873 	uset = isl_union_set_copy(intra);
4874 	intra = isl_union_set_subtract(intra, isl_union_set_copy(data->mask));
4875 	uset = isl_union_set_apply(uset, isl_union_map_copy(data->equivalent));
4876 	intra = isl_union_set_union(intra, uset);
4877 
4878 	intra = isl_union_set_remove_divs(intra);
4879 
4880 	return intra;
4881 }
4882 
4883 /* If the difference set on intra-node schedule constraints was found to have
4884  * any non-trivial lineality space by exploit_intra_lineality,
4885  * as recorded in "data", then extend the inter-node
4886  * schedule constraints "inter" to schedule constraints on equivalent elements.
4887  * That is, if "inter" is V and
4888  * elements are equivalent if they have the same image under f, then return
4889  *
4890  *	{ (x', y') : (x,y) \in V and f(x) = f(x') and f(y) = f(y') }
4891  */
exploit_inter_lineality(__isl_take isl_union_map * inter,struct isl_exploit_lineality_data * data)4892 static __isl_give isl_union_map *exploit_inter_lineality(
4893 	__isl_take isl_union_map *inter,
4894 	struct isl_exploit_lineality_data *data)
4895 {
4896 	isl_union_map *umap;
4897 
4898 	if (data->any_non_trivial < 0)
4899 		return isl_union_map_free(inter);
4900 	if (!data->any_non_trivial)
4901 		return inter;
4902 
4903 	umap = isl_union_map_copy(inter);
4904 	inter = isl_union_map_subtract_range(inter,
4905 				isl_union_set_copy(data->mask));
4906 	umap = isl_union_map_apply_range(umap,
4907 				isl_union_map_copy(data->equivalent));
4908 	inter = isl_union_map_union(inter, umap);
4909 	umap = isl_union_map_copy(inter);
4910 	inter = isl_union_map_subtract_domain(inter,
4911 				isl_union_set_copy(data->mask));
4912 	umap = isl_union_map_apply_range(isl_union_map_copy(data->equivalent),
4913 				umap);
4914 	inter = isl_union_map_union(inter, umap);
4915 
4916 	inter = isl_union_map_remove_divs(inter);
4917 
4918 	return inter;
4919 }
4920 
4921 /* For each (conditional) validity edge in "graph",
4922  * add the corresponding dependence relation using "add"
4923  * to a collection of dependence relations and return the result.
4924  * If "coincidence" is set, then coincidence edges are considered as well.
4925  */
collect_validity(struct isl_sched_graph * graph,__isl_give isl_union_map * (* add)(__isl_take isl_union_map * umap,struct isl_sched_edge * edge),int coincidence)4926 static __isl_give isl_union_map *collect_validity(struct isl_sched_graph *graph,
4927 	__isl_give isl_union_map *(*add)(__isl_take isl_union_map *umap,
4928 		struct isl_sched_edge *edge), int coincidence)
4929 {
4930 	int i;
4931 	isl_space *space;
4932 	isl_union_map *umap;
4933 
4934 	space = isl_space_copy(graph->node[0].space);
4935 	umap = isl_union_map_empty(space);
4936 
4937 	for (i = 0; i < graph->n_edge; ++i) {
4938 		struct isl_sched_edge *edge = &graph->edge[i];
4939 
4940 		if (!is_any_validity(edge) &&
4941 		    (!coincidence || !is_coincidence(edge)))
4942 			continue;
4943 
4944 		umap = add(umap, edge);
4945 	}
4946 
4947 	return umap;
4948 }
4949 
4950 /* For each dependence relation on a (conditional) validity edge
4951  * from a node to itself,
4952  * construct the set of coefficients of valid constraints for elements
4953  * in that dependence relation and collect the results.
4954  * If "coincidence" is set, then coincidence edges are considered as well.
4955  *
4956  * In particular, for each dependence relation R, constraints
4957  * on coefficients (c_0, c_x) are constructed such that
4958  *
4959  *	c_0 + c_x d >= 0 for each d in delta R = { y - x | (x,y) in R }
4960  *
4961  * If the schedule_treat_coalescing option is set, then some constraints
4962  * that could be exploited to construct coalescing schedules
4963  * are removed before the dual is computed, but after the parameters
4964  * have been projected out.
4965  * The entire computation is essentially the same as that performed
4966  * by intra_coefficients, except that it operates on multiple
4967  * edges together and that the parameters are always projected out.
4968  *
4969  * Additionally, exploit any non-trivial lineality space
4970  * in the difference set after removing coalescing constraints and
4971  * store the results of the non-trivial lineality space detection in "data".
4972  * The procedure is currently run unconditionally, but it is unlikely
4973  * to find any non-trivial lineality spaces if no coalescing constraints
4974  * have been removed.
4975  *
4976  * Note that if a dependence relation is a union of basic maps,
4977  * then each basic map needs to be treated individually as it may only
4978  * be possible to carry the dependences expressed by some of those
4979  * basic maps and not all of them.
4980  * The collected validity constraints are therefore not coalesced and
4981  * it is assumed that they are not coalesced automatically.
4982  * Duplicate basic maps can be removed, however.
4983  * In particular, if the same basic map appears as a disjunct
4984  * in multiple edges, then it only needs to be carried once.
4985  */
collect_intra_validity(isl_ctx * ctx,struct isl_sched_graph * graph,int coincidence,struct isl_exploit_lineality_data * data)4986 static __isl_give isl_basic_set_list *collect_intra_validity(isl_ctx *ctx,
4987 	struct isl_sched_graph *graph, int coincidence,
4988 	struct isl_exploit_lineality_data *data)
4989 {
4990 	isl_union_map *intra;
4991 	isl_union_set *delta;
4992 	isl_basic_set_list *list;
4993 
4994 	intra = collect_validity(graph, &add_intra, coincidence);
4995 	delta = isl_union_map_deltas(intra);
4996 	delta = isl_union_set_project_out_all_params(delta);
4997 	delta = isl_union_set_remove_divs(delta);
4998 	if (isl_options_get_schedule_treat_coalescing(ctx))
4999 		delta = union_drop_coalescing_constraints(ctx, graph, delta);
5000 	delta = exploit_intra_lineality(delta, data);
5001 	list = isl_union_set_get_basic_set_list(delta);
5002 	isl_union_set_free(delta);
5003 
5004 	return isl_basic_set_list_coefficients(list);
5005 }
5006 
5007 /* For each dependence relation on a (conditional) validity edge
5008  * from a node to some other node,
5009  * construct the set of coefficients of valid constraints for elements
5010  * in that dependence relation and collect the results.
5011  * If "coincidence" is set, then coincidence edges are considered as well.
5012  *
5013  * In particular, for each dependence relation R, constraints
5014  * on coefficients (c_0, c_n, c_x, c_y) are constructed such that
5015  *
5016  *	c_0 + c_n n + c_x x + c_y y >= 0 for each (x,y) in R
5017  *
5018  * This computation is essentially the same as that performed
5019  * by inter_coefficients, except that it operates on multiple
5020  * edges together.
5021  *
5022  * Additionally, exploit any non-trivial lineality space
5023  * that may have been discovered by collect_intra_validity
5024  * (as stored in "data").
5025  *
5026  * Note that if a dependence relation is a union of basic maps,
5027  * then each basic map needs to be treated individually as it may only
5028  * be possible to carry the dependences expressed by some of those
5029  * basic maps and not all of them.
5030  * The collected validity constraints are therefore not coalesced and
5031  * it is assumed that they are not coalesced automatically.
5032  * Duplicate basic maps can be removed, however.
5033  * In particular, if the same basic map appears as a disjunct
5034  * in multiple edges, then it only needs to be carried once.
5035  */
collect_inter_validity(struct isl_sched_graph * graph,int coincidence,struct isl_exploit_lineality_data * data)5036 static __isl_give isl_basic_set_list *collect_inter_validity(
5037 	struct isl_sched_graph *graph, int coincidence,
5038 	struct isl_exploit_lineality_data *data)
5039 {
5040 	isl_union_map *inter;
5041 	isl_union_set *wrap;
5042 	isl_basic_set_list *list;
5043 
5044 	inter = collect_validity(graph, &add_inter, coincidence);
5045 	inter = exploit_inter_lineality(inter, data);
5046 	inter = isl_union_map_remove_divs(inter);
5047 	wrap = isl_union_map_wrap(inter);
5048 	list = isl_union_set_get_basic_set_list(wrap);
5049 	isl_union_set_free(wrap);
5050 	return isl_basic_set_list_coefficients(list);
5051 }
5052 
5053 /* Construct an LP problem for finding schedule coefficients
5054  * such that the schedule carries as many of the "n_edge" groups of
5055  * dependences as possible based on the corresponding coefficient
5056  * constraints and return the lexicographically smallest non-trivial solution.
5057  * "intra" is the sequence of coefficient constraints for intra-node edges.
5058  * "inter" is the sequence of coefficient constraints for inter-node edges.
5059  * If "want_integral" is set, then compute an integral solution
5060  * for the coefficients rather than using the numerators
5061  * of a rational solution.
5062  * "carry_inter" indicates whether inter-node edges should be carried or
5063  * only respected.
5064  *
5065  * If none of the "n_edge" groups can be carried
5066  * then return an empty vector.
5067  */
compute_carrying_sol_coef(isl_ctx * ctx,struct isl_sched_graph * graph,int n_edge,__isl_keep isl_basic_set_list * intra,__isl_keep isl_basic_set_list * inter,int want_integral,int carry_inter)5068 static __isl_give isl_vec *compute_carrying_sol_coef(isl_ctx *ctx,
5069 	struct isl_sched_graph *graph, int n_edge,
5070 	__isl_keep isl_basic_set_list *intra,
5071 	__isl_keep isl_basic_set_list *inter, int want_integral,
5072 	int carry_inter)
5073 {
5074 	isl_basic_set *lp;
5075 
5076 	if (setup_carry_lp(ctx, graph, n_edge, intra, inter, carry_inter) < 0)
5077 		return NULL;
5078 
5079 	lp = isl_basic_set_copy(graph->lp);
5080 	return non_neg_lexmin(graph, lp, n_edge, want_integral);
5081 }
5082 
5083 /* Construct an LP problem for finding schedule coefficients
5084  * such that the schedule carries as many of the validity dependences
5085  * as possible and
5086  * return the lexicographically smallest non-trivial solution.
5087  * If "fallback" is set, then the carrying is performed as a fallback
5088  * for the Pluto-like scheduler.
5089  * If "coincidence" is set, then try and carry coincidence edges as well.
5090  *
5091  * The variable "n_edge" stores the number of groups that should be carried.
5092  * If none of the "n_edge" groups can be carried
5093  * then return an empty vector.
5094  * If, moreover, "n_edge" is zero, then the LP problem does not even
5095  * need to be constructed.
5096  *
5097  * If a fallback solution is being computed, then compute an integral solution
5098  * for the coefficients rather than using the numerators
5099  * of a rational solution.
5100  *
5101  * If a fallback solution is being computed, if there are any intra-node
5102  * dependences, and if requested by the user, then first try
5103  * to only carry those intra-node dependences.
5104  * If this fails to carry any dependences, then try again
5105  * with the inter-node dependences included.
5106  */
compute_carrying_sol(isl_ctx * ctx,struct isl_sched_graph * graph,int fallback,int coincidence)5107 static __isl_give isl_vec *compute_carrying_sol(isl_ctx *ctx,
5108 	struct isl_sched_graph *graph, int fallback, int coincidence)
5109 {
5110 	isl_size n_intra, n_inter;
5111 	int n_edge;
5112 	struct isl_carry carry = { 0 };
5113 	isl_vec *sol;
5114 
5115 	carry.intra = collect_intra_validity(ctx, graph, coincidence,
5116 						&carry.lineality);
5117 	carry.inter = collect_inter_validity(graph, coincidence,
5118 						&carry.lineality);
5119 	n_intra = isl_basic_set_list_n_basic_set(carry.intra);
5120 	n_inter = isl_basic_set_list_n_basic_set(carry.inter);
5121 	if (n_intra < 0 || n_inter < 0)
5122 		goto error;
5123 
5124 	if (fallback && n_intra > 0 &&
5125 	    isl_options_get_schedule_carry_self_first(ctx)) {
5126 		sol = compute_carrying_sol_coef(ctx, graph, n_intra,
5127 				carry.intra, carry.inter, fallback, 0);
5128 		if (!sol || sol->size != 0 || n_inter == 0) {
5129 			isl_carry_clear(&carry);
5130 			return sol;
5131 		}
5132 		isl_vec_free(sol);
5133 	}
5134 
5135 	n_edge = n_intra + n_inter;
5136 	if (n_edge == 0) {
5137 		isl_carry_clear(&carry);
5138 		return isl_vec_alloc(ctx, 0);
5139 	}
5140 
5141 	sol = compute_carrying_sol_coef(ctx, graph, n_edge,
5142 				carry.intra, carry.inter, fallback, 1);
5143 	isl_carry_clear(&carry);
5144 	return sol;
5145 error:
5146 	isl_carry_clear(&carry);
5147 	return NULL;
5148 }
5149 
5150 /* Construct a schedule row for each node such that as many validity dependences
5151  * as possible are carried and then continue with the next band.
5152  * If "fallback" is set, then the carrying is performed as a fallback
5153  * for the Pluto-like scheduler.
5154  * If "coincidence" is set, then try and carry coincidence edges as well.
5155  *
5156  * If there are no validity dependences, then no dependence can be carried and
5157  * the procedure is guaranteed to fail.  If there is more than one component,
5158  * then try computing a schedule on each component separately
5159  * to prevent or at least postpone this failure.
5160  *
5161  * If a schedule row is computed, then check that dependences are carried
5162  * for at least one of the edges.
5163  *
5164  * If the computed schedule row turns out to be trivial on one or
5165  * more nodes where it should not be trivial, then we throw it away
5166  * and try again on each component separately.
5167  *
5168  * If there is only one component, then we accept the schedule row anyway,
5169  * but we do not consider it as a complete row and therefore do not
5170  * increment graph->n_row.  Note that the ranks of the nodes that
5171  * do get a non-trivial schedule part will get updated regardless and
5172  * graph->maxvar is computed based on these ranks.  The test for
5173  * whether more schedule rows are required in compute_schedule_wcc
5174  * is therefore not affected.
5175  *
5176  * Insert a band corresponding to the schedule row at position "node"
5177  * of the schedule tree and continue with the construction of the schedule.
5178  * This insertion and the continued construction is performed by split_scaled
5179  * after optionally checking for non-trivial common divisors.
5180  */
carry(__isl_take isl_schedule_node * node,struct isl_sched_graph * graph,int fallback,int coincidence)5181 static __isl_give isl_schedule_node *carry(__isl_take isl_schedule_node *node,
5182 	struct isl_sched_graph *graph, int fallback, int coincidence)
5183 {
5184 	int trivial;
5185 	isl_ctx *ctx;
5186 	isl_vec *sol;
5187 
5188 	if (!node)
5189 		return NULL;
5190 
5191 	ctx = isl_schedule_node_get_ctx(node);
5192 	sol = compute_carrying_sol(ctx, graph, fallback, coincidence);
5193 	if (!sol)
5194 		return isl_schedule_node_free(node);
5195 	if (sol->size == 0) {
5196 		isl_vec_free(sol);
5197 		if (graph->scc > 1)
5198 			return compute_component_schedule(node, graph, 1);
5199 		isl_die(ctx, isl_error_unknown, "unable to carry dependences",
5200 			return isl_schedule_node_free(node));
5201 	}
5202 
5203 	trivial = is_any_trivial(graph, sol);
5204 	if (trivial < 0) {
5205 		sol = isl_vec_free(sol);
5206 	} else if (trivial && graph->scc > 1) {
5207 		isl_vec_free(sol);
5208 		return compute_component_schedule(node, graph, 1);
5209 	}
5210 
5211 	if (update_schedule(graph, sol, 0) < 0)
5212 		return isl_schedule_node_free(node);
5213 	if (trivial)
5214 		graph->n_row--;
5215 
5216 	return split_scaled(node, graph);
5217 }
5218 
5219 /* Construct a schedule row for each node such that as many validity dependences
5220  * as possible are carried and then continue with the next band.
5221  * Do so as a fallback for the Pluto-like scheduler.
5222  * If "coincidence" is set, then try and carry coincidence edges as well.
5223  */
carry_fallback(__isl_take isl_schedule_node * node,struct isl_sched_graph * graph,int coincidence)5224 static __isl_give isl_schedule_node *carry_fallback(
5225 	__isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
5226 	int coincidence)
5227 {
5228 	return carry(node, graph, 1, coincidence);
5229 }
5230 
5231 /* Construct a schedule row for each node such that as many validity dependences
5232  * as possible are carried and then continue with the next band.
5233  * Do so for the case where the Feautrier scheduler was selected
5234  * by the user.
5235  */
carry_feautrier(__isl_take isl_schedule_node * node,struct isl_sched_graph * graph)5236 static __isl_give isl_schedule_node *carry_feautrier(
5237 	__isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
5238 {
5239 	return carry(node, graph, 0, 0);
5240 }
5241 
5242 /* Construct a schedule row for each node such that as many validity dependences
5243  * as possible are carried and then continue with the next band.
5244  * Do so as a fallback for the Pluto-like scheduler.
5245  */
carry_dependences(__isl_take isl_schedule_node * node,struct isl_sched_graph * graph)5246 static __isl_give isl_schedule_node *carry_dependences(
5247 	__isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
5248 {
5249 	return carry_fallback(node, graph, 0);
5250 }
5251 
5252 /* Construct a schedule row for each node such that as many validity or
5253  * coincidence dependences as possible are carried and
5254  * then continue with the next band.
5255  * Do so as a fallback for the Pluto-like scheduler.
5256  */
carry_coincidence(__isl_take isl_schedule_node * node,struct isl_sched_graph * graph)5257 static __isl_give isl_schedule_node *carry_coincidence(
5258 	__isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
5259 {
5260 	return carry_fallback(node, graph, 1);
5261 }
5262 
5263 /* Topologically sort statements mapped to the same schedule iteration
5264  * and add insert a sequence node in front of "node"
5265  * corresponding to this order.
5266  * If "initialized" is set, then it may be assumed that
5267  * isl_sched_graph_compute_maxvar
5268  * has been called on the current band.  Otherwise, call
5269  * isl_sched_graph_compute_maxvar if and before carry_dependences gets called.
5270  *
5271  * If it turns out to be impossible to sort the statements apart,
5272  * because different dependences impose different orderings
5273  * on the statements, then we extend the schedule such that
5274  * it carries at least one more dependence.
5275  */
sort_statements(__isl_take isl_schedule_node * node,struct isl_sched_graph * graph,int initialized)5276 static __isl_give isl_schedule_node *sort_statements(
5277 	__isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
5278 	int initialized)
5279 {
5280 	isl_ctx *ctx;
5281 	isl_union_set_list *filters;
5282 
5283 	if (!node)
5284 		return NULL;
5285 
5286 	ctx = isl_schedule_node_get_ctx(node);
5287 	if (graph->n < 1)
5288 		isl_die(ctx, isl_error_internal,
5289 			"graph should have at least one node",
5290 			return isl_schedule_node_free(node));
5291 
5292 	if (graph->n == 1)
5293 		return node;
5294 
5295 	if (update_edges(ctx, graph) < 0)
5296 		return isl_schedule_node_free(node);
5297 
5298 	if (graph->n_edge == 0)
5299 		return node;
5300 
5301 	if (detect_sccs(ctx, graph) < 0)
5302 		return isl_schedule_node_free(node);
5303 
5304 	next_band(graph);
5305 	if (graph->scc < graph->n) {
5306 		if (!initialized && isl_sched_graph_compute_maxvar(graph) < 0)
5307 			return isl_schedule_node_free(node);
5308 		return carry_dependences(node, graph);
5309 	}
5310 
5311 	filters = isl_sched_graph_extract_sccs(ctx, graph);
5312 	node = isl_schedule_node_insert_sequence(node, filters);
5313 
5314 	return node;
5315 }
5316 
5317 /* Are there any (non-empty) (conditional) validity edges in the graph?
5318  */
has_validity_edges(struct isl_sched_graph * graph)5319 static int has_validity_edges(struct isl_sched_graph *graph)
5320 {
5321 	int i;
5322 
5323 	for (i = 0; i < graph->n_edge; ++i) {
5324 		int empty;
5325 
5326 		empty = isl_map_plain_is_empty(graph->edge[i].map);
5327 		if (empty < 0)
5328 			return -1;
5329 		if (empty)
5330 			continue;
5331 		if (is_any_validity(&graph->edge[i]))
5332 			return 1;
5333 	}
5334 
5335 	return 0;
5336 }
5337 
5338 /* Should we apply a Feautrier step?
5339  * That is, did the user request the Feautrier algorithm and are
5340  * there any validity dependences (left)?
5341  */
need_feautrier_step(isl_ctx * ctx,struct isl_sched_graph * graph)5342 static int need_feautrier_step(isl_ctx *ctx, struct isl_sched_graph *graph)
5343 {
5344 	if (ctx->opt->schedule_algorithm != ISL_SCHEDULE_ALGORITHM_FEAUTRIER)
5345 		return 0;
5346 
5347 	return has_validity_edges(graph);
5348 }
5349 
5350 /* Compute a schedule for a connected dependence graph using Feautrier's
5351  * multi-dimensional scheduling algorithm and return the updated schedule node.
5352  *
5353  * The original algorithm is described in [1].
5354  * The main idea is to minimize the number of scheduling dimensions, by
5355  * trying to satisfy as many dependences as possible per scheduling dimension.
5356  *
5357  * [1] P. Feautrier, Some Efficient Solutions to the Affine Scheduling
5358  *     Problem, Part II: Multi-Dimensional Time.
5359  *     In Intl. Journal of Parallel Programming, 1992.
5360  */
compute_schedule_wcc_feautrier(isl_schedule_node * node,struct isl_sched_graph * graph)5361 static __isl_give isl_schedule_node *compute_schedule_wcc_feautrier(
5362 	isl_schedule_node *node, struct isl_sched_graph *graph)
5363 {
5364 	return carry_feautrier(node, graph);
5365 }
5366 
5367 /* Turn off the "local" bit on all (condition) edges.
5368  */
clear_local_edges(struct isl_sched_graph * graph)5369 static void clear_local_edges(struct isl_sched_graph *graph)
5370 {
5371 	int i;
5372 
5373 	for (i = 0; i < graph->n_edge; ++i)
5374 		if (isl_sched_edge_is_condition(&graph->edge[i]))
5375 			clear_local(&graph->edge[i]);
5376 }
5377 
5378 /* Does "graph" have both condition and conditional validity edges?
5379  */
need_condition_check(struct isl_sched_graph * graph)5380 static int need_condition_check(struct isl_sched_graph *graph)
5381 {
5382 	int i;
5383 	int any_condition = 0;
5384 	int any_conditional_validity = 0;
5385 
5386 	for (i = 0; i < graph->n_edge; ++i) {
5387 		if (isl_sched_edge_is_condition(&graph->edge[i]))
5388 			any_condition = 1;
5389 		if (isl_sched_edge_is_conditional_validity(&graph->edge[i]))
5390 			any_conditional_validity = 1;
5391 	}
5392 
5393 	return any_condition && any_conditional_validity;
5394 }
5395 
5396 /* Does "graph" contain any coincidence edge?
5397  */
has_any_coincidence(struct isl_sched_graph * graph)5398 static int has_any_coincidence(struct isl_sched_graph *graph)
5399 {
5400 	int i;
5401 
5402 	for (i = 0; i < graph->n_edge; ++i)
5403 		if (is_coincidence(&graph->edge[i]))
5404 			return 1;
5405 
5406 	return 0;
5407 }
5408 
5409 /* Extract the final schedule row as a map with the iteration domain
5410  * of "node" as domain.
5411  */
final_row(struct isl_sched_node * node)5412 static __isl_give isl_map *final_row(struct isl_sched_node *node)
5413 {
5414 	isl_multi_aff *ma;
5415 	isl_size n_row;
5416 
5417 	n_row = isl_mat_rows(node->sched);
5418 	if (n_row < 0)
5419 		return NULL;
5420 	ma = isl_sched_node_extract_partial_schedule_multi_aff(node,
5421 								n_row - 1, 1);
5422 	return isl_map_from_multi_aff(ma);
5423 }
5424 
5425 /* Is the conditional validity dependence in the edge with index "edge_index"
5426  * violated by the latest (i.e., final) row of the schedule?
5427  * That is, is i scheduled after j
5428  * for any conditional validity dependence i -> j?
5429  */
is_violated(struct isl_sched_graph * graph,int edge_index)5430 static int is_violated(struct isl_sched_graph *graph, int edge_index)
5431 {
5432 	isl_map *src_sched, *dst_sched, *map;
5433 	struct isl_sched_edge *edge = &graph->edge[edge_index];
5434 	int empty;
5435 
5436 	src_sched = final_row(edge->src);
5437 	dst_sched = final_row(edge->dst);
5438 	map = isl_map_copy(edge->map);
5439 	map = isl_map_apply_domain(map, src_sched);
5440 	map = isl_map_apply_range(map, dst_sched);
5441 	map = isl_map_order_gt(map, isl_dim_in, 0, isl_dim_out, 0);
5442 	empty = isl_map_is_empty(map);
5443 	isl_map_free(map);
5444 
5445 	if (empty < 0)
5446 		return -1;
5447 
5448 	return !empty;
5449 }
5450 
5451 /* Does "graph" have any satisfied condition edges that
5452  * are adjacent to the conditional validity constraint with
5453  * domain "conditional_source" and range "conditional_sink"?
5454  *
5455  * A satisfied condition is one that is not local.
5456  * If a condition was forced to be local already (i.e., marked as local)
5457  * then there is no need to check if it is in fact local.
5458  *
5459  * Additionally, mark all adjacent condition edges found as local.
5460  */
has_adjacent_true_conditions(struct isl_sched_graph * graph,__isl_keep isl_union_set * conditional_source,__isl_keep isl_union_set * conditional_sink)5461 static int has_adjacent_true_conditions(struct isl_sched_graph *graph,
5462 	__isl_keep isl_union_set *conditional_source,
5463 	__isl_keep isl_union_set *conditional_sink)
5464 {
5465 	int i;
5466 	int any = 0;
5467 
5468 	for (i = 0; i < graph->n_edge; ++i) {
5469 		int adjacent, local;
5470 		isl_union_map *condition;
5471 
5472 		if (!isl_sched_edge_is_condition(&graph->edge[i]))
5473 			continue;
5474 		if (is_local(&graph->edge[i]))
5475 			continue;
5476 
5477 		condition = graph->edge[i].tagged_condition;
5478 		adjacent = domain_intersects(condition, conditional_sink);
5479 		if (adjacent >= 0 && !adjacent)
5480 			adjacent = range_intersects(condition,
5481 							conditional_source);
5482 		if (adjacent < 0)
5483 			return -1;
5484 		if (!adjacent)
5485 			continue;
5486 
5487 		set_local(&graph->edge[i]);
5488 
5489 		local = is_condition_false(&graph->edge[i]);
5490 		if (local < 0)
5491 			return -1;
5492 		if (!local)
5493 			any = 1;
5494 	}
5495 
5496 	return any;
5497 }
5498 
5499 /* Are there any violated conditional validity dependences with
5500  * adjacent condition dependences that are not local with respect
5501  * to the current schedule?
5502  * That is, is the conditional validity constraint violated?
5503  *
5504  * Additionally, mark all those adjacent condition dependences as local.
5505  * We also mark those adjacent condition dependences that were not marked
5506  * as local before, but just happened to be local already.  This ensures
5507  * that they remain local if the schedule is recomputed.
5508  *
5509  * We first collect domain and range of all violated conditional validity
5510  * dependences and then check if there are any adjacent non-local
5511  * condition dependences.
5512  */
has_violated_conditional_constraint(isl_ctx * ctx,struct isl_sched_graph * graph)5513 static int has_violated_conditional_constraint(isl_ctx *ctx,
5514 	struct isl_sched_graph *graph)
5515 {
5516 	int i;
5517 	int any = 0;
5518 	isl_union_set *source, *sink;
5519 
5520 	source = isl_union_set_empty(isl_space_params_alloc(ctx, 0));
5521 	sink = isl_union_set_empty(isl_space_params_alloc(ctx, 0));
5522 	for (i = 0; i < graph->n_edge; ++i) {
5523 		isl_union_set *uset;
5524 		isl_union_map *umap;
5525 		int violated;
5526 
5527 		if (!isl_sched_edge_is_conditional_validity(&graph->edge[i]))
5528 			continue;
5529 
5530 		violated = is_violated(graph, i);
5531 		if (violated < 0)
5532 			goto error;
5533 		if (!violated)
5534 			continue;
5535 
5536 		any = 1;
5537 
5538 		umap = isl_union_map_copy(graph->edge[i].tagged_validity);
5539 		uset = isl_union_map_domain(umap);
5540 		source = isl_union_set_union(source, uset);
5541 		source = isl_union_set_coalesce(source);
5542 
5543 		umap = isl_union_map_copy(graph->edge[i].tagged_validity);
5544 		uset = isl_union_map_range(umap);
5545 		sink = isl_union_set_union(sink, uset);
5546 		sink = isl_union_set_coalesce(sink);
5547 	}
5548 
5549 	if (any)
5550 		any = has_adjacent_true_conditions(graph, source, sink);
5551 
5552 	isl_union_set_free(source);
5553 	isl_union_set_free(sink);
5554 	return any;
5555 error:
5556 	isl_union_set_free(source);
5557 	isl_union_set_free(sink);
5558 	return -1;
5559 }
5560 
5561 /* Examine the current band (the rows between graph->band_start and
5562  * graph->n_total_row), deciding whether to drop it or add it to "node"
5563  * and then continue with the computation of the next band, if any.
5564  * If "initialized" is set, then it may be assumed that
5565  * isl_sched_graph_compute_maxvar
5566  * has been called on the current band.  Otherwise, call
5567  * isl_sched_graph_compute_maxvar if and before carry_dependences gets called.
5568  *
5569  * The caller keeps looking for a new row as long as
5570  * graph->n_row < graph->maxvar.  If the latest attempt to find
5571  * such a row failed (i.e., we still have graph->n_row < graph->maxvar),
5572  * then we either
5573  * - split between SCCs and start over (assuming we found an interesting
5574  *	pair of SCCs between which to split)
5575  * - continue with the next band (assuming the current band has at least
5576  *	one row)
5577  * - if there is more than one SCC left, then split along all SCCs
5578  * - if outer coincidence needs to be enforced, then try to carry as many
5579  *	validity or coincidence dependences as possible and
5580  *	continue with the next band
5581  * - try to carry as many validity dependences as possible and
5582  *	continue with the next band
5583  * In each case, we first insert a band node in the schedule tree
5584  * if any rows have been computed.
5585  *
5586  * If the caller managed to complete the schedule and the current band
5587  * is empty, then finish off by topologically
5588  * sorting the statements based on the remaining dependences.
5589  * If, on the other hand, the current band has at least one row,
5590  * then continue with the next band.  Note that this next band
5591  * will necessarily be empty, but the graph may still be split up
5592  * into weakly connected components before arriving back here.
5593  */
isl_schedule_node_compute_finish_band(__isl_take isl_schedule_node * node,struct isl_sched_graph * graph,int initialized)5594 __isl_give isl_schedule_node *isl_schedule_node_compute_finish_band(
5595 	__isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
5596 	int initialized)
5597 {
5598 	int empty;
5599 
5600 	if (!node)
5601 		return NULL;
5602 
5603 	empty = graph->n_total_row == graph->band_start;
5604 	if (graph->n_row < graph->maxvar) {
5605 		isl_ctx *ctx;
5606 
5607 		ctx = isl_schedule_node_get_ctx(node);
5608 		if (!ctx->opt->schedule_maximize_band_depth && !empty)
5609 			return compute_next_band(node, graph, 1);
5610 		if (graph->src_scc >= 0)
5611 			return compute_split_schedule(node, graph);
5612 		if (!empty)
5613 			return compute_next_band(node, graph, 1);
5614 		if (graph->scc > 1)
5615 			return compute_component_schedule(node, graph, 1);
5616 		if (!initialized && isl_sched_graph_compute_maxvar(graph) < 0)
5617 			return isl_schedule_node_free(node);
5618 		if (isl_options_get_schedule_outer_coincidence(ctx))
5619 			return carry_coincidence(node, graph);
5620 		return carry_dependences(node, graph);
5621 	}
5622 
5623 	if (!empty)
5624 		return compute_next_band(node, graph, 1);
5625 	return sort_statements(node, graph, initialized);
5626 }
5627 
5628 /* Construct a band of schedule rows for a connected dependence graph.
5629  * The caller is responsible for determining the strongly connected
5630  * components and calling isl_sched_graph_compute_maxvar first.
5631  *
5632  * We try to find a sequence of as many schedule rows as possible that result
5633  * in non-negative dependence distances (independent of the previous rows
5634  * in the sequence, i.e., such that the sequence is tilable), with as
5635  * many of the initial rows as possible satisfying the coincidence constraints.
5636  * The computation stops if we can't find any more rows or if we have found
5637  * all the rows we wanted to find.
5638  *
5639  * If ctx->opt->schedule_outer_coincidence is set, then we force the
5640  * outermost dimension to satisfy the coincidence constraints.  If this
5641  * turns out to be impossible, we fall back on the general scheme above
5642  * and try to carry as many dependences as possible.
5643  *
5644  * If "graph" contains both condition and conditional validity dependences,
5645  * then we need to check that that the conditional schedule constraint
5646  * is satisfied, i.e., there are no violated conditional validity dependences
5647  * that are adjacent to any non-local condition dependences.
5648  * If there are, then we mark all those adjacent condition dependences
5649  * as local and recompute the current band.  Those dependences that
5650  * are marked local will then be forced to be local.
5651  * The initial computation is performed with no dependences marked as local.
5652  * If we are lucky, then there will be no violated conditional validity
5653  * dependences adjacent to any non-local condition dependences.
5654  * Otherwise, we mark some additional condition dependences as local and
5655  * recompute.  We continue this process until there are no violations left or
5656  * until we are no longer able to compute a schedule.
5657  * Since there are only a finite number of dependences,
5658  * there will only be a finite number of iterations.
5659  */
isl_schedule_node_compute_wcc_band(isl_ctx * ctx,struct isl_sched_graph * graph)5660 isl_stat isl_schedule_node_compute_wcc_band(isl_ctx *ctx,
5661 	struct isl_sched_graph *graph)
5662 {
5663 	int has_coincidence;
5664 	int use_coincidence;
5665 	int force_coincidence = 0;
5666 	int check_conditional;
5667 
5668 	if (sort_sccs(graph) < 0)
5669 		return isl_stat_error;
5670 
5671 	clear_local_edges(graph);
5672 	check_conditional = need_condition_check(graph);
5673 	has_coincidence = has_any_coincidence(graph);
5674 
5675 	if (ctx->opt->schedule_outer_coincidence)
5676 		force_coincidence = 1;
5677 
5678 	use_coincidence = has_coincidence;
5679 	while (graph->n_row < graph->maxvar) {
5680 		isl_vec *sol;
5681 		int violated;
5682 		int coincident;
5683 
5684 		graph->src_scc = -1;
5685 		graph->dst_scc = -1;
5686 
5687 		if (setup_lp(ctx, graph, use_coincidence) < 0)
5688 			return isl_stat_error;
5689 		sol = solve_lp(ctx, graph);
5690 		if (!sol)
5691 			return isl_stat_error;
5692 		if (sol->size == 0) {
5693 			int empty = graph->n_total_row == graph->band_start;
5694 
5695 			isl_vec_free(sol);
5696 			if (use_coincidence && (!force_coincidence || !empty)) {
5697 				use_coincidence = 0;
5698 				continue;
5699 			}
5700 			return isl_stat_ok;
5701 		}
5702 		coincident = !has_coincidence || use_coincidence;
5703 		if (update_schedule(graph, sol, coincident) < 0)
5704 			return isl_stat_error;
5705 
5706 		if (!check_conditional)
5707 			continue;
5708 		violated = has_violated_conditional_constraint(ctx, graph);
5709 		if (violated < 0)
5710 			return isl_stat_error;
5711 		if (!violated)
5712 			continue;
5713 		if (reset_band(graph) < 0)
5714 			return isl_stat_error;
5715 		use_coincidence = has_coincidence;
5716 	}
5717 
5718 	return isl_stat_ok;
5719 }
5720 
5721 /* Compute a schedule for a connected dependence graph by considering
5722  * the graph as a whole and return the updated schedule node.
5723  *
5724  * The actual schedule rows of the current band are computed by
5725  * isl_schedule_node_compute_wcc_band.  isl_schedule_node_compute_finish_band
5726  * takes care of integrating the band into "node" and continuing
5727  * the computation.
5728  */
compute_schedule_wcc_whole(__isl_take isl_schedule_node * node,struct isl_sched_graph * graph)5729 static __isl_give isl_schedule_node *compute_schedule_wcc_whole(
5730 	__isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
5731 {
5732 	isl_ctx *ctx;
5733 
5734 	if (!node)
5735 		return NULL;
5736 
5737 	ctx = isl_schedule_node_get_ctx(node);
5738 	if (isl_schedule_node_compute_wcc_band(ctx, graph) < 0)
5739 		return isl_schedule_node_free(node);
5740 
5741 	return isl_schedule_node_compute_finish_band(node, graph, 1);
5742 }
5743 
5744 /* Compute a schedule for a connected dependence graph and return
5745  * the updated schedule node.
5746  *
5747  * If Feautrier's algorithm is selected, we first recursively try to satisfy
5748  * as many validity dependences as possible. When all validity dependences
5749  * are satisfied we extend the schedule to a full-dimensional schedule.
5750  *
5751  * Call compute_schedule_wcc_whole or isl_schedule_node_compute_wcc_clustering
5752  * depending on whether the user has selected the option to try and
5753  * compute a schedule for the entire (weakly connected) component first.
5754  * If there is only a single strongly connected component (SCC), then
5755  * there is no point in trying to combine SCCs
5756  * in isl_schedule_node_compute_wcc_clustering, so compute_schedule_wcc_whole
5757  * is called instead.
5758  */
compute_schedule_wcc(__isl_take isl_schedule_node * node,struct isl_sched_graph * graph)5759 static __isl_give isl_schedule_node *compute_schedule_wcc(
5760 	__isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
5761 {
5762 	isl_ctx *ctx;
5763 
5764 	if (!node)
5765 		return NULL;
5766 
5767 	ctx = isl_schedule_node_get_ctx(node);
5768 	if (detect_sccs(ctx, graph) < 0)
5769 		return isl_schedule_node_free(node);
5770 
5771 	if (isl_sched_graph_compute_maxvar(graph) < 0)
5772 		return isl_schedule_node_free(node);
5773 
5774 	if (need_feautrier_step(ctx, graph))
5775 		return compute_schedule_wcc_feautrier(node, graph);
5776 
5777 	if (graph->scc <= 1 || isl_options_get_schedule_whole_component(ctx))
5778 		return compute_schedule_wcc_whole(node, graph);
5779 	else
5780 		return isl_schedule_node_compute_wcc_clustering(node, graph);
5781 }
5782 
5783 /* Compute a schedule for each group of nodes identified by node->scc
5784  * separately and then combine them in a sequence node (or as set node
5785  * if graph->weak is set) inserted at position "node" of the schedule tree.
5786  * Return the updated schedule node.
5787  *
5788  * If "wcc" is set then each of the groups belongs to a single
5789  * weakly connected component in the dependence graph so that
5790  * there is no need for compute_sub_schedule to look for weakly
5791  * connected components.
5792  *
5793  * If a set node would be introduced and if the number of components
5794  * is equal to the number of nodes, then check if the schedule
5795  * is already complete.  If so, a redundant set node would be introduced
5796  * (without any further descendants) stating that the statements
5797  * can be executed in arbitrary order, which is also expressed
5798  * by the absence of any node.  Refrain from inserting any nodes
5799  * in this case and simply return.
5800  */
compute_component_schedule(__isl_take isl_schedule_node * node,struct isl_sched_graph * graph,int wcc)5801 static __isl_give isl_schedule_node *compute_component_schedule(
5802 	__isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
5803 	int wcc)
5804 {
5805 	int component;
5806 	isl_ctx *ctx;
5807 	isl_union_set_list *filters;
5808 
5809 	if (!node)
5810 		return NULL;
5811 
5812 	if (graph->weak && graph->scc == graph->n) {
5813 		if (isl_sched_graph_compute_maxvar(graph) < 0)
5814 			return isl_schedule_node_free(node);
5815 		if (graph->n_row >= graph->maxvar)
5816 			return node;
5817 	}
5818 
5819 	ctx = isl_schedule_node_get_ctx(node);
5820 	filters = isl_sched_graph_extract_sccs(ctx, graph);
5821 	if (graph->weak)
5822 		node = isl_schedule_node_insert_set(node, filters);
5823 	else
5824 		node = isl_schedule_node_insert_sequence(node, filters);
5825 
5826 	for (component = 0; component < graph->scc; ++component) {
5827 		node = isl_schedule_node_grandchild(node, component, 0);
5828 		node = compute_sub_schedule(node, ctx, graph,
5829 				    &isl_sched_node_scc_exactly,
5830 				    &isl_sched_edge_scc_exactly,
5831 				    component, wcc);
5832 		node = isl_schedule_node_grandparent(node);
5833 	}
5834 
5835 	return node;
5836 }
5837 
5838 /* Compute a schedule for the given dependence graph and insert it at "node".
5839  * Return the updated schedule node.
5840  *
5841  * We first check if the graph is connected (through validity and conditional
5842  * validity dependences) and, if not, compute a schedule
5843  * for each component separately.
5844  * If the schedule_serialize_sccs option is set, then we check for strongly
5845  * connected components instead and compute a separate schedule for
5846  * each such strongly connected component.
5847  */
compute_schedule(isl_schedule_node * node,struct isl_sched_graph * graph)5848 static __isl_give isl_schedule_node *compute_schedule(isl_schedule_node *node,
5849 	struct isl_sched_graph *graph)
5850 {
5851 	isl_ctx *ctx;
5852 
5853 	if (!node)
5854 		return NULL;
5855 
5856 	ctx = isl_schedule_node_get_ctx(node);
5857 	if (isl_options_get_schedule_serialize_sccs(ctx)) {
5858 		if (detect_sccs(ctx, graph) < 0)
5859 			return isl_schedule_node_free(node);
5860 	} else {
5861 		if (detect_wccs(ctx, graph) < 0)
5862 			return isl_schedule_node_free(node);
5863 	}
5864 
5865 	if (graph->scc > 1)
5866 		return compute_component_schedule(node, graph, 1);
5867 
5868 	return compute_schedule_wcc(node, graph);
5869 }
5870 
5871 /* Compute a schedule on sc->domain that respects the given schedule
5872  * constraints.
5873  *
5874  * In particular, the schedule respects all the validity dependences.
5875  * If the default isl scheduling algorithm is used, it tries to minimize
5876  * the dependence distances over the proximity dependences.
5877  * If Feautrier's scheduling algorithm is used, the proximity dependence
5878  * distances are only minimized during the extension to a full-dimensional
5879  * schedule.
5880  *
5881  * If there are any condition and conditional validity dependences,
5882  * then the conditional validity dependences may be violated inside
5883  * a tilable band, provided they have no adjacent non-local
5884  * condition dependences.
5885  */
isl_schedule_constraints_compute_schedule(__isl_take isl_schedule_constraints * sc)5886 __isl_give isl_schedule *isl_schedule_constraints_compute_schedule(
5887 	__isl_take isl_schedule_constraints *sc)
5888 {
5889 	isl_ctx *ctx = isl_schedule_constraints_get_ctx(sc);
5890 	struct isl_sched_graph graph = { 0 };
5891 	isl_schedule *sched;
5892 	isl_schedule_node *node;
5893 	isl_union_set *domain;
5894 	isl_size n;
5895 
5896 	sc = isl_schedule_constraints_align_params(sc);
5897 
5898 	domain = isl_schedule_constraints_get_domain(sc);
5899 	n = isl_union_set_n_set(domain);
5900 	if (n == 0) {
5901 		isl_schedule_constraints_free(sc);
5902 		return isl_schedule_from_domain(domain);
5903 	}
5904 
5905 	if (n < 0 || isl_sched_graph_init(&graph, sc) < 0)
5906 		domain = isl_union_set_free(domain);
5907 
5908 	node = isl_schedule_node_from_domain(domain);
5909 	node = isl_schedule_node_child(node, 0);
5910 	if (graph.n > 0)
5911 		node = compute_schedule(node, &graph);
5912 	sched = isl_schedule_node_get_schedule(node);
5913 	isl_schedule_node_free(node);
5914 
5915 	isl_sched_graph_free(ctx, &graph);
5916 	isl_schedule_constraints_free(sc);
5917 
5918 	return sched;
5919 }
5920 
5921 /* Compute a schedule for the given union of domains that respects
5922  * all the validity dependences and minimizes
5923  * the dependence distances over the proximity dependences.
5924  *
5925  * This function is kept for backward compatibility.
5926  */
isl_union_set_compute_schedule(__isl_take isl_union_set * domain,__isl_take isl_union_map * validity,__isl_take isl_union_map * proximity)5927 __isl_give isl_schedule *isl_union_set_compute_schedule(
5928 	__isl_take isl_union_set *domain,
5929 	__isl_take isl_union_map *validity,
5930 	__isl_take isl_union_map *proximity)
5931 {
5932 	isl_schedule_constraints *sc;
5933 
5934 	sc = isl_schedule_constraints_on_domain(domain);
5935 	sc = isl_schedule_constraints_set_validity(sc, validity);
5936 	sc = isl_schedule_constraints_set_proximity(sc, proximity);
5937 
5938 	return isl_schedule_constraints_compute_schedule(sc);
5939 }
5940