xref: /netbsd-src/external/mit/isl/dist/isl_ast_codegen.c (revision 5971e316fdea024efff6be8f03536623db06833e)
1 /*
2  * Copyright 2012-2014 Ecole Normale Superieure
3  * Copyright 2014      INRIA Rocquencourt
4  *
5  * Use of this software is governed by the MIT license
6  *
7  * Written by Sven Verdoolaege,
8  * Ecole Normale Superieure, 45 rue d’Ulm, 75230 Paris, France
9  * and Inria Paris - Rocquencourt, Domaine de Voluceau - Rocquencourt,
10  * B.P. 105 - 78153 Le Chesnay, France
11  */
12 
13 #include <limits.h>
14 #include <isl/id.h>
15 #include <isl/val.h>
16 #include <isl/space.h>
17 #include <isl/aff.h>
18 #include <isl/constraint.h>
19 #include <isl/set.h>
20 #include <isl/ilp.h>
21 #include <isl/union_set.h>
22 #include <isl/union_map.h>
23 #include <isl/schedule_node.h>
24 #include <isl/options.h>
25 #include <isl_sort.h>
26 #include <isl_tarjan.h>
27 #include <isl_ast_private.h>
28 #include <isl_ast_build_expr.h>
29 #include <isl_ast_build_private.h>
30 #include <isl_ast_graft_private.h>
31 
32 /* Try and reduce the number of disjuncts in the representation of "set",
33  * without dropping explicit representations of local variables.
34  */
isl_set_coalesce_preserve(__isl_take isl_set * set)35 static __isl_give isl_set *isl_set_coalesce_preserve(__isl_take isl_set *set)
36 {
37 	isl_ctx *ctx;
38 	int save_preserve;
39 
40 	if (!set)
41 		return NULL;
42 
43 	ctx = isl_set_get_ctx(set);
44 	save_preserve = isl_options_get_coalesce_preserve_locals(ctx);
45 	isl_options_set_coalesce_preserve_locals(ctx, 1);
46 	set = isl_set_coalesce(set);
47 	isl_options_set_coalesce_preserve_locals(ctx, save_preserve);
48 	return set;
49 }
50 
51 /* Data used in generate_domain.
52  *
53  * "build" is the input build.
54  * "list" collects the results.
55  */
56 struct isl_generate_domain_data {
57 	isl_ast_build *build;
58 
59 	isl_ast_graft_list *list;
60 };
61 
62 static __isl_give isl_ast_graft_list *generate_next_level(
63 	__isl_take isl_union_map *executed,
64 	__isl_take isl_ast_build *build);
65 static __isl_give isl_ast_graft_list *generate_code(
66 	__isl_take isl_union_map *executed, __isl_take isl_ast_build *build,
67 	int internal);
68 
69 /* Generate an AST for a single domain based on
70  * the (non single valued) inverse schedule "executed".
71  *
72  * We extend the schedule with the iteration domain
73  * and continue generating through a call to generate_code.
74  *
75  * In particular, if executed has the form
76  *
77  *	S -> D
78  *
79  * then we continue generating code on
80  *
81  *	[S -> D] -> D
82  *
83  * The extended inverse schedule is clearly single valued
84  * ensuring that the nested generate_code will not reach this function,
85  * but will instead create calls to all elements of D that need
86  * to be executed from the current schedule domain.
87  */
generate_non_single_valued(__isl_take isl_map * executed,struct isl_generate_domain_data * data)88 static isl_stat generate_non_single_valued(__isl_take isl_map *executed,
89 	struct isl_generate_domain_data *data)
90 {
91 	isl_map *identity;
92 	isl_ast_build *build;
93 	isl_ast_graft_list *list;
94 
95 	build = isl_ast_build_copy(data->build);
96 
97 	identity = isl_set_identity(isl_map_range(isl_map_copy(executed)));
98 	executed = isl_map_domain_product(executed, identity);
99 	build = isl_ast_build_set_single_valued(build, 1);
100 
101 	list = generate_code(isl_union_map_from_map(executed), build, 1);
102 
103 	data->list = isl_ast_graft_list_concat(data->list, list);
104 
105 	return isl_stat_ok;
106 }
107 
108 /* Call the at_each_domain callback, if requested by the user,
109  * after recording the current inverse schedule in the build.
110  */
at_each_domain(__isl_take isl_ast_graft * graft,__isl_keep isl_map * executed,__isl_keep isl_ast_build * build)111 static __isl_give isl_ast_graft *at_each_domain(__isl_take isl_ast_graft *graft,
112 	__isl_keep isl_map *executed, __isl_keep isl_ast_build *build)
113 {
114 	if (!graft || !build)
115 		return isl_ast_graft_free(graft);
116 	if (!build->at_each_domain)
117 		return graft;
118 
119 	build = isl_ast_build_copy(build);
120 	build = isl_ast_build_set_executed(build,
121 			isl_union_map_from_map(isl_map_copy(executed)));
122 	if (!build)
123 		return isl_ast_graft_free(graft);
124 
125 	graft->node = build->at_each_domain(graft->node,
126 					build, build->at_each_domain_user);
127 	isl_ast_build_free(build);
128 
129 	if (!graft->node)
130 		graft = isl_ast_graft_free(graft);
131 
132 	return graft;
133 }
134 
135 /* Generate a call expression for the single executed
136  * domain element "map" and put a guard around it based its (simplified)
137  * domain.  "executed" is the original inverse schedule from which "map"
138  * has been derived.  In particular, "map" is either identical to "executed"
139  * or it is the result of gisting "executed" with respect to the build domain.
140  * "executed" is only used if there is an at_each_domain callback.
141  *
142  * At this stage, any pending constraints in the build can no longer
143  * be simplified with respect to any enforced constraints since
144  * the call node does not have any enforced constraints.
145  * Since all pending constraints not covered by any enforced constraints
146  * will be added as a guard to the graft in create_node_scaled,
147  * even in the eliminated case, the pending constraints
148  * can be considered to have been generated by outer constructs.
149  *
150  * If the user has set an at_each_domain callback, it is called
151  * on the constructed call expression node.
152  */
add_domain(__isl_take isl_map * executed,__isl_take isl_map * map,struct isl_generate_domain_data * data)153 static isl_stat add_domain(__isl_take isl_map *executed,
154 	__isl_take isl_map *map, struct isl_generate_domain_data *data)
155 {
156 	isl_ast_build *build;
157 	isl_ast_graft *graft;
158 	isl_ast_graft_list *list;
159 	isl_set *guard, *pending;
160 
161 	build = isl_ast_build_copy(data->build);
162 	pending = isl_ast_build_get_pending(build);
163 	build = isl_ast_build_replace_pending_by_guard(build, pending);
164 
165 	guard = isl_map_domain(isl_map_copy(map));
166 	guard = isl_set_compute_divs(guard);
167 	guard = isl_set_coalesce_preserve(guard);
168 	guard = isl_set_gist(guard, isl_ast_build_get_generated(build));
169 	guard = isl_ast_build_specialize(build, guard);
170 
171 	graft = isl_ast_graft_alloc_domain(map, build);
172 	graft = at_each_domain(graft, executed, build);
173 	isl_ast_build_free(build);
174 	isl_map_free(executed);
175 	graft = isl_ast_graft_add_guard(graft, guard, data->build);
176 
177 	list = isl_ast_graft_list_from_ast_graft(graft);
178 	data->list = isl_ast_graft_list_concat(data->list, list);
179 
180 	return isl_stat_ok;
181 }
182 
183 /* Generate an AST for a single domain based on
184  * the inverse schedule "executed" and add it to data->list.
185  *
186  * If there is more than one domain element associated to the current
187  * schedule "time", then we need to continue the generation process
188  * in generate_non_single_valued.
189  * Note that the inverse schedule being single-valued may depend
190  * on constraints that are only available in the original context
191  * domain specified by the user.  We therefore first introduce
192  * some of the constraints of data->build->domain.  In particular,
193  * we intersect with a single-disjunct approximation of this set.
194  * We perform this approximation to avoid further splitting up
195  * the executed relation, possibly introducing a disjunctive guard
196  * on the statement.
197  *
198  * On the other hand, we only perform the test after having taken the gist
199  * of the domain as the resulting map is the one from which the call
200  * expression is constructed.  Using this map to construct the call
201  * expression usually yields simpler results in cases where the original
202  * map is not obviously single-valued.
203  * If the original map is obviously single-valued, then the gist
204  * operation is skipped.
205  *
206  * Because we perform the single-valuedness test on the gisted map,
207  * we may in rare cases fail to recognize that the inverse schedule
208  * is single-valued.  This becomes problematic if this happens
209  * from the recursive call through generate_non_single_valued
210  * as we would then end up in an infinite recursion.
211  * We therefore check if we are inside a call to generate_non_single_valued
212  * and revert to the ungisted map if the gisted map turns out not to be
213  * single-valued.
214  *
215  * Otherwise, call add_domain to generate a call expression (with guard) and
216  * to call the at_each_domain callback, if any.
217  */
generate_domain(__isl_take isl_map * executed,void * user)218 static isl_stat generate_domain(__isl_take isl_map *executed, void *user)
219 {
220 	struct isl_generate_domain_data *data = user;
221 	isl_set *domain;
222 	isl_map *map = NULL;
223 	int empty, sv;
224 
225 	domain = isl_ast_build_get_domain(data->build);
226 	domain = isl_set_from_basic_set(isl_set_simple_hull(domain));
227 	executed = isl_map_intersect_domain(executed, domain);
228 	empty = isl_map_is_empty(executed);
229 	if (empty < 0)
230 		goto error;
231 	if (empty) {
232 		isl_map_free(executed);
233 		return isl_stat_ok;
234 	}
235 
236 	sv = isl_map_plain_is_single_valued(executed);
237 	if (sv < 0)
238 		goto error;
239 	if (sv)
240 		return add_domain(executed, isl_map_copy(executed), data);
241 
242 	executed = isl_map_coalesce(executed);
243 	map = isl_map_copy(executed);
244 	map = isl_ast_build_compute_gist_map_domain(data->build, map);
245 	sv = isl_map_is_single_valued(map);
246 	if (sv < 0)
247 		goto error;
248 	if (!sv) {
249 		isl_map_free(map);
250 		if (data->build->single_valued)
251 			map = isl_map_copy(executed);
252 		else
253 			return generate_non_single_valued(executed, data);
254 	}
255 
256 	return add_domain(executed, map, data);
257 error:
258 	isl_map_free(map);
259 	isl_map_free(executed);
260 	return isl_stat_error;
261 }
262 
263 /* Call build->create_leaf to a create "leaf" node in the AST,
264  * encapsulate the result in an isl_ast_graft and return the result
265  * as a 1-element list.
266  *
267  * Note that the node returned by the user may be an entire tree.
268  *
269  * Since the node itself cannot enforce any constraints, we turn
270  * all pending constraints into guards and add them to the resulting
271  * graft to ensure that they will be generated.
272  *
273  * Before we pass control to the user, we first clear some information
274  * from the build that is (presumbably) only meaningful
275  * for the current code generation.
276  * This includes the create_leaf callback itself, so we make a copy
277  * of the build first.
278  */
call_create_leaf(__isl_take isl_union_map * executed,__isl_take isl_ast_build * build)279 static __isl_give isl_ast_graft_list *call_create_leaf(
280 	__isl_take isl_union_map *executed, __isl_take isl_ast_build *build)
281 {
282 	isl_set *guard;
283 	isl_ast_node *node;
284 	isl_ast_graft *graft;
285 	isl_ast_build *user_build;
286 
287 	guard = isl_ast_build_get_pending(build);
288 	user_build = isl_ast_build_copy(build);
289 	user_build = isl_ast_build_replace_pending_by_guard(user_build,
290 							isl_set_copy(guard));
291 	user_build = isl_ast_build_set_executed(user_build, executed);
292 	user_build = isl_ast_build_clear_local_info(user_build);
293 	if (!user_build)
294 		node = NULL;
295 	else
296 		node = build->create_leaf(user_build, build->create_leaf_user);
297 	graft = isl_ast_graft_alloc(node, build);
298 	graft = isl_ast_graft_add_guard(graft, guard, build);
299 	isl_ast_build_free(build);
300 	return isl_ast_graft_list_from_ast_graft(graft);
301 }
302 
303 static __isl_give isl_ast_graft_list *build_ast_from_child(
304 	__isl_take isl_ast_build *build, __isl_take isl_schedule_node *node,
305 	__isl_take isl_union_map *executed);
306 
307 /* Generate an AST after having handled the complete schedule
308  * of this call to the code generator or the complete band
309  * if we are generating an AST from a schedule tree.
310  *
311  * If we are inside a band node, then move on to the child of the band.
312  *
313  * If the user has specified a create_leaf callback, control
314  * is passed to the user in call_create_leaf.
315  *
316  * Otherwise, we generate one or more calls for each individual
317  * domain in generate_domain.
318  */
generate_inner_level(__isl_take isl_union_map * executed,__isl_take isl_ast_build * build)319 static __isl_give isl_ast_graft_list *generate_inner_level(
320 	__isl_take isl_union_map *executed, __isl_take isl_ast_build *build)
321 {
322 	isl_ctx *ctx;
323 	struct isl_generate_domain_data data = { build };
324 
325 	if (!build || !executed)
326 		goto error;
327 
328 	if (isl_ast_build_has_schedule_node(build)) {
329 		isl_schedule_node *node;
330 		node = isl_ast_build_get_schedule_node(build);
331 		build = isl_ast_build_reset_schedule_node(build);
332 		return build_ast_from_child(build, node, executed);
333 	}
334 
335 	if (build->create_leaf)
336 		return call_create_leaf(executed, build);
337 
338 	ctx = isl_union_map_get_ctx(executed);
339 	data.list = isl_ast_graft_list_alloc(ctx, 0);
340 	if (isl_union_map_foreach_map(executed, &generate_domain, &data) < 0)
341 		data.list = isl_ast_graft_list_free(data.list);
342 
343 	if (0)
344 error:		data.list = NULL;
345 	isl_ast_build_free(build);
346 	isl_union_map_free(executed);
347 	return data.list;
348 }
349 
350 /* Call the before_each_for callback, if requested by the user.
351  */
before_each_for(__isl_take isl_ast_node * node,__isl_keep isl_ast_build * build)352 static __isl_give isl_ast_node *before_each_for(__isl_take isl_ast_node *node,
353 	__isl_keep isl_ast_build *build)
354 {
355 	isl_id *id;
356 
357 	if (!node || !build)
358 		return isl_ast_node_free(node);
359 	if (!build->before_each_for)
360 		return node;
361 	id = build->before_each_for(build, build->before_each_for_user);
362 	node = isl_ast_node_set_annotation(node, id);
363 	return node;
364 }
365 
366 /* Call the after_each_for callback, if requested by the user.
367  */
after_each_for(__isl_take isl_ast_graft * graft,__isl_keep isl_ast_build * build)368 static __isl_give isl_ast_graft *after_each_for(__isl_take isl_ast_graft *graft,
369 	__isl_keep isl_ast_build *build)
370 {
371 	if (!graft || !build)
372 		return isl_ast_graft_free(graft);
373 	if (!build->after_each_for)
374 		return graft;
375 	graft->node = build->after_each_for(graft->node, build,
376 						build->after_each_for_user);
377 	if (!graft->node)
378 		return isl_ast_graft_free(graft);
379 	return graft;
380 }
381 
382 /* Plug in all the know values of the current and outer dimensions
383  * in the domain of "executed".  In principle, we only need to plug
384  * in the known value of the current dimension since the values of
385  * outer dimensions have been plugged in already.
386  * However, it turns out to be easier to just plug in all known values.
387  */
plug_in_values(__isl_take isl_union_map * executed,__isl_keep isl_ast_build * build)388 static __isl_give isl_union_map *plug_in_values(
389 	__isl_take isl_union_map *executed, __isl_keep isl_ast_build *build)
390 {
391 	return isl_ast_build_substitute_values_union_map_domain(build,
392 								    executed);
393 }
394 
395 /* Check if the constraint "c" is a lower bound on dimension "pos",
396  * an upper bound, or independent of dimension "pos".
397  */
constraint_type(isl_constraint * c,int pos)398 static int constraint_type(isl_constraint *c, int pos)
399 {
400 	if (isl_constraint_is_lower_bound(c, isl_dim_set, pos))
401 		return 1;
402 	if (isl_constraint_is_upper_bound(c, isl_dim_set, pos))
403 		return 2;
404 	return 0;
405 }
406 
407 /* Compare the types of the constraints "a" and "b",
408  * resulting in constraints that are independent of "depth"
409  * to be sorted before the lower bounds on "depth", which in
410  * turn are sorted before the upper bounds on "depth".
411  */
cmp_constraint(__isl_keep isl_constraint * a,__isl_keep isl_constraint * b,void * user)412 static int cmp_constraint(__isl_keep isl_constraint *a,
413 	__isl_keep isl_constraint *b, void *user)
414 {
415 	int *depth = user;
416 	int t1 = constraint_type(a, *depth);
417 	int t2 = constraint_type(b, *depth);
418 
419 	return t1 - t2;
420 }
421 
422 /* Extract a lower bound on dimension "pos" from constraint "c".
423  *
424  * If the constraint is of the form
425  *
426  *	a x + f(...) >= 0
427  *
428  * then we essentially return
429  *
430  *	l = ceil(-f(...)/a)
431  *
432  * However, if the current dimension is strided, then we need to make
433  * sure that the lower bound we construct is of the form
434  *
435  *	f + s a
436  *
437  * with f the offset and s the stride.
438  * We therefore compute
439  *
440  *	f + s * ceil((l - f)/s)
441  */
lower_bound(__isl_keep isl_constraint * c,int pos,__isl_keep isl_ast_build * build)442 static __isl_give isl_aff *lower_bound(__isl_keep isl_constraint *c,
443 	int pos, __isl_keep isl_ast_build *build)
444 {
445 	isl_aff *aff;
446 
447 	aff = isl_constraint_get_bound(c, isl_dim_set, pos);
448 	aff = isl_aff_ceil(aff);
449 
450 	if (isl_ast_build_has_stride(build, pos)) {
451 		isl_aff *offset;
452 		isl_val *stride;
453 
454 		offset = isl_ast_build_get_offset(build, pos);
455 		stride = isl_ast_build_get_stride(build, pos);
456 
457 		aff = isl_aff_sub(aff, isl_aff_copy(offset));
458 		aff = isl_aff_scale_down_val(aff, isl_val_copy(stride));
459 		aff = isl_aff_ceil(aff);
460 		aff = isl_aff_scale_val(aff, stride);
461 		aff = isl_aff_add(aff, offset);
462 	}
463 
464 	aff = isl_ast_build_compute_gist_aff(build, aff);
465 
466 	return aff;
467 }
468 
469 /* Return the exact lower bound (or upper bound if "upper" is set)
470  * of "domain" as a piecewise affine expression.
471  *
472  * If we are computing a lower bound (of a strided dimension), then
473  * we need to make sure it is of the form
474  *
475  *	f + s a
476  *
477  * where f is the offset and s is the stride.
478  * We therefore need to include the stride constraint before computing
479  * the minimum.
480  */
exact_bound(__isl_keep isl_set * domain,__isl_keep isl_ast_build * build,int upper)481 static __isl_give isl_pw_aff *exact_bound(__isl_keep isl_set *domain,
482 	__isl_keep isl_ast_build *build, int upper)
483 {
484 	isl_set *stride;
485 	isl_map *it_map;
486 	isl_pw_aff *pa;
487 	isl_pw_multi_aff *pma;
488 
489 	domain = isl_set_copy(domain);
490 	if (!upper) {
491 		stride = isl_ast_build_get_stride_constraint(build);
492 		domain = isl_set_intersect(domain, stride);
493 	}
494 	it_map = isl_ast_build_map_to_iterator(build, domain);
495 	if (upper)
496 		pma = isl_map_lexmax_pw_multi_aff(it_map);
497 	else
498 		pma = isl_map_lexmin_pw_multi_aff(it_map);
499 	pa = isl_pw_multi_aff_get_pw_aff(pma, 0);
500 	isl_pw_multi_aff_free(pma);
501 	pa = isl_ast_build_compute_gist_pw_aff(build, pa);
502 	pa = isl_pw_aff_coalesce(pa);
503 
504 	return pa;
505 }
506 
507 /* Callback for sorting the isl_pw_aff_list passed to reduce_list and
508  * remove_redundant_lower_bounds.
509  */
reduce_list_cmp(__isl_keep isl_pw_aff * a,__isl_keep isl_pw_aff * b,void * user)510 static int reduce_list_cmp(__isl_keep isl_pw_aff *a, __isl_keep isl_pw_aff *b,
511 	void *user)
512 {
513 	return isl_pw_aff_plain_cmp(a, b);
514 }
515 
516 /* Given a list of lower bounds "list", remove those that are redundant
517  * with respect to the other bounds in "list" and the domain of "build".
518  *
519  * We first sort the bounds in the same way as they would be sorted
520  * by set_for_node_expressions so that we can try and remove the last
521  * bounds first.
522  *
523  * For a lower bound to be effective, there needs to be at least
524  * one domain element for which it is larger than all other lower bounds.
525  * For each lower bound we therefore intersect the domain with
526  * the conditions that it is larger than all other bounds and
527  * check whether the result is empty.  If so, the bound can be removed.
528  */
remove_redundant_lower_bounds(__isl_take isl_pw_aff_list * list,__isl_keep isl_ast_build * build)529 static __isl_give isl_pw_aff_list *remove_redundant_lower_bounds(
530 	__isl_take isl_pw_aff_list *list, __isl_keep isl_ast_build *build)
531 {
532 	int i, j;
533 	isl_size n;
534 	isl_set *domain;
535 
536 	list = isl_pw_aff_list_sort(list, &reduce_list_cmp, NULL);
537 
538 	n = isl_pw_aff_list_n_pw_aff(list);
539 	if (n < 0)
540 		return isl_pw_aff_list_free(list);
541 	if (n <= 1)
542 		return list;
543 
544 	domain = isl_ast_build_get_domain(build);
545 
546 	for (i = n - 1; i >= 0; --i) {
547 		isl_pw_aff *pa_i;
548 		isl_set *domain_i;
549 		int empty;
550 
551 		domain_i = isl_set_copy(domain);
552 		pa_i = isl_pw_aff_list_get_pw_aff(list, i);
553 
554 		for (j = 0; j < n; ++j) {
555 			isl_pw_aff *pa_j;
556 			isl_set *better;
557 
558 			if (j == i)
559 				continue;
560 
561 			pa_j = isl_pw_aff_list_get_pw_aff(list, j);
562 			better = isl_pw_aff_gt_set(isl_pw_aff_copy(pa_i), pa_j);
563 			domain_i = isl_set_intersect(domain_i, better);
564 		}
565 
566 		empty = isl_set_is_empty(domain_i);
567 
568 		isl_set_free(domain_i);
569 		isl_pw_aff_free(pa_i);
570 
571 		if (empty < 0)
572 			goto error;
573 		if (!empty)
574 			continue;
575 		list = isl_pw_aff_list_drop(list, i, 1);
576 		n--;
577 	}
578 
579 	isl_set_free(domain);
580 
581 	return list;
582 error:
583 	isl_set_free(domain);
584 	return isl_pw_aff_list_free(list);
585 }
586 
587 /* Extract a lower bound on dimension "pos" from each constraint
588  * in "constraints" and return the list of lower bounds.
589  * If "constraints" has zero elements, then we extract a lower bound
590  * from "domain" instead.
591  *
592  * If the current dimension is strided, then the lower bound
593  * is adjusted by lower_bound to match the stride information.
594  * This modification may make one or more lower bounds redundant
595  * with respect to the other lower bounds.  We therefore check
596  * for this condition and remove the redundant lower bounds.
597  */
lower_bounds(__isl_keep isl_constraint_list * constraints,int pos,__isl_keep isl_set * domain,__isl_keep isl_ast_build * build)598 static __isl_give isl_pw_aff_list *lower_bounds(
599 	__isl_keep isl_constraint_list *constraints, int pos,
600 	__isl_keep isl_set *domain, __isl_keep isl_ast_build *build)
601 {
602 	isl_ctx *ctx;
603 	isl_pw_aff_list *list;
604 	int i;
605 	isl_size n;
606 
607 	if (!build)
608 		return NULL;
609 
610 	n = isl_constraint_list_n_constraint(constraints);
611 	if (n < 0)
612 		return NULL;
613 	if (n == 0) {
614 		isl_pw_aff *pa;
615 		pa = exact_bound(domain, build, 0);
616 		return isl_pw_aff_list_from_pw_aff(pa);
617 	}
618 
619 	ctx = isl_ast_build_get_ctx(build);
620 	list = isl_pw_aff_list_alloc(ctx,n);
621 
622 	for (i = 0; i < n; ++i) {
623 		isl_aff *aff;
624 		isl_constraint *c;
625 
626 		c = isl_constraint_list_get_constraint(constraints, i);
627 		aff = lower_bound(c, pos, build);
628 		isl_constraint_free(c);
629 		list = isl_pw_aff_list_add(list, isl_pw_aff_from_aff(aff));
630 	}
631 
632 	if (isl_ast_build_has_stride(build, pos))
633 		list = remove_redundant_lower_bounds(list, build);
634 
635 	return list;
636 }
637 
638 /* Extract an upper bound on dimension "pos" from each constraint
639  * in "constraints" and return the list of upper bounds.
640  * If "constraints" has zero elements, then we extract an upper bound
641  * from "domain" instead.
642  */
upper_bounds(__isl_keep isl_constraint_list * constraints,int pos,__isl_keep isl_set * domain,__isl_keep isl_ast_build * build)643 static __isl_give isl_pw_aff_list *upper_bounds(
644 	__isl_keep isl_constraint_list *constraints, int pos,
645 	__isl_keep isl_set *domain, __isl_keep isl_ast_build *build)
646 {
647 	isl_ctx *ctx;
648 	isl_pw_aff_list *list;
649 	int i;
650 	isl_size n;
651 
652 	n = isl_constraint_list_n_constraint(constraints);
653 	if (n < 0)
654 		return NULL;
655 	if (n == 0) {
656 		isl_pw_aff *pa;
657 		pa = exact_bound(domain, build, 1);
658 		return isl_pw_aff_list_from_pw_aff(pa);
659 	}
660 
661 	ctx = isl_ast_build_get_ctx(build);
662 	list = isl_pw_aff_list_alloc(ctx,n);
663 
664 	for (i = 0; i < n; ++i) {
665 		isl_aff *aff;
666 		isl_constraint *c;
667 
668 		c = isl_constraint_list_get_constraint(constraints, i);
669 		aff = isl_constraint_get_bound(c, isl_dim_set, pos);
670 		isl_constraint_free(c);
671 		aff = isl_aff_floor(aff);
672 		list = isl_pw_aff_list_add(list, isl_pw_aff_from_aff(aff));
673 	}
674 
675 	return list;
676 }
677 
678 /* Return an isl_ast_expr that performs the reduction of type "type"
679  * on AST expressions corresponding to the elements in "list".
680  *
681  * The list is assumed to contain at least one element.
682  * If the list contains exactly one element, then the returned isl_ast_expr
683  * simply computes that affine expression.
684  * If the list contains more than one element, then we sort it
685  * using a fairly arbitrary but hopefully reasonably stable order.
686  */
reduce_list(enum isl_ast_expr_op_type type,__isl_keep isl_pw_aff_list * list,__isl_keep isl_ast_build * build)687 static __isl_give isl_ast_expr *reduce_list(enum isl_ast_expr_op_type type,
688 	__isl_keep isl_pw_aff_list *list, __isl_keep isl_ast_build *build)
689 {
690 	int i;
691 	isl_size n;
692 	isl_ctx *ctx;
693 	isl_ast_expr *expr;
694 
695 	n = isl_pw_aff_list_n_pw_aff(list);
696 	if (n < 0)
697 		return NULL;
698 
699 	if (n == 1)
700 		return isl_ast_build_expr_from_pw_aff_internal(build,
701 				isl_pw_aff_list_get_pw_aff(list, 0));
702 
703 	ctx = isl_pw_aff_list_get_ctx(list);
704 	expr = isl_ast_expr_alloc_op(ctx, type, n);
705 
706 	list = isl_pw_aff_list_copy(list);
707 	list = isl_pw_aff_list_sort(list, &reduce_list_cmp, NULL);
708 	if (!list)
709 		return isl_ast_expr_free(expr);
710 
711 	for (i = 0; i < n; ++i) {
712 		isl_ast_expr *expr_i;
713 
714 		expr_i = isl_ast_build_expr_from_pw_aff_internal(build,
715 				isl_pw_aff_list_get_pw_aff(list, i));
716 		expr = isl_ast_expr_op_add_arg(expr, expr_i);
717 	}
718 
719 	isl_pw_aff_list_free(list);
720 	return expr;
721 }
722 
723 /* Add guards implied by the "generated constraints",
724  * but not (necessarily) enforced by the generated AST to "guard".
725  * In particular, if there is any stride constraints,
726  * then add the guard implied by those constraints.
727  * If we have generated a degenerate loop, then add the guard
728  * implied by "bounds" on the outer dimensions, i.e., the guard
729  * that ensures that the single value actually exists.
730  * Since there may also be guards implied by a combination
731  * of these constraints, we first combine them before
732  * deriving the implied constraints.
733  */
add_implied_guards(__isl_take isl_set * guard,int degenerate,__isl_keep isl_basic_set * bounds,__isl_keep isl_ast_build * build)734 static __isl_give isl_set *add_implied_guards(__isl_take isl_set *guard,
735 	int degenerate, __isl_keep isl_basic_set *bounds,
736 	__isl_keep isl_ast_build *build)
737 {
738 	isl_size depth;
739 	isl_bool has_stride;
740 	isl_space *space;
741 	isl_set *dom, *set;
742 
743 	depth = isl_ast_build_get_depth(build);
744 	has_stride = isl_ast_build_has_stride(build, depth);
745 	if (depth < 0 || has_stride < 0)
746 		return isl_set_free(guard);
747 	if (!has_stride && !degenerate)
748 		return guard;
749 
750 	space = isl_basic_set_get_space(bounds);
751 	dom = isl_set_universe(space);
752 
753 	if (degenerate) {
754 		bounds = isl_basic_set_copy(bounds);
755 		bounds = isl_basic_set_drop_constraints_not_involving_dims(
756 					bounds, isl_dim_set, depth, 1);
757 		set = isl_set_from_basic_set(bounds);
758 		dom = isl_set_intersect(dom, set);
759 	}
760 
761 	if (has_stride) {
762 		set = isl_ast_build_get_stride_constraint(build);
763 		dom = isl_set_intersect(dom, set);
764 	}
765 
766 	dom = isl_set_eliminate(dom, isl_dim_set, depth, 1);
767 	dom = isl_ast_build_compute_gist(build, dom);
768 	guard = isl_set_intersect(guard, dom);
769 
770 	return guard;
771 }
772 
773 /* Update "graft" based on "sub_build" for the degenerate case.
774  *
775  * "build" is the build in which graft->node was created
776  * "sub_build" contains information about the current level itself,
777  * including the single value attained.
778  *
779  * We set the initialization part of the for loop to the single
780  * value attained by the current dimension.
781  * The increment and condition are not strictly needed as they are known
782  * to be "1" and "iterator <= value" respectively.
783  */
refine_degenerate(__isl_take isl_ast_graft * graft,__isl_keep isl_ast_build * build,__isl_keep isl_ast_build * sub_build)784 static __isl_give isl_ast_graft *refine_degenerate(
785 	__isl_take isl_ast_graft *graft, __isl_keep isl_ast_build *build,
786 	__isl_keep isl_ast_build *sub_build)
787 {
788 	isl_pw_aff *value;
789 	isl_ast_expr *init;
790 
791 	if (!graft || !sub_build)
792 		return isl_ast_graft_free(graft);
793 
794 	value = isl_pw_aff_copy(sub_build->value);
795 
796 	init = isl_ast_build_expr_from_pw_aff_internal(build, value);
797 	graft->node = isl_ast_node_for_set_init(graft->node, init);
798 	if (!graft->node)
799 		return isl_ast_graft_free(graft);
800 
801 	return graft;
802 }
803 
804 /* Return the intersection of constraints in "list" as a set.
805  */
intersect_constraints(__isl_keep isl_constraint_list * list)806 static __isl_give isl_set *intersect_constraints(
807 	__isl_keep isl_constraint_list *list)
808 {
809 	int i;
810 	isl_size n;
811 	isl_basic_set *bset;
812 
813 	n = isl_constraint_list_n_constraint(list);
814 	if (n < 0)
815 		return NULL;
816 	if (n < 1)
817 		isl_die(isl_constraint_list_get_ctx(list), isl_error_internal,
818 			"expecting at least one constraint", return NULL);
819 
820 	bset = isl_basic_set_from_constraint(
821 				isl_constraint_list_get_constraint(list, 0));
822 	for (i = 1; i < n; ++i) {
823 		isl_basic_set *bset_i;
824 
825 		bset_i = isl_basic_set_from_constraint(
826 				isl_constraint_list_get_constraint(list, i));
827 		bset = isl_basic_set_intersect(bset, bset_i);
828 	}
829 
830 	return isl_set_from_basic_set(bset);
831 }
832 
833 /* Compute the constraints on the outer dimensions enforced by
834  * graft->node and add those constraints to graft->enforced,
835  * in case the upper bound is expressed as a set "upper".
836  *
837  * In particular, if l(...) is a lower bound in "lower", and
838  *
839  *	-a i + f(...) >= 0		or	a i <= f(...)
840  *
841  * is an upper bound ocnstraint on the current dimension i,
842  * then the for loop enforces the constraint
843  *
844  *	-a l(...) + f(...) >= 0		or	a l(...) <= f(...)
845  *
846  * We therefore simply take each lower bound in turn, plug it into
847  * the upper bounds and compute the intersection over all lower bounds.
848  *
849  * If a lower bound is a rational expression, then
850  * isl_basic_set_preimage_multi_aff will force this rational
851  * expression to have only integer values.  However, the loop
852  * itself does not enforce this integrality constraint.  We therefore
853  * use the ceil of the lower bounds instead of the lower bounds themselves.
854  * Other constraints will make sure that the for loop is only executed
855  * when each of the lower bounds attains an integral value.
856  * In particular, potentially rational values only occur in
857  * lower_bound if the offset is a (seemingly) rational expression,
858  * but then outer conditions will make sure that this rational expression
859  * only attains integer values.
860  */
set_enforced_from_set(__isl_take isl_ast_graft * graft,__isl_keep isl_pw_aff_list * lower,int pos,__isl_keep isl_set * upper)861 static __isl_give isl_ast_graft *set_enforced_from_set(
862 	__isl_take isl_ast_graft *graft,
863 	__isl_keep isl_pw_aff_list *lower, int pos, __isl_keep isl_set *upper)
864 {
865 	isl_space *space;
866 	isl_basic_set *enforced;
867 	isl_pw_multi_aff *pma;
868 	int i;
869 	isl_size n;
870 
871 	n = isl_pw_aff_list_n_pw_aff(lower);
872 	if (!graft || n < 0)
873 		return isl_ast_graft_free(graft);
874 
875 	space = isl_set_get_space(upper);
876 	enforced = isl_basic_set_universe(isl_space_copy(space));
877 
878 	space = isl_space_map_from_set(space);
879 	pma = isl_pw_multi_aff_identity(space);
880 
881 	for (i = 0; i < n; ++i) {
882 		isl_pw_aff *pa;
883 		isl_set *enforced_i;
884 		isl_basic_set *hull;
885 		isl_pw_multi_aff *pma_i;
886 
887 		pa = isl_pw_aff_list_get_pw_aff(lower, i);
888 		pa = isl_pw_aff_ceil(pa);
889 		pma_i = isl_pw_multi_aff_copy(pma);
890 		pma_i = isl_pw_multi_aff_set_pw_aff(pma_i, pos, pa);
891 		enforced_i = isl_set_copy(upper);
892 		enforced_i = isl_set_preimage_pw_multi_aff(enforced_i, pma_i);
893 		hull = isl_set_simple_hull(enforced_i);
894 		enforced = isl_basic_set_intersect(enforced, hull);
895 	}
896 
897 	isl_pw_multi_aff_free(pma);
898 
899 	graft = isl_ast_graft_enforce(graft, enforced);
900 
901 	return graft;
902 }
903 
904 /* Compute the constraints on the outer dimensions enforced by
905  * graft->node and add those constraints to graft->enforced,
906  * in case the upper bound is expressed as
907  * a list of affine expressions "upper".
908  *
909  * The enforced condition is that each lower bound expression is less
910  * than or equal to each upper bound expression.
911  */
set_enforced_from_list(__isl_take isl_ast_graft * graft,__isl_keep isl_pw_aff_list * lower,__isl_keep isl_pw_aff_list * upper)912 static __isl_give isl_ast_graft *set_enforced_from_list(
913 	__isl_take isl_ast_graft *graft,
914 	__isl_keep isl_pw_aff_list *lower, __isl_keep isl_pw_aff_list *upper)
915 {
916 	isl_set *cond;
917 	isl_basic_set *enforced;
918 
919 	lower = isl_pw_aff_list_copy(lower);
920 	upper = isl_pw_aff_list_copy(upper);
921 	cond = isl_pw_aff_list_le_set(lower, upper);
922 	enforced = isl_set_simple_hull(cond);
923 	graft = isl_ast_graft_enforce(graft, enforced);
924 
925 	return graft;
926 }
927 
928 /* Does "aff" have a negative constant term?
929  */
aff_constant_is_negative(__isl_keep isl_set * set,__isl_keep isl_aff * aff,void * user)930 static isl_bool aff_constant_is_negative(__isl_keep isl_set *set,
931 	__isl_keep isl_aff *aff, void *user)
932 {
933 	isl_bool is_neg;
934 	isl_val *v;
935 
936 	v = isl_aff_get_constant_val(aff);
937 	is_neg = isl_val_is_neg(v);
938 	isl_val_free(v);
939 
940 	return is_neg;
941 }
942 
943 /* Does "pa" have a negative constant term over its entire domain?
944  */
pw_aff_constant_is_negative(__isl_keep isl_pw_aff * pa,void * user)945 static isl_bool pw_aff_constant_is_negative(__isl_keep isl_pw_aff *pa,
946 	void *user)
947 {
948 	return isl_pw_aff_every_piece(pa, &aff_constant_is_negative, NULL);
949 }
950 
951 /* Does each element in "list" have a negative constant term?
952  */
list_constant_is_negative(__isl_keep isl_pw_aff_list * list)953 static int list_constant_is_negative(__isl_keep isl_pw_aff_list *list)
954 {
955 	return isl_pw_aff_list_every(list, &pw_aff_constant_is_negative, NULL);
956 }
957 
958 /* Add 1 to each of the elements in "list", where each of these elements
959  * is defined over the internal schedule space of "build".
960  */
list_add_one(__isl_take isl_pw_aff_list * list,__isl_keep isl_ast_build * build)961 static __isl_give isl_pw_aff_list *list_add_one(
962 	__isl_take isl_pw_aff_list *list, __isl_keep isl_ast_build *build)
963 {
964 	int i;
965 	isl_size n;
966 	isl_space *space;
967 	isl_aff *aff;
968 	isl_pw_aff *one;
969 
970 	n = isl_pw_aff_list_n_pw_aff(list);
971 	if (n < 0)
972 		return isl_pw_aff_list_free(list);
973 
974 	space = isl_ast_build_get_space(build, 1);
975 	aff = isl_aff_zero_on_domain(isl_local_space_from_space(space));
976 	aff = isl_aff_add_constant_si(aff, 1);
977 	one = isl_pw_aff_from_aff(aff);
978 
979 	for (i = 0; i < n; ++i) {
980 		isl_pw_aff *pa;
981 		pa = isl_pw_aff_list_get_pw_aff(list, i);
982 		pa = isl_pw_aff_add(pa, isl_pw_aff_copy(one));
983 		list = isl_pw_aff_list_set_pw_aff(list, i, pa);
984 	}
985 
986 	isl_pw_aff_free(one);
987 
988 	return list;
989 }
990 
991 /* Set the condition part of the for node graft->node in case
992  * the upper bound is represented as a list of piecewise affine expressions.
993  *
994  * In particular, set the condition to
995  *
996  *	iterator <= min(list of upper bounds)
997  *
998  * If each of the upper bounds has a negative constant term, then
999  * set the condition to
1000  *
1001  *	iterator < min(list of (upper bound + 1)s)
1002  *
1003  */
set_for_cond_from_list(__isl_take isl_ast_graft * graft,__isl_keep isl_pw_aff_list * list,__isl_keep isl_ast_build * build)1004 static __isl_give isl_ast_graft *set_for_cond_from_list(
1005 	__isl_take isl_ast_graft *graft, __isl_keep isl_pw_aff_list *list,
1006 	__isl_keep isl_ast_build *build)
1007 {
1008 	int neg;
1009 	isl_ast_expr *bound, *iterator, *cond;
1010 	enum isl_ast_expr_op_type type = isl_ast_expr_op_le;
1011 
1012 	if (!graft || !list)
1013 		return isl_ast_graft_free(graft);
1014 
1015 	neg = list_constant_is_negative(list);
1016 	if (neg < 0)
1017 		return isl_ast_graft_free(graft);
1018 	list = isl_pw_aff_list_copy(list);
1019 	if (neg) {
1020 		list = list_add_one(list, build);
1021 		type = isl_ast_expr_op_lt;
1022 	}
1023 
1024 	bound = reduce_list(isl_ast_expr_op_min, list, build);
1025 	iterator = isl_ast_expr_copy(graft->node->u.f.iterator);
1026 	cond = isl_ast_expr_alloc_binary(type, iterator, bound);
1027 	graft->node = isl_ast_node_for_set_cond(graft->node, cond);
1028 
1029 	isl_pw_aff_list_free(list);
1030 	if (!graft->node)
1031 		return isl_ast_graft_free(graft);
1032 	return graft;
1033 }
1034 
1035 /* Set the condition part of the for node graft->node in case
1036  * the upper bound is represented as a set.
1037  */
set_for_cond_from_set(__isl_take isl_ast_graft * graft,__isl_keep isl_set * set,__isl_keep isl_ast_build * build)1038 static __isl_give isl_ast_graft *set_for_cond_from_set(
1039 	__isl_take isl_ast_graft *graft, __isl_keep isl_set *set,
1040 	__isl_keep isl_ast_build *build)
1041 {
1042 	isl_ast_expr *cond;
1043 
1044 	if (!graft)
1045 		return NULL;
1046 
1047 	cond = isl_ast_build_expr_from_set_internal(build, isl_set_copy(set));
1048 	graft->node = isl_ast_node_for_set_cond(graft->node, cond);
1049 	if (!graft->node)
1050 		return isl_ast_graft_free(graft);
1051 	return graft;
1052 }
1053 
1054 /* Construct an isl_ast_expr for the increment (i.e., stride) of
1055  * the current dimension.
1056  */
for_inc(__isl_keep isl_ast_build * build)1057 static __isl_give isl_ast_expr *for_inc(__isl_keep isl_ast_build *build)
1058 {
1059 	isl_size depth;
1060 	isl_val *v;
1061 	isl_ctx *ctx;
1062 
1063 	depth = isl_ast_build_get_depth(build);
1064 	if (depth < 0)
1065 		return NULL;
1066 	ctx = isl_ast_build_get_ctx(build);
1067 
1068 	if (!isl_ast_build_has_stride(build, depth))
1069 		return isl_ast_expr_alloc_int_si(ctx, 1);
1070 
1071 	v = isl_ast_build_get_stride(build, depth);
1072 	return isl_ast_expr_from_val(v);
1073 }
1074 
1075 /* Should we express the loop condition as
1076  *
1077  *	iterator <= min(list of upper bounds)
1078  *
1079  * or as a conjunction of constraints?
1080  *
1081  * The first is constructed from a list of upper bounds.
1082  * The second is constructed from a set.
1083  *
1084  * If there are no upper bounds in "constraints", then this could mean
1085  * that "domain" simply doesn't have an upper bound or that we didn't
1086  * pick any upper bound.  In the first case, we want to generate the
1087  * loop condition as a(n empty) conjunction of constraints
1088  * In the second case, we will compute
1089  * a single upper bound from "domain" and so we use the list form.
1090  *
1091  * If there are upper bounds in "constraints",
1092  * then we use the list form iff the atomic_upper_bound option is set.
1093  */
use_upper_bound_list(isl_ctx * ctx,int n_upper,__isl_keep isl_set * domain,int depth)1094 static int use_upper_bound_list(isl_ctx *ctx, int n_upper,
1095 	__isl_keep isl_set *domain, int depth)
1096 {
1097 	if (n_upper > 0)
1098 		return isl_options_get_ast_build_atomic_upper_bound(ctx);
1099 	else
1100 		return isl_set_dim_has_upper_bound(domain, isl_dim_set, depth);
1101 }
1102 
1103 /* Fill in the expressions of the for node in graft->node.
1104  *
1105  * In particular,
1106  * - set the initialization part of the loop to the maximum of the lower bounds
1107  * - extract the increment from the stride of the current dimension
1108  * - construct the for condition either based on a list of upper bounds
1109  *	or on a set of upper bound constraints.
1110  */
set_for_node_expressions(__isl_take isl_ast_graft * graft,__isl_keep isl_pw_aff_list * lower,int use_list,__isl_keep isl_pw_aff_list * upper_list,__isl_keep isl_set * upper_set,__isl_keep isl_ast_build * build)1111 static __isl_give isl_ast_graft *set_for_node_expressions(
1112 	__isl_take isl_ast_graft *graft, __isl_keep isl_pw_aff_list *lower,
1113 	int use_list, __isl_keep isl_pw_aff_list *upper_list,
1114 	__isl_keep isl_set *upper_set, __isl_keep isl_ast_build *build)
1115 {
1116 	isl_ast_expr *init;
1117 
1118 	if (!graft)
1119 		return NULL;
1120 
1121 	init = reduce_list(isl_ast_expr_op_max, lower, build);
1122 	graft->node = isl_ast_node_for_set_init(graft->node, init);
1123 	graft->node = isl_ast_node_for_set_inc(graft->node, for_inc(build));
1124 
1125 	if (!graft->node)
1126 		graft = isl_ast_graft_free(graft);
1127 
1128 	if (use_list)
1129 		graft = set_for_cond_from_list(graft, upper_list, build);
1130 	else
1131 		graft = set_for_cond_from_set(graft, upper_set, build);
1132 
1133 	return graft;
1134 }
1135 
1136 /* Update "graft" based on "bounds" and "domain" for the generic,
1137  * non-degenerate, case.
1138  *
1139  * "c_lower" and "c_upper" contain the lower and upper bounds
1140  * that the loop node should express.
1141  * "domain" is the subset of the intersection of the constraints
1142  * for which some code is executed.
1143  *
1144  * There may be zero lower bounds or zero upper bounds in "constraints"
1145  * in case the list of constraints was created
1146  * based on the atomic option or based on separation with explicit bounds.
1147  * In that case, we use "domain" to derive lower and/or upper bounds.
1148  *
1149  * We first compute a list of one or more lower bounds.
1150  *
1151  * Then we decide if we want to express the condition as
1152  *
1153  *	iterator <= min(list of upper bounds)
1154  *
1155  * or as a conjunction of constraints.
1156  *
1157  * The set of enforced constraints is then computed either based on
1158  * a list of upper bounds or on a set of upper bound constraints.
1159  * We do not compute any enforced constraints if we were forced
1160  * to compute a lower or upper bound using exact_bound.  The domains
1161  * of the resulting expressions may imply some bounds on outer dimensions
1162  * that we do not want to appear in the enforced constraints since
1163  * they are not actually enforced by the corresponding code.
1164  *
1165  * Finally, we fill in the expressions of the for node.
1166  */
refine_generic_bounds(__isl_take isl_ast_graft * graft,__isl_take isl_constraint_list * c_lower,__isl_take isl_constraint_list * c_upper,__isl_keep isl_set * domain,__isl_keep isl_ast_build * build)1167 static __isl_give isl_ast_graft *refine_generic_bounds(
1168 	__isl_take isl_ast_graft *graft,
1169 	__isl_take isl_constraint_list *c_lower,
1170 	__isl_take isl_constraint_list *c_upper,
1171 	__isl_keep isl_set *domain, __isl_keep isl_ast_build *build)
1172 {
1173 	isl_size depth;
1174 	isl_ctx *ctx;
1175 	isl_pw_aff_list *lower;
1176 	int use_list;
1177 	isl_set *upper_set = NULL;
1178 	isl_pw_aff_list *upper_list = NULL;
1179 	isl_size n_lower, n_upper;
1180 
1181 	depth = isl_ast_build_get_depth(build);
1182 	if (!graft || !c_lower || !c_upper || depth < 0)
1183 		goto error;
1184 
1185 	ctx = isl_ast_graft_get_ctx(graft);
1186 
1187 	n_lower = isl_constraint_list_n_constraint(c_lower);
1188 	n_upper = isl_constraint_list_n_constraint(c_upper);
1189 	if (n_lower < 0 || n_upper < 0)
1190 		goto error;
1191 
1192 	use_list = use_upper_bound_list(ctx, n_upper, domain, depth);
1193 
1194 	lower = lower_bounds(c_lower, depth, domain, build);
1195 
1196 	if (use_list)
1197 		upper_list = upper_bounds(c_upper, depth, domain, build);
1198 	else if (n_upper > 0)
1199 		upper_set = intersect_constraints(c_upper);
1200 	else
1201 		upper_set = isl_set_universe(isl_set_get_space(domain));
1202 
1203 	if (n_lower == 0 || n_upper == 0)
1204 		;
1205 	else if (use_list)
1206 		graft = set_enforced_from_list(graft, lower, upper_list);
1207 	else
1208 		graft = set_enforced_from_set(graft, lower, depth, upper_set);
1209 
1210 	graft = set_for_node_expressions(graft, lower, use_list, upper_list,
1211 					upper_set, build);
1212 
1213 	isl_pw_aff_list_free(lower);
1214 	isl_pw_aff_list_free(upper_list);
1215 	isl_set_free(upper_set);
1216 	isl_constraint_list_free(c_lower);
1217 	isl_constraint_list_free(c_upper);
1218 
1219 	return graft;
1220 error:
1221 	isl_constraint_list_free(c_lower);
1222 	isl_constraint_list_free(c_upper);
1223 	return isl_ast_graft_free(graft);
1224 }
1225 
1226 /* Internal data structure used inside count_constraints to keep
1227  * track of the number of constraints that are independent of dimension "pos",
1228  * the lower bounds in "pos" and the upper bounds in "pos".
1229  */
1230 struct isl_ast_count_constraints_data {
1231 	int pos;
1232 
1233 	int n_indep;
1234 	int n_lower;
1235 	int n_upper;
1236 };
1237 
1238 /* Increment data->n_indep, data->lower or data->upper depending
1239  * on whether "c" is independent of dimensions data->pos,
1240  * a lower bound or an upper bound.
1241  */
count_constraints(__isl_take isl_constraint * c,void * user)1242 static isl_stat count_constraints(__isl_take isl_constraint *c, void *user)
1243 {
1244 	struct isl_ast_count_constraints_data *data = user;
1245 
1246 	if (isl_constraint_is_lower_bound(c, isl_dim_set, data->pos))
1247 		data->n_lower++;
1248 	else if (isl_constraint_is_upper_bound(c, isl_dim_set, data->pos))
1249 		data->n_upper++;
1250 	else
1251 		data->n_indep++;
1252 
1253 	isl_constraint_free(c);
1254 
1255 	return isl_stat_ok;
1256 }
1257 
1258 /* Update "graft" based on "bounds" and "domain" for the generic,
1259  * non-degenerate, case.
1260  *
1261  * "list" respresent the list of bounds that need to be encoded by
1262  * the for loop.  Only the constraints that involve the iterator
1263  * are relevant here.  The other constraints are taken care of by
1264  * the caller and are included in the generated constraints of "build".
1265  * "domain" is the subset of the intersection of the constraints
1266  * for which some code is executed.
1267  * "build" is the build in which graft->node was created.
1268  *
1269  * We separate lower bounds, upper bounds and constraints that
1270  * are independent of the loop iterator.
1271  *
1272  * The actual for loop bounds are generated in refine_generic_bounds.
1273  */
refine_generic_split(__isl_take isl_ast_graft * graft,__isl_take isl_constraint_list * list,__isl_keep isl_set * domain,__isl_keep isl_ast_build * build)1274 static __isl_give isl_ast_graft *refine_generic_split(
1275 	__isl_take isl_ast_graft *graft, __isl_take isl_constraint_list *list,
1276 	__isl_keep isl_set *domain, __isl_keep isl_ast_build *build)
1277 {
1278 	struct isl_ast_count_constraints_data data;
1279 	isl_size depth;
1280 	isl_constraint_list *lower;
1281 	isl_constraint_list *upper;
1282 
1283 	depth = isl_ast_build_get_depth(build);
1284 	if (depth < 0)
1285 		list = isl_constraint_list_free(list);
1286 	if (!list)
1287 		return isl_ast_graft_free(graft);
1288 
1289 	data.pos = depth;
1290 
1291 	list = isl_constraint_list_sort(list, &cmp_constraint, &data.pos);
1292 	if (!list)
1293 		return isl_ast_graft_free(graft);
1294 
1295 	data.n_indep = data.n_lower = data.n_upper = 0;
1296 	if (isl_constraint_list_foreach(list, &count_constraints, &data) < 0) {
1297 		isl_constraint_list_free(list);
1298 		return isl_ast_graft_free(graft);
1299 	}
1300 
1301 	lower = isl_constraint_list_drop(list, 0, data.n_indep);
1302 	upper = isl_constraint_list_copy(lower);
1303 	lower = isl_constraint_list_drop(lower, data.n_lower, data.n_upper);
1304 	upper = isl_constraint_list_drop(upper, 0, data.n_lower);
1305 
1306 	return refine_generic_bounds(graft, lower, upper, domain, build);
1307 }
1308 
1309 /* Update "graft" based on "bounds" and "domain" for the generic,
1310  * non-degenerate, case.
1311  *
1312  * "bounds" respresent the bounds that need to be encoded by
1313  * the for loop (or a guard around the for loop).
1314  * "domain" is the subset of "bounds" for which some code is executed.
1315  * "build" is the build in which graft->node was created.
1316  *
1317  * We break up "bounds" into a list of constraints and continue with
1318  * refine_generic_split.
1319  */
refine_generic(__isl_take isl_ast_graft * graft,__isl_keep isl_basic_set * bounds,__isl_keep isl_set * domain,__isl_keep isl_ast_build * build)1320 static __isl_give isl_ast_graft *refine_generic(
1321 	__isl_take isl_ast_graft *graft,
1322 	__isl_keep isl_basic_set *bounds, __isl_keep isl_set *domain,
1323 	__isl_keep isl_ast_build *build)
1324 {
1325 	isl_constraint_list *list;
1326 
1327 	if (!build || !graft)
1328 		return isl_ast_graft_free(graft);
1329 
1330 	list = isl_basic_set_get_constraint_list(bounds);
1331 
1332 	graft = refine_generic_split(graft, list, domain, build);
1333 
1334 	return graft;
1335 }
1336 
1337 /* Create a for node for the current level.
1338  *
1339  * Mark the for node degenerate if "degenerate" is set.
1340  */
create_for(__isl_keep isl_ast_build * build,int degenerate)1341 static __isl_give isl_ast_node *create_for(__isl_keep isl_ast_build *build,
1342 	int degenerate)
1343 {
1344 	isl_size depth;
1345 	isl_id *id;
1346 	isl_ast_node *node;
1347 
1348 	depth = isl_ast_build_get_depth(build);
1349 	if (depth < 0)
1350 		return NULL;
1351 
1352 	id = isl_ast_build_get_iterator_id(build, depth);
1353 	node = isl_ast_node_alloc_for(id);
1354 	if (degenerate)
1355 		node = isl_ast_node_for_mark_degenerate(node);
1356 
1357 	return node;
1358 }
1359 
1360 /* If the ast_build_exploit_nested_bounds option is set, then return
1361  * the constraints enforced by all elements in "list".
1362  * Otherwise, return the universe.
1363  */
extract_shared_enforced(__isl_keep isl_ast_graft_list * list,__isl_keep isl_ast_build * build)1364 static __isl_give isl_basic_set *extract_shared_enforced(
1365 	__isl_keep isl_ast_graft_list *list, __isl_keep isl_ast_build *build)
1366 {
1367 	isl_ctx *ctx;
1368 	isl_space *space;
1369 
1370 	if (!list)
1371 		return NULL;
1372 
1373 	ctx = isl_ast_graft_list_get_ctx(list);
1374 	if (isl_options_get_ast_build_exploit_nested_bounds(ctx))
1375 		return isl_ast_graft_list_extract_shared_enforced(list, build);
1376 
1377 	space = isl_ast_build_get_space(build, 1);
1378 	return isl_basic_set_universe(space);
1379 }
1380 
1381 /* Return the pending constraints of "build" that are not already taken
1382  * care of (by a combination of "enforced" and the generated constraints
1383  * of "build").
1384  */
extract_pending(__isl_keep isl_ast_build * build,__isl_keep isl_basic_set * enforced)1385 static __isl_give isl_set *extract_pending(__isl_keep isl_ast_build *build,
1386 	__isl_keep isl_basic_set *enforced)
1387 {
1388 	isl_set *guard, *context;
1389 
1390 	guard = isl_ast_build_get_pending(build);
1391 	context = isl_set_from_basic_set(isl_basic_set_copy(enforced));
1392 	context = isl_set_intersect(context,
1393 					isl_ast_build_get_generated(build));
1394 	return isl_set_gist(guard, context);
1395 }
1396 
1397 /* Create an AST node for the current dimension based on
1398  * the schedule domain "bounds" and return the node encapsulated
1399  * in an isl_ast_graft.
1400  *
1401  * "executed" is the current inverse schedule, taking into account
1402  * the bounds in "bounds"
1403  * "domain" is the domain of "executed", with inner dimensions projected out.
1404  * It may be a strict subset of "bounds" in case "bounds" was created
1405  * based on the atomic option or based on separation with explicit bounds.
1406  *
1407  * "domain" may satisfy additional equalities that result
1408  * from intersecting "executed" with "bounds" in add_node.
1409  * It may also satisfy some global constraints that were dropped out because
1410  * we performed separation with explicit bounds.
1411  * The very first step is then to copy these constraints to "bounds".
1412  *
1413  * Since we may be calling before_each_for and after_each_for
1414  * callbacks, we record the current inverse schedule in the build.
1415  *
1416  * We consider three builds,
1417  * "build" is the one in which the current level is created,
1418  * "body_build" is the build in which the next level is created,
1419  * "sub_build" is essentially the same as "body_build", except that
1420  * the depth has not been increased yet.
1421  *
1422  * "build" already contains information (in strides and offsets)
1423  * about the strides at the current level, but this information is not
1424  * reflected in the build->domain.
1425  * We first add this information and the "bounds" to the sub_build->domain.
1426  * isl_ast_build_set_loop_bounds adds the stride information and
1427  * checks whether the current dimension attains
1428  * only a single value and whether this single value can be represented using
1429  * a single affine expression.
1430  * In the first case, the current level is considered "degenerate".
1431  * In the second, sub-case, the current level is considered "eliminated".
1432  * Eliminated levels don't need to be reflected in the AST since we can
1433  * simply plug in the affine expression.  For degenerate, but non-eliminated,
1434  * levels, we do introduce a for node, but mark is as degenerate so that
1435  * it can be printed as an assignment of the single value to the loop
1436  * "iterator".
1437  *
1438  * If the current level is eliminated, we explicitly plug in the value
1439  * for the current level found by isl_ast_build_set_loop_bounds in the
1440  * inverse schedule.  This ensures that if we are working on a slice
1441  * of the domain based on information available in the inverse schedule
1442  * and the build domain, that then this information is also reflected
1443  * in the inverse schedule.  This operation also eliminates the current
1444  * dimension from the inverse schedule making sure no inner dimensions depend
1445  * on the current dimension.  Otherwise, we create a for node, marking
1446  * it degenerate if appropriate.  The initial for node is still incomplete
1447  * and will be completed in either refine_degenerate or refine_generic.
1448  *
1449  * We then generate a sequence of grafts for the next level,
1450  * create a surrounding graft for the current level and insert
1451  * the for node we created (if the current level is not eliminated).
1452  * Before creating a graft for the current level, we first extract
1453  * hoistable constraints from the child guards and combine them
1454  * with the pending constraints in the build.  These constraints
1455  * are used to simplify the child guards and then added to the guard
1456  * of the current graft to ensure that they will be generated.
1457  * If the hoisted guard is a disjunction, then we use it directly
1458  * to gist the guards on the children before intersect it with the
1459  * pending constraints.  We do so because this disjunction is typically
1460  * identical to the guards on the children such that these guards
1461  * can be effectively removed completely.  After the intersection,
1462  * the gist operation would have a harder time figuring this out.
1463  *
1464  * Finally, we set the bounds of the for loop in either
1465  * refine_degenerate or refine_generic.
1466  * We do so in a context where the pending constraints of the build
1467  * have been replaced by the guard of the current graft.
1468  */
create_node_scaled(__isl_take isl_union_map * executed,__isl_take isl_basic_set * bounds,__isl_take isl_set * domain,__isl_take isl_ast_build * build)1469 static __isl_give isl_ast_graft *create_node_scaled(
1470 	__isl_take isl_union_map *executed,
1471 	__isl_take isl_basic_set *bounds, __isl_take isl_set *domain,
1472 	__isl_take isl_ast_build *build)
1473 {
1474 	isl_size depth;
1475 	int degenerate;
1476 	isl_bool eliminated;
1477 	isl_size n;
1478 	isl_basic_set *hull;
1479 	isl_basic_set *enforced;
1480 	isl_set *guard, *hoisted;
1481 	isl_ast_node *node = NULL;
1482 	isl_ast_graft *graft;
1483 	isl_ast_graft_list *children;
1484 	isl_ast_build *sub_build;
1485 	isl_ast_build *body_build;
1486 
1487 	domain = isl_ast_build_eliminate_divs(build, domain);
1488 	domain = isl_set_detect_equalities(domain);
1489 	hull = isl_set_unshifted_simple_hull(isl_set_copy(domain));
1490 	bounds = isl_basic_set_intersect(bounds, hull);
1491 	build = isl_ast_build_set_executed(build, isl_union_map_copy(executed));
1492 
1493 	depth = isl_ast_build_get_depth(build);
1494 	if (depth < 0)
1495 		build = isl_ast_build_free(build);
1496 	sub_build = isl_ast_build_copy(build);
1497 	bounds = isl_basic_set_remove_redundancies(bounds);
1498 	bounds = isl_ast_build_specialize_basic_set(sub_build, bounds);
1499 	sub_build = isl_ast_build_set_loop_bounds(sub_build,
1500 						isl_basic_set_copy(bounds));
1501 	degenerate = isl_ast_build_has_value(sub_build);
1502 	eliminated = isl_ast_build_has_affine_value(sub_build, depth);
1503 	if (degenerate < 0 || eliminated < 0)
1504 		executed = isl_union_map_free(executed);
1505 	if (!degenerate)
1506 		bounds = isl_ast_build_compute_gist_basic_set(build, bounds);
1507 	sub_build = isl_ast_build_set_pending_generated(sub_build,
1508 						isl_basic_set_copy(bounds));
1509 	if (eliminated)
1510 		executed = plug_in_values(executed, sub_build);
1511 	else
1512 		node = create_for(build, degenerate);
1513 
1514 	body_build = isl_ast_build_copy(sub_build);
1515 	body_build = isl_ast_build_increase_depth(body_build);
1516 	if (!eliminated)
1517 		node = before_each_for(node, body_build);
1518 	children = generate_next_level(executed,
1519 				    isl_ast_build_copy(body_build));
1520 
1521 	enforced = extract_shared_enforced(children, build);
1522 	guard = extract_pending(sub_build, enforced);
1523 	hoisted = isl_ast_graft_list_extract_hoistable_guard(children, build);
1524 	n = isl_set_n_basic_set(hoisted);
1525 	if (n < 0)
1526 		children = isl_ast_graft_list_free(children);
1527 	if (n > 1)
1528 		children = isl_ast_graft_list_gist_guards(children,
1529 						    isl_set_copy(hoisted));
1530 	guard = isl_set_intersect(guard, hoisted);
1531 	if (!eliminated)
1532 		guard = add_implied_guards(guard, degenerate, bounds, build);
1533 
1534 	graft = isl_ast_graft_alloc_from_children(children,
1535 			    isl_set_copy(guard), enforced, build, sub_build);
1536 
1537 	if (!eliminated) {
1538 		isl_ast_build *for_build;
1539 
1540 		graft = isl_ast_graft_insert_for(graft, node);
1541 		for_build = isl_ast_build_copy(build);
1542 		for_build = isl_ast_build_replace_pending_by_guard(for_build,
1543 							isl_set_copy(guard));
1544 		if (degenerate)
1545 			graft = refine_degenerate(graft, for_build, sub_build);
1546 		else
1547 			graft = refine_generic(graft, bounds,
1548 					domain, for_build);
1549 		isl_ast_build_free(for_build);
1550 	}
1551 	isl_set_free(guard);
1552 	if (!eliminated)
1553 		graft = after_each_for(graft, body_build);
1554 
1555 	isl_ast_build_free(body_build);
1556 	isl_ast_build_free(sub_build);
1557 	isl_ast_build_free(build);
1558 	isl_basic_set_free(bounds);
1559 	isl_set_free(domain);
1560 
1561 	return graft;
1562 }
1563 
1564 /* Internal data structure for checking if all constraints involving
1565  * the input dimension "depth" are such that the other coefficients
1566  * are multiples of "m", reducing "m" if they are not.
1567  * If "m" is reduced all the way down to "1", then the check has failed
1568  * and we break out of the iteration.
1569  */
1570 struct isl_check_scaled_data {
1571 	int depth;
1572 	isl_val *m;
1573 };
1574 
1575 /* If constraint "c" involves the input dimension data->depth,
1576  * then make sure that all the other coefficients are multiples of data->m,
1577  * reducing data->m if needed.
1578  * Break out of the iteration if data->m has become equal to "1".
1579  */
constraint_check_scaled(__isl_take isl_constraint * c,void * user)1580 static isl_stat constraint_check_scaled(__isl_take isl_constraint *c,
1581 	void *user)
1582 {
1583 	struct isl_check_scaled_data *data = user;
1584 	int i, j;
1585 	isl_size n;
1586 	enum isl_dim_type t[] = { isl_dim_param, isl_dim_in, isl_dim_out,
1587 				    isl_dim_div };
1588 
1589 	if (!isl_constraint_involves_dims(c, isl_dim_in, data->depth, 1)) {
1590 		isl_constraint_free(c);
1591 		return isl_stat_ok;
1592 	}
1593 
1594 	for (i = 0; i < 4; ++i) {
1595 		n = isl_constraint_dim(c, t[i]);
1596 		if (n < 0)
1597 			break;
1598 		for (j = 0; j < n; ++j) {
1599 			isl_val *d;
1600 
1601 			if (t[i] == isl_dim_in && j == data->depth)
1602 				continue;
1603 			if (!isl_constraint_involves_dims(c, t[i], j, 1))
1604 				continue;
1605 			d = isl_constraint_get_coefficient_val(c, t[i], j);
1606 			data->m = isl_val_gcd(data->m, d);
1607 			if (isl_val_is_one(data->m))
1608 				break;
1609 		}
1610 		if (j < n)
1611 			break;
1612 	}
1613 
1614 	isl_constraint_free(c);
1615 
1616 	return i < 4 ? isl_stat_error : isl_stat_ok;
1617 }
1618 
1619 /* For each constraint of "bmap" that involves the input dimension data->depth,
1620  * make sure that all the other coefficients are multiples of data->m,
1621  * reducing data->m if needed.
1622  * Break out of the iteration if data->m has become equal to "1".
1623  */
basic_map_check_scaled(__isl_take isl_basic_map * bmap,void * user)1624 static isl_stat basic_map_check_scaled(__isl_take isl_basic_map *bmap,
1625 	void *user)
1626 {
1627 	isl_stat r;
1628 
1629 	r = isl_basic_map_foreach_constraint(bmap,
1630 						&constraint_check_scaled, user);
1631 	isl_basic_map_free(bmap);
1632 
1633 	return r;
1634 }
1635 
1636 /* For each constraint of "map" that involves the input dimension data->depth,
1637  * make sure that all the other coefficients are multiples of data->m,
1638  * reducing data->m if needed.
1639  * Break out of the iteration if data->m has become equal to "1".
1640  */
map_check_scaled(__isl_take isl_map * map,void * user)1641 static isl_stat map_check_scaled(__isl_take isl_map *map, void *user)
1642 {
1643 	isl_stat r;
1644 
1645 	r = isl_map_foreach_basic_map(map, &basic_map_check_scaled, user);
1646 	isl_map_free(map);
1647 
1648 	return r;
1649 }
1650 
1651 /* Create an AST node for the current dimension based on
1652  * the schedule domain "bounds" and return the node encapsulated
1653  * in an isl_ast_graft.
1654  *
1655  * "executed" is the current inverse schedule, taking into account
1656  * the bounds in "bounds"
1657  * "domain" is the domain of "executed", with inner dimensions projected out.
1658  *
1659  *
1660  * Before moving on to the actual AST node construction in create_node_scaled,
1661  * we first check if the current dimension is strided and if we can scale
1662  * down this stride.  Note that we only do this if the ast_build_scale_strides
1663  * option is set.
1664  *
1665  * In particular, let the current dimension take on values
1666  *
1667  *	f + s a
1668  *
1669  * with a an integer.  We check if we can find an integer m that (obviously)
1670  * divides both f and s.
1671  *
1672  * If so, we check if the current dimension only appears in constraints
1673  * where the coefficients of the other variables are multiples of m.
1674  * We perform this extra check to avoid the risk of introducing
1675  * divisions by scaling down the current dimension.
1676  *
1677  * If so, we scale the current dimension down by a factor of m.
1678  * That is, we plug in
1679  *
1680  *	i = m i'							(1)
1681  *
1682  * Note that in principle we could always scale down strided loops
1683  * by plugging in
1684  *
1685  *	i = f + s i'
1686  *
1687  * but this may result in i' taking on larger values than the original i,
1688  * due to the shift by "f".
1689  * By constrast, the scaling in (1) can only reduce the (absolute) value "i".
1690  */
create_node(__isl_take isl_union_map * executed,__isl_take isl_basic_set * bounds,__isl_take isl_set * domain,__isl_take isl_ast_build * build)1691 static __isl_give isl_ast_graft *create_node(__isl_take isl_union_map *executed,
1692 	__isl_take isl_basic_set *bounds, __isl_take isl_set *domain,
1693 	__isl_take isl_ast_build *build)
1694 {
1695 	struct isl_check_scaled_data data;
1696 	isl_size depth;
1697 	isl_ctx *ctx;
1698 	isl_aff *offset;
1699 	isl_val *d;
1700 
1701 	ctx = isl_ast_build_get_ctx(build);
1702 	if (!isl_options_get_ast_build_scale_strides(ctx))
1703 		return create_node_scaled(executed, bounds, domain, build);
1704 
1705 	depth = isl_ast_build_get_depth(build);
1706 	if (depth < 0)
1707 		build = isl_ast_build_free(build);
1708 	data.depth = depth;
1709 	if (!isl_ast_build_has_stride(build, data.depth))
1710 		return create_node_scaled(executed, bounds, domain, build);
1711 
1712 	offset = isl_ast_build_get_offset(build, data.depth);
1713 	data.m = isl_ast_build_get_stride(build, data.depth);
1714 	if (!data.m)
1715 		offset = isl_aff_free(offset);
1716 	offset = isl_aff_scale_down_val(offset, isl_val_copy(data.m));
1717 	d = isl_aff_get_denominator_val(offset);
1718 	if (!d)
1719 		executed = isl_union_map_free(executed);
1720 
1721 	if (executed && isl_val_is_divisible_by(data.m, d))
1722 		data.m = isl_val_div(data.m, d);
1723 	else {
1724 		data.m = isl_val_set_si(data.m, 1);
1725 		isl_val_free(d);
1726 	}
1727 
1728 	if (!isl_val_is_one(data.m)) {
1729 		if (isl_union_map_foreach_map(executed, &map_check_scaled,
1730 						&data) < 0 &&
1731 		    !isl_val_is_one(data.m))
1732 			executed = isl_union_map_free(executed);
1733 	}
1734 
1735 	if (!isl_val_is_one(data.m)) {
1736 		isl_space *space;
1737 		isl_multi_aff *ma;
1738 		isl_aff *aff;
1739 		isl_map *map;
1740 		isl_union_map *umap;
1741 
1742 		space = isl_ast_build_get_space(build, 1);
1743 		space = isl_space_map_from_set(space);
1744 		ma = isl_multi_aff_identity(space);
1745 		aff = isl_multi_aff_get_aff(ma, data.depth);
1746 		aff = isl_aff_scale_val(aff, isl_val_copy(data.m));
1747 		ma = isl_multi_aff_set_aff(ma, data.depth, aff);
1748 
1749 		bounds = isl_basic_set_preimage_multi_aff(bounds,
1750 						isl_multi_aff_copy(ma));
1751 		domain = isl_set_preimage_multi_aff(domain,
1752 						isl_multi_aff_copy(ma));
1753 		map = isl_map_reverse(isl_map_from_multi_aff(ma));
1754 		umap = isl_union_map_from_map(map);
1755 		executed = isl_union_map_apply_domain(executed,
1756 						isl_union_map_copy(umap));
1757 		build = isl_ast_build_scale_down(build, isl_val_copy(data.m),
1758 						umap);
1759 	}
1760 	isl_aff_free(offset);
1761 	isl_val_free(data.m);
1762 
1763 	return create_node_scaled(executed, bounds, domain, build);
1764 }
1765 
1766 /* Add the basic set to the list that "user" points to.
1767  */
collect_basic_set(__isl_take isl_basic_set * bset,void * user)1768 static isl_stat collect_basic_set(__isl_take isl_basic_set *bset, void *user)
1769 {
1770 	isl_basic_set_list **list = user;
1771 
1772 	*list = isl_basic_set_list_add(*list, bset);
1773 
1774 	return isl_stat_ok;
1775 }
1776 
1777 /* Extract the basic sets of "set" and collect them in an isl_basic_set_list.
1778  */
isl_basic_set_list_from_set(__isl_take isl_set * set)1779 static __isl_give isl_basic_set_list *isl_basic_set_list_from_set(
1780 	__isl_take isl_set *set)
1781 {
1782 	isl_size n;
1783 	isl_ctx *ctx;
1784 	isl_basic_set_list *list;
1785 
1786 	n = isl_set_n_basic_set(set);
1787 	if (n < 0)
1788 		set = isl_set_free(set);
1789 	if (!set)
1790 		return NULL;
1791 
1792 	ctx = isl_set_get_ctx(set);
1793 
1794 	list = isl_basic_set_list_alloc(ctx, n);
1795 	if (isl_set_foreach_basic_set(set, &collect_basic_set, &list) < 0)
1796 		list = isl_basic_set_list_free(list);
1797 
1798 	isl_set_free(set);
1799 	return list;
1800 }
1801 
1802 /* Generate code for the schedule domain "bounds"
1803  * and add the result to "list".
1804  *
1805  * We mainly detect strides here and check if the bounds do not
1806  * conflict with the current build domain
1807  * and then pass over control to create_node.
1808  *
1809  * "bounds" reflects the bounds on the current dimension and possibly
1810  * some extra conditions on outer dimensions.
1811  * It does not, however, include any divs involving the current dimension,
1812  * so it does not capture any stride constraints.
1813  * We therefore need to compute that part of the schedule domain that
1814  * intersects with "bounds" and derive the strides from the result.
1815  */
add_node(__isl_take isl_ast_graft_list * list,__isl_take isl_union_map * executed,__isl_take isl_basic_set * bounds,__isl_take isl_ast_build * build)1816 static __isl_give isl_ast_graft_list *add_node(
1817 	__isl_take isl_ast_graft_list *list, __isl_take isl_union_map *executed,
1818 	__isl_take isl_basic_set *bounds, __isl_take isl_ast_build *build)
1819 {
1820 	isl_ast_graft *graft;
1821 	isl_set *domain = NULL;
1822 	isl_union_set *uset;
1823 	int empty, disjoint;
1824 
1825 	uset = isl_union_set_from_basic_set(isl_basic_set_copy(bounds));
1826 	executed = isl_union_map_intersect_domain(executed, uset);
1827 	empty = isl_union_map_is_empty(executed);
1828 	if (empty < 0)
1829 		goto error;
1830 	if (empty)
1831 		goto done;
1832 
1833 	uset = isl_union_map_domain(isl_union_map_copy(executed));
1834 	domain = isl_set_from_union_set(uset);
1835 	domain = isl_ast_build_specialize(build, domain);
1836 
1837 	domain = isl_set_compute_divs(domain);
1838 	domain = isl_ast_build_eliminate_inner(build, domain);
1839 	disjoint = isl_set_is_disjoint(domain, build->domain);
1840 	if (disjoint < 0)
1841 		goto error;
1842 	if (disjoint)
1843 		goto done;
1844 
1845 	build = isl_ast_build_detect_strides(build, isl_set_copy(domain));
1846 
1847 	graft = create_node(executed, bounds, domain,
1848 				isl_ast_build_copy(build));
1849 	list = isl_ast_graft_list_add(list, graft);
1850 	isl_ast_build_free(build);
1851 	return list;
1852 error:
1853 	list = isl_ast_graft_list_free(list);
1854 done:
1855 	isl_set_free(domain);
1856 	isl_basic_set_free(bounds);
1857 	isl_union_map_free(executed);
1858 	isl_ast_build_free(build);
1859 	return list;
1860 }
1861 
1862 /* Does any element of i follow or coincide with any element of j
1863  * at the current depth for equal values of the outer dimensions?
1864  */
domain_follows_at_depth(__isl_keep isl_basic_set * i,__isl_keep isl_basic_set * j,void * user)1865 static isl_bool domain_follows_at_depth(__isl_keep isl_basic_set *i,
1866 	__isl_keep isl_basic_set *j, void *user)
1867 {
1868 	int depth = *(int *) user;
1869 	isl_basic_map *test;
1870 	isl_bool empty;
1871 	int l;
1872 
1873 	test = isl_basic_map_from_domain_and_range(isl_basic_set_copy(i),
1874 						    isl_basic_set_copy(j));
1875 	for (l = 0; l < depth; ++l)
1876 		test = isl_basic_map_equate(test, isl_dim_in, l,
1877 						isl_dim_out, l);
1878 	test = isl_basic_map_order_ge(test, isl_dim_in, depth,
1879 					isl_dim_out, depth);
1880 	empty = isl_basic_map_is_empty(test);
1881 	isl_basic_map_free(test);
1882 
1883 	return isl_bool_not(empty);
1884 }
1885 
1886 /* Split up each element of "list" into a part that is related to "bset"
1887  * according to "gt" and a part that is not.
1888  * Return a list that consist of "bset" and all the pieces.
1889  */
add_split_on(__isl_take isl_basic_set_list * list,__isl_take isl_basic_set * bset,__isl_keep isl_basic_map * gt)1890 static __isl_give isl_basic_set_list *add_split_on(
1891 	__isl_take isl_basic_set_list *list, __isl_take isl_basic_set *bset,
1892 	__isl_keep isl_basic_map *gt)
1893 {
1894 	int i;
1895 	isl_size n;
1896 	isl_basic_set_list *res;
1897 
1898 	n = isl_basic_set_list_n_basic_set(list);
1899 	if (n < 0)
1900 		bset = isl_basic_set_free(bset);
1901 
1902 	gt = isl_basic_map_copy(gt);
1903 	gt = isl_basic_map_intersect_domain(gt, isl_basic_set_copy(bset));
1904 	res = isl_basic_set_list_from_basic_set(bset);
1905 	for (i = 0; res && i < n; ++i) {
1906 		isl_basic_set *bset;
1907 		isl_set *set1, *set2;
1908 		isl_basic_map *bmap;
1909 		int empty;
1910 
1911 		bset = isl_basic_set_list_get_basic_set(list, i);
1912 		bmap = isl_basic_map_copy(gt);
1913 		bmap = isl_basic_map_intersect_range(bmap, bset);
1914 		bset = isl_basic_map_range(bmap);
1915 		empty = isl_basic_set_is_empty(bset);
1916 		if (empty < 0)
1917 			res = isl_basic_set_list_free(res);
1918 		if (empty)  {
1919 			isl_basic_set_free(bset);
1920 			bset = isl_basic_set_list_get_basic_set(list, i);
1921 			res = isl_basic_set_list_add(res, bset);
1922 			continue;
1923 		}
1924 
1925 		res = isl_basic_set_list_add(res, isl_basic_set_copy(bset));
1926 		set1 = isl_set_from_basic_set(bset);
1927 		bset = isl_basic_set_list_get_basic_set(list, i);
1928 		set2 = isl_set_from_basic_set(bset);
1929 		set1 = isl_set_subtract(set2, set1);
1930 		set1 = isl_set_make_disjoint(set1);
1931 
1932 		res = isl_basic_set_list_concat(res,
1933 					    isl_basic_set_list_from_set(set1));
1934 	}
1935 	isl_basic_map_free(gt);
1936 	isl_basic_set_list_free(list);
1937 	return res;
1938 }
1939 
1940 static __isl_give isl_ast_graft_list *generate_sorted_domains(
1941 	__isl_keep isl_basic_set_list *domain_list,
1942 	__isl_keep isl_union_map *executed,
1943 	__isl_keep isl_ast_build *build);
1944 
1945 /* Internal data structure for add_nodes.
1946  *
1947  * "executed" and "build" are extra arguments to be passed to add_node.
1948  * "list" collects the results.
1949  */
1950 struct isl_add_nodes_data {
1951 	isl_union_map *executed;
1952 	isl_ast_build *build;
1953 
1954 	isl_ast_graft_list *list;
1955 };
1956 
1957 /* Generate code for the schedule domains in "scc"
1958  * and add the results to "list".
1959  *
1960  * The domains in "scc" form a strongly connected component in the ordering.
1961  * If the number of domains in "scc" is larger than 1, then this means
1962  * that we cannot determine a valid ordering for the domains in the component.
1963  * This should be fairly rare because the individual domains
1964  * have been made disjoint first.
1965  * The problem is that the domains may be integrally disjoint but not
1966  * rationally disjoint.  For example, we may have domains
1967  *
1968  *	{ [i,i] : 0 <= i <= 1 }		and	{ [i,1-i] : 0 <= i <= 1 }
1969  *
1970  * These two domains have an empty intersection, but their rational
1971  * relaxations do intersect.  It is impossible to order these domains
1972  * in the second dimension because the first should be ordered before
1973  * the second for outer dimension equal to 0, while it should be ordered
1974  * after for outer dimension equal to 1.
1975  *
1976  * This may happen in particular in case of unrolling since the domain
1977  * of each slice is replaced by its simple hull.
1978  *
1979  * For each basic set i in "scc" and for each of the following basic sets j,
1980  * we split off that part of the basic set i that shares the outer dimensions
1981  * with j and lies before j in the current dimension.
1982  * We collect all the pieces in a new list that replaces "scc".
1983  *
1984  * While the elements in "scc" should be disjoint, we double-check
1985  * this property to avoid running into an infinite recursion in case
1986  * they intersect due to some internal error.
1987  */
add_nodes(__isl_take isl_basic_set_list * scc,void * user)1988 static isl_stat add_nodes(__isl_take isl_basic_set_list *scc, void *user)
1989 {
1990 	struct isl_add_nodes_data *data = user;
1991 	int i;
1992 	isl_size depth;
1993 	isl_size n;
1994 	isl_basic_set *bset, *first;
1995 	isl_basic_set_list *list;
1996 	isl_space *space;
1997 	isl_basic_map *gt;
1998 
1999 	n = isl_basic_set_list_n_basic_set(scc);
2000 	if (n < 0)
2001 		goto error;
2002 	bset = isl_basic_set_list_get_basic_set(scc, 0);
2003 	if (n == 1) {
2004 		isl_basic_set_list_free(scc);
2005 		data->list = add_node(data->list,
2006 				isl_union_map_copy(data->executed), bset,
2007 				isl_ast_build_copy(data->build));
2008 		return data->list ? isl_stat_ok : isl_stat_error;
2009 	}
2010 
2011 	depth = isl_ast_build_get_depth(data->build);
2012 	if (depth < 0)
2013 		bset = isl_basic_set_free(bset);
2014 	space = isl_basic_set_get_space(bset);
2015 	space = isl_space_map_from_set(space);
2016 	gt = isl_basic_map_universe(space);
2017 	for (i = 0; i < depth; ++i)
2018 		gt = isl_basic_map_equate(gt, isl_dim_in, i, isl_dim_out, i);
2019 	gt = isl_basic_map_order_gt(gt, isl_dim_in, depth, isl_dim_out, depth);
2020 
2021 	first = isl_basic_set_copy(bset);
2022 	list = isl_basic_set_list_from_basic_set(bset);
2023 	for (i = 1; i < n; ++i) {
2024 		int disjoint;
2025 
2026 		bset = isl_basic_set_list_get_basic_set(scc, i);
2027 
2028 		disjoint = isl_basic_set_is_disjoint(bset, first);
2029 		if (disjoint < 0)
2030 			list = isl_basic_set_list_free(list);
2031 		else if (!disjoint)
2032 			isl_die(isl_basic_set_list_get_ctx(scc),
2033 				isl_error_internal,
2034 				"basic sets in scc are assumed to be disjoint",
2035 				list = isl_basic_set_list_free(list));
2036 
2037 		list = add_split_on(list, bset, gt);
2038 	}
2039 	isl_basic_set_free(first);
2040 	isl_basic_map_free(gt);
2041 	isl_basic_set_list_free(scc);
2042 	scc = list;
2043 	data->list = isl_ast_graft_list_concat(data->list,
2044 		    generate_sorted_domains(scc, data->executed, data->build));
2045 	isl_basic_set_list_free(scc);
2046 
2047 	return data->list ? isl_stat_ok : isl_stat_error;
2048 error:
2049 	isl_basic_set_list_free(scc);
2050 	return isl_stat_error;
2051 }
2052 
2053 /* Sort the domains in "domain_list" according to the execution order
2054  * at the current depth (for equal values of the outer dimensions),
2055  * generate code for each of them, collecting the results in a list.
2056  * If no code is generated (because the intersection of the inverse schedule
2057  * with the domains turns out to be empty), then an empty list is returned.
2058  *
2059  * The caller is responsible for ensuring that the basic sets in "domain_list"
2060  * are pair-wise disjoint.  It can, however, in principle happen that
2061  * two basic sets should be ordered one way for one value of the outer
2062  * dimensions and the other way for some other value of the outer dimensions.
2063  * We therefore play safe and look for strongly connected components.
2064  * The function add_nodes takes care of handling non-trivial components.
2065  */
generate_sorted_domains(__isl_keep isl_basic_set_list * domain_list,__isl_keep isl_union_map * executed,__isl_keep isl_ast_build * build)2066 static __isl_give isl_ast_graft_list *generate_sorted_domains(
2067 	__isl_keep isl_basic_set_list *domain_list,
2068 	__isl_keep isl_union_map *executed, __isl_keep isl_ast_build *build)
2069 {
2070 	isl_ctx *ctx;
2071 	struct isl_add_nodes_data data;
2072 	isl_size depth;
2073 	isl_size n;
2074 
2075 	n = isl_basic_set_list_n_basic_set(domain_list);
2076 	if (n < 0)
2077 		return NULL;
2078 
2079 	ctx = isl_basic_set_list_get_ctx(domain_list);
2080 	data.list = isl_ast_graft_list_alloc(ctx, n);
2081 	if (n == 0)
2082 		return data.list;
2083 	if (n == 1)
2084 		return add_node(data.list, isl_union_map_copy(executed),
2085 			isl_basic_set_list_get_basic_set(domain_list, 0),
2086 			isl_ast_build_copy(build));
2087 
2088 	depth = isl_ast_build_get_depth(build);
2089 	data.executed = executed;
2090 	data.build = build;
2091 	if (depth < 0 || isl_basic_set_list_foreach_scc(domain_list,
2092 					&domain_follows_at_depth, &depth,
2093 					&add_nodes, &data) < 0)
2094 		data.list = isl_ast_graft_list_free(data.list);
2095 
2096 	return data.list;
2097 }
2098 
2099 /* Do i and j share any values for the outer dimensions?
2100  */
shared_outer(__isl_keep isl_basic_set * i,__isl_keep isl_basic_set * j,void * user)2101 static isl_bool shared_outer(__isl_keep isl_basic_set *i,
2102 	__isl_keep isl_basic_set *j, void *user)
2103 {
2104 	int depth = *(int *) user;
2105 	isl_basic_map *test;
2106 	isl_bool empty;
2107 	int l;
2108 
2109 	test = isl_basic_map_from_domain_and_range(isl_basic_set_copy(i),
2110 						    isl_basic_set_copy(j));
2111 	for (l = 0; l < depth; ++l)
2112 		test = isl_basic_map_equate(test, isl_dim_in, l,
2113 						isl_dim_out, l);
2114 	empty = isl_basic_map_is_empty(test);
2115 	isl_basic_map_free(test);
2116 
2117 	return isl_bool_not(empty);
2118 }
2119 
2120 /* Internal data structure for generate_sorted_domains_wrap.
2121  *
2122  * "n" is the total number of basic sets
2123  * "executed" and "build" are extra arguments to be passed
2124  *	to generate_sorted_domains.
2125  *
2126  * "single" is set to 1 by generate_sorted_domains_wrap if there
2127  * is only a single component.
2128  * "list" collects the results.
2129  */
2130 struct isl_ast_generate_parallel_domains_data {
2131 	isl_size n;
2132 	isl_union_map *executed;
2133 	isl_ast_build *build;
2134 
2135 	int single;
2136 	isl_ast_graft_list *list;
2137 };
2138 
2139 /* Call generate_sorted_domains on "scc", fuse the result into a list
2140  * with either zero or one graft and collect the these single element
2141  * lists into data->list.
2142  *
2143  * If there is only one component, i.e., if the number of basic sets
2144  * in the current component is equal to the total number of basic sets,
2145  * then data->single is set to 1 and the result of generate_sorted_domains
2146  * is not fused.
2147  */
generate_sorted_domains_wrap(__isl_take isl_basic_set_list * scc,void * user)2148 static isl_stat generate_sorted_domains_wrap(__isl_take isl_basic_set_list *scc,
2149 	void *user)
2150 {
2151 	struct isl_ast_generate_parallel_domains_data *data = user;
2152 	isl_ast_graft_list *list;
2153 	isl_size n;
2154 
2155 	n = isl_basic_set_list_n_basic_set(scc);
2156 	if (n < 0)
2157 		scc = isl_basic_set_list_free(scc);
2158 	list = generate_sorted_domains(scc, data->executed, data->build);
2159 	data->single = n == data->n;
2160 	if (!data->single)
2161 		list = isl_ast_graft_list_fuse(list, data->build);
2162 	if (!data->list)
2163 		data->list = list;
2164 	else
2165 		data->list = isl_ast_graft_list_concat(data->list, list);
2166 
2167 	isl_basic_set_list_free(scc);
2168 	if (!data->list)
2169 		return isl_stat_error;
2170 
2171 	return isl_stat_ok;
2172 }
2173 
2174 /* Look for any (weakly connected) components in the "domain_list"
2175  * of domains that share some values of the outer dimensions.
2176  * That is, domains in different components do not share any values
2177  * of the outer dimensions.  This means that these components
2178  * can be freely reordered.
2179  * Within each of the components, we sort the domains according
2180  * to the execution order at the current depth.
2181  *
2182  * If there is more than one component, then generate_sorted_domains_wrap
2183  * fuses the result of each call to generate_sorted_domains
2184  * into a list with either zero or one graft and collects these (at most)
2185  * single element lists into a bigger list. This means that the elements of the
2186  * final list can be freely reordered.  In particular, we sort them
2187  * according to an arbitrary but fixed ordering to ease merging of
2188  * graft lists from different components.
2189  */
generate_parallel_domains(__isl_keep isl_basic_set_list * domain_list,__isl_keep isl_union_map * executed,__isl_keep isl_ast_build * build)2190 static __isl_give isl_ast_graft_list *generate_parallel_domains(
2191 	__isl_keep isl_basic_set_list *domain_list,
2192 	__isl_keep isl_union_map *executed, __isl_keep isl_ast_build *build)
2193 {
2194 	isl_size depth;
2195 	struct isl_ast_generate_parallel_domains_data data;
2196 
2197 	data.n = isl_basic_set_list_n_basic_set(domain_list);
2198 	if (data.n < 0)
2199 		return NULL;
2200 
2201 	if (data.n <= 1)
2202 		return generate_sorted_domains(domain_list, executed, build);
2203 
2204 	depth = isl_ast_build_get_depth(build);
2205 	if (depth < 0)
2206 		return NULL;
2207 	data.list = NULL;
2208 	data.executed = executed;
2209 	data.build = build;
2210 	data.single = 0;
2211 	if (isl_basic_set_list_foreach_scc(domain_list, &shared_outer, &depth,
2212 					    &generate_sorted_domains_wrap,
2213 					    &data) < 0)
2214 		data.list = isl_ast_graft_list_free(data.list);
2215 
2216 	if (!data.single)
2217 		data.list = isl_ast_graft_list_sort_guard(data.list);
2218 
2219 	return data.list;
2220 }
2221 
2222 /* Internal data for separate_domain.
2223  *
2224  * "explicit" is set if we only want to use explicit bounds.
2225  *
2226  * "domain" collects the separated domains.
2227  */
2228 struct isl_separate_domain_data {
2229 	isl_ast_build *build;
2230 	int explicit;
2231 	isl_set *domain;
2232 };
2233 
2234 /* Extract implicit bounds on the current dimension for the executed "map".
2235  *
2236  * The domain of "map" may involve inner dimensions, so we
2237  * need to eliminate them.
2238  */
implicit_bounds(__isl_take isl_map * map,__isl_keep isl_ast_build * build)2239 static __isl_give isl_set *implicit_bounds(__isl_take isl_map *map,
2240 	__isl_keep isl_ast_build *build)
2241 {
2242 	isl_set *domain;
2243 
2244 	domain = isl_map_domain(map);
2245 	domain = isl_ast_build_eliminate(build, domain);
2246 
2247 	return domain;
2248 }
2249 
2250 /* Extract explicit bounds on the current dimension for the executed "map".
2251  *
2252  * Rather than eliminating the inner dimensions as in implicit_bounds,
2253  * we simply drop any constraints involving those inner dimensions.
2254  * The idea is that most bounds that are implied by constraints on the
2255  * inner dimensions will be enforced by for loops and not by explicit guards.
2256  * There is then no need to separate along those bounds.
2257  */
explicit_bounds(__isl_take isl_map * map,__isl_keep isl_ast_build * build)2258 static __isl_give isl_set *explicit_bounds(__isl_take isl_map *map,
2259 	__isl_keep isl_ast_build *build)
2260 {
2261 	isl_set *domain;
2262 	isl_size depth;
2263 	isl_size dim;
2264 
2265 	depth = isl_ast_build_get_depth(build);
2266 	dim = isl_map_dim(map, isl_dim_out);
2267 	if (depth < 0 || dim < 0)
2268 		return isl_map_domain(isl_map_free(map));
2269 	map = isl_map_drop_constraints_involving_dims(map, isl_dim_out, 0, dim);
2270 
2271 	domain = isl_map_domain(map);
2272 	dim = isl_set_dim(domain, isl_dim_set);
2273 	domain = isl_set_detect_equalities(domain);
2274 	domain = isl_set_drop_constraints_involving_dims(domain,
2275 				isl_dim_set, depth + 1, dim - (depth + 1));
2276 	domain = isl_set_remove_divs_involving_dims(domain,
2277 				isl_dim_set, depth, 1);
2278 	domain = isl_set_remove_unknown_divs(domain);
2279 
2280 	return domain;
2281 }
2282 
2283 /* Split data->domain into pieces that intersect with the range of "map"
2284  * and pieces that do not intersect with the range of "map"
2285  * and then add that part of the range of "map" that does not intersect
2286  * with data->domain.
2287  */
separate_domain(__isl_take isl_map * map,void * user)2288 static isl_stat separate_domain(__isl_take isl_map *map, void *user)
2289 {
2290 	struct isl_separate_domain_data *data = user;
2291 	isl_set *domain;
2292 	isl_set *d1, *d2;
2293 
2294 	if (data->explicit)
2295 		domain = explicit_bounds(map, data->build);
2296 	else
2297 		domain = implicit_bounds(map, data->build);
2298 
2299 	domain = isl_set_coalesce(domain);
2300 	domain = isl_set_make_disjoint(domain);
2301 	d1 = isl_set_subtract(isl_set_copy(domain), isl_set_copy(data->domain));
2302 	d2 = isl_set_subtract(isl_set_copy(data->domain), isl_set_copy(domain));
2303 	data->domain = isl_set_intersect(data->domain, domain);
2304 	data->domain = isl_set_union(data->domain, d1);
2305 	data->domain = isl_set_union(data->domain, d2);
2306 
2307 	return isl_stat_ok;
2308 }
2309 
2310 /* Separate the schedule domains of "executed".
2311  *
2312  * That is, break up the domain of "executed" into basic sets,
2313  * such that for each basic set S, every element in S is associated with
2314  * the same domain spaces.
2315  *
2316  * "space" is the (single) domain space of "executed".
2317  */
separate_schedule_domains(__isl_take isl_space * space,__isl_take isl_union_map * executed,__isl_keep isl_ast_build * build)2318 static __isl_give isl_set *separate_schedule_domains(
2319 	__isl_take isl_space *space, __isl_take isl_union_map *executed,
2320 	__isl_keep isl_ast_build *build)
2321 {
2322 	struct isl_separate_domain_data data = { build };
2323 	isl_ctx *ctx;
2324 
2325 	ctx = isl_ast_build_get_ctx(build);
2326 	data.explicit = isl_options_get_ast_build_separation_bounds(ctx) ==
2327 				    ISL_AST_BUILD_SEPARATION_BOUNDS_EXPLICIT;
2328 	data.domain = isl_set_empty(space);
2329 	if (isl_union_map_foreach_map(executed, &separate_domain, &data) < 0)
2330 		data.domain = isl_set_free(data.domain);
2331 
2332 	isl_union_map_free(executed);
2333 	return data.domain;
2334 }
2335 
2336 /* Temporary data used during the search for a lower bound for unrolling.
2337  *
2338  * "build" is the build in which the unrolling will be performed
2339  * "domain" is the original set for which to find a lower bound
2340  * "depth" is the dimension for which to find a lower boudn
2341  * "expansion" is the expansion that needs to be applied to "domain"
2342  * in the unrolling that will be performed
2343  *
2344  * "lower" is the best lower bound found so far.  It is NULL if we have not
2345  * found any yet.
2346  * "n" is the corresponding size.  If lower is NULL, then the value of n
2347  * is undefined.
2348  * "n_div" is the maximal number of integer divisions in the first
2349  * unrolled iteration (after expansion).  It is set to -1 if it hasn't
2350  * been computed yet.
2351  */
2352 struct isl_find_unroll_data {
2353 	isl_ast_build *build;
2354 	isl_set *domain;
2355 	int depth;
2356 	isl_basic_map *expansion;
2357 
2358 	isl_aff *lower;
2359 	int *n;
2360 	int n_div;
2361 };
2362 
2363 /* Return the constraint
2364  *
2365  *	i_"depth" = aff + offset
2366  */
at_offset(int depth,__isl_keep isl_aff * aff,int offset)2367 static __isl_give isl_constraint *at_offset(int depth, __isl_keep isl_aff *aff,
2368 	int offset)
2369 {
2370 	aff = isl_aff_copy(aff);
2371 	aff = isl_aff_add_coefficient_si(aff, isl_dim_in, depth, -1);
2372 	aff = isl_aff_add_constant_si(aff, offset);
2373 	return isl_equality_from_aff(aff);
2374 }
2375 
2376 /* Update *user to the number of integer divisions in the first element
2377  * of "ma", if it is larger than the current value.
2378  */
update_n_div(__isl_take isl_set * set,__isl_take isl_multi_aff * ma,void * user)2379 static isl_stat update_n_div(__isl_take isl_set *set,
2380 	__isl_take isl_multi_aff *ma, void *user)
2381 {
2382 	isl_aff *aff;
2383 	int *n = user;
2384 	isl_size n_div;
2385 
2386 	aff = isl_multi_aff_get_aff(ma, 0);
2387 	n_div = isl_aff_dim(aff, isl_dim_div);
2388 	isl_aff_free(aff);
2389 	isl_multi_aff_free(ma);
2390 	isl_set_free(set);
2391 
2392 	if (n_div > *n)
2393 		*n = n_div;
2394 
2395 	return n_div >= 0 ? isl_stat_ok : isl_stat_error;
2396 }
2397 
2398 /* Get the number of integer divisions in the expression for the iterator
2399  * value at the first slice in the unrolling based on lower bound "lower",
2400  * taking into account the expansion that needs to be performed on this slice.
2401  */
get_expanded_n_div(struct isl_find_unroll_data * data,__isl_keep isl_aff * lower)2402 static int get_expanded_n_div(struct isl_find_unroll_data *data,
2403 	__isl_keep isl_aff *lower)
2404 {
2405 	isl_constraint *c;
2406 	isl_set *set;
2407 	isl_map *it_map, *expansion;
2408 	isl_pw_multi_aff *pma;
2409 	int n;
2410 
2411 	c = at_offset(data->depth, lower, 0);
2412 	set = isl_set_copy(data->domain);
2413 	set = isl_set_add_constraint(set, c);
2414 	expansion = isl_map_from_basic_map(isl_basic_map_copy(data->expansion));
2415 	set = isl_set_apply(set, expansion);
2416 	it_map = isl_ast_build_map_to_iterator(data->build, set);
2417 	pma = isl_pw_multi_aff_from_map(it_map);
2418 	n = 0;
2419 	if (isl_pw_multi_aff_foreach_piece(pma, &update_n_div, &n) < 0)
2420 		n = -1;
2421 	isl_pw_multi_aff_free(pma);
2422 
2423 	return n;
2424 }
2425 
2426 /* Is the lower bound "lower" with corresponding iteration count "n"
2427  * better than the one stored in "data"?
2428  * If there is no upper bound on the iteration count ("n" is infinity) or
2429  * if the count is too large, then we cannot use this lower bound.
2430  * Otherwise, if there was no previous lower bound or
2431  * if the iteration count of the new lower bound is smaller than
2432  * the iteration count of the previous lower bound, then we consider
2433  * the new lower bound to be better.
2434  * If the iteration count is the same, then compare the number
2435  * of integer divisions that would be needed to express
2436  * the iterator value at the first slice in the unrolling
2437  * according to the lower bound.  If we end up computing this
2438  * number, then store the lowest value in data->n_div.
2439  */
is_better_lower_bound(struct isl_find_unroll_data * data,__isl_keep isl_aff * lower,__isl_keep isl_val * n)2440 static int is_better_lower_bound(struct isl_find_unroll_data *data,
2441 	__isl_keep isl_aff *lower, __isl_keep isl_val *n)
2442 {
2443 	int cmp;
2444 	int n_div;
2445 
2446 	if (!n)
2447 		return -1;
2448 	if (isl_val_is_infty(n))
2449 		return 0;
2450 	if (isl_val_cmp_si(n, INT_MAX) > 0)
2451 		return 0;
2452 	if (!data->lower)
2453 		return 1;
2454 	cmp = isl_val_cmp_si(n, *data->n);
2455 	if (cmp < 0)
2456 		return 1;
2457 	if (cmp > 0)
2458 		return 0;
2459 	if (data->n_div < 0)
2460 		data->n_div = get_expanded_n_div(data, data->lower);
2461 	if (data->n_div < 0)
2462 		return -1;
2463 	if (data->n_div == 0)
2464 		return 0;
2465 	n_div = get_expanded_n_div(data, lower);
2466 	if (n_div < 0)
2467 		return -1;
2468 	if (n_div >= data->n_div)
2469 		return 0;
2470 	data->n_div = n_div;
2471 
2472 	return 1;
2473 }
2474 
2475 /* Check if we can use "c" as a lower bound and if it is better than
2476  * any previously found lower bound.
2477  *
2478  * If "c" does not involve the dimension at the current depth,
2479  * then we cannot use it.
2480  * Otherwise, let "c" be of the form
2481  *
2482  *	i >= f(j)/a
2483  *
2484  * We compute the maximal value of
2485  *
2486  *	-ceil(f(j)/a)) + i + 1
2487  *
2488  * over the domain.  If there is such a value "n", then we know
2489  *
2490  *	-ceil(f(j)/a)) + i + 1 <= n
2491  *
2492  * or
2493  *
2494  *	i < ceil(f(j)/a)) + n
2495  *
2496  * meaning that we can use ceil(f(j)/a)) as a lower bound for unrolling.
2497  * We just need to check if we have found any lower bound before and
2498  * if the new lower bound is better (smaller n or fewer integer divisions)
2499  * than the previously found lower bounds.
2500  */
update_unrolling_lower_bound(struct isl_find_unroll_data * data,__isl_keep isl_constraint * c)2501 static isl_stat update_unrolling_lower_bound(struct isl_find_unroll_data *data,
2502 	__isl_keep isl_constraint *c)
2503 {
2504 	isl_aff *aff, *lower;
2505 	isl_val *max;
2506 	int better;
2507 
2508 	if (!isl_constraint_is_lower_bound(c, isl_dim_set, data->depth))
2509 		return isl_stat_ok;
2510 
2511 	lower = isl_constraint_get_bound(c, isl_dim_set, data->depth);
2512 	lower = isl_aff_ceil(lower);
2513 	aff = isl_aff_copy(lower);
2514 	aff = isl_aff_neg(aff);
2515 	aff = isl_aff_add_coefficient_si(aff, isl_dim_in, data->depth, 1);
2516 	aff = isl_aff_add_constant_si(aff, 1);
2517 	max = isl_set_max_val(data->domain, aff);
2518 	isl_aff_free(aff);
2519 
2520 	better = is_better_lower_bound(data, lower, max);
2521 	if (better < 0 || !better) {
2522 		isl_val_free(max);
2523 		isl_aff_free(lower);
2524 		return better < 0 ? isl_stat_error : isl_stat_ok;
2525 	}
2526 
2527 	isl_aff_free(data->lower);
2528 	data->lower = lower;
2529 	*data->n = isl_val_get_num_si(max);
2530 	isl_val_free(max);
2531 
2532 	return isl_stat_ok;
2533 }
2534 
2535 /* Check if we can use "c" as a lower bound and if it is better than
2536  * any previously found lower bound.
2537  */
constraint_find_unroll(__isl_take isl_constraint * c,void * user)2538 static isl_stat constraint_find_unroll(__isl_take isl_constraint *c, void *user)
2539 {
2540 	struct isl_find_unroll_data *data;
2541 	isl_stat r;
2542 
2543 	data = (struct isl_find_unroll_data *) user;
2544 	r = update_unrolling_lower_bound(data, c);
2545 	isl_constraint_free(c);
2546 
2547 	return r;
2548 }
2549 
2550 /* Look for a lower bound l(i) on the dimension at "depth"
2551  * and a size n such that "domain" is a subset of
2552  *
2553  *	{ [i] : l(i) <= i_d < l(i) + n }
2554  *
2555  * where d is "depth" and l(i) depends only on earlier dimensions.
2556  * Furthermore, try and find a lower bound such that n is as small as possible.
2557  * In particular, "n" needs to be finite.
2558  * "build" is the build in which the unrolling will be performed.
2559  * "expansion" is the expansion that needs to be applied to "domain"
2560  * in the unrolling that will be performed.
2561  *
2562  * Inner dimensions have been eliminated from "domain" by the caller.
2563  *
2564  * We first construct a collection of lower bounds on the input set
2565  * by computing its simple hull.  We then iterate through them,
2566  * discarding those that we cannot use (either because they do not
2567  * involve the dimension at "depth" or because they have no corresponding
2568  * upper bound, meaning that "n" would be unbounded) and pick out the
2569  * best from the remaining ones.
2570  *
2571  * If we cannot find a suitable lower bound, then we consider that
2572  * to be an error.
2573  */
find_unroll_lower_bound(__isl_keep isl_ast_build * build,__isl_keep isl_set * domain,int depth,__isl_keep isl_basic_map * expansion,int * n)2574 static __isl_give isl_aff *find_unroll_lower_bound(
2575 	__isl_keep isl_ast_build *build, __isl_keep isl_set *domain,
2576 	int depth, __isl_keep isl_basic_map *expansion, int *n)
2577 {
2578 	struct isl_find_unroll_data data =
2579 			{ build, domain, depth, expansion, NULL, n, -1 };
2580 	isl_basic_set *hull;
2581 
2582 	hull = isl_set_simple_hull(isl_set_copy(domain));
2583 
2584 	if (isl_basic_set_foreach_constraint(hull,
2585 					    &constraint_find_unroll, &data) < 0)
2586 		goto error;
2587 
2588 	isl_basic_set_free(hull);
2589 
2590 	if (!data.lower)
2591 		isl_die(isl_set_get_ctx(domain), isl_error_invalid,
2592 			"cannot find lower bound for unrolling", return NULL);
2593 
2594 	return data.lower;
2595 error:
2596 	isl_basic_set_free(hull);
2597 	return isl_aff_free(data.lower);
2598 }
2599 
2600 /* Call "fn" on each iteration of the current dimension of "domain".
2601  * If "init" is not NULL, then it is called with the number of
2602  * iterations before any call to "fn".
2603  * Return -1 on failure.
2604  *
2605  * Since we are going to be iterating over the individual values,
2606  * we first check if there are any strides on the current dimension.
2607  * If there is, we rewrite the current dimension i as
2608  *
2609  *		i = stride i' + offset
2610  *
2611  * and then iterate over individual values of i' instead.
2612  *
2613  * We then look for a lower bound on i' and a size such that the domain
2614  * is a subset of
2615  *
2616  *	{ [j,i'] : l(j) <= i' < l(j) + n }
2617  *
2618  * and then take slices of the domain at values of i'
2619  * between l(j) and l(j) + n - 1.
2620  *
2621  * We compute the unshifted simple hull of each slice to ensure that
2622  * we have a single basic set per offset.  The slicing constraint
2623  * may get simplified away before the unshifted simple hull is taken
2624  * and may therefore in some rare cases disappear from the result.
2625  * We therefore explicitly add the constraint back after computing
2626  * the unshifted simple hull to ensure that the basic sets
2627  * remain disjoint.  The constraints that are dropped by taking the hull
2628  * will be taken into account at the next level, as in the case of the
2629  * atomic option.
2630  *
2631  * Finally, we map i' back to i and call "fn".
2632  */
foreach_iteration(__isl_take isl_set * domain,__isl_keep isl_ast_build * build,int (* init)(int n,void * user),int (* fn)(__isl_take isl_basic_set * bset,void * user),void * user)2633 static int foreach_iteration(__isl_take isl_set *domain,
2634 	__isl_keep isl_ast_build *build, int (*init)(int n, void *user),
2635 	int (*fn)(__isl_take isl_basic_set *bset, void *user), void *user)
2636 {
2637 	int i, n;
2638 	isl_bool empty;
2639 	isl_size depth;
2640 	isl_multi_aff *expansion;
2641 	isl_basic_map *bmap;
2642 	isl_aff *lower = NULL;
2643 	isl_ast_build *stride_build;
2644 
2645 	depth = isl_ast_build_get_depth(build);
2646 	if (depth < 0)
2647 		domain = isl_set_free(domain);
2648 
2649 	domain = isl_ast_build_eliminate_inner(build, domain);
2650 	domain = isl_set_intersect(domain, isl_ast_build_get_domain(build));
2651 	stride_build = isl_ast_build_copy(build);
2652 	stride_build = isl_ast_build_detect_strides(stride_build,
2653 							isl_set_copy(domain));
2654 	expansion = isl_ast_build_get_stride_expansion(stride_build);
2655 
2656 	domain = isl_set_preimage_multi_aff(domain,
2657 					    isl_multi_aff_copy(expansion));
2658 	domain = isl_ast_build_eliminate_divs(stride_build, domain);
2659 	isl_ast_build_free(stride_build);
2660 
2661 	bmap = isl_basic_map_from_multi_aff(expansion);
2662 
2663 	empty = isl_set_is_empty(domain);
2664 	if (empty < 0) {
2665 		n = -1;
2666 	} else if (empty) {
2667 		n = 0;
2668 	} else {
2669 		lower = find_unroll_lower_bound(build, domain, depth, bmap, &n);
2670 		if (!lower)
2671 			n = -1;
2672 	}
2673 	if (n >= 0 && init && init(n, user) < 0)
2674 		n = -1;
2675 	for (i = 0; i < n; ++i) {
2676 		isl_set *set;
2677 		isl_basic_set *bset;
2678 		isl_constraint *slice;
2679 
2680 		slice = at_offset(depth, lower, i);
2681 		set = isl_set_copy(domain);
2682 		set = isl_set_add_constraint(set, isl_constraint_copy(slice));
2683 		bset = isl_set_unshifted_simple_hull(set);
2684 		bset = isl_basic_set_add_constraint(bset, slice);
2685 		bset = isl_basic_set_apply(bset, isl_basic_map_copy(bmap));
2686 
2687 		if (fn(bset, user) < 0)
2688 			break;
2689 	}
2690 
2691 	isl_aff_free(lower);
2692 	isl_set_free(domain);
2693 	isl_basic_map_free(bmap);
2694 
2695 	return n < 0 || i < n ? -1 : 0;
2696 }
2697 
2698 /* Data structure for storing the results and the intermediate objects
2699  * of compute_domains.
2700  *
2701  * "list" is the main result of the function and contains a list
2702  * of disjoint basic sets for which code should be generated.
2703  *
2704  * "executed" and "build" are inputs to compute_domains.
2705  * "schedule_domain" is the domain of "executed".
2706  *
2707  * "option" contains the domains at the current depth that should by
2708  * atomic, separated or unrolled.  These domains are as specified by
2709  * the user, except that inner dimensions have been eliminated and
2710  * that they have been made pair-wise disjoint.
2711  *
2712  * "sep_class" contains the user-specified split into separation classes
2713  * specialized to the current depth.
2714  * "done" contains the union of the separation domains that have already
2715  * been handled.
2716  */
2717 struct isl_codegen_domains {
2718 	isl_basic_set_list *list;
2719 
2720 	isl_union_map *executed;
2721 	isl_ast_build *build;
2722 	isl_set *schedule_domain;
2723 
2724 	isl_set *option[4];
2725 
2726 	isl_map *sep_class;
2727 	isl_set *done;
2728 };
2729 
2730 /* Internal data structure for do_unroll.
2731  *
2732  * "domains" stores the results of compute_domains.
2733  * "class_domain" is the original class domain passed to do_unroll.
2734  * "unroll_domain" collects the unrolled iterations.
2735  */
2736 struct isl_ast_unroll_data {
2737 	struct isl_codegen_domains *domains;
2738 	isl_set *class_domain;
2739 	isl_set *unroll_domain;
2740 };
2741 
2742 /* Given an iteration of an unrolled domain represented by "bset",
2743  * add it to data->domains->list.
2744  * Since we may have dropped some constraints, we intersect with
2745  * the class domain again to ensure that each element in the list
2746  * is disjoint from the other class domains.
2747  */
do_unroll_iteration(__isl_take isl_basic_set * bset,void * user)2748 static int do_unroll_iteration(__isl_take isl_basic_set *bset, void *user)
2749 {
2750 	struct isl_ast_unroll_data *data = user;
2751 	isl_set *set;
2752 	isl_basic_set_list *list;
2753 
2754 	set = isl_set_from_basic_set(bset);
2755 	data->unroll_domain = isl_set_union(data->unroll_domain,
2756 					    isl_set_copy(set));
2757 	set = isl_set_intersect(set, isl_set_copy(data->class_domain));
2758 	set = isl_set_make_disjoint(set);
2759 	list = isl_basic_set_list_from_set(set);
2760 	data->domains->list = isl_basic_set_list_concat(data->domains->list,
2761 							list);
2762 
2763 	return 0;
2764 }
2765 
2766 /* Extend domains->list with a list of basic sets, one for each value
2767  * of the current dimension in "domain" and remove the corresponding
2768  * sets from the class domain.  Return the updated class domain.
2769  * The divs that involve the current dimension have not been projected out
2770  * from this domain.
2771  *
2772  * We call foreach_iteration to iterate over the individual values and
2773  * in do_unroll_iteration we collect the individual basic sets in
2774  * domains->list and their union in data->unroll_domain, which is then
2775  * used to update the class domain.
2776  */
do_unroll(struct isl_codegen_domains * domains,__isl_take isl_set * domain,__isl_take isl_set * class_domain)2777 static __isl_give isl_set *do_unroll(struct isl_codegen_domains *domains,
2778 	__isl_take isl_set *domain, __isl_take isl_set *class_domain)
2779 {
2780 	struct isl_ast_unroll_data data;
2781 
2782 	if (!domain)
2783 		return isl_set_free(class_domain);
2784 	if (!class_domain)
2785 		return isl_set_free(domain);
2786 
2787 	data.domains = domains;
2788 	data.class_domain = class_domain;
2789 	data.unroll_domain = isl_set_empty(isl_set_get_space(domain));
2790 
2791 	if (foreach_iteration(domain, domains->build, NULL,
2792 				&do_unroll_iteration, &data) < 0)
2793 		data.unroll_domain = isl_set_free(data.unroll_domain);
2794 
2795 	class_domain = isl_set_subtract(class_domain, data.unroll_domain);
2796 
2797 	return class_domain;
2798 }
2799 
2800 /* Add domains to domains->list for each individual value of the current
2801  * dimension, for that part of the schedule domain that lies in the
2802  * intersection of the option domain and the class domain.
2803  * Remove the corresponding sets from the class domain and
2804  * return the updated class domain.
2805  *
2806  * We first break up the unroll option domain into individual pieces
2807  * and then handle each of them separately.  The unroll option domain
2808  * has been made disjoint in compute_domains_init_options,
2809  *
2810  * Note that we actively want to combine different pieces of the
2811  * schedule domain that have the same value at the current dimension.
2812  * We therefore need to break up the unroll option domain before
2813  * intersecting with class and schedule domain, hoping that the
2814  * unroll option domain specified by the user is relatively simple.
2815  */
compute_unroll_domains(struct isl_codegen_domains * domains,__isl_take isl_set * class_domain)2816 static __isl_give isl_set *compute_unroll_domains(
2817 	struct isl_codegen_domains *domains, __isl_take isl_set *class_domain)
2818 {
2819 	isl_set *unroll_domain;
2820 	isl_basic_set_list *unroll_list;
2821 	int i;
2822 	isl_size n;
2823 	isl_bool empty;
2824 
2825 	empty = isl_set_is_empty(domains->option[isl_ast_loop_unroll]);
2826 	if (empty < 0)
2827 		return isl_set_free(class_domain);
2828 	if (empty)
2829 		return class_domain;
2830 
2831 	unroll_domain = isl_set_copy(domains->option[isl_ast_loop_unroll]);
2832 	unroll_list = isl_basic_set_list_from_set(unroll_domain);
2833 
2834 	n = isl_basic_set_list_n_basic_set(unroll_list);
2835 	if (n < 0)
2836 		class_domain = isl_set_free(class_domain);
2837 	for (i = 0; i < n; ++i) {
2838 		isl_basic_set *bset;
2839 
2840 		bset = isl_basic_set_list_get_basic_set(unroll_list, i);
2841 		unroll_domain = isl_set_from_basic_set(bset);
2842 		unroll_domain = isl_set_intersect(unroll_domain,
2843 						    isl_set_copy(class_domain));
2844 		unroll_domain = isl_set_intersect(unroll_domain,
2845 					isl_set_copy(domains->schedule_domain));
2846 
2847 		empty = isl_set_is_empty(unroll_domain);
2848 		if (empty >= 0 && empty) {
2849 			isl_set_free(unroll_domain);
2850 			continue;
2851 		}
2852 
2853 		class_domain = do_unroll(domains, unroll_domain, class_domain);
2854 	}
2855 
2856 	isl_basic_set_list_free(unroll_list);
2857 
2858 	return class_domain;
2859 }
2860 
2861 /* Try and construct a single basic set that includes the intersection of
2862  * the schedule domain, the atomic option domain and the class domain.
2863  * Add the resulting basic set(s) to domains->list and remove them
2864  * from class_domain.  Return the updated class domain.
2865  *
2866  * We construct a single domain rather than trying to combine
2867  * the schedule domains of individual domains because we are working
2868  * within a single component so that non-overlapping schedule domains
2869  * should already have been separated.
2870  * We do however need to make sure that this single domains is a subset
2871  * of the class domain so that it would not intersect with any other
2872  * class domains.  This means that we may end up splitting up the atomic
2873  * domain in case separation classes are being used.
2874  *
2875  * "domain" is the intersection of the schedule domain and the class domain,
2876  * with inner dimensions projected out.
2877  */
compute_atomic_domain(struct isl_codegen_domains * domains,__isl_take isl_set * class_domain)2878 static __isl_give isl_set *compute_atomic_domain(
2879 	struct isl_codegen_domains *domains, __isl_take isl_set *class_domain)
2880 {
2881 	isl_basic_set *bset;
2882 	isl_basic_set_list *list;
2883 	isl_set *domain, *atomic_domain;
2884 	int empty;
2885 
2886 	domain = isl_set_copy(domains->option[isl_ast_loop_atomic]);
2887 	domain = isl_set_intersect(domain, isl_set_copy(class_domain));
2888 	domain = isl_set_intersect(domain,
2889 				isl_set_copy(domains->schedule_domain));
2890 	empty = isl_set_is_empty(domain);
2891 	if (empty < 0)
2892 		class_domain = isl_set_free(class_domain);
2893 	if (empty) {
2894 		isl_set_free(domain);
2895 		return class_domain;
2896 	}
2897 
2898 	domain = isl_ast_build_eliminate(domains->build, domain);
2899 	domain = isl_set_coalesce_preserve(domain);
2900 	bset = isl_set_unshifted_simple_hull(domain);
2901 	domain = isl_set_from_basic_set(bset);
2902 	atomic_domain = isl_set_copy(domain);
2903 	domain = isl_set_intersect(domain, isl_set_copy(class_domain));
2904 	class_domain = isl_set_subtract(class_domain, atomic_domain);
2905 	domain = isl_set_make_disjoint(domain);
2906 	list = isl_basic_set_list_from_set(domain);
2907 	domains->list = isl_basic_set_list_concat(domains->list, list);
2908 
2909 	return class_domain;
2910 }
2911 
2912 /* Split up the schedule domain into uniform basic sets,
2913  * in the sense that each element in a basic set is associated to
2914  * elements of the same domains, and add the result to domains->list.
2915  * Do this for that part of the schedule domain that lies in the
2916  * intersection of "class_domain" and the separate option domain.
2917  *
2918  * "class_domain" may or may not include the constraints
2919  * of the schedule domain, but this does not make a difference
2920  * since we are going to intersect it with the domain of the inverse schedule.
2921  * If it includes schedule domain constraints, then they may involve
2922  * inner dimensions, but we will eliminate them in separation_domain.
2923  */
compute_separate_domain(struct isl_codegen_domains * domains,__isl_keep isl_set * class_domain)2924 static int compute_separate_domain(struct isl_codegen_domains *domains,
2925 	__isl_keep isl_set *class_domain)
2926 {
2927 	isl_space *space;
2928 	isl_set *domain;
2929 	isl_union_map *executed;
2930 	isl_basic_set_list *list;
2931 	int empty;
2932 
2933 	domain = isl_set_copy(domains->option[isl_ast_loop_separate]);
2934 	domain = isl_set_intersect(domain, isl_set_copy(class_domain));
2935 	executed = isl_union_map_copy(domains->executed);
2936 	executed = isl_union_map_intersect_domain(executed,
2937 				    isl_union_set_from_set(domain));
2938 	empty = isl_union_map_is_empty(executed);
2939 	if (empty < 0 || empty) {
2940 		isl_union_map_free(executed);
2941 		return empty < 0 ? -1 : 0;
2942 	}
2943 
2944 	space = isl_set_get_space(class_domain);
2945 	domain = separate_schedule_domains(space, executed, domains->build);
2946 
2947 	list = isl_basic_set_list_from_set(domain);
2948 	domains->list = isl_basic_set_list_concat(domains->list, list);
2949 
2950 	return 0;
2951 }
2952 
2953 /* Split up the domain at the current depth into disjoint
2954  * basic sets for which code should be generated separately
2955  * for the given separation class domain.
2956  *
2957  * If any separation classes have been defined, then "class_domain"
2958  * is the domain of the current class and does not refer to inner dimensions.
2959  * Otherwise, "class_domain" is the universe domain.
2960  *
2961  * We first make sure that the class domain is disjoint from
2962  * previously considered class domains.
2963  *
2964  * The separate domains can be computed directly from the "class_domain".
2965  *
2966  * The unroll, atomic and remainder domains need the constraints
2967  * from the schedule domain.
2968  *
2969  * For unrolling, the actual schedule domain is needed (with divs that
2970  * may refer to the current dimension) so that stride detection can be
2971  * performed.
2972  *
2973  * For atomic and remainder domains, inner dimensions and divs involving
2974  * the current dimensions should be eliminated.
2975  * In case we are working within a separation class, we need to intersect
2976  * the result with the current "class_domain" to ensure that the domains
2977  * are disjoint from those generated from other class domains.
2978  *
2979  * The domain that has been made atomic may be larger than specified
2980  * by the user since it needs to be representable as a single basic set.
2981  * This possibly larger domain is removed from class_domain by
2982  * compute_atomic_domain.  It is computed first so that the extended domain
2983  * would not overlap with any domains computed before.
2984  * Similary, the unrolled domains may have some constraints removed and
2985  * may therefore also be larger than specified by the user.
2986  *
2987  * If anything is left after handling separate, unroll and atomic,
2988  * we split it up into basic sets and append the basic sets to domains->list.
2989  */
compute_partial_domains(struct isl_codegen_domains * domains,__isl_take isl_set * class_domain)2990 static isl_stat compute_partial_domains(struct isl_codegen_domains *domains,
2991 	__isl_take isl_set *class_domain)
2992 {
2993 	isl_basic_set_list *list;
2994 	isl_set *domain;
2995 
2996 	class_domain = isl_set_subtract(class_domain,
2997 					isl_set_copy(domains->done));
2998 	domains->done = isl_set_union(domains->done,
2999 					isl_set_copy(class_domain));
3000 
3001 	class_domain = compute_atomic_domain(domains, class_domain);
3002 	class_domain = compute_unroll_domains(domains, class_domain);
3003 
3004 	domain = isl_set_copy(class_domain);
3005 
3006 	if (compute_separate_domain(domains, domain) < 0)
3007 		goto error;
3008 	domain = isl_set_subtract(domain,
3009 			isl_set_copy(domains->option[isl_ast_loop_separate]));
3010 
3011 	domain = isl_set_intersect(domain,
3012 				isl_set_copy(domains->schedule_domain));
3013 
3014 	domain = isl_ast_build_eliminate(domains->build, domain);
3015 	domain = isl_set_intersect(domain, isl_set_copy(class_domain));
3016 
3017 	domain = isl_set_coalesce_preserve(domain);
3018 	domain = isl_set_make_disjoint(domain);
3019 
3020 	list = isl_basic_set_list_from_set(domain);
3021 	domains->list = isl_basic_set_list_concat(domains->list, list);
3022 
3023 	isl_set_free(class_domain);
3024 
3025 	return isl_stat_ok;
3026 error:
3027 	isl_set_free(domain);
3028 	isl_set_free(class_domain);
3029 	return isl_stat_error;
3030 }
3031 
3032 /* Split up the domain at the current depth into disjoint
3033  * basic sets for which code should be generated separately
3034  * for the separation class identified by "pnt".
3035  *
3036  * We extract the corresponding class domain from domains->sep_class,
3037  * eliminate inner dimensions and pass control to compute_partial_domains.
3038  */
compute_class_domains(__isl_take isl_point * pnt,void * user)3039 static isl_stat compute_class_domains(__isl_take isl_point *pnt, void *user)
3040 {
3041 	struct isl_codegen_domains *domains = user;
3042 	isl_set *class_set;
3043 	isl_set *domain;
3044 	int disjoint;
3045 
3046 	class_set = isl_set_from_point(pnt);
3047 	domain = isl_map_domain(isl_map_intersect_range(
3048 				isl_map_copy(domains->sep_class), class_set));
3049 	domain = isl_ast_build_compute_gist(domains->build, domain);
3050 	domain = isl_ast_build_eliminate(domains->build, domain);
3051 
3052 	disjoint = isl_set_plain_is_disjoint(domain, domains->schedule_domain);
3053 	if (disjoint < 0)
3054 		return isl_stat_error;
3055 	if (disjoint) {
3056 		isl_set_free(domain);
3057 		return isl_stat_ok;
3058 	}
3059 
3060 	return compute_partial_domains(domains, domain);
3061 }
3062 
3063 /* Extract the domains at the current depth that should be atomic,
3064  * separated or unrolled and store them in option.
3065  *
3066  * The domains specified by the user might overlap, so we make
3067  * them disjoint by subtracting earlier domains from later domains.
3068  */
compute_domains_init_options(isl_set * option[4],__isl_keep isl_ast_build * build)3069 static void compute_domains_init_options(isl_set *option[4],
3070 	__isl_keep isl_ast_build *build)
3071 {
3072 	enum isl_ast_loop_type type, type2;
3073 	isl_set *unroll;
3074 
3075 	for (type = isl_ast_loop_atomic;
3076 	    type <= isl_ast_loop_separate; ++type) {
3077 		option[type] = isl_ast_build_get_option_domain(build, type);
3078 		for (type2 = isl_ast_loop_atomic; type2 < type; ++type2)
3079 			option[type] = isl_set_subtract(option[type],
3080 						isl_set_copy(option[type2]));
3081 	}
3082 
3083 	unroll = option[isl_ast_loop_unroll];
3084 	unroll = isl_set_coalesce(unroll);
3085 	unroll = isl_set_make_disjoint(unroll);
3086 	option[isl_ast_loop_unroll] = unroll;
3087 }
3088 
3089 /* Split up the domain at the current depth into disjoint
3090  * basic sets for which code should be generated separately,
3091  * based on the user-specified options.
3092  * Return the list of disjoint basic sets.
3093  *
3094  * There are three kinds of domains that we need to keep track of.
3095  * - the "schedule domain" is the domain of "executed"
3096  * - the "class domain" is the domain corresponding to the currrent
3097  *	separation class
3098  * - the "option domain" is the domain corresponding to one of the options
3099  *	atomic, unroll or separate
3100  *
3101  * We first consider the individial values of the separation classes
3102  * and split up the domain for each of them separately.
3103  * Finally, we consider the remainder.  If no separation classes were
3104  * specified, then we call compute_partial_domains with the universe
3105  * "class_domain".  Otherwise, we take the "schedule_domain" as "class_domain",
3106  * with inner dimensions removed.  We do this because we want to
3107  * avoid computing the complement of the class domains (i.e., the difference
3108  * between the universe and domains->done).
3109  */
compute_domains(__isl_keep isl_union_map * executed,__isl_keep isl_ast_build * build)3110 static __isl_give isl_basic_set_list *compute_domains(
3111 	__isl_keep isl_union_map *executed, __isl_keep isl_ast_build *build)
3112 {
3113 	struct isl_codegen_domains domains;
3114 	isl_ctx *ctx;
3115 	isl_set *domain;
3116 	isl_union_set *schedule_domain;
3117 	isl_set *classes;
3118 	isl_space *space;
3119 	int n_param;
3120 	enum isl_ast_loop_type type;
3121 	isl_bool empty;
3122 
3123 	if (!executed)
3124 		return NULL;
3125 
3126 	ctx = isl_union_map_get_ctx(executed);
3127 	domains.list = isl_basic_set_list_alloc(ctx, 0);
3128 
3129 	schedule_domain = isl_union_map_domain(isl_union_map_copy(executed));
3130 	domain = isl_set_from_union_set(schedule_domain);
3131 
3132 	compute_domains_init_options(domains.option, build);
3133 
3134 	domains.sep_class = isl_ast_build_get_separation_class(build);
3135 	classes = isl_map_range(isl_map_copy(domains.sep_class));
3136 	n_param = isl_set_dim(classes, isl_dim_param);
3137 	if (n_param < 0)
3138 		classes = isl_set_free(classes);
3139 	classes = isl_set_project_out(classes, isl_dim_param, 0, n_param);
3140 
3141 	space = isl_set_get_space(domain);
3142 	domains.build = build;
3143 	domains.schedule_domain = isl_set_copy(domain);
3144 	domains.executed = executed;
3145 	domains.done = isl_set_empty(space);
3146 
3147 	if (isl_set_foreach_point(classes, &compute_class_domains, &domains) < 0)
3148 		domains.list = isl_basic_set_list_free(domains.list);
3149 	isl_set_free(classes);
3150 
3151 	empty = isl_set_is_empty(domains.done);
3152 	if (empty < 0) {
3153 		domains.list = isl_basic_set_list_free(domains.list);
3154 		domain = isl_set_free(domain);
3155 	} else if (empty) {
3156 		isl_set_free(domain);
3157 		domain = isl_set_universe(isl_set_get_space(domains.done));
3158 	} else {
3159 		domain = isl_ast_build_eliminate(build, domain);
3160 	}
3161 	if (compute_partial_domains(&domains, domain) < 0)
3162 		domains.list = isl_basic_set_list_free(domains.list);
3163 
3164 	isl_set_free(domains.schedule_domain);
3165 	isl_set_free(domains.done);
3166 	isl_map_free(domains.sep_class);
3167 	for (type = isl_ast_loop_atomic; type <= isl_ast_loop_separate; ++type)
3168 		isl_set_free(domains.option[type]);
3169 
3170 	return domains.list;
3171 }
3172 
3173 /* Generate code for a single component, after shifting (if any)
3174  * has been applied, in case the schedule was specified as a union map.
3175  *
3176  * We first split up the domain at the current depth into disjoint
3177  * basic sets based on the user-specified options.
3178  * Then we generated code for each of them and concatenate the results.
3179  */
generate_shifted_component_flat(__isl_take isl_union_map * executed,__isl_take isl_ast_build * build)3180 static __isl_give isl_ast_graft_list *generate_shifted_component_flat(
3181 	__isl_take isl_union_map *executed, __isl_take isl_ast_build *build)
3182 {
3183 	isl_basic_set_list *domain_list;
3184 	isl_ast_graft_list *list = NULL;
3185 
3186 	domain_list = compute_domains(executed, build);
3187 	list = generate_parallel_domains(domain_list, executed, build);
3188 
3189 	isl_basic_set_list_free(domain_list);
3190 	isl_union_map_free(executed);
3191 	isl_ast_build_free(build);
3192 
3193 	return list;
3194 }
3195 
3196 /* Generate code for a single component, after shifting (if any)
3197  * has been applied, in case the schedule was specified as a schedule tree
3198  * and the separate option was specified.
3199  *
3200  * We perform separation on the domain of "executed" and then generate
3201  * an AST for each of the resulting disjoint basic sets.
3202  */
generate_shifted_component_tree_separate(__isl_take isl_union_map * executed,__isl_take isl_ast_build * build)3203 static __isl_give isl_ast_graft_list *generate_shifted_component_tree_separate(
3204 	__isl_take isl_union_map *executed, __isl_take isl_ast_build *build)
3205 {
3206 	isl_space *space;
3207 	isl_set *domain;
3208 	isl_basic_set_list *domain_list;
3209 	isl_ast_graft_list *list;
3210 
3211 	space = isl_ast_build_get_space(build, 1);
3212 	domain = separate_schedule_domains(space,
3213 					isl_union_map_copy(executed), build);
3214 	domain_list = isl_basic_set_list_from_set(domain);
3215 
3216 	list = generate_parallel_domains(domain_list, executed, build);
3217 
3218 	isl_basic_set_list_free(domain_list);
3219 	isl_union_map_free(executed);
3220 	isl_ast_build_free(build);
3221 
3222 	return list;
3223 }
3224 
3225 /* Internal data structure for generate_shifted_component_tree_unroll.
3226  *
3227  * "executed" and "build" are inputs to generate_shifted_component_tree_unroll.
3228  * "list" collects the constructs grafts.
3229  */
3230 struct isl_ast_unroll_tree_data {
3231 	isl_union_map *executed;
3232 	isl_ast_build *build;
3233 	isl_ast_graft_list *list;
3234 };
3235 
3236 /* Initialize data->list to a list of "n" elements.
3237  */
init_unroll_tree(int n,void * user)3238 static int init_unroll_tree(int n, void *user)
3239 {
3240 	struct isl_ast_unroll_tree_data *data = user;
3241 	isl_ctx *ctx;
3242 
3243 	ctx = isl_ast_build_get_ctx(data->build);
3244 	data->list = isl_ast_graft_list_alloc(ctx, n);
3245 
3246 	return 0;
3247 }
3248 
3249 /* Given an iteration of an unrolled domain represented by "bset",
3250  * generate the corresponding AST and add the result to data->list.
3251  */
do_unroll_tree_iteration(__isl_take isl_basic_set * bset,void * user)3252 static int do_unroll_tree_iteration(__isl_take isl_basic_set *bset, void *user)
3253 {
3254 	struct isl_ast_unroll_tree_data *data = user;
3255 
3256 	data->list = add_node(data->list, isl_union_map_copy(data->executed),
3257 				bset, isl_ast_build_copy(data->build));
3258 
3259 	return 0;
3260 }
3261 
3262 /* Generate code for a single component, after shifting (if any)
3263  * has been applied, in case the schedule was specified as a schedule tree
3264  * and the unroll option was specified.
3265  *
3266  * We call foreach_iteration to iterate over the individual values and
3267  * construct and collect the corresponding grafts in do_unroll_tree_iteration.
3268  */
generate_shifted_component_tree_unroll(__isl_take isl_union_map * executed,__isl_take isl_set * domain,__isl_take isl_ast_build * build)3269 static __isl_give isl_ast_graft_list *generate_shifted_component_tree_unroll(
3270 	__isl_take isl_union_map *executed, __isl_take isl_set *domain,
3271 	__isl_take isl_ast_build *build)
3272 {
3273 	struct isl_ast_unroll_tree_data data = { executed, build, NULL };
3274 
3275 	if (foreach_iteration(domain, build, &init_unroll_tree,
3276 				&do_unroll_tree_iteration, &data) < 0)
3277 		data.list = isl_ast_graft_list_free(data.list);
3278 
3279 	isl_union_map_free(executed);
3280 	isl_ast_build_free(build);
3281 
3282 	return data.list;
3283 }
3284 
3285 /* Does "domain" involve a disjunction that is purely based on
3286  * constraints involving only outer dimension?
3287  *
3288  * In particular, is there a disjunction such that the constraints
3289  * involving the current and later dimensions are the same over
3290  * all the disjuncts?
3291  */
has_pure_outer_disjunction(__isl_keep isl_set * domain,__isl_keep isl_ast_build * build)3292 static isl_bool has_pure_outer_disjunction(__isl_keep isl_set *domain,
3293 	__isl_keep isl_ast_build *build)
3294 {
3295 	isl_basic_set *hull;
3296 	isl_set *shared, *inner;
3297 	isl_bool equal;
3298 	isl_size depth;
3299 	isl_size n;
3300 	isl_size dim;
3301 
3302 	n = isl_set_n_basic_set(domain);
3303 	if (n < 0)
3304 		return isl_bool_error;
3305 	if (n <= 1)
3306 		return isl_bool_false;
3307 	dim = isl_set_dim(domain, isl_dim_set);
3308 	depth = isl_ast_build_get_depth(build);
3309 	if (dim < 0 || depth < 0)
3310 		return isl_bool_error;
3311 
3312 	inner = isl_set_copy(domain);
3313 	inner = isl_set_drop_constraints_not_involving_dims(inner,
3314 					    isl_dim_set, depth, dim - depth);
3315 	hull = isl_set_plain_unshifted_simple_hull(isl_set_copy(inner));
3316 	shared = isl_set_from_basic_set(hull);
3317 	equal = isl_set_plain_is_equal(inner, shared);
3318 	isl_set_free(inner);
3319 	isl_set_free(shared);
3320 
3321 	return equal;
3322 }
3323 
3324 /* Generate code for a single component, after shifting (if any)
3325  * has been applied, in case the schedule was specified as a schedule tree.
3326  * In particular, handle the base case where there is either no isolated
3327  * set or we are within the isolated set (in which case "isolated" is set)
3328  * or the iterations that precede or follow the isolated set.
3329  *
3330  * The schedule domain is broken up or combined into basic sets
3331  * according to the AST generation option specified in the current
3332  * schedule node, which may be either atomic, separate, unroll or
3333  * unspecified.  If the option is unspecified, then we currently simply
3334  * split the schedule domain into disjoint basic sets.
3335  *
3336  * In case the separate option is specified, the AST generation is
3337  * handled by generate_shifted_component_tree_separate.
3338  * In the other cases, we need the global schedule domain.
3339  * In the unroll case, the AST generation is then handled by
3340  * generate_shifted_component_tree_unroll which needs the actual
3341  * schedule domain (with divs that may refer to the current dimension)
3342  * so that stride detection can be performed.
3343  * In the atomic or unspecified case, inner dimensions and divs involving
3344  * the current dimensions should be eliminated.
3345  * The result is then either combined into a single basic set or
3346  * split up into disjoint basic sets.
3347  * Finally an AST is generated for each basic set and the results are
3348  * concatenated.
3349  *
3350  * If the schedule domain involves a disjunction that is purely based on
3351  * constraints involving only outer dimension, then it is treated as
3352  * if atomic was specified.  This ensures that only a single loop
3353  * is generated instead of a sequence of identical loops with
3354  * different guards.
3355  */
generate_shifted_component_tree_base(__isl_take isl_union_map * executed,__isl_take isl_ast_build * build,int isolated)3356 static __isl_give isl_ast_graft_list *generate_shifted_component_tree_base(
3357 	__isl_take isl_union_map *executed, __isl_take isl_ast_build *build,
3358 	int isolated)
3359 {
3360 	isl_bool outer_disjunction;
3361 	isl_union_set *schedule_domain;
3362 	isl_set *domain;
3363 	isl_basic_set_list *domain_list;
3364 	isl_ast_graft_list *list;
3365 	enum isl_ast_loop_type type;
3366 
3367 	type = isl_ast_build_get_loop_type(build, isolated);
3368 	if (type < 0)
3369 		goto error;
3370 
3371 	if (type == isl_ast_loop_separate)
3372 		return generate_shifted_component_tree_separate(executed,
3373 								build);
3374 
3375 	schedule_domain = isl_union_map_domain(isl_union_map_copy(executed));
3376 	domain = isl_set_from_union_set(schedule_domain);
3377 
3378 	if (type == isl_ast_loop_unroll)
3379 		return generate_shifted_component_tree_unroll(executed, domain,
3380 								build);
3381 
3382 	domain = isl_ast_build_eliminate(build, domain);
3383 	domain = isl_set_coalesce_preserve(domain);
3384 
3385 	outer_disjunction = has_pure_outer_disjunction(domain, build);
3386 	if (outer_disjunction < 0)
3387 		domain = isl_set_free(domain);
3388 
3389 	if (outer_disjunction || type == isl_ast_loop_atomic) {
3390 		isl_basic_set *hull;
3391 		hull = isl_set_unshifted_simple_hull(domain);
3392 		domain_list = isl_basic_set_list_from_basic_set(hull);
3393 	} else {
3394 		domain = isl_set_make_disjoint(domain);
3395 		domain_list = isl_basic_set_list_from_set(domain);
3396 	}
3397 
3398 	list = generate_parallel_domains(domain_list, executed, build);
3399 
3400 	isl_basic_set_list_free(domain_list);
3401 	isl_union_map_free(executed);
3402 	isl_ast_build_free(build);
3403 
3404 	return list;
3405 error:
3406 	isl_union_map_free(executed);
3407 	isl_ast_build_free(build);
3408 	return NULL;
3409 }
3410 
3411 /* Extract out the disjunction imposed by "domain" on the outer
3412  * schedule dimensions.
3413  *
3414  * In particular, remove all inner dimensions from "domain" (including
3415  * the current dimension) and then remove the constraints that are shared
3416  * by all disjuncts in the result.
3417  */
extract_disjunction(__isl_take isl_set * domain,__isl_keep isl_ast_build * build)3418 static __isl_give isl_set *extract_disjunction(__isl_take isl_set *domain,
3419 	__isl_keep isl_ast_build *build)
3420 {
3421 	isl_set *hull;
3422 	isl_size depth;
3423 	isl_size dim;
3424 
3425 	domain = isl_ast_build_specialize(build, domain);
3426 	depth = isl_ast_build_get_depth(build);
3427 	dim = isl_set_dim(domain, isl_dim_set);
3428 	if (depth < 0 || dim < 0)
3429 		return isl_set_free(domain);
3430 	domain = isl_set_eliminate(domain, isl_dim_set, depth, dim - depth);
3431 	domain = isl_set_remove_unknown_divs(domain);
3432 	hull = isl_set_copy(domain);
3433 	hull = isl_set_from_basic_set(isl_set_unshifted_simple_hull(hull));
3434 	domain = isl_set_gist(domain, hull);
3435 
3436 	return domain;
3437 }
3438 
3439 /* Add "guard" to the grafts in "list".
3440  * "build" is the outer AST build, while "sub_build" includes "guard"
3441  * in its generated domain.
3442  *
3443  * First combine the grafts into a single graft and then add the guard.
3444  * If the list is empty, or if some error occurred, then simply return
3445  * the list.
3446  */
list_add_guard(__isl_take isl_ast_graft_list * list,__isl_keep isl_set * guard,__isl_keep isl_ast_build * build,__isl_keep isl_ast_build * sub_build)3447 static __isl_give isl_ast_graft_list *list_add_guard(
3448 	__isl_take isl_ast_graft_list *list, __isl_keep isl_set *guard,
3449 	__isl_keep isl_ast_build *build, __isl_keep isl_ast_build *sub_build)
3450 {
3451 	isl_ast_graft *graft;
3452 	isl_size n;
3453 
3454 	list = isl_ast_graft_list_fuse(list, sub_build);
3455 
3456 	n = isl_ast_graft_list_n_ast_graft(list);
3457 	if (n < 0)
3458 		return isl_ast_graft_list_free(list);
3459 	if (n != 1)
3460 		return list;
3461 
3462 	graft = isl_ast_graft_list_get_ast_graft(list, 0);
3463 	graft = isl_ast_graft_add_guard(graft, isl_set_copy(guard), build);
3464 	list = isl_ast_graft_list_set_ast_graft(list, 0, graft);
3465 
3466 	return list;
3467 }
3468 
3469 /* Generate code for a single component, after shifting (if any)
3470  * has been applied, in case the schedule was specified as a schedule tree.
3471  * In particular, do so for the specified subset of the schedule domain.
3472  *
3473  * If we are outside of the isolated part, then "domain" may include
3474  * a disjunction.  Explicitly generate this disjunction at this point
3475  * instead of relying on the disjunction getting hoisted back up
3476  * to this level.
3477  */
generate_shifted_component_tree_part(__isl_keep isl_union_map * executed,__isl_take isl_set * domain,__isl_keep isl_ast_build * build,int isolated)3478 static __isl_give isl_ast_graft_list *generate_shifted_component_tree_part(
3479 	__isl_keep isl_union_map *executed, __isl_take isl_set *domain,
3480 	__isl_keep isl_ast_build *build, int isolated)
3481 {
3482 	isl_union_set *uset;
3483 	isl_ast_graft_list *list;
3484 	isl_ast_build *sub_build;
3485 	int empty;
3486 
3487 	uset = isl_union_set_from_set(isl_set_copy(domain));
3488 	executed = isl_union_map_copy(executed);
3489 	executed = isl_union_map_intersect_domain(executed, uset);
3490 	empty = isl_union_map_is_empty(executed);
3491 	if (empty < 0)
3492 		goto error;
3493 	if (empty) {
3494 		isl_ctx *ctx;
3495 		isl_union_map_free(executed);
3496 		isl_set_free(domain);
3497 		ctx = isl_ast_build_get_ctx(build);
3498 		return isl_ast_graft_list_alloc(ctx, 0);
3499 	}
3500 
3501 	sub_build = isl_ast_build_copy(build);
3502 	if (!isolated) {
3503 		domain = extract_disjunction(domain, build);
3504 		sub_build = isl_ast_build_restrict_generated(sub_build,
3505 							isl_set_copy(domain));
3506 	}
3507 	list = generate_shifted_component_tree_base(executed,
3508 				isl_ast_build_copy(sub_build), isolated);
3509 	if (!isolated)
3510 		list = list_add_guard(list, domain, build, sub_build);
3511 	isl_ast_build_free(sub_build);
3512 	isl_set_free(domain);
3513 	return list;
3514 error:
3515 	isl_union_map_free(executed);
3516 	isl_set_free(domain);
3517 	return NULL;
3518 }
3519 
3520 /* Generate code for a single component, after shifting (if any)
3521  * has been applied, in case the schedule was specified as a schedule tree.
3522  * In particular, do so for the specified sequence of subsets
3523  * of the schedule domain, "before", "isolated", "after" and "other",
3524  * where only the "isolated" part is considered to be isolated.
3525  */
generate_shifted_component_parts(__isl_take isl_union_map * executed,__isl_take isl_set * before,__isl_take isl_set * isolated,__isl_take isl_set * after,__isl_take isl_set * other,__isl_take isl_ast_build * build)3526 static __isl_give isl_ast_graft_list *generate_shifted_component_parts(
3527 	__isl_take isl_union_map *executed, __isl_take isl_set *before,
3528 	__isl_take isl_set *isolated, __isl_take isl_set *after,
3529 	__isl_take isl_set *other, __isl_take isl_ast_build *build)
3530 {
3531 	isl_ast_graft_list *list, *res;
3532 
3533 	res = generate_shifted_component_tree_part(executed, before, build, 0);
3534 	list = generate_shifted_component_tree_part(executed, isolated,
3535 						    build, 1);
3536 	res = isl_ast_graft_list_concat(res, list);
3537 	list = generate_shifted_component_tree_part(executed, after, build, 0);
3538 	res = isl_ast_graft_list_concat(res, list);
3539 	list = generate_shifted_component_tree_part(executed, other, build, 0);
3540 	res = isl_ast_graft_list_concat(res, list);
3541 
3542 	isl_union_map_free(executed);
3543 	isl_ast_build_free(build);
3544 
3545 	return res;
3546 }
3547 
3548 /* Does "set" intersect "first", but not "second"?
3549  */
only_intersects_first(__isl_keep isl_set * set,__isl_keep isl_set * first,__isl_keep isl_set * second)3550 static isl_bool only_intersects_first(__isl_keep isl_set *set,
3551 	__isl_keep isl_set *first, __isl_keep isl_set *second)
3552 {
3553 	isl_bool disjoint;
3554 
3555 	disjoint = isl_set_is_disjoint(set, first);
3556 	if (disjoint < 0)
3557 		return isl_bool_error;
3558 	if (disjoint)
3559 		return isl_bool_false;
3560 
3561 	return isl_set_is_disjoint(set, second);
3562 }
3563 
3564 /* Generate code for a single component, after shifting (if any)
3565  * has been applied, in case the schedule was specified as a schedule tree.
3566  * In particular, do so in case of isolation where there is
3567  * only an "isolated" part and an "after" part.
3568  * "dead1" and "dead2" are freed by this function in order to simplify
3569  * the caller.
3570  *
3571  * The "before" and "other" parts are set to empty sets.
3572  */
generate_shifted_component_only_after(__isl_take isl_union_map * executed,__isl_take isl_set * isolated,__isl_take isl_set * after,__isl_take isl_ast_build * build,__isl_take isl_set * dead1,__isl_take isl_set * dead2)3573 static __isl_give isl_ast_graft_list *generate_shifted_component_only_after(
3574 	__isl_take isl_union_map *executed, __isl_take isl_set *isolated,
3575 	__isl_take isl_set *after, __isl_take isl_ast_build *build,
3576 	__isl_take isl_set *dead1, __isl_take isl_set *dead2)
3577 {
3578 	isl_set *empty;
3579 
3580 	empty = isl_set_empty(isl_set_get_space(after));
3581 	isl_set_free(dead1);
3582 	isl_set_free(dead2);
3583 	return generate_shifted_component_parts(executed, isl_set_copy(empty),
3584 						isolated, after, empty, build);
3585 }
3586 
3587 /* Generate code for a single component, after shifting (if any)
3588  * has been applied, in case the schedule was specified as a schedule tree.
3589  *
3590  * We first check if the user has specified an isolated schedule domain
3591  * and that we are not already outside of this isolated schedule domain.
3592  * If so, we break up the schedule domain into iterations that
3593  * precede the isolated domain, the isolated domain itself,
3594  * the iterations that follow the isolated domain and
3595  * the remaining iterations (those that are incomparable
3596  * to the isolated domain).
3597  * We generate an AST for each piece and concatenate the results.
3598  *
3599  * If the isolated domain is not convex, then it is replaced
3600  * by a convex superset to ensure that the sets of preceding and
3601  * following iterations are properly defined and, in particular,
3602  * that there are no intermediate iterations that do not belong
3603  * to the isolated domain.
3604  *
3605  * In the special case where at least one element of the schedule
3606  * domain that does not belong to the isolated domain needs
3607  * to be scheduled after this isolated domain, but none of those
3608  * elements need to be scheduled before, break up the schedule domain
3609  * in only two parts, the isolated domain, and a part that will be
3610  * scheduled after the isolated domain.
3611  *
3612  * If no isolated set has been specified, then we generate an
3613  * AST for the entire inverse schedule.
3614  */
generate_shifted_component_tree(__isl_take isl_union_map * executed,__isl_take isl_ast_build * build)3615 static __isl_give isl_ast_graft_list *generate_shifted_component_tree(
3616 	__isl_take isl_union_map *executed, __isl_take isl_ast_build *build)
3617 {
3618 	int i;
3619 	isl_size depth;
3620 	int empty, has_isolate;
3621 	isl_space *space;
3622 	isl_union_set *schedule_domain;
3623 	isl_set *domain;
3624 	isl_basic_set *hull;
3625 	isl_set *isolated, *before, *after, *test;
3626 	isl_map *gt, *lt;
3627 	isl_bool pure;
3628 
3629 	build = isl_ast_build_extract_isolated(build);
3630 	has_isolate = isl_ast_build_has_isolated(build);
3631 	if (has_isolate < 0)
3632 		executed = isl_union_map_free(executed);
3633 	else if (!has_isolate)
3634 		return generate_shifted_component_tree_base(executed, build, 0);
3635 
3636 	schedule_domain = isl_union_map_domain(isl_union_map_copy(executed));
3637 	domain = isl_set_from_union_set(schedule_domain);
3638 
3639 	isolated = isl_ast_build_get_isolated(build);
3640 	isolated = isl_set_intersect(isolated, isl_set_copy(domain));
3641 	test = isl_ast_build_specialize(build, isl_set_copy(isolated));
3642 	empty = isl_set_is_empty(test);
3643 	isl_set_free(test);
3644 	if (empty < 0)
3645 		goto error;
3646 	if (empty) {
3647 		isl_set_free(isolated);
3648 		isl_set_free(domain);
3649 		return generate_shifted_component_tree_base(executed, build, 0);
3650 	}
3651 	depth = isl_ast_build_get_depth(build);
3652 	if (depth < 0)
3653 		goto error;
3654 
3655 	isolated = isl_ast_build_eliminate(build, isolated);
3656 	hull = isl_set_unshifted_simple_hull(isolated);
3657 	isolated = isl_set_from_basic_set(hull);
3658 
3659 	space = isl_space_map_from_set(isl_set_get_space(isolated));
3660 	gt = isl_map_universe(space);
3661 	for (i = 0; i < depth; ++i)
3662 		gt = isl_map_equate(gt, isl_dim_in, i, isl_dim_out, i);
3663 	gt = isl_map_order_gt(gt, isl_dim_in, depth, isl_dim_out, depth);
3664 	lt = isl_map_reverse(isl_map_copy(gt));
3665 	before = isl_set_apply(isl_set_copy(isolated), gt);
3666 	after = isl_set_apply(isl_set_copy(isolated), lt);
3667 
3668 	domain = isl_set_subtract(domain, isl_set_copy(isolated));
3669 	pure = only_intersects_first(domain, after, before);
3670 	if (pure < 0)
3671 		executed = isl_union_map_free(executed);
3672 	else if (pure)
3673 		return generate_shifted_component_only_after(executed, isolated,
3674 						domain, build, before, after);
3675 	domain = isl_set_subtract(domain, isl_set_copy(before));
3676 	domain = isl_set_subtract(domain, isl_set_copy(after));
3677 	after = isl_set_subtract(after, isl_set_copy(isolated));
3678 	after = isl_set_subtract(after, isl_set_copy(before));
3679 	before = isl_set_subtract(before, isl_set_copy(isolated));
3680 
3681 	return generate_shifted_component_parts(executed, before, isolated,
3682 						after, domain, build);
3683 error:
3684 	isl_set_free(domain);
3685 	isl_set_free(isolated);
3686 	isl_union_map_free(executed);
3687 	isl_ast_build_free(build);
3688 	return NULL;
3689 }
3690 
3691 /* Generate code for a single component, after shifting (if any)
3692  * has been applied.
3693  *
3694  * Call generate_shifted_component_tree or generate_shifted_component_flat
3695  * depending on whether the schedule was specified as a schedule tree.
3696  */
generate_shifted_component(__isl_take isl_union_map * executed,__isl_take isl_ast_build * build)3697 static __isl_give isl_ast_graft_list *generate_shifted_component(
3698 	__isl_take isl_union_map *executed, __isl_take isl_ast_build *build)
3699 {
3700 	if (isl_ast_build_has_schedule_node(build))
3701 		return generate_shifted_component_tree(executed, build);
3702 	else
3703 		return generate_shifted_component_flat(executed, build);
3704 }
3705 
3706 struct isl_set_map_pair {
3707 	isl_set *set;
3708 	isl_map *map;
3709 };
3710 
3711 /* Given an array "domain" of isl_set_map_pairs and an array "order"
3712  * of indices into the "domain" array,
3713  * return the union of the "map" fields of the elements
3714  * indexed by the first "n" elements of "order".
3715  */
construct_component_executed(struct isl_set_map_pair * domain,int * order,int n)3716 static __isl_give isl_union_map *construct_component_executed(
3717 	struct isl_set_map_pair *domain, int *order, int n)
3718 {
3719 	int i;
3720 	isl_map *map;
3721 	isl_union_map *executed;
3722 
3723 	map = isl_map_copy(domain[order[0]].map);
3724 	executed = isl_union_map_from_map(map);
3725 	for (i = 1; i < n; ++i) {
3726 		map = isl_map_copy(domain[order[i]].map);
3727 		executed = isl_union_map_add_map(executed, map);
3728 	}
3729 
3730 	return executed;
3731 }
3732 
3733 /* Generate code for a single component, after shifting (if any)
3734  * has been applied.
3735  *
3736  * The component inverse schedule is specified as the "map" fields
3737  * of the elements of "domain" indexed by the first "n" elements of "order".
3738  */
generate_shifted_component_from_list(struct isl_set_map_pair * domain,int * order,int n,__isl_take isl_ast_build * build)3739 static __isl_give isl_ast_graft_list *generate_shifted_component_from_list(
3740 	struct isl_set_map_pair *domain, int *order, int n,
3741 	__isl_take isl_ast_build *build)
3742 {
3743 	isl_union_map *executed;
3744 
3745 	executed = construct_component_executed(domain, order, n);
3746 	return generate_shifted_component(executed, build);
3747 }
3748 
3749 /* Does set dimension "pos" of "set" have an obviously fixed value?
3750  */
dim_is_fixed(__isl_keep isl_set * set,int pos)3751 static int dim_is_fixed(__isl_keep isl_set *set, int pos)
3752 {
3753 	int fixed;
3754 	isl_val *v;
3755 
3756 	v = isl_set_plain_get_val_if_fixed(set, isl_dim_set, pos);
3757 	if (!v)
3758 		return -1;
3759 	fixed = !isl_val_is_nan(v);
3760 	isl_val_free(v);
3761 
3762 	return fixed;
3763 }
3764 
3765 /* Given an array "domain" of isl_set_map_pairs and an array "order"
3766  * of indices into the "domain" array,
3767  * do all (except for at most one) of the "set" field of the elements
3768  * indexed by the first "n" elements of "order" have a fixed value
3769  * at position "depth"?
3770  */
at_most_one_non_fixed(struct isl_set_map_pair * domain,int * order,int n,int depth)3771 static int at_most_one_non_fixed(struct isl_set_map_pair *domain,
3772 	int *order, int n, int depth)
3773 {
3774 	int i;
3775 	int non_fixed = -1;
3776 
3777 	for (i = 0; i < n; ++i) {
3778 		int f;
3779 
3780 		f = dim_is_fixed(domain[order[i]].set, depth);
3781 		if (f < 0)
3782 			return -1;
3783 		if (f)
3784 			continue;
3785 		if (non_fixed >= 0)
3786 			return 0;
3787 		non_fixed = i;
3788 	}
3789 
3790 	return 1;
3791 }
3792 
3793 /* Given an array "domain" of isl_set_map_pairs and an array "order"
3794  * of indices into the "domain" array,
3795  * eliminate the inner dimensions from the "set" field of the elements
3796  * indexed by the first "n" elements of "order", provided the current
3797  * dimension does not have a fixed value.
3798  *
3799  * Return the index of the first element in "order" with a corresponding
3800  * "set" field that does not have an (obviously) fixed value.
3801  */
eliminate_non_fixed(struct isl_set_map_pair * domain,int * order,int n,int depth,__isl_keep isl_ast_build * build)3802 static int eliminate_non_fixed(struct isl_set_map_pair *domain,
3803 	int *order, int n, int depth, __isl_keep isl_ast_build *build)
3804 {
3805 	int i;
3806 	int base = -1;
3807 
3808 	for (i = n - 1; i >= 0; --i) {
3809 		int f;
3810 		f = dim_is_fixed(domain[order[i]].set, depth);
3811 		if (f < 0)
3812 			return -1;
3813 		if (f)
3814 			continue;
3815 		domain[order[i]].set = isl_ast_build_eliminate_inner(build,
3816 							domain[order[i]].set);
3817 		base = i;
3818 	}
3819 
3820 	return base;
3821 }
3822 
3823 /* Given an array "domain" of isl_set_map_pairs and an array "order"
3824  * of indices into the "domain" array,
3825  * find the element of "domain" (amongst those indexed by the first "n"
3826  * elements of "order") with the "set" field that has the smallest
3827  * value for the current iterator.
3828  *
3829  * Note that the domain with the smallest value may depend on the parameters
3830  * and/or outer loop dimension.  Since the result of this function is only
3831  * used as heuristic, we only make a reasonable attempt at finding the best
3832  * domain, one that should work in case a single domain provides the smallest
3833  * value for the current dimension over all values of the parameters
3834  * and outer dimensions.
3835  *
3836  * In particular, we compute the smallest value of the first domain
3837  * and replace it by that of any later domain if that later domain
3838  * has a smallest value that is smaller for at least some value
3839  * of the parameters and outer dimensions.
3840  */
first_offset(struct isl_set_map_pair * domain,int * order,int n,__isl_keep isl_ast_build * build)3841 static int first_offset(struct isl_set_map_pair *domain, int *order, int n,
3842 	__isl_keep isl_ast_build *build)
3843 {
3844 	int i;
3845 	isl_map *min_first;
3846 	int first = 0;
3847 
3848 	min_first = isl_ast_build_map_to_iterator(build,
3849 					isl_set_copy(domain[order[0]].set));
3850 	min_first = isl_map_lexmin(min_first);
3851 
3852 	for (i = 1; i < n; ++i) {
3853 		isl_map *min, *test;
3854 		int empty;
3855 
3856 		min = isl_ast_build_map_to_iterator(build,
3857 					isl_set_copy(domain[order[i]].set));
3858 		min = isl_map_lexmin(min);
3859 		test = isl_map_copy(min);
3860 		test = isl_map_apply_domain(isl_map_copy(min_first), test);
3861 		test = isl_map_order_lt(test, isl_dim_in, 0, isl_dim_out, 0);
3862 		empty = isl_map_is_empty(test);
3863 		isl_map_free(test);
3864 		if (empty >= 0 && !empty) {
3865 			isl_map_free(min_first);
3866 			first = i;
3867 			min_first = min;
3868 		} else
3869 			isl_map_free(min);
3870 
3871 		if (empty < 0)
3872 			break;
3873 	}
3874 
3875 	isl_map_free(min_first);
3876 
3877 	return i < n ? -1 : first;
3878 }
3879 
3880 /* Construct a shifted inverse schedule based on the original inverse schedule,
3881  * the stride and the offset.
3882  *
3883  * The original inverse schedule is specified as the "map" fields
3884  * of the elements of "domain" indexed by the first "n" elements of "order".
3885  *
3886  * "stride" and "offset" are such that the difference
3887  * between the values of the current dimension of domain "i"
3888  * and the values of the current dimension for some reference domain are
3889  * equal to
3890  *
3891  *	stride * integer + offset[i]
3892  *
3893  * Moreover, 0 <= offset[i] < stride.
3894  *
3895  * For each domain, we create a map
3896  *
3897  *	{ [..., j, ...] -> [..., j - offset[i], offset[i], ....] }
3898  *
3899  * where j refers to the current dimension and the other dimensions are
3900  * unchanged, and apply this map to the original schedule domain.
3901  *
3902  * For example, for the original schedule
3903  *
3904  *	{ A[i] -> [2i]: 0 <= i < 10; B[i] -> [2i+1] : 0 <= i < 10 }
3905  *
3906  * and assuming the offset is 0 for the A domain and 1 for the B domain,
3907  * we apply the mapping
3908  *
3909  *	{ [j] -> [j, 0] }
3910  *
3911  * to the schedule of the "A" domain and the mapping
3912  *
3913  *	{ [j - 1] -> [j, 1] }
3914  *
3915  * to the schedule of the "B" domain.
3916  *
3917  *
3918  * Note that after the transformation, the differences between pairs
3919  * of values of the current dimension over all domains are multiples
3920  * of stride and that we have therefore exposed the stride.
3921  *
3922  *
3923  * To see that the mapping preserves the lexicographic order,
3924  * first note that each of the individual maps above preserves the order.
3925  * If the value of the current iterator is j1 in one domain and j2 in another,
3926  * then if j1 = j2, we know that the same map is applied to both domains
3927  * and the order is preserved.
3928  * Otherwise, let us assume, without loss of generality, that j1 < j2.
3929  * If c1 >= c2 (with c1 and c2 the corresponding offsets), then
3930  *
3931  *	j1 - c1 < j2 - c2
3932  *
3933  * and the order is preserved.
3934  * If c1 < c2, then we know
3935  *
3936  *	0 <= c2 - c1 < s
3937  *
3938  * We also have
3939  *
3940  *	j2 - j1 = n * s + r
3941  *
3942  * with n >= 0 and 0 <= r < s.
3943  * In other words, r = c2 - c1.
3944  * If n > 0, then
3945  *
3946  *	j1 - c1 < j2 - c2
3947  *
3948  * If n = 0, then
3949  *
3950  *	j1 - c1 = j2 - c2
3951  *
3952  * and so
3953  *
3954  *	(j1 - c1, c1) << (j2 - c2, c2)
3955  *
3956  * with "<<" the lexicographic order, proving that the order is preserved
3957  * in all cases.
3958  */
construct_shifted_executed(struct isl_set_map_pair * domain,int * order,int n,__isl_keep isl_val * stride,__isl_keep isl_multi_val * offset,__isl_keep isl_ast_build * build)3959 static __isl_give isl_union_map *construct_shifted_executed(
3960 	struct isl_set_map_pair *domain, int *order, int n,
3961 	__isl_keep isl_val *stride, __isl_keep isl_multi_val *offset,
3962 	__isl_keep isl_ast_build *build)
3963 {
3964 	int i;
3965 	isl_union_map *executed;
3966 	isl_space *space;
3967 	isl_map *map;
3968 	isl_size depth;
3969 	isl_constraint *c;
3970 
3971 	depth = isl_ast_build_get_depth(build);
3972 	if (depth < 0)
3973 		return NULL;
3974 	space = isl_ast_build_get_space(build, 1);
3975 	executed = isl_union_map_empty(isl_space_copy(space));
3976 	space = isl_space_map_from_set(space);
3977 	map = isl_map_identity(isl_space_copy(space));
3978 	map = isl_map_eliminate(map, isl_dim_out, depth, 1);
3979 	map = isl_map_insert_dims(map, isl_dim_out, depth + 1, 1);
3980 	space = isl_space_insert_dims(space, isl_dim_out, depth + 1, 1);
3981 
3982 	c = isl_constraint_alloc_equality(isl_local_space_from_space(space));
3983 	c = isl_constraint_set_coefficient_si(c, isl_dim_in, depth, 1);
3984 	c = isl_constraint_set_coefficient_si(c, isl_dim_out, depth, -1);
3985 
3986 	for (i = 0; i < n; ++i) {
3987 		isl_map *map_i;
3988 		isl_val *v;
3989 
3990 		v = isl_multi_val_get_val(offset, i);
3991 		if (!v)
3992 			break;
3993 		map_i = isl_map_copy(map);
3994 		map_i = isl_map_fix_val(map_i, isl_dim_out, depth + 1,
3995 					isl_val_copy(v));
3996 		v = isl_val_neg(v);
3997 		c = isl_constraint_set_constant_val(c, v);
3998 		map_i = isl_map_add_constraint(map_i, isl_constraint_copy(c));
3999 
4000 		map_i = isl_map_apply_domain(isl_map_copy(domain[order[i]].map),
4001 						map_i);
4002 		executed = isl_union_map_add_map(executed, map_i);
4003 	}
4004 
4005 	isl_constraint_free(c);
4006 	isl_map_free(map);
4007 
4008 	if (i < n)
4009 		executed = isl_union_map_free(executed);
4010 
4011 	return executed;
4012 }
4013 
4014 /* Generate code for a single component, after exposing the stride,
4015  * given that the schedule domain is "shifted strided".
4016  *
4017  * The component inverse schedule is specified as the "map" fields
4018  * of the elements of "domain" indexed by the first "n" elements of "order".
4019  *
4020  * The schedule domain being "shifted strided" means that the differences
4021  * between the values of the current dimension of domain "i"
4022  * and the values of the current dimension for some reference domain are
4023  * equal to
4024  *
4025  *	stride * integer + offset[i]
4026  *
4027  * We first look for the domain with the "smallest" value for the current
4028  * dimension and adjust the offsets such that the offset of the "smallest"
4029  * domain is equal to zero.  The other offsets are reduced modulo stride.
4030  *
4031  * Based on this information, we construct a new inverse schedule in
4032  * construct_shifted_executed that exposes the stride.
4033  * Since this involves the introduction of a new schedule dimension,
4034  * the build needs to be changed accordingly.
4035  * After computing the AST, the newly introduced dimension needs
4036  * to be removed again from the list of grafts.  We do this by plugging
4037  * in a mapping that represents the new schedule domain in terms of the
4038  * old schedule domain.
4039  */
generate_shift_component(struct isl_set_map_pair * domain,int * order,int n,__isl_keep isl_val * stride,__isl_keep isl_multi_val * offset,__isl_take isl_ast_build * build)4040 static __isl_give isl_ast_graft_list *generate_shift_component(
4041 	struct isl_set_map_pair *domain, int *order, int n,
4042 	__isl_keep isl_val *stride, __isl_keep isl_multi_val *offset,
4043 	__isl_take isl_ast_build *build)
4044 {
4045 	isl_ast_graft_list *list;
4046 	int first;
4047 	isl_size depth;
4048 	isl_val *val;
4049 	isl_multi_val *mv;
4050 	isl_space *space;
4051 	isl_multi_aff *ma, *zero;
4052 	isl_union_map *executed;
4053 
4054 	depth = isl_ast_build_get_depth(build);
4055 
4056 	first = first_offset(domain, order, n, build);
4057 	if (depth < 0 || first < 0)
4058 		goto error;
4059 
4060 	mv = isl_multi_val_copy(offset);
4061 	val = isl_multi_val_get_val(offset, first);
4062 	val = isl_val_neg(val);
4063 	mv = isl_multi_val_add_val(mv, val);
4064 	mv = isl_multi_val_mod_val(mv, isl_val_copy(stride));
4065 
4066 	executed = construct_shifted_executed(domain, order, n, stride, mv,
4067 						build);
4068 	space = isl_ast_build_get_space(build, 1);
4069 	space = isl_space_map_from_set(space);
4070 	ma = isl_multi_aff_identity(isl_space_copy(space));
4071 	space = isl_space_from_domain(isl_space_domain(space));
4072 	space = isl_space_add_dims(space, isl_dim_out, 1);
4073 	zero = isl_multi_aff_zero(space);
4074 	ma = isl_multi_aff_range_splice(ma, depth + 1, zero);
4075 	build = isl_ast_build_insert_dim(build, depth + 1);
4076 	list = generate_shifted_component(executed, build);
4077 
4078 	list = isl_ast_graft_list_preimage_multi_aff(list, ma);
4079 
4080 	isl_multi_val_free(mv);
4081 
4082 	return list;
4083 error:
4084 	isl_ast_build_free(build);
4085 	return NULL;
4086 }
4087 
4088 /* Does any node in the schedule tree rooted at the current schedule node
4089  * of "build" depend on outer schedule nodes?
4090  */
has_anchored_subtree(__isl_keep isl_ast_build * build)4091 static int has_anchored_subtree(__isl_keep isl_ast_build *build)
4092 {
4093 	isl_schedule_node *node;
4094 	int dependent = 0;
4095 
4096 	node = isl_ast_build_get_schedule_node(build);
4097 	dependent = isl_schedule_node_is_subtree_anchored(node);
4098 	isl_schedule_node_free(node);
4099 
4100 	return dependent;
4101 }
4102 
4103 /* Generate code for a single component.
4104  *
4105  * The component inverse schedule is specified as the "map" fields
4106  * of the elements of "domain" indexed by the first "n" elements of "order".
4107  *
4108  * This function may modify the "set" fields of "domain".
4109  *
4110  * Before proceeding with the actual code generation for the component,
4111  * we first check if there are any "shifted" strides, meaning that
4112  * the schedule domains of the individual domains are all strided,
4113  * but that they have different offsets, resulting in the union
4114  * of schedule domains not being strided anymore.
4115  *
4116  * The simplest example is the schedule
4117  *
4118  *	{ A[i] -> [2i]: 0 <= i < 10; B[i] -> [2i+1] : 0 <= i < 10 }
4119  *
4120  * Both schedule domains are strided, but their union is not.
4121  * This function detects such cases and then rewrites the schedule to
4122  *
4123  *	{ A[i] -> [2i, 0]: 0 <= i < 10; B[i] -> [2i, 1] : 0 <= i < 10 }
4124  *
4125  * In the new schedule, the schedule domains have the same offset (modulo
4126  * the stride), ensuring that the union of schedule domains is also strided.
4127  *
4128  *
4129  * If there is only a single domain in the component, then there is
4130  * nothing to do.   Similarly, if the current schedule dimension has
4131  * a fixed value for almost all domains then there is nothing to be done.
4132  * In particular, we need at least two domains where the current schedule
4133  * dimension does not have a fixed value.
4134  * Finally, in case of a schedule map input,
4135  * if any of the options refer to the current schedule dimension,
4136  * then we bail out as well.  It would be possible to reformulate the options
4137  * in terms of the new schedule domain, but that would introduce constraints
4138  * that separate the domains in the options and that is something we would
4139  * like to avoid.
4140  * In the case of a schedule tree input, we bail out if any of
4141  * the descendants of the current schedule node refer to outer
4142  * schedule nodes in any way.
4143  *
4144  *
4145  * To see if there is any shifted stride, we look at the differences
4146  * between the values of the current dimension in pairs of domains
4147  * for equal values of outer dimensions.  These differences should be
4148  * of the form
4149  *
4150  *	m x + r
4151  *
4152  * with "m" the stride and "r" a constant.  Note that we cannot perform
4153  * this analysis on individual domains as the lower bound in each domain
4154  * may depend on parameters or outer dimensions and so the current dimension
4155  * itself may not have a fixed remainder on division by the stride.
4156  *
4157  * In particular, we compare the first domain that does not have an
4158  * obviously fixed value for the current dimension to itself and all
4159  * other domains and collect the offsets and the gcd of the strides.
4160  * If the gcd becomes one, then we failed to find shifted strides.
4161  * If the gcd is zero, then the differences were all fixed, meaning
4162  * that some domains had non-obviously fixed values for the current dimension.
4163  * If all the offsets are the same (for those domains that do not have
4164  * an obviously fixed value for the current dimension), then we do not
4165  * apply the transformation.
4166  * If none of the domains were skipped, then there is nothing to do.
4167  * If some of them were skipped, then if we apply separation, the schedule
4168  * domain should get split in pieces with a (non-shifted) stride.
4169  *
4170  * Otherwise, we apply a shift to expose the stride in
4171  * generate_shift_component.
4172  */
generate_component(struct isl_set_map_pair * domain,int * order,int n,__isl_take isl_ast_build * build)4173 static __isl_give isl_ast_graft_list *generate_component(
4174 	struct isl_set_map_pair *domain, int *order, int n,
4175 	__isl_take isl_ast_build *build)
4176 {
4177 	int i, d;
4178 	isl_size depth;
4179 	isl_ctx *ctx;
4180 	isl_map *map;
4181 	isl_set *deltas;
4182 	isl_val *gcd = NULL;
4183 	isl_multi_val *mv;
4184 	int fixed, skip;
4185 	int base;
4186 	isl_ast_graft_list *list;
4187 	int res = 0;
4188 
4189 	depth = isl_ast_build_get_depth(build);
4190 	if (depth < 0)
4191 		goto error;
4192 
4193 	skip = n == 1;
4194 	if (skip >= 0 && !skip)
4195 		skip = at_most_one_non_fixed(domain, order, n, depth);
4196 	if (skip >= 0 && !skip) {
4197 		if (isl_ast_build_has_schedule_node(build))
4198 			skip = has_anchored_subtree(build);
4199 		else
4200 			skip = isl_ast_build_options_involve_depth(build);
4201 	}
4202 	if (skip < 0)
4203 		goto error;
4204 	if (skip)
4205 		return generate_shifted_component_from_list(domain,
4206 							    order, n, build);
4207 
4208 	base = eliminate_non_fixed(domain, order, n, depth, build);
4209 	if (base < 0)
4210 		goto error;
4211 
4212 	ctx = isl_ast_build_get_ctx(build);
4213 
4214 	mv = isl_multi_val_zero(isl_space_set_alloc(ctx, 0, n));
4215 
4216 	fixed = 1;
4217 	for (i = 0; i < n; ++i) {
4218 		isl_val *r, *m;
4219 
4220 		map = isl_map_from_domain_and_range(
4221 					isl_set_copy(domain[order[base]].set),
4222 					isl_set_copy(domain[order[i]].set));
4223 		for (d = 0; d < depth; ++d)
4224 			map = isl_map_equate(map, isl_dim_in, d,
4225 						    isl_dim_out, d);
4226 		deltas = isl_map_deltas(map);
4227 		res = isl_set_dim_residue_class_val(deltas, depth, &m, &r);
4228 		isl_set_free(deltas);
4229 		if (res < 0)
4230 			break;
4231 
4232 		if (i == 0)
4233 			gcd = m;
4234 		else
4235 			gcd = isl_val_gcd(gcd, m);
4236 		if (isl_val_is_one(gcd)) {
4237 			isl_val_free(r);
4238 			break;
4239 		}
4240 		mv = isl_multi_val_set_val(mv, i, r);
4241 
4242 		res = dim_is_fixed(domain[order[i]].set, depth);
4243 		if (res < 0)
4244 			break;
4245 		if (res)
4246 			continue;
4247 
4248 		if (fixed && i > base) {
4249 			isl_val *a, *b;
4250 			a = isl_multi_val_get_val(mv, i);
4251 			b = isl_multi_val_get_val(mv, base);
4252 			if (isl_val_ne(a, b))
4253 				fixed = 0;
4254 			isl_val_free(a);
4255 			isl_val_free(b);
4256 		}
4257 	}
4258 
4259 	if (res < 0 || !gcd) {
4260 		isl_ast_build_free(build);
4261 		list = NULL;
4262 	} else if (i < n || fixed || isl_val_is_zero(gcd)) {
4263 		list = generate_shifted_component_from_list(domain,
4264 							    order, n, build);
4265 	} else {
4266 		list = generate_shift_component(domain, order, n, gcd, mv,
4267 						build);
4268 	}
4269 
4270 	isl_val_free(gcd);
4271 	isl_multi_val_free(mv);
4272 
4273 	return list;
4274 error:
4275 	isl_ast_build_free(build);
4276 	return NULL;
4277 }
4278 
4279 /* Store both "map" itself and its domain in the
4280  * structure pointed to by *next and advance to the next array element.
4281  */
extract_domain(__isl_take isl_map * map,void * user)4282 static isl_stat extract_domain(__isl_take isl_map *map, void *user)
4283 {
4284 	struct isl_set_map_pair **next = user;
4285 
4286 	(*next)->map = isl_map_copy(map);
4287 	(*next)->set = isl_map_domain(map);
4288 	(*next)++;
4289 
4290 	return isl_stat_ok;
4291 }
4292 
4293 static isl_bool after_in_tree(__isl_keep isl_union_map *umap,
4294 	__isl_keep isl_schedule_node *node);
4295 
4296 /* Is any domain element of "umap" scheduled after any of
4297  * the corresponding image elements by the tree rooted at
4298  * the child of "node"?
4299  */
after_in_child(__isl_keep isl_union_map * umap,__isl_keep isl_schedule_node * node)4300 static isl_bool after_in_child(__isl_keep isl_union_map *umap,
4301 	__isl_keep isl_schedule_node *node)
4302 {
4303 	isl_schedule_node *child;
4304 	isl_bool after;
4305 
4306 	child = isl_schedule_node_get_child(node, 0);
4307 	after = after_in_tree(umap, child);
4308 	isl_schedule_node_free(child);
4309 
4310 	return after;
4311 }
4312 
4313 /* Is any domain element of "umap" scheduled after any of
4314  * the corresponding image elements by the tree rooted at
4315  * the band node "node"?
4316  *
4317  * We first check if any domain element is scheduled after any
4318  * of the corresponding image elements by the band node itself.
4319  * If not, we restrict "map" to those pairs of element that
4320  * are scheduled together by the band node and continue with
4321  * the child of the band node.
4322  * If there are no such pairs then the map passed to after_in_child
4323  * will be empty causing it to return 0.
4324  */
after_in_band(__isl_keep isl_union_map * umap,__isl_keep isl_schedule_node * node)4325 static isl_bool after_in_band(__isl_keep isl_union_map *umap,
4326 	__isl_keep isl_schedule_node *node)
4327 {
4328 	isl_multi_union_pw_aff *mupa;
4329 	isl_union_map *partial, *test, *gt, *universe, *umap1, *umap2;
4330 	isl_union_set *domain, *range;
4331 	isl_space *space;
4332 	isl_bool empty;
4333 	isl_bool after;
4334 	isl_size n;
4335 
4336 	n = isl_schedule_node_band_n_member(node);
4337 	if (n < 0)
4338 		return isl_bool_error;
4339 	if (n == 0)
4340 		return after_in_child(umap, node);
4341 
4342 	mupa = isl_schedule_node_band_get_partial_schedule(node);
4343 	space = isl_multi_union_pw_aff_get_space(mupa);
4344 	partial = isl_union_map_from_multi_union_pw_aff(mupa);
4345 	test = isl_union_map_copy(umap);
4346 	test = isl_union_map_apply_domain(test, isl_union_map_copy(partial));
4347 	test = isl_union_map_apply_range(test, isl_union_map_copy(partial));
4348 	gt = isl_union_map_from_map(isl_map_lex_gt(space));
4349 	test = isl_union_map_intersect(test, gt);
4350 	empty = isl_union_map_is_empty(test);
4351 	isl_union_map_free(test);
4352 
4353 	if (empty < 0 || !empty) {
4354 		isl_union_map_free(partial);
4355 		return isl_bool_not(empty);
4356 	}
4357 
4358 	universe = isl_union_map_universe(isl_union_map_copy(umap));
4359 	domain = isl_union_map_domain(isl_union_map_copy(universe));
4360 	range = isl_union_map_range(universe);
4361 	umap1 = isl_union_map_copy(partial);
4362 	umap1 = isl_union_map_intersect_domain(umap1, domain);
4363 	umap2 = isl_union_map_intersect_domain(partial, range);
4364 	test = isl_union_map_apply_range(umap1, isl_union_map_reverse(umap2));
4365 	test = isl_union_map_intersect(test, isl_union_map_copy(umap));
4366 	after = after_in_child(test, node);
4367 	isl_union_map_free(test);
4368 	return after;
4369 }
4370 
4371 /* Is any domain element of "umap" scheduled after any of
4372  * the corresponding image elements by the tree rooted at
4373  * the context node "node"?
4374  *
4375  * The context constraints apply to the schedule domain,
4376  * so we cannot apply them directly to "umap", which contains
4377  * pairs of statement instances.  Instead, we add them
4378  * to the range of the prefix schedule for both domain and
4379  * range of "umap".
4380  */
after_in_context(__isl_keep isl_union_map * umap,__isl_keep isl_schedule_node * node)4381 static isl_bool after_in_context(__isl_keep isl_union_map *umap,
4382 	__isl_keep isl_schedule_node *node)
4383 {
4384 	isl_union_map *prefix, *universe, *umap1, *umap2;
4385 	isl_union_set *domain, *range;
4386 	isl_set *context;
4387 	isl_bool after;
4388 
4389 	umap = isl_union_map_copy(umap);
4390 	context = isl_schedule_node_context_get_context(node);
4391 	prefix = isl_schedule_node_get_prefix_schedule_union_map(node);
4392 	universe = isl_union_map_universe(isl_union_map_copy(umap));
4393 	domain = isl_union_map_domain(isl_union_map_copy(universe));
4394 	range = isl_union_map_range(universe);
4395 	umap1 = isl_union_map_copy(prefix);
4396 	umap1 = isl_union_map_intersect_domain(umap1, domain);
4397 	umap2 = isl_union_map_intersect_domain(prefix, range);
4398 	umap1 = isl_union_map_intersect_range(umap1,
4399 					    isl_union_set_from_set(context));
4400 	umap1 = isl_union_map_apply_range(umap1, isl_union_map_reverse(umap2));
4401 	umap = isl_union_map_intersect(umap, umap1);
4402 
4403 	after = after_in_child(umap, node);
4404 
4405 	isl_union_map_free(umap);
4406 
4407 	return after;
4408 }
4409 
4410 /* Is any domain element of "umap" scheduled after any of
4411  * the corresponding image elements by the tree rooted at
4412  * the expansion node "node"?
4413  *
4414  * We apply the expansion to domain and range of "umap" and
4415  * continue with its child.
4416  */
after_in_expansion(__isl_keep isl_union_map * umap,__isl_keep isl_schedule_node * node)4417 static isl_bool after_in_expansion(__isl_keep isl_union_map *umap,
4418 	__isl_keep isl_schedule_node *node)
4419 {
4420 	isl_union_map *expansion;
4421 	isl_bool after;
4422 
4423 	expansion = isl_schedule_node_expansion_get_expansion(node);
4424 	umap = isl_union_map_copy(umap);
4425 	umap = isl_union_map_apply_domain(umap, isl_union_map_copy(expansion));
4426 	umap = isl_union_map_apply_range(umap, expansion);
4427 
4428 	after = after_in_child(umap, node);
4429 
4430 	isl_union_map_free(umap);
4431 
4432 	return after;
4433 }
4434 
4435 /* Is any domain element of "umap" scheduled after any of
4436  * the corresponding image elements by the tree rooted at
4437  * the extension node "node"?
4438  *
4439  * Since the extension node may add statement instances before or
4440  * after the pairs of statement instances in "umap", we return isl_bool_true
4441  * to ensure that these pairs are not broken up.
4442  */
after_in_extension(__isl_keep isl_union_map * umap,__isl_keep isl_schedule_node * node)4443 static isl_bool after_in_extension(__isl_keep isl_union_map *umap,
4444 	__isl_keep isl_schedule_node *node)
4445 {
4446 	return isl_bool_true;
4447 }
4448 
4449 /* Is any domain element of "umap" scheduled after any of
4450  * the corresponding image elements by the tree rooted at
4451  * the filter node "node"?
4452  *
4453  * We intersect domain and range of "umap" with the filter and
4454  * continue with its child.
4455  */
after_in_filter(__isl_keep isl_union_map * umap,__isl_keep isl_schedule_node * node)4456 static isl_bool after_in_filter(__isl_keep isl_union_map *umap,
4457 	__isl_keep isl_schedule_node *node)
4458 {
4459 	isl_union_set *filter;
4460 	isl_bool after;
4461 
4462 	umap = isl_union_map_copy(umap);
4463 	filter = isl_schedule_node_filter_get_filter(node);
4464 	umap = isl_union_map_intersect_domain(umap, isl_union_set_copy(filter));
4465 	umap = isl_union_map_intersect_range(umap, filter);
4466 
4467 	after = after_in_child(umap, node);
4468 
4469 	isl_union_map_free(umap);
4470 
4471 	return after;
4472 }
4473 
4474 /* Is any domain element of "umap" scheduled after any of
4475  * the corresponding image elements by the tree rooted at
4476  * the set node "node"?
4477  *
4478  * This is only the case if this condition holds in any
4479  * of the (filter) children of the set node.
4480  * In particular, if the domain and the range of "umap"
4481  * are contained in different children, then the condition
4482  * does not hold.
4483  */
after_in_set(__isl_keep isl_union_map * umap,__isl_keep isl_schedule_node * node)4484 static isl_bool after_in_set(__isl_keep isl_union_map *umap,
4485 	__isl_keep isl_schedule_node *node)
4486 {
4487 	int i;
4488 	isl_size n;
4489 
4490 	n = isl_schedule_node_n_children(node);
4491 	if (n < 0)
4492 		return isl_bool_error;
4493 	for (i = 0; i < n; ++i) {
4494 		isl_schedule_node *child;
4495 		isl_bool after;
4496 
4497 		child = isl_schedule_node_get_child(node, i);
4498 		after = after_in_tree(umap, child);
4499 		isl_schedule_node_free(child);
4500 
4501 		if (after < 0 || after)
4502 			return after;
4503 	}
4504 
4505 	return isl_bool_false;
4506 }
4507 
4508 /* Return the filter of child "i" of "node".
4509  */
child_filter(__isl_keep isl_schedule_node * node,int i)4510 static __isl_give isl_union_set *child_filter(
4511 	__isl_keep isl_schedule_node *node, int i)
4512 {
4513 	isl_schedule_node *child;
4514 	isl_union_set *filter;
4515 
4516 	child = isl_schedule_node_get_child(node, i);
4517 	filter = isl_schedule_node_filter_get_filter(child);
4518 	isl_schedule_node_free(child);
4519 
4520 	return filter;
4521 }
4522 
4523 /* Is any domain element of "umap" scheduled after any of
4524  * the corresponding image elements by the tree rooted at
4525  * the sequence node "node"?
4526  *
4527  * This happens in particular if any domain element is
4528  * contained in a later child than one containing a range element or
4529  * if the condition holds within a given child in the sequence.
4530  * The later part of the condition is checked by after_in_set.
4531  */
after_in_sequence(__isl_keep isl_union_map * umap,__isl_keep isl_schedule_node * node)4532 static isl_bool after_in_sequence(__isl_keep isl_union_map *umap,
4533 	__isl_keep isl_schedule_node *node)
4534 {
4535 	int i, j;
4536 	isl_size n;
4537 	isl_union_map *umap_i;
4538 	isl_bool empty;
4539 	isl_bool after = isl_bool_false;
4540 
4541 	n = isl_schedule_node_n_children(node);
4542 	if (n < 0)
4543 		return isl_bool_error;
4544 	for (i = 1; i < n; ++i) {
4545 		isl_union_set *filter_i;
4546 
4547 		umap_i = isl_union_map_copy(umap);
4548 		filter_i = child_filter(node, i);
4549 		umap_i = isl_union_map_intersect_domain(umap_i, filter_i);
4550 		empty = isl_union_map_is_empty(umap_i);
4551 		if (empty < 0)
4552 			goto error;
4553 		if (empty) {
4554 			isl_union_map_free(umap_i);
4555 			continue;
4556 		}
4557 
4558 		for (j = 0; j < i; ++j) {
4559 			isl_union_set *filter_j;
4560 			isl_union_map *umap_ij;
4561 
4562 			umap_ij = isl_union_map_copy(umap_i);
4563 			filter_j = child_filter(node, j);
4564 			umap_ij = isl_union_map_intersect_range(umap_ij,
4565 								filter_j);
4566 			empty = isl_union_map_is_empty(umap_ij);
4567 			isl_union_map_free(umap_ij);
4568 
4569 			if (empty < 0)
4570 				goto error;
4571 			if (!empty)
4572 				after = isl_bool_true;
4573 			if (after)
4574 				break;
4575 		}
4576 
4577 		isl_union_map_free(umap_i);
4578 		if (after)
4579 			break;
4580 	}
4581 
4582 	if (after < 0 || after)
4583 		return after;
4584 
4585 	return after_in_set(umap, node);
4586 error:
4587 	isl_union_map_free(umap_i);
4588 	return isl_bool_error;
4589 }
4590 
4591 /* Is any domain element of "umap" scheduled after any of
4592  * the corresponding image elements by the tree rooted at "node"?
4593  *
4594  * If "umap" is empty, then clearly there is no such element.
4595  * Otherwise, consider the different types of nodes separately.
4596  */
after_in_tree(__isl_keep isl_union_map * umap,__isl_keep isl_schedule_node * node)4597 static isl_bool after_in_tree(__isl_keep isl_union_map *umap,
4598 	__isl_keep isl_schedule_node *node)
4599 {
4600 	isl_bool empty;
4601 	enum isl_schedule_node_type type;
4602 
4603 	empty = isl_union_map_is_empty(umap);
4604 	if (empty < 0)
4605 		return isl_bool_error;
4606 	if (empty)
4607 		return isl_bool_false;
4608 	if (!node)
4609 		return isl_bool_error;
4610 
4611 	type = isl_schedule_node_get_type(node);
4612 	switch (type) {
4613 	case isl_schedule_node_error:
4614 		return isl_bool_error;
4615 	case isl_schedule_node_leaf:
4616 		return isl_bool_false;
4617 	case isl_schedule_node_band:
4618 		return after_in_band(umap, node);
4619 	case isl_schedule_node_domain:
4620 		isl_die(isl_schedule_node_get_ctx(node), isl_error_internal,
4621 			"unexpected internal domain node",
4622 			return isl_bool_error);
4623 	case isl_schedule_node_context:
4624 		return after_in_context(umap, node);
4625 	case isl_schedule_node_expansion:
4626 		return after_in_expansion(umap, node);
4627 	case isl_schedule_node_extension:
4628 		return after_in_extension(umap, node);
4629 	case isl_schedule_node_filter:
4630 		return after_in_filter(umap, node);
4631 	case isl_schedule_node_guard:
4632 	case isl_schedule_node_mark:
4633 		return after_in_child(umap, node);
4634 	case isl_schedule_node_set:
4635 		return after_in_set(umap, node);
4636 	case isl_schedule_node_sequence:
4637 		return after_in_sequence(umap, node);
4638 	}
4639 
4640 	return isl_bool_true;
4641 }
4642 
4643 /* Is any domain element of "map1" scheduled after any domain
4644  * element of "map2" by the subtree underneath the current band node,
4645  * while at the same time being scheduled together by the current
4646  * band node, i.e., by "map1" and "map2?
4647  *
4648  * If the child of the current band node is a leaf, then
4649  * no element can be scheduled after any other element.
4650  *
4651  * Otherwise, we construct a relation between domain elements
4652  * of "map1" and domain elements of "map2" that are scheduled
4653  * together and then check if the subtree underneath the current
4654  * band node determines their relative order.
4655  */
after_in_subtree(__isl_keep isl_ast_build * build,__isl_keep isl_map * map1,__isl_keep isl_map * map2)4656 static isl_bool after_in_subtree(__isl_keep isl_ast_build *build,
4657 	__isl_keep isl_map *map1, __isl_keep isl_map *map2)
4658 {
4659 	isl_schedule_node *node;
4660 	isl_map *map;
4661 	isl_union_map *umap;
4662 	isl_bool after;
4663 
4664 	node = isl_ast_build_get_schedule_node(build);
4665 	if (!node)
4666 		return isl_bool_error;
4667 	node = isl_schedule_node_child(node, 0);
4668 	if (isl_schedule_node_get_type(node) == isl_schedule_node_leaf) {
4669 		isl_schedule_node_free(node);
4670 		return isl_bool_false;
4671 	}
4672 	map = isl_map_copy(map2);
4673 	map = isl_map_apply_domain(map, isl_map_copy(map1));
4674 	umap = isl_union_map_from_map(map);
4675 	after = after_in_tree(umap, node);
4676 	isl_union_map_free(umap);
4677 	isl_schedule_node_free(node);
4678 	return after;
4679 }
4680 
4681 /* Internal data for any_scheduled_after.
4682  *
4683  * "build" is the build in which the AST is constructed.
4684  * "depth" is the number of loops that have already been generated
4685  * "group_coscheduled" is a local copy of options->ast_build_group_coscheduled
4686  * "domain" is an array of set-map pairs corresponding to the different
4687  * iteration domains.  The set is the schedule domain, i.e., the domain
4688  * of the inverse schedule, while the map is the inverse schedule itself.
4689  */
4690 struct isl_any_scheduled_after_data {
4691 	isl_ast_build *build;
4692 	int depth;
4693 	int group_coscheduled;
4694 	struct isl_set_map_pair *domain;
4695 };
4696 
4697 /* Is any element of domain "i" scheduled after any element of domain "j"
4698  * (for a common iteration of the first data->depth loops)?
4699  *
4700  * data->domain[i].set contains the domain of the inverse schedule
4701  * for domain "i", i.e., elements in the schedule domain.
4702  *
4703  * If we are inside a band of a schedule tree and there is a pair
4704  * of elements in the two domains that is schedule together by
4705  * the current band, then we check if any element of "i" may be schedule
4706  * after element of "j" by the descendants of the band node.
4707  *
4708  * If data->group_coscheduled is set, then we also return 1 if there
4709  * is any pair of elements in the two domains that are scheduled together.
4710  */
any_scheduled_after(int i,int j,void * user)4711 static isl_bool any_scheduled_after(int i, int j, void *user)
4712 {
4713 	struct isl_any_scheduled_after_data *data = user;
4714 	isl_size dim = isl_set_dim(data->domain[i].set, isl_dim_set);
4715 	int pos;
4716 
4717 	if (dim < 0)
4718 		return isl_bool_error;
4719 
4720 	for (pos = data->depth; pos < dim; ++pos) {
4721 		int follows;
4722 
4723 		follows = isl_set_follows_at(data->domain[i].set,
4724 						data->domain[j].set, pos);
4725 
4726 		if (follows < -1)
4727 			return isl_bool_error;
4728 		if (follows > 0)
4729 			return isl_bool_true;
4730 		if (follows < 0)
4731 			return isl_bool_false;
4732 	}
4733 
4734 	if (isl_ast_build_has_schedule_node(data->build)) {
4735 		isl_bool after;
4736 
4737 		after = after_in_subtree(data->build, data->domain[i].map,
4738 					    data->domain[j].map);
4739 		if (after < 0 || after)
4740 			return after;
4741 	}
4742 
4743 	return isl_bool_ok(data->group_coscheduled);
4744 }
4745 
4746 /* Look for independent components at the current depth and generate code
4747  * for each component separately.  The resulting lists of grafts are
4748  * merged in an attempt to combine grafts with identical guards.
4749  *
4750  * Code for two domains can be generated separately if all the elements
4751  * of one domain are scheduled before (or together with) all the elements
4752  * of the other domain.  We therefore consider the graph with as nodes
4753  * the domains and an edge between two nodes if any element of the first
4754  * node is scheduled after any element of the second node.
4755  * If the ast_build_group_coscheduled is set, then we also add an edge if
4756  * there is any pair of elements in the two domains that are scheduled
4757  * together.
4758  * Code is then generated (by generate_component)
4759  * for each of the strongly connected components in this graph
4760  * in their topological order.
4761  *
4762  * Since the test is performed on the domain of the inverse schedules of
4763  * the different domains, we precompute these domains and store
4764  * them in data.domain.
4765  */
generate_components(__isl_take isl_union_map * executed,__isl_take isl_ast_build * build)4766 static __isl_give isl_ast_graft_list *generate_components(
4767 	__isl_take isl_union_map *executed, __isl_take isl_ast_build *build)
4768 {
4769 	int i;
4770 	isl_ctx *ctx = isl_ast_build_get_ctx(build);
4771 	isl_size n = isl_union_map_n_map(executed);
4772 	isl_size depth;
4773 	struct isl_any_scheduled_after_data data;
4774 	struct isl_set_map_pair *next;
4775 	struct isl_tarjan_graph *g = NULL;
4776 	isl_ast_graft_list *list = NULL;
4777 	int n_domain = 0;
4778 
4779 	data.domain = NULL;
4780 	if (n < 0)
4781 		goto error;
4782 	data.domain = isl_calloc_array(ctx, struct isl_set_map_pair, n);
4783 	if (!data.domain)
4784 		goto error;
4785 	n_domain = n;
4786 
4787 	next = data.domain;
4788 	if (isl_union_map_foreach_map(executed, &extract_domain, &next) < 0)
4789 		goto error;
4790 
4791 	depth = isl_ast_build_get_depth(build);
4792 	if (depth < 0)
4793 		goto error;
4794 	data.build = build;
4795 	data.depth = depth;
4796 	data.group_coscheduled = isl_options_get_ast_build_group_coscheduled(ctx);
4797 	g = isl_tarjan_graph_init(ctx, n, &any_scheduled_after, &data);
4798 	if (!g)
4799 		goto error;
4800 
4801 	list = isl_ast_graft_list_alloc(ctx, 0);
4802 
4803 	i = 0;
4804 	while (list && n) {
4805 		isl_ast_graft_list *list_c;
4806 		int first = i;
4807 
4808 		if (g->order[i] == -1)
4809 			isl_die(ctx, isl_error_internal, "cannot happen",
4810 				goto error);
4811 		++i; --n;
4812 		while (g->order[i] != -1) {
4813 			++i; --n;
4814 		}
4815 
4816 		list_c = generate_component(data.domain,
4817 					    g->order + first, i - first,
4818 					    isl_ast_build_copy(build));
4819 		list = isl_ast_graft_list_merge(list, list_c, build);
4820 
4821 		++i;
4822 	}
4823 
4824 	if (0)
4825 error:		list = isl_ast_graft_list_free(list);
4826 	isl_tarjan_graph_free(g);
4827 	for (i = 0; i < n_domain; ++i) {
4828 		isl_map_free(data.domain[i].map);
4829 		isl_set_free(data.domain[i].set);
4830 	}
4831 	free(data.domain);
4832 	isl_union_map_free(executed);
4833 	isl_ast_build_free(build);
4834 
4835 	return list;
4836 }
4837 
4838 /* Generate code for the next level (and all inner levels).
4839  *
4840  * If "executed" is empty, i.e., no code needs to be generated,
4841  * then we return an empty list.
4842  *
4843  * If we have already generated code for all loop levels, then we pass
4844  * control to generate_inner_level.
4845  *
4846  * If "executed" lives in a single space, i.e., if code needs to be
4847  * generated for a single domain, then there can only be a single
4848  * component and we go directly to generate_shifted_component.
4849  * Otherwise, we call generate_components to detect the components
4850  * and to call generate_component on each of them separately.
4851  */
generate_next_level(__isl_take isl_union_map * executed,__isl_take isl_ast_build * build)4852 static __isl_give isl_ast_graft_list *generate_next_level(
4853 	__isl_take isl_union_map *executed, __isl_take isl_ast_build *build)
4854 {
4855 	isl_size depth;
4856 	isl_size dim;
4857 	isl_size n;
4858 
4859 	if (!build || !executed)
4860 		goto error;
4861 
4862 	if (isl_union_map_is_empty(executed)) {
4863 		isl_ctx *ctx = isl_ast_build_get_ctx(build);
4864 		isl_union_map_free(executed);
4865 		isl_ast_build_free(build);
4866 		return isl_ast_graft_list_alloc(ctx, 0);
4867 	}
4868 
4869 	depth = isl_ast_build_get_depth(build);
4870 	dim = isl_ast_build_dim(build, isl_dim_set);
4871 	if (depth < 0 || dim < 0)
4872 		goto error;
4873 	if (depth >= dim)
4874 		return generate_inner_level(executed, build);
4875 
4876 	n = isl_union_map_n_map(executed);
4877 	if (n < 0)
4878 		goto error;
4879 	if (n == 1)
4880 		return generate_shifted_component(executed, build);
4881 
4882 	return generate_components(executed, build);
4883 error:
4884 	isl_union_map_free(executed);
4885 	isl_ast_build_free(build);
4886 	return NULL;
4887 }
4888 
4889 /* Internal data structure used by isl_ast_build_node_from_schedule_map.
4890  * internal, executed and build are the inputs to generate_code.
4891  * list collects the output.
4892  */
4893 struct isl_generate_code_data {
4894 	int internal;
4895 	isl_union_map *executed;
4896 	isl_ast_build *build;
4897 
4898 	isl_ast_graft_list *list;
4899 };
4900 
4901 /* Given an inverse schedule in terms of the external build schedule, i.e.,
4902  *
4903  *	[E -> S] -> D
4904  *
4905  * with E the external build schedule and S the additional schedule "space",
4906  * reformulate the inverse schedule in terms of the internal schedule domain,
4907  * i.e., return
4908  *
4909  *	[I -> S] -> D
4910  *
4911  * We first obtain a mapping
4912  *
4913  *	I -> E
4914  *
4915  * take the inverse and the product with S -> S, resulting in
4916  *
4917  *	[I -> S] -> [E -> S]
4918  *
4919  * Applying the map to the input produces the desired result.
4920  */
internal_executed(__isl_take isl_union_map * executed,__isl_keep isl_space * space,__isl_keep isl_ast_build * build)4921 static __isl_give isl_union_map *internal_executed(
4922 	__isl_take isl_union_map *executed, __isl_keep isl_space *space,
4923 	__isl_keep isl_ast_build *build)
4924 {
4925 	isl_map *id, *proj;
4926 
4927 	proj = isl_ast_build_get_schedule_map(build);
4928 	proj = isl_map_reverse(proj);
4929 	space = isl_space_map_from_set(isl_space_copy(space));
4930 	id = isl_map_identity(space);
4931 	proj = isl_map_product(proj, id);
4932 	executed = isl_union_map_apply_domain(executed,
4933 						isl_union_map_from_map(proj));
4934 	return executed;
4935 }
4936 
4937 /* Generate an AST that visits the elements in the range of data->executed
4938  * in the relative order specified by the corresponding domain element(s)
4939  * for those domain elements that belong to "set".
4940  * Add the result to data->list.
4941  *
4942  * The caller ensures that "set" is a universe domain.
4943  * "space" is the space of the additional part of the schedule.
4944  * It is equal to the space of "set" if build->domain is parametric.
4945  * Otherwise, it is equal to the range of the wrapped space of "set".
4946  *
4947  * If the build space is not parametric and
4948  * if isl_ast_build_node_from_schedule_map
4949  * was called from an outside user (data->internal not set), then
4950  * the (inverse) schedule refers to the external build domain and needs to
4951  * be transformed to refer to the internal build domain.
4952  *
4953  * If the build space is parametric, then we add some of the parameter
4954  * constraints to the executed relation.  Adding these constraints
4955  * allows for an earlier detection of conflicts in some cases.
4956  * However, we do not want to divide the executed relation into
4957  * more disjuncts than necessary.  We therefore approximate
4958  * the constraints on the parameters by a single disjunct set.
4959  *
4960  * The build is extended to include the additional part of the schedule.
4961  * If the original build space was not parametric, then the options
4962  * in data->build refer only to the additional part of the schedule
4963  * and they need to be adjusted to refer to the complete AST build
4964  * domain.
4965  *
4966  * After having adjusted inverse schedule and build, we start generating
4967  * code with the outer loop of the current code generation
4968  * in generate_next_level.
4969  *
4970  * If the original build space was not parametric, we undo the embedding
4971  * on the resulting isl_ast_node_list so that it can be used within
4972  * the outer AST build.
4973  */
generate_code_in_space(struct isl_generate_code_data * data,__isl_take isl_set * set,__isl_take isl_space * space)4974 static isl_stat generate_code_in_space(struct isl_generate_code_data *data,
4975 	__isl_take isl_set *set, __isl_take isl_space *space)
4976 {
4977 	isl_union_map *executed;
4978 	isl_ast_build *build;
4979 	isl_ast_graft_list *list;
4980 	int embed;
4981 
4982 	executed = isl_union_map_copy(data->executed);
4983 	executed = isl_union_map_intersect_domain(executed,
4984 						 isl_union_set_from_set(set));
4985 
4986 	embed = !isl_set_is_params(data->build->domain);
4987 	if (embed && !data->internal)
4988 		executed = internal_executed(executed, space, data->build);
4989 	if (!embed) {
4990 		isl_set *domain;
4991 		domain = isl_ast_build_get_domain(data->build);
4992 		domain = isl_set_from_basic_set(isl_set_simple_hull(domain));
4993 		executed = isl_union_map_intersect_params(executed, domain);
4994 	}
4995 
4996 	build = isl_ast_build_copy(data->build);
4997 	build = isl_ast_build_product(build, space);
4998 
4999 	list = generate_next_level(executed, build);
5000 
5001 	list = isl_ast_graft_list_unembed(list, embed);
5002 
5003 	data->list = isl_ast_graft_list_concat(data->list, list);
5004 
5005 	return isl_stat_ok;
5006 }
5007 
5008 /* Generate an AST that visits the elements in the range of data->executed
5009  * in the relative order specified by the corresponding domain element(s)
5010  * for those domain elements that belong to "set".
5011  * Add the result to data->list.
5012  *
5013  * The caller ensures that "set" is a universe domain.
5014  *
5015  * If the build space S is not parametric, then the space of "set"
5016  * need to be a wrapped relation with S as domain.  That is, it needs
5017  * to be of the form
5018  *
5019  *	[S -> T]
5020  *
5021  * Check this property and pass control to generate_code_in_space
5022  * passing along T.
5023  * If the build space is not parametric, then T is the space of "set".
5024  */
generate_code_set(__isl_take isl_set * set,void * user)5025 static isl_stat generate_code_set(__isl_take isl_set *set, void *user)
5026 {
5027 	struct isl_generate_code_data *data = user;
5028 	isl_space *space, *build_space;
5029 	int is_domain;
5030 
5031 	space = isl_set_get_space(set);
5032 
5033 	if (isl_set_is_params(data->build->domain))
5034 		return generate_code_in_space(data, set, space);
5035 
5036 	build_space = isl_ast_build_get_space(data->build, data->internal);
5037 	space = isl_space_unwrap(space);
5038 	is_domain = isl_space_is_domain(build_space, space);
5039 	isl_space_free(build_space);
5040 	space = isl_space_range(space);
5041 
5042 	if (is_domain < 0)
5043 		goto error;
5044 	if (!is_domain)
5045 		isl_die(isl_set_get_ctx(set), isl_error_invalid,
5046 			"invalid nested schedule space", goto error);
5047 
5048 	return generate_code_in_space(data, set, space);
5049 error:
5050 	isl_set_free(set);
5051 	isl_space_free(space);
5052 	return isl_stat_error;
5053 }
5054 
5055 /* Generate an AST that visits the elements in the range of "executed"
5056  * in the relative order specified by the corresponding domain element(s).
5057  *
5058  * "build" is an isl_ast_build that has either been constructed by
5059  * isl_ast_build_from_context or passed to a callback set by
5060  * isl_ast_build_set_create_leaf.
5061  * In the first case, the space of the isl_ast_build is typically
5062  * a parametric space, although this is currently not enforced.
5063  * In the second case, the space is never a parametric space.
5064  * If the space S is not parametric, then the domain space(s) of "executed"
5065  * need to be wrapped relations with S as domain.
5066  *
5067  * If the domain of "executed" consists of several spaces, then an AST
5068  * is generated for each of them (in arbitrary order) and the results
5069  * are concatenated.
5070  *
5071  * If "internal" is set, then the domain "S" above refers to the internal
5072  * schedule domain representation.  Otherwise, it refers to the external
5073  * representation, as returned by isl_ast_build_get_schedule_space.
5074  *
5075  * We essentially run over all the spaces in the domain of "executed"
5076  * and call generate_code_set on each of them.
5077  */
generate_code(__isl_take isl_union_map * executed,__isl_take isl_ast_build * build,int internal)5078 static __isl_give isl_ast_graft_list *generate_code(
5079 	__isl_take isl_union_map *executed, __isl_take isl_ast_build *build,
5080 	int internal)
5081 {
5082 	isl_ctx *ctx;
5083 	struct isl_generate_code_data data = { 0 };
5084 	isl_space *space;
5085 	isl_union_set *schedule_domain;
5086 	isl_union_map *universe;
5087 
5088 	if (!build)
5089 		goto error;
5090 	space = isl_ast_build_get_space(build, 1);
5091 	space = isl_space_align_params(space,
5092 				    isl_union_map_get_space(executed));
5093 	space = isl_space_align_params(space,
5094 				    isl_union_map_get_space(build->options));
5095 	build = isl_ast_build_align_params(build, isl_space_copy(space));
5096 	executed = isl_union_map_align_params(executed, space);
5097 	if (!executed || !build)
5098 		goto error;
5099 
5100 	ctx = isl_ast_build_get_ctx(build);
5101 
5102 	data.internal = internal;
5103 	data.executed = executed;
5104 	data.build = build;
5105 	data.list = isl_ast_graft_list_alloc(ctx, 0);
5106 
5107 	universe = isl_union_map_universe(isl_union_map_copy(executed));
5108 	schedule_domain = isl_union_map_domain(universe);
5109 	if (isl_union_set_foreach_set(schedule_domain, &generate_code_set,
5110 					&data) < 0)
5111 		data.list = isl_ast_graft_list_free(data.list);
5112 
5113 	isl_union_set_free(schedule_domain);
5114 	isl_union_map_free(executed);
5115 
5116 	isl_ast_build_free(build);
5117 	return data.list;
5118 error:
5119 	isl_union_map_free(executed);
5120 	isl_ast_build_free(build);
5121 	return NULL;
5122 }
5123 
5124 /* Generate an AST that visits the elements in the domain of "schedule"
5125  * in the relative order specified by the corresponding image element(s).
5126  *
5127  * "build" is an isl_ast_build that has either been constructed by
5128  * isl_ast_build_from_context or passed to a callback set by
5129  * isl_ast_build_set_create_leaf.
5130  * In the first case, the space of the isl_ast_build is typically
5131  * a parametric space, although this is currently not enforced.
5132  * In the second case, the space is never a parametric space.
5133  * If the space S is not parametric, then the range space(s) of "schedule"
5134  * need to be wrapped relations with S as domain.
5135  *
5136  * If the range of "schedule" consists of several spaces, then an AST
5137  * is generated for each of them (in arbitrary order) and the results
5138  * are concatenated.
5139  *
5140  * We first initialize the local copies of the relevant options.
5141  * We do this here rather than when the isl_ast_build is created
5142  * because the options may have changed between the construction
5143  * of the isl_ast_build and the call to isl_generate_code.
5144  *
5145  * The main computation is performed on an inverse schedule (with
5146  * the schedule domain in the domain and the elements to be executed
5147  * in the range) called "executed".
5148  */
isl_ast_build_node_from_schedule_map(__isl_keep isl_ast_build * build,__isl_take isl_union_map * schedule)5149 __isl_give isl_ast_node *isl_ast_build_node_from_schedule_map(
5150 	__isl_keep isl_ast_build *build, __isl_take isl_union_map *schedule)
5151 {
5152 	isl_ast_graft_list *list;
5153 	isl_ast_node *node;
5154 	isl_union_map *executed;
5155 
5156 	build = isl_ast_build_copy(build);
5157 	build = isl_ast_build_set_single_valued(build, 0);
5158 	schedule = isl_union_map_coalesce(schedule);
5159 	schedule = isl_union_map_remove_redundancies(schedule);
5160 	executed = isl_union_map_reverse(schedule);
5161 	list = generate_code(executed, isl_ast_build_copy(build), 0);
5162 	node = isl_ast_node_from_graft_list(list, build);
5163 	isl_ast_build_free(build);
5164 
5165 	return node;
5166 }
5167 
5168 /* The old name for isl_ast_build_node_from_schedule_map.
5169  * It is being kept for backward compatibility, but
5170  * it will be removed in the future.
5171  */
isl_ast_build_ast_from_schedule(__isl_keep isl_ast_build * build,__isl_take isl_union_map * schedule)5172 __isl_give isl_ast_node *isl_ast_build_ast_from_schedule(
5173 	__isl_keep isl_ast_build *build, __isl_take isl_union_map *schedule)
5174 {
5175 	return isl_ast_build_node_from_schedule_map(build, schedule);
5176 }
5177 
5178 /* Generate an AST that visits the elements in the domain of "executed"
5179  * in the relative order specified by the leaf node "node".
5180  *
5181  * The relation "executed" maps the outer generated loop iterators
5182  * to the domain elements executed by those iterations.
5183  *
5184  * Simply pass control to generate_inner_level.
5185  * Note that the current build does not refer to any band node, so
5186  * that generate_inner_level will not try to visit the child of
5187  * the leaf node.
5188  *
5189  * If multiple statement instances reach a leaf,
5190  * then they can be executed in any order.
5191  * Group the list of grafts based on shared guards
5192  * such that identical guards are only generated once
5193  * when the list is eventually passed on to isl_ast_graft_list_fuse.
5194  */
build_ast_from_leaf(__isl_take isl_ast_build * build,__isl_take isl_schedule_node * node,__isl_take isl_union_map * executed)5195 static __isl_give isl_ast_graft_list *build_ast_from_leaf(
5196 	__isl_take isl_ast_build *build, __isl_take isl_schedule_node *node,
5197 	__isl_take isl_union_map *executed)
5198 {
5199 	isl_ast_graft_list *list;
5200 
5201 	isl_schedule_node_free(node);
5202 	list = generate_inner_level(executed, isl_ast_build_copy(build));
5203 	list = isl_ast_graft_list_group_on_guard(list, build);
5204 	isl_ast_build_free(build);
5205 
5206 	return list;
5207 }
5208 
5209 /* Check that the band partial schedule "partial" does not filter out
5210  * any statement instances, as specified by the range of "executed".
5211  */
check_band_schedule_total_on_instances(__isl_keep isl_multi_union_pw_aff * partial,__isl_keep isl_union_map * executed)5212 static isl_stat check_band_schedule_total_on_instances(
5213 	__isl_keep isl_multi_union_pw_aff *partial,
5214 	__isl_keep isl_union_map *executed)
5215 {
5216 	isl_bool subset;
5217 	isl_union_set *domain, *instances;
5218 
5219 	instances = isl_union_map_range(isl_union_map_copy(executed));
5220 	partial = isl_multi_union_pw_aff_copy(partial);
5221 	domain = isl_multi_union_pw_aff_domain(partial);
5222 	subset = isl_union_set_is_subset(instances, domain);
5223 	isl_union_set_free(domain);
5224 	isl_union_set_free(instances);
5225 
5226 	if (subset < 0)
5227 		return isl_stat_error;
5228 	if (!subset)
5229 		isl_die(isl_union_map_get_ctx(executed), isl_error_invalid,
5230 			"band node is not allowed to drop statement instances",
5231 			return isl_stat_error);
5232 	return isl_stat_ok;
5233 }
5234 
5235 /* Generate an AST that visits the elements in the domain of "executed"
5236  * in the relative order specified by the band node "node" and its descendants.
5237  *
5238  * The relation "executed" maps the outer generated loop iterators
5239  * to the domain elements executed by those iterations.
5240  *
5241  * If the band is empty, we continue with its descendants.
5242  * Otherwise, we extend the build and the inverse schedule with
5243  * the additional space/partial schedule and continue generating
5244  * an AST in generate_next_level.
5245  * As soon as we have extended the inverse schedule with the additional
5246  * partial schedule, we look for equalities that may exists between
5247  * the old and the new part.
5248  */
build_ast_from_band(__isl_take isl_ast_build * build,__isl_take isl_schedule_node * node,__isl_take isl_union_map * executed)5249 static __isl_give isl_ast_graft_list *build_ast_from_band(
5250 	__isl_take isl_ast_build *build, __isl_take isl_schedule_node *node,
5251 	__isl_take isl_union_map *executed)
5252 {
5253 	isl_space *space;
5254 	isl_multi_union_pw_aff *extra;
5255 	isl_union_map *extra_umap;
5256 	isl_ast_graft_list *list;
5257 	isl_size n1, n2;
5258 	isl_size n;
5259 
5260 	n = isl_schedule_node_band_n_member(node);
5261 	if (!build || n < 0 || !executed)
5262 		goto error;
5263 
5264 	if (n == 0)
5265 		return build_ast_from_child(build, node, executed);
5266 
5267 	extra = isl_schedule_node_band_get_partial_schedule(node);
5268 	extra = isl_multi_union_pw_aff_align_params(extra,
5269 				isl_ast_build_get_space(build, 1));
5270 	space = isl_multi_union_pw_aff_get_space(extra);
5271 
5272 	if (check_band_schedule_total_on_instances(extra, executed) < 0)
5273 		executed = isl_union_map_free(executed);
5274 
5275 	extra_umap = isl_union_map_from_multi_union_pw_aff(extra);
5276 	extra_umap = isl_union_map_reverse(extra_umap);
5277 
5278 	executed = isl_union_map_domain_product(executed, extra_umap);
5279 	executed = isl_union_map_detect_equalities(executed);
5280 
5281 	n1 = isl_ast_build_dim(build, isl_dim_param);
5282 	build = isl_ast_build_product(build, space);
5283 	n2 = isl_ast_build_dim(build, isl_dim_param);
5284 	if (n1 < 0 || n2 < 0)
5285 		build = isl_ast_build_free(build);
5286 	else if (n2 > n1)
5287 		isl_die(isl_ast_build_get_ctx(build), isl_error_invalid,
5288 			"band node is not allowed to introduce new parameters",
5289 			build = isl_ast_build_free(build));
5290 	build = isl_ast_build_set_schedule_node(build, node);
5291 
5292 	list = generate_next_level(executed, build);
5293 
5294 	list = isl_ast_graft_list_unembed(list, 1);
5295 
5296 	return list;
5297 error:
5298 	isl_schedule_node_free(node);
5299 	isl_union_map_free(executed);
5300 	isl_ast_build_free(build);
5301 	return NULL;
5302 }
5303 
5304 /* Hoist a list of grafts (in practice containing a single graft)
5305  * from "sub_build" (which includes extra context information)
5306  * to "build".
5307  *
5308  * In particular, project out all additional parameters introduced
5309  * by the context node from the enforced constraints and the guard
5310  * of the single graft.
5311  */
hoist_out_of_context(__isl_take isl_ast_graft_list * list,__isl_keep isl_ast_build * build,__isl_keep isl_ast_build * sub_build)5312 static __isl_give isl_ast_graft_list *hoist_out_of_context(
5313 	__isl_take isl_ast_graft_list *list, __isl_keep isl_ast_build *build,
5314 	__isl_keep isl_ast_build *sub_build)
5315 {
5316 	isl_ast_graft *graft;
5317 	isl_basic_set *enforced;
5318 	isl_set *guard;
5319 	isl_size n_param, extra_param;
5320 
5321 	n_param = isl_ast_build_dim(build, isl_dim_param);
5322 	extra_param = isl_ast_build_dim(sub_build, isl_dim_param);
5323 	if (n_param < 0 || extra_param < 0)
5324 		return isl_ast_graft_list_free(list);
5325 
5326 	if (extra_param == n_param)
5327 		return list;
5328 
5329 	extra_param -= n_param;
5330 	enforced = isl_ast_graft_list_extract_shared_enforced(list, sub_build);
5331 	enforced = isl_basic_set_project_out(enforced, isl_dim_param,
5332 							n_param, extra_param);
5333 	enforced = isl_basic_set_remove_unknown_divs(enforced);
5334 	guard = isl_ast_graft_list_extract_hoistable_guard(list, sub_build);
5335 	guard = isl_set_remove_divs_involving_dims(guard, isl_dim_param,
5336 							n_param, extra_param);
5337 	guard = isl_set_project_out(guard, isl_dim_param, n_param, extra_param);
5338 	guard = isl_set_compute_divs(guard);
5339 	graft = isl_ast_graft_alloc_from_children(list, guard, enforced,
5340 							build, sub_build);
5341 	list = isl_ast_graft_list_from_ast_graft(graft);
5342 
5343 	return list;
5344 }
5345 
5346 /* Generate an AST that visits the elements in the domain of "executed"
5347  * in the relative order specified by the context node "node"
5348  * and its descendants.
5349  *
5350  * The relation "executed" maps the outer generated loop iterators
5351  * to the domain elements executed by those iterations.
5352  *
5353  * The context node may introduce additional parameters as well as
5354  * constraints on the outer schedule dimensions or original parameters.
5355  *
5356  * We add the extra parameters to a new build and the context
5357  * constraints to both the build and (as a single disjunct)
5358  * to the domain of "executed".  Since the context constraints
5359  * are specified in terms of the input schedule, we first need
5360  * to map them to the internal schedule domain.
5361  *
5362  * After constructing the AST from the descendants of "node",
5363  * we combine the list of grafts into a single graft within
5364  * the new build, in order to be able to exploit the additional
5365  * context constraints during this combination.
5366  *
5367  * Additionally, if the current node is the outermost node in
5368  * the schedule tree (apart from the root domain node), we generate
5369  * all pending guards, again to be able to exploit the additional
5370  * context constraints.  We currently do not do this for internal
5371  * context nodes since we may still want to hoist conditions
5372  * to outer AST nodes.
5373  *
5374  * If the context node introduced any new parameters, then they
5375  * are removed from the set of enforced constraints and guard
5376  * in hoist_out_of_context.
5377  */
build_ast_from_context(__isl_take isl_ast_build * build,__isl_take isl_schedule_node * node,__isl_take isl_union_map * executed)5378 static __isl_give isl_ast_graft_list *build_ast_from_context(
5379 	__isl_take isl_ast_build *build, __isl_take isl_schedule_node *node,
5380 	__isl_take isl_union_map *executed)
5381 {
5382 	isl_set *context;
5383 	isl_space *space;
5384 	isl_multi_aff *internal2input;
5385 	isl_ast_build *sub_build;
5386 	isl_ast_graft_list *list;
5387 	isl_size n;
5388 	isl_size depth;
5389 
5390 	depth = isl_schedule_node_get_tree_depth(node);
5391 	if (depth < 0)
5392 		build = isl_ast_build_free(build);
5393 	space = isl_ast_build_get_space(build, 1);
5394 	context = isl_schedule_node_context_get_context(node);
5395 	context = isl_set_align_params(context, space);
5396 	sub_build = isl_ast_build_copy(build);
5397 	space = isl_set_get_space(context);
5398 	sub_build = isl_ast_build_align_params(sub_build, space);
5399 	internal2input = isl_ast_build_get_internal2input(sub_build);
5400 	context = isl_set_preimage_multi_aff(context, internal2input);
5401 	sub_build = isl_ast_build_restrict_generated(sub_build,
5402 					isl_set_copy(context));
5403 	context = isl_set_from_basic_set(isl_set_simple_hull(context));
5404 	executed = isl_union_map_intersect_domain(executed,
5405 					isl_union_set_from_set(context));
5406 
5407 	list = build_ast_from_child(isl_ast_build_copy(sub_build),
5408 						node, executed);
5409 	n = isl_ast_graft_list_n_ast_graft(list);
5410 	if (n < 0)
5411 		list = isl_ast_graft_list_free(list);
5412 
5413 	list = isl_ast_graft_list_fuse(list, sub_build);
5414 	if (depth == 1)
5415 		list = isl_ast_graft_list_insert_pending_guard_nodes(list,
5416 								sub_build);
5417 	if (n >= 1)
5418 		list = hoist_out_of_context(list, build, sub_build);
5419 
5420 	isl_ast_build_free(build);
5421 	isl_ast_build_free(sub_build);
5422 
5423 	return list;
5424 }
5425 
5426 /* Generate an AST that visits the elements in the domain of "executed"
5427  * in the relative order specified by the expansion node "node" and
5428  * its descendants.
5429  *
5430  * The relation "executed" maps the outer generated loop iterators
5431  * to the domain elements executed by those iterations.
5432  *
5433  * We expand the domain elements by the expansion and
5434  * continue with the descendants of the node.
5435  */
build_ast_from_expansion(__isl_take isl_ast_build * build,__isl_take isl_schedule_node * node,__isl_take isl_union_map * executed)5436 static __isl_give isl_ast_graft_list *build_ast_from_expansion(
5437 	__isl_take isl_ast_build *build, __isl_take isl_schedule_node *node,
5438 	__isl_take isl_union_map *executed)
5439 {
5440 	isl_union_map *expansion;
5441 	isl_size n1, n2;
5442 
5443 	expansion = isl_schedule_node_expansion_get_expansion(node);
5444 	expansion = isl_union_map_align_params(expansion,
5445 				isl_union_map_get_space(executed));
5446 
5447 	n1 = isl_union_map_dim(executed, isl_dim_param);
5448 	executed = isl_union_map_apply_range(executed, expansion);
5449 	n2 = isl_union_map_dim(executed, isl_dim_param);
5450 	if (n1 < 0 || n2 < 0)
5451 		goto error;
5452 	if (n2 > n1)
5453 		isl_die(isl_ast_build_get_ctx(build), isl_error_invalid,
5454 			"expansion node is not allowed to introduce "
5455 			"new parameters", goto error);
5456 
5457 	return build_ast_from_child(build, node, executed);
5458 error:
5459 	isl_ast_build_free(build);
5460 	isl_schedule_node_free(node);
5461 	isl_union_map_free(executed);
5462 	return NULL;
5463 }
5464 
5465 /* Generate an AST that visits the elements in the domain of "executed"
5466  * in the relative order specified by the extension node "node" and
5467  * its descendants.
5468  *
5469  * The relation "executed" maps the outer generated loop iterators
5470  * to the domain elements executed by those iterations.
5471  *
5472  * Extend the inverse schedule with the extension applied to current
5473  * set of generated constraints.  Since the extension if formulated
5474  * in terms of the input schedule, it first needs to be transformed
5475  * to refer to the internal schedule.
5476  */
build_ast_from_extension(__isl_take isl_ast_build * build,__isl_take isl_schedule_node * node,__isl_take isl_union_map * executed)5477 static __isl_give isl_ast_graft_list *build_ast_from_extension(
5478 	__isl_take isl_ast_build *build, __isl_take isl_schedule_node *node,
5479 	__isl_take isl_union_map *executed)
5480 {
5481 	isl_union_set *schedule_domain;
5482 	isl_union_map *extension;
5483 	isl_set *set;
5484 
5485 	set = isl_ast_build_get_generated(build);
5486 	set = isl_set_from_basic_set(isl_set_simple_hull(set));
5487 	schedule_domain = isl_union_set_from_set(set);
5488 
5489 	extension = isl_schedule_node_extension_get_extension(node);
5490 
5491 	extension = isl_union_map_preimage_domain_multi_aff(extension,
5492 			isl_multi_aff_copy(build->internal2input));
5493 	extension = isl_union_map_intersect_domain(extension, schedule_domain);
5494 	extension = isl_ast_build_substitute_values_union_map_domain(build,
5495 								    extension);
5496 	executed = isl_union_map_union(executed, extension);
5497 
5498 	return build_ast_from_child(build, node, executed);
5499 }
5500 
5501 /* Generate an AST that visits the elements in the domain of "executed"
5502  * in the relative order specified by the filter node "node" and
5503  * its descendants.
5504  *
5505  * The relation "executed" maps the outer generated loop iterators
5506  * to the domain elements executed by those iterations.
5507  *
5508  * We simply intersect the iteration domain (i.e., the range of "executed")
5509  * with the filter and continue with the descendants of the node,
5510  * unless the resulting inverse schedule is empty, in which
5511  * case we return an empty list.
5512  *
5513  * If the result of the intersection is equal to the original "executed"
5514  * relation, then keep the original representation since the intersection
5515  * may have unnecessarily broken up the relation into a greater number
5516  * of disjuncts.
5517  */
build_ast_from_filter(__isl_take isl_ast_build * build,__isl_take isl_schedule_node * node,__isl_take isl_union_map * executed)5518 static __isl_give isl_ast_graft_list *build_ast_from_filter(
5519 	__isl_take isl_ast_build *build, __isl_take isl_schedule_node *node,
5520 	__isl_take isl_union_map *executed)
5521 {
5522 	isl_ctx *ctx;
5523 	isl_union_set *filter;
5524 	isl_union_map *orig;
5525 	isl_ast_graft_list *list;
5526 	int empty;
5527 	isl_bool unchanged;
5528 	isl_size n1, n2;
5529 
5530 	orig = isl_union_map_copy(executed);
5531 	if (!build || !node || !executed)
5532 		goto error;
5533 
5534 	filter = isl_schedule_node_filter_get_filter(node);
5535 	filter = isl_union_set_align_params(filter,
5536 				isl_union_map_get_space(executed));
5537 	n1 = isl_union_map_dim(executed, isl_dim_param);
5538 	executed = isl_union_map_intersect_range(executed, filter);
5539 	n2 = isl_union_map_dim(executed, isl_dim_param);
5540 	if (n1 < 0 || n2 < 0)
5541 		goto error;
5542 	if (n2 > n1)
5543 		isl_die(isl_ast_build_get_ctx(build), isl_error_invalid,
5544 			"filter node is not allowed to introduce "
5545 			"new parameters", goto error);
5546 
5547 	unchanged = isl_union_map_is_subset(orig, executed);
5548 	empty = isl_union_map_is_empty(executed);
5549 	if (unchanged < 0 || empty < 0)
5550 		goto error;
5551 	if (unchanged) {
5552 		isl_union_map_free(executed);
5553 		return build_ast_from_child(build, node, orig);
5554 	}
5555 	isl_union_map_free(orig);
5556 	if (!empty)
5557 		return build_ast_from_child(build, node, executed);
5558 
5559 	ctx = isl_ast_build_get_ctx(build);
5560 	list = isl_ast_graft_list_alloc(ctx, 0);
5561 	isl_ast_build_free(build);
5562 	isl_schedule_node_free(node);
5563 	isl_union_map_free(executed);
5564 	return list;
5565 error:
5566 	isl_ast_build_free(build);
5567 	isl_schedule_node_free(node);
5568 	isl_union_map_free(executed);
5569 	isl_union_map_free(orig);
5570 	return NULL;
5571 }
5572 
5573 /* Generate an AST that visits the elements in the domain of "executed"
5574  * in the relative order specified by the guard node "node" and
5575  * its descendants.
5576  *
5577  * The relation "executed" maps the outer generated loop iterators
5578  * to the domain elements executed by those iterations.
5579  *
5580  * Ensure that the associated guard is enforced by the outer AST
5581  * constructs by adding it to the guard of the graft.
5582  * Since we know that we will enforce the guard, we can also include it
5583  * in the generated constraints used to construct an AST for
5584  * the descendant nodes.
5585  */
build_ast_from_guard(__isl_take isl_ast_build * build,__isl_take isl_schedule_node * node,__isl_take isl_union_map * executed)5586 static __isl_give isl_ast_graft_list *build_ast_from_guard(
5587 	__isl_take isl_ast_build *build, __isl_take isl_schedule_node *node,
5588 	__isl_take isl_union_map *executed)
5589 {
5590 	isl_space *space;
5591 	isl_set *guard, *hoisted;
5592 	isl_basic_set *enforced;
5593 	isl_ast_build *sub_build;
5594 	isl_ast_graft *graft;
5595 	isl_ast_graft_list *list;
5596 	isl_size n1, n2, n;
5597 
5598 	space = isl_ast_build_get_space(build, 1);
5599 	guard = isl_schedule_node_guard_get_guard(node);
5600 	n1 = isl_space_dim(space, isl_dim_param);
5601 	guard = isl_set_align_params(guard, space);
5602 	n2 = isl_set_dim(guard, isl_dim_param);
5603 	if (n1 < 0 || n2 < 0)
5604 		guard = isl_set_free(guard);
5605 	else if (n2 > n1)
5606 		isl_die(isl_ast_build_get_ctx(build), isl_error_invalid,
5607 			"guard node is not allowed to introduce "
5608 			"new parameters", guard = isl_set_free(guard));
5609 	guard = isl_set_preimage_multi_aff(guard,
5610 			isl_multi_aff_copy(build->internal2input));
5611 	guard = isl_ast_build_specialize(build, guard);
5612 	guard = isl_set_gist(guard, isl_set_copy(build->generated));
5613 
5614 	sub_build = isl_ast_build_copy(build);
5615 	sub_build = isl_ast_build_restrict_generated(sub_build,
5616 							isl_set_copy(guard));
5617 
5618 	list = build_ast_from_child(isl_ast_build_copy(sub_build),
5619 							node, executed);
5620 
5621 	hoisted = isl_ast_graft_list_extract_hoistable_guard(list, sub_build);
5622 	n = isl_set_n_basic_set(hoisted);
5623 	if (n < 0)
5624 		list = isl_ast_graft_list_free(list);
5625 	if (n > 1)
5626 		list = isl_ast_graft_list_gist_guards(list,
5627 						    isl_set_copy(hoisted));
5628 	guard = isl_set_intersect(guard, hoisted);
5629 	enforced = extract_shared_enforced(list, build);
5630 	graft = isl_ast_graft_alloc_from_children(list, guard, enforced,
5631 						    build, sub_build);
5632 
5633 	isl_ast_build_free(sub_build);
5634 	isl_ast_build_free(build);
5635 	return isl_ast_graft_list_from_ast_graft(graft);
5636 }
5637 
5638 /* Call the before_each_mark callback, if requested by the user.
5639  *
5640  * Return 0 on success and -1 on error.
5641  *
5642  * The caller is responsible for recording the current inverse schedule
5643  * in "build".
5644  */
before_each_mark(__isl_keep isl_id * mark,__isl_keep isl_ast_build * build)5645 static isl_stat before_each_mark(__isl_keep isl_id *mark,
5646 	__isl_keep isl_ast_build *build)
5647 {
5648 	if (!build)
5649 		return isl_stat_error;
5650 	if (!build->before_each_mark)
5651 		return isl_stat_ok;
5652 	return build->before_each_mark(mark, build,
5653 					build->before_each_mark_user);
5654 }
5655 
5656 /* Call the after_each_mark callback, if requested by the user.
5657  *
5658  * The caller is responsible for recording the current inverse schedule
5659  * in "build".
5660  */
after_each_mark(__isl_take isl_ast_graft * graft,__isl_keep isl_ast_build * build)5661 static __isl_give isl_ast_graft *after_each_mark(
5662 	__isl_take isl_ast_graft *graft, __isl_keep isl_ast_build *build)
5663 {
5664 	if (!graft || !build)
5665 		return isl_ast_graft_free(graft);
5666 	if (!build->after_each_mark)
5667 		return graft;
5668 	graft->node = build->after_each_mark(graft->node, build,
5669 						build->after_each_mark_user);
5670 	if (!graft->node)
5671 		return isl_ast_graft_free(graft);
5672 	return graft;
5673 }
5674 
5675 
5676 /* Generate an AST that visits the elements in the domain of "executed"
5677  * in the relative order specified by the mark node "node" and
5678  * its descendants.
5679  *
5680  * The relation "executed" maps the outer generated loop iterators
5681  * to the domain elements executed by those iterations.
5682 
5683  * Since we may be calling before_each_mark and after_each_mark
5684  * callbacks, we record the current inverse schedule in the build.
5685  *
5686  * We generate an AST for the child of the mark node, combine
5687  * the graft list into a single graft and then insert the mark
5688  * in the AST of that single graft.
5689  */
build_ast_from_mark(__isl_take isl_ast_build * build,__isl_take isl_schedule_node * node,__isl_take isl_union_map * executed)5690 static __isl_give isl_ast_graft_list *build_ast_from_mark(
5691 	__isl_take isl_ast_build *build, __isl_take isl_schedule_node *node,
5692 	__isl_take isl_union_map *executed)
5693 {
5694 	isl_id *mark;
5695 	isl_ast_graft *graft;
5696 	isl_ast_graft_list *list;
5697 	isl_size n;
5698 
5699 	build = isl_ast_build_set_executed(build, isl_union_map_copy(executed));
5700 
5701 	mark = isl_schedule_node_mark_get_id(node);
5702 	if (before_each_mark(mark, build) < 0)
5703 		node = isl_schedule_node_free(node);
5704 
5705 	list = build_ast_from_child(isl_ast_build_copy(build), node, executed);
5706 	list = isl_ast_graft_list_fuse(list, build);
5707 	n = isl_ast_graft_list_n_ast_graft(list);
5708 	if (n < 0)
5709 		list = isl_ast_graft_list_free(list);
5710 	if (n == 0) {
5711 		isl_id_free(mark);
5712 	} else {
5713 		graft = isl_ast_graft_list_get_ast_graft(list, 0);
5714 		graft = isl_ast_graft_insert_mark(graft, mark);
5715 		graft = after_each_mark(graft, build);
5716 		list = isl_ast_graft_list_set_ast_graft(list, 0, graft);
5717 	}
5718 	isl_ast_build_free(build);
5719 
5720 	return list;
5721 }
5722 
5723 static __isl_give isl_ast_graft_list *build_ast_from_schedule_node(
5724 	__isl_take isl_ast_build *build, __isl_take isl_schedule_node *node,
5725 	__isl_take isl_union_map *executed);
5726 
5727 /* Generate an AST that visits the elements in the domain of "executed"
5728  * in the relative order specified by the sequence (or set) node "node" and
5729  * its descendants.
5730  *
5731  * The relation "executed" maps the outer generated loop iterators
5732  * to the domain elements executed by those iterations.
5733  *
5734  * We simply generate an AST for each of the children and concatenate
5735  * the results.
5736  */
build_ast_from_sequence(__isl_take isl_ast_build * build,__isl_take isl_schedule_node * node,__isl_take isl_union_map * executed)5737 static __isl_give isl_ast_graft_list *build_ast_from_sequence(
5738 	__isl_take isl_ast_build *build, __isl_take isl_schedule_node *node,
5739 	__isl_take isl_union_map *executed)
5740 {
5741 	int i;
5742 	isl_size n;
5743 	isl_ctx *ctx;
5744 	isl_ast_graft_list *list;
5745 
5746 	ctx = isl_ast_build_get_ctx(build);
5747 	list = isl_ast_graft_list_alloc(ctx, 0);
5748 
5749 	n = isl_schedule_node_n_children(node);
5750 	if (n < 0)
5751 		list = isl_ast_graft_list_free(list);
5752 	for (i = 0; i < n; ++i) {
5753 		isl_schedule_node *child;
5754 		isl_ast_graft_list *list_i;
5755 
5756 		child = isl_schedule_node_get_child(node, i);
5757 		list_i = build_ast_from_schedule_node(isl_ast_build_copy(build),
5758 					child, isl_union_map_copy(executed));
5759 		list = isl_ast_graft_list_concat(list, list_i);
5760 	}
5761 	isl_ast_build_free(build);
5762 	isl_schedule_node_free(node);
5763 	isl_union_map_free(executed);
5764 
5765 	return list;
5766 }
5767 
5768 /* Generate an AST that visits the elements in the domain of "executed"
5769  * in the relative order specified by the node "node" and its descendants.
5770  *
5771  * The relation "executed" maps the outer generated loop iterators
5772  * to the domain elements executed by those iterations.
5773  *
5774  * The node types are handled in separate functions.
5775  * Set nodes are currently treated in the same way as sequence nodes.
5776  * The children of a set node may be executed in any order,
5777  * including the order of the children.
5778  */
build_ast_from_schedule_node(__isl_take isl_ast_build * build,__isl_take isl_schedule_node * node,__isl_take isl_union_map * executed)5779 static __isl_give isl_ast_graft_list *build_ast_from_schedule_node(
5780 	__isl_take isl_ast_build *build, __isl_take isl_schedule_node *node,
5781 	__isl_take isl_union_map *executed)
5782 {
5783 	enum isl_schedule_node_type type;
5784 
5785 	type = isl_schedule_node_get_type(node);
5786 
5787 	switch (type) {
5788 	case isl_schedule_node_error:
5789 		goto error;
5790 	case isl_schedule_node_leaf:
5791 		return build_ast_from_leaf(build, node, executed);
5792 	case isl_schedule_node_band:
5793 		return build_ast_from_band(build, node, executed);
5794 	case isl_schedule_node_context:
5795 		return build_ast_from_context(build, node, executed);
5796 	case isl_schedule_node_domain:
5797 		isl_die(isl_schedule_node_get_ctx(node), isl_error_unsupported,
5798 			"unexpected internal domain node", goto error);
5799 	case isl_schedule_node_expansion:
5800 		return build_ast_from_expansion(build, node, executed);
5801 	case isl_schedule_node_extension:
5802 		return build_ast_from_extension(build, node, executed);
5803 	case isl_schedule_node_filter:
5804 		return build_ast_from_filter(build, node, executed);
5805 	case isl_schedule_node_guard:
5806 		return build_ast_from_guard(build, node, executed);
5807 	case isl_schedule_node_mark:
5808 		return build_ast_from_mark(build, node, executed);
5809 	case isl_schedule_node_sequence:
5810 	case isl_schedule_node_set:
5811 		return build_ast_from_sequence(build, node, executed);
5812 	}
5813 
5814 	isl_die(isl_ast_build_get_ctx(build), isl_error_internal,
5815 		"unhandled type", goto error);
5816 error:
5817 	isl_union_map_free(executed);
5818 	isl_schedule_node_free(node);
5819 	isl_ast_build_free(build);
5820 
5821 	return NULL;
5822 }
5823 
5824 /* Generate an AST that visits the elements in the domain of "executed"
5825  * in the relative order specified by the (single) child of "node" and
5826  * its descendants.
5827  *
5828  * The relation "executed" maps the outer generated loop iterators
5829  * to the domain elements executed by those iterations.
5830  *
5831  * This function is never called on a leaf, set or sequence node,
5832  * so the node always has exactly one child.
5833  */
build_ast_from_child(__isl_take isl_ast_build * build,__isl_take isl_schedule_node * node,__isl_take isl_union_map * executed)5834 static __isl_give isl_ast_graft_list *build_ast_from_child(
5835 	__isl_take isl_ast_build *build, __isl_take isl_schedule_node *node,
5836 	__isl_take isl_union_map *executed)
5837 {
5838 	node = isl_schedule_node_child(node, 0);
5839 	return build_ast_from_schedule_node(build, node, executed);
5840 }
5841 
5842 /* Generate an AST that visits the elements in the domain of the domain
5843  * node "node" in the relative order specified by its descendants.
5844  *
5845  * An initial inverse schedule is created that maps a zero-dimensional
5846  * schedule space to the node domain.
5847  * The input "build" is assumed to have a parametric domain and
5848  * is replaced by the same zero-dimensional schedule space.
5849  *
5850  * We also add some of the parameter constraints in the build domain
5851  * to the executed relation.  Adding these constraints
5852  * allows for an earlier detection of conflicts in some cases.
5853  * However, we do not want to divide the executed relation into
5854  * more disjuncts than necessary.  We therefore approximate
5855  * the constraints on the parameters by a single disjunct set.
5856  */
build_ast_from_domain(__isl_take isl_ast_build * build,__isl_take isl_schedule_node * node)5857 static __isl_give isl_ast_node *build_ast_from_domain(
5858 	__isl_take isl_ast_build *build, __isl_take isl_schedule_node *node)
5859 {
5860 	isl_ctx *ctx;
5861 	isl_union_set *domain, *schedule_domain;
5862 	isl_union_map *executed;
5863 	isl_space *space;
5864 	isl_set *set;
5865 	isl_ast_graft_list *list;
5866 	isl_ast_node *ast;
5867 	int is_params;
5868 
5869 	if (!build)
5870 		goto error;
5871 
5872 	ctx = isl_ast_build_get_ctx(build);
5873 	space = isl_ast_build_get_space(build, 1);
5874 	is_params = isl_space_is_params(space);
5875 	isl_space_free(space);
5876 	if (is_params < 0)
5877 		goto error;
5878 	if (!is_params)
5879 		isl_die(ctx, isl_error_unsupported,
5880 			"expecting parametric initial context", goto error);
5881 
5882 	domain = isl_schedule_node_domain_get_domain(node);
5883 	domain = isl_union_set_coalesce(domain);
5884 
5885 	space = isl_union_set_get_space(domain);
5886 	space = isl_space_set_from_params(space);
5887 	build = isl_ast_build_product(build, space);
5888 
5889 	set = isl_ast_build_get_domain(build);
5890 	set = isl_set_from_basic_set(isl_set_simple_hull(set));
5891 	schedule_domain = isl_union_set_from_set(set);
5892 
5893 	executed = isl_union_map_from_domain_and_range(schedule_domain, domain);
5894 	list = build_ast_from_child(isl_ast_build_copy(build), node, executed);
5895 	ast = isl_ast_node_from_graft_list(list, build);
5896 	isl_ast_build_free(build);
5897 
5898 	return ast;
5899 error:
5900 	isl_schedule_node_free(node);
5901 	isl_ast_build_free(build);
5902 	return NULL;
5903 }
5904 
5905 /* Generate an AST that visits the elements in the domain of "schedule"
5906  * in the relative order specified by the schedule tree.
5907  *
5908  * "build" is an isl_ast_build that has been created using
5909  * isl_ast_build_alloc or isl_ast_build_from_context based
5910  * on a parametric set.
5911  *
5912  * The construction starts at the root node of the schedule,
5913  * which is assumed to be a domain node.
5914  */
isl_ast_build_node_from_schedule(__isl_keep isl_ast_build * build,__isl_take isl_schedule * schedule)5915 __isl_give isl_ast_node *isl_ast_build_node_from_schedule(
5916 	__isl_keep isl_ast_build *build, __isl_take isl_schedule *schedule)
5917 {
5918 	isl_ctx *ctx;
5919 	isl_schedule_node *node;
5920 
5921 	if (!build || !schedule)
5922 		goto error;
5923 
5924 	ctx = isl_ast_build_get_ctx(build);
5925 
5926 	node = isl_schedule_get_root(schedule);
5927 	if (!node)
5928 		goto error;
5929 	isl_schedule_free(schedule);
5930 
5931 	build = isl_ast_build_copy(build);
5932 	build = isl_ast_build_set_single_valued(build, 0);
5933 	if (isl_schedule_node_get_type(node) != isl_schedule_node_domain)
5934 		isl_die(ctx, isl_error_unsupported,
5935 			"expecting root domain node",
5936 			build = isl_ast_build_free(build));
5937 	return build_ast_from_domain(build, node);
5938 error:
5939 	isl_schedule_free(schedule);
5940 	return NULL;
5941 }
5942