xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/graphite-optimize-isl.c (revision f3cfa6f6ce31685c6c4a758bc430e69eb99f50a4)
1 /* A scheduling optimizer for Graphite
2    Copyright (C) 2012-2016 Free Software Foundation, Inc.
3    Contributed by Tobias Grosser <tobias@grosser.es>.
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11 
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 GNU General Public License for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 #define USES_ISL
22 
23 #include "config.h"
24 
25 #ifdef HAVE_isl
26 
27 #include "system.h"
28 #include "coretypes.h"
29 #include "backend.h"
30 #include "cfghooks.h"
31 #include "tree.h"
32 #include "gimple.h"
33 #include "fold-const.h"
34 #include "gimple-iterator.h"
35 #include "tree-ssa-loop.h"
36 #include "cfgloop.h"
37 #include "tree-data-ref.h"
38 #include "params.h"
39 #include "dumpfile.h"
40 #include "graphite.h"
41 
42 #ifdef HAVE_ISL_OPTIONS_SET_SCHEDULE_SERIALIZE_SCCS
43 /* isl 0.15 or later.  */
44 
45 /* get_schedule_for_node_st - Improve schedule for the schedule node.
46    Only Simple loop tiling is considered.  */
47 
48 static __isl_give isl_schedule_node *
49 get_schedule_for_node_st (__isl_take isl_schedule_node *node, void *user)
50 {
51   if (user)
52     return node;
53 
54   if (isl_schedule_node_get_type (node) != isl_schedule_node_band
55       || isl_schedule_node_n_children (node) != 1)
56     return node;
57 
58   isl_space *space = isl_schedule_node_band_get_space (node);
59   unsigned dims = isl_space_dim (space, isl_dim_set);
60   isl_schedule_node *child = isl_schedule_node_get_child (node, 0);
61   isl_schedule_node_type type = isl_schedule_node_get_type (child);
62   isl_space_free (space);
63   isl_schedule_node_free (child);
64 
65   if (type != isl_schedule_node_leaf)
66     return node;
67 
68   if (dims <= 1 || !isl_schedule_node_band_get_permutable (node))
69     {
70       if (dump_file && dump_flags)
71 	fprintf (dump_file, "not tiled\n");
72       return node;
73     }
74 
75   /* Tile loops.  */
76   space = isl_schedule_node_band_get_space (node);
77   isl_multi_val *sizes = isl_multi_val_zero (space);
78   long tile_size = PARAM_VALUE (PARAM_LOOP_BLOCK_TILE_SIZE);
79   isl_ctx *ctx = isl_schedule_node_get_ctx (node);
80 
81   for (unsigned i = 0; i < dims; i++)
82     {
83       sizes = isl_multi_val_set_val (sizes, i,
84 				     isl_val_int_from_si (ctx, tile_size));
85       if (dump_file && dump_flags)
86 	fprintf (dump_file, "tiled by %ld\n", tile_size);
87     }
88 
89   node = isl_schedule_node_band_tile (node, sizes);
90   node = isl_schedule_node_child (node, 0);
91 
92   return node;
93 }
94 
95 static isl_union_set *
96 scop_get_domains (scop_p scop)
97 {
98   int i;
99   poly_bb_p pbb;
100   isl_space *space = isl_set_get_space (scop->param_context);
101   isl_union_set *res = isl_union_set_empty (space);
102 
103   FOR_EACH_VEC_ELT (scop->pbbs, i, pbb)
104     res = isl_union_set_add_set (res, isl_set_copy (pbb->domain));
105 
106   return res;
107 }
108 
109 /* Compute the schedule for SCOP based on its parameters, domain and set of
110    constraints.  Then apply the schedule to SCOP.  */
111 
112 static bool
113 optimize_isl (scop_p scop)
114 {
115   int old_max_operations = isl_ctx_get_max_operations (scop->isl_context);
116   int max_operations = PARAM_VALUE (PARAM_MAX_ISL_OPERATIONS);
117   if (max_operations)
118     isl_ctx_set_max_operations (scop->isl_context, max_operations);
119   isl_options_set_on_error (scop->isl_context, ISL_ON_ERROR_CONTINUE);
120 
121   isl_union_set *domain = scop_get_domains (scop);
122 
123   /* Simplify the dependences on the domain.  */
124   scop_get_dependences (scop);
125   isl_union_map *dependences
126     = isl_union_map_gist_domain (isl_union_map_copy (scop->dependence),
127 				 isl_union_set_copy (domain));
128   isl_union_map *validity
129     = isl_union_map_gist_range (dependences, isl_union_set_copy (domain));
130 
131   /* FIXME: proximity should not be validity.  */
132   isl_union_map *proximity = isl_union_map_copy (validity);
133 
134   isl_schedule_constraints *sc = isl_schedule_constraints_on_domain (domain);
135   sc = isl_schedule_constraints_set_proximity (sc, proximity);
136   sc = isl_schedule_constraints_set_validity (sc, isl_union_map_copy (validity));
137   sc = isl_schedule_constraints_set_coincidence (sc, validity);
138 
139   isl_options_set_schedule_serialize_sccs (scop->isl_context, 0);
140   isl_options_set_schedule_maximize_band_depth (scop->isl_context, 1);
141   isl_options_set_schedule_max_constant_term (scop->isl_context, 20);
142   isl_options_set_schedule_max_coefficient (scop->isl_context, 20);
143   isl_options_set_tile_scale_tile_loops (scop->isl_context, 0);
144   /* Generate loop upper bounds that consist of the current loop iterator, an
145      operator (< or <=) and an expression not involving the iterator.  If this
146      option is not set, then the current loop iterator may appear several times
147      in the upper bound.  See the isl manual for more details.  */
148   isl_options_set_ast_build_atomic_upper_bound (scop->isl_context, 1);
149 
150   scop->transformed_schedule = isl_schedule_constraints_compute_schedule (sc);
151   scop->transformed_schedule =
152     isl_schedule_map_schedule_node_bottom_up (scop->transformed_schedule,
153 					      get_schedule_for_node_st, NULL);
154   isl_options_set_on_error (scop->isl_context, ISL_ON_ERROR_ABORT);
155 
156   isl_ctx_reset_operations (scop->isl_context);
157   isl_ctx_set_max_operations (scop->isl_context, old_max_operations);
158   if (!scop->transformed_schedule
159       || isl_ctx_last_error (scop->isl_context) == isl_error_quota)
160     {
161       if (dump_file && dump_flags)
162 	fprintf (dump_file, "isl timed out --param max-isl-operations=%d\n",
163 		 max_operations);
164       return false;
165     }
166 
167   gcc_assert (scop->original_schedule);
168   isl_union_map *original = isl_schedule_get_map (scop->original_schedule);
169   isl_union_map *transformed = isl_schedule_get_map (scop->transformed_schedule);
170   bool same_schedule = isl_union_map_is_equal (original, transformed);
171   isl_union_map_free (original);
172   isl_union_map_free (transformed);
173 
174   if (same_schedule)
175     {
176       if (dump_file)
177 	{
178 	  fprintf (dump_file, "[scheduler] isl optimized schedule is "
179 		   "identical to the original schedule.\n");
180 	  print_schedule_ast (dump_file, scop->original_schedule, scop);
181 	}
182       isl_schedule_free (scop->transformed_schedule);
183       scop->transformed_schedule = isl_schedule_copy (scop->original_schedule);
184       return false;
185     }
186 
187   return true;
188 }
189 
190 /* Apply graphite transformations to all the basic blocks of SCOP.  */
191 
192 bool
193 apply_poly_transforms (scop_p scop)
194 {
195   if (flag_loop_nest_optimize)
196     return optimize_isl (scop);
197 
198   if (!flag_graphite_identity && !flag_loop_parallelize_all)
199     return false;
200 
201   /* Generate code even if we did not apply any real transformation.
202      This also allows to check the performance for the identity
203      transformation: GIMPLE -> GRAPHITE -> GIMPLE.  */
204   gcc_assert (scop->original_schedule);
205   scop->transformed_schedule = isl_schedule_copy (scop->original_schedule);
206   return true;
207 }
208 
209 #else
210 
211 /* get_tile_map - Create a map that describes a n-dimensonal tiling.
212 
213    get_tile_map creates a map from a n-dimensional scattering space into an
214    2*n-dimensional scattering space. The map describes a rectangular tiling.
215 
216    Example:
217      SCHEDULE_DIMENSIONS = 2, PARAMETER_DIMENSIONS = 1, TILE_SIZE = 32
218 
219     tile_map := [p0] -> {[s0, s1] -> [t0, t1, s0, s1]:
220 			 t0 % 32 = 0 and t0 <= s0 < t0 + 32 and
221 			 t1 % 32 = 0 and t1 <= s1 < t1 + 32}
222 
223    Before tiling:
224 
225    for (i = 0; i < N; i++)
226      for (j = 0; j < M; j++)
227 	S(i,j)
228 
229    After tiling:
230 
231    for (t_i = 0; t_i < N; i+=32)
232      for (t_j = 0; t_j < M; j+=32)
233 	for (i = t_i; i < min(t_i + 32, N); i++)  | Unknown that N % 32 = 0
234 	  for (j = t_j; j < t_j + 32; j++)        |   Known that M % 32 = 0
235 	    S(i,j)
236   */
237 
238 static isl_basic_map *
239 get_tile_map (isl_ctx *ctx, int schedule_dimensions, int tile_size)
240 {
241   /* We construct
242 
243      tile_map := [p0] -> {[s0, s1] -> [t0, t1, p0, p1, a0, a1]:
244 			s0 = a0 * 32 and s0 = p0 and t0 <= p0 < t0 + 32 and
245 			s1 = a1 * 32 and s1 = p1 and t1 <= p1 < t1 + 32}
246 
247      and project out the auxilary dimensions a0 and a1.  */
248   isl_space *space
249     = isl_space_alloc (ctx, 0, schedule_dimensions, schedule_dimensions * 3);
250   isl_basic_map *tile_map = isl_basic_map_universe (isl_space_copy (space));
251 
252   isl_local_space *local_space = isl_local_space_from_space (space);
253 
254   for (int x = 0; x < schedule_dimensions; x++)
255     {
256       int sX = x;
257       int tX = x;
258       int pX = schedule_dimensions + x;
259       int aX = 2 * schedule_dimensions + x;
260 
261       isl_constraint *c;
262 
263       /* sX = aX * tile_size; */
264       c = isl_equality_alloc (isl_local_space_copy (local_space));
265       isl_constraint_set_coefficient_si (c, isl_dim_out, sX, 1);
266       isl_constraint_set_coefficient_si (c, isl_dim_out, aX, -tile_size);
267       tile_map = isl_basic_map_add_constraint (tile_map, c);
268 
269       /* pX = sX; */
270       c = isl_equality_alloc (isl_local_space_copy (local_space));
271       isl_constraint_set_coefficient_si (c, isl_dim_out, pX, 1);
272       isl_constraint_set_coefficient_si (c, isl_dim_in, sX, -1);
273       tile_map = isl_basic_map_add_constraint (tile_map, c);
274 
275       /* tX <= pX */
276       c = isl_inequality_alloc (isl_local_space_copy (local_space));
277       isl_constraint_set_coefficient_si (c, isl_dim_out, pX, 1);
278       isl_constraint_set_coefficient_si (c, isl_dim_out, tX, -1);
279       tile_map = isl_basic_map_add_constraint (tile_map, c);
280 
281       /* pX <= tX + (tile_size - 1) */
282       c = isl_inequality_alloc (isl_local_space_copy (local_space));
283       isl_constraint_set_coefficient_si (c, isl_dim_out, tX, 1);
284       isl_constraint_set_coefficient_si (c, isl_dim_out, pX, -1);
285       isl_constraint_set_constant_si (c, tile_size - 1);
286       tile_map = isl_basic_map_add_constraint (tile_map, c);
287     }
288 
289   /* Project out auxiliary dimensions.
290 
291      The auxiliary dimensions are transformed into existentially quantified
292      ones.
293      This reduces the number of visible scattering dimensions and allows isl
294      to produces better code.  */
295   tile_map =
296       isl_basic_map_project_out (tile_map, isl_dim_out,
297 				 2 * schedule_dimensions, schedule_dimensions);
298   isl_local_space_free (local_space);
299   return tile_map;
300 }
301 
302 /* get_schedule_for_band - Get the schedule for this BAND.
303 
304    Polly applies transformations like tiling on top of the isl calculated
305    value.
306    This can influence the number of scheduling dimension. The number of
307    schedule dimensions is returned in DIMENSIONS.  */
308 
309 static isl_union_map *
310 get_schedule_for_band (isl_band *band, int *dimensions)
311 {
312   isl_union_map *partial_schedule;
313   isl_ctx *ctx;
314   isl_space *space;
315   isl_basic_map *tile_map;
316   isl_union_map *tile_umap;
317 
318   partial_schedule = isl_band_get_partial_schedule (band);
319   *dimensions = isl_band_n_member (band);
320 
321   /* It does not make any sense to tile a band with just one dimension.  */
322   if (*dimensions == 1)
323     {
324       if (dump_file && dump_flags)
325 	fprintf (dump_file, "not tiled\n");
326       return partial_schedule;
327     }
328 
329   if (dump_file && dump_flags)
330     fprintf (dump_file, "tiled by %d\n",
331 	     PARAM_VALUE (PARAM_LOOP_BLOCK_TILE_SIZE));
332 
333   ctx = isl_union_map_get_ctx (partial_schedule);
334   space = isl_union_map_get_space (partial_schedule);
335 
336   tile_map = get_tile_map (ctx, *dimensions,
337 			   PARAM_VALUE (PARAM_LOOP_BLOCK_TILE_SIZE));
338   tile_umap = isl_union_map_from_map (isl_map_from_basic_map (tile_map));
339   tile_umap = isl_union_map_align_params (tile_umap, space);
340   tile_umap = isl_union_map_coalesce (tile_umap);
341   *dimensions = 2 * *dimensions;
342 
343   return isl_union_map_apply_range (partial_schedule, tile_umap);
344 }
345 
346 
347 /* get_schedule_for_band_list - Get the scheduling map for a list of bands.
348 
349    We walk recursively the forest of bands to combine the schedules of the
350    individual bands to the overall schedule.  In case tiling is requested,
351    the individual bands are tiled.  */
352 
353 static isl_union_map *
354 get_schedule_for_band_list (isl_band_list *band_list)
355 {
356   int num_bands, i;
357   isl_union_map *schedule;
358   isl_ctx *ctx;
359 
360   ctx = isl_band_list_get_ctx (band_list);
361   num_bands = isl_band_list_n_band (band_list);
362   schedule = isl_union_map_empty (isl_space_params_alloc (ctx, 0));
363 
364   for (i = 0; i < num_bands; i++)
365     {
366       isl_band *band;
367       isl_union_map *partial_schedule;
368       int schedule_dimensions;
369       isl_space *space;
370 
371       band = isl_band_list_get_band (band_list, i);
372       partial_schedule = get_schedule_for_band (band, &schedule_dimensions);
373       space = isl_union_map_get_space (partial_schedule);
374 
375       if (isl_band_has_children (band))
376 	{
377 	  isl_band_list *children = isl_band_get_children (band);
378 	  isl_union_map *suffixSchedule
379 	    = get_schedule_for_band_list (children);
380 	  partial_schedule
381 	    = isl_union_map_flat_range_product (partial_schedule,
382 						suffixSchedule);
383 	  isl_band_list_free (children);
384 	}
385 
386       schedule = isl_union_map_union (schedule, partial_schedule);
387 
388       isl_band_free (band);
389       isl_space_free (space);
390     }
391 
392   return isl_union_map_coalesce (schedule);
393 }
394 
395 static isl_union_map *
396 get_schedule_map (isl_schedule *schedule)
397 {
398   isl_band_list *band_list = isl_schedule_get_band_forest (schedule);
399   isl_union_map *schedule_map = get_schedule_for_band_list (band_list);
400   isl_band_list_free (band_list);
401   return schedule_map;
402 }
403 
404 static isl_stat
405 get_single_map (__isl_take isl_map *map, void *user)
406 {
407   isl_map **single_map = (isl_map **)user;
408   *single_map = map;
409   return isl_stat_ok;
410 }
411 
412 static void
413 apply_schedule_map_to_scop (scop_p scop, isl_union_map *schedule_map)
414 {
415   int i;
416   poly_bb_p pbb;
417 
418   FOR_EACH_VEC_ELT (scop->pbbs, i, pbb)
419     {
420       isl_set *domain = isl_set_copy (pbb->domain);
421       isl_map *stmt_schedule;
422 
423       isl_union_map *stmt_band
424 	= isl_union_map_intersect_domain (isl_union_map_copy (schedule_map),
425 					  isl_union_set_from_set (domain));
426       stmt_band = isl_union_map_coalesce (stmt_band);
427       isl_union_map_foreach_map (stmt_band, get_single_map, &stmt_schedule);
428       isl_map_free (pbb->transformed);
429       pbb->transformed = isl_map_coalesce (stmt_schedule);
430       isl_union_map_free (stmt_band);
431     }
432 }
433 
434 static isl_union_set *
435 scop_get_domains (scop_p scop)
436 {
437   int i;
438   poly_bb_p pbb;
439   isl_space *space = isl_set_get_space (scop->param_context);
440   isl_union_set *res = isl_union_set_empty (space);
441 
442   FOR_EACH_VEC_ELT (scop->pbbs, i, pbb)
443     res = isl_union_set_add_set (res, isl_set_copy (pbb->domain));
444 
445   return res;
446 }
447 
448 /* Compute the schedule for SCOP based on its parameters, domain and set of
449    constraints.  Then apply the schedule to SCOP.  */
450 
451 static bool
452 optimize_isl (scop_p scop)
453 {
454   int old_max_operations = isl_ctx_get_max_operations (scop->isl_context);
455   int max_operations = PARAM_VALUE (PARAM_MAX_ISL_OPERATIONS);
456   if (max_operations)
457     isl_ctx_set_max_operations (scop->isl_context, max_operations);
458   isl_options_set_on_error (scop->isl_context, ISL_ON_ERROR_CONTINUE);
459 
460   isl_union_set *domain = scop_get_domains (scop);
461   scop_get_dependences (scop);
462   scop->dependence
463     = isl_union_map_gist_domain (scop->dependence, isl_union_set_copy (domain));
464   scop->dependence
465     = isl_union_map_gist_range (scop->dependence, isl_union_set_copy (domain));
466   isl_union_map *validity = isl_union_map_copy (scop->dependence);
467   isl_union_map *proximity = isl_union_map_copy (validity);
468 
469   isl_options_set_schedule_fuse (scop->isl_context, ISL_SCHEDULE_FUSE_MIN);
470   isl_schedule *schedule
471     = isl_union_set_compute_schedule (domain, validity, proximity);
472 
473   isl_options_set_on_error (scop->isl_context, ISL_ON_ERROR_ABORT);
474 
475   isl_ctx_reset_operations (scop->isl_context);
476   isl_ctx_set_max_operations (scop->isl_context, old_max_operations);
477   if (!schedule || isl_ctx_last_error (scop->isl_context) == isl_error_quota)
478     {
479       if (dump_file && dump_flags)
480 	{
481 	  if (!schedule)
482 	    fprintf (dump_file, "isl did not return any schedule.\n");
483 	  else
484 	    fprintf (dump_file, "isl timed out --param max-isl-operations=%d\n",
485 		     max_operations);
486 	}
487 
488       if (schedule)
489 	isl_schedule_free (schedule);
490       return false;
491     }
492 
493   scop->schedule = schedule;
494 
495   isl_union_map *schedule_map = get_schedule_map (schedule);
496   apply_schedule_map_to_scop (scop, schedule_map);
497   isl_union_map_free (schedule_map);
498 
499   if (dump_file)
500     {
501       fprintf (dump_file, "isl end schedule:\n");
502       print_isl_schedule (dump_file, scop->schedule);
503     }
504 
505   return true;
506 }
507 
508 /* Apply graphite transformations to all the basic blocks of SCOP.  */
509 
510 bool
511 apply_poly_transforms (scop_p scop)
512 {
513   if (flag_loop_nest_optimize)
514     return optimize_isl (scop);
515 
516   if (!flag_graphite_identity && !flag_loop_parallelize_all)
517     return false;
518 
519   /* Generate code even if we did not apply any real transformation.
520      This also allows to check the performance for the identity
521      transformation: GIMPLE -> GRAPHITE -> GIMPLE.  */
522   return true;
523 }
524 
525 #endif /* HAVE_ISL_OPTIONS_SET_SCHEDULE_SERIALIZE_SCCS */
526 
527 #endif /* HAVE_isl */
528