1 /* A scheduling optimizer for Graphite 2 Copyright (C) 2012-2016 Free Software Foundation, Inc. 3 Contributed by Tobias Grosser <tobias@grosser.es>. 4 5 This file is part of GCC. 6 7 GCC is free software; you can redistribute it and/or modify 8 it under the terms of the GNU General Public License as published by 9 the Free Software Foundation; either version 3, or (at your option) 10 any later version. 11 12 GCC is distributed in the hope that it will be useful, 13 but WITHOUT ANY WARRANTY; without even the implied warranty of 14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 GNU General Public License for more details. 16 17 You should have received a copy of the GNU General Public License 18 along with GCC; see the file COPYING3. If not see 19 <http://www.gnu.org/licenses/>. */ 20 21 #define USES_ISL 22 23 #include "config.h" 24 25 #ifdef HAVE_isl 26 27 #include "system.h" 28 #include "coretypes.h" 29 #include "backend.h" 30 #include "cfghooks.h" 31 #include "tree.h" 32 #include "gimple.h" 33 #include "fold-const.h" 34 #include "gimple-iterator.h" 35 #include "tree-ssa-loop.h" 36 #include "cfgloop.h" 37 #include "tree-data-ref.h" 38 #include "params.h" 39 #include "dumpfile.h" 40 #include "graphite.h" 41 42 #ifdef HAVE_ISL_OPTIONS_SET_SCHEDULE_SERIALIZE_SCCS 43 /* isl 0.15 or later. */ 44 45 /* get_schedule_for_node_st - Improve schedule for the schedule node. 46 Only Simple loop tiling is considered. */ 47 48 static __isl_give isl_schedule_node * 49 get_schedule_for_node_st (__isl_take isl_schedule_node *node, void *user) 50 { 51 if (user) 52 return node; 53 54 if (isl_schedule_node_get_type (node) != isl_schedule_node_band 55 || isl_schedule_node_n_children (node) != 1) 56 return node; 57 58 isl_space *space = isl_schedule_node_band_get_space (node); 59 unsigned dims = isl_space_dim (space, isl_dim_set); 60 isl_schedule_node *child = isl_schedule_node_get_child (node, 0); 61 isl_schedule_node_type type = isl_schedule_node_get_type (child); 62 isl_space_free (space); 63 isl_schedule_node_free (child); 64 65 if (type != isl_schedule_node_leaf) 66 return node; 67 68 if (dims <= 1 || !isl_schedule_node_band_get_permutable (node)) 69 { 70 if (dump_file && dump_flags) 71 fprintf (dump_file, "not tiled\n"); 72 return node; 73 } 74 75 /* Tile loops. */ 76 space = isl_schedule_node_band_get_space (node); 77 isl_multi_val *sizes = isl_multi_val_zero (space); 78 long tile_size = PARAM_VALUE (PARAM_LOOP_BLOCK_TILE_SIZE); 79 isl_ctx *ctx = isl_schedule_node_get_ctx (node); 80 81 for (unsigned i = 0; i < dims; i++) 82 { 83 sizes = isl_multi_val_set_val (sizes, i, 84 isl_val_int_from_si (ctx, tile_size)); 85 if (dump_file && dump_flags) 86 fprintf (dump_file, "tiled by %ld\n", tile_size); 87 } 88 89 node = isl_schedule_node_band_tile (node, sizes); 90 node = isl_schedule_node_child (node, 0); 91 92 return node; 93 } 94 95 static isl_union_set * 96 scop_get_domains (scop_p scop) 97 { 98 int i; 99 poly_bb_p pbb; 100 isl_space *space = isl_set_get_space (scop->param_context); 101 isl_union_set *res = isl_union_set_empty (space); 102 103 FOR_EACH_VEC_ELT (scop->pbbs, i, pbb) 104 res = isl_union_set_add_set (res, isl_set_copy (pbb->domain)); 105 106 return res; 107 } 108 109 /* Compute the schedule for SCOP based on its parameters, domain and set of 110 constraints. Then apply the schedule to SCOP. */ 111 112 static bool 113 optimize_isl (scop_p scop) 114 { 115 int old_max_operations = isl_ctx_get_max_operations (scop->isl_context); 116 int max_operations = PARAM_VALUE (PARAM_MAX_ISL_OPERATIONS); 117 if (max_operations) 118 isl_ctx_set_max_operations (scop->isl_context, max_operations); 119 isl_options_set_on_error (scop->isl_context, ISL_ON_ERROR_CONTINUE); 120 121 isl_union_set *domain = scop_get_domains (scop); 122 123 /* Simplify the dependences on the domain. */ 124 scop_get_dependences (scop); 125 isl_union_map *dependences 126 = isl_union_map_gist_domain (isl_union_map_copy (scop->dependence), 127 isl_union_set_copy (domain)); 128 isl_union_map *validity 129 = isl_union_map_gist_range (dependences, isl_union_set_copy (domain)); 130 131 /* FIXME: proximity should not be validity. */ 132 isl_union_map *proximity = isl_union_map_copy (validity); 133 134 isl_schedule_constraints *sc = isl_schedule_constraints_on_domain (domain); 135 sc = isl_schedule_constraints_set_proximity (sc, proximity); 136 sc = isl_schedule_constraints_set_validity (sc, isl_union_map_copy (validity)); 137 sc = isl_schedule_constraints_set_coincidence (sc, validity); 138 139 isl_options_set_schedule_serialize_sccs (scop->isl_context, 0); 140 isl_options_set_schedule_maximize_band_depth (scop->isl_context, 1); 141 isl_options_set_schedule_max_constant_term (scop->isl_context, 20); 142 isl_options_set_schedule_max_coefficient (scop->isl_context, 20); 143 isl_options_set_tile_scale_tile_loops (scop->isl_context, 0); 144 /* Generate loop upper bounds that consist of the current loop iterator, an 145 operator (< or <=) and an expression not involving the iterator. If this 146 option is not set, then the current loop iterator may appear several times 147 in the upper bound. See the isl manual for more details. */ 148 isl_options_set_ast_build_atomic_upper_bound (scop->isl_context, 1); 149 150 scop->transformed_schedule = isl_schedule_constraints_compute_schedule (sc); 151 scop->transformed_schedule = 152 isl_schedule_map_schedule_node_bottom_up (scop->transformed_schedule, 153 get_schedule_for_node_st, NULL); 154 isl_options_set_on_error (scop->isl_context, ISL_ON_ERROR_ABORT); 155 156 isl_ctx_reset_operations (scop->isl_context); 157 isl_ctx_set_max_operations (scop->isl_context, old_max_operations); 158 if (!scop->transformed_schedule 159 || isl_ctx_last_error (scop->isl_context) == isl_error_quota) 160 { 161 if (dump_file && dump_flags) 162 fprintf (dump_file, "isl timed out --param max-isl-operations=%d\n", 163 max_operations); 164 return false; 165 } 166 167 gcc_assert (scop->original_schedule); 168 isl_union_map *original = isl_schedule_get_map (scop->original_schedule); 169 isl_union_map *transformed = isl_schedule_get_map (scop->transformed_schedule); 170 bool same_schedule = isl_union_map_is_equal (original, transformed); 171 isl_union_map_free (original); 172 isl_union_map_free (transformed); 173 174 if (same_schedule) 175 { 176 if (dump_file) 177 { 178 fprintf (dump_file, "[scheduler] isl optimized schedule is " 179 "identical to the original schedule.\n"); 180 print_schedule_ast (dump_file, scop->original_schedule, scop); 181 } 182 isl_schedule_free (scop->transformed_schedule); 183 scop->transformed_schedule = isl_schedule_copy (scop->original_schedule); 184 return false; 185 } 186 187 return true; 188 } 189 190 /* Apply graphite transformations to all the basic blocks of SCOP. */ 191 192 bool 193 apply_poly_transforms (scop_p scop) 194 { 195 if (flag_loop_nest_optimize) 196 return optimize_isl (scop); 197 198 if (!flag_graphite_identity && !flag_loop_parallelize_all) 199 return false; 200 201 /* Generate code even if we did not apply any real transformation. 202 This also allows to check the performance for the identity 203 transformation: GIMPLE -> GRAPHITE -> GIMPLE. */ 204 gcc_assert (scop->original_schedule); 205 scop->transformed_schedule = isl_schedule_copy (scop->original_schedule); 206 return true; 207 } 208 209 #else 210 211 /* get_tile_map - Create a map that describes a n-dimensonal tiling. 212 213 get_tile_map creates a map from a n-dimensional scattering space into an 214 2*n-dimensional scattering space. The map describes a rectangular tiling. 215 216 Example: 217 SCHEDULE_DIMENSIONS = 2, PARAMETER_DIMENSIONS = 1, TILE_SIZE = 32 218 219 tile_map := [p0] -> {[s0, s1] -> [t0, t1, s0, s1]: 220 t0 % 32 = 0 and t0 <= s0 < t0 + 32 and 221 t1 % 32 = 0 and t1 <= s1 < t1 + 32} 222 223 Before tiling: 224 225 for (i = 0; i < N; i++) 226 for (j = 0; j < M; j++) 227 S(i,j) 228 229 After tiling: 230 231 for (t_i = 0; t_i < N; i+=32) 232 for (t_j = 0; t_j < M; j+=32) 233 for (i = t_i; i < min(t_i + 32, N); i++) | Unknown that N % 32 = 0 234 for (j = t_j; j < t_j + 32; j++) | Known that M % 32 = 0 235 S(i,j) 236 */ 237 238 static isl_basic_map * 239 get_tile_map (isl_ctx *ctx, int schedule_dimensions, int tile_size) 240 { 241 /* We construct 242 243 tile_map := [p0] -> {[s0, s1] -> [t0, t1, p0, p1, a0, a1]: 244 s0 = a0 * 32 and s0 = p0 and t0 <= p0 < t0 + 32 and 245 s1 = a1 * 32 and s1 = p1 and t1 <= p1 < t1 + 32} 246 247 and project out the auxilary dimensions a0 and a1. */ 248 isl_space *space 249 = isl_space_alloc (ctx, 0, schedule_dimensions, schedule_dimensions * 3); 250 isl_basic_map *tile_map = isl_basic_map_universe (isl_space_copy (space)); 251 252 isl_local_space *local_space = isl_local_space_from_space (space); 253 254 for (int x = 0; x < schedule_dimensions; x++) 255 { 256 int sX = x; 257 int tX = x; 258 int pX = schedule_dimensions + x; 259 int aX = 2 * schedule_dimensions + x; 260 261 isl_constraint *c; 262 263 /* sX = aX * tile_size; */ 264 c = isl_equality_alloc (isl_local_space_copy (local_space)); 265 isl_constraint_set_coefficient_si (c, isl_dim_out, sX, 1); 266 isl_constraint_set_coefficient_si (c, isl_dim_out, aX, -tile_size); 267 tile_map = isl_basic_map_add_constraint (tile_map, c); 268 269 /* pX = sX; */ 270 c = isl_equality_alloc (isl_local_space_copy (local_space)); 271 isl_constraint_set_coefficient_si (c, isl_dim_out, pX, 1); 272 isl_constraint_set_coefficient_si (c, isl_dim_in, sX, -1); 273 tile_map = isl_basic_map_add_constraint (tile_map, c); 274 275 /* tX <= pX */ 276 c = isl_inequality_alloc (isl_local_space_copy (local_space)); 277 isl_constraint_set_coefficient_si (c, isl_dim_out, pX, 1); 278 isl_constraint_set_coefficient_si (c, isl_dim_out, tX, -1); 279 tile_map = isl_basic_map_add_constraint (tile_map, c); 280 281 /* pX <= tX + (tile_size - 1) */ 282 c = isl_inequality_alloc (isl_local_space_copy (local_space)); 283 isl_constraint_set_coefficient_si (c, isl_dim_out, tX, 1); 284 isl_constraint_set_coefficient_si (c, isl_dim_out, pX, -1); 285 isl_constraint_set_constant_si (c, tile_size - 1); 286 tile_map = isl_basic_map_add_constraint (tile_map, c); 287 } 288 289 /* Project out auxiliary dimensions. 290 291 The auxiliary dimensions are transformed into existentially quantified 292 ones. 293 This reduces the number of visible scattering dimensions and allows isl 294 to produces better code. */ 295 tile_map = 296 isl_basic_map_project_out (tile_map, isl_dim_out, 297 2 * schedule_dimensions, schedule_dimensions); 298 isl_local_space_free (local_space); 299 return tile_map; 300 } 301 302 /* get_schedule_for_band - Get the schedule for this BAND. 303 304 Polly applies transformations like tiling on top of the isl calculated 305 value. 306 This can influence the number of scheduling dimension. The number of 307 schedule dimensions is returned in DIMENSIONS. */ 308 309 static isl_union_map * 310 get_schedule_for_band (isl_band *band, int *dimensions) 311 { 312 isl_union_map *partial_schedule; 313 isl_ctx *ctx; 314 isl_space *space; 315 isl_basic_map *tile_map; 316 isl_union_map *tile_umap; 317 318 partial_schedule = isl_band_get_partial_schedule (band); 319 *dimensions = isl_band_n_member (band); 320 321 /* It does not make any sense to tile a band with just one dimension. */ 322 if (*dimensions == 1) 323 { 324 if (dump_file && dump_flags) 325 fprintf (dump_file, "not tiled\n"); 326 return partial_schedule; 327 } 328 329 if (dump_file && dump_flags) 330 fprintf (dump_file, "tiled by %d\n", 331 PARAM_VALUE (PARAM_LOOP_BLOCK_TILE_SIZE)); 332 333 ctx = isl_union_map_get_ctx (partial_schedule); 334 space = isl_union_map_get_space (partial_schedule); 335 336 tile_map = get_tile_map (ctx, *dimensions, 337 PARAM_VALUE (PARAM_LOOP_BLOCK_TILE_SIZE)); 338 tile_umap = isl_union_map_from_map (isl_map_from_basic_map (tile_map)); 339 tile_umap = isl_union_map_align_params (tile_umap, space); 340 tile_umap = isl_union_map_coalesce (tile_umap); 341 *dimensions = 2 * *dimensions; 342 343 return isl_union_map_apply_range (partial_schedule, tile_umap); 344 } 345 346 347 /* get_schedule_for_band_list - Get the scheduling map for a list of bands. 348 349 We walk recursively the forest of bands to combine the schedules of the 350 individual bands to the overall schedule. In case tiling is requested, 351 the individual bands are tiled. */ 352 353 static isl_union_map * 354 get_schedule_for_band_list (isl_band_list *band_list) 355 { 356 int num_bands, i; 357 isl_union_map *schedule; 358 isl_ctx *ctx; 359 360 ctx = isl_band_list_get_ctx (band_list); 361 num_bands = isl_band_list_n_band (band_list); 362 schedule = isl_union_map_empty (isl_space_params_alloc (ctx, 0)); 363 364 for (i = 0; i < num_bands; i++) 365 { 366 isl_band *band; 367 isl_union_map *partial_schedule; 368 int schedule_dimensions; 369 isl_space *space; 370 371 band = isl_band_list_get_band (band_list, i); 372 partial_schedule = get_schedule_for_band (band, &schedule_dimensions); 373 space = isl_union_map_get_space (partial_schedule); 374 375 if (isl_band_has_children (band)) 376 { 377 isl_band_list *children = isl_band_get_children (band); 378 isl_union_map *suffixSchedule 379 = get_schedule_for_band_list (children); 380 partial_schedule 381 = isl_union_map_flat_range_product (partial_schedule, 382 suffixSchedule); 383 isl_band_list_free (children); 384 } 385 386 schedule = isl_union_map_union (schedule, partial_schedule); 387 388 isl_band_free (band); 389 isl_space_free (space); 390 } 391 392 return isl_union_map_coalesce (schedule); 393 } 394 395 static isl_union_map * 396 get_schedule_map (isl_schedule *schedule) 397 { 398 isl_band_list *band_list = isl_schedule_get_band_forest (schedule); 399 isl_union_map *schedule_map = get_schedule_for_band_list (band_list); 400 isl_band_list_free (band_list); 401 return schedule_map; 402 } 403 404 static isl_stat 405 get_single_map (__isl_take isl_map *map, void *user) 406 { 407 isl_map **single_map = (isl_map **)user; 408 *single_map = map; 409 return isl_stat_ok; 410 } 411 412 static void 413 apply_schedule_map_to_scop (scop_p scop, isl_union_map *schedule_map) 414 { 415 int i; 416 poly_bb_p pbb; 417 418 FOR_EACH_VEC_ELT (scop->pbbs, i, pbb) 419 { 420 isl_set *domain = isl_set_copy (pbb->domain); 421 isl_map *stmt_schedule; 422 423 isl_union_map *stmt_band 424 = isl_union_map_intersect_domain (isl_union_map_copy (schedule_map), 425 isl_union_set_from_set (domain)); 426 stmt_band = isl_union_map_coalesce (stmt_band); 427 isl_union_map_foreach_map (stmt_band, get_single_map, &stmt_schedule); 428 isl_map_free (pbb->transformed); 429 pbb->transformed = isl_map_coalesce (stmt_schedule); 430 isl_union_map_free (stmt_band); 431 } 432 } 433 434 static isl_union_set * 435 scop_get_domains (scop_p scop) 436 { 437 int i; 438 poly_bb_p pbb; 439 isl_space *space = isl_set_get_space (scop->param_context); 440 isl_union_set *res = isl_union_set_empty (space); 441 442 FOR_EACH_VEC_ELT (scop->pbbs, i, pbb) 443 res = isl_union_set_add_set (res, isl_set_copy (pbb->domain)); 444 445 return res; 446 } 447 448 /* Compute the schedule for SCOP based on its parameters, domain and set of 449 constraints. Then apply the schedule to SCOP. */ 450 451 static bool 452 optimize_isl (scop_p scop) 453 { 454 int old_max_operations = isl_ctx_get_max_operations (scop->isl_context); 455 int max_operations = PARAM_VALUE (PARAM_MAX_ISL_OPERATIONS); 456 if (max_operations) 457 isl_ctx_set_max_operations (scop->isl_context, max_operations); 458 isl_options_set_on_error (scop->isl_context, ISL_ON_ERROR_CONTINUE); 459 460 isl_union_set *domain = scop_get_domains (scop); 461 scop_get_dependences (scop); 462 scop->dependence 463 = isl_union_map_gist_domain (scop->dependence, isl_union_set_copy (domain)); 464 scop->dependence 465 = isl_union_map_gist_range (scop->dependence, isl_union_set_copy (domain)); 466 isl_union_map *validity = isl_union_map_copy (scop->dependence); 467 isl_union_map *proximity = isl_union_map_copy (validity); 468 469 isl_options_set_schedule_fuse (scop->isl_context, ISL_SCHEDULE_FUSE_MIN); 470 isl_schedule *schedule 471 = isl_union_set_compute_schedule (domain, validity, proximity); 472 473 isl_options_set_on_error (scop->isl_context, ISL_ON_ERROR_ABORT); 474 475 isl_ctx_reset_operations (scop->isl_context); 476 isl_ctx_set_max_operations (scop->isl_context, old_max_operations); 477 if (!schedule || isl_ctx_last_error (scop->isl_context) == isl_error_quota) 478 { 479 if (dump_file && dump_flags) 480 { 481 if (!schedule) 482 fprintf (dump_file, "isl did not return any schedule.\n"); 483 else 484 fprintf (dump_file, "isl timed out --param max-isl-operations=%d\n", 485 max_operations); 486 } 487 488 if (schedule) 489 isl_schedule_free (schedule); 490 return false; 491 } 492 493 scop->schedule = schedule; 494 495 isl_union_map *schedule_map = get_schedule_map (schedule); 496 apply_schedule_map_to_scop (scop, schedule_map); 497 isl_union_map_free (schedule_map); 498 499 if (dump_file) 500 { 501 fprintf (dump_file, "isl end schedule:\n"); 502 print_isl_schedule (dump_file, scop->schedule); 503 } 504 505 return true; 506 } 507 508 /* Apply graphite transformations to all the basic blocks of SCOP. */ 509 510 bool 511 apply_poly_transforms (scop_p scop) 512 { 513 if (flag_loop_nest_optimize) 514 return optimize_isl (scop); 515 516 if (!flag_graphite_identity && !flag_loop_parallelize_all) 517 return false; 518 519 /* Generate code even if we did not apply any real transformation. 520 This also allows to check the performance for the identity 521 transformation: GIMPLE -> GRAPHITE -> GIMPLE. */ 522 return true; 523 } 524 525 #endif /* HAVE_ISL_OPTIONS_SET_SCHEDULE_SERIALIZE_SCCS */ 526 527 #endif /* HAVE_isl */ 528