1 /* A scheduling optimizer for Graphite 2 Copyright (C) 2012-2013 Free Software Foundation, Inc. 3 Contributed by Tobias Grosser <tobias@grosser.es>. 4 5 This file is part of GCC. 6 7 GCC is free software; you can redistribute it and/or modify 8 it under the terms of the GNU General Public License as published by 9 the Free Software Foundation; either version 3, or (at your option) 10 any later version. 11 12 GCC is distributed in the hope that it will be useful, 13 but WITHOUT ANY WARRANTY; without even the implied warranty of 14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 GNU General Public License for more details. 16 17 You should have received a copy of the GNU General Public License 18 along with GCC; see the file COPYING3. If not see 19 <http://www.gnu.org/licenses/>. */ 20 21 #include "config.h" 22 23 #ifdef HAVE_cloog 24 #include <isl/set.h> 25 #include <isl/map.h> 26 #include <isl/union_map.h> 27 #include <isl/schedule.h> 28 #include <isl/band.h> 29 #include <isl/aff.h> 30 #include <isl/options.h> 31 #ifdef HAVE_ISL_SCHED_CONSTRAINTS_COMPUTE_SCHEDULE 32 #include <isl/deprecated/int.h> 33 #include <isl/deprecated/aff_int.h> 34 #endif 35 #endif 36 37 #include "system.h" 38 #include "coretypes.h" 39 #include "tree-flow.h" 40 #include "dumpfile.h" 41 #include "cfgloop.h" 42 #include "tree-chrec.h" 43 #include "tree-data-ref.h" 44 #include "tree-scalar-evolution.h" 45 #include "sese.h" 46 47 #ifdef HAVE_cloog 48 #include "graphite-poly.h" 49 50 static isl_union_set * 51 scop_get_domains (scop_p scop ATTRIBUTE_UNUSED) 52 { 53 int i; 54 poly_bb_p pbb; 55 isl_space *space = isl_set_get_space (scop->context); 56 isl_union_set *res = isl_union_set_empty (space); 57 58 FOR_EACH_VEC_ELT (scop->bbs, i, pbb) 59 res = isl_union_set_add_set (res, isl_set_copy (pbb->domain)); 60 61 return res; 62 } 63 64 static isl_union_map * 65 scop_get_dependences (scop_p scop) 66 { 67 isl_union_map *dependences; 68 69 if (!scop->must_raw) 70 compute_deps (scop, SCOP_BBS (scop), 71 &scop->must_raw, &scop->may_raw, 72 &scop->must_raw_no_source, &scop->may_raw_no_source, 73 &scop->must_war, &scop->may_war, 74 &scop->must_war_no_source, &scop->may_war_no_source, 75 &scop->must_waw, &scop->may_waw, 76 &scop->must_waw_no_source, &scop->may_waw_no_source); 77 78 dependences = isl_union_map_copy (scop->must_raw); 79 dependences = isl_union_map_union (dependences, 80 isl_union_map_copy (scop->must_war)); 81 dependences = isl_union_map_union (dependences, 82 isl_union_map_copy (scop->must_waw)); 83 dependences = isl_union_map_union (dependences, 84 isl_union_map_copy (scop->may_raw)); 85 dependences = isl_union_map_union (dependences, 86 isl_union_map_copy (scop->may_war)); 87 dependences = isl_union_map_union (dependences, 88 isl_union_map_copy (scop->may_waw)); 89 90 return dependences; 91 } 92 93 /* getTileMap - Create a map that describes a n-dimensonal tiling. 94 95 getTileMap creates a map from a n-dimensional scattering space into an 96 2*n-dimensional scattering space. The map describes a rectangular tiling. 97 98 Example: 99 scheduleDimensions = 2, parameterDimensions = 1, tileSize = 32 100 101 tileMap := [p0] -> {[s0, s1] -> [t0, t1, s0, s1]: 102 t0 % 32 = 0 and t0 <= s0 < t0 + 32 and 103 t1 % 32 = 0 and t1 <= s1 < t1 + 32} 104 105 Before tiling: 106 107 for (i = 0; i < N; i++) 108 for (j = 0; j < M; j++) 109 S(i,j) 110 111 After tiling: 112 113 for (t_i = 0; t_i < N; i+=32) 114 for (t_j = 0; t_j < M; j+=32) 115 for (i = t_i; i < min(t_i + 32, N); i++) | Unknown that N % 32 = 0 116 for (j = t_j; j < t_j + 32; j++) | Known that M % 32 = 0 117 S(i,j) 118 */ 119 120 static isl_basic_map * 121 getTileMap(isl_ctx *ctx, int scheduleDimensions, int tileSize) 122 { 123 int x; 124 /* We construct 125 126 tileMap := [p0] -> {[s0, s1] -> [t0, t1, p0, p1, a0, a1]: 127 s0 = a0 * 32 and s0 = p0 and t0 <= p0 < t0 + 32 and 128 s1 = a1 * 32 and s1 = p1 and t1 <= p1 < t1 + 32} 129 130 and project out the auxilary dimensions a0 and a1. */ 131 isl_space *Space = isl_space_alloc(ctx, 0, scheduleDimensions, 132 scheduleDimensions * 3); 133 isl_basic_map *tileMap = isl_basic_map_universe(isl_space_copy(Space)); 134 135 isl_local_space *LocalSpace = isl_local_space_from_space(Space); 136 137 for (x = 0; x < scheduleDimensions; x++) 138 { 139 int sX = x; 140 int tX = x; 141 int pX = scheduleDimensions + x; 142 int aX = 2 * scheduleDimensions + x; 143 144 isl_constraint *c; 145 146 /* sX = aX * tileSize; */ 147 c = isl_equality_alloc(isl_local_space_copy(LocalSpace)); 148 isl_constraint_set_coefficient_si(c, isl_dim_out, sX, 1); 149 isl_constraint_set_coefficient_si(c, isl_dim_out, aX, -tileSize); 150 tileMap = isl_basic_map_add_constraint(tileMap, c); 151 152 /* pX = sX; */ 153 c = isl_equality_alloc(isl_local_space_copy(LocalSpace)); 154 isl_constraint_set_coefficient_si(c, isl_dim_out, pX, 1); 155 isl_constraint_set_coefficient_si(c, isl_dim_in, sX, -1); 156 tileMap = isl_basic_map_add_constraint(tileMap, c); 157 158 /* tX <= pX */ 159 c = isl_inequality_alloc(isl_local_space_copy(LocalSpace)); 160 isl_constraint_set_coefficient_si(c, isl_dim_out, pX, 1); 161 isl_constraint_set_coefficient_si(c, isl_dim_out, tX, -1); 162 tileMap = isl_basic_map_add_constraint(tileMap, c); 163 164 /* pX <= tX + (tileSize - 1) */ 165 c = isl_inequality_alloc(isl_local_space_copy(LocalSpace)); 166 isl_constraint_set_coefficient_si(c, isl_dim_out, tX, 1); 167 isl_constraint_set_coefficient_si(c, isl_dim_out, pX, -1); 168 isl_constraint_set_constant_si(c, tileSize - 1); 169 tileMap = isl_basic_map_add_constraint(tileMap, c); 170 } 171 172 /* Project out auxilary dimensions. 173 174 The auxilary dimensions are transformed into existentially quantified ones. 175 This reduces the number of visible scattering dimensions and allows Cloog 176 to produces better code. */ 177 tileMap = isl_basic_map_project_out(tileMap, isl_dim_out, 178 2 * scheduleDimensions, 179 scheduleDimensions); 180 isl_local_space_free(LocalSpace); 181 return tileMap; 182 } 183 184 /* getScheduleForBand - Get the schedule for this band. 185 186 Polly applies transformations like tiling on top of the isl calculated value. 187 This can influence the number of scheduling dimension. The number of 188 schedule dimensions is returned in the parameter 'Dimension'. */ 189 static bool DisableTiling = false; 190 191 static isl_union_map * 192 getScheduleForBand(isl_band *Band, int *Dimensions) 193 { 194 isl_union_map *PartialSchedule; 195 isl_ctx *ctx; 196 isl_space *Space; 197 isl_basic_map *TileMap; 198 isl_union_map *TileUMap; 199 200 PartialSchedule = isl_band_get_partial_schedule(Band); 201 *Dimensions = isl_band_n_member(Band); 202 203 if (DisableTiling) 204 return PartialSchedule; 205 206 /* It does not make any sense to tile a band with just one dimension. */ 207 if (*Dimensions == 1) 208 return PartialSchedule; 209 210 ctx = isl_union_map_get_ctx(PartialSchedule); 211 Space = isl_union_map_get_space(PartialSchedule); 212 213 TileMap = getTileMap(ctx, *Dimensions, 32); 214 TileUMap = isl_union_map_from_map(isl_map_from_basic_map(TileMap)); 215 TileUMap = isl_union_map_align_params(TileUMap, Space); 216 *Dimensions = 2 * *Dimensions; 217 218 return isl_union_map_apply_range(PartialSchedule, TileUMap); 219 } 220 221 /* Create a map that pre-vectorizes one scheduling dimension. 222 223 getPrevectorMap creates a map that maps each input dimension to the same 224 output dimension, except for the dimension DimToVectorize. DimToVectorize is 225 strip mined by 'VectorWidth' and the newly created point loop of 226 DimToVectorize is moved to the innermost level. 227 228 Example (DimToVectorize=0, ScheduleDimensions=2, VectorWidth=4): 229 230 | Before transformation 231 | 232 | A[i,j] -> [i,j] 233 | 234 | for (i = 0; i < 128; i++) 235 | for (j = 0; j < 128; j++) 236 | A(i,j); 237 238 Prevector map: 239 [i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and i = ip 240 241 | After transformation: 242 | 243 | A[i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and i = ip 244 | 245 | for (it = 0; it < 128; it+=4) 246 | for (j = 0; j < 128; j++) 247 | for (ip = max(0,it); ip < min(128, it + 3); ip++) 248 | A(ip,j); 249 250 The goal of this transformation is to create a trivially vectorizable loop. 251 This means a parallel loop at the innermost level that has a constant number 252 of iterations corresponding to the target vector width. 253 254 This transformation creates a loop at the innermost level. The loop has a 255 constant number of iterations, if the number of loop iterations at 256 DimToVectorize can be devided by VectorWidth. The default VectorWidth is 257 currently constant and not yet target specific. This function does not reason 258 about parallelism. */ 259 static isl_map * 260 getPrevectorMap(isl_ctx *ctx, int DimToVectorize, 261 int ScheduleDimensions, 262 int VectorWidth) 263 { 264 isl_space *Space; 265 isl_local_space *LocalSpace, *LocalSpaceRange; 266 isl_set *Modulo; 267 isl_map *TilingMap; 268 isl_constraint *c; 269 isl_aff *Aff; 270 int PointDimension; /* ip */ 271 int TileDimension; /* it */ 272 isl_int VectorWidthMP; 273 int i; 274 275 /* assert (0 <= DimToVectorize && DimToVectorize < ScheduleDimensions);*/ 276 277 Space = isl_space_alloc(ctx, 0, ScheduleDimensions, ScheduleDimensions + 1); 278 TilingMap = isl_map_universe(isl_space_copy(Space)); 279 LocalSpace = isl_local_space_from_space(Space); 280 PointDimension = ScheduleDimensions; 281 TileDimension = DimToVectorize; 282 283 /* Create an identity map for everything except DimToVectorize and map 284 DimToVectorize to the point loop at the innermost dimension. */ 285 for (i = 0; i < ScheduleDimensions; i++) 286 { 287 c = isl_equality_alloc(isl_local_space_copy(LocalSpace)); 288 isl_constraint_set_coefficient_si(c, isl_dim_in, i, -1); 289 290 if (i == DimToVectorize) 291 isl_constraint_set_coefficient_si(c, isl_dim_out, PointDimension, 1); 292 else 293 isl_constraint_set_coefficient_si(c, isl_dim_out, i, 1); 294 295 TilingMap = isl_map_add_constraint(TilingMap, c); 296 } 297 298 /* it % 'VectorWidth' = 0 */ 299 LocalSpaceRange = isl_local_space_range(isl_local_space_copy(LocalSpace)); 300 Aff = isl_aff_zero_on_domain(LocalSpaceRange); 301 Aff = isl_aff_set_constant_si(Aff, VectorWidth); 302 Aff = isl_aff_set_coefficient_si(Aff, isl_dim_in, TileDimension, 1); 303 isl_int_init(VectorWidthMP); 304 isl_int_set_si(VectorWidthMP, VectorWidth); 305 Aff = isl_aff_mod(Aff, VectorWidthMP); 306 isl_int_clear(VectorWidthMP); 307 Modulo = isl_pw_aff_zero_set(isl_pw_aff_from_aff(Aff)); 308 TilingMap = isl_map_intersect_range(TilingMap, Modulo); 309 310 /* it <= ip */ 311 c = isl_inequality_alloc(isl_local_space_copy(LocalSpace)); 312 isl_constraint_set_coefficient_si(c, isl_dim_out, TileDimension, -1); 313 isl_constraint_set_coefficient_si(c, isl_dim_out, PointDimension, 1); 314 TilingMap = isl_map_add_constraint(TilingMap, c); 315 316 /* ip <= it + ('VectorWidth' - 1) */ 317 c = isl_inequality_alloc(LocalSpace); 318 isl_constraint_set_coefficient_si(c, isl_dim_out, TileDimension, 1); 319 isl_constraint_set_coefficient_si(c, isl_dim_out, PointDimension, -1); 320 isl_constraint_set_constant_si(c, VectorWidth - 1); 321 TilingMap = isl_map_add_constraint(TilingMap, c); 322 323 isl_map_dump(TilingMap); 324 325 return TilingMap; 326 } 327 328 static bool EnablePollyVector = false; 329 330 /* getScheduleForBandList - Get the scheduling map for a list of bands. 331 332 We walk recursively the forest of bands to combine the schedules of the 333 individual bands to the overall schedule. In case tiling is requested, 334 the individual bands are tiled. */ 335 static isl_union_map * 336 getScheduleForBandList(isl_band_list *BandList) 337 { 338 int NumBands, i; 339 isl_union_map *Schedule; 340 isl_ctx *ctx; 341 342 ctx = isl_band_list_get_ctx(BandList); 343 NumBands = isl_band_list_n_band(BandList); 344 Schedule = isl_union_map_empty(isl_space_params_alloc(ctx, 0)); 345 346 for (i = 0; i < NumBands; i++) 347 { 348 isl_band *Band; 349 isl_union_map *PartialSchedule; 350 int ScheduleDimensions; 351 isl_space *Space; 352 353 Band = isl_band_list_get_band(BandList, i); 354 PartialSchedule = getScheduleForBand(Band, &ScheduleDimensions); 355 Space = isl_union_map_get_space(PartialSchedule); 356 357 if (isl_band_has_children(Band)) 358 { 359 isl_band_list *Children; 360 isl_union_map *SuffixSchedule; 361 362 Children = isl_band_get_children(Band); 363 SuffixSchedule = getScheduleForBandList(Children); 364 PartialSchedule = isl_union_map_flat_range_product(PartialSchedule, 365 SuffixSchedule); 366 isl_band_list_free(Children); 367 } 368 else if (EnablePollyVector) 369 { 370 for (i = ScheduleDimensions - 1 ; i >= 0 ; i--) 371 { 372 #ifdef HAVE_ISL_SCHED_CONSTRAINTS_COMPUTE_SCHEDULE 373 if (isl_band_member_is_coincident (Band, i)) 374 #else 375 if (isl_band_member_is_zero_distance(Band, i)) 376 #endif 377 { 378 isl_map *TileMap; 379 isl_union_map *TileUMap; 380 381 TileMap = getPrevectorMap(ctx, i, ScheduleDimensions, 4); 382 TileUMap = isl_union_map_from_map(TileMap); 383 TileUMap = isl_union_map_align_params(TileUMap, 384 isl_space_copy(Space)); 385 PartialSchedule = isl_union_map_apply_range(PartialSchedule, 386 TileUMap); 387 break; 388 } 389 } 390 } 391 392 Schedule = isl_union_map_union(Schedule, PartialSchedule); 393 394 isl_band_free(Band); 395 isl_space_free(Space); 396 } 397 398 return Schedule; 399 } 400 401 static isl_union_map * 402 getScheduleMap(isl_schedule *Schedule) 403 { 404 isl_band_list *BandList = isl_schedule_get_band_forest(Schedule); 405 isl_union_map *ScheduleMap = getScheduleForBandList(BandList); 406 isl_band_list_free(BandList); 407 return ScheduleMap; 408 } 409 410 static int 411 getSingleMap(__isl_take isl_map *map, void *user) 412 { 413 isl_map **singleMap = (isl_map **) user; 414 *singleMap = map; 415 416 return 0; 417 } 418 419 static void 420 apply_schedule_map_to_scop (scop_p scop, isl_union_map *schedule_map) 421 { 422 int i; 423 poly_bb_p pbb; 424 425 FOR_EACH_VEC_ELT (scop->bbs, i, pbb) 426 { 427 isl_set *domain = isl_set_copy (pbb->domain); 428 isl_union_map *stmtBand; 429 isl_map *stmtSchedule; 430 431 stmtBand = isl_union_map_intersect_domain(isl_union_map_copy(schedule_map), 432 isl_union_set_from_set(domain)); 433 isl_union_map_foreach_map(stmtBand, getSingleMap, &stmtSchedule); 434 isl_map_free(pbb->transformed); 435 pbb->transformed = stmtSchedule; 436 isl_union_map_free(stmtBand); 437 } 438 } 439 440 static const int CONSTANT_BOUND = 20; 441 442 bool 443 optimize_isl (scop_p scop) 444 { 445 446 isl_schedule *schedule; 447 isl_union_set *domain; 448 isl_union_map *validity, *proximity, *dependences; 449 isl_union_map *schedule_map; 450 451 domain = scop_get_domains (scop); 452 dependences = scop_get_dependences (scop); 453 dependences = isl_union_map_gist_domain(dependences, 454 isl_union_set_copy(domain)); 455 dependences = isl_union_map_gist_range(dependences, 456 isl_union_set_copy(domain)); 457 validity = dependences; 458 459 proximity = isl_union_map_copy (validity); 460 461 isl_options_set_schedule_max_constant_term(scop->ctx, CONSTANT_BOUND); 462 isl_options_set_schedule_maximize_band_depth(scop->ctx, 1); 463 isl_options_set_schedule_fuse(scop->ctx, ISL_SCHEDULE_FUSE_MIN); 464 isl_options_set_on_error(scop->ctx, ISL_ON_ERROR_CONTINUE); 465 schedule = isl_union_set_compute_schedule (domain, validity, proximity); 466 isl_options_set_on_error(scop->ctx, ISL_ON_ERROR_ABORT); 467 468 if (!schedule) 469 return false; 470 471 schedule_map = getScheduleMap (schedule); 472 473 apply_schedule_map_to_scop (scop, schedule_map); 474 475 isl_schedule_free (schedule); 476 isl_union_map_free (schedule_map); 477 478 return true; 479 } 480 481 #endif 482