1*e4b17023SJohn Marino /* Heuristics and transform for loop blocking and strip mining on 2*e4b17023SJohn Marino polyhedral representation. 3*e4b17023SJohn Marino 4*e4b17023SJohn Marino Copyright (C) 2009, 2010 Free Software Foundation, Inc. 5*e4b17023SJohn Marino Contributed by Sebastian Pop <sebastian.pop@amd.com> and 6*e4b17023SJohn Marino Pranav Garg <pranav.garg2107@gmail.com>. 7*e4b17023SJohn Marino 8*e4b17023SJohn Marino This file is part of GCC. 9*e4b17023SJohn Marino 10*e4b17023SJohn Marino GCC is free software; you can redistribute it and/or modify 11*e4b17023SJohn Marino it under the terms of the GNU General Public License as published by 12*e4b17023SJohn Marino the Free Software Foundation; either version 3, or (at your option) 13*e4b17023SJohn Marino any later version. 14*e4b17023SJohn Marino 15*e4b17023SJohn Marino GCC is distributed in the hope that it will be useful, 16*e4b17023SJohn Marino but WITHOUT ANY WARRANTY; without even the implied warranty of 17*e4b17023SJohn Marino MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18*e4b17023SJohn Marino GNU General Public License for more details. 19*e4b17023SJohn Marino 20*e4b17023SJohn Marino You should have received a copy of the GNU General Public License 21*e4b17023SJohn Marino along with GCC; see the file COPYING3. If not see 22*e4b17023SJohn Marino <http://www.gnu.org/licenses/>. */ 23*e4b17023SJohn Marino #include "config.h" 24*e4b17023SJohn Marino #include "system.h" 25*e4b17023SJohn Marino #include "coretypes.h" 26*e4b17023SJohn Marino #include "tree-flow.h" 27*e4b17023SJohn Marino #include "tree-dump.h" 28*e4b17023SJohn Marino #include "cfgloop.h" 29*e4b17023SJohn Marino #include "tree-chrec.h" 30*e4b17023SJohn Marino #include "tree-data-ref.h" 31*e4b17023SJohn Marino #include "sese.h" 32*e4b17023SJohn Marino 33*e4b17023SJohn Marino #ifdef HAVE_cloog 34*e4b17023SJohn Marino #include "ppl_c.h" 35*e4b17023SJohn Marino #include "graphite-ppl.h" 36*e4b17023SJohn Marino #include "graphite-poly.h" 37*e4b17023SJohn Marino 38*e4b17023SJohn Marino 39*e4b17023SJohn Marino /* Strip mines with a factor STRIDE the scattering (time) dimension 40*e4b17023SJohn Marino around PBB at depth TIME_DEPTH. 41*e4b17023SJohn Marino 42*e4b17023SJohn Marino The following example comes from the wiki page: 43*e4b17023SJohn Marino http://gcc.gnu.org/wiki/Graphite/Strip_mine 44*e4b17023SJohn Marino 45*e4b17023SJohn Marino The strip mine of a loop with a tile of 64 can be obtained with a 46*e4b17023SJohn Marino scattering function as follows: 47*e4b17023SJohn Marino 48*e4b17023SJohn Marino $ cat ./albert_strip_mine.cloog 49*e4b17023SJohn Marino # language: C 50*e4b17023SJohn Marino c 51*e4b17023SJohn Marino 52*e4b17023SJohn Marino # parameter {n | n >= 0} 53*e4b17023SJohn Marino 1 3 54*e4b17023SJohn Marino # n 1 55*e4b17023SJohn Marino 1 1 0 56*e4b17023SJohn Marino 1 57*e4b17023SJohn Marino n 58*e4b17023SJohn Marino 59*e4b17023SJohn Marino 1 # Number of statements: 60*e4b17023SJohn Marino 61*e4b17023SJohn Marino 1 62*e4b17023SJohn Marino # {i | 0 <= i <= n} 63*e4b17023SJohn Marino 2 4 64*e4b17023SJohn Marino # i n 1 65*e4b17023SJohn Marino 1 1 0 0 66*e4b17023SJohn Marino 1 -1 1 0 67*e4b17023SJohn Marino 68*e4b17023SJohn Marino 0 0 0 69*e4b17023SJohn Marino 1 70*e4b17023SJohn Marino i 71*e4b17023SJohn Marino 72*e4b17023SJohn Marino 1 # Scattering functions 73*e4b17023SJohn Marino 74*e4b17023SJohn Marino 3 6 75*e4b17023SJohn Marino # NEW OLD i n 1 76*e4b17023SJohn Marino 1 -64 0 1 0 0 77*e4b17023SJohn Marino 1 64 0 -1 0 63 78*e4b17023SJohn Marino 0 0 1 -1 0 0 79*e4b17023SJohn Marino 80*e4b17023SJohn Marino 1 81*e4b17023SJohn Marino NEW OLD 82*e4b17023SJohn Marino 83*e4b17023SJohn Marino #the output of CLooG is like this: 84*e4b17023SJohn Marino #$ cloog ./albert_strip_mine.cloog 85*e4b17023SJohn Marino # for (NEW=0;NEW<=floord(n,64);NEW++) { 86*e4b17023SJohn Marino # for (OLD=max(64*NEW,0);OLD<=min(64*NEW+63,n);OLD++) { 87*e4b17023SJohn Marino # S1(i = OLD) ; 88*e4b17023SJohn Marino # } 89*e4b17023SJohn Marino # } 90*e4b17023SJohn Marino */ 91*e4b17023SJohn Marino 92*e4b17023SJohn Marino static void 93*e4b17023SJohn Marino pbb_strip_mine_time_depth (poly_bb_p pbb, int time_depth, int stride) 94*e4b17023SJohn Marino { 95*e4b17023SJohn Marino ppl_dimension_type iter, dim, strip; 96*e4b17023SJohn Marino ppl_Polyhedron_t res = PBB_TRANSFORMED_SCATTERING (pbb); 97*e4b17023SJohn Marino /* STRIP is the dimension that iterates with stride STRIDE. */ 98*e4b17023SJohn Marino /* ITER is the dimension that enumerates single iterations inside 99*e4b17023SJohn Marino one strip that has at most STRIDE iterations. */ 100*e4b17023SJohn Marino strip = time_depth; 101*e4b17023SJohn Marino iter = strip + 2; 102*e4b17023SJohn Marino 103*e4b17023SJohn Marino psct_add_scattering_dimension (pbb, strip); 104*e4b17023SJohn Marino psct_add_scattering_dimension (pbb, strip + 1); 105*e4b17023SJohn Marino 106*e4b17023SJohn Marino ppl_Polyhedron_space_dimension (res, &dim); 107*e4b17023SJohn Marino 108*e4b17023SJohn Marino /* Lower bound of the striped loop. */ 109*e4b17023SJohn Marino { 110*e4b17023SJohn Marino ppl_Constraint_t new_cstr; 111*e4b17023SJohn Marino ppl_Linear_Expression_t expr; 112*e4b17023SJohn Marino 113*e4b17023SJohn Marino ppl_new_Linear_Expression_with_dimension (&expr, dim); 114*e4b17023SJohn Marino ppl_set_coef (expr, strip, -1 * stride); 115*e4b17023SJohn Marino ppl_set_coef (expr, iter, 1); 116*e4b17023SJohn Marino 117*e4b17023SJohn Marino ppl_new_Constraint (&new_cstr, expr, PPL_CONSTRAINT_TYPE_GREATER_OR_EQUAL); 118*e4b17023SJohn Marino ppl_delete_Linear_Expression (expr); 119*e4b17023SJohn Marino ppl_Polyhedron_add_constraint (res, new_cstr); 120*e4b17023SJohn Marino ppl_delete_Constraint (new_cstr); 121*e4b17023SJohn Marino } 122*e4b17023SJohn Marino 123*e4b17023SJohn Marino /* Upper bound of the striped loop. */ 124*e4b17023SJohn Marino { 125*e4b17023SJohn Marino ppl_Constraint_t new_cstr; 126*e4b17023SJohn Marino ppl_Linear_Expression_t expr; 127*e4b17023SJohn Marino 128*e4b17023SJohn Marino ppl_new_Linear_Expression_with_dimension (&expr, dim); 129*e4b17023SJohn Marino ppl_set_coef (expr, strip, stride); 130*e4b17023SJohn Marino ppl_set_coef (expr, iter, -1); 131*e4b17023SJohn Marino ppl_set_inhomogeneous (expr, stride - 1); 132*e4b17023SJohn Marino 133*e4b17023SJohn Marino ppl_new_Constraint (&new_cstr, expr, PPL_CONSTRAINT_TYPE_GREATER_OR_EQUAL); 134*e4b17023SJohn Marino ppl_delete_Linear_Expression (expr); 135*e4b17023SJohn Marino ppl_Polyhedron_add_constraint (res, new_cstr); 136*e4b17023SJohn Marino ppl_delete_Constraint (new_cstr); 137*e4b17023SJohn Marino } 138*e4b17023SJohn Marino 139*e4b17023SJohn Marino /* Static scheduling for ITER level. 140*e4b17023SJohn Marino This is mandatory to keep the 2d + 1 canonical scheduling format. */ 141*e4b17023SJohn Marino { 142*e4b17023SJohn Marino ppl_Constraint_t new_cstr; 143*e4b17023SJohn Marino ppl_Linear_Expression_t expr; 144*e4b17023SJohn Marino 145*e4b17023SJohn Marino ppl_new_Linear_Expression_with_dimension (&expr, dim); 146*e4b17023SJohn Marino ppl_set_coef (expr, strip + 1, 1); 147*e4b17023SJohn Marino ppl_set_inhomogeneous (expr, 0); 148*e4b17023SJohn Marino 149*e4b17023SJohn Marino ppl_new_Constraint (&new_cstr, expr, PPL_CONSTRAINT_TYPE_EQUAL); 150*e4b17023SJohn Marino ppl_delete_Linear_Expression (expr); 151*e4b17023SJohn Marino ppl_Polyhedron_add_constraint (res, new_cstr); 152*e4b17023SJohn Marino ppl_delete_Constraint (new_cstr); 153*e4b17023SJohn Marino } 154*e4b17023SJohn Marino } 155*e4b17023SJohn Marino 156*e4b17023SJohn Marino /* Returns true when strip mining with STRIDE of the loop LST is 157*e4b17023SJohn Marino profitable. */ 158*e4b17023SJohn Marino 159*e4b17023SJohn Marino static bool 160*e4b17023SJohn Marino lst_strip_mine_profitable_p (lst_p lst, int stride) 161*e4b17023SJohn Marino { 162*e4b17023SJohn Marino mpz_t niter, strip_stride; 163*e4b17023SJohn Marino bool res; 164*e4b17023SJohn Marino 165*e4b17023SJohn Marino gcc_assert (LST_LOOP_P (lst)); 166*e4b17023SJohn Marino mpz_init (strip_stride); 167*e4b17023SJohn Marino mpz_init (niter); 168*e4b17023SJohn Marino 169*e4b17023SJohn Marino mpz_set_si (strip_stride, stride); 170*e4b17023SJohn Marino lst_niter_for_loop (lst, niter); 171*e4b17023SJohn Marino res = (mpz_cmp (niter, strip_stride) > 0); 172*e4b17023SJohn Marino 173*e4b17023SJohn Marino mpz_clear (strip_stride); 174*e4b17023SJohn Marino mpz_clear (niter); 175*e4b17023SJohn Marino return res; 176*e4b17023SJohn Marino } 177*e4b17023SJohn Marino 178*e4b17023SJohn Marino /* Strip-mines all the loops of LST with STRIDE. Return the number of 179*e4b17023SJohn Marino loops strip-mined. */ 180*e4b17023SJohn Marino 181*e4b17023SJohn Marino static int 182*e4b17023SJohn Marino lst_do_strip_mine_loop (lst_p lst, int depth, int stride) 183*e4b17023SJohn Marino { 184*e4b17023SJohn Marino int i; 185*e4b17023SJohn Marino lst_p l; 186*e4b17023SJohn Marino poly_bb_p pbb; 187*e4b17023SJohn Marino 188*e4b17023SJohn Marino if (!lst) 189*e4b17023SJohn Marino return 0; 190*e4b17023SJohn Marino 191*e4b17023SJohn Marino if (LST_LOOP_P (lst)) 192*e4b17023SJohn Marino { 193*e4b17023SJohn Marino int res = 0; 194*e4b17023SJohn Marino 195*e4b17023SJohn Marino FOR_EACH_VEC_ELT (lst_p, LST_SEQ (lst), i, l) 196*e4b17023SJohn Marino res += lst_do_strip_mine_loop (l, depth, stride); 197*e4b17023SJohn Marino 198*e4b17023SJohn Marino return res; 199*e4b17023SJohn Marino } 200*e4b17023SJohn Marino 201*e4b17023SJohn Marino pbb = LST_PBB (lst); 202*e4b17023SJohn Marino pbb_strip_mine_time_depth (pbb, psct_dynamic_dim (pbb, depth), stride); 203*e4b17023SJohn Marino return 1; 204*e4b17023SJohn Marino } 205*e4b17023SJohn Marino 206*e4b17023SJohn Marino /* Strip-mines all the loops of LST with STRIDE. When STRIDE is zero, 207*e4b17023SJohn Marino read the stride from the PARAM_LOOP_BLOCK_TILE_SIZE. Return the 208*e4b17023SJohn Marino number of strip-mined loops. 209*e4b17023SJohn Marino 210*e4b17023SJohn Marino Strip mining transforms a loop 211*e4b17023SJohn Marino 212*e4b17023SJohn Marino | for (i = 0; i < N; i++) 213*e4b17023SJohn Marino | S (i); 214*e4b17023SJohn Marino 215*e4b17023SJohn Marino into the following loop nest: 216*e4b17023SJohn Marino 217*e4b17023SJohn Marino | for (k = 0; k < N; k += STRIDE) 218*e4b17023SJohn Marino | for (j = 0; j < STRIDE; j++) 219*e4b17023SJohn Marino | S (i = k + j); 220*e4b17023SJohn Marino */ 221*e4b17023SJohn Marino 222*e4b17023SJohn Marino static int 223*e4b17023SJohn Marino lst_do_strip_mine (lst_p lst, int stride) 224*e4b17023SJohn Marino { 225*e4b17023SJohn Marino int i; 226*e4b17023SJohn Marino lst_p l; 227*e4b17023SJohn Marino int res = 0; 228*e4b17023SJohn Marino int depth; 229*e4b17023SJohn Marino 230*e4b17023SJohn Marino if (!stride) 231*e4b17023SJohn Marino stride = PARAM_VALUE (PARAM_LOOP_BLOCK_TILE_SIZE); 232*e4b17023SJohn Marino 233*e4b17023SJohn Marino if (!lst 234*e4b17023SJohn Marino || !LST_LOOP_P (lst)) 235*e4b17023SJohn Marino return false; 236*e4b17023SJohn Marino 237*e4b17023SJohn Marino FOR_EACH_VEC_ELT (lst_p, LST_SEQ (lst), i, l) 238*e4b17023SJohn Marino res += lst_do_strip_mine (l, stride); 239*e4b17023SJohn Marino 240*e4b17023SJohn Marino depth = lst_depth (lst); 241*e4b17023SJohn Marino if (depth >= 0 242*e4b17023SJohn Marino && lst_strip_mine_profitable_p (lst, stride)) 243*e4b17023SJohn Marino { 244*e4b17023SJohn Marino res += lst_do_strip_mine_loop (lst, lst_depth (lst), stride); 245*e4b17023SJohn Marino lst_add_loop_under_loop (lst); 246*e4b17023SJohn Marino } 247*e4b17023SJohn Marino 248*e4b17023SJohn Marino return res; 249*e4b17023SJohn Marino } 250*e4b17023SJohn Marino 251*e4b17023SJohn Marino /* Strip mines all the loops in SCOP. Returns the number of 252*e4b17023SJohn Marino strip-mined loops. */ 253*e4b17023SJohn Marino 254*e4b17023SJohn Marino int 255*e4b17023SJohn Marino scop_do_strip_mine (scop_p scop, int stride) 256*e4b17023SJohn Marino { 257*e4b17023SJohn Marino return lst_do_strip_mine (SCOP_TRANSFORMED_SCHEDULE (scop), stride); 258*e4b17023SJohn Marino } 259*e4b17023SJohn Marino 260*e4b17023SJohn Marino /* Loop blocks all the loops in SCOP. Returns true when we manage to 261*e4b17023SJohn Marino block some loops. */ 262*e4b17023SJohn Marino 263*e4b17023SJohn Marino bool 264*e4b17023SJohn Marino scop_do_block (scop_p scop) 265*e4b17023SJohn Marino { 266*e4b17023SJohn Marino store_scattering (scop); 267*e4b17023SJohn Marino 268*e4b17023SJohn Marino /* If we don't strip mine at least two loops, or not interchange 269*e4b17023SJohn Marino loops, the strip mine alone will not be profitable, and the 270*e4b17023SJohn Marino transform is not a loop blocking: so revert the transform. */ 271*e4b17023SJohn Marino if (lst_do_strip_mine (SCOP_TRANSFORMED_SCHEDULE (scop), 0) < 2 272*e4b17023SJohn Marino || scop_do_interchange (scop) == 0) 273*e4b17023SJohn Marino { 274*e4b17023SJohn Marino restore_scattering (scop); 275*e4b17023SJohn Marino return false; 276*e4b17023SJohn Marino } 277*e4b17023SJohn Marino 278*e4b17023SJohn Marino if (dump_file && (dump_flags & TDF_DETAILS)) 279*e4b17023SJohn Marino fprintf (dump_file, "SCoP will be loop blocked.\n"); 280*e4b17023SJohn Marino 281*e4b17023SJohn Marino return true; 282*e4b17023SJohn Marino } 283*e4b17023SJohn Marino 284*e4b17023SJohn Marino #endif 285