1*38fd1498Szrj /* Predictive commoning. 2*38fd1498Szrj Copyright (C) 2005-2018 Free Software Foundation, Inc. 3*38fd1498Szrj 4*38fd1498Szrj This file is part of GCC. 5*38fd1498Szrj 6*38fd1498Szrj GCC is free software; you can redistribute it and/or modify it 7*38fd1498Szrj under the terms of the GNU General Public License as published by the 8*38fd1498Szrj Free Software Foundation; either version 3, or (at your option) any 9*38fd1498Szrj later version. 10*38fd1498Szrj 11*38fd1498Szrj GCC is distributed in the hope that it will be useful, but WITHOUT 12*38fd1498Szrj ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13*38fd1498Szrj FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14*38fd1498Szrj for more details. 15*38fd1498Szrj 16*38fd1498Szrj You should have received a copy of the GNU General Public License 17*38fd1498Szrj along with GCC; see the file COPYING3. If not see 18*38fd1498Szrj <http://www.gnu.org/licenses/>. */ 19*38fd1498Szrj 20*38fd1498Szrj /* This file implements the predictive commoning optimization. Predictive 21*38fd1498Szrj commoning can be viewed as CSE around a loop, and with some improvements, 22*38fd1498Szrj as generalized strength reduction-- i.e., reusing values computed in 23*38fd1498Szrj earlier iterations of a loop in the later ones. So far, the pass only 24*38fd1498Szrj handles the most useful case, that is, reusing values of memory references. 25*38fd1498Szrj If you think this is all just a special case of PRE, you are sort of right; 26*38fd1498Szrj however, concentrating on loops is simpler, and makes it possible to 27*38fd1498Szrj incorporate data dependence analysis to detect the opportunities, perform 28*38fd1498Szrj loop unrolling to avoid copies together with renaming immediately, 29*38fd1498Szrj and if needed, we could also take register pressure into account. 30*38fd1498Szrj 31*38fd1498Szrj Let us demonstrate what is done on an example: 32*38fd1498Szrj 33*38fd1498Szrj for (i = 0; i < 100; i++) 34*38fd1498Szrj { 35*38fd1498Szrj a[i+2] = a[i] + a[i+1]; 36*38fd1498Szrj b[10] = b[10] + i; 37*38fd1498Szrj c[i] = c[99 - i]; 38*38fd1498Szrj d[i] = d[i + 1]; 39*38fd1498Szrj } 40*38fd1498Szrj 41*38fd1498Szrj 1) We find data references in the loop, and split them to mutually 42*38fd1498Szrj independent groups (i.e., we find components of a data dependence 43*38fd1498Szrj graph). We ignore read-read dependences whose distance is not constant. 44*38fd1498Szrj (TODO -- we could also ignore antidependences). In this example, we 45*38fd1498Szrj find the following groups: 46*38fd1498Szrj 47*38fd1498Szrj a[i]{read}, a[i+1]{read}, a[i+2]{write} 48*38fd1498Szrj b[10]{read}, b[10]{write} 49*38fd1498Szrj c[99 - i]{read}, c[i]{write} 50*38fd1498Szrj d[i + 1]{read}, d[i]{write} 51*38fd1498Szrj 52*38fd1498Szrj 2) Inside each of the group, we verify several conditions: 53*38fd1498Szrj a) all the references must differ in indices only, and the indices 54*38fd1498Szrj must all have the same step 55*38fd1498Szrj b) the references must dominate loop latch (and thus, they must be 56*38fd1498Szrj ordered by dominance relation). 57*38fd1498Szrj c) the distance of the indices must be a small multiple of the step 58*38fd1498Szrj We are then able to compute the difference of the references (# of 59*38fd1498Szrj iterations before they point to the same place as the first of them). 60*38fd1498Szrj Also, in case there are writes in the loop, we split the groups into 61*38fd1498Szrj chains whose head is the write whose values are used by the reads in 62*38fd1498Szrj the same chain. The chains are then processed independently, 63*38fd1498Szrj making the further transformations simpler. Also, the shorter chains 64*38fd1498Szrj need the same number of registers, but may require lower unrolling 65*38fd1498Szrj factor in order to get rid of the copies on the loop latch. 66*38fd1498Szrj 67*38fd1498Szrj In our example, we get the following chains (the chain for c is invalid). 68*38fd1498Szrj 69*38fd1498Szrj a[i]{read,+0}, a[i+1]{read,-1}, a[i+2]{write,-2} 70*38fd1498Szrj b[10]{read,+0}, b[10]{write,+0} 71*38fd1498Szrj d[i + 1]{read,+0}, d[i]{write,+1} 72*38fd1498Szrj 73*38fd1498Szrj 3) For each read, we determine the read or write whose value it reuses, 74*38fd1498Szrj together with the distance of this reuse. I.e. we take the last 75*38fd1498Szrj reference before it with distance 0, or the last of the references 76*38fd1498Szrj with the smallest positive distance to the read. Then, we remove 77*38fd1498Szrj the references that are not used in any of these chains, discard the 78*38fd1498Szrj empty groups, and propagate all the links so that they point to the 79*38fd1498Szrj single root reference of the chain (adjusting their distance 80*38fd1498Szrj appropriately). Some extra care needs to be taken for references with 81*38fd1498Szrj step 0. In our example (the numbers indicate the distance of the 82*38fd1498Szrj reuse), 83*38fd1498Szrj 84*38fd1498Szrj a[i] --> (*) 2, a[i+1] --> (*) 1, a[i+2] (*) 85*38fd1498Szrj b[10] --> (*) 1, b[10] (*) 86*38fd1498Szrj 87*38fd1498Szrj 4) The chains are combined together if possible. If the corresponding 88*38fd1498Szrj elements of two chains are always combined together with the same 89*38fd1498Szrj operator, we remember just the result of this combination, instead 90*38fd1498Szrj of remembering the values separately. We may need to perform 91*38fd1498Szrj reassociation to enable combining, for example 92*38fd1498Szrj 93*38fd1498Szrj e[i] + f[i+1] + e[i+1] + f[i] 94*38fd1498Szrj 95*38fd1498Szrj can be reassociated as 96*38fd1498Szrj 97*38fd1498Szrj (e[i] + f[i]) + (e[i+1] + f[i+1]) 98*38fd1498Szrj 99*38fd1498Szrj and we can combine the chains for e and f into one chain. 100*38fd1498Szrj 101*38fd1498Szrj 5) For each root reference (end of the chain) R, let N be maximum distance 102*38fd1498Szrj of a reference reusing its value. Variables R0 up to RN are created, 103*38fd1498Szrj together with phi nodes that transfer values from R1 .. RN to 104*38fd1498Szrj R0 .. R(N-1). 105*38fd1498Szrj Initial values are loaded to R0..R(N-1) (in case not all references 106*38fd1498Szrj must necessarily be accessed and they may trap, we may fail here; 107*38fd1498Szrj TODO sometimes, the loads could be guarded by a check for the number 108*38fd1498Szrj of iterations). Values loaded/stored in roots are also copied to 109*38fd1498Szrj RN. Other reads are replaced with the appropriate variable Ri. 110*38fd1498Szrj Everything is put to SSA form. 111*38fd1498Szrj 112*38fd1498Szrj As a small improvement, if R0 is dead after the root (i.e., all uses of 113*38fd1498Szrj the value with the maximum distance dominate the root), we can avoid 114*38fd1498Szrj creating RN and use R0 instead of it. 115*38fd1498Szrj 116*38fd1498Szrj In our example, we get (only the parts concerning a and b are shown): 117*38fd1498Szrj for (i = 0; i < 100; i++) 118*38fd1498Szrj { 119*38fd1498Szrj f = phi (a[0], s); 120*38fd1498Szrj s = phi (a[1], f); 121*38fd1498Szrj x = phi (b[10], x); 122*38fd1498Szrj 123*38fd1498Szrj f = f + s; 124*38fd1498Szrj a[i+2] = f; 125*38fd1498Szrj x = x + i; 126*38fd1498Szrj b[10] = x; 127*38fd1498Szrj } 128*38fd1498Szrj 129*38fd1498Szrj 6) Factor F for unrolling is determined as the smallest common multiple of 130*38fd1498Szrj (N + 1) for each root reference (N for references for that we avoided 131*38fd1498Szrj creating RN). If F and the loop is small enough, loop is unrolled F 132*38fd1498Szrj times. The stores to RN (R0) in the copies of the loop body are 133*38fd1498Szrj periodically replaced with R0, R1, ... (R1, R2, ...), so that they can 134*38fd1498Szrj be coalesced and the copies can be eliminated. 135*38fd1498Szrj 136*38fd1498Szrj TODO -- copy propagation and other optimizations may change the live 137*38fd1498Szrj ranges of the temporary registers and prevent them from being coalesced; 138*38fd1498Szrj this may increase the register pressure. 139*38fd1498Szrj 140*38fd1498Szrj In our case, F = 2 and the (main loop of the) result is 141*38fd1498Szrj 142*38fd1498Szrj for (i = 0; i < ...; i += 2) 143*38fd1498Szrj { 144*38fd1498Szrj f = phi (a[0], f); 145*38fd1498Szrj s = phi (a[1], s); 146*38fd1498Szrj x = phi (b[10], x); 147*38fd1498Szrj 148*38fd1498Szrj f = f + s; 149*38fd1498Szrj a[i+2] = f; 150*38fd1498Szrj x = x + i; 151*38fd1498Szrj b[10] = x; 152*38fd1498Szrj 153*38fd1498Szrj s = s + f; 154*38fd1498Szrj a[i+3] = s; 155*38fd1498Szrj x = x + i; 156*38fd1498Szrj b[10] = x; 157*38fd1498Szrj } 158*38fd1498Szrj 159*38fd1498Szrj Apart from predictive commoning on Load-Load and Store-Load chains, we 160*38fd1498Szrj also support Store-Store chains -- stores killed by other store can be 161*38fd1498Szrj eliminated. Given below example: 162*38fd1498Szrj 163*38fd1498Szrj for (i = 0; i < n; i++) 164*38fd1498Szrj { 165*38fd1498Szrj a[i] = 1; 166*38fd1498Szrj a[i+2] = 2; 167*38fd1498Szrj } 168*38fd1498Szrj 169*38fd1498Szrj It can be replaced with: 170*38fd1498Szrj 171*38fd1498Szrj t0 = a[0]; 172*38fd1498Szrj t1 = a[1]; 173*38fd1498Szrj for (i = 0; i < n; i++) 174*38fd1498Szrj { 175*38fd1498Szrj a[i] = 1; 176*38fd1498Szrj t2 = 2; 177*38fd1498Szrj t0 = t1; 178*38fd1498Szrj t1 = t2; 179*38fd1498Szrj } 180*38fd1498Szrj a[n] = t0; 181*38fd1498Szrj a[n+1] = t1; 182*38fd1498Szrj 183*38fd1498Szrj If the loop runs more than 1 iterations, it can be further simplified into: 184*38fd1498Szrj 185*38fd1498Szrj for (i = 0; i < n; i++) 186*38fd1498Szrj { 187*38fd1498Szrj a[i] = 1; 188*38fd1498Szrj } 189*38fd1498Szrj a[n] = 2; 190*38fd1498Szrj a[n+1] = 2; 191*38fd1498Szrj 192*38fd1498Szrj The interesting part is this can be viewed either as general store motion 193*38fd1498Szrj or general dead store elimination in either intra/inter-iterations way. 194*38fd1498Szrj 195*38fd1498Szrj With trivial effort, we also support load inside Store-Store chains if the 196*38fd1498Szrj load is dominated by a store statement in the same iteration of loop. You 197*38fd1498Szrj can see this as a restricted Store-Mixed-Load-Store chain. 198*38fd1498Szrj 199*38fd1498Szrj TODO: For now, we don't support store-store chains in multi-exit loops. We 200*38fd1498Szrj force to not unroll in case of store-store chain even if other chains might 201*38fd1498Szrj ask for unroll. 202*38fd1498Szrj 203*38fd1498Szrj Predictive commoning can be generalized for arbitrary computations (not 204*38fd1498Szrj just memory loads), and also nontrivial transfer functions (e.g., replacing 205*38fd1498Szrj i * i with ii_last + 2 * i + 1), to generalize strength reduction. */ 206*38fd1498Szrj 207*38fd1498Szrj #include "config.h" 208*38fd1498Szrj #include "system.h" 209*38fd1498Szrj #include "coretypes.h" 210*38fd1498Szrj #include "backend.h" 211*38fd1498Szrj #include "rtl.h" 212*38fd1498Szrj #include "tree.h" 213*38fd1498Szrj #include "gimple.h" 214*38fd1498Szrj #include "predict.h" 215*38fd1498Szrj #include "tree-pass.h" 216*38fd1498Szrj #include "ssa.h" 217*38fd1498Szrj #include "gimple-pretty-print.h" 218*38fd1498Szrj #include "alias.h" 219*38fd1498Szrj #include "fold-const.h" 220*38fd1498Szrj #include "cfgloop.h" 221*38fd1498Szrj #include "tree-eh.h" 222*38fd1498Szrj #include "gimplify.h" 223*38fd1498Szrj #include "gimple-iterator.h" 224*38fd1498Szrj #include "gimplify-me.h" 225*38fd1498Szrj #include "tree-ssa-loop-ivopts.h" 226*38fd1498Szrj #include "tree-ssa-loop-manip.h" 227*38fd1498Szrj #include "tree-ssa-loop-niter.h" 228*38fd1498Szrj #include "tree-ssa-loop.h" 229*38fd1498Szrj #include "tree-into-ssa.h" 230*38fd1498Szrj #include "tree-dfa.h" 231*38fd1498Szrj #include "tree-ssa.h" 232*38fd1498Szrj #include "tree-data-ref.h" 233*38fd1498Szrj #include "tree-scalar-evolution.h" 234*38fd1498Szrj #include "params.h" 235*38fd1498Szrj #include "tree-affine.h" 236*38fd1498Szrj #include "builtins.h" 237*38fd1498Szrj 238*38fd1498Szrj /* The maximum number of iterations between the considered memory 239*38fd1498Szrj references. */ 240*38fd1498Szrj 241*38fd1498Szrj #define MAX_DISTANCE (target_avail_regs < 16 ? 4 : 8) 242*38fd1498Szrj 243*38fd1498Szrj /* Data references (or phi nodes that carry data reference values across 244*38fd1498Szrj loop iterations). */ 245*38fd1498Szrj 246*38fd1498Szrj typedef struct dref_d 247*38fd1498Szrj { 248*38fd1498Szrj /* The reference itself. */ 249*38fd1498Szrj struct data_reference *ref; 250*38fd1498Szrj 251*38fd1498Szrj /* The statement in that the reference appears. */ 252*38fd1498Szrj gimple *stmt; 253*38fd1498Szrj 254*38fd1498Szrj /* In case that STMT is a phi node, this field is set to the SSA name 255*38fd1498Szrj defined by it in replace_phis_by_defined_names (in order to avoid 256*38fd1498Szrj pointing to phi node that got reallocated in the meantime). */ 257*38fd1498Szrj tree name_defined_by_phi; 258*38fd1498Szrj 259*38fd1498Szrj /* Distance of the reference from the root of the chain (in number of 260*38fd1498Szrj iterations of the loop). */ 261*38fd1498Szrj unsigned distance; 262*38fd1498Szrj 263*38fd1498Szrj /* Number of iterations offset from the first reference in the component. */ 264*38fd1498Szrj widest_int offset; 265*38fd1498Szrj 266*38fd1498Szrj /* Number of the reference in a component, in dominance ordering. */ 267*38fd1498Szrj unsigned pos; 268*38fd1498Szrj 269*38fd1498Szrj /* True if the memory reference is always accessed when the loop is 270*38fd1498Szrj entered. */ 271*38fd1498Szrj unsigned always_accessed : 1; 272*38fd1498Szrj } *dref; 273*38fd1498Szrj 274*38fd1498Szrj 275*38fd1498Szrj /* Type of the chain of the references. */ 276*38fd1498Szrj 277*38fd1498Szrj enum chain_type 278*38fd1498Szrj { 279*38fd1498Szrj /* The addresses of the references in the chain are constant. */ 280*38fd1498Szrj CT_INVARIANT, 281*38fd1498Szrj 282*38fd1498Szrj /* There are only loads in the chain. */ 283*38fd1498Szrj CT_LOAD, 284*38fd1498Szrj 285*38fd1498Szrj /* Root of the chain is store, the rest are loads. */ 286*38fd1498Szrj CT_STORE_LOAD, 287*38fd1498Szrj 288*38fd1498Szrj /* There are only stores in the chain. */ 289*38fd1498Szrj CT_STORE_STORE, 290*38fd1498Szrj 291*38fd1498Szrj /* A combination of two chains. */ 292*38fd1498Szrj CT_COMBINATION 293*38fd1498Szrj }; 294*38fd1498Szrj 295*38fd1498Szrj /* Chains of data references. */ 296*38fd1498Szrj 297*38fd1498Szrj typedef struct chain 298*38fd1498Szrj { 299*38fd1498Szrj /* Type of the chain. */ 300*38fd1498Szrj enum chain_type type; 301*38fd1498Szrj 302*38fd1498Szrj /* For combination chains, the operator and the two chains that are 303*38fd1498Szrj combined, and the type of the result. */ 304*38fd1498Szrj enum tree_code op; 305*38fd1498Szrj tree rslt_type; 306*38fd1498Szrj struct chain *ch1, *ch2; 307*38fd1498Szrj 308*38fd1498Szrj /* The references in the chain. */ 309*38fd1498Szrj vec<dref> refs; 310*38fd1498Szrj 311*38fd1498Szrj /* The maximum distance of the reference in the chain from the root. */ 312*38fd1498Szrj unsigned length; 313*38fd1498Szrj 314*38fd1498Szrj /* The variables used to copy the value throughout iterations. */ 315*38fd1498Szrj vec<tree> vars; 316*38fd1498Szrj 317*38fd1498Szrj /* Initializers for the variables. */ 318*38fd1498Szrj vec<tree> inits; 319*38fd1498Szrj 320*38fd1498Szrj /* Finalizers for the eliminated stores. */ 321*38fd1498Szrj vec<tree> finis; 322*38fd1498Szrj 323*38fd1498Szrj /* gimple stmts intializing the initial variables of the chain. */ 324*38fd1498Szrj gimple_seq init_seq; 325*38fd1498Szrj 326*38fd1498Szrj /* gimple stmts finalizing the eliminated stores of the chain. */ 327*38fd1498Szrj gimple_seq fini_seq; 328*38fd1498Szrj 329*38fd1498Szrj /* True if there is a use of a variable with the maximal distance 330*38fd1498Szrj that comes after the root in the loop. */ 331*38fd1498Szrj unsigned has_max_use_after : 1; 332*38fd1498Szrj 333*38fd1498Szrj /* True if all the memory references in the chain are always accessed. */ 334*38fd1498Szrj unsigned all_always_accessed : 1; 335*38fd1498Szrj 336*38fd1498Szrj /* True if this chain was combined together with some other chain. */ 337*38fd1498Szrj unsigned combined : 1; 338*38fd1498Szrj 339*38fd1498Szrj /* True if this is store elimination chain and eliminated stores store 340*38fd1498Szrj loop invariant value into memory. */ 341*38fd1498Szrj unsigned inv_store_elimination : 1; 342*38fd1498Szrj } *chain_p; 343*38fd1498Szrj 344*38fd1498Szrj 345*38fd1498Szrj /* Describes the knowledge about the step of the memory references in 346*38fd1498Szrj the component. */ 347*38fd1498Szrj 348*38fd1498Szrj enum ref_step_type 349*38fd1498Szrj { 350*38fd1498Szrj /* The step is zero. */ 351*38fd1498Szrj RS_INVARIANT, 352*38fd1498Szrj 353*38fd1498Szrj /* The step is nonzero. */ 354*38fd1498Szrj RS_NONZERO, 355*38fd1498Szrj 356*38fd1498Szrj /* The step may or may not be nonzero. */ 357*38fd1498Szrj RS_ANY 358*38fd1498Szrj }; 359*38fd1498Szrj 360*38fd1498Szrj /* Components of the data dependence graph. */ 361*38fd1498Szrj 362*38fd1498Szrj struct component 363*38fd1498Szrj { 364*38fd1498Szrj /* The references in the component. */ 365*38fd1498Szrj vec<dref> refs; 366*38fd1498Szrj 367*38fd1498Szrj /* What we know about the step of the references in the component. */ 368*38fd1498Szrj enum ref_step_type comp_step; 369*38fd1498Szrj 370*38fd1498Szrj /* True if all references in component are stores and we try to do 371*38fd1498Szrj intra/inter loop iteration dead store elimination. */ 372*38fd1498Szrj bool eliminate_store_p; 373*38fd1498Szrj 374*38fd1498Szrj /* Next component in the list. */ 375*38fd1498Szrj struct component *next; 376*38fd1498Szrj }; 377*38fd1498Szrj 378*38fd1498Szrj /* Bitmap of ssa names defined by looparound phi nodes covered by chains. */ 379*38fd1498Szrj 380*38fd1498Szrj static bitmap looparound_phis; 381*38fd1498Szrj 382*38fd1498Szrj /* Cache used by tree_to_aff_combination_expand. */ 383*38fd1498Szrj 384*38fd1498Szrj static hash_map<tree, name_expansion *> *name_expansions; 385*38fd1498Szrj 386*38fd1498Szrj /* Dumps data reference REF to FILE. */ 387*38fd1498Szrj 388*38fd1498Szrj extern void dump_dref (FILE *, dref); 389*38fd1498Szrj void 390*38fd1498Szrj dump_dref (FILE *file, dref ref) 391*38fd1498Szrj { 392*38fd1498Szrj if (ref->ref) 393*38fd1498Szrj { 394*38fd1498Szrj fprintf (file, " "); 395*38fd1498Szrj print_generic_expr (file, DR_REF (ref->ref), TDF_SLIM); 396*38fd1498Szrj fprintf (file, " (id %u%s)\n", ref->pos, 397*38fd1498Szrj DR_IS_READ (ref->ref) ? "" : ", write"); 398*38fd1498Szrj 399*38fd1498Szrj fprintf (file, " offset "); 400*38fd1498Szrj print_decs (ref->offset, file); 401*38fd1498Szrj fprintf (file, "\n"); 402*38fd1498Szrj 403*38fd1498Szrj fprintf (file, " distance %u\n", ref->distance); 404*38fd1498Szrj } 405*38fd1498Szrj else 406*38fd1498Szrj { 407*38fd1498Szrj if (gimple_code (ref->stmt) == GIMPLE_PHI) 408*38fd1498Szrj fprintf (file, " looparound ref\n"); 409*38fd1498Szrj else 410*38fd1498Szrj fprintf (file, " combination ref\n"); 411*38fd1498Szrj fprintf (file, " in statement "); 412*38fd1498Szrj print_gimple_stmt (file, ref->stmt, 0, TDF_SLIM); 413*38fd1498Szrj fprintf (file, "\n"); 414*38fd1498Szrj fprintf (file, " distance %u\n", ref->distance); 415*38fd1498Szrj } 416*38fd1498Szrj 417*38fd1498Szrj } 418*38fd1498Szrj 419*38fd1498Szrj /* Dumps CHAIN to FILE. */ 420*38fd1498Szrj 421*38fd1498Szrj extern void dump_chain (FILE *, chain_p); 422*38fd1498Szrj void 423*38fd1498Szrj dump_chain (FILE *file, chain_p chain) 424*38fd1498Szrj { 425*38fd1498Szrj dref a; 426*38fd1498Szrj const char *chain_type; 427*38fd1498Szrj unsigned i; 428*38fd1498Szrj tree var; 429*38fd1498Szrj 430*38fd1498Szrj switch (chain->type) 431*38fd1498Szrj { 432*38fd1498Szrj case CT_INVARIANT: 433*38fd1498Szrj chain_type = "Load motion"; 434*38fd1498Szrj break; 435*38fd1498Szrj 436*38fd1498Szrj case CT_LOAD: 437*38fd1498Szrj chain_type = "Loads-only"; 438*38fd1498Szrj break; 439*38fd1498Szrj 440*38fd1498Szrj case CT_STORE_LOAD: 441*38fd1498Szrj chain_type = "Store-loads"; 442*38fd1498Szrj break; 443*38fd1498Szrj 444*38fd1498Szrj case CT_STORE_STORE: 445*38fd1498Szrj chain_type = "Store-stores"; 446*38fd1498Szrj break; 447*38fd1498Szrj 448*38fd1498Szrj case CT_COMBINATION: 449*38fd1498Szrj chain_type = "Combination"; 450*38fd1498Szrj break; 451*38fd1498Szrj 452*38fd1498Szrj default: 453*38fd1498Szrj gcc_unreachable (); 454*38fd1498Szrj } 455*38fd1498Szrj 456*38fd1498Szrj fprintf (file, "%s chain %p%s\n", chain_type, (void *) chain, 457*38fd1498Szrj chain->combined ? " (combined)" : ""); 458*38fd1498Szrj if (chain->type != CT_INVARIANT) 459*38fd1498Szrj fprintf (file, " max distance %u%s\n", chain->length, 460*38fd1498Szrj chain->has_max_use_after ? "" : ", may reuse first"); 461*38fd1498Szrj 462*38fd1498Szrj if (chain->type == CT_COMBINATION) 463*38fd1498Szrj { 464*38fd1498Szrj fprintf (file, " equal to %p %s %p in type ", 465*38fd1498Szrj (void *) chain->ch1, op_symbol_code (chain->op), 466*38fd1498Szrj (void *) chain->ch2); 467*38fd1498Szrj print_generic_expr (file, chain->rslt_type, TDF_SLIM); 468*38fd1498Szrj fprintf (file, "\n"); 469*38fd1498Szrj } 470*38fd1498Szrj 471*38fd1498Szrj if (chain->vars.exists ()) 472*38fd1498Szrj { 473*38fd1498Szrj fprintf (file, " vars"); 474*38fd1498Szrj FOR_EACH_VEC_ELT (chain->vars, i, var) 475*38fd1498Szrj { 476*38fd1498Szrj fprintf (file, " "); 477*38fd1498Szrj print_generic_expr (file, var, TDF_SLIM); 478*38fd1498Szrj } 479*38fd1498Szrj fprintf (file, "\n"); 480*38fd1498Szrj } 481*38fd1498Szrj 482*38fd1498Szrj if (chain->inits.exists ()) 483*38fd1498Szrj { 484*38fd1498Szrj fprintf (file, " inits"); 485*38fd1498Szrj FOR_EACH_VEC_ELT (chain->inits, i, var) 486*38fd1498Szrj { 487*38fd1498Szrj fprintf (file, " "); 488*38fd1498Szrj print_generic_expr (file, var, TDF_SLIM); 489*38fd1498Szrj } 490*38fd1498Szrj fprintf (file, "\n"); 491*38fd1498Szrj } 492*38fd1498Szrj 493*38fd1498Szrj fprintf (file, " references:\n"); 494*38fd1498Szrj FOR_EACH_VEC_ELT (chain->refs, i, a) 495*38fd1498Szrj dump_dref (file, a); 496*38fd1498Szrj 497*38fd1498Szrj fprintf (file, "\n"); 498*38fd1498Szrj } 499*38fd1498Szrj 500*38fd1498Szrj /* Dumps CHAINS to FILE. */ 501*38fd1498Szrj 502*38fd1498Szrj extern void dump_chains (FILE *, vec<chain_p> ); 503*38fd1498Szrj void 504*38fd1498Szrj dump_chains (FILE *file, vec<chain_p> chains) 505*38fd1498Szrj { 506*38fd1498Szrj chain_p chain; 507*38fd1498Szrj unsigned i; 508*38fd1498Szrj 509*38fd1498Szrj FOR_EACH_VEC_ELT (chains, i, chain) 510*38fd1498Szrj dump_chain (file, chain); 511*38fd1498Szrj } 512*38fd1498Szrj 513*38fd1498Szrj /* Dumps COMP to FILE. */ 514*38fd1498Szrj 515*38fd1498Szrj extern void dump_component (FILE *, struct component *); 516*38fd1498Szrj void 517*38fd1498Szrj dump_component (FILE *file, struct component *comp) 518*38fd1498Szrj { 519*38fd1498Szrj dref a; 520*38fd1498Szrj unsigned i; 521*38fd1498Szrj 522*38fd1498Szrj fprintf (file, "Component%s:\n", 523*38fd1498Szrj comp->comp_step == RS_INVARIANT ? " (invariant)" : ""); 524*38fd1498Szrj FOR_EACH_VEC_ELT (comp->refs, i, a) 525*38fd1498Szrj dump_dref (file, a); 526*38fd1498Szrj fprintf (file, "\n"); 527*38fd1498Szrj } 528*38fd1498Szrj 529*38fd1498Szrj /* Dumps COMPS to FILE. */ 530*38fd1498Szrj 531*38fd1498Szrj extern void dump_components (FILE *, struct component *); 532*38fd1498Szrj void 533*38fd1498Szrj dump_components (FILE *file, struct component *comps) 534*38fd1498Szrj { 535*38fd1498Szrj struct component *comp; 536*38fd1498Szrj 537*38fd1498Szrj for (comp = comps; comp; comp = comp->next) 538*38fd1498Szrj dump_component (file, comp); 539*38fd1498Szrj } 540*38fd1498Szrj 541*38fd1498Szrj /* Frees a chain CHAIN. */ 542*38fd1498Szrj 543*38fd1498Szrj static void 544*38fd1498Szrj release_chain (chain_p chain) 545*38fd1498Szrj { 546*38fd1498Szrj dref ref; 547*38fd1498Szrj unsigned i; 548*38fd1498Szrj 549*38fd1498Szrj if (chain == NULL) 550*38fd1498Szrj return; 551*38fd1498Szrj 552*38fd1498Szrj FOR_EACH_VEC_ELT (chain->refs, i, ref) 553*38fd1498Szrj free (ref); 554*38fd1498Szrj 555*38fd1498Szrj chain->refs.release (); 556*38fd1498Szrj chain->vars.release (); 557*38fd1498Szrj chain->inits.release (); 558*38fd1498Szrj if (chain->init_seq) 559*38fd1498Szrj gimple_seq_discard (chain->init_seq); 560*38fd1498Szrj 561*38fd1498Szrj chain->finis.release (); 562*38fd1498Szrj if (chain->fini_seq) 563*38fd1498Szrj gimple_seq_discard (chain->fini_seq); 564*38fd1498Szrj 565*38fd1498Szrj free (chain); 566*38fd1498Szrj } 567*38fd1498Szrj 568*38fd1498Szrj /* Frees CHAINS. */ 569*38fd1498Szrj 570*38fd1498Szrj static void 571*38fd1498Szrj release_chains (vec<chain_p> chains) 572*38fd1498Szrj { 573*38fd1498Szrj unsigned i; 574*38fd1498Szrj chain_p chain; 575*38fd1498Szrj 576*38fd1498Szrj FOR_EACH_VEC_ELT (chains, i, chain) 577*38fd1498Szrj release_chain (chain); 578*38fd1498Szrj chains.release (); 579*38fd1498Szrj } 580*38fd1498Szrj 581*38fd1498Szrj /* Frees a component COMP. */ 582*38fd1498Szrj 583*38fd1498Szrj static void 584*38fd1498Szrj release_component (struct component *comp) 585*38fd1498Szrj { 586*38fd1498Szrj comp->refs.release (); 587*38fd1498Szrj free (comp); 588*38fd1498Szrj } 589*38fd1498Szrj 590*38fd1498Szrj /* Frees list of components COMPS. */ 591*38fd1498Szrj 592*38fd1498Szrj static void 593*38fd1498Szrj release_components (struct component *comps) 594*38fd1498Szrj { 595*38fd1498Szrj struct component *act, *next; 596*38fd1498Szrj 597*38fd1498Szrj for (act = comps; act; act = next) 598*38fd1498Szrj { 599*38fd1498Szrj next = act->next; 600*38fd1498Szrj release_component (act); 601*38fd1498Szrj } 602*38fd1498Szrj } 603*38fd1498Szrj 604*38fd1498Szrj /* Finds a root of tree given by FATHERS containing A, and performs path 605*38fd1498Szrj shortening. */ 606*38fd1498Szrj 607*38fd1498Szrj static unsigned 608*38fd1498Szrj component_of (unsigned fathers[], unsigned a) 609*38fd1498Szrj { 610*38fd1498Szrj unsigned root, n; 611*38fd1498Szrj 612*38fd1498Szrj for (root = a; root != fathers[root]; root = fathers[root]) 613*38fd1498Szrj continue; 614*38fd1498Szrj 615*38fd1498Szrj for (; a != root; a = n) 616*38fd1498Szrj { 617*38fd1498Szrj n = fathers[a]; 618*38fd1498Szrj fathers[a] = root; 619*38fd1498Szrj } 620*38fd1498Szrj 621*38fd1498Szrj return root; 622*38fd1498Szrj } 623*38fd1498Szrj 624*38fd1498Szrj /* Join operation for DFU. FATHERS gives the tree, SIZES are sizes of the 625*38fd1498Szrj components, A and B are components to merge. */ 626*38fd1498Szrj 627*38fd1498Szrj static void 628*38fd1498Szrj merge_comps (unsigned fathers[], unsigned sizes[], unsigned a, unsigned b) 629*38fd1498Szrj { 630*38fd1498Szrj unsigned ca = component_of (fathers, a); 631*38fd1498Szrj unsigned cb = component_of (fathers, b); 632*38fd1498Szrj 633*38fd1498Szrj if (ca == cb) 634*38fd1498Szrj return; 635*38fd1498Szrj 636*38fd1498Szrj if (sizes[ca] < sizes[cb]) 637*38fd1498Szrj { 638*38fd1498Szrj sizes[cb] += sizes[ca]; 639*38fd1498Szrj fathers[ca] = cb; 640*38fd1498Szrj } 641*38fd1498Szrj else 642*38fd1498Szrj { 643*38fd1498Szrj sizes[ca] += sizes[cb]; 644*38fd1498Szrj fathers[cb] = ca; 645*38fd1498Szrj } 646*38fd1498Szrj } 647*38fd1498Szrj 648*38fd1498Szrj /* Returns true if A is a reference that is suitable for predictive commoning 649*38fd1498Szrj in the innermost loop that contains it. REF_STEP is set according to the 650*38fd1498Szrj step of the reference A. */ 651*38fd1498Szrj 652*38fd1498Szrj static bool 653*38fd1498Szrj suitable_reference_p (struct data_reference *a, enum ref_step_type *ref_step) 654*38fd1498Szrj { 655*38fd1498Szrj tree ref = DR_REF (a), step = DR_STEP (a); 656*38fd1498Szrj 657*38fd1498Szrj if (!step 658*38fd1498Szrj || TREE_THIS_VOLATILE (ref) 659*38fd1498Szrj || !is_gimple_reg_type (TREE_TYPE (ref)) 660*38fd1498Szrj || tree_could_throw_p (ref)) 661*38fd1498Szrj return false; 662*38fd1498Szrj 663*38fd1498Szrj if (integer_zerop (step)) 664*38fd1498Szrj *ref_step = RS_INVARIANT; 665*38fd1498Szrj else if (integer_nonzerop (step)) 666*38fd1498Szrj *ref_step = RS_NONZERO; 667*38fd1498Szrj else 668*38fd1498Szrj *ref_step = RS_ANY; 669*38fd1498Szrj 670*38fd1498Szrj return true; 671*38fd1498Szrj } 672*38fd1498Szrj 673*38fd1498Szrj /* Stores DR_OFFSET (DR) + DR_INIT (DR) to OFFSET. */ 674*38fd1498Szrj 675*38fd1498Szrj static void 676*38fd1498Szrj aff_combination_dr_offset (struct data_reference *dr, aff_tree *offset) 677*38fd1498Szrj { 678*38fd1498Szrj tree type = TREE_TYPE (DR_OFFSET (dr)); 679*38fd1498Szrj aff_tree delta; 680*38fd1498Szrj 681*38fd1498Szrj tree_to_aff_combination_expand (DR_OFFSET (dr), type, offset, 682*38fd1498Szrj &name_expansions); 683*38fd1498Szrj aff_combination_const (&delta, type, wi::to_poly_widest (DR_INIT (dr))); 684*38fd1498Szrj aff_combination_add (offset, &delta); 685*38fd1498Szrj } 686*38fd1498Szrj 687*38fd1498Szrj /* Determines number of iterations of the innermost enclosing loop before B 688*38fd1498Szrj refers to exactly the same location as A and stores it to OFF. If A and 689*38fd1498Szrj B do not have the same step, they never meet, or anything else fails, 690*38fd1498Szrj returns false, otherwise returns true. Both A and B are assumed to 691*38fd1498Szrj satisfy suitable_reference_p. */ 692*38fd1498Szrj 693*38fd1498Szrj static bool 694*38fd1498Szrj determine_offset (struct data_reference *a, struct data_reference *b, 695*38fd1498Szrj poly_widest_int *off) 696*38fd1498Szrj { 697*38fd1498Szrj aff_tree diff, baseb, step; 698*38fd1498Szrj tree typea, typeb; 699*38fd1498Szrj 700*38fd1498Szrj /* Check that both the references access the location in the same type. */ 701*38fd1498Szrj typea = TREE_TYPE (DR_REF (a)); 702*38fd1498Szrj typeb = TREE_TYPE (DR_REF (b)); 703*38fd1498Szrj if (!useless_type_conversion_p (typeb, typea)) 704*38fd1498Szrj return false; 705*38fd1498Szrj 706*38fd1498Szrj /* Check whether the base address and the step of both references is the 707*38fd1498Szrj same. */ 708*38fd1498Szrj if (!operand_equal_p (DR_STEP (a), DR_STEP (b), 0) 709*38fd1498Szrj || !operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b), 0)) 710*38fd1498Szrj return false; 711*38fd1498Szrj 712*38fd1498Szrj if (integer_zerop (DR_STEP (a))) 713*38fd1498Szrj { 714*38fd1498Szrj /* If the references have loop invariant address, check that they access 715*38fd1498Szrj exactly the same location. */ 716*38fd1498Szrj *off = 0; 717*38fd1498Szrj return (operand_equal_p (DR_OFFSET (a), DR_OFFSET (b), 0) 718*38fd1498Szrj && operand_equal_p (DR_INIT (a), DR_INIT (b), 0)); 719*38fd1498Szrj } 720*38fd1498Szrj 721*38fd1498Szrj /* Compare the offsets of the addresses, and check whether the difference 722*38fd1498Szrj is a multiple of step. */ 723*38fd1498Szrj aff_combination_dr_offset (a, &diff); 724*38fd1498Szrj aff_combination_dr_offset (b, &baseb); 725*38fd1498Szrj aff_combination_scale (&baseb, -1); 726*38fd1498Szrj aff_combination_add (&diff, &baseb); 727*38fd1498Szrj 728*38fd1498Szrj tree_to_aff_combination_expand (DR_STEP (a), TREE_TYPE (DR_STEP (a)), 729*38fd1498Szrj &step, &name_expansions); 730*38fd1498Szrj return aff_combination_constant_multiple_p (&diff, &step, off); 731*38fd1498Szrj } 732*38fd1498Szrj 733*38fd1498Szrj /* Returns the last basic block in LOOP for that we are sure that 734*38fd1498Szrj it is executed whenever the loop is entered. */ 735*38fd1498Szrj 736*38fd1498Szrj static basic_block 737*38fd1498Szrj last_always_executed_block (struct loop *loop) 738*38fd1498Szrj { 739*38fd1498Szrj unsigned i; 740*38fd1498Szrj vec<edge> exits = get_loop_exit_edges (loop); 741*38fd1498Szrj edge ex; 742*38fd1498Szrj basic_block last = loop->latch; 743*38fd1498Szrj 744*38fd1498Szrj FOR_EACH_VEC_ELT (exits, i, ex) 745*38fd1498Szrj last = nearest_common_dominator (CDI_DOMINATORS, last, ex->src); 746*38fd1498Szrj exits.release (); 747*38fd1498Szrj 748*38fd1498Szrj return last; 749*38fd1498Szrj } 750*38fd1498Szrj 751*38fd1498Szrj /* Splits dependence graph on DATAREFS described by DEPENDS to components. */ 752*38fd1498Szrj 753*38fd1498Szrj static struct component * 754*38fd1498Szrj split_data_refs_to_components (struct loop *loop, 755*38fd1498Szrj vec<data_reference_p> datarefs, 756*38fd1498Szrj vec<ddr_p> depends) 757*38fd1498Szrj { 758*38fd1498Szrj unsigned i, n = datarefs.length (); 759*38fd1498Szrj unsigned ca, ia, ib, bad; 760*38fd1498Szrj unsigned *comp_father = XNEWVEC (unsigned, n + 1); 761*38fd1498Szrj unsigned *comp_size = XNEWVEC (unsigned, n + 1); 762*38fd1498Szrj struct component **comps; 763*38fd1498Szrj struct data_reference *dr, *dra, *drb; 764*38fd1498Szrj struct data_dependence_relation *ddr; 765*38fd1498Szrj struct component *comp_list = NULL, *comp; 766*38fd1498Szrj dref dataref; 767*38fd1498Szrj /* Don't do store elimination if loop has multiple exit edges. */ 768*38fd1498Szrj bool eliminate_store_p = single_exit (loop) != NULL; 769*38fd1498Szrj basic_block last_always_executed = last_always_executed_block (loop); 770*38fd1498Szrj 771*38fd1498Szrj FOR_EACH_VEC_ELT (datarefs, i, dr) 772*38fd1498Szrj { 773*38fd1498Szrj if (!DR_REF (dr)) 774*38fd1498Szrj { 775*38fd1498Szrj /* A fake reference for call or asm_expr that may clobber memory; 776*38fd1498Szrj just fail. */ 777*38fd1498Szrj goto end; 778*38fd1498Szrj } 779*38fd1498Szrj /* predcom pass isn't prepared to handle calls with data references. */ 780*38fd1498Szrj if (is_gimple_call (DR_STMT (dr))) 781*38fd1498Szrj goto end; 782*38fd1498Szrj dr->aux = (void *) (size_t) i; 783*38fd1498Szrj comp_father[i] = i; 784*38fd1498Szrj comp_size[i] = 1; 785*38fd1498Szrj } 786*38fd1498Szrj 787*38fd1498Szrj /* A component reserved for the "bad" data references. */ 788*38fd1498Szrj comp_father[n] = n; 789*38fd1498Szrj comp_size[n] = 1; 790*38fd1498Szrj 791*38fd1498Szrj FOR_EACH_VEC_ELT (datarefs, i, dr) 792*38fd1498Szrj { 793*38fd1498Szrj enum ref_step_type dummy; 794*38fd1498Szrj 795*38fd1498Szrj if (!suitable_reference_p (dr, &dummy)) 796*38fd1498Szrj { 797*38fd1498Szrj ia = (unsigned) (size_t) dr->aux; 798*38fd1498Szrj merge_comps (comp_father, comp_size, n, ia); 799*38fd1498Szrj } 800*38fd1498Szrj } 801*38fd1498Szrj 802*38fd1498Szrj FOR_EACH_VEC_ELT (depends, i, ddr) 803*38fd1498Szrj { 804*38fd1498Szrj poly_widest_int dummy_off; 805*38fd1498Szrj 806*38fd1498Szrj if (DDR_ARE_DEPENDENT (ddr) == chrec_known) 807*38fd1498Szrj continue; 808*38fd1498Szrj 809*38fd1498Szrj dra = DDR_A (ddr); 810*38fd1498Szrj drb = DDR_B (ddr); 811*38fd1498Szrj 812*38fd1498Szrj /* Don't do store elimination if there is any unknown dependence for 813*38fd1498Szrj any store data reference. */ 814*38fd1498Szrj if ((DR_IS_WRITE (dra) || DR_IS_WRITE (drb)) 815*38fd1498Szrj && (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know 816*38fd1498Szrj || DDR_NUM_DIST_VECTS (ddr) == 0)) 817*38fd1498Szrj eliminate_store_p = false; 818*38fd1498Szrj 819*38fd1498Szrj ia = component_of (comp_father, (unsigned) (size_t) dra->aux); 820*38fd1498Szrj ib = component_of (comp_father, (unsigned) (size_t) drb->aux); 821*38fd1498Szrj if (ia == ib) 822*38fd1498Szrj continue; 823*38fd1498Szrj 824*38fd1498Szrj bad = component_of (comp_father, n); 825*38fd1498Szrj 826*38fd1498Szrj /* If both A and B are reads, we may ignore unsuitable dependences. */ 827*38fd1498Szrj if (DR_IS_READ (dra) && DR_IS_READ (drb)) 828*38fd1498Szrj { 829*38fd1498Szrj if (ia == bad || ib == bad 830*38fd1498Szrj || !determine_offset (dra, drb, &dummy_off)) 831*38fd1498Szrj continue; 832*38fd1498Szrj } 833*38fd1498Szrj /* If A is read and B write or vice versa and there is unsuitable 834*38fd1498Szrj dependence, instead of merging both components into a component 835*38fd1498Szrj that will certainly not pass suitable_component_p, just put the 836*38fd1498Szrj read into bad component, perhaps at least the write together with 837*38fd1498Szrj all the other data refs in it's component will be optimizable. */ 838*38fd1498Szrj else if (DR_IS_READ (dra) && ib != bad) 839*38fd1498Szrj { 840*38fd1498Szrj if (ia == bad) 841*38fd1498Szrj continue; 842*38fd1498Szrj else if (!determine_offset (dra, drb, &dummy_off)) 843*38fd1498Szrj { 844*38fd1498Szrj merge_comps (comp_father, comp_size, bad, ia); 845*38fd1498Szrj continue; 846*38fd1498Szrj } 847*38fd1498Szrj } 848*38fd1498Szrj else if (DR_IS_READ (drb) && ia != bad) 849*38fd1498Szrj { 850*38fd1498Szrj if (ib == bad) 851*38fd1498Szrj continue; 852*38fd1498Szrj else if (!determine_offset (dra, drb, &dummy_off)) 853*38fd1498Szrj { 854*38fd1498Szrj merge_comps (comp_father, comp_size, bad, ib); 855*38fd1498Szrj continue; 856*38fd1498Szrj } 857*38fd1498Szrj } 858*38fd1498Szrj else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb) 859*38fd1498Szrj && ia != bad && ib != bad 860*38fd1498Szrj && !determine_offset (dra, drb, &dummy_off)) 861*38fd1498Szrj { 862*38fd1498Szrj merge_comps (comp_father, comp_size, bad, ia); 863*38fd1498Szrj merge_comps (comp_father, comp_size, bad, ib); 864*38fd1498Szrj continue; 865*38fd1498Szrj } 866*38fd1498Szrj 867*38fd1498Szrj merge_comps (comp_father, comp_size, ia, ib); 868*38fd1498Szrj } 869*38fd1498Szrj 870*38fd1498Szrj if (eliminate_store_p) 871*38fd1498Szrj { 872*38fd1498Szrj tree niters = number_of_latch_executions (loop); 873*38fd1498Szrj 874*38fd1498Szrj /* Don't do store elimination if niters info is unknown because stores 875*38fd1498Szrj in the last iteration can't be eliminated and we need to recover it 876*38fd1498Szrj after loop. */ 877*38fd1498Szrj eliminate_store_p = (niters != NULL_TREE && niters != chrec_dont_know); 878*38fd1498Szrj } 879*38fd1498Szrj 880*38fd1498Szrj comps = XCNEWVEC (struct component *, n); 881*38fd1498Szrj bad = component_of (comp_father, n); 882*38fd1498Szrj FOR_EACH_VEC_ELT (datarefs, i, dr) 883*38fd1498Szrj { 884*38fd1498Szrj ia = (unsigned) (size_t) dr->aux; 885*38fd1498Szrj ca = component_of (comp_father, ia); 886*38fd1498Szrj if (ca == bad) 887*38fd1498Szrj continue; 888*38fd1498Szrj 889*38fd1498Szrj comp = comps[ca]; 890*38fd1498Szrj if (!comp) 891*38fd1498Szrj { 892*38fd1498Szrj comp = XCNEW (struct component); 893*38fd1498Szrj comp->refs.create (comp_size[ca]); 894*38fd1498Szrj comp->eliminate_store_p = eliminate_store_p; 895*38fd1498Szrj comps[ca] = comp; 896*38fd1498Szrj } 897*38fd1498Szrj 898*38fd1498Szrj dataref = XCNEW (struct dref_d); 899*38fd1498Szrj dataref->ref = dr; 900*38fd1498Szrj dataref->stmt = DR_STMT (dr); 901*38fd1498Szrj dataref->offset = 0; 902*38fd1498Szrj dataref->distance = 0; 903*38fd1498Szrj 904*38fd1498Szrj dataref->always_accessed 905*38fd1498Szrj = dominated_by_p (CDI_DOMINATORS, last_always_executed, 906*38fd1498Szrj gimple_bb (dataref->stmt)); 907*38fd1498Szrj dataref->pos = comp->refs.length (); 908*38fd1498Szrj comp->refs.quick_push (dataref); 909*38fd1498Szrj } 910*38fd1498Szrj 911*38fd1498Szrj for (i = 0; i < n; i++) 912*38fd1498Szrj { 913*38fd1498Szrj comp = comps[i]; 914*38fd1498Szrj if (comp) 915*38fd1498Szrj { 916*38fd1498Szrj comp->next = comp_list; 917*38fd1498Szrj comp_list = comp; 918*38fd1498Szrj } 919*38fd1498Szrj } 920*38fd1498Szrj free (comps); 921*38fd1498Szrj 922*38fd1498Szrj end: 923*38fd1498Szrj free (comp_father); 924*38fd1498Szrj free (comp_size); 925*38fd1498Szrj return comp_list; 926*38fd1498Szrj } 927*38fd1498Szrj 928*38fd1498Szrj /* Returns true if the component COMP satisfies the conditions 929*38fd1498Szrj described in 2) at the beginning of this file. LOOP is the current 930*38fd1498Szrj loop. */ 931*38fd1498Szrj 932*38fd1498Szrj static bool 933*38fd1498Szrj suitable_component_p (struct loop *loop, struct component *comp) 934*38fd1498Szrj { 935*38fd1498Szrj unsigned i; 936*38fd1498Szrj dref a, first; 937*38fd1498Szrj basic_block ba, bp = loop->header; 938*38fd1498Szrj bool ok, has_write = false; 939*38fd1498Szrj 940*38fd1498Szrj FOR_EACH_VEC_ELT (comp->refs, i, a) 941*38fd1498Szrj { 942*38fd1498Szrj ba = gimple_bb (a->stmt); 943*38fd1498Szrj 944*38fd1498Szrj if (!just_once_each_iteration_p (loop, ba)) 945*38fd1498Szrj return false; 946*38fd1498Szrj 947*38fd1498Szrj gcc_assert (dominated_by_p (CDI_DOMINATORS, ba, bp)); 948*38fd1498Szrj bp = ba; 949*38fd1498Szrj 950*38fd1498Szrj if (DR_IS_WRITE (a->ref)) 951*38fd1498Szrj has_write = true; 952*38fd1498Szrj } 953*38fd1498Szrj 954*38fd1498Szrj first = comp->refs[0]; 955*38fd1498Szrj ok = suitable_reference_p (first->ref, &comp->comp_step); 956*38fd1498Szrj gcc_assert (ok); 957*38fd1498Szrj first->offset = 0; 958*38fd1498Szrj 959*38fd1498Szrj for (i = 1; comp->refs.iterate (i, &a); i++) 960*38fd1498Szrj { 961*38fd1498Szrj /* Polynomial offsets are no use, since we need to know the 962*38fd1498Szrj gap between iteration numbers at compile time. */ 963*38fd1498Szrj poly_widest_int offset; 964*38fd1498Szrj if (!determine_offset (first->ref, a->ref, &offset) 965*38fd1498Szrj || !offset.is_constant (&a->offset)) 966*38fd1498Szrj return false; 967*38fd1498Szrj 968*38fd1498Szrj enum ref_step_type a_step; 969*38fd1498Szrj gcc_checking_assert (suitable_reference_p (a->ref, &a_step) 970*38fd1498Szrj && a_step == comp->comp_step); 971*38fd1498Szrj } 972*38fd1498Szrj 973*38fd1498Szrj /* If there is a write inside the component, we must know whether the 974*38fd1498Szrj step is nonzero or not -- we would not otherwise be able to recognize 975*38fd1498Szrj whether the value accessed by reads comes from the OFFSET-th iteration 976*38fd1498Szrj or the previous one. */ 977*38fd1498Szrj if (has_write && comp->comp_step == RS_ANY) 978*38fd1498Szrj return false; 979*38fd1498Szrj 980*38fd1498Szrj return true; 981*38fd1498Szrj } 982*38fd1498Szrj 983*38fd1498Szrj /* Check the conditions on references inside each of components COMPS, 984*38fd1498Szrj and remove the unsuitable components from the list. The new list 985*38fd1498Szrj of components is returned. The conditions are described in 2) at 986*38fd1498Szrj the beginning of this file. LOOP is the current loop. */ 987*38fd1498Szrj 988*38fd1498Szrj static struct component * 989*38fd1498Szrj filter_suitable_components (struct loop *loop, struct component *comps) 990*38fd1498Szrj { 991*38fd1498Szrj struct component **comp, *act; 992*38fd1498Szrj 993*38fd1498Szrj for (comp = &comps; *comp; ) 994*38fd1498Szrj { 995*38fd1498Szrj act = *comp; 996*38fd1498Szrj if (suitable_component_p (loop, act)) 997*38fd1498Szrj comp = &act->next; 998*38fd1498Szrj else 999*38fd1498Szrj { 1000*38fd1498Szrj dref ref; 1001*38fd1498Szrj unsigned i; 1002*38fd1498Szrj 1003*38fd1498Szrj *comp = act->next; 1004*38fd1498Szrj FOR_EACH_VEC_ELT (act->refs, i, ref) 1005*38fd1498Szrj free (ref); 1006*38fd1498Szrj release_component (act); 1007*38fd1498Szrj } 1008*38fd1498Szrj } 1009*38fd1498Szrj 1010*38fd1498Szrj return comps; 1011*38fd1498Szrj } 1012*38fd1498Szrj 1013*38fd1498Szrj /* Compares two drefs A and B by their offset and position. Callback for 1014*38fd1498Szrj qsort. */ 1015*38fd1498Szrj 1016*38fd1498Szrj static int 1017*38fd1498Szrj order_drefs (const void *a, const void *b) 1018*38fd1498Szrj { 1019*38fd1498Szrj const dref *const da = (const dref *) a; 1020*38fd1498Szrj const dref *const db = (const dref *) b; 1021*38fd1498Szrj int offcmp = wi::cmps ((*da)->offset, (*db)->offset); 1022*38fd1498Szrj 1023*38fd1498Szrj if (offcmp != 0) 1024*38fd1498Szrj return offcmp; 1025*38fd1498Szrj 1026*38fd1498Szrj return (*da)->pos - (*db)->pos; 1027*38fd1498Szrj } 1028*38fd1498Szrj 1029*38fd1498Szrj /* Compares two drefs A and B by their position. Callback for qsort. */ 1030*38fd1498Szrj 1031*38fd1498Szrj static int 1032*38fd1498Szrj order_drefs_by_pos (const void *a, const void *b) 1033*38fd1498Szrj { 1034*38fd1498Szrj const dref *const da = (const dref *) a; 1035*38fd1498Szrj const dref *const db = (const dref *) b; 1036*38fd1498Szrj 1037*38fd1498Szrj return (*da)->pos - (*db)->pos; 1038*38fd1498Szrj } 1039*38fd1498Szrj 1040*38fd1498Szrj /* Returns root of the CHAIN. */ 1041*38fd1498Szrj 1042*38fd1498Szrj static inline dref 1043*38fd1498Szrj get_chain_root (chain_p chain) 1044*38fd1498Szrj { 1045*38fd1498Szrj return chain->refs[0]; 1046*38fd1498Szrj } 1047*38fd1498Szrj 1048*38fd1498Szrj /* Given CHAIN, returns the last write ref at DISTANCE, or NULL if it doesn't 1049*38fd1498Szrj exist. */ 1050*38fd1498Szrj 1051*38fd1498Szrj static inline dref 1052*38fd1498Szrj get_chain_last_write_at (chain_p chain, unsigned distance) 1053*38fd1498Szrj { 1054*38fd1498Szrj for (unsigned i = chain->refs.length (); i > 0; i--) 1055*38fd1498Szrj if (DR_IS_WRITE (chain->refs[i - 1]->ref) 1056*38fd1498Szrj && distance == chain->refs[i - 1]->distance) 1057*38fd1498Szrj return chain->refs[i - 1]; 1058*38fd1498Szrj 1059*38fd1498Szrj return NULL; 1060*38fd1498Szrj } 1061*38fd1498Szrj 1062*38fd1498Szrj /* Given CHAIN, returns the last write ref with the same distance before load 1063*38fd1498Szrj at index LOAD_IDX, or NULL if it doesn't exist. */ 1064*38fd1498Szrj 1065*38fd1498Szrj static inline dref 1066*38fd1498Szrj get_chain_last_write_before_load (chain_p chain, unsigned load_idx) 1067*38fd1498Szrj { 1068*38fd1498Szrj gcc_assert (load_idx < chain->refs.length ()); 1069*38fd1498Szrj 1070*38fd1498Szrj unsigned distance = chain->refs[load_idx]->distance; 1071*38fd1498Szrj 1072*38fd1498Szrj for (unsigned i = load_idx; i > 0; i--) 1073*38fd1498Szrj if (DR_IS_WRITE (chain->refs[i - 1]->ref) 1074*38fd1498Szrj && distance == chain->refs[i - 1]->distance) 1075*38fd1498Szrj return chain->refs[i - 1]; 1076*38fd1498Szrj 1077*38fd1498Szrj return NULL; 1078*38fd1498Szrj } 1079*38fd1498Szrj 1080*38fd1498Szrj /* Adds REF to the chain CHAIN. */ 1081*38fd1498Szrj 1082*38fd1498Szrj static void 1083*38fd1498Szrj add_ref_to_chain (chain_p chain, dref ref) 1084*38fd1498Szrj { 1085*38fd1498Szrj dref root = get_chain_root (chain); 1086*38fd1498Szrj 1087*38fd1498Szrj gcc_assert (wi::les_p (root->offset, ref->offset)); 1088*38fd1498Szrj widest_int dist = ref->offset - root->offset; 1089*38fd1498Szrj gcc_assert (wi::fits_uhwi_p (dist)); 1090*38fd1498Szrj 1091*38fd1498Szrj chain->refs.safe_push (ref); 1092*38fd1498Szrj 1093*38fd1498Szrj ref->distance = dist.to_uhwi (); 1094*38fd1498Szrj 1095*38fd1498Szrj if (ref->distance >= chain->length) 1096*38fd1498Szrj { 1097*38fd1498Szrj chain->length = ref->distance; 1098*38fd1498Szrj chain->has_max_use_after = false; 1099*38fd1498Szrj } 1100*38fd1498Szrj 1101*38fd1498Szrj /* Promote this chain to CT_STORE_STORE if it has multiple stores. */ 1102*38fd1498Szrj if (DR_IS_WRITE (ref->ref)) 1103*38fd1498Szrj chain->type = CT_STORE_STORE; 1104*38fd1498Szrj 1105*38fd1498Szrj /* Don't set the flag for store-store chain since there is no use. */ 1106*38fd1498Szrj if (chain->type != CT_STORE_STORE 1107*38fd1498Szrj && ref->distance == chain->length 1108*38fd1498Szrj && ref->pos > root->pos) 1109*38fd1498Szrj chain->has_max_use_after = true; 1110*38fd1498Szrj 1111*38fd1498Szrj chain->all_always_accessed &= ref->always_accessed; 1112*38fd1498Szrj } 1113*38fd1498Szrj 1114*38fd1498Szrj /* Returns the chain for invariant component COMP. */ 1115*38fd1498Szrj 1116*38fd1498Szrj static chain_p 1117*38fd1498Szrj make_invariant_chain (struct component *comp) 1118*38fd1498Szrj { 1119*38fd1498Szrj chain_p chain = XCNEW (struct chain); 1120*38fd1498Szrj unsigned i; 1121*38fd1498Szrj dref ref; 1122*38fd1498Szrj 1123*38fd1498Szrj chain->type = CT_INVARIANT; 1124*38fd1498Szrj 1125*38fd1498Szrj chain->all_always_accessed = true; 1126*38fd1498Szrj 1127*38fd1498Szrj FOR_EACH_VEC_ELT (comp->refs, i, ref) 1128*38fd1498Szrj { 1129*38fd1498Szrj chain->refs.safe_push (ref); 1130*38fd1498Szrj chain->all_always_accessed &= ref->always_accessed; 1131*38fd1498Szrj } 1132*38fd1498Szrj 1133*38fd1498Szrj chain->inits = vNULL; 1134*38fd1498Szrj chain->finis = vNULL; 1135*38fd1498Szrj 1136*38fd1498Szrj return chain; 1137*38fd1498Szrj } 1138*38fd1498Szrj 1139*38fd1498Szrj /* Make a new chain of type TYPE rooted at REF. */ 1140*38fd1498Szrj 1141*38fd1498Szrj static chain_p 1142*38fd1498Szrj make_rooted_chain (dref ref, enum chain_type type) 1143*38fd1498Szrj { 1144*38fd1498Szrj chain_p chain = XCNEW (struct chain); 1145*38fd1498Szrj 1146*38fd1498Szrj chain->type = type; 1147*38fd1498Szrj chain->refs.safe_push (ref); 1148*38fd1498Szrj chain->all_always_accessed = ref->always_accessed; 1149*38fd1498Szrj ref->distance = 0; 1150*38fd1498Szrj 1151*38fd1498Szrj chain->inits = vNULL; 1152*38fd1498Szrj chain->finis = vNULL; 1153*38fd1498Szrj 1154*38fd1498Szrj return chain; 1155*38fd1498Szrj } 1156*38fd1498Szrj 1157*38fd1498Szrj /* Returns true if CHAIN is not trivial. */ 1158*38fd1498Szrj 1159*38fd1498Szrj static bool 1160*38fd1498Szrj nontrivial_chain_p (chain_p chain) 1161*38fd1498Szrj { 1162*38fd1498Szrj return chain != NULL && chain->refs.length () > 1; 1163*38fd1498Szrj } 1164*38fd1498Szrj 1165*38fd1498Szrj /* Returns the ssa name that contains the value of REF, or NULL_TREE if there 1166*38fd1498Szrj is no such name. */ 1167*38fd1498Szrj 1168*38fd1498Szrj static tree 1169*38fd1498Szrj name_for_ref (dref ref) 1170*38fd1498Szrj { 1171*38fd1498Szrj tree name; 1172*38fd1498Szrj 1173*38fd1498Szrj if (is_gimple_assign (ref->stmt)) 1174*38fd1498Szrj { 1175*38fd1498Szrj if (!ref->ref || DR_IS_READ (ref->ref)) 1176*38fd1498Szrj name = gimple_assign_lhs (ref->stmt); 1177*38fd1498Szrj else 1178*38fd1498Szrj name = gimple_assign_rhs1 (ref->stmt); 1179*38fd1498Szrj } 1180*38fd1498Szrj else 1181*38fd1498Szrj name = PHI_RESULT (ref->stmt); 1182*38fd1498Szrj 1183*38fd1498Szrj return (TREE_CODE (name) == SSA_NAME ? name : NULL_TREE); 1184*38fd1498Szrj } 1185*38fd1498Szrj 1186*38fd1498Szrj /* Returns true if REF is a valid initializer for ROOT with given DISTANCE (in 1187*38fd1498Szrj iterations of the innermost enclosing loop). */ 1188*38fd1498Szrj 1189*38fd1498Szrj static bool 1190*38fd1498Szrj valid_initializer_p (struct data_reference *ref, 1191*38fd1498Szrj unsigned distance, struct data_reference *root) 1192*38fd1498Szrj { 1193*38fd1498Szrj aff_tree diff, base, step; 1194*38fd1498Szrj poly_widest_int off; 1195*38fd1498Szrj 1196*38fd1498Szrj /* Both REF and ROOT must be accessing the same object. */ 1197*38fd1498Szrj if (!operand_equal_p (DR_BASE_ADDRESS (ref), DR_BASE_ADDRESS (root), 0)) 1198*38fd1498Szrj return false; 1199*38fd1498Szrj 1200*38fd1498Szrj /* The initializer is defined outside of loop, hence its address must be 1201*38fd1498Szrj invariant inside the loop. */ 1202*38fd1498Szrj gcc_assert (integer_zerop (DR_STEP (ref))); 1203*38fd1498Szrj 1204*38fd1498Szrj /* If the address of the reference is invariant, initializer must access 1205*38fd1498Szrj exactly the same location. */ 1206*38fd1498Szrj if (integer_zerop (DR_STEP (root))) 1207*38fd1498Szrj return (operand_equal_p (DR_OFFSET (ref), DR_OFFSET (root), 0) 1208*38fd1498Szrj && operand_equal_p (DR_INIT (ref), DR_INIT (root), 0)); 1209*38fd1498Szrj 1210*38fd1498Szrj /* Verify that this index of REF is equal to the root's index at 1211*38fd1498Szrj -DISTANCE-th iteration. */ 1212*38fd1498Szrj aff_combination_dr_offset (root, &diff); 1213*38fd1498Szrj aff_combination_dr_offset (ref, &base); 1214*38fd1498Szrj aff_combination_scale (&base, -1); 1215*38fd1498Szrj aff_combination_add (&diff, &base); 1216*38fd1498Szrj 1217*38fd1498Szrj tree_to_aff_combination_expand (DR_STEP (root), TREE_TYPE (DR_STEP (root)), 1218*38fd1498Szrj &step, &name_expansions); 1219*38fd1498Szrj if (!aff_combination_constant_multiple_p (&diff, &step, &off)) 1220*38fd1498Szrj return false; 1221*38fd1498Szrj 1222*38fd1498Szrj if (maybe_ne (off, distance)) 1223*38fd1498Szrj return false; 1224*38fd1498Szrj 1225*38fd1498Szrj return true; 1226*38fd1498Szrj } 1227*38fd1498Szrj 1228*38fd1498Szrj /* Finds looparound phi node of LOOP that copies the value of REF, and if its 1229*38fd1498Szrj initial value is correct (equal to initial value of REF shifted by one 1230*38fd1498Szrj iteration), returns the phi node. Otherwise, NULL_TREE is returned. ROOT 1231*38fd1498Szrj is the root of the current chain. */ 1232*38fd1498Szrj 1233*38fd1498Szrj static gphi * 1234*38fd1498Szrj find_looparound_phi (struct loop *loop, dref ref, dref root) 1235*38fd1498Szrj { 1236*38fd1498Szrj tree name, init, init_ref; 1237*38fd1498Szrj gphi *phi = NULL; 1238*38fd1498Szrj gimple *init_stmt; 1239*38fd1498Szrj edge latch = loop_latch_edge (loop); 1240*38fd1498Szrj struct data_reference init_dr; 1241*38fd1498Szrj gphi_iterator psi; 1242*38fd1498Szrj 1243*38fd1498Szrj if (is_gimple_assign (ref->stmt)) 1244*38fd1498Szrj { 1245*38fd1498Szrj if (DR_IS_READ (ref->ref)) 1246*38fd1498Szrj name = gimple_assign_lhs (ref->stmt); 1247*38fd1498Szrj else 1248*38fd1498Szrj name = gimple_assign_rhs1 (ref->stmt); 1249*38fd1498Szrj } 1250*38fd1498Szrj else 1251*38fd1498Szrj name = PHI_RESULT (ref->stmt); 1252*38fd1498Szrj if (!name) 1253*38fd1498Szrj return NULL; 1254*38fd1498Szrj 1255*38fd1498Szrj for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi)) 1256*38fd1498Szrj { 1257*38fd1498Szrj phi = psi.phi (); 1258*38fd1498Szrj if (PHI_ARG_DEF_FROM_EDGE (phi, latch) == name) 1259*38fd1498Szrj break; 1260*38fd1498Szrj } 1261*38fd1498Szrj 1262*38fd1498Szrj if (gsi_end_p (psi)) 1263*38fd1498Szrj return NULL; 1264*38fd1498Szrj 1265*38fd1498Szrj init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop)); 1266*38fd1498Szrj if (TREE_CODE (init) != SSA_NAME) 1267*38fd1498Szrj return NULL; 1268*38fd1498Szrj init_stmt = SSA_NAME_DEF_STMT (init); 1269*38fd1498Szrj if (gimple_code (init_stmt) != GIMPLE_ASSIGN) 1270*38fd1498Szrj return NULL; 1271*38fd1498Szrj gcc_assert (gimple_assign_lhs (init_stmt) == init); 1272*38fd1498Szrj 1273*38fd1498Szrj init_ref = gimple_assign_rhs1 (init_stmt); 1274*38fd1498Szrj if (!REFERENCE_CLASS_P (init_ref) 1275*38fd1498Szrj && !DECL_P (init_ref)) 1276*38fd1498Szrj return NULL; 1277*38fd1498Szrj 1278*38fd1498Szrj /* Analyze the behavior of INIT_REF with respect to LOOP (innermost 1279*38fd1498Szrj loop enclosing PHI). */ 1280*38fd1498Szrj memset (&init_dr, 0, sizeof (struct data_reference)); 1281*38fd1498Szrj DR_REF (&init_dr) = init_ref; 1282*38fd1498Szrj DR_STMT (&init_dr) = phi; 1283*38fd1498Szrj if (!dr_analyze_innermost (&DR_INNERMOST (&init_dr), init_ref, loop)) 1284*38fd1498Szrj return NULL; 1285*38fd1498Szrj 1286*38fd1498Szrj if (!valid_initializer_p (&init_dr, ref->distance + 1, root->ref)) 1287*38fd1498Szrj return NULL; 1288*38fd1498Szrj 1289*38fd1498Szrj return phi; 1290*38fd1498Szrj } 1291*38fd1498Szrj 1292*38fd1498Szrj /* Adds a reference for the looparound copy of REF in PHI to CHAIN. */ 1293*38fd1498Szrj 1294*38fd1498Szrj static void 1295*38fd1498Szrj insert_looparound_copy (chain_p chain, dref ref, gphi *phi) 1296*38fd1498Szrj { 1297*38fd1498Szrj dref nw = XCNEW (struct dref_d), aref; 1298*38fd1498Szrj unsigned i; 1299*38fd1498Szrj 1300*38fd1498Szrj nw->stmt = phi; 1301*38fd1498Szrj nw->distance = ref->distance + 1; 1302*38fd1498Szrj nw->always_accessed = 1; 1303*38fd1498Szrj 1304*38fd1498Szrj FOR_EACH_VEC_ELT (chain->refs, i, aref) 1305*38fd1498Szrj if (aref->distance >= nw->distance) 1306*38fd1498Szrj break; 1307*38fd1498Szrj chain->refs.safe_insert (i, nw); 1308*38fd1498Szrj 1309*38fd1498Szrj if (nw->distance > chain->length) 1310*38fd1498Szrj { 1311*38fd1498Szrj chain->length = nw->distance; 1312*38fd1498Szrj chain->has_max_use_after = false; 1313*38fd1498Szrj } 1314*38fd1498Szrj } 1315*38fd1498Szrj 1316*38fd1498Szrj /* For references in CHAIN that are copied around the LOOP (created previously 1317*38fd1498Szrj by PRE, or by user), add the results of such copies to the chain. This 1318*38fd1498Szrj enables us to remove the copies by unrolling, and may need less registers 1319*38fd1498Szrj (also, it may allow us to combine chains together). */ 1320*38fd1498Szrj 1321*38fd1498Szrj static void 1322*38fd1498Szrj add_looparound_copies (struct loop *loop, chain_p chain) 1323*38fd1498Szrj { 1324*38fd1498Szrj unsigned i; 1325*38fd1498Szrj dref ref, root = get_chain_root (chain); 1326*38fd1498Szrj gphi *phi; 1327*38fd1498Szrj 1328*38fd1498Szrj if (chain->type == CT_STORE_STORE) 1329*38fd1498Szrj return; 1330*38fd1498Szrj 1331*38fd1498Szrj FOR_EACH_VEC_ELT (chain->refs, i, ref) 1332*38fd1498Szrj { 1333*38fd1498Szrj phi = find_looparound_phi (loop, ref, root); 1334*38fd1498Szrj if (!phi) 1335*38fd1498Szrj continue; 1336*38fd1498Szrj 1337*38fd1498Szrj bitmap_set_bit (looparound_phis, SSA_NAME_VERSION (PHI_RESULT (phi))); 1338*38fd1498Szrj insert_looparound_copy (chain, ref, phi); 1339*38fd1498Szrj } 1340*38fd1498Szrj } 1341*38fd1498Szrj 1342*38fd1498Szrj /* Find roots of the values and determine distances in the component COMP. 1343*38fd1498Szrj The references are redistributed into CHAINS. LOOP is the current 1344*38fd1498Szrj loop. */ 1345*38fd1498Szrj 1346*38fd1498Szrj static void 1347*38fd1498Szrj determine_roots_comp (struct loop *loop, 1348*38fd1498Szrj struct component *comp, 1349*38fd1498Szrj vec<chain_p> *chains) 1350*38fd1498Szrj { 1351*38fd1498Szrj unsigned i; 1352*38fd1498Szrj dref a; 1353*38fd1498Szrj chain_p chain = NULL; 1354*38fd1498Szrj widest_int last_ofs = 0; 1355*38fd1498Szrj enum chain_type type; 1356*38fd1498Szrj 1357*38fd1498Szrj /* Invariants are handled specially. */ 1358*38fd1498Szrj if (comp->comp_step == RS_INVARIANT) 1359*38fd1498Szrj { 1360*38fd1498Szrj chain = make_invariant_chain (comp); 1361*38fd1498Szrj chains->safe_push (chain); 1362*38fd1498Szrj return; 1363*38fd1498Szrj } 1364*38fd1498Szrj 1365*38fd1498Szrj /* Trivial component. */ 1366*38fd1498Szrj if (comp->refs.length () <= 1) 1367*38fd1498Szrj { 1368*38fd1498Szrj if (comp->refs.length () == 1) 1369*38fd1498Szrj { 1370*38fd1498Szrj free (comp->refs[0]); 1371*38fd1498Szrj comp->refs.truncate (0); 1372*38fd1498Szrj } 1373*38fd1498Szrj return; 1374*38fd1498Szrj } 1375*38fd1498Szrj 1376*38fd1498Szrj comp->refs.qsort (order_drefs); 1377*38fd1498Szrj 1378*38fd1498Szrj /* For Store-Store chain, we only support load if it is dominated by a 1379*38fd1498Szrj store statement in the same iteration of loop. */ 1380*38fd1498Szrj if (comp->eliminate_store_p) 1381*38fd1498Szrj for (a = NULL, i = 0; i < comp->refs.length (); i++) 1382*38fd1498Szrj { 1383*38fd1498Szrj if (DR_IS_WRITE (comp->refs[i]->ref)) 1384*38fd1498Szrj a = comp->refs[i]; 1385*38fd1498Szrj else if (a == NULL || a->offset != comp->refs[i]->offset) 1386*38fd1498Szrj { 1387*38fd1498Szrj /* If there is load that is not dominated by a store in the 1388*38fd1498Szrj same iteration of loop, clear the flag so no Store-Store 1389*38fd1498Szrj chain is generated for this component. */ 1390*38fd1498Szrj comp->eliminate_store_p = false; 1391*38fd1498Szrj break; 1392*38fd1498Szrj } 1393*38fd1498Szrj } 1394*38fd1498Szrj 1395*38fd1498Szrj /* Determine roots and create chains for components. */ 1396*38fd1498Szrj FOR_EACH_VEC_ELT (comp->refs, i, a) 1397*38fd1498Szrj { 1398*38fd1498Szrj if (!chain 1399*38fd1498Szrj || (chain->type == CT_LOAD && DR_IS_WRITE (a->ref)) 1400*38fd1498Szrj || (!comp->eliminate_store_p && DR_IS_WRITE (a->ref)) 1401*38fd1498Szrj || wi::leu_p (MAX_DISTANCE, a->offset - last_ofs)) 1402*38fd1498Szrj { 1403*38fd1498Szrj if (nontrivial_chain_p (chain)) 1404*38fd1498Szrj { 1405*38fd1498Szrj add_looparound_copies (loop, chain); 1406*38fd1498Szrj chains->safe_push (chain); 1407*38fd1498Szrj } 1408*38fd1498Szrj else 1409*38fd1498Szrj release_chain (chain); 1410*38fd1498Szrj 1411*38fd1498Szrj /* Determine type of the chain. If the root reference is a load, 1412*38fd1498Szrj this can only be a CT_LOAD chain; other chains are intialized 1413*38fd1498Szrj to CT_STORE_LOAD and might be promoted to CT_STORE_STORE when 1414*38fd1498Szrj new reference is added. */ 1415*38fd1498Szrj type = DR_IS_READ (a->ref) ? CT_LOAD : CT_STORE_LOAD; 1416*38fd1498Szrj chain = make_rooted_chain (a, type); 1417*38fd1498Szrj last_ofs = a->offset; 1418*38fd1498Szrj continue; 1419*38fd1498Szrj } 1420*38fd1498Szrj 1421*38fd1498Szrj add_ref_to_chain (chain, a); 1422*38fd1498Szrj } 1423*38fd1498Szrj 1424*38fd1498Szrj if (nontrivial_chain_p (chain)) 1425*38fd1498Szrj { 1426*38fd1498Szrj add_looparound_copies (loop, chain); 1427*38fd1498Szrj chains->safe_push (chain); 1428*38fd1498Szrj } 1429*38fd1498Szrj else 1430*38fd1498Szrj release_chain (chain); 1431*38fd1498Szrj } 1432*38fd1498Szrj 1433*38fd1498Szrj /* Find roots of the values and determine distances in components COMPS, and 1434*38fd1498Szrj separates the references to CHAINS. LOOP is the current loop. */ 1435*38fd1498Szrj 1436*38fd1498Szrj static void 1437*38fd1498Szrj determine_roots (struct loop *loop, 1438*38fd1498Szrj struct component *comps, vec<chain_p> *chains) 1439*38fd1498Szrj { 1440*38fd1498Szrj struct component *comp; 1441*38fd1498Szrj 1442*38fd1498Szrj for (comp = comps; comp; comp = comp->next) 1443*38fd1498Szrj determine_roots_comp (loop, comp, chains); 1444*38fd1498Szrj } 1445*38fd1498Szrj 1446*38fd1498Szrj /* Replace the reference in statement STMT with temporary variable 1447*38fd1498Szrj NEW_TREE. If SET is true, NEW_TREE is instead initialized to the value of 1448*38fd1498Szrj the reference in the statement. IN_LHS is true if the reference 1449*38fd1498Szrj is in the lhs of STMT, false if it is in rhs. */ 1450*38fd1498Szrj 1451*38fd1498Szrj static void 1452*38fd1498Szrj replace_ref_with (gimple *stmt, tree new_tree, bool set, bool in_lhs) 1453*38fd1498Szrj { 1454*38fd1498Szrj tree val; 1455*38fd1498Szrj gassign *new_stmt; 1456*38fd1498Szrj gimple_stmt_iterator bsi, psi; 1457*38fd1498Szrj 1458*38fd1498Szrj if (gimple_code (stmt) == GIMPLE_PHI) 1459*38fd1498Szrj { 1460*38fd1498Szrj gcc_assert (!in_lhs && !set); 1461*38fd1498Szrj 1462*38fd1498Szrj val = PHI_RESULT (stmt); 1463*38fd1498Szrj bsi = gsi_after_labels (gimple_bb (stmt)); 1464*38fd1498Szrj psi = gsi_for_stmt (stmt); 1465*38fd1498Szrj remove_phi_node (&psi, false); 1466*38fd1498Szrj 1467*38fd1498Szrj /* Turn the phi node into GIMPLE_ASSIGN. */ 1468*38fd1498Szrj new_stmt = gimple_build_assign (val, new_tree); 1469*38fd1498Szrj gsi_insert_before (&bsi, new_stmt, GSI_NEW_STMT); 1470*38fd1498Szrj return; 1471*38fd1498Szrj } 1472*38fd1498Szrj 1473*38fd1498Szrj /* Since the reference is of gimple_reg type, it should only 1474*38fd1498Szrj appear as lhs or rhs of modify statement. */ 1475*38fd1498Szrj gcc_assert (is_gimple_assign (stmt)); 1476*38fd1498Szrj 1477*38fd1498Szrj bsi = gsi_for_stmt (stmt); 1478*38fd1498Szrj 1479*38fd1498Szrj /* If we do not need to initialize NEW_TREE, just replace the use of OLD. */ 1480*38fd1498Szrj if (!set) 1481*38fd1498Szrj { 1482*38fd1498Szrj gcc_assert (!in_lhs); 1483*38fd1498Szrj gimple_assign_set_rhs_from_tree (&bsi, new_tree); 1484*38fd1498Szrj stmt = gsi_stmt (bsi); 1485*38fd1498Szrj update_stmt (stmt); 1486*38fd1498Szrj return; 1487*38fd1498Szrj } 1488*38fd1498Szrj 1489*38fd1498Szrj if (in_lhs) 1490*38fd1498Szrj { 1491*38fd1498Szrj /* We have statement 1492*38fd1498Szrj 1493*38fd1498Szrj OLD = VAL 1494*38fd1498Szrj 1495*38fd1498Szrj If OLD is a memory reference, then VAL is gimple_val, and we transform 1496*38fd1498Szrj this to 1497*38fd1498Szrj 1498*38fd1498Szrj OLD = VAL 1499*38fd1498Szrj NEW = VAL 1500*38fd1498Szrj 1501*38fd1498Szrj Otherwise, we are replacing a combination chain, 1502*38fd1498Szrj VAL is the expression that performs the combination, and OLD is an 1503*38fd1498Szrj SSA name. In this case, we transform the assignment to 1504*38fd1498Szrj 1505*38fd1498Szrj OLD = VAL 1506*38fd1498Szrj NEW = OLD 1507*38fd1498Szrj 1508*38fd1498Szrj */ 1509*38fd1498Szrj 1510*38fd1498Szrj val = gimple_assign_lhs (stmt); 1511*38fd1498Szrj if (TREE_CODE (val) != SSA_NAME) 1512*38fd1498Szrj { 1513*38fd1498Szrj val = gimple_assign_rhs1 (stmt); 1514*38fd1498Szrj gcc_assert (gimple_assign_single_p (stmt)); 1515*38fd1498Szrj if (TREE_CLOBBER_P (val)) 1516*38fd1498Szrj val = get_or_create_ssa_default_def (cfun, SSA_NAME_VAR (new_tree)); 1517*38fd1498Szrj else 1518*38fd1498Szrj gcc_assert (gimple_assign_copy_p (stmt)); 1519*38fd1498Szrj } 1520*38fd1498Szrj } 1521*38fd1498Szrj else 1522*38fd1498Szrj { 1523*38fd1498Szrj /* VAL = OLD 1524*38fd1498Szrj 1525*38fd1498Szrj is transformed to 1526*38fd1498Szrj 1527*38fd1498Szrj VAL = OLD 1528*38fd1498Szrj NEW = VAL */ 1529*38fd1498Szrj 1530*38fd1498Szrj val = gimple_assign_lhs (stmt); 1531*38fd1498Szrj } 1532*38fd1498Szrj 1533*38fd1498Szrj new_stmt = gimple_build_assign (new_tree, unshare_expr (val)); 1534*38fd1498Szrj gsi_insert_after (&bsi, new_stmt, GSI_NEW_STMT); 1535*38fd1498Szrj } 1536*38fd1498Szrj 1537*38fd1498Szrj /* Returns a memory reference to DR in the (NITERS + ITER)-th iteration 1538*38fd1498Szrj of the loop it was analyzed in. Append init stmts to STMTS. */ 1539*38fd1498Szrj 1540*38fd1498Szrj static tree 1541*38fd1498Szrj ref_at_iteration (data_reference_p dr, int iter, 1542*38fd1498Szrj gimple_seq *stmts, tree niters = NULL_TREE) 1543*38fd1498Szrj { 1544*38fd1498Szrj tree off = DR_OFFSET (dr); 1545*38fd1498Szrj tree coff = DR_INIT (dr); 1546*38fd1498Szrj tree ref = DR_REF (dr); 1547*38fd1498Szrj enum tree_code ref_code = ERROR_MARK; 1548*38fd1498Szrj tree ref_type = NULL_TREE; 1549*38fd1498Szrj tree ref_op1 = NULL_TREE; 1550*38fd1498Szrj tree ref_op2 = NULL_TREE; 1551*38fd1498Szrj tree new_offset; 1552*38fd1498Szrj 1553*38fd1498Szrj if (iter != 0) 1554*38fd1498Szrj { 1555*38fd1498Szrj new_offset = size_binop (MULT_EXPR, DR_STEP (dr), ssize_int (iter)); 1556*38fd1498Szrj if (TREE_CODE (new_offset) == INTEGER_CST) 1557*38fd1498Szrj coff = size_binop (PLUS_EXPR, coff, new_offset); 1558*38fd1498Szrj else 1559*38fd1498Szrj off = size_binop (PLUS_EXPR, off, new_offset); 1560*38fd1498Szrj } 1561*38fd1498Szrj 1562*38fd1498Szrj if (niters != NULL_TREE) 1563*38fd1498Szrj { 1564*38fd1498Szrj niters = fold_convert (ssizetype, niters); 1565*38fd1498Szrj new_offset = size_binop (MULT_EXPR, DR_STEP (dr), niters); 1566*38fd1498Szrj if (TREE_CODE (niters) == INTEGER_CST) 1567*38fd1498Szrj coff = size_binop (PLUS_EXPR, coff, new_offset); 1568*38fd1498Szrj else 1569*38fd1498Szrj off = size_binop (PLUS_EXPR, off, new_offset); 1570*38fd1498Szrj } 1571*38fd1498Szrj 1572*38fd1498Szrj /* While data-ref analysis punts on bit offsets it still handles 1573*38fd1498Szrj bitfield accesses at byte boundaries. Cope with that. Note that 1574*38fd1498Szrj if the bitfield object also starts at a byte-boundary we can simply 1575*38fd1498Szrj replicate the COMPONENT_REF, but we have to subtract the component's 1576*38fd1498Szrj byte-offset from the MEM_REF address first. 1577*38fd1498Szrj Otherwise we simply build a BIT_FIELD_REF knowing that the bits 1578*38fd1498Szrj start at offset zero. */ 1579*38fd1498Szrj if (TREE_CODE (ref) == COMPONENT_REF 1580*38fd1498Szrj && DECL_BIT_FIELD (TREE_OPERAND (ref, 1))) 1581*38fd1498Szrj { 1582*38fd1498Szrj unsigned HOST_WIDE_INT boff; 1583*38fd1498Szrj tree field = TREE_OPERAND (ref, 1); 1584*38fd1498Szrj tree offset = component_ref_field_offset (ref); 1585*38fd1498Szrj ref_type = TREE_TYPE (ref); 1586*38fd1498Szrj boff = tree_to_uhwi (DECL_FIELD_BIT_OFFSET (field)); 1587*38fd1498Szrj /* This can occur in Ada. See the comment in get_bit_range. */ 1588*38fd1498Szrj if (boff % BITS_PER_UNIT != 0 1589*38fd1498Szrj || !tree_fits_uhwi_p (offset)) 1590*38fd1498Szrj { 1591*38fd1498Szrj ref_code = BIT_FIELD_REF; 1592*38fd1498Szrj ref_op1 = DECL_SIZE (field); 1593*38fd1498Szrj ref_op2 = bitsize_zero_node; 1594*38fd1498Szrj } 1595*38fd1498Szrj else 1596*38fd1498Szrj { 1597*38fd1498Szrj boff >>= LOG2_BITS_PER_UNIT; 1598*38fd1498Szrj boff += tree_to_uhwi (offset); 1599*38fd1498Szrj coff = size_binop (MINUS_EXPR, coff, ssize_int (boff)); 1600*38fd1498Szrj ref_code = COMPONENT_REF; 1601*38fd1498Szrj ref_op1 = field; 1602*38fd1498Szrj ref_op2 = TREE_OPERAND (ref, 2); 1603*38fd1498Szrj ref = TREE_OPERAND (ref, 0); 1604*38fd1498Szrj } 1605*38fd1498Szrj } 1606*38fd1498Szrj tree addr = fold_build_pointer_plus (DR_BASE_ADDRESS (dr), off); 1607*38fd1498Szrj addr = force_gimple_operand_1 (unshare_expr (addr), stmts, 1608*38fd1498Szrj is_gimple_mem_ref_addr, NULL_TREE); 1609*38fd1498Szrj tree alias_ptr = fold_convert (reference_alias_ptr_type (ref), coff); 1610*38fd1498Szrj tree type = build_aligned_type (TREE_TYPE (ref), 1611*38fd1498Szrj get_object_alignment (ref)); 1612*38fd1498Szrj ref = build2 (MEM_REF, type, addr, alias_ptr); 1613*38fd1498Szrj if (ref_type) 1614*38fd1498Szrj ref = build3 (ref_code, ref_type, ref, ref_op1, ref_op2); 1615*38fd1498Szrj return ref; 1616*38fd1498Szrj } 1617*38fd1498Szrj 1618*38fd1498Szrj /* Get the initialization expression for the INDEX-th temporary variable 1619*38fd1498Szrj of CHAIN. */ 1620*38fd1498Szrj 1621*38fd1498Szrj static tree 1622*38fd1498Szrj get_init_expr (chain_p chain, unsigned index) 1623*38fd1498Szrj { 1624*38fd1498Szrj if (chain->type == CT_COMBINATION) 1625*38fd1498Szrj { 1626*38fd1498Szrj tree e1 = get_init_expr (chain->ch1, index); 1627*38fd1498Szrj tree e2 = get_init_expr (chain->ch2, index); 1628*38fd1498Szrj 1629*38fd1498Szrj return fold_build2 (chain->op, chain->rslt_type, e1, e2); 1630*38fd1498Szrj } 1631*38fd1498Szrj else 1632*38fd1498Szrj return chain->inits[index]; 1633*38fd1498Szrj } 1634*38fd1498Szrj 1635*38fd1498Szrj /* Returns a new temporary variable used for the I-th variable carrying 1636*38fd1498Szrj value of REF. The variable's uid is marked in TMP_VARS. */ 1637*38fd1498Szrj 1638*38fd1498Szrj static tree 1639*38fd1498Szrj predcom_tmp_var (tree ref, unsigned i, bitmap tmp_vars) 1640*38fd1498Szrj { 1641*38fd1498Szrj tree type = TREE_TYPE (ref); 1642*38fd1498Szrj /* We never access the components of the temporary variable in predictive 1643*38fd1498Szrj commoning. */ 1644*38fd1498Szrj tree var = create_tmp_reg (type, get_lsm_tmp_name (ref, i)); 1645*38fd1498Szrj bitmap_set_bit (tmp_vars, DECL_UID (var)); 1646*38fd1498Szrj return var; 1647*38fd1498Szrj } 1648*38fd1498Szrj 1649*38fd1498Szrj /* Creates the variables for CHAIN, as well as phi nodes for them and 1650*38fd1498Szrj initialization on entry to LOOP. Uids of the newly created 1651*38fd1498Szrj temporary variables are marked in TMP_VARS. */ 1652*38fd1498Szrj 1653*38fd1498Szrj static void 1654*38fd1498Szrj initialize_root_vars (struct loop *loop, chain_p chain, bitmap tmp_vars) 1655*38fd1498Szrj { 1656*38fd1498Szrj unsigned i; 1657*38fd1498Szrj unsigned n = chain->length; 1658*38fd1498Szrj dref root = get_chain_root (chain); 1659*38fd1498Szrj bool reuse_first = !chain->has_max_use_after; 1660*38fd1498Szrj tree ref, init, var, next; 1661*38fd1498Szrj gphi *phi; 1662*38fd1498Szrj gimple_seq stmts; 1663*38fd1498Szrj edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop); 1664*38fd1498Szrj 1665*38fd1498Szrj /* If N == 0, then all the references are within the single iteration. And 1666*38fd1498Szrj since this is an nonempty chain, reuse_first cannot be true. */ 1667*38fd1498Szrj gcc_assert (n > 0 || !reuse_first); 1668*38fd1498Szrj 1669*38fd1498Szrj chain->vars.create (n + 1); 1670*38fd1498Szrj 1671*38fd1498Szrj if (chain->type == CT_COMBINATION) 1672*38fd1498Szrj ref = gimple_assign_lhs (root->stmt); 1673*38fd1498Szrj else 1674*38fd1498Szrj ref = DR_REF (root->ref); 1675*38fd1498Szrj 1676*38fd1498Szrj for (i = 0; i < n + (reuse_first ? 0 : 1); i++) 1677*38fd1498Szrj { 1678*38fd1498Szrj var = predcom_tmp_var (ref, i, tmp_vars); 1679*38fd1498Szrj chain->vars.quick_push (var); 1680*38fd1498Szrj } 1681*38fd1498Szrj if (reuse_first) 1682*38fd1498Szrj chain->vars.quick_push (chain->vars[0]); 1683*38fd1498Szrj 1684*38fd1498Szrj FOR_EACH_VEC_ELT (chain->vars, i, var) 1685*38fd1498Szrj chain->vars[i] = make_ssa_name (var); 1686*38fd1498Szrj 1687*38fd1498Szrj for (i = 0; i < n; i++) 1688*38fd1498Szrj { 1689*38fd1498Szrj var = chain->vars[i]; 1690*38fd1498Szrj next = chain->vars[i + 1]; 1691*38fd1498Szrj init = get_init_expr (chain, i); 1692*38fd1498Szrj 1693*38fd1498Szrj init = force_gimple_operand (init, &stmts, true, NULL_TREE); 1694*38fd1498Szrj if (stmts) 1695*38fd1498Szrj gsi_insert_seq_on_edge_immediate (entry, stmts); 1696*38fd1498Szrj 1697*38fd1498Szrj phi = create_phi_node (var, loop->header); 1698*38fd1498Szrj add_phi_arg (phi, init, entry, UNKNOWN_LOCATION); 1699*38fd1498Szrj add_phi_arg (phi, next, latch, UNKNOWN_LOCATION); 1700*38fd1498Szrj } 1701*38fd1498Szrj } 1702*38fd1498Szrj 1703*38fd1498Szrj /* For inter-iteration store elimination CHAIN in LOOP, returns true if 1704*38fd1498Szrj all stores to be eliminated store loop invariant values into memory. 1705*38fd1498Szrj In this case, we can use these invariant values directly after LOOP. */ 1706*38fd1498Szrj 1707*38fd1498Szrj static bool 1708*38fd1498Szrj is_inv_store_elimination_chain (struct loop *loop, chain_p chain) 1709*38fd1498Szrj { 1710*38fd1498Szrj if (chain->length == 0 || chain->type != CT_STORE_STORE) 1711*38fd1498Szrj return false; 1712*38fd1498Szrj 1713*38fd1498Szrj gcc_assert (!chain->has_max_use_after); 1714*38fd1498Szrj 1715*38fd1498Szrj /* If loop iterates for unknown times or fewer times than chain->lenght, 1716*38fd1498Szrj we still need to setup root variable and propagate it with PHI node. */ 1717*38fd1498Szrj tree niters = number_of_latch_executions (loop); 1718*38fd1498Szrj if (TREE_CODE (niters) != INTEGER_CST 1719*38fd1498Szrj || wi::leu_p (wi::to_wide (niters), chain->length)) 1720*38fd1498Szrj return false; 1721*38fd1498Szrj 1722*38fd1498Szrj /* Check stores in chain for elimination if they only store loop invariant 1723*38fd1498Szrj values. */ 1724*38fd1498Szrj for (unsigned i = 0; i < chain->length; i++) 1725*38fd1498Szrj { 1726*38fd1498Szrj dref a = get_chain_last_write_at (chain, i); 1727*38fd1498Szrj if (a == NULL) 1728*38fd1498Szrj continue; 1729*38fd1498Szrj 1730*38fd1498Szrj gimple *def_stmt, *stmt = a->stmt; 1731*38fd1498Szrj if (!gimple_assign_single_p (stmt)) 1732*38fd1498Szrj return false; 1733*38fd1498Szrj 1734*38fd1498Szrj tree val = gimple_assign_rhs1 (stmt); 1735*38fd1498Szrj if (TREE_CLOBBER_P (val)) 1736*38fd1498Szrj return false; 1737*38fd1498Szrj 1738*38fd1498Szrj if (CONSTANT_CLASS_P (val)) 1739*38fd1498Szrj continue; 1740*38fd1498Szrj 1741*38fd1498Szrj if (TREE_CODE (val) != SSA_NAME) 1742*38fd1498Szrj return false; 1743*38fd1498Szrj 1744*38fd1498Szrj def_stmt = SSA_NAME_DEF_STMT (val); 1745*38fd1498Szrj if (gimple_nop_p (def_stmt)) 1746*38fd1498Szrj continue; 1747*38fd1498Szrj 1748*38fd1498Szrj if (flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))) 1749*38fd1498Szrj return false; 1750*38fd1498Szrj } 1751*38fd1498Szrj return true; 1752*38fd1498Szrj } 1753*38fd1498Szrj 1754*38fd1498Szrj /* Creates root variables for store elimination CHAIN in which stores for 1755*38fd1498Szrj elimination only store loop invariant values. In this case, we neither 1756*38fd1498Szrj need to load root variables before loop nor propagate it with PHI nodes. */ 1757*38fd1498Szrj 1758*38fd1498Szrj static void 1759*38fd1498Szrj initialize_root_vars_store_elim_1 (chain_p chain) 1760*38fd1498Szrj { 1761*38fd1498Szrj tree var; 1762*38fd1498Szrj unsigned i, n = chain->length; 1763*38fd1498Szrj 1764*38fd1498Szrj chain->vars.create (n); 1765*38fd1498Szrj chain->vars.safe_grow_cleared (n); 1766*38fd1498Szrj 1767*38fd1498Szrj /* Initialize root value for eliminated stores at each distance. */ 1768*38fd1498Szrj for (i = 0; i < n; i++) 1769*38fd1498Szrj { 1770*38fd1498Szrj dref a = get_chain_last_write_at (chain, i); 1771*38fd1498Szrj if (a == NULL) 1772*38fd1498Szrj continue; 1773*38fd1498Szrj 1774*38fd1498Szrj var = gimple_assign_rhs1 (a->stmt); 1775*38fd1498Szrj chain->vars[a->distance] = var; 1776*38fd1498Szrj } 1777*38fd1498Szrj 1778*38fd1498Szrj /* We don't propagate values with PHI nodes, so manually propagate value 1779*38fd1498Szrj to bubble positions. */ 1780*38fd1498Szrj var = chain->vars[0]; 1781*38fd1498Szrj for (i = 1; i < n; i++) 1782*38fd1498Szrj { 1783*38fd1498Szrj if (chain->vars[i] != NULL_TREE) 1784*38fd1498Szrj { 1785*38fd1498Szrj var = chain->vars[i]; 1786*38fd1498Szrj continue; 1787*38fd1498Szrj } 1788*38fd1498Szrj chain->vars[i] = var; 1789*38fd1498Szrj } 1790*38fd1498Szrj 1791*38fd1498Szrj /* Revert the vector. */ 1792*38fd1498Szrj for (i = 0; i < n / 2; i++) 1793*38fd1498Szrj std::swap (chain->vars[i], chain->vars[n - i - 1]); 1794*38fd1498Szrj } 1795*38fd1498Szrj 1796*38fd1498Szrj /* Creates root variables for store elimination CHAIN in which stores for 1797*38fd1498Szrj elimination store loop variant values. In this case, we may need to 1798*38fd1498Szrj load root variables before LOOP and propagate it with PHI nodes. Uids 1799*38fd1498Szrj of the newly created root variables are marked in TMP_VARS. */ 1800*38fd1498Szrj 1801*38fd1498Szrj static void 1802*38fd1498Szrj initialize_root_vars_store_elim_2 (struct loop *loop, 1803*38fd1498Szrj chain_p chain, bitmap tmp_vars) 1804*38fd1498Szrj { 1805*38fd1498Szrj unsigned i, n = chain->length; 1806*38fd1498Szrj tree ref, init, var, next, val, phi_result; 1807*38fd1498Szrj gimple *stmt; 1808*38fd1498Szrj gimple_seq stmts; 1809*38fd1498Szrj 1810*38fd1498Szrj chain->vars.create (n); 1811*38fd1498Szrj 1812*38fd1498Szrj ref = DR_REF (get_chain_root (chain)->ref); 1813*38fd1498Szrj for (i = 0; i < n; i++) 1814*38fd1498Szrj { 1815*38fd1498Szrj var = predcom_tmp_var (ref, i, tmp_vars); 1816*38fd1498Szrj chain->vars.quick_push (var); 1817*38fd1498Szrj } 1818*38fd1498Szrj 1819*38fd1498Szrj FOR_EACH_VEC_ELT (chain->vars, i, var) 1820*38fd1498Szrj chain->vars[i] = make_ssa_name (var); 1821*38fd1498Szrj 1822*38fd1498Szrj /* Root values are either rhs operand of stores to be eliminated, or 1823*38fd1498Szrj loaded from memory before loop. */ 1824*38fd1498Szrj auto_vec<tree> vtemps; 1825*38fd1498Szrj vtemps.safe_grow_cleared (n); 1826*38fd1498Szrj for (i = 0; i < n; i++) 1827*38fd1498Szrj { 1828*38fd1498Szrj init = get_init_expr (chain, i); 1829*38fd1498Szrj if (init == NULL_TREE) 1830*38fd1498Szrj { 1831*38fd1498Szrj /* Root value is rhs operand of the store to be eliminated if 1832*38fd1498Szrj it isn't loaded from memory before loop. */ 1833*38fd1498Szrj dref a = get_chain_last_write_at (chain, i); 1834*38fd1498Szrj val = gimple_assign_rhs1 (a->stmt); 1835*38fd1498Szrj if (TREE_CLOBBER_P (val)) 1836*38fd1498Szrj { 1837*38fd1498Szrj val = get_or_create_ssa_default_def (cfun, SSA_NAME_VAR (var)); 1838*38fd1498Szrj gimple_assign_set_rhs1 (a->stmt, val); 1839*38fd1498Szrj } 1840*38fd1498Szrj 1841*38fd1498Szrj vtemps[n - i - 1] = val; 1842*38fd1498Szrj } 1843*38fd1498Szrj else 1844*38fd1498Szrj { 1845*38fd1498Szrj edge latch = loop_latch_edge (loop); 1846*38fd1498Szrj edge entry = loop_preheader_edge (loop); 1847*38fd1498Szrj 1848*38fd1498Szrj /* Root value is loaded from memory before loop, we also need 1849*38fd1498Szrj to add PHI nodes to propagate the value across iterations. */ 1850*38fd1498Szrj init = force_gimple_operand (init, &stmts, true, NULL_TREE); 1851*38fd1498Szrj if (stmts) 1852*38fd1498Szrj gsi_insert_seq_on_edge_immediate (entry, stmts); 1853*38fd1498Szrj 1854*38fd1498Szrj next = chain->vars[n - i]; 1855*38fd1498Szrj phi_result = copy_ssa_name (next); 1856*38fd1498Szrj gphi *phi = create_phi_node (phi_result, loop->header); 1857*38fd1498Szrj add_phi_arg (phi, init, entry, UNKNOWN_LOCATION); 1858*38fd1498Szrj add_phi_arg (phi, next, latch, UNKNOWN_LOCATION); 1859*38fd1498Szrj vtemps[n - i - 1] = phi_result; 1860*38fd1498Szrj } 1861*38fd1498Szrj } 1862*38fd1498Szrj 1863*38fd1498Szrj /* Find the insertion position. */ 1864*38fd1498Szrj dref last = get_chain_root (chain); 1865*38fd1498Szrj for (i = 0; i < chain->refs.length (); i++) 1866*38fd1498Szrj { 1867*38fd1498Szrj if (chain->refs[i]->pos > last->pos) 1868*38fd1498Szrj last = chain->refs[i]; 1869*38fd1498Szrj } 1870*38fd1498Szrj 1871*38fd1498Szrj gimple_stmt_iterator gsi = gsi_for_stmt (last->stmt); 1872*38fd1498Szrj 1873*38fd1498Szrj /* Insert statements copying root value to root variable. */ 1874*38fd1498Szrj for (i = 0; i < n; i++) 1875*38fd1498Szrj { 1876*38fd1498Szrj var = chain->vars[i]; 1877*38fd1498Szrj val = vtemps[i]; 1878*38fd1498Szrj stmt = gimple_build_assign (var, val); 1879*38fd1498Szrj gsi_insert_after (&gsi, stmt, GSI_NEW_STMT); 1880*38fd1498Szrj } 1881*38fd1498Szrj } 1882*38fd1498Szrj 1883*38fd1498Szrj /* Generates stores for CHAIN's eliminated stores in LOOP's last 1884*38fd1498Szrj (CHAIN->length - 1) iterations. */ 1885*38fd1498Szrj 1886*38fd1498Szrj static void 1887*38fd1498Szrj finalize_eliminated_stores (struct loop *loop, chain_p chain) 1888*38fd1498Szrj { 1889*38fd1498Szrj unsigned i, n = chain->length; 1890*38fd1498Szrj 1891*38fd1498Szrj for (i = 0; i < n; i++) 1892*38fd1498Szrj { 1893*38fd1498Szrj tree var = chain->vars[i]; 1894*38fd1498Szrj tree fini = chain->finis[n - i - 1]; 1895*38fd1498Szrj gimple *stmt = gimple_build_assign (fini, var); 1896*38fd1498Szrj 1897*38fd1498Szrj gimple_seq_add_stmt_without_update (&chain->fini_seq, stmt); 1898*38fd1498Szrj } 1899*38fd1498Szrj 1900*38fd1498Szrj if (chain->fini_seq) 1901*38fd1498Szrj { 1902*38fd1498Szrj gsi_insert_seq_on_edge_immediate (single_exit (loop), chain->fini_seq); 1903*38fd1498Szrj chain->fini_seq = NULL; 1904*38fd1498Szrj } 1905*38fd1498Szrj } 1906*38fd1498Szrj 1907*38fd1498Szrj /* Initializes a variable for load motion for ROOT and prepares phi nodes and 1908*38fd1498Szrj initialization on entry to LOOP if necessary. The ssa name for the variable 1909*38fd1498Szrj is stored in VARS. If WRITTEN is true, also a phi node to copy its value 1910*38fd1498Szrj around the loop is created. Uid of the newly created temporary variable 1911*38fd1498Szrj is marked in TMP_VARS. INITS is the list containing the (single) 1912*38fd1498Szrj initializer. */ 1913*38fd1498Szrj 1914*38fd1498Szrj static void 1915*38fd1498Szrj initialize_root_vars_lm (struct loop *loop, dref root, bool written, 1916*38fd1498Szrj vec<tree> *vars, vec<tree> inits, 1917*38fd1498Szrj bitmap tmp_vars) 1918*38fd1498Szrj { 1919*38fd1498Szrj unsigned i; 1920*38fd1498Szrj tree ref = DR_REF (root->ref), init, var, next; 1921*38fd1498Szrj gimple_seq stmts; 1922*38fd1498Szrj gphi *phi; 1923*38fd1498Szrj edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop); 1924*38fd1498Szrj 1925*38fd1498Szrj /* Find the initializer for the variable, and check that it cannot 1926*38fd1498Szrj trap. */ 1927*38fd1498Szrj init = inits[0]; 1928*38fd1498Szrj 1929*38fd1498Szrj vars->create (written ? 2 : 1); 1930*38fd1498Szrj var = predcom_tmp_var (ref, 0, tmp_vars); 1931*38fd1498Szrj vars->quick_push (var); 1932*38fd1498Szrj if (written) 1933*38fd1498Szrj vars->quick_push ((*vars)[0]); 1934*38fd1498Szrj 1935*38fd1498Szrj FOR_EACH_VEC_ELT (*vars, i, var) 1936*38fd1498Szrj (*vars)[i] = make_ssa_name (var); 1937*38fd1498Szrj 1938*38fd1498Szrj var = (*vars)[0]; 1939*38fd1498Szrj 1940*38fd1498Szrj init = force_gimple_operand (init, &stmts, written, NULL_TREE); 1941*38fd1498Szrj if (stmts) 1942*38fd1498Szrj gsi_insert_seq_on_edge_immediate (entry, stmts); 1943*38fd1498Szrj 1944*38fd1498Szrj if (written) 1945*38fd1498Szrj { 1946*38fd1498Szrj next = (*vars)[1]; 1947*38fd1498Szrj phi = create_phi_node (var, loop->header); 1948*38fd1498Szrj add_phi_arg (phi, init, entry, UNKNOWN_LOCATION); 1949*38fd1498Szrj add_phi_arg (phi, next, latch, UNKNOWN_LOCATION); 1950*38fd1498Szrj } 1951*38fd1498Szrj else 1952*38fd1498Szrj { 1953*38fd1498Szrj gassign *init_stmt = gimple_build_assign (var, init); 1954*38fd1498Szrj gsi_insert_on_edge_immediate (entry, init_stmt); 1955*38fd1498Szrj } 1956*38fd1498Szrj } 1957*38fd1498Szrj 1958*38fd1498Szrj 1959*38fd1498Szrj /* Execute load motion for references in chain CHAIN. Uids of the newly 1960*38fd1498Szrj created temporary variables are marked in TMP_VARS. */ 1961*38fd1498Szrj 1962*38fd1498Szrj static void 1963*38fd1498Szrj execute_load_motion (struct loop *loop, chain_p chain, bitmap tmp_vars) 1964*38fd1498Szrj { 1965*38fd1498Szrj auto_vec<tree> vars; 1966*38fd1498Szrj dref a; 1967*38fd1498Szrj unsigned n_writes = 0, ridx, i; 1968*38fd1498Szrj tree var; 1969*38fd1498Szrj 1970*38fd1498Szrj gcc_assert (chain->type == CT_INVARIANT); 1971*38fd1498Szrj gcc_assert (!chain->combined); 1972*38fd1498Szrj FOR_EACH_VEC_ELT (chain->refs, i, a) 1973*38fd1498Szrj if (DR_IS_WRITE (a->ref)) 1974*38fd1498Szrj n_writes++; 1975*38fd1498Szrj 1976*38fd1498Szrj /* If there are no reads in the loop, there is nothing to do. */ 1977*38fd1498Szrj if (n_writes == chain->refs.length ()) 1978*38fd1498Szrj return; 1979*38fd1498Szrj 1980*38fd1498Szrj initialize_root_vars_lm (loop, get_chain_root (chain), n_writes > 0, 1981*38fd1498Szrj &vars, chain->inits, tmp_vars); 1982*38fd1498Szrj 1983*38fd1498Szrj ridx = 0; 1984*38fd1498Szrj FOR_EACH_VEC_ELT (chain->refs, i, a) 1985*38fd1498Szrj { 1986*38fd1498Szrj bool is_read = DR_IS_READ (a->ref); 1987*38fd1498Szrj 1988*38fd1498Szrj if (DR_IS_WRITE (a->ref)) 1989*38fd1498Szrj { 1990*38fd1498Szrj n_writes--; 1991*38fd1498Szrj if (n_writes) 1992*38fd1498Szrj { 1993*38fd1498Szrj var = vars[0]; 1994*38fd1498Szrj var = make_ssa_name (SSA_NAME_VAR (var)); 1995*38fd1498Szrj vars[0] = var; 1996*38fd1498Szrj } 1997*38fd1498Szrj else 1998*38fd1498Szrj ridx = 1; 1999*38fd1498Szrj } 2000*38fd1498Szrj 2001*38fd1498Szrj replace_ref_with (a->stmt, vars[ridx], 2002*38fd1498Szrj !is_read, !is_read); 2003*38fd1498Szrj } 2004*38fd1498Szrj } 2005*38fd1498Szrj 2006*38fd1498Szrj /* Returns the single statement in that NAME is used, excepting 2007*38fd1498Szrj the looparound phi nodes contained in one of the chains. If there is no 2008*38fd1498Szrj such statement, or more statements, NULL is returned. */ 2009*38fd1498Szrj 2010*38fd1498Szrj static gimple * 2011*38fd1498Szrj single_nonlooparound_use (tree name) 2012*38fd1498Szrj { 2013*38fd1498Szrj use_operand_p use; 2014*38fd1498Szrj imm_use_iterator it; 2015*38fd1498Szrj gimple *stmt, *ret = NULL; 2016*38fd1498Szrj 2017*38fd1498Szrj FOR_EACH_IMM_USE_FAST (use, it, name) 2018*38fd1498Szrj { 2019*38fd1498Szrj stmt = USE_STMT (use); 2020*38fd1498Szrj 2021*38fd1498Szrj if (gimple_code (stmt) == GIMPLE_PHI) 2022*38fd1498Szrj { 2023*38fd1498Szrj /* Ignore uses in looparound phi nodes. Uses in other phi nodes 2024*38fd1498Szrj could not be processed anyway, so just fail for them. */ 2025*38fd1498Szrj if (bitmap_bit_p (looparound_phis, 2026*38fd1498Szrj SSA_NAME_VERSION (PHI_RESULT (stmt)))) 2027*38fd1498Szrj continue; 2028*38fd1498Szrj 2029*38fd1498Szrj return NULL; 2030*38fd1498Szrj } 2031*38fd1498Szrj else if (is_gimple_debug (stmt)) 2032*38fd1498Szrj continue; 2033*38fd1498Szrj else if (ret != NULL) 2034*38fd1498Szrj return NULL; 2035*38fd1498Szrj else 2036*38fd1498Szrj ret = stmt; 2037*38fd1498Szrj } 2038*38fd1498Szrj 2039*38fd1498Szrj return ret; 2040*38fd1498Szrj } 2041*38fd1498Szrj 2042*38fd1498Szrj /* Remove statement STMT, as well as the chain of assignments in that it is 2043*38fd1498Szrj used. */ 2044*38fd1498Szrj 2045*38fd1498Szrj static void 2046*38fd1498Szrj remove_stmt (gimple *stmt) 2047*38fd1498Szrj { 2048*38fd1498Szrj tree name; 2049*38fd1498Szrj gimple *next; 2050*38fd1498Szrj gimple_stmt_iterator psi; 2051*38fd1498Szrj 2052*38fd1498Szrj if (gimple_code (stmt) == GIMPLE_PHI) 2053*38fd1498Szrj { 2054*38fd1498Szrj name = PHI_RESULT (stmt); 2055*38fd1498Szrj next = single_nonlooparound_use (name); 2056*38fd1498Szrj reset_debug_uses (stmt); 2057*38fd1498Szrj psi = gsi_for_stmt (stmt); 2058*38fd1498Szrj remove_phi_node (&psi, true); 2059*38fd1498Szrj 2060*38fd1498Szrj if (!next 2061*38fd1498Szrj || !gimple_assign_ssa_name_copy_p (next) 2062*38fd1498Szrj || gimple_assign_rhs1 (next) != name) 2063*38fd1498Szrj return; 2064*38fd1498Szrj 2065*38fd1498Szrj stmt = next; 2066*38fd1498Szrj } 2067*38fd1498Szrj 2068*38fd1498Szrj while (1) 2069*38fd1498Szrj { 2070*38fd1498Szrj gimple_stmt_iterator bsi; 2071*38fd1498Szrj 2072*38fd1498Szrj bsi = gsi_for_stmt (stmt); 2073*38fd1498Szrj 2074*38fd1498Szrj name = gimple_assign_lhs (stmt); 2075*38fd1498Szrj if (TREE_CODE (name) == SSA_NAME) 2076*38fd1498Szrj { 2077*38fd1498Szrj next = single_nonlooparound_use (name); 2078*38fd1498Szrj reset_debug_uses (stmt); 2079*38fd1498Szrj } 2080*38fd1498Szrj else 2081*38fd1498Szrj { 2082*38fd1498Szrj /* This is a store to be eliminated. */ 2083*38fd1498Szrj gcc_assert (gimple_vdef (stmt) != NULL); 2084*38fd1498Szrj next = NULL; 2085*38fd1498Szrj } 2086*38fd1498Szrj 2087*38fd1498Szrj unlink_stmt_vdef (stmt); 2088*38fd1498Szrj gsi_remove (&bsi, true); 2089*38fd1498Szrj release_defs (stmt); 2090*38fd1498Szrj 2091*38fd1498Szrj if (!next 2092*38fd1498Szrj || !gimple_assign_ssa_name_copy_p (next) 2093*38fd1498Szrj || gimple_assign_rhs1 (next) != name) 2094*38fd1498Szrj return; 2095*38fd1498Szrj 2096*38fd1498Szrj stmt = next; 2097*38fd1498Szrj } 2098*38fd1498Szrj } 2099*38fd1498Szrj 2100*38fd1498Szrj /* Perform the predictive commoning optimization for a chain CHAIN. 2101*38fd1498Szrj Uids of the newly created temporary variables are marked in TMP_VARS.*/ 2102*38fd1498Szrj 2103*38fd1498Szrj static void 2104*38fd1498Szrj execute_pred_commoning_chain (struct loop *loop, chain_p chain, 2105*38fd1498Szrj bitmap tmp_vars) 2106*38fd1498Szrj { 2107*38fd1498Szrj unsigned i; 2108*38fd1498Szrj dref a; 2109*38fd1498Szrj tree var; 2110*38fd1498Szrj bool in_lhs; 2111*38fd1498Szrj 2112*38fd1498Szrj if (chain->combined) 2113*38fd1498Szrj { 2114*38fd1498Szrj /* For combined chains, just remove the statements that are used to 2115*38fd1498Szrj compute the values of the expression (except for the root one). 2116*38fd1498Szrj We delay this until after all chains are processed. */ 2117*38fd1498Szrj } 2118*38fd1498Szrj else if (chain->type == CT_STORE_STORE) 2119*38fd1498Szrj { 2120*38fd1498Szrj if (chain->length > 0) 2121*38fd1498Szrj { 2122*38fd1498Szrj if (chain->inv_store_elimination) 2123*38fd1498Szrj { 2124*38fd1498Szrj /* If dead stores in this chain only store loop invariant 2125*38fd1498Szrj values, we can simply record the invariant value and use 2126*38fd1498Szrj it directly after loop. */ 2127*38fd1498Szrj initialize_root_vars_store_elim_1 (chain); 2128*38fd1498Szrj } 2129*38fd1498Szrj else 2130*38fd1498Szrj { 2131*38fd1498Szrj /* If dead stores in this chain store loop variant values, 2132*38fd1498Szrj we need to set up the variables by loading from memory 2133*38fd1498Szrj before loop and propagating it with PHI nodes. */ 2134*38fd1498Szrj initialize_root_vars_store_elim_2 (loop, chain, tmp_vars); 2135*38fd1498Szrj } 2136*38fd1498Szrj 2137*38fd1498Szrj /* For inter-iteration store elimination chain, stores at each 2138*38fd1498Szrj distance in loop's last (chain->length - 1) iterations can't 2139*38fd1498Szrj be eliminated, because there is no following killing store. 2140*38fd1498Szrj We need to generate these stores after loop. */ 2141*38fd1498Szrj finalize_eliminated_stores (loop, chain); 2142*38fd1498Szrj } 2143*38fd1498Szrj 2144*38fd1498Szrj bool last_store_p = true; 2145*38fd1498Szrj for (i = chain->refs.length (); i > 0; i--) 2146*38fd1498Szrj { 2147*38fd1498Szrj a = chain->refs[i - 1]; 2148*38fd1498Szrj /* Preserve the last store of the chain. Eliminate other stores 2149*38fd1498Szrj which are killed by the last one. */ 2150*38fd1498Szrj if (DR_IS_WRITE (a->ref)) 2151*38fd1498Szrj { 2152*38fd1498Szrj if (last_store_p) 2153*38fd1498Szrj last_store_p = false; 2154*38fd1498Szrj else 2155*38fd1498Szrj remove_stmt (a->stmt); 2156*38fd1498Szrj 2157*38fd1498Szrj continue; 2158*38fd1498Szrj } 2159*38fd1498Szrj 2160*38fd1498Szrj /* Any load in Store-Store chain must be dominated by a previous 2161*38fd1498Szrj store, we replace the load reference with rhs of the store. */ 2162*38fd1498Szrj dref b = get_chain_last_write_before_load (chain, i - 1); 2163*38fd1498Szrj gcc_assert (b != NULL); 2164*38fd1498Szrj var = gimple_assign_rhs1 (b->stmt); 2165*38fd1498Szrj replace_ref_with (a->stmt, var, false, false); 2166*38fd1498Szrj } 2167*38fd1498Szrj } 2168*38fd1498Szrj else 2169*38fd1498Szrj { 2170*38fd1498Szrj /* For non-combined chains, set up the variables that hold its value. */ 2171*38fd1498Szrj initialize_root_vars (loop, chain, tmp_vars); 2172*38fd1498Szrj a = get_chain_root (chain); 2173*38fd1498Szrj in_lhs = (chain->type == CT_STORE_LOAD 2174*38fd1498Szrj || chain->type == CT_COMBINATION); 2175*38fd1498Szrj replace_ref_with (a->stmt, chain->vars[chain->length], true, in_lhs); 2176*38fd1498Szrj 2177*38fd1498Szrj /* Replace the uses of the original references by these variables. */ 2178*38fd1498Szrj for (i = 1; chain->refs.iterate (i, &a); i++) 2179*38fd1498Szrj { 2180*38fd1498Szrj var = chain->vars[chain->length - a->distance]; 2181*38fd1498Szrj replace_ref_with (a->stmt, var, false, false); 2182*38fd1498Szrj } 2183*38fd1498Szrj } 2184*38fd1498Szrj } 2185*38fd1498Szrj 2186*38fd1498Szrj /* Determines the unroll factor necessary to remove as many temporary variable 2187*38fd1498Szrj copies as possible. CHAINS is the list of chains that will be 2188*38fd1498Szrj optimized. */ 2189*38fd1498Szrj 2190*38fd1498Szrj static unsigned 2191*38fd1498Szrj determine_unroll_factor (vec<chain_p> chains) 2192*38fd1498Szrj { 2193*38fd1498Szrj chain_p chain; 2194*38fd1498Szrj unsigned factor = 1, af, nfactor, i; 2195*38fd1498Szrj unsigned max = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES); 2196*38fd1498Szrj 2197*38fd1498Szrj FOR_EACH_VEC_ELT (chains, i, chain) 2198*38fd1498Szrj { 2199*38fd1498Szrj if (chain->type == CT_INVARIANT) 2200*38fd1498Szrj continue; 2201*38fd1498Szrj /* For now we can't handle unrolling when eliminating stores. */ 2202*38fd1498Szrj else if (chain->type == CT_STORE_STORE) 2203*38fd1498Szrj return 1; 2204*38fd1498Szrj 2205*38fd1498Szrj if (chain->combined) 2206*38fd1498Szrj { 2207*38fd1498Szrj /* For combined chains, we can't handle unrolling if we replace 2208*38fd1498Szrj looparound PHIs. */ 2209*38fd1498Szrj dref a; 2210*38fd1498Szrj unsigned j; 2211*38fd1498Szrj for (j = 1; chain->refs.iterate (j, &a); j++) 2212*38fd1498Szrj if (gimple_code (a->stmt) == GIMPLE_PHI) 2213*38fd1498Szrj return 1; 2214*38fd1498Szrj continue; 2215*38fd1498Szrj } 2216*38fd1498Szrj 2217*38fd1498Szrj /* The best unroll factor for this chain is equal to the number of 2218*38fd1498Szrj temporary variables that we create for it. */ 2219*38fd1498Szrj af = chain->length; 2220*38fd1498Szrj if (chain->has_max_use_after) 2221*38fd1498Szrj af++; 2222*38fd1498Szrj 2223*38fd1498Szrj nfactor = factor * af / gcd (factor, af); 2224*38fd1498Szrj if (nfactor <= max) 2225*38fd1498Szrj factor = nfactor; 2226*38fd1498Szrj } 2227*38fd1498Szrj 2228*38fd1498Szrj return factor; 2229*38fd1498Szrj } 2230*38fd1498Szrj 2231*38fd1498Szrj /* Perform the predictive commoning optimization for CHAINS. 2232*38fd1498Szrj Uids of the newly created temporary variables are marked in TMP_VARS. */ 2233*38fd1498Szrj 2234*38fd1498Szrj static void 2235*38fd1498Szrj execute_pred_commoning (struct loop *loop, vec<chain_p> chains, 2236*38fd1498Szrj bitmap tmp_vars) 2237*38fd1498Szrj { 2238*38fd1498Szrj chain_p chain; 2239*38fd1498Szrj unsigned i; 2240*38fd1498Szrj 2241*38fd1498Szrj FOR_EACH_VEC_ELT (chains, i, chain) 2242*38fd1498Szrj { 2243*38fd1498Szrj if (chain->type == CT_INVARIANT) 2244*38fd1498Szrj execute_load_motion (loop, chain, tmp_vars); 2245*38fd1498Szrj else 2246*38fd1498Szrj execute_pred_commoning_chain (loop, chain, tmp_vars); 2247*38fd1498Szrj } 2248*38fd1498Szrj 2249*38fd1498Szrj FOR_EACH_VEC_ELT (chains, i, chain) 2250*38fd1498Szrj { 2251*38fd1498Szrj if (chain->type == CT_INVARIANT) 2252*38fd1498Szrj ; 2253*38fd1498Szrj else if (chain->combined) 2254*38fd1498Szrj { 2255*38fd1498Szrj /* For combined chains, just remove the statements that are used to 2256*38fd1498Szrj compute the values of the expression (except for the root one). */ 2257*38fd1498Szrj dref a; 2258*38fd1498Szrj unsigned j; 2259*38fd1498Szrj for (j = 1; chain->refs.iterate (j, &a); j++) 2260*38fd1498Szrj remove_stmt (a->stmt); 2261*38fd1498Szrj } 2262*38fd1498Szrj } 2263*38fd1498Szrj 2264*38fd1498Szrj update_ssa (TODO_update_ssa_only_virtuals); 2265*38fd1498Szrj } 2266*38fd1498Szrj 2267*38fd1498Szrj /* For each reference in CHAINS, if its defining statement is 2268*38fd1498Szrj phi node, record the ssa name that is defined by it. */ 2269*38fd1498Szrj 2270*38fd1498Szrj static void 2271*38fd1498Szrj replace_phis_by_defined_names (vec<chain_p> chains) 2272*38fd1498Szrj { 2273*38fd1498Szrj chain_p chain; 2274*38fd1498Szrj dref a; 2275*38fd1498Szrj unsigned i, j; 2276*38fd1498Szrj 2277*38fd1498Szrj FOR_EACH_VEC_ELT (chains, i, chain) 2278*38fd1498Szrj FOR_EACH_VEC_ELT (chain->refs, j, a) 2279*38fd1498Szrj { 2280*38fd1498Szrj if (gimple_code (a->stmt) == GIMPLE_PHI) 2281*38fd1498Szrj { 2282*38fd1498Szrj a->name_defined_by_phi = PHI_RESULT (a->stmt); 2283*38fd1498Szrj a->stmt = NULL; 2284*38fd1498Szrj } 2285*38fd1498Szrj } 2286*38fd1498Szrj } 2287*38fd1498Szrj 2288*38fd1498Szrj /* For each reference in CHAINS, if name_defined_by_phi is not 2289*38fd1498Szrj NULL, use it to set the stmt field. */ 2290*38fd1498Szrj 2291*38fd1498Szrj static void 2292*38fd1498Szrj replace_names_by_phis (vec<chain_p> chains) 2293*38fd1498Szrj { 2294*38fd1498Szrj chain_p chain; 2295*38fd1498Szrj dref a; 2296*38fd1498Szrj unsigned i, j; 2297*38fd1498Szrj 2298*38fd1498Szrj FOR_EACH_VEC_ELT (chains, i, chain) 2299*38fd1498Szrj FOR_EACH_VEC_ELT (chain->refs, j, a) 2300*38fd1498Szrj if (a->stmt == NULL) 2301*38fd1498Szrj { 2302*38fd1498Szrj a->stmt = SSA_NAME_DEF_STMT (a->name_defined_by_phi); 2303*38fd1498Szrj gcc_assert (gimple_code (a->stmt) == GIMPLE_PHI); 2304*38fd1498Szrj a->name_defined_by_phi = NULL_TREE; 2305*38fd1498Szrj } 2306*38fd1498Szrj } 2307*38fd1498Szrj 2308*38fd1498Szrj /* Wrapper over execute_pred_commoning, to pass it as a callback 2309*38fd1498Szrj to tree_transform_and_unroll_loop. */ 2310*38fd1498Szrj 2311*38fd1498Szrj struct epcc_data 2312*38fd1498Szrj { 2313*38fd1498Szrj vec<chain_p> chains; 2314*38fd1498Szrj bitmap tmp_vars; 2315*38fd1498Szrj }; 2316*38fd1498Szrj 2317*38fd1498Szrj static void 2318*38fd1498Szrj execute_pred_commoning_cbck (struct loop *loop, void *data) 2319*38fd1498Szrj { 2320*38fd1498Szrj struct epcc_data *const dta = (struct epcc_data *) data; 2321*38fd1498Szrj 2322*38fd1498Szrj /* Restore phi nodes that were replaced by ssa names before 2323*38fd1498Szrj tree_transform_and_unroll_loop (see detailed description in 2324*38fd1498Szrj tree_predictive_commoning_loop). */ 2325*38fd1498Szrj replace_names_by_phis (dta->chains); 2326*38fd1498Szrj execute_pred_commoning (loop, dta->chains, dta->tmp_vars); 2327*38fd1498Szrj } 2328*38fd1498Szrj 2329*38fd1498Szrj /* Base NAME and all the names in the chain of phi nodes that use it 2330*38fd1498Szrj on variable VAR. The phi nodes are recognized by being in the copies of 2331*38fd1498Szrj the header of the LOOP. */ 2332*38fd1498Szrj 2333*38fd1498Szrj static void 2334*38fd1498Szrj base_names_in_chain_on (struct loop *loop, tree name, tree var) 2335*38fd1498Szrj { 2336*38fd1498Szrj gimple *stmt, *phi; 2337*38fd1498Szrj imm_use_iterator iter; 2338*38fd1498Szrj 2339*38fd1498Szrj replace_ssa_name_symbol (name, var); 2340*38fd1498Szrj 2341*38fd1498Szrj while (1) 2342*38fd1498Szrj { 2343*38fd1498Szrj phi = NULL; 2344*38fd1498Szrj FOR_EACH_IMM_USE_STMT (stmt, iter, name) 2345*38fd1498Szrj { 2346*38fd1498Szrj if (gimple_code (stmt) == GIMPLE_PHI 2347*38fd1498Szrj && flow_bb_inside_loop_p (loop, gimple_bb (stmt))) 2348*38fd1498Szrj { 2349*38fd1498Szrj phi = stmt; 2350*38fd1498Szrj BREAK_FROM_IMM_USE_STMT (iter); 2351*38fd1498Szrj } 2352*38fd1498Szrj } 2353*38fd1498Szrj if (!phi) 2354*38fd1498Szrj return; 2355*38fd1498Szrj 2356*38fd1498Szrj name = PHI_RESULT (phi); 2357*38fd1498Szrj replace_ssa_name_symbol (name, var); 2358*38fd1498Szrj } 2359*38fd1498Szrj } 2360*38fd1498Szrj 2361*38fd1498Szrj /* Given an unrolled LOOP after predictive commoning, remove the 2362*38fd1498Szrj register copies arising from phi nodes by changing the base 2363*38fd1498Szrj variables of SSA names. TMP_VARS is the set of the temporary variables 2364*38fd1498Szrj for those we want to perform this. */ 2365*38fd1498Szrj 2366*38fd1498Szrj static void 2367*38fd1498Szrj eliminate_temp_copies (struct loop *loop, bitmap tmp_vars) 2368*38fd1498Szrj { 2369*38fd1498Szrj edge e; 2370*38fd1498Szrj gphi *phi; 2371*38fd1498Szrj gimple *stmt; 2372*38fd1498Szrj tree name, use, var; 2373*38fd1498Szrj gphi_iterator psi; 2374*38fd1498Szrj 2375*38fd1498Szrj e = loop_latch_edge (loop); 2376*38fd1498Szrj for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi)) 2377*38fd1498Szrj { 2378*38fd1498Szrj phi = psi.phi (); 2379*38fd1498Szrj name = PHI_RESULT (phi); 2380*38fd1498Szrj var = SSA_NAME_VAR (name); 2381*38fd1498Szrj if (!var || !bitmap_bit_p (tmp_vars, DECL_UID (var))) 2382*38fd1498Szrj continue; 2383*38fd1498Szrj use = PHI_ARG_DEF_FROM_EDGE (phi, e); 2384*38fd1498Szrj gcc_assert (TREE_CODE (use) == SSA_NAME); 2385*38fd1498Szrj 2386*38fd1498Szrj /* Base all the ssa names in the ud and du chain of NAME on VAR. */ 2387*38fd1498Szrj stmt = SSA_NAME_DEF_STMT (use); 2388*38fd1498Szrj while (gimple_code (stmt) == GIMPLE_PHI 2389*38fd1498Szrj /* In case we could not unroll the loop enough to eliminate 2390*38fd1498Szrj all copies, we may reach the loop header before the defining 2391*38fd1498Szrj statement (in that case, some register copies will be present 2392*38fd1498Szrj in loop latch in the final code, corresponding to the newly 2393*38fd1498Szrj created looparound phi nodes). */ 2394*38fd1498Szrj && gimple_bb (stmt) != loop->header) 2395*38fd1498Szrj { 2396*38fd1498Szrj gcc_assert (single_pred_p (gimple_bb (stmt))); 2397*38fd1498Szrj use = PHI_ARG_DEF (stmt, 0); 2398*38fd1498Szrj stmt = SSA_NAME_DEF_STMT (use); 2399*38fd1498Szrj } 2400*38fd1498Szrj 2401*38fd1498Szrj base_names_in_chain_on (loop, use, var); 2402*38fd1498Szrj } 2403*38fd1498Szrj } 2404*38fd1498Szrj 2405*38fd1498Szrj /* Returns true if CHAIN is suitable to be combined. */ 2406*38fd1498Szrj 2407*38fd1498Szrj static bool 2408*38fd1498Szrj chain_can_be_combined_p (chain_p chain) 2409*38fd1498Szrj { 2410*38fd1498Szrj return (!chain->combined 2411*38fd1498Szrj && (chain->type == CT_LOAD || chain->type == CT_COMBINATION)); 2412*38fd1498Szrj } 2413*38fd1498Szrj 2414*38fd1498Szrj /* Returns the modify statement that uses NAME. Skips over assignment 2415*38fd1498Szrj statements, NAME is replaced with the actual name used in the returned 2416*38fd1498Szrj statement. */ 2417*38fd1498Szrj 2418*38fd1498Szrj static gimple * 2419*38fd1498Szrj find_use_stmt (tree *name) 2420*38fd1498Szrj { 2421*38fd1498Szrj gimple *stmt; 2422*38fd1498Szrj tree rhs, lhs; 2423*38fd1498Szrj 2424*38fd1498Szrj /* Skip over assignments. */ 2425*38fd1498Szrj while (1) 2426*38fd1498Szrj { 2427*38fd1498Szrj stmt = single_nonlooparound_use (*name); 2428*38fd1498Szrj if (!stmt) 2429*38fd1498Szrj return NULL; 2430*38fd1498Szrj 2431*38fd1498Szrj if (gimple_code (stmt) != GIMPLE_ASSIGN) 2432*38fd1498Szrj return NULL; 2433*38fd1498Szrj 2434*38fd1498Szrj lhs = gimple_assign_lhs (stmt); 2435*38fd1498Szrj if (TREE_CODE (lhs) != SSA_NAME) 2436*38fd1498Szrj return NULL; 2437*38fd1498Szrj 2438*38fd1498Szrj if (gimple_assign_copy_p (stmt)) 2439*38fd1498Szrj { 2440*38fd1498Szrj rhs = gimple_assign_rhs1 (stmt); 2441*38fd1498Szrj if (rhs != *name) 2442*38fd1498Szrj return NULL; 2443*38fd1498Szrj 2444*38fd1498Szrj *name = lhs; 2445*38fd1498Szrj } 2446*38fd1498Szrj else if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)) 2447*38fd1498Szrj == GIMPLE_BINARY_RHS) 2448*38fd1498Szrj return stmt; 2449*38fd1498Szrj else 2450*38fd1498Szrj return NULL; 2451*38fd1498Szrj } 2452*38fd1498Szrj } 2453*38fd1498Szrj 2454*38fd1498Szrj /* Returns true if we may perform reassociation for operation CODE in TYPE. */ 2455*38fd1498Szrj 2456*38fd1498Szrj static bool 2457*38fd1498Szrj may_reassociate_p (tree type, enum tree_code code) 2458*38fd1498Szrj { 2459*38fd1498Szrj if (FLOAT_TYPE_P (type) 2460*38fd1498Szrj && !flag_unsafe_math_optimizations) 2461*38fd1498Szrj return false; 2462*38fd1498Szrj 2463*38fd1498Szrj return (commutative_tree_code (code) 2464*38fd1498Szrj && associative_tree_code (code)); 2465*38fd1498Szrj } 2466*38fd1498Szrj 2467*38fd1498Szrj /* If the operation used in STMT is associative and commutative, go through the 2468*38fd1498Szrj tree of the same operations and returns its root. Distance to the root 2469*38fd1498Szrj is stored in DISTANCE. */ 2470*38fd1498Szrj 2471*38fd1498Szrj static gimple * 2472*38fd1498Szrj find_associative_operation_root (gimple *stmt, unsigned *distance) 2473*38fd1498Szrj { 2474*38fd1498Szrj tree lhs; 2475*38fd1498Szrj gimple *next; 2476*38fd1498Szrj enum tree_code code = gimple_assign_rhs_code (stmt); 2477*38fd1498Szrj tree type = TREE_TYPE (gimple_assign_lhs (stmt)); 2478*38fd1498Szrj unsigned dist = 0; 2479*38fd1498Szrj 2480*38fd1498Szrj if (!may_reassociate_p (type, code)) 2481*38fd1498Szrj return NULL; 2482*38fd1498Szrj 2483*38fd1498Szrj while (1) 2484*38fd1498Szrj { 2485*38fd1498Szrj lhs = gimple_assign_lhs (stmt); 2486*38fd1498Szrj gcc_assert (TREE_CODE (lhs) == SSA_NAME); 2487*38fd1498Szrj 2488*38fd1498Szrj next = find_use_stmt (&lhs); 2489*38fd1498Szrj if (!next 2490*38fd1498Szrj || gimple_assign_rhs_code (next) != code) 2491*38fd1498Szrj break; 2492*38fd1498Szrj 2493*38fd1498Szrj stmt = next; 2494*38fd1498Szrj dist++; 2495*38fd1498Szrj } 2496*38fd1498Szrj 2497*38fd1498Szrj if (distance) 2498*38fd1498Szrj *distance = dist; 2499*38fd1498Szrj return stmt; 2500*38fd1498Szrj } 2501*38fd1498Szrj 2502*38fd1498Szrj /* Returns the common statement in that NAME1 and NAME2 have a use. If there 2503*38fd1498Szrj is no such statement, returns NULL_TREE. In case the operation used on 2504*38fd1498Szrj NAME1 and NAME2 is associative and commutative, returns the root of the 2505*38fd1498Szrj tree formed by this operation instead of the statement that uses NAME1 or 2506*38fd1498Szrj NAME2. */ 2507*38fd1498Szrj 2508*38fd1498Szrj static gimple * 2509*38fd1498Szrj find_common_use_stmt (tree *name1, tree *name2) 2510*38fd1498Szrj { 2511*38fd1498Szrj gimple *stmt1, *stmt2; 2512*38fd1498Szrj 2513*38fd1498Szrj stmt1 = find_use_stmt (name1); 2514*38fd1498Szrj if (!stmt1) 2515*38fd1498Szrj return NULL; 2516*38fd1498Szrj 2517*38fd1498Szrj stmt2 = find_use_stmt (name2); 2518*38fd1498Szrj if (!stmt2) 2519*38fd1498Szrj return NULL; 2520*38fd1498Szrj 2521*38fd1498Szrj if (stmt1 == stmt2) 2522*38fd1498Szrj return stmt1; 2523*38fd1498Szrj 2524*38fd1498Szrj stmt1 = find_associative_operation_root (stmt1, NULL); 2525*38fd1498Szrj if (!stmt1) 2526*38fd1498Szrj return NULL; 2527*38fd1498Szrj stmt2 = find_associative_operation_root (stmt2, NULL); 2528*38fd1498Szrj if (!stmt2) 2529*38fd1498Szrj return NULL; 2530*38fd1498Szrj 2531*38fd1498Szrj return (stmt1 == stmt2 ? stmt1 : NULL); 2532*38fd1498Szrj } 2533*38fd1498Szrj 2534*38fd1498Szrj /* Checks whether R1 and R2 are combined together using CODE, with the result 2535*38fd1498Szrj in RSLT_TYPE, in order R1 CODE R2 if SWAP is false and in order R2 CODE R1 2536*38fd1498Szrj if it is true. If CODE is ERROR_MARK, set these values instead. */ 2537*38fd1498Szrj 2538*38fd1498Szrj static bool 2539*38fd1498Szrj combinable_refs_p (dref r1, dref r2, 2540*38fd1498Szrj enum tree_code *code, bool *swap, tree *rslt_type) 2541*38fd1498Szrj { 2542*38fd1498Szrj enum tree_code acode; 2543*38fd1498Szrj bool aswap; 2544*38fd1498Szrj tree atype; 2545*38fd1498Szrj tree name1, name2; 2546*38fd1498Szrj gimple *stmt; 2547*38fd1498Szrj 2548*38fd1498Szrj name1 = name_for_ref (r1); 2549*38fd1498Szrj name2 = name_for_ref (r2); 2550*38fd1498Szrj gcc_assert (name1 != NULL_TREE && name2 != NULL_TREE); 2551*38fd1498Szrj 2552*38fd1498Szrj stmt = find_common_use_stmt (&name1, &name2); 2553*38fd1498Szrj 2554*38fd1498Szrj if (!stmt 2555*38fd1498Szrj /* A simple post-dominance check - make sure the combination 2556*38fd1498Szrj is executed under the same condition as the references. */ 2557*38fd1498Szrj || (gimple_bb (stmt) != gimple_bb (r1->stmt) 2558*38fd1498Szrj && gimple_bb (stmt) != gimple_bb (r2->stmt))) 2559*38fd1498Szrj return false; 2560*38fd1498Szrj 2561*38fd1498Szrj acode = gimple_assign_rhs_code (stmt); 2562*38fd1498Szrj aswap = (!commutative_tree_code (acode) 2563*38fd1498Szrj && gimple_assign_rhs1 (stmt) != name1); 2564*38fd1498Szrj atype = TREE_TYPE (gimple_assign_lhs (stmt)); 2565*38fd1498Szrj 2566*38fd1498Szrj if (*code == ERROR_MARK) 2567*38fd1498Szrj { 2568*38fd1498Szrj *code = acode; 2569*38fd1498Szrj *swap = aswap; 2570*38fd1498Szrj *rslt_type = atype; 2571*38fd1498Szrj return true; 2572*38fd1498Szrj } 2573*38fd1498Szrj 2574*38fd1498Szrj return (*code == acode 2575*38fd1498Szrj && *swap == aswap 2576*38fd1498Szrj && *rslt_type == atype); 2577*38fd1498Szrj } 2578*38fd1498Szrj 2579*38fd1498Szrj /* Remove OP from the operation on rhs of STMT, and replace STMT with 2580*38fd1498Szrj an assignment of the remaining operand. */ 2581*38fd1498Szrj 2582*38fd1498Szrj static void 2583*38fd1498Szrj remove_name_from_operation (gimple *stmt, tree op) 2584*38fd1498Szrj { 2585*38fd1498Szrj tree other_op; 2586*38fd1498Szrj gimple_stmt_iterator si; 2587*38fd1498Szrj 2588*38fd1498Szrj gcc_assert (is_gimple_assign (stmt)); 2589*38fd1498Szrj 2590*38fd1498Szrj if (gimple_assign_rhs1 (stmt) == op) 2591*38fd1498Szrj other_op = gimple_assign_rhs2 (stmt); 2592*38fd1498Szrj else 2593*38fd1498Szrj other_op = gimple_assign_rhs1 (stmt); 2594*38fd1498Szrj 2595*38fd1498Szrj si = gsi_for_stmt (stmt); 2596*38fd1498Szrj gimple_assign_set_rhs_from_tree (&si, other_op); 2597*38fd1498Szrj 2598*38fd1498Szrj /* We should not have reallocated STMT. */ 2599*38fd1498Szrj gcc_assert (gsi_stmt (si) == stmt); 2600*38fd1498Szrj 2601*38fd1498Szrj update_stmt (stmt); 2602*38fd1498Szrj } 2603*38fd1498Szrj 2604*38fd1498Szrj /* Reassociates the expression in that NAME1 and NAME2 are used so that they 2605*38fd1498Szrj are combined in a single statement, and returns this statement. */ 2606*38fd1498Szrj 2607*38fd1498Szrj static gimple * 2608*38fd1498Szrj reassociate_to_the_same_stmt (tree name1, tree name2) 2609*38fd1498Szrj { 2610*38fd1498Szrj gimple *stmt1, *stmt2, *root1, *root2, *s1, *s2; 2611*38fd1498Szrj gassign *new_stmt, *tmp_stmt; 2612*38fd1498Szrj tree new_name, tmp_name, var, r1, r2; 2613*38fd1498Szrj unsigned dist1, dist2; 2614*38fd1498Szrj enum tree_code code; 2615*38fd1498Szrj tree type = TREE_TYPE (name1); 2616*38fd1498Szrj gimple_stmt_iterator bsi; 2617*38fd1498Szrj 2618*38fd1498Szrj stmt1 = find_use_stmt (&name1); 2619*38fd1498Szrj stmt2 = find_use_stmt (&name2); 2620*38fd1498Szrj root1 = find_associative_operation_root (stmt1, &dist1); 2621*38fd1498Szrj root2 = find_associative_operation_root (stmt2, &dist2); 2622*38fd1498Szrj code = gimple_assign_rhs_code (stmt1); 2623*38fd1498Szrj 2624*38fd1498Szrj gcc_assert (root1 && root2 && root1 == root2 2625*38fd1498Szrj && code == gimple_assign_rhs_code (stmt2)); 2626*38fd1498Szrj 2627*38fd1498Szrj /* Find the root of the nearest expression in that both NAME1 and NAME2 2628*38fd1498Szrj are used. */ 2629*38fd1498Szrj r1 = name1; 2630*38fd1498Szrj s1 = stmt1; 2631*38fd1498Szrj r2 = name2; 2632*38fd1498Szrj s2 = stmt2; 2633*38fd1498Szrj 2634*38fd1498Szrj while (dist1 > dist2) 2635*38fd1498Szrj { 2636*38fd1498Szrj s1 = find_use_stmt (&r1); 2637*38fd1498Szrj r1 = gimple_assign_lhs (s1); 2638*38fd1498Szrj dist1--; 2639*38fd1498Szrj } 2640*38fd1498Szrj while (dist2 > dist1) 2641*38fd1498Szrj { 2642*38fd1498Szrj s2 = find_use_stmt (&r2); 2643*38fd1498Szrj r2 = gimple_assign_lhs (s2); 2644*38fd1498Szrj dist2--; 2645*38fd1498Szrj } 2646*38fd1498Szrj 2647*38fd1498Szrj while (s1 != s2) 2648*38fd1498Szrj { 2649*38fd1498Szrj s1 = find_use_stmt (&r1); 2650*38fd1498Szrj r1 = gimple_assign_lhs (s1); 2651*38fd1498Szrj s2 = find_use_stmt (&r2); 2652*38fd1498Szrj r2 = gimple_assign_lhs (s2); 2653*38fd1498Szrj } 2654*38fd1498Szrj 2655*38fd1498Szrj /* Remove NAME1 and NAME2 from the statements in that they are used 2656*38fd1498Szrj currently. */ 2657*38fd1498Szrj remove_name_from_operation (stmt1, name1); 2658*38fd1498Szrj remove_name_from_operation (stmt2, name2); 2659*38fd1498Szrj 2660*38fd1498Szrj /* Insert the new statement combining NAME1 and NAME2 before S1, and 2661*38fd1498Szrj combine it with the rhs of S1. */ 2662*38fd1498Szrj var = create_tmp_reg (type, "predreastmp"); 2663*38fd1498Szrj new_name = make_ssa_name (var); 2664*38fd1498Szrj new_stmt = gimple_build_assign (new_name, code, name1, name2); 2665*38fd1498Szrj 2666*38fd1498Szrj var = create_tmp_reg (type, "predreastmp"); 2667*38fd1498Szrj tmp_name = make_ssa_name (var); 2668*38fd1498Szrj 2669*38fd1498Szrj /* Rhs of S1 may now be either a binary expression with operation 2670*38fd1498Szrj CODE, or gimple_val (in case that stmt1 == s1 or stmt2 == s1, 2671*38fd1498Szrj so that name1 or name2 was removed from it). */ 2672*38fd1498Szrj tmp_stmt = gimple_build_assign (tmp_name, gimple_assign_rhs_code (s1), 2673*38fd1498Szrj gimple_assign_rhs1 (s1), 2674*38fd1498Szrj gimple_assign_rhs2 (s1)); 2675*38fd1498Szrj 2676*38fd1498Szrj bsi = gsi_for_stmt (s1); 2677*38fd1498Szrj gimple_assign_set_rhs_with_ops (&bsi, code, new_name, tmp_name); 2678*38fd1498Szrj s1 = gsi_stmt (bsi); 2679*38fd1498Szrj update_stmt (s1); 2680*38fd1498Szrj 2681*38fd1498Szrj gsi_insert_before (&bsi, new_stmt, GSI_SAME_STMT); 2682*38fd1498Szrj gsi_insert_before (&bsi, tmp_stmt, GSI_SAME_STMT); 2683*38fd1498Szrj 2684*38fd1498Szrj return new_stmt; 2685*38fd1498Szrj } 2686*38fd1498Szrj 2687*38fd1498Szrj /* Returns the statement that combines references R1 and R2. In case R1 2688*38fd1498Szrj and R2 are not used in the same statement, but they are used with an 2689*38fd1498Szrj associative and commutative operation in the same expression, reassociate 2690*38fd1498Szrj the expression so that they are used in the same statement. */ 2691*38fd1498Szrj 2692*38fd1498Szrj static gimple * 2693*38fd1498Szrj stmt_combining_refs (dref r1, dref r2) 2694*38fd1498Szrj { 2695*38fd1498Szrj gimple *stmt1, *stmt2; 2696*38fd1498Szrj tree name1 = name_for_ref (r1); 2697*38fd1498Szrj tree name2 = name_for_ref (r2); 2698*38fd1498Szrj 2699*38fd1498Szrj stmt1 = find_use_stmt (&name1); 2700*38fd1498Szrj stmt2 = find_use_stmt (&name2); 2701*38fd1498Szrj if (stmt1 == stmt2) 2702*38fd1498Szrj return stmt1; 2703*38fd1498Szrj 2704*38fd1498Szrj return reassociate_to_the_same_stmt (name1, name2); 2705*38fd1498Szrj } 2706*38fd1498Szrj 2707*38fd1498Szrj /* Tries to combine chains CH1 and CH2 together. If this succeeds, the 2708*38fd1498Szrj description of the new chain is returned, otherwise we return NULL. */ 2709*38fd1498Szrj 2710*38fd1498Szrj static chain_p 2711*38fd1498Szrj combine_chains (chain_p ch1, chain_p ch2) 2712*38fd1498Szrj { 2713*38fd1498Szrj dref r1, r2, nw; 2714*38fd1498Szrj enum tree_code op = ERROR_MARK; 2715*38fd1498Szrj bool swap = false; 2716*38fd1498Szrj chain_p new_chain; 2717*38fd1498Szrj unsigned i; 2718*38fd1498Szrj tree rslt_type = NULL_TREE; 2719*38fd1498Szrj 2720*38fd1498Szrj if (ch1 == ch2) 2721*38fd1498Szrj return NULL; 2722*38fd1498Szrj if (ch1->length != ch2->length) 2723*38fd1498Szrj return NULL; 2724*38fd1498Szrj 2725*38fd1498Szrj if (ch1->refs.length () != ch2->refs.length ()) 2726*38fd1498Szrj return NULL; 2727*38fd1498Szrj 2728*38fd1498Szrj for (i = 0; (ch1->refs.iterate (i, &r1) 2729*38fd1498Szrj && ch2->refs.iterate (i, &r2)); i++) 2730*38fd1498Szrj { 2731*38fd1498Szrj if (r1->distance != r2->distance) 2732*38fd1498Szrj return NULL; 2733*38fd1498Szrj 2734*38fd1498Szrj if (!combinable_refs_p (r1, r2, &op, &swap, &rslt_type)) 2735*38fd1498Szrj return NULL; 2736*38fd1498Szrj } 2737*38fd1498Szrj 2738*38fd1498Szrj if (swap) 2739*38fd1498Szrj std::swap (ch1, ch2); 2740*38fd1498Szrj 2741*38fd1498Szrj new_chain = XCNEW (struct chain); 2742*38fd1498Szrj new_chain->type = CT_COMBINATION; 2743*38fd1498Szrj new_chain->op = op; 2744*38fd1498Szrj new_chain->ch1 = ch1; 2745*38fd1498Szrj new_chain->ch2 = ch2; 2746*38fd1498Szrj new_chain->rslt_type = rslt_type; 2747*38fd1498Szrj new_chain->length = ch1->length; 2748*38fd1498Szrj 2749*38fd1498Szrj for (i = 0; (ch1->refs.iterate (i, &r1) 2750*38fd1498Szrj && ch2->refs.iterate (i, &r2)); i++) 2751*38fd1498Szrj { 2752*38fd1498Szrj nw = XCNEW (struct dref_d); 2753*38fd1498Szrj nw->stmt = stmt_combining_refs (r1, r2); 2754*38fd1498Szrj nw->distance = r1->distance; 2755*38fd1498Szrj 2756*38fd1498Szrj new_chain->refs.safe_push (nw); 2757*38fd1498Szrj } 2758*38fd1498Szrj 2759*38fd1498Szrj ch1->combined = true; 2760*38fd1498Szrj ch2->combined = true; 2761*38fd1498Szrj return new_chain; 2762*38fd1498Szrj } 2763*38fd1498Szrj 2764*38fd1498Szrj /* Recursively update position information of all offspring chains to ROOT 2765*38fd1498Szrj chain's position information. */ 2766*38fd1498Szrj 2767*38fd1498Szrj static void 2768*38fd1498Szrj update_pos_for_combined_chains (chain_p root) 2769*38fd1498Szrj { 2770*38fd1498Szrj chain_p ch1 = root->ch1, ch2 = root->ch2; 2771*38fd1498Szrj dref ref, ref1, ref2; 2772*38fd1498Szrj for (unsigned j = 0; (root->refs.iterate (j, &ref) 2773*38fd1498Szrj && ch1->refs.iterate (j, &ref1) 2774*38fd1498Szrj && ch2->refs.iterate (j, &ref2)); ++j) 2775*38fd1498Szrj ref1->pos = ref2->pos = ref->pos; 2776*38fd1498Szrj 2777*38fd1498Szrj if (ch1->type == CT_COMBINATION) 2778*38fd1498Szrj update_pos_for_combined_chains (ch1); 2779*38fd1498Szrj if (ch2->type == CT_COMBINATION) 2780*38fd1498Szrj update_pos_for_combined_chains (ch2); 2781*38fd1498Szrj } 2782*38fd1498Szrj 2783*38fd1498Szrj /* Returns true if statement S1 dominates statement S2. */ 2784*38fd1498Szrj 2785*38fd1498Szrj static bool 2786*38fd1498Szrj pcom_stmt_dominates_stmt_p (gimple *s1, gimple *s2) 2787*38fd1498Szrj { 2788*38fd1498Szrj basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2); 2789*38fd1498Szrj 2790*38fd1498Szrj if (!bb1 || s1 == s2) 2791*38fd1498Szrj return true; 2792*38fd1498Szrj 2793*38fd1498Szrj if (bb1 == bb2) 2794*38fd1498Szrj return gimple_uid (s1) < gimple_uid (s2); 2795*38fd1498Szrj 2796*38fd1498Szrj return dominated_by_p (CDI_DOMINATORS, bb2, bb1); 2797*38fd1498Szrj } 2798*38fd1498Szrj 2799*38fd1498Szrj /* Try to combine the CHAINS in LOOP. */ 2800*38fd1498Szrj 2801*38fd1498Szrj static void 2802*38fd1498Szrj try_combine_chains (struct loop *loop, vec<chain_p> *chains) 2803*38fd1498Szrj { 2804*38fd1498Szrj unsigned i, j; 2805*38fd1498Szrj chain_p ch1, ch2, cch; 2806*38fd1498Szrj auto_vec<chain_p> worklist; 2807*38fd1498Szrj bool combined_p = false; 2808*38fd1498Szrj 2809*38fd1498Szrj FOR_EACH_VEC_ELT (*chains, i, ch1) 2810*38fd1498Szrj if (chain_can_be_combined_p (ch1)) 2811*38fd1498Szrj worklist.safe_push (ch1); 2812*38fd1498Szrj 2813*38fd1498Szrj while (!worklist.is_empty ()) 2814*38fd1498Szrj { 2815*38fd1498Szrj ch1 = worklist.pop (); 2816*38fd1498Szrj if (!chain_can_be_combined_p (ch1)) 2817*38fd1498Szrj continue; 2818*38fd1498Szrj 2819*38fd1498Szrj FOR_EACH_VEC_ELT (*chains, j, ch2) 2820*38fd1498Szrj { 2821*38fd1498Szrj if (!chain_can_be_combined_p (ch2)) 2822*38fd1498Szrj continue; 2823*38fd1498Szrj 2824*38fd1498Szrj cch = combine_chains (ch1, ch2); 2825*38fd1498Szrj if (cch) 2826*38fd1498Szrj { 2827*38fd1498Szrj worklist.safe_push (cch); 2828*38fd1498Szrj chains->safe_push (cch); 2829*38fd1498Szrj combined_p = true; 2830*38fd1498Szrj break; 2831*38fd1498Szrj } 2832*38fd1498Szrj } 2833*38fd1498Szrj } 2834*38fd1498Szrj if (!combined_p) 2835*38fd1498Szrj return; 2836*38fd1498Szrj 2837*38fd1498Szrj /* Setup UID for all statements in dominance order. */ 2838*38fd1498Szrj basic_block *bbs = get_loop_body (loop); 2839*38fd1498Szrj renumber_gimple_stmt_uids_in_blocks (bbs, loop->num_nodes); 2840*38fd1498Szrj free (bbs); 2841*38fd1498Szrj 2842*38fd1498Szrj /* Re-association in combined chains may generate statements different to 2843*38fd1498Szrj order of references of the original chain. We need to keep references 2844*38fd1498Szrj of combined chain in dominance order so that all uses will be inserted 2845*38fd1498Szrj after definitions. Note: 2846*38fd1498Szrj A) This is necessary for all combined chains. 2847*38fd1498Szrj B) This is only necessary for ZERO distance references because other 2848*38fd1498Szrj references inherit value from loop carried PHIs. 2849*38fd1498Szrj 2850*38fd1498Szrj We first update position information for all combined chains. */ 2851*38fd1498Szrj dref ref; 2852*38fd1498Szrj for (i = 0; chains->iterate (i, &ch1); ++i) 2853*38fd1498Szrj { 2854*38fd1498Szrj if (ch1->type != CT_COMBINATION || ch1->combined) 2855*38fd1498Szrj continue; 2856*38fd1498Szrj 2857*38fd1498Szrj for (j = 0; ch1->refs.iterate (j, &ref); ++j) 2858*38fd1498Szrj ref->pos = gimple_uid (ref->stmt); 2859*38fd1498Szrj 2860*38fd1498Szrj update_pos_for_combined_chains (ch1); 2861*38fd1498Szrj } 2862*38fd1498Szrj /* Then sort references according to newly updated position information. */ 2863*38fd1498Szrj for (i = 0; chains->iterate (i, &ch1); ++i) 2864*38fd1498Szrj { 2865*38fd1498Szrj if (ch1->type != CT_COMBINATION && !ch1->combined) 2866*38fd1498Szrj continue; 2867*38fd1498Szrj 2868*38fd1498Szrj /* Find the first reference with non-ZERO distance. */ 2869*38fd1498Szrj if (ch1->length == 0) 2870*38fd1498Szrj j = ch1->refs.length(); 2871*38fd1498Szrj else 2872*38fd1498Szrj { 2873*38fd1498Szrj for (j = 0; ch1->refs.iterate (j, &ref); ++j) 2874*38fd1498Szrj if (ref->distance != 0) 2875*38fd1498Szrj break; 2876*38fd1498Szrj } 2877*38fd1498Szrj 2878*38fd1498Szrj /* Sort all ZERO distance references by position. */ 2879*38fd1498Szrj qsort (&ch1->refs[0], j, sizeof (ch1->refs[0]), order_drefs_by_pos); 2880*38fd1498Szrj 2881*38fd1498Szrj if (ch1->combined) 2882*38fd1498Szrj continue; 2883*38fd1498Szrj 2884*38fd1498Szrj /* For ZERO length chain, has_max_use_after must be true since root 2885*38fd1498Szrj combined stmt must dominates others. */ 2886*38fd1498Szrj if (ch1->length == 0) 2887*38fd1498Szrj { 2888*38fd1498Szrj ch1->has_max_use_after = true; 2889*38fd1498Szrj continue; 2890*38fd1498Szrj } 2891*38fd1498Szrj /* Check if there is use at max distance after root for combined chains 2892*38fd1498Szrj and set flag accordingly. */ 2893*38fd1498Szrj ch1->has_max_use_after = false; 2894*38fd1498Szrj gimple *root_stmt = get_chain_root (ch1)->stmt; 2895*38fd1498Szrj for (j = 1; ch1->refs.iterate (j, &ref); ++j) 2896*38fd1498Szrj { 2897*38fd1498Szrj if (ref->distance == ch1->length 2898*38fd1498Szrj && !pcom_stmt_dominates_stmt_p (ref->stmt, root_stmt)) 2899*38fd1498Szrj { 2900*38fd1498Szrj ch1->has_max_use_after = true; 2901*38fd1498Szrj break; 2902*38fd1498Szrj } 2903*38fd1498Szrj } 2904*38fd1498Szrj } 2905*38fd1498Szrj } 2906*38fd1498Szrj 2907*38fd1498Szrj /* Prepare initializers for store elimination CHAIN in LOOP. Returns false 2908*38fd1498Szrj if this is impossible because one of these initializers may trap, true 2909*38fd1498Szrj otherwise. */ 2910*38fd1498Szrj 2911*38fd1498Szrj static bool 2912*38fd1498Szrj prepare_initializers_chain_store_elim (struct loop *loop, chain_p chain) 2913*38fd1498Szrj { 2914*38fd1498Szrj unsigned i, n = chain->length; 2915*38fd1498Szrj 2916*38fd1498Szrj /* For now we can't eliminate stores if some of them are conditional 2917*38fd1498Szrj executed. */ 2918*38fd1498Szrj if (!chain->all_always_accessed) 2919*38fd1498Szrj return false; 2920*38fd1498Szrj 2921*38fd1498Szrj /* Nothing to intialize for intra-iteration store elimination. */ 2922*38fd1498Szrj if (n == 0 && chain->type == CT_STORE_STORE) 2923*38fd1498Szrj return true; 2924*38fd1498Szrj 2925*38fd1498Szrj /* For store elimination chain, there is nothing to initialize if stores 2926*38fd1498Szrj to be eliminated only store loop invariant values into memory. */ 2927*38fd1498Szrj if (chain->type == CT_STORE_STORE 2928*38fd1498Szrj && is_inv_store_elimination_chain (loop, chain)) 2929*38fd1498Szrj { 2930*38fd1498Szrj chain->inv_store_elimination = true; 2931*38fd1498Szrj return true; 2932*38fd1498Szrj } 2933*38fd1498Szrj 2934*38fd1498Szrj chain->inits.create (n); 2935*38fd1498Szrj chain->inits.safe_grow_cleared (n); 2936*38fd1498Szrj 2937*38fd1498Szrj /* For store eliminatin chain like below: 2938*38fd1498Szrj 2939*38fd1498Szrj for (i = 0; i < len; i++) 2940*38fd1498Szrj { 2941*38fd1498Szrj a[i] = 1; 2942*38fd1498Szrj // a[i + 1] = ... 2943*38fd1498Szrj a[i + 2] = 3; 2944*38fd1498Szrj } 2945*38fd1498Szrj 2946*38fd1498Szrj store to a[i + 1] is missed in loop body, it acts like bubbles. The 2947*38fd1498Szrj content of a[i + 1] remain the same if the loop iterates fewer times 2948*38fd1498Szrj than chain->length. We need to set up root variables for such stores 2949*38fd1498Szrj by loading from memory before loop. Note we only need to load bubble 2950*38fd1498Szrj elements because loop body is guaranteed to be executed at least once 2951*38fd1498Szrj after loop's preheader edge. */ 2952*38fd1498Szrj auto_vec<bool> bubbles; 2953*38fd1498Szrj bubbles.safe_grow_cleared (n + 1); 2954*38fd1498Szrj for (i = 0; i < chain->refs.length (); i++) 2955*38fd1498Szrj bubbles[chain->refs[i]->distance] = true; 2956*38fd1498Szrj 2957*38fd1498Szrj struct data_reference *dr = get_chain_root (chain)->ref; 2958*38fd1498Szrj for (i = 0; i < n; i++) 2959*38fd1498Szrj { 2960*38fd1498Szrj if (bubbles[i]) 2961*38fd1498Szrj continue; 2962*38fd1498Szrj 2963*38fd1498Szrj gimple_seq stmts = NULL; 2964*38fd1498Szrj 2965*38fd1498Szrj tree init = ref_at_iteration (dr, (int) 0 - i, &stmts); 2966*38fd1498Szrj if (stmts) 2967*38fd1498Szrj gimple_seq_add_seq_without_update (&chain->init_seq, stmts); 2968*38fd1498Szrj 2969*38fd1498Szrj chain->inits[i] = init; 2970*38fd1498Szrj } 2971*38fd1498Szrj 2972*38fd1498Szrj return true; 2973*38fd1498Szrj } 2974*38fd1498Szrj 2975*38fd1498Szrj /* Prepare initializers for CHAIN in LOOP. Returns false if this is 2976*38fd1498Szrj impossible because one of these initializers may trap, true otherwise. */ 2977*38fd1498Szrj 2978*38fd1498Szrj static bool 2979*38fd1498Szrj prepare_initializers_chain (struct loop *loop, chain_p chain) 2980*38fd1498Szrj { 2981*38fd1498Szrj unsigned i, n = (chain->type == CT_INVARIANT) ? 1 : chain->length; 2982*38fd1498Szrj struct data_reference *dr = get_chain_root (chain)->ref; 2983*38fd1498Szrj tree init; 2984*38fd1498Szrj dref laref; 2985*38fd1498Szrj edge entry = loop_preheader_edge (loop); 2986*38fd1498Szrj 2987*38fd1498Szrj if (chain->type == CT_STORE_STORE) 2988*38fd1498Szrj return prepare_initializers_chain_store_elim (loop, chain); 2989*38fd1498Szrj 2990*38fd1498Szrj /* Find the initializers for the variables, and check that they cannot 2991*38fd1498Szrj trap. */ 2992*38fd1498Szrj chain->inits.create (n); 2993*38fd1498Szrj for (i = 0; i < n; i++) 2994*38fd1498Szrj chain->inits.quick_push (NULL_TREE); 2995*38fd1498Szrj 2996*38fd1498Szrj /* If we have replaced some looparound phi nodes, use their initializers 2997*38fd1498Szrj instead of creating our own. */ 2998*38fd1498Szrj FOR_EACH_VEC_ELT (chain->refs, i, laref) 2999*38fd1498Szrj { 3000*38fd1498Szrj if (gimple_code (laref->stmt) != GIMPLE_PHI) 3001*38fd1498Szrj continue; 3002*38fd1498Szrj 3003*38fd1498Szrj gcc_assert (laref->distance > 0); 3004*38fd1498Szrj chain->inits[n - laref->distance] 3005*38fd1498Szrj = PHI_ARG_DEF_FROM_EDGE (laref->stmt, entry); 3006*38fd1498Szrj } 3007*38fd1498Szrj 3008*38fd1498Szrj for (i = 0; i < n; i++) 3009*38fd1498Szrj { 3010*38fd1498Szrj gimple_seq stmts = NULL; 3011*38fd1498Szrj 3012*38fd1498Szrj if (chain->inits[i] != NULL_TREE) 3013*38fd1498Szrj continue; 3014*38fd1498Szrj 3015*38fd1498Szrj init = ref_at_iteration (dr, (int) i - n, &stmts); 3016*38fd1498Szrj if (!chain->all_always_accessed && tree_could_trap_p (init)) 3017*38fd1498Szrj { 3018*38fd1498Szrj gimple_seq_discard (stmts); 3019*38fd1498Szrj return false; 3020*38fd1498Szrj } 3021*38fd1498Szrj 3022*38fd1498Szrj if (stmts) 3023*38fd1498Szrj gimple_seq_add_seq_without_update (&chain->init_seq, stmts); 3024*38fd1498Szrj 3025*38fd1498Szrj chain->inits[i] = init; 3026*38fd1498Szrj } 3027*38fd1498Szrj 3028*38fd1498Szrj return true; 3029*38fd1498Szrj } 3030*38fd1498Szrj 3031*38fd1498Szrj /* Prepare initializers for CHAINS in LOOP, and free chains that cannot 3032*38fd1498Szrj be used because the initializers might trap. */ 3033*38fd1498Szrj 3034*38fd1498Szrj static void 3035*38fd1498Szrj prepare_initializers (struct loop *loop, vec<chain_p> chains) 3036*38fd1498Szrj { 3037*38fd1498Szrj chain_p chain; 3038*38fd1498Szrj unsigned i; 3039*38fd1498Szrj 3040*38fd1498Szrj for (i = 0; i < chains.length (); ) 3041*38fd1498Szrj { 3042*38fd1498Szrj chain = chains[i]; 3043*38fd1498Szrj if (prepare_initializers_chain (loop, chain)) 3044*38fd1498Szrj i++; 3045*38fd1498Szrj else 3046*38fd1498Szrj { 3047*38fd1498Szrj release_chain (chain); 3048*38fd1498Szrj chains.unordered_remove (i); 3049*38fd1498Szrj } 3050*38fd1498Szrj } 3051*38fd1498Szrj } 3052*38fd1498Szrj 3053*38fd1498Szrj /* Generates finalizer memory references for CHAIN in LOOP. Returns true 3054*38fd1498Szrj if finalizer code for CHAIN can be generated, otherwise false. */ 3055*38fd1498Szrj 3056*38fd1498Szrj static bool 3057*38fd1498Szrj prepare_finalizers_chain (struct loop *loop, chain_p chain) 3058*38fd1498Szrj { 3059*38fd1498Szrj unsigned i, n = chain->length; 3060*38fd1498Szrj struct data_reference *dr = get_chain_root (chain)->ref; 3061*38fd1498Szrj tree fini, niters = number_of_latch_executions (loop); 3062*38fd1498Szrj 3063*38fd1498Szrj /* For now we can't eliminate stores if some of them are conditional 3064*38fd1498Szrj executed. */ 3065*38fd1498Szrj if (!chain->all_always_accessed) 3066*38fd1498Szrj return false; 3067*38fd1498Szrj 3068*38fd1498Szrj chain->finis.create (n); 3069*38fd1498Szrj for (i = 0; i < n; i++) 3070*38fd1498Szrj chain->finis.quick_push (NULL_TREE); 3071*38fd1498Szrj 3072*38fd1498Szrj /* We never use looparound phi node for store elimination chains. */ 3073*38fd1498Szrj 3074*38fd1498Szrj /* Find the finalizers for the variables, and check that they cannot 3075*38fd1498Szrj trap. */ 3076*38fd1498Szrj for (i = 0; i < n; i++) 3077*38fd1498Szrj { 3078*38fd1498Szrj gimple_seq stmts = NULL; 3079*38fd1498Szrj gcc_assert (chain->finis[i] == NULL_TREE); 3080*38fd1498Szrj 3081*38fd1498Szrj if (TREE_CODE (niters) != INTEGER_CST && TREE_CODE (niters) != SSA_NAME) 3082*38fd1498Szrj { 3083*38fd1498Szrj niters = unshare_expr (niters); 3084*38fd1498Szrj niters = force_gimple_operand (niters, &stmts, true, NULL); 3085*38fd1498Szrj if (stmts) 3086*38fd1498Szrj { 3087*38fd1498Szrj gimple_seq_add_seq_without_update (&chain->fini_seq, stmts); 3088*38fd1498Szrj stmts = NULL; 3089*38fd1498Szrj } 3090*38fd1498Szrj } 3091*38fd1498Szrj fini = ref_at_iteration (dr, (int) 0 - i, &stmts, niters); 3092*38fd1498Szrj if (stmts) 3093*38fd1498Szrj gimple_seq_add_seq_without_update (&chain->fini_seq, stmts); 3094*38fd1498Szrj 3095*38fd1498Szrj chain->finis[i] = fini; 3096*38fd1498Szrj } 3097*38fd1498Szrj 3098*38fd1498Szrj return true; 3099*38fd1498Szrj } 3100*38fd1498Szrj 3101*38fd1498Szrj /* Generates finalizer memory reference for CHAINS in LOOP. Returns true 3102*38fd1498Szrj if finalizer code generation for CHAINS breaks loop closed ssa form. */ 3103*38fd1498Szrj 3104*38fd1498Szrj static bool 3105*38fd1498Szrj prepare_finalizers (struct loop *loop, vec<chain_p> chains) 3106*38fd1498Szrj { 3107*38fd1498Szrj chain_p chain; 3108*38fd1498Szrj unsigned i; 3109*38fd1498Szrj bool loop_closed_ssa = false; 3110*38fd1498Szrj 3111*38fd1498Szrj for (i = 0; i < chains.length ();) 3112*38fd1498Szrj { 3113*38fd1498Szrj chain = chains[i]; 3114*38fd1498Szrj 3115*38fd1498Szrj /* Finalizer is only necessary for inter-iteration store elimination 3116*38fd1498Szrj chains. */ 3117*38fd1498Szrj if (chain->length == 0 || chain->type != CT_STORE_STORE) 3118*38fd1498Szrj { 3119*38fd1498Szrj i++; 3120*38fd1498Szrj continue; 3121*38fd1498Szrj } 3122*38fd1498Szrj 3123*38fd1498Szrj if (prepare_finalizers_chain (loop, chain)) 3124*38fd1498Szrj { 3125*38fd1498Szrj i++; 3126*38fd1498Szrj /* Be conservative, assume loop closed ssa form is corrupted 3127*38fd1498Szrj by store-store chain. Though it's not always the case if 3128*38fd1498Szrj eliminated stores only store loop invariant values into 3129*38fd1498Szrj memory. */ 3130*38fd1498Szrj loop_closed_ssa = true; 3131*38fd1498Szrj } 3132*38fd1498Szrj else 3133*38fd1498Szrj { 3134*38fd1498Szrj release_chain (chain); 3135*38fd1498Szrj chains.unordered_remove (i); 3136*38fd1498Szrj } 3137*38fd1498Szrj } 3138*38fd1498Szrj return loop_closed_ssa; 3139*38fd1498Szrj } 3140*38fd1498Szrj 3141*38fd1498Szrj /* Insert all initializing gimple stmts into loop's entry edge. */ 3142*38fd1498Szrj 3143*38fd1498Szrj static void 3144*38fd1498Szrj insert_init_seqs (struct loop *loop, vec<chain_p> chains) 3145*38fd1498Szrj { 3146*38fd1498Szrj unsigned i; 3147*38fd1498Szrj edge entry = loop_preheader_edge (loop); 3148*38fd1498Szrj 3149*38fd1498Szrj for (i = 0; i < chains.length (); ++i) 3150*38fd1498Szrj if (chains[i]->init_seq) 3151*38fd1498Szrj { 3152*38fd1498Szrj gsi_insert_seq_on_edge_immediate (entry, chains[i]->init_seq); 3153*38fd1498Szrj chains[i]->init_seq = NULL; 3154*38fd1498Szrj } 3155*38fd1498Szrj } 3156*38fd1498Szrj 3157*38fd1498Szrj /* Performs predictive commoning for LOOP. Sets bit 1<<0 of return value 3158*38fd1498Szrj if LOOP was unrolled; Sets bit 1<<1 of return value if loop closed ssa 3159*38fd1498Szrj form was corrupted. */ 3160*38fd1498Szrj 3161*38fd1498Szrj static unsigned 3162*38fd1498Szrj tree_predictive_commoning_loop (struct loop *loop) 3163*38fd1498Szrj { 3164*38fd1498Szrj vec<data_reference_p> datarefs; 3165*38fd1498Szrj vec<ddr_p> dependences; 3166*38fd1498Szrj struct component *components; 3167*38fd1498Szrj vec<chain_p> chains = vNULL; 3168*38fd1498Szrj unsigned unroll_factor; 3169*38fd1498Szrj struct tree_niter_desc desc; 3170*38fd1498Szrj bool unroll = false, loop_closed_ssa = false; 3171*38fd1498Szrj edge exit; 3172*38fd1498Szrj 3173*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS)) 3174*38fd1498Szrj fprintf (dump_file, "Processing loop %d\n", loop->num); 3175*38fd1498Szrj 3176*38fd1498Szrj /* Nothing for predicitive commoning if loop only iterates 1 time. */ 3177*38fd1498Szrj if (get_max_loop_iterations_int (loop) == 0) 3178*38fd1498Szrj { 3179*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS)) 3180*38fd1498Szrj fprintf (dump_file, "Loop iterates only 1 time, nothing to do.\n"); 3181*38fd1498Szrj 3182*38fd1498Szrj return 0; 3183*38fd1498Szrj } 3184*38fd1498Szrj 3185*38fd1498Szrj /* Find the data references and split them into components according to their 3186*38fd1498Szrj dependence relations. */ 3187*38fd1498Szrj auto_vec<loop_p, 3> loop_nest; 3188*38fd1498Szrj dependences.create (10); 3189*38fd1498Szrj datarefs.create (10); 3190*38fd1498Szrj if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs, 3191*38fd1498Szrj &dependences)) 3192*38fd1498Szrj { 3193*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS)) 3194*38fd1498Szrj fprintf (dump_file, "Cannot analyze data dependencies\n"); 3195*38fd1498Szrj free_data_refs (datarefs); 3196*38fd1498Szrj free_dependence_relations (dependences); 3197*38fd1498Szrj return 0; 3198*38fd1498Szrj } 3199*38fd1498Szrj 3200*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS)) 3201*38fd1498Szrj dump_data_dependence_relations (dump_file, dependences); 3202*38fd1498Szrj 3203*38fd1498Szrj components = split_data_refs_to_components (loop, datarefs, dependences); 3204*38fd1498Szrj loop_nest.release (); 3205*38fd1498Szrj free_dependence_relations (dependences); 3206*38fd1498Szrj if (!components) 3207*38fd1498Szrj { 3208*38fd1498Szrj free_data_refs (datarefs); 3209*38fd1498Szrj free_affine_expand_cache (&name_expansions); 3210*38fd1498Szrj return 0; 3211*38fd1498Szrj } 3212*38fd1498Szrj 3213*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS)) 3214*38fd1498Szrj { 3215*38fd1498Szrj fprintf (dump_file, "Initial state:\n\n"); 3216*38fd1498Szrj dump_components (dump_file, components); 3217*38fd1498Szrj } 3218*38fd1498Szrj 3219*38fd1498Szrj /* Find the suitable components and split them into chains. */ 3220*38fd1498Szrj components = filter_suitable_components (loop, components); 3221*38fd1498Szrj 3222*38fd1498Szrj auto_bitmap tmp_vars; 3223*38fd1498Szrj looparound_phis = BITMAP_ALLOC (NULL); 3224*38fd1498Szrj determine_roots (loop, components, &chains); 3225*38fd1498Szrj release_components (components); 3226*38fd1498Szrj 3227*38fd1498Szrj if (!chains.exists ()) 3228*38fd1498Szrj { 3229*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS)) 3230*38fd1498Szrj fprintf (dump_file, 3231*38fd1498Szrj "Predictive commoning failed: no suitable chains\n"); 3232*38fd1498Szrj goto end; 3233*38fd1498Szrj } 3234*38fd1498Szrj prepare_initializers (loop, chains); 3235*38fd1498Szrj loop_closed_ssa = prepare_finalizers (loop, chains); 3236*38fd1498Szrj 3237*38fd1498Szrj /* Try to combine the chains that are always worked with together. */ 3238*38fd1498Szrj try_combine_chains (loop, &chains); 3239*38fd1498Szrj 3240*38fd1498Szrj insert_init_seqs (loop, chains); 3241*38fd1498Szrj 3242*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS)) 3243*38fd1498Szrj { 3244*38fd1498Szrj fprintf (dump_file, "Before commoning:\n\n"); 3245*38fd1498Szrj dump_chains (dump_file, chains); 3246*38fd1498Szrj } 3247*38fd1498Szrj 3248*38fd1498Szrj /* Determine the unroll factor, and if the loop should be unrolled, ensure 3249*38fd1498Szrj that its number of iterations is divisible by the factor. */ 3250*38fd1498Szrj unroll_factor = determine_unroll_factor (chains); 3251*38fd1498Szrj scev_reset (); 3252*38fd1498Szrj unroll = (unroll_factor > 1 3253*38fd1498Szrj && can_unroll_loop_p (loop, unroll_factor, &desc)); 3254*38fd1498Szrj exit = single_dom_exit (loop); 3255*38fd1498Szrj 3256*38fd1498Szrj /* Execute the predictive commoning transformations, and possibly unroll the 3257*38fd1498Szrj loop. */ 3258*38fd1498Szrj if (unroll) 3259*38fd1498Szrj { 3260*38fd1498Szrj struct epcc_data dta; 3261*38fd1498Szrj 3262*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS)) 3263*38fd1498Szrj fprintf (dump_file, "Unrolling %u times.\n", unroll_factor); 3264*38fd1498Szrj 3265*38fd1498Szrj dta.chains = chains; 3266*38fd1498Szrj dta.tmp_vars = tmp_vars; 3267*38fd1498Szrj 3268*38fd1498Szrj update_ssa (TODO_update_ssa_only_virtuals); 3269*38fd1498Szrj 3270*38fd1498Szrj /* Cfg manipulations performed in tree_transform_and_unroll_loop before 3271*38fd1498Szrj execute_pred_commoning_cbck is called may cause phi nodes to be 3272*38fd1498Szrj reallocated, which is a problem since CHAINS may point to these 3273*38fd1498Szrj statements. To fix this, we store the ssa names defined by the 3274*38fd1498Szrj phi nodes here instead of the phi nodes themselves, and restore 3275*38fd1498Szrj the phi nodes in execute_pred_commoning_cbck. A bit hacky. */ 3276*38fd1498Szrj replace_phis_by_defined_names (chains); 3277*38fd1498Szrj 3278*38fd1498Szrj tree_transform_and_unroll_loop (loop, unroll_factor, exit, &desc, 3279*38fd1498Szrj execute_pred_commoning_cbck, &dta); 3280*38fd1498Szrj eliminate_temp_copies (loop, tmp_vars); 3281*38fd1498Szrj } 3282*38fd1498Szrj else 3283*38fd1498Szrj { 3284*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS)) 3285*38fd1498Szrj fprintf (dump_file, 3286*38fd1498Szrj "Executing predictive commoning without unrolling.\n"); 3287*38fd1498Szrj execute_pred_commoning (loop, chains, tmp_vars); 3288*38fd1498Szrj } 3289*38fd1498Szrj 3290*38fd1498Szrj end: ; 3291*38fd1498Szrj release_chains (chains); 3292*38fd1498Szrj free_data_refs (datarefs); 3293*38fd1498Szrj BITMAP_FREE (looparound_phis); 3294*38fd1498Szrj 3295*38fd1498Szrj free_affine_expand_cache (&name_expansions); 3296*38fd1498Szrj 3297*38fd1498Szrj return (unroll ? 1 : 0) | (loop_closed_ssa ? 2 : 0); 3298*38fd1498Szrj } 3299*38fd1498Szrj 3300*38fd1498Szrj /* Runs predictive commoning. */ 3301*38fd1498Szrj 3302*38fd1498Szrj unsigned 3303*38fd1498Szrj tree_predictive_commoning (void) 3304*38fd1498Szrj { 3305*38fd1498Szrj struct loop *loop; 3306*38fd1498Szrj unsigned ret = 0, changed = 0; 3307*38fd1498Szrj 3308*38fd1498Szrj initialize_original_copy_tables (); 3309*38fd1498Szrj FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST) 3310*38fd1498Szrj if (optimize_loop_for_speed_p (loop)) 3311*38fd1498Szrj { 3312*38fd1498Szrj changed |= tree_predictive_commoning_loop (loop); 3313*38fd1498Szrj } 3314*38fd1498Szrj free_original_copy_tables (); 3315*38fd1498Szrj 3316*38fd1498Szrj if (changed > 0) 3317*38fd1498Szrj { 3318*38fd1498Szrj scev_reset (); 3319*38fd1498Szrj 3320*38fd1498Szrj if (changed > 1) 3321*38fd1498Szrj rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa); 3322*38fd1498Szrj 3323*38fd1498Szrj ret = TODO_cleanup_cfg; 3324*38fd1498Szrj } 3325*38fd1498Szrj 3326*38fd1498Szrj return ret; 3327*38fd1498Szrj } 3328*38fd1498Szrj 3329*38fd1498Szrj /* Predictive commoning Pass. */ 3330*38fd1498Szrj 3331*38fd1498Szrj static unsigned 3332*38fd1498Szrj run_tree_predictive_commoning (struct function *fun) 3333*38fd1498Szrj { 3334*38fd1498Szrj if (number_of_loops (fun) <= 1) 3335*38fd1498Szrj return 0; 3336*38fd1498Szrj 3337*38fd1498Szrj return tree_predictive_commoning (); 3338*38fd1498Szrj } 3339*38fd1498Szrj 3340*38fd1498Szrj namespace { 3341*38fd1498Szrj 3342*38fd1498Szrj const pass_data pass_data_predcom = 3343*38fd1498Szrj { 3344*38fd1498Szrj GIMPLE_PASS, /* type */ 3345*38fd1498Szrj "pcom", /* name */ 3346*38fd1498Szrj OPTGROUP_LOOP, /* optinfo_flags */ 3347*38fd1498Szrj TV_PREDCOM, /* tv_id */ 3348*38fd1498Szrj PROP_cfg, /* properties_required */ 3349*38fd1498Szrj 0, /* properties_provided */ 3350*38fd1498Szrj 0, /* properties_destroyed */ 3351*38fd1498Szrj 0, /* todo_flags_start */ 3352*38fd1498Szrj TODO_update_ssa_only_virtuals, /* todo_flags_finish */ 3353*38fd1498Szrj }; 3354*38fd1498Szrj 3355*38fd1498Szrj class pass_predcom : public gimple_opt_pass 3356*38fd1498Szrj { 3357*38fd1498Szrj public: 3358*38fd1498Szrj pass_predcom (gcc::context *ctxt) 3359*38fd1498Szrj : gimple_opt_pass (pass_data_predcom, ctxt) 3360*38fd1498Szrj {} 3361*38fd1498Szrj 3362*38fd1498Szrj /* opt_pass methods: */ 3363*38fd1498Szrj virtual bool gate (function *) { return flag_predictive_commoning != 0; } 3364*38fd1498Szrj virtual unsigned int execute (function *fun) 3365*38fd1498Szrj { 3366*38fd1498Szrj return run_tree_predictive_commoning (fun); 3367*38fd1498Szrj } 3368*38fd1498Szrj 3369*38fd1498Szrj }; // class pass_predcom 3370*38fd1498Szrj 3371*38fd1498Szrj } // anon namespace 3372*38fd1498Szrj 3373*38fd1498Szrj gimple_opt_pass * 3374*38fd1498Szrj make_pass_predcom (gcc::context *ctxt) 3375*38fd1498Szrj { 3376*38fd1498Szrj return new pass_predcom (ctxt); 3377*38fd1498Szrj } 3378*38fd1498Szrj 3379*38fd1498Szrj 3380