xref: /dflybsd-src/contrib/gcc-8.0/gcc/cfgloopmanip.c (revision 38fd149817dfbff97799f62fcb70be98c4e32523)
1*38fd1498Szrj /* Loop manipulation code for GNU compiler.
2*38fd1498Szrj    Copyright (C) 2002-2018 Free Software Foundation, Inc.
3*38fd1498Szrj 
4*38fd1498Szrj This file is part of GCC.
5*38fd1498Szrj 
6*38fd1498Szrj GCC is free software; you can redistribute it and/or modify it under
7*38fd1498Szrj the terms of the GNU General Public License as published by the Free
8*38fd1498Szrj Software Foundation; either version 3, or (at your option) any later
9*38fd1498Szrj version.
10*38fd1498Szrj 
11*38fd1498Szrj GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12*38fd1498Szrj WARRANTY; without even the implied warranty of MERCHANTABILITY or
13*38fd1498Szrj FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14*38fd1498Szrj for more details.
15*38fd1498Szrj 
16*38fd1498Szrj You should have received a copy of the GNU General Public License
17*38fd1498Szrj along with GCC; see the file COPYING3.  If not see
18*38fd1498Szrj <http://www.gnu.org/licenses/>.  */
19*38fd1498Szrj 
20*38fd1498Szrj #include "config.h"
21*38fd1498Szrj #include "system.h"
22*38fd1498Szrj #include "coretypes.h"
23*38fd1498Szrj #include "backend.h"
24*38fd1498Szrj #include "rtl.h"
25*38fd1498Szrj #include "tree.h"
26*38fd1498Szrj #include "gimple.h"
27*38fd1498Szrj #include "cfghooks.h"
28*38fd1498Szrj #include "cfganal.h"
29*38fd1498Szrj #include "cfgloop.h"
30*38fd1498Szrj #include "gimple-iterator.h"
31*38fd1498Szrj #include "gimplify-me.h"
32*38fd1498Szrj #include "tree-ssa-loop-manip.h"
33*38fd1498Szrj #include "dumpfile.h"
34*38fd1498Szrj 
35*38fd1498Szrj static void copy_loops_to (struct loop **, int,
36*38fd1498Szrj 			   struct loop *);
37*38fd1498Szrj static void loop_redirect_edge (edge, basic_block);
38*38fd1498Szrj static void remove_bbs (basic_block *, int);
39*38fd1498Szrj static bool rpe_enum_p (const_basic_block, const void *);
40*38fd1498Szrj static int find_path (edge, basic_block **);
41*38fd1498Szrj static void fix_loop_placements (struct loop *, bool *);
42*38fd1498Szrj static bool fix_bb_placement (basic_block);
43*38fd1498Szrj static void fix_bb_placements (basic_block, bool *, bitmap);
44*38fd1498Szrj 
45*38fd1498Szrj /* Checks whether basic block BB is dominated by DATA.  */
46*38fd1498Szrj static bool
47*38fd1498Szrj rpe_enum_p (const_basic_block bb, const void *data)
48*38fd1498Szrj {
49*38fd1498Szrj   return dominated_by_p (CDI_DOMINATORS, bb, (const_basic_block) data);
50*38fd1498Szrj }
51*38fd1498Szrj 
52*38fd1498Szrj /* Remove basic blocks BBS.  NBBS is the number of the basic blocks.  */
53*38fd1498Szrj 
54*38fd1498Szrj static void
55*38fd1498Szrj remove_bbs (basic_block *bbs, int nbbs)
56*38fd1498Szrj {
57*38fd1498Szrj   int i;
58*38fd1498Szrj 
59*38fd1498Szrj   for (i = 0; i < nbbs; i++)
60*38fd1498Szrj     delete_basic_block (bbs[i]);
61*38fd1498Szrj }
62*38fd1498Szrj 
63*38fd1498Szrj /* Find the path -- i.e. the basic blocks dominated by edge E -- and put them
64*38fd1498Szrj    into array BBS, which will be allocated large enough to contain them.
65*38fd1498Szrj    E->dest must have exactly one predecessor for this to work (this is
66*38fd1498Szrj    easy to achieve, and we do not do it here because we do not want this
67*38fd1498Szrj    function to alter anything).  The number of basic blocks in the
68*38fd1498Szrj    path is returned.  */
69*38fd1498Szrj static int
70*38fd1498Szrj find_path (edge e, basic_block **bbs)
71*38fd1498Szrj {
72*38fd1498Szrj   gcc_assert (EDGE_COUNT (e->dest->preds) <= 1);
73*38fd1498Szrj 
74*38fd1498Szrj   /* Find bbs in the path.  */
75*38fd1498Szrj   *bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));
76*38fd1498Szrj   return dfs_enumerate_from (e->dest, 0, rpe_enum_p, *bbs,
77*38fd1498Szrj 			     n_basic_blocks_for_fn (cfun), e->dest);
78*38fd1498Szrj }
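
/* Illustrative sketch of the intended call pattern (hypothetical caller;
   compare remove_path below): the caller guarantees the single-predecessor
   precondition, consumes the path, and frees the array.

     basic_block *path_bbs;
     int n;

     if (!single_pred_p (e->dest))
       e = single_pred_edge (split_edge (e));
     n = find_path (e, &path_bbs);
     ...
     remove_bbs (path_bbs, n);
     free (path_bbs);  */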
79*38fd1498Szrj 
80*38fd1498Szrj /* Fix placement of basic block BB inside loop hierarchy --
81*38fd1498Szrj    Let L be the loop to which BB belongs.  Then every successor of BB must either
82*38fd1498Szrj      1) belong to some superloop of loop L, or
83*38fd1498Szrj      2) be a header of a loop K such that K->outer is a superloop of L.
84*38fd1498Szrj    Returns true if we had to move BB into another loop to enforce this condition,
85*38fd1498Szrj    false if the placement of BB was already correct (provided that placements
86*38fd1498Szrj    of its successors are correct).  */
87*38fd1498Szrj static bool
88*38fd1498Szrj fix_bb_placement (basic_block bb)
89*38fd1498Szrj {
90*38fd1498Szrj   edge e;
91*38fd1498Szrj   edge_iterator ei;
92*38fd1498Szrj   struct loop *loop = current_loops->tree_root, *act;
93*38fd1498Szrj 
94*38fd1498Szrj   FOR_EACH_EDGE (e, ei, bb->succs)
95*38fd1498Szrj     {
96*38fd1498Szrj       if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
97*38fd1498Szrj 	continue;
98*38fd1498Szrj 
99*38fd1498Szrj       act = e->dest->loop_father;
100*38fd1498Szrj       if (act->header == e->dest)
101*38fd1498Szrj 	act = loop_outer (act);
102*38fd1498Szrj 
103*38fd1498Szrj       if (flow_loop_nested_p (loop, act))
104*38fd1498Szrj 	loop = act;
105*38fd1498Szrj     }
106*38fd1498Szrj 
107*38fd1498Szrj   if (loop == bb->loop_father)
108*38fd1498Szrj     return false;
109*38fd1498Szrj 
110*38fd1498Szrj   remove_bb_from_loops (bb);
111*38fd1498Szrj   add_bb_to_loop (bb, loop);
112*38fd1498Szrj 
113*38fd1498Szrj   return true;
114*38fd1498Szrj }
115*38fd1498Szrj 
116*38fd1498Szrj /* Fix placement of LOOP inside loop tree, i.e. find the innermost superloop
117*38fd1498Szrj    of LOOP to which at least one exit edge of LOOP leads, and set it
118*38fd1498Szrj    as the immediate superloop of LOOP.  Return true if the immediate superloop
119*38fd1498Szrj    of LOOP changed.
120*38fd1498Szrj 
121*38fd1498Szrj    IRRED_INVALIDATED is set to true if a change in the loop structures might
122*38fd1498Szrj    invalidate the information about irreducible regions.  */
123*38fd1498Szrj 
124*38fd1498Szrj static bool
125*38fd1498Szrj fix_loop_placement (struct loop *loop, bool *irred_invalidated)
126*38fd1498Szrj {
127*38fd1498Szrj   unsigned i;
128*38fd1498Szrj   edge e;
129*38fd1498Szrj   vec<edge> exits = get_loop_exit_edges (loop);
130*38fd1498Szrj   struct loop *father = current_loops->tree_root, *act;
131*38fd1498Szrj   bool ret = false;
132*38fd1498Szrj 
133*38fd1498Szrj   FOR_EACH_VEC_ELT (exits, i, e)
134*38fd1498Szrj     {
135*38fd1498Szrj       act = find_common_loop (loop, e->dest->loop_father);
136*38fd1498Szrj       if (flow_loop_nested_p (father, act))
137*38fd1498Szrj 	father = act;
138*38fd1498Szrj     }
139*38fd1498Szrj 
140*38fd1498Szrj   if (father != loop_outer (loop))
141*38fd1498Szrj     {
142*38fd1498Szrj       for (act = loop_outer (loop); act != father; act = loop_outer (act))
143*38fd1498Szrj 	act->num_nodes -= loop->num_nodes;
144*38fd1498Szrj       flow_loop_tree_node_remove (loop);
145*38fd1498Szrj       flow_loop_tree_node_add (father, loop);
146*38fd1498Szrj 
147*38fd1498Szrj       /* The exit edges of LOOP no longer exit its original immediate
148*38fd1498Szrj 	 superloops; remove them from the appropriate exit lists.  */
149*38fd1498Szrj       FOR_EACH_VEC_ELT (exits, i, e)
150*38fd1498Szrj 	{
151*38fd1498Szrj 	  /* We may need to recompute irreducible loops.  */
152*38fd1498Szrj 	  if (e->flags & EDGE_IRREDUCIBLE_LOOP)
153*38fd1498Szrj 	    *irred_invalidated = true;
154*38fd1498Szrj 	  rescan_loop_exit (e, false, false);
155*38fd1498Szrj 	}
156*38fd1498Szrj 
157*38fd1498Szrj       ret = true;
158*38fd1498Szrj     }
159*38fd1498Szrj 
160*38fd1498Szrj   exits.release ();
161*38fd1498Szrj   return ret;
162*38fd1498Szrj }
163*38fd1498Szrj 
164*38fd1498Szrj /* Fix placements of basic blocks inside loop hierarchy stored in loops; i.e.
165*38fd1498Szrj    enforce the condition stated in the description of fix_bb_placement.  We
166*38fd1498Szrj    start from basic block FROM that had some of its successors removed, so that
167*38fd1498Szrj    its placement may no longer be correct, and iteratively fix the placement of
168*38fd1498Szrj    its predecessors, which may change if the placement of FROM changed.  Also fix
169*38fd1498Szrj    the placement of subloops of FROM->loop_father, which might also be altered due
170*38fd1498Szrj    to this change; the condition for them is similar, except that instead of
171*38fd1498Szrj    successors we consider edges coming out of the loops.
172*38fd1498Szrj 
173*38fd1498Szrj    If the changes may invalidate the information about irreducible regions,
174*38fd1498Szrj    IRRED_INVALIDATED is set to true.
175*38fd1498Szrj 
176*38fd1498Szrj    If LOOP_CLOSED_SSA_INVALIDATED is non-zero then all basic blocks with
177*38fd1498Szrj    changed loop_father are collected there.  */
178*38fd1498Szrj 
179*38fd1498Szrj static void
180*38fd1498Szrj fix_bb_placements (basic_block from,
181*38fd1498Szrj 		   bool *irred_invalidated,
182*38fd1498Szrj 		   bitmap loop_closed_ssa_invalidated)
183*38fd1498Szrj {
184*38fd1498Szrj   basic_block *queue, *qtop, *qbeg, *qend;
185*38fd1498Szrj   struct loop *base_loop, *target_loop;
186*38fd1498Szrj   edge e;
187*38fd1498Szrj 
188*38fd1498Szrj   /* We pass through blocks back-reachable from FROM, testing whether some
189*38fd1498Szrj      of their successors moved to an outer loop.  It may be necessary to
190*38fd1498Szrj      iterate several times, but it is finite, as we stop unless we move
191*38fd1498Szrj      the basic block up the loop structure.  The whole story is a bit
192*38fd1498Szrj      more complicated due to the presence of subloops, which are moved using
193*38fd1498Szrj      fix_loop_placement.  */
194*38fd1498Szrj 
195*38fd1498Szrj   base_loop = from->loop_father;
196*38fd1498Szrj   /* If we are already in the outermost loop, the basic blocks cannot be moved
197*38fd1498Szrj      outside of it.  If FROM is the header of the base loop, it cannot be moved
198*38fd1498Szrj      outside of it, either.  In both cases, we can end now.  */
199*38fd1498Szrj   if (base_loop == current_loops->tree_root
200*38fd1498Szrj       || from == base_loop->header)
201*38fd1498Szrj     return;
202*38fd1498Szrj 
203*38fd1498Szrj   auto_sbitmap in_queue (last_basic_block_for_fn (cfun));
204*38fd1498Szrj   bitmap_clear (in_queue);
205*38fd1498Szrj   bitmap_set_bit (in_queue, from->index);
206*38fd1498Szrj   /* Prevent us from going out of the base_loop.  */
207*38fd1498Szrj   bitmap_set_bit (in_queue, base_loop->header->index);
208*38fd1498Szrj 
209*38fd1498Szrj   queue = XNEWVEC (basic_block, base_loop->num_nodes + 1);
210*38fd1498Szrj   qtop = queue + base_loop->num_nodes + 1;
211*38fd1498Szrj   qbeg = queue;
212*38fd1498Szrj   qend = queue + 1;
213*38fd1498Szrj   *qbeg = from;
214*38fd1498Szrj 
215*38fd1498Szrj   while (qbeg != qend)
216*38fd1498Szrj     {
217*38fd1498Szrj       edge_iterator ei;
218*38fd1498Szrj       from = *qbeg;
219*38fd1498Szrj       qbeg++;
220*38fd1498Szrj       if (qbeg == qtop)
221*38fd1498Szrj 	qbeg = queue;
222*38fd1498Szrj       bitmap_clear_bit (in_queue, from->index);
223*38fd1498Szrj 
224*38fd1498Szrj       if (from->loop_father->header == from)
225*38fd1498Szrj 	{
226*38fd1498Szrj 	  /* Subloop header, maybe move the loop upward.  */
227*38fd1498Szrj 	  if (!fix_loop_placement (from->loop_father, irred_invalidated))
228*38fd1498Szrj 	    continue;
229*38fd1498Szrj 	  target_loop = loop_outer (from->loop_father);
230*38fd1498Szrj 	  if (loop_closed_ssa_invalidated)
231*38fd1498Szrj 	    {
232*38fd1498Szrj 	      basic_block *bbs = get_loop_body (from->loop_father);
233*38fd1498Szrj 	      for (unsigned i = 0; i < from->loop_father->num_nodes; ++i)
234*38fd1498Szrj 		bitmap_set_bit (loop_closed_ssa_invalidated, bbs[i]->index);
235*38fd1498Szrj 	      free (bbs);
236*38fd1498Szrj 	    }
237*38fd1498Szrj 	}
238*38fd1498Szrj       else
239*38fd1498Szrj 	{
240*38fd1498Szrj 	  /* Ordinary basic block.  */
241*38fd1498Szrj 	  if (!fix_bb_placement (from))
242*38fd1498Szrj 	    continue;
243*38fd1498Szrj 	  target_loop = from->loop_father;
244*38fd1498Szrj 	  if (loop_closed_ssa_invalidated)
245*38fd1498Szrj 	    bitmap_set_bit (loop_closed_ssa_invalidated, from->index);
246*38fd1498Szrj 	}
247*38fd1498Szrj 
248*38fd1498Szrj       FOR_EACH_EDGE (e, ei, from->succs)
249*38fd1498Szrj 	{
250*38fd1498Szrj 	  if (e->flags & EDGE_IRREDUCIBLE_LOOP)
251*38fd1498Szrj 	    *irred_invalidated = true;
252*38fd1498Szrj 	}
253*38fd1498Szrj 
254*38fd1498Szrj       /* Something has changed, insert predecessors into queue.  */
255*38fd1498Szrj       FOR_EACH_EDGE (e, ei, from->preds)
256*38fd1498Szrj 	{
257*38fd1498Szrj 	  basic_block pred = e->src;
258*38fd1498Szrj 	  struct loop *nca;
259*38fd1498Szrj 
260*38fd1498Szrj 	  if (e->flags & EDGE_IRREDUCIBLE_LOOP)
261*38fd1498Szrj 	    *irred_invalidated = true;
262*38fd1498Szrj 
263*38fd1498Szrj 	  if (bitmap_bit_p (in_queue, pred->index))
264*38fd1498Szrj 	    continue;
265*38fd1498Szrj 
266*38fd1498Szrj 	  /* If it is a subloop, then either it was not moved, or
267*38fd1498Szrj 	     the path up the loop tree from base_loop does not contain
268*38fd1498Szrj 	     it.  */
269*38fd1498Szrj 	  nca = find_common_loop (pred->loop_father, base_loop);
270*38fd1498Szrj 	  if (pred->loop_father != base_loop
271*38fd1498Szrj 	      && (nca == base_loop
272*38fd1498Szrj 		  || nca != pred->loop_father))
273*38fd1498Szrj 	    pred = pred->loop_father->header;
274*38fd1498Szrj 	  else if (!flow_loop_nested_p (target_loop, pred->loop_father))
275*38fd1498Szrj 	    {
276*38fd1498Szrj 	      /* If PRED is already higher in the loop hierarchy than the
277*38fd1498Szrj 		 TARGET_LOOP to which we moved FROM, the change of the position
278*38fd1498Szrj 		 of FROM does not affect the position of PRED, so there is no
279*38fd1498Szrj 		 point in processing it.  */
280*38fd1498Szrj 	      continue;
281*38fd1498Szrj 	    }
282*38fd1498Szrj 
283*38fd1498Szrj 	  if (bitmap_bit_p (in_queue, pred->index))
284*38fd1498Szrj 	    continue;
285*38fd1498Szrj 
286*38fd1498Szrj 	  /* Schedule the basic block.  */
287*38fd1498Szrj 	  *qend = pred;
288*38fd1498Szrj 	  qend++;
289*38fd1498Szrj 	  if (qend == qtop)
290*38fd1498Szrj 	    qend = queue;
291*38fd1498Szrj 	  bitmap_set_bit (in_queue, pred->index);
292*38fd1498Szrj 	}
293*38fd1498Szrj     }
294*38fd1498Szrj   free (queue);
295*38fd1498Szrj }
296*38fd1498Szrj 
297*38fd1498Szrj /* Removes path beginning at edge E, i.e. remove basic blocks dominated by E
298*38fd1498Szrj    and update loop structures and dominators.  Return true if we were able
299*38fd1498Szrj    to remove the path, false otherwise (and nothing is affected then).  */
300*38fd1498Szrj bool
301*38fd1498Szrj remove_path (edge e, bool *irred_invalidated,
302*38fd1498Szrj 	     bitmap loop_closed_ssa_invalidated)
303*38fd1498Szrj {
304*38fd1498Szrj   edge ae;
305*38fd1498Szrj   basic_block *rem_bbs, *bord_bbs, from, bb;
306*38fd1498Szrj   vec<basic_block> dom_bbs;
307*38fd1498Szrj   int i, nrem, n_bord_bbs;
308*38fd1498Szrj   bool local_irred_invalidated = false;
309*38fd1498Szrj   edge_iterator ei;
310*38fd1498Szrj   struct loop *l, *f;
311*38fd1498Szrj 
312*38fd1498Szrj   if (! irred_invalidated)
313*38fd1498Szrj     irred_invalidated = &local_irred_invalidated;
314*38fd1498Szrj 
315*38fd1498Szrj   if (!can_remove_branch_p (e))
316*38fd1498Szrj     return false;
317*38fd1498Szrj 
318*38fd1498Szrj   /* Keep track of whether we need to update information about irreducible
319*38fd1498Szrj      regions.  This is the case if the removed area is a part of the
320*38fd1498Szrj      irreducible region, or if the set of basic blocks that belong to a loop
321*38fd1498Szrj      that is inside an irreducible region is changed, or if such a loop is
322*38fd1498Szrj      removed.  */
323*38fd1498Szrj   if (e->flags & EDGE_IRREDUCIBLE_LOOP)
324*38fd1498Szrj     *irred_invalidated = true;
325*38fd1498Szrj 
326*38fd1498Szrj   /* We need to check whether basic blocks are dominated by the edge
327*38fd1498Szrj      e, but we only have basic block dominators.  This is easy to
328*38fd1498Szrj      fix -- when e->dest has exactly one predecessor, this corresponds
329*38fd1498Szrj      to blocks dominated by e->dest; if not, we split the edge.  */
330*38fd1498Szrj   if (!single_pred_p (e->dest))
331*38fd1498Szrj     e = single_pred_edge (split_edge (e));
332*38fd1498Szrj 
333*38fd1498Szrj   /* It may happen that by removing the path we remove one or more loops
334*38fd1498Szrj      that E->src belongs to.  In this case, first unloop the loops, then proceed
335*38fd1498Szrj      normally.  We may assume that e->dest is not a header of any loop,
336*38fd1498Szrj      as it now has exactly one predecessor.  */
337*38fd1498Szrj   for (l = e->src->loop_father; loop_outer (l); l = f)
338*38fd1498Szrj     {
339*38fd1498Szrj       f = loop_outer (l);
340*38fd1498Szrj       if (dominated_by_p (CDI_DOMINATORS, l->latch, e->dest))
341*38fd1498Szrj         unloop (l, irred_invalidated, loop_closed_ssa_invalidated);
342*38fd1498Szrj     }
343*38fd1498Szrj 
344*38fd1498Szrj   /* Identify the path.  */
345*38fd1498Szrj   nrem = find_path (e, &rem_bbs);
346*38fd1498Szrj 
347*38fd1498Szrj   n_bord_bbs = 0;
348*38fd1498Szrj   bord_bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));
349*38fd1498Szrj   auto_sbitmap seen (last_basic_block_for_fn (cfun));
350*38fd1498Szrj   bitmap_clear (seen);
351*38fd1498Szrj 
352*38fd1498Szrj   /* Find "border" blocks -- i.e. those with a predecessor in the removed path.  */
353*38fd1498Szrj   for (i = 0; i < nrem; i++)
354*38fd1498Szrj     bitmap_set_bit (seen, rem_bbs[i]->index);
355*38fd1498Szrj   if (!*irred_invalidated)
356*38fd1498Szrj     FOR_EACH_EDGE (ae, ei, e->src->succs)
357*38fd1498Szrj       if (ae != e && ae->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
358*38fd1498Szrj 	  && !bitmap_bit_p (seen, ae->dest->index)
359*38fd1498Szrj 	  && ae->flags & EDGE_IRREDUCIBLE_LOOP)
360*38fd1498Szrj 	{
361*38fd1498Szrj 	  *irred_invalidated = true;
362*38fd1498Szrj 	  break;
363*38fd1498Szrj 	}
364*38fd1498Szrj 
365*38fd1498Szrj   for (i = 0; i < nrem; i++)
366*38fd1498Szrj     {
367*38fd1498Szrj       bb = rem_bbs[i];
368*38fd1498Szrj       FOR_EACH_EDGE (ae, ei, rem_bbs[i]->succs)
369*38fd1498Szrj 	if (ae->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
370*38fd1498Szrj 	    && !bitmap_bit_p (seen, ae->dest->index))
371*38fd1498Szrj 	  {
372*38fd1498Szrj 	    bitmap_set_bit (seen, ae->dest->index);
373*38fd1498Szrj 	    bord_bbs[n_bord_bbs++] = ae->dest;
374*38fd1498Szrj 
375*38fd1498Szrj 	    if (ae->flags & EDGE_IRREDUCIBLE_LOOP)
376*38fd1498Szrj 	      *irred_invalidated = true;
377*38fd1498Szrj 	  }
378*38fd1498Szrj     }
379*38fd1498Szrj 
380*38fd1498Szrj   /* Remove the path.  */
381*38fd1498Szrj   from = e->src;
382*38fd1498Szrj   remove_branch (e);
383*38fd1498Szrj   dom_bbs.create (0);
384*38fd1498Szrj 
385*38fd1498Szrj   /* Cancel loops contained in the path.  */
386*38fd1498Szrj   for (i = 0; i < nrem; i++)
387*38fd1498Szrj     if (rem_bbs[i]->loop_father->header == rem_bbs[i])
388*38fd1498Szrj       cancel_loop_tree (rem_bbs[i]->loop_father);
389*38fd1498Szrj 
390*38fd1498Szrj   remove_bbs (rem_bbs, nrem);
391*38fd1498Szrj   free (rem_bbs);
392*38fd1498Szrj 
393*38fd1498Szrj   /* Find blocks whose dominators may be affected.  */
394*38fd1498Szrj   bitmap_clear (seen);
395*38fd1498Szrj   for (i = 0; i < n_bord_bbs; i++)
396*38fd1498Szrj     {
397*38fd1498Szrj       basic_block ldom;
398*38fd1498Szrj 
399*38fd1498Szrj       bb = get_immediate_dominator (CDI_DOMINATORS, bord_bbs[i]);
400*38fd1498Szrj       if (bitmap_bit_p (seen, bb->index))
401*38fd1498Szrj 	continue;
402*38fd1498Szrj       bitmap_set_bit (seen, bb->index);
403*38fd1498Szrj 
404*38fd1498Szrj       for (ldom = first_dom_son (CDI_DOMINATORS, bb);
405*38fd1498Szrj 	   ldom;
406*38fd1498Szrj 	   ldom = next_dom_son (CDI_DOMINATORS, ldom))
407*38fd1498Szrj 	if (!dominated_by_p (CDI_DOMINATORS, from, ldom))
408*38fd1498Szrj 	  dom_bbs.safe_push (ldom);
409*38fd1498Szrj     }
410*38fd1498Szrj 
411*38fd1498Szrj   /* Recount dominators.  */
412*38fd1498Szrj   iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, true);
413*38fd1498Szrj   dom_bbs.release ();
414*38fd1498Szrj   free (bord_bbs);
415*38fd1498Szrj 
416*38fd1498Szrj   /* Fix placements of basic blocks inside loops and the placement of
417*38fd1498Szrj      loops in the loop tree.  */
418*38fd1498Szrj   fix_bb_placements (from, irred_invalidated, loop_closed_ssa_invalidated);
419*38fd1498Szrj   fix_loop_placements (from->loop_father, irred_invalidated);
420*38fd1498Szrj 
421*38fd1498Szrj   if (local_irred_invalidated
422*38fd1498Szrj       && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
423*38fd1498Szrj     mark_irreducible_loops ();
424*38fd1498Szrj 
425*38fd1498Szrj   return true;
426*38fd1498Szrj }
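
/* Illustrative sketch (hypothetical caller): remove the blocks dominated
   by edge E and recompute the irreducible-region markings itself when it
   passes its own IRRED_INVALIDATED flag.

     bool irred = false;

     if (remove_path (e, &irred, NULL)
	 && irred
	 && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
       mark_irreducible_loops ();  */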
427*38fd1498Szrj 
428*38fd1498Szrj /* Creates a place for a new LOOP in the loops structure of FN.  */
429*38fd1498Szrj 
430*38fd1498Szrj void
431*38fd1498Szrj place_new_loop (struct function *fn, struct loop *loop)
432*38fd1498Szrj {
433*38fd1498Szrj   loop->num = number_of_loops (fn);
434*38fd1498Szrj   vec_safe_push (loops_for_fn (fn)->larray, loop);
435*38fd1498Szrj }
436*38fd1498Szrj 
437*38fd1498Szrj /* Given LOOP structure with filled header and latch, find the body of the
438*38fd1498Szrj    corresponding loop and add it to the loop tree.  Insert the LOOP as a son
439*38fd1498Szrj    of OUTER.  */
440*38fd1498Szrj 
441*38fd1498Szrj void
442*38fd1498Szrj add_loop (struct loop *loop, struct loop *outer)
443*38fd1498Szrj {
444*38fd1498Szrj   basic_block *bbs;
445*38fd1498Szrj   int i, n;
446*38fd1498Szrj   struct loop *subloop;
447*38fd1498Szrj   edge e;
448*38fd1498Szrj   edge_iterator ei;
449*38fd1498Szrj 
450*38fd1498Szrj   /* Add it to loop structure.  */
451*38fd1498Szrj   place_new_loop (cfun, loop);
452*38fd1498Szrj   flow_loop_tree_node_add (outer, loop);
453*38fd1498Szrj 
454*38fd1498Szrj   /* Find its nodes.  */
455*38fd1498Szrj   bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));
456*38fd1498Szrj   n = get_loop_body_with_size (loop, bbs, n_basic_blocks_for_fn (cfun));
457*38fd1498Szrj 
458*38fd1498Szrj   for (i = 0; i < n; i++)
459*38fd1498Szrj     {
460*38fd1498Szrj       if (bbs[i]->loop_father == outer)
461*38fd1498Szrj 	{
462*38fd1498Szrj 	  remove_bb_from_loops (bbs[i]);
463*38fd1498Szrj 	  add_bb_to_loop (bbs[i], loop);
464*38fd1498Szrj 	  continue;
465*38fd1498Szrj 	}
466*38fd1498Szrj 
467*38fd1498Szrj       loop->num_nodes++;
468*38fd1498Szrj 
469*38fd1498Szrj       /* If we find a direct subloop of OUTER, move it to LOOP.  */
470*38fd1498Szrj       subloop = bbs[i]->loop_father;
471*38fd1498Szrj       if (loop_outer (subloop) == outer
472*38fd1498Szrj 	  && subloop->header == bbs[i])
473*38fd1498Szrj 	{
474*38fd1498Szrj 	  flow_loop_tree_node_remove (subloop);
475*38fd1498Szrj 	  flow_loop_tree_node_add (loop, subloop);
476*38fd1498Szrj 	}
477*38fd1498Szrj     }
478*38fd1498Szrj 
479*38fd1498Szrj   /* Update the information about loop exit edges.  */
480*38fd1498Szrj   for (i = 0; i < n; i++)
481*38fd1498Szrj     {
482*38fd1498Szrj       FOR_EACH_EDGE (e, ei, bbs[i]->succs)
483*38fd1498Szrj 	{
484*38fd1498Szrj 	  rescan_loop_exit (e, false, false);
485*38fd1498Szrj 	}
486*38fd1498Szrj     }
487*38fd1498Szrj 
488*38fd1498Szrj   free (bbs);
489*38fd1498Szrj }
490*38fd1498Szrj 
491*38fd1498Szrj /* Scale the profile of LOOP by P.  */
492*38fd1498Szrj 
493*38fd1498Szrj void
494*38fd1498Szrj scale_loop_frequencies (struct loop *loop, profile_probability p)
495*38fd1498Szrj {
496*38fd1498Szrj   basic_block *bbs;
497*38fd1498Szrj 
498*38fd1498Szrj   bbs = get_loop_body (loop);
499*38fd1498Szrj   scale_bbs_frequencies (bbs, loop->num_nodes, p);
500*38fd1498Szrj   free (bbs);
501*38fd1498Szrj }
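
/* Illustrative sketch (hypothetical caller): halve the profile of LOOP's
   body, e.g. after distributing its executions between two copies.

     scale_loop_frequencies (loop, profile_probability::even ());

   profile_probability::even () is the same 50% scale that
   create_empty_loop_on_edge uses below.  */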
502*38fd1498Szrj 
503*38fd1498Szrj /* Scale profile in LOOP by P.
504*38fd1498Szrj    If ITERATION_BOUND is non-zero, scale even further if loop is predicted
505*38fd1498Szrj    to iterate too many times.
506*38fd1498Szrj    Before calling this function, the preheader block profile should already be
507*38fd1498Szrj    scaled to final count.  This is necessary because loop iterations are
508*38fd1498Szrj    determined by comparing the header edge count to the latch edge count and thus
509*38fd1498Szrj    they need to be scaled synchronously.  */
510*38fd1498Szrj 
511*38fd1498Szrj void
512*38fd1498Szrj scale_loop_profile (struct loop *loop, profile_probability p,
513*38fd1498Szrj 		    gcov_type iteration_bound)
514*38fd1498Szrj {
515*38fd1498Szrj   edge e, preheader_e;
516*38fd1498Szrj   edge_iterator ei;
517*38fd1498Szrj 
518*38fd1498Szrj   if (dump_file && (dump_flags & TDF_DETAILS))
519*38fd1498Szrj     {
520*38fd1498Szrj       fprintf (dump_file, ";; Scaling loop %i with scale ",
521*38fd1498Szrj 	       loop->num);
522*38fd1498Szrj       p.dump (dump_file);
523*38fd1498Szrj       fprintf (dump_file, " bounding iterations to %i\n",
524*38fd1498Szrj 	       (int)iteration_bound);
525*38fd1498Szrj     }
526*38fd1498Szrj 
527*38fd1498Szrj   /* Scale the probabilities.  */
528*38fd1498Szrj   scale_loop_frequencies (loop, p);
529*38fd1498Szrj 
530*38fd1498Szrj   if (iteration_bound == 0)
531*38fd1498Szrj     return;
532*38fd1498Szrj 
533*38fd1498Szrj   gcov_type iterations = expected_loop_iterations_unbounded (loop, NULL, true);
534*38fd1498Szrj 
535*38fd1498Szrj   if (dump_file && (dump_flags & TDF_DETAILS))
536*38fd1498Szrj     {
537*38fd1498Szrj       fprintf (dump_file, ";; guessed iterations after scaling %i\n",
538*38fd1498Szrj 	       (int)iterations);
539*38fd1498Szrj     }
540*38fd1498Szrj 
541*38fd1498Szrj   /* See if loop is predicted to iterate too many times.  */
542*38fd1498Szrj   if (iterations <= iteration_bound)
543*38fd1498Szrj     return;
544*38fd1498Szrj 
545*38fd1498Szrj   preheader_e = loop_preheader_edge (loop);
546*38fd1498Szrj 
547*38fd1498Szrj   /* We could also handle loops without preheaders, but bounding is
548*38fd1498Szrj      currently used only by optimizers that have preheaders constructed.  */
549*38fd1498Szrj   gcc_checking_assert (preheader_e);
550*38fd1498Szrj   profile_count count_in = preheader_e->count ();
551*38fd1498Szrj 
552*38fd1498Szrj   if (count_in > profile_count::zero ()
553*38fd1498Szrj       && loop->header->count.initialized_p ())
554*38fd1498Szrj     {
555*38fd1498Szrj       profile_count count_delta = profile_count::zero ();
556*38fd1498Szrj 
557*38fd1498Szrj       e = single_exit (loop);
558*38fd1498Szrj       if (e)
559*38fd1498Szrj 	{
560*38fd1498Szrj 	  edge other_e;
561*38fd1498Szrj 	  FOR_EACH_EDGE (other_e, ei, e->src->succs)
562*38fd1498Szrj 	    if (!(other_e->flags & (EDGE_ABNORMAL | EDGE_FAKE))
563*38fd1498Szrj 		&& e != other_e)
564*38fd1498Szrj 	      break;
565*38fd1498Szrj 
566*38fd1498Szrj 	  /* Probability of exit must be 1/iterations.  */
567*38fd1498Szrj 	  count_delta = e->count ();
568*38fd1498Szrj 	  e->probability = profile_probability::always ()
569*38fd1498Szrj 				    .apply_scale (1, iteration_bound);
570*38fd1498Szrj 	  other_e->probability = e->probability.invert ();
571*38fd1498Szrj 
572*38fd1498Szrj 	  /* In code below we only handle the following two updates.  */
573*38fd1498Szrj 	  if (other_e->dest != loop->header
574*38fd1498Szrj 	      && other_e->dest != loop->latch
575*38fd1498Szrj 	      && (dump_file && (dump_flags & TDF_DETAILS)))
576*38fd1498Szrj 	    {
577*38fd1498Szrj 	      fprintf (dump_file, ";; giving up on update of paths from "
578*38fd1498Szrj 		       "exit condition to latch\n");
579*38fd1498Szrj 	    }
580*38fd1498Szrj 	}
581*38fd1498Szrj       else
582*38fd1498Szrj         if (dump_file && (dump_flags & TDF_DETAILS))
583*38fd1498Szrj 	  fprintf (dump_file, ";; Loop has multiple exit edges; "
584*38fd1498Szrj 	      		      "giving up on exit condition update\n");
585*38fd1498Szrj 
586*38fd1498Szrj       /* Roughly speaking we want to reduce the loop body profile by the
587*38fd1498Szrj 	 difference of loop iterations.  However, we can do better if
588*38fd1498Szrj 	 we look at the actual profile, if it is available.  */
589*38fd1498Szrj       p = profile_probability::always ();
590*38fd1498Szrj 
591*38fd1498Szrj       count_in = count_in.apply_scale (iteration_bound, 1);
592*38fd1498Szrj       p = count_in.probability_in (loop->header->count);
593*38fd1498Szrj       if (!(p > profile_probability::never ()))
594*38fd1498Szrj 	p = profile_probability::very_unlikely ();
595*38fd1498Szrj 
596*38fd1498Szrj       if (p == profile_probability::always ()
597*38fd1498Szrj 	  || !p.initialized_p ())
598*38fd1498Szrj 	return;
599*38fd1498Szrj 
600*38fd1498Szrj       /* If latch exists, change its count, since we changed
601*38fd1498Szrj 	 probability of exit.  Theoretically we should update everything from
602*38fd1498Szrj 	 source of exit edge to latch, but for vectorizer this is enough.  */
603*38fd1498Szrj       if (loop->latch && loop->latch != e->src)
604*38fd1498Szrj 	loop->latch->count += count_delta;
605*38fd1498Szrj 
606*38fd1498Szrj       /* Scale the probabilities.  */
607*38fd1498Szrj       scale_loop_frequencies (loop, p);
608*38fd1498Szrj 
609*38fd1498Szrj       /* Change latch's count back.  */
610*38fd1498Szrj       if (loop->latch && loop->latch != e->src)
611*38fd1498Szrj 	loop->latch->count -= count_delta;
612*38fd1498Szrj 
613*38fd1498Szrj       if (dump_file && (dump_flags & TDF_DETAILS))
614*38fd1498Szrj 	fprintf (dump_file, ";; guessed iterations are now %i\n",
615*38fd1498Szrj 		 (int)expected_loop_iterations_unbounded (loop, NULL, true));
616*38fd1498Szrj     }
617*38fd1498Szrj }
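
/* Illustrative sketch (hypothetical caller): a transformation that leaves
   at most four iterations in LOOP could downscale its body and cap the
   predicted iteration count in one call.

     scale_loop_profile (loop,
			 profile_probability::always ().apply_scale (1, 4),
			 4);  */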
618*38fd1498Szrj 
619*38fd1498Szrj /* Recompute dominance information for basic blocks outside LOOP.  */
620*38fd1498Szrj 
621*38fd1498Szrj static void
622*38fd1498Szrj update_dominators_in_loop (struct loop *loop)
623*38fd1498Szrj {
624*38fd1498Szrj   vec<basic_block> dom_bbs = vNULL;
625*38fd1498Szrj   basic_block *body;
626*38fd1498Szrj   unsigned i;
627*38fd1498Szrj 
628*38fd1498Szrj   auto_sbitmap seen (last_basic_block_for_fn (cfun));
629*38fd1498Szrj   bitmap_clear (seen);
630*38fd1498Szrj   body = get_loop_body (loop);
631*38fd1498Szrj 
632*38fd1498Szrj   for (i = 0; i < loop->num_nodes; i++)
633*38fd1498Szrj     bitmap_set_bit (seen, body[i]->index);
634*38fd1498Szrj 
635*38fd1498Szrj   for (i = 0; i < loop->num_nodes; i++)
636*38fd1498Szrj     {
637*38fd1498Szrj       basic_block ldom;
638*38fd1498Szrj 
639*38fd1498Szrj       for (ldom = first_dom_son (CDI_DOMINATORS, body[i]);
640*38fd1498Szrj 	   ldom;
641*38fd1498Szrj 	   ldom = next_dom_son (CDI_DOMINATORS, ldom))
642*38fd1498Szrj 	if (!bitmap_bit_p (seen, ldom->index))
643*38fd1498Szrj 	  {
644*38fd1498Szrj 	    bitmap_set_bit (seen, ldom->index);
645*38fd1498Szrj 	    dom_bbs.safe_push (ldom);
646*38fd1498Szrj 	  }
647*38fd1498Szrj     }
648*38fd1498Szrj 
649*38fd1498Szrj   iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false);
650*38fd1498Szrj   free (body);
651*38fd1498Szrj   dom_bbs.release ();
652*38fd1498Szrj }
653*38fd1498Szrj 
654*38fd1498Szrj /* Creates an if region as shown below.  CONDITION is used to create
655*38fd1498Szrj    the test for the if.
656*38fd1498Szrj 
657*38fd1498Szrj    |
658*38fd1498Szrj    |     -------------                 -------------
659*38fd1498Szrj    |     |  pred_bb  |                 |  pred_bb  |
660*38fd1498Szrj    |     -------------                 -------------
661*38fd1498Szrj    |           |                             |
662*38fd1498Szrj    |           |                             | ENTRY_EDGE
663*38fd1498Szrj    |           | ENTRY_EDGE                  V
664*38fd1498Szrj    |           |             ====>     -------------
665*38fd1498Szrj    |           |                       |  cond_bb  |
666*38fd1498Szrj    |           |                       | CONDITION |
667*38fd1498Szrj    |           |                       -------------
668*38fd1498Szrj    |           V                        /         \
669*38fd1498Szrj    |     -------------         e_false /           \ e_true
670*38fd1498Szrj    |     |  succ_bb  |                V             V
671*38fd1498Szrj    |     -------------         -----------       -----------
672*38fd1498Szrj    |                           | false_bb |      | true_bb |
673*38fd1498Szrj    |                           -----------       -----------
674*38fd1498Szrj    |                                   \           /
675*38fd1498Szrj    |                                    \         /
676*38fd1498Szrj    |                                     V       V
677*38fd1498Szrj    |                                   -------------
678*38fd1498Szrj    |                                   |  join_bb  |
679*38fd1498Szrj    |                                   -------------
680*38fd1498Szrj    |                                         | exit_edge (result)
681*38fd1498Szrj    |                                         V
682*38fd1498Szrj    |                                    -----------
683*38fd1498Szrj    |                                    | succ_bb |
684*38fd1498Szrj    |                                    -----------
685*38fd1498Szrj    |
686*38fd1498Szrj  */
687*38fd1498Szrj 
688*38fd1498Szrj edge
689*38fd1498Szrj create_empty_if_region_on_edge (edge entry_edge, tree condition)
690*38fd1498Szrj {
691*38fd1498Szrj 
692*38fd1498Szrj   basic_block cond_bb, true_bb, false_bb, join_bb;
693*38fd1498Szrj   edge e_true, e_false, exit_edge;
694*38fd1498Szrj   gcond *cond_stmt;
695*38fd1498Szrj   tree simple_cond;
696*38fd1498Szrj   gimple_stmt_iterator gsi;
697*38fd1498Szrj 
698*38fd1498Szrj   cond_bb = split_edge (entry_edge);
699*38fd1498Szrj 
700*38fd1498Szrj   /* Insert condition in cond_bb.  */
701*38fd1498Szrj   gsi = gsi_last_bb (cond_bb);
702*38fd1498Szrj   simple_cond =
703*38fd1498Szrj     force_gimple_operand_gsi (&gsi, condition, true, NULL,
704*38fd1498Szrj 			      false, GSI_NEW_STMT);
705*38fd1498Szrj   cond_stmt = gimple_build_cond_from_tree (simple_cond, NULL_TREE, NULL_TREE);
706*38fd1498Szrj   gsi = gsi_last_bb (cond_bb);
707*38fd1498Szrj   gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
708*38fd1498Szrj 
709*38fd1498Szrj   join_bb = split_edge (single_succ_edge (cond_bb));
710*38fd1498Szrj 
711*38fd1498Szrj   e_true = single_succ_edge (cond_bb);
712*38fd1498Szrj   true_bb = split_edge (e_true);
713*38fd1498Szrj 
714*38fd1498Szrj   e_false = make_edge (cond_bb, join_bb, 0);
715*38fd1498Szrj   false_bb = split_edge (e_false);
716*38fd1498Szrj 
717*38fd1498Szrj   e_true->flags &= ~EDGE_FALLTHRU;
718*38fd1498Szrj   e_true->flags |= EDGE_TRUE_VALUE;
719*38fd1498Szrj   e_false->flags &= ~EDGE_FALLTHRU;
720*38fd1498Szrj   e_false->flags |= EDGE_FALSE_VALUE;
721*38fd1498Szrj 
722*38fd1498Szrj   set_immediate_dominator (CDI_DOMINATORS, cond_bb, entry_edge->src);
723*38fd1498Szrj   set_immediate_dominator (CDI_DOMINATORS, true_bb, cond_bb);
724*38fd1498Szrj   set_immediate_dominator (CDI_DOMINATORS, false_bb, cond_bb);
725*38fd1498Szrj   set_immediate_dominator (CDI_DOMINATORS, join_bb, cond_bb);
726*38fd1498Szrj 
727*38fd1498Szrj   exit_edge = single_succ_edge (join_bb);
728*38fd1498Szrj 
729*38fd1498Szrj   if (single_pred_p (exit_edge->dest))
730*38fd1498Szrj     set_immediate_dominator (CDI_DOMINATORS, exit_edge->dest, join_bb);
731*38fd1498Szrj 
732*38fd1498Szrj   return exit_edge;
733*38fd1498Szrj }
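
/* Illustrative sketch (hypothetical caller): guard a region with a runtime
   test "n > 0"; the GENERIC condition is gimplified into the new cond_bb by
   the function itself.

     tree cond = build2 (GT_EXPR, boolean_type_node, n,
			 build_int_cst (TREE_TYPE (n), 0));
     edge exit = create_empty_if_region_on_edge (entry_edge, cond);

   The caller can then populate the empty true/false blocks reached from the
   new condition block.  */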
734*38fd1498Szrj 
735*38fd1498Szrj /* create_empty_loop_on_edge
736*38fd1498Szrj    |
737*38fd1498Szrj    |    - pred_bb -                   ------ pred_bb ------
738*38fd1498Szrj    |   |           |                 | iv0 = initial_value |
739*38fd1498Szrj    |    -----|-----                   ---------|-----------
740*38fd1498Szrj    |         |                       ______    | entry_edge
741*38fd1498Szrj    |         | entry_edge           /      |   |
742*38fd1498Szrj    |         |             ====>   |      -V---V- loop_header -------------
743*38fd1498Szrj    |         V                     |     | iv_before = phi (iv0, iv_after) |
744*38fd1498Szrj    |    - succ_bb -                |      ---|-----------------------------
745*38fd1498Szrj    |   |           |               |         |
746*38fd1498Szrj    |    -----------                |      ---V--- loop_body ---------------
747*38fd1498Szrj    |                               |     | iv_after = iv_before + stride   |
748*38fd1498Szrj    |                               |     | if (iv_before < upper_bound)    |
749*38fd1498Szrj    |                               |      ---|--------------\--------------
750*38fd1498Szrj    |                               |         |               \ exit_e
751*38fd1498Szrj    |                               |         V                \
752*38fd1498Szrj    |                               |       - loop_latch -      V- succ_bb -
753*38fd1498Szrj    |                               |      |              |     |           |
754*38fd1498Szrj    |                               |       /-------------       -----------
755*38fd1498Szrj    |                                \ ___ /
756*38fd1498Szrj 
757*38fd1498Szrj    Creates an empty loop as shown above.  IV_BEFORE is the SSA_NAME
758*38fd1498Szrj    that is used before the increment of IV. IV_BEFORE should be used for
759*38fd1498Szrj    adding code to the body that uses the IV.  OUTER is the outer loop in
760*38fd1498Szrj    which the new loop should be inserted.
761*38fd1498Szrj 
762*38fd1498Szrj    Both INITIAL_VALUE and UPPER_BOUND expressions are gimplified and
763*38fd1498Szrj    inserted on the loop entry edge.  This implies that this function
764*38fd1498Szrj    should be used only when the UPPER_BOUND expression is a loop
765*38fd1498Szrj    invariant.  */
766*38fd1498Szrj 
767*38fd1498Szrj struct loop *
768*38fd1498Szrj create_empty_loop_on_edge (edge entry_edge,
769*38fd1498Szrj 			   tree initial_value,
770*38fd1498Szrj 			   tree stride, tree upper_bound,
771*38fd1498Szrj 			   tree iv,
772*38fd1498Szrj 			   tree *iv_before,
773*38fd1498Szrj 			   tree *iv_after,
774*38fd1498Szrj 			   struct loop *outer)
775*38fd1498Szrj {
776*38fd1498Szrj   basic_block loop_header, loop_latch, succ_bb, pred_bb;
777*38fd1498Szrj   struct loop *loop;
778*38fd1498Szrj   gimple_stmt_iterator gsi;
779*38fd1498Szrj   gimple_seq stmts;
780*38fd1498Szrj   gcond *cond_expr;
781*38fd1498Szrj   tree exit_test;
782*38fd1498Szrj   edge exit_e;
783*38fd1498Szrj 
784*38fd1498Szrj   gcc_assert (entry_edge && initial_value && stride && upper_bound && iv);
785*38fd1498Szrj 
786*38fd1498Szrj   /* Create header, latch and wire up the loop.  */
787*38fd1498Szrj   pred_bb = entry_edge->src;
788*38fd1498Szrj   loop_header = split_edge (entry_edge);
789*38fd1498Szrj   loop_latch = split_edge (single_succ_edge (loop_header));
790*38fd1498Szrj   succ_bb = single_succ (loop_latch);
791*38fd1498Szrj   make_edge (loop_header, succ_bb, 0);
792*38fd1498Szrj   redirect_edge_succ_nodup (single_succ_edge (loop_latch), loop_header);
793*38fd1498Szrj 
794*38fd1498Szrj   /* Set immediate dominator information.  */
795*38fd1498Szrj   set_immediate_dominator (CDI_DOMINATORS, loop_header, pred_bb);
796*38fd1498Szrj   set_immediate_dominator (CDI_DOMINATORS, loop_latch, loop_header);
797*38fd1498Szrj   set_immediate_dominator (CDI_DOMINATORS, succ_bb, loop_header);
798*38fd1498Szrj 
799*38fd1498Szrj   /* Initialize a loop structure and put it in a loop hierarchy.  */
800*38fd1498Szrj   loop = alloc_loop ();
801*38fd1498Szrj   loop->header = loop_header;
802*38fd1498Szrj   loop->latch = loop_latch;
803*38fd1498Szrj   add_loop (loop, outer);
804*38fd1498Szrj 
805*38fd1498Szrj   /* TODO: Fix counts.  */
806*38fd1498Szrj   scale_loop_frequencies (loop, profile_probability::even ());
807*38fd1498Szrj 
808*38fd1498Szrj   /* Update dominators.  */
809*38fd1498Szrj   update_dominators_in_loop (loop);
810*38fd1498Szrj 
811*38fd1498Szrj   /* Modify edge flags.  */
812*38fd1498Szrj   exit_e = single_exit (loop);
813*38fd1498Szrj   exit_e->flags = EDGE_LOOP_EXIT | EDGE_FALSE_VALUE;
814*38fd1498Szrj   single_pred_edge (loop_latch)->flags = EDGE_TRUE_VALUE;
815*38fd1498Szrj 
816*38fd1498Szrj   /* Construct IV code in loop.  */
817*38fd1498Szrj   initial_value = force_gimple_operand (initial_value, &stmts, true, iv);
818*38fd1498Szrj   if (stmts)
819*38fd1498Szrj     {
820*38fd1498Szrj       gsi_insert_seq_on_edge (loop_preheader_edge (loop), stmts);
821*38fd1498Szrj       gsi_commit_edge_inserts ();
822*38fd1498Szrj     }
823*38fd1498Szrj 
824*38fd1498Szrj   upper_bound = force_gimple_operand (upper_bound, &stmts, true, NULL);
825*38fd1498Szrj   if (stmts)
826*38fd1498Szrj     {
827*38fd1498Szrj       gsi_insert_seq_on_edge (loop_preheader_edge (loop), stmts);
828*38fd1498Szrj       gsi_commit_edge_inserts ();
829*38fd1498Szrj     }
830*38fd1498Szrj 
831*38fd1498Szrj   gsi = gsi_last_bb (loop_header);
832*38fd1498Szrj   create_iv (initial_value, stride, iv, loop, &gsi, false,
833*38fd1498Szrj 	     iv_before, iv_after);
834*38fd1498Szrj 
835*38fd1498Szrj   /* Insert loop exit condition.  */
836*38fd1498Szrj   cond_expr = gimple_build_cond
837*38fd1498Szrj     (LT_EXPR, *iv_before, upper_bound, NULL_TREE, NULL_TREE);
838*38fd1498Szrj 
839*38fd1498Szrj   exit_test = gimple_cond_lhs (cond_expr);
840*38fd1498Szrj   exit_test = force_gimple_operand_gsi (&gsi, exit_test, true, NULL,
841*38fd1498Szrj 					false, GSI_NEW_STMT);
842*38fd1498Szrj   gimple_cond_set_lhs (cond_expr, exit_test);
843*38fd1498Szrj   gsi = gsi_last_bb (exit_e->src);
844*38fd1498Szrj   gsi_insert_after (&gsi, cond_expr, GSI_NEW_STMT);
845*38fd1498Szrj 
846*38fd1498Szrj   split_block_after_labels (loop_header);
847*38fd1498Szrj 
848*38fd1498Szrj   return loop;
849*38fd1498Szrj }
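
/* Illustrative sketch (hypothetical caller): build a loop running an
   induction variable from 0 to NB_ITERS with stride 1 on edge ENTRY, where
   NB_ITERS is invariant in OUTER.  Statements using the IV would then be
   inserted into the body via IV_BEFORE.

     tree iv_before, iv_after;
     tree ivvar = create_tmp_var (sizetype, "iv");
     struct loop *new_loop
       = create_empty_loop_on_edge (entry, build_int_cst (sizetype, 0),
				    build_int_cst (sizetype, 1), nb_iters,
				    ivvar, &iv_before, &iv_after, outer);  */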
850*38fd1498Szrj 
851*38fd1498Szrj /* Make area between HEADER_EDGE and LATCH_EDGE a loop by connecting
852*38fd1498Szrj    latch to header and update loop tree and dominators
853*38fd1498Szrj    accordingly. Everything between them plus LATCH_EDGE destination must
854*38fd1498Szrj    be dominated by HEADER_EDGE destination, and back-reachable from
855*38fd1498Szrj    LATCH_EDGE source.  HEADER_EDGE is redirected to basic block SWITCH_BB,
856*38fd1498Szrj    FALSE_EDGE of SWITCH_BB to original destination of HEADER_EDGE and
857*38fd1498Szrj    TRUE_EDGE of SWITCH_BB to original destination of LATCH_EDGE.
858*38fd1498Szrj    Returns the newly created loop.  Frequencies and counts in the new loop
859*38fd1498Szrj    are scaled by FALSE_SCALE and in the old one by TRUE_SCALE.  */
860*38fd1498Szrj 
861*38fd1498Szrj struct loop *
862*38fd1498Szrj loopify (edge latch_edge, edge header_edge,
863*38fd1498Szrj 	 basic_block switch_bb, edge true_edge, edge false_edge,
864*38fd1498Szrj 	 bool redirect_all_edges, profile_probability true_scale,
865*38fd1498Szrj 	 profile_probability false_scale)
866*38fd1498Szrj {
867*38fd1498Szrj   basic_block succ_bb = latch_edge->dest;
868*38fd1498Szrj   basic_block pred_bb = header_edge->src;
869*38fd1498Szrj   struct loop *loop = alloc_loop ();
870*38fd1498Szrj   struct loop *outer = loop_outer (succ_bb->loop_father);
871*38fd1498Szrj   profile_count cnt;
872*38fd1498Szrj 
873*38fd1498Szrj   loop->header = header_edge->dest;
874*38fd1498Szrj   loop->latch = latch_edge->src;
875*38fd1498Szrj 
876*38fd1498Szrj   cnt = header_edge->count ();
877*38fd1498Szrj 
878*38fd1498Szrj   /* Redirect edges.  */
879*38fd1498Szrj   loop_redirect_edge (latch_edge, loop->header);
880*38fd1498Szrj   loop_redirect_edge (true_edge, succ_bb);
881*38fd1498Szrj 
882*38fd1498Szrj   /* During loop versioning, one of the switch_bb edges is already properly
883*38fd1498Szrj      set.  Do not redirect it again unless redirect_all_edges is true.  */
884*38fd1498Szrj   if (redirect_all_edges)
885*38fd1498Szrj     {
886*38fd1498Szrj       loop_redirect_edge (header_edge, switch_bb);
887*38fd1498Szrj       loop_redirect_edge (false_edge, loop->header);
888*38fd1498Szrj 
889*38fd1498Szrj       /* Update dominators.  */
890*38fd1498Szrj       set_immediate_dominator (CDI_DOMINATORS, switch_bb, pred_bb);
891*38fd1498Szrj       set_immediate_dominator (CDI_DOMINATORS, loop->header, switch_bb);
892*38fd1498Szrj     }
893*38fd1498Szrj 
894*38fd1498Szrj   set_immediate_dominator (CDI_DOMINATORS, succ_bb, switch_bb);
895*38fd1498Szrj 
896*38fd1498Szrj   /* Compute new loop.  */
897*38fd1498Szrj   add_loop (loop, outer);
898*38fd1498Szrj 
899*38fd1498Szrj   /* Add switch_bb to appropriate loop.  */
900*38fd1498Szrj   if (switch_bb->loop_father)
901*38fd1498Szrj     remove_bb_from_loops (switch_bb);
902*38fd1498Szrj   add_bb_to_loop (switch_bb, outer);
903*38fd1498Szrj 
904*38fd1498Szrj   /* Fix counts.  */
905*38fd1498Szrj   if (redirect_all_edges)
906*38fd1498Szrj     {
907*38fd1498Szrj       switch_bb->count = cnt;
908*38fd1498Szrj     }
909*38fd1498Szrj   scale_loop_frequencies (loop, false_scale);
910*38fd1498Szrj   scale_loop_frequencies (succ_bb->loop_father, true_scale);
911*38fd1498Szrj   update_dominators_in_loop (loop);
912*38fd1498Szrj 
913*38fd1498Szrj   return loop;
914*38fd1498Szrj }
915*38fd1498Szrj 
916*38fd1498Szrj /* Remove the latch edge of a LOOP and update loops to indicate that
917*38fd1498Szrj    the LOOP was removed.  After this function, the original loop latch will
918*38fd1498Szrj    have no successor, which the caller is expected to fix somehow.
919*38fd1498Szrj 
920*38fd1498Szrj    If this may cause the information about irreducible regions to become
921*38fd1498Szrj    invalid, IRRED_INVALIDATED is set to true.
922*38fd1498Szrj 
923*38fd1498Szrj    LOOP_CLOSED_SSA_INVALIDATED, if non-NULL, is a bitmap where we store
924*38fd1498Szrj    basic blocks that had a non-trivial update of their loop_father.  */
925*38fd1498Szrj 
926*38fd1498Szrj void
927*38fd1498Szrj unloop (struct loop *loop, bool *irred_invalidated,
928*38fd1498Szrj 	bitmap loop_closed_ssa_invalidated)
929*38fd1498Szrj {
930*38fd1498Szrj   basic_block *body;
931*38fd1498Szrj   struct loop *ploop;
932*38fd1498Szrj   unsigned i, n;
933*38fd1498Szrj   basic_block latch = loop->latch;
934*38fd1498Szrj   bool dummy = false;
935*38fd1498Szrj 
936*38fd1498Szrj   if (loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP)
937*38fd1498Szrj     *irred_invalidated = true;
938*38fd1498Szrj 
939*38fd1498Szrj   /* This is relatively straightforward.  The dominators are unchanged, as
940*38fd1498Szrj      the loop header dominates the loop latch, so the only thing we have to
941*38fd1498Szrj      care about is the placement of loops and basic blocks inside the loop
942*38fd1498Szrj      tree.  We move them all to the outer loop, and then let fix_bb_placements do
943*38fd1498Szrj      its work.  */
944*38fd1498Szrj 
945*38fd1498Szrj   body = get_loop_body (loop);
946*38fd1498Szrj   n = loop->num_nodes;
947*38fd1498Szrj   for (i = 0; i < n; i++)
948*38fd1498Szrj     if (body[i]->loop_father == loop)
949*38fd1498Szrj       {
950*38fd1498Szrj 	remove_bb_from_loops (body[i]);
951*38fd1498Szrj 	add_bb_to_loop (body[i], loop_outer (loop));
952*38fd1498Szrj       }
953*38fd1498Szrj   free (body);
954*38fd1498Szrj 
955*38fd1498Szrj   while (loop->inner)
956*38fd1498Szrj     {
957*38fd1498Szrj       ploop = loop->inner;
958*38fd1498Szrj       flow_loop_tree_node_remove (ploop);
959*38fd1498Szrj       flow_loop_tree_node_add (loop_outer (loop), ploop);
960*38fd1498Szrj     }
961*38fd1498Szrj 
962*38fd1498Szrj   /* Remove the loop and free its data.  */
963*38fd1498Szrj   delete_loop (loop);
964*38fd1498Szrj 
965*38fd1498Szrj   remove_edge (single_succ_edge (latch));
966*38fd1498Szrj 
967*38fd1498Szrj   /* We do not pass IRRED_INVALIDATED to fix_bb_placements here, as even if
968*38fd1498Szrj      there is an irreducible region inside the cancelled loop, the flags will
969*38fd1498Szrj      still be correct.  */
970*38fd1498Szrj   fix_bb_placements (latch, &dummy, loop_closed_ssa_invalidated);
971*38fd1498Szrj }
972*38fd1498Szrj 
973*38fd1498Szrj /* Fix placement of superloops of LOOP inside loop tree, i.e. ensure that
974*38fd1498Szrj    the condition stated in the description of fix_loop_placement holds for them.
975*38fd1498Szrj    It is used in the case when we have removed some edges coming out of LOOP,
976*38fd1498Szrj    which may cause the right placement of LOOP inside the loop tree to change.
977*38fd1498Szrj 
978*38fd1498Szrj    IRRED_INVALIDATED is set to true if a change in the loop structures might
979*38fd1498Szrj    invalidate the information about irreducible regions.  */
980*38fd1498Szrj 
981*38fd1498Szrj static void
982*38fd1498Szrj fix_loop_placements (struct loop *loop, bool *irred_invalidated)
983*38fd1498Szrj {
984*38fd1498Szrj   struct loop *outer;
985*38fd1498Szrj 
986*38fd1498Szrj   while (loop_outer (loop))
987*38fd1498Szrj     {
988*38fd1498Szrj       outer = loop_outer (loop);
989*38fd1498Szrj       if (!fix_loop_placement (loop, irred_invalidated))
990*38fd1498Szrj 	break;
991*38fd1498Szrj 
992*38fd1498Szrj       /* Changing the placement of a loop in the loop tree may alter the
993*38fd1498Szrj 	 validity of condition 2) of the description of fix_bb_placement
994*38fd1498Szrj 	 for its preheader, because the successor is the header and belongs
995*38fd1498Szrj 	 to the loop.  So call fix_bb_placements to fix up the placement
996*38fd1498Szrj 	 of the preheader and (possibly) of its predecessors.  */
997*38fd1498Szrj       fix_bb_placements (loop_preheader_edge (loop)->src,
998*38fd1498Szrj 			 irred_invalidated, NULL);
999*38fd1498Szrj       loop = outer;
1000*38fd1498Szrj     }
1001*38fd1498Szrj }
1002*38fd1498Szrj 
1003*38fd1498Szrj /* Duplicate loop bounds and other information we store about
1004*38fd1498Szrj    the loop into its duplicate.  */
1005*38fd1498Szrj 
1006*38fd1498Szrj void
1007*38fd1498Szrj copy_loop_info (struct loop *loop, struct loop *target)
1008*38fd1498Szrj {
1009*38fd1498Szrj   gcc_checking_assert (!target->any_upper_bound && !target->any_estimate);
1010*38fd1498Szrj   target->any_upper_bound = loop->any_upper_bound;
1011*38fd1498Szrj   target->nb_iterations_upper_bound = loop->nb_iterations_upper_bound;
1012*38fd1498Szrj   target->any_likely_upper_bound = loop->any_likely_upper_bound;
1013*38fd1498Szrj   target->nb_iterations_likely_upper_bound
1014*38fd1498Szrj     = loop->nb_iterations_likely_upper_bound;
1015*38fd1498Szrj   target->any_estimate = loop->any_estimate;
1016*38fd1498Szrj   target->nb_iterations_estimate = loop->nb_iterations_estimate;
1017*38fd1498Szrj   target->estimate_state = loop->estimate_state;
1018*38fd1498Szrj   target->constraints = loop->constraints;
1019*38fd1498Szrj   target->warned_aggressive_loop_optimizations
1020*38fd1498Szrj     |= loop->warned_aggressive_loop_optimizations;
1021*38fd1498Szrj   target->in_oacc_kernels_region = loop->in_oacc_kernels_region;
1022*38fd1498Szrj }
1023*38fd1498Szrj 
1024*38fd1498Szrj /* Copies LOOP as a subloop of TARGET loop, placing the newly
1025*38fd1498Szrj    created loop into the loops structure.  If AFTER is non-null,
1026*38fd1498Szrj    the new loop is added at AFTER->next, otherwise in front of TARGET's
1027*38fd1498Szrj    sibling list.  */
1028*38fd1498Szrj struct loop *
1029*38fd1498Szrj duplicate_loop (struct loop *loop, struct loop *target, struct loop *after)
1030*38fd1498Szrj {
1031*38fd1498Szrj   struct loop *cloop;
1032*38fd1498Szrj   cloop = alloc_loop ();
1033*38fd1498Szrj   place_new_loop (cfun, cloop);
1034*38fd1498Szrj 
1035*38fd1498Szrj   copy_loop_info (loop, cloop);
1036*38fd1498Szrj 
1037*38fd1498Szrj   /* Mark the new loop as a copy of LOOP.  */
1038*38fd1498Szrj   set_loop_copy (loop, cloop);
1039*38fd1498Szrj 
1040*38fd1498Szrj   /* Add it to target.  */
1041*38fd1498Szrj   flow_loop_tree_node_add (target, cloop, after);
1042*38fd1498Szrj 
1043*38fd1498Szrj   return cloop;
1044*38fd1498Szrj }
1045*38fd1498Szrj 
1046*38fd1498Szrj /* Copies structure of subloops of LOOP into TARGET loop, placing
1047*38fd1498Szrj    newly created loops into the loop tree at the end of TARGET's sibling
1048*38fd1498Szrj    list in the original order.  */
1049*38fd1498Szrj void
1050*38fd1498Szrj duplicate_subloops (struct loop *loop, struct loop *target)
1051*38fd1498Szrj {
1052*38fd1498Szrj   struct loop *aloop, *cloop, *tail;
1053*38fd1498Szrj 
1054*38fd1498Szrj   for (tail = target->inner; tail && tail->next; tail = tail->next)
1055*38fd1498Szrj     ;
1056*38fd1498Szrj   for (aloop = loop->inner; aloop; aloop = aloop->next)
1057*38fd1498Szrj     {
1058*38fd1498Szrj       cloop = duplicate_loop (aloop, target, tail);
1059*38fd1498Szrj       tail = cloop;
1060*38fd1498Szrj       gcc_assert(!tail->next);
1061*38fd1498Szrj       duplicate_subloops (aloop, cloop);
1062*38fd1498Szrj     }
1063*38fd1498Szrj }
1064*38fd1498Szrj 
1065*38fd1498Szrj /* Copies structure of subloops of N loops, stored in array COPIED_LOOPS,
1066*38fd1498Szrj    into TARGET loop, placing newly created loops into the loop tree and adding
1067*38fd1498Szrj    them to TARGET's sibling list at the end, in order.  */
1068*38fd1498Szrj static void
1069*38fd1498Szrj copy_loops_to (struct loop **copied_loops, int n, struct loop *target)
1070*38fd1498Szrj {
1071*38fd1498Szrj   struct loop *aloop, *tail;
1072*38fd1498Szrj   int i;
1073*38fd1498Szrj 
1074*38fd1498Szrj   for (tail = target->inner; tail && tail->next; tail = tail->next)
1075*38fd1498Szrj     ;
1076*38fd1498Szrj   for (i = 0; i < n; i++)
1077*38fd1498Szrj     {
1078*38fd1498Szrj       aloop = duplicate_loop (copied_loops[i], target, tail);
1079*38fd1498Szrj       tail = aloop;
1080*38fd1498Szrj       gcc_assert(!tail->next);
1081*38fd1498Szrj       duplicate_subloops (copied_loops[i], aloop);
1082*38fd1498Szrj     }
1083*38fd1498Szrj }
1084*38fd1498Szrj 
1085*38fd1498Szrj /* Redirects edge E to basic block DEST.  */
1086*38fd1498Szrj static void
1087*38fd1498Szrj loop_redirect_edge (edge e, basic_block dest)
1088*38fd1498Szrj {
1089*38fd1498Szrj   if (e->dest == dest)
1090*38fd1498Szrj     return;
1091*38fd1498Szrj 
1092*38fd1498Szrj   redirect_edge_and_branch_force (e, dest);
1093*38fd1498Szrj }
1094*38fd1498Szrj 
1095*38fd1498Szrj /* Check whether LOOP's body can be duplicated.  */
1096*38fd1498Szrj bool
1097*38fd1498Szrj can_duplicate_loop_p (const struct loop *loop)
1098*38fd1498Szrj {
1099*38fd1498Szrj   int ret;
1100*38fd1498Szrj   basic_block *bbs = get_loop_body (loop);
1101*38fd1498Szrj 
1102*38fd1498Szrj   ret = can_copy_bbs_p (bbs, loop->num_nodes);
1103*38fd1498Szrj   free (bbs);
1104*38fd1498Szrj 
1105*38fd1498Szrj   return ret;
1106*38fd1498Szrj }
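
/* Illustrative sketch (hypothetical caller): unroll LOOP twice through its
   latch edge using duplicate_loop_to_header_edge (defined below), collecting
   the copies of exit edge EXIT in TO_REMOVE so the caller can remove_path
   them afterwards.

     auto_sbitmap wont_exit (3);
     auto_vec<edge> to_remove;

     bitmap_clear (wont_exit);
     if (can_duplicate_loop_p (loop)
	 && duplicate_loop_to_header_edge (loop, loop_latch_edge (loop), 2,
					   wont_exit, exit, &to_remove,
					   DLTHE_FLAG_UPDATE_FREQ))
       {
	 ...
       }  */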
1107*38fd1498Szrj 
1108*38fd1498Szrj /* Duplicates body of LOOP to given edge E NDUPL times.  Takes care of updating
1109*38fd1498Szrj    loop structure and dominators (order of inner subloops is retained).
1110*38fd1498Szrj    E's destination must be LOOP header for this to work, i.e. it must be entry
1111*38fd1498Szrj    or latch edge of this loop; these are unique, as the loops must have
1112*38fd1498Szrj    preheaders for this function to work correctly (if E is the latch edge,
1113*38fd1498Szrj    the function unrolls the loop; if E is the entry edge, it peels the loop).
1114*38fd1498Szrj    Store the edges created by copying the ORIG edge from the copies
1115*38fd1498Szrj    corresponding to set bits in the WONT_EXIT bitmap (bit 0 corresponds to
1116*38fd1498Szrj    the original LOOP body, the other copies are numbered in the order given
1117*38fd1498Szrj    by control flow through them) into the TO_REMOVE array.  Returns false
1118*38fd1498Szrj    if duplication is impossible.  */
1119*38fd1498Szrj 
1120*38fd1498Szrj bool
1121*38fd1498Szrj duplicate_loop_to_header_edge (struct loop *loop, edge e,
1122*38fd1498Szrj 			       unsigned int ndupl, sbitmap wont_exit,
1123*38fd1498Szrj 			       edge orig, vec<edge> *to_remove,
1124*38fd1498Szrj 			       int flags)
1125*38fd1498Szrj {
1126*38fd1498Szrj   struct loop *target, *aloop;
1127*38fd1498Szrj   struct loop **orig_loops;
1128*38fd1498Szrj   unsigned n_orig_loops;
1129*38fd1498Szrj   basic_block header = loop->header, latch = loop->latch;
1130*38fd1498Szrj   basic_block *new_bbs, *bbs, *first_active;
1131*38fd1498Szrj   basic_block new_bb, bb, first_active_latch = NULL;
1132*38fd1498Szrj   edge ae, latch_edge;
1133*38fd1498Szrj   edge spec_edges[2], new_spec_edges[2];
1134*38fd1498Szrj   const int SE_LATCH = 0;
1135*38fd1498Szrj   const int SE_ORIG = 1;
1136*38fd1498Szrj   unsigned i, j, n;
1137*38fd1498Szrj   int is_latch = (latch == e->src);
1138*38fd1498Szrj   profile_probability *scale_step = NULL;
1139*38fd1498Szrj   profile_probability scale_main = profile_probability::always ();
1140*38fd1498Szrj   profile_probability scale_act = profile_probability::always ();
1141*38fd1498Szrj   profile_count after_exit_num = profile_count::zero (),
1142*38fd1498Szrj 	        after_exit_den = profile_count::zero ();
1143*38fd1498Szrj   bool scale_after_exit = false;
1144*38fd1498Szrj   int add_irreducible_flag;
1145*38fd1498Szrj   basic_block place_after;
1146*38fd1498Szrj   bitmap bbs_to_scale = NULL;
1147*38fd1498Szrj   bitmap_iterator bi;
1148*38fd1498Szrj 
1149*38fd1498Szrj   gcc_assert (e->dest == loop->header);
1150*38fd1498Szrj   gcc_assert (ndupl > 0);
1151*38fd1498Szrj 
1152*38fd1498Szrj   if (orig)
1153*38fd1498Szrj     {
1154*38fd1498Szrj       /* Orig must be edge out of the loop.  */
1155*38fd1498Szrj       gcc_assert (flow_bb_inside_loop_p (loop, orig->src));
1156*38fd1498Szrj       gcc_assert (!flow_bb_inside_loop_p (loop, orig->dest));
1157*38fd1498Szrj     }
1158*38fd1498Szrj 
1159*38fd1498Szrj   n = loop->num_nodes;
1160*38fd1498Szrj   bbs = get_loop_body_in_dom_order (loop);
1161*38fd1498Szrj   gcc_assert (bbs[0] == loop->header);
1162*38fd1498Szrj   gcc_assert (bbs[n  - 1] == loop->latch);
1163*38fd1498Szrj   gcc_assert (bbs[n - 1] == loop->latch);
1164*38fd1498Szrj   /* Check whether duplication is possible.  */
1165*38fd1498Szrj   if (!can_copy_bbs_p (bbs, loop->num_nodes))
1166*38fd1498Szrj     {
1167*38fd1498Szrj       free (bbs);
1168*38fd1498Szrj       return false;
1169*38fd1498Szrj     }
1170*38fd1498Szrj   new_bbs = XNEWVEC (basic_block, loop->num_nodes);
1171*38fd1498Szrj 
1172*38fd1498Szrj   /* In case we are doing loop peeling and the loop is in the middle of an
1173*38fd1498Szrj      irreducible region, the peeled copies will be inside it too.  */
1174*38fd1498Szrj   add_irreducible_flag = e->flags & EDGE_IRREDUCIBLE_LOOP;
1175*38fd1498Szrj   gcc_assert (!is_latch || !add_irreducible_flag);
1176*38fd1498Szrj 
1177*38fd1498Szrj   /* Find edge from latch.  */
1178*38fd1498Szrj   latch_edge = loop_latch_edge (loop);
1179*38fd1498Szrj 
1180*38fd1498Szrj   if (flags & DLTHE_FLAG_UPDATE_FREQ)
1181*38fd1498Szrj     {
1182*38fd1498Szrj       /* Calculate the coefficients by which we have to scale the counts
1183*38fd1498Szrj 	 of the duplicated loop bodies.  */
1184*38fd1498Szrj       profile_count count_in = header->count;
1185*38fd1498Szrj       profile_count count_le = latch_edge->count ();
1186*38fd1498Szrj       profile_count count_out_orig = orig ? orig->count () : count_in - count_le;
1187*38fd1498Szrj       profile_probability prob_pass_thru = count_le.probability_in (count_in);
1188*38fd1498Szrj       profile_probability prob_pass_wont_exit =
1189*38fd1498Szrj 	      (count_le + count_out_orig).probability_in (count_in);
1190*38fd1498Szrj 
1191*38fd1498Szrj       if (orig && orig->probability.initialized_p ()
1192*38fd1498Szrj 	  && !(orig->probability == profile_probability::always ()))
1193*38fd1498Szrj 	{
1194*38fd1498Szrj 	  /* The blocks that are dominated by a removed exit edge ORIG have
1195*38fd1498Szrj 	     frequencies scaled by this.  */
1196*38fd1498Szrj 	  if (orig->count ().initialized_p ())
1197*38fd1498Szrj 	    {
1198*38fd1498Szrj 	      after_exit_num = orig->src->count;
1199*38fd1498Szrj 	      after_exit_den = after_exit_num - orig->count ();
1200*38fd1498Szrj 	      scale_after_exit = true;
1201*38fd1498Szrj 	    }
1202*38fd1498Szrj 	  bbs_to_scale = BITMAP_ALLOC (NULL);
1203*38fd1498Szrj 	  for (i = 0; i < n; i++)
1204*38fd1498Szrj 	    {
1205*38fd1498Szrj 	      if (bbs[i] != orig->src
1206*38fd1498Szrj 		  && dominated_by_p (CDI_DOMINATORS, bbs[i], orig->src))
1207*38fd1498Szrj 		bitmap_set_bit (bbs_to_scale, i);
1208*38fd1498Szrj 	    }
1209*38fd1498Szrj 	}
1210*38fd1498Szrj 
1211*38fd1498Szrj       scale_step = XNEWVEC (profile_probability, ndupl);
1212*38fd1498Szrj 
1213*38fd1498Szrj       for (i = 1; i <= ndupl; i++)
1214*38fd1498Szrj 	scale_step[i - 1] = bitmap_bit_p (wont_exit, i)
1215*38fd1498Szrj 				? prob_pass_wont_exit
1216*38fd1498Szrj 				: prob_pass_thru;
1217*38fd1498Szrj 
1218*38fd1498Szrj       /* Complete peeling is special as the probability of exit in last
1219*38fd1498Szrj 	 copy becomes 1.  */
1220*38fd1498Szrj       if (flags & DLTHE_FLAG_COMPLETTE_PEEL)
1221*38fd1498Szrj 	{
1222*38fd1498Szrj 	  profile_count wanted_count = e->count ();
1223*38fd1498Szrj 
1224*38fd1498Szrj 	  gcc_assert (!is_latch);
1225*38fd1498Szrj 	  /* The first copy has the count of the incoming edge.  Each
1226*38fd1498Szrj 	     subsequent count should be reduced by prob_pass_wont_exit.
1227*38fd1498Szrj 	     The caller should have managed the flags so that all copies
1228*38fd1498Szrj 	     except the original loop have their WONT_EXIT bit set.  */
1229*38fd1498Szrj 	  scale_act = wanted_count.probability_in (count_in);
1230*38fd1498Szrj 	  /* Now simulate the duplication adjustments and compute header
1231*38fd1498Szrj 	     frequency of the last copy.  */
1232*38fd1498Szrj 	  for (i = 0; i < ndupl; i++)
1233*38fd1498Szrj 	    wanted_count = wanted_count.apply_probability (scale_step [i]);
1234*38fd1498Szrj 	  scale_main = wanted_count.probability_in (count_in);
1235*38fd1498Szrj 	}
1236*38fd1498Szrj       /* Here we insert loop bodies inside the loop itself (for loop
1237*38fd1498Szrj 	 unrolling).  The first iteration will be the original loop followed
1238*38fd1498Szrj 	 by the duplicated bodies.  It is necessary to scale down the
1239*38fd1498Szrj 	 original so we get the right overall number of iterations.  */
1240*38fd1498Szrj       else if (is_latch)
1241*38fd1498Szrj 	{
1242*38fd1498Szrj 	  profile_probability prob_pass_main = bitmap_bit_p (wont_exit, 0)
1243*38fd1498Szrj 							? prob_pass_wont_exit
1244*38fd1498Szrj 							: prob_pass_thru;
1245*38fd1498Szrj 	  profile_probability p = prob_pass_main;
1246*38fd1498Szrj 	  profile_count scale_main_den = count_in;
1247*38fd1498Szrj 	  for (i = 0; i < ndupl; i++)
1248*38fd1498Szrj 	    {
1249*38fd1498Szrj 	      scale_main_den += count_in.apply_probability (p);
1250*38fd1498Szrj 	      p = p * scale_step[i];
1251*38fd1498Szrj 	    }
1252*38fd1498Szrj 	  /* If the original loop is executed COUNT_IN times, the unrolled
1253*38fd1498Szrj 	     loop will account for SCALE_MAIN_DEN executions.  */
1254*38fd1498Szrj 	  scale_main = count_in.probability_in (scale_main_den);
1255*38fd1498Szrj 	  scale_act = scale_main * prob_pass_main;
1256*38fd1498Szrj 	}
1257*38fd1498Szrj       else
1258*38fd1498Szrj 	{
1259*38fd1498Szrj 	  profile_count preheader_count = e->count ();
1260*38fd1498Szrj 	  for (i = 0; i < ndupl; i++)
1261*38fd1498Szrj 	    scale_main = scale_main * scale_step[i];
1262*38fd1498Szrj 	  scale_act = preheader_count.probability_in (count_in);
1263*38fd1498Szrj 	}
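      /* Editor's worked example (illustrative numbers, not from the original
	 source): suppose the header runs COUNT_IN = 1000 times, the loop
	 continues with probability 3/4 each iteration (prob_pass_thru = 3/4),
	 and we unroll once through the latch (ndupl = 1, wont_exit empty).
	 Then scale_main_den = 1000 + 1000 * 3/4 = 1750, so
	 scale_main = 1000 / 1750 ~= 0.571 and scale_act = 0.571 * 3/4 ~= 0.429.
	 The original body keeps roughly 571 of the 1000 header executions and
	 the copy gets roughly 429, so the total body count is preserved while
	 the iterations are split between the two bodies.  */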
1264*38fd1498Szrj     }
1265*38fd1498Szrj 
1266*38fd1498Szrj   /* Loop the new bbs will belong to.  */
1267*38fd1498Szrj   target = e->src->loop_father;
1268*38fd1498Szrj 
1269*38fd1498Szrj   /* Original loops.  */
1270*38fd1498Szrj   n_orig_loops = 0;
1271*38fd1498Szrj   for (aloop = loop->inner; aloop; aloop = aloop->next)
1272*38fd1498Szrj     n_orig_loops++;
1273*38fd1498Szrj   orig_loops = XNEWVEC (struct loop *, n_orig_loops);
1274*38fd1498Szrj   for (aloop = loop->inner, i = 0; aloop; aloop = aloop->next, i++)
1275*38fd1498Szrj     orig_loops[i] = aloop;
1276*38fd1498Szrj 
1277*38fd1498Szrj   set_loop_copy (loop, target);
1278*38fd1498Szrj 
1279*38fd1498Szrj   first_active = XNEWVEC (basic_block, n);
1280*38fd1498Szrj   if (is_latch)
1281*38fd1498Szrj     {
1282*38fd1498Szrj       memcpy (first_active, bbs, n * sizeof (basic_block));
1283*38fd1498Szrj       first_active_latch = latch;
1284*38fd1498Szrj     }
1285*38fd1498Szrj 
1286*38fd1498Szrj   spec_edges[SE_ORIG] = orig;
1287*38fd1498Szrj   spec_edges[SE_LATCH] = latch_edge;
1288*38fd1498Szrj 
1289*38fd1498Szrj   place_after = e->src;
1290*38fd1498Szrj   for (j = 0; j < ndupl; j++)
1291*38fd1498Szrj     {
1292*38fd1498Szrj       /* Copy loops.  */
1293*38fd1498Szrj       copy_loops_to (orig_loops, n_orig_loops, target);
1294*38fd1498Szrj 
1295*38fd1498Szrj       /* Copy bbs.  */
1296*38fd1498Szrj       copy_bbs (bbs, n, new_bbs, spec_edges, 2, new_spec_edges, loop,
1297*38fd1498Szrj 		place_after, true);
1298*38fd1498Szrj       place_after = new_spec_edges[SE_LATCH]->src;
1299*38fd1498Szrj 
1300*38fd1498Szrj       if (flags & DLTHE_RECORD_COPY_NUMBER)
1301*38fd1498Szrj 	for (i = 0; i < n; i++)
1302*38fd1498Szrj 	  {
1303*38fd1498Szrj 	    gcc_assert (!new_bbs[i]->aux);
1304*38fd1498Szrj 	    new_bbs[i]->aux = (void *)(size_t)(j + 1);
1305*38fd1498Szrj 	  }
1306*38fd1498Szrj 
1307*38fd1498Szrj       /* Note whether the blocks and edges belong to an irreducible loop.  */
1308*38fd1498Szrj       if (add_irreducible_flag)
1309*38fd1498Szrj 	{
1310*38fd1498Szrj 	  for (i = 0; i < n; i++)
1311*38fd1498Szrj 	    new_bbs[i]->flags |= BB_DUPLICATED;
1312*38fd1498Szrj 	  for (i = 0; i < n; i++)
1313*38fd1498Szrj 	    {
1314*38fd1498Szrj 	      edge_iterator ei;
1315*38fd1498Szrj 	      new_bb = new_bbs[i];
1316*38fd1498Szrj 	      if (new_bb->loop_father == target)
1317*38fd1498Szrj 		new_bb->flags |= BB_IRREDUCIBLE_LOOP;
1318*38fd1498Szrj 
1319*38fd1498Szrj 	      FOR_EACH_EDGE (ae, ei, new_bb->succs)
1320*38fd1498Szrj 		if ((ae->dest->flags & BB_DUPLICATED)
1321*38fd1498Szrj 		    && (ae->src->loop_father == target
1322*38fd1498Szrj 			|| ae->dest->loop_father == target))
1323*38fd1498Szrj 		  ae->flags |= EDGE_IRREDUCIBLE_LOOP;
1324*38fd1498Szrj 	    }
1325*38fd1498Szrj 	  for (i = 0; i < n; i++)
1326*38fd1498Szrj 	    new_bbs[i]->flags &= ~BB_DUPLICATED;
1327*38fd1498Szrj 	}
1328*38fd1498Szrj 
1329*38fd1498Szrj       /* Redirect the special edges.  */
1330*38fd1498Szrj       if (is_latch)
1331*38fd1498Szrj 	{
1332*38fd1498Szrj 	  redirect_edge_and_branch_force (latch_edge, new_bbs[0]);
1333*38fd1498Szrj 	  redirect_edge_and_branch_force (new_spec_edges[SE_LATCH],
1334*38fd1498Szrj 					  loop->header);
1335*38fd1498Szrj 	  set_immediate_dominator (CDI_DOMINATORS, new_bbs[0], latch);
1336*38fd1498Szrj 	  latch = loop->latch = new_bbs[n - 1];
1337*38fd1498Szrj 	  e = latch_edge = new_spec_edges[SE_LATCH];
1338*38fd1498Szrj 	}
1339*38fd1498Szrj       else
1340*38fd1498Szrj 	{
1341*38fd1498Szrj 	  redirect_edge_and_branch_force (new_spec_edges[SE_LATCH],
1342*38fd1498Szrj 					  loop->header);
1343*38fd1498Szrj 	  redirect_edge_and_branch_force (e, new_bbs[0]);
1344*38fd1498Szrj 	  set_immediate_dominator (CDI_DOMINATORS, new_bbs[0], e->src);
1345*38fd1498Szrj 	  e = new_spec_edges[SE_LATCH];
1346*38fd1498Szrj 	}
1347*38fd1498Szrj 
1348*38fd1498Szrj       /* Record exit edge in this copy.  */
1349*38fd1498Szrj       if (orig && bitmap_bit_p (wont_exit, j + 1))
1350*38fd1498Szrj 	{
1351*38fd1498Szrj 	  if (to_remove)
1352*38fd1498Szrj 	    to_remove->safe_push (new_spec_edges[SE_ORIG]);
1353*38fd1498Szrj 	  force_edge_cold (new_spec_edges[SE_ORIG], true);
1354*38fd1498Szrj 
1355*38fd1498Szrj 	  /* Scale the frequencies of the blocks dominated by the exit.  */
1356*38fd1498Szrj 	  if (bbs_to_scale && scale_after_exit)
1357*38fd1498Szrj 	    {
1358*38fd1498Szrj 	      EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi)
1359*38fd1498Szrj 		scale_bbs_frequencies_profile_count (new_bbs + i, 1, after_exit_num,
1360*38fd1498Szrj 						     after_exit_den);
1361*38fd1498Szrj 	    }
1362*38fd1498Szrj 	}
1363*38fd1498Szrj 
1364*38fd1498Szrj       /* Record the first copy in the control flow order if it is not
1365*38fd1498Szrj 	 the original loop (i.e. in case of peeling).  */
1366*38fd1498Szrj       if (!first_active_latch)
1367*38fd1498Szrj 	{
1368*38fd1498Szrj 	  memcpy (first_active, new_bbs, n * sizeof (basic_block));
1369*38fd1498Szrj 	  first_active_latch = new_bbs[n - 1];
1370*38fd1498Szrj 	}
1371*38fd1498Szrj 
1372*38fd1498Szrj       /* Set counts and frequencies.  */
1373*38fd1498Szrj       if (flags & DLTHE_FLAG_UPDATE_FREQ)
1374*38fd1498Szrj 	{
1375*38fd1498Szrj 	  scale_bbs_frequencies (new_bbs, n, scale_act);
1376*38fd1498Szrj 	  scale_act = scale_act * scale_step[j];
1377*38fd1498Szrj 	}
1378*38fd1498Szrj     }
1379*38fd1498Szrj   free (new_bbs);
1380*38fd1498Szrj   free (orig_loops);
1381*38fd1498Szrj 
1382*38fd1498Szrj   /* Record the exit edge in the original loop body, and update the frequencies.  */
1383*38fd1498Szrj   if (orig && bitmap_bit_p (wont_exit, 0))
1384*38fd1498Szrj     {
1385*38fd1498Szrj       if (to_remove)
1386*38fd1498Szrj 	to_remove->safe_push (orig);
1387*38fd1498Szrj       force_edge_cold (orig, true);
1388*38fd1498Szrj 
1389*38fd1498Szrj       /* Scale the frequencies of the blocks dominated by the exit.  */
1390*38fd1498Szrj       if (bbs_to_scale && scale_after_exit)
1391*38fd1498Szrj 	{
1392*38fd1498Szrj 	  EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi)
1393*38fd1498Szrj 	    scale_bbs_frequencies_profile_count (bbs + i, 1, after_exit_num,
1394*38fd1498Szrj 						 after_exit_den);
1395*38fd1498Szrj 	}
1396*38fd1498Szrj     }
1397*38fd1498Szrj 
1398*38fd1498Szrj   /* Update the original loop.  */
1399*38fd1498Szrj   if (!is_latch)
1400*38fd1498Szrj     set_immediate_dominator (CDI_DOMINATORS, e->dest, e->src);
1401*38fd1498Szrj   if (flags & DLTHE_FLAG_UPDATE_FREQ)
1402*38fd1498Szrj     {
1403*38fd1498Szrj       scale_bbs_frequencies (bbs, n, scale_main);
1404*38fd1498Szrj       free (scale_step);
1405*38fd1498Szrj     }
1406*38fd1498Szrj 
1407*38fd1498Szrj   /* Update dominators of outer blocks if affected.  */
1408*38fd1498Szrj   for (i = 0; i < n; i++)
1409*38fd1498Szrj     {
1410*38fd1498Szrj       basic_block dominated, dom_bb;
1411*38fd1498Szrj       vec<basic_block> dom_bbs;
1412*38fd1498Szrj       unsigned j;
1413*38fd1498Szrj 
1414*38fd1498Szrj       bb = bbs[i];
1415*38fd1498Szrj       bb->aux = 0;
1416*38fd1498Szrj 
1417*38fd1498Szrj       dom_bbs = get_dominated_by (CDI_DOMINATORS, bb);
1418*38fd1498Szrj       FOR_EACH_VEC_ELT (dom_bbs, j, dominated)
1419*38fd1498Szrj 	{
1420*38fd1498Szrj 	  if (flow_bb_inside_loop_p (loop, dominated))
1421*38fd1498Szrj 	    continue;
1422*38fd1498Szrj 	  dom_bb = nearest_common_dominator (
1423*38fd1498Szrj 			CDI_DOMINATORS, first_active[i], first_active_latch);
1424*38fd1498Szrj 	  set_immediate_dominator (CDI_DOMINATORS, dominated, dom_bb);
1425*38fd1498Szrj 	}
1426*38fd1498Szrj       dom_bbs.release ();
1427*38fd1498Szrj     }
1428*38fd1498Szrj   free (first_active);
1429*38fd1498Szrj 
1430*38fd1498Szrj   free (bbs);
1431*38fd1498Szrj   BITMAP_FREE (bbs_to_scale);
1432*38fd1498Szrj 
1433*38fd1498Szrj   return true;
1434*38fd1498Szrj }
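
/* Editor's usage sketch (hypothetical, loosely modelled on how an unroller
   would drive this function; names such as NUNROLL, EXIT_EDGE and
   FIRST_ITERATION_MAY_EXIT are assumptions, not part of this file):
   unrolling LOOP NUNROLL extra times through its latch edge might look like

       auto_sbitmap wont_exit (nunroll + 1);
       bitmap_ones (wont_exit);
       if (first_iteration_may_exit)
	 bitmap_clear_bit (wont_exit, 0);

       auto_vec<edge> to_remove;
       bool ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
						nunroll, wont_exit, exit_edge,
						&to_remove,
						DLTHE_FLAG_UPDATE_FREQ);

   Bit 0 of WONT_EXIT refers to the original body and bits 1..NUNROLL to the
   copies in control-flow order; every edge copied from EXIT_EDGE in a copy
   whose bit is set is pushed onto TO_REMOVE so the caller can remove it
   afterwards.  */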
1435*38fd1498Szrj 
1436*38fd1498Szrj /* A callback for make_forwarder_block, to redirect all edges except for
1437*38fd1498Szrj    MFB_KJ_EDGE to the entry part.  E is the edge for which we should decide
1438*38fd1498Szrj    whether to redirect it.  */
1439*38fd1498Szrj 
1440*38fd1498Szrj edge mfb_kj_edge;
1441*38fd1498Szrj bool
1442*38fd1498Szrj mfb_keep_just (edge e)
1443*38fd1498Szrj {
1444*38fd1498Szrj   return e != mfb_kj_edge;
1445*38fd1498Szrj }
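
/* Editor's note (illustrative, not part of the original source): the
   protocol is to stash the edge to be kept in the global MFB_KJ_EDGE and
   pass this predicate to make_forwarder_block, e.g.

       mfb_kj_edge = loop_latch_edge (loop);
       edge fallthru = make_forwarder_block (loop->header, mfb_keep_just, NULL);

   so that every predecessor of the header except the latch edge is
   redirected to the new forwarder block; create_preheader below uses
   exactly this pattern.  */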
1446*38fd1498Szrj 
1447*38fd1498Szrj /* True when a candidate preheader BLOCK has predecessors from LOOP.  */
1448*38fd1498Szrj 
1449*38fd1498Szrj static bool
1450*38fd1498Szrj has_preds_from_loop (basic_block block, struct loop *loop)
1451*38fd1498Szrj {
1452*38fd1498Szrj   edge e;
1453*38fd1498Szrj   edge_iterator ei;
1454*38fd1498Szrj 
1455*38fd1498Szrj   FOR_EACH_EDGE (e, ei, block->preds)
1456*38fd1498Szrj     if (e->src->loop_father == loop)
1457*38fd1498Szrj       return true;
1458*38fd1498Szrj   return false;
1459*38fd1498Szrj }
1460*38fd1498Szrj 
1461*38fd1498Szrj /* Creates a preheader for a LOOP.  Returns the newly created block.  Unless
1462*38fd1498Szrj    CP_SIMPLE_PREHEADERS is set in FLAGS, we only force LOOP to have a single
1463*38fd1498Szrj    entry; otherwise we also force the preheader block to have only one
1464*38fd1498Szrj    successor.  When CP_FALLTHRU_PREHEADERS is set in FLAGS, we force the
1465*38fd1498Szrj    preheader block to be a fallthru predecessor of the loop header and to
1466*38fd1498Szrj    have only predecessors from outside of the loop.
1467*38fd1498Szrj    The function also updates dominators.  */
1468*38fd1498Szrj 
1469*38fd1498Szrj basic_block
1470*38fd1498Szrj create_preheader (struct loop *loop, int flags)
1471*38fd1498Szrj {
1472*38fd1498Szrj   edge e;
1473*38fd1498Szrj   basic_block dummy;
1474*38fd1498Szrj   int nentry = 0;
1475*38fd1498Szrj   bool irred = false;
1476*38fd1498Szrj   bool latch_edge_was_fallthru;
1477*38fd1498Szrj   edge one_succ_pred = NULL, single_entry = NULL;
1478*38fd1498Szrj   edge_iterator ei;
1479*38fd1498Szrj 
1480*38fd1498Szrj   FOR_EACH_EDGE (e, ei, loop->header->preds)
1481*38fd1498Szrj     {
1482*38fd1498Szrj       if (e->src == loop->latch)
1483*38fd1498Szrj 	continue;
1484*38fd1498Szrj       irred |= (e->flags & EDGE_IRREDUCIBLE_LOOP) != 0;
1485*38fd1498Szrj       nentry++;
1486*38fd1498Szrj       single_entry = e;
1487*38fd1498Szrj       if (single_succ_p (e->src))
1488*38fd1498Szrj 	one_succ_pred = e;
1489*38fd1498Szrj     }
1490*38fd1498Szrj   gcc_assert (nentry);
1491*38fd1498Szrj   if (nentry == 1)
1492*38fd1498Szrj     {
1493*38fd1498Szrj       bool need_forwarder_block = false;
1494*38fd1498Szrj 
1495*38fd1498Szrj       /* We do not allow the entry block to be the loop preheader, since we
1496*38fd1498Szrj 	 cannot emit code there.  */
1497*38fd1498Szrj       if (single_entry->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
1498*38fd1498Szrj         need_forwarder_block = true;
1499*38fd1498Szrj       else
1500*38fd1498Szrj         {
1501*38fd1498Szrj           /* If we want simple preheaders, also force the preheader to have
1502*38fd1498Szrj              just a single successor.  */
1503*38fd1498Szrj           if ((flags & CP_SIMPLE_PREHEADERS)
1504*38fd1498Szrj               && !single_succ_p (single_entry->src))
1505*38fd1498Szrj             need_forwarder_block = true;
1506*38fd1498Szrj           /* If we want fallthru preheaders, also create forwarder block when
1507*38fd1498Szrj              preheader ends with a jump or has predecessors from loop.  */
1508*38fd1498Szrj           else if ((flags & CP_FALLTHRU_PREHEADERS)
1509*38fd1498Szrj                    && (JUMP_P (BB_END (single_entry->src))
1510*38fd1498Szrj                        || has_preds_from_loop (single_entry->src, loop)))
1511*38fd1498Szrj             need_forwarder_block = true;
1512*38fd1498Szrj         }
1513*38fd1498Szrj       if (! need_forwarder_block)
1514*38fd1498Szrj 	return NULL;
1515*38fd1498Szrj     }
1516*38fd1498Szrj 
1517*38fd1498Szrj   mfb_kj_edge = loop_latch_edge (loop);
1518*38fd1498Szrj   latch_edge_was_fallthru = (mfb_kj_edge->flags & EDGE_FALLTHRU) != 0;
1519*38fd1498Szrj   if (nentry == 1
1520*38fd1498Szrj       && ((flags & CP_FALLTHRU_PREHEADERS) == 0
1521*38fd1498Szrj   	  || (single_entry->flags & EDGE_CROSSING) == 0))
1522*38fd1498Szrj     dummy = split_edge (single_entry);
1523*38fd1498Szrj   else
1524*38fd1498Szrj     {
1525*38fd1498Szrj       edge fallthru = make_forwarder_block (loop->header, mfb_keep_just, NULL);
1526*38fd1498Szrj       dummy = fallthru->src;
1527*38fd1498Szrj       loop->header = fallthru->dest;
1528*38fd1498Szrj     }
1529*38fd1498Szrj 
1530*38fd1498Szrj   /* Try to be clever in placing the newly created preheader.  The idea is to
1531*38fd1498Szrj      avoid breaking any "fallthruness" relationship between blocks.
1532*38fd1498Szrj 
1533*38fd1498Szrj      The preheader was created just before the header and all incoming edges
1534*38fd1498Szrj      to the header were redirected to the preheader, except the latch edge.
1535*38fd1498Szrj      So the only problematic case is when this latch edge was a fallthru
1536*38fd1498Szrj      edge: it is not anymore after the preheader creation so we have broken
1537*38fd1498Szrj      the fallthruness.  We're therefore going to look for a better place.  */
1538*38fd1498Szrj   if (latch_edge_was_fallthru)
1539*38fd1498Szrj     {
1540*38fd1498Szrj       if (one_succ_pred)
1541*38fd1498Szrj 	e = one_succ_pred;
1542*38fd1498Szrj       else
1543*38fd1498Szrj 	e = EDGE_PRED (dummy, 0);
1544*38fd1498Szrj 
1545*38fd1498Szrj       move_block_after (dummy, e->src);
1546*38fd1498Szrj     }
1547*38fd1498Szrj 
1548*38fd1498Szrj   if (irred)
1549*38fd1498Szrj     {
1550*38fd1498Szrj       dummy->flags |= BB_IRREDUCIBLE_LOOP;
1551*38fd1498Szrj       single_succ_edge (dummy)->flags |= EDGE_IRREDUCIBLE_LOOP;
1552*38fd1498Szrj     }
1553*38fd1498Szrj 
1554*38fd1498Szrj   if (dump_file)
1555*38fd1498Szrj     fprintf (dump_file, "Created preheader block for loop %i\n",
1556*38fd1498Szrj 	     loop->num);
1557*38fd1498Szrj 
1558*38fd1498Szrj   if (flags & CP_FALLTHRU_PREHEADERS)
1559*38fd1498Szrj     gcc_assert ((single_succ_edge (dummy)->flags & EDGE_FALLTHRU)
1560*38fd1498Szrj                 && !JUMP_P (BB_END (dummy)));
1561*38fd1498Szrj 
1562*38fd1498Szrj   return dummy;
1563*38fd1498Szrj }
1564*38fd1498Szrj 
1565*38fd1498Szrj /* Create preheaders for each loop; for meaning of FLAGS see create_preheader.  */
1566*38fd1498Szrj 
1567*38fd1498Szrj void
1568*38fd1498Szrj create_preheaders (int flags)
1569*38fd1498Szrj {
1570*38fd1498Szrj   struct loop *loop;
1571*38fd1498Szrj 
1572*38fd1498Szrj   if (!current_loops)
1573*38fd1498Szrj     return;
1574*38fd1498Szrj 
1575*38fd1498Szrj   FOR_EACH_LOOP (loop, 0)
1576*38fd1498Szrj     create_preheader (loop, flags);
1577*38fd1498Szrj   loops_state_set (LOOPS_HAVE_PREHEADERS);
1578*38fd1498Szrj }
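
/* Editor's usage sketch (hypothetical caller, not part of the original
   source): passes normally request preheaders through the loop-state
   flags rather than calling this directly, e.g.

       loop_optimizer_init (LOOPS_HAVE_PREHEADERS | LOOPS_HAVE_SIMPLE_LATCHES);

   which is expected to end up calling create_preheaders with
   CP_SIMPLE_PREHEADERS; calling create_preheaders (CP_SIMPLE_PREHEADERS)
   directly has the same effect on an already-initialized loop tree.  */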
1579*38fd1498Szrj 
1580*38fd1498Szrj /* Forces all loop latches to have only single successor.  */
1581*38fd1498Szrj 
1582*38fd1498Szrj void
1583*38fd1498Szrj force_single_succ_latches (void)
1584*38fd1498Szrj {
1585*38fd1498Szrj   struct loop *loop;
1586*38fd1498Szrj   edge e;
1587*38fd1498Szrj 
1588*38fd1498Szrj   FOR_EACH_LOOP (loop, 0)
1589*38fd1498Szrj     {
1590*38fd1498Szrj       if (loop->latch != loop->header && single_succ_p (loop->latch))
1591*38fd1498Szrj 	continue;
1592*38fd1498Szrj 
1593*38fd1498Szrj       e = find_edge (loop->latch, loop->header);
1594*38fd1498Szrj       gcc_checking_assert (e != NULL);
1595*38fd1498Szrj 
1596*38fd1498Szrj       split_edge (e);
1597*38fd1498Szrj     }
1598*38fd1498Szrj   loops_state_set (LOOPS_HAVE_SIMPLE_LATCHES);
1599*38fd1498Szrj }
1600*38fd1498Szrj 
1601*38fd1498Szrj /* This function is called from loop_version.  It splits the entry edge
1602*38fd1498Szrj    of the loop we want to version, adds the versioning condition, and
1603*38fd1498Szrj    adjusts the edges to the two versions of the loop appropriately.
1604*38fd1498Szrj    E is an incoming edge.  Returns the basic block containing the
1605*38fd1498Szrj    condition.
1606*38fd1498Szrj 
1607*38fd1498Szrj    --- edge e ---- > [second_head]
1608*38fd1498Szrj 
1609*38fd1498Szrj    Split it and insert new conditional expression and adjust edges.
1610*38fd1498Szrj 
1611*38fd1498Szrj     --- edge e ---> [cond expr] ---> [first_head]
1612*38fd1498Szrj 			|
1613*38fd1498Szrj 			+---------> [second_head]
1614*38fd1498Szrj 
1615*38fd1498Szrj   THEN_PROB is the probability of the then branch of the condition.
1616*38fd1498Szrj   ELSE_PROB is the probability of the else branch.  Note that they may
1617*38fd1498Szrj   both be REG_BR_PROB_BASE when the condition is IFN_LOOP_VECTORIZED or
1618*38fd1498Szrj   IFN_LOOP_DIST_ALIAS.  */
1619*38fd1498Szrj 
1620*38fd1498Szrj static basic_block
1621*38fd1498Szrj lv_adjust_loop_entry_edge (basic_block first_head, basic_block second_head,
1622*38fd1498Szrj 			   edge e, void *cond_expr,
1623*38fd1498Szrj 			   profile_probability then_prob,
1624*38fd1498Szrj 			   profile_probability else_prob)
1625*38fd1498Szrj {
1626*38fd1498Szrj   basic_block new_head = NULL;
1627*38fd1498Szrj   edge e1;
1628*38fd1498Szrj 
1629*38fd1498Szrj   gcc_assert (e->dest == second_head);
1630*38fd1498Szrj 
1631*38fd1498Szrj   /* Split edge 'e'. This will create a new basic block, where we can
1632*38fd1498Szrj      insert conditional expr.  */
1633*38fd1498Szrj   new_head = split_edge (e);
1634*38fd1498Szrj 
1635*38fd1498Szrj   lv_add_condition_to_bb (first_head, second_head, new_head,
1636*38fd1498Szrj 			  cond_expr);
1637*38fd1498Szrj 
1638*38fd1498Szrj   /* Don't set EDGE_TRUE_VALUE in RTL mode, as it's invalid there.  */
1639*38fd1498Szrj   e = single_succ_edge (new_head);
1640*38fd1498Szrj   e1 = make_edge (new_head, first_head,
1641*38fd1498Szrj 		  current_ir_type () == IR_GIMPLE ? EDGE_TRUE_VALUE : 0);
1642*38fd1498Szrj   e1->probability = then_prob;
1643*38fd1498Szrj   e->probability = else_prob;
1644*38fd1498Szrj 
1645*38fd1498Szrj   set_immediate_dominator (CDI_DOMINATORS, first_head, new_head);
1646*38fd1498Szrj   set_immediate_dominator (CDI_DOMINATORS, second_head, new_head);
1647*38fd1498Szrj 
1648*38fd1498Szrj   /* Adjust loop header phi nodes.  */
1649*38fd1498Szrj   lv_adjust_loop_header_phi (first_head, second_head, new_head, e1);
1650*38fd1498Szrj 
1651*38fd1498Szrj   return new_head;
1652*38fd1498Szrj }
1653*38fd1498Szrj 
1654*38fd1498Szrj /* Main entry point for Loop Versioning transformation.
1655*38fd1498Szrj 
1656*38fd1498Szrj    This transformation, given a condition and a loop, creates
1657*38fd1498Szrj    -if (condition) { loop_copy1 } else { loop_copy2 },
1658*38fd1498Szrj    where loop_copy1 is the loop transformed in one way, and loop_copy2
1659*38fd1498Szrj    is the loop transformed in another way (or unchanged).  COND_EXPR
1660*38fd1498Szrj    may be a run-time test for things that were not resolved by static
1661*38fd1498Szrj    analysis (overlapping ranges (anti-aliasing), alignment, etc.).
1662*38fd1498Szrj 
1663*38fd1498Szrj    If non-NULL, CONDITION_BB is set to the basic block containing the
1664*38fd1498Szrj    condition.
1665*38fd1498Szrj 
1666*38fd1498Szrj    THEN_PROB is the probability of the then edge of the if; ELSE_PROB is
1667*38fd1498Szrj    the probability of the else edge.  THEN_SCALE is the ratio by which
1668*38fd1498Szrj    the frequencies in the original loop should be scaled.  ELSE_SCALE is
1669*38fd1498Szrj    the ratio by which the frequencies in the new loop should be scaled.
1670*38fd1498Szrj 
1671*38fd1498Szrj    If PLACE_AFTER is true, we place the new loop after LOOP in the
1672*38fd1498Szrj    instruction stream, otherwise it is placed before LOOP.  */
1673*38fd1498Szrj 
1674*38fd1498Szrj struct loop *
1675*38fd1498Szrj loop_version (struct loop *loop,
1676*38fd1498Szrj 	      void *cond_expr, basic_block *condition_bb,
1677*38fd1498Szrj 	      profile_probability then_prob, profile_probability else_prob,
1678*38fd1498Szrj 	      profile_probability then_scale, profile_probability else_scale,
1679*38fd1498Szrj 	      bool place_after)
1680*38fd1498Szrj {
1681*38fd1498Szrj   basic_block first_head, second_head;
1682*38fd1498Szrj   edge entry, latch_edge, true_edge, false_edge;
1683*38fd1498Szrj   int irred_flag;
1684*38fd1498Szrj   struct loop *nloop;
1685*38fd1498Szrj   basic_block cond_bb;
1686*38fd1498Szrj 
1687*38fd1498Szrj   /* Record the entry and latch edges for the loop.  */
1688*38fd1498Szrj   entry = loop_preheader_edge (loop);
1689*38fd1498Szrj   irred_flag = entry->flags & EDGE_IRREDUCIBLE_LOOP;
1690*38fd1498Szrj   entry->flags &= ~EDGE_IRREDUCIBLE_LOOP;
1691*38fd1498Szrj 
1692*38fd1498Szrj   /* Note down head of loop as first_head.  */
1693*38fd1498Szrj   first_head = entry->dest;
1694*38fd1498Szrj 
1695*38fd1498Szrj   /* Duplicate loop.  */
1696*38fd1498Szrj   if (!cfg_hook_duplicate_loop_to_header_edge (loop, entry, 1,
1697*38fd1498Szrj 					       NULL, NULL, NULL, 0))
1698*38fd1498Szrj     {
1699*38fd1498Szrj       entry->flags |= irred_flag;
1700*38fd1498Szrj       return NULL;
1701*38fd1498Szrj     }
1702*38fd1498Szrj 
1703*38fd1498Szrj   /* After duplication entry edge now points to new loop head block.
1704*38fd1498Szrj      Note down new head as second_head.  */
1705*38fd1498Szrj   second_head = entry->dest;
1706*38fd1498Szrj 
1707*38fd1498Szrj   /* Split loop entry edge and insert new block with cond expr.  */
1708*38fd1498Szrj   cond_bb = lv_adjust_loop_entry_edge (first_head, second_head,
1709*38fd1498Szrj 					entry, cond_expr, then_prob, else_prob);
1710*38fd1498Szrj   if (condition_bb)
1711*38fd1498Szrj     *condition_bb = cond_bb;
1712*38fd1498Szrj 
1713*38fd1498Szrj   if (!cond_bb)
1714*38fd1498Szrj     {
1715*38fd1498Szrj       entry->flags |= irred_flag;
1716*38fd1498Szrj       return NULL;
1717*38fd1498Szrj     }
1718*38fd1498Szrj 
1719*38fd1498Szrj   latch_edge = single_succ_edge (get_bb_copy (loop->latch));
1720*38fd1498Szrj 
1721*38fd1498Szrj   extract_cond_bb_edges (cond_bb, &true_edge, &false_edge);
1722*38fd1498Szrj   nloop = loopify (latch_edge,
1723*38fd1498Szrj 		   single_pred_edge (get_bb_copy (loop->header)),
1724*38fd1498Szrj 		   cond_bb, true_edge, false_edge,
1725*38fd1498Szrj 		   false /* Do not redirect all edges.  */,
1726*38fd1498Szrj 		   then_scale, else_scale);
1727*38fd1498Szrj 
1728*38fd1498Szrj   copy_loop_info (loop, nloop);
1729*38fd1498Szrj 
1730*38fd1498Szrj   /* loopify redirected latch_edge. Update its PENDING_STMTS.  */
1731*38fd1498Szrj   lv_flush_pending_stmts (latch_edge);
1732*38fd1498Szrj 
1733*38fd1498Szrj   /* loopify redirected condition_bb's succ edge. Update its PENDING_STMTS.  */
1734*38fd1498Szrj   extract_cond_bb_edges (cond_bb, &true_edge, &false_edge);
1735*38fd1498Szrj   lv_flush_pending_stmts (false_edge);
1736*38fd1498Szrj   /* Adjust irreducible flag.  */
1737*38fd1498Szrj   if (irred_flag)
1738*38fd1498Szrj     {
1739*38fd1498Szrj       cond_bb->flags |= BB_IRREDUCIBLE_LOOP;
1740*38fd1498Szrj       loop_preheader_edge (loop)->flags |= EDGE_IRREDUCIBLE_LOOP;
1741*38fd1498Szrj       loop_preheader_edge (nloop)->flags |= EDGE_IRREDUCIBLE_LOOP;
1742*38fd1498Szrj       single_pred_edge (cond_bb)->flags |= EDGE_IRREDUCIBLE_LOOP;
1743*38fd1498Szrj     }
1744*38fd1498Szrj 
1745*38fd1498Szrj   if (place_after)
1746*38fd1498Szrj     {
1747*38fd1498Szrj       basic_block *bbs = get_loop_body_in_dom_order (nloop), after;
1748*38fd1498Szrj       unsigned i;
1749*38fd1498Szrj 
1750*38fd1498Szrj       after = loop->latch;
1751*38fd1498Szrj 
1752*38fd1498Szrj       for (i = 0; i < nloop->num_nodes; i++)
1753*38fd1498Szrj 	{
1754*38fd1498Szrj 	  move_block_after (bbs[i], after);
1755*38fd1498Szrj 	  after = bbs[i];
1756*38fd1498Szrj 	}
1757*38fd1498Szrj       free (bbs);
1758*38fd1498Szrj     }
1759*38fd1498Szrj 
1760*38fd1498Szrj   /* At this point condition_bb is the loop preheader with two successors,
1761*38fd1498Szrj      first_head and second_head.  Make sure that each loop's preheader has
1762*38fd1498Szrj      only one successor.  */
1763*38fd1498Szrj   split_edge (loop_preheader_edge (loop));
1764*38fd1498Szrj   split_edge (loop_preheader_edge (nloop));
1765*38fd1498Szrj 
1766*38fd1498Szrj   return nloop;
1767*38fd1498Szrj }
1768
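
/* Editor's usage sketch (hypothetical caller in the spirit of loop
   unswitching; COND and the probabilities are assumptions, not part of
   this file): versioning LOOP on a GIMPLE condition might look like

       initialize_original_copy_tables ();
       profile_probability prob = profile_probability::even ();
       struct loop *nloop
	 = loop_version (loop, unshare_expr (cond), NULL,
			 prob, prob.invert (), prob, prob.invert (),
			 false);
       free_original_copy_tables ();

   On success NLOOP is the copy reached when the condition is false (the
   original LOOP runs when it is true), both versions have been given
   single-successor preheaders, and the caller can now transform each copy
   independently (e.g. fold COND to a constant in each version).  */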