xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/tree-ssa-loop-manip.c (revision 8feb0f0b7eaff0608f8350bbfa3098827b4bb91b)
1 /* High-level loop manipulation functions.
2    Copyright (C) 2004-2020 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "tree.h"
25 #include "gimple.h"
26 #include "cfghooks.h"
27 #include "tree-pass.h"	/* ??? for TODO_update_ssa but this isn't a pass.  */
28 #include "ssa.h"
29 #include "gimple-pretty-print.h"
30 #include "fold-const.h"
31 #include "cfganal.h"
32 #include "gimplify.h"
33 #include "gimple-iterator.h"
34 #include "gimplify-me.h"
35 #include "tree-cfg.h"
36 #include "tree-ssa-loop-ivopts.h"
37 #include "tree-ssa-loop-manip.h"
38 #include "tree-ssa-loop-niter.h"
39 #include "tree-ssa-loop.h"
40 #include "tree-into-ssa.h"
41 #include "tree-ssa.h"
42 #include "cfgloop.h"
43 #include "tree-scalar-evolution.h"
44 #include "tree-inline.h"
45 
46 /* All bitmaps for rewriting into loop-closed SSA go on this obstack,
47    so that we can free them all at once.  */
48 static bitmap_obstack loop_renamer_obstack;
49 
50 /* Creates an induction variable with value BASE + STEP * iteration in LOOP.
51    It is expected that neither BASE nor STEP are shared with other expressions
52    (unless the sharing rules allow this).  Use VAR as a base var_decl for it
53    (if NULL, a new temporary will be created).  The increment will occur at
54    INCR_POS (after it if AFTER is true, before it otherwise).  INCR_POS and
55    AFTER can be computed using standard_iv_increment_position.  The ssa versions
56    of the variable before and after increment will be stored in VAR_BEFORE and
57    VAR_AFTER (unless they are NULL).  */
58 
59 void
create_iv(tree base,tree step,tree var,class loop * loop,gimple_stmt_iterator * incr_pos,bool after,tree * var_before,tree * var_after)60 create_iv (tree base, tree step, tree var, class loop *loop,
61 	   gimple_stmt_iterator *incr_pos, bool after,
62 	   tree *var_before, tree *var_after)
63 {
64   gassign *stmt;
65   gphi *phi;
66   tree initial, step1;
67   gimple_seq stmts;
68   tree vb, va;
69   enum tree_code incr_op = PLUS_EXPR;
70   edge pe = loop_preheader_edge (loop);
71 
72   if (var != NULL_TREE)
73     {
74       vb = make_ssa_name (var);
75       va = make_ssa_name (var);
76     }
77   else
78     {
79       vb = make_temp_ssa_name (TREE_TYPE (base), NULL, "ivtmp");
80       va = make_temp_ssa_name (TREE_TYPE (base), NULL, "ivtmp");
81     }
82   if (var_before)
83     *var_before = vb;
84   if (var_after)
85     *var_after = va;
86 
87   /* For easier readability of the created code, produce MINUS_EXPRs
88      when suitable.  */
89   if (TREE_CODE (step) == INTEGER_CST)
90     {
91       if (TYPE_UNSIGNED (TREE_TYPE (step)))
92 	{
93 	  step1 = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
94 	  if (tree_int_cst_lt (step1, step))
95 	    {
96 	      incr_op = MINUS_EXPR;
97 	      step = step1;
98 	    }
99 	}
100       else
101 	{
102 	  bool ovf;
103 
104 	  if (!tree_expr_nonnegative_warnv_p (step, &ovf)
105 	      && may_negate_without_overflow_p (step))
106 	    {
107 	      incr_op = MINUS_EXPR;
108 	      step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
109 	    }
110 	}
111     }
112   if (POINTER_TYPE_P (TREE_TYPE (base)))
113     {
114       if (TREE_CODE (base) == ADDR_EXPR)
115 	mark_addressable (TREE_OPERAND (base, 0));
116       step = convert_to_ptrofftype (step);
117       if (incr_op == MINUS_EXPR)
118 	step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
119       incr_op = POINTER_PLUS_EXPR;
120     }
121   /* Gimplify the step if necessary.  We put the computations in front of the
122      loop (i.e. the step should be loop invariant).  */
123   step = force_gimple_operand (step, &stmts, true, NULL_TREE);
124   if (stmts)
125     gsi_insert_seq_on_edge_immediate (pe, stmts);
126 
127   stmt = gimple_build_assign (va, incr_op, vb, step);
128   /* Prevent the increment from inheriting a bogus location if it is not put
129      immediately after a statement whose location is known.  */
130   if (after)
131     {
132       if (gsi_end_p (*incr_pos)
133 	  || (is_gimple_debug (gsi_stmt (*incr_pos))
134 	      && gsi_bb (*incr_pos)
135 	      && gsi_end_p (gsi_last_nondebug_bb (gsi_bb (*incr_pos)))))
136 	{
137 	  edge e = single_succ_edge (gsi_bb (*incr_pos));
138 	  gimple_set_location (stmt, e->goto_locus);
139 	}
140       gsi_insert_after (incr_pos, stmt, GSI_NEW_STMT);
141     }
142   else
143     {
144       gimple_stmt_iterator gsi = *incr_pos;
145       if (!gsi_end_p (gsi) && is_gimple_debug (gsi_stmt (gsi)))
146 	gsi_next_nondebug (&gsi);
147       if (!gsi_end_p (gsi))
148 	gimple_set_location (stmt, gimple_location (gsi_stmt (gsi)));
149       gsi_insert_before (incr_pos, stmt, GSI_NEW_STMT);
150     }
151 
152   initial = force_gimple_operand (base, &stmts, true, var);
153   if (stmts)
154     gsi_insert_seq_on_edge_immediate (pe, stmts);
155 
156   phi = create_phi_node (vb, loop->header);
157   add_phi_arg (phi, initial, loop_preheader_edge (loop), UNKNOWN_LOCATION);
158   add_phi_arg (phi, va, loop_latch_edge (loop), UNKNOWN_LOCATION);
159 }
160 
161 /* Return the innermost superloop LOOP of USE_LOOP that is a superloop of
162    both DEF_LOOP and USE_LOOP.  */
163 
164 static inline class loop *
find_sibling_superloop(class loop * use_loop,class loop * def_loop)165 find_sibling_superloop (class loop *use_loop, class loop *def_loop)
166 {
167   unsigned ud = loop_depth (use_loop);
168   unsigned dd = loop_depth (def_loop);
169   gcc_assert (ud > 0 && dd > 0);
170   if (ud > dd)
171     use_loop = superloop_at_depth (use_loop, dd);
172   if (ud < dd)
173     def_loop = superloop_at_depth (def_loop, ud);
174   while (loop_outer (use_loop) != loop_outer (def_loop))
175     {
176       use_loop = loop_outer (use_loop);
177       def_loop = loop_outer (def_loop);
178       gcc_assert (use_loop && def_loop);
179     }
180   return use_loop;
181 }
182 
183 /* DEF_BB is a basic block containing a DEF that needs rewriting into
184    loop-closed SSA form.  USE_BLOCKS is the set of basic blocks containing
185    uses of DEF that "escape" from the loop containing DEF_BB (i.e. blocks in
186    USE_BLOCKS are dominated by DEF_BB but not in the loop father of DEF_B).
187    ALL_EXITS[I] is the set of all basic blocks that exit loop I.
188 
189    Compute the subset of LOOP_EXITS that exit the loop containing DEF_BB
190    or one of its loop fathers, in which DEF is live.  This set is returned
191    in the bitmap LIVE_EXITS.
192 
193    Instead of computing the complete livein set of the def, we use the loop
194    nesting tree as a form of poor man's structure analysis.  This greatly
195    speeds up the analysis, which is important because this function may be
196    called on all SSA names that need rewriting, one at a time.  */
197 
198 static void
compute_live_loop_exits(bitmap live_exits,bitmap use_blocks,bitmap * loop_exits,basic_block def_bb)199 compute_live_loop_exits (bitmap live_exits, bitmap use_blocks,
200 			 bitmap *loop_exits, basic_block def_bb)
201 {
202   unsigned i;
203   bitmap_iterator bi;
204   class loop *def_loop = def_bb->loop_father;
205   unsigned def_loop_depth = loop_depth (def_loop);
206   bitmap def_loop_exits;
207 
208   /* Normally the work list size is bounded by the number of basic
209      blocks in the largest loop.  We don't know this number, but we
210      can be fairly sure that it will be relatively small.  */
211   auto_vec<basic_block> worklist (MAX (8, n_basic_blocks_for_fn (cfun) / 128));
212 
213   EXECUTE_IF_SET_IN_BITMAP (use_blocks, 0, i, bi)
214     {
215       basic_block use_bb = BASIC_BLOCK_FOR_FN (cfun, i);
216       class loop *use_loop = use_bb->loop_father;
217       gcc_checking_assert (def_loop != use_loop
218 			   && ! flow_loop_nested_p (def_loop, use_loop));
219       if (! flow_loop_nested_p (use_loop, def_loop))
220 	use_bb = find_sibling_superloop (use_loop, def_loop)->header;
221       if (bitmap_set_bit (live_exits, use_bb->index))
222 	worklist.safe_push (use_bb);
223     }
224 
225   /* Iterate until the worklist is empty.  */
226   while (! worklist.is_empty ())
227     {
228       edge e;
229       edge_iterator ei;
230 
231       /* Pull a block off the worklist.  */
232       basic_block bb = worklist.pop ();
233 
234       /* Make sure we have at least enough room in the work list
235 	 for all predecessors of this block.  */
236       worklist.reserve (EDGE_COUNT (bb->preds));
237 
238       /* For each predecessor block.  */
239       FOR_EACH_EDGE (e, ei, bb->preds)
240 	{
241 	  basic_block pred = e->src;
242 	  class loop *pred_loop = pred->loop_father;
243 	  unsigned pred_loop_depth = loop_depth (pred_loop);
244 	  bool pred_visited;
245 
246 	  /* We should have met DEF_BB along the way.  */
247 	  gcc_assert (pred != ENTRY_BLOCK_PTR_FOR_FN (cfun));
248 
249 	  if (pred_loop_depth >= def_loop_depth)
250 	    {
251 	      if (pred_loop_depth > def_loop_depth)
252 		pred_loop = superloop_at_depth (pred_loop, def_loop_depth);
253 	      /* If we've reached DEF_LOOP, our train ends here.  */
254 	      if (pred_loop == def_loop)
255 		continue;
256 	    }
257 	  else if (! flow_loop_nested_p (pred_loop, def_loop))
258 	    pred = find_sibling_superloop (pred_loop, def_loop)->header;
259 
260 	  /* Add PRED to the LIVEIN set.  PRED_VISITED is true if
261 	     we had already added PRED to LIVEIN before.  */
262 	  pred_visited = !bitmap_set_bit (live_exits, pred->index);
263 
264 	  /* If we have visited PRED before, don't add it to the worklist.
265 	     If BB dominates PRED, then we're probably looking at a loop.
266 	     We're only interested in looking up in the dominance tree
267 	     because DEF_BB dominates all the uses.  */
268 	  if (pred_visited || dominated_by_p (CDI_DOMINATORS, pred, bb))
269 	    continue;
270 
271 	  worklist.quick_push (pred);
272 	}
273     }
274 
275   def_loop_exits = BITMAP_ALLOC (&loop_renamer_obstack);
276   for (class loop *loop = def_loop;
277        loop != current_loops->tree_root;
278        loop = loop_outer (loop))
279     bitmap_ior_into (def_loop_exits, loop_exits[loop->num]);
280   bitmap_and_into (live_exits, def_loop_exits);
281   BITMAP_FREE (def_loop_exits);
282 }
283 
284 /* Add a loop-closing PHI for VAR in basic block EXIT.  */
285 
286 static void
add_exit_phi(basic_block exit,tree var)287 add_exit_phi (basic_block exit, tree var)
288 {
289   gphi *phi;
290   edge e;
291   edge_iterator ei;
292 
293   /* Check that at least one of the edges entering the EXIT block exits
294      the loop, or a superloop of that loop, that VAR is defined in.  */
295   if (flag_checking)
296     {
297       gimple *def_stmt = SSA_NAME_DEF_STMT (var);
298       basic_block def_bb = gimple_bb (def_stmt);
299       FOR_EACH_EDGE (e, ei, exit->preds)
300 	{
301 	  class loop *aloop = find_common_loop (def_bb->loop_father,
302 						 e->src->loop_father);
303 	  if (!flow_bb_inside_loop_p (aloop, e->dest))
304 	    break;
305 	}
306       gcc_assert (e);
307     }
308 
309   phi = create_phi_node (NULL_TREE, exit);
310   create_new_def_for (var, phi, gimple_phi_result_ptr (phi));
311   FOR_EACH_EDGE (e, ei, exit->preds)
312     add_phi_arg (phi, var, e, UNKNOWN_LOCATION);
313 
314   if (dump_file && (dump_flags & TDF_DETAILS))
315     {
316       fprintf (dump_file, ";; Created LCSSA PHI: ");
317       print_gimple_stmt (dump_file, phi, 0, dump_flags);
318     }
319 }
320 
321 /* Add exit phis for VAR that is used in LIVEIN.
322    Exits of the loops are stored in LOOP_EXITS.  */
323 
324 static void
add_exit_phis_var(tree var,bitmap use_blocks,bitmap * loop_exits)325 add_exit_phis_var (tree var, bitmap use_blocks, bitmap *loop_exits)
326 {
327   unsigned index;
328   bitmap_iterator bi;
329   basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
330   bitmap live_exits = BITMAP_ALLOC (&loop_renamer_obstack);
331 
332   gcc_checking_assert (! bitmap_bit_p (use_blocks, def_bb->index));
333 
334   compute_live_loop_exits (live_exits, use_blocks, loop_exits, def_bb);
335 
336   EXECUTE_IF_SET_IN_BITMAP (live_exits, 0, index, bi)
337     {
338       add_exit_phi (BASIC_BLOCK_FOR_FN (cfun, index), var);
339     }
340 
341   BITMAP_FREE (live_exits);
342 }
343 
344 /* Add exit phis for the names marked in NAMES_TO_RENAME.
345    Exits of the loops are stored in EXITS.  Sets of blocks where the ssa
346    names are used are stored in USE_BLOCKS.  */
347 
348 static void
add_exit_phis(bitmap names_to_rename,bitmap * use_blocks,bitmap * loop_exits)349 add_exit_phis (bitmap names_to_rename, bitmap *use_blocks, bitmap *loop_exits)
350 {
351   unsigned i;
352   bitmap_iterator bi;
353 
354   EXECUTE_IF_SET_IN_BITMAP (names_to_rename, 0, i, bi)
355     {
356       add_exit_phis_var (ssa_name (i), use_blocks[i], loop_exits);
357     }
358 }
359 
360 /* Fill the array of bitmaps LOOP_EXITS with all loop exit edge targets.  */
361 
362 static void
get_loops_exits(bitmap * loop_exits)363 get_loops_exits (bitmap *loop_exits)
364 {
365   class loop *loop;
366   unsigned j;
367   edge e;
368 
369   FOR_EACH_LOOP (loop, 0)
370     {
371       vec<edge> exit_edges = get_loop_exit_edges (loop);
372       loop_exits[loop->num] = BITMAP_ALLOC (&loop_renamer_obstack);
373       FOR_EACH_VEC_ELT (exit_edges, j, e)
374         bitmap_set_bit (loop_exits[loop->num], e->dest->index);
375       exit_edges.release ();
376     }
377 }
378 
379 /* For USE in BB, if it is used outside of the loop it is defined in,
380    mark it for rewrite.  Record basic block BB where it is used
381    to USE_BLOCKS.  Record the ssa name index to NEED_PHIS bitmap.
382    Note that for USEs in phis, BB should be the src of the edge corresponding to
383    the use, rather than the bb containing the phi.  */
384 
385 static void
find_uses_to_rename_use(basic_block bb,tree use,bitmap * use_blocks,bitmap need_phis)386 find_uses_to_rename_use (basic_block bb, tree use, bitmap *use_blocks,
387 			 bitmap need_phis)
388 {
389   unsigned ver;
390   basic_block def_bb;
391   class loop *def_loop;
392 
393   if (TREE_CODE (use) != SSA_NAME)
394     return;
395 
396   ver = SSA_NAME_VERSION (use);
397   def_bb = gimple_bb (SSA_NAME_DEF_STMT (use));
398   if (!def_bb)
399     return;
400   def_loop = def_bb->loop_father;
401 
402   /* If the definition is not inside a loop, it is not interesting.  */
403   if (!loop_outer (def_loop))
404     return;
405 
406   /* If the use is not outside of the loop it is defined in, it is not
407      interesting.  */
408   if (flow_bb_inside_loop_p (def_loop, bb))
409     return;
410 
411   /* If we're seeing VER for the first time, we still have to allocate
412      a bitmap for its uses.  */
413   if (bitmap_set_bit (need_phis, ver))
414     use_blocks[ver] = BITMAP_ALLOC (&loop_renamer_obstack);
415   bitmap_set_bit (use_blocks[ver], bb->index);
416 }
417 
418 /* For uses matching USE_FLAGS in STMT, mark names that are used outside of the
419    loop they are defined to rewrite.  Record the set of blocks in which the ssa
420    names are used to USE_BLOCKS, and the ssa names themselves to NEED_PHIS.  */
421 
422 static void
find_uses_to_rename_stmt(gimple * stmt,bitmap * use_blocks,bitmap need_phis,int use_flags)423 find_uses_to_rename_stmt (gimple *stmt, bitmap *use_blocks, bitmap need_phis,
424 			  int use_flags)
425 {
426   ssa_op_iter iter;
427   tree var;
428   basic_block bb = gimple_bb (stmt);
429 
430   if (is_gimple_debug (stmt))
431     return;
432 
433   /* FOR_EACH_SSA_TREE_OPERAND iterator does not allows SSA_OP_VIRTUAL_USES
434      only.  */
435   if (use_flags == SSA_OP_VIRTUAL_USES)
436     {
437       tree vuse = gimple_vuse (stmt);
438       if (vuse != NULL_TREE)
439 	find_uses_to_rename_use (bb, gimple_vuse (stmt), use_blocks, need_phis);
440     }
441   else
442     FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, use_flags)
443       find_uses_to_rename_use (bb, var, use_blocks, need_phis);
444 }
445 
446 /* Marks names matching USE_FLAGS that are used in BB and outside of the loop
447    they are defined in for rewrite.  Records the set of blocks in which the ssa
448    names are used to USE_BLOCKS.  Record the SSA names that will
449    need exit PHIs in NEED_PHIS.  */
450 
451 static void
find_uses_to_rename_bb(basic_block bb,bitmap * use_blocks,bitmap need_phis,int use_flags)452 find_uses_to_rename_bb (basic_block bb, bitmap *use_blocks, bitmap need_phis,
453 			int use_flags)
454 {
455   edge e;
456   edge_iterator ei;
457   bool do_virtuals = (use_flags & SSA_OP_VIRTUAL_USES) != 0;
458   bool do_nonvirtuals = (use_flags & SSA_OP_USE) != 0;
459 
460   FOR_EACH_EDGE (e, ei, bb->succs)
461     for (gphi_iterator bsi = gsi_start_phis (e->dest); !gsi_end_p (bsi);
462 	 gsi_next (&bsi))
463       {
464         gphi *phi = bsi.phi ();
465 	bool virtual_p = virtual_operand_p (gimple_phi_result (phi));
466 	if ((virtual_p && do_virtuals)
467 	    || (!virtual_p && do_nonvirtuals))
468 	  find_uses_to_rename_use (bb, PHI_ARG_DEF_FROM_EDGE (phi, e),
469 				   use_blocks, need_phis);
470       }
471 
472   for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
473        gsi_next (&bsi))
474     find_uses_to_rename_stmt (gsi_stmt (bsi), use_blocks, need_phis,
475 			      use_flags);
476 }
477 
478 /* Marks names matching USE_FLAGS that are used outside of the loop they are
479    defined in for rewrite.  Records the set of blocks in which the ssa names are
480    used to USE_BLOCKS.  Record the SSA names that will need exit PHIs in
481    NEED_PHIS.  If CHANGED_BBS is not NULL, scan only blocks in this set.  */
482 
483 static void
find_uses_to_rename(bitmap changed_bbs,bitmap * use_blocks,bitmap need_phis,int use_flags)484 find_uses_to_rename (bitmap changed_bbs, bitmap *use_blocks, bitmap need_phis,
485 		     int use_flags)
486 {
487   basic_block bb;
488   unsigned index;
489   bitmap_iterator bi;
490 
491   if (changed_bbs)
492     EXECUTE_IF_SET_IN_BITMAP (changed_bbs, 0, index, bi)
493       {
494 	bb = BASIC_BLOCK_FOR_FN (cfun, index);
495 	if (bb)
496 	  find_uses_to_rename_bb (bb, use_blocks, need_phis, use_flags);
497       }
498   else
499     FOR_EACH_BB_FN (bb, cfun)
500       find_uses_to_rename_bb (bb, use_blocks, need_phis, use_flags);
501 }
502 
503 /* Mark uses of DEF that are used outside of the loop they are defined in for
504    rewrite.  Record the set of blocks in which the ssa names are used to
505    USE_BLOCKS.  Record the SSA names that will need exit PHIs in NEED_PHIS.  */
506 
507 static void
find_uses_to_rename_def(tree def,bitmap * use_blocks,bitmap need_phis)508 find_uses_to_rename_def (tree def, bitmap *use_blocks, bitmap need_phis)
509 {
510   gimple *use_stmt;
511   imm_use_iterator imm_iter;
512 
513   FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def)
514     {
515       if (is_gimple_debug (use_stmt))
516 	continue;
517 
518       basic_block use_bb = gimple_bb (use_stmt);
519 
520       use_operand_p use_p;
521       FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
522 	{
523 	  if (gimple_code (use_stmt) == GIMPLE_PHI)
524 	    {
525 	      edge e = gimple_phi_arg_edge (as_a <gphi *> (use_stmt),
526 					    PHI_ARG_INDEX_FROM_USE (use_p));
527 	      use_bb = e->src;
528 	    }
529 	  find_uses_to_rename_use (use_bb, USE_FROM_PTR (use_p), use_blocks,
530 				   need_phis);
531 	}
532     }
533 }
534 
535 /* Marks names matching USE_FLAGS that are defined in LOOP and used outside of
536    it for rewrite.  Records the set of blocks in which the ssa names are used to
537    USE_BLOCKS.  Record the SSA names that will need exit PHIs in NEED_PHIS.  */
538 
539 static void
find_uses_to_rename_in_loop(class loop * loop,bitmap * use_blocks,bitmap need_phis,int use_flags)540 find_uses_to_rename_in_loop (class loop *loop, bitmap *use_blocks,
541 			     bitmap need_phis, int use_flags)
542 {
543   bool do_virtuals = (use_flags & SSA_OP_VIRTUAL_USES) != 0;
544   bool do_nonvirtuals = (use_flags & SSA_OP_USE) != 0;
545   int def_flags = ((do_virtuals ? SSA_OP_VIRTUAL_DEFS : 0)
546 		   | (do_nonvirtuals ? SSA_OP_DEF : 0));
547 
548 
549   basic_block *bbs = get_loop_body (loop);
550 
551   for (unsigned int i = 0; i < loop->num_nodes; i++)
552     {
553       basic_block bb = bbs[i];
554 
555       for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
556 	   gsi_next (&bsi))
557 	{
558 	  gphi *phi = bsi.phi ();
559 	  tree res = gimple_phi_result (phi);
560 	  bool virtual_p = virtual_operand_p (res);
561 	  if ((virtual_p && do_virtuals)
562 	      || (!virtual_p && do_nonvirtuals))
563 	    find_uses_to_rename_def (res, use_blocks, need_phis);
564       }
565 
566       for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
567 	   gsi_next (&bsi))
568 	{
569 	  gimple *stmt = gsi_stmt (bsi);
570 	  /* FOR_EACH_SSA_TREE_OPERAND iterator does not allows
571 	     SSA_OP_VIRTUAL_DEFS only.  */
572 	  if (def_flags == SSA_OP_VIRTUAL_DEFS)
573 	    {
574 	      tree vdef = gimple_vdef (stmt);
575 	      if (vdef != NULL)
576 		find_uses_to_rename_def (vdef, use_blocks, need_phis);
577 	    }
578 	  else
579 	    {
580 	      tree var;
581 	      ssa_op_iter iter;
582 	      FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, def_flags)
583 		find_uses_to_rename_def (var, use_blocks, need_phis);
584 	    }
585 	}
586     }
587 
588   XDELETEVEC (bbs);
589 }
590 
591 /* Rewrites the program into a loop closed ssa form -- i.e. inserts extra
592    phi nodes to ensure that no variable is used outside the loop it is
593    defined in.
594 
595    This strengthening of the basic ssa form has several advantages:
596 
597    1) Updating it during unrolling/peeling/versioning is trivial, since
598       we do not need to care about the uses outside of the loop.
599       The same applies to virtual operands which are also rewritten into
600       loop closed SSA form.  Note that virtual operands are always live
601       until function exit.
602    2) The behavior of all uses of an induction variable is the same.
603       Without this, you need to distinguish the case when the variable
604       is used outside of the loop it is defined in, for example
605 
606       for (i = 0; i < 100; i++)
607 	{
608 	  for (j = 0; j < 100; j++)
609 	    {
610 	      k = i + j;
611 	      use1 (k);
612 	    }
613 	  use2 (k);
614 	}
615 
616       Looking from the outer loop with the normal SSA form, the first use of k
617       is not well-behaved, while the second one is an induction variable with
618       base 99 and step 1.
619 
620       If LOOP is non-null, only rewrite uses that have defs in LOOP.  Otherwise,
621       if CHANGED_BBS is not NULL, we look for uses outside loops only in the
622       basic blocks in this set.
623 
624       USE_FLAGS allows us to specify whether we want virtual, non-virtual or
625       both variables rewritten.
626 
627       UPDATE_FLAG is used in the call to update_ssa.  See
628       TODO_update_ssa* for documentation.  */
629 
630 void
rewrite_into_loop_closed_ssa_1(bitmap changed_bbs,unsigned update_flag,int use_flags,class loop * loop)631 rewrite_into_loop_closed_ssa_1 (bitmap changed_bbs, unsigned update_flag,
632 				int use_flags, class loop *loop)
633 {
634   bitmap *use_blocks;
635   bitmap names_to_rename;
636 
637   loops_state_set (LOOP_CLOSED_SSA);
638   if (number_of_loops (cfun) <= 1)
639     return;
640 
641   /* If the pass has caused the SSA form to be out-of-date, update it
642      now.  */
643   if (update_flag != 0)
644     update_ssa (update_flag);
645   else if (flag_checking)
646     verify_ssa (true, true);
647 
648   bitmap_obstack_initialize (&loop_renamer_obstack);
649 
650   names_to_rename = BITMAP_ALLOC (&loop_renamer_obstack);
651 
652   /* Uses of names to rename.  We don't have to initialize this array,
653      because we know that we will only have entries for the SSA names
654      in NAMES_TO_RENAME.  */
655   use_blocks = XNEWVEC (bitmap, num_ssa_names);
656 
657   if (loop != NULL)
658     {
659       gcc_assert (changed_bbs == NULL);
660       find_uses_to_rename_in_loop (loop, use_blocks, names_to_rename,
661 				   use_flags);
662     }
663   else
664     {
665       gcc_assert (loop == NULL);
666       find_uses_to_rename (changed_bbs, use_blocks, names_to_rename, use_flags);
667     }
668 
669   if (!bitmap_empty_p (names_to_rename))
670     {
671       /* An array of bitmaps where LOOP_EXITS[I] is the set of basic blocks
672 	 that are the destination of an edge exiting loop number I.  */
673       bitmap *loop_exits = XNEWVEC (bitmap, number_of_loops (cfun));
674       get_loops_exits (loop_exits);
675 
676       /* Add the PHI nodes on exits of the loops for the names we need to
677 	 rewrite.  */
678       add_exit_phis (names_to_rename, use_blocks, loop_exits);
679 
680       free (loop_exits);
681 
682       /* Fix up all the names found to be used outside their original
683 	 loops.  */
684       update_ssa (TODO_update_ssa);
685     }
686 
687   bitmap_obstack_release (&loop_renamer_obstack);
688   free (use_blocks);
689 }
690 
691 /* Rewrites the non-virtual defs and uses into a loop closed ssa form.  If
692    CHANGED_BBS is not NULL, we look for uses outside loops only in the basic
693    blocks in this set.  UPDATE_FLAG is used in the call to update_ssa.  See
694    TODO_update_ssa* for documentation.  */
695 
696 void
rewrite_into_loop_closed_ssa(bitmap changed_bbs,unsigned update_flag)697 rewrite_into_loop_closed_ssa (bitmap changed_bbs, unsigned update_flag)
698 {
699   rewrite_into_loop_closed_ssa_1 (changed_bbs, update_flag, SSA_OP_USE, NULL);
700 }
701 
702 /* Rewrites virtual defs and uses with def in LOOP into loop closed ssa
703    form.  */
704 
705 void
rewrite_virtuals_into_loop_closed_ssa(class loop * loop)706 rewrite_virtuals_into_loop_closed_ssa (class loop *loop)
707 {
708   rewrite_into_loop_closed_ssa_1 (NULL, 0, SSA_OP_VIRTUAL_USES, loop);
709 }
710 
711 /* Check invariants of the loop closed ssa form for the def in DEF_BB.  */
712 
713 static void
check_loop_closed_ssa_def(basic_block def_bb,tree def)714 check_loop_closed_ssa_def (basic_block def_bb, tree def)
715 {
716   use_operand_p use_p;
717   imm_use_iterator iterator;
718   FOR_EACH_IMM_USE_FAST (use_p, iterator, def)
719     {
720       if (is_gimple_debug (USE_STMT (use_p)))
721 	continue;
722 
723       basic_block use_bb = gimple_bb (USE_STMT (use_p));
724       if (is_a <gphi *> (USE_STMT (use_p)))
725 	use_bb = EDGE_PRED (use_bb, PHI_ARG_INDEX_FROM_USE (use_p))->src;
726 
727       gcc_assert (flow_bb_inside_loop_p (def_bb->loop_father, use_bb));
728     }
729 }
730 
731 /* Checks invariants of loop closed ssa form in BB.  */
732 
733 static void
check_loop_closed_ssa_bb(basic_block bb)734 check_loop_closed_ssa_bb (basic_block bb)
735 {
736   for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
737        gsi_next (&bsi))
738     {
739       gphi *phi = bsi.phi ();
740 
741       if (!virtual_operand_p (PHI_RESULT (phi)))
742 	check_loop_closed_ssa_def (bb, PHI_RESULT (phi));
743     }
744 
745   for (gimple_stmt_iterator bsi = gsi_start_nondebug_bb (bb); !gsi_end_p (bsi);
746        gsi_next_nondebug (&bsi))
747     {
748       ssa_op_iter iter;
749       tree var;
750       gimple *stmt = gsi_stmt (bsi);
751 
752       FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, SSA_OP_DEF)
753 	check_loop_closed_ssa_def (bb, var);
754     }
755 }
756 
757 /* Checks that invariants of the loop closed ssa form are preserved.
758    Call verify_ssa when VERIFY_SSA_P is true.  Note all loops are checked
759    if LOOP is NULL, otherwise, only LOOP is checked.  */
760 
761 DEBUG_FUNCTION void
verify_loop_closed_ssa(bool verify_ssa_p,class loop * loop)762 verify_loop_closed_ssa (bool verify_ssa_p, class loop *loop)
763 {
764   if (number_of_loops (cfun) <= 1)
765     return;
766 
767   if (verify_ssa_p)
768     verify_ssa (false, true);
769 
770   timevar_push (TV_VERIFY_LOOP_CLOSED);
771 
772   if (loop == NULL)
773     {
774       basic_block bb;
775 
776       FOR_EACH_BB_FN (bb, cfun)
777 	if (bb->loop_father && bb->loop_father->num > 0)
778 	  check_loop_closed_ssa_bb (bb);
779     }
780   else
781     {
782       basic_block *bbs = get_loop_body (loop);
783 
784       for (unsigned i = 0; i < loop->num_nodes; ++i)
785 	check_loop_closed_ssa_bb (bbs[i]);
786 
787       free (bbs);
788     }
789 
790   timevar_pop (TV_VERIFY_LOOP_CLOSED);
791 }
792 
793 /* Split loop exit edge EXIT.  The things are a bit complicated by a need to
794    preserve the loop closed ssa form.  If COPY_CONSTANTS_P is true then
795    forwarder PHIs are also created for constant arguments.
796    The newly created block is returned.  */
797 
798 basic_block
split_loop_exit_edge(edge exit,bool copy_constants_p)799 split_loop_exit_edge (edge exit, bool copy_constants_p)
800 {
801   basic_block dest = exit->dest;
802   basic_block bb = split_edge (exit);
803   gphi *phi, *new_phi;
804   tree new_name, name;
805   use_operand_p op_p;
806   gphi_iterator psi;
807   location_t locus;
808 
809   for (psi = gsi_start_phis (dest); !gsi_end_p (psi); gsi_next (&psi))
810     {
811       phi = psi.phi ();
812       op_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (bb));
813       locus = gimple_phi_arg_location_from_edge (phi, single_succ_edge (bb));
814 
815       name = USE_FROM_PTR (op_p);
816 
817       /* If the argument of the PHI node is a constant, we do not need
818 	 to keep it inside loop.  */
819       if (TREE_CODE (name) != SSA_NAME
820 	  && !copy_constants_p)
821 	continue;
822 
823       /* Otherwise create an auxiliary phi node that will copy the value
824 	 of the SSA name out of the loop.  */
825       new_name = duplicate_ssa_name (PHI_RESULT (phi), NULL);
826       new_phi = create_phi_node (new_name, bb);
827       add_phi_arg (new_phi, name, exit, locus);
828       SET_USE (op_p, new_name);
829     }
830 
831   return bb;
832 }
833 
834 /* Returns the basic block in that statements should be emitted for induction
835    variables incremented at the end of the LOOP.  */
836 
837 basic_block
ip_end_pos(class loop * loop)838 ip_end_pos (class loop *loop)
839 {
840   return loop->latch;
841 }
842 
843 /* Returns the basic block in that statements should be emitted for induction
844    variables incremented just before exit condition of a LOOP.  */
845 
846 basic_block
ip_normal_pos(class loop * loop)847 ip_normal_pos (class loop *loop)
848 {
849   gimple *last;
850   basic_block bb;
851   edge exit;
852 
853   if (!single_pred_p (loop->latch))
854     return NULL;
855 
856   bb = single_pred (loop->latch);
857   last = last_stmt (bb);
858   if (!last
859       || gimple_code (last) != GIMPLE_COND)
860     return NULL;
861 
862   exit = EDGE_SUCC (bb, 0);
863   if (exit->dest == loop->latch)
864     exit = EDGE_SUCC (bb, 1);
865 
866   if (flow_bb_inside_loop_p (loop, exit->dest))
867     return NULL;
868 
869   return bb;
870 }
871 
872 /* Stores the standard position for induction variable increment in LOOP
873    (just before the exit condition if it is available and latch block is empty,
874    end of the latch block otherwise) to BSI.  INSERT_AFTER is set to true if
875    the increment should be inserted after *BSI.  */
876 
877 void
standard_iv_increment_position(class loop * loop,gimple_stmt_iterator * bsi,bool * insert_after)878 standard_iv_increment_position (class loop *loop, gimple_stmt_iterator *bsi,
879 				bool *insert_after)
880 {
881   basic_block bb = ip_normal_pos (loop), latch = ip_end_pos (loop);
882   gimple *last = last_stmt (latch);
883 
884   if (!bb
885       || (last && gimple_code (last) != GIMPLE_LABEL))
886     {
887       *bsi = gsi_last_bb (latch);
888       *insert_after = true;
889     }
890   else
891     {
892       *bsi = gsi_last_bb (bb);
893       *insert_after = false;
894     }
895 }
896 
897 /* Copies phi node arguments for duplicated blocks.  The index of the first
898    duplicated block is FIRST_NEW_BLOCK.  */
899 
900 static void
copy_phi_node_args(unsigned first_new_block)901 copy_phi_node_args (unsigned first_new_block)
902 {
903   unsigned i;
904 
905   for (i = first_new_block; i < (unsigned) last_basic_block_for_fn (cfun); i++)
906     BASIC_BLOCK_FOR_FN (cfun, i)->flags |= BB_DUPLICATED;
907 
908   for (i = first_new_block; i < (unsigned) last_basic_block_for_fn (cfun); i++)
909     add_phi_args_after_copy_bb (BASIC_BLOCK_FOR_FN (cfun, i));
910 
911   for (i = first_new_block; i < (unsigned) last_basic_block_for_fn (cfun); i++)
912     BASIC_BLOCK_FOR_FN (cfun, i)->flags &= ~BB_DUPLICATED;
913 }
914 
915 
916 /* The same as cfgloopmanip.c:duplicate_loop_to_header_edge, but also
917    updates the PHI nodes at start of the copied region.  In order to
918    achieve this, only loops whose exits all lead to the same location
919    are handled.
920 
921    Notice that we do not completely update the SSA web after
922    duplication.  The caller is responsible for calling update_ssa
923    after the loop has been duplicated.  */
924 
925 bool
gimple_duplicate_loop_to_header_edge(class loop * loop,edge e,unsigned int ndupl,sbitmap wont_exit,edge orig,vec<edge> * to_remove,int flags)926 gimple_duplicate_loop_to_header_edge (class loop *loop, edge e,
927 				    unsigned int ndupl, sbitmap wont_exit,
928 				    edge orig, vec<edge> *to_remove,
929 				    int flags)
930 {
931   unsigned first_new_block;
932 
933   if (!loops_state_satisfies_p (LOOPS_HAVE_SIMPLE_LATCHES))
934     return false;
935   if (!loops_state_satisfies_p (LOOPS_HAVE_PREHEADERS))
936     return false;
937 
938   first_new_block = last_basic_block_for_fn (cfun);
939   if (!duplicate_loop_to_header_edge (loop, e, ndupl, wont_exit,
940 				      orig, to_remove, flags))
941     return false;
942 
943   /* Readd the removed phi args for e.  */
944   flush_pending_stmts (e);
945 
946   /* Copy the phi node arguments.  */
947   copy_phi_node_args (first_new_block);
948 
949   scev_reset ();
950 
951   return true;
952 }
953 
954 /* Returns true if we can unroll LOOP FACTOR times.  Number
955    of iterations of the loop is returned in NITER.  */
956 
957 bool
can_unroll_loop_p(class loop * loop,unsigned factor,class tree_niter_desc * niter)958 can_unroll_loop_p (class loop *loop, unsigned factor,
959 		   class tree_niter_desc *niter)
960 {
961   edge exit;
962 
963   /* Check whether unrolling is possible.  We only want to unroll loops
964      for that we are able to determine number of iterations.  We also
965      want to split the extra iterations of the loop from its end,
966      therefore we require that the loop has precisely one
967      exit.  */
968 
969   exit = single_dom_exit (loop);
970   if (!exit)
971     return false;
972 
973   if (!number_of_iterations_exit (loop, exit, niter, false)
974       || niter->cmp == ERROR_MARK
975       /* Scalar evolutions analysis might have copy propagated
976 	 the abnormal ssa names into these expressions, hence
977 	 emitting the computations based on them during loop
978 	 unrolling might create overlapping life ranges for
979 	 them, and failures in out-of-ssa.  */
980       || contains_abnormal_ssa_name_p (niter->may_be_zero)
981       || contains_abnormal_ssa_name_p (niter->control.base)
982       || contains_abnormal_ssa_name_p (niter->control.step)
983       || contains_abnormal_ssa_name_p (niter->bound))
984     return false;
985 
986   /* And of course, we must be able to duplicate the loop.  */
987   if (!can_duplicate_loop_p (loop))
988     return false;
989 
990   /* The final loop should be small enough.  */
991   if (tree_num_loop_insns (loop, &eni_size_weights) * factor
992       > (unsigned) param_max_unrolled_insns)
993     return false;
994 
995   return true;
996 }
997 
998 /* Determines the conditions that control execution of LOOP unrolled FACTOR
999    times.  DESC is number of iterations of LOOP.  ENTER_COND is set to
1000    condition that must be true if the main loop can be entered.
1001    EXIT_BASE, EXIT_STEP, EXIT_CMP and EXIT_BOUND are set to values describing
1002    how the exit from the unrolled loop should be controlled.  */
1003 
1004 static void
determine_exit_conditions(class loop * loop,class tree_niter_desc * desc,unsigned factor,tree * enter_cond,tree * exit_base,tree * exit_step,enum tree_code * exit_cmp,tree * exit_bound)1005 determine_exit_conditions (class loop *loop, class tree_niter_desc *desc,
1006 			   unsigned factor, tree *enter_cond,
1007 			   tree *exit_base, tree *exit_step,
1008 			   enum tree_code *exit_cmp, tree *exit_bound)
1009 {
1010   gimple_seq stmts;
1011   tree base = desc->control.base;
1012   tree step = desc->control.step;
1013   tree bound = desc->bound;
1014   tree type = TREE_TYPE (step);
1015   tree bigstep, delta;
1016   tree min = lower_bound_in_type (type, type);
1017   tree max = upper_bound_in_type (type, type);
1018   enum tree_code cmp = desc->cmp;
1019   tree cond = boolean_true_node, assum;
1020 
1021   /* For pointers, do the arithmetics in the type of step.  */
1022   base = fold_convert (type, base);
1023   bound = fold_convert (type, bound);
1024 
1025   *enter_cond = boolean_false_node;
1026   *exit_base = NULL_TREE;
1027   *exit_step = NULL_TREE;
1028   *exit_cmp = ERROR_MARK;
1029   *exit_bound = NULL_TREE;
1030   gcc_assert (cmp != ERROR_MARK);
1031 
1032   /* We only need to be correct when we answer question
1033      "Do at least FACTOR more iterations remain?" in the unrolled loop.
1034      Thus, transforming BASE + STEP * i <> BOUND to
1035      BASE + STEP * i < BOUND is ok.  */
1036   if (cmp == NE_EXPR)
1037     {
1038       if (tree_int_cst_sign_bit (step))
1039 	cmp = GT_EXPR;
1040       else
1041 	cmp = LT_EXPR;
1042     }
1043   else if (cmp == LT_EXPR)
1044     {
1045       gcc_assert (!tree_int_cst_sign_bit (step));
1046     }
1047   else if (cmp == GT_EXPR)
1048     {
1049       gcc_assert (tree_int_cst_sign_bit (step));
1050     }
1051   else
1052     gcc_unreachable ();
1053 
1054   /* The main body of the loop may be entered iff:
1055 
1056      1) desc->may_be_zero is false.
1057      2) it is possible to check that there are at least FACTOR iterations
1058 	of the loop, i.e., BOUND - step * FACTOR does not overflow.
1059      3) # of iterations is at least FACTOR  */
1060 
1061   if (!integer_zerop (desc->may_be_zero))
1062     cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1063 			invert_truthvalue (desc->may_be_zero),
1064 			cond);
1065 
1066   bigstep = fold_build2 (MULT_EXPR, type, step,
1067 			 build_int_cst_type (type, factor));
1068   delta = fold_build2 (MINUS_EXPR, type, bigstep, step);
1069   if (cmp == LT_EXPR)
1070     assum = fold_build2 (GE_EXPR, boolean_type_node,
1071 			 bound,
1072 			 fold_build2 (PLUS_EXPR, type, min, delta));
1073   else
1074     assum = fold_build2 (LE_EXPR, boolean_type_node,
1075 			 bound,
1076 			 fold_build2 (PLUS_EXPR, type, max, delta));
1077   cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, assum, cond);
1078 
1079   bound = fold_build2 (MINUS_EXPR, type, bound, delta);
1080   assum = fold_build2 (cmp, boolean_type_node, base, bound);
1081   cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, assum, cond);
1082 
1083   cond = force_gimple_operand (unshare_expr (cond), &stmts, false, NULL_TREE);
1084   if (stmts)
1085     gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1086   /* cond now may be a gimple comparison, which would be OK, but also any
1087      other gimple rhs (say a && b).  In this case we need to force it to
1088      operand.  */
1089   if (!is_gimple_condexpr (cond))
1090     {
1091       cond = force_gimple_operand (cond, &stmts, true, NULL_TREE);
1092       if (stmts)
1093 	gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1094     }
1095   *enter_cond = cond;
1096 
1097   base = force_gimple_operand (unshare_expr (base), &stmts, true, NULL_TREE);
1098   if (stmts)
1099     gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1100   bound = force_gimple_operand (unshare_expr (bound), &stmts, true, NULL_TREE);
1101   if (stmts)
1102     gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1103 
1104   *exit_base = base;
1105   *exit_step = bigstep;
1106   *exit_cmp = cmp;
1107   *exit_bound = bound;
1108 }
1109 
1110 /* Scales the frequencies of all basic blocks in LOOP that are strictly
1111    dominated by BB by NUM/DEN.  */
1112 
1113 static void
scale_dominated_blocks_in_loop(class loop * loop,basic_block bb,profile_count num,profile_count den)1114 scale_dominated_blocks_in_loop (class loop *loop, basic_block bb,
1115 				profile_count num, profile_count den)
1116 {
1117   basic_block son;
1118 
1119   if (!den.nonzero_p () && !(num == profile_count::zero ()))
1120     return;
1121 
1122   for (son = first_dom_son (CDI_DOMINATORS, bb);
1123        son;
1124        son = next_dom_son (CDI_DOMINATORS, son))
1125     {
1126       if (!flow_bb_inside_loop_p (loop, son))
1127 	continue;
1128       scale_bbs_frequencies_profile_count (&son, 1, num, den);
1129       scale_dominated_blocks_in_loop (loop, son, num, den);
1130     }
1131 }
1132 
1133 /* Return estimated niter for LOOP after unrolling by FACTOR times.  */
1134 
1135 gcov_type
niter_for_unrolled_loop(class loop * loop,unsigned factor)1136 niter_for_unrolled_loop (class loop *loop, unsigned factor)
1137 {
1138   gcc_assert (factor != 0);
1139   bool profile_p = false;
1140   gcov_type est_niter = expected_loop_iterations_unbounded (loop, &profile_p);
1141   /* Note that this is really CEIL (est_niter + 1, factor) - 1, where the
1142      "+ 1" converts latch iterations to loop iterations and the "- 1"
1143      converts back.  */
1144   gcov_type new_est_niter = est_niter / factor;
1145 
1146   if (est_niter == -1)
1147     return -1;
1148 
1149   /* Without profile feedback, loops for which we do not know a better estimate
1150      are assumed to roll 10 times.  When we unroll such loop, it appears to
1151      roll too little, and it may even seem to be cold.  To avoid this, we
1152      ensure that the created loop appears to roll at least 5 times (but at
1153      most as many times as before unrolling).  Don't do adjustment if profile
1154      feedback is present.  */
1155   if (new_est_niter < 5 && !profile_p)
1156     {
1157       if (est_niter < 5)
1158 	new_est_niter = est_niter;
1159       else
1160 	new_est_niter = 5;
1161     }
1162 
1163   if (loop->any_upper_bound)
1164     {
1165       /* As above, this is really CEIL (upper_bound + 1, factor) - 1.  */
1166       widest_int bound = wi::udiv_floor (loop->nb_iterations_upper_bound,
1167 					 factor);
1168       if (wi::ltu_p (bound, new_est_niter))
1169 	new_est_niter = bound.to_uhwi ();
1170     }
1171 
1172   return new_est_niter;
1173 }
1174 
1175 /* Unroll LOOP FACTOR times.  DESC describes number of iterations of LOOP.
1176    EXIT is the exit of the loop to that DESC corresponds.
1177 
1178    If N is number of iterations of the loop and MAY_BE_ZERO is the condition
1179    under that loop exits in the first iteration even if N != 0,
1180 
1181    while (1)
1182      {
1183        x = phi (init, next);
1184 
1185        pre;
1186        if (st)
1187          break;
1188        post;
1189      }
1190 
1191    becomes (with possibly the exit conditions formulated a bit differently,
1192    avoiding the need to create a new iv):
1193 
1194    if (MAY_BE_ZERO || N < FACTOR)
1195      goto rest;
1196 
1197    do
1198      {
1199        x = phi (init, next);
1200 
1201        pre;
1202        post;
1203        pre;
1204        post;
1205        ...
1206        pre;
1207        post;
1208        N -= FACTOR;
1209 
1210      } while (N >= FACTOR);
1211 
1212    rest:
1213      init' = phi (init, x);
1214 
1215    while (1)
1216      {
1217        x = phi (init', next);
1218 
1219        pre;
1220        if (st)
1221          break;
1222        post;
1223      }
1224 
1225    Before the loop is unrolled, TRANSFORM is called for it (only for the
1226    unrolled loop, but not for its versioned copy).  DATA is passed to
1227    TRANSFORM.  */
1228 
1229 /* Probability in % that the unrolled loop is entered.  Just a guess.  */
1230 #define PROB_UNROLLED_LOOP_ENTERED 90
1231 
1232 void
tree_transform_and_unroll_loop(class loop * loop,unsigned factor,edge exit,class tree_niter_desc * desc,transform_callback transform,void * data)1233 tree_transform_and_unroll_loop (class loop *loop, unsigned factor,
1234 				edge exit, class tree_niter_desc *desc,
1235 				transform_callback transform,
1236 				void *data)
1237 {
1238   gcond *exit_if;
1239   tree ctr_before, ctr_after;
1240   tree enter_main_cond, exit_base, exit_step, exit_bound;
1241   enum tree_code exit_cmp;
1242   gphi *phi_old_loop, *phi_new_loop, *phi_rest;
1243   gphi_iterator psi_old_loop, psi_new_loop;
1244   tree init, next, new_init;
1245   class loop *new_loop;
1246   basic_block rest, exit_bb;
1247   edge old_entry, new_entry, old_latch, precond_edge, new_exit;
1248   edge new_nonexit, e;
1249   gimple_stmt_iterator bsi;
1250   use_operand_p op;
1251   bool ok;
1252   unsigned i;
1253   profile_probability prob, prob_entry, scale_unrolled;
1254   profile_count freq_e, freq_h;
1255   gcov_type new_est_niter = niter_for_unrolled_loop (loop, factor);
1256   unsigned irr = loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP;
1257   auto_vec<edge> to_remove;
1258 
1259   determine_exit_conditions (loop, desc, factor,
1260 			     &enter_main_cond, &exit_base, &exit_step,
1261 			     &exit_cmp, &exit_bound);
1262 
1263   /* Let us assume that the unrolled loop is quite likely to be entered.  */
1264   if (integer_nonzerop (enter_main_cond))
1265     prob_entry = profile_probability::always ();
1266   else
1267     prob_entry = profile_probability::guessed_always ()
1268 			.apply_scale (PROB_UNROLLED_LOOP_ENTERED, 100);
1269 
1270   /* The values for scales should keep profile consistent, and somewhat close
1271      to correct.
1272 
1273      TODO: The current value of SCALE_REST makes it appear that the loop that
1274      is created by splitting the remaining iterations of the unrolled loop is
1275      executed the same number of times as the original loop, and with the same
1276      frequencies, which is obviously wrong.  This does not appear to cause
1277      problems, so we do not bother with fixing it for now.  To make the profile
1278      correct, we would need to change the probability of the exit edge of the
1279      loop, and recompute the distribution of frequencies in its body because
1280      of this change (scale the frequencies of blocks before and after the exit
1281      by appropriate factors).  */
1282   scale_unrolled = prob_entry;
1283 
1284   new_loop = loop_version (loop, enter_main_cond, NULL, prob_entry,
1285 			   prob_entry.invert (), scale_unrolled,
1286 			   profile_probability::guessed_always (),
1287 			   true);
1288   gcc_assert (new_loop != NULL);
1289   update_ssa (TODO_update_ssa);
1290 
1291   /* Prepare the cfg and update the phi nodes.  Move the loop exit to the
1292      loop latch (and make its condition dummy, for the moment).  */
1293   rest = loop_preheader_edge (new_loop)->src;
1294   precond_edge = single_pred_edge (rest);
1295   split_edge (loop_latch_edge (loop));
1296   exit_bb = single_pred (loop->latch);
1297 
1298   /* Since the exit edge will be removed, the frequency of all the blocks
1299      in the loop that are dominated by it must be scaled by
1300      1 / (1 - exit->probability).  */
1301   if (exit->probability.initialized_p ())
1302     scale_dominated_blocks_in_loop (loop, exit->src,
1303 				    /* We are scaling up here so probability
1304 				       does not fit.  */
1305 				    loop->header->count,
1306 				    loop->header->count
1307 				    - loop->header->count.apply_probability
1308 					 (exit->probability));
1309 
1310   bsi = gsi_last_bb (exit_bb);
1311   exit_if = gimple_build_cond (EQ_EXPR, integer_zero_node,
1312 			       integer_zero_node,
1313 			       NULL_TREE, NULL_TREE);
1314 
1315   gsi_insert_after (&bsi, exit_if, GSI_NEW_STMT);
1316   new_exit = make_edge (exit_bb, rest, EDGE_FALSE_VALUE | irr);
1317   rescan_loop_exit (new_exit, true, false);
1318 
1319   /* Set the probability of new exit to the same of the old one.  Fix
1320      the frequency of the latch block, by scaling it back by
1321      1 - exit->probability.  */
1322   new_exit->probability = exit->probability;
1323   new_nonexit = single_pred_edge (loop->latch);
1324   new_nonexit->probability = exit->probability.invert ();
1325   new_nonexit->flags = EDGE_TRUE_VALUE;
1326   if (new_nonexit->probability.initialized_p ())
1327     scale_bbs_frequencies (&loop->latch, 1, new_nonexit->probability);
1328 
1329   old_entry = loop_preheader_edge (loop);
1330   new_entry = loop_preheader_edge (new_loop);
1331   old_latch = loop_latch_edge (loop);
1332   for (psi_old_loop = gsi_start_phis (loop->header),
1333        psi_new_loop = gsi_start_phis (new_loop->header);
1334        !gsi_end_p (psi_old_loop);
1335        gsi_next (&psi_old_loop), gsi_next (&psi_new_loop))
1336     {
1337       phi_old_loop = psi_old_loop.phi ();
1338       phi_new_loop = psi_new_loop.phi ();
1339 
1340       init = PHI_ARG_DEF_FROM_EDGE (phi_old_loop, old_entry);
1341       op = PHI_ARG_DEF_PTR_FROM_EDGE (phi_new_loop, new_entry);
1342       gcc_assert (operand_equal_for_phi_arg_p (init, USE_FROM_PTR (op)));
1343       next = PHI_ARG_DEF_FROM_EDGE (phi_old_loop, old_latch);
1344 
1345       /* Prefer using original variable as a base for the new ssa name.
1346 	 This is necessary for virtual ops, and useful in order to avoid
1347 	 losing debug info for real ops.  */
1348       if (TREE_CODE (next) == SSA_NAME
1349 	  && useless_type_conversion_p (TREE_TYPE (next),
1350 					TREE_TYPE (init)))
1351 	new_init = copy_ssa_name (next);
1352       else if (TREE_CODE (init) == SSA_NAME
1353 	       && useless_type_conversion_p (TREE_TYPE (init),
1354 					     TREE_TYPE (next)))
1355 	new_init = copy_ssa_name (init);
1356       else if (useless_type_conversion_p (TREE_TYPE (next), TREE_TYPE (init)))
1357 	new_init = make_temp_ssa_name (TREE_TYPE (next), NULL, "unrinittmp");
1358       else
1359 	new_init = make_temp_ssa_name (TREE_TYPE (init), NULL, "unrinittmp");
1360 
1361       phi_rest = create_phi_node (new_init, rest);
1362 
1363       add_phi_arg (phi_rest, init, precond_edge, UNKNOWN_LOCATION);
1364       add_phi_arg (phi_rest, next, new_exit, UNKNOWN_LOCATION);
1365       SET_USE (op, new_init);
1366     }
1367 
1368   remove_path (exit);
1369 
1370   /* Transform the loop.  */
1371   if (transform)
1372     (*transform) (loop, data);
1373 
1374   /* Unroll the loop and remove the exits in all iterations except for the
1375      last one.  */
1376   auto_sbitmap wont_exit (factor);
1377   bitmap_ones (wont_exit);
1378   bitmap_clear_bit (wont_exit, factor - 1);
1379 
1380   ok = gimple_duplicate_loop_to_header_edge
1381 	  (loop, loop_latch_edge (loop), factor - 1,
1382 	   wont_exit, new_exit, &to_remove, DLTHE_FLAG_UPDATE_FREQ);
1383   gcc_assert (ok);
1384 
1385   FOR_EACH_VEC_ELT (to_remove, i, e)
1386     {
1387       ok = remove_path (e);
1388       gcc_assert (ok);
1389     }
1390   update_ssa (TODO_update_ssa);
1391 
1392   /* Ensure that the frequencies in the loop match the new estimated
1393      number of iterations, and change the probability of the new
1394      exit edge.  */
1395 
1396   freq_h = loop->header->count;
1397   freq_e = (loop_preheader_edge (loop))->count ();
1398   if (freq_h.nonzero_p ())
1399     {
1400       /* Avoid dropping loop body profile counter to 0 because of zero count
1401 	 in loop's preheader.  */
1402       if (freq_h.nonzero_p () && !(freq_e == profile_count::zero ()))
1403         freq_e = freq_e.force_nonzero ();
1404       scale_loop_frequencies (loop, freq_e.probability_in (freq_h));
1405     }
1406 
1407   exit_bb = single_pred (loop->latch);
1408   new_exit = find_edge (exit_bb, rest);
1409   new_exit->probability = profile_probability::always ()
1410 				.apply_scale (1, new_est_niter + 1);
1411 
1412   rest->count += new_exit->count ();
1413 
1414   new_nonexit = single_pred_edge (loop->latch);
1415   prob = new_nonexit->probability;
1416   new_nonexit->probability = new_exit->probability.invert ();
1417   prob = new_nonexit->probability / prob;
1418   if (prob.initialized_p ())
1419     scale_bbs_frequencies (&loop->latch, 1, prob);
1420 
1421   /* Finally create the new counter for number of iterations and add the new
1422      exit instruction.  */
1423   bsi = gsi_last_nondebug_bb (exit_bb);
1424   exit_if = as_a <gcond *> (gsi_stmt (bsi));
1425   create_iv (exit_base, exit_step, NULL_TREE, loop,
1426 	     &bsi, false, &ctr_before, &ctr_after);
1427   gimple_cond_set_code (exit_if, exit_cmp);
1428   gimple_cond_set_lhs (exit_if, ctr_after);
1429   gimple_cond_set_rhs (exit_if, exit_bound);
1430   update_stmt (exit_if);
1431 
1432   checking_verify_flow_info ();
1433   checking_verify_loop_structure ();
1434   checking_verify_loop_closed_ssa (true, loop);
1435   checking_verify_loop_closed_ssa (true, new_loop);
1436 }
1437 
1438 /* Wrapper over tree_transform_and_unroll_loop for case we do not
1439    want to transform the loop before unrolling.  The meaning
1440    of the arguments is the same as for tree_transform_and_unroll_loop.  */
1441 
1442 void
tree_unroll_loop(class loop * loop,unsigned factor,edge exit,class tree_niter_desc * desc)1443 tree_unroll_loop (class loop *loop, unsigned factor,
1444 		  edge exit, class tree_niter_desc *desc)
1445 {
1446   tree_transform_and_unroll_loop (loop, factor, exit, desc,
1447 				  NULL, NULL);
1448 }
1449 
1450 /* Rewrite the phi node at position PSI in function of the main
1451    induction variable MAIN_IV and insert the generated code at GSI.  */
1452 
1453 static void
rewrite_phi_with_iv(loop_p loop,gphi_iterator * psi,gimple_stmt_iterator * gsi,tree main_iv)1454 rewrite_phi_with_iv (loop_p loop,
1455 		     gphi_iterator *psi,
1456 		     gimple_stmt_iterator *gsi,
1457 		     tree main_iv)
1458 {
1459   affine_iv iv;
1460   gassign *stmt;
1461   gphi *phi = psi->phi ();
1462   tree atype, mtype, val, res = PHI_RESULT (phi);
1463 
1464   if (virtual_operand_p (res) || res == main_iv)
1465     {
1466       gsi_next (psi);
1467       return;
1468     }
1469 
1470   if (!simple_iv (loop, loop, res, &iv, true))
1471     {
1472       gsi_next (psi);
1473       return;
1474     }
1475 
1476   remove_phi_node (psi, false);
1477 
1478   atype = TREE_TYPE (res);
1479   mtype = POINTER_TYPE_P (atype) ? sizetype : atype;
1480   val = fold_build2 (MULT_EXPR, mtype, unshare_expr (iv.step),
1481 		     fold_convert (mtype, main_iv));
1482   val = fold_build2 (POINTER_TYPE_P (atype)
1483 		     ? POINTER_PLUS_EXPR : PLUS_EXPR,
1484 		     atype, unshare_expr (iv.base), val);
1485   val = force_gimple_operand_gsi (gsi, val, false, NULL_TREE, true,
1486 				  GSI_SAME_STMT);
1487   stmt = gimple_build_assign (res, val);
1488   gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1489 }
1490 
1491 /* Rewrite all the phi nodes of LOOP in function of the main induction
1492    variable MAIN_IV.  */
1493 
1494 static void
rewrite_all_phi_nodes_with_iv(loop_p loop,tree main_iv)1495 rewrite_all_phi_nodes_with_iv (loop_p loop, tree main_iv)
1496 {
1497   unsigned i;
1498   basic_block *bbs = get_loop_body_in_dom_order (loop);
1499   gphi_iterator psi;
1500 
1501   for (i = 0; i < loop->num_nodes; i++)
1502     {
1503       basic_block bb = bbs[i];
1504       gimple_stmt_iterator gsi = gsi_after_labels (bb);
1505 
1506       if (bb->loop_father != loop)
1507 	continue;
1508 
1509       for (psi = gsi_start_phis (bb); !gsi_end_p (psi); )
1510 	rewrite_phi_with_iv (loop, &psi, &gsi, main_iv);
1511     }
1512 
1513   free (bbs);
1514 }
1515 
1516 /* Bases all the induction variables in LOOP on a single induction variable
1517    (with base 0 and step 1), whose final value is compared with *NIT.  When the
1518    IV type precision has to be larger than *NIT type precision, *NIT is
1519    converted to the larger type, the conversion code is inserted before the
1520    loop, and *NIT is updated to the new definition.  When BUMP_IN_LATCH is true,
1521    the induction variable is incremented in the loop latch, otherwise it is
1522    incremented in the loop header.  Return the induction variable that was
1523    created.  */
1524 
1525 tree
canonicalize_loop_ivs(class loop * loop,tree * nit,bool bump_in_latch)1526 canonicalize_loop_ivs (class loop *loop, tree *nit, bool bump_in_latch)
1527 {
1528   unsigned precision = TYPE_PRECISION (TREE_TYPE (*nit));
1529   unsigned original_precision = precision;
1530   tree type, var_before;
1531   gimple_stmt_iterator gsi;
1532   gphi_iterator psi;
1533   gcond *stmt;
1534   edge exit = single_dom_exit (loop);
1535   gimple_seq stmts;
1536   bool unsigned_p = false;
1537 
1538   for (psi = gsi_start_phis (loop->header);
1539        !gsi_end_p (psi); gsi_next (&psi))
1540     {
1541       gphi *phi = psi.phi ();
1542       tree res = PHI_RESULT (phi);
1543       bool uns;
1544 
1545       type = TREE_TYPE (res);
1546       if (virtual_operand_p (res)
1547 	  || (!INTEGRAL_TYPE_P (type)
1548 	      && !POINTER_TYPE_P (type))
1549 	  || TYPE_PRECISION (type) < precision)
1550 	continue;
1551 
1552       uns = POINTER_TYPE_P (type) | TYPE_UNSIGNED (type);
1553 
1554       if (TYPE_PRECISION (type) > precision)
1555 	unsigned_p = uns;
1556       else
1557 	unsigned_p |= uns;
1558 
1559       precision = TYPE_PRECISION (type);
1560     }
1561 
1562   scalar_int_mode mode = smallest_int_mode_for_size (precision);
1563   precision = GET_MODE_PRECISION (mode);
1564   type = build_nonstandard_integer_type (precision, unsigned_p);
1565 
1566   if (original_precision != precision
1567       || TYPE_UNSIGNED (TREE_TYPE (*nit)) != unsigned_p)
1568     {
1569       *nit = fold_convert (type, *nit);
1570       *nit = force_gimple_operand (*nit, &stmts, true, NULL_TREE);
1571       if (stmts)
1572 	gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1573     }
1574 
1575   if (bump_in_latch)
1576     gsi = gsi_last_bb (loop->latch);
1577   else
1578     gsi = gsi_last_nondebug_bb (loop->header);
1579   create_iv (build_int_cst_type (type, 0), build_int_cst (type, 1), NULL_TREE,
1580 	     loop, &gsi, bump_in_latch, &var_before, NULL);
1581 
1582   rewrite_all_phi_nodes_with_iv (loop, var_before);
1583 
1584   stmt = as_a <gcond *> (last_stmt (exit->src));
1585   /* Make the loop exit if the control condition is not satisfied.  */
1586   if (exit->flags & EDGE_TRUE_VALUE)
1587     {
1588       edge te, fe;
1589 
1590       extract_true_false_edges_from_block (exit->src, &te, &fe);
1591       te->flags = EDGE_FALSE_VALUE;
1592       fe->flags = EDGE_TRUE_VALUE;
1593     }
1594   gimple_cond_set_code (stmt, LT_EXPR);
1595   gimple_cond_set_lhs (stmt, var_before);
1596   gimple_cond_set_rhs (stmt, *nit);
1597   update_stmt (stmt);
1598 
1599   return var_before;
1600 }
1601