xref: /netbsd-src/external/gpl3/gcc/dist/gcc/loop-init.cc (revision b1e838363e3c6fc78a55519254d99869742dd33c)
1 /* Loop optimizer initialization routines and RTL loop optimization passes.
2    Copyright (C) 2002-2022 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "target.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "cfghooks.h"
28 #include "df.h"
29 #include "regs.h"
30 #include "cfgcleanup.h"
31 #include "cfgloop.h"
32 #include "tree-pass.h"
33 #include "tree-ssa-loop-niter.h"
34 #include "loop-unroll.h"
35 #include "tree-scalar-evolution.h"
36 #include "tree-cfgcleanup.h"
37 
38 
39 /* Apply FLAGS to the loop state.  */
40 
41 static void
apply_loop_flags(unsigned flags)42 apply_loop_flags (unsigned flags)
43 {
44   if (flags & LOOPS_MAY_HAVE_MULTIPLE_LATCHES)
45     {
46       /* If the loops may have multiple latches, we cannot canonicalize
47 	 them further (and most of the loop manipulation functions will
48 	 not work).  However, we avoid modifying cfg, which some
49 	 passes may want.  */
50       gcc_assert ((flags & ~(LOOPS_MAY_HAVE_MULTIPLE_LATCHES
51 			     | LOOPS_HAVE_RECORDED_EXITS
52 			     | LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS)) == 0);
53       loops_state_set (LOOPS_MAY_HAVE_MULTIPLE_LATCHES);
54     }
55   else
56     disambiguate_loops_with_multiple_latches ();
57 
58   /* Create pre-headers.  */
59   if (flags & LOOPS_HAVE_PREHEADERS)
60     {
61       int cp_flags = CP_SIMPLE_PREHEADERS;
62 
63       if (flags & LOOPS_HAVE_FALLTHRU_PREHEADERS)
64         cp_flags |= CP_FALLTHRU_PREHEADERS;
65 
66       create_preheaders (cp_flags);
67     }
68 
69   /* Force all latches to have only single successor.  */
70   if (flags & LOOPS_HAVE_SIMPLE_LATCHES)
71     force_single_succ_latches ();
72 
73   /* Mark irreducible loops.  */
74   if (flags & LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS)
75     mark_irreducible_loops ();
76 
77   if (flags & LOOPS_HAVE_RECORDED_EXITS)
78     record_loop_exits ();
79 }
80 
81 /* Initialize loop structures.  This is used by the tree and RTL loop
82    optimizers.  FLAGS specify what properties to compute and/or ensure for
83    loops.  */
84 
85 void
loop_optimizer_init(unsigned flags)86 loop_optimizer_init (unsigned flags)
87 {
88   timevar_push (TV_LOOP_INIT);
89 
90   if (!current_loops)
91     {
92       gcc_assert (!(cfun->curr_properties & PROP_loops));
93 
94       /* Find the loops.  */
95       current_loops = flow_loops_find (NULL);
96     }
97   else
98     {
99       bool recorded_exits = loops_state_satisfies_p (LOOPS_HAVE_RECORDED_EXITS);
100       bool needs_fixup = loops_state_satisfies_p (LOOPS_NEED_FIXUP);
101 
102       gcc_assert (cfun->curr_properties & PROP_loops);
103 
104       /* Ensure that the dominators are computed, like flow_loops_find does.  */
105       calculate_dominance_info (CDI_DOMINATORS);
106 
107       if (!needs_fixup)
108 	checking_verify_loop_structure ();
109 
110       /* Clear all flags.  */
111       if (recorded_exits)
112 	release_recorded_exits (cfun);
113       loops_state_clear (~0U);
114 
115       if (needs_fixup)
116 	{
117 	  /* Apply LOOPS_MAY_HAVE_MULTIPLE_LATCHES early as fix_loop_structure
118 	     re-applies flags.  */
119 	  loops_state_set (flags & LOOPS_MAY_HAVE_MULTIPLE_LATCHES);
120 	  fix_loop_structure (NULL);
121 	}
122     }
123 
124   /* Apply flags to loops.  */
125   apply_loop_flags (flags);
126 
127   /* Dump loops.  */
128   flow_loops_dump (dump_file, NULL, 1);
129 
130   checking_verify_loop_structure ();
131 
132   timevar_pop (TV_LOOP_INIT);
133 }
134 
135 /* Finalize loop structures.  */
136 
137 void
loop_optimizer_finalize(struct function * fn,bool clean_loop_closed_phi)138 loop_optimizer_finalize (struct function *fn, bool clean_loop_closed_phi)
139 {
140   basic_block bb;
141 
142   timevar_push (TV_LOOP_FINI);
143 
144   if (clean_loop_closed_phi && loops_state_satisfies_p (fn, LOOP_CLOSED_SSA))
145     {
146       clean_up_loop_closed_phi (fn);
147       loops_state_clear (fn, LOOP_CLOSED_SSA);
148     }
149 
150   if (loops_state_satisfies_p (fn, LOOPS_HAVE_RECORDED_EXITS))
151     release_recorded_exits (fn);
152 
153   free_numbers_of_iterations_estimates (fn);
154 
155   /* If we should preserve loop structure, do not free it but clear
156      flags that advanced properties are there as we are not preserving
157      that in full.  */
158   if (fn->curr_properties & PROP_loops)
159     {
160       loops_state_clear (fn, LOOP_CLOSED_SSA
161 			 | LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS
162 			 | LOOPS_HAVE_PREHEADERS
163 			 | LOOPS_HAVE_SIMPLE_LATCHES
164 			 | LOOPS_HAVE_FALLTHRU_PREHEADERS);
165       loops_state_set (fn, LOOPS_MAY_HAVE_MULTIPLE_LATCHES);
166       goto loop_fini_done;
167     }
168 
169   for (auto loop : loops_list (fn, 0))
170     free_simple_loop_desc (loop);
171 
172   /* Clean up.  */
173   flow_loops_free (loops_for_fn (fn));
174   ggc_free (loops_for_fn (fn));
175   set_loops_for_fn (fn, NULL);
176 
177   FOR_ALL_BB_FN (bb, fn)
178     {
179       bb->loop_father = NULL;
180     }
181 
182 loop_fini_done:
183   timevar_pop (TV_LOOP_FINI);
184 }
185 
186 /* The structure of loops might have changed.  Some loops might get removed
187    (and their headers and latches were set to NULL), loop exists might get
188    removed (thus the loop nesting may be wrong), and some blocks and edges
189    were changed (so the information about bb --> loop mapping does not have
190    to be correct).  But still for the remaining loops the header dominates
191    the latch, and loops did not get new subloops (new loops might possibly
192    get created, but we are not interested in them).  Fix up the mess.
193 
194    If CHANGED_BBS is not NULL, basic blocks whose loop depth has changed are
195    marked in it.
196 
197    Returns the number of new discovered loops.  */
198 
199 unsigned
fix_loop_structure(bitmap changed_bbs)200 fix_loop_structure (bitmap changed_bbs)
201 {
202   basic_block bb;
203   int record_exits = 0;
204   unsigned old_nloops, i;
205 
206   timevar_push (TV_LOOP_INIT);
207 
208   if (dump_file && (dump_flags & TDF_DETAILS))
209     fprintf (dump_file, "fix_loop_structure: fixing up loops for function\n");
210 
211   /* We need exact and fast dominance info to be available.  */
212   gcc_assert (dom_info_state (CDI_DOMINATORS) == DOM_OK);
213 
214   if (loops_state_satisfies_p (LOOPS_HAVE_RECORDED_EXITS))
215     {
216       release_recorded_exits (cfun);
217       record_exits = LOOPS_HAVE_RECORDED_EXITS;
218     }
219 
220   /* Remember the depth of the blocks in the loop hierarchy, so that we can
221      recognize blocks whose loop nesting relationship has changed.  */
222   if (changed_bbs)
223     FOR_EACH_BB_FN (bb, cfun)
224       bb->aux = (void *) (size_t) loop_depth (bb->loop_father);
225 
226   /* Remove the dead loops from structures.  We start from the innermost
227      loops, so that when we remove the loops, we know that the loops inside
228      are preserved, and do not waste time relinking loops that will be
229      removed later.  */
230   for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
231     {
232       /* Detect the case that the loop is no longer present even though
233          it wasn't marked for removal.
234 	 ???  If we do that we can get away with not marking loops for
235 	 removal at all.  And possibly avoid some spurious removals.  */
236       if (loop->header
237 	  && bb_loop_header_p (loop->header))
238 	continue;
239 
240       if (dump_file && (dump_flags & TDF_DETAILS))
241 	fprintf (dump_file, "fix_loop_structure: removing loop %d\n",
242 		 loop->num);
243 
244       while (loop->inner)
245 	{
246 	  class loop *ploop = loop->inner;
247 	  flow_loop_tree_node_remove (ploop);
248 	  flow_loop_tree_node_add (loop_outer (loop), ploop);
249 	}
250 
251       /* Remove the loop.  */
252       if (loop->header)
253 	loop->former_header = loop->header;
254       else
255 	gcc_assert (loop->former_header != NULL);
256       loop->header = NULL;
257       flow_loop_tree_node_remove (loop);
258     }
259 
260   /* Remember the number of loops so we can return how many new loops
261      flow_loops_find discovered.  */
262   old_nloops = number_of_loops (cfun);
263 
264   /* Re-compute loop structure in-place.  */
265   flow_loops_find (current_loops);
266 
267   /* Mark the blocks whose loop has changed.  */
268   if (changed_bbs)
269     {
270       FOR_EACH_BB_FN (bb, cfun)
271 	{
272 	  if ((void *) (size_t) loop_depth (bb->loop_father) != bb->aux)
273 	    bitmap_set_bit (changed_bbs, bb->index);
274 
275     	  bb->aux = NULL;
276 	}
277     }
278 
279   /* Finally free deleted loops.  */
280   bool any_deleted = false;
281   class loop *loop;
282   FOR_EACH_VEC_ELT (*get_loops (cfun), i, loop)
283     if (loop && loop->header == NULL)
284       {
285 	if (dump_file
286 	    && ((unsigned) loop->former_header->index
287 		< basic_block_info_for_fn (cfun)->length ()))
288 	  {
289 	    basic_block former_header
290 	      = BASIC_BLOCK_FOR_FN (cfun, loop->former_header->index);
291 	    /* If the old header still exists we want to check if the
292 	       original loop is re-discovered or the old header is now
293 	       part of a newly discovered loop.
294 	       In both cases we should have avoided removing the loop.  */
295 	    if (former_header == loop->former_header)
296 	      {
297 		if (former_header->loop_father->header == former_header)
298 		  fprintf (dump_file, "fix_loop_structure: rediscovered "
299 			   "removed loop %d as loop %d with old header %d\n",
300 			   loop->num, former_header->loop_father->num,
301 			   former_header->index);
302 		else if ((unsigned) former_header->loop_father->num
303 			 >= old_nloops)
304 		  fprintf (dump_file, "fix_loop_structure: header %d of "
305 			   "removed loop %d is part of the newly "
306 			   "discovered loop %d with header %d\n",
307 			   former_header->index, loop->num,
308 			   former_header->loop_father->num,
309 			   former_header->loop_father->header->index);
310 	      }
311 	  }
312 	(*get_loops (cfun))[i] = NULL;
313 	flow_loop_free (loop);
314 	any_deleted = true;
315       }
316 
317   /* If we deleted loops then the cached scalar evolutions refering to
318      those loops become invalid.  */
319   if (any_deleted && scev_initialized_p ())
320     scev_reset_htab ();
321 
322   loops_state_clear (LOOPS_NEED_FIXUP);
323 
324   /* Apply flags to loops.  */
325   apply_loop_flags (current_loops->state | record_exits);
326 
327   checking_verify_loop_structure ();
328 
329   timevar_pop (TV_LOOP_INIT);
330 
331   return number_of_loops (cfun) - old_nloops;
332 }
333 
334 /* The RTL loop superpass.  The actual passes are subpasses.  See passes.cc for
335    more on that.  */
336 
337 namespace {
338 
339 const pass_data pass_data_loop2 =
340 {
341   RTL_PASS, /* type */
342   "loop2", /* name */
343   OPTGROUP_LOOP, /* optinfo_flags */
344   TV_LOOP, /* tv_id */
345   0, /* properties_required */
346   0, /* properties_provided */
347   0, /* properties_destroyed */
348   0, /* todo_flags_start */
349   0, /* todo_flags_finish */
350 };
351 
352 class pass_loop2 : public rtl_opt_pass
353 {
354 public:
pass_loop2(gcc::context * ctxt)355   pass_loop2 (gcc::context *ctxt)
356     : rtl_opt_pass (pass_data_loop2, ctxt)
357   {}
358 
359   /* opt_pass methods: */
360   virtual bool gate (function *);
361 
362 }; // class pass_loop2
363 
364 bool
gate(function * fun)365 pass_loop2::gate (function *fun)
366 {
367   if (optimize > 0
368       && (flag_move_loop_invariants
369 	  || flag_unswitch_loops
370 	  || flag_unroll_loops
371 	  || (flag_branch_on_count_reg && targetm.have_doloop_end ())
372 	  || cfun->has_unroll))
373     return true;
374   else
375     {
376       /* No longer preserve loops, remove them now.  */
377       fun->curr_properties &= ~PROP_loops;
378       if (current_loops)
379 	loop_optimizer_finalize ();
380       return false;
381     }
382 }
383 
384 } // anon namespace
385 
386 rtl_opt_pass *
make_pass_loop2(gcc::context * ctxt)387 make_pass_loop2 (gcc::context *ctxt)
388 {
389   return new pass_loop2 (ctxt);
390 }
391 
392 
393 /* Initialization of the RTL loop passes.  */
394 static unsigned int
rtl_loop_init(void)395 rtl_loop_init (void)
396 {
397   gcc_assert (current_ir_type () == IR_RTL_CFGLAYOUT);
398 
399   if (dump_file)
400     {
401       dump_reg_info (dump_file);
402       dump_flow_info (dump_file, dump_flags);
403     }
404 
405   loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
406   return 0;
407 }
408 
409 namespace {
410 
411 const pass_data pass_data_rtl_loop_init =
412 {
413   RTL_PASS, /* type */
414   "loop2_init", /* name */
415   OPTGROUP_LOOP, /* optinfo_flags */
416   TV_LOOP, /* tv_id */
417   0, /* properties_required */
418   0, /* properties_provided */
419   0, /* properties_destroyed */
420   0, /* todo_flags_start */
421   0, /* todo_flags_finish */
422 };
423 
424 class pass_rtl_loop_init : public rtl_opt_pass
425 {
426 public:
pass_rtl_loop_init(gcc::context * ctxt)427   pass_rtl_loop_init (gcc::context *ctxt)
428     : rtl_opt_pass (pass_data_rtl_loop_init, ctxt)
429   {}
430 
431   /* opt_pass methods: */
execute(function *)432   virtual unsigned int execute (function *) { return rtl_loop_init (); }
433 
434 }; // class pass_rtl_loop_init
435 
436 } // anon namespace
437 
438 rtl_opt_pass *
make_pass_rtl_loop_init(gcc::context * ctxt)439 make_pass_rtl_loop_init (gcc::context *ctxt)
440 {
441   return new pass_rtl_loop_init (ctxt);
442 }
443 
444 
445 /* Finalization of the RTL loop passes.  */
446 
447 namespace {
448 
449 const pass_data pass_data_rtl_loop_done =
450 {
451   RTL_PASS, /* type */
452   "loop2_done", /* name */
453   OPTGROUP_LOOP, /* optinfo_flags */
454   TV_LOOP, /* tv_id */
455   0, /* properties_required */
456   0, /* properties_provided */
457   PROP_loops, /* properties_destroyed */
458   0, /* todo_flags_start */
459   0, /* todo_flags_finish */
460 };
461 
462 class pass_rtl_loop_done : public rtl_opt_pass
463 {
464 public:
pass_rtl_loop_done(gcc::context * ctxt)465   pass_rtl_loop_done (gcc::context *ctxt)
466     : rtl_opt_pass (pass_data_rtl_loop_done, ctxt)
467   {}
468 
469   /* opt_pass methods: */
470   virtual unsigned int execute (function *);
471 
472 }; // class pass_rtl_loop_done
473 
474 unsigned int
execute(function * fun)475 pass_rtl_loop_done::execute (function *fun)
476 {
477   /* No longer preserve loops, remove them now.  */
478   fun->curr_properties &= ~PROP_loops;
479   loop_optimizer_finalize ();
480   free_dominance_info (CDI_DOMINATORS);
481 
482   cleanup_cfg (0);
483   if (dump_file)
484     {
485       dump_reg_info (dump_file);
486       dump_flow_info (dump_file, dump_flags);
487     }
488 
489   return 0;
490 }
491 
492 } // anon namespace
493 
494 rtl_opt_pass *
make_pass_rtl_loop_done(gcc::context * ctxt)495 make_pass_rtl_loop_done (gcc::context *ctxt)
496 {
497   return new pass_rtl_loop_done (ctxt);
498 }
499 
500 
501 /* Loop invariant code motion.  */
502 
503 namespace {
504 
505 const pass_data pass_data_rtl_move_loop_invariants =
506 {
507   RTL_PASS, /* type */
508   "loop2_invariant", /* name */
509   OPTGROUP_LOOP, /* optinfo_flags */
510   TV_LOOP_MOVE_INVARIANTS, /* tv_id */
511   0, /* properties_required */
512   0, /* properties_provided */
513   0, /* properties_destroyed */
514   0, /* todo_flags_start */
515   ( TODO_df_verify | TODO_df_finish ), /* todo_flags_finish */
516 };
517 
518 class pass_rtl_move_loop_invariants : public rtl_opt_pass
519 {
520 public:
pass_rtl_move_loop_invariants(gcc::context * ctxt)521   pass_rtl_move_loop_invariants (gcc::context *ctxt)
522     : rtl_opt_pass (pass_data_rtl_move_loop_invariants, ctxt)
523   {}
524 
525   /* opt_pass methods: */
gate(function *)526   virtual bool gate (function *) { return flag_move_loop_invariants; }
execute(function * fun)527   virtual unsigned int execute (function *fun)
528     {
529       if (number_of_loops (fun) > 1)
530 	move_loop_invariants ();
531       return 0;
532     }
533 
534 }; // class pass_rtl_move_loop_invariants
535 
536 } // anon namespace
537 
538 rtl_opt_pass *
make_pass_rtl_move_loop_invariants(gcc::context * ctxt)539 make_pass_rtl_move_loop_invariants (gcc::context *ctxt)
540 {
541   return new pass_rtl_move_loop_invariants (ctxt);
542 }
543 
544 
545 namespace {
546 
547 const pass_data pass_data_rtl_unroll_loops =
548 {
549   RTL_PASS, /* type */
550   "loop2_unroll", /* name */
551   OPTGROUP_LOOP, /* optinfo_flags */
552   TV_LOOP_UNROLL, /* tv_id */
553   0, /* properties_required */
554   0, /* properties_provided */
555   0, /* properties_destroyed */
556   0, /* todo_flags_start */
557   0, /* todo_flags_finish */
558 };
559 
560 class pass_rtl_unroll_loops : public rtl_opt_pass
561 {
562 public:
pass_rtl_unroll_loops(gcc::context * ctxt)563   pass_rtl_unroll_loops (gcc::context *ctxt)
564     : rtl_opt_pass (pass_data_rtl_unroll_loops, ctxt)
565   {}
566 
567   /* opt_pass methods: */
gate(function *)568   virtual bool gate (function *)
569     {
570       return (flag_unroll_loops || flag_unroll_all_loops || cfun->has_unroll);
571     }
572 
573   virtual unsigned int execute (function *);
574 
575 }; // class pass_rtl_unroll_loops
576 
577 unsigned int
execute(function * fun)578 pass_rtl_unroll_loops::execute (function *fun)
579 {
580   if (number_of_loops (fun) > 1)
581     {
582       int flags = 0;
583       if (dump_file)
584 	df_dump (dump_file);
585 
586       if (flag_unroll_loops)
587 	flags |= UAP_UNROLL;
588       if (flag_unroll_all_loops)
589 	flags |= UAP_UNROLL_ALL;
590 
591       unroll_loops (flags);
592     }
593   return 0;
594 }
595 
596 } // anon namespace
597 
598 rtl_opt_pass *
make_pass_rtl_unroll_loops(gcc::context * ctxt)599 make_pass_rtl_unroll_loops (gcc::context *ctxt)
600 {
601   return new pass_rtl_unroll_loops (ctxt);
602 }
603 
604 
605 namespace {
606 
607 const pass_data pass_data_rtl_doloop =
608 {
609   RTL_PASS, /* type */
610   "loop2_doloop", /* name */
611   OPTGROUP_LOOP, /* optinfo_flags */
612   TV_LOOP_DOLOOP, /* tv_id */
613   0, /* properties_required */
614   0, /* properties_provided */
615   0, /* properties_destroyed */
616   0, /* todo_flags_start */
617   0, /* todo_flags_finish */
618 };
619 
620 class pass_rtl_doloop : public rtl_opt_pass
621 {
622 public:
pass_rtl_doloop(gcc::context * ctxt)623   pass_rtl_doloop (gcc::context *ctxt)
624     : rtl_opt_pass (pass_data_rtl_doloop, ctxt)
625   {}
626 
627   /* opt_pass methods: */
628   virtual bool gate (function *);
629   virtual unsigned int execute (function *);
630 
631 }; // class pass_rtl_doloop
632 
633 bool
gate(function *)634 pass_rtl_doloop::gate (function *)
635 {
636   return (flag_branch_on_count_reg && targetm.have_doloop_end ());
637 }
638 
639 unsigned int
execute(function * fun)640 pass_rtl_doloop::execute (function *fun)
641 {
642   if (number_of_loops (fun) > 1)
643     doloop_optimize_loops ();
644   return 0;
645 }
646 
647 } // anon namespace
648 
649 rtl_opt_pass *
make_pass_rtl_doloop(gcc::context * ctxt)650 make_pass_rtl_doloop (gcc::context *ctxt)
651 {
652   return new pass_rtl_doloop (ctxt);
653 }
654