/* SSA Jump Threading
   Copyright (C) 2005-2022 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "predict.h"
#include "tree.h"
#include "gimple.h"
#include "fold-const.h"
#include "cfgloop.h"
#include "gimple-iterator.h"
#include "tree-cfg.h"
#include "tree-ssa-threadupdate.h"
#include "tree-ssa-loop.h"
#include "cfganal.h"
#include "tree-pass.h"
#include "gimple-ssa.h"
#include "tree-phinodes.h"
#include "tree-inline.h"
#include "tree-vectorizer.h"
#include "value-range.h"
#include "gimple-range.h"
#include "tree-ssa-threadedge.h"
#include "gimple-range-path.h"
#include "ssa.h"
#include "tree-cfgcleanup.h"
#include "tree-pretty-print.h"
#include "cfghooks.h"
#include "dbgcnt.h"

// Path registry for the backwards threader.  After all paths have been
// registered with register_path(), thread_through_all_blocks() is called
// to modify the CFG.

class back_threader_registry : public back_jt_path_registry
{
public:
  bool register_path (const vec<basic_block> &, edge taken);
};

// Class to abstract the profitability code for the backwards threader.

class back_threader_profitability
{
public:
  back_threader_profitability (bool speed_p)
    : m_speed_p (speed_p)
  { }
  bool profitable_path_p (const vec<basic_block> &, tree name, edge taken,
			  bool *irreducible_loop = NULL);
private:
  const bool m_speed_p;
};

// Back threader flags.
#define BT_NONE 0
// Generate fast code at the expense of code size.
#define BT_SPEED 1
// Resolve unknown SSAs on entry to a threading path.  If set, use the
// ranger.  If not, assume all ranges on entry to a path are VARYING.
#define BT_RESOLVE 2

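// Note how the passes defined at the end of this file combine these
// flags: the early "ethread" pass runs with BT_NONE, the "thread[12]"
// passes with BT_SPEED, and the "threadfull[12]" passes with
// BT_SPEED | BT_RESOLVE.
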
class back_threader
{
public:
  back_threader (function *fun, unsigned flags, bool first);
  ~back_threader ();
  unsigned thread_blocks ();
private:
  void maybe_thread_block (basic_block bb);
  void find_paths (basic_block bb, tree name);
  bool debug_counter ();
  edge maybe_register_path ();
  void maybe_register_path_dump (edge taken_edge);
  void find_paths_to_names (basic_block bb, bitmap imports);
  void resolve_phi (gphi *phi, bitmap imports);
  edge find_taken_edge (const vec<basic_block> &path);
  edge find_taken_edge_cond (const vec<basic_block> &path, gcond *);
  edge find_taken_edge_switch (const vec<basic_block> &path, gswitch *);
  virtual void debug ();
  virtual void dump (FILE *out);

  back_threader_registry m_registry;
  back_threader_profitability m_profit;
  path_range_query *m_solver;

  // Current path being analyzed.
  auto_vec<basic_block> m_path;
  // Hash to mark visited BBs while analyzing a path.
  hash_set<basic_block> m_visited_bbs;
  // The set of SSA names, any of which could potentially change the
  // value of the final conditional in a path.
  auto_bitmap m_imports;
  // The last statement in the path.
  gimple *m_last_stmt;
  // This is a bit of a wart.  It's used to pass the LHS SSA name to
  // the profitability engine.
  tree m_name;
  // Marker to differentiate unreachable edges.
  static const edge UNREACHABLE_EDGE;
  // The function being threaded.
  function *m_fun;
  // The BT_* flags this threader was created with.  If BT_RESOLVE is
  // set, unknown SSA names along a path are resolved with the ranger;
  // otherwise they are assumed to be VARYING.  Resolving is more
  // precise but slower.
  unsigned m_flags;
  // Set to TRUE for the first of each thread[12] pass or the first of
  // each threadfull[12] pass.  This is used to differentiate between
  // the different threading passes so we can set up debug counters.
  bool m_first;
};

// Used to differentiate unreachable edges, so we may stop the search
// in the given direction.
const edge back_threader::UNREACHABLE_EDGE = (edge) -1;

back_threader::back_threader (function *fun, unsigned flags, bool first)
  : m_profit (flags & BT_SPEED),
    m_first (first)
{
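  // When optimizing for speed, request the loop structures in their
  // canonical form (preheaders and simple latches); the size-only
  // (early) pass initializes loops in a mode that avoids modifying
  // the CFG.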
  if (flags & BT_SPEED)
    loop_optimizer_init (LOOPS_HAVE_PREHEADERS | LOOPS_HAVE_SIMPLE_LATCHES);
  else
    loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

  m_fun = fun;
  m_flags = flags;
  m_last_stmt = NULL;

  // The path solver needs EDGE_DFS_BACK in resolving mode.
  if (flags & BT_RESOLVE)
    mark_dfs_back_edges ();
  m_solver = new path_range_query (flags & BT_RESOLVE);
}

back_threader::~back_threader ()
{
  delete m_solver;

  loop_optimizer_finalize ();
}

// A wrapper for the various debug counters for the threading passes.
// Returns TRUE if it's OK to register the current threading
// candidate.

bool
back_threader::debug_counter ()
{
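  // The counters below can be controlled from the command line via
  // -fdbg-cnt (see their definitions in dbgcnt.def) to bisect which
  // threading candidates get registered.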
  // The ethread pass is mostly harmless ;-).
  if ((m_flags & BT_SPEED) == 0)
    return true;

  if (m_flags & BT_RESOLVE)
    {
      if (m_first && !dbg_cnt (back_threadfull1))
	return false;

      if (!m_first && !dbg_cnt (back_threadfull2))
	return false;
    }
  else
    {
      if (m_first && !dbg_cnt (back_thread1))
	return false;

      if (!m_first && !dbg_cnt (back_thread2))
	return false;
    }
  return true;
}

static void
dump_path (FILE *dump_file, const vec<basic_block> &path)
{
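  // The path is stored in reverse order (path[0] is the block ending
  // in the final conditional), so walk the vector backwards to print
  // the blocks in control-flow order; e.g., a path stored as
  // {bb5, bb4, bb3} prints as "3->4->5".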
  for (unsigned i = path.length (); i > 0; --i)
    {
      basic_block bb = path[i - 1];
      fprintf (dump_file, "%d", bb->index);
      if (i > 1)
	fprintf (dump_file, "->");
    }
}

// Dump details of an attempt to register a path.

void
back_threader::maybe_register_path_dump (edge taken)
{
  if (m_path.is_empty ())
    return;

  fprintf (dump_file, "path: ");
  dump_path (dump_file, m_path);
  fprintf (dump_file, "->");

  if (taken == UNREACHABLE_EDGE)
    fprintf (dump_file, "xx REJECTED (unreachable)\n");
  else if (taken)
    fprintf (dump_file, "%d SUCCESS\n", taken->dest->index);
  else
    fprintf (dump_file, "xx REJECTED\n");
}

// If an outgoing edge can be determined out of the current path,
// register it for jump threading and return the taken edge.
//
// Return NULL if it is unprofitable to thread this path, or the
// outgoing edge is unknown.  Return UNREACHABLE_EDGE if the path is
// unreachable.

edge
back_threader::maybe_register_path ()
{
  edge taken_edge = find_taken_edge (m_path);

  if (taken_edge && taken_edge != UNREACHABLE_EDGE)
    {
      if (m_visited_bbs.contains (taken_edge->dest))
	{
	  // Avoid circular paths by indicating there is nothing to
	  // see in this direction.
	  taken_edge = UNREACHABLE_EDGE;
	}
      else
	{
	  bool irreducible = false;
	  if (m_profit.profitable_path_p (m_path, m_name, taken_edge,
					  &irreducible)
	      && debug_counter ())
	    {
	      m_registry.register_path (m_path, taken_edge);

	      if (irreducible)
		vect_free_loop_info_assumptions (m_path[0]->loop_father);
	    }
	  else
	    taken_edge = NULL;
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    maybe_register_path_dump (taken_edge);

  return taken_edge;
}

// Return the known taken edge out of a path.  If the path can be
// determined to be unreachable, return UNREACHABLE_EDGE.  If no
// outgoing edge can be calculated, return NULL.

edge
back_threader::find_taken_edge (const vec<basic_block> &path)
{
  gcc_checking_assert (path.length () > 1);
  switch (gimple_code (m_last_stmt))
    {
    case GIMPLE_COND:
      return find_taken_edge_cond (path, as_a<gcond *> (m_last_stmt));

    case GIMPLE_SWITCH:
      return find_taken_edge_switch (path, as_a<gswitch *> (m_last_stmt));

    default:
      return NULL;
    }
}

// Same as find_taken_edge, but for paths ending in a switch.

edge
back_threader::find_taken_edge_switch (const vec<basic_block> &path,
				       gswitch *sw)
{
  tree name = gimple_switch_index (sw);
  int_range_max r;

  m_solver->compute_ranges (path, m_imports);
  m_solver->range_of_expr (r, name, sw);

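  // An undefined range means no value can reach the switch index
  // along this path, i.e. the path itself cannot be taken.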
  if (r.undefined_p ())
    return UNREACHABLE_EDGE;

  if (r.varying_p ())
    return NULL;

  tree label = find_case_label_range (sw, &r);
  if (!label)
    return NULL;

  return find_edge (gimple_bb (sw), label_to_block (cfun, CASE_LABEL (label)));
}

// Same as find_taken_edge, but for paths ending in a GIMPLE_COND.

edge
back_threader::find_taken_edge_cond (const vec<basic_block> &path,
				     gcond *cond)
{
  int_range_max r;

  m_solver->compute_ranges (path, m_imports);
  m_solver->range_of_stmt (r, cond);

  if (m_solver->unreachable_path_p ())
    return UNREACHABLE_EDGE;

  int_range<2> true_range (boolean_true_node, boolean_true_node);
  int_range<2> false_range (boolean_false_node, boolean_false_node);

  if (r == true_range || r == false_range)
    {
      edge e_true, e_false;
      basic_block bb = gimple_bb (cond);
      extract_true_false_edges_from_block (bb, &e_true, &e_false);
      return r == true_range ? e_true : e_false;
    }
  return NULL;
}

// Populate a vector of trees from a bitmap.

static inline void
populate_worklist (vec<tree> &worklist, bitmap bits)
{
  bitmap_iterator bi;
  unsigned i;

  EXECUTE_IF_SET_IN_BITMAP (bits, 0, i, bi)
    {
      tree name = ssa_name (i);
      worklist.quick_push (name);
    }
}

// Find jump threading paths that go through a PHI.

void
back_threader::resolve_phi (gphi *phi, bitmap interesting)
{
  if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (phi)))
    return;

  for (size_t i = 0; i < gimple_phi_num_args (phi); ++i)
    {
      edge e = gimple_phi_arg_edge (phi, i);

      // This is like path_crosses_loops in profitable_path_p but more
      // restrictive to avoid peeling off loop iterations (see
      // tree-ssa/pr14341.c for an example).
      bool profitable_p = m_path[0]->loop_father == e->src->loop_father;
      if (!profitable_p)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file,
		       "  FAIL: path through PHI in bb%d (incoming bb:%d) crosses loop\n",
		       e->dest->index, e->src->index);
	      fprintf (dump_file, "path: %d->", e->src->index);
	      dump_path (dump_file, m_path);
	      fprintf (dump_file, "->xx REJECTED\n");
	    }
	  continue;
	}

      tree arg = gimple_phi_arg_def (phi, i);
      unsigned v = 0;

      if (TREE_CODE (arg) == SSA_NAME
	  && !bitmap_bit_p (interesting, SSA_NAME_VERSION (arg)))
	{
	  // Record that ARG is interesting when searching down this path.
	  v = SSA_NAME_VERSION (arg);
	  gcc_checking_assert (v != 0);
	  bitmap_set_bit (interesting, v);
	  bitmap_set_bit (m_imports, v);
	}

      find_paths_to_names (e->src, interesting);

      if (v)
	bitmap_clear_bit (interesting, v);
    }
}

// Find jump threading paths to any of the SSA names in the
// INTERESTING bitmap, and register any such paths.
//
// BB is the block at the head of the current path being processed.

void
back_threader::find_paths_to_names (basic_block bb, bitmap interesting)
{
  if (m_visited_bbs.add (bb))
    return;

  m_path.safe_push (bb);

  // Try to resolve the path without looking back.
  if (m_path.length () > 1
      && (!m_profit.profitable_path_p (m_path, m_name, NULL)
	  || maybe_register_path ()))
    {
      m_path.pop ();
      m_visited_bbs.remove (bb);
      return;
    }

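  // Names whose definitions in this block have already been examined
  // are collected in PROCESSED; they are dropped from INTERESTING
  // before recursing into the predecessors and restored on exit.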
  auto_bitmap processed;
  bool done = false;
  // We use a worklist instead of iterating through the bitmap,
  // because we may add new items in-flight.
  auto_vec<tree> worklist (bitmap_count_bits (interesting));
  populate_worklist (worklist, interesting);
  while (!worklist.is_empty ())
    {
      tree name = worklist.pop ();
      unsigned i = SSA_NAME_VERSION (name);
      gimple *def_stmt = SSA_NAME_DEF_STMT (name);
      basic_block def_bb = gimple_bb (def_stmt);

      // Process any PHIs defined in this block.
      if (def_bb == bb
	  && bitmap_set_bit (processed, i)
	  && gimple_code (def_stmt) == GIMPLE_PHI)
	{
	  resolve_phi (as_a<gphi *> (def_stmt), interesting);
	  done = true;
	  break;
	}
    }
  // If there are interesting names not yet processed, keep looking.
  if (!done)
    {
      bitmap_and_compl_into (interesting, processed);
      if (!bitmap_empty_p (interesting))
	{
	  edge_iterator iter;
	  edge e;
	  FOR_EACH_EDGE (e, iter, bb->preds)
	    if ((e->flags & EDGE_ABNORMAL) == 0)
	      find_paths_to_names (e->src, interesting);
	}
    }

  // Reset things to their original state.
  bitmap_ior_into (interesting, processed);
  m_path.pop ();
  m_visited_bbs.remove (bb);
}

// Search backwards from BB looking for paths where the final
// conditional out of BB can be determined.  NAME is the LHS of the
// final conditional.  Register such paths for jump threading.

void
back_threader::find_paths (basic_block bb, tree name)
{
  gimple *stmt = last_stmt (bb);
  if (!stmt
      || (gimple_code (stmt) != GIMPLE_COND
	  && gimple_code (stmt) != GIMPLE_SWITCH))
    return;

  if (EDGE_COUNT (bb->succs) > 1
      || single_succ_to_potentially_threadable_block (bb))
    {
      m_last_stmt = stmt;
      m_visited_bbs.empty ();
      m_path.truncate (0);
      m_name = name;
      m_solver->compute_imports (m_imports, bb);

      auto_bitmap interesting;
      bitmap_copy (interesting, m_imports);
      find_paths_to_names (bb, interesting);
    }
}

// Simple helper to get the last statement from BB, which is assumed
// to be a control statement.  Return NULL if the last statement is
// not a control statement.

static gimple *
get_gimple_control_stmt (basic_block bb)
{
  gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);

  if (gsi_end_p (gsi))
    return NULL;

  gimple *stmt = gsi_stmt (gsi);
  enum gimple_code code = gimple_code (stmt);
  if (code == GIMPLE_COND || code == GIMPLE_SWITCH || code == GIMPLE_GOTO)
    return stmt;
  return NULL;
}

// Search backwards from BB looking for paths where the final
// conditional may be threaded to a successor block.  Record such
// paths for jump threading.

void
back_threader::maybe_thread_block (basic_block bb)
{
  gimple *stmt = get_gimple_control_stmt (bb);
  if (!stmt)
    return;

  enum gimple_code code = gimple_code (stmt);
  tree name;
  if (code == GIMPLE_SWITCH)
    name = gimple_switch_index (as_a <gswitch *> (stmt));
  else if (code == GIMPLE_COND)
    name = gimple_cond_lhs (stmt);
  else if (code == GIMPLE_GOTO)
    name = gimple_goto_dest (stmt);
  else
    name = NULL;

  find_paths (bb, name);
}

DEBUG_FUNCTION void
debug (const vec <basic_block> &path)
{
  dump_path (stderr, path);
  fputc ('\n', stderr);
}

void
back_threader::dump (FILE *out)
{
  m_solver->dump (out);
  fprintf (out, "\nCandidates for pre-computation:\n");
  fprintf (out, "===================================\n");

  bitmap_iterator bi;
  unsigned i;

  EXECUTE_IF_SET_IN_BITMAP (m_imports, 0, i, bi)
    {
      tree name = ssa_name (i);
      print_generic_expr (out, name, TDF_NONE);
      fprintf (out, "\n");
    }
}

void
back_threader::debug ()
{
  dump (stderr);
}

/* Examine jump threading path PATH and return TRUE if it is profitable to
   thread it, otherwise return FALSE.

   NAME is the SSA_NAME of the variable we found to have a constant
   value on PATH.  If unknown, NAME is NULL.

   If the taken edge out of the path is known ahead of time it is passed in
   TAKEN_EDGE, otherwise it is NULL.

   CREATES_IRREDUCIBLE_LOOP, if non-null, is set to TRUE if threading this
   path would create an irreducible loop.

   ?? It seems we should be able to loosen some of the restrictions in
   this function after loop optimizations have run.  */

bool
back_threader_profitability::profitable_path_p (const vec<basic_block> &m_path,
						tree name,
						edge taken_edge,
						bool *creates_irreducible_loop)
{
  gcc_checking_assert (!m_path.is_empty ());

  /* We can have an empty path here (excluding the DEF block) when the
     statement that makes a conditional generate a compile-time
     constant result is in the same block as the conditional.

     That's not really a jump threading opportunity, but instead is
     simple cprop & simplification.  We could handle it here if we
     wanted by wiring up all the incoming edges.  If we run this
     early in IPA, that might be worth doing.   For now we just
     reject that case.  */
  if (m_path.length () <= 1)
    return false;

  if (m_path.length () > (unsigned) param_max_fsm_thread_length)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  FAIL: Jump-thread path not considered: "
		 "the number of basic blocks on the path "
		 "exceeds PARAM_MAX_FSM_THREAD_LENGTH.\n");
      return false;
    }
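  // The block and instruction limits used here and below correspond
  // to the user-visible --param=max-fsm-thread-length and
  // --param=max-fsm-thread-path-insns knobs respectively.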

  int n_insns = 0;
  gimple_stmt_iterator gsi;
  loop_p loop = m_path[0]->loop_father;
  bool threaded_through_latch = false;
  bool multiway_branch_in_path = false;
  bool threaded_multiway_branch = false;
  bool contains_hot_bb = false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Checking profitability of path (backwards): ");

  /* Count the number of instructions on the path: as these instructions
     will have to be duplicated, we will not record the path if there
     are too many instructions on the path.  Also check that all the
     blocks in the path belong to a single loop.  */
  for (unsigned j = 0; j < m_path.length (); j++)
    {
      basic_block bb = m_path[j];

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, " bb:%i", bb->index);
      /* Remember, blocks in the path are stored in opposite order in
	 the PATH array.  The last entry in the array represents the
	 block with an outgoing edge that we will redirect to the jump
	 threading path.  Thus we don't care how many statements are
	 in that block because it will not be copied or whether or not
	 it ends in a multiway branch.  */
      if (j < m_path.length () - 1)
	{
	  int orig_n_insns = n_insns;
	  /* PHIs in the path will create degenerate PHIS in the
	     copied path which will then get propagated away, so
	     looking at just the duplicate path the PHIs would
	     seem unimportant.

	     But those PHIs, because they're assignments to objects
	     typically with lives that exist outside the thread path,
	     will tend to generate PHIs (or at least new PHI arguments)
	     at points where we leave the thread path and rejoin
	     the original blocks.  So we do want to account for them.

	     We ignore virtual PHIs.  We also ignore cases where BB
	     has a single incoming edge.  That's the most common
	     degenerate PHI we'll see here.  Finally we ignore PHIs
	     that are associated with the value we're tracking as
	     that object likely dies.  */
	  if (EDGE_COUNT (bb->succs) > 1 && EDGE_COUNT (bb->preds) > 1)
	    {
	      for (gphi_iterator gsip = gsi_start_phis (bb);
		   !gsi_end_p (gsip);
		   gsi_next (&gsip))
		{
		  gphi *phi = gsip.phi ();
		  tree dst = gimple_phi_result (phi);

		  /* Note that if both NAME and DST are anonymous
		     SSA_NAMEs, then we do not have enough information
		     to consider them associated.  */
		  if (dst != name
		      && name
		      && TREE_CODE (name) == SSA_NAME
		      && (SSA_NAME_VAR (dst) != SSA_NAME_VAR (name)
			  || !SSA_NAME_VAR (dst))
		      && !virtual_operand_p (dst))
		    ++n_insns;
		}
	    }

	  if (!contains_hot_bb && m_speed_p)
	    contains_hot_bb |= optimize_bb_for_speed_p (bb);
	  for (gsi = gsi_after_labels (bb);
	       !gsi_end_p (gsi);
	       gsi_next_nondebug (&gsi))
	    {
	      /* Do not allow OpenACC loop markers and __builtin_constant_p on
		 threading paths.  The latter is disallowed, because an
		 expression might be constant on two threading paths, and
		 become non-constant (i.e.: phi) when they merge.  */
	      gimple *stmt = gsi_stmt (gsi);
	      if (gimple_call_internal_p (stmt, IFN_UNIQUE)
		  || gimple_call_builtin_p (stmt, BUILT_IN_CONSTANT_P))
		return false;
	      /* Do not count empty statements and labels.  */
	      if (gimple_code (stmt) != GIMPLE_NOP
		  && !is_gimple_debug (stmt))
		n_insns += estimate_num_insns (stmt, &eni_size_weights);
	    }
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " (%i insns)", n_insns - orig_n_insns);

	  /* We do not look at the block with the threaded branch
	     in this loop.  So if any block with a last statement that
	     is a GIMPLE_SWITCH or GIMPLE_GOTO is seen, then we have a
	     multiway branch on our path.

	     The block in PATH[0] is special, it's the block where we're
	     going to be able to eliminate its branch.  */
	  gimple *last = last_stmt (bb);
	  if (last && (gimple_code (last) == GIMPLE_SWITCH
		       || gimple_code (last) == GIMPLE_GOTO))
	    {
	      if (j == 0)
		threaded_multiway_branch = true;
	      else
		multiway_branch_in_path = true;
	    }
	}

      /* Note if we thread through the latch, we will want to include
	 the last entry in the array when determining if we thread
	 through the loop latch.  */
      if (loop->latch == bb)
	{
	  threaded_through_latch = true;
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " (latch)");
	}
    }

  gimple *stmt = get_gimple_control_stmt (m_path[0]);
  gcc_assert (stmt);

  /* We are going to remove the control statement at the end of the
     last block in the threading path.  So don't count it against our
     statement count.  */

  int stmt_insns = estimate_num_insns (stmt, &eni_size_weights);
  n_insns -= stmt_insns;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\n  Control statement insns: %i\n"
	     "  Overall: %i insns\n",
	     stmt_insns, n_insns);

  if (creates_irreducible_loop)
    {
      /* If this path threaded through the loop latch back into the
	 same loop and the destination does not dominate the loop
	 latch, then this thread would create an irreducible loop.  */
      *creates_irreducible_loop = false;
      if (taken_edge
	  && threaded_through_latch
	  && loop == taken_edge->dest->loop_father
	  && (determine_bb_domination_status (loop, taken_edge->dest)
	      == DOMST_NONDOMINATING))
	*creates_irreducible_loop = true;
    }

  /* Threading is profitable if the path duplicated is hot but also
     in a case we separate cold path from hot path and permit optimization
     of the hot path later.  Be on the aggressive side here.  In some
     testcases, as in PR 78407, this leads to noticeable improvements.  */
  if (m_speed_p
      && ((taken_edge && optimize_edge_for_speed_p (taken_edge))
	  || contains_hot_bb))
    {
      if (n_insns >= param_max_fsm_thread_path_insns)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "  FAIL: Jump-thread path not considered: "
		     "the number of instructions on the path "
		     "exceeds PARAM_MAX_FSM_THREAD_PATH_INSNS.\n");
	  return false;
	}
    }
  else if (!m_speed_p && n_insns > 1)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  FAIL: Jump-thread path not considered: "
		 "duplication of %i insns is needed and optimizing for size.\n",
		 n_insns);
      return false;
    }

  /* We avoid creating irreducible inner loops unless we thread through
     a multiway branch, in which case we have deemed it worth losing
     other loop optimizations later.

     We also consider it worth creating an irreducible inner loop if
     the number of copied statements is low relative to the length of
     the path -- in that case there's little the traditional loop
     optimizer would have done anyway, so an irreducible loop is not
     so bad.  */
  if (!threaded_multiway_branch
      && creates_irreducible_loop
      && *creates_irreducible_loop
      && (n_insns * (unsigned) param_fsm_scale_path_stmts
	  > (m_path.length () *
	     (unsigned) param_fsm_scale_path_blocks)))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  FAIL: Would create irreducible loop without threading "
		 "multiway branch.\n");
      return false;
    }

  /* The generic copier used by the backthreader does not re-use an
     existing threading path to reduce code duplication.  So for that
     case, drastically reduce the number of statements we are allowed
     to copy.  */
  if (!(threaded_through_latch && threaded_multiway_branch)
      && (n_insns * param_fsm_scale_path_stmts
	  >= param_max_jump_thread_duplication_stmts))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  FAIL: Did not thread around loop and would copy too "
		 "many statements.\n");
      return false;
    }

  /* When there is a multi-way branch on the path, then threading can
     explode the CFG due to duplicating the edges for that multi-way
     branch.  So like above, only allow a multi-way branch on the path
     if we actually thread a multi-way branch.  */
  if (!threaded_multiway_branch && multiway_branch_in_path)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  FAIL: Thread through multiway branch without threading "
		 "a multiway branch.\n");
      return false;
    }

  /* Threading through an empty latch would cause code to be added to
     the latch.  This could alter the loop form sufficiently to cause
     loop optimizations to fail.  Disable these threads until after
     loop optimizations have run.  */
  if ((threaded_through_latch
       || (taken_edge && taken_edge->dest == loop->latch))
      && !(cfun->curr_properties & PROP_loop_opts_done)
      && empty_block_p (loop->latch))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  FAIL: Thread through latch before loop opts would create "
		 "non-empty latch\n");
      return false;
    }
  return true;
}

/* The current path PATH is a vector of blocks forming a jump threading
   path in reverse order.  TAKEN_EDGE is the edge taken from path[0].

   Convert the current path into the form used by register_jump_thread and
   register it.

   Return TRUE if successful or FALSE otherwise.  */

bool
back_threader_registry::register_path (const vec<basic_block> &m_path,
				       edge taken_edge)
{
  vec<jump_thread_edge *> *jump_thread_path = allocate_thread_path ();

  // The generic copier ignores the edge type.  We can build the
  // thread edges with any type.
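  // Walk the path from its start in the CFG (the back of the vector)
  // toward path[0]; e.g., for a path stored as {bb7, bb5, bb3} this
  // pushes the edges 3->5 and 5->7, followed by the taken edge out
  // of bb7.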
  for (unsigned int j = 0; j + 1 < m_path.length (); j++)
    {
      basic_block bb1 = m_path[m_path.length () - j - 1];
      basic_block bb2 = m_path[m_path.length () - j - 2];

      edge e = find_edge (bb1, bb2);
      gcc_assert (e);
      push_edge (jump_thread_path, e, EDGE_COPY_SRC_BLOCK);
    }

  push_edge (jump_thread_path, taken_edge, EDGE_NO_COPY_SRC_BLOCK);
  register_jump_thread (jump_thread_path);
  return true;
}

// Thread all suitable paths in the current function.
//
// Return TODO_flags.

unsigned int
back_threader::thread_blocks ()
{
  basic_block bb;
  FOR_EACH_BB_FN (bb, m_fun)
    if (EDGE_COUNT (bb->succs) > 1)
      maybe_thread_block (bb);

  bool changed = m_registry.thread_through_all_blocks (true);

  if (m_flags & BT_SPEED)
    return changed ? TODO_cleanup_cfg : 0;

  return 0;
}

namespace {

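// The pass_data initializers below list the fields in declaration
// order: type, name, optinfo_flags, tv_id, properties_required,
// properties_provided, properties_destroyed, todo_flags_start and
// todo_flags_finish.
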
const pass_data pass_data_early_thread_jumps =
{
  GIMPLE_PASS,
  "ethread",
  OPTGROUP_NONE,
  TV_TREE_SSA_THREAD_JUMPS,
  ( PROP_cfg | PROP_ssa ),
  0,
  0,
  0,
  ( TODO_cleanup_cfg | TODO_update_ssa ),
};

const pass_data pass_data_thread_jumps =
{
  GIMPLE_PASS,
  "thread",
  OPTGROUP_NONE,
  TV_TREE_SSA_THREAD_JUMPS,
  ( PROP_cfg | PROP_ssa ),
  0,
  0,
  0,
  TODO_update_ssa,
};

const pass_data pass_data_thread_jumps_full =
{
  GIMPLE_PASS,
  "threadfull",
  OPTGROUP_NONE,
  TV_TREE_SSA_THREAD_JUMPS,
  ( PROP_cfg | PROP_ssa ),
  0,
  0,
  0,
  TODO_update_ssa,
};

// Early jump threading pass optimizing for size.
class pass_early_thread_jumps : public gimple_opt_pass
{
public:
  pass_early_thread_jumps (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_early_thread_jumps, ctxt)
  {}

  opt_pass * clone () override
  {
    return new pass_early_thread_jumps (m_ctxt);
  }
  void set_pass_param (unsigned int, bool param) override
  {
    m_first = param;
  }
  bool gate (function *) override
  {
    return flag_thread_jumps;
  }
  unsigned int execute (function *fun) override
  {
    back_threader threader (fun, BT_NONE, m_first);
    return threader.thread_blocks ();
  }
private:
  bool m_first;
};

// Jump threading pass without resolving of unknown SSAs.
class pass_thread_jumps : public gimple_opt_pass
{
public:
  pass_thread_jumps (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_thread_jumps, ctxt)
  {}
  opt_pass * clone (void) override
  {
    return new pass_thread_jumps (m_ctxt);
  }
  void set_pass_param (unsigned int, bool param) override
  {
    m_first = param;
  }
  bool gate (function *) override
  {
    return flag_thread_jumps && flag_expensive_optimizations;
  }
  unsigned int execute (function *fun) override
  {
    back_threader threader (fun, BT_SPEED, m_first);
    return threader.thread_blocks ();
  }
private:
  bool m_first;
};

// Jump threading pass that fully resolves unknown SSAs.
class pass_thread_jumps_full : public gimple_opt_pass
{
public:
  pass_thread_jumps_full (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_thread_jumps_full, ctxt)
  {}
  opt_pass * clone (void) override
  {
    return new pass_thread_jumps_full (m_ctxt);
  }
  void set_pass_param (unsigned int, bool param) override
  {
    m_first = param;
  }
  bool gate (function *) override
  {
    return flag_thread_jumps && flag_expensive_optimizations;
  }
  unsigned int execute (function *fun) override
  {
    back_threader threader (fun, BT_SPEED | BT_RESOLVE, m_first);
    return threader.thread_blocks ();
  }
private:
  bool m_first;
};

} // namespace {

gimple_opt_pass *
make_pass_thread_jumps (gcc::context *ctxt)
{
  return new pass_thread_jumps (ctxt);
}

gimple_opt_pass *
make_pass_thread_jumps_full (gcc::context *ctxt)
{
  return new pass_thread_jumps_full (ctxt);
}

gimple_opt_pass *
make_pass_early_thread_jumps (gcc::context *ctxt)
{
  return new pass_early_thread_jumps (ctxt);
}