xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/tree-vect-loop-manip.c (revision e6c7e151de239c49d2e38720a061ed9d1fa99309)
1 /* Vectorizer Specific Loop Manipulations
2    Copyright (C) 2003-2017 Free Software Foundation, Inc.
3    Contributed by Dorit Naishlos <dorit@il.ibm.com>
4    and Ira Rosen <irar@il.ibm.com>
5 
6 This file is part of GCC.
7 
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12 
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
16 for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3.  If not see
20 <http://www.gnu.org/licenses/>.  */
21 
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "cfghooks.h"
29 #include "tree-pass.h"
30 #include "ssa.h"
31 #include "fold-const.h"
32 #include "cfganal.h"
33 #include "gimplify.h"
34 #include "gimple-iterator.h"
35 #include "gimplify-me.h"
36 #include "tree-cfg.h"
37 #include "tree-ssa-loop-manip.h"
38 #include "tree-into-ssa.h"
39 #include "tree-ssa.h"
40 #include "cfgloop.h"
41 #include "tree-scalar-evolution.h"
42 #include "tree-vectorizer.h"
43 #include "tree-ssa-loop-ivopts.h"
44 
45 /*************************************************************************
46   Simple Loop Peeling Utilities
47 
48   Utilities to support loop peeling for vectorization purposes.
49  *************************************************************************/
50 
51 
52 /* Renames the use *OP_P.  */
53 
54 static void
55 rename_use_op (use_operand_p op_p)
56 {
57   tree new_name;
58 
59   if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
60     return;
61 
62   new_name = get_current_def (USE_FROM_PTR (op_p));
63 
64   /* Something defined outside of the loop.  */
65   if (!new_name)
66     return;
67 
68   /* An ordinary ssa name defined in the loop.  */
69 
70   SET_USE (op_p, new_name);
71 }
72 
73 
74 /* Renames the variables in basic block BB.  Allow renaming of PHI arguments
75    on edges incoming from the outer-loop header if RENAME_FROM_OUTER_LOOP is
76    true.  */
77 
78 static void
79 rename_variables_in_bb (basic_block bb, bool rename_from_outer_loop)
80 {
81   gimple *stmt;
82   use_operand_p use_p;
83   ssa_op_iter iter;
84   edge e;
85   edge_iterator ei;
86   struct loop *loop = bb->loop_father;
87   struct loop *outer_loop = NULL;
88 
89   if (rename_from_outer_loop)
90     {
91       gcc_assert (loop);
92       outer_loop = loop_outer (loop);
93     }
94 
95   for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
96        gsi_next (&gsi))
97     {
98       stmt = gsi_stmt (gsi);
99       FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
100 	rename_use_op (use_p);
101     }
102 
103   FOR_EACH_EDGE (e, ei, bb->preds)
104     {
105       if (!flow_bb_inside_loop_p (loop, e->src))
106 	{
107 	  if (!rename_from_outer_loop)
108 	    continue;
109 	  if (e->src != outer_loop->header)
110 	    {
111 	      if (outer_loop->inner->next)
112 		{
113 		  /* If outer_loop has 2 inner loops, allow there to
114 		     be an extra basic block which decides which of the
115 		     two loops to use using LOOP_VECTORIZED.  */
116 		  if (!single_pred_p (e->src)
117 		      || single_pred (e->src) != outer_loop->header)
118 		    continue;
119 		}
120 	      else
121 		continue;
122 	    }
123 	}
124       for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
125 	   gsi_next (&gsi))
126         rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
127     }
128 }
129 
130 
131 struct adjust_info
132 {
133   tree from, to;
134   basic_block bb;
135 };
136 
137 /* A stack of values to be adjusted in debug stmts.  We have to
138    process them LIFO, so that the closest substitution applies.  If we
139    processed them FIFO, without the stack, we might substitute uses
140    with a PHI DEF that would soon become non-dominant, and when we got
141    to the suitable one, it wouldn't have anything to substitute any
142    more.  */
143 static vec<adjust_info, va_heap> adjust_vec;
144 
145 /* Adjust any debug stmts that referenced AI->from values to use the
146    loop-closed AI->to, if the references are dominated by AI->bb and
147    not by the definition of AI->from.  */
148 
149 static void
150 adjust_debug_stmts_now (adjust_info *ai)
151 {
152   basic_block bbphi = ai->bb;
153   tree orig_def = ai->from;
154   tree new_def = ai->to;
155   imm_use_iterator imm_iter;
156   gimple *stmt;
157   basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));
158 
159   gcc_assert (dom_info_available_p (CDI_DOMINATORS));
160 
161   /* Adjust any debug stmts that held onto non-loop-closed
162      references.  */
163   FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
164     {
165       use_operand_p use_p;
166       basic_block bbuse;
167 
168       if (!is_gimple_debug (stmt))
169 	continue;
170 
171       gcc_assert (gimple_debug_bind_p (stmt));
172 
173       bbuse = gimple_bb (stmt);
174 
175       if ((bbuse == bbphi
176 	   || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
177 	  && !(bbuse == bbdef
178 	       || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
179 	{
180 	  if (new_def)
181 	    FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
182 	      SET_USE (use_p, new_def);
183 	  else
184 	    {
185 	      gimple_debug_bind_reset_value (stmt);
186 	      update_stmt (stmt);
187 	    }
188 	}
189     }
190 }
191 
192 /* Adjust debug stmts as scheduled before.  */
193 
194 static void
195 adjust_vec_debug_stmts (void)
196 {
197   if (!MAY_HAVE_DEBUG_STMTS)
198     return;
199 
200   gcc_assert (adjust_vec.exists ());
201 
202   while (!adjust_vec.is_empty ())
203     {
204       adjust_debug_stmts_now (&adjust_vec.last ());
205       adjust_vec.pop ();
206     }
207 }
208 
209 /* Adjust any debug stmts that referenced FROM values to use the
210    loop-closed TO, if the references are dominated by BB and not by
211    the definition of FROM.  If adjust_vec is non-NULL, adjustments
212    will be postponed until adjust_vec_debug_stmts is called.  */
213 
214 static void
215 adjust_debug_stmts (tree from, tree to, basic_block bb)
216 {
217   adjust_info ai;
218 
219   if (MAY_HAVE_DEBUG_STMTS
220       && TREE_CODE (from) == SSA_NAME
221       && ! SSA_NAME_IS_DEFAULT_DEF (from)
222       && ! virtual_operand_p (from))
223     {
224       ai.from = from;
225       ai.to = to;
226       ai.bb = bb;
227 
228       if (adjust_vec.exists ())
229 	adjust_vec.safe_push (ai);
230       else
231 	adjust_debug_stmts_now (&ai);
232     }
233 }
234 
235 /* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
236    to adjust any debug stmts that referenced the old phi arg,
237    presumably non-loop-closed references left over from other
238    transformations.  */
239 
240 static void
241 adjust_phi_and_debug_stmts (gimple *update_phi, edge e, tree new_def)
242 {
243   tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);
244 
245   SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);
246 
247   if (MAY_HAVE_DEBUG_STMTS)
248     adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
249 			gimple_bb (update_phi));
250 }
251 
252 /* Make the LOOP iterate NITERS times.  This is done by adding a new IV
253    that starts at zero, is incremented by one, and whose limit is NITERS.
254 
255    Assumption: the exit-condition of LOOP is the last stmt in the loop.  */
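/* For illustration only - a sketch with hypothetical SSA names, assuming
   the exit edge is the edge taken when the condition is true.  An original
   exit test such as

       if (i_7 >= n_3) goto exit; else goto latch;

   is removed and replaced by a test on the newly created counter:

       ivtmp_9 = ivtmp_8 + 1;
       if (ivtmp_9 >= niters_5) goto exit; else goto latch;

   so the loop now executes exactly NITERS iterations regardless of the
   original condition.  */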
256 
257 void
258 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
259 {
260   tree indx_before_incr, indx_after_incr;
261   gcond *cond_stmt;
262   gcond *orig_cond;
263   edge exit_edge = single_exit (loop);
264   gimple_stmt_iterator loop_cond_gsi;
265   gimple_stmt_iterator incr_gsi;
266   bool insert_after;
267   tree init = build_int_cst (TREE_TYPE (niters), 0);
268   tree step = build_int_cst (TREE_TYPE (niters), 1);
269   source_location loop_loc;
270   enum tree_code code;
271 
272   orig_cond = get_loop_exit_condition (loop);
273   gcc_assert (orig_cond);
274   loop_cond_gsi = gsi_for_stmt (orig_cond);
275 
276   standard_iv_increment_position (loop, &incr_gsi, &insert_after);
277   create_iv (init, step, NULL_TREE, loop,
278              &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);
279 
280   indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
281 					      true, NULL_TREE, true,
282 					      GSI_SAME_STMT);
283   niters = force_gimple_operand_gsi (&loop_cond_gsi, niters, true, NULL_TREE,
284 				     true, GSI_SAME_STMT);
285 
286   code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
287   cond_stmt = gimple_build_cond (code, indx_after_incr, niters, NULL_TREE,
288 				 NULL_TREE);
289 
290   gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
291 
292   /* Remove old loop exit test:  */
293   gsi_remove (&loop_cond_gsi, true);
294   free_stmt_vec_info (orig_cond);
295 
296   loop_loc = find_loop_location (loop);
297   if (dump_enabled_p ())
298     {
299       if (LOCATION_LOCUS (loop_loc) != UNKNOWN_LOCATION)
300 	dump_printf (MSG_NOTE, "\nloop at %s:%d: ", LOCATION_FILE (loop_loc),
301 		     LOCATION_LINE (loop_loc));
302       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, cond_stmt, 0);
303     }
304 
305   /* Record the number of latch iterations.  */
306   loop->nb_iterations = fold_build2 (MINUS_EXPR, TREE_TYPE (niters), niters,
307 				     build_int_cst (TREE_TYPE (niters), 1));
308 }
309 
310 /* Helper routine of slpeel_tree_duplicate_loop_to_edge_cfg.
311    For each pair of corresponding PHI arguments on edges FROM and TO,
312    ensure that the current_def of the TO->dest argument is set to the
313    current_def of the FROM->dest argument.  */
314 
315 static void
316 slpeel_duplicate_current_defs_from_edges (edge from, edge to)
317 {
318   gimple_stmt_iterator gsi_from, gsi_to;
319 
320   for (gsi_from = gsi_start_phis (from->dest),
321        gsi_to = gsi_start_phis (to->dest);
322        !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);)
323     {
324       gimple *from_phi = gsi_stmt (gsi_from);
325       gimple *to_phi = gsi_stmt (gsi_to);
326       tree from_arg = PHI_ARG_DEF_FROM_EDGE (from_phi, from);
327       tree to_arg = PHI_ARG_DEF_FROM_EDGE (to_phi, to);
328       if (virtual_operand_p (from_arg))
329 	{
330 	  gsi_next (&gsi_from);
331 	  continue;
332 	}
333       if (virtual_operand_p (to_arg))
334 	{
335 	  gsi_next (&gsi_to);
336 	  continue;
337 	}
338       if (TREE_CODE (from_arg) != SSA_NAME)
339 	gcc_assert (operand_equal_p (from_arg, to_arg, 0));
340       else
341 	{
342 	  if (get_current_def (to_arg) == NULL_TREE)
343 	    set_current_def (to_arg, get_current_def (from_arg));
344 	}
345       gsi_next (&gsi_from);
346       gsi_next (&gsi_to);
347     }
348 
349   gphi *from_phi = get_virtual_phi (from->dest);
350   gphi *to_phi = get_virtual_phi (to->dest);
351   if (from_phi)
352     set_current_def (PHI_ARG_DEF_FROM_EDGE (to_phi, to),
353 		     get_current_def (PHI_ARG_DEF_FROM_EDGE (from_phi, from)));
354 }
355 
356 
357 /* Given LOOP, this function generates a new copy of it and puts the copy
358    on E, which is either the entry or exit edge of LOOP.  If SCALAR_LOOP is
359    non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the
360    basic blocks from SCALAR_LOOP instead of LOOP, but still to either the
361    entry or exit of LOOP.  */
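/* Rough shape of the result (a sketch, not the exact CFG).  When E is the
   exit edge, the copy is placed after LOOP:

       preheader -> LOOP -> new_preheader -> NEW_LOOP -> old exit dest

   When E is the preheader edge, the copy is placed before LOOP:

       old preheader pred -> NEW_LOOP -> preheader -> LOOP -> exit dest

   In both cases the copy gets a single-block pre-header of its own so the
   loop-entry PHI arguments can be carried over without special casing.  */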
362 
363 struct loop *
364 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop,
365 					struct loop *scalar_loop, edge e)
366 {
367   struct loop *new_loop;
368   basic_block *new_bbs, *bbs, *pbbs;
369   bool at_exit;
370   bool was_imm_dom;
371   basic_block exit_dest;
372   edge exit, new_exit;
373   bool duplicate_outer_loop = false;
374 
375   exit = single_exit (loop);
376   at_exit = (e == exit);
377   if (!at_exit && e != loop_preheader_edge (loop))
378     return NULL;
379 
380   if (scalar_loop == NULL)
381     scalar_loop = loop;
382 
383   bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
384   pbbs = bbs + 1;
385   get_loop_body_with_size (scalar_loop, pbbs, scalar_loop->num_nodes);
386   /* Allow duplication of outer loops.  */
387   if (scalar_loop->inner)
388     duplicate_outer_loop = true;
389   /* Check whether duplication is possible.  */
390   if (!can_copy_bbs_p (pbbs, scalar_loop->num_nodes))
391     {
392       free (bbs);
393       return NULL;
394     }
395 
396   /* Generate new loop structure.  */
397   new_loop = duplicate_loop (scalar_loop, loop_outer (scalar_loop));
398   duplicate_subloops (scalar_loop, new_loop);
399 
400   exit_dest = exit->dest;
401   was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
402 					  exit_dest) == loop->header ?
403 		 true : false);
404 
405   /* Also copy the pre-header; this avoids jumping through hoops to
406      duplicate the loop entry PHI arguments.  Create an empty
407      pre-header unconditionally for this.  */
408   basic_block preheader = split_edge (loop_preheader_edge (scalar_loop));
409   edge entry_e = single_pred_edge (preheader);
410   bbs[0] = preheader;
411   new_bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
412 
413   exit = single_exit (scalar_loop);
414   copy_bbs (bbs, scalar_loop->num_nodes + 1, new_bbs,
415 	    &exit, 1, &new_exit, NULL,
416 	    at_exit ? loop->latch : e->src, true);
417   exit = single_exit (loop);
418   basic_block new_preheader = new_bbs[0];
419 
420   add_phi_args_after_copy (new_bbs, scalar_loop->num_nodes + 1, NULL);
421 
422   if (scalar_loop != loop)
423     {
424       /* If we copied from SCALAR_LOOP rather than LOOP, SSA_NAMEs from
425 	 SCALAR_LOOP will have current_def set to SSA_NAMEs in the new_loop,
426 	 but LOOP will not.  slpeel_update_phi_nodes_for_guard{1,2} expects
427 	 the LOOP SSA_NAMEs (on the exit edge and edge from latch to
428 	 header) to have current_def set, so copy them over.  */
429       slpeel_duplicate_current_defs_from_edges (single_exit (scalar_loop),
430 						exit);
431       slpeel_duplicate_current_defs_from_edges (EDGE_SUCC (scalar_loop->latch,
432 							   0),
433 						EDGE_SUCC (loop->latch, 0));
434     }
435 
436   if (at_exit) /* Add the loop copy at exit.  */
437     {
438       if (scalar_loop != loop)
439 	{
440 	  gphi_iterator gsi;
441 	  new_exit = redirect_edge_and_branch (new_exit, exit_dest);
442 
443 	  for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi);
444 	       gsi_next (&gsi))
445 	    {
446 	      gphi *phi = gsi.phi ();
447 	      tree orig_arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
448 	      location_t orig_locus
449 		= gimple_phi_arg_location_from_edge (phi, e);
450 
451 	      add_phi_arg (phi, orig_arg, new_exit, orig_locus);
452 	    }
453 	}
454       redirect_edge_and_branch_force (e, new_preheader);
455       flush_pending_stmts (e);
456       set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src);
457       if (was_imm_dom || duplicate_outer_loop)
458 	set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_exit->src);
459 
460       /* And remove the unnecessary forwarder again.  Keep the other
461          one so we have a proper pre-header for the loop at the exit edge.  */
462       redirect_edge_pred (single_succ_edge (preheader),
463 			  single_pred (preheader));
464       delete_basic_block (preheader);
465       set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
466 			       loop_preheader_edge (scalar_loop)->src);
467     }
468   else /* Add the copy at entry.  */
469     {
470       if (scalar_loop != loop)
471 	{
472 	  /* Remove the unnecessary forwarder of scalar_loop again.  */
473 	  redirect_edge_pred (single_succ_edge (preheader),
474 			      single_pred (preheader));
475 	  delete_basic_block (preheader);
476 	  set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
477 				   loop_preheader_edge (scalar_loop)->src);
478 	  preheader = split_edge (loop_preheader_edge (loop));
479 	  entry_e = single_pred_edge (preheader);
480 	}
481 
482       redirect_edge_and_branch_force (entry_e, new_preheader);
483       flush_pending_stmts (entry_e);
484       set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src);
485 
486       redirect_edge_and_branch_force (new_exit, preheader);
487       flush_pending_stmts (new_exit);
488       set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src);
489 
490       /* And remove the unnecessary forwarder again.  Keep the other
491          one so we have a proper pre-header for the loop at the exit edge.  */
492       redirect_edge_pred (single_succ_edge (new_preheader),
493 			  single_pred (new_preheader));
494       delete_basic_block (new_preheader);
495       set_immediate_dominator (CDI_DOMINATORS, new_loop->header,
496 			       loop_preheader_edge (new_loop)->src);
497     }
498 
499   for (unsigned i = 0; i < scalar_loop->num_nodes + 1; i++)
500     rename_variables_in_bb (new_bbs[i], duplicate_outer_loop);
501 
502   if (scalar_loop != loop)
503     {
504       /* Update new_loop->header PHIs, so that on the preheader
505 	 edge they are the ones from loop rather than scalar_loop.  */
506       gphi_iterator gsi_orig, gsi_new;
507       edge orig_e = loop_preheader_edge (loop);
508       edge new_e = loop_preheader_edge (new_loop);
509 
510       for (gsi_orig = gsi_start_phis (loop->header),
511 	   gsi_new = gsi_start_phis (new_loop->header);
512 	   !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_new);
513 	   gsi_next (&gsi_orig), gsi_next (&gsi_new))
514 	{
515 	  gphi *orig_phi = gsi_orig.phi ();
516 	  gphi *new_phi = gsi_new.phi ();
517 	  tree orig_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
518 	  location_t orig_locus
519 	    = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
520 
521 	  add_phi_arg (new_phi, orig_arg, new_e, orig_locus);
522 	}
523     }
524 
525   free (new_bbs);
526   free (bbs);
527 
528   checking_verify_dominators (CDI_DOMINATORS);
529 
530   return new_loop;
531 }
532 
533 
534 /* Given the condition expression COND, put it as the last statement of
535    GUARD_BB; set both edges' probability; set dominator of GUARD_TO to
536    DOM_BB; return the skip edge.  GUARD_TO is the target basic block to
537    skip the loop.  PROBABILITY is the skip edge's probability.  Mark the
538    new edge as irreducible if IRREDUCIBLE_P is true.  */
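/* A sketch of the resulting control flow (block and edge names are
   illustrative only):

       guard_bb:
         ...
         if (COND) goto GUARD_TO;   ;; new skip edge (EDGE_TRUE_VALUE)
         else fall through;         ;; old successor edge, now EDGE_FALSE_VALUE

   The skip edge gets PROBABILITY; the fall-through edge gets the inverse
   probability and keeps its original destination.  */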
539 
540 static edge
541 slpeel_add_loop_guard (basic_block guard_bb, tree cond,
542 		       basic_block guard_to, basic_block dom_bb,
543 		       int probability, bool irreducible_p)
544 {
545   gimple_stmt_iterator gsi;
546   edge new_e, enter_e;
547   gcond *cond_stmt;
548   gimple_seq gimplify_stmt_list = NULL;
549 
550   enter_e = EDGE_SUCC (guard_bb, 0);
551   enter_e->flags &= ~EDGE_FALLTHRU;
552   enter_e->flags |= EDGE_FALSE_VALUE;
553   gsi = gsi_last_bb (guard_bb);
554 
555   cond = force_gimple_operand_1 (cond, &gimplify_stmt_list, is_gimple_condexpr,
556 				 NULL_TREE);
557   if (gimplify_stmt_list)
558     gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT);
559 
560   cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
561   gsi = gsi_last_bb (guard_bb);
562   gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
563 
564   /* Add new edge to connect guard block to the merge/loop-exit block.  */
565   new_e = make_edge (guard_bb, guard_to, EDGE_TRUE_VALUE);
566 
567   new_e->count = guard_bb->count;
568   new_e->probability = probability;
569   new_e->count = apply_probability (enter_e->count, probability);
570   if (irreducible_p)
571     new_e->flags |= EDGE_IRREDUCIBLE_LOOP;
572 
573   enter_e->count -= new_e->count;
574   enter_e->probability = inverse_probability (probability);
575   set_immediate_dominator (CDI_DOMINATORS, guard_to, dom_bb);
576 
577   /* Split enter_e to preserve LOOPS_HAVE_PREHEADERS.  */
578   if (enter_e->dest->loop_father->header == enter_e->dest)
579     split_edge (enter_e);
580 
581   return new_e;
582 }
583 
584 
585 /* This function verifies that LOOP satisfies the following restrictions:
586    (1) it consists of exactly 2 basic blocks - a header and an empty latch -
587        if it is an innermost loop, and of 5 basic blocks if it is an outer loop;
588    (2) it has a single entry and a single exit;
589    (3) its exit condition is the last stmt in the header;
590    (4) E is the entry/exit edge of LOOP.
591  */
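/* For illustration, an innermost loop of the required shape (a sketch,
   with hypothetical SSA names):

       header:                  ;; exit condition is the last stmt
         i_1 = PHI <0, i_2>
         ...
         i_2 = i_1 + 1;
         if (i_2 < n_3) goto latch; else goto exit;
       latch:                   ;; empty
         goto header;

   i.e. exactly two basic blocks, a single entry edge from the preheader
   and a single exit edge out of the header.  */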
592 
593 bool
594 slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
595 {
596   edge exit_e = single_exit (loop);
597   edge entry_e = loop_preheader_edge (loop);
598   gcond *orig_cond = get_loop_exit_condition (loop);
599   gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
600   unsigned int num_bb = loop->inner? 5 : 2;
601 
602   /* All loops have an outer scope; the only case loop_outer (loop) is NULL
603      is for the function itself.  */
604   if (!loop_outer (loop)
605       || loop->num_nodes != num_bb
606       || !empty_block_p (loop->latch)
607       || !single_exit (loop)
608       /* Verify that new loop exit condition can be trivially modified.  */
609       || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
610       || (e != exit_e && e != entry_e))
611     return false;
612 
613   return true;
614 }
615 
616 /* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI
617    in the exit bb and rename all the uses after the loop.  This simplifies
618    the *guard[12] routines, which assume loop closed SSA form for all PHIs
619    (but normally loop closed SSA form doesn't require virtual PHIs to be
620    in the same form).  Doing this early simplifies checking which uses
621    should be renamed.  */
622 
623 static void
624 create_lcssa_for_virtual_phi (struct loop *loop)
625 {
626   gphi_iterator gsi;
627   edge exit_e = single_exit (loop);
628 
629   for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
630     if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
631       {
632 	gphi *phi = gsi.phi ();
633 	for (gsi = gsi_start_phis (exit_e->dest);
634 	     !gsi_end_p (gsi); gsi_next (&gsi))
635 	  if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
636 	    break;
637 	if (gsi_end_p (gsi))
638 	  {
639 	    tree new_vop = copy_ssa_name (PHI_RESULT (phi));
640 	    gphi *new_phi = create_phi_node (new_vop, exit_e->dest);
641 	    tree vop = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
642 	    imm_use_iterator imm_iter;
643 	    gimple *stmt;
644 	    use_operand_p use_p;
645 
646 	    SSA_NAME_OCCURS_IN_ABNORMAL_PHI (new_vop)
647 	      = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (vop);
648 	    add_phi_arg (new_phi, vop, exit_e, UNKNOWN_LOCATION);
649 	    gimple_phi_set_result (new_phi, new_vop);
650 	    FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop)
651 	      if (stmt != new_phi
652 		  && !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
653 		FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
654 		  SET_USE (use_p, new_vop);
655 	  }
656 	break;
657       }
658 
659 }
660 
661 /* Function find_loop_location.
662 
663    Extract the location of the loop in the source code.
664    If the loop is not well formed for vectorization, an estimated
665    location is calculated.
666    Return the loop location if it succeeds and UNKNOWN_LOCATION if not.  */
667 
668 source_location
669 find_loop_location (struct loop *loop)
670 {
671   gimple *stmt = NULL;
672   basic_block bb;
673   gimple_stmt_iterator si;
674 
675   if (!loop)
676     return UNKNOWN_LOCATION;
677 
678   stmt = get_loop_exit_condition (loop);
679 
680   if (stmt
681       && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
682     return gimple_location (stmt);
683 
684   /* If we got here the loop is probably not "well formed";
685      try to estimate the loop location.  */
686 
687   if (!loop->header)
688     return UNKNOWN_LOCATION;
689 
690   bb = loop->header;
691 
692   for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
693     {
694       stmt = gsi_stmt (si);
695       if (LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
696         return gimple_location (stmt);
697     }
698 
699   return UNKNOWN_LOCATION;
700 }
701 
702 /* Return true if PHI defines an IV of the loop to be vectorized.  */
703 
704 static bool
705 iv_phi_p (gphi *phi)
706 {
707   if (virtual_operand_p (PHI_RESULT (phi)))
708     return false;
709 
710   stmt_vec_info stmt_info = vinfo_for_stmt (phi);
711   gcc_assert (stmt_info != NULL);
712   if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
713       || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
714     return false;
715 
716   return true;
717 }
718 
719 /* Function vect_can_advance_ivs_p
720 
721    In case the number of iterations that LOOP iterates is unknown at compile
722    time, an epilog loop will be generated, and the loop induction variables
723    (IVs) will be "advanced" to the value they are supposed to take just before
724    the epilog loop.  Here we check that the access function of the loop IVs
725    and the expression that represents the loop bound are simple enough.
726    These restrictions will be relaxed in the future.  */
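/* For example (a sketch): an IV such as

       i_1 = PHI <0, i_2>;   i_2 = i_1 + 4;

   has evolution part '4', which is loop-invariant and not itself a chrec,
   so it can be advanced.  An IV whose step is again an induction in the
   loop (a polynomial of degree >= 2) is rejected by the checks below.  */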
727 
728 bool
729 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
730 {
731   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
732   basic_block bb = loop->header;
733   gphi_iterator gsi;
734 
735   /* Analyze phi functions of the loop header.  */
736 
737   if (dump_enabled_p ())
738     dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:\n");
739   for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
740     {
741       tree evolution_part;
742 
743       gphi *phi = gsi.phi ();
744       if (dump_enabled_p ())
745 	{
746           dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
747           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
748 	}
749 
750       /* Skip virtual phi's. The data dependences that are associated with
751 	 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.
752 
753 	 Skip reduction phis.  */
754       if (!iv_phi_p (phi))
755 	{
756 	  if (dump_enabled_p ())
757 	    dump_printf_loc (MSG_NOTE, vect_location,
758 			     "reduc or virtual phi. skip.\n");
759 	  continue;
760 	}
761 
762       /* Analyze the evolution function.  */
763 
764       evolution_part
765 	= STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
766       if (evolution_part == NULL_TREE)
767         {
768 	  if (dump_enabled_p ())
769 	    dump_printf (MSG_MISSED_OPTIMIZATION,
770 			 "No access function or evolution.\n");
771 	  return false;
772         }
773 
774       /* FORNOW: We do not transform initial conditions of IVs
775 	 whose evolution functions are not invariant in the loop.  */
776 
777       if (!expr_invariant_in_loop_p (loop, evolution_part))
778 	{
779 	  if (dump_enabled_p ())
780 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
781 			     "evolution not invariant in loop.\n");
782 	  return false;
783 	}
784 
785       /* FORNOW: We do not transform initial conditions of IVs
786 	 whose evolution function is a polynomial of degree >= 2.  */
787 
788       if (tree_is_chrec (evolution_part))
789 	{
790 	  if (dump_enabled_p ())
791 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
792 			     "evolution is chrec.\n");
793 	  return false;
794 	}
795     }
796 
797   return true;
798 }
799 
800 
801 /*   Function vect_update_ivs_after_vectorizer.
802 
803      "Advance" the induction variables of LOOP to the value they should take
804      after the execution of LOOP.  This is currently necessary because the
805      vectorizer does not handle induction variables that are used after the
806      loop.  Such a situation occurs when the last iterations of LOOP are
807      peeled, because:
808      1. We introduced new uses after LOOP for IVs that were not originally used
809         after LOOP: the IVs of LOOP are now used by an epilog loop.
810      2. LOOP is going to be vectorized; this means that it will iterate N/VF
811         times, whereas the loop IVs should be bumped N times.
812 
813      Input:
814      - LOOP - a loop that is going to be vectorized. The last few iterations
815               of LOOP were peeled.
816      - NITERS - the number of iterations that LOOP executes (before it is
817                 vectorized), i.e., the number of times the ivs should be bumped.
818      - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
819                   coming out from LOOP on which there are uses of the LOOP ivs
820 		  (this is the path from LOOP->exit to epilog_loop->preheader).
821 
822                   The new definitions of the ivs are placed in LOOP->exit.
823                   The phi args associated with the edge UPDATE_E in the bb
824                   UPDATE_E->dest are updated accordingly.
825 
826      Assumption 1: Like the rest of the vectorizer, this function assumes
827      a single loop exit that has a single predecessor.
828 
829      Assumption 2: The phi nodes in the LOOP header and in update_bb are
830      organized in the same order.
831 
832      Assumption 3: The access function of the ivs is simple enough (see
833      vect_can_advance_ivs_p).  This assumption will be relaxed in the future.
834 
835      Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
836      coming out of LOOP on which the ivs of LOOP are used (this is the path
837      that leads to the epilog loop; other paths skip the epilog loop).  This
838      path starts with the edge UPDATE_E, and its destination (denoted update_bb)
839      needs to have its phis updated.
840  */
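/* A worked example with hypothetical names: for an IV with initial value
   init_expr = p_0, step_expr = 4 and NITERS = n_7, the value the IV must
   have after the loop is built as

       off = (step type) n_7 * 4;
       ni  = p_0 + off;            ;; or a pointer-plus for pointer IVs

   ni is gimplified at the end of the exit bb and replaces the PHI
   argument on UPDATE_E in update_bb.  */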
841 
842 static void
843 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo,
844 				  tree niters, edge update_e)
845 {
846   gphi_iterator gsi, gsi1;
847   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
848   basic_block update_bb = update_e->dest;
849   basic_block exit_bb = single_exit (loop)->dest;
850 
851   /* Make sure there exists a single-predecessor exit bb:  */
852   gcc_assert (single_pred_p (exit_bb));
853   gcc_assert (single_succ_edge (exit_bb) == update_e);
854 
855   for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
856        !gsi_end_p (gsi) && !gsi_end_p (gsi1);
857        gsi_next (&gsi), gsi_next (&gsi1))
858     {
859       tree init_expr;
860       tree step_expr, off;
861       tree type;
862       tree var, ni, ni_name;
863       gimple_stmt_iterator last_gsi;
864 
865       gphi *phi = gsi.phi ();
866       gphi *phi1 = gsi1.phi ();
867       if (dump_enabled_p ())
868 	{
869 	  dump_printf_loc (MSG_NOTE, vect_location,
870 			   "vect_update_ivs_after_vectorizer: phi: ");
871 	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
872 	}
873 
874       /* Skip reduction and virtual phis.  */
875       if (!iv_phi_p (phi))
876 	{
877 	  if (dump_enabled_p ())
878 	    dump_printf_loc (MSG_NOTE, vect_location,
879 			     "reduc or virtual phi. skip.\n");
880 	  continue;
881 	}
882 
883       type = TREE_TYPE (gimple_phi_result (phi));
884       step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
885       step_expr = unshare_expr (step_expr);
886 
887       /* FORNOW: We do not support IVs whose evolution function is a polynomial
888          of degree >= 2 or exponential.  */
889       gcc_assert (!tree_is_chrec (step_expr));
890 
891       init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
892 
893       off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
894 			 fold_convert (TREE_TYPE (step_expr), niters),
895 			 step_expr);
896       if (POINTER_TYPE_P (type))
897 	ni = fold_build_pointer_plus (init_expr, off);
898       else
899 	ni = fold_build2 (PLUS_EXPR, type,
900 			  init_expr, fold_convert (type, off));
901 
902       var = create_tmp_var (type, "tmp");
903 
904       last_gsi = gsi_last_bb (exit_bb);
905       gimple_seq new_stmts = NULL;
906       ni_name = force_gimple_operand (ni, &new_stmts, false, var);
907       /* Exit_bb shouldn't be empty.  */
908       if (!gsi_end_p (last_gsi))
909 	gsi_insert_seq_after (&last_gsi, new_stmts, GSI_SAME_STMT);
910       else
911 	gsi_insert_seq_before (&last_gsi, new_stmts, GSI_SAME_STMT);
912 
913       /* Fix phi expressions in the successor bb.  */
914       adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
915     }
916 }
917 
918 /* Function vect_gen_prolog_loop_niters
919 
920    Generate the number of iterations which should be peeled as prolog for the
921    loop represented by LOOP_VINFO.  It is calculated as the misalignment of
922    DR - the data reference recorded in LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).
923    As a result, after the execution of this loop, the data reference DR will
924    refer to an aligned location.  The following computation is generated:
925 
926    If the misalignment of DR is known at compile time:
927      addr_mis = DR_MISALIGNMENT (dr);
928    Else, compute address misalignment in bytes:
929      addr_mis = addr & (vectype_align - 1)
930 
931    prolog_niters = ((VF - addr_mis/elem_size)&(VF-1))/step
932 
933    (elem_size = element type size; an element is the scalar element whose type
934    is the inner type of the vectype)
935 
936    The computations will be emitted at the end of BB.  We also compute and
937    store the upper bound (inclusive) of the result in BOUND.
938 
939    When the step of the data-ref in the loop is not 1 (as in interleaved data
940    and SLP), the number of iterations of the prolog must be divided by the step
941    (which is equal to the size of the interleaved group).
942 
943    The above formulas assume that VF == number of elements in the vector. This
944    may not hold when there are multiple-types in the loop.
945    In this case, for some data-references in the loop the VF does not represent
946    the number of elements that fit in the vector.  Therefore, instead of VF we
947    use TYPE_VECTOR_SUBPARTS.  */
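/* A worked example with made-up numbers: for a 16-byte vector of four
   4-byte ints (vectype_align = 16, elem_size = 4, nelements = 4) and a
   start address whose low bits are 8:

       byte_misalign = addr & 15           =  8
       elem_misalign = byte_misalign >> 2  =  2
       iters = (4 - 2) & 3                 =  2

   i.e. two scalar iterations are peeled so the access becomes 16-byte
   aligned, and BOUND is set to nelements - 1 = 3.  */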
948 
949 static tree
950 vect_gen_prolog_loop_niters (loop_vec_info loop_vinfo,
951 			     basic_block bb, int *bound)
952 {
953   struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
954   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
955   tree var;
956   tree niters_type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo));
957   gimple_seq stmts = NULL, new_stmts = NULL;
958   tree iters, iters_name;
959   gimple *dr_stmt = DR_STMT (dr);
960   stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
961   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
962   int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
963   int nelements = TYPE_VECTOR_SUBPARTS (vectype);
964 
965   if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
966     {
967       int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
968 
969       if (dump_enabled_p ())
970         dump_printf_loc (MSG_NOTE, vect_location,
971                          "known peeling = %d.\n", npeel);
972 
973       iters = build_int_cst (niters_type, npeel);
974       *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
975     }
976   else
977     {
978       bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
979       tree offset = negative
980 	  ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
981       tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
982 						&stmts, offset, loop);
983       tree type = unsigned_type_for (TREE_TYPE (start_addr));
984       tree vectype_align_minus_1 = build_int_cst (type, vectype_align - 1);
985       HOST_WIDE_INT elem_size =
986                 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
987       tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
988       tree nelements_minus_1 = build_int_cst (type, nelements - 1);
989       tree nelements_tree = build_int_cst (type, nelements);
990       tree byte_misalign;
991       tree elem_misalign;
992 
993       /* Create:  byte_misalign = addr & (vectype_align - 1)  */
994       byte_misalign =
995 	fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr),
996 		     vectype_align_minus_1);
997 
998       /* Create:  elem_misalign = byte_misalign / element_size  */
999       elem_misalign =
1000 	fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
1001 
1002       /* Create:  (niters_type) (nelements - elem_misalign)&(nelements - 1)  */
1003       if (negative)
1004 	iters = fold_build2 (MINUS_EXPR, type, elem_misalign, nelements_tree);
1005       else
1006 	iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
1007       iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
1008       iters = fold_convert (niters_type, iters);
1009       *bound = nelements - 1;
1010     }
1011 
1012   if (dump_enabled_p ())
1013     {
1014       dump_printf_loc (MSG_NOTE, vect_location,
1015                        "niters for prolog loop: ");
1016       dump_generic_expr (MSG_NOTE, TDF_SLIM, iters);
1017       dump_printf (MSG_NOTE, "\n");
1018     }
1019 
1020   var = create_tmp_var (niters_type, "prolog_loop_niters");
1021   iters_name = force_gimple_operand (iters, &new_stmts, false, var);
1022 
1023   if (new_stmts)
1024     gimple_seq_add_seq (&stmts, new_stmts);
1025   if (stmts)
1026     {
1027       gcc_assert (single_succ_p (bb));
1028       gimple_stmt_iterator gsi = gsi_last_bb (bb);
1029       if (gsi_end_p (gsi))
1030 	gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
1031       else
1032 	gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
1033     }
1034   return iters_name;
1035 }
1036 
1037 
1038 /* Function vect_update_init_of_dr
1039 
1040    NITERS iterations were peeled from LOOP.  DR represents a data reference
1041    in LOOP.  This function updates the information recorded in DR to
1042    account for the fact that the first NITERS iterations had already been
1043    executed.  Specifically, it updates the OFFSET field of DR.  */
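/* For instance (a sketch): if DR has DR_STEP = 4 and NITERS iterations
   were peeled, the update below amounts to

       DR_OFFSET (dr) = old_offset + (sizetype) NITERS * 4;

   so later address computations start past the peeled iterations.  */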
1044 
1045 static void
1046 vect_update_init_of_dr (struct data_reference *dr, tree niters)
1047 {
1048   tree offset = DR_OFFSET (dr);
1049 
1050   niters = fold_build2 (MULT_EXPR, sizetype,
1051 			fold_convert (sizetype, niters),
1052 			fold_convert (sizetype, DR_STEP (dr)));
1053   offset = fold_build2 (PLUS_EXPR, sizetype,
1054 			fold_convert (sizetype, offset), niters);
1055   DR_OFFSET (dr) = offset;
1056 }
1057 
1058 
1059 /* Function vect_update_inits_of_drs
1060 
1061    NITERS iterations were peeled from the loop represented by LOOP_VINFO.
1062    This function updates the information recorded for the data references in
1063    the loop to account for the fact that the first NITERS iterations had
1064    already been executed.  Specifically, it updates the OFFSET field of
1065    each data_reference in the loop.  */
1066 
1067 static void
1068 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
1069 {
1070   unsigned int i;
1071   vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1072   struct data_reference *dr;
1073 
1074   if (dump_enabled_p ())
1075     dump_printf_loc (MSG_NOTE, vect_location,
1076 		     "=== vect_update_inits_of_dr ===\n");
1077 
1078   /* Adjust niters to sizetype and insert stmts on loop preheader edge.  */
1079   if (!types_compatible_p (sizetype, TREE_TYPE (niters)))
1080     {
1081       gimple_seq seq;
1082       edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
1083       tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters");
1084 
1085       niters = fold_convert (sizetype, niters);
1086       niters = force_gimple_operand (niters, &seq, false, var);
1087       if (seq)
1088 	{
1089 	  basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
1090 	  gcc_assert (!new_bb);
1091 	}
1092     }
1093 
1094   FOR_EACH_VEC_ELT (datarefs, i, dr)
1095     vect_update_init_of_dr (dr, niters);
1096 }
1097 
1098 
1099 /* This function builds ni_name = number of iterations.  Statements
1100    are emitted on the loop preheader edge.  */
1101 
1102 tree
1103 vect_build_loop_niters (loop_vec_info loop_vinfo)
1104 {
1105   tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
1106   if (TREE_CODE (ni) == INTEGER_CST)
1107     return ni;
1108   else
1109     {
1110       tree ni_name, var;
1111       gimple_seq stmts = NULL;
1112       edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
1113 
1114       var = create_tmp_var (TREE_TYPE (ni), "niters");
1115       ni_name = force_gimple_operand (ni, &stmts, false, var);
1116       if (stmts)
1117 	gsi_insert_seq_on_edge_immediate (pe, stmts);
1118 
1119       return ni_name;
1120     }
1121 }
1122 
1123 /* Calculate the number of iterations above which the vectorized loop
1124    will be preferred to the scalar loop.  NITERS_PROLOG is the number of
1125    iterations of the prolog loop.  If it is an integer constant, it is
1126    also passed in INT_NITERS_PROLOG.  BOUND_PROLOG is the upper bound
1127    (inclusive) of the number of iterations of the prolog loop.  VFM1 is
1128    the vectorization factor minus one.  If CHECK_PROFITABILITY is true,
1129    TH is the threshold below which the scalar (rather than vectorized)
1130    loop will be executed.  The upper bound (inclusive) of the result is stored in BOUND_SCALAR.  */
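/* A worked example with made-up values, CHECK_PROFITABILITY true,
   VFM1 = 3, BOUND_PROLOG = 3 and TH = 10.  After the "th--" below, th is
   9.  If NITERS_PROLOG is not a compile-time constant, 9 >= 3 + 3 holds,
   so the function returns the constant 9 and sets *BOUND_SCALAR to 9.
   If instead INT_NITERS_PROLOG = 2, then 2 + 3 < 9, so 9 is returned as
   well.  */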
1131 
1132 static tree
1133 vect_gen_scalar_loop_niters (tree niters_prolog, int int_niters_prolog,
1134 			     int bound_prolog, int vfm1, int th,
1135 			     int *bound_scalar, bool check_profitability)
1136 {
1137   tree type = TREE_TYPE (niters_prolog);
1138   tree niters = fold_build2 (PLUS_EXPR, type, niters_prolog,
1139 			     build_int_cst (type, vfm1));
1140 
1141   *bound_scalar = vfm1 + bound_prolog;
1142   if (check_profitability)
1143     {
1144       /* TH indicates the minimum niters of vectorized loop, while we
1145 	 compute the maximum niters of scalar loop.  */
1146       th--;
1147       /* Peeling for constant times.  */
1148       if (int_niters_prolog >= 0)
1149 	{
1150 	  *bound_scalar = (int_niters_prolog + vfm1 < th
1151 			    ? th
1152 			    : vfm1 + int_niters_prolog);
1153 	  return build_int_cst (type, *bound_scalar);
1154 	}
1155       /* Peeling for unknown times.  Note BOUND_PROLOG is the upper
1156 	 bound (inclusive) of the niters of the prolog loop.  */
1157       if (th >=  vfm1 + bound_prolog)
1158 	{
1159 	  *bound_scalar = th;
1160 	  return build_int_cst (type, th);
1161 	}
1162       /* Need to do runtime comparison, but BOUND_SCALAR remains the same.  */
1163       else if (th > vfm1)
1164 	return fold_build2 (MAX_EXPR, type, build_int_cst (type, th), niters);
1165     }
1166   return niters;
1167 }
1168 
1169 /* This function generates the following statements:
1170 
1171    niters = number of iterations loop executes (after peeling)
1172    niters_vector = niters / vf
1173 
1174    and places them on the loop preheader edge.  NITERS_NO_OVERFLOW is
1175    true if NITERS doesn't overflow.  */
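/* For example (a sketch), with vf = 4 and no peeling for gaps: when
   NITERS is known not to overflow this generates

       niters_vector = niters >> 2;

   and otherwise

       niters_vector = ((niters - 4) >> 2) + 1;

   which computes the same value for every niters >= 4 while not relying
   on niters itself (latch count + 1) being representable.  */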
1176 
1177 void
1178 vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, tree niters,
1179 			     tree *niters_vector_ptr, bool niters_no_overflow)
1180 {
1181   tree ni_minus_gap, var;
1182   tree niters_vector;
1183   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1184   edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
1185   tree log_vf = build_int_cst (TREE_TYPE (niters), exact_log2 (vf));
1186 
1187   /* If epilogue loop is required because of data accesses with gaps, we
1188      subtract one iteration from the total number of iterations here for
1189      correct calculation of the number of vector iterations.  */
1190   if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
1191     {
1192       ni_minus_gap = fold_build2 (MINUS_EXPR, TREE_TYPE (niters),
1193 				  niters,
1194 				  build_one_cst (TREE_TYPE (niters)));
1195       if (!is_gimple_val (ni_minus_gap))
1196 	{
1197 	  var = create_tmp_var (TREE_TYPE (niters), "ni_gap");
1198 	  gimple *stmts = NULL;
1199 	  ni_minus_gap = force_gimple_operand (ni_minus_gap, &stmts,
1200 					       true, var);
1201 	  gsi_insert_seq_on_edge_immediate (pe, stmts);
1202 	}
1203     }
1204   else
1205     ni_minus_gap = niters;
1206 
1207   /* Create: niters >> log2(vf) */
1208   /* If it's known that niters == number of latch executions + 1 doesn't
1209      overflow, we can generate niters >> log2(vf); otherwise we generate
1210      (niters - vf) >> log2(vf) + 1 by using the fact that we know ratio
1211      will be at least one.  */
1212   if (niters_no_overflow)
1213     niters_vector = fold_build2 (RSHIFT_EXPR, TREE_TYPE (niters),
1214 				 ni_minus_gap, log_vf);
1215   else
1216     niters_vector
1217       = fold_build2 (PLUS_EXPR, TREE_TYPE (niters),
1218 		     fold_build2 (RSHIFT_EXPR, TREE_TYPE (niters),
1219 				  fold_build2 (MINUS_EXPR, TREE_TYPE (niters),
1220 					       ni_minus_gap,
1221 					       build_int_cst
1222 						 (TREE_TYPE (niters), vf)),
1223 				  log_vf),
1224 		     build_int_cst (TREE_TYPE (niters), 1));
1225 
1226   if (!is_gimple_val (niters_vector))
1227     {
1228       var = create_tmp_var (TREE_TYPE (niters), "bnd");
1229       gimple *stmts = NULL;
1230       niters_vector = force_gimple_operand (niters_vector, &stmts, true, var);
1231       gsi_insert_seq_on_edge_immediate (pe, stmts);
1232     }
1233   *niters_vector_ptr = niters_vector;
1234 
1235   return;
1236 }
1237 
1238 /* Given NITERS_VECTOR which is the number of iterations for vectorized
1239    loop specified by LOOP_VINFO after vectorization, compute the number
1240    of iterations before vectorization (niters_vector * vf) and store it
1241    to NITERS_VECTOR_MULT_VF_PTR.  */
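/* E.g. (a sketch) with vf = 4 the value computed below is
   niters_vector << 2, i.e. the number of scalar iterations actually
   covered by the vectorized loop.  */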
1242 
1243 static void
1244 vect_gen_vector_loop_niters_mult_vf (loop_vec_info loop_vinfo,
1245 				     tree niters_vector,
1246 				     tree *niters_vector_mult_vf_ptr)
1247 {
1248   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1249   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1250   tree type = TREE_TYPE (niters_vector);
1251   tree log_vf = build_int_cst (type, exact_log2 (vf));
1252   basic_block exit_bb = single_exit (loop)->dest;
1253 
1254   gcc_assert (niters_vector_mult_vf_ptr != NULL);
1255   tree niters_vector_mult_vf = fold_build2 (LSHIFT_EXPR, type,
1256 					    niters_vector, log_vf);
1257   if (!is_gimple_val (niters_vector_mult_vf))
1258     {
1259       tree var = create_tmp_var (type, "niters_vector_mult_vf");
1260       gimple_seq stmts = NULL;
1261       niters_vector_mult_vf = force_gimple_operand (niters_vector_mult_vf,
1262 						    &stmts, true, var);
1263       gimple_stmt_iterator gsi = gsi_start_bb (exit_bb);
1264       gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
1265     }
1266   *niters_vector_mult_vf_ptr = niters_vector_mult_vf;
1267 }
1268 
1269 /* Function slpeel_tree_duplicate_loop_to_edge_cfg duplicates FIRST/SECOND
1270    from SECOND/FIRST and puts it at the original loop's preheader/exit
1271    edge; the two loops are arranged as below:
1272 
1273        preheader_a:
1274      first_loop:
1275        header_a:
1276 	 i_1 = PHI<i_0, i_2>;
1277 	 ...
1278 	 i_2 = i_1 + 1;
1279 	 if (cond_a)
1280 	   goto latch_a;
1281 	 else
1282 	   goto between_bb;
1283        latch_a:
1284 	 goto header_a;
1285 
1286        between_bb:
1287 	 ;; i_x = PHI<i_2>;   ;; LCSSA phi node to be created for FIRST,
1288 
1289      second_loop:
1290        header_b:
1291 	 i_3 = PHI<i_0, i_4>; ;; Use of i_0 to be replaced with i_x,
1292 				 or with i_2 if no LCSSA phi is created
1293 				 under condition of CREATE_LCSSA_FOR_IV_PHIS.
1294 	 ...
1295 	 i_4 = i_3 + 1;
1296 	 if (cond_b)
1297 	   goto latch_b;
1298 	 else
1299 	   goto exit_bb;
1300        latch_b:
1301 	 goto header_b;
1302 
1303        exit_bb:
1304 
1305    This function creates loop closed SSA for the first loop and updates the
1306    second loop's PHI nodes by replacing the argument on the incoming edge
1307    with the result of the newly created lcssa PHI nodes.  If
1308    CREATE_LCSSA_FOR_IV_PHIS is false, loop closed ssa phis will only be
1309    created for the non-iv phis of the first loop.
1310 
1311    This function assumes the exit bb of the first loop is the preheader bb
1312    of the second loop, i.e., between_bb in the example code.  With the PHIs
1313    updated, the second loop will execute the remaining iterations of the first.  */
1314 
1315 static void
1316 slpeel_update_phi_nodes_for_loops (loop_vec_info loop_vinfo,
1317 				   struct loop *first, struct loop *second,
1318 				   bool create_lcssa_for_iv_phis)
1319 {
1320   gphi_iterator gsi_update, gsi_orig;
1321   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1322 
1323   edge first_latch_e = EDGE_SUCC (first->latch, 0);
1324   edge second_preheader_e = loop_preheader_edge (second);
1325   basic_block between_bb = single_exit (first)->dest;
1326 
1327   gcc_assert (between_bb == second_preheader_e->src);
1328   gcc_assert (single_pred_p (between_bb) && single_succ_p (between_bb));
1329   /* Either the first loop or the second is the loop to be vectorized.  */
1330   gcc_assert (loop == first || loop == second);
1331 
1332   for (gsi_orig = gsi_start_phis (first->header),
1333        gsi_update = gsi_start_phis (second->header);
1334        !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
1335        gsi_next (&gsi_orig), gsi_next (&gsi_update))
1336     {
1337       gphi *orig_phi = gsi_orig.phi ();
1338       gphi *update_phi = gsi_update.phi ();
1339 
1340       tree arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, first_latch_e);
1341       /* Generate lcssa PHI node for the first loop.  */
1342       gphi *vect_phi = (loop == first) ? orig_phi : update_phi;
1343       if (create_lcssa_for_iv_phis || !iv_phi_p (vect_phi))
1344 	{
1345 	  tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
1346 	  gphi *lcssa_phi = create_phi_node (new_res, between_bb);
1347 	  add_phi_arg (lcssa_phi, arg, single_exit (first), UNKNOWN_LOCATION);
1348 	  arg = new_res;
1349 	}
1350 
1351       /* Update PHI node in the second loop by replacing arg on the loop's
1352 	 incoming edge.  */
1353       adjust_phi_and_debug_stmts (update_phi, second_preheader_e, arg);
1354     }
1355 }
1356 
1357 /* Function slpeel_add_loop_guard adds a guard skipping from the beginning
1358    of SKIP_LOOP to the beginning of UPDATE_LOOP.  GUARD_EDGE and MERGE_EDGE
1359    are two pred edges of the merge point before UPDATE_LOOP.  The two loops
1360    appear like below:
1361 
1362        guard_bb:
1363 	 if (cond)
1364 	   goto merge_bb;
1365 	 else
1366 	   goto skip_loop;
1367 
1368      skip_loop:
1369        header_a:
1370 	 i_1 = PHI<i_0, i_2>;
1371 	 ...
1372 	 i_2 = i_1 + 1;
1373 	 if (cond_a)
1374 	   goto latch_a;
1375 	 else
1376 	   goto exit_a;
1377        latch_a:
1378 	 goto header_a;
1379 
1380        exit_a:
1381 	 i_5 = PHI<i_2>;
1382 
1383        merge_bb:
1384 	 ;; PHI (i_x = PHI<i_0, i_5>) to be created at merge point.
1385 
1386      update_loop:
1387        header_b:
1388 	 i_3 = PHI<i_5, i_4>;  ;; Use of i_5 to be replaced with i_x.
1389 	 ...
1390 	 i_4 = i_3 + 1;
1391 	 if (cond_b)
1392 	   goto latch_b;
1393 	 else
1394 	   goto exit_bb;
1395        latch_b:
1396 	 goto header_b;
1397 
1398        exit_bb:
1399 
1400    This function creates PHI nodes at merge_bb and replaces the use of i_5
1401    in the update_loop's PHI node with the result of the new PHI node.  */
1402 
1403 static void
1404 slpeel_update_phi_nodes_for_guard1 (struct loop *skip_loop,
1405 				    struct loop *update_loop,
1406 				    edge guard_edge, edge merge_edge)
1407 {
1408   source_location merge_loc, guard_loc;
1409   edge orig_e = loop_preheader_edge (skip_loop);
1410   edge update_e = loop_preheader_edge (update_loop);
1411   gphi_iterator gsi_orig, gsi_update;
1412 
1413   for ((gsi_orig = gsi_start_phis (skip_loop->header),
1414 	gsi_update = gsi_start_phis (update_loop->header));
1415        !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
1416        gsi_next (&gsi_orig), gsi_next (&gsi_update))
1417     {
1418       gphi *orig_phi = gsi_orig.phi ();
1419       gphi *update_phi = gsi_update.phi ();
1420 
1421       /* Generate new phi node at merge bb of the guard.  */
1422       tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
1423       gphi *new_phi = create_phi_node (new_res, guard_edge->dest);
1424 
1425       /* Merge bb has two incoming edges: GUARD_EDGE and MERGE_EDGE.  Set the
1426 	 args in NEW_PHI for these edges.  */
1427       tree merge_arg = PHI_ARG_DEF_FROM_EDGE (update_phi, update_e);
1428       tree guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
1429       merge_loc = gimple_phi_arg_location_from_edge (update_phi, update_e);
1430       guard_loc = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
1431       add_phi_arg (new_phi, merge_arg, merge_edge, merge_loc);
1432       add_phi_arg (new_phi, guard_arg, guard_edge, guard_loc);
1433 
1434       /* Update phi in UPDATE_PHI.  */
1435       adjust_phi_and_debug_stmts (update_phi, update_e, new_res);
1436     }
1437 }
1438 
1439 /* LCSSA_PHI is a lcssa phi of the EPILOG loop, which is copied from LOOP.
1440    This function searches for the corresponding lcssa phi node in the exit
1441    bb of LOOP.  If it is found, return the phi result; otherwise return
1442    NULL_TREE.  */
1443 
1444 static tree
1445 find_guard_arg (struct loop *loop, struct loop *epilog ATTRIBUTE_UNUSED,
1446 		gphi *lcssa_phi)
1447 {
1448   gphi_iterator gsi;
1449   edge e = single_exit (loop);
1450 
1451   gcc_assert (single_pred_p (e->dest));
1452   for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
1453     {
1454       gphi *phi = gsi.phi ();
1455       if (operand_equal_p (PHI_ARG_DEF (phi, 0),
1456 			   PHI_ARG_DEF (lcssa_phi, 0), 0))
1457 	return PHI_RESULT (phi);
1458     }
1459   return NULL_TREE;
1460 }
1461 
1462 /* LOOP and EPILOG are two consecutive loops in CFG and EPILOG is copied
1463    from LOOP.  Function slpeel_add_loop_guard adds guard skipping from a
1464    point between the two loops to the end of EPILOG.  Edges GUARD_EDGE
1465    and MERGE_EDGE are the two pred edges of merge_bb at the end of EPILOG.
1466    The CFG looks like:
1467 
1468      loop:
1469        header_a:
1470 	 i_1 = PHI<i_0, i_2>;
1471 	 ...
1472 	 i_2 = i_1 + 1;
1473 	 if (cond_a)
1474 	   goto latch_a;
1475 	 else
1476 	   goto exit_a;
1477        latch_a:
1478 	 goto header_a;
1479 
1480        exit_a:
1481 
1482        guard_bb:
1483 	 if (cond)
1484 	   goto merge_bb;
1485 	 else
1486 	   goto epilog_loop;
1487 
1488        ;; fall_through_bb
1489 
1490      epilog_loop:
1491        header_b:
1492 	 i_3 = PHI<i_2, i_4>;
1493 	 ...
1494 	 i_4 = i_3 + 1;
1495 	 if (cond_b)
1496 	   goto latch_b;
1497 	 else
1498 	   goto merge_bb;
1499        latch_b:
1500 	 goto header_b;
1501 
1502        merge_bb:
1503 	 ; PHI node (i_y = PHI<i_2, i_4>) to be created at merge point.
1504 
1505        exit_bb:
1506 	 i_x = PHI<i_4>;  ;Use of i_4 to be replaced with i_y in merge_bb.
1507 
1508    For each name used outside EPILOG (i.e. for each name that has a lcssa
1509    phi in exit_bb) we create a new PHI in merge_bb.  The new PHI has two
1510    args corresponding to GUARD_EDGE and MERGE_EDGE.  The arg for MERGE_EDGE
1511    is the arg of the original PHI in exit_bb; the arg for GUARD_EDGE is
1512    defined by LOOP and is found in the exit bb of LOOP.  The arg of the
1513    original PHI in exit_bb will also be updated.  */
1514 
1515 static void
1516 slpeel_update_phi_nodes_for_guard2 (struct loop *loop, struct loop *epilog,
1517 				    edge guard_edge, edge merge_edge)
1518 {
1519   gphi_iterator gsi;
1520   basic_block merge_bb = guard_edge->dest;
1521 
1522   gcc_assert (single_succ_p (merge_bb));
1523   edge e = single_succ_edge (merge_bb);
1524   basic_block exit_bb = e->dest;
1525   gcc_assert (single_pred_p (exit_bb));
1526   gcc_assert (single_pred (exit_bb) == single_exit (epilog)->dest);
1527 
1528   for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
1529     {
1530       gphi *update_phi = gsi.phi ();
1531       tree old_arg = PHI_ARG_DEF (update_phi, 0);
1532       /* This loop-closed-phi actually doesn't represent a use out of the
1533 	 loop - the phi arg is a constant.  */
1534       if (TREE_CODE (old_arg) != SSA_NAME)
1535 	continue;
1536 
1537       tree merge_arg = get_current_def (old_arg);
1538       if (!merge_arg)
1539 	merge_arg = old_arg;
1540 
1541       tree guard_arg = find_guard_arg (loop, epilog, update_phi);
1542       /* If the var is live after the loop but not a reduction, we simply
1543 	 use the old arg.  */
1544       if (!guard_arg)
1545 	guard_arg = old_arg;
1546 
1547       /* Create new phi node in MERGE_BB:  */
1548       tree new_res = copy_ssa_name (PHI_RESULT (update_phi));
1549       gphi *merge_phi = create_phi_node (new_res, merge_bb);
1550 
1551       /* MERGE_BB has two incoming edges: GUARD_EDGE and MERGE_EDGE.  Set
1552 	 the two PHI args in merge_phi for these edges.  */
1553       add_phi_arg (merge_phi, merge_arg, merge_edge, UNKNOWN_LOCATION);
1554       add_phi_arg (merge_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);
1555 
1556       /* Update the original phi in exit_bb.  */
1557       adjust_phi_and_debug_stmts (update_phi, e, new_res);
1558     }
1559 }
1560 
1561 /* The EPILOG loop is duplicated from the original loop for vectorizing;
1562    the args of its loop-closed ssa PHIs need to be updated.  */
1563 
1564 static void
1565 slpeel_update_phi_nodes_for_lcssa (struct loop *epilog)
1566 {
1567   gphi_iterator gsi;
1568   basic_block exit_bb = single_exit (epilog)->dest;
1569 
1570   gcc_assert (single_pred_p (exit_bb));
1571   edge e = EDGE_PRED (exit_bb, 0);
1572   for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
1573     rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
1574 }
1575 
1576 /* Function vect_do_peeling.
1577 
1578    Input:
1579    - LOOP_VINFO: Represents a loop to be vectorized, which looks like:
1580 
1581        preheader:
1582      LOOP:
1583        header_bb:
1584 	 loop_body
1585 	 if (exit_loop_cond) goto exit_bb
1586 	 else                goto header_bb
1587        exit_bb:
1588 
1589    - NITERS: The number of iterations of the loop.
1590    - NITERSM1: The number of iterations of the loop's latch.
1591    - NITERS_NO_OVERFLOW: No overflow in computing NITERS.
1592    - TH, CHECK_PROFITABILITY: Threshold of niters to vectorize loop if
1593 			      CHECK_PROFITABILITY is true.
1594    Output:
1595    - NITERS_VECTOR: The number of iterations of loop after vectorization.
1596 
1597    This function peels the prolog and epilog from the loop and adds guards
1598    that skip PROLOG and EPILOG under various conditions.  As a result, the
1599    changed CFG would look like:
1600 
1601        guard_bb_1:
1602 	 if (prefer_scalar_loop) goto merge_bb_1
1603 	 else                    goto guard_bb_2
1604 
1605        guard_bb_2:
1606          if (skip_prolog) goto merge_bb_2
1607          else             goto prolog_preheader
1608 
1609        prolog_preheader:
1610      PROLOG:
1611        prolog_header_bb:
1612 	 prolog_body
1613 	 if (exit_prolog_cond) goto prolog_exit_bb
1614 	 else                  goto prolog_header_bb
1615        prolog_exit_bb:
1616 
1617        merge_bb_2:
1618 
1619        vector_preheader:
1620      VECTOR LOOP:
1621        vector_header_bb:
1622 	 vector_body
1623 	 if (exit_vector_cond) goto vector_exit_bb
1624 	 else                  goto vector_header_bb
1625        vector_exit_bb:
1626 
1627        guard_bb_3:
1628 	 if (skip_epilog) goto merge_bb_3
1629 	 else             goto epilog_preheader
1630 
1631        merge_bb_1:
1632 
1633        epilog_preheader:
1634      EPILOG:
1635        epilog_header_bb:
1636 	 epilog_body
1637 	 if (exit_epilog_cond) goto merge_bb_3
1638 	 else                  goto epilog_header_bb
1639 
1640        merge_bb_3:
1641 
1642    Note this function peels prolog and epilog only if it's necessary,
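   As an illustrative example (numbers chosen arbitrarily, not taken from any
   particular testcase): with NITERS = 103, a vectorization factor of 8 and a
   PROLOG of 5 iterations peeled for alignment, PROLOG runs 5 scalar
   iterations, the VECTOR LOOP runs (103 - 5) / 8 = 12 vector iterations
   covering 96 scalar ones, and EPILOG runs the remaining 2 scalar
   iterations.
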
1643    as well as guards.
1644    Returns created epilogue or NULL.
1645 
1646    TODO: Guard for prefer_scalar_loop should be emitted along with
1647    versioning conditions if loop versioning is needed.  */
1648 
1649 
1650 struct loop *
1651 vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
1652 		 tree *niters_vector, int th, bool check_profitability,
1653 		 bool niters_no_overflow)
1654 {
1655   edge e, guard_e;
1656   tree type = TREE_TYPE (niters), guard_cond;
1657   basic_block guard_bb, guard_to;
1658   int prob_prolog, prob_vector, prob_epilog;
1659   int bound_prolog = 0, bound_scalar = 0, bound = 0;
1660   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1661   int prolog_peeling = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
1662   bool epilog_peeling = (LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)
1663 			 || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo));
1664 
1665   if (!prolog_peeling && !epilog_peeling)
1666     return NULL;
1667 
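  /* Use 9/10 as the probability of reaching the vector loop.  For the
     prolog/epilog probability (VF - 1) / VF computed below, a vectorization
     factor of 2 is temporarily treated as 3 (giving 2/3 rather than 1/2);
     the real VF is restored immediately afterwards.  */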
1668   prob_vector = 9 * REG_BR_PROB_BASE / 10;
1669   if ((vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo)) == 2)
1670     vf = 3;
1671   prob_prolog = prob_epilog = (vf - 1) * REG_BR_PROB_BASE / vf;
1672   vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1673 
1674   struct loop *prolog, *epilog = NULL, *loop = LOOP_VINFO_LOOP (loop_vinfo);
1675   struct loop *first_loop = loop;
1676   bool irred_flag = loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP;
1677   create_lcssa_for_virtual_phi (loop);
1678   update_ssa (TODO_update_ssa_only_virtuals);
1679 
1680   if (MAY_HAVE_DEBUG_STMTS)
1681     {
1682       gcc_assert (!adjust_vec.exists ());
1683       adjust_vec.create (32);
1684     }
1685   initialize_original_copy_tables ();
1686 
1687   /* Prolog loop may be skipped.  */
1688   bool skip_prolog = (prolog_peeling != 0);
1689   /* Skip to epilog if scalar loop may be preferred.  It's only used when
1690      we peel for epilog loop.  */
1691   bool skip_vector = (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo));
1692   /* If the number of iterations of the epilog loop is known at compile
1693      time, the epilog loop must be executed; otherwise we add a check at
1694      the end of the vector loop to skip to the end of the epilog loop.  */
1695   bool skip_epilog = (prolog_peeling < 0
1696 		      || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo));
1697   /* PEELING_FOR_GAPS is special because epilog loop must be executed.  */
1698   if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
1699     skip_epilog = false;
1700 
1701   /* Record the anchor bb at which guard should be placed if scalar loop
1702      may be preferred.  */
1703   basic_block anchor = loop_preheader_edge (loop)->src;
1704   if (skip_vector)
1705     {
1706       split_edge (loop_preheader_edge (loop));
1707 
1708       /* Due to the order in which we peel prolog and epilog, we first
1709 	 propagate probability to the whole loop.  The purpose is to
1710 	 avoid adjusting probabilities of both prolog and vector loops
1711 	 separately.  Note in this case, the probability of epilog loop
1712 	 needs to be scaled back later.  */
1713       basic_block bb_before_loop = loop_preheader_edge (loop)->src;
1714       scale_bbs_frequencies_int (&bb_before_loop, 1, prob_vector,
1715 				 REG_BR_PROB_BASE);
1716       scale_loop_profile (loop, prob_vector, bound);
1717     }
1718 
1719   tree niters_prolog = build_int_cst (type, 0);
1720   source_location loop_loc = find_loop_location (loop);
1721   struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
1722   if (prolog_peeling)
1723     {
1724       e = loop_preheader_edge (loop);
1725       if (!slpeel_can_duplicate_loop_p (loop, e))
1726 	{
1727 	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1728 			   "loop can't be duplicated to preheader edge.\n");
1729 	  gcc_unreachable ();
1730 	}
1731       /* Peel prolog and put it on preheader edge of loop.  */
1732       prolog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
1733       if (!prolog)
1734 	{
1735 	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1736 			   "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
1737 	  gcc_unreachable ();
1738 	}
1739       slpeel_update_phi_nodes_for_loops (loop_vinfo, prolog, loop, true);
1740       first_loop = prolog;
1741       reset_original_copy_tables ();
1742 
1743       /* Generate and update the number of iterations for prolog loop.  */
1744       niters_prolog = vect_gen_prolog_loop_niters (loop_vinfo, anchor,
1745 						   &bound_prolog);
1746       slpeel_make_loop_iterate_ntimes (prolog, niters_prolog);
1747 
1748       /* Skip the prolog loop.  */
1749       if (skip_prolog)
1750 	{
1751 	  guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
1752 				    niters_prolog, build_int_cst (type, 0));
1753 	  guard_bb = loop_preheader_edge (prolog)->src;
1754 	  basic_block bb_after_prolog = loop_preheader_edge (loop)->src;
1755 	  guard_to = split_edge (loop_preheader_edge (loop));
1756 	  guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
1757 					   guard_to, guard_bb,
1758 					   inverse_probability (prob_prolog),
1759 					   irred_flag);
1760 	  e = EDGE_PRED (guard_to, 0);
1761 	  e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
1762 	  slpeel_update_phi_nodes_for_guard1 (prolog, loop, guard_e, e);
1763 
1764 	  scale_bbs_frequencies_int (&bb_after_prolog, 1, prob_prolog,
1765 				     REG_BR_PROB_BASE);
1766 	  scale_loop_profile (prolog, prob_prolog, bound_prolog);
1767 	}
1768       /* Update init address of DRs.  */
1769       vect_update_inits_of_drs (loop_vinfo, niters_prolog);
1770       /* Update niters for vector loop.  */
1771       LOOP_VINFO_NITERS (loop_vinfo)
1772 	= fold_build2 (MINUS_EXPR, type, niters, niters_prolog);
1773       LOOP_VINFO_NITERSM1 (loop_vinfo)
1774 	= fold_build2 (MINUS_EXPR, type,
1775 		       LOOP_VINFO_NITERSM1 (loop_vinfo), niters_prolog);
1776       niters = vect_build_loop_niters (loop_vinfo);
1777 
1778       /* Prolog iterates at most bound_prolog times, latch iterates at
1779 	 most bound_prolog - 1 times.  */
1780       record_niter_bound (prolog, bound_prolog - 1, false, true);
1781       delete_update_ssa ();
1782       adjust_vec_debug_stmts ();
1783       scev_reset ();
1784     }
1785 
1786   if (epilog_peeling)
1787     {
1788       e = single_exit (loop);
1789       if (!slpeel_can_duplicate_loop_p (loop, e))
1790 	{
1791 	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1792 			   "loop can't be duplicated to exit edge.\n");
1793 	  gcc_unreachable ();
1794 	}
1795       /* Peel epilog and put it on exit edge of loop.  */
1796       epilog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
1797       if (!epilog)
1798 	{
1799 	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1800 			   "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
1801 	  gcc_unreachable ();
1802 	}
1803       slpeel_update_phi_nodes_for_loops (loop_vinfo, loop, epilog, false);
1804 
1805       /* The scalar version loop may be preferred.  In this case, add a
1806 	 guard and skip to the epilog.  Note this only happens when the
1807 	 number of iterations of the loop is unknown at compile time;
1808 	 otherwise the loop wouldn't be vectorized.  */
1809       if (skip_vector)
1810 	{
1811 	  /* An additional epilogue iteration is peeled if a gap exists.  */
1812 	  bool peel_for_gaps = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo);
1813 	  tree t = vect_gen_scalar_loop_niters (niters_prolog, prolog_peeling,
1814 						bound_prolog,
1815 						peel_for_gaps ? vf : vf - 1,
1816 						th, &bound_scalar,
1817 						check_profitability);
1818 	  /* Build guard against NITERSM1 since NITERS may overflow.  */
1819 	  guard_cond = fold_build2 (LT_EXPR, boolean_type_node, nitersm1, t);
1820 	  guard_bb = anchor;
1821 	  guard_to = split_edge (loop_preheader_edge (epilog));
1822 	  guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
1823 					   guard_to, guard_bb,
1824 					   inverse_probability (prob_vector),
1825 					   irred_flag);
1826 	  e = EDGE_PRED (guard_to, 0);
1827 	  e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
1828 	  slpeel_update_phi_nodes_for_guard1 (first_loop, epilog, guard_e, e);
1829 
1830 	  /* Simply propagate profile info from guard_bb to guard_to which is
1831 	     a merge point of control flow.  */
1832 	  guard_to->frequency = guard_bb->frequency;
1833 	  guard_to->count = guard_bb->count;
1834 	  single_succ_edge (guard_to)->count = guard_to->count;
1835 	  /* Scale probability of epilog loop back.  */
1836 	  int scale_up = REG_BR_PROB_BASE * REG_BR_PROB_BASE / prob_vector;
1837 	  scale_loop_frequencies (epilog, scale_up, REG_BR_PROB_BASE);
1838 	}
1839 
1840       basic_block bb_before_epilog = loop_preheader_edge (epilog)->src;
1841       tree niters_vector_mult_vf;
1842       /* If the loop is peeled a non-zero constant number of times, niters
1843 	 now refers to orig_niters - prolog_peeling; it won't overflow even
1844 	 if orig_niters overflows.  */
1845       niters_no_overflow |= (prolog_peeling > 0);
1846       vect_gen_vector_loop_niters (loop_vinfo, niters,
1847 				   niters_vector, niters_no_overflow);
1848       vect_gen_vector_loop_niters_mult_vf (loop_vinfo, *niters_vector,
1849 					   &niters_vector_mult_vf);
1850       /* Update IVs of original loop as if they were advanced by
1851 	 niters_vector_mult_vf steps.  */
1852       gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
1853       edge update_e = skip_vector ? e : loop_preheader_edge (epilog);
1854       vect_update_ivs_after_vectorizer (loop_vinfo, niters_vector_mult_vf,
1855 					update_e);
1856 
1857       if (skip_epilog)
1858 	{
1859 	  guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
1860 				    niters, niters_vector_mult_vf);
1861 	  guard_bb = single_exit (loop)->dest;
1862 	  guard_to = split_edge (single_exit (epilog));
1863 	  guard_e = slpeel_add_loop_guard (guard_bb, guard_cond, guard_to,
1864 					   skip_vector ? anchor : guard_bb,
1865 					   inverse_probability (prob_epilog),
1866 					   irred_flag);
1867 	  slpeel_update_phi_nodes_for_guard2 (loop, epilog, guard_e,
1868 					      single_exit (epilog));
1869 	  /* Only need to handle basic block before epilog loop if it's not
1870 	     the guard_bb, which is the case when skip_vector is true.  */
1871 	  if (guard_bb != bb_before_epilog)
1872 	    {
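	      /* The epilog preheader can be reached either after the vector
		 loop (probability prob_vector * prob_epilog) or directly
		 when the vector loop is skipped (probability
		 1 - prob_vector), hence the sum below.  */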
1873 	      prob_epilog = (combine_probabilities (prob_vector, prob_epilog)
1874 			     + inverse_probability (prob_vector));
1875 
1876 	      scale_bbs_frequencies_int (&bb_before_epilog, 1, prob_epilog,
1877 					 REG_BR_PROB_BASE);
1878 	    }
1879 	  scale_loop_profile (epilog, prob_epilog, bound);
1880 	}
1881       else
1882 	slpeel_update_phi_nodes_for_lcssa (epilog);
1883 
1884       bound = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) ? vf - 1 : vf - 2;
1885       /* We share epilog loop with scalar version loop.  */
1886       bound = MAX (bound, bound_scalar - 1);
1887       record_niter_bound (epilog, bound, false, true);
1888 
1889       delete_update_ssa ();
1890       adjust_vec_debug_stmts ();
1891       scev_reset ();
1892     }
1893   adjust_vec.release ();
1894   free_original_copy_tables ();
1895 
1896   return epilog;
1897 }
1898 
1899 /* Function vect_create_cond_for_niters_checks.
1900 
1901    Create a conditional expression that represents the run-time checks for
1902    loop's niter.  The loop is guaranteed to terminate if the run-time
1903    checks hold.
1904 
1905    Input:
1906    COND_EXPR  - input conditional expression.  New conditions will be chained
1907 		with logical AND operation.
1908    LOOP_VINFO - field LOOP_VINFO_NITERS_ASSUMPTIONS contains the niter
1909 		assumptions under which the loop is guaranteed to terminate;
1910 		these are the conditions to be checked.
1911 
1912    Output:
1913    COND_EXPR - conditional expression.
1914 
1915    The returned COND_EXPR is the conditional expression to be used in the
1916    if statement that controls which version of the loop gets executed at
1917    runtime.  */
1918 
1919 static void
1920 vect_create_cond_for_niters_checks (loop_vec_info loop_vinfo, tree *cond_expr)
1921 {
1922   tree part_cond_expr = LOOP_VINFO_NITERS_ASSUMPTIONS (loop_vinfo);
1923 
1924   if (*cond_expr)
1925     *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1926 			      *cond_expr, part_cond_expr);
1927   else
1928     *cond_expr = part_cond_expr;
1929 }
1930 
1931 /* Function vect_create_cond_for_align_checks.
1932 
1933    Create a conditional expression that represents the alignment checks for
1934    all of data references (array element references) whose alignment must be
1935    checked at runtime.
1936 
1937    Input:
1938    COND_EXPR  - input conditional expression.  New conditions will be chained
1939                 with logical AND operation.
1940    LOOP_VINFO - two fields of the loop information are used.
1941                 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
1942                 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
1943 
1944    Output:
1945    COND_EXPR_STMT_LIST - statements needed to construct the conditional
1946                          expression.
1947    The returned value is the conditional expression to be used in the if
1948    statement that controls which version of the loop gets executed at runtime.
1949 
1950    The algorithm makes two assumptions:
1951      1) The number of bytes "n" in a vector is a power of 2.
1952      2) An address "a" is aligned if a%n is zero, and this
1953         test can be done as a&(n-1) == 0.  For example, for 16
1954         byte vectors the test is a&0xf == 0.  */
1955 
1956 static void
1957 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
1958                                    tree *cond_expr,
1959 				   gimple_seq *cond_expr_stmt_list)
1960 {
1961   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1962   vec<gimple *> may_misalign_stmts
1963     = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1964   gimple *ref_stmt;
1965   int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
1966   tree mask_cst;
1967   unsigned int i;
1968   tree int_ptrsize_type;
1969   char tmp_name[20];
1970   tree or_tmp_name = NULL_TREE;
1971   tree and_tmp_name;
1972   gimple *and_stmt;
1973   tree ptrsize_zero;
1974   tree part_cond_expr;
1975 
1976   /* Check that mask is one less than a power of 2, i.e., mask is
1977      all zeros followed by all ones.  */
1978   gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
1979 
1980   int_ptrsize_type = signed_type_for (ptr_type_node);
1981 
1982   /* Create expression (mask & (dr_1 | ... | dr_n)) where dr_i is the address
1983      of the first vector of the i'th data reference.  */
1984 
1985   FOR_EACH_VEC_ELT (may_misalign_stmts, i, ref_stmt)
1986     {
1987       gimple_seq new_stmt_list = NULL;
1988       tree addr_base;
1989       tree addr_tmp_name;
1990       tree new_or_tmp_name;
1991       gimple *addr_stmt, *or_stmt;
1992       stmt_vec_info stmt_vinfo = vinfo_for_stmt (ref_stmt);
1993       tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1994       bool negative = tree_int_cst_compare
1995 	(DR_STEP (STMT_VINFO_DATA_REF (stmt_vinfo)), size_zero_node) < 0;
1996       tree offset = negative
1997 	? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
1998 
1999       /* create: addr_tmp = (int)(address_of_first_vector) */
2000       addr_base =
2001 	vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
2002 					      offset, loop);
2003       if (new_stmt_list != NULL)
2004 	gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
2005 
2006       sprintf (tmp_name, "addr2int%d", i);
2007       addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
2008       addr_stmt = gimple_build_assign (addr_tmp_name, NOP_EXPR, addr_base);
2009       gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
2010 
2011       /* The addresses are ORed together.  */
2012 
2013       if (or_tmp_name != NULL_TREE)
2014         {
2015           /* create: or_tmp = or_tmp | addr_tmp */
2016           sprintf (tmp_name, "orptrs%d", i);
2017 	  new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
2018 	  or_stmt = gimple_build_assign (new_or_tmp_name, BIT_IOR_EXPR,
2019 					 or_tmp_name, addr_tmp_name);
2020 	  gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
2021           or_tmp_name = new_or_tmp_name;
2022         }
2023       else
2024         or_tmp_name = addr_tmp_name;
2025 
2026     } /* end for i */
2027 
2028   mask_cst = build_int_cst (int_ptrsize_type, mask);
2029 
2030   /* create: and_tmp = or_tmp & mask  */
2031   and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");
2032 
2033   and_stmt = gimple_build_assign (and_tmp_name, BIT_AND_EXPR,
2034 				  or_tmp_name, mask_cst);
2035   gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
2036 
2037   /* Make and_tmp the left operand of the conditional test against zero.
2038      If and_tmp has a nonzero bit then some address is unaligned.  */
2039   ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
2040   part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
2041 				and_tmp_name, ptrsize_zero);
2042   if (*cond_expr)
2043     *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2044 			      *cond_expr, part_cond_expr);
2045   else
2046     *cond_expr = part_cond_expr;
2047 }
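
/* A minimal standalone sketch (illustrative only, not GCC code) of the
   runtime test built by vect_create_cond_for_align_checks above: the
   addresses are ORed together and the low bits are tested against the
   alignment mask.  The names below are hypothetical.

     #include <stdint.h>

     static int
     addresses_all_aligned (uintptr_t addr_a, uintptr_t addr_b,
			    uintptr_t mask)
     {
       // MASK is the vector size in bytes minus 1, e.g. 0xf for 16-byte
       // vectors; a nonzero bit left after the AND means some address is
       // unaligned.
       return ((addr_a | addr_b) & mask) == 0;
     }
*/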
2048 
2049 /* Given two data references and segment lengths described by DR_A and DR_B,
2050    create an expression checking whether the two address ranges intersect,
2051    based on the index of the two addresses.  This can only be done if
2052    DR_A and DR_B refer to the same (array) object and the index is the
2053    only difference.  For example:
2054 
2055                        DR_A                           DR_B
2056       data-ref         arr[i]                         arr[j]
2057       base_object      arr                            arr
2058       index            {i_0, +, 1}_loop               {j_0, +, 1}_loop
2059 
2060    The addresses and their index are like:
2061 
2062         |<- ADDR_A    ->|          |<- ADDR_B    ->|
2063      ------------------------------------------------------->
2064         |   |   |   |   |          |   |   |   |   |
2065      ------------------------------------------------------->
2066         i_0 ...         i_0+4      j_0 ...         j_0+4
2067 
2068    We can create an expression based on the index rather than the address:
2069 
2070      (i_0 + 4 < j_0 || j_0 + 4 < i_0)
2071 
2072    Note the evolution step of the index needs to be considered in the comparison.  */
2073 
2074 static bool
2075 create_intersect_range_checks_index (loop_vec_info loop_vinfo, tree *cond_expr,
2076 				     const dr_with_seg_len& dr_a,
2077 				     const dr_with_seg_len& dr_b)
2078 {
2079   if (integer_zerop (DR_STEP (dr_a.dr))
2080       || integer_zerop (DR_STEP (dr_b.dr))
2081       || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
2082     return false;
2083 
2084   if (!tree_fits_uhwi_p (dr_a.seg_len) || !tree_fits_uhwi_p (dr_b.seg_len))
2085     return false;
2086 
2087   if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
2088     return false;
2089 
2090   if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
2091     return false;
2092 
2093   if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
2094     return false;
2095 
2096   gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
2097 
2098   bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
2099   unsigned HOST_WIDE_INT abs_step
2100     = absu_hwi (tree_to_shwi (DR_STEP (dr_a.dr)));
2101 
2102   unsigned HOST_WIDE_INT seg_len1 = tree_to_uhwi (dr_a.seg_len);
2103   unsigned HOST_WIDE_INT seg_len2 = tree_to_uhwi (dr_b.seg_len);
2104   /* Infer the number of iterations with which the memory segment is accessed
2105      by DR.  In other words, alias is checked if the memory segment
2106      accessed by DR_A in some number of iterations intersects with the
2107      memory segment accessed by DR_B in the same number of iterations.
2108      Note the segment length is a linear function of the number of
2109      iterations with DR_STEP as the coefficient.  */
2110   unsigned HOST_WIDE_INT niter_len1 = (seg_len1 + abs_step - 1) / abs_step;
2111   unsigned HOST_WIDE_INT niter_len2 = (seg_len2 + abs_step - 1) / abs_step;
2112 
2113   unsigned int i;
2114   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2115   for (i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
2116     {
2117       tree access1 = DR_ACCESS_FN (dr_a.dr, i);
2118       tree access2 = DR_ACCESS_FN (dr_b.dr, i);
2119       /* Two indices must be the same if they are not scevs, or not scevs
2120 	 w.r.t. the current loop being vectorized.  */
2121       if (TREE_CODE (access1) != POLYNOMIAL_CHREC
2122 	  || TREE_CODE (access2) != POLYNOMIAL_CHREC
2123 	  || CHREC_VARIABLE (access1) != (unsigned)loop->num
2124 	  || CHREC_VARIABLE (access2) != (unsigned)loop->num)
2125 	{
2126 	  if (operand_equal_p (access1, access2, 0))
2127 	    continue;
2128 
2129 	  return false;
2130 	}
2131       /* The two indices must have the same step.  */
2132       if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
2133 	return false;
2134 
2135       tree idx_step = CHREC_RIGHT (access1);
2136       /* Index must have const step, otherwise DR_STEP won't be constant.  */
2137       gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
2138       /* Index must evaluate in the same direction as DR.  */
2139       gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
2140 
2141       tree min1 = CHREC_LEFT (access1);
2142       tree min2 = CHREC_LEFT (access2);
2143       if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
2144 	return false;
2145 
2146       /* Ideally, alias could be checked against the loop's control IV, but
2147 	 we would need to prove a linear mapping between the control IV and
2148 	 the reference index.  Although that should be true, we check against
2149 	 the (array) index of the data reference instead.  Like the segment
2150 	 length, the index length is a linear function of the number of
2151 	 iterations with idx_step as the coefficient, i.e., niter_len * idx_step.  */
2152       tree idx_len1 = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
2153 				   build_int_cst (TREE_TYPE (min1),
2154 						  niter_len1));
2155       tree idx_len2 = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
2156 				   build_int_cst (TREE_TYPE (min2),
2157 						  niter_len2));
2158       tree max1 = fold_build2 (PLUS_EXPR, TREE_TYPE (min1), min1, idx_len1);
2159       tree max2 = fold_build2 (PLUS_EXPR, TREE_TYPE (min2), min2, idx_len2);
2160       /* Adjust ranges for negative step.  */
2161       if (neg_step)
2162 	{
2163 	  min1 = fold_build2 (MINUS_EXPR, TREE_TYPE (min1), max1, idx_step);
2164 	  max1 = fold_build2 (MINUS_EXPR, TREE_TYPE (min1),
2165 			      CHREC_LEFT (access1), idx_step);
2166 	  min2 = fold_build2 (MINUS_EXPR, TREE_TYPE (min2), max2, idx_step);
2167 	  max2 = fold_build2 (MINUS_EXPR, TREE_TYPE (min2),
2168 			      CHREC_LEFT (access2), idx_step);
2169 	}
2170       tree part_cond_expr
2171 	= fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2172 	    fold_build2 (LE_EXPR, boolean_type_node, max1, min2),
2173 	    fold_build2 (LE_EXPR, boolean_type_node, max2, min1));
2174       if (*cond_expr)
2175 	*cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2176 				  *cond_expr, part_cond_expr);
2177       else
2178 	*cond_expr = part_cond_expr;
2179     }
2180   return true;
2181 }
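
/* A standalone sketch (illustrative only, not GCC code) of the per-dimension
   check built above, assuming a positive index step; for a negative step the
   range ends are swapped as done in the function.  The names are
   hypothetical.

     static int
     index_ranges_disjoint (long min1, long min2, long idx_step,
			    unsigned long niter_len1, unsigned long niter_len2)
     {
       // Each index range is [min, min + idx_step * niter_len).
       long max1 = min1 + idx_step * (long) niter_len1;
       long max2 = min2 + idx_step * (long) niter_len2;
       // The accesses cannot overlap iff one range ends before the other
       // begins.
       return max1 <= min2 || max2 <= min1;
     }
*/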
2182 
2183 /* Given two data references and segment lengths described by DR_A and DR_B,
2184    create expression checking if the two addresses ranges intersect with
2185    each other:
2186 
2187      ((DR_A_addr_0 + DR_A_segment_length_0) <= DR_B_addr_0)
2188      || (DR_B_addr_0 + DER_B_segment_length_0) <= DR_A_addr_0))  */
2189 
2190 static void
2191 create_intersect_range_checks (loop_vec_info loop_vinfo, tree *cond_expr,
2192 			       const dr_with_seg_len& dr_a,
2193 			       const dr_with_seg_len& dr_b)
2194 {
2195   *cond_expr = NULL_TREE;
2196   if (create_intersect_range_checks_index (loop_vinfo, cond_expr, dr_a, dr_b))
2197     return;
2198 
2199   tree segment_length_a = dr_a.seg_len;
2200   tree segment_length_b = dr_b.seg_len;
2201   tree addr_base_a = DR_BASE_ADDRESS (dr_a.dr);
2202   tree addr_base_b = DR_BASE_ADDRESS (dr_b.dr);
2203   tree offset_a = DR_OFFSET (dr_a.dr), offset_b = DR_OFFSET (dr_b.dr);
2204 
2205   offset_a = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_a),
2206 			  offset_a, DR_INIT (dr_a.dr));
2207   offset_b = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_b),
2208 			  offset_b, DR_INIT (dr_b.dr));
2209   addr_base_a = fold_build_pointer_plus (addr_base_a, offset_a);
2210   addr_base_b = fold_build_pointer_plus (addr_base_b, offset_b);
2211 
2212   tree seg_a_min = addr_base_a;
2213   tree seg_a_max = fold_build_pointer_plus (addr_base_a, segment_length_a);
2214   /* For negative step, we need to adjust address range by TYPE_SIZE_UNIT
2215      bytes, e.g., int a[3] -> a[1] range is [a+4, a+16) instead of
2216      [a, a+12) */
2217   if (tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0)
2218     {
2219       tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a.dr)));
2220       seg_a_min = fold_build_pointer_plus (seg_a_max, unit_size);
2221       seg_a_max = fold_build_pointer_plus (addr_base_a, unit_size);
2222     }
2223 
2224   tree seg_b_min = addr_base_b;
2225   tree seg_b_max = fold_build_pointer_plus (addr_base_b, segment_length_b);
2226   if (tree_int_cst_compare (DR_STEP (dr_b.dr), size_zero_node) < 0)
2227     {
2228       tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_b.dr)));
2229       seg_b_min = fold_build_pointer_plus (seg_b_max, unit_size);
2230       seg_b_max = fold_build_pointer_plus (addr_base_b, unit_size);
2231     }
2232   *cond_expr
2233     = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2234 	fold_build2 (LE_EXPR, boolean_type_node, seg_a_max, seg_b_min),
2235 	fold_build2 (LE_EXPR, boolean_type_node, seg_b_max, seg_a_min));
2236 }
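
/* A standalone sketch (illustrative only, not GCC code) of the address-based
   check above for two segments [a_min, a_max) and [b_min, b_max); the names
   are hypothetical.

     #include <stdint.h>

     static int
     segments_disjoint (uintptr_t a_min, uintptr_t a_max,
			uintptr_t b_min, uintptr_t b_max)
     {
       // No overlap iff segment A ends at or before B begins, or vice
       // versa, matching the TRUTH_OR_EXPR of the two LE_EXPRs above.
       return a_max <= b_min || b_max <= a_min;
     }
*/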
2237 
2238 /* Function vect_create_cond_for_alias_checks.
2239 
2240    Create a conditional expression that represents the run-time checks for
2241    overlapping of address ranges represented by a list of data references
2242    relations passed as input.
2243 
2244    Input:
2245    COND_EXPR  - input conditional expression.  New conditions will be chained
2246                 with logical AND operation.  If it is NULL, the
2247                 constructed checks become the whole condition.
2248    LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
2249 	        to be checked.
2250 
2251    Output:
2252    COND_EXPR - conditional expression.
2253 
2254    The returned COND_EXPR is the conditional expression to be used in the if
2255    statement that controls which version of the loop gets executed at runtime.
2256 */
2257 
2258 void
2259 vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
2260 {
2261   vec<dr_with_seg_len_pair_t> comp_alias_ddrs =
2262     LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
2263   tree part_cond_expr;
2264 
2265   if (comp_alias_ddrs.is_empty ())
2266     return;
2267 
2268   for (size_t i = 0, s = comp_alias_ddrs.length (); i < s; ++i)
2269     {
2270       const dr_with_seg_len& dr_a = comp_alias_ddrs[i].first;
2271       const dr_with_seg_len& dr_b = comp_alias_ddrs[i].second;
2272 
2273       if (dump_enabled_p ())
2274 	{
2275 	  dump_printf_loc (MSG_NOTE, vect_location,
2276 			   "create runtime check for data references ");
2277 	  dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a.dr));
2278 	  dump_printf (MSG_NOTE, " and ");
2279 	  dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b.dr));
2280 	  dump_printf (MSG_NOTE, "\n");
2281 	}
2282 
2283       /* Create a condition expression for each pair of data references.  */
2284       create_intersect_range_checks (loop_vinfo, &part_cond_expr, dr_a, dr_b);
2285       if (*cond_expr)
2286 	*cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2287 				  *cond_expr, part_cond_expr);
2288       else
2289 	*cond_expr = part_cond_expr;
2290     }
2291 
2292   if (dump_enabled_p ())
2293     dump_printf_loc (MSG_NOTE, vect_location,
2294 		     "created %u versioning for alias checks.\n",
2295 		     comp_alias_ddrs.length ());
2296 }
2297 
2298 
2299 /* Function vect_loop_versioning.
2300 
2301    If the loop has data references that may or may not be aligned and/or
2302    has data reference relations whose independence was not proven, then
2303    two versions of the loop need to be generated, one which is vectorized
2304    and one which isn't.  A test is then generated to control which of the
2305    loops is executed.  The test checks for the alignment of all of the
2306    data references that may or may not be aligned.  An additional
2307    sequence of runtime tests is generated for each pair of DDRs whose
2308    independence was not proven.  The vectorized version of the loop is
2309    executed only if both the alias and alignment tests are passed.
2310 
2311    The test generated to check which version of loop is executed
2312    is modified to also check for profitability as indicated by the
2313    cost model threshold TH.
2314 
2315    The versioning precondition(s) are placed in *COND_EXPR and
2316    *COND_EXPR_STMT_LIST.  */
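
/* Illustrative shape (a sketch, not the literal GIMPLE that is emitted) of
   the guard that ends up selecting between the two loop copies; TH is the
   cost-model threshold and the two predicate names stand for the alignment
   and alias conditions built by the helpers above:

     if (niters > TH && all_refs_aligned && no_runtime_alias)
       // vectorized loop version
     else
       // scalar loop version
*/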
2317 
2318 void
2319 vect_loop_versioning (loop_vec_info loop_vinfo,
2320 		      unsigned int th, bool check_profitability)
2321 {
2322   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *nloop;
2323   struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
2324   basic_block condition_bb;
2325   gphi_iterator gsi;
2326   gimple_stmt_iterator cond_exp_gsi;
2327   basic_block merge_bb;
2328   basic_block new_exit_bb;
2329   edge new_exit_e, e;
2330   gphi *orig_phi, *new_phi;
2331   tree cond_expr = NULL_TREE;
2332   gimple_seq cond_expr_stmt_list = NULL;
2333   tree arg;
2334   unsigned prob = 4 * REG_BR_PROB_BASE / 5;
2335   gimple_seq gimplify_stmt_list = NULL;
2336   tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
2337   bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
2338   bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
2339   bool version_niter = LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo);
2340 
2341   if (check_profitability)
2342     cond_expr = fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
2343 			     build_int_cst (TREE_TYPE (scalar_loop_iters),
2344 						       th));
2345 
2346   if (version_niter)
2347     vect_create_cond_for_niters_checks (loop_vinfo, &cond_expr);
2348 
2349   if (cond_expr)
2350     cond_expr = force_gimple_operand_1 (cond_expr, &cond_expr_stmt_list,
2351 					is_gimple_condexpr, NULL_TREE);
2352 
2353   if (version_align)
2354     vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
2355 				       &cond_expr_stmt_list);
2356 
2357   if (version_alias)
2358     vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
2359 
2360   cond_expr = force_gimple_operand_1 (cond_expr, &gimplify_stmt_list,
2361 				      is_gimple_condexpr, NULL_TREE);
2362   gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
2363 
2364   initialize_original_copy_tables ();
2365   if (scalar_loop)
2366     {
2367       edge scalar_e;
2368       basic_block preheader, scalar_preheader;
2369 
2370       /* We don't want to scale SCALAR_LOOP's frequencies; we need to
2371 	 scale LOOP's frequencies instead.  */
2372       nloop = loop_version (scalar_loop, cond_expr, &condition_bb,
2373 			    prob, REG_BR_PROB_BASE - prob,
2374 			    REG_BR_PROB_BASE, REG_BR_PROB_BASE - prob, true);
2375       scale_loop_frequencies (loop, prob, REG_BR_PROB_BASE);
2376       /* CONDITION_BB was created above SCALAR_LOOP's preheader,
2377 	 while we need to move it above LOOP's preheader.  */
2378       e = loop_preheader_edge (loop);
2379       scalar_e = loop_preheader_edge (scalar_loop);
2380       gcc_assert (empty_block_p (e->src)
2381 		  && single_pred_p (e->src));
2382       gcc_assert (empty_block_p (scalar_e->src)
2383 		  && single_pred_p (scalar_e->src));
2384       gcc_assert (single_pred_p (condition_bb));
2385       preheader = e->src;
2386       scalar_preheader = scalar_e->src;
2387       scalar_e = find_edge (condition_bb, scalar_preheader);
2388       e = single_pred_edge (preheader);
2389       redirect_edge_and_branch_force (single_pred_edge (condition_bb),
2390 				      scalar_preheader);
2391       redirect_edge_and_branch_force (scalar_e, preheader);
2392       redirect_edge_and_branch_force (e, condition_bb);
2393       set_immediate_dominator (CDI_DOMINATORS, condition_bb,
2394 			       single_pred (condition_bb));
2395       set_immediate_dominator (CDI_DOMINATORS, scalar_preheader,
2396 			       single_pred (scalar_preheader));
2397       set_immediate_dominator (CDI_DOMINATORS, preheader,
2398 			       condition_bb);
2399     }
2400   else
2401     nloop = loop_version (loop, cond_expr, &condition_bb,
2402 			  prob, REG_BR_PROB_BASE - prob,
2403 			  prob, REG_BR_PROB_BASE - prob, true);
2404 
2405   if (version_niter)
2406     {
2407       /* The versioned loop could be infinite; we need to clear the existing
2408 	 niter information, which was copied from the original loop.  */
2409       gcc_assert (loop_constraint_set_p (loop, LOOP_C_FINITE));
2410       vect_free_loop_info_assumptions (nloop);
2411       /* And set constraint LOOP_C_INFINITE for niter analyzer.  */
2412       loop_constraint_set (loop, LOOP_C_INFINITE);
2413     }
2414 
2415   if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
2416       && dump_enabled_p ())
2417     {
2418       if (version_alias)
2419         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2420                          "loop versioned for vectorization because of "
2421 			 "possible aliasing\n");
2422       if (version_align)
2423         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2424                          "loop versioned for vectorization to enhance "
2425 			 "alignment\n");
2426 
2427     }
2428   free_original_copy_tables ();
2429 
2430   /* Loop versioning violates an assumption we try to maintain during
2431      vectorization - that the loop exit block has a single predecessor.
2432      After versioning, the exit block of both loop versions is the same
2433      basic block (i.e. it has two predecessors).  In order to simplify the
2434      following transformations in the vectorizer, we fix this situation
2435      here by adding a new (empty) block on the exit-edge of the loop,
2436      with the proper loop-exit phis to maintain loop-closed form.
2437      If loop versioning wasn't done from LOOP but from SCALAR_LOOP instead,
2438      merge_bb may already have just a single predecessor.  */
2439 
2440   merge_bb = single_exit (loop)->dest;
2441   if (scalar_loop == NULL || EDGE_COUNT (merge_bb->preds) >= 2)
2442     {
2443       gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2);
2444       new_exit_bb = split_edge (single_exit (loop));
2445       new_exit_e = single_exit (loop);
2446       e = EDGE_SUCC (new_exit_bb, 0);
2447 
2448       for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
2449 	{
2450 	  tree new_res;
2451 	  orig_phi = gsi.phi ();
2452 	  new_res = copy_ssa_name (PHI_RESULT (orig_phi));
2453 	  new_phi = create_phi_node (new_res, new_exit_bb);
2454 	  arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
2455 	  add_phi_arg (new_phi, arg, new_exit_e,
2456 		       gimple_phi_arg_location_from_edge (orig_phi, e));
2457 	  adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
2458 	}
2459     }
2460 
2461   /* End loop-exit-fixes after versioning.  */
2462 
2463   if (cond_expr_stmt_list)
2464     {
2465       cond_exp_gsi = gsi_last_bb (condition_bb);
2466       gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
2467 			     GSI_SAME_STMT);
2468     }
2469   update_ssa (TODO_update_ssa);
2470 }
2471