xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/tree-vect-loop-manip.c (revision f3cfa6f6ce31685c6c4a758bc430e69eb99f50a4)
1 /* Vectorizer Specific Loop Manipulations
2    Copyright (C) 2003-2016 Free Software Foundation, Inc.
3    Contributed by Dorit Naishlos <dorit@il.ibm.com>
4    and Ira Rosen <irar@il.ibm.com>
5 
6 This file is part of GCC.
7 
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12 
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
16 for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3.  If not see
20 <http://www.gnu.org/licenses/>.  */
21 
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "cfghooks.h"
29 #include "tree-pass.h"
30 #include "ssa.h"
31 #include "fold-const.h"
32 #include "cfganal.h"
33 #include "gimplify.h"
34 #include "gimple-iterator.h"
35 #include "gimplify-me.h"
36 #include "tree-cfg.h"
37 #include "tree-ssa-loop-manip.h"
38 #include "tree-into-ssa.h"
39 #include "tree-ssa.h"
40 #include "cfgloop.h"
41 #include "tree-scalar-evolution.h"
42 #include "tree-vectorizer.h"
43 
44 /*************************************************************************
45   Simple Loop Peeling Utilities
46 
47   Utilities to support loop peeling for vectorization purposes.
48  *************************************************************************/
49 
50 
51 /* Renames the use *OP_P.  */
52 
53 static void
54 rename_use_op (use_operand_p op_p)
55 {
56   tree new_name;
57 
58   if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
59     return;
60 
61   new_name = get_current_def (USE_FROM_PTR (op_p));
62 
63   /* Something defined outside of the loop.  */
64   if (!new_name)
65     return;
66 
67   /* An ordinary ssa name defined in the loop.  */
68 
69   SET_USE (op_p, new_name);
70 }
71 
72 
73 /* Renames the variables in basic block BB.  Allow renaming  of PHI argumnets
74    on edges incoming from outer-block header if RENAME_FROM_OUTER_LOOP is
75    true.  */
76 
77 static void
78 rename_variables_in_bb (basic_block bb, bool rename_from_outer_loop)
79 {
80   gimple *stmt;
81   use_operand_p use_p;
82   ssa_op_iter iter;
83   edge e;
84   edge_iterator ei;
85   struct loop *loop = bb->loop_father;
86   struct loop *outer_loop = NULL;
87 
88   if (rename_from_outer_loop)
89     {
90       gcc_assert (loop);
91       outer_loop = loop_outer (loop);
92     }
93 
94   for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
95        gsi_next (&gsi))
96     {
97       stmt = gsi_stmt (gsi);
98       FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
99 	rename_use_op (use_p);
100     }
101 
102   FOR_EACH_EDGE (e, ei, bb->preds)
103     {
104       if (!flow_bb_inside_loop_p (loop, e->src)
105 	  && (!rename_from_outer_loop || e->src != outer_loop->header))
106 	continue;
107       for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
108 	   gsi_next (&gsi))
109         rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
110     }
111 }
112 
113 
114 struct adjust_info
115 {
116   tree from, to;
117   basic_block bb;
118 };
119 
120 /* A stack of values to be adjusted in debug stmts.  We have to
121    process them LIFO, so that the closest substitution applies.  If we
122    processed them FIFO, without the stack, we might substitute uses
123    with a PHI DEF that would soon become non-dominant, and when we got
124    to the suitable one, it wouldn't have anything to substitute any
125    more.  */
126 static vec<adjust_info, va_heap> adjust_vec;
127 
128 /* Adjust any debug stmts that referenced AI->from values to use the
129    loop-closed AI->to, if the references are dominated by AI->bb and
130    not by the definition of AI->from.  */
131 
132 static void
133 adjust_debug_stmts_now (adjust_info *ai)
134 {
135   basic_block bbphi = ai->bb;
136   tree orig_def = ai->from;
137   tree new_def = ai->to;
138   imm_use_iterator imm_iter;
139   gimple *stmt;
140   basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));
141 
142   gcc_assert (dom_info_available_p (CDI_DOMINATORS));
143 
144   /* Adjust any debug stmts that held onto non-loop-closed
145      references.  */
146   FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
147     {
148       use_operand_p use_p;
149       basic_block bbuse;
150 
151       if (!is_gimple_debug (stmt))
152 	continue;
153 
154       gcc_assert (gimple_debug_bind_p (stmt));
155 
156       bbuse = gimple_bb (stmt);
157 
158       if ((bbuse == bbphi
159 	   || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
160 	  && !(bbuse == bbdef
161 	       || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
162 	{
163 	  if (new_def)
164 	    FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
165 	      SET_USE (use_p, new_def);
166 	  else
167 	    {
168 	      gimple_debug_bind_reset_value (stmt);
169 	      update_stmt (stmt);
170 	    }
171 	}
172     }
173 }
174 
175 /* Adjust debug stmts as scheduled before.  */
176 
177 static void
178 adjust_vec_debug_stmts (void)
179 {
180   if (!MAY_HAVE_DEBUG_STMTS)
181     return;
182 
183   gcc_assert (adjust_vec.exists ());
184 
185   while (!adjust_vec.is_empty ())
186     {
187       adjust_debug_stmts_now (&adjust_vec.last ());
188       adjust_vec.pop ();
189     }
190 
191   adjust_vec.release ();
192 }
193 
194 /* Adjust any debug stmts that referenced FROM values to use the
195    loop-closed TO, if the references are dominated by BB and not by
196    the definition of FROM.  If adjust_vec is non-NULL, adjustments
197    will be postponed until adjust_vec_debug_stmts is called.  */
198 
199 static void
200 adjust_debug_stmts (tree from, tree to, basic_block bb)
201 {
202   adjust_info ai;
203 
204   if (MAY_HAVE_DEBUG_STMTS
205       && TREE_CODE (from) == SSA_NAME
206       && ! SSA_NAME_IS_DEFAULT_DEF (from)
207       && ! virtual_operand_p (from))
208     {
209       ai.from = from;
210       ai.to = to;
211       ai.bb = bb;
212 
213       if (adjust_vec.exists ())
214 	adjust_vec.safe_push (ai);
215       else
216 	adjust_debug_stmts_now (&ai);
217     }
218 }
219 
220 /* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
221    to adjust any debug stmts that referenced the old phi arg,
222    presumably non-loop-closed references left over from other
223    transformations.  */
224 
225 static void
226 adjust_phi_and_debug_stmts (gimple *update_phi, edge e, tree new_def)
227 {
228   tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);
229 
230   SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);
231 
232   if (MAY_HAVE_DEBUG_STMTS)
233     adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
234 			gimple_bb (update_phi));
235 }
236 
237 
238 /* Update PHI nodes for a guard of the LOOP.
239 
240    Input:
241    - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
242         controls whether LOOP is to be executed.  GUARD_EDGE is the edge that
243         originates from the guard-bb, skips LOOP and reaches the (unique) exit
244         bb of LOOP.  This loop-exit-bb is an empty bb with one successor.
245         We denote this bb NEW_MERGE_BB because before the guard code was added
246         it had a single predecessor (the LOOP header), and now it became a merge
247         point of two paths - the path that ends with the LOOP exit-edge, and
248         the path that ends with GUARD_EDGE.
249    - NEW_EXIT_BB: New basic block that is added by this function between LOOP
250         and NEW_MERGE_BB. It is used to place loop-closed-ssa-form exit-phis.
251 
252    ===> The CFG before the guard-code was added:
253         LOOP_header_bb:
254           loop_body
255           if (exit_loop) goto update_bb
256           else           goto LOOP_header_bb
257         update_bb:
258 
259    ==> The CFG after the guard-code was added:
260         guard_bb:
261           if (LOOP_guard_condition) goto new_merge_bb
262           else                      goto LOOP_header_bb
263         LOOP_header_bb:
264           loop_body
265           if (exit_loop_condition) goto new_merge_bb
266           else                     goto LOOP_header_bb
267         new_merge_bb:
268           goto update_bb
269         update_bb:
270 
271    ==> The CFG after this function:
272         guard_bb:
273           if (LOOP_guard_condition) goto new_merge_bb
274           else                      goto LOOP_header_bb
275         LOOP_header_bb:
276           loop_body
277           if (exit_loop_condition) goto new_exit_bb
278           else                     goto LOOP_header_bb
279         new_exit_bb:
280         new_merge_bb:
281           goto update_bb
282         update_bb:
283 
284    This function:
285    1. creates and updates the relevant phi nodes to account for the new
286       incoming edge (GUARD_EDGE) into NEW_MERGE_BB. This involves:
287       1.1. Create phi nodes at NEW_MERGE_BB.
288       1.2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
289            UPDATE_BB).  UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
290    2. preserves loop-closed-ssa-form by creating the required phi nodes
291       at the exit of LOOP (i.e, in NEW_EXIT_BB).
292 
293    There are two flavors to this function:
294 
295    slpeel_update_phi_nodes_for_guard1:
296      Here the guard controls whether we enter or skip LOOP, where LOOP is a
297      prolog_loop (loop1 below), and the new phis created in NEW_MERGE_BB are
298      for variables that have phis in the loop header.
299 
300    slpeel_update_phi_nodes_for_guard2:
301      Here the guard controls whether we enter or skip LOOP, where LOOP is an
302      epilog_loop (loop2 below), and the new phis created in NEW_MERGE_BB are
303      for variables that have phis in the loop exit.
304 
305    I.E., the overall structure is:
306 
307         loop1_preheader_bb:
308                 guard1 (goto loop1/merge1_bb)
309         loop1
310         loop1_exit_bb:
311                 guard2 (goto merge1_bb/merge2_bb)
312         merge1_bb
313         loop2
314         loop2_exit_bb
315         merge2_bb
316         next_bb
317 
318    slpeel_update_phi_nodes_for_guard1 takes care of creating phis in
319    loop1_exit_bb and merge1_bb. These are entry phis (phis for the vars
320    that have phis in loop1->header).
321 
322    slpeel_update_phi_nodes_for_guard2 takes care of creating phis in
323    loop2_exit_bb and merge2_bb. These are exit phis (phis for the vars
324    that have phis in next_bb). It also adds some of these phis to
325    loop1_exit_bb.
326 
327    slpeel_update_phi_nodes_for_guard1 is always called before
328    slpeel_update_phi_nodes_for_guard2. They are both needed in order
329    to create correct data-flow and loop-closed-ssa-form.
330 
331    Generally slpeel_update_phi_nodes_for_guard1 creates phis for variables
332    that change between iterations of a loop (and therefore have a phi-node
333    at the loop entry), whereas slpeel_update_phi_nodes_for_guard2 creates
334    phis for variables that are used out of the loop (and therefore have
335    loop-closed exit phis). Some variables may be both updated between
336    iterations and used after the loop. This is why in loop1_exit_bb we
337    may need both entry_phis (created by slpeel_update_phi_nodes_for_guard1)
338    and exit phis (created by slpeel_update_phi_nodes_for_guard2).
339 
340    - IS_NEW_LOOP: if IS_NEW_LOOP is true, then LOOP is a newly created copy of
341      an original loop. i.e., we have:
342 
343            orig_loop
344            guard_bb (goto LOOP/new_merge)
345            new_loop <-- LOOP
346            new_exit
347            new_merge
348            next_bb
349 
350      If IS_NEW_LOOP is false, then LOOP is an original loop, in which case we
351      have:
352 
353            new_loop
354            guard_bb (goto LOOP/new_merge)
355            orig_loop <-- LOOP
356            new_exit
357            new_merge
358            next_bb
359 
360      The SSA names defined in the original loop have a current
361      reaching definition that records the corresponding new ssa-name
362      used in the new duplicated loop copy.
363   */
364 
365 /* Function slpeel_update_phi_nodes_for_guard1
366 
367    Input:
368    - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
369    - DEFS - a bitmap of ssa names to mark new names for which we recorded
370             information.
371 
372    In the context of the overall structure, we have:
373 
374         loop1_preheader_bb:
375                 guard1 (goto loop1/merge1_bb)
376 LOOP->  loop1
377         loop1_exit_bb:
378                 guard2 (goto merge1_bb/merge2_bb)
379         merge1_bb
380         loop2
381         loop2_exit_bb
382         merge2_bb
383         next_bb
384 
385    For each name updated between loop iterations (i.e - for each name that has
386    an entry (loop-header) phi in LOOP) we create a new phi in:
387    1. merge1_bb (to account for the edge from guard1)
388    2. loop1_exit_bb (an exit-phi to keep LOOP in loop-closed form)
389 */
390 
391 static void
392 slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
393                                     bool is_new_loop, basic_block *new_exit_bb)
394 {
395   gphi *orig_phi, *new_phi;
396   gphi *update_phi, *update_phi2;
397   tree guard_arg, loop_arg;
398   basic_block new_merge_bb = guard_edge->dest;
399   edge e = EDGE_SUCC (new_merge_bb, 0);
400   basic_block update_bb = e->dest;
401   basic_block orig_bb = loop->header;
402   edge new_exit_e;
403   tree current_new_name;
404   gphi_iterator gsi_orig, gsi_update;
405 
406   /* Create new bb between loop and new_merge_bb.  */
407   *new_exit_bb = split_edge (single_exit (loop));
408 
409   new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
410 
411   for (gsi_orig = gsi_start_phis (orig_bb),
412        gsi_update = gsi_start_phis (update_bb);
413        !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
414        gsi_next (&gsi_orig), gsi_next (&gsi_update))
415     {
416       source_location loop_locus, guard_locus;
417       tree new_res;
418       orig_phi = gsi_orig.phi ();
419       update_phi = gsi_update.phi ();
420 
421       /** 1. Handle new-merge-point phis  **/
422 
423       /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
424       new_res = copy_ssa_name (PHI_RESULT (orig_phi));
425       new_phi = create_phi_node (new_res, new_merge_bb);
426 
427       /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
428             of LOOP. Set the two phi args in NEW_PHI for these edges:  */
429       loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, EDGE_SUCC (loop->latch, 0));
430       loop_locus = gimple_phi_arg_location_from_edge (orig_phi,
431 						      EDGE_SUCC (loop->latch,
432 								 0));
433       guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop_preheader_edge (loop));
434       guard_locus
435 	= gimple_phi_arg_location_from_edge (orig_phi,
436 					     loop_preheader_edge (loop));
437 
438       add_phi_arg (new_phi, loop_arg, new_exit_e, loop_locus);
439       add_phi_arg (new_phi, guard_arg, guard_edge, guard_locus);
440 
441       /* 1.3. Update phi in successor block.  */
442       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
443                   || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
444       adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
445       update_phi2 = new_phi;
446 
447 
448       /** 2. Handle loop-closed-ssa-form phis  **/
449 
450       if (virtual_operand_p (PHI_RESULT (orig_phi)))
451 	continue;
452 
453       /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
454       new_res = copy_ssa_name (PHI_RESULT (orig_phi));
455       new_phi = create_phi_node (new_res, *new_exit_bb);
456 
457       /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
458       add_phi_arg (new_phi, loop_arg, single_exit (loop), loop_locus);
459 
460       /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
461       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
462       adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
463 				  PHI_RESULT (new_phi));
464 
465       /* 2.4. Record the newly created name with set_current_def.
466          We want to find a name such that
467                 name = get_current_def (orig_loop_name)
468          and to set its current definition as follows:
469                 set_current_def (name, new_phi_name)
470 
471          If LOOP is a new loop then loop_arg is already the name we're
472          looking for. If LOOP is the original loop, then loop_arg is
473          the orig_loop_name and the relevant name is recorded in its
474          current reaching definition.  */
475       if (is_new_loop)
476         current_new_name = loop_arg;
477       else
478         {
479           current_new_name = get_current_def (loop_arg);
480 	  /* current_def is not available only if the variable does not
481 	     change inside the loop, in which case we also don't care
482 	     about recording a current_def for it because we won't be
483 	     trying to create loop-exit-phis for it.  */
484 	  if (!current_new_name)
485 	    continue;
486         }
487       tree new_name = get_current_def (current_new_name);
488       /* Because of peeled_chrec optimization it is possible that we have
489 	 set this earlier.  Verify the PHI has the same value.  */
490       if (new_name)
491 	{
492 	  gimple *phi = SSA_NAME_DEF_STMT (new_name);
493 	  gcc_assert (gimple_code (phi) == GIMPLE_PHI
494 		      && gimple_bb (phi) == *new_exit_bb
495 		      && (PHI_ARG_DEF_FROM_EDGE (phi, single_exit (loop))
496 			  == loop_arg));
497 	  continue;
498 	}
499 
500       set_current_def (current_new_name, PHI_RESULT (new_phi));
501     }
502 }
503 
504 
505 /* Function slpeel_update_phi_nodes_for_guard2
506 
507    Input:
508    - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
509 
510    In the context of the overall structure, we have:
511 
512         loop1_preheader_bb:
513                 guard1 (goto loop1/merge1_bb)
514         loop1
515         loop1_exit_bb:
516                 guard2 (goto merge1_bb/merge2_bb)
517         merge1_bb
518 LOOP->  loop2
519         loop2_exit_bb
520         merge2_bb
521         next_bb
522 
523    For each name used out side the loop (i.e - for each name that has an exit
524    phi in next_bb) we create a new phi in:
525    1. merge2_bb (to account for the edge from guard_bb)
526    2. loop2_exit_bb (an exit-phi to keep LOOP in loop-closed form)
527    3. guard2 bb (an exit phi to keep the preceding loop in loop-closed form),
528       if needed (if it wasn't handled by slpeel_update_phis_nodes_for_phi1).
529 */
530 
531 static void
532 slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
533                                     bool is_new_loop, basic_block *new_exit_bb)
534 {
535   gphi *orig_phi, *new_phi;
536   gphi *update_phi, *update_phi2;
537   tree guard_arg, loop_arg;
538   basic_block new_merge_bb = guard_edge->dest;
539   edge e = EDGE_SUCC (new_merge_bb, 0);
540   basic_block update_bb = e->dest;
541   edge new_exit_e;
542   tree orig_def, orig_def_new_name;
543   tree new_name, new_name2;
544   tree arg;
545   gphi_iterator gsi;
546 
547   /* Create new bb between loop and new_merge_bb.  */
548   *new_exit_bb = split_edge (single_exit (loop));
549 
550   new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
551 
552   for (gsi = gsi_start_phis (update_bb); !gsi_end_p (gsi); gsi_next (&gsi))
553     {
554       tree new_res;
555       update_phi = gsi.phi ();
556       orig_phi = update_phi;
557       orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
558       /* This loop-closed-phi actually doesn't represent a use
559          out of the loop - the phi arg is a constant.  */
560       if (TREE_CODE (orig_def) != SSA_NAME)
561         continue;
562       orig_def_new_name = get_current_def (orig_def);
563       arg = NULL_TREE;
564 
565       /** 1. Handle new-merge-point phis  **/
566 
567       /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
568       new_res = copy_ssa_name (PHI_RESULT (orig_phi));
569       new_phi = create_phi_node (new_res, new_merge_bb);
570 
571       /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
572             of LOOP. Set the two PHI args in NEW_PHI for these edges:  */
573       new_name = orig_def;
574       new_name2 = NULL_TREE;
575       if (orig_def_new_name)
576         {
577           new_name = orig_def_new_name;
578 	  /* Some variables have both loop-entry-phis and loop-exit-phis.
579 	     Such variables were given yet newer names by phis placed in
580 	     guard_bb by slpeel_update_phi_nodes_for_guard1. I.e:
581 	     new_name2 = get_current_def (get_current_def (orig_name)).  */
582           new_name2 = get_current_def (new_name);
583         }
584 
585       if (is_new_loop)
586         {
587           guard_arg = orig_def;
588           loop_arg = new_name;
589         }
590       else
591         {
592           guard_arg = new_name;
593           loop_arg = orig_def;
594         }
595       if (new_name2)
596         guard_arg = new_name2;
597 
598       add_phi_arg (new_phi, loop_arg, new_exit_e, UNKNOWN_LOCATION);
599       add_phi_arg (new_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);
600 
601       /* 1.3. Update phi in successor block.  */
602       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == orig_def);
603       adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
604       update_phi2 = new_phi;
605 
606 
607       /** 2. Handle loop-closed-ssa-form phis  **/
608 
609       /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
610       new_res = copy_ssa_name (PHI_RESULT (orig_phi));
611       new_phi = create_phi_node (new_res, *new_exit_bb);
612 
613       /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
614       add_phi_arg (new_phi, loop_arg, single_exit (loop), UNKNOWN_LOCATION);
615 
616       /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
617       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
618       adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
619 				  PHI_RESULT (new_phi));
620 
621 
622       /** 3. Handle loop-closed-ssa-form phis for first loop  **/
623 
624       /* 3.1. Find the relevant names that need an exit-phi in
625 	 GUARD_BB, i.e. names for which
626 	 slpeel_update_phi_nodes_for_guard1 had not already created a
627 	 phi node. This is the case for names that are used outside
628 	 the loop (and therefore need an exit phi) but are not updated
629 	 across loop iterations (and therefore don't have a
630 	 loop-header-phi).
631 
632 	 slpeel_update_phi_nodes_for_guard1 is responsible for
633 	 creating loop-exit phis in GUARD_BB for names that have a
634 	 loop-header-phi.  When such a phi is created we also record
635 	 the new name in its current definition.  If this new name
636 	 exists, then guard_arg was set to this new name (see 1.2
637 	 above).  Therefore, if guard_arg is not this new name, this
638 	 is an indication that an exit-phi in GUARD_BB was not yet
639 	 created, so we take care of it here.  */
640       if (guard_arg == new_name2)
641 	continue;
642       arg = guard_arg;
643 
644       /* 3.2. Generate new phi node in GUARD_BB:  */
645       new_res = copy_ssa_name (PHI_RESULT (orig_phi));
646       new_phi = create_phi_node (new_res, guard_edge->src);
647 
648       /* 3.3. GUARD_BB has one incoming edge:  */
649       gcc_assert (EDGE_COUNT (guard_edge->src->preds) == 1);
650       add_phi_arg (new_phi, arg, EDGE_PRED (guard_edge->src, 0),
651 		   UNKNOWN_LOCATION);
652 
653       /* 3.4. Update phi in successor of GUARD_BB:  */
654       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, guard_edge)
655                                                                 == guard_arg);
656       adjust_phi_and_debug_stmts (update_phi2, guard_edge,
657 				  PHI_RESULT (new_phi));
658     }
659 }
660 
661 
662 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
663    that starts at zero, increases by one and its limit is NITERS.
664 
665    Assumption: the exit-condition of LOOP is the last stmt in the loop.  */
666 
667 void
668 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
669 {
670   tree indx_before_incr, indx_after_incr;
671   gcond *cond_stmt;
672   gcond *orig_cond;
673   edge exit_edge = single_exit (loop);
674   gimple_stmt_iterator loop_cond_gsi;
675   gimple_stmt_iterator incr_gsi;
676   bool insert_after;
677   tree init = build_int_cst (TREE_TYPE (niters), 0);
678   tree step = build_int_cst (TREE_TYPE (niters), 1);
679   source_location loop_loc;
680   enum tree_code code;
681 
682   orig_cond = get_loop_exit_condition (loop);
683   gcc_assert (orig_cond);
684   loop_cond_gsi = gsi_for_stmt (orig_cond);
685 
686   standard_iv_increment_position (loop, &incr_gsi, &insert_after);
687   create_iv (init, step, NULL_TREE, loop,
688              &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);
689 
690   indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
691 					      true, NULL_TREE, true,
692 					      GSI_SAME_STMT);
693   niters = force_gimple_operand_gsi (&loop_cond_gsi, niters, true, NULL_TREE,
694 				     true, GSI_SAME_STMT);
695 
696   code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
697   cond_stmt = gimple_build_cond (code, indx_after_incr, niters, NULL_TREE,
698 				 NULL_TREE);
699 
700   gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
701 
702   /* Remove old loop exit test:  */
703   gsi_remove (&loop_cond_gsi, true);
704   free_stmt_vec_info (orig_cond);
705 
706   loop_loc = find_loop_location (loop);
707   if (dump_enabled_p ())
708     {
709       if (LOCATION_LOCUS (loop_loc) != UNKNOWN_LOCATION)
710 	dump_printf (MSG_NOTE, "\nloop at %s:%d: ", LOCATION_FILE (loop_loc),
711 		     LOCATION_LINE (loop_loc));
712       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, cond_stmt, 0);
713       dump_printf (MSG_NOTE, "\n");
714     }
715   loop->nb_iterations = niters;
716 }
717 
718 /* Helper routine of slpeel_tree_duplicate_loop_to_edge_cfg.
719    For all PHI arguments in FROM->dest and TO->dest from those
720    edges ensure that TO->dest PHI arguments have current_def
721    to that in from.  */
722 
723 static void
724 slpeel_duplicate_current_defs_from_edges (edge from, edge to)
725 {
726   gimple_stmt_iterator gsi_from, gsi_to;
727 
728   for (gsi_from = gsi_start_phis (from->dest),
729        gsi_to = gsi_start_phis (to->dest);
730        !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);)
731     {
732       gimple *from_phi = gsi_stmt (gsi_from);
733       gimple *to_phi = gsi_stmt (gsi_to);
734       tree from_arg = PHI_ARG_DEF_FROM_EDGE (from_phi, from);
735       if (TREE_CODE (from_arg) != SSA_NAME)
736 	{
737 	  gsi_next (&gsi_from);
738 	  continue;
739 	}
740       tree to_arg = PHI_ARG_DEF_FROM_EDGE (to_phi, to);
741       if (TREE_CODE (to_arg) != SSA_NAME)
742 	{
743 	  gsi_next (&gsi_to);
744 	  continue;
745 	}
746       if (get_current_def (to_arg) == NULL_TREE)
747 	set_current_def (to_arg, get_current_def (from_arg));
748       gsi_next (&gsi_from);
749       gsi_next (&gsi_to);
750     }
751 }
752 
753 
754 /* Given LOOP this function generates a new copy of it and puts it
755    on E which is either the entry or exit of LOOP.  If SCALAR_LOOP is
756    non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the
757    basic blocks from SCALAR_LOOP instead of LOOP, but to either the
758    entry or exit of LOOP.  */
759 
760 struct loop *
761 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop,
762 					struct loop *scalar_loop, edge e)
763 {
764   struct loop *new_loop;
765   basic_block *new_bbs, *bbs;
766   bool at_exit;
767   bool was_imm_dom;
768   basic_block exit_dest;
769   edge exit, new_exit;
770   bool duplicate_outer_loop = false;
771 
772   exit = single_exit (loop);
773   at_exit = (e == exit);
774   if (!at_exit && e != loop_preheader_edge (loop))
775     return NULL;
776 
777   if (scalar_loop == NULL)
778     scalar_loop = loop;
779 
780   bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
781   get_loop_body_with_size (scalar_loop, bbs, scalar_loop->num_nodes);
782   /* Allow duplication of outer loops.  */
783   if (scalar_loop->inner)
784     duplicate_outer_loop = true;
785   /* Check whether duplication is possible.  */
786   if (!can_copy_bbs_p (bbs, scalar_loop->num_nodes))
787     {
788       free (bbs);
789       return NULL;
790     }
791 
792   /* Generate new loop structure.  */
793   new_loop = duplicate_loop (scalar_loop, loop_outer (scalar_loop));
794   duplicate_subloops (scalar_loop, new_loop);
795 
796   exit_dest = exit->dest;
797   was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
798 					  exit_dest) == loop->header ?
799 		 true : false);
800 
801   /* Also copy the pre-header, this avoids jumping through hoops to
802      duplicate the loop entry PHI arguments.  Create an empty
803      pre-header unconditionally for this.  */
804   basic_block preheader = split_edge (loop_preheader_edge (scalar_loop));
805   edge entry_e = single_pred_edge (preheader);
806   bbs[scalar_loop->num_nodes] = preheader;
807   new_bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
808 
809   exit = single_exit (scalar_loop);
810   copy_bbs (bbs, scalar_loop->num_nodes + 1, new_bbs,
811 	    &exit, 1, &new_exit, NULL,
812 	    e->src, true);
813   exit = single_exit (loop);
814   basic_block new_preheader = new_bbs[scalar_loop->num_nodes];
815 
816   add_phi_args_after_copy (new_bbs, scalar_loop->num_nodes + 1, NULL);
817 
818   if (scalar_loop != loop)
819     {
820       /* If we copied from SCALAR_LOOP rather than LOOP, SSA_NAMEs from
821 	 SCALAR_LOOP will have current_def set to SSA_NAMEs in the new_loop,
822 	 but LOOP will not.  slpeel_update_phi_nodes_for_guard{1,2} expects
823 	 the LOOP SSA_NAMEs (on the exit edge and edge from latch to
824 	 header) to have current_def set, so copy them over.  */
825       slpeel_duplicate_current_defs_from_edges (single_exit (scalar_loop),
826 						exit);
827       slpeel_duplicate_current_defs_from_edges (EDGE_SUCC (scalar_loop->latch,
828 							   0),
829 						EDGE_SUCC (loop->latch, 0));
830     }
831 
832   if (at_exit) /* Add the loop copy at exit.  */
833     {
834       if (scalar_loop != loop)
835 	{
836 	  gphi_iterator gsi;
837 	  new_exit = redirect_edge_and_branch (new_exit, exit_dest);
838 
839 	  for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi);
840 	       gsi_next (&gsi))
841 	    {
842 	      gphi *phi = gsi.phi ();
843 	      tree orig_arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
844 	      location_t orig_locus
845 		= gimple_phi_arg_location_from_edge (phi, e);
846 
847 	      add_phi_arg (phi, orig_arg, new_exit, orig_locus);
848 	    }
849 	}
850       redirect_edge_and_branch_force (e, new_preheader);
851       flush_pending_stmts (e);
852       set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src);
853       if (was_imm_dom || duplicate_outer_loop)
854 	set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_exit->src);
855 
856       /* And remove the non-necessary forwarder again.  Keep the other
857          one so we have a proper pre-header for the loop at the exit edge.  */
858       redirect_edge_pred (single_succ_edge (preheader),
859 			  single_pred (preheader));
860       delete_basic_block (preheader);
861       set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
862 			       loop_preheader_edge (scalar_loop)->src);
863     }
864   else /* Add the copy at entry.  */
865     {
866       if (scalar_loop != loop)
867 	{
868 	  /* Remove the non-necessary forwarder of scalar_loop again.  */
869 	  redirect_edge_pred (single_succ_edge (preheader),
870 			      single_pred (preheader));
871 	  delete_basic_block (preheader);
872 	  set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
873 				   loop_preheader_edge (scalar_loop)->src);
874 	  preheader = split_edge (loop_preheader_edge (loop));
875 	  entry_e = single_pred_edge (preheader);
876 	}
877 
878       redirect_edge_and_branch_force (entry_e, new_preheader);
879       flush_pending_stmts (entry_e);
880       set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src);
881 
882       redirect_edge_and_branch_force (new_exit, preheader);
883       flush_pending_stmts (new_exit);
884       set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src);
885 
886       /* And remove the non-necessary forwarder again.  Keep the other
887          one so we have a proper pre-header for the loop at the exit edge.  */
888       redirect_edge_pred (single_succ_edge (new_preheader),
889 			  single_pred (new_preheader));
890       delete_basic_block (new_preheader);
891       set_immediate_dominator (CDI_DOMINATORS, new_loop->header,
892 			       loop_preheader_edge (new_loop)->src);
893     }
894 
895   for (unsigned i = 0; i < scalar_loop->num_nodes + 1; i++)
896     rename_variables_in_bb (new_bbs[i], duplicate_outer_loop);
897 
898   if (scalar_loop != loop)
899     {
900       /* Update new_loop->header PHIs, so that on the preheader
901 	 edge they are the ones from loop rather than scalar_loop.  */
902       gphi_iterator gsi_orig, gsi_new;
903       edge orig_e = loop_preheader_edge (loop);
904       edge new_e = loop_preheader_edge (new_loop);
905 
906       for (gsi_orig = gsi_start_phis (loop->header),
907 	   gsi_new = gsi_start_phis (new_loop->header);
908 	   !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_new);
909 	   gsi_next (&gsi_orig), gsi_next (&gsi_new))
910 	{
911 	  gphi *orig_phi = gsi_orig.phi ();
912 	  gphi *new_phi = gsi_new.phi ();
913 	  tree orig_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
914 	  location_t orig_locus
915 	    = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
916 
917 	  add_phi_arg (new_phi, orig_arg, new_e, orig_locus);
918 	}
919     }
920 
921   free (new_bbs);
922   free (bbs);
923 
924   checking_verify_dominators (CDI_DOMINATORS);
925 
926   return new_loop;
927 }
928 
929 
930 /* Given the condition statement COND, put it as the last statement
931    of GUARD_BB; EXIT_BB is the basic block to skip the loop;
932    Assumes that this is the single exit of the guarded loop.
933    Returns the skip edge, inserts new stmts on the COND_EXPR_STMT_LIST.  */
934 
935 static edge
936 slpeel_add_loop_guard (basic_block guard_bb, tree cond,
937 		       gimple_seq cond_expr_stmt_list,
938 		       basic_block exit_bb, basic_block dom_bb,
939 		       int probability)
940 {
941   gimple_stmt_iterator gsi;
942   edge new_e, enter_e;
943   gcond *cond_stmt;
944   gimple_seq gimplify_stmt_list = NULL;
945 
946   enter_e = EDGE_SUCC (guard_bb, 0);
947   enter_e->flags &= ~EDGE_FALLTHRU;
948   enter_e->flags |= EDGE_FALSE_VALUE;
949   gsi = gsi_last_bb (guard_bb);
950 
951   cond = force_gimple_operand_1 (cond, &gimplify_stmt_list, is_gimple_condexpr,
952 				 NULL_TREE);
953   if (gimplify_stmt_list)
954     gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
955   cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
956   if (cond_expr_stmt_list)
957     gsi_insert_seq_after (&gsi, cond_expr_stmt_list, GSI_NEW_STMT);
958 
959   gsi = gsi_last_bb (guard_bb);
960   gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
961 
962   /* Add new edge to connect guard block to the merge/loop-exit block.  */
963   new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
964 
965   new_e->count = guard_bb->count;
966   new_e->probability = probability;
967   new_e->count = apply_probability (enter_e->count, probability);
968   enter_e->count -= new_e->count;
969   enter_e->probability = inverse_probability (probability);
970   set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
971   return new_e;
972 }
973 
974 
975 /* This function verifies that the following restrictions apply to LOOP:
976    (1) it consists of exactly 2 basic blocks - header, and an empty latch
977        for innermost loop and 5 basic blocks for outer-loop.
978    (2) it is single entry, single exit
979    (3) its exit condition is the last stmt in the header
980    (4) E is the entry/exit edge of LOOP.
981  */
982 
983 bool
984 slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
985 {
986   edge exit_e = single_exit (loop);
987   edge entry_e = loop_preheader_edge (loop);
988   gcond *orig_cond = get_loop_exit_condition (loop);
989   gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
990   unsigned int num_bb = loop->inner? 5 : 2;
991 
992       /* All loops have an outer scope; the only case loop->outer is NULL is for
993          the function itself.  */
994       if (!loop_outer (loop)
995       || loop->num_nodes != num_bb
996       || !empty_block_p (loop->latch)
997       || !single_exit (loop)
998       /* Verify that new loop exit condition can be trivially modified.  */
999       || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
1000       || (e != exit_e && e != entry_e))
1001     return false;
1002 
1003   return true;
1004 }
1005 
1006 static void
1007 slpeel_checking_verify_cfg_after_peeling (struct loop *first_loop,
1008 					  struct loop *second_loop)
1009 {
1010   if (!flag_checking)
1011     return;
1012 
1013   basic_block loop1_exit_bb = single_exit (first_loop)->dest;
1014   basic_block loop2_entry_bb = loop_preheader_edge (second_loop)->src;
1015   basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
1016 
1017   /* A guard that controls whether the second_loop is to be executed or skipped
1018      is placed in first_loop->exit.  first_loop->exit therefore has two
1019      successors - one is the preheader of second_loop, and the other is a bb
1020      after second_loop.
1021    */
1022   gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);
1023 
1024   /* 1. Verify that one of the successors of first_loop->exit is the preheader
1025         of second_loop.  */
1026 
1027   /* The preheader of new_loop is expected to have two predecessors:
1028      first_loop->exit and the block that precedes first_loop.  */
1029 
1030   gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
1031               && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
1032                    && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
1033                || (EDGE_PRED (loop2_entry_bb, 1)->src ==  loop1_exit_bb
1034                    && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));
1035 
1036   /* Verify that the other successor of first_loop->exit is after the
1037      second_loop.  */
1038   /* TODO */
1039 }
1040 
1041 /* If the run time cost model check determines that vectorization is
1042    not profitable and hence scalar loop should be generated then set
1043    FIRST_NITERS to prologue peeled iterations. This will allow all the
1044    iterations to be executed in the prologue peeled scalar loop.  */
1045 
1046 static void
1047 set_prologue_iterations (basic_block bb_before_first_loop,
1048 			 tree *first_niters,
1049 			 struct loop *loop,
1050 			 unsigned int th,
1051 			 int probability)
1052 {
1053   edge e;
1054   basic_block cond_bb, then_bb;
1055   tree var, prologue_after_cost_adjust_name;
1056   gimple_stmt_iterator gsi;
1057   gphi *newphi;
1058   edge e_true, e_false, e_fallthru;
1059   gcond *cond_stmt;
1060   gimple_seq stmts = NULL;
1061   tree cost_pre_condition = NULL_TREE;
1062   tree scalar_loop_iters =
1063     unshare_expr (LOOP_VINFO_NITERS_UNCHANGED (loop_vec_info_for_loop (loop)));
1064 
1065   e = single_pred_edge (bb_before_first_loop);
1066   cond_bb = split_edge (e);
1067 
1068   e = single_pred_edge (bb_before_first_loop);
1069   then_bb = split_edge (e);
1070   set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
1071 
1072   e_false = make_single_succ_edge (cond_bb, bb_before_first_loop,
1073 				   EDGE_FALSE_VALUE);
1074   set_immediate_dominator (CDI_DOMINATORS, bb_before_first_loop, cond_bb);
1075 
1076   e_true = EDGE_PRED (then_bb, 0);
1077   e_true->flags &= ~EDGE_FALLTHRU;
1078   e_true->flags |= EDGE_TRUE_VALUE;
1079 
1080   e_true->probability = probability;
1081   e_false->probability = inverse_probability (probability);
1082   e_true->count = apply_probability (cond_bb->count, probability);
1083   e_false->count = cond_bb->count - e_true->count;
1084   then_bb->frequency = EDGE_FREQUENCY (e_true);
1085   then_bb->count = e_true->count;
1086 
1087   e_fallthru = EDGE_SUCC (then_bb, 0);
1088   e_fallthru->count = then_bb->count;
1089 
1090   gsi = gsi_last_bb (cond_bb);
1091   cost_pre_condition =
1092     fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
1093 	         build_int_cst (TREE_TYPE (scalar_loop_iters), th));
1094   cost_pre_condition =
1095     force_gimple_operand_gsi_1 (&gsi, cost_pre_condition, is_gimple_condexpr,
1096 				NULL_TREE, false, GSI_CONTINUE_LINKING);
1097   cond_stmt = gimple_build_cond_from_tree (cost_pre_condition,
1098 					   NULL_TREE, NULL_TREE);
1099   gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
1100 
1101   var = create_tmp_var (TREE_TYPE (scalar_loop_iters),
1102 			"prologue_after_cost_adjust");
1103   prologue_after_cost_adjust_name =
1104     force_gimple_operand (scalar_loop_iters, &stmts, false, var);
1105 
1106   gsi = gsi_last_bb (then_bb);
1107   if (stmts)
1108     gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
1109 
1110   newphi = create_phi_node (var, bb_before_first_loop);
1111   add_phi_arg (newphi, prologue_after_cost_adjust_name, e_fallthru,
1112 	       UNKNOWN_LOCATION);
1113   add_phi_arg (newphi, *first_niters, e_false, UNKNOWN_LOCATION);
1114 
1115   *first_niters = PHI_RESULT (newphi);
1116 }
1117 
1118 /* Function slpeel_tree_peel_loop_to_edge.
1119 
1120    Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
1121    that is placed on the entry (exit) edge E of LOOP. After this transformation
1122    we have two loops one after the other - first-loop iterates FIRST_NITERS
1123    times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
1124    If the cost model indicates that it is profitable to emit a scalar
1125    loop instead of the vector one, then the prolog (epilog) loop will iterate
1126    for the entire unchanged scalar iterations of the loop.
1127 
1128    Input:
1129    - LOOP: the loop to be peeled.
1130    - SCALAR_LOOP: if non-NULL, the alternate loop from which basic blocks
1131 	should be copied.
1132    - E: the exit or entry edge of LOOP.
1133         If it is the entry edge, we peel the first iterations of LOOP. In this
1134         case first-loop is LOOP, and second-loop is the newly created loop.
1135         If it is the exit edge, we peel the last iterations of LOOP. In this
1136         case, first-loop is the newly created loop, and second-loop is LOOP.
1137    - NITERS: the number of iterations that LOOP iterates.
1138    - FIRST_NITERS: the number of iterations that the first-loop should iterate.
1139    - UPDATE_FIRST_LOOP_COUNT:  specified whether this function is responsible
1140         for updating the loop bound of the first-loop to FIRST_NITERS.  If it
1141         is false, the caller of this function may want to take care of this
1142         (this can be useful if we don't want new stmts added to first-loop).
1143    - TH: cost model profitability threshold of iterations for vectorization.
1144    - CHECK_PROFITABILITY: specify whether cost model check has not occurred
1145                           during versioning and hence needs to occur during
1146 			  prologue generation or whether cost model check
1147 			  has not occurred during prologue generation and hence
1148 			  needs to occur during epilogue generation.
1149    - BOUND1 is the upper bound on number of iterations of the first loop (if known)
1150    - BOUND2 is the upper bound on number of iterations of the second loop (if known)
1151 
1152 
1153    Output:
1154    The function returns a pointer to the new loop-copy, or NULL if it failed
1155    to perform the transformation.
1156 
1157    The function generates two if-then-else guards: one before the first loop,
1158    and the other before the second loop:
1159    The first guard is:
1160      if (FIRST_NITERS == 0) then skip the first loop,
1161      and go directly to the second loop.
1162    The second guard is:
1163      if (FIRST_NITERS == NITERS) then skip the second loop.
1164 
1165    If the optional COND_EXPR and COND_EXPR_STMT_LIST arguments are given
1166    then the generated condition is combined with COND_EXPR and the
1167    statements in COND_EXPR_STMT_LIST are emitted together with it.
1168 
1169    FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
1170    FORNOW the resulting code will not be in loop-closed-ssa form.
1171 */
1172 
1173 static struct loop *
1174 slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loop *scalar_loop,
1175 			       edge e, tree *first_niters,
1176 			       tree niters, bool update_first_loop_count,
1177 			       unsigned int th, bool check_profitability,
1178 			       tree cond_expr, gimple_seq cond_expr_stmt_list,
1179 			       int bound1, int bound2)
1180 {
1181   struct loop *new_loop = NULL, *first_loop, *second_loop;
1182   edge skip_e;
1183   tree pre_condition = NULL_TREE;
1184   basic_block bb_before_second_loop, bb_after_second_loop;
1185   basic_block bb_before_first_loop;
1186   basic_block bb_between_loops;
1187   basic_block new_exit_bb;
1188   gphi_iterator gsi;
1189   edge exit_e = single_exit (loop);
1190   source_location loop_loc;
1191   /* There are many aspects to how likely the first loop is going to be executed.
1192      Without histogram we can't really do good job.  Simply set it to
1193      2/3, so the first loop is not reordered to the end of function and
1194      the hot path through stays short.  */
1195   int first_guard_probability = 2 * REG_BR_PROB_BASE / 3;
1196   int second_guard_probability = 2 * REG_BR_PROB_BASE / 3;
1197   int probability_of_second_loop;
1198 
1199   if (!slpeel_can_duplicate_loop_p (loop, e))
1200     return NULL;
1201 
1202   /* We might have a queued need to update virtual SSA form.  As we
1203      delete the update SSA machinery below after doing a regular
1204      incremental SSA update during loop copying make sure we don't
1205      lose that fact.
1206      ???  Needing to update virtual SSA form by renaming is unfortunate
1207      but not all of the vectorizer code inserting new loads / stores
1208      properly assigns virtual operands to those statements.  */
1209   update_ssa (TODO_update_ssa_only_virtuals);
1210 
1211   /* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI
1212      in the exit bb and rename all the uses after the loop.  This simplifies
1213      the *guard[12] routines, which assume loop closed SSA form for all PHIs
1214      (but normally loop closed SSA form doesn't require virtual PHIs to be
1215      in the same form).  Doing this early simplifies the checking what
1216      uses should be renamed.  */
1217   for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
1218     if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
1219       {
1220 	gphi *phi = gsi.phi ();
1221 	for (gsi = gsi_start_phis (exit_e->dest);
1222 	     !gsi_end_p (gsi); gsi_next (&gsi))
1223 	  if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
1224 	    break;
1225 	if (gsi_end_p (gsi))
1226 	  {
1227 	    tree new_vop = copy_ssa_name (PHI_RESULT (phi));
1228 	    gphi *new_phi = create_phi_node (new_vop, exit_e->dest);
1229 	    tree vop = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
1230 	    imm_use_iterator imm_iter;
1231 	    gimple *stmt;
1232 	    use_operand_p use_p;
1233 
1234 	    SSA_NAME_OCCURS_IN_ABNORMAL_PHI (new_vop)
1235 	      = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (vop);
1236 	    add_phi_arg (new_phi, vop, exit_e, UNKNOWN_LOCATION);
1237 	    gimple_phi_set_result (new_phi, new_vop);
1238 	    FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop)
1239 	      if (stmt != new_phi
1240 		  && !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
1241 		FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1242 		  SET_USE (use_p, new_vop);
1243 	  }
1244 	break;
1245       }
1246 
1247   /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
1248         Resulting CFG would be:
1249 
1250         first_loop:
1251         do {
1252         } while ...
1253 
1254         second_loop:
1255         do {
1256         } while ...
1257 
1258         orig_exit_bb:
1259    */
1260 
1261   if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop,
1262 							   e)))
1263     {
1264       loop_loc = find_loop_location (loop);
1265       dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1266                        "tree_duplicate_loop_to_edge_cfg failed.\n");
1267       return NULL;
1268     }
1269 
1270   if (MAY_HAVE_DEBUG_STMTS)
1271     {
1272       gcc_assert (!adjust_vec.exists ());
1273       adjust_vec.create (32);
1274     }
1275 
1276   if (e == exit_e)
1277     {
1278       /* NEW_LOOP was placed after LOOP.  */
1279       first_loop = loop;
1280       second_loop = new_loop;
1281     }
1282   else
1283     {
1284       /* NEW_LOOP was placed before LOOP.  */
1285       first_loop = new_loop;
1286       second_loop = loop;
1287     }
1288 
1289   /* 2.  Add the guard code in one of the following ways:
1290 
1291      2.a Add the guard that controls whether the first loop is executed.
1292          This occurs when this function is invoked for prologue or epilogue
1293 	 generation and when the cost model check can be done at compile time.
1294 
1295          Resulting CFG would be:
1296 
1297          bb_before_first_loop:
1298          if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1299                                 GOTO first-loop
1300 
1301          first_loop:
1302          do {
1303          } while ...
1304 
1305          bb_before_second_loop:
1306 
1307          second_loop:
1308          do {
1309          } while ...
1310 
1311          orig_exit_bb:
1312 
1313      2.b Add the cost model check that allows the prologue
1314          to iterate for the entire unchanged scalar
1315          iterations of the loop in the event that the cost
1316          model indicates that the scalar loop is more
1317          profitable than the vector one. This occurs when
1318 	 this function is invoked for prologue generation
1319 	 and the cost model check needs to be done at run
1320 	 time.
1321 
1322          Resulting CFG after prologue peeling would be:
1323 
1324          if (scalar_loop_iterations <= th)
1325            FIRST_NITERS = scalar_loop_iterations
1326 
1327          bb_before_first_loop:
1328          if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1329                                 GOTO first-loop
1330 
1331          first_loop:
1332          do {
1333          } while ...
1334 
1335          bb_before_second_loop:
1336 
1337          second_loop:
1338          do {
1339          } while ...
1340 
1341          orig_exit_bb:
1342 
1343      2.c Add the cost model check that allows the epilogue
1344          to iterate for the entire unchanged scalar
1345          iterations of the loop in the event that the cost
1346          model indicates that the scalar loop is more
1347          profitable than the vector one. This occurs when
1348 	 this function is invoked for epilogue generation
1349 	 and the cost model check needs to be done at run
1350 	 time.  This check is combined with any pre-existing
1351 	 check in COND_EXPR to avoid versioning.
1352 
1353          Resulting CFG after prologue peeling would be:
1354 
1355          bb_before_first_loop:
1356          if ((scalar_loop_iterations <= th)
1357              ||
1358              FIRST_NITERS == 0) GOTO bb_before_second_loop
1359                                 GOTO first-loop
1360 
1361          first_loop:
1362          do {
1363          } while ...
1364 
1365          bb_before_second_loop:
1366 
1367          second_loop:
1368          do {
1369          } while ...
1370 
1371          orig_exit_bb:
1372   */
1373 
1374   bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
1375   /* Loop copying insterted a forwarder block for us here.  */
1376   bb_before_second_loop = single_exit (first_loop)->dest;
1377 
1378   probability_of_second_loop = (inverse_probability (first_guard_probability)
1379 			        + combine_probabilities (second_guard_probability,
1380                                                          first_guard_probability));
1381   /* Theoretically preheader edge of first loop and exit edge should have
1382      same frequencies.  Loop exit probablities are however easy to get wrong.
1383      It is safer to copy value from original loop entry.  */
1384   bb_before_second_loop->frequency
1385      = combine_probabilities (bb_before_first_loop->frequency,
1386                               probability_of_second_loop);
1387   bb_before_second_loop->count
1388      = apply_probability (bb_before_first_loop->count,
1389 			  probability_of_second_loop);
1390   single_succ_edge (bb_before_second_loop)->count
1391      = bb_before_second_loop->count;
1392 
1393   /* Epilogue peeling.  */
1394   if (!update_first_loop_count)
1395     {
1396       loop_vec_info loop_vinfo = loop_vec_info_for_loop (loop);
1397       tree scalar_loop_iters = LOOP_VINFO_NITERSM1 (loop_vinfo);
1398       unsigned limit = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1;
1399       if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
1400 	limit = limit + 1;
1401       if (check_profitability
1402 	  && th > limit)
1403 	limit = th;
1404       pre_condition =
1405 	fold_build2 (LT_EXPR, boolean_type_node, scalar_loop_iters,
1406 		     build_int_cst (TREE_TYPE (scalar_loop_iters), limit));
1407       if (cond_expr)
1408 	{
1409 	  pre_condition =
1410 	    fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1411 			 pre_condition,
1412 			 fold_build1 (TRUTH_NOT_EXPR, boolean_type_node,
1413 				      cond_expr));
1414 	}
1415     }
1416 
1417   /* Prologue peeling.  */
1418   else
1419     {
1420       if (check_profitability)
1421 	set_prologue_iterations (bb_before_first_loop, first_niters,
1422 				 loop, th, first_guard_probability);
1423 
1424       pre_condition =
1425 	fold_build2 (LE_EXPR, boolean_type_node, *first_niters,
1426 		     build_int_cst (TREE_TYPE (*first_niters), 0));
1427     }
1428 
1429   skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
1430 				  cond_expr_stmt_list,
1431                                   bb_before_second_loop, bb_before_first_loop,
1432 				  inverse_probability (first_guard_probability));
1433   scale_loop_profile (first_loop, first_guard_probability,
1434 		      check_profitability && (int)th > bound1 ? th : bound1);
1435   slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
1436 				      first_loop == new_loop,
1437 				      &new_exit_bb);
1438 
1439 
1440   /* 3. Add the guard that controls whether the second loop is executed.
1441         Resulting CFG would be:
1442 
1443         bb_before_first_loop:
1444         if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1445                                GOTO first-loop
1446 
1447         first_loop:
1448         do {
1449         } while ...
1450 
1451         bb_between_loops:
1452         if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1453                                     GOTO bb_before_second_loop
1454 
1455         bb_before_second_loop:
1456 
1457         second_loop:
1458         do {
1459         } while ...
1460 
1461         bb_after_second_loop:
1462 
1463         orig_exit_bb:
1464    */
1465 
1466   bb_between_loops = new_exit_bb;
1467   bb_after_second_loop = split_edge (single_exit (second_loop));
1468 
1469   pre_condition =
1470 	fold_build2 (EQ_EXPR, boolean_type_node, *first_niters, niters);
1471   skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition, NULL,
1472                                   bb_after_second_loop, bb_before_first_loop,
1473 				  inverse_probability (second_guard_probability));
1474   scale_loop_profile (second_loop, probability_of_second_loop, bound2);
1475   slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
1476                                      second_loop == new_loop, &new_exit_bb);
1477 
1478   /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1479    */
1480   if (update_first_loop_count)
1481     slpeel_make_loop_iterate_ntimes (first_loop, *first_niters);
1482 
1483   delete_update_ssa ();
1484 
1485   adjust_vec_debug_stmts ();
1486 
1487   return new_loop;
1488 }
1489 
1490 /* Function vect_get_loop_location.
1491 
1492    Extract the location of the loop in the source code.
1493    If the loop is not well formed for vectorization, an estimated
1494    location is calculated.
1495    Return the loop location if succeed and NULL if not.  */
1496 
1497 source_location
1498 find_loop_location (struct loop *loop)
1499 {
1500   gimple *stmt = NULL;
1501   basic_block bb;
1502   gimple_stmt_iterator si;
1503 
1504   if (!loop)
1505     return UNKNOWN_LOCATION;
1506 
1507   stmt = get_loop_exit_condition (loop);
1508 
1509   if (stmt
1510       && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
1511     return gimple_location (stmt);
1512 
1513   /* If we got here the loop is probably not "well formed",
1514      try to estimate the loop location */
1515 
1516   if (!loop->header)
1517     return UNKNOWN_LOCATION;
1518 
1519   bb = loop->header;
1520 
1521   for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1522     {
1523       stmt = gsi_stmt (si);
1524       if (LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
1525         return gimple_location (stmt);
1526     }
1527 
1528   return UNKNOWN_LOCATION;
1529 }
1530 
1531 
1532 /* Function vect_can_advance_ivs_p
1533 
1534    In case the number of iterations that LOOP iterates is unknown at compile
1535    time, an epilog loop will be generated, and the loop induction variables
1536    (IVs) will be "advanced" to the value they are supposed to take just before
1537    the epilog loop.  Here we check that the access function of the loop IVs
1538    and the expression that represents the loop bound are simple enough.
1539    These restrictions will be relaxed in the future.  */
1540 
1541 bool
1542 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
1543 {
1544   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1545   basic_block bb = loop->header;
1546   gimple *phi;
1547   gphi_iterator gsi;
1548 
1549   /* Analyze phi functions of the loop header.  */
1550 
1551   if (dump_enabled_p ())
1552     dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:\n");
1553   for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1554     {
1555       tree evolution_part;
1556 
1557       phi = gsi.phi ();
1558       if (dump_enabled_p ())
1559 	{
1560           dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
1561           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
1562           dump_printf (MSG_NOTE, "\n");
1563 	}
1564 
1565       /* Skip virtual phi's. The data dependences that are associated with
1566          virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
1567 
1568       if (virtual_operand_p (PHI_RESULT (phi)))
1569 	{
1570 	  if (dump_enabled_p ())
1571 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1572                              "virtual phi. skip.\n");
1573 	  continue;
1574 	}
1575 
1576       /* Skip reduction phis.  */
1577 
1578       if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
1579         {
1580           if (dump_enabled_p ())
1581             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1582                              "reduc phi. skip.\n");
1583           continue;
1584         }
1585 
1586       /* Analyze the evolution function.  */
1587 
1588       evolution_part
1589 	= STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
1590       if (evolution_part == NULL_TREE)
1591         {
1592 	  if (dump_enabled_p ())
1593 	    dump_printf (MSG_MISSED_OPTIMIZATION,
1594 			 "No access function or evolution.\n");
1595 	  return false;
1596         }
1597 
1598       /* FORNOW: We do not transform initial conditions of IVs
1599 	 which evolution functions are a polynomial of degree >= 2.  */
1600 
1601       if (tree_is_chrec (evolution_part))
1602 	return false;
1603     }
1604 
1605   return true;
1606 }
1607 
1608 
1609 /*   Function vect_update_ivs_after_vectorizer.
1610 
1611      "Advance" the induction variables of LOOP to the value they should take
1612      after the execution of LOOP.  This is currently necessary because the
1613      vectorizer does not handle induction variables that are used after the
1614      loop.  Such a situation occurs when the last iterations of LOOP are
1615      peeled, because:
1616      1. We introduced new uses after LOOP for IVs that were not originally used
1617         after LOOP: the IVs of LOOP are now used by an epilog loop.
1618      2. LOOP is going to be vectorized; this means that it will iterate N/VF
1619         times, whereas the loop IVs should be bumped N times.
1620 
1621      Input:
1622      - LOOP - a loop that is going to be vectorized. The last few iterations
1623               of LOOP were peeled.
1624      - NITERS - the number of iterations that LOOP executes (before it is
1625                 vectorized). i.e, the number of times the ivs should be bumped.
1626      - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
1627                   coming out from LOOP on which there are uses of the LOOP ivs
1628 		  (this is the path from LOOP->exit to epilog_loop->preheader).
1629 
1630                   The new definitions of the ivs are placed in LOOP->exit.
1631                   The phi args associated with the edge UPDATE_E in the bb
1632                   UPDATE_E->dest are updated accordingly.
1633 
1634      Assumption 1: Like the rest of the vectorizer, this function assumes
1635      a single loop exit that has a single predecessor.
1636 
1637      Assumption 2: The phi nodes in the LOOP header and in update_bb are
1638      organized in the same order.
1639 
1640      Assumption 3: The access function of the ivs is simple enough (see
1641      vect_can_advance_ivs_p).  This assumption will be relaxed in the future.
1642 
1643      Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
1644      coming out of LOOP on which the ivs of LOOP are used (this is the path
1645      that leads to the epilog loop; other paths skip the epilog loop).  This
1646      path starts with the edge UPDATE_E, and its destination (denoted update_bb)
1647      needs to have its phis updated.
1648  */
1649 
1650 static void
1651 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
1652 				  edge update_e)
1653 {
1654   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1655   basic_block exit_bb = single_exit (loop)->dest;
1656   gphi *phi, *phi1;
1657   gphi_iterator gsi, gsi1;
1658   basic_block update_bb = update_e->dest;
1659 
1660   gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
1661 
1662   /* Make sure there exists a single-predecessor exit bb:  */
1663   gcc_assert (single_pred_p (exit_bb));
1664 
1665   for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
1666        !gsi_end_p (gsi) && !gsi_end_p (gsi1);
1667        gsi_next (&gsi), gsi_next (&gsi1))
1668     {
1669       tree init_expr;
1670       tree step_expr, off;
1671       tree type;
1672       tree var, ni, ni_name;
1673       gimple_stmt_iterator last_gsi;
1674       stmt_vec_info stmt_info;
1675 
1676       phi = gsi.phi ();
1677       phi1 = gsi1.phi ();
1678       if (dump_enabled_p ())
1679         {
1680           dump_printf_loc (MSG_NOTE, vect_location,
1681                            "vect_update_ivs_after_vectorizer: phi: ");
1682 	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
1683           dump_printf (MSG_NOTE, "\n");
1684         }
1685 
1686       /* Skip virtual phi's.  */
1687       if (virtual_operand_p (PHI_RESULT (phi)))
1688 	{
1689 	  if (dump_enabled_p ())
1690 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1691                              "virtual phi. skip.\n");
1692 	  continue;
1693 	}
1694 
1695       /* Skip reduction phis.  */
1696       stmt_info = vinfo_for_stmt (phi);
1697       if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
1698 	  || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
1699         {
1700 	  if (dump_enabled_p ())
1701 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1702                              "reduc phi. skip.\n");
1703           continue;
1704         }
1705 
1706       type = TREE_TYPE (gimple_phi_result (phi));
1707       step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info);
1708       step_expr = unshare_expr (step_expr);
1709 
1710       /* FORNOW: We do not support IVs whose evolution function is a polynomial
1711          of degree >= 2 or exponential.  */
1712       gcc_assert (!tree_is_chrec (step_expr));
1713 
1714       init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1715 
1716       off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
1717 			 fold_convert (TREE_TYPE (step_expr), niters),
1718 			 step_expr);
1719       if (POINTER_TYPE_P (type))
1720 	ni = fold_build_pointer_plus (init_expr, off);
1721       else
1722 	ni = fold_build2 (PLUS_EXPR, type,
1723 			  init_expr, fold_convert (type, off));
1724 
1725       var = create_tmp_var (type, "tmp");
1726 
1727       last_gsi = gsi_last_bb (exit_bb);
1728       ni_name = force_gimple_operand_gsi (&last_gsi, ni, false, var,
1729 					  true, GSI_SAME_STMT);
1730 
1731       /* Fix phi expressions in the successor bb.  */
1732       adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
1733     }
1734 }
1735 
1736 /* Function vect_do_peeling_for_loop_bound
1737 
1738    Peel the last iterations of the loop represented by LOOP_VINFO.
1739    The peeled iterations form a new epilog loop.  Given that the loop now
1740    iterates NITERS times, the new epilog loop iterates
1741    NITERS % VECTORIZATION_FACTOR times.
1742 
1743    The original loop will later be made to iterate
1744    NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO).
1745 
1746    COND_EXPR and COND_EXPR_STMT_LIST are combined with a new generated
1747    test.  */
1748 
1749 void
1750 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo,
1751 				tree ni_name, tree ratio_mult_vf_name,
1752 				unsigned int th, bool check_profitability)
1753 {
1754   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1755   struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
1756   struct loop *new_loop;
1757   edge update_e;
1758   basic_block preheader;
1759   int loop_num;
1760   int max_iter;
1761   tree cond_expr = NULL_TREE;
1762   gimple_seq cond_expr_stmt_list = NULL;
1763 
1764   if (dump_enabled_p ())
1765     dump_printf_loc (MSG_NOTE, vect_location,
1766                      "=== vect_do_peeling_for_loop_bound ===\n");
1767 
1768   initialize_original_copy_tables ();
1769 
1770   loop_num  = loop->num;
1771 
1772   new_loop
1773     = slpeel_tree_peel_loop_to_edge (loop, scalar_loop, single_exit (loop),
1774 				     &ratio_mult_vf_name, ni_name, false,
1775 				     th, check_profitability,
1776 				     cond_expr, cond_expr_stmt_list,
1777 				     0, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
1778   gcc_assert (new_loop);
1779   gcc_assert (loop_num == loop->num);
1780   slpeel_checking_verify_cfg_after_peeling (loop, new_loop);
1781 
1782   /* A guard that controls whether the new_loop is to be executed or skipped
1783      is placed in LOOP->exit.  LOOP->exit therefore has two successors - one
1784      is the preheader of NEW_LOOP, where the IVs from LOOP are used.  The other
1785      is a bb after NEW_LOOP, where these IVs are not used.  Find the edge that
1786      is on the path where the LOOP IVs are used and need to be updated.  */
1787 
1788   preheader = loop_preheader_edge (new_loop)->src;
1789   if (EDGE_PRED (preheader, 0)->src == single_exit (loop)->dest)
1790     update_e = EDGE_PRED (preheader, 0);
1791   else
1792     update_e = EDGE_PRED (preheader, 1);
1793 
1794   /* Update IVs of original loop as if they were advanced
1795      by ratio_mult_vf_name steps.  */
1796   vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
1797 
1798   /* For vectorization factor N, we need to copy last N-1 values in epilogue
1799      and this means N-2 loopback edge executions.
1800 
1801      PEELING_FOR_GAPS works by subtracting last iteration and thus the epilogue
1802      will execute at least LOOP_VINFO_VECT_FACTOR times.  */
1803   max_iter = (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
1804 	      ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) * 2
1805 	      : LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 2;
1806   if (check_profitability)
1807     max_iter = MAX (max_iter, (int) th - 1);
1808   record_niter_bound (new_loop, max_iter, false, true);
1809   dump_printf (MSG_NOTE,
1810                "Setting upper bound of nb iterations for epilogue "
1811                "loop to %d\n", max_iter);
1812 
1813   /* After peeling we have to reset scalar evolution analyzer.  */
1814   scev_reset ();
1815 
1816   free_original_copy_tables ();
1817 }
1818 
1819 
1820 /* Function vect_gen_niters_for_prolog_loop
1821 
1822    Set the number of iterations for the loop represented by LOOP_VINFO
1823    to the minimum between LOOP_NITERS (the original iteration count of the loop)
1824    and the misalignment of DR - the data reference recorded in
1825    LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).  As a result, after the execution of
1826    this loop, the data reference DR will refer to an aligned location.
1827 
1828    The following computation is generated:
1829 
1830    If the misalignment of DR is known at compile time:
1831      addr_mis = int mis = DR_MISALIGNMENT (dr);
1832    Else, compute address misalignment in bytes:
1833      addr_mis = addr & (vectype_align - 1)
1834 
1835    prolog_niters = min (LOOP_NITERS, ((VF - addr_mis/elem_size)&(VF-1))/step)
1836 
1837    (elem_size = element type size; an element is the scalar element whose type
1838    is the inner type of the vectype)
1839 
1840    When the step of the data-ref in the loop is not 1 (as in interleaved data
1841    and SLP), the number of iterations of the prolog must be divided by the step
1842    (which is equal to the size of interleaved group).
1843 
1844    The above formulas assume that VF == number of elements in the vector. This
1845    may not hold when there are multiple-types in the loop.
1846    In this case, for some data-references in the loop the VF does not represent
1847    the number of elements that fit in the vector.  Therefore, instead of VF we
1848    use TYPE_VECTOR_SUBPARTS.  */
1849 
1850 static tree
1851 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters, int *bound)
1852 {
1853   struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
1854   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1855   tree var;
1856   gimple_seq stmts;
1857   tree iters, iters_name;
1858   edge pe;
1859   basic_block new_bb;
1860   gimple *dr_stmt = DR_STMT (dr);
1861   stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
1862   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1863   int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
1864   tree niters_type = TREE_TYPE (loop_niters);
1865   int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1866 
1867   pe = loop_preheader_edge (loop);
1868 
1869   if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
1870     {
1871       int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
1872 
1873       if (dump_enabled_p ())
1874         dump_printf_loc (MSG_NOTE, vect_location,
1875                          "known peeling = %d.\n", npeel);
1876 
1877       iters = build_int_cst (niters_type, npeel);
1878       *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
1879     }
1880   else
1881     {
1882       gimple_seq new_stmts = NULL;
1883       bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
1884       tree offset = negative
1885 	  ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
1886       tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
1887 						&new_stmts, offset, loop);
1888       tree type = unsigned_type_for (TREE_TYPE (start_addr));
1889       tree vectype_align_minus_1 = build_int_cst (type, vectype_align - 1);
1890       HOST_WIDE_INT elem_size =
1891                 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1892       tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
1893       tree nelements_minus_1 = build_int_cst (type, nelements - 1);
1894       tree nelements_tree = build_int_cst (type, nelements);
1895       tree byte_misalign;
1896       tree elem_misalign;
1897 
1898       new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmts);
1899       gcc_assert (!new_bb);
1900 
1901       /* Create:  byte_misalign = addr & (vectype_align - 1)  */
1902       byte_misalign =
1903         fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr),
1904                      vectype_align_minus_1);
1905 
1906       /* Create:  elem_misalign = byte_misalign / element_size  */
1907       elem_misalign =
1908         fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
1909 
1910       /* Create:  (niters_type) (nelements - elem_misalign)&(nelements - 1)  */
1911       if (negative)
1912 	iters = fold_build2 (MINUS_EXPR, type, elem_misalign, nelements_tree);
1913       else
1914 	iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
1915       iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
1916       iters = fold_convert (niters_type, iters);
1917       *bound = nelements;
1918     }
1919 
1920   /* Create:  prolog_loop_niters = min (iters, loop_niters) */
1921   /* If the loop bound is known at compile time we already verified that it is
1922      greater than vf; since the misalignment ('iters') is at most vf, there's
1923      no need to generate the MIN_EXPR in this case.  */
1924   if (TREE_CODE (loop_niters) != INTEGER_CST)
1925     iters = fold_build2 (MIN_EXPR, niters_type, iters, loop_niters);
1926 
1927   if (dump_enabled_p ())
1928     {
1929       dump_printf_loc (MSG_NOTE, vect_location,
1930                        "niters for prolog loop: ");
1931       dump_generic_expr (MSG_NOTE, TDF_SLIM, iters);
1932       dump_printf (MSG_NOTE, "\n");
1933     }
1934 
1935   var = create_tmp_var (niters_type, "prolog_loop_niters");
1936   stmts = NULL;
1937   iters_name = force_gimple_operand (iters, &stmts, false, var);
1938 
1939   /* Insert stmt on loop preheader edge.  */
1940   if (stmts)
1941     {
1942       basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1943       gcc_assert (!new_bb);
1944     }
1945 
1946   return iters_name;
1947 }
1948 
1949 
1950 /* Function vect_update_init_of_dr
1951 
1952    NITERS iterations were peeled from LOOP.  DR represents a data reference
1953    in LOOP.  This function updates the information recorded in DR to
1954    account for the fact that the first NITERS iterations had already been
1955    executed.  Specifically, it updates the OFFSET field of DR.  */
1956 
1957 static void
1958 vect_update_init_of_dr (struct data_reference *dr, tree niters)
1959 {
1960   tree offset = DR_OFFSET (dr);
1961 
1962   niters = fold_build2 (MULT_EXPR, sizetype,
1963 			fold_convert (sizetype, niters),
1964 			fold_convert (sizetype, DR_STEP (dr)));
1965   offset = fold_build2 (PLUS_EXPR, sizetype,
1966 			fold_convert (sizetype, offset), niters);
1967   DR_OFFSET (dr) = offset;
1968 }
1969 
1970 
1971 /* Function vect_update_inits_of_drs
1972 
1973    NITERS iterations were peeled from the loop represented by LOOP_VINFO.
1974    This function updates the information recorded for the data references in
1975    the loop to account for the fact that the first NITERS iterations had
1976    already been executed.  Specifically, it updates the initial_condition of
1977    the access_function of all the data_references in the loop.  */
1978 
1979 static void
1980 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
1981 {
1982   unsigned int i;
1983   vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1984   struct data_reference *dr;
1985 
1986  if (dump_enabled_p ())
1987     dump_printf_loc (MSG_NOTE, vect_location,
1988                      "=== vect_update_inits_of_dr ===\n");
1989 
1990   FOR_EACH_VEC_ELT (datarefs, i, dr)
1991     vect_update_init_of_dr (dr, niters);
1992 }
1993 
1994 
1995 /* Function vect_do_peeling_for_alignment
1996 
1997    Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
1998    'niters' is set to the misalignment of one of the data references in the
1999    loop, thereby forcing it to refer to an aligned location at the beginning
2000    of the execution of this loop.  The data reference for which we are
2001    peeling is recorded in LOOP_VINFO_UNALIGNED_DR.  */
2002 
2003 void
2004 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, tree ni_name,
2005 			       unsigned int th, bool check_profitability)
2006 {
2007   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2008   struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
2009   tree niters_of_prolog_loop;
2010   tree wide_prolog_niters;
2011   struct loop *new_loop;
2012   int max_iter;
2013   int bound = 0;
2014 
2015   if (dump_enabled_p ())
2016     dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2017                      "loop peeled for vectorization to enhance"
2018                      " alignment\n");
2019 
2020   initialize_original_copy_tables ();
2021 
2022   gimple_seq stmts = NULL;
2023   gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2024   niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo,
2025 							   ni_name,
2026 							   &bound);
2027 
2028   /* Peel the prolog loop and iterate it niters_of_prolog_loop.  */
2029   new_loop =
2030     slpeel_tree_peel_loop_to_edge (loop, scalar_loop,
2031 				   loop_preheader_edge (loop),
2032 				   &niters_of_prolog_loop, ni_name, true,
2033 				   th, check_profitability, NULL_TREE, NULL,
2034 				   bound, 0);
2035 
2036   gcc_assert (new_loop);
2037   slpeel_checking_verify_cfg_after_peeling (new_loop, loop);
2038   /* For vectorization factor N, we need to copy at most N-1 values
2039      for alignment and this means N-2 loopback edge executions.  */
2040   max_iter = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 2;
2041   if (check_profitability)
2042     max_iter = MAX (max_iter, (int) th - 1);
2043   record_niter_bound (new_loop, max_iter, false, true);
2044   dump_printf (MSG_NOTE,
2045                "Setting upper bound of nb iterations for prologue "
2046                "loop to %d\n", max_iter);
2047 
2048   /* Update number of times loop executes.  */
2049   LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
2050 		TREE_TYPE (ni_name), ni_name, niters_of_prolog_loop);
2051   LOOP_VINFO_NITERSM1 (loop_vinfo) = fold_build2 (MINUS_EXPR,
2052 		TREE_TYPE (ni_name),
2053 		LOOP_VINFO_NITERSM1 (loop_vinfo), niters_of_prolog_loop);
2054 
2055   if (types_compatible_p (sizetype, TREE_TYPE (niters_of_prolog_loop)))
2056     wide_prolog_niters = niters_of_prolog_loop;
2057   else
2058     {
2059       gimple_seq seq = NULL;
2060       edge pe = loop_preheader_edge (loop);
2061       tree wide_iters = fold_convert (sizetype, niters_of_prolog_loop);
2062       tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters");
2063       wide_prolog_niters = force_gimple_operand (wide_iters, &seq, false,
2064                                                  var);
2065       if (seq)
2066 	{
2067 	  /* Insert stmt on loop preheader edge.  */
2068           basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2069           gcc_assert (!new_bb);
2070         }
2071     }
2072 
2073   /* Update the init conditions of the access functions of all data refs.  */
2074   vect_update_inits_of_drs (loop_vinfo, wide_prolog_niters);
2075 
2076   /* After peeling we have to reset scalar evolution analyzer.  */
2077   scev_reset ();
2078 
2079   free_original_copy_tables ();
2080 }
2081 
2082 
2083 /* Function vect_create_cond_for_align_checks.
2084 
2085    Create a conditional expression that represents the alignment checks for
2086    all of data references (array element references) whose alignment must be
2087    checked at runtime.
2088 
2089    Input:
2090    COND_EXPR  - input conditional expression.  New conditions will be chained
2091                 with logical AND operation.
2092    LOOP_VINFO - two fields of the loop information are used.
2093                 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
2094                 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
2095 
2096    Output:
2097    COND_EXPR_STMT_LIST - statements needed to construct the conditional
2098                          expression.
2099    The returned value is the conditional expression to be used in the if
2100    statement that controls which version of the loop gets executed at runtime.
2101 
2102    The algorithm makes two assumptions:
2103      1) The number of bytes "n" in a vector is a power of 2.
2104      2) An address "a" is aligned if a%n is zero and that this
2105         test can be done as a&(n-1) == 0.  For example, for 16
2106         byte vectors the test is a&0xf == 0.  */
2107 
2108 static void
2109 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
2110                                    tree *cond_expr,
2111 				   gimple_seq *cond_expr_stmt_list)
2112 {
2113   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2114   vec<gimple *> may_misalign_stmts
2115     = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2116   gimple *ref_stmt;
2117   int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
2118   tree mask_cst;
2119   unsigned int i;
2120   tree int_ptrsize_type;
2121   char tmp_name[20];
2122   tree or_tmp_name = NULL_TREE;
2123   tree and_tmp_name;
2124   gimple *and_stmt;
2125   tree ptrsize_zero;
2126   tree part_cond_expr;
2127 
2128   /* Check that mask is one less than a power of 2, i.e., mask is
2129      all zeros followed by all ones.  */
2130   gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
2131 
2132   int_ptrsize_type = signed_type_for (ptr_type_node);
2133 
2134   /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
2135      of the first vector of the i'th data reference. */
2136 
2137   FOR_EACH_VEC_ELT (may_misalign_stmts, i, ref_stmt)
2138     {
2139       gimple_seq new_stmt_list = NULL;
2140       tree addr_base;
2141       tree addr_tmp_name;
2142       tree new_or_tmp_name;
2143       gimple *addr_stmt, *or_stmt;
2144       stmt_vec_info stmt_vinfo = vinfo_for_stmt (ref_stmt);
2145       tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2146       bool negative = tree_int_cst_compare
2147 	(DR_STEP (STMT_VINFO_DATA_REF (stmt_vinfo)), size_zero_node) < 0;
2148       tree offset = negative
2149 	? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
2150 
2151       /* create: addr_tmp = (int)(address_of_first_vector) */
2152       addr_base =
2153 	vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
2154 					      offset, loop);
2155       if (new_stmt_list != NULL)
2156 	gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
2157 
2158       sprintf (tmp_name, "addr2int%d", i);
2159       addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
2160       addr_stmt = gimple_build_assign (addr_tmp_name, NOP_EXPR, addr_base);
2161       gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
2162 
2163       /* The addresses are OR together.  */
2164 
2165       if (or_tmp_name != NULL_TREE)
2166         {
2167           /* create: or_tmp = or_tmp | addr_tmp */
2168           sprintf (tmp_name, "orptrs%d", i);
2169 	  new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
2170 	  or_stmt = gimple_build_assign (new_or_tmp_name, BIT_IOR_EXPR,
2171 					 or_tmp_name, addr_tmp_name);
2172 	  gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
2173           or_tmp_name = new_or_tmp_name;
2174         }
2175       else
2176         or_tmp_name = addr_tmp_name;
2177 
2178     } /* end for i */
2179 
2180   mask_cst = build_int_cst (int_ptrsize_type, mask);
2181 
2182   /* create: and_tmp = or_tmp & mask  */
2183   and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");
2184 
2185   and_stmt = gimple_build_assign (and_tmp_name, BIT_AND_EXPR,
2186 				  or_tmp_name, mask_cst);
2187   gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
2188 
2189   /* Make and_tmp the left operand of the conditional test against zero.
2190      if and_tmp has a nonzero bit then some address is unaligned.  */
2191   ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
2192   part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
2193 				and_tmp_name, ptrsize_zero);
2194   if (*cond_expr)
2195     *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2196 			      *cond_expr, part_cond_expr);
2197   else
2198     *cond_expr = part_cond_expr;
2199 }
2200 
2201 /* Function vect_create_cond_for_alias_checks.
2202 
2203    Create a conditional expression that represents the run-time checks for
2204    overlapping of address ranges represented by a list of data references
2205    relations passed as input.
2206 
2207    Input:
2208    COND_EXPR  - input conditional expression.  New conditions will be chained
2209                 with logical AND operation.  If it is NULL, then the function
2210                 is used to return the number of alias checks.
2211    LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
2212 	        to be checked.
2213 
2214    Output:
2215    COND_EXPR - conditional expression.
2216 
2217    The returned COND_EXPR is the conditional expression to be used in the if
2218    statement that controls which version of the loop gets executed at runtime.
2219 */
2220 
2221 void
2222 vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
2223 {
2224   vec<dr_with_seg_len_pair_t> comp_alias_ddrs =
2225     LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
2226   tree part_cond_expr;
2227 
2228   /* Create expression
2229      ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
2230      || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
2231      &&
2232      ...
2233      &&
2234      ((store_ptr_n + store_segment_length_n) <= load_ptr_n)
2235      || (load_ptr_n + load_segment_length_n) <= store_ptr_n))  */
2236 
2237   if (comp_alias_ddrs.is_empty ())
2238     return;
2239 
2240   for (size_t i = 0, s = comp_alias_ddrs.length (); i < s; ++i)
2241     {
2242       const dr_with_seg_len& dr_a = comp_alias_ddrs[i].first;
2243       const dr_with_seg_len& dr_b = comp_alias_ddrs[i].second;
2244       tree segment_length_a = dr_a.seg_len;
2245       tree segment_length_b = dr_b.seg_len;
2246 
2247       tree addr_base_a
2248 	= fold_build_pointer_plus (DR_BASE_ADDRESS (dr_a.dr), dr_a.offset);
2249       tree addr_base_b
2250 	= fold_build_pointer_plus (DR_BASE_ADDRESS (dr_b.dr), dr_b.offset);
2251 
2252       if (dump_enabled_p ())
2253 	{
2254 	  dump_printf_loc (MSG_NOTE, vect_location,
2255 			   "create runtime check for data references ");
2256 	  dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a.dr));
2257 	  dump_printf (MSG_NOTE, " and ");
2258 	  dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b.dr));
2259 	  dump_printf (MSG_NOTE, "\n");
2260 	}
2261 
2262       tree seg_a_min = addr_base_a;
2263       tree seg_a_max = fold_build_pointer_plus (addr_base_a, segment_length_a);
2264       /* For negative step, we need to adjust address range by TYPE_SIZE_UNIT
2265 	 bytes, e.g., int a[3] -> a[1] range is [a+4, a+16) instead of
2266 	 [a, a+12) */
2267       if (tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0)
2268 	{
2269 	  tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a.dr)));
2270 	  seg_a_min = fold_build_pointer_plus (seg_a_max, unit_size);
2271 	  seg_a_max = fold_build_pointer_plus (addr_base_a, unit_size);
2272 	}
2273 
2274       tree seg_b_min = addr_base_b;
2275       tree seg_b_max = fold_build_pointer_plus (addr_base_b, segment_length_b);
2276       if (tree_int_cst_compare (DR_STEP (dr_b.dr), size_zero_node) < 0)
2277 	{
2278 	  tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_b.dr)));
2279 	  seg_b_min = fold_build_pointer_plus (seg_b_max, unit_size);
2280 	  seg_b_max = fold_build_pointer_plus (addr_base_b, unit_size);
2281 	}
2282 
2283       part_cond_expr =
2284       	fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2285 	  fold_build2 (LE_EXPR, boolean_type_node, seg_a_max, seg_b_min),
2286 	  fold_build2 (LE_EXPR, boolean_type_node, seg_b_max, seg_a_min));
2287 
2288       if (*cond_expr)
2289 	*cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2290 				  *cond_expr, part_cond_expr);
2291       else
2292 	*cond_expr = part_cond_expr;
2293     }
2294 
2295   if (dump_enabled_p ())
2296     dump_printf_loc (MSG_NOTE, vect_location,
2297 		     "created %u versioning for alias checks.\n",
2298 		     comp_alias_ddrs.length ());
2299 }
2300 
2301 
2302 /* Function vect_loop_versioning.
2303 
2304    If the loop has data references that may or may not be aligned or/and
2305    has data reference relations whose independence was not proven then
2306    two versions of the loop need to be generated, one which is vectorized
2307    and one which isn't.  A test is then generated to control which of the
2308    loops is executed.  The test checks for the alignment of all of the
2309    data references that may or may not be aligned.  An additional
2310    sequence of runtime tests is generated for each pairs of DDRs whose
2311    independence was not proven.  The vectorized version of loop is
2312    executed only if both alias and alignment tests are passed.
2313 
2314    The test generated to check which version of loop is executed
2315    is modified to also check for profitability as indicated by the
2316    cost model initially.
2317 
2318    The versioning precondition(s) are placed in *COND_EXPR and
2319    *COND_EXPR_STMT_LIST.  */
2320 
2321 void
2322 vect_loop_versioning (loop_vec_info loop_vinfo,
2323 		      unsigned int th, bool check_profitability)
2324 {
2325   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2326   struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
2327   basic_block condition_bb;
2328   gphi_iterator gsi;
2329   gimple_stmt_iterator cond_exp_gsi;
2330   basic_block merge_bb;
2331   basic_block new_exit_bb;
2332   edge new_exit_e, e;
2333   gphi *orig_phi, *new_phi;
2334   tree cond_expr = NULL_TREE;
2335   gimple_seq cond_expr_stmt_list = NULL;
2336   tree arg;
2337   unsigned prob = 4 * REG_BR_PROB_BASE / 5;
2338   gimple_seq gimplify_stmt_list = NULL;
2339   tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
2340   bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
2341   bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
2342 
2343   if (check_profitability)
2344     {
2345       cond_expr = fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
2346 			       build_int_cst (TREE_TYPE (scalar_loop_iters), th));
2347       cond_expr = force_gimple_operand_1 (cond_expr, &cond_expr_stmt_list,
2348 					  is_gimple_condexpr, NULL_TREE);
2349     }
2350 
2351   if (version_align)
2352     vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
2353 				       &cond_expr_stmt_list);
2354 
2355   if (version_alias)
2356     vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
2357 
2358   cond_expr = force_gimple_operand_1 (cond_expr, &gimplify_stmt_list,
2359 				      is_gimple_condexpr, NULL_TREE);
2360   gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
2361 
2362   initialize_original_copy_tables ();
2363   if (scalar_loop)
2364     {
2365       edge scalar_e;
2366       basic_block preheader, scalar_preheader;
2367 
2368       /* We don't want to scale SCALAR_LOOP's frequencies, we need to
2369 	 scale LOOP's frequencies instead.  */
2370       loop_version (scalar_loop, cond_expr, &condition_bb,
2371 		    prob, REG_BR_PROB_BASE, REG_BR_PROB_BASE - prob, true);
2372       scale_loop_frequencies (loop, prob, REG_BR_PROB_BASE);
2373       /* CONDITION_BB was created above SCALAR_LOOP's preheader,
2374 	 while we need to move it above LOOP's preheader.  */
2375       e = loop_preheader_edge (loop);
2376       scalar_e = loop_preheader_edge (scalar_loop);
2377       gcc_assert (empty_block_p (e->src)
2378 		  && single_pred_p (e->src));
2379       gcc_assert (empty_block_p (scalar_e->src)
2380 		  && single_pred_p (scalar_e->src));
2381       gcc_assert (single_pred_p (condition_bb));
2382       preheader = e->src;
2383       scalar_preheader = scalar_e->src;
2384       scalar_e = find_edge (condition_bb, scalar_preheader);
2385       e = single_pred_edge (preheader);
2386       redirect_edge_and_branch_force (single_pred_edge (condition_bb),
2387 				      scalar_preheader);
2388       redirect_edge_and_branch_force (scalar_e, preheader);
2389       redirect_edge_and_branch_force (e, condition_bb);
2390       set_immediate_dominator (CDI_DOMINATORS, condition_bb,
2391 			       single_pred (condition_bb));
2392       set_immediate_dominator (CDI_DOMINATORS, scalar_preheader,
2393 			       single_pred (scalar_preheader));
2394       set_immediate_dominator (CDI_DOMINATORS, preheader,
2395 			       condition_bb);
2396     }
2397   else
2398     loop_version (loop, cond_expr, &condition_bb,
2399 		  prob, prob, REG_BR_PROB_BASE - prob, true);
2400 
2401   if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
2402       && dump_enabled_p ())
2403     {
2404       if (version_alias)
2405         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2406                          "loop versioned for vectorization because of "
2407 			 "possible aliasing\n");
2408       if (version_align)
2409         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2410                          "loop versioned for vectorization to enhance "
2411 			 "alignment\n");
2412 
2413     }
2414   free_original_copy_tables ();
2415 
2416   /* Loop versioning violates an assumption we try to maintain during
2417      vectorization - that the loop exit block has a single predecessor.
2418      After versioning, the exit block of both loop versions is the same
2419      basic block (i.e. it has two predecessors). Just in order to simplify
2420      following transformations in the vectorizer, we fix this situation
2421      here by adding a new (empty) block on the exit-edge of the loop,
2422      with the proper loop-exit phis to maintain loop-closed-form.
2423      If loop versioning wasn't done from loop, but scalar_loop instead,
2424      merge_bb will have already just a single successor.  */
2425 
2426   merge_bb = single_exit (loop)->dest;
2427   if (scalar_loop == NULL || EDGE_COUNT (merge_bb->preds) >= 2)
2428     {
2429       gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2);
2430       new_exit_bb = split_edge (single_exit (loop));
2431       new_exit_e = single_exit (loop);
2432       e = EDGE_SUCC (new_exit_bb, 0);
2433 
2434       for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
2435 	{
2436 	  tree new_res;
2437 	  orig_phi = gsi.phi ();
2438 	  new_res = copy_ssa_name (PHI_RESULT (orig_phi));
2439 	  new_phi = create_phi_node (new_res, new_exit_bb);
2440 	  arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
2441 	  add_phi_arg (new_phi, arg, new_exit_e,
2442 		       gimple_phi_arg_location_from_edge (orig_phi, e));
2443 	  adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
2444 	}
2445     }
2446 
2447   /* End loop-exit-fixes after versioning.  */
2448 
2449   if (cond_expr_stmt_list)
2450     {
2451       cond_exp_gsi = gsi_last_bb (condition_bb);
2452       gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
2453 			     GSI_SAME_STMT);
2454     }
2455   update_ssa (TODO_update_ssa);
2456 }
2457