/* Vectorizer Specific Loop Manipulations
   Copyright (C) 2003-2015 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "dumpfile.h"
#include "tm.h"
#include "hash-set.h"
#include "machmode.h"
#include "vec.h"
#include "double-int.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "fold-const.h"
#include "predict.h"
#include "hard-reg-set.h"
#include "input.h"
#include "function.h"
#include "dominance.h"
#include "cfg.h"
#include "cfganal.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-manip.h"
#include "tree-into-ssa.h"
#include "tree-ssa.h"
#include "tree-pass.h"
#include "cfgloop.h"
#include "diagnostic-core.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "langhooks.h"

/*************************************************************************
  Simple Loop Peeling Utilities

  Utilities to support loop peeling for vectorization purposes.
 *************************************************************************/


/* Renames the use *OP_P.  */

static void
rename_use_op (use_operand_p op_p)
{
  tree new_name;

  if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
    return;

  new_name = get_current_def (USE_FROM_PTR (op_p));

  /* Something defined outside of the loop.  */
  if (!new_name)
    return;

  /* An ordinary ssa name defined in the loop.  */

  SET_USE (op_p, new_name);
}
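
/* For illustration only (names invented): if loop copying has
   recorded get_current_def (x_1) == x_7, then rename_use_op rewrites
   a use of x_1 in the copied body, e.g.

       a_5 = x_1 + 1;   becomes   a_5 = x_7 + 1;

   while uses of names with no recorded current def (anything defined
   outside the loop) are left untouched.  */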

/* Renames the variables in basic block BB.  */

static void
rename_variables_in_bb (basic_block bb)
{
  gimple stmt;
  use_operand_p use_p;
  ssa_op_iter iter;
  edge e;
  edge_iterator ei;
  struct loop *loop = bb->loop_father;

  for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
       gsi_next (&gsi))
    {
      stmt = gsi_stmt (gsi);
      FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
	rename_use_op (use_p);
    }

  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      if (!flow_bb_inside_loop_p (loop, e->src))
	continue;
      for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
    }
}


typedef struct
{
  tree from, to;
  basic_block bb;
} adjust_info;

/* A stack of values to be adjusted in debug stmts.  We have to
   process them LIFO, so that the closest substitution applies.  If we
   processed them FIFO, without the stack, we might substitute uses
   with a PHI DEF that would soon become non-dominant, and when we got
   to the suitable one, it wouldn't have anything to substitute any
   more.  */
static vec<adjust_info, va_heap> adjust_vec;
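
/* Illustrative example (assumed scenario) of why LIFO order matters:
   two adjustments may be queued for the same original name d_3, an
   earlier one mapping its debug uses to a phi result d_9 and a later
   one mapping them to d_12, whose block dominates the same uses more
   closely.  Popping the stack applies d_12 first; the uses that d_12
   does not dominate can still be rewritten to d_9 afterwards.
   Processing FIFO would rewrite everything to d_9 up front, leaving
   nothing for the closer d_12 to substitute.  */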

/* Adjust any debug stmts that referenced AI->from values to use the
   loop-closed AI->to, if the references are dominated by AI->bb and
   not by the definition of AI->from.  */

static void
adjust_debug_stmts_now (adjust_info *ai)
{
  basic_block bbphi = ai->bb;
  tree orig_def = ai->from;
  tree new_def = ai->to;
  imm_use_iterator imm_iter;
  gimple stmt;
  basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));

  gcc_assert (dom_info_available_p (CDI_DOMINATORS));

  /* Adjust any debug stmts that held onto non-loop-closed
     references.  */
  FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
    {
      use_operand_p use_p;
      basic_block bbuse;

      if (!is_gimple_debug (stmt))
	continue;

      gcc_assert (gimple_debug_bind_p (stmt));

      bbuse = gimple_bb (stmt);

      if ((bbuse == bbphi
	   || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
	  && !(bbuse == bbdef
	       || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
	{
	  if (new_def)
	    FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
	      SET_USE (use_p, new_def);
	  else
	    {
	      gimple_debug_bind_reset_value (stmt);
	      update_stmt (stmt);
	    }
	}
    }
}

/* Adjust debug stmts as scheduled before.  */

static void
adjust_vec_debug_stmts (void)
{
  if (!MAY_HAVE_DEBUG_STMTS)
    return;

  gcc_assert (adjust_vec.exists ());

  while (!adjust_vec.is_empty ())
    {
      adjust_debug_stmts_now (&adjust_vec.last ());
      adjust_vec.pop ();
    }

  adjust_vec.release ();
}

/* Adjust any debug stmts that referenced FROM values to use the
   loop-closed TO, if the references are dominated by BB and not by
   the definition of FROM.  If ADJUST_VEC has been created, the
   adjustments are postponed until adjust_vec_debug_stmts is
   called.  */

static void
adjust_debug_stmts (tree from, tree to, basic_block bb)
{
  adjust_info ai;

  if (MAY_HAVE_DEBUG_STMTS
      && TREE_CODE (from) == SSA_NAME
      && ! SSA_NAME_IS_DEFAULT_DEF (from)
      && ! virtual_operand_p (from))
    {
      ai.from = from;
      ai.to = to;
      ai.bb = bb;

      if (adjust_vec.exists ())
	adjust_vec.safe_push (ai);
      else
	adjust_debug_stmts_now (&ai);
    }
}

/* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
   to adjust any debug stmts that referenced the old phi arg,
   presumably non-loop-closed references left over from other
   transformations.  */

static void
adjust_phi_and_debug_stmts (gimple update_phi, edge e, tree new_def)
{
  tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);

  SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);

  if (MAY_HAVE_DEBUG_STMTS)
    adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
			gimple_bb (update_phi));
}


/* Update PHI nodes for a guard of the LOOP.

   Input:
   - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
        controls whether LOOP is to be executed.  GUARD_EDGE is the edge that
        originates from the guard-bb, skips LOOP and reaches the (unique) exit
        bb of LOOP.  This loop-exit-bb is an empty bb with one successor.
        We denote this bb NEW_MERGE_BB because before the guard code was added
        it had a single predecessor (the LOOP header), and now it became a merge
        point of two paths - the path that ends with the LOOP exit-edge, and
        the path that ends with GUARD_EDGE.
   - NEW_EXIT_BB: New basic block that is added by this function between LOOP
        and NEW_MERGE_BB.  It is used to place loop-closed-ssa-form exit-phis.

   ===> The CFG before the guard-code was added:
        LOOP_header_bb:
          loop_body
          if (exit_loop) goto update_bb
          else           goto LOOP_header_bb
        update_bb:

   ==> The CFG after the guard-code was added:
        guard_bb:
          if (LOOP_guard_condition) goto new_merge_bb
          else                      goto LOOP_header_bb
        LOOP_header_bb:
          loop_body
          if (exit_loop_condition) goto new_merge_bb
          else                     goto LOOP_header_bb
        new_merge_bb:
          goto update_bb
        update_bb:

   ==> The CFG after this function:
        guard_bb:
          if (LOOP_guard_condition) goto new_merge_bb
          else                      goto LOOP_header_bb
        LOOP_header_bb:
          loop_body
          if (exit_loop_condition) goto new_exit_bb
          else                     goto LOOP_header_bb
        new_exit_bb:
        new_merge_bb:
          goto update_bb
        update_bb:

   This function:
   1. creates and updates the relevant phi nodes to account for the new
      incoming edge (GUARD_EDGE) into NEW_MERGE_BB.  This involves:
      1.1. Create phi nodes at NEW_MERGE_BB.
      1.2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
           UPDATE_BB).  UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
           was added.
   2. preserves loop-closed-ssa-form by creating the required phi nodes
      at the exit of LOOP (i.e., in NEW_EXIT_BB).

   There are two flavors to this function:

   slpeel_update_phi_nodes_for_guard1:
     Here the guard controls whether we enter or skip LOOP, where LOOP is a
     prolog_loop (loop1 below), and the new phis created in NEW_MERGE_BB are
     for variables that have phis in the loop header.

   slpeel_update_phi_nodes_for_guard2:
     Here the guard controls whether we enter or skip LOOP, where LOOP is an
     epilog_loop (loop2 below), and the new phis created in NEW_MERGE_BB are
     for variables that have phis in the loop exit.

   I.e., the overall structure is:

        loop1_preheader_bb:
                guard1 (goto loop1/merge1_bb)
        loop1
        loop1_exit_bb:
                guard2 (goto merge1_bb/merge2_bb)
        merge1_bb
        loop2
        loop2_exit_bb
        merge2_bb
        next_bb

   slpeel_update_phi_nodes_for_guard1 takes care of creating phis in
   loop1_exit_bb and merge1_bb.  These are entry phis (phis for the vars
   that have phis in loop1->header).

   slpeel_update_phi_nodes_for_guard2 takes care of creating phis in
   loop2_exit_bb and merge2_bb.  These are exit phis (phis for the vars
   that have phis in next_bb).  It also adds some of these phis to
   loop1_exit_bb.

   slpeel_update_phi_nodes_for_guard1 is always called before
   slpeel_update_phi_nodes_for_guard2.  They are both needed in order
   to create correct data-flow and loop-closed-ssa-form.

   Generally slpeel_update_phi_nodes_for_guard1 creates phis for variables
   that change between iterations of a loop (and therefore have a phi-node
   at the loop entry), whereas slpeel_update_phi_nodes_for_guard2 creates
   phis for variables that are used out of the loop (and therefore have
   loop-closed exit phis).  Some variables may be both updated between
   iterations and used after the loop.  This is why in loop1_exit_bb we
   may need both entry phis (created by slpeel_update_phi_nodes_for_guard1)
   and exit phis (created by slpeel_update_phi_nodes_for_guard2).

   - IS_NEW_LOOP: if IS_NEW_LOOP is true, then LOOP is a newly created copy of
     an original loop.  I.e., we have:

           orig_loop
           guard_bb (goto LOOP/new_merge)
           new_loop <-- LOOP
           new_exit
           new_merge
           next_bb

     If IS_NEW_LOOP is false, then LOOP is an original loop, in which case we
     have:

           new_loop
           guard_bb (goto LOOP/new_merge)
           orig_loop <-- LOOP
           new_exit
           new_merge
           next_bb

     The SSA names defined in the original loop have a current
     reaching definition that records the corresponding new
     ssa-name used in the new duplicated loop copy.
  */
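
/* Concrete example (illustrative): for a summation

       s_1 = PHI <init, s_2>    in loop1's header
       loop body:  s_2 = s_1 + a[i];
       ... s used after both loops ...

   s is both updated between iterations and used after the loop, so
   slpeel_update_phi_nodes_for_guard1 creates an entry phi for it in
   merge1_bb and an exit phi in loop1_exit_bb, and
   slpeel_update_phi_nodes_for_guard2 later creates the exit phis for
   it in loop2_exit_bb and merge2_bb.  */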

/* Function slpeel_update_phi_nodes_for_guard1

   Input:
   - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.

   In the context of the overall structure, we have:

        loop1_preheader_bb:
                guard1 (goto loop1/merge1_bb)
LOOP->  loop1
        loop1_exit_bb:
                guard2 (goto merge1_bb/merge2_bb)
        merge1_bb
        loop2
        loop2_exit_bb
        merge2_bb
        next_bb

   For each name updated between loop iterations (i.e., for each name that has
   an entry (loop-header) phi in LOOP) we create a new phi in:
   1. merge1_bb (to account for the edge from guard1)
   2. loop1_exit_bb (an exit-phi to keep LOOP in loop-closed form)
*/

static void
slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
                                    bool is_new_loop, basic_block *new_exit_bb)
{
  gphi *orig_phi, *new_phi;
  gphi *update_phi, *update_phi2;
  tree guard_arg, loop_arg;
  basic_block new_merge_bb = guard_edge->dest;
  edge e = EDGE_SUCC (new_merge_bb, 0);
  basic_block update_bb = e->dest;
  basic_block orig_bb = loop->header;
  edge new_exit_e;
  tree current_new_name;
  gphi_iterator gsi_orig, gsi_update;

  /* Create new bb between loop and new_merge_bb.  */
  *new_exit_bb = split_edge (single_exit (loop));

  new_exit_e = EDGE_SUCC (*new_exit_bb, 0);

  for (gsi_orig = gsi_start_phis (orig_bb),
       gsi_update = gsi_start_phis (update_bb);
       !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
       gsi_next (&gsi_orig), gsi_next (&gsi_update))
    {
      source_location loop_locus, guard_locus;
      tree new_res;
      orig_phi = gsi_orig.phi ();
      update_phi = gsi_update.phi ();

      /** 1. Handle new-merge-point phis  **/

      /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
      new_res = copy_ssa_name (PHI_RESULT (orig_phi));
      new_phi = create_phi_node (new_res, new_merge_bb);

      /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
            of LOOP.  Set the two phi args in NEW_PHI for these edges:  */
      loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, EDGE_SUCC (loop->latch, 0));
      loop_locus = gimple_phi_arg_location_from_edge (orig_phi,
						      EDGE_SUCC (loop->latch,
								 0));
      guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop_preheader_edge (loop));
      guard_locus
	= gimple_phi_arg_location_from_edge (orig_phi,
					     loop_preheader_edge (loop));

      add_phi_arg (new_phi, loop_arg, new_exit_e, loop_locus);
      add_phi_arg (new_phi, guard_arg, guard_edge, guard_locus);

      /* 1.3. Update phi in successor block.  */
      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
                  || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
      adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
      update_phi2 = new_phi;


      /** 2. Handle loop-closed-ssa-form phis  **/

      if (virtual_operand_p (PHI_RESULT (orig_phi)))
	continue;

      /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
      new_res = copy_ssa_name (PHI_RESULT (orig_phi));
      new_phi = create_phi_node (new_res, *new_exit_bb);

      /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
      add_phi_arg (new_phi, loop_arg, single_exit (loop), loop_locus);

      /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
      adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
				  PHI_RESULT (new_phi));

      /* 2.4. Record the newly created name with set_current_def.
         We want to find a name such that
                name = get_current_def (orig_loop_name)
         and to set its current definition as follows:
                set_current_def (name, new_phi_name)

         If LOOP is a new loop then loop_arg is already the name we're
         looking for.  If LOOP is the original loop, then loop_arg is
         the orig_loop_name and the relevant name is recorded in its
         current reaching definition.  */
      if (is_new_loop)
        current_new_name = loop_arg;
      else
        {
          current_new_name = get_current_def (loop_arg);
	  /* current_def is not available only if the variable does not
	     change inside the loop, in which case we also don't care
	     about recording a current_def for it because we won't be
	     trying to create loop-exit-phis for it.  */
	  if (!current_new_name)
	    continue;
        }
      tree new_name = get_current_def (current_new_name);
      /* Because of the peeled_chrec optimization it is possible that we
	 have set this earlier.  Verify the PHI has the same value.  */
      if (new_name)
	{
	  gimple phi = SSA_NAME_DEF_STMT (new_name);
	  gcc_assert (gimple_code (phi) == GIMPLE_PHI
		      && gimple_bb (phi) == *new_exit_bb
		      && (PHI_ARG_DEF_FROM_EDGE (phi, single_exit (loop))
			  == loop_arg));
	  continue;
	}

      set_current_def (current_new_name, PHI_RESULT (new_phi));
    }
}


/* Function slpeel_update_phi_nodes_for_guard2

   Input:
   - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.

   In the context of the overall structure, we have:

        loop1_preheader_bb:
                guard1 (goto loop1/merge1_bb)
        loop1
        loop1_exit_bb:
                guard2 (goto merge1_bb/merge2_bb)
        merge1_bb
LOOP->  loop2
        loop2_exit_bb
        merge2_bb
        next_bb

   For each name used outside the loop (i.e., for each name that has an exit
   phi in next_bb) we create a new phi in:
   1. merge2_bb (to account for the edge from guard_bb)
   2. loop2_exit_bb (an exit-phi to keep LOOP in loop-closed form)
   3. guard2 bb (an exit phi to keep the preceding loop in loop-closed form),
      if needed (if it wasn't handled by slpeel_update_phi_nodes_for_guard1).
*/

static void
slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
                                    bool is_new_loop, basic_block *new_exit_bb)
{
  gphi *orig_phi, *new_phi;
  gphi *update_phi, *update_phi2;
  tree guard_arg, loop_arg;
  basic_block new_merge_bb = guard_edge->dest;
  edge e = EDGE_SUCC (new_merge_bb, 0);
  basic_block update_bb = e->dest;
  edge new_exit_e;
  tree orig_def, orig_def_new_name;
  tree new_name, new_name2;
  tree arg;
  gphi_iterator gsi;

  /* Create new bb between loop and new_merge_bb.  */
  *new_exit_bb = split_edge (single_exit (loop));

  new_exit_e = EDGE_SUCC (*new_exit_bb, 0);

  for (gsi = gsi_start_phis (update_bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      tree new_res;
      update_phi = gsi.phi ();
      orig_phi = update_phi;
      orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
      /* This loop-closed-phi actually doesn't represent a use
         out of the loop - the phi arg is a constant.  */
      if (TREE_CODE (orig_def) != SSA_NAME)
        continue;
      orig_def_new_name = get_current_def (orig_def);
      arg = NULL_TREE;

      /** 1. Handle new-merge-point phis  **/

      /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
      new_res = copy_ssa_name (PHI_RESULT (orig_phi));
      new_phi = create_phi_node (new_res, new_merge_bb);

      /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
            of LOOP.  Set the two PHI args in NEW_PHI for these edges:  */
      new_name = orig_def;
      new_name2 = NULL_TREE;
      if (orig_def_new_name)
        {
          new_name = orig_def_new_name;
	  /* Some variables have both loop-entry-phis and loop-exit-phis.
	     Such variables were given yet newer names by phis placed in
	     guard_bb by slpeel_update_phi_nodes_for_guard1.  I.e:
	     new_name2 = get_current_def (get_current_def (orig_name)).  */
          new_name2 = get_current_def (new_name);
        }

      if (is_new_loop)
        {
          guard_arg = orig_def;
          loop_arg = new_name;
        }
      else
        {
          guard_arg = new_name;
          loop_arg = orig_def;
        }
      if (new_name2)
        guard_arg = new_name2;

      add_phi_arg (new_phi, loop_arg, new_exit_e, UNKNOWN_LOCATION);
      add_phi_arg (new_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);

      /* 1.3. Update phi in successor block.  */
      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == orig_def);
      adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
      update_phi2 = new_phi;


      /** 2. Handle loop-closed-ssa-form phis  **/

      /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
      new_res = copy_ssa_name (PHI_RESULT (orig_phi));
      new_phi = create_phi_node (new_res, *new_exit_bb);

      /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
      add_phi_arg (new_phi, loop_arg, single_exit (loop), UNKNOWN_LOCATION);

      /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
      adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
				  PHI_RESULT (new_phi));


      /** 3. Handle loop-closed-ssa-form phis for first loop  **/

      /* 3.1. Find the relevant names that need an exit-phi in
	 GUARD_BB, i.e. names for which
	 slpeel_update_phi_nodes_for_guard1 had not already created a
	 phi node.  This is the case for names that are used outside
	 the loop (and therefore need an exit phi) but are not updated
	 across loop iterations (and therefore don't have a
	 loop-header-phi).

	 slpeel_update_phi_nodes_for_guard1 is responsible for
	 creating loop-exit phis in GUARD_BB for names that have a
	 loop-header-phi.  When such a phi is created we also record
	 the new name in its current definition.  If this new name
	 exists, then guard_arg was set to this new name (see 1.2
	 above).  Therefore, if guard_arg is not this new name, this
	 is an indication that an exit-phi in GUARD_BB was not yet
	 created, so we take care of it here.  */
      if (guard_arg == new_name2)
	continue;
      arg = guard_arg;

      /* 3.2. Generate new phi node in GUARD_BB:  */
      new_res = copy_ssa_name (PHI_RESULT (orig_phi));
      new_phi = create_phi_node (new_res, guard_edge->src);

      /* 3.3. GUARD_BB has one incoming edge:  */
      gcc_assert (EDGE_COUNT (guard_edge->src->preds) == 1);
      add_phi_arg (new_phi, arg, EDGE_PRED (guard_edge->src, 0),
		   UNKNOWN_LOCATION);

      /* 3.4. Update phi in successor of GUARD_BB:  */
      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, guard_edge)
                                                                == guard_arg);
      adjust_phi_and_debug_stmts (update_phi2, guard_edge,
				  PHI_RESULT (new_phi));
    }
}


/* Make the LOOP iterate NITERS times.  This is done by adding a new IV
   that starts at zero, increases by one in each iteration, and whose
   limit is NITERS.

   Assumption: the exit-condition of LOOP is the last stmt in the loop.  */

void
slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
{
  tree indx_before_incr, indx_after_incr;
  gcond *cond_stmt;
  gcond *orig_cond;
  edge exit_edge = single_exit (loop);
  gimple_stmt_iterator loop_cond_gsi;
  gimple_stmt_iterator incr_gsi;
  bool insert_after;
  tree init = build_int_cst (TREE_TYPE (niters), 0);
  tree step = build_int_cst (TREE_TYPE (niters), 1);
  source_location loop_loc;
  enum tree_code code;

  orig_cond = get_loop_exit_condition (loop);
  gcc_assert (orig_cond);
  loop_cond_gsi = gsi_for_stmt (orig_cond);

  standard_iv_increment_position (loop, &incr_gsi, &insert_after);
  create_iv (init, step, NULL_TREE, loop,
             &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);

  indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
					      true, NULL_TREE, true,
					      GSI_SAME_STMT);
  niters = force_gimple_operand_gsi (&loop_cond_gsi, niters, true, NULL_TREE,
				     true, GSI_SAME_STMT);

  code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
  cond_stmt = gimple_build_cond (code, indx_after_incr, niters, NULL_TREE,
				 NULL_TREE);

  gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);

  /* Remove old loop exit test:  */
  gsi_remove (&loop_cond_gsi, true);
  free_stmt_vec_info (orig_cond);

  loop_loc = find_loop_location (loop);
  if (dump_enabled_p ())
    {
      if (LOCATION_LOCUS (loop_loc) != UNKNOWN_LOCATION)
	dump_printf (MSG_NOTE, "\nloop at %s:%d: ", LOCATION_FILE (loop_loc),
		     LOCATION_LINE (loop_loc));
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, cond_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }
  loop->nb_iterations = niters;
}
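
/* Illustrative shape of the rewritten exit test (names invented):
   assuming the exit edge is the TRUE edge of the condition, the loop
   now ends with

       indx_after = indx_before + 1;
       if (indx_after >= niters) goto exit;
       else                      goto header;

   and the loop's original exit condition has been removed.  */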

/* Helper routine of slpeel_tree_duplicate_loop_to_edge_cfg.
   For all PHI arguments in FROM->dest and TO->dest coming from those
   edges, ensure that the TO->dest arguments have their current_def
   set to the corresponding FROM->dest arguments.  */

static void
slpeel_duplicate_current_defs_from_edges (edge from, edge to)
{
  gimple_stmt_iterator gsi_from, gsi_to;

  for (gsi_from = gsi_start_phis (from->dest),
       gsi_to = gsi_start_phis (to->dest);
       !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);
       gsi_next (&gsi_from), gsi_next (&gsi_to))
    {
      gimple from_phi = gsi_stmt (gsi_from);
      gimple to_phi = gsi_stmt (gsi_to);
      tree from_arg = PHI_ARG_DEF_FROM_EDGE (from_phi, from);
      tree to_arg = PHI_ARG_DEF_FROM_EDGE (to_phi, to);
      if (TREE_CODE (from_arg) == SSA_NAME
	  && TREE_CODE (to_arg) == SSA_NAME
	  && get_current_def (to_arg) == NULL_TREE)
	set_current_def (to_arg, get_current_def (from_arg));
    }
}


/* Given LOOP this function generates a new copy of it and puts it
   on E which is either the entry or exit of LOOP.  If SCALAR_LOOP is
   non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the
   basic blocks from SCALAR_LOOP instead of LOOP, but to either the
   entry or exit of LOOP.  */

struct loop *
slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop,
					struct loop *scalar_loop, edge e)
{
  struct loop *new_loop;
  basic_block *new_bbs, *bbs;
  bool at_exit;
  bool was_imm_dom;
  basic_block exit_dest;
  edge exit, new_exit;

  exit = single_exit (loop);
  at_exit = (e == exit);
  if (!at_exit && e != loop_preheader_edge (loop))
    return NULL;

  if (scalar_loop == NULL)
    scalar_loop = loop;

  bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
  get_loop_body_with_size (scalar_loop, bbs, scalar_loop->num_nodes);

  /* Check whether duplication is possible.  */
  if (!can_copy_bbs_p (bbs, scalar_loop->num_nodes))
    {
      free (bbs);
      return NULL;
    }

  /* Generate new loop structure.  */
  new_loop = duplicate_loop (scalar_loop, loop_outer (scalar_loop));
  duplicate_subloops (scalar_loop, new_loop);

  exit_dest = exit->dest;
  was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS, exit_dest)
		 == loop->header);

  /* Also copy the pre-header, this avoids jumping through hoops to
     duplicate the loop entry PHI arguments.  Create an empty
     pre-header unconditionally for this.  */
  basic_block preheader = split_edge (loop_preheader_edge (scalar_loop));
  edge entry_e = single_pred_edge (preheader);
  bbs[scalar_loop->num_nodes] = preheader;
  new_bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);

  exit = single_exit (scalar_loop);
  copy_bbs (bbs, scalar_loop->num_nodes + 1, new_bbs,
	    &exit, 1, &new_exit, NULL,
	    e->src, true);
  exit = single_exit (loop);
  basic_block new_preheader = new_bbs[scalar_loop->num_nodes];

  add_phi_args_after_copy (new_bbs, scalar_loop->num_nodes + 1, NULL);

  if (scalar_loop != loop)
    {
      /* If we copied from SCALAR_LOOP rather than LOOP, SSA_NAMEs from
	 SCALAR_LOOP will have current_def set to SSA_NAMEs in the new_loop,
	 but LOOP will not.  slpeel_update_phi_nodes_for_guard{1,2} expects
	 the LOOP SSA_NAMEs (on the exit edge and edge from latch to
	 header) to have current_def set, so copy them over.  */
      slpeel_duplicate_current_defs_from_edges (single_exit (scalar_loop),
						exit);
      slpeel_duplicate_current_defs_from_edges (EDGE_SUCC (scalar_loop->latch,
							   0),
						EDGE_SUCC (loop->latch, 0));
    }

  if (at_exit) /* Add the loop copy at exit.  */
    {
      if (scalar_loop != loop)
	{
	  gphi_iterator gsi;
	  new_exit = redirect_edge_and_branch (new_exit, exit_dest);

	  for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi);
	       gsi_next (&gsi))
	    {
	      gphi *phi = gsi.phi ();
	      tree orig_arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
	      location_t orig_locus
		= gimple_phi_arg_location_from_edge (phi, e);

	      add_phi_arg (phi, orig_arg, new_exit, orig_locus);
	    }
	}
      redirect_edge_and_branch_force (e, new_preheader);
      flush_pending_stmts (e);
      set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src);
      if (was_imm_dom)
	set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_exit->src);

      /* And remove the unnecessary forwarder again.  Keep the other
         one so we have a proper pre-header for the loop at the exit edge.  */
      redirect_edge_pred (single_succ_edge (preheader),
			  single_pred (preheader));
      delete_basic_block (preheader);
      set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
			       loop_preheader_edge (scalar_loop)->src);
    }
  else /* Add the copy at entry.  */
    {
      if (scalar_loop != loop)
	{
	  /* Remove the unnecessary forwarder of scalar_loop again.  */
	  redirect_edge_pred (single_succ_edge (preheader),
			      single_pred (preheader));
	  delete_basic_block (preheader);
	  set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
				   loop_preheader_edge (scalar_loop)->src);
	  preheader = split_edge (loop_preheader_edge (loop));
	  entry_e = single_pred_edge (preheader);
	}

      redirect_edge_and_branch_force (entry_e, new_preheader);
      flush_pending_stmts (entry_e);
      set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src);

      redirect_edge_and_branch_force (new_exit, preheader);
      flush_pending_stmts (new_exit);
      set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src);

      /* And remove the unnecessary forwarder again.  Keep the other
         one so we have a proper pre-header for the loop at the exit edge.  */
      redirect_edge_pred (single_succ_edge (new_preheader),
			  single_pred (new_preheader));
      delete_basic_block (new_preheader);
      set_immediate_dominator (CDI_DOMINATORS, new_loop->header,
			       loop_preheader_edge (new_loop)->src);
    }

  for (unsigned i = 0; i < scalar_loop->num_nodes + 1; i++)
    rename_variables_in_bb (new_bbs[i]);

  if (scalar_loop != loop)
    {
      /* Update new_loop->header PHIs, so that on the preheader
	 edge they are the ones from loop rather than scalar_loop.  */
      gphi_iterator gsi_orig, gsi_new;
      edge orig_e = loop_preheader_edge (loop);
      edge new_e = loop_preheader_edge (new_loop);

      for (gsi_orig = gsi_start_phis (loop->header),
	   gsi_new = gsi_start_phis (new_loop->header);
	   !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_new);
	   gsi_next (&gsi_orig), gsi_next (&gsi_new))
	{
	  gphi *orig_phi = gsi_orig.phi ();
	  gphi *new_phi = gsi_new.phi ();
	  tree orig_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
	  location_t orig_locus
	    = gimple_phi_arg_location_from_edge (orig_phi, orig_e);

	  add_phi_arg (new_phi, orig_arg, new_e, orig_locus);
	}
    }

  free (new_bbs);
  free (bbs);

#ifdef ENABLE_CHECKING
  verify_dominators (CDI_DOMINATORS);
#endif

  return new_loop;
}
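
/* Illustrative before/after of the at-exit case (E is the loop exit),
   with the block names used above:

       preheader -> LOOP -> exit_dest                            (before)
       preheader -> LOOP -> new_preheader -> NEW_LOOP -> exit_dest

   In the at-entry case the copy is instead placed on the preheader
   edge, in front of LOOP.  */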


/* Given the condition expression COND, put it as the last statement of
   GUARD_BB; EXIT_BB is the basic block to skip the loop.  Assumes that
   this is the single exit of the guarded loop.  Returns the skip edge;
   new stmts are inserted via COND_EXPR_STMT_LIST.  */

static edge
slpeel_add_loop_guard (basic_block guard_bb, tree cond,
		       gimple_seq cond_expr_stmt_list,
		       basic_block exit_bb, basic_block dom_bb,
		       int probability)
{
  gimple_stmt_iterator gsi;
  edge new_e, enter_e;
  gcond *cond_stmt;
  gimple_seq gimplify_stmt_list = NULL;

  enter_e = EDGE_SUCC (guard_bb, 0);
  enter_e->flags &= ~EDGE_FALLTHRU;
  enter_e->flags |= EDGE_FALSE_VALUE;
  gsi = gsi_last_bb (guard_bb);

  cond = force_gimple_operand_1 (cond, &gimplify_stmt_list, is_gimple_condexpr,
				 NULL_TREE);
  if (gimplify_stmt_list)
    gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
  cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
  if (cond_expr_stmt_list)
    gsi_insert_seq_after (&gsi, cond_expr_stmt_list, GSI_NEW_STMT);

  gsi = gsi_last_bb (guard_bb);
  gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);

  /* Add new edge to connect guard block to the merge/loop-exit block.  */
  new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);

  new_e->probability = probability;
  new_e->count = apply_probability (enter_e->count, probability);
  enter_e->count -= new_e->count;
  enter_e->probability = inverse_probability (probability);
  set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
  return new_e;
}
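
/* Illustrative result, assuming COND is "n_0 == 0":

       guard_bb:
         ...
         if (n_0 == 0) goto exit_bb;   <-- new TRUE edge (returned)
         else          goto enter;     <-- old fallthru, now the FALSE edge
*/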


/* This function verifies that the following restrictions apply to LOOP:
   (1) it is innermost
   (2) it consists of exactly 2 basic blocks - header, and an empty latch.
   (3) it is single entry, single exit
   (4) its exit condition is the last stmt in the header
   (5) E is the entry/exit edge of LOOP.
 */

bool
slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
{
  edge exit_e = single_exit (loop);
  edge entry_e = loop_preheader_edge (loop);
  gcond *orig_cond = get_loop_exit_condition (loop);
  gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);

  if (loop->inner
      /* All loops have an outer scope; the only case loop->outer is NULL is for
         the function itself.  */
      || !loop_outer (loop)
      || loop->num_nodes != 2
      || !empty_block_p (loop->latch)
      || !single_exit (loop)
      /* Verify that new loop exit condition can be trivially modified.  */
      || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
      || (e != exit_e && e != entry_e))
    return false;

  return true;
}
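
/* For example (illustrative), after gimplification a loop such as

       while (i < n) { a[i] = b[i]; i++; }

   typically satisfies these restrictions: a header block holding the
   body with the exit test as its last statement, plus an empty latch,
   with a single entry and a single exit.  */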

#ifdef ENABLE_CHECKING
static void
slpeel_verify_cfg_after_peeling (struct loop *first_loop,
                                 struct loop *second_loop)
{
  basic_block loop1_exit_bb = single_exit (first_loop)->dest;
  basic_block loop2_entry_bb = loop_preheader_edge (second_loop)->src;
  basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;

  /* A guard that controls whether the second_loop is to be executed or
     skipped is placed in first_loop->exit.  first_loop->exit therefore
     has two successors - one is the preheader of second_loop, and the
     other is a bb after second_loop.  */
  gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);

  /* 1. Verify that one of the successors of first_loop->exit is the preheader
        of second_loop.  */

  /* The preheader of new_loop is expected to have two predecessors:
     first_loop->exit and the block that precedes first_loop.  */

  gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
              && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
                   && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
               || (EDGE_PRED (loop2_entry_bb, 1)->src == loop1_exit_bb
                   && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));

  /* Verify that the other successor of first_loop->exit is after the
     second_loop.  */
  /* TODO */
}
#endif

/* If the run time cost model check determines that vectorization is
   not profitable and hence a scalar loop should be executed instead,
   set FIRST_NITERS to the number of prologue-peeled iterations.  This
   allows all the iterations to be executed in the prologue-peeled
   scalar loop.  */

static void
set_prologue_iterations (basic_block bb_before_first_loop,
			 tree *first_niters,
			 struct loop *loop,
			 unsigned int th,
			 int probability)
{
  edge e;
  basic_block cond_bb, then_bb;
  tree var, prologue_after_cost_adjust_name;
  gimple_stmt_iterator gsi;
  gphi *newphi;
  edge e_true, e_false, e_fallthru;
  gcond *cond_stmt;
  gimple_seq stmts = NULL;
  tree cost_pre_condition = NULL_TREE;
  tree scalar_loop_iters =
    unshare_expr (LOOP_VINFO_NITERS_UNCHANGED (loop_vec_info_for_loop (loop)));

  e = single_pred_edge (bb_before_first_loop);
  cond_bb = split_edge (e);

  e = single_pred_edge (bb_before_first_loop);
  then_bb = split_edge (e);
  set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);

  e_false = make_single_succ_edge (cond_bb, bb_before_first_loop,
				   EDGE_FALSE_VALUE);
  set_immediate_dominator (CDI_DOMINATORS, bb_before_first_loop, cond_bb);

  e_true = EDGE_PRED (then_bb, 0);
  e_true->flags &= ~EDGE_FALLTHRU;
  e_true->flags |= EDGE_TRUE_VALUE;

  e_true->probability = probability;
  e_false->probability = inverse_probability (probability);
  e_true->count = apply_probability (cond_bb->count, probability);
  e_false->count = cond_bb->count - e_true->count;
  then_bb->frequency = EDGE_FREQUENCY (e_true);
  then_bb->count = e_true->count;

  e_fallthru = EDGE_SUCC (then_bb, 0);
  e_fallthru->count = then_bb->count;

  gsi = gsi_last_bb (cond_bb);
  cost_pre_condition =
    fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
	         build_int_cst (TREE_TYPE (scalar_loop_iters), th));
  cost_pre_condition =
    force_gimple_operand_gsi_1 (&gsi, cost_pre_condition, is_gimple_condexpr,
				NULL_TREE, false, GSI_CONTINUE_LINKING);
  cond_stmt = gimple_build_cond_from_tree (cost_pre_condition,
					   NULL_TREE, NULL_TREE);
  gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);

  var = create_tmp_var (TREE_TYPE (scalar_loop_iters),
			"prologue_after_cost_adjust");
  prologue_after_cost_adjust_name =
    force_gimple_operand (scalar_loop_iters, &stmts, false, var);

  gsi = gsi_last_bb (then_bb);
  if (stmts)
    gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);

  newphi = create_phi_node (var, bb_before_first_loop);
  add_phi_arg (newphi, prologue_after_cost_adjust_name, e_fallthru,
	       UNKNOWN_LOCATION);
  add_phi_arg (newphi, *first_niters, e_false, UNKNOWN_LOCATION);

  *first_niters = PHI_RESULT (newphi);
}
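
/* Sketch of the check emitted above (illustrative):

       if (scalar_loop_iters <= th)              <-- cond_bb
         tmp = scalar_loop_iters;                <-- then_bb
       first_niters = PHI <tmp, first_niters>    <-- bb_before_first_loop

   so that when vectorization is not profitable at run time, the
   prologue loop executes all of the scalar iterations.  */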
1123 
1124 /* Function slpeel_tree_peel_loop_to_edge.
1125 
1126    Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
1127    that is placed on the entry (exit) edge E of LOOP. After this transformation
1128    we have two loops one after the other - first-loop iterates FIRST_NITERS
1129    times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
1130    If the cost model indicates that it is profitable to emit a scalar
1131    loop instead of the vector one, then the prolog (epilog) loop will iterate
1132    for the entire unchanged scalar iterations of the loop.
1133 
1134    Input:
1135    - LOOP: the loop to be peeled.
1136    - SCALAR_LOOP: if non-NULL, the alternate loop from which basic blocks
1137 	should be copied.
1138    - E: the exit or entry edge of LOOP.
1139         If it is the entry edge, we peel the first iterations of LOOP. In this
1140         case first-loop is LOOP, and second-loop is the newly created loop.
1141         If it is the exit edge, we peel the last iterations of LOOP. In this
1142         case, first-loop is the newly created loop, and second-loop is LOOP.
1143    - NITERS: the number of iterations that LOOP iterates.
1144    - FIRST_NITERS: the number of iterations that the first-loop should iterate.
1145    - UPDATE_FIRST_LOOP_COUNT:  specified whether this function is responsible
1146         for updating the loop bound of the first-loop to FIRST_NITERS.  If it
1147         is false, the caller of this function may want to take care of this
1148         (this can be useful if we don't want new stmts added to first-loop).
1149    - TH: cost model profitability threshold of iterations for vectorization.
1150    - CHECK_PROFITABILITY: specify whether cost model check has not occurred
1151                           during versioning and hence needs to occur during
1152 			  prologue generation or whether cost model check
1153 			  has not occurred during prologue generation and hence
1154 			  needs to occur during epilogue generation.
1155    - BOUND1 is the upper bound on number of iterations of the first loop (if known)
1156    - BOUND2 is the upper bound on number of iterations of the second loop (if known)
1157 
1158 
1159    Output:
1160    The function returns a pointer to the new loop-copy, or NULL if it failed
1161    to perform the transformation.
1162 
1163    The function generates two if-then-else guards: one before the first loop,
1164    and the other before the second loop:
1165    The first guard is:
1166      if (FIRST_NITERS == 0) then skip the first loop,
1167      and go directly to the second loop.
1168    The second guard is:
1169      if (FIRST_NITERS == NITERS) then skip the second loop.
1170 
1171    If the optional COND_EXPR and COND_EXPR_STMT_LIST arguments are given
1172    then the generated condition is combined with COND_EXPR and the
1173    statements in COND_EXPR_STMT_LIST are emitted together with it.
1174 
1175    FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
1176    FORNOW the resulting code will not be in loop-closed-ssa form.
1177 */
1178 
1179 static struct loop *
1180 slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loop *scalar_loop,
1181 			       edge e, tree *first_niters,
1182 			       tree niters, bool update_first_loop_count,
1183 			       unsigned int th, bool check_profitability,
1184 			       tree cond_expr, gimple_seq cond_expr_stmt_list,
1185 			       int bound1, int bound2)
1186 {
1187   struct loop *new_loop = NULL, *first_loop, *second_loop;
1188   edge skip_e;
1189   tree pre_condition = NULL_TREE;
1190   basic_block bb_before_second_loop, bb_after_second_loop;
1191   basic_block bb_before_first_loop;
1192   basic_block bb_between_loops;
1193   basic_block new_exit_bb;
1194   gphi_iterator gsi;
1195   edge exit_e = single_exit (loop);
1196   source_location loop_loc;
1197   /* There are many aspects to how likely the first loop is going to be executed.
1198      Without histogram we can't really do good job.  Simply set it to
1199      2/3, so the first loop is not reordered to the end of function and
1200      the hot path through stays short.  */
1201   int first_guard_probability = 2 * REG_BR_PROB_BASE / 3;
1202   int second_guard_probability = 2 * REG_BR_PROB_BASE / 3;
1203   int probability_of_second_loop;
1204 
1205   if (!slpeel_can_duplicate_loop_p (loop, e))
1206     return NULL;
1207 
1208   /* We might have a queued need to update virtual SSA form.  As we
1209      delete the update SSA machinery below after doing a regular
1210      incremental SSA update during loop copying make sure we don't
1211      lose that fact.
1212      ???  Needing to update virtual SSA form by renaming is unfortunate
1213      but not all of the vectorizer code inserting new loads / stores
1214      properly assigns virtual operands to those statements.  */
1215   update_ssa (TODO_update_ssa_only_virtuals);
1216 
1217   /* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI
1218      in the exit bb and rename all the uses after the loop.  This simplifies
1219      the *guard[12] routines, which assume loop closed SSA form for all PHIs
1220      (but normally loop closed SSA form doesn't require virtual PHIs to be
1221      in the same form).  Doing this early simplifies the checking what
1222      uses should be renamed.  */
1223   for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
1224     if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
1225       {
1226 	gphi *phi = gsi.phi ();
1227 	for (gsi = gsi_start_phis (exit_e->dest);
1228 	     !gsi_end_p (gsi); gsi_next (&gsi))
1229 	  if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
1230 	    break;
1231 	if (gsi_end_p (gsi))
1232 	  {
1233 	    tree new_vop = copy_ssa_name (PHI_RESULT (phi));
1234 	    gphi *new_phi = create_phi_node (new_vop, exit_e->dest);
1235 	    tree vop = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
1236 	    imm_use_iterator imm_iter;
1237 	    gimple stmt;
1238 	    use_operand_p use_p;
1239 
1240 	    add_phi_arg (new_phi, vop, exit_e, UNKNOWN_LOCATION);
1241 	    gimple_phi_set_result (new_phi, new_vop);
1242 	    FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop)
1243 	      if (stmt != new_phi && gimple_bb (stmt) != loop->header)
1244 		FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1245 		  SET_USE (use_p, new_vop);
1246 	  }
1247 	break;
1248       }
1249 
1250   /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
1251         Resulting CFG would be:
1252 
1253         first_loop:
1254         do {
1255         } while ...
1256 
1257         second_loop:
1258         do {
1259         } while ...
1260 
1261         orig_exit_bb:
1262    */
1263 
1264   if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop,
1265 							   e)))
1266     {
1267       loop_loc = find_loop_location (loop);
1268       dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1269                        "tree_duplicate_loop_to_edge_cfg failed.\n");
1270       return NULL;
1271     }
1272 
1273   if (MAY_HAVE_DEBUG_STMTS)
1274     {
1275       gcc_assert (!adjust_vec.exists ());
1276       adjust_vec.create (32);
1277     }
1278 
1279   if (e == exit_e)
1280     {
1281       /* NEW_LOOP was placed after LOOP.  */
1282       first_loop = loop;
1283       second_loop = new_loop;
1284     }
1285   else
1286     {
1287       /* NEW_LOOP was placed before LOOP.  */
1288       first_loop = new_loop;
1289       second_loop = loop;
1290     }
1291 
1292   /* 2.  Add the guard code in one of the following ways:
1293 
1294      2.a Add the guard that controls whether the first loop is executed.
1295          This occurs when this function is invoked for prologue or epilogue
1296 	 generation and when the cost model check can be done at compile time.
1297 
1298          Resulting CFG would be:
1299 
1300          bb_before_first_loop:
1301          if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1302                                 GOTO first-loop
1303 
1304          first_loop:
1305          do {
1306          } while ...
1307 
1308          bb_before_second_loop:
1309 
1310          second_loop:
1311          do {
1312          } while ...
1313 
1314          orig_exit_bb:
1315 
1316      2.b Add the cost model check that allows the prologue
1317          to iterate for the entire unchanged scalar
1318          iterations of the loop in the event that the cost
1319          model indicates that the scalar loop is more
1320          profitable than the vector one. This occurs when
1321 	 this function is invoked for prologue generation
1322 	 and the cost model check needs to be done at run
1323 	 time.
1324 
1325          Resulting CFG after prologue peeling would be:
1326 
1327          if (scalar_loop_iterations <= th)
1328            FIRST_NITERS = scalar_loop_iterations
1329 
1330          bb_before_first_loop:
1331          if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1332                                 GOTO first-loop
1333 
1334          first_loop:
1335          do {
1336          } while ...
1337 
1338          bb_before_second_loop:
1339 
1340          second_loop:
1341          do {
1342          } while ...
1343 
1344          orig_exit_bb:
1345 
1346      2.c Add the cost model check that allows the epilogue
1347          to iterate for the entire unchanged scalar
1348          iterations of the loop in the event that the cost
1349          model indicates that the scalar loop is more
1350          profitable than the vector one. This occurs when
1351 	 this function is invoked for epilogue generation
1352 	 and the cost model check needs to be done at run
1353 	 time.  This check is combined with any pre-existing
1354 	 check in COND_EXPR to avoid versioning.
1355 
1356          Resulting CFG after prologue peeling would be:
1357 
1358          bb_before_first_loop:
1359          if ((scalar_loop_iterations <= th)
1360              ||
1361              FIRST_NITERS == 0) GOTO bb_before_second_loop
1362                                 GOTO first-loop
1363 
1364          first_loop:
1365          do {
1366          } while ...
1367 
1368          bb_before_second_loop:
1369 
1370          second_loop:
1371          do {
1372          } while ...
1373 
1374          orig_exit_bb:
1375   */
1376 
1377   bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
1378   /* Loop copying insterted a forwarder block for us here.  */
1379   bb_before_second_loop = single_exit (first_loop)->dest;
1380 
1381   probability_of_second_loop = (inverse_probability (first_guard_probability)
1382 			        + combine_probabilities (second_guard_probability,
1383                                                          first_guard_probability));
1384   /* Theoretically preheader edge of first loop and exit edge should have
1385      same frequencies.  Loop exit probablities are however easy to get wrong.
1386      It is safer to copy value from original loop entry.  */
1387   bb_before_second_loop->frequency
1388      = combine_probabilities (bb_before_first_loop->frequency,
1389                               probability_of_second_loop);
1390   bb_before_second_loop->count
1391      = apply_probability (bb_before_first_loop->count,
1392 			  probability_of_second_loop);
1393   single_succ_edge (bb_before_second_loop)->count
1394      = bb_before_second_loop->count;
1395 
1396   /* Epilogue peeling.  */
1397   if (!update_first_loop_count)
1398     {
1399       loop_vec_info loop_vinfo = loop_vec_info_for_loop (loop);
1400       tree scalar_loop_iters = LOOP_VINFO_NITERSM1 (loop_vinfo);
1401       unsigned limit = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1;
1402       if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
1403 	limit = limit + 1;
1404       if (check_profitability
1405 	  && th > limit)
1406 	limit = th;
1407       pre_condition =
1408 	fold_build2 (LT_EXPR, boolean_type_node, scalar_loop_iters,
1409 		     build_int_cst (TREE_TYPE (scalar_loop_iters), limit));
1410       if (cond_expr)
1411 	{
1412 	  pre_condition =
1413 	    fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1414 			 pre_condition,
1415 			 fold_build1 (TRUTH_NOT_EXPR, boolean_type_node,
1416 				      cond_expr));
1417 	}
1418     }
1419 
1420   /* Prologue peeling.  */
1421   else
1422     {
1423       if (check_profitability)
1424 	set_prologue_iterations (bb_before_first_loop, first_niters,
1425 				 loop, th, first_guard_probability);
1426 
1427       pre_condition =
1428 	fold_build2 (LE_EXPR, boolean_type_node, *first_niters,
1429 		     build_int_cst (TREE_TYPE (*first_niters), 0));
1430     }
1431 
1432   skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
1433 				  cond_expr_stmt_list,
1434                                   bb_before_second_loop, bb_before_first_loop,
1435 				  inverse_probability (first_guard_probability));
1436   scale_loop_profile (first_loop, first_guard_probability,
1437 		      check_profitability && (int)th > bound1 ? th : bound1);
1438   slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
1439 				      first_loop == new_loop,
1440 				      &new_exit_bb);
1441 
1442 
1443   /* 3. Add the guard that controls whether the second loop is executed.
1444         Resulting CFG would be:
1445 
1446         bb_before_first_loop:
1447         if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1448                                GOTO first-loop
1449 
1450         first_loop:
1451         do {
1452         } while ...
1453 
1454         bb_between_loops:
1455         if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1456                                     GOTO bb_before_second_loop
1457 
1458         bb_before_second_loop:
1459 
1460         second_loop:
1461         do {
1462         } while ...
1463 
1464         bb_after_second_loop:
1465 
1466         orig_exit_bb:
1467    */
1468 
1469   bb_between_loops = new_exit_bb;
1470   bb_after_second_loop = split_edge (single_exit (second_loop));
1471 
1472   pre_condition =
1473 	fold_build2 (EQ_EXPR, boolean_type_node, *first_niters, niters);
1474   skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition, NULL,
1475                                   bb_after_second_loop, bb_before_first_loop,
1476 				  inverse_probability (second_guard_probability));
1477   scale_loop_profile (second_loop, probability_of_second_loop, bound2);
1478   slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
1479                                      second_loop == new_loop, &new_exit_bb);
1480 
1481   /* 4. Make first-loop iterate FIRST_NITERS times, if requested.  */
1483   if (update_first_loop_count)
1484     slpeel_make_loop_iterate_ntimes (first_loop, *first_niters);
1485 
1486   delete_update_ssa ();
1487 
1488   adjust_vec_debug_stmts ();
1489 
1490   return new_loop;
1491 }
1492 
1493 /* Function find_loop_location.
1494 
1495    Extract the location of the loop in the source code.
1496    If the loop is not well formed for vectorization, an estimated
1497    location is calculated.
1498    Return the loop location on success and UNKNOWN_LOCATION otherwise.  */
1499 
1500 source_location
1501 find_loop_location (struct loop *loop)
1502 {
1503   gimple stmt = NULL;
1504   basic_block bb;
1505   gimple_stmt_iterator si;
1506 
1507   if (!loop)
1508     return UNKNOWN_LOCATION;
1509 
1510   stmt = get_loop_exit_condition (loop);
1511 
1512   if (stmt
1513       && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
1514     return gimple_location (stmt);
1515 
1516   /* If we got here, the loop is probably not "well formed";
1517      try to estimate the loop location.  */
1518 
1519   if (!loop->header)
1520     return UNKNOWN_LOCATION;
1521 
1522   bb = loop->header;
1523 
1524   for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1525     {
1526       stmt = gsi_stmt (si);
1527       if (LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
1528         return gimple_location (stmt);
1529     }
1530 
1531   return UNKNOWN_LOCATION;
1532 }
1533 
1534 
1535 /* Function vect_can_advance_ivs_p
1536 
1537    In case the number of iterations that LOOP iterates is unknown at compile
1538    time, an epilog loop will be generated, and the loop induction variables
1539    (IVs) will be "advanced" to the value they are supposed to take just before
1540    the epilog loop.  Here we check that the access function of the loop IVs
1541    and the expression that represents the loop bound are simple enough.
1542    These restrictions will be relaxed in the future.  */
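/* As a hedged illustration of what "simple enough" means here: a simple
   affine IV such as  i = i + step  has a non-chrec evolution part (just
   STEP) and is accepted, whereas an IV whose evolution part is itself a
   chrec, e.g. the second-degree evolution {0, +, {1, +, 1}_1}_1 produced
   by  i += j; j += 1,  is rejected by the tree_is_chrec test below.  */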
1543 
1544 bool
1545 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
1546 {
1547   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1548   basic_block bb = loop->header;
1549   gimple phi;
1550   gphi_iterator gsi;
1551 
1552   /* Analyze phi functions of the loop header.  */
1553 
1554   if (dump_enabled_p ())
1555     dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:\n");
1556   for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1557     {
1558       tree evolution_part;
1559 
1560       phi = gsi.phi ();
1561       if (dump_enabled_p ())
1562 	{
1563           dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
1564           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
1565           dump_printf (MSG_NOTE, "\n");
1566 	}
1567 
1568       /* Skip virtual phis.  The data dependences that are associated with
1569          virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
1570 
1571       if (virtual_operand_p (PHI_RESULT (phi)))
1572 	{
1573 	  if (dump_enabled_p ())
1574 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1575                              "virtual phi. skip.\n");
1576 	  continue;
1577 	}
1578 
1579       /* Skip reduction phis.  */
1580 
1581       if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
1582         {
1583           if (dump_enabled_p ())
1584             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1585                              "reduc phi. skip.\n");
1586           continue;
1587         }
1588 
1589       /* Analyze the evolution function.  */
1590 
1591       evolution_part
1592 	= STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
1593       if (evolution_part == NULL_TREE)
1594         {
1595 	  if (dump_enabled_p ())
1596 	    dump_printf (MSG_MISSED_OPTIMIZATION,
1597 			 "No access function or evolution.\n");
1598 	  return false;
1599         }
1600 
1601       /* FORNOW: We do not transform initial conditions of IVs
1602 	 whose evolution functions are polynomials of degree >= 2.  */
1603 
1604       if (tree_is_chrec (evolution_part))
1605 	return false;
1606     }
1607 
1608   return true;
1609 }
1610 
1611 
1612 /*   Function vect_update_ivs_after_vectorizer.
1613 
1614      "Advance" the induction variables of LOOP to the value they should take
1615      after the execution of LOOP.  This is currently necessary because the
1616      vectorizer does not handle induction variables that are used after the
1617      loop.  Such a situation occurs when the last iterations of LOOP are
1618      peeled, because:
1619      1. We introduced new uses after LOOP for IVs that were not originally used
1620         after LOOP: the IVs of LOOP are now used by an epilog loop.
1621      2. LOOP is going to be vectorized; this means that it will iterate N/VF
1622         times, whereas the loop IVs should be bumped N times.
1623 
1624      Input:
1625      - LOOP - a loop that is going to be vectorized. The last few iterations
1626               of LOOP were peeled.
1627      - NITERS - the number of iterations that LOOP executes (before it is
1628                 vectorized), i.e., the number of times the ivs should be bumped.
1629      - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
1630                   coming out from LOOP on which there are uses of the LOOP ivs
1631 		  (this is the path from LOOP->exit to epilog_loop->preheader).
1632 
1633                   The new definitions of the ivs are placed in LOOP->exit.
1634                   The phi args associated with the edge UPDATE_E in the bb
1635                   UPDATE_E->dest are updated accordingly.
1636 
1637      Assumption 1: Like the rest of the vectorizer, this function assumes
1638      a single loop exit that has a single predecessor.
1639 
1640      Assumption 2: The phi nodes in the LOOP header and in update_bb are
1641      organized in the same order.
1642 
1643      Assumption 3: The access function of the ivs is simple enough (see
1644      vect_can_advance_ivs_p).  This assumption will be relaxed in the future.
1645 
1646      Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
1647      coming out of LOOP on which the ivs of LOOP are used (this is the path
1648      that leads to the epilog loop; other paths skip the epilog loop).  This
1649      path starts with the edge UPDATE_E, and its destination (denoted update_bb)
1650      needs to have its phis updated.
1651  */
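/* A minimal scalar sketch (not vectorizer code; the name is illustrative)
   of the value computed below for an integer IV:

     static long
     advanced_iv_value (long init, long step, long niters)
     {
       return init + niters * step;
     }

   The loop below builds exactly this INIT + NITERS * STEP expression for
   each non-reduction IV, using fold_build_pointer_plus instead of a
   plain addition when the IV is a pointer.  */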
1652 
1653 static void
1654 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
1655 				  edge update_e)
1656 {
1657   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1658   basic_block exit_bb = single_exit (loop)->dest;
1659   gphi *phi, *phi1;
1660   gphi_iterator gsi, gsi1;
1661   basic_block update_bb = update_e->dest;
1662 
1663   gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
1664 
1665   /* Make sure there exists a single-predecessor exit bb:  */
1666   gcc_assert (single_pred_p (exit_bb));
1667 
1668   for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
1669        !gsi_end_p (gsi) && !gsi_end_p (gsi1);
1670        gsi_next (&gsi), gsi_next (&gsi1))
1671     {
1672       tree init_expr;
1673       tree step_expr, off;
1674       tree type;
1675       tree var, ni, ni_name;
1676       gimple_stmt_iterator last_gsi;
1677       stmt_vec_info stmt_info;
1678 
1679       phi = gsi.phi ();
1680       phi1 = gsi1.phi ();
1681       if (dump_enabled_p ())
1682         {
1683           dump_printf_loc (MSG_NOTE, vect_location,
1684                            "vect_update_ivs_after_vectorizer: phi: ");
1685 	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
1686           dump_printf (MSG_NOTE, "\n");
1687         }
1688 
1689       /* Skip virtual phis.  */
1690       if (virtual_operand_p (PHI_RESULT (phi)))
1691 	{
1692 	  if (dump_enabled_p ())
1693 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1694                              "virtual phi. skip.\n");
1695 	  continue;
1696 	}
1697 
1698       /* Skip reduction phis.  */
1699       stmt_info = vinfo_for_stmt (phi);
1700       if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
1701         {
1702 	  if (dump_enabled_p ())
1703 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1704                              "reduc phi. skip.\n");
1705           continue;
1706         }
1707 
1708       type = TREE_TYPE (gimple_phi_result (phi));
1709       step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info);
1710       step_expr = unshare_expr (step_expr);
1711 
1712       /* FORNOW: We do not support IVs whose evolution function is a polynomial
1713          of degree >= 2 or exponential.  */
1714       gcc_assert (!tree_is_chrec (step_expr));
1715 
1716       init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1717 
1718       off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
1719 			 fold_convert (TREE_TYPE (step_expr), niters),
1720 			 step_expr);
1721       if (POINTER_TYPE_P (type))
1722 	ni = fold_build_pointer_plus (init_expr, off);
1723       else
1724 	ni = fold_build2 (PLUS_EXPR, type,
1725 			  init_expr, fold_convert (type, off));
1726 
1727       var = create_tmp_var (type, "tmp");
1728 
1729       last_gsi = gsi_last_bb (exit_bb);
1730       ni_name = force_gimple_operand_gsi (&last_gsi, ni, false, var,
1731 					  true, GSI_SAME_STMT);
1732 
1733       /* Fix phi expressions in the successor bb.  */
1734       adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
1735     }
1736 }
1737 
1738 /* Function vect_do_peeling_for_loop_bound
1739 
1740    Peel the last iterations of the loop represented by LOOP_VINFO.
1741    The peeled iterations form a new epilog loop.  Given that the loop now
1742    iterates NITERS times, the new epilog loop iterates
1743    NITERS % VECTORIZATION_FACTOR times.
1744 
1745    The original loop will later be made to iterate
1746    NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO).
1747 
1748    COND_EXPR and COND_EXPR_STMT_LIST are combined with a new generated
1749    test.  */
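/* Hedged worked example (illustrative numbers): with NITERS == 103 and a
   vectorization factor of 8, RATIO == 103 / 8 == 12, so the vectorized
   loop executes 12 times and the peeled epilog loop executes
   103 % 8 == 7 scalar iterations.  */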
1750 
1751 void
1752 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo,
1753 				tree ni_name, tree ratio_mult_vf_name,
1754 				unsigned int th, bool check_profitability)
1755 {
1756   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1757   struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
1758   struct loop *new_loop;
1759   edge update_e;
1760   basic_block preheader;
1761   int loop_num;
1762   int max_iter;
1763   tree cond_expr = NULL_TREE;
1764   gimple_seq cond_expr_stmt_list = NULL;
1765 
1766   if (dump_enabled_p ())
1767     dump_printf_loc (MSG_NOTE, vect_location,
1768                      "=== vect_do_peeling_for_loop_bound ===\n");
1769 
1770   initialize_original_copy_tables ();
1771 
1772   loop_num  = loop->num;
1773 
1774   new_loop
1775     = slpeel_tree_peel_loop_to_edge (loop, scalar_loop, single_exit (loop),
1776 				     &ratio_mult_vf_name, ni_name, false,
1777 				     th, check_profitability,
1778 				     cond_expr, cond_expr_stmt_list,
1779 				     0, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
1780   gcc_assert (new_loop);
1781   gcc_assert (loop_num == loop->num);
1782 #ifdef ENABLE_CHECKING
1783   slpeel_verify_cfg_after_peeling (loop, new_loop);
1784 #endif
1785 
1786   /* A guard that controls whether the new_loop is to be executed or skipped
1787      is placed in LOOP->exit.  LOOP->exit therefore has two successors - one
1788      is the preheader of NEW_LOOP, where the IVs from LOOP are used.  The other
1789      is a bb after NEW_LOOP, where these IVs are not used.  Find the edge that
1790      is on the path where the LOOP IVs are used and need to be updated.  */
1791 
1792   preheader = loop_preheader_edge (new_loop)->src;
1793   if (EDGE_PRED (preheader, 0)->src == single_exit (loop)->dest)
1794     update_e = EDGE_PRED (preheader, 0);
1795   else
1796     update_e = EDGE_PRED (preheader, 1);
1797 
1798   /* Update IVs of original loop as if they were advanced
1799      by ratio_mult_vf_name steps.  */
1800   vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
1801 
1802   /* For vectorization factor N, we need to copy the last N-1 values in the
1803      epilogue, and this means N-2 loopback edge executions.
1804 
1805      PEELING_FOR_GAPS works by subtracting the last iteration, and thus the
1806      epilogue will execute at least LOOP_VINFO_VECT_FACTOR times.  */
1807   max_iter = (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
1808 	      ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) * 2
1809 	      : LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 2;
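  /* Illustrative numbers for the bound just computed: with a
     vectorization factor of 4 and no PEELING_FOR_GAPS, max_iter ==
     4 - 2 == 2 (at most 3 epilogue iterations and hence at most 2 latch
     executions); with PEELING_FOR_GAPS it is 2 * 4 - 2 == 6.  */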
1810   if (check_profitability)
1811     max_iter = MAX (max_iter, (int) th - 1);
1812   record_niter_bound (new_loop, max_iter, false, true);
1813   dump_printf (MSG_NOTE,
1814                "Setting upper bound of nb iterations for epilogue "
1815                "loop to %d\n", max_iter);
1816 
1817   /* After peeling we have to reset scalar evolution analyzer.  */
1818   scev_reset ();
1819 
1820   free_original_copy_tables ();
1821 }
1822 
1823 
1824 /* Function vect_gen_niters_for_prolog_loop
1825 
1826    Set the number of iterations for the loop represented by LOOP_VINFO
1827    to the minimum between LOOP_NITERS (the original iteration count of the loop)
1828    and the misalignment of DR - the data reference recorded in
1829    LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).  As a result, after the execution of
1830    this loop, the data reference DR will refer to an aligned location.
1831 
1832    The following computation is generated:
1833 
1834    If the misalignment of DR is known at compile time:
1835      addr_mis = DR_MISALIGNMENT (dr);
1836    Else, compute address misalignment in bytes:
1837      addr_mis = addr & (vectype_align - 1)
1838 
1839    prolog_niters = min (LOOP_NITERS, ((VF - addr_mis/elem_size)&(VF-1))/step)
1840 
1841    (elem_size = element type size; an element is the scalar element whose type
1842    is the inner type of the vectype)
1843 
1844    When the step of the data-ref in the loop is not 1 (as in interleaved data
1845    and SLP), the number of iterations of the prolog must be divided by the step
1846    (which is equal to the size of the interleaved group).
1847 
1848    The above formulas assume that VF == number of elements in the vector. This
1849    may not hold when there are multiple types in the loop.
1850    In this case, for some data-references in the loop the VF does not represent
1851    the number of elements that fit in the vector.  Therefore, instead of VF we
1852    use TYPE_VECTOR_SUBPARTS.  */
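/* A hedged, self-contained sketch of the unknown-misalignment case
   below, assuming a power-of-two element size and a unit step; the
   function name is illustrative, not part of GCC:

     static unsigned
     prolog_niters_sketch (unsigned long addr, unsigned vectype_align,
                           unsigned elem_size, unsigned nelements)
     {
       unsigned byte_misalign = addr & (vectype_align - 1);
       unsigned elem_misalign = byte_misalign / elem_size;
       return (nelements - elem_misalign) & (nelements - 1);
     }

   E.g. for a 16-byte vector of 4-byte elements (nelements == 4) and an
   address with byte_misalign == 8: elem_misalign == 2, so the prolog
   needs (4 - 2) & 3 == 2 iterations to reach an aligned address.  */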
1853 
1854 static tree
1855 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters, int *bound)
1856 {
1857   struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
1858   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1859   tree var;
1860   gimple_seq stmts;
1861   tree iters, iters_name;
1862   edge pe;
1863   basic_block new_bb;
1864   gimple dr_stmt = DR_STMT (dr);
1865   stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
1866   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1867   int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
1868   tree niters_type = TREE_TYPE (loop_niters);
1869   int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1870 
1871   pe = loop_preheader_edge (loop);
1872 
1873   if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
1874     {
1875       int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
1876 
1877       if (dump_enabled_p ())
1878         dump_printf_loc (MSG_NOTE, vect_location,
1879                          "known peeling = %d.\n", npeel);
1880 
1881       iters = build_int_cst (niters_type, npeel);
1882       *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
1883     }
1884   else
1885     {
1886       gimple_seq new_stmts = NULL;
1887       bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
1888       tree offset = negative
1889 	  ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
1890       tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
1891 						&new_stmts, offset, loop);
1892       tree type = unsigned_type_for (TREE_TYPE (start_addr));
1893       tree vectype_align_minus_1 = build_int_cst (type, vectype_align - 1);
1894       HOST_WIDE_INT elem_size =
1895                 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1896       tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
1897       tree nelements_minus_1 = build_int_cst (type, nelements - 1);
1898       tree nelements_tree = build_int_cst (type, nelements);
1899       tree byte_misalign;
1900       tree elem_misalign;
1901 
1902       new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmts);
1903       gcc_assert (!new_bb);
1904 
1905       /* Create:  byte_misalign = addr & (vectype_align - 1)  */
1906       byte_misalign =
1907         fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr),
1908                      vectype_align_minus_1);
1909 
1910       /* Create:  elem_misalign = byte_misalign / element_size  */
1911       elem_misalign =
1912         fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
1913 
1914       /* Create:  (niters_type) (nelements - elem_misalign)&(nelements - 1)  */
1915       if (negative)
1916 	iters = fold_build2 (MINUS_EXPR, type, elem_misalign, nelements_tree);
1917       else
1918 	iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
1919       iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
1920       iters = fold_convert (niters_type, iters);
1921       *bound = nelements;
1922     }
1923 
1924   /* Create:  prolog_loop_niters = min (iters, loop_niters) */
1925   /* If the loop bound is known at compile time, we already verified that it is
1926      greater than vf; since the misalignment ('iters') is at most vf, there's
1927      no need to generate the MIN_EXPR in this case.  */
1928   if (TREE_CODE (loop_niters) != INTEGER_CST)
1929     iters = fold_build2 (MIN_EXPR, niters_type, iters, loop_niters);
1930 
1931   if (dump_enabled_p ())
1932     {
1933       dump_printf_loc (MSG_NOTE, vect_location,
1934                        "niters for prolog loop: ");
1935       dump_generic_expr (MSG_NOTE, TDF_SLIM, iters);
1936       dump_printf (MSG_NOTE, "\n");
1937     }
1938 
1939   var = create_tmp_var (niters_type, "prolog_loop_niters");
1940   stmts = NULL;
1941   iters_name = force_gimple_operand (iters, &stmts, false, var);
1942 
1943   /* Insert stmt on loop preheader edge.  */
1944   if (stmts)
1945     {
1946       basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1947       gcc_assert (!new_bb);
1948     }
1949 
1950   return iters_name;
1951 }
1952 
1953 
1954 /* Function vect_update_init_of_dr
1955 
1956    NITERS iterations were peeled from LOOP.  DR represents a data reference
1957    in LOOP.  This function updates the information recorded in DR to
1958    account for the fact that the first NITERS iterations had already been
1959    executed.  Specifically, it updates the OFFSET field of DR.  */
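/* Illustrative numbers for the update below: for a data reference with
   DR_STEP == 4 (say, an int array traversed with unit stride) and
   NITERS == 3 peeled iterations, the OFFSET recorded in DR grows by
   3 * 4 == 12 bytes.  */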
1960 
1961 static void
1962 vect_update_init_of_dr (struct data_reference *dr, tree niters)
1963 {
1964   tree offset = DR_OFFSET (dr);
1965 
1966   niters = fold_build2 (MULT_EXPR, sizetype,
1967 			fold_convert (sizetype, niters),
1968 			fold_convert (sizetype, DR_STEP (dr)));
1969   offset = fold_build2 (PLUS_EXPR, sizetype,
1970 			fold_convert (sizetype, offset), niters);
1971   DR_OFFSET (dr) = offset;
1972 }
1973 
1974 
1975 /* Function vect_update_inits_of_drs
1976 
1977    NITERS iterations were peeled from the loop represented by LOOP_VINFO.
1978    This function updates the information recorded for the data references in
1979    the loop to account for the fact that the first NITERS iterations had
1980    already been executed.  Specifically, it updates the initial_condition of
1981    the access_function of all the data_references in the loop.  */
1982 
1983 static void
1984 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
1985 {
1986   unsigned int i;
1987   vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1988   struct data_reference *dr;
1989 
1990   if (dump_enabled_p ())
1991     dump_printf_loc (MSG_NOTE, vect_location,
1992                      "=== vect_update_inits_of_drs ===\n");
1993 
1994   FOR_EACH_VEC_ELT (datarefs, i, dr)
1995     vect_update_init_of_dr (dr, niters);
1996 }
1997 
1998 
1999 /* Function vect_do_peeling_for_alignment
2000 
2001    Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
2002    'niters' is set to the misalignment of one of the data references in the
2003    loop, thereby forcing it to refer to an aligned location at the beginning
2004    of the execution of this loop.  The data reference for which we are
2005    peeling is recorded in LOOP_VINFO_UNALIGNED_DR.  */
2006 
2007 void
2008 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, tree ni_name,
2009 			       unsigned int th, bool check_profitability)
2010 {
2011   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2012   struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
2013   tree niters_of_prolog_loop;
2014   tree wide_prolog_niters;
2015   struct loop *new_loop;
2016   int max_iter;
2017   int bound = 0;
2018 
2019   if (dump_enabled_p ())
2020     dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2021                      "loop peeled for vectorization to enhance"
2022                      " alignment\n");
2023 
2024   initialize_original_copy_tables ();
2025 
2026   gimple_seq stmts = NULL;
2027   gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2028   niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo,
2029 							   ni_name,
2030 							   &bound);
2031 
2032   /* Peel the prolog loop and iterate it niters_of_prolog_loop times.  */
2033   new_loop =
2034     slpeel_tree_peel_loop_to_edge (loop, scalar_loop,
2035 				   loop_preheader_edge (loop),
2036 				   &niters_of_prolog_loop, ni_name, true,
2037 				   th, check_profitability, NULL_TREE, NULL,
2038 				   bound, 0);
2039 
2040   gcc_assert (new_loop);
2041 #ifdef ENABLE_CHECKING
2042   slpeel_verify_cfg_after_peeling (new_loop, loop);
2043 #endif
2044   /* For vectorization factor N, we need to copy at most N-1 values
2045      for alignment and this means N-2 loopback edge executions.  */
2046   max_iter = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 2;
2047   if (check_profitability)
2048     max_iter = MAX (max_iter, (int) th - 1);
2049   record_niter_bound (new_loop, max_iter, false, true);
2050   dump_printf (MSG_NOTE,
2051                "Setting upper bound of nb iterations for prologue "
2052                "loop to %d\n", max_iter);
2053 
2054   /* Update number of times loop executes.  */
2055   LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
2056 		TREE_TYPE (ni_name), ni_name, niters_of_prolog_loop);
2057   LOOP_VINFO_NITERSM1 (loop_vinfo) = fold_build2 (MINUS_EXPR,
2058 		TREE_TYPE (ni_name),
2059 		LOOP_VINFO_NITERSM1 (loop_vinfo), niters_of_prolog_loop);
2060 
2061   if (types_compatible_p (sizetype, TREE_TYPE (niters_of_prolog_loop)))
2062     wide_prolog_niters = niters_of_prolog_loop;
2063   else
2064     {
2065       gimple_seq seq = NULL;
2066       edge pe = loop_preheader_edge (loop);
2067       tree wide_iters = fold_convert (sizetype, niters_of_prolog_loop);
2068       tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters");
2069       wide_prolog_niters = force_gimple_operand (wide_iters, &seq, false,
2070                                                  var);
2071       if (seq)
2072 	{
2073 	  /* Insert stmt on loop preheader edge.  */
2074           basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2075           gcc_assert (!new_bb);
2076         }
2077     }
2078 
2079   /* Update the init conditions of the access functions of all data refs.  */
2080   vect_update_inits_of_drs (loop_vinfo, wide_prolog_niters);
2081 
2082   /* After peeling we have to reset scalar evolution analyzer.  */
2083   scev_reset ();
2084 
2085   free_original_copy_tables ();
2086 }
2087 
2088 
2089 /* Function vect_create_cond_for_align_checks.
2090 
2091    Create a conditional expression that represents the alignment checks for
2092    all of the data references (array element references) whose alignment
2093    must be checked at runtime.
2094 
2095    Input:
2096    COND_EXPR  - input conditional expression.  New conditions will be chained
2097                 with logical AND operation.
2098    LOOP_VINFO - two fields of the loop information are used.
2099                 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
2100                 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
2101 
2102    Output:
2103    COND_EXPR_STMT_LIST - statements needed to construct the conditional
2104                          expression.
2105    The returned value is the conditional expression to be used in the if
2106    statement that controls which version of the loop gets executed at runtime.
2107 
2108    The algorithm makes two assumptions:
2109      1) The number of bytes "n" in a vector is a power of 2.
2110        2) An address "a" is aligned if a%n is zero, and this
2111         test can be done as a&(n-1) == 0.  For example, for 16
2112         byte vectors the test is a&0xf == 0.  */
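/* A hedged scalar model of the test constructed below (the name is
   illustrative, not GCC API):

     static int
     all_aligned_sketch (const unsigned long *addrs, int n,
                         unsigned long mask)
     {
       unsigned long or_tmp = 0;
       int i;

       for (i = 0; i < n; i++)
         or_tmp |= addrs[i];
       return (or_tmp & mask) == 0;
     }

   ORing the addresses first means one AND and one compare suffice for
   any number of data references: the masked result is nonzero iff at
   least one address is misaligned.  */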
2113 
2114 static void
2115 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
2116                                    tree *cond_expr,
2117 				   gimple_seq *cond_expr_stmt_list)
2118 {
2119   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2120   vec<gimple> may_misalign_stmts
2121     = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2122   gimple ref_stmt;
2123   int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
2124   tree mask_cst;
2125   unsigned int i;
2126   tree int_ptrsize_type;
2127   char tmp_name[20];
2128   tree or_tmp_name = NULL_TREE;
2129   tree and_tmp_name;
2130   gimple and_stmt;
2131   tree ptrsize_zero;
2132   tree part_cond_expr;
2133 
2134   /* Check that mask is one less than a power of 2, i.e., mask is
2135      all zeros followed by all ones.  */
2136   gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
2137 
2138   int_ptrsize_type = signed_type_for (ptr_type_node);
2139 
2140   /* Create expression ((dr_1 | ... | dr_n) & mask), where dr_i is the address
2141      of the first vector of the i'th data reference.  */
2142 
2143   FOR_EACH_VEC_ELT (may_misalign_stmts, i, ref_stmt)
2144     {
2145       gimple_seq new_stmt_list = NULL;
2146       tree addr_base;
2147       tree addr_tmp_name;
2148       tree new_or_tmp_name;
2149       gimple addr_stmt, or_stmt;
2150       stmt_vec_info stmt_vinfo = vinfo_for_stmt (ref_stmt);
2151       tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2152       bool negative = tree_int_cst_compare
2153 	(DR_STEP (STMT_VINFO_DATA_REF (stmt_vinfo)), size_zero_node) < 0;
2154       tree offset = negative
2155 	? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : NULL_TREE;
2156 
2157       /* create: addr_tmp = (int)(address_of_first_vector) */
2158       addr_base =
2159 	vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
2160 					      offset, loop);
2161       if (new_stmt_list != NULL)
2162 	gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
2163 
2164       sprintf (tmp_name, "addr2int%d", i);
2165       addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
2166       addr_stmt = gimple_build_assign (addr_tmp_name, NOP_EXPR, addr_base);
2167       gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
2168 
2169       /* The addresses are ORed together.  */
2170 
2171       if (or_tmp_name != NULL_TREE)
2172         {
2173           /* create: or_tmp = or_tmp | addr_tmp */
2174           sprintf (tmp_name, "orptrs%d", i);
2175 	  new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
2176 	  or_stmt = gimple_build_assign (new_or_tmp_name, BIT_IOR_EXPR,
2177 					 or_tmp_name, addr_tmp_name);
2178 	  gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
2179           or_tmp_name = new_or_tmp_name;
2180         }
2181       else
2182         or_tmp_name = addr_tmp_name;
2183 
2184     } /* end for i */
2185 
2186   mask_cst = build_int_cst (int_ptrsize_type, mask);
2187 
2188   /* create: and_tmp = or_tmp & mask  */
2189   and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");
2190 
2191   and_stmt = gimple_build_assign (and_tmp_name, BIT_AND_EXPR,
2192 				  or_tmp_name, mask_cst);
2193   gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
2194 
2195   /* Make and_tmp the left operand of the conditional test against zero.
2196      If and_tmp has a nonzero bit, then some address is unaligned.  */
2197   ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
2198   part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
2199 				and_tmp_name, ptrsize_zero);
2200   if (*cond_expr)
2201     *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2202 			      *cond_expr, part_cond_expr);
2203   else
2204     *cond_expr = part_cond_expr;
2205 }
2206 
2207 /* Function vect_create_cond_for_alias_checks.
2208 
2209    Create a conditional expression that represents the run-time checks for
2210    overlapping of address ranges represented by a list of data references
2211    relations passed as input.
2212 
2213    Input:
2214    COND_EXPR  - input conditional expression.  New conditions will be chained
2215                 with logical AND operation.
2216    LOOP_VINFO - field LOOP_VINFO_COMP_ALIAS_DDRS contains the list of ddr
2217 	        pairs to be checked.
2219 
2220    Output:
2221    COND_EXPR - conditional expression.
2222 
2223    The returned COND_EXPR is the conditional expression to be used in the if
2224    statement that controls which version of the loop gets executed at runtime.
2225 */
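/* A hedged scalar model of one pairwise check generated below, treating
   each segment as a half-open byte range (the name is illustrative):

     static int
     segments_disjoint_sketch (unsigned long a_min, unsigned long a_max,
                               unsigned long b_min, unsigned long b_max)
     {
       return a_max <= b_min || b_max <= a_min;
     }

   The runtime precondition is the conjunction of this test over all DDR
   pairs that could not be disambiguated at compile time.  */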
2226 
2227 void
2228 vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
2229 {
2230   vec<dr_with_seg_len_pair_t> comp_alias_ddrs =
2231     LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
2232   tree part_cond_expr;
2233 
2234   /* Create expression
2235      (((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
2236       || ((load_ptr_0 + load_segment_length_0) <= store_ptr_0))
2237      &&
2238      ...
2239      &&
2240      (((store_ptr_n + store_segment_length_n) <= load_ptr_n)
2241       || ((load_ptr_n + load_segment_length_n) <= store_ptr_n))  */
2242 
2243   if (comp_alias_ddrs.is_empty ())
2244     return;
2245 
2246   for (size_t i = 0, s = comp_alias_ddrs.length (); i < s; ++i)
2247     {
2248       const dr_with_seg_len& dr_a = comp_alias_ddrs[i].first;
2249       const dr_with_seg_len& dr_b = comp_alias_ddrs[i].second;
2250       tree segment_length_a = dr_a.seg_len;
2251       tree segment_length_b = dr_b.seg_len;
2252 
2253       tree addr_base_a
2254 	= fold_build_pointer_plus (DR_BASE_ADDRESS (dr_a.dr), dr_a.offset);
2255       tree addr_base_b
2256 	= fold_build_pointer_plus (DR_BASE_ADDRESS (dr_b.dr), dr_b.offset);
2257 
2258       if (dump_enabled_p ())
2259 	{
2260 	  dump_printf_loc (MSG_NOTE, vect_location,
2261 			   "create runtime check for data references ");
2262 	  dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a.dr));
2263 	  dump_printf (MSG_NOTE, " and ");
2264 	  dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b.dr));
2265 	  dump_printf (MSG_NOTE, "\n");
2266 	}
2267 
2268       tree seg_a_min = addr_base_a;
2269       tree seg_a_max = fold_build_pointer_plus (addr_base_a, segment_length_a);
2270       /* For negative step, we need to adjust address range by TYPE_SIZE_UNIT
2271 	 bytes, e.g., int a[3] -> a[1] range is [a+4, a+16) instead of
2272 	 [a, a+12) */
2273       if (tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0)
2274 	{
2275 	  tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a.dr)));
2276 	  seg_a_min = fold_build_pointer_plus (seg_a_max, unit_size);
2277 	  seg_a_max = fold_build_pointer_plus (addr_base_a, unit_size);
2278 	}
2279 
2280       tree seg_b_min = addr_base_b;
2281       tree seg_b_max = fold_build_pointer_plus (addr_base_b, segment_length_b);
2282       if (tree_int_cst_compare (DR_STEP (dr_b.dr), size_zero_node) < 0)
2283 	{
2284 	  tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_b.dr)));
2285 	  seg_b_min = fold_build_pointer_plus (seg_b_max, unit_size);
2286 	  seg_b_max = fold_build_pointer_plus (addr_base_b, unit_size);
2287 	}
2288 
2289       part_cond_expr =
2290       	fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2291 	  fold_build2 (LE_EXPR, boolean_type_node, seg_a_max, seg_b_min),
2292 	  fold_build2 (LE_EXPR, boolean_type_node, seg_b_max, seg_a_min));
2293 
2294       if (*cond_expr)
2295 	*cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2296 				  *cond_expr, part_cond_expr);
2297       else
2298 	*cond_expr = part_cond_expr;
2299     }
2300 
2301   if (dump_enabled_p ())
2302     dump_printf_loc (MSG_NOTE, vect_location,
2303 		     "created %u versioning for alias checks.\n",
2304 		     comp_alias_ddrs.length ());
2305 
2306   comp_alias_ddrs.release ();
2307 }
2308 
2309 
2310 /* Function vect_loop_versioning.
2311 
2312    If the loop has data references that may or may not be aligned and/or
2313    has data reference relations whose independence was not proven, then
2314    two versions of the loop need to be generated, one which is vectorized
2315    and one which isn't.  A test is then generated to control which of the
2316    loops is executed.  The test checks for the alignment of all of the
2317    data references that may or may not be aligned.  An additional
2318    sequence of runtime tests is generated for each pair of DDRs whose
2319    independence was not proven.  The vectorized version of the loop is
2320    executed only if both the alias and alignment tests pass.
2321 
2322    The test generated to check which version of the loop is executed
2323    is also modified to check for profitability, as indicated by the
2324    cost model's initial estimate.
2325 
2326    The versioning precondition(s) are placed in *COND_EXPR and
2327    *COND_EXPR_STMT_LIST.  */
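/* Conceptually the versioning test built by this function has the shape
   below; this is a hedged sketch of the generated control flow, not
   literal output:

     if (scalar_loop_iters > th
         && ((addr_1 | ... | addr_n) & align_mask) == 0
         && all alias segment pairs are disjoint)
       vectorized loop;
     else
       scalar loop;

   where the profitability comparison against TH is present only when
   CHECK_PROFITABILITY is set.  */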
2328 
2329 void
2330 vect_loop_versioning (loop_vec_info loop_vinfo,
2331 		      unsigned int th, bool check_profitability)
2332 {
2333   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2334   struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
2335   basic_block condition_bb;
2336   gphi_iterator gsi;
2337   gimple_stmt_iterator cond_exp_gsi;
2338   basic_block merge_bb;
2339   basic_block new_exit_bb;
2340   edge new_exit_e, e;
2341   gphi *orig_phi, *new_phi;
2342   tree cond_expr = NULL_TREE;
2343   gimple_seq cond_expr_stmt_list = NULL;
2344   tree arg;
2345   unsigned prob = 4 * REG_BR_PROB_BASE / 5;
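  /* A hedged note on the constant above: with REG_BR_PROB_BASE == 10000,
     prob == 8000, i.e. the guard is predicted to branch to the
     vectorized loop 80% of the time.  */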
2346   gimple_seq gimplify_stmt_list = NULL;
2347   tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
2348   bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
2349   bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
2350 
2351   if (check_profitability)
2352     {
2353       cond_expr = fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
2354 			       build_int_cst (TREE_TYPE (scalar_loop_iters), th));
2355       cond_expr = force_gimple_operand_1 (cond_expr, &cond_expr_stmt_list,
2356 					  is_gimple_condexpr, NULL_TREE);
2357     }
2358 
2359   if (version_align)
2360     vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
2361 				       &cond_expr_stmt_list);
2362 
2363   if (version_alias)
2364     vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
2365 
2366   cond_expr = force_gimple_operand_1 (cond_expr, &gimplify_stmt_list,
2367 				      is_gimple_condexpr, NULL_TREE);
2368   gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
2369 
2370   initialize_original_copy_tables ();
2371   if (scalar_loop)
2372     {
2373       edge scalar_e;
2374       basic_block preheader, scalar_preheader;
2375 
2376       /* We don't want to scale SCALAR_LOOP's frequencies; we need to
2377 	 scale LOOP's frequencies instead.  */
2378       loop_version (scalar_loop, cond_expr, &condition_bb,
2379 		    prob, REG_BR_PROB_BASE, REG_BR_PROB_BASE - prob, true);
2380       scale_loop_frequencies (loop, prob, REG_BR_PROB_BASE);
2381       /* CONDITION_BB was created above SCALAR_LOOP's preheader,
2382 	 while we need to move it above LOOP's preheader.  */
2383       e = loop_preheader_edge (loop);
2384       scalar_e = loop_preheader_edge (scalar_loop);
2385       gcc_assert (empty_block_p (e->src)
2386 		  && single_pred_p (e->src));
2387       gcc_assert (empty_block_p (scalar_e->src)
2388 		  && single_pred_p (scalar_e->src));
2389       gcc_assert (single_pred_p (condition_bb));
2390       preheader = e->src;
2391       scalar_preheader = scalar_e->src;
2392       scalar_e = find_edge (condition_bb, scalar_preheader);
2393       e = single_pred_edge (preheader);
2394       redirect_edge_and_branch_force (single_pred_edge (condition_bb),
2395 				      scalar_preheader);
2396       redirect_edge_and_branch_force (scalar_e, preheader);
2397       redirect_edge_and_branch_force (e, condition_bb);
2398       set_immediate_dominator (CDI_DOMINATORS, condition_bb,
2399 			       single_pred (condition_bb));
2400       set_immediate_dominator (CDI_DOMINATORS, scalar_preheader,
2401 			       single_pred (scalar_preheader));
2402       set_immediate_dominator (CDI_DOMINATORS, preheader,
2403 			       condition_bb);
2404     }
2405   else
2406     loop_version (loop, cond_expr, &condition_bb,
2407 		  prob, prob, REG_BR_PROB_BASE - prob, true);
2408 
2409   if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
2410       && dump_enabled_p ())
2411     {
2412       if (version_alias)
2413         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2414                          "loop versioned for vectorization because of "
2415 			 "possible aliasing\n");
2416       if (version_align)
2417         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2418                          "loop versioned for vectorization to enhance "
2419 			 "alignment\n");
2420 
2421     }
2422   free_original_copy_tables ();
2423 
2424   /* Loop versioning violates an assumption we try to maintain during
2425      vectorization - that the loop exit block has a single predecessor.
2426      After versioning, the exit block of both loop versions is the same
2427      basic block (i.e., it has two predecessors).  To simplify subsequent
2428      transformations in the vectorizer, we fix this situation here by
2429      adding a new (empty) block on the exit-edge of the loop, with the
2430      proper loop-exit phis to maintain loop-closed form.
2431      If loop versioning wasn't done from loop, but scalar_loop instead,
2432      merge_bb will already have just a single predecessor.  */
2433 
2434   merge_bb = single_exit (loop)->dest;
2435   if (scalar_loop == NULL || EDGE_COUNT (merge_bb->preds) >= 2)
2436     {
2437       gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2);
2438       new_exit_bb = split_edge (single_exit (loop));
2439       new_exit_e = single_exit (loop);
2440       e = EDGE_SUCC (new_exit_bb, 0);
2441 
2442       for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
2443 	{
2444 	  tree new_res;
2445 	  orig_phi = gsi.phi ();
2446 	  new_res = copy_ssa_name (PHI_RESULT (orig_phi));
2447 	  new_phi = create_phi_node (new_res, new_exit_bb);
2448 	  arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
2449 	  add_phi_arg (new_phi, arg, new_exit_e,
2450 		       gimple_phi_arg_location_from_edge (orig_phi, e));
2451 	  adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
2452 	}
2453     }
2454 
2455   /* End loop-exit-fixes after versioning.  */
2456 
2457   if (cond_expr_stmt_list)
2458     {
2459       cond_exp_gsi = gsi_last_bb (condition_bb);
2460       gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
2461 			     GSI_SAME_STMT);
2462     }
2463   update_ssa (TODO_update_ssa);
2464 }
2465