/* Loop Vectorization
   Copyright (C) 2003-2017 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com> and
   Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "cfganal.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "cfgloop.h"
#include "params.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "gimple-fold.h"
#include "cgraph.h"
#include "tree-cfg.h"
#include "tree-if-conv.h"
#include "tree-eh.h"

/* Loop Vectorization Pass.

   This pass tries to vectorize loops.

   For example, the vectorizer transforms the following simple loop:

        short a[N]; short b[N]; short c[N]; int i;

        for (i=0; i<N; i++){
          a[i] = b[i] + c[i];
        }

   as if it had been manually vectorized by rewriting the source code into:

        typedef int __attribute__((mode(V8HI))) v8hi;
        short a[N];  short b[N]; short c[N];   int i;
        v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
        v8hi va, vb, vc;

        for (i=0; i<N/8; i++){
          vb = pb[i];
          vc = pc[i];
          va = vb + vc;
          pa[i] = va;
        }

        The main entry to this pass is vectorize_loops(), in which
   the vectorizer applies a set of analyses to a given set of loops,
   followed by the actual vectorization transformation for the loops that
   successfully passed the analysis phase.
        Throughout this pass we make a distinction between two types of
   data: scalars (which are represented by SSA_NAMEs), and memory references
   ("data-refs").  These two types of data require different handling both
   during analysis and transformation.  The types of data-refs that the
   vectorizer currently supports are ARRAY_REFs whose base is an array DECL
   (not a pointer), and INDIRECT_REFs through pointers; both array and pointer
   accesses are required to have a simple (consecutive) access pattern.

   Analysis phase:
   ===============
        The driver for the analysis phase is vect_analyze_loop().
   It applies a set of analyses, some of which rely on the scalar evolution
   analyzer (scev) developed by Sebastian Pop.

        During the analysis phase the vectorizer records some information
   per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
   loop, as well as general information about the loop as a whole, which is
   recorded in a "loop_vec_info" struct attached to each loop.

   Transformation phase:
   =====================
        The loop transformation phase scans all the stmts in the loop, and
   creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
   the loop that needs to be vectorized.  It inserts the vector code sequence
   just before the scalar stmt S, and records a pointer to the vector code
   in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
   attached to S).  This pointer will be used for the vectorization of following
   stmts which use the def of stmt S.  Stmt S is removed if it writes to memory;
   otherwise, we rely on dead code elimination for removing it.

        For example, say stmt S1 was vectorized into stmt VS1:

   VS1: vb = px[i];
   S1:  b = x[i];    STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
   S2:  a = b;

   To vectorize stmt S2, the vectorizer first finds the stmt that defines
   the operand 'b' (S1), and gets the relevant vector def 'vb' from the
   vector stmt VS1 pointed to by STMT_VINFO_VEC_STMT (stmt_info (S1)).  The
   resulting sequence would be:

   VS1: vb = px[i];
   S1:  b = x[i];       STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
   VS2: va = vb;
   S2:  a = b;          STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2

        Operands that are not SSA_NAMEs are data-refs that appear in
   load/store operations (like 'x[i]' in S1), and are handled differently.

   Target modeling:
   =================
        Currently the only target-specific information that is used is the
   size of the vector (in bytes) - "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD".
   Targets that can support different sizes of vectors will, for now, need
   to specify one value for "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD".  More
   flexibility will be added in the future.

        Since we only vectorize operations whose vector form can be
   expressed using existing tree codes, to verify that an operation is
   supported, the vectorizer checks the relevant optab at the relevant
   machine_mode (e.g., optab_handler (add_optab, V8HImode)).  If
   the value found is CODE_FOR_nothing, then there's no target support, and
   we can't vectorize the stmt.

   For additional information on this project see:
   http://gcc.gnu.org/projects/tree-ssa/vectorization.html
*/
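
/* Illustrative sketch (not part of the pass) of the optab query
   described above; the surrounding logic is simplified, but
   optab_handler and CODE_FOR_nothing are the real GCC interfaces:

     machine_mode vmode = TYPE_MODE (vectype);
     if (optab_handler (add_optab, vmode) == CODE_FOR_nothing)
       return false;   -- no target support; the stmt is not vectorized

   The actual checks live in the vectorizable_* routines, which consult
   the optab appropriate for each operation.  */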

static void vect_estimate_min_profitable_iters (loop_vec_info, int *, int *);

/* Function vect_determine_vectorization_factor

   Determine the vectorization factor (VF).  VF is the number of data elements
   that are operated upon in parallel in a single iteration of the vectorized
   loop.  For example, when vectorizing a loop that operates on 4-byte elements,
   on a target with a vector size (VS) of 16 bytes, the VF is set to 4, since 4
   elements can fit in a single vector register.

   We currently support vectorization of loops in which all types operated upon
   are of the same size.  Therefore this function currently sets VF according to
   the size of the types operated upon, and fails if there are multiple sizes
   in the loop.

   VF is also the factor by which the loop iterations are strip-mined, e.g.:
   original loop:
        for (i=0; i<N; i++){
          a[i] = b[i] + c[i];
        }

   vectorized loop:
        for (i=0; i<N; i+=VF){
          a[i:VF] = b[i:VF] + c[i:VF];
        }
*/
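
/* Illustrative note (not from the original sources): the loop below
   takes VF as the maximum nunits over all stmts.  E.g., assuming a
   16-byte vector size, a stmt operating on 2-byte shorts gets a
   vectype with nunits = 8 while a stmt operating on 4-byte ints gets
   nunits = 4; the loop VF then becomes 8, and stmts whose vectype has
   fewer elements are later vectorized using multiple vector stmts per
   vector iteration.  */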

static bool
vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned nbbs = loop->num_nodes;
  unsigned int vectorization_factor = 0;
  tree scalar_type = NULL_TREE;
  gphi *phi;
  tree vectype;
  unsigned int nunits;
  stmt_vec_info stmt_info;
  unsigned i;
  HOST_WIDE_INT dummy;
  gimple *stmt, *pattern_stmt = NULL;
  gimple_seq pattern_def_seq = NULL;
  gimple_stmt_iterator pattern_def_si = gsi_none ();
  bool analyze_pattern_stmt = false;
  bool bool_result;
  auto_vec<stmt_vec_info> mask_producers;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_determine_vectorization_factor ===\n");

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
	   gsi_next (&si))
	{
	  phi = si.phi ();
	  stmt_info = vinfo_for_stmt (phi);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "==> examining phi: ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
	    }

	  gcc_assert (stmt_info);

	  if (STMT_VINFO_RELEVANT_P (stmt_info)
	      || STMT_VINFO_LIVE_P (stmt_info))
            {
	      gcc_assert (!STMT_VINFO_VECTYPE (stmt_info));
              scalar_type = TREE_TYPE (PHI_RESULT (phi));

	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_NOTE, vect_location,
                                   "get vectype for scalar type:  ");
		  dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
                  dump_printf (MSG_NOTE, "\n");
		}

	      vectype = get_vectype_for_scalar_type (scalar_type);
	      if (!vectype)
		{
		  if (dump_enabled_p ())
		    {
		      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "not vectorized: unsupported "
                                       "data-type ");
		      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                         scalar_type);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
		    }
		  return false;
		}
	      STMT_VINFO_VECTYPE (stmt_info) = vectype;

	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
		  dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
                  dump_printf (MSG_NOTE, "\n");
		}

	      nunits = TYPE_VECTOR_SUBPARTS (vectype);
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location, "nunits = %d\n",
                                 nunits);

	      if (!vectorization_factor
		  || (nunits > vectorization_factor))
		vectorization_factor = nunits;
	    }
	}

      for (gimple_stmt_iterator si = gsi_start_bb (bb);
	   !gsi_end_p (si) || analyze_pattern_stmt;)
        {
          tree vf_vectype;

          if (analyze_pattern_stmt)
	    stmt = pattern_stmt;
          else
            stmt = gsi_stmt (si);

          stmt_info = vinfo_for_stmt (stmt);

	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location,
                               "==> examining statement: ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	    }

	  gcc_assert (stmt_info);

	  /* Skip stmts which do not need to be vectorized.  */
	  if ((!STMT_VINFO_RELEVANT_P (stmt_info)
	       && !STMT_VINFO_LIVE_P (stmt_info))
	      || gimple_clobber_p (stmt))
            {
              if (STMT_VINFO_IN_PATTERN_P (stmt_info)
                  && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
                  && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
                      || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
                {
                  stmt = pattern_stmt;
                  stmt_info = vinfo_for_stmt (pattern_stmt);
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_NOTE, vect_location,
                                       "==> examining pattern statement: ");
                      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
                    }
                }
              else
	        {
	          if (dump_enabled_p ())
	            dump_printf_loc (MSG_NOTE, vect_location, "skip.\n");
                  gsi_next (&si);
	          continue;
                }
	    }
          else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
                   && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
                   && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
                       || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
            analyze_pattern_stmt = true;

	  /* If a pattern statement has def stmts, analyze them too.  */
	  if (is_pattern_stmt_p (stmt_info))
	    {
	      if (pattern_def_seq == NULL)
		{
		  pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
		  pattern_def_si = gsi_start (pattern_def_seq);
		}
	      else if (!gsi_end_p (pattern_def_si))
		gsi_next (&pattern_def_si);
	      if (pattern_def_seq != NULL)
		{
		  gimple *pattern_def_stmt = NULL;
		  stmt_vec_info pattern_def_stmt_info = NULL;

		  while (!gsi_end_p (pattern_def_si))
		    {
		      pattern_def_stmt = gsi_stmt (pattern_def_si);
		      pattern_def_stmt_info
			= vinfo_for_stmt (pattern_def_stmt);
		      if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
			  || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
			break;
		      gsi_next (&pattern_def_si);
		    }

		  if (!gsi_end_p (pattern_def_si))
		    {
		      if (dump_enabled_p ())
			{
			  dump_printf_loc (MSG_NOTE, vect_location,
                                           "==> examining pattern def stmt: ");
			  dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
                                            pattern_def_stmt, 0);
			}

		      stmt = pattern_def_stmt;
		      stmt_info = pattern_def_stmt_info;
		    }
		  else
		    {
		      pattern_def_si = gsi_none ();
		      analyze_pattern_stmt = false;
		    }
		}
	      else
		analyze_pattern_stmt = false;
	    }

	  if (gimple_get_lhs (stmt) == NULL_TREE
	      /* MASK_STORE has no lhs, but is ok.  */
	      && (!is_gimple_call (stmt)
		  || !gimple_call_internal_p (stmt)
		  || gimple_call_internal_fn (stmt) != IFN_MASK_STORE))
	    {
	      if (is_gimple_call (stmt))
		{
		  /* Ignore calls with no lhs.  These must be calls to
		     #pragma omp simd functions, and the vectorization factor
		     such a call really needs can't be determined until
		     vectorizable_simd_clone_call.  */
		  if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
		    {
		      pattern_def_seq = NULL;
		      gsi_next (&si);
		    }
		  continue;
		}
	      if (dump_enabled_p ())
		{
	          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "not vectorized: irregular stmt.");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt,
                                    0);
		}
	      return false;
	    }

	  if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
	    {
	      if (dump_enabled_p ())
	        {
	          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "not vectorized: vector stmt in loop:");
	          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	        }
	      return false;
	    }

	  bool_result = false;

	  if (STMT_VINFO_VECTYPE (stmt_info))
	    {
	      /* The only case in which a vectype has already been set is for
	         stmts that contain a dataref, or for "pattern-stmts" (stmts
		 generated by the vectorizer to represent/replace a certain
		 idiom).  */
	      gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
			  || is_pattern_stmt_p (stmt_info)
			  || !gsi_end_p (pattern_def_si));
	      vectype = STMT_VINFO_VECTYPE (stmt_info);
	    }
	  else
	    {
	      gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
	      if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
		scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
	      else
		scalar_type = TREE_TYPE (gimple_get_lhs (stmt));

	      /* Bool ops don't participate in vectorization factor
		 computation.  For comparisons, use the compared types to
		 compute a factor.  */
	      if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type)
		  && is_gimple_assign (stmt)
		  && gimple_assign_rhs_code (stmt) != COND_EXPR)
		{
		  if (STMT_VINFO_RELEVANT_P (stmt_info)
		      || STMT_VINFO_LIVE_P (stmt_info))
		    mask_producers.safe_push (stmt_info);
		  bool_result = true;

		  if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
		      == tcc_comparison
		      && !VECT_SCALAR_BOOLEAN_TYPE_P
			    (TREE_TYPE (gimple_assign_rhs1 (stmt))))
		    scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
		  else
		    {
		      if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
			{
			  pattern_def_seq = NULL;
			  gsi_next (&si);
			}
		      continue;
		    }
		}

	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_NOTE, vect_location,
                                   "get vectype for scalar type:  ");
		  dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
                  dump_printf (MSG_NOTE, "\n");
		}
	      vectype = get_vectype_for_scalar_type (scalar_type);
	      if (!vectype)
		{
		  if (dump_enabled_p ())
		    {
		      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "not vectorized: unsupported "
                                       "data-type ");
		      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                         scalar_type);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
		    }
		  return false;
		}

	      if (!bool_result)
		STMT_VINFO_VECTYPE (stmt_info) = vectype;

	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
		  dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
                  dump_printf (MSG_NOTE, "\n");
		}
            }

	  /* Don't try to compute the VF from scalar types if the stmt
	     produces a boolean vector.  Use the result vectype instead.  */
	  if (VECTOR_BOOLEAN_TYPE_P (vectype))
	    vf_vectype = vectype;
	  else
	    {
	      /* The vectorization factor is according to the smallest
		 scalar type (or the largest vector size, but we only
		 support one vector size per loop).  */
	      if (!bool_result)
		scalar_type = vect_get_smallest_scalar_type (stmt, &dummy,
							     &dummy);
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_NOTE, vect_location,
				   "get vectype for scalar type:  ");
		  dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
		  dump_printf (MSG_NOTE, "\n");
		}
	      vf_vectype = get_vectype_for_scalar_type (scalar_type);
	    }
	  if (!vf_vectype)
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "not vectorized: unsupported data-type ");
		  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                     scalar_type);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
		}
	      return false;
	    }

	  if ((GET_MODE_SIZE (TYPE_MODE (vectype))
	       != GET_MODE_SIZE (TYPE_MODE (vf_vectype))))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "not vectorized: different sized vector "
                                   "types in statement, ");
		  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                     vectype);
		  dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
		  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                     vf_vectype);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
		}
	      return false;
	    }

	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
	      dump_generic_expr (MSG_NOTE, TDF_SLIM, vf_vectype);
              dump_printf (MSG_NOTE, "\n");
	    }

	  nunits = TYPE_VECTOR_SUBPARTS (vf_vectype);
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location, "nunits = %d\n", nunits);
	  if (!vectorization_factor
	      || (nunits > vectorization_factor))
	    vectorization_factor = nunits;

	  if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
	    {
	      pattern_def_seq = NULL;
	      gsi_next (&si);
	    }
        }
    }

  /* TODO: Analyze cost.  Decide if it is worthwhile to vectorize.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "vectorization factor = %d\n",
                     vectorization_factor);
  if (vectorization_factor <= 1)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: unsupported data-type\n");
      return false;
    }
  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;

  for (i = 0; i < mask_producers.length (); i++)
    {
      tree mask_type = NULL;

      stmt = STMT_VINFO_STMT (mask_producers[i]);

      if (is_gimple_assign (stmt)
	  && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
	  && !VECT_SCALAR_BOOLEAN_TYPE_P
				      (TREE_TYPE (gimple_assign_rhs1 (stmt))))
	{
	  scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
	  mask_type = get_mask_type_for_scalar_type (scalar_type);

	  if (!mask_type)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "not vectorized: unsupported mask\n");
	      return false;
	    }
	}
      else
	{
	  tree rhs;
	  ssa_op_iter iter;
	  gimple *def_stmt;
	  enum vect_def_type dt;

	  FOR_EACH_SSA_TREE_OPERAND (rhs, stmt, iter, SSA_OP_USE)
	    {
	      if (!vect_is_simple_use (rhs, mask_producers[i]->vinfo,
				       &def_stmt, &dt, &vectype))
		{
		  if (dump_enabled_p ())
		    {
		      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				       "not vectorized: can't compute mask type "
				       "for statement, ");
		      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt,
					0);
		    }
		  return false;
		}

	      /* No vectype probably means an external definition.
		 Allow it in case there is another operand from which
		 we can determine the mask type.  */
	      if (!vectype)
		continue;

	      if (!mask_type)
		mask_type = vectype;
	      else if (TYPE_VECTOR_SUBPARTS (mask_type)
		       != TYPE_VECTOR_SUBPARTS (vectype))
		{
		  if (dump_enabled_p ())
		    {
		      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				       "not vectorized: different sized mask "
				       "types in statement, ");
		      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					 mask_type);
		      dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
		      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					 vectype);
		      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
		    }
		  return false;
		}
	      else if (VECTOR_BOOLEAN_TYPE_P (mask_type)
		       != VECTOR_BOOLEAN_TYPE_P (vectype))
		{
		  if (dump_enabled_p ())
		    {
		      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				       "not vectorized: mixed mask and "
				       "nonmask vector types in statement, ");
		      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					 mask_type);
		      dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
		      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					 vectype);
		      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
		    }
		  return false;
		}
	    }

	  /* We may compare boolean values loaded as a vector of integers.
	     Fix mask_type in such a case.  */
	  if (mask_type
	      && !VECTOR_BOOLEAN_TYPE_P (mask_type)
	      && gimple_code (stmt) == GIMPLE_ASSIGN
	      && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison)
	    mask_type = build_same_sized_truth_vector_type (mask_type);
	}

      /* No mask_type should mean a loop-invariant predicate.
	 This is probably a subject for optimization in
	 if-conversion.  */
      if (!mask_type)
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "not vectorized: can't compute mask type "
			       "for statement, ");
	      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt,
				0);
	    }
	  return false;
	}

      STMT_VINFO_VECTYPE (mask_producers[i]) = mask_type;
    }

  return true;
}
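
/* Illustrative example (not from the original sources) of the
   mask-producer handling above: for a comparison such as
   flag = (a[i] < b[i]) on 4-byte ints, the comparison itself does not
   contribute to the VF; its mask type is instead derived from the
   compared type, yielding a boolean vector with the same number of
   elements as the int operands' vectype.  */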

/* Function vect_is_simple_iv_evolution.

   FORNOW: A simple evolution of an induction variable in the loop is
   considered a polynomial evolution.  */

static bool
vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
                             tree * step)
{
  tree init_expr;
  tree step_expr;
  tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
  basic_block bb;

  /* When there is no evolution in this loop, the evolution function
     is not "simple".  */
  if (evolution_part == NULL_TREE)
    return false;

  /* When the evolution is a polynomial of degree >= 2
     the evolution function is not "simple".  */
  if (tree_is_chrec (evolution_part))
    return false;

  step_expr = evolution_part;
  init_expr = unshare_expr (initial_condition_in_loop_num (access_fn, loop_nb));

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "step: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, step_expr);
      dump_printf (MSG_NOTE, ",  init: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, init_expr);
      dump_printf (MSG_NOTE, "\n");
    }

  *init = init_expr;
  *step = step_expr;

  if (TREE_CODE (step_expr) != INTEGER_CST
      && (TREE_CODE (step_expr) != SSA_NAME
	  || ((bb = gimple_bb (SSA_NAME_DEF_STMT (step_expr)))
	      && flow_bb_inside_loop_p (get_loop (cfun, loop_nb), bb))
	  || (!INTEGRAL_TYPE_P (TREE_TYPE (step_expr))
	      && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr))
		  || !flag_associative_math)))
      && (TREE_CODE (step_expr) != REAL_CST
	  || !flag_associative_math))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "step unknown.\n");
      return false;
    }

  return true;
}
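
/* Illustrative example (not from the original sources): for

     for (i = 0; i < N; i++)
       p = p + 4;

   scev computes the access function {p_0, +, 4}_1, whose initial
   condition is p_0 and whose evolution part (the step) is the
   constant 4, so the evolution is "simple".  If the step itself
   varied inside the loop, the evolution part would itself be a chrec
   and tree_is_chrec above would cause the function to reject it.  */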

/* Function vect_analyze_scalar_cycles_1.

   Examine the cross iteration def-use cycles of scalar variables
   in LOOP.  LOOP_VINFO represents the loop that is now being
   considered for vectorization (can be LOOP, or an outer-loop
   enclosing LOOP).  */

static void
vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
{
  basic_block bb = loop->header;
  tree init, step;
  auto_vec<gimple *, 64> worklist;
  gphi_iterator gsi;
  bool double_reduc;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_analyze_scalar_cycles ===\n");

  /* First - identify all inductions.  Reduction detection assumes that all the
     inductions have been identified; therefore, this order must not be
     changed.  */
  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      tree access_fn = NULL;
      tree def = PHI_RESULT (phi);
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
	}

      /* Skip virtual PHIs.  The data dependences that are associated with
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
      if (virtual_operand_p (def))
	continue;

      STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;

      /* Analyze the evolution function.  */
      access_fn = analyze_scalar_evolution (loop, def);
      if (access_fn)
	{
	  STRIP_NOPS (access_fn);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location,
                               "Access function of PHI: ");
	      dump_generic_expr (MSG_NOTE, TDF_SLIM, access_fn);
              dump_printf (MSG_NOTE, "\n");
	    }
	  STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_vinfo)
	    = initial_condition_in_loop_num (access_fn, loop->num);
	  STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo)
	    = evolution_part_in_loop_num (access_fn, loop->num);
	}

      if (!access_fn
	  || !vect_is_simple_iv_evolution (loop->num, access_fn, &init, &step)
	  || (LOOP_VINFO_LOOP (loop_vinfo) != loop
	      && TREE_CODE (step) != INTEGER_CST))
	{
	  worklist.safe_push (phi);
	  continue;
	}

      gcc_assert (STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_vinfo)
		  != NULL_TREE);
      gcc_assert (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo) != NULL_TREE);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "Detected induction.\n");
      STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
    }


  /* Second - identify all reductions and nested cycles.  */
  while (worklist.length () > 0)
    {
      gimple *phi = worklist.pop ();
      tree def = PHI_RESULT (phi);
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
      gimple *reduc_stmt;
      bool nested_cycle;

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
        }

      gcc_assert (!virtual_operand_p (def)
		  && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);

      nested_cycle = (loop != LOOP_VINFO_LOOP (loop_vinfo));
      reduc_stmt = vect_force_simple_reduction (loop_vinfo, phi, !nested_cycle,
						&double_reduc, false);
      if (reduc_stmt)
        {
          if (double_reduc)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location,
				 "Detected double reduction.\n");

              STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_double_reduction_def;
              STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
                                                    vect_double_reduction_def;
            }
          else
            {
              if (nested_cycle)
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_NOTE, vect_location,
				     "Detected vectorizable nested cycle.\n");

                  STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_nested_cycle;
                  STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
                                                             vect_nested_cycle;
                }
              else
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_NOTE, vect_location,
				     "Detected reduction.\n");

                  STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
                  STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
                                                           vect_reduction_def;
                  /* Store the reduction cycles for possible vectorization in
                     loop-aware SLP.  */
                  LOOP_VINFO_REDUCTIONS (loop_vinfo).safe_push (reduc_stmt);
                }
            }
        }
      else
        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "Unknown def-use cycle pattern.\n");
    }
}


/* Function vect_analyze_scalar_cycles.

   Examine the cross iteration def-use cycles of scalar variables, by
   analyzing the loop-header PHIs of scalar variables.  Classify each
   cycle as one of the following: invariant, induction, reduction, unknown.
   We do that for the loop represented by LOOP_VINFO, and also for its
   inner loop, if it exists.
   Examples for scalar cycles:

   Example1: reduction:

              loop1:
              for (i=0; i<N; i++)
                 sum += a[i];

   Example2: induction:

              loop2:
              for (i=0; i<N; i++)
                 a[i] = i;  */

static void
vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  vect_analyze_scalar_cycles_1 (loop_vinfo, loop);

  /* When vectorizing an outer-loop, the inner-loop is executed sequentially.
     Reductions in such an inner-loop therefore have different properties than
     the reductions in the nest that gets vectorized:
     1. When vectorized, they are executed in the same order as in the original
        scalar loop, so we can't change the order of computation when
        vectorizing them.
     2. FIXME: Inner-loop reductions can be used in the inner-loop, so the
        current checks are too strict.  */

  if (loop->inner)
    vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
}

/* Transfer group and reduction information from STMT to its pattern stmt.  */

static void
vect_fixup_reduc_chain (gimple *stmt)
{
  gimple *firstp = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
  gimple *stmtp;
  gcc_assert (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (firstp))
	      && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)));
  GROUP_SIZE (vinfo_for_stmt (firstp)) = GROUP_SIZE (vinfo_for_stmt (stmt));
  do
    {
      stmtp = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
      GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmtp)) = firstp;
      stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
      if (stmt)
	GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmtp))
	  = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
    }
  while (stmt);
  STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmtp)) = vect_reduction_def;
}
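
/* Illustrative example (not from the original sources): if a reduction
   chain S1 -> S2 is recognized as pattern stmts PS1 -> PS2, the group
   size and the GROUP_FIRST_ELEMENT/GROUP_NEXT_ELEMENT links are copied
   from the original stmts onto PS1/PS2, and the last pattern stmt is
   marked as the reduction def.  */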

/* Fixup scalar cycles that now have their stmts detected as patterns.  */

static void
vect_fixup_scalar_cycles_with_patterns (loop_vec_info loop_vinfo)
{
  gimple *first;
  unsigned i;

  FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo), i, first)
    if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (first)))
      {
	gimple *next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (first));
	while (next)
	  {
	    if (! STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next)))
	      break;
	    next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
	  }
	/* If not all stmts in the chain are patterns, try to handle
	   the chain without patterns.  */
	if (! next)
	  {
	    vect_fixup_reduc_chain (first);
	    LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)[i]
	      = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first));
	  }
      }
}

/* Function vect_get_loop_niters.

   Determine the number of iterations the loop executes and place it
   in NUMBER_OF_ITERATIONS.  Place the number of latch iterations
   in NUMBER_OF_ITERATIONSM1.  Place the condition under which the
   niter information holds in ASSUMPTIONS.

   Return the loop exit condition.  */


static gcond *
vect_get_loop_niters (struct loop *loop, tree *assumptions,
		      tree *number_of_iterations, tree *number_of_iterationsm1)
{
  edge exit = single_exit (loop);
  struct tree_niter_desc niter_desc;
  tree niter_assumptions, niter, may_be_zero;
  gcond *cond = get_loop_exit_condition (loop);

  *assumptions = boolean_true_node;
  *number_of_iterationsm1 = chrec_dont_know;
  *number_of_iterations = chrec_dont_know;
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "=== get_loop_niters ===\n");

  if (!exit)
    return cond;

  niter = chrec_dont_know;
  may_be_zero = NULL_TREE;
  niter_assumptions = boolean_true_node;
  if (!number_of_iterations_exit_assumptions (loop, exit, &niter_desc, NULL)
      || chrec_contains_undetermined (niter_desc.niter))
    return cond;

  niter_assumptions = niter_desc.assumptions;
  may_be_zero = niter_desc.may_be_zero;
  niter = niter_desc.niter;

  if (may_be_zero && integer_zerop (may_be_zero))
    may_be_zero = NULL_TREE;

  if (may_be_zero)
    {
      if (COMPARISON_CLASS_P (may_be_zero))
	{
	  /* Try to combine may_be_zero with assumptions; this can simplify
	     the computation of the niter expression.  */
	  if (niter_assumptions && !integer_nonzerop (niter_assumptions))
	    niter_assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
					     niter_assumptions,
					     fold_build1 (TRUTH_NOT_EXPR,
							  boolean_type_node,
							  may_be_zero));
	  else
	    niter = fold_build3 (COND_EXPR, TREE_TYPE (niter), may_be_zero,
				 build_int_cst (TREE_TYPE (niter), 0),
				 rewrite_to_non_trapping_overflow (niter));

	  may_be_zero = NULL_TREE;
	}
      else if (integer_nonzerop (may_be_zero))
	{
	  *number_of_iterationsm1 = build_int_cst (TREE_TYPE (niter), 0);
	  *number_of_iterations = build_int_cst (TREE_TYPE (niter), 1);
	  return cond;
	}
      else
	return cond;
    }

  *assumptions = niter_assumptions;
  *number_of_iterationsm1 = niter;

  /* We want the number of loop header executions which is the number
     of latch executions plus one.
     ???  For UINT_MAX latch executions this number overflows to zero
     for loops like do { n++; } while (n != 0);  */
  if (niter && !chrec_contains_undetermined (niter))
    niter = fold_build2 (PLUS_EXPR, TREE_TYPE (niter), unshare_expr (niter),
			  build_int_cst (TREE_TYPE (niter), 1));
  *number_of_iterations = niter;

  return cond;
}
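
/* Illustrative example (not from the original sources): for

     for (i = 0; i < n; i++) ...

   with n > 0, the latch executes n - 1 times, so NUMBER_OF_ITERATIONSM1
   is n - 1 and NUMBER_OF_ITERATIONS (the number of header executions)
   is n.  If the niter analysis only holds under some condition, that
   condition is returned in ASSUMPTIONS.  */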

/* Function bb_in_loop_p

   Used as predicate for dfs order traversal of the loop bbs.  */

static bool
bb_in_loop_p (const_basic_block bb, const void *data)
{
  const struct loop *const loop = (const struct loop *)data;
  if (flow_bb_inside_loop_p (loop, bb))
    return true;
  return false;
}


/* Function new_loop_vec_info.

   Create and initialize a new loop_vec_info struct for LOOP, as well as
   stmt_vec_info structs for all the stmts in LOOP.  */

static loop_vec_info
new_loop_vec_info (struct loop *loop)
{
  loop_vec_info res;
  basic_block *bbs;
  gimple_stmt_iterator si;
  unsigned int i, nbbs;

  res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
  res->kind = vec_info::loop;
  LOOP_VINFO_LOOP (res) = loop;

  bbs = get_loop_body (loop);

  /* Create/Update stmt_info for all stmts in the loop.  */
  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  gimple *phi = gsi_stmt (si);
	  gimple_set_uid (phi, 0);
	  set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, res));
	}

      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  gimple *stmt = gsi_stmt (si);
	  gimple_set_uid (stmt, 0);
	  set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, res));
	}
    }

  /* CHECKME: We want to visit all BBs before their successors (except for
     latch blocks, for which this assertion wouldn't hold).  In the simple
     case of the loop forms we allow, a dfs order of the BBs would be the
     same as a reversed postorder traversal, so we are safe.  */

   free (bbs);
   bbs = XCNEWVEC (basic_block, loop->num_nodes);
   nbbs = dfs_enumerate_from (loop->header, 0, bb_in_loop_p,
                              bbs, loop->num_nodes, loop);
   gcc_assert (nbbs == loop->num_nodes);

  LOOP_VINFO_BBS (res) = bbs;
  LOOP_VINFO_NITERSM1 (res) = NULL;
  LOOP_VINFO_NITERS (res) = NULL;
  LOOP_VINFO_NITERS_UNCHANGED (res) = NULL;
  LOOP_VINFO_NITERS_ASSUMPTIONS (res) = NULL;
  LOOP_VINFO_COST_MODEL_THRESHOLD (res) = 0;
  LOOP_VINFO_VECTORIZABLE_P (res) = 0;
  LOOP_VINFO_PEELING_FOR_ALIGNMENT (res) = 0;
  LOOP_VINFO_VECT_FACTOR (res) = 0;
  LOOP_VINFO_LOOP_NEST (res) = vNULL;
  LOOP_VINFO_DATAREFS (res) = vNULL;
  LOOP_VINFO_DDRS (res) = vNULL;
  LOOP_VINFO_UNALIGNED_DR (res) = NULL;
  LOOP_VINFO_MAY_MISALIGN_STMTS (res) = vNULL;
  LOOP_VINFO_MAY_ALIAS_DDRS (res) = vNULL;
  LOOP_VINFO_GROUPED_STORES (res) = vNULL;
  LOOP_VINFO_REDUCTIONS (res) = vNULL;
  LOOP_VINFO_REDUCTION_CHAINS (res) = vNULL;
  LOOP_VINFO_SLP_INSTANCES (res) = vNULL;
  LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1;
  LOOP_VINFO_TARGET_COST_DATA (res) = init_cost (loop);
  LOOP_VINFO_PEELING_FOR_GAPS (res) = false;
  LOOP_VINFO_PEELING_FOR_NITER (res) = false;
  LOOP_VINFO_OPERANDS_SWAPPED (res) = false;
  LOOP_VINFO_ORIG_LOOP_INFO (res) = NULL;

  return res;
}


/* Function destroy_loop_vec_info.

   Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
   stmts in the loop.  */

void
destroy_loop_vec_info (loop_vec_info loop_vinfo, bool clean_stmts)
{
  struct loop *loop;
  basic_block *bbs;
  int nbbs;
  gimple_stmt_iterator si;
  int j;
  vec<slp_instance> slp_instances;
  slp_instance instance;
  bool swapped;

  if (!loop_vinfo)
    return;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  bbs = LOOP_VINFO_BBS (loop_vinfo);
  nbbs = clean_stmts ? loop->num_nodes : 0;
  swapped = LOOP_VINFO_OPERANDS_SWAPPED (loop_vinfo);

  for (j = 0; j < nbbs; j++)
    {
      basic_block bb = bbs[j];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        free_stmt_vec_info (gsi_stmt (si));

      for (si = gsi_start_bb (bb); !gsi_end_p (si); )
        {
	  gimple *stmt = gsi_stmt (si);

	  /* We may have broken canonical form by moving a constant
	     into RHS1 of a commutative op.  Fix such occurrences.  */
	  if (swapped && is_gimple_assign (stmt))
	    {
	      enum tree_code code = gimple_assign_rhs_code (stmt);

	      if ((code == PLUS_EXPR
		   || code == POINTER_PLUS_EXPR
		   || code == MULT_EXPR)
		  && CONSTANT_CLASS_P (gimple_assign_rhs1 (stmt)))
		swap_ssa_operands (stmt,
				   gimple_assign_rhs1_ptr (stmt),
				   gimple_assign_rhs2_ptr (stmt));
	      else if (code == COND_EXPR
		       && CONSTANT_CLASS_P (gimple_assign_rhs2 (stmt)))
		{
		  tree cond_expr = gimple_assign_rhs1 (stmt);
		  enum tree_code cond_code = TREE_CODE (cond_expr);

		  if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
		    {
		      bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr,
								  0));
		      cond_code = invert_tree_comparison (cond_code,
							  honor_nans);
		      if (cond_code != ERROR_MARK)
			{
			  TREE_SET_CODE (cond_expr, cond_code);
			  swap_ssa_operands (stmt,
					     gimple_assign_rhs2_ptr (stmt),
					     gimple_assign_rhs3_ptr (stmt));
			}
		    }
		}
	    }

	  /* Free stmt_vec_info.  */
	  free_stmt_vec_info (stmt);
          gsi_next (&si);
        }
    }

  free (LOOP_VINFO_BBS (loop_vinfo));
  vect_destroy_datarefs (loop_vinfo);
  free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
  LOOP_VINFO_LOOP_NEST (loop_vinfo).release ();
  LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).release ();
  LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).release ();
  LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).release ();
  slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  FOR_EACH_VEC_ELT (slp_instances, j, instance)
    vect_free_slp_instance (instance);

  LOOP_VINFO_SLP_INSTANCES (loop_vinfo).release ();
  LOOP_VINFO_GROUPED_STORES (loop_vinfo).release ();
  LOOP_VINFO_REDUCTIONS (loop_vinfo).release ();
  LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).release ();

  destroy_cost_data (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo));
  loop_vinfo->scalar_cost_vec.release ();

  free (loop_vinfo);
  loop->aux = NULL;
}


/* Calculate the cost of one scalar iteration of the loop.  */
static void
vect_compute_single_scalar_iteration_cost (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes, factor, scalar_single_iter_cost = 0;
  int innerloop_iters, i;

  /* Count statements in the scalar loop.  Using this as the scalar cost
     for a single iteration for now.

     TODO: Add outer loop support.

     TODO: Consider assigning different costs to different scalar
     statements.  */

  /* FORNOW.  */
  innerloop_iters = 1;
  if (loop->inner)
    innerloop_iters = 50; /* FIXME */

  for (i = 0; i < nbbs; i++)
    {
      gimple_stmt_iterator si;
      basic_block bb = bbs[i];

      if (bb->loop_father == loop->inner)
        factor = innerloop_iters;
      else
        factor = 1;

      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
	  gimple *stmt = gsi_stmt (si);
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

          if (!is_gimple_assign (stmt) && !is_gimple_call (stmt))
            continue;

          /* Skip stmts that are not vectorized inside the loop.  */
          if (stmt_info
              && !STMT_VINFO_RELEVANT_P (stmt_info)
              && (!STMT_VINFO_LIVE_P (stmt_info)
                  || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
	      && !STMT_VINFO_IN_PATTERN_P (stmt_info))
            continue;

	  vect_cost_for_stmt kind;
          if (STMT_VINFO_DATA_REF (stmt_info))
            {
              if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
               kind = scalar_load;
             else
               kind = scalar_store;
            }
          else
            kind = scalar_stmt;

	  scalar_single_iter_cost
	    += record_stmt_cost (&LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
				 factor, kind, stmt_info, 0, vect_prologue);
        }
    }
  LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST (loop_vinfo)
    = scalar_single_iter_cost;
}
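
/* Illustrative example (not from the original sources): a scalar loop
   body consisting of one load, one add and one store accumulates
   scalar_load + scalar_stmt + scalar_store (each weighted by FACTOR
   for stmts inside an inner loop) into the single scalar iteration
   cost consumed later by the vectorization cost model.  */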


/* Function vect_analyze_loop_form_1.

   Verify that certain CFG restrictions hold, including:
   - the loop has a pre-header
   - the loop has a single entry and exit
   - the loop exit condition is simple enough
   - the number of iterations can be analyzed, i.e., it is a countable
     loop.  The niter could be analyzed under some assumptions.  */

bool
vect_analyze_loop_form_1 (struct loop *loop, gcond **loop_cond,
			  tree *assumptions, tree *number_of_iterationsm1,
			  tree *number_of_iterations, gcond **inner_loop_cond)
{
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "=== vect_analyze_loop_form ===\n");

  /* Different restrictions apply when we are considering an inner-most loop,
     vs. an outer (nested) loop.
     (FORNOW. May want to relax some of these restrictions in the future).  */

  if (!loop->inner)
    {
      /* Inner-most loop.  We currently require that the number of BBs is
	 exactly 2 (the header and latch).  Vectorizable inner-most loops
	 look like this:

                        (pre-header)
                           |
                          header <--------+
                           | |            |
                           | +--> latch --+
                           |
                        (exit-bb)  */

      if (loop->num_nodes != 2)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "not vectorized: control flow in loop.\n");
          return false;
        }

      if (empty_block_p (loop->header))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "not vectorized: empty loop.\n");
	  return false;
	}
    }
  else
    {
      struct loop *innerloop = loop->inner;
      edge entryedge;

      /* Nested loop.  We currently require that the loop is doubly-nested,
	 contains a single inner loop, and the number of BBs is exactly 5.
	 Vectorizable outer-loops look like this:

			(pre-header)
			   |
			  header <---+
			   |         |
		          inner-loop |
			   |         |
			  tail ------+
			   |
		        (exit-bb)

	 The inner-loop has the properties expected of inner-most loops
	 as described above.  */

      if ((loop->inner)->inner || (loop->inner)->next)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "not vectorized: multiple nested loops.\n");
	  return false;
	}

      if (loop->num_nodes != 5)
        {
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "not vectorized: control flow in loop.\n");
	  return false;
        }

      entryedge = loop_preheader_edge (innerloop);
      if (entryedge->src != loop->header
	  || !single_exit (innerloop)
	  || single_exit (innerloop)->dest != EDGE_PRED (loop->latch, 0)->src)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "not vectorized: unsupported outerloop form.\n");
	  return false;
	}

      /* Analyze the inner-loop.  */
      tree inner_niterm1, inner_niter, inner_assumptions;
      if (! vect_analyze_loop_form_1 (loop->inner, inner_loop_cond,
				      &inner_assumptions, &inner_niterm1,
				      &inner_niter, NULL)
	  /* Don't support analyzing niter under assumptions for inner
	     loop.  */
	  || !integer_onep (inner_assumptions))
	{
	  if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "not vectorized: Bad inner loop.\n");
	  return false;
	}

      if (!expr_invariant_in_loop_p (loop, inner_niter))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "not vectorized: inner-loop count not"
                             " invariant.\n");
	  return false;
	}

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
			 "Considering outer-loop vectorization.\n");
    }

  if (!single_exit (loop)
      || EDGE_COUNT (loop->header->preds) != 2)
    {
      if (dump_enabled_p ())
        {
          if (!single_exit (loop))
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "not vectorized: multiple exits.\n");
          else if (EDGE_COUNT (loop->header->preds) != 2)
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "not vectorized: too many incoming edges.\n");
        }
      return false;
    }

  /* We assume that the loop exit condition is at the end of the loop, i.e.,
     that the loop is represented as a do-while (with a proper if-guard
     before the loop if needed), where the loop header contains all the
     executable statements, and the latch is empty.  */
  if (!empty_block_p (loop->latch)
      || !gimple_seq_empty_p (phi_nodes (loop->latch)))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: latch block not empty.\n");
      return false;
    }

  /* Make sure the exit is not abnormal.  */
  edge e = single_exit (loop);
  if (e->flags & EDGE_ABNORMAL)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: abnormal loop exit edge.\n");
      return false;
    }

  *loop_cond = vect_get_loop_niters (loop, assumptions, number_of_iterations,
				     number_of_iterationsm1);
  if (!*loop_cond)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: complicated exit condition.\n");
      return false;
    }

  if (integer_zerop (*assumptions)
      || !*number_of_iterations
      || chrec_contains_undetermined (*number_of_iterations))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: number of iterations cannot be "
			 "computed.\n");
      return false;
    }

  if (integer_zerop (*number_of_iterations))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: number of iterations = 0.\n");
      return false;
    }

  return true;
}
1553 
1554 /* Analyze LOOP form and return a loop_vec_info if it is of suitable form.  */
1555 
1556 loop_vec_info
1557 vect_analyze_loop_form (struct loop *loop)
1558 {
1559   tree assumptions, number_of_iterations, number_of_iterationsm1;
1560   gcond *loop_cond, *inner_loop_cond = NULL;
1561 
1562   if (! vect_analyze_loop_form_1 (loop, &loop_cond,
1563 				  &assumptions, &number_of_iterationsm1,
1564 				  &number_of_iterations, &inner_loop_cond))
1565     return NULL;
1566 
1567   loop_vec_info loop_vinfo = new_loop_vec_info (loop);
1568   LOOP_VINFO_NITERSM1 (loop_vinfo) = number_of_iterationsm1;
1569   LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
1570   LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations;
1571   if (!integer_onep (assumptions))
1572     {
1573       /* We consider vectorizing this loop by versioning it under
1574 	 some assumptions.  In order to do this, we need to clear
1575 	 existing information computed by the scev and niter analyzers.  */
1576       scev_reset_htab ();
1577       free_numbers_of_iterations_estimates_loop (loop);
1578       /* Also set a flag for this loop so that subsequent scev and niter
1579 	 analyses are done under the assumptions.  */
1580       loop_constraint_set (loop, LOOP_C_FINITE);
1581       /* Also record the assumptions for versioning.  */
1582       LOOP_VINFO_NITERS_ASSUMPTIONS (loop_vinfo) = assumptions;
1583     }
1584 
1585   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
1586     {
1587       if (dump_enabled_p ())
1588         {
1589           dump_printf_loc (MSG_NOTE, vect_location,
1590 			   "Symbolic number of iterations is ");
1591 	  dump_generic_expr (MSG_NOTE, TDF_DETAILS, number_of_iterations);
1592           dump_printf (MSG_NOTE, "\n");
1593         }
1594     }
1595 
1596   STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond)) = loop_exit_ctrl_vec_info_type;
1597   if (inner_loop_cond)
1598     STMT_VINFO_TYPE (vinfo_for_stmt (inner_loop_cond))
1599       = loop_exit_ctrl_vec_info_type;
1600 
1601   gcc_assert (!loop->aux);
1602   loop->aux = loop_vinfo;
1603   return loop_vinfo;
1604 }
1605 
1606 
1607 
1608 /* Scan the loop stmts and, depending on whether there are any (non-)SLP
1609    statements, update the vectorization factor.  */
1610 
1611 static void
1612 vect_update_vf_for_slp (loop_vec_info loop_vinfo)
1613 {
1614   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1615   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1616   int nbbs = loop->num_nodes;
1617   unsigned int vectorization_factor;
1618   int i;
1619 
1620   if (dump_enabled_p ())
1621     dump_printf_loc (MSG_NOTE, vect_location,
1622 		     "=== vect_update_vf_for_slp ===\n");
1623 
1624   vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1625   gcc_assert (vectorization_factor != 0);
1626 
1627   /* If all the stmts in the loop can be SLPed, we perform only SLP, and
1628      the vectorization factor of the loop is the unrolling factor required
1629      by the SLP instances.  If that unrolling factor is 1, we say that we
1630      perform pure SLP on the loop - cross-iteration parallelism is not
1631      exploited.  */
1632   bool only_slp_in_loop = true;
1633   for (i = 0; i < nbbs; i++)
1634     {
1635       basic_block bb = bbs[i];
1636       for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
1637 	   gsi_next (&si))
1638 	{
1639 	  gimple *stmt = gsi_stmt (si);
1640 	  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1641 	  if (STMT_VINFO_IN_PATTERN_P (stmt_info)
1642 	      && STMT_VINFO_RELATED_STMT (stmt_info))
1643 	    {
1644 	      stmt = STMT_VINFO_RELATED_STMT (stmt_info);
1645 	      stmt_info = vinfo_for_stmt (stmt);
1646 	    }
1647 	  if ((STMT_VINFO_RELEVANT_P (stmt_info)
1648 	       || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
1649 	      && !PURE_SLP_STMT (stmt_info))
1650 	    /* STMT needs both SLP and loop-based vectorization.  */
1651 	    only_slp_in_loop = false;
1652 	}
1653     }
1654 
1655   if (only_slp_in_loop)
1656     vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
1657   else
1658     vectorization_factor
1659       = least_common_multiple (vectorization_factor,
1660 			       LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
1661 
1662   LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
1663   if (dump_enabled_p ())
1664     dump_printf_loc (MSG_NOTE, vect_location,
1665 		     "Updating vectorization factor to %d\n",
1666 		     vectorization_factor);
1667 }
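
/* As a worked example (illustrative numbers): a loop-based vectorization
   factor of 4 combined with an SLP unrolling factor of 2 keeps the VF at
   least_common_multiple (4, 2) == 4, whereas an SLP unrolling factor of 3
   would raise it to 12.  */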
1668 
1669 /* Function vect_analyze_loop_operations.
1670 
1671    Scan the loop stmts and make sure they are all vectorizable.  */
1672 
1673 static bool
1674 vect_analyze_loop_operations (loop_vec_info loop_vinfo)
1675 {
1676   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1677   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1678   int nbbs = loop->num_nodes;
1679   int i;
1680   stmt_vec_info stmt_info;
1681   bool need_to_vectorize = false;
1682   bool ok;
1683 
1684   if (dump_enabled_p ())
1685     dump_printf_loc (MSG_NOTE, vect_location,
1686 		     "=== vect_analyze_loop_operations ===\n");
1687 
1688   for (i = 0; i < nbbs; i++)
1689     {
1690       basic_block bb = bbs[i];
1691 
1692       for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
1693 	   gsi_next (&si))
1694         {
1695           gphi *phi = si.phi ();
1696           ok = true;
1697 
1698           stmt_info = vinfo_for_stmt (phi);
1699           if (dump_enabled_p ())
1700             {
1701               dump_printf_loc (MSG_NOTE, vect_location, "examining phi: ");
1702               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
1703             }
1704 	  if (virtual_operand_p (gimple_phi_result (phi)))
1705 	    continue;
1706 
1707           /* Inner-loop loop-closed exit phi in outer-loop vectorization
1708              (i.e., a phi in the tail of the outer-loop).  */
1709           if (! is_loop_header_bb_p (bb))
1710             {
1711               /* FORNOW: we currently don't support the case that these phis
1712                  are not used in the outer loop (unless it is a double
1713                  reduction, i.e., this phi is vect_double_reduction_def),
1714                  because this case requires us to actually do something here.  */
1715               if ((!STMT_VINFO_RELEVANT_P (stmt_info)
1716                    || STMT_VINFO_LIVE_P (stmt_info))
1717                   && STMT_VINFO_DEF_TYPE (stmt_info)
1718                      != vect_double_reduction_def)
1719                 {
1720                   if (dump_enabled_p ())
1721 		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1722 				     "Unsupported loop-closed phi in "
1723 				     "outer-loop.\n");
1724                   return false;
1725                 }
1726 
1727               /* If PHI is used in the outer loop, we check that its operand
1728                  is defined in the inner loop.  */
1729               if (STMT_VINFO_RELEVANT_P (stmt_info))
1730                 {
1731                   tree phi_op;
1732 		  gimple *op_def_stmt;
1733 
1734                   if (gimple_phi_num_args (phi) != 1)
1735                     return false;
1736 
1737                   phi_op = PHI_ARG_DEF (phi, 0);
1738                   if (TREE_CODE (phi_op) != SSA_NAME)
1739                     return false;
1740 
1741                   op_def_stmt = SSA_NAME_DEF_STMT (phi_op);
1742 		  if (gimple_nop_p (op_def_stmt)
1743 		      || !flow_bb_inside_loop_p (loop, gimple_bb (op_def_stmt))
1744 		      || !vinfo_for_stmt (op_def_stmt))
1745                     return false;
1746 
1747                   if (STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
1748                         != vect_used_in_outer
1749                       && STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
1750                            != vect_used_in_outer_by_reduction)
1751                     return false;
1752                 }
1753 
1754               continue;
1755             }
1756 
1757           gcc_assert (stmt_info);
1758 
1759           if ((STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope
1760                || STMT_VINFO_LIVE_P (stmt_info))
1761               && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
1762             {
1763               /* A scalar-dependence cycle that we don't support.  */
1764               if (dump_enabled_p ())
1765 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1766 				 "not vectorized: scalar dependence cycle.\n");
1767               return false;
1768             }
1769 
1770           if (STMT_VINFO_RELEVANT_P (stmt_info))
1771             {
1772               need_to_vectorize = true;
1773               if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
1774                 ok = vectorizable_induction (phi, NULL, NULL);
1775             }
1776 
1777 	  if (ok && STMT_VINFO_LIVE_P (stmt_info))
1778 	    ok = vectorizable_live_operation (phi, NULL, NULL, -1, NULL);
1779 
1780           if (!ok)
1781             {
1782               if (dump_enabled_p ())
1783                 {
1784 		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1785 				   "not vectorized: relevant phi not "
1786 				   "supported: ");
1787                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, phi, 0);
1788                 }
1789 	      return false;
1790             }
1791         }
1792 
1793       for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
1794 	   gsi_next (&si))
1795         {
1796 	  gimple *stmt = gsi_stmt (si);
1797 	  if (!gimple_clobber_p (stmt)
1798 	      && !vect_analyze_stmt (stmt, &need_to_vectorize, NULL))
1799 	    return false;
1800         }
1801     } /* bbs */
1802 
1803   /* All operations in the loop are either irrelevant (they deal with
1804      loop control, or are dead), or only used outside the loop and can
1805      be moved out of the loop (e.g. invariants, inductions).  The loop
1806      can be optimized away by scalar optimizations.  We're better off
1807      not touching this loop.  */
1808   if (!need_to_vectorize)
1809     {
1810       if (dump_enabled_p ())
1811         dump_printf_loc (MSG_NOTE, vect_location,
1812 			 "All the computation can be taken out of the loop.\n");
1813       if (dump_enabled_p ())
1814 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1815 			 "not vectorized: redundant loop. no profit to "
1816 			 "vectorize.\n");
1817       return false;
1818     }
1819 
1820   return true;
1821 }
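
/* For example (illustrative): a loop whose body only advances an
   induction variable whose final value is used after the loop leaves
   NEED_TO_VECTORIZE false above, and is rejected as redundant: scalar
   optimizations can compute the final value directly.  */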
1822 
1823 
1824 /* Function vect_analyze_loop_2.
1825 
1826    Apply a set of analyses on LOOP, and create a loop_vec_info struct
1827    for it.  The different analyses will record information in the
1828    loop_vec_info struct.  */
1829 static bool
1830 vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal)
1831 {
1832   bool ok;
1833   int max_vf = MAX_VECTORIZATION_FACTOR;
1834   int min_vf = 2;
1835   unsigned int n_stmts = 0;
1836 
1837   /* The first group of checks is independent of the vector size.  */
1838   fatal = true;
1839 
1840   /* Find all data references in the loop (which correspond to vdefs/vuses)
1841      and analyze their evolution in the loop.  */
1842 
1843   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1844 
1845   loop_p loop = LOOP_VINFO_LOOP (loop_vinfo);
1846   if (!find_loop_nest (loop, &LOOP_VINFO_LOOP_NEST (loop_vinfo)))
1847     {
1848       if (dump_enabled_p ())
1849 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1850 			 "not vectorized: loop nest containing two "
1851 			 "or more consecutive inner loops cannot be "
1852 			 "vectorized\n");
1853       return false;
1854     }
1855 
1856   for (unsigned i = 0; i < loop->num_nodes; i++)
1857     for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]);
1858 	 !gsi_end_p (gsi); gsi_next (&gsi))
1859       {
1860 	gimple *stmt = gsi_stmt (gsi);
1861 	if (is_gimple_debug (stmt))
1862 	  continue;
1863 	++n_stmts;
1864 	if (!find_data_references_in_stmt (loop, stmt,
1865 					   &LOOP_VINFO_DATAREFS (loop_vinfo)))
1866 	  {
1867 	    if (is_gimple_call (stmt) && loop->safelen)
1868 	      {
1869 		tree fndecl = gimple_call_fndecl (stmt), op;
1870 		if (fndecl != NULL_TREE)
1871 		  {
1872 		    cgraph_node *node = cgraph_node::get (fndecl);
1873 		    if (node != NULL && node->simd_clones != NULL)
1874 		      {
1875 			unsigned int j, n = gimple_call_num_args (stmt);
1876 			for (j = 0; j < n; j++)
1877 			  {
1878 			    op = gimple_call_arg (stmt, j);
1879 			    if (DECL_P (op)
1880 				|| (REFERENCE_CLASS_P (op)
1881 				    && get_base_address (op)))
1882 			      break;
1883 			  }
1884 			op = gimple_call_lhs (stmt);
1885 			/* Ignore #pragma omp declare simd functions
1886 			   if they don't have data references in the
1887 			   call stmt itself.  */
1888 			if (j == n
1889 			    && !(op
1890 				 && (DECL_P (op)
1891 				     || (REFERENCE_CLASS_P (op)
1892 					 && get_base_address (op)))))
1893 			  continue;
1894 		      }
1895 		  }
1896 	      }
1897 	    if (dump_enabled_p ())
1898 	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1899 			       "not vectorized: loop contains function "
1900 			       "calls or data references that cannot "
1901 			       "be analyzed\n");
1902 	    return false;
1903 	  }
1904       }
1905 
1906   /* Analyze the data references and also adjust the minimal
1907      vectorization factor according to the loads and stores.  */
1908 
1909   ok = vect_analyze_data_refs (loop_vinfo, &min_vf);
1910   if (!ok)
1911     {
1912       if (dump_enabled_p ())
1913 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1914 			 "bad data references.\n");
1915       return false;
1916     }
1917 
1918   /* Classify all cross-iteration scalar data-flow cycles.
1919      Cross-iteration cycles caused by virtual phis are analyzed separately.  */
1920   vect_analyze_scalar_cycles (loop_vinfo);
1921 
1922   vect_pattern_recog (loop_vinfo);
1923 
1924   vect_fixup_scalar_cycles_with_patterns (loop_vinfo);
1925 
1926   /* Analyze the access patterns of the data-refs in the loop (consecutive,
1927      complex, etc.). FORNOW: Only handle consecutive access pattern.  */
1928 
1929   ok = vect_analyze_data_ref_accesses (loop_vinfo);
1930   if (!ok)
1931     {
1932       if (dump_enabled_p ())
1933 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1934 			 "bad data access.\n");
1935       return false;
1936     }
1937 
1938   /* Data-flow analysis to detect stmts that do not need to be vectorized.  */
1939 
1940   ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
1941   if (!ok)
1942     {
1943       if (dump_enabled_p ())
1944 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1945 			 "unexpected pattern.\n");
1946       return false;
1947     }
1948 
1949   /* The rest of the analysis below depends on the vector size; failures are no longer fatal.  */
1950   fatal = false;
1951 
1952   /* Analyze data dependences between the data-refs in the loop
1953      and adjust the maximum vectorization factor according to
1954      the dependences.
1955      FORNOW: fail at the first data dependence that we encounter.  */
1956 
1957   ok = vect_analyze_data_ref_dependences (loop_vinfo, &max_vf);
1958   if (!ok
1959       || max_vf < min_vf)
1960     {
1961       if (dump_enabled_p ())
1962 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1963 			 "bad data dependence.\n");
1964       return false;
1965     }
1966 
1967   ok = vect_determine_vectorization_factor (loop_vinfo);
1968   if (!ok)
1969     {
1970       if (dump_enabled_p ())
1971 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1972 			 "can't determine vectorization factor.\n");
1973       return false;
1974     }
1975   if (max_vf < LOOP_VINFO_VECT_FACTOR (loop_vinfo))
1976     {
1977       if (dump_enabled_p ())
1978 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1979 			 "bad data dependence.\n");
1980       return false;
1981     }
1982 
1983   /* Compute the scalar iteration cost.  */
1984   vect_compute_single_scalar_iteration_cost (loop_vinfo);
1985 
1986   int saved_vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1987   HOST_WIDE_INT estimated_niter;
1988   unsigned th;
1989   int min_scalar_loop_bound;
1990 
1991   /* Check the SLP opportunities in the loop, analyze and build SLP trees.  */
1992   ok = vect_analyze_slp (loop_vinfo, n_stmts);
1993   if (!ok)
1994     return false;
1995 
1996   /* If there are any SLP instances mark them as pure_slp.  */
1997   bool slp = vect_make_slp_decision (loop_vinfo);
1998   if (slp)
1999     {
2000       /* Find stmts that need to be both vectorized and SLPed.  */
2001       vect_detect_hybrid_slp (loop_vinfo);
2002 
2003       /* Update the vectorization factor based on the SLP decision.  */
2004       vect_update_vf_for_slp (loop_vinfo);
2005     }
2006 
2007   /* This is the point where we can re-start analysis with SLP forced off.  */
2008 start_over:
2009 
2010   /* Now the vectorization factor is final.  */
2011   unsigned vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2012   gcc_assert (vectorization_factor != 0);
2013 
2014   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && dump_enabled_p ())
2015     dump_printf_loc (MSG_NOTE, vect_location,
2016 		     "vectorization_factor = %d, niters = "
2017 		     HOST_WIDE_INT_PRINT_DEC "\n", vectorization_factor,
2018 		     LOOP_VINFO_INT_NITERS (loop_vinfo));
2019 
2020   HOST_WIDE_INT max_niter
2021     = likely_max_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo));
2022   if ((LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2023        && (LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor))
2024       || (max_niter != -1
2025 	  && (unsigned HOST_WIDE_INT) max_niter < vectorization_factor))
2026     {
2027       if (dump_enabled_p ())
2028 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2029 			 "not vectorized: iteration count smaller than "
2030 			 "vectorization factor.\n");
2031       return false;
2032     }
2033 
2034   /* Analyze the alignment of the data-refs in the loop.
2035      Fail if a data reference is found that cannot be vectorized.  */
2036 
2037   ok = vect_analyze_data_refs_alignment (loop_vinfo);
2038   if (!ok)
2039     {
2040       if (dump_enabled_p ())
2041 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2042 			 "bad data alignment.\n");
2043       return false;
2044     }
2045 
2046   /* Prune the list of ddrs to be tested at run-time by versioning for alias.
2047      It is important to call pruning after vect_analyze_data_ref_accesses,
2048      since we use grouping information gathered by interleaving analysis.  */
2049   ok = vect_prune_runtime_alias_test_list (loop_vinfo);
2050   if (!ok)
2051     return false;
2052 
2053   /* Do not invoke vect_enhance_data_refs_alignment for epilogue
2054      vectorization.  */
2055   if (!LOOP_VINFO_EPILOGUE_P (loop_vinfo))
2056     {
2057       /* This pass will decide on using loop versioning and/or loop peeling
2058 	 in order to enhance the alignment of data references in the loop.  */
2059       ok = vect_enhance_data_refs_alignment (loop_vinfo);
2060       if (!ok)
2061 	{
2062 	  if (dump_enabled_p ())
2063 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2064 			     "bad data alignment.\n");
2065 	  return false;
2066 	}
2067     }
2068 
2069   if (slp)
2070     {
2071       /* Analyze operations in the SLP instances.  Note this may
2072 	 remove unsupported SLP instances which makes the above
2073 	 SLP kind detection invalid.  */
2074       unsigned old_size = LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length ();
2075       vect_slp_analyze_operations (LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
2076 				   LOOP_VINFO_TARGET_COST_DATA (loop_vinfo));
2077       if (LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length () != old_size)
2078 	goto again;
2079     }
2080 
2081   /* Scan all the remaining operations in the loop that are not subject
2082      to SLP and make sure they are vectorizable.  */
2083   ok = vect_analyze_loop_operations (loop_vinfo);
2084   if (!ok)
2085     {
2086       if (dump_enabled_p ())
2087 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2088 			 "bad operation or unsupported loop bound.\n");
2089       return false;
2090     }
2091 
2092   /* If an epilog loop is required because of data accesses with gaps,
2093      one additional iteration needs to be peeled.  Check if there are
2094      enough iterations for vectorization.  */
2095   if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
2096       && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
2097     {
2098       int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2099       tree scalar_niters = LOOP_VINFO_NITERSM1 (loop_vinfo);
2100 
2101       if (wi::to_widest (scalar_niters) < vf)
2102 	{
2103 	  if (dump_enabled_p ())
2104 	    dump_printf_loc (MSG_NOTE, vect_location,
2105 			     "loop has not enough iterations to support"
2106 			     " peeling for gaps.\n");
2107 	  return false;
2108 	}
2109     }
2110 
2111   /* Analyze cost.  Decide if it is worthwhile to vectorize.  */
2112   int min_profitable_estimate, min_profitable_iters;
2113   vect_estimate_min_profitable_iters (loop_vinfo, &min_profitable_iters,
2114 				      &min_profitable_estimate);
2115 
2116   if (min_profitable_iters < 0)
2117     {
2118       if (dump_enabled_p ())
2119 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2120 			 "not vectorized: vectorization not profitable.\n");
2121       if (dump_enabled_p ())
2122 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2123 			 "not vectorized: vector version will never be "
2124 			 "profitable.\n");
2125       goto again;
2126     }
2127 
2128   min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
2129 			    * vectorization_factor) - 1);
2130 
2131   /* Use the cost model only if it is more conservative than user specified
2132      threshold.  */
2133   th = (unsigned) min_scalar_loop_bound;
2134   if (min_profitable_iters
2135       && (!min_scalar_loop_bound
2136           || min_profitable_iters > min_scalar_loop_bound))
2137     th = (unsigned) min_profitable_iters;
2138 
2139   LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) = th;
2140 
2141   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2142       && LOOP_VINFO_INT_NITERS (loop_vinfo) <= th)
2143     {
2144       if (dump_enabled_p ())
2145 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2146 			 "not vectorized: vectorization not profitable.\n");
2147       if (dump_enabled_p ())
2148         dump_printf_loc (MSG_NOTE, vect_location,
2149 			 "not vectorized: iteration count smaller than user "
2150 			 "specified loop bound parameter or minimum profitable "
2151 			 "iterations (whichever is more conservative).\n");
2152       goto again;
2153     }
2154 
2155   estimated_niter
2156     = estimated_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo));
2157   if (estimated_niter == -1)
2158     estimated_niter = max_niter;
2159   if (estimated_niter != -1
2160       && ((unsigned HOST_WIDE_INT) estimated_niter
2161           <= MAX (th, (unsigned)min_profitable_estimate)))
2162     {
2163       if (dump_enabled_p ())
2164 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2165 			 "not vectorized: estimated iteration count too "
2166                          "small.\n");
2167       if (dump_enabled_p ())
2168         dump_printf_loc (MSG_NOTE, vect_location,
2169 			 "not vectorized: estimated iteration count smaller "
2170                          "than specified loop bound parameter or minimum "
2171                          "profitable iterations (whichever is more "
2172                          "conservative).\n");
2173       goto again;
2174     }
2175 
2176   /* Decide whether we need to create an epilogue loop to handle
2177      remaining scalar iterations.  */
2178   th = ((LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) + 1)
2179         / LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2180        * LOOP_VINFO_VECT_FACTOR (loop_vinfo);
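  /* Worked example (illustrative numbers): with a cost model threshold
     of 10 and VF 4 this computes ((10 + 1) / 4) * 4 == 8, the largest
     multiple of the vectorization factor not exceeding threshold + 1.  */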
2181 
2182   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2183       && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
2184     {
2185       if (ctz_hwi (LOOP_VINFO_INT_NITERS (loop_vinfo)
2186 		   - LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo))
2187 	  < exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo)))
2188 	LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
2189     }
2190   else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
2191 	   || (tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
2192 	       < (unsigned)exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2193                /* In case of versioning, check if the maximum number of
2194                   iterations is greater than th.  If they are identical,
2195                   the epilogue is unnecessary.  */
2196 	       && (!LOOP_REQUIRES_VERSIONING (loop_vinfo)
2197                    || (unsigned HOST_WIDE_INT) max_niter > th)))
2198     LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
2199 
2200   /* If an epilogue loop is required make sure we can create one.  */
2201   if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
2202       || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo))
2203     {
2204       if (dump_enabled_p ())
2205         dump_printf_loc (MSG_NOTE, vect_location, "epilog loop required\n");
2206       if (!vect_can_advance_ivs_p (loop_vinfo)
2207 	  || !slpeel_can_duplicate_loop_p (LOOP_VINFO_LOOP (loop_vinfo),
2208 					   single_exit (LOOP_VINFO_LOOP
2209 							 (loop_vinfo))))
2210         {
2211           if (dump_enabled_p ())
2212 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2213 			     "not vectorized: can't create required "
2214 			     "epilog loop\n");
2215           goto again;
2216         }
2217     }
2218 
2219   gcc_assert (vectorization_factor
2220 	      == (unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo));
2221 
2222   /* Ok to vectorize!  */
2223   return true;
2224 
2225 again:
2226   /* Try again with SLP forced off, but if we didn't do any SLP there is
2227      no point in re-trying.  */
2228   if (!slp)
2229     return false;
2230 
2231   /* If there are reduction chains re-trying will fail anyway.  */
2232   if (! LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).is_empty ())
2233     return false;
2234 
2235   /* Likewise if the grouped loads or stores in the SLP cannot be handled
2236      via interleaving or lane instructions.  */
2237   slp_instance instance;
2238   slp_tree node;
2239   unsigned i, j;
2240   FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), i, instance)
2241     {
2242       stmt_vec_info vinfo;
2243       vinfo = vinfo_for_stmt
2244 	  (SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (instance))[0]);
2245       if (! STMT_VINFO_GROUPED_ACCESS (vinfo))
2246 	continue;
2247       vinfo = vinfo_for_stmt (STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo));
2248       unsigned int size = STMT_VINFO_GROUP_SIZE (vinfo);
2249       tree vectype = STMT_VINFO_VECTYPE (vinfo);
2250       if (! vect_store_lanes_supported (vectype, size)
2251 	  && ! vect_grouped_store_supported (vectype, size))
2252 	return false;
2253       FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), j, node)
2254 	{
2255 	  vinfo = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
2256 	  vinfo = vinfo_for_stmt (STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo));
2257 	  bool single_element_p = !STMT_VINFO_GROUP_NEXT_ELEMENT (vinfo);
2258 	  size = STMT_VINFO_GROUP_SIZE (vinfo);
2259 	  vectype = STMT_VINFO_VECTYPE (vinfo);
2260 	  if (! vect_load_lanes_supported (vectype, size)
2261 	      && ! vect_grouped_load_supported (vectype, single_element_p,
2262 						size))
2263 	    return false;
2264 	}
2265     }
2266 
2267   if (dump_enabled_p ())
2268     dump_printf_loc (MSG_NOTE, vect_location,
2269 		     "re-trying with SLP disabled\n");
2270 
2271   /* Roll back state appropriately.  No SLP this time.  */
2272   slp = false;
2273   /* Restore the vectorization factor as it was without SLP.  */
2274   LOOP_VINFO_VECT_FACTOR (loop_vinfo) = saved_vectorization_factor;
2275   /* Free the SLP instances.  */
2276   FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), j, instance)
2277     vect_free_slp_instance (instance);
2278   LOOP_VINFO_SLP_INSTANCES (loop_vinfo).release ();
2279   /* Reset SLP type to loop_vect on all stmts.  */
2280   for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2281     {
2282       basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2283       for (gimple_stmt_iterator si = gsi_start_bb (bb);
2284 	   !gsi_end_p (si); gsi_next (&si))
2285 	{
2286 	  stmt_vec_info stmt_info = vinfo_for_stmt (gsi_stmt (si));
2287 	  STMT_SLP_TYPE (stmt_info) = loop_vect;
2288 	  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2289 	    {
2290 	      stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
2291 	      STMT_SLP_TYPE (stmt_info) = loop_vect;
2292 	      for (gimple_stmt_iterator pi
2293 		     = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
2294 		   !gsi_end_p (pi); gsi_next (&pi))
2295 		{
2296 		  gimple *pstmt = gsi_stmt (pi);
2297 		  STMT_SLP_TYPE (vinfo_for_stmt (pstmt)) = loop_vect;
2298 		}
2299 	    }
2300 	}
2301     }
2302   /* Free optimized alias test DDRS.  */
2303   LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).release ();
2304   /* Reset target cost data.  */
2305   destroy_cost_data (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo));
2306   LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)
2307     = init_cost (LOOP_VINFO_LOOP (loop_vinfo));
2308   /* Reset assorted flags.  */
2309   LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = false;
2310   LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = false;
2311   LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) = 0;
2312 
2313   goto start_over;
2314 }
2315 
2316 /* Function vect_analyze_loop.
2317 
2318    Apply a set of analyses on LOOP, and create a loop_vec_info struct
2319    for it.  The different analyses will record information in the
2320    loop_vec_info struct.  If ORIG_LOOP_VINFO is not NULL, the epilogue
2321    must be vectorized.  */
2322 loop_vec_info
2323 vect_analyze_loop (struct loop *loop, loop_vec_info orig_loop_vinfo)
2324 {
2325   loop_vec_info loop_vinfo;
2326   unsigned int vector_sizes;
2327 
2328   /* Autodetect first vector size we try.  */
2329   current_vector_size = 0;
2330   vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
2331 
2332   if (dump_enabled_p ())
2333     dump_printf_loc (MSG_NOTE, vect_location,
2334 		     "===== analyze_loop_nest =====\n");
2335 
2336   if (loop_outer (loop)
2337       && loop_vec_info_for_loop (loop_outer (loop))
2338       && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop))))
2339     {
2340       if (dump_enabled_p ())
2341 	dump_printf_loc (MSG_NOTE, vect_location,
2342 			 "outer-loop already vectorized.\n");
2343       return NULL;
2344     }
2345 
2346   while (1)
2347     {
2348       /* Check the CFG characteristics of the loop (nesting, entry/exit).  */
2349       loop_vinfo = vect_analyze_loop_form (loop);
2350       if (!loop_vinfo)
2351 	{
2352 	  if (dump_enabled_p ())
2353 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2354 			     "bad loop form.\n");
2355 	  return NULL;
2356 	}
2357 
2358       bool fatal = false;
2359 
2360       if (orig_loop_vinfo)
2361 	LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo) = orig_loop_vinfo;
2362 
2363       if (vect_analyze_loop_2 (loop_vinfo, fatal))
2364 	{
2365 	  LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
2366 
2367 	  return loop_vinfo;
2368 	}
2369 
2370       destroy_loop_vec_info (loop_vinfo, true);
2371 
2372       vector_sizes &= ~current_vector_size;
2373       if (fatal
2374 	  || vector_sizes == 0
2375 	  || current_vector_size == 0)
2376 	return NULL;
2377 
2378       /* Try the next biggest vector size.  */
2379       current_vector_size = 1 << floor_log2 (vector_sizes);
2380       if (dump_enabled_p ())
2381 	dump_printf_loc (MSG_NOTE, vect_location,
2382 			 "***** Re-trying analysis with "
2383 			 "vector size %d\n", current_vector_size);
2384     }
2385 }
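
/* Note on the retry loop above (an explanatory sketch, not part of the
   original sources): the target hook returns the supported vector sizes
   as a bitmask of byte counts, e.g. 32 | 16 on a target with 256-bit and
   128-bit vectors.  Each failed analysis clears the size just tried, and
   1 << floor_log2 (vector_sizes) then selects the largest remaining one,
   so sizes are attempted in decreasing order.  */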
2386 
2387 
2388 /* Function reduction_code_for_scalar_code
2389 
2390    Input:
2391    CODE - tree_code of a reduction operation.
2392 
2393    Output:
2394    REDUC_CODE - the corresponding tree-code to be used to reduce the
2395       vector of partial results into a single scalar result, or ERROR_MARK
2396       if the operation is a supported reduction operation, but does not have
2397       such a tree-code.
2398 
2399    Return FALSE if CODE currently cannot be vectorized as a reduction.  */
2400 
2401 static bool
2402 reduction_code_for_scalar_code (enum tree_code code,
2403                                 enum tree_code *reduc_code)
2404 {
2405   switch (code)
2406     {
2407       case MAX_EXPR:
2408         *reduc_code = REDUC_MAX_EXPR;
2409         return true;
2410 
2411       case MIN_EXPR:
2412         *reduc_code = REDUC_MIN_EXPR;
2413         return true;
2414 
2415       case PLUS_EXPR:
2416         *reduc_code = REDUC_PLUS_EXPR;
2417         return true;
2418 
2419       case MULT_EXPR:
2420       case MINUS_EXPR:
2421       case BIT_IOR_EXPR:
2422       case BIT_XOR_EXPR:
2423       case BIT_AND_EXPR:
2424         *reduc_code = ERROR_MARK;
2425         return true;
2426 
2427       default:
2428        return false;
2429     }
2430 }
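
/* For example (illustrative): a maximum reduction such as

	for (i = 0; i < n; i++)
	  m = m < a[i] ? a[i] : m;

   is represented with MAX_EXPR once the conditional is folded, so the
   vector of partial maxima is combined into a scalar with REDUC_MAX_EXPR
   in the loop epilogue.  A MULT_EXPR reduction instead gets ERROR_MARK
   and its partial results are reduced by other means (e.g. a sequence of
   vector shifts).  */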
2431 
2432 
2433 /* Error reporting helper for vect_is_simple_reduction below.  GIMPLE statement
2434    STMT is printed with a message MSG. */
2435 
2436 static void
2437 report_vect_op (int msg_type, gimple *stmt, const char *msg)
2438 {
2439   dump_printf_loc (msg_type, vect_location, "%s", msg);
2440   dump_gimple_stmt (msg_type, TDF_SLIM, stmt, 0);
2441 }
2442 
2443 
2444 /* Detect SLP reduction of the form:
2445 
2446    #a1 = phi <a5, a0>
2447    a2 = operation (a1)
2448    a3 = operation (a2)
2449    a4 = operation (a3)
2450    a5 = operation (a4)
2451 
2452    #a = phi <a5>
2453 
2454    PHI is the reduction phi node (#a1 = phi <a5, a0> above)
2455    FIRST_STMT is the first reduction stmt in the chain
2456    (a2 = operation (a1)).
2457 
2458    Return TRUE if a reduction chain was detected.  */
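
/* At the source level such a chain typically comes from a manually
   unrolled reduction, for example (an illustrative sketch):

	for (i = 0; i < n; i += 4)
	  {
	    s = s + a[i];
	    s = s + a[i + 1];
	    s = s + a[i + 2];
	    s = s + a[i + 3];
	  }

   where each statement consumes the result of the previous one.  */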
2459 
2460 static bool
2461 vect_is_slp_reduction (loop_vec_info loop_info, gimple *phi,
2462 		       gimple *first_stmt)
2463 {
2464   struct loop *loop = (gimple_bb (phi))->loop_father;
2465   struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
2466   enum tree_code code;
2467   gimple *current_stmt = NULL, *loop_use_stmt = NULL, *first, *next_stmt;
2468   stmt_vec_info use_stmt_info, current_stmt_info;
2469   tree lhs;
2470   imm_use_iterator imm_iter;
2471   use_operand_p use_p;
2472   int nloop_uses, size = 0, n_out_of_loop_uses;
2473   bool found = false;
2474 
2475   if (loop != vect_loop)
2476     return false;
2477 
2478   lhs = PHI_RESULT (phi);
2479   code = gimple_assign_rhs_code (first_stmt);
2480   while (1)
2481     {
2482       nloop_uses = 0;
2483       n_out_of_loop_uses = 0;
2484       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2485         {
2486 	  gimple *use_stmt = USE_STMT (use_p);
2487 	  if (is_gimple_debug (use_stmt))
2488 	    continue;
2489 
2490           /* Check if we got back to the reduction phi.  */
2491 	  if (use_stmt == phi)
2492             {
2493 	      loop_use_stmt = use_stmt;
2494               found = true;
2495               break;
2496             }
2497 
2498           if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2499             {
2500 	      loop_use_stmt = use_stmt;
2501 	      nloop_uses++;
2502             }
2503            else
2504              n_out_of_loop_uses++;
2505 
2506            /* There can be either a single use in the loop or two uses in
2507               phi nodes.  */
2508            if (nloop_uses > 1 || (n_out_of_loop_uses && nloop_uses))
2509              return false;
2510         }
2511 
2512       if (found)
2513         break;
2514 
2515       /* We reached a statement with no loop uses.  */
2516       if (nloop_uses == 0)
2517 	return false;
2518 
2519       /* This is a loop exit phi, and we haven't reached the reduction phi.  */
2520       if (gimple_code (loop_use_stmt) == GIMPLE_PHI)
2521         return false;
2522 
2523       if (!is_gimple_assign (loop_use_stmt)
2524 	  || code != gimple_assign_rhs_code (loop_use_stmt)
2525 	  || !flow_bb_inside_loop_p (loop, gimple_bb (loop_use_stmt)))
2526         return false;
2527 
2528       /* Insert USE_STMT into reduction chain.  */
2529       use_stmt_info = vinfo_for_stmt (loop_use_stmt);
2530       if (current_stmt)
2531         {
2532           current_stmt_info = vinfo_for_stmt (current_stmt);
2533 	  GROUP_NEXT_ELEMENT (current_stmt_info) = loop_use_stmt;
2534           GROUP_FIRST_ELEMENT (use_stmt_info)
2535             = GROUP_FIRST_ELEMENT (current_stmt_info);
2536         }
2537       else
2538 	GROUP_FIRST_ELEMENT (use_stmt_info) = loop_use_stmt;
2539 
2540       lhs = gimple_assign_lhs (loop_use_stmt);
2541       current_stmt = loop_use_stmt;
2542       size++;
2543    }
2544 
2545   if (!found || loop_use_stmt != phi || size < 2)
2546     return false;
2547 
2548   /* Swap the operands, if needed, so that the reduction operand is the
2549      second operand.  */
2550   lhs = PHI_RESULT (phi);
2551   next_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt));
2552   while (next_stmt)
2553     {
2554       if (gimple_assign_rhs2 (next_stmt) == lhs)
2555 	{
2556 	  tree op = gimple_assign_rhs1 (next_stmt);
2557 	  gimple *def_stmt = NULL;
2558 
2559           if (TREE_CODE (op) == SSA_NAME)
2560             def_stmt = SSA_NAME_DEF_STMT (op);
2561 
2562 	  /* Check that the other def is either defined in the loop
2563 	     ("vect_internal_def"), or it's an induction (defined by a
2564 	     loop-header phi-node).  */
2565           if (def_stmt
2566               && gimple_bb (def_stmt)
2567 	      && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
2568               && (is_gimple_assign (def_stmt)
2569                   || is_gimple_call (def_stmt)
2570                   || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2571                            == vect_induction_def
2572                   || (gimple_code (def_stmt) == GIMPLE_PHI
2573                       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2574                                   == vect_internal_def
2575                       && !is_loop_header_bb_p (gimple_bb (def_stmt)))))
2576 	    {
2577 	      lhs = gimple_assign_lhs (next_stmt);
2578 	      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
2579  	      continue;
2580 	    }
2581 
2582 	  return false;
2583 	}
2584       else
2585 	{
2586           tree op = gimple_assign_rhs2 (next_stmt);
2587 	  gimple *def_stmt = NULL;
2588 
2589           if (TREE_CODE (op) == SSA_NAME)
2590             def_stmt = SSA_NAME_DEF_STMT (op);
2591 
2592           /* Check that the other def is either defined in the loop
2593             ("vect_internal_def"), or it's an induction (defined by a
2594             loop-header phi-node).  */
2595           if (def_stmt
2596               && gimple_bb (def_stmt)
2597 	      && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
2598               && (is_gimple_assign (def_stmt)
2599                   || is_gimple_call (def_stmt)
2600                   || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2601                               == vect_induction_def
2602                   || (gimple_code (def_stmt) == GIMPLE_PHI
2603                       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2604                                   == vect_internal_def
2605                       && !is_loop_header_bb_p (gimple_bb (def_stmt)))))
2606   	    {
2607 	      if (dump_enabled_p ())
2608 		{
2609 		  dump_printf_loc (MSG_NOTE, vect_location, "swapping oprnds: ");
2610 		  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, next_stmt, 0);
2611 		}
2612 
2613 	      swap_ssa_operands (next_stmt,
2614 	 		         gimple_assign_rhs1_ptr (next_stmt),
2615                                  gimple_assign_rhs2_ptr (next_stmt));
2616 	      update_stmt (next_stmt);
2617 
2618 	      if (CONSTANT_CLASS_P (gimple_assign_rhs1 (next_stmt)))
2619 		LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true;
2620 	    }
2621 	  else
2622 	    return false;
2623         }
2624 
2625       lhs = gimple_assign_lhs (next_stmt);
2626       next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
2627     }
2628 
2629   /* Save the chain for further analysis in SLP detection.  */
2630   first = GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt));
2631   LOOP_VINFO_REDUCTION_CHAINS (loop_info).safe_push (first);
2632   GROUP_SIZE (vinfo_for_stmt (first)) = size;
2633 
2634   return true;
2635 }
2636 
2637 
2638 /* Function vect_is_simple_reduction
2639 
2640    (1) Detect a cross-iteration def-use cycle that represents a simple
2641    reduction computation.  We look for the following pattern:
2642 
2643    loop_header:
2644      a1 = phi < a0, a2 >
2645      a3 = ...
2646      a2 = operation (a3, a1)
2647 
2648    or
2649 
2650    a3 = ...
2651    loop_header:
2652      a1 = phi < a0, a2 >
2653      a2 = operation (a3, a1)
2654 
2655    such that:
2656    1. operation is commutative and associative and it is safe to
2657       change the order of the computation (if CHECK_REDUCTION is true)
2658    2. no uses for a2 in the loop (a2 is used out of the loop)
2659    3. no uses of a1 in the loop besides the reduction operation
2660    4. no uses of a1 outside the loop.
2661 
2662    Conditions 1,4 are tested here.
2663    Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized.
2664 
2665    (2) Detect a cross-iteration def-use cycle in nested loops, i.e.,
2666    nested cycles, if CHECK_REDUCTION is false.
2667 
2668    (3) Detect cycles of phi nodes in outer-loop vectorization, i.e., double
2669    reductions:
2670 
2671      a1 = phi < a0, a2 >
2672      inner loop (def of a3)
2673      a2 = phi < a3 >
2674 
2675    (4) Detect condition expressions, ie:
2676    (4) Detect condition expressions, i.e.:
2677        if (a[i] < val)
2678 	ret_val = a[i];
2679 
2680 */
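   As a concrete instance of (1) (names illustrative), the C loop

     for (i = 0; i < N; i++)
       sum += a[i];

   is gimplified into

     loop_header:
       sum_1 = phi < sum_0, sum_2 >
       _3 = a[i];
       sum_2 = sum_1 + _3;

   where sum_1 is the reduction phi and sum_2 is used only by that phi
   and after the loop.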
2681 
2682 static gimple *
2683 vect_is_simple_reduction (loop_vec_info loop_info, gimple *phi,
2684 			  bool check_reduction, bool *double_reduc,
2685 			  bool need_wrapping_integral_overflow,
2686 			  enum vect_reduction_type *v_reduc_type)
2687 {
2688   struct loop *loop = (gimple_bb (phi))->loop_father;
2689   struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
2690   edge latch_e = loop_latch_edge (loop);
2691   tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
2692   gimple *def_stmt, *def1 = NULL, *def2 = NULL, *phi_use_stmt = NULL;
2693   enum tree_code orig_code, code;
2694   tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE;
2695   tree type;
2696   int nloop_uses;
2697   tree name;
2698   imm_use_iterator imm_iter;
2699   use_operand_p use_p;
2700   bool phi_def;
2701 
2702   *double_reduc = false;
2703   *v_reduc_type = TREE_CODE_REDUCTION;
2704 
2705   /* If CHECK_REDUCTION is true, we assume inner-most loop vectorization,
2706      otherwise, we assume outer loop vectorization.  */
2707   gcc_assert ((check_reduction && loop == vect_loop)
2708               || (!check_reduction && flow_loop_nested_p (vect_loop, loop)));
2709 
2710   name = PHI_RESULT (phi);
2711   /* ???  If there are no uses of the PHI result the inner loop reduction
2712      won't be detected as possibly double-reduction by vectorizable_reduction
2713      because that tries to walk the PHI arg from the preheader edge which
2714      can be constant.  See PR60382.  */
2715   if (has_zero_uses (name))
2716     return NULL;
2717   nloop_uses = 0;
2718   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
2719     {
2720       gimple *use_stmt = USE_STMT (use_p);
2721       if (is_gimple_debug (use_stmt))
2722 	continue;
2723 
2724       if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2725         {
2726           if (dump_enabled_p ())
2727 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2728 			     "intermediate value used outside loop.\n");
2729 
2730           return NULL;
2731         }
2732 
2733       nloop_uses++;
2734       if (nloop_uses > 1)
2735         {
2736           if (dump_enabled_p ())
2737 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2738 			     "reduction used in loop.\n");
2739           return NULL;
2740         }
2741 
2742       phi_use_stmt = use_stmt;
2743     }
2744 
2745   if (TREE_CODE (loop_arg) != SSA_NAME)
2746     {
2747       if (dump_enabled_p ())
2748 	{
2749 	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2750 			   "reduction: not ssa_name: ");
2751 	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, loop_arg);
2752           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2753 	}
2754       return NULL;
2755     }
2756 
2757   def_stmt = SSA_NAME_DEF_STMT (loop_arg);
2758   if (!def_stmt)
2759     {
2760       if (dump_enabled_p ())
2761 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2762 			 "reduction: no def_stmt.\n");
2763       return NULL;
2764     }
2765 
2766   if (!is_gimple_assign (def_stmt) && gimple_code (def_stmt) != GIMPLE_PHI)
2767     {
2768       if (dump_enabled_p ())
2769 	dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
2770       return NULL;
2771     }
2772 
2773   if (is_gimple_assign (def_stmt))
2774     {
2775       name = gimple_assign_lhs (def_stmt);
2776       phi_def = false;
2777     }
2778   else
2779     {
2780       name = PHI_RESULT (def_stmt);
2781       phi_def = true;
2782     }
2783 
2784   nloop_uses = 0;
2785   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
2786     {
2787       gimple *use_stmt = USE_STMT (use_p);
2788       if (is_gimple_debug (use_stmt))
2789 	continue;
2790       if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2791 	nloop_uses++;
2792       if (nloop_uses > 1)
2793 	{
2794 	  if (dump_enabled_p ())
2795 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2796 			     "reduction used in loop.\n");
2797 	  return NULL;
2798 	}
2799     }
2800 
2801   /* If DEF_STMT is a phi node itself, we expect it to have a single argument
2802      defined in the inner loop.  */
2803   if (phi_def)
2804     {
2805       op1 = PHI_ARG_DEF (def_stmt, 0);
2806 
2807       if (gimple_phi_num_args (def_stmt) != 1
2808           || TREE_CODE (op1) != SSA_NAME)
2809         {
2810           if (dump_enabled_p ())
2811 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2812 			     "unsupported phi node definition.\n");
2813 
2814           return NULL;
2815         }
2816 
2817       def1 = SSA_NAME_DEF_STMT (op1);
2818       if (gimple_bb (def1)
2819 	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
2820           && loop->inner
2821           && flow_bb_inside_loop_p (loop->inner, gimple_bb (def1))
2822           && is_gimple_assign (def1)
2823 	  && flow_bb_inside_loop_p (loop->inner, gimple_bb (phi_use_stmt)))
2824         {
2825           if (dump_enabled_p ())
2826             report_vect_op (MSG_NOTE, def_stmt,
2827 			    "detected double reduction: ");
2828 
2829           *double_reduc = true;
2830           return def_stmt;
2831         }
2832 
2833       return NULL;
2834     }
2835 
2836   code = orig_code = gimple_assign_rhs_code (def_stmt);
2837 
2838   /* We can handle "res -= x[i]", which is non-associative, by
2839      simply rewriting it into "res += -x[i]".  Avoid changing the
2840      gimple instruction for the first simple tests and only do this
2841      if we're allowed to change code at all.  */
2842   if (code == MINUS_EXPR
2843       && (op1 = gimple_assign_rhs1 (def_stmt))
2844       && TREE_CODE (op1) == SSA_NAME
2845       && SSA_NAME_DEF_STMT (op1) == phi)
2846     code = PLUS_EXPR;
2847 
2848   if (code == COND_EXPR)
2849     {
2850       if (check_reduction)
2851 	*v_reduc_type = COND_REDUCTION;
2852     }
2853   else if (!commutative_tree_code (code) || !associative_tree_code (code))
2854     {
2855       if (dump_enabled_p ())
2856 	report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2857 			"reduction: not commutative/associative: ");
2858       return NULL;
2859     }
2860 
2861   if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
2862     {
2863       if (code != COND_EXPR)
2864         {
2865 	  if (dump_enabled_p ())
2866 	    report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2867 			    "reduction: not binary operation: ");
2868 
2869           return NULL;
2870         }
2871 
2872       op3 = gimple_assign_rhs1 (def_stmt);
2873       if (COMPARISON_CLASS_P (op3))
2874         {
2875           op4 = TREE_OPERAND (op3, 1);
2876           op3 = TREE_OPERAND (op3, 0);
2877         }
2878 
2879       op1 = gimple_assign_rhs2 (def_stmt);
2880       op2 = gimple_assign_rhs3 (def_stmt);
2881 
2882       if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
2883         {
2884           if (dump_enabled_p ())
2885             report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2886 			    "reduction: uses not ssa_names: ");
2887 
2888           return NULL;
2889         }
2890     }
2891   else
2892     {
2893       op1 = gimple_assign_rhs1 (def_stmt);
2894       op2 = gimple_assign_rhs2 (def_stmt);
2895 
2896       if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
2897         {
2898           if (dump_enabled_p ())
2899 	    report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2900 			    "reduction: uses not ssa_names: ");
2901 
2902           return NULL;
2903         }
2904    }
2905 
2906   type = TREE_TYPE (gimple_assign_lhs (def_stmt));
2907   if ((TREE_CODE (op1) == SSA_NAME
2908        && !types_compatible_p (type, TREE_TYPE (op1)))
2909       || (TREE_CODE (op2) == SSA_NAME
2910           && !types_compatible_p (type, TREE_TYPE (op2)))
2911       || (op3 && TREE_CODE (op3) == SSA_NAME
2912           && !types_compatible_p (type, TREE_TYPE (op3)))
2913       || (op4 && TREE_CODE (op4) == SSA_NAME
2914           && !types_compatible_p (type, TREE_TYPE (op4))))
2915     {
2916       if (dump_enabled_p ())
2917         {
2918           dump_printf_loc (MSG_NOTE, vect_location,
2919 			   "reduction: multiple types: operation type: ");
2920           dump_generic_expr (MSG_NOTE, TDF_SLIM, type);
2921           dump_printf (MSG_NOTE, ", operands types: ");
2922           dump_generic_expr (MSG_NOTE, TDF_SLIM,
2923 			     TREE_TYPE (op1));
2924           dump_printf (MSG_NOTE, ",");
2925           dump_generic_expr (MSG_NOTE, TDF_SLIM,
2926 			     TREE_TYPE (op2));
2927           if (op3)
2928             {
2929               dump_printf (MSG_NOTE, ",");
2930               dump_generic_expr (MSG_NOTE, TDF_SLIM,
2931 				 TREE_TYPE (op3));
2932             }
2933 
2934           if (op4)
2935             {
2936               dump_printf (MSG_NOTE, ",");
2937               dump_generic_expr (MSG_NOTE, TDF_SLIM,
2938 				 TREE_TYPE (op4));
2939             }
2940           dump_printf (MSG_NOTE, "\n");
2941         }
2942 
2943       return NULL;
2944     }
2945 
2946   /* Check that it's ok to change the order of the computation.
2947      Generally, when vectorizing a reduction we change the order of the
2948      computation.  This may change the behavior of the program in some
2949      cases, so we need to check that this is ok.  One exception is when
2950      vectorizing an outer-loop: the inner-loop is executed sequentially,
2951      and therefore vectorizing reductions in the inner-loop during
2952      outer-loop vectorization is safe.  */
2953 
2954   if (*v_reduc_type != COND_REDUCTION
2955       && check_reduction)
2956     {
2957       /* CHECKME: check for !flag_finite_math_only too?  */
2958       if (SCALAR_FLOAT_TYPE_P (type) && !flag_associative_math)
2959 	{
2960 	  /* Changing the order of operations changes the semantics.  */
2961 	  if (dump_enabled_p ())
2962 	    report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2963 			    "reduction: unsafe fp math optimization: ");
2964 	  return NULL;
2965 	}
2966       else if (INTEGRAL_TYPE_P (type))
2967 	{
2968 	  if (!operation_no_trapping_overflow (type, code))
2969 	    {
2970 	      /* Changing the order of operations changes the semantics.  */
2971 	      if (dump_enabled_p ())
2972 		report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2973 				"reduction: unsafe int math optimization"
2974 				" (overflow traps): ");
2975 	      return NULL;
2976 	    }
2977 	  if (need_wrapping_integral_overflow
2978 	      && !TYPE_OVERFLOW_WRAPS (type)
2979 	      && operation_can_overflow (code))
2980 	    {
2981 	      /* Changing the order of operations changes the semantics.  */
2982 	      if (dump_enabled_p ())
2983 		report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2984 				"reduction: unsafe int math optimization"
2985 				" (overflow doesn't wrap): ");
2986 	      return NULL;
2987 	    }
2988 	}
2989       else if (SAT_FIXED_POINT_TYPE_P (type))
2990 	{
2991 	  /* Changing the order of operations changes the semantics.  */
2992 	  if (dump_enabled_p ())
2993 	    report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2994 			    "reduction: unsafe fixed-point math optimization: ");
2995 	  return NULL;
2996 	}
2997     }
2998 
2999   /* Reduction is safe. We're dealing with one of the following:
3000      1) integer arithmetic and no trapv
3001      2) floating point arithmetic, and special flags permit this optimization
3002      3) nested cycle (i.e., outer loop vectorization).  */
3003   if (TREE_CODE (op1) == SSA_NAME)
3004     def1 = SSA_NAME_DEF_STMT (op1);
3005 
3006   if (TREE_CODE (op2) == SSA_NAME)
3007     def2 = SSA_NAME_DEF_STMT (op2);
3008 
3009   if (code != COND_EXPR
3010       && ((!def1 || gimple_nop_p (def1)) && (!def2 || gimple_nop_p (def2))))
3011     {
3012       if (dump_enabled_p ())
3013 	report_vect_op (MSG_NOTE, def_stmt, "reduction: no defs for operands: ");
3014       return NULL;
3015     }
3016 
3017   /* Check that one def is the reduction def, defined by PHI,
3018      the other def is either defined in the loop ("vect_internal_def"),
3019      or it's an induction (defined by a loop-header phi-node).  */
3020 
3021   if (def2 && def2 == phi
3022       && (code == COND_EXPR
3023 	  || !def1 || gimple_nop_p (def1)
3024 	  || !flow_bb_inside_loop_p (loop, gimple_bb (def1))
3025           || (def1 && flow_bb_inside_loop_p (loop, gimple_bb (def1))
3026               && (is_gimple_assign (def1)
3027 		  || is_gimple_call (def1)
3028   	          || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
3029                       == vect_induction_def
3030    	          || (gimple_code (def1) == GIMPLE_PHI
3031 	              && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
3032                           == vect_internal_def
3033  	              && !is_loop_header_bb_p (gimple_bb (def1)))))))
3034     {
3035       if (dump_enabled_p ())
3036 	report_vect_op (MSG_NOTE, def_stmt, "detected reduction: ");
3037       return def_stmt;
3038     }
3039 
3040   if (def1 && def1 == phi
3041       && (code == COND_EXPR
3042 	  || !def2 || gimple_nop_p (def2)
3043 	  || !flow_bb_inside_loop_p (loop, gimple_bb (def2))
3044 	  || (def2 && flow_bb_inside_loop_p (loop, gimple_bb (def2))
3045 	      && (is_gimple_assign (def2)
3046 		  || is_gimple_call (def2)
3047 		  || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
3048 		       == vect_induction_def
3049 		  || (gimple_code (def2) == GIMPLE_PHI
3050 		      && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
3051 			   == vect_internal_def
3052 		      && !is_loop_header_bb_p (gimple_bb (def2)))))))
3053     {
3054       if (check_reduction && orig_code != MINUS_EXPR)
3055 	{
3056 	  /* Check if we can swap operands (just for simplicity - so that
3057 	     the rest of the code can assume that the reduction variable
3058 	     is always the last (second) argument).  */
3059 	  if (code == COND_EXPR)
3060 	    {
3061 	      /* Swap cond_expr by inverting the condition.  */
3062 	      tree cond_expr = gimple_assign_rhs1 (def_stmt);
3063 	      enum tree_code invert_code = ERROR_MARK;
3064 	      enum tree_code cond_code = TREE_CODE (cond_expr);
3065 
3066 	      if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
3067 		{
3068 		  bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
3069 		  invert_code = invert_tree_comparison (cond_code, honor_nans);
3070 		}
3071 	      if (invert_code != ERROR_MARK)
3072 		{
3073 		  TREE_SET_CODE (cond_expr, invert_code);
3074 		  swap_ssa_operands (def_stmt,
3075 				     gimple_assign_rhs2_ptr (def_stmt),
3076 				     gimple_assign_rhs3_ptr (def_stmt));
3077 		}
3078 	      else
3079 		{
3080 		  if (dump_enabled_p ())
3081 		    report_vect_op (MSG_NOTE, def_stmt,
3082 				    "detected reduction: cannot swap operands "
3083 				    "for cond_expr");
3084 		  return NULL;
3085 		}
3086 	    }
3087 	  else
3088 	    swap_ssa_operands (def_stmt, gimple_assign_rhs1_ptr (def_stmt),
3089 			       gimple_assign_rhs2_ptr (def_stmt));
3090 
3091 	  if (dump_enabled_p ())
3092 	    report_vect_op (MSG_NOTE, def_stmt,
3093 			    "detected reduction: need to swap operands: ");
3094 
3095 	  if (CONSTANT_CLASS_P (gimple_assign_rhs1 (def_stmt)))
3096 	    LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true;
3097         }
3098       else
3099         {
3100           if (dump_enabled_p ())
3101             report_vect_op (MSG_NOTE, def_stmt, "detected reduction: ");
3102         }
3103 
3104       return def_stmt;
3105     }
3106 
3107   /* Try to find an SLP reduction chain.  */
3108   if (check_reduction && code != COND_EXPR
3109       && vect_is_slp_reduction (loop_info, phi, def_stmt))
3110     {
3111       if (dump_enabled_p ())
3112         report_vect_op (MSG_NOTE, def_stmt,
3113 			"reduction: detected reduction chain: ");
3114 
3115       return def_stmt;
3116     }
3117 
3118   if (dump_enabled_p ())
3119     report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
3120 		    "reduction: unknown pattern: ");
3121 
3122   return NULL;
3123 }
3124 
3125 /* Wrapper around vect_is_simple_reduction, which will modify code
3126    in-place if that enables detection of more reductions.  Arguments
3127    are as for that function.  */
3128 
3129 gimple *
3130 vect_force_simple_reduction (loop_vec_info loop_info, gimple *phi,
3131 			     bool check_reduction, bool *double_reduc,
3132 			     bool need_wrapping_integral_overflow)
3133 {
3134   enum vect_reduction_type v_reduc_type;
3135   return vect_is_simple_reduction (loop_info, phi, check_reduction,
3136 				   double_reduc,
3137 				   need_wrapping_integral_overflow,
3138 				   &v_reduc_type);
3139 }
3140 
3141 /* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times.  */
3142 int
3143 vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue,
3144                              int *peel_iters_epilogue,
3145                              stmt_vector_for_cost *scalar_cost_vec,
3146 			     stmt_vector_for_cost *prologue_cost_vec,
3147 			     stmt_vector_for_cost *epilogue_cost_vec)
3148 {
3149   int retval = 0;
3150   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3151 
3152   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
3153     {
3154       *peel_iters_epilogue = vf/2;
3155       if (dump_enabled_p ())
3156         dump_printf_loc (MSG_NOTE, vect_location,
3157 			 "cost model: epilogue peel iters set to vf/2 "
3158 			 "because loop iterations are unknown.\n");
3159 
3160       /* If peeled iterations are known but the number of scalar loop
3161          iterations is unknown, count a taken branch per peeled loop.  */
3162       retval = record_stmt_cost (prologue_cost_vec, 1, cond_branch_taken,
3163 				 NULL, 0, vect_prologue);
3164       retval += record_stmt_cost (epilogue_cost_vec, 1, cond_branch_taken,
3165 				  NULL, 0, vect_epilogue);
3166     }
3167   else
3168     {
3169       int niters = LOOP_VINFO_INT_NITERS (loop_vinfo);
3170       peel_iters_prologue = niters < peel_iters_prologue ?
3171                             niters : peel_iters_prologue;
3172       *peel_iters_epilogue = (niters - peel_iters_prologue) % vf;
3173       /* If we need to peel for gaps, but no peeling is required, we have to
3174 	 peel VF iterations.  */
3175       if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && !*peel_iters_epilogue)
3176         *peel_iters_epilogue = vf;
3177     }
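
  /* A worked example of the known-niters arithmetic above, with made-up
     numbers: for niters = 100, peel_iters_prologue = 3 and vf = 4,
     *peel_iters_epilogue = (100 - 3) % 4 = 1.  Had the remainder been 0
     while peeling for gaps was required, the epilogue would instead have
     been forced to a full vf = 4 iterations.  */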
3178 
3179   stmt_info_for_cost *si;
3180   int j;
3181   if (peel_iters_prologue)
3182     FOR_EACH_VEC_ELT (*scalar_cost_vec, j, si)
3183 	{
3184 	  stmt_vec_info stmt_info
3185 	    = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
3186 	  retval += record_stmt_cost (prologue_cost_vec,
3187 				      si->count * peel_iters_prologue,
3188 				      si->kind, stmt_info, si->misalign,
3189 				      vect_prologue);
3190 	}
3191   if (*peel_iters_epilogue)
3192     FOR_EACH_VEC_ELT (*scalar_cost_vec, j, si)
3193 	{
3194 	  stmt_vec_info stmt_info
3195 	    = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
3196 	  retval += record_stmt_cost (epilogue_cost_vec,
3197 				      si->count * *peel_iters_epilogue,
3198 				      si->kind, stmt_info, si->misalign,
3199 				      vect_epilogue);
3200 	}
3201 
3202   return retval;
3203 }
3204 
3205 /* Function vect_estimate_min_profitable_iters
3206 
3207    Return the number of iterations required for the vector version of the
3208    loop to be profitable relative to the cost of the scalar version of the
3209    loop.
3210 
3211    *RET_MIN_PROFITABLE_NITERS is a cost model profitability threshold
3212    of iterations for vectorization.  A value of -1 means that loop
3213    vectorization is not profitable.  The returned value may be used
3214    for a dynamic profitability check.
3215 
3216    *RET_MIN_PROFITABLE_ESTIMATE is a profitability threshold to be used
3217    for static check against estimated number of iterations.  */
3218 
3219 static void
3220 vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
3221 				    int *ret_min_profitable_niters,
3222 				    int *ret_min_profitable_estimate)
3223 {
3224   int min_profitable_iters;
3225   int min_profitable_estimate;
3226   int peel_iters_prologue;
3227   int peel_iters_epilogue;
3228   unsigned vec_inside_cost = 0;
3229   int vec_outside_cost = 0;
3230   unsigned vec_prologue_cost = 0;
3231   unsigned vec_epilogue_cost = 0;
3232   int scalar_single_iter_cost = 0;
3233   int scalar_outside_cost = 0;
3234   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3235   int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
3236   void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
3237 
3238   /* Cost model disabled.  */
3239   if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
3240     {
3241       dump_printf_loc (MSG_NOTE, vect_location, "cost model disabled.\n");
3242       *ret_min_profitable_niters = 0;
3243       *ret_min_profitable_estimate = 0;
3244       return;
3245     }
3246 
3247   /* Requires loop versioning tests to handle misalignment.  */
3248   if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
3249     {
3250       /*  FIXME: Make cost depend on complexity of individual check.  */
3251       unsigned len = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length ();
3252       (void) add_stmt_cost (target_cost_data, len, vector_stmt, NULL, 0,
3253 			    vect_prologue);
3254       dump_printf (MSG_NOTE,
3255                    "cost model: Adding cost of checks for loop "
3256                    "versioning to treat misalignment.\n");
3257     }
3258 
3259   /* Requires loop versioning with alias checks.  */
3260   if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
3261     {
3262       /*  FIXME: Make cost depend on complexity of individual check.  */
3263       unsigned len = LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).length ();
3264       (void) add_stmt_cost (target_cost_data, len, vector_stmt, NULL, 0,
3265 			    vect_prologue);
3266       dump_printf (MSG_NOTE,
3267                    "cost model: Adding cost of checks for loop "
3268                    "versioning aliasing.\n");
3269     }
3270 
3271   /* Requires loop versioning with niter checks.  */
3272   if (LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo))
3273     {
3274       /*  FIXME: Make cost depend on complexity of individual check.  */
3275       (void) add_stmt_cost (target_cost_data, 1, vector_stmt, NULL, 0,
3276 			    vect_prologue);
3277       dump_printf (MSG_NOTE,
3278 		   "cost model: Adding cost of checks for loop "
3279 		   "versioning niters.\n");
3280     }
3281 
3282   if (LOOP_REQUIRES_VERSIONING (loop_vinfo))
3283     (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken, NULL, 0,
3284 			  vect_prologue);
3285 
3286   /* Count statements in scalar loop.  Using this as scalar cost for a single
3287      iteration for now.
3288 
3289      TODO: Add outer loop support.
3290 
3291      TODO: Consider assigning different costs to different scalar
3292      statements.  */
3293 
3294   scalar_single_iter_cost
3295     = LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST (loop_vinfo);
3296 
3297   /* Add the cost of the peeled instructions executed in the prologue and
3298      epilogue loops.
3299 
3300      FORNOW: If we don't know the value of peel_iters for prologue or epilogue
3301      at compile-time - we assume it's vf/2 (the worst would be vf-1).
3302 
3303      TODO: Build an expression that represents peel_iters for prologue and
3304      epilogue to be used in a run-time test.  */
3305 
3306   if (npeel < 0)
3307     {
3308       peel_iters_prologue = vf/2;
3309       dump_printf (MSG_NOTE, "cost model: "
3310                    "prologue peel iters set to vf/2.\n");
3311 
3312       /* If peeling for alignment is unknown, the loop bound of the main
3313          loop becomes unknown.  */
3314       peel_iters_epilogue = vf/2;
3315       dump_printf (MSG_NOTE, "cost model: "
3316                    "epilogue peel iters set to vf/2 because "
3317                    "peeling for alignment is unknown.\n");
3318 
3319       /* If peeled iterations are unknown, count a taken branch and a not taken
3320          branch per peeled loop. Even if scalar loop iterations are known,
3321          vector iterations are not known since peeled prologue iterations are
3322          not known. Hence guards remain the same.  */
3323       (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken,
3324 			    NULL, 0, vect_prologue);
3325       (void) add_stmt_cost (target_cost_data, 1, cond_branch_not_taken,
3326 			    NULL, 0, vect_prologue);
3327       (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken,
3328 			    NULL, 0, vect_epilogue);
3329       (void) add_stmt_cost (target_cost_data, 1, cond_branch_not_taken,
3330 			    NULL, 0, vect_epilogue);
3331       stmt_info_for_cost *si;
3332       int j;
3333       FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo), j, si)
3334 	{
3335 	  struct _stmt_vec_info *stmt_info
3336 	    = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
3337 	  (void) add_stmt_cost (target_cost_data,
3338 				si->count * peel_iters_prologue,
3339 				si->kind, stmt_info, si->misalign,
3340 				vect_prologue);
3341 	  (void) add_stmt_cost (target_cost_data,
3342 				si->count * peel_iters_epilogue,
3343 				si->kind, stmt_info, si->misalign,
3344 				vect_epilogue);
3345 	}
3346     }
3347   else
3348     {
3349       stmt_vector_for_cost prologue_cost_vec, epilogue_cost_vec;
3350       stmt_info_for_cost *si;
3351       int j;
3352       void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
3353 
3354       prologue_cost_vec.create (2);
3355       epilogue_cost_vec.create (2);
3356       peel_iters_prologue = npeel;
3357 
3358       (void) vect_get_known_peeling_cost (loop_vinfo, peel_iters_prologue,
3359 					  &peel_iters_epilogue,
3360 					  &LOOP_VINFO_SCALAR_ITERATION_COST
3361 					    (loop_vinfo),
3362 					  &prologue_cost_vec,
3363 					  &epilogue_cost_vec);
3364 
3365       FOR_EACH_VEC_ELT (prologue_cost_vec, j, si)
3366 	{
3367 	  struct _stmt_vec_info *stmt_info
3368 	    = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
3369 	  (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
3370 				si->misalign, vect_prologue);
3371 	}
3372 
3373       FOR_EACH_VEC_ELT (epilogue_cost_vec, j, si)
3374 	{
3375 	  struct _stmt_vec_info *stmt_info
3376 	    = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
3377 	  (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
3378 				si->misalign, vect_epilogue);
3379 	}
3380 
3381       prologue_cost_vec.release ();
3382       epilogue_cost_vec.release ();
3383     }
3384 
3385   /* FORNOW: The scalar outside cost is incremented in one of the
3386      following ways:
3387 
3388      1. The vectorizer checks for alignment and aliasing and generates
3389      a condition that allows dynamic vectorization.  A cost model
3390      check is ANDED with the versioning condition.  Hence scalar code
3391      path now has the added cost of the versioning check.
3392 
3393        if (cost > th && versioning_check)
3394          jmp to vector code
3395 
3396      Hence run-time scalar is incremented by not-taken branch cost.
3397 
3398      2. The vectorizer then checks if a prologue is required.  If the
3399      cost model check was not done before during versioning, it has to
3400      be done before the prologue check.
3401 
3402        if (cost <= th)
3403          prologue = scalar_iters
3404        if (prologue == 0)
3405          jmp to vector code
3406        else
3407          execute prologue
3408        if (prologue == num_iters)
3409 	 go to exit
3410 
3411      Hence the run-time scalar cost is incremented by a taken branch,
3412      plus a not-taken branch, plus a taken branch cost.
3413 
3414      3. The vectorizer then checks if an epilogue is required.  If the
3415      cost model check was not done before during prologue check, it
3416      has to be done with the epilogue check.
3417 
3418        if (prologue == 0)
3419          jmp to vector code
3420        else
3421          execute prologue
3422        if (prologue == num_iters)
3423 	 go to exit
3424        vector code:
3425          if ((cost <= th) | (scalar_iters-prologue-epilogue == 0))
3426            jmp to epilogue
3427 
3428      Hence the run-time scalar cost should be incremented by 2 taken
3429      branches.
3430 
3431      TODO: The back end may reorder the BBs differently and reverse
3432      conditions/branch directions.  Change the estimates below to
3433      something more reasonable.  */
3434 
3435   /* If the number of iterations is known and we do not do versioning, we can
3436      decide whether to vectorize at compile time.  Hence the scalar version
3437      does not carry cost model guard costs.  */
3438   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3439       || LOOP_REQUIRES_VERSIONING (loop_vinfo))
3440     {
3441       /* Cost model check occurs at versioning.  */
3442       if (LOOP_REQUIRES_VERSIONING (loop_vinfo))
3443 	scalar_outside_cost += vect_get_stmt_cost (cond_branch_not_taken);
3444       else
3445 	{
3446 	  /* Cost model check occurs at prologue generation.  */
3447 	  if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)
3448 	    scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken)
3449 	      + vect_get_stmt_cost (cond_branch_not_taken);
3450 	  /* Cost model check occurs at epilogue generation.  */
3451 	  else
3452 	    scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken);
3453 	}
3454     }
3455 
3456   /* Complete the target-specific cost calculations.  */
3457   finish_cost (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo), &vec_prologue_cost,
3458 	       &vec_inside_cost, &vec_epilogue_cost);
3459 
3460   vec_outside_cost = (int)(vec_prologue_cost + vec_epilogue_cost);
3461 
3462   if (dump_enabled_p ())
3463     {
3464       dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
3465       dump_printf (MSG_NOTE, "  Vector inside of loop cost: %d\n",
3466                    vec_inside_cost);
3467       dump_printf (MSG_NOTE, "  Vector prologue cost: %d\n",
3468                    vec_prologue_cost);
3469       dump_printf (MSG_NOTE, "  Vector epilogue cost: %d\n",
3470                    vec_epilogue_cost);
3471       dump_printf (MSG_NOTE, "  Scalar iteration cost: %d\n",
3472                    scalar_single_iter_cost);
3473       dump_printf (MSG_NOTE, "  Scalar outside cost: %d\n",
3474                    scalar_outside_cost);
3475       dump_printf (MSG_NOTE, "  Vector outside cost: %d\n",
3476                    vec_outside_cost);
3477       dump_printf (MSG_NOTE, "  prologue iterations: %d\n",
3478                    peel_iters_prologue);
3479       dump_printf (MSG_NOTE, "  epilogue iterations: %d\n",
3480                    peel_iters_epilogue);
3481     }
3482 
3483   /* Calculate number of iterations required to make the vector version
3484      profitable, relative to the loop bodies only.  The following condition
3485      must hold true:
3486      SIC * niters + SOC > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC
3487      where
3488      SIC = scalar iteration cost, VIC = vector iteration cost,
3489      VOC = vector outside cost, VF = vectorization factor,
3490      PL_ITERS = prologue iterations, EP_ITERS = epilogue iterations,
3491      SOC = scalar outside cost for run time cost model check.  */
3492 
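  /* A worked example with made-up costs: for SIC = 4, VIC = 8, VF = 4,
     VOC = 20, SOC = 6 and PL_ITERS = EP_ITERS = 0, the division below
     gives ((20 - 6) * 4) / (4 * 4 - 8) = 56 / 8 = 7, and the follow-up
     check bumps this to 8 because at exactly 7 iterations the two
     versions only break even.  */
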
3493   if ((scalar_single_iter_cost * vf) > (int) vec_inside_cost)
3494     {
3495       if (vec_outside_cost <= 0)
3496         min_profitable_iters = 1;
3497       else
3498         {
3499           min_profitable_iters = ((vec_outside_cost - scalar_outside_cost) * vf
3500 				  - vec_inside_cost * peel_iters_prologue
3501                                   - vec_inside_cost * peel_iters_epilogue)
3502                                  / ((scalar_single_iter_cost * vf)
3503                                     - vec_inside_cost);
3504 
3505           if ((scalar_single_iter_cost * vf * min_profitable_iters)
3506               <= (((int) vec_inside_cost * min_profitable_iters)
3507                   + (((int) vec_outside_cost - scalar_outside_cost) * vf)))
3508             min_profitable_iters++;
3509         }
3510     }
3511   /* The vector version will never be profitable.  */
3512   else
3513     {
3514       if (LOOP_VINFO_LOOP (loop_vinfo)->force_vectorize)
3515 	warning_at (vect_location, OPT_Wopenmp_simd, "vectorization "
3516 		    "did not happen for a simd loop");
3517 
3518       if (dump_enabled_p ())
3519         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3520 			 "cost model: the vector iteration cost = %d "
3521 			 "divided by the scalar iteration cost = %d "
3522 			 "is greater than or equal to the vectorization factor = %d"
3523                          ".\n",
3524 			 vec_inside_cost, scalar_single_iter_cost, vf);
3525       *ret_min_profitable_niters = -1;
3526       *ret_min_profitable_estimate = -1;
3527       return;
3528     }
3529 
3530   dump_printf (MSG_NOTE,
3531 	       "  Calculated minimum iters for profitability: %d\n",
3532 	       min_profitable_iters);
3533 
3534   min_profitable_iters =
3535 	min_profitable_iters < vf ? vf : min_profitable_iters;
3536 
3537   /* Because the condition we create is:
3538      if (niters <= min_profitable_iters)
3539        then skip the vectorized loop.  */
3540   min_profitable_iters--;
3541 
3542   if (dump_enabled_p ())
3543     dump_printf_loc (MSG_NOTE, vect_location,
3544                      "  Runtime profitability threshold = %d\n",
3545                      min_profitable_iters);
3546 
3547   *ret_min_profitable_niters = min_profitable_iters;
3548 
3549   /* Calculate number of iterations required to make the vector version
3550      profitable, relative to the loop bodies only.
3551 
3552      The non-vectorized variant costs SIC * niters, and it must win over
3553      the vector variant on the expected trip count.  The following must hold:
3554      SIC * niters > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC + SOC  */
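
  /* Note that SOC switches sides relative to the dynamic threshold above:
     the run-time guard is only executed when the vectorized path exists,
     so its cost is charged to the vector variant here.  With the made-up
     costs from the earlier example (SIC = 4, VIC = 8, VF = 4, VOC = 20,
     SOC = 6, no peeling) this solves to ((20 + 6) * 4) / (4 * 4 - 8)
     = 104 / 8 = 13, before the adjustments below.  */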
3555 
3556   if (vec_outside_cost <= 0)
3557     min_profitable_estimate = 1;
3558   else
3559     {
3560       min_profitable_estimate = ((vec_outside_cost + scalar_outside_cost) * vf
3561 				 - vec_inside_cost * peel_iters_prologue
3562 				 - vec_inside_cost * peel_iters_epilogue)
3563 				 / ((scalar_single_iter_cost * vf)
3564 				   - vec_inside_cost);
3565     }
3566   min_profitable_estimate--;
3567   min_profitable_estimate = MAX (min_profitable_estimate, min_profitable_iters);
3568   if (dump_enabled_p ())
3569     dump_printf_loc (MSG_NOTE, vect_location,
3570 		     "  Static estimate profitability threshold = %d\n",
3571 		     min_profitable_estimate);
3572 
3573   *ret_min_profitable_estimate = min_profitable_estimate;
3574 }
3575 
3576 /* Writes into SEL a mask for a vec_perm, equivalent to a vec_shr by OFFSET
3577    vector elements (not bits) for a vector of mode MODE.  */
3578 static void
3579 calc_vec_perm_mask_for_shift (enum machine_mode mode, unsigned int offset,
3580 			      unsigned char *sel)
3581 {
3582   unsigned int i, nelt = GET_MODE_NUNITS (mode);
3583 
3584   for (i = 0; i < nelt; i++)
3585     sel[i] = (i + offset) & (2*nelt - 1);
3586 }
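
/* For example (illustrative values): for a 4-element mode and OFFSET = 2
   the selector becomes {2, 3, 4, 5}.  Indices of NELT and above select
   elements from the second vec_perm input; in the reduction epilogue
   that uses these masks the shifted-in lanes are don't-care values.  */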
3587 
3588 /* Checks whether the target supports whole-vector shifts for vectors of mode
3589    MODE.  This is the case if _either_ the platform handles vec_shr_optab, _or_
3590    it supports vec_perm_const with masks for all necessary shift amounts.  */
3591 static bool
3592 have_whole_vector_shift (enum machine_mode mode)
3593 {
3594   if (optab_handler (vec_shr_optab, mode) != CODE_FOR_nothing)
3595     return true;
3596 
3597   if (direct_optab_handler (vec_perm_const_optab, mode) == CODE_FOR_nothing)
3598     return false;
3599 
3600   unsigned int i, nelt = GET_MODE_NUNITS (mode);
3601   unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
3602 
3603   for (i = nelt/2; i >= 1; i/=2)
3604     {
3605       calc_vec_perm_mask_for_shift (mode, i, sel);
3606       if (!can_vec_perm_p (mode, false, sel))
3607 	return false;
3608     }
3609   return true;
3610 }
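
/* For instance, for an 8-element vector mode the loop above checks the
   permutation masks for shifts by 4, 2 and 1 elements - exactly the
   amounts the log2-style reduction epilogue needs.  */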
3611 
3612 /* Return the reduction operand (with index REDUC_INDEX) of STMT.  */
3613 
3614 static tree
3615 get_reduction_op (gimple *stmt, int reduc_index)
3616 {
3617   switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
3618     {
3619     case GIMPLE_SINGLE_RHS:
3620       gcc_assert (TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt))
3621 		  == ternary_op);
3622       return TREE_OPERAND (gimple_assign_rhs1 (stmt), reduc_index);
3623     case GIMPLE_UNARY_RHS:
3624       return gimple_assign_rhs1 (stmt);
3625     case GIMPLE_BINARY_RHS:
3626       return (reduc_index
3627 	      ? gimple_assign_rhs2 (stmt) : gimple_assign_rhs1 (stmt));
3628     case GIMPLE_TERNARY_RHS:
3629       return gimple_op (stmt, reduc_index + 1);
3630     default:
3631       gcc_unreachable ();
3632     }
3633 }
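
/* For example (hypothetical SSA names): for the binary reduction
   statement s_1 = t_2 + s_0 (a GIMPLE_BINARY_RHS), REDUC_INDEX 0 would
   return t_2, while REDUC_INDEX 1 returns the reduction operand s_0.  */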
3634 
3635 /* TODO: Close dependency between vect_model_*_cost and vectorizable_*
3636    functions. Design better to avoid maintenance issues.  */
3637 
3638 /* Function vect_model_reduction_cost.
3639 
3640    Models cost for a reduction operation, including the vector ops
3641    generated within the strip-mine loop, the initial definition before
3642    the loop, and the epilogue code that must be generated.  */
3643 
3644 static bool
3645 vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
3646 			   int ncopies, int reduc_index)
3647 {
3648   int prologue_cost = 0, epilogue_cost = 0;
3649   enum tree_code code;
3650   optab optab;
3651   tree vectype;
3652   gimple *stmt, *orig_stmt;
3653   tree reduction_op;
3654   machine_mode mode;
3655   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3656   struct loop *loop = NULL;
3657   void *target_cost_data;
3658 
3659   if (loop_vinfo)
3660     {
3661       loop = LOOP_VINFO_LOOP (loop_vinfo);
3662       target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
3663     }
3664   else
3665     target_cost_data = BB_VINFO_TARGET_COST_DATA (STMT_VINFO_BB_VINFO (stmt_info));
3666 
3667   /* Condition reductions generate two reductions in the loop.  */
3668   if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
3669     ncopies *= 2;
3670 
3671   /* Cost of reduction op inside loop.  */
3672   unsigned inside_cost = add_stmt_cost (target_cost_data, ncopies, vector_stmt,
3673 					stmt_info, 0, vect_body);
3674   stmt = STMT_VINFO_STMT (stmt_info);
3675 
3676   reduction_op = get_reduction_op (stmt, reduc_index);
3677 
3678   vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
3679   if (!vectype)
3680     {
3681       if (dump_enabled_p ())
3682         {
3683 	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3684 			   "unsupported data-type ");
3685           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
3686 			     TREE_TYPE (reduction_op));
3687           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3688         }
3689       return false;
3690     }
3691 
3692   mode = TYPE_MODE (vectype);
3693   orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3694 
3695   if (!orig_stmt)
3696     orig_stmt = STMT_VINFO_STMT (stmt_info);
3697 
3698   code = gimple_assign_rhs_code (orig_stmt);
3699 
3700   /* Add in cost for initial definition.
3701      For cond reduction we have four vectors: initial index, step, initial
3702      result of the data reduction, initial value of the index reduction.  */
3703   int prologue_stmts = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
3704 		       == COND_REDUCTION ? 4 : 1;
3705   prologue_cost += add_stmt_cost (target_cost_data, prologue_stmts,
3706 				  scalar_to_vec, stmt_info, 0,
3707 				  vect_prologue);
3708 
3709   /* Determine cost of epilogue code.
3710 
3711      We have a reduction operator that will reduce the vector in one statement.
3712      Also requires scalar extract.  */
3713 
3714   if (!loop || !nested_in_vect_loop_p (loop, orig_stmt))
3715     {
3716       if (reduc_code != ERROR_MARK)
3717 	{
3718 	  if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
3719 	    {
3720 	      /* An EQ stmt and a COND_EXPR stmt.  */
3721 	      epilogue_cost += add_stmt_cost (target_cost_data, 2,
3722 					      vector_stmt, stmt_info, 0,
3723 					      vect_epilogue);
3724 	      /* Reduction of the max index and a reduction of the found
3725 		 values.  */
3726 	      epilogue_cost += add_stmt_cost (target_cost_data, 2,
3727 					      vec_to_scalar, stmt_info, 0,
3728 					      vect_epilogue);
3729 	      /* A broadcast of the max value.  */
3730 	      epilogue_cost += add_stmt_cost (target_cost_data, 1,
3731 					      scalar_to_vec, stmt_info, 0,
3732 					      vect_epilogue);
3733 	    }
3734 	  else
3735 	    {
3736 	      epilogue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
3737 					      stmt_info, 0, vect_epilogue);
3738 	      epilogue_cost += add_stmt_cost (target_cost_data, 1,
3739 					      vec_to_scalar, stmt_info, 0,
3740 					      vect_epilogue);
3741 	    }
3742 	}
3743       else
3744 	{
3745 	  int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
3746 	  tree bitsize =
3747 	    TYPE_SIZE (TREE_TYPE (gimple_assign_lhs (orig_stmt)));
3748 	  int element_bitsize = tree_to_uhwi (bitsize);
3749 	  int nelements = vec_size_in_bits / element_bitsize;
3750 
3751 	  optab = optab_for_tree_code (code, vectype, optab_default);
3752 
3753 	  /* We have a whole vector shift available.  */
3754 	  if (VECTOR_MODE_P (mode)
3755 	      && optab_handler (optab, mode) != CODE_FOR_nothing
3756 	      && have_whole_vector_shift (mode))
3757 	    {
3758 	      /* Final reduction via vector shifts and the reduction operator.
3759 		 Also requires scalar extract.  */
3760 	      epilogue_cost += add_stmt_cost (target_cost_data,
3761 					      exact_log2 (nelements) * 2,
3762 					      vector_stmt, stmt_info, 0,
3763 					      vect_epilogue);
3764 	      epilogue_cost += add_stmt_cost (target_cost_data, 1,
3765 					      vec_to_scalar, stmt_info, 0,
3766 					      vect_epilogue);
3767 	    }
3768 	  else
3769 	    /* Use extracts and reduction op for final reduction.  For N
3770 	       elements, we have N extracts and N-1 reduction ops.  */
3771 	    epilogue_cost += add_stmt_cost (target_cost_data,
3772 					    nelements + nelements - 1,
3773 					    vector_stmt, stmt_info, 0,
3774 					    vect_epilogue);
3775 	}
3776     }
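
  /* A worked example with made-up numbers: for a 128-bit vector of four
     32-bit elements, nelements = 4.  With whole-vector shifts available
     the final reduction costs exact_log2 (4) * 2 = 4 vector stmts (two
     shifts plus two reduction ops) and one vec_to_scalar extract; the
     fallback costs 4 + 3 = 7 vector stmts (four extracts and three
     reduction ops).  */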
3777 
3778   if (dump_enabled_p ())
3779     dump_printf (MSG_NOTE,
3780                  "vect_model_reduction_cost: inside_cost = %d, "
3781                  "prologue_cost = %d, epilogue_cost = %d .\n", inside_cost,
3782                  prologue_cost, epilogue_cost);
3783 
3784   return true;
3785 }
3786 
3787 
3788 /* Function vect_model_induction_cost.
3789 
3790    Models cost for induction operations.  */
3791 
3792 static void
3793 vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies)
3794 {
3795   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3796   void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
3797   unsigned inside_cost, prologue_cost;
3798 
3799   /* Loop cost for vec_loop.  */
3800   inside_cost = add_stmt_cost (target_cost_data, ncopies, vector_stmt,
3801 			       stmt_info, 0, vect_body);
3802 
3803   /* Prologue cost for vec_init and vec_step.  */
3804   prologue_cost = add_stmt_cost (target_cost_data, 2, scalar_to_vec,
3805 				 stmt_info, 0, vect_prologue);
3806 
3807   if (dump_enabled_p ())
3808     dump_printf_loc (MSG_NOTE, vect_location,
3809                      "vect_model_induction_cost: inside_cost = %d, "
3810                      "prologue_cost = %d .\n", inside_cost, prologue_cost);
3811 }
3812 
3813 
3814 /* Function get_initial_def_for_induction
3815 
3816    Input:
3817    IV_PHI - the loop-header phi of the induction variable; its
3818             loop-entry argument supplies the initial value X below.
3819 
3820    Output:
3821    Return a vector variable, initialized with the first VF values of
3822    the induction variable.  E.g., for an iv with initial value 'X' and
3823    evolution S, for a vector of 4 units, we want to return:
3824    [X, X + S, X + 2*S, X + 3*S].  */
3825 
3826 static tree
3827 get_initial_def_for_induction (gimple *iv_phi)
3828 {
3829   stmt_vec_info stmt_vinfo = vinfo_for_stmt (iv_phi);
3830   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
3831   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3832   tree vectype;
3833   int nunits;
3834   edge pe = loop_preheader_edge (loop);
3835   struct loop *iv_loop;
3836   basic_block new_bb;
3837   tree new_vec, vec_init, vec_step, t;
3838   tree new_name;
3839   gimple *new_stmt;
3840   gphi *induction_phi;
3841   tree induc_def, vec_def, vec_dest;
3842   tree init_expr, step_expr;
3843   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3844   int i;
3845   int ncopies;
3846   tree expr;
3847   stmt_vec_info phi_info = vinfo_for_stmt (iv_phi);
3848   bool nested_in_vect_loop = false;
3849   gimple_seq stmts;
3850   imm_use_iterator imm_iter;
3851   use_operand_p use_p;
3852   gimple *exit_phi;
3853   edge latch_e;
3854   tree loop_arg;
3855   gimple_stmt_iterator si;
3856   basic_block bb = gimple_bb (iv_phi);
3857   tree stepvectype;
3858   tree resvectype;
3859 
3860   /* Is phi in an inner-loop, while vectorizing an enclosing outer-loop?  */
3861   if (nested_in_vect_loop_p (loop, iv_phi))
3862     {
3863       nested_in_vect_loop = true;
3864       iv_loop = loop->inner;
3865     }
3866   else
3867     iv_loop = loop;
3868   gcc_assert (iv_loop == (gimple_bb (iv_phi))->loop_father);
3869 
3870   latch_e = loop_latch_edge (iv_loop);
3871   loop_arg = PHI_ARG_DEF_FROM_EDGE (iv_phi, latch_e);
3872 
3873   step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info);
3874   gcc_assert (step_expr != NULL_TREE);
3875 
3876   pe = loop_preheader_edge (iv_loop);
3877   init_expr = PHI_ARG_DEF_FROM_EDGE (iv_phi,
3878 				     loop_preheader_edge (iv_loop));
3879 
3880   vectype = get_vectype_for_scalar_type (TREE_TYPE (init_expr));
3881   resvectype = get_vectype_for_scalar_type (TREE_TYPE (PHI_RESULT (iv_phi)));
3882   gcc_assert (vectype);
3883   nunits = TYPE_VECTOR_SUBPARTS (vectype);
3884   ncopies = vf / nunits;
3885 
3886   gcc_assert (phi_info);
3887   gcc_assert (ncopies >= 1);
3888 
3889   /* Convert the step to the desired type.  */
3890   stmts = NULL;
3891   step_expr = gimple_convert (&stmts, TREE_TYPE (vectype), step_expr);
3892   if (stmts)
3893     {
3894       new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3895       gcc_assert (!new_bb);
3896     }
3897 
3898   /* Find the first insertion point in the BB.  */
3899   si = gsi_after_labels (bb);
3900 
3901   /* Create the vector that holds the initial value of the induction.  */
3902   if (nested_in_vect_loop)
3903     {
3904       /* iv_loop is nested in the loop to be vectorized.  init_expr has
3905 	 already been created when vectorizing previous stmts; we obtain it
3906 	 from the STMT_VINFO_VEC_STMT of the defining stmt.  */
3907       vec_init = vect_get_vec_def_for_operand (init_expr, iv_phi);
3908       /* If the initial value is not of proper type, convert it.  */
3909       if (!useless_type_conversion_p (vectype, TREE_TYPE (vec_init)))
3910 	{
3911 	  new_stmt
3912 	    = gimple_build_assign (vect_get_new_ssa_name (vectype,
3913 							  vect_simple_var,
3914 							  "vec_iv_"),
3915 				   VIEW_CONVERT_EXPR,
3916 				   build1 (VIEW_CONVERT_EXPR, vectype,
3917 					   vec_init));
3918 	  vec_init = gimple_assign_lhs (new_stmt);
3919 	  new_bb = gsi_insert_on_edge_immediate (loop_preheader_edge (iv_loop),
3920 						 new_stmt);
3921 	  gcc_assert (!new_bb);
3922 	  set_vinfo_for_stmt (new_stmt,
3923 			      new_stmt_vec_info (new_stmt, loop_vinfo));
3924 	}
3925     }
3926   else
3927     {
3928       vec<constructor_elt, va_gc> *v;
3929 
3930       /* iv_loop is the loop to be vectorized. Create:
3931 	 vec_init = [X, X+S, X+2*S, X+3*S] (S = step_expr, X = init_expr)  */
3932       stmts = NULL;
3933       new_name = gimple_convert (&stmts, TREE_TYPE (vectype), init_expr);
3934 
3935       vec_alloc (v, nunits);
3936       bool constant_p = is_gimple_min_invariant (new_name);
3937       CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, new_name);
3938       for (i = 1; i < nunits; i++)
3939 	{
3940 	  /* Create: new_name_i = new_name + step_expr  */
3941 	  new_name = gimple_build (&stmts, PLUS_EXPR, TREE_TYPE (new_name),
3942 				   new_name, step_expr);
3943 	  if (!is_gimple_min_invariant (new_name))
3944 	    constant_p = false;
3945 	  CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, new_name);
3946 	}
3947       if (stmts)
3948 	{
3949 	  new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3950 	  gcc_assert (!new_bb);
3951 	}
3952 
3953       /* Create a vector from [new_name_0, new_name_1, ..., new_name_nunits-1]  */
3954       if (constant_p)
3955 	new_vec = build_vector_from_ctor (vectype, v);
3956       else
3957 	new_vec = build_constructor (vectype, v);
3958       vec_init = vect_init_vector (iv_phi, new_vec, vectype, NULL);
3959     }
3960 
3961 
3962   /* Create the vector that holds the step of the induction.  */
3963   if (nested_in_vect_loop)
3964     /* iv_loop is nested in the loop to be vectorized. Generate:
3965        vec_step = [S, S, S, S]  */
3966     new_name = step_expr;
3967   else
3968     {
3969       /* iv_loop is the loop to be vectorized. Generate:
3970 	  vec_step = [VF*S, VF*S, VF*S, VF*S]  */
3971       if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
3972 	{
3973 	  expr = build_int_cst (integer_type_node, vf);
3974 	  expr = fold_convert (TREE_TYPE (step_expr), expr);
3975 	}
3976       else
3977 	expr = build_int_cst (TREE_TYPE (step_expr), vf);
3978       new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
3979 			      expr, step_expr);
3980       if (TREE_CODE (step_expr) == SSA_NAME)
3981 	new_name = vect_init_vector (iv_phi, new_name,
3982 				     TREE_TYPE (step_expr), NULL);
3983     }
3984 
3985   t = unshare_expr (new_name);
3986   gcc_assert (CONSTANT_CLASS_P (new_name)
3987 	      || TREE_CODE (new_name) == SSA_NAME);
3988   stepvectype = get_vectype_for_scalar_type (TREE_TYPE (new_name));
3989   gcc_assert (stepvectype);
3990   new_vec = build_vector_from_val (stepvectype, t);
3991   vec_step = vect_init_vector (iv_phi, new_vec, stepvectype, NULL);
3992 
3993 
3994   /* Create the following def-use cycle:
3995      loop prolog:
3996          vec_init = ...
3997 	 vec_step = ...
3998      loop:
3999          vec_iv = PHI <vec_init, vec_loop>
4000          ...
4001          STMT
4002          ...
4003          vec_loop = vec_iv + vec_step;  */
4004 
4005   /* Create the induction-phi that defines the induction-operand.  */
4006   vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_");
4007   induction_phi = create_phi_node (vec_dest, iv_loop->header);
4008   set_vinfo_for_stmt (induction_phi,
4009 		      new_stmt_vec_info (induction_phi, loop_vinfo));
4010   induc_def = PHI_RESULT (induction_phi);
4011 
4012   /* Create the iv update inside the loop  */
4013   new_stmt = gimple_build_assign (vec_dest, PLUS_EXPR, induc_def, vec_step);
4014   vec_def = make_ssa_name (vec_dest, new_stmt);
4015   gimple_assign_set_lhs (new_stmt, vec_def);
4016   gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
4017   set_vinfo_for_stmt (new_stmt, new_stmt_vec_info (new_stmt, loop_vinfo));
4018 
4019   /* Set the arguments of the phi node:  */
4020   add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION);
4021   add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop),
4022 	       UNKNOWN_LOCATION);
4023 
4024 
4025   /* In case that vectorization factor (VF) is bigger than the number
4026      of elements that we can fit in a vectype (nunits), we have to generate
4027      more than one vector stmt - i.e - we need to "unroll" the
4028      vector stmt by a factor VF/nunits.  For more details see documentation
4029      in vectorizable_operation.  */
4030 
4031   if (ncopies > 1)
4032     {
4033       stmt_vec_info prev_stmt_vinfo;
4034       /* FORNOW. This restriction should be relaxed.  */
4035       gcc_assert (!nested_in_vect_loop);
4036 
4037       /* Create the vector that holds the step of the induction.  */
4038       if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
4039 	{
4040 	  expr = build_int_cst (integer_type_node, nunits);
4041 	  expr = fold_convert (TREE_TYPE (step_expr), expr);
4042 	}
4043       else
4044 	expr = build_int_cst (TREE_TYPE (step_expr), nunits);
4045       new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
4046 			      expr, step_expr);
4047       if (TREE_CODE (step_expr) == SSA_NAME)
4048 	new_name = vect_init_vector (iv_phi, new_name,
4049 				     TREE_TYPE (step_expr), NULL);
4050       t = unshare_expr (new_name);
4051       gcc_assert (CONSTANT_CLASS_P (new_name)
4052 		  || TREE_CODE (new_name) == SSA_NAME);
4053       new_vec = build_vector_from_val (stepvectype, t);
4054       vec_step = vect_init_vector (iv_phi, new_vec, stepvectype, NULL);
4055 
4056       vec_def = induc_def;
4057       prev_stmt_vinfo = vinfo_for_stmt (induction_phi);
4058       for (i = 1; i < ncopies; i++)
4059 	{
4060 	  /* vec_i = vec_prev + vec_step  */
4061 	  new_stmt = gimple_build_assign (vec_dest, PLUS_EXPR,
4062 					  vec_def, vec_step);
4063 	  vec_def = make_ssa_name (vec_dest, new_stmt);
4064 	  gimple_assign_set_lhs (new_stmt, vec_def);
4065 
4066 	  gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
4067 	  if (!useless_type_conversion_p (resvectype, vectype))
4068 	    {
4069 	      new_stmt
4070 		= gimple_build_assign
4071 			(vect_get_new_vect_var (resvectype, vect_simple_var,
4072 						"vec_iv_"),
4073 			 VIEW_CONVERT_EXPR,
4074 			 build1 (VIEW_CONVERT_EXPR, resvectype,
4075 				 gimple_assign_lhs (new_stmt)));
4076 	      gimple_assign_set_lhs (new_stmt,
4077 				     make_ssa_name
4078 				       (gimple_assign_lhs (new_stmt), new_stmt));
4079 	      gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
4080 	    }
4081 	  set_vinfo_for_stmt (new_stmt,
4082 			      new_stmt_vec_info (new_stmt, loop_vinfo));
4083 	  STMT_VINFO_RELATED_STMT (prev_stmt_vinfo) = new_stmt;
4084 	  prev_stmt_vinfo = vinfo_for_stmt (new_stmt);
4085 	}
4086     }
4087 
4088   if (nested_in_vect_loop)
4089     {
4090       /* Find the loop-closed exit-phi of the induction, and record
4091          the final vector of induction results:  */
4092       exit_phi = NULL;
4093       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
4094         {
4095 	  gimple *use_stmt = USE_STMT (use_p);
4096 	  if (is_gimple_debug (use_stmt))
4097 	    continue;
4098 
4099 	  if (!flow_bb_inside_loop_p (iv_loop, gimple_bb (use_stmt)))
4100 	    {
4101 	      exit_phi = use_stmt;
4102 	      break;
4103 	    }
4104         }
4105       if (exit_phi)
4106 	{
4107 	  stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi);
4108 	  /* FORNOW. Currently not supporting the case that an inner-loop induction
4109 	     is not used in the outer-loop (i.e. only outside the outer-loop).  */
4110 	  gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo)
4111 		      && !STMT_VINFO_LIVE_P (stmt_vinfo));
4112 
4113 	  STMT_VINFO_VEC_STMT (stmt_vinfo) = new_stmt;
4114 	  if (dump_enabled_p ())
4115 	    {
4116 	      dump_printf_loc (MSG_NOTE, vect_location,
4117 			       "vector of inductions after inner-loop:");
4118 	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
4119 	    }
4120 	}
4121     }
4122 
4123 
4124   if (dump_enabled_p ())
4125     {
4126       dump_printf_loc (MSG_NOTE, vect_location,
4127 		       "transform induction: created def-use cycle: ");
4128       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, induction_phi, 0);
4129       dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
4130 			SSA_NAME_DEF_STMT (vec_def), 0);
4131     }
4132 
4133   STMT_VINFO_VEC_STMT (phi_info) = induction_phi;
4134   if (!useless_type_conversion_p (resvectype, vectype))
4135     {
4136       new_stmt = gimple_build_assign (vect_get_new_vect_var (resvectype,
4137 							     vect_simple_var,
4138 							     "vec_iv_"),
4139 				      VIEW_CONVERT_EXPR,
4140 				      build1 (VIEW_CONVERT_EXPR, resvectype,
4141 					      induc_def));
4142       induc_def = make_ssa_name (gimple_assign_lhs (new_stmt), new_stmt);
4143       gimple_assign_set_lhs (new_stmt, induc_def);
4144       si = gsi_after_labels (bb);
4145       gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
4146       set_vinfo_for_stmt (new_stmt,
4147 			  new_stmt_vec_info (new_stmt, loop_vinfo));
4148       STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_stmt))
4149 	= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (induction_phi));
4150     }
4151 
4152   return induc_def;
4153 }
4154 
4155 
4156 /* Function get_initial_def_for_reduction
4157 
4158    Input:
4159    STMT - a stmt that performs a reduction operation in the loop.
4160    INIT_VAL - the initial value of the reduction variable
4161 
4162    Output:
4163    ADJUSTMENT_DEF - a tree that holds a value to be added to the final result
4164         of the reduction (used for adjusting the epilog - see below).
4165    Return a vector variable, initialized according to the operation that STMT
4166         performs. This vector will be used as the initial value of the
4167         vector of partial results.
4168 
4169    Option1 (adjust in epilog): Initialize the vector as follows:
4170      add/bit or/xor:    [0,0,...,0,0]
4171      mult/bit and:      [1,1,...,1,1]
4172      min/max/cond_expr: [init_val,init_val,..,init_val,init_val]
4173    and when necessary (e.g. add/mult case) let the caller know
4174    that it needs to adjust the result by init_val.
4175 
4176    Option2: Initialize the vector as follows:
4177      add/bit or/xor:    [init_val,0,0,...,0]
4178      mult/bit and:      [init_val,1,1,...,1]
4179      min/max/cond_expr: [init_val,init_val,...,init_val]
4180    and no adjustments are needed.
4181 
4182    For example, for the following code:
4183 
4184    s = init_val;
4185    for (i=0;i<n;i++)
4186      s = s + a[i];
4187 
4188    STMT is 's = s + a[i]', and the reduction variable is 's'.
4189    For a vector of 4 units, we want to return either [0,0,0,init_val],
4190    or [0,0,0,0] and let the caller know that it needs to adjust
4191    the result at the end by 'init_val'.
4192 
4193    FORNOW, we use the 'adjust in epilog' scheme (Option1) if ADJUSTMENT_DEF
4194    is not NULL, because this way the initialization vector is simpler (the
4195    same element in all entries); otherwise we use Option2.
4196 
4197    A cost model should help decide between these two schemes.  */
4198 
4199 tree
4200 get_initial_def_for_reduction (gimple *stmt, tree init_val,
4201                                tree *adjustment_def)
4202 {
4203   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
4204   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
4205   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4206   tree scalar_type = TREE_TYPE (init_val);
4207   tree vectype = get_vectype_for_scalar_type (scalar_type);
4208   int nunits;
4209   enum tree_code code = gimple_assign_rhs_code (stmt);
4210   tree def_for_init;
4211   tree init_def;
4212   tree *elts;
4213   int i;
4214   bool nested_in_vect_loop = false;
4215   REAL_VALUE_TYPE real_init_val = dconst0;
4216   int int_init_val = 0;
4217   gimple *def_stmt = NULL;
4218   gimple_seq stmts = NULL;
4219 
4220   gcc_assert (vectype);
4221   nunits = TYPE_VECTOR_SUBPARTS (vectype);
4222 
4223   gcc_assert (POINTER_TYPE_P (scalar_type) || INTEGRAL_TYPE_P (scalar_type)
4224 	      || SCALAR_FLOAT_TYPE_P (scalar_type));
4225 
4226   if (nested_in_vect_loop_p (loop, stmt))
4227     nested_in_vect_loop = true;
4228   else
4229     gcc_assert (loop == (gimple_bb (stmt))->loop_father);
4230 
4231   /* In case of double reduction we only create a vector variable to be put
4232      in the reduction phi node.  The actual statement creation is done in
4233      vect_create_epilog_for_reduction.  */
4234   if (adjustment_def && nested_in_vect_loop
4235       && TREE_CODE (init_val) == SSA_NAME
4236       && (def_stmt = SSA_NAME_DEF_STMT (init_val))
4237       && gimple_code (def_stmt) == GIMPLE_PHI
4238       && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
4239       && vinfo_for_stmt (def_stmt)
4240       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
4241           == vect_double_reduction_def)
4242     {
4243       *adjustment_def = NULL;
4244       return vect_create_destination_var (init_val, vectype);
4245     }
4246 
4247   /* In case of a nested reduction do not use an adjustment def as
4248      that case is not supported by the epilogue generation correctly
4249      if ncopies is not one.  */
4250   if (adjustment_def && nested_in_vect_loop)
4251     {
4252       *adjustment_def = NULL;
4253       return vect_get_vec_def_for_operand (init_val, stmt);
4254     }
4255 
4256   switch (code)
4257     {
4258       case WIDEN_SUM_EXPR:
4259       case DOT_PROD_EXPR:
4260       case SAD_EXPR:
4261       case PLUS_EXPR:
4262       case MINUS_EXPR:
4263       case BIT_IOR_EXPR:
4264       case BIT_XOR_EXPR:
4265       case MULT_EXPR:
4266       case BIT_AND_EXPR:
4267         /* ADJUSTMENT_DEF is NULL when called from
4268            vect_create_epilog_for_reduction to vectorize double reduction.  */
4269         if (adjustment_def)
4270 	  *adjustment_def = init_val;
4271 
4272         if (code == MULT_EXPR)
4273           {
4274             real_init_val = dconst1;
4275             int_init_val = 1;
4276           }
4277 
4278         if (code == BIT_AND_EXPR)
4279           int_init_val = -1;
4280 
4281         if (SCALAR_FLOAT_TYPE_P (scalar_type))
4282           def_for_init = build_real (scalar_type, real_init_val);
4283         else
4284           def_for_init = build_int_cst (scalar_type, int_init_val);
4285 
4286         /* Create a vector of '0' or '1' except the first element.  */
4287 	elts = XALLOCAVEC (tree, nunits);
4288         for (i = nunits - 2; i >= 0; --i)
4289 	  elts[i + 1] = def_for_init;
4290 
4291         /* Option1: the first element is '0' or '1' as well.  */
4292         if (adjustment_def)
4293           {
4294 	    elts[0] = def_for_init;
4295             init_def = build_vector (vectype, elts);
4296             break;
4297           }
4298 
4299         /* Option2: the first element is INIT_VAL.  */
4300 	elts[0] = init_val;
4301         if (TREE_CONSTANT (init_val))
4302           init_def = build_vector (vectype, elts);
4303         else
4304 	  {
4305 	    vec<constructor_elt, va_gc> *v;
4306 	    vec_alloc (v, nunits);
4307 	    CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, init_val);
4308 	    for (i = 1; i < nunits; ++i)
4309 	      CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[i]);
4310 	    init_def = build_constructor (vectype, v);
4311 	  }
4312 
4313         break;
4314 
4315       case MIN_EXPR:
4316       case MAX_EXPR:
4317       case COND_EXPR:
4318 	if (adjustment_def)
4319           {
4320 	    *adjustment_def = NULL_TREE;
4321 	    if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_vinfo) != COND_REDUCTION)
4322 	      {
4323 		init_def = vect_get_vec_def_for_operand (init_val, stmt);
4324 		break;
4325 	      }
4326 	  }
4327 	init_val = gimple_convert (&stmts, TREE_TYPE (vectype), init_val);
4328 	if (! gimple_seq_empty_p (stmts))
4329 	  gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
4330 	init_def = build_vector_from_val (vectype, init_val);
4331 	break;
4332 
4333       default:
4334         gcc_unreachable ();
4335     }
4336 
4337   return init_def;
4338 }
4339 
4340 /* Function vect_create_epilog_for_reduction
4341 
4342    Create code at the loop-epilog to finalize the result of a reduction
4343    computation.
4344 
4345    VECT_DEFS is list of vector of partial results, i.e., the lhs's of vector
4346      reduction statements.
4347    STMT is the scalar reduction stmt that is being vectorized.
4348    NCOPIES is > 1 in case the vectorization factor (VF) is bigger than the
4349      number of elements that we can fit in a vectype (nunits).  In this case
4350      we have to generate more than one vector stmt - i.e - we need to "unroll"
4351      the vector stmt by a factor VF/nunits.  For more details see documentation
4352      in vectorizable_operation.
4353    REDUC_CODE is the tree-code for the epilog reduction.
4354    REDUCTION_PHIS is a list of the phi-nodes that carry the reduction
4355      computation.
4356    REDUC_INDEX is the index of the operand in the right hand side of the
4357      statement that is defined by REDUCTION_PHI.
4358    DOUBLE_REDUC is TRUE if double reduction phi nodes should be handled.
4359    SLP_NODE is an SLP node containing a group of reduction statements. The
4360      first one in this group is STMT.
4361    INDUCTION_INDEX is the index of the loop for condition reductions.
4362      Otherwise it is undefined.
4363    INDUC_VAL is, for INTEGER_INDUC_COND_REDUCTION, the value to use for the case
4364      when the COND_EXPR is never true in the loop.  It needs to
4365      be smaller than any value of the IV in the loop.
4366 
4367    This function:
4368    1. Creates the reduction def-use cycles: sets the arguments for
4369       REDUCTION_PHIS:
4370       The loop-entry argument is the vectorized initial-value of the reduction.
4371       The loop-latch argument is taken from VECT_DEFS - the vector of partial
4372       sums.
4373    2. "Reduces" each vector of partial results VECT_DEFS into a single result,
4374       by applying the operation specified by REDUC_CODE if available, or by
4375       other means (whole-vector shifts or a scalar loop).
4376       The function also creates a new phi node at the loop exit to preserve
4377       loop-closed form, as illustrated below.
4378 
4379      The flow at the entry to this function:
4380 
4381         loop:
4382           vec_def = phi <null, null>            # REDUCTION_PHI
4383           VECT_DEF = vector_stmt                # vectorized form of STMT
4384           s_loop = scalar_stmt                  # (scalar) STMT
4385         loop_exit:
4386           s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
4387           use <s_out0>
4388           use <s_out0>
4389 
4390      The above is transformed by this function into:
4391 
4392         loop:
4393           vec_def = phi <vec_init, VECT_DEF>    # REDUCTION_PHI
4394           VECT_DEF = vector_stmt                # vectorized form of STMT
4395           s_loop = scalar_stmt                  # (scalar) STMT
4396         loop_exit:
4397           s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
4398           v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
4399           v_out2 = reduce <v_out1>
4400           s_out3 = extract_field <v_out2, 0>
4401           s_out4 = adjust_result <s_out3>
4402           use <s_out4>
4403           use <s_out4>
4404 */
4405 
4406 static void
4407 vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple *stmt,
4408 				  int ncopies, enum tree_code reduc_code,
4409 				  vec<gimple *> reduction_phis,
4410                                   int reduc_index, bool double_reduc,
4411 				  slp_tree slp_node, tree induction_index,
4412 				  tree induc_val)
4413 {
4414   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4415   stmt_vec_info prev_phi_info;
4416   tree vectype;
4417   machine_mode mode;
4418   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4419   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *outer_loop = NULL;
4420   basic_block exit_bb;
4421   tree scalar_dest;
4422   tree scalar_type;
4423   gimple *new_phi = NULL, *phi;
4424   gimple_stmt_iterator exit_gsi;
4425   tree vec_dest;
4426   tree new_temp = NULL_TREE, new_dest, new_name, new_scalar_dest;
4427   gimple *epilog_stmt = NULL;
4428   enum tree_code code = gimple_assign_rhs_code (stmt);
4429   gimple *exit_phi;
4430   tree bitsize;
4431   tree adjustment_def = NULL;
4432   tree vec_initial_def = NULL;
4433   tree reduction_op, expr, def, initial_def = NULL;
4434   tree orig_name, scalar_result;
4435   imm_use_iterator imm_iter, phi_imm_iter;
4436   use_operand_p use_p, phi_use_p;
4437   gimple *use_stmt, *orig_stmt, *reduction_phi = NULL;
4438   bool nested_in_vect_loop = false;
4439   auto_vec<gimple *> new_phis;
4440   auto_vec<gimple *> inner_phis;
4441   enum vect_def_type dt = vect_unknown_def_type;
4442   int j, i;
4443   auto_vec<tree> scalar_results;
4444   unsigned int group_size = 1, k, ratio;
4445   auto_vec<tree> vec_initial_defs;
4446   auto_vec<gimple *> phis;
4447   bool slp_reduc = false;
4448   tree new_phi_result;
4449   gimple *inner_phi = NULL;
4450 
4451   if (slp_node)
4452     group_size = SLP_TREE_SCALAR_STMTS (slp_node).length ();
4453 
4454   if (nested_in_vect_loop_p (loop, stmt))
4455     {
4456       outer_loop = loop;
4457       loop = loop->inner;
4458       nested_in_vect_loop = true;
4459       gcc_assert (!slp_node);
4460     }
4461 
4462   reduction_op = get_reduction_op (stmt, reduc_index);
4463 
4464   vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
4465   gcc_assert (vectype);
4466   mode = TYPE_MODE (vectype);
4467 
4468   /* 1. Create the reduction def-use cycle:
4469      Set the arguments of REDUCTION_PHIS, i.e., transform
4470 
4471         loop:
4472           vec_def = phi <null, null>            # REDUCTION_PHI
4473           VECT_DEF = vector_stmt                # vectorized form of STMT
4474           ...
4475 
4476      into:
4477 
4478         loop:
4479           vec_def = phi <vec_init, VECT_DEF>    # REDUCTION_PHI
4480           VECT_DEF = vector_stmt                # vectorized form of STMT
4481           ...
4482 
4483      (in case of SLP, do it for all the phis). */
4484 
4485   /* Get the loop-entry arguments.  */
4486   enum vect_def_type initial_def_dt = vect_unknown_def_type;
4487   if (slp_node)
4488     vect_get_vec_defs (reduction_op, NULL_TREE, stmt, &vec_initial_defs,
4489                        NULL, slp_node, reduc_index);
4490   else
4491     {
4492       /* Get at the scalar def before the loop, that defines the initial value
4493 	 of the reduction variable.  */
4494       gimple *def_stmt = SSA_NAME_DEF_STMT (reduction_op);
4495       initial_def = PHI_ARG_DEF_FROM_EDGE (def_stmt,
4496 					   loop_preheader_edge (loop));
4497       /* Optimize: if initial_def is for REDUC_MAX smaller than the base
4498 	 and we can't use zero for induc_val, use initial_def.  Similarly
4499 	 for REDUC_MIN and initial_def larger than the base.  */
4500       if (TREE_CODE (initial_def) == INTEGER_CST
4501 	  && (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
4502 	      == INTEGER_INDUC_COND_REDUCTION)
4503 	  && !integer_zerop (induc_val)
4504 	  && tree_int_cst_lt (initial_def, induc_val))
4505 	induc_val = initial_def;
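      /* For example (hypothetical constants): with induc_val = -1 and a
	 constant initial_def = -5, the assignment above switches the
	 sentinel to -5; it is still smaller than every IV value, so the
	 "COND_EXPR never true" case now yields the initial value
	 directly, needing no separate fixup.  */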
4506       vect_is_simple_use (initial_def, loop_vinfo, &def_stmt, &initial_def_dt);
4507       vec_initial_def = get_initial_def_for_reduction (stmt, initial_def,
4508 						       &adjustment_def);
4509       vec_initial_defs.create (1);
4510       vec_initial_defs.quick_push (vec_initial_def);
4511     }
4512 
4513   /* Set phi nodes arguments.  */
4514   FOR_EACH_VEC_ELT (reduction_phis, i, phi)
4515     {
4516       tree vec_init_def, def;
4517       gimple_seq stmts;
4518       vec_init_def = force_gimple_operand (vec_initial_defs[i], &stmts,
4519 					   true, NULL_TREE);
4520       if (stmts)
4521 	gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
4522 
4523       def = vect_defs[i];
4524       for (j = 0; j < ncopies; j++)
4525         {
4526 	  if (j != 0)
4527 	    {
4528 	      phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
4529 	      if (nested_in_vect_loop)
4530 		vec_init_def
4531 		  = vect_get_vec_def_for_stmt_copy (initial_def_dt,
4532 						    vec_init_def);
4533 	    }
4534 
4535 	  /* Set the loop-entry arg of the reduction-phi.  */
4536 
4537 	  if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
4538 	      == INTEGER_INDUC_COND_REDUCTION)
4539 	    {
4540 	      /* Initialise the reduction phi to zero.  This prevents non-zero
4541 		 initial values from interfering with the reduction op.  */
4542 	      gcc_assert (ncopies == 1);
4543 	      gcc_assert (i == 0);
4544 
4545 	      tree vec_init_def_type = TREE_TYPE (vec_init_def);
4546 	      tree induc_val_vec
4547 		= build_vector_from_val (vec_init_def_type, induc_val);
4548 
4549 	      add_phi_arg (as_a <gphi *> (phi), induc_val_vec,
4550 			   loop_preheader_edge (loop), UNKNOWN_LOCATION);
4551 	    }
4552 	  else
4553 	    add_phi_arg (as_a <gphi *> (phi), vec_init_def,
4554 			 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4555 
4556           /* Set the loop-latch arg for the reduction-phi.  */
4557           if (j > 0)
4558             def = vect_get_vec_def_for_stmt_copy (vect_unknown_def_type, def);
4559 
4560           add_phi_arg (as_a <gphi *> (phi), def, loop_latch_edge (loop),
4561 		       UNKNOWN_LOCATION);
4562 
4563           if (dump_enabled_p ())
4564             {
4565               dump_printf_loc (MSG_NOTE, vect_location,
4566 			       "transform reduction: created def-use cycle: ");
4567               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
4568               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (def), 0);
4569             }
4570         }
4571     }
4572 
4573   /* 2. Create epilog code.
4574         The reduction epilog code operates across the elements of the vector
4575         of partial results computed by the vectorized loop.
4576         The reduction epilog code consists of:
4577 
4578         step 1: compute the scalar result in a vector (v_out2)
4579         step 2: extract the scalar result (s_out3) from the vector (v_out2)
4580         step 3: adjust the scalar result (s_out3) if needed.
4581 
4582         Step 1 can be accomplished using one of the following three schemes:
4583           (scheme 1) using reduc_code, if available.
4584           (scheme 2) using whole-vector shifts, if available.
4585           (scheme 3) using a scalar loop. In this case steps 1+2 above are
4586                      combined.
4587 
4588           The overall epilog code looks like this:
4589 
4590           s_out0 = phi <s_loop>         # original EXIT_PHI
4591           v_out1 = phi <VECT_DEF>       # NEW_EXIT_PHI
4592           v_out2 = reduce <v_out1>              # step 1
4593           s_out3 = extract_field <v_out2, 0>    # step 2
4594           s_out4 = adjust_result <s_out3>       # step 3
4595 
4596           (step 3 is optional, and steps 1 and 2 may be combined).
4597           Lastly, the uses of s_out0 are replaced by s_out4.  */
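  /* Illustrative sketch (lanes made up): for a sum reduction with a
     4-element vector of partial results v_out1 = {s0, s1, s2, s3},
     scheme 2 would compute:

          t      = vec_shift <v_out1, 2>        # {s2, s3, 0, 0}
          v      = v_out1 + t                   # {s0+s2, s1+s3, _, _}
          t      = vec_shift <v, 1>             # {s1+s3, _, _, 0}
          v_out2 = v + t                        # {s0+s1+s2+s3, _, _, _}
          s_out3 = extract_field <v_out2, 0>    # step 2
          s_out4 = adjust_result <s_out3>       # step 3 (if needed)  */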
4598 
4599 
4600   /* 2.1 Create new loop-exit-phis to preserve loop-closed form:
4601          v_out1 = phi <VECT_DEF>
4602          Store them in NEW_PHIS.  */
4603 
4604   exit_bb = single_exit (loop)->dest;
4605   prev_phi_info = NULL;
4606   new_phis.create (vect_defs.length ());
4607   FOR_EACH_VEC_ELT (vect_defs, i, def)
4608     {
4609       for (j = 0; j < ncopies; j++)
4610         {
4611 	  tree new_def = copy_ssa_name (def);
4612           phi = create_phi_node (new_def, exit_bb);
4613           set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, loop_vinfo));
4614           if (j == 0)
4615             new_phis.quick_push (phi);
4616           else
4617 	    {
4618 	      def = vect_get_vec_def_for_stmt_copy (dt, def);
4619 	      STMT_VINFO_RELATED_STMT (prev_phi_info) = phi;
4620 	    }
4621 
4622           SET_PHI_ARG_DEF (phi, single_exit (loop)->dest_idx, def);
4623           prev_phi_info = vinfo_for_stmt (phi);
4624         }
4625     }
4626 
4627   /* The epilogue is created for the outer-loop, i.e., for the loop being
4628      vectorized.  Create exit phis for the outer loop.  */
4629   if (double_reduc)
4630     {
4631       loop = outer_loop;
4632       exit_bb = single_exit (loop)->dest;
4633       inner_phis.create (vect_defs.length ());
4634       FOR_EACH_VEC_ELT (new_phis, i, phi)
4635 	{
4636 	  tree new_result = copy_ssa_name (PHI_RESULT (phi));
4637 	  gphi *outer_phi = create_phi_node (new_result, exit_bb);
4638 	  SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
4639 			   PHI_RESULT (phi));
4640 	  set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi,
4641 							    loop_vinfo));
4642 	  inner_phis.quick_push (phi);
4643 	  new_phis[i] = outer_phi;
4644 	  prev_phi_info = vinfo_for_stmt (outer_phi);
4645           while (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi)))
4646             {
4647 	      phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
4648 	      new_result = copy_ssa_name (PHI_RESULT (phi));
4649 	      outer_phi = create_phi_node (new_result, exit_bb);
4650 	      SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
4651 			       PHI_RESULT (phi));
4652 	      set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi,
4653 								loop_vinfo));
4654 	      STMT_VINFO_RELATED_STMT (prev_phi_info) = outer_phi;
4655 	      prev_phi_info = vinfo_for_stmt (outer_phi);
4656 	    }
4657 	}
4658     }
4659 
4660   exit_gsi = gsi_after_labels (exit_bb);
4661 
4662   /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2 and 3
4663          (i.e. when reduc_code is not available) and in the final adjustment
4664 	 code (if needed).  Also get the original scalar reduction variable as
4665          defined in the loop.  In case STMT is a "pattern-stmt" (i.e. it
4666          represents a reduction pattern), the tree-code and scalar-def are
4667          taken from the original stmt that the pattern-stmt (STMT) replaces.
4668          Otherwise (it is a regular reduction), the tree-code and scalar-def
4669          are taken from STMT.  */
4670 
4671   orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
4672   if (!orig_stmt)
4673     {
4674       /* Regular reduction  */
4675       orig_stmt = stmt;
4676     }
4677   else
4678     {
4679       /* Reduction pattern  */
4680       stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt);
4681       gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
4682       gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
4683     }
4684 
4685   code = gimple_assign_rhs_code (orig_stmt);
4686   /* For MINUS_EXPR the initial vector is [init_val,0,...,0], therefore,
4687      partial results are added and not subtracted.  */
4688   if (code == MINUS_EXPR)
4689     code = PLUS_EXPR;
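
  /* E.g. (illustrative): for "s = init; for (...) s -= a[i];" every vector
     lane accumulates negated elements starting from {init, 0, ..., 0}, so
     summing the lanes in the epilog yields init - (a[0] + ... + a[N-1]) as
     required; hence PLUS_EXPR is the correct epilog code.  */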
4690 
4691   scalar_dest = gimple_assign_lhs (orig_stmt);
4692   scalar_type = TREE_TYPE (scalar_dest);
4693   scalar_results.create (group_size);
4694   new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
4695   bitsize = TYPE_SIZE (scalar_type);
4696 
4697   /* In case this is a reduction in an inner-loop while vectorizing an outer
4698      loop, we don't need to extract a single scalar result at the end of the
4699      inner-loop (unless it is double reduction, i.e., the use of reduction is
4700      outside the outer-loop).  The final vector of partial results will be used
4701      in the vectorized outer-loop, or reduced to a scalar result at the end of
4702      the outer-loop.  */
4703   if (nested_in_vect_loop && !double_reduc)
4704     goto vect_finalize_reduction;
4705 
4706   /* SLP reduction without reduction chain, e.g.,
4707      # a1 = phi <a2, a0>
4708      # b1 = phi <b2, b0>
4709      a2 = operation (a1)
4710      b2 = operation (b1)  */
4711   slp_reduc = (slp_node && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)));
4712 
4713   /* In case of reduction chain, e.g.,
4714      # a1 = phi <a3, a0>
4715      a2 = operation (a1)
4716      a3 = operation (a2),
4717 
4718      we may end up with more than one vector result.  Here we reduce them to
4719      one vector.  */
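  /* A source-level example of such a chain (illustrative):

          for (i = 0; i < N; i++)
            {
              s += a[i];
              s += b[i];
            }

     Depending on the unrolling factor this can leave several vector
     accumulators in NEW_PHIS, which the loop below folds into a single
     vector before the scalar epilog runs.  */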
4720   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
4721     {
4722       tree first_vect = PHI_RESULT (new_phis[0]);
4723       tree tmp;
4724       gassign *new_vec_stmt = NULL;
4725 
4726       vec_dest = vect_create_destination_var (scalar_dest, vectype);
4727       for (k = 1; k < new_phis.length (); k++)
4728         {
4729 	  gimple *next_phi = new_phis[k];
4730           tree second_vect = PHI_RESULT (next_phi);
4731 
4732           tmp = build2 (code, vectype,  first_vect, second_vect);
4733           new_vec_stmt = gimple_build_assign (vec_dest, tmp);
4734           first_vect = make_ssa_name (vec_dest, new_vec_stmt);
4735           gimple_assign_set_lhs (new_vec_stmt, first_vect);
4736           gsi_insert_before (&exit_gsi, new_vec_stmt, GSI_SAME_STMT);
4737         }
4738 
4739       new_phi_result = first_vect;
4740       if (new_vec_stmt)
4741         {
4742           new_phis.truncate (0);
4743           new_phis.safe_push (new_vec_stmt);
4744         }
4745     }
4746   else
4747     new_phi_result = PHI_RESULT (new_phis[0]);
4748 
4749   if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
4750     {
4751       /* For condition reductions, we have a vector (NEW_PHI_RESULT) containing
4752 	 various data values where the condition matched and another vector
4753 	 (INDUCTION_INDEX) containing all the indexes of those matches.  We
4754 	 need to extract the last matching index (which will be the index with
4755 	 highest value) and use this to index into the data vector.
4756 	 For the case where there were no matches, the data vector will contain
4757 	 all default values and the index vector will be all zeros.  */
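      /* Worked example (illustrative, values made up): for

	      for (i = 0; i < N; i++)
		if (a[i] < val)  last = a[i];

	 with a = {9, 1, 7, 2} and val = 8, the loop could leave
	 NEW_PHI_RESULT = {0, 1, 7, 2} and INDUCTION_INDEX = {0, 2, 3, 4}
	 (1-based indexes, 0 where the condition failed).  The code below
	 extracts the maximum index, 4, and selects the data lane whose
	 index equals it, giving last = 2, the last match.  */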
4758 
4759       /* Get various versions of the type of the vector of indexes.  */
4760       tree index_vec_type = TREE_TYPE (induction_index);
4761       gcc_checking_assert (TYPE_UNSIGNED (index_vec_type));
4762       tree index_scalar_type = TREE_TYPE (index_vec_type);
4763       tree index_vec_cmp_type = build_same_sized_truth_vector_type
4764 	(index_vec_type);
4765 
4766       /* Get an unsigned integer version of the type of the data vector.  */
4767       int scalar_precision = GET_MODE_PRECISION (TYPE_MODE (scalar_type));
4768       tree scalar_type_unsigned = make_unsigned_type (scalar_precision);
4769       tree vectype_unsigned = build_vector_type
4770 	(scalar_type_unsigned, TYPE_VECTOR_SUBPARTS (vectype));
4771 
4772       /* First we need to create a vector (ZERO_VEC) of zeros and another
4773 	 vector (MAX_INDEX_VEC) filled with the last matching index, which we
4774 	 can create using a MAX reduction and then expanding.
4775 	 In the case where the loop never made any matches, the max index will
4776 	 be zero.  */
4777 
4778       /* Vector of {0, 0, 0,...}.  */
4779       tree zero_vec = make_ssa_name (vectype);
4780       tree zero_vec_rhs = build_zero_cst (vectype);
4781       gimple *zero_vec_stmt = gimple_build_assign (zero_vec, zero_vec_rhs);
4782       gsi_insert_before (&exit_gsi, zero_vec_stmt, GSI_SAME_STMT);
4783 
4784       /* Find maximum value from the vector of found indexes.  */
4785       tree max_index = make_ssa_name (index_scalar_type);
4786       gimple *max_index_stmt = gimple_build_assign (max_index, REDUC_MAX_EXPR,
4787 						    induction_index);
4788       gsi_insert_before (&exit_gsi, max_index_stmt, GSI_SAME_STMT);
4789 
4790       /* Vector of {max_index, max_index, max_index,...}.  */
4791       tree max_index_vec = make_ssa_name (index_vec_type);
4792       tree max_index_vec_rhs = build_vector_from_val (index_vec_type,
4793 						      max_index);
4794       gimple *max_index_vec_stmt = gimple_build_assign (max_index_vec,
4795 							max_index_vec_rhs);
4796       gsi_insert_before (&exit_gsi, max_index_vec_stmt, GSI_SAME_STMT);
4797 
4798       /* Next we compare the new vector (MAX_INDEX_VEC) full of max indexes
4799 	 with the vector (INDUCTION_INDEX) of found indexes, choosing values
4800 	 from the data vector (NEW_PHI_RESULT) for matches, 0 (ZERO_VEC)
4801 	 otherwise.  Only one value should match, resulting in a vector
4802 	 (VEC_COND) with one data value and the rest zeros.
4803 	 In the case where the loop never made any matches, every index will
4804 	 match, resulting in a vector with all data values (which will all be
4805 	 the default value).  */
4806 
4807       /* Compare the max index vector to the vector of found indexes to find
4808 	 the position of the max value.  */
4809       tree vec_compare = make_ssa_name (index_vec_cmp_type);
4810       gimple *vec_compare_stmt = gimple_build_assign (vec_compare, EQ_EXPR,
4811 						      induction_index,
4812 						      max_index_vec);
4813       gsi_insert_before (&exit_gsi, vec_compare_stmt, GSI_SAME_STMT);
4814 
4815       /* Use the compare to choose either values from the data vector or
4816 	 zero.  */
4817       tree vec_cond = make_ssa_name (vectype);
4818       gimple *vec_cond_stmt = gimple_build_assign (vec_cond, VEC_COND_EXPR,
4819 						   vec_compare, new_phi_result,
4820 						   zero_vec);
4821       gsi_insert_before (&exit_gsi, vec_cond_stmt, GSI_SAME_STMT);
4822 
4823       /* Finally we need to extract the data value from the vector (VEC_COND)
4824 	 into a scalar (MATCHED_DATA_REDUC).  Logically we want to do an OR
4825 	 reduction, but because this doesn't exist, we can use a MAX reduction
4826 	 instead.  The data value might be signed or a float so we need to cast
4827 	 it first.
4828 	 In the case where the loop never made any matches, the data values are
4829 	 all identical, and so will reduce down correctly.  */
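      /* Informally, why MAX is safe as an OR substitute here: after the
	 VEC_COND at most one lane is non-zero (or all lanes are equal),
	 and viewed as unsigned integers the all-zero bit pattern is the
	 minimum, so the MAX reduction returns exactly the surviving
	 lane's bits, which are then view-converted back below.  */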
4830 
4831       /* Make the matched data values unsigned.  */
4832       tree vec_cond_cast = make_ssa_name (vectype_unsigned);
4833       tree vec_cond_cast_rhs = build1 (VIEW_CONVERT_EXPR, vectype_unsigned,
4834 				       vec_cond);
4835       gimple *vec_cond_cast_stmt = gimple_build_assign (vec_cond_cast,
4836 							VIEW_CONVERT_EXPR,
4837 							vec_cond_cast_rhs);
4838       gsi_insert_before (&exit_gsi, vec_cond_cast_stmt, GSI_SAME_STMT);
4839 
4840       /* Reduce down to a scalar value.  */
4841       tree data_reduc = make_ssa_name (scalar_type_unsigned);
4842       optab ot = optab_for_tree_code (REDUC_MAX_EXPR, vectype_unsigned,
4843 				      optab_default);
4844       gcc_assert (optab_handler (ot, TYPE_MODE (vectype_unsigned))
4845 		  != CODE_FOR_nothing);
4846       gimple *data_reduc_stmt = gimple_build_assign (data_reduc,
4847 						     REDUC_MAX_EXPR,
4848 						     vec_cond_cast);
4849       gsi_insert_before (&exit_gsi, data_reduc_stmt, GSI_SAME_STMT);
4850 
4851       /* Convert the reduced value back to the result type and set as the
4852 	 result.  */
4853       tree data_reduc_cast = build1 (VIEW_CONVERT_EXPR, scalar_type,
4854 				     data_reduc);
4855       epilog_stmt = gimple_build_assign (new_scalar_dest, data_reduc_cast);
4856       new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4857       gimple_assign_set_lhs (epilog_stmt, new_temp);
4858       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4859       scalar_results.safe_push (new_temp);
4860     }
4861 
4862   /* 2.3 Create the reduction code, using one of the three schemes described
4863          above. In SLP we simply need to extract all the elements from the
4864          vector (without reducing them), so we use scalar shifts.  */
4865   else if (reduc_code != ERROR_MARK && !slp_reduc)
4866     {
4867       tree tmp;
4868       tree vec_elem_type;
4869 
4870       /*** Case 1:  Create:
4871            v_out2 = reduc_expr <v_out1>  */
4872 
4873       if (dump_enabled_p ())
4874         dump_printf_loc (MSG_NOTE, vect_location,
4875 			 "Reduce using direct vector reduction.\n");
4876 
4877       vec_elem_type = TREE_TYPE (TREE_TYPE (new_phi_result));
4878       if (!useless_type_conversion_p (scalar_type, vec_elem_type))
4879 	{
4880           tree tmp_dest =
4881 	      vect_create_destination_var (scalar_dest, vec_elem_type);
4882 	  tmp = build1 (reduc_code, vec_elem_type, new_phi_result);
4883 	  epilog_stmt = gimple_build_assign (tmp_dest, tmp);
4884 	  new_temp = make_ssa_name (tmp_dest, epilog_stmt);
4885 	  gimple_assign_set_lhs (epilog_stmt, new_temp);
4886 	  gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4887 
4888 	  tmp = build1 (NOP_EXPR, scalar_type, new_temp);
4889 	}
4890       else
4891 	tmp = build1 (reduc_code, scalar_type, new_phi_result);
4892 
4893       epilog_stmt = gimple_build_assign (new_scalar_dest, tmp);
4894       new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4895       gimple_assign_set_lhs (epilog_stmt, new_temp);
4896       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4897 
4898       if ((STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
4899 	   == INTEGER_INDUC_COND_REDUCTION)
4900 	  && !operand_equal_p (initial_def, induc_val, 0))
4901 	{
4902 	  /* Earlier we set the initial value to be a vector of induc_val
4903 	     values.  Check the result and, if it is induc_val, replace it
4904 	     with the original initial value, unless induc_val is already
4905 	     the same as initial_def.  */
4906 	  tree zcompare = build2 (EQ_EXPR, boolean_type_node, new_temp,
4907 				  induc_val);
4908 
4909 	  tmp = make_ssa_name (new_scalar_dest);
4910 	  epilog_stmt = gimple_build_assign (tmp, COND_EXPR, zcompare,
4911 					     initial_def, new_temp);
4912 	  gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4913 	  new_temp = tmp;
4914 	}
4915 
4916       scalar_results.safe_push (new_temp);
4917     }
4918   else
4919     {
4920       bool reduce_with_shift = have_whole_vector_shift (mode);
4921       int element_bitsize = tree_to_uhwi (bitsize);
4922       int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
4923       tree vec_temp;
4924 
4925       /* Regardless of whether we have a whole vector shift, if we're
4926          emulating the operation via tree-vect-generic, we don't want
4927          to use it.  Only the first round of the reduction is likely
4928          to still be profitable via emulation.  */
4929       /* ??? It might be better to emit a reduction tree code here, so that
4930          tree-vect-generic can expand the first round via bit tricks.  */
4931       if (!VECTOR_MODE_P (mode))
4932         reduce_with_shift = false;
4933       else
4934         {
4935           optab optab = optab_for_tree_code (code, vectype, optab_default);
4936           if (optab_handler (optab, mode) == CODE_FOR_nothing)
4937             reduce_with_shift = false;
4938         }
4939 
4940       if (reduce_with_shift && !slp_reduc)
4941         {
4942           int nelements = vec_size_in_bits / element_bitsize;
4943           unsigned char *sel = XALLOCAVEC (unsigned char, nelements);
4944 
4945           int elt_offset;
4946 
4947           tree zero_vec = build_zero_cst (vectype);
4948           /*** Case 2: Create:
4949              for (offset = nelements/2; offset >= 1; offset/=2)
4950                 {
4951                   Create:  va' = vec_shift <va, offset>
4952                   Create:  va = vop <va, va'>
4953                 }  */
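          /* Illustrative trace (values made up): with nelements == 4 and
             new_temp = {1, 2, 3, 4}:
               offset 2:  va' = {3, 4, 0, 0},  va = {4, 6, 3, 4}
               offset 1:  va' = {6, 3, 4, 0},  va = {10, 9, 7, 4}
             after which element 0 holds the full sum, 10.  */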
4954 
4955           tree rhs;
4956 
4957           if (dump_enabled_p ())
4958             dump_printf_loc (MSG_NOTE, vect_location,
4959 			     "Reduce using vector shifts\n");
4960 
4961           vec_dest = vect_create_destination_var (scalar_dest, vectype);
4962           new_temp = new_phi_result;
4963           for (elt_offset = nelements / 2;
4964                elt_offset >= 1;
4965                elt_offset /= 2)
4966             {
4967               calc_vec_perm_mask_for_shift (mode, elt_offset, sel);
4968               tree mask = vect_gen_perm_mask_any (vectype, sel);
4969 	      epilog_stmt = gimple_build_assign (vec_dest, VEC_PERM_EXPR,
4970 						 new_temp, zero_vec, mask);
4971               new_name = make_ssa_name (vec_dest, epilog_stmt);
4972               gimple_assign_set_lhs (epilog_stmt, new_name);
4973               gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4974 
4975 	      epilog_stmt = gimple_build_assign (vec_dest, code, new_name,
4976 						 new_temp);
4977               new_temp = make_ssa_name (vec_dest, epilog_stmt);
4978               gimple_assign_set_lhs (epilog_stmt, new_temp);
4979               gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4980             }
4981 
4982 	  /* 2.4  Extract the final scalar result.  Create:
4983 	     s_out3 = extract_field <v_out2, bitpos>  */
4984 
4985 	  if (dump_enabled_p ())
4986 	    dump_printf_loc (MSG_NOTE, vect_location,
4987 			     "extract scalar result\n");
4988 
4989 	  rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp,
4990 			bitsize, bitsize_zero_node);
4991 	  epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
4992 	  new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4993 	  gimple_assign_set_lhs (epilog_stmt, new_temp);
4994 	  gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4995 	  scalar_results.safe_push (new_temp);
4996         }
4997       else
4998         {
4999           /*** Case 3: Create:
5000              s = extract_field <v_out2, 0>
5001              for (offset = element_size;
5002                   offset < vector_size;
5003                   offset += element_size;)
5004                {
5005                  Create:  s' = extract_field <v_out2, offset>
5006                  Create:  s = op <s, s'>  // For non-SLP cases
5007                }  */
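          /* Illustrative sketch: for a vector of 32-bit partial sums
             v_out2 = {s0, s1, s2, s3} this emits (offsets in bits):

                  s  = extract_field <v_out2, 0>
                  s' = extract_field <v_out2, 32>;  s = s + s'
                  s' = extract_field <v_out2, 64>;  s = s + s'
                  s' = extract_field <v_out2, 96>;  s = s + s'

             whereas for SLP the extracted s' values are just collected.  */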
5008 
5009           if (dump_enabled_p ())
5010             dump_printf_loc (MSG_NOTE, vect_location,
5011 			     "Reduce using scalar code.\n");
5012 
5013           vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
5014           FOR_EACH_VEC_ELT (new_phis, i, new_phi)
5015             {
5016               int bit_offset;
5017               if (gimple_code (new_phi) == GIMPLE_PHI)
5018                 vec_temp = PHI_RESULT (new_phi);
5019               else
5020                 vec_temp = gimple_assign_lhs (new_phi);
5021               tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
5022 				 bitsize_zero_node);
5023               epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
5024               new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
5025               gimple_assign_set_lhs (epilog_stmt, new_temp);
5026               gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
5027 
5028               /* In SLP we don't need to apply the reduction operation, so we
5029                  just collect the s' values in SCALAR_RESULTS.  */
5030               if (slp_reduc)
5031                 scalar_results.safe_push (new_temp);
5032 
5033               for (bit_offset = element_bitsize;
5034                    bit_offset < vec_size_in_bits;
5035                    bit_offset += element_bitsize)
5036                 {
5037                   tree bitpos = bitsize_int (bit_offset);
5038                   tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp,
5039                                      bitsize, bitpos);
5040 
5041                   epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
5042                   new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
5043                   gimple_assign_set_lhs (epilog_stmt, new_name);
5044                   gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
5045 
5046                   if (slp_reduc)
5047                     {
5048                       /* In SLP we don't need to apply the reduction operation,
5049                          so we just collect the s' values in SCALAR_RESULTS.  */
5050                       new_temp = new_name;
5051                       scalar_results.safe_push (new_name);
5052                     }
5053                   else
5054                     {
5055 		      epilog_stmt = gimple_build_assign (new_scalar_dest, code,
5056 							 new_name, new_temp);
5057                       new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
5058                       gimple_assign_set_lhs (epilog_stmt, new_temp);
5059                       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
5060                     }
5061                 }
5062             }
5063 
5064           /* The only case where we need to reduce scalar results in SLP is
5065              unrolling.  If the size of SCALAR_RESULTS is greater than
5066              GROUP_SIZE, we reduce them by combining elements modulo
5067              GROUP_SIZE.  */
5068           if (slp_reduc)
5069             {
5070               tree res, first_res, new_res;
5071 	      gimple *new_stmt;
5072 
5073               /* Reduce multiple scalar results in case of SLP unrolling.  */
5074               for (j = group_size; scalar_results.iterate (j, &res);
5075                    j++)
5076                 {
5077                   first_res = scalar_results[j % group_size];
5078 		  new_stmt = gimple_build_assign (new_scalar_dest, code,
5079 						  first_res, res);
5080                   new_res = make_ssa_name (new_scalar_dest, new_stmt);
5081                   gimple_assign_set_lhs (new_stmt, new_res);
5082                   gsi_insert_before (&exit_gsi, new_stmt, GSI_SAME_STMT);
5083                   scalar_results[j % group_size] = new_res;
5084                 }
5085             }
5086           else
5087             /* Not SLP - we have one scalar to keep in SCALAR_RESULTS.  */
5088             scalar_results.safe_push (new_temp);
5089         }
5090     }
5091 
5092 vect_finalize_reduction:
5093 
5094   if (double_reduc)
5095     loop = loop->inner;
5096 
5097   /* 2.5 Adjust the final result by the initial value of the reduction
5098 	 variable. (When such adjustment is not needed, then
5099 	 'adjustment_def' is zero).  For example, if code is PLUS we create:
5100 	 new_temp = loop_exit_def + adjustment_def  */
5101 
5102   if (adjustment_def)
5103     {
5104       gcc_assert (!slp_reduc);
5105       if (nested_in_vect_loop)
5106 	{
5107           new_phi = new_phis[0];
5108 	  gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) == VECTOR_TYPE);
5109 	  expr = build2 (code, vectype, PHI_RESULT (new_phi), adjustment_def);
5110 	  new_dest = vect_create_destination_var (scalar_dest, vectype);
5111 	}
5112       else
5113 	{
5114           new_temp = scalar_results[0];
5115 	  gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) != VECTOR_TYPE);
5116 	  expr = build2 (code, scalar_type, new_temp, adjustment_def);
5117 	  new_dest = vect_create_destination_var (scalar_dest, scalar_type);
5118 	}
5119 
5120       epilog_stmt = gimple_build_assign (new_dest, expr);
5121       new_temp = make_ssa_name (new_dest, epilog_stmt);
5122       gimple_assign_set_lhs (epilog_stmt, new_temp);
5123       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
5124       if (nested_in_vect_loop)
5125         {
5126           set_vinfo_for_stmt (epilog_stmt,
5127                               new_stmt_vec_info (epilog_stmt, loop_vinfo));
5128           STMT_VINFO_RELATED_STMT (vinfo_for_stmt (epilog_stmt)) =
5129                 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_phi));
5130 
5131           if (!double_reduc)
5132             scalar_results.quick_push (new_temp);
5133           else
5134             scalar_results[0] = new_temp;
5135         }
5136       else
5137         scalar_results[0] = new_temp;
5138 
5139       new_phis[0] = epilog_stmt;
5140     }
5141 
5142   /* 2.6  Handle the loop-exit phis.  Replace the uses of scalar loop-exit
5143           phis with new adjusted scalar results, i.e., replace use <s_out0>
5144           with use <s_out4>.
5145 
5146      Transform:
5147         loop_exit:
5148           s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
5149           v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
5150           v_out2 = reduce <v_out1>
5151           s_out3 = extract_field <v_out2, 0>
5152           s_out4 = adjust_result <s_out3>
5153           use <s_out0>
5154           use <s_out0>
5155 
5156      into:
5157 
5158         loop_exit:
5159           s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
5160           v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
5161           v_out2 = reduce <v_out1>
5162           s_out3 = extract_field <v_out2, 0>
5163           s_out4 = adjust_result <s_out3>
5164           use <s_out4>
5165           use <s_out4> */
5166 
5167 
5168   /* In an SLP reduction chain we reduce vector results into one vector if
5169      necessary; hence we set GROUP_SIZE to 1 here.  SCALAR_DEST is the LHS of
5170      the last stmt in the reduction chain, since we are looking for the loop
5171      exit phi node.  */
5172   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
5173     {
5174       gimple *dest_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[group_size - 1];
5175       /* Handle reduction patterns.  */
5176       if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (dest_stmt)))
5177 	dest_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (dest_stmt));
5178 
5179       scalar_dest = gimple_assign_lhs (dest_stmt);
5180       group_size = 1;
5181     }
5182 
5183   /* In SLP we may have several statements in NEW_PHIS and REDUCTION_PHIS (in
5184      case GROUP_SIZE is greater than the vectorization factor).  Therefore, we
5185      need to match SCALAR_RESULTS with corresponding statements.  The first
5186      (GROUP_SIZE / number of new vector stmts) scalar results correspond to
5187      the first vector stmt, etc.
5188      (RATIO is equal to (GROUP_SIZE / number of new vector stmts)).  */
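  /* E.g. (illustrative): GROUP_SIZE == 8 with two vector stmts in NEW_PHIS
     gives RATIO == 4, so scalar results 0..3 map to the first vector stmt
     and results 4..7 to the second.  */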
5189   if (group_size > new_phis.length ())
5190     {
5191       ratio = group_size / new_phis.length ();
5192       gcc_assert (!(group_size % new_phis.length ()));
5193     }
5194   else
5195     ratio = 1;
5196 
5197   for (k = 0; k < group_size; k++)
5198     {
5199       if (k % ratio == 0)
5200         {
5201           epilog_stmt = new_phis[k / ratio];
5202           reduction_phi = reduction_phis[k / ratio];
5203 	  if (double_reduc)
5204 	    inner_phi = inner_phis[k / ratio];
5205         }
5206 
5207       if (slp_reduc)
5208         {
5209 	  gimple *current_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[k];
5210 
5211           orig_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (current_stmt));
5212           /* SLP statements can't participate in patterns.  */
5213           gcc_assert (!orig_stmt);
5214           scalar_dest = gimple_assign_lhs (current_stmt);
5215         }
5216 
5217       phis.create (3);
5218       /* Find the loop-closed-use at the loop exit of the original scalar
5219          result.  (The reduction result is expected to have two immediate uses -
5220          one at the latch block, and one at the loop exit).  */
5221       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
5222         if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p)))
5223 	    && !is_gimple_debug (USE_STMT (use_p)))
5224           phis.safe_push (USE_STMT (use_p));
5225 
5226       /* While we expect to have found an exit_phi because of loop-closed-ssa
5227          form, we can end up without one if the scalar cycle is dead.  */
5228 
5229       FOR_EACH_VEC_ELT (phis, i, exit_phi)
5230         {
5231           if (outer_loop)
5232             {
5233               stmt_vec_info exit_phi_vinfo = vinfo_for_stmt (exit_phi);
5234               gphi *vect_phi;
5235 
5236               /* FORNOW.  We do not yet support the case where an inner-loop
5237                  reduction is used only outside the outer-loop (rather than in
5238                  the outer-loop), unless it is a double reduction.  */
5239               gcc_assert ((STMT_VINFO_RELEVANT_P (exit_phi_vinfo)
5240                            && !STMT_VINFO_LIVE_P (exit_phi_vinfo))
5241                           || double_reduc);
5242 
5243 	      if (double_reduc)
5244 		STMT_VINFO_VEC_STMT (exit_phi_vinfo) = inner_phi;
5245 	      else
5246 		STMT_VINFO_VEC_STMT (exit_phi_vinfo) = epilog_stmt;
5247               if (!double_reduc
5248                   || STMT_VINFO_DEF_TYPE (exit_phi_vinfo)
5249                       != vect_double_reduction_def)
5250                 continue;
5251 
5252               /* Handle double reduction:
5253 
5254                  stmt1: s1 = phi <s0, s2>  - double reduction phi (outer loop)
5255                  stmt2:   s3 = phi <s1, s4> - (regular) reduc phi (inner loop)
5256                  stmt3:   s4 = use (s3)     - (regular) reduc stmt (inner loop)
5257                  stmt4: s2 = phi <s4>      - double reduction stmt (outer loop)
5258 
5259                  At that point the regular reduction (stmt2 and stmt3) is
5260                  already vectorized, as well as the exit phi node, stmt4.
5261                  Here we vectorize the phi node of double reduction, stmt1, and
5262                  update all relevant statements.  */
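              /* A source-level example of a double reduction
                 (illustrative):

                      for (i = 0; i < N; i++)        <- outer loop
                        for (j = 0; j < M; j++)      <- inner loop
                          s += a[i][j];

                 Here s corresponds to stmt1/stmt4 in the outer loop and
                 to stmt2/stmt3 in the inner loop.  */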
5263 
5264               /* Go through all the uses of s2 to find double reduction phi
5265                  node, i.e., stmt1 above.  */
5266               orig_name = PHI_RESULT (exit_phi);
5267               FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
5268                 {
5269                   stmt_vec_info use_stmt_vinfo;
5270                   stmt_vec_info new_phi_vinfo;
5271                   tree vect_phi_init, preheader_arg, vect_phi_res, init_def;
5272                   basic_block bb = gimple_bb (use_stmt);
5273 		  gimple *use;
5274 
5275                   /* Check that USE_STMT is really a double reduction phi
5276                      node.  */
5277                   if (gimple_code (use_stmt) != GIMPLE_PHI
5278                       || gimple_phi_num_args (use_stmt) != 2
5279                       || bb->loop_father != outer_loop)
5280                     continue;
5281                   use_stmt_vinfo = vinfo_for_stmt (use_stmt);
5282                   if (!use_stmt_vinfo
5283                       || STMT_VINFO_DEF_TYPE (use_stmt_vinfo)
5284                           != vect_double_reduction_def)
5285 		    continue;
5286 
5287                   /* Create vector phi node for double reduction:
5288                      vs1 = phi <vs0, vs2>
5289                      vs1 was created previously in this function by a call to
5290                        vect_get_vec_def_for_operand and is stored in
5291                        vec_initial_def;
5292                      vs2 is defined by INNER_PHI, the vectorized EXIT_PHI;
5293                      vs0 is created here.  */
5294 
5295                   /* Create vector phi node.  */
5296                   vect_phi = create_phi_node (vec_initial_def, bb);
5297                   new_phi_vinfo = new_stmt_vec_info (vect_phi,
5298                                     loop_vec_info_for_loop (outer_loop));
5299                   set_vinfo_for_stmt (vect_phi, new_phi_vinfo);
5300 
5301                   /* Create vs0 - initial def of the double reduction phi.  */
5302                   preheader_arg = PHI_ARG_DEF_FROM_EDGE (use_stmt,
5303                                              loop_preheader_edge (outer_loop));
5304                   init_def = get_initial_def_for_reduction (stmt,
5305                                                           preheader_arg, NULL);
5306                   vect_phi_init = vect_init_vector (use_stmt, init_def,
5307                                                     vectype, NULL);
5308 
5309                   /* Update phi node arguments with vs0 and vs2.  */
5310                   add_phi_arg (vect_phi, vect_phi_init,
5311                                loop_preheader_edge (outer_loop),
5312                                UNKNOWN_LOCATION);
5313                   add_phi_arg (vect_phi, PHI_RESULT (inner_phi),
5314                                loop_latch_edge (outer_loop), UNKNOWN_LOCATION);
5315                   if (dump_enabled_p ())
5316                     {
5317                       dump_printf_loc (MSG_NOTE, vect_location,
5318 				       "created double reduction phi node: ");
5319                       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vect_phi, 0);
5320                     }
5321 
5322                   vect_phi_res = PHI_RESULT (vect_phi);
5323 
5324                   /* Replace the use, i.e., set the correct vs1 in the regular
5325                      reduction phi node.  FORNOW, NCOPIES is always 1, so the
5326                      loop is redundant.  */
5327                   use = reduction_phi;
5328                   for (j = 0; j < ncopies; j++)
5329                     {
5330                       edge pr_edge = loop_preheader_edge (loop);
5331                       SET_PHI_ARG_DEF (use, pr_edge->dest_idx, vect_phi_res);
5332                       use = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use));
5333                     }
5334                 }
5335             }
5336         }
5337 
5338       phis.release ();
5339       if (nested_in_vect_loop)
5340         {
5341           if (double_reduc)
5342             loop = outer_loop;
5343           else
5344             continue;
5345         }
5346 
5347       phis.create (3);
5348       /* Find the loop-closed-use at the loop exit of the original scalar
5349          result.  (The reduction result is expected to have two immediate uses,
5350          one at the latch block, and one at the loop exit).  For double
5351          reductions we are looking for exit phis of the outer loop.  */
5352       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
5353         {
5354           if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
5355 	    {
5356 	      if (!is_gimple_debug (USE_STMT (use_p)))
5357 		phis.safe_push (USE_STMT (use_p));
5358 	    }
5359           else
5360             {
5361               if (double_reduc && gimple_code (USE_STMT (use_p)) == GIMPLE_PHI)
5362                 {
5363                   tree phi_res = PHI_RESULT (USE_STMT (use_p));
5364 
5365                   FOR_EACH_IMM_USE_FAST (phi_use_p, phi_imm_iter, phi_res)
5366                     {
5367                       if (!flow_bb_inside_loop_p (loop,
5368                                              gimple_bb (USE_STMT (phi_use_p)))
5369 			  && !is_gimple_debug (USE_STMT (phi_use_p)))
5370                         phis.safe_push (USE_STMT (phi_use_p));
5371                     }
5372                 }
5373             }
5374         }
5375 
5376       FOR_EACH_VEC_ELT (phis, i, exit_phi)
5377         {
5378           /* Replace the uses:  */
5379           orig_name = PHI_RESULT (exit_phi);
5380           scalar_result = scalar_results[k];
5381           FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
5382             FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
5383               SET_USE (use_p, scalar_result);
5384         }
5385 
5386       phis.release ();
5387     }
5388 }
5389 
5390 
5391 /* Function is_nonwrapping_integer_induction.
5392 
5393    Check if STMT (which is part of loop LOOP) is an induction that both
5394    increments and cannot cause overflow.  */
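/* For example (illustrative): an "unsigned char" induction with base 0 and
   step 10 in a loop executing at most 30 times reaches 0 + 10 * 30 == 300,
   which needs 9 bits and so may wrap the 8-bit type (return false); with
   at most 20 iterations the maximum of 200 fits and the induction is known
   not to wrap (return true).  */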
5395 
5396 static bool
5397 is_nonwrapping_integer_induction (gimple *stmt, struct loop *loop)
5398 {
5399   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
5400   tree base = STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_vinfo);
5401   tree step = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo);
5402   tree lhs_type = TREE_TYPE (gimple_phi_result (stmt));
5403   widest_int ni, max_loop_value, lhs_max;
5404   bool overflow = false;
5405 
5406   /* Make sure the loop is integer based.  */
5407   if (TREE_CODE (base) != INTEGER_CST
5408       || TREE_CODE (step) != INTEGER_CST)
5409     return false;
5410 
5411   /* Check that the induction increments.  */
5412   if (tree_int_cst_sgn (step) == -1)
5413     return false;
5414 
5415   /* Check that the maximum value of the induction will not wrap.  */
5416 
5417   if (TYPE_OVERFLOW_UNDEFINED (lhs_type))
5418     return true;
5419 
5420   if (! max_stmt_executions (loop, &ni))
5421     return false;
5422 
5423   max_loop_value = wi::mul (wi::to_widest (step), ni, TYPE_SIGN (lhs_type),
5424 			    &overflow);
5425   if (overflow)
5426     return false;
5427 
5428   max_loop_value = wi::add (wi::to_widest (base), max_loop_value,
5429 			    TYPE_SIGN (lhs_type), &overflow);
5430   if (overflow)
5431     return false;
5432 
5433   return (wi::min_precision (max_loop_value, TYPE_SIGN (lhs_type))
5434 	  <= TYPE_PRECISION (lhs_type));
5435 }
5436 
5437 /* Function vectorizable_reduction.
5438 
5439    Check if STMT performs a reduction operation that can be vectorized.
5440    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5441    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5442    Return FALSE if not a vectorizable STMT, TRUE otherwise.
5443 
5444    This function also handles reduction idioms (patterns) that have been
5445    recognized in advance during vect_pattern_recog.  In this case, STMT may be
5446    of this form:
5447      X = pattern_expr (arg0, arg1, ..., X)
5448    and its STMT_VINFO_RELATED_STMT points to the last stmt in the original
5449    sequence that had been detected and replaced by the pattern-stmt (STMT).
5450 
5451    This function also handles reduction of condition expressions, for example:
5452      for (int i = 0; i < N; i++)
5453        if (a[i] < value)
5454 	 last = a[i];
5455    This is handled by vectorizing the loop and creating an additional vector
5456    containing the loop indexes for which "a[i] < value" was true.  In the
5457    function epilogue this is reduced to a single max value and then used to
5458    index into the vector of results.
5459 
5460    In some cases of reduction patterns, the type of the reduction variable X is
5461    different than the type of the other arguments of STMT.
5462    In such cases, the vectype that is used when transforming STMT into a vector
5463    stmt is different than the vectype that is used to determine the
5464    vectorization factor, because it consists of a different number of elements
5465    than the actual number of elements that are being operated upon in parallel.
5466 
5467    For example, consider an accumulation of shorts into an int accumulator.
5468    On some targets it's possible to vectorize this pattern operating on 8
5469    shorts at a time (hence, the vectype for purposes of determining the
5470    vectorization factor should be V8HI); on the other hand, the vectype that
5471    is used to create the vector form is actually V4SI (the type of the result).
5472 
5473    Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
5474    indicates what is the actual level of parallelism (V8HI in the example), so
5475    that the right vectorization factor would be derived.  This vectype
5476    corresponds to the type of arguments to the reduction stmt, and should *NOT*
5477    be used to create the vectorized stmt.  The right vectype for the vectorized
5478    stmt is obtained from the type of the result X:
5479         get_vectype_for_scalar_type (TREE_TYPE (X))
5480 
5481    This means that, contrary to "regular" reductions (or "regular" stmts in
5482    general), the following equation:
5483       STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
5484    does *NOT* necessarily hold for reduction patterns.  */
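/* Illustrative restatement of the vectype distinction above, using the
   shorts-into-int accumulation from the comment:

        short a[N];  int sum = 0;
        for (i = 0; i < N; i++)
          sum += a[i];                  <- recognized as a widen_sum pattern

   Here STMT_VINFO_VECTYPE is V8HI (eight shorts per iteration, fixing the
   vectorization factor), while the vectorized stmt itself produces a V4SI,
   obtained via get_vectype_for_scalar_type (TREE_TYPE (sum)).  */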
5485 
5486 bool
5487 vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
5488 			gimple **vec_stmt, slp_tree slp_node)
5489 {
5490   tree vec_dest;
5491   tree scalar_dest;
5492   tree loop_vec_def0 = NULL_TREE, loop_vec_def1 = NULL_TREE;
5493   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5494   tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5495   tree vectype_in = NULL_TREE;
5496   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5497   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5498   enum tree_code code, orig_code, epilog_reduc_code;
5499   machine_mode vec_mode;
5500   int op_type;
5501   optab optab, reduc_optab;
5502   tree new_temp = NULL_TREE;
5503   gimple *def_stmt;
5504   enum vect_def_type dt, cond_reduc_dt = vect_unknown_def_type;
5505   gphi *new_phi = NULL;
5506   gimple *cond_reduc_def_stmt = NULL;
5507   tree scalar_type;
5508   bool is_simple_use;
5509   gimple *orig_stmt;
5510   stmt_vec_info orig_stmt_info;
5511   tree expr = NULL_TREE;
5512   int i;
5513   int ncopies;
5514   int epilog_copies;
5515   stmt_vec_info prev_stmt_info, prev_phi_info;
5516   bool single_defuse_cycle = false;
5517   tree reduc_def = NULL_TREE;
5518   gimple *new_stmt = NULL;
5519   int j;
5520   tree ops[3];
5521   bool nested_cycle = false, found_nested_cycle_def = false;
5522   gimple *reduc_def_stmt = NULL;
5523   bool double_reduc = false, dummy;
5524   basic_block def_bb;
5525   struct loop * def_stmt_loop, *outer_loop = NULL;
5526   tree def_arg;
5527   gimple *def_arg_stmt;
5528   auto_vec<tree> vec_oprnds0;
5529   auto_vec<tree> vec_oprnds1;
5530   auto_vec<tree> vect_defs;
5531   auto_vec<gimple *> phis;
5532   int vec_num;
5533   tree def0, def1, tem, op1 = NULL_TREE;
5534   bool first_p = true;
5535   tree cr_index_scalar_type = NULL_TREE, cr_index_vector_type = NULL_TREE;
5536   tree cond_reduc_val = NULL_TREE;
5537 
5538   /* In case of a reduction chain we switch to the first stmt in the chain, but
5539      we don't update STMT_INFO, since only the last stmt is marked as reduction
5540      and has reduction properties.  */
5541   if (GROUP_FIRST_ELEMENT (stmt_info)
5542       && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
5543     {
5544       stmt = GROUP_FIRST_ELEMENT (stmt_info);
5545       first_p = false;
5546     }
5547 
5548   if (nested_in_vect_loop_p (loop, stmt))
5549     {
5550       outer_loop = loop;
5551       loop = loop->inner;
5552       nested_cycle = true;
5553     }
5554 
5555   /* 1. Is vectorizable reduction?  */
5556   /* Not supportable if the reduction variable is used in the loop, unless
5557      it's a reduction chain.  */
5558   if (STMT_VINFO_RELEVANT (stmt_info) > vect_used_in_outer
5559       && !GROUP_FIRST_ELEMENT (stmt_info))
5560     return false;
5561 
5562   /* Reductions that are not used even in an enclosing outer-loop
5563      are expected to be "live" (used out of the loop).  */
5564   if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_scope
5565       && !STMT_VINFO_LIVE_P (stmt_info))
5566     return false;
5567 
5568   /* Make sure it was already recognized as a reduction computation.  */
5569   if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != vect_reduction_def
5570       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != vect_nested_cycle)
5571     return false;
5572 
5573   /* 2. Has this been recognized as a reduction pattern?
5574 
5575      Check if STMT represents a pattern that has been recognized
5576      in earlier analysis stages.  For stmts that represent a pattern,
5577      the STMT_VINFO_RELATED_STMT field records the last stmt in
5578      the original sequence that constitutes the pattern.  */
5579 
5580   orig_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
5581   if (orig_stmt)
5582     {
5583       orig_stmt_info = vinfo_for_stmt (orig_stmt);
5584       gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
5585       gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
5586     }
5587 
5588   /* 3. Check the operands of the operation.  The first operands are defined
5589         inside the loop body. The last operand is the reduction variable,
5590         which is defined by the loop-header-phi.  */
5591 
5592   gcc_assert (is_gimple_assign (stmt));
5593 
5594   /* Flatten RHS.  */
5595   switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
5596     {
5597     case GIMPLE_SINGLE_RHS:
5598       op_type = TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt));
5599       if (op_type == ternary_op)
5600 	{
5601 	  tree rhs = gimple_assign_rhs1 (stmt);
5602 	  ops[0] = TREE_OPERAND (rhs, 0);
5603 	  ops[1] = TREE_OPERAND (rhs, 1);
5604 	  ops[2] = TREE_OPERAND (rhs, 2);
5605 	  code = TREE_CODE (rhs);
5606 	}
5607       else
5608 	return false;
5609       break;
5610 
5611     case GIMPLE_BINARY_RHS:
5612       code = gimple_assign_rhs_code (stmt);
5613       op_type = TREE_CODE_LENGTH (code);
5614       gcc_assert (op_type == binary_op);
5615       ops[0] = gimple_assign_rhs1 (stmt);
5616       ops[1] = gimple_assign_rhs2 (stmt);
5617       break;
5618 
5619     case GIMPLE_TERNARY_RHS:
5620       code = gimple_assign_rhs_code (stmt);
5621       op_type = TREE_CODE_LENGTH (code);
5622       gcc_assert (op_type == ternary_op);
5623       ops[0] = gimple_assign_rhs1 (stmt);
5624       ops[1] = gimple_assign_rhs2 (stmt);
5625       ops[2] = gimple_assign_rhs3 (stmt);
5626       break;
5627 
5628     case GIMPLE_UNARY_RHS:
5629       return false;
5630 
5631     default:
5632       gcc_unreachable ();
5633     }
5634   /* The default is that the reduction variable is the last operand in the statement.  */
5635   int reduc_index = op_type - 1;
5636   if (code == MINUS_EXPR)
5637     reduc_index = 0;
5638 
5639   if (code == COND_EXPR && slp_node)
5640     return false;
5641 
5642   scalar_dest = gimple_assign_lhs (stmt);
5643   scalar_type = TREE_TYPE (scalar_dest);
5644   if (!POINTER_TYPE_P (scalar_type) && !INTEGRAL_TYPE_P (scalar_type)
5645       && !SCALAR_FLOAT_TYPE_P (scalar_type))
5646     return false;
5647 
5648   /* Do not try to vectorize bit-precision reductions.  */
5649   if ((TYPE_PRECISION (scalar_type)
5650        != GET_MODE_PRECISION (TYPE_MODE (scalar_type))))
5651     return false;
5652 
5653   /* All uses but the last are expected to be defined in the loop.
5654      The last use is the reduction variable.  In case of a nested cycle this
5655      assumption does not hold: we use reduc_index to record the index of the
5656      reduction variable.  */
5657   for (i = 0; i < op_type; i++)
5658     {
5659       if (i == reduc_index)
5660 	continue;
5661 
5662       /* The condition of COND_EXPR is checked in vectorizable_condition().  */
5663       if (i == 0 && code == COND_EXPR)
5664         continue;
5665 
5666       is_simple_use = vect_is_simple_use (ops[i], loop_vinfo,
5667 					  &def_stmt, &dt, &tem);
5668       if (!vectype_in)
5669 	vectype_in = tem;
5670       gcc_assert (is_simple_use);
5671 
5672       if (dt != vect_internal_def
5673 	  && dt != vect_external_def
5674 	  && dt != vect_constant_def
5675 	  && dt != vect_induction_def
5676           && !(dt == vect_nested_cycle && nested_cycle))
5677 	return false;
5678 
5679       if (dt == vect_nested_cycle)
5680         {
5681           found_nested_cycle_def = true;
5682           reduc_def_stmt = def_stmt;
5683           reduc_index = i;
5684         }
5685 
5686       if (i == 1 && code == COND_EXPR)
5687 	{
5688 	  /* Record how the value of COND_EXPR is defined.  */
5689 	  if (dt == vect_constant_def)
5690 	    {
5691 	      cond_reduc_dt = dt;
5692 	      cond_reduc_val = ops[i];
5693 	    }
5694 	  if (dt == vect_induction_def
5695 	      && def_stmt != NULL
5696 	      && is_nonwrapping_integer_induction (def_stmt, loop))
5697 	    {
5698 	      cond_reduc_dt = dt;
5699 	      cond_reduc_def_stmt = def_stmt;
5700 	    }
5701 	}
5702     }
5703 
5704   is_simple_use = vect_is_simple_use (ops[reduc_index], loop_vinfo,
5705 				      &def_stmt, &dt, &tem);
5706   if (!vectype_in)
5707     vectype_in = tem;
5708   gcc_assert (is_simple_use);
5709   if (!found_nested_cycle_def)
5710     reduc_def_stmt = def_stmt;
5711 
5712   if (reduc_def_stmt && gimple_code (reduc_def_stmt) != GIMPLE_PHI)
5713     return false;
5714 
5715   if (!(dt == vect_reduction_def
5716 	|| dt == vect_nested_cycle
5717 	|| ((dt == vect_internal_def || dt == vect_external_def
5718 	     || dt == vect_constant_def || dt == vect_induction_def)
5719 	    && nested_cycle && found_nested_cycle_def)))
5720     {
5721       /* For pattern recognized stmts, orig_stmt might be a reduction,
5722 	 but some helper statements for the pattern might not, or
5723 	 might be COND_EXPRs with reduction uses in the condition.  */
5724       gcc_assert (orig_stmt);
5725       return false;
5726     }
5727 
5728   enum vect_reduction_type v_reduc_type;
5729   gimple *tmp = vect_is_simple_reduction (loop_vinfo, reduc_def_stmt,
5730 					  !nested_cycle, &dummy, false,
5731 					  &v_reduc_type);
5732 
5733   STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) = v_reduc_type;
5734   /* If we have a condition reduction, see if we can simplify it further.  */
5735   if (v_reduc_type == COND_REDUCTION)
5736     {
5737       if (cond_reduc_dt == vect_induction_def)
5738 	{
5739 	  stmt_vec_info cond_stmt_vinfo = vinfo_for_stmt (cond_reduc_def_stmt);
5740 	  tree base
5741 	    = STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (cond_stmt_vinfo);
5742 
5743 	  gcc_assert (TREE_CODE (base) == INTEGER_CST);
5744 	  cond_reduc_val = NULL_TREE;
5745 	  /* Find a suitable value below base; for now, punt if base is the
5746 	     minimum value of the type.  */
5747 	  if (tree_int_cst_sgn (base) == 1)
5748 	    cond_reduc_val = build_int_cst (TREE_TYPE (base), 0);
5749 	  else if (tree_int_cst_lt (TYPE_MIN_VALUE (TREE_TYPE (base)), base))
5750 	    cond_reduc_val
5751 	      = int_const_binop (MINUS_EXPR, base, integer_one_node);
5752 	  if (cond_reduc_val)
5753 	    {
5754 	      if (dump_enabled_p ())
5755 		dump_printf_loc (MSG_NOTE, vect_location,
5756 				 "condition expression based on "
5757 				 "integer induction.\n");
5758 	      STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
5759 		= INTEGER_INDUC_COND_REDUCTION;
5760 	    }
5761 	}
5762 
5763       /* Loop peeling modifies the initial value of the reduction PHI, which
5764 	 makes the reduction stmt to be transformed different from the
5765 	 original stmt analyzed.  We need to record the reduction code for a
5766 	 CONST_COND_REDUCTION type reduction at the analysis stage, so that
5767 	 it can be used directly at the transform stage.  */
5768       if (STMT_VINFO_VEC_CONST_COND_REDUC_CODE (stmt_info) == MAX_EXPR
5769 	  || STMT_VINFO_VEC_CONST_COND_REDUC_CODE (stmt_info) == MIN_EXPR)
5770 	{
5771 	  /* Also set the reduction type to CONST_COND_REDUCTION.  */
5772 	  gcc_assert (cond_reduc_dt == vect_constant_def);
5773 	  STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) = CONST_COND_REDUCTION;
5774 	}
5775       else if (cond_reduc_dt == vect_constant_def)
5776 	{
5777 	  enum vect_def_type cond_initial_dt;
5778 	  gimple *def_stmt = SSA_NAME_DEF_STMT (ops[reduc_index]);
5779 	  tree cond_initial_val
5780 	    = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
5781 
5782 	  gcc_assert (cond_reduc_val != NULL_TREE);
5783 	  vect_is_simple_use (cond_initial_val, loop_vinfo,
5784 			      &def_stmt, &cond_initial_dt);
5785 	  if (cond_initial_dt == vect_constant_def
5786 	      && types_compatible_p (TREE_TYPE (cond_initial_val),
5787 				     TREE_TYPE (cond_reduc_val)))
5788 	    {
5789 	      tree e = fold_build2 (LE_EXPR, boolean_type_node,
5790 				    cond_initial_val, cond_reduc_val);
5791 	      if (e && (integer_onep (e) || integer_zerop (e)))
5792 		{
5793 		  if (dump_enabled_p ())
5794 		    dump_printf_loc (MSG_NOTE, vect_location,
5795 				     "condition expression based on "
5796 				     "compile time constant.\n");
5797 		  /* Record reduction code at analysis stage.  */
5798 		  STMT_VINFO_VEC_CONST_COND_REDUC_CODE (stmt_info)
5799 		    = integer_onep (e) ? MAX_EXPR : MIN_EXPR;
5800 		  STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
5801 		    = CONST_COND_REDUCTION;
5802 		}
5803 	    }
5804 	}
5805     }
5806 
5807   if (orig_stmt)
5808     gcc_assert (tmp == orig_stmt
5809 		|| GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == orig_stmt);
5810   else
5811     /* We changed STMT to be the first stmt in the reduction chain, hence we
5812        check that in this case the first element in the chain is STMT.  */
5813     gcc_assert (stmt == tmp
5814 		|| GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == stmt);
5815 
5816   if (STMT_VINFO_LIVE_P (vinfo_for_stmt (reduc_def_stmt)))
5817     return false;
5818 
5819   if (slp_node)
5820     ncopies = 1;
5821   else
5822     ncopies = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5823                / TYPE_VECTOR_SUBPARTS (vectype_in));
5824 
5825   gcc_assert (ncopies >= 1);
5826 
5827   vec_mode = TYPE_MODE (vectype_in);
5828 
5829   if (code == COND_EXPR)
5830     {
5831       /* Only call during the analysis stage, otherwise we'll lose
5832 	 STMT_VINFO_TYPE.  */
5833       if (!vec_stmt && !vectorizable_condition (stmt, gsi, NULL,
5834 						ops[reduc_index], 0, NULL))
5835         {
5836           if (dump_enabled_p ())
5837 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5838 			     "unsupported condition in reduction\n");
5839 	  return false;
5840         }
5841     }
5842   else
5843     {
5844       /* 4. Supportable by target?  */
5845 
5846       if (code == LSHIFT_EXPR || code == RSHIFT_EXPR
5847 	  || code == LROTATE_EXPR || code == RROTATE_EXPR)
5848 	{
5849 	  /* Shifts and rotates are only supported by vectorizable_shifts,
5850 	     not vectorizable_reduction.  */
5851           if (dump_enabled_p ())
5852 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5853 			     "unsupported shift or rotation.\n");
5854 	  return false;
5855 	}
5856 
5857       /* 4.1. Check support for the operation in the loop.  */
5858       optab = optab_for_tree_code (code, vectype_in, optab_default);
5859       if (!optab)
5860         {
5861           if (dump_enabled_p ())
5862 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5863 			     "no optab.\n");
5864 
5865           return false;
5866         }
5867 
5868       if (optab_handler (optab, vec_mode) == CODE_FOR_nothing)
5869         {
5870           if (dump_enabled_p ())
5871             dump_printf (MSG_NOTE, "op not supported by target.\n");
5872 
5873           if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
5874               || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5875 	          < vect_min_worthwhile_factor (code))
5876             return false;
5877 
5878           if (dump_enabled_p ())
5879   	    dump_printf (MSG_NOTE, "proceeding using word mode.\n");
5880         }
5881 
5882       /* Worthwhile without SIMD support?  */
5883       if (!VECTOR_MODE_P (TYPE_MODE (vectype_in))
5884           && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5885    	     < vect_min_worthwhile_factor (code))
5886         {
5887           if (dump_enabled_p ())
5888 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5889 			     "not worthwhile without SIMD support.\n");
5890 
5891           return false;
5892         }
5893     }
5894 
5895   /* 4.2. Check support for the epilog operation.
5896 
5897           If STMT represents a reduction pattern, then the type of the
5898           reduction variable may be different from the type of the rest
5899           of the arguments.  For example, consider the case of accumulation
5900           of shorts into an int accumulator.  The original code:
5901                         S1: int_a = (int) short_a;
5902           orig_stmt->   S2: int_acc = plus <int_a, int_acc>;
5903 
5904           was replaced with:
5905                         STMT: int_acc = widen_sum <short_a, int_acc>
5906 
5907           This means that:
5908           1. The tree-code that is used to create the vector operation in the
5909              epilog code (that reduces the partial results) is not the
5910              tree-code of STMT, but is rather the tree-code of the original
5911              stmt from the pattern that STMT is replacing.  I.e., in the example
5912              above we want to use 'widen_sum' in the loop, but 'plus' in the
5913              epilog.
5914           2. The type (mode) we use to check available target support
5915              for the vector operation to be created in the *epilog*, is
5916              determined by the type of the reduction variable (in the example
5917              above we'd check this: optab_handler (plus_optab, vect_int_mode)).
5918              However the type (mode) we use to check available target support
5919              for the vector operation to be created *inside the loop*, is
5920              determined by the type of the other arguments to STMT (in the
5921              example we'd check this: optab_handler (widen_sum_optab,
5922 	     vect_short_mode)).
5923 
5924           This is contrary to "regular" reductions, in which the types of all
5925           the arguments are the same as the type of the reduction variable.
5926           For "regular" reductions we can therefore use the same vector type
5927           (and also the same tree-code) when generating the epilog code and
5928           when generating the code inside the loop.  */
5929 
5930   if (orig_stmt)
5931     {
5932       /* This is a reduction pattern: get the vectype from the type of the
5933          reduction variable, and get the tree-code from orig_stmt.  */
5934       gcc_assert (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
5935 		  == TREE_CODE_REDUCTION);
5936       orig_code = gimple_assign_rhs_code (orig_stmt);
5937       gcc_assert (vectype_out);
5938       vec_mode = TYPE_MODE (vectype_out);
5939     }
5940   else
5941     {
5942       /* Regular reduction: the same vectype and tree-code as used for
5943          the vector code inside the loop can be used for the epilog code.  */
5944       orig_code = code;
5945 
5946       if (code == MINUS_EXPR)
5947 	orig_code = PLUS_EXPR;
5948 
5949       /* For simple condition reductions, replace with the actual expression
5950 	 we want to base our reduction around.  */
5951       if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == CONST_COND_REDUCTION)
5952 	{
5953 	  orig_code = STMT_VINFO_VEC_CONST_COND_REDUC_CODE (stmt_info);
5954 	  gcc_assert (orig_code == MAX_EXPR || orig_code == MIN_EXPR);
5955 	}
5956       else if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
5957 	       == INTEGER_INDUC_COND_REDUCTION)
5958 	orig_code = MAX_EXPR;
5959     }
5960 
5961   if (nested_cycle)
5962     {
5963       def_bb = gimple_bb (reduc_def_stmt);
5964       def_stmt_loop = def_bb->loop_father;
5965       def_arg = PHI_ARG_DEF_FROM_EDGE (reduc_def_stmt,
5966                                        loop_preheader_edge (def_stmt_loop));
5967       if (TREE_CODE (def_arg) == SSA_NAME
5968           && (def_arg_stmt = SSA_NAME_DEF_STMT (def_arg))
5969           && gimple_code (def_arg_stmt) == GIMPLE_PHI
5970           && flow_bb_inside_loop_p (outer_loop, gimple_bb (def_arg_stmt))
5971           && vinfo_for_stmt (def_arg_stmt)
5972           && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_arg_stmt))
5973               == vect_double_reduction_def)
5974         double_reduc = true;
5975     }
5976 
5977   epilog_reduc_code = ERROR_MARK;
5978 
5979   if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) != COND_REDUCTION)
5980     {
5981       if (reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
5982 	{
5983 	  reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype_out,
5984                                          optab_default);
5985 	  if (!reduc_optab)
5986 	    {
5987 	      if (dump_enabled_p ())
5988 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5989 				 "no optab for reduction.\n");
5990 
5991 	      epilog_reduc_code = ERROR_MARK;
5992 	    }
5993 	  else if (optab_handler (reduc_optab, vec_mode) == CODE_FOR_nothing)
5994 	    {
5995 	      if (dump_enabled_p ())
5996 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5997 				 "reduc op not supported by target.\n");
5998 
5999 	      epilog_reduc_code = ERROR_MARK;
6000 	    }
6001 
6002 	  /* When epilog_reduc_code is ERROR_MARK then a reduction will be
6003 	     generated in the epilog using multiple expressions.  This does not
6004 	     work for condition reductions.  */
6005 	  if (epilog_reduc_code == ERROR_MARK
6006 	      && (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
6007 			== INTEGER_INDUC_COND_REDUCTION
6008 		  || STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
6009 			== CONST_COND_REDUCTION))
6010 	    {
6011 	      if (dump_enabled_p ())
6012 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6013 				 "no reduc code for scalar code.\n");
6014 	      return false;
6015 	    }
6016 	}
6017       else
6018 	{
6019 	  if (!nested_cycle || double_reduc)
6020 	    {
6021 	      if (dump_enabled_p ())
6022 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6023 				 "no reduc code for scalar code.\n");
6024 
6025 	      return false;
6026 	    }
6027 	}
6028     }
6029   else
6030     {
6031       int scalar_precision = GET_MODE_PRECISION (TYPE_MODE (scalar_type));
6032       cr_index_scalar_type = make_unsigned_type (scalar_precision);
6033       cr_index_vector_type = build_vector_type
6034 	(cr_index_scalar_type, TYPE_VECTOR_SUBPARTS (vectype_out));
6035 
6036       epilog_reduc_code = REDUC_MAX_EXPR;
6037       optab = optab_for_tree_code (REDUC_MAX_EXPR, cr_index_vector_type,
6038 				   optab_default);
6039       if (optab_handler (optab, TYPE_MODE (cr_index_vector_type))
6040 	  == CODE_FOR_nothing)
6041 	{
6042 	  if (dump_enabled_p ())
6043 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6044 			     "reduc max op not supported by target.\n");
6045 	  return false;
6046 	}
6047     }
6048 
6049   if ((double_reduc
6050        || STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) != TREE_CODE_REDUCTION)
6051       && ncopies > 1)
6052     {
6053       if (dump_enabled_p ())
6054 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6055 			 "multiple types in double reduction or condition "
6056 			 "reduction.\n");
6057       return false;
6058     }
6059 
6060   /* In case of widening multiplication by a constant, we update the type
6061      of the constant to be the type of the other operand.  We check that the
6062      constant fits the type in the pattern recognition pass.  */
6063   if (code == DOT_PROD_EXPR
6064       && !types_compatible_p (TREE_TYPE (ops[0]), TREE_TYPE (ops[1])))
6065     {
6066       if (TREE_CODE (ops[0]) == INTEGER_CST)
6067         ops[0] = fold_convert (TREE_TYPE (ops[1]), ops[0]);
6068       else if (TREE_CODE (ops[1]) == INTEGER_CST)
6069         ops[1] = fold_convert (TREE_TYPE (ops[0]), ops[1]);
6070       else
6071         {
6072           if (dump_enabled_p ())
6073 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6074 			     "invalid types in dot-prod\n");
6075 
6076           return false;
6077         }
6078     }
6079 
6080   if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
6081     {
6082       widest_int ni;
6083 
6084       if (! max_loop_iterations (loop, &ni))
6085 	{
6086 	  if (dump_enabled_p ())
6087 	    dump_printf_loc (MSG_NOTE, vect_location,
6088 			     "loop count not known, cannot create cond "
6089 			     "reduction.\n");
6090 	  return false;
6091 	}
6092       /* Convert backedges to iterations.  */
6093       ni += 1;
6094 
6095       /* The additional index will have the same type as the condition.
6096 	 Check that the loop iteration count fits into this type less one
6097 	 (the zero slot is reserved for when there are no matches).  */
6098       tree max_index = TYPE_MAX_VALUE (cr_index_scalar_type);
6099       if (wi::geu_p (ni, wi::to_widest (max_index)))
6100 	{
6101 	  if (dump_enabled_p ())
6102 	    dump_printf_loc (MSG_NOTE, vect_location,
6103 			     "loop size is greater than data size.\n");
6104 	  return false;
6105 	}
6106     }
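  /* Illustrative numbers (assuming an unsigned char condition type): the
     index vector can represent values 0..255, with 0 reserved for "no
     match", so the check above rejects any loop whose iteration count
     (latch count + 1) may reach 255 or more.  */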
6107 
6108   if (!vec_stmt) /* transformation not required.  */
6109     {
6110       if (first_p
6111 	  && !vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies,
6112 					 reduc_index))
6113         return false;
6114       STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
6115       return true;
6116     }
6117 
6118   /** Transform.  **/
6119 
6120   if (dump_enabled_p ())
6121     dump_printf_loc (MSG_NOTE, vect_location, "transform reduction.\n");
6122 
6123   /* FORNOW: Multiple types are not supported for condition.  */
6124   if (code == COND_EXPR)
6125     gcc_assert (ncopies == 1);
6126 
6127   /* Create the destination vector.  */
6128   vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6129 
6130   /* In case the vectorization factor (VF) is bigger than the number
6131      of elements that we can fit in a vectype (nunits), we have to generate
6132      more than one vector stmt - i.e., we need to "unroll" the
6133      vector stmt by a factor VF/nunits.  For more details see documentation
6134      in vectorizable_operation.  */
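  /* Illustrative numbers: with VF == 8 and a 4-element vectype,
     ncopies == 8 / 4 == 2, so each scalar reduction stmt is expanded
     into two vector stmts per iteration of the vectorized loop.  */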
6135 
6136   /* If the reduction is used in an outer loop we need to generate
6137      VF intermediate results, like so (e.g. for ncopies=2):
6138 	r0 = phi (init, r0)
6139 	r1 = phi (init, r1)
6140 	r0 = x0 + r0;
6141         r1 = x1 + r1;
6142     (i.e. we generate VF results in 2 registers).
6143     In this case we have a separate def-use cycle for each copy, and therefore
6144     for each copy we get the vector def for the reduction variable from the
6145     respective phi node created for this copy.
6146 
6147     Otherwise (the reduction is unused in the loop nest), we can combine
6148     together intermediate results, like so (e.g. for ncopies=2):
6149 	r = phi (init, r)
6150 	r = x0 + r;
6151 	r = x1 + r;
6152    (i.e. we generate VF/2 results in a single register).
6153    In this case for each copy we get the vector def for the reduction variable
6154    from the vectorized reduction operation generated in the previous iteration.
6155   */
6156 
6157   if (STMT_VINFO_RELEVANT (stmt_info) <= vect_used_only_live)
6158     {
6159       single_defuse_cycle = true;
6160       epilog_copies = 1;
6161     }
6162   else
6163     epilog_copies = ncopies;
6164 
6165   prev_stmt_info = NULL;
6166   prev_phi_info = NULL;
6167   if (slp_node)
6168     vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6169   else
6170     {
6171       vec_num = 1;
6172       vec_oprnds0.create (1);
6173       if (op_type == ternary_op)
6174         vec_oprnds1.create (1);
6175     }
6176 
6177   phis.create (vec_num);
6178   vect_defs.create (vec_num);
6179   if (!slp_node)
6180     vect_defs.quick_push (NULL_TREE);
6181 
6182   for (j = 0; j < ncopies; j++)
6183     {
6184       if (j == 0 || !single_defuse_cycle)
6185 	{
6186           for (i = 0; i < vec_num; i++)
6187             {
6188               /* Create the reduction-phi that defines the reduction
6189                  operand.  */
6190               new_phi = create_phi_node (vec_dest, loop->header);
6191               set_vinfo_for_stmt (new_phi,
6192                                   new_stmt_vec_info (new_phi, loop_vinfo));
6193                if (j == 0 || slp_node)
6194                  phis.quick_push (new_phi);
6195             }
6196         }
6197 
6198       if (code == COND_EXPR)
6199         {
6200           gcc_assert (!slp_node);
6201           vectorizable_condition (stmt, gsi, vec_stmt,
6202                                   PHI_RESULT (phis[0]),
6203                                   reduc_index, NULL);
6204           /* Multiple types are not supported for condition.  */
6205           break;
6206         }
6207 
6208       /* Handle uses.  */
6209       if (j == 0)
6210         {
6211 	  if (slp_node)
6212 	    {
6213 	      /* Get vec defs for all the operands except the reduction index,
6214 		 ensuring the ordering of the ops in the vector is kept.  */
6215 	      auto_vec<tree, 3> slp_ops;
6216 	      auto_vec<vec<tree>, 3> vec_defs;
6217 
6218 	      slp_ops.quick_push (reduc_index == 0 ? NULL : ops[0]);
6219 	      slp_ops.quick_push (reduc_index == 1 ? NULL : ops[1]);
6220 	      if (op_type == ternary_op)
6221 		slp_ops.quick_push (reduc_index == 2 ? NULL : ops[2]);
6222 
6223 	      vect_get_slp_defs (slp_ops, slp_node, &vec_defs, -1);
6224 
6225 	      vec_oprnds0.safe_splice (vec_defs[reduc_index == 0 ? 1 : 0]);
6226 	      vec_defs[reduc_index == 0 ? 1 : 0].release ();
6227 	      if (op_type == ternary_op)
6228 		{
6229 		  vec_oprnds1.safe_splice (vec_defs[reduc_index == 2 ? 1 : 2]);
6230 		  vec_defs[reduc_index == 2 ? 1 : 2].release ();
6231 		}
6232 	    }
6233           else
6234 	    {
6235               loop_vec_def0 = vect_get_vec_def_for_operand (ops[!reduc_index],
6236                                                             stmt);
6237               vec_oprnds0.quick_push (loop_vec_def0);
6238               if (op_type == ternary_op)
6239                {
6240 		 op1 = reduc_index == 0 ? ops[2] : ops[1];
6241                  loop_vec_def1 = vect_get_vec_def_for_operand (op1, stmt);
6242                  vec_oprnds1.quick_push (loop_vec_def1);
6243                }
6244 	    }
6245         }
6246       else
6247         {
6248           if (!slp_node)
6249             {
6250               enum vect_def_type dt;
6251 	      gimple *dummy_stmt;
6252 
6253               vect_is_simple_use (ops[!reduc_index], loop_vinfo,
6254                                   &dummy_stmt, &dt);
6255               loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt,
6256                                                               loop_vec_def0);
6257               vec_oprnds0[0] = loop_vec_def0;
6258               if (op_type == ternary_op)
6259                 {
6260                   vect_is_simple_use (op1, loop_vinfo, &dummy_stmt, &dt);
6261                   loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt,
6262                                                                 loop_vec_def1);
6263                   vec_oprnds1[0] = loop_vec_def1;
6264                 }
6265             }
6266 
6267           if (single_defuse_cycle)
6268             reduc_def = gimple_assign_lhs (new_stmt);
6269 
6270           STMT_VINFO_RELATED_STMT (prev_phi_info) = new_phi;
6271         }
6272 
6273       FOR_EACH_VEC_ELT (vec_oprnds0, i, def0)
6274         {
6275           if (slp_node)
6276             reduc_def = PHI_RESULT (phis[i]);
6277           else
6278             {
6279               if (!single_defuse_cycle || j == 0)
6280                 reduc_def = PHI_RESULT (new_phi);
6281             }
6282 
6283           def1 = ((op_type == ternary_op)
6284                   ? vec_oprnds1[i] : NULL);
6285           if (op_type == binary_op)
6286             {
6287               if (reduc_index == 0)
6288                 expr = build2 (code, vectype_out, reduc_def, def0);
6289               else
6290                 expr = build2 (code, vectype_out, def0, reduc_def);
6291             }
6292           else
6293             {
6294               if (reduc_index == 0)
6295                 expr = build3 (code, vectype_out, reduc_def, def0, def1);
6296               else
6297                 {
6298                   if (reduc_index == 1)
6299                     expr = build3 (code, vectype_out, def0, reduc_def, def1);
6300                   else
6301                     expr = build3 (code, vectype_out, def0, def1, reduc_def);
6302                 }
6303             }
6304 
6305           new_stmt = gimple_build_assign (vec_dest, expr);
6306           new_temp = make_ssa_name (vec_dest, new_stmt);
6307           gimple_assign_set_lhs (new_stmt, new_temp);
6308           vect_finish_stmt_generation (stmt, new_stmt, gsi);
6309 
6310           if (slp_node)
6311             {
6312               SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6313               vect_defs.quick_push (new_temp);
6314             }
6315           else
6316             vect_defs[0] = new_temp;
6317         }
6318 
6319       if (slp_node)
6320         continue;
6321 
6322       if (j == 0)
6323 	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6324       else
6325 	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6326 
6327       prev_stmt_info = vinfo_for_stmt (new_stmt);
6328       prev_phi_info = vinfo_for_stmt (new_phi);
6329     }
6330 
6331   tree indx_before_incr, indx_after_incr, cond_name = NULL;
6332 
6333   /* Finalize the reduction-phi (set its arguments) and create the
6334      epilog reduction code.  */
6335   if ((!single_defuse_cycle || code == COND_EXPR) && !slp_node)
6336     {
6337       new_temp = gimple_assign_lhs (*vec_stmt);
6338       vect_defs[0] = new_temp;
6339 
6340       /* For cond reductions we want to create a new vector (INDEX_COND_EXPR)
6341 	 which is updated with the current index of the loop for every match of
6342 	 the original loop's cond_expr (VEC_STMT).  This results in a vector
6343 	 containing, for each vector lane, the index of the last iteration
6344 	 in which the condition held.  The first match is given index 1, so
6345 	 that 0 can be used for non-matching lanes.  If there are no matches
6346 	 at all then the vector is all zeroes.  */
6347       if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
6348 	{
6349 	  int nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6350 	  int k;
6351 
6352 	  gcc_assert (gimple_assign_rhs_code (*vec_stmt) == VEC_COND_EXPR);
6353 
6354 	  /* First we create a simple vector induction variable which starts
6355 	     with the values {1,2,3,...} (SERIES_VECT) and increments by the
6356 	     vector size (STEP).  */
6357 
6358 	  /* Create a {1,2,3,...} vector.  */
6359 	  tree *vtemp = XALLOCAVEC (tree, nunits_out);
6360 	  for (k = 0; k < nunits_out; ++k)
6361 	    vtemp[k] = build_int_cst (cr_index_scalar_type, k + 1);
6362 	  tree series_vect = build_vector (cr_index_vector_type, vtemp);
6363 
6364 	  /* Create a vector of the step value.  */
6365 	  tree step = build_int_cst (cr_index_scalar_type, nunits_out);
6366 	  tree vec_step = build_vector_from_val (cr_index_vector_type, step);
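	  /* Illustrative values (assuming nunits_out == 4):
	       SERIES_VECT = { 1, 2, 3, 4 }
	       VEC_STEP    = { 4, 4, 4, 4 }
	     so after J vector iterations the IV lanes hold
	     { 4*J+1, 4*J+2, 4*J+3, 4*J+4 }.  */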
6367 
6368 	  /* Create an induction variable.  */
6369 	  gimple_stmt_iterator incr_gsi;
6370 	  bool insert_after;
6371 	  standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6372 	  create_iv (series_vect, vec_step, NULL_TREE, loop, &incr_gsi,
6373 		     insert_after, &indx_before_incr, &indx_after_incr);
6374 
6375 	  /* Next create a new phi node vector (NEW_PHI_TREE) which starts
6376 	     filled with zeros (VEC_ZERO).  */
6377 
6378 	  /* Create a vector of 0s.  */
6379 	  tree zero = build_zero_cst (cr_index_scalar_type);
6380 	  tree vec_zero = build_vector_from_val (cr_index_vector_type, zero);
6381 
6382 	  /* Create a vector phi node.  */
6383 	  tree new_phi_tree = make_ssa_name (cr_index_vector_type);
6384 	  new_phi = create_phi_node (new_phi_tree, loop->header);
6385 	  set_vinfo_for_stmt (new_phi,
6386 			      new_stmt_vec_info (new_phi, loop_vinfo));
6387 	  add_phi_arg (new_phi, vec_zero, loop_preheader_edge (loop),
6388 		       UNKNOWN_LOCATION);
6389 
6390 	  /* Now take the condition from the loop's original cond_expr
6391 	     (VEC_STMT) and produce a new cond_expr (INDEX_COND_EXPR) which for
6392 	     every match uses values from the induction variable
6393 	     (INDEX_BEFORE_INCR) and otherwise uses values from the phi node
6394 	     (NEW_PHI_TREE).
6395 	     Finally, we update the phi (NEW_PHI_TREE) to take the value of
6396 	     the new cond_expr (INDEX_COND_EXPR).  */
6397 
6398 	  /* Duplicate the condition from vec_stmt.  */
6399 	  tree ccompare = unshare_expr (gimple_assign_rhs1 (*vec_stmt));
6400 
6401 	  /* Create a conditional, where the condition is taken from vec_stmt
6402 	     (CCOMPARE), the then-value is the induction index
6403 	     (INDEX_BEFORE_INCR) and the else-value is the phi (NEW_PHI_TREE).  */
6404 	  tree index_cond_expr = build3 (VEC_COND_EXPR, cr_index_vector_type,
6405 					 ccompare, indx_before_incr,
6406 					 new_phi_tree);
6407 	  cond_name = make_ssa_name (cr_index_vector_type);
6408 	  gimple *index_condition = gimple_build_assign (cond_name,
6409 							 index_cond_expr);
6410 	  gsi_insert_before (&incr_gsi, index_condition, GSI_SAME_STMT);
6411 	  stmt_vec_info index_vec_info = new_stmt_vec_info (index_condition,
6412 							    loop_vinfo);
6413 	  STMT_VINFO_VECTYPE (index_vec_info) = cr_index_vector_type;
6414 	  set_vinfo_for_stmt (index_condition, index_vec_info);
6415 
6416 	  /* Update the phi with the vec cond.  */
6417 	  add_phi_arg (new_phi, cond_name, loop_latch_edge (loop),
6418 		       UNKNOWN_LOCATION);
6419 	}
6420     }
6421 
6422   vect_create_epilog_for_reduction (vect_defs, stmt, epilog_copies,
6423                                     epilog_reduc_code, phis, reduc_index,
6424 				    double_reduc, slp_node, cond_name,
6425 				    cond_reduc_val);
6426 
6427   return true;
6428 }
6429 
6430 /* Function vect_min_worthwhile_factor.
6431 
6432    For a loop where we could vectorize the operation indicated by CODE,
6433    return the minimum vectorization factor that makes it worthwhile
6434    to use generic vectors.  */
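/* Illustrative reading of the switch below: with only word-mode "generic
   vectors" available, a PLUS_EXPR is deemed profitable from VF 4 upwards,
   cheap bitwise ops already from VF 2, and any other code never
   (INT_MAX).  */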
6435 int
6436 vect_min_worthwhile_factor (enum tree_code code)
6437 {
6438   switch (code)
6439     {
6440     case PLUS_EXPR:
6441     case MINUS_EXPR:
6442     case NEGATE_EXPR:
6443       return 4;
6444 
6445     case BIT_AND_EXPR:
6446     case BIT_IOR_EXPR:
6447     case BIT_XOR_EXPR:
6448     case BIT_NOT_EXPR:
6449       return 2;
6450 
6451     default:
6452       return INT_MAX;
6453     }
6454 }
6455 
6456 
6457 /* Function vectorizable_induction
6458 
6459    Check if PHI performs an induction computation that can be vectorized.
6460    If VEC_STMT is also passed, vectorize the induction PHI: create a vectorized
6461    phi to replace it, put it in VEC_STMT, and add it to the same basic block.
6462    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
6463 
6464 bool
6465 vectorizable_induction (gimple *phi,
6466 			gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
6467 			gimple **vec_stmt)
6468 {
6469   stmt_vec_info stmt_info = vinfo_for_stmt (phi);
6470   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6471   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6472   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6473   int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6474   int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
6475   tree vec_def;
6476 
6477   gcc_assert (ncopies >= 1);
6478   /* FORNOW. These restrictions should be relaxed.  */
6479   if (nested_in_vect_loop_p (loop, phi))
6480     {
6481       imm_use_iterator imm_iter;
6482       use_operand_p use_p;
6483       gimple *exit_phi;
6484       edge latch_e;
6485       tree loop_arg;
6486 
6487       if (ncopies > 1)
6488 	{
6489 	  if (dump_enabled_p ())
6490 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6491 			     "multiple types in nested loop.\n");
6492 	  return false;
6493 	}
6494 
6495       exit_phi = NULL;
6496       latch_e = loop_latch_edge (loop->inner);
6497       loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
6498       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
6499 	{
6500 	  gimple *use_stmt = USE_STMT (use_p);
6501 	  if (is_gimple_debug (use_stmt))
6502 	    continue;
6503 
6504 	  if (!flow_bb_inside_loop_p (loop->inner, gimple_bb (use_stmt)))
6505 	    {
6506 	      exit_phi = use_stmt;
6507 	      break;
6508 	    }
6509 	}
6510       if (exit_phi)
6511 	{
6512 	  stmt_vec_info exit_phi_vinfo  = vinfo_for_stmt (exit_phi);
6513 	  if (!(STMT_VINFO_RELEVANT_P (exit_phi_vinfo)
6514 		&& !STMT_VINFO_LIVE_P (exit_phi_vinfo)))
6515 	    {
6516 	      if (dump_enabled_p ())
6517 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6518 				 "inner-loop induction only used outside "
6519 				 "of the outer vectorized loop.\n");
6520 	      return false;
6521 	    }
6522 	}
6523     }
6524 
6525   if (!STMT_VINFO_RELEVANT_P (stmt_info))
6526     return false;
6527 
6528   /* FORNOW: SLP not supported.  */
6529   if (STMT_SLP_TYPE (stmt_info))
6530     return false;
6531 
6532   gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def);
6533 
6534   if (gimple_code (phi) != GIMPLE_PHI)
6535     return false;
6536 
6537   if (!vec_stmt) /* transformation not required.  */
6538     {
6539       STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type;
6540       if (dump_enabled_p ())
6541         dump_printf_loc (MSG_NOTE, vect_location,
6542                          "=== vectorizable_induction ===\n");
6543       vect_model_induction_cost (stmt_info, ncopies);
6544       return true;
6545     }
6546 
6547   /** Transform.  **/
6548 
6549   if (dump_enabled_p ())
6550     dump_printf_loc (MSG_NOTE, vect_location, "transform induction phi.\n");
6551 
6552   vec_def = get_initial_def_for_induction (phi);
6553   *vec_stmt = SSA_NAME_DEF_STMT (vec_def);
6554   return true;
6555 }
6556 
6557 /* Function vectorizable_live_operation.
6558 
6559    STMT computes a value that is used outside the loop.  Check if
6560    it can be supported.  */
6561 
6562 bool
6563 vectorizable_live_operation (gimple *stmt,
6564 			     gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
6565 			     slp_tree slp_node, int slp_index,
6566 			     gimple **vec_stmt)
6567 {
6568   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6569   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6570   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6571   imm_use_iterator imm_iter;
6572   tree lhs, lhs_type, bitsize, vec_bitsize;
6573   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6574   int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6575   int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
6576   gimple *use_stmt;
6577   auto_vec<tree> vec_oprnds;
6578 
6579   gcc_assert (STMT_VINFO_LIVE_P (stmt_info));
6580 
6581   if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
6582     return false;
6583 
6584   /* FORNOW.  CHECKME.  */
6585   if (nested_in_vect_loop_p (loop, stmt))
6586     return false;
6587 
6588   /* If STMT is not relevant and it is a simple assignment and its inputs are
6589      invariant then it can remain in place, unvectorized.  The original last
6590      scalar value that it computes will be used.  */
6591   if (!STMT_VINFO_RELEVANT_P (stmt_info))
6592     {
6593       gcc_assert (is_simple_and_all_uses_invariant (stmt, loop_vinfo));
6594       if (dump_enabled_p ())
6595 	dump_printf_loc (MSG_NOTE, vect_location,
6596 			 "statement is simple and uses invariant.  Leaving in "
6597 			 "place.\n");
6598       return true;
6599     }
6600 
6601   if (!vec_stmt)
6602     /* No transformation required.  */
6603     return true;
6604 
6605   /* If stmt has a related stmt, then use that for getting the lhs.  */
6606   if (is_pattern_stmt_p (stmt_info))
6607     stmt = STMT_VINFO_RELATED_STMT (stmt_info);
6608 
6609   lhs = (is_a <gphi *> (stmt)) ? gimple_phi_result (stmt)
6610 	: gimple_get_lhs (stmt);
6611   lhs_type = TREE_TYPE (lhs);
6612 
6613   bitsize = TYPE_SIZE (TREE_TYPE (vectype));
6614   vec_bitsize = TYPE_SIZE (vectype);
6615 
6616   /* Get the vectorized lhs of STMT and the lane to use (counted in bits).  */
6617   tree vec_lhs, bitstart;
6618   if (slp_node)
6619     {
6620       gcc_assert (slp_index >= 0);
6621 
6622       int num_scalar = SLP_TREE_SCALAR_STMTS (slp_node).length ();
6623       int num_vec = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6624 
6625       /* Get the last occurrence of the scalar index from the concatenation
6626 	 of all the SLP vectors.  Calculate which SLP vector it is and the
6627 	 index within it.  */
6628       int pos = (num_vec * nunits) - num_scalar + slp_index;
6629       int vec_entry = pos / nunits;
6630       int vec_index = pos % nunits;
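      /* Worked example (hypothetical values): num_scalar == 3,
	 num_vec == 2, nunits == 4 and slp_index == 1 give
	 pos == 2 * 4 - 3 + 1 == 6, vec_entry == 1 and vec_index == 2,
	 i.e. the last copy of scalar stmt 1 lives in lane 2 of the
	 second SLP vector stmt.  */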
6631 
6632       /* Get the correct slp vectorized stmt.  */
6633       vec_lhs = gimple_get_lhs (SLP_TREE_VEC_STMTS (slp_node)[vec_entry]);
6634 
6635       /* Get entry to use.  */
6636       bitstart = build_int_cst (unsigned_type_node, vec_index);
6637       bitstart = int_const_binop (MULT_EXPR, bitsize, bitstart);
6638     }
6639   else
6640     {
6641       enum vect_def_type dt = STMT_VINFO_DEF_TYPE (stmt_info);
6642       vec_lhs = vect_get_vec_def_for_operand_1 (stmt, dt);
6643 
6644       /* For multiple copies, get the last copy.  */
6645       for (int i = 1; i < ncopies; ++i)
6646 	vec_lhs = vect_get_vec_def_for_stmt_copy (vect_unknown_def_type,
6647 						  vec_lhs);
6648 
6649       /* Get the last lane in the vector.  */
6650       bitstart = int_const_binop (MINUS_EXPR, vec_bitsize, bitsize);
6651     }
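  /* Illustrative numbers: for a V4SI vector, bitsize == 32 and
     vec_bitsize == 128, so the non-SLP case above extracts the lane
     starting at bit 96 - the last element produced by the final vector
     iteration.  */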
6652 
6653   /* Create a new vectorized stmt for the uses of STMT and insert outside the
6654      loop.  */
6655   gimple_seq stmts = NULL;
6656   tree bftype = TREE_TYPE (vectype);
6657   if (VECTOR_BOOLEAN_TYPE_P (vectype))
6658     bftype = build_nonstandard_integer_type (tree_to_uhwi (bitsize), 1);
6659   tree new_tree = build3 (BIT_FIELD_REF, bftype, vec_lhs, bitsize, bitstart);
6660   new_tree = force_gimple_operand (fold_convert (lhs_type, new_tree), &stmts,
6661 				   true, NULL_TREE);
6662   if (stmts)
6663     gsi_insert_seq_on_edge_immediate (single_exit (loop), stmts);
6664 
6665   /* Replace uses of LHS with the newly computed result.  If the use stmt
6666      is a single-argument PHI, just replace all uses of the PHI result:
6667      the LC-SSA PHI defining LHS may precede the newly inserted stmt.  */
6668   use_operand_p use_p;
6669   FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, lhs)
6670     if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))
6671 	&& !is_gimple_debug (use_stmt))
6672     {
6673       if (gimple_code (use_stmt) == GIMPLE_PHI
6674 	  && gimple_phi_num_args (use_stmt) == 1)
6675 	{
6676 	  replace_uses_by (gimple_phi_result (use_stmt), new_tree);
6677 	}
6678       else
6679 	{
6680 	  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
6681 	    SET_USE (use_p, new_tree);
6682 	}
6683       update_stmt (use_stmt);
6684     }
6685 
6686   return true;
6687 }
6688 
6689 /* Kill any debug uses outside LOOP of SSA names defined in STMT.  */
6690 
6691 static void
6692 vect_loop_kill_debug_uses (struct loop *loop, gimple *stmt)
6693 {
6694   ssa_op_iter op_iter;
6695   imm_use_iterator imm_iter;
6696   def_operand_p def_p;
6697   gimple *ustmt;
6698 
6699   FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
6700     {
6701       FOR_EACH_IMM_USE_STMT (ustmt, imm_iter, DEF_FROM_PTR (def_p))
6702 	{
6703 	  basic_block bb;
6704 
6705 	  if (!is_gimple_debug (ustmt))
6706 	    continue;
6707 
6708 	  bb = gimple_bb (ustmt);
6709 
6710 	  if (!flow_bb_inside_loop_p (loop, bb))
6711 	    {
6712 	      if (gimple_debug_bind_p (ustmt))
6713 		{
6714 		  if (dump_enabled_p ())
6715 		    dump_printf_loc (MSG_NOTE, vect_location,
6716                                      "killing debug use\n");
6717 
6718 		  gimple_debug_bind_reset_value (ustmt);
6719 		  update_stmt (ustmt);
6720 		}
6721 	      else
6722 		gcc_unreachable ();
6723 	    }
6724 	}
6725     }
6726 }
6727 
6728 /* Given the loop represented by LOOP_VINFO, return true if computation of
6729    LOOP_VINFO_NITERS (= LOOP_VINFO_NITERSM1 + 1) doesn't overflow, false
6730    otherwise.  */
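/* Example (illustrative): with a 32-bit unsigned NITERS type, the
   function returns true either when the compile-time constant NITERSM1
   is smaller than NITERS, or when the loop's maximum iteration bound is
   known to be below 0xffffffff, so that NITERSM1 + 1 cannot wrap to
   zero.  */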
6731 
6732 static bool
6733 loop_niters_no_overflow (loop_vec_info loop_vinfo)
6734 {
6735   /* Constant case.  */
6736   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
6737     {
6738       tree cst_niters = LOOP_VINFO_NITERS (loop_vinfo);
6739       tree cst_nitersm1 = LOOP_VINFO_NITERSM1 (loop_vinfo);
6740 
6741       gcc_assert (TREE_CODE (cst_niters) == INTEGER_CST);
6742       gcc_assert (TREE_CODE (cst_nitersm1) == INTEGER_CST);
6743       if (wi::to_widest (cst_nitersm1) < wi::to_widest (cst_niters))
6744 	return true;
6745     }
6746 
6747   widest_int max;
6748   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6749   /* Check the upper bound of loop niters.  */
6750   if (get_max_loop_iterations (loop, &max))
6751     {
6752       tree type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo));
6753       signop sgn = TYPE_SIGN (type);
6754       widest_int type_max = widest_int::from (wi::max_value (type), sgn);
6755       if (max < type_max)
6756 	return true;
6757     }
6758   return false;
6759 }
6760 
6761 /* Scale the profiling counters of LOOP, which is vectorized by factor
6762    VF, according to the estimated number of iterations of the new loop.  */
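/* Illustrative numbers (assuming niter_for_unrolled_loop returns 1 for
   this loop): if the preheader count freq_e is 100 and the header count
   freq_h is 800, the scale below becomes 100 * (1 + 1) / 800 == 1/4 of
   REG_BR_PROB_BASE, so body counts drop from 800 to about 200.  */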
6763 
6764 static void
6765 scale_profile_for_vect_loop (struct loop *loop, unsigned vf)
6766 {
6767   edge preheader = loop_preheader_edge (loop);
6768   /* Reduce loop iterations by the vectorization factor.  */
6769   gcov_type new_est_niter = niter_for_unrolled_loop (loop, vf);
6770   gcov_type freq_h = loop->header->count, freq_e = preheader->count;
6771 
6772   /* Use frequency only if counts are zero.  */
6773   if (freq_h == 0 && freq_e == 0)
6774     {
6775       freq_h = loop->header->frequency;
6776       freq_e = EDGE_FREQUENCY (preheader);
6777     }
6778   if (freq_h != 0)
6779     {
6780       gcov_type scale;
6781 
6782       /* Avoid dropping the loop body profile counter to 0 because of a
6783 	 zero count in the loop's preheader.  */
6784       freq_e = MAX (freq_e, 1);
6785       /* This should not overflow.  */
6786       scale = GCOV_COMPUTE_SCALE (freq_e * (new_est_niter + 1), freq_h);
6787       scale_loop_frequencies (loop, scale, REG_BR_PROB_BASE);
6788     }
6789 
6790   basic_block exit_bb = single_pred (loop->latch);
6791   edge exit_e = single_exit (loop);
6792   exit_e->count = loop_preheader_edge (loop)->count;
6793   exit_e->probability = REG_BR_PROB_BASE / (new_est_niter + 1);
6794 
6795   edge exit_l = single_pred_edge (loop->latch);
6796   int prob = exit_l->probability;
6797   exit_l->probability = REG_BR_PROB_BASE - exit_e->probability;
6798   exit_l->count = exit_bb->count - exit_e->count;
6799   if (exit_l->count < 0)
6800     exit_l->count = 0;
6801   if (prob > 0)
6802     scale_bbs_frequencies_int (&loop->latch, 1, exit_l->probability, prob);
6803 }
6804 
6805 /* Function vect_transform_loop.
6806 
6807    The analysis phase has determined that the loop is vectorizable.
6808    Vectorize the loop - create vectorized stmts to replace the scalar
6809    stmts in the loop, and update the loop exit condition.
6810    Returns the scalar epilogue loop, if any.  */
6811 
6812 struct loop *
6813 vect_transform_loop (loop_vec_info loop_vinfo)
6814 {
6815   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6816   struct loop *epilogue = NULL;
6817   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
6818   int nbbs = loop->num_nodes;
6819   int i;
6820   tree niters_vector = NULL;
6821   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6822   bool grouped_store;
6823   bool slp_scheduled = false;
6824   gimple *stmt, *pattern_stmt;
6825   gimple_seq pattern_def_seq = NULL;
6826   gimple_stmt_iterator pattern_def_si = gsi_none ();
6827   bool transform_pattern_stmt = false;
6828   bool check_profitability = false;
6829   int th;
6830 
6831   if (dump_enabled_p ())
6832     dump_printf_loc (MSG_NOTE, vect_location, "=== vec_transform_loop ===\n");
6833 
6834   /* Use the more conservative vectorization threshold.  If the number
6835      of iterations is constant assume the cost check has been performed
6836      by our caller.  If the threshold makes all loops profitable that
6837      run at least the vectorization factor number of times checking
6838      is pointless, too.  */
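  /* Illustrative numbers: with VF == 4 and a cost-model threshold of,
     say, 7 iterations, th >= VF - 1 holds, so for a loop with unknown
     trip count a runtime profitability check is still emitted below.  */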
6839   th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo);
6840   if (th >= LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1
6841       && !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
6842     {
6843       if (dump_enabled_p ())
6844 	dump_printf_loc (MSG_NOTE, vect_location,
6845 			 "Profitability threshold is %d loop iterations.\n",
6846                          th);
6847       check_profitability = true;
6848     }
6849 
6850   /* Make sure there exists a single-predecessor exit bb.  Do this before
6851      versioning.   */
6852   edge e = single_exit (loop);
6853   if (! single_pred_p (e->dest))
6854     {
6855       split_loop_exit_edge (e);
6856       if (dump_enabled_p ())
6857 	dump_printf (MSG_NOTE, "split exit edge\n");
6858     }
6859 
6860   /* Version the loop first, if required, so the profitability check
6861      comes first.  */
6862 
6863   if (LOOP_REQUIRES_VERSIONING (loop_vinfo))
6864     {
6865       vect_loop_versioning (loop_vinfo, th, check_profitability);
6866       check_profitability = false;
6867     }
6868 
6869   /* Make sure there exists a single-predecessor exit bb also on the
6870      scalar loop copy.  Do this after versioning but before peeling
6871      so CFG structure is fine for both scalar and if-converted loop
6872      to make slpeel_duplicate_current_defs_from_edges face matched
6873      loop closed PHI nodes on the exit.  */
6874   if (LOOP_VINFO_SCALAR_LOOP (loop_vinfo))
6875     {
6876       e = single_exit (LOOP_VINFO_SCALAR_LOOP (loop_vinfo));
6877       if (! single_pred_p (e->dest))
6878 	{
6879 	  split_loop_exit_edge (e);
6880 	  if (dump_enabled_p ())
6881 	    dump_printf (MSG_NOTE, "split exit edge of scalar loop\n");
6882 	}
6883     }
6884 
6885   tree niters = vect_build_loop_niters (loop_vinfo);
6886   LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = niters;
6887   tree nitersm1 = unshare_expr (LOOP_VINFO_NITERSM1 (loop_vinfo));
6888   bool niters_no_overflow = loop_niters_no_overflow (loop_vinfo);
6889   epilogue = vect_do_peeling (loop_vinfo, niters, nitersm1, &niters_vector, th,
6890 			      check_profitability, niters_no_overflow);
6891   if (niters_vector == NULL_TREE)
6892     {
6893       if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
6894 	niters_vector
6895 	  = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
6896 			   LOOP_VINFO_INT_NITERS (loop_vinfo) / vf);
6897       else
6898 	vect_gen_vector_loop_niters (loop_vinfo, niters, &niters_vector,
6899 				     niters_no_overflow);
6900     }
6901 
6902   /* 1) Make sure the loop header has exactly two entries
6903      2) Make sure we have a preheader basic block.  */
6904 
6905   gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
6906 
6907   split_edge (loop_preheader_edge (loop));
6908 
6909   /* FORNOW: the vectorizer supports only loops whose body consists
6910      of one basic block (header + empty latch).  When the vectorizer
6911      supports more involved loop forms, the order in which the BBs are
6912      traversed will need to be reconsidered.  */
6913 
6914   for (i = 0; i < nbbs; i++)
6915     {
6916       basic_block bb = bbs[i];
6917       stmt_vec_info stmt_info;
6918 
6919       for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
6920 	   gsi_next (&si))
6921         {
6922 	  gphi *phi = si.phi ();
6923 	  if (dump_enabled_p ())
6924 	    {
6925 	      dump_printf_loc (MSG_NOTE, vect_location,
6926                                "------>vectorizing phi: ");
6927 	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
6928 	    }
6929 	  stmt_info = vinfo_for_stmt (phi);
6930 	  if (!stmt_info)
6931 	    continue;
6932 
6933 	  if (MAY_HAVE_DEBUG_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
6934 	    vect_loop_kill_debug_uses (loop, phi);
6935 
6936 	  if (!STMT_VINFO_RELEVANT_P (stmt_info)
6937 	      && !STMT_VINFO_LIVE_P (stmt_info))
6938 	    continue;
6939 
6940 	  if (STMT_VINFO_VECTYPE (stmt_info)
6941 	      && (TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
6942 		  != (unsigned HOST_WIDE_INT) vf)
6943 	      && dump_enabled_p ())
6944 	    dump_printf_loc (MSG_NOTE, vect_location, "multiple-types.\n");
6945 
6946 	  if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
6947 	    {
6948 	      if (dump_enabled_p ())
6949 		dump_printf_loc (MSG_NOTE, vect_location, "transform phi.\n");
6950 	      vect_transform_stmt (phi, NULL, NULL, NULL, NULL);
6951 	    }
6952 	}
6953 
6954       pattern_stmt = NULL;
6955       for (gimple_stmt_iterator si = gsi_start_bb (bb);
6956 	   !gsi_end_p (si) || transform_pattern_stmt;)
6957 	{
6958 	  bool is_store;
6959 
6960           if (transform_pattern_stmt)
6961 	    stmt = pattern_stmt;
6962           else
6963 	    {
6964 	      stmt = gsi_stmt (si);
6965 	      /* During vectorization remove existing clobber stmts.  */
6966 	      if (gimple_clobber_p (stmt))
6967 		{
6968 		  unlink_stmt_vdef (stmt);
6969 		  gsi_remove (&si, true);
6970 		  release_defs (stmt);
6971 		  continue;
6972 		}
6973 	    }
6974 
6975 	  if (dump_enabled_p ())
6976 	    {
6977 	      dump_printf_loc (MSG_NOTE, vect_location,
6978 			       "------>vectorizing statement: ");
6979 	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
6980 	    }
6981 
6982 	  stmt_info = vinfo_for_stmt (stmt);
6983 
6984 	  /* vector stmts created in the outer-loop during vectorization of
6985 	     stmts in an inner-loop may not have a stmt_info, and do not
6986 	     need to be vectorized.  */
6987 	  if (!stmt_info)
6988 	    {
6989 	      gsi_next (&si);
6990 	      continue;
6991 	    }
6992 
6993 	  if (MAY_HAVE_DEBUG_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
6994 	    vect_loop_kill_debug_uses (loop, stmt);
6995 
6996 	  if (!STMT_VINFO_RELEVANT_P (stmt_info)
6997 	      && !STMT_VINFO_LIVE_P (stmt_info))
6998             {
6999               if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7000                   && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
7001                   && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
7002                       || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
7003                 {
7004                   stmt = pattern_stmt;
7005                   stmt_info = vinfo_for_stmt (stmt);
7006                 }
7007               else
7008 	        {
7009    	          gsi_next (&si);
7010 	          continue;
7011                 }
7012 	    }
7013           else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7014                    && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
7015                    && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
7016                        || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
7017             transform_pattern_stmt = true;
7018 
7019 	  /* If pattern statement has def stmts, vectorize them too.  */
7020 	  if (is_pattern_stmt_p (stmt_info))
7021 	    {
7022 	      if (pattern_def_seq == NULL)
7023 		{
7024 		  pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
7025 		  pattern_def_si = gsi_start (pattern_def_seq);
7026 		}
7027 	      else if (!gsi_end_p (pattern_def_si))
7028 		gsi_next (&pattern_def_si);
7029 	      if (pattern_def_seq != NULL)
7030 		{
7031 		  gimple *pattern_def_stmt = NULL;
7032 		  stmt_vec_info pattern_def_stmt_info = NULL;
7033 
7034 		  while (!gsi_end_p (pattern_def_si))
7035 		    {
7036 		      pattern_def_stmt = gsi_stmt (pattern_def_si);
7037 		      pattern_def_stmt_info
7038 			= vinfo_for_stmt (pattern_def_stmt);
7039 		      if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
7040 			  || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
7041 			break;
7042 		      gsi_next (&pattern_def_si);
7043 		    }
7044 
7045 		  if (!gsi_end_p (pattern_def_si))
7046 		    {
7047 		      if (dump_enabled_p ())
7048 			{
7049 			  dump_printf_loc (MSG_NOTE, vect_location,
7050 					   "==> vectorizing pattern def "
7051 					   "stmt: ");
7052 			  dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
7053 					    pattern_def_stmt, 0);
7054 			}
7055 
7056 		      stmt = pattern_def_stmt;
7057 		      stmt_info = pattern_def_stmt_info;
7058 		    }
7059 		  else
7060 		    {
7061 		      pattern_def_si = gsi_none ();
7062 		      transform_pattern_stmt = false;
7063 		    }
7064 		}
7065 	      else
7066 		transform_pattern_stmt = false;
7067             }
7068 
7069 	  if (STMT_VINFO_VECTYPE (stmt_info))
7070 	    {
7071 	      unsigned int nunits
7072 		= (unsigned int)
7073 		  TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
7074 	      if (!STMT_SLP_TYPE (stmt_info)
7075 		  && nunits != (unsigned int) vf
7076 		  && dump_enabled_p ())
7077 		  /* For SLP, VF is set according to the unrolling factor, not
7078 		     to the vector size, hence this message is not valid for SLP.  */
7079 		dump_printf_loc (MSG_NOTE, vect_location, "multiple-types.\n");
7080 	    }
7081 
7082 	  /* SLP. Schedule all the SLP instances when the first SLP stmt is
7083 	     reached.  */
7084 	  if (STMT_SLP_TYPE (stmt_info))
7085 	    {
7086 	      if (!slp_scheduled)
7087 		{
7088 		  slp_scheduled = true;
7089 
7090 		  if (dump_enabled_p ())
7091 		    dump_printf_loc (MSG_NOTE, vect_location,
7092 				     "=== scheduling SLP instances ===\n");
7093 
7094 		  vect_schedule_slp (loop_vinfo);
7095 		}
7096 
7097 	      /* Hybrid SLP stmts must be vectorized in addition to SLP.  */
7098 	      if (!vinfo_for_stmt (stmt) || PURE_SLP_STMT (stmt_info))
7099 		{
7100 		  if (!transform_pattern_stmt && gsi_end_p (pattern_def_si))
7101 		    {
7102 		      pattern_def_seq = NULL;
7103 		      gsi_next (&si);
7104 		    }
7105 		  continue;
7106 		}
7107 	    }
7108 
7109 	  /* -------- vectorize statement ------------ */
7110 	  if (dump_enabled_p ())
7111 	    dump_printf_loc (MSG_NOTE, vect_location, "transform statement.\n");
7112 
7113 	  grouped_store = false;
7114 	  is_store = vect_transform_stmt (stmt, &si, &grouped_store, NULL, NULL);
7115           if (is_store)
7116             {
7117 	      if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7118 		{
7119 		  /* Interleaving. If IS_STORE is TRUE, the vectorization of the
7120 		     interleaving chain was completed - free all the stores in
7121 		     the chain.  */
7122 		  gsi_next (&si);
7123 		  vect_remove_stores (GROUP_FIRST_ELEMENT (stmt_info));
7124 		}
7125 	      else
7126 		{
7127 		  /* Free the attached stmt_vec_info and remove the stmt.  */
7128 		  gimple *store = gsi_stmt (si);
7129 		  free_stmt_vec_info (store);
7130 		  unlink_stmt_vdef (store);
7131 		  gsi_remove (&si, true);
7132 		  release_defs (store);
7133 		}
7134 
7135 	      /* Stores can only appear at the end of pattern statements.  */
7136 	      gcc_assert (!transform_pattern_stmt);
7137 	      pattern_def_seq = NULL;
7138 	    }
7139 	  else if (!transform_pattern_stmt && gsi_end_p (pattern_def_si))
7140 	    {
7141 	      pattern_def_seq = NULL;
7142 	      gsi_next (&si);
7143 	    }
7144 	}		        /* stmts in BB */
7145     }				/* BBs in loop */
7146 
7147   slpeel_make_loop_iterate_ntimes (loop, niters_vector);
7148 
7149   scale_profile_for_vect_loop (loop, vf);
7150 
7151   /* The minimum number of iterations performed by the epilogue.  This
7152      is 1 when peeling for gaps because we always need a final scalar
7153      iteration.  */
7154   int min_epilogue_iters = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) ? 1 : 0;
7155   /* +1 to convert latch counts to loop iteration counts,
7156      -min_epilogue_iters to remove iterations that cannot be performed
7157        by the vector code.  */
7158   int bias = 1 - min_epilogue_iters;
7159   /* In these calculations the "- 1" converts loop iteration counts
7160      back to latch counts.  */
7161   if (loop->any_upper_bound)
7162     loop->nb_iterations_upper_bound
7163       = wi::udiv_floor (loop->nb_iterations_upper_bound + bias, vf) - 1;
7164   if (loop->any_likely_upper_bound)
7165     loop->nb_iterations_likely_upper_bound
7166       = wi::udiv_floor (loop->nb_iterations_likely_upper_bound + bias, vf) - 1;
7167   if (loop->any_estimate)
7168     loop->nb_iterations_estimate
7169       = wi::udiv_floor (loop->nb_iterations_estimate + bias, vf) - 1;
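  /* Worked example (hypothetical values): an upper bound of 17 latch
     iterations (18 loop iterations), vf == 4 and no peeling for gaps
     give bias == 1, so the new bound is (17 + 1) / 4 - 1 == 3 latch
     iterations of the vector loop.  */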
7170 
7171   if (dump_enabled_p ())
7172     {
7173       if (!LOOP_VINFO_EPILOGUE_P (loop_vinfo))
7174 	{
7175 	  dump_printf_loc (MSG_NOTE, vect_location,
7176 			   "LOOP VECTORIZED\n");
7177 	  if (loop->inner)
7178 	    dump_printf_loc (MSG_NOTE, vect_location,
7179 			     "OUTER LOOP VECTORIZED\n");
7180 	  dump_printf (MSG_NOTE, "\n");
7181 	}
7182       else
7183 	dump_printf_loc (MSG_NOTE, vect_location,
7184 			 "LOOP EPILOGUE VECTORIZED (VS=%d)\n",
7185 			 current_vector_size);
7186     }
7187 
7188   /* Free SLP instances here because otherwise stmt reference counting
7189      won't work.  */
7190   slp_instance instance;
7191   FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), i, instance)
7192     vect_free_slp_instance (instance);
7193   LOOP_VINFO_SLP_INSTANCES (loop_vinfo).release ();
7194   /* Clear the safelen field since its value is invalid after
7195      vectorization: the vectorized loop can have loop-carried dependencies.  */
7196   loop->safelen = 0;
7197 
7198   /* Don't vectorize epilogue for epilogue.  */
7199   if (LOOP_VINFO_EPILOGUE_P (loop_vinfo))
7200     epilogue = NULL;
7201 
7202   if (epilogue)
7203     {
7204 	unsigned int vector_sizes
7205 	  = targetm.vectorize.autovectorize_vector_sizes ();
7206 	vector_sizes &= current_vector_size - 1;
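	/* Illustrative values: on a target advertising 32-, 16- and 8-byte
	   vectors, vector_sizes == 0x38; with current_vector_size == 32 the
	   mask (32 - 1) leaves 0x18, i.e. only the smaller 16- and 8-byte
	   sizes remain as epilogue candidates.  */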
7207 
7208 	if (!PARAM_VALUE (PARAM_VECT_EPILOGUES_NOMASK))
7209 	  epilogue = NULL;
7210 	else if (!vector_sizes)
7211 	  epilogue = NULL;
7212 	else if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
7213 		 && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0)
7214 	  {
7215 	    int smallest_vec_size = 1 << ctz_hwi (vector_sizes);
7216 	    int ratio = current_vector_size / smallest_vec_size;
7217 	    int eiters = LOOP_VINFO_INT_NITERS (loop_vinfo)
7218 	      - LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
7219 	    eiters = eiters % vf;
7220 
7221 	    epilogue->nb_iterations_upper_bound = eiters - 1;
7222 
7223 	    if (eiters < vf / ratio)
7224 	      epilogue = NULL;
7225 	    }
7226     }
7227 
7228   if (epilogue)
7229     {
7230       epilogue->force_vectorize = loop->force_vectorize;
7231       epilogue->safelen = loop->safelen;
7232       epilogue->dont_vectorize = false;
7233 
7234       /* We may need to if-convert epilogue to vectorize it.  */
7235       if (LOOP_VINFO_SCALAR_LOOP (loop_vinfo))
7236 	tree_if_conversion (epilogue);
7237     }
7238 
7239   return epilogue;
7240 }
7241 
7242 /* The code below tries to perform a simple optimization - revert
7243    if-conversion for masked stores, i.e. if the mask of a store is zero,
7244    skip the store and, if possible, the stored-value producers as well.
7245    For example,
7246      for (i=0; i<n; i++)
7247        if (c[i])
7248 	{
7249 	  p1[i] += 1;
7250 	  p2[i] = p3[i] +2;
7251 	}
7252    this transformation will produce the following semi-hammock:
7253 
7254    if (!mask__ifc__42.18_165 == { 0, 0, 0, 0, 0, 0, 0, 0 })
7255      {
7256        vect__11.19_170 = MASK_LOAD (vectp_p1.20_168, 0B, mask__ifc__42.18_165);
7257        vect__12.22_172 = vect__11.19_170 + vect_cst__171;
7258        MASK_STORE (vectp_p1.23_175, 0B, mask__ifc__42.18_165, vect__12.22_172);
7259        vect__18.25_182 = MASK_LOAD (vectp_p3.26_180, 0B, mask__ifc__42.18_165);
7260        vect__19.28_184 = vect__18.25_182 + vect_cst__183;
7261        MASK_STORE (vectp_p2.29_187, 0B, mask__ifc__42.18_165, vect__19.28_184);
7262      }
7263 */
7264 
7265 void
7266 optimize_mask_stores (struct loop *loop)
7267 {
7268   basic_block *bbs = get_loop_body (loop);
7269   unsigned nbbs = loop->num_nodes;
7270   unsigned i;
7271   basic_block bb;
7272   struct loop *bb_loop;
7273   gimple_stmt_iterator gsi;
7274   gimple *stmt;
7275   auto_vec<gimple *> worklist;
7276 
7277   vect_location = find_loop_location (loop);
7278   /* Pick up all masked stores in loop if any.  */
7279   for (i = 0; i < nbbs; i++)
7280     {
7281       bb = bbs[i];
7282       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
7283 	   gsi_next (&gsi))
7284 	{
7285 	  stmt = gsi_stmt (gsi);
7286 	  if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
7287 	    worklist.safe_push (stmt);
7288 	}
7289     }
7290 
7291   free (bbs);
7292   if (worklist.is_empty ())
7293     return;
7294 
7295   /* Loop has masked stores.  */
7296   while (!worklist.is_empty ())
7297     {
7298       gimple *last, *last_store;
7299       edge e, efalse;
7300       tree mask;
7301       basic_block store_bb, join_bb;
7302       gimple_stmt_iterator gsi_to;
7303       tree vdef, new_vdef;
7304       gphi *phi;
7305       tree vectype;
7306       tree zero;
7307 
7308       last = worklist.pop ();
7309       mask = gimple_call_arg (last, 2);
7310       bb = gimple_bb (last);
7311       /* Create then_bb and the if-then structure in the CFG; then_bb
7312 	 belongs to the same loop as if_bb.  It can differ from LOOP when a
7313 	 two-level loop nest is vectorized and the mask_store belongs to the
7314 	 inner one.  */
7315       e = split_block (bb, last);
7316       bb_loop = bb->loop_father;
7317       gcc_assert (loop == bb_loop || flow_loop_nested_p (loop, bb_loop));
7318       join_bb = e->dest;
7319       store_bb = create_empty_bb (bb);
7320       add_bb_to_loop (store_bb, bb_loop);
7321       e->flags = EDGE_TRUE_VALUE;
7322       efalse = make_edge (bb, store_bb, EDGE_FALSE_VALUE);
7323       /* Put STORE_BB into the likely part.  */
7324       efalse->probability = PROB_UNLIKELY;
7325       store_bb->frequency = PROB_ALWAYS - EDGE_FREQUENCY (efalse);
7326       make_edge (store_bb, join_bb, EDGE_FALLTHRU);
7327       if (dom_info_available_p (CDI_DOMINATORS))
7328 	set_immediate_dominator (CDI_DOMINATORS, store_bb, bb);
7329       if (dump_enabled_p ())
7330 	dump_printf_loc (MSG_NOTE, vect_location,
7331 			 "Create new block %d to sink mask stores.",
7332 			 store_bb->index);
7333       /* Create vector comparison with boolean result.  */
7334       vectype = TREE_TYPE (mask);
7335       zero = build_zero_cst (vectype);
7336       stmt = gimple_build_cond (EQ_EXPR, mask, zero, NULL_TREE, NULL_TREE);
7337       gsi = gsi_last_bb (bb);
7338       gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
7339       /* Create new PHI node for vdef of the last masked store:
7340 	 .MEM_2 = VDEF <.MEM_1>
7341 	 will be converted to
7342 	 .MEM.3 = VDEF <.MEM_1>
7343 	 and new PHI node will be created in join bb
7344 	 .MEM_2 = PHI <.MEM_1, .MEM_3>
7345       */
7346       vdef = gimple_vdef (last);
7347       new_vdef = make_ssa_name (gimple_vop (cfun), last);
7348       gimple_set_vdef (last, new_vdef);
7349       phi = create_phi_node (vdef, join_bb);
7350       add_phi_arg (phi, new_vdef, EDGE_SUCC (store_bb, 0), UNKNOWN_LOCATION);
7351 
7352       /* Put all masked stores with the same mask to STORE_BB if possible.  */
7353       while (true)
7354 	{
7355 	  gimple_stmt_iterator gsi_from;
7356 	  gimple *stmt1 = NULL;
7357 
7358 	  /* Move masked store to STORE_BB.  */
7359 	  last_store = last;
7360 	  gsi = gsi_for_stmt (last);
7361 	  gsi_from = gsi;
7362 	  /* Shift GSI to the previous stmt for further traversal.  */
7363 	  gsi_prev (&gsi);
7364 	  gsi_to = gsi_start_bb (store_bb);
7365 	  gsi_move_before (&gsi_from, &gsi_to);
7366 	  /* Reset GSI_TO to the start of the now non-empty block.  */
7367 	  gsi_to = gsi_start_bb (store_bb);
7368 	  if (dump_enabled_p ())
7369 	    {
7370 	      dump_printf_loc (MSG_NOTE, vect_location,
7371 			       "Move stmt to created bb\n");
7372 	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, last, 0);
7373 	    }
7374 	  /* Move all stored value producers if possible.  */
7375 	  while (!gsi_end_p (gsi))
7376 	    {
7377 	      tree lhs;
7378 	      imm_use_iterator imm_iter;
7379 	      use_operand_p use_p;
7380 	      bool res;
7381 
7382 	      /* Skip debug statements.  */
7383 	      if (is_gimple_debug (gsi_stmt (gsi)))
7384 		{
7385 		  gsi_prev (&gsi);
7386 		  continue;
7387 		}
7388 	      stmt1 = gsi_stmt (gsi);
7389 	      /* Do not consider statements writing to memory or having
7390 		 a volatile operand.  */
7391 	      if (gimple_vdef (stmt1)
7392 		  || gimple_has_volatile_ops (stmt1))
7393 		break;
7394 	      gsi_from = gsi;
7395 	      gsi_prev (&gsi);
7396 	      lhs = gimple_get_lhs (stmt1);
7397 	      if (!lhs)
7398 		break;
7399 
7400 	      /* LHS of vectorized stmt must be SSA_NAME.  */
7401 	      if (TREE_CODE (lhs) != SSA_NAME)
7402 		break;
7403 
7404 	      if (!VECTOR_TYPE_P (TREE_TYPE (lhs)))
7405 		{
7406 		  /* Remove dead scalar statement.  */
7407 		  if (has_zero_uses (lhs))
7408 		    {
7409 		      gsi_remove (&gsi_from, true);
7410 		      continue;
7411 		    }
7412 		}
7413 
7414 	      /* Check that LHS does not have uses outside of STORE_BB.  */
7415 	      res = true;
7416 	      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
7417 		{
7418 		  gimple *use_stmt;
7419 		  use_stmt = USE_STMT (use_p);
7420 		  if (is_gimple_debug (use_stmt))
7421 		    continue;
7422 		  if (gimple_bb (use_stmt) != store_bb)
7423 		    {
7424 		      res = false;
7425 		      break;
7426 		    }
7427 		}
7428 	      if (!res)
7429 		break;
7430 
7431 	      if (gimple_vuse (stmt1)
7432 		  && gimple_vuse (stmt1) != gimple_vuse (last_store))
7433 		break;
7434 
7435 	      /* Can move STMT1 to STORE_BB.  */
7436 	      if (dump_enabled_p ())
7437 		{
7438 		  dump_printf_loc (MSG_NOTE, vect_location,
7439 				   "Move stmt to created bb\n");
7440 		  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt1, 0);
7441 		}
7442 	      gsi_move_before (&gsi_from, &gsi_to);
7443 	      /* Shift GSI_TO for further insertion.  */
7444 	      gsi_prev (&gsi_to);
7445 	    }
7446 	  /* Put other masked stores with the same mask to STORE_BB.  */
7447 	  if (worklist.is_empty ()
7448 	      || gimple_call_arg (worklist.last (), 2) != mask
7449 	      || worklist.last () != stmt1)
7450 	    break;
7451 	  last = worklist.pop ();
7452 	}
7453       add_phi_arg (phi, gimple_vuse (last_store), e, UNKNOWN_LOCATION);
7454     }
7455 }
7456