xref: /dflybsd-src/contrib/gcc-4.7/gcc/tree-vect-loop.c (revision 0a8dc9fc45f4d0b236341a473fac4a486375f60c)
1e4b17023SJohn Marino /* Loop Vectorization
2e4b17023SJohn Marino    Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
3e4b17023SJohn Marino    Free Software Foundation, Inc.
4e4b17023SJohn Marino    Contributed by Dorit Naishlos <dorit@il.ibm.com> and
5e4b17023SJohn Marino    Ira Rosen <irar@il.ibm.com>
6e4b17023SJohn Marino 
7e4b17023SJohn Marino This file is part of GCC.
8e4b17023SJohn Marino 
9e4b17023SJohn Marino GCC is free software; you can redistribute it and/or modify it under
10e4b17023SJohn Marino the terms of the GNU General Public License as published by the Free
11e4b17023SJohn Marino Software Foundation; either version 3, or (at your option) any later
12e4b17023SJohn Marino version.
13e4b17023SJohn Marino 
14e4b17023SJohn Marino GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15e4b17023SJohn Marino WARRANTY; without even the implied warranty of MERCHANTABILITY or
16e4b17023SJohn Marino FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
17e4b17023SJohn Marino for more details.
18e4b17023SJohn Marino 
19e4b17023SJohn Marino You should have received a copy of the GNU General Public License
20e4b17023SJohn Marino along with GCC; see the file COPYING3.  If not see
21e4b17023SJohn Marino <http://www.gnu.org/licenses/>.  */
22e4b17023SJohn Marino 
23e4b17023SJohn Marino #include "config.h"
24e4b17023SJohn Marino #include "system.h"
25e4b17023SJohn Marino #include "coretypes.h"
26e4b17023SJohn Marino #include "tm.h"
27e4b17023SJohn Marino #include "ggc.h"
28e4b17023SJohn Marino #include "tree.h"
29e4b17023SJohn Marino #include "basic-block.h"
30e4b17023SJohn Marino #include "tree-pretty-print.h"
31e4b17023SJohn Marino #include "gimple-pretty-print.h"
32e4b17023SJohn Marino #include "tree-flow.h"
33e4b17023SJohn Marino #include "tree-dump.h"
34e4b17023SJohn Marino #include "cfgloop.h"
35e4b17023SJohn Marino #include "cfglayout.h"
36e4b17023SJohn Marino #include "expr.h"
37e4b17023SJohn Marino #include "recog.h"
38e4b17023SJohn Marino #include "optabs.h"
39e4b17023SJohn Marino #include "params.h"
40e4b17023SJohn Marino #include "diagnostic-core.h"
41e4b17023SJohn Marino #include "tree-chrec.h"
42e4b17023SJohn Marino #include "tree-scalar-evolution.h"
43e4b17023SJohn Marino #include "tree-vectorizer.h"
44e4b17023SJohn Marino #include "target.h"
45e4b17023SJohn Marino 
46e4b17023SJohn Marino /* Loop Vectorization Pass.
47e4b17023SJohn Marino 
48e4b17023SJohn Marino    This pass tries to vectorize loops.
49e4b17023SJohn Marino 
50e4b17023SJohn Marino    For example, the vectorizer transforms the following simple loop:
51e4b17023SJohn Marino 
52e4b17023SJohn Marino         short a[N]; short b[N]; short c[N]; int i;
53e4b17023SJohn Marino 
54e4b17023SJohn Marino         for (i=0; i<N; i++){
55e4b17023SJohn Marino           a[i] = b[i] + c[i];
56e4b17023SJohn Marino         }
57e4b17023SJohn Marino 
58e4b17023SJohn Marino    as if it was manually vectorized by rewriting the source code into:
59e4b17023SJohn Marino 
60e4b17023SJohn Marino         typedef int __attribute__((mode(V8HI))) v8hi;
61e4b17023SJohn Marino         short a[N];  short b[N]; short c[N];   int i;
62e4b17023SJohn Marino         v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
63e4b17023SJohn Marino         v8hi va, vb, vc;
64e4b17023SJohn Marino 
65e4b17023SJohn Marino         for (i=0; i<N/8; i++){
66e4b17023SJohn Marino           vb = pb[i];
67e4b17023SJohn Marino           vc = pc[i];
68e4b17023SJohn Marino           va = vb + vc;
69e4b17023SJohn Marino           pa[i] = va;
70e4b17023SJohn Marino         }
71e4b17023SJohn Marino 
72e4b17023SJohn Marino         The main entry to this pass is vectorize_loops(), in which
73e4b17023SJohn Marino    the vectorizer applies a set of analyses on a given set of loops,
74e4b17023SJohn Marino    followed by the actual vectorization transformation for the loops that
75e4b17023SJohn Marino    had successfully passed the analysis phase.
        Throughout this pass we make a distinction between two types of
   data: scalars (which are represented by SSA_NAMEs), and memory references
   ("data-refs").  These two types of data require different handling both
   during analysis and transformation.  The types of data-refs that the
   vectorizer currently supports are ARRAY_REFs whose base is an array DECL
   (not a pointer), and INDIRECT_REFs through pointers; both array and pointer
   accesses are required to have a simple (consecutive) access pattern.
83e4b17023SJohn Marino 
84e4b17023SJohn Marino    Analysis phase:
85e4b17023SJohn Marino    ===============
86e4b17023SJohn Marino         The driver for the analysis phase is vect_analyze_loop().
87e4b17023SJohn Marino    It applies a set of analyses, some of which rely on the scalar evolution
88e4b17023SJohn Marino    analyzer (scev) developed by Sebastian Pop.
89e4b17023SJohn Marino 
90e4b17023SJohn Marino         During the analysis phase the vectorizer records some information
91e4b17023SJohn Marino    per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
92e4b17023SJohn Marino    loop, as well as general information about the loop as a whole, which is
93e4b17023SJohn Marino    recorded in a "loop_vec_info" struct attached to each loop.
94e4b17023SJohn Marino 
95e4b17023SJohn Marino    Transformation phase:
96e4b17023SJohn Marino    =====================
97e4b17023SJohn Marino         The loop transformation phase scans all the stmts in the loop, and
98e4b17023SJohn Marino    creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
99e4b17023SJohn Marino    the loop that needs to be vectorized.  It inserts the vector code sequence
100e4b17023SJohn Marino    just before the scalar stmt S, and records a pointer to the vector code
101e4b17023SJohn Marino    in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
102e4b17023SJohn Marino    attached to S).  This pointer will be used for the vectorization of following
103e4b17023SJohn Marino    stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
104e4b17023SJohn Marino    otherwise, we rely on dead code elimination for removing it.
105e4b17023SJohn Marino 
106e4b17023SJohn Marino         For example, say stmt S1 was vectorized into stmt VS1:
107e4b17023SJohn Marino 
108e4b17023SJohn Marino    VS1: vb = px[i];
109e4b17023SJohn Marino    S1:  b = x[i];    STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
110e4b17023SJohn Marino    S2:  a = b;
111e4b17023SJohn Marino 
112e4b17023SJohn Marino    To vectorize stmt S2, the vectorizer first finds the stmt that defines
113e4b17023SJohn Marino    the operand 'b' (S1), and gets the relevant vector def 'vb' from the
114e4b17023SJohn Marino    vector stmt VS1 pointed to by STMT_VINFO_VEC_STMT (stmt_info (S1)).  The
115e4b17023SJohn Marino    resulting sequence would be:
116e4b17023SJohn Marino 
117e4b17023SJohn Marino    VS1: vb = px[i];
118e4b17023SJohn Marino    S1:  b = x[i];       STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
119e4b17023SJohn Marino    VS2: va = vb;
120e4b17023SJohn Marino    S2:  a = b;          STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
121e4b17023SJohn Marino 
122e4b17023SJohn Marino         Operands that are not SSA_NAMEs, are data-refs that appear in
123e4b17023SJohn Marino    load/store operations (like 'x[i]' in S1), and are handled differently.
124e4b17023SJohn Marino 
125e4b17023SJohn Marino    Target modeling:
126e4b17023SJohn Marino    =================
127e4b17023SJohn Marino         Currently the only target specific information that is used is the
128e4b17023SJohn Marino    size of the vector (in bytes) - "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD".
129e4b17023SJohn Marino    Targets that can support different sizes of vectors, for now will need
130e4b17023SJohn Marino    to specify one value for "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD".  More
131e4b17023SJohn Marino    flexibility will be added in the future.
132e4b17023SJohn Marino 
        Since we only vectorize operations whose vector form can be
   expressed using existing tree codes, to verify that an operation is
   supported, the vectorizer checks the relevant optab at the relevant
   machine_mode (e.g., optab_handler (add_optab, V8HImode)).  If
   the value found is CODE_FOR_nothing, then there's no target support, and
   we can't vectorize the stmt.
139e4b17023SJohn Marino 
140e4b17023SJohn Marino    For additional information on this project see:
141e4b17023SJohn Marino    http://gcc.gnu.org/projects/tree-ssa/vectorization.html
142e4b17023SJohn Marino */
143e4b17023SJohn Marino 
144e4b17023SJohn Marino /* Function vect_determine_vectorization_factor
145e4b17023SJohn Marino 
146e4b17023SJohn Marino    Determine the vectorization factor (VF).  VF is the number of data elements
147e4b17023SJohn Marino    that are operated upon in parallel in a single iteration of the vectorized
148e4b17023SJohn Marino    loop.  For example, when vectorizing a loop that operates on 4byte elements,
149e4b17023SJohn Marino    on a target with vector size (VS) 16byte, the VF is set to 4, since 4
150e4b17023SJohn Marino    elements can fit in a single vector register.
151e4b17023SJohn Marino 
152e4b17023SJohn Marino    We currently support vectorization of loops in which all types operated upon
153e4b17023SJohn Marino    are of the same size.  Therefore this function currently sets VF according to
154e4b17023SJohn Marino    the size of the types operated upon, and fails if there are multiple sizes
155e4b17023SJohn Marino    in the loop.
156e4b17023SJohn Marino 
157e4b17023SJohn Marino    VF is also the factor by which the loop iterations are strip-mined, e.g.:
158e4b17023SJohn Marino    original loop:
159e4b17023SJohn Marino         for (i=0; i<N; i++){
160e4b17023SJohn Marino           a[i] = b[i] + c[i];
161e4b17023SJohn Marino         }
162e4b17023SJohn Marino 
163e4b17023SJohn Marino    vectorized loop:
164e4b17023SJohn Marino         for (i=0; i<N; i+=VF){
165e4b17023SJohn Marino           a[i:VF] = b[i:VF] + c[i:VF];
166e4b17023SJohn Marino         }
167e4b17023SJohn Marino */
168e4b17023SJohn Marino 
169e4b17023SJohn Marino static bool
vect_determine_vectorization_factor(loop_vec_info loop_vinfo)170e4b17023SJohn Marino vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
171e4b17023SJohn Marino {
172e4b17023SJohn Marino   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
173e4b17023SJohn Marino   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
174e4b17023SJohn Marino   int nbbs = loop->num_nodes;
175e4b17023SJohn Marino   gimple_stmt_iterator si;
176e4b17023SJohn Marino   unsigned int vectorization_factor = 0;
177e4b17023SJohn Marino   tree scalar_type;
178e4b17023SJohn Marino   gimple phi;
179e4b17023SJohn Marino   tree vectype;
180e4b17023SJohn Marino   unsigned int nunits;
181e4b17023SJohn Marino   stmt_vec_info stmt_info;
182e4b17023SJohn Marino   int i;
183e4b17023SJohn Marino   HOST_WIDE_INT dummy;
184e4b17023SJohn Marino   gimple stmt, pattern_stmt = NULL;
185e4b17023SJohn Marino   gimple_seq pattern_def_seq = NULL;
186e4b17023SJohn Marino   gimple_stmt_iterator pattern_def_si = gsi_start (NULL);
187e4b17023SJohn Marino   bool analyze_pattern_stmt = false;
188e4b17023SJohn Marino 
189e4b17023SJohn Marino   if (vect_print_dump_info (REPORT_DETAILS))
190e4b17023SJohn Marino     fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");
191e4b17023SJohn Marino 
192e4b17023SJohn Marino   for (i = 0; i < nbbs; i++)
193e4b17023SJohn Marino     {
194e4b17023SJohn Marino       basic_block bb = bbs[i];
195e4b17023SJohn Marino 
196e4b17023SJohn Marino       for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
197e4b17023SJohn Marino 	{
198e4b17023SJohn Marino 	  phi = gsi_stmt (si);
199e4b17023SJohn Marino 	  stmt_info = vinfo_for_stmt (phi);
200e4b17023SJohn Marino 	  if (vect_print_dump_info (REPORT_DETAILS))
201e4b17023SJohn Marino 	    {
202e4b17023SJohn Marino 	      fprintf (vect_dump, "==> examining phi: ");
203e4b17023SJohn Marino 	      print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
204e4b17023SJohn Marino 	    }
205e4b17023SJohn Marino 
206e4b17023SJohn Marino 	  gcc_assert (stmt_info);
207e4b17023SJohn Marino 
208e4b17023SJohn Marino 	  if (STMT_VINFO_RELEVANT_P (stmt_info))
209e4b17023SJohn Marino             {
210e4b17023SJohn Marino 	      gcc_assert (!STMT_VINFO_VECTYPE (stmt_info));
211e4b17023SJohn Marino               scalar_type = TREE_TYPE (PHI_RESULT (phi));
212e4b17023SJohn Marino 
213e4b17023SJohn Marino 	      if (vect_print_dump_info (REPORT_DETAILS))
214e4b17023SJohn Marino 		{
215e4b17023SJohn Marino 		  fprintf (vect_dump, "get vectype for scalar type:  ");
216e4b17023SJohn Marino 		  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
217e4b17023SJohn Marino 		}
218e4b17023SJohn Marino 
219e4b17023SJohn Marino 	      vectype = get_vectype_for_scalar_type (scalar_type);
220e4b17023SJohn Marino 	      if (!vectype)
221e4b17023SJohn Marino 		{
222e4b17023SJohn Marino 		  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
223e4b17023SJohn Marino 		    {
224e4b17023SJohn Marino 		      fprintf (vect_dump,
225e4b17023SJohn Marino 		               "not vectorized: unsupported data-type ");
226e4b17023SJohn Marino 		      print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
227e4b17023SJohn Marino 		    }
228e4b17023SJohn Marino 		  return false;
229e4b17023SJohn Marino 		}
230e4b17023SJohn Marino 	      STMT_VINFO_VECTYPE (stmt_info) = vectype;
231e4b17023SJohn Marino 
232e4b17023SJohn Marino 	      if (vect_print_dump_info (REPORT_DETAILS))
233e4b17023SJohn Marino 		{
234e4b17023SJohn Marino 		  fprintf (vect_dump, "vectype: ");
235e4b17023SJohn Marino 		  print_generic_expr (vect_dump, vectype, TDF_SLIM);
236e4b17023SJohn Marino 		}
237e4b17023SJohn Marino 
238e4b17023SJohn Marino 	      nunits = TYPE_VECTOR_SUBPARTS (vectype);
239e4b17023SJohn Marino 	      if (vect_print_dump_info (REPORT_DETAILS))
240e4b17023SJohn Marino 		fprintf (vect_dump, "nunits = %d", nunits);
241e4b17023SJohn Marino 
242e4b17023SJohn Marino 	      if (!vectorization_factor
243e4b17023SJohn Marino 		  || (nunits > vectorization_factor))
244e4b17023SJohn Marino 		vectorization_factor = nunits;
245e4b17023SJohn Marino 	    }
246e4b17023SJohn Marino 	}
247e4b17023SJohn Marino 
248e4b17023SJohn Marino       for (si = gsi_start_bb (bb); !gsi_end_p (si) || analyze_pattern_stmt;)
249e4b17023SJohn Marino         {
250e4b17023SJohn Marino           tree vf_vectype;
251e4b17023SJohn Marino 
252e4b17023SJohn Marino           if (analyze_pattern_stmt)
253e4b17023SJohn Marino 	    stmt = pattern_stmt;
254e4b17023SJohn Marino           else
255e4b17023SJohn Marino             stmt = gsi_stmt (si);
256e4b17023SJohn Marino 
257e4b17023SJohn Marino           stmt_info = vinfo_for_stmt (stmt);
258e4b17023SJohn Marino 
259e4b17023SJohn Marino 	  if (vect_print_dump_info (REPORT_DETAILS))
260e4b17023SJohn Marino 	    {
261e4b17023SJohn Marino 	      fprintf (vect_dump, "==> examining statement: ");
262e4b17023SJohn Marino 	      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
263e4b17023SJohn Marino 	    }
264e4b17023SJohn Marino 
265e4b17023SJohn Marino 	  gcc_assert (stmt_info);
266e4b17023SJohn Marino 
267e4b17023SJohn Marino 	  /* Skip stmts which do not need to be vectorized.  */
268e4b17023SJohn Marino 	  if (!STMT_VINFO_RELEVANT_P (stmt_info)
269e4b17023SJohn Marino 	      && !STMT_VINFO_LIVE_P (stmt_info))
270e4b17023SJohn Marino             {
271e4b17023SJohn Marino               if (STMT_VINFO_IN_PATTERN_P (stmt_info)
272e4b17023SJohn Marino                   && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
273e4b17023SJohn Marino                   && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
274e4b17023SJohn Marino                       || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
275e4b17023SJohn Marino                 {
276e4b17023SJohn Marino                   stmt = pattern_stmt;
277e4b17023SJohn Marino                   stmt_info = vinfo_for_stmt (pattern_stmt);
278e4b17023SJohn Marino                   if (vect_print_dump_info (REPORT_DETAILS))
279e4b17023SJohn Marino                     {
280e4b17023SJohn Marino                       fprintf (vect_dump, "==> examining pattern statement: ");
281e4b17023SJohn Marino                       print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
282e4b17023SJohn Marino                     }
283e4b17023SJohn Marino                 }
284e4b17023SJohn Marino               else
285e4b17023SJohn Marino 	        {
286e4b17023SJohn Marino 	          if (vect_print_dump_info (REPORT_DETAILS))
287e4b17023SJohn Marino 	            fprintf (vect_dump, "skip.");
288e4b17023SJohn Marino                   gsi_next (&si);
289e4b17023SJohn Marino 	          continue;
290e4b17023SJohn Marino                 }
291e4b17023SJohn Marino 	    }
292e4b17023SJohn Marino           else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
293e4b17023SJohn Marino                    && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
294e4b17023SJohn Marino                    && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
295e4b17023SJohn Marino                        || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
296e4b17023SJohn Marino             analyze_pattern_stmt = true;
297e4b17023SJohn Marino 
298e4b17023SJohn Marino 	  /* If a pattern statement has def stmts, analyze them too.  */
299e4b17023SJohn Marino 	  if (is_pattern_stmt_p (stmt_info))
300e4b17023SJohn Marino 	    {
301e4b17023SJohn Marino 	      if (pattern_def_seq == NULL)
302e4b17023SJohn Marino 		{
303e4b17023SJohn Marino 		  pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
304e4b17023SJohn Marino 		  pattern_def_si = gsi_start (pattern_def_seq);
305e4b17023SJohn Marino 		}
306e4b17023SJohn Marino 	      else if (!gsi_end_p (pattern_def_si))
307e4b17023SJohn Marino 		gsi_next (&pattern_def_si);
308e4b17023SJohn Marino 	      if (pattern_def_seq != NULL)
309e4b17023SJohn Marino 		{
310e4b17023SJohn Marino 		  gimple pattern_def_stmt = NULL;
311e4b17023SJohn Marino 		  stmt_vec_info pattern_def_stmt_info = NULL;
312e4b17023SJohn Marino 
313e4b17023SJohn Marino 		  while (!gsi_end_p (pattern_def_si))
314e4b17023SJohn Marino 		    {
315e4b17023SJohn Marino 		      pattern_def_stmt = gsi_stmt (pattern_def_si);
316e4b17023SJohn Marino 		      pattern_def_stmt_info
317e4b17023SJohn Marino 			= vinfo_for_stmt (pattern_def_stmt);
318e4b17023SJohn Marino 		      if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
319e4b17023SJohn Marino 			  || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
320e4b17023SJohn Marino 			break;
321e4b17023SJohn Marino 		      gsi_next (&pattern_def_si);
322e4b17023SJohn Marino 		    }
323e4b17023SJohn Marino 
324e4b17023SJohn Marino 		  if (!gsi_end_p (pattern_def_si))
325e4b17023SJohn Marino 		    {
326e4b17023SJohn Marino 		      if (vect_print_dump_info (REPORT_DETAILS))
327e4b17023SJohn Marino 			{
328e4b17023SJohn Marino 			  fprintf (vect_dump,
329e4b17023SJohn Marino 				   "==> examining pattern def stmt: ");
330e4b17023SJohn Marino 			  print_gimple_stmt (vect_dump, pattern_def_stmt, 0,
331e4b17023SJohn Marino 					     TDF_SLIM);
332e4b17023SJohn Marino 			}
333e4b17023SJohn Marino 
334e4b17023SJohn Marino 		      stmt = pattern_def_stmt;
335e4b17023SJohn Marino 		      stmt_info = pattern_def_stmt_info;
336e4b17023SJohn Marino 		    }
337e4b17023SJohn Marino 		  else
338e4b17023SJohn Marino 		    {
339e4b17023SJohn Marino 		      pattern_def_si = gsi_start (NULL);
340e4b17023SJohn Marino 		      analyze_pattern_stmt = false;
341e4b17023SJohn Marino 		    }
342e4b17023SJohn Marino 		}
343e4b17023SJohn Marino 	      else
344e4b17023SJohn Marino 		analyze_pattern_stmt = false;
345e4b17023SJohn Marino 	    }
346e4b17023SJohn Marino 
347e4b17023SJohn Marino 	  if (gimple_get_lhs (stmt) == NULL_TREE)
348e4b17023SJohn Marino 	    {
349e4b17023SJohn Marino 	      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
350e4b17023SJohn Marino 		{
351e4b17023SJohn Marino 	          fprintf (vect_dump, "not vectorized: irregular stmt.");
352e4b17023SJohn Marino 		  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
353e4b17023SJohn Marino 		}
354e4b17023SJohn Marino 	      return false;
355e4b17023SJohn Marino 	    }
356e4b17023SJohn Marino 
357e4b17023SJohn Marino 	  if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
358e4b17023SJohn Marino 	    {
359e4b17023SJohn Marino 	      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
360e4b17023SJohn Marino 	        {
361e4b17023SJohn Marino 	          fprintf (vect_dump, "not vectorized: vector stmt in loop:");
362e4b17023SJohn Marino 	          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
363e4b17023SJohn Marino 	        }
364e4b17023SJohn Marino 	      return false;
365e4b17023SJohn Marino 	    }
366e4b17023SJohn Marino 
367e4b17023SJohn Marino 	  if (STMT_VINFO_VECTYPE (stmt_info))
368e4b17023SJohn Marino 	    {
369e4b17023SJohn Marino 	      /* The only case when a vectype had been already set is for stmts
370e4b17023SJohn Marino 	         that contain a dataref, or for "pattern-stmts" (stmts
371e4b17023SJohn Marino 		 generated by the vectorizer to represent/replace a certain
372e4b17023SJohn Marino 		 idiom).  */
373e4b17023SJohn Marino 	      gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
374e4b17023SJohn Marino 			  || is_pattern_stmt_p (stmt_info)
375e4b17023SJohn Marino 			  || !gsi_end_p (pattern_def_si));
376e4b17023SJohn Marino 	      vectype = STMT_VINFO_VECTYPE (stmt_info);
377e4b17023SJohn Marino 	    }
378e4b17023SJohn Marino 	  else
379e4b17023SJohn Marino 	    {
380e4b17023SJohn Marino 	      gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
381e4b17023SJohn Marino 	      scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
382e4b17023SJohn Marino 	      if (vect_print_dump_info (REPORT_DETAILS))
383e4b17023SJohn Marino 		{
384e4b17023SJohn Marino 		  fprintf (vect_dump, "get vectype for scalar type:  ");
385e4b17023SJohn Marino 		  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
386e4b17023SJohn Marino 		}
387e4b17023SJohn Marino 	      vectype = get_vectype_for_scalar_type (scalar_type);
388e4b17023SJohn Marino 	      if (!vectype)
389e4b17023SJohn Marino 		{
390e4b17023SJohn Marino 		  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
391e4b17023SJohn Marino 		    {
392e4b17023SJohn Marino 		      fprintf (vect_dump,
393e4b17023SJohn Marino 			       "not vectorized: unsupported data-type ");
394e4b17023SJohn Marino 		      print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
395e4b17023SJohn Marino 		    }
396e4b17023SJohn Marino 		  return false;
397e4b17023SJohn Marino 		}
398e4b17023SJohn Marino 
399e4b17023SJohn Marino 	      STMT_VINFO_VECTYPE (stmt_info) = vectype;
400e4b17023SJohn Marino             }
401e4b17023SJohn Marino 
402e4b17023SJohn Marino 	  /* The vectorization factor is according to the smallest
403e4b17023SJohn Marino 	     scalar type (or the largest vector size, but we only
404e4b17023SJohn Marino 	     support one vector size per loop).  */
405e4b17023SJohn Marino 	  scalar_type = vect_get_smallest_scalar_type (stmt, &dummy,
406e4b17023SJohn Marino 						       &dummy);
407e4b17023SJohn Marino 	  if (vect_print_dump_info (REPORT_DETAILS))
408e4b17023SJohn Marino 	    {
409e4b17023SJohn Marino 	      fprintf (vect_dump, "get vectype for scalar type:  ");
410e4b17023SJohn Marino 	      print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
411e4b17023SJohn Marino 	    }
412e4b17023SJohn Marino 	  vf_vectype = get_vectype_for_scalar_type (scalar_type);
413e4b17023SJohn Marino 	  if (!vf_vectype)
414e4b17023SJohn Marino 	    {
415e4b17023SJohn Marino 	      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
416e4b17023SJohn Marino 		{
417e4b17023SJohn Marino 		  fprintf (vect_dump,
418e4b17023SJohn Marino 			   "not vectorized: unsupported data-type ");
419e4b17023SJohn Marino 		  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
420e4b17023SJohn Marino 		}
421e4b17023SJohn Marino 	      return false;
422e4b17023SJohn Marino 	    }
423e4b17023SJohn Marino 
424e4b17023SJohn Marino 	  if ((GET_MODE_SIZE (TYPE_MODE (vectype))
425e4b17023SJohn Marino 	       != GET_MODE_SIZE (TYPE_MODE (vf_vectype))))
426e4b17023SJohn Marino 	    {
427e4b17023SJohn Marino 	      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
428e4b17023SJohn Marino 		{
429e4b17023SJohn Marino 		  fprintf (vect_dump,
430e4b17023SJohn Marino 			   "not vectorized: different sized vector "
431e4b17023SJohn Marino 			   "types in statement, ");
432e4b17023SJohn Marino 		  print_generic_expr (vect_dump, vectype, TDF_SLIM);
433e4b17023SJohn Marino 		  fprintf (vect_dump, " and ");
434e4b17023SJohn Marino 		  print_generic_expr (vect_dump, vf_vectype, TDF_SLIM);
435e4b17023SJohn Marino 		}
436e4b17023SJohn Marino 	      return false;
437e4b17023SJohn Marino 	    }
438e4b17023SJohn Marino 
439e4b17023SJohn Marino 	  if (vect_print_dump_info (REPORT_DETAILS))
440e4b17023SJohn Marino 	    {
441e4b17023SJohn Marino 	      fprintf (vect_dump, "vectype: ");
442e4b17023SJohn Marino 	      print_generic_expr (vect_dump, vf_vectype, TDF_SLIM);
443e4b17023SJohn Marino 	    }
444e4b17023SJohn Marino 
445e4b17023SJohn Marino 	  nunits = TYPE_VECTOR_SUBPARTS (vf_vectype);
446e4b17023SJohn Marino 	  if (vect_print_dump_info (REPORT_DETAILS))
447e4b17023SJohn Marino 	    fprintf (vect_dump, "nunits = %d", nunits);
448e4b17023SJohn Marino 
449e4b17023SJohn Marino 	  if (!vectorization_factor
450e4b17023SJohn Marino 	      || (nunits > vectorization_factor))
451e4b17023SJohn Marino 	    vectorization_factor = nunits;
452e4b17023SJohn Marino 
453e4b17023SJohn Marino 	  if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
454e4b17023SJohn Marino 	    {
455e4b17023SJohn Marino 	      pattern_def_seq = NULL;
456e4b17023SJohn Marino 	      gsi_next (&si);
457e4b17023SJohn Marino 	    }
458e4b17023SJohn Marino         }
459e4b17023SJohn Marino     }
460e4b17023SJohn Marino 
461e4b17023SJohn Marino   /* TODO: Analyze cost. Decide if worth while to vectorize.  */
462e4b17023SJohn Marino   if (vect_print_dump_info (REPORT_DETAILS))
463e4b17023SJohn Marino     fprintf (vect_dump, "vectorization factor = %d", vectorization_factor);
464e4b17023SJohn Marino   if (vectorization_factor <= 1)
465e4b17023SJohn Marino     {
466e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
467e4b17023SJohn Marino         fprintf (vect_dump, "not vectorized: unsupported data-type");
468e4b17023SJohn Marino       return false;
469e4b17023SJohn Marino     }
470e4b17023SJohn Marino   LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
471e4b17023SJohn Marino 
472e4b17023SJohn Marino   return true;
473e4b17023SJohn Marino }
474e4b17023SJohn Marino 
475e4b17023SJohn Marino 
476e4b17023SJohn Marino /* Function vect_is_simple_iv_evolution.
477e4b17023SJohn Marino 
478e4b17023SJohn Marino    FORNOW: A simple evolution of an induction variables in the loop is
479e4b17023SJohn Marino    considered a polynomial evolution with constant step.  */
480e4b17023SJohn Marino 
481e4b17023SJohn Marino static bool
vect_is_simple_iv_evolution(unsigned loop_nb,tree access_fn,tree * init,tree * step)482e4b17023SJohn Marino vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
483e4b17023SJohn Marino                              tree * step)
484e4b17023SJohn Marino {
485e4b17023SJohn Marino   tree init_expr;
486e4b17023SJohn Marino   tree step_expr;
487e4b17023SJohn Marino   tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
488e4b17023SJohn Marino 
489e4b17023SJohn Marino   /* When there is no evolution in this loop, the evolution function
490e4b17023SJohn Marino      is not "simple".  */
491e4b17023SJohn Marino   if (evolution_part == NULL_TREE)
492e4b17023SJohn Marino     return false;
493e4b17023SJohn Marino 
494e4b17023SJohn Marino   /* When the evolution is a polynomial of degree >= 2
495e4b17023SJohn Marino      the evolution function is not "simple".  */
496e4b17023SJohn Marino   if (tree_is_chrec (evolution_part))
497e4b17023SJohn Marino     return false;
498e4b17023SJohn Marino 
499e4b17023SJohn Marino   step_expr = evolution_part;
500e4b17023SJohn Marino   init_expr = unshare_expr (initial_condition_in_loop_num (access_fn, loop_nb));
501e4b17023SJohn Marino 
502e4b17023SJohn Marino   if (vect_print_dump_info (REPORT_DETAILS))
503e4b17023SJohn Marino     {
504e4b17023SJohn Marino       fprintf (vect_dump, "step: ");
505e4b17023SJohn Marino       print_generic_expr (vect_dump, step_expr, TDF_SLIM);
506e4b17023SJohn Marino       fprintf (vect_dump, ",  init: ");
507e4b17023SJohn Marino       print_generic_expr (vect_dump, init_expr, TDF_SLIM);
508e4b17023SJohn Marino     }
509e4b17023SJohn Marino 
510e4b17023SJohn Marino   *init = init_expr;
511e4b17023SJohn Marino   *step = step_expr;
512e4b17023SJohn Marino 
513e4b17023SJohn Marino   if (TREE_CODE (step_expr) != INTEGER_CST)
514e4b17023SJohn Marino     {
515e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_DETAILS))
516e4b17023SJohn Marino         fprintf (vect_dump, "step unknown.");
517e4b17023SJohn Marino       return false;
518e4b17023SJohn Marino     }
519e4b17023SJohn Marino 
520e4b17023SJohn Marino   return true;
521e4b17023SJohn Marino }
522e4b17023SJohn Marino 
523e4b17023SJohn Marino /* Function vect_analyze_scalar_cycles_1.
524e4b17023SJohn Marino 
525e4b17023SJohn Marino    Examine the cross iteration def-use cycles of scalar variables
526e4b17023SJohn Marino    in LOOP.  LOOP_VINFO represents the loop that is now being
527e4b17023SJohn Marino    considered for vectorization (can be LOOP, or an outer-loop
528e4b17023SJohn Marino    enclosing LOOP).  */
529e4b17023SJohn Marino 
530e4b17023SJohn Marino static void
vect_analyze_scalar_cycles_1(loop_vec_info loop_vinfo,struct loop * loop)531e4b17023SJohn Marino vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
532e4b17023SJohn Marino {
533e4b17023SJohn Marino   basic_block bb = loop->header;
534e4b17023SJohn Marino   tree dumy;
535e4b17023SJohn Marino   VEC(gimple,heap) *worklist = VEC_alloc (gimple, heap, 64);
536e4b17023SJohn Marino   gimple_stmt_iterator gsi;
537e4b17023SJohn Marino   bool double_reduc;
538e4b17023SJohn Marino 
539e4b17023SJohn Marino   if (vect_print_dump_info (REPORT_DETAILS))
540e4b17023SJohn Marino     fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");
541e4b17023SJohn Marino 
542e4b17023SJohn Marino   /* First - identify all inductions.  Reduction detection assumes that all the
543e4b17023SJohn Marino      inductions have been identified, therefore, this order must not be
544e4b17023SJohn Marino      changed.  */
545e4b17023SJohn Marino   for (gsi = gsi_start_phis  (bb); !gsi_end_p (gsi); gsi_next (&gsi))
546e4b17023SJohn Marino     {
547e4b17023SJohn Marino       gimple phi = gsi_stmt (gsi);
548e4b17023SJohn Marino       tree access_fn = NULL;
549e4b17023SJohn Marino       tree def = PHI_RESULT (phi);
550e4b17023SJohn Marino       stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
551e4b17023SJohn Marino 
552e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_DETAILS))
553e4b17023SJohn Marino 	{
554e4b17023SJohn Marino 	  fprintf (vect_dump, "Analyze phi: ");
555e4b17023SJohn Marino 	  print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
556e4b17023SJohn Marino 	}
557e4b17023SJohn Marino 
558e4b17023SJohn Marino       /* Skip virtual phi's.  The data dependences that are associated with
559e4b17023SJohn Marino          virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
560e4b17023SJohn Marino       if (!is_gimple_reg (SSA_NAME_VAR (def)))
561e4b17023SJohn Marino 	continue;
562e4b17023SJohn Marino 
563e4b17023SJohn Marino       STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;
564e4b17023SJohn Marino 
565e4b17023SJohn Marino       /* Analyze the evolution function.  */
566e4b17023SJohn Marino       access_fn = analyze_scalar_evolution (loop, def);
567e4b17023SJohn Marino       if (access_fn)
568e4b17023SJohn Marino 	{
569e4b17023SJohn Marino 	  STRIP_NOPS (access_fn);
570e4b17023SJohn Marino 	  if (vect_print_dump_info (REPORT_DETAILS))
571e4b17023SJohn Marino 	    {
572e4b17023SJohn Marino 	      fprintf (vect_dump, "Access function of PHI: ");
573e4b17023SJohn Marino 	      print_generic_expr (vect_dump, access_fn, TDF_SLIM);
574e4b17023SJohn Marino 	    }
575e4b17023SJohn Marino 	  STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo)
576e4b17023SJohn Marino 	    = evolution_part_in_loop_num (access_fn, loop->num);
577e4b17023SJohn Marino 	}
578e4b17023SJohn Marino 
579e4b17023SJohn Marino       if (!access_fn
580e4b17023SJohn Marino 	  || !vect_is_simple_iv_evolution (loop->num, access_fn, &dumy, &dumy))
581e4b17023SJohn Marino 	{
582e4b17023SJohn Marino 	  VEC_safe_push (gimple, heap, worklist, phi);
583e4b17023SJohn Marino 	  continue;
584e4b17023SJohn Marino 	}
585e4b17023SJohn Marino 
586e4b17023SJohn Marino       gcc_assert (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo) != NULL_TREE);
587e4b17023SJohn Marino 
588e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_DETAILS))
589e4b17023SJohn Marino 	fprintf (vect_dump, "Detected induction.");
590e4b17023SJohn Marino       STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
591e4b17023SJohn Marino     }
592e4b17023SJohn Marino 
593e4b17023SJohn Marino 
594e4b17023SJohn Marino   /* Second - identify all reductions and nested cycles.  */
595e4b17023SJohn Marino   while (VEC_length (gimple, worklist) > 0)
596e4b17023SJohn Marino     {
597e4b17023SJohn Marino       gimple phi = VEC_pop (gimple, worklist);
598e4b17023SJohn Marino       tree def = PHI_RESULT (phi);
599e4b17023SJohn Marino       stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
600e4b17023SJohn Marino       gimple reduc_stmt;
601e4b17023SJohn Marino       bool nested_cycle;
602e4b17023SJohn Marino 
603e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_DETAILS))
604e4b17023SJohn Marino         {
605e4b17023SJohn Marino           fprintf (vect_dump, "Analyze phi: ");
606e4b17023SJohn Marino           print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
607e4b17023SJohn Marino         }
608e4b17023SJohn Marino 
609e4b17023SJohn Marino       gcc_assert (is_gimple_reg (SSA_NAME_VAR (def)));
610e4b17023SJohn Marino       gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);
611e4b17023SJohn Marino 
612e4b17023SJohn Marino       nested_cycle = (loop != LOOP_VINFO_LOOP (loop_vinfo));
613e4b17023SJohn Marino       reduc_stmt = vect_force_simple_reduction (loop_vinfo, phi, !nested_cycle,
614e4b17023SJohn Marino 						&double_reduc);
615e4b17023SJohn Marino       if (reduc_stmt)
616e4b17023SJohn Marino         {
617e4b17023SJohn Marino           if (double_reduc)
618e4b17023SJohn Marino             {
619e4b17023SJohn Marino               if (vect_print_dump_info (REPORT_DETAILS))
620e4b17023SJohn Marino                 fprintf (vect_dump, "Detected double reduction.");
621e4b17023SJohn Marino 
622e4b17023SJohn Marino               STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_double_reduction_def;
623e4b17023SJohn Marino               STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
624e4b17023SJohn Marino                                                     vect_double_reduction_def;
625e4b17023SJohn Marino             }
626e4b17023SJohn Marino           else
627e4b17023SJohn Marino             {
628e4b17023SJohn Marino               if (nested_cycle)
629e4b17023SJohn Marino                 {
630e4b17023SJohn Marino                   if (vect_print_dump_info (REPORT_DETAILS))
631e4b17023SJohn Marino                     fprintf (vect_dump, "Detected vectorizable nested cycle.");
632e4b17023SJohn Marino 
633e4b17023SJohn Marino                   STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_nested_cycle;
634e4b17023SJohn Marino                   STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
635e4b17023SJohn Marino                                                              vect_nested_cycle;
636e4b17023SJohn Marino                 }
637e4b17023SJohn Marino               else
638e4b17023SJohn Marino                 {
639e4b17023SJohn Marino                   if (vect_print_dump_info (REPORT_DETAILS))
640e4b17023SJohn Marino                     fprintf (vect_dump, "Detected reduction.");
641e4b17023SJohn Marino 
642e4b17023SJohn Marino                   STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
643e4b17023SJohn Marino                   STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
644e4b17023SJohn Marino                                                            vect_reduction_def;
645e4b17023SJohn Marino                   /* Store the reduction cycles for possible vectorization in
646e4b17023SJohn Marino                      loop-aware SLP.  */
647e4b17023SJohn Marino                   VEC_safe_push (gimple, heap,
648e4b17023SJohn Marino                                  LOOP_VINFO_REDUCTIONS (loop_vinfo),
649e4b17023SJohn Marino                                  reduc_stmt);
650e4b17023SJohn Marino                 }
651e4b17023SJohn Marino             }
652e4b17023SJohn Marino         }
653e4b17023SJohn Marino       else
654e4b17023SJohn Marino         if (vect_print_dump_info (REPORT_DETAILS))
655e4b17023SJohn Marino           fprintf (vect_dump, "Unknown def-use cycle pattern.");
656e4b17023SJohn Marino     }
657e4b17023SJohn Marino 
658e4b17023SJohn Marino   VEC_free (gimple, heap, worklist);
659e4b17023SJohn Marino }
660e4b17023SJohn Marino 
661e4b17023SJohn Marino 
662e4b17023SJohn Marino /* Function vect_analyze_scalar_cycles.
663e4b17023SJohn Marino 
664e4b17023SJohn Marino    Examine the cross iteration def-use cycles of scalar variables, by
665e4b17023SJohn Marino    analyzing the loop-header PHIs of scalar variables.  Classify each
666e4b17023SJohn Marino    cycle as one of the following: invariant, induction, reduction, unknown.
667e4b17023SJohn Marino    We do that for the loop represented by LOOP_VINFO, and also to its
668e4b17023SJohn Marino    inner-loop, if exists.
669e4b17023SJohn Marino    Examples for scalar cycles:
670e4b17023SJohn Marino 
671e4b17023SJohn Marino    Example1: reduction:
672e4b17023SJohn Marino 
673e4b17023SJohn Marino               loop1:
674e4b17023SJohn Marino               for (i=0; i<N; i++)
675e4b17023SJohn Marino                  sum += a[i];
676e4b17023SJohn Marino 
677e4b17023SJohn Marino    Example2: induction:
678e4b17023SJohn Marino 
679e4b17023SJohn Marino               loop2:
680e4b17023SJohn Marino               for (i=0; i<N; i++)
681e4b17023SJohn Marino                  a[i] = i;  */
682e4b17023SJohn Marino 
683e4b17023SJohn Marino static void
vect_analyze_scalar_cycles(loop_vec_info loop_vinfo)684e4b17023SJohn Marino vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
685e4b17023SJohn Marino {
686e4b17023SJohn Marino   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
687e4b17023SJohn Marino 
688e4b17023SJohn Marino   vect_analyze_scalar_cycles_1 (loop_vinfo, loop);
689e4b17023SJohn Marino 
690e4b17023SJohn Marino   /* When vectorizing an outer-loop, the inner-loop is executed sequentially.
691e4b17023SJohn Marino      Reductions in such inner-loop therefore have different properties than
692e4b17023SJohn Marino      the reductions in the nest that gets vectorized:
693e4b17023SJohn Marino      1. When vectorized, they are executed in the same order as in the original
694e4b17023SJohn Marino         scalar loop, so we can't change the order of computation when
695e4b17023SJohn Marino         vectorizing them.
696e4b17023SJohn Marino      2. FIXME: Inner-loop reductions can be used in the inner-loop, so the
697e4b17023SJohn Marino         current checks are too strict.  */
698e4b17023SJohn Marino 
699e4b17023SJohn Marino   if (loop->inner)
700e4b17023SJohn Marino     vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
701e4b17023SJohn Marino }
702e4b17023SJohn Marino 
703e4b17023SJohn Marino /* Function vect_get_loop_niters.
704e4b17023SJohn Marino 
705e4b17023SJohn Marino    Determine how many iterations the loop is executed.
706e4b17023SJohn Marino    If an expression that represents the number of iterations
707e4b17023SJohn Marino    can be constructed, place it in NUMBER_OF_ITERATIONS.
708e4b17023SJohn Marino    Return the loop exit condition.  */
709e4b17023SJohn Marino 
710e4b17023SJohn Marino static gimple
vect_get_loop_niters(struct loop * loop,tree * number_of_iterations)711e4b17023SJohn Marino vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
712e4b17023SJohn Marino {
713e4b17023SJohn Marino   tree niters;
714e4b17023SJohn Marino 
715e4b17023SJohn Marino   if (vect_print_dump_info (REPORT_DETAILS))
716e4b17023SJohn Marino     fprintf (vect_dump, "=== get_loop_niters ===");
717e4b17023SJohn Marino 
718e4b17023SJohn Marino   niters = number_of_exit_cond_executions (loop);
719e4b17023SJohn Marino 
720e4b17023SJohn Marino   if (niters != NULL_TREE
721e4b17023SJohn Marino       && niters != chrec_dont_know)
722e4b17023SJohn Marino     {
723e4b17023SJohn Marino       *number_of_iterations = niters;
724e4b17023SJohn Marino 
725e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_DETAILS))
726e4b17023SJohn Marino         {
727e4b17023SJohn Marino           fprintf (vect_dump, "==> get_loop_niters:" );
728e4b17023SJohn Marino           print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
729e4b17023SJohn Marino         }
730e4b17023SJohn Marino     }
731e4b17023SJohn Marino 
732e4b17023SJohn Marino   return get_loop_exit_condition (loop);
733e4b17023SJohn Marino }
734e4b17023SJohn Marino 
735e4b17023SJohn Marino 
736e4b17023SJohn Marino /* Function bb_in_loop_p
737e4b17023SJohn Marino 
738e4b17023SJohn Marino    Used as predicate for dfs order traversal of the loop bbs.  */
739e4b17023SJohn Marino 
740e4b17023SJohn Marino static bool
bb_in_loop_p(const_basic_block bb,const void * data)741e4b17023SJohn Marino bb_in_loop_p (const_basic_block bb, const void *data)
742e4b17023SJohn Marino {
743e4b17023SJohn Marino   const struct loop *const loop = (const struct loop *)data;
744e4b17023SJohn Marino   if (flow_bb_inside_loop_p (loop, bb))
745e4b17023SJohn Marino     return true;
746e4b17023SJohn Marino   return false;
747e4b17023SJohn Marino }
748e4b17023SJohn Marino 
749e4b17023SJohn Marino 
750e4b17023SJohn Marino /* Function new_loop_vec_info.
751e4b17023SJohn Marino 
752e4b17023SJohn Marino    Create and initialize a new loop_vec_info struct for LOOP, as well as
753e4b17023SJohn Marino    stmt_vec_info structs for all the stmts in LOOP.  */
754e4b17023SJohn Marino 
755e4b17023SJohn Marino static loop_vec_info
new_loop_vec_info(struct loop * loop)756e4b17023SJohn Marino new_loop_vec_info (struct loop *loop)
757e4b17023SJohn Marino {
758e4b17023SJohn Marino   loop_vec_info res;
759e4b17023SJohn Marino   basic_block *bbs;
760e4b17023SJohn Marino   gimple_stmt_iterator si;
761e4b17023SJohn Marino   unsigned int i, nbbs;
762e4b17023SJohn Marino 
763e4b17023SJohn Marino   res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
764e4b17023SJohn Marino   LOOP_VINFO_LOOP (res) = loop;
765e4b17023SJohn Marino 
766e4b17023SJohn Marino   bbs = get_loop_body (loop);
767e4b17023SJohn Marino 
768e4b17023SJohn Marino   /* Create/Update stmt_info for all stmts in the loop.  */
769e4b17023SJohn Marino   for (i = 0; i < loop->num_nodes; i++)
770e4b17023SJohn Marino     {
771e4b17023SJohn Marino       basic_block bb = bbs[i];
772e4b17023SJohn Marino 
773e4b17023SJohn Marino       /* BBs in a nested inner-loop will have been already processed (because
774e4b17023SJohn Marino          we will have called vect_analyze_loop_form for any nested inner-loop).
775e4b17023SJohn Marino          Therefore, for stmts in an inner-loop we just want to update the
776e4b17023SJohn Marino          STMT_VINFO_LOOP_VINFO field of their stmt_info to point to the new
777e4b17023SJohn Marino          loop_info of the outer-loop we are currently considering to vectorize
778e4b17023SJohn Marino          (instead of the loop_info of the inner-loop).
779e4b17023SJohn Marino          For stmts in other BBs we need to create a stmt_info from scratch.  */
780e4b17023SJohn Marino       if (bb->loop_father != loop)
781e4b17023SJohn Marino         {
782e4b17023SJohn Marino           /* Inner-loop bb.  */
783e4b17023SJohn Marino           gcc_assert (loop->inner && bb->loop_father == loop->inner);
784e4b17023SJohn Marino           for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
785e4b17023SJohn Marino             {
786e4b17023SJohn Marino               gimple phi = gsi_stmt (si);
787e4b17023SJohn Marino               stmt_vec_info stmt_info = vinfo_for_stmt (phi);
788e4b17023SJohn Marino               loop_vec_info inner_loop_vinfo =
789e4b17023SJohn Marino                 STMT_VINFO_LOOP_VINFO (stmt_info);
790e4b17023SJohn Marino               gcc_assert (loop->inner == LOOP_VINFO_LOOP (inner_loop_vinfo));
791e4b17023SJohn Marino               STMT_VINFO_LOOP_VINFO (stmt_info) = res;
792e4b17023SJohn Marino             }
793e4b17023SJohn Marino           for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
794e4b17023SJohn Marino            {
795e4b17023SJohn Marino               gimple stmt = gsi_stmt (si);
796e4b17023SJohn Marino               stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
797e4b17023SJohn Marino               loop_vec_info inner_loop_vinfo =
798e4b17023SJohn Marino                  STMT_VINFO_LOOP_VINFO (stmt_info);
799e4b17023SJohn Marino               gcc_assert (loop->inner == LOOP_VINFO_LOOP (inner_loop_vinfo));
800e4b17023SJohn Marino               STMT_VINFO_LOOP_VINFO (stmt_info) = res;
801e4b17023SJohn Marino            }
802e4b17023SJohn Marino         }
803e4b17023SJohn Marino       else
804e4b17023SJohn Marino         {
805e4b17023SJohn Marino           /* bb in current nest.  */
806e4b17023SJohn Marino           for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
807e4b17023SJohn Marino             {
808e4b17023SJohn Marino               gimple phi = gsi_stmt (si);
809e4b17023SJohn Marino               gimple_set_uid (phi, 0);
810e4b17023SJohn Marino               set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, res, NULL));
811e4b17023SJohn Marino             }
812e4b17023SJohn Marino 
813e4b17023SJohn Marino           for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
814e4b17023SJohn Marino             {
815e4b17023SJohn Marino               gimple stmt = gsi_stmt (si);
816e4b17023SJohn Marino               gimple_set_uid (stmt, 0);
817e4b17023SJohn Marino               set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, res, NULL));
818e4b17023SJohn Marino             }
819e4b17023SJohn Marino         }
820e4b17023SJohn Marino     }
821e4b17023SJohn Marino 
822e4b17023SJohn Marino   /* CHECKME: We want to visit all BBs before their successors (except for
823e4b17023SJohn Marino      latch blocks, for which this assertion wouldn't hold).  In the simple
824e4b17023SJohn Marino      case of the loop forms we allow, a dfs order of the BBs would the same
825e4b17023SJohn Marino      as reversed postorder traversal, so we are safe.  */
826e4b17023SJohn Marino 
827e4b17023SJohn Marino    free (bbs);
828e4b17023SJohn Marino    bbs = XCNEWVEC (basic_block, loop->num_nodes);
829e4b17023SJohn Marino    nbbs = dfs_enumerate_from (loop->header, 0, bb_in_loop_p,
830e4b17023SJohn Marino                               bbs, loop->num_nodes, loop);
831e4b17023SJohn Marino    gcc_assert (nbbs == loop->num_nodes);
832e4b17023SJohn Marino 
833e4b17023SJohn Marino   LOOP_VINFO_BBS (res) = bbs;
834e4b17023SJohn Marino   LOOP_VINFO_NITERS (res) = NULL;
835e4b17023SJohn Marino   LOOP_VINFO_NITERS_UNCHANGED (res) = NULL;
836e4b17023SJohn Marino   LOOP_VINFO_COST_MODEL_MIN_ITERS (res) = 0;
837e4b17023SJohn Marino   LOOP_VINFO_VECTORIZABLE_P (res) = 0;
838e4b17023SJohn Marino   LOOP_PEELING_FOR_ALIGNMENT (res) = 0;
839e4b17023SJohn Marino   LOOP_VINFO_VECT_FACTOR (res) = 0;
840e4b17023SJohn Marino   LOOP_VINFO_LOOP_NEST (res) = VEC_alloc (loop_p, heap, 3);
841e4b17023SJohn Marino   LOOP_VINFO_DATAREFS (res) = VEC_alloc (data_reference_p, heap, 10);
842e4b17023SJohn Marino   LOOP_VINFO_DDRS (res) = VEC_alloc (ddr_p, heap, 10 * 10);
843e4b17023SJohn Marino   LOOP_VINFO_UNALIGNED_DR (res) = NULL;
844e4b17023SJohn Marino   LOOP_VINFO_MAY_MISALIGN_STMTS (res) =
845e4b17023SJohn Marino     VEC_alloc (gimple, heap,
846e4b17023SJohn Marino                PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS));
847e4b17023SJohn Marino   LOOP_VINFO_MAY_ALIAS_DDRS (res) =
848e4b17023SJohn Marino     VEC_alloc (ddr_p, heap,
849e4b17023SJohn Marino                PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS));
850e4b17023SJohn Marino   LOOP_VINFO_STRIDED_STORES (res) = VEC_alloc (gimple, heap, 10);
851e4b17023SJohn Marino   LOOP_VINFO_REDUCTIONS (res) = VEC_alloc (gimple, heap, 10);
852e4b17023SJohn Marino   LOOP_VINFO_REDUCTION_CHAINS (res) = VEC_alloc (gimple, heap, 10);
853e4b17023SJohn Marino   LOOP_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 10);
854e4b17023SJohn Marino   LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1;
855e4b17023SJohn Marino   LOOP_VINFO_PEELING_HTAB (res) = NULL;
856e4b17023SJohn Marino   LOOP_VINFO_PEELING_FOR_GAPS (res) = false;
857e4b17023SJohn Marino 
858e4b17023SJohn Marino   return res;
859e4b17023SJohn Marino }
860e4b17023SJohn Marino 
861e4b17023SJohn Marino 
862e4b17023SJohn Marino /* Function destroy_loop_vec_info.
863e4b17023SJohn Marino 
864e4b17023SJohn Marino    Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
865e4b17023SJohn Marino    stmts in the loop.  */
866e4b17023SJohn Marino 
867e4b17023SJohn Marino void
destroy_loop_vec_info(loop_vec_info loop_vinfo,bool clean_stmts)868e4b17023SJohn Marino destroy_loop_vec_info (loop_vec_info loop_vinfo, bool clean_stmts)
869e4b17023SJohn Marino {
870e4b17023SJohn Marino   struct loop *loop;
871e4b17023SJohn Marino   basic_block *bbs;
872e4b17023SJohn Marino   int nbbs;
873e4b17023SJohn Marino   gimple_stmt_iterator si;
874e4b17023SJohn Marino   int j;
875e4b17023SJohn Marino   VEC (slp_instance, heap) *slp_instances;
876e4b17023SJohn Marino   slp_instance instance;
877e4b17023SJohn Marino 
878e4b17023SJohn Marino   if (!loop_vinfo)
879e4b17023SJohn Marino     return;
880e4b17023SJohn Marino 
881e4b17023SJohn Marino   loop = LOOP_VINFO_LOOP (loop_vinfo);
882e4b17023SJohn Marino 
883e4b17023SJohn Marino   bbs = LOOP_VINFO_BBS (loop_vinfo);
884e4b17023SJohn Marino   nbbs = loop->num_nodes;
885e4b17023SJohn Marino 
886e4b17023SJohn Marino   if (!clean_stmts)
887e4b17023SJohn Marino     {
888e4b17023SJohn Marino       free (LOOP_VINFO_BBS (loop_vinfo));
889e4b17023SJohn Marino       free_data_refs (LOOP_VINFO_DATAREFS (loop_vinfo));
890e4b17023SJohn Marino       free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
891e4b17023SJohn Marino       VEC_free (loop_p, heap, LOOP_VINFO_LOOP_NEST (loop_vinfo));
892e4b17023SJohn Marino       VEC_free (gimple, heap, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
893e4b17023SJohn Marino       VEC_free (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo));
894e4b17023SJohn Marino 
895e4b17023SJohn Marino       free (loop_vinfo);
896e4b17023SJohn Marino       loop->aux = NULL;
897e4b17023SJohn Marino       return;
898e4b17023SJohn Marino     }
899e4b17023SJohn Marino 
900e4b17023SJohn Marino   for (j = 0; j < nbbs; j++)
901e4b17023SJohn Marino     {
902e4b17023SJohn Marino       basic_block bb = bbs[j];
903e4b17023SJohn Marino       for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
904e4b17023SJohn Marino         free_stmt_vec_info (gsi_stmt (si));
905e4b17023SJohn Marino 
906e4b17023SJohn Marino       for (si = gsi_start_bb (bb); !gsi_end_p (si); )
907e4b17023SJohn Marino         {
908e4b17023SJohn Marino           gimple stmt = gsi_stmt (si);
909e4b17023SJohn Marino 	  /* Free stmt_vec_info.  */
910e4b17023SJohn Marino 	  free_stmt_vec_info (stmt);
911e4b17023SJohn Marino           gsi_next (&si);
912e4b17023SJohn Marino         }
913e4b17023SJohn Marino     }
914e4b17023SJohn Marino 
915e4b17023SJohn Marino   free (LOOP_VINFO_BBS (loop_vinfo));
916e4b17023SJohn Marino   free_data_refs (LOOP_VINFO_DATAREFS (loop_vinfo));
917e4b17023SJohn Marino   free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
918e4b17023SJohn Marino   VEC_free (loop_p, heap, LOOP_VINFO_LOOP_NEST (loop_vinfo));
919e4b17023SJohn Marino   VEC_free (gimple, heap, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
920e4b17023SJohn Marino   VEC_free (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo));
921e4b17023SJohn Marino   slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
922e4b17023SJohn Marino   FOR_EACH_VEC_ELT (slp_instance, slp_instances, j, instance)
923e4b17023SJohn Marino     vect_free_slp_instance (instance);
924e4b17023SJohn Marino 
925e4b17023SJohn Marino   VEC_free (slp_instance, heap, LOOP_VINFO_SLP_INSTANCES (loop_vinfo));
926e4b17023SJohn Marino   VEC_free (gimple, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo));
927e4b17023SJohn Marino   VEC_free (gimple, heap, LOOP_VINFO_REDUCTIONS (loop_vinfo));
928e4b17023SJohn Marino   VEC_free (gimple, heap, LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo));
929e4b17023SJohn Marino 
930e4b17023SJohn Marino   if (LOOP_VINFO_PEELING_HTAB (loop_vinfo))
931e4b17023SJohn Marino     htab_delete (LOOP_VINFO_PEELING_HTAB (loop_vinfo));
932e4b17023SJohn Marino 
933e4b17023SJohn Marino   free (loop_vinfo);
934e4b17023SJohn Marino   loop->aux = NULL;
935e4b17023SJohn Marino }
936e4b17023SJohn Marino 
937e4b17023SJohn Marino 
938e4b17023SJohn Marino /* Function vect_analyze_loop_1.
939e4b17023SJohn Marino 
940e4b17023SJohn Marino    Apply a set of analyses on LOOP, and create a loop_vec_info struct
941e4b17023SJohn Marino    for it. The different analyses will record information in the
942e4b17023SJohn Marino    loop_vec_info struct.  This is a subset of the analyses applied in
943e4b17023SJohn Marino    vect_analyze_loop, to be applied on an inner-loop nested in the loop
944e4b17023SJohn Marino    that is now considered for (outer-loop) vectorization.  */
945e4b17023SJohn Marino 
946e4b17023SJohn Marino static loop_vec_info
vect_analyze_loop_1(struct loop * loop)947e4b17023SJohn Marino vect_analyze_loop_1 (struct loop *loop)
948e4b17023SJohn Marino {
949e4b17023SJohn Marino   loop_vec_info loop_vinfo;
950e4b17023SJohn Marino 
951e4b17023SJohn Marino   if (vect_print_dump_info (REPORT_DETAILS))
952e4b17023SJohn Marino     fprintf (vect_dump, "===== analyze_loop_nest_1 =====");
953e4b17023SJohn Marino 
954e4b17023SJohn Marino   /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.  */
955e4b17023SJohn Marino 
956e4b17023SJohn Marino   loop_vinfo = vect_analyze_loop_form (loop);
957e4b17023SJohn Marino   if (!loop_vinfo)
958e4b17023SJohn Marino     {
959e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_DETAILS))
960e4b17023SJohn Marino         fprintf (vect_dump, "bad inner-loop form.");
961e4b17023SJohn Marino       return NULL;
962e4b17023SJohn Marino     }
963e4b17023SJohn Marino 
964e4b17023SJohn Marino   return loop_vinfo;
965e4b17023SJohn Marino }
966e4b17023SJohn Marino 
967e4b17023SJohn Marino 
968e4b17023SJohn Marino /* Function vect_analyze_loop_form.
969e4b17023SJohn Marino 
970e4b17023SJohn Marino    Verify that certain CFG restrictions hold, including:
971e4b17023SJohn Marino    - the loop has a pre-header
972e4b17023SJohn Marino    - the loop has a single entry and exit
973e4b17023SJohn Marino    - the loop exit condition is simple enough, and the number of iterations
974e4b17023SJohn Marino      can be analyzed (a countable loop).  */
975e4b17023SJohn Marino 
976e4b17023SJohn Marino loop_vec_info
vect_analyze_loop_form(struct loop * loop)977e4b17023SJohn Marino vect_analyze_loop_form (struct loop *loop)
978e4b17023SJohn Marino {
979e4b17023SJohn Marino   loop_vec_info loop_vinfo;
980e4b17023SJohn Marino   gimple loop_cond;
981e4b17023SJohn Marino   tree number_of_iterations = NULL;
982e4b17023SJohn Marino   loop_vec_info inner_loop_vinfo = NULL;
983e4b17023SJohn Marino 
984e4b17023SJohn Marino   if (vect_print_dump_info (REPORT_DETAILS))
985e4b17023SJohn Marino     fprintf (vect_dump, "=== vect_analyze_loop_form ===");
986e4b17023SJohn Marino 
987e4b17023SJohn Marino   /* Different restrictions apply when we are considering an inner-most loop,
988e4b17023SJohn Marino      vs. an outer (nested) loop.
989e4b17023SJohn Marino      (FORNOW. May want to relax some of these restrictions in the future).  */
990e4b17023SJohn Marino 
991e4b17023SJohn Marino   if (!loop->inner)
992e4b17023SJohn Marino     {
993e4b17023SJohn Marino       /* Inner-most loop.  We currently require that the number of BBs is
994e4b17023SJohn Marino 	 exactly 2 (the header and latch).  Vectorizable inner-most loops
995e4b17023SJohn Marino 	 look like this:
996e4b17023SJohn Marino 
997e4b17023SJohn Marino                         (pre-header)
998e4b17023SJohn Marino                            |
999e4b17023SJohn Marino                           header <--------+
1000e4b17023SJohn Marino                            | |            |
1001e4b17023SJohn Marino                            | +--> latch --+
1002e4b17023SJohn Marino                            |
1003e4b17023SJohn Marino                         (exit-bb)  */
1004e4b17023SJohn Marino 
1005e4b17023SJohn Marino       if (loop->num_nodes != 2)
1006e4b17023SJohn Marino         {
1007e4b17023SJohn Marino           if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1008e4b17023SJohn Marino             fprintf (vect_dump, "not vectorized: control flow in loop.");
1009e4b17023SJohn Marino           return NULL;
1010e4b17023SJohn Marino         }
1011e4b17023SJohn Marino 
1012e4b17023SJohn Marino       if (empty_block_p (loop->header))
1013e4b17023SJohn Marino     {
1014e4b17023SJohn Marino           if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1015e4b17023SJohn Marino             fprintf (vect_dump, "not vectorized: empty loop.");
1016e4b17023SJohn Marino       return NULL;
1017e4b17023SJohn Marino     }
1018e4b17023SJohn Marino     }
1019e4b17023SJohn Marino   else
1020e4b17023SJohn Marino     {
1021e4b17023SJohn Marino       struct loop *innerloop = loop->inner;
1022e4b17023SJohn Marino       edge entryedge;
1023e4b17023SJohn Marino 
1024e4b17023SJohn Marino       /* Nested loop. We currently require that the loop is doubly-nested,
1025e4b17023SJohn Marino 	 contains a single inner loop, and the number of BBs is exactly 5.
1026e4b17023SJohn Marino 	 Vectorizable outer-loops look like this:
1027e4b17023SJohn Marino 
1028e4b17023SJohn Marino 			(pre-header)
1029e4b17023SJohn Marino 			   |
1030e4b17023SJohn Marino 			  header <---+
1031e4b17023SJohn Marino 			   |         |
1032e4b17023SJohn Marino 		          inner-loop |
1033e4b17023SJohn Marino 			   |         |
1034e4b17023SJohn Marino 			  tail ------+
1035e4b17023SJohn Marino 			   |
1036e4b17023SJohn Marino 		        (exit-bb)
1037e4b17023SJohn Marino 
1038e4b17023SJohn Marino 	 The inner-loop has the properties expected of inner-most loops
1039e4b17023SJohn Marino 	 as described above.  */
1040e4b17023SJohn Marino 
1041e4b17023SJohn Marino       if ((loop->inner)->inner || (loop->inner)->next)
1042e4b17023SJohn Marino 	{
1043e4b17023SJohn Marino 	  if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1044e4b17023SJohn Marino 	    fprintf (vect_dump, "not vectorized: multiple nested loops.");
1045e4b17023SJohn Marino 	  return NULL;
1046e4b17023SJohn Marino 	}
1047e4b17023SJohn Marino 
1048e4b17023SJohn Marino       /* Analyze the inner-loop.  */
1049e4b17023SJohn Marino       inner_loop_vinfo = vect_analyze_loop_1 (loop->inner);
1050e4b17023SJohn Marino       if (!inner_loop_vinfo)
1051e4b17023SJohn Marino 	{
1052e4b17023SJohn Marino 	  if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1053e4b17023SJohn Marino             fprintf (vect_dump, "not vectorized: Bad inner loop.");
1054e4b17023SJohn Marino 	  return NULL;
1055e4b17023SJohn Marino 	}
1056e4b17023SJohn Marino 
1057e4b17023SJohn Marino       if (!expr_invariant_in_loop_p (loop,
1058e4b17023SJohn Marino 					LOOP_VINFO_NITERS (inner_loop_vinfo)))
1059e4b17023SJohn Marino 	{
1060e4b17023SJohn Marino 	  if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1061e4b17023SJohn Marino 	    fprintf (vect_dump,
1062e4b17023SJohn Marino 		     "not vectorized: inner-loop count not invariant.");
1063e4b17023SJohn Marino 	  destroy_loop_vec_info (inner_loop_vinfo, true);
1064e4b17023SJohn Marino 	  return NULL;
1065e4b17023SJohn Marino 	}
1066e4b17023SJohn Marino 
1067e4b17023SJohn Marino       if (loop->num_nodes != 5)
1068e4b17023SJohn Marino         {
1069e4b17023SJohn Marino 	  if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1070e4b17023SJohn Marino 	    fprintf (vect_dump, "not vectorized: control flow in loop.");
1071e4b17023SJohn Marino 	  destroy_loop_vec_info (inner_loop_vinfo, true);
1072e4b17023SJohn Marino 	  return NULL;
1073e4b17023SJohn Marino         }
1074e4b17023SJohn Marino 
1075e4b17023SJohn Marino       gcc_assert (EDGE_COUNT (innerloop->header->preds) == 2);
1076e4b17023SJohn Marino       entryedge = EDGE_PRED (innerloop->header, 0);
1077e4b17023SJohn Marino       if (EDGE_PRED (innerloop->header, 0)->src == innerloop->latch)
1078e4b17023SJohn Marino 	entryedge = EDGE_PRED (innerloop->header, 1);
1079e4b17023SJohn Marino 
1080e4b17023SJohn Marino       if (entryedge->src != loop->header
1081e4b17023SJohn Marino 	  || !single_exit (innerloop)
1082e4b17023SJohn Marino 	  || single_exit (innerloop)->dest !=  EDGE_PRED (loop->latch, 0)->src)
1083e4b17023SJohn Marino 	{
1084e4b17023SJohn Marino 	  if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1085e4b17023SJohn Marino 	    fprintf (vect_dump, "not vectorized: unsupported outerloop form.");
1086e4b17023SJohn Marino 	  destroy_loop_vec_info (inner_loop_vinfo, true);
1087e4b17023SJohn Marino 	  return NULL;
1088e4b17023SJohn Marino 	}
1089e4b17023SJohn Marino 
1090e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_DETAILS))
1091e4b17023SJohn Marino         fprintf (vect_dump, "Considering outer-loop vectorization.");
1092e4b17023SJohn Marino     }
1093e4b17023SJohn Marino 
1094e4b17023SJohn Marino   if (!single_exit (loop)
1095e4b17023SJohn Marino       || EDGE_COUNT (loop->header->preds) != 2)
1096e4b17023SJohn Marino     {
1097e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1098e4b17023SJohn Marino         {
1099e4b17023SJohn Marino           if (!single_exit (loop))
1100e4b17023SJohn Marino             fprintf (vect_dump, "not vectorized: multiple exits.");
1101e4b17023SJohn Marino           else if (EDGE_COUNT (loop->header->preds) != 2)
1102e4b17023SJohn Marino             fprintf (vect_dump, "not vectorized: too many incoming edges.");
1103e4b17023SJohn Marino         }
1104e4b17023SJohn Marino       if (inner_loop_vinfo)
1105e4b17023SJohn Marino 	destroy_loop_vec_info (inner_loop_vinfo, true);
1106e4b17023SJohn Marino       return NULL;
1107e4b17023SJohn Marino     }
1108e4b17023SJohn Marino 
1109e4b17023SJohn Marino   /* We assume that the loop exit condition is at the end of the loop. i.e,
1110e4b17023SJohn Marino      that the loop is represented as a do-while (with a proper if-guard
1111e4b17023SJohn Marino      before the loop if needed), where the loop header contains all the
1112e4b17023SJohn Marino      executable statements, and the latch is empty.  */
1113e4b17023SJohn Marino   if (!empty_block_p (loop->latch)
1114e4b17023SJohn Marino         || !gimple_seq_empty_p (phi_nodes (loop->latch)))
1115e4b17023SJohn Marino     {
1116e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1117e4b17023SJohn Marino         fprintf (vect_dump, "not vectorized: unexpected loop form.");
1118e4b17023SJohn Marino       if (inner_loop_vinfo)
1119e4b17023SJohn Marino 	destroy_loop_vec_info (inner_loop_vinfo, true);
1120e4b17023SJohn Marino       return NULL;
1121e4b17023SJohn Marino     }
1122e4b17023SJohn Marino 
1123e4b17023SJohn Marino   /* Make sure there exists a single-predecessor exit bb:  */
1124e4b17023SJohn Marino   if (!single_pred_p (single_exit (loop)->dest))
1125e4b17023SJohn Marino     {
1126e4b17023SJohn Marino       edge e = single_exit (loop);
1127e4b17023SJohn Marino       if (!(e->flags & EDGE_ABNORMAL))
1128e4b17023SJohn Marino 	{
1129e4b17023SJohn Marino 	  split_loop_exit_edge (e);
1130e4b17023SJohn Marino 	  if (vect_print_dump_info (REPORT_DETAILS))
1131e4b17023SJohn Marino 	    fprintf (vect_dump, "split exit edge.");
1132e4b17023SJohn Marino 	}
1133e4b17023SJohn Marino       else
1134e4b17023SJohn Marino 	{
1135e4b17023SJohn Marino 	  if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1136e4b17023SJohn Marino 	    fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
1137e4b17023SJohn Marino 	  if (inner_loop_vinfo)
1138e4b17023SJohn Marino 	    destroy_loop_vec_info (inner_loop_vinfo, true);
1139e4b17023SJohn Marino 	  return NULL;
1140e4b17023SJohn Marino 	}
1141e4b17023SJohn Marino     }
1142e4b17023SJohn Marino 
1143e4b17023SJohn Marino   loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
1144e4b17023SJohn Marino   if (!loop_cond)
1145e4b17023SJohn Marino     {
1146e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1147e4b17023SJohn Marino 	fprintf (vect_dump, "not vectorized: complicated exit condition.");
1148e4b17023SJohn Marino       if (inner_loop_vinfo)
1149e4b17023SJohn Marino 	destroy_loop_vec_info (inner_loop_vinfo, true);
1150e4b17023SJohn Marino       return NULL;
1151e4b17023SJohn Marino     }
1152e4b17023SJohn Marino 
1153e4b17023SJohn Marino   if (!number_of_iterations)
1154e4b17023SJohn Marino     {
1155e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1156e4b17023SJohn Marino 	fprintf (vect_dump,
1157e4b17023SJohn Marino 		 "not vectorized: number of iterations cannot be computed.");
1158e4b17023SJohn Marino       if (inner_loop_vinfo)
1159e4b17023SJohn Marino 	destroy_loop_vec_info (inner_loop_vinfo, true);
1160e4b17023SJohn Marino       return NULL;
1161e4b17023SJohn Marino     }
1162e4b17023SJohn Marino 
1163e4b17023SJohn Marino   if (chrec_contains_undetermined (number_of_iterations))
1164e4b17023SJohn Marino     {
1165e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1166e4b17023SJohn Marino         fprintf (vect_dump, "Infinite number of iterations.");
1167e4b17023SJohn Marino       if (inner_loop_vinfo)
1168e4b17023SJohn Marino 	destroy_loop_vec_info (inner_loop_vinfo, true);
1169e4b17023SJohn Marino       return NULL;
1170e4b17023SJohn Marino     }
1171e4b17023SJohn Marino 
1172e4b17023SJohn Marino   if (!NITERS_KNOWN_P (number_of_iterations))
1173e4b17023SJohn Marino     {
1174e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_DETAILS))
1175e4b17023SJohn Marino         {
1176e4b17023SJohn Marino           fprintf (vect_dump, "Symbolic number of iterations is ");
1177e4b17023SJohn Marino           print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
1178e4b17023SJohn Marino         }
1179e4b17023SJohn Marino     }
1180e4b17023SJohn Marino   else if (TREE_INT_CST_LOW (number_of_iterations) == 0)
1181e4b17023SJohn Marino     {
1182e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1183e4b17023SJohn Marino         fprintf (vect_dump, "not vectorized: number of iterations = 0.");
1184e4b17023SJohn Marino       if (inner_loop_vinfo)
1185e4b17023SJohn Marino         destroy_loop_vec_info (inner_loop_vinfo, false);
1186e4b17023SJohn Marino       return NULL;
1187e4b17023SJohn Marino     }
1188e4b17023SJohn Marino 
1189e4b17023SJohn Marino   loop_vinfo = new_loop_vec_info (loop);
1190e4b17023SJohn Marino   LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
1191e4b17023SJohn Marino   LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations;
1192e4b17023SJohn Marino 
1193e4b17023SJohn Marino   STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond)) = loop_exit_ctrl_vec_info_type;
1194e4b17023SJohn Marino 
1195e4b17023SJohn Marino   /* CHECKME: May want to keep it around it in the future.  */
1196e4b17023SJohn Marino   if (inner_loop_vinfo)
1197e4b17023SJohn Marino     destroy_loop_vec_info (inner_loop_vinfo, false);
1198e4b17023SJohn Marino 
1199e4b17023SJohn Marino   gcc_assert (!loop->aux);
1200e4b17023SJohn Marino   loop->aux = loop_vinfo;
1201e4b17023SJohn Marino   return loop_vinfo;
1202e4b17023SJohn Marino }
1203e4b17023SJohn Marino 
1204e4b17023SJohn Marino 
1205e4b17023SJohn Marino /* Get cost by calling cost target builtin.  */
1206e4b17023SJohn Marino 
1207e4b17023SJohn Marino static inline int
vect_get_cost(enum vect_cost_for_stmt type_of_cost)1208e4b17023SJohn Marino vect_get_cost (enum vect_cost_for_stmt type_of_cost)
1209e4b17023SJohn Marino {
1210e4b17023SJohn Marino   tree dummy_type = NULL;
1211e4b17023SJohn Marino   int dummy = 0;
1212e4b17023SJohn Marino 
1213e4b17023SJohn Marino   return targetm.vectorize.builtin_vectorization_cost (type_of_cost,
1214e4b17023SJohn Marino                                                        dummy_type, dummy);
1215e4b17023SJohn Marino }
1216e4b17023SJohn Marino 
1217e4b17023SJohn Marino 
1218e4b17023SJohn Marino /* Function vect_analyze_loop_operations.
1219e4b17023SJohn Marino 
1220e4b17023SJohn Marino    Scan the loop stmts and make sure they are all vectorizable.  */
1221e4b17023SJohn Marino 
1222e4b17023SJohn Marino static bool
vect_analyze_loop_operations(loop_vec_info loop_vinfo,bool slp)1223e4b17023SJohn Marino vect_analyze_loop_operations (loop_vec_info loop_vinfo, bool slp)
1224e4b17023SJohn Marino {
1225e4b17023SJohn Marino   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1226e4b17023SJohn Marino   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1227e4b17023SJohn Marino   int nbbs = loop->num_nodes;
1228e4b17023SJohn Marino   gimple_stmt_iterator si;
1229e4b17023SJohn Marino   unsigned int vectorization_factor = 0;
1230e4b17023SJohn Marino   int i;
1231e4b17023SJohn Marino   gimple phi;
1232e4b17023SJohn Marino   stmt_vec_info stmt_info;
1233e4b17023SJohn Marino   bool need_to_vectorize = false;
1234e4b17023SJohn Marino   int min_profitable_iters;
1235e4b17023SJohn Marino   int min_scalar_loop_bound;
1236e4b17023SJohn Marino   unsigned int th;
1237e4b17023SJohn Marino   bool only_slp_in_loop = true, ok;
1238e4b17023SJohn Marino 
1239e4b17023SJohn Marino   if (vect_print_dump_info (REPORT_DETAILS))
1240e4b17023SJohn Marino     fprintf (vect_dump, "=== vect_analyze_loop_operations ===");
1241e4b17023SJohn Marino 
1242e4b17023SJohn Marino   gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
1243e4b17023SJohn Marino   vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1244e4b17023SJohn Marino   if (slp)
1245e4b17023SJohn Marino     {
1246e4b17023SJohn Marino       /* If all the stmts in the loop can be SLPed, we perform only SLP, and
1247e4b17023SJohn Marino 	 vectorization factor of the loop is the unrolling factor required by
1248e4b17023SJohn Marino 	 the SLP instances.  If that unrolling factor is 1, we say, that we
1249e4b17023SJohn Marino 	 perform pure SLP on loop - cross iteration parallelism is not
1250e4b17023SJohn Marino 	 exploited.  */
1251e4b17023SJohn Marino       for (i = 0; i < nbbs; i++)
1252e4b17023SJohn Marino 	{
1253e4b17023SJohn Marino 	  basic_block bb = bbs[i];
1254e4b17023SJohn Marino 	  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1255e4b17023SJohn Marino 	    {
1256e4b17023SJohn Marino 	      gimple stmt = gsi_stmt (si);
1257e4b17023SJohn Marino 	      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1258e4b17023SJohn Marino 	      gcc_assert (stmt_info);
1259e4b17023SJohn Marino 	      if ((STMT_VINFO_RELEVANT_P (stmt_info)
1260e4b17023SJohn Marino 		   || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
1261e4b17023SJohn Marino 		  && !PURE_SLP_STMT (stmt_info))
1262e4b17023SJohn Marino 		/* STMT needs both SLP and loop-based vectorization.  */
1263e4b17023SJohn Marino 		only_slp_in_loop = false;
1264e4b17023SJohn Marino 	    }
1265e4b17023SJohn Marino 	}
1266e4b17023SJohn Marino 
1267e4b17023SJohn Marino       if (only_slp_in_loop)
1268e4b17023SJohn Marino 	vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
1269e4b17023SJohn Marino       else
1270e4b17023SJohn Marino 	vectorization_factor = least_common_multiple (vectorization_factor,
1271e4b17023SJohn Marino 				LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
1272e4b17023SJohn Marino 
1273e4b17023SJohn Marino       LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
1274e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_DETAILS))
1275e4b17023SJohn Marino 	fprintf (vect_dump, "Updating vectorization factor to %d ",
1276e4b17023SJohn Marino 	 		    vectorization_factor);
1277e4b17023SJohn Marino     }
1278e4b17023SJohn Marino 
1279e4b17023SJohn Marino   for (i = 0; i < nbbs; i++)
1280e4b17023SJohn Marino     {
1281e4b17023SJohn Marino       basic_block bb = bbs[i];
1282e4b17023SJohn Marino 
1283e4b17023SJohn Marino       for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
1284e4b17023SJohn Marino         {
1285e4b17023SJohn Marino           phi = gsi_stmt (si);
1286e4b17023SJohn Marino           ok = true;
1287e4b17023SJohn Marino 
1288e4b17023SJohn Marino           stmt_info = vinfo_for_stmt (phi);
1289e4b17023SJohn Marino           if (vect_print_dump_info (REPORT_DETAILS))
1290e4b17023SJohn Marino             {
1291e4b17023SJohn Marino               fprintf (vect_dump, "examining phi: ");
1292e4b17023SJohn Marino               print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
1293e4b17023SJohn Marino             }
1294e4b17023SJohn Marino 
1295e4b17023SJohn Marino           /* Inner-loop loop-closed exit phi in outer-loop vectorization
1296e4b17023SJohn Marino              (i.e., a phi in the tail of the outer-loop).  */
1297e4b17023SJohn Marino           if (! is_loop_header_bb_p (bb))
1298e4b17023SJohn Marino             {
1299e4b17023SJohn Marino               /* FORNOW: we currently don't support the case that these phis
1300e4b17023SJohn Marino                  are not used in the outerloop (unless it is double reduction,
1301e4b17023SJohn Marino                  i.e., this phi is vect_reduction_def), cause this case
1302e4b17023SJohn Marino                  requires to actually do something here.  */
1303e4b17023SJohn Marino               if ((!STMT_VINFO_RELEVANT_P (stmt_info)
1304e4b17023SJohn Marino                    || STMT_VINFO_LIVE_P (stmt_info))
1305e4b17023SJohn Marino                   && STMT_VINFO_DEF_TYPE (stmt_info)
1306e4b17023SJohn Marino                      != vect_double_reduction_def)
1307e4b17023SJohn Marino                 {
1308e4b17023SJohn Marino                   if (vect_print_dump_info (REPORT_DETAILS))
1309e4b17023SJohn Marino                     fprintf (vect_dump,
1310e4b17023SJohn Marino                              "Unsupported loop-closed phi in outer-loop.");
1311e4b17023SJohn Marino                   return false;
1312e4b17023SJohn Marino                 }
1313e4b17023SJohn Marino 
1314e4b17023SJohn Marino               /* If PHI is used in the outer loop, we check that its operand
1315e4b17023SJohn Marino                  is defined in the inner loop.  */
1316e4b17023SJohn Marino               if (STMT_VINFO_RELEVANT_P (stmt_info))
1317e4b17023SJohn Marino                 {
1318e4b17023SJohn Marino                   tree phi_op;
1319e4b17023SJohn Marino                   gimple op_def_stmt;
1320e4b17023SJohn Marino 
1321e4b17023SJohn Marino                   if (gimple_phi_num_args (phi) != 1)
1322e4b17023SJohn Marino                     return false;
1323e4b17023SJohn Marino 
1324e4b17023SJohn Marino                   phi_op = PHI_ARG_DEF (phi, 0);
1325e4b17023SJohn Marino                   if (TREE_CODE (phi_op) != SSA_NAME)
1326e4b17023SJohn Marino                     return false;
1327e4b17023SJohn Marino 
1328e4b17023SJohn Marino                   op_def_stmt = SSA_NAME_DEF_STMT (phi_op);
1329e4b17023SJohn Marino 		  if (!op_def_stmt
1330e4b17023SJohn Marino 		      || !flow_bb_inside_loop_p (loop, gimple_bb (op_def_stmt))
1331e4b17023SJohn Marino 		      || !vinfo_for_stmt (op_def_stmt))
1332e4b17023SJohn Marino                     return false;
1333e4b17023SJohn Marino 
1334e4b17023SJohn Marino                   if (STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
1335e4b17023SJohn Marino                         != vect_used_in_outer
1336e4b17023SJohn Marino                       && STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
1337e4b17023SJohn Marino                            != vect_used_in_outer_by_reduction)
1338e4b17023SJohn Marino                     return false;
1339e4b17023SJohn Marino                 }
1340e4b17023SJohn Marino 
1341e4b17023SJohn Marino               continue;
1342e4b17023SJohn Marino             }
1343e4b17023SJohn Marino 
1344e4b17023SJohn Marino           gcc_assert (stmt_info);
1345e4b17023SJohn Marino 
1346e4b17023SJohn Marino           if (STMT_VINFO_LIVE_P (stmt_info))
1347e4b17023SJohn Marino             {
1348e4b17023SJohn Marino               /* FORNOW: not yet supported.  */
1349e4b17023SJohn Marino               if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1350e4b17023SJohn Marino                 fprintf (vect_dump, "not vectorized: value used after loop.");
1351e4b17023SJohn Marino               return false;
1352e4b17023SJohn Marino             }
1353e4b17023SJohn Marino 
1354e4b17023SJohn Marino           if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope
1355e4b17023SJohn Marino               && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
1356e4b17023SJohn Marino             {
1357e4b17023SJohn Marino               /* A scalar-dependence cycle that we don't support.  */
1358e4b17023SJohn Marino               if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1359e4b17023SJohn Marino                 fprintf (vect_dump, "not vectorized: scalar dependence cycle.");
1360e4b17023SJohn Marino               return false;
1361e4b17023SJohn Marino             }
1362e4b17023SJohn Marino 
1363e4b17023SJohn Marino           if (STMT_VINFO_RELEVANT_P (stmt_info))
1364e4b17023SJohn Marino             {
1365e4b17023SJohn Marino               need_to_vectorize = true;
1366e4b17023SJohn Marino               if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
1367e4b17023SJohn Marino                 ok = vectorizable_induction (phi, NULL, NULL);
1368e4b17023SJohn Marino             }
1369e4b17023SJohn Marino 
1370e4b17023SJohn Marino           if (!ok)
1371e4b17023SJohn Marino             {
1372e4b17023SJohn Marino               if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1373e4b17023SJohn Marino                 {
1374e4b17023SJohn Marino                   fprintf (vect_dump,
1375e4b17023SJohn Marino                            "not vectorized: relevant phi not supported: ");
1376e4b17023SJohn Marino                   print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
1377e4b17023SJohn Marino                 }
1378e4b17023SJohn Marino               return false;
1379e4b17023SJohn Marino             }
1380e4b17023SJohn Marino         }
1381e4b17023SJohn Marino 
1382e4b17023SJohn Marino       for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1383e4b17023SJohn Marino         {
1384e4b17023SJohn Marino           gimple stmt = gsi_stmt (si);
1385e4b17023SJohn Marino 	  if (!vect_analyze_stmt (stmt, &need_to_vectorize, NULL))
1386e4b17023SJohn Marino 	    return false;
1387e4b17023SJohn Marino         }
1388e4b17023SJohn Marino     } /* bbs */
1389e4b17023SJohn Marino 
1390e4b17023SJohn Marino   /* All operations in the loop are either irrelevant (deal with loop
1391e4b17023SJohn Marino      control, or dead), or only used outside the loop and can be moved
1392e4b17023SJohn Marino      out of the loop (e.g. invariants, inductions).  The loop can be
1393e4b17023SJohn Marino      optimized away by scalar optimizations.  We're better off not
1394e4b17023SJohn Marino      touching this loop.  */
1395e4b17023SJohn Marino   if (!need_to_vectorize)
1396e4b17023SJohn Marino     {
1397e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_DETAILS))
1398e4b17023SJohn Marino         fprintf (vect_dump,
1399e4b17023SJohn Marino                  "All the computation can be taken out of the loop.");
1400e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1401e4b17023SJohn Marino         fprintf (vect_dump,
1402e4b17023SJohn Marino                  "not vectorized: redundant loop. no profit to vectorize.");
1403e4b17023SJohn Marino       return false;
1404e4b17023SJohn Marino     }
1405e4b17023SJohn Marino 
1406e4b17023SJohn Marino   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1407e4b17023SJohn Marino       && vect_print_dump_info (REPORT_DETAILS))
1408e4b17023SJohn Marino     fprintf (vect_dump,
1409e4b17023SJohn Marino         "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
1410e4b17023SJohn Marino         vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
1411e4b17023SJohn Marino 
1412e4b17023SJohn Marino   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1413e4b17023SJohn Marino       && (LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor))
1414e4b17023SJohn Marino     {
1415e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1416e4b17023SJohn Marino         fprintf (vect_dump, "not vectorized: iteration count too small.");
1417e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_DETAILS))
1418e4b17023SJohn Marino         fprintf (vect_dump,"not vectorized: iteration count smaller than "
1419e4b17023SJohn Marino                  "vectorization factor.");
1420e4b17023SJohn Marino       return false;
1421e4b17023SJohn Marino     }
1422e4b17023SJohn Marino 
1423e4b17023SJohn Marino   /* Analyze cost.  Decide if worth while to vectorize.  */
1424e4b17023SJohn Marino 
1425e4b17023SJohn Marino   /* Once VF is set, SLP costs should be updated since the number of created
1426e4b17023SJohn Marino      vector stmts depends on VF.  */
1427e4b17023SJohn Marino   vect_update_slp_costs_according_to_vf (loop_vinfo);
1428e4b17023SJohn Marino 
1429e4b17023SJohn Marino   min_profitable_iters = vect_estimate_min_profitable_iters (loop_vinfo);
1430e4b17023SJohn Marino   LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo) = min_profitable_iters;
1431e4b17023SJohn Marino 
1432e4b17023SJohn Marino   if (min_profitable_iters < 0)
1433e4b17023SJohn Marino     {
1434e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1435e4b17023SJohn Marino         fprintf (vect_dump, "not vectorized: vectorization not profitable.");
1436e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_DETAILS))
1437e4b17023SJohn Marino         fprintf (vect_dump, "not vectorized: vector version will never be "
1438e4b17023SJohn Marino                  "profitable.");
1439e4b17023SJohn Marino       return false;
1440e4b17023SJohn Marino     }
1441e4b17023SJohn Marino 
1442e4b17023SJohn Marino   min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
1443e4b17023SJohn Marino                             * vectorization_factor) - 1);
1444e4b17023SJohn Marino 
1445e4b17023SJohn Marino   /* Use the cost model only if it is more conservative than user specified
1446e4b17023SJohn Marino      threshold.  */
1447e4b17023SJohn Marino 
1448e4b17023SJohn Marino   th = (unsigned) min_scalar_loop_bound;
1449e4b17023SJohn Marino   if (min_profitable_iters
1450e4b17023SJohn Marino       && (!min_scalar_loop_bound
1451e4b17023SJohn Marino           || min_profitable_iters > min_scalar_loop_bound))
1452e4b17023SJohn Marino     th = (unsigned) min_profitable_iters;
1453e4b17023SJohn Marino 
1454e4b17023SJohn Marino   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1455e4b17023SJohn Marino       && LOOP_VINFO_INT_NITERS (loop_vinfo) <= th)
1456e4b17023SJohn Marino     {
1457e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1458e4b17023SJohn Marino         fprintf (vect_dump, "not vectorized: vectorization not "
1459e4b17023SJohn Marino                  "profitable.");
1460e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_DETAILS))
1461e4b17023SJohn Marino         fprintf (vect_dump, "not vectorized: iteration count smaller than "
1462e4b17023SJohn Marino                  "user specified loop bound parameter or minimum "
1463e4b17023SJohn Marino                  "profitable iterations (whichever is more conservative).");
1464e4b17023SJohn Marino       return false;
1465e4b17023SJohn Marino     }
1466e4b17023SJohn Marino 
1467e4b17023SJohn Marino   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1468e4b17023SJohn Marino       || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
1469*95d28233SJohn Marino       || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
1470e4b17023SJohn Marino     {
1471e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_DETAILS))
1472e4b17023SJohn Marino         fprintf (vect_dump, "epilog loop required.");
1473e4b17023SJohn Marino       if (!vect_can_advance_ivs_p (loop_vinfo))
1474e4b17023SJohn Marino         {
1475e4b17023SJohn Marino           if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1476e4b17023SJohn Marino             fprintf (vect_dump,
1477e4b17023SJohn Marino                      "not vectorized: can't create epilog loop 1.");
1478e4b17023SJohn Marino           return false;
1479e4b17023SJohn Marino         }
1480e4b17023SJohn Marino       if (!slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1481e4b17023SJohn Marino         {
1482e4b17023SJohn Marino           if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1483e4b17023SJohn Marino             fprintf (vect_dump,
1484e4b17023SJohn Marino                      "not vectorized: can't create epilog loop 2.");
1485e4b17023SJohn Marino           return false;
1486e4b17023SJohn Marino         }
1487e4b17023SJohn Marino     }
1488e4b17023SJohn Marino 
1489e4b17023SJohn Marino   return true;
1490e4b17023SJohn Marino }
1491e4b17023SJohn Marino 
1492e4b17023SJohn Marino 
1493e4b17023SJohn Marino /* Function vect_analyze_loop_2.
1494e4b17023SJohn Marino 
1495e4b17023SJohn Marino    Apply a set of analyses on LOOP, and create a loop_vec_info struct
1496e4b17023SJohn Marino    for it.  The different analyses will record information in the
1497e4b17023SJohn Marino    loop_vec_info struct.  */
1498e4b17023SJohn Marino static bool
vect_analyze_loop_2(loop_vec_info loop_vinfo)1499e4b17023SJohn Marino vect_analyze_loop_2 (loop_vec_info loop_vinfo)
1500e4b17023SJohn Marino {
1501e4b17023SJohn Marino   bool ok, slp = false;
1502e4b17023SJohn Marino   int max_vf = MAX_VECTORIZATION_FACTOR;
1503e4b17023SJohn Marino   int min_vf = 2;
1504e4b17023SJohn Marino 
1505e4b17023SJohn Marino   /* Find all data references in the loop (which correspond to vdefs/vuses)
1506e4b17023SJohn Marino      and analyze their evolution in the loop.  Also adjust the minimal
1507e4b17023SJohn Marino      vectorization factor according to the loads and stores.
1508e4b17023SJohn Marino 
1509e4b17023SJohn Marino      FORNOW: Handle only simple, array references, which
1510e4b17023SJohn Marino      alignment can be forced, and aligned pointer-references.  */
1511e4b17023SJohn Marino 
1512e4b17023SJohn Marino   ok = vect_analyze_data_refs (loop_vinfo, NULL, &min_vf);
1513e4b17023SJohn Marino   if (!ok)
1514e4b17023SJohn Marino     {
1515e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_DETAILS))
1516e4b17023SJohn Marino 	fprintf (vect_dump, "bad data references.");
1517e4b17023SJohn Marino       return false;
1518e4b17023SJohn Marino     }
1519e4b17023SJohn Marino 
1520e4b17023SJohn Marino   /* Classify all cross-iteration scalar data-flow cycles.
1521e4b17023SJohn Marino      Cross-iteration cycles caused by virtual phis are analyzed separately.  */
1522e4b17023SJohn Marino 
1523e4b17023SJohn Marino   vect_analyze_scalar_cycles (loop_vinfo);
1524e4b17023SJohn Marino 
1525e4b17023SJohn Marino   vect_pattern_recog (loop_vinfo);
1526e4b17023SJohn Marino 
1527e4b17023SJohn Marino   /* Data-flow analysis to detect stmts that do not need to be vectorized.  */
1528e4b17023SJohn Marino 
1529e4b17023SJohn Marino   ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
1530e4b17023SJohn Marino   if (!ok)
1531e4b17023SJohn Marino     {
1532e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_DETAILS))
1533e4b17023SJohn Marino 	fprintf (vect_dump, "unexpected pattern.");
1534e4b17023SJohn Marino       return false;
1535e4b17023SJohn Marino     }
1536e4b17023SJohn Marino 
1537e4b17023SJohn Marino   /* Analyze data dependences between the data-refs in the loop
1538e4b17023SJohn Marino      and adjust the maximum vectorization factor according to
1539e4b17023SJohn Marino      the dependences.
1540e4b17023SJohn Marino      FORNOW: fail at the first data dependence that we encounter.  */
1541e4b17023SJohn Marino 
1542e4b17023SJohn Marino   ok = vect_analyze_data_ref_dependences (loop_vinfo, NULL, &max_vf);
1543e4b17023SJohn Marino   if (!ok
1544e4b17023SJohn Marino       || max_vf < min_vf)
1545e4b17023SJohn Marino     {
1546e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_DETAILS))
1547e4b17023SJohn Marino 	fprintf (vect_dump, "bad data dependence.");
1548e4b17023SJohn Marino       return false;
1549e4b17023SJohn Marino     }
1550e4b17023SJohn Marino 
1551e4b17023SJohn Marino   ok = vect_determine_vectorization_factor (loop_vinfo);
1552e4b17023SJohn Marino   if (!ok)
1553e4b17023SJohn Marino     {
1554e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_DETAILS))
1555e4b17023SJohn Marino         fprintf (vect_dump, "can't determine vectorization factor.");
1556e4b17023SJohn Marino       return false;
1557e4b17023SJohn Marino     }
1558e4b17023SJohn Marino   if (max_vf < LOOP_VINFO_VECT_FACTOR (loop_vinfo))
1559e4b17023SJohn Marino     {
1560e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_DETAILS))
1561e4b17023SJohn Marino 	fprintf (vect_dump, "bad data dependence.");
1562e4b17023SJohn Marino       return false;
1563e4b17023SJohn Marino     }
1564e4b17023SJohn Marino 
1565e4b17023SJohn Marino   /* Analyze the alignment of the data-refs in the loop.
1566e4b17023SJohn Marino      Fail if a data reference is found that cannot be vectorized.  */
1567e4b17023SJohn Marino 
1568e4b17023SJohn Marino   ok = vect_analyze_data_refs_alignment (loop_vinfo, NULL);
1569e4b17023SJohn Marino   if (!ok)
1570e4b17023SJohn Marino     {
1571e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_DETAILS))
1572e4b17023SJohn Marino 	fprintf (vect_dump, "bad data alignment.");
1573e4b17023SJohn Marino       return false;
1574e4b17023SJohn Marino     }
1575e4b17023SJohn Marino 
1576e4b17023SJohn Marino   /* Analyze the access patterns of the data-refs in the loop (consecutive,
1577e4b17023SJohn Marino      complex, etc.). FORNOW: Only handle consecutive access pattern.  */
1578e4b17023SJohn Marino 
1579e4b17023SJohn Marino   ok = vect_analyze_data_ref_accesses (loop_vinfo, NULL);
1580e4b17023SJohn Marino   if (!ok)
1581e4b17023SJohn Marino     {
1582e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_DETAILS))
1583e4b17023SJohn Marino 	fprintf (vect_dump, "bad data access.");
1584e4b17023SJohn Marino       return false;
1585e4b17023SJohn Marino     }
1586e4b17023SJohn Marino 
1587e4b17023SJohn Marino   /* Prune the list of ddrs to be tested at run-time by versioning for alias.
1588e4b17023SJohn Marino      It is important to call pruning after vect_analyze_data_ref_accesses,
1589e4b17023SJohn Marino      since we use grouping information gathered by interleaving analysis.  */
1590e4b17023SJohn Marino   ok = vect_prune_runtime_alias_test_list (loop_vinfo);
1591e4b17023SJohn Marino   if (!ok)
1592e4b17023SJohn Marino     {
1593e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_DETAILS))
1594e4b17023SJohn Marino 	fprintf (vect_dump, "too long list of versioning for alias "
1595e4b17023SJohn Marino 			    "run-time tests.");
1596e4b17023SJohn Marino       return false;
1597e4b17023SJohn Marino     }
1598e4b17023SJohn Marino 
1599e4b17023SJohn Marino   /* This pass will decide on using loop versioning and/or loop peeling in
1600e4b17023SJohn Marino      order to enhance the alignment of data references in the loop.  */
1601e4b17023SJohn Marino 
1602e4b17023SJohn Marino   ok = vect_enhance_data_refs_alignment (loop_vinfo);
1603e4b17023SJohn Marino   if (!ok)
1604e4b17023SJohn Marino     {
1605e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_DETAILS))
1606e4b17023SJohn Marino         fprintf (vect_dump, "bad data alignment.");
1607e4b17023SJohn Marino       return false;
1608e4b17023SJohn Marino     }
1609e4b17023SJohn Marino 
1610e4b17023SJohn Marino   /* Check the SLP opportunities in the loop, analyze and build SLP trees.  */
1611e4b17023SJohn Marino   ok = vect_analyze_slp (loop_vinfo, NULL);
1612e4b17023SJohn Marino   if (ok)
1613e4b17023SJohn Marino     {
1614e4b17023SJohn Marino       /* Decide which possible SLP instances to SLP.  */
1615e4b17023SJohn Marino       slp = vect_make_slp_decision (loop_vinfo);
1616e4b17023SJohn Marino 
1617e4b17023SJohn Marino       /* Find stmts that need to be both vectorized and SLPed.  */
1618e4b17023SJohn Marino       vect_detect_hybrid_slp (loop_vinfo);
1619e4b17023SJohn Marino     }
1620e4b17023SJohn Marino   else
1621e4b17023SJohn Marino     return false;
1622e4b17023SJohn Marino 
1623e4b17023SJohn Marino   /* Scan all the operations in the loop and make sure they are
1624e4b17023SJohn Marino      vectorizable.  */
1625e4b17023SJohn Marino 
1626e4b17023SJohn Marino   ok = vect_analyze_loop_operations (loop_vinfo, slp);
1627e4b17023SJohn Marino   if (!ok)
1628e4b17023SJohn Marino     {
1629e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_DETAILS))
1630e4b17023SJohn Marino 	fprintf (vect_dump, "bad operation or unsupported loop bound.");
1631e4b17023SJohn Marino       return false;
1632e4b17023SJohn Marino     }
1633e4b17023SJohn Marino 
1634e4b17023SJohn Marino   return true;
1635e4b17023SJohn Marino }
1636e4b17023SJohn Marino 
1637e4b17023SJohn Marino /* Function vect_analyze_loop.
1638e4b17023SJohn Marino 
1639e4b17023SJohn Marino    Apply a set of analyses on LOOP, and create a loop_vec_info struct
1640e4b17023SJohn Marino    for it.  The different analyses will record information in the
1641e4b17023SJohn Marino    loop_vec_info struct.  */
1642e4b17023SJohn Marino loop_vec_info
vect_analyze_loop(struct loop * loop)1643e4b17023SJohn Marino vect_analyze_loop (struct loop *loop)
1644e4b17023SJohn Marino {
1645e4b17023SJohn Marino   loop_vec_info loop_vinfo;
1646e4b17023SJohn Marino   unsigned int vector_sizes;
1647e4b17023SJohn Marino 
1648e4b17023SJohn Marino   /* Autodetect first vector size we try.  */
1649e4b17023SJohn Marino   current_vector_size = 0;
1650e4b17023SJohn Marino   vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
1651e4b17023SJohn Marino 
1652e4b17023SJohn Marino   if (vect_print_dump_info (REPORT_DETAILS))
1653e4b17023SJohn Marino     fprintf (vect_dump, "===== analyze_loop_nest =====");
1654e4b17023SJohn Marino 
1655e4b17023SJohn Marino   if (loop_outer (loop)
1656e4b17023SJohn Marino       && loop_vec_info_for_loop (loop_outer (loop))
1657e4b17023SJohn Marino       && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop))))
1658e4b17023SJohn Marino     {
1659e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_DETAILS))
1660e4b17023SJohn Marino 	fprintf (vect_dump, "outer-loop already vectorized.");
1661e4b17023SJohn Marino       return NULL;
1662e4b17023SJohn Marino     }
1663e4b17023SJohn Marino 
1664e4b17023SJohn Marino   while (1)
1665e4b17023SJohn Marino     {
1666e4b17023SJohn Marino       /* Check the CFG characteristics of the loop (nesting, entry/exit).  */
1667e4b17023SJohn Marino       loop_vinfo = vect_analyze_loop_form (loop);
1668e4b17023SJohn Marino       if (!loop_vinfo)
1669e4b17023SJohn Marino 	{
1670e4b17023SJohn Marino 	  if (vect_print_dump_info (REPORT_DETAILS))
1671e4b17023SJohn Marino 	    fprintf (vect_dump, "bad loop form.");
1672e4b17023SJohn Marino 	  return NULL;
1673e4b17023SJohn Marino 	}
1674e4b17023SJohn Marino 
1675e4b17023SJohn Marino       if (vect_analyze_loop_2 (loop_vinfo))
1676e4b17023SJohn Marino 	{
1677e4b17023SJohn Marino 	  LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
1678e4b17023SJohn Marino 
1679e4b17023SJohn Marino 	  return loop_vinfo;
1680e4b17023SJohn Marino 	}
1681e4b17023SJohn Marino 
1682e4b17023SJohn Marino       destroy_loop_vec_info (loop_vinfo, true);
1683e4b17023SJohn Marino 
1684e4b17023SJohn Marino       vector_sizes &= ~current_vector_size;
1685e4b17023SJohn Marino       if (vector_sizes == 0
1686e4b17023SJohn Marino 	  || current_vector_size == 0)
1687e4b17023SJohn Marino 	return NULL;
1688e4b17023SJohn Marino 
1689e4b17023SJohn Marino       /* Try the next biggest vector size.  */
1690e4b17023SJohn Marino       current_vector_size = 1 << floor_log2 (vector_sizes);
1691e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_DETAILS))
1692e4b17023SJohn Marino 	fprintf (vect_dump, "***** Re-trying analysis with "
1693e4b17023SJohn Marino 		 "vector size %d\n", current_vector_size);
1694e4b17023SJohn Marino     }
1695e4b17023SJohn Marino }
1696e4b17023SJohn Marino 
1697e4b17023SJohn Marino 
1698e4b17023SJohn Marino /* Function reduction_code_for_scalar_code
1699e4b17023SJohn Marino 
1700e4b17023SJohn Marino    Input:
1701e4b17023SJohn Marino    CODE - tree_code of a reduction operations.
1702e4b17023SJohn Marino 
1703e4b17023SJohn Marino    Output:
1704e4b17023SJohn Marino    REDUC_CODE - the corresponding tree-code to be used to reduce the
1705e4b17023SJohn Marino       vector of partial results into a single scalar result (which
1706e4b17023SJohn Marino       will also reside in a vector) or ERROR_MARK if the operation is
1707e4b17023SJohn Marino       a supported reduction operation, but does not have such tree-code.
1708e4b17023SJohn Marino 
1709e4b17023SJohn Marino    Return FALSE if CODE currently cannot be vectorized as reduction.  */
1710e4b17023SJohn Marino 
1711e4b17023SJohn Marino static bool
reduction_code_for_scalar_code(enum tree_code code,enum tree_code * reduc_code)1712e4b17023SJohn Marino reduction_code_for_scalar_code (enum tree_code code,
1713e4b17023SJohn Marino                                 enum tree_code *reduc_code)
1714e4b17023SJohn Marino {
1715e4b17023SJohn Marino   switch (code)
1716e4b17023SJohn Marino     {
1717e4b17023SJohn Marino       case MAX_EXPR:
1718e4b17023SJohn Marino         *reduc_code = REDUC_MAX_EXPR;
1719e4b17023SJohn Marino         return true;
1720e4b17023SJohn Marino 
1721e4b17023SJohn Marino       case MIN_EXPR:
1722e4b17023SJohn Marino         *reduc_code = REDUC_MIN_EXPR;
1723e4b17023SJohn Marino         return true;
1724e4b17023SJohn Marino 
1725e4b17023SJohn Marino       case PLUS_EXPR:
1726e4b17023SJohn Marino         *reduc_code = REDUC_PLUS_EXPR;
1727e4b17023SJohn Marino         return true;
1728e4b17023SJohn Marino 
1729e4b17023SJohn Marino       case MULT_EXPR:
1730e4b17023SJohn Marino       case MINUS_EXPR:
1731e4b17023SJohn Marino       case BIT_IOR_EXPR:
1732e4b17023SJohn Marino       case BIT_XOR_EXPR:
1733e4b17023SJohn Marino       case BIT_AND_EXPR:
1734e4b17023SJohn Marino         *reduc_code = ERROR_MARK;
1735e4b17023SJohn Marino         return true;
1736e4b17023SJohn Marino 
1737e4b17023SJohn Marino       default:
1738e4b17023SJohn Marino        return false;
1739e4b17023SJohn Marino     }
1740e4b17023SJohn Marino }
1741e4b17023SJohn Marino 
1742e4b17023SJohn Marino 
1743e4b17023SJohn Marino /* Error reporting helper for vect_is_simple_reduction below.  GIMPLE statement
1744e4b17023SJohn Marino    STMT is printed with a message MSG. */
1745e4b17023SJohn Marino 
1746e4b17023SJohn Marino static void
report_vect_op(gimple stmt,const char * msg)1747e4b17023SJohn Marino report_vect_op (gimple stmt, const char *msg)
1748e4b17023SJohn Marino {
1749e4b17023SJohn Marino   fprintf (vect_dump, "%s", msg);
1750e4b17023SJohn Marino   print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1751e4b17023SJohn Marino }
1752e4b17023SJohn Marino 
1753e4b17023SJohn Marino 
1754e4b17023SJohn Marino /* Detect SLP reduction of the form:
1755e4b17023SJohn Marino 
1756e4b17023SJohn Marino    #a1 = phi <a5, a0>
1757e4b17023SJohn Marino    a2 = operation (a1)
1758e4b17023SJohn Marino    a3 = operation (a2)
1759e4b17023SJohn Marino    a4 = operation (a3)
1760e4b17023SJohn Marino    a5 = operation (a4)
1761e4b17023SJohn Marino 
1762e4b17023SJohn Marino    #a = phi <a5>
1763e4b17023SJohn Marino 
1764e4b17023SJohn Marino    PHI is the reduction phi node (#a1 = phi <a5, a0> above)
1765e4b17023SJohn Marino    FIRST_STMT is the first reduction stmt in the chain
1766e4b17023SJohn Marino    (a2 = operation (a1)).
1767e4b17023SJohn Marino 
1768e4b17023SJohn Marino    Return TRUE if a reduction chain was detected.  */
1769e4b17023SJohn Marino 
1770e4b17023SJohn Marino static bool
vect_is_slp_reduction(loop_vec_info loop_info,gimple phi,gimple first_stmt)1771e4b17023SJohn Marino vect_is_slp_reduction (loop_vec_info loop_info, gimple phi, gimple first_stmt)
1772e4b17023SJohn Marino {
1773e4b17023SJohn Marino   struct loop *loop = (gimple_bb (phi))->loop_father;
1774e4b17023SJohn Marino   struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
1775e4b17023SJohn Marino   enum tree_code code;
1776e4b17023SJohn Marino   gimple current_stmt = NULL, loop_use_stmt = NULL, first, next_stmt;
1777e4b17023SJohn Marino   stmt_vec_info use_stmt_info, current_stmt_info;
1778e4b17023SJohn Marino   tree lhs;
1779e4b17023SJohn Marino   imm_use_iterator imm_iter;
1780e4b17023SJohn Marino   use_operand_p use_p;
1781e4b17023SJohn Marino   int nloop_uses, size = 0, n_out_of_loop_uses;
1782e4b17023SJohn Marino   bool found = false;
1783e4b17023SJohn Marino 
1784e4b17023SJohn Marino   if (loop != vect_loop)
1785e4b17023SJohn Marino     return false;
1786e4b17023SJohn Marino 
1787e4b17023SJohn Marino   lhs = PHI_RESULT (phi);
1788e4b17023SJohn Marino   code = gimple_assign_rhs_code (first_stmt);
1789e4b17023SJohn Marino   while (1)
1790e4b17023SJohn Marino     {
1791e4b17023SJohn Marino       nloop_uses = 0;
1792e4b17023SJohn Marino       n_out_of_loop_uses = 0;
1793e4b17023SJohn Marino       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
1794e4b17023SJohn Marino         {
1795e4b17023SJohn Marino 	  gimple use_stmt = USE_STMT (use_p);
1796e4b17023SJohn Marino           if (is_gimple_debug (use_stmt))
1797e4b17023SJohn Marino             continue;
1798e4b17023SJohn Marino 
1799e4b17023SJohn Marino 	  use_stmt = USE_STMT (use_p);
1800e4b17023SJohn Marino 
1801e4b17023SJohn Marino           /* Check if we got back to the reduction phi.  */
1802e4b17023SJohn Marino 	  if (use_stmt == phi)
1803e4b17023SJohn Marino             {
1804e4b17023SJohn Marino 	      loop_use_stmt = use_stmt;
1805e4b17023SJohn Marino               found = true;
1806e4b17023SJohn Marino               break;
1807e4b17023SJohn Marino             }
1808e4b17023SJohn Marino 
1809e4b17023SJohn Marino           if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
1810e4b17023SJohn Marino             {
1811e4b17023SJohn Marino               if (vinfo_for_stmt (use_stmt)
1812e4b17023SJohn Marino                   && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
1813e4b17023SJohn Marino                 {
1814e4b17023SJohn Marino                   loop_use_stmt = use_stmt;
1815e4b17023SJohn Marino                   nloop_uses++;
1816e4b17023SJohn Marino                 }
1817e4b17023SJohn Marino             }
1818e4b17023SJohn Marino            else
1819e4b17023SJohn Marino              n_out_of_loop_uses++;
1820e4b17023SJohn Marino 
1821e4b17023SJohn Marino            /* There are can be either a single use in the loop or two uses in
1822e4b17023SJohn Marino               phi nodes.  */
1823e4b17023SJohn Marino            if (nloop_uses > 1 || (n_out_of_loop_uses && nloop_uses))
1824e4b17023SJohn Marino              return false;
1825e4b17023SJohn Marino         }
1826e4b17023SJohn Marino 
1827e4b17023SJohn Marino       if (found)
1828e4b17023SJohn Marino         break;
1829e4b17023SJohn Marino 
1830e4b17023SJohn Marino       /* We reached a statement with no loop uses.  */
1831e4b17023SJohn Marino       if (nloop_uses == 0)
1832e4b17023SJohn Marino 	return false;
1833e4b17023SJohn Marino 
1834e4b17023SJohn Marino       /* This is a loop exit phi, and we haven't reached the reduction phi.  */
1835e4b17023SJohn Marino       if (gimple_code (loop_use_stmt) == GIMPLE_PHI)
1836e4b17023SJohn Marino         return false;
1837e4b17023SJohn Marino 
1838e4b17023SJohn Marino       if (!is_gimple_assign (loop_use_stmt)
1839e4b17023SJohn Marino 	  || code != gimple_assign_rhs_code (loop_use_stmt)
1840e4b17023SJohn Marino 	  || !flow_bb_inside_loop_p (loop, gimple_bb (loop_use_stmt)))
1841e4b17023SJohn Marino         return false;
1842e4b17023SJohn Marino 
1843e4b17023SJohn Marino       /* Insert USE_STMT into reduction chain.  */
1844e4b17023SJohn Marino       use_stmt_info = vinfo_for_stmt (loop_use_stmt);
1845e4b17023SJohn Marino       if (current_stmt)
1846e4b17023SJohn Marino         {
1847e4b17023SJohn Marino           current_stmt_info = vinfo_for_stmt (current_stmt);
1848e4b17023SJohn Marino 	  GROUP_NEXT_ELEMENT (current_stmt_info) = loop_use_stmt;
1849e4b17023SJohn Marino           GROUP_FIRST_ELEMENT (use_stmt_info)
1850e4b17023SJohn Marino             = GROUP_FIRST_ELEMENT (current_stmt_info);
1851e4b17023SJohn Marino         }
1852e4b17023SJohn Marino       else
1853e4b17023SJohn Marino 	GROUP_FIRST_ELEMENT (use_stmt_info) = loop_use_stmt;
1854e4b17023SJohn Marino 
1855e4b17023SJohn Marino       lhs = gimple_assign_lhs (loop_use_stmt);
1856e4b17023SJohn Marino       current_stmt = loop_use_stmt;
1857e4b17023SJohn Marino       size++;
1858e4b17023SJohn Marino    }
1859e4b17023SJohn Marino 
1860e4b17023SJohn Marino   if (!found || loop_use_stmt != phi || size < 2)
1861e4b17023SJohn Marino     return false;
1862e4b17023SJohn Marino 
1863e4b17023SJohn Marino   /* Swap the operands, if needed, to make the reduction operand be the second
1864e4b17023SJohn Marino      operand.  */
1865e4b17023SJohn Marino   lhs = PHI_RESULT (phi);
1866e4b17023SJohn Marino   next_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt));
1867e4b17023SJohn Marino   while (next_stmt)
1868e4b17023SJohn Marino     {
1869e4b17023SJohn Marino       if (gimple_assign_rhs2 (next_stmt) == lhs)
1870e4b17023SJohn Marino 	{
1871e4b17023SJohn Marino 	  tree op = gimple_assign_rhs1 (next_stmt);
1872e4b17023SJohn Marino           gimple def_stmt = NULL;
1873e4b17023SJohn Marino 
1874e4b17023SJohn Marino           if (TREE_CODE (op) == SSA_NAME)
1875e4b17023SJohn Marino             def_stmt = SSA_NAME_DEF_STMT (op);
1876e4b17023SJohn Marino 
1877e4b17023SJohn Marino 	  /* Check that the other def is either defined in the loop
1878e4b17023SJohn Marino 	     ("vect_internal_def"), or it's an induction (defined by a
1879e4b17023SJohn Marino 	     loop-header phi-node).  */
1880e4b17023SJohn Marino           if (def_stmt
1881e4b17023SJohn Marino               && gimple_bb (def_stmt)
1882e4b17023SJohn Marino 	      && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
1883e4b17023SJohn Marino               && (is_gimple_assign (def_stmt)
1884e4b17023SJohn Marino                   || is_gimple_call (def_stmt)
1885e4b17023SJohn Marino                   || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
1886e4b17023SJohn Marino                            == vect_induction_def
1887e4b17023SJohn Marino                   || (gimple_code (def_stmt) == GIMPLE_PHI
1888e4b17023SJohn Marino                       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
1889e4b17023SJohn Marino                                   == vect_internal_def
1890e4b17023SJohn Marino                       && !is_loop_header_bb_p (gimple_bb (def_stmt)))))
1891e4b17023SJohn Marino 	    {
1892e4b17023SJohn Marino 	      lhs = gimple_assign_lhs (next_stmt);
1893e4b17023SJohn Marino 	      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
1894e4b17023SJohn Marino  	      continue;
1895e4b17023SJohn Marino 	    }
1896e4b17023SJohn Marino 
1897e4b17023SJohn Marino 	  return false;
1898e4b17023SJohn Marino 	}
1899e4b17023SJohn Marino       else
1900e4b17023SJohn Marino 	{
1901e4b17023SJohn Marino           tree op = gimple_assign_rhs2 (next_stmt);
1902e4b17023SJohn Marino           gimple def_stmt = NULL;
1903e4b17023SJohn Marino 
1904e4b17023SJohn Marino           if (TREE_CODE (op) == SSA_NAME)
1905e4b17023SJohn Marino             def_stmt = SSA_NAME_DEF_STMT (op);
1906e4b17023SJohn Marino 
1907e4b17023SJohn Marino           /* Check that the other def is either defined in the loop
1908e4b17023SJohn Marino             ("vect_internal_def"), or it's an induction (defined by a
1909e4b17023SJohn Marino             loop-header phi-node).  */
1910e4b17023SJohn Marino           if (def_stmt
1911e4b17023SJohn Marino               && gimple_bb (def_stmt)
1912e4b17023SJohn Marino 	      && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
1913e4b17023SJohn Marino               && (is_gimple_assign (def_stmt)
1914e4b17023SJohn Marino                   || is_gimple_call (def_stmt)
1915e4b17023SJohn Marino                   || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
1916e4b17023SJohn Marino                               == vect_induction_def
1917e4b17023SJohn Marino                   || (gimple_code (def_stmt) == GIMPLE_PHI
1918e4b17023SJohn Marino                       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
1919e4b17023SJohn Marino                                   == vect_internal_def
1920e4b17023SJohn Marino                       && !is_loop_header_bb_p (gimple_bb (def_stmt)))))
1921e4b17023SJohn Marino   	    {
1922e4b17023SJohn Marino 	      if (vect_print_dump_info (REPORT_DETAILS))
1923e4b17023SJohn Marino 		{
1924e4b17023SJohn Marino 		  fprintf (vect_dump, "swapping oprnds: ");
1925e4b17023SJohn Marino 		  print_gimple_stmt (vect_dump, next_stmt, 0, TDF_SLIM);
1926e4b17023SJohn Marino 		}
1927e4b17023SJohn Marino 
1928e4b17023SJohn Marino 	      swap_tree_operands (next_stmt,
1929e4b17023SJohn Marino 	 		          gimple_assign_rhs1_ptr (next_stmt),
1930e4b17023SJohn Marino                                   gimple_assign_rhs2_ptr (next_stmt));
1931e4b17023SJohn Marino 	      mark_symbols_for_renaming (next_stmt);
1932e4b17023SJohn Marino 	    }
1933e4b17023SJohn Marino 	  else
1934e4b17023SJohn Marino 	    return false;
1935e4b17023SJohn Marino         }
1936e4b17023SJohn Marino 
1937e4b17023SJohn Marino       lhs = gimple_assign_lhs (next_stmt);
1938e4b17023SJohn Marino       next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
1939e4b17023SJohn Marino     }
1940e4b17023SJohn Marino 
1941e4b17023SJohn Marino   /* Save the chain for further analysis in SLP detection.  */
1942e4b17023SJohn Marino   first = GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt));
1943e4b17023SJohn Marino   VEC_safe_push (gimple, heap, LOOP_VINFO_REDUCTION_CHAINS (loop_info), first);
1944e4b17023SJohn Marino   GROUP_SIZE (vinfo_for_stmt (first)) = size;
1945e4b17023SJohn Marino 
1946e4b17023SJohn Marino   return true;
1947e4b17023SJohn Marino }
1948e4b17023SJohn Marino 
1949e4b17023SJohn Marino 
1950e4b17023SJohn Marino /* Function vect_is_simple_reduction_1
1951e4b17023SJohn Marino 
1952e4b17023SJohn Marino    (1) Detect a cross-iteration def-use cycle that represents a simple
1953e4b17023SJohn Marino    reduction computation.  We look for the following pattern:
1954e4b17023SJohn Marino 
1955e4b17023SJohn Marino    loop_header:
1956e4b17023SJohn Marino      a1 = phi < a0, a2 >
1957e4b17023SJohn Marino      a3 = ...
1958e4b17023SJohn Marino      a2 = operation (a3, a1)
1959e4b17023SJohn Marino 
1960e4b17023SJohn Marino    such that:
1961e4b17023SJohn Marino    1. operation is commutative and associative and it is safe to
1962e4b17023SJohn Marino       change the order of the computation (if CHECK_REDUCTION is true)
1963e4b17023SJohn Marino    2. no uses for a2 in the loop (a2 is used out of the loop)
1964e4b17023SJohn Marino    3. no uses of a1 in the loop besides the reduction operation
1965e4b17023SJohn Marino    4. no uses of a1 outside the loop.
1966e4b17023SJohn Marino 
1967e4b17023SJohn Marino    Conditions 1,4 are tested here.
1968e4b17023SJohn Marino    Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized.
1969e4b17023SJohn Marino 
1970e4b17023SJohn Marino    (2) Detect a cross-iteration def-use cycle in nested loops, i.e.,
1971e4b17023SJohn Marino    nested cycles, if CHECK_REDUCTION is false.
1972e4b17023SJohn Marino 
1973e4b17023SJohn Marino    (3) Detect cycles of phi nodes in outer-loop vectorization, i.e., double
1974e4b17023SJohn Marino    reductions:
1975e4b17023SJohn Marino 
1976e4b17023SJohn Marino      a1 = phi < a0, a2 >
1977e4b17023SJohn Marino      inner loop (def of a3)
1978e4b17023SJohn Marino      a2 = phi < a3 >
1979e4b17023SJohn Marino 
1980e4b17023SJohn Marino    If MODIFY is true it tries also to rework the code in-place to enable
1981e4b17023SJohn Marino    detection of more reduction patterns.  For the time being we rewrite
1982e4b17023SJohn Marino    "res -= RHS" into "res += -RHS" when it seems worthwhile.
1983e4b17023SJohn Marino */
1984e4b17023SJohn Marino 
1985e4b17023SJohn Marino static gimple
vect_is_simple_reduction_1(loop_vec_info loop_info,gimple phi,bool check_reduction,bool * double_reduc,bool modify)1986e4b17023SJohn Marino vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
1987e4b17023SJohn Marino 			    bool check_reduction, bool *double_reduc,
1988e4b17023SJohn Marino 			    bool modify)
1989e4b17023SJohn Marino {
1990e4b17023SJohn Marino   struct loop *loop = (gimple_bb (phi))->loop_father;
1991e4b17023SJohn Marino   struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
1992e4b17023SJohn Marino   edge latch_e = loop_latch_edge (loop);
1993e4b17023SJohn Marino   tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
1994e4b17023SJohn Marino   gimple def_stmt, def1 = NULL, def2 = NULL;
1995e4b17023SJohn Marino   enum tree_code orig_code, code;
1996e4b17023SJohn Marino   tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE;
1997e4b17023SJohn Marino   tree type;
1998e4b17023SJohn Marino   int nloop_uses;
1999e4b17023SJohn Marino   tree name;
2000e4b17023SJohn Marino   imm_use_iterator imm_iter;
2001e4b17023SJohn Marino   use_operand_p use_p;
2002e4b17023SJohn Marino   bool phi_def;
2003e4b17023SJohn Marino 
2004e4b17023SJohn Marino   *double_reduc = false;
2005e4b17023SJohn Marino 
2006e4b17023SJohn Marino   /* If CHECK_REDUCTION is true, we assume inner-most loop vectorization,
2007e4b17023SJohn Marino      otherwise, we assume outer loop vectorization.  */
2008e4b17023SJohn Marino   gcc_assert ((check_reduction && loop == vect_loop)
2009e4b17023SJohn Marino               || (!check_reduction && flow_loop_nested_p (vect_loop, loop)));
2010e4b17023SJohn Marino 
2011e4b17023SJohn Marino   name = PHI_RESULT (phi);
2012*95d28233SJohn Marino   /* ???  If there are no uses of the PHI result the inner loop reduction
2013*95d28233SJohn Marino      won't be detected as possibly double-reduction by vectorizable_reduction
2014*95d28233SJohn Marino      because that tries to walk the PHI arg from the preheader edge which
2015*95d28233SJohn Marino      can be constant.  See PR60382.  */
2016*95d28233SJohn Marino   if (has_zero_uses (name))
2017*95d28233SJohn Marino     return NULL;
2018e4b17023SJohn Marino   nloop_uses = 0;
2019e4b17023SJohn Marino   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
2020e4b17023SJohn Marino     {
2021e4b17023SJohn Marino       gimple use_stmt = USE_STMT (use_p);
2022e4b17023SJohn Marino       if (is_gimple_debug (use_stmt))
2023e4b17023SJohn Marino 	continue;
2024e4b17023SJohn Marino 
2025e4b17023SJohn Marino       if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2026e4b17023SJohn Marino         {
2027e4b17023SJohn Marino           if (vect_print_dump_info (REPORT_DETAILS))
2028e4b17023SJohn Marino             fprintf (vect_dump, "intermediate value used outside loop.");
2029e4b17023SJohn Marino 
2030e4b17023SJohn Marino           return NULL;
2031e4b17023SJohn Marino         }
2032e4b17023SJohn Marino 
2033e4b17023SJohn Marino       if (vinfo_for_stmt (use_stmt)
2034e4b17023SJohn Marino 	  && !is_pattern_stmt_p (vinfo_for_stmt (use_stmt)))
2035e4b17023SJohn Marino         nloop_uses++;
2036e4b17023SJohn Marino       if (nloop_uses > 1)
2037e4b17023SJohn Marino         {
2038e4b17023SJohn Marino           if (vect_print_dump_info (REPORT_DETAILS))
2039e4b17023SJohn Marino             fprintf (vect_dump, "reduction used in loop.");
2040e4b17023SJohn Marino           return NULL;
2041e4b17023SJohn Marino         }
2042e4b17023SJohn Marino     }
2043e4b17023SJohn Marino 
2044e4b17023SJohn Marino   if (TREE_CODE (loop_arg) != SSA_NAME)
2045e4b17023SJohn Marino     {
2046e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_DETAILS))
2047e4b17023SJohn Marino 	{
2048e4b17023SJohn Marino 	  fprintf (vect_dump, "reduction: not ssa_name: ");
2049e4b17023SJohn Marino 	  print_generic_expr (vect_dump, loop_arg, TDF_SLIM);
2050e4b17023SJohn Marino 	}
2051e4b17023SJohn Marino       return NULL;
2052e4b17023SJohn Marino     }
2053e4b17023SJohn Marino 
2054e4b17023SJohn Marino   def_stmt = SSA_NAME_DEF_STMT (loop_arg);
2055e4b17023SJohn Marino   if (!def_stmt)
2056e4b17023SJohn Marino     {
2057e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_DETAILS))
2058e4b17023SJohn Marino 	fprintf (vect_dump, "reduction: no def_stmt.");
2059e4b17023SJohn Marino       return NULL;
2060e4b17023SJohn Marino     }
2061e4b17023SJohn Marino 
2062e4b17023SJohn Marino   if (!is_gimple_assign (def_stmt) && gimple_code (def_stmt) != GIMPLE_PHI)
2063e4b17023SJohn Marino     {
2064e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_DETAILS))
2065e4b17023SJohn Marino         print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM);
2066e4b17023SJohn Marino       return NULL;
2067e4b17023SJohn Marino     }
2068e4b17023SJohn Marino 
2069e4b17023SJohn Marino   if (is_gimple_assign (def_stmt))
2070e4b17023SJohn Marino     {
2071e4b17023SJohn Marino       name = gimple_assign_lhs (def_stmt);
2072e4b17023SJohn Marino       phi_def = false;
2073e4b17023SJohn Marino     }
2074e4b17023SJohn Marino   else
2075e4b17023SJohn Marino     {
2076e4b17023SJohn Marino       name = PHI_RESULT (def_stmt);
2077e4b17023SJohn Marino       phi_def = true;
2078e4b17023SJohn Marino     }
2079e4b17023SJohn Marino 
2080e4b17023SJohn Marino   nloop_uses = 0;
2081e4b17023SJohn Marino   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
2082e4b17023SJohn Marino     {
2083e4b17023SJohn Marino       gimple use_stmt = USE_STMT (use_p);
2084e4b17023SJohn Marino       if (is_gimple_debug (use_stmt))
2085e4b17023SJohn Marino 	continue;
2086e4b17023SJohn Marino       if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))
2087e4b17023SJohn Marino 	  && vinfo_for_stmt (use_stmt)
2088e4b17023SJohn Marino 	  && !is_pattern_stmt_p (vinfo_for_stmt (use_stmt)))
2089e4b17023SJohn Marino 	nloop_uses++;
2090e4b17023SJohn Marino       if (nloop_uses > 1)
2091e4b17023SJohn Marino 	{
2092e4b17023SJohn Marino 	  if (vect_print_dump_info (REPORT_DETAILS))
2093e4b17023SJohn Marino 	    fprintf (vect_dump, "reduction used in loop.");
2094e4b17023SJohn Marino 	  return NULL;
2095e4b17023SJohn Marino 	}
2096e4b17023SJohn Marino     }
2097e4b17023SJohn Marino 
2098e4b17023SJohn Marino   /* If DEF_STMT is a phi node itself, we expect it to have a single argument
2099e4b17023SJohn Marino      defined in the inner loop.  */
2100e4b17023SJohn Marino   if (phi_def)
2101e4b17023SJohn Marino     {
2102e4b17023SJohn Marino       op1 = PHI_ARG_DEF (def_stmt, 0);
2103e4b17023SJohn Marino 
2104e4b17023SJohn Marino       if (gimple_phi_num_args (def_stmt) != 1
2105e4b17023SJohn Marino           || TREE_CODE (op1) != SSA_NAME)
2106e4b17023SJohn Marino         {
2107e4b17023SJohn Marino           if (vect_print_dump_info (REPORT_DETAILS))
2108e4b17023SJohn Marino             fprintf (vect_dump, "unsupported phi node definition.");
2109e4b17023SJohn Marino 
2110e4b17023SJohn Marino           return NULL;
2111e4b17023SJohn Marino         }
2112e4b17023SJohn Marino 
2113e4b17023SJohn Marino       def1 = SSA_NAME_DEF_STMT (op1);
2114e4b17023SJohn Marino       if (flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
2115e4b17023SJohn Marino           && loop->inner
2116e4b17023SJohn Marino           && flow_bb_inside_loop_p (loop->inner, gimple_bb (def1))
2117e4b17023SJohn Marino           && is_gimple_assign (def1))
2118e4b17023SJohn Marino         {
2119e4b17023SJohn Marino           if (vect_print_dump_info (REPORT_DETAILS))
2120e4b17023SJohn Marino             report_vect_op (def_stmt, "detected double reduction: ");
2121e4b17023SJohn Marino 
2122e4b17023SJohn Marino           *double_reduc = true;
2123e4b17023SJohn Marino           return def_stmt;
2124e4b17023SJohn Marino         }
2125e4b17023SJohn Marino 
2126e4b17023SJohn Marino       return NULL;
2127e4b17023SJohn Marino     }
2128e4b17023SJohn Marino 
2129e4b17023SJohn Marino   code = orig_code = gimple_assign_rhs_code (def_stmt);
2130e4b17023SJohn Marino 
2131e4b17023SJohn Marino   /* We can handle "res -= x[i]", which is non-associative by
2132e4b17023SJohn Marino      simply rewriting this into "res += -x[i]".  Avoid changing
2133e4b17023SJohn Marino      gimple instruction for the first simple tests and only do this
2134e4b17023SJohn Marino      if we're allowed to change code at all.  */
2135e4b17023SJohn Marino   if (code == MINUS_EXPR
2136e4b17023SJohn Marino       && modify
2137e4b17023SJohn Marino       && (op1 = gimple_assign_rhs1 (def_stmt))
2138e4b17023SJohn Marino       && TREE_CODE (op1) == SSA_NAME
2139e4b17023SJohn Marino       && SSA_NAME_DEF_STMT (op1) == phi)
2140e4b17023SJohn Marino     code = PLUS_EXPR;
2141e4b17023SJohn Marino 
2142e4b17023SJohn Marino   if (check_reduction
2143e4b17023SJohn Marino       && (!commutative_tree_code (code) || !associative_tree_code (code)))
2144e4b17023SJohn Marino     {
2145e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_DETAILS))
2146e4b17023SJohn Marino         report_vect_op (def_stmt, "reduction: not commutative/associative: ");
2147e4b17023SJohn Marino       return NULL;
2148e4b17023SJohn Marino     }
2149e4b17023SJohn Marino 
2150e4b17023SJohn Marino   if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
2151e4b17023SJohn Marino     {
2152e4b17023SJohn Marino       if (code != COND_EXPR)
2153e4b17023SJohn Marino         {
2154e4b17023SJohn Marino           if (vect_print_dump_info (REPORT_DETAILS))
2155e4b17023SJohn Marino 	    report_vect_op (def_stmt, "reduction: not binary operation: ");
2156e4b17023SJohn Marino 
2157e4b17023SJohn Marino           return NULL;
2158e4b17023SJohn Marino         }
2159e4b17023SJohn Marino 
2160e4b17023SJohn Marino       op3 = gimple_assign_rhs1 (def_stmt);
2161e4b17023SJohn Marino       if (COMPARISON_CLASS_P (op3))
2162e4b17023SJohn Marino         {
2163e4b17023SJohn Marino           op4 = TREE_OPERAND (op3, 1);
2164e4b17023SJohn Marino           op3 = TREE_OPERAND (op3, 0);
2165e4b17023SJohn Marino         }
2166e4b17023SJohn Marino 
2167e4b17023SJohn Marino       op1 = gimple_assign_rhs2 (def_stmt);
2168e4b17023SJohn Marino       op2 = gimple_assign_rhs3 (def_stmt);
2169e4b17023SJohn Marino 
2170e4b17023SJohn Marino       if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
2171e4b17023SJohn Marino         {
2172e4b17023SJohn Marino           if (vect_print_dump_info (REPORT_DETAILS))
2173e4b17023SJohn Marino             report_vect_op (def_stmt, "reduction: uses not ssa_names: ");
2174e4b17023SJohn Marino 
2175e4b17023SJohn Marino           return NULL;
2176e4b17023SJohn Marino         }
2177e4b17023SJohn Marino     }
2178e4b17023SJohn Marino   else
2179e4b17023SJohn Marino     {
2180e4b17023SJohn Marino       op1 = gimple_assign_rhs1 (def_stmt);
2181e4b17023SJohn Marino       op2 = gimple_assign_rhs2 (def_stmt);
2182e4b17023SJohn Marino 
2183e4b17023SJohn Marino       if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
2184e4b17023SJohn Marino         {
2185e4b17023SJohn Marino           if (vect_print_dump_info (REPORT_DETAILS))
2186e4b17023SJohn Marino 	    report_vect_op (def_stmt, "reduction: uses not ssa_names: ");
2187e4b17023SJohn Marino 
2188e4b17023SJohn Marino           return NULL;
2189e4b17023SJohn Marino         }
2190e4b17023SJohn Marino    }
2191e4b17023SJohn Marino 
2192e4b17023SJohn Marino   type = TREE_TYPE (gimple_assign_lhs (def_stmt));
2193e4b17023SJohn Marino   if ((TREE_CODE (op1) == SSA_NAME
2194e4b17023SJohn Marino        && !types_compatible_p (type,TREE_TYPE (op1)))
2195e4b17023SJohn Marino       || (TREE_CODE (op2) == SSA_NAME
2196e4b17023SJohn Marino           && !types_compatible_p (type, TREE_TYPE (op2)))
2197e4b17023SJohn Marino       || (op3 && TREE_CODE (op3) == SSA_NAME
2198e4b17023SJohn Marino           && !types_compatible_p (type, TREE_TYPE (op3)))
2199e4b17023SJohn Marino       || (op4 && TREE_CODE (op4) == SSA_NAME
2200e4b17023SJohn Marino           && !types_compatible_p (type, TREE_TYPE (op4))))
2201e4b17023SJohn Marino     {
2202e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_DETAILS))
2203e4b17023SJohn Marino         {
2204e4b17023SJohn Marino           fprintf (vect_dump, "reduction: multiple types: operation type: ");
2205e4b17023SJohn Marino           print_generic_expr (vect_dump, type, TDF_SLIM);
2206e4b17023SJohn Marino           fprintf (vect_dump, ", operands types: ");
2207e4b17023SJohn Marino           print_generic_expr (vect_dump, TREE_TYPE (op1), TDF_SLIM);
2208e4b17023SJohn Marino           fprintf (vect_dump, ",");
2209e4b17023SJohn Marino           print_generic_expr (vect_dump, TREE_TYPE (op2), TDF_SLIM);
2210e4b17023SJohn Marino           if (op3)
2211e4b17023SJohn Marino             {
2212e4b17023SJohn Marino               fprintf (vect_dump, ",");
2213e4b17023SJohn Marino               print_generic_expr (vect_dump, TREE_TYPE (op3), TDF_SLIM);
2214e4b17023SJohn Marino             }
2215e4b17023SJohn Marino 
2216e4b17023SJohn Marino           if (op4)
2217e4b17023SJohn Marino             {
2218e4b17023SJohn Marino               fprintf (vect_dump, ",");
2219e4b17023SJohn Marino               print_generic_expr (vect_dump, TREE_TYPE (op4), TDF_SLIM);
2220e4b17023SJohn Marino             }
2221e4b17023SJohn Marino         }
2222e4b17023SJohn Marino 
2223e4b17023SJohn Marino       return NULL;
2224e4b17023SJohn Marino     }
2225e4b17023SJohn Marino 
2226e4b17023SJohn Marino   /* Check that it's ok to change the order of the computation.
2227e4b17023SJohn Marino      Generally, when vectorizing a reduction we change the order of the
2228e4b17023SJohn Marino      computation.  This may change the behavior of the program in some
2229e4b17023SJohn Marino      cases, so we need to check that this is ok.  One exception is when
2230e4b17023SJohn Marino      vectorizing an outer-loop: the inner-loop is executed sequentially,
2231e4b17023SJohn Marino      and therefore vectorizing reductions in the inner-loop during
2232e4b17023SJohn Marino      outer-loop vectorization is safe.  */
2233e4b17023SJohn Marino 
2234e4b17023SJohn Marino   /* CHECKME: check for !flag_finite_math_only too?  */
2235e4b17023SJohn Marino   if (SCALAR_FLOAT_TYPE_P (type) && !flag_associative_math
2236e4b17023SJohn Marino       && check_reduction)
2237e4b17023SJohn Marino     {
2238e4b17023SJohn Marino       /* Changing the order of operations changes the semantics.  */
2239e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_DETAILS))
2240e4b17023SJohn Marino 	report_vect_op (def_stmt, "reduction: unsafe fp math optimization: ");
2241e4b17023SJohn Marino       return NULL;
2242e4b17023SJohn Marino     }
2243e4b17023SJohn Marino   else if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type)
2244e4b17023SJohn Marino 	   && check_reduction)
2245e4b17023SJohn Marino     {
2246e4b17023SJohn Marino       /* Changing the order of operations changes the semantics.  */
2247e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_DETAILS))
2248e4b17023SJohn Marino 	report_vect_op (def_stmt, "reduction: unsafe int math optimization: ");
2249e4b17023SJohn Marino       return NULL;
2250e4b17023SJohn Marino     }
2251e4b17023SJohn Marino   else if (SAT_FIXED_POINT_TYPE_P (type) && check_reduction)
2252e4b17023SJohn Marino     {
2253e4b17023SJohn Marino       /* Changing the order of operations changes the semantics.  */
2254e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_DETAILS))
2255e4b17023SJohn Marino 	report_vect_op (def_stmt,
2256e4b17023SJohn Marino 			"reduction: unsafe fixed-point math optimization: ");
2257e4b17023SJohn Marino       return NULL;
2258e4b17023SJohn Marino     }
2259e4b17023SJohn Marino 
2260e4b17023SJohn Marino   /* If we detected "res -= x[i]" earlier, rewrite it into
2261e4b17023SJohn Marino      "res += -x[i]" now.  If this turns out to be useless reassoc
2262e4b17023SJohn Marino      will clean it up again.  */
2263e4b17023SJohn Marino   if (orig_code == MINUS_EXPR)
2264e4b17023SJohn Marino     {
2265e4b17023SJohn Marino       tree rhs = gimple_assign_rhs2 (def_stmt);
22665ce9237cSJohn Marino       tree var = TREE_CODE (rhs) == SSA_NAME
22675ce9237cSJohn Marino 		 ? SSA_NAME_VAR (rhs)
22685ce9237cSJohn Marino 		 : create_tmp_reg (TREE_TYPE (rhs), NULL);
22695ce9237cSJohn Marino       tree negrhs = make_ssa_name (var, NULL);
2270e4b17023SJohn Marino       gimple negate_stmt = gimple_build_assign_with_ops (NEGATE_EXPR, negrhs,
2271e4b17023SJohn Marino 							 rhs, NULL);
2272e4b17023SJohn Marino       gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
2273e4b17023SJohn Marino       set_vinfo_for_stmt (negate_stmt, new_stmt_vec_info (negate_stmt,
2274e4b17023SJohn Marino 							  loop_info, NULL));
2275e4b17023SJohn Marino       gsi_insert_before (&gsi, negate_stmt, GSI_NEW_STMT);
2276e4b17023SJohn Marino       gimple_assign_set_rhs2 (def_stmt, negrhs);
2277e4b17023SJohn Marino       gimple_assign_set_rhs_code (def_stmt, PLUS_EXPR);
2278e4b17023SJohn Marino       update_stmt (def_stmt);
2279e4b17023SJohn Marino     }
2280e4b17023SJohn Marino 
2281e4b17023SJohn Marino   /* Reduction is safe. We're dealing with one of the following:
2282e4b17023SJohn Marino      1) integer arithmetic and no trapv
2283e4b17023SJohn Marino      2) floating point arithmetic, and special flags permit this optimization
2284e4b17023SJohn Marino      3) nested cycle (i.e., outer loop vectorization).  */
2285e4b17023SJohn Marino   if (TREE_CODE (op1) == SSA_NAME)
2286e4b17023SJohn Marino     def1 = SSA_NAME_DEF_STMT (op1);
2287e4b17023SJohn Marino 
2288e4b17023SJohn Marino   if (TREE_CODE (op2) == SSA_NAME)
2289e4b17023SJohn Marino     def2 = SSA_NAME_DEF_STMT (op2);
2290e4b17023SJohn Marino 
2291e4b17023SJohn Marino   if (code != COND_EXPR
2292e4b17023SJohn Marino       && ((!def1 || gimple_nop_p (def1)) && (!def2 || gimple_nop_p (def2))))
2293e4b17023SJohn Marino     {
2294e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_DETAILS))
2295e4b17023SJohn Marino 	report_vect_op (def_stmt, "reduction: no defs for operands: ");
2296e4b17023SJohn Marino       return NULL;
2297e4b17023SJohn Marino     }
2298e4b17023SJohn Marino 
2299e4b17023SJohn Marino   /* Check that one def is the reduction def, defined by PHI,
2300e4b17023SJohn Marino      the other def is either defined in the loop ("vect_internal_def"),
2301e4b17023SJohn Marino      or it's an induction (defined by a loop-header phi-node).  */
2302e4b17023SJohn Marino 
2303e4b17023SJohn Marino   if (def2 && def2 == phi
2304e4b17023SJohn Marino       && (code == COND_EXPR
2305e4b17023SJohn Marino 	  || !def1 || gimple_nop_p (def1)
2306e4b17023SJohn Marino           || (def1 && flow_bb_inside_loop_p (loop, gimple_bb (def1))
2307e4b17023SJohn Marino               && (is_gimple_assign (def1)
2308e4b17023SJohn Marino 		  || is_gimple_call (def1)
2309e4b17023SJohn Marino   	          || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
2310e4b17023SJohn Marino                       == vect_induction_def
2311e4b17023SJohn Marino    	          || (gimple_code (def1) == GIMPLE_PHI
2312e4b17023SJohn Marino 	              && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
2313e4b17023SJohn Marino                           == vect_internal_def
2314e4b17023SJohn Marino  	              && !is_loop_header_bb_p (gimple_bb (def1)))))))
2315e4b17023SJohn Marino     {
2316e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_DETAILS))
2317e4b17023SJohn Marino 	report_vect_op (def_stmt, "detected reduction: ");
2318e4b17023SJohn Marino       return def_stmt;
2319e4b17023SJohn Marino     }
2320e4b17023SJohn Marino 
2321e4b17023SJohn Marino   if (def1 && def1 == phi
2322e4b17023SJohn Marino       && (code == COND_EXPR
2323e4b17023SJohn Marino 	  || !def2 || gimple_nop_p (def2)
2324e4b17023SJohn Marino           || (def2 && flow_bb_inside_loop_p (loop, gimple_bb (def2))
2325e4b17023SJohn Marino  	      && (is_gimple_assign (def2)
2326e4b17023SJohn Marino 		  || is_gimple_call (def2)
2327e4b17023SJohn Marino 	          || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
2328e4b17023SJohn Marino                       == vect_induction_def
2329e4b17023SJohn Marino  	          || (gimple_code (def2) == GIMPLE_PHI
2330e4b17023SJohn Marino 		      && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
2331e4b17023SJohn Marino                           == vect_internal_def
2332e4b17023SJohn Marino 		      && !is_loop_header_bb_p (gimple_bb (def2)))))))
2333e4b17023SJohn Marino     {
2334e4b17023SJohn Marino       if (check_reduction)
2335e4b17023SJohn Marino         {
2336e4b17023SJohn Marino           /* Swap operands (just for simplicity - so that the rest of the code
2337e4b17023SJohn Marino 	     can assume that the reduction variable is always the last (second)
2338e4b17023SJohn Marino 	     argument).  */
2339e4b17023SJohn Marino           if (vect_print_dump_info (REPORT_DETAILS))
2340e4b17023SJohn Marino 	    report_vect_op (def_stmt,
2341e4b17023SJohn Marino 	  	            "detected reduction: need to swap operands: ");
2342e4b17023SJohn Marino 
2343e4b17023SJohn Marino           swap_tree_operands (def_stmt, gimple_assign_rhs1_ptr (def_stmt),
2344e4b17023SJohn Marino  			      gimple_assign_rhs2_ptr (def_stmt));
2345e4b17023SJohn Marino         }
2346e4b17023SJohn Marino       else
2347e4b17023SJohn Marino         {
2348e4b17023SJohn Marino           if (vect_print_dump_info (REPORT_DETAILS))
2349e4b17023SJohn Marino             report_vect_op (def_stmt, "detected reduction: ");
2350e4b17023SJohn Marino         }
2351e4b17023SJohn Marino 
2352e4b17023SJohn Marino       return def_stmt;
2353e4b17023SJohn Marino     }
2354e4b17023SJohn Marino 
2355e4b17023SJohn Marino   /* Try to find SLP reduction chain.  */
2356e4b17023SJohn Marino   if (check_reduction && vect_is_slp_reduction (loop_info, phi, def_stmt))
2357e4b17023SJohn Marino     {
2358e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_DETAILS))
2359e4b17023SJohn Marino         report_vect_op (def_stmt, "reduction: detected reduction chain: ");
2360e4b17023SJohn Marino 
2361e4b17023SJohn Marino       return def_stmt;
2362e4b17023SJohn Marino     }
2363e4b17023SJohn Marino 
2364e4b17023SJohn Marino   if (vect_print_dump_info (REPORT_DETAILS))
2365e4b17023SJohn Marino     report_vect_op (def_stmt, "reduction: unknown pattern: ");
2366e4b17023SJohn Marino 
2367e4b17023SJohn Marino   return NULL;
2368e4b17023SJohn Marino }
2369e4b17023SJohn Marino 
2370e4b17023SJohn Marino /* Wrapper around vect_is_simple_reduction_1, that won't modify code
2371e4b17023SJohn Marino    in-place.  Arguments as there.  */
2372e4b17023SJohn Marino 
2373e4b17023SJohn Marino static gimple
vect_is_simple_reduction(loop_vec_info loop_info,gimple phi,bool check_reduction,bool * double_reduc)2374e4b17023SJohn Marino vect_is_simple_reduction (loop_vec_info loop_info, gimple phi,
2375e4b17023SJohn Marino                           bool check_reduction, bool *double_reduc)
2376e4b17023SJohn Marino {
2377e4b17023SJohn Marino   return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
2378e4b17023SJohn Marino 				     double_reduc, false);
2379e4b17023SJohn Marino }
2380e4b17023SJohn Marino 
2381e4b17023SJohn Marino /* Wrapper around vect_is_simple_reduction_1, which will modify code
2382e4b17023SJohn Marino    in-place if it enables detection of more reductions.  Arguments
2383e4b17023SJohn Marino    as there.  */
2384e4b17023SJohn Marino 
2385e4b17023SJohn Marino gimple
vect_force_simple_reduction(loop_vec_info loop_info,gimple phi,bool check_reduction,bool * double_reduc)2386e4b17023SJohn Marino vect_force_simple_reduction (loop_vec_info loop_info, gimple phi,
2387e4b17023SJohn Marino                           bool check_reduction, bool *double_reduc)
2388e4b17023SJohn Marino {
2389e4b17023SJohn Marino   return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
2390e4b17023SJohn Marino 				     double_reduc, true);
2391e4b17023SJohn Marino }
2392e4b17023SJohn Marino 
2393e4b17023SJohn Marino /* Calculate the cost of one scalar iteration of the loop.  */
2394e4b17023SJohn Marino int
vect_get_single_scalar_iteration_cost(loop_vec_info loop_vinfo)23955ce9237cSJohn Marino vect_get_single_scalar_iteration_cost (loop_vec_info loop_vinfo)
2396e4b17023SJohn Marino {
2397e4b17023SJohn Marino   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2398e4b17023SJohn Marino   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2399e4b17023SJohn Marino   int nbbs = loop->num_nodes, factor, scalar_single_iter_cost = 0;
2400e4b17023SJohn Marino   int innerloop_iters, i, stmt_cost;
2401e4b17023SJohn Marino 
2402e4b17023SJohn Marino   /* Count statements in scalar loop.  Using this as scalar cost for a single
2403e4b17023SJohn Marino      iteration for now.
2404e4b17023SJohn Marino 
2405e4b17023SJohn Marino      TODO: Add outer loop support.
2406e4b17023SJohn Marino 
2407e4b17023SJohn Marino      TODO: Consider assigning different costs to different scalar
2408e4b17023SJohn Marino      statements.  */
2409e4b17023SJohn Marino 
2410e4b17023SJohn Marino   /* FORNOW.  */
2411e4b17023SJohn Marino   innerloop_iters = 1;
2412e4b17023SJohn Marino   if (loop->inner)
2413e4b17023SJohn Marino     innerloop_iters = 50; /* FIXME */
2414e4b17023SJohn Marino 
2415e4b17023SJohn Marino   for (i = 0; i < nbbs; i++)
2416e4b17023SJohn Marino     {
2417e4b17023SJohn Marino       gimple_stmt_iterator si;
2418e4b17023SJohn Marino       basic_block bb = bbs[i];
2419e4b17023SJohn Marino 
2420e4b17023SJohn Marino       if (bb->loop_father == loop->inner)
2421e4b17023SJohn Marino         factor = innerloop_iters;
2422e4b17023SJohn Marino       else
2423e4b17023SJohn Marino         factor = 1;
2424e4b17023SJohn Marino 
2425e4b17023SJohn Marino       for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2426e4b17023SJohn Marino         {
2427e4b17023SJohn Marino           gimple stmt = gsi_stmt (si);
2428e4b17023SJohn Marino           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2429e4b17023SJohn Marino 
2430e4b17023SJohn Marino           if (!is_gimple_assign (stmt) && !is_gimple_call (stmt))
2431e4b17023SJohn Marino             continue;
2432e4b17023SJohn Marino 
2433e4b17023SJohn Marino           /* Skip stmts that are not vectorized inside the loop.  */
2434e4b17023SJohn Marino           if (stmt_info
2435e4b17023SJohn Marino               && !STMT_VINFO_RELEVANT_P (stmt_info)
2436e4b17023SJohn Marino               && (!STMT_VINFO_LIVE_P (stmt_info)
2437e4b17023SJohn Marino                   || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
2438e4b17023SJohn Marino 	      && !STMT_VINFO_IN_PATTERN_P (stmt_info))
2439e4b17023SJohn Marino             continue;
2440e4b17023SJohn Marino 
2441e4b17023SJohn Marino           if (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
2442e4b17023SJohn Marino             {
2443e4b17023SJohn Marino               if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
2444e4b17023SJohn Marino                stmt_cost = vect_get_cost (scalar_load);
2445e4b17023SJohn Marino              else
2446e4b17023SJohn Marino                stmt_cost = vect_get_cost (scalar_store);
2447e4b17023SJohn Marino             }
2448e4b17023SJohn Marino           else
2449e4b17023SJohn Marino             stmt_cost = vect_get_cost (scalar_stmt);
2450e4b17023SJohn Marino 
2451e4b17023SJohn Marino           scalar_single_iter_cost += stmt_cost * factor;
2452e4b17023SJohn Marino         }
2453e4b17023SJohn Marino     }
2454e4b17023SJohn Marino   return scalar_single_iter_cost;
2455e4b17023SJohn Marino }
2456e4b17023SJohn Marino 
2457e4b17023SJohn Marino /* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times.  */
2458e4b17023SJohn Marino int
vect_get_known_peeling_cost(loop_vec_info loop_vinfo,int peel_iters_prologue,int * peel_iters_epilogue,int scalar_single_iter_cost)2459e4b17023SJohn Marino vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue,
2460e4b17023SJohn Marino                              int *peel_iters_epilogue,
2461e4b17023SJohn Marino                              int scalar_single_iter_cost)
2462e4b17023SJohn Marino {
2463e4b17023SJohn Marino   int peel_guard_costs = 0;
2464e4b17023SJohn Marino   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2465e4b17023SJohn Marino 
2466e4b17023SJohn Marino   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
2467e4b17023SJohn Marino     {
2468e4b17023SJohn Marino       *peel_iters_epilogue = vf/2;
2469e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_COST))
2470e4b17023SJohn Marino         fprintf (vect_dump, "cost model: "
2471e4b17023SJohn Marino                             "epilogue peel iters set to vf/2 because "
2472e4b17023SJohn Marino                             "loop iterations are unknown .");
2473e4b17023SJohn Marino 
2474e4b17023SJohn Marino       /* If peeled iterations are known but number of scalar loop
2475e4b17023SJohn Marino          iterations are unknown, count a taken branch per peeled loop.  */
2476e4b17023SJohn Marino       peel_guard_costs =  2 * vect_get_cost (cond_branch_taken);
2477e4b17023SJohn Marino     }
2478e4b17023SJohn Marino   else
2479e4b17023SJohn Marino     {
2480e4b17023SJohn Marino       int niters = LOOP_VINFO_INT_NITERS (loop_vinfo);
2481e4b17023SJohn Marino       peel_iters_prologue = niters < peel_iters_prologue ?
2482e4b17023SJohn Marino                             niters : peel_iters_prologue;
2483e4b17023SJohn Marino       *peel_iters_epilogue = (niters - peel_iters_prologue) % vf;
2484e4b17023SJohn Marino       /* If we need to peel for gaps, but no peeling is required, we have to
2485e4b17023SJohn Marino 	 peel VF iterations.  */
2486e4b17023SJohn Marino       if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && !*peel_iters_epilogue)
2487e4b17023SJohn Marino         *peel_iters_epilogue = vf;
2488e4b17023SJohn Marino     }
2489e4b17023SJohn Marino 
2490e4b17023SJohn Marino    return (peel_iters_prologue * scalar_single_iter_cost)
2491e4b17023SJohn Marino             + (*peel_iters_epilogue * scalar_single_iter_cost)
2492e4b17023SJohn Marino            + peel_guard_costs;
2493e4b17023SJohn Marino }
2494e4b17023SJohn Marino 
2495e4b17023SJohn Marino /* Function vect_estimate_min_profitable_iters
2496e4b17023SJohn Marino 
2497e4b17023SJohn Marino    Return the number of iterations required for the vector version of the
2498e4b17023SJohn Marino    loop to be profitable relative to the cost of the scalar version of the
2499e4b17023SJohn Marino    loop.
2500e4b17023SJohn Marino 
2501e4b17023SJohn Marino    TODO: Take profile info into account before making vectorization
2502e4b17023SJohn Marino    decisions, if available.  */
2503e4b17023SJohn Marino 
2504e4b17023SJohn Marino int
vect_estimate_min_profitable_iters(loop_vec_info loop_vinfo)2505e4b17023SJohn Marino vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
2506e4b17023SJohn Marino {
2507e4b17023SJohn Marino   int i;
2508e4b17023SJohn Marino   int min_profitable_iters;
2509e4b17023SJohn Marino   int peel_iters_prologue;
2510e4b17023SJohn Marino   int peel_iters_epilogue;
2511e4b17023SJohn Marino   int vec_inside_cost = 0;
2512e4b17023SJohn Marino   int vec_outside_cost = 0;
2513e4b17023SJohn Marino   int scalar_single_iter_cost = 0;
2514e4b17023SJohn Marino   int scalar_outside_cost = 0;
2515e4b17023SJohn Marino   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2516e4b17023SJohn Marino   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2517e4b17023SJohn Marino   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2518e4b17023SJohn Marino   int nbbs = loop->num_nodes;
2519e4b17023SJohn Marino   int npeel = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
2520e4b17023SJohn Marino   int peel_guard_costs = 0;
2521e4b17023SJohn Marino   int innerloop_iters = 0, factor;
2522e4b17023SJohn Marino   VEC (slp_instance, heap) *slp_instances;
2523e4b17023SJohn Marino   slp_instance instance;
2524e4b17023SJohn Marino 
2525e4b17023SJohn Marino   /* Cost model disabled.  */
2526e4b17023SJohn Marino   if (!flag_vect_cost_model)
2527e4b17023SJohn Marino     {
2528e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_COST))
2529e4b17023SJohn Marino         fprintf (vect_dump, "cost model disabled.");
2530e4b17023SJohn Marino       return 0;
2531e4b17023SJohn Marino     }
2532e4b17023SJohn Marino 
2533e4b17023SJohn Marino   /* Requires loop versioning tests to handle misalignment.  */
2534e4b17023SJohn Marino   if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
2535e4b17023SJohn Marino     {
2536e4b17023SJohn Marino       /*  FIXME: Make cost depend on complexity of individual check.  */
2537e4b17023SJohn Marino       vec_outside_cost +=
2538e4b17023SJohn Marino 	VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
2539e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_COST))
2540e4b17023SJohn Marino         fprintf (vect_dump, "cost model: Adding cost of checks for loop "
2541e4b17023SJohn Marino                  "versioning to treat misalignment.\n");
2542e4b17023SJohn Marino     }
2543e4b17023SJohn Marino 
2544e4b17023SJohn Marino   /* Requires loop versioning with alias checks.  */
2545e4b17023SJohn Marino   if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2546e4b17023SJohn Marino     {
2547e4b17023SJohn Marino       /*  FIXME: Make cost depend on complexity of individual check.  */
2548e4b17023SJohn Marino       vec_outside_cost +=
2549e4b17023SJohn Marino         VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo));
2550e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_COST))
2551e4b17023SJohn Marino         fprintf (vect_dump, "cost model: Adding cost of checks for loop "
2552e4b17023SJohn Marino                  "versioning aliasing.\n");
2553e4b17023SJohn Marino     }
2554e4b17023SJohn Marino 
2555e4b17023SJohn Marino   if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
2556e4b17023SJohn Marino       || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2557e4b17023SJohn Marino     vec_outside_cost += vect_get_cost (cond_branch_taken);
2558e4b17023SJohn Marino 
2559e4b17023SJohn Marino   /* Count statements in scalar loop.  Using this as scalar cost for a single
2560e4b17023SJohn Marino      iteration for now.
2561e4b17023SJohn Marino 
2562e4b17023SJohn Marino      TODO: Add outer loop support.
2563e4b17023SJohn Marino 
2564e4b17023SJohn Marino      TODO: Consider assigning different costs to different scalar
2565e4b17023SJohn Marino      statements.  */
2566e4b17023SJohn Marino 
2567e4b17023SJohn Marino   /* FORNOW.  */
2568e4b17023SJohn Marino   if (loop->inner)
2569e4b17023SJohn Marino     innerloop_iters = 50; /* FIXME */
2570e4b17023SJohn Marino 
2571e4b17023SJohn Marino   for (i = 0; i < nbbs; i++)
2572e4b17023SJohn Marino     {
2573e4b17023SJohn Marino       gimple_stmt_iterator si;
2574e4b17023SJohn Marino       basic_block bb = bbs[i];
2575e4b17023SJohn Marino 
2576e4b17023SJohn Marino       if (bb->loop_father == loop->inner)
2577e4b17023SJohn Marino  	factor = innerloop_iters;
2578e4b17023SJohn Marino       else
2579e4b17023SJohn Marino  	factor = 1;
2580e4b17023SJohn Marino 
2581e4b17023SJohn Marino       for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2582e4b17023SJohn Marino 	{
2583e4b17023SJohn Marino 	  gimple stmt = gsi_stmt (si);
2584e4b17023SJohn Marino 	  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2585e4b17023SJohn Marino 
2586e4b17023SJohn Marino 	  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2587e4b17023SJohn Marino 	    {
2588e4b17023SJohn Marino 	      stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2589e4b17023SJohn Marino 	      stmt_info = vinfo_for_stmt (stmt);
2590e4b17023SJohn Marino 	    }
2591e4b17023SJohn Marino 
2592e4b17023SJohn Marino 	  /* Skip stmts that are not vectorized inside the loop.  */
2593e4b17023SJohn Marino 	  if (!STMT_VINFO_RELEVANT_P (stmt_info)
2594e4b17023SJohn Marino 	      && (!STMT_VINFO_LIVE_P (stmt_info)
2595e4b17023SJohn Marino                  || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info))))
2596e4b17023SJohn Marino 	    continue;
2597e4b17023SJohn Marino 
2598e4b17023SJohn Marino 	  vec_inside_cost += STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) * factor;
2599e4b17023SJohn Marino 	  /* FIXME: for stmts in the inner-loop in outer-loop vectorization,
2600e4b17023SJohn Marino 	     some of the "outside" costs are generated inside the outer-loop.  */
2601e4b17023SJohn Marino 	  vec_outside_cost += STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info);
2602e4b17023SJohn Marino           if (is_pattern_stmt_p (stmt_info)
2603e4b17023SJohn Marino 	      && STMT_VINFO_PATTERN_DEF_SEQ (stmt_info))
2604e4b17023SJohn Marino             {
2605e4b17023SJohn Marino 	      gimple_stmt_iterator gsi;
2606e4b17023SJohn Marino 
2607e4b17023SJohn Marino 	      for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
2608e4b17023SJohn Marino 		   !gsi_end_p (gsi); gsi_next (&gsi))
2609e4b17023SJohn Marino                 {
2610e4b17023SJohn Marino                   gimple pattern_def_stmt = gsi_stmt (gsi);
2611e4b17023SJohn Marino                   stmt_vec_info pattern_def_stmt_info
2612e4b17023SJohn Marino 		    = vinfo_for_stmt (pattern_def_stmt);
2613e4b17023SJohn Marino                   if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
2614e4b17023SJohn Marino                       || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
2615e4b17023SJohn Marino 		    {
2616e4b17023SJohn Marino                       vec_inside_cost
2617e4b17023SJohn Marino 			+= STMT_VINFO_INSIDE_OF_LOOP_COST
2618e4b17023SJohn Marino 			   (pattern_def_stmt_info) * factor;
2619e4b17023SJohn Marino                       vec_outside_cost
2620e4b17023SJohn Marino 			+= STMT_VINFO_OUTSIDE_OF_LOOP_COST
2621e4b17023SJohn Marino 			   (pattern_def_stmt_info);
2622e4b17023SJohn Marino                     }
2623e4b17023SJohn Marino 		}
2624e4b17023SJohn Marino 	    }
2625e4b17023SJohn Marino 	}
2626e4b17023SJohn Marino     }
2627e4b17023SJohn Marino 
26285ce9237cSJohn Marino   scalar_single_iter_cost = vect_get_single_scalar_iteration_cost (loop_vinfo);
2629e4b17023SJohn Marino 
2630e4b17023SJohn Marino   /* Add additional cost for the peeled instructions in prologue and epilogue
2631e4b17023SJohn Marino      loop.
2632e4b17023SJohn Marino 
2633e4b17023SJohn Marino      FORNOW: If we don't know the value of peel_iters for prologue or epilogue
2634e4b17023SJohn Marino      at compile-time - we assume it's vf/2 (the worst would be vf-1).
2635e4b17023SJohn Marino 
2636e4b17023SJohn Marino      TODO: Build an expression that represents peel_iters for prologue and
2637e4b17023SJohn Marino      epilogue to be used in a run-time test.  */
2638e4b17023SJohn Marino 
2639e4b17023SJohn Marino   if (npeel  < 0)
2640e4b17023SJohn Marino     {
2641e4b17023SJohn Marino       peel_iters_prologue = vf/2;
2642e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_COST))
2643e4b17023SJohn Marino         fprintf (vect_dump, "cost model: "
2644e4b17023SJohn Marino                  "prologue peel iters set to vf/2.");
2645e4b17023SJohn Marino 
2646e4b17023SJohn Marino       /* If peeling for alignment is unknown, loop bound of main loop becomes
2647e4b17023SJohn Marino          unknown.  */
2648e4b17023SJohn Marino       peel_iters_epilogue = vf/2;
2649e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_COST))
2650e4b17023SJohn Marino         fprintf (vect_dump, "cost model: "
2651e4b17023SJohn Marino                  "epilogue peel iters set to vf/2 because "
2652e4b17023SJohn Marino                  "peeling for alignment is unknown .");
2653e4b17023SJohn Marino 
2654e4b17023SJohn Marino       /* If peeled iterations are unknown, count a taken branch and a not taken
2655e4b17023SJohn Marino          branch per peeled loop. Even if scalar loop iterations are known,
2656e4b17023SJohn Marino          vector iterations are not known since peeled prologue iterations are
2657e4b17023SJohn Marino          not known. Hence guards remain the same.  */
2658e4b17023SJohn Marino       peel_guard_costs +=  2 * (vect_get_cost (cond_branch_taken)
2659e4b17023SJohn Marino                                 + vect_get_cost (cond_branch_not_taken));
2660e4b17023SJohn Marino       vec_outside_cost += (peel_iters_prologue * scalar_single_iter_cost)
2661e4b17023SJohn Marino                            + (peel_iters_epilogue * scalar_single_iter_cost)
2662e4b17023SJohn Marino                            + peel_guard_costs;
2663e4b17023SJohn Marino     }
2664e4b17023SJohn Marino   else
2665e4b17023SJohn Marino     {
2666e4b17023SJohn Marino       peel_iters_prologue = npeel;
2667e4b17023SJohn Marino       vec_outside_cost += vect_get_known_peeling_cost (loop_vinfo,
2668e4b17023SJohn Marino                                     peel_iters_prologue, &peel_iters_epilogue,
2669e4b17023SJohn Marino                                     scalar_single_iter_cost);
2670e4b17023SJohn Marino     }
2671e4b17023SJohn Marino 
2672e4b17023SJohn Marino   /* FORNOW: The scalar outside cost is incremented in one of the
2673e4b17023SJohn Marino      following ways:
2674e4b17023SJohn Marino 
2675e4b17023SJohn Marino      1. The vectorizer checks for alignment and aliasing and generates
2676e4b17023SJohn Marino      a condition that allows dynamic vectorization.  A cost model
2677e4b17023SJohn Marino      check is ANDED with the versioning condition.  Hence scalar code
2678e4b17023SJohn Marino      path now has the added cost of the versioning check.
2679e4b17023SJohn Marino 
2680e4b17023SJohn Marino        if (cost > th & versioning_check)
2681e4b17023SJohn Marino          jmp to vector code
2682e4b17023SJohn Marino 
2683e4b17023SJohn Marino      Hence run-time scalar is incremented by not-taken branch cost.
2684e4b17023SJohn Marino 
2685e4b17023SJohn Marino      2. The vectorizer then checks if a prologue is required.  If the
2686e4b17023SJohn Marino      cost model check was not done before during versioning, it has to
2687e4b17023SJohn Marino      be done before the prologue check.
2688e4b17023SJohn Marino 
2689e4b17023SJohn Marino        if (cost <= th)
2690e4b17023SJohn Marino          prologue = scalar_iters
2691e4b17023SJohn Marino        if (prologue == 0)
2692e4b17023SJohn Marino          jmp to vector code
2693e4b17023SJohn Marino        else
2694e4b17023SJohn Marino          execute prologue
2695e4b17023SJohn Marino        if (prologue == num_iters)
2696e4b17023SJohn Marino 	 go to exit
2697e4b17023SJohn Marino 
2698e4b17023SJohn Marino      Hence the run-time scalar cost is incremented by a taken branch,
2699e4b17023SJohn Marino      plus a not-taken branch, plus a taken branch cost.
2700e4b17023SJohn Marino 
2701e4b17023SJohn Marino      3. The vectorizer then checks if an epilogue is required.  If the
2702e4b17023SJohn Marino      cost model check was not done before during prologue check, it
2703e4b17023SJohn Marino      has to be done with the epilogue check.
2704e4b17023SJohn Marino 
2705e4b17023SJohn Marino        if (prologue == 0)
2706e4b17023SJohn Marino          jmp to vector code
2707e4b17023SJohn Marino        else
2708e4b17023SJohn Marino          execute prologue
2709e4b17023SJohn Marino        if (prologue == num_iters)
2710e4b17023SJohn Marino 	 go to exit
2711e4b17023SJohn Marino        vector code:
2712e4b17023SJohn Marino          if ((cost <= th) | (scalar_iters-prologue-epilogue == 0))
2713e4b17023SJohn Marino            jmp to epilogue
2714e4b17023SJohn Marino 
2715e4b17023SJohn Marino      Hence the run-time scalar cost should be incremented by 2 taken
2716e4b17023SJohn Marino      branches.
2717e4b17023SJohn Marino 
2718e4b17023SJohn Marino      TODO: The back end may reorder the BBS's differently and reverse
2719e4b17023SJohn Marino      conditions/branch directions.  Change the estimates below to
2720e4b17023SJohn Marino      something more reasonable.  */
2721e4b17023SJohn Marino 
2722e4b17023SJohn Marino   /* If the number of iterations is known and we do not do versioning, we can
2723e4b17023SJohn Marino      decide whether to vectorize at compile time.  Hence the scalar version
2724e4b17023SJohn Marino      do not carry cost model guard costs.  */
2725e4b17023SJohn Marino   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2726e4b17023SJohn Marino       || LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
2727e4b17023SJohn Marino       || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2728e4b17023SJohn Marino     {
2729e4b17023SJohn Marino       /* Cost model check occurs at versioning.  */
2730e4b17023SJohn Marino       if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
2731e4b17023SJohn Marino           || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2732e4b17023SJohn Marino 	scalar_outside_cost += vect_get_cost (cond_branch_not_taken);
2733e4b17023SJohn Marino       else
2734e4b17023SJohn Marino 	{
2735e4b17023SJohn Marino 	  /* Cost model check occurs at prologue generation.  */
2736e4b17023SJohn Marino 	  if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)
2737e4b17023SJohn Marino 	    scalar_outside_cost += 2 * vect_get_cost (cond_branch_taken)
2738e4b17023SJohn Marino                                    + vect_get_cost (cond_branch_not_taken);
2739e4b17023SJohn Marino 	  /* Cost model check occurs at epilogue generation.  */
2740e4b17023SJohn Marino 	  else
2741e4b17023SJohn Marino 	    scalar_outside_cost += 2 * vect_get_cost (cond_branch_taken);
2742e4b17023SJohn Marino 	}
2743e4b17023SJohn Marino     }
2744e4b17023SJohn Marino 
2745e4b17023SJohn Marino   /* Add SLP costs.  */
2746e4b17023SJohn Marino   slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2747e4b17023SJohn Marino   FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
2748e4b17023SJohn Marino     {
2749e4b17023SJohn Marino       vec_outside_cost += SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (instance);
2750e4b17023SJohn Marino       vec_inside_cost += SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance);
2751e4b17023SJohn Marino     }
2752e4b17023SJohn Marino 
2753e4b17023SJohn Marino   /* Calculate number of iterations required to make the vector version
2754e4b17023SJohn Marino      profitable, relative to the loop bodies only.  The following condition
2755e4b17023SJohn Marino      must hold true:
2756e4b17023SJohn Marino      SIC * niters + SOC > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC
2757e4b17023SJohn Marino      where
2758e4b17023SJohn Marino      SIC = scalar iteration cost, VIC = vector iteration cost,
2759e4b17023SJohn Marino      VOC = vector outside cost, VF = vectorization factor,
2760e4b17023SJohn Marino      PL_ITERS = prologue iterations, EP_ITERS= epilogue iterations
2761e4b17023SJohn Marino      SOC = scalar outside cost for run time cost model check.  */
2762e4b17023SJohn Marino 
2763e4b17023SJohn Marino   if ((scalar_single_iter_cost * vf) > vec_inside_cost)
2764e4b17023SJohn Marino     {
2765e4b17023SJohn Marino       if (vec_outside_cost <= 0)
2766e4b17023SJohn Marino         min_profitable_iters = 1;
2767e4b17023SJohn Marino       else
2768e4b17023SJohn Marino         {
2769e4b17023SJohn Marino           min_profitable_iters = ((vec_outside_cost - scalar_outside_cost) * vf
2770e4b17023SJohn Marino 				  - vec_inside_cost * peel_iters_prologue
2771e4b17023SJohn Marino                                   - vec_inside_cost * peel_iters_epilogue)
2772e4b17023SJohn Marino                                  / ((scalar_single_iter_cost * vf)
2773e4b17023SJohn Marino                                     - vec_inside_cost);
2774e4b17023SJohn Marino 
2775e4b17023SJohn Marino           if ((scalar_single_iter_cost * vf * min_profitable_iters)
2776e4b17023SJohn Marino               <= ((vec_inside_cost * min_profitable_iters)
2777e4b17023SJohn Marino                   + ((vec_outside_cost - scalar_outside_cost) * vf)))
2778e4b17023SJohn Marino             min_profitable_iters++;
2779e4b17023SJohn Marino         }
2780e4b17023SJohn Marino     }
2781e4b17023SJohn Marino   /* vector version will never be profitable.  */
2782e4b17023SJohn Marino   else
2783e4b17023SJohn Marino     {
2784e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_COST))
2785e4b17023SJohn Marino         fprintf (vect_dump, "cost model: the vector iteration cost = %d "
2786e4b17023SJohn Marino 		 "divided by the scalar iteration cost = %d "
2787e4b17023SJohn Marino 		 "is greater or equal to the vectorization factor = %d.",
2788e4b17023SJohn Marino                  vec_inside_cost, scalar_single_iter_cost, vf);
2789e4b17023SJohn Marino       return -1;
2790e4b17023SJohn Marino     }
2791e4b17023SJohn Marino 
2792e4b17023SJohn Marino   if (vect_print_dump_info (REPORT_COST))
2793e4b17023SJohn Marino     {
2794e4b17023SJohn Marino       fprintf (vect_dump, "Cost model analysis: \n");
2795e4b17023SJohn Marino       fprintf (vect_dump, "  Vector inside of loop cost: %d\n",
2796e4b17023SJohn Marino 	       vec_inside_cost);
2797e4b17023SJohn Marino       fprintf (vect_dump, "  Vector outside of loop cost: %d\n",
2798e4b17023SJohn Marino 	       vec_outside_cost);
2799e4b17023SJohn Marino       fprintf (vect_dump, "  Scalar iteration cost: %d\n",
2800e4b17023SJohn Marino 	       scalar_single_iter_cost);
2801e4b17023SJohn Marino       fprintf (vect_dump, "  Scalar outside cost: %d\n", scalar_outside_cost);
2802e4b17023SJohn Marino       fprintf (vect_dump, "  prologue iterations: %d\n",
2803e4b17023SJohn Marino                peel_iters_prologue);
2804e4b17023SJohn Marino       fprintf (vect_dump, "  epilogue iterations: %d\n",
2805e4b17023SJohn Marino                peel_iters_epilogue);
2806e4b17023SJohn Marino       fprintf (vect_dump, "  Calculated minimum iters for profitability: %d\n",
2807e4b17023SJohn Marino 	       min_profitable_iters);
2808e4b17023SJohn Marino     }
2809e4b17023SJohn Marino 
2810e4b17023SJohn Marino   min_profitable_iters =
2811e4b17023SJohn Marino 	min_profitable_iters < vf ? vf : min_profitable_iters;
2812e4b17023SJohn Marino 
2813e4b17023SJohn Marino   /* Because the condition we create is:
2814e4b17023SJohn Marino      if (niters <= min_profitable_iters)
2815e4b17023SJohn Marino        then skip the vectorized loop.  */
2816e4b17023SJohn Marino   min_profitable_iters--;
2817e4b17023SJohn Marino 
2818e4b17023SJohn Marino   if (vect_print_dump_info (REPORT_COST))
2819e4b17023SJohn Marino     fprintf (vect_dump, "  Profitability threshold = %d\n",
2820e4b17023SJohn Marino 	     min_profitable_iters);
2821e4b17023SJohn Marino 
2822e4b17023SJohn Marino   return min_profitable_iters;
2823e4b17023SJohn Marino }
2824e4b17023SJohn Marino 
2825e4b17023SJohn Marino 
2826e4b17023SJohn Marino /* TODO: Close dependency between vect_model_*_cost and vectorizable_*
2827e4b17023SJohn Marino    functions. Design better to avoid maintenance issues.  */
2828e4b17023SJohn Marino 
2829e4b17023SJohn Marino /* Function vect_model_reduction_cost.
2830e4b17023SJohn Marino 
2831e4b17023SJohn Marino    Models cost for a reduction operation, including the vector ops
2832e4b17023SJohn Marino    generated within the strip-mine loop, the initial definition before
2833e4b17023SJohn Marino    the loop, and the epilogue code that must be generated.  */
2834e4b17023SJohn Marino 
2835e4b17023SJohn Marino static bool
vect_model_reduction_cost(stmt_vec_info stmt_info,enum tree_code reduc_code,int ncopies)2836e4b17023SJohn Marino vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
2837e4b17023SJohn Marino 			   int ncopies)
2838e4b17023SJohn Marino {
2839e4b17023SJohn Marino   int outer_cost = 0;
2840e4b17023SJohn Marino   enum tree_code code;
2841e4b17023SJohn Marino   optab optab;
2842e4b17023SJohn Marino   tree vectype;
2843e4b17023SJohn Marino   gimple stmt, orig_stmt;
2844e4b17023SJohn Marino   tree reduction_op;
2845e4b17023SJohn Marino   enum machine_mode mode;
2846e4b17023SJohn Marino   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2847e4b17023SJohn Marino   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2848e4b17023SJohn Marino 
2849e4b17023SJohn Marino 
2850e4b17023SJohn Marino   /* Cost of reduction op inside loop.  */
2851e4b17023SJohn Marino   STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info)
2852e4b17023SJohn Marino     += ncopies * vect_get_cost (vector_stmt);
2853e4b17023SJohn Marino 
2854e4b17023SJohn Marino   stmt = STMT_VINFO_STMT (stmt_info);
2855e4b17023SJohn Marino 
2856e4b17023SJohn Marino   switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
2857e4b17023SJohn Marino     {
2858e4b17023SJohn Marino     case GIMPLE_SINGLE_RHS:
2859e4b17023SJohn Marino       gcc_assert (TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt)) == ternary_op);
2860e4b17023SJohn Marino       reduction_op = TREE_OPERAND (gimple_assign_rhs1 (stmt), 2);
2861e4b17023SJohn Marino       break;
2862e4b17023SJohn Marino     case GIMPLE_UNARY_RHS:
2863e4b17023SJohn Marino       reduction_op = gimple_assign_rhs1 (stmt);
2864e4b17023SJohn Marino       break;
2865e4b17023SJohn Marino     case GIMPLE_BINARY_RHS:
2866e4b17023SJohn Marino       reduction_op = gimple_assign_rhs2 (stmt);
2867e4b17023SJohn Marino       break;
2868e4b17023SJohn Marino     case GIMPLE_TERNARY_RHS:
2869e4b17023SJohn Marino       reduction_op = gimple_assign_rhs3 (stmt);
2870e4b17023SJohn Marino       break;
2871e4b17023SJohn Marino     default:
2872e4b17023SJohn Marino       gcc_unreachable ();
2873e4b17023SJohn Marino     }
2874e4b17023SJohn Marino 
2875e4b17023SJohn Marino   vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
2876e4b17023SJohn Marino   if (!vectype)
2877e4b17023SJohn Marino     {
2878e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_COST))
2879e4b17023SJohn Marino         {
2880e4b17023SJohn Marino           fprintf (vect_dump, "unsupported data-type ");
2881e4b17023SJohn Marino           print_generic_expr (vect_dump, TREE_TYPE (reduction_op), TDF_SLIM);
2882e4b17023SJohn Marino         }
2883e4b17023SJohn Marino       return false;
2884e4b17023SJohn Marino    }
2885e4b17023SJohn Marino 
2886e4b17023SJohn Marino   mode = TYPE_MODE (vectype);
2887e4b17023SJohn Marino   orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2888e4b17023SJohn Marino 
2889e4b17023SJohn Marino   if (!orig_stmt)
2890e4b17023SJohn Marino     orig_stmt = STMT_VINFO_STMT (stmt_info);
2891e4b17023SJohn Marino 
2892e4b17023SJohn Marino   code = gimple_assign_rhs_code (orig_stmt);
2893e4b17023SJohn Marino 
2894e4b17023SJohn Marino   /* Add in cost for initial definition.  */
2895e4b17023SJohn Marino   outer_cost += vect_get_cost (scalar_to_vec);
2896e4b17023SJohn Marino 
2897e4b17023SJohn Marino   /* Determine cost of epilogue code.
2898e4b17023SJohn Marino 
2899e4b17023SJohn Marino      We have a reduction operator that will reduce the vector in one statement.
2900e4b17023SJohn Marino      Also requires scalar extract.  */
2901e4b17023SJohn Marino 
2902e4b17023SJohn Marino   if (!nested_in_vect_loop_p (loop, orig_stmt))
2903e4b17023SJohn Marino     {
2904e4b17023SJohn Marino       if (reduc_code != ERROR_MARK)
2905e4b17023SJohn Marino 	outer_cost += vect_get_cost (vector_stmt)
2906e4b17023SJohn Marino                       + vect_get_cost (vec_to_scalar);
2907e4b17023SJohn Marino       else
2908e4b17023SJohn Marino 	{
2909e4b17023SJohn Marino 	  int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
2910e4b17023SJohn Marino 	  tree bitsize =
2911e4b17023SJohn Marino 	    TYPE_SIZE (TREE_TYPE (gimple_assign_lhs (orig_stmt)));
2912e4b17023SJohn Marino 	  int element_bitsize = tree_low_cst (bitsize, 1);
2913e4b17023SJohn Marino 	  int nelements = vec_size_in_bits / element_bitsize;
2914e4b17023SJohn Marino 
2915e4b17023SJohn Marino 	  optab = optab_for_tree_code (code, vectype, optab_default);
2916e4b17023SJohn Marino 
2917e4b17023SJohn Marino 	  /* We have a whole vector shift available.  */
2918e4b17023SJohn Marino 	  if (VECTOR_MODE_P (mode)
2919e4b17023SJohn Marino 	      && optab_handler (optab, mode) != CODE_FOR_nothing
2920e4b17023SJohn Marino 	      && optab_handler (vec_shr_optab, mode) != CODE_FOR_nothing)
2921e4b17023SJohn Marino 	    /* Final reduction via vector shifts and the reduction operator. Also
2922e4b17023SJohn Marino 	       requires scalar extract.  */
2923e4b17023SJohn Marino 	    outer_cost += ((exact_log2(nelements) * 2)
2924e4b17023SJohn Marino               * vect_get_cost (vector_stmt)
2925e4b17023SJohn Marino   	      + vect_get_cost (vec_to_scalar));
2926e4b17023SJohn Marino 	  else
2927e4b17023SJohn Marino 	    /* Use extracts and reduction op for final reduction.  For N elements,
2928e4b17023SJohn Marino                we have N extracts and N-1 reduction ops.  */
2929e4b17023SJohn Marino 	    outer_cost += ((nelements + nelements - 1)
2930e4b17023SJohn Marino               * vect_get_cost (vector_stmt));
2931e4b17023SJohn Marino 	}
2932e4b17023SJohn Marino     }
2933e4b17023SJohn Marino 
2934e4b17023SJohn Marino   STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = outer_cost;
2935e4b17023SJohn Marino 
2936e4b17023SJohn Marino   if (vect_print_dump_info (REPORT_COST))
2937e4b17023SJohn Marino     fprintf (vect_dump, "vect_model_reduction_cost: inside_cost = %d, "
2938e4b17023SJohn Marino              "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
2939e4b17023SJohn Marino              STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
2940e4b17023SJohn Marino 
2941e4b17023SJohn Marino   return true;
2942e4b17023SJohn Marino }
2943e4b17023SJohn Marino 
2944e4b17023SJohn Marino 
2945e4b17023SJohn Marino /* Function vect_model_induction_cost.
2946e4b17023SJohn Marino 
2947e4b17023SJohn Marino    Models cost for induction operations.  */
2948e4b17023SJohn Marino 
2949e4b17023SJohn Marino static void
vect_model_induction_cost(stmt_vec_info stmt_info,int ncopies)2950e4b17023SJohn Marino vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies)
2951e4b17023SJohn Marino {
2952e4b17023SJohn Marino   /* loop cost for vec_loop.  */
2953e4b17023SJohn Marino   STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info)
2954e4b17023SJohn Marino     = ncopies * vect_get_cost (vector_stmt);
2955e4b17023SJohn Marino   /* prologue cost for vec_init and vec_step.  */
2956e4b17023SJohn Marino   STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info)
2957e4b17023SJohn Marino     = 2 * vect_get_cost (scalar_to_vec);
2958e4b17023SJohn Marino 
2959e4b17023SJohn Marino   if (vect_print_dump_info (REPORT_COST))
2960e4b17023SJohn Marino     fprintf (vect_dump, "vect_model_induction_cost: inside_cost = %d, "
2961e4b17023SJohn Marino              "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
2962e4b17023SJohn Marino              STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
2963e4b17023SJohn Marino }
2964e4b17023SJohn Marino 
2965e4b17023SJohn Marino 
2966e4b17023SJohn Marino /* Function get_initial_def_for_induction
2967e4b17023SJohn Marino 
2968e4b17023SJohn Marino    Input:
2969e4b17023SJohn Marino    STMT - a stmt that performs an induction operation in the loop.
2970e4b17023SJohn Marino    IV_PHI - the initial value of the induction variable
2971e4b17023SJohn Marino 
2972e4b17023SJohn Marino    Output:
2973e4b17023SJohn Marino    Return a vector variable, initialized with the first VF values of
2974e4b17023SJohn Marino    the induction variable.  E.g., for an iv with IV_PHI='X' and
2975e4b17023SJohn Marino    evolution S, for a vector of 4 units, we want to return:
2976e4b17023SJohn Marino    [X, X + S, X + 2*S, X + 3*S].  */
2977e4b17023SJohn Marino 
2978e4b17023SJohn Marino static tree
get_initial_def_for_induction(gimple iv_phi)2979e4b17023SJohn Marino get_initial_def_for_induction (gimple iv_phi)
2980e4b17023SJohn Marino {
2981e4b17023SJohn Marino   stmt_vec_info stmt_vinfo = vinfo_for_stmt (iv_phi);
2982e4b17023SJohn Marino   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2983e4b17023SJohn Marino   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2984e4b17023SJohn Marino   tree scalar_type;
2985e4b17023SJohn Marino   tree vectype;
2986e4b17023SJohn Marino   int nunits;
2987e4b17023SJohn Marino   edge pe = loop_preheader_edge (loop);
2988e4b17023SJohn Marino   struct loop *iv_loop;
2989e4b17023SJohn Marino   basic_block new_bb;
2990e4b17023SJohn Marino   tree vec, vec_init, vec_step, t;
2991e4b17023SJohn Marino   tree access_fn;
2992e4b17023SJohn Marino   tree new_var;
2993e4b17023SJohn Marino   tree new_name;
2994e4b17023SJohn Marino   gimple init_stmt, induction_phi, new_stmt;
2995e4b17023SJohn Marino   tree induc_def, vec_def, vec_dest;
2996e4b17023SJohn Marino   tree init_expr, step_expr;
2997e4b17023SJohn Marino   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2998e4b17023SJohn Marino   int i;
2999e4b17023SJohn Marino   bool ok;
3000e4b17023SJohn Marino   int ncopies;
3001e4b17023SJohn Marino   tree expr;
3002e4b17023SJohn Marino   stmt_vec_info phi_info = vinfo_for_stmt (iv_phi);
3003e4b17023SJohn Marino   bool nested_in_vect_loop = false;
3004e4b17023SJohn Marino   gimple_seq stmts = NULL;
3005e4b17023SJohn Marino   imm_use_iterator imm_iter;
3006e4b17023SJohn Marino   use_operand_p use_p;
3007e4b17023SJohn Marino   gimple exit_phi;
3008e4b17023SJohn Marino   edge latch_e;
3009e4b17023SJohn Marino   tree loop_arg;
3010e4b17023SJohn Marino   gimple_stmt_iterator si;
3011e4b17023SJohn Marino   basic_block bb = gimple_bb (iv_phi);
3012e4b17023SJohn Marino   tree stepvectype;
3013e4b17023SJohn Marino   tree resvectype;
3014e4b17023SJohn Marino 
3015e4b17023SJohn Marino   /* Is phi in an inner-loop, while vectorizing an enclosing outer-loop?  */
3016e4b17023SJohn Marino   if (nested_in_vect_loop_p (loop, iv_phi))
3017e4b17023SJohn Marino     {
3018e4b17023SJohn Marino       nested_in_vect_loop = true;
3019e4b17023SJohn Marino       iv_loop = loop->inner;
3020e4b17023SJohn Marino     }
3021e4b17023SJohn Marino   else
3022e4b17023SJohn Marino     iv_loop = loop;
3023e4b17023SJohn Marino   gcc_assert (iv_loop == (gimple_bb (iv_phi))->loop_father);
3024e4b17023SJohn Marino 
3025e4b17023SJohn Marino   latch_e = loop_latch_edge (iv_loop);
3026e4b17023SJohn Marino   loop_arg = PHI_ARG_DEF_FROM_EDGE (iv_phi, latch_e);
3027e4b17023SJohn Marino 
3028e4b17023SJohn Marino   access_fn = analyze_scalar_evolution (iv_loop, PHI_RESULT (iv_phi));
3029e4b17023SJohn Marino   gcc_assert (access_fn);
3030e4b17023SJohn Marino   STRIP_NOPS (access_fn);
3031e4b17023SJohn Marino   ok = vect_is_simple_iv_evolution (iv_loop->num, access_fn,
3032e4b17023SJohn Marino                                     &init_expr, &step_expr);
3033e4b17023SJohn Marino   gcc_assert (ok);
3034e4b17023SJohn Marino   pe = loop_preheader_edge (iv_loop);
3035e4b17023SJohn Marino 
3036e4b17023SJohn Marino   scalar_type = TREE_TYPE (init_expr);
3037e4b17023SJohn Marino   vectype = get_vectype_for_scalar_type (scalar_type);
3038e4b17023SJohn Marino   resvectype = get_vectype_for_scalar_type (TREE_TYPE (PHI_RESULT (iv_phi)));
3039e4b17023SJohn Marino   gcc_assert (vectype);
3040e4b17023SJohn Marino   nunits = TYPE_VECTOR_SUBPARTS (vectype);
3041e4b17023SJohn Marino   ncopies = vf / nunits;
3042e4b17023SJohn Marino 
3043e4b17023SJohn Marino   gcc_assert (phi_info);
3044e4b17023SJohn Marino   gcc_assert (ncopies >= 1);
3045e4b17023SJohn Marino 
3046e4b17023SJohn Marino   /* Find the first insertion point in the BB.  */
3047e4b17023SJohn Marino   si = gsi_after_labels (bb);
3048e4b17023SJohn Marino 
3049e4b17023SJohn Marino   /* Create the vector that holds the initial_value of the induction.  */
3050e4b17023SJohn Marino   if (nested_in_vect_loop)
3051e4b17023SJohn Marino     {
3052e4b17023SJohn Marino       /* iv_loop is nested in the loop to be vectorized.  init_expr had already
3053e4b17023SJohn Marino 	 been created during vectorization of previous stmts.  We obtain it
3054e4b17023SJohn Marino 	 from the STMT_VINFO_VEC_STMT of the defining stmt.  */
3055e4b17023SJohn Marino       tree iv_def = PHI_ARG_DEF_FROM_EDGE (iv_phi,
3056e4b17023SJohn Marino                                            loop_preheader_edge (iv_loop));
3057e4b17023SJohn Marino       vec_init = vect_get_vec_def_for_operand (iv_def, iv_phi, NULL);
3058e4b17023SJohn Marino     }
3059e4b17023SJohn Marino   else
3060e4b17023SJohn Marino     {
3061e4b17023SJohn Marino       /* iv_loop is the loop to be vectorized. Create:
3062e4b17023SJohn Marino 	 vec_init = [X, X+S, X+2*S, X+3*S] (S = step_expr, X = init_expr)  */
3063e4b17023SJohn Marino       new_var = vect_get_new_vect_var (scalar_type, vect_scalar_var, "var_");
3064e4b17023SJohn Marino       add_referenced_var (new_var);
3065e4b17023SJohn Marino 
3066e4b17023SJohn Marino       new_name = force_gimple_operand (init_expr, &stmts, false, new_var);
3067e4b17023SJohn Marino       if (stmts)
3068e4b17023SJohn Marino 	{
3069e4b17023SJohn Marino 	  new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3070e4b17023SJohn Marino 	  gcc_assert (!new_bb);
3071e4b17023SJohn Marino 	}
3072e4b17023SJohn Marino 
3073e4b17023SJohn Marino       t = NULL_TREE;
3074e4b17023SJohn Marino       t = tree_cons (NULL_TREE, new_name, t);
3075e4b17023SJohn Marino       for (i = 1; i < nunits; i++)
3076e4b17023SJohn Marino 	{
3077e4b17023SJohn Marino 	  /* Create: new_name_i = new_name + step_expr  */
3078e4b17023SJohn Marino 	  enum tree_code code = POINTER_TYPE_P (scalar_type)
3079e4b17023SJohn Marino 				? POINTER_PLUS_EXPR : PLUS_EXPR;
3080e4b17023SJohn Marino 	  init_stmt = gimple_build_assign_with_ops (code, new_var,
3081e4b17023SJohn Marino 						    new_name, step_expr);
3082e4b17023SJohn Marino 	  new_name = make_ssa_name (new_var, init_stmt);
3083e4b17023SJohn Marino 	  gimple_assign_set_lhs (init_stmt, new_name);
3084e4b17023SJohn Marino 
3085e4b17023SJohn Marino 	  new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
3086e4b17023SJohn Marino 	  gcc_assert (!new_bb);
3087e4b17023SJohn Marino 
3088e4b17023SJohn Marino 	  if (vect_print_dump_info (REPORT_DETAILS))
3089e4b17023SJohn Marino 	    {
3090e4b17023SJohn Marino 	      fprintf (vect_dump, "created new init_stmt: ");
3091e4b17023SJohn Marino 	      print_gimple_stmt (vect_dump, init_stmt, 0, TDF_SLIM);
3092e4b17023SJohn Marino 	    }
3093e4b17023SJohn Marino 	  t = tree_cons (NULL_TREE, new_name, t);
3094e4b17023SJohn Marino 	}
3095e4b17023SJohn Marino       /* Create a vector from [new_name_0, new_name_1, ..., new_name_nunits-1]  */
3096e4b17023SJohn Marino       vec = build_constructor_from_list (vectype, nreverse (t));
3097e4b17023SJohn Marino       vec_init = vect_init_vector (iv_phi, vec, vectype, NULL);
3098e4b17023SJohn Marino     }
3099e4b17023SJohn Marino 
3100e4b17023SJohn Marino 
3101e4b17023SJohn Marino   /* Create the vector that holds the step of the induction.  */
3102e4b17023SJohn Marino   if (nested_in_vect_loop)
3103e4b17023SJohn Marino     /* iv_loop is nested in the loop to be vectorized. Generate:
3104e4b17023SJohn Marino        vec_step = [S, S, S, S]  */
3105e4b17023SJohn Marino     new_name = step_expr;
3106e4b17023SJohn Marino   else
3107e4b17023SJohn Marino     {
3108e4b17023SJohn Marino       /* iv_loop is the loop to be vectorized. Generate:
3109e4b17023SJohn Marino 	  vec_step = [VF*S, VF*S, VF*S, VF*S]  */
3110e4b17023SJohn Marino       expr = build_int_cst (TREE_TYPE (step_expr), vf);
3111e4b17023SJohn Marino       new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
3112e4b17023SJohn Marino 			      expr, step_expr);
3113e4b17023SJohn Marino     }
3114e4b17023SJohn Marino 
3115e4b17023SJohn Marino   t = unshare_expr (new_name);
3116e4b17023SJohn Marino   gcc_assert (CONSTANT_CLASS_P (new_name));
3117e4b17023SJohn Marino   stepvectype = get_vectype_for_scalar_type (TREE_TYPE (new_name));
3118e4b17023SJohn Marino   gcc_assert (stepvectype);
3119e4b17023SJohn Marino   vec = build_vector_from_val (stepvectype, t);
3120e4b17023SJohn Marino   vec_step = vect_init_vector (iv_phi, vec, stepvectype, NULL);
3121e4b17023SJohn Marino 
3122e4b17023SJohn Marino 
3123e4b17023SJohn Marino   /* Create the following def-use cycle:
3124e4b17023SJohn Marino      loop prolog:
3125e4b17023SJohn Marino          vec_init = ...
3126e4b17023SJohn Marino 	 vec_step = ...
3127e4b17023SJohn Marino      loop:
3128e4b17023SJohn Marino          vec_iv = PHI <vec_init, vec_loop>
3129e4b17023SJohn Marino          ...
3130e4b17023SJohn Marino          STMT
3131e4b17023SJohn Marino          ...
3132e4b17023SJohn Marino          vec_loop = vec_iv + vec_step;  */
3133e4b17023SJohn Marino 
3134e4b17023SJohn Marino   /* Create the induction-phi that defines the induction-operand.  */
3135e4b17023SJohn Marino   vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_");
3136e4b17023SJohn Marino   add_referenced_var (vec_dest);
3137e4b17023SJohn Marino   induction_phi = create_phi_node (vec_dest, iv_loop->header);
3138e4b17023SJohn Marino   set_vinfo_for_stmt (induction_phi,
3139e4b17023SJohn Marino 		      new_stmt_vec_info (induction_phi, loop_vinfo, NULL));
3140e4b17023SJohn Marino   induc_def = PHI_RESULT (induction_phi);
3141e4b17023SJohn Marino 
3142e4b17023SJohn Marino   /* Create the iv update inside the loop  */
3143e4b17023SJohn Marino   new_stmt = gimple_build_assign_with_ops (PLUS_EXPR, vec_dest,
3144e4b17023SJohn Marino 					   induc_def, vec_step);
3145e4b17023SJohn Marino   vec_def = make_ssa_name (vec_dest, new_stmt);
3146e4b17023SJohn Marino   gimple_assign_set_lhs (new_stmt, vec_def);
3147e4b17023SJohn Marino   gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3148e4b17023SJohn Marino   set_vinfo_for_stmt (new_stmt, new_stmt_vec_info (new_stmt, loop_vinfo,
3149e4b17023SJohn Marino                                                    NULL));
3150e4b17023SJohn Marino 
3151e4b17023SJohn Marino   /* Set the arguments of the phi node:  */
3152e4b17023SJohn Marino   add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION);
3153e4b17023SJohn Marino   add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop),
3154e4b17023SJohn Marino 	       UNKNOWN_LOCATION);
3155e4b17023SJohn Marino 
3156e4b17023SJohn Marino 
3157e4b17023SJohn Marino   /* In case that vectorization factor (VF) is bigger than the number
3158e4b17023SJohn Marino      of elements that we can fit in a vectype (nunits), we have to generate
3159e4b17023SJohn Marino      more than one vector stmt - i.e - we need to "unroll" the
3160e4b17023SJohn Marino      vector stmt by a factor VF/nunits.  For more details see documentation
3161e4b17023SJohn Marino      in vectorizable_operation.  */
3162e4b17023SJohn Marino 
3163e4b17023SJohn Marino   if (ncopies > 1)
3164e4b17023SJohn Marino     {
3165e4b17023SJohn Marino       stmt_vec_info prev_stmt_vinfo;
3166e4b17023SJohn Marino       /* FORNOW. This restriction should be relaxed.  */
3167e4b17023SJohn Marino       gcc_assert (!nested_in_vect_loop);
3168e4b17023SJohn Marino 
3169e4b17023SJohn Marino       /* Create the vector that holds the step of the induction.  */
3170e4b17023SJohn Marino       expr = build_int_cst (TREE_TYPE (step_expr), nunits);
3171e4b17023SJohn Marino       new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
3172e4b17023SJohn Marino 			      expr, step_expr);
3173e4b17023SJohn Marino       t = unshare_expr (new_name);
3174e4b17023SJohn Marino       gcc_assert (CONSTANT_CLASS_P (new_name));
3175e4b17023SJohn Marino       vec = build_vector_from_val (stepvectype, t);
3176e4b17023SJohn Marino       vec_step = vect_init_vector (iv_phi, vec, stepvectype, NULL);
3177e4b17023SJohn Marino 
3178e4b17023SJohn Marino       vec_def = induc_def;
3179e4b17023SJohn Marino       prev_stmt_vinfo = vinfo_for_stmt (induction_phi);
3180e4b17023SJohn Marino       for (i = 1; i < ncopies; i++)
3181e4b17023SJohn Marino 	{
3182e4b17023SJohn Marino 	  /* vec_i = vec_prev + vec_step  */
3183e4b17023SJohn Marino 	  new_stmt = gimple_build_assign_with_ops (PLUS_EXPR, vec_dest,
3184e4b17023SJohn Marino 						   vec_def, vec_step);
3185e4b17023SJohn Marino 	  vec_def = make_ssa_name (vec_dest, new_stmt);
3186e4b17023SJohn Marino 	  gimple_assign_set_lhs (new_stmt, vec_def);
3187e4b17023SJohn Marino 
3188e4b17023SJohn Marino 	  gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3189e4b17023SJohn Marino 	  if (!useless_type_conversion_p (resvectype, vectype))
3190e4b17023SJohn Marino 	    {
3191e4b17023SJohn Marino 	      new_stmt = gimple_build_assign_with_ops
3192e4b17023SJohn Marino 		  (VIEW_CONVERT_EXPR,
3193e4b17023SJohn Marino 		   vect_get_new_vect_var (resvectype, vect_simple_var,
3194e4b17023SJohn Marino 					  "vec_iv_"),
3195e4b17023SJohn Marino 		   build1 (VIEW_CONVERT_EXPR, resvectype,
3196e4b17023SJohn Marino 			   gimple_assign_lhs (new_stmt)), NULL_TREE);
3197e4b17023SJohn Marino 	      gimple_assign_set_lhs (new_stmt,
3198e4b17023SJohn Marino 				     make_ssa_name
3199e4b17023SJohn Marino 				       (gimple_assign_lhs (new_stmt), new_stmt));
3200e4b17023SJohn Marino 	      gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3201e4b17023SJohn Marino 	    }
3202e4b17023SJohn Marino 	  set_vinfo_for_stmt (new_stmt,
3203e4b17023SJohn Marino 			      new_stmt_vec_info (new_stmt, loop_vinfo, NULL));
3204e4b17023SJohn Marino 	  STMT_VINFO_RELATED_STMT (prev_stmt_vinfo) = new_stmt;
3205e4b17023SJohn Marino 	  prev_stmt_vinfo = vinfo_for_stmt (new_stmt);
3206e4b17023SJohn Marino 	}
3207e4b17023SJohn Marino     }
3208e4b17023SJohn Marino 
3209e4b17023SJohn Marino   if (nested_in_vect_loop)
3210e4b17023SJohn Marino     {
3211e4b17023SJohn Marino       /* Find the loop-closed exit-phi of the induction, and record
3212e4b17023SJohn Marino          the final vector of induction results:  */
3213e4b17023SJohn Marino       exit_phi = NULL;
3214e4b17023SJohn Marino       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
3215e4b17023SJohn Marino         {
3216e4b17023SJohn Marino 	  if (!flow_bb_inside_loop_p (iv_loop, gimple_bb (USE_STMT (use_p))))
3217e4b17023SJohn Marino 	    {
3218e4b17023SJohn Marino 	      exit_phi = USE_STMT (use_p);
3219e4b17023SJohn Marino 	      break;
3220e4b17023SJohn Marino 	    }
3221e4b17023SJohn Marino         }
3222e4b17023SJohn Marino       if (exit_phi)
3223e4b17023SJohn Marino 	{
3224e4b17023SJohn Marino 	  stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi);
3225e4b17023SJohn Marino 	  /* FORNOW. Currently not supporting the case that an inner-loop induction
3226e4b17023SJohn Marino 	     is not used in the outer-loop (i.e. only outside the outer-loop).  */
3227e4b17023SJohn Marino 	  gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo)
3228e4b17023SJohn Marino 		      && !STMT_VINFO_LIVE_P (stmt_vinfo));
3229e4b17023SJohn Marino 
3230e4b17023SJohn Marino 	  STMT_VINFO_VEC_STMT (stmt_vinfo) = new_stmt;
3231e4b17023SJohn Marino 	  if (vect_print_dump_info (REPORT_DETAILS))
3232e4b17023SJohn Marino 	    {
3233e4b17023SJohn Marino 	      fprintf (vect_dump, "vector of inductions after inner-loop:");
3234e4b17023SJohn Marino 	      print_gimple_stmt (vect_dump, new_stmt, 0, TDF_SLIM);
3235e4b17023SJohn Marino 	    }
3236e4b17023SJohn Marino 	}
3237e4b17023SJohn Marino     }
3238e4b17023SJohn Marino 
3239e4b17023SJohn Marino 
3240e4b17023SJohn Marino   if (vect_print_dump_info (REPORT_DETAILS))
3241e4b17023SJohn Marino     {
3242e4b17023SJohn Marino       fprintf (vect_dump, "transform induction: created def-use cycle: ");
3243e4b17023SJohn Marino       print_gimple_stmt (vect_dump, induction_phi, 0, TDF_SLIM);
3244e4b17023SJohn Marino       fprintf (vect_dump, "\n");
3245e4b17023SJohn Marino       print_gimple_stmt (vect_dump, SSA_NAME_DEF_STMT (vec_def), 0, TDF_SLIM);
3246e4b17023SJohn Marino     }
3247e4b17023SJohn Marino 
3248e4b17023SJohn Marino   STMT_VINFO_VEC_STMT (phi_info) = induction_phi;
3249e4b17023SJohn Marino   if (!useless_type_conversion_p (resvectype, vectype))
3250e4b17023SJohn Marino     {
3251e4b17023SJohn Marino       new_stmt = gimple_build_assign_with_ops
3252e4b17023SJohn Marino 	 (VIEW_CONVERT_EXPR,
3253e4b17023SJohn Marino 	  vect_get_new_vect_var (resvectype, vect_simple_var, "vec_iv_"),
3254e4b17023SJohn Marino 	  build1 (VIEW_CONVERT_EXPR, resvectype, induc_def), NULL_TREE);
3255e4b17023SJohn Marino       induc_def = make_ssa_name (gimple_assign_lhs (new_stmt), new_stmt);
3256e4b17023SJohn Marino       gimple_assign_set_lhs (new_stmt, induc_def);
3257e4b17023SJohn Marino       si = gsi_start_bb (bb);
3258e4b17023SJohn Marino       gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3259e4b17023SJohn Marino       set_vinfo_for_stmt (new_stmt,
3260e4b17023SJohn Marino 			  new_stmt_vec_info (new_stmt, loop_vinfo, NULL));
3261e4b17023SJohn Marino       STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_stmt))
3262e4b17023SJohn Marino 	= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (induction_phi));
3263e4b17023SJohn Marino     }
3264e4b17023SJohn Marino 
3265e4b17023SJohn Marino   return induc_def;
3266e4b17023SJohn Marino }
3267e4b17023SJohn Marino 
3268e4b17023SJohn Marino 
3269e4b17023SJohn Marino /* Function get_initial_def_for_reduction
3270e4b17023SJohn Marino 
3271e4b17023SJohn Marino    Input:
3272e4b17023SJohn Marino    STMT - a stmt that performs a reduction operation in the loop.
3273e4b17023SJohn Marino    INIT_VAL - the initial value of the reduction variable
3274e4b17023SJohn Marino 
3275e4b17023SJohn Marino    Output:
3276e4b17023SJohn Marino    ADJUSTMENT_DEF - a tree that holds a value to be added to the final result
3277e4b17023SJohn Marino         of the reduction (used for adjusting the epilog - see below).
3278e4b17023SJohn Marino    Return a vector variable, initialized according to the operation that STMT
3279e4b17023SJohn Marino         performs. This vector will be used as the initial value of the
3280e4b17023SJohn Marino         vector of partial results.
3281e4b17023SJohn Marino 
3282e4b17023SJohn Marino    Option1 (adjust in epilog): Initialize the vector as follows:
3283e4b17023SJohn Marino      add/bit or/xor:    [0,0,...,0,0]
3284e4b17023SJohn Marino      mult/bit and:      [1,1,...,1,1]
3285e4b17023SJohn Marino      min/max/cond_expr: [init_val,init_val,..,init_val,init_val]
3286e4b17023SJohn Marino    and when necessary (e.g. add/mult case) let the caller know
3287e4b17023SJohn Marino    that it needs to adjust the result by init_val.
3288e4b17023SJohn Marino 
3289e4b17023SJohn Marino    Option2: Initialize the vector as follows:
3290e4b17023SJohn Marino      add/bit or/xor:    [init_val,0,0,...,0]
3291e4b17023SJohn Marino      mult/bit and:      [init_val,1,1,...,1]
3292e4b17023SJohn Marino      min/max/cond_expr: [init_val,init_val,...,init_val]
3293e4b17023SJohn Marino    and no adjustments are needed.
3294e4b17023SJohn Marino 
3295e4b17023SJohn Marino    For example, for the following code:
3296e4b17023SJohn Marino 
3297e4b17023SJohn Marino    s = init_val;
3298e4b17023SJohn Marino    for (i=0;i<n;i++)
3299e4b17023SJohn Marino      s = s + a[i];
3300e4b17023SJohn Marino 
3301e4b17023SJohn Marino    STMT is 's = s + a[i]', and the reduction variable is 's'.
3302e4b17023SJohn Marino    For a vector of 4 units, we want to return either [0,0,0,init_val],
3303e4b17023SJohn Marino    or [0,0,0,0] and let the caller know that it needs to adjust
3304e4b17023SJohn Marino    the result at the end by 'init_val'.
3305e4b17023SJohn Marino 
3306e4b17023SJohn Marino    FORNOW, we are using the 'adjust in epilog' scheme, because this way the
3307e4b17023SJohn Marino    initialization vector is simpler (same element in all entries), if
3308e4b17023SJohn Marino    ADJUSTMENT_DEF is not NULL, and Option2 otherwise.
3309e4b17023SJohn Marino 
3310e4b17023SJohn Marino    A cost model should help decide between these two schemes.  */
3311e4b17023SJohn Marino 
3312e4b17023SJohn Marino tree
get_initial_def_for_reduction(gimple stmt,tree init_val,tree * adjustment_def)3313e4b17023SJohn Marino get_initial_def_for_reduction (gimple stmt, tree init_val,
3314e4b17023SJohn Marino                                tree *adjustment_def)
3315e4b17023SJohn Marino {
3316e4b17023SJohn Marino   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3317e4b17023SJohn Marino   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
3318e4b17023SJohn Marino   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3319e4b17023SJohn Marino   tree scalar_type = TREE_TYPE (init_val);
3320e4b17023SJohn Marino   tree vectype = get_vectype_for_scalar_type (scalar_type);
3321e4b17023SJohn Marino   int nunits;
3322e4b17023SJohn Marino   enum tree_code code = gimple_assign_rhs_code (stmt);
3323e4b17023SJohn Marino   tree def_for_init;
3324e4b17023SJohn Marino   tree init_def;
3325e4b17023SJohn Marino   tree t = NULL_TREE;
3326e4b17023SJohn Marino   int i;
3327e4b17023SJohn Marino   bool nested_in_vect_loop = false;
3328e4b17023SJohn Marino   tree init_value;
3329e4b17023SJohn Marino   REAL_VALUE_TYPE real_init_val = dconst0;
3330e4b17023SJohn Marino   int int_init_val = 0;
3331e4b17023SJohn Marino   gimple def_stmt = NULL;
3332e4b17023SJohn Marino 
3333e4b17023SJohn Marino   gcc_assert (vectype);
3334e4b17023SJohn Marino   nunits = TYPE_VECTOR_SUBPARTS (vectype);
3335e4b17023SJohn Marino 
3336e4b17023SJohn Marino   gcc_assert (POINTER_TYPE_P (scalar_type) || INTEGRAL_TYPE_P (scalar_type)
3337e4b17023SJohn Marino 	      || SCALAR_FLOAT_TYPE_P (scalar_type));
3338e4b17023SJohn Marino 
3339e4b17023SJohn Marino   if (nested_in_vect_loop_p (loop, stmt))
3340e4b17023SJohn Marino     nested_in_vect_loop = true;
3341e4b17023SJohn Marino   else
3342e4b17023SJohn Marino     gcc_assert (loop == (gimple_bb (stmt))->loop_father);
3343e4b17023SJohn Marino 
3344e4b17023SJohn Marino   /* In case of double reduction we only create a vector variable to be put
3345e4b17023SJohn Marino      in the reduction phi node.  The actual statement creation is done in
3346e4b17023SJohn Marino      vect_create_epilog_for_reduction.  */
3347e4b17023SJohn Marino   if (adjustment_def && nested_in_vect_loop
3348e4b17023SJohn Marino       && TREE_CODE (init_val) == SSA_NAME
3349e4b17023SJohn Marino       && (def_stmt = SSA_NAME_DEF_STMT (init_val))
3350e4b17023SJohn Marino       && gimple_code (def_stmt) == GIMPLE_PHI
3351e4b17023SJohn Marino       && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
3352e4b17023SJohn Marino       && vinfo_for_stmt (def_stmt)
3353e4b17023SJohn Marino       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
3354e4b17023SJohn Marino           == vect_double_reduction_def)
3355e4b17023SJohn Marino     {
3356e4b17023SJohn Marino       *adjustment_def = NULL;
3357e4b17023SJohn Marino       return vect_create_destination_var (init_val, vectype);
3358e4b17023SJohn Marino     }
3359e4b17023SJohn Marino 
3360e4b17023SJohn Marino   if (TREE_CONSTANT (init_val))
3361e4b17023SJohn Marino     {
3362e4b17023SJohn Marino       if (SCALAR_FLOAT_TYPE_P (scalar_type))
3363e4b17023SJohn Marino         init_value = build_real (scalar_type, TREE_REAL_CST (init_val));
3364e4b17023SJohn Marino       else
3365e4b17023SJohn Marino         init_value = build_int_cst (scalar_type, TREE_INT_CST_LOW (init_val));
3366e4b17023SJohn Marino     }
3367e4b17023SJohn Marino   else
3368e4b17023SJohn Marino     init_value = init_val;
3369e4b17023SJohn Marino 
3370e4b17023SJohn Marino   switch (code)
3371e4b17023SJohn Marino     {
3372e4b17023SJohn Marino       case WIDEN_SUM_EXPR:
3373e4b17023SJohn Marino       case DOT_PROD_EXPR:
3374e4b17023SJohn Marino       case PLUS_EXPR:
3375e4b17023SJohn Marino       case MINUS_EXPR:
3376e4b17023SJohn Marino       case BIT_IOR_EXPR:
3377e4b17023SJohn Marino       case BIT_XOR_EXPR:
3378e4b17023SJohn Marino       case MULT_EXPR:
3379e4b17023SJohn Marino       case BIT_AND_EXPR:
3380e4b17023SJohn Marino         /* ADJUSMENT_DEF is NULL when called from
3381e4b17023SJohn Marino            vect_create_epilog_for_reduction to vectorize double reduction.  */
3382e4b17023SJohn Marino         if (adjustment_def)
3383e4b17023SJohn Marino           {
3384e4b17023SJohn Marino             if (nested_in_vect_loop)
3385e4b17023SJohn Marino               *adjustment_def = vect_get_vec_def_for_operand (init_val, stmt,
3386e4b17023SJohn Marino                                                               NULL);
3387e4b17023SJohn Marino             else
3388e4b17023SJohn Marino               *adjustment_def = init_val;
3389e4b17023SJohn Marino           }
3390e4b17023SJohn Marino 
3391e4b17023SJohn Marino         if (code == MULT_EXPR)
3392e4b17023SJohn Marino           {
3393e4b17023SJohn Marino             real_init_val = dconst1;
3394e4b17023SJohn Marino             int_init_val = 1;
3395e4b17023SJohn Marino           }
3396e4b17023SJohn Marino 
3397e4b17023SJohn Marino         if (code == BIT_AND_EXPR)
3398e4b17023SJohn Marino           int_init_val = -1;
3399e4b17023SJohn Marino 
3400e4b17023SJohn Marino         if (SCALAR_FLOAT_TYPE_P (scalar_type))
3401e4b17023SJohn Marino           def_for_init = build_real (scalar_type, real_init_val);
3402e4b17023SJohn Marino         else
3403e4b17023SJohn Marino           def_for_init = build_int_cst (scalar_type, int_init_val);
3404e4b17023SJohn Marino 
3405e4b17023SJohn Marino         /* Create a vector of '0' or '1' except the first element.  */
3406e4b17023SJohn Marino         for (i = nunits - 2; i >= 0; --i)
3407e4b17023SJohn Marino           t = tree_cons (NULL_TREE, def_for_init, t);
3408e4b17023SJohn Marino 
3409e4b17023SJohn Marino         /* Option1: the first element is '0' or '1' as well.  */
3410e4b17023SJohn Marino         if (adjustment_def)
3411e4b17023SJohn Marino           {
3412e4b17023SJohn Marino             t = tree_cons (NULL_TREE, def_for_init, t);
3413e4b17023SJohn Marino             init_def = build_vector (vectype, t);
3414e4b17023SJohn Marino             break;
3415e4b17023SJohn Marino           }
3416e4b17023SJohn Marino 
3417e4b17023SJohn Marino         /* Option2: the first element is INIT_VAL.  */
3418e4b17023SJohn Marino         t = tree_cons (NULL_TREE, init_value, t);
3419e4b17023SJohn Marino         if (TREE_CONSTANT (init_val))
3420e4b17023SJohn Marino           init_def = build_vector (vectype, t);
3421e4b17023SJohn Marino         else
3422e4b17023SJohn Marino           init_def = build_constructor_from_list (vectype, t);
3423e4b17023SJohn Marino 
3424e4b17023SJohn Marino         break;
3425e4b17023SJohn Marino 
3426e4b17023SJohn Marino       case MIN_EXPR:
3427e4b17023SJohn Marino       case MAX_EXPR:
3428e4b17023SJohn Marino       case COND_EXPR:
3429e4b17023SJohn Marino         if (adjustment_def)
3430e4b17023SJohn Marino           {
3431e4b17023SJohn Marino             *adjustment_def = NULL_TREE;
3432e4b17023SJohn Marino             init_def = vect_get_vec_def_for_operand (init_val, stmt, NULL);
3433e4b17023SJohn Marino             break;
3434e4b17023SJohn Marino           }
3435e4b17023SJohn Marino 
3436e4b17023SJohn Marino 	init_def = build_vector_from_val (vectype, init_value);
3437e4b17023SJohn Marino         break;
3438e4b17023SJohn Marino 
3439e4b17023SJohn Marino       default:
3440e4b17023SJohn Marino         gcc_unreachable ();
3441e4b17023SJohn Marino     }
3442e4b17023SJohn Marino 
3443e4b17023SJohn Marino   return init_def;
3444e4b17023SJohn Marino }
3445e4b17023SJohn Marino 
3446e4b17023SJohn Marino 
3447e4b17023SJohn Marino /* Function vect_create_epilog_for_reduction
3448e4b17023SJohn Marino 
3449e4b17023SJohn Marino    Create code at the loop-epilog to finalize the result of a reduction
3450e4b17023SJohn Marino    computation.
3451e4b17023SJohn Marino 
3452e4b17023SJohn Marino    VECT_DEFS is list of vector of partial results, i.e., the lhs's of vector
3453e4b17023SJohn Marino      reduction statements.
3454e4b17023SJohn Marino    STMT is the scalar reduction stmt that is being vectorized.
3455e4b17023SJohn Marino    NCOPIES is > 1 in case the vectorization factor (VF) is bigger than the
3456e4b17023SJohn Marino      number of elements that we can fit in a vectype (nunits).  In this case
3457e4b17023SJohn Marino      we have to generate more than one vector stmt - i.e - we need to "unroll"
3458e4b17023SJohn Marino      the vector stmt by a factor VF/nunits.  For more details see documentation
3459e4b17023SJohn Marino      in vectorizable_operation.
3460e4b17023SJohn Marino    REDUC_CODE is the tree-code for the epilog reduction.
3461e4b17023SJohn Marino    REDUCTION_PHIS is a list of the phi-nodes that carry the reduction
3462e4b17023SJohn Marino      computation.
3463e4b17023SJohn Marino    REDUC_INDEX is the index of the operand in the right hand side of the
3464e4b17023SJohn Marino      statement that is defined by REDUCTION_PHI.
3465e4b17023SJohn Marino    DOUBLE_REDUC is TRUE if double reduction phi nodes should be handled.
3466e4b17023SJohn Marino    SLP_NODE is an SLP node containing a group of reduction statements. The
3467e4b17023SJohn Marino      first one in this group is STMT.
3468e4b17023SJohn Marino 
3469e4b17023SJohn Marino    This function:
3470e4b17023SJohn Marino    1. Creates the reduction def-use cycles: sets the arguments for
3471e4b17023SJohn Marino       REDUCTION_PHIS:
3472e4b17023SJohn Marino       The loop-entry argument is the vectorized initial-value of the reduction.
3473e4b17023SJohn Marino       The loop-latch argument is taken from VECT_DEFS - the vector of partial
3474e4b17023SJohn Marino       sums.
3475e4b17023SJohn Marino    2. "Reduces" each vector of partial results VECT_DEFS into a single result,
3476e4b17023SJohn Marino       by applying the operation specified by REDUC_CODE if available, or by
3477e4b17023SJohn Marino       other means (whole-vector shifts or a scalar loop).
3478e4b17023SJohn Marino       The function also creates a new phi node at the loop exit to preserve
3479e4b17023SJohn Marino       loop-closed form, as illustrated below.
3480e4b17023SJohn Marino 
3481e4b17023SJohn Marino      The flow at the entry to this function:
3482e4b17023SJohn Marino 
3483e4b17023SJohn Marino         loop:
3484e4b17023SJohn Marino           vec_def = phi <null, null>            # REDUCTION_PHI
3485e4b17023SJohn Marino           VECT_DEF = vector_stmt                # vectorized form of STMT
3486e4b17023SJohn Marino           s_loop = scalar_stmt                  # (scalar) STMT
3487e4b17023SJohn Marino         loop_exit:
3488e4b17023SJohn Marino           s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
3489e4b17023SJohn Marino           use <s_out0>
3490e4b17023SJohn Marino           use <s_out0>
3491e4b17023SJohn Marino 
3492e4b17023SJohn Marino      The above is transformed by this function into:
3493e4b17023SJohn Marino 
3494e4b17023SJohn Marino         loop:
3495e4b17023SJohn Marino           vec_def = phi <vec_init, VECT_DEF>    # REDUCTION_PHI
3496e4b17023SJohn Marino           VECT_DEF = vector_stmt                # vectorized form of STMT
3497e4b17023SJohn Marino           s_loop = scalar_stmt                  # (scalar) STMT
3498e4b17023SJohn Marino         loop_exit:
3499e4b17023SJohn Marino           s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
3500e4b17023SJohn Marino           v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
3501e4b17023SJohn Marino           v_out2 = reduce <v_out1>
3502e4b17023SJohn Marino           s_out3 = extract_field <v_out2, 0>
3503e4b17023SJohn Marino           s_out4 = adjust_result <s_out3>
3504e4b17023SJohn Marino           use <s_out4>
3505e4b17023SJohn Marino           use <s_out4>
3506e4b17023SJohn Marino */
3507e4b17023SJohn Marino 
3508e4b17023SJohn Marino static void
vect_create_epilog_for_reduction(VEC (tree,heap)* vect_defs,gimple stmt,int ncopies,enum tree_code reduc_code,VEC (gimple,heap)* reduction_phis,int reduc_index,bool double_reduc,slp_tree slp_node)3509e4b17023SJohn Marino vect_create_epilog_for_reduction (VEC (tree, heap) *vect_defs, gimple stmt,
3510e4b17023SJohn Marino 				  int ncopies, enum tree_code reduc_code,
3511e4b17023SJohn Marino 				  VEC (gimple, heap) *reduction_phis,
3512e4b17023SJohn Marino                                   int reduc_index, bool double_reduc,
3513e4b17023SJohn Marino                                   slp_tree slp_node)
3514e4b17023SJohn Marino {
3515e4b17023SJohn Marino   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3516e4b17023SJohn Marino   stmt_vec_info prev_phi_info;
3517e4b17023SJohn Marino   tree vectype;
3518e4b17023SJohn Marino   enum machine_mode mode;
3519e4b17023SJohn Marino   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3520e4b17023SJohn Marino   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *outer_loop = NULL;
3521e4b17023SJohn Marino   basic_block exit_bb;
3522e4b17023SJohn Marino   tree scalar_dest;
3523e4b17023SJohn Marino   tree scalar_type;
3524e4b17023SJohn Marino   gimple new_phi = NULL, phi;
3525e4b17023SJohn Marino   gimple_stmt_iterator exit_gsi;
3526e4b17023SJohn Marino   tree vec_dest;
3527e4b17023SJohn Marino   tree new_temp = NULL_TREE, new_dest, new_name, new_scalar_dest;
3528e4b17023SJohn Marino   gimple epilog_stmt = NULL;
3529e4b17023SJohn Marino   enum tree_code code = gimple_assign_rhs_code (stmt);
3530e4b17023SJohn Marino   gimple exit_phi;
3531e4b17023SJohn Marino   tree bitsize, bitpos;
3532e4b17023SJohn Marino   tree adjustment_def = NULL;
3533e4b17023SJohn Marino   tree vec_initial_def = NULL;
3534e4b17023SJohn Marino   tree reduction_op, expr, def;
3535e4b17023SJohn Marino   tree orig_name, scalar_result;
3536e4b17023SJohn Marino   imm_use_iterator imm_iter, phi_imm_iter;
3537e4b17023SJohn Marino   use_operand_p use_p, phi_use_p;
3538e4b17023SJohn Marino   bool extract_scalar_result = false;
3539e4b17023SJohn Marino   gimple use_stmt, orig_stmt, reduction_phi = NULL;
3540e4b17023SJohn Marino   bool nested_in_vect_loop = false;
3541e4b17023SJohn Marino   VEC (gimple, heap) *new_phis = NULL;
3542e4b17023SJohn Marino   VEC (gimple, heap) *inner_phis = NULL;
3543e4b17023SJohn Marino   enum vect_def_type dt = vect_unknown_def_type;
3544e4b17023SJohn Marino   int j, i;
3545e4b17023SJohn Marino   VEC (tree, heap) *scalar_results = NULL;
3546e4b17023SJohn Marino   unsigned int group_size = 1, k, ratio;
3547e4b17023SJohn Marino   VEC (tree, heap) *vec_initial_defs = NULL;
3548e4b17023SJohn Marino   VEC (gimple, heap) *phis;
3549e4b17023SJohn Marino   bool slp_reduc = false;
3550e4b17023SJohn Marino   tree new_phi_result;
3551e4b17023SJohn Marino   gimple inner_phi = NULL;
3552e4b17023SJohn Marino 
3553e4b17023SJohn Marino   if (slp_node)
3554e4b17023SJohn Marino     group_size = VEC_length (gimple, SLP_TREE_SCALAR_STMTS (slp_node));
3555e4b17023SJohn Marino 
3556e4b17023SJohn Marino   if (nested_in_vect_loop_p (loop, stmt))
3557e4b17023SJohn Marino     {
3558e4b17023SJohn Marino       outer_loop = loop;
3559e4b17023SJohn Marino       loop = loop->inner;
3560e4b17023SJohn Marino       nested_in_vect_loop = true;
3561e4b17023SJohn Marino       gcc_assert (!slp_node);
3562e4b17023SJohn Marino     }
3563e4b17023SJohn Marino 
3564e4b17023SJohn Marino   switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
3565e4b17023SJohn Marino     {
3566e4b17023SJohn Marino     case GIMPLE_SINGLE_RHS:
3567e4b17023SJohn Marino       gcc_assert (TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt))
3568e4b17023SJohn Marino 		  == ternary_op);
3569e4b17023SJohn Marino       reduction_op = TREE_OPERAND (gimple_assign_rhs1 (stmt), reduc_index);
3570e4b17023SJohn Marino       break;
3571e4b17023SJohn Marino     case GIMPLE_UNARY_RHS:
3572e4b17023SJohn Marino       reduction_op = gimple_assign_rhs1 (stmt);
3573e4b17023SJohn Marino       break;
3574e4b17023SJohn Marino     case GIMPLE_BINARY_RHS:
3575e4b17023SJohn Marino       reduction_op = reduc_index ?
3576e4b17023SJohn Marino                      gimple_assign_rhs2 (stmt) : gimple_assign_rhs1 (stmt);
3577e4b17023SJohn Marino       break;
3578e4b17023SJohn Marino     case GIMPLE_TERNARY_RHS:
3579e4b17023SJohn Marino       reduction_op = gimple_op (stmt, reduc_index + 1);
3580e4b17023SJohn Marino       break;
3581e4b17023SJohn Marino     default:
3582e4b17023SJohn Marino       gcc_unreachable ();
3583e4b17023SJohn Marino     }
3584e4b17023SJohn Marino 
3585e4b17023SJohn Marino   vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
3586e4b17023SJohn Marino   gcc_assert (vectype);
3587e4b17023SJohn Marino   mode = TYPE_MODE (vectype);
3588e4b17023SJohn Marino 
3589e4b17023SJohn Marino   /* 1. Create the reduction def-use cycle:
3590e4b17023SJohn Marino      Set the arguments of REDUCTION_PHIS, i.e., transform
3591e4b17023SJohn Marino 
3592e4b17023SJohn Marino         loop:
3593e4b17023SJohn Marino           vec_def = phi <null, null>            # REDUCTION_PHI
3594e4b17023SJohn Marino           VECT_DEF = vector_stmt                # vectorized form of STMT
3595e4b17023SJohn Marino           ...
3596e4b17023SJohn Marino 
3597e4b17023SJohn Marino      into:
3598e4b17023SJohn Marino 
3599e4b17023SJohn Marino         loop:
3600e4b17023SJohn Marino           vec_def = phi <vec_init, VECT_DEF>    # REDUCTION_PHI
3601e4b17023SJohn Marino           VECT_DEF = vector_stmt                # vectorized form of STMT
3602e4b17023SJohn Marino           ...
3603e4b17023SJohn Marino 
3604e4b17023SJohn Marino      (in case of SLP, do it for all the phis). */
3605e4b17023SJohn Marino 
3606e4b17023SJohn Marino   /* Get the loop-entry arguments.  */
3607e4b17023SJohn Marino   if (slp_node)
3608e4b17023SJohn Marino     vect_get_vec_defs (reduction_op, NULL_TREE, stmt, &vec_initial_defs,
3609e4b17023SJohn Marino                        NULL, slp_node, reduc_index);
3610e4b17023SJohn Marino   else
3611e4b17023SJohn Marino     {
3612e4b17023SJohn Marino       vec_initial_defs = VEC_alloc (tree, heap, 1);
3613e4b17023SJohn Marino      /* For the case of reduction, vect_get_vec_def_for_operand returns
3614e4b17023SJohn Marino         the scalar def before the loop, that defines the initial value
3615e4b17023SJohn Marino         of the reduction variable.  */
3616e4b17023SJohn Marino       vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
3617e4b17023SJohn Marino                                                       &adjustment_def);
3618e4b17023SJohn Marino       VEC_quick_push (tree, vec_initial_defs, vec_initial_def);
3619e4b17023SJohn Marino     }
3620e4b17023SJohn Marino 
3621e4b17023SJohn Marino   /* Set phi nodes arguments.  */
3622e4b17023SJohn Marino   FOR_EACH_VEC_ELT (gimple, reduction_phis, i, phi)
3623e4b17023SJohn Marino     {
3624e4b17023SJohn Marino       tree vec_init_def = VEC_index (tree, vec_initial_defs, i);
3625e4b17023SJohn Marino       tree def = VEC_index (tree, vect_defs, i);
3626e4b17023SJohn Marino       for (j = 0; j < ncopies; j++)
3627e4b17023SJohn Marino         {
3628e4b17023SJohn Marino           /* Set the loop-entry arg of the reduction-phi.  */
3629e4b17023SJohn Marino           add_phi_arg (phi, vec_init_def, loop_preheader_edge (loop),
3630e4b17023SJohn Marino                        UNKNOWN_LOCATION);
3631e4b17023SJohn Marino 
3632e4b17023SJohn Marino           /* Set the loop-latch arg for the reduction-phi.  */
3633e4b17023SJohn Marino           if (j > 0)
3634e4b17023SJohn Marino             def = vect_get_vec_def_for_stmt_copy (vect_unknown_def_type, def);
3635e4b17023SJohn Marino 
3636e4b17023SJohn Marino           add_phi_arg (phi, def, loop_latch_edge (loop), UNKNOWN_LOCATION);
3637e4b17023SJohn Marino 
3638e4b17023SJohn Marino           if (vect_print_dump_info (REPORT_DETAILS))
3639e4b17023SJohn Marino             {
3640e4b17023SJohn Marino               fprintf (vect_dump, "transform reduction: created def-use"
3641e4b17023SJohn Marino                                   " cycle: ");
3642e4b17023SJohn Marino               print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
3643e4b17023SJohn Marino               fprintf (vect_dump, "\n");
3644e4b17023SJohn Marino               print_gimple_stmt (vect_dump, SSA_NAME_DEF_STMT (def), 0,
3645e4b17023SJohn Marino                                  TDF_SLIM);
3646e4b17023SJohn Marino             }
3647e4b17023SJohn Marino 
3648e4b17023SJohn Marino           phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
3649e4b17023SJohn Marino         }
3650e4b17023SJohn Marino     }
3651e4b17023SJohn Marino 
3652e4b17023SJohn Marino   VEC_free (tree, heap, vec_initial_defs);
3653e4b17023SJohn Marino 
3654e4b17023SJohn Marino   /* 2. Create epilog code.
3655e4b17023SJohn Marino         The reduction epilog code operates across the elements of the vector
3656e4b17023SJohn Marino         of partial results computed by the vectorized loop.
3657e4b17023SJohn Marino         The reduction epilog code consists of:
3658e4b17023SJohn Marino 
3659e4b17023SJohn Marino         step 1: compute the scalar result in a vector (v_out2)
3660e4b17023SJohn Marino         step 2: extract the scalar result (s_out3) from the vector (v_out2)
3661e4b17023SJohn Marino         step 3: adjust the scalar result (s_out3) if needed.
3662e4b17023SJohn Marino 
3663e4b17023SJohn Marino         Step 1 can be accomplished using one the following three schemes:
3664e4b17023SJohn Marino           (scheme 1) using reduc_code, if available.
3665e4b17023SJohn Marino           (scheme 2) using whole-vector shifts, if available.
3666e4b17023SJohn Marino           (scheme 3) using a scalar loop. In this case steps 1+2 above are
3667e4b17023SJohn Marino                      combined.
3668e4b17023SJohn Marino 
3669e4b17023SJohn Marino           The overall epilog code looks like this:
3670e4b17023SJohn Marino 
3671e4b17023SJohn Marino           s_out0 = phi <s_loop>         # original EXIT_PHI
3672e4b17023SJohn Marino           v_out1 = phi <VECT_DEF>       # NEW_EXIT_PHI
3673e4b17023SJohn Marino           v_out2 = reduce <v_out1>              # step 1
3674e4b17023SJohn Marino           s_out3 = extract_field <v_out2, 0>    # step 2
3675e4b17023SJohn Marino           s_out4 = adjust_result <s_out3>       # step 3
3676e4b17023SJohn Marino 
3677e4b17023SJohn Marino           (step 3 is optional, and steps 1 and 2 may be combined).
3678e4b17023SJohn Marino           Lastly, the uses of s_out0 are replaced by s_out4.  */
3679e4b17023SJohn Marino 
3680e4b17023SJohn Marino 
3681e4b17023SJohn Marino   /* 2.1 Create new loop-exit-phis to preserve loop-closed form:
3682e4b17023SJohn Marino          v_out1 = phi <VECT_DEF>
3683e4b17023SJohn Marino          Store them in NEW_PHIS.  */
3684e4b17023SJohn Marino 
3685e4b17023SJohn Marino   exit_bb = single_exit (loop)->dest;
3686e4b17023SJohn Marino   prev_phi_info = NULL;
3687e4b17023SJohn Marino   new_phis = VEC_alloc (gimple, heap, VEC_length (tree, vect_defs));
3688e4b17023SJohn Marino   FOR_EACH_VEC_ELT (tree, vect_defs, i, def)
3689e4b17023SJohn Marino     {
3690e4b17023SJohn Marino       for (j = 0; j < ncopies; j++)
3691e4b17023SJohn Marino         {
3692e4b17023SJohn Marino           phi = create_phi_node (SSA_NAME_VAR (def), exit_bb);
3693e4b17023SJohn Marino           set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, loop_vinfo, NULL));
3694e4b17023SJohn Marino           if (j == 0)
3695e4b17023SJohn Marino             VEC_quick_push (gimple, new_phis, phi);
3696e4b17023SJohn Marino           else
3697e4b17023SJohn Marino 	    {
3698e4b17023SJohn Marino 	      def = vect_get_vec_def_for_stmt_copy (dt, def);
3699e4b17023SJohn Marino 	      STMT_VINFO_RELATED_STMT (prev_phi_info) = phi;
3700e4b17023SJohn Marino 	    }
3701e4b17023SJohn Marino 
3702e4b17023SJohn Marino           SET_PHI_ARG_DEF (phi, single_exit (loop)->dest_idx, def);
3703e4b17023SJohn Marino           prev_phi_info = vinfo_for_stmt (phi);
3704e4b17023SJohn Marino         }
3705e4b17023SJohn Marino     }
3706e4b17023SJohn Marino 
3707e4b17023SJohn Marino   /* The epilogue is created for the outer-loop, i.e., for the loop being
3708e4b17023SJohn Marino      vectorized.  Create exit phis for the outer loop.  */
3709e4b17023SJohn Marino   if (double_reduc)
3710e4b17023SJohn Marino     {
3711e4b17023SJohn Marino       loop = outer_loop;
3712e4b17023SJohn Marino       exit_bb = single_exit (loop)->dest;
3713e4b17023SJohn Marino       inner_phis = VEC_alloc (gimple, heap, VEC_length (tree, vect_defs));
3714e4b17023SJohn Marino       FOR_EACH_VEC_ELT (gimple, new_phis, i, phi)
3715e4b17023SJohn Marino 	{
3716e4b17023SJohn Marino 	  gimple outer_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (phi)),
3717e4b17023SJohn Marino 					      exit_bb);
3718e4b17023SJohn Marino 	  SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
3719e4b17023SJohn Marino 			   PHI_RESULT (phi));
3720e4b17023SJohn Marino 	  set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi,
3721e4b17023SJohn Marino 							    loop_vinfo, NULL));
3722e4b17023SJohn Marino 	  VEC_quick_push (gimple, inner_phis, phi);
3723e4b17023SJohn Marino 	  VEC_replace (gimple, new_phis, i, outer_phi);
3724e4b17023SJohn Marino 	  prev_phi_info = vinfo_for_stmt (outer_phi);
3725e4b17023SJohn Marino           while (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi)))
3726e4b17023SJohn Marino             {
3727e4b17023SJohn Marino 	      phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
3728e4b17023SJohn Marino 	      outer_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (phi)),
3729e4b17023SJohn Marino 					   exit_bb);
3730e4b17023SJohn Marino 	      SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
3731e4b17023SJohn Marino 			       PHI_RESULT (phi));
3732e4b17023SJohn Marino 	      set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi,
3733e4b17023SJohn Marino 							loop_vinfo, NULL));
3734e4b17023SJohn Marino 	      STMT_VINFO_RELATED_STMT (prev_phi_info) = outer_phi;
3735e4b17023SJohn Marino 	      prev_phi_info = vinfo_for_stmt (outer_phi);
3736e4b17023SJohn Marino 	    }
3737e4b17023SJohn Marino 	}
3738e4b17023SJohn Marino     }
3739e4b17023SJohn Marino 
3740e4b17023SJohn Marino   exit_gsi = gsi_after_labels (exit_bb);
3741e4b17023SJohn Marino 
3742e4b17023SJohn Marino   /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
3743e4b17023SJohn Marino          (i.e. when reduc_code is not available) and in the final adjustment
3744e4b17023SJohn Marino 	 code (if needed).  Also get the original scalar reduction variable as
3745e4b17023SJohn Marino          defined in the loop.  In case STMT is a "pattern-stmt" (i.e. - it
3746e4b17023SJohn Marino          represents a reduction pattern), the tree-code and scalar-def are
3747e4b17023SJohn Marino          taken from the original stmt that the pattern-stmt (STMT) replaces.
3748e4b17023SJohn Marino          Otherwise (it is a regular reduction) - the tree-code and scalar-def
3749e4b17023SJohn Marino          are taken from STMT.  */
3750e4b17023SJohn Marino 
3751e4b17023SJohn Marino   orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3752e4b17023SJohn Marino   if (!orig_stmt)
3753e4b17023SJohn Marino     {
3754e4b17023SJohn Marino       /* Regular reduction  */
3755e4b17023SJohn Marino       orig_stmt = stmt;
3756e4b17023SJohn Marino     }
3757e4b17023SJohn Marino   else
3758e4b17023SJohn Marino     {
3759e4b17023SJohn Marino       /* Reduction pattern  */
3760e4b17023SJohn Marino       stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt);
3761e4b17023SJohn Marino       gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
3762e4b17023SJohn Marino       gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
3763e4b17023SJohn Marino     }
3764e4b17023SJohn Marino 
3765e4b17023SJohn Marino   code = gimple_assign_rhs_code (orig_stmt);
3766e4b17023SJohn Marino   /* For MINUS_EXPR the initial vector is [init_val,0,...,0], therefore,
3767e4b17023SJohn Marino      partial results are added and not subtracted.  */
3768e4b17023SJohn Marino   if (code == MINUS_EXPR)
3769e4b17023SJohn Marino     code = PLUS_EXPR;
3770e4b17023SJohn Marino 
3771e4b17023SJohn Marino   scalar_dest = gimple_assign_lhs (orig_stmt);
3772e4b17023SJohn Marino   scalar_type = TREE_TYPE (scalar_dest);
3773e4b17023SJohn Marino   scalar_results = VEC_alloc (tree, heap, group_size);
3774e4b17023SJohn Marino   new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
3775e4b17023SJohn Marino   bitsize = TYPE_SIZE (scalar_type);
3776e4b17023SJohn Marino 
3777e4b17023SJohn Marino   /* In case this is a reduction in an inner-loop while vectorizing an outer
3778e4b17023SJohn Marino      loop - we don't need to extract a single scalar result at the end of the
3779e4b17023SJohn Marino      inner-loop (unless it is double reduction, i.e., the use of reduction is
3780e4b17023SJohn Marino      outside the outer-loop).  The final vector of partial results will be used
3781e4b17023SJohn Marino      in the vectorized outer-loop, or reduced to a scalar result at the end of
3782e4b17023SJohn Marino      the outer-loop.  */
3783e4b17023SJohn Marino   if (nested_in_vect_loop && !double_reduc)
3784e4b17023SJohn Marino     goto vect_finalize_reduction;
3785e4b17023SJohn Marino 
3786e4b17023SJohn Marino   /* SLP reduction without reduction chain, e.g.,
3787e4b17023SJohn Marino      # a1 = phi <a2, a0>
3788e4b17023SJohn Marino      # b1 = phi <b2, b0>
3789e4b17023SJohn Marino      a2 = operation (a1)
3790e4b17023SJohn Marino      b2 = operation (b1)  */
3791e4b17023SJohn Marino   slp_reduc = (slp_node && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)));
3792e4b17023SJohn Marino 
3793e4b17023SJohn Marino   /* In case of reduction chain, e.g.,
3794e4b17023SJohn Marino      # a1 = phi <a3, a0>
3795e4b17023SJohn Marino      a2 = operation (a1)
3796e4b17023SJohn Marino      a3 = operation (a2),
3797e4b17023SJohn Marino 
3798e4b17023SJohn Marino      we may end up with more than one vector result.  Here we reduce them to
3799e4b17023SJohn Marino      one vector.  */
3800e4b17023SJohn Marino   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
3801e4b17023SJohn Marino     {
3802e4b17023SJohn Marino       tree first_vect = PHI_RESULT (VEC_index (gimple, new_phis, 0));
3803e4b17023SJohn Marino       tree tmp;
3804e4b17023SJohn Marino       gimple new_vec_stmt = NULL;
3805e4b17023SJohn Marino 
3806e4b17023SJohn Marino       vec_dest = vect_create_destination_var (scalar_dest, vectype);
3807e4b17023SJohn Marino       for (k = 1; k < VEC_length (gimple, new_phis); k++)
3808e4b17023SJohn Marino         {
3809e4b17023SJohn Marino           gimple next_phi = VEC_index (gimple, new_phis, k);
3810e4b17023SJohn Marino           tree second_vect = PHI_RESULT (next_phi);
3811e4b17023SJohn Marino 
3812e4b17023SJohn Marino           tmp = build2 (code, vectype,  first_vect, second_vect);
3813e4b17023SJohn Marino           new_vec_stmt = gimple_build_assign (vec_dest, tmp);
3814e4b17023SJohn Marino           first_vect = make_ssa_name (vec_dest, new_vec_stmt);
3815e4b17023SJohn Marino           gimple_assign_set_lhs (new_vec_stmt, first_vect);
3816e4b17023SJohn Marino           gsi_insert_before (&exit_gsi, new_vec_stmt, GSI_SAME_STMT);
3817e4b17023SJohn Marino         }
3818e4b17023SJohn Marino 
3819e4b17023SJohn Marino       new_phi_result = first_vect;
3820e4b17023SJohn Marino       if (new_vec_stmt)
3821e4b17023SJohn Marino         {
3822e4b17023SJohn Marino           VEC_truncate (gimple, new_phis, 0);
3823e4b17023SJohn Marino           VEC_safe_push (gimple, heap, new_phis, new_vec_stmt);
3824e4b17023SJohn Marino         }
3825e4b17023SJohn Marino     }
3826e4b17023SJohn Marino   else
3827e4b17023SJohn Marino     new_phi_result = PHI_RESULT (VEC_index (gimple, new_phis, 0));
3828e4b17023SJohn Marino 
3829e4b17023SJohn Marino   /* 2.3 Create the reduction code, using one of the three schemes described
3830e4b17023SJohn Marino          above.  In SLP we simply need to extract all the elements from the
3831e4b17023SJohn Marino          vector (without reducing them), so we use scalar code (Case 3).  */
3832e4b17023SJohn Marino   if (reduc_code != ERROR_MARK && !slp_reduc)
3833e4b17023SJohn Marino     {
3834e4b17023SJohn Marino       tree tmp;
3835e4b17023SJohn Marino 
3836e4b17023SJohn Marino       /*** Case 1:  Create:
3837e4b17023SJohn Marino            v_out2 = reduc_expr <v_out1>  */
3838e4b17023SJohn Marino 
3839e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_DETAILS))
3840e4b17023SJohn Marino         fprintf (vect_dump, "Reduce using direct vector reduction.");
3841e4b17023SJohn Marino 
3842e4b17023SJohn Marino       vec_dest = vect_create_destination_var (scalar_dest, vectype);
3843e4b17023SJohn Marino       tmp = build1 (reduc_code, vectype, new_phi_result);
3844e4b17023SJohn Marino       epilog_stmt = gimple_build_assign (vec_dest, tmp);
3845e4b17023SJohn Marino       new_temp = make_ssa_name (vec_dest, epilog_stmt);
3846e4b17023SJohn Marino       gimple_assign_set_lhs (epilog_stmt, new_temp);
3847e4b17023SJohn Marino       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
3848e4b17023SJohn Marino 
3849e4b17023SJohn Marino       extract_scalar_result = true;
3850e4b17023SJohn Marino     }
3851e4b17023SJohn Marino   else
3852e4b17023SJohn Marino     {
3853e4b17023SJohn Marino       enum tree_code shift_code = ERROR_MARK;
3854e4b17023SJohn Marino       bool have_whole_vector_shift = true;
3855e4b17023SJohn Marino       int bit_offset;
3856e4b17023SJohn Marino       int element_bitsize = tree_low_cst (bitsize, 1);
3857e4b17023SJohn Marino       int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
3858e4b17023SJohn Marino       tree vec_temp;
3859e4b17023SJohn Marino 
3860e4b17023SJohn Marino       if (optab_handler (vec_shr_optab, mode) != CODE_FOR_nothing)
3861e4b17023SJohn Marino         shift_code = VEC_RSHIFT_EXPR;
3862e4b17023SJohn Marino       else
3863e4b17023SJohn Marino         have_whole_vector_shift = false;
3864e4b17023SJohn Marino 
3865e4b17023SJohn Marino       /* Regardless of whether we have a whole vector shift, if we're
3866e4b17023SJohn Marino          emulating the operation via tree-vect-generic, we don't want
3867e4b17023SJohn Marino          to use it.  Only the first round of the reduction is likely
3868e4b17023SJohn Marino          to still be profitable via emulation.  */
3869e4b17023SJohn Marino       /* ??? It might be better to emit a reduction tree code here, so that
3870e4b17023SJohn Marino          tree-vect-generic can expand the first round via bit tricks.  */
3871e4b17023SJohn Marino       if (!VECTOR_MODE_P (mode))
3872e4b17023SJohn Marino         have_whole_vector_shift = false;
3873e4b17023SJohn Marino       else
3874e4b17023SJohn Marino         {
3875e4b17023SJohn Marino           optab optab = optab_for_tree_code (code, vectype, optab_default);
3876e4b17023SJohn Marino           if (optab_handler (optab, mode) == CODE_FOR_nothing)
3877e4b17023SJohn Marino             have_whole_vector_shift = false;
3878e4b17023SJohn Marino         }
3879e4b17023SJohn Marino 
3880e4b17023SJohn Marino       if (have_whole_vector_shift && !slp_reduc)
3881e4b17023SJohn Marino         {
3882e4b17023SJohn Marino           /*** Case 2: Create:
3883e4b17023SJohn Marino              for (offset = VS/2; offset >= element_size; offset/=2)
3884e4b17023SJohn Marino                 {
3885e4b17023SJohn Marino                   Create:  va' = vec_shift <va, offset>
3886e4b17023SJohn Marino                   Create:  va = vop <va, va'>
3887e4b17023SJohn Marino                 }  */
3888e4b17023SJohn Marino 
3889e4b17023SJohn Marino           if (vect_print_dump_info (REPORT_DETAILS))
3890e4b17023SJohn Marino             fprintf (vect_dump, "Reduce using vector shifts");
3891e4b17023SJohn Marino 
3892e4b17023SJohn Marino           vec_dest = vect_create_destination_var (scalar_dest, vectype);
3893e4b17023SJohn Marino           new_temp = new_phi_result;
3894e4b17023SJohn Marino           for (bit_offset = vec_size_in_bits/2;
3895e4b17023SJohn Marino                bit_offset >= element_bitsize;
3896e4b17023SJohn Marino                bit_offset /= 2)
3897e4b17023SJohn Marino             {
3898e4b17023SJohn Marino               tree bitpos = size_int (bit_offset);
3899e4b17023SJohn Marino 
3900e4b17023SJohn Marino               epilog_stmt = gimple_build_assign_with_ops (shift_code,
3901e4b17023SJohn Marino                                                vec_dest, new_temp, bitpos);
3902e4b17023SJohn Marino               new_name = make_ssa_name (vec_dest, epilog_stmt);
3903e4b17023SJohn Marino               gimple_assign_set_lhs (epilog_stmt, new_name);
3904e4b17023SJohn Marino               gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
3905e4b17023SJohn Marino 
3906e4b17023SJohn Marino               epilog_stmt = gimple_build_assign_with_ops (code, vec_dest,
3907e4b17023SJohn Marino                                                           new_name, new_temp);
3908e4b17023SJohn Marino               new_temp = make_ssa_name (vec_dest, epilog_stmt);
3909e4b17023SJohn Marino               gimple_assign_set_lhs (epilog_stmt, new_temp);
3910e4b17023SJohn Marino               gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
3911e4b17023SJohn Marino             }
3912e4b17023SJohn Marino 
3913e4b17023SJohn Marino           extract_scalar_result = true;
3914e4b17023SJohn Marino         }
3915e4b17023SJohn Marino       else
3916e4b17023SJohn Marino         {
3917e4b17023SJohn Marino           tree rhs;
3918e4b17023SJohn Marino 
3919e4b17023SJohn Marino           /*** Case 3: Create:
3920e4b17023SJohn Marino              s = extract_field <v_out2, 0>
3921e4b17023SJohn Marino              for (offset = element_size;
3922e4b17023SJohn Marino                   offset < vector_size;
3923e4b17023SJohn Marino                   offset += element_size;)
3924e4b17023SJohn Marino                {
3925e4b17023SJohn Marino                  Create:  s' = extract_field <v_out2, offset>
3926e4b17023SJohn Marino                  Create:  s = op <s, s'>  // For non SLP cases
3927e4b17023SJohn Marino                }  */
3928e4b17023SJohn Marino 
3929e4b17023SJohn Marino           if (vect_print_dump_info (REPORT_DETAILS))
3930e4b17023SJohn Marino             fprintf (vect_dump, "Reduce using scalar code. ");
3931e4b17023SJohn Marino 
3932e4b17023SJohn Marino           vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
3933e4b17023SJohn Marino           FOR_EACH_VEC_ELT (gimple, new_phis, i, new_phi)
3934e4b17023SJohn Marino             {
3935e4b17023SJohn Marino               if (gimple_code (new_phi) == GIMPLE_PHI)
3936e4b17023SJohn Marino                 vec_temp = PHI_RESULT (new_phi);
3937e4b17023SJohn Marino               else
3938e4b17023SJohn Marino                 vec_temp = gimple_assign_lhs (new_phi);
3939e4b17023SJohn Marino               rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
3940e4b17023SJohn Marino                             bitsize_zero_node);
3941e4b17023SJohn Marino               epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
3942e4b17023SJohn Marino               new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
3943e4b17023SJohn Marino               gimple_assign_set_lhs (epilog_stmt, new_temp);
3944e4b17023SJohn Marino               gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
3945e4b17023SJohn Marino 
3946e4b17023SJohn Marino               /* In SLP we don't need to apply reduction operation, so we just
3947e4b17023SJohn Marino                  collect s' values in SCALAR_RESULTS.  */
3948e4b17023SJohn Marino               if (slp_reduc)
3949e4b17023SJohn Marino                 VEC_safe_push (tree, heap, scalar_results, new_temp);
3950e4b17023SJohn Marino 
3951e4b17023SJohn Marino               for (bit_offset = element_bitsize;
3952e4b17023SJohn Marino                    bit_offset < vec_size_in_bits;
3953e4b17023SJohn Marino                    bit_offset += element_bitsize)
3954e4b17023SJohn Marino                 {
3955e4b17023SJohn Marino                   tree bitpos = bitsize_int (bit_offset);
3956e4b17023SJohn Marino                   tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp,
3957e4b17023SJohn Marino                                      bitsize, bitpos);
3958e4b17023SJohn Marino 
3959e4b17023SJohn Marino                   epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
3960e4b17023SJohn Marino                   new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
3961e4b17023SJohn Marino                   gimple_assign_set_lhs (epilog_stmt, new_name);
3962e4b17023SJohn Marino                   gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
3963e4b17023SJohn Marino 
3964e4b17023SJohn Marino                   if (slp_reduc)
3965e4b17023SJohn Marino                     {
3966e4b17023SJohn Marino                       /* In SLP we don't need to apply reduction operation, so
3967e4b17023SJohn Marino                          we just collect s' values in SCALAR_RESULTS.  */
3968e4b17023SJohn Marino                       new_temp = new_name;
3969e4b17023SJohn Marino                       VEC_safe_push (tree, heap, scalar_results, new_name);
3970e4b17023SJohn Marino                     }
3971e4b17023SJohn Marino                   else
3972e4b17023SJohn Marino                     {
3973e4b17023SJohn Marino                       epilog_stmt = gimple_build_assign_with_ops (code,
3974e4b17023SJohn Marino                                           new_scalar_dest, new_name, new_temp);
3975e4b17023SJohn Marino                       new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
3976e4b17023SJohn Marino                       gimple_assign_set_lhs (epilog_stmt, new_temp);
3977e4b17023SJohn Marino                       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
3978e4b17023SJohn Marino                     }
3979e4b17023SJohn Marino                 }
3980e4b17023SJohn Marino             }
3981e4b17023SJohn Marino 
3982e4b17023SJohn Marino           /* The only case where we need to reduce scalar results in SLP is
3983e4b17023SJohn Marino              unrolling.  If the size of SCALAR_RESULTS is greater than
3984e4b17023SJohn Marino              GROUP_SIZE, we reduce them by combining the elements whose
3985e4b17023SJohn Marino              indices are equal modulo GROUP_SIZE.  */
3986e4b17023SJohn Marino           if (slp_reduc)
3987e4b17023SJohn Marino             {
3988e4b17023SJohn Marino               tree res, first_res, new_res;
3989e4b17023SJohn Marino               gimple new_stmt;
3990e4b17023SJohn Marino 
3991e4b17023SJohn Marino               /* Reduce multiple scalar results in case of SLP unrolling.  */
3992e4b17023SJohn Marino               for (j = group_size; VEC_iterate (tree, scalar_results, j, res);
3993e4b17023SJohn Marino                    j++)
3994e4b17023SJohn Marino                 {
3995e4b17023SJohn Marino                   first_res = VEC_index (tree, scalar_results, j % group_size);
3996e4b17023SJohn Marino                   new_stmt = gimple_build_assign_with_ops (code,
3997e4b17023SJohn Marino                                               new_scalar_dest, first_res, res);
3998e4b17023SJohn Marino                   new_res = make_ssa_name (new_scalar_dest, new_stmt);
3999e4b17023SJohn Marino                   gimple_assign_set_lhs (new_stmt, new_res);
4000e4b17023SJohn Marino                   gsi_insert_before (&exit_gsi, new_stmt, GSI_SAME_STMT);
4001e4b17023SJohn Marino                   VEC_replace (tree, scalar_results, j % group_size, new_res);
4002e4b17023SJohn Marino                 }
4003e4b17023SJohn Marino             }
4004e4b17023SJohn Marino           else
4005e4b17023SJohn Marino             /* Not SLP - we have one scalar to keep in SCALAR_RESULTS.  */
4006e4b17023SJohn Marino             VEC_safe_push (tree, heap, scalar_results, new_temp);
4007e4b17023SJohn Marino 
4008e4b17023SJohn Marino           extract_scalar_result = false;
4009e4b17023SJohn Marino         }
4010e4b17023SJohn Marino     }
4011e4b17023SJohn Marino 
4012e4b17023SJohn Marino   /* 2.4  Extract the final scalar result.  Create:
4013e4b17023SJohn Marino           s_out3 = extract_field <v_out2, bitpos>  */
4014e4b17023SJohn Marino 
4015e4b17023SJohn Marino   if (extract_scalar_result)
4016e4b17023SJohn Marino     {
4017e4b17023SJohn Marino       tree rhs;
4018e4b17023SJohn Marino 
4019e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_DETAILS))
4020e4b17023SJohn Marino         fprintf (vect_dump, "extract scalar result");
4021e4b17023SJohn Marino 
4022e4b17023SJohn Marino       if (BYTES_BIG_ENDIAN)
4023e4b17023SJohn Marino         bitpos = size_binop (MULT_EXPR,
4024e4b17023SJohn Marino                              bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
4025e4b17023SJohn Marino                              TYPE_SIZE (scalar_type));
4026e4b17023SJohn Marino       else
4027e4b17023SJohn Marino         bitpos = bitsize_zero_node;
4028e4b17023SJohn Marino 
4029e4b17023SJohn Marino       rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp, bitsize, bitpos);
4030e4b17023SJohn Marino       epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
4031e4b17023SJohn Marino       new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4032e4b17023SJohn Marino       gimple_assign_set_lhs (epilog_stmt, new_temp);
4033e4b17023SJohn Marino       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4034e4b17023SJohn Marino       VEC_safe_push (tree, heap, scalar_results, new_temp);
4035e4b17023SJohn Marino     }
4036e4b17023SJohn Marino 
4037e4b17023SJohn Marino vect_finalize_reduction:
4038e4b17023SJohn Marino 
4039e4b17023SJohn Marino   if (double_reduc)
4040e4b17023SJohn Marino     loop = loop->inner;
4041e4b17023SJohn Marino 
4042e4b17023SJohn Marino   /* 2.5 Adjust the final result by the initial value of the reduction
4043e4b17023SJohn Marino 	 variable. (When such adjustment is not needed, then
4044e4b17023SJohn Marino 	 'adjustment_def' is zero).  For example, if code is PLUS we create:
4045e4b17023SJohn Marino 	 new_temp = loop_exit_def + adjustment_def  */
4046e4b17023SJohn Marino 
4047e4b17023SJohn Marino   if (adjustment_def)
4048e4b17023SJohn Marino     {
4049e4b17023SJohn Marino       gcc_assert (!slp_reduc);
4050e4b17023SJohn Marino       if (nested_in_vect_loop)
4051e4b17023SJohn Marino 	{
4052e4b17023SJohn Marino           new_phi = VEC_index (gimple, new_phis, 0);
4053e4b17023SJohn Marino 	  gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) == VECTOR_TYPE);
4054e4b17023SJohn Marino 	  expr = build2 (code, vectype, PHI_RESULT (new_phi), adjustment_def);
4055e4b17023SJohn Marino 	  new_dest = vect_create_destination_var (scalar_dest, vectype);
4056e4b17023SJohn Marino 	}
4057e4b17023SJohn Marino       else
4058e4b17023SJohn Marino 	{
4059e4b17023SJohn Marino           new_temp = VEC_index (tree, scalar_results, 0);
4060e4b17023SJohn Marino 	  gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) != VECTOR_TYPE);
4061e4b17023SJohn Marino 	  expr = build2 (code, scalar_type, new_temp, adjustment_def);
4062e4b17023SJohn Marino 	  new_dest = vect_create_destination_var (scalar_dest, scalar_type);
4063e4b17023SJohn Marino 	}
4064e4b17023SJohn Marino 
4065e4b17023SJohn Marino       epilog_stmt = gimple_build_assign (new_dest, expr);
4066e4b17023SJohn Marino       new_temp = make_ssa_name (new_dest, epilog_stmt);
4067e4b17023SJohn Marino       gimple_assign_set_lhs (epilog_stmt, new_temp);
4068e4b17023SJohn Marino       SSA_NAME_DEF_STMT (new_temp) = epilog_stmt;
4069e4b17023SJohn Marino       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4070e4b17023SJohn Marino       if (nested_in_vect_loop)
4071e4b17023SJohn Marino         {
4072e4b17023SJohn Marino           set_vinfo_for_stmt (epilog_stmt,
4073e4b17023SJohn Marino                               new_stmt_vec_info (epilog_stmt, loop_vinfo,
4074e4b17023SJohn Marino                                                  NULL));
4075e4b17023SJohn Marino           STMT_VINFO_RELATED_STMT (vinfo_for_stmt (epilog_stmt)) =
4076e4b17023SJohn Marino                 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_phi));
4077e4b17023SJohn Marino 
4078e4b17023SJohn Marino           if (!double_reduc)
4079e4b17023SJohn Marino             VEC_quick_push (tree, scalar_results, new_temp);
4080e4b17023SJohn Marino           else
4081e4b17023SJohn Marino             VEC_replace (tree, scalar_results, 0, new_temp);
4082e4b17023SJohn Marino         }
4083e4b17023SJohn Marino       else
4084e4b17023SJohn Marino         VEC_replace (tree, scalar_results, 0, new_temp);
4085e4b17023SJohn Marino 
4086e4b17023SJohn Marino       VEC_replace (gimple, new_phis, 0, epilog_stmt);
4087e4b17023SJohn Marino     }
4088e4b17023SJohn Marino 
4089e4b17023SJohn Marino   /* 2.6  Handle the loop-exit phis.  Replace the uses of scalar loop-exit
4090e4b17023SJohn Marino           phis with new adjusted scalar results, i.e., replace use <s_out0>
4091e4b17023SJohn Marino           with use <s_out4>.
4092e4b17023SJohn Marino 
4093e4b17023SJohn Marino      Transform:
4094e4b17023SJohn Marino         loop_exit:
4095e4b17023SJohn Marino           s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
4096e4b17023SJohn Marino           v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
4097e4b17023SJohn Marino           v_out2 = reduce <v_out1>
4098e4b17023SJohn Marino           s_out3 = extract_field <v_out2, 0>
4099e4b17023SJohn Marino           s_out4 = adjust_result <s_out3>
4100e4b17023SJohn Marino           use <s_out0>
4101e4b17023SJohn Marino           use <s_out0>
4102e4b17023SJohn Marino 
4103e4b17023SJohn Marino      into:
4104e4b17023SJohn Marino 
4105e4b17023SJohn Marino         loop_exit:
4106e4b17023SJohn Marino           s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
4107e4b17023SJohn Marino           v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
4108e4b17023SJohn Marino           v_out2 = reduce <v_out1>
4109e4b17023SJohn Marino           s_out3 = extract_field <v_out2, 0>
4110e4b17023SJohn Marino           s_out4 = adjust_result <s_out3>
4111e4b17023SJohn Marino           use <s_out4>
4112e4b17023SJohn Marino           use <s_out4> */
4113e4b17023SJohn Marino 
4114e4b17023SJohn Marino 
4115e4b17023SJohn Marino   /* In SLP reduction chain we reduce vector results into one vector if
4116e4b17023SJohn Marino      necessary, hence we set here GROUP_SIZE to 1.  SCALAR_DEST is the LHS of
4117e4b17023SJohn Marino      the last stmt in the reduction chain, since we are looking for the loop
4118e4b17023SJohn Marino      exit phi node.  */
4119e4b17023SJohn Marino   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
4120e4b17023SJohn Marino     {
4121e4b17023SJohn Marino       scalar_dest = gimple_assign_lhs (VEC_index (gimple,
4122e4b17023SJohn Marino                                        SLP_TREE_SCALAR_STMTS (slp_node),
4123e4b17023SJohn Marino                                        group_size - 1));
4124e4b17023SJohn Marino       group_size = 1;
4125e4b17023SJohn Marino     }
4126e4b17023SJohn Marino 
4127e4b17023SJohn Marino   /* In SLP we may have several statements in NEW_PHIS and REDUCTION_PHIS (in
4128e4b17023SJohn Marino      case that GROUP_SIZE is greater than vectorization factor).  Therefore, we
4129e4b17023SJohn Marino      need to match SCALAR_RESULTS with corresponding statements.  The first
4130e4b17023SJohn Marino      (GROUP_SIZE / number of new vector stmts) scalar results correspond to
4131e4b17023SJohn Marino      the first vector stmt, etc.
4132e4b17023SJohn Marino      (RATIO is equal to (GROUP_SIZE / number of new vector stmts)).  */
4133e4b17023SJohn Marino   if (group_size > VEC_length (gimple, new_phis))
4134e4b17023SJohn Marino     {
4135e4b17023SJohn Marino       ratio = group_size / VEC_length (gimple, new_phis);
4136e4b17023SJohn Marino       gcc_assert (!(group_size % VEC_length (gimple, new_phis)));
4137e4b17023SJohn Marino     }
4138e4b17023SJohn Marino   else
4139e4b17023SJohn Marino     ratio = 1;
4140e4b17023SJohn Marino 
4141e4b17023SJohn Marino   for (k = 0; k < group_size; k++)
4142e4b17023SJohn Marino     {
4143e4b17023SJohn Marino       if (k % ratio == 0)
4144e4b17023SJohn Marino         {
4145e4b17023SJohn Marino           epilog_stmt = VEC_index (gimple, new_phis, k / ratio);
4146e4b17023SJohn Marino           reduction_phi = VEC_index (gimple, reduction_phis, k / ratio);
4147e4b17023SJohn Marino 	  if (double_reduc)
4148e4b17023SJohn Marino 	    inner_phi = VEC_index (gimple, inner_phis, k / ratio);
4149e4b17023SJohn Marino         }
4150e4b17023SJohn Marino 
4151e4b17023SJohn Marino       if (slp_reduc)
4152e4b17023SJohn Marino         {
4153e4b17023SJohn Marino           gimple current_stmt = VEC_index (gimple,
4154e4b17023SJohn Marino                                        SLP_TREE_SCALAR_STMTS (slp_node), k);
4155e4b17023SJohn Marino 
4156e4b17023SJohn Marino           orig_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (current_stmt));
4157e4b17023SJohn Marino           /* SLP statements can't participate in patterns.  */
4158e4b17023SJohn Marino           gcc_assert (!orig_stmt);
4159e4b17023SJohn Marino           scalar_dest = gimple_assign_lhs (current_stmt);
4160e4b17023SJohn Marino         }
4161e4b17023SJohn Marino 
4162e4b17023SJohn Marino       phis = VEC_alloc (gimple, heap, 3);
4163e4b17023SJohn Marino       /* Find the loop-closed-use at the loop exit of the original scalar
4164e4b17023SJohn Marino          result.  (The reduction result is expected to have two immediate uses -
4165e4b17023SJohn Marino          one at the latch block, and one at the loop exit).  */
4166e4b17023SJohn Marino       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
4167*95d28233SJohn Marino         if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p)))
4168*95d28233SJohn Marino 	    && !is_gimple_debug (USE_STMT (use_p)))
4169e4b17023SJohn Marino           VEC_safe_push (gimple, heap, phis, USE_STMT (use_p));
4170e4b17023SJohn Marino 
4171e4b17023SJohn Marino       /* We expect to have found an exit_phi because of loop-closed-ssa
4172e4b17023SJohn Marino          form.  */
4173e4b17023SJohn Marino       gcc_assert (!VEC_empty (gimple, phis));
4174e4b17023SJohn Marino 
4175e4b17023SJohn Marino       FOR_EACH_VEC_ELT (gimple, phis, i, exit_phi)
4176e4b17023SJohn Marino         {
4177e4b17023SJohn Marino           if (outer_loop)
4178e4b17023SJohn Marino             {
4179e4b17023SJohn Marino               stmt_vec_info exit_phi_vinfo = vinfo_for_stmt (exit_phi);
4180e4b17023SJohn Marino               gimple vect_phi;
4181e4b17023SJohn Marino 
4182e4b17023SJohn Marino               /* FORNOW. Currently not supporting the case that an inner-loop
4183e4b17023SJohn Marino                  reduction is not used in the outer-loop (but only outside the
4184e4b17023SJohn Marino                  outer-loop), unless it is double reduction.  */
4185e4b17023SJohn Marino               gcc_assert ((STMT_VINFO_RELEVANT_P (exit_phi_vinfo)
4186e4b17023SJohn Marino                            && !STMT_VINFO_LIVE_P (exit_phi_vinfo))
4187e4b17023SJohn Marino                           || double_reduc);
4188e4b17023SJohn Marino 
4189e4b17023SJohn Marino               STMT_VINFO_VEC_STMT (exit_phi_vinfo) = epilog_stmt;
4190e4b17023SJohn Marino               if (!double_reduc
4191e4b17023SJohn Marino                   || STMT_VINFO_DEF_TYPE (exit_phi_vinfo)
4192e4b17023SJohn Marino                       != vect_double_reduction_def)
4193e4b17023SJohn Marino                 continue;
4194e4b17023SJohn Marino 
4195e4b17023SJohn Marino               /* Handle double reduction:
4196e4b17023SJohn Marino 
4197e4b17023SJohn Marino                  stmt1: s1 = phi <s0, s2>  - double reduction phi (outer loop)
4198e4b17023SJohn Marino                  stmt2:   s3 = phi <s1, s4> - (regular) reduc phi (inner loop)
4199e4b17023SJohn Marino                  stmt3:   s4 = use (s3)     - (regular) reduc stmt (inner loop)
4200e4b17023SJohn Marino                  stmt4: s2 = phi <s4>      - double reduction stmt (outer loop)
4201e4b17023SJohn Marino 
4202e4b17023SJohn Marino                  At that point the regular reduction (stmt2 and stmt3) is
4203e4b17023SJohn Marino                  already vectorized, as well as the exit phi node, stmt4.
4204e4b17023SJohn Marino                  Here we vectorize the phi node of double reduction, stmt1, and
4205e4b17023SJohn Marino                  update all relevant statements.  */
4206e4b17023SJohn Marino 
4207e4b17023SJohn Marino               /* Go through all the uses of s2 to find double reduction phi
4208e4b17023SJohn Marino                  node, i.e., stmt1 above.  */
4209e4b17023SJohn Marino               orig_name = PHI_RESULT (exit_phi);
4210e4b17023SJohn Marino               FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
4211e4b17023SJohn Marino                 {
4212e4b17023SJohn Marino                   stmt_vec_info use_stmt_vinfo = vinfo_for_stmt (use_stmt);
4213e4b17023SJohn Marino                   stmt_vec_info new_phi_vinfo;
4214e4b17023SJohn Marino                   tree vect_phi_init, preheader_arg, vect_phi_res, init_def;
4215e4b17023SJohn Marino                   basic_block bb = gimple_bb (use_stmt);
4216e4b17023SJohn Marino                   gimple use;
4217e4b17023SJohn Marino 
4218e4b17023SJohn Marino                   /* Check that USE_STMT is really double reduction phi
4219e4b17023SJohn Marino                      node.  */
4220e4b17023SJohn Marino                   if (gimple_code (use_stmt) != GIMPLE_PHI
4221e4b17023SJohn Marino                       || gimple_phi_num_args (use_stmt) != 2
4222e4b17023SJohn Marino                       || !use_stmt_vinfo
4223e4b17023SJohn Marino                       || STMT_VINFO_DEF_TYPE (use_stmt_vinfo)
4224e4b17023SJohn Marino                           != vect_double_reduction_def
4225e4b17023SJohn Marino                       || bb->loop_father != outer_loop)
4226e4b17023SJohn Marino                     continue;
4227e4b17023SJohn Marino 
4228e4b17023SJohn Marino                   /* Create vector phi node for double reduction:
4229e4b17023SJohn Marino                      vs1 = phi <vs0, vs2>
4230e4b17023SJohn Marino                      vs1 was created previously in this function by a call to
4231e4b17023SJohn Marino                        vect_get_vec_def_for_operand and is stored in
4232e4b17023SJohn Marino                        vec_initial_def;
4233e4b17023SJohn Marino                      vs2 is defined by INNER_PHI, the vectorized EXIT_PHI;
4234e4b17023SJohn Marino                      vs0 is created here.  */
4235e4b17023SJohn Marino 
4236e4b17023SJohn Marino                   /* Create vector phi node.  */
4237e4b17023SJohn Marino                   vect_phi = create_phi_node (vec_initial_def, bb);
4238e4b17023SJohn Marino                   new_phi_vinfo = new_stmt_vec_info (vect_phi,
4239e4b17023SJohn Marino                                     loop_vec_info_for_loop (outer_loop), NULL);
4240e4b17023SJohn Marino                   set_vinfo_for_stmt (vect_phi, new_phi_vinfo);
4241e4b17023SJohn Marino 
4242e4b17023SJohn Marino                   /* Create vs0 - initial def of the double reduction phi.  */
4243e4b17023SJohn Marino                   preheader_arg = PHI_ARG_DEF_FROM_EDGE (use_stmt,
4244e4b17023SJohn Marino                                              loop_preheader_edge (outer_loop));
4245e4b17023SJohn Marino                   init_def = get_initial_def_for_reduction (stmt,
4246e4b17023SJohn Marino                                                           preheader_arg, NULL);
4247e4b17023SJohn Marino                   vect_phi_init = vect_init_vector (use_stmt, init_def,
4248e4b17023SJohn Marino                                                     vectype, NULL);
4249e4b17023SJohn Marino 
4250e4b17023SJohn Marino                   /* Update phi node arguments with vs0 and vs2.  */
4251e4b17023SJohn Marino                   add_phi_arg (vect_phi, vect_phi_init,
4252e4b17023SJohn Marino                                loop_preheader_edge (outer_loop),
4253e4b17023SJohn Marino                                UNKNOWN_LOCATION);
4254e4b17023SJohn Marino                   add_phi_arg (vect_phi, PHI_RESULT (inner_phi),
4255e4b17023SJohn Marino                                loop_latch_edge (outer_loop), UNKNOWN_LOCATION);
4256e4b17023SJohn Marino                   if (vect_print_dump_info (REPORT_DETAILS))
4257e4b17023SJohn Marino                     {
4258e4b17023SJohn Marino                       fprintf (vect_dump, "created double reduction phi "
4259e4b17023SJohn Marino                                           "node: ");
4260e4b17023SJohn Marino                       print_gimple_stmt (vect_dump, vect_phi, 0, TDF_SLIM);
4261e4b17023SJohn Marino                     }
4262e4b17023SJohn Marino 
4263e4b17023SJohn Marino                   vect_phi_res = PHI_RESULT (vect_phi);
4264e4b17023SJohn Marino 
4265e4b17023SJohn Marino                   /* Replace the use, i.e., set the correct vs1 in the regular
4266e4b17023SJohn Marino                      reduction phi node.  FORNOW, NCOPIES is always 1, so the
4267e4b17023SJohn Marino                      loop is redundant.  */
4268e4b17023SJohn Marino                   use = reduction_phi;
4269e4b17023SJohn Marino                   for (j = 0; j < ncopies; j++)
4270e4b17023SJohn Marino                     {
4271e4b17023SJohn Marino                       edge pr_edge = loop_preheader_edge (loop);
4272e4b17023SJohn Marino                       SET_PHI_ARG_DEF (use, pr_edge->dest_idx, vect_phi_res);
4273e4b17023SJohn Marino                       use = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use));
4274e4b17023SJohn Marino                     }
4275e4b17023SJohn Marino                 }
4276e4b17023SJohn Marino             }
4277e4b17023SJohn Marino         }
4278e4b17023SJohn Marino 
4279e4b17023SJohn Marino       VEC_free (gimple, heap, phis);
4280e4b17023SJohn Marino       if (nested_in_vect_loop)
4281e4b17023SJohn Marino         {
4282e4b17023SJohn Marino           if (double_reduc)
4283e4b17023SJohn Marino             loop = outer_loop;
4284e4b17023SJohn Marino           else
4285e4b17023SJohn Marino             continue;
4286e4b17023SJohn Marino         }
4287e4b17023SJohn Marino 
4288e4b17023SJohn Marino       phis = VEC_alloc (gimple, heap, 3);
4289e4b17023SJohn Marino       /* Find the loop-closed-use at the loop exit of the original scalar
4290e4b17023SJohn Marino          result.  (The reduction result is expected to have two immediate uses,
4291e4b17023SJohn Marino          one at the latch block, and one at the loop exit).  For double
4292e4b17023SJohn Marino          reductions we are looking for exit phis of the outer loop.  */
4293e4b17023SJohn Marino       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
4294e4b17023SJohn Marino         {
4295e4b17023SJohn Marino           if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
4296*95d28233SJohn Marino 	    {
4297*95d28233SJohn Marino 	      if (!is_gimple_debug (USE_STMT (use_p)))
4298e4b17023SJohn Marino 		VEC_safe_push (gimple, heap, phis, USE_STMT (use_p));
4299*95d28233SJohn Marino 	    }
4300e4b17023SJohn Marino           else
4301e4b17023SJohn Marino             {
4302e4b17023SJohn Marino               if (double_reduc && gimple_code (USE_STMT (use_p)) == GIMPLE_PHI)
4303e4b17023SJohn Marino                 {
4304e4b17023SJohn Marino                   tree phi_res = PHI_RESULT (USE_STMT (use_p));
4305e4b17023SJohn Marino 
4306e4b17023SJohn Marino                   FOR_EACH_IMM_USE_FAST (phi_use_p, phi_imm_iter, phi_res)
4307e4b17023SJohn Marino                     {
4308e4b17023SJohn Marino                       if (!flow_bb_inside_loop_p (loop,
4309*95d28233SJohn Marino                                              gimple_bb (USE_STMT (phi_use_p)))
4310*95d28233SJohn Marino 			  && !is_gimple_debug (USE_STMT (phi_use_p)))
4311e4b17023SJohn Marino                         VEC_safe_push (gimple, heap, phis,
4312e4b17023SJohn Marino                                        USE_STMT (phi_use_p));
4313e4b17023SJohn Marino                     }
4314e4b17023SJohn Marino                 }
4315e4b17023SJohn Marino             }
4316e4b17023SJohn Marino         }
4317e4b17023SJohn Marino 
4318e4b17023SJohn Marino       FOR_EACH_VEC_ELT (gimple, phis, i, exit_phi)
4319e4b17023SJohn Marino         {
4320e4b17023SJohn Marino           /* Replace the uses:  */
4321e4b17023SJohn Marino           orig_name = PHI_RESULT (exit_phi);
4322e4b17023SJohn Marino           scalar_result = VEC_index (tree, scalar_results, k);
4323e4b17023SJohn Marino           FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
4324e4b17023SJohn Marino             FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
4325e4b17023SJohn Marino               SET_USE (use_p, scalar_result);
4326e4b17023SJohn Marino         }
4327e4b17023SJohn Marino 
4328e4b17023SJohn Marino       VEC_free (gimple, heap, phis);
4329e4b17023SJohn Marino     }
4330e4b17023SJohn Marino 
4331e4b17023SJohn Marino   VEC_free (tree, heap, scalar_results);
4332e4b17023SJohn Marino   VEC_free (gimple, heap, new_phis);
4333e4b17023SJohn Marino }
4334e4b17023SJohn Marino 
4335e4b17023SJohn Marino 
4336e4b17023SJohn Marino /* Function vectorizable_reduction.
4337e4b17023SJohn Marino 
4338e4b17023SJohn Marino    Check if STMT performs a reduction operation that can be vectorized.
4339e4b17023SJohn Marino    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4340e4b17023SJohn Marino    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4341e4b17023SJohn Marino    Return FALSE if not a vectorizable STMT, TRUE otherwise.
4342e4b17023SJohn Marino 
4343e4b17023SJohn Marino    This function also handles reduction idioms (patterns) that have been
4344e4b17023SJohn Marino    recognized in advance during vect_pattern_recog.  In this case, STMT may be
4345e4b17023SJohn Marino    of this form:
4346e4b17023SJohn Marino      X = pattern_expr (arg0, arg1, ..., X)
4347e4b17023SJohn Marino    and its STMT_VINFO_RELATED_STMT points to the last stmt in the original
4348e4b17023SJohn Marino    sequence that had been detected and replaced by the pattern-stmt (STMT).
4349e4b17023SJohn Marino 
4350e4b17023SJohn Marino    In some cases of reduction patterns, the type of the reduction variable X is
4351e4b17023SJohn Marino    different than the type of the other arguments of STMT.
4352e4b17023SJohn Marino    In such cases, the vectype that is used when transforming STMT into a vector
4353e4b17023SJohn Marino    stmt is different than the vectype that is used to determine the
4354e4b17023SJohn Marino    vectorization factor, because it consists of a different number of elements
4355e4b17023SJohn Marino    than the actual number of elements that are being operated upon in parallel.
4356e4b17023SJohn Marino 
4357e4b17023SJohn Marino    For example, consider an accumulation of shorts into an int accumulator.
4358e4b17023SJohn Marino    On some targets it's possible to vectorize this pattern operating on 8
4359e4b17023SJohn Marino    shorts at a time (hence, the vectype for purposes of determining the
4360e4b17023SJohn Marino    vectorization factor should be V8HI); on the other hand, the vectype that
4361e4b17023SJohn Marino    is used to create the vector form is actually V4SI (the type of the result).
4362e4b17023SJohn Marino 
4363e4b17023SJohn Marino    Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
4364e4b17023SJohn Marino    indicates what is the actual level of parallelism (V8HI in the example), so
4365e4b17023SJohn Marino    that the right vectorization factor would be derived.  This vectype
4366e4b17023SJohn Marino    corresponds to the type of arguments to the reduction stmt, and should *NOT*
4367e4b17023SJohn Marino    be used to create the vectorized stmt.  The right vectype for the vectorized
4368e4b17023SJohn Marino    stmt is obtained from the type of the result X:
4369e4b17023SJohn Marino         get_vectype_for_scalar_type (TREE_TYPE (X))
4370e4b17023SJohn Marino 
4371e4b17023SJohn Marino    This means that, contrary to "regular" reductions (or "regular" stmts in
4372e4b17023SJohn Marino    general), the following equation:
4373e4b17023SJohn Marino       STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
4374e4b17023SJohn Marino    does *NOT* necessarily hold for reduction patterns.  */
4375e4b17023SJohn Marino 
4376e4b17023SJohn Marino bool
vectorizable_reduction(gimple stmt,gimple_stmt_iterator * gsi,gimple * vec_stmt,slp_tree slp_node)4377e4b17023SJohn Marino vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
4378e4b17023SJohn Marino 			gimple *vec_stmt, slp_tree slp_node)
4379e4b17023SJohn Marino {
4380e4b17023SJohn Marino   tree vec_dest;
4381e4b17023SJohn Marino   tree scalar_dest;
4382e4b17023SJohn Marino   tree loop_vec_def0 = NULL_TREE, loop_vec_def1 = NULL_TREE;
4383e4b17023SJohn Marino   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4384e4b17023SJohn Marino   tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4385e4b17023SJohn Marino   tree vectype_in = NULL_TREE;
4386e4b17023SJohn Marino   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4387e4b17023SJohn Marino   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4388e4b17023SJohn Marino   enum tree_code code, orig_code, epilog_reduc_code;
4389e4b17023SJohn Marino   enum machine_mode vec_mode;
4390e4b17023SJohn Marino   int op_type;
4391e4b17023SJohn Marino   optab optab, reduc_optab;
4392e4b17023SJohn Marino   tree new_temp = NULL_TREE;
4393e4b17023SJohn Marino   tree def;
4394e4b17023SJohn Marino   gimple def_stmt;
4395e4b17023SJohn Marino   enum vect_def_type dt;
4396e4b17023SJohn Marino   gimple new_phi = NULL;
4397e4b17023SJohn Marino   tree scalar_type;
4398e4b17023SJohn Marino   bool is_simple_use;
4399e4b17023SJohn Marino   gimple orig_stmt;
4400e4b17023SJohn Marino   stmt_vec_info orig_stmt_info;
4401e4b17023SJohn Marino   tree expr = NULL_TREE;
4402e4b17023SJohn Marino   int i;
4403e4b17023SJohn Marino   int ncopies;
4404e4b17023SJohn Marino   int epilog_copies;
4405e4b17023SJohn Marino   stmt_vec_info prev_stmt_info, prev_phi_info;
4406e4b17023SJohn Marino   bool single_defuse_cycle = false;
4407e4b17023SJohn Marino   tree reduc_def = NULL_TREE;
4408e4b17023SJohn Marino   gimple new_stmt = NULL;
4409e4b17023SJohn Marino   int j;
4410e4b17023SJohn Marino   tree ops[3];
4411e4b17023SJohn Marino   bool nested_cycle = false, found_nested_cycle_def = false;
4412e4b17023SJohn Marino   gimple reduc_def_stmt = NULL;
4413e4b17023SJohn Marino   /* The default is that the reduction variable is the last in statement.  */
4414e4b17023SJohn Marino   int reduc_index = 2;
4415e4b17023SJohn Marino   bool double_reduc = false, dummy;
4416e4b17023SJohn Marino   basic_block def_bb;
4417e4b17023SJohn Marino   struct loop * def_stmt_loop, *outer_loop = NULL;
4418e4b17023SJohn Marino   tree def_arg;
4419e4b17023SJohn Marino   gimple def_arg_stmt;
4420e4b17023SJohn Marino   VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL, *vect_defs = NULL;
4421e4b17023SJohn Marino   VEC (gimple, heap) *phis = NULL;
4422e4b17023SJohn Marino   int vec_num;
4423e4b17023SJohn Marino   tree def0, def1, tem, op0, op1 = NULL_TREE;
4424e4b17023SJohn Marino 
4425e4b17023SJohn Marino   /* In case of reduction chain we switch to the first stmt in the chain, but
4426e4b17023SJohn Marino      we don't update STMT_INFO, since only the last stmt is marked as reduction
4427e4b17023SJohn Marino      and has reduction properties.  */
4428e4b17023SJohn Marino   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
4429e4b17023SJohn Marino     stmt = GROUP_FIRST_ELEMENT (stmt_info);
4430e4b17023SJohn Marino 
4431e4b17023SJohn Marino   if (nested_in_vect_loop_p (loop, stmt))
4432e4b17023SJohn Marino     {
4433e4b17023SJohn Marino       outer_loop = loop;
4434e4b17023SJohn Marino       loop = loop->inner;
4435e4b17023SJohn Marino       nested_cycle = true;
4436e4b17023SJohn Marino     }
4437e4b17023SJohn Marino 
4438e4b17023SJohn Marino   /* 1. Is vectorizable reduction?  */
4439e4b17023SJohn Marino   /* Not supportable if the reduction variable is used in the loop, unless
4440e4b17023SJohn Marino      it's a reduction chain.  */
4441e4b17023SJohn Marino   if (STMT_VINFO_RELEVANT (stmt_info) > vect_used_in_outer
4442e4b17023SJohn Marino       && !GROUP_FIRST_ELEMENT (stmt_info))
4443e4b17023SJohn Marino     return false;
4444e4b17023SJohn Marino 
4445e4b17023SJohn Marino   /* Reductions that are not used even in an enclosing outer-loop,
4446e4b17023SJohn Marino      are expected to be "live" (used out of the loop).  */
4447e4b17023SJohn Marino   if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_scope
4448e4b17023SJohn Marino       && !STMT_VINFO_LIVE_P (stmt_info))
4449e4b17023SJohn Marino     return false;
4450e4b17023SJohn Marino 
4451e4b17023SJohn Marino   /* Make sure it was already recognized as a reduction computation.  */
4452e4b17023SJohn Marino   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def
4453e4b17023SJohn Marino       && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle)
4454e4b17023SJohn Marino     return false;
4455e4b17023SJohn Marino 
4456e4b17023SJohn Marino   /* 2. Has this been recognized as a reduction pattern?
4457e4b17023SJohn Marino 
4458e4b17023SJohn Marino      Check if STMT represents a pattern that has been recognized
4459e4b17023SJohn Marino      in earlier analysis stages.  For stmts that represent a pattern,
4460e4b17023SJohn Marino      the STMT_VINFO_RELATED_STMT field records the last stmt in
4461e4b17023SJohn Marino      the original sequence that constitutes the pattern.  */
4462e4b17023SJohn Marino 
4463e4b17023SJohn Marino   orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
4464e4b17023SJohn Marino   if (orig_stmt)
4465e4b17023SJohn Marino     {
4466e4b17023SJohn Marino       orig_stmt_info = vinfo_for_stmt (orig_stmt);
4467e4b17023SJohn Marino       gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
4468e4b17023SJohn Marino       gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
4469e4b17023SJohn Marino     }
4470e4b17023SJohn Marino 
4471e4b17023SJohn Marino   /* 3. Check the operands of the operation.  The first operands are defined
4472e4b17023SJohn Marino         inside the loop body. The last operand is the reduction variable,
4473e4b17023SJohn Marino         which is defined by the loop-header-phi.  */
4474e4b17023SJohn Marino 
4475e4b17023SJohn Marino   gcc_assert (is_gimple_assign (stmt));
4476e4b17023SJohn Marino 
4477e4b17023SJohn Marino   /* Flatten RHS.  */
4478e4b17023SJohn Marino   switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
4479e4b17023SJohn Marino     {
4480e4b17023SJohn Marino     case GIMPLE_SINGLE_RHS:
4481e4b17023SJohn Marino       op_type = TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt));
4482e4b17023SJohn Marino       if (op_type == ternary_op)
4483e4b17023SJohn Marino 	{
4484e4b17023SJohn Marino 	  tree rhs = gimple_assign_rhs1 (stmt);
4485e4b17023SJohn Marino 	  ops[0] = TREE_OPERAND (rhs, 0);
4486e4b17023SJohn Marino 	  ops[1] = TREE_OPERAND (rhs, 1);
4487e4b17023SJohn Marino 	  ops[2] = TREE_OPERAND (rhs, 2);
4488e4b17023SJohn Marino 	  code = TREE_CODE (rhs);
4489e4b17023SJohn Marino 	}
4490e4b17023SJohn Marino       else
4491e4b17023SJohn Marino 	return false;
4492e4b17023SJohn Marino       break;
4493e4b17023SJohn Marino 
4494e4b17023SJohn Marino     case GIMPLE_BINARY_RHS:
4495e4b17023SJohn Marino       code = gimple_assign_rhs_code (stmt);
4496e4b17023SJohn Marino       op_type = TREE_CODE_LENGTH (code);
4497e4b17023SJohn Marino       gcc_assert (op_type == binary_op);
4498e4b17023SJohn Marino       ops[0] = gimple_assign_rhs1 (stmt);
4499e4b17023SJohn Marino       ops[1] = gimple_assign_rhs2 (stmt);
4500e4b17023SJohn Marino       break;
4501e4b17023SJohn Marino 
4502e4b17023SJohn Marino     case GIMPLE_TERNARY_RHS:
4503e4b17023SJohn Marino       code = gimple_assign_rhs_code (stmt);
4504e4b17023SJohn Marino       op_type = TREE_CODE_LENGTH (code);
4505e4b17023SJohn Marino       gcc_assert (op_type == ternary_op);
4506e4b17023SJohn Marino       ops[0] = gimple_assign_rhs1 (stmt);
4507e4b17023SJohn Marino       ops[1] = gimple_assign_rhs2 (stmt);
4508e4b17023SJohn Marino       ops[2] = gimple_assign_rhs3 (stmt);
4509e4b17023SJohn Marino       break;
4510e4b17023SJohn Marino 
4511e4b17023SJohn Marino     case GIMPLE_UNARY_RHS:
4512e4b17023SJohn Marino       return false;
4513e4b17023SJohn Marino 
4514e4b17023SJohn Marino     default:
4515e4b17023SJohn Marino       gcc_unreachable ();
4516e4b17023SJohn Marino     }
4517e4b17023SJohn Marino 
4518e4b17023SJohn Marino   if (code == COND_EXPR && slp_node)
4519e4b17023SJohn Marino     return false;
4520e4b17023SJohn Marino 
4521e4b17023SJohn Marino   scalar_dest = gimple_assign_lhs (stmt);
4522e4b17023SJohn Marino   scalar_type = TREE_TYPE (scalar_dest);
4523e4b17023SJohn Marino   if (!POINTER_TYPE_P (scalar_type) && !INTEGRAL_TYPE_P (scalar_type)
4524e4b17023SJohn Marino       && !SCALAR_FLOAT_TYPE_P (scalar_type))
4525e4b17023SJohn Marino     return false;
4526e4b17023SJohn Marino 
4527e4b17023SJohn Marino   /* Do not try to vectorize bit-precision reductions.  */
4528e4b17023SJohn Marino   if ((TYPE_PRECISION (scalar_type)
4529e4b17023SJohn Marino        != GET_MODE_PRECISION (TYPE_MODE (scalar_type))))
4530e4b17023SJohn Marino     return false;
4531e4b17023SJohn Marino 
4532e4b17023SJohn Marino   /* All uses but the last are expected to be defined in the loop.
4533e4b17023SJohn Marino      The last use is the reduction variable.  In case of nested cycle this
4534e4b17023SJohn Marino      assumption is not true: we use reduc_index to record the index of the
4535e4b17023SJohn Marino      reduction variable.  */
4536e4b17023SJohn Marino   for (i = 0; i < op_type - 1; i++)
4537e4b17023SJohn Marino     {
4538e4b17023SJohn Marino       /* The condition of COND_EXPR is checked in vectorizable_condition().  */
4539e4b17023SJohn Marino       if (i == 0 && code == COND_EXPR)
4540e4b17023SJohn Marino         continue;
4541e4b17023SJohn Marino 
4542e4b17023SJohn Marino       is_simple_use = vect_is_simple_use_1 (ops[i], stmt, loop_vinfo, NULL,
4543e4b17023SJohn Marino 					    &def_stmt, &def, &dt, &tem);
4544e4b17023SJohn Marino       if (!vectype_in)
4545e4b17023SJohn Marino 	vectype_in = tem;
4546e4b17023SJohn Marino       gcc_assert (is_simple_use);
4547e4b17023SJohn Marino 
4548e4b17023SJohn Marino       if (dt != vect_internal_def
4549e4b17023SJohn Marino 	  && dt != vect_external_def
4550e4b17023SJohn Marino 	  && dt != vect_constant_def
4551e4b17023SJohn Marino 	  && dt != vect_induction_def
4552e4b17023SJohn Marino           && !(dt == vect_nested_cycle && nested_cycle))
4553e4b17023SJohn Marino 	return false;
4554e4b17023SJohn Marino 
4555e4b17023SJohn Marino       if (dt == vect_nested_cycle)
4556e4b17023SJohn Marino         {
4557e4b17023SJohn Marino           found_nested_cycle_def = true;
4558e4b17023SJohn Marino           reduc_def_stmt = def_stmt;
4559e4b17023SJohn Marino           reduc_index = i;
4560e4b17023SJohn Marino         }
4561e4b17023SJohn Marino     }
4562e4b17023SJohn Marino 
4563e4b17023SJohn Marino   is_simple_use = vect_is_simple_use_1 (ops[i], stmt, loop_vinfo, NULL,
4564e4b17023SJohn Marino 					&def_stmt, &def, &dt, &tem);
4565e4b17023SJohn Marino   if (!vectype_in)
4566e4b17023SJohn Marino     vectype_in = tem;
4567e4b17023SJohn Marino   gcc_assert (is_simple_use);
45685ce9237cSJohn Marino   if (!(dt == vect_reduction_def
4569e4b17023SJohn Marino 	|| dt == vect_nested_cycle
4570e4b17023SJohn Marino 	|| ((dt == vect_internal_def || dt == vect_external_def
4571e4b17023SJohn Marino 	     || dt == vect_constant_def || dt == vect_induction_def)
45725ce9237cSJohn Marino 	    && nested_cycle && found_nested_cycle_def)))
45735ce9237cSJohn Marino     {
45745ce9237cSJohn Marino       /* For pattern recognized stmts, orig_stmt might be a reduction,
45755ce9237cSJohn Marino 	 but some helper statements for the pattern might not, or
45765ce9237cSJohn Marino 	 might be COND_EXPRs with reduction uses in the condition.  */
45775ce9237cSJohn Marino       gcc_assert (orig_stmt);
45785ce9237cSJohn Marino       return false;
45795ce9237cSJohn Marino     }
4580e4b17023SJohn Marino   if (!found_nested_cycle_def)
4581e4b17023SJohn Marino     reduc_def_stmt = def_stmt;
4582e4b17023SJohn Marino 
4583e4b17023SJohn Marino   gcc_assert (gimple_code (reduc_def_stmt) == GIMPLE_PHI);
4584e4b17023SJohn Marino   if (orig_stmt)
4585e4b17023SJohn Marino     gcc_assert (orig_stmt == vect_is_simple_reduction (loop_vinfo,
4586e4b17023SJohn Marino                                                        reduc_def_stmt,
4587e4b17023SJohn Marino                                                        !nested_cycle,
4588e4b17023SJohn Marino                                                        &dummy));
4589e4b17023SJohn Marino   else
4590e4b17023SJohn Marino     {
4591e4b17023SJohn Marino       gimple tmp = vect_is_simple_reduction (loop_vinfo, reduc_def_stmt,
4592e4b17023SJohn Marino                                              !nested_cycle, &dummy);
4593e4b17023SJohn Marino       /* We changed STMT to be the first stmt in reduction chain, hence we
4594e4b17023SJohn Marino          check that in this case the first element in the chain is STMT.  */
4595e4b17023SJohn Marino       gcc_assert (stmt == tmp
4596e4b17023SJohn Marino                   || GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == stmt);
4597e4b17023SJohn Marino     }
4598e4b17023SJohn Marino 
4599e4b17023SJohn Marino   if (STMT_VINFO_LIVE_P (vinfo_for_stmt (reduc_def_stmt)))
4600e4b17023SJohn Marino     return false;
4601e4b17023SJohn Marino 
4602e4b17023SJohn Marino   if (slp_node || PURE_SLP_STMT (stmt_info))
4603e4b17023SJohn Marino     ncopies = 1;
4604e4b17023SJohn Marino   else
4605e4b17023SJohn Marino     ncopies = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
4606e4b17023SJohn Marino                / TYPE_VECTOR_SUBPARTS (vectype_in));
4607e4b17023SJohn Marino 
4608e4b17023SJohn Marino   gcc_assert (ncopies >= 1);
4609e4b17023SJohn Marino 
4610e4b17023SJohn Marino   vec_mode = TYPE_MODE (vectype_in);
4611e4b17023SJohn Marino 
4612e4b17023SJohn Marino   if (code == COND_EXPR)
4613e4b17023SJohn Marino     {
4614e4b17023SJohn Marino       if (!vectorizable_condition (stmt, gsi, NULL, ops[reduc_index], 0, NULL))
4615e4b17023SJohn Marino         {
4616e4b17023SJohn Marino           if (vect_print_dump_info (REPORT_DETAILS))
4617e4b17023SJohn Marino             fprintf (vect_dump, "unsupported condition in reduction");
4618e4b17023SJohn Marino 
4619e4b17023SJohn Marino             return false;
4620e4b17023SJohn Marino         }
4621e4b17023SJohn Marino     }
4622e4b17023SJohn Marino   else
4623e4b17023SJohn Marino     {
4624e4b17023SJohn Marino       /* 4. Supportable by target?  */
4625e4b17023SJohn Marino 
4626e4b17023SJohn Marino       /* 4.1. check support for the operation in the loop  */
4627e4b17023SJohn Marino       optab = optab_for_tree_code (code, vectype_in, optab_default);
4628e4b17023SJohn Marino       if (!optab)
4629e4b17023SJohn Marino         {
4630e4b17023SJohn Marino           if (vect_print_dump_info (REPORT_DETAILS))
4631e4b17023SJohn Marino             fprintf (vect_dump, "no optab.");
4632e4b17023SJohn Marino 
4633e4b17023SJohn Marino           return false;
4634e4b17023SJohn Marino         }
4635e4b17023SJohn Marino 
4636e4b17023SJohn Marino       if (optab_handler (optab, vec_mode) == CODE_FOR_nothing)
4637e4b17023SJohn Marino         {
4638e4b17023SJohn Marino           if (vect_print_dump_info (REPORT_DETAILS))
4639e4b17023SJohn Marino             fprintf (vect_dump, "op not supported by target.");
4640e4b17023SJohn Marino 
4641e4b17023SJohn Marino           if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4642e4b17023SJohn Marino               || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
4643e4b17023SJohn Marino 	          < vect_min_worthwhile_factor (code))
4644e4b17023SJohn Marino             return false;
4645e4b17023SJohn Marino 
4646e4b17023SJohn Marino           if (vect_print_dump_info (REPORT_DETAILS))
4647e4b17023SJohn Marino   	    fprintf (vect_dump, "proceeding using word mode.");
4648e4b17023SJohn Marino         }
4649e4b17023SJohn Marino 
4650e4b17023SJohn Marino       /* Worthwhile without SIMD support?  */
4651e4b17023SJohn Marino       if (!VECTOR_MODE_P (TYPE_MODE (vectype_in))
4652e4b17023SJohn Marino           && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
4653e4b17023SJohn Marino    	     < vect_min_worthwhile_factor (code))
4654e4b17023SJohn Marino         {
4655e4b17023SJohn Marino           if (vect_print_dump_info (REPORT_DETAILS))
4656e4b17023SJohn Marino 	    fprintf (vect_dump, "not worthwhile without SIMD support.");
4657e4b17023SJohn Marino 
4658e4b17023SJohn Marino           return false;
4659e4b17023SJohn Marino         }
4660e4b17023SJohn Marino     }
4661e4b17023SJohn Marino 
4662e4b17023SJohn Marino   /* 4.2. Check support for the epilog operation.
4663e4b17023SJohn Marino 
4664e4b17023SJohn Marino           If STMT represents a reduction pattern, then the type of the
4665e4b17023SJohn Marino           reduction variable may be different than the type of the rest
4666e4b17023SJohn Marino           of the arguments.  For example, consider the case of accumulation
4667e4b17023SJohn Marino           of shorts into an int accumulator; The original code:
4668e4b17023SJohn Marino                         S1: int_a = (int) short_a;
4669e4b17023SJohn Marino           orig_stmt->   S2: int_acc = plus <int_a ,int_acc>;
4670e4b17023SJohn Marino 
4671e4b17023SJohn Marino           was replaced with:
4672e4b17023SJohn Marino                         STMT: int_acc = widen_sum <short_a, int_acc>
4673e4b17023SJohn Marino 
4674e4b17023SJohn Marino           This means that:
4675e4b17023SJohn Marino           1. The tree-code that is used to create the vector operation in the
4676e4b17023SJohn Marino              epilog code (that reduces the partial results) is not the
4677e4b17023SJohn Marino              tree-code of STMT, but is rather the tree-code of the original
4678e4b17023SJohn Marino              stmt from the pattern that STMT is replacing.  I.e, in the example
4679e4b17023SJohn Marino              above we want to use 'widen_sum' in the loop, but 'plus' in the
4680e4b17023SJohn Marino              epilog.
4681e4b17023SJohn Marino           2. The type (mode) we use to check available target support
4682e4b17023SJohn Marino              for the vector operation to be created in the *epilog*, is
4683e4b17023SJohn Marino              determined by the type of the reduction variable (in the example
4684e4b17023SJohn Marino              above we'd check this: optab_handler (plus_optab, vect_int_mode)).
4685e4b17023SJohn Marino              However the type (mode) we use to check available target support
4686e4b17023SJohn Marino              for the vector operation to be created *inside the loop*, is
4687e4b17023SJohn Marino              determined by the type of the other arguments to STMT (in the
4688e4b17023SJohn Marino              example we'd check this: optab_handler (widen_sum_optab,
4689e4b17023SJohn Marino 	     vect_short_mode)).
4690e4b17023SJohn Marino 
4691e4b17023SJohn Marino           This is contrary to "regular" reductions, in which the types of all
4692e4b17023SJohn Marino           the arguments are the same as the type of the reduction variable.
4693e4b17023SJohn Marino           For "regular" reductions we can therefore use the same vector type
4694e4b17023SJohn Marino           (and also the same tree-code) when generating the epilog code and
4695e4b17023SJohn Marino           when generating the code inside the loop.  */
4696e4b17023SJohn Marino 
4697e4b17023SJohn Marino   if (orig_stmt)
4698e4b17023SJohn Marino     {
4699e4b17023SJohn Marino       /* This is a reduction pattern: get the vectype from the type of the
4700e4b17023SJohn Marino          reduction variable, and get the tree-code from orig_stmt.  */
4701e4b17023SJohn Marino       orig_code = gimple_assign_rhs_code (orig_stmt);
4702e4b17023SJohn Marino       gcc_assert (vectype_out);
4703e4b17023SJohn Marino       vec_mode = TYPE_MODE (vectype_out);
4704e4b17023SJohn Marino     }
4705e4b17023SJohn Marino   else
4706e4b17023SJohn Marino     {
4707e4b17023SJohn Marino       /* Regular reduction: the same vectype and tree-code as used for
4708e4b17023SJohn Marino          the vector code inside the loop can be used for the epilog code. */
4709e4b17023SJohn Marino       orig_code = code;
4710e4b17023SJohn Marino     }
4711e4b17023SJohn Marino 
4712e4b17023SJohn Marino   if (nested_cycle)
4713e4b17023SJohn Marino     {
4714e4b17023SJohn Marino       def_bb = gimple_bb (reduc_def_stmt);
4715e4b17023SJohn Marino       def_stmt_loop = def_bb->loop_father;
4716e4b17023SJohn Marino       def_arg = PHI_ARG_DEF_FROM_EDGE (reduc_def_stmt,
4717e4b17023SJohn Marino                                        loop_preheader_edge (def_stmt_loop));
4718e4b17023SJohn Marino       if (TREE_CODE (def_arg) == SSA_NAME
4719e4b17023SJohn Marino           && (def_arg_stmt = SSA_NAME_DEF_STMT (def_arg))
4720e4b17023SJohn Marino           && gimple_code (def_arg_stmt) == GIMPLE_PHI
4721e4b17023SJohn Marino           && flow_bb_inside_loop_p (outer_loop, gimple_bb (def_arg_stmt))
4722e4b17023SJohn Marino           && vinfo_for_stmt (def_arg_stmt)
4723e4b17023SJohn Marino           && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_arg_stmt))
4724e4b17023SJohn Marino               == vect_double_reduction_def)
4725e4b17023SJohn Marino         double_reduc = true;
4726e4b17023SJohn Marino     }
4727e4b17023SJohn Marino 
4728e4b17023SJohn Marino   epilog_reduc_code = ERROR_MARK;
4729e4b17023SJohn Marino   if (reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
4730e4b17023SJohn Marino     {
4731e4b17023SJohn Marino       reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype_out,
4732e4b17023SJohn Marino                                          optab_default);
4733e4b17023SJohn Marino       if (!reduc_optab)
4734e4b17023SJohn Marino         {
4735e4b17023SJohn Marino           if (vect_print_dump_info (REPORT_DETAILS))
4736e4b17023SJohn Marino             fprintf (vect_dump, "no optab for reduction.");
4737e4b17023SJohn Marino 
4738e4b17023SJohn Marino           epilog_reduc_code = ERROR_MARK;
4739e4b17023SJohn Marino         }
4740e4b17023SJohn Marino 
4741e4b17023SJohn Marino       if (reduc_optab
4742e4b17023SJohn Marino           && optab_handler (reduc_optab, vec_mode) == CODE_FOR_nothing)
4743e4b17023SJohn Marino         {
4744e4b17023SJohn Marino           if (vect_print_dump_info (REPORT_DETAILS))
4745e4b17023SJohn Marino             fprintf (vect_dump, "reduc op not supported by target.");
4746e4b17023SJohn Marino 
4747e4b17023SJohn Marino           epilog_reduc_code = ERROR_MARK;
4748e4b17023SJohn Marino         }
4749e4b17023SJohn Marino     }
4750e4b17023SJohn Marino   else
4751e4b17023SJohn Marino     {
4752e4b17023SJohn Marino       if (!nested_cycle || double_reduc)
4753e4b17023SJohn Marino         {
4754e4b17023SJohn Marino           if (vect_print_dump_info (REPORT_DETAILS))
4755e4b17023SJohn Marino             fprintf (vect_dump, "no reduc code for scalar code.");
4756e4b17023SJohn Marino 
4757e4b17023SJohn Marino           return false;
4758e4b17023SJohn Marino         }
4759e4b17023SJohn Marino     }
4760e4b17023SJohn Marino 
4761e4b17023SJohn Marino   if (double_reduc && ncopies > 1)
4762e4b17023SJohn Marino     {
4763e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_DETAILS))
4764e4b17023SJohn Marino         fprintf (vect_dump, "multiple types in double reduction");
4765e4b17023SJohn Marino 
4766e4b17023SJohn Marino       return false;
4767e4b17023SJohn Marino     }
4768e4b17023SJohn Marino 
4769e4b17023SJohn Marino   /* In case of widening multiplication by a constant, we update the type
4770e4b17023SJohn Marino      of the constant to be the type of the other operand.  We check that the
4771e4b17023SJohn Marino      constant fits the type in the pattern recognition pass.  */
4772e4b17023SJohn Marino   if (code == DOT_PROD_EXPR
4773e4b17023SJohn Marino       && !types_compatible_p (TREE_TYPE (ops[0]), TREE_TYPE (ops[1])))
4774e4b17023SJohn Marino     {
4775e4b17023SJohn Marino       if (TREE_CODE (ops[0]) == INTEGER_CST)
4776e4b17023SJohn Marino         ops[0] = fold_convert (TREE_TYPE (ops[1]), ops[0]);
4777e4b17023SJohn Marino       else if (TREE_CODE (ops[1]) == INTEGER_CST)
4778e4b17023SJohn Marino         ops[1] = fold_convert (TREE_TYPE (ops[0]), ops[1]);
4779e4b17023SJohn Marino       else
4780e4b17023SJohn Marino         {
4781e4b17023SJohn Marino           if (vect_print_dump_info (REPORT_DETAILS))
4782e4b17023SJohn Marino             fprintf (vect_dump, "invalid types in dot-prod");
4783e4b17023SJohn Marino 
4784e4b17023SJohn Marino           return false;
4785e4b17023SJohn Marino         }
4786e4b17023SJohn Marino     }
4787e4b17023SJohn Marino 
4788e4b17023SJohn Marino   if (!vec_stmt) /* transformation not required.  */
4789e4b17023SJohn Marino     {
4790e4b17023SJohn Marino       if (!vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies))
4791e4b17023SJohn Marino         return false;
4792e4b17023SJohn Marino       STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
4793e4b17023SJohn Marino       return true;
4794e4b17023SJohn Marino     }
4795e4b17023SJohn Marino 
4796e4b17023SJohn Marino   /** Transform.  **/
4797e4b17023SJohn Marino 
4798e4b17023SJohn Marino   if (vect_print_dump_info (REPORT_DETAILS))
4799e4b17023SJohn Marino     fprintf (vect_dump, "transform reduction.");
4800e4b17023SJohn Marino 
4801e4b17023SJohn Marino   /* FORNOW: Multiple types are not supported for condition.  */
4802e4b17023SJohn Marino   if (code == COND_EXPR)
4803e4b17023SJohn Marino     gcc_assert (ncopies == 1);
4804e4b17023SJohn Marino 
4805e4b17023SJohn Marino   /* Create the destination vector  */
4806e4b17023SJohn Marino   vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
4807e4b17023SJohn Marino 
4808e4b17023SJohn Marino   /* In case the vectorization factor (VF) is bigger than the number
4809e4b17023SJohn Marino      of elements that we can fit in a vectype (nunits), we have to generate
4810e4b17023SJohn Marino      more than one vector stmt - i.e - we need to "unroll" the
4811e4b17023SJohn Marino      vector stmt by a factor VF/nunits.  For more details see documentation
4812e4b17023SJohn Marino      in vectorizable_operation.  */
4813e4b17023SJohn Marino 
4814e4b17023SJohn Marino   /* If the reduction is used in an outer loop we need to generate
4815e4b17023SJohn Marino      VF intermediate results, like so (e.g. for ncopies=2):
4816e4b17023SJohn Marino 	r0 = phi (init, r0)
4817e4b17023SJohn Marino 	r1 = phi (init, r1)
4818e4b17023SJohn Marino 	r0 = x0 + r0;
4819e4b17023SJohn Marino         r1 = x1 + r1;
4820e4b17023SJohn Marino     (i.e. we generate VF results in 2 registers).
4821e4b17023SJohn Marino     In this case we have a separate def-use cycle for each copy, and therefore
4822e4b17023SJohn Marino     for each copy we get the vector def for the reduction variable from the
4823e4b17023SJohn Marino     respective phi node created for this copy.
4824e4b17023SJohn Marino 
4825e4b17023SJohn Marino     Otherwise (the reduction is unused in the loop nest), we can combine
4826e4b17023SJohn Marino     together intermediate results, like so (e.g. for ncopies=2):
4827e4b17023SJohn Marino 	r = phi (init, r)
4828e4b17023SJohn Marino 	r = x0 + r;
4829e4b17023SJohn Marino 	r = x1 + r;
4830e4b17023SJohn Marino    (i.e. we generate VF/2 results in a single register).
4831e4b17023SJohn Marino    In this case for each copy we get the vector def for the reduction variable
4832e4b17023SJohn Marino    from the vectorized reduction operation generated in the previous iteration.
4833e4b17023SJohn Marino   */
4834e4b17023SJohn Marino 
4835e4b17023SJohn Marino   if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_scope)
4836e4b17023SJohn Marino     {
4837e4b17023SJohn Marino       single_defuse_cycle = true;
4838e4b17023SJohn Marino       epilog_copies = 1;
4839e4b17023SJohn Marino     }
4840e4b17023SJohn Marino   else
4841e4b17023SJohn Marino     epilog_copies = ncopies;
4842e4b17023SJohn Marino 
4843e4b17023SJohn Marino   prev_stmt_info = NULL;
4844e4b17023SJohn Marino   prev_phi_info = NULL;
4845e4b17023SJohn Marino   if (slp_node)
4846e4b17023SJohn Marino     {
4847e4b17023SJohn Marino       vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
4848e4b17023SJohn Marino       gcc_assert (TYPE_VECTOR_SUBPARTS (vectype_out)
4849e4b17023SJohn Marino                   == TYPE_VECTOR_SUBPARTS (vectype_in));
4850e4b17023SJohn Marino     }
4851e4b17023SJohn Marino   else
4852e4b17023SJohn Marino     {
4853e4b17023SJohn Marino       vec_num = 1;
4854e4b17023SJohn Marino       vec_oprnds0 = VEC_alloc (tree, heap, 1);
4855e4b17023SJohn Marino       if (op_type == ternary_op)
4856e4b17023SJohn Marino         vec_oprnds1 = VEC_alloc (tree, heap, 1);
4857e4b17023SJohn Marino     }
4858e4b17023SJohn Marino 
4859e4b17023SJohn Marino   phis = VEC_alloc (gimple, heap, vec_num);
4860e4b17023SJohn Marino   vect_defs = VEC_alloc (tree, heap, vec_num);
4861e4b17023SJohn Marino   if (!slp_node)
4862e4b17023SJohn Marino     VEC_quick_push (tree, vect_defs, NULL_TREE);
4863e4b17023SJohn Marino 
4864e4b17023SJohn Marino   for (j = 0; j < ncopies; j++)
4865e4b17023SJohn Marino     {
4866e4b17023SJohn Marino       if (j == 0 || !single_defuse_cycle)
4867e4b17023SJohn Marino 	{
4868e4b17023SJohn Marino           for (i = 0; i < vec_num; i++)
4869e4b17023SJohn Marino             {
4870e4b17023SJohn Marino               /* Create the reduction-phi that defines the reduction
4871e4b17023SJohn Marino                  operand.  */
4872e4b17023SJohn Marino               new_phi = create_phi_node (vec_dest, loop->header);
4873e4b17023SJohn Marino               set_vinfo_for_stmt (new_phi,
4874e4b17023SJohn Marino                                   new_stmt_vec_info (new_phi, loop_vinfo,
4875e4b17023SJohn Marino                                                      NULL));
4876e4b17023SJohn Marino                if (j == 0 || slp_node)
4877e4b17023SJohn Marino                  VEC_quick_push (gimple, phis, new_phi);
4878e4b17023SJohn Marino             }
4879e4b17023SJohn Marino         }
4880e4b17023SJohn Marino 
4881e4b17023SJohn Marino       if (code == COND_EXPR)
4882e4b17023SJohn Marino         {
4883e4b17023SJohn Marino           gcc_assert (!slp_node);
4884e4b17023SJohn Marino           vectorizable_condition (stmt, gsi, vec_stmt,
4885e4b17023SJohn Marino                                   PHI_RESULT (VEC_index (gimple, phis, 0)),
4886e4b17023SJohn Marino                                   reduc_index, NULL);
4887e4b17023SJohn Marino           /* Multiple types are not supported for condition.  */
4888e4b17023SJohn Marino           break;
4889e4b17023SJohn Marino         }
4890e4b17023SJohn Marino 
4891e4b17023SJohn Marino       /* Handle uses.  */
4892e4b17023SJohn Marino       if (j == 0)
4893e4b17023SJohn Marino         {
4894e4b17023SJohn Marino           op0 = ops[!reduc_index];
4895e4b17023SJohn Marino           if (op_type == ternary_op)
4896e4b17023SJohn Marino             {
4897e4b17023SJohn Marino               if (reduc_index == 0)
4898e4b17023SJohn Marino                 op1 = ops[2];
4899e4b17023SJohn Marino               else
4900e4b17023SJohn Marino                 op1 = ops[1];
4901e4b17023SJohn Marino             }
4902e4b17023SJohn Marino 
4903e4b17023SJohn Marino           if (slp_node)
4904e4b17023SJohn Marino             vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4905e4b17023SJohn Marino                                slp_node, -1);
4906e4b17023SJohn Marino           else
4907e4b17023SJohn Marino             {
4908e4b17023SJohn Marino               loop_vec_def0 = vect_get_vec_def_for_operand (ops[!reduc_index],
4909e4b17023SJohn Marino                                                             stmt, NULL);
4910e4b17023SJohn Marino               VEC_quick_push (tree, vec_oprnds0, loop_vec_def0);
4911e4b17023SJohn Marino               if (op_type == ternary_op)
4912e4b17023SJohn Marino                {
4913e4b17023SJohn Marino                  loop_vec_def1 = vect_get_vec_def_for_operand (op1, stmt,
4914e4b17023SJohn Marino                                                                NULL);
4915e4b17023SJohn Marino                  VEC_quick_push (tree, vec_oprnds1, loop_vec_def1);
4916e4b17023SJohn Marino                }
4917e4b17023SJohn Marino             }
4918e4b17023SJohn Marino         }
4919e4b17023SJohn Marino       else
4920e4b17023SJohn Marino         {
4921e4b17023SJohn Marino           if (!slp_node)
4922e4b17023SJohn Marino             {
4923e4b17023SJohn Marino               enum vect_def_type dt;
4924e4b17023SJohn Marino               gimple dummy_stmt;
4925e4b17023SJohn Marino               tree dummy;
4926e4b17023SJohn Marino 
4927e4b17023SJohn Marino               vect_is_simple_use (ops[!reduc_index], stmt, loop_vinfo, NULL,
4928e4b17023SJohn Marino                                   &dummy_stmt, &dummy, &dt);
4929e4b17023SJohn Marino               loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt,
4930e4b17023SJohn Marino                                                               loop_vec_def0);
4931e4b17023SJohn Marino               VEC_replace (tree, vec_oprnds0, 0, loop_vec_def0);
4932e4b17023SJohn Marino               if (op_type == ternary_op)
4933e4b17023SJohn Marino                 {
4934e4b17023SJohn Marino                   vect_is_simple_use (op1, stmt, loop_vinfo, NULL, &dummy_stmt,
4935e4b17023SJohn Marino                                       &dummy, &dt);
4936e4b17023SJohn Marino                   loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt,
4937e4b17023SJohn Marino                                                                 loop_vec_def1);
4938e4b17023SJohn Marino                   VEC_replace (tree, vec_oprnds1, 0, loop_vec_def1);
4939e4b17023SJohn Marino                 }
4940e4b17023SJohn Marino             }
4941e4b17023SJohn Marino 
4942e4b17023SJohn Marino           if (single_defuse_cycle)
4943e4b17023SJohn Marino             reduc_def = gimple_assign_lhs (new_stmt);
4944e4b17023SJohn Marino 
4945e4b17023SJohn Marino           STMT_VINFO_RELATED_STMT (prev_phi_info) = new_phi;
4946e4b17023SJohn Marino         }
4947e4b17023SJohn Marino 
4948e4b17023SJohn Marino       FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, def0)
4949e4b17023SJohn Marino         {
4950e4b17023SJohn Marino           if (slp_node)
4951e4b17023SJohn Marino             reduc_def = PHI_RESULT (VEC_index (gimple, phis, i));
4952e4b17023SJohn Marino           else
4953e4b17023SJohn Marino             {
4954e4b17023SJohn Marino               if (!single_defuse_cycle || j == 0)
4955e4b17023SJohn Marino                 reduc_def = PHI_RESULT (new_phi);
4956e4b17023SJohn Marino             }
4957e4b17023SJohn Marino 
4958e4b17023SJohn Marino           def1 = ((op_type == ternary_op)
4959e4b17023SJohn Marino                   ? VEC_index (tree, vec_oprnds1, i) : NULL);
4960e4b17023SJohn Marino           if (op_type == binary_op)
4961e4b17023SJohn Marino             {
4962e4b17023SJohn Marino               if (reduc_index == 0)
4963e4b17023SJohn Marino                 expr = build2 (code, vectype_out, reduc_def, def0);
4964e4b17023SJohn Marino               else
4965e4b17023SJohn Marino                 expr = build2 (code, vectype_out, def0, reduc_def);
4966e4b17023SJohn Marino             }
4967e4b17023SJohn Marino           else
4968e4b17023SJohn Marino             {
4969e4b17023SJohn Marino               if (reduc_index == 0)
4970e4b17023SJohn Marino                 expr = build3 (code, vectype_out, reduc_def, def0, def1);
4971e4b17023SJohn Marino               else
4972e4b17023SJohn Marino                 {
4973e4b17023SJohn Marino                   if (reduc_index == 1)
4974e4b17023SJohn Marino                     expr = build3 (code, vectype_out, def0, reduc_def, def1);
4975e4b17023SJohn Marino                   else
4976e4b17023SJohn Marino                     expr = build3 (code, vectype_out, def0, def1, reduc_def);
4977e4b17023SJohn Marino                 }
4978e4b17023SJohn Marino             }
4979e4b17023SJohn Marino 
4980e4b17023SJohn Marino           new_stmt = gimple_build_assign (vec_dest, expr);
4981e4b17023SJohn Marino           new_temp = make_ssa_name (vec_dest, new_stmt);
4982e4b17023SJohn Marino           gimple_assign_set_lhs (new_stmt, new_temp);
4983e4b17023SJohn Marino           vect_finish_stmt_generation (stmt, new_stmt, gsi);
4984e4b17023SJohn Marino 
4985e4b17023SJohn Marino           if (slp_node)
4986e4b17023SJohn Marino             {
4987e4b17023SJohn Marino               VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
4988e4b17023SJohn Marino               VEC_quick_push (tree, vect_defs, new_temp);
4989e4b17023SJohn Marino             }
4990e4b17023SJohn Marino           else
4991e4b17023SJohn Marino             VEC_replace (tree, vect_defs, 0, new_temp);
4992e4b17023SJohn Marino         }
4993e4b17023SJohn Marino 
4994e4b17023SJohn Marino       if (slp_node)
4995e4b17023SJohn Marino         continue;
4996e4b17023SJohn Marino 
4997e4b17023SJohn Marino       if (j == 0)
4998e4b17023SJohn Marino 	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4999e4b17023SJohn Marino       else
5000e4b17023SJohn Marino 	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5001e4b17023SJohn Marino 
5002e4b17023SJohn Marino       prev_stmt_info = vinfo_for_stmt (new_stmt);
5003e4b17023SJohn Marino       prev_phi_info = vinfo_for_stmt (new_phi);
5004e4b17023SJohn Marino     }
5005e4b17023SJohn Marino 
5006e4b17023SJohn Marino   /* Finalize the reduction-phi (set its arguments) and create the
5007e4b17023SJohn Marino      epilog reduction code.  */
5008e4b17023SJohn Marino   if ((!single_defuse_cycle || code == COND_EXPR) && !slp_node)
5009e4b17023SJohn Marino     {
5010e4b17023SJohn Marino       new_temp = gimple_assign_lhs (*vec_stmt);
5011e4b17023SJohn Marino       VEC_replace (tree, vect_defs, 0, new_temp);
5012e4b17023SJohn Marino     }
5013e4b17023SJohn Marino 
5014e4b17023SJohn Marino   vect_create_epilog_for_reduction (vect_defs, stmt, epilog_copies,
5015e4b17023SJohn Marino                                     epilog_reduc_code, phis, reduc_index,
5016e4b17023SJohn Marino                                     double_reduc, slp_node);
5017e4b17023SJohn Marino 
5018e4b17023SJohn Marino   VEC_free (gimple, heap, phis);
5019e4b17023SJohn Marino   VEC_free (tree, heap, vec_oprnds0);
5020e4b17023SJohn Marino   if (vec_oprnds1)
5021e4b17023SJohn Marino     VEC_free (tree, heap, vec_oprnds1);
5022e4b17023SJohn Marino 
5023e4b17023SJohn Marino   return true;
5024e4b17023SJohn Marino }
5025e4b17023SJohn Marino 
5026e4b17023SJohn Marino /* Function vect_min_worthwhile_factor.
5027e4b17023SJohn Marino 
5028e4b17023SJohn Marino    For a loop where we could vectorize the operation indicated by CODE,
5029e4b17023SJohn Marino    return the minimum vectorization factor that makes it worthwhile
5030e4b17023SJohn Marino    to use generic vectors.  */
5031e4b17023SJohn Marino int
vect_min_worthwhile_factor(enum tree_code code)5032e4b17023SJohn Marino vect_min_worthwhile_factor (enum tree_code code)
5033e4b17023SJohn Marino {
5034e4b17023SJohn Marino   switch (code)
5035e4b17023SJohn Marino     {
5036e4b17023SJohn Marino     case PLUS_EXPR:
5037e4b17023SJohn Marino     case MINUS_EXPR:
5038e4b17023SJohn Marino     case NEGATE_EXPR:
5039e4b17023SJohn Marino       return 4;
5040e4b17023SJohn Marino 
5041e4b17023SJohn Marino     case BIT_AND_EXPR:
5042e4b17023SJohn Marino     case BIT_IOR_EXPR:
5043e4b17023SJohn Marino     case BIT_XOR_EXPR:
5044e4b17023SJohn Marino     case BIT_NOT_EXPR:
5045e4b17023SJohn Marino       return 2;
5046e4b17023SJohn Marino 
5047e4b17023SJohn Marino     default:
5048e4b17023SJohn Marino       return INT_MAX;
5049e4b17023SJohn Marino     }
5050e4b17023SJohn Marino }
5051e4b17023SJohn Marino 
5052e4b17023SJohn Marino 
5053e4b17023SJohn Marino /* Function vectorizable_induction
5054e4b17023SJohn Marino 
5055e4b17023SJohn Marino    Check if PHI performs an induction computation that can be vectorized.
5056e4b17023SJohn Marino    If VEC_STMT is also passed, vectorize the induction PHI: create a vectorized
5057e4b17023SJohn Marino    phi to replace it, put it in VEC_STMT, and add it to the same basic block.
5058e4b17023SJohn Marino    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
5059e4b17023SJohn Marino 
5060e4b17023SJohn Marino bool
vectorizable_induction(gimple phi,gimple_stmt_iterator * gsi ATTRIBUTE_UNUSED,gimple * vec_stmt)5061e4b17023SJohn Marino vectorizable_induction (gimple phi, gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
5062e4b17023SJohn Marino 			gimple *vec_stmt)
5063e4b17023SJohn Marino {
5064e4b17023SJohn Marino   stmt_vec_info stmt_info = vinfo_for_stmt (phi);
5065e4b17023SJohn Marino   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5066e4b17023SJohn Marino   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5067e4b17023SJohn Marino   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5068e4b17023SJohn Marino   int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5069e4b17023SJohn Marino   int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5070e4b17023SJohn Marino   tree vec_def;
5071e4b17023SJohn Marino 
5072e4b17023SJohn Marino   gcc_assert (ncopies >= 1);
5073e4b17023SJohn Marino   /* FORNOW. These restrictions should be relaxed.  */
5074e4b17023SJohn Marino   if (nested_in_vect_loop_p (loop, phi))
5075e4b17023SJohn Marino     {
5076e4b17023SJohn Marino       imm_use_iterator imm_iter;
5077e4b17023SJohn Marino       use_operand_p use_p;
5078e4b17023SJohn Marino       gimple exit_phi;
5079e4b17023SJohn Marino       edge latch_e;
5080e4b17023SJohn Marino       tree loop_arg;
5081e4b17023SJohn Marino 
5082e4b17023SJohn Marino       if (ncopies > 1)
5083e4b17023SJohn Marino 	{
5084e4b17023SJohn Marino 	  if (vect_print_dump_info (REPORT_DETAILS))
5085e4b17023SJohn Marino 	    fprintf (vect_dump, "multiple types in nested loop.");
5086e4b17023SJohn Marino 	  return false;
5087e4b17023SJohn Marino 	}
5088e4b17023SJohn Marino 
5089e4b17023SJohn Marino       exit_phi = NULL;
5090e4b17023SJohn Marino       latch_e = loop_latch_edge (loop->inner);
5091e4b17023SJohn Marino       loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
5092e4b17023SJohn Marino       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
5093e4b17023SJohn Marino 	{
5094e4b17023SJohn Marino 	  if (!flow_bb_inside_loop_p (loop->inner,
5095e4b17023SJohn Marino 				      gimple_bb (USE_STMT (use_p))))
5096e4b17023SJohn Marino 	    {
5097e4b17023SJohn Marino 	      exit_phi = USE_STMT (use_p);
5098e4b17023SJohn Marino 	      break;
5099e4b17023SJohn Marino 	    }
5100e4b17023SJohn Marino 	}
5101e4b17023SJohn Marino       if (exit_phi)
5102e4b17023SJohn Marino 	{
5103e4b17023SJohn Marino 	  stmt_vec_info exit_phi_vinfo  = vinfo_for_stmt (exit_phi);
5104e4b17023SJohn Marino 	  if (!(STMT_VINFO_RELEVANT_P (exit_phi_vinfo)
5105e4b17023SJohn Marino 		&& !STMT_VINFO_LIVE_P (exit_phi_vinfo)))
5106e4b17023SJohn Marino 	    {
5107e4b17023SJohn Marino 	      if (vect_print_dump_info (REPORT_DETAILS))
5108e4b17023SJohn Marino 		fprintf (vect_dump, "inner-loop induction only used outside "
5109e4b17023SJohn Marino 			 "of the outer vectorized loop.");
5110e4b17023SJohn Marino 	      return false;
5111e4b17023SJohn Marino 	    }
5112e4b17023SJohn Marino 	}
5113e4b17023SJohn Marino     }
5114e4b17023SJohn Marino 
5115e4b17023SJohn Marino   if (!STMT_VINFO_RELEVANT_P (stmt_info))
5116e4b17023SJohn Marino     return false;
5117e4b17023SJohn Marino 
5118e4b17023SJohn Marino   /* FORNOW: SLP not supported.  */
5119e4b17023SJohn Marino   if (STMT_SLP_TYPE (stmt_info))
5120e4b17023SJohn Marino     return false;
5121e4b17023SJohn Marino 
5122e4b17023SJohn Marino   gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def);
5123e4b17023SJohn Marino 
5124e4b17023SJohn Marino   if (gimple_code (phi) != GIMPLE_PHI)
5125e4b17023SJohn Marino     return false;
5126e4b17023SJohn Marino 
5127e4b17023SJohn Marino   if (!vec_stmt) /* transformation not required.  */
5128e4b17023SJohn Marino     {
5129e4b17023SJohn Marino       STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type;
5130e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_DETAILS))
5131e4b17023SJohn Marino         fprintf (vect_dump, "=== vectorizable_induction ===");
5132e4b17023SJohn Marino       vect_model_induction_cost (stmt_info, ncopies);
5133e4b17023SJohn Marino       return true;
5134e4b17023SJohn Marino     }
5135e4b17023SJohn Marino 
5136e4b17023SJohn Marino   /** Transform.  **/
5137e4b17023SJohn Marino 
5138e4b17023SJohn Marino   if (vect_print_dump_info (REPORT_DETAILS))
5139e4b17023SJohn Marino     fprintf (vect_dump, "transform induction phi.");
5140e4b17023SJohn Marino 
5141e4b17023SJohn Marino   vec_def = get_initial_def_for_induction (phi);
5142e4b17023SJohn Marino   *vec_stmt = SSA_NAME_DEF_STMT (vec_def);
5143e4b17023SJohn Marino   return true;
5144e4b17023SJohn Marino }
5145e4b17023SJohn Marino 
5146e4b17023SJohn Marino /* Function vectorizable_live_operation.
5147e4b17023SJohn Marino 
5148e4b17023SJohn Marino    STMT computes a value that is used outside the loop.  Check if
5149e4b17023SJohn Marino    it can be supported.  */
5150e4b17023SJohn Marino 
5151e4b17023SJohn Marino bool
vectorizable_live_operation(gimple stmt,gimple_stmt_iterator * gsi ATTRIBUTE_UNUSED,gimple * vec_stmt ATTRIBUTE_UNUSED)5152e4b17023SJohn Marino vectorizable_live_operation (gimple stmt,
5153e4b17023SJohn Marino 			     gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
5154e4b17023SJohn Marino 			     gimple *vec_stmt ATTRIBUTE_UNUSED)
5155e4b17023SJohn Marino {
5156e4b17023SJohn Marino   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5157e4b17023SJohn Marino   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5158e4b17023SJohn Marino   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5159e4b17023SJohn Marino   int i;
5160e4b17023SJohn Marino   int op_type;
5161e4b17023SJohn Marino   tree op;
5162e4b17023SJohn Marino   tree def;
5163e4b17023SJohn Marino   gimple def_stmt;
5164e4b17023SJohn Marino   enum vect_def_type dt;
5165e4b17023SJohn Marino   enum tree_code code;
5166e4b17023SJohn Marino   enum gimple_rhs_class rhs_class;
5167e4b17023SJohn Marino 
5168e4b17023SJohn Marino   gcc_assert (STMT_VINFO_LIVE_P (stmt_info));
5169e4b17023SJohn Marino 
5170e4b17023SJohn Marino   if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
5171e4b17023SJohn Marino     return false;
5172e4b17023SJohn Marino 
5173e4b17023SJohn Marino   if (!is_gimple_assign (stmt))
5174e4b17023SJohn Marino     return false;
5175e4b17023SJohn Marino 
5176e4b17023SJohn Marino   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5177e4b17023SJohn Marino     return false;
5178e4b17023SJohn Marino 
5179e4b17023SJohn Marino   /* FORNOW. CHECKME. */
5180e4b17023SJohn Marino   if (nested_in_vect_loop_p (loop, stmt))
5181e4b17023SJohn Marino     return false;
5182e4b17023SJohn Marino 
5183e4b17023SJohn Marino   code = gimple_assign_rhs_code (stmt);
5184e4b17023SJohn Marino   op_type = TREE_CODE_LENGTH (code);
5185e4b17023SJohn Marino   rhs_class = get_gimple_rhs_class (code);
5186e4b17023SJohn Marino   gcc_assert (rhs_class != GIMPLE_UNARY_RHS || op_type == unary_op);
5187e4b17023SJohn Marino   gcc_assert (rhs_class != GIMPLE_BINARY_RHS || op_type == binary_op);
5188e4b17023SJohn Marino 
5189e4b17023SJohn Marino   /* FORNOW: support only if all uses are invariant.  This means
5190e4b17023SJohn Marino      that the scalar operations can remain in place, unvectorized.
5191e4b17023SJohn Marino      The original last scalar value that they compute will be used.  */
5192e4b17023SJohn Marino 
5193e4b17023SJohn Marino   for (i = 0; i < op_type; i++)
5194e4b17023SJohn Marino     {
5195e4b17023SJohn Marino       if (rhs_class == GIMPLE_SINGLE_RHS)
5196e4b17023SJohn Marino 	op = TREE_OPERAND (gimple_op (stmt, 1), i);
5197e4b17023SJohn Marino       else
5198e4b17023SJohn Marino 	op = gimple_op (stmt, i + 1);
5199e4b17023SJohn Marino       if (op
5200e4b17023SJohn Marino           && !vect_is_simple_use (op, stmt, loop_vinfo, NULL, &def_stmt, &def,
5201e4b17023SJohn Marino 				  &dt))
5202e4b17023SJohn Marino         {
5203e4b17023SJohn Marino           if (vect_print_dump_info (REPORT_DETAILS))
5204e4b17023SJohn Marino             fprintf (vect_dump, "use not simple.");
5205e4b17023SJohn Marino           return false;
5206e4b17023SJohn Marino         }
5207e4b17023SJohn Marino 
5208e4b17023SJohn Marino       if (dt != vect_external_def && dt != vect_constant_def)
5209e4b17023SJohn Marino         return false;
5210e4b17023SJohn Marino     }
5211e4b17023SJohn Marino 
5212e4b17023SJohn Marino   /* No transformation is required for the cases we currently support.  */
5213e4b17023SJohn Marino   return true;
5214e4b17023SJohn Marino }
5215e4b17023SJohn Marino 
5216e4b17023SJohn Marino /* Kill any debug uses outside LOOP of SSA names defined in STMT.  */
5217e4b17023SJohn Marino 
5218e4b17023SJohn Marino static void
vect_loop_kill_debug_uses(struct loop * loop,gimple stmt)5219e4b17023SJohn Marino vect_loop_kill_debug_uses (struct loop *loop, gimple stmt)
5220e4b17023SJohn Marino {
5221e4b17023SJohn Marino   ssa_op_iter op_iter;
5222e4b17023SJohn Marino   imm_use_iterator imm_iter;
5223e4b17023SJohn Marino   def_operand_p def_p;
5224e4b17023SJohn Marino   gimple ustmt;
5225e4b17023SJohn Marino 
5226e4b17023SJohn Marino   FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
5227e4b17023SJohn Marino     {
5228e4b17023SJohn Marino       FOR_EACH_IMM_USE_STMT (ustmt, imm_iter, DEF_FROM_PTR (def_p))
5229e4b17023SJohn Marino 	{
5230e4b17023SJohn Marino 	  basic_block bb;
5231e4b17023SJohn Marino 
5232e4b17023SJohn Marino 	  if (!is_gimple_debug (ustmt))
5233e4b17023SJohn Marino 	    continue;
5234e4b17023SJohn Marino 
5235e4b17023SJohn Marino 	  bb = gimple_bb (ustmt);
5236e4b17023SJohn Marino 
5237e4b17023SJohn Marino 	  if (!flow_bb_inside_loop_p (loop, bb))
5238e4b17023SJohn Marino 	    {
5239e4b17023SJohn Marino 	      if (gimple_debug_bind_p (ustmt))
5240e4b17023SJohn Marino 		{
5241e4b17023SJohn Marino 		  if (vect_print_dump_info (REPORT_DETAILS))
5242e4b17023SJohn Marino 		    fprintf (vect_dump, "killing debug use");
5243e4b17023SJohn Marino 
5244e4b17023SJohn Marino 		  gimple_debug_bind_reset_value (ustmt);
5245e4b17023SJohn Marino 		  update_stmt (ustmt);
5246e4b17023SJohn Marino 		}
5247e4b17023SJohn Marino 	      else
5248e4b17023SJohn Marino 		gcc_unreachable ();
5249e4b17023SJohn Marino 	    }
5250e4b17023SJohn Marino 	}
5251e4b17023SJohn Marino     }
5252e4b17023SJohn Marino }
5253e4b17023SJohn Marino 
5254e4b17023SJohn Marino /* Function vect_transform_loop.
5255e4b17023SJohn Marino 
5256e4b17023SJohn Marino    The analysis phase has determined that the loop is vectorizable.
5257e4b17023SJohn Marino    Vectorize the loop - create vectorized stmts to replace the scalar
5258e4b17023SJohn Marino    stmts in the loop, and update the loop exit condition.  */
5259e4b17023SJohn Marino 
5260e4b17023SJohn Marino void
vect_transform_loop(loop_vec_info loop_vinfo)5261e4b17023SJohn Marino vect_transform_loop (loop_vec_info loop_vinfo)
5262e4b17023SJohn Marino {
5263e4b17023SJohn Marino   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5264e4b17023SJohn Marino   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5265e4b17023SJohn Marino   int nbbs = loop->num_nodes;
5266e4b17023SJohn Marino   gimple_stmt_iterator si;
5267e4b17023SJohn Marino   int i;
5268e4b17023SJohn Marino   tree ratio = NULL;
5269e4b17023SJohn Marino   int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5270e4b17023SJohn Marino   bool strided_store;
5271e4b17023SJohn Marino   bool slp_scheduled = false;
5272e4b17023SJohn Marino   unsigned int nunits;
5273e4b17023SJohn Marino   tree cond_expr = NULL_TREE;
5274e4b17023SJohn Marino   gimple_seq cond_expr_stmt_list = NULL;
5275e4b17023SJohn Marino   bool do_peeling_for_loop_bound;
5276e4b17023SJohn Marino   gimple stmt, pattern_stmt;
5277e4b17023SJohn Marino   gimple_seq pattern_def_seq = NULL;
5278e4b17023SJohn Marino   gimple_stmt_iterator pattern_def_si = gsi_start (NULL);
5279e4b17023SJohn Marino   bool transform_pattern_stmt = false;
5280e4b17023SJohn Marino 
5281e4b17023SJohn Marino   if (vect_print_dump_info (REPORT_DETAILS))
5282e4b17023SJohn Marino     fprintf (vect_dump, "=== vec_transform_loop ===");
5283e4b17023SJohn Marino 
5284e4b17023SJohn Marino   /* Peel the loop if there are data refs with unknown alignment.
5285e4b17023SJohn Marino      Only one data ref with unknown store is allowed.  */
5286e4b17023SJohn Marino 
5287e4b17023SJohn Marino   if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
5288e4b17023SJohn Marino     vect_do_peeling_for_alignment (loop_vinfo);
5289e4b17023SJohn Marino 
5290e4b17023SJohn Marino   do_peeling_for_loop_bound
5291e4b17023SJohn Marino     = (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
5292e4b17023SJohn Marino        || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
5293e4b17023SJohn Marino 	   && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
5294e4b17023SJohn Marino        || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo));
5295e4b17023SJohn Marino 
5296e4b17023SJohn Marino   if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
5297e4b17023SJohn Marino       || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
5298e4b17023SJohn Marino     vect_loop_versioning (loop_vinfo,
5299e4b17023SJohn Marino 			  !do_peeling_for_loop_bound,
5300e4b17023SJohn Marino 			  &cond_expr, &cond_expr_stmt_list);
5301e4b17023SJohn Marino 
5302e4b17023SJohn Marino   /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
5303e4b17023SJohn Marino      compile time constant), or it is a constant that doesn't divide by the
5304e4b17023SJohn Marino      vectorization factor, then an epilog loop needs to be created.
5305e4b17023SJohn Marino      We therefore duplicate the loop: the original loop will be vectorized,
5306e4b17023SJohn Marino      and will compute the first (n/VF) iterations.  The second copy of the loop
5307e4b17023SJohn Marino      will remain scalar and will compute the remaining (n%VF) iterations.
5308e4b17023SJohn Marino      (VF is the vectorization factor).  */
5309e4b17023SJohn Marino 
5310e4b17023SJohn Marino   if (do_peeling_for_loop_bound)
5311e4b17023SJohn Marino     vect_do_peeling_for_loop_bound (loop_vinfo, &ratio,
5312e4b17023SJohn Marino 				    cond_expr, cond_expr_stmt_list);
5313e4b17023SJohn Marino   else
5314e4b17023SJohn Marino     ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
5315e4b17023SJohn Marino 		LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
5316e4b17023SJohn Marino 
5317e4b17023SJohn Marino   /* 1) Make sure the loop header has exactly two entries
5318e4b17023SJohn Marino      2) Make sure we have a preheader basic block.  */
5319e4b17023SJohn Marino 
5320e4b17023SJohn Marino   gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
5321e4b17023SJohn Marino 
5322e4b17023SJohn Marino   split_edge (loop_preheader_edge (loop));
5323e4b17023SJohn Marino 
5324e4b17023SJohn Marino   /* FORNOW: the vectorizer supports only loops which body consist
5325e4b17023SJohn Marino      of one basic block (header + empty latch). When the vectorizer will
5326e4b17023SJohn Marino      support more involved loop forms, the order by which the BBs are
5327e4b17023SJohn Marino      traversed need to be reconsidered.  */
5328e4b17023SJohn Marino 
5329e4b17023SJohn Marino   for (i = 0; i < nbbs; i++)
5330e4b17023SJohn Marino     {
5331e4b17023SJohn Marino       basic_block bb = bbs[i];
5332e4b17023SJohn Marino       stmt_vec_info stmt_info;
5333e4b17023SJohn Marino       gimple phi;
5334e4b17023SJohn Marino 
5335e4b17023SJohn Marino       for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
5336e4b17023SJohn Marino         {
5337e4b17023SJohn Marino 	  phi = gsi_stmt (si);
5338e4b17023SJohn Marino 	  if (vect_print_dump_info (REPORT_DETAILS))
5339e4b17023SJohn Marino 	    {
5340e4b17023SJohn Marino 	      fprintf (vect_dump, "------>vectorizing phi: ");
5341e4b17023SJohn Marino 	      print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
5342e4b17023SJohn Marino 	    }
5343e4b17023SJohn Marino 	  stmt_info = vinfo_for_stmt (phi);
5344e4b17023SJohn Marino 	  if (!stmt_info)
5345e4b17023SJohn Marino 	    continue;
5346e4b17023SJohn Marino 
5347e4b17023SJohn Marino 	  if (MAY_HAVE_DEBUG_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
5348e4b17023SJohn Marino 	    vect_loop_kill_debug_uses (loop, phi);
5349e4b17023SJohn Marino 
5350e4b17023SJohn Marino 	  if (!STMT_VINFO_RELEVANT_P (stmt_info)
5351e4b17023SJohn Marino 	      && !STMT_VINFO_LIVE_P (stmt_info))
5352e4b17023SJohn Marino 	    continue;
5353e4b17023SJohn Marino 
5354e4b17023SJohn Marino 	  if ((TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
5355e4b17023SJohn Marino 	        != (unsigned HOST_WIDE_INT) vectorization_factor)
5356e4b17023SJohn Marino 	      && vect_print_dump_info (REPORT_DETAILS))
5357e4b17023SJohn Marino 	    fprintf (vect_dump, "multiple-types.");
5358e4b17023SJohn Marino 
5359e4b17023SJohn Marino 	  if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
5360e4b17023SJohn Marino 	    {
5361e4b17023SJohn Marino 	      if (vect_print_dump_info (REPORT_DETAILS))
5362e4b17023SJohn Marino 		fprintf (vect_dump, "transform phi.");
5363e4b17023SJohn Marino 	      vect_transform_stmt (phi, NULL, NULL, NULL, NULL);
5364e4b17023SJohn Marino 	    }
5365e4b17023SJohn Marino 	}
5366e4b17023SJohn Marino 
5367e4b17023SJohn Marino       pattern_stmt = NULL;
5368e4b17023SJohn Marino       for (si = gsi_start_bb (bb); !gsi_end_p (si) || transform_pattern_stmt;)
5369e4b17023SJohn Marino 	{
5370e4b17023SJohn Marino 	  bool is_store;
5371e4b17023SJohn Marino 
5372e4b17023SJohn Marino           if (transform_pattern_stmt)
5373e4b17023SJohn Marino 	    stmt = pattern_stmt;
5374e4b17023SJohn Marino           else
5375e4b17023SJohn Marino             stmt = gsi_stmt (si);
5376e4b17023SJohn Marino 
5377e4b17023SJohn Marino 	  if (vect_print_dump_info (REPORT_DETAILS))
5378e4b17023SJohn Marino 	    {
5379e4b17023SJohn Marino 	      fprintf (vect_dump, "------>vectorizing statement: ");
5380e4b17023SJohn Marino 	      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5381e4b17023SJohn Marino 	    }
5382e4b17023SJohn Marino 
5383e4b17023SJohn Marino 	  stmt_info = vinfo_for_stmt (stmt);
5384e4b17023SJohn Marino 
5385e4b17023SJohn Marino 	  /* vector stmts created in the outer-loop during vectorization of
5386e4b17023SJohn Marino 	     stmts in an inner-loop may not have a stmt_info, and do not
5387e4b17023SJohn Marino 	     need to be vectorized.  */
5388e4b17023SJohn Marino 	  if (!stmt_info)
5389e4b17023SJohn Marino 	    {
5390e4b17023SJohn Marino 	      gsi_next (&si);
5391e4b17023SJohn Marino 	      continue;
5392e4b17023SJohn Marino 	    }
5393e4b17023SJohn Marino 
5394e4b17023SJohn Marino 	  if (MAY_HAVE_DEBUG_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
5395e4b17023SJohn Marino 	    vect_loop_kill_debug_uses (loop, stmt);
5396e4b17023SJohn Marino 
5397e4b17023SJohn Marino 	  if (!STMT_VINFO_RELEVANT_P (stmt_info)
5398e4b17023SJohn Marino 	      && !STMT_VINFO_LIVE_P (stmt_info))
5399e4b17023SJohn Marino             {
5400e4b17023SJohn Marino               if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5401e4b17023SJohn Marino                   && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
5402e4b17023SJohn Marino                   && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5403e4b17023SJohn Marino                       || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5404e4b17023SJohn Marino                 {
5405e4b17023SJohn Marino                   stmt = pattern_stmt;
5406e4b17023SJohn Marino                   stmt_info = vinfo_for_stmt (stmt);
5407e4b17023SJohn Marino                 }
5408e4b17023SJohn Marino               else
5409e4b17023SJohn Marino 	        {
5410e4b17023SJohn Marino    	          gsi_next (&si);
5411e4b17023SJohn Marino 	          continue;
5412e4b17023SJohn Marino                 }
5413e4b17023SJohn Marino 	    }
5414e4b17023SJohn Marino           else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5415e4b17023SJohn Marino                    && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
5416e4b17023SJohn Marino                    && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5417e4b17023SJohn Marino                        || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5418e4b17023SJohn Marino             transform_pattern_stmt = true;
5419e4b17023SJohn Marino 
5420e4b17023SJohn Marino 	  /* If pattern statement has def stmts, vectorize them too.  */
5421e4b17023SJohn Marino 	  if (is_pattern_stmt_p (stmt_info))
5422e4b17023SJohn Marino 	    {
5423e4b17023SJohn Marino 	      if (pattern_def_seq == NULL)
5424e4b17023SJohn Marino 		{
5425e4b17023SJohn Marino 		  pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
5426e4b17023SJohn Marino 		  pattern_def_si = gsi_start (pattern_def_seq);
5427e4b17023SJohn Marino 		}
5428e4b17023SJohn Marino 	      else if (!gsi_end_p (pattern_def_si))
5429e4b17023SJohn Marino 		gsi_next (&pattern_def_si);
5430e4b17023SJohn Marino 	      if (pattern_def_seq != NULL)
5431e4b17023SJohn Marino 		{
5432e4b17023SJohn Marino 		  gimple pattern_def_stmt = NULL;
5433e4b17023SJohn Marino 		  stmt_vec_info pattern_def_stmt_info = NULL;
5434e4b17023SJohn Marino 
5435e4b17023SJohn Marino 		  while (!gsi_end_p (pattern_def_si))
5436e4b17023SJohn Marino 		    {
5437e4b17023SJohn Marino 		      pattern_def_stmt = gsi_stmt (pattern_def_si);
5438e4b17023SJohn Marino 		      pattern_def_stmt_info
5439e4b17023SJohn Marino 			= vinfo_for_stmt (pattern_def_stmt);
5440e4b17023SJohn Marino 		      if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
5441e4b17023SJohn Marino 			  || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
5442e4b17023SJohn Marino 			break;
5443e4b17023SJohn Marino 		      gsi_next (&pattern_def_si);
5444e4b17023SJohn Marino 		    }
5445e4b17023SJohn Marino 
5446e4b17023SJohn Marino 		  if (!gsi_end_p (pattern_def_si))
5447e4b17023SJohn Marino 		    {
5448e4b17023SJohn Marino 		      if (vect_print_dump_info (REPORT_DETAILS))
5449e4b17023SJohn Marino 			{
5450e4b17023SJohn Marino 			  fprintf (vect_dump, "==> vectorizing pattern def"
5451e4b17023SJohn Marino 					      " stmt: ");
5452e4b17023SJohn Marino 			  print_gimple_stmt (vect_dump, pattern_def_stmt, 0,
5453e4b17023SJohn Marino 					     TDF_SLIM);
5454e4b17023SJohn Marino 			}
5455e4b17023SJohn Marino 
5456e4b17023SJohn Marino 		      stmt = pattern_def_stmt;
5457e4b17023SJohn Marino 		      stmt_info = pattern_def_stmt_info;
5458e4b17023SJohn Marino 		    }
5459e4b17023SJohn Marino 		  else
5460e4b17023SJohn Marino 		    {
5461e4b17023SJohn Marino 		      pattern_def_si = gsi_start (NULL);
5462e4b17023SJohn Marino 		      transform_pattern_stmt = false;
5463e4b17023SJohn Marino 		    }
5464e4b17023SJohn Marino 		}
5465e4b17023SJohn Marino 	      else
5466e4b17023SJohn Marino 		transform_pattern_stmt = false;
5467e4b17023SJohn Marino             }
5468e4b17023SJohn Marino 
5469e4b17023SJohn Marino 	  gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
5470e4b17023SJohn Marino 	  nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (
5471e4b17023SJohn Marino                                                STMT_VINFO_VECTYPE (stmt_info));
5472e4b17023SJohn Marino 	  if (!STMT_SLP_TYPE (stmt_info)
5473e4b17023SJohn Marino 	      && nunits != (unsigned int) vectorization_factor
5474e4b17023SJohn Marino               && vect_print_dump_info (REPORT_DETAILS))
5475e4b17023SJohn Marino 	    /* For SLP VF is set according to unrolling factor, and not to
5476e4b17023SJohn Marino 	       vector size, hence for SLP this print is not valid.  */
5477e4b17023SJohn Marino             fprintf (vect_dump, "multiple-types.");
5478e4b17023SJohn Marino 
5479e4b17023SJohn Marino 	  /* SLP. Schedule all the SLP instances when the first SLP stmt is
5480e4b17023SJohn Marino 	     reached.  */
5481e4b17023SJohn Marino 	  if (STMT_SLP_TYPE (stmt_info))
5482e4b17023SJohn Marino 	    {
5483e4b17023SJohn Marino 	      if (!slp_scheduled)
5484e4b17023SJohn Marino 		{
5485e4b17023SJohn Marino 		  slp_scheduled = true;
5486e4b17023SJohn Marino 
5487e4b17023SJohn Marino 		  if (vect_print_dump_info (REPORT_DETAILS))
5488e4b17023SJohn Marino 		    fprintf (vect_dump, "=== scheduling SLP instances ===");
5489e4b17023SJohn Marino 
5490e4b17023SJohn Marino 		  vect_schedule_slp (loop_vinfo, NULL);
5491e4b17023SJohn Marino 		}
5492e4b17023SJohn Marino 
5493e4b17023SJohn Marino 	      /* Hybrid SLP stmts must be vectorized in addition to SLP.  */
5494e4b17023SJohn Marino 	      if (!vinfo_for_stmt (stmt) || PURE_SLP_STMT (stmt_info))
5495e4b17023SJohn Marino 		{
5496e4b17023SJohn Marino 		  if (!transform_pattern_stmt && gsi_end_p (pattern_def_si))
5497e4b17023SJohn Marino 		    {
5498e4b17023SJohn Marino 		      pattern_def_seq = NULL;
5499e4b17023SJohn Marino 		      gsi_next (&si);
5500e4b17023SJohn Marino 		    }
5501e4b17023SJohn Marino 		  continue;
5502e4b17023SJohn Marino 		}
5503e4b17023SJohn Marino 	    }
5504e4b17023SJohn Marino 
5505e4b17023SJohn Marino 	  /* -------- vectorize statement ------------ */
5506e4b17023SJohn Marino 	  if (vect_print_dump_info (REPORT_DETAILS))
5507e4b17023SJohn Marino 	    fprintf (vect_dump, "transform statement.");
5508e4b17023SJohn Marino 
5509e4b17023SJohn Marino 	  strided_store = false;
5510e4b17023SJohn Marino 	  is_store = vect_transform_stmt (stmt, &si, &strided_store, NULL, NULL);
5511e4b17023SJohn Marino           if (is_store)
5512e4b17023SJohn Marino             {
5513e4b17023SJohn Marino 	      if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
5514e4b17023SJohn Marino 		{
5515e4b17023SJohn Marino 		  /* Interleaving. If IS_STORE is TRUE, the vectorization of the
5516e4b17023SJohn Marino 		     interleaving chain was completed - free all the stores in
5517e4b17023SJohn Marino 		     the chain.  */
5518e4b17023SJohn Marino 		  gsi_next (&si);
5519e4b17023SJohn Marino 		  vect_remove_stores (GROUP_FIRST_ELEMENT (stmt_info));
5520e4b17023SJohn Marino  		  continue;
5521e4b17023SJohn Marino 		}
5522e4b17023SJohn Marino 	      else
5523e4b17023SJohn Marino 		{
5524e4b17023SJohn Marino 		  /* Free the attached stmt_vec_info and remove the stmt.  */
5525e4b17023SJohn Marino 		  free_stmt_vec_info (gsi_stmt (si));
5526e4b17023SJohn Marino 		  gsi_remove (&si, true);
5527e4b17023SJohn Marino 		  continue;
5528e4b17023SJohn Marino 		}
5529e4b17023SJohn Marino 	    }
5530e4b17023SJohn Marino 
5531e4b17023SJohn Marino 	  if (!transform_pattern_stmt && gsi_end_p (pattern_def_si))
5532e4b17023SJohn Marino 	    {
5533e4b17023SJohn Marino 	      pattern_def_seq = NULL;
5534e4b17023SJohn Marino 	      gsi_next (&si);
5535e4b17023SJohn Marino 	    }
5536e4b17023SJohn Marino 	}		        /* stmts in BB */
5537e4b17023SJohn Marino     }				/* BBs in loop */
5538e4b17023SJohn Marino 
5539e4b17023SJohn Marino   slpeel_make_loop_iterate_ntimes (loop, ratio);
5540e4b17023SJohn Marino 
5541e4b17023SJohn Marino   /* The memory tags and pointers in vectorized statements need to
5542e4b17023SJohn Marino      have their SSA forms updated.  FIXME, why can't this be delayed
5543e4b17023SJohn Marino      until all the loops have been transformed?  */
5544e4b17023SJohn Marino   update_ssa (TODO_update_ssa);
5545e4b17023SJohn Marino 
5546e4b17023SJohn Marino   if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS))
5547e4b17023SJohn Marino     fprintf (vect_dump, "LOOP VECTORIZED.");
5548e4b17023SJohn Marino   if (loop->inner && vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS))
5549e4b17023SJohn Marino     fprintf (vect_dump, "OUTER LOOP VECTORIZED.");
5550e4b17023SJohn Marino }
5551