xref: /dflybsd-src/contrib/gcc-4.7/gcc/tree-vect-slp.c (revision 81fc95a5293ee307c688a350a3feb4734aaddbb4)
1e4b17023SJohn Marino /* SLP - Basic Block Vectorization
2e4b17023SJohn Marino    Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012
3e4b17023SJohn Marino    Free Software Foundation, Inc.
4e4b17023SJohn Marino    Contributed by Dorit Naishlos <dorit@il.ibm.com>
5e4b17023SJohn Marino    and Ira Rosen <irar@il.ibm.com>
6e4b17023SJohn Marino 
7e4b17023SJohn Marino This file is part of GCC.
8e4b17023SJohn Marino 
9e4b17023SJohn Marino GCC is free software; you can redistribute it and/or modify it under
10e4b17023SJohn Marino the terms of the GNU General Public License as published by the Free
11e4b17023SJohn Marino Software Foundation; either version 3, or (at your option) any later
12e4b17023SJohn Marino version.
13e4b17023SJohn Marino 
14e4b17023SJohn Marino GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15e4b17023SJohn Marino WARRANTY; without even the implied warranty of MERCHANTABILITY or
16e4b17023SJohn Marino FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
17e4b17023SJohn Marino for more details.
18e4b17023SJohn Marino 
19e4b17023SJohn Marino You should have received a copy of the GNU General Public License
20e4b17023SJohn Marino along with GCC; see the file COPYING3.  If not see
21e4b17023SJohn Marino <http://www.gnu.org/licenses/>.  */
22e4b17023SJohn Marino 
23e4b17023SJohn Marino #include "config.h"
24e4b17023SJohn Marino #include "system.h"
25e4b17023SJohn Marino #include "coretypes.h"
26e4b17023SJohn Marino #include "tm.h"
27e4b17023SJohn Marino #include "ggc.h"
28e4b17023SJohn Marino #include "tree.h"
29e4b17023SJohn Marino #include "target.h"
30e4b17023SJohn Marino #include "basic-block.h"
31e4b17023SJohn Marino #include "tree-pretty-print.h"
32e4b17023SJohn Marino #include "gimple-pretty-print.h"
33e4b17023SJohn Marino #include "tree-flow.h"
34e4b17023SJohn Marino #include "tree-dump.h"
35e4b17023SJohn Marino #include "cfgloop.h"
36e4b17023SJohn Marino #include "cfglayout.h"
37e4b17023SJohn Marino #include "expr.h"
38e4b17023SJohn Marino #include "recog.h"
39e4b17023SJohn Marino #include "optabs.h"
40e4b17023SJohn Marino #include "tree-vectorizer.h"
41e4b17023SJohn Marino #include "langhooks.h"
42e4b17023SJohn Marino 
43e4b17023SJohn Marino /* Extract the location of the basic block in the source code.
44e4b17023SJohn Marino    Return the basic block location if succeed and NULL if not.  */
45e4b17023SJohn Marino 
46e4b17023SJohn Marino LOC
find_bb_location(basic_block bb)47e4b17023SJohn Marino find_bb_location (basic_block bb)
48e4b17023SJohn Marino {
49e4b17023SJohn Marino   gimple stmt = NULL;
50e4b17023SJohn Marino   gimple_stmt_iterator si;
51e4b17023SJohn Marino 
52e4b17023SJohn Marino   if (!bb)
53e4b17023SJohn Marino     return UNKNOWN_LOC;
54e4b17023SJohn Marino 
55e4b17023SJohn Marino   for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
56e4b17023SJohn Marino     {
57e4b17023SJohn Marino       stmt = gsi_stmt (si);
58e4b17023SJohn Marino       if (gimple_location (stmt) != UNKNOWN_LOC)
59e4b17023SJohn Marino         return gimple_location (stmt);
60e4b17023SJohn Marino     }
61e4b17023SJohn Marino 
62e4b17023SJohn Marino   return UNKNOWN_LOC;
63e4b17023SJohn Marino }
64e4b17023SJohn Marino 
65e4b17023SJohn Marino 
66e4b17023SJohn Marino /* Recursively free the memory allocated for the SLP tree rooted at NODE.  */
67e4b17023SJohn Marino 
68e4b17023SJohn Marino static void
vect_free_slp_tree(slp_tree node)69e4b17023SJohn Marino vect_free_slp_tree (slp_tree node)
70e4b17023SJohn Marino {
71e4b17023SJohn Marino   int i;
72e4b17023SJohn Marino   slp_void_p child;
73e4b17023SJohn Marino 
74e4b17023SJohn Marino   if (!node)
75e4b17023SJohn Marino     return;
76e4b17023SJohn Marino 
77e4b17023SJohn Marino   FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
78e4b17023SJohn Marino     vect_free_slp_tree ((slp_tree) child);
79e4b17023SJohn Marino 
80e4b17023SJohn Marino   VEC_free (slp_void_p, heap, SLP_TREE_CHILDREN (node));
81e4b17023SJohn Marino   VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
82e4b17023SJohn Marino 
83e4b17023SJohn Marino   if (SLP_TREE_VEC_STMTS (node))
84e4b17023SJohn Marino     VEC_free (gimple, heap, SLP_TREE_VEC_STMTS (node));
85e4b17023SJohn Marino 
86e4b17023SJohn Marino   free (node);
87e4b17023SJohn Marino }
88e4b17023SJohn Marino 
89e4b17023SJohn Marino 
90e4b17023SJohn Marino /* Free the memory allocated for the SLP instance.  */
91e4b17023SJohn Marino 
92e4b17023SJohn Marino void
vect_free_slp_instance(slp_instance instance)93e4b17023SJohn Marino vect_free_slp_instance (slp_instance instance)
94e4b17023SJohn Marino {
95e4b17023SJohn Marino   vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
96e4b17023SJohn Marino   VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (instance));
97e4b17023SJohn Marino   VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
98e4b17023SJohn Marino   free (instance);
99e4b17023SJohn Marino }
100e4b17023SJohn Marino 
101e4b17023SJohn Marino 
102e4b17023SJohn Marino /* Create an SLP node for SCALAR_STMTS.  */
103e4b17023SJohn Marino 
104e4b17023SJohn Marino static slp_tree
vect_create_new_slp_node(VEC (gimple,heap)* scalar_stmts)105e4b17023SJohn Marino vect_create_new_slp_node (VEC (gimple, heap) *scalar_stmts)
106e4b17023SJohn Marino {
107e4b17023SJohn Marino   slp_tree node;
108e4b17023SJohn Marino   gimple stmt = VEC_index (gimple, scalar_stmts, 0);
109e4b17023SJohn Marino   unsigned int nops;
110e4b17023SJohn Marino 
111e4b17023SJohn Marino   if (is_gimple_call (stmt))
112e4b17023SJohn Marino     nops = gimple_call_num_args (stmt);
113e4b17023SJohn Marino   else if (is_gimple_assign (stmt))
114e4b17023SJohn Marino     {
115e4b17023SJohn Marino       nops = gimple_num_ops (stmt) - 1;
116e4b17023SJohn Marino       if (gimple_assign_rhs_code (stmt) == COND_EXPR)
117e4b17023SJohn Marino 	nops++;
118e4b17023SJohn Marino     }
119e4b17023SJohn Marino   else
120e4b17023SJohn Marino     return NULL;
121e4b17023SJohn Marino 
122e4b17023SJohn Marino   node = XNEW (struct _slp_tree);
123e4b17023SJohn Marino   SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
124e4b17023SJohn Marino   SLP_TREE_VEC_STMTS (node) = NULL;
125e4b17023SJohn Marino   SLP_TREE_CHILDREN (node) = VEC_alloc (slp_void_p, heap, nops);
126e4b17023SJohn Marino   SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
127e4b17023SJohn Marino   SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;
128e4b17023SJohn Marino 
129e4b17023SJohn Marino   return node;
130e4b17023SJohn Marino }
131e4b17023SJohn Marino 
132e4b17023SJohn Marino 
133e4b17023SJohn Marino /* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
134e4b17023SJohn Marino    operand.  */
VEC(slp_oprnd_info,heap)135e4b17023SJohn Marino static VEC (slp_oprnd_info, heap) *
136e4b17023SJohn Marino vect_create_oprnd_info (int nops, int group_size)
137e4b17023SJohn Marino {
138e4b17023SJohn Marino   int i;
139e4b17023SJohn Marino   slp_oprnd_info oprnd_info;
140e4b17023SJohn Marino   VEC (slp_oprnd_info, heap) *oprnds_info;
141e4b17023SJohn Marino 
142e4b17023SJohn Marino   oprnds_info = VEC_alloc (slp_oprnd_info, heap, nops);
143e4b17023SJohn Marino   for (i = 0; i < nops; i++)
144e4b17023SJohn Marino     {
145e4b17023SJohn Marino       oprnd_info = XNEW (struct _slp_oprnd_info);
146e4b17023SJohn Marino       oprnd_info->def_stmts = VEC_alloc (gimple, heap, group_size);
147e4b17023SJohn Marino       oprnd_info->first_dt = vect_uninitialized_def;
148e4b17023SJohn Marino       oprnd_info->first_def_type = NULL_TREE;
149e4b17023SJohn Marino       oprnd_info->first_const_oprnd = NULL_TREE;
150e4b17023SJohn Marino       oprnd_info->first_pattern = false;
151e4b17023SJohn Marino       VEC_quick_push (slp_oprnd_info, oprnds_info, oprnd_info);
152e4b17023SJohn Marino     }
153e4b17023SJohn Marino 
154e4b17023SJohn Marino   return oprnds_info;
155e4b17023SJohn Marino }
156e4b17023SJohn Marino 
157e4b17023SJohn Marino 
158e4b17023SJohn Marino /* Free operands info.  */
159e4b17023SJohn Marino 
160e4b17023SJohn Marino static void
vect_free_oprnd_info(VEC (slp_oprnd_info,heap)** oprnds_info)161e4b17023SJohn Marino vect_free_oprnd_info (VEC (slp_oprnd_info, heap) **oprnds_info)
162e4b17023SJohn Marino {
163e4b17023SJohn Marino   int i;
164e4b17023SJohn Marino   slp_oprnd_info oprnd_info;
165e4b17023SJohn Marino 
166e4b17023SJohn Marino   FOR_EACH_VEC_ELT (slp_oprnd_info, *oprnds_info, i, oprnd_info)
167e4b17023SJohn Marino     {
168e4b17023SJohn Marino       VEC_free (gimple, heap, oprnd_info->def_stmts);
169e4b17023SJohn Marino       XDELETE (oprnd_info);
170e4b17023SJohn Marino     }
171e4b17023SJohn Marino 
172e4b17023SJohn Marino   VEC_free (slp_oprnd_info, heap, *oprnds_info);
173e4b17023SJohn Marino }
174e4b17023SJohn Marino 
175e4b17023SJohn Marino 
176e4b17023SJohn Marino /* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
177e4b17023SJohn Marino    they are of a valid type and that they match the defs of the first stmt of
178e4b17023SJohn Marino    the SLP group (stored in OPRNDS_INFO).  */
179e4b17023SJohn Marino 
180e4b17023SJohn Marino static bool
vect_get_and_check_slp_defs(loop_vec_info loop_vinfo,bb_vec_info bb_vinfo,slp_tree slp_node,gimple stmt,int ncopies_for_cost,bool first,VEC (slp_oprnd_info,heap)** oprnds_info)181e4b17023SJohn Marino vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
182e4b17023SJohn Marino                              slp_tree slp_node, gimple stmt,
183e4b17023SJohn Marino 			     int ncopies_for_cost, bool first,
184e4b17023SJohn Marino                              VEC (slp_oprnd_info, heap) **oprnds_info)
185e4b17023SJohn Marino {
186e4b17023SJohn Marino   tree oprnd;
187e4b17023SJohn Marino   unsigned int i, number_of_oprnds;
188e4b17023SJohn Marino   tree def, def_op0 = NULL_TREE;
189e4b17023SJohn Marino   gimple def_stmt;
190e4b17023SJohn Marino   enum vect_def_type dt = vect_uninitialized_def;
191e4b17023SJohn Marino   enum vect_def_type dt_op0 = vect_uninitialized_def;
192e4b17023SJohn Marino   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
193e4b17023SJohn Marino   tree lhs = gimple_get_lhs (stmt);
194e4b17023SJohn Marino   struct loop *loop = NULL;
195e4b17023SJohn Marino   enum tree_code rhs_code;
196e4b17023SJohn Marino   bool different_types = false;
197e4b17023SJohn Marino   bool pattern = false;
198e4b17023SJohn Marino   slp_oprnd_info oprnd_info, oprnd0_info, oprnd1_info;
199e4b17023SJohn Marino   int op_idx = 1;
200e4b17023SJohn Marino   tree compare_rhs = NULL_TREE;
201e4b17023SJohn Marino 
202e4b17023SJohn Marino   if (loop_vinfo)
203e4b17023SJohn Marino     loop = LOOP_VINFO_LOOP (loop_vinfo);
204e4b17023SJohn Marino 
205e4b17023SJohn Marino   if (is_gimple_call (stmt))
206e4b17023SJohn Marino     {
207e4b17023SJohn Marino       number_of_oprnds = gimple_call_num_args (stmt);
208e4b17023SJohn Marino       op_idx = 3;
209e4b17023SJohn Marino     }
210e4b17023SJohn Marino   else if (is_gimple_assign (stmt))
211e4b17023SJohn Marino     {
212e4b17023SJohn Marino       number_of_oprnds = gimple_num_ops (stmt) - 1;
213e4b17023SJohn Marino       if (gimple_assign_rhs_code (stmt) == COND_EXPR)
214e4b17023SJohn Marino         number_of_oprnds++;
215e4b17023SJohn Marino     }
216e4b17023SJohn Marino   else
217e4b17023SJohn Marino     return false;
218e4b17023SJohn Marino 
219e4b17023SJohn Marino   for (i = 0; i < number_of_oprnds; i++)
220e4b17023SJohn Marino     {
221e4b17023SJohn Marino       if (compare_rhs)
222e4b17023SJohn Marino 	{
223e4b17023SJohn Marino 	  oprnd = compare_rhs;
224e4b17023SJohn Marino 	  compare_rhs = NULL_TREE;
225e4b17023SJohn Marino 	}
226e4b17023SJohn Marino       else
227e4b17023SJohn Marino         oprnd = gimple_op (stmt, op_idx++);
228e4b17023SJohn Marino 
229e4b17023SJohn Marino       oprnd_info = VEC_index (slp_oprnd_info, *oprnds_info, i);
230e4b17023SJohn Marino 
231e4b17023SJohn Marino       if (COMPARISON_CLASS_P (oprnd))
232e4b17023SJohn Marino         {
233e4b17023SJohn Marino           compare_rhs = TREE_OPERAND (oprnd, 1);
234e4b17023SJohn Marino           oprnd = TREE_OPERAND (oprnd, 0);
235e4b17023SJohn Marino 	}
236e4b17023SJohn Marino 
237e4b17023SJohn Marino       if (!vect_is_simple_use (oprnd, NULL, loop_vinfo, bb_vinfo, &def_stmt,
238e4b17023SJohn Marino 			       &def, &dt)
239e4b17023SJohn Marino 	  || (!def_stmt && dt != vect_constant_def))
240e4b17023SJohn Marino 	{
241e4b17023SJohn Marino 	  if (vect_print_dump_info (REPORT_SLP))
242e4b17023SJohn Marino 	    {
243e4b17023SJohn Marino 	      fprintf (vect_dump, "Build SLP failed: can't find def for ");
244e4b17023SJohn Marino 	      print_generic_expr (vect_dump, oprnd, TDF_SLIM);
245e4b17023SJohn Marino 	    }
246e4b17023SJohn Marino 
247e4b17023SJohn Marino 	  return false;
248e4b17023SJohn Marino 	}
249e4b17023SJohn Marino 
250e4b17023SJohn Marino       /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
251e4b17023SJohn Marino          from the pattern.  Check that all the stmts of the node are in the
252e4b17023SJohn Marino          pattern.  */
253e4b17023SJohn Marino       if (loop && def_stmt && gimple_bb (def_stmt)
254e4b17023SJohn Marino           && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
255e4b17023SJohn Marino           && vinfo_for_stmt (def_stmt)
256e4b17023SJohn Marino           && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
257e4b17023SJohn Marino           && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
258e4b17023SJohn Marino           && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
259e4b17023SJohn Marino         {
260e4b17023SJohn Marino           pattern = true;
261e4b17023SJohn Marino           if (!first && !oprnd_info->first_pattern)
262e4b17023SJohn Marino 	    {
263e4b17023SJohn Marino 	      if (vect_print_dump_info (REPORT_DETAILS))
264e4b17023SJohn Marino 		{
265e4b17023SJohn Marino 		  fprintf (vect_dump, "Build SLP failed: some of the stmts"
266e4b17023SJohn Marino 				" are in a pattern, and others are not ");
267e4b17023SJohn Marino 		  print_generic_expr (vect_dump, oprnd, TDF_SLIM);
268e4b17023SJohn Marino 		}
269e4b17023SJohn Marino 
270e4b17023SJohn Marino 	      return false;
271e4b17023SJohn Marino             }
272e4b17023SJohn Marino 
273e4b17023SJohn Marino           def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
274e4b17023SJohn Marino           dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
275e4b17023SJohn Marino 
276e4b17023SJohn Marino           if (dt == vect_unknown_def_type)
277e4b17023SJohn Marino             {
278e4b17023SJohn Marino               if (vect_print_dump_info (REPORT_DETAILS))
279e4b17023SJohn Marino                 fprintf (vect_dump, "Unsupported pattern.");
280e4b17023SJohn Marino               return false;
281e4b17023SJohn Marino             }
282e4b17023SJohn Marino 
283e4b17023SJohn Marino           switch (gimple_code (def_stmt))
284e4b17023SJohn Marino             {
285e4b17023SJohn Marino               case GIMPLE_PHI:
286e4b17023SJohn Marino                 def = gimple_phi_result (def_stmt);
287e4b17023SJohn Marino                 break;
288e4b17023SJohn Marino 
289e4b17023SJohn Marino               case GIMPLE_ASSIGN:
290e4b17023SJohn Marino                 def = gimple_assign_lhs (def_stmt);
291e4b17023SJohn Marino                 break;
292e4b17023SJohn Marino 
293e4b17023SJohn Marino               default:
294e4b17023SJohn Marino                 if (vect_print_dump_info (REPORT_DETAILS))
295e4b17023SJohn Marino                   fprintf (vect_dump, "unsupported defining stmt: ");
296e4b17023SJohn Marino                 return false;
297e4b17023SJohn Marino             }
298e4b17023SJohn Marino         }
299e4b17023SJohn Marino 
300e4b17023SJohn Marino       if (first)
301e4b17023SJohn Marino 	{
302e4b17023SJohn Marino 	  oprnd_info->first_dt = dt;
303e4b17023SJohn Marino 	  oprnd_info->first_pattern = pattern;
304e4b17023SJohn Marino 	  if (def)
305e4b17023SJohn Marino 	    {
306e4b17023SJohn Marino 	      oprnd_info->first_def_type = TREE_TYPE (def);
307e4b17023SJohn Marino 	      oprnd_info->first_const_oprnd = NULL_TREE;
308e4b17023SJohn Marino 	    }
309e4b17023SJohn Marino 	  else
310e4b17023SJohn Marino             {
311e4b17023SJohn Marino               oprnd_info->first_def_type = NULL_TREE;
312e4b17023SJohn Marino               oprnd_info->first_const_oprnd = oprnd;
313e4b17023SJohn Marino             }
314e4b17023SJohn Marino 
315e4b17023SJohn Marino 	  if (i == 0)
316e4b17023SJohn Marino 	    {
317e4b17023SJohn Marino 	      def_op0 = def;
318e4b17023SJohn Marino 	      dt_op0 = dt;
319e4b17023SJohn Marino 	      /* Analyze costs (for the first stmt of the group only).  */
320e4b17023SJohn Marino 	      if (REFERENCE_CLASS_P (lhs))
321e4b17023SJohn Marino 		/* Store.  */
322e4b17023SJohn Marino                 vect_model_store_cost (stmt_info, ncopies_for_cost, false,
323e4b17023SJohn Marino                                         dt, slp_node);
324e4b17023SJohn Marino 	      else
325e4b17023SJohn Marino 		{
326e4b17023SJohn Marino 		  enum vect_def_type dts[2];
327e4b17023SJohn Marino 		  dts[0] = dt;
328e4b17023SJohn Marino 		  dts[1] = vect_uninitialized_def;
329e4b17023SJohn Marino 		  /* Not memory operation (we don't call this function for
330e4b17023SJohn Marino 		     loads).  */
331e4b17023SJohn Marino 		  vect_model_simple_cost (stmt_info, ncopies_for_cost, dts,
332e4b17023SJohn Marino 					  slp_node);
333e4b17023SJohn Marino 		}
334e4b17023SJohn Marino 	    }
335e4b17023SJohn Marino 	}
336e4b17023SJohn Marino       else
337e4b17023SJohn Marino 	{
338e4b17023SJohn Marino 	  /* Not first stmt of the group, check that the def-stmt/s match
339e4b17023SJohn Marino 	     the def-stmt/s of the first stmt.  Allow different definition
340e4b17023SJohn Marino 	     types for reduction chains: the first stmt must be a
341e4b17023SJohn Marino 	     vect_reduction_def (a phi node), and the rest
342e4b17023SJohn Marino 	     vect_internal_def.  */
343e4b17023SJohn Marino 	  if (((oprnd_info->first_dt != dt
344e4b17023SJohn Marino                 && !(oprnd_info->first_dt == vect_reduction_def
345e4b17023SJohn Marino                      && dt == vect_internal_def))
346e4b17023SJohn Marino                || (oprnd_info->first_def_type != NULL_TREE
347e4b17023SJohn Marino 		   && def
348e4b17023SJohn Marino 		   && !types_compatible_p (oprnd_info->first_def_type,
349e4b17023SJohn Marino 					   TREE_TYPE (def))))
350e4b17023SJohn Marino 	       || (!def
351e4b17023SJohn Marino 		   && !types_compatible_p (TREE_TYPE (oprnd_info->first_const_oprnd),
352e4b17023SJohn Marino 					   TREE_TYPE (oprnd)))
353e4b17023SJohn Marino 	       || different_types)
354e4b17023SJohn Marino 	    {
355e4b17023SJohn Marino 	      if (number_of_oprnds != 2)
356e4b17023SJohn Marino 		{
357e4b17023SJohn Marino 		  if (vect_print_dump_info (REPORT_SLP))
358e4b17023SJohn Marino 		    fprintf (vect_dump, "Build SLP failed: different types ");
359e4b17023SJohn Marino 
360e4b17023SJohn Marino 		  return false;
361e4b17023SJohn Marino                 }
362e4b17023SJohn Marino 
363e4b17023SJohn Marino 	      /* Try to swap operands in case of binary operation.  */
364e4b17023SJohn Marino               if (i == 0)
365e4b17023SJohn Marino                 different_types = true;
366e4b17023SJohn Marino               else
367e4b17023SJohn Marino 		{
368e4b17023SJohn Marino 		  oprnd0_info = VEC_index (slp_oprnd_info, *oprnds_info, 0);
369e4b17023SJohn Marino 		  if (is_gimple_assign (stmt)
370e4b17023SJohn Marino 		      && (rhs_code = gimple_assign_rhs_code (stmt))
371e4b17023SJohn Marino  		      && TREE_CODE_CLASS (rhs_code) == tcc_binary
372e4b17023SJohn Marino 		      && commutative_tree_code (rhs_code)
373e4b17023SJohn Marino 		      && oprnd0_info->first_dt == dt
374e4b17023SJohn Marino 		      && oprnd_info->first_dt == dt_op0
375e4b17023SJohn Marino 		      && def_op0 && def
376e4b17023SJohn Marino 		      && !(oprnd0_info->first_def_type
377e4b17023SJohn Marino 			   && !types_compatible_p (oprnd0_info->first_def_type,
378e4b17023SJohn Marino 			                           TREE_TYPE (def)))
379e4b17023SJohn Marino                       && !(oprnd_info->first_def_type
380e4b17023SJohn Marino                            && !types_compatible_p (oprnd_info->first_def_type,
381e4b17023SJohn Marino                                                    TREE_TYPE (def_op0))))
382e4b17023SJohn Marino                     {
383e4b17023SJohn Marino                       if (vect_print_dump_info (REPORT_SLP))
384e4b17023SJohn Marino 	                {
385e4b17023SJohn Marino 			  fprintf (vect_dump, "Swapping operands of ");
386e4b17023SJohn Marino  		          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
387e4b17023SJohn Marino 			}
388e4b17023SJohn Marino 
389e4b17023SJohn Marino  		      swap_tree_operands (stmt, gimple_assign_rhs1_ptr (stmt),
390e4b17023SJohn Marino  	                                  gimple_assign_rhs2_ptr (stmt));
391e4b17023SJohn Marino 		    }
392e4b17023SJohn Marino                   else
393e4b17023SJohn Marino                     {
394e4b17023SJohn Marino          	      if (vect_print_dump_info (REPORT_SLP))
395e4b17023SJohn Marino 			fprintf (vect_dump, "Build SLP failed: different types ");
396e4b17023SJohn Marino 
397e4b17023SJohn Marino 		      return false;
398e4b17023SJohn Marino 		    }
399e4b17023SJohn Marino 		}
400e4b17023SJohn Marino 	    }
401e4b17023SJohn Marino 	}
402e4b17023SJohn Marino 
403e4b17023SJohn Marino       /* Check the types of the definitions.  */
404e4b17023SJohn Marino       switch (dt)
405e4b17023SJohn Marino 	{
406e4b17023SJohn Marino 	case vect_constant_def:
407e4b17023SJohn Marino 	case vect_external_def:
408e4b17023SJohn Marino         case vect_reduction_def:
409e4b17023SJohn Marino 	  break;
410e4b17023SJohn Marino 
411e4b17023SJohn Marino 	case vect_internal_def:
412e4b17023SJohn Marino           if (different_types)
413e4b17023SJohn Marino             {
414e4b17023SJohn Marino 	      oprnd0_info = VEC_index (slp_oprnd_info, *oprnds_info, 0);
415e4b17023SJohn Marino 	      oprnd1_info = VEC_index (slp_oprnd_info, *oprnds_info, 0);
416e4b17023SJohn Marino               if (i == 0)
417e4b17023SJohn Marino                 VEC_quick_push (gimple, oprnd1_info->def_stmts, def_stmt);
418e4b17023SJohn Marino               else
419e4b17023SJohn Marino                 VEC_quick_push (gimple, oprnd0_info->def_stmts, def_stmt);
420e4b17023SJohn Marino             }
421e4b17023SJohn Marino 	  else
422e4b17023SJohn Marino  	    VEC_quick_push (gimple, oprnd_info->def_stmts, def_stmt);
423e4b17023SJohn Marino 
424e4b17023SJohn Marino 	  break;
425e4b17023SJohn Marino 
426e4b17023SJohn Marino 	default:
427e4b17023SJohn Marino 	  /* FORNOW: Not supported.  */
428e4b17023SJohn Marino 	  if (vect_print_dump_info (REPORT_SLP))
429e4b17023SJohn Marino 	    {
430e4b17023SJohn Marino 	      fprintf (vect_dump, "Build SLP failed: illegal type of def ");
431e4b17023SJohn Marino 	      print_generic_expr (vect_dump, def, TDF_SLIM);
432e4b17023SJohn Marino 	    }
433e4b17023SJohn Marino 
434e4b17023SJohn Marino 	  return false;
435e4b17023SJohn Marino 	}
436e4b17023SJohn Marino     }
437e4b17023SJohn Marino 
438e4b17023SJohn Marino   return true;
439e4b17023SJohn Marino }
440e4b17023SJohn Marino 
441e4b17023SJohn Marino 
442e4b17023SJohn Marino /* Recursively build an SLP tree starting from NODE.
443e4b17023SJohn Marino    Fail (and return FALSE) if def-stmts are not isomorphic, require data
444e4b17023SJohn Marino    permutation or are of unsupported types of operation.  Otherwise, return
445e4b17023SJohn Marino    TRUE.  */
446e4b17023SJohn Marino 
447e4b17023SJohn Marino static bool
vect_build_slp_tree(loop_vec_info loop_vinfo,bb_vec_info bb_vinfo,slp_tree * node,unsigned int group_size,int * inside_cost,int * outside_cost,int ncopies_for_cost,unsigned int * max_nunits,VEC (int,heap)** load_permutation,VEC (slp_tree,heap)** loads,unsigned int vectorization_factor,bool * loads_permuted)448e4b17023SJohn Marino vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
449e4b17023SJohn Marino                      slp_tree *node, unsigned int group_size,
450e4b17023SJohn Marino                      int *inside_cost, int *outside_cost,
451e4b17023SJohn Marino                      int ncopies_for_cost, unsigned int *max_nunits,
452e4b17023SJohn Marino                      VEC (int, heap) **load_permutation,
453e4b17023SJohn Marino                      VEC (slp_tree, heap) **loads,
454e4b17023SJohn Marino                      unsigned int vectorization_factor, bool *loads_permuted)
455e4b17023SJohn Marino {
456e4b17023SJohn Marino   unsigned int i;
457e4b17023SJohn Marino   VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
458e4b17023SJohn Marino   gimple stmt = VEC_index (gimple, stmts, 0);
459e4b17023SJohn Marino   enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK;
460e4b17023SJohn Marino   enum tree_code first_cond_code = ERROR_MARK;
461e4b17023SJohn Marino   tree lhs;
462e4b17023SJohn Marino   bool stop_recursion = false, need_same_oprnds = false;
463e4b17023SJohn Marino   tree vectype, scalar_type, first_op1 = NULL_TREE;
464e4b17023SJohn Marino   unsigned int ncopies;
465e4b17023SJohn Marino   optab optab;
466e4b17023SJohn Marino   int icode;
467e4b17023SJohn Marino   enum machine_mode optab_op2_mode;
468e4b17023SJohn Marino   enum machine_mode vec_mode;
469e4b17023SJohn Marino   struct data_reference *first_dr;
470e4b17023SJohn Marino   HOST_WIDE_INT dummy;
471e4b17023SJohn Marino   bool permutation = false;
472e4b17023SJohn Marino   unsigned int load_place;
473e4b17023SJohn Marino   gimple first_load, prev_first_load = NULL;
474e4b17023SJohn Marino   VEC (slp_oprnd_info, heap) *oprnds_info;
475e4b17023SJohn Marino   unsigned int nops;
476e4b17023SJohn Marino   slp_oprnd_info oprnd_info;
477e4b17023SJohn Marino   tree cond;
478e4b17023SJohn Marino 
479e4b17023SJohn Marino   if (is_gimple_call (stmt))
480e4b17023SJohn Marino     nops = gimple_call_num_args (stmt);
481e4b17023SJohn Marino   else if (is_gimple_assign (stmt))
482e4b17023SJohn Marino     {
483e4b17023SJohn Marino       nops = gimple_num_ops (stmt) - 1;
484e4b17023SJohn Marino       if (gimple_assign_rhs_code (stmt) == COND_EXPR)
485e4b17023SJohn Marino 	nops++;
486e4b17023SJohn Marino     }
487e4b17023SJohn Marino   else
488e4b17023SJohn Marino     return false;
489e4b17023SJohn Marino 
490e4b17023SJohn Marino   oprnds_info = vect_create_oprnd_info (nops, group_size);
491e4b17023SJohn Marino 
492e4b17023SJohn Marino   /* For every stmt in NODE find its def stmt/s.  */
493e4b17023SJohn Marino   FOR_EACH_VEC_ELT (gimple, stmts, i, stmt)
494e4b17023SJohn Marino     {
495e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_SLP))
496e4b17023SJohn Marino 	{
497e4b17023SJohn Marino 	  fprintf (vect_dump, "Build SLP for ");
498e4b17023SJohn Marino 	  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
499e4b17023SJohn Marino 	}
500e4b17023SJohn Marino 
501e4b17023SJohn Marino       /* Fail to vectorize statements marked as unvectorizable.  */
502e4b17023SJohn Marino       if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
503e4b17023SJohn Marino         {
504e4b17023SJohn Marino           if (vect_print_dump_info (REPORT_SLP))
505e4b17023SJohn Marino             {
506e4b17023SJohn Marino               fprintf (vect_dump,
507e4b17023SJohn Marino                        "Build SLP failed: unvectorizable statement ");
508e4b17023SJohn Marino               print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
509e4b17023SJohn Marino             }
510e4b17023SJohn Marino 
511e4b17023SJohn Marino 	  vect_free_oprnd_info (&oprnds_info);
512e4b17023SJohn Marino           return false;
513e4b17023SJohn Marino         }
514e4b17023SJohn Marino 
515e4b17023SJohn Marino       lhs = gimple_get_lhs (stmt);
516e4b17023SJohn Marino       if (lhs == NULL_TREE)
517e4b17023SJohn Marino 	{
518e4b17023SJohn Marino 	  if (vect_print_dump_info (REPORT_SLP))
519e4b17023SJohn Marino 	    {
520e4b17023SJohn Marino 	      fprintf (vect_dump,
521e4b17023SJohn Marino 		       "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL ");
522e4b17023SJohn Marino 	      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
523e4b17023SJohn Marino 	    }
524e4b17023SJohn Marino 
525e4b17023SJohn Marino 	  vect_free_oprnd_info (&oprnds_info);
526e4b17023SJohn Marino 	  return false;
527e4b17023SJohn Marino 	}
528e4b17023SJohn Marino 
529e4b17023SJohn Marino        if (is_gimple_assign (stmt)
530e4b17023SJohn Marino 	   && gimple_assign_rhs_code (stmt) == COND_EXPR
531e4b17023SJohn Marino            && (cond = gimple_assign_rhs1 (stmt))
532e4b17023SJohn Marino            && !COMPARISON_CLASS_P (cond))
533e4b17023SJohn Marino         {
534e4b17023SJohn Marino           if (vect_print_dump_info (REPORT_SLP))
535e4b17023SJohn Marino             {
536e4b17023SJohn Marino               fprintf (vect_dump,
537e4b17023SJohn Marino                        "Build SLP failed: condition is not comparison ");
538e4b17023SJohn Marino               print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
539e4b17023SJohn Marino             }
540e4b17023SJohn Marino 
541e4b17023SJohn Marino 	  vect_free_oprnd_info (&oprnds_info);
542e4b17023SJohn Marino           return false;
543e4b17023SJohn Marino         }
544e4b17023SJohn Marino 
545e4b17023SJohn Marino       scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
546e4b17023SJohn Marino       vectype = get_vectype_for_scalar_type (scalar_type);
547e4b17023SJohn Marino       if (!vectype)
548e4b17023SJohn Marino         {
549e4b17023SJohn Marino           if (vect_print_dump_info (REPORT_SLP))
550e4b17023SJohn Marino             {
551e4b17023SJohn Marino               fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
552e4b17023SJohn Marino               print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
553e4b17023SJohn Marino             }
554e4b17023SJohn Marino 
555e4b17023SJohn Marino 	  vect_free_oprnd_info (&oprnds_info);
556e4b17023SJohn Marino           return false;
557e4b17023SJohn Marino         }
558e4b17023SJohn Marino 
559e4b17023SJohn Marino       /* In case of multiple types we need to detect the smallest type.  */
560e4b17023SJohn Marino       if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
561e4b17023SJohn Marino         {
562e4b17023SJohn Marino           *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
563e4b17023SJohn Marino           if (bb_vinfo)
564e4b17023SJohn Marino             vectorization_factor = *max_nunits;
565e4b17023SJohn Marino         }
566e4b17023SJohn Marino 
567e4b17023SJohn Marino       ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
568e4b17023SJohn Marino 
569e4b17023SJohn Marino       if (is_gimple_call (stmt))
570e4b17023SJohn Marino 	{
571e4b17023SJohn Marino 	  rhs_code = CALL_EXPR;
572e4b17023SJohn Marino 	  if (gimple_call_internal_p (stmt)
573e4b17023SJohn Marino 	      || gimple_call_tail_p (stmt)
574e4b17023SJohn Marino 	      || gimple_call_noreturn_p (stmt)
575e4b17023SJohn Marino 	      || !gimple_call_nothrow_p (stmt)
576e4b17023SJohn Marino 	      || gimple_call_chain (stmt))
577e4b17023SJohn Marino 	    {
578e4b17023SJohn Marino 	      if (vect_print_dump_info (REPORT_SLP))
579e4b17023SJohn Marino 		{
580e4b17023SJohn Marino 		  fprintf (vect_dump,
581e4b17023SJohn Marino 			   "Build SLP failed: unsupported call type ");
582e4b17023SJohn Marino 		  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
583e4b17023SJohn Marino 		}
584e4b17023SJohn Marino 
585e4b17023SJohn Marino 	      vect_free_oprnd_info (&oprnds_info);
586e4b17023SJohn Marino 	      return false;
587e4b17023SJohn Marino 	    }
588e4b17023SJohn Marino 	}
589e4b17023SJohn Marino       else
590e4b17023SJohn Marino 	rhs_code = gimple_assign_rhs_code (stmt);
591e4b17023SJohn Marino 
592e4b17023SJohn Marino       /* Check the operation.  */
593e4b17023SJohn Marino       if (i == 0)
594e4b17023SJohn Marino 	{
595e4b17023SJohn Marino 	  first_stmt_code = rhs_code;
596e4b17023SJohn Marino 
597e4b17023SJohn Marino 	  /* Shift arguments should be equal in all the packed stmts for a
598e4b17023SJohn Marino 	     vector shift with scalar shift operand.  */
599e4b17023SJohn Marino 	  if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
600e4b17023SJohn Marino 	      || rhs_code == LROTATE_EXPR
601e4b17023SJohn Marino 	      || rhs_code == RROTATE_EXPR)
602e4b17023SJohn Marino 	    {
603e4b17023SJohn Marino 	      vec_mode = TYPE_MODE (vectype);
604e4b17023SJohn Marino 
605e4b17023SJohn Marino 	      /* First see if we have a vector/vector shift.  */
606e4b17023SJohn Marino 	      optab = optab_for_tree_code (rhs_code, vectype,
607e4b17023SJohn Marino 					   optab_vector);
608e4b17023SJohn Marino 
609e4b17023SJohn Marino 	      if (!optab
610e4b17023SJohn Marino 		  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
611e4b17023SJohn Marino 		{
612e4b17023SJohn Marino 		  /* No vector/vector shift, try for a vector/scalar shift.  */
613e4b17023SJohn Marino 		  optab = optab_for_tree_code (rhs_code, vectype,
614e4b17023SJohn Marino 					       optab_scalar);
615e4b17023SJohn Marino 
616e4b17023SJohn Marino 		  if (!optab)
617e4b17023SJohn Marino 		    {
618e4b17023SJohn Marino 		      if (vect_print_dump_info (REPORT_SLP))
619e4b17023SJohn Marino 			fprintf (vect_dump, "Build SLP failed: no optab.");
620e4b17023SJohn Marino 	  	      vect_free_oprnd_info (&oprnds_info);
621e4b17023SJohn Marino 		      return false;
622e4b17023SJohn Marino 		    }
623e4b17023SJohn Marino 		  icode = (int) optab_handler (optab, vec_mode);
624e4b17023SJohn Marino 		  if (icode == CODE_FOR_nothing)
625e4b17023SJohn Marino 		    {
626e4b17023SJohn Marino 		      if (vect_print_dump_info (REPORT_SLP))
627e4b17023SJohn Marino 			fprintf (vect_dump, "Build SLP failed: "
628e4b17023SJohn Marino 				            "op not supported by target.");
629e4b17023SJohn Marino 	  	      vect_free_oprnd_info (&oprnds_info);
630e4b17023SJohn Marino 		      return false;
631e4b17023SJohn Marino 		    }
632e4b17023SJohn Marino 		  optab_op2_mode = insn_data[icode].operand[2].mode;
633e4b17023SJohn Marino 		  if (!VECTOR_MODE_P (optab_op2_mode))
634e4b17023SJohn Marino 		    {
635e4b17023SJohn Marino 		      need_same_oprnds = true;
636e4b17023SJohn Marino 		      first_op1 = gimple_assign_rhs2 (stmt);
637e4b17023SJohn Marino 		    }
638e4b17023SJohn Marino 		}
639e4b17023SJohn Marino 	    }
640e4b17023SJohn Marino 	  else if (rhs_code == WIDEN_LSHIFT_EXPR)
641e4b17023SJohn Marino             {
642e4b17023SJohn Marino               need_same_oprnds = true;
643e4b17023SJohn Marino               first_op1 = gimple_assign_rhs2 (stmt);
644e4b17023SJohn Marino             }
645e4b17023SJohn Marino 	}
646e4b17023SJohn Marino       else
647e4b17023SJohn Marino 	{
648e4b17023SJohn Marino 	  if (first_stmt_code != rhs_code
649e4b17023SJohn Marino 	      && (first_stmt_code != IMAGPART_EXPR
650e4b17023SJohn Marino 		  || rhs_code != REALPART_EXPR)
651e4b17023SJohn Marino 	      && (first_stmt_code != REALPART_EXPR
652e4b17023SJohn Marino 		  || rhs_code != IMAGPART_EXPR)
653e4b17023SJohn Marino               && !(STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt))
654e4b17023SJohn Marino                    && (first_stmt_code == ARRAY_REF
655e4b17023SJohn Marino                        || first_stmt_code == INDIRECT_REF
656e4b17023SJohn Marino                        || first_stmt_code == COMPONENT_REF
657e4b17023SJohn Marino                        || first_stmt_code == MEM_REF)))
658e4b17023SJohn Marino 	    {
659e4b17023SJohn Marino 	      if (vect_print_dump_info (REPORT_SLP))
660e4b17023SJohn Marino 		{
661e4b17023SJohn Marino 		  fprintf (vect_dump,
662e4b17023SJohn Marino 			   "Build SLP failed: different operation in stmt ");
663e4b17023SJohn Marino 		  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
664e4b17023SJohn Marino 		}
665e4b17023SJohn Marino 
666e4b17023SJohn Marino 	      vect_free_oprnd_info (&oprnds_info);
667e4b17023SJohn Marino 	      return false;
668e4b17023SJohn Marino 	    }
669e4b17023SJohn Marino 
670e4b17023SJohn Marino 	  if (need_same_oprnds
671e4b17023SJohn Marino 	      && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
672e4b17023SJohn Marino 	    {
673e4b17023SJohn Marino 	      if (vect_print_dump_info (REPORT_SLP))
674e4b17023SJohn Marino 		{
675e4b17023SJohn Marino 		  fprintf (vect_dump,
676e4b17023SJohn Marino 			   "Build SLP failed: different shift arguments in ");
677e4b17023SJohn Marino 		  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
678e4b17023SJohn Marino 		}
679e4b17023SJohn Marino 
680e4b17023SJohn Marino 	      vect_free_oprnd_info (&oprnds_info);
681e4b17023SJohn Marino 	      return false;
682e4b17023SJohn Marino 	    }
683e4b17023SJohn Marino 
684e4b17023SJohn Marino 	  if (rhs_code == CALL_EXPR)
685e4b17023SJohn Marino 	    {
686e4b17023SJohn Marino 	      gimple first_stmt = VEC_index (gimple, stmts, 0);
687e4b17023SJohn Marino 	      if (gimple_call_num_args (stmt) != nops
688e4b17023SJohn Marino 		  || !operand_equal_p (gimple_call_fn (first_stmt),
689e4b17023SJohn Marino 				       gimple_call_fn (stmt), 0)
690e4b17023SJohn Marino 		  || gimple_call_fntype (first_stmt)
691e4b17023SJohn Marino 		     != gimple_call_fntype (stmt))
692e4b17023SJohn Marino 		{
693e4b17023SJohn Marino 		  if (vect_print_dump_info (REPORT_SLP))
694e4b17023SJohn Marino 		    {
695e4b17023SJohn Marino 		      fprintf (vect_dump,
696e4b17023SJohn Marino 			       "Build SLP failed: different calls in ");
697e4b17023SJohn Marino 		      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
698e4b17023SJohn Marino 		    }
699e4b17023SJohn Marino 
700e4b17023SJohn Marino 		  vect_free_oprnd_info (&oprnds_info);
701e4b17023SJohn Marino 		  return false;
702e4b17023SJohn Marino 		}
703e4b17023SJohn Marino 	    }
704e4b17023SJohn Marino 	}
705e4b17023SJohn Marino 
706e4b17023SJohn Marino       /* Strided store or load.  */
707e4b17023SJohn Marino       if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
708e4b17023SJohn Marino 	{
709e4b17023SJohn Marino 	  if (REFERENCE_CLASS_P (lhs))
710e4b17023SJohn Marino 	    {
711e4b17023SJohn Marino 	      /* Store.  */
712e4b17023SJohn Marino 	      if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node,
713e4b17023SJohn Marino 						stmt, ncopies_for_cost,
714e4b17023SJohn Marino 						(i == 0), &oprnds_info))
715e4b17023SJohn Marino 		{
716e4b17023SJohn Marino 	  	  vect_free_oprnd_info (&oprnds_info);
717e4b17023SJohn Marino  		  return false;
718e4b17023SJohn Marino 		}
719e4b17023SJohn Marino 	    }
720e4b17023SJohn Marino 	  else
721e4b17023SJohn Marino 	    {
722e4b17023SJohn Marino 	      /* Load.  */
723e4b17023SJohn Marino               /* FORNOW: Check that there is no gap between the loads.  */
724e4b17023SJohn Marino               if ((GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
725e4b17023SJohn Marino                    && GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
726e4b17023SJohn Marino                   || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt
727e4b17023SJohn Marino                       && GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
728e4b17023SJohn Marino                 {
729e4b17023SJohn Marino                   if (vect_print_dump_info (REPORT_SLP))
730e4b17023SJohn Marino                     {
731e4b17023SJohn Marino                       fprintf (vect_dump, "Build SLP failed: strided "
732e4b17023SJohn Marino                                           "loads have gaps ");
733e4b17023SJohn Marino                       print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
734e4b17023SJohn Marino                     }
735e4b17023SJohn Marino 
736e4b17023SJohn Marino 	  	  vect_free_oprnd_info (&oprnds_info);
737e4b17023SJohn Marino                   return false;
738e4b17023SJohn Marino                 }
739e4b17023SJohn Marino 
740e4b17023SJohn Marino               /* Check that the size of interleaved loads group is not
741e4b17023SJohn Marino                  greater than the SLP group size.  */
742e4b17023SJohn Marino               if (loop_vinfo
743e4b17023SJohn Marino                   && GROUP_SIZE (vinfo_for_stmt (stmt)) > ncopies * group_size)
744e4b17023SJohn Marino                 {
745e4b17023SJohn Marino                   if (vect_print_dump_info (REPORT_SLP))
746e4b17023SJohn Marino                     {
747e4b17023SJohn Marino                       fprintf (vect_dump, "Build SLP failed: the number of "
748e4b17023SJohn Marino                                           "interleaved loads is greater than"
749e4b17023SJohn Marino                                           " the SLP group size ");
750e4b17023SJohn Marino                       print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
751e4b17023SJohn Marino                     }
752e4b17023SJohn Marino 
753e4b17023SJohn Marino 	  	  vect_free_oprnd_info (&oprnds_info);
754e4b17023SJohn Marino                   return false;
755e4b17023SJohn Marino                 }
756e4b17023SJohn Marino 
757e4b17023SJohn Marino               first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
758e4b17023SJohn Marino               if (prev_first_load)
759e4b17023SJohn Marino                 {
760e4b17023SJohn Marino                   /* Check that there are no loads from different interleaving
761e4b17023SJohn Marino                      chains in the same node.  The only exception is complex
762e4b17023SJohn Marino                      numbers.  */
763e4b17023SJohn Marino                   if (prev_first_load != first_load
764e4b17023SJohn Marino                       && rhs_code != REALPART_EXPR
765e4b17023SJohn Marino                       && rhs_code != IMAGPART_EXPR)
766e4b17023SJohn Marino                     {
767e4b17023SJohn Marino                       if (vect_print_dump_info (REPORT_SLP))
768e4b17023SJohn Marino                         {
769e4b17023SJohn Marino                           fprintf (vect_dump, "Build SLP failed: different "
770e4b17023SJohn Marino                                            "interleaving chains in one node ");
771e4b17023SJohn Marino                           print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
772e4b17023SJohn Marino                         }
773e4b17023SJohn Marino 
774e4b17023SJohn Marino 	  	      vect_free_oprnd_info (&oprnds_info);
775e4b17023SJohn Marino                       return false;
776e4b17023SJohn Marino                     }
777e4b17023SJohn Marino                 }
778e4b17023SJohn Marino               else
779e4b17023SJohn Marino                 prev_first_load = first_load;
780e4b17023SJohn Marino 
781e4b17023SJohn Marino               if (first_load == stmt)
782e4b17023SJohn Marino                 {
783e4b17023SJohn Marino                   first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
784e4b17023SJohn Marino                   if (vect_supportable_dr_alignment (first_dr, false)
785e4b17023SJohn Marino                       == dr_unaligned_unsupported)
786e4b17023SJohn Marino                     {
787e4b17023SJohn Marino                       if (vect_print_dump_info (REPORT_SLP))
788e4b17023SJohn Marino                         {
789e4b17023SJohn Marino                           fprintf (vect_dump, "Build SLP failed: unsupported "
790e4b17023SJohn Marino                                               "unaligned load ");
791e4b17023SJohn Marino                           print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
792e4b17023SJohn Marino                         }
793e4b17023SJohn Marino 
794e4b17023SJohn Marino 	  	      vect_free_oprnd_info (&oprnds_info);
795e4b17023SJohn Marino                       return false;
796e4b17023SJohn Marino                     }
797e4b17023SJohn Marino 
798e4b17023SJohn Marino                   /* Analyze costs (for the first stmt in the group).  */
799e4b17023SJohn Marino                   vect_model_load_cost (vinfo_for_stmt (stmt),
800e4b17023SJohn Marino                                         ncopies_for_cost, false, *node);
801e4b17023SJohn Marino                 }
802e4b17023SJohn Marino 
803e4b17023SJohn Marino               /* Store the place of this load in the interleaving chain.  In
804e4b17023SJohn Marino                  case that permutation is needed we later decide if a specific
805e4b17023SJohn Marino                  permutation is supported.  */
806e4b17023SJohn Marino               load_place = vect_get_place_in_interleaving_chain (stmt,
807e4b17023SJohn Marino                                                                  first_load);
808e4b17023SJohn Marino               if (load_place != i)
809e4b17023SJohn Marino                 permutation = true;
810e4b17023SJohn Marino 
811e4b17023SJohn Marino               VEC_safe_push (int, heap, *load_permutation, load_place);
812e4b17023SJohn Marino 
813e4b17023SJohn Marino               /* We stop the tree when we reach a group of loads.  */
814e4b17023SJohn Marino               stop_recursion = true;
815e4b17023SJohn Marino              continue;
816e4b17023SJohn Marino            }
817e4b17023SJohn Marino         } /* Strided access.  */
818e4b17023SJohn Marino       else
819e4b17023SJohn Marino 	{
820e4b17023SJohn Marino 	  if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
821e4b17023SJohn Marino 	    {
822e4b17023SJohn Marino 	      /* Not strided load.  */
823e4b17023SJohn Marino 	      if (vect_print_dump_info (REPORT_SLP))
824e4b17023SJohn Marino 		{
825e4b17023SJohn Marino 		  fprintf (vect_dump, "Build SLP failed: not strided load ");
826e4b17023SJohn Marino 		  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
827e4b17023SJohn Marino 		}
828e4b17023SJohn Marino 
829e4b17023SJohn Marino 	      /* FORNOW: Not strided loads are not supported.  */
830e4b17023SJohn Marino 	      vect_free_oprnd_info (&oprnds_info);
831e4b17023SJohn Marino 	      return false;
832e4b17023SJohn Marino 	    }
833e4b17023SJohn Marino 
834e4b17023SJohn Marino 	  /* Not memory operation.  */
835e4b17023SJohn Marino 	  if (TREE_CODE_CLASS (rhs_code) != tcc_binary
836e4b17023SJohn Marino 	      && TREE_CODE_CLASS (rhs_code) != tcc_unary
837e4b17023SJohn Marino 	      && rhs_code != COND_EXPR
838e4b17023SJohn Marino 	      && rhs_code != CALL_EXPR)
839e4b17023SJohn Marino 	    {
840e4b17023SJohn Marino 	      if (vect_print_dump_info (REPORT_SLP))
841e4b17023SJohn Marino 		{
842e4b17023SJohn Marino 		  fprintf (vect_dump, "Build SLP failed: operation");
843e4b17023SJohn Marino 		  fprintf (vect_dump, " unsupported ");
844e4b17023SJohn Marino 		  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
845e4b17023SJohn Marino 		}
846e4b17023SJohn Marino 
847e4b17023SJohn Marino 	      vect_free_oprnd_info (&oprnds_info);
848e4b17023SJohn Marino 	      return false;
849e4b17023SJohn Marino 	    }
850e4b17023SJohn Marino 
851e4b17023SJohn Marino           if (rhs_code == COND_EXPR)
852e4b17023SJohn Marino             {
853e4b17023SJohn Marino               tree cond_expr = gimple_assign_rhs1 (stmt);
854e4b17023SJohn Marino 
855e4b17023SJohn Marino 	      if (i == 0)
856e4b17023SJohn Marino 		first_cond_code = TREE_CODE (cond_expr);
857e4b17023SJohn Marino               else if (first_cond_code != TREE_CODE (cond_expr))
858e4b17023SJohn Marino                 {
859e4b17023SJohn Marino                   if (vect_print_dump_info (REPORT_SLP))
860e4b17023SJohn Marino                     {
861e4b17023SJohn Marino                       fprintf (vect_dump, "Build SLP failed: different"
862e4b17023SJohn Marino 					  " operation");
863e4b17023SJohn Marino                       print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
864e4b17023SJohn Marino                     }
865e4b17023SJohn Marino 
866e4b17023SJohn Marino 		  vect_free_oprnd_info (&oprnds_info);
867e4b17023SJohn Marino                   return false;
868e4b17023SJohn Marino 		}
869e4b17023SJohn Marino             }
870e4b17023SJohn Marino 
871e4b17023SJohn Marino 	  /* Find the def-stmts.  */
872e4b17023SJohn Marino 	  if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt,
873e4b17023SJohn Marino 					    ncopies_for_cost, (i == 0),
874e4b17023SJohn Marino 					    &oprnds_info))
875e4b17023SJohn Marino 	    {
876e4b17023SJohn Marino 	      vect_free_oprnd_info (&oprnds_info);
877e4b17023SJohn Marino 	      return false;
878e4b17023SJohn Marino 	    }
879e4b17023SJohn Marino 	}
880e4b17023SJohn Marino     }
881e4b17023SJohn Marino 
882e4b17023SJohn Marino   /* Add the costs of the node to the overall instance costs.  */
883e4b17023SJohn Marino   *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
884e4b17023SJohn Marino   *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);
885e4b17023SJohn Marino 
886e4b17023SJohn Marino   /* Strided loads were reached - stop the recursion.  */
887e4b17023SJohn Marino   if (stop_recursion)
888e4b17023SJohn Marino     {
889e4b17023SJohn Marino       VEC_safe_push (slp_tree, heap, *loads, *node);
890e4b17023SJohn Marino       if (permutation)
891e4b17023SJohn Marino         {
892e4b17023SJohn Marino 
893e4b17023SJohn Marino           *loads_permuted = true;
894e4b17023SJohn Marino           *inside_cost
895e4b17023SJohn Marino             += targetm.vectorize.builtin_vectorization_cost (vec_perm, NULL, 0)
896e4b17023SJohn Marino                * group_size;
897e4b17023SJohn Marino         }
898e4b17023SJohn Marino       else
899e4b17023SJohn Marino         {
900e4b17023SJohn Marino           /* We don't check here complex numbers chains, so we set
901e4b17023SJohn Marino              LOADS_PERMUTED for further check in
902e4b17023SJohn Marino              vect_supported_load_permutation_p.  */
903e4b17023SJohn Marino           if (rhs_code == REALPART_EXPR || rhs_code == IMAGPART_EXPR)
904e4b17023SJohn Marino             *loads_permuted = true;
905e4b17023SJohn Marino         }
906e4b17023SJohn Marino 
907e4b17023SJohn Marino       vect_free_oprnd_info (&oprnds_info);
908e4b17023SJohn Marino       return true;
909e4b17023SJohn Marino     }
910e4b17023SJohn Marino 
911e4b17023SJohn Marino   /* Create SLP_TREE nodes for the definition node/s.  */
912e4b17023SJohn Marino   FOR_EACH_VEC_ELT (slp_oprnd_info, oprnds_info, i, oprnd_info)
913e4b17023SJohn Marino     {
914e4b17023SJohn Marino       slp_tree child;
915e4b17023SJohn Marino 
916e4b17023SJohn Marino       if (oprnd_info->first_dt != vect_internal_def)
917e4b17023SJohn Marino         continue;
918e4b17023SJohn Marino 
919e4b17023SJohn Marino       child = vect_create_new_slp_node (oprnd_info->def_stmts);
920e4b17023SJohn Marino       if (!child
921e4b17023SJohn Marino           || !vect_build_slp_tree (loop_vinfo, bb_vinfo, &child, group_size,
922e4b17023SJohn Marino 				inside_cost, outside_cost, ncopies_for_cost,
923e4b17023SJohn Marino 				max_nunits, load_permutation, loads,
924e4b17023SJohn Marino 				vectorization_factor, loads_permuted))
925e4b17023SJohn Marino         {
926e4b17023SJohn Marino 	  if (child)
927e4b17023SJohn Marino 	    oprnd_info->def_stmts = NULL;
928e4b17023SJohn Marino 	  vect_free_slp_tree (child);
929e4b17023SJohn Marino 	  vect_free_oprnd_info (&oprnds_info);
930e4b17023SJohn Marino    	  return false;
931e4b17023SJohn Marino 	}
932e4b17023SJohn Marino 
933e4b17023SJohn Marino       oprnd_info->def_stmts = NULL;
934e4b17023SJohn Marino       VEC_quick_push (slp_void_p, SLP_TREE_CHILDREN (*node), child);
935e4b17023SJohn Marino     }
936e4b17023SJohn Marino 
937e4b17023SJohn Marino   vect_free_oprnd_info (&oprnds_info);
938e4b17023SJohn Marino   return true;
939e4b17023SJohn Marino }
940e4b17023SJohn Marino 
941e4b17023SJohn Marino 
942e4b17023SJohn Marino static void
vect_print_slp_tree(slp_tree node)943e4b17023SJohn Marino vect_print_slp_tree (slp_tree node)
944e4b17023SJohn Marino {
945e4b17023SJohn Marino   int i;
946e4b17023SJohn Marino   gimple stmt;
947e4b17023SJohn Marino   slp_void_p child;
948e4b17023SJohn Marino 
949e4b17023SJohn Marino   if (!node)
950e4b17023SJohn Marino     return;
951e4b17023SJohn Marino 
952e4b17023SJohn Marino   fprintf (vect_dump, "node ");
953e4b17023SJohn Marino   FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
954e4b17023SJohn Marino     {
955e4b17023SJohn Marino       fprintf (vect_dump, "\n\tstmt %d ", i);
956e4b17023SJohn Marino       print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
957e4b17023SJohn Marino     }
958e4b17023SJohn Marino   fprintf (vect_dump, "\n");
959e4b17023SJohn Marino 
960e4b17023SJohn Marino   FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
961e4b17023SJohn Marino     vect_print_slp_tree ((slp_tree) child);
962e4b17023SJohn Marino }
963e4b17023SJohn Marino 
964e4b17023SJohn Marino 
965e4b17023SJohn Marino /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
966e4b17023SJohn Marino    If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
967e4b17023SJohn Marino    J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
968e4b17023SJohn Marino    stmts in NODE are to be marked.  */
969e4b17023SJohn Marino 
970e4b17023SJohn Marino static void
vect_mark_slp_stmts(slp_tree node,enum slp_vect_type mark,int j)971e4b17023SJohn Marino vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
972e4b17023SJohn Marino {
973e4b17023SJohn Marino   int i;
974e4b17023SJohn Marino   gimple stmt;
975e4b17023SJohn Marino   slp_void_p child;
976e4b17023SJohn Marino 
977e4b17023SJohn Marino   if (!node)
978e4b17023SJohn Marino     return;
979e4b17023SJohn Marino 
980e4b17023SJohn Marino   FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
981e4b17023SJohn Marino     if (j < 0 || i == j)
982e4b17023SJohn Marino       STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
983e4b17023SJohn Marino 
984e4b17023SJohn Marino   FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
985e4b17023SJohn Marino     vect_mark_slp_stmts ((slp_tree) child, mark, j);
986e4b17023SJohn Marino }
987e4b17023SJohn Marino 
988e4b17023SJohn Marino 
989e4b17023SJohn Marino /* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */
990e4b17023SJohn Marino 
991e4b17023SJohn Marino static void
vect_mark_slp_stmts_relevant(slp_tree node)992e4b17023SJohn Marino vect_mark_slp_stmts_relevant (slp_tree node)
993e4b17023SJohn Marino {
994e4b17023SJohn Marino   int i;
995e4b17023SJohn Marino   gimple stmt;
996e4b17023SJohn Marino   stmt_vec_info stmt_info;
997e4b17023SJohn Marino   slp_void_p child;
998e4b17023SJohn Marino 
999e4b17023SJohn Marino   if (!node)
1000e4b17023SJohn Marino     return;
1001e4b17023SJohn Marino 
1002e4b17023SJohn Marino   FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
1003e4b17023SJohn Marino     {
1004e4b17023SJohn Marino       stmt_info = vinfo_for_stmt (stmt);
1005e4b17023SJohn Marino       gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
1006e4b17023SJohn Marino                   || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
1007e4b17023SJohn Marino       STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
1008e4b17023SJohn Marino     }
1009e4b17023SJohn Marino 
1010e4b17023SJohn Marino   FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
1011e4b17023SJohn Marino     vect_mark_slp_stmts_relevant ((slp_tree) child);
1012e4b17023SJohn Marino }
1013e4b17023SJohn Marino 
1014e4b17023SJohn Marino 
1015e4b17023SJohn Marino /* Check if the permutation required by the SLP INSTANCE is supported.
1016e4b17023SJohn Marino    Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed.  */
1017e4b17023SJohn Marino 
1018e4b17023SJohn Marino static bool
vect_supported_slp_permutation_p(slp_instance instance)1019e4b17023SJohn Marino vect_supported_slp_permutation_p (slp_instance instance)
1020e4b17023SJohn Marino {
1021e4b17023SJohn Marino   slp_tree node = VEC_index (slp_tree, SLP_INSTANCE_LOADS (instance), 0);
1022e4b17023SJohn Marino   gimple stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
1023e4b17023SJohn Marino   gimple first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
1024e4b17023SJohn Marino   VEC (slp_tree, heap) *sorted_loads = NULL;
1025e4b17023SJohn Marino   int index;
1026e4b17023SJohn Marino   slp_tree *tmp_loads = NULL;
1027e4b17023SJohn Marino   int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j;
1028e4b17023SJohn Marino   slp_tree load;
1029e4b17023SJohn Marino 
1030e4b17023SJohn Marino   /* FORNOW: The only supported loads permutation is loads from the same
1031e4b17023SJohn Marino      location in all the loads in the node, when the data-refs in
1032e4b17023SJohn Marino      nodes of LOADS constitute an interleaving chain.
1033e4b17023SJohn Marino      Sort the nodes according to the order of accesses in the chain.  */
1034e4b17023SJohn Marino   tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size);
1035e4b17023SJohn Marino   for (i = 0, j = 0;
1036e4b17023SJohn Marino        VEC_iterate (int, SLP_INSTANCE_LOAD_PERMUTATION (instance), i, index)
1037e4b17023SJohn Marino        && VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), j, load);
1038e4b17023SJohn Marino        i += group_size, j++)
1039e4b17023SJohn Marino     {
1040e4b17023SJohn Marino       gimple scalar_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (load), 0);
1041e4b17023SJohn Marino       /* Check that the loads are all in the same interleaving chain.  */
1042e4b17023SJohn Marino       if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (scalar_stmt)) != first_load)
1043e4b17023SJohn Marino         {
1044e4b17023SJohn Marino           if (vect_print_dump_info (REPORT_DETAILS))
1045e4b17023SJohn Marino             {
1046e4b17023SJohn Marino               fprintf (vect_dump, "Build SLP failed: unsupported data "
1047e4b17023SJohn Marino                                    "permutation ");
1048e4b17023SJohn Marino               print_gimple_stmt (vect_dump, scalar_stmt, 0, TDF_SLIM);
1049e4b17023SJohn Marino             }
1050e4b17023SJohn Marino 
1051e4b17023SJohn Marino           free (tmp_loads);
1052e4b17023SJohn Marino           return false;
1053e4b17023SJohn Marino         }
1054e4b17023SJohn Marino 
1055e4b17023SJohn Marino       tmp_loads[index] = load;
1056e4b17023SJohn Marino     }
1057e4b17023SJohn Marino 
1058e4b17023SJohn Marino   sorted_loads = VEC_alloc (slp_tree, heap, group_size);
1059e4b17023SJohn Marino   for (i = 0; i < group_size; i++)
1060e4b17023SJohn Marino      VEC_safe_push (slp_tree, heap, sorted_loads, tmp_loads[i]);
1061e4b17023SJohn Marino 
1062e4b17023SJohn Marino   VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
1063e4b17023SJohn Marino   SLP_INSTANCE_LOADS (instance) = sorted_loads;
1064e4b17023SJohn Marino   free (tmp_loads);
1065e4b17023SJohn Marino 
1066e4b17023SJohn Marino   if (!vect_transform_slp_perm_load (stmt, NULL, NULL,
1067e4b17023SJohn Marino                                      SLP_INSTANCE_UNROLLING_FACTOR (instance),
1068e4b17023SJohn Marino                                      instance, true))
1069e4b17023SJohn Marino     return false;
1070e4b17023SJohn Marino 
1071e4b17023SJohn Marino   return true;
1072e4b17023SJohn Marino }
1073e4b17023SJohn Marino 
1074e4b17023SJohn Marino 
1075e4b17023SJohn Marino /* Rearrange the statements of NODE according to PERMUTATION.  */
1076e4b17023SJohn Marino 
1077e4b17023SJohn Marino static void
vect_slp_rearrange_stmts(slp_tree node,unsigned int group_size,VEC (int,heap)* permutation)1078e4b17023SJohn Marino vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
1079e4b17023SJohn Marino                           VEC (int, heap) *permutation)
1080e4b17023SJohn Marino {
1081e4b17023SJohn Marino   gimple stmt;
1082e4b17023SJohn Marino   VEC (gimple, heap) *tmp_stmts;
1083e4b17023SJohn Marino   unsigned int index, i;
1084e4b17023SJohn Marino   slp_void_p child;
1085e4b17023SJohn Marino 
1086e4b17023SJohn Marino   if (!node)
1087e4b17023SJohn Marino     return;
1088e4b17023SJohn Marino 
1089e4b17023SJohn Marino   FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
1090e4b17023SJohn Marino     vect_slp_rearrange_stmts ((slp_tree) child, group_size, permutation);
1091e4b17023SJohn Marino 
1092e4b17023SJohn Marino   gcc_assert (group_size == VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node)));
1093e4b17023SJohn Marino   tmp_stmts = VEC_alloc (gimple, heap, group_size);
1094e4b17023SJohn Marino 
1095e4b17023SJohn Marino   for (i = 0; i < group_size; i++)
1096e4b17023SJohn Marino     VEC_safe_push (gimple, heap, tmp_stmts, NULL);
1097e4b17023SJohn Marino 
1098e4b17023SJohn Marino   FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
1099e4b17023SJohn Marino     {
1100e4b17023SJohn Marino       index = VEC_index (int, permutation, i);
1101e4b17023SJohn Marino       VEC_replace (gimple, tmp_stmts, index, stmt);
1102e4b17023SJohn Marino     }
1103e4b17023SJohn Marino 
1104e4b17023SJohn Marino   VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
1105e4b17023SJohn Marino   SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
1106e4b17023SJohn Marino }
1107e4b17023SJohn Marino 
1108e4b17023SJohn Marino 
1109e4b17023SJohn Marino /* Check if the required load permutation is supported.
1110e4b17023SJohn Marino    LOAD_PERMUTATION contains a list of indices of the loads.
1111e4b17023SJohn Marino    In SLP this permutation is relative to the order of strided stores that are
1112e4b17023SJohn Marino    the base of the SLP instance.  */
1113e4b17023SJohn Marino 
1114e4b17023SJohn Marino static bool
vect_supported_load_permutation_p(slp_instance slp_instn,int group_size,VEC (int,heap)* load_permutation)1115e4b17023SJohn Marino vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
1116e4b17023SJohn Marino                                    VEC (int, heap) *load_permutation)
1117e4b17023SJohn Marino {
1118e4b17023SJohn Marino   int i = 0, j, prev = -1, next, k, number_of_groups;
1119e4b17023SJohn Marino   bool supported, bad_permutation = false;
1120e4b17023SJohn Marino   sbitmap load_index;
1121e4b17023SJohn Marino   slp_tree node, other_complex_node;
1122e4b17023SJohn Marino   gimple stmt, first = NULL, other_node_first, load, next_load, first_load;
1123e4b17023SJohn Marino   unsigned complex_numbers = 0;
1124e4b17023SJohn Marino   struct data_reference *dr;
1125e4b17023SJohn Marino   bb_vec_info bb_vinfo;
1126e4b17023SJohn Marino 
1127e4b17023SJohn Marino   /* FORNOW: permutations are only supported in SLP.  */
1128e4b17023SJohn Marino   if (!slp_instn)
1129e4b17023SJohn Marino     return false;
1130e4b17023SJohn Marino 
1131e4b17023SJohn Marino   if (vect_print_dump_info (REPORT_SLP))
1132e4b17023SJohn Marino     {
1133e4b17023SJohn Marino       fprintf (vect_dump, "Load permutation ");
1134e4b17023SJohn Marino       FOR_EACH_VEC_ELT (int, load_permutation, i, next)
1135e4b17023SJohn Marino         fprintf (vect_dump, "%d ", next);
1136e4b17023SJohn Marino     }
1137e4b17023SJohn Marino 
1138e4b17023SJohn Marino   /* In case of reduction every load permutation is allowed, since the order
1139e4b17023SJohn Marino      of the reduction statements is not important (as opposed to the case of
1140e4b17023SJohn Marino      strided stores).  The only condition we need to check is that all the
1141e4b17023SJohn Marino      load nodes are of the same size and have the same permutation (and then
1142e4b17023SJohn Marino      rearrange all the nodes of the SLP instance according to this
1143e4b17023SJohn Marino      permutation).  */
1144e4b17023SJohn Marino 
1145e4b17023SJohn Marino   /* Check that all the load nodes are of the same size.  */
1146e4b17023SJohn Marino   FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
1147e4b17023SJohn Marino     {
1148e4b17023SJohn Marino       if (VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node))
1149e4b17023SJohn Marino           != (unsigned) group_size)
1150e4b17023SJohn Marino         return false;
1151e4b17023SJohn Marino 
1152e4b17023SJohn Marino       stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
1153e4b17023SJohn Marino       if (is_gimple_assign (stmt)
1154e4b17023SJohn Marino           && (gimple_assign_rhs_code (stmt) == REALPART_EXPR
1155e4b17023SJohn Marino               || gimple_assign_rhs_code (stmt) == IMAGPART_EXPR))
1156e4b17023SJohn Marino         complex_numbers++;
1157e4b17023SJohn Marino     }
1158e4b17023SJohn Marino 
1159e4b17023SJohn Marino   /* Complex operands can be swapped as following:
1160e4b17023SJohn Marino       real_c = real_b + real_a;
1161e4b17023SJohn Marino       imag_c = imag_a + imag_b;
1162e4b17023SJohn Marino      i.e., we have {real_b, imag_a} and {real_a, imag_b} instead of
1163e4b17023SJohn Marino      {real_a, imag_a} and {real_b, imag_b}.  We check here that if interleaving
1164e4b17023SJohn Marino      chains are mixed, they match the above pattern.  */
1165e4b17023SJohn Marino   if (complex_numbers)
1166e4b17023SJohn Marino     {
1167e4b17023SJohn Marino       FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
1168e4b17023SJohn Marino         {
1169e4b17023SJohn Marino 	  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), j, stmt)
1170e4b17023SJohn Marino             {
1171e4b17023SJohn Marino               if (j == 0)
1172e4b17023SJohn Marino                 first = stmt;
1173e4b17023SJohn Marino               else
1174e4b17023SJohn Marino                 {
1175e4b17023SJohn Marino                   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != first)
1176e4b17023SJohn Marino                     {
1177e4b17023SJohn Marino                       if (complex_numbers != 2)
1178e4b17023SJohn Marino                         return false;
1179e4b17023SJohn Marino 
1180e4b17023SJohn Marino                       if (i == 0)
1181e4b17023SJohn Marino                         k = 1;
1182e4b17023SJohn Marino                       else
1183e4b17023SJohn Marino                         k = 0;
1184e4b17023SJohn Marino 
1185e4b17023SJohn Marino                       other_complex_node = VEC_index (slp_tree,
1186e4b17023SJohn Marino                                             SLP_INSTANCE_LOADS (slp_instn), k);
1187e4b17023SJohn Marino                       other_node_first = VEC_index (gimple,
1188e4b17023SJohn Marino                                 SLP_TREE_SCALAR_STMTS (other_complex_node), 0);
1189e4b17023SJohn Marino 
1190e4b17023SJohn Marino                       if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
1191e4b17023SJohn Marino                           != other_node_first)
1192e4b17023SJohn Marino                        return false;
1193e4b17023SJohn Marino                     }
1194e4b17023SJohn Marino                 }
1195e4b17023SJohn Marino             }
1196e4b17023SJohn Marino         }
1197e4b17023SJohn Marino     }
1198e4b17023SJohn Marino 
1199e4b17023SJohn Marino   /* We checked that this case ok, so there is no need to proceed with
1200e4b17023SJohn Marino      permutation tests.  */
1201e4b17023SJohn Marino   if (complex_numbers == 2
1202e4b17023SJohn Marino       && VEC_length (slp_tree, SLP_INSTANCE_LOADS (slp_instn)) == 2)
1203e4b17023SJohn Marino     {
1204e4b17023SJohn Marino       VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (slp_instn));
1205e4b17023SJohn Marino       VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
1206e4b17023SJohn Marino       return true;
1207e4b17023SJohn Marino     }
1208e4b17023SJohn Marino 
1209e4b17023SJohn Marino   node = SLP_INSTANCE_TREE (slp_instn);
1210e4b17023SJohn Marino   stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
1211e4b17023SJohn Marino   /* LOAD_PERMUTATION is a list of indices of all the loads of the SLP
1212e4b17023SJohn Marino      instance, not all the loads belong to the same node or interleaving
1213e4b17023SJohn Marino      group.  Hence, we need to divide them into groups according to
1214e4b17023SJohn Marino      GROUP_SIZE.  */
1215e4b17023SJohn Marino   number_of_groups = VEC_length (int, load_permutation) / group_size;
1216e4b17023SJohn Marino 
1217e4b17023SJohn Marino   /* Reduction (there are no data-refs in the root).
1218e4b17023SJohn Marino      In reduction chain the order of the loads is important.  */
1219e4b17023SJohn Marino   if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
1220e4b17023SJohn Marino       && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1221e4b17023SJohn Marino     {
1222e4b17023SJohn Marino       int first_group_load_index;
1223e4b17023SJohn Marino 
1224e4b17023SJohn Marino       /* Compare all the permutation sequences to the first one.  */
1225e4b17023SJohn Marino       for (i = 1; i < number_of_groups; i++)
1226e4b17023SJohn Marino         {
1227e4b17023SJohn Marino           k = 0;
1228e4b17023SJohn Marino           for (j = i * group_size; j < i * group_size + group_size; j++)
1229e4b17023SJohn Marino             {
1230e4b17023SJohn Marino               next = VEC_index (int, load_permutation, j);
1231e4b17023SJohn Marino               first_group_load_index = VEC_index (int, load_permutation, k);
1232e4b17023SJohn Marino 
1233e4b17023SJohn Marino               if (next != first_group_load_index)
1234e4b17023SJohn Marino                 {
1235e4b17023SJohn Marino                   bad_permutation = true;
1236e4b17023SJohn Marino                   break;
1237e4b17023SJohn Marino                 }
1238e4b17023SJohn Marino 
1239e4b17023SJohn Marino               k++;
1240e4b17023SJohn Marino             }
1241e4b17023SJohn Marino 
1242e4b17023SJohn Marino           if (bad_permutation)
1243e4b17023SJohn Marino             break;
1244e4b17023SJohn Marino         }
1245e4b17023SJohn Marino 
1246e4b17023SJohn Marino       if (!bad_permutation)
1247e4b17023SJohn Marino         {
1248e4b17023SJohn Marino           /* Check that the loads in the first sequence are different and there
1249e4b17023SJohn Marino              are no gaps between them.  */
1250e4b17023SJohn Marino           load_index = sbitmap_alloc (group_size);
1251e4b17023SJohn Marino           sbitmap_zero (load_index);
1252e4b17023SJohn Marino           for (k = 0; k < group_size; k++)
1253e4b17023SJohn Marino             {
1254e4b17023SJohn Marino               first_group_load_index = VEC_index (int, load_permutation, k);
1255e4b17023SJohn Marino               if (TEST_BIT (load_index, first_group_load_index))
1256e4b17023SJohn Marino                 {
1257e4b17023SJohn Marino                   bad_permutation = true;
1258e4b17023SJohn Marino                   break;
1259e4b17023SJohn Marino                 }
1260e4b17023SJohn Marino 
1261e4b17023SJohn Marino               SET_BIT (load_index, first_group_load_index);
1262e4b17023SJohn Marino             }
1263e4b17023SJohn Marino 
1264e4b17023SJohn Marino           if (!bad_permutation)
1265e4b17023SJohn Marino             for (k = 0; k < group_size; k++)
1266e4b17023SJohn Marino               if (!TEST_BIT (load_index, k))
1267e4b17023SJohn Marino                 {
1268e4b17023SJohn Marino                   bad_permutation = true;
1269e4b17023SJohn Marino                   break;
1270e4b17023SJohn Marino                 }
1271e4b17023SJohn Marino 
1272e4b17023SJohn Marino           sbitmap_free (load_index);
1273e4b17023SJohn Marino         }
1274e4b17023SJohn Marino 
1275e4b17023SJohn Marino       if (!bad_permutation)
1276e4b17023SJohn Marino         {
1277e4b17023SJohn Marino           /* This permutation is valid for reduction.  Since the order of the
1278e4b17023SJohn Marino              statements in the nodes is not important unless they are memory
1279e4b17023SJohn Marino              accesses, we can rearrange the statements in all the nodes
1280e4b17023SJohn Marino              according to the order of the loads.  */
1281e4b17023SJohn Marino           vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
1282e4b17023SJohn Marino                                     load_permutation);
1283e4b17023SJohn Marino           VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
1284e4b17023SJohn Marino           return true;
1285e4b17023SJohn Marino         }
1286e4b17023SJohn Marino     }
1287e4b17023SJohn Marino 
1288e4b17023SJohn Marino   /* In basic block vectorization we allow any subchain of an interleaving
1289e4b17023SJohn Marino      chain.
1290e4b17023SJohn Marino      FORNOW: not supported in loop SLP because of realignment compications.  */
1291e4b17023SJohn Marino   bb_vinfo = STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt));
1292e4b17023SJohn Marino   bad_permutation = false;
1293e4b17023SJohn Marino   /* Check that for every node in the instance teh loads form a subchain.  */
1294e4b17023SJohn Marino   if (bb_vinfo)
1295e4b17023SJohn Marino     {
1296e4b17023SJohn Marino       FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
1297e4b17023SJohn Marino         {
1298e4b17023SJohn Marino           next_load = NULL;
1299e4b17023SJohn Marino           first_load = NULL;
1300e4b17023SJohn Marino           FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), j, load)
1301e4b17023SJohn Marino             {
1302e4b17023SJohn Marino               if (!first_load)
1303e4b17023SJohn Marino                 first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (load));
1304e4b17023SJohn Marino               else if (first_load
1305e4b17023SJohn Marino                          != GROUP_FIRST_ELEMENT (vinfo_for_stmt (load)))
1306e4b17023SJohn Marino                 {
1307e4b17023SJohn Marino                   bad_permutation = true;
1308e4b17023SJohn Marino 	          break;
1309e4b17023SJohn Marino 	        }
1310e4b17023SJohn Marino 
1311e4b17023SJohn Marino               if (j != 0 && next_load != load)
1312e4b17023SJohn Marino                 {
1313e4b17023SJohn Marino                   bad_permutation = true;
1314e4b17023SJohn Marino                   break;
1315e4b17023SJohn Marino                 }
1316e4b17023SJohn Marino 
1317e4b17023SJohn Marino               next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
1318e4b17023SJohn Marino             }
1319e4b17023SJohn Marino 
1320e4b17023SJohn Marino           if (bad_permutation)
1321e4b17023SJohn Marino             break;
1322e4b17023SJohn Marino         }
1323e4b17023SJohn Marino 
1324e4b17023SJohn Marino       /* Check that the alignment of the first load in every subchain, i.e.,
1325e4b17023SJohn Marino          the first statement in every load node, is supported.  */
1326e4b17023SJohn Marino       if (!bad_permutation)
1327e4b17023SJohn Marino         {
1328e4b17023SJohn Marino           FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
1329e4b17023SJohn Marino             {
1330e4b17023SJohn Marino               first_load = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
1331e4b17023SJohn Marino               if (first_load
1332e4b17023SJohn Marino                     != GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load)))
1333e4b17023SJohn Marino                 {
1334e4b17023SJohn Marino                   dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load));
1335e4b17023SJohn Marino                   if (vect_supportable_dr_alignment (dr, false)
1336e4b17023SJohn Marino  	               == dr_unaligned_unsupported)
1337e4b17023SJohn Marino                     {
1338e4b17023SJohn Marino    		      if (vect_print_dump_info (REPORT_SLP))
1339e4b17023SJohn Marino 		        {
1340e4b17023SJohn Marino   	                  fprintf (vect_dump, "unsupported unaligned load ");
1341e4b17023SJohn Marino                           print_gimple_stmt (vect_dump, first_load, 0,
1342e4b17023SJohn Marino 					     TDF_SLIM);
1343e4b17023SJohn Marino                         }
1344e4b17023SJohn Marino   		      bad_permutation = true;
1345e4b17023SJohn Marino                       break;
1346e4b17023SJohn Marino                     }
1347e4b17023SJohn Marino 	        }
1348e4b17023SJohn Marino             }
1349e4b17023SJohn Marino 
1350e4b17023SJohn Marino           if (!bad_permutation)
1351e4b17023SJohn Marino             {
1352e4b17023SJohn Marino               VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
1353e4b17023SJohn Marino               return true;
1354e4b17023SJohn Marino     	    }
1355e4b17023SJohn Marino         }
1356e4b17023SJohn Marino     }
1357e4b17023SJohn Marino 
1358e4b17023SJohn Marino   /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
1359e4b17023SJohn Marino      GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
1360e4b17023SJohn Marino      well (unless it's reduction).  */
1361e4b17023SJohn Marino   if (VEC_length (int, load_permutation)
1362e4b17023SJohn Marino       != (unsigned int) (group_size * group_size))
1363e4b17023SJohn Marino     return false;
1364e4b17023SJohn Marino 
1365e4b17023SJohn Marino   supported = true;
1366e4b17023SJohn Marino   load_index = sbitmap_alloc (group_size);
1367e4b17023SJohn Marino   sbitmap_zero (load_index);
1368e4b17023SJohn Marino   for (j = 0; j < group_size; j++)
1369e4b17023SJohn Marino     {
1370e4b17023SJohn Marino       for (i = j * group_size, k = 0;
1371e4b17023SJohn Marino            VEC_iterate (int, load_permutation, i, next) && k < group_size;
1372e4b17023SJohn Marino            i++, k++)
1373e4b17023SJohn Marino        {
1374e4b17023SJohn Marino          if (i != j * group_size && next != prev)
1375e4b17023SJohn Marino           {
1376e4b17023SJohn Marino             supported = false;
1377e4b17023SJohn Marino             break;
1378e4b17023SJohn Marino           }
1379e4b17023SJohn Marino 
1380e4b17023SJohn Marino          prev = next;
1381e4b17023SJohn Marino        }
1382e4b17023SJohn Marino 
1383e4b17023SJohn Marino       if (TEST_BIT (load_index, prev))
1384e4b17023SJohn Marino         {
1385e4b17023SJohn Marino           supported = false;
1386e4b17023SJohn Marino           break;
1387e4b17023SJohn Marino         }
1388e4b17023SJohn Marino 
1389e4b17023SJohn Marino       SET_BIT (load_index, prev);
1390e4b17023SJohn Marino     }
1391e4b17023SJohn Marino 
1392e4b17023SJohn Marino   for (j = 0; j < group_size; j++)
1393e4b17023SJohn Marino     if (!TEST_BIT (load_index, j))
1394e4b17023SJohn Marino       return false;
1395e4b17023SJohn Marino 
1396e4b17023SJohn Marino   sbitmap_free (load_index);
1397e4b17023SJohn Marino 
1398e4b17023SJohn Marino   if (supported && i == group_size * group_size
1399e4b17023SJohn Marino       && vect_supported_slp_permutation_p (slp_instn))
1400e4b17023SJohn Marino     return true;
1401e4b17023SJohn Marino 
1402e4b17023SJohn Marino   return false;
1403e4b17023SJohn Marino }
1404e4b17023SJohn Marino 
1405e4b17023SJohn Marino 
1406e4b17023SJohn Marino /* Find the first load in the loop that belongs to INSTANCE.
1407e4b17023SJohn Marino    When loads are in several SLP nodes, there can be a case in which the first
1408e4b17023SJohn Marino    load does not appear in the first SLP node to be transformed, causing
1409e4b17023SJohn Marino    incorrect order of statements.  Since we generate all the loads together,
1410e4b17023SJohn Marino    they must be inserted before the first load of the SLP instance and not
1411e4b17023SJohn Marino    before the first load of the first node of the instance.  */
1412e4b17023SJohn Marino 
1413e4b17023SJohn Marino static gimple
vect_find_first_load_in_slp_instance(slp_instance instance)1414e4b17023SJohn Marino vect_find_first_load_in_slp_instance (slp_instance instance)
1415e4b17023SJohn Marino {
1416e4b17023SJohn Marino   int i, j;
1417e4b17023SJohn Marino   slp_tree load_node;
1418e4b17023SJohn Marino   gimple first_load = NULL, load;
1419e4b17023SJohn Marino 
1420e4b17023SJohn Marino   FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (instance), i, load_node)
1421e4b17023SJohn Marino     FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (load_node), j, load)
1422e4b17023SJohn Marino       first_load = get_earlier_stmt (load, first_load);
1423e4b17023SJohn Marino 
1424e4b17023SJohn Marino   return first_load;
1425e4b17023SJohn Marino }
1426e4b17023SJohn Marino 
1427e4b17023SJohn Marino 
1428e4b17023SJohn Marino /* Find the last store in SLP INSTANCE.  */
1429e4b17023SJohn Marino 
1430e4b17023SJohn Marino static gimple
vect_find_last_store_in_slp_instance(slp_instance instance)1431e4b17023SJohn Marino vect_find_last_store_in_slp_instance (slp_instance instance)
1432e4b17023SJohn Marino {
1433e4b17023SJohn Marino   int i;
1434e4b17023SJohn Marino   slp_tree node;
1435e4b17023SJohn Marino   gimple last_store = NULL, store;
1436e4b17023SJohn Marino 
1437e4b17023SJohn Marino   node = SLP_INSTANCE_TREE (instance);
1438e4b17023SJohn Marino   for (i = 0;
1439e4b17023SJohn Marino        VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, store);
1440e4b17023SJohn Marino        i++)
1441e4b17023SJohn Marino     last_store = get_later_stmt (store, last_store);
1442e4b17023SJohn Marino 
1443e4b17023SJohn Marino   return last_store;
1444e4b17023SJohn Marino }
1445e4b17023SJohn Marino 
1446e4b17023SJohn Marino 
1447e4b17023SJohn Marino /* Analyze an SLP instance starting from a group of strided stores.  Call
1448e4b17023SJohn Marino    vect_build_slp_tree to build a tree of packed stmts if possible.
1449e4b17023SJohn Marino    Return FALSE if it's impossible to SLP any stmt in the loop.  */
1450e4b17023SJohn Marino 
1451e4b17023SJohn Marino static bool
vect_analyze_slp_instance(loop_vec_info loop_vinfo,bb_vec_info bb_vinfo,gimple stmt)1452e4b17023SJohn Marino vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
1453e4b17023SJohn Marino                            gimple stmt)
1454e4b17023SJohn Marino {
1455e4b17023SJohn Marino   slp_instance new_instance;
1456e4b17023SJohn Marino   slp_tree node;
1457e4b17023SJohn Marino   unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1458e4b17023SJohn Marino   unsigned int unrolling_factor = 1, nunits;
1459e4b17023SJohn Marino   tree vectype, scalar_type = NULL_TREE;
1460e4b17023SJohn Marino   gimple next;
1461e4b17023SJohn Marino   unsigned int vectorization_factor = 0;
1462e4b17023SJohn Marino   int inside_cost = 0, outside_cost = 0, ncopies_for_cost, i;
1463e4b17023SJohn Marino   unsigned int max_nunits = 0;
1464e4b17023SJohn Marino   VEC (int, heap) *load_permutation;
1465e4b17023SJohn Marino   VEC (slp_tree, heap) *loads;
1466e4b17023SJohn Marino   struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
1467e4b17023SJohn Marino   bool loads_permuted = false;
1468e4b17023SJohn Marino   VEC (gimple, heap) *scalar_stmts;
1469e4b17023SJohn Marino 
1470e4b17023SJohn Marino   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1471e4b17023SJohn Marino     {
1472e4b17023SJohn Marino       if (dr)
1473e4b17023SJohn Marino         {
1474e4b17023SJohn Marino           scalar_type = TREE_TYPE (DR_REF (dr));
1475e4b17023SJohn Marino           vectype = get_vectype_for_scalar_type (scalar_type);
1476e4b17023SJohn Marino         }
1477e4b17023SJohn Marino       else
1478e4b17023SJohn Marino         {
1479e4b17023SJohn Marino           gcc_assert (loop_vinfo);
1480e4b17023SJohn Marino           vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1481e4b17023SJohn Marino         }
1482e4b17023SJohn Marino 
1483e4b17023SJohn Marino       group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1484e4b17023SJohn Marino     }
1485e4b17023SJohn Marino   else
1486e4b17023SJohn Marino     {
1487e4b17023SJohn Marino       gcc_assert (loop_vinfo);
1488e4b17023SJohn Marino       vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1489e4b17023SJohn Marino       group_size = VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo));
1490e4b17023SJohn Marino     }
1491e4b17023SJohn Marino 
1492e4b17023SJohn Marino   if (!vectype)
1493e4b17023SJohn Marino     {
1494e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_SLP))
1495e4b17023SJohn Marino         {
1496e4b17023SJohn Marino           fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
1497e4b17023SJohn Marino           print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
1498e4b17023SJohn Marino         }
1499e4b17023SJohn Marino 
1500e4b17023SJohn Marino       return false;
1501e4b17023SJohn Marino     }
1502e4b17023SJohn Marino 
1503e4b17023SJohn Marino   nunits = TYPE_VECTOR_SUBPARTS (vectype);
1504e4b17023SJohn Marino   if (loop_vinfo)
1505e4b17023SJohn Marino     vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1506e4b17023SJohn Marino   else
1507e4b17023SJohn Marino     vectorization_factor = nunits;
1508e4b17023SJohn Marino 
1509e4b17023SJohn Marino   /* Calculate the unrolling factor.  */
1510e4b17023SJohn Marino   unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
1511e4b17023SJohn Marino   if (unrolling_factor != 1 && !loop_vinfo)
1512e4b17023SJohn Marino     {
1513e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_SLP))
1514e4b17023SJohn Marino         fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
1515e4b17023SJohn Marino                             " block SLP");
1516e4b17023SJohn Marino 
1517e4b17023SJohn Marino       return false;
1518e4b17023SJohn Marino     }
1519e4b17023SJohn Marino 
1520e4b17023SJohn Marino   /* Create a node (a root of the SLP tree) for the packed strided stores.  */
1521e4b17023SJohn Marino   scalar_stmts = VEC_alloc (gimple, heap, group_size);
1522e4b17023SJohn Marino   next = stmt;
1523e4b17023SJohn Marino   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1524e4b17023SJohn Marino     {
1525e4b17023SJohn Marino       /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
1526e4b17023SJohn Marino       while (next)
1527e4b17023SJohn Marino         {
1528e4b17023SJohn Marino 	  if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
1529e4b17023SJohn Marino 	      && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
1530e4b17023SJohn Marino 	    VEC_safe_push (gimple, heap, scalar_stmts,
1531e4b17023SJohn Marino 			STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
1532e4b17023SJohn Marino 	  else
1533e4b17023SJohn Marino             VEC_safe_push (gimple, heap, scalar_stmts, next);
1534e4b17023SJohn Marino           next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
1535e4b17023SJohn Marino         }
1536e4b17023SJohn Marino     }
1537e4b17023SJohn Marino   else
1538e4b17023SJohn Marino     {
1539e4b17023SJohn Marino       /* Collect reduction statements.  */
1540e4b17023SJohn Marino       VEC (gimple, heap) *reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1541e4b17023SJohn Marino       for (i = 0; VEC_iterate (gimple, reductions, i, next); i++)
1542e4b17023SJohn Marino 	VEC_safe_push (gimple, heap, scalar_stmts, next);
1543e4b17023SJohn Marino     }
1544e4b17023SJohn Marino 
1545e4b17023SJohn Marino   node = vect_create_new_slp_node (scalar_stmts);
1546e4b17023SJohn Marino 
1547e4b17023SJohn Marino   /* Calculate the number of vector stmts to create based on the unrolling
1548e4b17023SJohn Marino      factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1549e4b17023SJohn Marino      GROUP_SIZE / NUNITS otherwise.  */
1550e4b17023SJohn Marino   ncopies_for_cost = unrolling_factor * group_size / nunits;
1551e4b17023SJohn Marino 
1552e4b17023SJohn Marino   load_permutation = VEC_alloc (int, heap, group_size * group_size);
1553e4b17023SJohn Marino   loads = VEC_alloc (slp_tree, heap, group_size);
1554e4b17023SJohn Marino 
1555e4b17023SJohn Marino   /* Build the tree for the SLP instance.  */
1556e4b17023SJohn Marino   if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
1557e4b17023SJohn Marino                            &inside_cost, &outside_cost, ncopies_for_cost,
1558e4b17023SJohn Marino 			   &max_nunits, &load_permutation, &loads,
1559e4b17023SJohn Marino 			   vectorization_factor, &loads_permuted))
1560e4b17023SJohn Marino     {
1561e4b17023SJohn Marino       /* Calculate the unrolling factor based on the smallest type.  */
1562e4b17023SJohn Marino       if (max_nunits > nunits)
1563e4b17023SJohn Marino         unrolling_factor = least_common_multiple (max_nunits, group_size)
1564e4b17023SJohn Marino                            / group_size;
1565e4b17023SJohn Marino 
1566e4b17023SJohn Marino       if (unrolling_factor != 1 && !loop_vinfo)
1567e4b17023SJohn Marino         {
1568e4b17023SJohn Marino           if (vect_print_dump_info (REPORT_SLP))
1569e4b17023SJohn Marino             fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
1570e4b17023SJohn Marino                                " block SLP");
1571e4b17023SJohn Marino 	  vect_free_slp_tree (node);
1572e4b17023SJohn Marino 	  VEC_free (int, heap, load_permutation);
1573e4b17023SJohn Marino 	  VEC_free (slp_tree, heap, loads);
1574e4b17023SJohn Marino           return false;
1575e4b17023SJohn Marino         }
1576e4b17023SJohn Marino 
1577e4b17023SJohn Marino       /* Create a new SLP instance.  */
1578e4b17023SJohn Marino       new_instance = XNEW (struct _slp_instance);
1579e4b17023SJohn Marino       SLP_INSTANCE_TREE (new_instance) = node;
1580e4b17023SJohn Marino       SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
1581e4b17023SJohn Marino       SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
1582e4b17023SJohn Marino       SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
1583e4b17023SJohn Marino       SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
1584e4b17023SJohn Marino       SLP_INSTANCE_LOADS (new_instance) = loads;
1585e4b17023SJohn Marino       SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
1586e4b17023SJohn Marino       SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;
1587e4b17023SJohn Marino 
1588e4b17023SJohn Marino       if (loads_permuted)
1589e4b17023SJohn Marino         {
1590e4b17023SJohn Marino           if (!vect_supported_load_permutation_p (new_instance, group_size,
1591e4b17023SJohn Marino                                                   load_permutation))
1592e4b17023SJohn Marino             {
1593e4b17023SJohn Marino               if (vect_print_dump_info (REPORT_SLP))
1594e4b17023SJohn Marino                 {
1595e4b17023SJohn Marino                   fprintf (vect_dump, "Build SLP failed: unsupported load "
1596e4b17023SJohn Marino                                       "permutation ");
1597e4b17023SJohn Marino                   print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1598e4b17023SJohn Marino                 }
1599e4b17023SJohn Marino 
1600e4b17023SJohn Marino               vect_free_slp_instance (new_instance);
1601e4b17023SJohn Marino               return false;
1602e4b17023SJohn Marino             }
1603e4b17023SJohn Marino 
1604e4b17023SJohn Marino           SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
1605e4b17023SJohn Marino              = vect_find_first_load_in_slp_instance (new_instance);
1606e4b17023SJohn Marino         }
1607e4b17023SJohn Marino       else
1608e4b17023SJohn Marino         VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (new_instance));
1609e4b17023SJohn Marino 
1610e4b17023SJohn Marino       if (loop_vinfo)
1611e4b17023SJohn Marino         VEC_safe_push (slp_instance, heap,
1612e4b17023SJohn Marino                        LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
1613e4b17023SJohn Marino   		       new_instance);
1614e4b17023SJohn Marino       else
1615e4b17023SJohn Marino         VEC_safe_push (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo),
1616e4b17023SJohn Marino                        new_instance);
1617e4b17023SJohn Marino 
1618e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_SLP))
1619e4b17023SJohn Marino 	vect_print_slp_tree (node);
1620e4b17023SJohn Marino 
1621e4b17023SJohn Marino       return true;
1622e4b17023SJohn Marino     }
1623e4b17023SJohn Marino 
1624e4b17023SJohn Marino   /* Failed to SLP.  */
1625e4b17023SJohn Marino   /* Free the allocated memory.  */
1626e4b17023SJohn Marino   vect_free_slp_tree (node);
1627e4b17023SJohn Marino   VEC_free (int, heap, load_permutation);
1628e4b17023SJohn Marino   VEC_free (slp_tree, heap, loads);
1629e4b17023SJohn Marino 
1630e4b17023SJohn Marino   return false;
1631e4b17023SJohn Marino }
1632e4b17023SJohn Marino 
1633e4b17023SJohn Marino 
1634e4b17023SJohn Marino /* Check if there are stmts in the loop can be vectorized using SLP.  Build SLP
1635e4b17023SJohn Marino    trees of packed scalar stmts if SLP is possible.  */
1636e4b17023SJohn Marino 
1637e4b17023SJohn Marino bool
vect_analyze_slp(loop_vec_info loop_vinfo,bb_vec_info bb_vinfo)1638e4b17023SJohn Marino vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1639e4b17023SJohn Marino {
1640e4b17023SJohn Marino   unsigned int i;
1641e4b17023SJohn Marino   VEC (gimple, heap) *strided_stores, *reductions = NULL, *reduc_chains = NULL;
1642e4b17023SJohn Marino   gimple first_element;
1643e4b17023SJohn Marino   bool ok = false;
1644e4b17023SJohn Marino 
1645e4b17023SJohn Marino   if (vect_print_dump_info (REPORT_SLP))
1646e4b17023SJohn Marino     fprintf (vect_dump, "=== vect_analyze_slp ===");
1647e4b17023SJohn Marino 
1648e4b17023SJohn Marino   if (loop_vinfo)
1649e4b17023SJohn Marino     {
1650e4b17023SJohn Marino       strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
1651e4b17023SJohn Marino       reduc_chains = LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo);
1652e4b17023SJohn Marino       reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1653e4b17023SJohn Marino     }
1654e4b17023SJohn Marino   else
1655e4b17023SJohn Marino     strided_stores = BB_VINFO_STRIDED_STORES (bb_vinfo);
1656e4b17023SJohn Marino 
1657e4b17023SJohn Marino   /* Find SLP sequences starting from groups of strided stores.  */
1658e4b17023SJohn Marino   FOR_EACH_VEC_ELT (gimple, strided_stores, i, first_element)
1659e4b17023SJohn Marino     if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
1660e4b17023SJohn Marino       ok = true;
1661e4b17023SJohn Marino 
1662e4b17023SJohn Marino   if (bb_vinfo && !ok)
1663e4b17023SJohn Marino     {
1664e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_SLP))
1665e4b17023SJohn Marino         fprintf (vect_dump, "Failed to SLP the basic block.");
1666e4b17023SJohn Marino 
1667e4b17023SJohn Marino       return false;
1668e4b17023SJohn Marino     }
1669e4b17023SJohn Marino 
1670e4b17023SJohn Marino   if (loop_vinfo
1671e4b17023SJohn Marino       && VEC_length (gimple, LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)) > 0)
1672e4b17023SJohn Marino     {
1673e4b17023SJohn Marino       /* Find SLP sequences starting from reduction chains.  */
1674e4b17023SJohn Marino       FOR_EACH_VEC_ELT (gimple, reduc_chains, i, first_element)
1675e4b17023SJohn Marino         if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
1676e4b17023SJohn Marino           ok = true;
1677e4b17023SJohn Marino         else
1678e4b17023SJohn Marino           return false;
1679e4b17023SJohn Marino 
1680e4b17023SJohn Marino       /* Don't try to vectorize SLP reductions if reduction chain was
1681e4b17023SJohn Marino          detected.  */
1682e4b17023SJohn Marino       return ok;
1683e4b17023SJohn Marino     }
1684e4b17023SJohn Marino 
1685e4b17023SJohn Marino   /* Find SLP sequences starting from groups of reductions.  */
1686e4b17023SJohn Marino   if (loop_vinfo && VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo)) > 1
1687e4b17023SJohn Marino       && vect_analyze_slp_instance (loop_vinfo, bb_vinfo,
1688e4b17023SJohn Marino                                     VEC_index (gimple, reductions, 0)))
1689e4b17023SJohn Marino     ok = true;
1690e4b17023SJohn Marino 
1691e4b17023SJohn Marino   return true;
1692e4b17023SJohn Marino }
1693e4b17023SJohn Marino 
1694e4b17023SJohn Marino 
1695e4b17023SJohn Marino /* For each possible SLP instance decide whether to SLP it and calculate overall
1696e4b17023SJohn Marino    unrolling factor needed to SLP the loop.  Return TRUE if decided to SLP at
1697e4b17023SJohn Marino    least one instance.  */
1698e4b17023SJohn Marino 
1699e4b17023SJohn Marino bool
vect_make_slp_decision(loop_vec_info loop_vinfo)1700e4b17023SJohn Marino vect_make_slp_decision (loop_vec_info loop_vinfo)
1701e4b17023SJohn Marino {
1702e4b17023SJohn Marino   unsigned int i, unrolling_factor = 1;
1703e4b17023SJohn Marino   VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1704e4b17023SJohn Marino   slp_instance instance;
1705e4b17023SJohn Marino   int decided_to_slp = 0;
1706e4b17023SJohn Marino 
1707e4b17023SJohn Marino   if (vect_print_dump_info (REPORT_SLP))
1708e4b17023SJohn Marino     fprintf (vect_dump, "=== vect_make_slp_decision ===");
1709e4b17023SJohn Marino 
1710e4b17023SJohn Marino   FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
1711e4b17023SJohn Marino     {
1712e4b17023SJohn Marino       /* FORNOW: SLP if you can.  */
1713e4b17023SJohn Marino       if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
1714e4b17023SJohn Marino 	unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
1715e4b17023SJohn Marino 
1716e4b17023SJohn Marino       /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later we
1717e4b17023SJohn Marino 	 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
1718e4b17023SJohn Marino 	 loop-based vectorization.  Such stmts will be marked as HYBRID.  */
1719e4b17023SJohn Marino       vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1720e4b17023SJohn Marino       decided_to_slp++;
1721e4b17023SJohn Marino     }
1722e4b17023SJohn Marino 
1723e4b17023SJohn Marino   LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
1724e4b17023SJohn Marino 
1725e4b17023SJohn Marino   if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
1726e4b17023SJohn Marino     fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
1727e4b17023SJohn Marino 	     decided_to_slp, unrolling_factor);
1728e4b17023SJohn Marino 
1729e4b17023SJohn Marino   return (decided_to_slp > 0);
1730e4b17023SJohn Marino }
1731e4b17023SJohn Marino 
1732e4b17023SJohn Marino 
1733e4b17023SJohn Marino /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
1734e4b17023SJohn Marino    can't be SLPed) in the tree rooted at NODE.  Mark such stmts as HYBRID.  */
1735e4b17023SJohn Marino 
1736e4b17023SJohn Marino static void
vect_detect_hybrid_slp_stmts(slp_tree node)1737e4b17023SJohn Marino vect_detect_hybrid_slp_stmts (slp_tree node)
1738e4b17023SJohn Marino {
1739e4b17023SJohn Marino   int i;
1740e4b17023SJohn Marino   VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (node);
1741e4b17023SJohn Marino   gimple stmt = VEC_index (gimple, stmts, 0);
1742e4b17023SJohn Marino   imm_use_iterator imm_iter;
1743e4b17023SJohn Marino   gimple use_stmt;
1744e4b17023SJohn Marino   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1745e4b17023SJohn Marino   slp_void_p child;
1746e4b17023SJohn Marino   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1747e4b17023SJohn Marino   struct loop *loop = NULL;
1748e4b17023SJohn Marino   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1749e4b17023SJohn Marino   basic_block bb = NULL;
1750e4b17023SJohn Marino 
1751e4b17023SJohn Marino   if (!node)
1752e4b17023SJohn Marino     return;
1753e4b17023SJohn Marino 
1754e4b17023SJohn Marino   if (loop_vinfo)
1755e4b17023SJohn Marino     loop = LOOP_VINFO_LOOP (loop_vinfo);
1756e4b17023SJohn Marino   else
1757e4b17023SJohn Marino     bb = BB_VINFO_BB (bb_vinfo);
1758e4b17023SJohn Marino 
1759e4b17023SJohn Marino   FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
1760e4b17023SJohn Marino     if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
1761e4b17023SJohn Marino 	&& TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
1762e4b17023SJohn Marino       FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
1763e4b17023SJohn Marino 	if (gimple_bb (use_stmt)
1764e4b17023SJohn Marino             && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
1765e4b17023SJohn Marino 		 || bb == gimple_bb (use_stmt))
1766e4b17023SJohn Marino 	    && (stmt_vinfo = vinfo_for_stmt (use_stmt))
1767e4b17023SJohn Marino 	    && !STMT_SLP_TYPE (stmt_vinfo)
1768e4b17023SJohn Marino             && (STMT_VINFO_RELEVANT (stmt_vinfo)
1769e4b17023SJohn Marino                 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo)))
1770e4b17023SJohn Marino 	    && !(gimple_code (use_stmt) == GIMPLE_PHI
1771e4b17023SJohn Marino                  && STMT_VINFO_DEF_TYPE (stmt_vinfo)
1772e4b17023SJohn Marino                   == vect_reduction_def))
1773e4b17023SJohn Marino 	  vect_mark_slp_stmts (node, hybrid, i);
1774e4b17023SJohn Marino 
1775e4b17023SJohn Marino   FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
1776e4b17023SJohn Marino     vect_detect_hybrid_slp_stmts ((slp_tree) child);
1777e4b17023SJohn Marino }
1778e4b17023SJohn Marino 
1779e4b17023SJohn Marino 
1780e4b17023SJohn Marino /* Find stmts that must be both vectorized and SLPed.  */
1781e4b17023SJohn Marino 
1782e4b17023SJohn Marino void
vect_detect_hybrid_slp(loop_vec_info loop_vinfo)1783e4b17023SJohn Marino vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
1784e4b17023SJohn Marino {
1785e4b17023SJohn Marino   unsigned int i;
1786e4b17023SJohn Marino   VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1787e4b17023SJohn Marino   slp_instance instance;
1788e4b17023SJohn Marino 
1789e4b17023SJohn Marino   if (vect_print_dump_info (REPORT_SLP))
1790e4b17023SJohn Marino     fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");
1791e4b17023SJohn Marino 
1792e4b17023SJohn Marino   FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
1793e4b17023SJohn Marino     vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
1794e4b17023SJohn Marino }
1795e4b17023SJohn Marino 
1796e4b17023SJohn Marino 
1797e4b17023SJohn Marino /* Create and initialize a new bb_vec_info struct for BB, as well as
1798e4b17023SJohn Marino    stmt_vec_info structs for all the stmts in it.  */
1799e4b17023SJohn Marino 
1800e4b17023SJohn Marino static bb_vec_info
new_bb_vec_info(basic_block bb)1801e4b17023SJohn Marino new_bb_vec_info (basic_block bb)
1802e4b17023SJohn Marino {
1803e4b17023SJohn Marino   bb_vec_info res = NULL;
1804e4b17023SJohn Marino   gimple_stmt_iterator gsi;
1805e4b17023SJohn Marino 
1806e4b17023SJohn Marino   res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
1807e4b17023SJohn Marino   BB_VINFO_BB (res) = bb;
1808e4b17023SJohn Marino 
1809e4b17023SJohn Marino   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1810e4b17023SJohn Marino     {
1811e4b17023SJohn Marino       gimple stmt = gsi_stmt (gsi);
1812e4b17023SJohn Marino       gimple_set_uid (stmt, 0);
1813e4b17023SJohn Marino       set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
1814e4b17023SJohn Marino     }
1815e4b17023SJohn Marino 
1816e4b17023SJohn Marino   BB_VINFO_STRIDED_STORES (res) = VEC_alloc (gimple, heap, 10);
1817e4b17023SJohn Marino   BB_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 2);
1818e4b17023SJohn Marino 
1819e4b17023SJohn Marino   bb->aux = res;
1820e4b17023SJohn Marino   return res;
1821e4b17023SJohn Marino }
1822e4b17023SJohn Marino 
1823e4b17023SJohn Marino 
1824e4b17023SJohn Marino /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
1825e4b17023SJohn Marino    stmts in the basic block.  */
1826e4b17023SJohn Marino 
1827e4b17023SJohn Marino static void
destroy_bb_vec_info(bb_vec_info bb_vinfo)1828e4b17023SJohn Marino destroy_bb_vec_info (bb_vec_info bb_vinfo)
1829e4b17023SJohn Marino {
1830e4b17023SJohn Marino   VEC (slp_instance, heap) *slp_instances;
1831e4b17023SJohn Marino   slp_instance instance;
1832e4b17023SJohn Marino   basic_block bb;
1833e4b17023SJohn Marino   gimple_stmt_iterator si;
1834e4b17023SJohn Marino   unsigned i;
1835e4b17023SJohn Marino 
1836e4b17023SJohn Marino   if (!bb_vinfo)
1837e4b17023SJohn Marino     return;
1838e4b17023SJohn Marino 
1839e4b17023SJohn Marino   bb = BB_VINFO_BB (bb_vinfo);
1840e4b17023SJohn Marino 
1841e4b17023SJohn Marino   for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1842e4b17023SJohn Marino     {
1843e4b17023SJohn Marino       gimple stmt = gsi_stmt (si);
1844e4b17023SJohn Marino       stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1845e4b17023SJohn Marino 
1846e4b17023SJohn Marino       if (stmt_info)
1847e4b17023SJohn Marino         /* Free stmt_vec_info.  */
1848e4b17023SJohn Marino         free_stmt_vec_info (stmt);
1849e4b17023SJohn Marino     }
1850e4b17023SJohn Marino 
1851e4b17023SJohn Marino   free_data_refs (BB_VINFO_DATAREFS (bb_vinfo));
1852e4b17023SJohn Marino   free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
1853e4b17023SJohn Marino   VEC_free (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo));
1854e4b17023SJohn Marino   slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1855e4b17023SJohn Marino   FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
1856e4b17023SJohn Marino     vect_free_slp_instance (instance);
1857e4b17023SJohn Marino   VEC_free (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo));
1858e4b17023SJohn Marino   free (bb_vinfo);
1859e4b17023SJohn Marino   bb->aux = NULL;
1860e4b17023SJohn Marino }
1861e4b17023SJohn Marino 
1862e4b17023SJohn Marino 
1863e4b17023SJohn Marino /* Analyze statements contained in SLP tree node after recursively analyzing
1864e4b17023SJohn Marino    the subtree. Return TRUE if the operations are supported.  */
1865e4b17023SJohn Marino 
1866e4b17023SJohn Marino static bool
vect_slp_analyze_node_operations(bb_vec_info bb_vinfo,slp_tree node)1867e4b17023SJohn Marino vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
1868e4b17023SJohn Marino {
1869e4b17023SJohn Marino   bool dummy;
1870e4b17023SJohn Marino   int i;
1871e4b17023SJohn Marino   gimple stmt;
1872e4b17023SJohn Marino   slp_void_p child;
1873e4b17023SJohn Marino 
1874e4b17023SJohn Marino   if (!node)
1875e4b17023SJohn Marino     return true;
1876e4b17023SJohn Marino 
1877e4b17023SJohn Marino   FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
1878e4b17023SJohn Marino     if (!vect_slp_analyze_node_operations (bb_vinfo, (slp_tree) child))
1879e4b17023SJohn Marino       return false;
1880e4b17023SJohn Marino 
1881e4b17023SJohn Marino   FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
1882e4b17023SJohn Marino     {
1883e4b17023SJohn Marino       stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1884e4b17023SJohn Marino       gcc_assert (stmt_info);
1885e4b17023SJohn Marino       gcc_assert (PURE_SLP_STMT (stmt_info));
1886e4b17023SJohn Marino 
1887e4b17023SJohn Marino       if (!vect_analyze_stmt (stmt, &dummy, node))
1888e4b17023SJohn Marino         return false;
1889e4b17023SJohn Marino     }
1890e4b17023SJohn Marino 
1891e4b17023SJohn Marino   return true;
1892e4b17023SJohn Marino }
1893e4b17023SJohn Marino 
1894e4b17023SJohn Marino 
1895e4b17023SJohn Marino /* Analyze statements in SLP instances of the basic block.  Return TRUE if the
1896e4b17023SJohn Marino    operations are supported. */
1897e4b17023SJohn Marino 
1898e4b17023SJohn Marino static bool
vect_slp_analyze_operations(bb_vec_info bb_vinfo)1899e4b17023SJohn Marino vect_slp_analyze_operations (bb_vec_info bb_vinfo)
1900e4b17023SJohn Marino {
1901e4b17023SJohn Marino   VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1902e4b17023SJohn Marino   slp_instance instance;
1903e4b17023SJohn Marino   int i;
1904e4b17023SJohn Marino 
1905e4b17023SJohn Marino   for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); )
1906e4b17023SJohn Marino     {
1907e4b17023SJohn Marino       if (!vect_slp_analyze_node_operations (bb_vinfo,
1908e4b17023SJohn Marino                                              SLP_INSTANCE_TREE (instance)))
1909e4b17023SJohn Marino         {
1910e4b17023SJohn Marino  	  vect_free_slp_instance (instance);
1911e4b17023SJohn Marino           VEC_ordered_remove (slp_instance, slp_instances, i);
1912e4b17023SJohn Marino 	}
1913e4b17023SJohn Marino       else
1914e4b17023SJohn Marino         i++;
1915e4b17023SJohn Marino     }
1916e4b17023SJohn Marino 
1917e4b17023SJohn Marino   if (!VEC_length (slp_instance, slp_instances))
1918e4b17023SJohn Marino     return false;
1919e4b17023SJohn Marino 
1920e4b17023SJohn Marino   return true;
1921e4b17023SJohn Marino }
1922e4b17023SJohn Marino 
1923e4b17023SJohn Marino /* Check if vectorization of the basic block is profitable.  */
1924e4b17023SJohn Marino 
1925e4b17023SJohn Marino static bool
vect_bb_vectorization_profitable_p(bb_vec_info bb_vinfo)1926e4b17023SJohn Marino vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
1927e4b17023SJohn Marino {
1928e4b17023SJohn Marino   VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1929e4b17023SJohn Marino   slp_instance instance;
1930e4b17023SJohn Marino   int i;
1931e4b17023SJohn Marino   unsigned int vec_outside_cost = 0, vec_inside_cost = 0, scalar_cost = 0;
1932e4b17023SJohn Marino   unsigned int stmt_cost;
1933e4b17023SJohn Marino   gimple stmt;
1934e4b17023SJohn Marino   gimple_stmt_iterator si;
1935e4b17023SJohn Marino   basic_block bb = BB_VINFO_BB (bb_vinfo);
1936e4b17023SJohn Marino   stmt_vec_info stmt_info = NULL;
1937e4b17023SJohn Marino   tree dummy_type = NULL;
1938e4b17023SJohn Marino   int dummy = 0;
1939e4b17023SJohn Marino 
1940e4b17023SJohn Marino   /* Calculate vector costs.  */
1941e4b17023SJohn Marino   FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
1942e4b17023SJohn Marino     {
1943e4b17023SJohn Marino       vec_outside_cost += SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (instance);
1944e4b17023SJohn Marino       vec_inside_cost += SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance);
1945e4b17023SJohn Marino     }
1946e4b17023SJohn Marino 
1947e4b17023SJohn Marino   /* Calculate scalar cost.  */
1948e4b17023SJohn Marino   for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1949e4b17023SJohn Marino     {
1950e4b17023SJohn Marino       stmt = gsi_stmt (si);
1951e4b17023SJohn Marino       stmt_info = vinfo_for_stmt (stmt);
1952e4b17023SJohn Marino 
1953e4b17023SJohn Marino       if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info)
1954e4b17023SJohn Marino           || !PURE_SLP_STMT (stmt_info))
1955e4b17023SJohn Marino         continue;
1956e4b17023SJohn Marino 
1957e4b17023SJohn Marino       if (STMT_VINFO_DATA_REF (stmt_info))
1958e4b17023SJohn Marino         {
1959e4b17023SJohn Marino           if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
1960e4b17023SJohn Marino             stmt_cost = targetm.vectorize.builtin_vectorization_cost
1961e4b17023SJohn Marino                           (scalar_load, dummy_type, dummy);
1962e4b17023SJohn Marino           else
1963e4b17023SJohn Marino             stmt_cost = targetm.vectorize.builtin_vectorization_cost
1964e4b17023SJohn Marino                           (scalar_store, dummy_type, dummy);
1965e4b17023SJohn Marino         }
1966e4b17023SJohn Marino       else
1967e4b17023SJohn Marino         stmt_cost = targetm.vectorize.builtin_vectorization_cost
1968e4b17023SJohn Marino                       (scalar_stmt, dummy_type, dummy);
1969e4b17023SJohn Marino 
1970e4b17023SJohn Marino       scalar_cost += stmt_cost;
1971e4b17023SJohn Marino     }
1972e4b17023SJohn Marino 
1973e4b17023SJohn Marino   if (vect_print_dump_info (REPORT_COST))
1974e4b17023SJohn Marino     {
1975e4b17023SJohn Marino       fprintf (vect_dump, "Cost model analysis: \n");
1976e4b17023SJohn Marino       fprintf (vect_dump, "  Vector inside of basic block cost: %d\n",
1977e4b17023SJohn Marino                vec_inside_cost);
1978e4b17023SJohn Marino       fprintf (vect_dump, "  Vector outside of basic block cost: %d\n",
1979e4b17023SJohn Marino                vec_outside_cost);
1980e4b17023SJohn Marino       fprintf (vect_dump, "  Scalar cost of basic block: %d", scalar_cost);
1981e4b17023SJohn Marino     }
1982e4b17023SJohn Marino 
1983e4b17023SJohn Marino   /* Vectorization is profitable if its cost is less than the cost of scalar
1984e4b17023SJohn Marino      version.  */
1985e4b17023SJohn Marino   if (vec_outside_cost + vec_inside_cost >= scalar_cost)
1986e4b17023SJohn Marino     return false;
1987e4b17023SJohn Marino 
1988e4b17023SJohn Marino   return true;
1989e4b17023SJohn Marino }
1990e4b17023SJohn Marino 
1991e4b17023SJohn Marino /* Check if the basic block can be vectorized.  */
1992e4b17023SJohn Marino 
1993e4b17023SJohn Marino static bb_vec_info
vect_slp_analyze_bb_1(basic_block bb)1994e4b17023SJohn Marino vect_slp_analyze_bb_1 (basic_block bb)
1995e4b17023SJohn Marino {
1996e4b17023SJohn Marino   bb_vec_info bb_vinfo;
1997e4b17023SJohn Marino   VEC (ddr_p, heap) *ddrs;
1998e4b17023SJohn Marino   VEC (slp_instance, heap) *slp_instances;
1999e4b17023SJohn Marino   slp_instance instance;
2000e4b17023SJohn Marino   int i;
2001e4b17023SJohn Marino   int min_vf = 2;
2002e4b17023SJohn Marino   int max_vf = MAX_VECTORIZATION_FACTOR;
2003e4b17023SJohn Marino 
2004e4b17023SJohn Marino   bb_vinfo = new_bb_vec_info (bb);
2005e4b17023SJohn Marino   if (!bb_vinfo)
2006e4b17023SJohn Marino     return NULL;
2007e4b17023SJohn Marino 
2008e4b17023SJohn Marino   if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf))
2009e4b17023SJohn Marino     {
2010e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2011e4b17023SJohn Marino         fprintf (vect_dump, "not vectorized: unhandled data-ref in basic "
2012e4b17023SJohn Marino                             "block.\n");
2013e4b17023SJohn Marino 
2014e4b17023SJohn Marino       destroy_bb_vec_info (bb_vinfo);
2015e4b17023SJohn Marino       return NULL;
2016e4b17023SJohn Marino     }
2017e4b17023SJohn Marino 
2018e4b17023SJohn Marino   ddrs = BB_VINFO_DDRS (bb_vinfo);
2019e4b17023SJohn Marino   if (!VEC_length (ddr_p, ddrs))
2020e4b17023SJohn Marino     {
2021e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2022e4b17023SJohn Marino         fprintf (vect_dump, "not vectorized: not enough data-refs in basic "
2023e4b17023SJohn Marino                             "block.\n");
2024e4b17023SJohn Marino 
2025e4b17023SJohn Marino       destroy_bb_vec_info (bb_vinfo);
2026e4b17023SJohn Marino       return NULL;
2027e4b17023SJohn Marino     }
2028e4b17023SJohn Marino 
2029e4b17023SJohn Marino    if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf)
2030e4b17023SJohn Marino        || min_vf > max_vf)
2031e4b17023SJohn Marino      {
2032e4b17023SJohn Marino        if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2033e4b17023SJohn Marino 	 fprintf (vect_dump, "not vectorized: unhandled data dependence "
2034e4b17023SJohn Marino 		  "in basic block.\n");
2035e4b17023SJohn Marino 
2036e4b17023SJohn Marino        destroy_bb_vec_info (bb_vinfo);
2037e4b17023SJohn Marino        return NULL;
2038e4b17023SJohn Marino      }
2039e4b17023SJohn Marino 
2040e4b17023SJohn Marino   if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
2041e4b17023SJohn Marino     {
2042e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2043e4b17023SJohn Marino         fprintf (vect_dump, "not vectorized: bad data alignment in basic "
2044e4b17023SJohn Marino                             "block.\n");
2045e4b17023SJohn Marino 
2046e4b17023SJohn Marino       destroy_bb_vec_info (bb_vinfo);
2047e4b17023SJohn Marino       return NULL;
2048e4b17023SJohn Marino     }
2049e4b17023SJohn Marino 
2050e4b17023SJohn Marino   if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
2051e4b17023SJohn Marino     {
2052e4b17023SJohn Marino      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2053e4b17023SJohn Marino        fprintf (vect_dump, "not vectorized: unhandled data access in basic "
2054e4b17023SJohn Marino                            "block.\n");
2055e4b17023SJohn Marino 
2056e4b17023SJohn Marino       destroy_bb_vec_info (bb_vinfo);
2057e4b17023SJohn Marino       return NULL;
2058e4b17023SJohn Marino     }
2059e4b17023SJohn Marino 
2060e4b17023SJohn Marino    if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
2061e4b17023SJohn Marino     {
2062e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2063e4b17023SJohn Marino         fprintf (vect_dump, "not vectorized: unsupported alignment in basic "
2064e4b17023SJohn Marino                             "block.\n");
2065e4b17023SJohn Marino 
2066e4b17023SJohn Marino       destroy_bb_vec_info (bb_vinfo);
2067e4b17023SJohn Marino       return NULL;
2068e4b17023SJohn Marino     }
2069e4b17023SJohn Marino 
2070e4b17023SJohn Marino   /* Check the SLP opportunities in the basic block, analyze and build SLP
2071e4b17023SJohn Marino      trees.  */
2072e4b17023SJohn Marino   if (!vect_analyze_slp (NULL, bb_vinfo))
2073e4b17023SJohn Marino     {
2074e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2075e4b17023SJohn Marino         fprintf (vect_dump, "not vectorized: failed to find SLP opportunities "
2076e4b17023SJohn Marino                             "in basic block.\n");
2077e4b17023SJohn Marino 
2078e4b17023SJohn Marino       destroy_bb_vec_info (bb_vinfo);
2079e4b17023SJohn Marino       return NULL;
2080e4b17023SJohn Marino     }
2081e4b17023SJohn Marino 
2082e4b17023SJohn Marino   slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2083e4b17023SJohn Marino 
2084e4b17023SJohn Marino   /* Mark all the statements that we want to vectorize as pure SLP and
2085e4b17023SJohn Marino      relevant.  */
2086e4b17023SJohn Marino   FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
2087e4b17023SJohn Marino     {
2088e4b17023SJohn Marino       vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2089e4b17023SJohn Marino       vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2090e4b17023SJohn Marino     }
2091e4b17023SJohn Marino 
2092e4b17023SJohn Marino   if (!vect_slp_analyze_operations (bb_vinfo))
2093e4b17023SJohn Marino     {
2094e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2095e4b17023SJohn Marino         fprintf (vect_dump, "not vectorized: bad operation in basic block.\n");
2096e4b17023SJohn Marino 
2097e4b17023SJohn Marino       destroy_bb_vec_info (bb_vinfo);
2098e4b17023SJohn Marino       return NULL;
2099e4b17023SJohn Marino     }
2100e4b17023SJohn Marino 
2101e4b17023SJohn Marino   /* Cost model: check if the vectorization is worthwhile.  */
2102e4b17023SJohn Marino   if (flag_vect_cost_model
2103e4b17023SJohn Marino       && !vect_bb_vectorization_profitable_p (bb_vinfo))
2104e4b17023SJohn Marino     {
2105e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2106e4b17023SJohn Marino         fprintf (vect_dump, "not vectorized: vectorization is not "
2107e4b17023SJohn Marino                             "profitable.\n");
2108e4b17023SJohn Marino 
2109e4b17023SJohn Marino       destroy_bb_vec_info (bb_vinfo);
2110e4b17023SJohn Marino       return NULL;
2111e4b17023SJohn Marino     }
2112e4b17023SJohn Marino 
2113e4b17023SJohn Marino   if (vect_print_dump_info (REPORT_DETAILS))
2114e4b17023SJohn Marino     fprintf (vect_dump, "Basic block will be vectorized using SLP\n");
2115e4b17023SJohn Marino 
2116e4b17023SJohn Marino   return bb_vinfo;
2117e4b17023SJohn Marino }
2118e4b17023SJohn Marino 
2119e4b17023SJohn Marino 
2120e4b17023SJohn Marino bb_vec_info
vect_slp_analyze_bb(basic_block bb)2121e4b17023SJohn Marino vect_slp_analyze_bb (basic_block bb)
2122e4b17023SJohn Marino {
2123e4b17023SJohn Marino   bb_vec_info bb_vinfo;
2124e4b17023SJohn Marino   int insns = 0;
2125e4b17023SJohn Marino   gimple_stmt_iterator gsi;
2126e4b17023SJohn Marino   unsigned int vector_sizes;
2127e4b17023SJohn Marino 
2128e4b17023SJohn Marino   if (vect_print_dump_info (REPORT_DETAILS))
2129e4b17023SJohn Marino     fprintf (vect_dump, "===vect_slp_analyze_bb===\n");
2130e4b17023SJohn Marino 
2131e4b17023SJohn Marino   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2132e4b17023SJohn Marino     {
2133e4b17023SJohn Marino       gimple stmt = gsi_stmt (gsi);
2134e4b17023SJohn Marino       if (!is_gimple_debug (stmt)
2135e4b17023SJohn Marino           && !gimple_nop_p (stmt)
2136e4b17023SJohn Marino           && gimple_code (stmt) != GIMPLE_LABEL)
2137e4b17023SJohn Marino         insns++;
2138e4b17023SJohn Marino     }
2139e4b17023SJohn Marino 
2140e4b17023SJohn Marino   if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2141e4b17023SJohn Marino     {
2142e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2143e4b17023SJohn Marino         fprintf (vect_dump, "not vectorized: too many instructions in basic "
2144e4b17023SJohn Marino                             "block.\n");
2145e4b17023SJohn Marino 
2146e4b17023SJohn Marino       return NULL;
2147e4b17023SJohn Marino     }
2148e4b17023SJohn Marino 
2149e4b17023SJohn Marino   /* Autodetect first vector size we try.  */
2150e4b17023SJohn Marino   current_vector_size = 0;
2151e4b17023SJohn Marino   vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
2152e4b17023SJohn Marino 
2153e4b17023SJohn Marino   while (1)
2154e4b17023SJohn Marino     {
2155e4b17023SJohn Marino       bb_vinfo = vect_slp_analyze_bb_1 (bb);
2156e4b17023SJohn Marino       if (bb_vinfo)
2157e4b17023SJohn Marino         return bb_vinfo;
2158e4b17023SJohn Marino 
2159e4b17023SJohn Marino       destroy_bb_vec_info (bb_vinfo);
2160e4b17023SJohn Marino 
2161e4b17023SJohn Marino       vector_sizes &= ~current_vector_size;
2162e4b17023SJohn Marino       if (vector_sizes == 0
2163e4b17023SJohn Marino           || current_vector_size == 0)
2164e4b17023SJohn Marino         return NULL;
2165e4b17023SJohn Marino 
2166e4b17023SJohn Marino       /* Try the next biggest vector size.  */
2167e4b17023SJohn Marino       current_vector_size = 1 << floor_log2 (vector_sizes);
2168e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_DETAILS))
2169e4b17023SJohn Marino         fprintf (vect_dump, "***** Re-trying analysis with "
2170e4b17023SJohn Marino                  "vector size %d\n", current_vector_size);
2171e4b17023SJohn Marino     }
2172e4b17023SJohn Marino }
2173e4b17023SJohn Marino 
2174e4b17023SJohn Marino 
2175e4b17023SJohn Marino /* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
2176e4b17023SJohn Marino    the number of created vector stmts depends on the unrolling factor).
2177e4b17023SJohn Marino    However, the actual number of vector stmts for every SLP node depends on
2178e4b17023SJohn Marino    VF which is set later in vect_analyze_operations ().  Hence, SLP costs
2179e4b17023SJohn Marino    should be updated.  In this function we assume that the inside costs
2180e4b17023SJohn Marino    calculated in vect_model_xxx_cost are linear in ncopies.  */
2181e4b17023SJohn Marino 
2182e4b17023SJohn Marino void
vect_update_slp_costs_according_to_vf(loop_vec_info loop_vinfo)2183e4b17023SJohn Marino vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
2184e4b17023SJohn Marino {
2185e4b17023SJohn Marino   unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2186e4b17023SJohn Marino   VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2187e4b17023SJohn Marino   slp_instance instance;
2188e4b17023SJohn Marino 
2189e4b17023SJohn Marino   if (vect_print_dump_info (REPORT_SLP))
2190e4b17023SJohn Marino     fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");
2191e4b17023SJohn Marino 
2192e4b17023SJohn Marino   FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
2193e4b17023SJohn Marino     /* We assume that costs are linear in ncopies.  */
2194e4b17023SJohn Marino     SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
2195e4b17023SJohn Marino       / SLP_INSTANCE_UNROLLING_FACTOR (instance);
2196e4b17023SJohn Marino }
2197e4b17023SJohn Marino 
2198e4b17023SJohn Marino 
2199e4b17023SJohn Marino /* For constant and loop invariant defs of SLP_NODE this function returns
2200e4b17023SJohn Marino    (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
2201e4b17023SJohn Marino    OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
2202e4b17023SJohn Marino    scalar stmts.  NUMBER_OF_VECTORS is the number of vector defs to create.
2203e4b17023SJohn Marino    REDUC_INDEX is the index of the reduction operand in the statements, unless
2204e4b17023SJohn Marino    it is -1.  */
2205e4b17023SJohn Marino 
2206e4b17023SJohn Marino static void
vect_get_constant_vectors(tree op,slp_tree slp_node,VEC (tree,heap)** vec_oprnds,unsigned int op_num,unsigned int number_of_vectors,int reduc_index)2207e4b17023SJohn Marino vect_get_constant_vectors (tree op, slp_tree slp_node,
2208e4b17023SJohn Marino                            VEC (tree, heap) **vec_oprnds,
2209e4b17023SJohn Marino 			   unsigned int op_num, unsigned int number_of_vectors,
2210e4b17023SJohn Marino                            int reduc_index)
2211e4b17023SJohn Marino {
2212e4b17023SJohn Marino   VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
2213e4b17023SJohn Marino   gimple stmt = VEC_index (gimple, stmts, 0);
2214e4b17023SJohn Marino   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2215e4b17023SJohn Marino   int nunits;
2216e4b17023SJohn Marino   tree vec_cst;
2217e4b17023SJohn Marino   tree t = NULL_TREE;
2218e4b17023SJohn Marino   int j, number_of_places_left_in_vector;
2219e4b17023SJohn Marino   tree vector_type;
2220e4b17023SJohn Marino   tree vop;
2221e4b17023SJohn Marino   int group_size = VEC_length (gimple, stmts);
2222e4b17023SJohn Marino   unsigned int vec_num, i;
2223e4b17023SJohn Marino   int number_of_copies = 1;
2224e4b17023SJohn Marino   VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
2225e4b17023SJohn Marino   bool constant_p, is_store;
2226e4b17023SJohn Marino   tree neutral_op = NULL;
2227e4b17023SJohn Marino   enum tree_code code = gimple_expr_code (stmt);
2228e4b17023SJohn Marino   gimple def_stmt;
2229e4b17023SJohn Marino   struct loop *loop;
2230e4b17023SJohn Marino 
2231e4b17023SJohn Marino   if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
2232e4b17023SJohn Marino       && reduc_index != -1)
2233e4b17023SJohn Marino     {
2234e4b17023SJohn Marino       op_num = reduc_index - 1;
2235e4b17023SJohn Marino       op = gimple_op (stmt, reduc_index);
2236e4b17023SJohn Marino       /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
2237e4b17023SJohn Marino          we need either neutral operands or the original operands.  See
2238e4b17023SJohn Marino          get_initial_def_for_reduction() for details.  */
2239e4b17023SJohn Marino       switch (code)
2240e4b17023SJohn Marino         {
2241e4b17023SJohn Marino           case WIDEN_SUM_EXPR:
2242e4b17023SJohn Marino           case DOT_PROD_EXPR:
2243e4b17023SJohn Marino           case PLUS_EXPR:
2244e4b17023SJohn Marino           case MINUS_EXPR:
2245e4b17023SJohn Marino           case BIT_IOR_EXPR:
2246e4b17023SJohn Marino           case BIT_XOR_EXPR:
2247e4b17023SJohn Marino              if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2248e4b17023SJohn Marino                neutral_op = build_real (TREE_TYPE (op), dconst0);
2249e4b17023SJohn Marino              else
2250e4b17023SJohn Marino                neutral_op = build_int_cst (TREE_TYPE (op), 0);
2251e4b17023SJohn Marino 
2252e4b17023SJohn Marino              break;
2253e4b17023SJohn Marino 
2254e4b17023SJohn Marino           case MULT_EXPR:
2255e4b17023SJohn Marino              if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2256e4b17023SJohn Marino                neutral_op = build_real (TREE_TYPE (op), dconst1);
2257e4b17023SJohn Marino              else
2258e4b17023SJohn Marino                neutral_op = build_int_cst (TREE_TYPE (op), 1);
2259e4b17023SJohn Marino 
2260e4b17023SJohn Marino              break;
2261e4b17023SJohn Marino 
2262e4b17023SJohn Marino           case BIT_AND_EXPR:
2263e4b17023SJohn Marino             neutral_op = build_int_cst (TREE_TYPE (op), -1);
2264e4b17023SJohn Marino             break;
2265e4b17023SJohn Marino 
2266e4b17023SJohn Marino           case MAX_EXPR:
2267e4b17023SJohn Marino           case MIN_EXPR:
2268e4b17023SJohn Marino             def_stmt = SSA_NAME_DEF_STMT (op);
2269e4b17023SJohn Marino             loop = (gimple_bb (stmt))->loop_father;
2270e4b17023SJohn Marino             neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2271e4b17023SJohn Marino                                                 loop_preheader_edge (loop));
2272e4b17023SJohn Marino             break;
2273e4b17023SJohn Marino 
2274e4b17023SJohn Marino           default:
2275e4b17023SJohn Marino             neutral_op = NULL;
2276e4b17023SJohn Marino         }
2277e4b17023SJohn Marino     }
2278e4b17023SJohn Marino 
2279e4b17023SJohn Marino   if (STMT_VINFO_DATA_REF (stmt_vinfo))
2280e4b17023SJohn Marino     {
2281e4b17023SJohn Marino       is_store = true;
2282e4b17023SJohn Marino       op = gimple_assign_rhs1 (stmt);
2283e4b17023SJohn Marino     }
2284e4b17023SJohn Marino   else
2285e4b17023SJohn Marino     is_store = false;
2286e4b17023SJohn Marino 
2287e4b17023SJohn Marino   gcc_assert (op);
2288e4b17023SJohn Marino 
2289e4b17023SJohn Marino   if (CONSTANT_CLASS_P (op))
2290e4b17023SJohn Marino     constant_p = true;
2291e4b17023SJohn Marino   else
2292e4b17023SJohn Marino     constant_p = false;
2293e4b17023SJohn Marino 
2294e4b17023SJohn Marino   vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
2295e4b17023SJohn Marino   gcc_assert (vector_type);
2296e4b17023SJohn Marino   nunits = TYPE_VECTOR_SUBPARTS (vector_type);
2297e4b17023SJohn Marino 
2298e4b17023SJohn Marino   /* NUMBER_OF_COPIES is the number of times we need to use the same values in
2299e4b17023SJohn Marino      created vectors. It is greater than 1 if unrolling is performed.
2300e4b17023SJohn Marino 
2301e4b17023SJohn Marino      For example, we have two scalar operands, s1 and s2 (e.g., group of
2302e4b17023SJohn Marino      strided accesses of size two), while NUNITS is four (i.e., four scalars
2303e4b17023SJohn Marino      of this type can be packed in a vector).  The output vector will contain
2304e4b17023SJohn Marino      two copies of each scalar operand: {s1, s2, s1, s2}.  (NUMBER_OF_COPIES
2305e4b17023SJohn Marino      will be 2).
2306e4b17023SJohn Marino 
2307e4b17023SJohn Marino      If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
2308e4b17023SJohn Marino      containing the operands.
2309e4b17023SJohn Marino 
2310e4b17023SJohn Marino      For example, NUNITS is four as before, and the group size is 8
2311e4b17023SJohn Marino      (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
2312e4b17023SJohn Marino      {s5, s6, s7, s8}.  */
2313e4b17023SJohn Marino 
2314e4b17023SJohn Marino   number_of_copies = least_common_multiple (nunits, group_size) / group_size;
2315e4b17023SJohn Marino 
2316e4b17023SJohn Marino   number_of_places_left_in_vector = nunits;
2317e4b17023SJohn Marino   for (j = 0; j < number_of_copies; j++)
2318e4b17023SJohn Marino     {
2319e4b17023SJohn Marino       for (i = group_size - 1; VEC_iterate (gimple, stmts, i, stmt); i--)
2320e4b17023SJohn Marino         {
2321e4b17023SJohn Marino           if (is_store)
2322e4b17023SJohn Marino             op = gimple_assign_rhs1 (stmt);
2323e4b17023SJohn Marino           else
2324e4b17023SJohn Marino 	    {
2325e4b17023SJohn Marino 	      switch (code)
2326e4b17023SJohn Marino 		{
2327e4b17023SJohn Marino 		  case COND_EXPR:
2328e4b17023SJohn Marino 		    if (op_num == 0 || op_num == 1)
2329e4b17023SJohn Marino 		      {
2330e4b17023SJohn Marino 			tree cond = gimple_assign_rhs1 (stmt);
2331e4b17023SJohn Marino 			op = TREE_OPERAND (cond, op_num);
2332e4b17023SJohn Marino 		      }
2333e4b17023SJohn Marino 		    else
2334e4b17023SJohn Marino 		      {
2335e4b17023SJohn Marino 			if (op_num == 2)
2336e4b17023SJohn Marino 			  op = gimple_assign_rhs2 (stmt);
2337e4b17023SJohn Marino 			else
2338e4b17023SJohn Marino 			  op = gimple_assign_rhs3 (stmt);
2339e4b17023SJohn Marino 		      }
2340e4b17023SJohn Marino 		    break;
2341e4b17023SJohn Marino 
2342e4b17023SJohn Marino 		  case CALL_EXPR:
2343e4b17023SJohn Marino 		    op = gimple_call_arg (stmt, op_num);
2344e4b17023SJohn Marino 		    break;
2345e4b17023SJohn Marino 
2346e4b17023SJohn Marino 		  default:
2347e4b17023SJohn Marino 		    op = gimple_op (stmt, op_num + 1);
2348e4b17023SJohn Marino 		}
2349e4b17023SJohn Marino 	    }
2350e4b17023SJohn Marino 
2351e4b17023SJohn Marino           if (reduc_index != -1)
2352e4b17023SJohn Marino             {
2353e4b17023SJohn Marino               loop = (gimple_bb (stmt))->loop_father;
2354e4b17023SJohn Marino               def_stmt = SSA_NAME_DEF_STMT (op);
2355e4b17023SJohn Marino 
2356e4b17023SJohn Marino               gcc_assert (loop);
2357e4b17023SJohn Marino 
2358e4b17023SJohn Marino               /* Get the def before the loop.  In reduction chain we have only
2359e4b17023SJohn Marino                  one initial value.  */
2360e4b17023SJohn Marino               if ((j != (number_of_copies - 1)
2361e4b17023SJohn Marino                    || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2362e4b17023SJohn Marino                        && i != 0))
2363e4b17023SJohn Marino                   && neutral_op)
2364e4b17023SJohn Marino                 op = neutral_op;
2365e4b17023SJohn Marino               else
2366e4b17023SJohn Marino                 op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2367e4b17023SJohn Marino                                             loop_preheader_edge (loop));
2368e4b17023SJohn Marino             }
2369e4b17023SJohn Marino 
2370e4b17023SJohn Marino           /* Create 'vect_ = {op0,op1,...,opn}'.  */
2371e4b17023SJohn Marino           t = tree_cons (NULL_TREE, op, t);
2372e4b17023SJohn Marino 
2373e4b17023SJohn Marino           number_of_places_left_in_vector--;
2374e4b17023SJohn Marino 
2375e4b17023SJohn Marino           if (number_of_places_left_in_vector == 0)
2376e4b17023SJohn Marino             {
2377e4b17023SJohn Marino               number_of_places_left_in_vector = nunits;
2378e4b17023SJohn Marino 
2379e4b17023SJohn Marino 	      if (constant_p)
2380e4b17023SJohn Marino 		vec_cst = build_vector (vector_type, t);
2381e4b17023SJohn Marino 	      else
2382e4b17023SJohn Marino 		vec_cst = build_constructor_from_list (vector_type, t);
2383e4b17023SJohn Marino               VEC_quick_push (tree, voprnds,
2384e4b17023SJohn Marino                               vect_init_vector (stmt, vec_cst, vector_type, NULL));
2385e4b17023SJohn Marino               t = NULL_TREE;
2386e4b17023SJohn Marino             }
2387e4b17023SJohn Marino         }
2388e4b17023SJohn Marino     }
2389e4b17023SJohn Marino 
2390e4b17023SJohn Marino   /* Since the vectors are created in the reverse order, we should invert
2391e4b17023SJohn Marino      them.  */
2392e4b17023SJohn Marino   vec_num = VEC_length (tree, voprnds);
2393e4b17023SJohn Marino   for (j = vec_num - 1; j >= 0; j--)
2394e4b17023SJohn Marino     {
2395e4b17023SJohn Marino       vop = VEC_index (tree, voprnds, j);
2396e4b17023SJohn Marino       VEC_quick_push (tree, *vec_oprnds, vop);
2397e4b17023SJohn Marino     }
2398e4b17023SJohn Marino 
2399e4b17023SJohn Marino   VEC_free (tree, heap, voprnds);
2400e4b17023SJohn Marino 
2401e4b17023SJohn Marino   /* In case that VF is greater than the unrolling factor needed for the SLP
2402e4b17023SJohn Marino      group of stmts, NUMBER_OF_VECTORS to be created is greater than
2403e4b17023SJohn Marino      NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
2404e4b17023SJohn Marino      to replicate the vectors.  */
2405e4b17023SJohn Marino   while (number_of_vectors > VEC_length (tree, *vec_oprnds))
2406e4b17023SJohn Marino     {
2407e4b17023SJohn Marino       tree neutral_vec = NULL;
2408e4b17023SJohn Marino 
2409e4b17023SJohn Marino       if (neutral_op)
2410e4b17023SJohn Marino         {
2411e4b17023SJohn Marino           if (!neutral_vec)
2412e4b17023SJohn Marino 	    neutral_vec = build_vector_from_val (vector_type, neutral_op);
2413e4b17023SJohn Marino 
2414e4b17023SJohn Marino           VEC_quick_push (tree, *vec_oprnds, neutral_vec);
2415e4b17023SJohn Marino         }
2416e4b17023SJohn Marino       else
2417e4b17023SJohn Marino         {
2418e4b17023SJohn Marino           for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num; i++)
2419e4b17023SJohn Marino             VEC_quick_push (tree, *vec_oprnds, vop);
2420e4b17023SJohn Marino         }
2421e4b17023SJohn Marino     }
2422e4b17023SJohn Marino }
2423e4b17023SJohn Marino 
2424e4b17023SJohn Marino 
2425e4b17023SJohn Marino /* Get vectorized definitions from SLP_NODE that contains corresponding
2426e4b17023SJohn Marino    vectorized def-stmts.  */
2427e4b17023SJohn Marino 
2428e4b17023SJohn Marino static void
vect_get_slp_vect_defs(slp_tree slp_node,VEC (tree,heap)** vec_oprnds)2429e4b17023SJohn Marino vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
2430e4b17023SJohn Marino {
2431e4b17023SJohn Marino   tree vec_oprnd;
2432e4b17023SJohn Marino   gimple vec_def_stmt;
2433e4b17023SJohn Marino   unsigned int i;
2434e4b17023SJohn Marino 
2435e4b17023SJohn Marino   gcc_assert (SLP_TREE_VEC_STMTS (slp_node));
2436e4b17023SJohn Marino 
2437e4b17023SJohn Marino   FOR_EACH_VEC_ELT (gimple, SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
2438e4b17023SJohn Marino     {
2439e4b17023SJohn Marino       gcc_assert (vec_def_stmt);
2440e4b17023SJohn Marino       vec_oprnd = gimple_get_lhs (vec_def_stmt);
2441e4b17023SJohn Marino       VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2442e4b17023SJohn Marino     }
2443e4b17023SJohn Marino }
2444e4b17023SJohn Marino 
2445e4b17023SJohn Marino 
2446e4b17023SJohn Marino /* Get vectorized definitions for SLP_NODE.
2447e4b17023SJohn Marino    If the scalar definitions are loop invariants or constants, collect them and
2448e4b17023SJohn Marino    call vect_get_constant_vectors() to create vector stmts.
2449e4b17023SJohn Marino    Otherwise, the def-stmts must be already vectorized and the vectorized stmts
2450e4b17023SJohn Marino    must be stored in the corresponding child of SLP_NODE, and we call
2451e4b17023SJohn Marino    vect_get_slp_vect_defs () to retrieve them.  */
2452e4b17023SJohn Marino 
2453e4b17023SJohn Marino void
vect_get_slp_defs(VEC (tree,heap)* ops,slp_tree slp_node,VEC (slp_void_p,heap)** vec_oprnds,int reduc_index)2454e4b17023SJohn Marino vect_get_slp_defs (VEC (tree, heap) *ops, slp_tree slp_node,
2455e4b17023SJohn Marino                    VEC (slp_void_p, heap) **vec_oprnds, int reduc_index)
2456e4b17023SJohn Marino {
2457e4b17023SJohn Marino   gimple first_stmt, first_def;
2458e4b17023SJohn Marino   int number_of_vects = 0, i;
2459e4b17023SJohn Marino   unsigned int child_index = 0;
2460e4b17023SJohn Marino   HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
2461e4b17023SJohn Marino   slp_tree child = NULL;
2462e4b17023SJohn Marino   VEC (tree, heap) *vec_defs;
2463e4b17023SJohn Marino   tree oprnd, def_lhs;
2464e4b17023SJohn Marino   bool vectorized_defs;
2465e4b17023SJohn Marino 
2466e4b17023SJohn Marino   first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
2467e4b17023SJohn Marino   FOR_EACH_VEC_ELT (tree, ops, i, oprnd)
2468e4b17023SJohn Marino     {
2469e4b17023SJohn Marino       /* For each operand we check if it has vectorized definitions in a child
2470e4b17023SJohn Marino 	 node or we need to create them (for invariants and constants).  We
2471e4b17023SJohn Marino 	 check if the LHS of the first stmt of the next child matches OPRND.
2472e4b17023SJohn Marino 	 If it does, we found the correct child.  Otherwise, we call
2473e4b17023SJohn Marino 	 vect_get_constant_vectors (), and not advance CHILD_INDEX in order
2474e4b17023SJohn Marino 	 to check this child node for the next operand.  */
2475e4b17023SJohn Marino       vectorized_defs = false;
2476e4b17023SJohn Marino       if (VEC_length (slp_void_p, SLP_TREE_CHILDREN (slp_node)) > child_index)
2477e4b17023SJohn Marino         {
2478e4b17023SJohn Marino           child = (slp_tree) VEC_index (slp_void_p,
2479e4b17023SJohn Marino 					SLP_TREE_CHILDREN (slp_node),
2480e4b17023SJohn Marino 					child_index);
2481e4b17023SJohn Marino           first_def = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (child), 0);
2482e4b17023SJohn Marino 
2483e4b17023SJohn Marino 	  /* In the end of a pattern sequence we have a use of the original stmt,
2484e4b17023SJohn Marino 	     so we need to compare OPRND with the original def.  */
2485e4b17023SJohn Marino           if (is_pattern_stmt_p (vinfo_for_stmt (first_def))
2486e4b17023SJohn Marino 	      && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (first_stmt))
2487e4b17023SJohn Marino               && !is_pattern_stmt_p (vinfo_for_stmt (first_stmt)))
2488e4b17023SJohn Marino             first_def = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
2489e4b17023SJohn Marino 
2490e4b17023SJohn Marino           if (is_gimple_call (first_def))
2491e4b17023SJohn Marino             def_lhs = gimple_call_lhs (first_def);
2492e4b17023SJohn Marino           else
2493e4b17023SJohn Marino             def_lhs = gimple_assign_lhs (first_def);
2494e4b17023SJohn Marino 
2495e4b17023SJohn Marino           if (operand_equal_p (oprnd, def_lhs, 0))
2496e4b17023SJohn Marino             {
2497e4b17023SJohn Marino               /* The number of vector defs is determined by the number of
2498e4b17023SJohn Marino                  vector statements in the node from which we get those
2499e4b17023SJohn Marino 		 statements.  */
2500e4b17023SJohn Marino                  number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
2501e4b17023SJohn Marino                  vectorized_defs = true;
2502e4b17023SJohn Marino 	      child_index++;
2503e4b17023SJohn Marino             }
2504e4b17023SJohn Marino         }
2505e4b17023SJohn Marino 
2506e4b17023SJohn Marino       if (!vectorized_defs)
2507e4b17023SJohn Marino         {
2508e4b17023SJohn Marino           if (i == 0)
2509e4b17023SJohn Marino             {
2510e4b17023SJohn Marino               number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
2511e4b17023SJohn Marino               /* Number of vector stmts was calculated according to LHS in
2512e4b17023SJohn Marino                  vect_schedule_slp_instance (), fix it by replacing LHS with
2513e4b17023SJohn Marino                  RHS, if necessary.  See vect_get_smallest_scalar_type () for
2514e4b17023SJohn Marino                  details.  */
2515e4b17023SJohn Marino               vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
2516e4b17023SJohn Marino                                              &rhs_size_unit);
2517e4b17023SJohn Marino               if (rhs_size_unit != lhs_size_unit)
2518e4b17023SJohn Marino                 {
2519e4b17023SJohn Marino                   number_of_vects *= rhs_size_unit;
2520e4b17023SJohn Marino                   number_of_vects /= lhs_size_unit;
2521e4b17023SJohn Marino                 }
2522e4b17023SJohn Marino             }
2523e4b17023SJohn Marino         }
2524e4b17023SJohn Marino 
2525e4b17023SJohn Marino       /* Allocate memory for vectorized defs.  */
2526e4b17023SJohn Marino       vec_defs = VEC_alloc (tree, heap, number_of_vects);
2527e4b17023SJohn Marino 
2528e4b17023SJohn Marino       /* For reduction defs we call vect_get_constant_vectors (), since we are
2529e4b17023SJohn Marino          looking for initial loop invariant values.  */
2530e4b17023SJohn Marino       if (vectorized_defs && reduc_index == -1)
2531e4b17023SJohn Marino         /* The defs are already vectorized.  */
2532e4b17023SJohn Marino         vect_get_slp_vect_defs (child, &vec_defs);
2533e4b17023SJohn Marino       else
2534e4b17023SJohn Marino         /* Build vectors from scalar defs.  */
2535e4b17023SJohn Marino         vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
2536e4b17023SJohn Marino                                    number_of_vects, reduc_index);
2537e4b17023SJohn Marino 
2538e4b17023SJohn Marino       VEC_quick_push (slp_void_p, *vec_oprnds, (slp_void_p) vec_defs);
2539e4b17023SJohn Marino 
2540e4b17023SJohn Marino       /* For reductions, we only need initial values.  */
2541e4b17023SJohn Marino       if (reduc_index != -1)
2542e4b17023SJohn Marino         return;
2543e4b17023SJohn Marino     }
2544e4b17023SJohn Marino }
2545e4b17023SJohn Marino 
2546e4b17023SJohn Marino 
2547e4b17023SJohn Marino /* Create NCOPIES permutation statements using the mask MASK_BYTES (by
2548e4b17023SJohn Marino    building a vector of type MASK_TYPE from it) and two input vectors placed in
2549e4b17023SJohn Marino    DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
2550e4b17023SJohn Marino    shifting by STRIDE elements of DR_CHAIN for every copy.
2551e4b17023SJohn Marino    (STRIDE is the number of vectorized stmts for NODE divided by the number of
2552e4b17023SJohn Marino    copies).
2553e4b17023SJohn Marino    VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
2554e4b17023SJohn Marino    the created stmts must be inserted.  */
2555e4b17023SJohn Marino 
2556e4b17023SJohn Marino static inline void
vect_create_mask_and_perm(gimple stmt,gimple next_scalar_stmt,tree mask,int first_vec_indx,int second_vec_indx,gimple_stmt_iterator * gsi,slp_tree node,tree vectype,VEC (tree,heap)* dr_chain,int ncopies,int vect_stmts_counter)2557e4b17023SJohn Marino vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
2558e4b17023SJohn Marino                            tree mask, int first_vec_indx, int second_vec_indx,
2559e4b17023SJohn Marino                            gimple_stmt_iterator *gsi, slp_tree node,
2560e4b17023SJohn Marino                            tree vectype, VEC(tree,heap) *dr_chain,
2561e4b17023SJohn Marino                            int ncopies, int vect_stmts_counter)
2562e4b17023SJohn Marino {
2563e4b17023SJohn Marino   tree perm_dest;
2564e4b17023SJohn Marino   gimple perm_stmt = NULL;
2565e4b17023SJohn Marino   stmt_vec_info next_stmt_info;
2566e4b17023SJohn Marino   int i, stride;
2567e4b17023SJohn Marino   tree first_vec, second_vec, data_ref;
2568e4b17023SJohn Marino 
2569e4b17023SJohn Marino   stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
2570e4b17023SJohn Marino 
2571e4b17023SJohn Marino   /* Initialize the vect stmts of NODE to properly insert the generated
2572e4b17023SJohn Marino      stmts later.  */
2573e4b17023SJohn Marino   for (i = VEC_length (gimple, SLP_TREE_VEC_STMTS (node));
2574e4b17023SJohn Marino        i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
2575e4b17023SJohn Marino     VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (node), NULL);
2576e4b17023SJohn Marino 
2577e4b17023SJohn Marino   perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
2578e4b17023SJohn Marino   for (i = 0; i < ncopies; i++)
2579e4b17023SJohn Marino     {
2580e4b17023SJohn Marino       first_vec = VEC_index (tree, dr_chain, first_vec_indx);
2581e4b17023SJohn Marino       second_vec = VEC_index (tree, dr_chain, second_vec_indx);
2582e4b17023SJohn Marino 
2583e4b17023SJohn Marino       /* Generate the permute statement.  */
2584e4b17023SJohn Marino       perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, perm_dest,
2585e4b17023SJohn Marino 						 first_vec, second_vec, mask);
2586e4b17023SJohn Marino       data_ref = make_ssa_name (perm_dest, perm_stmt);
2587e4b17023SJohn Marino       gimple_set_lhs (perm_stmt, data_ref);
2588e4b17023SJohn Marino       vect_finish_stmt_generation (stmt, perm_stmt, gsi);
2589e4b17023SJohn Marino 
2590e4b17023SJohn Marino       /* Store the vector statement in NODE.  */
2591e4b17023SJohn Marino       VEC_replace (gimple, SLP_TREE_VEC_STMTS (node),
2592e4b17023SJohn Marino                    stride * i + vect_stmts_counter, perm_stmt);
2593e4b17023SJohn Marino 
2594e4b17023SJohn Marino       first_vec_indx += stride;
2595e4b17023SJohn Marino       second_vec_indx += stride;
2596e4b17023SJohn Marino     }
2597e4b17023SJohn Marino 
2598e4b17023SJohn Marino   /* Mark the scalar stmt as vectorized.  */
2599e4b17023SJohn Marino   next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
2600e4b17023SJohn Marino   STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
2601e4b17023SJohn Marino }
2602e4b17023SJohn Marino 
2603e4b17023SJohn Marino 
2604e4b17023SJohn Marino /* Given FIRST_MASK_ELEMENT - the mask element in element representation,
2605e4b17023SJohn Marino    return in CURRENT_MASK_ELEMENT its equivalent in target specific
2606e4b17023SJohn Marino    representation.  Check that the mask is valid and return FALSE if not.
2607e4b17023SJohn Marino    Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
2608e4b17023SJohn Marino    the next vector, i.e., the current first vector is not needed.  */
2609e4b17023SJohn Marino 
2610e4b17023SJohn Marino static bool
vect_get_mask_element(gimple stmt,int first_mask_element,int m,int mask_nunits,bool only_one_vec,int index,unsigned char * mask,int * current_mask_element,bool * need_next_vector,int * number_of_mask_fixes,bool * mask_fixed,bool * needs_first_vector)2611e4b17023SJohn Marino vect_get_mask_element (gimple stmt, int first_mask_element, int m,
2612e4b17023SJohn Marino                        int mask_nunits, bool only_one_vec, int index,
2613e4b17023SJohn Marino 		       unsigned char *mask, int *current_mask_element,
2614e4b17023SJohn Marino                        bool *need_next_vector, int *number_of_mask_fixes,
2615e4b17023SJohn Marino                        bool *mask_fixed, bool *needs_first_vector)
2616e4b17023SJohn Marino {
2617e4b17023SJohn Marino   int i;
2618e4b17023SJohn Marino 
2619e4b17023SJohn Marino   /* Convert to target specific representation.  */
2620e4b17023SJohn Marino   *current_mask_element = first_mask_element + m;
2621e4b17023SJohn Marino   /* Adjust the value in case it's a mask for second and third vectors.  */
2622e4b17023SJohn Marino   *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);
2623e4b17023SJohn Marino 
2624e4b17023SJohn Marino   if (*current_mask_element < mask_nunits)
2625e4b17023SJohn Marino     *needs_first_vector = true;
2626e4b17023SJohn Marino 
2627e4b17023SJohn Marino   /* We have only one input vector to permute but the mask accesses values in
2628e4b17023SJohn Marino      the next vector as well.  */
2629e4b17023SJohn Marino   if (only_one_vec && *current_mask_element >= mask_nunits)
2630e4b17023SJohn Marino     {
2631e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_DETAILS))
2632e4b17023SJohn Marino         {
2633e4b17023SJohn Marino           fprintf (vect_dump, "permutation requires at least two vectors ");
2634e4b17023SJohn Marino           print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2635e4b17023SJohn Marino         }
2636e4b17023SJohn Marino 
2637e4b17023SJohn Marino       return false;
2638e4b17023SJohn Marino     }
2639e4b17023SJohn Marino 
2640e4b17023SJohn Marino   /* The mask requires the next vector.  */
2641e4b17023SJohn Marino   if (*current_mask_element >= mask_nunits * 2)
2642e4b17023SJohn Marino     {
2643e4b17023SJohn Marino       if (*needs_first_vector || *mask_fixed)
2644e4b17023SJohn Marino         {
2645e4b17023SJohn Marino           /* We either need the first vector too or have already moved to the
2646e4b17023SJohn Marino              next vector. In both cases, this permutation needs three
2647e4b17023SJohn Marino              vectors.  */
2648e4b17023SJohn Marino           if (vect_print_dump_info (REPORT_DETAILS))
2649e4b17023SJohn Marino             {
2650e4b17023SJohn Marino               fprintf (vect_dump, "permutation requires at "
2651e4b17023SJohn Marino                                   "least three vectors ");
2652e4b17023SJohn Marino               print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2653e4b17023SJohn Marino             }
2654e4b17023SJohn Marino 
2655e4b17023SJohn Marino           return false;
2656e4b17023SJohn Marino         }
2657e4b17023SJohn Marino 
2658e4b17023SJohn Marino       /* We move to the next vector, dropping the first one and working with
2659e4b17023SJohn Marino          the second and the third - we need to adjust the values of the mask
2660e4b17023SJohn Marino          accordingly.  */
2661e4b17023SJohn Marino       *current_mask_element -= mask_nunits * *number_of_mask_fixes;
2662e4b17023SJohn Marino 
2663e4b17023SJohn Marino       for (i = 0; i < index; i++)
2664e4b17023SJohn Marino         mask[i] -= mask_nunits * *number_of_mask_fixes;
2665e4b17023SJohn Marino 
2666e4b17023SJohn Marino       (*number_of_mask_fixes)++;
2667e4b17023SJohn Marino       *mask_fixed = true;
2668e4b17023SJohn Marino     }
2669e4b17023SJohn Marino 
2670e4b17023SJohn Marino   *need_next_vector = *mask_fixed;
2671e4b17023SJohn Marino 
2672e4b17023SJohn Marino   /* This was the last element of this mask. Start a new one.  */
2673e4b17023SJohn Marino   if (index == mask_nunits - 1)
2674e4b17023SJohn Marino     {
2675e4b17023SJohn Marino       *number_of_mask_fixes = 1;
2676e4b17023SJohn Marino       *mask_fixed = false;
2677e4b17023SJohn Marino       *needs_first_vector = false;
2678e4b17023SJohn Marino     }
2679e4b17023SJohn Marino 
2680e4b17023SJohn Marino   return true;
2681e4b17023SJohn Marino }
2682e4b17023SJohn Marino 
2683e4b17023SJohn Marino 
2684e4b17023SJohn Marino /* Generate vector permute statements from a list of loads in DR_CHAIN.
2685e4b17023SJohn Marino    If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
2686e4b17023SJohn Marino    permute statements for SLP_NODE_INSTANCE.  */
2687e4b17023SJohn Marino bool
vect_transform_slp_perm_load(gimple stmt,VEC (tree,heap)* dr_chain,gimple_stmt_iterator * gsi,int vf,slp_instance slp_node_instance,bool analyze_only)2688e4b17023SJohn Marino vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain,
2689e4b17023SJohn Marino                               gimple_stmt_iterator *gsi, int vf,
2690e4b17023SJohn Marino                               slp_instance slp_node_instance, bool analyze_only)
2691e4b17023SJohn Marino {
2692e4b17023SJohn Marino   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2693e4b17023SJohn Marino   tree mask_element_type = NULL_TREE, mask_type;
2694e4b17023SJohn Marino   int i, j, k, nunits, vec_index = 0, scalar_index;
2695e4b17023SJohn Marino   slp_tree node;
2696e4b17023SJohn Marino   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2697e4b17023SJohn Marino   gimple next_scalar_stmt;
2698e4b17023SJohn Marino   int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
2699e4b17023SJohn Marino   int first_mask_element;
2700e4b17023SJohn Marino   int index, unroll_factor, current_mask_element, ncopies;
2701e4b17023SJohn Marino   unsigned char *mask;
2702e4b17023SJohn Marino   bool only_one_vec = false, need_next_vector = false;
2703e4b17023SJohn Marino   int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
2704e4b17023SJohn Marino   int number_of_mask_fixes = 1;
2705e4b17023SJohn Marino   bool mask_fixed = false;
2706e4b17023SJohn Marino   bool needs_first_vector = false;
2707e4b17023SJohn Marino   enum machine_mode mode;
2708e4b17023SJohn Marino 
2709e4b17023SJohn Marino   mode = TYPE_MODE (vectype);
2710e4b17023SJohn Marino 
2711e4b17023SJohn Marino   if (!can_vec_perm_p (mode, false, NULL))
2712e4b17023SJohn Marino     {
2713e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_DETAILS))
2714e4b17023SJohn Marino         {
2715e4b17023SJohn Marino           fprintf (vect_dump, "no vect permute for ");
2716e4b17023SJohn Marino           print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2717e4b17023SJohn Marino         }
2718e4b17023SJohn Marino       return false;
2719e4b17023SJohn Marino     }
2720e4b17023SJohn Marino 
2721e4b17023SJohn Marino   /* The generic VEC_PERM_EXPR code always uses an integral type of the
2722e4b17023SJohn Marino      same size as the vector element being permuted.  */
2723e4b17023SJohn Marino   mask_element_type
2724e4b17023SJohn Marino     = lang_hooks.types.type_for_size
2725e4b17023SJohn Marino     (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (vectype))), 1);
2726e4b17023SJohn Marino   mask_type = get_vectype_for_scalar_type (mask_element_type);
2727e4b17023SJohn Marino   nunits = TYPE_VECTOR_SUBPARTS (vectype);
2728e4b17023SJohn Marino   mask = XALLOCAVEC (unsigned char, nunits);
2729e4b17023SJohn Marino   unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2730e4b17023SJohn Marino 
2731e4b17023SJohn Marino   /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
2732e4b17023SJohn Marino      unrolling factor.  */
2733e4b17023SJohn Marino   orig_vec_stmts_num = group_size *
2734e4b17023SJohn Marino                 SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
2735e4b17023SJohn Marino   if (orig_vec_stmts_num == 1)
2736e4b17023SJohn Marino     only_one_vec = true;
2737e4b17023SJohn Marino 
2738e4b17023SJohn Marino   /* Number of copies is determined by the final vectorization factor
2739e4b17023SJohn Marino      relatively to SLP_NODE_INSTANCE unrolling factor.  */
2740e4b17023SJohn Marino   ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2741e4b17023SJohn Marino 
2742e4b17023SJohn Marino   /* Generate permutation masks for every NODE. Number of masks for each NODE
2743e4b17023SJohn Marino      is equal to GROUP_SIZE.
2744e4b17023SJohn Marino      E.g., we have a group of three nodes with three loads from the same
2745e4b17023SJohn Marino      location in each node, and the vector size is 4. I.e., we have a
2746e4b17023SJohn Marino      a0b0c0a1b1c1... sequence and we need to create the following vectors:
2747e4b17023SJohn Marino      for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
2748e4b17023SJohn Marino      for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
2749e4b17023SJohn Marino      ...
2750e4b17023SJohn Marino 
2751e4b17023SJohn Marino      The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
2752e4b17023SJohn Marino      The last mask is illegal since we assume two operands for permute
2753e4b17023SJohn Marino      operation, and the mask element values can't be outside that range.
2754e4b17023SJohn Marino      Hence, the last mask must be converted into {2,5,5,5}.
2755e4b17023SJohn Marino      For the first two permutations we need the first and the second input
2756e4b17023SJohn Marino      vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
2757e4b17023SJohn Marino      we need the second and the third vectors: {b1,c1,a2,b2} and
2758e4b17023SJohn Marino      {c2,a3,b3,c3}.  */
2759e4b17023SJohn Marino 
2760e4b17023SJohn Marino   FOR_EACH_VEC_ELT  (slp_tree, SLP_INSTANCE_LOADS (slp_node_instance), i, node)
2761e4b17023SJohn Marino     {
2762e4b17023SJohn Marino       scalar_index = 0;
2763e4b17023SJohn Marino       index = 0;
2764e4b17023SJohn Marino       vect_stmts_counter = 0;
2765e4b17023SJohn Marino       vec_index = 0;
2766e4b17023SJohn Marino       first_vec_index = vec_index++;
2767e4b17023SJohn Marino       if (only_one_vec)
2768e4b17023SJohn Marino         second_vec_index = first_vec_index;
2769e4b17023SJohn Marino       else
2770e4b17023SJohn Marino         second_vec_index =  vec_index++;
2771e4b17023SJohn Marino 
2772e4b17023SJohn Marino       for (j = 0; j < unroll_factor; j++)
2773e4b17023SJohn Marino         {
2774e4b17023SJohn Marino           for (k = 0; k < group_size; k++)
2775e4b17023SJohn Marino             {
2776e4b17023SJohn Marino               first_mask_element = i + j * group_size;
2777e4b17023SJohn Marino               if (!vect_get_mask_element (stmt, first_mask_element, 0,
2778e4b17023SJohn Marino 					  nunits, only_one_vec, index,
2779e4b17023SJohn Marino 					  mask, &current_mask_element,
2780e4b17023SJohn Marino 					  &need_next_vector,
2781e4b17023SJohn Marino 					  &number_of_mask_fixes, &mask_fixed,
2782e4b17023SJohn Marino 					  &needs_first_vector))
2783e4b17023SJohn Marino 		return false;
2784e4b17023SJohn Marino 	      mask[index++] = current_mask_element;
2785e4b17023SJohn Marino 
2786e4b17023SJohn Marino               if (index == nunits)
2787e4b17023SJohn Marino                 {
2788e4b17023SJohn Marino 		  tree mask_vec = NULL;
2789e4b17023SJohn Marino 
2790e4b17023SJohn Marino 		  if (!can_vec_perm_p (mode, false, mask))
2791e4b17023SJohn Marino 		    {
2792e4b17023SJohn Marino 		      if (vect_print_dump_info (REPORT_DETAILS))
2793e4b17023SJohn Marino 			{
2794e4b17023SJohn Marino 			  fprintf (vect_dump, "unsupported vect permute { ");
2795e4b17023SJohn Marino 			  for (i = 0; i < nunits; ++i)
2796e4b17023SJohn Marino 			    fprintf (vect_dump, "%d ", mask[i]);
2797e4b17023SJohn Marino 			  fprintf (vect_dump, "}\n");
2798e4b17023SJohn Marino 			}
2799e4b17023SJohn Marino 		      return false;
2800e4b17023SJohn Marino 		    }
2801e4b17023SJohn Marino 
2802e4b17023SJohn Marino 		  while (--index >= 0)
2803e4b17023SJohn Marino 		    {
2804e4b17023SJohn Marino 		      tree t = build_int_cst (mask_element_type, mask[index]);
2805e4b17023SJohn Marino 		      mask_vec = tree_cons (NULL, t, mask_vec);
2806e4b17023SJohn Marino 		    }
2807e4b17023SJohn Marino 		  mask_vec = build_vector (mask_type, mask_vec);
2808e4b17023SJohn Marino 		  index = 0;
2809e4b17023SJohn Marino 
2810e4b17023SJohn Marino                   if (!analyze_only)
2811e4b17023SJohn Marino                     {
2812e4b17023SJohn Marino                       if (need_next_vector)
2813e4b17023SJohn Marino                         {
2814e4b17023SJohn Marino                           first_vec_index = second_vec_index;
2815e4b17023SJohn Marino                           second_vec_index = vec_index;
2816e4b17023SJohn Marino                         }
2817e4b17023SJohn Marino 
2818e4b17023SJohn Marino                       next_scalar_stmt = VEC_index (gimple,
2819e4b17023SJohn Marino                                 SLP_TREE_SCALAR_STMTS (node), scalar_index++);
2820e4b17023SJohn Marino 
2821e4b17023SJohn Marino                       vect_create_mask_and_perm (stmt, next_scalar_stmt,
2822e4b17023SJohn Marino                                mask_vec, first_vec_index, second_vec_index,
2823e4b17023SJohn Marino 			       gsi, node, vectype, dr_chain,
2824e4b17023SJohn Marino 			       ncopies, vect_stmts_counter++);
2825e4b17023SJohn Marino                     }
2826e4b17023SJohn Marino                 }
2827e4b17023SJohn Marino             }
2828e4b17023SJohn Marino         }
2829e4b17023SJohn Marino     }
2830e4b17023SJohn Marino 
2831e4b17023SJohn Marino   return true;
2832e4b17023SJohn Marino }
2833e4b17023SJohn Marino 
2834e4b17023SJohn Marino 
2835e4b17023SJohn Marino 
2836e4b17023SJohn Marino /* Vectorize SLP instance tree in postorder.  */
2837e4b17023SJohn Marino 
2838e4b17023SJohn Marino static bool
vect_schedule_slp_instance(slp_tree node,slp_instance instance,unsigned int vectorization_factor)2839e4b17023SJohn Marino vect_schedule_slp_instance (slp_tree node, slp_instance instance,
2840e4b17023SJohn Marino                             unsigned int vectorization_factor)
2841e4b17023SJohn Marino {
2842e4b17023SJohn Marino   gimple stmt;
2843e4b17023SJohn Marino   bool strided_store, is_store;
2844e4b17023SJohn Marino   gimple_stmt_iterator si;
2845e4b17023SJohn Marino   stmt_vec_info stmt_info;
2846e4b17023SJohn Marino   unsigned int vec_stmts_size, nunits, group_size;
2847e4b17023SJohn Marino   tree vectype;
2848e4b17023SJohn Marino   int i;
2849e4b17023SJohn Marino   slp_tree loads_node;
2850e4b17023SJohn Marino   slp_void_p child;
2851e4b17023SJohn Marino 
2852e4b17023SJohn Marino   if (!node)
2853e4b17023SJohn Marino     return false;
2854e4b17023SJohn Marino 
2855e4b17023SJohn Marino   FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
2856e4b17023SJohn Marino     vect_schedule_slp_instance ((slp_tree) child, instance,
2857e4b17023SJohn Marino                                 vectorization_factor);
2858e4b17023SJohn Marino 
2859e4b17023SJohn Marino   stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
2860e4b17023SJohn Marino   stmt_info = vinfo_for_stmt (stmt);
2861e4b17023SJohn Marino 
2862e4b17023SJohn Marino   /* VECTYPE is the type of the destination.  */
2863e4b17023SJohn Marino   vectype = STMT_VINFO_VECTYPE (stmt_info);
2864e4b17023SJohn Marino   nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
2865e4b17023SJohn Marino   group_size = SLP_INSTANCE_GROUP_SIZE (instance);
2866e4b17023SJohn Marino 
2867e4b17023SJohn Marino   /* For each SLP instance calculate number of vector stmts to be created
2868e4b17023SJohn Marino      for the scalar stmts in each node of the SLP tree.  Number of vector
2869e4b17023SJohn Marino      elements in one vector iteration is the number of scalar elements in
2870e4b17023SJohn Marino      one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
2871e4b17023SJohn Marino      size.  */
2872e4b17023SJohn Marino   vec_stmts_size = (vectorization_factor * group_size) / nunits;
2873e4b17023SJohn Marino 
2874e4b17023SJohn Marino   /* In case of load permutation we have to allocate vectorized statements for
2875e4b17023SJohn Marino      all the nodes that participate in that permutation.  */
2876e4b17023SJohn Marino   if (SLP_INSTANCE_LOAD_PERMUTATION (instance))
2877e4b17023SJohn Marino     {
2878e4b17023SJohn Marino       FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (instance), i, loads_node)
2879e4b17023SJohn Marino         {
2880e4b17023SJohn Marino           if (!SLP_TREE_VEC_STMTS (loads_node))
2881e4b17023SJohn Marino             {
2882e4b17023SJohn Marino               SLP_TREE_VEC_STMTS (loads_node) = VEC_alloc (gimple, heap,
2883e4b17023SJohn Marino                                                            vec_stmts_size);
2884e4b17023SJohn Marino               SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node) = vec_stmts_size;
2885e4b17023SJohn Marino             }
2886e4b17023SJohn Marino         }
2887e4b17023SJohn Marino     }
2888e4b17023SJohn Marino 
2889e4b17023SJohn Marino   if (!SLP_TREE_VEC_STMTS (node))
2890e4b17023SJohn Marino     {
2891e4b17023SJohn Marino       SLP_TREE_VEC_STMTS (node) = VEC_alloc (gimple, heap, vec_stmts_size);
2892e4b17023SJohn Marino       SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
2893e4b17023SJohn Marino     }
2894e4b17023SJohn Marino 
2895e4b17023SJohn Marino   if (vect_print_dump_info (REPORT_DETAILS))
2896e4b17023SJohn Marino     {
2897e4b17023SJohn Marino       fprintf (vect_dump, "------>vectorizing SLP node starting from: ");
2898e4b17023SJohn Marino       print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2899e4b17023SJohn Marino     }
2900e4b17023SJohn Marino 
2901e4b17023SJohn Marino   /* Loads should be inserted before the first load.  */
2902e4b17023SJohn Marino   if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
2903e4b17023SJohn Marino       && STMT_VINFO_STRIDED_ACCESS (stmt_info)
2904e4b17023SJohn Marino       && !REFERENCE_CLASS_P (gimple_get_lhs (stmt))
2905e4b17023SJohn Marino       && SLP_INSTANCE_LOAD_PERMUTATION (instance))
2906e4b17023SJohn Marino     si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
2907e4b17023SJohn Marino   else if (is_pattern_stmt_p (stmt_info))
2908e4b17023SJohn Marino     si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
2909e4b17023SJohn Marino   else
2910e4b17023SJohn Marino     si = gsi_for_stmt (stmt);
2911e4b17023SJohn Marino 
2912e4b17023SJohn Marino   /* Stores should be inserted just before the last store.  */
2913e4b17023SJohn Marino   if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
2914e4b17023SJohn Marino       && REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
2915e4b17023SJohn Marino     {
2916e4b17023SJohn Marino       gimple last_store = vect_find_last_store_in_slp_instance (instance);
2917e4b17023SJohn Marino       if (is_pattern_stmt_p (vinfo_for_stmt (last_store)))
2918e4b17023SJohn Marino        last_store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_store));
2919e4b17023SJohn Marino       si = gsi_for_stmt (last_store);
2920e4b17023SJohn Marino     }
2921e4b17023SJohn Marino 
2922e4b17023SJohn Marino   /* Mark the first element of the reduction chain as reduction to properly
2923e4b17023SJohn Marino      transform the node.  In the analysis phase only the last element of the
2924e4b17023SJohn Marino      chain is marked as reduction.  */
2925e4b17023SJohn Marino   if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_STRIDED_ACCESS (stmt_info)
2926e4b17023SJohn Marino       && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
2927e4b17023SJohn Marino     {
2928e4b17023SJohn Marino       STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
2929e4b17023SJohn Marino       STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
2930e4b17023SJohn Marino     }
2931e4b17023SJohn Marino 
2932e4b17023SJohn Marino   is_store = vect_transform_stmt (stmt, &si, &strided_store, node, instance);
2933e4b17023SJohn Marino   return is_store;
2934e4b17023SJohn Marino }
2935e4b17023SJohn Marino 
2936e4b17023SJohn Marino /* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
2937e4b17023SJohn Marino    For loop vectorization this is done in vectorizable_call, but for SLP
2938e4b17023SJohn Marino    it needs to be deferred until end of vect_schedule_slp, because multiple
2939e4b17023SJohn Marino    SLP instances may refer to the same scalar stmt.  */
2940e4b17023SJohn Marino 
2941e4b17023SJohn Marino static void
vect_remove_slp_scalar_calls(slp_tree node)2942e4b17023SJohn Marino vect_remove_slp_scalar_calls (slp_tree node)
2943e4b17023SJohn Marino {
2944e4b17023SJohn Marino   gimple stmt, new_stmt;
2945e4b17023SJohn Marino   gimple_stmt_iterator gsi;
2946e4b17023SJohn Marino   int i;
2947e4b17023SJohn Marino   slp_void_p child;
2948e4b17023SJohn Marino   tree lhs;
2949e4b17023SJohn Marino   stmt_vec_info stmt_info;
2950e4b17023SJohn Marino 
2951e4b17023SJohn Marino   if (!node)
2952e4b17023SJohn Marino     return;
2953e4b17023SJohn Marino 
2954e4b17023SJohn Marino   FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
2955e4b17023SJohn Marino     vect_remove_slp_scalar_calls ((slp_tree) child);
2956e4b17023SJohn Marino 
2957e4b17023SJohn Marino   FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
2958e4b17023SJohn Marino     {
2959e4b17023SJohn Marino       if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
2960e4b17023SJohn Marino 	continue;
2961e4b17023SJohn Marino       stmt_info = vinfo_for_stmt (stmt);
2962e4b17023SJohn Marino       if (stmt_info == NULL
2963e4b17023SJohn Marino 	  || is_pattern_stmt_p (stmt_info)
2964e4b17023SJohn Marino 	  || !PURE_SLP_STMT (stmt_info))
2965e4b17023SJohn Marino 	continue;
2966e4b17023SJohn Marino       lhs = gimple_call_lhs (stmt);
2967e4b17023SJohn Marino       new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2968e4b17023SJohn Marino       set_vinfo_for_stmt (new_stmt, stmt_info);
2969e4b17023SJohn Marino       set_vinfo_for_stmt (stmt, NULL);
2970e4b17023SJohn Marino       STMT_VINFO_STMT (stmt_info) = new_stmt;
2971e4b17023SJohn Marino       gsi = gsi_for_stmt (stmt);
2972e4b17023SJohn Marino       gsi_replace (&gsi, new_stmt, false);
2973e4b17023SJohn Marino       SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
2974e4b17023SJohn Marino     }
2975e4b17023SJohn Marino }
2976e4b17023SJohn Marino 
2977e4b17023SJohn Marino /* Generate vector code for all SLP instances in the loop/basic block.  */
2978e4b17023SJohn Marino 
2979e4b17023SJohn Marino bool
vect_schedule_slp(loop_vec_info loop_vinfo,bb_vec_info bb_vinfo)2980e4b17023SJohn Marino vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2981e4b17023SJohn Marino {
2982e4b17023SJohn Marino   VEC (slp_instance, heap) *slp_instances;
2983e4b17023SJohn Marino   slp_instance instance;
2984*5ce9237cSJohn Marino   slp_tree loads_node;
2985*5ce9237cSJohn Marino   unsigned int i, j, vf;
2986e4b17023SJohn Marino   bool is_store = false;
2987e4b17023SJohn Marino 
2988e4b17023SJohn Marino   if (loop_vinfo)
2989e4b17023SJohn Marino     {
2990e4b17023SJohn Marino       slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2991e4b17023SJohn Marino       vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2992e4b17023SJohn Marino     }
2993e4b17023SJohn Marino   else
2994e4b17023SJohn Marino     {
2995e4b17023SJohn Marino       slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2996e4b17023SJohn Marino       vf = 1;
2997e4b17023SJohn Marino     }
2998e4b17023SJohn Marino 
2999e4b17023SJohn Marino   FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
3000e4b17023SJohn Marino     {
3001e4b17023SJohn Marino       /* Schedule the tree of INSTANCE.  */
3002e4b17023SJohn Marino       is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
3003e4b17023SJohn Marino                                              instance, vf);
3004*5ce9237cSJohn Marino 
3005*5ce9237cSJohn Marino       /* Clear STMT_VINFO_VEC_STMT of all loads.  With shared loads
3006*5ce9237cSJohn Marino 	 between SLP instances we fail to properly initialize the
3007*5ce9237cSJohn Marino 	 vectorized SLP stmts and confuse different load permutations.  */
3008*5ce9237cSJohn Marino       FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (instance), j, loads_node)
3009*5ce9237cSJohn Marino 	STMT_VINFO_VEC_STMT
3010*5ce9237cSJohn Marino 	  (vinfo_for_stmt
3011*5ce9237cSJohn Marino 	    (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (loads_node), 0))) = NULL;
3012*5ce9237cSJohn Marino 
3013e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS)
3014e4b17023SJohn Marino 	  || vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3015e4b17023SJohn Marino 	fprintf (vect_dump, "vectorizing stmts using SLP.");
3016e4b17023SJohn Marino     }
3017e4b17023SJohn Marino 
3018e4b17023SJohn Marino   FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
3019e4b17023SJohn Marino     {
3020e4b17023SJohn Marino       slp_tree root = SLP_INSTANCE_TREE (instance);
3021e4b17023SJohn Marino       gimple store;
3022e4b17023SJohn Marino       unsigned int j;
3023e4b17023SJohn Marino       gimple_stmt_iterator gsi;
3024e4b17023SJohn Marino 
3025*5ce9237cSJohn Marino       /* Remove scalar call stmts.  Do not do this for basic-block
3026*5ce9237cSJohn Marino 	 vectorization as not all uses may be vectorized.
3027*5ce9237cSJohn Marino 	 ???  Why should this be necessary?  DCE should be able to
3028*5ce9237cSJohn Marino 	 remove the stmts itself.
3029*5ce9237cSJohn Marino 	 ???  For BB vectorization we can as well remove scalar
3030*5ce9237cSJohn Marino 	 stmts starting from the SLP tree root if they have no
3031*5ce9237cSJohn Marino 	 uses.  */
3032*5ce9237cSJohn Marino       if (loop_vinfo)
3033e4b17023SJohn Marino 	vect_remove_slp_scalar_calls (root);
3034e4b17023SJohn Marino 
3035e4b17023SJohn Marino       for (j = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (root), j, store)
3036e4b17023SJohn Marino                   && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
3037e4b17023SJohn Marino         {
3038e4b17023SJohn Marino           if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
3039e4b17023SJohn Marino             break;
3040e4b17023SJohn Marino 
3041e4b17023SJohn Marino          if (is_pattern_stmt_p (vinfo_for_stmt (store)))
3042e4b17023SJohn Marino            store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
3043e4b17023SJohn Marino           /* Free the attached stmt_vec_info and remove the stmt.  */
3044e4b17023SJohn Marino           gsi = gsi_for_stmt (store);
3045e4b17023SJohn Marino           gsi_remove (&gsi, true);
3046e4b17023SJohn Marino           free_stmt_vec_info (store);
3047e4b17023SJohn Marino         }
3048e4b17023SJohn Marino     }
3049e4b17023SJohn Marino 
3050e4b17023SJohn Marino   return is_store;
3051e4b17023SJohn Marino }
3052e4b17023SJohn Marino 
3053e4b17023SJohn Marino 
3054e4b17023SJohn Marino /* Vectorize the basic block.  */
3055e4b17023SJohn Marino 
3056e4b17023SJohn Marino void
vect_slp_transform_bb(basic_block bb)3057e4b17023SJohn Marino vect_slp_transform_bb (basic_block bb)
3058e4b17023SJohn Marino {
3059e4b17023SJohn Marino   bb_vec_info bb_vinfo = vec_info_for_bb (bb);
3060e4b17023SJohn Marino   gimple_stmt_iterator si;
3061e4b17023SJohn Marino 
3062e4b17023SJohn Marino   gcc_assert (bb_vinfo);
3063e4b17023SJohn Marino 
3064e4b17023SJohn Marino   if (vect_print_dump_info (REPORT_DETAILS))
3065e4b17023SJohn Marino     fprintf (vect_dump, "SLPing BB\n");
3066e4b17023SJohn Marino 
3067e4b17023SJohn Marino   for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3068e4b17023SJohn Marino     {
3069e4b17023SJohn Marino       gimple stmt = gsi_stmt (si);
3070e4b17023SJohn Marino       stmt_vec_info stmt_info;
3071e4b17023SJohn Marino 
3072e4b17023SJohn Marino       if (vect_print_dump_info (REPORT_DETAILS))
3073e4b17023SJohn Marino         {
3074e4b17023SJohn Marino           fprintf (vect_dump, "------>SLPing statement: ");
3075e4b17023SJohn Marino           print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3076e4b17023SJohn Marino         }
3077e4b17023SJohn Marino 
3078e4b17023SJohn Marino       stmt_info = vinfo_for_stmt (stmt);
3079e4b17023SJohn Marino       gcc_assert (stmt_info);
3080e4b17023SJohn Marino 
3081e4b17023SJohn Marino       /* Schedule all the SLP instances when the first SLP stmt is reached.  */
3082e4b17023SJohn Marino       if (STMT_SLP_TYPE (stmt_info))
3083e4b17023SJohn Marino         {
3084e4b17023SJohn Marino           vect_schedule_slp (NULL, bb_vinfo);
3085e4b17023SJohn Marino           break;
3086e4b17023SJohn Marino         }
3087e4b17023SJohn Marino     }
3088e4b17023SJohn Marino 
3089e4b17023SJohn Marino   mark_sym_for_renaming (gimple_vop (cfun));
3090e4b17023SJohn Marino   /* The memory tags and pointers in vectorized statements need to
3091e4b17023SJohn Marino      have their SSA forms updated.  FIXME, why can't this be delayed
3092e4b17023SJohn Marino      until all the loops have been transformed?  */
3093e4b17023SJohn Marino   update_ssa (TODO_update_ssa);
3094e4b17023SJohn Marino 
3095e4b17023SJohn Marino   if (vect_print_dump_info (REPORT_DETAILS))
3096e4b17023SJohn Marino     fprintf (vect_dump, "BASIC BLOCK VECTORIZED\n");
3097e4b17023SJohn Marino 
3098e4b17023SJohn Marino   destroy_bb_vec_info (bb_vinfo);
3099e4b17023SJohn Marino }
3100e4b17023SJohn Marino 
3101