1e4b17023SJohn Marino /* SLP - Basic Block Vectorization
2e4b17023SJohn Marino Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012
3e4b17023SJohn Marino Free Software Foundation, Inc.
4e4b17023SJohn Marino Contributed by Dorit Naishlos <dorit@il.ibm.com>
5e4b17023SJohn Marino and Ira Rosen <irar@il.ibm.com>
6e4b17023SJohn Marino
7e4b17023SJohn Marino This file is part of GCC.
8e4b17023SJohn Marino
9e4b17023SJohn Marino GCC is free software; you can redistribute it and/or modify it under
10e4b17023SJohn Marino the terms of the GNU General Public License as published by the Free
11e4b17023SJohn Marino Software Foundation; either version 3, or (at your option) any later
12e4b17023SJohn Marino version.
13e4b17023SJohn Marino
14e4b17023SJohn Marino GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15e4b17023SJohn Marino WARRANTY; without even the implied warranty of MERCHANTABILITY or
16e4b17023SJohn Marino FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17e4b17023SJohn Marino for more details.
18e4b17023SJohn Marino
19e4b17023SJohn Marino You should have received a copy of the GNU General Public License
20e4b17023SJohn Marino along with GCC; see the file COPYING3. If not see
21e4b17023SJohn Marino <http://www.gnu.org/licenses/>. */
22e4b17023SJohn Marino
23e4b17023SJohn Marino #include "config.h"
24e4b17023SJohn Marino #include "system.h"
25e4b17023SJohn Marino #include "coretypes.h"
26e4b17023SJohn Marino #include "tm.h"
27e4b17023SJohn Marino #include "ggc.h"
28e4b17023SJohn Marino #include "tree.h"
29e4b17023SJohn Marino #include "target.h"
30e4b17023SJohn Marino #include "basic-block.h"
31e4b17023SJohn Marino #include "tree-pretty-print.h"
32e4b17023SJohn Marino #include "gimple-pretty-print.h"
33e4b17023SJohn Marino #include "tree-flow.h"
34e4b17023SJohn Marino #include "tree-dump.h"
35e4b17023SJohn Marino #include "cfgloop.h"
36e4b17023SJohn Marino #include "cfglayout.h"
37e4b17023SJohn Marino #include "expr.h"
38e4b17023SJohn Marino #include "recog.h"
39e4b17023SJohn Marino #include "optabs.h"
40e4b17023SJohn Marino #include "tree-vectorizer.h"
41e4b17023SJohn Marino #include "langhooks.h"
42e4b17023SJohn Marino
43e4b17023SJohn Marino /* Extract the location of the basic block in the source code.
44e4b17023SJohn Marino Return the basic block location if succeed and NULL if not. */
45e4b17023SJohn Marino
46e4b17023SJohn Marino LOC
find_bb_location(basic_block bb)47e4b17023SJohn Marino find_bb_location (basic_block bb)
48e4b17023SJohn Marino {
49e4b17023SJohn Marino gimple stmt = NULL;
50e4b17023SJohn Marino gimple_stmt_iterator si;
51e4b17023SJohn Marino
52e4b17023SJohn Marino if (!bb)
53e4b17023SJohn Marino return UNKNOWN_LOC;
54e4b17023SJohn Marino
55e4b17023SJohn Marino for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
56e4b17023SJohn Marino {
57e4b17023SJohn Marino stmt = gsi_stmt (si);
58e4b17023SJohn Marino if (gimple_location (stmt) != UNKNOWN_LOC)
59e4b17023SJohn Marino return gimple_location (stmt);
60e4b17023SJohn Marino }
61e4b17023SJohn Marino
62e4b17023SJohn Marino return UNKNOWN_LOC;
63e4b17023SJohn Marino }
64e4b17023SJohn Marino
65e4b17023SJohn Marino
66e4b17023SJohn Marino /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
67e4b17023SJohn Marino
68e4b17023SJohn Marino static void
vect_free_slp_tree(slp_tree node)69e4b17023SJohn Marino vect_free_slp_tree (slp_tree node)
70e4b17023SJohn Marino {
71e4b17023SJohn Marino int i;
72e4b17023SJohn Marino slp_void_p child;
73e4b17023SJohn Marino
74e4b17023SJohn Marino if (!node)
75e4b17023SJohn Marino return;
76e4b17023SJohn Marino
77e4b17023SJohn Marino FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
78e4b17023SJohn Marino vect_free_slp_tree ((slp_tree) child);
79e4b17023SJohn Marino
80e4b17023SJohn Marino VEC_free (slp_void_p, heap, SLP_TREE_CHILDREN (node));
81e4b17023SJohn Marino VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
82e4b17023SJohn Marino
83e4b17023SJohn Marino if (SLP_TREE_VEC_STMTS (node))
84e4b17023SJohn Marino VEC_free (gimple, heap, SLP_TREE_VEC_STMTS (node));
85e4b17023SJohn Marino
86e4b17023SJohn Marino free (node);
87e4b17023SJohn Marino }
88e4b17023SJohn Marino
89e4b17023SJohn Marino
90e4b17023SJohn Marino /* Free the memory allocated for the SLP instance. */
91e4b17023SJohn Marino
92e4b17023SJohn Marino void
vect_free_slp_instance(slp_instance instance)93e4b17023SJohn Marino vect_free_slp_instance (slp_instance instance)
94e4b17023SJohn Marino {
95e4b17023SJohn Marino vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
96e4b17023SJohn Marino VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (instance));
97e4b17023SJohn Marino VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
98e4b17023SJohn Marino free (instance);
99e4b17023SJohn Marino }
100e4b17023SJohn Marino
101e4b17023SJohn Marino
102e4b17023SJohn Marino /* Create an SLP node for SCALAR_STMTS. */
103e4b17023SJohn Marino
104e4b17023SJohn Marino static slp_tree
vect_create_new_slp_node(VEC (gimple,heap)* scalar_stmts)105e4b17023SJohn Marino vect_create_new_slp_node (VEC (gimple, heap) *scalar_stmts)
106e4b17023SJohn Marino {
107e4b17023SJohn Marino slp_tree node;
108e4b17023SJohn Marino gimple stmt = VEC_index (gimple, scalar_stmts, 0);
109e4b17023SJohn Marino unsigned int nops;
110e4b17023SJohn Marino
111e4b17023SJohn Marino if (is_gimple_call (stmt))
112e4b17023SJohn Marino nops = gimple_call_num_args (stmt);
113e4b17023SJohn Marino else if (is_gimple_assign (stmt))
114e4b17023SJohn Marino {
115e4b17023SJohn Marino nops = gimple_num_ops (stmt) - 1;
116e4b17023SJohn Marino if (gimple_assign_rhs_code (stmt) == COND_EXPR)
117e4b17023SJohn Marino nops++;
118e4b17023SJohn Marino }
119e4b17023SJohn Marino else
120e4b17023SJohn Marino return NULL;
121e4b17023SJohn Marino
122e4b17023SJohn Marino node = XNEW (struct _slp_tree);
123e4b17023SJohn Marino SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
124e4b17023SJohn Marino SLP_TREE_VEC_STMTS (node) = NULL;
125e4b17023SJohn Marino SLP_TREE_CHILDREN (node) = VEC_alloc (slp_void_p, heap, nops);
126e4b17023SJohn Marino SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
127e4b17023SJohn Marino SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;
128e4b17023SJohn Marino
129e4b17023SJohn Marino return node;
130e4b17023SJohn Marino }
131e4b17023SJohn Marino
132e4b17023SJohn Marino
133e4b17023SJohn Marino /* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
134e4b17023SJohn Marino operand. */
VEC(slp_oprnd_info,heap)135e4b17023SJohn Marino static VEC (slp_oprnd_info, heap) *
136e4b17023SJohn Marino vect_create_oprnd_info (int nops, int group_size)
137e4b17023SJohn Marino {
138e4b17023SJohn Marino int i;
139e4b17023SJohn Marino slp_oprnd_info oprnd_info;
140e4b17023SJohn Marino VEC (slp_oprnd_info, heap) *oprnds_info;
141e4b17023SJohn Marino
142e4b17023SJohn Marino oprnds_info = VEC_alloc (slp_oprnd_info, heap, nops);
143e4b17023SJohn Marino for (i = 0; i < nops; i++)
144e4b17023SJohn Marino {
145e4b17023SJohn Marino oprnd_info = XNEW (struct _slp_oprnd_info);
146e4b17023SJohn Marino oprnd_info->def_stmts = VEC_alloc (gimple, heap, group_size);
147e4b17023SJohn Marino oprnd_info->first_dt = vect_uninitialized_def;
148e4b17023SJohn Marino oprnd_info->first_def_type = NULL_TREE;
149e4b17023SJohn Marino oprnd_info->first_const_oprnd = NULL_TREE;
150e4b17023SJohn Marino oprnd_info->first_pattern = false;
151e4b17023SJohn Marino VEC_quick_push (slp_oprnd_info, oprnds_info, oprnd_info);
152e4b17023SJohn Marino }
153e4b17023SJohn Marino
154e4b17023SJohn Marino return oprnds_info;
155e4b17023SJohn Marino }
156e4b17023SJohn Marino
157e4b17023SJohn Marino
158e4b17023SJohn Marino /* Free operands info. */
159e4b17023SJohn Marino
160e4b17023SJohn Marino static void
vect_free_oprnd_info(VEC (slp_oprnd_info,heap)** oprnds_info)161e4b17023SJohn Marino vect_free_oprnd_info (VEC (slp_oprnd_info, heap) **oprnds_info)
162e4b17023SJohn Marino {
163e4b17023SJohn Marino int i;
164e4b17023SJohn Marino slp_oprnd_info oprnd_info;
165e4b17023SJohn Marino
166e4b17023SJohn Marino FOR_EACH_VEC_ELT (slp_oprnd_info, *oprnds_info, i, oprnd_info)
167e4b17023SJohn Marino {
168e4b17023SJohn Marino VEC_free (gimple, heap, oprnd_info->def_stmts);
169e4b17023SJohn Marino XDELETE (oprnd_info);
170e4b17023SJohn Marino }
171e4b17023SJohn Marino
172e4b17023SJohn Marino VEC_free (slp_oprnd_info, heap, *oprnds_info);
173e4b17023SJohn Marino }
174e4b17023SJohn Marino
175e4b17023SJohn Marino
176e4b17023SJohn Marino /* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
177e4b17023SJohn Marino they are of a valid type and that they match the defs of the first stmt of
178e4b17023SJohn Marino the SLP group (stored in OPRNDS_INFO). */
179e4b17023SJohn Marino
180e4b17023SJohn Marino static bool
vect_get_and_check_slp_defs(loop_vec_info loop_vinfo,bb_vec_info bb_vinfo,slp_tree slp_node,gimple stmt,int ncopies_for_cost,bool first,VEC (slp_oprnd_info,heap)** oprnds_info)181e4b17023SJohn Marino vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
182e4b17023SJohn Marino slp_tree slp_node, gimple stmt,
183e4b17023SJohn Marino int ncopies_for_cost, bool first,
184e4b17023SJohn Marino VEC (slp_oprnd_info, heap) **oprnds_info)
185e4b17023SJohn Marino {
186e4b17023SJohn Marino tree oprnd;
187e4b17023SJohn Marino unsigned int i, number_of_oprnds;
188e4b17023SJohn Marino tree def, def_op0 = NULL_TREE;
189e4b17023SJohn Marino gimple def_stmt;
190e4b17023SJohn Marino enum vect_def_type dt = vect_uninitialized_def;
191e4b17023SJohn Marino enum vect_def_type dt_op0 = vect_uninitialized_def;
192e4b17023SJohn Marino stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
193e4b17023SJohn Marino tree lhs = gimple_get_lhs (stmt);
194e4b17023SJohn Marino struct loop *loop = NULL;
195e4b17023SJohn Marino enum tree_code rhs_code;
196e4b17023SJohn Marino bool different_types = false;
197e4b17023SJohn Marino bool pattern = false;
198e4b17023SJohn Marino slp_oprnd_info oprnd_info, oprnd0_info, oprnd1_info;
199e4b17023SJohn Marino int op_idx = 1;
200e4b17023SJohn Marino tree compare_rhs = NULL_TREE;
201e4b17023SJohn Marino
202e4b17023SJohn Marino if (loop_vinfo)
203e4b17023SJohn Marino loop = LOOP_VINFO_LOOP (loop_vinfo);
204e4b17023SJohn Marino
205e4b17023SJohn Marino if (is_gimple_call (stmt))
206e4b17023SJohn Marino {
207e4b17023SJohn Marino number_of_oprnds = gimple_call_num_args (stmt);
208e4b17023SJohn Marino op_idx = 3;
209e4b17023SJohn Marino }
210e4b17023SJohn Marino else if (is_gimple_assign (stmt))
211e4b17023SJohn Marino {
212e4b17023SJohn Marino number_of_oprnds = gimple_num_ops (stmt) - 1;
213e4b17023SJohn Marino if (gimple_assign_rhs_code (stmt) == COND_EXPR)
214e4b17023SJohn Marino number_of_oprnds++;
215e4b17023SJohn Marino }
216e4b17023SJohn Marino else
217e4b17023SJohn Marino return false;
218e4b17023SJohn Marino
219e4b17023SJohn Marino for (i = 0; i < number_of_oprnds; i++)
220e4b17023SJohn Marino {
221e4b17023SJohn Marino if (compare_rhs)
222e4b17023SJohn Marino {
223e4b17023SJohn Marino oprnd = compare_rhs;
224e4b17023SJohn Marino compare_rhs = NULL_TREE;
225e4b17023SJohn Marino }
226e4b17023SJohn Marino else
227e4b17023SJohn Marino oprnd = gimple_op (stmt, op_idx++);
228e4b17023SJohn Marino
229e4b17023SJohn Marino oprnd_info = VEC_index (slp_oprnd_info, *oprnds_info, i);
230e4b17023SJohn Marino
231e4b17023SJohn Marino if (COMPARISON_CLASS_P (oprnd))
232e4b17023SJohn Marino {
233e4b17023SJohn Marino compare_rhs = TREE_OPERAND (oprnd, 1);
234e4b17023SJohn Marino oprnd = TREE_OPERAND (oprnd, 0);
235e4b17023SJohn Marino }
236e4b17023SJohn Marino
237e4b17023SJohn Marino if (!vect_is_simple_use (oprnd, NULL, loop_vinfo, bb_vinfo, &def_stmt,
238e4b17023SJohn Marino &def, &dt)
239e4b17023SJohn Marino || (!def_stmt && dt != vect_constant_def))
240e4b17023SJohn Marino {
241e4b17023SJohn Marino if (vect_print_dump_info (REPORT_SLP))
242e4b17023SJohn Marino {
243e4b17023SJohn Marino fprintf (vect_dump, "Build SLP failed: can't find def for ");
244e4b17023SJohn Marino print_generic_expr (vect_dump, oprnd, TDF_SLIM);
245e4b17023SJohn Marino }
246e4b17023SJohn Marino
247e4b17023SJohn Marino return false;
248e4b17023SJohn Marino }
249e4b17023SJohn Marino
250e4b17023SJohn Marino /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
251e4b17023SJohn Marino from the pattern. Check that all the stmts of the node are in the
252e4b17023SJohn Marino pattern. */
253e4b17023SJohn Marino if (loop && def_stmt && gimple_bb (def_stmt)
254e4b17023SJohn Marino && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
255e4b17023SJohn Marino && vinfo_for_stmt (def_stmt)
256e4b17023SJohn Marino && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
257e4b17023SJohn Marino && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
258e4b17023SJohn Marino && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
259e4b17023SJohn Marino {
260e4b17023SJohn Marino pattern = true;
261e4b17023SJohn Marino if (!first && !oprnd_info->first_pattern)
262e4b17023SJohn Marino {
263e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
264e4b17023SJohn Marino {
265e4b17023SJohn Marino fprintf (vect_dump, "Build SLP failed: some of the stmts"
266e4b17023SJohn Marino " are in a pattern, and others are not ");
267e4b17023SJohn Marino print_generic_expr (vect_dump, oprnd, TDF_SLIM);
268e4b17023SJohn Marino }
269e4b17023SJohn Marino
270e4b17023SJohn Marino return false;
271e4b17023SJohn Marino }
272e4b17023SJohn Marino
273e4b17023SJohn Marino def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
274e4b17023SJohn Marino dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
275e4b17023SJohn Marino
276e4b17023SJohn Marino if (dt == vect_unknown_def_type)
277e4b17023SJohn Marino {
278e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
279e4b17023SJohn Marino fprintf (vect_dump, "Unsupported pattern.");
280e4b17023SJohn Marino return false;
281e4b17023SJohn Marino }
282e4b17023SJohn Marino
283e4b17023SJohn Marino switch (gimple_code (def_stmt))
284e4b17023SJohn Marino {
285e4b17023SJohn Marino case GIMPLE_PHI:
286e4b17023SJohn Marino def = gimple_phi_result (def_stmt);
287e4b17023SJohn Marino break;
288e4b17023SJohn Marino
289e4b17023SJohn Marino case GIMPLE_ASSIGN:
290e4b17023SJohn Marino def = gimple_assign_lhs (def_stmt);
291e4b17023SJohn Marino break;
292e4b17023SJohn Marino
293e4b17023SJohn Marino default:
294e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
295e4b17023SJohn Marino fprintf (vect_dump, "unsupported defining stmt: ");
296e4b17023SJohn Marino return false;
297e4b17023SJohn Marino }
298e4b17023SJohn Marino }
299e4b17023SJohn Marino
300e4b17023SJohn Marino if (first)
301e4b17023SJohn Marino {
302e4b17023SJohn Marino oprnd_info->first_dt = dt;
303e4b17023SJohn Marino oprnd_info->first_pattern = pattern;
304e4b17023SJohn Marino if (def)
305e4b17023SJohn Marino {
306e4b17023SJohn Marino oprnd_info->first_def_type = TREE_TYPE (def);
307e4b17023SJohn Marino oprnd_info->first_const_oprnd = NULL_TREE;
308e4b17023SJohn Marino }
309e4b17023SJohn Marino else
310e4b17023SJohn Marino {
311e4b17023SJohn Marino oprnd_info->first_def_type = NULL_TREE;
312e4b17023SJohn Marino oprnd_info->first_const_oprnd = oprnd;
313e4b17023SJohn Marino }
314e4b17023SJohn Marino
315e4b17023SJohn Marino if (i == 0)
316e4b17023SJohn Marino {
317e4b17023SJohn Marino def_op0 = def;
318e4b17023SJohn Marino dt_op0 = dt;
319e4b17023SJohn Marino /* Analyze costs (for the first stmt of the group only). */
320e4b17023SJohn Marino if (REFERENCE_CLASS_P (lhs))
321e4b17023SJohn Marino /* Store. */
322e4b17023SJohn Marino vect_model_store_cost (stmt_info, ncopies_for_cost, false,
323e4b17023SJohn Marino dt, slp_node);
324e4b17023SJohn Marino else
325e4b17023SJohn Marino {
326e4b17023SJohn Marino enum vect_def_type dts[2];
327e4b17023SJohn Marino dts[0] = dt;
328e4b17023SJohn Marino dts[1] = vect_uninitialized_def;
329e4b17023SJohn Marino /* Not memory operation (we don't call this function for
330e4b17023SJohn Marino loads). */
331e4b17023SJohn Marino vect_model_simple_cost (stmt_info, ncopies_for_cost, dts,
332e4b17023SJohn Marino slp_node);
333e4b17023SJohn Marino }
334e4b17023SJohn Marino }
335e4b17023SJohn Marino }
336e4b17023SJohn Marino else
337e4b17023SJohn Marino {
338e4b17023SJohn Marino /* Not first stmt of the group, check that the def-stmt/s match
339e4b17023SJohn Marino the def-stmt/s of the first stmt. Allow different definition
340e4b17023SJohn Marino types for reduction chains: the first stmt must be a
341e4b17023SJohn Marino vect_reduction_def (a phi node), and the rest
342e4b17023SJohn Marino vect_internal_def. */
343e4b17023SJohn Marino if (((oprnd_info->first_dt != dt
344e4b17023SJohn Marino && !(oprnd_info->first_dt == vect_reduction_def
345e4b17023SJohn Marino && dt == vect_internal_def))
346e4b17023SJohn Marino || (oprnd_info->first_def_type != NULL_TREE
347e4b17023SJohn Marino && def
348e4b17023SJohn Marino && !types_compatible_p (oprnd_info->first_def_type,
349e4b17023SJohn Marino TREE_TYPE (def))))
350e4b17023SJohn Marino || (!def
351e4b17023SJohn Marino && !types_compatible_p (TREE_TYPE (oprnd_info->first_const_oprnd),
352e4b17023SJohn Marino TREE_TYPE (oprnd)))
353e4b17023SJohn Marino || different_types)
354e4b17023SJohn Marino {
355e4b17023SJohn Marino if (number_of_oprnds != 2)
356e4b17023SJohn Marino {
357e4b17023SJohn Marino if (vect_print_dump_info (REPORT_SLP))
358e4b17023SJohn Marino fprintf (vect_dump, "Build SLP failed: different types ");
359e4b17023SJohn Marino
360e4b17023SJohn Marino return false;
361e4b17023SJohn Marino }
362e4b17023SJohn Marino
363e4b17023SJohn Marino /* Try to swap operands in case of binary operation. */
364e4b17023SJohn Marino if (i == 0)
365e4b17023SJohn Marino different_types = true;
366e4b17023SJohn Marino else
367e4b17023SJohn Marino {
368e4b17023SJohn Marino oprnd0_info = VEC_index (slp_oprnd_info, *oprnds_info, 0);
369e4b17023SJohn Marino if (is_gimple_assign (stmt)
370e4b17023SJohn Marino && (rhs_code = gimple_assign_rhs_code (stmt))
371e4b17023SJohn Marino && TREE_CODE_CLASS (rhs_code) == tcc_binary
372e4b17023SJohn Marino && commutative_tree_code (rhs_code)
373e4b17023SJohn Marino && oprnd0_info->first_dt == dt
374e4b17023SJohn Marino && oprnd_info->first_dt == dt_op0
375e4b17023SJohn Marino && def_op0 && def
376e4b17023SJohn Marino && !(oprnd0_info->first_def_type
377e4b17023SJohn Marino && !types_compatible_p (oprnd0_info->first_def_type,
378e4b17023SJohn Marino TREE_TYPE (def)))
379e4b17023SJohn Marino && !(oprnd_info->first_def_type
380e4b17023SJohn Marino && !types_compatible_p (oprnd_info->first_def_type,
381e4b17023SJohn Marino TREE_TYPE (def_op0))))
382e4b17023SJohn Marino {
383e4b17023SJohn Marino if (vect_print_dump_info (REPORT_SLP))
384e4b17023SJohn Marino {
385e4b17023SJohn Marino fprintf (vect_dump, "Swapping operands of ");
386e4b17023SJohn Marino print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
387e4b17023SJohn Marino }
388e4b17023SJohn Marino
389e4b17023SJohn Marino swap_tree_operands (stmt, gimple_assign_rhs1_ptr (stmt),
390e4b17023SJohn Marino gimple_assign_rhs2_ptr (stmt));
391e4b17023SJohn Marino }
392e4b17023SJohn Marino else
393e4b17023SJohn Marino {
394e4b17023SJohn Marino if (vect_print_dump_info (REPORT_SLP))
395e4b17023SJohn Marino fprintf (vect_dump, "Build SLP failed: different types ");
396e4b17023SJohn Marino
397e4b17023SJohn Marino return false;
398e4b17023SJohn Marino }
399e4b17023SJohn Marino }
400e4b17023SJohn Marino }
401e4b17023SJohn Marino }
402e4b17023SJohn Marino
403e4b17023SJohn Marino /* Check the types of the definitions. */
404e4b17023SJohn Marino switch (dt)
405e4b17023SJohn Marino {
406e4b17023SJohn Marino case vect_constant_def:
407e4b17023SJohn Marino case vect_external_def:
408e4b17023SJohn Marino case vect_reduction_def:
409e4b17023SJohn Marino break;
410e4b17023SJohn Marino
411e4b17023SJohn Marino case vect_internal_def:
412e4b17023SJohn Marino if (different_types)
413e4b17023SJohn Marino {
414e4b17023SJohn Marino oprnd0_info = VEC_index (slp_oprnd_info, *oprnds_info, 0);
415e4b17023SJohn Marino oprnd1_info = VEC_index (slp_oprnd_info, *oprnds_info, 0);
416e4b17023SJohn Marino if (i == 0)
417e4b17023SJohn Marino VEC_quick_push (gimple, oprnd1_info->def_stmts, def_stmt);
418e4b17023SJohn Marino else
419e4b17023SJohn Marino VEC_quick_push (gimple, oprnd0_info->def_stmts, def_stmt);
420e4b17023SJohn Marino }
421e4b17023SJohn Marino else
422e4b17023SJohn Marino VEC_quick_push (gimple, oprnd_info->def_stmts, def_stmt);
423e4b17023SJohn Marino
424e4b17023SJohn Marino break;
425e4b17023SJohn Marino
426e4b17023SJohn Marino default:
427e4b17023SJohn Marino /* FORNOW: Not supported. */
428e4b17023SJohn Marino if (vect_print_dump_info (REPORT_SLP))
429e4b17023SJohn Marino {
430e4b17023SJohn Marino fprintf (vect_dump, "Build SLP failed: illegal type of def ");
431e4b17023SJohn Marino print_generic_expr (vect_dump, def, TDF_SLIM);
432e4b17023SJohn Marino }
433e4b17023SJohn Marino
434e4b17023SJohn Marino return false;
435e4b17023SJohn Marino }
436e4b17023SJohn Marino }
437e4b17023SJohn Marino
438e4b17023SJohn Marino return true;
439e4b17023SJohn Marino }
440e4b17023SJohn Marino
441e4b17023SJohn Marino
442e4b17023SJohn Marino /* Recursively build an SLP tree starting from NODE.
443e4b17023SJohn Marino Fail (and return FALSE) if def-stmts are not isomorphic, require data
444e4b17023SJohn Marino permutation or are of unsupported types of operation. Otherwise, return
445e4b17023SJohn Marino TRUE. */
446e4b17023SJohn Marino
447e4b17023SJohn Marino static bool
vect_build_slp_tree(loop_vec_info loop_vinfo,bb_vec_info bb_vinfo,slp_tree * node,unsigned int group_size,int * inside_cost,int * outside_cost,int ncopies_for_cost,unsigned int * max_nunits,VEC (int,heap)** load_permutation,VEC (slp_tree,heap)** loads,unsigned int vectorization_factor,bool * loads_permuted)448e4b17023SJohn Marino vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
449e4b17023SJohn Marino slp_tree *node, unsigned int group_size,
450e4b17023SJohn Marino int *inside_cost, int *outside_cost,
451e4b17023SJohn Marino int ncopies_for_cost, unsigned int *max_nunits,
452e4b17023SJohn Marino VEC (int, heap) **load_permutation,
453e4b17023SJohn Marino VEC (slp_tree, heap) **loads,
454e4b17023SJohn Marino unsigned int vectorization_factor, bool *loads_permuted)
455e4b17023SJohn Marino {
456e4b17023SJohn Marino unsigned int i;
457e4b17023SJohn Marino VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
458e4b17023SJohn Marino gimple stmt = VEC_index (gimple, stmts, 0);
459e4b17023SJohn Marino enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK;
460e4b17023SJohn Marino enum tree_code first_cond_code = ERROR_MARK;
461e4b17023SJohn Marino tree lhs;
462e4b17023SJohn Marino bool stop_recursion = false, need_same_oprnds = false;
463e4b17023SJohn Marino tree vectype, scalar_type, first_op1 = NULL_TREE;
464e4b17023SJohn Marino unsigned int ncopies;
465e4b17023SJohn Marino optab optab;
466e4b17023SJohn Marino int icode;
467e4b17023SJohn Marino enum machine_mode optab_op2_mode;
468e4b17023SJohn Marino enum machine_mode vec_mode;
469e4b17023SJohn Marino struct data_reference *first_dr;
470e4b17023SJohn Marino HOST_WIDE_INT dummy;
471e4b17023SJohn Marino bool permutation = false;
472e4b17023SJohn Marino unsigned int load_place;
473e4b17023SJohn Marino gimple first_load, prev_first_load = NULL;
474e4b17023SJohn Marino VEC (slp_oprnd_info, heap) *oprnds_info;
475e4b17023SJohn Marino unsigned int nops;
476e4b17023SJohn Marino slp_oprnd_info oprnd_info;
477e4b17023SJohn Marino tree cond;
478e4b17023SJohn Marino
479e4b17023SJohn Marino if (is_gimple_call (stmt))
480e4b17023SJohn Marino nops = gimple_call_num_args (stmt);
481e4b17023SJohn Marino else if (is_gimple_assign (stmt))
482e4b17023SJohn Marino {
483e4b17023SJohn Marino nops = gimple_num_ops (stmt) - 1;
484e4b17023SJohn Marino if (gimple_assign_rhs_code (stmt) == COND_EXPR)
485e4b17023SJohn Marino nops++;
486e4b17023SJohn Marino }
487e4b17023SJohn Marino else
488e4b17023SJohn Marino return false;
489e4b17023SJohn Marino
490e4b17023SJohn Marino oprnds_info = vect_create_oprnd_info (nops, group_size);
491e4b17023SJohn Marino
492e4b17023SJohn Marino /* For every stmt in NODE find its def stmt/s. */
493e4b17023SJohn Marino FOR_EACH_VEC_ELT (gimple, stmts, i, stmt)
494e4b17023SJohn Marino {
495e4b17023SJohn Marino if (vect_print_dump_info (REPORT_SLP))
496e4b17023SJohn Marino {
497e4b17023SJohn Marino fprintf (vect_dump, "Build SLP for ");
498e4b17023SJohn Marino print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
499e4b17023SJohn Marino }
500e4b17023SJohn Marino
501e4b17023SJohn Marino /* Fail to vectorize statements marked as unvectorizable. */
502e4b17023SJohn Marino if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
503e4b17023SJohn Marino {
504e4b17023SJohn Marino if (vect_print_dump_info (REPORT_SLP))
505e4b17023SJohn Marino {
506e4b17023SJohn Marino fprintf (vect_dump,
507e4b17023SJohn Marino "Build SLP failed: unvectorizable statement ");
508e4b17023SJohn Marino print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
509e4b17023SJohn Marino }
510e4b17023SJohn Marino
511e4b17023SJohn Marino vect_free_oprnd_info (&oprnds_info);
512e4b17023SJohn Marino return false;
513e4b17023SJohn Marino }
514e4b17023SJohn Marino
515e4b17023SJohn Marino lhs = gimple_get_lhs (stmt);
516e4b17023SJohn Marino if (lhs == NULL_TREE)
517e4b17023SJohn Marino {
518e4b17023SJohn Marino if (vect_print_dump_info (REPORT_SLP))
519e4b17023SJohn Marino {
520e4b17023SJohn Marino fprintf (vect_dump,
521e4b17023SJohn Marino "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL ");
522e4b17023SJohn Marino print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
523e4b17023SJohn Marino }
524e4b17023SJohn Marino
525e4b17023SJohn Marino vect_free_oprnd_info (&oprnds_info);
526e4b17023SJohn Marino return false;
527e4b17023SJohn Marino }
528e4b17023SJohn Marino
529e4b17023SJohn Marino if (is_gimple_assign (stmt)
530e4b17023SJohn Marino && gimple_assign_rhs_code (stmt) == COND_EXPR
531e4b17023SJohn Marino && (cond = gimple_assign_rhs1 (stmt))
532e4b17023SJohn Marino && !COMPARISON_CLASS_P (cond))
533e4b17023SJohn Marino {
534e4b17023SJohn Marino if (vect_print_dump_info (REPORT_SLP))
535e4b17023SJohn Marino {
536e4b17023SJohn Marino fprintf (vect_dump,
537e4b17023SJohn Marino "Build SLP failed: condition is not comparison ");
538e4b17023SJohn Marino print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
539e4b17023SJohn Marino }
540e4b17023SJohn Marino
541e4b17023SJohn Marino vect_free_oprnd_info (&oprnds_info);
542e4b17023SJohn Marino return false;
543e4b17023SJohn Marino }
544e4b17023SJohn Marino
545e4b17023SJohn Marino scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
546e4b17023SJohn Marino vectype = get_vectype_for_scalar_type (scalar_type);
547e4b17023SJohn Marino if (!vectype)
548e4b17023SJohn Marino {
549e4b17023SJohn Marino if (vect_print_dump_info (REPORT_SLP))
550e4b17023SJohn Marino {
551e4b17023SJohn Marino fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
552e4b17023SJohn Marino print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
553e4b17023SJohn Marino }
554e4b17023SJohn Marino
555e4b17023SJohn Marino vect_free_oprnd_info (&oprnds_info);
556e4b17023SJohn Marino return false;
557e4b17023SJohn Marino }
558e4b17023SJohn Marino
559e4b17023SJohn Marino /* In case of multiple types we need to detect the smallest type. */
560e4b17023SJohn Marino if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
561e4b17023SJohn Marino {
562e4b17023SJohn Marino *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
563e4b17023SJohn Marino if (bb_vinfo)
564e4b17023SJohn Marino vectorization_factor = *max_nunits;
565e4b17023SJohn Marino }
566e4b17023SJohn Marino
567e4b17023SJohn Marino ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
568e4b17023SJohn Marino
569e4b17023SJohn Marino if (is_gimple_call (stmt))
570e4b17023SJohn Marino {
571e4b17023SJohn Marino rhs_code = CALL_EXPR;
572e4b17023SJohn Marino if (gimple_call_internal_p (stmt)
573e4b17023SJohn Marino || gimple_call_tail_p (stmt)
574e4b17023SJohn Marino || gimple_call_noreturn_p (stmt)
575e4b17023SJohn Marino || !gimple_call_nothrow_p (stmt)
576e4b17023SJohn Marino || gimple_call_chain (stmt))
577e4b17023SJohn Marino {
578e4b17023SJohn Marino if (vect_print_dump_info (REPORT_SLP))
579e4b17023SJohn Marino {
580e4b17023SJohn Marino fprintf (vect_dump,
581e4b17023SJohn Marino "Build SLP failed: unsupported call type ");
582e4b17023SJohn Marino print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
583e4b17023SJohn Marino }
584e4b17023SJohn Marino
585e4b17023SJohn Marino vect_free_oprnd_info (&oprnds_info);
586e4b17023SJohn Marino return false;
587e4b17023SJohn Marino }
588e4b17023SJohn Marino }
589e4b17023SJohn Marino else
590e4b17023SJohn Marino rhs_code = gimple_assign_rhs_code (stmt);
591e4b17023SJohn Marino
592e4b17023SJohn Marino /* Check the operation. */
593e4b17023SJohn Marino if (i == 0)
594e4b17023SJohn Marino {
595e4b17023SJohn Marino first_stmt_code = rhs_code;
596e4b17023SJohn Marino
597e4b17023SJohn Marino /* Shift arguments should be equal in all the packed stmts for a
598e4b17023SJohn Marino vector shift with scalar shift operand. */
599e4b17023SJohn Marino if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
600e4b17023SJohn Marino || rhs_code == LROTATE_EXPR
601e4b17023SJohn Marino || rhs_code == RROTATE_EXPR)
602e4b17023SJohn Marino {
603e4b17023SJohn Marino vec_mode = TYPE_MODE (vectype);
604e4b17023SJohn Marino
605e4b17023SJohn Marino /* First see if we have a vector/vector shift. */
606e4b17023SJohn Marino optab = optab_for_tree_code (rhs_code, vectype,
607e4b17023SJohn Marino optab_vector);
608e4b17023SJohn Marino
609e4b17023SJohn Marino if (!optab
610e4b17023SJohn Marino || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
611e4b17023SJohn Marino {
612e4b17023SJohn Marino /* No vector/vector shift, try for a vector/scalar shift. */
613e4b17023SJohn Marino optab = optab_for_tree_code (rhs_code, vectype,
614e4b17023SJohn Marino optab_scalar);
615e4b17023SJohn Marino
616e4b17023SJohn Marino if (!optab)
617e4b17023SJohn Marino {
618e4b17023SJohn Marino if (vect_print_dump_info (REPORT_SLP))
619e4b17023SJohn Marino fprintf (vect_dump, "Build SLP failed: no optab.");
620e4b17023SJohn Marino vect_free_oprnd_info (&oprnds_info);
621e4b17023SJohn Marino return false;
622e4b17023SJohn Marino }
623e4b17023SJohn Marino icode = (int) optab_handler (optab, vec_mode);
624e4b17023SJohn Marino if (icode == CODE_FOR_nothing)
625e4b17023SJohn Marino {
626e4b17023SJohn Marino if (vect_print_dump_info (REPORT_SLP))
627e4b17023SJohn Marino fprintf (vect_dump, "Build SLP failed: "
628e4b17023SJohn Marino "op not supported by target.");
629e4b17023SJohn Marino vect_free_oprnd_info (&oprnds_info);
630e4b17023SJohn Marino return false;
631e4b17023SJohn Marino }
632e4b17023SJohn Marino optab_op2_mode = insn_data[icode].operand[2].mode;
633e4b17023SJohn Marino if (!VECTOR_MODE_P (optab_op2_mode))
634e4b17023SJohn Marino {
635e4b17023SJohn Marino need_same_oprnds = true;
636e4b17023SJohn Marino first_op1 = gimple_assign_rhs2 (stmt);
637e4b17023SJohn Marino }
638e4b17023SJohn Marino }
639e4b17023SJohn Marino }
640e4b17023SJohn Marino else if (rhs_code == WIDEN_LSHIFT_EXPR)
641e4b17023SJohn Marino {
642e4b17023SJohn Marino need_same_oprnds = true;
643e4b17023SJohn Marino first_op1 = gimple_assign_rhs2 (stmt);
644e4b17023SJohn Marino }
645e4b17023SJohn Marino }
646e4b17023SJohn Marino else
647e4b17023SJohn Marino {
648e4b17023SJohn Marino if (first_stmt_code != rhs_code
649e4b17023SJohn Marino && (first_stmt_code != IMAGPART_EXPR
650e4b17023SJohn Marino || rhs_code != REALPART_EXPR)
651e4b17023SJohn Marino && (first_stmt_code != REALPART_EXPR
652e4b17023SJohn Marino || rhs_code != IMAGPART_EXPR)
653e4b17023SJohn Marino && !(STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt))
654e4b17023SJohn Marino && (first_stmt_code == ARRAY_REF
655e4b17023SJohn Marino || first_stmt_code == INDIRECT_REF
656e4b17023SJohn Marino || first_stmt_code == COMPONENT_REF
657e4b17023SJohn Marino || first_stmt_code == MEM_REF)))
658e4b17023SJohn Marino {
659e4b17023SJohn Marino if (vect_print_dump_info (REPORT_SLP))
660e4b17023SJohn Marino {
661e4b17023SJohn Marino fprintf (vect_dump,
662e4b17023SJohn Marino "Build SLP failed: different operation in stmt ");
663e4b17023SJohn Marino print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
664e4b17023SJohn Marino }
665e4b17023SJohn Marino
666e4b17023SJohn Marino vect_free_oprnd_info (&oprnds_info);
667e4b17023SJohn Marino return false;
668e4b17023SJohn Marino }
669e4b17023SJohn Marino
670e4b17023SJohn Marino if (need_same_oprnds
671e4b17023SJohn Marino && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
672e4b17023SJohn Marino {
673e4b17023SJohn Marino if (vect_print_dump_info (REPORT_SLP))
674e4b17023SJohn Marino {
675e4b17023SJohn Marino fprintf (vect_dump,
676e4b17023SJohn Marino "Build SLP failed: different shift arguments in ");
677e4b17023SJohn Marino print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
678e4b17023SJohn Marino }
679e4b17023SJohn Marino
680e4b17023SJohn Marino vect_free_oprnd_info (&oprnds_info);
681e4b17023SJohn Marino return false;
682e4b17023SJohn Marino }
683e4b17023SJohn Marino
684e4b17023SJohn Marino if (rhs_code == CALL_EXPR)
685e4b17023SJohn Marino {
686e4b17023SJohn Marino gimple first_stmt = VEC_index (gimple, stmts, 0);
687e4b17023SJohn Marino if (gimple_call_num_args (stmt) != nops
688e4b17023SJohn Marino || !operand_equal_p (gimple_call_fn (first_stmt),
689e4b17023SJohn Marino gimple_call_fn (stmt), 0)
690e4b17023SJohn Marino || gimple_call_fntype (first_stmt)
691e4b17023SJohn Marino != gimple_call_fntype (stmt))
692e4b17023SJohn Marino {
693e4b17023SJohn Marino if (vect_print_dump_info (REPORT_SLP))
694e4b17023SJohn Marino {
695e4b17023SJohn Marino fprintf (vect_dump,
696e4b17023SJohn Marino "Build SLP failed: different calls in ");
697e4b17023SJohn Marino print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
698e4b17023SJohn Marino }
699e4b17023SJohn Marino
700e4b17023SJohn Marino vect_free_oprnd_info (&oprnds_info);
701e4b17023SJohn Marino return false;
702e4b17023SJohn Marino }
703e4b17023SJohn Marino }
704e4b17023SJohn Marino }
705e4b17023SJohn Marino
706e4b17023SJohn Marino /* Strided store or load. */
707e4b17023SJohn Marino if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
708e4b17023SJohn Marino {
709e4b17023SJohn Marino if (REFERENCE_CLASS_P (lhs))
710e4b17023SJohn Marino {
711e4b17023SJohn Marino /* Store. */
712e4b17023SJohn Marino if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node,
713e4b17023SJohn Marino stmt, ncopies_for_cost,
714e4b17023SJohn Marino (i == 0), &oprnds_info))
715e4b17023SJohn Marino {
716e4b17023SJohn Marino vect_free_oprnd_info (&oprnds_info);
717e4b17023SJohn Marino return false;
718e4b17023SJohn Marino }
719e4b17023SJohn Marino }
720e4b17023SJohn Marino else
721e4b17023SJohn Marino {
722e4b17023SJohn Marino /* Load. */
723e4b17023SJohn Marino /* FORNOW: Check that there is no gap between the loads. */
724e4b17023SJohn Marino if ((GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
725e4b17023SJohn Marino && GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
726e4b17023SJohn Marino || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt
727e4b17023SJohn Marino && GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
728e4b17023SJohn Marino {
729e4b17023SJohn Marino if (vect_print_dump_info (REPORT_SLP))
730e4b17023SJohn Marino {
731e4b17023SJohn Marino fprintf (vect_dump, "Build SLP failed: strided "
732e4b17023SJohn Marino "loads have gaps ");
733e4b17023SJohn Marino print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
734e4b17023SJohn Marino }
735e4b17023SJohn Marino
736e4b17023SJohn Marino vect_free_oprnd_info (&oprnds_info);
737e4b17023SJohn Marino return false;
738e4b17023SJohn Marino }
739e4b17023SJohn Marino
740e4b17023SJohn Marino /* Check that the size of interleaved loads group is not
741e4b17023SJohn Marino greater than the SLP group size. */
742e4b17023SJohn Marino if (loop_vinfo
743e4b17023SJohn Marino && GROUP_SIZE (vinfo_for_stmt (stmt)) > ncopies * group_size)
744e4b17023SJohn Marino {
745e4b17023SJohn Marino if (vect_print_dump_info (REPORT_SLP))
746e4b17023SJohn Marino {
747e4b17023SJohn Marino fprintf (vect_dump, "Build SLP failed: the number of "
748e4b17023SJohn Marino "interleaved loads is greater than"
749e4b17023SJohn Marino " the SLP group size ");
750e4b17023SJohn Marino print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
751e4b17023SJohn Marino }
752e4b17023SJohn Marino
753e4b17023SJohn Marino vect_free_oprnd_info (&oprnds_info);
754e4b17023SJohn Marino return false;
755e4b17023SJohn Marino }
756e4b17023SJohn Marino
757e4b17023SJohn Marino first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
758e4b17023SJohn Marino if (prev_first_load)
759e4b17023SJohn Marino {
760e4b17023SJohn Marino /* Check that there are no loads from different interleaving
761e4b17023SJohn Marino chains in the same node. The only exception is complex
762e4b17023SJohn Marino numbers. */
763e4b17023SJohn Marino if (prev_first_load != first_load
764e4b17023SJohn Marino && rhs_code != REALPART_EXPR
765e4b17023SJohn Marino && rhs_code != IMAGPART_EXPR)
766e4b17023SJohn Marino {
767e4b17023SJohn Marino if (vect_print_dump_info (REPORT_SLP))
768e4b17023SJohn Marino {
769e4b17023SJohn Marino fprintf (vect_dump, "Build SLP failed: different "
770e4b17023SJohn Marino "interleaving chains in one node ");
771e4b17023SJohn Marino print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
772e4b17023SJohn Marino }
773e4b17023SJohn Marino
774e4b17023SJohn Marino vect_free_oprnd_info (&oprnds_info);
775e4b17023SJohn Marino return false;
776e4b17023SJohn Marino }
777e4b17023SJohn Marino }
778e4b17023SJohn Marino else
779e4b17023SJohn Marino prev_first_load = first_load;
780e4b17023SJohn Marino
781e4b17023SJohn Marino if (first_load == stmt)
782e4b17023SJohn Marino {
783e4b17023SJohn Marino first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
784e4b17023SJohn Marino if (vect_supportable_dr_alignment (first_dr, false)
785e4b17023SJohn Marino == dr_unaligned_unsupported)
786e4b17023SJohn Marino {
787e4b17023SJohn Marino if (vect_print_dump_info (REPORT_SLP))
788e4b17023SJohn Marino {
789e4b17023SJohn Marino fprintf (vect_dump, "Build SLP failed: unsupported "
790e4b17023SJohn Marino "unaligned load ");
791e4b17023SJohn Marino print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
792e4b17023SJohn Marino }
793e4b17023SJohn Marino
794e4b17023SJohn Marino vect_free_oprnd_info (&oprnds_info);
795e4b17023SJohn Marino return false;
796e4b17023SJohn Marino }
797e4b17023SJohn Marino
798e4b17023SJohn Marino /* Analyze costs (for the first stmt in the group). */
799e4b17023SJohn Marino vect_model_load_cost (vinfo_for_stmt (stmt),
800e4b17023SJohn Marino ncopies_for_cost, false, *node);
801e4b17023SJohn Marino }
802e4b17023SJohn Marino
803e4b17023SJohn Marino /* Store the place of this load in the interleaving chain. In
804e4b17023SJohn Marino case that permutation is needed we later decide if a specific
805e4b17023SJohn Marino permutation is supported. */
806e4b17023SJohn Marino load_place = vect_get_place_in_interleaving_chain (stmt,
807e4b17023SJohn Marino first_load);
808e4b17023SJohn Marino if (load_place != i)
809e4b17023SJohn Marino permutation = true;
810e4b17023SJohn Marino
811e4b17023SJohn Marino VEC_safe_push (int, heap, *load_permutation, load_place);
812e4b17023SJohn Marino
813e4b17023SJohn Marino /* We stop the tree when we reach a group of loads. */
814e4b17023SJohn Marino stop_recursion = true;
815e4b17023SJohn Marino continue;
816e4b17023SJohn Marino }
817e4b17023SJohn Marino } /* Strided access. */
818e4b17023SJohn Marino else
819e4b17023SJohn Marino {
820e4b17023SJohn Marino if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
821e4b17023SJohn Marino {
822e4b17023SJohn Marino /* Not strided load. */
823e4b17023SJohn Marino if (vect_print_dump_info (REPORT_SLP))
824e4b17023SJohn Marino {
825e4b17023SJohn Marino fprintf (vect_dump, "Build SLP failed: not strided load ");
826e4b17023SJohn Marino print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
827e4b17023SJohn Marino }
828e4b17023SJohn Marino
829e4b17023SJohn Marino /* FORNOW: Not strided loads are not supported. */
830e4b17023SJohn Marino vect_free_oprnd_info (&oprnds_info);
831e4b17023SJohn Marino return false;
832e4b17023SJohn Marino }
833e4b17023SJohn Marino
834e4b17023SJohn Marino /* Not memory operation. */
835e4b17023SJohn Marino if (TREE_CODE_CLASS (rhs_code) != tcc_binary
836e4b17023SJohn Marino && TREE_CODE_CLASS (rhs_code) != tcc_unary
837e4b17023SJohn Marino && rhs_code != COND_EXPR
838e4b17023SJohn Marino && rhs_code != CALL_EXPR)
839e4b17023SJohn Marino {
840e4b17023SJohn Marino if (vect_print_dump_info (REPORT_SLP))
841e4b17023SJohn Marino {
842e4b17023SJohn Marino fprintf (vect_dump, "Build SLP failed: operation");
843e4b17023SJohn Marino fprintf (vect_dump, " unsupported ");
844e4b17023SJohn Marino print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
845e4b17023SJohn Marino }
846e4b17023SJohn Marino
847e4b17023SJohn Marino vect_free_oprnd_info (&oprnds_info);
848e4b17023SJohn Marino return false;
849e4b17023SJohn Marino }
850e4b17023SJohn Marino
851e4b17023SJohn Marino if (rhs_code == COND_EXPR)
852e4b17023SJohn Marino {
853e4b17023SJohn Marino tree cond_expr = gimple_assign_rhs1 (stmt);
854e4b17023SJohn Marino
855e4b17023SJohn Marino if (i == 0)
856e4b17023SJohn Marino first_cond_code = TREE_CODE (cond_expr);
857e4b17023SJohn Marino else if (first_cond_code != TREE_CODE (cond_expr))
858e4b17023SJohn Marino {
859e4b17023SJohn Marino if (vect_print_dump_info (REPORT_SLP))
860e4b17023SJohn Marino {
861e4b17023SJohn Marino fprintf (vect_dump, "Build SLP failed: different"
862e4b17023SJohn Marino " operation");
863e4b17023SJohn Marino print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
864e4b17023SJohn Marino }
865e4b17023SJohn Marino
866e4b17023SJohn Marino vect_free_oprnd_info (&oprnds_info);
867e4b17023SJohn Marino return false;
868e4b17023SJohn Marino }
869e4b17023SJohn Marino }
870e4b17023SJohn Marino
871e4b17023SJohn Marino /* Find the def-stmts. */
872e4b17023SJohn Marino if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt,
873e4b17023SJohn Marino ncopies_for_cost, (i == 0),
874e4b17023SJohn Marino &oprnds_info))
875e4b17023SJohn Marino {
876e4b17023SJohn Marino vect_free_oprnd_info (&oprnds_info);
877e4b17023SJohn Marino return false;
878e4b17023SJohn Marino }
879e4b17023SJohn Marino }
880e4b17023SJohn Marino }
881e4b17023SJohn Marino
882e4b17023SJohn Marino /* Add the costs of the node to the overall instance costs. */
883e4b17023SJohn Marino *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
884e4b17023SJohn Marino *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);
885e4b17023SJohn Marino
886e4b17023SJohn Marino /* Strided loads were reached - stop the recursion. */
887e4b17023SJohn Marino if (stop_recursion)
888e4b17023SJohn Marino {
889e4b17023SJohn Marino VEC_safe_push (slp_tree, heap, *loads, *node);
890e4b17023SJohn Marino if (permutation)
891e4b17023SJohn Marino {
892e4b17023SJohn Marino
893e4b17023SJohn Marino *loads_permuted = true;
894e4b17023SJohn Marino *inside_cost
895e4b17023SJohn Marino += targetm.vectorize.builtin_vectorization_cost (vec_perm, NULL, 0)
896e4b17023SJohn Marino * group_size;
897e4b17023SJohn Marino }
898e4b17023SJohn Marino else
899e4b17023SJohn Marino {
900e4b17023SJohn Marino /* We don't check here complex numbers chains, so we set
901e4b17023SJohn Marino LOADS_PERMUTED for further check in
902e4b17023SJohn Marino vect_supported_load_permutation_p. */
903e4b17023SJohn Marino if (rhs_code == REALPART_EXPR || rhs_code == IMAGPART_EXPR)
904e4b17023SJohn Marino *loads_permuted = true;
905e4b17023SJohn Marino }
906e4b17023SJohn Marino
907e4b17023SJohn Marino vect_free_oprnd_info (&oprnds_info);
908e4b17023SJohn Marino return true;
909e4b17023SJohn Marino }
910e4b17023SJohn Marino
911e4b17023SJohn Marino /* Create SLP_TREE nodes for the definition node/s. */
912e4b17023SJohn Marino FOR_EACH_VEC_ELT (slp_oprnd_info, oprnds_info, i, oprnd_info)
913e4b17023SJohn Marino {
914e4b17023SJohn Marino slp_tree child;
915e4b17023SJohn Marino
916e4b17023SJohn Marino if (oprnd_info->first_dt != vect_internal_def)
917e4b17023SJohn Marino continue;
918e4b17023SJohn Marino
919e4b17023SJohn Marino child = vect_create_new_slp_node (oprnd_info->def_stmts);
920e4b17023SJohn Marino if (!child
921e4b17023SJohn Marino || !vect_build_slp_tree (loop_vinfo, bb_vinfo, &child, group_size,
922e4b17023SJohn Marino inside_cost, outside_cost, ncopies_for_cost,
923e4b17023SJohn Marino max_nunits, load_permutation, loads,
924e4b17023SJohn Marino vectorization_factor, loads_permuted))
925e4b17023SJohn Marino {
926e4b17023SJohn Marino if (child)
927e4b17023SJohn Marino oprnd_info->def_stmts = NULL;
928e4b17023SJohn Marino vect_free_slp_tree (child);
929e4b17023SJohn Marino vect_free_oprnd_info (&oprnds_info);
930e4b17023SJohn Marino return false;
931e4b17023SJohn Marino }
932e4b17023SJohn Marino
933e4b17023SJohn Marino oprnd_info->def_stmts = NULL;
934e4b17023SJohn Marino VEC_quick_push (slp_void_p, SLP_TREE_CHILDREN (*node), child);
935e4b17023SJohn Marino }
936e4b17023SJohn Marino
937e4b17023SJohn Marino vect_free_oprnd_info (&oprnds_info);
938e4b17023SJohn Marino return true;
939e4b17023SJohn Marino }
940e4b17023SJohn Marino
941e4b17023SJohn Marino
942e4b17023SJohn Marino static void
vect_print_slp_tree(slp_tree node)943e4b17023SJohn Marino vect_print_slp_tree (slp_tree node)
944e4b17023SJohn Marino {
945e4b17023SJohn Marino int i;
946e4b17023SJohn Marino gimple stmt;
947e4b17023SJohn Marino slp_void_p child;
948e4b17023SJohn Marino
949e4b17023SJohn Marino if (!node)
950e4b17023SJohn Marino return;
951e4b17023SJohn Marino
952e4b17023SJohn Marino fprintf (vect_dump, "node ");
953e4b17023SJohn Marino FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
954e4b17023SJohn Marino {
955e4b17023SJohn Marino fprintf (vect_dump, "\n\tstmt %d ", i);
956e4b17023SJohn Marino print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
957e4b17023SJohn Marino }
958e4b17023SJohn Marino fprintf (vect_dump, "\n");
959e4b17023SJohn Marino
960e4b17023SJohn Marino FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
961e4b17023SJohn Marino vect_print_slp_tree ((slp_tree) child);
962e4b17023SJohn Marino }
963e4b17023SJohn Marino
964e4b17023SJohn Marino
965e4b17023SJohn Marino /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
966e4b17023SJohn Marino If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
967e4b17023SJohn Marino J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
968e4b17023SJohn Marino stmts in NODE are to be marked. */
969e4b17023SJohn Marino
970e4b17023SJohn Marino static void
vect_mark_slp_stmts(slp_tree node,enum slp_vect_type mark,int j)971e4b17023SJohn Marino vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
972e4b17023SJohn Marino {
973e4b17023SJohn Marino int i;
974e4b17023SJohn Marino gimple stmt;
975e4b17023SJohn Marino slp_void_p child;
976e4b17023SJohn Marino
977e4b17023SJohn Marino if (!node)
978e4b17023SJohn Marino return;
979e4b17023SJohn Marino
980e4b17023SJohn Marino FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
981e4b17023SJohn Marino if (j < 0 || i == j)
982e4b17023SJohn Marino STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
983e4b17023SJohn Marino
984e4b17023SJohn Marino FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
985e4b17023SJohn Marino vect_mark_slp_stmts ((slp_tree) child, mark, j);
986e4b17023SJohn Marino }
987e4b17023SJohn Marino
988e4b17023SJohn Marino
989e4b17023SJohn Marino /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
990e4b17023SJohn Marino
991e4b17023SJohn Marino static void
vect_mark_slp_stmts_relevant(slp_tree node)992e4b17023SJohn Marino vect_mark_slp_stmts_relevant (slp_tree node)
993e4b17023SJohn Marino {
994e4b17023SJohn Marino int i;
995e4b17023SJohn Marino gimple stmt;
996e4b17023SJohn Marino stmt_vec_info stmt_info;
997e4b17023SJohn Marino slp_void_p child;
998e4b17023SJohn Marino
999e4b17023SJohn Marino if (!node)
1000e4b17023SJohn Marino return;
1001e4b17023SJohn Marino
1002e4b17023SJohn Marino FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
1003e4b17023SJohn Marino {
1004e4b17023SJohn Marino stmt_info = vinfo_for_stmt (stmt);
1005e4b17023SJohn Marino gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
1006e4b17023SJohn Marino || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
1007e4b17023SJohn Marino STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
1008e4b17023SJohn Marino }
1009e4b17023SJohn Marino
1010e4b17023SJohn Marino FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
1011e4b17023SJohn Marino vect_mark_slp_stmts_relevant ((slp_tree) child);
1012e4b17023SJohn Marino }
1013e4b17023SJohn Marino
1014e4b17023SJohn Marino
1015e4b17023SJohn Marino /* Check if the permutation required by the SLP INSTANCE is supported.
1016e4b17023SJohn Marino Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed. */
1017e4b17023SJohn Marino
1018e4b17023SJohn Marino static bool
vect_supported_slp_permutation_p(slp_instance instance)1019e4b17023SJohn Marino vect_supported_slp_permutation_p (slp_instance instance)
1020e4b17023SJohn Marino {
1021e4b17023SJohn Marino slp_tree node = VEC_index (slp_tree, SLP_INSTANCE_LOADS (instance), 0);
1022e4b17023SJohn Marino gimple stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
1023e4b17023SJohn Marino gimple first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
1024e4b17023SJohn Marino VEC (slp_tree, heap) *sorted_loads = NULL;
1025e4b17023SJohn Marino int index;
1026e4b17023SJohn Marino slp_tree *tmp_loads = NULL;
1027e4b17023SJohn Marino int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j;
1028e4b17023SJohn Marino slp_tree load;
1029e4b17023SJohn Marino
1030e4b17023SJohn Marino /* FORNOW: The only supported loads permutation is loads from the same
1031e4b17023SJohn Marino location in all the loads in the node, when the data-refs in
1032e4b17023SJohn Marino nodes of LOADS constitute an interleaving chain.
1033e4b17023SJohn Marino Sort the nodes according to the order of accesses in the chain. */
1034e4b17023SJohn Marino tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size);
1035e4b17023SJohn Marino for (i = 0, j = 0;
1036e4b17023SJohn Marino VEC_iterate (int, SLP_INSTANCE_LOAD_PERMUTATION (instance), i, index)
1037e4b17023SJohn Marino && VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), j, load);
1038e4b17023SJohn Marino i += group_size, j++)
1039e4b17023SJohn Marino {
1040e4b17023SJohn Marino gimple scalar_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (load), 0);
1041e4b17023SJohn Marino /* Check that the loads are all in the same interleaving chain. */
1042e4b17023SJohn Marino if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (scalar_stmt)) != first_load)
1043e4b17023SJohn Marino {
1044e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
1045e4b17023SJohn Marino {
1046e4b17023SJohn Marino fprintf (vect_dump, "Build SLP failed: unsupported data "
1047e4b17023SJohn Marino "permutation ");
1048e4b17023SJohn Marino print_gimple_stmt (vect_dump, scalar_stmt, 0, TDF_SLIM);
1049e4b17023SJohn Marino }
1050e4b17023SJohn Marino
1051e4b17023SJohn Marino free (tmp_loads);
1052e4b17023SJohn Marino return false;
1053e4b17023SJohn Marino }
1054e4b17023SJohn Marino
1055e4b17023SJohn Marino tmp_loads[index] = load;
1056e4b17023SJohn Marino }
1057e4b17023SJohn Marino
1058e4b17023SJohn Marino sorted_loads = VEC_alloc (slp_tree, heap, group_size);
1059e4b17023SJohn Marino for (i = 0; i < group_size; i++)
1060e4b17023SJohn Marino VEC_safe_push (slp_tree, heap, sorted_loads, tmp_loads[i]);
1061e4b17023SJohn Marino
1062e4b17023SJohn Marino VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
1063e4b17023SJohn Marino SLP_INSTANCE_LOADS (instance) = sorted_loads;
1064e4b17023SJohn Marino free (tmp_loads);
1065e4b17023SJohn Marino
1066e4b17023SJohn Marino if (!vect_transform_slp_perm_load (stmt, NULL, NULL,
1067e4b17023SJohn Marino SLP_INSTANCE_UNROLLING_FACTOR (instance),
1068e4b17023SJohn Marino instance, true))
1069e4b17023SJohn Marino return false;
1070e4b17023SJohn Marino
1071e4b17023SJohn Marino return true;
1072e4b17023SJohn Marino }
1073e4b17023SJohn Marino
1074e4b17023SJohn Marino
1075e4b17023SJohn Marino /* Rearrange the statements of NODE according to PERMUTATION. */
1076e4b17023SJohn Marino
1077e4b17023SJohn Marino static void
vect_slp_rearrange_stmts(slp_tree node,unsigned int group_size,VEC (int,heap)* permutation)1078e4b17023SJohn Marino vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
1079e4b17023SJohn Marino VEC (int, heap) *permutation)
1080e4b17023SJohn Marino {
1081e4b17023SJohn Marino gimple stmt;
1082e4b17023SJohn Marino VEC (gimple, heap) *tmp_stmts;
1083e4b17023SJohn Marino unsigned int index, i;
1084e4b17023SJohn Marino slp_void_p child;
1085e4b17023SJohn Marino
1086e4b17023SJohn Marino if (!node)
1087e4b17023SJohn Marino return;
1088e4b17023SJohn Marino
1089e4b17023SJohn Marino FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
1090e4b17023SJohn Marino vect_slp_rearrange_stmts ((slp_tree) child, group_size, permutation);
1091e4b17023SJohn Marino
1092e4b17023SJohn Marino gcc_assert (group_size == VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node)));
1093e4b17023SJohn Marino tmp_stmts = VEC_alloc (gimple, heap, group_size);
1094e4b17023SJohn Marino
1095e4b17023SJohn Marino for (i = 0; i < group_size; i++)
1096e4b17023SJohn Marino VEC_safe_push (gimple, heap, tmp_stmts, NULL);
1097e4b17023SJohn Marino
1098e4b17023SJohn Marino FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
1099e4b17023SJohn Marino {
1100e4b17023SJohn Marino index = VEC_index (int, permutation, i);
1101e4b17023SJohn Marino VEC_replace (gimple, tmp_stmts, index, stmt);
1102e4b17023SJohn Marino }
1103e4b17023SJohn Marino
1104e4b17023SJohn Marino VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
1105e4b17023SJohn Marino SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
1106e4b17023SJohn Marino }
1107e4b17023SJohn Marino
1108e4b17023SJohn Marino
1109e4b17023SJohn Marino /* Check if the required load permutation is supported.
1110e4b17023SJohn Marino LOAD_PERMUTATION contains a list of indices of the loads.
1111e4b17023SJohn Marino In SLP this permutation is relative to the order of strided stores that are
1112e4b17023SJohn Marino the base of the SLP instance. */
1113e4b17023SJohn Marino
1114e4b17023SJohn Marino static bool
vect_supported_load_permutation_p(slp_instance slp_instn,int group_size,VEC (int,heap)* load_permutation)1115e4b17023SJohn Marino vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
1116e4b17023SJohn Marino VEC (int, heap) *load_permutation)
1117e4b17023SJohn Marino {
1118e4b17023SJohn Marino int i = 0, j, prev = -1, next, k, number_of_groups;
1119e4b17023SJohn Marino bool supported, bad_permutation = false;
1120e4b17023SJohn Marino sbitmap load_index;
1121e4b17023SJohn Marino slp_tree node, other_complex_node;
1122e4b17023SJohn Marino gimple stmt, first = NULL, other_node_first, load, next_load, first_load;
1123e4b17023SJohn Marino unsigned complex_numbers = 0;
1124e4b17023SJohn Marino struct data_reference *dr;
1125e4b17023SJohn Marino bb_vec_info bb_vinfo;
1126e4b17023SJohn Marino
1127e4b17023SJohn Marino /* FORNOW: permutations are only supported in SLP. */
1128e4b17023SJohn Marino if (!slp_instn)
1129e4b17023SJohn Marino return false;
1130e4b17023SJohn Marino
1131e4b17023SJohn Marino if (vect_print_dump_info (REPORT_SLP))
1132e4b17023SJohn Marino {
1133e4b17023SJohn Marino fprintf (vect_dump, "Load permutation ");
1134e4b17023SJohn Marino FOR_EACH_VEC_ELT (int, load_permutation, i, next)
1135e4b17023SJohn Marino fprintf (vect_dump, "%d ", next);
1136e4b17023SJohn Marino }
1137e4b17023SJohn Marino
1138e4b17023SJohn Marino /* In case of reduction every load permutation is allowed, since the order
1139e4b17023SJohn Marino of the reduction statements is not important (as opposed to the case of
1140e4b17023SJohn Marino strided stores). The only condition we need to check is that all the
1141e4b17023SJohn Marino load nodes are of the same size and have the same permutation (and then
1142e4b17023SJohn Marino rearrange all the nodes of the SLP instance according to this
1143e4b17023SJohn Marino permutation). */
1144e4b17023SJohn Marino
1145e4b17023SJohn Marino /* Check that all the load nodes are of the same size. */
1146e4b17023SJohn Marino FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
1147e4b17023SJohn Marino {
1148e4b17023SJohn Marino if (VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node))
1149e4b17023SJohn Marino != (unsigned) group_size)
1150e4b17023SJohn Marino return false;
1151e4b17023SJohn Marino
1152e4b17023SJohn Marino stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
1153e4b17023SJohn Marino if (is_gimple_assign (stmt)
1154e4b17023SJohn Marino && (gimple_assign_rhs_code (stmt) == REALPART_EXPR
1155e4b17023SJohn Marino || gimple_assign_rhs_code (stmt) == IMAGPART_EXPR))
1156e4b17023SJohn Marino complex_numbers++;
1157e4b17023SJohn Marino }
1158e4b17023SJohn Marino
1159e4b17023SJohn Marino /* Complex operands can be swapped as following:
1160e4b17023SJohn Marino real_c = real_b + real_a;
1161e4b17023SJohn Marino imag_c = imag_a + imag_b;
1162e4b17023SJohn Marino i.e., we have {real_b, imag_a} and {real_a, imag_b} instead of
1163e4b17023SJohn Marino {real_a, imag_a} and {real_b, imag_b}. We check here that if interleaving
1164e4b17023SJohn Marino chains are mixed, they match the above pattern. */
1165e4b17023SJohn Marino if (complex_numbers)
1166e4b17023SJohn Marino {
1167e4b17023SJohn Marino FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
1168e4b17023SJohn Marino {
1169e4b17023SJohn Marino FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), j, stmt)
1170e4b17023SJohn Marino {
1171e4b17023SJohn Marino if (j == 0)
1172e4b17023SJohn Marino first = stmt;
1173e4b17023SJohn Marino else
1174e4b17023SJohn Marino {
1175e4b17023SJohn Marino if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != first)
1176e4b17023SJohn Marino {
1177e4b17023SJohn Marino if (complex_numbers != 2)
1178e4b17023SJohn Marino return false;
1179e4b17023SJohn Marino
1180e4b17023SJohn Marino if (i == 0)
1181e4b17023SJohn Marino k = 1;
1182e4b17023SJohn Marino else
1183e4b17023SJohn Marino k = 0;
1184e4b17023SJohn Marino
1185e4b17023SJohn Marino other_complex_node = VEC_index (slp_tree,
1186e4b17023SJohn Marino SLP_INSTANCE_LOADS (slp_instn), k);
1187e4b17023SJohn Marino other_node_first = VEC_index (gimple,
1188e4b17023SJohn Marino SLP_TREE_SCALAR_STMTS (other_complex_node), 0);
1189e4b17023SJohn Marino
1190e4b17023SJohn Marino if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
1191e4b17023SJohn Marino != other_node_first)
1192e4b17023SJohn Marino return false;
1193e4b17023SJohn Marino }
1194e4b17023SJohn Marino }
1195e4b17023SJohn Marino }
1196e4b17023SJohn Marino }
1197e4b17023SJohn Marino }
1198e4b17023SJohn Marino
1199e4b17023SJohn Marino /* We checked that this case ok, so there is no need to proceed with
1200e4b17023SJohn Marino permutation tests. */
1201e4b17023SJohn Marino if (complex_numbers == 2
1202e4b17023SJohn Marino && VEC_length (slp_tree, SLP_INSTANCE_LOADS (slp_instn)) == 2)
1203e4b17023SJohn Marino {
1204e4b17023SJohn Marino VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (slp_instn));
1205e4b17023SJohn Marino VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
1206e4b17023SJohn Marino return true;
1207e4b17023SJohn Marino }
1208e4b17023SJohn Marino
1209e4b17023SJohn Marino node = SLP_INSTANCE_TREE (slp_instn);
1210e4b17023SJohn Marino stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
1211e4b17023SJohn Marino /* LOAD_PERMUTATION is a list of indices of all the loads of the SLP
1212e4b17023SJohn Marino instance, not all the loads belong to the same node or interleaving
1213e4b17023SJohn Marino group. Hence, we need to divide them into groups according to
1214e4b17023SJohn Marino GROUP_SIZE. */
1215e4b17023SJohn Marino number_of_groups = VEC_length (int, load_permutation) / group_size;
1216e4b17023SJohn Marino
1217e4b17023SJohn Marino /* Reduction (there are no data-refs in the root).
1218e4b17023SJohn Marino In reduction chain the order of the loads is important. */
1219e4b17023SJohn Marino if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
1220e4b17023SJohn Marino && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1221e4b17023SJohn Marino {
1222e4b17023SJohn Marino int first_group_load_index;
1223e4b17023SJohn Marino
1224e4b17023SJohn Marino /* Compare all the permutation sequences to the first one. */
1225e4b17023SJohn Marino for (i = 1; i < number_of_groups; i++)
1226e4b17023SJohn Marino {
1227e4b17023SJohn Marino k = 0;
1228e4b17023SJohn Marino for (j = i * group_size; j < i * group_size + group_size; j++)
1229e4b17023SJohn Marino {
1230e4b17023SJohn Marino next = VEC_index (int, load_permutation, j);
1231e4b17023SJohn Marino first_group_load_index = VEC_index (int, load_permutation, k);
1232e4b17023SJohn Marino
1233e4b17023SJohn Marino if (next != first_group_load_index)
1234e4b17023SJohn Marino {
1235e4b17023SJohn Marino bad_permutation = true;
1236e4b17023SJohn Marino break;
1237e4b17023SJohn Marino }
1238e4b17023SJohn Marino
1239e4b17023SJohn Marino k++;
1240e4b17023SJohn Marino }
1241e4b17023SJohn Marino
1242e4b17023SJohn Marino if (bad_permutation)
1243e4b17023SJohn Marino break;
1244e4b17023SJohn Marino }
1245e4b17023SJohn Marino
1246e4b17023SJohn Marino if (!bad_permutation)
1247e4b17023SJohn Marino {
1248e4b17023SJohn Marino /* Check that the loads in the first sequence are different and there
1249e4b17023SJohn Marino are no gaps between them. */
1250e4b17023SJohn Marino load_index = sbitmap_alloc (group_size);
1251e4b17023SJohn Marino sbitmap_zero (load_index);
1252e4b17023SJohn Marino for (k = 0; k < group_size; k++)
1253e4b17023SJohn Marino {
1254e4b17023SJohn Marino first_group_load_index = VEC_index (int, load_permutation, k);
1255e4b17023SJohn Marino if (TEST_BIT (load_index, first_group_load_index))
1256e4b17023SJohn Marino {
1257e4b17023SJohn Marino bad_permutation = true;
1258e4b17023SJohn Marino break;
1259e4b17023SJohn Marino }
1260e4b17023SJohn Marino
1261e4b17023SJohn Marino SET_BIT (load_index, first_group_load_index);
1262e4b17023SJohn Marino }
1263e4b17023SJohn Marino
1264e4b17023SJohn Marino if (!bad_permutation)
1265e4b17023SJohn Marino for (k = 0; k < group_size; k++)
1266e4b17023SJohn Marino if (!TEST_BIT (load_index, k))
1267e4b17023SJohn Marino {
1268e4b17023SJohn Marino bad_permutation = true;
1269e4b17023SJohn Marino break;
1270e4b17023SJohn Marino }
1271e4b17023SJohn Marino
1272e4b17023SJohn Marino sbitmap_free (load_index);
1273e4b17023SJohn Marino }
1274e4b17023SJohn Marino
1275e4b17023SJohn Marino if (!bad_permutation)
1276e4b17023SJohn Marino {
1277e4b17023SJohn Marino /* This permutation is valid for reduction. Since the order of the
1278e4b17023SJohn Marino statements in the nodes is not important unless they are memory
1279e4b17023SJohn Marino accesses, we can rearrange the statements in all the nodes
1280e4b17023SJohn Marino according to the order of the loads. */
1281e4b17023SJohn Marino vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
1282e4b17023SJohn Marino load_permutation);
1283e4b17023SJohn Marino VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
1284e4b17023SJohn Marino return true;
1285e4b17023SJohn Marino }
1286e4b17023SJohn Marino }
1287e4b17023SJohn Marino
1288e4b17023SJohn Marino /* In basic block vectorization we allow any subchain of an interleaving
1289e4b17023SJohn Marino chain.
1290e4b17023SJohn Marino FORNOW: not supported in loop SLP because of realignment compications. */
1291e4b17023SJohn Marino bb_vinfo = STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt));
1292e4b17023SJohn Marino bad_permutation = false;
1293e4b17023SJohn Marino /* Check that for every node in the instance teh loads form a subchain. */
1294e4b17023SJohn Marino if (bb_vinfo)
1295e4b17023SJohn Marino {
1296e4b17023SJohn Marino FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
1297e4b17023SJohn Marino {
1298e4b17023SJohn Marino next_load = NULL;
1299e4b17023SJohn Marino first_load = NULL;
1300e4b17023SJohn Marino FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), j, load)
1301e4b17023SJohn Marino {
1302e4b17023SJohn Marino if (!first_load)
1303e4b17023SJohn Marino first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (load));
1304e4b17023SJohn Marino else if (first_load
1305e4b17023SJohn Marino != GROUP_FIRST_ELEMENT (vinfo_for_stmt (load)))
1306e4b17023SJohn Marino {
1307e4b17023SJohn Marino bad_permutation = true;
1308e4b17023SJohn Marino break;
1309e4b17023SJohn Marino }
1310e4b17023SJohn Marino
1311e4b17023SJohn Marino if (j != 0 && next_load != load)
1312e4b17023SJohn Marino {
1313e4b17023SJohn Marino bad_permutation = true;
1314e4b17023SJohn Marino break;
1315e4b17023SJohn Marino }
1316e4b17023SJohn Marino
1317e4b17023SJohn Marino next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
1318e4b17023SJohn Marino }
1319e4b17023SJohn Marino
1320e4b17023SJohn Marino if (bad_permutation)
1321e4b17023SJohn Marino break;
1322e4b17023SJohn Marino }
1323e4b17023SJohn Marino
1324e4b17023SJohn Marino /* Check that the alignment of the first load in every subchain, i.e.,
1325e4b17023SJohn Marino the first statement in every load node, is supported. */
1326e4b17023SJohn Marino if (!bad_permutation)
1327e4b17023SJohn Marino {
1328e4b17023SJohn Marino FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
1329e4b17023SJohn Marino {
1330e4b17023SJohn Marino first_load = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
1331e4b17023SJohn Marino if (first_load
1332e4b17023SJohn Marino != GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load)))
1333e4b17023SJohn Marino {
1334e4b17023SJohn Marino dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load));
1335e4b17023SJohn Marino if (vect_supportable_dr_alignment (dr, false)
1336e4b17023SJohn Marino == dr_unaligned_unsupported)
1337e4b17023SJohn Marino {
1338e4b17023SJohn Marino if (vect_print_dump_info (REPORT_SLP))
1339e4b17023SJohn Marino {
1340e4b17023SJohn Marino fprintf (vect_dump, "unsupported unaligned load ");
1341e4b17023SJohn Marino print_gimple_stmt (vect_dump, first_load, 0,
1342e4b17023SJohn Marino TDF_SLIM);
1343e4b17023SJohn Marino }
1344e4b17023SJohn Marino bad_permutation = true;
1345e4b17023SJohn Marino break;
1346e4b17023SJohn Marino }
1347e4b17023SJohn Marino }
1348e4b17023SJohn Marino }
1349e4b17023SJohn Marino
1350e4b17023SJohn Marino if (!bad_permutation)
1351e4b17023SJohn Marino {
1352e4b17023SJohn Marino VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
1353e4b17023SJohn Marino return true;
1354e4b17023SJohn Marino }
1355e4b17023SJohn Marino }
1356e4b17023SJohn Marino }
1357e4b17023SJohn Marino
1358e4b17023SJohn Marino /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
1359e4b17023SJohn Marino GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
1360e4b17023SJohn Marino well (unless it's reduction). */
1361e4b17023SJohn Marino if (VEC_length (int, load_permutation)
1362e4b17023SJohn Marino != (unsigned int) (group_size * group_size))
1363e4b17023SJohn Marino return false;
1364e4b17023SJohn Marino
1365e4b17023SJohn Marino supported = true;
1366e4b17023SJohn Marino load_index = sbitmap_alloc (group_size);
1367e4b17023SJohn Marino sbitmap_zero (load_index);
1368e4b17023SJohn Marino for (j = 0; j < group_size; j++)
1369e4b17023SJohn Marino {
1370e4b17023SJohn Marino for (i = j * group_size, k = 0;
1371e4b17023SJohn Marino VEC_iterate (int, load_permutation, i, next) && k < group_size;
1372e4b17023SJohn Marino i++, k++)
1373e4b17023SJohn Marino {
1374e4b17023SJohn Marino if (i != j * group_size && next != prev)
1375e4b17023SJohn Marino {
1376e4b17023SJohn Marino supported = false;
1377e4b17023SJohn Marino break;
1378e4b17023SJohn Marino }
1379e4b17023SJohn Marino
1380e4b17023SJohn Marino prev = next;
1381e4b17023SJohn Marino }
1382e4b17023SJohn Marino
1383e4b17023SJohn Marino if (TEST_BIT (load_index, prev))
1384e4b17023SJohn Marino {
1385e4b17023SJohn Marino supported = false;
1386e4b17023SJohn Marino break;
1387e4b17023SJohn Marino }
1388e4b17023SJohn Marino
1389e4b17023SJohn Marino SET_BIT (load_index, prev);
1390e4b17023SJohn Marino }
1391e4b17023SJohn Marino
1392e4b17023SJohn Marino for (j = 0; j < group_size; j++)
1393e4b17023SJohn Marino if (!TEST_BIT (load_index, j))
1394e4b17023SJohn Marino return false;
1395e4b17023SJohn Marino
1396e4b17023SJohn Marino sbitmap_free (load_index);
1397e4b17023SJohn Marino
1398e4b17023SJohn Marino if (supported && i == group_size * group_size
1399e4b17023SJohn Marino && vect_supported_slp_permutation_p (slp_instn))
1400e4b17023SJohn Marino return true;
1401e4b17023SJohn Marino
1402e4b17023SJohn Marino return false;
1403e4b17023SJohn Marino }
1404e4b17023SJohn Marino
1405e4b17023SJohn Marino
1406e4b17023SJohn Marino /* Find the first load in the loop that belongs to INSTANCE.
1407e4b17023SJohn Marino When loads are in several SLP nodes, there can be a case in which the first
1408e4b17023SJohn Marino load does not appear in the first SLP node to be transformed, causing
1409e4b17023SJohn Marino incorrect order of statements. Since we generate all the loads together,
1410e4b17023SJohn Marino they must be inserted before the first load of the SLP instance and not
1411e4b17023SJohn Marino before the first load of the first node of the instance. */
1412e4b17023SJohn Marino
1413e4b17023SJohn Marino static gimple
vect_find_first_load_in_slp_instance(slp_instance instance)1414e4b17023SJohn Marino vect_find_first_load_in_slp_instance (slp_instance instance)
1415e4b17023SJohn Marino {
1416e4b17023SJohn Marino int i, j;
1417e4b17023SJohn Marino slp_tree load_node;
1418e4b17023SJohn Marino gimple first_load = NULL, load;
1419e4b17023SJohn Marino
1420e4b17023SJohn Marino FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (instance), i, load_node)
1421e4b17023SJohn Marino FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (load_node), j, load)
1422e4b17023SJohn Marino first_load = get_earlier_stmt (load, first_load);
1423e4b17023SJohn Marino
1424e4b17023SJohn Marino return first_load;
1425e4b17023SJohn Marino }
1426e4b17023SJohn Marino
1427e4b17023SJohn Marino
1428e4b17023SJohn Marino /* Find the last store in SLP INSTANCE. */
1429e4b17023SJohn Marino
1430e4b17023SJohn Marino static gimple
vect_find_last_store_in_slp_instance(slp_instance instance)1431e4b17023SJohn Marino vect_find_last_store_in_slp_instance (slp_instance instance)
1432e4b17023SJohn Marino {
1433e4b17023SJohn Marino int i;
1434e4b17023SJohn Marino slp_tree node;
1435e4b17023SJohn Marino gimple last_store = NULL, store;
1436e4b17023SJohn Marino
1437e4b17023SJohn Marino node = SLP_INSTANCE_TREE (instance);
1438e4b17023SJohn Marino for (i = 0;
1439e4b17023SJohn Marino VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, store);
1440e4b17023SJohn Marino i++)
1441e4b17023SJohn Marino last_store = get_later_stmt (store, last_store);
1442e4b17023SJohn Marino
1443e4b17023SJohn Marino return last_store;
1444e4b17023SJohn Marino }
1445e4b17023SJohn Marino
1446e4b17023SJohn Marino
1447e4b17023SJohn Marino /* Analyze an SLP instance starting from a group of strided stores. Call
1448e4b17023SJohn Marino vect_build_slp_tree to build a tree of packed stmts if possible.
1449e4b17023SJohn Marino Return FALSE if it's impossible to SLP any stmt in the loop. */
1450e4b17023SJohn Marino
1451e4b17023SJohn Marino static bool
vect_analyze_slp_instance(loop_vec_info loop_vinfo,bb_vec_info bb_vinfo,gimple stmt)1452e4b17023SJohn Marino vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
1453e4b17023SJohn Marino gimple stmt)
1454e4b17023SJohn Marino {
1455e4b17023SJohn Marino slp_instance new_instance;
1456e4b17023SJohn Marino slp_tree node;
1457e4b17023SJohn Marino unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1458e4b17023SJohn Marino unsigned int unrolling_factor = 1, nunits;
1459e4b17023SJohn Marino tree vectype, scalar_type = NULL_TREE;
1460e4b17023SJohn Marino gimple next;
1461e4b17023SJohn Marino unsigned int vectorization_factor = 0;
1462e4b17023SJohn Marino int inside_cost = 0, outside_cost = 0, ncopies_for_cost, i;
1463e4b17023SJohn Marino unsigned int max_nunits = 0;
1464e4b17023SJohn Marino VEC (int, heap) *load_permutation;
1465e4b17023SJohn Marino VEC (slp_tree, heap) *loads;
1466e4b17023SJohn Marino struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
1467e4b17023SJohn Marino bool loads_permuted = false;
1468e4b17023SJohn Marino VEC (gimple, heap) *scalar_stmts;
1469e4b17023SJohn Marino
1470e4b17023SJohn Marino if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1471e4b17023SJohn Marino {
1472e4b17023SJohn Marino if (dr)
1473e4b17023SJohn Marino {
1474e4b17023SJohn Marino scalar_type = TREE_TYPE (DR_REF (dr));
1475e4b17023SJohn Marino vectype = get_vectype_for_scalar_type (scalar_type);
1476e4b17023SJohn Marino }
1477e4b17023SJohn Marino else
1478e4b17023SJohn Marino {
1479e4b17023SJohn Marino gcc_assert (loop_vinfo);
1480e4b17023SJohn Marino vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1481e4b17023SJohn Marino }
1482e4b17023SJohn Marino
1483e4b17023SJohn Marino group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1484e4b17023SJohn Marino }
1485e4b17023SJohn Marino else
1486e4b17023SJohn Marino {
1487e4b17023SJohn Marino gcc_assert (loop_vinfo);
1488e4b17023SJohn Marino vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1489e4b17023SJohn Marino group_size = VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo));
1490e4b17023SJohn Marino }
1491e4b17023SJohn Marino
1492e4b17023SJohn Marino if (!vectype)
1493e4b17023SJohn Marino {
1494e4b17023SJohn Marino if (vect_print_dump_info (REPORT_SLP))
1495e4b17023SJohn Marino {
1496e4b17023SJohn Marino fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
1497e4b17023SJohn Marino print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
1498e4b17023SJohn Marino }
1499e4b17023SJohn Marino
1500e4b17023SJohn Marino return false;
1501e4b17023SJohn Marino }
1502e4b17023SJohn Marino
1503e4b17023SJohn Marino nunits = TYPE_VECTOR_SUBPARTS (vectype);
1504e4b17023SJohn Marino if (loop_vinfo)
1505e4b17023SJohn Marino vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1506e4b17023SJohn Marino else
1507e4b17023SJohn Marino vectorization_factor = nunits;
1508e4b17023SJohn Marino
1509e4b17023SJohn Marino /* Calculate the unrolling factor. */
1510e4b17023SJohn Marino unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
1511e4b17023SJohn Marino if (unrolling_factor != 1 && !loop_vinfo)
1512e4b17023SJohn Marino {
1513e4b17023SJohn Marino if (vect_print_dump_info (REPORT_SLP))
1514e4b17023SJohn Marino fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
1515e4b17023SJohn Marino " block SLP");
1516e4b17023SJohn Marino
1517e4b17023SJohn Marino return false;
1518e4b17023SJohn Marino }
1519e4b17023SJohn Marino
1520e4b17023SJohn Marino /* Create a node (a root of the SLP tree) for the packed strided stores. */
1521e4b17023SJohn Marino scalar_stmts = VEC_alloc (gimple, heap, group_size);
1522e4b17023SJohn Marino next = stmt;
1523e4b17023SJohn Marino if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1524e4b17023SJohn Marino {
1525e4b17023SJohn Marino /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
1526e4b17023SJohn Marino while (next)
1527e4b17023SJohn Marino {
1528e4b17023SJohn Marino if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
1529e4b17023SJohn Marino && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
1530e4b17023SJohn Marino VEC_safe_push (gimple, heap, scalar_stmts,
1531e4b17023SJohn Marino STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
1532e4b17023SJohn Marino else
1533e4b17023SJohn Marino VEC_safe_push (gimple, heap, scalar_stmts, next);
1534e4b17023SJohn Marino next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
1535e4b17023SJohn Marino }
1536e4b17023SJohn Marino }
1537e4b17023SJohn Marino else
1538e4b17023SJohn Marino {
1539e4b17023SJohn Marino /* Collect reduction statements. */
1540e4b17023SJohn Marino VEC (gimple, heap) *reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1541e4b17023SJohn Marino for (i = 0; VEC_iterate (gimple, reductions, i, next); i++)
1542e4b17023SJohn Marino VEC_safe_push (gimple, heap, scalar_stmts, next);
1543e4b17023SJohn Marino }
1544e4b17023SJohn Marino
1545e4b17023SJohn Marino node = vect_create_new_slp_node (scalar_stmts);
1546e4b17023SJohn Marino
1547e4b17023SJohn Marino /* Calculate the number of vector stmts to create based on the unrolling
1548e4b17023SJohn Marino factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1549e4b17023SJohn Marino GROUP_SIZE / NUNITS otherwise. */
1550e4b17023SJohn Marino ncopies_for_cost = unrolling_factor * group_size / nunits;
1551e4b17023SJohn Marino
1552e4b17023SJohn Marino load_permutation = VEC_alloc (int, heap, group_size * group_size);
1553e4b17023SJohn Marino loads = VEC_alloc (slp_tree, heap, group_size);
1554e4b17023SJohn Marino
1555e4b17023SJohn Marino /* Build the tree for the SLP instance. */
1556e4b17023SJohn Marino if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
1557e4b17023SJohn Marino &inside_cost, &outside_cost, ncopies_for_cost,
1558e4b17023SJohn Marino &max_nunits, &load_permutation, &loads,
1559e4b17023SJohn Marino vectorization_factor, &loads_permuted))
1560e4b17023SJohn Marino {
1561e4b17023SJohn Marino /* Calculate the unrolling factor based on the smallest type. */
1562e4b17023SJohn Marino if (max_nunits > nunits)
1563e4b17023SJohn Marino unrolling_factor = least_common_multiple (max_nunits, group_size)
1564e4b17023SJohn Marino / group_size;
1565e4b17023SJohn Marino
1566e4b17023SJohn Marino if (unrolling_factor != 1 && !loop_vinfo)
1567e4b17023SJohn Marino {
1568e4b17023SJohn Marino if (vect_print_dump_info (REPORT_SLP))
1569e4b17023SJohn Marino fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
1570e4b17023SJohn Marino " block SLP");
1571e4b17023SJohn Marino vect_free_slp_tree (node);
1572e4b17023SJohn Marino VEC_free (int, heap, load_permutation);
1573e4b17023SJohn Marino VEC_free (slp_tree, heap, loads);
1574e4b17023SJohn Marino return false;
1575e4b17023SJohn Marino }
1576e4b17023SJohn Marino
1577e4b17023SJohn Marino /* Create a new SLP instance. */
1578e4b17023SJohn Marino new_instance = XNEW (struct _slp_instance);
1579e4b17023SJohn Marino SLP_INSTANCE_TREE (new_instance) = node;
1580e4b17023SJohn Marino SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
1581e4b17023SJohn Marino SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
1582e4b17023SJohn Marino SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
1583e4b17023SJohn Marino SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
1584e4b17023SJohn Marino SLP_INSTANCE_LOADS (new_instance) = loads;
1585e4b17023SJohn Marino SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
1586e4b17023SJohn Marino SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;
1587e4b17023SJohn Marino
1588e4b17023SJohn Marino if (loads_permuted)
1589e4b17023SJohn Marino {
1590e4b17023SJohn Marino if (!vect_supported_load_permutation_p (new_instance, group_size,
1591e4b17023SJohn Marino load_permutation))
1592e4b17023SJohn Marino {
1593e4b17023SJohn Marino if (vect_print_dump_info (REPORT_SLP))
1594e4b17023SJohn Marino {
1595e4b17023SJohn Marino fprintf (vect_dump, "Build SLP failed: unsupported load "
1596e4b17023SJohn Marino "permutation ");
1597e4b17023SJohn Marino print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1598e4b17023SJohn Marino }
1599e4b17023SJohn Marino
1600e4b17023SJohn Marino vect_free_slp_instance (new_instance);
1601e4b17023SJohn Marino return false;
1602e4b17023SJohn Marino }
1603e4b17023SJohn Marino
1604e4b17023SJohn Marino SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
1605e4b17023SJohn Marino = vect_find_first_load_in_slp_instance (new_instance);
1606e4b17023SJohn Marino }
1607e4b17023SJohn Marino else
1608e4b17023SJohn Marino VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (new_instance));
1609e4b17023SJohn Marino
1610e4b17023SJohn Marino if (loop_vinfo)
1611e4b17023SJohn Marino VEC_safe_push (slp_instance, heap,
1612e4b17023SJohn Marino LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
1613e4b17023SJohn Marino new_instance);
1614e4b17023SJohn Marino else
1615e4b17023SJohn Marino VEC_safe_push (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo),
1616e4b17023SJohn Marino new_instance);
1617e4b17023SJohn Marino
1618e4b17023SJohn Marino if (vect_print_dump_info (REPORT_SLP))
1619e4b17023SJohn Marino vect_print_slp_tree (node);
1620e4b17023SJohn Marino
1621e4b17023SJohn Marino return true;
1622e4b17023SJohn Marino }
1623e4b17023SJohn Marino
1624e4b17023SJohn Marino /* Failed to SLP. */
1625e4b17023SJohn Marino /* Free the allocated memory. */
1626e4b17023SJohn Marino vect_free_slp_tree (node);
1627e4b17023SJohn Marino VEC_free (int, heap, load_permutation);
1628e4b17023SJohn Marino VEC_free (slp_tree, heap, loads);
1629e4b17023SJohn Marino
1630e4b17023SJohn Marino return false;
1631e4b17023SJohn Marino }
1632e4b17023SJohn Marino
1633e4b17023SJohn Marino
1634e4b17023SJohn Marino /* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
1635e4b17023SJohn Marino trees of packed scalar stmts if SLP is possible. */
1636e4b17023SJohn Marino
1637e4b17023SJohn Marino bool
vect_analyze_slp(loop_vec_info loop_vinfo,bb_vec_info bb_vinfo)1638e4b17023SJohn Marino vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1639e4b17023SJohn Marino {
1640e4b17023SJohn Marino unsigned int i;
1641e4b17023SJohn Marino VEC (gimple, heap) *strided_stores, *reductions = NULL, *reduc_chains = NULL;
1642e4b17023SJohn Marino gimple first_element;
1643e4b17023SJohn Marino bool ok = false;
1644e4b17023SJohn Marino
1645e4b17023SJohn Marino if (vect_print_dump_info (REPORT_SLP))
1646e4b17023SJohn Marino fprintf (vect_dump, "=== vect_analyze_slp ===");
1647e4b17023SJohn Marino
1648e4b17023SJohn Marino if (loop_vinfo)
1649e4b17023SJohn Marino {
1650e4b17023SJohn Marino strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
1651e4b17023SJohn Marino reduc_chains = LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo);
1652e4b17023SJohn Marino reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1653e4b17023SJohn Marino }
1654e4b17023SJohn Marino else
1655e4b17023SJohn Marino strided_stores = BB_VINFO_STRIDED_STORES (bb_vinfo);
1656e4b17023SJohn Marino
1657e4b17023SJohn Marino /* Find SLP sequences starting from groups of strided stores. */
1658e4b17023SJohn Marino FOR_EACH_VEC_ELT (gimple, strided_stores, i, first_element)
1659e4b17023SJohn Marino if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
1660e4b17023SJohn Marino ok = true;
1661e4b17023SJohn Marino
1662e4b17023SJohn Marino if (bb_vinfo && !ok)
1663e4b17023SJohn Marino {
1664e4b17023SJohn Marino if (vect_print_dump_info (REPORT_SLP))
1665e4b17023SJohn Marino fprintf (vect_dump, "Failed to SLP the basic block.");
1666e4b17023SJohn Marino
1667e4b17023SJohn Marino return false;
1668e4b17023SJohn Marino }
1669e4b17023SJohn Marino
1670e4b17023SJohn Marino if (loop_vinfo
1671e4b17023SJohn Marino && VEC_length (gimple, LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)) > 0)
1672e4b17023SJohn Marino {
1673e4b17023SJohn Marino /* Find SLP sequences starting from reduction chains. */
1674e4b17023SJohn Marino FOR_EACH_VEC_ELT (gimple, reduc_chains, i, first_element)
1675e4b17023SJohn Marino if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
1676e4b17023SJohn Marino ok = true;
1677e4b17023SJohn Marino else
1678e4b17023SJohn Marino return false;
1679e4b17023SJohn Marino
1680e4b17023SJohn Marino /* Don't try to vectorize SLP reductions if reduction chain was
1681e4b17023SJohn Marino detected. */
1682e4b17023SJohn Marino return ok;
1683e4b17023SJohn Marino }
1684e4b17023SJohn Marino
1685e4b17023SJohn Marino /* Find SLP sequences starting from groups of reductions. */
1686e4b17023SJohn Marino if (loop_vinfo && VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo)) > 1
1687e4b17023SJohn Marino && vect_analyze_slp_instance (loop_vinfo, bb_vinfo,
1688e4b17023SJohn Marino VEC_index (gimple, reductions, 0)))
1689e4b17023SJohn Marino ok = true;
1690e4b17023SJohn Marino
1691e4b17023SJohn Marino return true;
1692e4b17023SJohn Marino }
1693e4b17023SJohn Marino
1694e4b17023SJohn Marino
1695e4b17023SJohn Marino /* For each possible SLP instance decide whether to SLP it and calculate overall
1696e4b17023SJohn Marino unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
1697e4b17023SJohn Marino least one instance. */
1698e4b17023SJohn Marino
1699e4b17023SJohn Marino bool
vect_make_slp_decision(loop_vec_info loop_vinfo)1700e4b17023SJohn Marino vect_make_slp_decision (loop_vec_info loop_vinfo)
1701e4b17023SJohn Marino {
1702e4b17023SJohn Marino unsigned int i, unrolling_factor = 1;
1703e4b17023SJohn Marino VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1704e4b17023SJohn Marino slp_instance instance;
1705e4b17023SJohn Marino int decided_to_slp = 0;
1706e4b17023SJohn Marino
1707e4b17023SJohn Marino if (vect_print_dump_info (REPORT_SLP))
1708e4b17023SJohn Marino fprintf (vect_dump, "=== vect_make_slp_decision ===");
1709e4b17023SJohn Marino
1710e4b17023SJohn Marino FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
1711e4b17023SJohn Marino {
1712e4b17023SJohn Marino /* FORNOW: SLP if you can. */
1713e4b17023SJohn Marino if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
1714e4b17023SJohn Marino unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
1715e4b17023SJohn Marino
1716e4b17023SJohn Marino /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
1717e4b17023SJohn Marino call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
1718e4b17023SJohn Marino loop-based vectorization. Such stmts will be marked as HYBRID. */
1719e4b17023SJohn Marino vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1720e4b17023SJohn Marino decided_to_slp++;
1721e4b17023SJohn Marino }
1722e4b17023SJohn Marino
1723e4b17023SJohn Marino LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
1724e4b17023SJohn Marino
1725e4b17023SJohn Marino if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
1726e4b17023SJohn Marino fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
1727e4b17023SJohn Marino decided_to_slp, unrolling_factor);
1728e4b17023SJohn Marino
1729e4b17023SJohn Marino return (decided_to_slp > 0);
1730e4b17023SJohn Marino }
1731e4b17023SJohn Marino
1732e4b17023SJohn Marino
1733e4b17023SJohn Marino /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
1734e4b17023SJohn Marino can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
1735e4b17023SJohn Marino
1736e4b17023SJohn Marino static void
vect_detect_hybrid_slp_stmts(slp_tree node)1737e4b17023SJohn Marino vect_detect_hybrid_slp_stmts (slp_tree node)
1738e4b17023SJohn Marino {
1739e4b17023SJohn Marino int i;
1740e4b17023SJohn Marino VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (node);
1741e4b17023SJohn Marino gimple stmt = VEC_index (gimple, stmts, 0);
1742e4b17023SJohn Marino imm_use_iterator imm_iter;
1743e4b17023SJohn Marino gimple use_stmt;
1744e4b17023SJohn Marino stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1745e4b17023SJohn Marino slp_void_p child;
1746e4b17023SJohn Marino loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1747e4b17023SJohn Marino struct loop *loop = NULL;
1748e4b17023SJohn Marino bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1749e4b17023SJohn Marino basic_block bb = NULL;
1750e4b17023SJohn Marino
1751e4b17023SJohn Marino if (!node)
1752e4b17023SJohn Marino return;
1753e4b17023SJohn Marino
1754e4b17023SJohn Marino if (loop_vinfo)
1755e4b17023SJohn Marino loop = LOOP_VINFO_LOOP (loop_vinfo);
1756e4b17023SJohn Marino else
1757e4b17023SJohn Marino bb = BB_VINFO_BB (bb_vinfo);
1758e4b17023SJohn Marino
1759e4b17023SJohn Marino FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
1760e4b17023SJohn Marino if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
1761e4b17023SJohn Marino && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
1762e4b17023SJohn Marino FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
1763e4b17023SJohn Marino if (gimple_bb (use_stmt)
1764e4b17023SJohn Marino && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
1765e4b17023SJohn Marino || bb == gimple_bb (use_stmt))
1766e4b17023SJohn Marino && (stmt_vinfo = vinfo_for_stmt (use_stmt))
1767e4b17023SJohn Marino && !STMT_SLP_TYPE (stmt_vinfo)
1768e4b17023SJohn Marino && (STMT_VINFO_RELEVANT (stmt_vinfo)
1769e4b17023SJohn Marino || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo)))
1770e4b17023SJohn Marino && !(gimple_code (use_stmt) == GIMPLE_PHI
1771e4b17023SJohn Marino && STMT_VINFO_DEF_TYPE (stmt_vinfo)
1772e4b17023SJohn Marino == vect_reduction_def))
1773e4b17023SJohn Marino vect_mark_slp_stmts (node, hybrid, i);
1774e4b17023SJohn Marino
1775e4b17023SJohn Marino FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
1776e4b17023SJohn Marino vect_detect_hybrid_slp_stmts ((slp_tree) child);
1777e4b17023SJohn Marino }
1778e4b17023SJohn Marino
1779e4b17023SJohn Marino
1780e4b17023SJohn Marino /* Find stmts that must be both vectorized and SLPed. */
1781e4b17023SJohn Marino
1782e4b17023SJohn Marino void
vect_detect_hybrid_slp(loop_vec_info loop_vinfo)1783e4b17023SJohn Marino vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
1784e4b17023SJohn Marino {
1785e4b17023SJohn Marino unsigned int i;
1786e4b17023SJohn Marino VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1787e4b17023SJohn Marino slp_instance instance;
1788e4b17023SJohn Marino
1789e4b17023SJohn Marino if (vect_print_dump_info (REPORT_SLP))
1790e4b17023SJohn Marino fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");
1791e4b17023SJohn Marino
1792e4b17023SJohn Marino FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
1793e4b17023SJohn Marino vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
1794e4b17023SJohn Marino }
1795e4b17023SJohn Marino
1796e4b17023SJohn Marino
1797e4b17023SJohn Marino /* Create and initialize a new bb_vec_info struct for BB, as well as
1798e4b17023SJohn Marino stmt_vec_info structs for all the stmts in it. */
1799e4b17023SJohn Marino
1800e4b17023SJohn Marino static bb_vec_info
new_bb_vec_info(basic_block bb)1801e4b17023SJohn Marino new_bb_vec_info (basic_block bb)
1802e4b17023SJohn Marino {
1803e4b17023SJohn Marino bb_vec_info res = NULL;
1804e4b17023SJohn Marino gimple_stmt_iterator gsi;
1805e4b17023SJohn Marino
1806e4b17023SJohn Marino res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
1807e4b17023SJohn Marino BB_VINFO_BB (res) = bb;
1808e4b17023SJohn Marino
1809e4b17023SJohn Marino for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1810e4b17023SJohn Marino {
1811e4b17023SJohn Marino gimple stmt = gsi_stmt (gsi);
1812e4b17023SJohn Marino gimple_set_uid (stmt, 0);
1813e4b17023SJohn Marino set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
1814e4b17023SJohn Marino }
1815e4b17023SJohn Marino
1816e4b17023SJohn Marino BB_VINFO_STRIDED_STORES (res) = VEC_alloc (gimple, heap, 10);
1817e4b17023SJohn Marino BB_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 2);
1818e4b17023SJohn Marino
1819e4b17023SJohn Marino bb->aux = res;
1820e4b17023SJohn Marino return res;
1821e4b17023SJohn Marino }
1822e4b17023SJohn Marino
1823e4b17023SJohn Marino
1824e4b17023SJohn Marino /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
1825e4b17023SJohn Marino stmts in the basic block. */
1826e4b17023SJohn Marino
1827e4b17023SJohn Marino static void
destroy_bb_vec_info(bb_vec_info bb_vinfo)1828e4b17023SJohn Marino destroy_bb_vec_info (bb_vec_info bb_vinfo)
1829e4b17023SJohn Marino {
1830e4b17023SJohn Marino VEC (slp_instance, heap) *slp_instances;
1831e4b17023SJohn Marino slp_instance instance;
1832e4b17023SJohn Marino basic_block bb;
1833e4b17023SJohn Marino gimple_stmt_iterator si;
1834e4b17023SJohn Marino unsigned i;
1835e4b17023SJohn Marino
1836e4b17023SJohn Marino if (!bb_vinfo)
1837e4b17023SJohn Marino return;
1838e4b17023SJohn Marino
1839e4b17023SJohn Marino bb = BB_VINFO_BB (bb_vinfo);
1840e4b17023SJohn Marino
1841e4b17023SJohn Marino for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1842e4b17023SJohn Marino {
1843e4b17023SJohn Marino gimple stmt = gsi_stmt (si);
1844e4b17023SJohn Marino stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1845e4b17023SJohn Marino
1846e4b17023SJohn Marino if (stmt_info)
1847e4b17023SJohn Marino /* Free stmt_vec_info. */
1848e4b17023SJohn Marino free_stmt_vec_info (stmt);
1849e4b17023SJohn Marino }
1850e4b17023SJohn Marino
1851e4b17023SJohn Marino free_data_refs (BB_VINFO_DATAREFS (bb_vinfo));
1852e4b17023SJohn Marino free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
1853e4b17023SJohn Marino VEC_free (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo));
1854e4b17023SJohn Marino slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1855e4b17023SJohn Marino FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
1856e4b17023SJohn Marino vect_free_slp_instance (instance);
1857e4b17023SJohn Marino VEC_free (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo));
1858e4b17023SJohn Marino free (bb_vinfo);
1859e4b17023SJohn Marino bb->aux = NULL;
1860e4b17023SJohn Marino }
1861e4b17023SJohn Marino
1862e4b17023SJohn Marino
1863e4b17023SJohn Marino /* Analyze statements contained in SLP tree node after recursively analyzing
1864e4b17023SJohn Marino the subtree. Return TRUE if the operations are supported. */
1865e4b17023SJohn Marino
1866e4b17023SJohn Marino static bool
vect_slp_analyze_node_operations(bb_vec_info bb_vinfo,slp_tree node)1867e4b17023SJohn Marino vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
1868e4b17023SJohn Marino {
1869e4b17023SJohn Marino bool dummy;
1870e4b17023SJohn Marino int i;
1871e4b17023SJohn Marino gimple stmt;
1872e4b17023SJohn Marino slp_void_p child;
1873e4b17023SJohn Marino
1874e4b17023SJohn Marino if (!node)
1875e4b17023SJohn Marino return true;
1876e4b17023SJohn Marino
1877e4b17023SJohn Marino FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
1878e4b17023SJohn Marino if (!vect_slp_analyze_node_operations (bb_vinfo, (slp_tree) child))
1879e4b17023SJohn Marino return false;
1880e4b17023SJohn Marino
1881e4b17023SJohn Marino FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
1882e4b17023SJohn Marino {
1883e4b17023SJohn Marino stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1884e4b17023SJohn Marino gcc_assert (stmt_info);
1885e4b17023SJohn Marino gcc_assert (PURE_SLP_STMT (stmt_info));
1886e4b17023SJohn Marino
1887e4b17023SJohn Marino if (!vect_analyze_stmt (stmt, &dummy, node))
1888e4b17023SJohn Marino return false;
1889e4b17023SJohn Marino }
1890e4b17023SJohn Marino
1891e4b17023SJohn Marino return true;
1892e4b17023SJohn Marino }
1893e4b17023SJohn Marino
1894e4b17023SJohn Marino
1895e4b17023SJohn Marino /* Analyze statements in SLP instances of the basic block. Return TRUE if the
1896e4b17023SJohn Marino operations are supported. */
1897e4b17023SJohn Marino
1898e4b17023SJohn Marino static bool
vect_slp_analyze_operations(bb_vec_info bb_vinfo)1899e4b17023SJohn Marino vect_slp_analyze_operations (bb_vec_info bb_vinfo)
1900e4b17023SJohn Marino {
1901e4b17023SJohn Marino VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1902e4b17023SJohn Marino slp_instance instance;
1903e4b17023SJohn Marino int i;
1904e4b17023SJohn Marino
1905e4b17023SJohn Marino for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); )
1906e4b17023SJohn Marino {
1907e4b17023SJohn Marino if (!vect_slp_analyze_node_operations (bb_vinfo,
1908e4b17023SJohn Marino SLP_INSTANCE_TREE (instance)))
1909e4b17023SJohn Marino {
1910e4b17023SJohn Marino vect_free_slp_instance (instance);
1911e4b17023SJohn Marino VEC_ordered_remove (slp_instance, slp_instances, i);
1912e4b17023SJohn Marino }
1913e4b17023SJohn Marino else
1914e4b17023SJohn Marino i++;
1915e4b17023SJohn Marino }
1916e4b17023SJohn Marino
1917e4b17023SJohn Marino if (!VEC_length (slp_instance, slp_instances))
1918e4b17023SJohn Marino return false;
1919e4b17023SJohn Marino
1920e4b17023SJohn Marino return true;
1921e4b17023SJohn Marino }
1922e4b17023SJohn Marino
1923e4b17023SJohn Marino /* Check if vectorization of the basic block is profitable. */
1924e4b17023SJohn Marino
1925e4b17023SJohn Marino static bool
vect_bb_vectorization_profitable_p(bb_vec_info bb_vinfo)1926e4b17023SJohn Marino vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
1927e4b17023SJohn Marino {
1928e4b17023SJohn Marino VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1929e4b17023SJohn Marino slp_instance instance;
1930e4b17023SJohn Marino int i;
1931e4b17023SJohn Marino unsigned int vec_outside_cost = 0, vec_inside_cost = 0, scalar_cost = 0;
1932e4b17023SJohn Marino unsigned int stmt_cost;
1933e4b17023SJohn Marino gimple stmt;
1934e4b17023SJohn Marino gimple_stmt_iterator si;
1935e4b17023SJohn Marino basic_block bb = BB_VINFO_BB (bb_vinfo);
1936e4b17023SJohn Marino stmt_vec_info stmt_info = NULL;
1937e4b17023SJohn Marino tree dummy_type = NULL;
1938e4b17023SJohn Marino int dummy = 0;
1939e4b17023SJohn Marino
1940e4b17023SJohn Marino /* Calculate vector costs. */
1941e4b17023SJohn Marino FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
1942e4b17023SJohn Marino {
1943e4b17023SJohn Marino vec_outside_cost += SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (instance);
1944e4b17023SJohn Marino vec_inside_cost += SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance);
1945e4b17023SJohn Marino }
1946e4b17023SJohn Marino
1947e4b17023SJohn Marino /* Calculate scalar cost. */
1948e4b17023SJohn Marino for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1949e4b17023SJohn Marino {
1950e4b17023SJohn Marino stmt = gsi_stmt (si);
1951e4b17023SJohn Marino stmt_info = vinfo_for_stmt (stmt);
1952e4b17023SJohn Marino
1953e4b17023SJohn Marino if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info)
1954e4b17023SJohn Marino || !PURE_SLP_STMT (stmt_info))
1955e4b17023SJohn Marino continue;
1956e4b17023SJohn Marino
1957e4b17023SJohn Marino if (STMT_VINFO_DATA_REF (stmt_info))
1958e4b17023SJohn Marino {
1959e4b17023SJohn Marino if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
1960e4b17023SJohn Marino stmt_cost = targetm.vectorize.builtin_vectorization_cost
1961e4b17023SJohn Marino (scalar_load, dummy_type, dummy);
1962e4b17023SJohn Marino else
1963e4b17023SJohn Marino stmt_cost = targetm.vectorize.builtin_vectorization_cost
1964e4b17023SJohn Marino (scalar_store, dummy_type, dummy);
1965e4b17023SJohn Marino }
1966e4b17023SJohn Marino else
1967e4b17023SJohn Marino stmt_cost = targetm.vectorize.builtin_vectorization_cost
1968e4b17023SJohn Marino (scalar_stmt, dummy_type, dummy);
1969e4b17023SJohn Marino
1970e4b17023SJohn Marino scalar_cost += stmt_cost;
1971e4b17023SJohn Marino }
1972e4b17023SJohn Marino
1973e4b17023SJohn Marino if (vect_print_dump_info (REPORT_COST))
1974e4b17023SJohn Marino {
1975e4b17023SJohn Marino fprintf (vect_dump, "Cost model analysis: \n");
1976e4b17023SJohn Marino fprintf (vect_dump, " Vector inside of basic block cost: %d\n",
1977e4b17023SJohn Marino vec_inside_cost);
1978e4b17023SJohn Marino fprintf (vect_dump, " Vector outside of basic block cost: %d\n",
1979e4b17023SJohn Marino vec_outside_cost);
1980e4b17023SJohn Marino fprintf (vect_dump, " Scalar cost of basic block: %d", scalar_cost);
1981e4b17023SJohn Marino }
1982e4b17023SJohn Marino
1983e4b17023SJohn Marino /* Vectorization is profitable if its cost is less than the cost of scalar
1984e4b17023SJohn Marino version. */
1985e4b17023SJohn Marino if (vec_outside_cost + vec_inside_cost >= scalar_cost)
1986e4b17023SJohn Marino return false;
1987e4b17023SJohn Marino
1988e4b17023SJohn Marino return true;
1989e4b17023SJohn Marino }
1990e4b17023SJohn Marino
1991e4b17023SJohn Marino /* Check if the basic block can be vectorized. */
1992e4b17023SJohn Marino
1993e4b17023SJohn Marino static bb_vec_info
vect_slp_analyze_bb_1(basic_block bb)1994e4b17023SJohn Marino vect_slp_analyze_bb_1 (basic_block bb)
1995e4b17023SJohn Marino {
1996e4b17023SJohn Marino bb_vec_info bb_vinfo;
1997e4b17023SJohn Marino VEC (ddr_p, heap) *ddrs;
1998e4b17023SJohn Marino VEC (slp_instance, heap) *slp_instances;
1999e4b17023SJohn Marino slp_instance instance;
2000e4b17023SJohn Marino int i;
2001e4b17023SJohn Marino int min_vf = 2;
2002e4b17023SJohn Marino int max_vf = MAX_VECTORIZATION_FACTOR;
2003e4b17023SJohn Marino
2004e4b17023SJohn Marino bb_vinfo = new_bb_vec_info (bb);
2005e4b17023SJohn Marino if (!bb_vinfo)
2006e4b17023SJohn Marino return NULL;
2007e4b17023SJohn Marino
2008e4b17023SJohn Marino if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf))
2009e4b17023SJohn Marino {
2010e4b17023SJohn Marino if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2011e4b17023SJohn Marino fprintf (vect_dump, "not vectorized: unhandled data-ref in basic "
2012e4b17023SJohn Marino "block.\n");
2013e4b17023SJohn Marino
2014e4b17023SJohn Marino destroy_bb_vec_info (bb_vinfo);
2015e4b17023SJohn Marino return NULL;
2016e4b17023SJohn Marino }
2017e4b17023SJohn Marino
2018e4b17023SJohn Marino ddrs = BB_VINFO_DDRS (bb_vinfo);
2019e4b17023SJohn Marino if (!VEC_length (ddr_p, ddrs))
2020e4b17023SJohn Marino {
2021e4b17023SJohn Marino if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2022e4b17023SJohn Marino fprintf (vect_dump, "not vectorized: not enough data-refs in basic "
2023e4b17023SJohn Marino "block.\n");
2024e4b17023SJohn Marino
2025e4b17023SJohn Marino destroy_bb_vec_info (bb_vinfo);
2026e4b17023SJohn Marino return NULL;
2027e4b17023SJohn Marino }
2028e4b17023SJohn Marino
2029e4b17023SJohn Marino if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf)
2030e4b17023SJohn Marino || min_vf > max_vf)
2031e4b17023SJohn Marino {
2032e4b17023SJohn Marino if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2033e4b17023SJohn Marino fprintf (vect_dump, "not vectorized: unhandled data dependence "
2034e4b17023SJohn Marino "in basic block.\n");
2035e4b17023SJohn Marino
2036e4b17023SJohn Marino destroy_bb_vec_info (bb_vinfo);
2037e4b17023SJohn Marino return NULL;
2038e4b17023SJohn Marino }
2039e4b17023SJohn Marino
2040e4b17023SJohn Marino if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
2041e4b17023SJohn Marino {
2042e4b17023SJohn Marino if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2043e4b17023SJohn Marino fprintf (vect_dump, "not vectorized: bad data alignment in basic "
2044e4b17023SJohn Marino "block.\n");
2045e4b17023SJohn Marino
2046e4b17023SJohn Marino destroy_bb_vec_info (bb_vinfo);
2047e4b17023SJohn Marino return NULL;
2048e4b17023SJohn Marino }
2049e4b17023SJohn Marino
2050e4b17023SJohn Marino if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
2051e4b17023SJohn Marino {
2052e4b17023SJohn Marino if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2053e4b17023SJohn Marino fprintf (vect_dump, "not vectorized: unhandled data access in basic "
2054e4b17023SJohn Marino "block.\n");
2055e4b17023SJohn Marino
2056e4b17023SJohn Marino destroy_bb_vec_info (bb_vinfo);
2057e4b17023SJohn Marino return NULL;
2058e4b17023SJohn Marino }
2059e4b17023SJohn Marino
2060e4b17023SJohn Marino if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
2061e4b17023SJohn Marino {
2062e4b17023SJohn Marino if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2063e4b17023SJohn Marino fprintf (vect_dump, "not vectorized: unsupported alignment in basic "
2064e4b17023SJohn Marino "block.\n");
2065e4b17023SJohn Marino
2066e4b17023SJohn Marino destroy_bb_vec_info (bb_vinfo);
2067e4b17023SJohn Marino return NULL;
2068e4b17023SJohn Marino }
2069e4b17023SJohn Marino
2070e4b17023SJohn Marino /* Check the SLP opportunities in the basic block, analyze and build SLP
2071e4b17023SJohn Marino trees. */
2072e4b17023SJohn Marino if (!vect_analyze_slp (NULL, bb_vinfo))
2073e4b17023SJohn Marino {
2074e4b17023SJohn Marino if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2075e4b17023SJohn Marino fprintf (vect_dump, "not vectorized: failed to find SLP opportunities "
2076e4b17023SJohn Marino "in basic block.\n");
2077e4b17023SJohn Marino
2078e4b17023SJohn Marino destroy_bb_vec_info (bb_vinfo);
2079e4b17023SJohn Marino return NULL;
2080e4b17023SJohn Marino }
2081e4b17023SJohn Marino
2082e4b17023SJohn Marino slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2083e4b17023SJohn Marino
2084e4b17023SJohn Marino /* Mark all the statements that we want to vectorize as pure SLP and
2085e4b17023SJohn Marino relevant. */
2086e4b17023SJohn Marino FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
2087e4b17023SJohn Marino {
2088e4b17023SJohn Marino vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2089e4b17023SJohn Marino vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2090e4b17023SJohn Marino }
2091e4b17023SJohn Marino
2092e4b17023SJohn Marino if (!vect_slp_analyze_operations (bb_vinfo))
2093e4b17023SJohn Marino {
2094e4b17023SJohn Marino if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2095e4b17023SJohn Marino fprintf (vect_dump, "not vectorized: bad operation in basic block.\n");
2096e4b17023SJohn Marino
2097e4b17023SJohn Marino destroy_bb_vec_info (bb_vinfo);
2098e4b17023SJohn Marino return NULL;
2099e4b17023SJohn Marino }
2100e4b17023SJohn Marino
2101e4b17023SJohn Marino /* Cost model: check if the vectorization is worthwhile. */
2102e4b17023SJohn Marino if (flag_vect_cost_model
2103e4b17023SJohn Marino && !vect_bb_vectorization_profitable_p (bb_vinfo))
2104e4b17023SJohn Marino {
2105e4b17023SJohn Marino if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2106e4b17023SJohn Marino fprintf (vect_dump, "not vectorized: vectorization is not "
2107e4b17023SJohn Marino "profitable.\n");
2108e4b17023SJohn Marino
2109e4b17023SJohn Marino destroy_bb_vec_info (bb_vinfo);
2110e4b17023SJohn Marino return NULL;
2111e4b17023SJohn Marino }
2112e4b17023SJohn Marino
2113e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
2114e4b17023SJohn Marino fprintf (vect_dump, "Basic block will be vectorized using SLP\n");
2115e4b17023SJohn Marino
2116e4b17023SJohn Marino return bb_vinfo;
2117e4b17023SJohn Marino }
2118e4b17023SJohn Marino
2119e4b17023SJohn Marino
2120e4b17023SJohn Marino bb_vec_info
vect_slp_analyze_bb(basic_block bb)2121e4b17023SJohn Marino vect_slp_analyze_bb (basic_block bb)
2122e4b17023SJohn Marino {
2123e4b17023SJohn Marino bb_vec_info bb_vinfo;
2124e4b17023SJohn Marino int insns = 0;
2125e4b17023SJohn Marino gimple_stmt_iterator gsi;
2126e4b17023SJohn Marino unsigned int vector_sizes;
2127e4b17023SJohn Marino
2128e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
2129e4b17023SJohn Marino fprintf (vect_dump, "===vect_slp_analyze_bb===\n");
2130e4b17023SJohn Marino
2131e4b17023SJohn Marino for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2132e4b17023SJohn Marino {
2133e4b17023SJohn Marino gimple stmt = gsi_stmt (gsi);
2134e4b17023SJohn Marino if (!is_gimple_debug (stmt)
2135e4b17023SJohn Marino && !gimple_nop_p (stmt)
2136e4b17023SJohn Marino && gimple_code (stmt) != GIMPLE_LABEL)
2137e4b17023SJohn Marino insns++;
2138e4b17023SJohn Marino }
2139e4b17023SJohn Marino
2140e4b17023SJohn Marino if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2141e4b17023SJohn Marino {
2142e4b17023SJohn Marino if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2143e4b17023SJohn Marino fprintf (vect_dump, "not vectorized: too many instructions in basic "
2144e4b17023SJohn Marino "block.\n");
2145e4b17023SJohn Marino
2146e4b17023SJohn Marino return NULL;
2147e4b17023SJohn Marino }
2148e4b17023SJohn Marino
2149e4b17023SJohn Marino /* Autodetect first vector size we try. */
2150e4b17023SJohn Marino current_vector_size = 0;
2151e4b17023SJohn Marino vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
2152e4b17023SJohn Marino
2153e4b17023SJohn Marino while (1)
2154e4b17023SJohn Marino {
2155e4b17023SJohn Marino bb_vinfo = vect_slp_analyze_bb_1 (bb);
2156e4b17023SJohn Marino if (bb_vinfo)
2157e4b17023SJohn Marino return bb_vinfo;
2158e4b17023SJohn Marino
2159e4b17023SJohn Marino destroy_bb_vec_info (bb_vinfo);
2160e4b17023SJohn Marino
2161e4b17023SJohn Marino vector_sizes &= ~current_vector_size;
2162e4b17023SJohn Marino if (vector_sizes == 0
2163e4b17023SJohn Marino || current_vector_size == 0)
2164e4b17023SJohn Marino return NULL;
2165e4b17023SJohn Marino
2166e4b17023SJohn Marino /* Try the next biggest vector size. */
2167e4b17023SJohn Marino current_vector_size = 1 << floor_log2 (vector_sizes);
2168e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
2169e4b17023SJohn Marino fprintf (vect_dump, "***** Re-trying analysis with "
2170e4b17023SJohn Marino "vector size %d\n", current_vector_size);
2171e4b17023SJohn Marino }
2172e4b17023SJohn Marino }
2173e4b17023SJohn Marino
2174e4b17023SJohn Marino
2175e4b17023SJohn Marino /* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
2176e4b17023SJohn Marino the number of created vector stmts depends on the unrolling factor).
2177e4b17023SJohn Marino However, the actual number of vector stmts for every SLP node depends on
2178e4b17023SJohn Marino VF which is set later in vect_analyze_operations (). Hence, SLP costs
2179e4b17023SJohn Marino should be updated. In this function we assume that the inside costs
2180e4b17023SJohn Marino calculated in vect_model_xxx_cost are linear in ncopies. */
2181e4b17023SJohn Marino
2182e4b17023SJohn Marino void
vect_update_slp_costs_according_to_vf(loop_vec_info loop_vinfo)2183e4b17023SJohn Marino vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
2184e4b17023SJohn Marino {
2185e4b17023SJohn Marino unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2186e4b17023SJohn Marino VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2187e4b17023SJohn Marino slp_instance instance;
2188e4b17023SJohn Marino
2189e4b17023SJohn Marino if (vect_print_dump_info (REPORT_SLP))
2190e4b17023SJohn Marino fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");
2191e4b17023SJohn Marino
2192e4b17023SJohn Marino FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
2193e4b17023SJohn Marino /* We assume that costs are linear in ncopies. */
2194e4b17023SJohn Marino SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
2195e4b17023SJohn Marino / SLP_INSTANCE_UNROLLING_FACTOR (instance);
2196e4b17023SJohn Marino }
2197e4b17023SJohn Marino
2198e4b17023SJohn Marino
2199e4b17023SJohn Marino /* For constant and loop invariant defs of SLP_NODE this function returns
2200e4b17023SJohn Marino (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
2201e4b17023SJohn Marino OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
2202e4b17023SJohn Marino scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
2203e4b17023SJohn Marino REDUC_INDEX is the index of the reduction operand in the statements, unless
2204e4b17023SJohn Marino it is -1. */
2205e4b17023SJohn Marino
2206e4b17023SJohn Marino static void
vect_get_constant_vectors(tree op,slp_tree slp_node,VEC (tree,heap)** vec_oprnds,unsigned int op_num,unsigned int number_of_vectors,int reduc_index)2207e4b17023SJohn Marino vect_get_constant_vectors (tree op, slp_tree slp_node,
2208e4b17023SJohn Marino VEC (tree, heap) **vec_oprnds,
2209e4b17023SJohn Marino unsigned int op_num, unsigned int number_of_vectors,
2210e4b17023SJohn Marino int reduc_index)
2211e4b17023SJohn Marino {
2212e4b17023SJohn Marino VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
2213e4b17023SJohn Marino gimple stmt = VEC_index (gimple, stmts, 0);
2214e4b17023SJohn Marino stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2215e4b17023SJohn Marino int nunits;
2216e4b17023SJohn Marino tree vec_cst;
2217e4b17023SJohn Marino tree t = NULL_TREE;
2218e4b17023SJohn Marino int j, number_of_places_left_in_vector;
2219e4b17023SJohn Marino tree vector_type;
2220e4b17023SJohn Marino tree vop;
2221e4b17023SJohn Marino int group_size = VEC_length (gimple, stmts);
2222e4b17023SJohn Marino unsigned int vec_num, i;
2223e4b17023SJohn Marino int number_of_copies = 1;
2224e4b17023SJohn Marino VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
2225e4b17023SJohn Marino bool constant_p, is_store;
2226e4b17023SJohn Marino tree neutral_op = NULL;
2227e4b17023SJohn Marino enum tree_code code = gimple_expr_code (stmt);
2228e4b17023SJohn Marino gimple def_stmt;
2229e4b17023SJohn Marino struct loop *loop;
2230e4b17023SJohn Marino
2231e4b17023SJohn Marino if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
2232e4b17023SJohn Marino && reduc_index != -1)
2233e4b17023SJohn Marino {
2234e4b17023SJohn Marino op_num = reduc_index - 1;
2235e4b17023SJohn Marino op = gimple_op (stmt, reduc_index);
2236e4b17023SJohn Marino /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
2237e4b17023SJohn Marino we need either neutral operands or the original operands. See
2238e4b17023SJohn Marino get_initial_def_for_reduction() for details. */
2239e4b17023SJohn Marino switch (code)
2240e4b17023SJohn Marino {
2241e4b17023SJohn Marino case WIDEN_SUM_EXPR:
2242e4b17023SJohn Marino case DOT_PROD_EXPR:
2243e4b17023SJohn Marino case PLUS_EXPR:
2244e4b17023SJohn Marino case MINUS_EXPR:
2245e4b17023SJohn Marino case BIT_IOR_EXPR:
2246e4b17023SJohn Marino case BIT_XOR_EXPR:
2247e4b17023SJohn Marino if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2248e4b17023SJohn Marino neutral_op = build_real (TREE_TYPE (op), dconst0);
2249e4b17023SJohn Marino else
2250e4b17023SJohn Marino neutral_op = build_int_cst (TREE_TYPE (op), 0);
2251e4b17023SJohn Marino
2252e4b17023SJohn Marino break;
2253e4b17023SJohn Marino
2254e4b17023SJohn Marino case MULT_EXPR:
2255e4b17023SJohn Marino if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2256e4b17023SJohn Marino neutral_op = build_real (TREE_TYPE (op), dconst1);
2257e4b17023SJohn Marino else
2258e4b17023SJohn Marino neutral_op = build_int_cst (TREE_TYPE (op), 1);
2259e4b17023SJohn Marino
2260e4b17023SJohn Marino break;
2261e4b17023SJohn Marino
2262e4b17023SJohn Marino case BIT_AND_EXPR:
2263e4b17023SJohn Marino neutral_op = build_int_cst (TREE_TYPE (op), -1);
2264e4b17023SJohn Marino break;
2265e4b17023SJohn Marino
2266e4b17023SJohn Marino case MAX_EXPR:
2267e4b17023SJohn Marino case MIN_EXPR:
2268e4b17023SJohn Marino def_stmt = SSA_NAME_DEF_STMT (op);
2269e4b17023SJohn Marino loop = (gimple_bb (stmt))->loop_father;
2270e4b17023SJohn Marino neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2271e4b17023SJohn Marino loop_preheader_edge (loop));
2272e4b17023SJohn Marino break;
2273e4b17023SJohn Marino
2274e4b17023SJohn Marino default:
2275e4b17023SJohn Marino neutral_op = NULL;
2276e4b17023SJohn Marino }
2277e4b17023SJohn Marino }
2278e4b17023SJohn Marino
2279e4b17023SJohn Marino if (STMT_VINFO_DATA_REF (stmt_vinfo))
2280e4b17023SJohn Marino {
2281e4b17023SJohn Marino is_store = true;
2282e4b17023SJohn Marino op = gimple_assign_rhs1 (stmt);
2283e4b17023SJohn Marino }
2284e4b17023SJohn Marino else
2285e4b17023SJohn Marino is_store = false;
2286e4b17023SJohn Marino
2287e4b17023SJohn Marino gcc_assert (op);
2288e4b17023SJohn Marino
2289e4b17023SJohn Marino if (CONSTANT_CLASS_P (op))
2290e4b17023SJohn Marino constant_p = true;
2291e4b17023SJohn Marino else
2292e4b17023SJohn Marino constant_p = false;
2293e4b17023SJohn Marino
2294e4b17023SJohn Marino vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
2295e4b17023SJohn Marino gcc_assert (vector_type);
2296e4b17023SJohn Marino nunits = TYPE_VECTOR_SUBPARTS (vector_type);
2297e4b17023SJohn Marino
2298e4b17023SJohn Marino /* NUMBER_OF_COPIES is the number of times we need to use the same values in
2299e4b17023SJohn Marino created vectors. It is greater than 1 if unrolling is performed.
2300e4b17023SJohn Marino
2301e4b17023SJohn Marino For example, we have two scalar operands, s1 and s2 (e.g., group of
2302e4b17023SJohn Marino strided accesses of size two), while NUNITS is four (i.e., four scalars
2303e4b17023SJohn Marino of this type can be packed in a vector). The output vector will contain
2304e4b17023SJohn Marino two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
2305e4b17023SJohn Marino will be 2).
2306e4b17023SJohn Marino
2307e4b17023SJohn Marino If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
2308e4b17023SJohn Marino containing the operands.
2309e4b17023SJohn Marino
2310e4b17023SJohn Marino For example, NUNITS is four as before, and the group size is 8
2311e4b17023SJohn Marino (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
2312e4b17023SJohn Marino {s5, s6, s7, s8}. */
2313e4b17023SJohn Marino
2314e4b17023SJohn Marino number_of_copies = least_common_multiple (nunits, group_size) / group_size;
2315e4b17023SJohn Marino
2316e4b17023SJohn Marino number_of_places_left_in_vector = nunits;
2317e4b17023SJohn Marino for (j = 0; j < number_of_copies; j++)
2318e4b17023SJohn Marino {
2319e4b17023SJohn Marino for (i = group_size - 1; VEC_iterate (gimple, stmts, i, stmt); i--)
2320e4b17023SJohn Marino {
2321e4b17023SJohn Marino if (is_store)
2322e4b17023SJohn Marino op = gimple_assign_rhs1 (stmt);
2323e4b17023SJohn Marino else
2324e4b17023SJohn Marino {
2325e4b17023SJohn Marino switch (code)
2326e4b17023SJohn Marino {
2327e4b17023SJohn Marino case COND_EXPR:
2328e4b17023SJohn Marino if (op_num == 0 || op_num == 1)
2329e4b17023SJohn Marino {
2330e4b17023SJohn Marino tree cond = gimple_assign_rhs1 (stmt);
2331e4b17023SJohn Marino op = TREE_OPERAND (cond, op_num);
2332e4b17023SJohn Marino }
2333e4b17023SJohn Marino else
2334e4b17023SJohn Marino {
2335e4b17023SJohn Marino if (op_num == 2)
2336e4b17023SJohn Marino op = gimple_assign_rhs2 (stmt);
2337e4b17023SJohn Marino else
2338e4b17023SJohn Marino op = gimple_assign_rhs3 (stmt);
2339e4b17023SJohn Marino }
2340e4b17023SJohn Marino break;
2341e4b17023SJohn Marino
2342e4b17023SJohn Marino case CALL_EXPR:
2343e4b17023SJohn Marino op = gimple_call_arg (stmt, op_num);
2344e4b17023SJohn Marino break;
2345e4b17023SJohn Marino
2346e4b17023SJohn Marino default:
2347e4b17023SJohn Marino op = gimple_op (stmt, op_num + 1);
2348e4b17023SJohn Marino }
2349e4b17023SJohn Marino }
2350e4b17023SJohn Marino
2351e4b17023SJohn Marino if (reduc_index != -1)
2352e4b17023SJohn Marino {
2353e4b17023SJohn Marino loop = (gimple_bb (stmt))->loop_father;
2354e4b17023SJohn Marino def_stmt = SSA_NAME_DEF_STMT (op);
2355e4b17023SJohn Marino
2356e4b17023SJohn Marino gcc_assert (loop);
2357e4b17023SJohn Marino
2358e4b17023SJohn Marino /* Get the def before the loop. In reduction chain we have only
2359e4b17023SJohn Marino one initial value. */
2360e4b17023SJohn Marino if ((j != (number_of_copies - 1)
2361e4b17023SJohn Marino || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2362e4b17023SJohn Marino && i != 0))
2363e4b17023SJohn Marino && neutral_op)
2364e4b17023SJohn Marino op = neutral_op;
2365e4b17023SJohn Marino else
2366e4b17023SJohn Marino op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2367e4b17023SJohn Marino loop_preheader_edge (loop));
2368e4b17023SJohn Marino }
2369e4b17023SJohn Marino
2370e4b17023SJohn Marino /* Create 'vect_ = {op0,op1,...,opn}'. */
2371e4b17023SJohn Marino t = tree_cons (NULL_TREE, op, t);
2372e4b17023SJohn Marino
2373e4b17023SJohn Marino number_of_places_left_in_vector--;
2374e4b17023SJohn Marino
2375e4b17023SJohn Marino if (number_of_places_left_in_vector == 0)
2376e4b17023SJohn Marino {
2377e4b17023SJohn Marino number_of_places_left_in_vector = nunits;
2378e4b17023SJohn Marino
2379e4b17023SJohn Marino if (constant_p)
2380e4b17023SJohn Marino vec_cst = build_vector (vector_type, t);
2381e4b17023SJohn Marino else
2382e4b17023SJohn Marino vec_cst = build_constructor_from_list (vector_type, t);
2383e4b17023SJohn Marino VEC_quick_push (tree, voprnds,
2384e4b17023SJohn Marino vect_init_vector (stmt, vec_cst, vector_type, NULL));
2385e4b17023SJohn Marino t = NULL_TREE;
2386e4b17023SJohn Marino }
2387e4b17023SJohn Marino }
2388e4b17023SJohn Marino }
2389e4b17023SJohn Marino
2390e4b17023SJohn Marino /* Since the vectors are created in the reverse order, we should invert
2391e4b17023SJohn Marino them. */
2392e4b17023SJohn Marino vec_num = VEC_length (tree, voprnds);
2393e4b17023SJohn Marino for (j = vec_num - 1; j >= 0; j--)
2394e4b17023SJohn Marino {
2395e4b17023SJohn Marino vop = VEC_index (tree, voprnds, j);
2396e4b17023SJohn Marino VEC_quick_push (tree, *vec_oprnds, vop);
2397e4b17023SJohn Marino }
2398e4b17023SJohn Marino
2399e4b17023SJohn Marino VEC_free (tree, heap, voprnds);
2400e4b17023SJohn Marino
2401e4b17023SJohn Marino /* In case that VF is greater than the unrolling factor needed for the SLP
2402e4b17023SJohn Marino group of stmts, NUMBER_OF_VECTORS to be created is greater than
2403e4b17023SJohn Marino NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
2404e4b17023SJohn Marino to replicate the vectors. */
2405e4b17023SJohn Marino while (number_of_vectors > VEC_length (tree, *vec_oprnds))
2406e4b17023SJohn Marino {
2407e4b17023SJohn Marino tree neutral_vec = NULL;
2408e4b17023SJohn Marino
2409e4b17023SJohn Marino if (neutral_op)
2410e4b17023SJohn Marino {
2411e4b17023SJohn Marino if (!neutral_vec)
2412e4b17023SJohn Marino neutral_vec = build_vector_from_val (vector_type, neutral_op);
2413e4b17023SJohn Marino
2414e4b17023SJohn Marino VEC_quick_push (tree, *vec_oprnds, neutral_vec);
2415e4b17023SJohn Marino }
2416e4b17023SJohn Marino else
2417e4b17023SJohn Marino {
2418e4b17023SJohn Marino for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num; i++)
2419e4b17023SJohn Marino VEC_quick_push (tree, *vec_oprnds, vop);
2420e4b17023SJohn Marino }
2421e4b17023SJohn Marino }
2422e4b17023SJohn Marino }
2423e4b17023SJohn Marino
2424e4b17023SJohn Marino
2425e4b17023SJohn Marino /* Get vectorized definitions from SLP_NODE that contains corresponding
2426e4b17023SJohn Marino vectorized def-stmts. */
2427e4b17023SJohn Marino
2428e4b17023SJohn Marino static void
vect_get_slp_vect_defs(slp_tree slp_node,VEC (tree,heap)** vec_oprnds)2429e4b17023SJohn Marino vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
2430e4b17023SJohn Marino {
2431e4b17023SJohn Marino tree vec_oprnd;
2432e4b17023SJohn Marino gimple vec_def_stmt;
2433e4b17023SJohn Marino unsigned int i;
2434e4b17023SJohn Marino
2435e4b17023SJohn Marino gcc_assert (SLP_TREE_VEC_STMTS (slp_node));
2436e4b17023SJohn Marino
2437e4b17023SJohn Marino FOR_EACH_VEC_ELT (gimple, SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
2438e4b17023SJohn Marino {
2439e4b17023SJohn Marino gcc_assert (vec_def_stmt);
2440e4b17023SJohn Marino vec_oprnd = gimple_get_lhs (vec_def_stmt);
2441e4b17023SJohn Marino VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2442e4b17023SJohn Marino }
2443e4b17023SJohn Marino }
2444e4b17023SJohn Marino
2445e4b17023SJohn Marino
2446e4b17023SJohn Marino /* Get vectorized definitions for SLP_NODE.
2447e4b17023SJohn Marino If the scalar definitions are loop invariants or constants, collect them and
2448e4b17023SJohn Marino call vect_get_constant_vectors() to create vector stmts.
2449e4b17023SJohn Marino Otherwise, the def-stmts must be already vectorized and the vectorized stmts
2450e4b17023SJohn Marino must be stored in the corresponding child of SLP_NODE, and we call
2451e4b17023SJohn Marino vect_get_slp_vect_defs () to retrieve them. */
2452e4b17023SJohn Marino
2453e4b17023SJohn Marino void
vect_get_slp_defs(VEC (tree,heap)* ops,slp_tree slp_node,VEC (slp_void_p,heap)** vec_oprnds,int reduc_index)2454e4b17023SJohn Marino vect_get_slp_defs (VEC (tree, heap) *ops, slp_tree slp_node,
2455e4b17023SJohn Marino VEC (slp_void_p, heap) **vec_oprnds, int reduc_index)
2456e4b17023SJohn Marino {
2457e4b17023SJohn Marino gimple first_stmt, first_def;
2458e4b17023SJohn Marino int number_of_vects = 0, i;
2459e4b17023SJohn Marino unsigned int child_index = 0;
2460e4b17023SJohn Marino HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
2461e4b17023SJohn Marino slp_tree child = NULL;
2462e4b17023SJohn Marino VEC (tree, heap) *vec_defs;
2463e4b17023SJohn Marino tree oprnd, def_lhs;
2464e4b17023SJohn Marino bool vectorized_defs;
2465e4b17023SJohn Marino
2466e4b17023SJohn Marino first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
2467e4b17023SJohn Marino FOR_EACH_VEC_ELT (tree, ops, i, oprnd)
2468e4b17023SJohn Marino {
2469e4b17023SJohn Marino /* For each operand we check if it has vectorized definitions in a child
2470e4b17023SJohn Marino node or we need to create them (for invariants and constants). We
2471e4b17023SJohn Marino check if the LHS of the first stmt of the next child matches OPRND.
2472e4b17023SJohn Marino If it does, we found the correct child. Otherwise, we call
2473e4b17023SJohn Marino vect_get_constant_vectors (), and not advance CHILD_INDEX in order
2474e4b17023SJohn Marino to check this child node for the next operand. */
2475e4b17023SJohn Marino vectorized_defs = false;
2476e4b17023SJohn Marino if (VEC_length (slp_void_p, SLP_TREE_CHILDREN (slp_node)) > child_index)
2477e4b17023SJohn Marino {
2478e4b17023SJohn Marino child = (slp_tree) VEC_index (slp_void_p,
2479e4b17023SJohn Marino SLP_TREE_CHILDREN (slp_node),
2480e4b17023SJohn Marino child_index);
2481e4b17023SJohn Marino first_def = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (child), 0);
2482e4b17023SJohn Marino
2483e4b17023SJohn Marino /* In the end of a pattern sequence we have a use of the original stmt,
2484e4b17023SJohn Marino so we need to compare OPRND with the original def. */
2485e4b17023SJohn Marino if (is_pattern_stmt_p (vinfo_for_stmt (first_def))
2486e4b17023SJohn Marino && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (first_stmt))
2487e4b17023SJohn Marino && !is_pattern_stmt_p (vinfo_for_stmt (first_stmt)))
2488e4b17023SJohn Marino first_def = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
2489e4b17023SJohn Marino
2490e4b17023SJohn Marino if (is_gimple_call (first_def))
2491e4b17023SJohn Marino def_lhs = gimple_call_lhs (first_def);
2492e4b17023SJohn Marino else
2493e4b17023SJohn Marino def_lhs = gimple_assign_lhs (first_def);
2494e4b17023SJohn Marino
2495e4b17023SJohn Marino if (operand_equal_p (oprnd, def_lhs, 0))
2496e4b17023SJohn Marino {
2497e4b17023SJohn Marino /* The number of vector defs is determined by the number of
2498e4b17023SJohn Marino vector statements in the node from which we get those
2499e4b17023SJohn Marino statements. */
2500e4b17023SJohn Marino number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
2501e4b17023SJohn Marino vectorized_defs = true;
2502e4b17023SJohn Marino child_index++;
2503e4b17023SJohn Marino }
2504e4b17023SJohn Marino }
2505e4b17023SJohn Marino
2506e4b17023SJohn Marino if (!vectorized_defs)
2507e4b17023SJohn Marino {
2508e4b17023SJohn Marino if (i == 0)
2509e4b17023SJohn Marino {
2510e4b17023SJohn Marino number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
2511e4b17023SJohn Marino /* Number of vector stmts was calculated according to LHS in
2512e4b17023SJohn Marino vect_schedule_slp_instance (), fix it by replacing LHS with
2513e4b17023SJohn Marino RHS, if necessary. See vect_get_smallest_scalar_type () for
2514e4b17023SJohn Marino details. */
2515e4b17023SJohn Marino vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
2516e4b17023SJohn Marino &rhs_size_unit);
2517e4b17023SJohn Marino if (rhs_size_unit != lhs_size_unit)
2518e4b17023SJohn Marino {
2519e4b17023SJohn Marino number_of_vects *= rhs_size_unit;
2520e4b17023SJohn Marino number_of_vects /= lhs_size_unit;
2521e4b17023SJohn Marino }
2522e4b17023SJohn Marino }
2523e4b17023SJohn Marino }
2524e4b17023SJohn Marino
2525e4b17023SJohn Marino /* Allocate memory for vectorized defs. */
2526e4b17023SJohn Marino vec_defs = VEC_alloc (tree, heap, number_of_vects);
2527e4b17023SJohn Marino
2528e4b17023SJohn Marino /* For reduction defs we call vect_get_constant_vectors (), since we are
2529e4b17023SJohn Marino looking for initial loop invariant values. */
2530e4b17023SJohn Marino if (vectorized_defs && reduc_index == -1)
2531e4b17023SJohn Marino /* The defs are already vectorized. */
2532e4b17023SJohn Marino vect_get_slp_vect_defs (child, &vec_defs);
2533e4b17023SJohn Marino else
2534e4b17023SJohn Marino /* Build vectors from scalar defs. */
2535e4b17023SJohn Marino vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
2536e4b17023SJohn Marino number_of_vects, reduc_index);
2537e4b17023SJohn Marino
2538e4b17023SJohn Marino VEC_quick_push (slp_void_p, *vec_oprnds, (slp_void_p) vec_defs);
2539e4b17023SJohn Marino
2540e4b17023SJohn Marino /* For reductions, we only need initial values. */
2541e4b17023SJohn Marino if (reduc_index != -1)
2542e4b17023SJohn Marino return;
2543e4b17023SJohn Marino }
2544e4b17023SJohn Marino }
2545e4b17023SJohn Marino
2546e4b17023SJohn Marino
2547e4b17023SJohn Marino /* Create NCOPIES permutation statements using the mask MASK_BYTES (by
2548e4b17023SJohn Marino building a vector of type MASK_TYPE from it) and two input vectors placed in
2549e4b17023SJohn Marino DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
2550e4b17023SJohn Marino shifting by STRIDE elements of DR_CHAIN for every copy.
2551e4b17023SJohn Marino (STRIDE is the number of vectorized stmts for NODE divided by the number of
2552e4b17023SJohn Marino copies).
2553e4b17023SJohn Marino VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
2554e4b17023SJohn Marino the created stmts must be inserted. */
2555e4b17023SJohn Marino
2556e4b17023SJohn Marino static inline void
vect_create_mask_and_perm(gimple stmt,gimple next_scalar_stmt,tree mask,int first_vec_indx,int second_vec_indx,gimple_stmt_iterator * gsi,slp_tree node,tree vectype,VEC (tree,heap)* dr_chain,int ncopies,int vect_stmts_counter)2557e4b17023SJohn Marino vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
2558e4b17023SJohn Marino tree mask, int first_vec_indx, int second_vec_indx,
2559e4b17023SJohn Marino gimple_stmt_iterator *gsi, slp_tree node,
2560e4b17023SJohn Marino tree vectype, VEC(tree,heap) *dr_chain,
2561e4b17023SJohn Marino int ncopies, int vect_stmts_counter)
2562e4b17023SJohn Marino {
2563e4b17023SJohn Marino tree perm_dest;
2564e4b17023SJohn Marino gimple perm_stmt = NULL;
2565e4b17023SJohn Marino stmt_vec_info next_stmt_info;
2566e4b17023SJohn Marino int i, stride;
2567e4b17023SJohn Marino tree first_vec, second_vec, data_ref;
2568e4b17023SJohn Marino
2569e4b17023SJohn Marino stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
2570e4b17023SJohn Marino
2571e4b17023SJohn Marino /* Initialize the vect stmts of NODE to properly insert the generated
2572e4b17023SJohn Marino stmts later. */
2573e4b17023SJohn Marino for (i = VEC_length (gimple, SLP_TREE_VEC_STMTS (node));
2574e4b17023SJohn Marino i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
2575e4b17023SJohn Marino VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (node), NULL);
2576e4b17023SJohn Marino
2577e4b17023SJohn Marino perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
2578e4b17023SJohn Marino for (i = 0; i < ncopies; i++)
2579e4b17023SJohn Marino {
2580e4b17023SJohn Marino first_vec = VEC_index (tree, dr_chain, first_vec_indx);
2581e4b17023SJohn Marino second_vec = VEC_index (tree, dr_chain, second_vec_indx);
2582e4b17023SJohn Marino
2583e4b17023SJohn Marino /* Generate the permute statement. */
2584e4b17023SJohn Marino perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, perm_dest,
2585e4b17023SJohn Marino first_vec, second_vec, mask);
2586e4b17023SJohn Marino data_ref = make_ssa_name (perm_dest, perm_stmt);
2587e4b17023SJohn Marino gimple_set_lhs (perm_stmt, data_ref);
2588e4b17023SJohn Marino vect_finish_stmt_generation (stmt, perm_stmt, gsi);
2589e4b17023SJohn Marino
2590e4b17023SJohn Marino /* Store the vector statement in NODE. */
2591e4b17023SJohn Marino VEC_replace (gimple, SLP_TREE_VEC_STMTS (node),
2592e4b17023SJohn Marino stride * i + vect_stmts_counter, perm_stmt);
2593e4b17023SJohn Marino
2594e4b17023SJohn Marino first_vec_indx += stride;
2595e4b17023SJohn Marino second_vec_indx += stride;
2596e4b17023SJohn Marino }
2597e4b17023SJohn Marino
2598e4b17023SJohn Marino /* Mark the scalar stmt as vectorized. */
2599e4b17023SJohn Marino next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
2600e4b17023SJohn Marino STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
2601e4b17023SJohn Marino }
2602e4b17023SJohn Marino
2603e4b17023SJohn Marino
2604e4b17023SJohn Marino /* Given FIRST_MASK_ELEMENT - the mask element in element representation,
2605e4b17023SJohn Marino return in CURRENT_MASK_ELEMENT its equivalent in target specific
2606e4b17023SJohn Marino representation. Check that the mask is valid and return FALSE if not.
2607e4b17023SJohn Marino Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
2608e4b17023SJohn Marino the next vector, i.e., the current first vector is not needed. */
2609e4b17023SJohn Marino
2610e4b17023SJohn Marino static bool
vect_get_mask_element(gimple stmt,int first_mask_element,int m,int mask_nunits,bool only_one_vec,int index,unsigned char * mask,int * current_mask_element,bool * need_next_vector,int * number_of_mask_fixes,bool * mask_fixed,bool * needs_first_vector)2611e4b17023SJohn Marino vect_get_mask_element (gimple stmt, int first_mask_element, int m,
2612e4b17023SJohn Marino int mask_nunits, bool only_one_vec, int index,
2613e4b17023SJohn Marino unsigned char *mask, int *current_mask_element,
2614e4b17023SJohn Marino bool *need_next_vector, int *number_of_mask_fixes,
2615e4b17023SJohn Marino bool *mask_fixed, bool *needs_first_vector)
2616e4b17023SJohn Marino {
2617e4b17023SJohn Marino int i;
2618e4b17023SJohn Marino
2619e4b17023SJohn Marino /* Convert to target specific representation. */
2620e4b17023SJohn Marino *current_mask_element = first_mask_element + m;
2621e4b17023SJohn Marino /* Adjust the value in case it's a mask for second and third vectors. */
2622e4b17023SJohn Marino *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);
2623e4b17023SJohn Marino
2624e4b17023SJohn Marino if (*current_mask_element < mask_nunits)
2625e4b17023SJohn Marino *needs_first_vector = true;
2626e4b17023SJohn Marino
2627e4b17023SJohn Marino /* We have only one input vector to permute but the mask accesses values in
2628e4b17023SJohn Marino the next vector as well. */
2629e4b17023SJohn Marino if (only_one_vec && *current_mask_element >= mask_nunits)
2630e4b17023SJohn Marino {
2631e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
2632e4b17023SJohn Marino {
2633e4b17023SJohn Marino fprintf (vect_dump, "permutation requires at least two vectors ");
2634e4b17023SJohn Marino print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2635e4b17023SJohn Marino }
2636e4b17023SJohn Marino
2637e4b17023SJohn Marino return false;
2638e4b17023SJohn Marino }
2639e4b17023SJohn Marino
2640e4b17023SJohn Marino /* The mask requires the next vector. */
2641e4b17023SJohn Marino if (*current_mask_element >= mask_nunits * 2)
2642e4b17023SJohn Marino {
2643e4b17023SJohn Marino if (*needs_first_vector || *mask_fixed)
2644e4b17023SJohn Marino {
2645e4b17023SJohn Marino /* We either need the first vector too or have already moved to the
2646e4b17023SJohn Marino next vector. In both cases, this permutation needs three
2647e4b17023SJohn Marino vectors. */
2648e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
2649e4b17023SJohn Marino {
2650e4b17023SJohn Marino fprintf (vect_dump, "permutation requires at "
2651e4b17023SJohn Marino "least three vectors ");
2652e4b17023SJohn Marino print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2653e4b17023SJohn Marino }
2654e4b17023SJohn Marino
2655e4b17023SJohn Marino return false;
2656e4b17023SJohn Marino }
2657e4b17023SJohn Marino
2658e4b17023SJohn Marino /* We move to the next vector, dropping the first one and working with
2659e4b17023SJohn Marino the second and the third - we need to adjust the values of the mask
2660e4b17023SJohn Marino accordingly. */
2661e4b17023SJohn Marino *current_mask_element -= mask_nunits * *number_of_mask_fixes;
2662e4b17023SJohn Marino
2663e4b17023SJohn Marino for (i = 0; i < index; i++)
2664e4b17023SJohn Marino mask[i] -= mask_nunits * *number_of_mask_fixes;
2665e4b17023SJohn Marino
2666e4b17023SJohn Marino (*number_of_mask_fixes)++;
2667e4b17023SJohn Marino *mask_fixed = true;
2668e4b17023SJohn Marino }
2669e4b17023SJohn Marino
2670e4b17023SJohn Marino *need_next_vector = *mask_fixed;
2671e4b17023SJohn Marino
2672e4b17023SJohn Marino /* This was the last element of this mask. Start a new one. */
2673e4b17023SJohn Marino if (index == mask_nunits - 1)
2674e4b17023SJohn Marino {
2675e4b17023SJohn Marino *number_of_mask_fixes = 1;
2676e4b17023SJohn Marino *mask_fixed = false;
2677e4b17023SJohn Marino *needs_first_vector = false;
2678e4b17023SJohn Marino }
2679e4b17023SJohn Marino
2680e4b17023SJohn Marino return true;
2681e4b17023SJohn Marino }
2682e4b17023SJohn Marino
2683e4b17023SJohn Marino
2684e4b17023SJohn Marino /* Generate vector permute statements from a list of loads in DR_CHAIN.
2685e4b17023SJohn Marino If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
2686e4b17023SJohn Marino permute statements for SLP_NODE_INSTANCE. */
2687e4b17023SJohn Marino bool
vect_transform_slp_perm_load(gimple stmt,VEC (tree,heap)* dr_chain,gimple_stmt_iterator * gsi,int vf,slp_instance slp_node_instance,bool analyze_only)2688e4b17023SJohn Marino vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain,
2689e4b17023SJohn Marino gimple_stmt_iterator *gsi, int vf,
2690e4b17023SJohn Marino slp_instance slp_node_instance, bool analyze_only)
2691e4b17023SJohn Marino {
2692e4b17023SJohn Marino stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2693e4b17023SJohn Marino tree mask_element_type = NULL_TREE, mask_type;
2694e4b17023SJohn Marino int i, j, k, nunits, vec_index = 0, scalar_index;
2695e4b17023SJohn Marino slp_tree node;
2696e4b17023SJohn Marino tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2697e4b17023SJohn Marino gimple next_scalar_stmt;
2698e4b17023SJohn Marino int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
2699e4b17023SJohn Marino int first_mask_element;
2700e4b17023SJohn Marino int index, unroll_factor, current_mask_element, ncopies;
2701e4b17023SJohn Marino unsigned char *mask;
2702e4b17023SJohn Marino bool only_one_vec = false, need_next_vector = false;
2703e4b17023SJohn Marino int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
2704e4b17023SJohn Marino int number_of_mask_fixes = 1;
2705e4b17023SJohn Marino bool mask_fixed = false;
2706e4b17023SJohn Marino bool needs_first_vector = false;
2707e4b17023SJohn Marino enum machine_mode mode;
2708e4b17023SJohn Marino
2709e4b17023SJohn Marino mode = TYPE_MODE (vectype);
2710e4b17023SJohn Marino
2711e4b17023SJohn Marino if (!can_vec_perm_p (mode, false, NULL))
2712e4b17023SJohn Marino {
2713e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
2714e4b17023SJohn Marino {
2715e4b17023SJohn Marino fprintf (vect_dump, "no vect permute for ");
2716e4b17023SJohn Marino print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2717e4b17023SJohn Marino }
2718e4b17023SJohn Marino return false;
2719e4b17023SJohn Marino }
2720e4b17023SJohn Marino
2721e4b17023SJohn Marino /* The generic VEC_PERM_EXPR code always uses an integral type of the
2722e4b17023SJohn Marino same size as the vector element being permuted. */
2723e4b17023SJohn Marino mask_element_type
2724e4b17023SJohn Marino = lang_hooks.types.type_for_size
2725e4b17023SJohn Marino (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (vectype))), 1);
2726e4b17023SJohn Marino mask_type = get_vectype_for_scalar_type (mask_element_type);
2727e4b17023SJohn Marino nunits = TYPE_VECTOR_SUBPARTS (vectype);
2728e4b17023SJohn Marino mask = XALLOCAVEC (unsigned char, nunits);
2729e4b17023SJohn Marino unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2730e4b17023SJohn Marino
2731e4b17023SJohn Marino /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
2732e4b17023SJohn Marino unrolling factor. */
2733e4b17023SJohn Marino orig_vec_stmts_num = group_size *
2734e4b17023SJohn Marino SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
2735e4b17023SJohn Marino if (orig_vec_stmts_num == 1)
2736e4b17023SJohn Marino only_one_vec = true;
2737e4b17023SJohn Marino
2738e4b17023SJohn Marino /* Number of copies is determined by the final vectorization factor
2739e4b17023SJohn Marino relatively to SLP_NODE_INSTANCE unrolling factor. */
2740e4b17023SJohn Marino ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2741e4b17023SJohn Marino
2742e4b17023SJohn Marino /* Generate permutation masks for every NODE. Number of masks for each NODE
2743e4b17023SJohn Marino is equal to GROUP_SIZE.
2744e4b17023SJohn Marino E.g., we have a group of three nodes with three loads from the same
2745e4b17023SJohn Marino location in each node, and the vector size is 4. I.e., we have a
2746e4b17023SJohn Marino a0b0c0a1b1c1... sequence and we need to create the following vectors:
2747e4b17023SJohn Marino for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
2748e4b17023SJohn Marino for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
2749e4b17023SJohn Marino ...
2750e4b17023SJohn Marino
2751e4b17023SJohn Marino The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
2752e4b17023SJohn Marino The last mask is illegal since we assume two operands for permute
2753e4b17023SJohn Marino operation, and the mask element values can't be outside that range.
2754e4b17023SJohn Marino Hence, the last mask must be converted into {2,5,5,5}.
2755e4b17023SJohn Marino For the first two permutations we need the first and the second input
2756e4b17023SJohn Marino vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
2757e4b17023SJohn Marino we need the second and the third vectors: {b1,c1,a2,b2} and
2758e4b17023SJohn Marino {c2,a3,b3,c3}. */
2759e4b17023SJohn Marino
2760e4b17023SJohn Marino FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_node_instance), i, node)
2761e4b17023SJohn Marino {
2762e4b17023SJohn Marino scalar_index = 0;
2763e4b17023SJohn Marino index = 0;
2764e4b17023SJohn Marino vect_stmts_counter = 0;
2765e4b17023SJohn Marino vec_index = 0;
2766e4b17023SJohn Marino first_vec_index = vec_index++;
2767e4b17023SJohn Marino if (only_one_vec)
2768e4b17023SJohn Marino second_vec_index = first_vec_index;
2769e4b17023SJohn Marino else
2770e4b17023SJohn Marino second_vec_index = vec_index++;
2771e4b17023SJohn Marino
2772e4b17023SJohn Marino for (j = 0; j < unroll_factor; j++)
2773e4b17023SJohn Marino {
2774e4b17023SJohn Marino for (k = 0; k < group_size; k++)
2775e4b17023SJohn Marino {
2776e4b17023SJohn Marino first_mask_element = i + j * group_size;
2777e4b17023SJohn Marino if (!vect_get_mask_element (stmt, first_mask_element, 0,
2778e4b17023SJohn Marino nunits, only_one_vec, index,
2779e4b17023SJohn Marino mask, ¤t_mask_element,
2780e4b17023SJohn Marino &need_next_vector,
2781e4b17023SJohn Marino &number_of_mask_fixes, &mask_fixed,
2782e4b17023SJohn Marino &needs_first_vector))
2783e4b17023SJohn Marino return false;
2784e4b17023SJohn Marino mask[index++] = current_mask_element;
2785e4b17023SJohn Marino
2786e4b17023SJohn Marino if (index == nunits)
2787e4b17023SJohn Marino {
2788e4b17023SJohn Marino tree mask_vec = NULL;
2789e4b17023SJohn Marino
2790e4b17023SJohn Marino if (!can_vec_perm_p (mode, false, mask))
2791e4b17023SJohn Marino {
2792e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
2793e4b17023SJohn Marino {
2794e4b17023SJohn Marino fprintf (vect_dump, "unsupported vect permute { ");
2795e4b17023SJohn Marino for (i = 0; i < nunits; ++i)
2796e4b17023SJohn Marino fprintf (vect_dump, "%d ", mask[i]);
2797e4b17023SJohn Marino fprintf (vect_dump, "}\n");
2798e4b17023SJohn Marino }
2799e4b17023SJohn Marino return false;
2800e4b17023SJohn Marino }
2801e4b17023SJohn Marino
2802e4b17023SJohn Marino while (--index >= 0)
2803e4b17023SJohn Marino {
2804e4b17023SJohn Marino tree t = build_int_cst (mask_element_type, mask[index]);
2805e4b17023SJohn Marino mask_vec = tree_cons (NULL, t, mask_vec);
2806e4b17023SJohn Marino }
2807e4b17023SJohn Marino mask_vec = build_vector (mask_type, mask_vec);
2808e4b17023SJohn Marino index = 0;
2809e4b17023SJohn Marino
2810e4b17023SJohn Marino if (!analyze_only)
2811e4b17023SJohn Marino {
2812e4b17023SJohn Marino if (need_next_vector)
2813e4b17023SJohn Marino {
2814e4b17023SJohn Marino first_vec_index = second_vec_index;
2815e4b17023SJohn Marino second_vec_index = vec_index;
2816e4b17023SJohn Marino }
2817e4b17023SJohn Marino
2818e4b17023SJohn Marino next_scalar_stmt = VEC_index (gimple,
2819e4b17023SJohn Marino SLP_TREE_SCALAR_STMTS (node), scalar_index++);
2820e4b17023SJohn Marino
2821e4b17023SJohn Marino vect_create_mask_and_perm (stmt, next_scalar_stmt,
2822e4b17023SJohn Marino mask_vec, first_vec_index, second_vec_index,
2823e4b17023SJohn Marino gsi, node, vectype, dr_chain,
2824e4b17023SJohn Marino ncopies, vect_stmts_counter++);
2825e4b17023SJohn Marino }
2826e4b17023SJohn Marino }
2827e4b17023SJohn Marino }
2828e4b17023SJohn Marino }
2829e4b17023SJohn Marino }
2830e4b17023SJohn Marino
2831e4b17023SJohn Marino return true;
2832e4b17023SJohn Marino }
2833e4b17023SJohn Marino
2834e4b17023SJohn Marino
2835e4b17023SJohn Marino
2836e4b17023SJohn Marino /* Vectorize SLP instance tree in postorder. */
2837e4b17023SJohn Marino
2838e4b17023SJohn Marino static bool
vect_schedule_slp_instance(slp_tree node,slp_instance instance,unsigned int vectorization_factor)2839e4b17023SJohn Marino vect_schedule_slp_instance (slp_tree node, slp_instance instance,
2840e4b17023SJohn Marino unsigned int vectorization_factor)
2841e4b17023SJohn Marino {
2842e4b17023SJohn Marino gimple stmt;
2843e4b17023SJohn Marino bool strided_store, is_store;
2844e4b17023SJohn Marino gimple_stmt_iterator si;
2845e4b17023SJohn Marino stmt_vec_info stmt_info;
2846e4b17023SJohn Marino unsigned int vec_stmts_size, nunits, group_size;
2847e4b17023SJohn Marino tree vectype;
2848e4b17023SJohn Marino int i;
2849e4b17023SJohn Marino slp_tree loads_node;
2850e4b17023SJohn Marino slp_void_p child;
2851e4b17023SJohn Marino
2852e4b17023SJohn Marino if (!node)
2853e4b17023SJohn Marino return false;
2854e4b17023SJohn Marino
2855e4b17023SJohn Marino FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
2856e4b17023SJohn Marino vect_schedule_slp_instance ((slp_tree) child, instance,
2857e4b17023SJohn Marino vectorization_factor);
2858e4b17023SJohn Marino
2859e4b17023SJohn Marino stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
2860e4b17023SJohn Marino stmt_info = vinfo_for_stmt (stmt);
2861e4b17023SJohn Marino
2862e4b17023SJohn Marino /* VECTYPE is the type of the destination. */
2863e4b17023SJohn Marino vectype = STMT_VINFO_VECTYPE (stmt_info);
2864e4b17023SJohn Marino nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
2865e4b17023SJohn Marino group_size = SLP_INSTANCE_GROUP_SIZE (instance);
2866e4b17023SJohn Marino
2867e4b17023SJohn Marino /* For each SLP instance calculate number of vector stmts to be created
2868e4b17023SJohn Marino for the scalar stmts in each node of the SLP tree. Number of vector
2869e4b17023SJohn Marino elements in one vector iteration is the number of scalar elements in
2870e4b17023SJohn Marino one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
2871e4b17023SJohn Marino size. */
2872e4b17023SJohn Marino vec_stmts_size = (vectorization_factor * group_size) / nunits;
2873e4b17023SJohn Marino
2874e4b17023SJohn Marino /* In case of load permutation we have to allocate vectorized statements for
2875e4b17023SJohn Marino all the nodes that participate in that permutation. */
2876e4b17023SJohn Marino if (SLP_INSTANCE_LOAD_PERMUTATION (instance))
2877e4b17023SJohn Marino {
2878e4b17023SJohn Marino FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (instance), i, loads_node)
2879e4b17023SJohn Marino {
2880e4b17023SJohn Marino if (!SLP_TREE_VEC_STMTS (loads_node))
2881e4b17023SJohn Marino {
2882e4b17023SJohn Marino SLP_TREE_VEC_STMTS (loads_node) = VEC_alloc (gimple, heap,
2883e4b17023SJohn Marino vec_stmts_size);
2884e4b17023SJohn Marino SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node) = vec_stmts_size;
2885e4b17023SJohn Marino }
2886e4b17023SJohn Marino }
2887e4b17023SJohn Marino }
2888e4b17023SJohn Marino
2889e4b17023SJohn Marino if (!SLP_TREE_VEC_STMTS (node))
2890e4b17023SJohn Marino {
2891e4b17023SJohn Marino SLP_TREE_VEC_STMTS (node) = VEC_alloc (gimple, heap, vec_stmts_size);
2892e4b17023SJohn Marino SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
2893e4b17023SJohn Marino }
2894e4b17023SJohn Marino
2895e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
2896e4b17023SJohn Marino {
2897e4b17023SJohn Marino fprintf (vect_dump, "------>vectorizing SLP node starting from: ");
2898e4b17023SJohn Marino print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2899e4b17023SJohn Marino }
2900e4b17023SJohn Marino
2901e4b17023SJohn Marino /* Loads should be inserted before the first load. */
2902e4b17023SJohn Marino if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
2903e4b17023SJohn Marino && STMT_VINFO_STRIDED_ACCESS (stmt_info)
2904e4b17023SJohn Marino && !REFERENCE_CLASS_P (gimple_get_lhs (stmt))
2905e4b17023SJohn Marino && SLP_INSTANCE_LOAD_PERMUTATION (instance))
2906e4b17023SJohn Marino si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
2907e4b17023SJohn Marino else if (is_pattern_stmt_p (stmt_info))
2908e4b17023SJohn Marino si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
2909e4b17023SJohn Marino else
2910e4b17023SJohn Marino si = gsi_for_stmt (stmt);
2911e4b17023SJohn Marino
2912e4b17023SJohn Marino /* Stores should be inserted just before the last store. */
2913e4b17023SJohn Marino if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
2914e4b17023SJohn Marino && REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
2915e4b17023SJohn Marino {
2916e4b17023SJohn Marino gimple last_store = vect_find_last_store_in_slp_instance (instance);
2917e4b17023SJohn Marino if (is_pattern_stmt_p (vinfo_for_stmt (last_store)))
2918e4b17023SJohn Marino last_store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_store));
2919e4b17023SJohn Marino si = gsi_for_stmt (last_store);
2920e4b17023SJohn Marino }
2921e4b17023SJohn Marino
2922e4b17023SJohn Marino /* Mark the first element of the reduction chain as reduction to properly
2923e4b17023SJohn Marino transform the node. In the analysis phase only the last element of the
2924e4b17023SJohn Marino chain is marked as reduction. */
2925e4b17023SJohn Marino if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_STRIDED_ACCESS (stmt_info)
2926e4b17023SJohn Marino && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
2927e4b17023SJohn Marino {
2928e4b17023SJohn Marino STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
2929e4b17023SJohn Marino STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
2930e4b17023SJohn Marino }
2931e4b17023SJohn Marino
2932e4b17023SJohn Marino is_store = vect_transform_stmt (stmt, &si, &strided_store, node, instance);
2933e4b17023SJohn Marino return is_store;
2934e4b17023SJohn Marino }
2935e4b17023SJohn Marino
2936e4b17023SJohn Marino /* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
2937e4b17023SJohn Marino For loop vectorization this is done in vectorizable_call, but for SLP
2938e4b17023SJohn Marino it needs to be deferred until end of vect_schedule_slp, because multiple
2939e4b17023SJohn Marino SLP instances may refer to the same scalar stmt. */
2940e4b17023SJohn Marino
2941e4b17023SJohn Marino static void
vect_remove_slp_scalar_calls(slp_tree node)2942e4b17023SJohn Marino vect_remove_slp_scalar_calls (slp_tree node)
2943e4b17023SJohn Marino {
2944e4b17023SJohn Marino gimple stmt, new_stmt;
2945e4b17023SJohn Marino gimple_stmt_iterator gsi;
2946e4b17023SJohn Marino int i;
2947e4b17023SJohn Marino slp_void_p child;
2948e4b17023SJohn Marino tree lhs;
2949e4b17023SJohn Marino stmt_vec_info stmt_info;
2950e4b17023SJohn Marino
2951e4b17023SJohn Marino if (!node)
2952e4b17023SJohn Marino return;
2953e4b17023SJohn Marino
2954e4b17023SJohn Marino FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
2955e4b17023SJohn Marino vect_remove_slp_scalar_calls ((slp_tree) child);
2956e4b17023SJohn Marino
2957e4b17023SJohn Marino FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
2958e4b17023SJohn Marino {
2959e4b17023SJohn Marino if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
2960e4b17023SJohn Marino continue;
2961e4b17023SJohn Marino stmt_info = vinfo_for_stmt (stmt);
2962e4b17023SJohn Marino if (stmt_info == NULL
2963e4b17023SJohn Marino || is_pattern_stmt_p (stmt_info)
2964e4b17023SJohn Marino || !PURE_SLP_STMT (stmt_info))
2965e4b17023SJohn Marino continue;
2966e4b17023SJohn Marino lhs = gimple_call_lhs (stmt);
2967e4b17023SJohn Marino new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2968e4b17023SJohn Marino set_vinfo_for_stmt (new_stmt, stmt_info);
2969e4b17023SJohn Marino set_vinfo_for_stmt (stmt, NULL);
2970e4b17023SJohn Marino STMT_VINFO_STMT (stmt_info) = new_stmt;
2971e4b17023SJohn Marino gsi = gsi_for_stmt (stmt);
2972e4b17023SJohn Marino gsi_replace (&gsi, new_stmt, false);
2973e4b17023SJohn Marino SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
2974e4b17023SJohn Marino }
2975e4b17023SJohn Marino }
2976e4b17023SJohn Marino
2977e4b17023SJohn Marino /* Generate vector code for all SLP instances in the loop/basic block. */
2978e4b17023SJohn Marino
2979e4b17023SJohn Marino bool
vect_schedule_slp(loop_vec_info loop_vinfo,bb_vec_info bb_vinfo)2980e4b17023SJohn Marino vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2981e4b17023SJohn Marino {
2982e4b17023SJohn Marino VEC (slp_instance, heap) *slp_instances;
2983e4b17023SJohn Marino slp_instance instance;
2984*5ce9237cSJohn Marino slp_tree loads_node;
2985*5ce9237cSJohn Marino unsigned int i, j, vf;
2986e4b17023SJohn Marino bool is_store = false;
2987e4b17023SJohn Marino
2988e4b17023SJohn Marino if (loop_vinfo)
2989e4b17023SJohn Marino {
2990e4b17023SJohn Marino slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2991e4b17023SJohn Marino vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2992e4b17023SJohn Marino }
2993e4b17023SJohn Marino else
2994e4b17023SJohn Marino {
2995e4b17023SJohn Marino slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2996e4b17023SJohn Marino vf = 1;
2997e4b17023SJohn Marino }
2998e4b17023SJohn Marino
2999e4b17023SJohn Marino FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
3000e4b17023SJohn Marino {
3001e4b17023SJohn Marino /* Schedule the tree of INSTANCE. */
3002e4b17023SJohn Marino is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
3003e4b17023SJohn Marino instance, vf);
3004*5ce9237cSJohn Marino
3005*5ce9237cSJohn Marino /* Clear STMT_VINFO_VEC_STMT of all loads. With shared loads
3006*5ce9237cSJohn Marino between SLP instances we fail to properly initialize the
3007*5ce9237cSJohn Marino vectorized SLP stmts and confuse different load permutations. */
3008*5ce9237cSJohn Marino FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (instance), j, loads_node)
3009*5ce9237cSJohn Marino STMT_VINFO_VEC_STMT
3010*5ce9237cSJohn Marino (vinfo_for_stmt
3011*5ce9237cSJohn Marino (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (loads_node), 0))) = NULL;
3012*5ce9237cSJohn Marino
3013e4b17023SJohn Marino if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS)
3014e4b17023SJohn Marino || vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3015e4b17023SJohn Marino fprintf (vect_dump, "vectorizing stmts using SLP.");
3016e4b17023SJohn Marino }
3017e4b17023SJohn Marino
3018e4b17023SJohn Marino FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
3019e4b17023SJohn Marino {
3020e4b17023SJohn Marino slp_tree root = SLP_INSTANCE_TREE (instance);
3021e4b17023SJohn Marino gimple store;
3022e4b17023SJohn Marino unsigned int j;
3023e4b17023SJohn Marino gimple_stmt_iterator gsi;
3024e4b17023SJohn Marino
3025*5ce9237cSJohn Marino /* Remove scalar call stmts. Do not do this for basic-block
3026*5ce9237cSJohn Marino vectorization as not all uses may be vectorized.
3027*5ce9237cSJohn Marino ??? Why should this be necessary? DCE should be able to
3028*5ce9237cSJohn Marino remove the stmts itself.
3029*5ce9237cSJohn Marino ??? For BB vectorization we can as well remove scalar
3030*5ce9237cSJohn Marino stmts starting from the SLP tree root if they have no
3031*5ce9237cSJohn Marino uses. */
3032*5ce9237cSJohn Marino if (loop_vinfo)
3033e4b17023SJohn Marino vect_remove_slp_scalar_calls (root);
3034e4b17023SJohn Marino
3035e4b17023SJohn Marino for (j = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (root), j, store)
3036e4b17023SJohn Marino && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
3037e4b17023SJohn Marino {
3038e4b17023SJohn Marino if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
3039e4b17023SJohn Marino break;
3040e4b17023SJohn Marino
3041e4b17023SJohn Marino if (is_pattern_stmt_p (vinfo_for_stmt (store)))
3042e4b17023SJohn Marino store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
3043e4b17023SJohn Marino /* Free the attached stmt_vec_info and remove the stmt. */
3044e4b17023SJohn Marino gsi = gsi_for_stmt (store);
3045e4b17023SJohn Marino gsi_remove (&gsi, true);
3046e4b17023SJohn Marino free_stmt_vec_info (store);
3047e4b17023SJohn Marino }
3048e4b17023SJohn Marino }
3049e4b17023SJohn Marino
3050e4b17023SJohn Marino return is_store;
3051e4b17023SJohn Marino }
3052e4b17023SJohn Marino
3053e4b17023SJohn Marino
3054e4b17023SJohn Marino /* Vectorize the basic block. */
3055e4b17023SJohn Marino
3056e4b17023SJohn Marino void
vect_slp_transform_bb(basic_block bb)3057e4b17023SJohn Marino vect_slp_transform_bb (basic_block bb)
3058e4b17023SJohn Marino {
3059e4b17023SJohn Marino bb_vec_info bb_vinfo = vec_info_for_bb (bb);
3060e4b17023SJohn Marino gimple_stmt_iterator si;
3061e4b17023SJohn Marino
3062e4b17023SJohn Marino gcc_assert (bb_vinfo);
3063e4b17023SJohn Marino
3064e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
3065e4b17023SJohn Marino fprintf (vect_dump, "SLPing BB\n");
3066e4b17023SJohn Marino
3067e4b17023SJohn Marino for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3068e4b17023SJohn Marino {
3069e4b17023SJohn Marino gimple stmt = gsi_stmt (si);
3070e4b17023SJohn Marino stmt_vec_info stmt_info;
3071e4b17023SJohn Marino
3072e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
3073e4b17023SJohn Marino {
3074e4b17023SJohn Marino fprintf (vect_dump, "------>SLPing statement: ");
3075e4b17023SJohn Marino print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3076e4b17023SJohn Marino }
3077e4b17023SJohn Marino
3078e4b17023SJohn Marino stmt_info = vinfo_for_stmt (stmt);
3079e4b17023SJohn Marino gcc_assert (stmt_info);
3080e4b17023SJohn Marino
3081e4b17023SJohn Marino /* Schedule all the SLP instances when the first SLP stmt is reached. */
3082e4b17023SJohn Marino if (STMT_SLP_TYPE (stmt_info))
3083e4b17023SJohn Marino {
3084e4b17023SJohn Marino vect_schedule_slp (NULL, bb_vinfo);
3085e4b17023SJohn Marino break;
3086e4b17023SJohn Marino }
3087e4b17023SJohn Marino }
3088e4b17023SJohn Marino
3089e4b17023SJohn Marino mark_sym_for_renaming (gimple_vop (cfun));
3090e4b17023SJohn Marino /* The memory tags and pointers in vectorized statements need to
3091e4b17023SJohn Marino have their SSA forms updated. FIXME, why can't this be delayed
3092e4b17023SJohn Marino until all the loops have been transformed? */
3093e4b17023SJohn Marino update_ssa (TODO_update_ssa);
3094e4b17023SJohn Marino
3095e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
3096e4b17023SJohn Marino fprintf (vect_dump, "BASIC BLOCK VECTORIZED\n");
3097e4b17023SJohn Marino
3098e4b17023SJohn Marino destroy_bb_vec_info (bb_vinfo);
3099e4b17023SJohn Marino }
3100e4b17023SJohn Marino
3101