/* Data References Analysis and Manipulation Utilities for Vectorization.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "tm_p.h"
#include "target.h"
#include "basic-block.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "cfgloop.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "diagnostic-core.h"

/* Need to include rtl.h, expr.h, etc. for optabs.  */
#include "expr.h"
#include "optabs.h"

/* Return true if load- or store-lanes optab OPTAB is implemented for
   COUNT vectors of type VECTYPE.  NAME is the name of OPTAB.  */
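/* For illustration only (hypothetical numbers, not tied to any particular
   target): with a 128-bit V4SI VECTYPE and COUNT == 3, this asks whether
   the target provides an integer mode of 3 * 128 bits for the whole group
   and an optab entry (e.g. vec_load_lanes) converting between that array
   mode and V4SI.  */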

static bool
vect_lanes_optab_supported_p (const char *name, convert_optab optab,
                              tree vectype, unsigned HOST_WIDE_INT count)
{
  enum machine_mode mode, array_mode;
  bool limit_p;

  mode = TYPE_MODE (vectype);
  limit_p = !targetm.array_mode_supported_p (mode, count);
  array_mode = mode_for_size (count * GET_MODE_BITSIZE (mode),
                              MODE_INT, limit_p);

  if (array_mode == BLKmode)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "no array mode for %s[" HOST_WIDE_INT_PRINT_DEC "]",
                 GET_MODE_NAME (mode), count);
      return false;
    }

  if (convert_optab_handler (optab, array_mode, mode) == CODE_FOR_nothing)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "cannot use %s<%s><%s>",
                 name, GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
      return false;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "can use %s<%s><%s>",
             name, GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));

  return true;
}


/* Return the smallest scalar part of STMT.
   This is used to determine the vectype of the stmt.  We generally set the
   vectype according to the type of the result (lhs).  For stmts whose
   result-type is different than the type of the arguments (e.g., demotion,
   promotion), vectype will be reset appropriately (later).  Note that we have
   to visit the smallest datatype in this function, because that determines the
   VF.  If the smallest datatype in the loop is present only as the rhs of a
   promotion operation - we'd miss it.
   Such a case, where a variable of this datatype does not appear in the lhs
   anywhere in the loop, can only occur if it's an invariant: e.g.:
   'int_x = (int) short_inv', which we'd expect to have been optimized away by
   invariant motion.  However, we cannot rely on invariant motion to always
   take invariants out of the loop, and so in the case of promotion we also
   have to check the rhs.
   LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
   types.  */
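/* A minimal sketch of the promotion case described above (hypothetical
   code):

     int_prod = (int) short_a * (int) short_b;   <-- WIDEN_MULT_EXPR

   The result type is 'int' (4 bytes), but the smallest scalar type in the
   statement is 'short' (2 bytes), and that is what should determine the
   VF.  */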

tree
vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
                               HOST_WIDE_INT *rhs_size_unit)
{
  tree scalar_type = gimple_expr_type (stmt);
  HOST_WIDE_INT lhs, rhs;

  lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));

  if (is_gimple_assign (stmt)
      && (gimple_assign_cast_p (stmt)
          || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
          || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
    {
      tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));

      rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
      if (rhs < lhs)
        scalar_type = rhs_type;
    }

  *lhs_size_unit = lhs;
  *rhs_size_unit = rhs;
  return scalar_type;
}


/* Find the place of the data-ref in STMT in the interleaving chain that starts
   from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */
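/* For illustration (hypothetical access pattern): for a group of stores
   a[3*i], a[3*i+1], a[3*i+2] whose chain starts at the a[3*i] store, the
   statement storing a[3*i+2] is at place 2.  */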

int
vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
{
  gimple next_stmt = first_stmt;
  int result = 0;

  if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    return -1;

  while (next_stmt && next_stmt != stmt)
    {
      result++;
      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
    }

  if (next_stmt)
    return result;
  else
    return -1;
}


/* Function vect_insert_into_interleaving_chain.

   Insert DRA into the interleaving chain of DRB according to DRA's INIT.  */
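/* The chain is kept sorted by increasing DR_INIT.  As a hypothetical
   example, if DRB's chain contains data-refs with inits {0, 8} and DRA's
   init is 4, DRA is linked in between the two.  */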

static void
vect_insert_into_interleaving_chain (struct data_reference *dra,
                                     struct data_reference *drb)
{
  gimple prev, next;
  tree next_init;
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));

  prev = GROUP_FIRST_ELEMENT (stmtinfo_b);
  next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
  while (next)
    {
      next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
      if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
        {
          /* Insert here.  */
          GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = DR_STMT (dra);
          GROUP_NEXT_ELEMENT (stmtinfo_a) = next;
          return;
        }
      prev = next;
      next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
    }

  /* We got to the end of the list.  Insert here.  */
  GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = DR_STMT (dra);
  GROUP_NEXT_ELEMENT (stmtinfo_a) = NULL;
}


/* Function vect_update_interleaving_chain.

   For two data-refs DRA and DRB that are part of a chain of interleaved data
   accesses, update the interleaving chain.  DRB's INIT is smaller than DRA's.

   There are four possible cases:
   1. New stmts - both DRA and DRB are not a part of any chain:
      FIRST_DR = DRB
      NEXT_DR (DRB) = DRA
   2. DRB is a part of a chain and DRA is not:
      no need to update FIRST_DR
      no need to insert DRB
      insert DRA according to init
   3. DRA is a part of a chain and DRB is not:
      if (init of FIRST_DR > init of DRB)
          FIRST_DR = DRB
          NEXT(FIRST_DR) = previous FIRST_DR
      else
          insert DRB according to its init
   4. both DRA and DRB are in some interleaving chains:
      choose the chain with the smallest init of FIRST_DR
      insert the nodes of the second chain into the first one.  */
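/* A hypothetical example of case 1: for two stores a[2*i] (DRB, init 0) and
   a[2*i+1] (DRA, init 4 with 4-byte elements), neither of which is in a
   chain yet, DRB becomes the first element of a new chain and DRA its
   successor.  */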

static void
vect_update_interleaving_chain (struct data_reference *drb,
                                struct data_reference *dra)
{
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
  tree next_init, init_dra_chain, init_drb_chain;
  gimple first_a, first_b;
  tree node_init;
  gimple node, prev, next, first_stmt;

  /* 1. New stmts - both DRA and DRB are not a part of any chain.  */
  if (!GROUP_FIRST_ELEMENT (stmtinfo_a) && !GROUP_FIRST_ELEMENT (stmtinfo_b))
    {
      GROUP_FIRST_ELEMENT (stmtinfo_a) = DR_STMT (drb);
      GROUP_FIRST_ELEMENT (stmtinfo_b) = DR_STMT (drb);
      GROUP_NEXT_ELEMENT (stmtinfo_b) = DR_STMT (dra);
      return;
    }

  /* 2. DRB is a part of a chain and DRA is not.  */
  if (!GROUP_FIRST_ELEMENT (stmtinfo_a) && GROUP_FIRST_ELEMENT (stmtinfo_b))
    {
      GROUP_FIRST_ELEMENT (stmtinfo_a) = GROUP_FIRST_ELEMENT (stmtinfo_b);
      /* Insert DRA into the chain of DRB.  */
      vect_insert_into_interleaving_chain (dra, drb);
      return;
    }

  /* 3. DRA is a part of a chain and DRB is not.  */
  if (GROUP_FIRST_ELEMENT (stmtinfo_a) && !GROUP_FIRST_ELEMENT (stmtinfo_b))
    {
      gimple old_first_stmt = GROUP_FIRST_ELEMENT (stmtinfo_a);
      tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
                                                          old_first_stmt)));
      gimple tmp;

      if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
        {
          /* DRB's init is smaller than the init of the stmt previously marked
             as the first stmt of the interleaving chain of DRA.  Therefore, we
             update FIRST_STMT and put DRB in the head of the list.  */
          GROUP_FIRST_ELEMENT (stmtinfo_b) = DR_STMT (drb);
          GROUP_NEXT_ELEMENT (stmtinfo_b) = old_first_stmt;

          /* Update all the stmts in the list to point to the new FIRST_STMT.  */
          tmp = old_first_stmt;
          while (tmp)
            {
              GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) = DR_STMT (drb);
              tmp = GROUP_NEXT_ELEMENT (vinfo_for_stmt (tmp));
            }
        }
      else
        {
          /* Insert DRB in the list of DRA.  */
          vect_insert_into_interleaving_chain (drb, dra);
          GROUP_FIRST_ELEMENT (stmtinfo_b) = GROUP_FIRST_ELEMENT (stmtinfo_a);
        }
      return;
    }

  /* 4. both DRA and DRB are in some interleaving chains.  */
  first_a = GROUP_FIRST_ELEMENT (stmtinfo_a);
  first_b = GROUP_FIRST_ELEMENT (stmtinfo_b);
  if (first_a == first_b)
    return;
  init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
  init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));

  if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
    {
      /* Insert the nodes of DRA chain into the DRB chain.
         After inserting a node, continue from this node of the DRB chain
         (don't start from the beginning).  */
      node = GROUP_FIRST_ELEMENT (stmtinfo_a);
      prev = GROUP_FIRST_ELEMENT (stmtinfo_b);
      first_stmt = first_b;
    }
  else
    {
      /* Insert the nodes of DRB chain into the DRA chain.
         After inserting a node, continue from this node of the DRA chain
         (don't start from the beginning).  */
      node = GROUP_FIRST_ELEMENT (stmtinfo_b);
      prev = GROUP_FIRST_ELEMENT (stmtinfo_a);
      first_stmt = first_a;
    }

  while (node)
    {
      node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
      next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
      while (next)
        {
          next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
          if (tree_int_cst_compare (next_init, node_init) > 0)
            {
              /* Insert here.  */
              GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = node;
              GROUP_NEXT_ELEMENT (vinfo_for_stmt (node)) = next;
              prev = node;
              break;
            }
          prev = next;
          next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
        }
      if (!next)
        {
          /* We got to the end of the list.  Insert here.  */
          GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = node;
          GROUP_NEXT_ELEMENT (vinfo_for_stmt (node)) = NULL;
          prev = node;
        }
      GROUP_FIRST_ELEMENT (vinfo_for_stmt (node)) = first_stmt;
      node = GROUP_NEXT_ELEMENT (vinfo_for_stmt (node));
    }
}

/* Check dependence between DRA and DRB for basic block vectorization.
   If the accesses share same bases and offsets, we can compare their initial
   constant offsets to decide whether they differ or not.  In case of a read-
   write dependence we check that the load is before the store to ensure that
   vectorization will not change the order of the accesses.  */
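/* For illustration (hypothetical basic-block code): in

     x = a[0];
     a[0] = y;

   the load precedes the store to the same location, so basic block
   vectorization preserves the order and the pair is not treated as a
   blocking dependence; with the two statements swapped it would be.  */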

static bool
vect_drs_dependent_in_basic_block (struct data_reference *dra,
                                   struct data_reference *drb)
{
  HOST_WIDE_INT type_size_a, type_size_b, init_a, init_b;
  gimple earlier_stmt;

  /* We only call this function for pairs of loads and stores, but we verify
     it here.  */
  if (DR_IS_READ (dra) == DR_IS_READ (drb))
    {
      if (DR_IS_READ (dra))
        return false;
      else
        return true;
    }

  /* Check that the data-refs have same bases and offsets.  If not, we can't
     determine if they are dependent.  */
  if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
      || !dr_equal_offsets_p (dra, drb))
    return true;

  /* Check the types.  */
  type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
  type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));

  if (type_size_a != type_size_b
      || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
                              TREE_TYPE (DR_REF (drb))))
    return true;

  init_a = TREE_INT_CST_LOW (DR_INIT (dra));
  init_b = TREE_INT_CST_LOW (DR_INIT (drb));

  /* Two different locations - no dependence.  */
  if (init_a != init_b)
    return false;

  /* We have a read-write dependence.  Check that the load is before the store.
     When we vectorize basic blocks, vector load can be only before
     corresponding scalar load, and vector store can be only after its
     corresponding scalar store.  So the order of the accesses is preserved in
     case the load is before the store.  */
  earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
  if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
    return false;

  return true;
}


/* Function vect_check_interleaving.

   Check if DRA and DRB are a part of interleaving.  In case they are, insert
   DRA and DRB in an interleaving chain.  */
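/* As a hypothetical example, two stores a[2*i] and a[2*i+1] in a loop share
   the same base and offset, have the same step (2 * sizeof (a[0])), and
   their inits differ by sizeof (a[0]), which is a multiple of the type size
   and not larger than the step, so they are recorded as a single
   interleaving chain.  */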

static bool
vect_check_interleaving (struct data_reference *dra,
                         struct data_reference *drb)
{
  HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;

  /* Check that the data-refs have same first location (except init) and they
     are both either store or load (not load and store).  */
  if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
      || !dr_equal_offsets_p (dra, drb)
      || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
      || DR_IS_READ (dra) != DR_IS_READ (drb))
    return false;

  /* Check:
     1. data-refs are of the same type
     2. their steps are equal
     3. the step (if greater than zero) is not smaller than the difference
        between data-refs' inits.  */
  type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
  type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));

  if (type_size_a != type_size_b
      || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb))
      || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
                              TREE_TYPE (DR_REF (drb))))
    return false;

  init_a = TREE_INT_CST_LOW (DR_INIT (dra));
  init_b = TREE_INT_CST_LOW (DR_INIT (drb));
  step = TREE_INT_CST_LOW (DR_STEP (dra));

  if (init_a > init_b)
    {
      /* If init_a == init_b + the size of the type * k, we have an
         interleaving, and DRB is accessed before DRA.  */
      diff_mod_size = (init_a - init_b) % type_size_a;

      if (step && (init_a - init_b) > step)
        return false;

      if (diff_mod_size == 0)
        {
          vect_update_interleaving_chain (drb, dra);
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "Detected interleaving ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }
          return true;
        }
    }
  else
    {
      /* If init_b == init_a + the size of the type * k, we have an
         interleaving, and DRA is accessed before DRB.  */
      diff_mod_size = (init_b - init_a) % type_size_a;

      if (step && (init_b - init_a) > step)
        return false;

      if (diff_mod_size == 0)
        {
          vect_update_interleaving_chain (dra, drb);
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "Detected interleaving ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }
          return true;
        }
    }

  return false;
}

/* Check if data references pointed by DR_I and DR_J are same or
   belong to same interleaving group.  Return FALSE if drs are
   different, otherwise return TRUE.  */

static bool
vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
{
  gimple stmt_i = DR_STMT (dr_i);
  gimple stmt_j = DR_STMT (dr_j);

  if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
      || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_i))
          && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_j))
          && (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_i))
              == GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_j)))))
    return true;
  else
    return false;
}

/* If address ranges represented by DDR_I and DDR_J are equal,
   return TRUE, otherwise return FALSE.  */

static bool
vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
{
  if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
       && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
      || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
          && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
    return true;
  else
    return false;
}

/* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
   tested at run-time.  Return TRUE if DDR was successfully inserted.
   Return false if versioning is not supported.  */
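/* A hypothetical example of when this is used: in

     void f (int *p, int *q, int n)
     {
       int i;
       for (i = 0; i < n; i++)
         p[i] = q[i] + 1;
     }

   the dependence between p[i] and q[i] cannot be resolved at compile time,
   so the ddr is queued here and the loop is versioned with a run-time check
   that the two accessed ranges do not overlap.  */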

static bool
vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
    return false;

  if (vect_print_dump_info (REPORT_DR_DETAILS))
    {
      fprintf (vect_dump, "mark for run-time aliasing test between ");
      print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
      fprintf (vect_dump, " and ");
      print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
    }

  if (optimize_loop_nest_for_size_p (loop))
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump, "versioning not supported when optimizing for size.");
      return false;
    }

  /* FORNOW: We don't support versioning with outer-loop vectorization.  */
  if (loop->inner)
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump, "versioning not yet supported for outer-loops.");
      return false;
    }

  VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
  return true;
}


/* Function vect_analyze_data_ref_dependence.

   Return TRUE if there (might) exist a dependence between a memory-reference
   DRA and a memory-reference DRB.  When versioning for alias may check a
   dependence at run-time, return FALSE.  Adjust *MAX_VF according to
   the data dependence.  */
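/* For illustration (hypothetical loop): in

     for (i = 0; i < n; i++)
       a[i + 3] = a[i] + 1;

   the dependence distance is 3, so at most 3 iterations may run in
   parallel; *MAX_VF is capped at 3 below.  */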

static bool
vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
                                  loop_vec_info loop_vinfo, int *max_vf)
{
  unsigned int i;
  struct loop *loop = NULL;
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
  lambda_vector dist_v;
  unsigned int loop_depth;

  /* Don't bother to analyze statements marked as unvectorizable.  */
  if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
      || !STMT_VINFO_VECTORIZABLE (stmtinfo_b))
    return false;

  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    {
      /* Independent data accesses.  */
      vect_check_interleaving (dra, drb);
      return false;
    }

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  if ((DR_IS_READ (dra) && DR_IS_READ (drb) && loop_vinfo) || dra == drb)
    return false;

  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      gimple earlier_stmt;

      if (loop_vinfo)
        {
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "versioning for alias required: "
                       "can't determine dependence between ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }

          /* Add to list of ddrs that need to be tested at run-time.  */
          return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
        }

      /* When vectorizing a basic block, an unknown dependence can still mean
         a strided access.  */
      if (vect_check_interleaving (dra, drb))
        return false;

      /* Read-read is OK (we need this check here, after checking for
         interleaving).  */
      if (DR_IS_READ (dra) && DR_IS_READ (drb))
        return false;

      if (vect_print_dump_info (REPORT_DR_DETAILS))
        {
          fprintf (vect_dump, "can't determine dependence between ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }

      /* We do not vectorize basic blocks with write-write dependencies.  */
      if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
        return true;

      /* Check that it's not a load-after-store dependence.  */
      earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
      if (DR_IS_WRITE (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
        return true;

      return false;
    }

  /* Versioning for alias is not yet supported for basic block SLP, and
     dependence distance is inapplicable, hence, in case of known data
     dependence, basic block vectorization is impossible for now.  */
  if (!loop_vinfo)
    {
      if (dra != drb && vect_check_interleaving (dra, drb))
        return false;

      if (vect_print_dump_info (REPORT_DR_DETAILS))
        {
          fprintf (vect_dump, "determined dependence between ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }

      /* Do not vectorize basic blocks with write-write dependences.  */
      if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
        return true;

      /* Check if this dependence is allowed in basic block vectorization.  */
      return vect_drs_dependent_in_basic_block (dra, drb);
    }

  /* Loop-based vectorization and known data dependence.  */
  if (DDR_NUM_DIST_VECTS (ddr) == 0)
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        {
          fprintf (vect_dump, "versioning for alias required: bad dist vector for ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }
      /* Add to list of ddrs that need to be tested at run-time.  */
      return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
    }

  loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
  FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
    {
      int dist = dist_v[loop_depth];

      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump, "dependence distance = %d.", dist);

      if (dist == 0)
        {
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "dependence distance == 0 between ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }

          /* For interleaving, mark that there is a read-write dependency if
             necessary.  We check before that one of the data-refs is a store.  */
          if (DR_IS_READ (dra))
            GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
          else
            {
              if (DR_IS_READ (drb))
                GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
            }

          continue;
        }

      if (dist > 0 && DDR_REVERSED_P (ddr))
        {
          /* If DDR_REVERSED_P the order of the data-refs in DDR was
             reversed (to make distance vector positive), and the actual
             distance is negative.  */
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            fprintf (vect_dump, "dependence distance negative.");
          continue;
        }

      if (abs (dist) >= 2
          && abs (dist) < *max_vf)
        {
          /* The dependence distance requires reduction of the maximal
             vectorization factor.  */
          *max_vf = abs (dist);
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            fprintf (vect_dump, "adjusting maximal vectorization factor to %i",
                     *max_vf);
        }

      if (abs (dist) >= *max_vf)
        {
          /* Dependence distance does not create dependence, as far as
             vectorization is concerned, in this case.  */
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            fprintf (vect_dump, "dependence distance >= VF.");
          continue;
        }

      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        {
          fprintf (vect_dump, "not vectorized, possible dependence "
                   "between data-refs ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }

      return true;
    }

  return false;
}

/* Function vect_analyze_data_ref_dependences.

   Examine all the data references in the loop, and make sure there do not
   exist any data dependences between them.  Set *MAX_VF according to
   the maximum vectorization factor the data dependences allow.  */

bool
vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
                                   bb_vec_info bb_vinfo, int *max_vf)
{
  unsigned int i;
  VEC (ddr_p, heap) *ddrs = NULL;
  struct data_dependence_relation *ddr;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_dependences ===");

  if (loop_vinfo)
    ddrs = LOOP_VINFO_DDRS (loop_vinfo);
  else
    ddrs = BB_VINFO_DDRS (bb_vinfo);

  FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
    if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf))
      return false;

  return true;
}


/* Function vect_compute_data_ref_alignment

   Compute the misalignment of the data reference DR.

   Output:
   1. If during the misalignment computation it is found that the data reference
      cannot be vectorized then false is returned.
   2. DR_MISALIGNMENT (DR) is defined.

   FOR NOW: No analysis is actually performed.  Misalignment is calculated
   only for trivial cases.  TODO.  */
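/* For illustration (hypothetical numbers): for a 16-byte aligned array of
   4-byte ints accessed as a[i+1] with a 16-byte vector type, DR_INIT is 4
   and the recorded misalignment is 4 bytes (DR_INIT modulo the vector
   alignment).  */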

static bool
vect_compute_data_ref_alignment (struct data_reference *dr)
{
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = NULL;
  tree ref = DR_REF (dr);
  tree vectype;
  tree base, base_addr;
  bool base_aligned;
  tree misalign;
  tree aligned_to, alignment;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_compute_data_ref_alignment:");

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  /* Initialize misalignment to unknown.  */
  SET_DR_MISALIGNMENT (dr, -1);

  misalign = DR_INIT (dr);
  aligned_to = DR_ALIGNED_TO (dr);
  base_addr = DR_BASE_ADDRESS (dr);
  vectype = STMT_VINFO_VECTYPE (stmt_info);

  /* In case the dataref is in an inner-loop of the loop that is being
     vectorized (LOOP), we use the base and misalignment information
     relative to the outer-loop (LOOP).  This is ok only if the misalignment
     stays the same throughout the execution of the inner-loop, which is why
     we have to check that the stride of the dataref in the inner-loop evenly
     divides by the vector size.  */
  if (loop && nested_in_vect_loop_p (loop, stmt))
    {
      tree step = DR_STEP (dr);
      HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);

      if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
        {
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "inner step divides the vector-size.");
          misalign = STMT_VINFO_DR_INIT (stmt_info);
          aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
          base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
        }
      else
        {
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "inner step doesn't divide the vector-size.");
          misalign = NULL_TREE;
        }
    }

  base = build_fold_indirect_ref (base_addr);
  alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);

  if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
      || !misalign)
    {
      if (vect_print_dump_info (REPORT_ALIGNMENT))
        {
          fprintf (vect_dump, "Unknown alignment for access: ");
          print_generic_expr (vect_dump, base, TDF_SLIM);
        }
      return true;
    }

  if ((DECL_P (base)
       && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
                                alignment) >= 0)
      || (TREE_CODE (base_addr) == SSA_NAME
          && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
                                                  TREE_TYPE (base_addr)))),
                                   alignment) >= 0)
      || (get_pointer_alignment (base_addr) >= TYPE_ALIGN (vectype)))
    base_aligned = true;
  else
    base_aligned = false;

  if (!base_aligned)
    {
      /* Do not change the alignment of global variables if
         flag_section_anchors is enabled.  */
      if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
          || (TREE_STATIC (base) && flag_section_anchors))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "can't force alignment of ref: ");
              print_generic_expr (vect_dump, ref, TDF_SLIM);
            }
          return true;
        }

      /* Force the alignment of the decl.
         NOTE: This is the only change to the code we make during
         the analysis phase, before deciding to vectorize the loop.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "force alignment of ");
          print_generic_expr (vect_dump, ref, TDF_SLIM);
        }

      DECL_ALIGN (base) = TYPE_ALIGN (vectype);
      DECL_USER_ALIGN (base) = 1;
    }

  /* At this point we assume that the base is aligned.  */
  gcc_assert (base_aligned
              || (TREE_CODE (base) == VAR_DECL
                  && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));

  /* If this is a backward running DR then first access in the larger
     vectype actually is N-1 elements before the address in the DR.
     Adjust misalign accordingly.  */
  if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
    {
      tree offset = ssize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
      /* DR_STEP(dr) is the same as -TYPE_SIZE of the scalar type,
         otherwise we wouldn't be here.  */
      offset = fold_build2 (MULT_EXPR, ssizetype, offset, DR_STEP (dr));
      /* PLUS because DR_STEP was negative.  */
      misalign = size_binop (PLUS_EXPR, misalign, offset);
    }

  /* Modulo alignment.  */
  misalign = size_binop (FLOOR_MOD_EXPR, misalign, alignment);

  if (!host_integerp (misalign, 1))
    {
      /* Negative or overflowed misalignment value.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "unexpected misalign value");
      return false;
    }

  SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
      print_generic_expr (vect_dump, ref, TDF_SLIM);
    }

  return true;
}


/* Function vect_compute_data_refs_alignment

   Compute the misalignment of data references in the loop.
   Return FALSE if a data reference is found that cannot be vectorized.  */

static bool
vect_compute_data_refs_alignment (loop_vec_info loop_vinfo,
                                  bb_vec_info bb_vinfo)
{
  VEC (data_reference_p, heap) *datarefs;
  struct data_reference *dr;
  unsigned int i;

  if (loop_vinfo)
    datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  else
    datarefs = BB_VINFO_DATAREFS (bb_vinfo);

  FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
    if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
        && !vect_compute_data_ref_alignment (dr))
      {
        if (bb_vinfo)
          {
            /* Mark unsupported statement as unvectorizable.  */
            STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
            continue;
          }
        else
          return false;
      }

  return true;
}


/* Function vect_update_misalignment_for_peel

   DR - the data reference whose misalignment is to be adjusted.
   DR_PEEL - the data reference whose misalignment is being made
             zero in the vector loop by the peel.
   NPEEL - the number of iterations in the peel loop if the misalignment
           of DR_PEEL is known at compile time.  */
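/* A hypothetical example with 4-byte elements and 16-byte vectors: if
   DR_PEEL starts out misaligned by 8 bytes, the prologue peels NPEEL == 2
   iterations to align it; a data-ref DR with the same step that was
   misaligned by 4 bytes is then recorded as (4 + 2*4) % 16 == 12 bytes
   misaligned in the vector loop.  */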

static void
vect_update_misalignment_for_peel (struct data_reference *dr,
                                   struct data_reference *dr_peel, int npeel)
{
  unsigned int i;
  VEC(dr_p,heap) *same_align_drs;
  struct data_reference *current_dr;
  int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
  int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
  stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
  stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));

  /* For interleaved data accesses the step in the loop must be multiplied by
     the size of the interleaving group.  */
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
    dr_size *= GROUP_SIZE (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)));
  if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info))
    dr_peel_size *= GROUP_SIZE (peel_stmt_info);

  /* It can be assumed that the data refs with the same alignment as dr_peel
     are aligned in the vector loop.  */
  same_align_drs
    = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
  FOR_EACH_VEC_ELT (dr_p, same_align_drs, i, current_dr)
    {
      if (current_dr != dr)
        continue;
      gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
                  DR_MISALIGNMENT (dr_peel) / dr_peel_size);
      SET_DR_MISALIGNMENT (dr, 0);
      return;
    }

  if (known_alignment_for_access_p (dr)
      && known_alignment_for_access_p (dr_peel))
    {
      bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
      int misal = DR_MISALIGNMENT (dr);
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      misal += negative ? -npeel * dr_size : npeel * dr_size;
      misal &= (TYPE_ALIGN (vectype) / BITS_PER_UNIT) - 1;
      SET_DR_MISALIGNMENT (dr, misal);
      return;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "Setting misalignment to -1.");
  SET_DR_MISALIGNMENT (dr, -1);
}


/* Function vect_verify_datarefs_alignment

   Return TRUE if all data references in the loop can be
   handled with respect to alignment.  */

bool
vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  VEC (data_reference_p, heap) *datarefs;
  struct data_reference *dr;
  enum dr_alignment_support supportable_dr_alignment;
  unsigned int i;

  if (loop_vinfo)
    datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  else
    datarefs = BB_VINFO_DATAREFS (bb_vinfo);

  FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
    {
      gimple stmt = DR_STMT (dr);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

      /* For interleaving, only the alignment of the first access matters.
         Skip statements marked as not vectorizable.  */
      if ((STMT_VINFO_STRIDED_ACCESS (stmt_info)
           && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
          || !STMT_VINFO_VECTORIZABLE (stmt_info))
        continue;

      supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
      if (!supportable_dr_alignment)
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
            {
              if (DR_IS_READ (dr))
                fprintf (vect_dump,
                         "not vectorized: unsupported unaligned load.");
              else
                fprintf (vect_dump,
                         "not vectorized: unsupported unaligned store.");

              print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
            }
          return false;
        }
      if (supportable_dr_alignment != dr_aligned
          && vect_print_dump_info (REPORT_ALIGNMENT))
        fprintf (vect_dump, "Vectorizing an unaligned access.");
    }
  return true;
}


/* Function vector_alignment_reachable_p

   Return true if vector alignment for DR is reachable by peeling
   a few loop iterations.  Return false otherwise.  */
1095e4b17023SJohn Marino
1096e4b17023SJohn Marino static bool
1097e4b17023SJohn Marino vector_alignment_reachable_p (struct data_reference *dr)
1098e4b17023SJohn Marino {
1099e4b17023SJohn Marino gimple stmt = DR_STMT (dr);
1100e4b17023SJohn Marino stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1101e4b17023SJohn Marino tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1102e4b17023SJohn Marino
1103e4b17023SJohn Marino if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1104e4b17023SJohn Marino {
1105e4b17023SJohn Marino /* For interleaved accesses we peel only if the number of iterations
1106e4b17023SJohn Marino in the prolog loop ({VF - misalignment}) is a multiple of the
1107e4b17023SJohn Marino number of interleaved accesses. */
1108e4b17023SJohn Marino int elem_size, mis_in_elements;
1109e4b17023SJohn Marino int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1110e4b17023SJohn Marino
1111e4b17023SJohn Marino /* FORNOW: handle only known alignment. */
1112e4b17023SJohn Marino if (!known_alignment_for_access_p (dr))
1113e4b17023SJohn Marino return false;
1114e4b17023SJohn Marino
1115e4b17023SJohn Marino elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
1116e4b17023SJohn Marino mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
1117e4b17023SJohn Marino
1118e4b17023SJohn Marino if ((nelements - mis_in_elements) % GROUP_SIZE (stmt_info))
1119e4b17023SJohn Marino return false;
1120e4b17023SJohn Marino }
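/* Worked example (illustrative, added for clarity): for a V4SI access
   (nelements == 4, elem_size == 4 bytes), DR_MISALIGNMENT (dr) == 8 gives
   mis_in_elements == 2, so the prolog would peel 4 - 2 == 2 iterations.
   With GROUP_SIZE == 2 this is a multiple of the group size and peeling is
   considered; with GROUP_SIZE == 3 it is rejected here.  */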
1121e4b17023SJohn Marino
1122e4b17023SJohn Marino /* If the misalignment is known at compile time then allow peeling
1123e4b17023SJohn Marino only if natural alignment is reachable through peeling. */
1124e4b17023SJohn Marino if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
1125e4b17023SJohn Marino {
1126e4b17023SJohn Marino HOST_WIDE_INT elmsize =
1127e4b17023SJohn Marino int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1128e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
1129e4b17023SJohn Marino {
1130e4b17023SJohn Marino fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
1131e4b17023SJohn Marino fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
1132e4b17023SJohn Marino }
1133e4b17023SJohn Marino if (DR_MISALIGNMENT (dr) % elmsize)
1134e4b17023SJohn Marino {
1135e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
1136e4b17023SJohn Marino fprintf (vect_dump, "data size does not divide the misalignment.\n");
1137e4b17023SJohn Marino return false;
1138e4b17023SJohn Marino }
1139e4b17023SJohn Marino }
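/* For instance (illustrative): with a 4-byte element type, a known
   misalignment of 6 bytes can never be removed by peeling whole scalar
   iterations (6 % 4 != 0), whereas a misalignment of 8 bytes can.  */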
1140e4b17023SJohn Marino
1141e4b17023SJohn Marino if (!known_alignment_for_access_p (dr))
1142e4b17023SJohn Marino {
1143e4b17023SJohn Marino tree type = (TREE_TYPE (DR_REF (dr)));
1144e4b17023SJohn Marino bool is_packed = contains_packed_reference (DR_REF (dr));
1145e4b17023SJohn Marino
1146e4b17023SJohn Marino if (compare_tree_int (TYPE_SIZE (type), TYPE_ALIGN (type)) > 0)
1147e4b17023SJohn Marino is_packed = true;
1148e4b17023SJohn Marino
1149e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
1150e4b17023SJohn Marino fprintf (vect_dump, "Unknown misalignment, is_packed = %d",is_packed);
1151e4b17023SJohn Marino if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
1152e4b17023SJohn Marino return true;
1153e4b17023SJohn Marino else
1154e4b17023SJohn Marino return false;
1155e4b17023SJohn Marino }
1156e4b17023SJohn Marino
1157e4b17023SJohn Marino return true;
1158e4b17023SJohn Marino }
1159e4b17023SJohn Marino
1160e4b17023SJohn Marino
1161e4b17023SJohn Marino /* Calculate the cost of the memory access represented by DR. */
1162e4b17023SJohn Marino
1163e4b17023SJohn Marino static void
1164e4b17023SJohn Marino vect_get_data_access_cost (struct data_reference *dr,
1165e4b17023SJohn Marino unsigned int *inside_cost,
1166e4b17023SJohn Marino unsigned int *outside_cost)
1167e4b17023SJohn Marino {
1168e4b17023SJohn Marino gimple stmt = DR_STMT (dr);
1169e4b17023SJohn Marino stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1170e4b17023SJohn Marino int nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
1171e4b17023SJohn Marino loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1172e4b17023SJohn Marino int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1173e4b17023SJohn Marino int ncopies = vf / nunits;
1174e4b17023SJohn Marino bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1175e4b17023SJohn Marino
1176e4b17023SJohn Marino if (!supportable_dr_alignment)
1177e4b17023SJohn Marino *inside_cost = VECT_MAX_COST;
1178e4b17023SJohn Marino else
1179e4b17023SJohn Marino {
1180e4b17023SJohn Marino if (DR_IS_READ (dr))
1181e4b17023SJohn Marino vect_get_load_cost (dr, ncopies, true, inside_cost, outside_cost);
1182e4b17023SJohn Marino else
1183e4b17023SJohn Marino vect_get_store_cost (dr, ncopies, inside_cost);
1184e4b17023SJohn Marino }
1185e4b17023SJohn Marino
1186e4b17023SJohn Marino if (vect_print_dump_info (REPORT_COST))
1187e4b17023SJohn Marino fprintf (vect_dump, "vect_get_data_access_cost: inside_cost = %d, "
1188e4b17023SJohn Marino "outside_cost = %d.", *inside_cost, *outside_cost);
1189e4b17023SJohn Marino }
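/* Illustrative numbers (added for clarity): with a vectorization factor of 8
   and a V4SI vector type (nunits == 4), ncopies == 2, so the load/store cost
   above is accumulated for two vector accesses per vectorized iteration.
   An access whose alignment cannot be supported at all is charged
   VECT_MAX_COST, so any peeling option that leaves it unaligned loses the
   later cost comparison.  */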
1190e4b17023SJohn Marino
1191e4b17023SJohn Marino
1192e4b17023SJohn Marino static hashval_t
1193e4b17023SJohn Marino vect_peeling_hash (const void *elem)
1194e4b17023SJohn Marino {
1195e4b17023SJohn Marino const struct _vect_peel_info *peel_info;
1196e4b17023SJohn Marino
1197e4b17023SJohn Marino peel_info = (const struct _vect_peel_info *) elem;
1198e4b17023SJohn Marino return (hashval_t) peel_info->npeel;
1199e4b17023SJohn Marino }
1200e4b17023SJohn Marino
1201e4b17023SJohn Marino
1202e4b17023SJohn Marino static int
1203e4b17023SJohn Marino vect_peeling_hash_eq (const void *elem1, const void *elem2)
1204e4b17023SJohn Marino {
1205e4b17023SJohn Marino const struct _vect_peel_info *a, *b;
1206e4b17023SJohn Marino
1207e4b17023SJohn Marino a = (const struct _vect_peel_info *) elem1;
1208e4b17023SJohn Marino b = (const struct _vect_peel_info *) elem2;
1209e4b17023SJohn Marino return (a->npeel == b->npeel);
1210e4b17023SJohn Marino }
1211e4b17023SJohn Marino
1212e4b17023SJohn Marino
1213e4b17023SJohn Marino /* Insert DR into peeling hash table with NPEEL as key. */
1214e4b17023SJohn Marino
1215e4b17023SJohn Marino static void
1216e4b17023SJohn Marino vect_peeling_hash_insert (loop_vec_info loop_vinfo, struct data_reference *dr,
1217e4b17023SJohn Marino int npeel)
1218e4b17023SJohn Marino {
1219e4b17023SJohn Marino struct _vect_peel_info elem, *slot;
1220e4b17023SJohn Marino void **new_slot;
1221e4b17023SJohn Marino bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1222e4b17023SJohn Marino
1223e4b17023SJohn Marino elem.npeel = npeel;
1224e4b17023SJohn Marino slot = (vect_peel_info) htab_find (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1225e4b17023SJohn Marino &elem);
1226e4b17023SJohn Marino if (slot)
1227e4b17023SJohn Marino slot->count++;
1228e4b17023SJohn Marino else
1229e4b17023SJohn Marino {
1230e4b17023SJohn Marino slot = XNEW (struct _vect_peel_info);
1231e4b17023SJohn Marino slot->npeel = npeel;
1232e4b17023SJohn Marino slot->dr = dr;
1233e4b17023SJohn Marino slot->count = 1;
1234e4b17023SJohn Marino new_slot = htab_find_slot (LOOP_VINFO_PEELING_HTAB (loop_vinfo), slot,
1235e4b17023SJohn Marino INSERT);
1236e4b17023SJohn Marino *new_slot = slot;
1237e4b17023SJohn Marino }
1238e4b17023SJohn Marino
1239e4b17023SJohn Marino if (!supportable_dr_alignment && !flag_vect_cost_model)
1240e4b17023SJohn Marino slot->count += VECT_MAX_COST;
1241e4b17023SJohn Marino }
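/* Note (added for clarity): when the cost model is disabled, an access that
   is not supported when unaligned inflates the COUNT of its peeling amount
   by VECT_MAX_COST, so the "most frequent" traversal below is strongly
   biased towards a peeling amount that aligns such an access.  */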
1242e4b17023SJohn Marino
1243e4b17023SJohn Marino
1244e4b17023SJohn Marino /* Traverse peeling hash table to find peeling option that aligns maximum
1245e4b17023SJohn Marino number of data accesses. */
1246e4b17023SJohn Marino
1247e4b17023SJohn Marino static int
1248e4b17023SJohn Marino vect_peeling_hash_get_most_frequent (void **slot, void *data)
1249e4b17023SJohn Marino {
1250e4b17023SJohn Marino vect_peel_info elem = (vect_peel_info) *slot;
1251e4b17023SJohn Marino vect_peel_extended_info max = (vect_peel_extended_info) data;
1252e4b17023SJohn Marino
1253e4b17023SJohn Marino if (elem->count > max->peel_info.count
1254e4b17023SJohn Marino || (elem->count == max->peel_info.count
1255e4b17023SJohn Marino && max->peel_info.npeel > elem->npeel))
1256e4b17023SJohn Marino {
1257e4b17023SJohn Marino max->peel_info.npeel = elem->npeel;
1258e4b17023SJohn Marino max->peel_info.count = elem->count;
1259e4b17023SJohn Marino max->peel_info.dr = elem->dr;
1260e4b17023SJohn Marino }
1261e4b17023SJohn Marino
1262e4b17023SJohn Marino return 1;
1263e4b17023SJohn Marino }
1264e4b17023SJohn Marino
1265e4b17023SJohn Marino
1266e4b17023SJohn Marino /* Traverse peeling hash table and calculate cost for each peeling option.
1267e4b17023SJohn Marino Find the one with the lowest cost. */
1268e4b17023SJohn Marino
1269e4b17023SJohn Marino static int
1270e4b17023SJohn Marino vect_peeling_hash_get_lowest_cost (void **slot, void *data)
1271e4b17023SJohn Marino {
1272e4b17023SJohn Marino vect_peel_info elem = (vect_peel_info) *slot;
1273e4b17023SJohn Marino vect_peel_extended_info min = (vect_peel_extended_info) data;
1274e4b17023SJohn Marino int save_misalignment, dummy;
1275e4b17023SJohn Marino unsigned int inside_cost = 0, outside_cost = 0, i;
1276e4b17023SJohn Marino gimple stmt = DR_STMT (elem->dr);
1277e4b17023SJohn Marino stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1278e4b17023SJohn Marino loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1279e4b17023SJohn Marino VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1280e4b17023SJohn Marino struct data_reference *dr;
1281e4b17023SJohn Marino
1282e4b17023SJohn Marino FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1283e4b17023SJohn Marino {
1284e4b17023SJohn Marino stmt = DR_STMT (dr);
1285e4b17023SJohn Marino stmt_info = vinfo_for_stmt (stmt);
1286e4b17023SJohn Marino /* For interleaving, only the alignment of the first access
1287e4b17023SJohn Marino matters. */
1288e4b17023SJohn Marino if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1289e4b17023SJohn Marino && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1290e4b17023SJohn Marino continue;
1291e4b17023SJohn Marino
1292e4b17023SJohn Marino save_misalignment = DR_MISALIGNMENT (dr);
1293e4b17023SJohn Marino vect_update_misalignment_for_peel (dr, elem->dr, elem->npeel);
1294e4b17023SJohn Marino vect_get_data_access_cost (dr, &inside_cost, &outside_cost);
1295e4b17023SJohn Marino SET_DR_MISALIGNMENT (dr, save_misalignment);
1296e4b17023SJohn Marino }
1297e4b17023SJohn Marino
1298e4b17023SJohn Marino outside_cost += vect_get_known_peeling_cost (loop_vinfo, elem->npeel, &dummy,
12995ce9237cSJohn Marino vect_get_single_scalar_iteration_cost (loop_vinfo));
1300e4b17023SJohn Marino
1301e4b17023SJohn Marino if (inside_cost < min->inside_cost
1302e4b17023SJohn Marino || (inside_cost == min->inside_cost && outside_cost < min->outside_cost))
1303e4b17023SJohn Marino {
1304e4b17023SJohn Marino min->inside_cost = inside_cost;
1305e4b17023SJohn Marino min->outside_cost = outside_cost;
1306e4b17023SJohn Marino min->peel_info.dr = elem->dr;
1307e4b17023SJohn Marino min->peel_info.npeel = elem->npeel;
1308e4b17023SJohn Marino }
1309e4b17023SJohn Marino
1310e4b17023SJohn Marino return 1;
1311e4b17023SJohn Marino }
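/* Illustrative comparison (added for clarity): if peeling by 2 yields
   inside/outside costs of 10/4 and peeling by 6 yields 10/6, the first
   option is kept; the outside (prologue/epilogue) cost only breaks ties
   between equal inside costs.  */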
1312e4b17023SJohn Marino
1313e4b17023SJohn Marino
1314e4b17023SJohn Marino /* Choose best peeling option by traversing peeling hash table and either
1315e4b17023SJohn Marino choosing an option with the lowest cost (if cost model is enabled) or the
1316e4b17023SJohn Marino option that aligns as many accesses as possible. */
1317e4b17023SJohn Marino
1318e4b17023SJohn Marino static struct data_reference *
1319e4b17023SJohn Marino vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo,
1320e4b17023SJohn Marino unsigned int *npeel)
1321e4b17023SJohn Marino {
1322e4b17023SJohn Marino struct _vect_peel_extended_info res;
1323e4b17023SJohn Marino
1324e4b17023SJohn Marino res.peel_info.dr = NULL;
1325e4b17023SJohn Marino
1326e4b17023SJohn Marino if (flag_vect_cost_model)
1327e4b17023SJohn Marino {
1328e4b17023SJohn Marino res.inside_cost = INT_MAX;
1329e4b17023SJohn Marino res.outside_cost = INT_MAX;
1330e4b17023SJohn Marino htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1331e4b17023SJohn Marino vect_peeling_hash_get_lowest_cost, &res);
1332e4b17023SJohn Marino }
1333e4b17023SJohn Marino else
1334e4b17023SJohn Marino {
1335e4b17023SJohn Marino res.peel_info.count = 0;
1336e4b17023SJohn Marino htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1337e4b17023SJohn Marino vect_peeling_hash_get_most_frequent, &res);
1338e4b17023SJohn Marino }
1339e4b17023SJohn Marino
1340e4b17023SJohn Marino *npeel = res.peel_info.npeel;
1341e4b17023SJohn Marino return res.peel_info.dr;
1342e4b17023SJohn Marino }
1343e4b17023SJohn Marino
1344e4b17023SJohn Marino
1345e4b17023SJohn Marino /* Function vect_enhance_data_refs_alignment
1346e4b17023SJohn Marino
1347e4b17023SJohn Marino This pass will use loop versioning and loop peeling in order to enhance
1348e4b17023SJohn Marino the alignment of data references in the loop.
1349e4b17023SJohn Marino
1350e4b17023SJohn Marino FOR NOW: we assume that whatever versioning/peeling takes place, only the
1351e4b17023SJohn Marino original loop is to be vectorized. Any other loops that are created by
1352e4b17023SJohn Marino the transformations performed in this pass are not supposed to be
1353e4b17023SJohn Marino vectorized. This restriction will be relaxed.
1354e4b17023SJohn Marino
1355e4b17023SJohn Marino This pass will require a cost model to guide it in whether to apply peeling
1356e4b17023SJohn Marino or versioning or a combination of the two. For example, the scheme that
1357e4b17023SJohn Marino Intel uses when given a loop with several memory accesses is as follows:
1358e4b17023SJohn Marino choose one memory access ('p') whose alignment you want to force by doing
1359e4b17023SJohn Marino peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1360e4b17023SJohn Marino other accesses are not necessarily aligned, or (2) use loop versioning to
1361e4b17023SJohn Marino generate one loop in which all accesses are aligned, and another loop in
1362e4b17023SJohn Marino which only 'p' is necessarily aligned.
1363e4b17023SJohn Marino
1364e4b17023SJohn Marino ("Automatic Intra-Register Vectorization for the Intel Architecture",
1365e4b17023SJohn Marino Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
1366e4b17023SJohn Marino Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1367e4b17023SJohn Marino
1368e4b17023SJohn Marino Devising a cost model is the most critical aspect of this work. It will
1369e4b17023SJohn Marino guide us on which access to peel for, whether to use loop versioning, how
1370e4b17023SJohn Marino many versions to create, etc. The cost model will probably consist of
1371e4b17023SJohn Marino generic considerations as well as target specific considerations (on
1372e4b17023SJohn Marino powerpc for example, misaligned stores are more painful than misaligned
1373e4b17023SJohn Marino loads).
1374e4b17023SJohn Marino
1375e4b17023SJohn Marino Here are the general steps involved in alignment enhancements:
1376e4b17023SJohn Marino
1377e4b17023SJohn Marino -- original loop, before alignment analysis:
1378e4b17023SJohn Marino for (i=0; i<N; i++){
1379e4b17023SJohn Marino x = q[i]; # DR_MISALIGNMENT(q) = unknown
1380e4b17023SJohn Marino p[i] = y; # DR_MISALIGNMENT(p) = unknown
1381e4b17023SJohn Marino }
1382e4b17023SJohn Marino
1383e4b17023SJohn Marino -- After vect_compute_data_refs_alignment:
1384e4b17023SJohn Marino for (i=0; i<N; i++){
1385e4b17023SJohn Marino x = q[i]; # DR_MISALIGNMENT(q) = 3
1386e4b17023SJohn Marino p[i] = y; # DR_MISALIGNMENT(p) = unknown
1387e4b17023SJohn Marino }
1388e4b17023SJohn Marino
1389e4b17023SJohn Marino -- Possibility 1: we do loop versioning:
1390e4b17023SJohn Marino if (p is aligned) {
1391e4b17023SJohn Marino for (i=0; i<N; i++){ # loop 1A
1392e4b17023SJohn Marino x = q[i]; # DR_MISALIGNMENT(q) = 3
1393e4b17023SJohn Marino p[i] = y; # DR_MISALIGNMENT(p) = 0
1394e4b17023SJohn Marino }
1395e4b17023SJohn Marino }
1396e4b17023SJohn Marino else {
1397e4b17023SJohn Marino for (i=0; i<N; i++){ # loop 1B
1398e4b17023SJohn Marino x = q[i]; # DR_MISALIGNMENT(q) = 3
1399e4b17023SJohn Marino p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1400e4b17023SJohn Marino }
1401e4b17023SJohn Marino }
1402e4b17023SJohn Marino
1403e4b17023SJohn Marino -- Possibility 2: we do loop peeling:
1404e4b17023SJohn Marino for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1405e4b17023SJohn Marino x = q[i];
1406e4b17023SJohn Marino p[i] = y;
1407e4b17023SJohn Marino }
1408e4b17023SJohn Marino for (i = 3; i < N; i++){ # loop 2A
1409e4b17023SJohn Marino x = q[i]; # DR_MISALIGNMENT(q) = 0
1410e4b17023SJohn Marino p[i] = y; # DR_MISALIGNMENT(p) = unknown
1411e4b17023SJohn Marino }
1412e4b17023SJohn Marino
1413e4b17023SJohn Marino -- Possibility 3: combination of loop peeling and versioning:
1414e4b17023SJohn Marino for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1415e4b17023SJohn Marino x = q[i];
1416e4b17023SJohn Marino p[i] = y;
1417e4b17023SJohn Marino }
1418e4b17023SJohn Marino if (p is aligned) {
1419e4b17023SJohn Marino for (i = 3; i<N; i++){ # loop 3A
1420e4b17023SJohn Marino x = q[i]; # DR_MISALIGNMENT(q) = 0
1421e4b17023SJohn Marino p[i] = y; # DR_MISALIGNMENT(p) = 0
1422e4b17023SJohn Marino }
1423e4b17023SJohn Marino }
1424e4b17023SJohn Marino else {
1425e4b17023SJohn Marino for (i = 3; i<N; i++){ # loop 3B
1426e4b17023SJohn Marino x = q[i]; # DR_MISALIGNMENT(q) = 0
1427e4b17023SJohn Marino p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1428e4b17023SJohn Marino }
1429e4b17023SJohn Marino }
1430e4b17023SJohn Marino
1431e4b17023SJohn Marino These loops are later passed to loop_transform to be vectorized. The
1432e4b17023SJohn Marino vectorizer will use the alignment information to guide the transformation
1433e4b17023SJohn Marino (whether to generate regular loads/stores, or with special handling for
1434e4b17023SJohn Marino misalignment). */
1435e4b17023SJohn Marino
1436e4b17023SJohn Marino bool
1437e4b17023SJohn Marino vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1438e4b17023SJohn Marino {
1439e4b17023SJohn Marino VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1440e4b17023SJohn Marino struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1441e4b17023SJohn Marino enum dr_alignment_support supportable_dr_alignment;
1442e4b17023SJohn Marino struct data_reference *dr0 = NULL, *first_store = NULL;
1443e4b17023SJohn Marino struct data_reference *dr;
1444e4b17023SJohn Marino unsigned int i, j;
1445e4b17023SJohn Marino bool do_peeling = false;
1446e4b17023SJohn Marino bool do_versioning = false;
1447e4b17023SJohn Marino bool stat;
1448e4b17023SJohn Marino gimple stmt;
1449e4b17023SJohn Marino stmt_vec_info stmt_info;
1450e4b17023SJohn Marino int vect_versioning_for_alias_required;
1451e4b17023SJohn Marino unsigned int npeel = 0;
1452e4b17023SJohn Marino bool all_misalignments_unknown = true;
1453e4b17023SJohn Marino unsigned int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1454e4b17023SJohn Marino unsigned possible_npeel_number = 1;
1455e4b17023SJohn Marino tree vectype;
1456e4b17023SJohn Marino unsigned int nelements, mis, same_align_drs_max = 0;
1457e4b17023SJohn Marino
1458e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
1459e4b17023SJohn Marino fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1460e4b17023SJohn Marino
1461e4b17023SJohn Marino /* While cost model enhancements are expected in the future, the high level
1462e4b17023SJohn Marino view of the code at this time is as follows:
1463e4b17023SJohn Marino
1464e4b17023SJohn Marino A) If there is a misaligned access then see if peeling to align
1465e4b17023SJohn Marino this access can make all data references satisfy
1466e4b17023SJohn Marino vect_supportable_dr_alignment. If so, update data structures
1467e4b17023SJohn Marino as needed and return true.
1468e4b17023SJohn Marino
1469e4b17023SJohn Marino B) If peeling wasn't possible and there is a data reference with an
1470e4b17023SJohn Marino unknown misalignment that does not satisfy vect_supportable_dr_alignment
1471e4b17023SJohn Marino then see if loop versioning checks can be used to make all data
1472e4b17023SJohn Marino references satisfy vect_supportable_dr_alignment. If so, update
1473e4b17023SJohn Marino data structures as needed and return true.
1474e4b17023SJohn Marino
1475e4b17023SJohn Marino C) If neither peeling nor versioning were successful then return false if
1476e4b17023SJohn Marino any data reference does not satisfy vect_supportable_dr_alignment.
1477e4b17023SJohn Marino
1478e4b17023SJohn Marino D) Return true (all data references satisfy vect_supportable_dr_alignment).
1479e4b17023SJohn Marino
1480e4b17023SJohn Marino Note, Possibility 3 above (which is peeling and versioning together) is not
1481e4b17023SJohn Marino being done at this time. */
1482e4b17023SJohn Marino
1483e4b17023SJohn Marino /* (1) Peeling to force alignment. */
1484e4b17023SJohn Marino
1485e4b17023SJohn Marino /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1486e4b17023SJohn Marino Considerations:
1487e4b17023SJohn Marino + How many accesses will become aligned due to the peeling
1488e4b17023SJohn Marino - How many accesses will become unaligned due to the peeling,
1489e4b17023SJohn Marino and the cost of misaligned accesses.
1490e4b17023SJohn Marino - The cost of peeling (the extra runtime checks, the increase
1491e4b17023SJohn Marino in code size). */
1492e4b17023SJohn Marino
1493e4b17023SJohn Marino FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1494e4b17023SJohn Marino {
1495e4b17023SJohn Marino stmt = DR_STMT (dr);
1496e4b17023SJohn Marino stmt_info = vinfo_for_stmt (stmt);
1497e4b17023SJohn Marino
1498e4b17023SJohn Marino if (!STMT_VINFO_RELEVANT (stmt_info))
1499e4b17023SJohn Marino continue;
1500e4b17023SJohn Marino
1501e4b17023SJohn Marino /* For interleaving, only the alignment of the first access
1502e4b17023SJohn Marino matters. */
1503e4b17023SJohn Marino if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1504e4b17023SJohn Marino && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1505e4b17023SJohn Marino continue;
1506e4b17023SJohn Marino
1507e4b17023SJohn Marino /* For invariant accesses there is nothing to enhance. */
1508e4b17023SJohn Marino if (integer_zerop (DR_STEP (dr)))
1509e4b17023SJohn Marino continue;
1510e4b17023SJohn Marino
1511e4b17023SJohn Marino supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1512e4b17023SJohn Marino do_peeling = vector_alignment_reachable_p (dr);
1513e4b17023SJohn Marino if (do_peeling)
1514e4b17023SJohn Marino {
1515e4b17023SJohn Marino if (known_alignment_for_access_p (dr))
1516e4b17023SJohn Marino {
1517e4b17023SJohn Marino unsigned int npeel_tmp;
1518e4b17023SJohn Marino bool negative = tree_int_cst_compare (DR_STEP (dr),
1519e4b17023SJohn Marino size_zero_node) < 0;
1520e4b17023SJohn Marino
1521e4b17023SJohn Marino /* Save info about DR in the hash table. */
1522e4b17023SJohn Marino if (!LOOP_VINFO_PEELING_HTAB (loop_vinfo))
1523e4b17023SJohn Marino LOOP_VINFO_PEELING_HTAB (loop_vinfo) =
1524e4b17023SJohn Marino htab_create (1, vect_peeling_hash,
1525e4b17023SJohn Marino vect_peeling_hash_eq, free);
1526e4b17023SJohn Marino
1527e4b17023SJohn Marino vectype = STMT_VINFO_VECTYPE (stmt_info);
1528e4b17023SJohn Marino nelements = TYPE_VECTOR_SUBPARTS (vectype);
1529e4b17023SJohn Marino mis = DR_MISALIGNMENT (dr) / GET_MODE_SIZE (TYPE_MODE (
1530e4b17023SJohn Marino TREE_TYPE (DR_REF (dr))));
1531e4b17023SJohn Marino npeel_tmp = (negative
1532e4b17023SJohn Marino ? (mis - nelements) : (nelements - mis))
1533e4b17023SJohn Marino & (nelements - 1);
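/* Illustrative example (added for clarity): for a V4SI access
   (nelements == 4, element size 4 bytes) with DR_MISALIGNMENT == 8 and a
   positive step, mis == 8 / 4 == 2 and npeel_tmp == (4 - 2) & 3 == 2,
   i.e. two scalar iterations would align this access.  */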
1534e4b17023SJohn Marino
1535e4b17023SJohn Marino /* For multiple types, it is possible that the bigger type access
1536e4b17023SJohn Marino will have more than one peeling option. E.g., a loop with two
1537e4b17023SJohn Marino types: one of size (vector size / 4), and the other one of
1538e4b17023SJohn Marino size (vector size / 8). The vectorization factor will be 8.
1539e4b17023SJohn Marino If both accesses are misaligned by 3, the first one needs one
1540e4b17023SJohn Marino scalar iteration to be aligned, and the second one needs 5.
1541e4b17023SJohn Marino But the first one will also be aligned by peeling 5 scalar
1542e4b17023SJohn Marino iterations, and in that case both accesses will be aligned.
1543e4b17023SJohn Marino Hence, except for the immediate peeling amount, we also want
1544e4b17023SJohn Marino to try to add a full vector size, as long as we don't exceed
1545e4b17023SJohn Marino the vectorization factor.
1546e4b17023SJohn Marino We do this automatically when the cost model is used, since we
1547e4b17023SJohn Marino calculate the cost for every peeling option. */
1548e4b17023SJohn Marino if (!flag_vect_cost_model)
1549e4b17023SJohn Marino possible_npeel_number = vf / nelements;
1550e4b17023SJohn Marino
1551e4b17023SJohn Marino /* Handle the aligned case. We may decide to align some other
1552e4b17023SJohn Marino access, making DR unaligned. */
1553e4b17023SJohn Marino if (DR_MISALIGNMENT (dr) == 0)
1554e4b17023SJohn Marino {
1555e4b17023SJohn Marino npeel_tmp = 0;
1556e4b17023SJohn Marino if (!flag_vect_cost_model)
1557e4b17023SJohn Marino possible_npeel_number++;
1558e4b17023SJohn Marino }
1559e4b17023SJohn Marino
1560e4b17023SJohn Marino for (j = 0; j < possible_npeel_number; j++)
1561e4b17023SJohn Marino {
1562e4b17023SJohn Marino gcc_assert (npeel_tmp <= vf);
1563e4b17023SJohn Marino vect_peeling_hash_insert (loop_vinfo, dr, npeel_tmp);
1564e4b17023SJohn Marino npeel_tmp += nelements;
1565e4b17023SJohn Marino }
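/* Continuing the example above (illustrative): with npeel_tmp == 2,
   nelements == 4, vf == 8 and the cost model disabled,
   possible_npeel_number == 2, so the peeling amounts 2 and 6 are both
   recorded in the hash table.  */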
1566e4b17023SJohn Marino
1567e4b17023SJohn Marino all_misalignments_unknown = false;
1568e4b17023SJohn Marino /* Data-ref that was chosen for the case that all the
1569e4b17023SJohn Marino misalignments are unknown is not relevant anymore, since we
1570e4b17023SJohn Marino have a data-ref with known alignment. */
1571e4b17023SJohn Marino dr0 = NULL;
1572e4b17023SJohn Marino }
1573e4b17023SJohn Marino else
1574e4b17023SJohn Marino {
1575e4b17023SJohn Marino /* If we don't know all the misalignment values, we prefer
1576e4b17023SJohn Marino peeling for the data-ref that has the maximum number of data-refs
1577e4b17023SJohn Marino with the same alignment, unless the target prefers to align
1578e4b17023SJohn Marino stores over loads. */
1579e4b17023SJohn Marino if (all_misalignments_unknown)
1580e4b17023SJohn Marino {
1581e4b17023SJohn Marino if (same_align_drs_max < VEC_length (dr_p,
1582e4b17023SJohn Marino STMT_VINFO_SAME_ALIGN_REFS (stmt_info))
1583e4b17023SJohn Marino || !dr0)
1584e4b17023SJohn Marino {
1585e4b17023SJohn Marino same_align_drs_max = VEC_length (dr_p,
1586e4b17023SJohn Marino STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
1587e4b17023SJohn Marino dr0 = dr;
1588e4b17023SJohn Marino }
1589e4b17023SJohn Marino
1590e4b17023SJohn Marino if (!first_store && DR_IS_WRITE (dr))
1591e4b17023SJohn Marino first_store = dr;
1592e4b17023SJohn Marino }
1593e4b17023SJohn Marino
1594e4b17023SJohn Marino /* If there are both known and unknown misaligned accesses in the
1595e4b17023SJohn Marino loop, we choose the peeling amount according to the known
1596e4b17023SJohn Marino accesses. */
1597e4b17023SJohn Marino
1598e4b17023SJohn Marino
1599e4b17023SJohn Marino if (!supportable_dr_alignment)
1600e4b17023SJohn Marino {
1601e4b17023SJohn Marino dr0 = dr;
1602e4b17023SJohn Marino if (!first_store && DR_IS_WRITE (dr))
1603e4b17023SJohn Marino first_store = dr;
1604e4b17023SJohn Marino }
1605e4b17023SJohn Marino }
1606e4b17023SJohn Marino }
1607e4b17023SJohn Marino else
1608e4b17023SJohn Marino {
1609e4b17023SJohn Marino if (!aligned_access_p (dr))
1610e4b17023SJohn Marino {
1611e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
1612e4b17023SJohn Marino fprintf (vect_dump, "vector alignment may not be reachable");
1613e4b17023SJohn Marino
1614e4b17023SJohn Marino break;
1615e4b17023SJohn Marino }
1616e4b17023SJohn Marino }
1617e4b17023SJohn Marino }
1618e4b17023SJohn Marino
1619e4b17023SJohn Marino vect_versioning_for_alias_required
1620e4b17023SJohn Marino = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
1621e4b17023SJohn Marino
1622e4b17023SJohn Marino /* Temporarily, if versioning for alias is required, we disable peeling
1623e4b17023SJohn Marino until we support peeling and versioning. Often peeling for alignment
1624e4b17023SJohn Marino will require peeling for loop-bound, which in turn requires that we
1625e4b17023SJohn Marino know how to adjust the loop ivs after the loop. */
1626e4b17023SJohn Marino if (vect_versioning_for_alias_required
1627e4b17023SJohn Marino || !vect_can_advance_ivs_p (loop_vinfo)
1628e4b17023SJohn Marino || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1629e4b17023SJohn Marino do_peeling = false;
1630e4b17023SJohn Marino
1631e4b17023SJohn Marino if (do_peeling && all_misalignments_unknown
1632e4b17023SJohn Marino && vect_supportable_dr_alignment (dr0, false))
1633e4b17023SJohn Marino {
1634e4b17023SJohn Marino
1635e4b17023SJohn Marino /* Check if the target requires us to prefer stores over loads, i.e., if
1636e4b17023SJohn Marino misaligned stores are more expensive than misaligned loads (taking
1637e4b17023SJohn Marino drs with the same alignment into account). */
1638e4b17023SJohn Marino if (first_store && DR_IS_READ (dr0))
1639e4b17023SJohn Marino {
1640e4b17023SJohn Marino unsigned int load_inside_cost = 0, load_outside_cost = 0;
1641e4b17023SJohn Marino unsigned int store_inside_cost = 0, store_outside_cost = 0;
1642e4b17023SJohn Marino unsigned int load_inside_penalty = 0, load_outside_penalty = 0;
1643e4b17023SJohn Marino unsigned int store_inside_penalty = 0, store_outside_penalty = 0;
1644e4b17023SJohn Marino
1645e4b17023SJohn Marino vect_get_data_access_cost (dr0, &load_inside_cost,
1646e4b17023SJohn Marino &load_outside_cost);
1647e4b17023SJohn Marino vect_get_data_access_cost (first_store, &store_inside_cost,
1648e4b17023SJohn Marino &store_outside_cost);
1649e4b17023SJohn Marino
1650e4b17023SJohn Marino /* Calculate the penalty for leaving FIRST_STORE unaligned (by
1651e4b17023SJohn Marino aligning the load DR0). */
1652e4b17023SJohn Marino load_inside_penalty = store_inside_cost;
1653e4b17023SJohn Marino load_outside_penalty = store_outside_cost;
1654e4b17023SJohn Marino for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1655e4b17023SJohn Marino (vinfo_for_stmt (DR_STMT (first_store))),
1656e4b17023SJohn Marino i, dr);
1657e4b17023SJohn Marino i++)
1658e4b17023SJohn Marino if (DR_IS_READ (dr))
1659e4b17023SJohn Marino {
1660e4b17023SJohn Marino load_inside_penalty += load_inside_cost;
1661e4b17023SJohn Marino load_outside_penalty += load_outside_cost;
1662e4b17023SJohn Marino }
1663e4b17023SJohn Marino else
1664e4b17023SJohn Marino {
1665e4b17023SJohn Marino load_inside_penalty += store_inside_cost;
1666e4b17023SJohn Marino load_outside_penalty += store_outside_cost;
1667e4b17023SJohn Marino }
1668e4b17023SJohn Marino
1669e4b17023SJohn Marino /* Calculate the penalty for leaving DR0 unaligned (by
1670e4b17023SJohn Marino aligning the FIRST_STORE). */
1671e4b17023SJohn Marino store_inside_penalty = load_inside_cost;
1672e4b17023SJohn Marino store_outside_penalty = load_outside_cost;
1673e4b17023SJohn Marino for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1674e4b17023SJohn Marino (vinfo_for_stmt (DR_STMT (dr0))),
1675e4b17023SJohn Marino i, dr);
1676e4b17023SJohn Marino i++)
1677e4b17023SJohn Marino if (DR_IS_READ (dr))
1678e4b17023SJohn Marino {
1679e4b17023SJohn Marino store_inside_penalty += load_inside_cost;
1680e4b17023SJohn Marino store_outside_penalty += load_outside_cost;
1681e4b17023SJohn Marino }
1682e4b17023SJohn Marino else
1683e4b17023SJohn Marino {
1684e4b17023SJohn Marino store_inside_penalty += store_inside_cost;
1685e4b17023SJohn Marino store_outside_penalty += store_outside_cost;
1686e4b17023SJohn Marino }
1687e4b17023SJohn Marino
1688e4b17023SJohn Marino if (load_inside_penalty > store_inside_penalty
1689e4b17023SJohn Marino || (load_inside_penalty == store_inside_penalty
1690e4b17023SJohn Marino && load_outside_penalty > store_outside_penalty))
1691e4b17023SJohn Marino dr0 = first_store;
1692e4b17023SJohn Marino }
1693e4b17023SJohn Marino
1694e4b17023SJohn Marino /* In case there are only loads with different unknown misalignments, use
1695e4b17023SJohn Marino peeling only if it may help to align other accesses in the loop. */
1696e4b17023SJohn Marino if (!first_store && !VEC_length (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1697e4b17023SJohn Marino (vinfo_for_stmt (DR_STMT (dr0))))
1698e4b17023SJohn Marino && vect_supportable_dr_alignment (dr0, false)
1699e4b17023SJohn Marino != dr_unaligned_supported)
1700e4b17023SJohn Marino do_peeling = false;
1701e4b17023SJohn Marino }
1702e4b17023SJohn Marino
1703e4b17023SJohn Marino if (do_peeling && !dr0)
1704e4b17023SJohn Marino {
1705e4b17023SJohn Marino /* Peeling is possible, but there is no data access that is unsupported
1706e4b17023SJohn Marino when unaligned. So we try to choose the best possible peeling. */
1707e4b17023SJohn Marino
1708e4b17023SJohn Marino /* We should get here only if there are drs with known misalignment. */
1709e4b17023SJohn Marino gcc_assert (!all_misalignments_unknown);
1710e4b17023SJohn Marino
1711e4b17023SJohn Marino /* Choose the best peeling from the hash table. */
1712e4b17023SJohn Marino dr0 = vect_peeling_hash_choose_best_peeling (loop_vinfo, &npeel);
1713e4b17023SJohn Marino if (!dr0 || !npeel)
1714e4b17023SJohn Marino do_peeling = false;
1715e4b17023SJohn Marino }
1716e4b17023SJohn Marino
1717e4b17023SJohn Marino if (do_peeling)
1718e4b17023SJohn Marino {
1719e4b17023SJohn Marino stmt = DR_STMT (dr0);
1720e4b17023SJohn Marino stmt_info = vinfo_for_stmt (stmt);
1721e4b17023SJohn Marino vectype = STMT_VINFO_VECTYPE (stmt_info);
1722e4b17023SJohn Marino nelements = TYPE_VECTOR_SUBPARTS (vectype);
1723e4b17023SJohn Marino
1724e4b17023SJohn Marino if (known_alignment_for_access_p (dr0))
1725e4b17023SJohn Marino {
1726e4b17023SJohn Marino bool negative = tree_int_cst_compare (DR_STEP (dr0),
1727e4b17023SJohn Marino size_zero_node) < 0;
1728e4b17023SJohn Marino if (!npeel)
1729e4b17023SJohn Marino {
1730e4b17023SJohn Marino /* Since it's known at compile time, compute the number of
1731e4b17023SJohn Marino iterations in the peeled loop (the peeling factor) for use in
1732e4b17023SJohn Marino updating DR_MISALIGNMENT values. The peeling factor is the
1733e4b17023SJohn Marino vectorization factor minus the misalignment as an element
1734e4b17023SJohn Marino count. */
1735e4b17023SJohn Marino mis = DR_MISALIGNMENT (dr0);
1736e4b17023SJohn Marino mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1737e4b17023SJohn Marino npeel = ((negative ? mis - nelements : nelements - mis)
1738e4b17023SJohn Marino & (nelements - 1));
1739e4b17023SJohn Marino }
1740e4b17023SJohn Marino
1741e4b17023SJohn Marino /* For interleaved data access every iteration accesses all the
1742e4b17023SJohn Marino members of the group, therefore we divide the number of iterations
1743e4b17023SJohn Marino by the group size. */
1744e4b17023SJohn Marino stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1745e4b17023SJohn Marino if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1746e4b17023SJohn Marino npeel /= GROUP_SIZE (stmt_info);
1747e4b17023SJohn Marino
1748e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
1749e4b17023SJohn Marino fprintf (vect_dump, "Try peeling by %d", npeel);
1750e4b17023SJohn Marino }
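/* Worked example (illustrative, added for clarity): for DR0 with a 4-byte
   element type, DR_MISALIGNMENT == 8 and nelements == 4, mis == 2 and
   npeel == (4 - 2) & 3 == 2.  If DR0 is the first access of an interleaving
   group of size 2, every peeled iteration covers the whole group, so npeel
   is divided down to 1.  */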
1751e4b17023SJohn Marino
1752e4b17023SJohn Marino /* Ensure that all data refs can be vectorized after the peel. */
1753e4b17023SJohn Marino FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1754e4b17023SJohn Marino {
1755e4b17023SJohn Marino int save_misalignment;
1756e4b17023SJohn Marino
1757e4b17023SJohn Marino if (dr == dr0)
1758e4b17023SJohn Marino continue;
1759e4b17023SJohn Marino
1760e4b17023SJohn Marino stmt = DR_STMT (dr);
1761e4b17023SJohn Marino stmt_info = vinfo_for_stmt (stmt);
1762e4b17023SJohn Marino /* For interleaving, only the alignment of the first access
1763e4b17023SJohn Marino matters. */
1764e4b17023SJohn Marino if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1765e4b17023SJohn Marino && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1766e4b17023SJohn Marino continue;
1767e4b17023SJohn Marino
1768e4b17023SJohn Marino save_misalignment = DR_MISALIGNMENT (dr);
1769e4b17023SJohn Marino vect_update_misalignment_for_peel (dr, dr0, npeel);
1770e4b17023SJohn Marino supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1771e4b17023SJohn Marino SET_DR_MISALIGNMENT (dr, save_misalignment);
1772e4b17023SJohn Marino
1773e4b17023SJohn Marino if (!supportable_dr_alignment)
1774e4b17023SJohn Marino {
1775e4b17023SJohn Marino do_peeling = false;
1776e4b17023SJohn Marino break;
1777e4b17023SJohn Marino }
1778e4b17023SJohn Marino }
1779e4b17023SJohn Marino
1780e4b17023SJohn Marino if (do_peeling && known_alignment_for_access_p (dr0) && npeel == 0)
1781e4b17023SJohn Marino {
1782e4b17023SJohn Marino stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1783e4b17023SJohn Marino if (!stat)
1784e4b17023SJohn Marino do_peeling = false;
1785e4b17023SJohn Marino else
1786e4b17023SJohn Marino return stat;
1787e4b17023SJohn Marino }
1788e4b17023SJohn Marino
1789e4b17023SJohn Marino if (do_peeling)
1790e4b17023SJohn Marino {
1791e4b17023SJohn Marino /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1792e4b17023SJohn Marino If the misalignment of DR_i is identical to that of dr0 then set
1793e4b17023SJohn Marino DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1794e4b17023SJohn Marino dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1795e4b17023SJohn Marino by the peeling factor times the element size of DR_i (MOD the
1796e4b17023SJohn Marino vectorization factor times the size). Otherwise, the
1797e4b17023SJohn Marino misalignment of DR_i must be set to unknown. */
1798e4b17023SJohn Marino FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1799e4b17023SJohn Marino if (dr != dr0)
1800e4b17023SJohn Marino vect_update_misalignment_for_peel (dr, dr0, npeel);
1801e4b17023SJohn Marino
1802e4b17023SJohn Marino LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1803e4b17023SJohn Marino if (npeel)
1804e4b17023SJohn Marino LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
1805e4b17023SJohn Marino else
1806e4b17023SJohn Marino LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1807e4b17023SJohn Marino SET_DR_MISALIGNMENT (dr0, 0);
1808e4b17023SJohn Marino if (vect_print_dump_info (REPORT_ALIGNMENT))
1809e4b17023SJohn Marino fprintf (vect_dump, "Alignment of access forced using peeling.");
1810e4b17023SJohn Marino
1811e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
1812e4b17023SJohn Marino fprintf (vect_dump, "Peeling for alignment will be applied.");
1813e4b17023SJohn Marino
1814e4b17023SJohn Marino stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1815e4b17023SJohn Marino gcc_assert (stat);
1816e4b17023SJohn Marino return stat;
1817e4b17023SJohn Marino }
1818e4b17023SJohn Marino }
1819e4b17023SJohn Marino
1820e4b17023SJohn Marino
1821e4b17023SJohn Marino /* (2) Versioning to force alignment. */
1822e4b17023SJohn Marino
1823e4b17023SJohn Marino /* Try versioning if:
1824e4b17023SJohn Marino 1) flag_tree_vect_loop_version is TRUE
1825e4b17023SJohn Marino 2) optimize loop for speed
1826e4b17023SJohn Marino 3) there is at least one unsupported misaligned data ref with an unknown
1827e4b17023SJohn Marino misalignment, and
1828e4b17023SJohn Marino 4) all misaligned data refs with a known misalignment are supported, and
1829e4b17023SJohn Marino 5) the number of runtime alignment checks is within reason. */
1830e4b17023SJohn Marino
1831e4b17023SJohn Marino do_versioning =
1832e4b17023SJohn Marino flag_tree_vect_loop_version
1833e4b17023SJohn Marino && optimize_loop_nest_for_speed_p (loop)
1834e4b17023SJohn Marino && (!loop->inner); /* FORNOW */
1835e4b17023SJohn Marino
1836e4b17023SJohn Marino if (do_versioning)
1837e4b17023SJohn Marino {
1838e4b17023SJohn Marino FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1839e4b17023SJohn Marino {
1840e4b17023SJohn Marino stmt = DR_STMT (dr);
1841e4b17023SJohn Marino stmt_info = vinfo_for_stmt (stmt);
1842e4b17023SJohn Marino
1843e4b17023SJohn Marino /* For interleaving, only the alignment of the first access
1844e4b17023SJohn Marino matters. */
1845e4b17023SJohn Marino if (aligned_access_p (dr)
1846e4b17023SJohn Marino || (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1847e4b17023SJohn Marino && GROUP_FIRST_ELEMENT (stmt_info) != stmt))
1848e4b17023SJohn Marino continue;
1849e4b17023SJohn Marino
1850e4b17023SJohn Marino supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1851e4b17023SJohn Marino
1852e4b17023SJohn Marino if (!supportable_dr_alignment)
1853e4b17023SJohn Marino {
1854e4b17023SJohn Marino gimple stmt;
1855e4b17023SJohn Marino int mask;
1856e4b17023SJohn Marino tree vectype;
1857e4b17023SJohn Marino
1858e4b17023SJohn Marino if (known_alignment_for_access_p (dr)
1859e4b17023SJohn Marino || VEC_length (gimple,
1860e4b17023SJohn Marino LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1861e4b17023SJohn Marino >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
1862e4b17023SJohn Marino {
1863e4b17023SJohn Marino do_versioning = false;
1864e4b17023SJohn Marino break;
1865e4b17023SJohn Marino }
1866e4b17023SJohn Marino
1867e4b17023SJohn Marino stmt = DR_STMT (dr);
1868e4b17023SJohn Marino vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1869e4b17023SJohn Marino gcc_assert (vectype);
1870e4b17023SJohn Marino
1871e4b17023SJohn Marino /* The rightmost bits of an aligned address must be zeros.
1872e4b17023SJohn Marino Construct the mask needed for this test. For example,
1873e4b17023SJohn Marino GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1874e4b17023SJohn Marino mask must be 15 = 0xf. */
1875e4b17023SJohn Marino mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1876e4b17023SJohn Marino
1877e4b17023SJohn Marino /* FORNOW: use the same mask to test all potentially unaligned
1878e4b17023SJohn Marino references in the loop. The vectorizer currently supports
1879e4b17023SJohn Marino a single vector size, see the reference to
1880e4b17023SJohn Marino GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1881e4b17023SJohn Marino vectorization factor is computed. */
1882e4b17023SJohn Marino gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
1883e4b17023SJohn Marino || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
1884e4b17023SJohn Marino LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
1885e4b17023SJohn Marino VEC_safe_push (gimple, heap,
1886e4b17023SJohn Marino LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
1887e4b17023SJohn Marino DR_STMT (dr));
1888e4b17023SJohn Marino }
1889e4b17023SJohn Marino }
1890e4b17023SJohn Marino
1891e4b17023SJohn Marino /* Versioning requires at least one misaligned data reference. */
1892e4b17023SJohn Marino if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
1893e4b17023SJohn Marino do_versioning = false;
1894e4b17023SJohn Marino else if (!do_versioning)
1895e4b17023SJohn Marino VEC_truncate (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
1896e4b17023SJohn Marino }
1897e4b17023SJohn Marino
1898e4b17023SJohn Marino if (do_versioning)
1899e4b17023SJohn Marino {
1900e4b17023SJohn Marino VEC(gimple,heap) *may_misalign_stmts
1901e4b17023SJohn Marino = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1902e4b17023SJohn Marino gimple stmt;
1903e4b17023SJohn Marino
1904e4b17023SJohn Marino /* It can now be assumed that the data references in the statements
1905e4b17023SJohn Marino in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1906e4b17023SJohn Marino of the loop being vectorized. */
1907e4b17023SJohn Marino FOR_EACH_VEC_ELT (gimple, may_misalign_stmts, i, stmt)
1908e4b17023SJohn Marino {
1909e4b17023SJohn Marino stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1910e4b17023SJohn Marino dr = STMT_VINFO_DATA_REF (stmt_info);
1911e4b17023SJohn Marino SET_DR_MISALIGNMENT (dr, 0);
1912e4b17023SJohn Marino if (vect_print_dump_info (REPORT_ALIGNMENT))
1913e4b17023SJohn Marino fprintf (vect_dump, "Alignment of access forced using versioning.");
1914e4b17023SJohn Marino }
1915e4b17023SJohn Marino
1916e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
1917e4b17023SJohn Marino fprintf (vect_dump, "Versioning for alignment will be applied.");
1918e4b17023SJohn Marino
1919e4b17023SJohn Marino /* Peeling and versioning can't be done together at this time. */
1920e4b17023SJohn Marino gcc_assert (! (do_peeling && do_versioning));
1921e4b17023SJohn Marino
1922e4b17023SJohn Marino stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1923e4b17023SJohn Marino gcc_assert (stat);
1924e4b17023SJohn Marino return stat;
1925e4b17023SJohn Marino }
1926e4b17023SJohn Marino
1927e4b17023SJohn Marino /* This point is reached if neither peeling nor versioning is being done. */
1928e4b17023SJohn Marino gcc_assert (! (do_peeling || do_versioning));
1929e4b17023SJohn Marino
1930e4b17023SJohn Marino stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1931e4b17023SJohn Marino return stat;
1932e4b17023SJohn Marino }
1933e4b17023SJohn Marino
1934e4b17023SJohn Marino
1935e4b17023SJohn Marino /* Function vect_find_same_alignment_drs.
1936e4b17023SJohn Marino
1937e4b17023SJohn Marino Update group and alignment relations according to the chosen
1938e4b17023SJohn Marino vectorization factor. */
1939e4b17023SJohn Marino
1940e4b17023SJohn Marino static void
1941e4b17023SJohn Marino vect_find_same_alignment_drs (struct data_dependence_relation *ddr,
1942e4b17023SJohn Marino loop_vec_info loop_vinfo)
1943e4b17023SJohn Marino {
1944e4b17023SJohn Marino unsigned int i;
1945e4b17023SJohn Marino struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1946e4b17023SJohn Marino int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1947e4b17023SJohn Marino struct data_reference *dra = DDR_A (ddr);
1948e4b17023SJohn Marino struct data_reference *drb = DDR_B (ddr);
1949e4b17023SJohn Marino stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
1950e4b17023SJohn Marino stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
1951e4b17023SJohn Marino int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
1952e4b17023SJohn Marino int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
1953e4b17023SJohn Marino lambda_vector dist_v;
1954e4b17023SJohn Marino unsigned int loop_depth;
1955e4b17023SJohn Marino
1956e4b17023SJohn Marino if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
1957e4b17023SJohn Marino return;
1958e4b17023SJohn Marino
1959e4b17023SJohn Marino if (dra == drb)
1960e4b17023SJohn Marino return;
1961e4b17023SJohn Marino
1962e4b17023SJohn Marino if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1963e4b17023SJohn Marino return;
1964e4b17023SJohn Marino
1965e4b17023SJohn Marino /* Loop-based vectorization and known data dependence. */
1966e4b17023SJohn Marino if (DDR_NUM_DIST_VECTS (ddr) == 0)
1967e4b17023SJohn Marino return;
1968e4b17023SJohn Marino
1969e4b17023SJohn Marino /* Data-dependence analysis reports a distance vector of zero
1970e4b17023SJohn Marino for data-references that overlap only in the first iteration
1971e4b17023SJohn Marino but have steps of different sign (see PR45764).
1972e4b17023SJohn Marino So as a sanity check we require equal DR_STEP. */
1973e4b17023SJohn Marino if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
1974e4b17023SJohn Marino return;
1975e4b17023SJohn Marino
1976e4b17023SJohn Marino loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
1977e4b17023SJohn Marino FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
1978e4b17023SJohn Marino {
1979e4b17023SJohn Marino int dist = dist_v[loop_depth];
1980e4b17023SJohn Marino
1981e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DR_DETAILS))
1982e4b17023SJohn Marino fprintf (vect_dump, "dependence distance = %d.", dist);
1983e4b17023SJohn Marino
1984e4b17023SJohn Marino /* Same loop iteration. */
1985e4b17023SJohn Marino if (dist == 0
1986e4b17023SJohn Marino || (dist % vectorization_factor == 0 && dra_size == drb_size))
1987e4b17023SJohn Marino {
1988e4b17023SJohn Marino /* Two references with distance zero have the same alignment. */
1989e4b17023SJohn Marino VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
1990e4b17023SJohn Marino VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
1991e4b17023SJohn Marino if (vect_print_dump_info (REPORT_ALIGNMENT))
1992e4b17023SJohn Marino fprintf (vect_dump, "accesses have the same alignment.");
1993e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DR_DETAILS))
1994e4b17023SJohn Marino {
1995e4b17023SJohn Marino fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
1996e4b17023SJohn Marino print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1997e4b17023SJohn Marino fprintf (vect_dump, " and ");
1998e4b17023SJohn Marino print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1999e4b17023SJohn Marino }
2000e4b17023SJohn Marino }
2001e4b17023SJohn Marino }
2002e4b17023SJohn Marino }
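/* Illustrative case (added for clarity): with a vectorization factor of 4,
   equal steps and equal element sizes, a dependence distance of 0 or 4
   between DRA and DRB means the two references are a whole number of vector
   iterations apart, so they are recorded as having the same alignment.  */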
2003e4b17023SJohn Marino
2004e4b17023SJohn Marino
2005e4b17023SJohn Marino /* Function vect_analyze_data_refs_alignment
2006e4b17023SJohn Marino
2007e4b17023SJohn Marino Analyze the alignment of the data-references in the loop.
2008e4b17023SJohn Marino Return FALSE if a data reference is found that cannot be vectorized. */
2009e4b17023SJohn Marino
2010e4b17023SJohn Marino bool
2011e4b17023SJohn Marino vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo,
2012e4b17023SJohn Marino bb_vec_info bb_vinfo)
2013e4b17023SJohn Marino {
2014e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
2015e4b17023SJohn Marino fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
2016e4b17023SJohn Marino
2017e4b17023SJohn Marino /* Mark groups of data references with same alignment using
2018e4b17023SJohn Marino data dependence information. */
2019e4b17023SJohn Marino if (loop_vinfo)
2020e4b17023SJohn Marino {
2021e4b17023SJohn Marino VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
2022e4b17023SJohn Marino struct data_dependence_relation *ddr;
2023e4b17023SJohn Marino unsigned int i;
2024e4b17023SJohn Marino
2025e4b17023SJohn Marino FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
2026e4b17023SJohn Marino vect_find_same_alignment_drs (ddr, loop_vinfo);
2027e4b17023SJohn Marino }
2028e4b17023SJohn Marino
2029e4b17023SJohn Marino if (!vect_compute_data_refs_alignment (loop_vinfo, bb_vinfo))
2030e4b17023SJohn Marino {
2031e4b17023SJohn Marino if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2032e4b17023SJohn Marino fprintf (vect_dump,
2033e4b17023SJohn Marino "not vectorized: can't calculate alignment for data ref.");
2034e4b17023SJohn Marino return false;
2035e4b17023SJohn Marino }
2036e4b17023SJohn Marino
2037e4b17023SJohn Marino return true;
2038e4b17023SJohn Marino }
2039e4b17023SJohn Marino
2040e4b17023SJohn Marino
2041e4b17023SJohn Marino /* Analyze groups of strided accesses: check that DR belongs to a group of
2042e4b17023SJohn Marino strided accesses of legal size, step, etc. Detect gaps, single element
2043e4b17023SJohn Marino interleaving, and other special cases. Set strided access info.
2044e4b17023SJohn Marino Collect groups of strided stores for further use in SLP analysis. */
2045e4b17023SJohn Marino
2046e4b17023SJohn Marino static bool
2047e4b17023SJohn Marino vect_analyze_group_access (struct data_reference *dr)
2048e4b17023SJohn Marino {
2049e4b17023SJohn Marino tree step = DR_STEP (dr);
2050e4b17023SJohn Marino tree scalar_type = TREE_TYPE (DR_REF (dr));
2051e4b17023SJohn Marino HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
2052e4b17023SJohn Marino gimple stmt = DR_STMT (dr);
2053e4b17023SJohn Marino stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2054e4b17023SJohn Marino loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2055e4b17023SJohn Marino bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2056e4b17023SJohn Marino HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2057e4b17023SJohn Marino HOST_WIDE_INT stride, last_accessed_element = 1;
2058e4b17023SJohn Marino bool slp_impossible = false;
2059e4b17023SJohn Marino struct loop *loop = NULL;
2060e4b17023SJohn Marino
2061e4b17023SJohn Marino if (loop_vinfo)
2062e4b17023SJohn Marino loop = LOOP_VINFO_LOOP (loop_vinfo);
2063e4b17023SJohn Marino
2064e4b17023SJohn Marino /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
2065e4b17023SJohn Marino interleaving group (including gaps). */
2066e4b17023SJohn Marino stride = dr_step / type_size;
2067e4b17023SJohn Marino
2068e4b17023SJohn Marino /* A non-consecutive access is possible only if it is part of an interleaving group. */
2069e4b17023SJohn Marino if (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
2070e4b17023SJohn Marino {
2071e4b17023SJohn Marino /* Check if this DR is part of an interleaving group, and is a single
2072e4b17023SJohn Marino element of the group that is accessed in the loop. */
2073e4b17023SJohn Marino
2074e4b17023SJohn Marino /* Gaps are supported only for loads. STEP must be a multiple of the type
2075e4b17023SJohn Marino size. The size of the group must be a power of 2. */
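      /* E.g. (a hypothetical loop) a load such as

	   for (i = 0; i < n; i++)
	     sum += b[4*i];

	 reads one element out of every four: a single-element interleaving
	 group of size 4, which is accepted here for reads only (a scalar
	 epilogue loop then handles the trailing gap).  */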
2076e4b17023SJohn Marino if (DR_IS_READ (dr)
2077e4b17023SJohn Marino && (dr_step % type_size) == 0
2078e4b17023SJohn Marino && stride > 0
2079e4b17023SJohn Marino && exact_log2 (stride) != -1)
2080e4b17023SJohn Marino {
2081e4b17023SJohn Marino GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = stmt;
2082e4b17023SJohn Marino GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2083e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DR_DETAILS))
2084e4b17023SJohn Marino {
2085e4b17023SJohn Marino fprintf (vect_dump, "Detected single element interleaving ");
2086e4b17023SJohn Marino print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2087e4b17023SJohn Marino fprintf (vect_dump, " step ");
2088e4b17023SJohn Marino print_generic_expr (vect_dump, step, TDF_SLIM);
2089e4b17023SJohn Marino }
2090e4b17023SJohn Marino
2091e4b17023SJohn Marino if (loop_vinfo)
2092e4b17023SJohn Marino {
2093e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
2094e4b17023SJohn Marino fprintf (vect_dump, "Data access with gaps requires scalar "
2095e4b17023SJohn Marino "epilogue loop");
2096e4b17023SJohn Marino if (loop->inner)
2097e4b17023SJohn Marino {
2098e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
2099e4b17023SJohn Marino fprintf (vect_dump, "Peeling for outer loop is not"
2100e4b17023SJohn Marino " supported");
2101e4b17023SJohn Marino return false;
2102e4b17023SJohn Marino }
2103e4b17023SJohn Marino
2104e4b17023SJohn Marino LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2105e4b17023SJohn Marino }
2106e4b17023SJohn Marino
2107e4b17023SJohn Marino return true;
2108e4b17023SJohn Marino }
2109e4b17023SJohn Marino
2110e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
2111e4b17023SJohn Marino {
2112e4b17023SJohn Marino fprintf (vect_dump, "not consecutive access ");
2113e4b17023SJohn Marino print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2114e4b17023SJohn Marino }
2115e4b17023SJohn Marino
2116e4b17023SJohn Marino if (bb_vinfo)
2117e4b17023SJohn Marino {
2118e4b17023SJohn Marino /* Mark the statement as unvectorizable. */
2119e4b17023SJohn Marino STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2120e4b17023SJohn Marino return true;
2121e4b17023SJohn Marino }
2122e4b17023SJohn Marino
2123e4b17023SJohn Marino return false;
2124e4b17023SJohn Marino }
2125e4b17023SJohn Marino
2126e4b17023SJohn Marino if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt)
2127e4b17023SJohn Marino {
2128e4b17023SJohn Marino /* First stmt in the interleaving chain. Check the chain. */
2129e4b17023SJohn Marino gimple next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
2130e4b17023SJohn Marino struct data_reference *data_ref = dr;
2131e4b17023SJohn Marino unsigned int count = 1;
2132e4b17023SJohn Marino tree next_step;
2133e4b17023SJohn Marino tree prev_init = DR_INIT (data_ref);
2134e4b17023SJohn Marino gimple prev = stmt;
2135e4b17023SJohn Marino HOST_WIDE_INT diff, count_in_bytes, gaps = 0;
2136e4b17023SJohn Marino
2137e4b17023SJohn Marino while (next)
2138e4b17023SJohn Marino {
2139e4b17023SJohn Marino /* Skip identical data-refs.  In case two or more stmts share a
2140e4b17023SJohn Marino    data-ref (supported only for loads), we vectorize only the first
2141e4b17023SJohn Marino    stmt, and the rest get their vectorized loads from the first
2142e4b17023SJohn Marino    one.  */
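	  /* For example (hypothetical stmts):

	       x = a[2*i];
	       y = a[2*i];        <-- same DR_INIT as the previous load
	       z = a[2*i + 1];

	     only the first a[2*i] load is vectorized; the second one later
	     reuses its vector via GROUP_SAME_DR_STMT.  */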
2143e4b17023SJohn Marino if (!tree_int_cst_compare (DR_INIT (data_ref),
2144e4b17023SJohn Marino DR_INIT (STMT_VINFO_DATA_REF (
2145e4b17023SJohn Marino vinfo_for_stmt (next)))))
2146e4b17023SJohn Marino {
2147e4b17023SJohn Marino if (DR_IS_WRITE (data_ref))
2148e4b17023SJohn Marino {
2149e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
2150e4b17023SJohn Marino fprintf (vect_dump, "Two store stmts share the same dr.");
2151e4b17023SJohn Marino return false;
2152e4b17023SJohn Marino }
2153e4b17023SJohn Marino
2154e4b17023SJohn Marino /* Check that there are no load-store dependences for these loads,
2155e4b17023SJohn Marino    to prevent a load-store-load sequence to the same location.  */
2156e4b17023SJohn Marino if (GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
2157e4b17023SJohn Marino || GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
2158e4b17023SJohn Marino {
2159e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
2160e4b17023SJohn Marino fprintf (vect_dump,
2161e4b17023SJohn Marino "READ_WRITE dependence in interleaving.");
2162e4b17023SJohn Marino return false;
2163e4b17023SJohn Marino }
2164e4b17023SJohn Marino
2165e4b17023SJohn Marino /* For a load, record the stmt whose vectorized load will be reused.  */
2166e4b17023SJohn Marino GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
2167e4b17023SJohn Marino
2168e4b17023SJohn Marino prev = next;
2169e4b17023SJohn Marino next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
2170e4b17023SJohn Marino continue;
2171e4b17023SJohn Marino }
2172e4b17023SJohn Marino
2173e4b17023SJohn Marino prev = next;
2174e4b17023SJohn Marino
2175e4b17023SJohn Marino /* Check that all the accesses have the same STEP. */
2176e4b17023SJohn Marino next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
2177e4b17023SJohn Marino if (tree_int_cst_compare (step, next_step))
2178e4b17023SJohn Marino {
2179e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
2180e4b17023SJohn Marino fprintf (vect_dump, "not consecutive access in interleaving");
2181e4b17023SJohn Marino return false;
2182e4b17023SJohn Marino }
2183e4b17023SJohn Marino
2184e4b17023SJohn Marino data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
2185e4b17023SJohn Marino /* Check that the distance between two accesses is equal to the type
2186e4b17023SJohn Marino size. Otherwise, we have gaps. */
2187e4b17023SJohn Marino diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
2188e4b17023SJohn Marino - TREE_INT_CST_LOW (prev_init)) / type_size;
2189e4b17023SJohn Marino if (diff != 1)
2190e4b17023SJohn Marino {
2191e4b17023SJohn Marino /* FORNOW: SLP of accesses with gaps is not supported. */
2192e4b17023SJohn Marino slp_impossible = true;
2193e4b17023SJohn Marino if (DR_IS_WRITE (data_ref))
2194e4b17023SJohn Marino {
2195e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
2196e4b17023SJohn Marino fprintf (vect_dump, "interleaved store with gaps");
2197e4b17023SJohn Marino return false;
2198e4b17023SJohn Marino }
2199e4b17023SJohn Marino
2200e4b17023SJohn Marino gaps += diff - 1;
2201e4b17023SJohn Marino }
2202e4b17023SJohn Marino
2203e4b17023SJohn Marino last_accessed_element += diff;
2204e4b17023SJohn Marino
2205e4b17023SJohn Marino /* Store the gap from the previous member of the group. If there is no
2206e4b17023SJohn Marino gap in the access, GROUP_GAP is always 1. */
2207e4b17023SJohn Marino GROUP_GAP (vinfo_for_stmt (next)) = diff;
2208e4b17023SJohn Marino
2209e4b17023SJohn Marino prev_init = DR_INIT (data_ref);
2210e4b17023SJohn Marino next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
2211e4b17023SJohn Marino /* Count the number of data-refs in the chain. */
2212e4b17023SJohn Marino count++;
2213e4b17023SJohn Marino }
2214e4b17023SJohn Marino
2215e4b17023SJohn Marino /* COUNT is the number of accesses found; multiply it by the size of
2216e4b17023SJohn Marino    the type to get COUNT_IN_BYTES.  */
2217e4b17023SJohn Marino count_in_bytes = type_size * count;
2218e4b17023SJohn Marino
2219e4b17023SJohn Marino /* Check that the size of the interleaving (including gaps) is not
2220e4b17023SJohn Marino greater than STEP. */
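      /* E.g. (hypothetical numbers) with 4-byte elements, COUNT == 3 and
	 one gap: COUNT_IN_BYTES == 12 and GAPS * TYPE_SIZE == 4, so a step
	 of at least 16 bytes is required here.  */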
2221e4b17023SJohn Marino if (dr_step && dr_step < count_in_bytes + gaps * type_size)
2222e4b17023SJohn Marino {
2223e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
2224e4b17023SJohn Marino {
2225e4b17023SJohn Marino fprintf (vect_dump, "interleaving size is greater than step for ");
2226e4b17023SJohn Marino print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2227e4b17023SJohn Marino }
2228e4b17023SJohn Marino return false;
2229e4b17023SJohn Marino }
2230e4b17023SJohn Marino
2231e4b17023SJohn Marino /* Check that the size of the interleaving is equal to STEP for stores,
2232e4b17023SJohn Marino i.e., that there are no gaps. */
2233e4b17023SJohn Marino if (dr_step && dr_step != count_in_bytes)
2234e4b17023SJohn Marino {
2235e4b17023SJohn Marino if (DR_IS_READ (dr))
2236e4b17023SJohn Marino {
2237e4b17023SJohn Marino slp_impossible = true;
2238e4b17023SJohn Marino /* There is a gap after the last load in the group.  This gap is
2239e4b17023SJohn Marino    the difference between the stride and the number of elements.
2240e4b17023SJohn Marino    When there is no gap, this difference is 0.  */
2241e4b17023SJohn Marino GROUP_GAP (vinfo_for_stmt (stmt)) = stride - count;
2242e4b17023SJohn Marino }
2243e4b17023SJohn Marino else
2244e4b17023SJohn Marino {
2245e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
2246e4b17023SJohn Marino fprintf (vect_dump, "interleaved store with gaps");
2247e4b17023SJohn Marino return false;
2248e4b17023SJohn Marino }
2249e4b17023SJohn Marino }
2250e4b17023SJohn Marino
2251e4b17023SJohn Marino /* Check that STEP is a multiple of type size. */
2252e4b17023SJohn Marino if (dr_step && (dr_step % type_size) != 0)
2253e4b17023SJohn Marino {
2254e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
2255e4b17023SJohn Marino {
2256e4b17023SJohn Marino fprintf (vect_dump, "step is not a multiple of type size: step ");
2257e4b17023SJohn Marino print_generic_expr (vect_dump, step, TDF_SLIM);
2258e4b17023SJohn Marino fprintf (vect_dump, " size ");
2259e4b17023SJohn Marino print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
2260e4b17023SJohn Marino TDF_SLIM);
2261e4b17023SJohn Marino }
2262e4b17023SJohn Marino return false;
2263e4b17023SJohn Marino }
2264e4b17023SJohn Marino
2265e4b17023SJohn Marino if (stride == 0)
2266e4b17023SJohn Marino stride = count;
2267e4b17023SJohn Marino
2268e4b17023SJohn Marino GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2269e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
2270e4b17023SJohn Marino fprintf (vect_dump, "Detected interleaving of size %d", (int)stride);
2271e4b17023SJohn Marino
2272e4b17023SJohn Marino /* SLP: create an SLP data structure for every interleaving group of
2273e4b17023SJohn Marino    stores for further analysis in vect_analyze_slp.  */
2274e4b17023SJohn Marino if (DR_IS_WRITE (dr) && !slp_impossible)
2275e4b17023SJohn Marino {
2276e4b17023SJohn Marino if (loop_vinfo)
2277e4b17023SJohn Marino VEC_safe_push (gimple, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo),
2278e4b17023SJohn Marino stmt);
2279e4b17023SJohn Marino if (bb_vinfo)
2280e4b17023SJohn Marino VEC_safe_push (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo),
2281e4b17023SJohn Marino stmt);
2282e4b17023SJohn Marino }
2283e4b17023SJohn Marino
2284e4b17023SJohn Marino /* There is a gap at the end of the group.  */
2285e4b17023SJohn Marino if (stride - last_accessed_element > 0 && loop_vinfo)
2286e4b17023SJohn Marino {
2287e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
2288e4b17023SJohn Marino fprintf (vect_dump, "Data access with gaps requires scalar "
2289e4b17023SJohn Marino "epilogue loop");
2290e4b17023SJohn Marino if (loop->inner)
2291e4b17023SJohn Marino {
2292e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
2293e4b17023SJohn Marino fprintf (vect_dump, "Peeling for outer loop is not supported");
2294e4b17023SJohn Marino return false;
2295e4b17023SJohn Marino }
2296e4b17023SJohn Marino
2297e4b17023SJohn Marino LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2298e4b17023SJohn Marino }
2299e4b17023SJohn Marino }
2300e4b17023SJohn Marino
2301e4b17023SJohn Marino return true;
2302e4b17023SJohn Marino }
2303e4b17023SJohn Marino
2304e4b17023SJohn Marino
2305e4b17023SJohn Marino /* Analyze the access pattern of the data-reference DR.
2306e4b17023SJohn Marino In case of non-consecutive accesses call vect_analyze_group_access() to
2307e4b17023SJohn Marino analyze groups of strided accesses. */
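/* For instance (hypothetical accesses, assuming 4-byte ints):

     a[i]     = ...;     step ==  4  -> consecutive
     a[n - i] = ...;     step == -4  -> consecutive, reversed
     a[3*i]   = ...;     step == 12  -> not consecutive; handed off to
                                        vect_analyze_group_access.  */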
2308e4b17023SJohn Marino
2309e4b17023SJohn Marino static bool
2310e4b17023SJohn Marino vect_analyze_data_ref_access (struct data_reference *dr)
2311e4b17023SJohn Marino {
2312e4b17023SJohn Marino tree step = DR_STEP (dr);
2313e4b17023SJohn Marino tree scalar_type = TREE_TYPE (DR_REF (dr));
2314e4b17023SJohn Marino gimple stmt = DR_STMT (dr);
2315e4b17023SJohn Marino stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2316e4b17023SJohn Marino loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2317e4b17023SJohn Marino struct loop *loop = NULL;
2318e4b17023SJohn Marino HOST_WIDE_INT dr_step;
2319e4b17023SJohn Marino
2320e4b17023SJohn Marino if (loop_vinfo)
2321e4b17023SJohn Marino loop = LOOP_VINFO_LOOP (loop_vinfo);
2322e4b17023SJohn Marino
2323e4b17023SJohn Marino if (loop_vinfo && !step)
2324e4b17023SJohn Marino {
2325e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
2326e4b17023SJohn Marino fprintf (vect_dump, "bad data-ref access in loop");
2327e4b17023SJohn Marino return false;
2328e4b17023SJohn Marino }
2329e4b17023SJohn Marino
2330e4b17023SJohn Marino /* Allow invariant loads in loops. */
2331e4b17023SJohn Marino dr_step = TREE_INT_CST_LOW (step);
2332e4b17023SJohn Marino if (loop_vinfo && dr_step == 0)
2333e4b17023SJohn Marino {
2334e4b17023SJohn Marino GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2335*95d28233SJohn Marino if (nested_in_vect_loop_p (loop, stmt))
2336*95d28233SJohn Marino {
2337*95d28233SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
2338*95d28233SJohn Marino fprintf (vect_dump, "zero step in inner loop of nest");
2339*95d28233SJohn Marino return false;
2340*95d28233SJohn Marino }
2341e4b17023SJohn Marino return DR_IS_READ (dr);
2342e4b17023SJohn Marino }
2343e4b17023SJohn Marino
2344e4b17023SJohn Marino if (loop && nested_in_vect_loop_p (loop, stmt))
2345e4b17023SJohn Marino {
2346e4b17023SJohn Marino /* Interleaved accesses are not yet supported within outer-loop
2347e4b17023SJohn Marino vectorization for references in the inner-loop. */
2348e4b17023SJohn Marino GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2349e4b17023SJohn Marino
2350e4b17023SJohn Marino /* For the rest of the analysis we use the outer-loop step. */
2351e4b17023SJohn Marino step = STMT_VINFO_DR_STEP (stmt_info);
2352e4b17023SJohn Marino dr_step = TREE_INT_CST_LOW (step);
2353e4b17023SJohn Marino
2354e4b17023SJohn Marino if (dr_step == 0)
2355e4b17023SJohn Marino {
2356e4b17023SJohn Marino if (vect_print_dump_info (REPORT_ALIGNMENT))
2357e4b17023SJohn Marino fprintf (vect_dump, "zero step in outer loop.");
2358e4b17023SJohn Marino if (DR_IS_READ (dr))
2359e4b17023SJohn Marino return true;
2360e4b17023SJohn Marino else
2361e4b17023SJohn Marino return false;
2362e4b17023SJohn Marino }
2363e4b17023SJohn Marino }
2364e4b17023SJohn Marino
2365e4b17023SJohn Marino /* Consecutive? */
2366e4b17023SJohn Marino if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
2367e4b17023SJohn Marino || (dr_step < 0
2368e4b17023SJohn Marino && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
2369e4b17023SJohn Marino {
2370e4b17023SJohn Marino /* Mark that it is not interleaving. */
2371e4b17023SJohn Marino GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2372e4b17023SJohn Marino return true;
2373e4b17023SJohn Marino }
2374e4b17023SJohn Marino
2375e4b17023SJohn Marino if (loop && nested_in_vect_loop_p (loop, stmt))
2376e4b17023SJohn Marino {
2377e4b17023SJohn Marino if (vect_print_dump_info (REPORT_ALIGNMENT))
2378e4b17023SJohn Marino fprintf (vect_dump, "strided access in outer loop.");
2379e4b17023SJohn Marino return false;
2380e4b17023SJohn Marino }
2381e4b17023SJohn Marino
2382e4b17023SJohn Marino /* Not a consecutive access - check if it is part of an interleaving group.  */
2383e4b17023SJohn Marino return vect_analyze_group_access (dr);
2384e4b17023SJohn Marino }
2385e4b17023SJohn Marino
2386e4b17023SJohn Marino
2387e4b17023SJohn Marino /* Function vect_analyze_data_ref_accesses.
2388e4b17023SJohn Marino
2389e4b17023SJohn Marino Analyze the access pattern of all the data references in the loop.
2390e4b17023SJohn Marino
2391e4b17023SJohn Marino FORNOW: the only access pattern that is considered vectorizable is a
2392e4b17023SJohn Marino simple step 1 (consecutive) access.
2393e4b17023SJohn Marino
2394e4b17023SJohn Marino FORNOW: handle only arrays and pointer accesses. */
2395e4b17023SJohn Marino
2396e4b17023SJohn Marino bool
2397e4b17023SJohn Marino vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2398e4b17023SJohn Marino {
2399e4b17023SJohn Marino unsigned int i;
2400e4b17023SJohn Marino VEC (data_reference_p, heap) *datarefs;
2401e4b17023SJohn Marino struct data_reference *dr;
2402e4b17023SJohn Marino
2403e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
2404e4b17023SJohn Marino fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
2405e4b17023SJohn Marino
2406e4b17023SJohn Marino if (loop_vinfo)
2407e4b17023SJohn Marino datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2408e4b17023SJohn Marino else
2409e4b17023SJohn Marino datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2410e4b17023SJohn Marino
2411e4b17023SJohn Marino FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
2412e4b17023SJohn Marino if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
2413e4b17023SJohn Marino && !vect_analyze_data_ref_access (dr))
2414e4b17023SJohn Marino {
2415e4b17023SJohn Marino if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2416e4b17023SJohn Marino fprintf (vect_dump, "not vectorized: complicated access pattern.");
2417e4b17023SJohn Marino
2418e4b17023SJohn Marino if (bb_vinfo)
2419e4b17023SJohn Marino {
2420e4b17023SJohn Marino /* Mark the statement as not vectorizable. */
2421e4b17023SJohn Marino STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2422e4b17023SJohn Marino continue;
2423e4b17023SJohn Marino }
2424e4b17023SJohn Marino else
2425e4b17023SJohn Marino return false;
2426e4b17023SJohn Marino }
2427e4b17023SJohn Marino
2428e4b17023SJohn Marino return true;
2429e4b17023SJohn Marino }
2430e4b17023SJohn Marino
2431e4b17023SJohn Marino /* Function vect_prune_runtime_alias_test_list.
2432e4b17023SJohn Marino
2433e4b17023SJohn Marino Prune a list of ddrs to be tested at run-time by versioning for alias.
2434e4b17023SJohn Marino Return FALSE if the resulting list of ddrs is longer than allowed by
2435e4b17023SJohn Marino PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
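/* A hypothetical example: for

     void f (int *p, int *q, int *r, int n)
     {
       int i;
       for (i = 0; i < n; i++)
         p[i] = q[i] + r[i];
     }

   the may-alias pairs (p,q) and (p,r) each need a runtime overlap check.
   Pairs whose tested address ranges equal those of an earlier pair are
   pruned here; if more checks remain than the parameter allows, versioning
   for alias is abandoned altogether.  */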
2436e4b17023SJohn Marino
2437e4b17023SJohn Marino bool
2438e4b17023SJohn Marino vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
2439e4b17023SJohn Marino {
2440e4b17023SJohn Marino VEC (ddr_p, heap) * ddrs =
2441e4b17023SJohn Marino LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2442e4b17023SJohn Marino unsigned i, j;
2443e4b17023SJohn Marino
2444e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
2445e4b17023SJohn Marino fprintf (vect_dump, "=== vect_prune_runtime_alias_test_list ===");
2446e4b17023SJohn Marino
2447e4b17023SJohn Marino for (i = 0; i < VEC_length (ddr_p, ddrs); )
2448e4b17023SJohn Marino {
2449e4b17023SJohn Marino bool found;
2450e4b17023SJohn Marino ddr_p ddr_i;
2451e4b17023SJohn Marino
2452e4b17023SJohn Marino ddr_i = VEC_index (ddr_p, ddrs, i);
2453e4b17023SJohn Marino found = false;
2454e4b17023SJohn Marino
2455e4b17023SJohn Marino for (j = 0; j < i; j++)
2456e4b17023SJohn Marino {
2457e4b17023SJohn Marino ddr_p ddr_j = VEC_index (ddr_p, ddrs, j);
2458e4b17023SJohn Marino
2459e4b17023SJohn Marino if (vect_vfa_range_equal (ddr_i, ddr_j))
2460e4b17023SJohn Marino {
2461e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DR_DETAILS))
2462e4b17023SJohn Marino {
2463e4b17023SJohn Marino fprintf (vect_dump, "found equal ranges ");
2464e4b17023SJohn Marino print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_i)), TDF_SLIM);
2465e4b17023SJohn Marino fprintf (vect_dump, ", ");
2466e4b17023SJohn Marino print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_i)), TDF_SLIM);
2467e4b17023SJohn Marino fprintf (vect_dump, " and ");
2468e4b17023SJohn Marino print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_j)), TDF_SLIM);
2469e4b17023SJohn Marino fprintf (vect_dump, ", ");
2470e4b17023SJohn Marino print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_j)), TDF_SLIM);
2471e4b17023SJohn Marino }
2472e4b17023SJohn Marino found = true;
2473e4b17023SJohn Marino break;
2474e4b17023SJohn Marino }
2475e4b17023SJohn Marino }
2476e4b17023SJohn Marino
2477e4b17023SJohn Marino if (found)
2478e4b17023SJohn Marino {
2479e4b17023SJohn Marino VEC_ordered_remove (ddr_p, ddrs, i);
2480e4b17023SJohn Marino continue;
2481e4b17023SJohn Marino }
2482e4b17023SJohn Marino i++;
2483e4b17023SJohn Marino }
2484e4b17023SJohn Marino
2485e4b17023SJohn Marino if (VEC_length (ddr_p, ddrs) >
2486e4b17023SJohn Marino (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
2487e4b17023SJohn Marino {
2488e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DR_DETAILS))
2489e4b17023SJohn Marino {
2490e4b17023SJohn Marino fprintf (vect_dump,
2491e4b17023SJohn Marino "disable versioning for alias - max number of generated "
2492e4b17023SJohn Marino "checks exceeded.");
2493e4b17023SJohn Marino }
2494e4b17023SJohn Marino
2495e4b17023SJohn Marino VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);
2496e4b17023SJohn Marino
2497e4b17023SJohn Marino return false;
2498e4b17023SJohn Marino }
2499e4b17023SJohn Marino
2500e4b17023SJohn Marino return true;
2501e4b17023SJohn Marino }
2502e4b17023SJohn Marino
2503e4b17023SJohn Marino /* Check whether a non-affine read in STMT is suitable for a gather load
2504e4b17023SJohn Marino    and if so, return a builtin decl for that operation.  */
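/* A hypothetical loop that matches the address form required by the gather
   builtins (loop_invariant + vector * scale) would be

     for (i = 0; i < n; i++)
       sum += data[idx[i]];

   where BASEP would receive something like &data[0] (plus any constant
   offset), OFFP the SSA_NAME holding the idx[i] value, and SCALEP the
   element size of *data.  */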
2505e4b17023SJohn Marino
2506e4b17023SJohn Marino tree
2507e4b17023SJohn Marino vect_check_gather (gimple stmt, loop_vec_info loop_vinfo, tree *basep,
2508e4b17023SJohn Marino tree *offp, int *scalep)
2509e4b17023SJohn Marino {
2510e4b17023SJohn Marino HOST_WIDE_INT scale = 1, pbitpos, pbitsize;
2511e4b17023SJohn Marino struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2512e4b17023SJohn Marino stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2513e4b17023SJohn Marino struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2514e4b17023SJohn Marino tree offtype = NULL_TREE;
2515e4b17023SJohn Marino tree decl, base, off;
2516e4b17023SJohn Marino enum machine_mode pmode;
2517e4b17023SJohn Marino int punsignedp, pvolatilep;
2518e4b17023SJohn Marino
2519e4b17023SJohn Marino /* The gather builtins need address of the form
2520e4b17023SJohn Marino loop_invariant + vector * {1, 2, 4, 8}
2521e4b17023SJohn Marino or
2522e4b17023SJohn Marino loop_invariant + sign_extend (vector) * { 1, 2, 4, 8 }.
2523e4b17023SJohn Marino Unfortunately DR_BASE_ADDRESS/DR_OFFSET can be a mixture
2524e4b17023SJohn Marino of loop invariants/SSA_NAMEs defined in the loop, with casts,
2525e4b17023SJohn Marino multiplications and additions in it. To get a vector, we need
2526e4b17023SJohn Marino a single SSA_NAME that will be defined in the loop and will
2527e4b17023SJohn Marino contain everything that is not loop invariant and that can be
2528e4b17023SJohn Marino vectorized.  The following code attempts to find such a preexisting
2529e4b17023SJohn Marino SSA_NAME OFF and put the loop invariants into a tree BASE
2530e4b17023SJohn Marino that can be gimplified before the loop. */
2531e4b17023SJohn Marino base = get_inner_reference (DR_REF (dr), &pbitsize, &pbitpos, &off,
2532e4b17023SJohn Marino &pmode, &punsignedp, &pvolatilep, false);
2533e4b17023SJohn Marino gcc_assert (base != NULL_TREE && (pbitpos % BITS_PER_UNIT) == 0);
2534e4b17023SJohn Marino
2535e4b17023SJohn Marino if (TREE_CODE (base) == MEM_REF)
2536e4b17023SJohn Marino {
2537e4b17023SJohn Marino if (!integer_zerop (TREE_OPERAND (base, 1)))
2538e4b17023SJohn Marino {
2539e4b17023SJohn Marino if (off == NULL_TREE)
2540e4b17023SJohn Marino {
2541e4b17023SJohn Marino double_int moff = mem_ref_offset (base);
2542e4b17023SJohn Marino off = double_int_to_tree (sizetype, moff);
2543e4b17023SJohn Marino }
2544e4b17023SJohn Marino else
2545e4b17023SJohn Marino off = size_binop (PLUS_EXPR, off,
2546e4b17023SJohn Marino fold_convert (sizetype, TREE_OPERAND (base, 1)));
2547e4b17023SJohn Marino }
2548e4b17023SJohn Marino base = TREE_OPERAND (base, 0);
2549e4b17023SJohn Marino }
2550e4b17023SJohn Marino else
2551e4b17023SJohn Marino base = build_fold_addr_expr (base);
2552e4b17023SJohn Marino
2553e4b17023SJohn Marino if (off == NULL_TREE)
2554e4b17023SJohn Marino off = size_zero_node;
2555e4b17023SJohn Marino
2556e4b17023SJohn Marino /* If BASE is not loop invariant, then if OFF is 0 we start with just the
2557e4b17023SJohn Marino    constant offset in the loop-invariant BASE and continue with BASE as
2558e4b17023SJohn Marino    OFF; otherwise we give up.
2559e4b17023SJohn Marino    We could handle the latter case by gimplifying the addition of BASE + OFF
2560e4b17023SJohn Marino    into some SSA_NAME and using that as OFF, but for now punt.  */
2561e4b17023SJohn Marino if (!expr_invariant_in_loop_p (loop, base))
2562e4b17023SJohn Marino {
2563e4b17023SJohn Marino if (!integer_zerop (off))
2564e4b17023SJohn Marino return NULL_TREE;
2565e4b17023SJohn Marino off = base;
2566e4b17023SJohn Marino base = size_int (pbitpos / BITS_PER_UNIT);
2567e4b17023SJohn Marino }
2568e4b17023SJohn Marino /* Otherwise put base + constant offset into the loop invariant BASE
2569e4b17023SJohn Marino and continue with OFF. */
2570e4b17023SJohn Marino else
2571e4b17023SJohn Marino {
2572e4b17023SJohn Marino base = fold_convert (sizetype, base);
2573e4b17023SJohn Marino base = size_binop (PLUS_EXPR, base, size_int (pbitpos / BITS_PER_UNIT));
2574e4b17023SJohn Marino }
2575e4b17023SJohn Marino
2576e4b17023SJohn Marino /* OFF at this point may be either an SSA_NAME or some tree expression
2577e4b17023SJohn Marino from get_inner_reference. Try to peel off loop invariants from it
2578e4b17023SJohn Marino into BASE as long as possible. */
2579e4b17023SJohn Marino STRIP_NOPS (off);
2580e4b17023SJohn Marino while (offtype == NULL_TREE)
2581e4b17023SJohn Marino {
2582e4b17023SJohn Marino enum tree_code code;
2583e4b17023SJohn Marino tree op0, op1, add = NULL_TREE;
2584e4b17023SJohn Marino
2585e4b17023SJohn Marino if (TREE_CODE (off) == SSA_NAME)
2586e4b17023SJohn Marino {
2587e4b17023SJohn Marino gimple def_stmt = SSA_NAME_DEF_STMT (off);
2588e4b17023SJohn Marino
2589e4b17023SJohn Marino if (expr_invariant_in_loop_p (loop, off))
2590e4b17023SJohn Marino return NULL_TREE;
2591e4b17023SJohn Marino
2592e4b17023SJohn Marino if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
2593e4b17023SJohn Marino break;
2594e4b17023SJohn Marino
2595e4b17023SJohn Marino op0 = gimple_assign_rhs1 (def_stmt);
2596e4b17023SJohn Marino code = gimple_assign_rhs_code (def_stmt);
2597e4b17023SJohn Marino op1 = gimple_assign_rhs2 (def_stmt);
2598e4b17023SJohn Marino }
2599e4b17023SJohn Marino else
2600e4b17023SJohn Marino {
2601e4b17023SJohn Marino if (get_gimple_rhs_class (TREE_CODE (off)) == GIMPLE_TERNARY_RHS)
2602e4b17023SJohn Marino return NULL_TREE;
2603e4b17023SJohn Marino code = TREE_CODE (off);
2604e4b17023SJohn Marino extract_ops_from_tree (off, &code, &op0, &op1);
2605e4b17023SJohn Marino }
2606e4b17023SJohn Marino switch (code)
2607e4b17023SJohn Marino {
2608e4b17023SJohn Marino case POINTER_PLUS_EXPR:
2609e4b17023SJohn Marino case PLUS_EXPR:
2610e4b17023SJohn Marino if (expr_invariant_in_loop_p (loop, op0))
2611e4b17023SJohn Marino {
2612e4b17023SJohn Marino add = op0;
2613e4b17023SJohn Marino off = op1;
2614e4b17023SJohn Marino do_add:
2615e4b17023SJohn Marino add = fold_convert (sizetype, add);
2616e4b17023SJohn Marino if (scale != 1)
2617e4b17023SJohn Marino add = size_binop (MULT_EXPR, add, size_int (scale));
2618e4b17023SJohn Marino base = size_binop (PLUS_EXPR, base, add);
2619e4b17023SJohn Marino continue;
2620e4b17023SJohn Marino }
2621e4b17023SJohn Marino if (expr_invariant_in_loop_p (loop, op1))
2622e4b17023SJohn Marino {
2623e4b17023SJohn Marino add = op1;
2624e4b17023SJohn Marino off = op0;
2625e4b17023SJohn Marino goto do_add;
2626e4b17023SJohn Marino }
2627e4b17023SJohn Marino break;
2628e4b17023SJohn Marino case MINUS_EXPR:
2629e4b17023SJohn Marino if (expr_invariant_in_loop_p (loop, op1))
2630e4b17023SJohn Marino {
2631e4b17023SJohn Marino add = fold_convert (sizetype, op1);
2632e4b17023SJohn Marino add = size_binop (MINUS_EXPR, size_zero_node, add);
2633e4b17023SJohn Marino off = op0;
2634e4b17023SJohn Marino goto do_add;
2635e4b17023SJohn Marino }
2636e4b17023SJohn Marino break;
2637e4b17023SJohn Marino case MULT_EXPR:
2638e4b17023SJohn Marino if (scale == 1 && host_integerp (op1, 0))
2639e4b17023SJohn Marino {
2640e4b17023SJohn Marino scale = tree_low_cst (op1, 0);
2641e4b17023SJohn Marino off = op0;
2642e4b17023SJohn Marino continue;
2643e4b17023SJohn Marino }
2644e4b17023SJohn Marino break;
2645e4b17023SJohn Marino case SSA_NAME:
2646e4b17023SJohn Marino off = op0;
2647e4b17023SJohn Marino continue;
2648e4b17023SJohn Marino CASE_CONVERT:
2649e4b17023SJohn Marino if (!POINTER_TYPE_P (TREE_TYPE (op0))
2650e4b17023SJohn Marino && !INTEGRAL_TYPE_P (TREE_TYPE (op0)))
2651e4b17023SJohn Marino break;
2652e4b17023SJohn Marino if (TYPE_PRECISION (TREE_TYPE (op0))
2653e4b17023SJohn Marino == TYPE_PRECISION (TREE_TYPE (off)))
2654e4b17023SJohn Marino {
2655e4b17023SJohn Marino off = op0;
2656e4b17023SJohn Marino continue;
2657e4b17023SJohn Marino }
2658e4b17023SJohn Marino if (TYPE_PRECISION (TREE_TYPE (op0))
2659e4b17023SJohn Marino < TYPE_PRECISION (TREE_TYPE (off)))
2660e4b17023SJohn Marino {
2661e4b17023SJohn Marino off = op0;
2662e4b17023SJohn Marino offtype = TREE_TYPE (off);
2663e4b17023SJohn Marino STRIP_NOPS (off);
2664e4b17023SJohn Marino continue;
2665e4b17023SJohn Marino }
2666e4b17023SJohn Marino break;
2667e4b17023SJohn Marino default:
2668e4b17023SJohn Marino break;
2669e4b17023SJohn Marino }
2670e4b17023SJohn Marino break;
2671e4b17023SJohn Marino }
2672e4b17023SJohn Marino
2673e4b17023SJohn Marino /* If at the end OFF still isn't an SSA_NAME or isn't
2674e4b17023SJohn Marino    defined in the loop, punt.  */
2675e4b17023SJohn Marino if (TREE_CODE (off) != SSA_NAME
2676e4b17023SJohn Marino || expr_invariant_in_loop_p (loop, off))
2677e4b17023SJohn Marino return NULL_TREE;
2678e4b17023SJohn Marino
2679e4b17023SJohn Marino if (offtype == NULL_TREE)
2680e4b17023SJohn Marino offtype = TREE_TYPE (off);
2681e4b17023SJohn Marino
2682e4b17023SJohn Marino decl = targetm.vectorize.builtin_gather (STMT_VINFO_VECTYPE (stmt_info),
2683e4b17023SJohn Marino offtype, scale);
2684e4b17023SJohn Marino if (decl == NULL_TREE)
2685e4b17023SJohn Marino return NULL_TREE;
2686e4b17023SJohn Marino
2687e4b17023SJohn Marino if (basep)
2688e4b17023SJohn Marino *basep = base;
2689e4b17023SJohn Marino if (offp)
2690e4b17023SJohn Marino *offp = off;
2691e4b17023SJohn Marino if (scalep)
2692e4b17023SJohn Marino *scalep = scale;
2693e4b17023SJohn Marino return decl;
2694e4b17023SJohn Marino }
2695e4b17023SJohn Marino
2696e4b17023SJohn Marino
2697e4b17023SJohn Marino /* Function vect_analyze_data_refs.
2698e4b17023SJohn Marino
2699e4b17023SJohn Marino Find all the data references in the loop or basic block.
2700e4b17023SJohn Marino
2701e4b17023SJohn Marino The general structure of the analysis of data refs in the vectorizer is as
2702e4b17023SJohn Marino follows:
2703e4b17023SJohn Marino 1- vect_analyze_data_refs(loop/bb): call
2704e4b17023SJohn Marino compute_data_dependences_for_loop/bb to find and analyze all data-refs
2705e4b17023SJohn Marino in the loop/bb and their dependences.
2706e4b17023SJohn Marino 2- vect_analyze_dependences(): apply dependence testing using ddrs.
2707e4b17023SJohn Marino 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
2708e4b17023SJohn Marino 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
2709e4b17023SJohn Marino
2710e4b17023SJohn Marino */
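/* A simplified sketch of how a caller is expected to drive this analysis
   (illustrative only, not the actual call site):

     int min_vf = 2;
     if (!vect_analyze_data_refs (loop_vinfo, NULL, &min_vf))
       return false;

   On success MIN_VF holds the largest number of vector subparts required
   by any data-ref, i.e. a lower bound on the vectorization factor.  */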
2711e4b17023SJohn Marino
2712e4b17023SJohn Marino bool
2713e4b17023SJohn Marino vect_analyze_data_refs (loop_vec_info loop_vinfo,
2714e4b17023SJohn Marino bb_vec_info bb_vinfo,
2715e4b17023SJohn Marino int *min_vf)
2716e4b17023SJohn Marino {
2717e4b17023SJohn Marino struct loop *loop = NULL;
2718e4b17023SJohn Marino basic_block bb = NULL;
2719e4b17023SJohn Marino unsigned int i;
2720e4b17023SJohn Marino VEC (data_reference_p, heap) *datarefs;
2721e4b17023SJohn Marino struct data_reference *dr;
2722e4b17023SJohn Marino tree scalar_type;
2723e4b17023SJohn Marino bool res, stop_bb_analysis = false;
2724e4b17023SJohn Marino
2725e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
2726e4b17023SJohn Marino fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
2727e4b17023SJohn Marino
2728e4b17023SJohn Marino if (loop_vinfo)
2729e4b17023SJohn Marino {
2730e4b17023SJohn Marino loop = LOOP_VINFO_LOOP (loop_vinfo);
2731e4b17023SJohn Marino res = compute_data_dependences_for_loop
2732e4b17023SJohn Marino (loop, true,
2733e4b17023SJohn Marino &LOOP_VINFO_LOOP_NEST (loop_vinfo),
2734e4b17023SJohn Marino &LOOP_VINFO_DATAREFS (loop_vinfo),
2735e4b17023SJohn Marino &LOOP_VINFO_DDRS (loop_vinfo));
2736e4b17023SJohn Marino
2737e4b17023SJohn Marino if (!res)
2738e4b17023SJohn Marino {
2739e4b17023SJohn Marino if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2740e4b17023SJohn Marino fprintf (vect_dump, "not vectorized: loop contains function calls"
2741e4b17023SJohn Marino " or data references that cannot be analyzed");
2742e4b17023SJohn Marino return false;
2743e4b17023SJohn Marino }
2744e4b17023SJohn Marino
2745e4b17023SJohn Marino datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2746e4b17023SJohn Marino }
2747e4b17023SJohn Marino else
2748e4b17023SJohn Marino {
2749e4b17023SJohn Marino bb = BB_VINFO_BB (bb_vinfo);
2750e4b17023SJohn Marino res = compute_data_dependences_for_bb (bb, true,
2751e4b17023SJohn Marino &BB_VINFO_DATAREFS (bb_vinfo),
2752e4b17023SJohn Marino &BB_VINFO_DDRS (bb_vinfo));
2753e4b17023SJohn Marino if (!res)
2754e4b17023SJohn Marino {
2755e4b17023SJohn Marino if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2756e4b17023SJohn Marino fprintf (vect_dump, "not vectorized: basic block contains function"
2757e4b17023SJohn Marino " calls or data references that cannot be analyzed");
2758e4b17023SJohn Marino return false;
2759e4b17023SJohn Marino }
2760e4b17023SJohn Marino
2761e4b17023SJohn Marino datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2762e4b17023SJohn Marino }
2763e4b17023SJohn Marino
2764e4b17023SJohn Marino /* Go through the data-refs, check that the analysis succeeded. Update
2765e4b17023SJohn Marino pointer from stmt_vec_info struct to DR and vectype. */
2766e4b17023SJohn Marino
2767e4b17023SJohn Marino FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
2768e4b17023SJohn Marino {
2769e4b17023SJohn Marino gimple stmt;
2770e4b17023SJohn Marino stmt_vec_info stmt_info;
2771e4b17023SJohn Marino tree base, offset, init;
2772e4b17023SJohn Marino bool gather = false;
2773e4b17023SJohn Marino int vf;
2774e4b17023SJohn Marino
2775e4b17023SJohn Marino if (!dr || !DR_REF (dr))
2776e4b17023SJohn Marino {
2777e4b17023SJohn Marino if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2778e4b17023SJohn Marino fprintf (vect_dump, "not vectorized: unhandled data-ref ");
2779e4b17023SJohn Marino
2780e4b17023SJohn Marino return false;
2781e4b17023SJohn Marino }
2782e4b17023SJohn Marino
2783e4b17023SJohn Marino stmt = DR_STMT (dr);
2784e4b17023SJohn Marino stmt_info = vinfo_for_stmt (stmt);
2785e4b17023SJohn Marino
2786e4b17023SJohn Marino if (stop_bb_analysis)
2787e4b17023SJohn Marino {
2788e4b17023SJohn Marino STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2789e4b17023SJohn Marino continue;
2790e4b17023SJohn Marino }
2791e4b17023SJohn Marino
2792e4b17023SJohn Marino /* Check that analysis of the data-ref succeeded. */
2793e4b17023SJohn Marino if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
2794e4b17023SJohn Marino || !DR_STEP (dr))
2795e4b17023SJohn Marino {
2796e4b17023SJohn Marino /* If the target supports vector gather loads, see if this
2797e4b17023SJohn Marino    load can be handled as a gather.  */
2798e4b17023SJohn Marino if (loop_vinfo
2799e4b17023SJohn Marino && DR_IS_READ (dr)
2800e4b17023SJohn Marino && !TREE_THIS_VOLATILE (DR_REF (dr))
2801e4b17023SJohn Marino && targetm.vectorize.builtin_gather != NULL
2802e4b17023SJohn Marino && !nested_in_vect_loop_p (loop, stmt))
2803e4b17023SJohn Marino {
2804e4b17023SJohn Marino struct data_reference *newdr
2805e4b17023SJohn Marino = create_data_ref (NULL, loop_containing_stmt (stmt),
2806e4b17023SJohn Marino DR_REF (dr), stmt, true);
2807e4b17023SJohn Marino gcc_assert (newdr != NULL && DR_REF (newdr));
2808e4b17023SJohn Marino if (DR_BASE_ADDRESS (newdr)
2809e4b17023SJohn Marino && DR_OFFSET (newdr)
2810e4b17023SJohn Marino && DR_INIT (newdr)
2811e4b17023SJohn Marino && DR_STEP (newdr)
2812e4b17023SJohn Marino && integer_zerop (DR_STEP (newdr)))
2813e4b17023SJohn Marino {
2814e4b17023SJohn Marino dr = newdr;
2815e4b17023SJohn Marino gather = true;
2816e4b17023SJohn Marino }
2817e4b17023SJohn Marino else
2818e4b17023SJohn Marino free_data_ref (newdr);
2819e4b17023SJohn Marino }
2820e4b17023SJohn Marino
2821e4b17023SJohn Marino if (!gather)
2822e4b17023SJohn Marino {
2823e4b17023SJohn Marino if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2824e4b17023SJohn Marino {
2825e4b17023SJohn Marino fprintf (vect_dump, "not vectorized: data ref analysis "
2826e4b17023SJohn Marino "failed ");
2827e4b17023SJohn Marino print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2828e4b17023SJohn Marino }
2829e4b17023SJohn Marino
2830e4b17023SJohn Marino if (bb_vinfo)
2831e4b17023SJohn Marino {
2832e4b17023SJohn Marino STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2833e4b17023SJohn Marino stop_bb_analysis = true;
2834e4b17023SJohn Marino continue;
2835e4b17023SJohn Marino }
2836e4b17023SJohn Marino
2837e4b17023SJohn Marino return false;
2838e4b17023SJohn Marino }
2839e4b17023SJohn Marino }
2840e4b17023SJohn Marino
2841e4b17023SJohn Marino if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
2842e4b17023SJohn Marino {
2843e4b17023SJohn Marino if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2844e4b17023SJohn Marino fprintf (vect_dump, "not vectorized: base addr of dr is a "
2845e4b17023SJohn Marino "constant");
2846e4b17023SJohn Marino
2847e4b17023SJohn Marino if (bb_vinfo)
2848e4b17023SJohn Marino {
2849e4b17023SJohn Marino STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2850e4b17023SJohn Marino stop_bb_analysis = true;
2851e4b17023SJohn Marino continue;
2852e4b17023SJohn Marino }
2853e4b17023SJohn Marino
2854e4b17023SJohn Marino if (gather)
2855e4b17023SJohn Marino free_data_ref (dr);
2856e4b17023SJohn Marino return false;
2857e4b17023SJohn Marino }
2858e4b17023SJohn Marino
2859e4b17023SJohn Marino if (TREE_THIS_VOLATILE (DR_REF (dr)))
2860e4b17023SJohn Marino {
2861e4b17023SJohn Marino if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2862e4b17023SJohn Marino {
2863e4b17023SJohn Marino fprintf (vect_dump, "not vectorized: volatile type ");
2864e4b17023SJohn Marino print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2865e4b17023SJohn Marino }
2866e4b17023SJohn Marino
2867e4b17023SJohn Marino if (bb_vinfo)
2868e4b17023SJohn Marino {
2869e4b17023SJohn Marino STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2870e4b17023SJohn Marino stop_bb_analysis = true;
2871e4b17023SJohn Marino continue;
2872e4b17023SJohn Marino }
2873e4b17023SJohn Marino
2874e4b17023SJohn Marino return false;
2875e4b17023SJohn Marino }
2876e4b17023SJohn Marino
2877e4b17023SJohn Marino if (stmt_can_throw_internal (stmt))
2878e4b17023SJohn Marino {
2879e4b17023SJohn Marino if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2880e4b17023SJohn Marino {
2881e4b17023SJohn Marino fprintf (vect_dump, "not vectorized: statement can throw an "
2882e4b17023SJohn Marino "exception ");
2883e4b17023SJohn Marino print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2884e4b17023SJohn Marino }
2885e4b17023SJohn Marino
2886e4b17023SJohn Marino if (bb_vinfo)
2887e4b17023SJohn Marino {
2888e4b17023SJohn Marino STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2889e4b17023SJohn Marino stop_bb_analysis = true;
2890e4b17023SJohn Marino continue;
2891e4b17023SJohn Marino }
2892e4b17023SJohn Marino
2893e4b17023SJohn Marino if (gather)
2894e4b17023SJohn Marino free_data_ref (dr);
2895e4b17023SJohn Marino return false;
2896e4b17023SJohn Marino }
2897e4b17023SJohn Marino
2898e4b17023SJohn Marino if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF
2899e4b17023SJohn Marino && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1)))
2900e4b17023SJohn Marino {
2901e4b17023SJohn Marino if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2902e4b17023SJohn Marino {
2903e4b17023SJohn Marino fprintf (vect_dump, "not vectorized: statement is bitfield "
2904e4b17023SJohn Marino "access ");
2905e4b17023SJohn Marino print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2906e4b17023SJohn Marino }
2907e4b17023SJohn Marino
2908e4b17023SJohn Marino if (bb_vinfo)
2909e4b17023SJohn Marino {
2910e4b17023SJohn Marino STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2911e4b17023SJohn Marino stop_bb_analysis = true;
2912e4b17023SJohn Marino continue;
2913e4b17023SJohn Marino }
2914e4b17023SJohn Marino
2915e4b17023SJohn Marino if (gather)
2916e4b17023SJohn Marino free_data_ref (dr);
2917e4b17023SJohn Marino return false;
2918e4b17023SJohn Marino }
2919e4b17023SJohn Marino
2920e4b17023SJohn Marino base = unshare_expr (DR_BASE_ADDRESS (dr));
2921e4b17023SJohn Marino offset = unshare_expr (DR_OFFSET (dr));
2922e4b17023SJohn Marino init = unshare_expr (DR_INIT (dr));
2923e4b17023SJohn Marino
2924e4b17023SJohn Marino if (is_gimple_call (stmt))
2925e4b17023SJohn Marino {
2926e4b17023SJohn Marino if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2927e4b17023SJohn Marino {
2928e4b17023SJohn Marino fprintf (vect_dump, "not vectorized: dr in a call ");
2929e4b17023SJohn Marino print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2930e4b17023SJohn Marino }
2931e4b17023SJohn Marino
2932e4b17023SJohn Marino if (bb_vinfo)
2933e4b17023SJohn Marino {
2934e4b17023SJohn Marino STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2935e4b17023SJohn Marino stop_bb_analysis = true;
2936e4b17023SJohn Marino continue;
2937e4b17023SJohn Marino }
2938e4b17023SJohn Marino
2939e4b17023SJohn Marino if (gather)
2940e4b17023SJohn Marino free_data_ref (dr);
2941e4b17023SJohn Marino return false;
2942e4b17023SJohn Marino }
2943e4b17023SJohn Marino
2944e4b17023SJohn Marino /* Update DR field in stmt_vec_info struct. */
2945e4b17023SJohn Marino
2946e4b17023SJohn Marino /* If the dataref is in an inner-loop of the loop that is considered
2947e4b17023SJohn Marino    for vectorization, we also want to analyze the access relative to
2948e4b17023SJohn Marino the outer-loop (DR contains information only relative to the
2949e4b17023SJohn Marino inner-most enclosing loop). We do that by building a reference to the
2950e4b17023SJohn Marino first location accessed by the inner-loop, and analyze it relative to
2951e4b17023SJohn Marino the outer-loop. */
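      /* A hypothetical example: for

	   for (i = 0; i < n; i++)
	     for (j = 0; j < m; j++)
	       ... = in[i*m + j];

	 the DR describes only the inner j-stride; the code below recomputes
	 the base address, offset, init and step of the access relative to
	 the outer i-loop.  */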
2952e4b17023SJohn Marino if (loop && nested_in_vect_loop_p (loop, stmt))
2953e4b17023SJohn Marino {
2954e4b17023SJohn Marino tree outer_step, outer_base, outer_init;
2955e4b17023SJohn Marino HOST_WIDE_INT pbitsize, pbitpos;
2956e4b17023SJohn Marino tree poffset;
2957e4b17023SJohn Marino enum machine_mode pmode;
2958e4b17023SJohn Marino int punsignedp, pvolatilep;
2959e4b17023SJohn Marino affine_iv base_iv, offset_iv;
2960e4b17023SJohn Marino tree dinit;
2961e4b17023SJohn Marino
2962e4b17023SJohn Marino /* Build a reference to the first location accessed by the
2963e4b17023SJohn Marino inner-loop: *(BASE+INIT). (The first location is actually
2964e4b17023SJohn Marino BASE+INIT+OFFSET, but we add OFFSET separately later). */
2965e4b17023SJohn Marino tree inner_base = build_fold_indirect_ref
2966e4b17023SJohn Marino (fold_build_pointer_plus (base, init));
2967e4b17023SJohn Marino
2968e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
2969e4b17023SJohn Marino {
2970e4b17023SJohn Marino fprintf (vect_dump, "analyze in outer-loop: ");
2971e4b17023SJohn Marino print_generic_expr (vect_dump, inner_base, TDF_SLIM);
2972e4b17023SJohn Marino }
2973e4b17023SJohn Marino
2974e4b17023SJohn Marino outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
2975e4b17023SJohn Marino &poffset, &pmode, &punsignedp, &pvolatilep, false);
2976e4b17023SJohn Marino gcc_assert (outer_base != NULL_TREE);
2977e4b17023SJohn Marino
2978e4b17023SJohn Marino if (pbitpos % BITS_PER_UNIT != 0)
2979e4b17023SJohn Marino {
2980e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
2981e4b17023SJohn Marino fprintf (vect_dump, "failed: bit offset alignment.\n");
2982e4b17023SJohn Marino return false;
2983e4b17023SJohn Marino }
2984e4b17023SJohn Marino
2985e4b17023SJohn Marino outer_base = build_fold_addr_expr (outer_base);
2986e4b17023SJohn Marino if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
2987e4b17023SJohn Marino &base_iv, false))
2988e4b17023SJohn Marino {
2989e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
2990e4b17023SJohn Marino fprintf (vect_dump, "failed: evolution of base is not affine.\n");
2991e4b17023SJohn Marino return false;
2992e4b17023SJohn Marino }
2993e4b17023SJohn Marino
2994e4b17023SJohn Marino if (offset)
2995e4b17023SJohn Marino {
2996e4b17023SJohn Marino if (poffset)
2997e4b17023SJohn Marino poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
2998e4b17023SJohn Marino poffset);
2999e4b17023SJohn Marino else
3000e4b17023SJohn Marino poffset = offset;
3001e4b17023SJohn Marino }
3002e4b17023SJohn Marino
3003e4b17023SJohn Marino if (!poffset)
3004e4b17023SJohn Marino {
3005e4b17023SJohn Marino offset_iv.base = ssize_int (0);
3006e4b17023SJohn Marino offset_iv.step = ssize_int (0);
3007e4b17023SJohn Marino }
3008e4b17023SJohn Marino else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
3009e4b17023SJohn Marino &offset_iv, false))
3010e4b17023SJohn Marino {
3011e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
3012e4b17023SJohn Marino fprintf (vect_dump, "evolution of offset is not affine.\n");
3013e4b17023SJohn Marino return false;
3014e4b17023SJohn Marino }
3015e4b17023SJohn Marino
3016e4b17023SJohn Marino outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
3017e4b17023SJohn Marino split_constant_offset (base_iv.base, &base_iv.base, &dinit);
3018e4b17023SJohn Marino outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
3019e4b17023SJohn Marino split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
3020e4b17023SJohn Marino outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
3021e4b17023SJohn Marino
3022e4b17023SJohn Marino outer_step = size_binop (PLUS_EXPR,
3023e4b17023SJohn Marino fold_convert (ssizetype, base_iv.step),
3024e4b17023SJohn Marino fold_convert (ssizetype, offset_iv.step));
3025e4b17023SJohn Marino
3026e4b17023SJohn Marino STMT_VINFO_DR_STEP (stmt_info) = outer_step;
3027e4b17023SJohn Marino /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
3028e4b17023SJohn Marino STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
3029e4b17023SJohn Marino STMT_VINFO_DR_INIT (stmt_info) = outer_init;
3030e4b17023SJohn Marino STMT_VINFO_DR_OFFSET (stmt_info) =
3031e4b17023SJohn Marino fold_convert (ssizetype, offset_iv.base);
3032e4b17023SJohn Marino STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
3033e4b17023SJohn Marino size_int (highest_pow2_factor (offset_iv.base));
3034e4b17023SJohn Marino
3035e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
3036e4b17023SJohn Marino {
3037e4b17023SJohn Marino fprintf (vect_dump, "\touter base_address: ");
3038e4b17023SJohn Marino print_generic_expr (vect_dump, STMT_VINFO_DR_BASE_ADDRESS (stmt_info), TDF_SLIM);
3039e4b17023SJohn Marino fprintf (vect_dump, "\n\touter offset from base address: ");
3040e4b17023SJohn Marino print_generic_expr (vect_dump, STMT_VINFO_DR_OFFSET (stmt_info), TDF_SLIM);
3041e4b17023SJohn Marino fprintf (vect_dump, "\n\touter constant offset from base address: ");
3042e4b17023SJohn Marino print_generic_expr (vect_dump, STMT_VINFO_DR_INIT (stmt_info), TDF_SLIM);
3043e4b17023SJohn Marino fprintf (vect_dump, "\n\touter step: ");
3044e4b17023SJohn Marino print_generic_expr (vect_dump, STMT_VINFO_DR_STEP (stmt_info), TDF_SLIM);
3045e4b17023SJohn Marino fprintf (vect_dump, "\n\touter aligned to: ");
3046e4b17023SJohn Marino print_generic_expr (vect_dump, STMT_VINFO_DR_ALIGNED_TO (stmt_info), TDF_SLIM);
3047e4b17023SJohn Marino }
3048e4b17023SJohn Marino }
3049e4b17023SJohn Marino
3050e4b17023SJohn Marino if (STMT_VINFO_DATA_REF (stmt_info))
3051e4b17023SJohn Marino {
3052e4b17023SJohn Marino if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3053e4b17023SJohn Marino {
3054e4b17023SJohn Marino fprintf (vect_dump,
3055e4b17023SJohn Marino "not vectorized: more than one data ref in stmt: ");
3056e4b17023SJohn Marino print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3057e4b17023SJohn Marino }
3058e4b17023SJohn Marino
3059e4b17023SJohn Marino if (bb_vinfo)
3060e4b17023SJohn Marino {
3061e4b17023SJohn Marino STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3062e4b17023SJohn Marino stop_bb_analysis = true;
3063e4b17023SJohn Marino continue;
3064e4b17023SJohn Marino }
3065e4b17023SJohn Marino
3066e4b17023SJohn Marino if (gather)
3067e4b17023SJohn Marino free_data_ref (dr);
3068e4b17023SJohn Marino return false;
3069e4b17023SJohn Marino }
3070e4b17023SJohn Marino
3071e4b17023SJohn Marino STMT_VINFO_DATA_REF (stmt_info) = dr;
3072e4b17023SJohn Marino
3073e4b17023SJohn Marino /* Set vectype for STMT. */
3074e4b17023SJohn Marino scalar_type = TREE_TYPE (DR_REF (dr));
3075e4b17023SJohn Marino STMT_VINFO_VECTYPE (stmt_info) =
3076e4b17023SJohn Marino get_vectype_for_scalar_type (scalar_type);
3077e4b17023SJohn Marino if (!STMT_VINFO_VECTYPE (stmt_info))
3078e4b17023SJohn Marino {
3079e4b17023SJohn Marino if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3080e4b17023SJohn Marino {
3081e4b17023SJohn Marino fprintf (vect_dump,
3082e4b17023SJohn Marino "not vectorized: no vectype for stmt: ");
3083e4b17023SJohn Marino print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3084e4b17023SJohn Marino fprintf (vect_dump, " scalar_type: ");
3085e4b17023SJohn Marino print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
3086e4b17023SJohn Marino }
3087e4b17023SJohn Marino
3088e4b17023SJohn Marino if (bb_vinfo)
3089e4b17023SJohn Marino {
3090e4b17023SJohn Marino /* Mark the statement as not vectorizable. */
3091e4b17023SJohn Marino STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3092e4b17023SJohn Marino stop_bb_analysis = true;
3093e4b17023SJohn Marino continue;
3094e4b17023SJohn Marino }
3095e4b17023SJohn Marino
3096e4b17023SJohn Marino if (gather)
3097e4b17023SJohn Marino {
3098e4b17023SJohn Marino STMT_VINFO_DATA_REF (stmt_info) = NULL;
3099e4b17023SJohn Marino free_data_ref (dr);
3100e4b17023SJohn Marino }
3101e4b17023SJohn Marino return false;
3102e4b17023SJohn Marino }
3103e4b17023SJohn Marino
3104e4b17023SJohn Marino /* Adjust the minimal vectorization factor according to the
3105e4b17023SJohn Marino vector type. */
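      /* E.g. (hypothetical) a V4SI vector type has 4 subparts, so *MIN_VF
	 becomes at least 4 here.  */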
3106e4b17023SJohn Marino vf = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
3107e4b17023SJohn Marino if (vf > *min_vf)
3108e4b17023SJohn Marino *min_vf = vf;
3109e4b17023SJohn Marino
3110e4b17023SJohn Marino if (gather)
3111e4b17023SJohn Marino {
3112e4b17023SJohn Marino unsigned int j, k, n;
3113e4b17023SJohn Marino struct data_reference *olddr
3114e4b17023SJohn Marino = VEC_index (data_reference_p, datarefs, i);
3115e4b17023SJohn Marino VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
3116e4b17023SJohn Marino struct data_dependence_relation *ddr, *newddr;
3117e4b17023SJohn Marino bool bad = false;
3118e4b17023SJohn Marino tree off;
3119e4b17023SJohn Marino VEC (loop_p, heap) *nest = LOOP_VINFO_LOOP_NEST (loop_vinfo);
3120e4b17023SJohn Marino
3121e4b17023SJohn Marino if (!vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL)
3122e4b17023SJohn Marino || get_vectype_for_scalar_type (TREE_TYPE (off)) == NULL_TREE)
3123e4b17023SJohn Marino {
3124e4b17023SJohn Marino if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3125e4b17023SJohn Marino {
3126e4b17023SJohn Marino fprintf (vect_dump,
3127e4b17023SJohn Marino "not vectorized: not suitable for gather ");
3128e4b17023SJohn Marino print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3129e4b17023SJohn Marino }
3130e4b17023SJohn Marino return false;
3131e4b17023SJohn Marino }
3132e4b17023SJohn Marino
3133e4b17023SJohn Marino n = VEC_length (data_reference_p, datarefs) - 1;
3134e4b17023SJohn Marino for (j = 0, k = i - 1; j < i; j++)
3135e4b17023SJohn Marino {
3136e4b17023SJohn Marino ddr = VEC_index (ddr_p, ddrs, k);
3137e4b17023SJohn Marino gcc_assert (DDR_B (ddr) == olddr);
3138e4b17023SJohn Marino newddr = initialize_data_dependence_relation (DDR_A (ddr), dr,
3139e4b17023SJohn Marino nest);
3140e4b17023SJohn Marino VEC_replace (ddr_p, ddrs, k, newddr);
3141e4b17023SJohn Marino free_dependence_relation (ddr);
3142e4b17023SJohn Marino if (!bad
3143e4b17023SJohn Marino && DR_IS_WRITE (DDR_A (newddr))
3144e4b17023SJohn Marino && DDR_ARE_DEPENDENT (newddr) != chrec_known)
3145e4b17023SJohn Marino bad = true;
3146e4b17023SJohn Marino k += --n;
3147e4b17023SJohn Marino }
3148e4b17023SJohn Marino
3149e4b17023SJohn Marino k++;
3150e4b17023SJohn Marino n = k + VEC_length (data_reference_p, datarefs) - i - 1;
3151e4b17023SJohn Marino for (; k < n; k++)
3152e4b17023SJohn Marino {
3153e4b17023SJohn Marino ddr = VEC_index (ddr_p, ddrs, k);
3154e4b17023SJohn Marino gcc_assert (DDR_A (ddr) == olddr);
3155e4b17023SJohn Marino newddr = initialize_data_dependence_relation (dr, DDR_B (ddr),
3156e4b17023SJohn Marino nest);
3157e4b17023SJohn Marino VEC_replace (ddr_p, ddrs, k, newddr);
3158e4b17023SJohn Marino free_dependence_relation (ddr);
3159e4b17023SJohn Marino if (!bad
3160e4b17023SJohn Marino && DR_IS_WRITE (DDR_B (newddr))
3161e4b17023SJohn Marino && DDR_ARE_DEPENDENT (newddr) != chrec_known)
3162e4b17023SJohn Marino bad = true;
3163e4b17023SJohn Marino }
3164e4b17023SJohn Marino
3165e4b17023SJohn Marino k = VEC_length (ddr_p, ddrs)
3166e4b17023SJohn Marino - VEC_length (data_reference_p, datarefs) + i;
3167e4b17023SJohn Marino ddr = VEC_index (ddr_p, ddrs, k);
3168e4b17023SJohn Marino gcc_assert (DDR_A (ddr) == olddr && DDR_B (ddr) == olddr);
3169e4b17023SJohn Marino newddr = initialize_data_dependence_relation (dr, dr, nest);
3170e4b17023SJohn Marino VEC_replace (ddr_p, ddrs, k, newddr);
3171e4b17023SJohn Marino free_dependence_relation (ddr);
3172e4b17023SJohn Marino VEC_replace (data_reference_p, datarefs, i, dr);
3173e4b17023SJohn Marino
3174e4b17023SJohn Marino if (bad)
3175e4b17023SJohn Marino {
3176e4b17023SJohn Marino if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3177e4b17023SJohn Marino {
3178e4b17023SJohn Marino fprintf (vect_dump,
3179e4b17023SJohn Marino "not vectorized: data dependence conflict"
3180e4b17023SJohn Marino " prevents gather");
3181e4b17023SJohn Marino print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3182e4b17023SJohn Marino }
3183e4b17023SJohn Marino return false;
3184e4b17023SJohn Marino }
3185e4b17023SJohn Marino
3186e4b17023SJohn Marino STMT_VINFO_GATHER_P (stmt_info) = true;
3187e4b17023SJohn Marino }
3188e4b17023SJohn Marino }
3189e4b17023SJohn Marino
3190e4b17023SJohn Marino return true;
3191e4b17023SJohn Marino }
3192e4b17023SJohn Marino
3193e4b17023SJohn Marino
3194e4b17023SJohn Marino /* Function vect_get_new_vect_var.
3195e4b17023SJohn Marino
3196e4b17023SJohn Marino Returns a name for a new variable.  The current naming scheme prepends
3197e4b17023SJohn Marino the prefix "vect_", "stmp_" or "vect_p" (depending on the value of
3198e4b17023SJohn Marino VAR_KIND) to NAME if provided, and otherwise uses the prefix alone for
3199e4b17023SJohn Marino the new vectorizer-generated variable.  */
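/* For example (illustrative only), with VAR_KIND == vect_pointer_var and
   NAME == "x" the new variable is named "vect_px"; with vect_simple_var and
   no NAME, a temporary based on the bare "vect_" prefix is created.  */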
3200e4b17023SJohn Marino
3201e4b17023SJohn Marino tree
3202e4b17023SJohn Marino vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
3203e4b17023SJohn Marino {
3204e4b17023SJohn Marino const char *prefix;
3205e4b17023SJohn Marino tree new_vect_var;
3206e4b17023SJohn Marino
3207e4b17023SJohn Marino switch (var_kind)
3208e4b17023SJohn Marino {
3209e4b17023SJohn Marino case vect_simple_var:
3210e4b17023SJohn Marino prefix = "vect_";
3211e4b17023SJohn Marino break;
3212e4b17023SJohn Marino case vect_scalar_var:
3213e4b17023SJohn Marino prefix = "stmp_";
3214e4b17023SJohn Marino break;
3215e4b17023SJohn Marino case vect_pointer_var:
3216e4b17023SJohn Marino prefix = "vect_p";
3217e4b17023SJohn Marino break;
3218e4b17023SJohn Marino default:
3219e4b17023SJohn Marino gcc_unreachable ();
3220e4b17023SJohn Marino }
3221e4b17023SJohn Marino
3222e4b17023SJohn Marino if (name)
3223e4b17023SJohn Marino {
3224e4b17023SJohn Marino char* tmp = concat (prefix, name, NULL);
3225e4b17023SJohn Marino new_vect_var = create_tmp_var (type, tmp);
3226e4b17023SJohn Marino free (tmp);
3227e4b17023SJohn Marino }
3228e4b17023SJohn Marino else
3229e4b17023SJohn Marino new_vect_var = create_tmp_var (type, prefix);
3230e4b17023SJohn Marino
3231e4b17023SJohn Marino /* Mark vector typed variable as a gimple register variable. */
3232e4b17023SJohn Marino if (TREE_CODE (type) == VECTOR_TYPE)
3233e4b17023SJohn Marino DECL_GIMPLE_REG_P (new_vect_var) = true;
3234e4b17023SJohn Marino
3235e4b17023SJohn Marino return new_vect_var;
3236e4b17023SJohn Marino }
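
/* Illustrative sketch, not part of GCC: the naming scheme above expressed
   with plain C strings instead of trees.  "sketch_var_kind" and
   "make_vect_name" are hypothetical names used only for this standalone
   example, which can be compiled separately.  */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

enum sketch_var_kind { SKETCH_SIMPLE, SKETCH_SCALAR, SKETCH_POINTER };

static char *
make_vect_name (enum sketch_var_kind kind, const char *name)
{
  const char *prefix = (kind == SKETCH_SIMPLE ? "vect_"
			: kind == SKETCH_SCALAR ? "stmp_" : "vect_p");
  char *res = malloc (strlen (prefix) + (name ? strlen (name) : 0) + 1);

  strcpy (res, prefix);
  if (name)
    strcat (res, name);      /* prefix prepended to NAME, as described above */
  return res;
}

int
main (void)
{
  char *n = make_vect_name (SKETCH_POINTER, "a");
  printf ("%s\n", n);        /* prints "vect_pa" */
  free (n);
  return 0;
}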
3237e4b17023SJohn Marino
3238e4b17023SJohn Marino
3239e4b17023SJohn Marino /* Function vect_create_addr_base_for_vector_ref.
3240e4b17023SJohn Marino
3241e4b17023SJohn Marino Create an expression that computes the address of the first memory location
3242e4b17023SJohn Marino that will be accessed for a data reference.
3243e4b17023SJohn Marino
3244e4b17023SJohn Marino Input:
3245e4b17023SJohn Marino STMT: The statement containing the data reference.
3246e4b17023SJohn Marino NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
3247e4b17023SJohn Marino OFFSET: Optional. If supplied, it is added to the initial address.
3248e4b17023SJohn Marino LOOP: Specify relative to which loop-nest the address should be computed.
3249e4b17023SJohn Marino For example, when the dataref is in an inner-loop nested in an
3250e4b17023SJohn Marino outer-loop that is now being vectorized, LOOP can be either the
3251e4b17023SJohn Marino outer-loop, or the inner-loop. The first memory location accessed
3252e4b17023SJohn Marino by the following dataref ('in' points to short):
3253e4b17023SJohn Marino
3254e4b17023SJohn Marino for (i=0; i<N; i++)
3255e4b17023SJohn Marino for (j=0; j<M; j++)
3256e4b17023SJohn Marino s += in[i+j]
3257e4b17023SJohn Marino
3258e4b17023SJohn Marino is as follows:
3259e4b17023SJohn Marino if LOOP=i_loop: &in (relative to i_loop)
3260e4b17023SJohn Marino if LOOP=j_loop: &in+i*2B (relative to j_loop)
3261e4b17023SJohn Marino
3262e4b17023SJohn Marino Output:
3263e4b17023SJohn Marino 1. Return an SSA_NAME whose value is the address of the memory location of
3264e4b17023SJohn Marino the first vector of the data reference.
3265e4b17023SJohn Marino 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
3266e4b17023SJohn Marino these statement(s) which define the returned SSA_NAME.
3267e4b17023SJohn Marino
3268e4b17023SJohn Marino FORNOW: We are only handling array accesses with step 1. */
3269e4b17023SJohn Marino
3270e4b17023SJohn Marino tree
3271e4b17023SJohn Marino vect_create_addr_base_for_vector_ref (gimple stmt,
3272e4b17023SJohn Marino gimple_seq *new_stmt_list,
3273e4b17023SJohn Marino tree offset,
3274e4b17023SJohn Marino struct loop *loop)
3275e4b17023SJohn Marino {
3276e4b17023SJohn Marino stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3277e4b17023SJohn Marino struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3278e4b17023SJohn Marino tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
3279e4b17023SJohn Marino tree base_name;
3280e4b17023SJohn Marino tree data_ref_base_var;
3281e4b17023SJohn Marino tree vec_stmt;
3282e4b17023SJohn Marino tree addr_base, addr_expr;
3283e4b17023SJohn Marino tree dest;
3284e4b17023SJohn Marino gimple_seq seq = NULL;
3285e4b17023SJohn Marino tree base_offset = unshare_expr (DR_OFFSET (dr));
3286e4b17023SJohn Marino tree init = unshare_expr (DR_INIT (dr));
3287e4b17023SJohn Marino tree vect_ptr_type;
3288e4b17023SJohn Marino tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
3289e4b17023SJohn Marino loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3290e4b17023SJohn Marino tree base;
3291e4b17023SJohn Marino
3292e4b17023SJohn Marino if (loop_vinfo && loop && loop != (gimple_bb (stmt))->loop_father)
3293e4b17023SJohn Marino {
3294e4b17023SJohn Marino struct loop *outer_loop = LOOP_VINFO_LOOP (loop_vinfo);
3295e4b17023SJohn Marino
3296e4b17023SJohn Marino gcc_assert (nested_in_vect_loop_p (outer_loop, stmt));
3297e4b17023SJohn Marino
3298e4b17023SJohn Marino data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
3299e4b17023SJohn Marino base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
3300e4b17023SJohn Marino init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
3301e4b17023SJohn Marino }
3302e4b17023SJohn Marino
3303e4b17023SJohn Marino if (loop_vinfo)
3304e4b17023SJohn Marino base_name = build_fold_indirect_ref (data_ref_base);
3305e4b17023SJohn Marino else
3306e4b17023SJohn Marino {
3307e4b17023SJohn Marino base_offset = ssize_int (0);
3308e4b17023SJohn Marino init = ssize_int (0);
3309e4b17023SJohn Marino base_name = build_fold_indirect_ref (unshare_expr (DR_REF (dr)));
3310e4b17023SJohn Marino }
3311e4b17023SJohn Marino
3312e4b17023SJohn Marino data_ref_base_var = create_tmp_var (TREE_TYPE (data_ref_base), "batmp");
3313e4b17023SJohn Marino add_referenced_var (data_ref_base_var);
3314e4b17023SJohn Marino data_ref_base = force_gimple_operand (data_ref_base, &seq, true,
3315e4b17023SJohn Marino data_ref_base_var);
3316e4b17023SJohn Marino gimple_seq_add_seq (new_stmt_list, seq);
3317e4b17023SJohn Marino
3318e4b17023SJohn Marino /* Create base_offset */
3319e4b17023SJohn Marino base_offset = size_binop (PLUS_EXPR,
3320e4b17023SJohn Marino fold_convert (sizetype, base_offset),
3321e4b17023SJohn Marino fold_convert (sizetype, init));
3322e4b17023SJohn Marino dest = create_tmp_var (sizetype, "base_off");
3323e4b17023SJohn Marino add_referenced_var (dest);
3324e4b17023SJohn Marino base_offset = force_gimple_operand (base_offset, &seq, true, dest);
3325e4b17023SJohn Marino gimple_seq_add_seq (new_stmt_list, seq);
3326e4b17023SJohn Marino
3327e4b17023SJohn Marino if (offset)
3328e4b17023SJohn Marino {
3329e4b17023SJohn Marino tree tmp = create_tmp_var (sizetype, "offset");
3330e4b17023SJohn Marino
3331e4b17023SJohn Marino add_referenced_var (tmp);
3332e4b17023SJohn Marino offset = fold_build2 (MULT_EXPR, sizetype,
3333e4b17023SJohn Marino fold_convert (sizetype, offset), step);
3334e4b17023SJohn Marino base_offset = fold_build2 (PLUS_EXPR, sizetype,
3335e4b17023SJohn Marino base_offset, offset);
3336e4b17023SJohn Marino base_offset = force_gimple_operand (base_offset, &seq, false, tmp);
3337e4b17023SJohn Marino gimple_seq_add_seq (new_stmt_list, seq);
3338e4b17023SJohn Marino }
3339e4b17023SJohn Marino
3340e4b17023SJohn Marino /* base + base_offset */
3341e4b17023SJohn Marino if (loop_vinfo)
3342e4b17023SJohn Marino addr_base = fold_build_pointer_plus (data_ref_base, base_offset);
3343e4b17023SJohn Marino else
3344e4b17023SJohn Marino {
3345e4b17023SJohn Marino addr_base = build1 (ADDR_EXPR,
3346e4b17023SJohn Marino build_pointer_type (TREE_TYPE (DR_REF (dr))),
3347e4b17023SJohn Marino unshare_expr (DR_REF (dr)));
3348e4b17023SJohn Marino }
3349e4b17023SJohn Marino
3350e4b17023SJohn Marino vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
3351e4b17023SJohn Marino base = get_base_address (DR_REF (dr));
3352e4b17023SJohn Marino if (base
3353e4b17023SJohn Marino && TREE_CODE (base) == MEM_REF)
3354e4b17023SJohn Marino vect_ptr_type
3355e4b17023SJohn Marino = build_qualified_type (vect_ptr_type,
3356e4b17023SJohn Marino TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
3357e4b17023SJohn Marino
3358e4b17023SJohn Marino vec_stmt = fold_convert (vect_ptr_type, addr_base);
3359e4b17023SJohn Marino addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
3360e4b17023SJohn Marino get_name (base_name));
3361e4b17023SJohn Marino add_referenced_var (addr_expr);
3362e4b17023SJohn Marino vec_stmt = force_gimple_operand (vec_stmt, &seq, false, addr_expr);
3363e4b17023SJohn Marino gimple_seq_add_seq (new_stmt_list, seq);
3364e4b17023SJohn Marino
3365e4b17023SJohn Marino if (DR_PTR_INFO (dr)
3366e4b17023SJohn Marino && TREE_CODE (vec_stmt) == SSA_NAME)
3367e4b17023SJohn Marino {
3368e4b17023SJohn Marino duplicate_ssa_name_ptr_info (vec_stmt, DR_PTR_INFO (dr));
3369e4b17023SJohn Marino if (offset)
3370e4b17023SJohn Marino {
3371e4b17023SJohn Marino SSA_NAME_PTR_INFO (vec_stmt)->align = 1;
3372e4b17023SJohn Marino SSA_NAME_PTR_INFO (vec_stmt)->misalign = 0;
3373e4b17023SJohn Marino }
3374e4b17023SJohn Marino }
3375e4b17023SJohn Marino
3376e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
3377e4b17023SJohn Marino {
3378e4b17023SJohn Marino fprintf (vect_dump, "created ");
3379e4b17023SJohn Marino print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
3380e4b17023SJohn Marino }
3381e4b17023SJohn Marino
3382e4b17023SJohn Marino return vec_stmt;
3383e4b17023SJohn Marino }
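
/* Illustrative sketch, not part of GCC: the address arithmetic performed
   above, reduced to plain byte arithmetic.  "addr_base_sketch" and its
   parameters are hypothetical; DR_OFFSET, DR_INIT and OFFSET are modelled
   simply as byte counts.  Compile separately.  */

#include <stdio.h>
#include <stddef.h>

static char *
addr_base_sketch (char *base, size_t dr_offset, size_t dr_init,
		  size_t offset, size_t step)
{
  size_t base_offset = dr_offset + dr_init;   /* "Create base_offset" */

  base_offset += offset * step;               /* optional OFFSET, scaled by step */
  return base + base_offset;                  /* base + base_offset */
}

int
main (void)
{
  short in[16];
  /* First location for a dataref like in[i + j] with i == 2 ('in' points
     to short, so the step is 2 bytes).  */
  short *p = (short *) addr_base_sketch ((char *) in, 0, 0, 2, sizeof (short));

  printf ("%td\n", p - in);                   /* prints 2, i.e. &in[2] */
  return 0;
}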
3384e4b17023SJohn Marino
3385e4b17023SJohn Marino
3386e4b17023SJohn Marino /* Function vect_create_data_ref_ptr.
3387e4b17023SJohn Marino
3388e4b17023SJohn Marino Create a new pointer-to-AGGR_TYPE variable (ap), that points to the first
3389e4b17023SJohn Marino location accessed in the loop by STMT, along with the def-use update
3390e4b17023SJohn Marino chain to appropriately advance the pointer through the loop iterations.
3391e4b17023SJohn Marino Also set aliasing information for the pointer. This pointer is used by
3392e4b17023SJohn Marino the callers to this function to create a memory reference expression for
3393e4b17023SJohn Marino vector load/store access.
3394e4b17023SJohn Marino
3395e4b17023SJohn Marino Input:
3396e4b17023SJohn Marino 1. STMT: a stmt that references memory. Expected to be of the form
3397e4b17023SJohn Marino GIMPLE_ASSIGN <name, data-ref> or
3398e4b17023SJohn Marino GIMPLE_ASSIGN <data-ref, name>.
3399e4b17023SJohn Marino 2. AGGR_TYPE: the type of the reference, which should be either a vector
3400e4b17023SJohn Marino or an array.
3401e4b17023SJohn Marino 3. AT_LOOP: the loop where the vector memref is to be created.
3402e4b17023SJohn Marino 4. OFFSET (optional): an offset to be added to the initial address accessed
3403e4b17023SJohn Marino by the data-ref in STMT.
3404e4b17023SJohn Marino 5. BSI: location where the new stmts are to be placed if there is no loop.
3405e4b17023SJohn Marino 6. ONLY_INIT: indicate if ap is to be updated in the loop, or remain
3406e4b17023SJohn Marino pointing to the initial address.
3407e4b17023SJohn Marino
3408e4b17023SJohn Marino Output:
3409e4b17023SJohn Marino 1. Declare a new ptr to vector_type, and have it point to the base of the
3410e4b17023SJohn Marino data reference (initial address accessed by the data reference).
3411e4b17023SJohn Marino For example, for vector of type V8HI, the following code is generated:
3412e4b17023SJohn Marino
3413e4b17023SJohn Marino v8hi *ap;
3414e4b17023SJohn Marino ap = (v8hi *)initial_address;
3415e4b17023SJohn Marino
3416e4b17023SJohn Marino if OFFSET is not supplied:
3417e4b17023SJohn Marino initial_address = &a[init];
3418e4b17023SJohn Marino if OFFSET is supplied:
3419e4b17023SJohn Marino initial_address = &a[init + OFFSET];
3420e4b17023SJohn Marino
3421e4b17023SJohn Marino Return the initial_address in INITIAL_ADDRESS.
3422e4b17023SJohn Marino
3423e4b17023SJohn Marino 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
3424e4b17023SJohn Marino update the pointer in each iteration of the loop.
3425e4b17023SJohn Marino
3426e4b17023SJohn Marino Return the increment stmt that updates the pointer in PTR_INCR.
3427e4b17023SJohn Marino
3428e4b17023SJohn Marino 3. Set INV_P to true if the access pattern of the data reference in the
3429e4b17023SJohn Marino vectorized loop is invariant. Set it to false otherwise.
3430e4b17023SJohn Marino
3431e4b17023SJohn Marino 4. Return the pointer. */
3432e4b17023SJohn Marino
3433e4b17023SJohn Marino tree
3434e4b17023SJohn Marino vect_create_data_ref_ptr (gimple stmt, tree aggr_type, struct loop *at_loop,
3435e4b17023SJohn Marino tree offset, tree *initial_address,
3436e4b17023SJohn Marino gimple_stmt_iterator *gsi, gimple *ptr_incr,
3437e4b17023SJohn Marino bool only_init, bool *inv_p)
3438e4b17023SJohn Marino {
3439e4b17023SJohn Marino tree base_name;
3440e4b17023SJohn Marino stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3441e4b17023SJohn Marino loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3442e4b17023SJohn Marino struct loop *loop = NULL;
3443e4b17023SJohn Marino bool nested_in_vect_loop = false;
3444e4b17023SJohn Marino struct loop *containing_loop = NULL;
3445e4b17023SJohn Marino tree aggr_ptr_type;
3446e4b17023SJohn Marino tree aggr_ptr;
3447e4b17023SJohn Marino tree new_temp;
3448e4b17023SJohn Marino gimple vec_stmt;
3449e4b17023SJohn Marino gimple_seq new_stmt_list = NULL;
3450e4b17023SJohn Marino edge pe = NULL;
3451e4b17023SJohn Marino basic_block new_bb;
3452e4b17023SJohn Marino tree aggr_ptr_init;
3453e4b17023SJohn Marino struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3454e4b17023SJohn Marino tree aptr;
3455e4b17023SJohn Marino gimple_stmt_iterator incr_gsi;
3456e4b17023SJohn Marino bool insert_after;
3457e4b17023SJohn Marino bool negative;
3458e4b17023SJohn Marino tree indx_before_incr, indx_after_incr;
3459e4b17023SJohn Marino gimple incr;
3460e4b17023SJohn Marino tree step;
3461e4b17023SJohn Marino bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3462e4b17023SJohn Marino tree base;
3463e4b17023SJohn Marino
3464e4b17023SJohn Marino gcc_assert (TREE_CODE (aggr_type) == ARRAY_TYPE
3465e4b17023SJohn Marino || TREE_CODE (aggr_type) == VECTOR_TYPE);
3466e4b17023SJohn Marino
3467e4b17023SJohn Marino if (loop_vinfo)
3468e4b17023SJohn Marino {
3469e4b17023SJohn Marino loop = LOOP_VINFO_LOOP (loop_vinfo);
3470e4b17023SJohn Marino nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3471e4b17023SJohn Marino containing_loop = (gimple_bb (stmt))->loop_father;
3472e4b17023SJohn Marino pe = loop_preheader_edge (loop);
3473e4b17023SJohn Marino }
3474e4b17023SJohn Marino else
3475e4b17023SJohn Marino {
3476e4b17023SJohn Marino gcc_assert (bb_vinfo);
3477e4b17023SJohn Marino only_init = true;
3478e4b17023SJohn Marino *ptr_incr = NULL;
3479e4b17023SJohn Marino }
3480e4b17023SJohn Marino
3481e4b17023SJohn Marino /* Check the step (evolution) of the load in LOOP, and record
3482e4b17023SJohn Marino whether it's invariant. */
3483e4b17023SJohn Marino if (nested_in_vect_loop)
3484e4b17023SJohn Marino step = STMT_VINFO_DR_STEP (stmt_info);
3485e4b17023SJohn Marino else
3486e4b17023SJohn Marino step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
3487e4b17023SJohn Marino
3488e4b17023SJohn Marino if (tree_int_cst_compare (step, size_zero_node) == 0)
3489e4b17023SJohn Marino *inv_p = true;
3490e4b17023SJohn Marino else
3491e4b17023SJohn Marino *inv_p = false;
3492e4b17023SJohn Marino negative = tree_int_cst_compare (step, size_zero_node) < 0;
3493e4b17023SJohn Marino
3494e4b17023SJohn Marino /* Create an expression for the first address accessed by this load
3495e4b17023SJohn Marino in LOOP. */
3496e4b17023SJohn Marino base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
3497e4b17023SJohn Marino
3498e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
3499e4b17023SJohn Marino {
3500e4b17023SJohn Marino tree data_ref_base = base_name;
3501e4b17023SJohn Marino fprintf (vect_dump, "create %s-pointer variable to type: ",
3502e4b17023SJohn Marino tree_code_name[(int) TREE_CODE (aggr_type)]);
3503e4b17023SJohn Marino print_generic_expr (vect_dump, aggr_type, TDF_SLIM);
3504e4b17023SJohn Marino if (TREE_CODE (data_ref_base) == VAR_DECL
3505e4b17023SJohn Marino || TREE_CODE (data_ref_base) == ARRAY_REF)
3506e4b17023SJohn Marino fprintf (vect_dump, " vectorizing an array ref: ");
3507e4b17023SJohn Marino else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
3508e4b17023SJohn Marino fprintf (vect_dump, " vectorizing a record based array ref: ");
3509e4b17023SJohn Marino else if (TREE_CODE (data_ref_base) == SSA_NAME)
3510e4b17023SJohn Marino fprintf (vect_dump, " vectorizing a pointer ref: ");
3511e4b17023SJohn Marino print_generic_expr (vect_dump, base_name, TDF_SLIM);
3512e4b17023SJohn Marino }
3513e4b17023SJohn Marino
3514e4b17023SJohn Marino /* (1) Create the new aggregate-pointer variable. */
3515e4b17023SJohn Marino aggr_ptr_type = build_pointer_type (aggr_type);
3516e4b17023SJohn Marino base = get_base_address (DR_REF (dr));
3517e4b17023SJohn Marino if (base
3518e4b17023SJohn Marino && TREE_CODE (base) == MEM_REF)
3519e4b17023SJohn Marino aggr_ptr_type
3520e4b17023SJohn Marino = build_qualified_type (aggr_ptr_type,
3521e4b17023SJohn Marino TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
3522e4b17023SJohn Marino aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
3523e4b17023SJohn Marino get_name (base_name));
3524e4b17023SJohn Marino
3525e4b17023SJohn Marino /* Vector and array types inherit the alias set of their component
3526e4b17023SJohn Marino type by default so we need to use a ref-all pointer if the data
3527e4b17023SJohn Marino reference does not conflict with the created aggregated data
3528e4b17023SJohn Marino reference because it is not addressable. */
3529e4b17023SJohn Marino if (!alias_sets_conflict_p (get_deref_alias_set (aggr_ptr),
3530e4b17023SJohn Marino get_alias_set (DR_REF (dr))))
3531e4b17023SJohn Marino {
3532e4b17023SJohn Marino aggr_ptr_type
3533e4b17023SJohn Marino = build_pointer_type_for_mode (aggr_type,
3534e4b17023SJohn Marino TYPE_MODE (aggr_ptr_type), true);
3535e4b17023SJohn Marino aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
3536e4b17023SJohn Marino get_name (base_name));
3537e4b17023SJohn Marino }
3538e4b17023SJohn Marino
3539e4b17023SJohn Marino /* Likewise for any of the data references in the stmt group. */
3540e4b17023SJohn Marino else if (STMT_VINFO_GROUP_SIZE (stmt_info) > 1)
3541e4b17023SJohn Marino {
3542e4b17023SJohn Marino gimple orig_stmt = STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info);
3543e4b17023SJohn Marino do
3544e4b17023SJohn Marino {
3545e4b17023SJohn Marino tree lhs = gimple_assign_lhs (orig_stmt);
3546e4b17023SJohn Marino if (!alias_sets_conflict_p (get_deref_alias_set (aggr_ptr),
3547e4b17023SJohn Marino get_alias_set (lhs)))
3548e4b17023SJohn Marino {
3549e4b17023SJohn Marino aggr_ptr_type
3550e4b17023SJohn Marino = build_pointer_type_for_mode (aggr_type,
3551e4b17023SJohn Marino TYPE_MODE (aggr_ptr_type), true);
3552e4b17023SJohn Marino aggr_ptr
3553e4b17023SJohn Marino = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
3554e4b17023SJohn Marino get_name (base_name));
3555e4b17023SJohn Marino break;
3556e4b17023SJohn Marino }
3557e4b17023SJohn Marino
3558e4b17023SJohn Marino orig_stmt = STMT_VINFO_GROUP_NEXT_ELEMENT (vinfo_for_stmt (orig_stmt));
3559e4b17023SJohn Marino }
3560e4b17023SJohn Marino while (orig_stmt);
3561e4b17023SJohn Marino }
3562e4b17023SJohn Marino
3563e4b17023SJohn Marino add_referenced_var (aggr_ptr);
3564e4b17023SJohn Marino
3565e4b17023SJohn Marino /* Note: If the dataref is in an inner-loop nested in LOOP, and we are
3566e4b17023SJohn Marino vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
3567e4b17023SJohn Marino def-use update cycles for the pointer: one relative to the outer-loop
3568e4b17023SJohn Marino (LOOP), which is what steps (3) and (4) below do. The other is relative
3569e4b17023SJohn Marino to the inner-loop (which is the inner-most loop containing the dataref),
3570e4b17023SJohn Marino and this is done by step (5) below.
3571e4b17023SJohn Marino
3572e4b17023SJohn Marino When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
3573e4b17023SJohn Marino inner-most loop, and so steps (3),(4) work the same, and step (5) is
3574e4b17023SJohn Marino redundant. Steps (3),(4) create the following:
3575e4b17023SJohn Marino
3576e4b17023SJohn Marino vp0 = &base_addr;
3577e4b17023SJohn Marino LOOP: vp1 = phi(vp0,vp2)
3578e4b17023SJohn Marino ...
3579e4b17023SJohn Marino ...
3580e4b17023SJohn Marino vp2 = vp1 + step
3581e4b17023SJohn Marino goto LOOP
3582e4b17023SJohn Marino
3583e4b17023SJohn Marino If there is an inner-loop nested in loop, then step (5) will also be
3584e4b17023SJohn Marino applied, and an additional update in the inner-loop will be created:
3585e4b17023SJohn Marino
3586e4b17023SJohn Marino vp0 = &base_addr;
3587e4b17023SJohn Marino LOOP: vp1 = phi(vp0,vp2)
3588e4b17023SJohn Marino ...
3589e4b17023SJohn Marino inner: vp3 = phi(vp1,vp4)
3590e4b17023SJohn Marino vp4 = vp3 + inner_step
3591e4b17023SJohn Marino if () goto inner
3592e4b17023SJohn Marino ...
3593e4b17023SJohn Marino vp2 = vp1 + step
3594e4b17023SJohn Marino if () goto LOOP */
3595e4b17023SJohn Marino
3596e4b17023SJohn Marino /* (2) Calculate the initial address of the aggregate-pointer, and set
3597e4b17023SJohn Marino the aggregate-pointer to point to it before the loop. */
3598e4b17023SJohn Marino
3599e4b17023SJohn Marino /* Create: &(base[init_val+offset]) in the loop preheader. */
3600e4b17023SJohn Marino
3601e4b17023SJohn Marino new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
3602e4b17023SJohn Marino offset, loop);
3603e4b17023SJohn Marino if (new_stmt_list)
3604e4b17023SJohn Marino {
3605e4b17023SJohn Marino if (pe)
3606e4b17023SJohn Marino {
3607e4b17023SJohn Marino new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
3608e4b17023SJohn Marino gcc_assert (!new_bb);
3609e4b17023SJohn Marino }
3610e4b17023SJohn Marino else
3611e4b17023SJohn Marino gsi_insert_seq_before (gsi, new_stmt_list, GSI_SAME_STMT);
3612e4b17023SJohn Marino }
3613e4b17023SJohn Marino
3614e4b17023SJohn Marino *initial_address = new_temp;
3615e4b17023SJohn Marino
3616e4b17023SJohn Marino /* Create: p = (aggr_type *) initial_base */
3617e4b17023SJohn Marino if (TREE_CODE (new_temp) != SSA_NAME
3618e4b17023SJohn Marino || !useless_type_conversion_p (aggr_ptr_type, TREE_TYPE (new_temp)))
3619e4b17023SJohn Marino {
3620e4b17023SJohn Marino vec_stmt = gimple_build_assign (aggr_ptr,
3621e4b17023SJohn Marino fold_convert (aggr_ptr_type, new_temp));
3622e4b17023SJohn Marino aggr_ptr_init = make_ssa_name (aggr_ptr, vec_stmt);
3623e4b17023SJohn Marino /* Copy the points-to information if it exists. */
3624e4b17023SJohn Marino if (DR_PTR_INFO (dr))
3625e4b17023SJohn Marino duplicate_ssa_name_ptr_info (aggr_ptr_init, DR_PTR_INFO (dr));
3626e4b17023SJohn Marino gimple_assign_set_lhs (vec_stmt, aggr_ptr_init);
3627e4b17023SJohn Marino if (pe)
3628e4b17023SJohn Marino {
3629e4b17023SJohn Marino new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt);
3630e4b17023SJohn Marino gcc_assert (!new_bb);
3631e4b17023SJohn Marino }
3632e4b17023SJohn Marino else
3633e4b17023SJohn Marino gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
3634e4b17023SJohn Marino }
3635e4b17023SJohn Marino else
3636e4b17023SJohn Marino aggr_ptr_init = new_temp;
3637e4b17023SJohn Marino
3638e4b17023SJohn Marino /* (3) Handle the updating of the aggregate-pointer inside the loop.
3639e4b17023SJohn Marino This is needed when ONLY_INIT is false, and also when AT_LOOP is the
3640e4b17023SJohn Marino inner-loop nested in LOOP (during outer-loop vectorization). */
3641e4b17023SJohn Marino
3642e4b17023SJohn Marino /* No update in loop is required. */
3643e4b17023SJohn Marino if (only_init && (!loop_vinfo || at_loop == loop))
3644e4b17023SJohn Marino aptr = aggr_ptr_init;
3645e4b17023SJohn Marino else
3646e4b17023SJohn Marino {
3647e4b17023SJohn Marino /* The step of the aggregate pointer is the type size. */
3648e4b17023SJohn Marino tree step = TYPE_SIZE_UNIT (aggr_type);
3649e4b17023SJohn Marino /* One exception to the above is when the scalar step of the load in
3650e4b17023SJohn Marino LOOP is zero. In this case the step here is also zero. */
3651e4b17023SJohn Marino if (*inv_p)
3652e4b17023SJohn Marino step = size_zero_node;
3653e4b17023SJohn Marino else if (negative)
3654e4b17023SJohn Marino step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3655e4b17023SJohn Marino
3656e4b17023SJohn Marino standard_iv_increment_position (loop, &incr_gsi, &insert_after);
3657e4b17023SJohn Marino
3658e4b17023SJohn Marino create_iv (aggr_ptr_init,
3659e4b17023SJohn Marino fold_convert (aggr_ptr_type, step),
3660e4b17023SJohn Marino aggr_ptr, loop, &incr_gsi, insert_after,
3661e4b17023SJohn Marino &indx_before_incr, &indx_after_incr);
3662e4b17023SJohn Marino incr = gsi_stmt (incr_gsi);
3663e4b17023SJohn Marino set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3664e4b17023SJohn Marino
3665e4b17023SJohn Marino /* Copy the points-to information if it exists. */
3666e4b17023SJohn Marino if (DR_PTR_INFO (dr))
3667e4b17023SJohn Marino {
3668e4b17023SJohn Marino duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3669e4b17023SJohn Marino duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3670e4b17023SJohn Marino }
3671e4b17023SJohn Marino if (ptr_incr)
3672e4b17023SJohn Marino *ptr_incr = incr;
3673e4b17023SJohn Marino
3674e4b17023SJohn Marino aptr = indx_before_incr;
3675e4b17023SJohn Marino }
3676e4b17023SJohn Marino
3677e4b17023SJohn Marino if (!nested_in_vect_loop || only_init)
3678e4b17023SJohn Marino return aptr;
3679e4b17023SJohn Marino
3680e4b17023SJohn Marino
3681e4b17023SJohn Marino /* (4) Handle the updating of the aggregate-pointer inside the inner-loop
3682e4b17023SJohn Marino nested in LOOP, if it exists. */
3683e4b17023SJohn Marino
3684e4b17023SJohn Marino gcc_assert (nested_in_vect_loop);
3685e4b17023SJohn Marino if (!only_init)
3686e4b17023SJohn Marino {
3687e4b17023SJohn Marino standard_iv_increment_position (containing_loop, &incr_gsi,
3688e4b17023SJohn Marino &insert_after);
3689e4b17023SJohn Marino create_iv (aptr, fold_convert (aggr_ptr_type, DR_STEP (dr)), aggr_ptr,
3690e4b17023SJohn Marino containing_loop, &incr_gsi, insert_after, &indx_before_incr,
3691e4b17023SJohn Marino &indx_after_incr);
3692e4b17023SJohn Marino incr = gsi_stmt (incr_gsi);
3693e4b17023SJohn Marino set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3694e4b17023SJohn Marino
3695e4b17023SJohn Marino /* Copy the points-to information if it exists. */
3696e4b17023SJohn Marino if (DR_PTR_INFO (dr))
3697e4b17023SJohn Marino {
3698e4b17023SJohn Marino duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3699e4b17023SJohn Marino duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3700e4b17023SJohn Marino }
3701e4b17023SJohn Marino if (ptr_incr)
3702e4b17023SJohn Marino *ptr_incr = incr;
3703e4b17023SJohn Marino
3704e4b17023SJohn Marino return indx_before_incr;
3705e4b17023SJohn Marino }
3706e4b17023SJohn Marino else
3707e4b17023SJohn Marino gcc_unreachable ();
3708e4b17023SJohn Marino }
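
/* Illustrative sketch, not part of GCC: the def-use cycle that steps (2)
   and (3) above set up, written as ordinary C for a 16-byte (V8HI-sized)
   step.  The variable names mirror the vp0/vp1/vp2 diagram in the comment
   and are otherwise hypothetical.  Compile separately.  */

#include <stdio.h>

int
main (void)
{
  short a[32];
  char *vp0 = (char *) &a[0];           /* (2) aggr_ptr_init = &base_addr */
  char *vp1 = vp0;                      /* LOOP: vp1 = phi (vp0, vp2) */
  int i;

  for (i = 0; i < 4; i++)
    {
      printf ("iteration %d: byte offset %td\n", i, vp1 - (char *) a);
      vp1 = vp1 + 16;                   /* (3) vp2 = vp1 + step */
    }
  return 0;
}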
3709e4b17023SJohn Marino
3710e4b17023SJohn Marino
3711e4b17023SJohn Marino /* Function bump_vector_ptr.
3712e4b17023SJohn Marino
3713e4b17023SJohn Marino Increment a pointer (to a vector type) by vector-size. If requested,
3714e4b17023SJohn Marino i.e. if PTR-INCR is given, then also connect the new increment stmt
3715e4b17023SJohn Marino to the existing def-use update-chain of the pointer, by modifying
3716e4b17023SJohn Marino the PTR_INCR as illustrated below:
3717e4b17023SJohn Marino
3718e4b17023SJohn Marino The pointer def-use update-chain before this function:
3719e4b17023SJohn Marino DATAREF_PTR = phi (p_0, p_2)
3720e4b17023SJohn Marino ....
3721e4b17023SJohn Marino PTR_INCR: p_2 = DATAREF_PTR + step
3722e4b17023SJohn Marino
3723e4b17023SJohn Marino The pointer def-use update-chain after this function:
3724e4b17023SJohn Marino DATAREF_PTR = phi (p_0, p_2)
3725e4b17023SJohn Marino ....
3726e4b17023SJohn Marino NEW_DATAREF_PTR = DATAREF_PTR + BUMP
3727e4b17023SJohn Marino ....
3728e4b17023SJohn Marino PTR_INCR: p_2 = NEW_DATAREF_PTR + step
3729e4b17023SJohn Marino
3730e4b17023SJohn Marino Input:
3731e4b17023SJohn Marino DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
3732e4b17023SJohn Marino in the loop.
3733e4b17023SJohn Marino PTR_INCR - optional. The stmt that updates the pointer in each iteration of
3734e4b17023SJohn Marino the loop. The increment amount across iterations is expected
3735e4b17023SJohn Marino to be vector_size.
3736e4b17023SJohn Marino BSI - location where the new update stmt is to be placed.
3737e4b17023SJohn Marino STMT - the original scalar memory-access stmt that is being vectorized.
3738e4b17023SJohn Marino BUMP - optional. The offset by which to bump the pointer. If not given,
3739e4b17023SJohn Marino the offset is assumed to be vector_size.
3740e4b17023SJohn Marino
3741e4b17023SJohn Marino Output: Return NEW_DATAREF_PTR as illustrated above.
3742e4b17023SJohn Marino
3743e4b17023SJohn Marino */
3744e4b17023SJohn Marino
3745e4b17023SJohn Marino tree
3746e4b17023SJohn Marino bump_vector_ptr (tree dataref_ptr, gimple ptr_incr, gimple_stmt_iterator *gsi,
3747e4b17023SJohn Marino gimple stmt, tree bump)
3748e4b17023SJohn Marino {
3749e4b17023SJohn Marino stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3750e4b17023SJohn Marino struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3751e4b17023SJohn Marino tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3752e4b17023SJohn Marino tree ptr_var = SSA_NAME_VAR (dataref_ptr);
3753e4b17023SJohn Marino tree update = TYPE_SIZE_UNIT (vectype);
3754e4b17023SJohn Marino gimple incr_stmt;
3755e4b17023SJohn Marino ssa_op_iter iter;
3756e4b17023SJohn Marino use_operand_p use_p;
3757e4b17023SJohn Marino tree new_dataref_ptr;
3758e4b17023SJohn Marino
3759e4b17023SJohn Marino if (bump)
3760e4b17023SJohn Marino update = bump;
3761e4b17023SJohn Marino
3762e4b17023SJohn Marino incr_stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, ptr_var,
3763e4b17023SJohn Marino dataref_ptr, update);
3764e4b17023SJohn Marino new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
3765e4b17023SJohn Marino gimple_assign_set_lhs (incr_stmt, new_dataref_ptr);
3766e4b17023SJohn Marino vect_finish_stmt_generation (stmt, incr_stmt, gsi);
3767e4b17023SJohn Marino
3768e4b17023SJohn Marino /* Copy the points-to information if it exists. */
3769e4b17023SJohn Marino if (DR_PTR_INFO (dr))
3770e4b17023SJohn Marino {
3771e4b17023SJohn Marino duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
3772e4b17023SJohn Marino SSA_NAME_PTR_INFO (new_dataref_ptr)->align = 1;
3773e4b17023SJohn Marino SSA_NAME_PTR_INFO (new_dataref_ptr)->misalign = 0;
3774e4b17023SJohn Marino }
3775e4b17023SJohn Marino
3776e4b17023SJohn Marino if (!ptr_incr)
3777e4b17023SJohn Marino return new_dataref_ptr;
3778e4b17023SJohn Marino
3779e4b17023SJohn Marino /* Update the vector-pointer's cross-iteration increment. */
3780e4b17023SJohn Marino FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
3781e4b17023SJohn Marino {
3782e4b17023SJohn Marino tree use = USE_FROM_PTR (use_p);
3783e4b17023SJohn Marino
3784e4b17023SJohn Marino if (use == dataref_ptr)
3785e4b17023SJohn Marino SET_USE (use_p, new_dataref_ptr);
3786e4b17023SJohn Marino else
3787e4b17023SJohn Marino gcc_assert (tree_int_cst_compare (use, update) == 0);
3788e4b17023SJohn Marino }
3789e4b17023SJohn Marino
3790e4b17023SJohn Marino return new_dataref_ptr;
3791e4b17023SJohn Marino }
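
/* Illustrative sketch, not part of GCC: the effect of bump_vector_ptr on
   the update chain, with plain pointers.  The 8-byte BUMP and 16-byte step
   are hypothetical values chosen for the example.  Compile separately.  */

#include <stdio.h>

int
main (void)
{
  char buf[64];
  char *dataref_ptr = buf;                    /* DATAREF_PTR = phi (p_0, p_2) */
  char *new_dataref_ptr = dataref_ptr + 8;    /* NEW_DATAREF_PTR = DATAREF_PTR + BUMP */
  /* After the rewiring, the cross-iteration increment starts from the
     bumped pointer instead of the original one.  */
  char *p_2 = new_dataref_ptr + 16;           /* PTR_INCR: p_2 = NEW_DATAREF_PTR + step */

  printf ("%td %td\n", new_dataref_ptr - buf, p_2 - buf);   /* prints "8 24" */
  return 0;
}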
3792e4b17023SJohn Marino
3793e4b17023SJohn Marino
3794e4b17023SJohn Marino /* Function vect_create_destination_var.
3795e4b17023SJohn Marino
3796e4b17023SJohn Marino Create a new temporary of type VECTYPE. */
3797e4b17023SJohn Marino
3798e4b17023SJohn Marino tree
3799e4b17023SJohn Marino vect_create_destination_var (tree scalar_dest, tree vectype)
3800e4b17023SJohn Marino {
3801e4b17023SJohn Marino tree vec_dest;
3802e4b17023SJohn Marino const char *new_name;
3803e4b17023SJohn Marino tree type;
3804e4b17023SJohn Marino enum vect_var_kind kind;
3805e4b17023SJohn Marino
3806e4b17023SJohn Marino kind = vectype ? vect_simple_var : vect_scalar_var;
3807e4b17023SJohn Marino type = vectype ? vectype : TREE_TYPE (scalar_dest);
3808e4b17023SJohn Marino
3809e4b17023SJohn Marino gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
3810e4b17023SJohn Marino
3811e4b17023SJohn Marino new_name = get_name (scalar_dest);
3812e4b17023SJohn Marino if (!new_name)
3813e4b17023SJohn Marino new_name = "var_";
3814e4b17023SJohn Marino vec_dest = vect_get_new_vect_var (type, kind, new_name);
3815e4b17023SJohn Marino add_referenced_var (vec_dest);
3816e4b17023SJohn Marino
3817e4b17023SJohn Marino return vec_dest;
3818e4b17023SJohn Marino }
3819e4b17023SJohn Marino
3820e4b17023SJohn Marino /* Function vect_strided_store_supported.
3821e4b17023SJohn Marino
3822e4b17023SJohn Marino Returns TRUE if interleave high and interleave low permutations
3823e4b17023SJohn Marino are supported, and FALSE otherwise. */
3824e4b17023SJohn Marino
3825e4b17023SJohn Marino bool
3826e4b17023SJohn Marino vect_strided_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
3827e4b17023SJohn Marino {
3828e4b17023SJohn Marino enum machine_mode mode = TYPE_MODE (vectype);
3829e4b17023SJohn Marino
3830e4b17023SJohn Marino /* vect_permute_store_chain requires the group size to be a power of two. */
3831e4b17023SJohn Marino if (exact_log2 (count) == -1)
3832e4b17023SJohn Marino {
3833e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
3834e4b17023SJohn Marino fprintf (vect_dump, "the size of the group of strided accesses"
3835e4b17023SJohn Marino " is not a power of 2");
3836e4b17023SJohn Marino return false;
3837e4b17023SJohn Marino }
3838e4b17023SJohn Marino
3839e4b17023SJohn Marino /* Check that the permutation is supported. */
3840e4b17023SJohn Marino if (VECTOR_MODE_P (mode))
3841e4b17023SJohn Marino {
3842e4b17023SJohn Marino unsigned int i, nelt = GET_MODE_NUNITS (mode);
3843e4b17023SJohn Marino unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
3844e4b17023SJohn Marino for (i = 0; i < nelt / 2; i++)
3845e4b17023SJohn Marino {
3846e4b17023SJohn Marino sel[i * 2] = i;
3847e4b17023SJohn Marino sel[i * 2 + 1] = i + nelt;
3848e4b17023SJohn Marino }
3849e4b17023SJohn Marino if (can_vec_perm_p (mode, false, sel))
3850e4b17023SJohn Marino {
3851e4b17023SJohn Marino for (i = 0; i < nelt; i++)
3852e4b17023SJohn Marino sel[i] += nelt / 2;
3853e4b17023SJohn Marino if (can_vec_perm_p (mode, false, sel))
3854e4b17023SJohn Marino return true;
3855e4b17023SJohn Marino }
3856e4b17023SJohn Marino }
3857e4b17023SJohn Marino
3858e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
3859e4b17023SJohn Marino fprintf (vect_dump, "interleave op not supported by target.");
3860e4b17023SJohn Marino return false;
3861e4b17023SJohn Marino }
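
/* Illustrative sketch, not part of GCC: the two selector vectors checked
   above, printed for nelt == 8.  The first is the interleave-high mask
   {0, 8, 1, 9, 2, 10, 3, 11}; adding nelt/2 to each entry yields the
   interleave-low mask {4, 12, 5, 13, 6, 14, 7, 15}.  Compile separately.  */

#include <stdio.h>

int
main (void)
{
  unsigned char sel[8];
  unsigned int i, nelt = 8;

  for (i = 0; i < nelt / 2; i++)
    {
      sel[i * 2] = i;                   /* element from the first vector */
      sel[i * 2 + 1] = i + nelt;        /* matching element from the second */
    }
  for (i = 0; i < nelt; i++)
    printf ("%d ", sel[i]);             /* 0 8 1 9 2 10 3 11 */
  printf ("\n");

  for (i = 0; i < nelt; i++)
    sel[i] += nelt / 2;
  for (i = 0; i < nelt; i++)
    printf ("%d ", sel[i]);             /* 4 12 5 13 6 14 7 15 */
  printf ("\n");
  return 0;
}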
3862e4b17023SJohn Marino
3863e4b17023SJohn Marino
3864e4b17023SJohn Marino /* Return TRUE if vec_store_lanes is available for COUNT vectors of
3865e4b17023SJohn Marino type VECTYPE. */
3866e4b17023SJohn Marino
3867e4b17023SJohn Marino bool
3868e4b17023SJohn Marino vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
3869e4b17023SJohn Marino {
3870e4b17023SJohn Marino return vect_lanes_optab_supported_p ("vec_store_lanes",
3871e4b17023SJohn Marino vec_store_lanes_optab,
3872e4b17023SJohn Marino vectype, count);
3873e4b17023SJohn Marino }
3874e4b17023SJohn Marino
3875e4b17023SJohn Marino
3876e4b17023SJohn Marino /* Function vect_permute_store_chain.
3877e4b17023SJohn Marino
3878e4b17023SJohn Marino Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
3879e4b17023SJohn Marino a power of 2, generate interleave_high/low stmts to reorder the data
3880e4b17023SJohn Marino correctly for the stores. Return the final references for stores in
3881e4b17023SJohn Marino RESULT_CHAIN.
3882e4b17023SJohn Marino
3883e4b17023SJohn Marino E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3884e4b17023SJohn Marino The input is 4 vectors each containing 8 elements. We assign a number to
3885e4b17023SJohn Marino each element, the input sequence is:
3886e4b17023SJohn Marino
3887e4b17023SJohn Marino 1st vec: 0 1 2 3 4 5 6 7
3888e4b17023SJohn Marino 2nd vec: 8 9 10 11 12 13 14 15
3889e4b17023SJohn Marino 3rd vec: 16 17 18 19 20 21 22 23
3890e4b17023SJohn Marino 4th vec: 24 25 26 27 28 29 30 31
3891e4b17023SJohn Marino
3892e4b17023SJohn Marino The output sequence should be:
3893e4b17023SJohn Marino
3894e4b17023SJohn Marino 1st vec: 0 8 16 24 1 9 17 25
3895e4b17023SJohn Marino 2nd vec: 2 10 18 26 3 11 19 27
3896e4b17023SJohn Marino 3rd vec: 4 12 20 28 5 13 21 29
3897e4b17023SJohn Marino 4th vec: 6 14 22 30 7 15 23 31
3898e4b17023SJohn Marino
3899e4b17023SJohn Marino i.e., we interleave the contents of the four vectors in their order.
3900e4b17023SJohn Marino
3901e4b17023SJohn Marino We use interleave_high/low instructions to create such output. The input of
3902e4b17023SJohn Marino each interleave_high/low operation is two vectors:
3903e4b17023SJohn Marino 1st vec 2nd vec
3904e4b17023SJohn Marino 0 1 2 3 4 5 6 7
3905e4b17023SJohn Marino the even elements of the result vector are obtained left-to-right from the
3906e4b17023SJohn Marino high/low elements of the first vector. The odd elements of the result are
3907e4b17023SJohn Marino obtained left-to-right from the high/low elements of the second vector.
3908e4b17023SJohn Marino The output of interleave_high will be: 0 4 1 5
3909e4b17023SJohn Marino and of interleave_low: 2 6 3 7
3910e4b17023SJohn Marino
3911e4b17023SJohn Marino
3912e4b17023SJohn Marino The permutation is done in log LENGTH stages. In each stage interleave_high
3913e4b17023SJohn Marino and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
3914e4b17023SJohn Marino where the first argument is taken from the first half of DR_CHAIN and the
3915e4b17023SJohn Marino second argument from its second half.
3916e4b17023SJohn Marino In our example,
3917e4b17023SJohn Marino
3918e4b17023SJohn Marino I1: interleave_high (1st vec, 3rd vec)
3919e4b17023SJohn Marino I2: interleave_low (1st vec, 3rd vec)
3920e4b17023SJohn Marino I3: interleave_high (2nd vec, 4th vec)
3921e4b17023SJohn Marino I4: interleave_low (2nd vec, 4th vec)
3922e4b17023SJohn Marino
3923e4b17023SJohn Marino The output for the first stage is:
3924e4b17023SJohn Marino
3925e4b17023SJohn Marino I1: 0 16 1 17 2 18 3 19
3926e4b17023SJohn Marino I2: 4 20 5 21 6 22 7 23
3927e4b17023SJohn Marino I3: 8 24 9 25 10 26 11 27
3928e4b17023SJohn Marino I4: 12 28 13 29 14 30 15 31
3929e4b17023SJohn Marino
3930e4b17023SJohn Marino The output of the second stage, i.e. the final result is:
3931e4b17023SJohn Marino
3932e4b17023SJohn Marino I1: 0 8 16 24 1 9 17 25
3933e4b17023SJohn Marino I2: 2 10 18 26 3 11 19 27
3934e4b17023SJohn Marino I3: 4 12 20 28 5 13 21 29
3935e4b17023SJohn Marino I4: 6 14 22 30 7 15 23 31. */
3936e4b17023SJohn Marino
3937e4b17023SJohn Marino void
3938e4b17023SJohn Marino vect_permute_store_chain (VEC(tree,heap) *dr_chain,
3939e4b17023SJohn Marino unsigned int length,
3940e4b17023SJohn Marino gimple stmt,
3941e4b17023SJohn Marino gimple_stmt_iterator *gsi,
3942e4b17023SJohn Marino VEC(tree,heap) **result_chain)
3943e4b17023SJohn Marino {
3944e4b17023SJohn Marino tree perm_dest, vect1, vect2, high, low;
3945e4b17023SJohn Marino gimple perm_stmt;
3946e4b17023SJohn Marino tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3947e4b17023SJohn Marino tree perm_mask_low, perm_mask_high;
3948e4b17023SJohn Marino unsigned int i, n;
3949e4b17023SJohn Marino unsigned int j, nelt = TYPE_VECTOR_SUBPARTS (vectype);
3950e4b17023SJohn Marino unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
3951e4b17023SJohn Marino
3952e4b17023SJohn Marino *result_chain = VEC_copy (tree, heap, dr_chain);
3953e4b17023SJohn Marino
3954e4b17023SJohn Marino for (i = 0, n = nelt / 2; i < n; i++)
3955e4b17023SJohn Marino {
3956e4b17023SJohn Marino sel[i * 2] = i;
3957e4b17023SJohn Marino sel[i * 2 + 1] = i + nelt;
3958e4b17023SJohn Marino }
3959e4b17023SJohn Marino perm_mask_high = vect_gen_perm_mask (vectype, sel);
3960e4b17023SJohn Marino gcc_assert (perm_mask_high != NULL);
3961e4b17023SJohn Marino
3962e4b17023SJohn Marino for (i = 0; i < nelt; i++)
3963e4b17023SJohn Marino sel[i] += nelt / 2;
3964e4b17023SJohn Marino perm_mask_low = vect_gen_perm_mask (vectype, sel);
3965e4b17023SJohn Marino gcc_assert (perm_mask_low != NULL);
3966e4b17023SJohn Marino
3967e4b17023SJohn Marino for (i = 0, n = exact_log2 (length); i < n; i++)
3968e4b17023SJohn Marino {
3969e4b17023SJohn Marino for (j = 0; j < length/2; j++)
3970e4b17023SJohn Marino {
3971e4b17023SJohn Marino vect1 = VEC_index (tree, dr_chain, j);
3972e4b17023SJohn Marino vect2 = VEC_index (tree, dr_chain, j+length/2);
3973e4b17023SJohn Marino
3974e4b17023SJohn Marino /* Create interleaving stmt:
3975e4b17023SJohn Marino high = VEC_PERM_EXPR <vect1, vect2, {0, nelt, 1, nelt+1, ...}> */
3976e4b17023SJohn Marino perm_dest = create_tmp_var (vectype, "vect_inter_high");
3977e4b17023SJohn Marino DECL_GIMPLE_REG_P (perm_dest) = 1;
3978e4b17023SJohn Marino add_referenced_var (perm_dest);
3979e4b17023SJohn Marino high = make_ssa_name (perm_dest, NULL);
3980e4b17023SJohn Marino perm_stmt
3981e4b17023SJohn Marino = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, high,
3982e4b17023SJohn Marino vect1, vect2, perm_mask_high);
3983e4b17023SJohn Marino vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3984e4b17023SJohn Marino VEC_replace (tree, *result_chain, 2*j, high);
3985e4b17023SJohn Marino
3986e4b17023SJohn Marino /* Create interleaving stmt:
3987e4b17023SJohn Marino low = VEC_PERM_EXPR <vect1, vect2, {nelt/2, nelt*3/2, nelt/2+1,
3988e4b17023SJohn Marino nelt*3/2+1, ...}> */
3989e4b17023SJohn Marino perm_dest = create_tmp_var (vectype, "vect_inter_low");
3990e4b17023SJohn Marino DECL_GIMPLE_REG_P (perm_dest) = 1;
3991e4b17023SJohn Marino add_referenced_var (perm_dest);
3992e4b17023SJohn Marino low = make_ssa_name (perm_dest, NULL);
3993e4b17023SJohn Marino perm_stmt
3994e4b17023SJohn Marino = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, low,
3995e4b17023SJohn Marino vect1, vect2, perm_mask_low);
3996e4b17023SJohn Marino vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3997e4b17023SJohn Marino VEC_replace (tree, *result_chain, 2*j+1, low);
3998e4b17023SJohn Marino }
3999e4b17023SJohn Marino dr_chain = VEC_copy (tree, heap, *result_chain);
4000e4b17023SJohn Marino }
4001e4b17023SJohn Marino }
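
/* Illustrative sketch, not part of GCC: a scalar simulation of the
   log2(LENGTH) interleaving stages described in the comment above, for
   LENGTH == 4 vectors of 8 elements.  It prints the final chain
   0 8 16 24 1 9 17 25 / 2 10 18 26 ... from the example.  All names are
   hypothetical; compile separately.  */

#include <stdio.h>
#include <string.h>

#define NELT 8
#define LEN  4

/* Model interleave_high/low on element indices: HI takes the first halves
   of V1 and V2 alternately, LO takes their second halves.  */
static void
interleave (const int *v1, const int *v2, int *hi, int *lo)
{
  int i;
  for (i = 0; i < NELT / 2; i++)
    {
      hi[2 * i] = v1[i];
      hi[2 * i + 1] = v2[i];
      lo[2 * i] = v1[NELT / 2 + i];
      lo[2 * i + 1] = v2[NELT / 2 + i];
    }
}

int
main (void)
{
  int chain[LEN][NELT], next[LEN][NELT];
  int v, e, j, stage;

  for (v = 0; v < LEN; v++)
    for (e = 0; e < NELT; e++)
      chain[v][e] = v * NELT + e;       /* elements numbered 0..31 */

  for (stage = 0; stage < 2; stage++)   /* log2 (LENGTH) == 2 stages */
    {
      for (j = 0; j < LEN / 2; j++)     /* first half paired with second half */
	interleave (chain[j], chain[j + LEN / 2],
		    next[2 * j], next[2 * j + 1]);
      memcpy (chain, next, sizeof (chain));
    }

  for (v = 0; v < LEN; v++)
    {
      for (e = 0; e < NELT; e++)
	printf ("%2d ", chain[v][e]);
      printf ("\n");
    }
  return 0;
}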
4002e4b17023SJohn Marino
4003e4b17023SJohn Marino /* Function vect_setup_realignment.
4004e4b17023SJohn Marino
4005e4b17023SJohn Marino This function is called when vectorizing an unaligned load using
4006e4b17023SJohn Marino the dr_explicit_realign[_optimized] scheme.
4007e4b17023SJohn Marino This function generates the following code at the loop prolog:
4008e4b17023SJohn Marino
4009e4b17023SJohn Marino p = initial_addr;
4010e4b17023SJohn Marino x msq_init = *(floor(p)); # prolog load
4011e4b17023SJohn Marino realignment_token = call target_builtin;
4012e4b17023SJohn Marino loop:
4013e4b17023SJohn Marino x msq = phi (msq_init, ---)
4014e4b17023SJohn Marino
4015e4b17023SJohn Marino The stmts marked with x are generated only for the case of
4016e4b17023SJohn Marino dr_explicit_realign_optimized.
4017e4b17023SJohn Marino
4018e4b17023SJohn Marino The code above sets up a new (vector) pointer, pointing to the first
4019e4b17023SJohn Marino location accessed by STMT, and a "floor-aligned" load using that pointer.
4020e4b17023SJohn Marino It also generates code to compute the "realignment-token" (if the relevant
4021e4b17023SJohn Marino target hook was defined), and creates a phi-node at the loop-header bb
4022e4b17023SJohn Marino whose arguments are the result of the prolog-load (created by this
4023e4b17023SJohn Marino function) and the result of a load that takes place in the loop (to be
4024e4b17023SJohn Marino created by the caller to this function).
4025e4b17023SJohn Marino
4026e4b17023SJohn Marino For the case of dr_explicit_realign_optimized:
4027e4b17023SJohn Marino The caller to this function uses the phi-result (msq) to create the
4028e4b17023SJohn Marino realignment code inside the loop, and sets up the missing phi argument,
4029e4b17023SJohn Marino as follows:
4030e4b17023SJohn Marino loop:
4031e4b17023SJohn Marino msq = phi (msq_init, lsq)
4032e4b17023SJohn Marino lsq = *(floor(p')); # load in loop
4033e4b17023SJohn Marino result = realign_load (msq, lsq, realignment_token);
4034e4b17023SJohn Marino
4035e4b17023SJohn Marino For the case of dr_explicit_realign:
4036e4b17023SJohn Marino loop:
4037e4b17023SJohn Marino msq = *(floor(p)); # load in loop
4038e4b17023SJohn Marino p' = p + (VS-1);
4039e4b17023SJohn Marino lsq = *(floor(p')); # load in loop
4040e4b17023SJohn Marino result = realign_load (msq, lsq, realignment_token);
4041e4b17023SJohn Marino
4042e4b17023SJohn Marino Input:
4043e4b17023SJohn Marino STMT - (scalar) load stmt to be vectorized. This load accesses
4044e4b17023SJohn Marino a memory location that may be unaligned.
4045e4b17023SJohn Marino BSI - place where new code is to be inserted.
4046e4b17023SJohn Marino ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
4047e4b17023SJohn Marino is used.
4048e4b17023SJohn Marino
4049e4b17023SJohn Marino Output:
4050e4b17023SJohn Marino REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
4051e4b17023SJohn Marino target hook, if defined.
4052e4b17023SJohn Marino Return value - the result of the loop-header phi node. */
4053e4b17023SJohn Marino
4054e4b17023SJohn Marino tree
4055e4b17023SJohn Marino vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
4056e4b17023SJohn Marino tree *realignment_token,
4057e4b17023SJohn Marino enum dr_alignment_support alignment_support_scheme,
4058e4b17023SJohn Marino tree init_addr,
4059e4b17023SJohn Marino struct loop **at_loop)
4060e4b17023SJohn Marino {
4061e4b17023SJohn Marino stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4062e4b17023SJohn Marino tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4063e4b17023SJohn Marino loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4064e4b17023SJohn Marino struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4065e4b17023SJohn Marino struct loop *loop = NULL;
4066e4b17023SJohn Marino edge pe = NULL;
4067e4b17023SJohn Marino tree scalar_dest = gimple_assign_lhs (stmt);
4068e4b17023SJohn Marino tree vec_dest;
4069e4b17023SJohn Marino gimple inc;
4070e4b17023SJohn Marino tree ptr;
4071e4b17023SJohn Marino tree data_ref;
4072e4b17023SJohn Marino gimple new_stmt;
4073e4b17023SJohn Marino basic_block new_bb;
4074e4b17023SJohn Marino tree msq_init = NULL_TREE;
4075e4b17023SJohn Marino tree new_temp;
4076e4b17023SJohn Marino gimple phi_stmt;
4077e4b17023SJohn Marino tree msq = NULL_TREE;
4078e4b17023SJohn Marino gimple_seq stmts = NULL;
4079e4b17023SJohn Marino bool inv_p;
4080e4b17023SJohn Marino bool compute_in_loop = false;
4081e4b17023SJohn Marino bool nested_in_vect_loop = false;
4082e4b17023SJohn Marino struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
4083e4b17023SJohn Marino struct loop *loop_for_initial_load = NULL;
4084e4b17023SJohn Marino
4085e4b17023SJohn Marino if (loop_vinfo)
4086e4b17023SJohn Marino {
4087e4b17023SJohn Marino loop = LOOP_VINFO_LOOP (loop_vinfo);
4088e4b17023SJohn Marino nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4089e4b17023SJohn Marino }
4090e4b17023SJohn Marino
4091e4b17023SJohn Marino gcc_assert (alignment_support_scheme == dr_explicit_realign
4092e4b17023SJohn Marino || alignment_support_scheme == dr_explicit_realign_optimized);
4093e4b17023SJohn Marino
4094e4b17023SJohn Marino /* We need to generate three things:
4095e4b17023SJohn Marino 1. the misalignment computation
4096e4b17023SJohn Marino 2. the extra vector load (for the optimized realignment scheme).
4097e4b17023SJohn Marino 3. the phi node for the two vectors from which the realignment is
4098e4b17023SJohn Marino done (for the optimized realignment scheme). */
4099e4b17023SJohn Marino
4100e4b17023SJohn Marino /* 1. Determine where to generate the misalignment computation.
4101e4b17023SJohn Marino
4102e4b17023SJohn Marino If INIT_ADDR is NULL_TREE, this indicates that the misalignment
4103e4b17023SJohn Marino calculation will be generated by this function, outside the loop (in the
4104e4b17023SJohn Marino preheader). Otherwise, INIT_ADDR has already been computed for us by the
4105e4b17023SJohn Marino caller, inside the loop.
4106e4b17023SJohn Marino
4107e4b17023SJohn Marino Background: If the misalignment remains fixed throughout the iterations of
4108e4b17023SJohn Marino the loop, then both realignment schemes are applicable, and also the
4109e4b17023SJohn Marino misalignment computation can be done outside LOOP. This is because we are
4110e4b17023SJohn Marino vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
4111e4b17023SJohn Marino are a multiple of VS (the Vector Size), and therefore the misalignment in
4112e4b17023SJohn Marino different vectorized LOOP iterations is always the same.
4113e4b17023SJohn Marino The problem arises only if the memory access is in an inner-loop nested
4114e4b17023SJohn Marino inside LOOP, which is now being vectorized using outer-loop vectorization.
4115e4b17023SJohn Marino This is the only case when the misalignment of the memory access may not
4116e4b17023SJohn Marino remain fixed throughout the iterations of the inner-loop (as explained in
4117e4b17023SJohn Marino detail in vect_supportable_dr_alignment). In this case, not only is the
4118e4b17023SJohn Marino optimized realignment scheme not applicable, but also the misalignment
4119e4b17023SJohn Marino computation (and generation of the realignment token that is passed to
4120e4b17023SJohn Marino REALIGN_LOAD) have to be done inside the loop.
4121e4b17023SJohn Marino
4122e4b17023SJohn Marino In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
4123e4b17023SJohn Marino or not, which in turn determines if the misalignment is computed inside
4124e4b17023SJohn Marino the inner-loop, or outside LOOP. */
4125e4b17023SJohn Marino
4126e4b17023SJohn Marino if (init_addr != NULL_TREE || !loop_vinfo)
4127e4b17023SJohn Marino {
4128e4b17023SJohn Marino compute_in_loop = true;
4129e4b17023SJohn Marino gcc_assert (alignment_support_scheme == dr_explicit_realign);
4130e4b17023SJohn Marino }
4131e4b17023SJohn Marino
4132e4b17023SJohn Marino
4133e4b17023SJohn Marino /* 2. Determine where to generate the extra vector load.
4134e4b17023SJohn Marino
4135e4b17023SJohn Marino For the optimized realignment scheme, instead of generating two vector
4136e4b17023SJohn Marino loads in each iteration, we generate a single extra vector load in the
4137e4b17023SJohn Marino preheader of the loop, and in each iteration reuse the result of the
4138e4b17023SJohn Marino vector load from the previous iteration. In case the memory access is in
4139e4b17023SJohn Marino an inner-loop nested inside LOOP, which is now being vectorized using
4140e4b17023SJohn Marino outer-loop vectorization, we need to determine whether this initial vector
4141e4b17023SJohn Marino load should be generated at the preheader of the inner-loop, or can be
4142e4b17023SJohn Marino generated at the preheader of LOOP. If the memory access has no evolution
4143e4b17023SJohn Marino in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
4144e4b17023SJohn Marino to be generated inside LOOP (in the preheader of the inner-loop). */
4145e4b17023SJohn Marino
4146e4b17023SJohn Marino if (nested_in_vect_loop)
4147e4b17023SJohn Marino {
4148e4b17023SJohn Marino tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
4149e4b17023SJohn Marino bool invariant_in_outerloop =
4150e4b17023SJohn Marino (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
4151e4b17023SJohn Marino loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
4152e4b17023SJohn Marino }
4153e4b17023SJohn Marino else
4154e4b17023SJohn Marino loop_for_initial_load = loop;
4155e4b17023SJohn Marino if (at_loop)
4156e4b17023SJohn Marino *at_loop = loop_for_initial_load;
4157e4b17023SJohn Marino
4158e4b17023SJohn Marino if (loop_for_initial_load)
4159e4b17023SJohn Marino pe = loop_preheader_edge (loop_for_initial_load);
4160e4b17023SJohn Marino
4161e4b17023SJohn Marino /* 3. For the case of the optimized realignment, create the first vector
4162e4b17023SJohn Marino load at the loop preheader. */
4163e4b17023SJohn Marino
4164e4b17023SJohn Marino if (alignment_support_scheme == dr_explicit_realign_optimized)
4165e4b17023SJohn Marino {
4166e4b17023SJohn Marino /* Create msq_init = *(floor(p1)) in the loop preheader */
4167e4b17023SJohn Marino
4168e4b17023SJohn Marino gcc_assert (!compute_in_loop);
4169e4b17023SJohn Marino vec_dest = vect_create_destination_var (scalar_dest, vectype);
4170e4b17023SJohn Marino ptr = vect_create_data_ref_ptr (stmt, vectype, loop_for_initial_load,
4171e4b17023SJohn Marino NULL_TREE, &init_addr, NULL, &inc,
4172e4b17023SJohn Marino true, &inv_p);
4173e4b17023SJohn Marino new_stmt = gimple_build_assign_with_ops
4174e4b17023SJohn Marino (BIT_AND_EXPR, NULL_TREE, ptr,
4175e4b17023SJohn Marino build_int_cst (TREE_TYPE (ptr),
4176e4b17023SJohn Marino -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4177e4b17023SJohn Marino new_temp = make_ssa_name (SSA_NAME_VAR (ptr), new_stmt);
4178e4b17023SJohn Marino gimple_assign_set_lhs (new_stmt, new_temp);
4179e4b17023SJohn Marino new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
4180e4b17023SJohn Marino gcc_assert (!new_bb);
4181e4b17023SJohn Marino data_ref
4182e4b17023SJohn Marino = build2 (MEM_REF, TREE_TYPE (vec_dest), new_temp,
4183e4b17023SJohn Marino build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0));
4184e4b17023SJohn Marino new_stmt = gimple_build_assign (vec_dest, data_ref);
4185e4b17023SJohn Marino new_temp = make_ssa_name (vec_dest, new_stmt);
4186e4b17023SJohn Marino gimple_assign_set_lhs (new_stmt, new_temp);
4187e4b17023SJohn Marino mark_symbols_for_renaming (new_stmt);
4188e4b17023SJohn Marino if (pe)
4189e4b17023SJohn Marino {
4190e4b17023SJohn Marino new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
4191e4b17023SJohn Marino gcc_assert (!new_bb);
4192e4b17023SJohn Marino }
4193e4b17023SJohn Marino else
4194e4b17023SJohn Marino gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
4195e4b17023SJohn Marino
4196e4b17023SJohn Marino msq_init = gimple_assign_lhs (new_stmt);
4197e4b17023SJohn Marino }
4198e4b17023SJohn Marino
4199e4b17023SJohn Marino /* 4. Create realignment token using a target builtin, if available.
4200e4b17023SJohn Marino It is done either inside the containing loop, or before LOOP (as
4201e4b17023SJohn Marino determined above). */
4202e4b17023SJohn Marino
4203e4b17023SJohn Marino if (targetm.vectorize.builtin_mask_for_load)
4204e4b17023SJohn Marino {
4205e4b17023SJohn Marino tree builtin_decl;
4206e4b17023SJohn Marino
4207e4b17023SJohn Marino /* Compute INIT_ADDR - the initial address accessed by this memref. */
4208e4b17023SJohn Marino if (!init_addr)
4209e4b17023SJohn Marino {
4210e4b17023SJohn Marino /* Generate the INIT_ADDR computation outside LOOP. */
4211e4b17023SJohn Marino init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
4212e4b17023SJohn Marino NULL_TREE, loop);
4213e4b17023SJohn Marino if (loop)
4214e4b17023SJohn Marino {
4215e4b17023SJohn Marino pe = loop_preheader_edge (loop);
4216e4b17023SJohn Marino new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4217e4b17023SJohn Marino gcc_assert (!new_bb);
4218e4b17023SJohn Marino }
4219e4b17023SJohn Marino else
4220e4b17023SJohn Marino gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
4221e4b17023SJohn Marino }
4222e4b17023SJohn Marino
4223e4b17023SJohn Marino builtin_decl = targetm.vectorize.builtin_mask_for_load ();
4224e4b17023SJohn Marino new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
4225e4b17023SJohn Marino vec_dest =
4226e4b17023SJohn Marino vect_create_destination_var (scalar_dest,
4227e4b17023SJohn Marino gimple_call_return_type (new_stmt));
4228e4b17023SJohn Marino new_temp = make_ssa_name (vec_dest, new_stmt);
4229e4b17023SJohn Marino gimple_call_set_lhs (new_stmt, new_temp);
4230e4b17023SJohn Marino
4231e4b17023SJohn Marino if (compute_in_loop)
4232e4b17023SJohn Marino gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
4233e4b17023SJohn Marino else
4234e4b17023SJohn Marino {
4235e4b17023SJohn Marino /* Generate the misalignment computation outside LOOP. */
4236e4b17023SJohn Marino pe = loop_preheader_edge (loop);
4237e4b17023SJohn Marino new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
4238e4b17023SJohn Marino gcc_assert (!new_bb);
4239e4b17023SJohn Marino }
4240e4b17023SJohn Marino
4241e4b17023SJohn Marino *realignment_token = gimple_call_lhs (new_stmt);
4242e4b17023SJohn Marino
4243e4b17023SJohn Marino /* The result of the CALL_EXPR to this builtin is determined from
4244e4b17023SJohn Marino the value of the parameter, and no global variables are touched,
4245e4b17023SJohn Marino which makes the builtin a "const" function. Requiring the
4246e4b17023SJohn Marino builtin to have the "const" attribute makes it unnecessary
4247e4b17023SJohn Marino to call mark_call_clobbered. */
4248e4b17023SJohn Marino gcc_assert (TREE_READONLY (builtin_decl));
4249e4b17023SJohn Marino }
4250e4b17023SJohn Marino
4251e4b17023SJohn Marino if (alignment_support_scheme == dr_explicit_realign)
4252e4b17023SJohn Marino return msq;
4253e4b17023SJohn Marino
4254e4b17023SJohn Marino gcc_assert (!compute_in_loop);
4255e4b17023SJohn Marino gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
4256e4b17023SJohn Marino
4257e4b17023SJohn Marino
4258e4b17023SJohn Marino /* 5. Create msq = phi <msq_init, lsq> in loop.  */
4259e4b17023SJohn Marino
4260e4b17023SJohn Marino pe = loop_preheader_edge (containing_loop);
4261e4b17023SJohn Marino vec_dest = vect_create_destination_var (scalar_dest, vectype);
4262e4b17023SJohn Marino msq = make_ssa_name (vec_dest, NULL);
4263e4b17023SJohn Marino phi_stmt = create_phi_node (msq, containing_loop->header);
4264e4b17023SJohn Marino SSA_NAME_DEF_STMT (msq) = phi_stmt;
4265e4b17023SJohn Marino add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);
4266e4b17023SJohn Marino
4267e4b17023SJohn Marino return msq;
4268e4b17023SJohn Marino }
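/* Illustration only, added for clarity and not taken from the original
   sources: the overall shape of the code produced by the steps above for
   the optimized realignment scheme.  msq_init is the preheader load of
   step 3, rt is the realignment token of step 4, and the phi of step 5
   carries the previous aligned load across iterations; lsq denotes the
   aligned load that the caller later emits for each access:

     preheader:
       msq_init = *(floor (addr));
       rt = builtin_mask_for_load (addr);   // step 4, if the target has it
     loop:
       msq = phi <msq_init (preheader), lsq (latch)>;
       lsq = *(floor (addr + VS - 1));
       va  = REALIGN_LOAD <msq, lsq, rt>;
       ...
       addr += VS;  */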
4269e4b17023SJohn Marino
4270e4b17023SJohn Marino
4271e4b17023SJohn Marino /* Function vect_strided_load_supported.
4272e4b17023SJohn Marino
4273e4b17023SJohn Marino Returns TRUE if even and odd permutations are supported,
4274e4b17023SJohn Marino and FALSE otherwise. */
4275e4b17023SJohn Marino
4276e4b17023SJohn Marino bool
4277e4b17023SJohn Marino vect_strided_load_supported (tree vectype, unsigned HOST_WIDE_INT count)
4278e4b17023SJohn Marino {
4279e4b17023SJohn Marino enum machine_mode mode = TYPE_MODE (vectype);
4280e4b17023SJohn Marino
4281e4b17023SJohn Marino /* vect_permute_load_chain requires the group size to be a power of two. */
4282e4b17023SJohn Marino if (exact_log2 (count) == -1)
4283e4b17023SJohn Marino {
4284e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
4285e4b17023SJohn Marino fprintf (vect_dump, "the size of the group of strided accesses"
4286e4b17023SJohn Marino " is not a power of 2");
4287e4b17023SJohn Marino return false;
4288e4b17023SJohn Marino }
4289e4b17023SJohn Marino
4290e4b17023SJohn Marino /* Check that the permutation is supported. */
4291e4b17023SJohn Marino if (VECTOR_MODE_P (mode))
4292e4b17023SJohn Marino {
4293e4b17023SJohn Marino unsigned int i, nelt = GET_MODE_NUNITS (mode);
4294e4b17023SJohn Marino unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
4295e4b17023SJohn Marino
4296e4b17023SJohn Marino for (i = 0; i < nelt; i++)
4297e4b17023SJohn Marino sel[i] = i * 2;
4298e4b17023SJohn Marino if (can_vec_perm_p (mode, false, sel))
4299e4b17023SJohn Marino {
4300e4b17023SJohn Marino for (i = 0; i < nelt; i++)
4301e4b17023SJohn Marino sel[i] = i * 2 + 1;
4302e4b17023SJohn Marino if (can_vec_perm_p (mode, false, sel))
4303e4b17023SJohn Marino return true;
4304e4b17023SJohn Marino }
4305e4b17023SJohn Marino }
4306e4b17023SJohn Marino
4307e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
4308e4b17023SJohn Marino fprintf (vect_dump, "extract even/odd not supported by target");
4309e4b17023SJohn Marino return false;
4310e4b17023SJohn Marino }
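/* Illustration only (not part of GCC): for a vector mode with NELT = 4 the
   two selectors queried above are {0,2,4,6} (extract_even) and {1,3,5,7}
   (extract_odd), indexing the concatenation of the two input vectors.  A
   minimal sketch of that selection on plain int arrays, with invented
   names, is:  */

static void
example_extract_even_odd (const int *v0, const int *v1, int nelt,
                          int *even, int *odd)
{
  int i;

  for (i = 0; i < nelt; i++)
    {
      /* Selector value 2*i (resp. 2*i+1) indexes the concatenation V0|V1.  */
      int e = 2 * i, o = 2 * i + 1;

      even[i] = e < nelt ? v0[e] : v1[e - nelt];
      odd[i] = o < nelt ? v0[o] : v1[o - nelt];
    }
}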
4311e4b17023SJohn Marino
4312e4b17023SJohn Marino /* Return TRUE if vec_load_lanes is available for COUNT vectors of
4313e4b17023SJohn Marino type VECTYPE. */
4314e4b17023SJohn Marino
4315e4b17023SJohn Marino bool
4316e4b17023SJohn Marino vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
4317e4b17023SJohn Marino {
4318e4b17023SJohn Marino return vect_lanes_optab_supported_p ("vec_load_lanes",
4319e4b17023SJohn Marino vec_load_lanes_optab,
4320e4b17023SJohn Marino vectype, count);
4321e4b17023SJohn Marino }
4322e4b17023SJohn Marino
4323e4b17023SJohn Marino /* Function vect_permute_load_chain.
4324e4b17023SJohn Marino
4325e4b17023SJohn Marino Given a chain of interleaved loads in DR_CHAIN of length LENGTH, which must
4326e4b17023SJohn Marino be a power of 2, generate extract_even/odd stmts to reorder the input data
4327e4b17023SJohn Marino correctly. Return the final references for loads in RESULT_CHAIN.
4328e4b17023SJohn Marino
4329e4b17023SJohn Marino E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
4330e4b17023SJohn Marino The input is 4 vectors, each containing 8 elements.  We assign a number to
4331e4b17023SJohn Marino each element; the input sequence is:
4332e4b17023SJohn Marino
4333e4b17023SJohn Marino 1st vec: 0 1 2 3 4 5 6 7
4334e4b17023SJohn Marino 2nd vec: 8 9 10 11 12 13 14 15
4335e4b17023SJohn Marino 3rd vec: 16 17 18 19 20 21 22 23
4336e4b17023SJohn Marino 4th vec: 24 25 26 27 28 29 30 31
4337e4b17023SJohn Marino
4338e4b17023SJohn Marino The output sequence should be:
4339e4b17023SJohn Marino
4340e4b17023SJohn Marino 1st vec: 0 4 8 12 16 20 24 28
4341e4b17023SJohn Marino 2nd vec: 1 5 9 13 17 21 25 29
4342e4b17023SJohn Marino 3rd vec: 2 6 10 14 18 22 26 30
4343e4b17023SJohn Marino 4th vec: 3 7 11 15 19 23 27 31
4344e4b17023SJohn Marino
4345e4b17023SJohn Marino i.e., the first output vector should contain the first elements of each
4346e4b17023SJohn Marino interleaving group, etc.
4347e4b17023SJohn Marino
4348e4b17023SJohn Marino We use extract_even/odd instructions to create such output. The input of
4349e4b17023SJohn Marino each extract_even/odd operation is two vectors
4350e4b17023SJohn Marino 1st vec 2nd vec
4351e4b17023SJohn Marino 0 1 2 3 4 5 6 7
4352e4b17023SJohn Marino
4353e4b17023SJohn Marino and the output is the vector of extracted even/odd elements. The output of
4354e4b17023SJohn Marino extract_even will be: 0 2 4 6
4355e4b17023SJohn Marino and of extract_odd: 1 3 5 7
4356e4b17023SJohn Marino
4357e4b17023SJohn Marino
4358e4b17023SJohn Marino The permutation is done in log2 (LENGTH) stages.  In each stage extract_even
4359e4b17023SJohn Marino and extract_odd stmts are created for each pair of vectors in DR_CHAIN in
4360e4b17023SJohn Marino their order. In our example,
4361e4b17023SJohn Marino
4362e4b17023SJohn Marino E1: extract_even (1st vec, 2nd vec)
4363e4b17023SJohn Marino E2: extract_odd (1st vec, 2nd vec)
4364e4b17023SJohn Marino E3: extract_even (3rd vec, 4th vec)
4365e4b17023SJohn Marino E4: extract_odd (3rd vec, 4th vec)
4366e4b17023SJohn Marino
4367e4b17023SJohn Marino The output for the first stage will be:
4368e4b17023SJohn Marino
4369e4b17023SJohn Marino E1: 0 2 4 6 8 10 12 14
4370e4b17023SJohn Marino E2: 1 3 5 7 9 11 13 15
4371e4b17023SJohn Marino E3: 16 18 20 22 24 26 28 30
4372e4b17023SJohn Marino E4: 17 19 21 23 25 27 29 31
4373e4b17023SJohn Marino
4374e4b17023SJohn Marino In order to proceed and create the correct sequence for the next stage (or
4375e4b17023SJohn Marino for the correct output, if the second stage is the last one, as in our
4376e4b17023SJohn Marino example), we first put the output of the extract_even operation and then the
4377e4b17023SJohn Marino output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
4378e4b17023SJohn Marino The input for the second stage is:
4379e4b17023SJohn Marino
4380e4b17023SJohn Marino 1st vec (E1): 0 2 4 6 8 10 12 14
4381e4b17023SJohn Marino 2nd vec (E3): 16 18 20 22 24 26 28 30
4382e4b17023SJohn Marino 3rd vec (E2): 1 3 5 7 9 11 13 15
4383e4b17023SJohn Marino 4th vec (E4): 17 19 21 23 25 27 29 31
4384e4b17023SJohn Marino
4385e4b17023SJohn Marino The output of the second stage:
4386e4b17023SJohn Marino
4387e4b17023SJohn Marino E1: 0 4 8 12 16 20 24 28
4388e4b17023SJohn Marino E2: 2 6 10 14 18 22 26 30
4389e4b17023SJohn Marino E3: 1 5 9 13 17 21 25 29
4390e4b17023SJohn Marino E4: 3 7 11 15 19 23 27 31
4391e4b17023SJohn Marino
4392e4b17023SJohn Marino And RESULT_CHAIN after reordering:
4393e4b17023SJohn Marino
4394e4b17023SJohn Marino 1st vec (E1): 0 4 8 12 16 20 24 28
4395e4b17023SJohn Marino 2nd vec (E3): 1 5 9 13 17 21 25 29
4396e4b17023SJohn Marino 3rd vec (E2): 2 6 10 14 18 22 26 30
4397e4b17023SJohn Marino 4th vec (E4): 3 7 11 15 19 23 27 31. */
4398e4b17023SJohn Marino
4399e4b17023SJohn Marino static void
4400e4b17023SJohn Marino vect_permute_load_chain (VEC(tree,heap) *dr_chain,
4401e4b17023SJohn Marino unsigned int length,
4402e4b17023SJohn Marino gimple stmt,
4403e4b17023SJohn Marino gimple_stmt_iterator *gsi,
4404e4b17023SJohn Marino VEC(tree,heap) **result_chain)
4405e4b17023SJohn Marino {
4406e4b17023SJohn Marino tree perm_dest, data_ref, first_vect, second_vect;
4407e4b17023SJohn Marino tree perm_mask_even, perm_mask_odd;
4408e4b17023SJohn Marino gimple perm_stmt;
4409e4b17023SJohn Marino tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
4410e4b17023SJohn Marino unsigned int i, j, log_length = exact_log2 (length);
4411e4b17023SJohn Marino unsigned nelt = TYPE_VECTOR_SUBPARTS (vectype);
4412e4b17023SJohn Marino unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
4413e4b17023SJohn Marino
4414e4b17023SJohn Marino *result_chain = VEC_copy (tree, heap, dr_chain);
4415e4b17023SJohn Marino
4416e4b17023SJohn Marino for (i = 0; i < nelt; ++i)
4417e4b17023SJohn Marino sel[i] = i * 2;
4418e4b17023SJohn Marino perm_mask_even = vect_gen_perm_mask (vectype, sel);
4419e4b17023SJohn Marino gcc_assert (perm_mask_even != NULL);
4420e4b17023SJohn Marino
4421e4b17023SJohn Marino for (i = 0; i < nelt; ++i)
4422e4b17023SJohn Marino sel[i] = i * 2 + 1;
4423e4b17023SJohn Marino perm_mask_odd = vect_gen_perm_mask (vectype, sel);
4424e4b17023SJohn Marino gcc_assert (perm_mask_odd != NULL);
4425e4b17023SJohn Marino
4426e4b17023SJohn Marino for (i = 0; i < log_length; i++)
4427e4b17023SJohn Marino {
4428e4b17023SJohn Marino for (j = 0; j < length; j += 2)
4429e4b17023SJohn Marino {
4430e4b17023SJohn Marino first_vect = VEC_index (tree, dr_chain, j);
4431e4b17023SJohn Marino second_vect = VEC_index (tree, dr_chain, j+1);
4432e4b17023SJohn Marino
4433e4b17023SJohn Marino /* data_ref = permute_even (first_data_ref, second_data_ref); */
4434e4b17023SJohn Marino perm_dest = create_tmp_var (vectype, "vect_perm_even");
4435e4b17023SJohn Marino DECL_GIMPLE_REG_P (perm_dest) = 1;
4436e4b17023SJohn Marino add_referenced_var (perm_dest);
4437e4b17023SJohn Marino
4438e4b17023SJohn Marino perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, perm_dest,
4439e4b17023SJohn Marino first_vect, second_vect,
4440e4b17023SJohn Marino perm_mask_even);
4441e4b17023SJohn Marino
4442e4b17023SJohn Marino data_ref = make_ssa_name (perm_dest, perm_stmt);
4443e4b17023SJohn Marino gimple_assign_set_lhs (perm_stmt, data_ref);
4444e4b17023SJohn Marino vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4445e4b17023SJohn Marino mark_symbols_for_renaming (perm_stmt);
4446e4b17023SJohn Marino
4447e4b17023SJohn Marino VEC_replace (tree, *result_chain, j/2, data_ref);
4448e4b17023SJohn Marino
4449e4b17023SJohn Marino /* data_ref = permute_odd (first_data_ref, second_data_ref); */
4450e4b17023SJohn Marino perm_dest = create_tmp_var (vectype, "vect_perm_odd");
4451e4b17023SJohn Marino DECL_GIMPLE_REG_P (perm_dest) = 1;
4452e4b17023SJohn Marino add_referenced_var (perm_dest);
4453e4b17023SJohn Marino
4454e4b17023SJohn Marino perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, perm_dest,
4455e4b17023SJohn Marino first_vect, second_vect,
4456e4b17023SJohn Marino perm_mask_odd);
4457e4b17023SJohn Marino
4458e4b17023SJohn Marino data_ref = make_ssa_name (perm_dest, perm_stmt);
4459e4b17023SJohn Marino gimple_assign_set_lhs (perm_stmt, data_ref);
4460e4b17023SJohn Marino vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4461e4b17023SJohn Marino mark_symbols_for_renaming (perm_stmt);
4462e4b17023SJohn Marino
4463e4b17023SJohn Marino VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
4464e4b17023SJohn Marino }
4465e4b17023SJohn Marino dr_chain = VEC_copy (tree, heap, *result_chain);
4466e4b17023SJohn Marino }
4467e4b17023SJohn Marino }
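/* Illustration only, not used by the vectorizer: the same log2 (LENGTH)
   staged even/odd de-interleave performed on plain int arrays, mirroring
   the worked example in the comment above.  The bounds (at most 16 vectors
   of 8 elements) and the function name are invented for this sketch.  */

static void
example_permute_load_chain (int chain[16][8], unsigned int length,
                            unsigned int nelt)
{
  int result[16][8];
  unsigned int stage, i, j;

  for (stage = length; stage > 1; stage >>= 1)   /* log2 (LENGTH) passes.  */
    {
      for (j = 0; j < length; j += 2)
        for (i = 0; i < nelt; i++)
          {
            /* extract_even of (chain[j], chain[j+1]) goes to slot j/2.  */
            result[j / 2][i]
              = 2 * i < nelt ? chain[j][2 * i] : chain[j + 1][2 * i - nelt];
            /* extract_odd goes to slot j/2 + length/2, as described above.  */
            result[j / 2 + length / 2][i]
              = 2 * i + 1 < nelt ? chain[j][2 * i + 1]
                                 : chain[j + 1][2 * i + 1 - nelt];
          }

      for (j = 0; j < length; j++)
        for (i = 0; i < nelt; i++)
          chain[j][i] = result[j][i];
    }
}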
4468e4b17023SJohn Marino
4469e4b17023SJohn Marino
4470e4b17023SJohn Marino /* Function vect_transform_strided_load.
4471e4b17023SJohn Marino
4472e4b17023SJohn Marino Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
4473e4b17023SJohn Marino to perform their permutation and attach the resulting vectorized statements
4474e4b17023SJohn Marino to the scalar statements.  */
4476e4b17023SJohn Marino
4477e4b17023SJohn Marino void
4478e4b17023SJohn Marino vect_transform_strided_load (gimple stmt, VEC(tree,heap) *dr_chain, int size,
4479e4b17023SJohn Marino gimple_stmt_iterator *gsi)
4480e4b17023SJohn Marino {
4481e4b17023SJohn Marino VEC(tree,heap) *result_chain = NULL;
4482e4b17023SJohn Marino
4483e4b17023SJohn Marino /* DR_CHAIN contains input data-refs that are a part of the interleaving.
4484e4b17023SJohn Marino RESULT_CHAIN is the output of vect_permute_load_chain; it contains the
4485e4b17023SJohn Marino permuted vectors that are ready for vector computation. */
4486e4b17023SJohn Marino result_chain = VEC_alloc (tree, heap, size);
4487e4b17023SJohn Marino vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain);
4488e4b17023SJohn Marino vect_record_strided_load_vectors (stmt, result_chain);
4489e4b17023SJohn Marino VEC_free (tree, heap, result_chain);
4490e4b17023SJohn Marino }
4491e4b17023SJohn Marino
4492e4b17023SJohn Marino /* RESULT_CHAIN contains the output of a group of strided loads that were
4493e4b17023SJohn Marino generated as part of the vectorization of STMT. Assign the statement
4494e4b17023SJohn Marino for each vector to the associated scalar statement. */
4495e4b17023SJohn Marino
4496e4b17023SJohn Marino void
4497e4b17023SJohn Marino vect_record_strided_load_vectors (gimple stmt, VEC(tree,heap) *result_chain)
4498e4b17023SJohn Marino {
4499e4b17023SJohn Marino gimple first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
4500e4b17023SJohn Marino gimple next_stmt, new_stmt;
4501e4b17023SJohn Marino unsigned int i, gap_count;
4502e4b17023SJohn Marino tree tmp_data_ref;
4503e4b17023SJohn Marino
4504e4b17023SJohn Marino /* Put a permuted data-ref in the VECTORIZED_STMT field.
4505e4b17023SJohn Marino Since we scan the chain starting from its first node, their order
4506e4b17023SJohn Marino corresponds to the order of data-refs in RESULT_CHAIN. */
4507e4b17023SJohn Marino next_stmt = first_stmt;
4508e4b17023SJohn Marino gap_count = 1;
4509e4b17023SJohn Marino FOR_EACH_VEC_ELT (tree, result_chain, i, tmp_data_ref)
4510e4b17023SJohn Marino {
4511e4b17023SJohn Marino if (!next_stmt)
4512e4b17023SJohn Marino break;
4513e4b17023SJohn Marino
4514e4b17023SJohn Marino /* Skip the gaps. Loads created for the gaps will be removed later by the
4515e4b17023SJohn Marino dead code elimination pass. No need to check for the first stmt in
4516e4b17023SJohn Marino the group, since it always exists.
4517e4b17023SJohn Marino GROUP_GAP is the number of steps in elements from the previous
4518e4b17023SJohn Marino access (if there is no gap GROUP_GAP is 1). We skip loads that
4519e4b17023SJohn Marino correspond to the gaps. */
4520e4b17023SJohn Marino if (next_stmt != first_stmt
4521e4b17023SJohn Marino && gap_count < GROUP_GAP (vinfo_for_stmt (next_stmt)))
4522e4b17023SJohn Marino {
4523e4b17023SJohn Marino gap_count++;
4524e4b17023SJohn Marino continue;
4525e4b17023SJohn Marino }
4526e4b17023SJohn Marino
4527e4b17023SJohn Marino while (next_stmt)
4528e4b17023SJohn Marino {
4529e4b17023SJohn Marino new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
4530e4b17023SJohn Marino /* We assume that if VEC_STMT is not NULL, this is a case of multiple
4531e4b17023SJohn Marino copies, and we put the new vector statement in the first available
4532e4b17023SJohn Marino RELATED_STMT. */
4533e4b17023SJohn Marino if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
4534e4b17023SJohn Marino STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
4535e4b17023SJohn Marino else
4536e4b17023SJohn Marino {
4537e4b17023SJohn Marino if (!GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
4538e4b17023SJohn Marino {
4539e4b17023SJohn Marino gimple prev_stmt =
4540e4b17023SJohn Marino STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
4541e4b17023SJohn Marino gimple rel_stmt =
4542e4b17023SJohn Marino STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
4543e4b17023SJohn Marino while (rel_stmt)
4544e4b17023SJohn Marino {
4545e4b17023SJohn Marino prev_stmt = rel_stmt;
4546e4b17023SJohn Marino rel_stmt =
4547e4b17023SJohn Marino STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
4548e4b17023SJohn Marino }
4549e4b17023SJohn Marino
4550e4b17023SJohn Marino STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) =
4551e4b17023SJohn Marino new_stmt;
4552e4b17023SJohn Marino }
4553e4b17023SJohn Marino }
4554e4b17023SJohn Marino
4555e4b17023SJohn Marino next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4556e4b17023SJohn Marino gap_count = 1;
4557e4b17023SJohn Marino /* If NEXT_STMT accesses the same DR as the previous statement,
4558e4b17023SJohn Marino put the same TMP_DATA_REF as its vectorized statement; otherwise
4559e4b17023SJohn Marino get the next data-ref from RESULT_CHAIN. */
4560e4b17023SJohn Marino if (!next_stmt || !GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
4561e4b17023SJohn Marino break;
4562e4b17023SJohn Marino }
4563e4b17023SJohn Marino }
4564e4b17023SJohn Marino }
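/* Illustration only (invented structures, not GCC internals): a simplified
   model of the gap skipping above.  Each member of the interleaving group
   records its GROUP_GAP, i.e. the distance in elements from the previous
   access; result vectors whose position falls in a gap are left unassigned,
   so the loads created for them can later be removed as dead code.  Multiple
   copies and shared data-refs are ignored in this sketch.  */

struct example_group_member
{
  unsigned int gap;    /* GROUP_GAP: 1 means no gap before this member.  */
  int vec_index;       /* Result vector assigned to this member, or -1.  */
};

static void
example_assign_load_vectors (struct example_group_member *group,
                             unsigned int group_size,
                             unsigned int num_vectors)
{
  unsigned int i, next = 0, gap_count = 1;

  for (i = 0; i < num_vectors && next < group_size; i++)
    {
      if (next != 0 && gap_count < group[next].gap)
        {
          /* This result vector corresponds to a gap; skip it.  */
          gap_count++;
          continue;
        }

      group[next].vec_index = (int) i;
      next++;
      gap_count = 1;
    }
}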
4565e4b17023SJohn Marino
4566e4b17023SJohn Marino /* Function vect_can_force_dr_alignment_p.
4567e4b17023SJohn Marino
4568e4b17023SJohn Marino Returns whether the alignment of DECL can be forced to an
4569e4b17023SJohn Marino ALIGNMENT-bit boundary. */
4570e4b17023SJohn Marino
4571e4b17023SJohn Marino bool
4572e4b17023SJohn Marino vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
4573e4b17023SJohn Marino {
4574e4b17023SJohn Marino if (TREE_CODE (decl) != VAR_DECL)
4575e4b17023SJohn Marino return false;
4576e4b17023SJohn Marino
4577e4b17023SJohn Marino if (DECL_EXTERNAL (decl))
4578e4b17023SJohn Marino return false;
4579e4b17023SJohn Marino
4580e4b17023SJohn Marino if (TREE_ASM_WRITTEN (decl))
4581e4b17023SJohn Marino return false;
4582e4b17023SJohn Marino
45835ce9237cSJohn Marino /* Do not override explicit alignment set by the user when an explicit
45845ce9237cSJohn Marino section name is also used. This is a common idiom used by many
45855ce9237cSJohn Marino software projects. */
45865ce9237cSJohn Marino if (DECL_SECTION_NAME (decl) != NULL_TREE
45875ce9237cSJohn Marino && !DECL_HAS_IMPLICIT_SECTION_NAME_P (decl))
45885ce9237cSJohn Marino return false;
45895ce9237cSJohn Marino
4590e4b17023SJohn Marino if (TREE_STATIC (decl))
4591e4b17023SJohn Marino return (alignment <= MAX_OFILE_ALIGNMENT);
4592e4b17023SJohn Marino else
4593e4b17023SJohn Marino return (alignment <= MAX_STACK_ALIGNMENT);
4594e4b17023SJohn Marino }
4595e4b17023SJohn Marino
4596e4b17023SJohn Marino
4597e4b17023SJohn Marino /* Return whether the data reference DR is supported with respect to its
4598e4b17023SJohn Marino alignment.
4599e4b17023SJohn Marino If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even if
4600e4b17023SJohn Marino it is aligned, i.e., check if it is possible to vectorize it with different
4601e4b17023SJohn Marino alignment. */
4602e4b17023SJohn Marino
4603e4b17023SJohn Marino enum dr_alignment_support
4604e4b17023SJohn Marino vect_supportable_dr_alignment (struct data_reference *dr,
4605e4b17023SJohn Marino bool check_aligned_accesses)
4606e4b17023SJohn Marino {
4607e4b17023SJohn Marino gimple stmt = DR_STMT (dr);
4608e4b17023SJohn Marino stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4609e4b17023SJohn Marino tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4610e4b17023SJohn Marino enum machine_mode mode = TYPE_MODE (vectype);
4611e4b17023SJohn Marino loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4612e4b17023SJohn Marino struct loop *vect_loop = NULL;
4613e4b17023SJohn Marino bool nested_in_vect_loop = false;
4614e4b17023SJohn Marino
4615e4b17023SJohn Marino if (aligned_access_p (dr) && !check_aligned_accesses)
4616e4b17023SJohn Marino return dr_aligned;
4617e4b17023SJohn Marino
4618e4b17023SJohn Marino if (loop_vinfo)
4619e4b17023SJohn Marino {
4620e4b17023SJohn Marino vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
4621e4b17023SJohn Marino nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
4622e4b17023SJohn Marino }
4623e4b17023SJohn Marino
4624e4b17023SJohn Marino /* Possibly unaligned access. */
4625e4b17023SJohn Marino
4626e4b17023SJohn Marino /* We can choose between using the implicit realignment scheme (generating
4627e4b17023SJohn Marino a misaligned_move stmt) and the explicit realignment scheme (generating
4628e4b17023SJohn Marino aligned loads with a REALIGN_LOAD). There are two variants to the
4629e4b17023SJohn Marino explicit realignment scheme: optimized, and unoptimized.
4630e4b17023SJohn Marino We can optimize the realignment only if the step between consecutive
4631e4b17023SJohn Marino vector loads is equal to the vector size. Since the vector memory
4632e4b17023SJohn Marino accesses advance in steps of VS (Vector Size) in the vectorized loop, it
4633e4b17023SJohn Marino is guaranteed that the misalignment amount remains the same throughout the
4634e4b17023SJohn Marino execution of the vectorized loop. Therefore, we can create the
4635e4b17023SJohn Marino "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
4636e4b17023SJohn Marino at the loop preheader.
4637e4b17023SJohn Marino
4638e4b17023SJohn Marino However, in the case of outer-loop vectorization, when vectorizing a
4639e4b17023SJohn Marino memory access in the inner-loop nested within the LOOP that is now being
4640e4b17023SJohn Marino vectorized, while it is guaranteed that the misalignment of the
4641e4b17023SJohn Marino vectorized memory access will remain the same in different outer-loop
4642e4b17023SJohn Marino iterations, it is *not* guaranteed that it will remain the same throughout
4643e4b17023SJohn Marino the execution of the inner-loop. This is because the inner-loop advances
4644e4b17023SJohn Marino with the original scalar step (and not in steps of VS). If the inner-loop
4645e4b17023SJohn Marino step happens to be a multiple of VS, then the misalignment remains fixed
4646e4b17023SJohn Marino and we can use the optimized realignment scheme. For example:
4647e4b17023SJohn Marino
4648e4b17023SJohn Marino for (i=0; i<N; i++)
4649e4b17023SJohn Marino for (j=0; j<M; j++)
4650e4b17023SJohn Marino s += a[i+j];
4651e4b17023SJohn Marino
4652e4b17023SJohn Marino When vectorizing the i-loop in the above example, the step between
4653e4b17023SJohn Marino consecutive vector loads is 1, and so the misalignment does not remain
4654e4b17023SJohn Marino fixed across the execution of the inner-loop, and the realignment cannot
4655e4b17023SJohn Marino be optimized (as illustrated in the following pseudo vectorized loop):
4656e4b17023SJohn Marino
4657e4b17023SJohn Marino for (i=0; i<N; i+=4)
4658e4b17023SJohn Marino for (j=0; j<M; j++){
4659e4b17023SJohn Marino vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
4660e4b17023SJohn Marino // when j is {0,1,2,3,4,5,6,7,...} respectively.
4661e4b17023SJohn Marino // (assuming that we start from an aligned address).
4662e4b17023SJohn Marino }
4663e4b17023SJohn Marino
4664e4b17023SJohn Marino We therefore have to use the unoptimized realignment scheme:
4665e4b17023SJohn Marino
4666e4b17023SJohn Marino for (i=0; i<N; i+=4)
4667e4b17023SJohn Marino for (j=k; j<M; j+=4)
4668e4b17023SJohn Marino vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
4669e4b17023SJohn Marino // that the misalignment of the initial address is
4670e4b17023SJohn Marino // 0).
4671e4b17023SJohn Marino
4672e4b17023SJohn Marino The loop can then be vectorized as follows:
4673e4b17023SJohn Marino
4674e4b17023SJohn Marino for (k=0; k<4; k++){
4675e4b17023SJohn Marino rt = get_realignment_token (&vp[k]);
4676e4b17023SJohn Marino for (i=0; i<N; i+=4){
4677e4b17023SJohn Marino v1 = vp[i+k];
4678e4b17023SJohn Marino for (j=k; j<M; j+=4){
4679e4b17023SJohn Marino v2 = vp[i+j+VS-1];
4680e4b17023SJohn Marino va = REALIGN_LOAD <v1,v2,rt>;
4681e4b17023SJohn Marino vs += va;
4682e4b17023SJohn Marino v1 = v2;
4683e4b17023SJohn Marino }
4684e4b17023SJohn Marino }
4685e4b17023SJohn Marino } */
4686e4b17023SJohn Marino
4687e4b17023SJohn Marino if (DR_IS_READ (dr))
4688e4b17023SJohn Marino {
4689e4b17023SJohn Marino bool is_packed = false;
4690e4b17023SJohn Marino tree type = (TREE_TYPE (DR_REF (dr)));
4691e4b17023SJohn Marino
4692e4b17023SJohn Marino if (optab_handler (vec_realign_load_optab, mode) != CODE_FOR_nothing
4693e4b17023SJohn Marino && (!targetm.vectorize.builtin_mask_for_load
4694e4b17023SJohn Marino || targetm.vectorize.builtin_mask_for_load ()))
4695e4b17023SJohn Marino {
4696e4b17023SJohn Marino tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4697e4b17023SJohn Marino if ((nested_in_vect_loop
4698e4b17023SJohn Marino && (TREE_INT_CST_LOW (DR_STEP (dr))
4699e4b17023SJohn Marino != GET_MODE_SIZE (TYPE_MODE (vectype))))
4700e4b17023SJohn Marino || !loop_vinfo)
4701e4b17023SJohn Marino return dr_explicit_realign;
4702e4b17023SJohn Marino else
4703e4b17023SJohn Marino return dr_explicit_realign_optimized;
4704e4b17023SJohn Marino }
4705e4b17023SJohn Marino if (!known_alignment_for_access_p (dr))
4706e4b17023SJohn Marino is_packed = contains_packed_reference (DR_REF (dr));
4707e4b17023SJohn Marino
4708e4b17023SJohn Marino if (targetm.vectorize.
4709e4b17023SJohn Marino support_vector_misalignment (mode, type,
4710e4b17023SJohn Marino DR_MISALIGNMENT (dr), is_packed))
4711e4b17023SJohn Marino /* Can't software pipeline the loads, but can at least do them. */
4712e4b17023SJohn Marino return dr_unaligned_supported;
4713e4b17023SJohn Marino }
4714e4b17023SJohn Marino else
4715e4b17023SJohn Marino {
4716e4b17023SJohn Marino bool is_packed = false;
4717e4b17023SJohn Marino tree type = (TREE_TYPE (DR_REF (dr)));
4718e4b17023SJohn Marino
4719e4b17023SJohn Marino if (!known_alignment_for_access_p (dr))
4720e4b17023SJohn Marino is_packed = contains_packed_reference (DR_REF (dr));
4721e4b17023SJohn Marino
4722e4b17023SJohn Marino if (targetm.vectorize.
4723e4b17023SJohn Marino support_vector_misalignment (mode, type,
4724e4b17023SJohn Marino DR_MISALIGNMENT (dr), is_packed))
4725e4b17023SJohn Marino return dr_unaligned_supported;
4726e4b17023SJohn Marino }
4727e4b17023SJohn Marino
4728e4b17023SJohn Marino /* Unsupported. */
4729e4b17023SJohn Marino return dr_unaligned_unsupported;
4730e4b17023SJohn Marino }
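/* Illustration only (not GCC code): why the optimized realignment scheme
   requires the step between consecutive vector loads to be a multiple of
   the vector size.  The misalignment of BASE + I * STEP modulo the vector
   size VS is independent of I exactly when STEP % VS == 0; otherwise it
   cycles, as in the pseudo vectorized loop in the comment above.  All names
   here are made up for the sketch.  */

static void
example_misalignment_pattern (unsigned long base, unsigned long step,
                              unsigned long vs, unsigned int iters)
{
  unsigned int i;

  for (i = 0; i < iters; i++)
    /* Prints a constant sequence iff step % vs == 0.  */
    printf ("iteration %u: misalignment = %lu\n", i, (base + i * step) % vs);
}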