1*38fd1498Szrj /* Vectorizer
2*38fd1498Szrj Copyright (C) 2003-2018 Free Software Foundation, Inc.
3*38fd1498Szrj Contributed by Dorit Naishlos <dorit@il.ibm.com>
4*38fd1498Szrj
5*38fd1498Szrj This file is part of GCC.
6*38fd1498Szrj
7*38fd1498Szrj GCC is free software; you can redistribute it and/or modify it under
8*38fd1498Szrj the terms of the GNU General Public License as published by the Free
9*38fd1498Szrj Software Foundation; either version 3, or (at your option) any later
10*38fd1498Szrj version.
11*38fd1498Szrj
12*38fd1498Szrj GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13*38fd1498Szrj WARRANTY; without even the implied warranty of MERCHANTABILITY or
14*38fd1498Szrj FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15*38fd1498Szrj for more details.
16*38fd1498Szrj
17*38fd1498Szrj You should have received a copy of the GNU General Public License
18*38fd1498Szrj along with GCC; see the file COPYING3. If not see
19*38fd1498Szrj <http://www.gnu.org/licenses/>. */
20*38fd1498Szrj
21*38fd1498Szrj /* Loop and basic block vectorizer.
22*38fd1498Szrj
23*38fd1498Szrj This file contains drivers for the three vectorizers:
24*38fd1498Szrj (1) loop vectorizer (inter-iteration parallelism),
25*38fd1498Szrj (2) loop-aware SLP (intra-iteration parallelism) (invoked by the loop
26*38fd1498Szrj vectorizer)
27*38fd1498Szrj (3) BB vectorizer (out-of-loops), aka SLP
28*38fd1498Szrj
29*38fd1498Szrj The rest of the vectorizer's code is organized as follows:
30*38fd1498Szrj - tree-vect-loop.c - loop specific parts such as reductions, etc. These are
31*38fd1498Szrj used by drivers (1) and (2).
32*38fd1498Szrj - tree-vect-loop-manip.c - vectorizer's loop control-flow utilities, used by
33*38fd1498Szrj drivers (1) and (2).
34*38fd1498Szrj - tree-vect-slp.c - BB vectorization specific analysis and transformation,
35*38fd1498Szrj used by drivers (2) and (3).
36*38fd1498Szrj - tree-vect-stmts.c - statements analysis and transformation (used by all).
37*38fd1498Szrj - tree-vect-data-refs.c - vectorizer specific data-refs analysis and
38*38fd1498Szrj manipulations (used by all).
39*38fd1498Szrj - tree-vect-patterns.c - vectorizable code patterns detector (used by all)
40*38fd1498Szrj
41*38fd1498Szrj Here's a poor attempt at illustrating that:
42*38fd1498Szrj
43*38fd1498Szrj tree-vectorizer.c:
44*38fd1498Szrj loop_vect() loop_aware_slp() slp_vect()
45*38fd1498Szrj | / \ /
46*38fd1498Szrj | / \ /
47*38fd1498Szrj tree-vect-loop.c tree-vect-slp.c
48*38fd1498Szrj | \ \ / / |
49*38fd1498Szrj | \ \/ / |
50*38fd1498Szrj | \ /\ / |
51*38fd1498Szrj | \ / \ / |
52*38fd1498Szrj tree-vect-stmts.c tree-vect-data-refs.c
53*38fd1498Szrj \ /
54*38fd1498Szrj tree-vect-patterns.c
55*38fd1498Szrj */
56*38fd1498Szrj
57*38fd1498Szrj #include "config.h"
58*38fd1498Szrj #include "system.h"
59*38fd1498Szrj #include "coretypes.h"
60*38fd1498Szrj #include "backend.h"
61*38fd1498Szrj #include "tree.h"
62*38fd1498Szrj #include "gimple.h"
63*38fd1498Szrj #include "predict.h"
64*38fd1498Szrj #include "tree-pass.h"
65*38fd1498Szrj #include "ssa.h"
66*38fd1498Szrj #include "cgraph.h"
67*38fd1498Szrj #include "fold-const.h"
68*38fd1498Szrj #include "stor-layout.h"
69*38fd1498Szrj #include "gimple-iterator.h"
70*38fd1498Szrj #include "gimple-walk.h"
71*38fd1498Szrj #include "tree-ssa-loop-manip.h"
72*38fd1498Szrj #include "tree-ssa-loop-niter.h"
73*38fd1498Szrj #include "tree-cfg.h"
74*38fd1498Szrj #include "cfgloop.h"
75*38fd1498Szrj #include "tree-vectorizer.h"
76*38fd1498Szrj #include "tree-ssa-propagate.h"
77*38fd1498Szrj #include "dbgcnt.h"
78*38fd1498Szrj #include "tree-scalar-evolution.h"
79*38fd1498Szrj #include "stringpool.h"
80*38fd1498Szrj #include "attribs.h"
81*38fd1498Szrj
82*38fd1498Szrj
/* Loop or bb location: source position of the region currently being
   vectorized, used by the dump_printf_loc diagnostics below.  */
source_location vect_location;

/* Vector mapping GIMPLE stmt to stmt_vec_info.  */
vec<stmt_vec_info> stmt_vec_info_vec;
88*38fd1498Szrj
/* For mapping simduid to vectorization factor.  */

struct simduid_to_vf : free_ptr_hash<simduid_to_vf>
{
  /* DECL_UID of the simduid variable; this is also the hash key.  */
  unsigned int simduid;
  /* Vectorization factor recorded for the loop with that simduid.  */
  poly_uint64 vf;

  /* hash_table support.  */
  static inline hashval_t hash (const simduid_to_vf *);
  static inline int equal (const simduid_to_vf *, const simduid_to_vf *);
};
100*38fd1498Szrj
101*38fd1498Szrj inline hashval_t
hash(const simduid_to_vf * p)102*38fd1498Szrj simduid_to_vf::hash (const simduid_to_vf *p)
103*38fd1498Szrj {
104*38fd1498Szrj return p->simduid;
105*38fd1498Szrj }
106*38fd1498Szrj
107*38fd1498Szrj inline int
equal(const simduid_to_vf * p1,const simduid_to_vf * p2)108*38fd1498Szrj simduid_to_vf::equal (const simduid_to_vf *p1, const simduid_to_vf *p2)
109*38fd1498Szrj {
110*38fd1498Szrj return p1->simduid == p2->simduid;
111*38fd1498Szrj }
112*38fd1498Szrj
113*38fd1498Szrj /* This hash maps the OMP simd array to the corresponding simduid used
114*38fd1498Szrj to index into it. Like thus,
115*38fd1498Szrj
116*38fd1498Szrj _7 = GOMP_SIMD_LANE (simduid.0)
117*38fd1498Szrj ...
118*38fd1498Szrj ...
119*38fd1498Szrj D.1737[_7] = stuff;
120*38fd1498Szrj
121*38fd1498Szrj
122*38fd1498Szrj This hash maps from the OMP simd array (D.1737[]) to DECL_UID of
123*38fd1498Szrj simduid.0. */
124*38fd1498Szrj
struct simd_array_to_simduid : free_ptr_hash<simd_array_to_simduid>
{
  /* The "omp simd array" VAR_DECL; DECL_UID (decl) is the hash key.  */
  tree decl;
  /* DECL_UID of the associated simduid, or -1U if the array was seen
     with more than one simduid (see note_simd_array_uses_cb).  */
  unsigned int simduid;

  /* hash_table support.  */
  static inline hashval_t hash (const simd_array_to_simduid *);
  static inline int equal (const simd_array_to_simduid *,
			   const simd_array_to_simduid *);
};
135*38fd1498Szrj
136*38fd1498Szrj inline hashval_t
hash(const simd_array_to_simduid * p)137*38fd1498Szrj simd_array_to_simduid::hash (const simd_array_to_simduid *p)
138*38fd1498Szrj {
139*38fd1498Szrj return DECL_UID (p->decl);
140*38fd1498Szrj }
141*38fd1498Szrj
142*38fd1498Szrj inline int
equal(const simd_array_to_simduid * p1,const simd_array_to_simduid * p2)143*38fd1498Szrj simd_array_to_simduid::equal (const simd_array_to_simduid *p1,
144*38fd1498Szrj const simd_array_to_simduid *p2)
145*38fd1498Szrj {
146*38fd1498Szrj return p1->decl == p2->decl;
147*38fd1498Szrj }
148*38fd1498Szrj
149*38fd1498Szrj /* Fold IFN_GOMP_SIMD_LANE, IFN_GOMP_SIMD_VF, IFN_GOMP_SIMD_LAST_LANE,
150*38fd1498Szrj into their corresponding constants and remove
151*38fd1498Szrj IFN_GOMP_SIMD_ORDERED_{START,END}. */
152*38fd1498Szrj
static void
adjust_simduid_builtins (hash_table<simduid_to_vf> *htab)
{
  basic_block bb;

  FOR_EACH_BB_FN (bb, cfun)
    {
      gimple_stmt_iterator i;

      /* No gsi_next in the loop header: removal/replacement of the
	 current statement advances the iterator itself.  */
      for (i = gsi_start_bb (bb); !gsi_end_p (i); )
	{
	  poly_uint64 vf = 1;
	  enum internal_fn ifn;
	  gimple *stmt = gsi_stmt (i);
	  tree t;
	  /* Only internal function calls are of interest.  */
	  if (!is_gimple_call (stmt)
	      || !gimple_call_internal_p (stmt))
	    {
	      gsi_next (&i);
	      continue;
	    }
	  ifn = gimple_call_internal_fn (stmt);
	  switch (ifn)
	    {
	    case IFN_GOMP_SIMD_LANE:
	    case IFN_GOMP_SIMD_VF:
	    case IFN_GOMP_SIMD_LAST_LANE:
	      /* Folded into constants below.  */
	      break;
	    case IFN_GOMP_SIMD_ORDERED_START:
	    case IFN_GOMP_SIMD_ORDERED_END:
	      if (integer_onep (gimple_call_arg (stmt, 0)))
		{
		  /* Argument 1 means the ordered region is still needed:
		     replace the internal call with a call to the real
		     GOMP_ordered_{start,end} builtin, carrying over the
		     virtual operands.  */
		  enum built_in_function bcode
		    = (ifn == IFN_GOMP_SIMD_ORDERED_START
		       ? BUILT_IN_GOMP_ORDERED_START
		       : BUILT_IN_GOMP_ORDERED_END);
		  gimple *g
		    = gimple_build_call (builtin_decl_explicit (bcode), 0);
		  tree vdef = gimple_vdef (stmt);
		  gimple_set_vdef (g, vdef);
		  SSA_NAME_DEF_STMT (vdef) = g;
		  gimple_set_vuse (g, gimple_vuse (stmt));
		  gsi_replace (&i, g, true);
		  continue;
		}
	      /* Otherwise the marker is not needed anymore; drop it.  */
	      gsi_remove (&i, true);
	      unlink_stmt_vdef (stmt);
	      continue;
	    default:
	      gsi_next (&i);
	      continue;
	    }
	  /* For the remaining IFN_GOMP_SIMD_* calls the simduid is the
	     first call argument.  */
	  tree arg = gimple_call_arg (stmt, 0);
	  gcc_assert (arg != NULL_TREE);
	  gcc_assert (TREE_CODE (arg) == SSA_NAME);
	  simduid_to_vf *p = NULL, data;
	  data.simduid = DECL_UID (SSA_NAME_VAR (arg));
	  /* Need to nullify loop safelen field since its value is not
	     valid after transformation.  */
	  if (bb->loop_father && bb->loop_father->safelen > 0)
	    bb->loop_father->safelen = 0;
	  if (htab)
	    {
	      /* Look up the vectorization factor recorded for this
		 simduid; VF stays 1 if none was recorded.  */
	      p = htab->find (&data);
	      if (p)
		vf = p->vf;
	    }
	  switch (ifn)
	    {
	    case IFN_GOMP_SIMD_VF:
	      t = build_int_cst (unsigned_type_node, vf);
	      break;
	    case IFN_GOMP_SIMD_LANE:
	      t = build_int_cst (unsigned_type_node, 0);
	      break;
	    case IFN_GOMP_SIMD_LAST_LANE:
	      t = gimple_call_arg (stmt, 1);
	      break;
	    default:
	      gcc_unreachable ();
	    }
	  /* Propagate the replacement value into all uses of the call's
	     result and delete the call.  */
	  tree lhs = gimple_call_lhs (stmt);
	  if (lhs)
	    replace_uses_by (lhs, t);
	  release_defs (stmt);
	  gsi_remove (&i, true);
	}
    }
}
242*38fd1498Szrj
243*38fd1498Szrj /* Helper structure for note_simd_array_uses. */
244*38fd1498Szrj
struct note_simd_array_uses_struct
{
  /* Location of the (lazily created) array-to-simduid hash table.  */
  hash_table<simd_array_to_simduid> **htab;
  /* simduid of the IFN_GOMP_SIMD_* call whose uses are being walked.  */
  unsigned int simduid;
};
250*38fd1498Szrj
251*38fd1498Szrj /* Callback for note_simd_array_uses, called through walk_gimple_op. */
252*38fd1498Szrj
static tree
note_simd_array_uses_cb (tree *tp, int *walk_subtrees, void *data)
{
  struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
  struct note_simd_array_uses_struct *ns
    = (struct note_simd_array_uses_struct *) wi->info;

  if (TYPE_P (*tp))
    /* Don't walk into types.  */
    *walk_subtrees = 0;
  else if (VAR_P (*tp)
	   && lookup_attribute ("omp simd array", DECL_ATTRIBUTES (*tp))
	   && DECL_CONTEXT (*tp) == current_function_decl)
    {
      simd_array_to_simduid data;
      /* Create the hash table lazily, on the first array found.  */
      if (!*ns->htab)
	*ns->htab = new hash_table<simd_array_to_simduid> (15);
      data.decl = *tp;
      data.simduid = ns->simduid;
      simd_array_to_simduid **slot = (*ns->htab)->find_slot (&data, INSERT);
      if (*slot == NULL)
	{
	  /* First time we see this array: record the mapping.  */
	  simd_array_to_simduid *p = XNEW (simd_array_to_simduid);
	  *p = data;
	  *slot = p;
	}
      else if ((*slot)->simduid != ns->simduid)
	/* The array is used with more than one simduid; mark the entry
	   invalid so shrink_simd_arrays leaves it alone.  */
	(*slot)->simduid = -1U;
      *walk_subtrees = 0;
    }
  return NULL_TREE;
}
284*38fd1498Szrj
285*38fd1498Szrj /* Find "omp simd array" temporaries and map them to corresponding
286*38fd1498Szrj simduid. */
287*38fd1498Szrj
static void
note_simd_array_uses (hash_table<simd_array_to_simduid> **htab)
{
  basic_block bb;
  gimple_stmt_iterator gsi;
  struct walk_stmt_info wi;
  struct note_simd_array_uses_struct ns;

  memset (&wi, 0, sizeof (wi));
  wi.info = &ns;
  ns.htab = htab;

  FOR_EACH_BB_FN (bb, cfun)
    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple *stmt = gsi_stmt (gsi);
	/* Only IFN_GOMP_SIMD_{LANE,VF,LAST_LANE} internal calls carry
	   the simduid we are after.  */
	if (!is_gimple_call (stmt) || !gimple_call_internal_p (stmt))
	  continue;
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_GOMP_SIMD_LANE:
	  case IFN_GOMP_SIMD_VF:
	  case IFN_GOMP_SIMD_LAST_LANE:
	    break;
	  default:
	    continue;
	  }
	tree lhs = gimple_call_lhs (stmt);
	if (lhs == NULL_TREE)
	  continue;
	imm_use_iterator use_iter;
	gimple *use_stmt;
	/* The simduid is the DECL_UID of the variable underlying the
	   call's first argument.  */
	ns.simduid = DECL_UID (SSA_NAME_VAR (gimple_call_arg (stmt, 0)));
	/* Scan all non-debug uses of the call result for "omp simd
	   array" variables.  */
	FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, lhs)
	  if (!is_gimple_debug (use_stmt))
	    walk_gimple_op (use_stmt, note_simd_array_uses_cb, &wi);
      }
}
326*38fd1498Szrj
327*38fd1498Szrj /* Shrink arrays with "omp simd array" attribute to the corresponding
328*38fd1498Szrj vectorization factor. */
329*38fd1498Szrj
static void
shrink_simd_arrays
  (hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab,
   hash_table<simduid_to_vf> *simduid_to_vf_htab)
{
  for (hash_table<simd_array_to_simduid>::iterator iter
	 = simd_array_to_simduid_htab->begin ();
       iter != simd_array_to_simduid_htab->end (); ++iter)
    /* Skip entries invalidated (simduid == -1U) because the array was
       used with more than one simduid.  */
    if ((*iter)->simduid != -1U)
      {
	tree decl = (*iter)->decl;
	poly_uint64 vf = 1;
	if (simduid_to_vf_htab)
	  {
	    /* Use the recorded vectorization factor if one exists;
	       otherwise shrink to a single element.  */
	    simduid_to_vf *p = NULL, data;
	    data.simduid = (*iter)->simduid;
	    p = simduid_to_vf_htab->find (&data);
	    if (p)
	      vf = p->vf;
	  }
	/* Re-type the array to VF elements and lay it out again.  */
	tree atype
	  = build_array_type_nelts (TREE_TYPE (TREE_TYPE (decl)), vf);
	TREE_TYPE (decl) = atype;
	relayout_decl (decl);
      }

  delete simd_array_to_simduid_htab;
}
358*38fd1498Szrj
/* Initialize the vec_info with kind KIND_IN and target cost data
   TARGET_COST_DATA_IN.  The dataref and ddr vectors start out empty.  */

vec_info::vec_info (vec_info::vec_kind kind_in, void *target_cost_data_in)
  : kind (kind_in),
    datarefs (vNULL),
    ddrs (vNULL),
    target_cost_data (target_cost_data_in)
{
}
369*38fd1498Szrj
/* Release everything this vec_info owns: per-dataref aux data, the SLP
   instances, the data references, the dependence relations and the
   target cost data.  */

vec_info::~vec_info ()
{
  slp_instance instance;
  struct data_reference *dr;
  unsigned int i;

  /* Free the aux data attached to each data reference before the
     datarefs themselves are freed below.  */
  FOR_EACH_VEC_ELT (datarefs, i, dr)
    if (dr->aux)
      {
	free (dr->aux);
	dr->aux = NULL;
      }

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    vect_free_slp_instance (instance);

  free_data_refs (datarefs);
  free_dependence_relations (ddrs);
  destroy_cost_data (target_cost_data);
}
390*38fd1498Szrj
391*38fd1498Szrj /* A helper function to free scev and LOOP niter information, as well as
392*38fd1498Szrj clear loop constraint LOOP_C_FINITE. */
393*38fd1498Szrj
void
vect_free_loop_info_assumptions (struct loop *loop)
{
  scev_reset_htab ();
  /* We need to explicitly reset upper bound information since they are
     used even after free_numbers_of_iterations_estimates.  */
  loop->any_upper_bound = false;
  loop->any_likely_upper_bound = false;
  free_numbers_of_iterations_estimates (loop);
  /* The loop is no longer assumed finite once its niter analysis is
     discarded.  */
  loop_constraint_clear (loop, LOOP_C_FINITE);
}
405*38fd1498Szrj
406*38fd1498Szrj /* Return whether STMT is inside the region we try to vectorize. */
407*38fd1498Szrj
408*38fd1498Szrj bool
vect_stmt_in_region_p(vec_info * vinfo,gimple * stmt)409*38fd1498Szrj vect_stmt_in_region_p (vec_info *vinfo, gimple *stmt)
410*38fd1498Szrj {
411*38fd1498Szrj if (!gimple_bb (stmt))
412*38fd1498Szrj return false;
413*38fd1498Szrj
414*38fd1498Szrj if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
415*38fd1498Szrj {
416*38fd1498Szrj struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
417*38fd1498Szrj if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
418*38fd1498Szrj return false;
419*38fd1498Szrj }
420*38fd1498Szrj else
421*38fd1498Szrj {
422*38fd1498Szrj bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
423*38fd1498Szrj if (gimple_bb (stmt) != BB_VINFO_BB (bb_vinfo)
424*38fd1498Szrj || gimple_uid (stmt) == -1U
425*38fd1498Szrj || gimple_code (stmt) == GIMPLE_PHI)
426*38fd1498Szrj return false;
427*38fd1498Szrj }
428*38fd1498Szrj
429*38fd1498Szrj return true;
430*38fd1498Szrj }
431*38fd1498Szrj
432*38fd1498Szrj
433*38fd1498Szrj /* If LOOP has been versioned during ifcvt, return the internal call
434*38fd1498Szrj guarding it. */
435*38fd1498Szrj
436*38fd1498Szrj static gimple *
vect_loop_vectorized_call(struct loop * loop)437*38fd1498Szrj vect_loop_vectorized_call (struct loop *loop)
438*38fd1498Szrj {
439*38fd1498Szrj basic_block bb = loop_preheader_edge (loop)->src;
440*38fd1498Szrj gimple *g;
441*38fd1498Szrj do
442*38fd1498Szrj {
443*38fd1498Szrj g = last_stmt (bb);
444*38fd1498Szrj if (g)
445*38fd1498Szrj break;
446*38fd1498Szrj if (!single_pred_p (bb))
447*38fd1498Szrj break;
448*38fd1498Szrj bb = single_pred (bb);
449*38fd1498Szrj }
450*38fd1498Szrj while (1);
451*38fd1498Szrj if (g && gimple_code (g) == GIMPLE_COND)
452*38fd1498Szrj {
453*38fd1498Szrj gimple_stmt_iterator gsi = gsi_for_stmt (g);
454*38fd1498Szrj gsi_prev (&gsi);
455*38fd1498Szrj if (!gsi_end_p (gsi))
456*38fd1498Szrj {
457*38fd1498Szrj g = gsi_stmt (gsi);
458*38fd1498Szrj if (gimple_call_internal_p (g, IFN_LOOP_VECTORIZED)
459*38fd1498Szrj && (tree_to_shwi (gimple_call_arg (g, 0)) == loop->num
460*38fd1498Szrj || tree_to_shwi (gimple_call_arg (g, 1)) == loop->num))
461*38fd1498Szrj return g;
462*38fd1498Szrj }
463*38fd1498Szrj }
464*38fd1498Szrj return NULL;
465*38fd1498Szrj }
466*38fd1498Szrj
/* If LOOP has been versioned during loop distribution, return the guarding
   internal call.  */
469*38fd1498Szrj
static gimple *
vect_loop_dist_alias_call (struct loop *loop)
{
  basic_block bb;
  basic_block entry;
  struct loop *outer, *orig;
  gimple_stmt_iterator gsi;
  gimple *g;

  /* orig_loop_num == 0 means the loop was never versioned by loop
     distribution.  */
  if (loop->orig_loop_num == 0)
    return NULL;

  orig = get_loop (cfun, loop->orig_loop_num);
  if (orig == NULL)
    {
      /* The original loop is somehow destroyed.  Clear the information.  */
      loop->orig_loop_num = 0;
      return NULL;
    }

  /* Start the search at the join point of both loop versions, or at the
     preheader when LOOP is the original loop itself.  */
  if (loop != orig)
    bb = nearest_common_dominator (CDI_DOMINATORS, loop->header, orig->header);
  else
    bb = loop_preheader_edge (loop)->src;

  outer = bb->loop_father;
  entry = ENTRY_BLOCK_PTR_FOR_FN (cfun);

  /* Look upward in dominance tree.  */
  for (; bb != entry && flow_bb_inside_loop_p (outer, bb);
       bb = get_immediate_dominator (CDI_DOMINATORS, bb))
    {
      g = last_stmt (bb);
      if (g == NULL || gimple_code (g) != GIMPLE_COND)
	continue;

      /* The candidate call is the statement just before the versioning
	 condition.  */
      gsi = gsi_for_stmt (g);
      gsi_prev (&gsi);
      if (gsi_end_p (gsi))
	continue;

      g = gsi_stmt (gsi);
      /* The guarding internal function call must have the same distribution
	 alias id.  */
      if (gimple_call_internal_p (g, IFN_LOOP_DIST_ALIAS)
	  && (tree_to_shwi (gimple_call_arg (g, 0)) == loop->orig_loop_num))
	return g;
    }
  return NULL;
}
520*38fd1498Szrj
521*38fd1498Szrj /* Set the uids of all the statements in basic blocks inside loop
522*38fd1498Szrj represented by LOOP_VINFO. LOOP_VECTORIZED_CALL is the internal
523*38fd1498Szrj call guarding the loop which has been if converted. */
static void
set_uid_loop_bbs (loop_vec_info loop_vinfo, gimple *loop_vectorized_call)
{
  /* The second argument of the IFN_LOOP_VECTORIZED call is the number
     of the scalar loop version.  */
  tree arg = gimple_call_arg (loop_vectorized_call, 1);
  basic_block *bbs;
  unsigned int i;
  struct loop *scalar_loop = get_loop (cfun, tree_to_shwi (arg));

  LOOP_VINFO_SCALAR_LOOP (loop_vinfo) = scalar_loop;
  /* The scalar loop version must be guarded by the same call.  */
  gcc_checking_assert (vect_loop_vectorized_call (scalar_loop)
		       == loop_vectorized_call);
  /* If we are going to vectorize outer loop, prevent vectorization
     of the inner loop in the scalar loop - either the scalar loop is
     thrown away, so it is a wasted work, or is used only for
     a few iterations.  */
  if (scalar_loop->inner)
    {
      gimple *g = vect_loop_vectorized_call (scalar_loop->inner);
      if (g)
	{
	  /* Resolve the inner IFN_LOOP_VECTORIZED to false and mark the
	     if-converted inner loop as not to be vectorized.  */
	  arg = gimple_call_arg (g, 0);
	  get_loop (cfun, tree_to_shwi (arg))->dont_vectorize = true;
	  fold_loop_internal_call (g, boolean_false_node);
	}
    }
  /* Zero the uids of all statements in the scalar loop body (PHIs and
     ordinary statements alike).  */
  bbs = get_loop_body (scalar_loop);
  for (i = 0; i < scalar_loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *phi = gsi_stmt (gsi);
	  gimple_set_uid (phi, 0);
	}
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  gimple_set_uid (stmt, 0);
	}
    }
  free (bbs);
}
567*38fd1498Szrj
568*38fd1498Szrj /* Function vectorize_loops.
569*38fd1498Szrj
570*38fd1498Szrj Entry point to loop vectorization phase. */
571*38fd1498Szrj
572*38fd1498Szrj unsigned
vectorize_loops(void)573*38fd1498Szrj vectorize_loops (void)
574*38fd1498Szrj {
575*38fd1498Szrj unsigned int i;
576*38fd1498Szrj unsigned int num_vectorized_loops = 0;
577*38fd1498Szrj unsigned int vect_loops_num;
578*38fd1498Szrj struct loop *loop;
579*38fd1498Szrj hash_table<simduid_to_vf> *simduid_to_vf_htab = NULL;
580*38fd1498Szrj hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab = NULL;
581*38fd1498Szrj bool any_ifcvt_loops = false;
582*38fd1498Szrj unsigned ret = 0;
583*38fd1498Szrj struct loop *new_loop;
584*38fd1498Szrj
585*38fd1498Szrj vect_loops_num = number_of_loops (cfun);
586*38fd1498Szrj
587*38fd1498Szrj /* Bail out if there are no loops. */
588*38fd1498Szrj if (vect_loops_num <= 1)
589*38fd1498Szrj return 0;
590*38fd1498Szrj
591*38fd1498Szrj if (cfun->has_simduid_loops)
592*38fd1498Szrj note_simd_array_uses (&simd_array_to_simduid_htab);
593*38fd1498Szrj
594*38fd1498Szrj init_stmt_vec_info_vec ();
595*38fd1498Szrj
596*38fd1498Szrj /* ----------- Analyze loops. ----------- */
597*38fd1498Szrj
598*38fd1498Szrj /* If some loop was duplicated, it gets bigger number
599*38fd1498Szrj than all previously defined loops. This fact allows us to run
600*38fd1498Szrj only over initial loops skipping newly generated ones. */
601*38fd1498Szrj FOR_EACH_LOOP (loop, 0)
602*38fd1498Szrj if (loop->dont_vectorize)
603*38fd1498Szrj {
604*38fd1498Szrj any_ifcvt_loops = true;
605*38fd1498Szrj /* If-conversion sometimes versions both the outer loop
606*38fd1498Szrj (for the case when outer loop vectorization might be
607*38fd1498Szrj desirable) as well as the inner loop in the scalar version
608*38fd1498Szrj of the loop. So we have:
609*38fd1498Szrj if (LOOP_VECTORIZED (1, 3))
610*38fd1498Szrj {
611*38fd1498Szrj loop1
612*38fd1498Szrj loop2
613*38fd1498Szrj }
614*38fd1498Szrj else
615*38fd1498Szrj loop3 (copy of loop1)
616*38fd1498Szrj if (LOOP_VECTORIZED (4, 5))
617*38fd1498Szrj loop4 (copy of loop2)
618*38fd1498Szrj else
619*38fd1498Szrj loop5 (copy of loop4)
620*38fd1498Szrj If FOR_EACH_LOOP gives us loop3 first (which has
621*38fd1498Szrj dont_vectorize set), make sure to process loop1 before loop4;
622*38fd1498Szrj so that we can prevent vectorization of loop4 if loop1
623*38fd1498Szrj is successfully vectorized. */
624*38fd1498Szrj if (loop->inner)
625*38fd1498Szrj {
626*38fd1498Szrj gimple *loop_vectorized_call
627*38fd1498Szrj = vect_loop_vectorized_call (loop);
628*38fd1498Szrj if (loop_vectorized_call
629*38fd1498Szrj && vect_loop_vectorized_call (loop->inner))
630*38fd1498Szrj {
631*38fd1498Szrj tree arg = gimple_call_arg (loop_vectorized_call, 0);
632*38fd1498Szrj struct loop *vector_loop
633*38fd1498Szrj = get_loop (cfun, tree_to_shwi (arg));
634*38fd1498Szrj if (vector_loop && vector_loop != loop)
635*38fd1498Szrj {
636*38fd1498Szrj loop = vector_loop;
637*38fd1498Szrj /* Make sure we don't vectorize it twice. */
638*38fd1498Szrj loop->dont_vectorize = true;
639*38fd1498Szrj goto try_vectorize;
640*38fd1498Szrj }
641*38fd1498Szrj }
642*38fd1498Szrj }
643*38fd1498Szrj }
644*38fd1498Szrj else
645*38fd1498Szrj {
646*38fd1498Szrj loop_vec_info loop_vinfo, orig_loop_vinfo;
647*38fd1498Szrj gimple *loop_vectorized_call, *loop_dist_alias_call;
648*38fd1498Szrj try_vectorize:
649*38fd1498Szrj if (!((flag_tree_loop_vectorize
650*38fd1498Szrj && optimize_loop_nest_for_speed_p (loop))
651*38fd1498Szrj || loop->force_vectorize))
652*38fd1498Szrj continue;
653*38fd1498Szrj orig_loop_vinfo = NULL;
654*38fd1498Szrj loop_vectorized_call = vect_loop_vectorized_call (loop);
655*38fd1498Szrj loop_dist_alias_call = vect_loop_dist_alias_call (loop);
656*38fd1498Szrj vectorize_epilogue:
657*38fd1498Szrj vect_location = find_loop_location (loop);
658*38fd1498Szrj if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
659*38fd1498Szrj && dump_enabled_p ())
660*38fd1498Szrj dump_printf (MSG_NOTE, "\nAnalyzing loop at %s:%d\n",
661*38fd1498Szrj LOCATION_FILE (vect_location),
662*38fd1498Szrj LOCATION_LINE (vect_location));
663*38fd1498Szrj
664*38fd1498Szrj loop_vinfo = vect_analyze_loop (loop, orig_loop_vinfo);
665*38fd1498Szrj loop->aux = loop_vinfo;
666*38fd1498Szrj
667*38fd1498Szrj if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
668*38fd1498Szrj {
669*38fd1498Szrj /* Free existing information if loop is analyzed with some
670*38fd1498Szrj assumptions. */
671*38fd1498Szrj if (loop_constraint_set_p (loop, LOOP_C_FINITE))
672*38fd1498Szrj vect_free_loop_info_assumptions (loop);
673*38fd1498Szrj
674*38fd1498Szrj /* If we applied if-conversion then try to vectorize the
675*38fd1498Szrj BB of innermost loops.
676*38fd1498Szrj ??? Ideally BB vectorization would learn to vectorize
677*38fd1498Szrj control flow by applying if-conversion on-the-fly, the
678*38fd1498Szrj following retains the if-converted loop body even when
679*38fd1498Szrj only non-if-converted parts took part in BB vectorization. */
680*38fd1498Szrj if (flag_tree_slp_vectorize != 0
681*38fd1498Szrj && loop_vectorized_call
682*38fd1498Szrj && ! loop->inner)
683*38fd1498Szrj {
684*38fd1498Szrj basic_block bb = loop->header;
685*38fd1498Szrj bool has_mask_load_store = false;
686*38fd1498Szrj for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
687*38fd1498Szrj !gsi_end_p (gsi); gsi_next (&gsi))
688*38fd1498Szrj {
689*38fd1498Szrj gimple *stmt = gsi_stmt (gsi);
690*38fd1498Szrj if (is_gimple_call (stmt)
691*38fd1498Szrj && gimple_call_internal_p (stmt)
692*38fd1498Szrj && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
693*38fd1498Szrj || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
694*38fd1498Szrj {
695*38fd1498Szrj has_mask_load_store = true;
696*38fd1498Szrj break;
697*38fd1498Szrj }
698*38fd1498Szrj gimple_set_uid (stmt, -1);
699*38fd1498Szrj gimple_set_visited (stmt, false);
700*38fd1498Szrj }
701*38fd1498Szrj if (! has_mask_load_store && vect_slp_bb (bb))
702*38fd1498Szrj {
703*38fd1498Szrj dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
704*38fd1498Szrj "basic block vectorized\n");
705*38fd1498Szrj fold_loop_internal_call (loop_vectorized_call,
706*38fd1498Szrj boolean_true_node);
707*38fd1498Szrj loop_vectorized_call = NULL;
708*38fd1498Szrj ret |= TODO_cleanup_cfg;
709*38fd1498Szrj }
710*38fd1498Szrj }
711*38fd1498Szrj /* If outer loop vectorization fails for LOOP_VECTORIZED guarded
712*38fd1498Szrj loop, don't vectorize its inner loop; we'll attempt to
713*38fd1498Szrj vectorize LOOP_VECTORIZED guarded inner loop of the scalar
714*38fd1498Szrj loop version. */
715*38fd1498Szrj if (loop_vectorized_call && loop->inner)
716*38fd1498Szrj loop->inner->dont_vectorize = true;
717*38fd1498Szrj continue;
718*38fd1498Szrj }
719*38fd1498Szrj
720*38fd1498Szrj if (!dbg_cnt (vect_loop))
721*38fd1498Szrj {
722*38fd1498Szrj /* We may miss some if-converted loops due to
723*38fd1498Szrj debug counter. Set any_ifcvt_loops to visit
724*38fd1498Szrj them at finalization. */
725*38fd1498Szrj any_ifcvt_loops = true;
726*38fd1498Szrj /* Free existing information if loop is analyzed with some
727*38fd1498Szrj assumptions. */
728*38fd1498Szrj if (loop_constraint_set_p (loop, LOOP_C_FINITE))
729*38fd1498Szrj vect_free_loop_info_assumptions (loop);
730*38fd1498Szrj
731*38fd1498Szrj break;
732*38fd1498Szrj }
733*38fd1498Szrj
734*38fd1498Szrj if (loop_vectorized_call)
735*38fd1498Szrj set_uid_loop_bbs (loop_vinfo, loop_vectorized_call);
736*38fd1498Szrj if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
737*38fd1498Szrj && dump_enabled_p ())
738*38fd1498Szrj dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
739*38fd1498Szrj "loop vectorized\n");
740*38fd1498Szrj new_loop = vect_transform_loop (loop_vinfo);
741*38fd1498Szrj num_vectorized_loops++;
742*38fd1498Szrj /* Now that the loop has been vectorized, allow it to be unrolled
743*38fd1498Szrj etc. */
744*38fd1498Szrj loop->force_vectorize = false;
745*38fd1498Szrj
746*38fd1498Szrj if (loop->simduid)
747*38fd1498Szrj {
748*38fd1498Szrj simduid_to_vf *simduid_to_vf_data = XNEW (simduid_to_vf);
749*38fd1498Szrj if (!simduid_to_vf_htab)
750*38fd1498Szrj simduid_to_vf_htab = new hash_table<simduid_to_vf> (15);
751*38fd1498Szrj simduid_to_vf_data->simduid = DECL_UID (loop->simduid);
752*38fd1498Szrj simduid_to_vf_data->vf = loop_vinfo->vectorization_factor;
753*38fd1498Szrj *simduid_to_vf_htab->find_slot (simduid_to_vf_data, INSERT)
754*38fd1498Szrj = simduid_to_vf_data;
755*38fd1498Szrj }
756*38fd1498Szrj
757*38fd1498Szrj if (loop_vectorized_call)
758*38fd1498Szrj {
759*38fd1498Szrj fold_loop_internal_call (loop_vectorized_call, boolean_true_node);
760*38fd1498Szrj loop_vectorized_call = NULL;
761*38fd1498Szrj ret |= TODO_cleanup_cfg;
762*38fd1498Szrj }
763*38fd1498Szrj if (loop_dist_alias_call)
764*38fd1498Szrj {
765*38fd1498Szrj tree value = gimple_call_arg (loop_dist_alias_call, 1);
766*38fd1498Szrj fold_loop_internal_call (loop_dist_alias_call, value);
767*38fd1498Szrj loop_dist_alias_call = NULL;
768*38fd1498Szrj ret |= TODO_cleanup_cfg;
769*38fd1498Szrj }
770*38fd1498Szrj
771*38fd1498Szrj if (new_loop)
772*38fd1498Szrj {
773*38fd1498Szrj /* Epilogue of vectorized loop must be vectorized too. */
774*38fd1498Szrj vect_loops_num = number_of_loops (cfun);
775*38fd1498Szrj loop = new_loop;
776*38fd1498Szrj orig_loop_vinfo = loop_vinfo; /* To pass vect_analyze_loop. */
777*38fd1498Szrj goto vectorize_epilogue;
778*38fd1498Szrj }
779*38fd1498Szrj }
780*38fd1498Szrj
781*38fd1498Szrj vect_location = UNKNOWN_LOCATION;
782*38fd1498Szrj
783*38fd1498Szrj statistics_counter_event (cfun, "Vectorized loops", num_vectorized_loops);
784*38fd1498Szrj if (dump_enabled_p ()
785*38fd1498Szrj || (num_vectorized_loops > 0 && dump_enabled_p ()))
786*38fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location,
787*38fd1498Szrj "vectorized %u loops in function.\n",
788*38fd1498Szrj num_vectorized_loops);
789*38fd1498Szrj
790*38fd1498Szrj /* ----------- Finalize. ----------- */
791*38fd1498Szrj
792*38fd1498Szrj if (any_ifcvt_loops)
793*38fd1498Szrj for (i = 1; i < vect_loops_num; i++)
794*38fd1498Szrj {
795*38fd1498Szrj loop = get_loop (cfun, i);
796*38fd1498Szrj if (loop && loop->dont_vectorize)
797*38fd1498Szrj {
798*38fd1498Szrj gimple *g = vect_loop_vectorized_call (loop);
799*38fd1498Szrj if (g)
800*38fd1498Szrj {
801*38fd1498Szrj fold_loop_internal_call (g, boolean_false_node);
802*38fd1498Szrj ret |= TODO_cleanup_cfg;
803*38fd1498Szrj g = NULL;
804*38fd1498Szrj }
805*38fd1498Szrj else
806*38fd1498Szrj g = vect_loop_dist_alias_call (loop);
807*38fd1498Szrj
808*38fd1498Szrj if (g)
809*38fd1498Szrj {
810*38fd1498Szrj fold_loop_internal_call (g, boolean_false_node);
811*38fd1498Szrj ret |= TODO_cleanup_cfg;
812*38fd1498Szrj }
813*38fd1498Szrj }
814*38fd1498Szrj }
815*38fd1498Szrj
816*38fd1498Szrj for (i = 1; i < vect_loops_num; i++)
817*38fd1498Szrj {
818*38fd1498Szrj loop_vec_info loop_vinfo;
819*38fd1498Szrj bool has_mask_store;
820*38fd1498Szrj
821*38fd1498Szrj loop = get_loop (cfun, i);
822*38fd1498Szrj if (!loop)
823*38fd1498Szrj continue;
824*38fd1498Szrj loop_vinfo = (loop_vec_info) loop->aux;
825*38fd1498Szrj has_mask_store = false;
826*38fd1498Szrj if (loop_vinfo)
827*38fd1498Szrj has_mask_store = LOOP_VINFO_HAS_MASK_STORE (loop_vinfo);
828*38fd1498Szrj delete loop_vinfo;
829*38fd1498Szrj if (has_mask_store
830*38fd1498Szrj && targetm.vectorize.empty_mask_is_expensive (IFN_MASK_STORE))
831*38fd1498Szrj optimize_mask_stores (loop);
832*38fd1498Szrj loop->aux = NULL;
833*38fd1498Szrj }
834*38fd1498Szrj
835*38fd1498Szrj free_stmt_vec_info_vec ();
836*38fd1498Szrj
837*38fd1498Szrj /* Fold IFN_GOMP_SIMD_{VF,LANE,LAST_LANE,ORDERED_{START,END}} builtins. */
838*38fd1498Szrj if (cfun->has_simduid_loops)
839*38fd1498Szrj adjust_simduid_builtins (simduid_to_vf_htab);
840*38fd1498Szrj
841*38fd1498Szrj /* Shrink any "omp array simd" temporary arrays to the
842*38fd1498Szrj actual vectorization factors. */
843*38fd1498Szrj if (simd_array_to_simduid_htab)
844*38fd1498Szrj shrink_simd_arrays (simd_array_to_simduid_htab, simduid_to_vf_htab);
845*38fd1498Szrj delete simduid_to_vf_htab;
846*38fd1498Szrj cfun->has_simduid_loops = false;
847*38fd1498Szrj
848*38fd1498Szrj if (num_vectorized_loops > 0)
849*38fd1498Szrj {
850*38fd1498Szrj /* If we vectorized any loop only virtual SSA form needs to be updated.
851*38fd1498Szrj ??? Also while we try hard to update loop-closed SSA form we fail
852*38fd1498Szrj to properly do this in some corner-cases (see PR56286). */
853*38fd1498Szrj rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa_only_virtuals);
854*38fd1498Szrj return TODO_cleanup_cfg;
855*38fd1498Szrj }
856*38fd1498Szrj
857*38fd1498Szrj return ret;
858*38fd1498Szrj }
859*38fd1498Szrj
860*38fd1498Szrj
861*38fd1498Szrj /* Entry point to the simduid cleanup pass. */
862*38fd1498Szrj
863*38fd1498Szrj namespace {
864*38fd1498Szrj
/* Pass metadata for the simduid cleanup pass: a cheap late pass (no
   timevar, no optinfo group) that only requires SSA and CFG.  */
const pass_data pass_data_simduid_cleanup =
{
  GIMPLE_PASS, /* type */
  "simduid", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  ( PROP_ssa | PROP_cfg ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
877*38fd1498Szrj
/* Late cleanup pass that folds leftover IFN_GOMP_SIMD_* builtins and
   shrinks "omp simd array" temporaries in functions whose simduid loops
   did not get vectorized (see execute below).  */
class pass_simduid_cleanup : public gimple_opt_pass
{
public:
  pass_simduid_cleanup (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_simduid_cleanup, ctxt)
  {}

  /* opt_pass methods: */
  /* Clonable so the pass may be scheduled more than once in the
     pipeline.  */
  opt_pass * clone () { return new pass_simduid_cleanup (m_ctxt); }
  /* Only run on functions that still contain simduid loops.  */
  virtual bool gate (function *fun) { return fun->has_simduid_loops; }
  virtual unsigned int execute (function *);

}; // class pass_simduid_cleanup
891*38fd1498Szrj
892*38fd1498Szrj unsigned int
execute(function * fun)893*38fd1498Szrj pass_simduid_cleanup::execute (function *fun)
894*38fd1498Szrj {
895*38fd1498Szrj hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab = NULL;
896*38fd1498Szrj
897*38fd1498Szrj note_simd_array_uses (&simd_array_to_simduid_htab);
898*38fd1498Szrj
899*38fd1498Szrj /* Fold IFN_GOMP_SIMD_{VF,LANE,LAST_LANE,ORDERED_{START,END}} builtins. */
900*38fd1498Szrj adjust_simduid_builtins (NULL);
901*38fd1498Szrj
902*38fd1498Szrj /* Shrink any "omp array simd" temporary arrays to the
903*38fd1498Szrj actual vectorization factors. */
904*38fd1498Szrj if (simd_array_to_simduid_htab)
905*38fd1498Szrj shrink_simd_arrays (simd_array_to_simduid_htab, NULL);
906*38fd1498Szrj fun->has_simduid_loops = false;
907*38fd1498Szrj return 0;
908*38fd1498Szrj }
909*38fd1498Szrj
910*38fd1498Szrj } // anon namespace
911*38fd1498Szrj
/* Factory: create a new instance of the simduid cleanup pass for
   context CTXT.  */
gimple_opt_pass *
make_pass_simduid_cleanup (gcc::context *ctxt)
{
  return new pass_simduid_cleanup (ctxt);
}
917*38fd1498Szrj
918*38fd1498Szrj
919*38fd1498Szrj /* Entry point to basic block SLP phase. */
920*38fd1498Szrj
921*38fd1498Szrj namespace {
922*38fd1498Szrj
/* Pass metadata for basic-block SLP vectorization.  Requires SSA and
   CFG; schedules an SSA update after it finishes.  */
const pass_data pass_data_slp_vectorize =
{
  GIMPLE_PASS, /* type */
  "slp", /* name */
  OPTGROUP_LOOP | OPTGROUP_VEC, /* optinfo_flags */
  TV_TREE_SLP_VECTORIZATION, /* tv_id */
  ( PROP_ssa | PROP_cfg ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};
935*38fd1498Szrj
/* Gimple pass driving straight-line-code (basic block) SLP
   vectorization; see execute below for the actual driver.  */
class pass_slp_vectorize : public gimple_opt_pass
{
public:
  pass_slp_vectorize (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_slp_vectorize, ctxt)
  {}

  /* opt_pass methods: */
  /* Clonable so the pass may be scheduled more than once in the
     pipeline.  */
  opt_pass * clone () { return new pass_slp_vectorize (m_ctxt); }
  /* Gated on -ftree-slp-vectorize.  */
  virtual bool gate (function *) { return flag_tree_slp_vectorize != 0; }
  virtual unsigned int execute (function *);

}; // class pass_slp_vectorize
949*38fd1498Szrj
950*38fd1498Szrj unsigned int
execute(function * fun)951*38fd1498Szrj pass_slp_vectorize::execute (function *fun)
952*38fd1498Szrj {
953*38fd1498Szrj basic_block bb;
954*38fd1498Szrj
955*38fd1498Szrj bool in_loop_pipeline = scev_initialized_p ();
956*38fd1498Szrj if (!in_loop_pipeline)
957*38fd1498Szrj {
958*38fd1498Szrj loop_optimizer_init (LOOPS_NORMAL);
959*38fd1498Szrj scev_initialize ();
960*38fd1498Szrj }
961*38fd1498Szrj
962*38fd1498Szrj /* Mark all stmts as not belonging to the current region and unvisited. */
963*38fd1498Szrj FOR_EACH_BB_FN (bb, fun)
964*38fd1498Szrj {
965*38fd1498Szrj for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
966*38fd1498Szrj gsi_next (&gsi))
967*38fd1498Szrj {
968*38fd1498Szrj gimple *stmt = gsi_stmt (gsi);
969*38fd1498Szrj gimple_set_uid (stmt, -1);
970*38fd1498Szrj gimple_set_visited (stmt, false);
971*38fd1498Szrj }
972*38fd1498Szrj }
973*38fd1498Szrj
974*38fd1498Szrj init_stmt_vec_info_vec ();
975*38fd1498Szrj
976*38fd1498Szrj FOR_EACH_BB_FN (bb, fun)
977*38fd1498Szrj {
978*38fd1498Szrj if (vect_slp_bb (bb))
979*38fd1498Szrj dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
980*38fd1498Szrj "basic block vectorized\n");
981*38fd1498Szrj }
982*38fd1498Szrj
983*38fd1498Szrj free_stmt_vec_info_vec ();
984*38fd1498Szrj
985*38fd1498Szrj if (!in_loop_pipeline)
986*38fd1498Szrj {
987*38fd1498Szrj scev_finalize ();
988*38fd1498Szrj loop_optimizer_finalize ();
989*38fd1498Szrj }
990*38fd1498Szrj
991*38fd1498Szrj return 0;
992*38fd1498Szrj }
993*38fd1498Szrj
994*38fd1498Szrj } // anon namespace
995*38fd1498Szrj
/* Factory: create a new instance of the BB SLP vectorization pass for
   context CTXT.  */
gimple_opt_pass *
make_pass_slp_vectorize (gcc::context *ctxt)
{
  return new pass_slp_vectorize (ctxt);
}
1001*38fd1498Szrj
1002*38fd1498Szrj
/* Increase alignment of global arrays to improve vectorization potential.
   TODO:
   - Consider also structs that have an array field.
   - Use ipa analysis to prune arrays that can't be vectorized?
     This should involve global alignment analysis and in the future also
     array padding.  */

/* Forward declaration: the array and record helpers below recurse
   through this dispatcher on element/field types.  */
static unsigned get_vec_alignment_for_type (tree);
/* Memoization cache mapping a type (used for RECORD_TYPEs) to its
   computed vector alignment; allocated and freed in
   increase_alignment.  */
static hash_map<tree, unsigned> *type_align_map;
1012*38fd1498Szrj
1013*38fd1498Szrj /* Return alignment of array's vector type corresponding to scalar type.
1014*38fd1498Szrj 0 if no vector type exists. */
1015*38fd1498Szrj static unsigned
get_vec_alignment_for_array_type(tree type)1016*38fd1498Szrj get_vec_alignment_for_array_type (tree type)
1017*38fd1498Szrj {
1018*38fd1498Szrj gcc_assert (TREE_CODE (type) == ARRAY_TYPE);
1019*38fd1498Szrj poly_uint64 array_size, vector_size;
1020*38fd1498Szrj
1021*38fd1498Szrj tree vectype = get_vectype_for_scalar_type (strip_array_types (type));
1022*38fd1498Szrj if (!vectype
1023*38fd1498Szrj || !poly_int_tree_p (TYPE_SIZE (type), &array_size)
1024*38fd1498Szrj || !poly_int_tree_p (TYPE_SIZE (vectype), &vector_size)
1025*38fd1498Szrj || maybe_lt (array_size, vector_size))
1026*38fd1498Szrj return 0;
1027*38fd1498Szrj
1028*38fd1498Szrj return TYPE_ALIGN (vectype);
1029*38fd1498Szrj }
1030*38fd1498Szrj
/* Return alignment of the field having the maximum alignment of the vector
   type corresponding to its scalar type.  For now, we only consider fields
   whose offset is a multiple of their vector alignment.
   0 if no suitable field is found.  Results are memoized in
   type_align_map so each type is scanned only once.  */
static unsigned
get_vec_alignment_for_record_type (tree type)
{
  gcc_assert (TREE_CODE (type) == RECORD_TYPE);

  unsigned max_align = 0, alignment;
  HOST_WIDE_INT offset;
  tree offset_tree;

  /* Packed records place fields at arbitrary offsets; raising member
     alignment would not help.  */
  if (TYPE_PACKED (type))
    return 0;

  /* Return the memoized result if this type was already analyzed.  */
  unsigned *slot = type_align_map->get (type);
  if (slot)
    return *slot;

  for (tree field = first_field (type);
       field != NULL_TREE;
       field = DECL_CHAIN (field))
    {
      /* Skip if not FIELD_DECL or if alignment is set by user.  */
      if (TREE_CODE (field) != FIELD_DECL
	  || DECL_USER_ALIGN (field)
	  || DECL_ARTIFICIAL (field))
	continue;

      /* We don't need to process the type further if offset is variable,
	 since the offsets of remaining members will also be variable.  */
      if (TREE_CODE (DECL_FIELD_OFFSET (field)) != INTEGER_CST
	  || TREE_CODE (DECL_FIELD_BIT_OFFSET (field)) != INTEGER_CST)
	break;

      /* Similarly stop processing the type if offset_tree
	 does not fit in unsigned HOST_WIDE_INT.  */
      offset_tree = bit_position (field);
      if (!tree_fits_uhwi_p (offset_tree))
	break;

      offset = tree_to_uhwi (offset_tree);
      /* Recurse: the field may itself be an array or nested record.  */
      alignment = get_vec_alignment_for_type (TREE_TYPE (field));

      /* Get maximum alignment of vectorized field/array among those members
	 whose offset is multiple of the vector alignment.  */
      if (alignment
	  && (offset % alignment == 0)
	  && (alignment > max_align))
	max_align = alignment;
    }

  /* Memoize and return the result (possibly 0).  */
  type_align_map->put (type, max_align);
  return max_align;
}
1087*38fd1498Szrj
1088*38fd1498Szrj /* Return alignment of vector type corresponding to decl's scalar type
1089*38fd1498Szrj or 0 if it doesn't exist or the vector alignment is lesser than
1090*38fd1498Szrj decl's alignment. */
1091*38fd1498Szrj static unsigned
get_vec_alignment_for_type(tree type)1092*38fd1498Szrj get_vec_alignment_for_type (tree type)
1093*38fd1498Szrj {
1094*38fd1498Szrj if (type == NULL_TREE)
1095*38fd1498Szrj return 0;
1096*38fd1498Szrj
1097*38fd1498Szrj gcc_assert (TYPE_P (type));
1098*38fd1498Szrj
1099*38fd1498Szrj static unsigned alignment = 0;
1100*38fd1498Szrj switch (TREE_CODE (type))
1101*38fd1498Szrj {
1102*38fd1498Szrj case ARRAY_TYPE:
1103*38fd1498Szrj alignment = get_vec_alignment_for_array_type (type);
1104*38fd1498Szrj break;
1105*38fd1498Szrj case RECORD_TYPE:
1106*38fd1498Szrj alignment = get_vec_alignment_for_record_type (type);
1107*38fd1498Szrj break;
1108*38fd1498Szrj default:
1109*38fd1498Szrj alignment = 0;
1110*38fd1498Szrj break;
1111*38fd1498Szrj }
1112*38fd1498Szrj
1113*38fd1498Szrj return (alignment > TYPE_ALIGN (type)) ? alignment : 0;
1114*38fd1498Szrj }
1115*38fd1498Szrj
1116*38fd1498Szrj /* Entry point to increase_alignment pass. */
1117*38fd1498Szrj static unsigned int
increase_alignment(void)1118*38fd1498Szrj increase_alignment (void)
1119*38fd1498Szrj {
1120*38fd1498Szrj varpool_node *vnode;
1121*38fd1498Szrj
1122*38fd1498Szrj vect_location = UNKNOWN_LOCATION;
1123*38fd1498Szrj type_align_map = new hash_map<tree, unsigned>;
1124*38fd1498Szrj
1125*38fd1498Szrj /* Increase the alignment of all global arrays for vectorization. */
1126*38fd1498Szrj FOR_EACH_DEFINED_VARIABLE (vnode)
1127*38fd1498Szrj {
1128*38fd1498Szrj tree decl = vnode->decl;
1129*38fd1498Szrj unsigned int alignment;
1130*38fd1498Szrj
1131*38fd1498Szrj if ((decl_in_symtab_p (decl)
1132*38fd1498Szrj && !symtab_node::get (decl)->can_increase_alignment_p ())
1133*38fd1498Szrj || DECL_USER_ALIGN (decl) || DECL_ARTIFICIAL (decl))
1134*38fd1498Szrj continue;
1135*38fd1498Szrj
1136*38fd1498Szrj alignment = get_vec_alignment_for_type (TREE_TYPE (decl));
1137*38fd1498Szrj if (alignment && vect_can_force_dr_alignment_p (decl, alignment))
1138*38fd1498Szrj {
1139*38fd1498Szrj vnode->increase_alignment (alignment);
1140*38fd1498Szrj dump_printf (MSG_NOTE, "Increasing alignment of decl: ");
1141*38fd1498Szrj dump_generic_expr (MSG_NOTE, TDF_SLIM, decl);
1142*38fd1498Szrj dump_printf (MSG_NOTE, "\n");
1143*38fd1498Szrj }
1144*38fd1498Szrj }
1145*38fd1498Szrj
1146*38fd1498Szrj delete type_align_map;
1147*38fd1498Szrj return 0;
1148*38fd1498Szrj }
1149*38fd1498Szrj
1150*38fd1498Szrj
1151*38fd1498Szrj namespace {
1152*38fd1498Szrj
/* Pass metadata for the IPA increase_alignment pass.  A simple IPA pass
   (no WPA streaming) with no property or TODO requirements.  */
const pass_data pass_data_ipa_increase_alignment =
{
  SIMPLE_IPA_PASS, /* type */
  "increase_alignment", /* name */
  OPTGROUP_LOOP | OPTGROUP_VEC, /* optinfo_flags */
  TV_IPA_OPT, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
1165*38fd1498Szrj
/* IPA pass that increases the alignment of global variables to improve
   vectorization potential (see increase_alignment above).  */
class pass_ipa_increase_alignment : public simple_ipa_opt_pass
{
public:
  pass_ipa_increase_alignment (gcc::context *ctxt)
    : simple_ipa_opt_pass (pass_data_ipa_increase_alignment, ctxt)
  {}

  /* opt_pass methods: */
  /* Only run when section anchors are in use and the loop vectorizer
     is enabled.  */
  virtual bool gate (function *)
  {
    return flag_section_anchors && flag_tree_loop_vectorize;
  }

  virtual unsigned int execute (function *) { return increase_alignment (); }

}; // class pass_ipa_increase_alignment
1182*38fd1498Szrj
1183*38fd1498Szrj } // anon namespace
1184*38fd1498Szrj
/* Factory: create a new instance of the IPA increase_alignment pass
   for context CTXT.  */
simple_ipa_opt_pass *
make_pass_ipa_increase_alignment (gcc::context *ctxt)
{
  return new pass_ipa_increase_alignment (ctxt);
}
1190