1*e4b17023SJohn Marino /* Vectorizer
2*e4b17023SJohn Marino Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
3*e4b17023SJohn Marino Free Software Foundation, Inc.
4*e4b17023SJohn Marino Contributed by Dorit Naishlos <dorit@il.ibm.com>
5*e4b17023SJohn Marino
6*e4b17023SJohn Marino This file is part of GCC.
7*e4b17023SJohn Marino
8*e4b17023SJohn Marino GCC is free software; you can redistribute it and/or modify it under
9*e4b17023SJohn Marino the terms of the GNU General Public License as published by the Free
10*e4b17023SJohn Marino Software Foundation; either version 3, or (at your option) any later
11*e4b17023SJohn Marino version.
12*e4b17023SJohn Marino
13*e4b17023SJohn Marino GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14*e4b17023SJohn Marino WARRANTY; without even the implied warranty of MERCHANTABILITY or
15*e4b17023SJohn Marino FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16*e4b17023SJohn Marino for more details.
17*e4b17023SJohn Marino
18*e4b17023SJohn Marino You should have received a copy of the GNU General Public License
19*e4b17023SJohn Marino along with GCC; see the file COPYING3. If not see
20*e4b17023SJohn Marino <http://www.gnu.org/licenses/>. */
21*e4b17023SJohn Marino
22*e4b17023SJohn Marino /* Loop and basic block vectorizer.
23*e4b17023SJohn Marino
24*e4b17023SJohn Marino This file contains drivers for the three vectorizers:
25*e4b17023SJohn Marino (1) loop vectorizer (inter-iteration parallelism),
26*e4b17023SJohn Marino (2) loop-aware SLP (intra-iteration parallelism) (invoked by the loop
27*e4b17023SJohn Marino vectorizer)
28*e4b17023SJohn Marino (3) BB vectorizer (out-of-loops), aka SLP
29*e4b17023SJohn Marino
30*e4b17023SJohn Marino The rest of the vectorizer's code is organized as follows:
31*e4b17023SJohn Marino - tree-vect-loop.c - loop specific parts such as reductions, etc. These are
32*e4b17023SJohn Marino used by drivers (1) and (2).
33*e4b17023SJohn Marino - tree-vect-loop-manip.c - vectorizer's loop control-flow utilities, used by
34*e4b17023SJohn Marino drivers (1) and (2).
35*e4b17023SJohn Marino - tree-vect-slp.c - BB vectorization specific analysis and transformation,
36*e4b17023SJohn Marino used by drivers (2) and (3).
37*e4b17023SJohn Marino - tree-vect-stmts.c - statements analysis and transformation (used by all).
38*e4b17023SJohn Marino - tree-vect-data-refs.c - vectorizer specific data-refs analysis and
39*e4b17023SJohn Marino manipulations (used by all).
40*e4b17023SJohn Marino - tree-vect-patterns.c - vectorizable code patterns detector (used by all)
41*e4b17023SJohn Marino
   Here's a poor attempt at illustrating that:

     tree-vectorizer.c:
     loop_vect()  loop_aware_slp()  slp_vect()
          |        /           \          /
          |       /             \        /
          tree-vect-loop.c  tree-vect-slp.c
                | \      \  /      /   |
                |  \      \/      /    |
                |   \     /\     /     |
                |    \   /  \   /      |
         tree-vect-stmts.c  tree-vect-data-refs.c
                       \      /
                    tree-vect-patterns.c
*/
57*e4b17023SJohn Marino
58*e4b17023SJohn Marino #include "config.h"
59*e4b17023SJohn Marino #include "system.h"
60*e4b17023SJohn Marino #include "coretypes.h"
61*e4b17023SJohn Marino #include "tm.h"
62*e4b17023SJohn Marino #include "ggc.h"
63*e4b17023SJohn Marino #include "tree.h"
64*e4b17023SJohn Marino #include "tree-pretty-print.h"
65*e4b17023SJohn Marino #include "tree-flow.h"
66*e4b17023SJohn Marino #include "tree-dump.h"
67*e4b17023SJohn Marino #include "cfgloop.h"
68*e4b17023SJohn Marino #include "cfglayout.h"
69*e4b17023SJohn Marino #include "tree-vectorizer.h"
70*e4b17023SJohn Marino #include "tree-pass.h"
71*e4b17023SJohn Marino #include "timevar.h"
72*e4b17023SJohn Marino
73*e4b17023SJohn Marino /* vect_dump will be set to stderr or dump_file if exist. */
74*e4b17023SJohn Marino FILE *vect_dump;
75*e4b17023SJohn Marino
76*e4b17023SJohn Marino /* vect_verbosity_level set to an invalid value
77*e4b17023SJohn Marino to mark that it's uninitialized. */
78*e4b17023SJohn Marino static enum vect_verbosity_levels vect_verbosity_level = MAX_VERBOSITY_LEVEL;
79*e4b17023SJohn Marino
80*e4b17023SJohn Marino /* Loop or bb location. */
81*e4b17023SJohn Marino LOC vect_location;
82*e4b17023SJohn Marino
83*e4b17023SJohn Marino /* Vector mapping GIMPLE stmt to stmt_vec_info. */
VEC(vec_void_p,heap)84*e4b17023SJohn Marino VEC(vec_void_p,heap) *stmt_vec_info_vec;
85*e4b17023SJohn Marino
86*e4b17023SJohn Marino
87*e4b17023SJohn Marino
88*e4b17023SJohn Marino /* Function vect_set_dump_settings.
89*e4b17023SJohn Marino
90*e4b17023SJohn Marino Fix the verbosity level of the vectorizer if the
91*e4b17023SJohn Marino requested level was not set explicitly using the flag
92*e4b17023SJohn Marino -ftree-vectorizer-verbose=N.
93*e4b17023SJohn Marino Decide where to print the debugging information (dump_file/stderr).
94*e4b17023SJohn Marino If the user defined the verbosity level, but there is no dump file,
95*e4b17023SJohn Marino print to stderr, otherwise print to the dump file. */
96*e4b17023SJohn Marino
97*e4b17023SJohn Marino static void
98*e4b17023SJohn Marino vect_set_dump_settings (bool slp)
99*e4b17023SJohn Marino {
100*e4b17023SJohn Marino vect_dump = dump_file;
101*e4b17023SJohn Marino
102*e4b17023SJohn Marino /* Check if the verbosity level was defined by the user: */
103*e4b17023SJohn Marino if (user_vect_verbosity_level != MAX_VERBOSITY_LEVEL)
104*e4b17023SJohn Marino {
105*e4b17023SJohn Marino vect_verbosity_level = user_vect_verbosity_level;
106*e4b17023SJohn Marino /* Ignore user defined verbosity if dump flags require higher level of
107*e4b17023SJohn Marino verbosity. */
108*e4b17023SJohn Marino if (dump_file)
109*e4b17023SJohn Marino {
110*e4b17023SJohn Marino if (((dump_flags & TDF_DETAILS)
111*e4b17023SJohn Marino && vect_verbosity_level >= REPORT_DETAILS)
112*e4b17023SJohn Marino || ((dump_flags & TDF_STATS)
113*e4b17023SJohn Marino && vect_verbosity_level >= REPORT_UNVECTORIZED_LOCATIONS))
114*e4b17023SJohn Marino return;
115*e4b17023SJohn Marino }
116*e4b17023SJohn Marino else
117*e4b17023SJohn Marino {
118*e4b17023SJohn Marino /* If there is no dump file, print to stderr in case of loop
119*e4b17023SJohn Marino vectorization. */
120*e4b17023SJohn Marino if (!slp)
121*e4b17023SJohn Marino vect_dump = stderr;
122*e4b17023SJohn Marino
123*e4b17023SJohn Marino return;
124*e4b17023SJohn Marino }
125*e4b17023SJohn Marino }
126*e4b17023SJohn Marino
127*e4b17023SJohn Marino /* User didn't specify verbosity level: */
128*e4b17023SJohn Marino if (dump_file && (dump_flags & TDF_DETAILS))
129*e4b17023SJohn Marino vect_verbosity_level = REPORT_DETAILS;
130*e4b17023SJohn Marino else if (dump_file && (dump_flags & TDF_STATS))
131*e4b17023SJohn Marino vect_verbosity_level = REPORT_UNVECTORIZED_LOCATIONS;
132*e4b17023SJohn Marino else
133*e4b17023SJohn Marino vect_verbosity_level = REPORT_NONE;
134*e4b17023SJohn Marino
135*e4b17023SJohn Marino gcc_assert (dump_file || vect_verbosity_level == REPORT_NONE);
136*e4b17023SJohn Marino }
137*e4b17023SJohn Marino
138*e4b17023SJohn Marino
139*e4b17023SJohn Marino /* Function debug_loop_details.
140*e4b17023SJohn Marino
141*e4b17023SJohn Marino For vectorization debug dumps. */
142*e4b17023SJohn Marino
143*e4b17023SJohn Marino bool
vect_print_dump_info(enum vect_verbosity_levels vl)144*e4b17023SJohn Marino vect_print_dump_info (enum vect_verbosity_levels vl)
145*e4b17023SJohn Marino {
146*e4b17023SJohn Marino if (vl > vect_verbosity_level)
147*e4b17023SJohn Marino return false;
148*e4b17023SJohn Marino
149*e4b17023SJohn Marino if (!current_function_decl || !vect_dump)
150*e4b17023SJohn Marino return false;
151*e4b17023SJohn Marino
152*e4b17023SJohn Marino if (vect_location == UNKNOWN_LOC)
153*e4b17023SJohn Marino fprintf (vect_dump, "\n%s:%d: note: ",
154*e4b17023SJohn Marino DECL_SOURCE_FILE (current_function_decl),
155*e4b17023SJohn Marino DECL_SOURCE_LINE (current_function_decl));
156*e4b17023SJohn Marino else
157*e4b17023SJohn Marino fprintf (vect_dump, "\n%d: ", LOC_LINE (vect_location));
158*e4b17023SJohn Marino
159*e4b17023SJohn Marino return true;
160*e4b17023SJohn Marino }
161*e4b17023SJohn Marino
162*e4b17023SJohn Marino
163*e4b17023SJohn Marino /* Function vectorize_loops.
164*e4b17023SJohn Marino
165*e4b17023SJohn Marino Entry point to loop vectorization phase. */
166*e4b17023SJohn Marino
167*e4b17023SJohn Marino unsigned
vectorize_loops(void)168*e4b17023SJohn Marino vectorize_loops (void)
169*e4b17023SJohn Marino {
170*e4b17023SJohn Marino unsigned int i;
171*e4b17023SJohn Marino unsigned int num_vectorized_loops = 0;
172*e4b17023SJohn Marino unsigned int vect_loops_num;
173*e4b17023SJohn Marino loop_iterator li;
174*e4b17023SJohn Marino struct loop *loop;
175*e4b17023SJohn Marino
176*e4b17023SJohn Marino vect_loops_num = number_of_loops ();
177*e4b17023SJohn Marino
178*e4b17023SJohn Marino /* Bail out if there are no loops. */
179*e4b17023SJohn Marino if (vect_loops_num <= 1)
180*e4b17023SJohn Marino return 0;
181*e4b17023SJohn Marino
182*e4b17023SJohn Marino /* Fix the verbosity level if not defined explicitly by the user. */
183*e4b17023SJohn Marino vect_set_dump_settings (false);
184*e4b17023SJohn Marino
185*e4b17023SJohn Marino init_stmt_vec_info_vec ();
186*e4b17023SJohn Marino
187*e4b17023SJohn Marino /* ----------- Analyze loops. ----------- */
188*e4b17023SJohn Marino
189*e4b17023SJohn Marino /* If some loop was duplicated, it gets bigger number
190*e4b17023SJohn Marino than all previously defined loops. This fact allows us to run
191*e4b17023SJohn Marino only over initial loops skipping newly generated ones. */
192*e4b17023SJohn Marino FOR_EACH_LOOP (li, loop, 0)
193*e4b17023SJohn Marino if (optimize_loop_nest_for_speed_p (loop))
194*e4b17023SJohn Marino {
195*e4b17023SJohn Marino loop_vec_info loop_vinfo;
196*e4b17023SJohn Marino
197*e4b17023SJohn Marino vect_location = find_loop_location (loop);
198*e4b17023SJohn Marino if (vect_location != UNKNOWN_LOC
199*e4b17023SJohn Marino && vect_verbosity_level > REPORT_NONE)
200*e4b17023SJohn Marino fprintf (vect_dump, "\nAnalyzing loop at %s:%d\n",
201*e4b17023SJohn Marino LOC_FILE (vect_location), LOC_LINE (vect_location));
202*e4b17023SJohn Marino
203*e4b17023SJohn Marino loop_vinfo = vect_analyze_loop (loop);
204*e4b17023SJohn Marino loop->aux = loop_vinfo;
205*e4b17023SJohn Marino
206*e4b17023SJohn Marino if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
207*e4b17023SJohn Marino continue;
208*e4b17023SJohn Marino
209*e4b17023SJohn Marino if (vect_location != UNKNOWN_LOC
210*e4b17023SJohn Marino && vect_verbosity_level > REPORT_NONE)
211*e4b17023SJohn Marino fprintf (vect_dump, "\n\nVectorizing loop at %s:%d\n",
212*e4b17023SJohn Marino LOC_FILE (vect_location), LOC_LINE (vect_location));
213*e4b17023SJohn Marino
214*e4b17023SJohn Marino vect_transform_loop (loop_vinfo);
215*e4b17023SJohn Marino num_vectorized_loops++;
216*e4b17023SJohn Marino }
217*e4b17023SJohn Marino
218*e4b17023SJohn Marino vect_location = UNKNOWN_LOC;
219*e4b17023SJohn Marino
220*e4b17023SJohn Marino statistics_counter_event (cfun, "Vectorized loops", num_vectorized_loops);
221*e4b17023SJohn Marino if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)
222*e4b17023SJohn Marino || (num_vectorized_loops > 0
223*e4b17023SJohn Marino && vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS)))
224*e4b17023SJohn Marino fprintf (vect_dump, "vectorized %u loops in function.\n",
225*e4b17023SJohn Marino num_vectorized_loops);
226*e4b17023SJohn Marino
227*e4b17023SJohn Marino /* ----------- Finalize. ----------- */
228*e4b17023SJohn Marino
229*e4b17023SJohn Marino mark_sym_for_renaming (gimple_vop (cfun));
230*e4b17023SJohn Marino
231*e4b17023SJohn Marino for (i = 1; i < vect_loops_num; i++)
232*e4b17023SJohn Marino {
233*e4b17023SJohn Marino loop_vec_info loop_vinfo;
234*e4b17023SJohn Marino
235*e4b17023SJohn Marino loop = get_loop (i);
236*e4b17023SJohn Marino if (!loop)
237*e4b17023SJohn Marino continue;
238*e4b17023SJohn Marino loop_vinfo = (loop_vec_info) loop->aux;
239*e4b17023SJohn Marino destroy_loop_vec_info (loop_vinfo, true);
240*e4b17023SJohn Marino loop->aux = NULL;
241*e4b17023SJohn Marino }
242*e4b17023SJohn Marino
243*e4b17023SJohn Marino free_stmt_vec_info_vec ();
244*e4b17023SJohn Marino
245*e4b17023SJohn Marino return num_vectorized_loops > 0 ? TODO_cleanup_cfg : 0;
246*e4b17023SJohn Marino }
247*e4b17023SJohn Marino
248*e4b17023SJohn Marino
249*e4b17023SJohn Marino /* Entry point to basic block SLP phase. */
250*e4b17023SJohn Marino
251*e4b17023SJohn Marino static unsigned int
execute_vect_slp(void)252*e4b17023SJohn Marino execute_vect_slp (void)
253*e4b17023SJohn Marino {
254*e4b17023SJohn Marino basic_block bb;
255*e4b17023SJohn Marino
256*e4b17023SJohn Marino /* Fix the verbosity level if not defined explicitly by the user. */
257*e4b17023SJohn Marino vect_set_dump_settings (true);
258*e4b17023SJohn Marino
259*e4b17023SJohn Marino init_stmt_vec_info_vec ();
260*e4b17023SJohn Marino
261*e4b17023SJohn Marino FOR_EACH_BB (bb)
262*e4b17023SJohn Marino {
263*e4b17023SJohn Marino vect_location = find_bb_location (bb);
264*e4b17023SJohn Marino
265*e4b17023SJohn Marino if (vect_slp_analyze_bb (bb))
266*e4b17023SJohn Marino {
267*e4b17023SJohn Marino vect_slp_transform_bb (bb);
268*e4b17023SJohn Marino
269*e4b17023SJohn Marino if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS))
270*e4b17023SJohn Marino fprintf (vect_dump, "basic block vectorized using SLP\n");
271*e4b17023SJohn Marino }
272*e4b17023SJohn Marino }
273*e4b17023SJohn Marino
274*e4b17023SJohn Marino free_stmt_vec_info_vec ();
275*e4b17023SJohn Marino return 0;
276*e4b17023SJohn Marino }
277*e4b17023SJohn Marino
278*e4b17023SJohn Marino static bool
gate_vect_slp(void)279*e4b17023SJohn Marino gate_vect_slp (void)
280*e4b17023SJohn Marino {
281*e4b17023SJohn Marino /* Apply SLP either if the vectorizer is on and the user didn't specify
282*e4b17023SJohn Marino whether to run SLP or not, or if the SLP flag was set by the user. */
283*e4b17023SJohn Marino return ((flag_tree_vectorize != 0 && flag_tree_slp_vectorize != 0)
284*e4b17023SJohn Marino || flag_tree_slp_vectorize == 1);
285*e4b17023SJohn Marino }
286*e4b17023SJohn Marino
287*e4b17023SJohn Marino struct gimple_opt_pass pass_slp_vectorize =
288*e4b17023SJohn Marino {
289*e4b17023SJohn Marino {
290*e4b17023SJohn Marino GIMPLE_PASS,
291*e4b17023SJohn Marino "slp", /* name */
292*e4b17023SJohn Marino gate_vect_slp, /* gate */
293*e4b17023SJohn Marino execute_vect_slp, /* execute */
294*e4b17023SJohn Marino NULL, /* sub */
295*e4b17023SJohn Marino NULL, /* next */
296*e4b17023SJohn Marino 0, /* static_pass_number */
297*e4b17023SJohn Marino TV_TREE_SLP_VECTORIZATION, /* tv_id */
298*e4b17023SJohn Marino PROP_ssa | PROP_cfg, /* properties_required */
299*e4b17023SJohn Marino 0, /* properties_provided */
300*e4b17023SJohn Marino 0, /* properties_destroyed */
301*e4b17023SJohn Marino 0, /* todo_flags_start */
302*e4b17023SJohn Marino TODO_ggc_collect
303*e4b17023SJohn Marino | TODO_verify_ssa
304*e4b17023SJohn Marino | TODO_update_ssa
305*e4b17023SJohn Marino | TODO_verify_stmts /* todo_flags_finish */
306*e4b17023SJohn Marino }
307*e4b17023SJohn Marino };
308*e4b17023SJohn Marino
309*e4b17023SJohn Marino
310*e4b17023SJohn Marino /* Increase alignment of global arrays to improve vectorization potential.
311*e4b17023SJohn Marino TODO:
312*e4b17023SJohn Marino - Consider also structs that have an array field.
313*e4b17023SJohn Marino - Use ipa analysis to prune arrays that can't be vectorized?
314*e4b17023SJohn Marino This should involve global alignment analysis and in the future also
315*e4b17023SJohn Marino array padding. */
316*e4b17023SJohn Marino
317*e4b17023SJohn Marino static unsigned int
increase_alignment(void)318*e4b17023SJohn Marino increase_alignment (void)
319*e4b17023SJohn Marino {
320*e4b17023SJohn Marino struct varpool_node *vnode;
321*e4b17023SJohn Marino
322*e4b17023SJohn Marino /* Increase the alignment of all global arrays for vectorization. */
323*e4b17023SJohn Marino for (vnode = varpool_nodes_queue;
324*e4b17023SJohn Marino vnode;
325*e4b17023SJohn Marino vnode = vnode->next_needed)
326*e4b17023SJohn Marino {
327*e4b17023SJohn Marino tree vectype, decl = vnode->decl;
328*e4b17023SJohn Marino tree t;
329*e4b17023SJohn Marino unsigned int alignment;
330*e4b17023SJohn Marino
331*e4b17023SJohn Marino t = TREE_TYPE(decl);
332*e4b17023SJohn Marino if (TREE_CODE (t) != ARRAY_TYPE)
333*e4b17023SJohn Marino continue;
334*e4b17023SJohn Marino vectype = get_vectype_for_scalar_type (strip_array_types (t));
335*e4b17023SJohn Marino if (!vectype)
336*e4b17023SJohn Marino continue;
337*e4b17023SJohn Marino alignment = TYPE_ALIGN (vectype);
338*e4b17023SJohn Marino if (DECL_ALIGN (decl) >= alignment)
339*e4b17023SJohn Marino continue;
340*e4b17023SJohn Marino
341*e4b17023SJohn Marino if (vect_can_force_dr_alignment_p (decl, alignment))
342*e4b17023SJohn Marino {
343*e4b17023SJohn Marino DECL_ALIGN (decl) = TYPE_ALIGN (vectype);
344*e4b17023SJohn Marino DECL_USER_ALIGN (decl) = 1;
345*e4b17023SJohn Marino if (dump_file)
346*e4b17023SJohn Marino {
347*e4b17023SJohn Marino fprintf (dump_file, "Increasing alignment of decl: ");
348*e4b17023SJohn Marino print_generic_expr (dump_file, decl, TDF_SLIM);
349*e4b17023SJohn Marino fprintf (dump_file, "\n");
350*e4b17023SJohn Marino }
351*e4b17023SJohn Marino }
352*e4b17023SJohn Marino }
353*e4b17023SJohn Marino return 0;
354*e4b17023SJohn Marino }
355*e4b17023SJohn Marino
356*e4b17023SJohn Marino
357*e4b17023SJohn Marino static bool
gate_increase_alignment(void)358*e4b17023SJohn Marino gate_increase_alignment (void)
359*e4b17023SJohn Marino {
360*e4b17023SJohn Marino return flag_section_anchors && flag_tree_vectorize;
361*e4b17023SJohn Marino }
362*e4b17023SJohn Marino
363*e4b17023SJohn Marino
364*e4b17023SJohn Marino struct simple_ipa_opt_pass pass_ipa_increase_alignment =
365*e4b17023SJohn Marino {
366*e4b17023SJohn Marino {
367*e4b17023SJohn Marino SIMPLE_IPA_PASS,
368*e4b17023SJohn Marino "increase_alignment", /* name */
369*e4b17023SJohn Marino gate_increase_alignment, /* gate */
370*e4b17023SJohn Marino increase_alignment, /* execute */
371*e4b17023SJohn Marino NULL, /* sub */
372*e4b17023SJohn Marino NULL, /* next */
373*e4b17023SJohn Marino 0, /* static_pass_number */
374*e4b17023SJohn Marino TV_IPA_OPT, /* tv_id */
375*e4b17023SJohn Marino 0, /* properties_required */
376*e4b17023SJohn Marino 0, /* properties_provided */
377*e4b17023SJohn Marino 0, /* properties_destroyed */
378*e4b17023SJohn Marino 0, /* todo_flags_start */
379*e4b17023SJohn Marino 0 /* todo_flags_finish */
380*e4b17023SJohn Marino }
381*e4b17023SJohn Marino };
382