/* Vectorizer
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* Loop and basic block vectorizer.

  This file contains drivers for the three vectorizers:
  (1) loop vectorizer (inter-iteration parallelism),
  (2) loop-aware SLP (intra-iteration parallelism) (invoked by the loop
      vectorizer)
  (3) BB vectorizer (out-of-loops), aka SLP

  The rest of the vectorizer's code is organized as follows:
  - tree-vect-loop.c - loop specific parts such as reductions, etc.  These are
    used by drivers (1) and (2).
  - tree-vect-loop-manip.c - vectorizer's loop control-flow utilities, used by
    drivers (1) and (2).
  - tree-vect-slp.c - BB vectorization specific analysis and transformation,
    used by drivers (2) and (3).
  - tree-vect-stmts.c - statements analysis and transformation (used by all).
  - tree-vect-data-refs.c - vectorizer specific data-refs analysis and
    manipulations (used by all).
  - tree-vect-patterns.c - vectorizable code patterns detector (used by all)

  Here's a poor attempt at illustrating that:

     tree-vectorizer.c:
     loop_vect()  loop_aware_slp()  slp_vect()
          |        /           \          /
          |       /             \        /
          tree-vect-loop.c  tree-vect-slp.c
                | \      \  /      /   |
                |  \      \/      /    |
                |   \     /\     /     |
                |    \   /  \   /      |
         tree-vect-stmts.c  tree-vect-data-refs.c
                       \      /
                    tree-vect-patterns.c
*/

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "tree-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "cfgloop.h"
#include "cfglayout.h"
#include "tree-vectorizer.h"
#include "tree-pass.h"
#include "timevar.h"

73*e4b17023SJohn Marino /* vect_dump will be set to stderr or dump_file if exist. */ 74*e4b17023SJohn Marino FILE *vect_dump; 75*e4b17023SJohn Marino 76*e4b17023SJohn Marino /* vect_verbosity_level set to an invalid value 77*e4b17023SJohn Marino to mark that it's uninitialized. */ 78*e4b17023SJohn Marino static enum vect_verbosity_levels vect_verbosity_level = MAX_VERBOSITY_LEVEL; 79*e4b17023SJohn Marino 80*e4b17023SJohn Marino /* Loop or bb location. */ 81*e4b17023SJohn Marino LOC vect_location; 82*e4b17023SJohn Marino 83*e4b17023SJohn Marino /* Vector mapping GIMPLE stmt to stmt_vec_info. */ 84*e4b17023SJohn Marino VEC(vec_void_p,heap) *stmt_vec_info_vec; 85*e4b17023SJohn Marino 86*e4b17023SJohn Marino 87*e4b17023SJohn Marino 88*e4b17023SJohn Marino /* Function vect_set_dump_settings. 89*e4b17023SJohn Marino 90*e4b17023SJohn Marino Fix the verbosity level of the vectorizer if the 91*e4b17023SJohn Marino requested level was not set explicitly using the flag 92*e4b17023SJohn Marino -ftree-vectorizer-verbose=N. 93*e4b17023SJohn Marino Decide where to print the debugging information (dump_file/stderr). 94*e4b17023SJohn Marino If the user defined the verbosity level, but there is no dump file, 95*e4b17023SJohn Marino print to stderr, otherwise print to the dump file. */ 96*e4b17023SJohn Marino 97*e4b17023SJohn Marino static void 98*e4b17023SJohn Marino vect_set_dump_settings (bool slp) 99*e4b17023SJohn Marino { 100*e4b17023SJohn Marino vect_dump = dump_file; 101*e4b17023SJohn Marino 102*e4b17023SJohn Marino /* Check if the verbosity level was defined by the user: */ 103*e4b17023SJohn Marino if (user_vect_verbosity_level != MAX_VERBOSITY_LEVEL) 104*e4b17023SJohn Marino { 105*e4b17023SJohn Marino vect_verbosity_level = user_vect_verbosity_level; 106*e4b17023SJohn Marino /* Ignore user defined verbosity if dump flags require higher level of 107*e4b17023SJohn Marino verbosity. 
*/ 108*e4b17023SJohn Marino if (dump_file) 109*e4b17023SJohn Marino { 110*e4b17023SJohn Marino if (((dump_flags & TDF_DETAILS) 111*e4b17023SJohn Marino && vect_verbosity_level >= REPORT_DETAILS) 112*e4b17023SJohn Marino || ((dump_flags & TDF_STATS) 113*e4b17023SJohn Marino && vect_verbosity_level >= REPORT_UNVECTORIZED_LOCATIONS)) 114*e4b17023SJohn Marino return; 115*e4b17023SJohn Marino } 116*e4b17023SJohn Marino else 117*e4b17023SJohn Marino { 118*e4b17023SJohn Marino /* If there is no dump file, print to stderr in case of loop 119*e4b17023SJohn Marino vectorization. */ 120*e4b17023SJohn Marino if (!slp) 121*e4b17023SJohn Marino vect_dump = stderr; 122*e4b17023SJohn Marino 123*e4b17023SJohn Marino return; 124*e4b17023SJohn Marino } 125*e4b17023SJohn Marino } 126*e4b17023SJohn Marino 127*e4b17023SJohn Marino /* User didn't specify verbosity level: */ 128*e4b17023SJohn Marino if (dump_file && (dump_flags & TDF_DETAILS)) 129*e4b17023SJohn Marino vect_verbosity_level = REPORT_DETAILS; 130*e4b17023SJohn Marino else if (dump_file && (dump_flags & TDF_STATS)) 131*e4b17023SJohn Marino vect_verbosity_level = REPORT_UNVECTORIZED_LOCATIONS; 132*e4b17023SJohn Marino else 133*e4b17023SJohn Marino vect_verbosity_level = REPORT_NONE; 134*e4b17023SJohn Marino 135*e4b17023SJohn Marino gcc_assert (dump_file || vect_verbosity_level == REPORT_NONE); 136*e4b17023SJohn Marino } 137*e4b17023SJohn Marino 138*e4b17023SJohn Marino 139*e4b17023SJohn Marino /* Function debug_loop_details. 140*e4b17023SJohn Marino 141*e4b17023SJohn Marino For vectorization debug dumps. 
*/ 142*e4b17023SJohn Marino 143*e4b17023SJohn Marino bool 144*e4b17023SJohn Marino vect_print_dump_info (enum vect_verbosity_levels vl) 145*e4b17023SJohn Marino { 146*e4b17023SJohn Marino if (vl > vect_verbosity_level) 147*e4b17023SJohn Marino return false; 148*e4b17023SJohn Marino 149*e4b17023SJohn Marino if (!current_function_decl || !vect_dump) 150*e4b17023SJohn Marino return false; 151*e4b17023SJohn Marino 152*e4b17023SJohn Marino if (vect_location == UNKNOWN_LOC) 153*e4b17023SJohn Marino fprintf (vect_dump, "\n%s:%d: note: ", 154*e4b17023SJohn Marino DECL_SOURCE_FILE (current_function_decl), 155*e4b17023SJohn Marino DECL_SOURCE_LINE (current_function_decl)); 156*e4b17023SJohn Marino else 157*e4b17023SJohn Marino fprintf (vect_dump, "\n%d: ", LOC_LINE (vect_location)); 158*e4b17023SJohn Marino 159*e4b17023SJohn Marino return true; 160*e4b17023SJohn Marino } 161*e4b17023SJohn Marino 162*e4b17023SJohn Marino 163*e4b17023SJohn Marino /* Function vectorize_loops. 164*e4b17023SJohn Marino 165*e4b17023SJohn Marino Entry point to loop vectorization phase. */ 166*e4b17023SJohn Marino 167*e4b17023SJohn Marino unsigned 168*e4b17023SJohn Marino vectorize_loops (void) 169*e4b17023SJohn Marino { 170*e4b17023SJohn Marino unsigned int i; 171*e4b17023SJohn Marino unsigned int num_vectorized_loops = 0; 172*e4b17023SJohn Marino unsigned int vect_loops_num; 173*e4b17023SJohn Marino loop_iterator li; 174*e4b17023SJohn Marino struct loop *loop; 175*e4b17023SJohn Marino 176*e4b17023SJohn Marino vect_loops_num = number_of_loops (); 177*e4b17023SJohn Marino 178*e4b17023SJohn Marino /* Bail out if there are no loops. */ 179*e4b17023SJohn Marino if (vect_loops_num <= 1) 180*e4b17023SJohn Marino return 0; 181*e4b17023SJohn Marino 182*e4b17023SJohn Marino /* Fix the verbosity level if not defined explicitly by the user. 
*/ 183*e4b17023SJohn Marino vect_set_dump_settings (false); 184*e4b17023SJohn Marino 185*e4b17023SJohn Marino init_stmt_vec_info_vec (); 186*e4b17023SJohn Marino 187*e4b17023SJohn Marino /* ----------- Analyze loops. ----------- */ 188*e4b17023SJohn Marino 189*e4b17023SJohn Marino /* If some loop was duplicated, it gets bigger number 190*e4b17023SJohn Marino than all previously defined loops. This fact allows us to run 191*e4b17023SJohn Marino only over initial loops skipping newly generated ones. */ 192*e4b17023SJohn Marino FOR_EACH_LOOP (li, loop, 0) 193*e4b17023SJohn Marino if (optimize_loop_nest_for_speed_p (loop)) 194*e4b17023SJohn Marino { 195*e4b17023SJohn Marino loop_vec_info loop_vinfo; 196*e4b17023SJohn Marino 197*e4b17023SJohn Marino vect_location = find_loop_location (loop); 198*e4b17023SJohn Marino if (vect_location != UNKNOWN_LOC 199*e4b17023SJohn Marino && vect_verbosity_level > REPORT_NONE) 200*e4b17023SJohn Marino fprintf (vect_dump, "\nAnalyzing loop at %s:%d\n", 201*e4b17023SJohn Marino LOC_FILE (vect_location), LOC_LINE (vect_location)); 202*e4b17023SJohn Marino 203*e4b17023SJohn Marino loop_vinfo = vect_analyze_loop (loop); 204*e4b17023SJohn Marino loop->aux = loop_vinfo; 205*e4b17023SJohn Marino 206*e4b17023SJohn Marino if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo)) 207*e4b17023SJohn Marino continue; 208*e4b17023SJohn Marino 209*e4b17023SJohn Marino if (vect_location != UNKNOWN_LOC 210*e4b17023SJohn Marino && vect_verbosity_level > REPORT_NONE) 211*e4b17023SJohn Marino fprintf (vect_dump, "\n\nVectorizing loop at %s:%d\n", 212*e4b17023SJohn Marino LOC_FILE (vect_location), LOC_LINE (vect_location)); 213*e4b17023SJohn Marino 214*e4b17023SJohn Marino vect_transform_loop (loop_vinfo); 215*e4b17023SJohn Marino num_vectorized_loops++; 216*e4b17023SJohn Marino } 217*e4b17023SJohn Marino 218*e4b17023SJohn Marino vect_location = UNKNOWN_LOC; 219*e4b17023SJohn Marino 220*e4b17023SJohn Marino statistics_counter_event (cfun, "Vectorized 
loops", num_vectorized_loops); 221*e4b17023SJohn Marino if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS) 222*e4b17023SJohn Marino || (num_vectorized_loops > 0 223*e4b17023SJohn Marino && vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS))) 224*e4b17023SJohn Marino fprintf (vect_dump, "vectorized %u loops in function.\n", 225*e4b17023SJohn Marino num_vectorized_loops); 226*e4b17023SJohn Marino 227*e4b17023SJohn Marino /* ----------- Finalize. ----------- */ 228*e4b17023SJohn Marino 229*e4b17023SJohn Marino mark_sym_for_renaming (gimple_vop (cfun)); 230*e4b17023SJohn Marino 231*e4b17023SJohn Marino for (i = 1; i < vect_loops_num; i++) 232*e4b17023SJohn Marino { 233*e4b17023SJohn Marino loop_vec_info loop_vinfo; 234*e4b17023SJohn Marino 235*e4b17023SJohn Marino loop = get_loop (i); 236*e4b17023SJohn Marino if (!loop) 237*e4b17023SJohn Marino continue; 238*e4b17023SJohn Marino loop_vinfo = (loop_vec_info) loop->aux; 239*e4b17023SJohn Marino destroy_loop_vec_info (loop_vinfo, true); 240*e4b17023SJohn Marino loop->aux = NULL; 241*e4b17023SJohn Marino } 242*e4b17023SJohn Marino 243*e4b17023SJohn Marino free_stmt_vec_info_vec (); 244*e4b17023SJohn Marino 245*e4b17023SJohn Marino return num_vectorized_loops > 0 ? TODO_cleanup_cfg : 0; 246*e4b17023SJohn Marino } 247*e4b17023SJohn Marino 248*e4b17023SJohn Marino 249*e4b17023SJohn Marino /* Entry point to basic block SLP phase. */ 250*e4b17023SJohn Marino 251*e4b17023SJohn Marino static unsigned int 252*e4b17023SJohn Marino execute_vect_slp (void) 253*e4b17023SJohn Marino { 254*e4b17023SJohn Marino basic_block bb; 255*e4b17023SJohn Marino 256*e4b17023SJohn Marino /* Fix the verbosity level if not defined explicitly by the user. 
*/ 257*e4b17023SJohn Marino vect_set_dump_settings (true); 258*e4b17023SJohn Marino 259*e4b17023SJohn Marino init_stmt_vec_info_vec (); 260*e4b17023SJohn Marino 261*e4b17023SJohn Marino FOR_EACH_BB (bb) 262*e4b17023SJohn Marino { 263*e4b17023SJohn Marino vect_location = find_bb_location (bb); 264*e4b17023SJohn Marino 265*e4b17023SJohn Marino if (vect_slp_analyze_bb (bb)) 266*e4b17023SJohn Marino { 267*e4b17023SJohn Marino vect_slp_transform_bb (bb); 268*e4b17023SJohn Marino 269*e4b17023SJohn Marino if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS)) 270*e4b17023SJohn Marino fprintf (vect_dump, "basic block vectorized using SLP\n"); 271*e4b17023SJohn Marino } 272*e4b17023SJohn Marino } 273*e4b17023SJohn Marino 274*e4b17023SJohn Marino free_stmt_vec_info_vec (); 275*e4b17023SJohn Marino return 0; 276*e4b17023SJohn Marino } 277*e4b17023SJohn Marino 278*e4b17023SJohn Marino static bool 279*e4b17023SJohn Marino gate_vect_slp (void) 280*e4b17023SJohn Marino { 281*e4b17023SJohn Marino /* Apply SLP either if the vectorizer is on and the user didn't specify 282*e4b17023SJohn Marino whether to run SLP or not, or if the SLP flag was set by the user. 
*/ 283*e4b17023SJohn Marino return ((flag_tree_vectorize != 0 && flag_tree_slp_vectorize != 0) 284*e4b17023SJohn Marino || flag_tree_slp_vectorize == 1); 285*e4b17023SJohn Marino } 286*e4b17023SJohn Marino 287*e4b17023SJohn Marino struct gimple_opt_pass pass_slp_vectorize = 288*e4b17023SJohn Marino { 289*e4b17023SJohn Marino { 290*e4b17023SJohn Marino GIMPLE_PASS, 291*e4b17023SJohn Marino "slp", /* name */ 292*e4b17023SJohn Marino gate_vect_slp, /* gate */ 293*e4b17023SJohn Marino execute_vect_slp, /* execute */ 294*e4b17023SJohn Marino NULL, /* sub */ 295*e4b17023SJohn Marino NULL, /* next */ 296*e4b17023SJohn Marino 0, /* static_pass_number */ 297*e4b17023SJohn Marino TV_TREE_SLP_VECTORIZATION, /* tv_id */ 298*e4b17023SJohn Marino PROP_ssa | PROP_cfg, /* properties_required */ 299*e4b17023SJohn Marino 0, /* properties_provided */ 300*e4b17023SJohn Marino 0, /* properties_destroyed */ 301*e4b17023SJohn Marino 0, /* todo_flags_start */ 302*e4b17023SJohn Marino TODO_ggc_collect 303*e4b17023SJohn Marino | TODO_verify_ssa 304*e4b17023SJohn Marino | TODO_update_ssa 305*e4b17023SJohn Marino | TODO_verify_stmts /* todo_flags_finish */ 306*e4b17023SJohn Marino } 307*e4b17023SJohn Marino }; 308*e4b17023SJohn Marino 309*e4b17023SJohn Marino 310*e4b17023SJohn Marino /* Increase alignment of global arrays to improve vectorization potential. 311*e4b17023SJohn Marino TODO: 312*e4b17023SJohn Marino - Consider also structs that have an array field. 313*e4b17023SJohn Marino - Use ipa analysis to prune arrays that can't be vectorized? 314*e4b17023SJohn Marino This should involve global alignment analysis and in the future also 315*e4b17023SJohn Marino array padding. */ 316*e4b17023SJohn Marino 317*e4b17023SJohn Marino static unsigned int 318*e4b17023SJohn Marino increase_alignment (void) 319*e4b17023SJohn Marino { 320*e4b17023SJohn Marino struct varpool_node *vnode; 321*e4b17023SJohn Marino 322*e4b17023SJohn Marino /* Increase the alignment of all global arrays for vectorization. 
*/ 323*e4b17023SJohn Marino for (vnode = varpool_nodes_queue; 324*e4b17023SJohn Marino vnode; 325*e4b17023SJohn Marino vnode = vnode->next_needed) 326*e4b17023SJohn Marino { 327*e4b17023SJohn Marino tree vectype, decl = vnode->decl; 328*e4b17023SJohn Marino tree t; 329*e4b17023SJohn Marino unsigned int alignment; 330*e4b17023SJohn Marino 331*e4b17023SJohn Marino t = TREE_TYPE(decl); 332*e4b17023SJohn Marino if (TREE_CODE (t) != ARRAY_TYPE) 333*e4b17023SJohn Marino continue; 334*e4b17023SJohn Marino vectype = get_vectype_for_scalar_type (strip_array_types (t)); 335*e4b17023SJohn Marino if (!vectype) 336*e4b17023SJohn Marino continue; 337*e4b17023SJohn Marino alignment = TYPE_ALIGN (vectype); 338*e4b17023SJohn Marino if (DECL_ALIGN (decl) >= alignment) 339*e4b17023SJohn Marino continue; 340*e4b17023SJohn Marino 341*e4b17023SJohn Marino if (vect_can_force_dr_alignment_p (decl, alignment)) 342*e4b17023SJohn Marino { 343*e4b17023SJohn Marino DECL_ALIGN (decl) = TYPE_ALIGN (vectype); 344*e4b17023SJohn Marino DECL_USER_ALIGN (decl) = 1; 345*e4b17023SJohn Marino if (dump_file) 346*e4b17023SJohn Marino { 347*e4b17023SJohn Marino fprintf (dump_file, "Increasing alignment of decl: "); 348*e4b17023SJohn Marino print_generic_expr (dump_file, decl, TDF_SLIM); 349*e4b17023SJohn Marino fprintf (dump_file, "\n"); 350*e4b17023SJohn Marino } 351*e4b17023SJohn Marino } 352*e4b17023SJohn Marino } 353*e4b17023SJohn Marino return 0; 354*e4b17023SJohn Marino } 355*e4b17023SJohn Marino 356*e4b17023SJohn Marino 357*e4b17023SJohn Marino static bool 358*e4b17023SJohn Marino gate_increase_alignment (void) 359*e4b17023SJohn Marino { 360*e4b17023SJohn Marino return flag_section_anchors && flag_tree_vectorize; 361*e4b17023SJohn Marino } 362*e4b17023SJohn Marino 363*e4b17023SJohn Marino 364*e4b17023SJohn Marino struct simple_ipa_opt_pass pass_ipa_increase_alignment = 365*e4b17023SJohn Marino { 366*e4b17023SJohn Marino { 367*e4b17023SJohn Marino SIMPLE_IPA_PASS, 368*e4b17023SJohn Marino 
"increase_alignment", /* name */ 369*e4b17023SJohn Marino gate_increase_alignment, /* gate */ 370*e4b17023SJohn Marino increase_alignment, /* execute */ 371*e4b17023SJohn Marino NULL, /* sub */ 372*e4b17023SJohn Marino NULL, /* next */ 373*e4b17023SJohn Marino 0, /* static_pass_number */ 374*e4b17023SJohn Marino TV_IPA_OPT, /* tv_id */ 375*e4b17023SJohn Marino 0, /* properties_required */ 376*e4b17023SJohn Marino 0, /* properties_provided */ 377*e4b17023SJohn Marino 0, /* properties_destroyed */ 378*e4b17023SJohn Marino 0, /* todo_flags_start */ 379*e4b17023SJohn Marino 0 /* todo_flags_finish */ 380*e4b17023SJohn Marino } 381*e4b17023SJohn Marino }; 382