/* Vectorizer
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software
   Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* Loop and basic block vectorizer.

This file contains drivers for the three vectorizers:
(1) loop vectorizer (inter-iteration parallelism),
(2) loop-aware SLP (intra-iteration parallelism) (invoked by the loop
    vectorizer)
(3) BB vectorizer (out-of-loops), aka SLP

The rest of the vectorizer's code is organized as follows:
- tree-vect-loop.c - loop specific parts such as reductions, etc. These are
  used by drivers (1) and (2).
- tree-vect-loop-manip.c - vectorizer's loop control-flow utilities, used by
  drivers (1) and (2).
- tree-vect-slp.c - BB vectorization specific analysis and transformation,
  used by drivers (2) and (3).
- tree-vect-stmts.c - statements analysis and transformation (used by all).
- tree-vect-data-refs.c - vectorizer specific data-refs analysis and
  manipulations (used by all).
40 - tree-vect-patterns.c - vectorizable code patterns detector (used by all) 41 42 Here's a poor attempt at illustrating that: 43 44 tree-vectorizer.c: 45 loop_vect() loop_aware_slp() slp_vect() 46 | / \ / 47 | / \ / 48 tree-vect-loop.c tree-vect-slp.c 49 | \ \ / / | 50 | \ \/ / | 51 | \ /\ / | 52 | \ / \ / | 53 tree-vect-stmts.c tree-vect-data-refs.c 54 \ / 55 tree-vect-patterns.c 56 */ 57 58 #include "config.h" 59 #include "system.h" 60 #include "coretypes.h" 61 #include "tm.h" 62 #include "ggc.h" 63 #include "tree.h" 64 #include "diagnostic.h" 65 #include "tree-flow.h" 66 #include "tree-dump.h" 67 #include "cfgloop.h" 68 #include "cfglayout.h" 69 #include "tree-vectorizer.h" 70 #include "tree-pass.h" 71 #include "timevar.h" 72 73 /* vect_dump will be set to stderr or dump_file if exist. */ 74 FILE *vect_dump; 75 76 /* vect_verbosity_level set to an invalid value 77 to mark that it's uninitialized. */ 78 static enum verbosity_levels vect_verbosity_level = MAX_VERBOSITY_LEVEL; 79 static enum verbosity_levels user_vect_verbosity_level = MAX_VERBOSITY_LEVEL; 80 81 /* Loop or bb location. */ 82 LOC vect_location; 83 84 /* Vector mapping GIMPLE stmt to stmt_vec_info. */ 85 VEC(vec_void_p,heap) *stmt_vec_info_vec; 86 87 88 89 /* Function vect_set_verbosity_level. 90 91 Called from opts.c upon detection of the 92 -ftree-vectorizer-verbose=N option. */ 93 94 void 95 vect_set_verbosity_level (const char *val) 96 { 97 unsigned int vl; 98 99 vl = atoi (val); 100 if (vl < MAX_VERBOSITY_LEVEL) 101 user_vect_verbosity_level = (enum verbosity_levels) vl; 102 else 103 user_vect_verbosity_level 104 = (enum verbosity_levels) (MAX_VERBOSITY_LEVEL - 1); 105 } 106 107 108 /* Function vect_set_dump_settings. 109 110 Fix the verbosity level of the vectorizer if the 111 requested level was not set explicitly using the flag 112 -ftree-vectorizer-verbose=N. 113 Decide where to print the debugging information (dump_file/stderr). 
114 If the user defined the verbosity level, but there is no dump file, 115 print to stderr, otherwise print to the dump file. */ 116 117 static void 118 vect_set_dump_settings (bool slp) 119 { 120 vect_dump = dump_file; 121 122 /* Check if the verbosity level was defined by the user: */ 123 if (user_vect_verbosity_level != MAX_VERBOSITY_LEVEL) 124 { 125 vect_verbosity_level = user_vect_verbosity_level; 126 /* Ignore user defined verbosity if dump flags require higher level of 127 verbosity. */ 128 if (dump_file) 129 { 130 if (((dump_flags & TDF_DETAILS) 131 && vect_verbosity_level >= REPORT_DETAILS) 132 || ((dump_flags & TDF_STATS) 133 && vect_verbosity_level >= REPORT_UNVECTORIZED_LOCATIONS)) 134 return; 135 } 136 else 137 { 138 /* If there is no dump file, print to stderr in case of loop 139 vectorization. */ 140 if (!slp) 141 vect_dump = stderr; 142 143 return; 144 } 145 } 146 147 /* User didn't specify verbosity level: */ 148 if (dump_file && (dump_flags & TDF_DETAILS)) 149 vect_verbosity_level = REPORT_DETAILS; 150 else if (dump_file && (dump_flags & TDF_STATS)) 151 vect_verbosity_level = REPORT_UNVECTORIZED_LOCATIONS; 152 else 153 vect_verbosity_level = REPORT_NONE; 154 155 gcc_assert (dump_file || vect_verbosity_level == REPORT_NONE); 156 } 157 158 159 /* Function debug_loop_details. 160 161 For vectorization debug dumps. */ 162 163 bool 164 vect_print_dump_info (enum verbosity_levels vl) 165 { 166 if (vl > vect_verbosity_level) 167 return false; 168 169 if (!current_function_decl || !vect_dump) 170 return false; 171 172 if (vect_location == UNKNOWN_LOC) 173 fprintf (vect_dump, "\n%s:%d: note: ", 174 DECL_SOURCE_FILE (current_function_decl), 175 DECL_SOURCE_LINE (current_function_decl)); 176 else 177 fprintf (vect_dump, "\n%s:%d: note: ", 178 LOC_FILE (vect_location), LOC_LINE (vect_location)); 179 180 return true; 181 } 182 183 184 /* Function vectorize_loops. 185 186 Entry point to loop vectorization phase. 
*/

/* vectorize_loops analyzes every loop for which
   optimize_loop_nest_for_speed_p holds, transforms those found
   vectorizable, reports the count, and finally releases the per-loop
   information stored in loop->aux.  It returns TODO_cleanup_cfg if at
   least one loop was vectorized and 0 otherwise.  */

unsigned
vectorize_loops (void)
{
  unsigned int idx;
  unsigned int num_vectorized_loops = 0;
  unsigned int vect_loops_num = number_of_loops ();
  bool report_p;
  loop_iterator li;
  struct loop *loop;

  /* Bail out if there are no loops.  */
  if (vect_loops_num <= 1)
    return 0;

  /* Fix the verbosity level if not defined explicitly by the user.  */
  vect_set_dump_settings (false);

  init_stmt_vec_info_vec ();

  /*  ----------- Analyze loops. -----------  */

  /* If some loop was duplicated, it gets bigger number
     than all previously defined loops.  This fact allows us to run
     only over initial loops skipping newly generated ones.  */
  FOR_EACH_LOOP (li, loop, 0)
    {
      loop_vec_info vinfo;

      if (!optimize_loop_nest_for_speed_p (loop))
        continue;

      vect_location = find_loop_location (loop);
      vinfo = vect_analyze_loop (loop);

      /* The analysis result is recorded even when it is null or not
         vectorizable; the finalization loop below reads it back.  */
      loop->aux = vinfo;

      if (vinfo && LOOP_VINFO_VECTORIZABLE_P (vinfo))
        {
          vect_transform_loop (vinfo);
          num_vectorized_loops++;
        }
    }

  vect_location = UNKNOWN_LOC;

  statistics_counter_event (cfun, "Vectorized loops", num_vectorized_loops);

  /* vect_print_dump_info prints the note prefix as a side effect, so the
     second query is made only if the first one declined and at least one
     loop was vectorized.  */
  report_p = vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS);
  if (!report_p && num_vectorized_loops > 0)
    report_p = vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS);
  if (report_p)
    fprintf (vect_dump, "vectorized %u loops in function.\n",
             num_vectorized_loops);

  /*  ----------- Finalize. -----------  */

  mark_sym_for_renaming (gimple_vop (cfun));

  /* Only loop numbers below the count taken on entry are visited.  */
  for (idx = 1; idx < vect_loops_num; idx++)
    {
      loop = get_loop (idx);
      if (loop)
        {
          destroy_loop_vec_info ((loop_vec_info) loop->aux, true);
          loop->aux = NULL;
        }
    }

  free_stmt_vec_info_vec ();

  if (num_vectorized_loops > 0)
    return TODO_cleanup_cfg;

  return 0;
}


/* Entry point to basic block SLP phase.  
*/ 261 262 static unsigned int 263 execute_vect_slp (void) 264 { 265 basic_block bb; 266 267 /* Fix the verbosity level if not defined explicitly by the user. */ 268 vect_set_dump_settings (true); 269 270 init_stmt_vec_info_vec (); 271 272 FOR_EACH_BB (bb) 273 { 274 vect_location = find_bb_location (bb); 275 276 if (vect_slp_analyze_bb (bb)) 277 { 278 vect_slp_transform_bb (bb); 279 280 if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS)) 281 fprintf (vect_dump, "basic block vectorized using SLP\n"); 282 } 283 } 284 285 free_stmt_vec_info_vec (); 286 return 0; 287 } 288 289 static bool 290 gate_vect_slp (void) 291 { 292 /* Apply SLP either if the vectorizer is on and the user didn't specify 293 whether to run SLP or not, or if the SLP flag was set by the user. */ 294 return ((flag_tree_vectorize != 0 && flag_tree_slp_vectorize != 0) 295 || flag_tree_slp_vectorize == 1); 296 } 297 298 struct gimple_opt_pass pass_slp_vectorize = 299 { 300 { 301 GIMPLE_PASS, 302 "slp", /* name */ 303 gate_vect_slp, /* gate */ 304 execute_vect_slp, /* execute */ 305 NULL, /* sub */ 306 NULL, /* next */ 307 0, /* static_pass_number */ 308 TV_TREE_SLP_VECTORIZATION, /* tv_id */ 309 PROP_ssa | PROP_cfg, /* properties_required */ 310 0, /* properties_provided */ 311 0, /* properties_destroyed */ 312 0, /* todo_flags_start */ 313 TODO_ggc_collect 314 | TODO_verify_ssa 315 | TODO_dump_func 316 | TODO_update_ssa 317 | TODO_verify_stmts /* todo_flags_finish */ 318 } 319 }; 320 321 322 /* Increase alignment of global arrays to improve vectorization potential. 323 TODO: 324 - Consider also structs that have an array field. 325 - Use ipa analysis to prune arrays that can't be vectorized? 326 This should involve global alignment analysis and in the future also 327 array padding. */ 328 329 static unsigned int 330 increase_alignment (void) 331 { 332 struct varpool_node *vnode; 333 334 /* Increase the alignment of all global arrays for vectorization. 
*/ 335 for (vnode = varpool_nodes_queue; 336 vnode; 337 vnode = vnode->next_needed) 338 { 339 tree vectype, decl = vnode->decl; 340 tree t; 341 unsigned int alignment; 342 343 t = TREE_TYPE(decl); 344 if (TREE_CODE (t) != ARRAY_TYPE) 345 continue; 346 vectype = get_vectype_for_scalar_type (strip_array_types (t)); 347 if (!vectype) 348 continue; 349 alignment = TYPE_ALIGN (vectype); 350 if (DECL_ALIGN (decl) >= alignment) 351 continue; 352 353 if (vect_can_force_dr_alignment_p (decl, alignment)) 354 { 355 DECL_ALIGN (decl) = TYPE_ALIGN (vectype); 356 DECL_USER_ALIGN (decl) = 1; 357 if (dump_file) 358 { 359 fprintf (dump_file, "Increasing alignment of decl: "); 360 print_generic_expr (dump_file, decl, TDF_SLIM); 361 fprintf (dump_file, "\n"); 362 } 363 } 364 } 365 return 0; 366 } 367 368 369 static bool 370 gate_increase_alignment (void) 371 { 372 return flag_section_anchors && flag_tree_vectorize; 373 } 374 375 376 struct simple_ipa_opt_pass pass_ipa_increase_alignment = 377 { 378 { 379 SIMPLE_IPA_PASS, 380 "increase_alignment", /* name */ 381 gate_increase_alignment, /* gate */ 382 increase_alignment, /* execute */ 383 NULL, /* sub */ 384 NULL, /* next */ 385 0, /* static_pass_number */ 386 TV_NONE, /* tv_id */ 387 0, /* properties_required */ 388 0, /* properties_provided */ 389 0, /* properties_destroyed */ 390 0, /* todo_flags_start */ 391 0 /* todo_flags_finish */ 392 } 393 }; 394