/* Vectorizer Specific Loop Manipulations
   Copyright (C) 2003-2017 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "fold-const.h"
#include "cfganal.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "tree-into-ssa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "tree-ssa-loop-ivopts.h"

/*************************************************************************
  Simple Loop Peeling Utilities

  Utilities to support loop peeling for vectorization purposes.
 *************************************************************************/


/* Renames the use *OP_P.  */

static void
rename_use_op (use_operand_p op_p)
{
  tree new_name;

  if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
    return;

  new_name = get_current_def (USE_FROM_PTR (op_p));

  /* Something defined outside of the loop.  */
  if (!new_name)
    return;

  /* An ordinary ssa name defined in the loop.  */

  SET_USE (op_p, new_name);
}


/* Renames the variables in basic block BB.  Allow renaming of PHI arguments
   on edges incoming from outer-block header if RENAME_FROM_OUTER_LOOP is
   true.  */

static void
rename_variables_in_bb (basic_block bb, bool rename_from_outer_loop)
{
  gimple *stmt;
  use_operand_p use_p;
  ssa_op_iter iter;
  edge e;
  edge_iterator ei;
  struct loop *loop = bb->loop_father;
  struct loop *outer_loop = NULL;

  if (rename_from_outer_loop)
    {
      gcc_assert (loop);
      outer_loop = loop_outer (loop);
    }

  for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
       gsi_next (&gsi))
    {
      stmt = gsi_stmt (gsi);
      FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
        rename_use_op (use_p);
    }

  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      if (!flow_bb_inside_loop_p (loop, e->src))
        {
          if (!rename_from_outer_loop)
            continue;
          if (e->src != outer_loop->header)
            {
              if (outer_loop->inner->next)
                {
                  /* If outer_loop has 2 inner loops, allow there to
                     be an extra basic block which decides which of the
                     two loops to use using LOOP_VECTORIZED.  */
                  if (!single_pred_p (e->src)
                      || single_pred (e->src) != outer_loop->header)
                    continue;
                }
              else
                continue;
            }
        }
      for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
           gsi_next (&gsi))
        rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
    }
}


struct adjust_info
{
  tree from, to;
  basic_block bb;
};

/* A stack of values to be adjusted in debug stmts.  We have to
   process them LIFO, so that the closest substitution applies.  If we
   processed them FIFO, without the stack, we might substitute uses
   with a PHI DEF that would soon become non-dominant, and when we got
   to the suitable one, it wouldn't have anything to substitute any
   more.  */
static vec<adjust_info, va_heap> adjust_vec;

/* Adjust any debug stmts that referenced AI->from values to use the
   loop-closed AI->to, if the references are dominated by AI->bb and
   not by the definition of AI->from.  */

static void
adjust_debug_stmts_now (adjust_info *ai)
{
  basic_block bbphi = ai->bb;
  tree orig_def = ai->from;
  tree new_def = ai->to;
  imm_use_iterator imm_iter;
  gimple *stmt;
  basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));

  gcc_assert (dom_info_available_p (CDI_DOMINATORS));

  /* Adjust any debug stmts that held onto non-loop-closed
     references.  */
  FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
    {
      use_operand_p use_p;
      basic_block bbuse;

      if (!is_gimple_debug (stmt))
        continue;

      gcc_assert (gimple_debug_bind_p (stmt));

      bbuse = gimple_bb (stmt);

      if ((bbuse == bbphi
           || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
          && !(bbuse == bbdef
               || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
        {
          if (new_def)
            FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
              SET_USE (use_p, new_def);
          else
            {
              gimple_debug_bind_reset_value (stmt);
              update_stmt (stmt);
            }
        }
    }
}

/* Adjust debug stmts as scheduled before.  */

static void
adjust_vec_debug_stmts (void)
{
  if (!MAY_HAVE_DEBUG_STMTS)
    return;

  gcc_assert (adjust_vec.exists ());

  while (!adjust_vec.is_empty ())
    {
      adjust_debug_stmts_now (&adjust_vec.last ());
      adjust_vec.pop ();
    }
}

/* Adjust any debug stmts that referenced FROM values to use the
   loop-closed TO, if the references are dominated by BB and not by
   the definition of FROM.  If adjust_vec is non-NULL, adjustments
   will be postponed until adjust_vec_debug_stmts is called.  */

static void
adjust_debug_stmts (tree from, tree to, basic_block bb)
{
  adjust_info ai;

  if (MAY_HAVE_DEBUG_STMTS
      && TREE_CODE (from) == SSA_NAME
      && ! SSA_NAME_IS_DEFAULT_DEF (from)
      && ! virtual_operand_p (from))
    {
      ai.from = from;
      ai.to = to;
      ai.bb = bb;

      if (adjust_vec.exists ())
        adjust_vec.safe_push (ai);
      else
        adjust_debug_stmts_now (&ai);
    }
}

/* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
   to adjust any debug stmts that referenced the old phi arg,
   presumably non-loop-closed references left over from other
   transformations.  */
static void
adjust_phi_and_debug_stmts (gimple *update_phi, edge e, tree new_def)
{
  tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);

  SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);

  if (MAY_HAVE_DEBUG_STMTS)
    adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
                        gimple_bb (update_phi));
}

/* Make the LOOP iterate NITERS times.  This is done by adding a new IV
   that starts at zero, increases by one and its limit is NITERS.

   Assumption: the exit-condition of LOOP is the last stmt in the loop.  */

void
slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
{
  tree indx_before_incr, indx_after_incr;
  gcond *cond_stmt;
  gcond *orig_cond;
  edge exit_edge = single_exit (loop);
  gimple_stmt_iterator loop_cond_gsi;
  gimple_stmt_iterator incr_gsi;
  bool insert_after;
  tree init = build_int_cst (TREE_TYPE (niters), 0);
  tree step = build_int_cst (TREE_TYPE (niters), 1);
  source_location loop_loc;
  enum tree_code code;

  orig_cond = get_loop_exit_condition (loop);
  gcc_assert (orig_cond);
  loop_cond_gsi = gsi_for_stmt (orig_cond);

  standard_iv_increment_position (loop, &incr_gsi, &insert_after);
  create_iv (init, step, NULL_TREE, loop,
             &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);

  indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
                                              true, NULL_TREE, true,
                                              GSI_SAME_STMT);
  niters = force_gimple_operand_gsi (&loop_cond_gsi, niters, true, NULL_TREE,
                                     true, GSI_SAME_STMT);

  code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
  cond_stmt = gimple_build_cond (code, indx_after_incr, niters, NULL_TREE,
                                 NULL_TREE);

  gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);

  /* Remove old loop exit test:  */
  gsi_remove (&loop_cond_gsi, true);
  free_stmt_vec_info (orig_cond);

  loop_loc = find_loop_location (loop);
  if (dump_enabled_p ())
    {
      if (LOCATION_LOCUS (loop_loc) != UNKNOWN_LOCATION)
        dump_printf (MSG_NOTE, "\nloop at %s:%d: ", LOCATION_FILE (loop_loc),
                     LOCATION_LINE (loop_loc));
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, cond_stmt, 0);
    }

  /* Record the number of latch iterations.  */
  loop->nb_iterations = fold_build2 (MINUS_EXPR, TREE_TYPE (niters), niters,
                                     build_int_cst (TREE_TYPE (niters), 1));
}

/* Helper routine of slpeel_tree_duplicate_loop_to_edge_cfg.
   For all PHI arguments in FROM->dest and TO->dest from those
   edges ensure that TO->dest PHI arguments have current_def
   to that in from.  */
static void
slpeel_duplicate_current_defs_from_edges (edge from, edge to)
{
  gimple_stmt_iterator gsi_from, gsi_to;

  for (gsi_from = gsi_start_phis (from->dest),
       gsi_to = gsi_start_phis (to->dest);
       !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);)
    {
      gimple *from_phi = gsi_stmt (gsi_from);
      gimple *to_phi = gsi_stmt (gsi_to);
      tree from_arg = PHI_ARG_DEF_FROM_EDGE (from_phi, from);
      tree to_arg = PHI_ARG_DEF_FROM_EDGE (to_phi, to);
      if (virtual_operand_p (from_arg))
        {
          gsi_next (&gsi_from);
          continue;
        }
      if (virtual_operand_p (to_arg))
        {
          gsi_next (&gsi_to);
          continue;
        }
      if (TREE_CODE (from_arg) != SSA_NAME)
        gcc_assert (operand_equal_p (from_arg, to_arg, 0));
      else
        {
          if (get_current_def (to_arg) == NULL_TREE)
            set_current_def (to_arg, get_current_def (from_arg));
        }
      gsi_next (&gsi_from);
      gsi_next (&gsi_to);
    }

  gphi *from_phi = get_virtual_phi (from->dest);
  gphi *to_phi = get_virtual_phi (to->dest);
  if (from_phi)
    set_current_def (PHI_ARG_DEF_FROM_EDGE (to_phi, to),
                     get_current_def (PHI_ARG_DEF_FROM_EDGE (from_phi, from)));
}


/* Given LOOP this function generates a new copy of it and puts it
   on E which is either the entry or exit of LOOP.  If SCALAR_LOOP is
   non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the
   basic blocks from SCALAR_LOOP instead of LOOP, but to either the
   entry or exit of LOOP.  */

struct loop *
slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop,
                                        struct loop *scalar_loop, edge e)
{
  struct loop *new_loop;
  basic_block *new_bbs, *bbs, *pbbs;
  bool at_exit;
  bool was_imm_dom;
  basic_block exit_dest;
  edge exit, new_exit;
  bool duplicate_outer_loop = false;

  exit = single_exit (loop);
  at_exit = (e == exit);
  if (!at_exit && e != loop_preheader_edge (loop))
    return NULL;

  if (scalar_loop == NULL)
    scalar_loop = loop;

  bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
  pbbs = bbs + 1;
  get_loop_body_with_size (scalar_loop, pbbs, scalar_loop->num_nodes);
  /* Allow duplication of outer loops.  */
  if (scalar_loop->inner)
    duplicate_outer_loop = true;
  /* Check whether duplication is possible.  */
  if (!can_copy_bbs_p (pbbs, scalar_loop->num_nodes))
    {
      free (bbs);
      return NULL;
    }

  /* Generate new loop structure.  */
  new_loop = duplicate_loop (scalar_loop, loop_outer (scalar_loop));
  duplicate_subloops (scalar_loop, new_loop);

  exit_dest = exit->dest;
  was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
                                          exit_dest) == loop->header ?
                 true : false);

  /* Also copy the pre-header, this avoids jumping through hoops to
     duplicate the loop entry PHI arguments.  Create an empty
     pre-header unconditionally for this.  */
  basic_block preheader = split_edge (loop_preheader_edge (scalar_loop));
  edge entry_e = single_pred_edge (preheader);
  bbs[0] = preheader;
  new_bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);

  exit = single_exit (scalar_loop);
  copy_bbs (bbs, scalar_loop->num_nodes + 1, new_bbs,
            &exit, 1, &new_exit, NULL,
            at_exit ? loop->latch : e->src, true);
  exit = single_exit (loop);
  basic_block new_preheader = new_bbs[0];

  add_phi_args_after_copy (new_bbs, scalar_loop->num_nodes + 1, NULL);

  if (scalar_loop != loop)
    {
      /* If we copied from SCALAR_LOOP rather than LOOP, SSA_NAMEs from
         SCALAR_LOOP will have current_def set to SSA_NAMEs in the new_loop,
         but LOOP will not.  slpeel_update_phi_nodes_for_guard{1,2} expects
         the LOOP SSA_NAMEs (on the exit edge and edge from latch to
         header) to have current_def set, so copy them over.  */
      slpeel_duplicate_current_defs_from_edges (single_exit (scalar_loop),
                                                exit);
      slpeel_duplicate_current_defs_from_edges (EDGE_SUCC (scalar_loop->latch,
                                                           0),
                                                EDGE_SUCC (loop->latch, 0));
    }

  if (at_exit) /* Add the loop copy at exit.  */
    {
      if (scalar_loop != loop)
        {
          gphi_iterator gsi;
          new_exit = redirect_edge_and_branch (new_exit, exit_dest);

          for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi);
               gsi_next (&gsi))
            {
              gphi *phi = gsi.phi ();
              tree orig_arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
              location_t orig_locus
                = gimple_phi_arg_location_from_edge (phi, e);

              add_phi_arg (phi, orig_arg, new_exit, orig_locus);
            }
        }
      redirect_edge_and_branch_force (e, new_preheader);
      flush_pending_stmts (e);
      set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src);
      if (was_imm_dom || duplicate_outer_loop)
        set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_exit->src);

      /* And remove the non-necessary forwarder again.  Keep the other
         one so we have a proper pre-header for the loop at the exit edge.  */
      redirect_edge_pred (single_succ_edge (preheader),
                          single_pred (preheader));
      delete_basic_block (preheader);
      set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
                               loop_preheader_edge (scalar_loop)->src);
    }
  else /* Add the copy at entry.  */
    {
      if (scalar_loop != loop)
        {
          /* Remove the non-necessary forwarder of scalar_loop again.  */
          redirect_edge_pred (single_succ_edge (preheader),
                              single_pred (preheader));
          delete_basic_block (preheader);
          set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
                                   loop_preheader_edge (scalar_loop)->src);
          preheader = split_edge (loop_preheader_edge (loop));
          entry_e = single_pred_edge (preheader);
        }

      redirect_edge_and_branch_force (entry_e, new_preheader);
      flush_pending_stmts (entry_e);
      set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src);

      redirect_edge_and_branch_force (new_exit, preheader);
      flush_pending_stmts (new_exit);
      set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src);

      /* And remove the non-necessary forwarder again.  Keep the other
         one so we have a proper pre-header for the loop at the exit edge.  */
      redirect_edge_pred (single_succ_edge (new_preheader),
                          single_pred (new_preheader));
      delete_basic_block (new_preheader);
      set_immediate_dominator (CDI_DOMINATORS, new_loop->header,
                               loop_preheader_edge (new_loop)->src);
    }

  for (unsigned i = 0; i < scalar_loop->num_nodes + 1; i++)
    rename_variables_in_bb (new_bbs[i], duplicate_outer_loop);

  if (scalar_loop != loop)
    {
      /* Update new_loop->header PHIs, so that on the preheader
         edge they are the ones from loop rather than scalar_loop.  */
      gphi_iterator gsi_orig, gsi_new;
      edge orig_e = loop_preheader_edge (loop);
      edge new_e = loop_preheader_edge (new_loop);

      for (gsi_orig = gsi_start_phis (loop->header),
           gsi_new = gsi_start_phis (new_loop->header);
           !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_new);
           gsi_next (&gsi_orig), gsi_next (&gsi_new))
        {
          gphi *orig_phi = gsi_orig.phi ();
          gphi *new_phi = gsi_new.phi ();
          tree orig_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
          location_t orig_locus
            = gimple_phi_arg_location_from_edge (orig_phi, orig_e);

          add_phi_arg (new_phi, orig_arg, new_e, orig_locus);
        }
    }

  free (new_bbs);
  free (bbs);

  checking_verify_dominators (CDI_DOMINATORS);

  return new_loop;
}


/* Given the condition expression COND, put it as the last statement of
   GUARD_BB; set both edges' probability; set dominator of GUARD_TO to
   DOM_BB; return the skip edge.  GUARD_TO is the target basic block to
   skip the loop.  PROBABILITY is the skip edge's probability.  Mark the
   new edge as irreducible if IRREDUCIBLE_P is true.  */

static edge
slpeel_add_loop_guard (basic_block guard_bb, tree cond,
                       basic_block guard_to, basic_block dom_bb,
                       int probability, bool irreducible_p)
{
  gimple_stmt_iterator gsi;
  edge new_e, enter_e;
  gcond *cond_stmt;
  gimple_seq gimplify_stmt_list = NULL;

  enter_e = EDGE_SUCC (guard_bb, 0);
  enter_e->flags &= ~EDGE_FALLTHRU;
  enter_e->flags |= EDGE_FALSE_VALUE;
  gsi = gsi_last_bb (guard_bb);

  cond = force_gimple_operand_1 (cond, &gimplify_stmt_list, is_gimple_condexpr,
                                 NULL_TREE);
  if (gimplify_stmt_list)
    gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT);

  cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
  gsi = gsi_last_bb (guard_bb);
  gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);

  /* Add new edge to connect guard block to the merge/loop-exit block.  */
  new_e = make_edge (guard_bb, guard_to, EDGE_TRUE_VALUE);

  new_e->count = guard_bb->count;
  new_e->probability = probability;
  new_e->count = apply_probability (enter_e->count, probability);
  if (irreducible_p)
    new_e->flags |= EDGE_IRREDUCIBLE_LOOP;

  enter_e->count -= new_e->count;
  enter_e->probability = inverse_probability (probability);
  set_immediate_dominator (CDI_DOMINATORS, guard_to, dom_bb);

  /* Split enter_e to preserve LOOPS_HAVE_PREHEADERS.  */
  if (enter_e->dest->loop_father->header == enter_e->dest)
    split_edge (enter_e);

  return new_e;
}


/* This function verifies that the following restrictions apply to LOOP:
   (1) it consists of exactly 2 basic blocks - header, and an empty latch
       for innermost loop and 5 basic blocks for outer-loop.
   (2) it is single entry, single exit
   (3) its exit condition is the last stmt in the header
   (4) E is the entry/exit edge of LOOP.
 */

bool
slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
{
  edge exit_e = single_exit (loop);
  edge entry_e = loop_preheader_edge (loop);
  gcond *orig_cond = get_loop_exit_condition (loop);
  gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
  unsigned int num_bb = loop->inner? 5 : 2;

  /* All loops have an outer scope; the only case loop->outer is NULL is for
     the function itself.  */
  if (!loop_outer (loop)
      || loop->num_nodes != num_bb
      || !empty_block_p (loop->latch)
      || !single_exit (loop)
      /* Verify that new loop exit condition can be trivially modified.  */
      || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
      || (e != exit_e && e != entry_e))
    return false;

  return true;
}

/* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI
   in the exit bb and rename all the uses after the loop.  This simplifies
   the *guard[12] routines, which assume loop closed SSA form for all PHIs
   (but normally loop closed SSA form doesn't require virtual PHIs to be
   in the same form).  Doing this early simplifies checking which uses
   should be renamed.  */

static void
create_lcssa_for_virtual_phi (struct loop *loop)
{
  gphi_iterator gsi;
  edge exit_e = single_exit (loop);

  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
      {
        gphi *phi = gsi.phi ();
        for (gsi = gsi_start_phis (exit_e->dest);
             !gsi_end_p (gsi); gsi_next (&gsi))
          if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
            break;
        if (gsi_end_p (gsi))
          {
            tree new_vop = copy_ssa_name (PHI_RESULT (phi));
            gphi *new_phi = create_phi_node (new_vop, exit_e->dest);
            tree vop = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
            imm_use_iterator imm_iter;
            gimple *stmt;
            use_operand_p use_p;

            SSA_NAME_OCCURS_IN_ABNORMAL_PHI (new_vop)
              = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (vop);
            add_phi_arg (new_phi, vop, exit_e, UNKNOWN_LOCATION);
            gimple_phi_set_result (new_phi, new_vop);
            FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop)
              if (stmt != new_phi
                  && !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
                FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
                  SET_USE (use_p, new_vop);
          }
        break;
      }

}

/* Function find_loop_location.

   Extract the location of the loop in the source code.
   If the loop is not well formed for vectorization, an estimated
   location is calculated.
   Return the loop location if found, and UNKNOWN_LOCATION otherwise.  */

source_location
find_loop_location (struct loop *loop)
{
  gimple *stmt = NULL;
  basic_block bb;
  gimple_stmt_iterator si;

  if (!loop)
    return UNKNOWN_LOCATION;

  stmt = get_loop_exit_condition (loop);

  if (stmt
      && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
    return gimple_location (stmt);

  /* If we got here the loop is probably not "well formed",
     try to estimate the loop location.  */

  if (!loop->header)
    return UNKNOWN_LOCATION;

  bb = loop->header;

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      if (LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
        return gimple_location (stmt);
    }

  return UNKNOWN_LOCATION;
}

/* Return true if PHI defines an IV of the loop to be vectorized.  */
static bool
iv_phi_p (gphi *phi)
{
  if (virtual_operand_p (PHI_RESULT (phi)))
    return false;

  stmt_vec_info stmt_info = vinfo_for_stmt (phi);
  gcc_assert (stmt_info != NULL);
  if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
      || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
    return false;

  return true;
}

/* Function vect_can_advance_ivs_p

   In case the number of iterations that LOOP iterates is unknown at compile
   time, an epilog loop will be generated, and the loop induction variables
   (IVs) will be "advanced" to the value they are supposed to take just before
   the epilog loop.  Here we check that the access function of the loop IVs
   and the expression that represents the loop bound are simple enough.
   These restrictions will be relaxed in the future.  */

bool
vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block bb = loop->header;
  gphi_iterator gsi;

  /* Analyze phi functions of the loop header.  */

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:\n");
  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      tree evolution_part;

      gphi *phi = gsi.phi ();
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
        }

      /* Skip virtual phi's.  The data dependences that are associated with
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.

         Skip reduction phis.  */
      if (!iv_phi_p (phi))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "reduc or virtual phi. skip.\n");
          continue;
        }

      /* Analyze the evolution function.  */

      evolution_part
        = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
      if (evolution_part == NULL_TREE)
        {
          if (dump_enabled_p ())
            dump_printf (MSG_MISSED_OPTIMIZATION,
                         "No access function or evolution.\n");
          return false;
        }

      /* FORNOW: We do not transform initial conditions of IVs
         which evolution functions are not invariants in the loop.  */

      if (!expr_invariant_in_loop_p (loop, evolution_part))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "evolution not invariant in loop.\n");
          return false;
        }

      /* FORNOW: We do not transform initial conditions of IVs
         which evolution functions are a polynomial of degree >= 2.  */

      if (tree_is_chrec (evolution_part))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "evolution is chrec.\n");
          return false;
        }
    }

  return true;
}


/* Function vect_update_ivs_after_vectorizer.

   "Advance" the induction variables of LOOP to the value they should take
   after the execution of LOOP.  This is currently necessary because the
   vectorizer does not handle induction variables that are used after the
   loop.  Such a situation occurs when the last iterations of LOOP are
   peeled, because:
   1. We introduced new uses after LOOP for IVs that were not originally used
      after LOOP: the IVs of LOOP are now used by an epilog loop.
   2. LOOP is going to be vectorized; this means that it will iterate N/VF
      times, whereas the loop IVs should be bumped N times.
   Input:
   - LOOP - a loop that is going to be vectorized.  The last few iterations
            of LOOP were peeled.
   - NITERS - the number of iterations that LOOP executes (before it is
              vectorized).  i.e., the number of times the ivs should be bumped.
   - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
                coming out from LOOP on which there are uses of the LOOP ivs
                (this is the path from LOOP->exit to epilog_loop->preheader).

                The new definitions of the ivs are placed in LOOP->exit.
                The phi args associated with the edge UPDATE_E in the bb
                UPDATE_E->dest are updated accordingly.

   Assumption 1: Like the rest of the vectorizer, this function assumes
   a single loop exit that has a single predecessor.

   Assumption 2: The phi nodes in the LOOP header and in update_bb are
   organized in the same order.

   Assumption 3: The access function of the ivs is simple enough (see
   vect_can_advance_ivs_p).  This assumption will be relaxed in the future.

   Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
   coming out of LOOP on which the ivs of LOOP are used (this is the path
   that leads to the epilog loop; other paths skip the epilog loop).  This
   path starts with the edge UPDATE_E, and its destination (denoted update_bb)
   needs to have its phis updated.
 */

static void
vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo,
                                  tree niters, edge update_e)
{
  gphi_iterator gsi, gsi1;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block update_bb = update_e->dest;
  basic_block exit_bb = single_exit (loop)->dest;

  /* Make sure there exists a single-predecessor exit bb:  */
  gcc_assert (single_pred_p (exit_bb));
  gcc_assert (single_succ_edge (exit_bb) == update_e);

  for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
       !gsi_end_p (gsi) && !gsi_end_p (gsi1);
       gsi_next (&gsi), gsi_next (&gsi1))
    {
      tree init_expr;
      tree step_expr, off;
      tree type;
      tree var, ni, ni_name;
      gimple_stmt_iterator last_gsi;

      gphi *phi = gsi.phi ();
      gphi *phi1 = gsi1.phi ();
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_update_ivs_after_vectorizer: phi: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
        }

      /* Skip reduction and virtual phis.  */
      if (!iv_phi_p (phi))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "reduc or virtual phi. skip.\n");
          continue;
        }

      type = TREE_TYPE (gimple_phi_result (phi));
      step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
      step_expr = unshare_expr (step_expr);

      /* FORNOW: We do not support IVs whose evolution function is a polynomial
         of degree >= 2 or exponential.  */
      gcc_assert (!tree_is_chrec (step_expr));

      init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));

      off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
                         fold_convert (TREE_TYPE (step_expr), niters),
                         step_expr);
      if (POINTER_TYPE_P (type))
        ni = fold_build_pointer_plus (init_expr, off);
      else
        ni = fold_build2 (PLUS_EXPR, type,
                          init_expr, fold_convert (type, off));

      var = create_tmp_var (type, "tmp");

      last_gsi = gsi_last_bb (exit_bb);
      gimple_seq new_stmts = NULL;
      ni_name = force_gimple_operand (ni, &new_stmts, false, var);
      /* Exit_bb shouldn't be empty.  */
      if (!gsi_end_p (last_gsi))
        gsi_insert_seq_after (&last_gsi, new_stmts, GSI_SAME_STMT);
      else
        gsi_insert_seq_before (&last_gsi, new_stmts, GSI_SAME_STMT);

      /* Fix phi expressions in the successor bb.  */
      adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
    }
}

/* Function vect_gen_prolog_loop_niters

   Generate the number of iterations which should be peeled as prolog for the
   loop represented by LOOP_VINFO.  It is calculated as the misalignment of
   DR - the data reference recorded in LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).
   As a result, after the execution of this loop, the data reference DR will
   refer to an aligned location.  The following computation is generated:

   If the misalignment of DR is known at compile time:
     addr_mis = int mis = DR_MISALIGNMENT (dr);
   Else, compute address misalignment in bytes:
     addr_mis = addr & (vectype_align - 1)

   prolog_niters = ((VF - addr_mis/elem_size)&(VF-1))/step

   (elem_size = element type size; an element is the scalar element whose type
   is the inner type of the vectype)

   The computations will be emitted at the end of BB.  We also compute and
   store upper bound (included) of the result in BOUND.

   When the step of the data-ref in the loop is not 1 (as in interleaved data
   and SLP), the number of iterations of the prolog must be divided by the step
   (which is equal to the size of interleaved group).

   The above formulas assume that VF == number of elements in the vector.  This
   may not hold when there are multiple-types in the loop.
   In this case, for some data-references in the loop the VF does not represent
   the number of elements that fit in the vector.  Therefore, instead of VF we
   use TYPE_VECTOR_SUBPARTS.  */
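/* Added worked example (values chosen purely for illustration, assuming a
   four-element vector type with 4-byte elements and 16-byte vector
   alignment): if the runtime address misalignment is 8 bytes, then
     elem_misalign = 8 / 4 = 2
     prolog_niters = (4 - 2) & (4 - 1) = 2,
   so peeling two scalar iterations advances the address by 8 bytes and
   leaves DR 16-byte aligned for the vector loop.  */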
static tree
vect_gen_prolog_loop_niters (loop_vec_info loop_vinfo,
                             basic_block bb, int *bound)
{
  struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree var;
  tree niters_type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo));
  gimple_seq stmts = NULL, new_stmts = NULL;
  tree iters, iters_name;
  gimple *dr_stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
  int nelements = TYPE_VECTOR_SUBPARTS (vectype);

  if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
    {
      int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "known peeling = %d.\n", npeel);

      iters = build_int_cst (niters_type, npeel);
      *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
    }
  else
    {
      bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
      tree offset = negative
          ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
      tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
                                                &stmts, offset, loop);
      tree type = unsigned_type_for (TREE_TYPE (start_addr));
      tree vectype_align_minus_1 = build_int_cst (type, vectype_align - 1);
      HOST_WIDE_INT elem_size =
        int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
      tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
      tree nelements_minus_1 = build_int_cst (type, nelements - 1);
      tree nelements_tree = build_int_cst (type, nelements);
      tree byte_misalign;
      tree elem_misalign;

      /* Create:  byte_misalign = addr & (vectype_align - 1)  */
      byte_misalign =
        fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr),
                     vectype_align_minus_1);

      /* Create:  elem_misalign = byte_misalign / element_size  */
      elem_misalign =
        fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);

      /* Create:  (niters_type) (nelements - elem_misalign)&(nelements - 1)  */
      if (negative)
        iters = fold_build2 (MINUS_EXPR, type, elem_misalign, nelements_tree);
      else
        iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
      iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
      iters = fold_convert (niters_type, iters);
      *bound = nelements - 1;
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "niters for prolog loop: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, iters);
      dump_printf (MSG_NOTE, "\n");
    }

  var = create_tmp_var (niters_type, "prolog_loop_niters");
  iters_name = force_gimple_operand (iters, &new_stmts, false, var);

  if (new_stmts)
    gimple_seq_add_seq (&stmts, new_stmts);
  if (stmts)
    {
      gcc_assert (single_succ_p (bb));
      gimple_stmt_iterator gsi = gsi_last_bb (bb);
      if (gsi_end_p (gsi))
        gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
      else
        gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
    }
  return iters_name;
}


/* Function vect_update_init_of_dr

   NITERS iterations were peeled from LOOP.  DR represents a data reference
   in LOOP.  This function updates the information recorded in DR to
   account for the fact that the first NITERS iterations had already been
   executed.  Specifically, it updates the OFFSET field of DR.  */

static void
vect_update_init_of_dr (struct data_reference *dr, tree niters)
{
  tree offset = DR_OFFSET (dr);

  niters = fold_build2 (MULT_EXPR, sizetype,
                        fold_convert (sizetype, niters),
                        fold_convert (sizetype, DR_STEP (dr)));
  offset = fold_build2 (PLUS_EXPR, sizetype,
                        fold_convert (sizetype, offset), niters);
  DR_OFFSET (dr) = offset;
}


/* Function vect_update_inits_of_drs

   NITERS iterations were peeled from the loop represented by LOOP_VINFO.
   This function updates the information recorded for the data references in
   the loop to account for the fact that the first NITERS iterations had
   already been executed.  Specifically, it updates the initial_condition of
   the access_function of all the data_references in the loop.  */
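/* For illustration only (example values, not taken from the sources): if
   NITERS is 3 and a data reference steps by DR_STEP == 4 bytes per
   iteration, vect_update_init_of_dr above adds 3 * 4 == 12 bytes to that
   reference's DR_OFFSET, so the references seen by the remaining loop start
   after the peeled iterations.  */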
static void
vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
{
  unsigned int i;
  vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct data_reference *dr;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_update_inits_of_dr ===\n");

  /* Adjust niters to sizetype and insert stmts on loop preheader edge.  */
  if (!types_compatible_p (sizetype, TREE_TYPE (niters)))
    {
      gimple_seq seq;
      edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
      tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters");

      niters = fold_convert (sizetype, niters);
      niters = force_gimple_operand (niters, &seq, false, var);
      if (seq)
        {
          basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
          gcc_assert (!new_bb);
        }
    }

  FOR_EACH_VEC_ELT (datarefs, i, dr)
    vect_update_init_of_dr (dr, niters);
}


/* This function builds ni_name = number of iterations.  Statements
   are emitted on the loop preheader edge.  */

tree
vect_build_loop_niters (loop_vec_info loop_vinfo)
{
  tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
  if (TREE_CODE (ni) == INTEGER_CST)
    return ni;
  else
    {
      tree ni_name, var;
      gimple_seq stmts = NULL;
      edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));

      var = create_tmp_var (TREE_TYPE (ni), "niters");
      ni_name = force_gimple_operand (ni, &stmts, false, var);
      if (stmts)
        gsi_insert_seq_on_edge_immediate (pe, stmts);

      return ni_name;
    }
}

/* Calculate the number of iterations above which vectorized loop will be
   preferred to the scalar loop.  NITERS_PROLOG is the number of iterations
   of prolog loop.  If it's integer const, the integer number is also passed
   in INT_NITERS_PROLOG.  BOUND_PROLOG is the upper bound (included) of the
   number of iterations of the prolog loop.  VFM1 is vector factor minus one.
   If CHECK_PROFITABILITY is true, TH is the threshold below which scalar
   (rather than vectorized) loop will be executed.  This function stores
   upper bound (included) of the result in BOUND_SCALAR.  */

static tree
vect_gen_scalar_loop_niters (tree niters_prolog, int int_niters_prolog,
                             int bound_prolog, int vfm1, int th,
                             int *bound_scalar, bool check_profitability)
{
  tree type = TREE_TYPE (niters_prolog);
  tree niters = fold_build2 (PLUS_EXPR, type, niters_prolog,
                             build_int_cst (type, vfm1));

  *bound_scalar = vfm1 + bound_prolog;
  if (check_profitability)
    {
      /* TH indicates the minimum niters of vectorized loop, while we
         compute the maximum niters of scalar loop.  */
      th--;
      /* Peeling for constant times.  */
      if (int_niters_prolog >= 0)
        {
          *bound_scalar = (int_niters_prolog + vfm1 < th
                            ? th
                            : vfm1 + int_niters_prolog);
          return build_int_cst (type, *bound_scalar);
        }
      /* Peeling for unknown times.  Note BOUND_PROLOG is the upper
         bound (included) of niters of prolog loop.  */
      if (th >= vfm1 + bound_prolog)
        {
          *bound_scalar = th;
          return build_int_cst (type, th);
        }
      /* Need to do runtime comparison, but BOUND_SCALAR remains the same.  */
      else if (th > vfm1)
        return fold_build2 (MAX_EXPR, type, build_int_cst (type, th), niters);
    }
  return niters;
}

/* This function generates the following statements:

   niters = number of iterations loop executes (after peeling)
   niters_vector = niters / vf

   and places them on the loop preheader edge.  NITERS_NO_OVERFLOW is
   true if NITERS doesn't overflow.  */
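/* A small worked example (illustrative values only): with VF == 4 and
   NITERS == 7, the code below computes either
     niters_vector = 7 >> 2 = 1                 (NITERS known not to overflow)
     niters_vector = ((7 - 4) >> 2) + 1 = 1     (otherwise),
   i.e. one vector iteration covering 4 scalar iterations; the remaining 3
   iterations are left for the epilog loop.  */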
void
vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, tree niters,
                             tree *niters_vector_ptr, bool niters_no_overflow)
{
  tree ni_minus_gap, var;
  tree niters_vector;
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
  tree log_vf = build_int_cst (TREE_TYPE (niters), exact_log2 (vf));

  /* If epilogue loop is required because of data accesses with gaps, we
     subtract one iteration from the total number of iterations here for
     correct calculation of RATIO.  */
  if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
    {
      ni_minus_gap = fold_build2 (MINUS_EXPR, TREE_TYPE (niters),
                                  niters,
                                  build_one_cst (TREE_TYPE (niters)));
      if (!is_gimple_val (ni_minus_gap))
        {
          var = create_tmp_var (TREE_TYPE (niters), "ni_gap");
          gimple *stmts = NULL;
          ni_minus_gap = force_gimple_operand (ni_minus_gap, &stmts,
                                               true, var);
          gsi_insert_seq_on_edge_immediate (pe, stmts);
        }
    }
  else
    ni_minus_gap = niters;

  /* Create: niters >> log2(vf) */
  /* If it's known that niters == number of latch executions + 1 doesn't
     overflow, we can generate niters >> log2(vf); otherwise we generate
     (niters - vf) >> log2(vf) + 1 by using the fact that we know ratio
     will be at least one.  */
  if (niters_no_overflow)
    niters_vector = fold_build2 (RSHIFT_EXPR, TREE_TYPE (niters),
                                 ni_minus_gap, log_vf);
  else
    niters_vector
      = fold_build2 (PLUS_EXPR, TREE_TYPE (niters),
                     fold_build2 (RSHIFT_EXPR, TREE_TYPE (niters),
                                  fold_build2 (MINUS_EXPR, TREE_TYPE (niters),
                                               ni_minus_gap,
                                               build_int_cst
                                                 (TREE_TYPE (niters), vf)),
                                  log_vf),
                     build_int_cst (TREE_TYPE (niters), 1));

  if (!is_gimple_val (niters_vector))
    {
      var = create_tmp_var (TREE_TYPE (niters), "bnd");
      gimple *stmts = NULL;
      niters_vector = force_gimple_operand (niters_vector, &stmts, true, var);
      gsi_insert_seq_on_edge_immediate (pe, stmts);
    }
  *niters_vector_ptr = niters_vector;

  return;
}

/* Given NITERS_VECTOR which is the number of iterations for vectorized
   loop specified by LOOP_VINFO after vectorization, compute the number
   of iterations before vectorization (niters_vector * vf) and store it
   to NITERS_VECTOR_MULT_VF_PTR.  */
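/* For instance (numbers purely illustrative): with VF == 4 and
   NITERS_VECTOR == 5, the shift below computes 5 << 2 == 20, the number of
   scalar iterations covered by the vector loop.  */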
static void
vect_gen_vector_loop_niters_mult_vf (loop_vec_info loop_vinfo,
                                     tree niters_vector,
                                     tree *niters_vector_mult_vf_ptr)
{
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree type = TREE_TYPE (niters_vector);
  tree log_vf = build_int_cst (type, exact_log2 (vf));
  basic_block exit_bb = single_exit (loop)->dest;

  gcc_assert (niters_vector_mult_vf_ptr != NULL);
  tree niters_vector_mult_vf = fold_build2 (LSHIFT_EXPR, type,
                                            niters_vector, log_vf);
  if (!is_gimple_val (niters_vector_mult_vf))
    {
      tree var = create_tmp_var (type, "niters_vector_mult_vf");
      gimple_seq stmts = NULL;
      niters_vector_mult_vf = force_gimple_operand (niters_vector_mult_vf,
                                                    &stmts, true, var);
      gimple_stmt_iterator gsi = gsi_start_bb (exit_bb);
      gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
    }
  *niters_vector_mult_vf_ptr = niters_vector_mult_vf;
}

/* Function slpeel_tree_duplicate_loop_to_edge_cfg duplicates FIRST/SECOND
   from SECOND/FIRST and puts it at the original loop's preheader/exit
   edge, the two loops are arranged as below:

       preheader_a:
     first_loop:
       header_a:
         i_1 = PHI<i_0, i_2>;
         ...
         i_2 = i_1 + 1;
         if (cond_a)
           goto latch_a;
         else
           goto between_bb;
       latch_a:
         goto header_a;

       between_bb:
         ;; i_x = PHI<i_2>;   ;; LCSSA phi node to be created for FIRST,

     second_loop:
       header_b:
         i_3 = PHI<i_0, i_4>; ;; Use of i_0 to be replaced with i_x,
                              or with i_2 if no LCSSA phi is created
                              under condition of CREATE_LCSSA_FOR_IV_PHIS.
         ...
         i_4 = i_3 + 1;
         if (cond_b)
           goto latch_b;
         else
           goto exit_bb;
       latch_b:
         goto header_b;

       exit_bb:

   This function creates loop closed SSA for the first loop; update the
   second loop's PHI nodes by replacing argument on incoming edge with the
   result of newly created lcssa PHI nodes.  If CREATE_LCSSA_FOR_IV_PHIS
   is false, loop-closed SSA phis will only be created for non-IV phis of
   the first loop.

   This function assumes exit bb of the first loop is preheader bb of the
   second loop, i.e., between_bb in the example code.  With PHIs updated,
   the second loop will execute the remaining iterations of the first.  */

static void
slpeel_update_phi_nodes_for_loops (loop_vec_info loop_vinfo,
                                   struct loop *first, struct loop *second,
                                   bool create_lcssa_for_iv_phis)
{
  gphi_iterator gsi_update, gsi_orig;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  edge first_latch_e = EDGE_SUCC (first->latch, 0);
  edge second_preheader_e = loop_preheader_edge (second);
  basic_block between_bb = single_exit (first)->dest;

  gcc_assert (between_bb == second_preheader_e->src);
  gcc_assert (single_pred_p (between_bb) && single_succ_p (between_bb));
  /* Either the first loop or the second is the loop to be vectorized.  */
  gcc_assert (loop == first || loop == second);

  for (gsi_orig = gsi_start_phis (first->header),
       gsi_update = gsi_start_phis (second->header);
       !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
       gsi_next (&gsi_orig), gsi_next (&gsi_update))
    {
      gphi *orig_phi = gsi_orig.phi ();
      gphi *update_phi = gsi_update.phi ();

      tree arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, first_latch_e);
      /* Generate lcssa PHI node for the first loop.  */
      gphi *vect_phi = (loop == first) ? orig_phi : update_phi;
      if (create_lcssa_for_iv_phis || !iv_phi_p (vect_phi))
        {
          tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
          gphi *lcssa_phi = create_phi_node (new_res, between_bb);
          add_phi_arg (lcssa_phi, arg, single_exit (first), UNKNOWN_LOCATION);
          arg = new_res;
        }

      /* Update PHI node in the second loop by replacing arg on the loop's
         incoming edge.  */
      adjust_phi_and_debug_stmts (update_phi, second_preheader_e, arg);
    }
}

/* Function slpeel_add_loop_guard adds guard skipping from the beginning
   of SKIP_LOOP to the beginning of UPDATE_LOOP.  GUARD_EDGE and MERGE_EDGE
   are two pred edges of the merge point before UPDATE_LOOP.  The two loops
   appear like below:

     guard_bb:
       if (cond)
         goto merge_bb;
       else
         goto skip_loop;

     skip_loop:
       header_a:
         i_1 = PHI<i_0, i_2>;
         ...
         i_2 = i_1 + 1;
         if (cond_a)
           goto latch_a;
         else
           goto exit_a;
       latch_a:
         goto header_a;

       exit_a:
         i_5 = PHI<i_2>;

     merge_bb:
       ;; PHI (i_x = PHI<i_0, i_5>) to be created at merge point.

     update_loop:
       header_b:
         i_3 = PHI<i_5, i_4>;  ;; Use of i_5 to be replaced with i_x.
         ...
         i_4 = i_3 + 1;
         if (cond_b)
           goto latch_b;
         else
           goto exit_bb;
       latch_b:
         goto header_b;

       exit_bb:

   This function creates PHI nodes at merge_bb and replaces the use of i_5
   in the update_loop's PHI node with the result of new PHI result.  */

static void
slpeel_update_phi_nodes_for_guard1 (struct loop *skip_loop,
                                    struct loop *update_loop,
                                    edge guard_edge, edge merge_edge)
{
  source_location merge_loc, guard_loc;
  edge orig_e = loop_preheader_edge (skip_loop);
  edge update_e = loop_preheader_edge (update_loop);
  gphi_iterator gsi_orig, gsi_update;

  for ((gsi_orig = gsi_start_phis (skip_loop->header),
        gsi_update = gsi_start_phis (update_loop->header));
       !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
       gsi_next (&gsi_orig), gsi_next (&gsi_update))
    {
      gphi *orig_phi = gsi_orig.phi ();
      gphi *update_phi = gsi_update.phi ();

      /* Generate new phi node at merge bb of the guard.  */
      tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
      gphi *new_phi = create_phi_node (new_res, guard_edge->dest);

      /* Merge bb has two incoming edges: GUARD_EDGE and MERGE_EDGE.  Set the
         args in NEW_PHI for these edges.  */
      tree merge_arg = PHI_ARG_DEF_FROM_EDGE (update_phi, update_e);
      tree guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
      merge_loc = gimple_phi_arg_location_from_edge (update_phi, update_e);
      guard_loc = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
      add_phi_arg (new_phi, merge_arg, merge_edge, merge_loc);
      add_phi_arg (new_phi, guard_arg, guard_edge, guard_loc);

      /* Update phi in UPDATE_PHI.  */
      adjust_phi_and_debug_stmts (update_phi, update_e, new_res);
    }
}

/* LCSSA_PHI is a lcssa phi of EPILOG loop which is copied from LOOP,
   this function searches for the corresponding lcssa phi node in exit
   bb of LOOP.  If it is found, return the phi result; otherwise return
   NULL.  */
static tree
find_guard_arg (struct loop *loop, struct loop *epilog ATTRIBUTE_UNUSED,
                gphi *lcssa_phi)
{
  gphi_iterator gsi;
  edge e = single_exit (loop);

  gcc_assert (single_pred_p (e->dest));
  for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      if (operand_equal_p (PHI_ARG_DEF (phi, 0),
                           PHI_ARG_DEF (lcssa_phi, 0), 0))
        return PHI_RESULT (phi);
    }
  return NULL_TREE;
}

/* LOOP and EPILOG are two consecutive loops in CFG and EPILOG is copied
   from LOOP.  Function slpeel_add_loop_guard adds guard skipping from a
   point between the two loops to the end of EPILOG.  Edges GUARD_EDGE
   and MERGE_EDGE are the two pred edges of merge_bb at the end of EPILOG.
   The CFG looks like:

     loop:
       header_a:
         i_1 = PHI<i_0, i_2>;
         ...
         i_2 = i_1 + 1;
         if (cond_a)
           goto latch_a;
         else
           goto exit_a;
       latch_a:
         goto header_a;

       exit_a:

     guard_bb:
       if (cond)
         goto merge_bb;
       else
         goto epilog_loop;

       ;; fall_through_bb

     epilog_loop:
       header_b:
         i_3 = PHI<i_2, i_4>;
         ...
         i_4 = i_3 + 1;
         if (cond_b)
           goto latch_b;
         else
           goto merge_bb;
       latch_b:
         goto header_b;

       merge_bb:
         ; PHI node (i_y = PHI<i_2, i_4>) to be created at merge point.

       exit_bb:
         i_x = PHI<i_4>;  ;Use of i_4 to be replaced with i_y in merge_bb.

   For each name used outside EPILOG (i.e., for each name that has a lcssa
   phi in exit_bb) we create a new PHI in merge_bb.  The new PHI has two
   args corresponding to GUARD_EDGE and MERGE_EDGE.  Arg for MERGE_EDGE is
   the arg of the original PHI in exit_bb, arg for GUARD_EDGE is defined
   by LOOP and is found in the exit bb of LOOP.  Arg of the original PHI
   in exit_bb will also be updated.  */

static void
slpeel_update_phi_nodes_for_guard2 (struct loop *loop, struct loop *epilog,
                                    edge guard_edge, edge merge_edge)
{
  gphi_iterator gsi;
  basic_block merge_bb = guard_edge->dest;

  gcc_assert (single_succ_p (merge_bb));
  edge e = single_succ_edge (merge_bb);
  basic_block exit_bb = e->dest;
  gcc_assert (single_pred_p (exit_bb));
  gcc_assert (single_pred (exit_bb) == single_exit (epilog)->dest);

  for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *update_phi = gsi.phi ();
      tree old_arg = PHI_ARG_DEF (update_phi, 0);
      /* This loop-closed-phi actually doesn't represent a use out of the
         loop - the phi arg is a constant.  */
      if (TREE_CODE (old_arg) != SSA_NAME)
        continue;

      tree merge_arg = get_current_def (old_arg);
      if (!merge_arg)
        merge_arg = old_arg;

      tree guard_arg = find_guard_arg (loop, epilog, update_phi);
      /* If the var is live after loop but not a reduction, we simply
         use the old arg.  */
      if (!guard_arg)
        guard_arg = old_arg;

      /* Create new phi node in MERGE_BB:  */
      tree new_res = copy_ssa_name (PHI_RESULT (update_phi));
      gphi *merge_phi = create_phi_node (new_res, merge_bb);

      /* MERGE_BB has two incoming edges: GUARD_EDGE and MERGE_EDGE.  Set
         the two PHI args in merge_phi for these edges.  */
      add_phi_arg (merge_phi, merge_arg, merge_edge, UNKNOWN_LOCATION);
      add_phi_arg (merge_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);

      /* Update the original phi in exit_bb.  */
      adjust_phi_and_debug_stmts (update_phi, e, new_res);
    }
}

/* EPILOG loop is duplicated from the original loop for vectorizing,
   the arg of its loop closed ssa PHI needs to be updated.  */

static void
slpeel_update_phi_nodes_for_lcssa (struct loop *epilog)
{
  gphi_iterator gsi;
  basic_block exit_bb = single_exit (epilog)->dest;

  gcc_assert (single_pred_p (exit_bb));
  edge e = EDGE_PRED (exit_bb, 0);
  for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
    rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
}

/* Function vect_do_peeling.

   Input:
   - LOOP_VINFO: Represent a loop to be vectorized, which looks like:

       preheader:
     LOOP:
       header_bb:
         loop_body
         if (exit_loop_cond) goto exit_bb
         else                goto header_bb
       exit_bb:

   - NITERS: The number of iterations of the loop.
   - NITERSM1: The number of iterations of the loop's latch.
   - NITERS_NO_OVERFLOW: No overflow in computing NITERS.
   - TH, CHECK_PROFITABILITY: Threshold of niters to vectorize loop if
     CHECK_PROFITABILITY is true.
   Output:
   - NITERS_VECTOR: The number of iterations of loop after vectorization.

   This function peels prolog and epilog from the loop, adds guards skipping
   PROLOG and EPILOG for various conditions.  As a result, the changed CFG
   would look like:

       guard_bb_1:
         if (prefer_scalar_loop) goto merge_bb_1
         else                    goto guard_bb_2

       guard_bb_2:
         if (skip_prolog) goto merge_bb_2
         else             goto prolog_preheader

       prolog_preheader:
     PROLOG:
       prolog_header_bb:
         prolog_body
         if (exit_prolog_cond) goto prolog_exit_bb
         else                  goto prolog_header_bb
       prolog_exit_bb:

       merge_bb_2:

       vector_preheader:
     VECTOR LOOP:
       vector_header_bb:
         vector_body
         if (exit_vector_cond) goto vector_exit_bb
         else                  goto vector_header_bb
       vector_exit_bb:

       guard_bb_3:
         if (skip_epilog) goto merge_bb_3
         else             goto epilog_preheader

       merge_bb_1:

       epilog_preheader:
     EPILOG:
       epilog_header_bb:
         epilog_body
         if (exit_epilog_cond) goto merge_bb_3
         else                  goto epilog_header_bb

       merge_bb_3:

   Note this function peels the prolog and epilog, and adds the guards,
   only when necessary.
   Returns created epilogue or NULL.

   TODO: Guard for prefer_scalar_loop should be emitted along with
   versioning conditions if loop versioning is needed.  */
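/* A concrete illustration (example numbers only, not taken from the
   sources): with VF == 4, an alignment prolog of 3 iterations and
   NITERS == 22, PROLOG runs 3 iterations, the VECTOR LOOP runs 4 iterations
   covering 16 scalar iterations, and EPILOG runs the remaining 3.  */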
struct loop *
vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
                 tree *niters_vector, int th, bool check_profitability,
                 bool niters_no_overflow)
{
  edge e, guard_e;
  tree type = TREE_TYPE (niters), guard_cond;
  basic_block guard_bb, guard_to;
  int prob_prolog, prob_vector, prob_epilog;
  int bound_prolog = 0, bound_scalar = 0, bound = 0;
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  int prolog_peeling = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
  bool epilog_peeling = (LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)
                         || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo));

  if (!prolog_peeling && !epilog_peeling)
    return NULL;

  prob_vector = 9 * REG_BR_PROB_BASE / 10;
  if ((vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo)) == 2)
    vf = 3;
  prob_prolog = prob_epilog = (vf - 1) * REG_BR_PROB_BASE / vf;
  vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);

  struct loop *prolog, *epilog = NULL, *loop = LOOP_VINFO_LOOP (loop_vinfo);
  struct loop *first_loop = loop;
  bool irred_flag = loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP;
  create_lcssa_for_virtual_phi (loop);
  update_ssa (TODO_update_ssa_only_virtuals);

  if (MAY_HAVE_DEBUG_STMTS)
    {
      gcc_assert (!adjust_vec.exists ());
      adjust_vec.create (32);
    }
  initialize_original_copy_tables ();

  /* Prolog loop may be skipped.  */
  bool skip_prolog = (prolog_peeling != 0);
  /* Skip to epilog if scalar loop may be preferred.  It's only used when
     we peel for epilog loop.  */
  bool skip_vector = (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo));
  /* Epilog loop must be executed if the number of iterations for epilog
     loop is known at compile time, otherwise we need to add a check at
     the end of vector loop and skip to the end of epilog loop.  */
  bool skip_epilog = (prolog_peeling < 0
                      || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo));
  /* PEELING_FOR_GAPS is special because epilog loop must be executed.  */
  if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
    skip_epilog = false;

  /* Record the anchor bb at which guard should be placed if scalar loop
     may be preferred.  */
  basic_block anchor = loop_preheader_edge (loop)->src;
  if (skip_vector)
    {
      split_edge (loop_preheader_edge (loop));

      /* Due to the order in which we peel prolog and epilog, we first
         propagate probability to the whole loop.  The purpose is to
         avoid adjusting probabilities of both prolog and vector loops
         separately.  Note in this case, the probability of epilog loop
         needs to be scaled back later.  */
      basic_block bb_before_loop = loop_preheader_edge (loop)->src;
      scale_bbs_frequencies_int (&bb_before_loop, 1, prob_vector,
                                 REG_BR_PROB_BASE);
      scale_loop_profile (loop, prob_vector, bound);
    }

  tree niters_prolog = build_int_cst (type, 0);
  source_location loop_loc = find_loop_location (loop);
  struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
  if (prolog_peeling)
    {
      e = loop_preheader_edge (loop);
      if (!slpeel_can_duplicate_loop_p (loop, e))
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
                           "loop can't be duplicated to preheader edge.\n");
          gcc_unreachable ();
        }
      /* Peel prolog and put it on preheader edge of loop.  */
         */
      prolog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
      if (!prolog)
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
                           "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
          gcc_unreachable ();
        }
      slpeel_update_phi_nodes_for_loops (loop_vinfo, prolog, loop, true);
      first_loop = prolog;
      reset_original_copy_tables ();

      /* Generate and update the number of iterations for the prolog loop.  */
      niters_prolog = vect_gen_prolog_loop_niters (loop_vinfo, anchor,
                                                   &bound_prolog);
      slpeel_make_loop_iterate_ntimes (prolog, niters_prolog);

      /* Skip the prolog loop.  */
      if (skip_prolog)
        {
          guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
                                    niters_prolog, build_int_cst (type, 0));
          guard_bb = loop_preheader_edge (prolog)->src;
          basic_block bb_after_prolog = loop_preheader_edge (loop)->src;
          guard_to = split_edge (loop_preheader_edge (loop));
          guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
                                           guard_to, guard_bb,
                                           inverse_probability (prob_prolog),
                                           irred_flag);
          e = EDGE_PRED (guard_to, 0);
          e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
          slpeel_update_phi_nodes_for_guard1 (prolog, loop, guard_e, e);

          scale_bbs_frequencies_int (&bb_after_prolog, 1, prob_prolog,
                                     REG_BR_PROB_BASE);
          scale_loop_profile (prolog, prob_prolog, bound_prolog);
        }
      /* Update init addresses of DRs.  */
      vect_update_inits_of_drs (loop_vinfo, niters_prolog);
      /* Update niters for the vector loop.  */
      LOOP_VINFO_NITERS (loop_vinfo)
        = fold_build2 (MINUS_EXPR, type, niters, niters_prolog);
      LOOP_VINFO_NITERSM1 (loop_vinfo)
        = fold_build2 (MINUS_EXPR, type,
                       LOOP_VINFO_NITERSM1 (loop_vinfo), niters_prolog);
      niters = vect_build_loop_niters (loop_vinfo);

      /* The prolog iterates at most bound_prolog times, its latch iterates
         at most bound_prolog - 1 times.  */
      record_niter_bound (prolog, bound_prolog - 1, false, true);
      delete_update_ssa ();
      adjust_vec_debug_stmts ();
      scev_reset ();
    }

  if (epilog_peeling)
    {
      e = single_exit (loop);
      if (!slpeel_can_duplicate_loop_p (loop, e))
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
                           "loop can't be duplicated to exit edge.\n");
          gcc_unreachable ();
        }
      /* Peel epilog and put it on exit edge of loop.  */
      epilog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
      if (!epilog)
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
                           "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
          gcc_unreachable ();
        }
      slpeel_update_phi_nodes_for_loops (loop_vinfo, loop, epilog, false);

      /* The scalar version of the loop may be preferred.  In this case, add
         a guard and skip to the epilog.  Note this only happens when the
         number of iterations of the loop is unknown at compile time,
         otherwise this won't be vectorized.  */
      if (skip_vector)
        {
          /* An additional epilogue iteration is peeled if a gap exists.  */
          bool peel_for_gaps = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo);
          tree t = vect_gen_scalar_loop_niters (niters_prolog, prolog_peeling,
                                                bound_prolog,
                                                peel_for_gaps ? vf : vf - 1,
                                                th, &bound_scalar,
                                                check_profitability);
          /* Build the guard against NITERSM1 since NITERS may overflow.
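             For instance (an illustrative case, not from any testcase): if
             the IV type is a 32-bit unsigned type and the scalar loop runs
             2^32 times, NITERS wraps around to 0 while NITERSM1 is
             0xffffffff, so the guard below compares NITERSM1, not NITERS,
             against the threshold T computed just above.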
             */
          guard_cond = fold_build2 (LT_EXPR, boolean_type_node, nitersm1, t);
          guard_bb = anchor;
          guard_to = split_edge (loop_preheader_edge (epilog));
          guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
                                           guard_to, guard_bb,
                                           inverse_probability (prob_vector),
                                           irred_flag);
          e = EDGE_PRED (guard_to, 0);
          e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
          slpeel_update_phi_nodes_for_guard1 (first_loop, epilog, guard_e, e);

          /* Simply propagate profile info from guard_bb to guard_to, which
             is a merge point of control flow.  */
          guard_to->frequency = guard_bb->frequency;
          guard_to->count = guard_bb->count;
          single_succ_edge (guard_to)->count = guard_to->count;
          /* Scale the probability of the epilog loop back.  */
          int scale_up = REG_BR_PROB_BASE * REG_BR_PROB_BASE / prob_vector;
          scale_loop_frequencies (epilog, scale_up, REG_BR_PROB_BASE);
        }

      basic_block bb_before_epilog = loop_preheader_edge (epilog)->src;
      tree niters_vector_mult_vf;
      /* If the loop is peeled a non-zero constant number of times, niters
         now refers to orig_niters - prolog_peeling; it won't overflow even
         if orig_niters does.  */
      niters_no_overflow |= (prolog_peeling > 0);
      vect_gen_vector_loop_niters (loop_vinfo, niters,
                                   niters_vector, niters_no_overflow);
      vect_gen_vector_loop_niters_mult_vf (loop_vinfo, *niters_vector,
                                           &niters_vector_mult_vf);
      /* Update IVs of the original loop as if they were advanced by
         niters_vector_mult_vf steps.  */
      gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
      edge update_e = skip_vector ? e : loop_preheader_edge (epilog);
      vect_update_ivs_after_vectorizer (loop_vinfo, niters_vector_mult_vf,
                                        update_e);

      if (skip_epilog)
        {
          guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
                                    niters, niters_vector_mult_vf);
          guard_bb = single_exit (loop)->dest;
          guard_to = split_edge (single_exit (epilog));
          guard_e = slpeel_add_loop_guard (guard_bb, guard_cond, guard_to,
                                           skip_vector ? anchor : guard_bb,
                                           inverse_probability (prob_epilog),
                                           irred_flag);
          slpeel_update_phi_nodes_for_guard2 (loop, epilog, guard_e,
                                              single_exit (epilog));
          /* We only need to handle the basic block before the epilog loop
             if it's not the guard_bb, which is the case when skip_vector
             is true.  */
          if (guard_bb != bb_before_epilog)
            {
              prob_epilog = (combine_probabilities (prob_vector, prob_epilog)
                             + inverse_probability (prob_vector));

              scale_bbs_frequencies_int (&bb_before_epilog, 1, prob_epilog,
                                         REG_BR_PROB_BASE);
            }
          scale_loop_profile (epilog, prob_epilog, bound);
        }
      else
        slpeel_update_phi_nodes_for_lcssa (epilog);

      bound = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) ? vf - 1 : vf - 2;
      /* We share the epilog loop with the scalar version loop.  */
      bound = MAX (bound, bound_scalar - 1);
      record_niter_bound (epilog, bound, false, true);

      delete_update_ssa ();
      adjust_vec_debug_stmts ();
      scev_reset ();
    }
  adjust_vec.release ();
  free_original_copy_tables ();

  return epilog;
}

/* Function vect_create_cond_for_niters_checks.

   Create a conditional expression that represents the run-time checks for
   the loop's niter.  The loop is guaranteed to terminate if the run-time
   checks hold.

   Input:
   COND_EXPR - input conditional expression.  New conditions will be chained
               with logical AND operation.
   LOOP_VINFO - field LOOP_VINFO_NITERS_ASSUMPTIONS contains the assumptions
                under which the niter information is valid; these are the
                conditions to be checked.

   Output:
   COND_EXPR - conditional expression.

   The returned COND_EXPR is the conditional expression to be used in the
   if statement that controls which version of the loop gets executed at
   runtime.  */

static void
vect_create_cond_for_niters_checks (loop_vec_info loop_vinfo, tree *cond_expr)
{
  tree part_cond_expr = LOOP_VINFO_NITERS_ASSUMPTIONS (loop_vinfo);

  if (*cond_expr)
    *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
                              *cond_expr, part_cond_expr);
  else
    *cond_expr = part_cond_expr;
}

/* Function vect_create_cond_for_align_checks.

   Create a conditional expression that represents the alignment checks for
   all of the data references (array element references) whose alignment
   must be checked at runtime.

   Input:
   COND_EXPR - input conditional expression.  New conditions will be chained
               with logical AND operation.
   LOOP_VINFO - two fields of the loop information are used.
                LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
                LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.

   Output:
   COND_EXPR_STMT_LIST - statements needed to construct the conditional
                         expression.
   The returned value is the conditional expression to be used in the if
   statement that controls which version of the loop gets executed at
   runtime.

   The algorithm makes two assumptions:
     1) The number of bytes "n" in a vector is a power of 2.
     2) An address "a" is aligned if a%n is zero and this test can be done
        as a&(n-1) == 0.  For example, for 16 byte vectors the test is
        a&0xf == 0.  */

static void
vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
                                   tree *cond_expr,
                                   gimple_seq *cond_expr_stmt_list)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  vec<gimple *> may_misalign_stmts
    = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
  gimple *ref_stmt;
  int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
  tree mask_cst;
  unsigned int i;
  tree int_ptrsize_type;
  char tmp_name[20];
  tree or_tmp_name = NULL_TREE;
  tree and_tmp_name;
  gimple *and_stmt;
  tree ptrsize_zero;
  tree part_cond_expr;

  /* Check that mask is one less than a power of 2, i.e., mask is
     all zeros followed by all ones.  */
  gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));

  int_ptrsize_type = signed_type_for (ptr_type_node);

  /* Create expression (mask & (dr_1 | ... | dr_n)) where dr_i is the
     address of the first vector of the i'th data reference.  */

  FOR_EACH_VEC_ELT (may_misalign_stmts, i, ref_stmt)
    {
      gimple_seq new_stmt_list = NULL;
      tree addr_base;
      tree addr_tmp_name;
      tree new_or_tmp_name;
      gimple *addr_stmt, *or_stmt;
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (ref_stmt);
      tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      bool negative = tree_int_cst_compare
        (DR_STEP (STMT_VINFO_DATA_REF (stmt_vinfo)), size_zero_node) < 0;
      tree offset = negative
        ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;

      /* create: addr_tmp = (int)(address_of_first_vector) */
      addr_base =
        vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
                                              offset, loop);
      if (new_stmt_list != NULL)
        gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);

      sprintf (tmp_name, "addr2int%d", i);
      addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
      addr_stmt = gimple_build_assign (addr_tmp_name, NOP_EXPR, addr_base);
      gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);

      /* The addresses are ORed together.  */

      if (or_tmp_name != NULL_TREE)
        {
          /* create: or_tmp = or_tmp | addr_tmp */
          sprintf (tmp_name, "orptrs%d", i);
          new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL,
                                                tmp_name);
          or_stmt = gimple_build_assign (new_or_tmp_name, BIT_IOR_EXPR,
                                         or_tmp_name, addr_tmp_name);
          gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
          or_tmp_name = new_or_tmp_name;
        }
      else
        or_tmp_name = addr_tmp_name;

    } /* end for i */

  mask_cst = build_int_cst (int_ptrsize_type, mask);

  /* create: and_tmp = or_tmp & mask */
  and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");

  and_stmt = gimple_build_assign (and_tmp_name, BIT_AND_EXPR,
                                  or_tmp_name, mask_cst);
  gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);

  /* Make and_tmp the left operand of the conditional test against zero.
     If and_tmp has a nonzero bit then some address is unaligned.  */
  ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
  part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
                                and_tmp_name, ptrsize_zero);
  if (*cond_expr)
    *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
                              *cond_expr, part_cond_expr);
  else
    *cond_expr = part_cond_expr;
}

/* Given two data references and segment lengths described by DR_A and DR_B,
   create an expression checking if the two address ranges intersect with
   each other based on the index of the two addresses.  This can only be
   done if DR_A and DR_B refer to the same (array) object and the index is
   the only difference.  For example:

                       DR_A                       DR_B
      data-ref         arr[i]                     arr[j]
      base_object      arr                        arr
      index            {i_0, +, 1}_loop           {j_0, +, 1}_loop

   The addresses and their index are like:

        |<- ADDR_A    ->|          |<- ADDR_B    ->|
     ------------------------------------------------------->
        |   |   |   |   |          |   |   |   |   |   |
     ------------------------------------------------------->
        i_0 ...         i_0+4      j_0 ...         j_0+4

   We can create an expression based on the index rather than the address:

     (i_0 + 4 < j_0 || j_0 + 4 < i_0)

   Note the evolution step of the index needs to be considered in the
   comparison.
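
   For example (illustrative numbers only), with a segment length of 64
   bytes, DR_STEP 4 and index step 1, the code below computes
   niter_len = (64 + 4 - 1) / 4 = 16 and, for the index CHRECs
   {i_0, +, 1} and {j_0, +, 1}, emits the check

     (i_0 + 1 * 16 <= j_0 || j_0 + 1 * 16 <= i_0)

   i.e. the index ranges [i_0, i_0 + 16) and [j_0, j_0 + 16) must not
   overlap.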
 */

static bool
create_intersect_range_checks_index (loop_vec_info loop_vinfo, tree *cond_expr,
                                     const dr_with_seg_len& dr_a,
                                     const dr_with_seg_len& dr_b)
{
  if (integer_zerop (DR_STEP (dr_a.dr))
      || integer_zerop (DR_STEP (dr_b.dr))
      || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
    return false;

  if (!tree_fits_uhwi_p (dr_a.seg_len) || !tree_fits_uhwi_p (dr_b.seg_len))
    return false;

  if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
    return false;

  if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
    return false;

  if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
    return false;

  gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);

  bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
  unsigned HOST_WIDE_INT abs_step
    = absu_hwi (tree_to_shwi (DR_STEP (dr_a.dr)));

  unsigned HOST_WIDE_INT seg_len1 = tree_to_uhwi (dr_a.seg_len);
  unsigned HOST_WIDE_INT seg_len2 = tree_to_uhwi (dr_b.seg_len);
  /* Infer the number of iterations with which the memory segment is accessed
     by DR.  In other words, alias is checked if the memory segment accessed
     by DR_A in some iterations intersects with the memory segment accessed
     by DR_B in the same number of iterations.
     Note the segment length is a linear function of the number of iterations
     with DR_STEP as the coefficient.  */
  unsigned HOST_WIDE_INT niter_len1 = (seg_len1 + abs_step - 1) / abs_step;
  unsigned HOST_WIDE_INT niter_len2 = (seg_len2 + abs_step - 1) / abs_step;

  unsigned int i;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  for (i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
    {
      tree access1 = DR_ACCESS_FN (dr_a.dr, i);
      tree access2 = DR_ACCESS_FN (dr_b.dr, i);
      /* The two indices must be the same if they are not scevs, or not
         scevs wrt the current loop being vectorized.  */
      if (TREE_CODE (access1) != POLYNOMIAL_CHREC
          || TREE_CODE (access2) != POLYNOMIAL_CHREC
          || CHREC_VARIABLE (access1) != (unsigned)loop->num
          || CHREC_VARIABLE (access2) != (unsigned)loop->num)
        {
          if (operand_equal_p (access1, access2, 0))
            continue;

          return false;
        }
      /* The two indices must have the same step.  */
      if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
        return false;

      tree idx_step = CHREC_RIGHT (access1);
      /* Index must have const step, otherwise DR_STEP won't be constant.  */
      gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
      /* Index must evaluate in the same direction as DR.  */
      gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);

      tree min1 = CHREC_LEFT (access1);
      tree min2 = CHREC_LEFT (access2);
      if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
        return false;

      /* Ideally, aliasing can be checked against the loop's control IV, but
         we need to prove a linear mapping between the control IV and the
         reference index.  Although that should be true, we check against
         the (array) index of the data reference instead.  Like the segment
         length, the index length is a linear function of the number of
         iterations with index_step as the coefficient, i.e.,
         niter_len * idx_step.
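         (Continuing the illustrative numbers from the function comment
         above: idx_step 1 and niter_len 16 give an index length of 16,
         so the range checked for {i_0, +, 1} is [i_0, i_0 + 16).)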
         */
      tree idx_len1 = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
                                   build_int_cst (TREE_TYPE (min1),
                                                  niter_len1));
      tree idx_len2 = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
                                   build_int_cst (TREE_TYPE (min2),
                                                  niter_len2));
      tree max1 = fold_build2 (PLUS_EXPR, TREE_TYPE (min1), min1, idx_len1);
      tree max2 = fold_build2 (PLUS_EXPR, TREE_TYPE (min2), min2, idx_len2);
      /* Adjust ranges for negative step.  */
      if (neg_step)
        {
          min1 = fold_build2 (MINUS_EXPR, TREE_TYPE (min1), max1, idx_step);
          max1 = fold_build2 (MINUS_EXPR, TREE_TYPE (min1),
                              CHREC_LEFT (access1), idx_step);
          min2 = fold_build2 (MINUS_EXPR, TREE_TYPE (min2), max2, idx_step);
          max2 = fold_build2 (MINUS_EXPR, TREE_TYPE (min2),
                              CHREC_LEFT (access2), idx_step);
        }
      tree part_cond_expr
        = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
                       fold_build2 (LE_EXPR, boolean_type_node, max1, min2),
                       fold_build2 (LE_EXPR, boolean_type_node, max2, min1));
      if (*cond_expr)
        *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
                                  *cond_expr, part_cond_expr);
      else
        *cond_expr = part_cond_expr;
    }
  return true;
}

/* Given two data references and segment lengths described by DR_A and DR_B,
   create an expression checking if the two address ranges intersect with
   each other:

     ((DR_A_addr_0 + DR_A_segment_length_0) <= DR_B_addr_0)
     || ((DR_B_addr_0 + DR_B_segment_length_0) <= DR_A_addr_0)  */

static void
create_intersect_range_checks (loop_vec_info loop_vinfo, tree *cond_expr,
                               const dr_with_seg_len& dr_a,
                               const dr_with_seg_len& dr_b)
{
  *cond_expr = NULL_TREE;
  if (create_intersect_range_checks_index (loop_vinfo, cond_expr, dr_a, dr_b))
    return;

  tree segment_length_a = dr_a.seg_len;
  tree segment_length_b = dr_b.seg_len;
  tree addr_base_a = DR_BASE_ADDRESS (dr_a.dr);
  tree addr_base_b = DR_BASE_ADDRESS (dr_b.dr);
  tree offset_a = DR_OFFSET (dr_a.dr), offset_b = DR_OFFSET (dr_b.dr);

  offset_a = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_a),
                          offset_a, DR_INIT (dr_a.dr));
  offset_b = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_b),
                          offset_b, DR_INIT (dr_b.dr));
  addr_base_a = fold_build_pointer_plus (addr_base_a, offset_a);
  addr_base_b = fold_build_pointer_plus (addr_base_b, offset_b);

  tree seg_a_min = addr_base_a;
  tree seg_a_max = fold_build_pointer_plus (addr_base_a, segment_length_a);
  /* For negative step, we need to adjust the address range by TYPE_SIZE_UNIT
     bytes, e.g., a negative-step access int a[3] -> a[1] has range
     [a+4, a+16) instead of [a, a+12).  */
  if (tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0)
    {
      tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a.dr)));
      seg_a_min = fold_build_pointer_plus (seg_a_max, unit_size);
      seg_a_max = fold_build_pointer_plus (addr_base_a, unit_size);
    }

  tree seg_b_min = addr_base_b;
  tree seg_b_max = fold_build_pointer_plus (addr_base_b, segment_length_b);
  if (tree_int_cst_compare (DR_STEP (dr_b.dr), size_zero_node) < 0)
    {
      tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_b.dr)));
      seg_b_min = fold_build_pointer_plus (seg_b_max, unit_size);
      seg_b_max = fold_build_pointer_plus (addr_base_b, unit_size);
    }
  *cond_expr
    = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
                   fold_build2 (LE_EXPR, boolean_type_node, seg_a_max, seg_b_min),
                   fold_build2 (LE_EXPR, boolean_type_node, seg_b_max, seg_a_min));
}

/* Function vect_create_cond_for_alias_checks.

   Create a conditional expression that represents the run-time checks for
   overlapping of address ranges represented by a list of data reference
   relations passed as input.

   Input:
   COND_EXPR - input conditional expression.  New conditions will be chained
               with logical AND operation.  If it is NULL, then the function
               is used to return the number of alias checks.
   LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
                to be checked.

   Output:
   COND_EXPR - conditional expression.

   The returned COND_EXPR is the conditional expression to be used in the if
   statement that controls which version of the loop gets executed at
   runtime.  */

void
vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
{
  vec<dr_with_seg_len_pair_t> comp_alias_ddrs =
    LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
  tree part_cond_expr;

  if (comp_alias_ddrs.is_empty ())
    return;

  for (size_t i = 0, s = comp_alias_ddrs.length (); i < s; ++i)
    {
      const dr_with_seg_len& dr_a = comp_alias_ddrs[i].first;
      const dr_with_seg_len& dr_b = comp_alias_ddrs[i].second;

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "create runtime check for data references ");
          dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a.dr));
          dump_printf (MSG_NOTE, " and ");
          dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b.dr));
          dump_printf (MSG_NOTE, "\n");
        }

      /* Create a condition expression for each pair of data references.  */
      create_intersect_range_checks (loop_vinfo, &part_cond_expr, dr_a, dr_b);
      if (*cond_expr)
        *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
                                  *cond_expr, part_cond_expr);
      else
        *cond_expr = part_cond_expr;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "created %u versioning for alias checks.\n",
                     comp_alias_ddrs.length ());
}


/* Function vect_loop_versioning.

   If the loop has data references that may or may not be aligned and/or
   has data reference relations whose independence was not proven, then
   two versions of the loop need to be generated, one which is vectorized
   and one which isn't.  A test is then generated to control which of the
   loops is executed.  The test checks for the alignment of all of the
   data references that may or may not be aligned.  An additional
   sequence of runtime tests is generated for each pair of DDRs whose
   independence was not proven.  The vectorized version of the loop is
   executed only if both the alias and alignment tests pass.

   The test generated to check which version of the loop is executed
   is modified to also check for profitability as indicated by the
   cost model threshold TH.

   The versioning precondition(s) are placed in *COND_EXPR and
   *COND_EXPR_STMT_LIST.
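
   As a rough source-level sketch of the result (illustrative only; the
   names below are invented), versioning a loop with a profitability
   threshold TH, one alignment check and one alias check produces
   something like:

     if (niters > TH
         && ((addr_a | addr_b) & (vecsize - 1)) == 0
         && (addr_a + seg_len_a <= addr_b || addr_b + seg_len_b <= addr_a))
       {
         ... vectorized loop (LOOP) ...
       }
     else
       {
         ... scalar loop (NLOOP) ...
       }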
 */

void
vect_loop_versioning (loop_vec_info loop_vinfo,
                      unsigned int th, bool check_profitability)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *nloop;
  struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
  basic_block condition_bb;
  gphi_iterator gsi;
  gimple_stmt_iterator cond_exp_gsi;
  basic_block merge_bb;
  basic_block new_exit_bb;
  edge new_exit_e, e;
  gphi *orig_phi, *new_phi;
  tree cond_expr = NULL_TREE;
  gimple_seq cond_expr_stmt_list = NULL;
  tree arg;
  unsigned prob = 4 * REG_BR_PROB_BASE / 5;
  gimple_seq gimplify_stmt_list = NULL;
  tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
  bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
  bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
  bool version_niter = LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo);

  if (check_profitability)
    cond_expr = fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
                             build_int_cst (TREE_TYPE (scalar_loop_iters),
                                            th));

  if (version_niter)
    vect_create_cond_for_niters_checks (loop_vinfo, &cond_expr);

  if (cond_expr)
    cond_expr = force_gimple_operand_1 (cond_expr, &cond_expr_stmt_list,
                                        is_gimple_condexpr, NULL_TREE);

  if (version_align)
    vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
                                       &cond_expr_stmt_list);

  if (version_alias)
    vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);

  cond_expr = force_gimple_operand_1 (cond_expr, &gimplify_stmt_list,
                                      is_gimple_condexpr, NULL_TREE);
  gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);

  initialize_original_copy_tables ();
  if (scalar_loop)
    {
      edge scalar_e;
      basic_block preheader, scalar_preheader;

      /* We don't want to scale SCALAR_LOOP's frequencies, we need to
         scale LOOP's frequencies instead.  */
      nloop = loop_version (scalar_loop, cond_expr, &condition_bb,
                            prob, REG_BR_PROB_BASE - prob,
                            REG_BR_PROB_BASE, REG_BR_PROB_BASE - prob, true);
      scale_loop_frequencies (loop, prob, REG_BR_PROB_BASE);
      /* CONDITION_BB was created above SCALAR_LOOP's preheader,
         while we need to move it above LOOP's preheader.
         */
      e = loop_preheader_edge (loop);
      scalar_e = loop_preheader_edge (scalar_loop);
      gcc_assert (empty_block_p (e->src)
                  && single_pred_p (e->src));
      gcc_assert (empty_block_p (scalar_e->src)
                  && single_pred_p (scalar_e->src));
      gcc_assert (single_pred_p (condition_bb));
      preheader = e->src;
      scalar_preheader = scalar_e->src;
      scalar_e = find_edge (condition_bb, scalar_preheader);
      e = single_pred_edge (preheader);
      redirect_edge_and_branch_force (single_pred_edge (condition_bb),
                                      scalar_preheader);
      redirect_edge_and_branch_force (scalar_e, preheader);
      redirect_edge_and_branch_force (e, condition_bb);
      set_immediate_dominator (CDI_DOMINATORS, condition_bb,
                               single_pred (condition_bb));
      set_immediate_dominator (CDI_DOMINATORS, scalar_preheader,
                               single_pred (scalar_preheader));
      set_immediate_dominator (CDI_DOMINATORS, preheader,
                               condition_bb);
    }
  else
    nloop = loop_version (loop, cond_expr, &condition_bb,
                          prob, REG_BR_PROB_BASE - prob,
                          prob, REG_BR_PROB_BASE - prob, true);

  if (version_niter)
    {
      /* The versioned loop could be infinite; we need to clear existing
         niter information, which is copied from the original loop.  */
      gcc_assert (loop_constraint_set_p (loop, LOOP_C_FINITE));
      vect_free_loop_info_assumptions (nloop);
      /* And set constraint LOOP_C_INFINITE for the niter analyzer.  */
      loop_constraint_set (loop, LOOP_C_INFINITE);
    }

  if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
      && dump_enabled_p ())
    {
      if (version_alias)
        dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                         "loop versioned for vectorization because of "
                         "possible aliasing\n");
      if (version_align)
        dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                         "loop versioned for vectorization to enhance "
                         "alignment\n");

    }
  free_original_copy_tables ();

  /* Loop versioning violates an assumption we try to maintain during
     vectorization - that the loop exit block has a single predecessor.
     After versioning, the exit block of both loop versions is the same
     basic block (i.e. it has two predecessors).  Just in order to simplify
     following transformations in the vectorizer, we fix this situation
     here by adding a new (empty) block on the exit-edge of the loop,
     with the proper loop-exit phis to maintain loop-closed-form.
     If loop versioning wasn't done from loop, but from scalar_loop instead,
     merge_bb will already have just a single predecessor.  */

  merge_bb = single_exit (loop)->dest;
  if (scalar_loop == NULL || EDGE_COUNT (merge_bb->preds) >= 2)
    {
      gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2);
      new_exit_bb = split_edge (single_exit (loop));
      new_exit_e = single_exit (loop);
      e = EDGE_SUCC (new_exit_bb, 0);

      for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          tree new_res;
          orig_phi = gsi.phi ();
          new_res = copy_ssa_name (PHI_RESULT (orig_phi));
          new_phi = create_phi_node (new_res, new_exit_bb);
          arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
          add_phi_arg (new_phi, arg, new_exit_e,
                       gimple_phi_arg_location_from_edge (orig_phi, e));
          adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
        }
    }

  /* End loop-exit-fixes after versioning.
     */

  if (cond_expr_stmt_list)
    {
      cond_exp_gsi = gsi_last_bb (condition_bb);
      gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
                             GSI_SAME_STMT);
    }
  update_ssa (TODO_update_ssa);
}