1 /* Loop invariant motion. 2 Copyright (C) 2003-2022 Free Software Foundation, Inc. 3 4 This file is part of GCC. 5 6 GCC is free software; you can redistribute it and/or modify it 7 under the terms of the GNU General Public License as published by the 8 Free Software Foundation; either version 3, or (at your option) any 9 later version. 10 11 GCC is distributed in the hope that it will be useful, but WITHOUT 12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with GCC; see the file COPYING3. If not see 18 <http://www.gnu.org/licenses/>. */ 19 20 #include "config.h" 21 #include "system.h" 22 #include "coretypes.h" 23 #include "backend.h" 24 #include "tree.h" 25 #include "gimple.h" 26 #include "cfghooks.h" 27 #include "tree-pass.h" 28 #include "ssa.h" 29 #include "gimple-pretty-print.h" 30 #include "fold-const.h" 31 #include "cfganal.h" 32 #include "tree-eh.h" 33 #include "gimplify.h" 34 #include "gimple-iterator.h" 35 #include "tree-cfg.h" 36 #include "tree-ssa-loop-manip.h" 37 #include "tree-ssa-loop.h" 38 #include "tree-into-ssa.h" 39 #include "cfgloop.h" 40 #include "tree-affine.h" 41 #include "tree-ssa-propagate.h" 42 #include "trans-mem.h" 43 #include "gimple-fold.h" 44 #include "tree-scalar-evolution.h" 45 #include "tree-ssa-loop-niter.h" 46 #include "alias.h" 47 #include "builtins.h" 48 #include "tree-dfa.h" 49 #include "tree-ssa.h" 50 #include "dbgcnt.h" 51 52 /* TODO: Support for predicated code motion. I.e. 53 54 while (1) 55 { 56 if (cond) 57 { 58 a = inv; 59 something; 60 } 61 } 62 63 Where COND and INV are invariants, but evaluating INV may trap or be 64 invalid from some other reason if !COND. This may be transformed to 65 66 if (cond) 67 a = inv; 68 while (1) 69 { 70 if (cond) 71 something; 72 } */ 73 74 /* The auxiliary data kept for each statement. */ 75 76 struct lim_aux_data 77 { 78 class loop *max_loop; /* The outermost loop in that the statement 79 is invariant. */ 80 81 class loop *tgt_loop; /* The loop out of that we want to move the 82 invariant. */ 83 84 class loop *always_executed_in; 85 /* The outermost loop for that we are sure 86 the statement is executed if the loop 87 is entered. */ 88 89 unsigned cost; /* Cost of the computation performed by the 90 statement. */ 91 92 unsigned ref; /* The simple_mem_ref in this stmt or 0. */ 93 94 vec<gimple *> depends; /* Vector of statements that must be also 95 hoisted out of the loop when this statement 96 is hoisted; i.e. those that define the 97 operands of the statement and are inside of 98 the MAX_LOOP loop. */ 99 }; 100 101 /* Maps statements to their lim_aux_data. */ 102 103 static hash_map<gimple *, lim_aux_data *> *lim_aux_data_map; 104 105 /* Description of a memory reference location. */ 106 107 struct mem_ref_loc 108 { 109 tree *ref; /* The reference itself. */ 110 gimple *stmt; /* The statement in that it occurs. */ 111 }; 112 113 114 /* Description of a memory reference. */ 115 116 class im_mem_ref 117 { 118 public: 119 unsigned id : 30; /* ID assigned to the memory reference 120 (its index in memory_accesses.refs_list) */ 121 unsigned ref_canonical : 1; /* Whether mem.ref was canonicalized. */ 122 unsigned ref_decomposed : 1; /* Whether the ref was hashed from mem. */ 123 hashval_t hash; /* Its hash value. */ 124 125 /* The memory access itself and associated caching of alias-oracle 126 query meta-data. 
     We are using mem.ref == error_mark_node for the case the reference
     is represented by its single access stmt in accesses_in_loop[0].  */
  ao_ref mem;

  bitmap stored;		/* The set of loops in that this memory location
				   is stored to.  */
  bitmap loaded;		/* The set of loops in that this memory location
				   is loaded from.  */
  vec<mem_ref_loc> accesses_in_loop;
				/* The locations of the accesses.  */

  /* The following set is computed on demand.  */
  bitmap_head dep_loop;		/* The set of loops in that the memory
				   reference is {in,}dependent in
				   different modes.  */
};

/* We use six bits per loop in the ref->dep_loop bitmap to record
   the dep_kind x dep_state combinations.  */

enum dep_kind { lim_raw, sm_war, sm_waw };
enum dep_state { dep_unknown, dep_independent, dep_dependent };

/* The coldest outermost loop for a given loop.  */
vec<class loop *> coldest_outermost_loop;
/* The hotter outer loop nearest to a given loop.  */
vec<class loop *> hotter_than_inner_loop;

/* Populate the loop dependence cache of REF for LOOP, KIND with STATE.  */

static void
record_loop_dependence (class loop *loop, im_mem_ref *ref,
			dep_kind kind, dep_state state)
{
  gcc_assert (state != dep_unknown);
  unsigned bit = 6 * loop->num + kind * 2 + (state == dep_dependent ? 1 : 0);
  bitmap_set_bit (&ref->dep_loop, bit);
}

/* Query the loop dependence cache of REF for LOOP, KIND.  */

static dep_state
query_loop_dependence (class loop *loop, im_mem_ref *ref, dep_kind kind)
{
  unsigned first_bit = 6 * loop->num + kind * 2;
  if (bitmap_bit_p (&ref->dep_loop, first_bit))
    return dep_independent;
  else if (bitmap_bit_p (&ref->dep_loop, first_bit + 1))
    return dep_dependent;
  return dep_unknown;
}

/* Mem_ref hashtable helpers.  */

struct mem_ref_hasher : nofree_ptr_hash <im_mem_ref>
{
  typedef ao_ref *compare_type;
  static inline hashval_t hash (const im_mem_ref *);
  static inline bool equal (const im_mem_ref *, const ao_ref *);
};

/* A hash function for class im_mem_ref object MEM.  */

inline hashval_t
mem_ref_hasher::hash (const im_mem_ref *mem)
{
  return mem->hash;
}

/* An equality function for class im_mem_ref object MEM1 with
   memory reference OBJ2.  */

inline bool
mem_ref_hasher::equal (const im_mem_ref *mem1, const ao_ref *obj2)
{
  if (obj2->max_size_known_p ())
    return (mem1->ref_decomposed
	    && ((TREE_CODE (mem1->mem.base) == MEM_REF
		 && TREE_CODE (obj2->base) == MEM_REF
		 && operand_equal_p (TREE_OPERAND (mem1->mem.base, 0),
				     TREE_OPERAND (obj2->base, 0), 0)
		 && known_eq (mem_ref_offset (mem1->mem.base) * BITS_PER_UNIT
			      + mem1->mem.offset,
			      mem_ref_offset (obj2->base) * BITS_PER_UNIT
			      + obj2->offset))
		|| (operand_equal_p (mem1->mem.base, obj2->base, 0)
		    && known_eq (mem1->mem.offset, obj2->offset)))
	    && known_eq (mem1->mem.size, obj2->size)
	    && known_eq (mem1->mem.max_size, obj2->max_size)
	    && mem1->mem.volatile_p == obj2->volatile_p
	    && (mem1->mem.ref_alias_set == obj2->ref_alias_set
		/* We are not canonicalizing alias-sets but for the
		   special-case we didn't canonicalize yet and the
		   incoming ref is an alias-set zero MEM we pick
		   the correct one already.
*/ 220 || (!mem1->ref_canonical 221 && (TREE_CODE (obj2->ref) == MEM_REF 222 || TREE_CODE (obj2->ref) == TARGET_MEM_REF) 223 && obj2->ref_alias_set == 0) 224 /* Likewise if there's a canonical ref with alias-set zero. */ 225 || (mem1->ref_canonical && mem1->mem.ref_alias_set == 0)) 226 && types_compatible_p (TREE_TYPE (mem1->mem.ref), 227 TREE_TYPE (obj2->ref))); 228 else 229 return operand_equal_p (mem1->mem.ref, obj2->ref, 0); 230 } 231 232 233 /* Description of memory accesses in loops. */ 234 235 static struct 236 { 237 /* The hash table of memory references accessed in loops. */ 238 hash_table<mem_ref_hasher> *refs; 239 240 /* The list of memory references. */ 241 vec<im_mem_ref *> refs_list; 242 243 /* The set of memory references accessed in each loop. */ 244 vec<bitmap_head> refs_loaded_in_loop; 245 246 /* The set of memory references stored in each loop. */ 247 vec<bitmap_head> refs_stored_in_loop; 248 249 /* The set of memory references stored in each loop, including subloops . */ 250 vec<bitmap_head> all_refs_stored_in_loop; 251 252 /* Cache for expanding memory addresses. */ 253 hash_map<tree, name_expansion *> *ttae_cache; 254 } memory_accesses; 255 256 /* Obstack for the bitmaps in the above data structures. */ 257 static bitmap_obstack lim_bitmap_obstack; 258 static obstack mem_ref_obstack; 259 260 static bool ref_indep_loop_p (class loop *, im_mem_ref *, dep_kind); 261 static bool ref_always_accessed_p (class loop *, im_mem_ref *, bool); 262 static bool refs_independent_p (im_mem_ref *, im_mem_ref *, bool = true); 263 264 /* Minimum cost of an expensive expression. */ 265 #define LIM_EXPENSIVE ((unsigned) param_lim_expensive) 266 267 /* The outermost loop for which execution of the header guarantees that the 268 block will be executed. */ 269 #define ALWAYS_EXECUTED_IN(BB) ((class loop *) (BB)->aux) 270 #define SET_ALWAYS_EXECUTED_IN(BB, VAL) ((BB)->aux = (void *) (VAL)) 271 272 /* ID of the shared unanalyzable mem. */ 273 #define UNANALYZABLE_MEM_ID 0 274 275 /* Whether the reference was analyzable. */ 276 #define MEM_ANALYZABLE(REF) ((REF)->id != UNANALYZABLE_MEM_ID) 277 278 static struct lim_aux_data * 279 init_lim_data (gimple *stmt) 280 { 281 lim_aux_data *p = XCNEW (struct lim_aux_data); 282 lim_aux_data_map->put (stmt, p); 283 284 return p; 285 } 286 287 static struct lim_aux_data * 288 get_lim_data (gimple *stmt) 289 { 290 lim_aux_data **p = lim_aux_data_map->get (stmt); 291 if (!p) 292 return NULL; 293 294 return *p; 295 } 296 297 /* Releases the memory occupied by DATA. */ 298 299 static void 300 free_lim_aux_data (struct lim_aux_data *data) 301 { 302 data->depends.release (); 303 free (data); 304 } 305 306 static void 307 clear_lim_data (gimple *stmt) 308 { 309 lim_aux_data **p = lim_aux_data_map->get (stmt); 310 if (!p) 311 return; 312 313 free_lim_aux_data (*p); 314 *p = NULL; 315 } 316 317 318 /* The possibilities of statement movement. */ 319 enum move_pos 320 { 321 MOVE_IMPOSSIBLE, /* No movement -- side effect expression. */ 322 MOVE_PRESERVE_EXECUTION, /* Must not cause the non-executed statement 323 become executed -- memory accesses, ... */ 324 MOVE_POSSIBLE /* Unlimited movement. */ 325 }; 326 327 328 /* If it is possible to hoist the statement STMT unconditionally, 329 returns MOVE_POSSIBLE. 330 If it is possible to hoist the statement STMT, but we must avoid making 331 it executed if it would not be executed in the original program (e.g. 332 because it may trap), return MOVE_PRESERVE_EXECUTION. 333 Otherwise return MOVE_IMPOSSIBLE. 
*/ 334 335 static enum move_pos 336 movement_possibility_1 (gimple *stmt) 337 { 338 tree lhs; 339 enum move_pos ret = MOVE_POSSIBLE; 340 341 if (flag_unswitch_loops 342 && gimple_code (stmt) == GIMPLE_COND) 343 { 344 /* If we perform unswitching, force the operands of the invariant 345 condition to be moved out of the loop. */ 346 return MOVE_POSSIBLE; 347 } 348 349 if (gimple_code (stmt) == GIMPLE_PHI 350 && gimple_phi_num_args (stmt) <= 2 351 && !virtual_operand_p (gimple_phi_result (stmt)) 352 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (stmt))) 353 return MOVE_POSSIBLE; 354 355 if (gimple_get_lhs (stmt) == NULL_TREE) 356 return MOVE_IMPOSSIBLE; 357 358 if (gimple_vdef (stmt)) 359 return MOVE_IMPOSSIBLE; 360 361 if (stmt_ends_bb_p (stmt) 362 || gimple_has_volatile_ops (stmt) 363 || gimple_has_side_effects (stmt) 364 || stmt_could_throw_p (cfun, stmt)) 365 return MOVE_IMPOSSIBLE; 366 367 if (is_gimple_call (stmt)) 368 { 369 /* While pure or const call is guaranteed to have no side effects, we 370 cannot move it arbitrarily. Consider code like 371 372 char *s = something (); 373 374 while (1) 375 { 376 if (s) 377 t = strlen (s); 378 else 379 t = 0; 380 } 381 382 Here the strlen call cannot be moved out of the loop, even though 383 s is invariant. In addition to possibly creating a call with 384 invalid arguments, moving out a function call that is not executed 385 may cause performance regressions in case the call is costly and 386 not executed at all. */ 387 ret = MOVE_PRESERVE_EXECUTION; 388 lhs = gimple_call_lhs (stmt); 389 } 390 else if (is_gimple_assign (stmt)) 391 lhs = gimple_assign_lhs (stmt); 392 else 393 return MOVE_IMPOSSIBLE; 394 395 if (TREE_CODE (lhs) == SSA_NAME 396 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)) 397 return MOVE_IMPOSSIBLE; 398 399 if (TREE_CODE (lhs) != SSA_NAME 400 || gimple_could_trap_p (stmt)) 401 return MOVE_PRESERVE_EXECUTION; 402 403 /* Non local loads in a transaction cannot be hoisted out. Well, 404 unless the load happens on every path out of the loop, but we 405 don't take this into account yet. */ 406 if (flag_tm 407 && gimple_in_transaction (stmt) 408 && gimple_assign_single_p (stmt)) 409 { 410 tree rhs = gimple_assign_rhs1 (stmt); 411 if (DECL_P (rhs) && is_global_var (rhs)) 412 { 413 if (dump_file) 414 { 415 fprintf (dump_file, "Cannot hoist conditional load of "); 416 print_generic_expr (dump_file, rhs, TDF_SLIM); 417 fprintf (dump_file, " because it is in a transaction.\n"); 418 } 419 return MOVE_IMPOSSIBLE; 420 } 421 } 422 423 return ret; 424 } 425 426 static enum move_pos 427 movement_possibility (gimple *stmt) 428 { 429 enum move_pos pos = movement_possibility_1 (stmt); 430 if (pos == MOVE_POSSIBLE) 431 { 432 use_operand_p use_p; 433 ssa_op_iter ssa_iter; 434 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, ssa_iter, SSA_OP_USE) 435 if (TREE_CODE (USE_FROM_PTR (use_p)) == SSA_NAME 436 && ssa_name_maybe_undef_p (USE_FROM_PTR (use_p))) 437 return MOVE_PRESERVE_EXECUTION; 438 } 439 return pos; 440 } 441 442 443 /* Compare the profile count inequality of bb and loop's preheader, it is 444 three-state as stated in profile-count.h, FALSE is returned if inequality 445 cannot be decided. */ 446 bool 447 bb_colder_than_loop_preheader (basic_block bb, class loop *loop) 448 { 449 gcc_assert (bb && loop); 450 return bb->count < loop_preheader_edge (loop)->src->count; 451 } 452 453 /* Check coldest loop between OUTERMOST_LOOP and LOOP by comparing profile 454 count. 
   The check is done in three steps:
   1) Check whether CURR_BB is cold in its own loop_father; if it is cold,
   just return NULL, which means it should not be moved out at all;
   2) CURR_BB is NOT cold, check if the pre-computed COLDEST_LOOP is outside
   of OUTERMOST_LOOP; if it is inside of OUTERMOST_LOOP, return the
   COLDEST_LOOP;
   3) If COLDEST_LOOP is outside of OUTERMOST_LOOP, check whether there is a
   hotter loop between OUTERMOST_LOOP and loop in the pre-computed
   HOTTER_THAN_INNER_LOOP; return its nested inner loop, otherwise return
   OUTERMOST_LOOP.
   At last, the coldest loop is inside of OUTERMOST_LOOP, so just return it
   as the hoist target.  */

static class loop *
get_coldest_out_loop (class loop *outermost_loop, class loop *loop,
		      basic_block curr_bb)
{
  gcc_assert (outermost_loop == loop
	      || flow_loop_nested_p (outermost_loop, loop));

  /* If bb_colder_than_loop_preheader returns false due to the three-state
     comparison, OUTERMOST_LOOP is returned finally to preserve the behavior.
     Otherwise, return the coldest loop between OUTERMOST_LOOP and LOOP.  */
  if (curr_bb && bb_colder_than_loop_preheader (curr_bb, loop))
    return NULL;

  class loop *coldest_loop = coldest_outermost_loop[loop->num];
  if (loop_depth (coldest_loop) < loop_depth (outermost_loop))
    {
      class loop *hotter_loop = hotter_than_inner_loop[loop->num];
      if (!hotter_loop
	  || loop_depth (hotter_loop) < loop_depth (outermost_loop))
	return outermost_loop;

      /* hotter_loop is between OUTERMOST_LOOP and LOOP like:
	 [loop tree root, ..., coldest_loop, ..., outermost_loop, ...,
	 hotter_loop, second_coldest_loop, ..., loop]
	 return second_coldest_loop to be the hoist target.  */
      class loop *aloop;
      for (aloop = hotter_loop->inner; aloop; aloop = aloop->next)
	if (aloop == loop || flow_loop_nested_p (aloop, loop))
	  return aloop;
    }
  return coldest_loop;
}

/* Suppose that operand DEF is used inside the LOOP.  Returns the outermost
   loop to that we could move the expression using DEF if it did not have
   other operands, i.e. the outermost loop enclosing LOOP in that the value
   of DEF is invariant.  */

static class loop *
outermost_invariant_loop (tree def, class loop *loop)
{
  gimple *def_stmt;
  basic_block def_bb;
  class loop *max_loop;
  struct lim_aux_data *lim_data;

  if (!def)
    return superloop_at_depth (loop, 1);

  if (TREE_CODE (def) != SSA_NAME)
    {
      gcc_assert (is_gimple_min_invariant (def));
      return superloop_at_depth (loop, 1);
    }

  def_stmt = SSA_NAME_DEF_STMT (def);
  def_bb = gimple_bb (def_stmt);
  if (!def_bb)
    return superloop_at_depth (loop, 1);

  max_loop = find_common_loop (loop, def_bb->loop_father);

  lim_data = get_lim_data (def_stmt);
  if (lim_data != NULL && lim_data->max_loop != NULL)
    max_loop = find_common_loop (max_loop,
				 loop_outer (lim_data->max_loop));
  if (max_loop == loop)
    return NULL;
  max_loop = superloop_at_depth (loop, loop_depth (max_loop) + 1);

  return max_loop;
}

/* DATA is a structure containing information associated with a statement
   inside LOOP.  DEF is one of the operands of this statement.

   Find the outermost loop enclosing LOOP in that value of DEF is invariant
   and record this in DATA->max_loop field.  If DEF itself is defined inside
   this loop as well (i.e.
we need to hoist it out of the loop if we want 546 to hoist the statement represented by DATA), record the statement in that 547 DEF is defined to the DATA->depends list. Additionally if ADD_COST is true, 548 add the cost of the computation of DEF to the DATA->cost. 549 550 If DEF is not invariant in LOOP, return false. Otherwise return TRUE. */ 551 552 static bool 553 add_dependency (tree def, struct lim_aux_data *data, class loop *loop, 554 bool add_cost) 555 { 556 gimple *def_stmt = SSA_NAME_DEF_STMT (def); 557 basic_block def_bb = gimple_bb (def_stmt); 558 class loop *max_loop; 559 struct lim_aux_data *def_data; 560 561 if (!def_bb) 562 return true; 563 564 max_loop = outermost_invariant_loop (def, loop); 565 if (!max_loop) 566 return false; 567 568 if (flow_loop_nested_p (data->max_loop, max_loop)) 569 data->max_loop = max_loop; 570 571 def_data = get_lim_data (def_stmt); 572 if (!def_data) 573 return true; 574 575 if (add_cost 576 /* Only add the cost if the statement defining DEF is inside LOOP, 577 i.e. if it is likely that by moving the invariants dependent 578 on it, we will be able to avoid creating a new register for 579 it (since it will be only used in these dependent invariants). */ 580 && def_bb->loop_father == loop) 581 data->cost += def_data->cost; 582 583 data->depends.safe_push (def_stmt); 584 585 return true; 586 } 587 588 /* Returns an estimate for a cost of statement STMT. The values here 589 are just ad-hoc constants, similar to costs for inlining. */ 590 591 static unsigned 592 stmt_cost (gimple *stmt) 593 { 594 /* Always try to create possibilities for unswitching. */ 595 if (gimple_code (stmt) == GIMPLE_COND 596 || gimple_code (stmt) == GIMPLE_PHI) 597 return LIM_EXPENSIVE; 598 599 /* We should be hoisting calls if possible. */ 600 if (is_gimple_call (stmt)) 601 { 602 tree fndecl; 603 604 /* Unless the call is a builtin_constant_p; this always folds to a 605 constant, so moving it is useless. */ 606 fndecl = gimple_call_fndecl (stmt); 607 if (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_CONSTANT_P)) 608 return 0; 609 610 return LIM_EXPENSIVE; 611 } 612 613 /* Hoisting memory references out should almost surely be a win. */ 614 if (gimple_references_memory_p (stmt)) 615 return LIM_EXPENSIVE; 616 617 if (gimple_code (stmt) != GIMPLE_ASSIGN) 618 return 1; 619 620 switch (gimple_assign_rhs_code (stmt)) 621 { 622 case MULT_EXPR: 623 case WIDEN_MULT_EXPR: 624 case WIDEN_MULT_PLUS_EXPR: 625 case WIDEN_MULT_MINUS_EXPR: 626 case DOT_PROD_EXPR: 627 case TRUNC_DIV_EXPR: 628 case CEIL_DIV_EXPR: 629 case FLOOR_DIV_EXPR: 630 case ROUND_DIV_EXPR: 631 case EXACT_DIV_EXPR: 632 case CEIL_MOD_EXPR: 633 case FLOOR_MOD_EXPR: 634 case ROUND_MOD_EXPR: 635 case TRUNC_MOD_EXPR: 636 case RDIV_EXPR: 637 /* Division and multiplication are usually expensive. */ 638 return LIM_EXPENSIVE; 639 640 case LSHIFT_EXPR: 641 case RSHIFT_EXPR: 642 case WIDEN_LSHIFT_EXPR: 643 case LROTATE_EXPR: 644 case RROTATE_EXPR: 645 /* Shifts and rotates are usually expensive. */ 646 return LIM_EXPENSIVE; 647 648 case CONSTRUCTOR: 649 /* Make vector construction cost proportional to the number 650 of elements. */ 651 return CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt)); 652 653 case SSA_NAME: 654 case PAREN_EXPR: 655 /* Whether or not something is wrapped inside a PAREN_EXPR 656 should not change move cost. Nor should an intermediate 657 unpropagated SSA name copy. 
*/ 658 return 0; 659 660 default: 661 return 1; 662 } 663 } 664 665 /* Finds the outermost loop between OUTER and LOOP in that the memory reference 666 REF is independent. If REF is not independent in LOOP, NULL is returned 667 instead. */ 668 669 static class loop * 670 outermost_indep_loop (class loop *outer, class loop *loop, im_mem_ref *ref) 671 { 672 class loop *aloop; 673 674 if (ref->stored && bitmap_bit_p (ref->stored, loop->num)) 675 return NULL; 676 677 for (aloop = outer; 678 aloop != loop; 679 aloop = superloop_at_depth (loop, loop_depth (aloop) + 1)) 680 if ((!ref->stored || !bitmap_bit_p (ref->stored, aloop->num)) 681 && ref_indep_loop_p (aloop, ref, lim_raw)) 682 return aloop; 683 684 if (ref_indep_loop_p (loop, ref, lim_raw)) 685 return loop; 686 else 687 return NULL; 688 } 689 690 /* If there is a simple load or store to a memory reference in STMT, returns 691 the location of the memory reference, and sets IS_STORE according to whether 692 it is a store or load. Otherwise, returns NULL. */ 693 694 static tree * 695 simple_mem_ref_in_stmt (gimple *stmt, bool *is_store) 696 { 697 tree *lhs, *rhs; 698 699 /* Recognize SSA_NAME = MEM and MEM = (SSA_NAME | invariant) patterns. */ 700 if (!gimple_assign_single_p (stmt)) 701 return NULL; 702 703 lhs = gimple_assign_lhs_ptr (stmt); 704 rhs = gimple_assign_rhs1_ptr (stmt); 705 706 if (TREE_CODE (*lhs) == SSA_NAME && gimple_vuse (stmt)) 707 { 708 *is_store = false; 709 return rhs; 710 } 711 else if (gimple_vdef (stmt) 712 && (TREE_CODE (*rhs) == SSA_NAME || is_gimple_min_invariant (*rhs))) 713 { 714 *is_store = true; 715 return lhs; 716 } 717 else 718 return NULL; 719 } 720 721 /* From a controlling predicate in DOM determine the arguments from 722 the PHI node PHI that are chosen if the predicate evaluates to 723 true and false and store them to *TRUE_ARG_P and *FALSE_ARG_P if 724 they are non-NULL. Returns true if the arguments can be determined, 725 else return false. */ 726 727 static bool 728 extract_true_false_args_from_phi (basic_block dom, gphi *phi, 729 tree *true_arg_p, tree *false_arg_p) 730 { 731 edge te, fe; 732 if (! extract_true_false_controlled_edges (dom, gimple_bb (phi), 733 &te, &fe)) 734 return false; 735 736 if (true_arg_p) 737 *true_arg_p = PHI_ARG_DEF (phi, te->dest_idx); 738 if (false_arg_p) 739 *false_arg_p = PHI_ARG_DEF (phi, fe->dest_idx); 740 741 return true; 742 } 743 744 /* Determine the outermost loop to that it is possible to hoist a statement 745 STMT and store it to LIM_DATA (STMT)->max_loop. To do this we determine 746 the outermost loop in that the value computed by STMT is invariant. 747 If MUST_PRESERVE_EXEC is true, additionally choose such a loop that 748 we preserve the fact whether STMT is executed. It also fills other related 749 information to LIM_DATA (STMT). 750 751 The function returns false if STMT cannot be hoisted outside of the loop it 752 is defined in, and true otherwise. 
*/

static bool
determine_max_movement (gimple *stmt, bool must_preserve_exec)
{
  basic_block bb = gimple_bb (stmt);
  class loop *loop = bb->loop_father;
  class loop *level;
  struct lim_aux_data *lim_data = get_lim_data (stmt);
  tree val;
  ssa_op_iter iter;

  if (must_preserve_exec)
    level = ALWAYS_EXECUTED_IN (bb);
  else
    level = superloop_at_depth (loop, 1);
  lim_data->max_loop = get_coldest_out_loop (level, loop, bb);
  if (!lim_data->max_loop)
    return false;

  if (gphi *phi = dyn_cast <gphi *> (stmt))
    {
      use_operand_p use_p;
      unsigned min_cost = UINT_MAX;
      unsigned total_cost = 0;
      struct lim_aux_data *def_data;

      /* We will end up promoting dependencies to be unconditionally
	 evaluated.  For this reason the PHI cost (and thus the
	 cost we remove from the loop by doing the invariant motion)
	 is that of the cheapest PHI argument dependency chain.  */
      FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
	{
	  val = USE_FROM_PTR (use_p);

	  if (TREE_CODE (val) != SSA_NAME)
	    {
	      /* Assign const 1 to constants.  */
	      min_cost = MIN (min_cost, 1);
	      total_cost += 1;
	      continue;
	    }
	  if (!add_dependency (val, lim_data, loop, false))
	    return false;

	  gimple *def_stmt = SSA_NAME_DEF_STMT (val);
	  if (gimple_bb (def_stmt)
	      && gimple_bb (def_stmt)->loop_father == loop)
	    {
	      def_data = get_lim_data (def_stmt);
	      if (def_data)
		{
		  min_cost = MIN (min_cost, def_data->cost);
		  total_cost += def_data->cost;
		}
	    }
	}

      min_cost = MIN (min_cost, total_cost);
      lim_data->cost += min_cost;

      if (gimple_phi_num_args (phi) > 1)
	{
	  basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
	  gimple *cond;
	  if (gsi_end_p (gsi_last_bb (dom)))
	    return false;
	  cond = gsi_stmt (gsi_last_bb (dom));
	  if (gimple_code (cond) != GIMPLE_COND)
	    return false;
	  /* Verify that this is an extended form of a diamond and
	     the PHI arguments are completely controlled by the
	     predicate in DOM.  */
	  if (!extract_true_false_args_from_phi (dom, phi, NULL, NULL))
	    return false;

	  /* Fold in dependencies and cost of the condition.  */
	  FOR_EACH_SSA_TREE_OPERAND (val, cond, iter, SSA_OP_USE)
	    {
	      if (!add_dependency (val, lim_data, loop, false))
		return false;
	      def_data = get_lim_data (SSA_NAME_DEF_STMT (val));
	      if (def_data)
		lim_data->cost += def_data->cost;
	    }

	  /* We want to avoid unconditionally executing very expensive
	     operations.  As costs for our dependencies cannot be
	     negative, just claim we are not invariant for this case.
	     We also are not sure whether the control-flow inside the
	     loop will vanish.  */
	  if (total_cost - min_cost >= 2 * LIM_EXPENSIVE
	      && !(min_cost != 0
		   && total_cost / min_cost <= 2))
	    return false;

	  /* Assume that the control-flow in the loop will vanish.
	     ??? We should verify this and not artificially increase
	     the cost if that is not the case.  */
	  lim_data->cost += stmt_cost (stmt);
	}

      return true;
    }
  else
    FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_USE)
      if (!add_dependency (val, lim_data, loop, true))
	return false;

  if (gimple_vuse (stmt))
    {
      im_mem_ref *ref
	= lim_data ?
memory_accesses.refs_list[lim_data->ref] : NULL; 865 if (ref 866 && MEM_ANALYZABLE (ref)) 867 { 868 lim_data->max_loop = outermost_indep_loop (lim_data->max_loop, 869 loop, ref); 870 if (!lim_data->max_loop) 871 return false; 872 } 873 else if (! add_dependency (gimple_vuse (stmt), lim_data, loop, false)) 874 return false; 875 } 876 877 lim_data->cost += stmt_cost (stmt); 878 879 return true; 880 } 881 882 /* Suppose that some statement in ORIG_LOOP is hoisted to the loop LEVEL, 883 and that one of the operands of this statement is computed by STMT. 884 Ensure that STMT (together with all the statements that define its 885 operands) is hoisted at least out of the loop LEVEL. */ 886 887 static void 888 set_level (gimple *stmt, class loop *orig_loop, class loop *level) 889 { 890 class loop *stmt_loop = gimple_bb (stmt)->loop_father; 891 struct lim_aux_data *lim_data; 892 gimple *dep_stmt; 893 unsigned i; 894 895 stmt_loop = find_common_loop (orig_loop, stmt_loop); 896 lim_data = get_lim_data (stmt); 897 if (lim_data != NULL && lim_data->tgt_loop != NULL) 898 stmt_loop = find_common_loop (stmt_loop, 899 loop_outer (lim_data->tgt_loop)); 900 if (flow_loop_nested_p (stmt_loop, level)) 901 return; 902 903 gcc_assert (level == lim_data->max_loop 904 || flow_loop_nested_p (lim_data->max_loop, level)); 905 906 lim_data->tgt_loop = level; 907 FOR_EACH_VEC_ELT (lim_data->depends, i, dep_stmt) 908 set_level (dep_stmt, orig_loop, level); 909 } 910 911 /* Determines an outermost loop from that we want to hoist the statement STMT. 912 For now we chose the outermost possible loop. TODO -- use profiling 913 information to set it more sanely. */ 914 915 static void 916 set_profitable_level (gimple *stmt) 917 { 918 set_level (stmt, gimple_bb (stmt)->loop_father, get_lim_data (stmt)->max_loop); 919 } 920 921 /* Returns true if STMT is a call that has side effects. */ 922 923 static bool 924 nonpure_call_p (gimple *stmt) 925 { 926 if (gimple_code (stmt) != GIMPLE_CALL) 927 return false; 928 929 return gimple_has_side_effects (stmt); 930 } 931 932 /* Rewrite a/b to a*(1/b). Return the invariant stmt to process. */ 933 934 static gimple * 935 rewrite_reciprocal (gimple_stmt_iterator *bsi) 936 { 937 gassign *stmt, *stmt1, *stmt2; 938 tree name, lhs, type; 939 tree real_one; 940 gimple_stmt_iterator gsi; 941 942 stmt = as_a <gassign *> (gsi_stmt (*bsi)); 943 lhs = gimple_assign_lhs (stmt); 944 type = TREE_TYPE (lhs); 945 946 real_one = build_one_cst (type); 947 948 name = make_temp_ssa_name (type, NULL, "reciptmp"); 949 stmt1 = gimple_build_assign (name, RDIV_EXPR, real_one, 950 gimple_assign_rhs2 (stmt)); 951 stmt2 = gimple_build_assign (lhs, MULT_EXPR, name, 952 gimple_assign_rhs1 (stmt)); 953 954 /* Replace division stmt with reciprocal and multiply stmts. 955 The multiply stmt is not invariant, so update iterator 956 and avoid rescanning. */ 957 gsi = *bsi; 958 gsi_insert_before (bsi, stmt1, GSI_NEW_STMT); 959 gsi_replace (&gsi, stmt2, true); 960 961 /* Continue processing with invariant reciprocal statement. */ 962 return stmt1; 963 } 964 965 /* Check if the pattern at *BSI is a bittest of the form 966 (A >> B) & 1 != 0 and in this case rewrite it to A & (1 << B) != 0. 
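   When B is loop invariant but A is not, the mask (1 << B) becomes
   hoistable and the expensive per-iteration shift is saved; see the
   corresponding trigger in compute_invariantness.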
*/ 967 968 static gimple * 969 rewrite_bittest (gimple_stmt_iterator *bsi) 970 { 971 gassign *stmt; 972 gimple *stmt1; 973 gassign *stmt2; 974 gimple *use_stmt; 975 gcond *cond_stmt; 976 tree lhs, name, t, a, b; 977 use_operand_p use; 978 979 stmt = as_a <gassign *> (gsi_stmt (*bsi)); 980 lhs = gimple_assign_lhs (stmt); 981 982 /* Verify that the single use of lhs is a comparison against zero. */ 983 if (TREE_CODE (lhs) != SSA_NAME 984 || !single_imm_use (lhs, &use, &use_stmt)) 985 return stmt; 986 cond_stmt = dyn_cast <gcond *> (use_stmt); 987 if (!cond_stmt) 988 return stmt; 989 if (gimple_cond_lhs (cond_stmt) != lhs 990 || (gimple_cond_code (cond_stmt) != NE_EXPR 991 && gimple_cond_code (cond_stmt) != EQ_EXPR) 992 || !integer_zerop (gimple_cond_rhs (cond_stmt))) 993 return stmt; 994 995 /* Get at the operands of the shift. The rhs is TMP1 & 1. */ 996 stmt1 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt)); 997 if (gimple_code (stmt1) != GIMPLE_ASSIGN) 998 return stmt; 999 1000 /* There is a conversion in between possibly inserted by fold. */ 1001 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt1))) 1002 { 1003 t = gimple_assign_rhs1 (stmt1); 1004 if (TREE_CODE (t) != SSA_NAME 1005 || !has_single_use (t)) 1006 return stmt; 1007 stmt1 = SSA_NAME_DEF_STMT (t); 1008 if (gimple_code (stmt1) != GIMPLE_ASSIGN) 1009 return stmt; 1010 } 1011 1012 /* Verify that B is loop invariant but A is not. Verify that with 1013 all the stmt walking we are still in the same loop. */ 1014 if (gimple_assign_rhs_code (stmt1) != RSHIFT_EXPR 1015 || loop_containing_stmt (stmt1) != loop_containing_stmt (stmt)) 1016 return stmt; 1017 1018 a = gimple_assign_rhs1 (stmt1); 1019 b = gimple_assign_rhs2 (stmt1); 1020 1021 if (outermost_invariant_loop (b, loop_containing_stmt (stmt1)) != NULL 1022 && outermost_invariant_loop (a, loop_containing_stmt (stmt1)) == NULL) 1023 { 1024 gimple_stmt_iterator rsi; 1025 1026 /* 1 << B */ 1027 t = fold_build2 (LSHIFT_EXPR, TREE_TYPE (a), 1028 build_int_cst (TREE_TYPE (a), 1), b); 1029 name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp"); 1030 stmt1 = gimple_build_assign (name, t); 1031 1032 /* A & (1 << B) */ 1033 t = fold_build2 (BIT_AND_EXPR, TREE_TYPE (a), a, name); 1034 name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp"); 1035 stmt2 = gimple_build_assign (name, t); 1036 1037 /* Replace the SSA_NAME we compare against zero. Adjust 1038 the type of zero accordingly. */ 1039 SET_USE (use, name); 1040 gimple_cond_set_rhs (cond_stmt, 1041 build_int_cst_type (TREE_TYPE (name), 1042 0)); 1043 1044 /* Don't use gsi_replace here, none of the new assignments sets 1045 the variable originally set in stmt. Move bsi to stmt1, and 1046 then remove the original stmt, so that we get a chance to 1047 retain debug info for it. */ 1048 rsi = *bsi; 1049 gsi_insert_before (bsi, stmt1, GSI_NEW_STMT); 1050 gsi_insert_before (&rsi, stmt2, GSI_SAME_STMT); 1051 gimple *to_release = gsi_stmt (rsi); 1052 gsi_remove (&rsi, true); 1053 release_defs (to_release); 1054 1055 return stmt1; 1056 } 1057 1058 return stmt; 1059 } 1060 1061 /* Determine the outermost loops in that statements in basic block BB are 1062 invariant, and record them to the LIM_DATA associated with the 1063 statements. 
*/ 1064 1065 static void 1066 compute_invariantness (basic_block bb) 1067 { 1068 enum move_pos pos; 1069 gimple_stmt_iterator bsi; 1070 gimple *stmt; 1071 bool maybe_never = ALWAYS_EXECUTED_IN (bb) == NULL; 1072 class loop *outermost = ALWAYS_EXECUTED_IN (bb); 1073 struct lim_aux_data *lim_data; 1074 1075 if (!loop_outer (bb->loop_father)) 1076 return; 1077 1078 if (dump_file && (dump_flags & TDF_DETAILS)) 1079 fprintf (dump_file, "Basic block %d (loop %d -- depth %d):\n\n", 1080 bb->index, bb->loop_father->num, loop_depth (bb->loop_father)); 1081 1082 /* Look at PHI nodes, but only if there is at most two. 1083 ??? We could relax this further by post-processing the inserted 1084 code and transforming adjacent cond-exprs with the same predicate 1085 to control flow again. */ 1086 bsi = gsi_start_phis (bb); 1087 if (!gsi_end_p (bsi) 1088 && ((gsi_next (&bsi), gsi_end_p (bsi)) 1089 || (gsi_next (&bsi), gsi_end_p (bsi)))) 1090 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi)) 1091 { 1092 stmt = gsi_stmt (bsi); 1093 1094 pos = movement_possibility (stmt); 1095 if (pos == MOVE_IMPOSSIBLE) 1096 continue; 1097 1098 lim_data = get_lim_data (stmt); 1099 if (! lim_data) 1100 lim_data = init_lim_data (stmt); 1101 lim_data->always_executed_in = outermost; 1102 1103 if (!determine_max_movement (stmt, false)) 1104 { 1105 lim_data->max_loop = NULL; 1106 continue; 1107 } 1108 1109 if (dump_file && (dump_flags & TDF_DETAILS)) 1110 { 1111 print_gimple_stmt (dump_file, stmt, 2); 1112 fprintf (dump_file, " invariant up to level %d, cost %d.\n\n", 1113 loop_depth (lim_data->max_loop), 1114 lim_data->cost); 1115 } 1116 1117 if (lim_data->cost >= LIM_EXPENSIVE) 1118 set_profitable_level (stmt); 1119 } 1120 1121 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi)) 1122 { 1123 stmt = gsi_stmt (bsi); 1124 1125 pos = movement_possibility (stmt); 1126 if (pos == MOVE_IMPOSSIBLE) 1127 { 1128 if (nonpure_call_p (stmt)) 1129 { 1130 maybe_never = true; 1131 outermost = NULL; 1132 } 1133 /* Make sure to note always_executed_in for stores to make 1134 store-motion work. */ 1135 else if (stmt_makes_single_store (stmt)) 1136 { 1137 struct lim_aux_data *lim_data = get_lim_data (stmt); 1138 if (! lim_data) 1139 lim_data = init_lim_data (stmt); 1140 lim_data->always_executed_in = outermost; 1141 } 1142 continue; 1143 } 1144 1145 if (is_gimple_assign (stmt) 1146 && (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)) 1147 == GIMPLE_BINARY_RHS)) 1148 { 1149 tree op0 = gimple_assign_rhs1 (stmt); 1150 tree op1 = gimple_assign_rhs2 (stmt); 1151 class loop *ol1 = outermost_invariant_loop (op1, 1152 loop_containing_stmt (stmt)); 1153 1154 /* If divisor is invariant, convert a/b to a*(1/b), allowing reciprocal 1155 to be hoisted out of loop, saving expensive divide. */ 1156 if (pos == MOVE_POSSIBLE 1157 && gimple_assign_rhs_code (stmt) == RDIV_EXPR 1158 && flag_unsafe_math_optimizations 1159 && !flag_trapping_math 1160 && ol1 != NULL 1161 && outermost_invariant_loop (op0, ol1) == NULL) 1162 stmt = rewrite_reciprocal (&bsi); 1163 1164 /* If the shift count is invariant, convert (A >> B) & 1 to 1165 A & (1 << B) allowing the bit mask to be hoisted out of the loop 1166 saving an expensive shift. */ 1167 if (pos == MOVE_POSSIBLE 1168 && gimple_assign_rhs_code (stmt) == BIT_AND_EXPR 1169 && integer_onep (op1) 1170 && TREE_CODE (op0) == SSA_NAME 1171 && has_single_use (op0)) 1172 stmt = rewrite_bittest (&bsi); 1173 } 1174 1175 lim_data = get_lim_data (stmt); 1176 if (! 
lim_data) 1177 lim_data = init_lim_data (stmt); 1178 lim_data->always_executed_in = outermost; 1179 1180 if (maybe_never && pos == MOVE_PRESERVE_EXECUTION) 1181 continue; 1182 1183 if (!determine_max_movement (stmt, pos == MOVE_PRESERVE_EXECUTION)) 1184 { 1185 lim_data->max_loop = NULL; 1186 continue; 1187 } 1188 1189 if (dump_file && (dump_flags & TDF_DETAILS)) 1190 { 1191 print_gimple_stmt (dump_file, stmt, 2); 1192 fprintf (dump_file, " invariant up to level %d, cost %d.\n\n", 1193 loop_depth (lim_data->max_loop), 1194 lim_data->cost); 1195 } 1196 1197 if (lim_data->cost >= LIM_EXPENSIVE) 1198 set_profitable_level (stmt); 1199 } 1200 } 1201 1202 /* Hoist the statements in basic block BB out of the loops prescribed by 1203 data stored in LIM_DATA structures associated with each statement. Callback 1204 for walk_dominator_tree. */ 1205 1206 unsigned int 1207 move_computations_worker (basic_block bb) 1208 { 1209 class loop *level; 1210 unsigned cost = 0; 1211 struct lim_aux_data *lim_data; 1212 unsigned int todo = 0; 1213 1214 if (!loop_outer (bb->loop_father)) 1215 return todo; 1216 1217 for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi); ) 1218 { 1219 gassign *new_stmt; 1220 gphi *stmt = bsi.phi (); 1221 1222 lim_data = get_lim_data (stmt); 1223 if (lim_data == NULL) 1224 { 1225 gsi_next (&bsi); 1226 continue; 1227 } 1228 1229 cost = lim_data->cost; 1230 level = lim_data->tgt_loop; 1231 clear_lim_data (stmt); 1232 1233 if (!level) 1234 { 1235 gsi_next (&bsi); 1236 continue; 1237 } 1238 1239 if (dump_file && (dump_flags & TDF_DETAILS)) 1240 { 1241 fprintf (dump_file, "Moving PHI node\n"); 1242 print_gimple_stmt (dump_file, stmt, 0); 1243 fprintf (dump_file, "(cost %u) out of loop %d.\n\n", 1244 cost, level->num); 1245 } 1246 1247 if (gimple_phi_num_args (stmt) == 1) 1248 { 1249 tree arg = PHI_ARG_DEF (stmt, 0); 1250 new_stmt = gimple_build_assign (gimple_phi_result (stmt), 1251 TREE_CODE (arg), arg); 1252 } 1253 else 1254 { 1255 basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb); 1256 gimple *cond = gsi_stmt (gsi_last_bb (dom)); 1257 tree arg0 = NULL_TREE, arg1 = NULL_TREE, t; 1258 /* Get the PHI arguments corresponding to the true and false 1259 edges of COND. */ 1260 extract_true_false_args_from_phi (dom, stmt, &arg0, &arg1); 1261 gcc_assert (arg0 && arg1); 1262 t = build2 (gimple_cond_code (cond), boolean_type_node, 1263 gimple_cond_lhs (cond), gimple_cond_rhs (cond)); 1264 new_stmt = gimple_build_assign (gimple_phi_result (stmt), 1265 COND_EXPR, t, arg0, arg1); 1266 todo |= TODO_cleanup_cfg; 1267 } 1268 if (!ALWAYS_EXECUTED_IN (bb) 1269 || (ALWAYS_EXECUTED_IN (bb) != level 1270 && !flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))) 1271 reset_flow_sensitive_info (gimple_assign_lhs (new_stmt)); 1272 gsi_insert_on_edge (loop_preheader_edge (level), new_stmt); 1273 remove_phi_node (&bsi, false); 1274 } 1275 1276 for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); ) 1277 { 1278 edge e; 1279 1280 gimple *stmt = gsi_stmt (bsi); 1281 1282 lim_data = get_lim_data (stmt); 1283 if (lim_data == NULL) 1284 { 1285 gsi_next (&bsi); 1286 continue; 1287 } 1288 1289 cost = lim_data->cost; 1290 level = lim_data->tgt_loop; 1291 clear_lim_data (stmt); 1292 1293 if (!level) 1294 { 1295 gsi_next (&bsi); 1296 continue; 1297 } 1298 1299 /* We do not really want to move conditionals out of the loop; we just 1300 placed it here to force its operands to be moved if necessary. 
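   (With -funswitch-loops, movement_possibility_1 marks invariant
   GIMPLE_CONDs as MOVE_POSSIBLE precisely so that their operands get
   hoisted.)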
*/ 1301 if (gimple_code (stmt) == GIMPLE_COND) 1302 { 1303 gsi_next (&bsi); 1304 continue; 1305 } 1306 1307 if (dump_file && (dump_flags & TDF_DETAILS)) 1308 { 1309 fprintf (dump_file, "Moving statement\n"); 1310 print_gimple_stmt (dump_file, stmt, 0); 1311 fprintf (dump_file, "(cost %u) out of loop %d.\n\n", 1312 cost, level->num); 1313 } 1314 1315 e = loop_preheader_edge (level); 1316 gcc_assert (!gimple_vdef (stmt)); 1317 if (gimple_vuse (stmt)) 1318 { 1319 /* The new VUSE is the one from the virtual PHI in the loop 1320 header or the one already present. */ 1321 gphi_iterator gsi2; 1322 for (gsi2 = gsi_start_phis (e->dest); 1323 !gsi_end_p (gsi2); gsi_next (&gsi2)) 1324 { 1325 gphi *phi = gsi2.phi (); 1326 if (virtual_operand_p (gimple_phi_result (phi))) 1327 { 1328 SET_USE (gimple_vuse_op (stmt), 1329 PHI_ARG_DEF_FROM_EDGE (phi, e)); 1330 break; 1331 } 1332 } 1333 } 1334 gsi_remove (&bsi, false); 1335 if (gimple_has_lhs (stmt) 1336 && TREE_CODE (gimple_get_lhs (stmt)) == SSA_NAME 1337 && (!ALWAYS_EXECUTED_IN (bb) 1338 || !(ALWAYS_EXECUTED_IN (bb) == level 1339 || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level)))) 1340 reset_flow_sensitive_info (gimple_get_lhs (stmt)); 1341 /* In case this is a stmt that is not unconditionally executed 1342 when the target loop header is executed and the stmt may 1343 invoke undefined integer or pointer overflow rewrite it to 1344 unsigned arithmetic. */ 1345 if (is_gimple_assign (stmt) 1346 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_lhs (stmt))) 1347 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (gimple_assign_lhs (stmt))) 1348 && arith_code_with_undefined_signed_overflow 1349 (gimple_assign_rhs_code (stmt)) 1350 && (!ALWAYS_EXECUTED_IN (bb) 1351 || !(ALWAYS_EXECUTED_IN (bb) == level 1352 || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level)))) 1353 gsi_insert_seq_on_edge (e, rewrite_to_defined_overflow (stmt)); 1354 else 1355 gsi_insert_on_edge (e, stmt); 1356 } 1357 1358 return todo; 1359 } 1360 1361 /* Checks whether the statement defining variable *INDEX can be hoisted 1362 out of the loop passed in DATA. Callback for for_each_index. */ 1363 1364 static bool 1365 may_move_till (tree ref, tree *index, void *data) 1366 { 1367 class loop *loop = (class loop *) data, *max_loop; 1368 1369 /* If REF is an array reference, check also that the step and the lower 1370 bound is invariant in LOOP. */ 1371 if (TREE_CODE (ref) == ARRAY_REF) 1372 { 1373 tree step = TREE_OPERAND (ref, 3); 1374 tree lbound = TREE_OPERAND (ref, 2); 1375 1376 max_loop = outermost_invariant_loop (step, loop); 1377 if (!max_loop) 1378 return false; 1379 1380 max_loop = outermost_invariant_loop (lbound, loop); 1381 if (!max_loop) 1382 return false; 1383 } 1384 1385 max_loop = outermost_invariant_loop (*index, loop); 1386 if (!max_loop) 1387 return false; 1388 1389 return true; 1390 } 1391 1392 /* If OP is SSA NAME, force the statement that defines it to be 1393 moved out of the LOOP. ORIG_LOOP is the loop in that EXPR is used. */ 1394 1395 static void 1396 force_move_till_op (tree op, class loop *orig_loop, class loop *loop) 1397 { 1398 gimple *stmt; 1399 1400 if (!op 1401 || is_gimple_min_invariant (op)) 1402 return; 1403 1404 gcc_assert (TREE_CODE (op) == SSA_NAME); 1405 1406 stmt = SSA_NAME_DEF_STMT (op); 1407 if (gimple_nop_p (stmt)) 1408 return; 1409 1410 set_level (stmt, orig_loop, loop); 1411 } 1412 1413 /* Forces statement defining invariants in REF (and *INDEX) to be moved out of 1414 the LOOP. The reference REF is used in the loop ORIG_LOOP. Callback for 1415 for_each_index. 
*/ 1416 1417 struct fmt_data 1418 { 1419 class loop *loop; 1420 class loop *orig_loop; 1421 }; 1422 1423 static bool 1424 force_move_till (tree ref, tree *index, void *data) 1425 { 1426 struct fmt_data *fmt_data = (struct fmt_data *) data; 1427 1428 if (TREE_CODE (ref) == ARRAY_REF) 1429 { 1430 tree step = TREE_OPERAND (ref, 3); 1431 tree lbound = TREE_OPERAND (ref, 2); 1432 1433 force_move_till_op (step, fmt_data->orig_loop, fmt_data->loop); 1434 force_move_till_op (lbound, fmt_data->orig_loop, fmt_data->loop); 1435 } 1436 1437 force_move_till_op (*index, fmt_data->orig_loop, fmt_data->loop); 1438 1439 return true; 1440 } 1441 1442 /* A function to free the mem_ref object OBJ. */ 1443 1444 static void 1445 memref_free (class im_mem_ref *mem) 1446 { 1447 mem->accesses_in_loop.release (); 1448 } 1449 1450 /* Allocates and returns a memory reference description for MEM whose hash 1451 value is HASH and id is ID. */ 1452 1453 static im_mem_ref * 1454 mem_ref_alloc (ao_ref *mem, unsigned hash, unsigned id) 1455 { 1456 im_mem_ref *ref = XOBNEW (&mem_ref_obstack, class im_mem_ref); 1457 if (mem) 1458 ref->mem = *mem; 1459 else 1460 ao_ref_init (&ref->mem, error_mark_node); 1461 ref->id = id; 1462 ref->ref_canonical = false; 1463 ref->ref_decomposed = false; 1464 ref->hash = hash; 1465 ref->stored = NULL; 1466 ref->loaded = NULL; 1467 bitmap_initialize (&ref->dep_loop, &lim_bitmap_obstack); 1468 ref->accesses_in_loop.create (1); 1469 1470 return ref; 1471 } 1472 1473 /* Records memory reference location *LOC in LOOP to the memory reference 1474 description REF. The reference occurs in statement STMT. */ 1475 1476 static void 1477 record_mem_ref_loc (im_mem_ref *ref, gimple *stmt, tree *loc) 1478 { 1479 mem_ref_loc aref; 1480 aref.stmt = stmt; 1481 aref.ref = loc; 1482 ref->accesses_in_loop.safe_push (aref); 1483 } 1484 1485 /* Set the LOOP bit in REF stored bitmap and allocate that if 1486 necessary. Return whether a bit was changed. */ 1487 1488 static bool 1489 set_ref_stored_in_loop (im_mem_ref *ref, class loop *loop) 1490 { 1491 if (!ref->stored) 1492 ref->stored = BITMAP_ALLOC (&lim_bitmap_obstack); 1493 return bitmap_set_bit (ref->stored, loop->num); 1494 } 1495 1496 /* Marks reference REF as stored in LOOP. */ 1497 1498 static void 1499 mark_ref_stored (im_mem_ref *ref, class loop *loop) 1500 { 1501 while (loop != current_loops->tree_root 1502 && set_ref_stored_in_loop (ref, loop)) 1503 loop = loop_outer (loop); 1504 } 1505 1506 /* Set the LOOP bit in REF loaded bitmap and allocate that if 1507 necessary. Return whether a bit was changed. */ 1508 1509 static bool 1510 set_ref_loaded_in_loop (im_mem_ref *ref, class loop *loop) 1511 { 1512 if (!ref->loaded) 1513 ref->loaded = BITMAP_ALLOC (&lim_bitmap_obstack); 1514 return bitmap_set_bit (ref->loaded, loop->num); 1515 } 1516 1517 /* Marks reference REF as loaded in LOOP. */ 1518 1519 static void 1520 mark_ref_loaded (im_mem_ref *ref, class loop *loop) 1521 { 1522 while (loop != current_loops->tree_root 1523 && set_ref_loaded_in_loop (ref, loop)) 1524 loop = loop_outer (loop); 1525 } 1526 1527 /* Gathers memory references in statement STMT in LOOP, storing the 1528 information about them in the memory_accesses structure. Marks 1529 the vops accessed through unrecognized statements there as 1530 well. 
*/ 1531 1532 static void 1533 gather_mem_refs_stmt (class loop *loop, gimple *stmt) 1534 { 1535 tree *mem = NULL; 1536 hashval_t hash; 1537 im_mem_ref **slot; 1538 im_mem_ref *ref; 1539 bool is_stored; 1540 unsigned id; 1541 1542 if (!gimple_vuse (stmt)) 1543 return; 1544 1545 mem = simple_mem_ref_in_stmt (stmt, &is_stored); 1546 if (!mem && is_gimple_assign (stmt)) 1547 { 1548 /* For aggregate copies record distinct references but use them 1549 only for disambiguation purposes. */ 1550 id = memory_accesses.refs_list.length (); 1551 ref = mem_ref_alloc (NULL, 0, id); 1552 memory_accesses.refs_list.safe_push (ref); 1553 if (dump_file && (dump_flags & TDF_DETAILS)) 1554 { 1555 fprintf (dump_file, "Unhandled memory reference %u: ", id); 1556 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM); 1557 } 1558 record_mem_ref_loc (ref, stmt, mem); 1559 is_stored = gimple_vdef (stmt); 1560 } 1561 else if (!mem) 1562 { 1563 /* We use the shared mem_ref for all unanalyzable refs. */ 1564 id = UNANALYZABLE_MEM_ID; 1565 ref = memory_accesses.refs_list[id]; 1566 if (dump_file && (dump_flags & TDF_DETAILS)) 1567 { 1568 fprintf (dump_file, "Unanalyzed memory reference %u: ", id); 1569 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM); 1570 } 1571 is_stored = gimple_vdef (stmt); 1572 } 1573 else 1574 { 1575 /* We are looking for equal refs that might differ in structure 1576 such as a.b vs. MEM[&a + 4]. So we key off the ao_ref but 1577 make sure we can canonicalize the ref in the hashtable if 1578 non-operand_equal_p refs are found. For the lookup we mark 1579 the case we want strict equality with aor.max_size == -1. */ 1580 ao_ref aor; 1581 ao_ref_init (&aor, *mem); 1582 ao_ref_base (&aor); 1583 ao_ref_alias_set (&aor); 1584 HOST_WIDE_INT offset, size, max_size; 1585 poly_int64 saved_maxsize = aor.max_size, mem_off; 1586 tree mem_base; 1587 bool ref_decomposed; 1588 if (aor.max_size_known_p () 1589 && aor.offset.is_constant (&offset) 1590 && aor.size.is_constant (&size) 1591 && aor.max_size.is_constant (&max_size) 1592 && size == max_size 1593 && (size % BITS_PER_UNIT) == 0 1594 /* We're canonicalizing to a MEM where TYPE_SIZE specifies the 1595 size. Make sure this is consistent with the extraction. 
*/ 1596 && poly_int_tree_p (TYPE_SIZE (TREE_TYPE (*mem))) 1597 && known_eq (wi::to_poly_offset (TYPE_SIZE (TREE_TYPE (*mem))), 1598 aor.size) 1599 && (mem_base = get_addr_base_and_unit_offset (aor.ref, &mem_off))) 1600 { 1601 ref_decomposed = true; 1602 tree base = ao_ref_base (&aor); 1603 poly_int64 moffset; 1604 HOST_WIDE_INT mcoffset; 1605 if (TREE_CODE (base) == MEM_REF 1606 && (mem_ref_offset (base) * BITS_PER_UNIT + offset).to_shwi (&moffset) 1607 && moffset.is_constant (&mcoffset)) 1608 { 1609 hash = iterative_hash_expr (TREE_OPERAND (base, 0), 0); 1610 hash = iterative_hash_host_wide_int (mcoffset, hash); 1611 } 1612 else 1613 { 1614 hash = iterative_hash_expr (base, 0); 1615 hash = iterative_hash_host_wide_int (offset, hash); 1616 } 1617 hash = iterative_hash_host_wide_int (size, hash); 1618 } 1619 else 1620 { 1621 ref_decomposed = false; 1622 hash = iterative_hash_expr (aor.ref, 0); 1623 aor.max_size = -1; 1624 } 1625 slot = memory_accesses.refs->find_slot_with_hash (&aor, hash, INSERT); 1626 aor.max_size = saved_maxsize; 1627 if (*slot) 1628 { 1629 if (!(*slot)->ref_canonical 1630 && !operand_equal_p (*mem, (*slot)->mem.ref, 0)) 1631 { 1632 /* If we didn't yet canonicalize the hashtable ref (which 1633 we'll end up using for code insertion) and hit a second 1634 equal ref that is not structurally equivalent create 1635 a canonical ref which is a bare MEM_REF. */ 1636 if (TREE_CODE (*mem) == MEM_REF 1637 || TREE_CODE (*mem) == TARGET_MEM_REF) 1638 { 1639 (*slot)->mem.ref = *mem; 1640 (*slot)->mem.base_alias_set = ao_ref_base_alias_set (&aor); 1641 } 1642 else 1643 { 1644 tree ref_alias_type = reference_alias_ptr_type (*mem); 1645 unsigned int ref_align = get_object_alignment (*mem); 1646 tree ref_type = TREE_TYPE (*mem); 1647 tree tmp = build1 (ADDR_EXPR, ptr_type_node, 1648 unshare_expr (mem_base)); 1649 if (TYPE_ALIGN (ref_type) != ref_align) 1650 ref_type = build_aligned_type (ref_type, ref_align); 1651 (*slot)->mem.ref 1652 = fold_build2 (MEM_REF, ref_type, tmp, 1653 build_int_cst (ref_alias_type, mem_off)); 1654 if ((*slot)->mem.volatile_p) 1655 TREE_THIS_VOLATILE ((*slot)->mem.ref) = 1; 1656 gcc_checking_assert (TREE_CODE ((*slot)->mem.ref) == MEM_REF 1657 && is_gimple_mem_ref_addr 1658 (TREE_OPERAND ((*slot)->mem.ref, 1659 0))); 1660 (*slot)->mem.base_alias_set = (*slot)->mem.ref_alias_set; 1661 } 1662 (*slot)->ref_canonical = true; 1663 } 1664 ref = *slot; 1665 id = ref->id; 1666 } 1667 else 1668 { 1669 id = memory_accesses.refs_list.length (); 1670 ref = mem_ref_alloc (&aor, hash, id); 1671 ref->ref_decomposed = ref_decomposed; 1672 memory_accesses.refs_list.safe_push (ref); 1673 *slot = ref; 1674 1675 if (dump_file && (dump_flags & TDF_DETAILS)) 1676 { 1677 fprintf (dump_file, "Memory reference %u: ", id); 1678 print_generic_expr (dump_file, ref->mem.ref, TDF_SLIM); 1679 fprintf (dump_file, "\n"); 1680 } 1681 } 1682 1683 record_mem_ref_loc (ref, stmt, mem); 1684 } 1685 if (is_stored) 1686 { 1687 bitmap_set_bit (&memory_accesses.refs_stored_in_loop[loop->num], ref->id); 1688 mark_ref_stored (ref, loop); 1689 } 1690 /* A not simple memory op is also a read when it is a write. 
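   This covers the shared unanalyzable ref and aggregate copies represented
   by mem.ref == error_mark_node, which are conservatively recorded as
   loaded as well.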
*/ 1691 if (!is_stored || id == UNANALYZABLE_MEM_ID 1692 || ref->mem.ref == error_mark_node) 1693 { 1694 bitmap_set_bit (&memory_accesses.refs_loaded_in_loop[loop->num], ref->id); 1695 mark_ref_loaded (ref, loop); 1696 } 1697 init_lim_data (stmt)->ref = ref->id; 1698 return; 1699 } 1700 1701 static unsigned *bb_loop_postorder; 1702 1703 /* qsort sort function to sort blocks after their loop fathers postorder. */ 1704 1705 static int 1706 sort_bbs_in_loop_postorder_cmp (const void *bb1_, const void *bb2_, 1707 void *bb_loop_postorder_) 1708 { 1709 unsigned *bb_loop_postorder = (unsigned *)bb_loop_postorder_; 1710 basic_block bb1 = *(const basic_block *)bb1_; 1711 basic_block bb2 = *(const basic_block *)bb2_; 1712 class loop *loop1 = bb1->loop_father; 1713 class loop *loop2 = bb2->loop_father; 1714 if (loop1->num == loop2->num) 1715 return bb1->index - bb2->index; 1716 return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1; 1717 } 1718 1719 /* qsort sort function to sort ref locs after their loop fathers postorder. */ 1720 1721 static int 1722 sort_locs_in_loop_postorder_cmp (const void *loc1_, const void *loc2_, 1723 void *bb_loop_postorder_) 1724 { 1725 unsigned *bb_loop_postorder = (unsigned *)bb_loop_postorder_; 1726 const mem_ref_loc *loc1 = (const mem_ref_loc *)loc1_; 1727 const mem_ref_loc *loc2 = (const mem_ref_loc *)loc2_; 1728 class loop *loop1 = gimple_bb (loc1->stmt)->loop_father; 1729 class loop *loop2 = gimple_bb (loc2->stmt)->loop_father; 1730 if (loop1->num == loop2->num) 1731 return 0; 1732 return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1; 1733 } 1734 1735 /* Gathers memory references in loops. */ 1736 1737 static void 1738 analyze_memory_references (bool store_motion) 1739 { 1740 gimple_stmt_iterator bsi; 1741 basic_block bb, *bbs; 1742 class loop *outer; 1743 unsigned i, n; 1744 1745 /* Collect all basic-blocks in loops and sort them after their 1746 loops postorder. */ 1747 i = 0; 1748 bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS); 1749 FOR_EACH_BB_FN (bb, cfun) 1750 if (bb->loop_father != current_loops->tree_root) 1751 bbs[i++] = bb; 1752 n = i; 1753 gcc_sort_r (bbs, n, sizeof (basic_block), sort_bbs_in_loop_postorder_cmp, 1754 bb_loop_postorder); 1755 1756 /* Visit blocks in loop postorder and assign mem-ref IDs in that order. 1757 That results in better locality for all the bitmaps. It also 1758 automatically sorts the location list of gathered memory references 1759 after their loop postorder number allowing to binary-search it. */ 1760 for (i = 0; i < n; ++i) 1761 { 1762 basic_block bb = bbs[i]; 1763 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi)) 1764 gather_mem_refs_stmt (bb->loop_father, gsi_stmt (bsi)); 1765 } 1766 1767 /* Verify the list of gathered memory references is sorted after their 1768 loop postorder number. */ 1769 if (flag_checking) 1770 { 1771 im_mem_ref *ref; 1772 FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref) 1773 for (unsigned j = 1; j < ref->accesses_in_loop.length (); ++j) 1774 gcc_assert (sort_locs_in_loop_postorder_cmp 1775 (&ref->accesses_in_loop[j-1], &ref->accesses_in_loop[j], 1776 bb_loop_postorder) <= 0); 1777 } 1778 1779 free (bbs); 1780 1781 if (!store_motion) 1782 return; 1783 1784 /* Propagate the information about accessed memory references up 1785 the loop hierarchy. */ 1786 for (auto loop : loops_list (cfun, LI_FROM_INNERMOST)) 1787 { 1788 /* Finalize the overall touched references (including subloops). 
*/ 1789 bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[loop->num], 1790 &memory_accesses.refs_stored_in_loop[loop->num]); 1791 1792 /* Propagate the information about accessed memory references up 1793 the loop hierarchy. */ 1794 outer = loop_outer (loop); 1795 if (outer == current_loops->tree_root) 1796 continue; 1797 1798 bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[outer->num], 1799 &memory_accesses.all_refs_stored_in_loop[loop->num]); 1800 } 1801 } 1802 1803 /* Returns true if MEM1 and MEM2 may alias. TTAE_CACHE is used as a cache in 1804 tree_to_aff_combination_expand. */ 1805 1806 static bool 1807 mem_refs_may_alias_p (im_mem_ref *mem1, im_mem_ref *mem2, 1808 hash_map<tree, name_expansion *> **ttae_cache, 1809 bool tbaa_p) 1810 { 1811 gcc_checking_assert (mem1->mem.ref != error_mark_node 1812 && mem2->mem.ref != error_mark_node); 1813 1814 /* Perform BASE + OFFSET analysis -- if MEM1 and MEM2 are based on the same 1815 object and their offset differ in such a way that the locations cannot 1816 overlap, then they cannot alias. */ 1817 poly_widest_int size1, size2; 1818 aff_tree off1, off2; 1819 1820 /* Perform basic offset and type-based disambiguation. */ 1821 if (!refs_may_alias_p_1 (&mem1->mem, &mem2->mem, tbaa_p)) 1822 return false; 1823 1824 /* The expansion of addresses may be a bit expensive, thus we only do 1825 the check at -O2 and higher optimization levels. */ 1826 if (optimize < 2) 1827 return true; 1828 1829 get_inner_reference_aff (mem1->mem.ref, &off1, &size1); 1830 get_inner_reference_aff (mem2->mem.ref, &off2, &size2); 1831 aff_combination_expand (&off1, ttae_cache); 1832 aff_combination_expand (&off2, ttae_cache); 1833 aff_combination_scale (&off1, -1); 1834 aff_combination_add (&off2, &off1); 1835 1836 if (aff_comb_cannot_overlap_p (&off2, size1, size2)) 1837 return false; 1838 1839 return true; 1840 } 1841 1842 /* Compare function for bsearch searching for reference locations 1843 in a loop. */ 1844 1845 static int 1846 find_ref_loc_in_loop_cmp (const void *loop_, const void *loc_, 1847 void *bb_loop_postorder_) 1848 { 1849 unsigned *bb_loop_postorder = (unsigned *)bb_loop_postorder_; 1850 class loop *loop = (class loop *)const_cast<void *>(loop_); 1851 mem_ref_loc *loc = (mem_ref_loc *)const_cast<void *>(loc_); 1852 class loop *loc_loop = gimple_bb (loc->stmt)->loop_father; 1853 if (loop->num == loc_loop->num 1854 || flow_loop_nested_p (loop, loc_loop)) 1855 return 0; 1856 return (bb_loop_postorder[loop->num] < bb_loop_postorder[loc_loop->num] 1857 ? -1 : 1); 1858 } 1859 1860 /* Iterates over all locations of REF in LOOP and its subloops calling 1861 fn.operator() with the location as argument. When that operator 1862 returns true the iteration is stopped and true is returned. 1863 Otherwise false is returned. */ 1864 1865 template <typename FN> 1866 static bool 1867 for_all_locs_in_loop (class loop *loop, im_mem_ref *ref, FN fn) 1868 { 1869 unsigned i; 1870 mem_ref_loc *loc; 1871 1872 /* Search for the cluster of locs in the accesses_in_loop vector 1873 which is sorted after postorder index of the loop father. */ 1874 loc = ref->accesses_in_loop.bsearch (loop, find_ref_loc_in_loop_cmp, 1875 bb_loop_postorder); 1876 if (!loc) 1877 return false; 1878 1879 /* We have found one location inside loop or its sub-loops. Iterate 1880 both forward and backward to cover the whole cluster. 
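   The cluster is contiguous because accesses_in_loop is sorted after the
   loop postorder number of each location's block, which keeps a loop and
   all of its sub-loops adjacent.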
*/ 1881 i = loc - ref->accesses_in_loop.address (); 1882 while (i > 0) 1883 { 1884 --i; 1885 mem_ref_loc *l = &ref->accesses_in_loop[i]; 1886 if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt))) 1887 break; 1888 if (fn (l)) 1889 return true; 1890 } 1891 for (i = loc - ref->accesses_in_loop.address (); 1892 i < ref->accesses_in_loop.length (); ++i) 1893 { 1894 mem_ref_loc *l = &ref->accesses_in_loop[i]; 1895 if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt))) 1896 break; 1897 if (fn (l)) 1898 return true; 1899 } 1900 1901 return false; 1902 } 1903 1904 /* Rewrites location LOC by TMP_VAR. */ 1905 1906 class rewrite_mem_ref_loc 1907 { 1908 public: 1909 rewrite_mem_ref_loc (tree tmp_var_) : tmp_var (tmp_var_) {} 1910 bool operator () (mem_ref_loc *loc); 1911 tree tmp_var; 1912 }; 1913 1914 bool 1915 rewrite_mem_ref_loc::operator () (mem_ref_loc *loc) 1916 { 1917 *loc->ref = tmp_var; 1918 update_stmt (loc->stmt); 1919 return false; 1920 } 1921 1922 /* Rewrites all references to REF in LOOP by variable TMP_VAR. */ 1923 1924 static void 1925 rewrite_mem_refs (class loop *loop, im_mem_ref *ref, tree tmp_var) 1926 { 1927 for_all_locs_in_loop (loop, ref, rewrite_mem_ref_loc (tmp_var)); 1928 } 1929 1930 /* Stores the first reference location in LOCP. */ 1931 1932 class first_mem_ref_loc_1 1933 { 1934 public: 1935 first_mem_ref_loc_1 (mem_ref_loc **locp_) : locp (locp_) {} 1936 bool operator () (mem_ref_loc *loc); 1937 mem_ref_loc **locp; 1938 }; 1939 1940 bool 1941 first_mem_ref_loc_1::operator () (mem_ref_loc *loc) 1942 { 1943 *locp = loc; 1944 return true; 1945 } 1946 1947 /* Returns the first reference location to REF in LOOP. */ 1948 1949 static mem_ref_loc * 1950 first_mem_ref_loc (class loop *loop, im_mem_ref *ref) 1951 { 1952 mem_ref_loc *locp = NULL; 1953 for_all_locs_in_loop (loop, ref, first_mem_ref_loc_1 (&locp)); 1954 return locp; 1955 } 1956 1957 /* Helper function for execute_sm. Emit code to store TMP_VAR into 1958 MEM along edge EX. 1959 1960 The store is only done if MEM has changed. We do this so no 1961 changes to MEM occur on code paths that did not originally store 1962 into it. 1963 1964 The common case for execute_sm will transform: 1965 1966 for (...) { 1967 if (foo) 1968 stuff; 1969 else 1970 MEM = TMP_VAR; 1971 } 1972 1973 into: 1974 1975 lsm = MEM; 1976 for (...) { 1977 if (foo) 1978 stuff; 1979 else 1980 lsm = TMP_VAR; 1981 } 1982 MEM = lsm; 1983 1984 This function will generate: 1985 1986 lsm = MEM; 1987 1988 lsm_flag = false; 1989 ... 1990 for (...) { 1991 if (foo) 1992 stuff; 1993 else { 1994 lsm = TMP_VAR; 1995 lsm_flag = true; 1996 } 1997 } 1998 if (lsm_flag) <-- 1999 MEM = lsm; <-- (X) 2000 2001 In case MEM and TMP_VAR are NULL the function will return the then 2002 block so the caller can insert (X) and other related stmts. 2003 */ 2004 2005 static basic_block 2006 execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag, 2007 edge preheader, hash_set <basic_block> *flag_bbs, 2008 edge &append_cond_position, edge &last_cond_fallthru) 2009 { 2010 basic_block new_bb, then_bb, old_dest; 2011 bool loop_has_only_one_exit; 2012 edge then_old_edge; 2013 gimple_stmt_iterator gsi; 2014 gimple *stmt; 2015 bool irr = ex->flags & EDGE_IRREDUCIBLE_LOOP; 2016 2017 profile_count count_sum = profile_count::zero (); 2018 int nbbs = 0, ncount = 0; 2019 profile_probability flag_probability = profile_probability::uninitialized (); 2020 2021 /* Flag is set in FLAG_BBS. Determine probability that flag will be true 2022 at loop exit. 
2023 2024 This code may look fancy, but it cannot update profile very realistically 2025 because we do not know the probability that flag will be true at given 2026 loop exit. 2027 2028 We look for two interesting extremes 2029 - when exit is dominated by block setting the flag, we know it will 2030 always be true. This is a common case. 2031 - when all blocks setting the flag have very low frequency we know 2032 it will likely be false. 2033 In all other cases we default to 2/3 for flag being true. */ 2034 2035 for (hash_set<basic_block>::iterator it = flag_bbs->begin (); 2036 it != flag_bbs->end (); ++it) 2037 { 2038 if ((*it)->count.initialized_p ()) 2039 count_sum += (*it)->count, ncount ++; 2040 if (dominated_by_p (CDI_DOMINATORS, ex->src, *it)) 2041 flag_probability = profile_probability::always (); 2042 nbbs++; 2043 } 2044 2045 profile_probability cap = profile_probability::always ().apply_scale (2, 3); 2046 2047 if (flag_probability.initialized_p ()) 2048 ; 2049 else if (ncount == nbbs 2050 && preheader->count () >= count_sum && preheader->count ().nonzero_p ()) 2051 { 2052 flag_probability = count_sum.probability_in (preheader->count ()); 2053 if (flag_probability > cap) 2054 flag_probability = cap; 2055 } 2056 2057 if (!flag_probability.initialized_p ()) 2058 flag_probability = cap; 2059 2060 /* ?? Insert store after previous store if applicable. See note 2061 below. */ 2062 if (append_cond_position) 2063 ex = append_cond_position; 2064 2065 loop_has_only_one_exit = single_pred_p (ex->dest); 2066 2067 if (loop_has_only_one_exit) 2068 ex = split_block_after_labels (ex->dest); 2069 else 2070 { 2071 for (gphi_iterator gpi = gsi_start_phis (ex->dest); 2072 !gsi_end_p (gpi); gsi_next (&gpi)) 2073 { 2074 gphi *phi = gpi.phi (); 2075 if (virtual_operand_p (gimple_phi_result (phi))) 2076 continue; 2077 2078 /* When the destination has a non-virtual PHI node with multiple 2079 predecessors make sure we preserve the PHI structure by 2080 forcing a forwarder block so that hoisting of that PHI will 2081 still work. */ 2082 split_edge (ex); 2083 break; 2084 } 2085 } 2086 2087 old_dest = ex->dest; 2088 new_bb = split_edge (ex); 2089 then_bb = create_empty_bb (new_bb); 2090 then_bb->count = new_bb->count.apply_probability (flag_probability); 2091 if (irr) 2092 then_bb->flags = BB_IRREDUCIBLE_LOOP; 2093 add_bb_to_loop (then_bb, new_bb->loop_father); 2094 2095 gsi = gsi_start_bb (new_bb); 2096 stmt = gimple_build_cond (NE_EXPR, flag, boolean_false_node, 2097 NULL_TREE, NULL_TREE); 2098 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); 2099 2100 /* Insert actual store. */ 2101 if (mem) 2102 { 2103 gsi = gsi_start_bb (then_bb); 2104 stmt = gimple_build_assign (unshare_expr (mem), tmp_var); 2105 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); 2106 } 2107 2108 edge e1 = single_succ_edge (new_bb); 2109 edge e2 = make_edge (new_bb, then_bb, 2110 EDGE_TRUE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0)); 2111 e2->probability = flag_probability; 2112 2113 e1->flags |= EDGE_FALSE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0); 2114 e1->flags &= ~EDGE_FALLTHRU; 2115 2116 e1->probability = flag_probability.invert (); 2117 2118 then_old_edge = make_single_succ_edge (then_bb, old_dest, 2119 EDGE_FALLTHRU | (irr ? 
EDGE_IRREDUCIBLE_LOOP : 0)); 2120 2121 set_immediate_dominator (CDI_DOMINATORS, then_bb, new_bb); 2122 2123 if (append_cond_position) 2124 { 2125 basic_block prevbb = last_cond_fallthru->src; 2126 redirect_edge_succ (last_cond_fallthru, new_bb); 2127 set_immediate_dominator (CDI_DOMINATORS, new_bb, prevbb); 2128 set_immediate_dominator (CDI_DOMINATORS, old_dest, 2129 recompute_dominator (CDI_DOMINATORS, old_dest)); 2130 } 2131 2132 /* ?? Because stores may alias, they must happen in the exact 2133 sequence they originally happened. Save the position right after 2134 the (_lsm) store we just created so we can continue appending after 2135 it and maintain the original order. */ 2136 append_cond_position = then_old_edge; 2137 last_cond_fallthru = find_edge (new_bb, old_dest); 2138 2139 if (!loop_has_only_one_exit) 2140 for (gphi_iterator gpi = gsi_start_phis (old_dest); 2141 !gsi_end_p (gpi); gsi_next (&gpi)) 2142 { 2143 gphi *phi = gpi.phi (); 2144 unsigned i; 2145 2146 for (i = 0; i < gimple_phi_num_args (phi); i++) 2147 if (gimple_phi_arg_edge (phi, i)->src == new_bb) 2148 { 2149 tree arg = gimple_phi_arg_def (phi, i); 2150 add_phi_arg (phi, arg, then_old_edge, UNKNOWN_LOCATION); 2151 update_stmt (phi); 2152 } 2153 } 2154 2155 return then_bb; 2156 } 2157 2158 /* When REF is set on the location, set flag indicating the store. */ 2159 2160 class sm_set_flag_if_changed 2161 { 2162 public: 2163 sm_set_flag_if_changed (tree flag_, hash_set <basic_block> *bbs_) 2164 : flag (flag_), bbs (bbs_) {} 2165 bool operator () (mem_ref_loc *loc); 2166 tree flag; 2167 hash_set <basic_block> *bbs; 2168 }; 2169 2170 bool 2171 sm_set_flag_if_changed::operator () (mem_ref_loc *loc) 2172 { 2173 /* Only set the flag for writes. */ 2174 if (is_gimple_assign (loc->stmt) 2175 && gimple_assign_lhs_ptr (loc->stmt) == loc->ref) 2176 { 2177 gimple_stmt_iterator gsi = gsi_for_stmt (loc->stmt); 2178 gimple *stmt = gimple_build_assign (flag, boolean_true_node); 2179 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); 2180 bbs->add (gimple_bb (stmt)); 2181 } 2182 return false; 2183 } 2184 2185 /* Helper function for execute_sm. On every location where REF is 2186 set, set an appropriate flag indicating the store. */ 2187 2188 static tree 2189 execute_sm_if_changed_flag_set (class loop *loop, im_mem_ref *ref, 2190 hash_set <basic_block> *bbs) 2191 { 2192 tree flag; 2193 char *str = get_lsm_tmp_name (ref->mem.ref, ~0, "_flag"); 2194 flag = create_tmp_reg (boolean_type_node, str); 2195 for_all_locs_in_loop (loop, ref, sm_set_flag_if_changed (flag, bbs)); 2196 return flag; 2197 } 2198 2199 struct sm_aux 2200 { 2201 tree tmp_var; 2202 tree store_flag; 2203 hash_set <basic_block> flag_bbs; 2204 }; 2205 2206 /* Executes store motion of memory reference REF from LOOP. 2207 Exits from the LOOP are stored in EXITS. The initialization of the 2208 temporary variable is put to the preheader of the loop, and assignments 2209 to the reference from the temporary variable are emitted to exits. 
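   As an illustrative sketch (lsm stands for the temporary created via
   get_lsm_tmp_name):

     for (...)
       {
         something;
         MEM = x;
       }

   becomes

     lsm = MEM;                 <-- in the preheader
     for (...)
       {
         something;
         lsm = x;
       }
     MEM = lsm;                 <-- on each exit edge

   When the store is only conditionally executed and store data races
   have to be avoided, the store on the exit is additionally guarded by
   a flag, see execute_sm_if_changed above.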
*/ 2210 2211 static void 2212 execute_sm (class loop *loop, im_mem_ref *ref, 2213 hash_map<im_mem_ref *, sm_aux *> &aux_map, bool maybe_mt, 2214 bool use_other_flag_var) 2215 { 2216 gassign *load; 2217 struct fmt_data fmt_data; 2218 struct lim_aux_data *lim_data; 2219 bool multi_threaded_model_p = false; 2220 gimple_stmt_iterator gsi; 2221 sm_aux *aux = new sm_aux; 2222 2223 if (dump_file && (dump_flags & TDF_DETAILS)) 2224 { 2225 fprintf (dump_file, "Executing store motion of "); 2226 print_generic_expr (dump_file, ref->mem.ref); 2227 fprintf (dump_file, " from loop %d\n", loop->num); 2228 } 2229 2230 aux->tmp_var = create_tmp_reg (TREE_TYPE (ref->mem.ref), 2231 get_lsm_tmp_name (ref->mem.ref, ~0)); 2232 2233 fmt_data.loop = loop; 2234 fmt_data.orig_loop = loop; 2235 for_each_index (&ref->mem.ref, force_move_till, &fmt_data); 2236 2237 bool always_stored = ref_always_accessed_p (loop, ref, true); 2238 if (maybe_mt 2239 && (bb_in_transaction (loop_preheader_edge (loop)->src) 2240 || (! flag_store_data_races && ! always_stored))) 2241 multi_threaded_model_p = true; 2242 2243 if (multi_threaded_model_p && !use_other_flag_var) 2244 aux->store_flag 2245 = execute_sm_if_changed_flag_set (loop, ref, &aux->flag_bbs); 2246 else 2247 aux->store_flag = NULL_TREE; 2248 2249 /* Remember variable setup. */ 2250 aux_map.put (ref, aux); 2251 2252 rewrite_mem_refs (loop, ref, aux->tmp_var); 2253 2254 /* Emit the load code on a random exit edge or into the latch if 2255 the loop does not exit, so that we are sure it will be processed 2256 by move_computations after all dependencies. */ 2257 gsi = gsi_for_stmt (first_mem_ref_loc (loop, ref)->stmt); 2258 2259 /* Avoid doing a load if there was no load of the ref in the loop. 2260 Esp. when the ref is not always stored we cannot optimize it 2261 away later. But when it is not always stored we must use a conditional 2262 store then. */ 2263 if ((!always_stored && !multi_threaded_model_p) 2264 || (ref->loaded && bitmap_bit_p (ref->loaded, loop->num))) 2265 load = gimple_build_assign (aux->tmp_var, unshare_expr (ref->mem.ref)); 2266 else 2267 { 2268 /* If not emitting a load mark the uninitialized state on the 2269 loop entry as not to be warned for. */ 2270 tree uninit = create_tmp_reg (TREE_TYPE (aux->tmp_var)); 2271 suppress_warning (uninit, OPT_Wuninitialized); 2272 load = gimple_build_assign (aux->tmp_var, uninit); 2273 } 2274 lim_data = init_lim_data (load); 2275 lim_data->max_loop = loop; 2276 lim_data->tgt_loop = loop; 2277 gsi_insert_before (&gsi, load, GSI_SAME_STMT); 2278 2279 if (aux->store_flag) 2280 { 2281 load = gimple_build_assign (aux->store_flag, boolean_false_node); 2282 lim_data = init_lim_data (load); 2283 lim_data->max_loop = loop; 2284 lim_data->tgt_loop = loop; 2285 gsi_insert_before (&gsi, load, GSI_SAME_STMT); 2286 } 2287 } 2288 2289 /* sm_ord is used for ordinary stores we can retain order with respect 2290 to other stores 2291 sm_unord is used for conditional executed stores which need to be 2292 able to execute in arbitrary order with respect to other stores 2293 sm_other is used for stores we do not try to apply store motion to. 
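   For an sm_other entry the FROM field of seq_entry, when not NULL_TREE,
   records the value stored; such a store is re-issued with that value on
   the exit edges to preserve the original store order (see
   execute_sm_exit).  An sm_other entry with a NULL_TREE FROM is kept
   only so later candidates are disambiguated against it; no store is
   materialized for it.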
*/
enum sm_kind { sm_ord, sm_unord, sm_other };
struct seq_entry
{
  seq_entry () {}
  seq_entry (unsigned f, sm_kind k, tree fr = NULL)
    : first (f), second (k), from (fr) {}
  unsigned first;
  sm_kind second;
  tree from;
};

static void
execute_sm_exit (class loop *loop, edge ex, vec<seq_entry> &seq,
                 hash_map<im_mem_ref *, sm_aux *> &aux_map, sm_kind kind,
                 edge &append_cond_position, edge &last_cond_fallthru)
{
  /* Sink the stores to exit from the loop.  */
  for (unsigned i = seq.length (); i > 0; --i)
    {
      im_mem_ref *ref = memory_accesses.refs_list[seq[i-1].first];
      if (seq[i-1].second == sm_other)
        {
          gcc_assert (kind == sm_ord && seq[i-1].from != NULL_TREE);
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Re-issuing dependent store of ");
              print_generic_expr (dump_file, ref->mem.ref);
              fprintf (dump_file, " from loop %d on exit %d -> %d\n",
                       loop->num, ex->src->index, ex->dest->index);
            }
          gassign *store = gimple_build_assign (unshare_expr (ref->mem.ref),
                                                seq[i-1].from);
          gsi_insert_on_edge (ex, store);
        }
      else
        {
          sm_aux *aux = *aux_map.get (ref);
          if (!aux->store_flag || kind == sm_ord)
            {
              gassign *store;
              store = gimple_build_assign (unshare_expr (ref->mem.ref),
                                           aux->tmp_var);
              gsi_insert_on_edge (ex, store);
            }
          else
            execute_sm_if_changed (ex, ref->mem.ref, aux->tmp_var,
                                   aux->store_flag,
                                   loop_preheader_edge (loop), &aux->flag_bbs,
                                   append_cond_position, last_cond_fallthru);
        }
    }
}

/* Push the SM candidate at index PTR in the sequence SEQ down until
   we hit the next SM candidate.  Return true if that went OK and
   false if we could not disambiguate against another unrelated ref.
   Update *AT to the index where the candidate now resides.  */

static bool
sm_seq_push_down (vec<seq_entry> &seq, unsigned ptr, unsigned *at)
{
  *at = ptr;
  for (; ptr > 0; --ptr)
    {
      seq_entry &new_cand = seq[ptr];
      seq_entry &against = seq[ptr-1];
      if (against.second == sm_ord
          || (against.second == sm_other && against.from != NULL_TREE))
        /* Found the tail of the sequence.  */
        break;
      /* We may not ignore self-dependences here.  */
      if (new_cand.first == against.first
          || !refs_independent_p (memory_accesses.refs_list[new_cand.first],
                                  memory_accesses.refs_list[against.first],
                                  false))
        /* ???  Prune new_cand from the list of refs to apply SM to.  */
        return false;
      std::swap (new_cand, against);
      *at = ptr - 1;
    }
  return true;
}

/* Computes the sequence of stores from candidates in REFS_NOT_IN_SEQ
   into SEQ, walking backwards from VDEF (or the end of BB if VDEF is
   NULL).  */

static int
sm_seq_valid_bb (class loop *loop, basic_block bb, tree vdef,
                 vec<seq_entry> &seq, bitmap refs_not_in_seq,
                 bitmap refs_not_supported, bool forked,
                 bitmap fully_visited)
{
  if (!vdef)
    for (gimple_stmt_iterator gsi = gsi_last_bb (bb); !gsi_end_p (gsi);
         gsi_prev (&gsi))
      {
        vdef = gimple_vdef (gsi_stmt (gsi));
        if (vdef)
          break;
      }
  if (!vdef)
    {
      gphi *vphi = get_virtual_phi (bb);
      if (vphi)
        vdef = gimple_phi_result (vphi);
    }
  if (!vdef)
    {
      if (single_pred_p (bb))
        /* This handles the perfect nest case.
*/ 2404 return sm_seq_valid_bb (loop, single_pred (bb), vdef, 2405 seq, refs_not_in_seq, refs_not_supported, 2406 forked, fully_visited); 2407 return 0; 2408 } 2409 do 2410 { 2411 gimple *def = SSA_NAME_DEF_STMT (vdef); 2412 if (gimple_bb (def) != bb) 2413 { 2414 /* If we forked by processing a PHI do not allow our walk to 2415 merge again until we handle that robustly. */ 2416 if (forked) 2417 { 2418 /* Mark refs_not_in_seq as unsupported. */ 2419 bitmap_ior_into (refs_not_supported, refs_not_in_seq); 2420 return 1; 2421 } 2422 /* Otherwise it doesn't really matter if we end up in different 2423 BBs. */ 2424 bb = gimple_bb (def); 2425 } 2426 if (gphi *phi = dyn_cast <gphi *> (def)) 2427 { 2428 /* Handle CFG merges. Until we handle forks (gimple_bb (def) != bb) 2429 this is still linear. 2430 Eventually we want to cache intermediate results per BB 2431 (but we can't easily cache for different exits?). */ 2432 /* Stop at PHIs with possible backedges. */ 2433 if (bb == bb->loop_father->header 2434 || bb->flags & BB_IRREDUCIBLE_LOOP) 2435 { 2436 /* Mark refs_not_in_seq as unsupported. */ 2437 bitmap_ior_into (refs_not_supported, refs_not_in_seq); 2438 return 1; 2439 } 2440 if (gimple_phi_num_args (phi) == 1) 2441 return sm_seq_valid_bb (loop, gimple_phi_arg_edge (phi, 0)->src, 2442 gimple_phi_arg_def (phi, 0), seq, 2443 refs_not_in_seq, refs_not_supported, 2444 false, fully_visited); 2445 if (bitmap_bit_p (fully_visited, 2446 SSA_NAME_VERSION (gimple_phi_result (phi)))) 2447 return 1; 2448 auto_vec<seq_entry> first_edge_seq; 2449 auto_bitmap tem_refs_not_in_seq (&lim_bitmap_obstack); 2450 int eret; 2451 bitmap_copy (tem_refs_not_in_seq, refs_not_in_seq); 2452 eret = sm_seq_valid_bb (loop, gimple_phi_arg_edge (phi, 0)->src, 2453 gimple_phi_arg_def (phi, 0), 2454 first_edge_seq, 2455 tem_refs_not_in_seq, refs_not_supported, 2456 true, fully_visited); 2457 if (eret != 1) 2458 return -1; 2459 /* Simplify our lives by pruning the sequence of !sm_ord. */ 2460 while (!first_edge_seq.is_empty () 2461 && first_edge_seq.last ().second != sm_ord) 2462 first_edge_seq.pop (); 2463 for (unsigned int i = 1; i < gimple_phi_num_args (phi); ++i) 2464 { 2465 tree vuse = gimple_phi_arg_def (phi, i); 2466 edge e = gimple_phi_arg_edge (phi, i); 2467 auto_vec<seq_entry> edge_seq; 2468 bitmap_and_compl (tem_refs_not_in_seq, 2469 refs_not_in_seq, refs_not_supported); 2470 /* If we've marked all refs we search for as unsupported 2471 we can stop processing and use the sequence as before 2472 the PHI. */ 2473 if (bitmap_empty_p (tem_refs_not_in_seq)) 2474 return 1; 2475 eret = sm_seq_valid_bb (loop, e->src, vuse, edge_seq, 2476 tem_refs_not_in_seq, refs_not_supported, 2477 true, fully_visited); 2478 if (eret != 1) 2479 return -1; 2480 /* Simplify our lives by pruning the sequence of !sm_ord. */ 2481 while (!edge_seq.is_empty () 2482 && edge_seq.last ().second != sm_ord) 2483 edge_seq.pop (); 2484 unsigned min_len = MIN(first_edge_seq.length (), 2485 edge_seq.length ()); 2486 /* Incrementally merge seqs into first_edge_seq. */ 2487 int first_uneq = -1; 2488 auto_vec<seq_entry, 2> extra_refs; 2489 for (unsigned int i = 0; i < min_len; ++i) 2490 { 2491 /* ??? We can more intelligently merge when we face different 2492 order by additional sinking operations in one sequence. 2493 For now we simply mark them as to be processed by the 2494 not order-preserving SM code. 
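	     As an example, if one incoming edge produced the store
	     sequence A, B and the other produced B, A, both A and B are
	     dropped from the order-preserving scheme here and are left
	     to the not order-preserving SM code (or given up on
	     entirely if even that is not possible).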
*/ 2495 if (first_edge_seq[i].first != edge_seq[i].first) 2496 { 2497 if (first_edge_seq[i].second == sm_ord) 2498 bitmap_set_bit (refs_not_supported, 2499 first_edge_seq[i].first); 2500 if (edge_seq[i].second == sm_ord) 2501 bitmap_set_bit (refs_not_supported, edge_seq[i].first); 2502 first_edge_seq[i].second = sm_other; 2503 first_edge_seq[i].from = NULL_TREE; 2504 /* Record the dropped refs for later processing. */ 2505 if (first_uneq == -1) 2506 first_uneq = i; 2507 extra_refs.safe_push (seq_entry (edge_seq[i].first, 2508 sm_other, NULL_TREE)); 2509 } 2510 /* sm_other prevails. */ 2511 else if (first_edge_seq[i].second != edge_seq[i].second) 2512 { 2513 /* Make sure the ref is marked as not supported. */ 2514 bitmap_set_bit (refs_not_supported, 2515 first_edge_seq[i].first); 2516 first_edge_seq[i].second = sm_other; 2517 first_edge_seq[i].from = NULL_TREE; 2518 } 2519 else if (first_edge_seq[i].second == sm_other 2520 && first_edge_seq[i].from != NULL_TREE 2521 && (edge_seq[i].from == NULL_TREE 2522 || !operand_equal_p (first_edge_seq[i].from, 2523 edge_seq[i].from, 0))) 2524 first_edge_seq[i].from = NULL_TREE; 2525 } 2526 /* Any excess elements become sm_other since they are now 2527 coonditionally executed. */ 2528 if (first_edge_seq.length () > edge_seq.length ()) 2529 { 2530 for (unsigned i = edge_seq.length (); 2531 i < first_edge_seq.length (); ++i) 2532 { 2533 if (first_edge_seq[i].second == sm_ord) 2534 bitmap_set_bit (refs_not_supported, 2535 first_edge_seq[i].first); 2536 first_edge_seq[i].second = sm_other; 2537 } 2538 } 2539 else if (edge_seq.length () > first_edge_seq.length ()) 2540 { 2541 if (first_uneq == -1) 2542 first_uneq = first_edge_seq.length (); 2543 for (unsigned i = first_edge_seq.length (); 2544 i < edge_seq.length (); ++i) 2545 { 2546 if (edge_seq[i].second == sm_ord) 2547 bitmap_set_bit (refs_not_supported, edge_seq[i].first); 2548 extra_refs.safe_push (seq_entry (edge_seq[i].first, 2549 sm_other, NULL_TREE)); 2550 } 2551 } 2552 /* Put unmerged refs at first_uneq to force dependence checking 2553 on them. */ 2554 if (first_uneq != -1) 2555 { 2556 /* Missing ordered_splice_at. */ 2557 if ((unsigned)first_uneq == first_edge_seq.length ()) 2558 first_edge_seq.safe_splice (extra_refs); 2559 else 2560 { 2561 unsigned fes_length = first_edge_seq.length (); 2562 first_edge_seq.safe_grow (fes_length 2563 + extra_refs.length ()); 2564 memmove (&first_edge_seq[first_uneq + extra_refs.length ()], 2565 &first_edge_seq[first_uneq], 2566 (fes_length - first_uneq) * sizeof (seq_entry)); 2567 memcpy (&first_edge_seq[first_uneq], 2568 extra_refs.address (), 2569 extra_refs.length () * sizeof (seq_entry)); 2570 } 2571 } 2572 } 2573 /* Use the sequence from the first edge and push SMs down. */ 2574 for (unsigned i = 0; i < first_edge_seq.length (); ++i) 2575 { 2576 unsigned id = first_edge_seq[i].first; 2577 seq.safe_push (first_edge_seq[i]); 2578 unsigned new_idx; 2579 if ((first_edge_seq[i].second == sm_ord 2580 || (first_edge_seq[i].second == sm_other 2581 && first_edge_seq[i].from != NULL_TREE)) 2582 && !sm_seq_push_down (seq, seq.length () - 1, &new_idx)) 2583 { 2584 if (first_edge_seq[i].second == sm_ord) 2585 bitmap_set_bit (refs_not_supported, id); 2586 /* Mark it sm_other. 
*/ 2587 seq[new_idx].second = sm_other; 2588 seq[new_idx].from = NULL_TREE; 2589 } 2590 } 2591 bitmap_set_bit (fully_visited, 2592 SSA_NAME_VERSION (gimple_phi_result (phi))); 2593 return 1; 2594 } 2595 lim_aux_data *data = get_lim_data (def); 2596 gcc_assert (data); 2597 if (data->ref == UNANALYZABLE_MEM_ID) 2598 return -1; 2599 /* Stop at memory references which we can't move. */ 2600 else if (memory_accesses.refs_list[data->ref]->mem.ref == error_mark_node 2601 || TREE_THIS_VOLATILE 2602 (memory_accesses.refs_list[data->ref]->mem.ref)) 2603 { 2604 /* Mark refs_not_in_seq as unsupported. */ 2605 bitmap_ior_into (refs_not_supported, refs_not_in_seq); 2606 return 1; 2607 } 2608 /* One of the stores we want to apply SM to and we've not yet seen. */ 2609 else if (bitmap_clear_bit (refs_not_in_seq, data->ref)) 2610 { 2611 seq.safe_push (seq_entry (data->ref, sm_ord)); 2612 2613 /* 1) push it down the queue until a SMed 2614 and not ignored ref is reached, skipping all not SMed refs 2615 and ignored refs via non-TBAA disambiguation. */ 2616 unsigned new_idx; 2617 if (!sm_seq_push_down (seq, seq.length () - 1, &new_idx) 2618 /* If that fails but we did not fork yet continue, we'll see 2619 to re-materialize all of the stores in the sequence then. 2620 Further stores will only be pushed up to this one. */ 2621 && forked) 2622 { 2623 bitmap_set_bit (refs_not_supported, data->ref); 2624 /* Mark it sm_other. */ 2625 seq[new_idx].second = sm_other; 2626 } 2627 2628 /* 2) check whether we've seen all refs we want to SM and if so 2629 declare success for the active exit */ 2630 if (bitmap_empty_p (refs_not_in_seq)) 2631 return 1; 2632 } 2633 else 2634 /* Another store not part of the final sequence. Simply push it. */ 2635 seq.safe_push (seq_entry (data->ref, sm_other, 2636 gimple_assign_rhs1 (def))); 2637 2638 vdef = gimple_vuse (def); 2639 } 2640 while (1); 2641 } 2642 2643 /* Hoists memory references MEM_REFS out of LOOP. EXITS is the list of exit 2644 edges of the LOOP. */ 2645 2646 static void 2647 hoist_memory_references (class loop *loop, bitmap mem_refs, 2648 const vec<edge> &exits) 2649 { 2650 im_mem_ref *ref; 2651 unsigned i; 2652 bitmap_iterator bi; 2653 2654 /* There's a special case we can use ordered re-materialization for 2655 conditionally excuted stores which is when all stores in the loop 2656 happen in the same basic-block. In that case we know we'll reach 2657 all stores and thus can simply process that BB and emit a single 2658 conditional block of ordered materializations. See PR102436. */ 2659 basic_block single_store_bb = NULL; 2660 EXECUTE_IF_SET_IN_BITMAP (&memory_accesses.all_refs_stored_in_loop[loop->num], 2661 0, i, bi) 2662 { 2663 bool fail = false; 2664 ref = memory_accesses.refs_list[i]; 2665 for (auto loc : ref->accesses_in_loop) 2666 if (!gimple_vdef (loc.stmt)) 2667 ; 2668 else if (!single_store_bb) 2669 { 2670 single_store_bb = gimple_bb (loc.stmt); 2671 bool conditional = false; 2672 for (edge e : exits) 2673 if (!dominated_by_p (CDI_DOMINATORS, e->src, single_store_bb)) 2674 { 2675 /* Conditional as seen from e. */ 2676 conditional = true; 2677 break; 2678 } 2679 if (!conditional) 2680 { 2681 fail = true; 2682 break; 2683 } 2684 } 2685 else if (single_store_bb != gimple_bb (loc.stmt)) 2686 { 2687 fail = true; 2688 break; 2689 } 2690 if (fail) 2691 { 2692 single_store_bb = NULL; 2693 break; 2694 } 2695 } 2696 if (single_store_bb) 2697 { 2698 /* Analyze the single block with stores. 
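	 sm_seq_valid_bb walks the virtual use-def chain backwards from
	 the end of that block and computes the order of the final
	 stores; that sequence is then re-issued on each exit below,
	 guarded by a single store flag when the stores may not be
	 executed unconditionally.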
*/ 2699 auto_bitmap fully_visited; 2700 auto_bitmap refs_not_supported; 2701 auto_bitmap refs_not_in_seq; 2702 auto_vec<seq_entry> seq; 2703 bitmap_copy (refs_not_in_seq, mem_refs); 2704 int res = sm_seq_valid_bb (loop, single_store_bb, NULL_TREE, 2705 seq, refs_not_in_seq, refs_not_supported, 2706 false, fully_visited); 2707 if (res != 1) 2708 { 2709 /* Unhandled refs can still fail this. */ 2710 bitmap_clear (mem_refs); 2711 return; 2712 } 2713 2714 /* We cannot handle sm_other since we neither remember the 2715 stored location nor the value at the point we execute them. */ 2716 for (unsigned i = 0; i < seq.length (); ++i) 2717 { 2718 unsigned new_i; 2719 if (seq[i].second == sm_other 2720 && seq[i].from != NULL_TREE) 2721 seq[i].from = NULL_TREE; 2722 else if ((seq[i].second == sm_ord 2723 || (seq[i].second == sm_other 2724 && seq[i].from != NULL_TREE)) 2725 && !sm_seq_push_down (seq, i, &new_i)) 2726 { 2727 bitmap_set_bit (refs_not_supported, seq[new_i].first); 2728 seq[new_i].second = sm_other; 2729 seq[new_i].from = NULL_TREE; 2730 } 2731 } 2732 bitmap_and_compl_into (mem_refs, refs_not_supported); 2733 if (bitmap_empty_p (mem_refs)) 2734 return; 2735 2736 /* Prune seq. */ 2737 while (seq.last ().second == sm_other 2738 && seq.last ().from == NULL_TREE) 2739 seq.pop (); 2740 2741 hash_map<im_mem_ref *, sm_aux *> aux_map; 2742 2743 /* Execute SM but delay the store materialization for ordered 2744 sequences on exit. */ 2745 bool first_p = true; 2746 EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi) 2747 { 2748 ref = memory_accesses.refs_list[i]; 2749 execute_sm (loop, ref, aux_map, true, !first_p); 2750 first_p = false; 2751 } 2752 2753 /* Get at the single flag variable we eventually produced. */ 2754 im_mem_ref *ref 2755 = memory_accesses.refs_list[bitmap_first_set_bit (mem_refs)]; 2756 sm_aux *aux = *aux_map.get (ref); 2757 2758 /* Materialize ordered store sequences on exits. */ 2759 edge e; 2760 FOR_EACH_VEC_ELT (exits, i, e) 2761 { 2762 edge append_cond_position = NULL; 2763 edge last_cond_fallthru = NULL; 2764 edge insert_e = e; 2765 /* Construct the single flag variable control flow and insert 2766 the ordered seq of stores in the then block. With 2767 -fstore-data-races we can do the stores unconditionally. */ 2768 if (aux->store_flag) 2769 insert_e 2770 = single_pred_edge 2771 (execute_sm_if_changed (e, NULL_TREE, NULL_TREE, 2772 aux->store_flag, 2773 loop_preheader_edge (loop), 2774 &aux->flag_bbs, append_cond_position, 2775 last_cond_fallthru)); 2776 execute_sm_exit (loop, insert_e, seq, aux_map, sm_ord, 2777 append_cond_position, last_cond_fallthru); 2778 gsi_commit_one_edge_insert (insert_e, NULL); 2779 } 2780 2781 for (hash_map<im_mem_ref *, sm_aux *>::iterator iter = aux_map.begin (); 2782 iter != aux_map.end (); ++iter) 2783 delete (*iter).second; 2784 2785 return; 2786 } 2787 2788 /* To address PR57359 before actually applying store-motion check 2789 the candidates found for validity with regards to reordering 2790 relative to other stores which we until here disambiguated using 2791 TBAA which isn't valid. 2792 What matters is the order of the last stores to the mem_refs 2793 with respect to the other stores of the loop at the point of the 2794 loop exits. */ 2795 2796 /* For each exit compute the store order, pruning from mem_refs 2797 on the fly. */ 2798 /* The complexity of this is at least 2799 O(number of exits * number of SM refs) but more approaching 2800 O(number of exits * number of SM refs * number of stores). */ 2801 /* ??? 
Somehow do this in a single sweep over the loop body. */ 2802 auto_vec<std::pair<edge, vec<seq_entry> > > sms; 2803 auto_bitmap refs_not_supported (&lim_bitmap_obstack); 2804 edge e; 2805 FOR_EACH_VEC_ELT (exits, i, e) 2806 { 2807 vec<seq_entry> seq; 2808 seq.create (4); 2809 auto_bitmap refs_not_in_seq (&lim_bitmap_obstack); 2810 bitmap_and_compl (refs_not_in_seq, mem_refs, refs_not_supported); 2811 if (bitmap_empty_p (refs_not_in_seq)) 2812 { 2813 seq.release (); 2814 break; 2815 } 2816 auto_bitmap fully_visited; 2817 int res = sm_seq_valid_bb (loop, e->src, NULL_TREE, 2818 seq, refs_not_in_seq, 2819 refs_not_supported, false, 2820 fully_visited); 2821 if (res != 1) 2822 { 2823 bitmap_copy (refs_not_supported, mem_refs); 2824 seq.release (); 2825 break; 2826 } 2827 sms.safe_push (std::make_pair (e, seq)); 2828 } 2829 2830 /* Prune pruned mem_refs from earlier processed exits. */ 2831 bool changed = !bitmap_empty_p (refs_not_supported); 2832 while (changed) 2833 { 2834 changed = false; 2835 std::pair<edge, vec<seq_entry> > *seq; 2836 FOR_EACH_VEC_ELT (sms, i, seq) 2837 { 2838 bool need_to_push = false; 2839 for (unsigned i = 0; i < seq->second.length (); ++i) 2840 { 2841 sm_kind kind = seq->second[i].second; 2842 if (kind == sm_other && seq->second[i].from == NULL_TREE) 2843 break; 2844 unsigned id = seq->second[i].first; 2845 unsigned new_idx; 2846 if (kind == sm_ord 2847 && bitmap_bit_p (refs_not_supported, id)) 2848 { 2849 seq->second[i].second = sm_other; 2850 gcc_assert (seq->second[i].from == NULL_TREE); 2851 need_to_push = true; 2852 } 2853 else if (need_to_push 2854 && !sm_seq_push_down (seq->second, i, &new_idx)) 2855 { 2856 /* We need to push down both sm_ord and sm_other 2857 but for the latter we need to disqualify all 2858 following refs. */ 2859 if (kind == sm_ord) 2860 { 2861 if (bitmap_set_bit (refs_not_supported, id)) 2862 changed = true; 2863 seq->second[new_idx].second = sm_other; 2864 } 2865 else 2866 { 2867 for (unsigned j = seq->second.length () - 1; 2868 j > new_idx; --j) 2869 if (seq->second[j].second == sm_ord 2870 && bitmap_set_bit (refs_not_supported, 2871 seq->second[j].first)) 2872 changed = true; 2873 seq->second.truncate (new_idx); 2874 break; 2875 } 2876 } 2877 } 2878 } 2879 } 2880 std::pair<edge, vec<seq_entry> > *seq; 2881 FOR_EACH_VEC_ELT (sms, i, seq) 2882 { 2883 /* Prune sm_other from the end. */ 2884 while (!seq->second.is_empty () 2885 && seq->second.last ().second == sm_other) 2886 seq->second.pop (); 2887 /* Prune duplicates from the start. */ 2888 auto_bitmap seen (&lim_bitmap_obstack); 2889 unsigned j, k; 2890 for (j = k = 0; j < seq->second.length (); ++j) 2891 if (bitmap_set_bit (seen, seq->second[j].first)) 2892 { 2893 if (k != j) 2894 seq->second[k] = seq->second[j]; 2895 ++k; 2896 } 2897 seq->second.truncate (k); 2898 /* And verify. */ 2899 seq_entry *e; 2900 FOR_EACH_VEC_ELT (seq->second, j, e) 2901 gcc_assert (e->second == sm_ord 2902 || (e->second == sm_other && e->from != NULL_TREE)); 2903 } 2904 2905 /* Verify dependence for refs we cannot handle with the order preserving 2906 code (refs_not_supported) or prune them from mem_refs. */ 2907 auto_vec<seq_entry> unord_refs; 2908 EXECUTE_IF_SET_IN_BITMAP (refs_not_supported, 0, i, bi) 2909 { 2910 ref = memory_accesses.refs_list[i]; 2911 if (!ref_indep_loop_p (loop, ref, sm_waw)) 2912 bitmap_clear_bit (mem_refs, i); 2913 /* We've now verified store order for ref with respect to all other 2914 stores in the loop does not matter. 
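	 Such a ref can thus be materialized as an unordered (sm_unord)
	 store on the exits, using the conditional store scheme of
	 execute_sm_if_changed when an unconditional store could
	 introduce a data race.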
*/ 2915 else 2916 unord_refs.safe_push (seq_entry (i, sm_unord)); 2917 } 2918 2919 hash_map<im_mem_ref *, sm_aux *> aux_map; 2920 2921 /* Execute SM but delay the store materialization for ordered 2922 sequences on exit. */ 2923 EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi) 2924 { 2925 ref = memory_accesses.refs_list[i]; 2926 execute_sm (loop, ref, aux_map, bitmap_bit_p (refs_not_supported, i), 2927 false); 2928 } 2929 2930 /* Materialize ordered store sequences on exits. */ 2931 FOR_EACH_VEC_ELT (exits, i, e) 2932 { 2933 edge append_cond_position = NULL; 2934 edge last_cond_fallthru = NULL; 2935 if (i < sms.length ()) 2936 { 2937 gcc_assert (sms[i].first == e); 2938 execute_sm_exit (loop, e, sms[i].second, aux_map, sm_ord, 2939 append_cond_position, last_cond_fallthru); 2940 sms[i].second.release (); 2941 } 2942 if (!unord_refs.is_empty ()) 2943 execute_sm_exit (loop, e, unord_refs, aux_map, sm_unord, 2944 append_cond_position, last_cond_fallthru); 2945 /* Commit edge inserts here to preserve the order of stores 2946 when an exit exits multiple loops. */ 2947 gsi_commit_one_edge_insert (e, NULL); 2948 } 2949 2950 for (hash_map<im_mem_ref *, sm_aux *>::iterator iter = aux_map.begin (); 2951 iter != aux_map.end (); ++iter) 2952 delete (*iter).second; 2953 } 2954 2955 class ref_always_accessed 2956 { 2957 public: 2958 ref_always_accessed (class loop *loop_, bool stored_p_) 2959 : loop (loop_), stored_p (stored_p_) {} 2960 bool operator () (mem_ref_loc *loc); 2961 class loop *loop; 2962 bool stored_p; 2963 }; 2964 2965 bool 2966 ref_always_accessed::operator () (mem_ref_loc *loc) 2967 { 2968 class loop *must_exec; 2969 2970 struct lim_aux_data *lim_data = get_lim_data (loc->stmt); 2971 if (!lim_data) 2972 return false; 2973 2974 /* If we require an always executed store make sure the statement 2975 is a store. */ 2976 if (stored_p) 2977 { 2978 tree lhs = gimple_get_lhs (loc->stmt); 2979 if (!lhs 2980 || !(DECL_P (lhs) || REFERENCE_CLASS_P (lhs))) 2981 return false; 2982 } 2983 2984 must_exec = lim_data->always_executed_in; 2985 if (!must_exec) 2986 return false; 2987 2988 if (must_exec == loop 2989 || flow_loop_nested_p (must_exec, loop)) 2990 return true; 2991 2992 return false; 2993 } 2994 2995 /* Returns true if REF is always accessed in LOOP. If STORED_P is true 2996 make sure REF is always stored to in LOOP. */ 2997 2998 static bool 2999 ref_always_accessed_p (class loop *loop, im_mem_ref *ref, bool stored_p) 3000 { 3001 return for_all_locs_in_loop (loop, ref, 3002 ref_always_accessed (loop, stored_p)); 3003 } 3004 3005 /* Returns true if REF1 and REF2 are independent. */ 3006 3007 static bool 3008 refs_independent_p (im_mem_ref *ref1, im_mem_ref *ref2, bool tbaa_p) 3009 { 3010 if (ref1 == ref2) 3011 return true; 3012 3013 if (dump_file && (dump_flags & TDF_DETAILS)) 3014 fprintf (dump_file, "Querying dependency of refs %u and %u: ", 3015 ref1->id, ref2->id); 3016 3017 if (mem_refs_may_alias_p (ref1, ref2, &memory_accesses.ttae_cache, tbaa_p)) 3018 { 3019 if (dump_file && (dump_flags & TDF_DETAILS)) 3020 fprintf (dump_file, "dependent.\n"); 3021 return false; 3022 } 3023 else 3024 { 3025 if (dump_file && (dump_flags & TDF_DETAILS)) 3026 fprintf (dump_file, "independent.\n"); 3027 return true; 3028 } 3029 } 3030 3031 /* Returns true if REF is independent on all other accessess in LOOP. 3032 KIND specifies the kind of dependence to consider. 
3033 lim_raw assumes REF is not stored in LOOP and disambiguates RAW 3034 dependences so if true REF can be hoisted out of LOOP 3035 sm_war disambiguates a store REF against all other loads to see 3036 whether the store can be sunk across loads out of LOOP 3037 sm_waw disambiguates a store REF against all other stores to see 3038 whether the store can be sunk across stores out of LOOP. */ 3039 3040 static bool 3041 ref_indep_loop_p (class loop *loop, im_mem_ref *ref, dep_kind kind) 3042 { 3043 bool indep_p = true; 3044 bitmap refs_to_check; 3045 3046 if (kind == sm_war) 3047 refs_to_check = &memory_accesses.refs_loaded_in_loop[loop->num]; 3048 else 3049 refs_to_check = &memory_accesses.refs_stored_in_loop[loop->num]; 3050 3051 if (bitmap_bit_p (refs_to_check, UNANALYZABLE_MEM_ID) 3052 || ref->mem.ref == error_mark_node) 3053 indep_p = false; 3054 else 3055 { 3056 /* tri-state, { unknown, independent, dependent } */ 3057 dep_state state = query_loop_dependence (loop, ref, kind); 3058 if (state != dep_unknown) 3059 return state == dep_independent ? true : false; 3060 3061 class loop *inner = loop->inner; 3062 while (inner) 3063 { 3064 if (!ref_indep_loop_p (inner, ref, kind)) 3065 { 3066 indep_p = false; 3067 break; 3068 } 3069 inner = inner->next; 3070 } 3071 3072 if (indep_p) 3073 { 3074 unsigned i; 3075 bitmap_iterator bi; 3076 EXECUTE_IF_SET_IN_BITMAP (refs_to_check, 0, i, bi) 3077 { 3078 im_mem_ref *aref = memory_accesses.refs_list[i]; 3079 if (aref->mem.ref == error_mark_node) 3080 { 3081 gimple *stmt = aref->accesses_in_loop[0].stmt; 3082 if ((kind == sm_war 3083 && ref_maybe_used_by_stmt_p (stmt, &ref->mem, 3084 kind != sm_waw)) 3085 || stmt_may_clobber_ref_p_1 (stmt, &ref->mem, 3086 kind != sm_waw)) 3087 { 3088 indep_p = false; 3089 break; 3090 } 3091 } 3092 else if (!refs_independent_p (ref, aref, kind != sm_waw)) 3093 { 3094 indep_p = false; 3095 break; 3096 } 3097 } 3098 } 3099 } 3100 3101 if (dump_file && (dump_flags & TDF_DETAILS)) 3102 fprintf (dump_file, "Querying %s dependencies of ref %u in loop %d: %s\n", 3103 kind == lim_raw ? "RAW" : (kind == sm_war ? "SM WAR" : "SM WAW"), 3104 ref->id, loop->num, indep_p ? "independent" : "dependent"); 3105 3106 /* Record the computed result in the cache. */ 3107 record_loop_dependence (loop, ref, kind, 3108 indep_p ? dep_independent : dep_dependent); 3109 3110 return indep_p; 3111 } 3112 3113 class ref_in_loop_hot_body 3114 { 3115 public: 3116 ref_in_loop_hot_body (class loop *loop_) : l (loop_) {} 3117 bool operator () (mem_ref_loc *loc); 3118 class loop *l; 3119 }; 3120 3121 /* Check the coldest loop between loop L and innermost loop. If there is one 3122 cold loop between L and INNER_LOOP, store motion can be performed, otherwise 3123 no cold loop means no store motion. get_coldest_out_loop also handles cases 3124 when l is inner_loop. */ 3125 bool 3126 ref_in_loop_hot_body::operator () (mem_ref_loc *loc) 3127 { 3128 basic_block curr_bb = gimple_bb (loc->stmt); 3129 class loop *inner_loop = curr_bb->loop_father; 3130 return get_coldest_out_loop (l, inner_loop, curr_bb); 3131 } 3132 3133 3134 /* Returns true if we can perform store motion of REF from LOOP. */ 3135 3136 static bool 3137 can_sm_ref_p (class loop *loop, im_mem_ref *ref) 3138 { 3139 tree base; 3140 3141 /* Can't hoist unanalyzable refs. */ 3142 if (!MEM_ANALYZABLE (ref)) 3143 return false; 3144 3145 /* Can't hoist/sink aggregate copies. */ 3146 if (ref->mem.ref == error_mark_node) 3147 return false; 3148 3149 /* It should be movable. 
*/
  if (!is_gimple_reg_type (TREE_TYPE (ref->mem.ref))
      || TREE_THIS_VOLATILE (ref->mem.ref)
      || !for_each_index (&ref->mem.ref, may_move_till, loop))
    return false;

  /* If it can throw, fail; we do not properly update EH info.  */
  if (tree_could_throw_p (ref->mem.ref))
    return false;

  /* If it can trap, it must be always executed in LOOP.
     Readonly memory locations may trap when storing to them, but
     tree_could_trap_p is a predicate for rvalues, so check that
     explicitly.  */
  base = get_base_address (ref->mem.ref);
  if ((tree_could_trap_p (ref->mem.ref)
       || (DECL_P (base) && TREE_READONLY (base)))
      /* ???  We can at least use false here, allowing loads?  We
         are forcing conditional stores if the ref is not always
         stored to later anyway.  So this would only guard
         the load we need to emit.  Thus when the ref is not
         loaded we can elide this completely?  */
      && !ref_always_accessed_p (loop, ref, true))
    return false;

  /* Verify all loads of ref can be hoisted.  */
  if (ref->loaded
      && bitmap_bit_p (ref->loaded, loop->num)
      && !ref_indep_loop_p (loop, ref, lim_raw))
    return false;

  /* Verify the candidate can be disambiguated against all loads,
     that is, we can elide all in-loop stores.  Disambiguation
     against stores is done later when we cannot guarantee preserving
     the order of stores.  */
  if (!ref_indep_loop_p (loop, ref, sm_war))
    return false;

  /* Verify whether the candidate is hot for LOOP.  Only do store motion
     if the candidate's profile count is hot.  A statement in a cold BB
     shouldn't be moved out of its loop_father.  */
  if (!for_all_locs_in_loop (loop, ref, ref_in_loop_hot_body (loop)))
    return false;

  return true;
}

/* Marks the references in LOOP for which store motion should be performed
   in REFS_TO_SM.  SM_EXECUTED is the set of references for which store
   motion was performed in one of the outer loops.  */

static void
find_refs_for_sm (class loop *loop, bitmap sm_executed, bitmap refs_to_sm)
{
  bitmap refs = &memory_accesses.all_refs_stored_in_loop[loop->num];
  unsigned i;
  bitmap_iterator bi;
  im_mem_ref *ref;

  EXECUTE_IF_AND_COMPL_IN_BITMAP (refs, sm_executed, 0, i, bi)
    {
      ref = memory_accesses.refs_list[i];
      if (can_sm_ref_p (loop, ref) && dbg_cnt (lim))
        bitmap_set_bit (refs_to_sm, i);
    }
}

/* Checks whether LOOP (with exits stored in EXITS array) is suitable
   for a store motion optimization (i.e. whether we can insert statements
   on its exits).  */

static bool
loop_suitable_for_sm (class loop *loop ATTRIBUTE_UNUSED,
                      const vec<edge> &exits)
{
  unsigned i;
  edge ex;

  FOR_EACH_VEC_ELT (exits, i, ex)
    if (ex->flags & (EDGE_ABNORMAL | EDGE_EH))
      return false;

  return true;
}

/* Try to perform store motion for all memory references modified inside
   LOOP.  SM_EXECUTED is the bitmap of the memory references for which
   store motion was executed in one of the outer loops.
*/ 3237 3238 static void 3239 store_motion_loop (class loop *loop, bitmap sm_executed) 3240 { 3241 auto_vec<edge> exits = get_loop_exit_edges (loop); 3242 class loop *subloop; 3243 bitmap sm_in_loop = BITMAP_ALLOC (&lim_bitmap_obstack); 3244 3245 if (loop_suitable_for_sm (loop, exits)) 3246 { 3247 find_refs_for_sm (loop, sm_executed, sm_in_loop); 3248 if (!bitmap_empty_p (sm_in_loop)) 3249 hoist_memory_references (loop, sm_in_loop, exits); 3250 } 3251 3252 bitmap_ior_into (sm_executed, sm_in_loop); 3253 for (subloop = loop->inner; subloop != NULL; subloop = subloop->next) 3254 store_motion_loop (subloop, sm_executed); 3255 bitmap_and_compl_into (sm_executed, sm_in_loop); 3256 BITMAP_FREE (sm_in_loop); 3257 } 3258 3259 /* Try to perform store motion for all memory references modified inside 3260 loops. */ 3261 3262 static void 3263 do_store_motion (void) 3264 { 3265 class loop *loop; 3266 bitmap sm_executed = BITMAP_ALLOC (&lim_bitmap_obstack); 3267 3268 for (loop = current_loops->tree_root->inner; loop != NULL; loop = loop->next) 3269 store_motion_loop (loop, sm_executed); 3270 3271 BITMAP_FREE (sm_executed); 3272 } 3273 3274 /* Fills ALWAYS_EXECUTED_IN information for basic blocks of LOOP, i.e. 3275 for each such basic block bb records the outermost loop for that execution 3276 of its header implies execution of bb. CONTAINS_CALL is the bitmap of 3277 blocks that contain a nonpure call. */ 3278 3279 static void 3280 fill_always_executed_in_1 (class loop *loop, sbitmap contains_call) 3281 { 3282 basic_block bb = NULL, last = NULL; 3283 edge e; 3284 class loop *inn_loop = loop; 3285 3286 if (ALWAYS_EXECUTED_IN (loop->header) == NULL) 3287 { 3288 auto_vec<basic_block, 64> worklist; 3289 worklist.reserve_exact (loop->num_nodes); 3290 worklist.quick_push (loop->header); 3291 do 3292 { 3293 edge_iterator ei; 3294 bb = worklist.pop (); 3295 3296 if (!flow_bb_inside_loop_p (inn_loop, bb)) 3297 { 3298 /* When we are leaving a possibly infinite inner loop 3299 we have to stop processing. */ 3300 if (!finite_loop_p (inn_loop)) 3301 break; 3302 /* If the loop was finite we can continue with processing 3303 the loop we exited to. */ 3304 inn_loop = bb->loop_father; 3305 } 3306 3307 if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb)) 3308 last = bb; 3309 3310 if (bitmap_bit_p (contains_call, bb->index)) 3311 break; 3312 3313 /* If LOOP exits from this BB stop processing. */ 3314 FOR_EACH_EDGE (e, ei, bb->succs) 3315 if (!flow_bb_inside_loop_p (loop, e->dest)) 3316 break; 3317 if (e) 3318 break; 3319 3320 /* A loop might be infinite (TODO use simple loop analysis 3321 to disprove this if possible). */ 3322 if (bb->flags & BB_IRREDUCIBLE_LOOP) 3323 break; 3324 3325 if (bb->loop_father->header == bb) 3326 /* Record that we enter into a subloop since it might not 3327 be finite. */ 3328 /* ??? Entering into a not always executed subloop makes 3329 fill_always_executed_in quadratic in loop depth since 3330 we walk those loops N times. This is not a problem 3331 in practice though, see PR102253 for a worst-case testcase. */ 3332 inn_loop = bb->loop_father; 3333 3334 /* Walk the body of LOOP sorted by dominance relation. Additionally, 3335 if a basic block S dominates the latch, then only blocks dominated 3336 by S are after it. 3337 This is get_loop_body_in_dom_order using a worklist algorithm and 3338 stopping once we are no longer interested in visiting further 3339 blocks. 
*/ 3340 unsigned old_len = worklist.length (); 3341 unsigned postpone = 0; 3342 for (basic_block son = first_dom_son (CDI_DOMINATORS, bb); 3343 son; 3344 son = next_dom_son (CDI_DOMINATORS, son)) 3345 { 3346 if (!flow_bb_inside_loop_p (loop, son)) 3347 continue; 3348 if (dominated_by_p (CDI_DOMINATORS, loop->latch, son)) 3349 postpone = worklist.length (); 3350 worklist.quick_push (son); 3351 } 3352 if (postpone) 3353 /* Postponing the block that dominates the latch means 3354 processing it last and thus putting it earliest in the 3355 worklist. */ 3356 std::swap (worklist[old_len], worklist[postpone]); 3357 } 3358 while (!worklist.is_empty ()); 3359 3360 while (1) 3361 { 3362 if (dump_enabled_p ()) 3363 dump_printf (MSG_NOTE, "BB %d is always executed in loop %d\n", 3364 last->index, loop->num); 3365 SET_ALWAYS_EXECUTED_IN (last, loop); 3366 if (last == loop->header) 3367 break; 3368 last = get_immediate_dominator (CDI_DOMINATORS, last); 3369 } 3370 } 3371 3372 for (loop = loop->inner; loop; loop = loop->next) 3373 fill_always_executed_in_1 (loop, contains_call); 3374 } 3375 3376 /* Fills ALWAYS_EXECUTED_IN information for basic blocks, i.e. 3377 for each such basic block bb records the outermost loop for that execution 3378 of its header implies execution of bb. */ 3379 3380 static void 3381 fill_always_executed_in (void) 3382 { 3383 basic_block bb; 3384 class loop *loop; 3385 3386 auto_sbitmap contains_call (last_basic_block_for_fn (cfun)); 3387 bitmap_clear (contains_call); 3388 FOR_EACH_BB_FN (bb, cfun) 3389 { 3390 gimple_stmt_iterator gsi; 3391 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 3392 { 3393 if (nonpure_call_p (gsi_stmt (gsi))) 3394 break; 3395 } 3396 3397 if (!gsi_end_p (gsi)) 3398 bitmap_set_bit (contains_call, bb->index); 3399 } 3400 3401 for (loop = current_loops->tree_root->inner; loop; loop = loop->next) 3402 fill_always_executed_in_1 (loop, contains_call); 3403 } 3404 3405 /* Find the coldest loop preheader for LOOP, also find the nearest hotter loop 3406 to LOOP. Then recursively iterate each inner loop. 
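   The results go to COLDEST_OUTERMOST_LOOP and HOTTER_THAN_INNER_LOOP,
   both indexed by loop->num; they are consulted later via
   get_coldest_out_loop to avoid hoisting a statement from a cold block
   into a hotter enclosing loop.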
*/ 3407 3408 void 3409 fill_coldest_and_hotter_out_loop (class loop *coldest_loop, 3410 class loop *hotter_loop, class loop *loop) 3411 { 3412 if (bb_colder_than_loop_preheader (loop_preheader_edge (loop)->src, 3413 coldest_loop)) 3414 coldest_loop = loop; 3415 3416 coldest_outermost_loop[loop->num] = coldest_loop; 3417 3418 hotter_than_inner_loop[loop->num] = NULL; 3419 class loop *outer_loop = loop_outer (loop); 3420 if (hotter_loop 3421 && bb_colder_than_loop_preheader (loop_preheader_edge (loop)->src, 3422 hotter_loop)) 3423 hotter_than_inner_loop[loop->num] = hotter_loop; 3424 3425 if (outer_loop && outer_loop != current_loops->tree_root 3426 && bb_colder_than_loop_preheader (loop_preheader_edge (loop)->src, 3427 outer_loop)) 3428 hotter_than_inner_loop[loop->num] = outer_loop; 3429 3430 if (dump_enabled_p ()) 3431 { 3432 dump_printf (MSG_NOTE, "loop %d's coldest_outermost_loop is %d, ", 3433 loop->num, coldest_loop->num); 3434 if (hotter_than_inner_loop[loop->num]) 3435 dump_printf (MSG_NOTE, "hotter_than_inner_loop is %d\n", 3436 hotter_than_inner_loop[loop->num]->num); 3437 else 3438 dump_printf (MSG_NOTE, "hotter_than_inner_loop is NULL\n"); 3439 } 3440 3441 class loop *inner_loop; 3442 for (inner_loop = loop->inner; inner_loop; inner_loop = inner_loop->next) 3443 fill_coldest_and_hotter_out_loop (coldest_loop, 3444 hotter_than_inner_loop[loop->num], 3445 inner_loop); 3446 } 3447 3448 /* Compute the global information needed by the loop invariant motion pass. */ 3449 3450 static void 3451 tree_ssa_lim_initialize (bool store_motion) 3452 { 3453 unsigned i; 3454 3455 bitmap_obstack_initialize (&lim_bitmap_obstack); 3456 gcc_obstack_init (&mem_ref_obstack); 3457 lim_aux_data_map = new hash_map<gimple *, lim_aux_data *>; 3458 3459 if (flag_tm) 3460 compute_transaction_bits (); 3461 3462 memory_accesses.refs = new hash_table<mem_ref_hasher> (100); 3463 memory_accesses.refs_list.create (100); 3464 /* Allocate a special, unanalyzable mem-ref with ID zero. */ 3465 memory_accesses.refs_list.quick_push 3466 (mem_ref_alloc (NULL, 0, UNANALYZABLE_MEM_ID)); 3467 3468 memory_accesses.refs_loaded_in_loop.create (number_of_loops (cfun)); 3469 memory_accesses.refs_loaded_in_loop.quick_grow (number_of_loops (cfun)); 3470 memory_accesses.refs_stored_in_loop.create (number_of_loops (cfun)); 3471 memory_accesses.refs_stored_in_loop.quick_grow (number_of_loops (cfun)); 3472 if (store_motion) 3473 { 3474 memory_accesses.all_refs_stored_in_loop.create (number_of_loops (cfun)); 3475 memory_accesses.all_refs_stored_in_loop.quick_grow 3476 (number_of_loops (cfun)); 3477 } 3478 3479 for (i = 0; i < number_of_loops (cfun); i++) 3480 { 3481 bitmap_initialize (&memory_accesses.refs_loaded_in_loop[i], 3482 &lim_bitmap_obstack); 3483 bitmap_initialize (&memory_accesses.refs_stored_in_loop[i], 3484 &lim_bitmap_obstack); 3485 if (store_motion) 3486 bitmap_initialize (&memory_accesses.all_refs_stored_in_loop[i], 3487 &lim_bitmap_obstack); 3488 } 3489 3490 memory_accesses.ttae_cache = NULL; 3491 3492 /* Initialize bb_loop_postorder with a mapping from loop->num to 3493 its postorder index. */ 3494 i = 0; 3495 bb_loop_postorder = XNEWVEC (unsigned, number_of_loops (cfun)); 3496 for (auto loop : loops_list (cfun, LI_FROM_INNERMOST)) 3497 bb_loop_postorder[loop->num] = i++; 3498 } 3499 3500 /* Cleans up after the invariant motion pass. 
*/ 3501 3502 static void 3503 tree_ssa_lim_finalize (void) 3504 { 3505 basic_block bb; 3506 unsigned i; 3507 im_mem_ref *ref; 3508 3509 FOR_EACH_BB_FN (bb, cfun) 3510 SET_ALWAYS_EXECUTED_IN (bb, NULL); 3511 3512 bitmap_obstack_release (&lim_bitmap_obstack); 3513 delete lim_aux_data_map; 3514 3515 delete memory_accesses.refs; 3516 memory_accesses.refs = NULL; 3517 3518 FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref) 3519 memref_free (ref); 3520 memory_accesses.refs_list.release (); 3521 obstack_free (&mem_ref_obstack, NULL); 3522 3523 memory_accesses.refs_loaded_in_loop.release (); 3524 memory_accesses.refs_stored_in_loop.release (); 3525 memory_accesses.all_refs_stored_in_loop.release (); 3526 3527 if (memory_accesses.ttae_cache) 3528 free_affine_expand_cache (&memory_accesses.ttae_cache); 3529 3530 free (bb_loop_postorder); 3531 3532 coldest_outermost_loop.release (); 3533 hotter_than_inner_loop.release (); 3534 } 3535 3536 /* Moves invariants from loops. Only "expensive" invariants are moved out -- 3537 i.e. those that are likely to be win regardless of the register pressure. 3538 Only perform store motion if STORE_MOTION is true. */ 3539 3540 unsigned int 3541 loop_invariant_motion_in_fun (function *fun, bool store_motion) 3542 { 3543 unsigned int todo = 0; 3544 3545 tree_ssa_lim_initialize (store_motion); 3546 3547 mark_ssa_maybe_undefs (); 3548 3549 /* Gathers information about memory accesses in the loops. */ 3550 analyze_memory_references (store_motion); 3551 3552 /* Fills ALWAYS_EXECUTED_IN information for basic blocks. */ 3553 fill_always_executed_in (); 3554 3555 /* Pre-compute coldest outermost loop and nearest hotter loop of each loop. 3556 */ 3557 class loop *loop; 3558 coldest_outermost_loop.create (number_of_loops (cfun)); 3559 coldest_outermost_loop.safe_grow_cleared (number_of_loops (cfun)); 3560 hotter_than_inner_loop.create (number_of_loops (cfun)); 3561 hotter_than_inner_loop.safe_grow_cleared (number_of_loops (cfun)); 3562 for (loop = current_loops->tree_root->inner; loop != NULL; loop = loop->next) 3563 fill_coldest_and_hotter_out_loop (loop, NULL, loop); 3564 3565 int *rpo = XNEWVEC (int, last_basic_block_for_fn (fun)); 3566 int n = pre_and_rev_post_order_compute_fn (fun, NULL, rpo, false); 3567 3568 /* For each statement determine the outermost loop in that it is 3569 invariant and cost for computing the invariant. */ 3570 for (int i = 0; i < n; ++i) 3571 compute_invariantness (BASIC_BLOCK_FOR_FN (fun, rpo[i])); 3572 3573 /* Execute store motion. Force the necessary invariants to be moved 3574 out of the loops as well. */ 3575 if (store_motion) 3576 do_store_motion (); 3577 3578 free (rpo); 3579 rpo = XNEWVEC (int, last_basic_block_for_fn (fun)); 3580 n = pre_and_rev_post_order_compute_fn (fun, NULL, rpo, false); 3581 3582 /* Move the expressions that are expensive enough. */ 3583 for (int i = 0; i < n; ++i) 3584 todo |= move_computations_worker (BASIC_BLOCK_FOR_FN (fun, rpo[i])); 3585 3586 free (rpo); 3587 3588 gsi_commit_edge_inserts (); 3589 if (need_ssa_update_p (fun)) 3590 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa); 3591 3592 tree_ssa_lim_finalize (); 3593 3594 return todo; 3595 } 3596 3597 /* Loop invariant motion pass. 
*/ 3598 3599 namespace { 3600 3601 const pass_data pass_data_lim = 3602 { 3603 GIMPLE_PASS, /* type */ 3604 "lim", /* name */ 3605 OPTGROUP_LOOP, /* optinfo_flags */ 3606 TV_LIM, /* tv_id */ 3607 PROP_cfg, /* properties_required */ 3608 0, /* properties_provided */ 3609 0, /* properties_destroyed */ 3610 0, /* todo_flags_start */ 3611 0, /* todo_flags_finish */ 3612 }; 3613 3614 class pass_lim : public gimple_opt_pass 3615 { 3616 public: 3617 pass_lim (gcc::context *ctxt) 3618 : gimple_opt_pass (pass_data_lim, ctxt) 3619 {} 3620 3621 /* opt_pass methods: */ 3622 opt_pass * clone () { return new pass_lim (m_ctxt); } 3623 virtual bool gate (function *) { return flag_tree_loop_im != 0; } 3624 virtual unsigned int execute (function *); 3625 3626 }; // class pass_lim 3627 3628 unsigned int 3629 pass_lim::execute (function *fun) 3630 { 3631 bool in_loop_pipeline = scev_initialized_p (); 3632 if (!in_loop_pipeline) 3633 loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS); 3634 3635 if (number_of_loops (fun) <= 1) 3636 return 0; 3637 unsigned int todo = loop_invariant_motion_in_fun (fun, flag_move_loop_stores); 3638 3639 if (!in_loop_pipeline) 3640 loop_optimizer_finalize (); 3641 else 3642 scev_reset (); 3643 return todo; 3644 } 3645 3646 } // anon namespace 3647 3648 gimple_opt_pass * 3649 make_pass_lim (gcc::context *ctxt) 3650 { 3651 return new pass_lim (ctxt); 3652 } 3653 3654 3655