/* Induction variable optimizations.
   Copyright (C) 2003-2020 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

      Note the interesting uses are categorized and handled in groups.
      Generally, address type uses are grouped together if their iv bases
      differ only in a constant offset.

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         groups/uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The group/use costs.  Each of the interesting groups/uses chooses
         the best induction variable in the set and adds its cost to the sum.
         The cost reflects the time spent on modifying the induction variables
         value to be usable for the given purpose (adding base and offset for
         arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible; it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.

   For the targets supporting low-overhead loops, IVOPTs has to take care of
   the loops which will probably be transformed in RTL doloop optimization,
   to try to make the selected IV candidate set optimal.  The process of
   doloop support includes:

   1) Analyze whether the current loop will be transformed to a doloop or
      not, find and mark its compare type IV use as doloop use (iv_group
      field doloop_p), and set the flag doloop_use_p of ivopts_data to
      notify subsequent processing on doloop.  See
      analyze_and_mark_doloop_use and its callees for the details.  The
      target hook predict_doloop_p can be used for target specific checks.

   2) Add one doloop dedicated IV cand {(may_be_zero ? 1 : (niter + 1)), +, -1},
      set the flag doloop_p of the iv_cand; its step cost is set to zero and
      it has no extra cost like a biv.  For cost determination between the
      doloop IV cand and an IV use, the target hooks doloop_cost_for_generic
      and doloop_cost_for_address are provided to add extra costs for generic
      type and address type IV uses.  Zero cost is assigned to the pair
      between the doloop IV cand and the doloop IV use, and bound zero is set
      for IV elimination.

   3) With the cost setting in step 2), the current cost model based IV
      selection algorithm will process as usual, and pick up the doloop
      dedicated IV if it is profitable.  */
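/* As a rough illustration only (the candidates actually selected depend on
   the target's cost model and the addressing modes it supports), a loop like

	for (i = 0; i < n; i++)
	  a[i] = b[i] + 1;

   which starts out with the counter I plus separate address computations
   for A[I] and B[I], may end up being rewritten to use a single
   pointer-style induction variable, roughly

	for (p = a, q = b; p < a + n; p++, q++)
	  *p = *q + 1;

   after strength reduction of the address computations, coalescing of the
   induction variables and elimination of the original counter.  */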
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "memmodel.h"
#include "tm_p.h"
#include "ssa.h"
#include "expmed.h"
#include "insn-config.h"
#include "emit-rtl.h"
#include "recog.h"
#include "cgraph.h"
#include "gimple-pretty-print.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "explow.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-scalar-evolution.h"
#include "tree-affine.h"
#include "tree-ssa-propagate.h"
#include "tree-ssa-address.h"
#include "builtins.h"
#include "tree-vectorizer.h"
#include "dbgcnt.h"
#include "cfganal.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */

/* The infinite cost.  */
#define INFTY 1000000000

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists. */

static inline HOST_WIDE_INT
avg_loop_niter (class loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    {
      niter = likely_max_stmt_executions_int (loop);

      if (niter == -1 || niter > param_avg_loop_niter)
	return param_avg_loop_niter;
    }

  return niter;
}

struct iv_use;

/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to which the induction variable points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  struct iv_use *nonlin_use;	/* The identifier in the use if it is the case.  */
  bool biv_p;		/* Is it a biv?  */
  bool no_overflow;	/* True if the iv doesn't overflow.  */
  bool have_address_use;/* For biv, indicate if it's used in any address
			   type use.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_REF_ADDRESS,	/* Use is an address for an explicit memory
			   reference.  */
  USE_PTR_ADDRESS,	/* Use is a pointer argument to a function in
			   cases where the expansion of the function
			   will turn the argument into a normal address.  */
  USE_COMPARE		/* Use is a compare.  */
};

/* Cost of a computation.  */
class comp_cost
{
public:
  comp_cost (): cost (0), complexity (0), scratch (0)
  {}

  comp_cost (int64_t cost, unsigned complexity, int64_t scratch = 0)
    : cost (cost), complexity (complexity), scratch (scratch)
  {}

  /* Returns true if COST is infinite.  */
  bool infinite_cost_p ();

  /* Adds costs COST1 and COST2.  */
  friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);

  /* Adds COST to the comp_cost.  */
  comp_cost operator+= (comp_cost cost);

  /* Adds constant C to this comp_cost.  */
  comp_cost operator+= (HOST_WIDE_INT c);

  /* Subtracts constant C from this comp_cost.  */
  comp_cost operator-= (HOST_WIDE_INT c);

  /* Divide the comp_cost by constant C.  */
  comp_cost operator/= (HOST_WIDE_INT c);

  /* Multiply the comp_cost by constant C.  */
  comp_cost operator*= (HOST_WIDE_INT c);

  /* Subtracts costs COST1 and COST2.  */
  friend comp_cost operator- (comp_cost cost1, comp_cost cost2);

  /* Subtracts COST from this comp_cost.  */
  comp_cost operator-= (comp_cost cost);

  /* Returns true if COST1 is smaller than COST2.  */
  friend bool operator< (comp_cost cost1, comp_cost cost2);

  /* Returns true if COST1 and COST2 are equal.  */
  friend bool operator== (comp_cost cost1, comp_cost cost2);

  /* Returns true if COST1 is smaller than or equal to COST2.  */
  friend bool operator<= (comp_cost cost1, comp_cost cost2);

  int64_t cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
  int64_t scratch;	/* Scratch used during cost computation.  */
};

static const comp_cost no_cost;
static const comp_cost infinite_cost (INFTY, 0, INFTY);
bool
comp_cost::infinite_cost_p ()
{
  return cost == INFTY;
}

comp_cost
operator+ (comp_cost cost1, comp_cost cost2)
{
  if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
    return infinite_cost;

  gcc_assert (cost1.cost + cost2.cost < infinite_cost.cost);
  cost1.cost += cost2.cost;
  cost1.complexity += cost2.complexity;

  return cost1;
}

comp_cost
operator- (comp_cost cost1, comp_cost cost2)
{
  if (cost1.infinite_cost_p ())
    return infinite_cost;

  gcc_assert (!cost2.infinite_cost_p ());
  gcc_assert (cost1.cost - cost2.cost < infinite_cost.cost);

  cost1.cost -= cost2.cost;
  cost1.complexity -= cost2.complexity;

  return cost1;
}

comp_cost
comp_cost::operator+= (comp_cost cost)
{
  *this = *this + cost;
  return *this;
}

comp_cost
comp_cost::operator+= (HOST_WIDE_INT c)
{
  if (c >= INFTY)
    this->cost = INFTY;

  if (infinite_cost_p ())
    return *this;

  gcc_assert (this->cost + c < infinite_cost.cost);
  this->cost += c;

  return *this;
}

comp_cost
comp_cost::operator-= (HOST_WIDE_INT c)
{
  if (infinite_cost_p ())
    return *this;

  gcc_assert (this->cost - c < infinite_cost.cost);
  this->cost -= c;

  return *this;
}

comp_cost
comp_cost::operator/= (HOST_WIDE_INT c)
{
  gcc_assert (c != 0);
  if (infinite_cost_p ())
    return *this;

  this->cost /= c;

  return *this;
}

comp_cost
comp_cost::operator*= (HOST_WIDE_INT c)
{
  if (infinite_cost_p ())
    return *this;

  gcc_assert (this->cost * c < infinite_cost.cost);
  this->cost *= c;

  return *this;
}

comp_cost
comp_cost::operator-= (comp_cost cost)
{
  *this = *this - cost;
  return *this;
}

bool
operator< (comp_cost cost1, comp_cost cost2)
{
  if (cost1.cost == cost2.cost)
    return cost1.complexity < cost2.complexity;

  return cost1.cost < cost2.cost;
}

bool
operator== (comp_cost cost1, comp_cost cost2)
{
  return cost1.cost == cost2.cost
    && cost1.complexity == cost2.complexity;
}

bool
operator<= (comp_cost cost1, comp_cost cost2)
{
  return cost1 < cost2 || cost1 == cost2;
}
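/* A small usage sketch, illustrative only and not part of the pass itself:
   costs are ordered primarily by the runtime COST field with COMPLEXITY
   breaking ties, and any operand with infinite cost makes the result
   infinite.  With hypothetical values,

	comp_cost a (4, 1), b (4, 2);
	comp_cost c = a + b;		   // c.cost == 8, c.complexity == 3
	bool cheaper = a < b;		   // true: equal cost, lower complexity
	comp_cost d = infinite_cost + a;   // d.infinite_cost_p () is true

   which is the ordering the candidate selection code below relies on.  */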
struct iv_inv_expr_ent;

/* The candidate - cost pair.  */
class cost_pair
{
public:
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  enum tree_code comp;	/* For iv elimination, the comparison.  */
  bitmap inv_vars;	/* The list of invariant ssa_vars that have to be
			   preserved when representing iv_use with iv_cand.  */
  bitmap inv_exprs;	/* The list of newly created invariant expressions
			   when representing iv_use with iv_cand.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  unsigned group_id;	/* The group id the use belongs to.  */
  enum use_type type;	/* Type of the use.  */
  tree mem_type;	/* The memory type to use when testing whether an
			   address is legitimate, and what the address's
			   cost is.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple *stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */

  tree addr_base;	/* Base address with const offset stripped.  */
  poly_uint64_pod addr_offset;
			/* Const offset stripped from base address.  */
};

/* Group of uses.  */
struct iv_group
{
  /* The id of the group.  */
  unsigned id;
  /* Uses of the group are of the same type.  */
  enum use_type type;
  /* The set of "related" IV candidates, plus the important ones.  */
  bitmap related_cands;
  /* Number of IV candidates in the cost_map.  */
  unsigned n_map_members;
  /* The costs wrt the iv candidates.  */
  class cost_pair *cost_map;
  /* The selected candidate for the group.  */
  struct iv_cand *selected;
  /* To indicate this is a doloop use group.  */
  bool doloop_p;
  /* Uses in the group.  */
  vec<struct iv_use *> vuses;
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple *incremented_at;/* For original biv, the statement where it is
			    incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use;/* For IP_{BEFORE,AFTER}_USE candidates, the place
			     where it is incremented.  */
  bitmap inv_vars;	/* The list of invariant ssa_vars used in step of the
			   iv_cand.  */
  bitmap inv_exprs;	/* If step is more complicated than a single ssa_var,
			   handle it as a new invariant expression which will
			   be hoisted out of loop.  */
  struct iv *orig_iv;	/* The original iv if this cand is added from biv with
			   smaller type.  */
  bool doloop_p;	/* Whether this is a doloop candidate.  */
};

/* Hashtable entry for common candidate derived from iv uses.  */
class iv_common_cand
{
public:
  tree base;
  tree step;
  /* IV uses from which this common candidate is derived.  */
  auto_vec<struct iv_use *> uses;
  hashval_t hash;
};

/* Hashtable helpers.  */

struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
{
  static inline hashval_t hash (const iv_common_cand *);
  static inline bool equal (const iv_common_cand *, const iv_common_cand *);
};

/* Hash function for possible common candidates.  */

inline hashval_t
iv_common_cand_hasher::hash (const iv_common_cand *ccand)
{
  return ccand->hash;
}
/* Hash table equality function for common candidates.  */

inline bool
iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
			      const iv_common_cand *ccand2)
{
  return (ccand1->hash == ccand2->hash
	  && operand_equal_p (ccand1->base, ccand2->base, 0)
	  && operand_equal_p (ccand1->step, ccand2->step, 0)
	  && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
	      == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
}

/* Loop invariant expression hashtable entry.  */

struct iv_inv_expr_ent
{
  /* Tree expression of the entry.  */
  tree expr;
  /* Unique identifier.  */
  int id;
  /* Hash value.  */
  hashval_t hash;
};

/* Sort iv_inv_expr_ent pair A and B by id field.  */

static int
sort_iv_inv_expr_ent (const void *a, const void *b)
{
  const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
  const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);

  unsigned id1 = (*e1)->id;
  unsigned id2 = (*e2)->id;

  if (id1 < id2)
    return -1;
  else if (id1 > id2)
    return 1;
  else
    return 0;
}

/* Hashtable helpers.  */

struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
{
  static inline hashval_t hash (const iv_inv_expr_ent *);
  static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
};

/* Return true if uses of type TYPE represent some form of address.  */

inline bool
address_p (use_type type)
{
  return type == USE_REF_ADDRESS || type == USE_PTR_ADDRESS;
}

/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
			   const iv_inv_expr_ent *expr2)
{
  return expr1->hash == expr2->hash
	 && operand_equal_p (expr1->expr, expr2->expr, 0);
}

struct ivopts_data
{
  /* The currently optimized loop.  */
  class loop *current_loop;
  location_t loop_loc;

  /* Numbers of iterations for all exits of the current loop.  */
  hash_map<edge, tree_niter_desc *> *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  hash_table<iv_inv_expr_hasher> *inv_expr_tab;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_group *> vgroups;

  /* The candidates.  */
  vec<iv_cand *> vcands;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Cache used by tree_to_aff_combination_expand.  */
  hash_map<tree, name_expansion *> *name_expansion_cache;

  /* The hashtable of common candidates derived from iv uses.  */
  hash_table<iv_common_cand_hasher> *iv_common_cand_tab;

  /* The common candidates.  */
  vec<iv_common_cand *> iv_common_cands;

  /* Hash map recording base object information of tree exp.  */
  hash_map<tree, tree> *base_object_map;

  /* The maximum invariant variable id.  */
  unsigned max_inv_var_id;

  /* The maximum invariant expression id.  */
  unsigned max_inv_expr_id;
  /* Number of no_overflow BIVs which are not used in memory address.  */
  unsigned bivs_not_used_in_addr;

  /* Obstack for iv structure.  */
  struct obstack iv_obstack;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;

  /* Whether the loop has doloop comparison use.  */
  bool doloop_use_p;
};

/* An assignment of iv candidates to uses.  */

class iv_ca
{
public:
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_groups;

  /* Candidate assigned to a use, together with the related costs.  */
  class cost_pair **cand_for_group;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* The number of invariants needed, including both invariant variables and
     invariant expressions.  */
  unsigned n_invs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  int64_t cand_cost;

  /* Number of times each invariant variable is used.  */
  unsigned *n_inv_var_uses;

  /* Number of times each invariant expression is used.  */
  unsigned *n_inv_expr_uses;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed group.  */
  struct iv_group *group;

  /* An old assignment (for rollback purposes).  */
  class cost_pair *old_cp;

  /* A new assignment.  */
  class cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next;
};

/* Bound on number of candidates below which all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) param_iv_consider_all_candidates_bound)

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_GROUPS \
  ((unsigned) param_iv_max_considered_uses)

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) param_iv_always_prune_cand_set_bound)

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (class loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  Don't dump
   variable's name if DUMP_NAME is FALSE.  The information is dumped with
   preceding spaces indicated by INDENT_LEVEL.  */
void
dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
{
  const char *p;
  const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};

  if (indent_level > 4)
    indent_level = 4;
  p = spaces + 8 - (indent_level << 1);

  fprintf (file, "%sIV struct:\n", p);
  if (iv->ssa_name && dump_name)
    {
      fprintf (file, "%s SSA_NAME:\t", p);
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "%s Type:\t", p);
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  fprintf (file, "%s Base:\t", p);
  print_generic_expr (file, iv->base, TDF_SLIM);
  fprintf (file, "\n");

  fprintf (file, "%s Step:\t", p);
  print_generic_expr (file, iv->step, TDF_SLIM);
  fprintf (file, "\n");

  if (iv->base_object)
    {
      fprintf (file, "%s Object:\t", p);
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "%s Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');

  fprintf (file, "%s Overflowness wrto loop niter:\t%s\n",
	   p, iv->no_overflow ? "No-overflow" : "Overflow");
}

/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, " Use %d.%d:\n", use->group_id, use->id);
  fprintf (file, " At stmt:\t");
  print_gimple_stmt (file, use->stmt, 0);
  fprintf (file, " At pos:\t");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");
  dump_iv (file, use->iv, false, 2);
}

/* Dumps information about the uses to FILE.  */

void
dump_groups (FILE *file, struct ivopts_data *data)
{
  unsigned i, j;
  struct iv_group *group;

  for (i = 0; i < data->vgroups.length (); i++)
    {
      group = data->vgroups[i];
      fprintf (file, "Group %d:\n", group->id);
      if (group->type == USE_NONLINEAR_EXPR)
	fprintf (file, " Type:\tGENERIC\n");
      else if (group->type == USE_REF_ADDRESS)
	fprintf (file, " Type:\tREFERENCE ADDRESS\n");
      else if (group->type == USE_PTR_ADDRESS)
	fprintf (file, " Type:\tPOINTER ARGUMENT ADDRESS\n");
      else
	{
	  gcc_assert (group->type == USE_COMPARE);
	  fprintf (file, " Type:\tCOMPARE\n");
	}
      for (j = 0; j < group->vuses.length (); j++)
	dump_use (file, group->vuses[j]);
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "Candidate %d:\n", cand->id);
  if (cand->inv_vars)
    {
      fprintf (file, " Depend on inv.vars: ");
      dump_bitmap (file, cand->inv_vars);
    }
  if (cand->inv_exprs)
    {
      fprintf (file, " Depend on inv.exprs: ");
      dump_bitmap (file, cand->inv_exprs);
    }

  if (cand->var_before)
    {
      fprintf (file, " Var before: ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, " Var after: ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, " Incr POS: before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, " Incr POS: before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, " Incr POS: after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, " Incr POS: at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, " Incr POS: orig biv\n");
      break;
    }

  dump_iv (file, iv, false, 1);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (class loop *loop, gimple *stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (class loop *loop, struct iv_cand *cand, gimple *stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}

/* walk_tree callback for contains_abnormal_ssa_name_p.  */
static tree
contains_abnormal_ssa_name_p_1 (tree *tp, int *walk_subtrees, void *)
{
  if (TREE_CODE (*tp) == SSA_NAME
      && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*tp))
    return *tp;

  if (!EXPR_P (*tp))
    *walk_subtrees = 0;

  return NULL_TREE;
}

/* Returns true if EXPR contains a ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  return walk_tree_without_duplicates
	   (&expr, contains_abnormal_ssa_name_p_1, NULL) != NULL_TREE;
}

/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static class tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  class tree_niter_desc *desc;
  tree_niter_desc **slot;

  if (!data->niters)
    {
      data->niters = new hash_map<edge, tree_niter_desc *>;
      slot = NULL;
    }
  else
    slot = data->niters->get (exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with ssa
	 names that appear in phi nodes on abnormal edges, so that we do not
	 create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (class tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
				      exit, desc, true)
	  || contains_abnormal_ssa_name_p (desc->niter))
	{
	  XDELETE (desc);
	  desc = NULL;
	}
      data->niters->put (exit, desc);
    }
  else
    desc = *slot;

  return desc;
}

/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static class tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_var_id = 0;
  data->max_inv_expr_id = 0;
  data->niters = NULL;
  data->vgroups.create (20);
  data->vcands.create (20);
  data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
  data->name_expansion_cache = NULL;
  data->base_object_map = NULL;
  data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
  data->iv_common_cands.create (20);
  decl_rtl_to_reset.create (20);
  gcc_obstack_init (&data->iv_obstack);
}

/* walk_tree callback for determine_base_object.  */
static tree
determine_base_object_1 (tree *tp, int *walk_subtrees, void *wdata)
{
  tree_code code = TREE_CODE (*tp);
  tree obj = NULL_TREE;
  if (code == ADDR_EXPR)
    {
      tree base = get_base_address (TREE_OPERAND (*tp, 0));
      if (!base)
	obj = *tp;
      else if (TREE_CODE (base) != MEM_REF)
	obj = fold_convert (ptr_type_node, build_fold_addr_expr (base));
    }
  else if (code == SSA_NAME && POINTER_TYPE_P (TREE_TYPE (*tp)))
    obj = fold_convert (ptr_type_node, *tp);

  if (!obj)
    {
      if (!EXPR_P (*tp))
	*walk_subtrees = 0;

      return NULL_TREE;
    }
  /* Record special node for multiple base objects and stop.  */
  if (*static_cast<tree *> (wdata))
    {
      *static_cast<tree *> (wdata) = integer_zero_node;
      return integer_zero_node;
    }
  /* Record the base object and continue looking.  */
  *static_cast<tree *> (wdata) = obj;
  return NULL_TREE;
}

/* Returns a memory object to which EXPR points, with caching.  Return NULL
   if we are able to determine that it does not point to any such object;
   specially return integer_zero_node if EXPR contains multiple base
   objects.  */

static tree
determine_base_object (struct ivopts_data *data, tree expr)
{
  tree *slot, obj = NULL_TREE;
  if (data->base_object_map)
    {
      if ((slot = data->base_object_map->get(expr)) != NULL)
	return *slot;
    }
  else
    data->base_object_map = new hash_map<tree, tree>;

  (void) walk_tree_without_duplicates (&expr, determine_base_object_1, &obj);
  data->base_object_map->put (expr, obj);
  return obj;
}

/* Return true if address expression with non-DECL_P operand appears
   in EXPR.  */

static bool
contain_complex_addr_expr (tree expr)
{
  bool res = false;

  STRIP_NOPS (expr);
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
      break;

    case ADDR_EXPR:
      return (!DECL_P (TREE_OPERAND (expr, 0)));

    default:
      return false;
    }

  return res;
}

/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  NO_OVERFLOW implies the iv doesn't overflow.  */

static struct iv *
alloc_iv (struct ivopts_data *data, tree base, tree step,
	  bool no_overflow = false)
{
  tree expr = base;
  struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
					      sizeof (struct iv));
  gcc_assert (step != NULL_TREE);

  /* Lower address expression in base except ones with DECL_P as operand.
     By doing this:
       1) More accurate cost can be computed for address expressions;
       2) Duplicate candidates won't be created for bases in different
	  forms, like &a[0] and &a.  */
  STRIP_NOPS (expr);
  if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
      || contain_complex_addr_expr (expr))
    {
      aff_tree comb;
      tree_to_aff_combination (expr, TREE_TYPE (expr), &comb);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (data, base);
  iv->step = step;
  iv->biv_p = false;
  iv->nonlin_use = NULL;
  iv->ssa_name = NULL_TREE;
  if (!no_overflow
      && !iv_can_overflow_p (data->current_loop, TREE_TYPE (base),
			     base, step))
    no_overflow = true;
  iv->no_overflow = no_overflow;
  iv->have_address_use = false;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  NO_OVERFLOW implies the IV
   doesn't overflow.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
	bool no_overflow)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (data, base, step, no_overflow);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	{
	  if (POINTER_TYPE_P (type))
	    type = sizetype;
	  set_iv (data, var, var, build_int_cst (type, 0), true);
	}
    }

  return name_info (data, var)->iv;
}

/* Return the first non-invariant ssa var found in EXPR.  */

static tree
extract_single_var_from_expr (tree expr)
{
  int i, n;
  tree tmp;
  enum tree_code code;

  if (!expr || is_gimple_min_invariant (expr))
    return NULL;

  code = TREE_CODE (expr);
  if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
    {
      n = TREE_OPERAND_LENGTH (expr);
      for (i = 0; i < n; i++)
	{
	  tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));

	  if (tmp)
	    return tmp;
	}
    }
  return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
}

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gphi *phi;
  affine_iv iv;
  tree step, type, base, stop;
  bool found = false;
  class loop *loop = data->current_loop;
  gphi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      if (virtual_operand_p (PHI_RESULT (phi)))
	continue;

      if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
	continue;

      if (integer_zerop (iv.step))
	continue;

      step = iv.step;
      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      /* Stop expanding the iv base at the first ssa var referred to by the
	 iv step.  Ideally we should stop at any ssa var, but since that's
	 expensive and unusual to happen, we just do it on the first one.

	 See PR64705 for the rationale.  */
      stop = extract_single_var_from_expr (step);
      base = expand_simple_operations (base, stop);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  if (POINTER_TYPE_P (type))
	    step = convert_to_ptrofftype (step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gphi *phi;
  gimple *def;
  tree var;
  struct iv *iv, *incr_iv;
  class loop *loop = data->current_loop;
  basic_block incr_bb;
  gphi_iterator psi;

  data->bivs_not_used_in_addr = 0;
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      def = SSA_NAME_DEF_STMT (var);
      /* Don't mark an iv peeled from another one as a biv.  */
      if (def
	  && gimple_code (def) == GIMPLE_PHI
	  && gimple_bb (def) == loop->header)
	continue;

      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
      if (iv->no_overflow)
	data->bivs_not_used_in_addr++;
      if (incr_iv->no_overflow)
	data->bivs_not_used_in_addr++;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
{
  tree lhs, stop;
  class loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;

  /* Stop expanding the iv base at the first ssa var referred to by the
     iv step.  Ideally we should stop at any ssa var, but since that's
     expensive and unusual to happen, we just do it on the first one.

     See PR64705 for the rationale.  */
  stop = extract_single_var_from_expr (iv->step);
  iv->base = expand_simple_operations (iv->base, stop);
  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (cfun, stmt))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */
static void
find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  class loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      class tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, " number of iterations ");
	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
	  if (!integer_zerop (niter->may_be_zero))
	    {
	      fprintf (dump_file, "; zero if ");
	      print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
	    }
	  fprintf (dump_file, "\n");
	};

      fprintf (dump_file, "\n<Induction Vars>:\n");
      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  struct version_info *info = ver_info (data, i);
	  if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
	    dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
	}
    }

  return true;
}

/* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
   For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
   is the const offset stripped from IV base and MEM_TYPE is the type
   of the memory being addressed.  For uses of other types, ADDR_BASE
   and ADDR_OFFSET are zero by default and MEM_TYPE is NULL_TREE.  */

static struct iv_use *
record_use (struct iv_group *group, tree *use_p, struct iv *iv,
	    gimple *stmt, enum use_type type, tree mem_type,
	    tree addr_base, poly_uint64 addr_offset)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = group->vuses.length ();
  use->group_id = group->id;
  use->type = type;
  use->mem_type = mem_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->addr_base = addr_base;
  use->addr_offset = addr_offset;

  group->vuses.safe_push (use);
  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */
static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_var_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Record a group of TYPE.  */

static struct iv_group *
record_group (struct ivopts_data *data, enum use_type type)
{
  struct iv_group *group = XCNEW (struct iv_group);

  group->id = data->vgroups.length ();
  group->type = type;
  group->related_cands = BITMAP_ALLOC (NULL);
  group->vuses.create (1);
  group->doloop_p = false;

  data->vgroups.safe_push (group);
  return group;
}

/* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
   New group will be created if there is no existing group for the use.
   MEM_TYPE is the type of memory being addressed, or NULL if this
   isn't an address reference.  */

static struct iv_use *
record_group_use (struct ivopts_data *data, tree *use_p,
		  struct iv *iv, gimple *stmt, enum use_type type,
		  tree mem_type)
{
  tree addr_base = NULL;
  struct iv_group *group = NULL;
  poly_uint64 addr_offset = 0;

  /* Record non address type use in a new group.  */
  if (address_p (type))
    {
      unsigned int i;

      addr_base = strip_offset (iv->base, &addr_offset);
      for (i = 0; i < data->vgroups.length (); i++)
	{
	  struct iv_use *use;

	  group = data->vgroups[i];
	  use = group->vuses[0];
	  if (!address_p (use->type))
	    continue;

	  /* Check if it has the same stripped base and step.  */
	  if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
	      && operand_equal_p (iv->step, use->iv->step, 0)
	      && operand_equal_p (addr_base, use->addr_base, 0))
	    break;
	}
      if (i == data->vgroups.length ())
	group = NULL;
    }

  if (!group)
    group = record_group (data, type);

  return record_use (group, use_p, iv, stmt, type, mem_type,
		     addr_base, addr_offset);
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  gimple *stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->nonlin_use)
    {
      gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
      return iv->nonlin_use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));

  use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR, NULL_TREE);
  iv->nonlin_use = use;
  return use;
}

/* Indicate how compare type iv_use can be handled.  */
enum comp_iv_rewrite
{
  COMP_IV_NA,
  /* We may rewrite compare type iv_use by expressing value of the iv_use.  */
  COMP_IV_EXPR,
  /* We may rewrite compare type iv_uses on both sides of comparison by
     expressing value of each iv_use.  */
  COMP_IV_EXPR_2,
  /* We may rewrite compare type iv_use by expressing value of the iv_use
     or by eliminating it with other iv_cand.  */
  COMP_IV_ELIM
};

/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and the way the condition may be
   rewritten is returned.  If this is not the case, CONTROL_VAR and BOUND
   are set to the arguments of the condition and COMP_IV_NA is returned.  */

static enum comp_iv_rewrite
extract_cond_operands (struct ivopts_data *data, gimple *stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv;
  enum comp_iv_rewrite rewrite_type = COMP_IV_NA;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      gcond *cond_stmt = as_a <gcond *> (stmt);
      op0 = gimple_cond_lhs_ptr (cond_stmt);
      op1 = gimple_cond_rhs_ptr (cond_stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* If both sides of the comparison are IVs, we can express ivs on both
     ends.  */
  if (iv0 && iv1 && !integer_zerop (iv0->step) && !integer_zerop (iv1->step))
    {
      rewrite_type = COMP_IV_EXPR_2;
      goto end;
    }

  /* If neither side of the comparison is an IV.  */
  if ((!iv0 || integer_zerop (iv0->step))
      && (!iv1 || integer_zerop (iv1->step)))
    goto end;

  /* Control variable may be on the other side.  */
  if (!iv0 || integer_zerop (iv0->step))
    {
      std::swap (op0, op1);
      std::swap (iv0, iv1);
    }
  /* If one side is an IV and the other side isn't loop invariant.  */
  if (!iv1)
    rewrite_type = COMP_IV_EXPR;
  /* If one side is an IV and the other side is loop invariant.  */
  else if (!integer_zerop (iv0->step) && integer_zerop (iv1->step))
    rewrite_type = COMP_IV_ELIM;

 end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return rewrite_type;
}
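/* For instance (an illustrative sketch, not an exhaustive description): for
   a condition like  i_1 < n_2  where i_1 is an induction variable and n_2
   is loop invariant, the function above returns COMP_IV_ELIM with
   *CONTROL_VAR pointing at the i_1 operand and *BOUND at the n_2 operand;
   if both operands are IVs with non-zero step it returns COMP_IV_EXPR_2,
   and if neither operand is an IV it returns COMP_IV_NA.  */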
/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *bound_iv;
  enum comp_iv_rewrite ret;

  ret = extract_cond_operands (data, stmt,
			       &var_p, &bound_p, &var_iv, &bound_iv);
  if (ret == COMP_IV_NA)
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  record_group_use (data, var_p, var_iv, stmt, USE_COMPARE, NULL_TREE);
  /* Record compare type iv_use for iv on the other side of comparison.  */
  if (ret == COMP_IV_EXPR_2)
    record_group_use (data, bound_p, bound_iv, stmt, USE_COMPARE, NULL_TREE);
}

/* Returns the outermost loop EXPR is obviously invariant in
   relative to the loop LOOP, i.e. if all its operands are defined
   outside of the returned loop.  Returns NULL if EXPR is not
   even obviously invariant in LOOP.  */

class loop *
outermost_invariant_loop_for_expr (class loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return current_loops->tree_root;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb)
	{
	  if (flow_bb_inside_loop_p (loop, def_bb))
	    return NULL;
	  return superloop_at_depth (loop,
				     loop_depth (def_bb->loop_father) + 1);
	}

      return current_loops->tree_root;
    }

  if (!EXPR_P (expr))
    return NULL;

  unsigned maxdepth = 0;
  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    {
      class loop *ivloop;
      if (!TREE_OPERAND (expr, i))
	continue;

      ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
      if (!ivloop)
	return NULL;
      maxdepth = MAX (maxdepth, loop_depth (ivloop));
    }

  return superloop_at_depth (loop, maxdepth);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (class loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
	&& !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Given expression EXPR which computes inductive values with respect
   to loop recorded in DATA, this function returns biv from which EXPR
   is derived by tracing definition chains of ssa variables in EXPR.  */

static struct iv*
find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
{
  struct iv *iv;
  unsigned i, n;
  tree e2, e1;
  enum tree_code code;
  gimple *stmt;

  if (expr == NULL_TREE)
    return NULL;

  if (is_gimple_min_invariant (expr))
    return NULL;

  code = TREE_CODE (expr);
  if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
    {
      n = TREE_OPERAND_LENGTH (expr);
      for (i = 0; i < n; i++)
	{
	  iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
	  if (iv)
	    return iv;
	}
    }

  /* Stop if it's not ssa name.  */
  if (code != SSA_NAME)
    return NULL;

  iv = get_iv (data, expr);
  if (!iv || integer_zerop (iv->step))
    return NULL;
  else if (iv->biv_p)
    return iv;

  stmt = SSA_NAME_DEF_STMT (expr);
  if (gphi *phi = dyn_cast <gphi *> (stmt))
    {
      ssa_op_iter iter;
      use_operand_p use_p;
      basic_block phi_bb = gimple_bb (phi);

      /* Skip loop header PHI that doesn't define biv.  */
      if (phi_bb->loop_father == data->current_loop)
	return NULL;

      if (virtual_operand_p (gimple_phi_result (phi)))
	return NULL;

      FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
	{
	  tree use = USE_FROM_PTR (use_p);
	  iv = find_deriving_biv_for_expr (data, use);
	  if (iv)
	    return iv;
	}
      return NULL;
    }
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return NULL;

  e1 = gimple_assign_rhs1 (stmt);
  code = gimple_assign_rhs_code (stmt);
  if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
    return find_deriving_biv_for_expr (data, e1);

  switch (code)
    {
    case MULT_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case POINTER_PLUS_EXPR:
      /* Increments, decrements and multiplications by a constant
	 are simple.  */
      e2 = gimple_assign_rhs2 (stmt);
      iv = find_deriving_biv_for_expr (data, e2);
      if (iv)
	return iv;
      gcc_fallthrough ();

    CASE_CONVERT:
      /* Casts are simple.  */
      return find_deriving_biv_for_expr (data, e1);

    default:
      break;
    }

  return NULL;
}

/* Record that BIV is used in an address type use, and do the same for its
   predecessor and successor ivs (the ivs whose base differs from BIV's base
   by one step).  */

static void
record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
{
  unsigned i;
  tree type, base_1, base_2;
  bitmap_iterator bi;

  if (!biv || !biv->biv_p || integer_zerop (biv->step)
      || biv->have_address_use || !biv->no_overflow)
    return;

  type = TREE_TYPE (biv->base);
  if (!INTEGRAL_TYPE_P (type))
    return;

  biv->have_address_use = true;
  data->bivs_not_used_in_addr--;
  base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct iv *iv = ver_info (data, i)->iv;

      if (!iv || !iv->biv_p || integer_zerop (iv->step)
	  || iv->have_address_use || !iv->no_overflow)
	continue;

      if (type != TREE_TYPE (iv->base)
	  || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
	continue;

      if (!operand_equal_p (biv->step, iv->step, 0))
	continue;

      base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
      if (operand_equal_p (base_1, iv->base, 0)
	  || operand_equal_p (base_2, biv->base, 0))
	{
	  iv->have_address_use = true;
	  data->bivs_not_used_in_addr--;
	}
    }
}

/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */
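/* As an illustrative sketch only: for an access a[i_1] where A has 4-byte
   elements and I_1 is an induction variable with step 1, the index operand
   is replaced by the initial value of I_1 and 4 * 1 = 4 is accumulated into
   the step stored in the callback data below; for a plain pointer
   dereference the element size is taken to be 1 byte, so just the pointer
   iv's own step is accumulated.  */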
*/ 2021 2022 struct ifs_ivopts_data 2023 { 2024 struct ivopts_data *ivopts_data; 2025 gimple *stmt; 2026 tree step; 2027 }; 2028 2029 static bool 2030 idx_find_step (tree base, tree *idx, void *data) 2031 { 2032 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data; 2033 struct iv *iv; 2034 bool use_overflow_semantics = false; 2035 tree step, iv_base, iv_step, lbound, off; 2036 class loop *loop = dta->ivopts_data->current_loop; 2037 2038 /* If base is a component ref, require that the offset of the reference 2039 be invariant. */ 2040 if (TREE_CODE (base) == COMPONENT_REF) 2041 { 2042 off = component_ref_field_offset (base); 2043 return expr_invariant_in_loop_p (loop, off); 2044 } 2045 2046 /* If base is array, first check whether we will be able to move the 2047 reference out of the loop (in order to take its address in strength 2048 reduction). In order for this to work we need both lower bound 2049 and step to be loop invariants. */ 2050 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF) 2051 { 2052 /* Moreover, for a range, the size needs to be invariant as well. */ 2053 if (TREE_CODE (base) == ARRAY_RANGE_REF 2054 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base)))) 2055 return false; 2056 2057 step = array_ref_element_size (base); 2058 lbound = array_ref_low_bound (base); 2059 2060 if (!expr_invariant_in_loop_p (loop, step) 2061 || !expr_invariant_in_loop_p (loop, lbound)) 2062 return false; 2063 } 2064 2065 if (TREE_CODE (*idx) != SSA_NAME) 2066 return true; 2067 2068 iv = get_iv (dta->ivopts_data, *idx); 2069 if (!iv) 2070 return false; 2071 2072 /* XXX We produce for a base of *D42 with iv->base being &x[0] 2073 *&x[0], which is not folded and does not trigger the 2074 ARRAY_REF path below. */ 2075 *idx = iv->base; 2076 2077 if (integer_zerop (iv->step)) 2078 return true; 2079 2080 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF) 2081 { 2082 step = array_ref_element_size (base); 2083 2084 /* We only handle addresses whose step is an integer constant. */ 2085 if (TREE_CODE (step) != INTEGER_CST) 2086 return false; 2087 } 2088 else 2089 /* The step for pointer arithmetics already is 1 byte. */ 2090 step = size_one_node; 2091 2092 iv_base = iv->base; 2093 iv_step = iv->step; 2094 if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step))) 2095 use_overflow_semantics = true; 2096 2097 if (!convert_affine_scev (dta->ivopts_data->current_loop, 2098 sizetype, &iv_base, &iv_step, dta->stmt, 2099 use_overflow_semantics)) 2100 { 2101 /* The index might wrap. */ 2102 return false; 2103 } 2104 2105 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step); 2106 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step); 2107 2108 if (dta->ivopts_data->bivs_not_used_in_addr) 2109 { 2110 if (!iv->biv_p) 2111 iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name); 2112 2113 record_biv_for_address_use (dta->ivopts_data, iv); 2114 } 2115 return true; 2116 } 2117 2118 /* Records use in index IDX. Callback for for_each_index. Ivopts data 2119 object is passed to it in DATA. 
*/ 2120 2121 static bool 2122 idx_record_use (tree base, tree *idx, 2123 void *vdata) 2124 { 2125 struct ivopts_data *data = (struct ivopts_data *) vdata; 2126 find_interesting_uses_op (data, *idx); 2127 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF) 2128 { 2129 find_interesting_uses_op (data, array_ref_element_size (base)); 2130 find_interesting_uses_op (data, array_ref_low_bound (base)); 2131 } 2132 return true; 2133 } 2134 2135 /* If we can prove that TOP = cst * BOT for some constant cst, 2136 store cst to MUL and return true. Otherwise return false. 2137 The returned value is always sign-extended, regardless of the 2138 signedness of TOP and BOT. */ 2139 2140 static bool 2141 constant_multiple_of (tree top, tree bot, widest_int *mul) 2142 { 2143 tree mby; 2144 enum tree_code code; 2145 unsigned precision = TYPE_PRECISION (TREE_TYPE (top)); 2146 widest_int res, p0, p1; 2147 2148 STRIP_NOPS (top); 2149 STRIP_NOPS (bot); 2150 2151 if (operand_equal_p (top, bot, 0)) 2152 { 2153 *mul = 1; 2154 return true; 2155 } 2156 2157 code = TREE_CODE (top); 2158 switch (code) 2159 { 2160 case MULT_EXPR: 2161 mby = TREE_OPERAND (top, 1); 2162 if (TREE_CODE (mby) != INTEGER_CST) 2163 return false; 2164 2165 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res)) 2166 return false; 2167 2168 *mul = wi::sext (res * wi::to_widest (mby), precision); 2169 return true; 2170 2171 case PLUS_EXPR: 2172 case MINUS_EXPR: 2173 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0) 2174 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1)) 2175 return false; 2176 2177 if (code == MINUS_EXPR) 2178 p1 = -p1; 2179 *mul = wi::sext (p0 + p1, precision); 2180 return true; 2181 2182 case INTEGER_CST: 2183 if (TREE_CODE (bot) != INTEGER_CST) 2184 return false; 2185 2186 p0 = widest_int::from (wi::to_wide (top), SIGNED); 2187 p1 = widest_int::from (wi::to_wide (bot), SIGNED); 2188 if (p1 == 0) 2189 return false; 2190 *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision); 2191 return res == 0; 2192 2193 default: 2194 if (POLY_INT_CST_P (top) 2195 && POLY_INT_CST_P (bot) 2196 && constant_multiple_p (wi::to_poly_widest (top), 2197 wi::to_poly_widest (bot), mul)) 2198 return true; 2199 2200 return false; 2201 } 2202 } 2203 2204 /* Return true if memory reference REF with step STEP may be unaligned. */ 2205 2206 static bool 2207 may_be_unaligned_p (tree ref, tree step) 2208 { 2209 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target, 2210 thus they are not misaligned. */ 2211 if (TREE_CODE (ref) == TARGET_MEM_REF) 2212 return false; 2213 2214 unsigned int align = TYPE_ALIGN (TREE_TYPE (ref)); 2215 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align) 2216 align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))); 2217 2218 unsigned HOST_WIDE_INT bitpos; 2219 unsigned int ref_align; 2220 get_object_alignment_1 (ref, &ref_align, &bitpos); 2221 if (ref_align < align 2222 || (bitpos % align) != 0 2223 || (bitpos % BITS_PER_UNIT) != 0) 2224 return true; 2225 2226 unsigned int trailing_zeros = tree_ctz (step); 2227 if (trailing_zeros < HOST_BITS_PER_INT 2228 && (1U << trailing_zeros) * BITS_PER_UNIT < align) 2229 return true; 2230 2231 return false; 2232 } 2233 2234 /* Return true if EXPR may be non-addressable. */ 2235 2236 bool 2237 may_be_nonaddressable_p (tree expr) 2238 { 2239 switch (TREE_CODE (expr)) 2240 { 2241 case VAR_DECL: 2242 /* Check if it's a register variable. 
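      (Illustratively, a GNU explicit register variable such as
      register int r asm ("reg") has DECL_HARD_REGISTER set, and its
      address cannot be taken.)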
*/ 2243 return DECL_HARD_REGISTER (expr); 2244 2245 case TARGET_MEM_REF: 2246 /* TARGET_MEM_REFs are translated directly to valid MEMs on the 2247 target, thus they are always addressable. */ 2248 return false; 2249 2250 case MEM_REF: 2251 /* Likewise for MEM_REFs, modulo the storage order. */ 2252 return REF_REVERSE_STORAGE_ORDER (expr); 2253 2254 case BIT_FIELD_REF: 2255 if (REF_REVERSE_STORAGE_ORDER (expr)) 2256 return true; 2257 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0)); 2258 2259 case COMPONENT_REF: 2260 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0)))) 2261 return true; 2262 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1)) 2263 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0)); 2264 2265 case ARRAY_REF: 2266 case ARRAY_RANGE_REF: 2267 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0)))) 2268 return true; 2269 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0)); 2270 2271 case VIEW_CONVERT_EXPR: 2272 /* This kind of view-conversions may wrap non-addressable objects 2273 and make them look addressable. After some processing the 2274 non-addressability may be uncovered again, causing ADDR_EXPRs 2275 of inappropriate objects to be built. */ 2276 if (is_gimple_reg (TREE_OPERAND (expr, 0)) 2277 || !is_gimple_addressable (TREE_OPERAND (expr, 0))) 2278 return true; 2279 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0)); 2280 2281 CASE_CONVERT: 2282 return true; 2283 2284 default: 2285 break; 2286 } 2287 2288 return false; 2289 } 2290 2291 /* Finds addresses in *OP_P inside STMT. */ 2292 2293 static void 2294 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt, 2295 tree *op_p) 2296 { 2297 tree base = *op_p, step = size_zero_node; 2298 struct iv *civ; 2299 struct ifs_ivopts_data ifs_ivopts_data; 2300 2301 /* Do not play with volatile memory references. A bit too conservative, 2302 perhaps, but safe. */ 2303 if (gimple_has_volatile_ops (stmt)) 2304 goto fail; 2305 2306 /* Ignore bitfields for now. Not really something terribly complicated 2307 to handle. TODO. 
*/ 2308 if (TREE_CODE (base) == BIT_FIELD_REF) 2309 goto fail; 2310 2311 base = unshare_expr (base); 2312 2313 if (TREE_CODE (base) == TARGET_MEM_REF) 2314 { 2315 tree type = build_pointer_type (TREE_TYPE (base)); 2316 tree astep; 2317 2318 if (TMR_BASE (base) 2319 && TREE_CODE (TMR_BASE (base)) == SSA_NAME) 2320 { 2321 civ = get_iv (data, TMR_BASE (base)); 2322 if (!civ) 2323 goto fail; 2324 2325 TMR_BASE (base) = civ->base; 2326 step = civ->step; 2327 } 2328 if (TMR_INDEX2 (base) 2329 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME) 2330 { 2331 civ = get_iv (data, TMR_INDEX2 (base)); 2332 if (!civ) 2333 goto fail; 2334 2335 TMR_INDEX2 (base) = civ->base; 2336 step = civ->step; 2337 } 2338 if (TMR_INDEX (base) 2339 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME) 2340 { 2341 civ = get_iv (data, TMR_INDEX (base)); 2342 if (!civ) 2343 goto fail; 2344 2345 TMR_INDEX (base) = civ->base; 2346 astep = civ->step; 2347 2348 if (astep) 2349 { 2350 if (TMR_STEP (base)) 2351 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep); 2352 2353 step = fold_build2 (PLUS_EXPR, type, step, astep); 2354 } 2355 } 2356 2357 if (integer_zerop (step)) 2358 goto fail; 2359 base = tree_mem_ref_addr (type, base); 2360 } 2361 else 2362 { 2363 ifs_ivopts_data.ivopts_data = data; 2364 ifs_ivopts_data.stmt = stmt; 2365 ifs_ivopts_data.step = size_zero_node; 2366 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data) 2367 || integer_zerop (ifs_ivopts_data.step)) 2368 goto fail; 2369 step = ifs_ivopts_data.step; 2370 2371 /* Check that the base expression is addressable. This needs 2372 to be done after substituting bases of IVs into it. */ 2373 if (may_be_nonaddressable_p (base)) 2374 goto fail; 2375 2376 /* Moreover, on strict alignment platforms, check that it is 2377 sufficiently aligned. */ 2378 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step)) 2379 goto fail; 2380 2381 base = build_fold_addr_expr (base); 2382 2383 /* Substituting bases of IVs into the base expression might 2384 have caused folding opportunities. */ 2385 if (TREE_CODE (base) == ADDR_EXPR) 2386 { 2387 tree *ref = &TREE_OPERAND (base, 0); 2388 while (handled_component_p (*ref)) 2389 ref = &TREE_OPERAND (*ref, 0); 2390 if (TREE_CODE (*ref) == MEM_REF) 2391 { 2392 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref), 2393 TREE_OPERAND (*ref, 0), 2394 TREE_OPERAND (*ref, 1)); 2395 if (tem) 2396 *ref = tem; 2397 } 2398 } 2399 } 2400 2401 civ = alloc_iv (data, base, step); 2402 /* Fail if base object of this memory reference is unknown. */ 2403 if (civ->base_object == NULL_TREE) 2404 goto fail; 2405 2406 record_group_use (data, op_p, civ, stmt, USE_REF_ADDRESS, TREE_TYPE (*op_p)); 2407 return; 2408 2409 fail: 2410 for_each_index (op_p, idx_record_use, data); 2411 } 2412 2413 /* Finds and records invariants used in STMT. */ 2414 2415 static void 2416 find_invariants_stmt (struct ivopts_data *data, gimple *stmt) 2417 { 2418 ssa_op_iter iter; 2419 use_operand_p use_p; 2420 tree op; 2421 2422 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE) 2423 { 2424 op = USE_FROM_PTR (use_p); 2425 record_invariant (data, op, false); 2426 } 2427 } 2428 2429 /* CALL calls an internal function. If operand *OP_P will become an 2430 address when the call is expanded, return the type of the memory 2431 being addressed, otherwise return null. 
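   For illustration (SSA names invented): given _1 = .MASK_LOAD (ptr_2, align,
   mask_3), operand 0 (ptr_2) becomes the address and the type of _1 is
   returned; for .MASK_STORE (ptr_2, align, mask_3, val_4) the type of the
   stored value val_4 (argument 3) is returned.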
*/ 2432 2433 static tree 2434 get_mem_type_for_internal_fn (gcall *call, tree *op_p) 2435 { 2436 switch (gimple_call_internal_fn (call)) 2437 { 2438 case IFN_MASK_LOAD: 2439 case IFN_MASK_LOAD_LANES: 2440 if (op_p == gimple_call_arg_ptr (call, 0)) 2441 return TREE_TYPE (gimple_call_lhs (call)); 2442 return NULL_TREE; 2443 2444 case IFN_MASK_STORE: 2445 case IFN_MASK_STORE_LANES: 2446 if (op_p == gimple_call_arg_ptr (call, 0)) 2447 return TREE_TYPE (gimple_call_arg (call, 3)); 2448 return NULL_TREE; 2449 2450 default: 2451 return NULL_TREE; 2452 } 2453 } 2454 2455 /* IV is a (non-address) iv that describes operand *OP_P of STMT. 2456 Return true if the operand will become an address when STMT 2457 is expanded and record the associated address use if so. */ 2458 2459 static bool 2460 find_address_like_use (struct ivopts_data *data, gimple *stmt, tree *op_p, 2461 struct iv *iv) 2462 { 2463 /* Fail if base object of this memory reference is unknown. */ 2464 if (iv->base_object == NULL_TREE) 2465 return false; 2466 2467 tree mem_type = NULL_TREE; 2468 if (gcall *call = dyn_cast <gcall *> (stmt)) 2469 if (gimple_call_internal_p (call)) 2470 mem_type = get_mem_type_for_internal_fn (call, op_p); 2471 if (mem_type) 2472 { 2473 iv = alloc_iv (data, iv->base, iv->step); 2474 record_group_use (data, op_p, iv, stmt, USE_PTR_ADDRESS, mem_type); 2475 return true; 2476 } 2477 return false; 2478 } 2479 2480 /* Finds interesting uses of induction variables in the statement STMT. */ 2481 2482 static void 2483 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt) 2484 { 2485 struct iv *iv; 2486 tree op, *lhs, *rhs; 2487 ssa_op_iter iter; 2488 use_operand_p use_p; 2489 enum tree_code code; 2490 2491 find_invariants_stmt (data, stmt); 2492 2493 if (gimple_code (stmt) == GIMPLE_COND) 2494 { 2495 find_interesting_uses_cond (data, stmt); 2496 return; 2497 } 2498 2499 if (is_gimple_assign (stmt)) 2500 { 2501 lhs = gimple_assign_lhs_ptr (stmt); 2502 rhs = gimple_assign_rhs1_ptr (stmt); 2503 2504 if (TREE_CODE (*lhs) == SSA_NAME) 2505 { 2506 /* If the statement defines an induction variable, the uses are not 2507 interesting by themselves. */ 2508 2509 iv = get_iv (data, *lhs); 2510 2511 if (iv && !integer_zerop (iv->step)) 2512 return; 2513 } 2514 2515 code = gimple_assign_rhs_code (stmt); 2516 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS 2517 && (REFERENCE_CLASS_P (*rhs) 2518 || is_gimple_val (*rhs))) 2519 { 2520 if (REFERENCE_CLASS_P (*rhs)) 2521 find_interesting_uses_address (data, stmt, rhs); 2522 else 2523 find_interesting_uses_op (data, *rhs); 2524 2525 if (REFERENCE_CLASS_P (*lhs)) 2526 find_interesting_uses_address (data, stmt, lhs); 2527 return; 2528 } 2529 else if (TREE_CODE_CLASS (code) == tcc_comparison) 2530 { 2531 find_interesting_uses_cond (data, stmt); 2532 return; 2533 } 2534 2535 /* TODO -- we should also handle address uses of type 2536 2537 memory = call (whatever); 2538 2539 and 2540 2541 call (memory). 
*/
2542 }
2543
2544 if (gimple_code (stmt) == GIMPLE_PHI
2545 && gimple_bb (stmt) == data->current_loop->header)
2546 {
2547 iv = get_iv (data, PHI_RESULT (stmt));
2548
2549 if (iv && !integer_zerop (iv->step))
2550 return;
2551 }
2552
2553 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2554 {
2555 op = USE_FROM_PTR (use_p);
2556
2557 if (TREE_CODE (op) != SSA_NAME)
2558 continue;
2559
2560 iv = get_iv (data, op);
2561 if (!iv)
2562 continue;
2563
2564 if (!find_address_like_use (data, stmt, use_p->use, iv))
2565 find_interesting_uses_op (data, op);
2566 }
2567 }
2568
2569 /* Finds interesting uses of induction variables outside of loops
2570 on loop exit edge EXIT. */
2571
2572 static void
2573 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2574 {
2575 gphi *phi;
2576 gphi_iterator psi;
2577 tree def;
2578
2579 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2580 {
2581 phi = psi.phi ();
2582 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2583 if (!virtual_operand_p (def))
2584 find_interesting_uses_op (data, def);
2585 }
2586 }
2587
2588 /* Return TRUE if OFFSET is within the range of the [base + offset] addressing
2589 mode for the memory reference represented by USE. */
2590
2591 static GTY (()) vec<rtx, va_gc> *addr_list;
2592
2593 static bool
2594 addr_offset_valid_p (struct iv_use *use, poly_int64 offset)
2595 {
2596 rtx reg, addr;
2597 unsigned list_index;
2598 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2599 machine_mode addr_mode, mem_mode = TYPE_MODE (use->mem_type);
2600
2601 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2602 if (list_index >= vec_safe_length (addr_list))
2603 vec_safe_grow_cleared (addr_list, list_index + MAX_MACHINE_MODE);
2604
2605 addr = (*addr_list)[list_index];
2606 if (!addr)
2607 {
2608 addr_mode = targetm.addr_space.address_mode (as);
2609 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2610 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2611 (*addr_list)[list_index] = addr;
2612 }
2613 else
2614 addr_mode = GET_MODE (addr);
2615
2616 XEXP (addr, 1) = gen_int_mode (offset, addr_mode);
2617 return (memory_address_addr_space_p (mem_mode, addr, as));
2618 }
2619
2620 /* Comparison function to sort a group in ascending order of addr_offset. */
2621
2622 static int
2623 group_compare_offset (const void *a, const void *b)
2624 {
2625 const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2626 const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2627
2628 return compare_sizes_for_sort ((*u1)->addr_offset, (*u2)->addr_offset);
2629 }
2630
2631 /* Check if small groups should be split. Return true if no group
2632 contains more than two distinct addr_offsets. Return
2633 false otherwise. We want to split such groups because:
2634
2635 1) Small groups don't have much benefit and may interfere with
2636 general candidate selection.
2637 2) The problem size with only small groups is usually small and the
2638 general algorithm can handle it well.
2639
2640 TODO -- The above claim may not hold when we want to merge memory
2641 accesses with consecutive addresses.
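   For illustration (offsets invented): if every address group has at most two
   distinct offsets, e.g. {0, 8} and {4, 4, 12}, true is returned and the
   groups are split; once any group has three or more distinct offsets, e.g.
   {0, 4, 8}, false is returned and groups are only split where an offset does
   not fit the target's [base + offset] addressing mode.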
*/ 2642 2643 static bool 2644 split_small_address_groups_p (struct ivopts_data *data) 2645 { 2646 unsigned int i, j, distinct = 1; 2647 struct iv_use *pre; 2648 struct iv_group *group; 2649 2650 for (i = 0; i < data->vgroups.length (); i++) 2651 { 2652 group = data->vgroups[i]; 2653 if (group->vuses.length () == 1) 2654 continue; 2655 2656 gcc_assert (address_p (group->type)); 2657 if (group->vuses.length () == 2) 2658 { 2659 if (compare_sizes_for_sort (group->vuses[0]->addr_offset, 2660 group->vuses[1]->addr_offset) > 0) 2661 std::swap (group->vuses[0], group->vuses[1]); 2662 } 2663 else 2664 group->vuses.qsort (group_compare_offset); 2665 2666 if (distinct > 2) 2667 continue; 2668 2669 distinct = 1; 2670 for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++) 2671 { 2672 if (maybe_ne (group->vuses[j]->addr_offset, pre->addr_offset)) 2673 { 2674 pre = group->vuses[j]; 2675 distinct++; 2676 } 2677 2678 if (distinct > 2) 2679 break; 2680 } 2681 } 2682 2683 return (distinct <= 2); 2684 } 2685 2686 /* For each group of address type uses, this function further groups 2687 these uses according to the maximum offset supported by target's 2688 [base + offset] addressing mode. */ 2689 2690 static void 2691 split_address_groups (struct ivopts_data *data) 2692 { 2693 unsigned int i, j; 2694 /* Always split group. */ 2695 bool split_p = split_small_address_groups_p (data); 2696 2697 for (i = 0; i < data->vgroups.length (); i++) 2698 { 2699 struct iv_group *new_group = NULL; 2700 struct iv_group *group = data->vgroups[i]; 2701 struct iv_use *use = group->vuses[0]; 2702 2703 use->id = 0; 2704 use->group_id = group->id; 2705 if (group->vuses.length () == 1) 2706 continue; 2707 2708 gcc_assert (address_p (use->type)); 2709 2710 for (j = 1; j < group->vuses.length ();) 2711 { 2712 struct iv_use *next = group->vuses[j]; 2713 poly_int64 offset = next->addr_offset - use->addr_offset; 2714 2715 /* Split group if aksed to, or the offset against the first 2716 use can't fit in offset part of addressing mode. IV uses 2717 having the same offset are still kept in one group. */ 2718 if (maybe_ne (offset, 0) 2719 && (split_p || !addr_offset_valid_p (use, offset))) 2720 { 2721 if (!new_group) 2722 new_group = record_group (data, group->type); 2723 group->vuses.ordered_remove (j); 2724 new_group->vuses.safe_push (next); 2725 continue; 2726 } 2727 2728 next->id = j; 2729 next->group_id = group->id; 2730 j++; 2731 } 2732 } 2733 } 2734 2735 /* Finds uses of the induction variables that are interesting. 
*/ 2736 2737 static void 2738 find_interesting_uses (struct ivopts_data *data) 2739 { 2740 basic_block bb; 2741 gimple_stmt_iterator bsi; 2742 basic_block *body = get_loop_body (data->current_loop); 2743 unsigned i; 2744 edge e; 2745 2746 for (i = 0; i < data->current_loop->num_nodes; i++) 2747 { 2748 edge_iterator ei; 2749 bb = body[i]; 2750 2751 FOR_EACH_EDGE (e, ei, bb->succs) 2752 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) 2753 && !flow_bb_inside_loop_p (data->current_loop, e->dest)) 2754 find_interesting_uses_outside (data, e); 2755 2756 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi)) 2757 find_interesting_uses_stmt (data, gsi_stmt (bsi)); 2758 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi)) 2759 if (!is_gimple_debug (gsi_stmt (bsi))) 2760 find_interesting_uses_stmt (data, gsi_stmt (bsi)); 2761 } 2762 free (body); 2763 2764 split_address_groups (data); 2765 2766 if (dump_file && (dump_flags & TDF_DETAILS)) 2767 { 2768 fprintf (dump_file, "\n<IV Groups>:\n"); 2769 dump_groups (dump_file, data); 2770 fprintf (dump_file, "\n"); 2771 } 2772 } 2773 2774 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR 2775 is true, assume we are inside an address. If TOP_COMPREF is true, assume 2776 we are at the top-level of the processed address. */ 2777 2778 static tree 2779 strip_offset_1 (tree expr, bool inside_addr, bool top_compref, 2780 poly_int64 *offset) 2781 { 2782 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step; 2783 enum tree_code code; 2784 tree type, orig_type = TREE_TYPE (expr); 2785 poly_int64 off0, off1; 2786 HOST_WIDE_INT st; 2787 tree orig_expr = expr; 2788 2789 STRIP_NOPS (expr); 2790 2791 type = TREE_TYPE (expr); 2792 code = TREE_CODE (expr); 2793 *offset = 0; 2794 2795 switch (code) 2796 { 2797 case POINTER_PLUS_EXPR: 2798 case PLUS_EXPR: 2799 case MINUS_EXPR: 2800 op0 = TREE_OPERAND (expr, 0); 2801 op1 = TREE_OPERAND (expr, 1); 2802 2803 op0 = strip_offset_1 (op0, false, false, &off0); 2804 op1 = strip_offset_1 (op1, false, false, &off1); 2805 2806 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1); 2807 if (op0 == TREE_OPERAND (expr, 0) 2808 && op1 == TREE_OPERAND (expr, 1)) 2809 return orig_expr; 2810 2811 if (integer_zerop (op1)) 2812 expr = op0; 2813 else if (integer_zerop (op0)) 2814 { 2815 if (code == MINUS_EXPR) 2816 expr = fold_build1 (NEGATE_EXPR, type, op1); 2817 else 2818 expr = op1; 2819 } 2820 else 2821 expr = fold_build2 (code, type, op0, op1); 2822 2823 return fold_convert (orig_type, expr); 2824 2825 case MULT_EXPR: 2826 op1 = TREE_OPERAND (expr, 1); 2827 if (!cst_and_fits_in_hwi (op1)) 2828 return orig_expr; 2829 2830 op0 = TREE_OPERAND (expr, 0); 2831 op0 = strip_offset_1 (op0, false, false, &off0); 2832 if (op0 == TREE_OPERAND (expr, 0)) 2833 return orig_expr; 2834 2835 *offset = off0 * int_cst_value (op1); 2836 if (integer_zerop (op0)) 2837 expr = op0; 2838 else 2839 expr = fold_build2 (MULT_EXPR, type, op0, op1); 2840 2841 return fold_convert (orig_type, expr); 2842 2843 case ARRAY_REF: 2844 case ARRAY_RANGE_REF: 2845 if (!inside_addr) 2846 return orig_expr; 2847 2848 step = array_ref_element_size (expr); 2849 if (!cst_and_fits_in_hwi (step)) 2850 break; 2851 2852 st = int_cst_value (step); 2853 op1 = TREE_OPERAND (expr, 1); 2854 op1 = strip_offset_1 (op1, false, false, &off1); 2855 *offset = off1 * st; 2856 2857 if (top_compref 2858 && integer_zerop (op1)) 2859 { 2860 /* Strip the component reference completely. 
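      (Illustratively, for a[5] with a 4-byte element size the array
      reference itself is dropped: 20 is added to the offset and we
      recurse into the array base.)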
*/ 2861 op0 = TREE_OPERAND (expr, 0); 2862 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0); 2863 *offset += off0; 2864 return op0; 2865 } 2866 break; 2867 2868 case COMPONENT_REF: 2869 { 2870 tree field; 2871 2872 if (!inside_addr) 2873 return orig_expr; 2874 2875 tmp = component_ref_field_offset (expr); 2876 field = TREE_OPERAND (expr, 1); 2877 if (top_compref 2878 && cst_and_fits_in_hwi (tmp) 2879 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field))) 2880 { 2881 HOST_WIDE_INT boffset, abs_off; 2882 2883 /* Strip the component reference completely. */ 2884 op0 = TREE_OPERAND (expr, 0); 2885 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0); 2886 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field)); 2887 abs_off = abs_hwi (boffset) / BITS_PER_UNIT; 2888 if (boffset < 0) 2889 abs_off = -abs_off; 2890 2891 *offset = off0 + int_cst_value (tmp) + abs_off; 2892 return op0; 2893 } 2894 } 2895 break; 2896 2897 case ADDR_EXPR: 2898 op0 = TREE_OPERAND (expr, 0); 2899 op0 = strip_offset_1 (op0, true, true, &off0); 2900 *offset += off0; 2901 2902 if (op0 == TREE_OPERAND (expr, 0)) 2903 return orig_expr; 2904 2905 expr = build_fold_addr_expr (op0); 2906 return fold_convert (orig_type, expr); 2907 2908 case MEM_REF: 2909 /* ??? Offset operand? */ 2910 inside_addr = false; 2911 break; 2912 2913 default: 2914 if (ptrdiff_tree_p (expr, offset) && maybe_ne (*offset, 0)) 2915 return build_int_cst (orig_type, 0); 2916 return orig_expr; 2917 } 2918 2919 /* Default handling of expressions for that we want to recurse into 2920 the first operand. */ 2921 op0 = TREE_OPERAND (expr, 0); 2922 op0 = strip_offset_1 (op0, inside_addr, false, &off0); 2923 *offset += off0; 2924 2925 if (op0 == TREE_OPERAND (expr, 0) 2926 && (!op1 || op1 == TREE_OPERAND (expr, 1))) 2927 return orig_expr; 2928 2929 expr = copy_node (expr); 2930 TREE_OPERAND (expr, 0) = op0; 2931 if (op1) 2932 TREE_OPERAND (expr, 1) = op1; 2933 2934 /* Inside address, we might strip the top level component references, 2935 thus changing type of the expression. Handling of ADDR_EXPR 2936 will fix that. */ 2937 expr = fold_convert (orig_type, expr); 2938 2939 return expr; 2940 } 2941 2942 /* Strips constant offsets from EXPR and stores them to OFFSET. */ 2943 2944 tree 2945 strip_offset (tree expr, poly_uint64_pod *offset) 2946 { 2947 poly_int64 off; 2948 tree core = strip_offset_1 (expr, false, false, &off); 2949 *offset = off; 2950 return core; 2951 } 2952 2953 /* Returns variant of TYPE that can be used as base for different uses. 2954 We return unsigned type with the same precision, which avoids problems 2955 with overflows. */ 2956 2957 static tree 2958 generic_type_for (tree type) 2959 { 2960 if (POINTER_TYPE_P (type)) 2961 return unsigned_type_for (type); 2962 2963 if (TYPE_UNSIGNED (type)) 2964 return type; 2965 2966 return unsigned_type_for (type); 2967 } 2968 2969 /* Private data for walk_tree. */ 2970 2971 struct walk_tree_data 2972 { 2973 bitmap *inv_vars; 2974 struct ivopts_data *idata; 2975 }; 2976 2977 /* Callback function for walk_tree, it records invariants and symbol 2978 reference in *EXPR_P. DATA is the structure storing result info. 
*/ 2979 2980 static tree 2981 find_inv_vars_cb (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data) 2982 { 2983 tree op = *expr_p; 2984 struct version_info *info; 2985 struct walk_tree_data *wdata = (struct walk_tree_data*) data; 2986 2987 if (TREE_CODE (op) != SSA_NAME) 2988 return NULL_TREE; 2989 2990 info = name_info (wdata->idata, op); 2991 /* Because we expand simple operations when finding IVs, loop invariant 2992 variable that isn't referred by the original loop could be used now. 2993 Record such invariant variables here. */ 2994 if (!info->iv) 2995 { 2996 struct ivopts_data *idata = wdata->idata; 2997 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (op)); 2998 2999 if (!bb || !flow_bb_inside_loop_p (idata->current_loop, bb)) 3000 { 3001 tree steptype = TREE_TYPE (op); 3002 if (POINTER_TYPE_P (steptype)) 3003 steptype = sizetype; 3004 set_iv (idata, op, op, build_int_cst (steptype, 0), true); 3005 record_invariant (idata, op, false); 3006 } 3007 } 3008 if (!info->inv_id || info->has_nonlin_use) 3009 return NULL_TREE; 3010 3011 if (!*wdata->inv_vars) 3012 *wdata->inv_vars = BITMAP_ALLOC (NULL); 3013 bitmap_set_bit (*wdata->inv_vars, info->inv_id); 3014 3015 return NULL_TREE; 3016 } 3017 3018 /* Records invariants in *EXPR_P. INV_VARS is the bitmap to that we should 3019 store it. */ 3020 3021 static inline void 3022 find_inv_vars (struct ivopts_data *data, tree *expr_p, bitmap *inv_vars) 3023 { 3024 struct walk_tree_data wdata; 3025 3026 if (!inv_vars) 3027 return; 3028 3029 wdata.idata = data; 3030 wdata.inv_vars = inv_vars; 3031 walk_tree (expr_p, find_inv_vars_cb, &wdata, NULL); 3032 } 3033 3034 /* Get entry from invariant expr hash table for INV_EXPR. New entry 3035 will be recorded if it doesn't exist yet. Given below two exprs: 3036 inv_expr + cst1, inv_expr + cst2 3037 It's hard to make decision whether constant part should be stripped 3038 or not. We choose to not strip based on below facts: 3039 1) We need to count ADD cost for constant part if it's stripped, 3040 which isn't always trivial where this functions is called. 3041 2) Stripping constant away may be conflict with following loop 3042 invariant hoisting pass. 3043 3) Not stripping constant away results in more invariant exprs, 3044 which usually leads to decision preferring lower reg pressure. */ 3045 3046 static iv_inv_expr_ent * 3047 get_loop_invariant_expr (struct ivopts_data *data, tree inv_expr) 3048 { 3049 STRIP_NOPS (inv_expr); 3050 3051 if (poly_int_tree_p (inv_expr) 3052 || TREE_CODE (inv_expr) == SSA_NAME) 3053 return NULL; 3054 3055 /* Don't strip constant part away as we used to. */ 3056 3057 /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent. */ 3058 struct iv_inv_expr_ent ent; 3059 ent.expr = inv_expr; 3060 ent.hash = iterative_hash_expr (inv_expr, 0); 3061 struct iv_inv_expr_ent **slot = data->inv_expr_tab->find_slot (&ent, INSERT); 3062 3063 if (!*slot) 3064 { 3065 *slot = XNEW (struct iv_inv_expr_ent); 3066 (*slot)->expr = inv_expr; 3067 (*slot)->hash = ent.hash; 3068 (*slot)->id = ++data->max_inv_expr_id; 3069 } 3070 3071 return *slot; 3072 } 3073 3074 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and 3075 position to POS. If USE is not NULL, the candidate is set as related to 3076 it. If both BASE and STEP are NULL, we add a pseudocandidate for the 3077 replacement of the final value of the iv by a direct computation. 
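   As a made-up illustration: a candidate for a loop walking an int array
   might have BASE = &a[0] and STEP = 4, i.e. it stands for the value
   &a[0] + 4 * i in iteration i.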
*/ 3078 3079 static struct iv_cand * 3080 add_candidate_1 (struct ivopts_data *data, tree base, tree step, bool important, 3081 enum iv_position pos, struct iv_use *use, 3082 gimple *incremented_at, struct iv *orig_iv = NULL, 3083 bool doloop = false) 3084 { 3085 unsigned i; 3086 struct iv_cand *cand = NULL; 3087 tree type, orig_type; 3088 3089 gcc_assert (base && step); 3090 3091 /* -fkeep-gc-roots-live means that we have to keep a real pointer 3092 live, but the ivopts code may replace a real pointer with one 3093 pointing before or after the memory block that is then adjusted 3094 into the memory block during the loop. FIXME: It would likely be 3095 better to actually force the pointer live and still use ivopts; 3096 for example, it would be enough to write the pointer into memory 3097 and keep it there until after the loop. */ 3098 if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base))) 3099 return NULL; 3100 3101 /* For non-original variables, make sure their values are computed in a type 3102 that does not invoke undefined behavior on overflows (since in general, 3103 we cannot prove that these induction variables are non-wrapping). */ 3104 if (pos != IP_ORIGINAL) 3105 { 3106 orig_type = TREE_TYPE (base); 3107 type = generic_type_for (orig_type); 3108 if (type != orig_type) 3109 { 3110 base = fold_convert (type, base); 3111 step = fold_convert (type, step); 3112 } 3113 } 3114 3115 for (i = 0; i < data->vcands.length (); i++) 3116 { 3117 cand = data->vcands[i]; 3118 3119 if (cand->pos != pos) 3120 continue; 3121 3122 if (cand->incremented_at != incremented_at 3123 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE) 3124 && cand->ainc_use != use)) 3125 continue; 3126 3127 if (operand_equal_p (base, cand->iv->base, 0) 3128 && operand_equal_p (step, cand->iv->step, 0) 3129 && (TYPE_PRECISION (TREE_TYPE (base)) 3130 == TYPE_PRECISION (TREE_TYPE (cand->iv->base)))) 3131 break; 3132 } 3133 3134 if (i == data->vcands.length ()) 3135 { 3136 cand = XCNEW (struct iv_cand); 3137 cand->id = i; 3138 cand->iv = alloc_iv (data, base, step); 3139 cand->pos = pos; 3140 if (pos != IP_ORIGINAL) 3141 { 3142 if (doloop) 3143 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "doloop"); 3144 else 3145 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp"); 3146 cand->var_after = cand->var_before; 3147 } 3148 cand->important = important; 3149 cand->incremented_at = incremented_at; 3150 cand->doloop_p = doloop; 3151 data->vcands.safe_push (cand); 3152 3153 if (!poly_int_tree_p (step)) 3154 { 3155 find_inv_vars (data, &step, &cand->inv_vars); 3156 3157 iv_inv_expr_ent *inv_expr = get_loop_invariant_expr (data, step); 3158 /* Share bitmap between inv_vars and inv_exprs for cand. */ 3159 if (inv_expr != NULL) 3160 { 3161 cand->inv_exprs = cand->inv_vars; 3162 cand->inv_vars = NULL; 3163 if (cand->inv_exprs) 3164 bitmap_clear (cand->inv_exprs); 3165 else 3166 cand->inv_exprs = BITMAP_ALLOC (NULL); 3167 3168 bitmap_set_bit (cand->inv_exprs, inv_expr->id); 3169 } 3170 } 3171 3172 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE) 3173 cand->ainc_use = use; 3174 else 3175 cand->ainc_use = NULL; 3176 3177 cand->orig_iv = orig_iv; 3178 if (dump_file && (dump_flags & TDF_DETAILS)) 3179 dump_cand (dump_file, cand); 3180 } 3181 3182 cand->important |= important; 3183 cand->doloop_p |= doloop; 3184 3185 /* Relate candidate to the group for which it is added. 
*/ 3186 if (use) 3187 bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i); 3188 3189 return cand; 3190 } 3191 3192 /* Returns true if incrementing the induction variable at the end of the LOOP 3193 is allowed. 3194 3195 The purpose is to avoid splitting latch edge with a biv increment, thus 3196 creating a jump, possibly confusing other optimization passes and leaving 3197 less freedom to scheduler. So we allow IP_END only if IP_NORMAL is not 3198 available (so we do not have a better alternative), or if the latch edge 3199 is already nonempty. */ 3200 3201 static bool 3202 allow_ip_end_pos_p (class loop *loop) 3203 { 3204 if (!ip_normal_pos (loop)) 3205 return true; 3206 3207 if (!empty_block_p (ip_end_pos (loop))) 3208 return true; 3209 3210 return false; 3211 } 3212 3213 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE. 3214 Important field is set to IMPORTANT. */ 3215 3216 static void 3217 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step, 3218 bool important, struct iv_use *use) 3219 { 3220 basic_block use_bb = gimple_bb (use->stmt); 3221 machine_mode mem_mode; 3222 unsigned HOST_WIDE_INT cstepi; 3223 3224 /* If we insert the increment in any position other than the standard 3225 ones, we must ensure that it is incremented once per iteration. 3226 It must not be in an inner nested loop, or one side of an if 3227 statement. */ 3228 if (use_bb->loop_father != data->current_loop 3229 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb) 3230 || stmt_can_throw_internal (cfun, use->stmt) 3231 || !cst_and_fits_in_hwi (step)) 3232 return; 3233 3234 cstepi = int_cst_value (step); 3235 3236 mem_mode = TYPE_MODE (use->mem_type); 3237 if (((USE_LOAD_PRE_INCREMENT (mem_mode) 3238 || USE_STORE_PRE_INCREMENT (mem_mode)) 3239 && known_eq (GET_MODE_SIZE (mem_mode), cstepi)) 3240 || ((USE_LOAD_PRE_DECREMENT (mem_mode) 3241 || USE_STORE_PRE_DECREMENT (mem_mode)) 3242 && known_eq (GET_MODE_SIZE (mem_mode), -cstepi))) 3243 { 3244 enum tree_code code = MINUS_EXPR; 3245 tree new_base; 3246 tree new_step = step; 3247 3248 if (POINTER_TYPE_P (TREE_TYPE (base))) 3249 { 3250 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step); 3251 code = POINTER_PLUS_EXPR; 3252 } 3253 else 3254 new_step = fold_convert (TREE_TYPE (base), new_step); 3255 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step); 3256 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use, 3257 use->stmt); 3258 } 3259 if (((USE_LOAD_POST_INCREMENT (mem_mode) 3260 || USE_STORE_POST_INCREMENT (mem_mode)) 3261 && known_eq (GET_MODE_SIZE (mem_mode), cstepi)) 3262 || ((USE_LOAD_POST_DECREMENT (mem_mode) 3263 || USE_STORE_POST_DECREMENT (mem_mode)) 3264 && known_eq (GET_MODE_SIZE (mem_mode), -cstepi))) 3265 { 3266 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use, 3267 use->stmt); 3268 } 3269 } 3270 3271 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and 3272 position to POS. If USE is not NULL, the candidate is set as related to 3273 it. The candidate computation is scheduled before exit condition and at 3274 the end of loop. 
*/ 3275 3276 static void 3277 add_candidate (struct ivopts_data *data, tree base, tree step, bool important, 3278 struct iv_use *use, struct iv *orig_iv = NULL, 3279 bool doloop = false) 3280 { 3281 if (ip_normal_pos (data->current_loop)) 3282 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL, orig_iv, 3283 doloop); 3284 /* Exclude doloop candidate here since it requires decrement then comparison 3285 and jump, the IP_END position doesn't match. */ 3286 if (!doloop && ip_end_pos (data->current_loop) 3287 && allow_ip_end_pos_p (data->current_loop)) 3288 add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv); 3289 } 3290 3291 /* Adds standard iv candidates. */ 3292 3293 static void 3294 add_standard_iv_candidates (struct ivopts_data *data) 3295 { 3296 add_candidate (data, integer_zero_node, integer_one_node, true, NULL); 3297 3298 /* The same for a double-integer type if it is still fast enough. */ 3299 if (TYPE_PRECISION 3300 (long_integer_type_node) > TYPE_PRECISION (integer_type_node) 3301 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD) 3302 add_candidate (data, build_int_cst (long_integer_type_node, 0), 3303 build_int_cst (long_integer_type_node, 1), true, NULL); 3304 3305 /* The same for a double-integer type if it is still fast enough. */ 3306 if (TYPE_PRECISION 3307 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node) 3308 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD) 3309 add_candidate (data, build_int_cst (long_long_integer_type_node, 0), 3310 build_int_cst (long_long_integer_type_node, 1), true, NULL); 3311 } 3312 3313 3314 /* Adds candidates bases on the old induction variable IV. */ 3315 3316 static void 3317 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv) 3318 { 3319 gimple *phi; 3320 tree def; 3321 struct iv_cand *cand; 3322 3323 /* Check if this biv is used in address type use. */ 3324 if (iv->no_overflow && iv->have_address_use 3325 && INTEGRAL_TYPE_P (TREE_TYPE (iv->base)) 3326 && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype)) 3327 { 3328 tree base = fold_convert (sizetype, iv->base); 3329 tree step = fold_convert (sizetype, iv->step); 3330 3331 /* Add iv cand of same precision as index part in TARGET_MEM_REF. */ 3332 add_candidate (data, base, step, true, NULL, iv); 3333 /* Add iv cand of the original type only if it has nonlinear use. */ 3334 if (iv->nonlin_use) 3335 add_candidate (data, iv->base, iv->step, true, NULL); 3336 } 3337 else 3338 add_candidate (data, iv->base, iv->step, true, NULL); 3339 3340 /* The same, but with initial value zero. */ 3341 if (POINTER_TYPE_P (TREE_TYPE (iv->base))) 3342 add_candidate (data, size_int (0), iv->step, true, NULL); 3343 else 3344 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0), 3345 iv->step, true, NULL); 3346 3347 phi = SSA_NAME_DEF_STMT (iv->ssa_name); 3348 if (gimple_code (phi) == GIMPLE_PHI) 3349 { 3350 /* Additionally record the possibility of leaving the original iv 3351 untouched. */ 3352 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop)); 3353 /* Don't add candidate if it's from another PHI node because 3354 it's an affine iv appearing in the form of PEELED_CHREC. 
*/ 3355 phi = SSA_NAME_DEF_STMT (def); 3356 if (gimple_code (phi) != GIMPLE_PHI) 3357 { 3358 cand = add_candidate_1 (data, 3359 iv->base, iv->step, true, IP_ORIGINAL, NULL, 3360 SSA_NAME_DEF_STMT (def)); 3361 if (cand) 3362 { 3363 cand->var_before = iv->ssa_name; 3364 cand->var_after = def; 3365 } 3366 } 3367 else 3368 gcc_assert (gimple_bb (phi) == data->current_loop->header); 3369 } 3370 } 3371 3372 /* Adds candidates based on the old induction variables. */ 3373 3374 static void 3375 add_iv_candidate_for_bivs (struct ivopts_data *data) 3376 { 3377 unsigned i; 3378 struct iv *iv; 3379 bitmap_iterator bi; 3380 3381 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi) 3382 { 3383 iv = ver_info (data, i)->iv; 3384 if (iv && iv->biv_p && !integer_zerop (iv->step)) 3385 add_iv_candidate_for_biv (data, iv); 3386 } 3387 } 3388 3389 /* Record common candidate {BASE, STEP} derived from USE in hashtable. */ 3390 3391 static void 3392 record_common_cand (struct ivopts_data *data, tree base, 3393 tree step, struct iv_use *use) 3394 { 3395 class iv_common_cand ent; 3396 class iv_common_cand **slot; 3397 3398 ent.base = base; 3399 ent.step = step; 3400 ent.hash = iterative_hash_expr (base, 0); 3401 ent.hash = iterative_hash_expr (step, ent.hash); 3402 3403 slot = data->iv_common_cand_tab->find_slot (&ent, INSERT); 3404 if (*slot == NULL) 3405 { 3406 *slot = new iv_common_cand (); 3407 (*slot)->base = base; 3408 (*slot)->step = step; 3409 (*slot)->uses.create (8); 3410 (*slot)->hash = ent.hash; 3411 data->iv_common_cands.safe_push ((*slot)); 3412 } 3413 3414 gcc_assert (use != NULL); 3415 (*slot)->uses.safe_push (use); 3416 return; 3417 } 3418 3419 /* Comparison function used to sort common candidates. */ 3420 3421 static int 3422 common_cand_cmp (const void *p1, const void *p2) 3423 { 3424 unsigned n1, n2; 3425 const class iv_common_cand *const *const ccand1 3426 = (const class iv_common_cand *const *)p1; 3427 const class iv_common_cand *const *const ccand2 3428 = (const class iv_common_cand *const *)p2; 3429 3430 n1 = (*ccand1)->uses.length (); 3431 n2 = (*ccand2)->uses.length (); 3432 return n2 - n1; 3433 } 3434 3435 /* Adds IV candidates based on common candidated recorded. */ 3436 3437 static void 3438 add_iv_candidate_derived_from_uses (struct ivopts_data *data) 3439 { 3440 unsigned i, j; 3441 struct iv_cand *cand_1, *cand_2; 3442 3443 data->iv_common_cands.qsort (common_cand_cmp); 3444 for (i = 0; i < data->iv_common_cands.length (); i++) 3445 { 3446 class iv_common_cand *ptr = data->iv_common_cands[i]; 3447 3448 /* Only add IV candidate if it's derived from multiple uses. */ 3449 if (ptr->uses.length () <= 1) 3450 break; 3451 3452 cand_1 = NULL; 3453 cand_2 = NULL; 3454 if (ip_normal_pos (data->current_loop)) 3455 cand_1 = add_candidate_1 (data, ptr->base, ptr->step, 3456 false, IP_NORMAL, NULL, NULL); 3457 3458 if (ip_end_pos (data->current_loop) 3459 && allow_ip_end_pos_p (data->current_loop)) 3460 cand_2 = add_candidate_1 (data, ptr->base, ptr->step, 3461 false, IP_END, NULL, NULL); 3462 3463 /* Bind deriving uses and the new candidates. */ 3464 for (j = 0; j < ptr->uses.length (); j++) 3465 { 3466 struct iv_group *group = data->vgroups[ptr->uses[j]->group_id]; 3467 if (cand_1) 3468 bitmap_set_bit (group->related_cands, cand_1->id); 3469 if (cand_2) 3470 bitmap_set_bit (group->related_cands, cand_2->id); 3471 } 3472 } 3473 3474 /* Release data since it is useless from this point. 
*/ 3475 data->iv_common_cand_tab->empty (); 3476 data->iv_common_cands.truncate (0); 3477 } 3478 3479 /* Adds candidates based on the value of USE's iv. */ 3480 3481 static void 3482 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use) 3483 { 3484 poly_uint64 offset; 3485 tree base; 3486 struct iv *iv = use->iv; 3487 tree basetype = TREE_TYPE (iv->base); 3488 3489 /* Don't add candidate for iv_use with non integer, pointer or non-mode 3490 precision types, instead, add candidate for the corresponding scev in 3491 unsigned type with the same precision. See PR93674 for more info. */ 3492 if ((TREE_CODE (basetype) != INTEGER_TYPE && !POINTER_TYPE_P (basetype)) 3493 || !type_has_mode_precision_p (basetype)) 3494 { 3495 basetype = lang_hooks.types.type_for_mode (TYPE_MODE (basetype), 3496 TYPE_UNSIGNED (basetype)); 3497 add_candidate (data, fold_convert (basetype, iv->base), 3498 fold_convert (basetype, iv->step), false, NULL); 3499 return; 3500 } 3501 3502 add_candidate (data, iv->base, iv->step, false, use); 3503 3504 /* Record common candidate for use in case it can be shared by others. */ 3505 record_common_cand (data, iv->base, iv->step, use); 3506 3507 /* Record common candidate with initial value zero. */ 3508 basetype = TREE_TYPE (iv->base); 3509 if (POINTER_TYPE_P (basetype)) 3510 basetype = sizetype; 3511 record_common_cand (data, build_int_cst (basetype, 0), iv->step, use); 3512 3513 /* Compare the cost of an address with an unscaled index with the cost of 3514 an address with a scaled index and add candidate if useful. */ 3515 poly_int64 step; 3516 if (use != NULL 3517 && poly_int_tree_p (iv->step, &step) 3518 && address_p (use->type)) 3519 { 3520 poly_int64 new_step; 3521 unsigned int fact = preferred_mem_scale_factor 3522 (use->iv->base, 3523 TYPE_MODE (use->mem_type), 3524 optimize_loop_for_speed_p (data->current_loop)); 3525 3526 if (fact != 1 3527 && multiple_p (step, fact, &new_step)) 3528 add_candidate (data, size_int (0), 3529 wide_int_to_tree (sizetype, new_step), 3530 true, NULL); 3531 } 3532 3533 /* Record common candidate with constant offset stripped in base. 3534 Like the use itself, we also add candidate directly for it. */ 3535 base = strip_offset (iv->base, &offset); 3536 if (maybe_ne (offset, 0U) || base != iv->base) 3537 { 3538 record_common_cand (data, base, iv->step, use); 3539 add_candidate (data, base, iv->step, false, use); 3540 } 3541 3542 /* Record common candidate with base_object removed in base. */ 3543 base = iv->base; 3544 STRIP_NOPS (base); 3545 if (iv->base_object != NULL && TREE_CODE (base) == POINTER_PLUS_EXPR) 3546 { 3547 tree step = iv->step; 3548 3549 STRIP_NOPS (step); 3550 base = TREE_OPERAND (base, 1); 3551 step = fold_convert (sizetype, step); 3552 record_common_cand (data, base, step, use); 3553 /* Also record common candidate with offset stripped. */ 3554 base = strip_offset (base, &offset); 3555 if (maybe_ne (offset, 0U)) 3556 record_common_cand (data, base, step, use); 3557 } 3558 3559 /* At last, add auto-incremental candidates. Make such variables 3560 important since other iv uses with same base object may be based 3561 on it. */ 3562 if (use != NULL && address_p (use->type)) 3563 add_autoinc_candidates (data, iv->base, iv->step, true, use); 3564 } 3565 3566 /* Adds candidates based on the uses. */ 3567 3568 static void 3569 add_iv_candidate_for_groups (struct ivopts_data *data) 3570 { 3571 unsigned i; 3572 3573 /* Only add candidate for the first use in group. 
*/ 3574 for (i = 0; i < data->vgroups.length (); i++) 3575 { 3576 struct iv_group *group = data->vgroups[i]; 3577 3578 gcc_assert (group->vuses[0] != NULL); 3579 add_iv_candidate_for_use (data, group->vuses[0]); 3580 } 3581 add_iv_candidate_derived_from_uses (data); 3582 } 3583 3584 /* Record important candidates and add them to related_cands bitmaps. */ 3585 3586 static void 3587 record_important_candidates (struct ivopts_data *data) 3588 { 3589 unsigned i; 3590 struct iv_group *group; 3591 3592 for (i = 0; i < data->vcands.length (); i++) 3593 { 3594 struct iv_cand *cand = data->vcands[i]; 3595 3596 if (cand->important) 3597 bitmap_set_bit (data->important_candidates, i); 3598 } 3599 3600 data->consider_all_candidates = (data->vcands.length () 3601 <= CONSIDER_ALL_CANDIDATES_BOUND); 3602 3603 /* Add important candidates to groups' related_cands bitmaps. */ 3604 for (i = 0; i < data->vgroups.length (); i++) 3605 { 3606 group = data->vgroups[i]; 3607 bitmap_ior_into (group->related_cands, data->important_candidates); 3608 } 3609 } 3610 3611 /* Allocates the data structure mapping the (use, candidate) pairs to costs. 3612 If consider_all_candidates is true, we use a two-dimensional array, otherwise 3613 we allocate a simple list to every use. */ 3614 3615 static void 3616 alloc_use_cost_map (struct ivopts_data *data) 3617 { 3618 unsigned i, size, s; 3619 3620 for (i = 0; i < data->vgroups.length (); i++) 3621 { 3622 struct iv_group *group = data->vgroups[i]; 3623 3624 if (data->consider_all_candidates) 3625 size = data->vcands.length (); 3626 else 3627 { 3628 s = bitmap_count_bits (group->related_cands); 3629 3630 /* Round up to the power of two, so that moduling by it is fast. */ 3631 size = s ? (1 << ceil_log2 (s)) : 1; 3632 } 3633 3634 group->n_map_members = size; 3635 group->cost_map = XCNEWVEC (class cost_pair, size); 3636 } 3637 } 3638 3639 /* Sets cost of (GROUP, CAND) pair to COST and record that it depends 3640 on invariants INV_VARS and that the value used in expressing it is 3641 VALUE, and in case of iv elimination the comparison operator is COMP. */ 3642 3643 static void 3644 set_group_iv_cost (struct ivopts_data *data, 3645 struct iv_group *group, struct iv_cand *cand, 3646 comp_cost cost, bitmap inv_vars, tree value, 3647 enum tree_code comp, bitmap inv_exprs) 3648 { 3649 unsigned i, s; 3650 3651 if (cost.infinite_cost_p ()) 3652 { 3653 BITMAP_FREE (inv_vars); 3654 BITMAP_FREE (inv_exprs); 3655 return; 3656 } 3657 3658 if (data->consider_all_candidates) 3659 { 3660 group->cost_map[cand->id].cand = cand; 3661 group->cost_map[cand->id].cost = cost; 3662 group->cost_map[cand->id].inv_vars = inv_vars; 3663 group->cost_map[cand->id].inv_exprs = inv_exprs; 3664 group->cost_map[cand->id].value = value; 3665 group->cost_map[cand->id].comp = comp; 3666 return; 3667 } 3668 3669 /* n_map_members is a power of two, so this computes modulo. */ 3670 s = cand->id & (group->n_map_members - 1); 3671 for (i = s; i < group->n_map_members; i++) 3672 if (!group->cost_map[i].cand) 3673 goto found; 3674 for (i = 0; i < s; i++) 3675 if (!group->cost_map[i].cand) 3676 goto found; 3677 3678 gcc_unreachable (); 3679 3680 found: 3681 group->cost_map[i].cand = cand; 3682 group->cost_map[i].cost = cost; 3683 group->cost_map[i].inv_vars = inv_vars; 3684 group->cost_map[i].inv_exprs = inv_exprs; 3685 group->cost_map[i].value = value; 3686 group->cost_map[i].comp = comp; 3687 } 3688 3689 /* Gets cost of (GROUP, CAND) pair. 
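   When not all candidates are considered, the lookup mirrors the
   open-addressed insertion in set_group_iv_cost: probing starts at slot
   cand->id & (n_map_members - 1), e.g. slot 5 for cand->id 13 with 8 map
   members, and walks forward, wrapping around once, until the candidate or
   an empty slot is found.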
*/ 3690 3691 static class cost_pair * 3692 get_group_iv_cost (struct ivopts_data *data, struct iv_group *group, 3693 struct iv_cand *cand) 3694 { 3695 unsigned i, s; 3696 class cost_pair *ret; 3697 3698 if (!cand) 3699 return NULL; 3700 3701 if (data->consider_all_candidates) 3702 { 3703 ret = group->cost_map + cand->id; 3704 if (!ret->cand) 3705 return NULL; 3706 3707 return ret; 3708 } 3709 3710 /* n_map_members is a power of two, so this computes modulo. */ 3711 s = cand->id & (group->n_map_members - 1); 3712 for (i = s; i < group->n_map_members; i++) 3713 if (group->cost_map[i].cand == cand) 3714 return group->cost_map + i; 3715 else if (group->cost_map[i].cand == NULL) 3716 return NULL; 3717 for (i = 0; i < s; i++) 3718 if (group->cost_map[i].cand == cand) 3719 return group->cost_map + i; 3720 else if (group->cost_map[i].cand == NULL) 3721 return NULL; 3722 3723 return NULL; 3724 } 3725 3726 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */ 3727 static rtx 3728 produce_memory_decl_rtl (tree obj, int *regno) 3729 { 3730 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj)); 3731 machine_mode address_mode = targetm.addr_space.address_mode (as); 3732 rtx x; 3733 3734 gcc_assert (obj); 3735 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj)) 3736 { 3737 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj)); 3738 x = gen_rtx_SYMBOL_REF (address_mode, name); 3739 SET_SYMBOL_REF_DECL (x, obj); 3740 x = gen_rtx_MEM (DECL_MODE (obj), x); 3741 set_mem_addr_space (x, as); 3742 targetm.encode_section_info (obj, x, true); 3743 } 3744 else 3745 { 3746 x = gen_raw_REG (address_mode, (*regno)++); 3747 x = gen_rtx_MEM (DECL_MODE (obj), x); 3748 set_mem_addr_space (x, as); 3749 } 3750 3751 return x; 3752 } 3753 3754 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for 3755 walk_tree. DATA contains the actual fake register number. */ 3756 3757 static tree 3758 prepare_decl_rtl (tree *expr_p, int *ws, void *data) 3759 { 3760 tree obj = NULL_TREE; 3761 rtx x = NULL_RTX; 3762 int *regno = (int *) data; 3763 3764 switch (TREE_CODE (*expr_p)) 3765 { 3766 case ADDR_EXPR: 3767 for (expr_p = &TREE_OPERAND (*expr_p, 0); 3768 handled_component_p (*expr_p); 3769 expr_p = &TREE_OPERAND (*expr_p, 0)) 3770 continue; 3771 obj = *expr_p; 3772 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj)) 3773 x = produce_memory_decl_rtl (obj, regno); 3774 break; 3775 3776 case SSA_NAME: 3777 *ws = 0; 3778 obj = SSA_NAME_VAR (*expr_p); 3779 /* Defer handling of anonymous SSA_NAMEs to the expander. */ 3780 if (!obj) 3781 return NULL_TREE; 3782 if (!DECL_RTL_SET_P (obj)) 3783 x = gen_raw_REG (DECL_MODE (obj), (*regno)++); 3784 break; 3785 3786 case VAR_DECL: 3787 case PARM_DECL: 3788 case RESULT_DECL: 3789 *ws = 0; 3790 obj = *expr_p; 3791 3792 if (DECL_RTL_SET_P (obj)) 3793 break; 3794 3795 if (DECL_MODE (obj) == BLKmode) 3796 x = produce_memory_decl_rtl (obj, regno); 3797 else 3798 x = gen_raw_REG (DECL_MODE (obj), (*regno)++); 3799 3800 break; 3801 3802 default: 3803 break; 3804 } 3805 3806 if (x) 3807 { 3808 decl_rtl_to_reset.safe_push (obj); 3809 SET_DECL_RTL (obj, x); 3810 } 3811 3812 return NULL_TREE; 3813 } 3814 3815 /* Predict whether the given loop will be transformed in the RTL 3816 doloop_optimize pass. Attempt to duplicate some doloop_optimize checks. 3817 This is only for target independent checks, see targetm.predict_doloop_p 3818 for the target dependent ones. 
3819
3820 Note that according to some initial investigation, some checks, like the
3821 costly niter check and invalid stmt scanning, don't have much gain in
3822 general cases, so keep this as simple as possible for now.
3823
3824 Some RTL-specific checks seem impossible to perform on gimple; if any new
3825 checks or easy checks _are_ missing here, please add them. */
3826
3827 static bool
3828 generic_predict_doloop_p (struct ivopts_data *data)
3829 {
3830 class loop *loop = data->current_loop;
3831
3832 /* Call target hook for target dependent checks. */
3833 if (!targetm.predict_doloop_p (loop))
3834 {
3835 if (dump_file && (dump_flags & TDF_DETAILS))
3836 fprintf (dump_file, "Predict doloop failure due to"
3837 " target specific checks.\n");
3838 return false;
3839 }
3840
3841 /* Similar to doloop_optimize, check the iteration description to know
3842 whether it's suitable or not. Keep it as simple as possible; feel free to
3843 extend it if you find that any multiple-exit cases matter. */
3844 edge exit = single_dom_exit (loop);
3845 class tree_niter_desc *niter_desc;
3846 if (!exit || !(niter_desc = niter_for_exit (data, exit)))
3847 {
3848 if (dump_file && (dump_flags & TDF_DETAILS))
3849 fprintf (dump_file, "Predict doloop failure due to"
3850 " unexpected niters.\n");
3851 return false;
3852 }
3853
3854 /* Similar to doloop_optimize, check whether the iteration count is too
3855 small to be profitable. */
3856 HOST_WIDE_INT est_niter = get_estimated_loop_iterations_int (loop);
3857 if (est_niter == -1)
3858 est_niter = get_likely_max_loop_iterations_int (loop);
3859 if (est_niter >= 0 && est_niter < 3)
3860 {
3861 if (dump_file && (dump_flags & TDF_DETAILS))
3862 fprintf (dump_file,
3863 "Predict doloop failure due to"
3864 " too few iterations (%u).\n",
3865 (unsigned int) est_niter);
3866 return false;
3867 }
3868
3869 return true;
3870 }
3871
3872 /* Determines the cost of the computation of EXPR. */
3873
3874 static unsigned
3875 computation_cost (tree expr, bool speed)
3876 {
3877 rtx_insn *seq;
3878 rtx rslt;
3879 tree type = TREE_TYPE (expr);
3880 unsigned cost;
3881 /* Avoid using hard regs in ways which may be unsupported. */
3882 int regno = LAST_VIRTUAL_REGISTER + 1;
3883 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3884 enum node_frequency real_frequency = node->frequency;
3885
3886 node->frequency = NODE_FREQUENCY_NORMAL;
3887 crtl->maybe_hot_insn_p = speed;
3888 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3889 start_sequence ();
3890 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3891 seq = get_insns ();
3892 end_sequence ();
3893 default_rtl_profile ();
3894 node->frequency = real_frequency;
3895
3896 cost = seq_cost (seq, speed);
3897 if (MEM_P (rslt))
3898 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3899 TYPE_ADDR_SPACE (type), speed);
3900 else if (!REG_P (rslt))
3901 cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3902
3903 return cost;
3904 }
3905
3906 /* Returns the variable containing the value of candidate CAND at statement AT. */
3907
3908 static tree
3909 var_at_stmt (class loop *loop, struct iv_cand *cand, gimple *stmt)
3910 {
3911 if (stmt_after_increment (loop, cand, stmt))
3912 return cand->var_after;
3913 else
3914 return cand->var_before;
3915 }
3916
3917 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3918 same precision that is at least as wide as the precision of TYPE, stores
3919 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3920 type of A and B.
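   For example (names invented): if A is (unsigned int) x_1 and B is
   (unsigned int) y_2, where x_1 and y_2 both have a 64-bit type, x_1 is
   stored to A, y_2 to B, and the 64-bit type is returned; if only one of
   them is such a conversion, the type of A is returned unchanged.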
*/
3921
3922 static tree
3923 determine_common_wider_type (tree *a, tree *b)
3924 {
3925   tree wider_type = NULL;
3926   tree suba, subb;
3927   tree atype = TREE_TYPE (*a);
3928
3929   if (CONVERT_EXPR_P (*a))
3930     {
3931       suba = TREE_OPERAND (*a, 0);
3932       wider_type = TREE_TYPE (suba);
3933       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3934 	return atype;
3935     }
3936   else
3937     return atype;
3938
3939   if (CONVERT_EXPR_P (*b))
3940     {
3941       subb = TREE_OPERAND (*b, 0);
3942       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3943 	return atype;
3944     }
3945   else
3946     return atype;
3947
3948   *a = suba;
3949   *b = subb;
3950   return wider_type;
3951 }
3952
3953 /* Determines the expression by which USE is expressed from induction variable
3954    CAND at statement AT in LOOP.  The expression is stored in two parts in a
3955    decomposed form.  The invariant part is stored in AFF_INV, the variant
3956    part in AFF_VAR.  Store the ratio of USE.step to CAND.step in PRAT if it's
3957    non-null.  Returns false if USE cannot be expressed using CAND.  */
3958
3959 static bool
3960 get_computation_aff_1 (class loop *loop, gimple *at, struct iv_use *use,
3961 		       struct iv_cand *cand, class aff_tree *aff_inv,
3962 		       class aff_tree *aff_var, widest_int *prat = NULL)
3963 {
3964   tree ubase = use->iv->base, ustep = use->iv->step;
3965   tree cbase = cand->iv->base, cstep = cand->iv->step;
3966   tree common_type, uutype, var, cstep_common;
3967   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3968   aff_tree aff_cbase;
3969   widest_int rat;
3970
3971   /* We must have a precision to express the values of use.  */
3972   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3973     return false;
3974
3975   var = var_at_stmt (loop, cand, at);
3976   uutype = unsigned_type_for (utype);
3977
3978   /* If the conversion is not a no-op, perform it.  */
3979   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3980     {
3981       if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3982 	  && (CONVERT_EXPR_P (cstep) || poly_int_tree_p (cstep)))
3983 	{
3984 	  tree inner_base, inner_step, inner_type;
3985 	  inner_base = TREE_OPERAND (cbase, 0);
3986 	  if (CONVERT_EXPR_P (cstep))
3987 	    inner_step = TREE_OPERAND (cstep, 0);
3988 	  else
3989 	    inner_step = cstep;
3990
3991 	  inner_type = TREE_TYPE (inner_base);
3992 	  /* If the candidate is added from a biv whose type is smaller than
3993 	     ctype, we know that neither the candidate nor the biv overflows.
3994 	     In this case, it's safe to skip the conversion in the candidate.
3995 	     For example, (unsigned short)((unsigned long)A) equals
3996 	     (unsigned short)A, if A has a type no larger than short.  */
3997 	  if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3998 	    {
3999 	      cbase = inner_base;
4000 	      cstep = inner_step;
4001 	    }
4002 	}
4003       cbase = fold_convert (uutype, cbase);
4004       cstep = fold_convert (uutype, cstep);
4005       var = fold_convert (uutype, var);
4006     }
4007
4008   /* Ratio is 1 when computing the value of biv cand by itself.
4009      We can't rely on constant_multiple_of in this case because the
4010      use is created after the original biv is selected.  The call
4011      could fail because of inconsistent fold behavior.  See PR68021
4012      for more information.
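     In the general case the ratio is obtained below from
     constant_multiple_of (ustep, cstep, ...), and the use is then
     reconstructed as use = ubase - ratio * cbase + ratio * var; e.g. for
     a use with step 4 and a candidate with step 1 the ratio is 4.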
*/ 4013 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt) 4014 { 4015 gcc_assert (is_gimple_assign (use->stmt)); 4016 gcc_assert (use->iv->ssa_name == cand->var_after); 4017 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after); 4018 rat = 1; 4019 } 4020 else if (!constant_multiple_of (ustep, cstep, &rat)) 4021 return false; 4022 4023 if (prat) 4024 *prat = rat; 4025 4026 /* In case both UBASE and CBASE are shortened to UUTYPE from some common 4027 type, we achieve better folding by computing their difference in this 4028 wider type, and cast the result to UUTYPE. We do not need to worry about 4029 overflows, as all the arithmetics will in the end be performed in UUTYPE 4030 anyway. */ 4031 common_type = determine_common_wider_type (&ubase, &cbase); 4032 4033 /* use = ubase - ratio * cbase + ratio * var. */ 4034 tree_to_aff_combination (ubase, common_type, aff_inv); 4035 tree_to_aff_combination (cbase, common_type, &aff_cbase); 4036 tree_to_aff_combination (var, uutype, aff_var); 4037 4038 /* We need to shift the value if we are after the increment. */ 4039 if (stmt_after_increment (loop, cand, at)) 4040 { 4041 aff_tree cstep_aff; 4042 4043 if (common_type != uutype) 4044 cstep_common = fold_convert (common_type, cstep); 4045 else 4046 cstep_common = cstep; 4047 4048 tree_to_aff_combination (cstep_common, common_type, &cstep_aff); 4049 aff_combination_add (&aff_cbase, &cstep_aff); 4050 } 4051 4052 aff_combination_scale (&aff_cbase, -rat); 4053 aff_combination_add (aff_inv, &aff_cbase); 4054 if (common_type != uutype) 4055 aff_combination_convert (aff_inv, uutype); 4056 4057 aff_combination_scale (aff_var, rat); 4058 return true; 4059 } 4060 4061 /* Determines the expression by that USE is expressed from induction variable 4062 CAND at statement AT in LOOP. The expression is stored in a decomposed 4063 form into AFF. Returns false if USE cannot be expressed using CAND. */ 4064 4065 static bool 4066 get_computation_aff (class loop *loop, gimple *at, struct iv_use *use, 4067 struct iv_cand *cand, class aff_tree *aff) 4068 { 4069 aff_tree aff_var; 4070 4071 if (!get_computation_aff_1 (loop, at, use, cand, aff, &aff_var)) 4072 return false; 4073 4074 aff_combination_add (aff, &aff_var); 4075 return true; 4076 } 4077 4078 /* Return the type of USE. */ 4079 4080 static tree 4081 get_use_type (struct iv_use *use) 4082 { 4083 tree base_type = TREE_TYPE (use->iv->base); 4084 tree type; 4085 4086 if (use->type == USE_REF_ADDRESS) 4087 { 4088 /* The base_type may be a void pointer. Create a pointer type based on 4089 the mem_ref instead. */ 4090 type = build_pointer_type (TREE_TYPE (*use->op_p)); 4091 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type)) 4092 == TYPE_ADDR_SPACE (TREE_TYPE (base_type))); 4093 } 4094 else 4095 type = base_type; 4096 4097 return type; 4098 } 4099 4100 /* Determines the expression by that USE is expressed from induction variable 4101 CAND at statement AT in LOOP. The computation is unshared. */ 4102 4103 static tree 4104 get_computation_at (class loop *loop, gimple *at, 4105 struct iv_use *use, struct iv_cand *cand) 4106 { 4107 aff_tree aff; 4108 tree type = get_use_type (use); 4109 4110 if (!get_computation_aff (loop, at, use, cand, &aff)) 4111 return NULL_TREE; 4112 unshare_aff_combination (&aff); 4113 return fold_convert (type, aff_combination_to_tree (&aff)); 4114 } 4115 4116 /* Like get_computation_at, but try harder, even if the computation 4117 is more expensive. Intended for debug stmts. 
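   In particular, when CAND's step is a constant multiple of USE's step,
   this tries to express use = ubase + (var - cbase) / ratio, which
   get_computation_at does not handle.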
*/ 4118 4119 static tree 4120 get_debug_computation_at (class loop *loop, gimple *at, 4121 struct iv_use *use, struct iv_cand *cand) 4122 { 4123 if (tree ret = get_computation_at (loop, at, use, cand)) 4124 return ret; 4125 4126 tree ubase = use->iv->base, ustep = use->iv->step; 4127 tree cbase = cand->iv->base, cstep = cand->iv->step; 4128 tree var; 4129 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase); 4130 widest_int rat; 4131 4132 /* We must have a precision to express the values of use. */ 4133 if (TYPE_PRECISION (utype) >= TYPE_PRECISION (ctype)) 4134 return NULL_TREE; 4135 4136 /* Try to handle the case that get_computation_at doesn't, 4137 try to express 4138 use = ubase + (var - cbase) / ratio. */ 4139 if (!constant_multiple_of (cstep, fold_convert (TREE_TYPE (cstep), ustep), 4140 &rat)) 4141 return NULL_TREE; 4142 4143 bool neg_p = false; 4144 if (wi::neg_p (rat)) 4145 { 4146 if (TYPE_UNSIGNED (ctype)) 4147 return NULL_TREE; 4148 neg_p = true; 4149 rat = wi::neg (rat); 4150 } 4151 4152 /* If both IVs can wrap around and CAND doesn't have a power of two step, 4153 it is unsafe. Consider uint16_t CAND with step 9, when wrapping around, 4154 the values will be ... 0xfff0, 0xfff9, 2, 11 ... and when use is say 4155 uint8_t with step 3, those values divided by 3 cast to uint8_t will be 4156 ... 0x50, 0x53, 0, 3 ... rather than expected 0x50, 0x53, 0x56, 0x59. */ 4157 if (!use->iv->no_overflow 4158 && !cand->iv->no_overflow 4159 && !integer_pow2p (cstep)) 4160 return NULL_TREE; 4161 4162 int bits = wi::exact_log2 (rat); 4163 if (bits == -1) 4164 bits = wi::floor_log2 (rat) + 1; 4165 if (!cand->iv->no_overflow 4166 && TYPE_PRECISION (utype) + bits > TYPE_PRECISION (ctype)) 4167 return NULL_TREE; 4168 4169 var = var_at_stmt (loop, cand, at); 4170 4171 if (POINTER_TYPE_P (ctype)) 4172 { 4173 ctype = unsigned_type_for (ctype); 4174 cbase = fold_convert (ctype, cbase); 4175 cstep = fold_convert (ctype, cstep); 4176 var = fold_convert (ctype, var); 4177 } 4178 4179 if (stmt_after_increment (loop, cand, at)) 4180 var = fold_build2 (MINUS_EXPR, TREE_TYPE (var), var, 4181 unshare_expr (cstep)); 4182 4183 var = fold_build2 (MINUS_EXPR, TREE_TYPE (var), var, cbase); 4184 var = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (var), var, 4185 wide_int_to_tree (TREE_TYPE (var), rat)); 4186 if (POINTER_TYPE_P (utype)) 4187 { 4188 var = fold_convert (sizetype, var); 4189 if (neg_p) 4190 var = fold_build1 (NEGATE_EXPR, sizetype, var); 4191 var = fold_build2 (POINTER_PLUS_EXPR, utype, ubase, var); 4192 } 4193 else 4194 { 4195 var = fold_convert (utype, var); 4196 var = fold_build2 (neg_p ? MINUS_EXPR : PLUS_EXPR, utype, 4197 ubase, var); 4198 } 4199 return var; 4200 } 4201 4202 /* Adjust the cost COST for being in loop setup rather than loop body. 4203 If we're optimizing for space, the loop setup overhead is constant; 4204 if we're optimizing for speed, amortize it over the per-iteration cost. 4205 If ROUND_UP_P is true, the result is round up rather than to zero when 4206 optimizing for speed. */ 4207 static int64_t 4208 adjust_setup_cost (struct ivopts_data *data, int64_t cost, 4209 bool round_up_p = false) 4210 { 4211 if (cost == INFTY) 4212 return cost; 4213 else if (optimize_loop_for_speed_p (data->current_loop)) 4214 { 4215 int64_t niters = (int64_t) avg_loop_niter (data->current_loop); 4216 return (cost + (round_up_p ? niters - 1 : 0)) / niters; 4217 } 4218 else 4219 return cost; 4220 } 4221 4222 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. 
MULT is the 4223 EXPR operand holding the shift. COST0 and COST1 are the costs for 4224 calculating the operands of EXPR. Returns true if successful, and returns 4225 the cost in COST. */ 4226 4227 static bool 4228 get_shiftadd_cost (tree expr, scalar_int_mode mode, comp_cost cost0, 4229 comp_cost cost1, tree mult, bool speed, comp_cost *cost) 4230 { 4231 comp_cost res; 4232 tree op1 = TREE_OPERAND (expr, 1); 4233 tree cst = TREE_OPERAND (mult, 1); 4234 tree multop = TREE_OPERAND (mult, 0); 4235 int m = exact_log2 (int_cst_value (cst)); 4236 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode)); 4237 int as_cost, sa_cost; 4238 bool mult_in_op1; 4239 4240 if (!(m >= 0 && m < maxm)) 4241 return false; 4242 4243 STRIP_NOPS (op1); 4244 mult_in_op1 = operand_equal_p (op1, mult, 0); 4245 4246 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m); 4247 4248 /* If the target has a cheap shift-and-add or shift-and-sub instruction, 4249 use that in preference to a shift insn followed by an add insn. */ 4250 sa_cost = (TREE_CODE (expr) != MINUS_EXPR 4251 ? shiftadd_cost (speed, mode, m) 4252 : (mult_in_op1 4253 ? shiftsub1_cost (speed, mode, m) 4254 : shiftsub0_cost (speed, mode, m))); 4255 4256 res = comp_cost (MIN (as_cost, sa_cost), 0); 4257 res += (mult_in_op1 ? cost0 : cost1); 4258 4259 STRIP_NOPS (multop); 4260 if (!is_gimple_val (multop)) 4261 res += force_expr_to_var_cost (multop, speed); 4262 4263 *cost = res; 4264 return true; 4265 } 4266 4267 /* Estimates cost of forcing expression EXPR into a variable. */ 4268 4269 static comp_cost 4270 force_expr_to_var_cost (tree expr, bool speed) 4271 { 4272 static bool costs_initialized = false; 4273 static unsigned integer_cost [2]; 4274 static unsigned symbol_cost [2]; 4275 static unsigned address_cost [2]; 4276 tree op0, op1; 4277 comp_cost cost0, cost1, cost; 4278 machine_mode mode; 4279 scalar_int_mode int_mode; 4280 4281 if (!costs_initialized) 4282 { 4283 tree type = build_pointer_type (integer_type_node); 4284 tree var, addr; 4285 rtx x; 4286 int i; 4287 4288 var = create_tmp_var_raw (integer_type_node, "test_var"); 4289 TREE_STATIC (var) = 1; 4290 x = produce_memory_decl_rtl (var, NULL); 4291 SET_DECL_RTL (var, x); 4292 4293 addr = build1 (ADDR_EXPR, type, var); 4294 4295 4296 for (i = 0; i < 2; i++) 4297 { 4298 integer_cost[i] = computation_cost (build_int_cst (integer_type_node, 4299 2000), i); 4300 4301 symbol_cost[i] = computation_cost (addr, i) + 1; 4302 4303 address_cost[i] 4304 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1; 4305 if (dump_file && (dump_flags & TDF_DETAILS)) 4306 { 4307 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? 
"speed" : "size"); 4308 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]); 4309 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]); 4310 fprintf (dump_file, " address %d\n", (int) address_cost[i]); 4311 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]); 4312 fprintf (dump_file, "\n"); 4313 } 4314 } 4315 4316 costs_initialized = true; 4317 } 4318 4319 STRIP_NOPS (expr); 4320 4321 if (SSA_VAR_P (expr)) 4322 return no_cost; 4323 4324 if (is_gimple_min_invariant (expr)) 4325 { 4326 if (poly_int_tree_p (expr)) 4327 return comp_cost (integer_cost [speed], 0); 4328 4329 if (TREE_CODE (expr) == ADDR_EXPR) 4330 { 4331 tree obj = TREE_OPERAND (expr, 0); 4332 4333 if (VAR_P (obj) 4334 || TREE_CODE (obj) == PARM_DECL 4335 || TREE_CODE (obj) == RESULT_DECL) 4336 return comp_cost (symbol_cost [speed], 0); 4337 } 4338 4339 return comp_cost (address_cost [speed], 0); 4340 } 4341 4342 switch (TREE_CODE (expr)) 4343 { 4344 case POINTER_PLUS_EXPR: 4345 case PLUS_EXPR: 4346 case MINUS_EXPR: 4347 case MULT_EXPR: 4348 case TRUNC_DIV_EXPR: 4349 case BIT_AND_EXPR: 4350 case BIT_IOR_EXPR: 4351 case LSHIFT_EXPR: 4352 case RSHIFT_EXPR: 4353 op0 = TREE_OPERAND (expr, 0); 4354 op1 = TREE_OPERAND (expr, 1); 4355 STRIP_NOPS (op0); 4356 STRIP_NOPS (op1); 4357 break; 4358 4359 CASE_CONVERT: 4360 case NEGATE_EXPR: 4361 case BIT_NOT_EXPR: 4362 op0 = TREE_OPERAND (expr, 0); 4363 STRIP_NOPS (op0); 4364 op1 = NULL_TREE; 4365 break; 4366 /* See add_iv_candidate_for_doloop, for doloop may_be_zero case, we 4367 introduce COND_EXPR for IV base, need to support better cost estimation 4368 for this COND_EXPR and tcc_comparison. */ 4369 case COND_EXPR: 4370 op0 = TREE_OPERAND (expr, 1); 4371 STRIP_NOPS (op0); 4372 op1 = TREE_OPERAND (expr, 2); 4373 STRIP_NOPS (op1); 4374 break; 4375 case LT_EXPR: 4376 case LE_EXPR: 4377 case GT_EXPR: 4378 case GE_EXPR: 4379 case EQ_EXPR: 4380 case NE_EXPR: 4381 case UNORDERED_EXPR: 4382 case ORDERED_EXPR: 4383 case UNLT_EXPR: 4384 case UNLE_EXPR: 4385 case UNGT_EXPR: 4386 case UNGE_EXPR: 4387 case UNEQ_EXPR: 4388 case LTGT_EXPR: 4389 case MAX_EXPR: 4390 case MIN_EXPR: 4391 op0 = TREE_OPERAND (expr, 0); 4392 STRIP_NOPS (op0); 4393 op1 = TREE_OPERAND (expr, 1); 4394 STRIP_NOPS (op1); 4395 break; 4396 4397 default: 4398 /* Just an arbitrary value, FIXME. 
*/ 4399 return comp_cost (target_spill_cost[speed], 0); 4400 } 4401 4402 if (op0 == NULL_TREE 4403 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0)) 4404 cost0 = no_cost; 4405 else 4406 cost0 = force_expr_to_var_cost (op0, speed); 4407 4408 if (op1 == NULL_TREE 4409 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1)) 4410 cost1 = no_cost; 4411 else 4412 cost1 = force_expr_to_var_cost (op1, speed); 4413 4414 mode = TYPE_MODE (TREE_TYPE (expr)); 4415 switch (TREE_CODE (expr)) 4416 { 4417 case POINTER_PLUS_EXPR: 4418 case PLUS_EXPR: 4419 case MINUS_EXPR: 4420 case NEGATE_EXPR: 4421 cost = comp_cost (add_cost (speed, mode), 0); 4422 if (TREE_CODE (expr) != NEGATE_EXPR) 4423 { 4424 tree mult = NULL_TREE; 4425 comp_cost sa_cost; 4426 if (TREE_CODE (op1) == MULT_EXPR) 4427 mult = op1; 4428 else if (TREE_CODE (op0) == MULT_EXPR) 4429 mult = op0; 4430 4431 if (mult != NULL_TREE 4432 && is_a <scalar_int_mode> (mode, &int_mode) 4433 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1)) 4434 && get_shiftadd_cost (expr, int_mode, cost0, cost1, mult, 4435 speed, &sa_cost)) 4436 return sa_cost; 4437 } 4438 break; 4439 4440 CASE_CONVERT: 4441 { 4442 tree inner_mode, outer_mode; 4443 outer_mode = TREE_TYPE (expr); 4444 inner_mode = TREE_TYPE (op0); 4445 cost = comp_cost (convert_cost (TYPE_MODE (outer_mode), 4446 TYPE_MODE (inner_mode), speed), 0); 4447 } 4448 break; 4449 4450 case MULT_EXPR: 4451 if (cst_and_fits_in_hwi (op0)) 4452 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0), 4453 mode, speed), 0); 4454 else if (cst_and_fits_in_hwi (op1)) 4455 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1), 4456 mode, speed), 0); 4457 else 4458 return comp_cost (target_spill_cost [speed], 0); 4459 break; 4460 4461 case TRUNC_DIV_EXPR: 4462 /* Division by power of two is usually cheap, so we allow it. Forbid 4463 anything else. */ 4464 if (integer_pow2p (TREE_OPERAND (expr, 1))) 4465 cost = comp_cost (add_cost (speed, mode), 0); 4466 else 4467 cost = comp_cost (target_spill_cost[speed], 0); 4468 break; 4469 4470 case BIT_AND_EXPR: 4471 case BIT_IOR_EXPR: 4472 case BIT_NOT_EXPR: 4473 case LSHIFT_EXPR: 4474 case RSHIFT_EXPR: 4475 cost = comp_cost (add_cost (speed, mode), 0); 4476 break; 4477 case COND_EXPR: 4478 op0 = TREE_OPERAND (expr, 0); 4479 STRIP_NOPS (op0); 4480 if (op0 == NULL_TREE || TREE_CODE (op0) == SSA_NAME 4481 || CONSTANT_CLASS_P (op0)) 4482 cost = no_cost; 4483 else 4484 cost = force_expr_to_var_cost (op0, speed); 4485 break; 4486 case LT_EXPR: 4487 case LE_EXPR: 4488 case GT_EXPR: 4489 case GE_EXPR: 4490 case EQ_EXPR: 4491 case NE_EXPR: 4492 case UNORDERED_EXPR: 4493 case ORDERED_EXPR: 4494 case UNLT_EXPR: 4495 case UNLE_EXPR: 4496 case UNGT_EXPR: 4497 case UNGE_EXPR: 4498 case UNEQ_EXPR: 4499 case LTGT_EXPR: 4500 case MAX_EXPR: 4501 case MIN_EXPR: 4502 /* Simply use add cost for now, FIXME if there is some more accurate cost 4503 evaluation way. */ 4504 cost = comp_cost (add_cost (speed, mode), 0); 4505 break; 4506 4507 default: 4508 gcc_unreachable (); 4509 } 4510 4511 cost += cost0; 4512 cost += cost1; 4513 return cost; 4514 } 4515 4516 /* Estimates cost of forcing EXPR into a variable. INV_VARS is a set of the 4517 invariants the computation depends on. 
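   (For example, forcing something like A_1 + 16 into a variable costs
   roughly one addition, since SSA-name and constant operands add no extra
   cost; see force_expr_to_var_cost above for the details.)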
*/ 4518 4519 static comp_cost 4520 force_var_cost (struct ivopts_data *data, tree expr, bitmap *inv_vars) 4521 { 4522 if (!expr) 4523 return no_cost; 4524 4525 find_inv_vars (data, &expr, inv_vars); 4526 return force_expr_to_var_cost (expr, data->speed); 4527 } 4528 4529 /* Returns cost of auto-modifying address expression in shape base + offset. 4530 AINC_STEP is step size of the address IV. AINC_OFFSET is offset of the 4531 address expression. The address expression has ADDR_MODE in addr space 4532 AS. The memory access has MEM_MODE. SPEED means we are optimizing for 4533 speed or size. */ 4534 4535 enum ainc_type 4536 { 4537 AINC_PRE_INC, /* Pre increment. */ 4538 AINC_PRE_DEC, /* Pre decrement. */ 4539 AINC_POST_INC, /* Post increment. */ 4540 AINC_POST_DEC, /* Post decrement. */ 4541 AINC_NONE /* Also the number of auto increment types. */ 4542 }; 4543 4544 struct ainc_cost_data 4545 { 4546 int64_t costs[AINC_NONE]; 4547 }; 4548 4549 static comp_cost 4550 get_address_cost_ainc (poly_int64 ainc_step, poly_int64 ainc_offset, 4551 machine_mode addr_mode, machine_mode mem_mode, 4552 addr_space_t as, bool speed) 4553 { 4554 if (!USE_LOAD_PRE_DECREMENT (mem_mode) 4555 && !USE_STORE_PRE_DECREMENT (mem_mode) 4556 && !USE_LOAD_POST_DECREMENT (mem_mode) 4557 && !USE_STORE_POST_DECREMENT (mem_mode) 4558 && !USE_LOAD_PRE_INCREMENT (mem_mode) 4559 && !USE_STORE_PRE_INCREMENT (mem_mode) 4560 && !USE_LOAD_POST_INCREMENT (mem_mode) 4561 && !USE_STORE_POST_INCREMENT (mem_mode)) 4562 return infinite_cost; 4563 4564 static vec<ainc_cost_data *> ainc_cost_data_list; 4565 unsigned idx = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode; 4566 if (idx >= ainc_cost_data_list.length ()) 4567 { 4568 unsigned nsize = ((unsigned) as + 1) *MAX_MACHINE_MODE; 4569 4570 gcc_assert (nsize > idx); 4571 ainc_cost_data_list.safe_grow_cleared (nsize); 4572 } 4573 4574 ainc_cost_data *data = ainc_cost_data_list[idx]; 4575 if (data == NULL) 4576 { 4577 rtx reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1); 4578 4579 data = (ainc_cost_data *) xcalloc (1, sizeof (*data)); 4580 data->costs[AINC_PRE_DEC] = INFTY; 4581 data->costs[AINC_POST_DEC] = INFTY; 4582 data->costs[AINC_PRE_INC] = INFTY; 4583 data->costs[AINC_POST_INC] = INFTY; 4584 if (USE_LOAD_PRE_DECREMENT (mem_mode) 4585 || USE_STORE_PRE_DECREMENT (mem_mode)) 4586 { 4587 rtx addr = gen_rtx_PRE_DEC (addr_mode, reg); 4588 4589 if (memory_address_addr_space_p (mem_mode, addr, as)) 4590 data->costs[AINC_PRE_DEC] 4591 = address_cost (addr, mem_mode, as, speed); 4592 } 4593 if (USE_LOAD_POST_DECREMENT (mem_mode) 4594 || USE_STORE_POST_DECREMENT (mem_mode)) 4595 { 4596 rtx addr = gen_rtx_POST_DEC (addr_mode, reg); 4597 4598 if (memory_address_addr_space_p (mem_mode, addr, as)) 4599 data->costs[AINC_POST_DEC] 4600 = address_cost (addr, mem_mode, as, speed); 4601 } 4602 if (USE_LOAD_PRE_INCREMENT (mem_mode) 4603 || USE_STORE_PRE_INCREMENT (mem_mode)) 4604 { 4605 rtx addr = gen_rtx_PRE_INC (addr_mode, reg); 4606 4607 if (memory_address_addr_space_p (mem_mode, addr, as)) 4608 data->costs[AINC_PRE_INC] 4609 = address_cost (addr, mem_mode, as, speed); 4610 } 4611 if (USE_LOAD_POST_INCREMENT (mem_mode) 4612 || USE_STORE_POST_INCREMENT (mem_mode)) 4613 { 4614 rtx addr = gen_rtx_POST_INC (addr_mode, reg); 4615 4616 if (memory_address_addr_space_p (mem_mode, addr, as)) 4617 data->costs[AINC_POST_INC] 4618 = address_cost (addr, mem_mode, as, speed); 4619 } 4620 ainc_cost_data_list[idx] = data; 4621 } 4622 4623 poly_int64 msize = GET_MODE_SIZE (mem_mode); 4624 if (known_eq 
(ainc_offset, 0) && known_eq (msize, ainc_step))
4625     return comp_cost (data->costs[AINC_POST_INC], 0);
4626   if (known_eq (ainc_offset, 0) && known_eq (msize, -ainc_step))
4627     return comp_cost (data->costs[AINC_POST_DEC], 0);
4628   if (known_eq (ainc_offset, msize) && known_eq (msize, ainc_step))
4629     return comp_cost (data->costs[AINC_PRE_INC], 0);
4630   if (known_eq (ainc_offset, -msize) && known_eq (msize, -ainc_step))
4631     return comp_cost (data->costs[AINC_PRE_DEC], 0);
4632
4633   return infinite_cost;
4634 }
4635
4636 /* Return the cost of computing USE's address expression by using CAND.
4637    AFF_INV and AFF_VAR represent the invariant and variant parts of the
4638    address expression, respectively.  If AFF_INV is simple, store the loop
4639    invariant variables it depends on in INV_VARS; if AFF_INV is complicated,
4640    handle it as a new invariant expression and record it in INV_EXPR.
4641    RATIO is the ratio between the steps of USE and CAND, i.e. USE's step
4642    divided by CAND's step.  If CAN_AUTOINC is non-NULL, store a boolean in
4643    it indicating whether this is an auto-increment address.  */
4644
4645 static comp_cost
4646 get_address_cost (struct ivopts_data *data, struct iv_use *use,
4647 		  struct iv_cand *cand, aff_tree *aff_inv,
4648 		  aff_tree *aff_var, HOST_WIDE_INT ratio,
4649 		  bitmap *inv_vars, iv_inv_expr_ent **inv_expr,
4650 		  bool *can_autoinc, bool speed)
4651 {
4652   rtx addr;
4653   bool simple_inv = true;
4654   tree comp_inv = NULL_TREE, type = aff_var->type;
4655   comp_cost var_cost = no_cost, cost = no_cost;
4656   struct mem_address parts = {NULL_TREE, integer_one_node,
4657 			      NULL_TREE, NULL_TREE, NULL_TREE};
4658   machine_mode addr_mode = TYPE_MODE (type);
4659   machine_mode mem_mode = TYPE_MODE (use->mem_type);
4660   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
4661   /* Only true if ratio != 1.  */
4662   bool ok_with_ratio_p = false;
4663   bool ok_without_ratio_p = false;
4664
4665   if (!aff_combination_const_p (aff_inv))
4666     {
4667       parts.index = integer_one_node;
4668       /* Addressing mode "base + index".  */
4669       ok_without_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4670       if (ratio != 1)
4671 	{
4672 	  parts.step = wide_int_to_tree (type, ratio);
4673 	  /* Addressing mode "base + index << scale".  */
4674 	  ok_with_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4675 	  if (!ok_with_ratio_p)
4676 	    parts.step = NULL_TREE;
4677 	}
4678       if (ok_with_ratio_p || ok_without_ratio_p)
4679 	{
4680 	  if (maybe_ne (aff_inv->offset, 0))
4681 	    {
4682 	      parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4683 	      /* Addressing mode "base + index [<< scale] + offset".  */
4684 	      if (!valid_mem_ref_p (mem_mode, as, &parts))
4685 		parts.offset = NULL_TREE;
4686 	      else
4687 		aff_inv->offset = 0;
4688 	    }
4689
4690 	  move_fixed_address_to_symbol (&parts, aff_inv);
4691 	  /* The base is a fixed address and is moved to the symbol part.  */
4692 	  if (parts.symbol != NULL_TREE && aff_combination_zero_p (aff_inv))
4693 	    parts.base = NULL_TREE;
4694
4695 	  /* Addressing mode "symbol + base + index [<< scale] [+ offset]".  */
4696 	  if (parts.symbol != NULL_TREE
4697 	      && !valid_mem_ref_p (mem_mode, as, &parts))
4698 	    {
4699 	      aff_combination_add_elt (aff_inv, parts.symbol, 1);
4700 	      parts.symbol = NULL_TREE;
4701 	      /* Reset SIMPLE_INV since the symbol address needs to be computed
4702 		 outside of the address expression in this case.  */
4703 	      simple_inv = false;
4704 	      /* The symbol part is moved back to the base part; it can't be NULL.
*/ 4705 parts.base = integer_one_node; 4706 } 4707 } 4708 else 4709 parts.index = NULL_TREE; 4710 } 4711 else 4712 { 4713 poly_int64 ainc_step; 4714 if (can_autoinc 4715 && ratio == 1 4716 && ptrdiff_tree_p (cand->iv->step, &ainc_step)) 4717 { 4718 poly_int64 ainc_offset = (aff_inv->offset).force_shwi (); 4719 4720 if (stmt_after_increment (data->current_loop, cand, use->stmt)) 4721 ainc_offset += ainc_step; 4722 cost = get_address_cost_ainc (ainc_step, ainc_offset, 4723 addr_mode, mem_mode, as, speed); 4724 if (!cost.infinite_cost_p ()) 4725 { 4726 *can_autoinc = true; 4727 return cost; 4728 } 4729 cost = no_cost; 4730 } 4731 if (!aff_combination_zero_p (aff_inv)) 4732 { 4733 parts.offset = wide_int_to_tree (sizetype, aff_inv->offset); 4734 /* Addressing mode "base + offset". */ 4735 if (!valid_mem_ref_p (mem_mode, as, &parts)) 4736 parts.offset = NULL_TREE; 4737 else 4738 aff_inv->offset = 0; 4739 } 4740 } 4741 4742 if (simple_inv) 4743 simple_inv = (aff_inv == NULL 4744 || aff_combination_const_p (aff_inv) 4745 || aff_combination_singleton_var_p (aff_inv)); 4746 if (!aff_combination_zero_p (aff_inv)) 4747 comp_inv = aff_combination_to_tree (aff_inv); 4748 if (comp_inv != NULL_TREE) 4749 cost = force_var_cost (data, comp_inv, inv_vars); 4750 if (ratio != 1 && parts.step == NULL_TREE) 4751 var_cost += mult_by_coeff_cost (ratio, addr_mode, speed); 4752 if (comp_inv != NULL_TREE && parts.index == NULL_TREE) 4753 var_cost += add_cost (speed, addr_mode); 4754 4755 if (comp_inv && inv_expr && !simple_inv) 4756 { 4757 *inv_expr = get_loop_invariant_expr (data, comp_inv); 4758 /* Clear depends on. */ 4759 if (*inv_expr != NULL && inv_vars && *inv_vars) 4760 bitmap_clear (*inv_vars); 4761 4762 /* Cost of small invariant expression adjusted against loop niters 4763 is usually zero, which makes it difficult to be differentiated 4764 from candidate based on loop invariant variables. Secondly, the 4765 generated invariant expression may not be hoisted out of loop by 4766 following pass. We penalize the cost by rounding up in order to 4767 neutralize such effects. */ 4768 cost.cost = adjust_setup_cost (data, cost.cost, true); 4769 cost.scratch = cost.cost; 4770 } 4771 4772 cost += var_cost; 4773 addr = addr_for_mem_ref (&parts, as, false); 4774 gcc_assert (memory_address_addr_space_p (mem_mode, addr, as)); 4775 cost += address_cost (addr, mem_mode, as, speed); 4776 4777 if (parts.symbol != NULL_TREE) 4778 cost.complexity += 1; 4779 /* Don't increase the complexity of adding a scaled index if it's 4780 the only kind of index that the target allows. */ 4781 if (parts.step != NULL_TREE && ok_without_ratio_p) 4782 cost.complexity += 1; 4783 if (parts.base != NULL_TREE && parts.index != NULL_TREE) 4784 cost.complexity += 1; 4785 if (parts.offset != NULL_TREE && !integer_zerop (parts.offset)) 4786 cost.complexity += 1; 4787 4788 return cost; 4789 } 4790 4791 /* Scale (multiply) the computed COST (except scratch part that should be 4792 hoisted out a loop) by header->frequency / AT->frequency, which makes 4793 expected cost more accurate. 
*/ 4794 4795 static comp_cost 4796 get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost) 4797 { 4798 if (data->speed 4799 && data->current_loop->header->count.to_frequency (cfun) > 0) 4800 { 4801 basic_block bb = gimple_bb (at); 4802 gcc_assert (cost.scratch <= cost.cost); 4803 int scale_factor = (int)(intptr_t) bb->aux; 4804 if (scale_factor == 1) 4805 return cost; 4806 4807 int64_t scaled_cost 4808 = cost.scratch + (cost.cost - cost.scratch) * scale_factor; 4809 4810 if (dump_file && (dump_flags & TDF_DETAILS)) 4811 fprintf (dump_file, "Scaling cost based on bb prob by %2.2f: " 4812 "%" PRId64 " (scratch: %" PRId64 ") -> %" PRId64 "\n", 4813 1.0f * scale_factor, cost.cost, cost.scratch, scaled_cost); 4814 4815 cost.cost = scaled_cost; 4816 } 4817 4818 return cost; 4819 } 4820 4821 /* Determines the cost of the computation by that USE is expressed 4822 from induction variable CAND. If ADDRESS_P is true, we just need 4823 to create an address from it, otherwise we want to get it into 4824 register. A set of invariants we depend on is stored in INV_VARS. 4825 If CAN_AUTOINC is nonnull, use it to record whether autoinc 4826 addressing is likely. If INV_EXPR is nonnull, record invariant 4827 expr entry in it. */ 4828 4829 static comp_cost 4830 get_computation_cost (struct ivopts_data *data, struct iv_use *use, 4831 struct iv_cand *cand, bool address_p, bitmap *inv_vars, 4832 bool *can_autoinc, iv_inv_expr_ent **inv_expr) 4833 { 4834 gimple *at = use->stmt; 4835 tree ubase = use->iv->base, cbase = cand->iv->base; 4836 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase); 4837 tree comp_inv = NULL_TREE; 4838 HOST_WIDE_INT ratio, aratio; 4839 comp_cost cost; 4840 widest_int rat; 4841 aff_tree aff_inv, aff_var; 4842 bool speed = optimize_bb_for_speed_p (gimple_bb (at)); 4843 4844 if (inv_vars) 4845 *inv_vars = NULL; 4846 if (can_autoinc) 4847 *can_autoinc = false; 4848 if (inv_expr) 4849 *inv_expr = NULL; 4850 4851 /* Check if we have enough precision to express the values of use. */ 4852 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype)) 4853 return infinite_cost; 4854 4855 if (address_p 4856 || (use->iv->base_object 4857 && cand->iv->base_object 4858 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object)) 4859 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object)))) 4860 { 4861 /* Do not try to express address of an object with computation based 4862 on address of a different object. This may cause problems in rtl 4863 level alias analysis (that does not expect this to be happening, 4864 as this is illegal in C), and would be unlikely to be useful 4865 anyway. */ 4866 if (use->iv->base_object 4867 && cand->iv->base_object 4868 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0)) 4869 return infinite_cost; 4870 } 4871 4872 if (!get_computation_aff_1 (data->current_loop, at, use, 4873 cand, &aff_inv, &aff_var, &rat) 4874 || !wi::fits_shwi_p (rat)) 4875 return infinite_cost; 4876 4877 ratio = rat.to_shwi (); 4878 if (address_p) 4879 { 4880 cost = get_address_cost (data, use, cand, &aff_inv, &aff_var, ratio, 4881 inv_vars, inv_expr, can_autoinc, speed); 4882 cost = get_scaled_computation_cost_at (data, at, cost); 4883 /* For doloop IV cand, add on the extra cost. */ 4884 cost += cand->doloop_p ? 
targetm.doloop_cost_for_address : 0; 4885 return cost; 4886 } 4887 4888 bool simple_inv = (aff_combination_const_p (&aff_inv) 4889 || aff_combination_singleton_var_p (&aff_inv)); 4890 tree signed_type = signed_type_for (aff_combination_type (&aff_inv)); 4891 aff_combination_convert (&aff_inv, signed_type); 4892 if (!aff_combination_zero_p (&aff_inv)) 4893 comp_inv = aff_combination_to_tree (&aff_inv); 4894 4895 cost = force_var_cost (data, comp_inv, inv_vars); 4896 if (comp_inv && inv_expr && !simple_inv) 4897 { 4898 *inv_expr = get_loop_invariant_expr (data, comp_inv); 4899 /* Clear depends on. */ 4900 if (*inv_expr != NULL && inv_vars && *inv_vars) 4901 bitmap_clear (*inv_vars); 4902 4903 cost.cost = adjust_setup_cost (data, cost.cost); 4904 /* Record setup cost in scratch field. */ 4905 cost.scratch = cost.cost; 4906 } 4907 /* Cost of constant integer can be covered when adding invariant part to 4908 variant part. */ 4909 else if (comp_inv && CONSTANT_CLASS_P (comp_inv)) 4910 cost = no_cost; 4911 4912 /* Need type narrowing to represent use with cand. */ 4913 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype)) 4914 { 4915 machine_mode outer_mode = TYPE_MODE (utype); 4916 machine_mode inner_mode = TYPE_MODE (ctype); 4917 cost += comp_cost (convert_cost (outer_mode, inner_mode, speed), 0); 4918 } 4919 4920 /* Turn a + i * (-c) into a - i * c. */ 4921 if (ratio < 0 && comp_inv && !integer_zerop (comp_inv)) 4922 aratio = -ratio; 4923 else 4924 aratio = ratio; 4925 4926 if (ratio != 1) 4927 cost += mult_by_coeff_cost (aratio, TYPE_MODE (utype), speed); 4928 4929 /* TODO: We may also need to check if we can compute a + i * 4 in one 4930 instruction. */ 4931 /* Need to add up the invariant and variant parts. */ 4932 if (comp_inv && !integer_zerop (comp_inv)) 4933 cost += add_cost (speed, TYPE_MODE (utype)); 4934 4935 cost = get_scaled_computation_cost_at (data, at, cost); 4936 4937 /* For doloop IV cand, add on the extra cost. */ 4938 if (cand->doloop_p && use->type == USE_NONLINEAR_EXPR) 4939 cost += targetm.doloop_cost_for_generic; 4940 4941 return cost; 4942 } 4943 4944 /* Determines cost of computing the use in GROUP with CAND in a generic 4945 expression. */ 4946 4947 static bool 4948 determine_group_iv_cost_generic (struct ivopts_data *data, 4949 struct iv_group *group, struct iv_cand *cand) 4950 { 4951 comp_cost cost; 4952 iv_inv_expr_ent *inv_expr = NULL; 4953 bitmap inv_vars = NULL, inv_exprs = NULL; 4954 struct iv_use *use = group->vuses[0]; 4955 4956 /* The simple case first -- if we need to express value of the preserved 4957 original biv, the cost is 0. This also prevents us from counting the 4958 cost of increment twice -- once at this use and once in the cost of 4959 the candidate. */ 4960 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt) 4961 cost = no_cost; 4962 else 4963 cost = get_computation_cost (data, use, cand, false, 4964 &inv_vars, NULL, &inv_expr); 4965 4966 if (inv_expr) 4967 { 4968 inv_exprs = BITMAP_ALLOC (NULL); 4969 bitmap_set_bit (inv_exprs, inv_expr->id); 4970 } 4971 set_group_iv_cost (data, group, cand, cost, inv_vars, 4972 NULL_TREE, ERROR_MARK, inv_exprs); 4973 return !cost.infinite_cost_p (); 4974 } 4975 4976 /* Determines cost of computing uses in GROUP with CAND in addresses. 
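   The costs of the individual uses are summed; uses in a group can share
   setup code for the invariant part, so the setup cost is meant to be
   counted only once, and the summation stops as soon as the running cost
   becomes infinite.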
*/ 4977 4978 static bool 4979 determine_group_iv_cost_address (struct ivopts_data *data, 4980 struct iv_group *group, struct iv_cand *cand) 4981 { 4982 unsigned i; 4983 bitmap inv_vars = NULL, inv_exprs = NULL; 4984 bool can_autoinc; 4985 iv_inv_expr_ent *inv_expr = NULL; 4986 struct iv_use *use = group->vuses[0]; 4987 comp_cost sum_cost = no_cost, cost; 4988 4989 cost = get_computation_cost (data, use, cand, true, 4990 &inv_vars, &can_autoinc, &inv_expr); 4991 4992 if (inv_expr) 4993 { 4994 inv_exprs = BITMAP_ALLOC (NULL); 4995 bitmap_set_bit (inv_exprs, inv_expr->id); 4996 } 4997 sum_cost = cost; 4998 if (!sum_cost.infinite_cost_p () && cand->ainc_use == use) 4999 { 5000 if (can_autoinc) 5001 sum_cost -= cand->cost_step; 5002 /* If we generated the candidate solely for exploiting autoincrement 5003 opportunities, and it turns out it can't be used, set the cost to 5004 infinity to make sure we ignore it. */ 5005 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE) 5006 sum_cost = infinite_cost; 5007 } 5008 5009 /* Uses in a group can share setup code, so only add setup cost once. */ 5010 cost -= cost.scratch; 5011 /* Compute and add costs for rest uses of this group. */ 5012 for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++) 5013 { 5014 struct iv_use *next = group->vuses[i]; 5015 5016 /* TODO: We could skip computing cost for sub iv_use when it has the 5017 same cost as the first iv_use, but the cost really depends on the 5018 offset and where the iv_use is. */ 5019 cost = get_computation_cost (data, next, cand, true, 5020 NULL, &can_autoinc, &inv_expr); 5021 if (inv_expr) 5022 { 5023 if (!inv_exprs) 5024 inv_exprs = BITMAP_ALLOC (NULL); 5025 5026 bitmap_set_bit (inv_exprs, inv_expr->id); 5027 } 5028 sum_cost += cost; 5029 } 5030 set_group_iv_cost (data, group, cand, sum_cost, inv_vars, 5031 NULL_TREE, ERROR_MARK, inv_exprs); 5032 5033 return !sum_cost.infinite_cost_p (); 5034 } 5035 5036 /* Computes value of candidate CAND at position AT in iteration NITER, and 5037 stores it to VAL. */ 5038 5039 static void 5040 cand_value_at (class loop *loop, struct iv_cand *cand, gimple *at, tree niter, 5041 aff_tree *val) 5042 { 5043 aff_tree step, delta, nit; 5044 struct iv *iv = cand->iv; 5045 tree type = TREE_TYPE (iv->base); 5046 tree steptype; 5047 if (POINTER_TYPE_P (type)) 5048 steptype = sizetype; 5049 else 5050 steptype = unsigned_type_for (type); 5051 5052 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step); 5053 aff_combination_convert (&step, steptype); 5054 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit); 5055 aff_combination_convert (&nit, steptype); 5056 aff_combination_mult (&nit, &step, &delta); 5057 if (stmt_after_increment (loop, cand, at)) 5058 aff_combination_add (&delta, &step); 5059 5060 tree_to_aff_combination (iv->base, type, val); 5061 if (!POINTER_TYPE_P (type)) 5062 aff_combination_convert (val, steptype); 5063 aff_combination_add (val, &delta); 5064 } 5065 5066 /* Returns period of induction variable iv. */ 5067 5068 static tree 5069 iv_period (struct iv *iv) 5070 { 5071 tree step = iv->step, period, type; 5072 tree pow2div; 5073 5074 gcc_assert (step && TREE_CODE (step) == INTEGER_CST); 5075 5076 type = unsigned_type_for (TREE_TYPE (step)); 5077 /* Period of the iv is lcm (step, type_range)/step -1, 5078 i.e., N*type_range/step - 1. 
Since type range is power 5079 of two, N == (step >> num_of_ending_zeros_binary (step), 5080 so the final result is 5081 5082 (type_range >> num_of_ending_zeros_binary (step)) - 1 5083 5084 */ 5085 pow2div = num_ending_zeros (step); 5086 5087 period = build_low_bits_mask (type, 5088 (TYPE_PRECISION (type) 5089 - tree_to_uhwi (pow2div))); 5090 5091 return period; 5092 } 5093 5094 /* Returns the comparison operator used when eliminating the iv USE. */ 5095 5096 static enum tree_code 5097 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use) 5098 { 5099 class loop *loop = data->current_loop; 5100 basic_block ex_bb; 5101 edge exit; 5102 5103 ex_bb = gimple_bb (use->stmt); 5104 exit = EDGE_SUCC (ex_bb, 0); 5105 if (flow_bb_inside_loop_p (loop, exit->dest)) 5106 exit = EDGE_SUCC (ex_bb, 1); 5107 5108 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR); 5109 } 5110 5111 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now, 5112 we only detect the situation that BASE = SOMETHING + OFFSET, where the 5113 calculation is performed in non-wrapping type. 5114 5115 TODO: More generally, we could test for the situation that 5116 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero. 5117 This would require knowing the sign of OFFSET. */ 5118 5119 static bool 5120 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset) 5121 { 5122 enum tree_code code; 5123 tree e1, e2; 5124 aff_tree aff_e1, aff_e2, aff_offset; 5125 5126 if (!nowrap_type_p (TREE_TYPE (base))) 5127 return false; 5128 5129 base = expand_simple_operations (base); 5130 5131 if (TREE_CODE (base) == SSA_NAME) 5132 { 5133 gimple *stmt = SSA_NAME_DEF_STMT (base); 5134 5135 if (gimple_code (stmt) != GIMPLE_ASSIGN) 5136 return false; 5137 5138 code = gimple_assign_rhs_code (stmt); 5139 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS) 5140 return false; 5141 5142 e1 = gimple_assign_rhs1 (stmt); 5143 e2 = gimple_assign_rhs2 (stmt); 5144 } 5145 else 5146 { 5147 code = TREE_CODE (base); 5148 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS) 5149 return false; 5150 e1 = TREE_OPERAND (base, 0); 5151 e2 = TREE_OPERAND (base, 1); 5152 } 5153 5154 /* Use affine expansion as deeper inspection to prove the equality. */ 5155 tree_to_aff_combination_expand (e2, TREE_TYPE (e2), 5156 &aff_e2, &data->name_expansion_cache); 5157 tree_to_aff_combination_expand (offset, TREE_TYPE (offset), 5158 &aff_offset, &data->name_expansion_cache); 5159 aff_combination_scale (&aff_offset, -1); 5160 switch (code) 5161 { 5162 case PLUS_EXPR: 5163 aff_combination_add (&aff_e2, &aff_offset); 5164 if (aff_combination_zero_p (&aff_e2)) 5165 return true; 5166 5167 tree_to_aff_combination_expand (e1, TREE_TYPE (e1), 5168 &aff_e1, &data->name_expansion_cache); 5169 aff_combination_add (&aff_e1, &aff_offset); 5170 return aff_combination_zero_p (&aff_e1); 5171 5172 case POINTER_PLUS_EXPR: 5173 aff_combination_add (&aff_e2, &aff_offset); 5174 return aff_combination_zero_p (&aff_e2); 5175 5176 default: 5177 return false; 5178 } 5179 } 5180 5181 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR 5182 comparison with CAND. NITER describes the number of iterations of 5183 the loops. If successful, the comparison in COMP_P is altered accordingly. 
5184 5185 We aim to handle the following situation: 5186 5187 sometype *base, *p; 5188 int a, b, i; 5189 5190 i = a; 5191 p = p_0 = base + a; 5192 5193 do 5194 { 5195 bla (*p); 5196 p++; 5197 i++; 5198 } 5199 while (i < b); 5200 5201 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1. 5202 We aim to optimize this to 5203 5204 p = p_0 = base + a; 5205 do 5206 { 5207 bla (*p); 5208 p++; 5209 } 5210 while (p < p_0 - a + b); 5211 5212 This preserves the correctness, since the pointer arithmetics does not 5213 overflow. More precisely: 5214 5215 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no 5216 overflow in computing it or the values of p. 5217 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not 5218 overflow. To prove this, we use the fact that p_0 = base + a. */ 5219 5220 static bool 5221 iv_elimination_compare_lt (struct ivopts_data *data, 5222 struct iv_cand *cand, enum tree_code *comp_p, 5223 class tree_niter_desc *niter) 5224 { 5225 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset; 5226 class aff_tree nit, tmpa, tmpb; 5227 enum tree_code comp; 5228 HOST_WIDE_INT step; 5229 5230 /* We need to know that the candidate induction variable does not overflow. 5231 While more complex analysis may be used to prove this, for now just 5232 check that the variable appears in the original program and that it 5233 is computed in a type that guarantees no overflows. */ 5234 cand_type = TREE_TYPE (cand->iv->base); 5235 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type)) 5236 return false; 5237 5238 /* Make sure that the loop iterates till the loop bound is hit, as otherwise 5239 the calculation of the BOUND could overflow, making the comparison 5240 invalid. */ 5241 if (!data->loop_single_exit_p) 5242 return false; 5243 5244 /* We need to be able to decide whether candidate is increasing or decreasing 5245 in order to choose the right comparison operator. */ 5246 if (!cst_and_fits_in_hwi (cand->iv->step)) 5247 return false; 5248 step = int_cst_value (cand->iv->step); 5249 5250 /* Check that the number of iterations matches the expected pattern: 5251 a + 1 > b ? 0 : b - a - 1. */ 5252 mbz = niter->may_be_zero; 5253 if (TREE_CODE (mbz) == GT_EXPR) 5254 { 5255 /* Handle a + 1 > b. */ 5256 tree op0 = TREE_OPERAND (mbz, 0); 5257 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1))) 5258 { 5259 a = TREE_OPERAND (op0, 0); 5260 b = TREE_OPERAND (mbz, 1); 5261 } 5262 else 5263 return false; 5264 } 5265 else if (TREE_CODE (mbz) == LT_EXPR) 5266 { 5267 tree op1 = TREE_OPERAND (mbz, 1); 5268 5269 /* Handle b < a + 1. */ 5270 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1))) 5271 { 5272 a = TREE_OPERAND (op1, 0); 5273 b = TREE_OPERAND (mbz, 0); 5274 } 5275 else 5276 return false; 5277 } 5278 else 5279 return false; 5280 5281 /* Expected number of iterations is B - A - 1. Check that it matches 5282 the actual number, i.e., that B - A - NITER = 1. 
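     (The check below is done on affine combinations: we form
     B + (-A) + (-NITER) and require the result to be the constant 1.)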
*/ 5283 tree_to_aff_combination (niter->niter, nit_type, &nit); 5284 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa); 5285 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb); 5286 aff_combination_scale (&nit, -1); 5287 aff_combination_scale (&tmpa, -1); 5288 aff_combination_add (&tmpb, &tmpa); 5289 aff_combination_add (&tmpb, &nit); 5290 if (tmpb.n != 0 || maybe_ne (tmpb.offset, 1)) 5291 return false; 5292 5293 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not 5294 overflow. */ 5295 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step), 5296 cand->iv->step, 5297 fold_convert (TREE_TYPE (cand->iv->step), a)); 5298 if (!difference_cannot_overflow_p (data, cand->iv->base, offset)) 5299 return false; 5300 5301 /* Determine the new comparison operator. */ 5302 comp = step < 0 ? GT_EXPR : LT_EXPR; 5303 if (*comp_p == NE_EXPR) 5304 *comp_p = comp; 5305 else if (*comp_p == EQ_EXPR) 5306 *comp_p = invert_tree_comparison (comp, false); 5307 else 5308 gcc_unreachable (); 5309 5310 return true; 5311 } 5312 5313 /* Check whether it is possible to express the condition in USE by comparison 5314 of candidate CAND. If so, store the value compared with to BOUND, and the 5315 comparison operator to COMP. */ 5316 5317 static bool 5318 may_eliminate_iv (struct ivopts_data *data, 5319 struct iv_use *use, struct iv_cand *cand, tree *bound, 5320 enum tree_code *comp) 5321 { 5322 basic_block ex_bb; 5323 edge exit; 5324 tree period; 5325 class loop *loop = data->current_loop; 5326 aff_tree bnd; 5327 class tree_niter_desc *desc = NULL; 5328 5329 if (TREE_CODE (cand->iv->step) != INTEGER_CST) 5330 return false; 5331 5332 /* For now works only for exits that dominate the loop latch. 5333 TODO: extend to other conditions inside loop body. */ 5334 ex_bb = gimple_bb (use->stmt); 5335 if (use->stmt != last_stmt (ex_bb) 5336 || gimple_code (use->stmt) != GIMPLE_COND 5337 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb)) 5338 return false; 5339 5340 exit = EDGE_SUCC (ex_bb, 0); 5341 if (flow_bb_inside_loop_p (loop, exit->dest)) 5342 exit = EDGE_SUCC (ex_bb, 1); 5343 if (flow_bb_inside_loop_p (loop, exit->dest)) 5344 return false; 5345 5346 desc = niter_for_exit (data, exit); 5347 if (!desc) 5348 return false; 5349 5350 /* Determine whether we can use the variable to test the exit condition. 5351 This is the case iff the period of the induction variable is greater 5352 than the number of iterations for which the exit condition is true. */ 5353 period = iv_period (cand->iv); 5354 5355 /* If the number of iterations is constant, compare against it directly. */ 5356 if (TREE_CODE (desc->niter) == INTEGER_CST) 5357 { 5358 /* See cand_value_at. */ 5359 if (stmt_after_increment (loop, cand, use->stmt)) 5360 { 5361 if (!tree_int_cst_lt (desc->niter, period)) 5362 return false; 5363 } 5364 else 5365 { 5366 if (tree_int_cst_lt (period, desc->niter)) 5367 return false; 5368 } 5369 } 5370 5371 /* If not, and if this is the only possible exit of the loop, see whether 5372 we can get a conservative estimate on the number of iterations of the 5373 entire loop and compare against that instead. */ 5374 else 5375 { 5376 widest_int period_value, max_niter; 5377 5378 max_niter = desc->max; 5379 if (stmt_after_increment (loop, cand, use->stmt)) 5380 max_niter += 1; 5381 period_value = wi::to_widest (period); 5382 if (wi::gtu_p (max_niter, period_value)) 5383 { 5384 /* See if we can take advantage of inferred loop bound 5385 information. 
*/ 5386 if (data->loop_single_exit_p) 5387 { 5388 if (!max_loop_iterations (loop, &max_niter)) 5389 return false; 5390 /* The loop bound is already adjusted by adding 1. */ 5391 if (wi::gtu_p (max_niter, period_value)) 5392 return false; 5393 } 5394 else 5395 return false; 5396 } 5397 } 5398 5399 /* For doloop IV cand, the bound would be zero. It's safe whether 5400 may_be_zero set or not. */ 5401 if (cand->doloop_p) 5402 { 5403 *bound = build_int_cst (TREE_TYPE (cand->iv->base), 0); 5404 *comp = iv_elimination_compare (data, use); 5405 return true; 5406 } 5407 5408 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd); 5409 5410 *bound = fold_convert (TREE_TYPE (cand->iv->base), 5411 aff_combination_to_tree (&bnd)); 5412 *comp = iv_elimination_compare (data, use); 5413 5414 /* It is unlikely that computing the number of iterations using division 5415 would be more profitable than keeping the original induction variable. */ 5416 if (expression_expensive_p (*bound)) 5417 return false; 5418 5419 /* Sometimes, it is possible to handle the situation that the number of 5420 iterations may be zero unless additional assumptions by using < 5421 instead of != in the exit condition. 5422 5423 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and 5424 base the exit condition on it. However, that is often too 5425 expensive. */ 5426 if (!integer_zerop (desc->may_be_zero)) 5427 return iv_elimination_compare_lt (data, cand, comp, desc); 5428 5429 return true; 5430 } 5431 5432 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must 5433 be copied, if it is used in the loop body and DATA->body_includes_call. */ 5434 5435 static int 5436 parm_decl_cost (struct ivopts_data *data, tree bound) 5437 { 5438 tree sbound = bound; 5439 STRIP_NOPS (sbound); 5440 5441 if (TREE_CODE (sbound) == SSA_NAME 5442 && SSA_NAME_IS_DEFAULT_DEF (sbound) 5443 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL 5444 && data->body_includes_call) 5445 return COSTS_N_INSNS (1); 5446 5447 return 0; 5448 } 5449 5450 /* Determines cost of computing the use in GROUP with CAND in a condition. */ 5451 5452 static bool 5453 determine_group_iv_cost_cond (struct ivopts_data *data, 5454 struct iv_group *group, struct iv_cand *cand) 5455 { 5456 tree bound = NULL_TREE; 5457 struct iv *cmp_iv; 5458 bitmap inv_exprs = NULL; 5459 bitmap inv_vars_elim = NULL, inv_vars_express = NULL, inv_vars; 5460 comp_cost elim_cost = infinite_cost, express_cost, cost, bound_cost; 5461 enum comp_iv_rewrite rewrite_type; 5462 iv_inv_expr_ent *inv_expr_elim = NULL, *inv_expr_express = NULL, *inv_expr; 5463 tree *control_var, *bound_cst; 5464 enum tree_code comp = ERROR_MARK; 5465 struct iv_use *use = group->vuses[0]; 5466 5467 /* Extract condition operands. */ 5468 rewrite_type = extract_cond_operands (data, use->stmt, &control_var, 5469 &bound_cst, NULL, &cmp_iv); 5470 gcc_assert (rewrite_type != COMP_IV_NA); 5471 5472 /* Try iv elimination. */ 5473 if (rewrite_type == COMP_IV_ELIM 5474 && may_eliminate_iv (data, use, cand, &bound, &comp)) 5475 { 5476 elim_cost = force_var_cost (data, bound, &inv_vars_elim); 5477 if (elim_cost.cost == 0) 5478 elim_cost.cost = parm_decl_cost (data, bound); 5479 else if (TREE_CODE (bound) == INTEGER_CST) 5480 elim_cost.cost = 0; 5481 /* If we replace a loop condition 'i < n' with 'p < base + n', 5482 inv_vars_elim will have 'base' and 'n' set, which implies that both 5483 'base' and 'n' will be live during the loop. 
More likely, 5484 'base + n' will be loop invariant, resulting in only one live value 5485 during the loop. So in that case we clear inv_vars_elim and set 5486 inv_expr_elim instead. */ 5487 if (inv_vars_elim && bitmap_count_bits (inv_vars_elim) > 1) 5488 { 5489 inv_expr_elim = get_loop_invariant_expr (data, bound); 5490 bitmap_clear (inv_vars_elim); 5491 } 5492 /* The bound is a loop invariant, so it will be only computed 5493 once. */ 5494 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost); 5495 } 5496 5497 /* When the condition is a comparison of the candidate IV against 5498 zero, prefer this IV. 5499 5500 TODO: The constant that we're subtracting from the cost should 5501 be target-dependent. This information should be added to the 5502 target costs for each backend. */ 5503 if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */ 5504 && integer_zerop (*bound_cst) 5505 && (operand_equal_p (*control_var, cand->var_after, 0) 5506 || operand_equal_p (*control_var, cand->var_before, 0))) 5507 elim_cost -= 1; 5508 5509 express_cost = get_computation_cost (data, use, cand, false, 5510 &inv_vars_express, NULL, 5511 &inv_expr_express); 5512 if (cmp_iv != NULL) 5513 find_inv_vars (data, &cmp_iv->base, &inv_vars_express); 5514 5515 /* Count the cost of the original bound as well. */ 5516 bound_cost = force_var_cost (data, *bound_cst, NULL); 5517 if (bound_cost.cost == 0) 5518 bound_cost.cost = parm_decl_cost (data, *bound_cst); 5519 else if (TREE_CODE (*bound_cst) == INTEGER_CST) 5520 bound_cost.cost = 0; 5521 express_cost += bound_cost; 5522 5523 /* Choose the better approach, preferring the eliminated IV. */ 5524 if (elim_cost <= express_cost) 5525 { 5526 cost = elim_cost; 5527 inv_vars = inv_vars_elim; 5528 inv_vars_elim = NULL; 5529 inv_expr = inv_expr_elim; 5530 /* For doloop candidate/use pair, adjust to zero cost. */ 5531 if (group->doloop_p && cand->doloop_p && elim_cost.cost > no_cost.cost) 5532 cost = no_cost; 5533 } 5534 else 5535 { 5536 cost = express_cost; 5537 inv_vars = inv_vars_express; 5538 inv_vars_express = NULL; 5539 bound = NULL_TREE; 5540 comp = ERROR_MARK; 5541 inv_expr = inv_expr_express; 5542 } 5543 5544 if (inv_expr) 5545 { 5546 inv_exprs = BITMAP_ALLOC (NULL); 5547 bitmap_set_bit (inv_exprs, inv_expr->id); 5548 } 5549 set_group_iv_cost (data, group, cand, cost, 5550 inv_vars, bound, comp, inv_exprs); 5551 5552 if (inv_vars_elim) 5553 BITMAP_FREE (inv_vars_elim); 5554 if (inv_vars_express) 5555 BITMAP_FREE (inv_vars_express); 5556 5557 return !cost.infinite_cost_p (); 5558 } 5559 5560 /* Determines cost of computing uses in GROUP with CAND. Returns false 5561 if USE cannot be represented with CAND. */ 5562 5563 static bool 5564 determine_group_iv_cost (struct ivopts_data *data, 5565 struct iv_group *group, struct iv_cand *cand) 5566 { 5567 switch (group->type) 5568 { 5569 case USE_NONLINEAR_EXPR: 5570 return determine_group_iv_cost_generic (data, group, cand); 5571 5572 case USE_REF_ADDRESS: 5573 case USE_PTR_ADDRESS: 5574 return determine_group_iv_cost_address (data, group, cand); 5575 5576 case USE_COMPARE: 5577 return determine_group_iv_cost_cond (data, group, cand); 5578 5579 default: 5580 gcc_unreachable (); 5581 } 5582 } 5583 5584 /* Return true if get_computation_cost indicates that autoincrement is 5585 a possibility for the pair of USE and CAND, false otherwise. 
*/ 5586 5587 static bool 5588 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use, 5589 struct iv_cand *cand) 5590 { 5591 if (!address_p (use->type)) 5592 return false; 5593 5594 bool can_autoinc = false; 5595 get_computation_cost (data, use, cand, true, NULL, &can_autoinc, NULL); 5596 return can_autoinc; 5597 } 5598 5599 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a 5600 use that allows autoincrement, and set their AINC_USE if possible. */ 5601 5602 static void 5603 set_autoinc_for_original_candidates (struct ivopts_data *data) 5604 { 5605 unsigned i, j; 5606 5607 for (i = 0; i < data->vcands.length (); i++) 5608 { 5609 struct iv_cand *cand = data->vcands[i]; 5610 struct iv_use *closest_before = NULL; 5611 struct iv_use *closest_after = NULL; 5612 if (cand->pos != IP_ORIGINAL) 5613 continue; 5614 5615 for (j = 0; j < data->vgroups.length (); j++) 5616 { 5617 struct iv_group *group = data->vgroups[j]; 5618 struct iv_use *use = group->vuses[0]; 5619 unsigned uid = gimple_uid (use->stmt); 5620 5621 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)) 5622 continue; 5623 5624 if (uid < gimple_uid (cand->incremented_at) 5625 && (closest_before == NULL 5626 || uid > gimple_uid (closest_before->stmt))) 5627 closest_before = use; 5628 5629 if (uid > gimple_uid (cand->incremented_at) 5630 && (closest_after == NULL 5631 || uid < gimple_uid (closest_after->stmt))) 5632 closest_after = use; 5633 } 5634 5635 if (closest_before != NULL 5636 && autoinc_possible_for_pair (data, closest_before, cand)) 5637 cand->ainc_use = closest_before; 5638 else if (closest_after != NULL 5639 && autoinc_possible_for_pair (data, closest_after, cand)) 5640 cand->ainc_use = closest_after; 5641 } 5642 } 5643 5644 /* Relate compare use with all candidates. */ 5645 5646 static void 5647 relate_compare_use_with_all_cands (struct ivopts_data *data) 5648 { 5649 unsigned i, count = data->vcands.length (); 5650 for (i = 0; i < data->vgroups.length (); i++) 5651 { 5652 struct iv_group *group = data->vgroups[i]; 5653 5654 if (group->type == USE_COMPARE) 5655 bitmap_set_range (group->related_cands, 0, count); 5656 } 5657 } 5658 5659 /* Add one doloop dedicated IV candidate: 5660 - Base is (may_be_zero ? 1 : (niter + 1)). 5661 - Step is -1. */ 5662 5663 static void 5664 add_iv_candidate_for_doloop (struct ivopts_data *data) 5665 { 5666 tree_niter_desc *niter_desc = niter_for_single_dom_exit (data); 5667 gcc_assert (niter_desc && niter_desc->assumptions); 5668 5669 tree niter = niter_desc->niter; 5670 tree ntype = TREE_TYPE (niter); 5671 gcc_assert (TREE_CODE (ntype) == INTEGER_TYPE); 5672 5673 tree may_be_zero = niter_desc->may_be_zero; 5674 if (may_be_zero && integer_zerop (may_be_zero)) 5675 may_be_zero = NULL_TREE; 5676 if (may_be_zero) 5677 { 5678 if (COMPARISON_CLASS_P (may_be_zero)) 5679 { 5680 niter = fold_build3 (COND_EXPR, ntype, may_be_zero, 5681 build_int_cst (ntype, 0), 5682 rewrite_to_non_trapping_overflow (niter)); 5683 } 5684 /* Don't try to obtain the iteration count expression when may_be_zero is 5685 integer_nonzerop (actually iteration count is one) or else. */ 5686 else 5687 return; 5688 } 5689 5690 tree base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter), 5691 build_int_cst (ntype, 1)); 5692 add_candidate (data, base, build_int_cst (ntype, -1), true, NULL, NULL, true); 5693 } 5694 5695 /* Finds the candidates for the induction variables. 
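   Candidates come from several sources: the standard ivs (e.g. {0, +, 1}),
   a dedicated doloop iv when a doloop use was detected, the original bivs
   of the loop, and ivs derived from the interesting groups of uses.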
*/ 5696 5697 static void 5698 find_iv_candidates (struct ivopts_data *data) 5699 { 5700 /* Add commonly used ivs. */ 5701 add_standard_iv_candidates (data); 5702 5703 /* Add doloop dedicated ivs. */ 5704 if (data->doloop_use_p) 5705 add_iv_candidate_for_doloop (data); 5706 5707 /* Add old induction variables. */ 5708 add_iv_candidate_for_bivs (data); 5709 5710 /* Add induction variables derived from uses. */ 5711 add_iv_candidate_for_groups (data); 5712 5713 set_autoinc_for_original_candidates (data); 5714 5715 /* Record the important candidates. */ 5716 record_important_candidates (data); 5717 5718 /* Relate compare iv_use with all candidates. */ 5719 if (!data->consider_all_candidates) 5720 relate_compare_use_with_all_cands (data); 5721 5722 if (dump_file && (dump_flags & TDF_DETAILS)) 5723 { 5724 unsigned i; 5725 5726 fprintf (dump_file, "\n<Important Candidates>:\t"); 5727 for (i = 0; i < data->vcands.length (); i++) 5728 if (data->vcands[i]->important) 5729 fprintf (dump_file, " %d,", data->vcands[i]->id); 5730 fprintf (dump_file, "\n"); 5731 5732 fprintf (dump_file, "\n<Group, Cand> Related:\n"); 5733 for (i = 0; i < data->vgroups.length (); i++) 5734 { 5735 struct iv_group *group = data->vgroups[i]; 5736 5737 if (group->related_cands) 5738 { 5739 fprintf (dump_file, " Group %d:\t", group->id); 5740 dump_bitmap (dump_file, group->related_cands); 5741 } 5742 } 5743 fprintf (dump_file, "\n"); 5744 } 5745 } 5746 5747 /* Determines costs of computing use of iv with an iv candidate. */ 5748 5749 static void 5750 determine_group_iv_costs (struct ivopts_data *data) 5751 { 5752 unsigned i, j; 5753 struct iv_cand *cand; 5754 struct iv_group *group; 5755 bitmap to_clear = BITMAP_ALLOC (NULL); 5756 5757 alloc_use_cost_map (data); 5758 5759 for (i = 0; i < data->vgroups.length (); i++) 5760 { 5761 group = data->vgroups[i]; 5762 5763 if (data->consider_all_candidates) 5764 { 5765 for (j = 0; j < data->vcands.length (); j++) 5766 { 5767 cand = data->vcands[j]; 5768 determine_group_iv_cost (data, group, cand); 5769 } 5770 } 5771 else 5772 { 5773 bitmap_iterator bi; 5774 5775 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi) 5776 { 5777 cand = data->vcands[j]; 5778 if (!determine_group_iv_cost (data, group, cand)) 5779 bitmap_set_bit (to_clear, j); 5780 } 5781 5782 /* Remove the candidates for that the cost is infinite from 5783 the list of related candidates. */ 5784 bitmap_and_compl_into (group->related_cands, to_clear); 5785 bitmap_clear (to_clear); 5786 } 5787 } 5788 5789 BITMAP_FREE (to_clear); 5790 5791 if (dump_file && (dump_flags & TDF_DETAILS)) 5792 { 5793 bitmap_iterator bi; 5794 5795 /* Dump invariant variables. */ 5796 fprintf (dump_file, "\n<Invariant Vars>:\n"); 5797 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi) 5798 { 5799 struct version_info *info = ver_info (data, i); 5800 if (info->inv_id) 5801 { 5802 fprintf (dump_file, "Inv %d:\t", info->inv_id); 5803 print_generic_expr (dump_file, info->name, TDF_SLIM); 5804 fprintf (dump_file, "%s\n", 5805 info->has_nonlin_use ? "" : "\t(eliminable)"); 5806 } 5807 } 5808 5809 /* Dump invariant expressions. 
*/ 5810 fprintf (dump_file, "\n<Invariant Expressions>:\n"); 5811 auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ()); 5812 5813 for (hash_table<iv_inv_expr_hasher>::iterator it 5814 = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end (); 5815 ++it) 5816 list.safe_push (*it); 5817 5818 list.qsort (sort_iv_inv_expr_ent); 5819 5820 for (i = 0; i < list.length (); ++i) 5821 { 5822 fprintf (dump_file, "inv_expr %d: \t", list[i]->id); 5823 print_generic_expr (dump_file, list[i]->expr, TDF_SLIM); 5824 fprintf (dump_file, "\n"); 5825 } 5826 5827 fprintf (dump_file, "\n<Group-candidate Costs>:\n"); 5828 5829 for (i = 0; i < data->vgroups.length (); i++) 5830 { 5831 group = data->vgroups[i]; 5832 5833 fprintf (dump_file, "Group %d:\n", i); 5834 fprintf (dump_file, " cand\tcost\tcompl.\tinv.expr.\tinv.vars\n"); 5835 for (j = 0; j < group->n_map_members; j++) 5836 { 5837 if (!group->cost_map[j].cand 5838 || group->cost_map[j].cost.infinite_cost_p ()) 5839 continue; 5840 5841 fprintf (dump_file, " %d\t%" PRId64 "\t%d\t", 5842 group->cost_map[j].cand->id, 5843 group->cost_map[j].cost.cost, 5844 group->cost_map[j].cost.complexity); 5845 if (!group->cost_map[j].inv_exprs 5846 || bitmap_empty_p (group->cost_map[j].inv_exprs)) 5847 fprintf (dump_file, "NIL;\t"); 5848 else 5849 bitmap_print (dump_file, 5850 group->cost_map[j].inv_exprs, "", ";\t"); 5851 if (!group->cost_map[j].inv_vars 5852 || bitmap_empty_p (group->cost_map[j].inv_vars)) 5853 fprintf (dump_file, "NIL;\n"); 5854 else 5855 bitmap_print (dump_file, 5856 group->cost_map[j].inv_vars, "", "\n"); 5857 } 5858 5859 fprintf (dump_file, "\n"); 5860 } 5861 fprintf (dump_file, "\n"); 5862 } 5863 } 5864 5865 /* Determines cost of the candidate CAND. */ 5866 5867 static void 5868 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand) 5869 { 5870 comp_cost cost_base; 5871 int64_t cost, cost_step; 5872 tree base; 5873 5874 gcc_assert (cand->iv != NULL); 5875 5876 /* There are two costs associated with the candidate -- its increment 5877 and its initialization. The second is almost negligible for any loop 5878 that rolls enough, so we take it just very little into account. */ 5879 5880 base = cand->iv->base; 5881 cost_base = force_var_cost (data, base, NULL); 5882 /* It will be exceptional that the iv register happens to be initialized with 5883 the proper value at no cost. In general, there will at least be a regcopy 5884 or a const set. */ 5885 if (cost_base.cost == 0) 5886 cost_base.cost = COSTS_N_INSNS (1); 5887 /* Doloop decrement should be considered as zero cost. */ 5888 if (cand->doloop_p) 5889 cost_step = 0; 5890 else 5891 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base))); 5892 cost = cost_step + adjust_setup_cost (data, cost_base.cost); 5893 5894 /* Prefer the original ivs unless we may gain something by replacing it. 5895 The reason is to make debugging simpler; so this is not relevant for 5896 artificial ivs created by other optimization passes. */ 5897 if ((cand->pos != IP_ORIGINAL 5898 || !SSA_NAME_VAR (cand->var_before) 5899 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before))) 5900 /* Prefer doloop as well. */ 5901 && !cand->doloop_p) 5902 cost++; 5903 5904 /* Prefer not to insert statements into latch unless there are some 5905 already (so that we do not create unnecessary jumps). 
 */
  if (cand->pos == IP_END
      && empty_block_p (ip_end_pos (data->current_loop)))
    cost++;

  cand->cost = cost;
  cand->cost_step = cost_step;
}

/* Determines costs of computation of the candidates.  */

static void
determine_iv_costs (struct ivopts_data *data)
{
  unsigned i;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "<Candidate Costs>:\n");
      fprintf (dump_file, " cand\tcost\n");
    }

  for (i = 0; i < data->vcands.length (); i++)
    {
      struct iv_cand *cand = data->vcands[i];

      determine_iv_cost (data, cand);

      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, " %d\t%d\n", i, cand->cost);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\n");
}

/* Estimate register pressure for a loop having N_INVS invariants and N_CANDS
   induction variables.  Note N_INVS includes both invariant variables and
   invariant expressions.  */

static unsigned
ivopts_estimate_reg_pressure (struct ivopts_data *data, unsigned n_invs,
			      unsigned n_cands)
{
  unsigned cost;
  unsigned n_old = data->regs_used, n_new = n_invs + n_cands;
  unsigned regs_needed = n_new + n_old, available_regs = target_avail_regs;
  bool speed = data->speed;

  /* If there is a call in the loop body, the call-clobbered registers
     are not available for loop invariants.  */
  if (data->body_includes_call)
    available_regs = available_regs - target_clobbered_regs;

  /* If we have enough registers.  */
  if (regs_needed + target_res_regs < available_regs)
    cost = n_new;
  /* If close to running out of registers, try to preserve them.  */
  else if (regs_needed <= available_regs)
    cost = target_reg_cost [speed] * regs_needed;
  /* If the total demand exceeds the available registers but the number of
     candidates alone does not, penalize the extra registers using
     target_spill_cost.  */
  else if (n_cands <= available_regs)
    cost = target_reg_cost [speed] * available_regs
	   + target_spill_cost [speed] * (regs_needed - available_regs);
  /* If even the number of candidates exceeds the available registers,
     penalize the extra candidate registers using target_spill_cost * 2,
     because it is more expensive to spill an induction variable than an
     invariant.  */
  else
    cost = target_reg_cost [speed] * available_regs
	   + target_spill_cost [speed] * (n_cands - available_regs) * 2
	   + target_spill_cost [speed] * (regs_needed - n_cands);

  /* Finally, add the number of candidates, so that we prefer eliminating
     induction variables if possible.  */
  return cost + n_cands;
}

/* For each size of the induction variable set determine the penalty.
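   As a purely illustrative example with made-up numbers (the real values
   come from the target hooks): assume available_regs is 10, regs_used is 4
   and target_res_regs is 3.  A set with one invariant and one candidate
   needs 6 registers and 6 + 3 < 10, so ivopts_estimate_reg_pressure charges
   just n_invs + n_cands, plus the final n_cands term.  A set with three
   invariants and five candidates needs 12 registers; 12 exceeds 10 but 5
   does not, so the 10 registers are charged target_reg_cost each and the 2
   that do not fit are charged target_spill_cost each, again plus n_cands.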
*/ 5984 5985 static void 5986 determine_set_costs (struct ivopts_data *data) 5987 { 5988 unsigned j, n; 5989 gphi *phi; 5990 gphi_iterator psi; 5991 tree op; 5992 class loop *loop = data->current_loop; 5993 bitmap_iterator bi; 5994 5995 if (dump_file && (dump_flags & TDF_DETAILS)) 5996 { 5997 fprintf (dump_file, "<Global Costs>:\n"); 5998 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs); 5999 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs); 6000 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]); 6001 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]); 6002 } 6003 6004 n = 0; 6005 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi)) 6006 { 6007 phi = psi.phi (); 6008 op = PHI_RESULT (phi); 6009 6010 if (virtual_operand_p (op)) 6011 continue; 6012 6013 if (get_iv (data, op)) 6014 continue; 6015 6016 if (!POINTER_TYPE_P (TREE_TYPE (op)) 6017 && !INTEGRAL_TYPE_P (TREE_TYPE (op))) 6018 continue; 6019 6020 n++; 6021 } 6022 6023 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi) 6024 { 6025 struct version_info *info = ver_info (data, j); 6026 6027 if (info->inv_id && info->has_nonlin_use) 6028 n++; 6029 } 6030 6031 data->regs_used = n; 6032 if (dump_file && (dump_flags & TDF_DETAILS)) 6033 fprintf (dump_file, " regs_used %d\n", n); 6034 6035 if (dump_file && (dump_flags & TDF_DETAILS)) 6036 { 6037 fprintf (dump_file, " cost for size:\n"); 6038 fprintf (dump_file, " ivs\tcost\n"); 6039 for (j = 0; j <= 2 * target_avail_regs; j++) 6040 fprintf (dump_file, " %d\t%d\n", j, 6041 ivopts_estimate_reg_pressure (data, 0, j)); 6042 fprintf (dump_file, "\n"); 6043 } 6044 } 6045 6046 /* Returns true if A is a cheaper cost pair than B. */ 6047 6048 static bool 6049 cheaper_cost_pair (class cost_pair *a, class cost_pair *b) 6050 { 6051 if (!a) 6052 return false; 6053 6054 if (!b) 6055 return true; 6056 6057 if (a->cost < b->cost) 6058 return true; 6059 6060 if (b->cost < a->cost) 6061 return false; 6062 6063 /* In case the costs are the same, prefer the cheaper candidate. */ 6064 if (a->cand->cost < b->cand->cost) 6065 return true; 6066 6067 return false; 6068 } 6069 6070 /* Compare if A is a more expensive cost pair than B. Return 1, 0 and -1 6071 for more expensive, equal and cheaper respectively. */ 6072 6073 static int 6074 compare_cost_pair (class cost_pair *a, class cost_pair *b) 6075 { 6076 if (cheaper_cost_pair (a, b)) 6077 return -1; 6078 if (cheaper_cost_pair (b, a)) 6079 return 1; 6080 6081 return 0; 6082 } 6083 6084 /* Returns candidate by that USE is expressed in IVS. */ 6085 6086 static class cost_pair * 6087 iv_ca_cand_for_group (class iv_ca *ivs, struct iv_group *group) 6088 { 6089 return ivs->cand_for_group[group->id]; 6090 } 6091 6092 /* Computes the cost field of IVS structure. */ 6093 6094 static void 6095 iv_ca_recount_cost (struct ivopts_data *data, class iv_ca *ivs) 6096 { 6097 comp_cost cost = ivs->cand_use_cost; 6098 6099 cost += ivs->cand_cost; 6100 cost += ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands); 6101 ivs->cost = cost; 6102 } 6103 6104 /* Remove use of invariants in set INVS by decreasing counter in N_INV_USES 6105 and IVS. 
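   IVS->n_invs counts only invariants that still have at least one use, so
   it is decremented exactly when a use counter drops to zero.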
*/ 6106 6107 static void 6108 iv_ca_set_remove_invs (class iv_ca *ivs, bitmap invs, unsigned *n_inv_uses) 6109 { 6110 bitmap_iterator bi; 6111 unsigned iid; 6112 6113 if (!invs) 6114 return; 6115 6116 gcc_assert (n_inv_uses != NULL); 6117 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi) 6118 { 6119 n_inv_uses[iid]--; 6120 if (n_inv_uses[iid] == 0) 6121 ivs->n_invs--; 6122 } 6123 } 6124 6125 /* Set USE not to be expressed by any candidate in IVS. */ 6126 6127 static void 6128 iv_ca_set_no_cp (struct ivopts_data *data, class iv_ca *ivs, 6129 struct iv_group *group) 6130 { 6131 unsigned gid = group->id, cid; 6132 class cost_pair *cp; 6133 6134 cp = ivs->cand_for_group[gid]; 6135 if (!cp) 6136 return; 6137 cid = cp->cand->id; 6138 6139 ivs->bad_groups++; 6140 ivs->cand_for_group[gid] = NULL; 6141 ivs->n_cand_uses[cid]--; 6142 6143 if (ivs->n_cand_uses[cid] == 0) 6144 { 6145 bitmap_clear_bit (ivs->cands, cid); 6146 if (!cp->cand->doloop_p || !targetm.have_count_reg_decr_p) 6147 ivs->n_cands--; 6148 ivs->cand_cost -= cp->cand->cost; 6149 iv_ca_set_remove_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses); 6150 iv_ca_set_remove_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses); 6151 } 6152 6153 ivs->cand_use_cost -= cp->cost; 6154 iv_ca_set_remove_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses); 6155 iv_ca_set_remove_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses); 6156 iv_ca_recount_cost (data, ivs); 6157 } 6158 6159 /* Add use of invariants in set INVS by increasing counter in N_INV_USES and 6160 IVS. */ 6161 6162 static void 6163 iv_ca_set_add_invs (class iv_ca *ivs, bitmap invs, unsigned *n_inv_uses) 6164 { 6165 bitmap_iterator bi; 6166 unsigned iid; 6167 6168 if (!invs) 6169 return; 6170 6171 gcc_assert (n_inv_uses != NULL); 6172 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi) 6173 { 6174 n_inv_uses[iid]++; 6175 if (n_inv_uses[iid] == 1) 6176 ivs->n_invs++; 6177 } 6178 } 6179 6180 /* Set cost pair for GROUP in set IVS to CP. */ 6181 6182 static void 6183 iv_ca_set_cp (struct ivopts_data *data, class iv_ca *ivs, 6184 struct iv_group *group, class cost_pair *cp) 6185 { 6186 unsigned gid = group->id, cid; 6187 6188 if (ivs->cand_for_group[gid] == cp) 6189 return; 6190 6191 if (ivs->cand_for_group[gid]) 6192 iv_ca_set_no_cp (data, ivs, group); 6193 6194 if (cp) 6195 { 6196 cid = cp->cand->id; 6197 6198 ivs->bad_groups--; 6199 ivs->cand_for_group[gid] = cp; 6200 ivs->n_cand_uses[cid]++; 6201 if (ivs->n_cand_uses[cid] == 1) 6202 { 6203 bitmap_set_bit (ivs->cands, cid); 6204 if (!cp->cand->doloop_p || !targetm.have_count_reg_decr_p) 6205 ivs->n_cands++; 6206 ivs->cand_cost += cp->cand->cost; 6207 iv_ca_set_add_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses); 6208 iv_ca_set_add_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses); 6209 } 6210 6211 ivs->cand_use_cost += cp->cost; 6212 iv_ca_set_add_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses); 6213 iv_ca_set_add_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses); 6214 iv_ca_recount_cost (data, ivs); 6215 } 6216 } 6217 6218 /* Extend set IVS by expressing USE by some of the candidates in it 6219 if possible. Consider all important candidates if candidates in 6220 set IVS don't give any result. 
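   The cheapest usable cost pair found is installed with iv_ca_set_cp; if
   neither pass finds one, the group stays unexpressed, is accounted in
   bad_groups and keeps the cost of the whole set infinite.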
*/ 6221 6222 static void 6223 iv_ca_add_group (struct ivopts_data *data, class iv_ca *ivs, 6224 struct iv_group *group) 6225 { 6226 class cost_pair *best_cp = NULL, *cp; 6227 bitmap_iterator bi; 6228 unsigned i; 6229 struct iv_cand *cand; 6230 6231 gcc_assert (ivs->upto >= group->id); 6232 ivs->upto++; 6233 ivs->bad_groups++; 6234 6235 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi) 6236 { 6237 cand = data->vcands[i]; 6238 cp = get_group_iv_cost (data, group, cand); 6239 if (cheaper_cost_pair (cp, best_cp)) 6240 best_cp = cp; 6241 } 6242 6243 if (best_cp == NULL) 6244 { 6245 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi) 6246 { 6247 cand = data->vcands[i]; 6248 cp = get_group_iv_cost (data, group, cand); 6249 if (cheaper_cost_pair (cp, best_cp)) 6250 best_cp = cp; 6251 } 6252 } 6253 6254 iv_ca_set_cp (data, ivs, group, best_cp); 6255 } 6256 6257 /* Get cost for assignment IVS. */ 6258 6259 static comp_cost 6260 iv_ca_cost (class iv_ca *ivs) 6261 { 6262 /* This was a conditional expression but it triggered a bug in 6263 Sun C 5.5. */ 6264 if (ivs->bad_groups) 6265 return infinite_cost; 6266 else 6267 return ivs->cost; 6268 } 6269 6270 /* Compare if applying NEW_CP to GROUP for IVS introduces more invariants 6271 than OLD_CP. Return 1, 0 and -1 for more, equal and fewer invariants 6272 respectively. */ 6273 6274 static int 6275 iv_ca_compare_deps (struct ivopts_data *data, class iv_ca *ivs, 6276 struct iv_group *group, class cost_pair *old_cp, 6277 class cost_pair *new_cp) 6278 { 6279 gcc_assert (old_cp && new_cp && old_cp != new_cp); 6280 unsigned old_n_invs = ivs->n_invs; 6281 iv_ca_set_cp (data, ivs, group, new_cp); 6282 unsigned new_n_invs = ivs->n_invs; 6283 iv_ca_set_cp (data, ivs, group, old_cp); 6284 6285 return new_n_invs > old_n_invs ? 1 : (new_n_invs < old_n_invs ? -1 : 0); 6286 } 6287 6288 /* Creates change of expressing GROUP by NEW_CP instead of OLD_CP and chains 6289 it before NEXT. */ 6290 6291 static struct iv_ca_delta * 6292 iv_ca_delta_add (struct iv_group *group, class cost_pair *old_cp, 6293 class cost_pair *new_cp, struct iv_ca_delta *next) 6294 { 6295 struct iv_ca_delta *change = XNEW (struct iv_ca_delta); 6296 6297 change->group = group; 6298 change->old_cp = old_cp; 6299 change->new_cp = new_cp; 6300 change->next = next; 6301 6302 return change; 6303 } 6304 6305 /* Joins two lists of changes L1 and L2. Destructive -- old lists 6306 are rewritten. */ 6307 6308 static struct iv_ca_delta * 6309 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2) 6310 { 6311 struct iv_ca_delta *last; 6312 6313 if (!l2) 6314 return l1; 6315 6316 if (!l1) 6317 return l2; 6318 6319 for (last = l1; last->next; last = last->next) 6320 continue; 6321 last->next = l2; 6322 6323 return l1; 6324 } 6325 6326 /* Reverse the list of changes DELTA, forming the inverse to it. */ 6327 6328 static struct iv_ca_delta * 6329 iv_ca_delta_reverse (struct iv_ca_delta *delta) 6330 { 6331 struct iv_ca_delta *act, *next, *prev = NULL; 6332 6333 for (act = delta; act; act = next) 6334 { 6335 next = act->next; 6336 act->next = prev; 6337 prev = act; 6338 6339 std::swap (act->old_cp, act->new_cp); 6340 } 6341 6342 return prev; 6343 } 6344 6345 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are 6346 reverted instead. 
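   Reverting is implemented by reversing the list with iv_ca_delta_reverse
   (which also swaps the old and new cost pairs), applying it, and then
   reversing it again to restore the original list.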
*/ 6347 6348 static void 6349 iv_ca_delta_commit (struct ivopts_data *data, class iv_ca *ivs, 6350 struct iv_ca_delta *delta, bool forward) 6351 { 6352 class cost_pair *from, *to; 6353 struct iv_ca_delta *act; 6354 6355 if (!forward) 6356 delta = iv_ca_delta_reverse (delta); 6357 6358 for (act = delta; act; act = act->next) 6359 { 6360 from = act->old_cp; 6361 to = act->new_cp; 6362 gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from); 6363 iv_ca_set_cp (data, ivs, act->group, to); 6364 } 6365 6366 if (!forward) 6367 iv_ca_delta_reverse (delta); 6368 } 6369 6370 /* Returns true if CAND is used in IVS. */ 6371 6372 static bool 6373 iv_ca_cand_used_p (class iv_ca *ivs, struct iv_cand *cand) 6374 { 6375 return ivs->n_cand_uses[cand->id] > 0; 6376 } 6377 6378 /* Returns number of induction variable candidates in the set IVS. */ 6379 6380 static unsigned 6381 iv_ca_n_cands (class iv_ca *ivs) 6382 { 6383 return ivs->n_cands; 6384 } 6385 6386 /* Free the list of changes DELTA. */ 6387 6388 static void 6389 iv_ca_delta_free (struct iv_ca_delta **delta) 6390 { 6391 struct iv_ca_delta *act, *next; 6392 6393 for (act = *delta; act; act = next) 6394 { 6395 next = act->next; 6396 free (act); 6397 } 6398 6399 *delta = NULL; 6400 } 6401 6402 /* Allocates new iv candidates assignment. */ 6403 6404 static class iv_ca * 6405 iv_ca_new (struct ivopts_data *data) 6406 { 6407 class iv_ca *nw = XNEW (class iv_ca); 6408 6409 nw->upto = 0; 6410 nw->bad_groups = 0; 6411 nw->cand_for_group = XCNEWVEC (class cost_pair *, 6412 data->vgroups.length ()); 6413 nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ()); 6414 nw->cands = BITMAP_ALLOC (NULL); 6415 nw->n_cands = 0; 6416 nw->n_invs = 0; 6417 nw->cand_use_cost = no_cost; 6418 nw->cand_cost = 0; 6419 nw->n_inv_var_uses = XCNEWVEC (unsigned, data->max_inv_var_id + 1); 6420 nw->n_inv_expr_uses = XCNEWVEC (unsigned, data->max_inv_expr_id + 1); 6421 nw->cost = no_cost; 6422 6423 return nw; 6424 } 6425 6426 /* Free memory occupied by the set IVS. */ 6427 6428 static void 6429 iv_ca_free (class iv_ca **ivs) 6430 { 6431 free ((*ivs)->cand_for_group); 6432 free ((*ivs)->n_cand_uses); 6433 BITMAP_FREE ((*ivs)->cands); 6434 free ((*ivs)->n_inv_var_uses); 6435 free ((*ivs)->n_inv_expr_uses); 6436 free (*ivs); 6437 *ivs = NULL; 6438 } 6439 6440 /* Dumps IVS to FILE. 
 */

static void
iv_ca_dump (struct ivopts_data *data, FILE *file, class iv_ca *ivs)
{
  unsigned i;
  comp_cost cost = iv_ca_cost (ivs);

  fprintf (file, " cost: %" PRId64 " (complexity %d)\n", cost.cost,
	   cost.complexity);
  fprintf (file, " reg_cost: %d\n",
	   ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands));
  fprintf (file, " cand_cost: %" PRId64 "\n cand_group_cost: "
	   "%" PRId64 " (complexity %d)\n", ivs->cand_cost,
	   ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
  bitmap_print (file, ivs->cands, " candidates: ","\n");

  for (i = 0; i < ivs->upto; i++)
    {
      struct iv_group *group = data->vgroups[i];
      class cost_pair *cp = iv_ca_cand_for_group (ivs, group);
      if (cp)
	fprintf (file, " group:%d --> iv_cand:%d, cost=("
		 "%" PRId64 ",%d)\n", group->id, cp->cand->id,
		 cp->cost.cost, cp->cost.complexity);
      else
	fprintf (file, " group:%d --> ??\n", group->id);
    }

  const char *pref = "";
  fprintf (file, " invariant variables: ");
  for (i = 1; i <= data->max_inv_var_id; i++)
    if (ivs->n_inv_var_uses[i])
      {
	fprintf (file, "%s%d", pref, i);
	pref = ", ";
      }

  pref = "";
  fprintf (file, "\n invariant expressions: ");
  for (i = 1; i <= data->max_inv_expr_id; i++)
    if (ivs->n_inv_expr_uses[i])
      {
	fprintf (file, "%s%d", pref, i);
	pref = ", ";
      }

  fprintf (file, "\n\n");
}

/* Try changing candidate in IVS to CAND for each use.  Return cost of the
   new set, and store differences in DELTA.  Number of induction variables
   in the new set is stored to N_IVS.  If MIN_NCAND is true, the function
   tries to find a solution with a minimal number of iv candidates.  */

static comp_cost
iv_ca_extend (struct ivopts_data *data, class iv_ca *ivs,
	      struct iv_cand *cand, struct iv_ca_delta **delta,
	      unsigned *n_ivs, bool min_ncand)
{
  unsigned i;
  comp_cost cost;
  struct iv_group *group;
  class cost_pair *old_cp, *new_cp;

  *delta = NULL;
  for (i = 0; i < ivs->upto; i++)
    {
      group = data->vgroups[i];
      old_cp = iv_ca_cand_for_group (ivs, group);

      if (old_cp
	  && old_cp->cand == cand)
	continue;

      new_cp = get_group_iv_cost (data, group, cand);
      if (!new_cp)
	continue;

      if (!min_ncand)
	{
	  int cmp_invs = iv_ca_compare_deps (data, ivs, group, old_cp, new_cp);
	  /* Skip if new_cp depends on more invariants.  */
	  if (cmp_invs > 0)
	    continue;

	  int cmp_cost = compare_cost_pair (new_cp, old_cp);
	  /* Skip if new_cp is not cheaper.  */
	  if (cmp_cost > 0 || (cmp_cost == 0 && cmp_invs == 0))
	    continue;
	}

      *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  if (n_ivs)
    *n_ivs = iv_ca_n_cands (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}

/* Try narrowing set IVS by removing CAND.  Return the cost of
   the new set and store the differences in DELTA.  START is
   the candidate with which we start narrowing.
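   For every group currently expressed by CAND, the other candidates in IVS
   (or only the related ones when not all candidates are considered) are
   tried and the cheapest usable cost pair replaces the old one; if some
   such group cannot be expressed at all without CAND, infinite_cost is
   returned.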
*/ 6547 6548 static comp_cost 6549 iv_ca_narrow (struct ivopts_data *data, class iv_ca *ivs, 6550 struct iv_cand *cand, struct iv_cand *start, 6551 struct iv_ca_delta **delta) 6552 { 6553 unsigned i, ci; 6554 struct iv_group *group; 6555 class cost_pair *old_cp, *new_cp, *cp; 6556 bitmap_iterator bi; 6557 struct iv_cand *cnd; 6558 comp_cost cost, best_cost, acost; 6559 6560 *delta = NULL; 6561 for (i = 0; i < data->vgroups.length (); i++) 6562 { 6563 group = data->vgroups[i]; 6564 6565 old_cp = iv_ca_cand_for_group (ivs, group); 6566 if (old_cp->cand != cand) 6567 continue; 6568 6569 best_cost = iv_ca_cost (ivs); 6570 /* Start narrowing with START. */ 6571 new_cp = get_group_iv_cost (data, group, start); 6572 6573 if (data->consider_all_candidates) 6574 { 6575 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi) 6576 { 6577 if (ci == cand->id || (start && ci == start->id)) 6578 continue; 6579 6580 cnd = data->vcands[ci]; 6581 6582 cp = get_group_iv_cost (data, group, cnd); 6583 if (!cp) 6584 continue; 6585 6586 iv_ca_set_cp (data, ivs, group, cp); 6587 acost = iv_ca_cost (ivs); 6588 6589 if (acost < best_cost) 6590 { 6591 best_cost = acost; 6592 new_cp = cp; 6593 } 6594 } 6595 } 6596 else 6597 { 6598 EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi) 6599 { 6600 if (ci == cand->id || (start && ci == start->id)) 6601 continue; 6602 6603 cnd = data->vcands[ci]; 6604 6605 cp = get_group_iv_cost (data, group, cnd); 6606 if (!cp) 6607 continue; 6608 6609 iv_ca_set_cp (data, ivs, group, cp); 6610 acost = iv_ca_cost (ivs); 6611 6612 if (acost < best_cost) 6613 { 6614 best_cost = acost; 6615 new_cp = cp; 6616 } 6617 } 6618 } 6619 /* Restore to old cp for use. */ 6620 iv_ca_set_cp (data, ivs, group, old_cp); 6621 6622 if (!new_cp) 6623 { 6624 iv_ca_delta_free (delta); 6625 return infinite_cost; 6626 } 6627 6628 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta); 6629 } 6630 6631 iv_ca_delta_commit (data, ivs, *delta, true); 6632 cost = iv_ca_cost (ivs); 6633 iv_ca_delta_commit (data, ivs, *delta, false); 6634 6635 return cost; 6636 } 6637 6638 /* Try optimizing the set of candidates IVS by removing candidates different 6639 from to EXCEPT_CAND from it. Return cost of the new set, and store 6640 differences in DELTA. */ 6641 6642 static comp_cost 6643 iv_ca_prune (struct ivopts_data *data, class iv_ca *ivs, 6644 struct iv_cand *except_cand, struct iv_ca_delta **delta) 6645 { 6646 bitmap_iterator bi; 6647 struct iv_ca_delta *act_delta, *best_delta; 6648 unsigned i; 6649 comp_cost best_cost, acost; 6650 struct iv_cand *cand; 6651 6652 best_delta = NULL; 6653 best_cost = iv_ca_cost (ivs); 6654 6655 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi) 6656 { 6657 cand = data->vcands[i]; 6658 6659 if (cand == except_cand) 6660 continue; 6661 6662 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta); 6663 6664 if (acost < best_cost) 6665 { 6666 best_cost = acost; 6667 iv_ca_delta_free (&best_delta); 6668 best_delta = act_delta; 6669 } 6670 else 6671 iv_ca_delta_free (&act_delta); 6672 } 6673 6674 if (!best_delta) 6675 { 6676 *delta = NULL; 6677 return best_cost; 6678 } 6679 6680 /* Recurse to possibly remove other unnecessary ivs. 
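   Each level of recursion removes at most one candidate from IVS, so the
   recursion depth is bounded by the number of candidates in the set.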
*/ 6681 iv_ca_delta_commit (data, ivs, best_delta, true); 6682 best_cost = iv_ca_prune (data, ivs, except_cand, delta); 6683 iv_ca_delta_commit (data, ivs, best_delta, false); 6684 *delta = iv_ca_delta_join (best_delta, *delta); 6685 return best_cost; 6686 } 6687 6688 /* Check if CAND_IDX is a candidate other than OLD_CAND and has 6689 cheaper local cost for GROUP than BEST_CP. Return pointer to 6690 the corresponding cost_pair, otherwise just return BEST_CP. */ 6691 6692 static class cost_pair* 6693 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group, 6694 unsigned int cand_idx, struct iv_cand *old_cand, 6695 class cost_pair *best_cp) 6696 { 6697 struct iv_cand *cand; 6698 class cost_pair *cp; 6699 6700 gcc_assert (old_cand != NULL && best_cp != NULL); 6701 if (cand_idx == old_cand->id) 6702 return best_cp; 6703 6704 cand = data->vcands[cand_idx]; 6705 cp = get_group_iv_cost (data, group, cand); 6706 if (cp != NULL && cheaper_cost_pair (cp, best_cp)) 6707 return cp; 6708 6709 return best_cp; 6710 } 6711 6712 /* Try breaking local optimal fixed-point for IVS by replacing candidates 6713 which are used by more than one iv uses. For each of those candidates, 6714 this function tries to represent iv uses under that candidate using 6715 other ones with lower local cost, then tries to prune the new set. 6716 If the new set has lower cost, It returns the new cost after recording 6717 candidate replacement in list DELTA. */ 6718 6719 static comp_cost 6720 iv_ca_replace (struct ivopts_data *data, class iv_ca *ivs, 6721 struct iv_ca_delta **delta) 6722 { 6723 bitmap_iterator bi, bj; 6724 unsigned int i, j, k; 6725 struct iv_cand *cand; 6726 comp_cost orig_cost, acost; 6727 struct iv_ca_delta *act_delta, *tmp_delta; 6728 class cost_pair *old_cp, *best_cp = NULL; 6729 6730 *delta = NULL; 6731 orig_cost = iv_ca_cost (ivs); 6732 6733 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi) 6734 { 6735 if (ivs->n_cand_uses[i] == 1 6736 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND) 6737 continue; 6738 6739 cand = data->vcands[i]; 6740 6741 act_delta = NULL; 6742 /* Represent uses under current candidate using other ones with 6743 lower local cost. */ 6744 for (j = 0; j < ivs->upto; j++) 6745 { 6746 struct iv_group *group = data->vgroups[j]; 6747 old_cp = iv_ca_cand_for_group (ivs, group); 6748 6749 if (old_cp->cand != cand) 6750 continue; 6751 6752 best_cp = old_cp; 6753 if (data->consider_all_candidates) 6754 for (k = 0; k < data->vcands.length (); k++) 6755 best_cp = cheaper_cost_with_cand (data, group, k, 6756 old_cp->cand, best_cp); 6757 else 6758 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj) 6759 best_cp = cheaper_cost_with_cand (data, group, k, 6760 old_cp->cand, best_cp); 6761 6762 if (best_cp == old_cp) 6763 continue; 6764 6765 act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta); 6766 } 6767 /* No need for further prune. */ 6768 if (!act_delta) 6769 continue; 6770 6771 /* Prune the new candidate set. */ 6772 iv_ca_delta_commit (data, ivs, act_delta, true); 6773 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta); 6774 iv_ca_delta_commit (data, ivs, act_delta, false); 6775 act_delta = iv_ca_delta_join (act_delta, tmp_delta); 6776 6777 if (acost < orig_cost) 6778 { 6779 *delta = act_delta; 6780 return acost; 6781 } 6782 else 6783 iv_ca_delta_free (&act_delta); 6784 } 6785 6786 return orig_cost; 6787 } 6788 6789 /* Tries to extend the sets IVS in the best possible way in order to 6790 express the GROUP. 
If ORIGINALP is true, prefer candidates from 6791 the original set of IVs, otherwise favor important candidates not 6792 based on any memory object. */ 6793 6794 static bool 6795 try_add_cand_for (struct ivopts_data *data, class iv_ca *ivs, 6796 struct iv_group *group, bool originalp) 6797 { 6798 comp_cost best_cost, act_cost; 6799 unsigned i; 6800 bitmap_iterator bi; 6801 struct iv_cand *cand; 6802 struct iv_ca_delta *best_delta = NULL, *act_delta; 6803 class cost_pair *cp; 6804 6805 iv_ca_add_group (data, ivs, group); 6806 best_cost = iv_ca_cost (ivs); 6807 cp = iv_ca_cand_for_group (ivs, group); 6808 if (cp) 6809 { 6810 best_delta = iv_ca_delta_add (group, NULL, cp, NULL); 6811 iv_ca_set_no_cp (data, ivs, group); 6812 } 6813 6814 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise 6815 first try important candidates not based on any memory object. Only if 6816 this fails, try the specific ones. Rationale -- in loops with many 6817 variables the best choice often is to use just one generic biv. If we 6818 added here many ivs specific to the uses, the optimization algorithm later 6819 would be likely to get stuck in a local minimum, thus causing us to create 6820 too many ivs. The approach from few ivs to more seems more likely to be 6821 successful -- starting from few ivs, replacing an expensive use by a 6822 specific iv should always be a win. */ 6823 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi) 6824 { 6825 cand = data->vcands[i]; 6826 6827 if (originalp && cand->pos !=IP_ORIGINAL) 6828 continue; 6829 6830 if (!originalp && cand->iv->base_object != NULL_TREE) 6831 continue; 6832 6833 if (iv_ca_cand_used_p (ivs, cand)) 6834 continue; 6835 6836 cp = get_group_iv_cost (data, group, cand); 6837 if (!cp) 6838 continue; 6839 6840 iv_ca_set_cp (data, ivs, group, cp); 6841 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, 6842 true); 6843 iv_ca_set_no_cp (data, ivs, group); 6844 act_delta = iv_ca_delta_add (group, NULL, cp, act_delta); 6845 6846 if (act_cost < best_cost) 6847 { 6848 best_cost = act_cost; 6849 6850 iv_ca_delta_free (&best_delta); 6851 best_delta = act_delta; 6852 } 6853 else 6854 iv_ca_delta_free (&act_delta); 6855 } 6856 6857 if (best_cost.infinite_cost_p ()) 6858 { 6859 for (i = 0; i < group->n_map_members; i++) 6860 { 6861 cp = group->cost_map + i; 6862 cand = cp->cand; 6863 if (!cand) 6864 continue; 6865 6866 /* Already tried this. */ 6867 if (cand->important) 6868 { 6869 if (originalp && cand->pos == IP_ORIGINAL) 6870 continue; 6871 if (!originalp && cand->iv->base_object == NULL_TREE) 6872 continue; 6873 } 6874 6875 if (iv_ca_cand_used_p (ivs, cand)) 6876 continue; 6877 6878 act_delta = NULL; 6879 iv_ca_set_cp (data, ivs, group, cp); 6880 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true); 6881 iv_ca_set_no_cp (data, ivs, group); 6882 act_delta = iv_ca_delta_add (group, 6883 iv_ca_cand_for_group (ivs, group), 6884 cp, act_delta); 6885 6886 if (act_cost < best_cost) 6887 { 6888 best_cost = act_cost; 6889 6890 if (best_delta) 6891 iv_ca_delta_free (&best_delta); 6892 best_delta = act_delta; 6893 } 6894 else 6895 iv_ca_delta_free (&act_delta); 6896 } 6897 } 6898 6899 iv_ca_delta_commit (data, ivs, best_delta, true); 6900 iv_ca_delta_free (&best_delta); 6901 6902 return !best_cost.infinite_cost_p (); 6903 } 6904 6905 /* Finds an initial assignment of candidates to uses. 
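   The groups are processed one by one with try_add_cand_for; if any group
   cannot be expressed, the partially built assignment is freed and NULL is
   returned.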
*/ 6906 6907 static class iv_ca * 6908 get_initial_solution (struct ivopts_data *data, bool originalp) 6909 { 6910 unsigned i; 6911 class iv_ca *ivs = iv_ca_new (data); 6912 6913 for (i = 0; i < data->vgroups.length (); i++) 6914 if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp)) 6915 { 6916 iv_ca_free (&ivs); 6917 return NULL; 6918 } 6919 6920 return ivs; 6921 } 6922 6923 /* Tries to improve set of induction variables IVS. TRY_REPLACE_P 6924 points to a bool variable, this function tries to break local 6925 optimal fixed-point by replacing candidates in IVS if it's true. */ 6926 6927 static bool 6928 try_improve_iv_set (struct ivopts_data *data, 6929 class iv_ca *ivs, bool *try_replace_p) 6930 { 6931 unsigned i, n_ivs; 6932 comp_cost acost, best_cost = iv_ca_cost (ivs); 6933 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta; 6934 struct iv_cand *cand; 6935 6936 /* Try extending the set of induction variables by one. */ 6937 for (i = 0; i < data->vcands.length (); i++) 6938 { 6939 cand = data->vcands[i]; 6940 6941 if (iv_ca_cand_used_p (ivs, cand)) 6942 continue; 6943 6944 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false); 6945 if (!act_delta) 6946 continue; 6947 6948 /* If we successfully added the candidate and the set is small enough, 6949 try optimizing it by removing other candidates. */ 6950 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND) 6951 { 6952 iv_ca_delta_commit (data, ivs, act_delta, true); 6953 acost = iv_ca_prune (data, ivs, cand, &tmp_delta); 6954 iv_ca_delta_commit (data, ivs, act_delta, false); 6955 act_delta = iv_ca_delta_join (act_delta, tmp_delta); 6956 } 6957 6958 if (acost < best_cost) 6959 { 6960 best_cost = acost; 6961 iv_ca_delta_free (&best_delta); 6962 best_delta = act_delta; 6963 } 6964 else 6965 iv_ca_delta_free (&act_delta); 6966 } 6967 6968 if (!best_delta) 6969 { 6970 /* Try removing the candidates from the set instead. */ 6971 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta); 6972 6973 if (!best_delta && *try_replace_p) 6974 { 6975 *try_replace_p = false; 6976 /* So far candidate selecting algorithm tends to choose fewer IVs 6977 so that it can handle cases in which loops have many variables 6978 but the best choice is often to use only one general biv. One 6979 weakness is it can't handle opposite cases, in which different 6980 candidates should be chosen with respect to each use. To solve 6981 the problem, we replace candidates in a manner described by the 6982 comments of iv_ca_replace, thus give general algorithm a chance 6983 to break local optimal fixed-point in these cases. */ 6984 best_cost = iv_ca_replace (data, ivs, &best_delta); 6985 } 6986 6987 if (!best_delta) 6988 return false; 6989 } 6990 6991 iv_ca_delta_commit (data, ivs, best_delta, true); 6992 iv_ca_delta_free (&best_delta); 6993 return best_cost == iv_ca_cost (ivs); 6994 } 6995 6996 /* Attempts to find the optimal set of induction variables. We do simple 6997 greedy heuristic -- we try to replace at most one candidate in the selected 6998 solution and remove the unused ivs while this improves the cost. */ 6999 7000 static class iv_ca * 7001 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp) 7002 { 7003 class iv_ca *set; 7004 bool try_replace_p = true; 7005 7006 /* Get the initial solution. 
*/ 7007 set = get_initial_solution (data, originalp); 7008 if (!set) 7009 { 7010 if (dump_file && (dump_flags & TDF_DETAILS)) 7011 fprintf (dump_file, "Unable to substitute for ivs, failed.\n"); 7012 return NULL; 7013 } 7014 7015 if (dump_file && (dump_flags & TDF_DETAILS)) 7016 { 7017 fprintf (dump_file, "Initial set of candidates:\n"); 7018 iv_ca_dump (data, dump_file, set); 7019 } 7020 7021 while (try_improve_iv_set (data, set, &try_replace_p)) 7022 { 7023 if (dump_file && (dump_flags & TDF_DETAILS)) 7024 { 7025 fprintf (dump_file, "Improved to:\n"); 7026 iv_ca_dump (data, dump_file, set); 7027 } 7028 } 7029 7030 /* If the set has infinite_cost, it can't be optimal. */ 7031 if (iv_ca_cost (set).infinite_cost_p ()) 7032 { 7033 if (dump_file && (dump_flags & TDF_DETAILS)) 7034 fprintf (dump_file, 7035 "Overflow to infinite cost in try_improve_iv_set.\n"); 7036 iv_ca_free (&set); 7037 } 7038 return set; 7039 } 7040 7041 static class iv_ca * 7042 find_optimal_iv_set (struct ivopts_data *data) 7043 { 7044 unsigned i; 7045 comp_cost cost, origcost; 7046 class iv_ca *set, *origset; 7047 7048 /* Determine the cost based on a strategy that starts with original IVs, 7049 and try again using a strategy that prefers candidates not based 7050 on any IVs. */ 7051 origset = find_optimal_iv_set_1 (data, true); 7052 set = find_optimal_iv_set_1 (data, false); 7053 7054 if (!origset && !set) 7055 return NULL; 7056 7057 origcost = origset ? iv_ca_cost (origset) : infinite_cost; 7058 cost = set ? iv_ca_cost (set) : infinite_cost; 7059 7060 if (dump_file && (dump_flags & TDF_DETAILS)) 7061 { 7062 fprintf (dump_file, "Original cost %" PRId64 " (complexity %d)\n\n", 7063 origcost.cost, origcost.complexity); 7064 fprintf (dump_file, "Final cost %" PRId64 " (complexity %d)\n\n", 7065 cost.cost, cost.complexity); 7066 } 7067 7068 /* Choose the one with the best cost. */ 7069 if (origcost <= cost) 7070 { 7071 if (set) 7072 iv_ca_free (&set); 7073 set = origset; 7074 } 7075 else if (origset) 7076 iv_ca_free (&origset); 7077 7078 for (i = 0; i < data->vgroups.length (); i++) 7079 { 7080 struct iv_group *group = data->vgroups[i]; 7081 group->selected = iv_ca_cand_for_group (set, group)->cand; 7082 } 7083 7084 return set; 7085 } 7086 7087 /* Creates a new induction variable corresponding to CAND. */ 7088 7089 static void 7090 create_new_iv (struct ivopts_data *data, struct iv_cand *cand) 7091 { 7092 gimple_stmt_iterator incr_pos; 7093 tree base; 7094 struct iv_use *use; 7095 struct iv_group *group; 7096 bool after = false; 7097 7098 gcc_assert (cand->iv != NULL); 7099 7100 switch (cand->pos) 7101 { 7102 case IP_NORMAL: 7103 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop)); 7104 break; 7105 7106 case IP_END: 7107 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop)); 7108 after = true; 7109 if (!gsi_end_p (incr_pos) && stmt_ends_bb_p (gsi_stmt (incr_pos))) 7110 { 7111 edge e = find_edge (gsi_bb (incr_pos), data->current_loop->header); 7112 incr_pos = gsi_after_labels (split_edge (e)); 7113 after = false; 7114 } 7115 break; 7116 7117 case IP_AFTER_USE: 7118 after = true; 7119 /* fall through */ 7120 case IP_BEFORE_USE: 7121 incr_pos = gsi_for_stmt (cand->incremented_at); 7122 break; 7123 7124 case IP_ORIGINAL: 7125 /* Mark that the iv is preserved. */ 7126 name_info (data, cand->var_before)->preserve_biv = true; 7127 name_info (data, cand->var_after)->preserve_biv = true; 7128 7129 /* Rewrite the increment so that it uses var_before directly. 
*/ 7130 use = find_interesting_uses_op (data, cand->var_after); 7131 group = data->vgroups[use->group_id]; 7132 group->selected = cand; 7133 return; 7134 } 7135 7136 gimple_add_tmp_var (cand->var_before); 7137 7138 base = unshare_expr (cand->iv->base); 7139 7140 create_iv (base, unshare_expr (cand->iv->step), 7141 cand->var_before, data->current_loop, 7142 &incr_pos, after, &cand->var_before, &cand->var_after); 7143 } 7144 7145 /* Creates new induction variables described in SET. */ 7146 7147 static void 7148 create_new_ivs (struct ivopts_data *data, class iv_ca *set) 7149 { 7150 unsigned i; 7151 struct iv_cand *cand; 7152 bitmap_iterator bi; 7153 7154 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi) 7155 { 7156 cand = data->vcands[i]; 7157 create_new_iv (data, cand); 7158 } 7159 7160 if (dump_file && (dump_flags & TDF_DETAILS)) 7161 { 7162 fprintf (dump_file, "Selected IV set for loop %d", 7163 data->current_loop->num); 7164 if (data->loop_loc != UNKNOWN_LOCATION) 7165 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc), 7166 LOCATION_LINE (data->loop_loc)); 7167 fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters", 7168 avg_loop_niter (data->current_loop)); 7169 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands)); 7170 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi) 7171 { 7172 cand = data->vcands[i]; 7173 dump_cand (dump_file, cand); 7174 } 7175 fprintf (dump_file, "\n"); 7176 } 7177 } 7178 7179 /* Rewrites USE (definition of iv used in a nonlinear expression) 7180 using candidate CAND. */ 7181 7182 static void 7183 rewrite_use_nonlinear_expr (struct ivopts_data *data, 7184 struct iv_use *use, struct iv_cand *cand) 7185 { 7186 gassign *ass; 7187 gimple_stmt_iterator bsi; 7188 tree comp, type = get_use_type (use), tgt; 7189 7190 /* An important special case -- if we are asked to express value of 7191 the original iv by itself, just exit; there is no need to 7192 introduce a new computation (that might also need casting the 7193 variable to unsigned and back). */ 7194 if (cand->pos == IP_ORIGINAL 7195 && cand->incremented_at == use->stmt) 7196 { 7197 tree op = NULL_TREE; 7198 enum tree_code stmt_code; 7199 7200 gcc_assert (is_gimple_assign (use->stmt)); 7201 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after); 7202 7203 /* Check whether we may leave the computation unchanged. 7204 This is the case only if it does not rely on other 7205 computations in the loop -- otherwise, the computation 7206 we rely upon may be removed in remove_unused_ivs, 7207 thus leading to ICE. */ 7208 stmt_code = gimple_assign_rhs_code (use->stmt); 7209 if (stmt_code == PLUS_EXPR 7210 || stmt_code == MINUS_EXPR 7211 || stmt_code == POINTER_PLUS_EXPR) 7212 { 7213 if (gimple_assign_rhs1 (use->stmt) == cand->var_before) 7214 op = gimple_assign_rhs2 (use->stmt); 7215 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before) 7216 op = gimple_assign_rhs1 (use->stmt); 7217 } 7218 7219 if (op != NULL_TREE) 7220 { 7221 if (expr_invariant_in_loop_p (data->current_loop, op)) 7222 return; 7223 if (TREE_CODE (op) == SSA_NAME) 7224 { 7225 struct iv *iv = get_iv (data, op); 7226 if (iv != NULL && integer_zerop (iv->step)) 7227 return; 7228 } 7229 } 7230 } 7231 7232 switch (gimple_code (use->stmt)) 7233 { 7234 case GIMPLE_PHI: 7235 tgt = PHI_RESULT (use->stmt); 7236 7237 /* If we should keep the biv, do not replace it. 
*/ 7238 if (name_info (data, tgt)->preserve_biv) 7239 return; 7240 7241 bsi = gsi_after_labels (gimple_bb (use->stmt)); 7242 break; 7243 7244 case GIMPLE_ASSIGN: 7245 tgt = gimple_assign_lhs (use->stmt); 7246 bsi = gsi_for_stmt (use->stmt); 7247 break; 7248 7249 default: 7250 gcc_unreachable (); 7251 } 7252 7253 aff_tree aff_inv, aff_var; 7254 if (!get_computation_aff_1 (data->current_loop, use->stmt, 7255 use, cand, &aff_inv, &aff_var)) 7256 gcc_unreachable (); 7257 7258 unshare_aff_combination (&aff_inv); 7259 unshare_aff_combination (&aff_var); 7260 /* Prefer CSE opportunity than loop invariant by adding offset at last 7261 so that iv_uses have different offsets can be CSEed. */ 7262 poly_widest_int offset = aff_inv.offset; 7263 aff_inv.offset = 0; 7264 7265 gimple_seq stmt_list = NULL, seq = NULL; 7266 tree comp_op1 = aff_combination_to_tree (&aff_inv); 7267 tree comp_op2 = aff_combination_to_tree (&aff_var); 7268 gcc_assert (comp_op1 && comp_op2); 7269 7270 comp_op1 = force_gimple_operand (comp_op1, &seq, true, NULL); 7271 gimple_seq_add_seq (&stmt_list, seq); 7272 comp_op2 = force_gimple_operand (comp_op2, &seq, true, NULL); 7273 gimple_seq_add_seq (&stmt_list, seq); 7274 7275 if (POINTER_TYPE_P (TREE_TYPE (comp_op2))) 7276 std::swap (comp_op1, comp_op2); 7277 7278 if (POINTER_TYPE_P (TREE_TYPE (comp_op1))) 7279 { 7280 comp = fold_build_pointer_plus (comp_op1, 7281 fold_convert (sizetype, comp_op2)); 7282 comp = fold_build_pointer_plus (comp, 7283 wide_int_to_tree (sizetype, offset)); 7284 } 7285 else 7286 { 7287 comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp_op1, 7288 fold_convert (TREE_TYPE (comp_op1), comp_op2)); 7289 comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp, 7290 wide_int_to_tree (TREE_TYPE (comp_op1), offset)); 7291 } 7292 7293 comp = fold_convert (type, comp); 7294 if (!valid_gimple_rhs_p (comp) 7295 || (gimple_code (use->stmt) != GIMPLE_PHI 7296 /* We can't allow re-allocating the stmt as it might be pointed 7297 to still. */ 7298 && (get_gimple_rhs_num_ops (TREE_CODE (comp)) 7299 >= gimple_num_ops (gsi_stmt (bsi))))) 7300 { 7301 comp = force_gimple_operand (comp, &seq, true, NULL); 7302 gimple_seq_add_seq (&stmt_list, seq); 7303 if (POINTER_TYPE_P (TREE_TYPE (tgt))) 7304 { 7305 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt)); 7306 /* As this isn't a plain copy we have to reset alignment 7307 information. */ 7308 if (SSA_NAME_PTR_INFO (comp)) 7309 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp)); 7310 } 7311 } 7312 7313 gsi_insert_seq_before (&bsi, stmt_list, GSI_SAME_STMT); 7314 if (gimple_code (use->stmt) == GIMPLE_PHI) 7315 { 7316 ass = gimple_build_assign (tgt, comp); 7317 gsi_insert_before (&bsi, ass, GSI_SAME_STMT); 7318 7319 bsi = gsi_for_stmt (use->stmt); 7320 remove_phi_node (&bsi, false); 7321 } 7322 else 7323 { 7324 gimple_assign_set_rhs_from_tree (&bsi, comp); 7325 use->stmt = gsi_stmt (bsi); 7326 } 7327 } 7328 7329 /* Performs a peephole optimization to reorder the iv update statement with 7330 a mem ref to enable instruction combining in later phases. The mem ref uses 7331 the iv value before the update, so the reordering transformation requires 7332 adjustment of the offset. CAND is the selected IV_CAND. 7333 7334 Example: 7335 7336 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset 7337 iv2 = iv1 + 1; 7338 7339 if (t < val) (1) 7340 goto L; 7341 goto Head; 7342 7343 7344 directly propagating t over to (1) will introduce overlapping live range 7345 thus increase register pressure. 
This peephole transform it into: 7346 7347 7348 iv2 = iv1 + 1; 7349 t = MEM_REF (base, iv2, 8, 8); 7350 if (t < val) 7351 goto L; 7352 goto Head; 7353 */ 7354 7355 static void 7356 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use) 7357 { 7358 tree var_after; 7359 gimple *iv_update, *stmt; 7360 basic_block bb; 7361 gimple_stmt_iterator gsi, gsi_iv; 7362 7363 if (cand->pos != IP_NORMAL) 7364 return; 7365 7366 var_after = cand->var_after; 7367 iv_update = SSA_NAME_DEF_STMT (var_after); 7368 7369 bb = gimple_bb (iv_update); 7370 gsi = gsi_last_nondebug_bb (bb); 7371 stmt = gsi_stmt (gsi); 7372 7373 /* Only handle conditional statement for now. */ 7374 if (gimple_code (stmt) != GIMPLE_COND) 7375 return; 7376 7377 gsi_prev_nondebug (&gsi); 7378 stmt = gsi_stmt (gsi); 7379 if (stmt != iv_update) 7380 return; 7381 7382 gsi_prev_nondebug (&gsi); 7383 if (gsi_end_p (gsi)) 7384 return; 7385 7386 stmt = gsi_stmt (gsi); 7387 if (gimple_code (stmt) != GIMPLE_ASSIGN) 7388 return; 7389 7390 if (stmt != use->stmt) 7391 return; 7392 7393 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME) 7394 return; 7395 7396 if (dump_file && (dump_flags & TDF_DETAILS)) 7397 { 7398 fprintf (dump_file, "Reordering \n"); 7399 print_gimple_stmt (dump_file, iv_update, 0); 7400 print_gimple_stmt (dump_file, use->stmt, 0); 7401 fprintf (dump_file, "\n"); 7402 } 7403 7404 gsi = gsi_for_stmt (use->stmt); 7405 gsi_iv = gsi_for_stmt (iv_update); 7406 gsi_move_before (&gsi_iv, &gsi); 7407 7408 cand->pos = IP_BEFORE_USE; 7409 cand->incremented_at = use->stmt; 7410 } 7411 7412 /* Return the alias pointer type that should be used for a MEM_REF 7413 associated with USE, which has type USE_PTR_ADDRESS. */ 7414 7415 static tree 7416 get_alias_ptr_type_for_ptr_address (iv_use *use) 7417 { 7418 gcall *call = as_a <gcall *> (use->stmt); 7419 switch (gimple_call_internal_fn (call)) 7420 { 7421 case IFN_MASK_LOAD: 7422 case IFN_MASK_STORE: 7423 case IFN_MASK_LOAD_LANES: 7424 case IFN_MASK_STORE_LANES: 7425 /* The second argument contains the correct alias type. */ 7426 gcc_assert (use->op_p = gimple_call_arg_ptr (call, 0)); 7427 return TREE_TYPE (gimple_call_arg (call, 1)); 7428 7429 default: 7430 gcc_unreachable (); 7431 } 7432 } 7433 7434 7435 /* Rewrites USE (address that is an iv) using candidate CAND. */ 7436 7437 static void 7438 rewrite_use_address (struct ivopts_data *data, 7439 struct iv_use *use, struct iv_cand *cand) 7440 { 7441 aff_tree aff; 7442 bool ok; 7443 7444 adjust_iv_update_pos (cand, use); 7445 ok = get_computation_aff (data->current_loop, use->stmt, use, cand, &aff); 7446 gcc_assert (ok); 7447 unshare_aff_combination (&aff); 7448 7449 /* To avoid undefined overflow problems, all IV candidates use unsigned 7450 integer types. The drawback is that this makes it impossible for 7451 create_mem_ref to distinguish an IV that is based on a memory object 7452 from one that represents simply an offset. 7453 7454 To work around this problem, we pass a hint to create_mem_ref that 7455 indicates which variable (if any) in aff is an IV based on a memory 7456 object. Note that we only consider the candidate. If this is not 7457 based on an object, the base of the reference is in some subexpression 7458 of the use -- but these will use pointer types, so they are recognized 7459 by the create_mem_ref heuristics anyway. */ 7460 tree iv = var_at_stmt (data->current_loop, cand, use->stmt); 7461 tree base_hint = (cand->iv->base_object) ? 
iv : NULL_TREE; 7462 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt); 7463 tree type = use->mem_type; 7464 tree alias_ptr_type; 7465 if (use->type == USE_PTR_ADDRESS) 7466 alias_ptr_type = get_alias_ptr_type_for_ptr_address (use); 7467 else 7468 { 7469 gcc_assert (type == TREE_TYPE (*use->op_p)); 7470 unsigned int align = get_object_alignment (*use->op_p); 7471 if (align != TYPE_ALIGN (type)) 7472 type = build_aligned_type (type, align); 7473 alias_ptr_type = reference_alias_ptr_type (*use->op_p); 7474 } 7475 tree ref = create_mem_ref (&bsi, type, &aff, alias_ptr_type, 7476 iv, base_hint, data->speed); 7477 7478 if (use->type == USE_PTR_ADDRESS) 7479 { 7480 ref = fold_build1 (ADDR_EXPR, build_pointer_type (use->mem_type), ref); 7481 ref = fold_convert (get_use_type (use), ref); 7482 ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE, 7483 true, GSI_SAME_STMT); 7484 } 7485 else 7486 copy_ref_info (ref, *use->op_p); 7487 7488 *use->op_p = ref; 7489 } 7490 7491 /* Rewrites USE (the condition such that one of the arguments is an iv) using 7492 candidate CAND. */ 7493 7494 static void 7495 rewrite_use_compare (struct ivopts_data *data, 7496 struct iv_use *use, struct iv_cand *cand) 7497 { 7498 tree comp, op, bound; 7499 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt); 7500 enum tree_code compare; 7501 struct iv_group *group = data->vgroups[use->group_id]; 7502 class cost_pair *cp = get_group_iv_cost (data, group, cand); 7503 7504 bound = cp->value; 7505 if (bound) 7506 { 7507 tree var = var_at_stmt (data->current_loop, cand, use->stmt); 7508 tree var_type = TREE_TYPE (var); 7509 gimple_seq stmts; 7510 7511 if (dump_file && (dump_flags & TDF_DETAILS)) 7512 { 7513 fprintf (dump_file, "Replacing exit test: "); 7514 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM); 7515 } 7516 compare = cp->comp; 7517 bound = unshare_expr (fold_convert (var_type, bound)); 7518 op = force_gimple_operand (bound, &stmts, true, NULL_TREE); 7519 if (stmts) 7520 gsi_insert_seq_on_edge_immediate ( 7521 loop_preheader_edge (data->current_loop), 7522 stmts); 7523 7524 gcond *cond_stmt = as_a <gcond *> (use->stmt); 7525 gimple_cond_set_lhs (cond_stmt, var); 7526 gimple_cond_set_code (cond_stmt, compare); 7527 gimple_cond_set_rhs (cond_stmt, op); 7528 return; 7529 } 7530 7531 /* The induction variable elimination failed; just express the original 7532 giv. */ 7533 comp = get_computation_at (data->current_loop, use->stmt, use, cand); 7534 gcc_assert (comp != NULL_TREE); 7535 gcc_assert (use->op_p != NULL); 7536 *use->op_p = force_gimple_operand_gsi (&bsi, comp, true, 7537 SSA_NAME_VAR (*use->op_p), 7538 true, GSI_SAME_STMT); 7539 } 7540 7541 /* Rewrite the groups using the selected induction variables. 
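   Every use in every group is rewritten according to the group type and its
   statement is updated; the candidate selected for each group by
   find_optimal_iv_set is asserted to exist.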
*/ 7542 7543 static void 7544 rewrite_groups (struct ivopts_data *data) 7545 { 7546 unsigned i, j; 7547 7548 for (i = 0; i < data->vgroups.length (); i++) 7549 { 7550 struct iv_group *group = data->vgroups[i]; 7551 struct iv_cand *cand = group->selected; 7552 7553 gcc_assert (cand); 7554 7555 if (group->type == USE_NONLINEAR_EXPR) 7556 { 7557 for (j = 0; j < group->vuses.length (); j++) 7558 { 7559 rewrite_use_nonlinear_expr (data, group->vuses[j], cand); 7560 update_stmt (group->vuses[j]->stmt); 7561 } 7562 } 7563 else if (address_p (group->type)) 7564 { 7565 for (j = 0; j < group->vuses.length (); j++) 7566 { 7567 rewrite_use_address (data, group->vuses[j], cand); 7568 update_stmt (group->vuses[j]->stmt); 7569 } 7570 } 7571 else 7572 { 7573 gcc_assert (group->type == USE_COMPARE); 7574 7575 for (j = 0; j < group->vuses.length (); j++) 7576 { 7577 rewrite_use_compare (data, group->vuses[j], cand); 7578 update_stmt (group->vuses[j]->stmt); 7579 } 7580 } 7581 } 7582 } 7583 7584 /* Removes the ivs that are not used after rewriting. */ 7585 7586 static void 7587 remove_unused_ivs (struct ivopts_data *data, bitmap toremove) 7588 { 7589 unsigned j; 7590 bitmap_iterator bi; 7591 7592 /* Figure out an order in which to release SSA DEFs so that we don't 7593 release something that we'd have to propagate into a debug stmt 7594 afterwards. */ 7595 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi) 7596 { 7597 struct version_info *info; 7598 7599 info = ver_info (data, j); 7600 if (info->iv 7601 && !integer_zerop (info->iv->step) 7602 && !info->inv_id 7603 && !info->iv->nonlin_use 7604 && !info->preserve_biv) 7605 { 7606 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name)); 7607 7608 tree def = info->iv->ssa_name; 7609 7610 if (MAY_HAVE_DEBUG_BIND_STMTS && SSA_NAME_DEF_STMT (def)) 7611 { 7612 imm_use_iterator imm_iter; 7613 use_operand_p use_p; 7614 gimple *stmt; 7615 int count = 0; 7616 7617 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def) 7618 { 7619 if (!gimple_debug_bind_p (stmt)) 7620 continue; 7621 7622 /* We just want to determine whether to do nothing 7623 (count == 0), to substitute the computed 7624 expression into a single use of the SSA DEF by 7625 itself (count == 1), or to use a debug temp 7626 because the SSA DEF is used multiple times or as 7627 part of a larger expression (count > 1). */ 7628 count++; 7629 if (gimple_debug_bind_get_value (stmt) != def) 7630 count++; 7631 7632 if (count > 1) 7633 BREAK_FROM_IMM_USE_STMT (imm_iter); 7634 } 7635 7636 if (!count) 7637 continue; 7638 7639 struct iv_use dummy_use; 7640 struct iv_cand *best_cand = NULL, *cand; 7641 unsigned i, best_pref = 0, cand_pref; 7642 tree comp = NULL_TREE; 7643 7644 memset (&dummy_use, 0, sizeof (dummy_use)); 7645 dummy_use.iv = info->iv; 7646 for (i = 0; i < data->vgroups.length () && i < 64; i++) 7647 { 7648 cand = data->vgroups[i]->selected; 7649 if (cand == best_cand) 7650 continue; 7651 cand_pref = operand_equal_p (cand->iv->step, 7652 info->iv->step, 0) 7653 ? 4 : 0; 7654 cand_pref 7655 += TYPE_MODE (TREE_TYPE (cand->iv->base)) 7656 == TYPE_MODE (TREE_TYPE (info->iv->base)) 7657 ? 2 : 0; 7658 cand_pref 7659 += TREE_CODE (cand->iv->base) == INTEGER_CST 7660 ? 

/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data, bitmap toremove)
{
  unsigned j;
  bitmap_iterator bi;

  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
	  && !integer_zerop (info->iv->step)
	  && !info->inv_id
	  && !info->iv->nonlin_use
	  && !info->preserve_biv)
	{
	  bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));

	  tree def = info->iv->ssa_name;

	  if (MAY_HAVE_DEBUG_BIND_STMTS && SSA_NAME_DEF_STMT (def))
	    {
	      imm_use_iterator imm_iter;
	      use_operand_p use_p;
	      gimple *stmt;
	      int count = 0;

	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
		{
		  if (!gimple_debug_bind_p (stmt))
		    continue;

		  /* We just want to determine whether to do nothing
		     (count == 0), to substitute the computed
		     expression into a single use of the SSA DEF by
		     itself (count == 1), or to use a debug temp
		     because the SSA DEF is used multiple times or as
		     part of a larger expression (count > 1).  */
		  count++;
		  if (gimple_debug_bind_get_value (stmt) != def)
		    count++;

		  if (count > 1)
		    BREAK_FROM_IMM_USE_STMT (imm_iter);
		}

	      if (!count)
		continue;

	      struct iv_use dummy_use;
	      struct iv_cand *best_cand = NULL, *cand;
	      unsigned i, best_pref = 0, cand_pref;
	      tree comp = NULL_TREE;

	      memset (&dummy_use, 0, sizeof (dummy_use));
	      dummy_use.iv = info->iv;
	      for (i = 0; i < data->vgroups.length () && i < 64; i++)
		{
		  cand = data->vgroups[i]->selected;
		  if (cand == best_cand)
		    continue;
		  cand_pref = operand_equal_p (cand->iv->step,
					       info->iv->step, 0)
			      ? 4 : 0;
		  cand_pref
		    += TYPE_MODE (TREE_TYPE (cand->iv->base))
		       == TYPE_MODE (TREE_TYPE (info->iv->base))
		       ? 2 : 0;
		  cand_pref
		    += TREE_CODE (cand->iv->base) == INTEGER_CST
		       ? 1 : 0;
		  if (best_cand == NULL || best_pref < cand_pref)
		    {
		      tree this_comp
			= get_debug_computation_at (data->current_loop,
						    SSA_NAME_DEF_STMT (def),
						    &dummy_use, cand);
		      if (this_comp)
			{
			  best_cand = cand;
			  best_pref = cand_pref;
			  comp = this_comp;
			}
		    }
		}

	      if (!best_cand)
		continue;

	      comp = unshare_expr (comp);
	      if (count > 1)
		{
		  tree vexpr = make_node (DEBUG_EXPR_DECL);
		  DECL_ARTIFICIAL (vexpr) = 1;
		  TREE_TYPE (vexpr) = TREE_TYPE (comp);
		  if (SSA_NAME_VAR (def))
		    SET_DECL_MODE (vexpr, DECL_MODE (SSA_NAME_VAR (def)));
		  else
		    SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (vexpr)));
		  gdebug *def_temp
		    = gimple_build_debug_bind (vexpr, comp, NULL);
		  gimple_stmt_iterator gsi;

		  if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
		    gsi = gsi_after_labels (gimple_bb
					    (SSA_NAME_DEF_STMT (def)));
		  else
		    gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));

		  gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
		  comp = vexpr;
		}

	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
		{
		  if (!gimple_debug_bind_p (stmt))
		    continue;

		  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
		    SET_USE (use_p, comp);

		  update_stmt (stmt);
		}
	    }
	}
    }
}

/* Frees memory occupied by class tree_niter_desc in *VALUE.  Callback
   for hash_map::traverse.  */

bool
free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
{
  free (value);
  return true;
}

/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      data->niters->traverse<void *, free_tree_niter_desc> (NULL);
      delete data->niters;
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < data->vgroups.length (); i++)
    {
      struct iv_group *group = data->vgroups[i];

      for (j = 0; j < group->vuses.length (); j++)
	free (group->vuses[j]);
      group->vuses.release ();

      BITMAP_FREE (group->related_cands);
      for (j = 0; j < group->n_map_members; j++)
	{
	  if (group->cost_map[j].inv_vars)
	    BITMAP_FREE (group->cost_map[j].inv_vars);
	  if (group->cost_map[j].inv_exprs)
	    BITMAP_FREE (group->cost_map[j].inv_exprs);
	}

      free (group->cost_map);
      free (group);
    }
  data->vgroups.truncate (0);

  for (i = 0; i < data->vcands.length (); i++)
    {
      struct iv_cand *cand = data->vcands[i];

      if (cand->inv_vars)
	BITMAP_FREE (cand->inv_vars);
      if (cand->inv_exprs)
	BITMAP_FREE (cand->inv_exprs);
      free (cand);
    }
  data->vcands.truncate (0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info,
				     data->version_info_size);
    }

  data->max_inv_var_id = 0;
  data->max_inv_expr_id = 0;

  FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
    SET_DECL_RTL (obj, NULL_RTX);

  decl_rtl_to_reset.truncate (0);

  data->inv_expr_tab->empty ();

  data->iv_common_cand_tab->empty ();
  data->iv_common_cands.truncate (0);
}

/* Finalizes data structures used by the iv optimization pass.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  decl_rtl_to_reset.release ();
  data->vgroups.release ();
  data->vcands.release ();
  delete data->inv_expr_tab;
  data->inv_expr_tab = NULL;
  free_affine_expand_cache (&data->name_expansion_cache);
  if (data->base_object_map)
    delete data->base_object_map;
  delete data->iv_common_cand_tab;
  data->iv_common_cand_tab = NULL;
  data->iv_common_cands.release ();
  obstack_free (&data->iv_obstack, NULL);
}

/* Returns true if the loop body BODY (with NUM_NODES basic blocks) includes
   any function calls.  */

static bool
loop_body_includes_call (basic_block *body, unsigned num_nodes)
{
  gimple_stmt_iterator gsi;
  unsigned i;

  for (i = 0; i < num_nodes; i++)
    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple *stmt = gsi_stmt (gsi);
	if (is_gimple_call (stmt)
	    && !gimple_call_internal_p (stmt)
	    && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
	  return true;
      }
  return false;
}

/* Determine cost scaling factor for basic blocks in loop.  */
#define COST_SCALING_FACTOR_BOUND (20)

static void
determine_scaling_factor (struct ivopts_data *data, basic_block *body)
{
  int lfreq = data->current_loop->header->count.to_frequency (cfun);
  if (!data->speed || lfreq <= 0)
    return;

  int max_freq = lfreq;
  for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
    {
      body[i]->aux = (void *)(intptr_t) 1;
      if (max_freq < body[i]->count.to_frequency (cfun))
	max_freq = body[i]->count.to_frequency (cfun);
    }
  if (max_freq > lfreq)
    {
      int divisor, factor;
      /* Check if scaling factor itself needs to be scaled by the bound.  This
	 is to avoid overflow when scaling cost according to profile info.  */
      if (max_freq / lfreq > COST_SCALING_FACTOR_BOUND)
	{
	  divisor = max_freq;
	  factor = COST_SCALING_FACTOR_BOUND;
	}
      else
	{
	  divisor = lfreq;
	  factor = 1;
	}
      for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
	{
	  int bfreq = body[i]->count.to_frequency (cfun);
	  if (bfreq <= lfreq)
	    continue;

	  body[i]->aux = (void *)(intptr_t) (factor * bfreq / divisor);
	}
    }
}
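
/* A worked example of the scaling above, with made-up numbers: if the loop
   header frequency is lfreq == 10 and the hottest block has max_freq == 1000,
   then max_freq / lfreq == 100 exceeds COST_SCALING_FACTOR_BOUND, so
   divisor == 1000 and factor == 20.  A block with frequency 500 then gets
   aux == 20 * 500 / 1000 == 10, and the hottest block gets aux == 20; the
   per-block weight is thus capped at COST_SCALING_FACTOR_BOUND, avoiding
   overflow when statement costs are scaled by it later.  */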

/* Find the doloop comparison use and set its doloop_p flag if found.  */

static bool
find_doloop_use (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;

  for (unsigned i = 0; i < data->vgroups.length (); i++)
    {
      struct iv_group *group = data->vgroups[i];
      if (group->type == USE_COMPARE)
	{
	  gcc_assert (group->vuses.length () == 1);
	  struct iv_use *use = group->vuses[0];
	  gimple *stmt = use->stmt;
	  if (gimple_code (stmt) == GIMPLE_COND)
	    {
	      basic_block bb = gimple_bb (stmt);
	      edge true_edge, false_edge;
	      extract_true_false_edges_from_block (bb, &true_edge, &false_edge);
	      /* This comparison controls the branch to the loop latch.  For
		 now, require that the latch is empty.  */
	      if ((loop->latch == true_edge->dest
		   || loop->latch == false_edge->dest)
		  && empty_block_p (loop->latch))
		{
		  group->doloop_p = true;
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    {
		      fprintf (dump_file, "Doloop cmp iv use: ");
		      print_gimple_stmt (dump_file, stmt, 0, TDF_DETAILS);
		    }
		  return true;
		}
	    }
	}
    }

  return false;
}

/* For targets that support doloop, predict whether the later RTL doloop
   transformation will be performed on this loop; if so, also detect the
   doloop comparison use and set the doloop_use_p flag of DATA.  */

void
analyze_and_mark_doloop_use (struct ivopts_data *data)
{
  data->doloop_use_p = false;

  if (!flag_branch_on_count_reg)
    return;

  if (!generic_predict_doloop_p (data))
    return;

  if (find_doloop_use (data))
    {
      data->doloop_use_p = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  struct loop *loop = data->current_loop;
	  fprintf (dump_file,
		   "Predict loop %d can perform"
		   " doloop optimization later.\n",
		   loop->num);
	  flow_loop_dump (loop, dump_file, NULL, 1);
	}
    }
}
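
/* For illustration only: a typical loop accepted by find_doloop_use above
   looks like

     for (i = 0; i < n; i++)
       sum += a[i];

   where the single USE_COMPARE group is the GIMPLE_COND for "i < n" whose
   true or false edge branches to an empty latch block.  On a target that
   provides a low-overhead counting-loop instruction, this is the shape the
   later RTL doloop pass is expected to pick up.  */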

/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, class loop *loop,
			   bitmap toremove)
{
  bool changed = false;
  class iv_ca *iv_ca;
  edge exit = single_dom_exit (loop);
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->loop_loc = find_loop_location (loop).get_location_t ();
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d", loop->num);
      if (data->loop_loc != UNKNOWN_LOCATION)
	fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
		 LOCATION_LINE (data->loop_loc));
      fprintf (dump_file, "\n");

      if (exit)
	{
	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
		   exit->src->index, exit->dest->index);
	  print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
	  fprintf (dump_file, "\n");
	}

      fprintf (dump_file, "\n");
    }

  body = get_loop_body (loop);
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);

  data->loop_single_exit_p
    = exit != NULL && loop_only_exit_p (loop, body, exit);

  /* For each ssa name, determine whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
    goto finish;

  /* Determine cost scaling factor for basic blocks in loop.  */
  determine_scaling_factor (data, body);

  /* Analyze doloop possibility and mark the doloop use if predicted.  */
  analyze_and_mark_doloop_use (data);

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_group_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  /* Cleanup basic block aux field.  */
  for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
    body[i]->aux = NULL;
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_groups (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data, toremove);

finish:
  free (body);
  free_loop_data (data);

  return changed;
}

/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  class loop *loop;
  struct ivopts_data data;
  auto_bitmap toremove;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      if (!dbg_cnt (ivopts_loop))
	continue;

      if (dump_file && (dump_flags & TDF_DETAILS))
	flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop, toremove);
    }

  /* Remove eliminated IV defs.  */
  release_defs_bitset (toremove);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset_htab ();
  /* Likewise niter and control-IV information.  */
  free_numbers_of_iterations_estimates (cfun);

  tree_ssa_iv_optimize_finalize (&data);
}

#include "gt-tree-ssa-loop-ivopts.h"