1 /* Induction variable optimizations. 2 Copyright (C) 2003-2018 Free Software Foundation, Inc. 3 4 This file is part of GCC. 5 6 GCC is free software; you can redistribute it and/or modify it 7 under the terms of the GNU General Public License as published by the 8 Free Software Foundation; either version 3, or (at your option) any 9 later version. 10 11 GCC is distributed in the hope that it will be useful, but WITHOUT 12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with GCC; see the file COPYING3. If not see 18 <http://www.gnu.org/licenses/>. */ 19 20 /* This pass tries to find the optimal set of induction variables for the loop. 21 It optimizes just the basic linear induction variables (although adding 22 support for other types should not be too hard). It includes the 23 optimizations commonly known as strength reduction, induction variable 24 coalescing and induction variable elimination. It does it in the 25 following steps: 26 27 1) The interesting uses of induction variables are found. This includes 28 29 -- uses of induction variables in non-linear expressions 30 -- addresses of arrays 31 -- comparisons of induction variables 32 33 Note the interesting uses are categorized and handled in group. 34 Generally, address type uses are grouped together if their iv bases 35 are different in constant offset. 36 37 2) Candidates for the induction variables are found. This includes 38 39 -- old induction variables 40 -- the variables defined by expressions derived from the "interesting 41 groups/uses" above 42 43 3) The optimal (w.r. to a cost function) set of variables is chosen. The 44 cost function assigns a cost to sets of induction variables and consists 45 of three parts: 46 47 -- The group/use costs. Each of the interesting groups/uses chooses 48 the best induction variable in the set and adds its cost to the sum. 49 The cost reflects the time spent on modifying the induction variables 50 value to be usable for the given purpose (adding base and offset for 51 arrays, etc.). 52 -- The variable costs. Each of the variables has a cost assigned that 53 reflects the costs associated with incrementing the value of the 54 variable. The original variables are somewhat preferred. 55 -- The set cost. Depending on the size of the set, extra cost may be 56 added to reflect register pressure. 57 58 All the costs are defined in a machine-specific way, using the target 59 hooks and machine descriptions to determine them. 60 61 4) The trees are transformed to use the new variables, the dead code is 62 removed. 63 64 All of this is done loop by loop. Doing it globally is theoretically 65 possible, it might give a better performance and it might enable us 66 to decide costs more precisely, but getting all the interactions right 67 would be complicated. */ 68 69 #include "config.h" 70 #include "system.h" 71 #include "coretypes.h" 72 #include "backend.h" 73 #include "rtl.h" 74 #include "tree.h" 75 #include "gimple.h" 76 #include "cfghooks.h" 77 #include "tree-pass.h" 78 #include "memmodel.h" 79 #include "tm_p.h" 80 #include "ssa.h" 81 #include "expmed.h" 82 #include "insn-config.h" 83 #include "emit-rtl.h" 84 #include "recog.h" 85 #include "cgraph.h" 86 #include "gimple-pretty-print.h" 87 #include "alias.h" 88 #include "fold-const.h" 89 #include "stor-layout.h" 90 #include "tree-eh.h" 91 #include "gimplify.h" 92 #include "gimple-iterator.h" 93 #include "gimplify-me.h" 94 #include "tree-cfg.h" 95 #include "tree-ssa-loop-ivopts.h" 96 #include "tree-ssa-loop-manip.h" 97 #include "tree-ssa-loop-niter.h" 98 #include "tree-ssa-loop.h" 99 #include "explow.h" 100 #include "expr.h" 101 #include "tree-dfa.h" 102 #include "tree-ssa.h" 103 #include "cfgloop.h" 104 #include "tree-scalar-evolution.h" 105 #include "params.h" 106 #include "tree-affine.h" 107 #include "tree-ssa-propagate.h" 108 #include "tree-ssa-address.h" 109 #include "builtins.h" 110 #include "tree-vectorizer.h" 111 112 /* FIXME: Expressions are expanded to RTL in this pass to determine the 113 cost of different addressing modes. This should be moved to a TBD 114 interface between the GIMPLE and RTL worlds. */ 115 116 /* The infinite cost. */ 117 #define INFTY 10000000 118 119 /* Returns the expected number of loop iterations for LOOP. 120 The average trip count is computed from profile data if it 121 exists. */ 122 123 static inline HOST_WIDE_INT 124 avg_loop_niter (struct loop *loop) 125 { 126 HOST_WIDE_INT niter = estimated_stmt_executions_int (loop); 127 if (niter == -1) 128 { 129 niter = likely_max_stmt_executions_int (loop); 130 131 if (niter == -1 || niter > PARAM_VALUE (PARAM_AVG_LOOP_NITER)) 132 return PARAM_VALUE (PARAM_AVG_LOOP_NITER); 133 } 134 135 return niter; 136 } 137 138 struct iv_use; 139 140 /* Representation of the induction variable. */ 141 struct iv 142 { 143 tree base; /* Initial value of the iv. */ 144 tree base_object; /* A memory object to that the induction variable points. */ 145 tree step; /* Step of the iv (constant only). */ 146 tree ssa_name; /* The ssa name with the value. */ 147 struct iv_use *nonlin_use; /* The identifier in the use if it is the case. */ 148 bool biv_p; /* Is it a biv? */ 149 bool no_overflow; /* True if the iv doesn't overflow. */ 150 bool have_address_use;/* For biv, indicate if it's used in any address 151 type use. */ 152 }; 153 154 /* Per-ssa version information (induction variable descriptions, etc.). */ 155 struct version_info 156 { 157 tree name; /* The ssa name. */ 158 struct iv *iv; /* Induction variable description. */ 159 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in 160 an expression that is not an induction variable. */ 161 bool preserve_biv; /* For the original biv, whether to preserve it. */ 162 unsigned inv_id; /* Id of an invariant. */ 163 }; 164 165 /* Types of uses. */ 166 enum use_type 167 { 168 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */ 169 USE_REF_ADDRESS, /* Use is an address for an explicit memory 170 reference. */ 171 USE_PTR_ADDRESS, /* Use is a pointer argument to a function in 172 cases where the expansion of the function 173 will turn the argument into a normal address. */ 174 USE_COMPARE /* Use is a compare. */ 175 }; 176 177 /* Cost of a computation. */ 178 struct comp_cost 179 { 180 comp_cost (): cost (0), complexity (0), scratch (0) 181 {} 182 183 comp_cost (int cost, unsigned complexity, int scratch = 0) 184 : cost (cost), complexity (complexity), scratch (scratch) 185 {} 186 187 /* Returns true if COST is infinite. */ 188 bool infinite_cost_p (); 189 190 /* Adds costs COST1 and COST2. */ 191 friend comp_cost operator+ (comp_cost cost1, comp_cost cost2); 192 193 /* Adds COST to the comp_cost. */ 194 comp_cost operator+= (comp_cost cost); 195 196 /* Adds constant C to this comp_cost. */ 197 comp_cost operator+= (HOST_WIDE_INT c); 198 199 /* Subtracts constant C to this comp_cost. */ 200 comp_cost operator-= (HOST_WIDE_INT c); 201 202 /* Divide the comp_cost by constant C. */ 203 comp_cost operator/= (HOST_WIDE_INT c); 204 205 /* Multiply the comp_cost by constant C. */ 206 comp_cost operator*= (HOST_WIDE_INT c); 207 208 /* Subtracts costs COST1 and COST2. */ 209 friend comp_cost operator- (comp_cost cost1, comp_cost cost2); 210 211 /* Subtracts COST from this comp_cost. */ 212 comp_cost operator-= (comp_cost cost); 213 214 /* Returns true if COST1 is smaller than COST2. */ 215 friend bool operator< (comp_cost cost1, comp_cost cost2); 216 217 /* Returns true if COST1 and COST2 are equal. */ 218 friend bool operator== (comp_cost cost1, comp_cost cost2); 219 220 /* Returns true if COST1 is smaller or equal than COST2. */ 221 friend bool operator<= (comp_cost cost1, comp_cost cost2); 222 223 int cost; /* The runtime cost. */ 224 unsigned complexity; /* The estimate of the complexity of the code for 225 the computation (in no concrete units -- 226 complexity field should be larger for more 227 complex expressions and addressing modes). */ 228 int scratch; /* Scratch used during cost computation. */ 229 }; 230 231 static const comp_cost no_cost; 232 static const comp_cost infinite_cost (INFTY, INFTY, INFTY); 233 234 bool 235 comp_cost::infinite_cost_p () 236 { 237 return cost == INFTY; 238 } 239 240 comp_cost 241 operator+ (comp_cost cost1, comp_cost cost2) 242 { 243 if (cost1.infinite_cost_p () || cost2.infinite_cost_p ()) 244 return infinite_cost; 245 246 cost1.cost += cost2.cost; 247 cost1.complexity += cost2.complexity; 248 249 return cost1; 250 } 251 252 comp_cost 253 operator- (comp_cost cost1, comp_cost cost2) 254 { 255 if (cost1.infinite_cost_p ()) 256 return infinite_cost; 257 258 gcc_assert (!cost2.infinite_cost_p ()); 259 260 cost1.cost -= cost2.cost; 261 cost1.complexity -= cost2.complexity; 262 263 return cost1; 264 } 265 266 comp_cost 267 comp_cost::operator+= (comp_cost cost) 268 { 269 *this = *this + cost; 270 return *this; 271 } 272 273 comp_cost 274 comp_cost::operator+= (HOST_WIDE_INT c) 275 { 276 if (infinite_cost_p ()) 277 return *this; 278 279 this->cost += c; 280 281 return *this; 282 } 283 284 comp_cost 285 comp_cost::operator-= (HOST_WIDE_INT c) 286 { 287 if (infinite_cost_p ()) 288 return *this; 289 290 this->cost -= c; 291 292 return *this; 293 } 294 295 comp_cost 296 comp_cost::operator/= (HOST_WIDE_INT c) 297 { 298 if (infinite_cost_p ()) 299 return *this; 300 301 this->cost /= c; 302 303 return *this; 304 } 305 306 comp_cost 307 comp_cost::operator*= (HOST_WIDE_INT c) 308 { 309 if (infinite_cost_p ()) 310 return *this; 311 312 this->cost *= c; 313 314 return *this; 315 } 316 317 comp_cost 318 comp_cost::operator-= (comp_cost cost) 319 { 320 *this = *this - cost; 321 return *this; 322 } 323 324 bool 325 operator< (comp_cost cost1, comp_cost cost2) 326 { 327 if (cost1.cost == cost2.cost) 328 return cost1.complexity < cost2.complexity; 329 330 return cost1.cost < cost2.cost; 331 } 332 333 bool 334 operator== (comp_cost cost1, comp_cost cost2) 335 { 336 return cost1.cost == cost2.cost 337 && cost1.complexity == cost2.complexity; 338 } 339 340 bool 341 operator<= (comp_cost cost1, comp_cost cost2) 342 { 343 return cost1 < cost2 || cost1 == cost2; 344 } 345 346 struct iv_inv_expr_ent; 347 348 /* The candidate - cost pair. */ 349 struct cost_pair 350 { 351 struct iv_cand *cand; /* The candidate. */ 352 comp_cost cost; /* The cost. */ 353 enum tree_code comp; /* For iv elimination, the comparison. */ 354 bitmap inv_vars; /* The list of invariant ssa_vars that have to be 355 preserved when representing iv_use with iv_cand. */ 356 bitmap inv_exprs; /* The list of newly created invariant expressions 357 when representing iv_use with iv_cand. */ 358 tree value; /* For final value elimination, the expression for 359 the final value of the iv. For iv elimination, 360 the new bound to compare with. */ 361 }; 362 363 /* Use. */ 364 struct iv_use 365 { 366 unsigned id; /* The id of the use. */ 367 unsigned group_id; /* The group id the use belongs to. */ 368 enum use_type type; /* Type of the use. */ 369 tree mem_type; /* The memory type to use when testing whether an 370 address is legitimate, and what the address's 371 cost is. */ 372 struct iv *iv; /* The induction variable it is based on. */ 373 gimple *stmt; /* Statement in that it occurs. */ 374 tree *op_p; /* The place where it occurs. */ 375 376 tree addr_base; /* Base address with const offset stripped. */ 377 poly_uint64_pod addr_offset; 378 /* Const offset stripped from base address. */ 379 }; 380 381 /* Group of uses. */ 382 struct iv_group 383 { 384 /* The id of the group. */ 385 unsigned id; 386 /* Uses of the group are of the same type. */ 387 enum use_type type; 388 /* The set of "related" IV candidates, plus the important ones. */ 389 bitmap related_cands; 390 /* Number of IV candidates in the cost_map. */ 391 unsigned n_map_members; 392 /* The costs wrto the iv candidates. */ 393 struct cost_pair *cost_map; 394 /* The selected candidate for the group. */ 395 struct iv_cand *selected; 396 /* Uses in the group. */ 397 vec<struct iv_use *> vuses; 398 }; 399 400 /* The position where the iv is computed. */ 401 enum iv_position 402 { 403 IP_NORMAL, /* At the end, just before the exit condition. */ 404 IP_END, /* At the end of the latch block. */ 405 IP_BEFORE_USE, /* Immediately before a specific use. */ 406 IP_AFTER_USE, /* Immediately after a specific use. */ 407 IP_ORIGINAL /* The original biv. */ 408 }; 409 410 /* The induction variable candidate. */ 411 struct iv_cand 412 { 413 unsigned id; /* The number of the candidate. */ 414 bool important; /* Whether this is an "important" candidate, i.e. such 415 that it should be considered by all uses. */ 416 ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */ 417 gimple *incremented_at;/* For original biv, the statement where it is 418 incremented. */ 419 tree var_before; /* The variable used for it before increment. */ 420 tree var_after; /* The variable used for it after increment. */ 421 struct iv *iv; /* The value of the candidate. NULL for 422 "pseudocandidate" used to indicate the possibility 423 to replace the final value of an iv by direct 424 computation of the value. */ 425 unsigned cost; /* Cost of the candidate. */ 426 unsigned cost_step; /* Cost of the candidate's increment operation. */ 427 struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place 428 where it is incremented. */ 429 bitmap inv_vars; /* The list of invariant ssa_vars used in step of the 430 iv_cand. */ 431 bitmap inv_exprs; /* If step is more complicated than a single ssa_var, 432 hanlde it as a new invariant expression which will 433 be hoisted out of loop. */ 434 struct iv *orig_iv; /* The original iv if this cand is added from biv with 435 smaller type. */ 436 }; 437 438 /* Hashtable entry for common candidate derived from iv uses. */ 439 struct iv_common_cand 440 { 441 tree base; 442 tree step; 443 /* IV uses from which this common candidate is derived. */ 444 auto_vec<struct iv_use *> uses; 445 hashval_t hash; 446 }; 447 448 /* Hashtable helpers. */ 449 450 struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand> 451 { 452 static inline hashval_t hash (const iv_common_cand *); 453 static inline bool equal (const iv_common_cand *, const iv_common_cand *); 454 }; 455 456 /* Hash function for possible common candidates. */ 457 458 inline hashval_t 459 iv_common_cand_hasher::hash (const iv_common_cand *ccand) 460 { 461 return ccand->hash; 462 } 463 464 /* Hash table equality function for common candidates. */ 465 466 inline bool 467 iv_common_cand_hasher::equal (const iv_common_cand *ccand1, 468 const iv_common_cand *ccand2) 469 { 470 return (ccand1->hash == ccand2->hash 471 && operand_equal_p (ccand1->base, ccand2->base, 0) 472 && operand_equal_p (ccand1->step, ccand2->step, 0) 473 && (TYPE_PRECISION (TREE_TYPE (ccand1->base)) 474 == TYPE_PRECISION (TREE_TYPE (ccand2->base)))); 475 } 476 477 /* Loop invariant expression hashtable entry. */ 478 479 struct iv_inv_expr_ent 480 { 481 /* Tree expression of the entry. */ 482 tree expr; 483 /* Unique indentifier. */ 484 int id; 485 /* Hash value. */ 486 hashval_t hash; 487 }; 488 489 /* Sort iv_inv_expr_ent pair A and B by id field. */ 490 491 static int 492 sort_iv_inv_expr_ent (const void *a, const void *b) 493 { 494 const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a); 495 const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b); 496 497 unsigned id1 = (*e1)->id; 498 unsigned id2 = (*e2)->id; 499 500 if (id1 < id2) 501 return -1; 502 else if (id1 > id2) 503 return 1; 504 else 505 return 0; 506 } 507 508 /* Hashtable helpers. */ 509 510 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent> 511 { 512 static inline hashval_t hash (const iv_inv_expr_ent *); 513 static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *); 514 }; 515 516 /* Return true if uses of type TYPE represent some form of address. */ 517 518 inline bool 519 address_p (use_type type) 520 { 521 return type == USE_REF_ADDRESS || type == USE_PTR_ADDRESS; 522 } 523 524 /* Hash function for loop invariant expressions. */ 525 526 inline hashval_t 527 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr) 528 { 529 return expr->hash; 530 } 531 532 /* Hash table equality function for expressions. */ 533 534 inline bool 535 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1, 536 const iv_inv_expr_ent *expr2) 537 { 538 return expr1->hash == expr2->hash 539 && operand_equal_p (expr1->expr, expr2->expr, 0); 540 } 541 542 struct ivopts_data 543 { 544 /* The currently optimized loop. */ 545 struct loop *current_loop; 546 source_location loop_loc; 547 548 /* Numbers of iterations for all exits of the current loop. */ 549 hash_map<edge, tree_niter_desc *> *niters; 550 551 /* Number of registers used in it. */ 552 unsigned regs_used; 553 554 /* The size of version_info array allocated. */ 555 unsigned version_info_size; 556 557 /* The array of information for the ssa names. */ 558 struct version_info *version_info; 559 560 /* The hashtable of loop invariant expressions created 561 by ivopt. */ 562 hash_table<iv_inv_expr_hasher> *inv_expr_tab; 563 564 /* The bitmap of indices in version_info whose value was changed. */ 565 bitmap relevant; 566 567 /* The uses of induction variables. */ 568 vec<iv_group *> vgroups; 569 570 /* The candidates. */ 571 vec<iv_cand *> vcands; 572 573 /* A bitmap of important candidates. */ 574 bitmap important_candidates; 575 576 /* Cache used by tree_to_aff_combination_expand. */ 577 hash_map<tree, name_expansion *> *name_expansion_cache; 578 579 /* The hashtable of common candidates derived from iv uses. */ 580 hash_table<iv_common_cand_hasher> *iv_common_cand_tab; 581 582 /* The common candidates. */ 583 vec<iv_common_cand *> iv_common_cands; 584 585 /* Hash map recording base object information of tree exp. */ 586 hash_map<tree, tree> *base_object_map; 587 588 /* The maximum invariant variable id. */ 589 unsigned max_inv_var_id; 590 591 /* The maximum invariant expression id. */ 592 unsigned max_inv_expr_id; 593 594 /* Number of no_overflow BIVs which are not used in memory address. */ 595 unsigned bivs_not_used_in_addr; 596 597 /* Obstack for iv structure. */ 598 struct obstack iv_obstack; 599 600 /* Whether to consider just related and important candidates when replacing a 601 use. */ 602 bool consider_all_candidates; 603 604 /* Are we optimizing for speed? */ 605 bool speed; 606 607 /* Whether the loop body includes any function calls. */ 608 bool body_includes_call; 609 610 /* Whether the loop body can only be exited via single exit. */ 611 bool loop_single_exit_p; 612 }; 613 614 /* An assignment of iv candidates to uses. */ 615 616 struct iv_ca 617 { 618 /* The number of uses covered by the assignment. */ 619 unsigned upto; 620 621 /* Number of uses that cannot be expressed by the candidates in the set. */ 622 unsigned bad_groups; 623 624 /* Candidate assigned to a use, together with the related costs. */ 625 struct cost_pair **cand_for_group; 626 627 /* Number of times each candidate is used. */ 628 unsigned *n_cand_uses; 629 630 /* The candidates used. */ 631 bitmap cands; 632 633 /* The number of candidates in the set. */ 634 unsigned n_cands; 635 636 /* The number of invariants needed, including both invariant variants and 637 invariant expressions. */ 638 unsigned n_invs; 639 640 /* Total cost of expressing uses. */ 641 comp_cost cand_use_cost; 642 643 /* Total cost of candidates. */ 644 unsigned cand_cost; 645 646 /* Number of times each invariant variable is used. */ 647 unsigned *n_inv_var_uses; 648 649 /* Number of times each invariant expression is used. */ 650 unsigned *n_inv_expr_uses; 651 652 /* Total cost of the assignment. */ 653 comp_cost cost; 654 }; 655 656 /* Difference of two iv candidate assignments. */ 657 658 struct iv_ca_delta 659 { 660 /* Changed group. */ 661 struct iv_group *group; 662 663 /* An old assignment (for rollback purposes). */ 664 struct cost_pair *old_cp; 665 666 /* A new assignment. */ 667 struct cost_pair *new_cp; 668 669 /* Next change in the list. */ 670 struct iv_ca_delta *next; 671 }; 672 673 /* Bound on number of candidates below that all candidates are considered. */ 674 675 #define CONSIDER_ALL_CANDIDATES_BOUND \ 676 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND)) 677 678 /* If there are more iv occurrences, we just give up (it is quite unlikely that 679 optimizing such a loop would help, and it would take ages). */ 680 681 #define MAX_CONSIDERED_GROUPS \ 682 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES)) 683 684 /* If there are at most this number of ivs in the set, try removing unnecessary 685 ivs from the set always. */ 686 687 #define ALWAYS_PRUNE_CAND_SET_BOUND \ 688 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND)) 689 690 /* The list of trees for that the decl_rtl field must be reset is stored 691 here. */ 692 693 static vec<tree> decl_rtl_to_reset; 694 695 static comp_cost force_expr_to_var_cost (tree, bool); 696 697 /* The single loop exit if it dominates the latch, NULL otherwise. */ 698 699 edge 700 single_dom_exit (struct loop *loop) 701 { 702 edge exit = single_exit (loop); 703 704 if (!exit) 705 return NULL; 706 707 if (!just_once_each_iteration_p (loop, exit->src)) 708 return NULL; 709 710 return exit; 711 } 712 713 /* Dumps information about the induction variable IV to FILE. Don't dump 714 variable's name if DUMP_NAME is FALSE. The information is dumped with 715 preceding spaces indicated by INDENT_LEVEL. */ 716 717 void 718 dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level) 719 { 720 const char *p; 721 const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'}; 722 723 if (indent_level > 4) 724 indent_level = 4; 725 p = spaces + 8 - (indent_level << 1); 726 727 fprintf (file, "%sIV struct:\n", p); 728 if (iv->ssa_name && dump_name) 729 { 730 fprintf (file, "%s SSA_NAME:\t", p); 731 print_generic_expr (file, iv->ssa_name, TDF_SLIM); 732 fprintf (file, "\n"); 733 } 734 735 fprintf (file, "%s Type:\t", p); 736 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM); 737 fprintf (file, "\n"); 738 739 fprintf (file, "%s Base:\t", p); 740 print_generic_expr (file, iv->base, TDF_SLIM); 741 fprintf (file, "\n"); 742 743 fprintf (file, "%s Step:\t", p); 744 print_generic_expr (file, iv->step, TDF_SLIM); 745 fprintf (file, "\n"); 746 747 if (iv->base_object) 748 { 749 fprintf (file, "%s Object:\t", p); 750 print_generic_expr (file, iv->base_object, TDF_SLIM); 751 fprintf (file, "\n"); 752 } 753 754 fprintf (file, "%s Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N'); 755 756 fprintf (file, "%s Overflowness wrto loop niter:\t%s\n", 757 p, iv->no_overflow ? "No-overflow" : "Overflow"); 758 } 759 760 /* Dumps information about the USE to FILE. */ 761 762 void 763 dump_use (FILE *file, struct iv_use *use) 764 { 765 fprintf (file, " Use %d.%d:\n", use->group_id, use->id); 766 fprintf (file, " At stmt:\t"); 767 print_gimple_stmt (file, use->stmt, 0); 768 fprintf (file, " At pos:\t"); 769 if (use->op_p) 770 print_generic_expr (file, *use->op_p, TDF_SLIM); 771 fprintf (file, "\n"); 772 dump_iv (file, use->iv, false, 2); 773 } 774 775 /* Dumps information about the uses to FILE. */ 776 777 void 778 dump_groups (FILE *file, struct ivopts_data *data) 779 { 780 unsigned i, j; 781 struct iv_group *group; 782 783 for (i = 0; i < data->vgroups.length (); i++) 784 { 785 group = data->vgroups[i]; 786 fprintf (file, "Group %d:\n", group->id); 787 if (group->type == USE_NONLINEAR_EXPR) 788 fprintf (file, " Type:\tGENERIC\n"); 789 else if (group->type == USE_REF_ADDRESS) 790 fprintf (file, " Type:\tREFERENCE ADDRESS\n"); 791 else if (group->type == USE_PTR_ADDRESS) 792 fprintf (file, " Type:\tPOINTER ARGUMENT ADDRESS\n"); 793 else 794 { 795 gcc_assert (group->type == USE_COMPARE); 796 fprintf (file, " Type:\tCOMPARE\n"); 797 } 798 for (j = 0; j < group->vuses.length (); j++) 799 dump_use (file, group->vuses[j]); 800 } 801 } 802 803 /* Dumps information about induction variable candidate CAND to FILE. */ 804 805 void 806 dump_cand (FILE *file, struct iv_cand *cand) 807 { 808 struct iv *iv = cand->iv; 809 810 fprintf (file, "Candidate %d:\n", cand->id); 811 if (cand->inv_vars) 812 { 813 fprintf (file, " Depend on inv.vars: "); 814 dump_bitmap (file, cand->inv_vars); 815 } 816 if (cand->inv_exprs) 817 { 818 fprintf (file, " Depend on inv.exprs: "); 819 dump_bitmap (file, cand->inv_exprs); 820 } 821 822 if (cand->var_before) 823 { 824 fprintf (file, " Var befor: "); 825 print_generic_expr (file, cand->var_before, TDF_SLIM); 826 fprintf (file, "\n"); 827 } 828 if (cand->var_after) 829 { 830 fprintf (file, " Var after: "); 831 print_generic_expr (file, cand->var_after, TDF_SLIM); 832 fprintf (file, "\n"); 833 } 834 835 switch (cand->pos) 836 { 837 case IP_NORMAL: 838 fprintf (file, " Incr POS: before exit test\n"); 839 break; 840 841 case IP_BEFORE_USE: 842 fprintf (file, " Incr POS: before use %d\n", cand->ainc_use->id); 843 break; 844 845 case IP_AFTER_USE: 846 fprintf (file, " Incr POS: after use %d\n", cand->ainc_use->id); 847 break; 848 849 case IP_END: 850 fprintf (file, " Incr POS: at end\n"); 851 break; 852 853 case IP_ORIGINAL: 854 fprintf (file, " Incr POS: orig biv\n"); 855 break; 856 } 857 858 dump_iv (file, iv, false, 1); 859 } 860 861 /* Returns the info for ssa version VER. */ 862 863 static inline struct version_info * 864 ver_info (struct ivopts_data *data, unsigned ver) 865 { 866 return data->version_info + ver; 867 } 868 869 /* Returns the info for ssa name NAME. */ 870 871 static inline struct version_info * 872 name_info (struct ivopts_data *data, tree name) 873 { 874 return ver_info (data, SSA_NAME_VERSION (name)); 875 } 876 877 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be 878 emitted in LOOP. */ 879 880 static bool 881 stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt) 882 { 883 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt); 884 885 gcc_assert (bb); 886 887 if (sbb == loop->latch) 888 return true; 889 890 if (sbb != bb) 891 return false; 892 893 return stmt == last_stmt (bb); 894 } 895 896 /* Returns true if STMT if after the place where the original induction 897 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true 898 if the positions are identical. */ 899 900 static bool 901 stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal) 902 { 903 basic_block cand_bb = gimple_bb (cand->incremented_at); 904 basic_block stmt_bb = gimple_bb (stmt); 905 906 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb)) 907 return false; 908 909 if (stmt_bb != cand_bb) 910 return true; 911 912 if (true_if_equal 913 && gimple_uid (stmt) == gimple_uid (cand->incremented_at)) 914 return true; 915 return gimple_uid (stmt) > gimple_uid (cand->incremented_at); 916 } 917 918 /* Returns true if STMT if after the place where the induction variable 919 CAND is incremented in LOOP. */ 920 921 static bool 922 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt) 923 { 924 switch (cand->pos) 925 { 926 case IP_END: 927 return false; 928 929 case IP_NORMAL: 930 return stmt_after_ip_normal_pos (loop, stmt); 931 932 case IP_ORIGINAL: 933 case IP_AFTER_USE: 934 return stmt_after_inc_pos (cand, stmt, false); 935 936 case IP_BEFORE_USE: 937 return stmt_after_inc_pos (cand, stmt, true); 938 939 default: 940 gcc_unreachable (); 941 } 942 } 943 944 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */ 945 946 static bool 947 abnormal_ssa_name_p (tree exp) 948 { 949 if (!exp) 950 return false; 951 952 if (TREE_CODE (exp) != SSA_NAME) 953 return false; 954 955 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0; 956 } 957 958 /* Returns false if BASE or INDEX contains a ssa name that occurs in an 959 abnormal phi node. Callback for for_each_index. */ 960 961 static bool 962 idx_contains_abnormal_ssa_name_p (tree base, tree *index, 963 void *data ATTRIBUTE_UNUSED) 964 { 965 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF) 966 { 967 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2))) 968 return false; 969 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3))) 970 return false; 971 } 972 973 return !abnormal_ssa_name_p (*index); 974 } 975 976 /* Returns true if EXPR contains a ssa name that occurs in an 977 abnormal phi node. */ 978 979 bool 980 contains_abnormal_ssa_name_p (tree expr) 981 { 982 enum tree_code code; 983 enum tree_code_class codeclass; 984 985 if (!expr) 986 return false; 987 988 code = TREE_CODE (expr); 989 codeclass = TREE_CODE_CLASS (code); 990 991 if (code == SSA_NAME) 992 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0; 993 994 if (code == INTEGER_CST 995 || is_gimple_min_invariant (expr)) 996 return false; 997 998 if (code == ADDR_EXPR) 999 return !for_each_index (&TREE_OPERAND (expr, 0), 1000 idx_contains_abnormal_ssa_name_p, 1001 NULL); 1002 1003 if (code == COND_EXPR) 1004 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)) 1005 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)) 1006 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2)); 1007 1008 switch (codeclass) 1009 { 1010 case tcc_binary: 1011 case tcc_comparison: 1012 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))) 1013 return true; 1014 1015 /* Fallthru. */ 1016 case tcc_unary: 1017 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))) 1018 return true; 1019 1020 break; 1021 1022 default: 1023 gcc_unreachable (); 1024 } 1025 1026 return false; 1027 } 1028 1029 /* Returns the structure describing number of iterations determined from 1030 EXIT of DATA->current_loop, or NULL if something goes wrong. */ 1031 1032 static struct tree_niter_desc * 1033 niter_for_exit (struct ivopts_data *data, edge exit) 1034 { 1035 struct tree_niter_desc *desc; 1036 tree_niter_desc **slot; 1037 1038 if (!data->niters) 1039 { 1040 data->niters = new hash_map<edge, tree_niter_desc *>; 1041 slot = NULL; 1042 } 1043 else 1044 slot = data->niters->get (exit); 1045 1046 if (!slot) 1047 { 1048 /* Try to determine number of iterations. We cannot safely work with ssa 1049 names that appear in phi nodes on abnormal edges, so that we do not 1050 create overlapping life ranges for them (PR 27283). */ 1051 desc = XNEW (struct tree_niter_desc); 1052 if (!number_of_iterations_exit (data->current_loop, 1053 exit, desc, true) 1054 || contains_abnormal_ssa_name_p (desc->niter)) 1055 { 1056 XDELETE (desc); 1057 desc = NULL; 1058 } 1059 data->niters->put (exit, desc); 1060 } 1061 else 1062 desc = *slot; 1063 1064 return desc; 1065 } 1066 1067 /* Returns the structure describing number of iterations determined from 1068 single dominating exit of DATA->current_loop, or NULL if something 1069 goes wrong. */ 1070 1071 static struct tree_niter_desc * 1072 niter_for_single_dom_exit (struct ivopts_data *data) 1073 { 1074 edge exit = single_dom_exit (data->current_loop); 1075 1076 if (!exit) 1077 return NULL; 1078 1079 return niter_for_exit (data, exit); 1080 } 1081 1082 /* Initializes data structures used by the iv optimization pass, stored 1083 in DATA. */ 1084 1085 static void 1086 tree_ssa_iv_optimize_init (struct ivopts_data *data) 1087 { 1088 data->version_info_size = 2 * num_ssa_names; 1089 data->version_info = XCNEWVEC (struct version_info, data->version_info_size); 1090 data->relevant = BITMAP_ALLOC (NULL); 1091 data->important_candidates = BITMAP_ALLOC (NULL); 1092 data->max_inv_var_id = 0; 1093 data->max_inv_expr_id = 0; 1094 data->niters = NULL; 1095 data->vgroups.create (20); 1096 data->vcands.create (20); 1097 data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10); 1098 data->name_expansion_cache = NULL; 1099 data->base_object_map = NULL; 1100 data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10); 1101 data->iv_common_cands.create (20); 1102 decl_rtl_to_reset.create (20); 1103 gcc_obstack_init (&data->iv_obstack); 1104 } 1105 1106 /* walk_tree callback for determine_base_object. */ 1107 1108 static tree 1109 determine_base_object_1 (tree *tp, int *walk_subtrees, void *wdata) 1110 { 1111 tree_code code = TREE_CODE (*tp); 1112 tree obj = NULL_TREE; 1113 if (code == ADDR_EXPR) 1114 { 1115 tree base = get_base_address (TREE_OPERAND (*tp, 0)); 1116 if (!base) 1117 obj = *tp; 1118 else if (TREE_CODE (base) != MEM_REF) 1119 obj = fold_convert (ptr_type_node, build_fold_addr_expr (base)); 1120 } 1121 else if (code == SSA_NAME && POINTER_TYPE_P (TREE_TYPE (*tp))) 1122 obj = fold_convert (ptr_type_node, *tp); 1123 1124 if (!obj) 1125 { 1126 if (!EXPR_P (*tp)) 1127 *walk_subtrees = 0; 1128 1129 return NULL_TREE; 1130 } 1131 /* Record special node for multiple base objects and stop. */ 1132 if (*static_cast<tree *> (wdata)) 1133 { 1134 *static_cast<tree *> (wdata) = integer_zero_node; 1135 return integer_zero_node; 1136 } 1137 /* Record the base object and continue looking. */ 1138 *static_cast<tree *> (wdata) = obj; 1139 return NULL_TREE; 1140 } 1141 1142 /* Returns a memory object to that EXPR points with caching. Return NULL if we 1143 are able to determine that it does not point to any such object; specially 1144 return integer_zero_node if EXPR contains multiple base objects. */ 1145 1146 static tree 1147 determine_base_object (struct ivopts_data *data, tree expr) 1148 { 1149 tree *slot, obj = NULL_TREE; 1150 if (data->base_object_map) 1151 { 1152 if ((slot = data->base_object_map->get(expr)) != NULL) 1153 return *slot; 1154 } 1155 else 1156 data->base_object_map = new hash_map<tree, tree>; 1157 1158 (void) walk_tree_without_duplicates (&expr, determine_base_object_1, &obj); 1159 data->base_object_map->put (expr, obj); 1160 return obj; 1161 } 1162 1163 /* Return true if address expression with non-DECL_P operand appears 1164 in EXPR. */ 1165 1166 static bool 1167 contain_complex_addr_expr (tree expr) 1168 { 1169 bool res = false; 1170 1171 STRIP_NOPS (expr); 1172 switch (TREE_CODE (expr)) 1173 { 1174 case POINTER_PLUS_EXPR: 1175 case PLUS_EXPR: 1176 case MINUS_EXPR: 1177 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0)); 1178 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1)); 1179 break; 1180 1181 case ADDR_EXPR: 1182 return (!DECL_P (TREE_OPERAND (expr, 0))); 1183 1184 default: 1185 return false; 1186 } 1187 1188 return res; 1189 } 1190 1191 /* Allocates an induction variable with given initial value BASE and step STEP 1192 for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */ 1193 1194 static struct iv * 1195 alloc_iv (struct ivopts_data *data, tree base, tree step, 1196 bool no_overflow = false) 1197 { 1198 tree expr = base; 1199 struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack, 1200 sizeof (struct iv)); 1201 gcc_assert (step != NULL_TREE); 1202 1203 /* Lower address expression in base except ones with DECL_P as operand. 1204 By doing this: 1205 1) More accurate cost can be computed for address expressions; 1206 2) Duplicate candidates won't be created for bases in different 1207 forms, like &a[0] and &a. */ 1208 STRIP_NOPS (expr); 1209 if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0))) 1210 || contain_complex_addr_expr (expr)) 1211 { 1212 aff_tree comb; 1213 tree_to_aff_combination (expr, TREE_TYPE (expr), &comb); 1214 base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb)); 1215 } 1216 1217 iv->base = base; 1218 iv->base_object = determine_base_object (data, base); 1219 iv->step = step; 1220 iv->biv_p = false; 1221 iv->nonlin_use = NULL; 1222 iv->ssa_name = NULL_TREE; 1223 if (!no_overflow 1224 && !iv_can_overflow_p (data->current_loop, TREE_TYPE (base), 1225 base, step)) 1226 no_overflow = true; 1227 iv->no_overflow = no_overflow; 1228 iv->have_address_use = false; 1229 1230 return iv; 1231 } 1232 1233 /* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV 1234 doesn't overflow. */ 1235 1236 static void 1237 set_iv (struct ivopts_data *data, tree iv, tree base, tree step, 1238 bool no_overflow) 1239 { 1240 struct version_info *info = name_info (data, iv); 1241 1242 gcc_assert (!info->iv); 1243 1244 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv)); 1245 info->iv = alloc_iv (data, base, step, no_overflow); 1246 info->iv->ssa_name = iv; 1247 } 1248 1249 /* Finds induction variable declaration for VAR. */ 1250 1251 static struct iv * 1252 get_iv (struct ivopts_data *data, tree var) 1253 { 1254 basic_block bb; 1255 tree type = TREE_TYPE (var); 1256 1257 if (!POINTER_TYPE_P (type) 1258 && !INTEGRAL_TYPE_P (type)) 1259 return NULL; 1260 1261 if (!name_info (data, var)->iv) 1262 { 1263 bb = gimple_bb (SSA_NAME_DEF_STMT (var)); 1264 1265 if (!bb 1266 || !flow_bb_inside_loop_p (data->current_loop, bb)) 1267 set_iv (data, var, var, build_int_cst (type, 0), true); 1268 } 1269 1270 return name_info (data, var)->iv; 1271 } 1272 1273 /* Return the first non-invariant ssa var found in EXPR. */ 1274 1275 static tree 1276 extract_single_var_from_expr (tree expr) 1277 { 1278 int i, n; 1279 tree tmp; 1280 enum tree_code code; 1281 1282 if (!expr || is_gimple_min_invariant (expr)) 1283 return NULL; 1284 1285 code = TREE_CODE (expr); 1286 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code))) 1287 { 1288 n = TREE_OPERAND_LENGTH (expr); 1289 for (i = 0; i < n; i++) 1290 { 1291 tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i)); 1292 1293 if (tmp) 1294 return tmp; 1295 } 1296 } 1297 return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL; 1298 } 1299 1300 /* Finds basic ivs. */ 1301 1302 static bool 1303 find_bivs (struct ivopts_data *data) 1304 { 1305 gphi *phi; 1306 affine_iv iv; 1307 tree step, type, base, stop; 1308 bool found = false; 1309 struct loop *loop = data->current_loop; 1310 gphi_iterator psi; 1311 1312 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi)) 1313 { 1314 phi = psi.phi (); 1315 1316 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi))) 1317 continue; 1318 1319 if (virtual_operand_p (PHI_RESULT (phi))) 1320 continue; 1321 1322 if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true)) 1323 continue; 1324 1325 if (integer_zerop (iv.step)) 1326 continue; 1327 1328 step = iv.step; 1329 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop)); 1330 /* Stop expanding iv base at the first ssa var referred by iv step. 1331 Ideally we should stop at any ssa var, because that's expensive 1332 and unusual to happen, we just do it on the first one. 1333 1334 See PR64705 for the rationale. */ 1335 stop = extract_single_var_from_expr (step); 1336 base = expand_simple_operations (base, stop); 1337 if (contains_abnormal_ssa_name_p (base) 1338 || contains_abnormal_ssa_name_p (step)) 1339 continue; 1340 1341 type = TREE_TYPE (PHI_RESULT (phi)); 1342 base = fold_convert (type, base); 1343 if (step) 1344 { 1345 if (POINTER_TYPE_P (type)) 1346 step = convert_to_ptrofftype (step); 1347 else 1348 step = fold_convert (type, step); 1349 } 1350 1351 set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow); 1352 found = true; 1353 } 1354 1355 return found; 1356 } 1357 1358 /* Marks basic ivs. */ 1359 1360 static void 1361 mark_bivs (struct ivopts_data *data) 1362 { 1363 gphi *phi; 1364 gimple *def; 1365 tree var; 1366 struct iv *iv, *incr_iv; 1367 struct loop *loop = data->current_loop; 1368 basic_block incr_bb; 1369 gphi_iterator psi; 1370 1371 data->bivs_not_used_in_addr = 0; 1372 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi)) 1373 { 1374 phi = psi.phi (); 1375 1376 iv = get_iv (data, PHI_RESULT (phi)); 1377 if (!iv) 1378 continue; 1379 1380 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop)); 1381 def = SSA_NAME_DEF_STMT (var); 1382 /* Don't mark iv peeled from other one as biv. */ 1383 if (def 1384 && gimple_code (def) == GIMPLE_PHI 1385 && gimple_bb (def) == loop->header) 1386 continue; 1387 1388 incr_iv = get_iv (data, var); 1389 if (!incr_iv) 1390 continue; 1391 1392 /* If the increment is in the subloop, ignore it. */ 1393 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var)); 1394 if (incr_bb->loop_father != data->current_loop 1395 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP)) 1396 continue; 1397 1398 iv->biv_p = true; 1399 incr_iv->biv_p = true; 1400 if (iv->no_overflow) 1401 data->bivs_not_used_in_addr++; 1402 if (incr_iv->no_overflow) 1403 data->bivs_not_used_in_addr++; 1404 } 1405 } 1406 1407 /* Checks whether STMT defines a linear induction variable and stores its 1408 parameters to IV. */ 1409 1410 static bool 1411 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv) 1412 { 1413 tree lhs, stop; 1414 struct loop *loop = data->current_loop; 1415 1416 iv->base = NULL_TREE; 1417 iv->step = NULL_TREE; 1418 1419 if (gimple_code (stmt) != GIMPLE_ASSIGN) 1420 return false; 1421 1422 lhs = gimple_assign_lhs (stmt); 1423 if (TREE_CODE (lhs) != SSA_NAME) 1424 return false; 1425 1426 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true)) 1427 return false; 1428 1429 /* Stop expanding iv base at the first ssa var referred by iv step. 1430 Ideally we should stop at any ssa var, because that's expensive 1431 and unusual to happen, we just do it on the first one. 1432 1433 See PR64705 for the rationale. */ 1434 stop = extract_single_var_from_expr (iv->step); 1435 iv->base = expand_simple_operations (iv->base, stop); 1436 if (contains_abnormal_ssa_name_p (iv->base) 1437 || contains_abnormal_ssa_name_p (iv->step)) 1438 return false; 1439 1440 /* If STMT could throw, then do not consider STMT as defining a GIV. 1441 While this will suppress optimizations, we can not safely delete this 1442 GIV and associated statements, even if it appears it is not used. */ 1443 if (stmt_could_throw_p (stmt)) 1444 return false; 1445 1446 return true; 1447 } 1448 1449 /* Finds general ivs in statement STMT. */ 1450 1451 static void 1452 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt) 1453 { 1454 affine_iv iv; 1455 1456 if (!find_givs_in_stmt_scev (data, stmt, &iv)) 1457 return; 1458 1459 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow); 1460 } 1461 1462 /* Finds general ivs in basic block BB. */ 1463 1464 static void 1465 find_givs_in_bb (struct ivopts_data *data, basic_block bb) 1466 { 1467 gimple_stmt_iterator bsi; 1468 1469 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi)) 1470 find_givs_in_stmt (data, gsi_stmt (bsi)); 1471 } 1472 1473 /* Finds general ivs. */ 1474 1475 static void 1476 find_givs (struct ivopts_data *data) 1477 { 1478 struct loop *loop = data->current_loop; 1479 basic_block *body = get_loop_body_in_dom_order (loop); 1480 unsigned i; 1481 1482 for (i = 0; i < loop->num_nodes; i++) 1483 find_givs_in_bb (data, body[i]); 1484 free (body); 1485 } 1486 1487 /* For each ssa name defined in LOOP determines whether it is an induction 1488 variable and if so, its initial value and step. */ 1489 1490 static bool 1491 find_induction_variables (struct ivopts_data *data) 1492 { 1493 unsigned i; 1494 bitmap_iterator bi; 1495 1496 if (!find_bivs (data)) 1497 return false; 1498 1499 find_givs (data); 1500 mark_bivs (data); 1501 1502 if (dump_file && (dump_flags & TDF_DETAILS)) 1503 { 1504 struct tree_niter_desc *niter = niter_for_single_dom_exit (data); 1505 1506 if (niter) 1507 { 1508 fprintf (dump_file, " number of iterations "); 1509 print_generic_expr (dump_file, niter->niter, TDF_SLIM); 1510 if (!integer_zerop (niter->may_be_zero)) 1511 { 1512 fprintf (dump_file, "; zero if "); 1513 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM); 1514 } 1515 fprintf (dump_file, "\n"); 1516 }; 1517 1518 fprintf (dump_file, "\n<Induction Vars>:\n"); 1519 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi) 1520 { 1521 struct version_info *info = ver_info (data, i); 1522 if (info->iv && info->iv->step && !integer_zerop (info->iv->step)) 1523 dump_iv (dump_file, ver_info (data, i)->iv, true, 0); 1524 } 1525 } 1526 1527 return true; 1528 } 1529 1530 /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP. 1531 For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET 1532 is the const offset stripped from IV base and MEM_TYPE is the type 1533 of the memory being addressed. For uses of other types, ADDR_BASE 1534 and ADDR_OFFSET are zero by default and MEM_TYPE is NULL_TREE. */ 1535 1536 static struct iv_use * 1537 record_use (struct iv_group *group, tree *use_p, struct iv *iv, 1538 gimple *stmt, enum use_type type, tree mem_type, 1539 tree addr_base, poly_uint64 addr_offset) 1540 { 1541 struct iv_use *use = XCNEW (struct iv_use); 1542 1543 use->id = group->vuses.length (); 1544 use->group_id = group->id; 1545 use->type = type; 1546 use->mem_type = mem_type; 1547 use->iv = iv; 1548 use->stmt = stmt; 1549 use->op_p = use_p; 1550 use->addr_base = addr_base; 1551 use->addr_offset = addr_offset; 1552 1553 group->vuses.safe_push (use); 1554 return use; 1555 } 1556 1557 /* Checks whether OP is a loop-level invariant and if so, records it. 1558 NONLINEAR_USE is true if the invariant is used in a way we do not 1559 handle specially. */ 1560 1561 static void 1562 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use) 1563 { 1564 basic_block bb; 1565 struct version_info *info; 1566 1567 if (TREE_CODE (op) != SSA_NAME 1568 || virtual_operand_p (op)) 1569 return; 1570 1571 bb = gimple_bb (SSA_NAME_DEF_STMT (op)); 1572 if (bb 1573 && flow_bb_inside_loop_p (data->current_loop, bb)) 1574 return; 1575 1576 info = name_info (data, op); 1577 info->name = op; 1578 info->has_nonlin_use |= nonlinear_use; 1579 if (!info->inv_id) 1580 info->inv_id = ++data->max_inv_var_id; 1581 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op)); 1582 } 1583 1584 /* Record a group of TYPE. */ 1585 1586 static struct iv_group * 1587 record_group (struct ivopts_data *data, enum use_type type) 1588 { 1589 struct iv_group *group = XCNEW (struct iv_group); 1590 1591 group->id = data->vgroups.length (); 1592 group->type = type; 1593 group->related_cands = BITMAP_ALLOC (NULL); 1594 group->vuses.create (1); 1595 1596 data->vgroups.safe_push (group); 1597 return group; 1598 } 1599 1600 /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group. 1601 New group will be created if there is no existing group for the use. 1602 MEM_TYPE is the type of memory being addressed, or NULL if this 1603 isn't an address reference. */ 1604 1605 static struct iv_use * 1606 record_group_use (struct ivopts_data *data, tree *use_p, 1607 struct iv *iv, gimple *stmt, enum use_type type, 1608 tree mem_type) 1609 { 1610 tree addr_base = NULL; 1611 struct iv_group *group = NULL; 1612 poly_uint64 addr_offset = 0; 1613 1614 /* Record non address type use in a new group. */ 1615 if (address_p (type)) 1616 { 1617 unsigned int i; 1618 1619 addr_base = strip_offset (iv->base, &addr_offset); 1620 for (i = 0; i < data->vgroups.length (); i++) 1621 { 1622 struct iv_use *use; 1623 1624 group = data->vgroups[i]; 1625 use = group->vuses[0]; 1626 if (!address_p (use->type)) 1627 continue; 1628 1629 /* Check if it has the same stripped base and step. */ 1630 if (operand_equal_p (iv->base_object, use->iv->base_object, 0) 1631 && operand_equal_p (iv->step, use->iv->step, 0) 1632 && operand_equal_p (addr_base, use->addr_base, 0)) 1633 break; 1634 } 1635 if (i == data->vgroups.length ()) 1636 group = NULL; 1637 } 1638 1639 if (!group) 1640 group = record_group (data, type); 1641 1642 return record_use (group, use_p, iv, stmt, type, mem_type, 1643 addr_base, addr_offset); 1644 } 1645 1646 /* Checks whether the use OP is interesting and if so, records it. */ 1647 1648 static struct iv_use * 1649 find_interesting_uses_op (struct ivopts_data *data, tree op) 1650 { 1651 struct iv *iv; 1652 gimple *stmt; 1653 struct iv_use *use; 1654 1655 if (TREE_CODE (op) != SSA_NAME) 1656 return NULL; 1657 1658 iv = get_iv (data, op); 1659 if (!iv) 1660 return NULL; 1661 1662 if (iv->nonlin_use) 1663 { 1664 gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR); 1665 return iv->nonlin_use; 1666 } 1667 1668 if (integer_zerop (iv->step)) 1669 { 1670 record_invariant (data, op, true); 1671 return NULL; 1672 } 1673 1674 stmt = SSA_NAME_DEF_STMT (op); 1675 gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt)); 1676 1677 use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR, NULL_TREE); 1678 iv->nonlin_use = use; 1679 return use; 1680 } 1681 1682 /* Indicate how compare type iv_use can be handled. */ 1683 enum comp_iv_rewrite 1684 { 1685 COMP_IV_NA, 1686 /* We may rewrite compare type iv_use by expressing value of the iv_use. */ 1687 COMP_IV_EXPR, 1688 /* We may rewrite compare type iv_uses on both sides of comparison by 1689 expressing value of each iv_use. */ 1690 COMP_IV_EXPR_2, 1691 /* We may rewrite compare type iv_use by expressing value of the iv_use 1692 or by eliminating it with other iv_cand. */ 1693 COMP_IV_ELIM 1694 }; 1695 1696 /* Given a condition in statement STMT, checks whether it is a compare 1697 of an induction variable and an invariant. If this is the case, 1698 CONTROL_VAR is set to location of the iv, BOUND to the location of 1699 the invariant, IV_VAR and IV_BOUND are set to the corresponding 1700 induction variable descriptions, and true is returned. If this is not 1701 the case, CONTROL_VAR and BOUND are set to the arguments of the 1702 condition and false is returned. */ 1703 1704 static enum comp_iv_rewrite 1705 extract_cond_operands (struct ivopts_data *data, gimple *stmt, 1706 tree **control_var, tree **bound, 1707 struct iv **iv_var, struct iv **iv_bound) 1708 { 1709 /* The objects returned when COND has constant operands. */ 1710 static struct iv const_iv; 1711 static tree zero; 1712 tree *op0 = &zero, *op1 = &zero; 1713 struct iv *iv0 = &const_iv, *iv1 = &const_iv; 1714 enum comp_iv_rewrite rewrite_type = COMP_IV_NA; 1715 1716 if (gimple_code (stmt) == GIMPLE_COND) 1717 { 1718 gcond *cond_stmt = as_a <gcond *> (stmt); 1719 op0 = gimple_cond_lhs_ptr (cond_stmt); 1720 op1 = gimple_cond_rhs_ptr (cond_stmt); 1721 } 1722 else 1723 { 1724 op0 = gimple_assign_rhs1_ptr (stmt); 1725 op1 = gimple_assign_rhs2_ptr (stmt); 1726 } 1727 1728 zero = integer_zero_node; 1729 const_iv.step = integer_zero_node; 1730 1731 if (TREE_CODE (*op0) == SSA_NAME) 1732 iv0 = get_iv (data, *op0); 1733 if (TREE_CODE (*op1) == SSA_NAME) 1734 iv1 = get_iv (data, *op1); 1735 1736 /* If both sides of comparison are IVs. We can express ivs on both end. */ 1737 if (iv0 && iv1 && !integer_zerop (iv0->step) && !integer_zerop (iv1->step)) 1738 { 1739 rewrite_type = COMP_IV_EXPR_2; 1740 goto end; 1741 } 1742 1743 /* If none side of comparison is IV. */ 1744 if ((!iv0 || integer_zerop (iv0->step)) 1745 && (!iv1 || integer_zerop (iv1->step))) 1746 goto end; 1747 1748 /* Control variable may be on the other side. */ 1749 if (!iv0 || integer_zerop (iv0->step)) 1750 { 1751 std::swap (op0, op1); 1752 std::swap (iv0, iv1); 1753 } 1754 /* If one side is IV and the other side isn't loop invariant. */ 1755 if (!iv1) 1756 rewrite_type = COMP_IV_EXPR; 1757 /* If one side is IV and the other side is loop invariant. */ 1758 else if (!integer_zerop (iv0->step) && integer_zerop (iv1->step)) 1759 rewrite_type = COMP_IV_ELIM; 1760 1761 end: 1762 if (control_var) 1763 *control_var = op0; 1764 if (iv_var) 1765 *iv_var = iv0; 1766 if (bound) 1767 *bound = op1; 1768 if (iv_bound) 1769 *iv_bound = iv1; 1770 1771 return rewrite_type; 1772 } 1773 1774 /* Checks whether the condition in STMT is interesting and if so, 1775 records it. */ 1776 1777 static void 1778 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt) 1779 { 1780 tree *var_p, *bound_p; 1781 struct iv *var_iv, *bound_iv; 1782 enum comp_iv_rewrite ret; 1783 1784 ret = extract_cond_operands (data, stmt, 1785 &var_p, &bound_p, &var_iv, &bound_iv); 1786 if (ret == COMP_IV_NA) 1787 { 1788 find_interesting_uses_op (data, *var_p); 1789 find_interesting_uses_op (data, *bound_p); 1790 return; 1791 } 1792 1793 record_group_use (data, var_p, var_iv, stmt, USE_COMPARE, NULL_TREE); 1794 /* Record compare type iv_use for iv on the other side of comparison. */ 1795 if (ret == COMP_IV_EXPR_2) 1796 record_group_use (data, bound_p, bound_iv, stmt, USE_COMPARE, NULL_TREE); 1797 } 1798 1799 /* Returns the outermost loop EXPR is obviously invariant in 1800 relative to the loop LOOP, i.e. if all its operands are defined 1801 outside of the returned loop. Returns NULL if EXPR is not 1802 even obviously invariant in LOOP. */ 1803 1804 struct loop * 1805 outermost_invariant_loop_for_expr (struct loop *loop, tree expr) 1806 { 1807 basic_block def_bb; 1808 unsigned i, len; 1809 1810 if (is_gimple_min_invariant (expr)) 1811 return current_loops->tree_root; 1812 1813 if (TREE_CODE (expr) == SSA_NAME) 1814 { 1815 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr)); 1816 if (def_bb) 1817 { 1818 if (flow_bb_inside_loop_p (loop, def_bb)) 1819 return NULL; 1820 return superloop_at_depth (loop, 1821 loop_depth (def_bb->loop_father) + 1); 1822 } 1823 1824 return current_loops->tree_root; 1825 } 1826 1827 if (!EXPR_P (expr)) 1828 return NULL; 1829 1830 unsigned maxdepth = 0; 1831 len = TREE_OPERAND_LENGTH (expr); 1832 for (i = 0; i < len; i++) 1833 { 1834 struct loop *ivloop; 1835 if (!TREE_OPERAND (expr, i)) 1836 continue; 1837 1838 ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i)); 1839 if (!ivloop) 1840 return NULL; 1841 maxdepth = MAX (maxdepth, loop_depth (ivloop)); 1842 } 1843 1844 return superloop_at_depth (loop, maxdepth); 1845 } 1846 1847 /* Returns true if expression EXPR is obviously invariant in LOOP, 1848 i.e. if all its operands are defined outside of the LOOP. LOOP 1849 should not be the function body. */ 1850 1851 bool 1852 expr_invariant_in_loop_p (struct loop *loop, tree expr) 1853 { 1854 basic_block def_bb; 1855 unsigned i, len; 1856 1857 gcc_assert (loop_depth (loop) > 0); 1858 1859 if (is_gimple_min_invariant (expr)) 1860 return true; 1861 1862 if (TREE_CODE (expr) == SSA_NAME) 1863 { 1864 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr)); 1865 if (def_bb 1866 && flow_bb_inside_loop_p (loop, def_bb)) 1867 return false; 1868 1869 return true; 1870 } 1871 1872 if (!EXPR_P (expr)) 1873 return false; 1874 1875 len = TREE_OPERAND_LENGTH (expr); 1876 for (i = 0; i < len; i++) 1877 if (TREE_OPERAND (expr, i) 1878 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i))) 1879 return false; 1880 1881 return true; 1882 } 1883 1884 /* Given expression EXPR which computes inductive values with respect 1885 to loop recorded in DATA, this function returns biv from which EXPR 1886 is derived by tracing definition chains of ssa variables in EXPR. */ 1887 1888 static struct iv* 1889 find_deriving_biv_for_expr (struct ivopts_data *data, tree expr) 1890 { 1891 struct iv *iv; 1892 unsigned i, n; 1893 tree e2, e1; 1894 enum tree_code code; 1895 gimple *stmt; 1896 1897 if (expr == NULL_TREE) 1898 return NULL; 1899 1900 if (is_gimple_min_invariant (expr)) 1901 return NULL; 1902 1903 code = TREE_CODE (expr); 1904 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code))) 1905 { 1906 n = TREE_OPERAND_LENGTH (expr); 1907 for (i = 0; i < n; i++) 1908 { 1909 iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i)); 1910 if (iv) 1911 return iv; 1912 } 1913 } 1914 1915 /* Stop if it's not ssa name. */ 1916 if (code != SSA_NAME) 1917 return NULL; 1918 1919 iv = get_iv (data, expr); 1920 if (!iv || integer_zerop (iv->step)) 1921 return NULL; 1922 else if (iv->biv_p) 1923 return iv; 1924 1925 stmt = SSA_NAME_DEF_STMT (expr); 1926 if (gphi *phi = dyn_cast <gphi *> (stmt)) 1927 { 1928 ssa_op_iter iter; 1929 use_operand_p use_p; 1930 basic_block phi_bb = gimple_bb (phi); 1931 1932 /* Skip loop header PHI that doesn't define biv. */ 1933 if (phi_bb->loop_father == data->current_loop) 1934 return NULL; 1935 1936 if (virtual_operand_p (gimple_phi_result (phi))) 1937 return NULL; 1938 1939 FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE) 1940 { 1941 tree use = USE_FROM_PTR (use_p); 1942 iv = find_deriving_biv_for_expr (data, use); 1943 if (iv) 1944 return iv; 1945 } 1946 return NULL; 1947 } 1948 if (gimple_code (stmt) != GIMPLE_ASSIGN) 1949 return NULL; 1950 1951 e1 = gimple_assign_rhs1 (stmt); 1952 code = gimple_assign_rhs_code (stmt); 1953 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS) 1954 return find_deriving_biv_for_expr (data, e1); 1955 1956 switch (code) 1957 { 1958 case MULT_EXPR: 1959 case PLUS_EXPR: 1960 case MINUS_EXPR: 1961 case POINTER_PLUS_EXPR: 1962 /* Increments, decrements and multiplications by a constant 1963 are simple. */ 1964 e2 = gimple_assign_rhs2 (stmt); 1965 iv = find_deriving_biv_for_expr (data, e2); 1966 if (iv) 1967 return iv; 1968 gcc_fallthrough (); 1969 1970 CASE_CONVERT: 1971 /* Casts are simple. */ 1972 return find_deriving_biv_for_expr (data, e1); 1973 1974 default: 1975 break; 1976 } 1977 1978 return NULL; 1979 } 1980 1981 /* Record BIV, its predecessor and successor that they are used in 1982 address type uses. */ 1983 1984 static void 1985 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv) 1986 { 1987 unsigned i; 1988 tree type, base_1, base_2; 1989 bitmap_iterator bi; 1990 1991 if (!biv || !biv->biv_p || integer_zerop (biv->step) 1992 || biv->have_address_use || !biv->no_overflow) 1993 return; 1994 1995 type = TREE_TYPE (biv->base); 1996 if (!INTEGRAL_TYPE_P (type)) 1997 return; 1998 1999 biv->have_address_use = true; 2000 data->bivs_not_used_in_addr--; 2001 base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step); 2002 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi) 2003 { 2004 struct iv *iv = ver_info (data, i)->iv; 2005 2006 if (!iv || !iv->biv_p || integer_zerop (iv->step) 2007 || iv->have_address_use || !iv->no_overflow) 2008 continue; 2009 2010 if (type != TREE_TYPE (iv->base) 2011 || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base))) 2012 continue; 2013 2014 if (!operand_equal_p (biv->step, iv->step, 0)) 2015 continue; 2016 2017 base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step); 2018 if (operand_equal_p (base_1, iv->base, 0) 2019 || operand_equal_p (base_2, biv->base, 0)) 2020 { 2021 iv->have_address_use = true; 2022 data->bivs_not_used_in_addr--; 2023 } 2024 } 2025 } 2026 2027 /* Cumulates the steps of indices into DATA and replaces their values with the 2028 initial ones. Returns false when the value of the index cannot be determined. 2029 Callback for for_each_index. */ 2030 2031 struct ifs_ivopts_data 2032 { 2033 struct ivopts_data *ivopts_data; 2034 gimple *stmt; 2035 tree step; 2036 }; 2037 2038 static bool 2039 idx_find_step (tree base, tree *idx, void *data) 2040 { 2041 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data; 2042 struct iv *iv; 2043 bool use_overflow_semantics = false; 2044 tree step, iv_base, iv_step, lbound, off; 2045 struct loop *loop = dta->ivopts_data->current_loop; 2046 2047 /* If base is a component ref, require that the offset of the reference 2048 be invariant. */ 2049 if (TREE_CODE (base) == COMPONENT_REF) 2050 { 2051 off = component_ref_field_offset (base); 2052 return expr_invariant_in_loop_p (loop, off); 2053 } 2054 2055 /* If base is array, first check whether we will be able to move the 2056 reference out of the loop (in order to take its address in strength 2057 reduction). In order for this to work we need both lower bound 2058 and step to be loop invariants. */ 2059 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF) 2060 { 2061 /* Moreover, for a range, the size needs to be invariant as well. */ 2062 if (TREE_CODE (base) == ARRAY_RANGE_REF 2063 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base)))) 2064 return false; 2065 2066 step = array_ref_element_size (base); 2067 lbound = array_ref_low_bound (base); 2068 2069 if (!expr_invariant_in_loop_p (loop, step) 2070 || !expr_invariant_in_loop_p (loop, lbound)) 2071 return false; 2072 } 2073 2074 if (TREE_CODE (*idx) != SSA_NAME) 2075 return true; 2076 2077 iv = get_iv (dta->ivopts_data, *idx); 2078 if (!iv) 2079 return false; 2080 2081 /* XXX We produce for a base of *D42 with iv->base being &x[0] 2082 *&x[0], which is not folded and does not trigger the 2083 ARRAY_REF path below. */ 2084 *idx = iv->base; 2085 2086 if (integer_zerop (iv->step)) 2087 return true; 2088 2089 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF) 2090 { 2091 step = array_ref_element_size (base); 2092 2093 /* We only handle addresses whose step is an integer constant. */ 2094 if (TREE_CODE (step) != INTEGER_CST) 2095 return false; 2096 } 2097 else 2098 /* The step for pointer arithmetics already is 1 byte. */ 2099 step = size_one_node; 2100 2101 iv_base = iv->base; 2102 iv_step = iv->step; 2103 if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step))) 2104 use_overflow_semantics = true; 2105 2106 if (!convert_affine_scev (dta->ivopts_data->current_loop, 2107 sizetype, &iv_base, &iv_step, dta->stmt, 2108 use_overflow_semantics)) 2109 { 2110 /* The index might wrap. */ 2111 return false; 2112 } 2113 2114 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step); 2115 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step); 2116 2117 if (dta->ivopts_data->bivs_not_used_in_addr) 2118 { 2119 if (!iv->biv_p) 2120 iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name); 2121 2122 record_biv_for_address_use (dta->ivopts_data, iv); 2123 } 2124 return true; 2125 } 2126 2127 /* Records use in index IDX. Callback for for_each_index. Ivopts data 2128 object is passed to it in DATA. */ 2129 2130 static bool 2131 idx_record_use (tree base, tree *idx, 2132 void *vdata) 2133 { 2134 struct ivopts_data *data = (struct ivopts_data *) vdata; 2135 find_interesting_uses_op (data, *idx); 2136 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF) 2137 { 2138 find_interesting_uses_op (data, array_ref_element_size (base)); 2139 find_interesting_uses_op (data, array_ref_low_bound (base)); 2140 } 2141 return true; 2142 } 2143 2144 /* If we can prove that TOP = cst * BOT for some constant cst, 2145 store cst to MUL and return true. Otherwise return false. 2146 The returned value is always sign-extended, regardless of the 2147 signedness of TOP and BOT. */ 2148 2149 static bool 2150 constant_multiple_of (tree top, tree bot, widest_int *mul) 2151 { 2152 tree mby; 2153 enum tree_code code; 2154 unsigned precision = TYPE_PRECISION (TREE_TYPE (top)); 2155 widest_int res, p0, p1; 2156 2157 STRIP_NOPS (top); 2158 STRIP_NOPS (bot); 2159 2160 if (operand_equal_p (top, bot, 0)) 2161 { 2162 *mul = 1; 2163 return true; 2164 } 2165 2166 code = TREE_CODE (top); 2167 switch (code) 2168 { 2169 case MULT_EXPR: 2170 mby = TREE_OPERAND (top, 1); 2171 if (TREE_CODE (mby) != INTEGER_CST) 2172 return false; 2173 2174 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res)) 2175 return false; 2176 2177 *mul = wi::sext (res * wi::to_widest (mby), precision); 2178 return true; 2179 2180 case PLUS_EXPR: 2181 case MINUS_EXPR: 2182 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0) 2183 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1)) 2184 return false; 2185 2186 if (code == MINUS_EXPR) 2187 p1 = -p1; 2188 *mul = wi::sext (p0 + p1, precision); 2189 return true; 2190 2191 case INTEGER_CST: 2192 if (TREE_CODE (bot) != INTEGER_CST) 2193 return false; 2194 2195 p0 = widest_int::from (wi::to_wide (top), SIGNED); 2196 p1 = widest_int::from (wi::to_wide (bot), SIGNED); 2197 if (p1 == 0) 2198 return false; 2199 *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision); 2200 return res == 0; 2201 2202 default: 2203 if (POLY_INT_CST_P (top) 2204 && POLY_INT_CST_P (bot) 2205 && constant_multiple_p (wi::to_poly_widest (top), 2206 wi::to_poly_widest (bot), mul)) 2207 return true; 2208 2209 return false; 2210 } 2211 } 2212 2213 /* Return true if memory reference REF with step STEP may be unaligned. */ 2214 2215 static bool 2216 may_be_unaligned_p (tree ref, tree step) 2217 { 2218 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target, 2219 thus they are not misaligned. */ 2220 if (TREE_CODE (ref) == TARGET_MEM_REF) 2221 return false; 2222 2223 unsigned int align = TYPE_ALIGN (TREE_TYPE (ref)); 2224 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align) 2225 align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))); 2226 2227 unsigned HOST_WIDE_INT bitpos; 2228 unsigned int ref_align; 2229 get_object_alignment_1 (ref, &ref_align, &bitpos); 2230 if (ref_align < align 2231 || (bitpos % align) != 0 2232 || (bitpos % BITS_PER_UNIT) != 0) 2233 return true; 2234 2235 unsigned int trailing_zeros = tree_ctz (step); 2236 if (trailing_zeros < HOST_BITS_PER_INT 2237 && (1U << trailing_zeros) * BITS_PER_UNIT < align) 2238 return true; 2239 2240 return false; 2241 } 2242 2243 /* Return true if EXPR may be non-addressable. */ 2244 2245 bool 2246 may_be_nonaddressable_p (tree expr) 2247 { 2248 switch (TREE_CODE (expr)) 2249 { 2250 case TARGET_MEM_REF: 2251 /* TARGET_MEM_REFs are translated directly to valid MEMs on the 2252 target, thus they are always addressable. */ 2253 return false; 2254 2255 case MEM_REF: 2256 /* Likewise for MEM_REFs, modulo the storage order. */ 2257 return REF_REVERSE_STORAGE_ORDER (expr); 2258 2259 case BIT_FIELD_REF: 2260 if (REF_REVERSE_STORAGE_ORDER (expr)) 2261 return true; 2262 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0)); 2263 2264 case COMPONENT_REF: 2265 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0)))) 2266 return true; 2267 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1)) 2268 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0)); 2269 2270 case ARRAY_REF: 2271 case ARRAY_RANGE_REF: 2272 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0)))) 2273 return true; 2274 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0)); 2275 2276 case VIEW_CONVERT_EXPR: 2277 /* This kind of view-conversions may wrap non-addressable objects 2278 and make them look addressable. After some processing the 2279 non-addressability may be uncovered again, causing ADDR_EXPRs 2280 of inappropriate objects to be built. */ 2281 if (is_gimple_reg (TREE_OPERAND (expr, 0)) 2282 || !is_gimple_addressable (TREE_OPERAND (expr, 0))) 2283 return true; 2284 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0)); 2285 2286 CASE_CONVERT: 2287 return true; 2288 2289 default: 2290 break; 2291 } 2292 2293 return false; 2294 } 2295 2296 /* Finds addresses in *OP_P inside STMT. */ 2297 2298 static void 2299 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt, 2300 tree *op_p) 2301 { 2302 tree base = *op_p, step = size_zero_node; 2303 struct iv *civ; 2304 struct ifs_ivopts_data ifs_ivopts_data; 2305 2306 /* Do not play with volatile memory references. A bit too conservative, 2307 perhaps, but safe. */ 2308 if (gimple_has_volatile_ops (stmt)) 2309 goto fail; 2310 2311 /* Ignore bitfields for now. Not really something terribly complicated 2312 to handle. TODO. */ 2313 if (TREE_CODE (base) == BIT_FIELD_REF) 2314 goto fail; 2315 2316 base = unshare_expr (base); 2317 2318 if (TREE_CODE (base) == TARGET_MEM_REF) 2319 { 2320 tree type = build_pointer_type (TREE_TYPE (base)); 2321 tree astep; 2322 2323 if (TMR_BASE (base) 2324 && TREE_CODE (TMR_BASE (base)) == SSA_NAME) 2325 { 2326 civ = get_iv (data, TMR_BASE (base)); 2327 if (!civ) 2328 goto fail; 2329 2330 TMR_BASE (base) = civ->base; 2331 step = civ->step; 2332 } 2333 if (TMR_INDEX2 (base) 2334 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME) 2335 { 2336 civ = get_iv (data, TMR_INDEX2 (base)); 2337 if (!civ) 2338 goto fail; 2339 2340 TMR_INDEX2 (base) = civ->base; 2341 step = civ->step; 2342 } 2343 if (TMR_INDEX (base) 2344 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME) 2345 { 2346 civ = get_iv (data, TMR_INDEX (base)); 2347 if (!civ) 2348 goto fail; 2349 2350 TMR_INDEX (base) = civ->base; 2351 astep = civ->step; 2352 2353 if (astep) 2354 { 2355 if (TMR_STEP (base)) 2356 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep); 2357 2358 step = fold_build2 (PLUS_EXPR, type, step, astep); 2359 } 2360 } 2361 2362 if (integer_zerop (step)) 2363 goto fail; 2364 base = tree_mem_ref_addr (type, base); 2365 } 2366 else 2367 { 2368 ifs_ivopts_data.ivopts_data = data; 2369 ifs_ivopts_data.stmt = stmt; 2370 ifs_ivopts_data.step = size_zero_node; 2371 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data) 2372 || integer_zerop (ifs_ivopts_data.step)) 2373 goto fail; 2374 step = ifs_ivopts_data.step; 2375 2376 /* Check that the base expression is addressable. This needs 2377 to be done after substituting bases of IVs into it. */ 2378 if (may_be_nonaddressable_p (base)) 2379 goto fail; 2380 2381 /* Moreover, on strict alignment platforms, check that it is 2382 sufficiently aligned. */ 2383 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step)) 2384 goto fail; 2385 2386 base = build_fold_addr_expr (base); 2387 2388 /* Substituting bases of IVs into the base expression might 2389 have caused folding opportunities. */ 2390 if (TREE_CODE (base) == ADDR_EXPR) 2391 { 2392 tree *ref = &TREE_OPERAND (base, 0); 2393 while (handled_component_p (*ref)) 2394 ref = &TREE_OPERAND (*ref, 0); 2395 if (TREE_CODE (*ref) == MEM_REF) 2396 { 2397 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref), 2398 TREE_OPERAND (*ref, 0), 2399 TREE_OPERAND (*ref, 1)); 2400 if (tem) 2401 *ref = tem; 2402 } 2403 } 2404 } 2405 2406 civ = alloc_iv (data, base, step); 2407 /* Fail if base object of this memory reference is unknown. */ 2408 if (civ->base_object == NULL_TREE) 2409 goto fail; 2410 2411 record_group_use (data, op_p, civ, stmt, USE_REF_ADDRESS, TREE_TYPE (*op_p)); 2412 return; 2413 2414 fail: 2415 for_each_index (op_p, idx_record_use, data); 2416 } 2417 2418 /* Finds and records invariants used in STMT. */ 2419 2420 static void 2421 find_invariants_stmt (struct ivopts_data *data, gimple *stmt) 2422 { 2423 ssa_op_iter iter; 2424 use_operand_p use_p; 2425 tree op; 2426 2427 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE) 2428 { 2429 op = USE_FROM_PTR (use_p); 2430 record_invariant (data, op, false); 2431 } 2432 } 2433 2434 /* CALL calls an internal function. If operand *OP_P will become an 2435 address when the call is expanded, return the type of the memory 2436 being addressed, otherwise return null. */ 2437 2438 static tree 2439 get_mem_type_for_internal_fn (gcall *call, tree *op_p) 2440 { 2441 switch (gimple_call_internal_fn (call)) 2442 { 2443 case IFN_MASK_LOAD: 2444 if (op_p == gimple_call_arg_ptr (call, 0)) 2445 return TREE_TYPE (gimple_call_lhs (call)); 2446 return NULL_TREE; 2447 2448 case IFN_MASK_STORE: 2449 if (op_p == gimple_call_arg_ptr (call, 0)) 2450 return TREE_TYPE (gimple_call_arg (call, 3)); 2451 return NULL_TREE; 2452 2453 default: 2454 return NULL_TREE; 2455 } 2456 } 2457 2458 /* IV is a (non-address) iv that describes operand *OP_P of STMT. 2459 Return true if the operand will become an address when STMT 2460 is expanded and record the associated address use if so. */ 2461 2462 static bool 2463 find_address_like_use (struct ivopts_data *data, gimple *stmt, tree *op_p, 2464 struct iv *iv) 2465 { 2466 /* Fail if base object of this memory reference is unknown. */ 2467 if (iv->base_object == NULL_TREE) 2468 return false; 2469 2470 tree mem_type = NULL_TREE; 2471 if (gcall *call = dyn_cast <gcall *> (stmt)) 2472 if (gimple_call_internal_p (call)) 2473 mem_type = get_mem_type_for_internal_fn (call, op_p); 2474 if (mem_type) 2475 { 2476 iv = alloc_iv (data, iv->base, iv->step); 2477 record_group_use (data, op_p, iv, stmt, USE_PTR_ADDRESS, mem_type); 2478 return true; 2479 } 2480 return false; 2481 } 2482 2483 /* Finds interesting uses of induction variables in the statement STMT. */ 2484 2485 static void 2486 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt) 2487 { 2488 struct iv *iv; 2489 tree op, *lhs, *rhs; 2490 ssa_op_iter iter; 2491 use_operand_p use_p; 2492 enum tree_code code; 2493 2494 find_invariants_stmt (data, stmt); 2495 2496 if (gimple_code (stmt) == GIMPLE_COND) 2497 { 2498 find_interesting_uses_cond (data, stmt); 2499 return; 2500 } 2501 2502 if (is_gimple_assign (stmt)) 2503 { 2504 lhs = gimple_assign_lhs_ptr (stmt); 2505 rhs = gimple_assign_rhs1_ptr (stmt); 2506 2507 if (TREE_CODE (*lhs) == SSA_NAME) 2508 { 2509 /* If the statement defines an induction variable, the uses are not 2510 interesting by themselves. */ 2511 2512 iv = get_iv (data, *lhs); 2513 2514 if (iv && !integer_zerop (iv->step)) 2515 return; 2516 } 2517 2518 code = gimple_assign_rhs_code (stmt); 2519 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS 2520 && (REFERENCE_CLASS_P (*rhs) 2521 || is_gimple_val (*rhs))) 2522 { 2523 if (REFERENCE_CLASS_P (*rhs)) 2524 find_interesting_uses_address (data, stmt, rhs); 2525 else 2526 find_interesting_uses_op (data, *rhs); 2527 2528 if (REFERENCE_CLASS_P (*lhs)) 2529 find_interesting_uses_address (data, stmt, lhs); 2530 return; 2531 } 2532 else if (TREE_CODE_CLASS (code) == tcc_comparison) 2533 { 2534 find_interesting_uses_cond (data, stmt); 2535 return; 2536 } 2537 2538 /* TODO -- we should also handle address uses of type 2539 2540 memory = call (whatever); 2541 2542 and 2543 2544 call (memory). */ 2545 } 2546 2547 if (gimple_code (stmt) == GIMPLE_PHI 2548 && gimple_bb (stmt) == data->current_loop->header) 2549 { 2550 iv = get_iv (data, PHI_RESULT (stmt)); 2551 2552 if (iv && !integer_zerop (iv->step)) 2553 return; 2554 } 2555 2556 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE) 2557 { 2558 op = USE_FROM_PTR (use_p); 2559 2560 if (TREE_CODE (op) != SSA_NAME) 2561 continue; 2562 2563 iv = get_iv (data, op); 2564 if (!iv) 2565 continue; 2566 2567 if (!find_address_like_use (data, stmt, use_p->use, iv)) 2568 find_interesting_uses_op (data, op); 2569 } 2570 } 2571 2572 /* Finds interesting uses of induction variables outside of loops 2573 on loop exit edge EXIT. */ 2574 2575 static void 2576 find_interesting_uses_outside (struct ivopts_data *data, edge exit) 2577 { 2578 gphi *phi; 2579 gphi_iterator psi; 2580 tree def; 2581 2582 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi)) 2583 { 2584 phi = psi.phi (); 2585 def = PHI_ARG_DEF_FROM_EDGE (phi, exit); 2586 if (!virtual_operand_p (def)) 2587 find_interesting_uses_op (data, def); 2588 } 2589 } 2590 2591 /* Return TRUE if OFFSET is within the range of [base + offset] addressing 2592 mode for memory reference represented by USE. */ 2593 2594 static GTY (()) vec<rtx, va_gc> *addr_list; 2595 2596 static bool 2597 addr_offset_valid_p (struct iv_use *use, poly_int64 offset) 2598 { 2599 rtx reg, addr; 2600 unsigned list_index; 2601 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base)); 2602 machine_mode addr_mode, mem_mode = TYPE_MODE (use->mem_type); 2603 2604 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode; 2605 if (list_index >= vec_safe_length (addr_list)) 2606 vec_safe_grow_cleared (addr_list, list_index + MAX_MACHINE_MODE); 2607 2608 addr = (*addr_list)[list_index]; 2609 if (!addr) 2610 { 2611 addr_mode = targetm.addr_space.address_mode (as); 2612 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1); 2613 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX); 2614 (*addr_list)[list_index] = addr; 2615 } 2616 else 2617 addr_mode = GET_MODE (addr); 2618 2619 XEXP (addr, 1) = gen_int_mode (offset, addr_mode); 2620 return (memory_address_addr_space_p (mem_mode, addr, as)); 2621 } 2622 2623 /* Comparison function to sort group in ascending order of addr_offset. */ 2624 2625 static int 2626 group_compare_offset (const void *a, const void *b) 2627 { 2628 const struct iv_use *const *u1 = (const struct iv_use *const *) a; 2629 const struct iv_use *const *u2 = (const struct iv_use *const *) b; 2630 2631 return compare_sizes_for_sort ((*u1)->addr_offset, (*u2)->addr_offset); 2632 } 2633 2634 /* Check if small groups should be split. Return true if no group 2635 contains more than two uses with distinct addr_offsets. Return 2636 false otherwise. We want to split such groups because: 2637 2638 1) Small groups don't have much benefit and may interfer with 2639 general candidate selection. 2640 2) Size for problem with only small groups is usually small and 2641 general algorithm can handle it well. 2642 2643 TODO -- Above claim may not hold when we want to merge memory 2644 accesses with conseuctive addresses. */ 2645 2646 static bool 2647 split_small_address_groups_p (struct ivopts_data *data) 2648 { 2649 unsigned int i, j, distinct = 1; 2650 struct iv_use *pre; 2651 struct iv_group *group; 2652 2653 for (i = 0; i < data->vgroups.length (); i++) 2654 { 2655 group = data->vgroups[i]; 2656 if (group->vuses.length () == 1) 2657 continue; 2658 2659 gcc_assert (address_p (group->type)); 2660 if (group->vuses.length () == 2) 2661 { 2662 if (compare_sizes_for_sort (group->vuses[0]->addr_offset, 2663 group->vuses[1]->addr_offset) > 0) 2664 std::swap (group->vuses[0], group->vuses[1]); 2665 } 2666 else 2667 group->vuses.qsort (group_compare_offset); 2668 2669 if (distinct > 2) 2670 continue; 2671 2672 distinct = 1; 2673 for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++) 2674 { 2675 if (maybe_ne (group->vuses[j]->addr_offset, pre->addr_offset)) 2676 { 2677 pre = group->vuses[j]; 2678 distinct++; 2679 } 2680 2681 if (distinct > 2) 2682 break; 2683 } 2684 } 2685 2686 return (distinct <= 2); 2687 } 2688 2689 /* For each group of address type uses, this function further groups 2690 these uses according to the maximum offset supported by target's 2691 [base + offset] addressing mode. */ 2692 2693 static void 2694 split_address_groups (struct ivopts_data *data) 2695 { 2696 unsigned int i, j; 2697 /* Always split group. */ 2698 bool split_p = split_small_address_groups_p (data); 2699 2700 for (i = 0; i < data->vgroups.length (); i++) 2701 { 2702 struct iv_group *new_group = NULL; 2703 struct iv_group *group = data->vgroups[i]; 2704 struct iv_use *use = group->vuses[0]; 2705 2706 use->id = 0; 2707 use->group_id = group->id; 2708 if (group->vuses.length () == 1) 2709 continue; 2710 2711 gcc_assert (address_p (use->type)); 2712 2713 for (j = 1; j < group->vuses.length ();) 2714 { 2715 struct iv_use *next = group->vuses[j]; 2716 poly_int64 offset = next->addr_offset - use->addr_offset; 2717 2718 /* Split group if aksed to, or the offset against the first 2719 use can't fit in offset part of addressing mode. IV uses 2720 having the same offset are still kept in one group. */ 2721 if (maybe_ne (offset, 0) 2722 && (split_p || !addr_offset_valid_p (use, offset))) 2723 { 2724 if (!new_group) 2725 new_group = record_group (data, group->type); 2726 group->vuses.ordered_remove (j); 2727 new_group->vuses.safe_push (next); 2728 continue; 2729 } 2730 2731 next->id = j; 2732 next->group_id = group->id; 2733 j++; 2734 } 2735 } 2736 } 2737 2738 /* Finds uses of the induction variables that are interesting. */ 2739 2740 static void 2741 find_interesting_uses (struct ivopts_data *data) 2742 { 2743 basic_block bb; 2744 gimple_stmt_iterator bsi; 2745 basic_block *body = get_loop_body (data->current_loop); 2746 unsigned i; 2747 edge e; 2748 2749 for (i = 0; i < data->current_loop->num_nodes; i++) 2750 { 2751 edge_iterator ei; 2752 bb = body[i]; 2753 2754 FOR_EACH_EDGE (e, ei, bb->succs) 2755 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) 2756 && !flow_bb_inside_loop_p (data->current_loop, e->dest)) 2757 find_interesting_uses_outside (data, e); 2758 2759 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi)) 2760 find_interesting_uses_stmt (data, gsi_stmt (bsi)); 2761 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi)) 2762 if (!is_gimple_debug (gsi_stmt (bsi))) 2763 find_interesting_uses_stmt (data, gsi_stmt (bsi)); 2764 } 2765 free (body); 2766 2767 split_address_groups (data); 2768 2769 if (dump_file && (dump_flags & TDF_DETAILS)) 2770 { 2771 fprintf (dump_file, "\n<IV Groups>:\n"); 2772 dump_groups (dump_file, data); 2773 fprintf (dump_file, "\n"); 2774 } 2775 } 2776 2777 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR 2778 is true, assume we are inside an address. If TOP_COMPREF is true, assume 2779 we are at the top-level of the processed address. */ 2780 2781 static tree 2782 strip_offset_1 (tree expr, bool inside_addr, bool top_compref, 2783 poly_int64 *offset) 2784 { 2785 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step; 2786 enum tree_code code; 2787 tree type, orig_type = TREE_TYPE (expr); 2788 poly_int64 off0, off1; 2789 HOST_WIDE_INT st; 2790 tree orig_expr = expr; 2791 2792 STRIP_NOPS (expr); 2793 2794 type = TREE_TYPE (expr); 2795 code = TREE_CODE (expr); 2796 *offset = 0; 2797 2798 switch (code) 2799 { 2800 case POINTER_PLUS_EXPR: 2801 case PLUS_EXPR: 2802 case MINUS_EXPR: 2803 op0 = TREE_OPERAND (expr, 0); 2804 op1 = TREE_OPERAND (expr, 1); 2805 2806 op0 = strip_offset_1 (op0, false, false, &off0); 2807 op1 = strip_offset_1 (op1, false, false, &off1); 2808 2809 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1); 2810 if (op0 == TREE_OPERAND (expr, 0) 2811 && op1 == TREE_OPERAND (expr, 1)) 2812 return orig_expr; 2813 2814 if (integer_zerop (op1)) 2815 expr = op0; 2816 else if (integer_zerop (op0)) 2817 { 2818 if (code == MINUS_EXPR) 2819 expr = fold_build1 (NEGATE_EXPR, type, op1); 2820 else 2821 expr = op1; 2822 } 2823 else 2824 expr = fold_build2 (code, type, op0, op1); 2825 2826 return fold_convert (orig_type, expr); 2827 2828 case MULT_EXPR: 2829 op1 = TREE_OPERAND (expr, 1); 2830 if (!cst_and_fits_in_hwi (op1)) 2831 return orig_expr; 2832 2833 op0 = TREE_OPERAND (expr, 0); 2834 op0 = strip_offset_1 (op0, false, false, &off0); 2835 if (op0 == TREE_OPERAND (expr, 0)) 2836 return orig_expr; 2837 2838 *offset = off0 * int_cst_value (op1); 2839 if (integer_zerop (op0)) 2840 expr = op0; 2841 else 2842 expr = fold_build2 (MULT_EXPR, type, op0, op1); 2843 2844 return fold_convert (orig_type, expr); 2845 2846 case ARRAY_REF: 2847 case ARRAY_RANGE_REF: 2848 if (!inside_addr) 2849 return orig_expr; 2850 2851 step = array_ref_element_size (expr); 2852 if (!cst_and_fits_in_hwi (step)) 2853 break; 2854 2855 st = int_cst_value (step); 2856 op1 = TREE_OPERAND (expr, 1); 2857 op1 = strip_offset_1 (op1, false, false, &off1); 2858 *offset = off1 * st; 2859 2860 if (top_compref 2861 && integer_zerop (op1)) 2862 { 2863 /* Strip the component reference completely. */ 2864 op0 = TREE_OPERAND (expr, 0); 2865 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0); 2866 *offset += off0; 2867 return op0; 2868 } 2869 break; 2870 2871 case COMPONENT_REF: 2872 { 2873 tree field; 2874 2875 if (!inside_addr) 2876 return orig_expr; 2877 2878 tmp = component_ref_field_offset (expr); 2879 field = TREE_OPERAND (expr, 1); 2880 if (top_compref 2881 && cst_and_fits_in_hwi (tmp) 2882 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field))) 2883 { 2884 HOST_WIDE_INT boffset, abs_off; 2885 2886 /* Strip the component reference completely. */ 2887 op0 = TREE_OPERAND (expr, 0); 2888 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0); 2889 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field)); 2890 abs_off = abs_hwi (boffset) / BITS_PER_UNIT; 2891 if (boffset < 0) 2892 abs_off = -abs_off; 2893 2894 *offset = off0 + int_cst_value (tmp) + abs_off; 2895 return op0; 2896 } 2897 } 2898 break; 2899 2900 case ADDR_EXPR: 2901 op0 = TREE_OPERAND (expr, 0); 2902 op0 = strip_offset_1 (op0, true, true, &off0); 2903 *offset += off0; 2904 2905 if (op0 == TREE_OPERAND (expr, 0)) 2906 return orig_expr; 2907 2908 expr = build_fold_addr_expr (op0); 2909 return fold_convert (orig_type, expr); 2910 2911 case MEM_REF: 2912 /* ??? Offset operand? */ 2913 inside_addr = false; 2914 break; 2915 2916 default: 2917 if (ptrdiff_tree_p (expr, offset) && maybe_ne (*offset, 0)) 2918 return build_int_cst (orig_type, 0); 2919 return orig_expr; 2920 } 2921 2922 /* Default handling of expressions for that we want to recurse into 2923 the first operand. */ 2924 op0 = TREE_OPERAND (expr, 0); 2925 op0 = strip_offset_1 (op0, inside_addr, false, &off0); 2926 *offset += off0; 2927 2928 if (op0 == TREE_OPERAND (expr, 0) 2929 && (!op1 || op1 == TREE_OPERAND (expr, 1))) 2930 return orig_expr; 2931 2932 expr = copy_node (expr); 2933 TREE_OPERAND (expr, 0) = op0; 2934 if (op1) 2935 TREE_OPERAND (expr, 1) = op1; 2936 2937 /* Inside address, we might strip the top level component references, 2938 thus changing type of the expression. Handling of ADDR_EXPR 2939 will fix that. */ 2940 expr = fold_convert (orig_type, expr); 2941 2942 return expr; 2943 } 2944 2945 /* Strips constant offsets from EXPR and stores them to OFFSET. */ 2946 2947 tree 2948 strip_offset (tree expr, poly_uint64_pod *offset) 2949 { 2950 poly_int64 off; 2951 tree core = strip_offset_1 (expr, false, false, &off); 2952 *offset = off; 2953 return core; 2954 } 2955 2956 /* Returns variant of TYPE that can be used as base for different uses. 2957 We return unsigned type with the same precision, which avoids problems 2958 with overflows. */ 2959 2960 static tree 2961 generic_type_for (tree type) 2962 { 2963 if (POINTER_TYPE_P (type)) 2964 return unsigned_type_for (type); 2965 2966 if (TYPE_UNSIGNED (type)) 2967 return type; 2968 2969 return unsigned_type_for (type); 2970 } 2971 2972 /* Private data for walk_tree. */ 2973 2974 struct walk_tree_data 2975 { 2976 bitmap *inv_vars; 2977 struct ivopts_data *idata; 2978 }; 2979 2980 /* Callback function for walk_tree, it records invariants and symbol 2981 reference in *EXPR_P. DATA is the structure storing result info. */ 2982 2983 static tree 2984 find_inv_vars_cb (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data) 2985 { 2986 tree op = *expr_p; 2987 struct version_info *info; 2988 struct walk_tree_data *wdata = (struct walk_tree_data*) data; 2989 2990 if (TREE_CODE (op) != SSA_NAME) 2991 return NULL_TREE; 2992 2993 info = name_info (wdata->idata, op); 2994 /* Because we expand simple operations when finding IVs, loop invariant 2995 variable that isn't referred by the original loop could be used now. 2996 Record such invariant variables here. */ 2997 if (!info->iv) 2998 { 2999 struct ivopts_data *idata = wdata->idata; 3000 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (op)); 3001 3002 if (!bb || !flow_bb_inside_loop_p (idata->current_loop, bb)) 3003 { 3004 set_iv (idata, op, op, build_int_cst (TREE_TYPE (op), 0), true); 3005 record_invariant (idata, op, false); 3006 } 3007 } 3008 if (!info->inv_id || info->has_nonlin_use) 3009 return NULL_TREE; 3010 3011 if (!*wdata->inv_vars) 3012 *wdata->inv_vars = BITMAP_ALLOC (NULL); 3013 bitmap_set_bit (*wdata->inv_vars, info->inv_id); 3014 3015 return NULL_TREE; 3016 } 3017 3018 /* Records invariants in *EXPR_P. INV_VARS is the bitmap to that we should 3019 store it. */ 3020 3021 static inline void 3022 find_inv_vars (struct ivopts_data *data, tree *expr_p, bitmap *inv_vars) 3023 { 3024 struct walk_tree_data wdata; 3025 3026 if (!inv_vars) 3027 return; 3028 3029 wdata.idata = data; 3030 wdata.inv_vars = inv_vars; 3031 walk_tree (expr_p, find_inv_vars_cb, &wdata, NULL); 3032 } 3033 3034 /* Get entry from invariant expr hash table for INV_EXPR. New entry 3035 will be recorded if it doesn't exist yet. Given below two exprs: 3036 inv_expr + cst1, inv_expr + cst2 3037 It's hard to make decision whether constant part should be stripped 3038 or not. We choose to not strip based on below facts: 3039 1) We need to count ADD cost for constant part if it's stripped, 3040 which is't always trivial where this functions is called. 3041 2) Stripping constant away may be conflict with following loop 3042 invariant hoisting pass. 3043 3) Not stripping constant away results in more invariant exprs, 3044 which usually leads to decision preferring lower reg pressure. */ 3045 3046 static iv_inv_expr_ent * 3047 get_loop_invariant_expr (struct ivopts_data *data, tree inv_expr) 3048 { 3049 STRIP_NOPS (inv_expr); 3050 3051 if (poly_int_tree_p (inv_expr) 3052 || TREE_CODE (inv_expr) == SSA_NAME) 3053 return NULL; 3054 3055 /* Don't strip constant part away as we used to. */ 3056 3057 /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent. */ 3058 struct iv_inv_expr_ent ent; 3059 ent.expr = inv_expr; 3060 ent.hash = iterative_hash_expr (inv_expr, 0); 3061 struct iv_inv_expr_ent **slot = data->inv_expr_tab->find_slot (&ent, INSERT); 3062 3063 if (!*slot) 3064 { 3065 *slot = XNEW (struct iv_inv_expr_ent); 3066 (*slot)->expr = inv_expr; 3067 (*slot)->hash = ent.hash; 3068 (*slot)->id = ++data->max_inv_expr_id; 3069 } 3070 3071 return *slot; 3072 } 3073 3074 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and 3075 position to POS. If USE is not NULL, the candidate is set as related to 3076 it. If both BASE and STEP are NULL, we add a pseudocandidate for the 3077 replacement of the final value of the iv by a direct computation. */ 3078 3079 static struct iv_cand * 3080 add_candidate_1 (struct ivopts_data *data, 3081 tree base, tree step, bool important, enum iv_position pos, 3082 struct iv_use *use, gimple *incremented_at, 3083 struct iv *orig_iv = NULL) 3084 { 3085 unsigned i; 3086 struct iv_cand *cand = NULL; 3087 tree type, orig_type; 3088 3089 gcc_assert (base && step); 3090 3091 /* -fkeep-gc-roots-live means that we have to keep a real pointer 3092 live, but the ivopts code may replace a real pointer with one 3093 pointing before or after the memory block that is then adjusted 3094 into the memory block during the loop. FIXME: It would likely be 3095 better to actually force the pointer live and still use ivopts; 3096 for example, it would be enough to write the pointer into memory 3097 and keep it there until after the loop. */ 3098 if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base))) 3099 return NULL; 3100 3101 /* For non-original variables, make sure their values are computed in a type 3102 that does not invoke undefined behavior on overflows (since in general, 3103 we cannot prove that these induction variables are non-wrapping). */ 3104 if (pos != IP_ORIGINAL) 3105 { 3106 orig_type = TREE_TYPE (base); 3107 type = generic_type_for (orig_type); 3108 if (type != orig_type) 3109 { 3110 base = fold_convert (type, base); 3111 step = fold_convert (type, step); 3112 } 3113 } 3114 3115 for (i = 0; i < data->vcands.length (); i++) 3116 { 3117 cand = data->vcands[i]; 3118 3119 if (cand->pos != pos) 3120 continue; 3121 3122 if (cand->incremented_at != incremented_at 3123 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE) 3124 && cand->ainc_use != use)) 3125 continue; 3126 3127 if (operand_equal_p (base, cand->iv->base, 0) 3128 && operand_equal_p (step, cand->iv->step, 0) 3129 && (TYPE_PRECISION (TREE_TYPE (base)) 3130 == TYPE_PRECISION (TREE_TYPE (cand->iv->base)))) 3131 break; 3132 } 3133 3134 if (i == data->vcands.length ()) 3135 { 3136 cand = XCNEW (struct iv_cand); 3137 cand->id = i; 3138 cand->iv = alloc_iv (data, base, step); 3139 cand->pos = pos; 3140 if (pos != IP_ORIGINAL) 3141 { 3142 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp"); 3143 cand->var_after = cand->var_before; 3144 } 3145 cand->important = important; 3146 cand->incremented_at = incremented_at; 3147 data->vcands.safe_push (cand); 3148 3149 if (!poly_int_tree_p (step)) 3150 { 3151 find_inv_vars (data, &step, &cand->inv_vars); 3152 3153 iv_inv_expr_ent *inv_expr = get_loop_invariant_expr (data, step); 3154 /* Share bitmap between inv_vars and inv_exprs for cand. */ 3155 if (inv_expr != NULL) 3156 { 3157 cand->inv_exprs = cand->inv_vars; 3158 cand->inv_vars = NULL; 3159 if (cand->inv_exprs) 3160 bitmap_clear (cand->inv_exprs); 3161 else 3162 cand->inv_exprs = BITMAP_ALLOC (NULL); 3163 3164 bitmap_set_bit (cand->inv_exprs, inv_expr->id); 3165 } 3166 } 3167 3168 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE) 3169 cand->ainc_use = use; 3170 else 3171 cand->ainc_use = NULL; 3172 3173 cand->orig_iv = orig_iv; 3174 if (dump_file && (dump_flags & TDF_DETAILS)) 3175 dump_cand (dump_file, cand); 3176 } 3177 3178 cand->important |= important; 3179 3180 /* Relate candidate to the group for which it is added. */ 3181 if (use) 3182 bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i); 3183 3184 return cand; 3185 } 3186 3187 /* Returns true if incrementing the induction variable at the end of the LOOP 3188 is allowed. 3189 3190 The purpose is to avoid splitting latch edge with a biv increment, thus 3191 creating a jump, possibly confusing other optimization passes and leaving 3192 less freedom to scheduler. So we allow IP_END only if IP_NORMAL is not 3193 available (so we do not have a better alternative), or if the latch edge 3194 is already nonempty. */ 3195 3196 static bool 3197 allow_ip_end_pos_p (struct loop *loop) 3198 { 3199 if (!ip_normal_pos (loop)) 3200 return true; 3201 3202 if (!empty_block_p (ip_end_pos (loop))) 3203 return true; 3204 3205 return false; 3206 } 3207 3208 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE. 3209 Important field is set to IMPORTANT. */ 3210 3211 static void 3212 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step, 3213 bool important, struct iv_use *use) 3214 { 3215 basic_block use_bb = gimple_bb (use->stmt); 3216 machine_mode mem_mode; 3217 unsigned HOST_WIDE_INT cstepi; 3218 3219 /* If we insert the increment in any position other than the standard 3220 ones, we must ensure that it is incremented once per iteration. 3221 It must not be in an inner nested loop, or one side of an if 3222 statement. */ 3223 if (use_bb->loop_father != data->current_loop 3224 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb) 3225 || stmt_can_throw_internal (use->stmt) 3226 || !cst_and_fits_in_hwi (step)) 3227 return; 3228 3229 cstepi = int_cst_value (step); 3230 3231 mem_mode = TYPE_MODE (use->mem_type); 3232 if (((USE_LOAD_PRE_INCREMENT (mem_mode) 3233 || USE_STORE_PRE_INCREMENT (mem_mode)) 3234 && known_eq (GET_MODE_SIZE (mem_mode), cstepi)) 3235 || ((USE_LOAD_PRE_DECREMENT (mem_mode) 3236 || USE_STORE_PRE_DECREMENT (mem_mode)) 3237 && known_eq (GET_MODE_SIZE (mem_mode), -cstepi))) 3238 { 3239 enum tree_code code = MINUS_EXPR; 3240 tree new_base; 3241 tree new_step = step; 3242 3243 if (POINTER_TYPE_P (TREE_TYPE (base))) 3244 { 3245 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step); 3246 code = POINTER_PLUS_EXPR; 3247 } 3248 else 3249 new_step = fold_convert (TREE_TYPE (base), new_step); 3250 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step); 3251 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use, 3252 use->stmt); 3253 } 3254 if (((USE_LOAD_POST_INCREMENT (mem_mode) 3255 || USE_STORE_POST_INCREMENT (mem_mode)) 3256 && known_eq (GET_MODE_SIZE (mem_mode), cstepi)) 3257 || ((USE_LOAD_POST_DECREMENT (mem_mode) 3258 || USE_STORE_POST_DECREMENT (mem_mode)) 3259 && known_eq (GET_MODE_SIZE (mem_mode), -cstepi))) 3260 { 3261 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use, 3262 use->stmt); 3263 } 3264 } 3265 3266 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and 3267 position to POS. If USE is not NULL, the candidate is set as related to 3268 it. The candidate computation is scheduled before exit condition and at 3269 the end of loop. */ 3270 3271 static void 3272 add_candidate (struct ivopts_data *data, 3273 tree base, tree step, bool important, struct iv_use *use, 3274 struct iv *orig_iv = NULL) 3275 { 3276 if (ip_normal_pos (data->current_loop)) 3277 add_candidate_1 (data, base, step, important, 3278 IP_NORMAL, use, NULL, orig_iv); 3279 if (ip_end_pos (data->current_loop) 3280 && allow_ip_end_pos_p (data->current_loop)) 3281 add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv); 3282 } 3283 3284 /* Adds standard iv candidates. */ 3285 3286 static void 3287 add_standard_iv_candidates (struct ivopts_data *data) 3288 { 3289 add_candidate (data, integer_zero_node, integer_one_node, true, NULL); 3290 3291 /* The same for a double-integer type if it is still fast enough. */ 3292 if (TYPE_PRECISION 3293 (long_integer_type_node) > TYPE_PRECISION (integer_type_node) 3294 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD) 3295 add_candidate (data, build_int_cst (long_integer_type_node, 0), 3296 build_int_cst (long_integer_type_node, 1), true, NULL); 3297 3298 /* The same for a double-integer type if it is still fast enough. */ 3299 if (TYPE_PRECISION 3300 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node) 3301 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD) 3302 add_candidate (data, build_int_cst (long_long_integer_type_node, 0), 3303 build_int_cst (long_long_integer_type_node, 1), true, NULL); 3304 } 3305 3306 3307 /* Adds candidates bases on the old induction variable IV. */ 3308 3309 static void 3310 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv) 3311 { 3312 gimple *phi; 3313 tree def; 3314 struct iv_cand *cand; 3315 3316 /* Check if this biv is used in address type use. */ 3317 if (iv->no_overflow && iv->have_address_use 3318 && INTEGRAL_TYPE_P (TREE_TYPE (iv->base)) 3319 && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype)) 3320 { 3321 tree base = fold_convert (sizetype, iv->base); 3322 tree step = fold_convert (sizetype, iv->step); 3323 3324 /* Add iv cand of same precision as index part in TARGET_MEM_REF. */ 3325 add_candidate (data, base, step, true, NULL, iv); 3326 /* Add iv cand of the original type only if it has nonlinear use. */ 3327 if (iv->nonlin_use) 3328 add_candidate (data, iv->base, iv->step, true, NULL); 3329 } 3330 else 3331 add_candidate (data, iv->base, iv->step, true, NULL); 3332 3333 /* The same, but with initial value zero. */ 3334 if (POINTER_TYPE_P (TREE_TYPE (iv->base))) 3335 add_candidate (data, size_int (0), iv->step, true, NULL); 3336 else 3337 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0), 3338 iv->step, true, NULL); 3339 3340 phi = SSA_NAME_DEF_STMT (iv->ssa_name); 3341 if (gimple_code (phi) == GIMPLE_PHI) 3342 { 3343 /* Additionally record the possibility of leaving the original iv 3344 untouched. */ 3345 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop)); 3346 /* Don't add candidate if it's from another PHI node because 3347 it's an affine iv appearing in the form of PEELED_CHREC. */ 3348 phi = SSA_NAME_DEF_STMT (def); 3349 if (gimple_code (phi) != GIMPLE_PHI) 3350 { 3351 cand = add_candidate_1 (data, 3352 iv->base, iv->step, true, IP_ORIGINAL, NULL, 3353 SSA_NAME_DEF_STMT (def)); 3354 if (cand) 3355 { 3356 cand->var_before = iv->ssa_name; 3357 cand->var_after = def; 3358 } 3359 } 3360 else 3361 gcc_assert (gimple_bb (phi) == data->current_loop->header); 3362 } 3363 } 3364 3365 /* Adds candidates based on the old induction variables. */ 3366 3367 static void 3368 add_iv_candidate_for_bivs (struct ivopts_data *data) 3369 { 3370 unsigned i; 3371 struct iv *iv; 3372 bitmap_iterator bi; 3373 3374 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi) 3375 { 3376 iv = ver_info (data, i)->iv; 3377 if (iv && iv->biv_p && !integer_zerop (iv->step)) 3378 add_iv_candidate_for_biv (data, iv); 3379 } 3380 } 3381 3382 /* Record common candidate {BASE, STEP} derived from USE in hashtable. */ 3383 3384 static void 3385 record_common_cand (struct ivopts_data *data, tree base, 3386 tree step, struct iv_use *use) 3387 { 3388 struct iv_common_cand ent; 3389 struct iv_common_cand **slot; 3390 3391 ent.base = base; 3392 ent.step = step; 3393 ent.hash = iterative_hash_expr (base, 0); 3394 ent.hash = iterative_hash_expr (step, ent.hash); 3395 3396 slot = data->iv_common_cand_tab->find_slot (&ent, INSERT); 3397 if (*slot == NULL) 3398 { 3399 *slot = new iv_common_cand (); 3400 (*slot)->base = base; 3401 (*slot)->step = step; 3402 (*slot)->uses.create (8); 3403 (*slot)->hash = ent.hash; 3404 data->iv_common_cands.safe_push ((*slot)); 3405 } 3406 3407 gcc_assert (use != NULL); 3408 (*slot)->uses.safe_push (use); 3409 return; 3410 } 3411 3412 /* Comparison function used to sort common candidates. */ 3413 3414 static int 3415 common_cand_cmp (const void *p1, const void *p2) 3416 { 3417 unsigned n1, n2; 3418 const struct iv_common_cand *const *const ccand1 3419 = (const struct iv_common_cand *const *)p1; 3420 const struct iv_common_cand *const *const ccand2 3421 = (const struct iv_common_cand *const *)p2; 3422 3423 n1 = (*ccand1)->uses.length (); 3424 n2 = (*ccand2)->uses.length (); 3425 return n2 - n1; 3426 } 3427 3428 /* Adds IV candidates based on common candidated recorded. */ 3429 3430 static void 3431 add_iv_candidate_derived_from_uses (struct ivopts_data *data) 3432 { 3433 unsigned i, j; 3434 struct iv_cand *cand_1, *cand_2; 3435 3436 data->iv_common_cands.qsort (common_cand_cmp); 3437 for (i = 0; i < data->iv_common_cands.length (); i++) 3438 { 3439 struct iv_common_cand *ptr = data->iv_common_cands[i]; 3440 3441 /* Only add IV candidate if it's derived from multiple uses. */ 3442 if (ptr->uses.length () <= 1) 3443 break; 3444 3445 cand_1 = NULL; 3446 cand_2 = NULL; 3447 if (ip_normal_pos (data->current_loop)) 3448 cand_1 = add_candidate_1 (data, ptr->base, ptr->step, 3449 false, IP_NORMAL, NULL, NULL); 3450 3451 if (ip_end_pos (data->current_loop) 3452 && allow_ip_end_pos_p (data->current_loop)) 3453 cand_2 = add_candidate_1 (data, ptr->base, ptr->step, 3454 false, IP_END, NULL, NULL); 3455 3456 /* Bind deriving uses and the new candidates. */ 3457 for (j = 0; j < ptr->uses.length (); j++) 3458 { 3459 struct iv_group *group = data->vgroups[ptr->uses[j]->group_id]; 3460 if (cand_1) 3461 bitmap_set_bit (group->related_cands, cand_1->id); 3462 if (cand_2) 3463 bitmap_set_bit (group->related_cands, cand_2->id); 3464 } 3465 } 3466 3467 /* Release data since it is useless from this point. */ 3468 data->iv_common_cand_tab->empty (); 3469 data->iv_common_cands.truncate (0); 3470 } 3471 3472 /* Adds candidates based on the value of USE's iv. */ 3473 3474 static void 3475 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use) 3476 { 3477 poly_uint64 offset; 3478 tree base; 3479 tree basetype; 3480 struct iv *iv = use->iv; 3481 3482 add_candidate (data, iv->base, iv->step, false, use); 3483 3484 /* Record common candidate for use in case it can be shared by others. */ 3485 record_common_cand (data, iv->base, iv->step, use); 3486 3487 /* Record common candidate with initial value zero. */ 3488 basetype = TREE_TYPE (iv->base); 3489 if (POINTER_TYPE_P (basetype)) 3490 basetype = sizetype; 3491 record_common_cand (data, build_int_cst (basetype, 0), iv->step, use); 3492 3493 /* Record common candidate with constant offset stripped in base. 3494 Like the use itself, we also add candidate directly for it. */ 3495 base = strip_offset (iv->base, &offset); 3496 if (maybe_ne (offset, 0U) || base != iv->base) 3497 { 3498 record_common_cand (data, base, iv->step, use); 3499 add_candidate (data, base, iv->step, false, use); 3500 } 3501 3502 /* Record common candidate with base_object removed in base. */ 3503 base = iv->base; 3504 STRIP_NOPS (base); 3505 if (iv->base_object != NULL && TREE_CODE (base) == POINTER_PLUS_EXPR) 3506 { 3507 tree step = iv->step; 3508 3509 STRIP_NOPS (step); 3510 base = TREE_OPERAND (base, 1); 3511 step = fold_convert (sizetype, step); 3512 record_common_cand (data, base, step, use); 3513 /* Also record common candidate with offset stripped. */ 3514 base = strip_offset (base, &offset); 3515 if (maybe_ne (offset, 0U)) 3516 record_common_cand (data, base, step, use); 3517 } 3518 3519 /* At last, add auto-incremental candidates. Make such variables 3520 important since other iv uses with same base object may be based 3521 on it. */ 3522 if (use != NULL && address_p (use->type)) 3523 add_autoinc_candidates (data, iv->base, iv->step, true, use); 3524 } 3525 3526 /* Adds candidates based on the uses. */ 3527 3528 static void 3529 add_iv_candidate_for_groups (struct ivopts_data *data) 3530 { 3531 unsigned i; 3532 3533 /* Only add candidate for the first use in group. */ 3534 for (i = 0; i < data->vgroups.length (); i++) 3535 { 3536 struct iv_group *group = data->vgroups[i]; 3537 3538 gcc_assert (group->vuses[0] != NULL); 3539 add_iv_candidate_for_use (data, group->vuses[0]); 3540 } 3541 add_iv_candidate_derived_from_uses (data); 3542 } 3543 3544 /* Record important candidates and add them to related_cands bitmaps. */ 3545 3546 static void 3547 record_important_candidates (struct ivopts_data *data) 3548 { 3549 unsigned i; 3550 struct iv_group *group; 3551 3552 for (i = 0; i < data->vcands.length (); i++) 3553 { 3554 struct iv_cand *cand = data->vcands[i]; 3555 3556 if (cand->important) 3557 bitmap_set_bit (data->important_candidates, i); 3558 } 3559 3560 data->consider_all_candidates = (data->vcands.length () 3561 <= CONSIDER_ALL_CANDIDATES_BOUND); 3562 3563 /* Add important candidates to groups' related_cands bitmaps. */ 3564 for (i = 0; i < data->vgroups.length (); i++) 3565 { 3566 group = data->vgroups[i]; 3567 bitmap_ior_into (group->related_cands, data->important_candidates); 3568 } 3569 } 3570 3571 /* Allocates the data structure mapping the (use, candidate) pairs to costs. 3572 If consider_all_candidates is true, we use a two-dimensional array, otherwise 3573 we allocate a simple list to every use. */ 3574 3575 static void 3576 alloc_use_cost_map (struct ivopts_data *data) 3577 { 3578 unsigned i, size, s; 3579 3580 for (i = 0; i < data->vgroups.length (); i++) 3581 { 3582 struct iv_group *group = data->vgroups[i]; 3583 3584 if (data->consider_all_candidates) 3585 size = data->vcands.length (); 3586 else 3587 { 3588 s = bitmap_count_bits (group->related_cands); 3589 3590 /* Round up to the power of two, so that moduling by it is fast. */ 3591 size = s ? (1 << ceil_log2 (s)) : 1; 3592 } 3593 3594 group->n_map_members = size; 3595 group->cost_map = XCNEWVEC (struct cost_pair, size); 3596 } 3597 } 3598 3599 /* Sets cost of (GROUP, CAND) pair to COST and record that it depends 3600 on invariants INV_VARS and that the value used in expressing it is 3601 VALUE, and in case of iv elimination the comparison operator is COMP. */ 3602 3603 static void 3604 set_group_iv_cost (struct ivopts_data *data, 3605 struct iv_group *group, struct iv_cand *cand, 3606 comp_cost cost, bitmap inv_vars, tree value, 3607 enum tree_code comp, bitmap inv_exprs) 3608 { 3609 unsigned i, s; 3610 3611 if (cost.infinite_cost_p ()) 3612 { 3613 BITMAP_FREE (inv_vars); 3614 BITMAP_FREE (inv_exprs); 3615 return; 3616 } 3617 3618 if (data->consider_all_candidates) 3619 { 3620 group->cost_map[cand->id].cand = cand; 3621 group->cost_map[cand->id].cost = cost; 3622 group->cost_map[cand->id].inv_vars = inv_vars; 3623 group->cost_map[cand->id].inv_exprs = inv_exprs; 3624 group->cost_map[cand->id].value = value; 3625 group->cost_map[cand->id].comp = comp; 3626 return; 3627 } 3628 3629 /* n_map_members is a power of two, so this computes modulo. */ 3630 s = cand->id & (group->n_map_members - 1); 3631 for (i = s; i < group->n_map_members; i++) 3632 if (!group->cost_map[i].cand) 3633 goto found; 3634 for (i = 0; i < s; i++) 3635 if (!group->cost_map[i].cand) 3636 goto found; 3637 3638 gcc_unreachable (); 3639 3640 found: 3641 group->cost_map[i].cand = cand; 3642 group->cost_map[i].cost = cost; 3643 group->cost_map[i].inv_vars = inv_vars; 3644 group->cost_map[i].inv_exprs = inv_exprs; 3645 group->cost_map[i].value = value; 3646 group->cost_map[i].comp = comp; 3647 } 3648 3649 /* Gets cost of (GROUP, CAND) pair. */ 3650 3651 static struct cost_pair * 3652 get_group_iv_cost (struct ivopts_data *data, struct iv_group *group, 3653 struct iv_cand *cand) 3654 { 3655 unsigned i, s; 3656 struct cost_pair *ret; 3657 3658 if (!cand) 3659 return NULL; 3660 3661 if (data->consider_all_candidates) 3662 { 3663 ret = group->cost_map + cand->id; 3664 if (!ret->cand) 3665 return NULL; 3666 3667 return ret; 3668 } 3669 3670 /* n_map_members is a power of two, so this computes modulo. */ 3671 s = cand->id & (group->n_map_members - 1); 3672 for (i = s; i < group->n_map_members; i++) 3673 if (group->cost_map[i].cand == cand) 3674 return group->cost_map + i; 3675 else if (group->cost_map[i].cand == NULL) 3676 return NULL; 3677 for (i = 0; i < s; i++) 3678 if (group->cost_map[i].cand == cand) 3679 return group->cost_map + i; 3680 else if (group->cost_map[i].cand == NULL) 3681 return NULL; 3682 3683 return NULL; 3684 } 3685 3686 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */ 3687 static rtx 3688 produce_memory_decl_rtl (tree obj, int *regno) 3689 { 3690 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj)); 3691 machine_mode address_mode = targetm.addr_space.address_mode (as); 3692 rtx x; 3693 3694 gcc_assert (obj); 3695 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj)) 3696 { 3697 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj)); 3698 x = gen_rtx_SYMBOL_REF (address_mode, name); 3699 SET_SYMBOL_REF_DECL (x, obj); 3700 x = gen_rtx_MEM (DECL_MODE (obj), x); 3701 set_mem_addr_space (x, as); 3702 targetm.encode_section_info (obj, x, true); 3703 } 3704 else 3705 { 3706 x = gen_raw_REG (address_mode, (*regno)++); 3707 x = gen_rtx_MEM (DECL_MODE (obj), x); 3708 set_mem_addr_space (x, as); 3709 } 3710 3711 return x; 3712 } 3713 3714 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for 3715 walk_tree. DATA contains the actual fake register number. */ 3716 3717 static tree 3718 prepare_decl_rtl (tree *expr_p, int *ws, void *data) 3719 { 3720 tree obj = NULL_TREE; 3721 rtx x = NULL_RTX; 3722 int *regno = (int *) data; 3723 3724 switch (TREE_CODE (*expr_p)) 3725 { 3726 case ADDR_EXPR: 3727 for (expr_p = &TREE_OPERAND (*expr_p, 0); 3728 handled_component_p (*expr_p); 3729 expr_p = &TREE_OPERAND (*expr_p, 0)) 3730 continue; 3731 obj = *expr_p; 3732 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj)) 3733 x = produce_memory_decl_rtl (obj, regno); 3734 break; 3735 3736 case SSA_NAME: 3737 *ws = 0; 3738 obj = SSA_NAME_VAR (*expr_p); 3739 /* Defer handling of anonymous SSA_NAMEs to the expander. */ 3740 if (!obj) 3741 return NULL_TREE; 3742 if (!DECL_RTL_SET_P (obj)) 3743 x = gen_raw_REG (DECL_MODE (obj), (*regno)++); 3744 break; 3745 3746 case VAR_DECL: 3747 case PARM_DECL: 3748 case RESULT_DECL: 3749 *ws = 0; 3750 obj = *expr_p; 3751 3752 if (DECL_RTL_SET_P (obj)) 3753 break; 3754 3755 if (DECL_MODE (obj) == BLKmode) 3756 x = produce_memory_decl_rtl (obj, regno); 3757 else 3758 x = gen_raw_REG (DECL_MODE (obj), (*regno)++); 3759 3760 break; 3761 3762 default: 3763 break; 3764 } 3765 3766 if (x) 3767 { 3768 decl_rtl_to_reset.safe_push (obj); 3769 SET_DECL_RTL (obj, x); 3770 } 3771 3772 return NULL_TREE; 3773 } 3774 3775 /* Determines cost of the computation of EXPR. */ 3776 3777 static unsigned 3778 computation_cost (tree expr, bool speed) 3779 { 3780 rtx_insn *seq; 3781 rtx rslt; 3782 tree type = TREE_TYPE (expr); 3783 unsigned cost; 3784 /* Avoid using hard regs in ways which may be unsupported. */ 3785 int regno = LAST_VIRTUAL_REGISTER + 1; 3786 struct cgraph_node *node = cgraph_node::get (current_function_decl); 3787 enum node_frequency real_frequency = node->frequency; 3788 3789 node->frequency = NODE_FREQUENCY_NORMAL; 3790 crtl->maybe_hot_insn_p = speed; 3791 walk_tree (&expr, prepare_decl_rtl, ®no, NULL); 3792 start_sequence (); 3793 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL); 3794 seq = get_insns (); 3795 end_sequence (); 3796 default_rtl_profile (); 3797 node->frequency = real_frequency; 3798 3799 cost = seq_cost (seq, speed); 3800 if (MEM_P (rslt)) 3801 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type), 3802 TYPE_ADDR_SPACE (type), speed); 3803 else if (!REG_P (rslt)) 3804 cost += set_src_cost (rslt, TYPE_MODE (type), speed); 3805 3806 return cost; 3807 } 3808 3809 /* Returns variable containing the value of candidate CAND at statement AT. */ 3810 3811 static tree 3812 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt) 3813 { 3814 if (stmt_after_increment (loop, cand, stmt)) 3815 return cand->var_after; 3816 else 3817 return cand->var_before; 3818 } 3819 3820 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the 3821 same precision that is at least as wide as the precision of TYPE, stores 3822 BA to A and BB to B, and returns the type of BA. Otherwise, returns the 3823 type of A and B. */ 3824 3825 static tree 3826 determine_common_wider_type (tree *a, tree *b) 3827 { 3828 tree wider_type = NULL; 3829 tree suba, subb; 3830 tree atype = TREE_TYPE (*a); 3831 3832 if (CONVERT_EXPR_P (*a)) 3833 { 3834 suba = TREE_OPERAND (*a, 0); 3835 wider_type = TREE_TYPE (suba); 3836 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype)) 3837 return atype; 3838 } 3839 else 3840 return atype; 3841 3842 if (CONVERT_EXPR_P (*b)) 3843 { 3844 subb = TREE_OPERAND (*b, 0); 3845 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb))) 3846 return atype; 3847 } 3848 else 3849 return atype; 3850 3851 *a = suba; 3852 *b = subb; 3853 return wider_type; 3854 } 3855 3856 /* Determines the expression by that USE is expressed from induction variable 3857 CAND at statement AT in LOOP. The expression is stored in two parts in a 3858 decomposed form. The invariant part is stored in AFF_INV; while variant 3859 part in AFF_VAR. Store ratio of CAND.step over USE.step in PRAT if it's 3860 non-null. Returns false if USE cannot be expressed using CAND. */ 3861 3862 static bool 3863 get_computation_aff_1 (struct loop *loop, gimple *at, struct iv_use *use, 3864 struct iv_cand *cand, struct aff_tree *aff_inv, 3865 struct aff_tree *aff_var, widest_int *prat = NULL) 3866 { 3867 tree ubase = use->iv->base, ustep = use->iv->step; 3868 tree cbase = cand->iv->base, cstep = cand->iv->step; 3869 tree common_type, uutype, var, cstep_common; 3870 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase); 3871 aff_tree aff_cbase; 3872 widest_int rat; 3873 3874 /* We must have a precision to express the values of use. */ 3875 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype)) 3876 return false; 3877 3878 var = var_at_stmt (loop, cand, at); 3879 uutype = unsigned_type_for (utype); 3880 3881 /* If the conversion is not noop, perform it. */ 3882 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype)) 3883 { 3884 if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase) 3885 && (CONVERT_EXPR_P (cstep) || poly_int_tree_p (cstep))) 3886 { 3887 tree inner_base, inner_step, inner_type; 3888 inner_base = TREE_OPERAND (cbase, 0); 3889 if (CONVERT_EXPR_P (cstep)) 3890 inner_step = TREE_OPERAND (cstep, 0); 3891 else 3892 inner_step = cstep; 3893 3894 inner_type = TREE_TYPE (inner_base); 3895 /* If candidate is added from a biv whose type is smaller than 3896 ctype, we know both candidate and the biv won't overflow. 3897 In this case, it's safe to skip the convertion in candidate. 3898 As an example, (unsigned short)((unsigned long)A) equals to 3899 (unsigned short)A, if A has a type no larger than short. */ 3900 if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype)) 3901 { 3902 cbase = inner_base; 3903 cstep = inner_step; 3904 } 3905 } 3906 cbase = fold_convert (uutype, cbase); 3907 cstep = fold_convert (uutype, cstep); 3908 var = fold_convert (uutype, var); 3909 } 3910 3911 /* Ratio is 1 when computing the value of biv cand by itself. 3912 We can't rely on constant_multiple_of in this case because the 3913 use is created after the original biv is selected. The call 3914 could fail because of inconsistent fold behavior. See PR68021 3915 for more information. */ 3916 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt) 3917 { 3918 gcc_assert (is_gimple_assign (use->stmt)); 3919 gcc_assert (use->iv->ssa_name == cand->var_after); 3920 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after); 3921 rat = 1; 3922 } 3923 else if (!constant_multiple_of (ustep, cstep, &rat)) 3924 return false; 3925 3926 if (prat) 3927 *prat = rat; 3928 3929 /* In case both UBASE and CBASE are shortened to UUTYPE from some common 3930 type, we achieve better folding by computing their difference in this 3931 wider type, and cast the result to UUTYPE. We do not need to worry about 3932 overflows, as all the arithmetics will in the end be performed in UUTYPE 3933 anyway. */ 3934 common_type = determine_common_wider_type (&ubase, &cbase); 3935 3936 /* use = ubase - ratio * cbase + ratio * var. */ 3937 tree_to_aff_combination (ubase, common_type, aff_inv); 3938 tree_to_aff_combination (cbase, common_type, &aff_cbase); 3939 tree_to_aff_combination (var, uutype, aff_var); 3940 3941 /* We need to shift the value if we are after the increment. */ 3942 if (stmt_after_increment (loop, cand, at)) 3943 { 3944 aff_tree cstep_aff; 3945 3946 if (common_type != uutype) 3947 cstep_common = fold_convert (common_type, cstep); 3948 else 3949 cstep_common = cstep; 3950 3951 tree_to_aff_combination (cstep_common, common_type, &cstep_aff); 3952 aff_combination_add (&aff_cbase, &cstep_aff); 3953 } 3954 3955 aff_combination_scale (&aff_cbase, -rat); 3956 aff_combination_add (aff_inv, &aff_cbase); 3957 if (common_type != uutype) 3958 aff_combination_convert (aff_inv, uutype); 3959 3960 aff_combination_scale (aff_var, rat); 3961 return true; 3962 } 3963 3964 /* Determines the expression by that USE is expressed from induction variable 3965 CAND at statement AT in LOOP. The expression is stored in a decomposed 3966 form into AFF. Returns false if USE cannot be expressed using CAND. */ 3967 3968 static bool 3969 get_computation_aff (struct loop *loop, gimple *at, struct iv_use *use, 3970 struct iv_cand *cand, struct aff_tree *aff) 3971 { 3972 aff_tree aff_var; 3973 3974 if (!get_computation_aff_1 (loop, at, use, cand, aff, &aff_var)) 3975 return false; 3976 3977 aff_combination_add (aff, &aff_var); 3978 return true; 3979 } 3980 3981 /* Return the type of USE. */ 3982 3983 static tree 3984 get_use_type (struct iv_use *use) 3985 { 3986 tree base_type = TREE_TYPE (use->iv->base); 3987 tree type; 3988 3989 if (use->type == USE_REF_ADDRESS) 3990 { 3991 /* The base_type may be a void pointer. Create a pointer type based on 3992 the mem_ref instead. */ 3993 type = build_pointer_type (TREE_TYPE (*use->op_p)); 3994 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type)) 3995 == TYPE_ADDR_SPACE (TREE_TYPE (base_type))); 3996 } 3997 else 3998 type = base_type; 3999 4000 return type; 4001 } 4002 4003 /* Determines the expression by that USE is expressed from induction variable 4004 CAND at statement AT in LOOP. The computation is unshared. */ 4005 4006 static tree 4007 get_computation_at (struct loop *loop, gimple *at, 4008 struct iv_use *use, struct iv_cand *cand) 4009 { 4010 aff_tree aff; 4011 tree type = get_use_type (use); 4012 4013 if (!get_computation_aff (loop, at, use, cand, &aff)) 4014 return NULL_TREE; 4015 unshare_aff_combination (&aff); 4016 return fold_convert (type, aff_combination_to_tree (&aff)); 4017 } 4018 4019 /* Adjust the cost COST for being in loop setup rather than loop body. 4020 If we're optimizing for space, the loop setup overhead is constant; 4021 if we're optimizing for speed, amortize it over the per-iteration cost. 4022 If ROUND_UP_P is true, the result is round up rather than to zero when 4023 optimizing for speed. */ 4024 static unsigned 4025 adjust_setup_cost (struct ivopts_data *data, unsigned cost, 4026 bool round_up_p = false) 4027 { 4028 if (cost == INFTY) 4029 return cost; 4030 else if (optimize_loop_for_speed_p (data->current_loop)) 4031 { 4032 HOST_WIDE_INT niters = avg_loop_niter (data->current_loop); 4033 return ((HOST_WIDE_INT) cost + (round_up_p ? niters - 1 : 0)) / niters; 4034 } 4035 else 4036 return cost; 4037 } 4038 4039 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the 4040 EXPR operand holding the shift. COST0 and COST1 are the costs for 4041 calculating the operands of EXPR. Returns true if successful, and returns 4042 the cost in COST. */ 4043 4044 static bool 4045 get_shiftadd_cost (tree expr, scalar_int_mode mode, comp_cost cost0, 4046 comp_cost cost1, tree mult, bool speed, comp_cost *cost) 4047 { 4048 comp_cost res; 4049 tree op1 = TREE_OPERAND (expr, 1); 4050 tree cst = TREE_OPERAND (mult, 1); 4051 tree multop = TREE_OPERAND (mult, 0); 4052 int m = exact_log2 (int_cst_value (cst)); 4053 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode)); 4054 int as_cost, sa_cost; 4055 bool mult_in_op1; 4056 4057 if (!(m >= 0 && m < maxm)) 4058 return false; 4059 4060 STRIP_NOPS (op1); 4061 mult_in_op1 = operand_equal_p (op1, mult, 0); 4062 4063 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m); 4064 4065 /* If the target has a cheap shift-and-add or shift-and-sub instruction, 4066 use that in preference to a shift insn followed by an add insn. */ 4067 sa_cost = (TREE_CODE (expr) != MINUS_EXPR 4068 ? shiftadd_cost (speed, mode, m) 4069 : (mult_in_op1 4070 ? shiftsub1_cost (speed, mode, m) 4071 : shiftsub0_cost (speed, mode, m))); 4072 4073 res = comp_cost (MIN (as_cost, sa_cost), 0); 4074 res += (mult_in_op1 ? cost0 : cost1); 4075 4076 STRIP_NOPS (multop); 4077 if (!is_gimple_val (multop)) 4078 res += force_expr_to_var_cost (multop, speed); 4079 4080 *cost = res; 4081 return true; 4082 } 4083 4084 /* Estimates cost of forcing expression EXPR into a variable. */ 4085 4086 static comp_cost 4087 force_expr_to_var_cost (tree expr, bool speed) 4088 { 4089 static bool costs_initialized = false; 4090 static unsigned integer_cost [2]; 4091 static unsigned symbol_cost [2]; 4092 static unsigned address_cost [2]; 4093 tree op0, op1; 4094 comp_cost cost0, cost1, cost; 4095 machine_mode mode; 4096 scalar_int_mode int_mode; 4097 4098 if (!costs_initialized) 4099 { 4100 tree type = build_pointer_type (integer_type_node); 4101 tree var, addr; 4102 rtx x; 4103 int i; 4104 4105 var = create_tmp_var_raw (integer_type_node, "test_var"); 4106 TREE_STATIC (var) = 1; 4107 x = produce_memory_decl_rtl (var, NULL); 4108 SET_DECL_RTL (var, x); 4109 4110 addr = build1 (ADDR_EXPR, type, var); 4111 4112 4113 for (i = 0; i < 2; i++) 4114 { 4115 integer_cost[i] = computation_cost (build_int_cst (integer_type_node, 4116 2000), i); 4117 4118 symbol_cost[i] = computation_cost (addr, i) + 1; 4119 4120 address_cost[i] 4121 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1; 4122 if (dump_file && (dump_flags & TDF_DETAILS)) 4123 { 4124 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size"); 4125 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]); 4126 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]); 4127 fprintf (dump_file, " address %d\n", (int) address_cost[i]); 4128 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]); 4129 fprintf (dump_file, "\n"); 4130 } 4131 } 4132 4133 costs_initialized = true; 4134 } 4135 4136 STRIP_NOPS (expr); 4137 4138 if (SSA_VAR_P (expr)) 4139 return no_cost; 4140 4141 if (is_gimple_min_invariant (expr)) 4142 { 4143 if (poly_int_tree_p (expr)) 4144 return comp_cost (integer_cost [speed], 0); 4145 4146 if (TREE_CODE (expr) == ADDR_EXPR) 4147 { 4148 tree obj = TREE_OPERAND (expr, 0); 4149 4150 if (VAR_P (obj) 4151 || TREE_CODE (obj) == PARM_DECL 4152 || TREE_CODE (obj) == RESULT_DECL) 4153 return comp_cost (symbol_cost [speed], 0); 4154 } 4155 4156 return comp_cost (address_cost [speed], 0); 4157 } 4158 4159 switch (TREE_CODE (expr)) 4160 { 4161 case POINTER_PLUS_EXPR: 4162 case PLUS_EXPR: 4163 case MINUS_EXPR: 4164 case MULT_EXPR: 4165 case TRUNC_DIV_EXPR: 4166 case BIT_AND_EXPR: 4167 case BIT_IOR_EXPR: 4168 case LSHIFT_EXPR: 4169 case RSHIFT_EXPR: 4170 op0 = TREE_OPERAND (expr, 0); 4171 op1 = TREE_OPERAND (expr, 1); 4172 STRIP_NOPS (op0); 4173 STRIP_NOPS (op1); 4174 break; 4175 4176 CASE_CONVERT: 4177 case NEGATE_EXPR: 4178 case BIT_NOT_EXPR: 4179 op0 = TREE_OPERAND (expr, 0); 4180 STRIP_NOPS (op0); 4181 op1 = NULL_TREE; 4182 break; 4183 4184 default: 4185 /* Just an arbitrary value, FIXME. */ 4186 return comp_cost (target_spill_cost[speed], 0); 4187 } 4188 4189 if (op0 == NULL_TREE 4190 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0)) 4191 cost0 = no_cost; 4192 else 4193 cost0 = force_expr_to_var_cost (op0, speed); 4194 4195 if (op1 == NULL_TREE 4196 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1)) 4197 cost1 = no_cost; 4198 else 4199 cost1 = force_expr_to_var_cost (op1, speed); 4200 4201 mode = TYPE_MODE (TREE_TYPE (expr)); 4202 switch (TREE_CODE (expr)) 4203 { 4204 case POINTER_PLUS_EXPR: 4205 case PLUS_EXPR: 4206 case MINUS_EXPR: 4207 case NEGATE_EXPR: 4208 cost = comp_cost (add_cost (speed, mode), 0); 4209 if (TREE_CODE (expr) != NEGATE_EXPR) 4210 { 4211 tree mult = NULL_TREE; 4212 comp_cost sa_cost; 4213 if (TREE_CODE (op1) == MULT_EXPR) 4214 mult = op1; 4215 else if (TREE_CODE (op0) == MULT_EXPR) 4216 mult = op0; 4217 4218 if (mult != NULL_TREE 4219 && is_a <scalar_int_mode> (mode, &int_mode) 4220 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1)) 4221 && get_shiftadd_cost (expr, int_mode, cost0, cost1, mult, 4222 speed, &sa_cost)) 4223 return sa_cost; 4224 } 4225 break; 4226 4227 CASE_CONVERT: 4228 { 4229 tree inner_mode, outer_mode; 4230 outer_mode = TREE_TYPE (expr); 4231 inner_mode = TREE_TYPE (op0); 4232 cost = comp_cost (convert_cost (TYPE_MODE (outer_mode), 4233 TYPE_MODE (inner_mode), speed), 0); 4234 } 4235 break; 4236 4237 case MULT_EXPR: 4238 if (cst_and_fits_in_hwi (op0)) 4239 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0), 4240 mode, speed), 0); 4241 else if (cst_and_fits_in_hwi (op1)) 4242 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1), 4243 mode, speed), 0); 4244 else 4245 return comp_cost (target_spill_cost [speed], 0); 4246 break; 4247 4248 case TRUNC_DIV_EXPR: 4249 /* Division by power of two is usually cheap, so we allow it. Forbid 4250 anything else. */ 4251 if (integer_pow2p (TREE_OPERAND (expr, 1))) 4252 cost = comp_cost (add_cost (speed, mode), 0); 4253 else 4254 cost = comp_cost (target_spill_cost[speed], 0); 4255 break; 4256 4257 case BIT_AND_EXPR: 4258 case BIT_IOR_EXPR: 4259 case BIT_NOT_EXPR: 4260 case LSHIFT_EXPR: 4261 case RSHIFT_EXPR: 4262 cost = comp_cost (add_cost (speed, mode), 0); 4263 break; 4264 4265 default: 4266 gcc_unreachable (); 4267 } 4268 4269 cost += cost0; 4270 cost += cost1; 4271 return cost; 4272 } 4273 4274 /* Estimates cost of forcing EXPR into a variable. INV_VARS is a set of the 4275 invariants the computation depends on. */ 4276 4277 static comp_cost 4278 force_var_cost (struct ivopts_data *data, tree expr, bitmap *inv_vars) 4279 { 4280 if (!expr) 4281 return no_cost; 4282 4283 find_inv_vars (data, &expr, inv_vars); 4284 return force_expr_to_var_cost (expr, data->speed); 4285 } 4286 4287 /* Returns cost of auto-modifying address expression in shape base + offset. 4288 AINC_STEP is step size of the address IV. AINC_OFFSET is offset of the 4289 address expression. The address expression has ADDR_MODE in addr space 4290 AS. The memory access has MEM_MODE. SPEED means we are optimizing for 4291 speed or size. */ 4292 4293 enum ainc_type 4294 { 4295 AINC_PRE_INC, /* Pre increment. */ 4296 AINC_PRE_DEC, /* Pre decrement. */ 4297 AINC_POST_INC, /* Post increment. */ 4298 AINC_POST_DEC, /* Post decrement. */ 4299 AINC_NONE /* Also the number of auto increment types. */ 4300 }; 4301 4302 struct ainc_cost_data 4303 { 4304 unsigned costs[AINC_NONE]; 4305 }; 4306 4307 static comp_cost 4308 get_address_cost_ainc (poly_int64 ainc_step, poly_int64 ainc_offset, 4309 machine_mode addr_mode, machine_mode mem_mode, 4310 addr_space_t as, bool speed) 4311 { 4312 if (!USE_LOAD_PRE_DECREMENT (mem_mode) 4313 && !USE_STORE_PRE_DECREMENT (mem_mode) 4314 && !USE_LOAD_POST_DECREMENT (mem_mode) 4315 && !USE_STORE_POST_DECREMENT (mem_mode) 4316 && !USE_LOAD_PRE_INCREMENT (mem_mode) 4317 && !USE_STORE_PRE_INCREMENT (mem_mode) 4318 && !USE_LOAD_POST_INCREMENT (mem_mode) 4319 && !USE_STORE_POST_INCREMENT (mem_mode)) 4320 return infinite_cost; 4321 4322 static vec<ainc_cost_data *> ainc_cost_data_list; 4323 unsigned idx = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode; 4324 if (idx >= ainc_cost_data_list.length ()) 4325 { 4326 unsigned nsize = ((unsigned) as + 1) *MAX_MACHINE_MODE; 4327 4328 gcc_assert (nsize > idx); 4329 ainc_cost_data_list.safe_grow_cleared (nsize); 4330 } 4331 4332 ainc_cost_data *data = ainc_cost_data_list[idx]; 4333 if (data == NULL) 4334 { 4335 rtx reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1); 4336 4337 data = (ainc_cost_data *) xcalloc (1, sizeof (*data)); 4338 data->costs[AINC_PRE_DEC] = INFTY; 4339 data->costs[AINC_POST_DEC] = INFTY; 4340 data->costs[AINC_PRE_INC] = INFTY; 4341 data->costs[AINC_POST_INC] = INFTY; 4342 if (USE_LOAD_PRE_DECREMENT (mem_mode) 4343 || USE_STORE_PRE_DECREMENT (mem_mode)) 4344 { 4345 rtx addr = gen_rtx_PRE_DEC (addr_mode, reg); 4346 4347 if (memory_address_addr_space_p (mem_mode, addr, as)) 4348 data->costs[AINC_PRE_DEC] 4349 = address_cost (addr, mem_mode, as, speed); 4350 } 4351 if (USE_LOAD_POST_DECREMENT (mem_mode) 4352 || USE_STORE_POST_DECREMENT (mem_mode)) 4353 { 4354 rtx addr = gen_rtx_POST_DEC (addr_mode, reg); 4355 4356 if (memory_address_addr_space_p (mem_mode, addr, as)) 4357 data->costs[AINC_POST_DEC] 4358 = address_cost (addr, mem_mode, as, speed); 4359 } 4360 if (USE_LOAD_PRE_INCREMENT (mem_mode) 4361 || USE_STORE_PRE_INCREMENT (mem_mode)) 4362 { 4363 rtx addr = gen_rtx_PRE_INC (addr_mode, reg); 4364 4365 if (memory_address_addr_space_p (mem_mode, addr, as)) 4366 data->costs[AINC_PRE_INC] 4367 = address_cost (addr, mem_mode, as, speed); 4368 } 4369 if (USE_LOAD_POST_INCREMENT (mem_mode) 4370 || USE_STORE_POST_INCREMENT (mem_mode)) 4371 { 4372 rtx addr = gen_rtx_POST_INC (addr_mode, reg); 4373 4374 if (memory_address_addr_space_p (mem_mode, addr, as)) 4375 data->costs[AINC_POST_INC] 4376 = address_cost (addr, mem_mode, as, speed); 4377 } 4378 ainc_cost_data_list[idx] = data; 4379 } 4380 4381 poly_int64 msize = GET_MODE_SIZE (mem_mode); 4382 if (known_eq (ainc_offset, 0) && known_eq (msize, ainc_step)) 4383 return comp_cost (data->costs[AINC_POST_INC], 0); 4384 if (known_eq (ainc_offset, 0) && known_eq (msize, -ainc_step)) 4385 return comp_cost (data->costs[AINC_POST_DEC], 0); 4386 if (known_eq (ainc_offset, msize) && known_eq (msize, ainc_step)) 4387 return comp_cost (data->costs[AINC_PRE_INC], 0); 4388 if (known_eq (ainc_offset, -msize) && known_eq (msize, -ainc_step)) 4389 return comp_cost (data->costs[AINC_PRE_DEC], 0); 4390 4391 return infinite_cost; 4392 } 4393 4394 /* Return cost of computing USE's address expression by using CAND. 4395 AFF_INV and AFF_VAR represent invariant and variant parts of the 4396 address expression, respectively. If AFF_INV is simple, store 4397 the loop invariant variables which are depended by it in INV_VARS; 4398 if AFF_INV is complicated, handle it as a new invariant expression 4399 and record it in INV_EXPR. RATIO indicates multiple times between 4400 steps of USE and CAND. If CAN_AUTOINC is nonNULL, store boolean 4401 value to it indicating if this is an auto-increment address. */ 4402 4403 static comp_cost 4404 get_address_cost (struct ivopts_data *data, struct iv_use *use, 4405 struct iv_cand *cand, aff_tree *aff_inv, 4406 aff_tree *aff_var, HOST_WIDE_INT ratio, 4407 bitmap *inv_vars, iv_inv_expr_ent **inv_expr, 4408 bool *can_autoinc, bool speed) 4409 { 4410 rtx addr; 4411 bool simple_inv = true; 4412 tree comp_inv = NULL_TREE, type = aff_var->type; 4413 comp_cost var_cost = no_cost, cost = no_cost; 4414 struct mem_address parts = {NULL_TREE, integer_one_node, 4415 NULL_TREE, NULL_TREE, NULL_TREE}; 4416 machine_mode addr_mode = TYPE_MODE (type); 4417 machine_mode mem_mode = TYPE_MODE (use->mem_type); 4418 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base)); 4419 /* Only true if ratio != 1. */ 4420 bool ok_with_ratio_p = false; 4421 bool ok_without_ratio_p = false; 4422 4423 if (!aff_combination_const_p (aff_inv)) 4424 { 4425 parts.index = integer_one_node; 4426 /* Addressing mode "base + index". */ 4427 ok_without_ratio_p = valid_mem_ref_p (mem_mode, as, &parts); 4428 if (ratio != 1) 4429 { 4430 parts.step = wide_int_to_tree (type, ratio); 4431 /* Addressing mode "base + index << scale". */ 4432 ok_with_ratio_p = valid_mem_ref_p (mem_mode, as, &parts); 4433 if (!ok_with_ratio_p) 4434 parts.step = NULL_TREE; 4435 } 4436 if (ok_with_ratio_p || ok_without_ratio_p) 4437 { 4438 if (maybe_ne (aff_inv->offset, 0)) 4439 { 4440 parts.offset = wide_int_to_tree (sizetype, aff_inv->offset); 4441 /* Addressing mode "base + index [<< scale] + offset". */ 4442 if (!valid_mem_ref_p (mem_mode, as, &parts)) 4443 parts.offset = NULL_TREE; 4444 else 4445 aff_inv->offset = 0; 4446 } 4447 4448 move_fixed_address_to_symbol (&parts, aff_inv); 4449 /* Base is fixed address and is moved to symbol part. */ 4450 if (parts.symbol != NULL_TREE && aff_combination_zero_p (aff_inv)) 4451 parts.base = NULL_TREE; 4452 4453 /* Addressing mode "symbol + base + index [<< scale] [+ offset]". */ 4454 if (parts.symbol != NULL_TREE 4455 && !valid_mem_ref_p (mem_mode, as, &parts)) 4456 { 4457 aff_combination_add_elt (aff_inv, parts.symbol, 1); 4458 parts.symbol = NULL_TREE; 4459 /* Reset SIMPLE_INV since symbol address needs to be computed 4460 outside of address expression in this case. */ 4461 simple_inv = false; 4462 /* Symbol part is moved back to base part, it can't be NULL. */ 4463 parts.base = integer_one_node; 4464 } 4465 } 4466 else 4467 parts.index = NULL_TREE; 4468 } 4469 else 4470 { 4471 poly_int64 ainc_step; 4472 if (can_autoinc 4473 && ratio == 1 4474 && ptrdiff_tree_p (cand->iv->step, &ainc_step)) 4475 { 4476 poly_int64 ainc_offset = (aff_inv->offset).force_shwi (); 4477 4478 if (stmt_after_increment (data->current_loop, cand, use->stmt)) 4479 ainc_offset += ainc_step; 4480 cost = get_address_cost_ainc (ainc_step, ainc_offset, 4481 addr_mode, mem_mode, as, speed); 4482 if (!cost.infinite_cost_p ()) 4483 { 4484 *can_autoinc = true; 4485 return cost; 4486 } 4487 cost = no_cost; 4488 } 4489 if (!aff_combination_zero_p (aff_inv)) 4490 { 4491 parts.offset = wide_int_to_tree (sizetype, aff_inv->offset); 4492 /* Addressing mode "base + offset". */ 4493 if (!valid_mem_ref_p (mem_mode, as, &parts)) 4494 parts.offset = NULL_TREE; 4495 else 4496 aff_inv->offset = 0; 4497 } 4498 } 4499 4500 if (simple_inv) 4501 simple_inv = (aff_inv == NULL 4502 || aff_combination_const_p (aff_inv) 4503 || aff_combination_singleton_var_p (aff_inv)); 4504 if (!aff_combination_zero_p (aff_inv)) 4505 comp_inv = aff_combination_to_tree (aff_inv); 4506 if (comp_inv != NULL_TREE) 4507 cost = force_var_cost (data, comp_inv, inv_vars); 4508 if (ratio != 1 && parts.step == NULL_TREE) 4509 var_cost += mult_by_coeff_cost (ratio, addr_mode, speed); 4510 if (comp_inv != NULL_TREE && parts.index == NULL_TREE) 4511 var_cost += add_cost (speed, addr_mode); 4512 4513 if (comp_inv && inv_expr && !simple_inv) 4514 { 4515 *inv_expr = get_loop_invariant_expr (data, comp_inv); 4516 /* Clear depends on. */ 4517 if (*inv_expr != NULL && inv_vars && *inv_vars) 4518 bitmap_clear (*inv_vars); 4519 4520 /* Cost of small invariant expression adjusted against loop niters 4521 is usually zero, which makes it difficult to be differentiated 4522 from candidate based on loop invariant variables. Secondly, the 4523 generated invariant expression may not be hoisted out of loop by 4524 following pass. We penalize the cost by rounding up in order to 4525 neutralize such effects. */ 4526 cost.cost = adjust_setup_cost (data, cost.cost, true); 4527 cost.scratch = cost.cost; 4528 } 4529 4530 cost += var_cost; 4531 addr = addr_for_mem_ref (&parts, as, false); 4532 gcc_assert (memory_address_addr_space_p (mem_mode, addr, as)); 4533 cost += address_cost (addr, mem_mode, as, speed); 4534 4535 if (parts.symbol != NULL_TREE) 4536 cost.complexity += 1; 4537 /* Don't increase the complexity of adding a scaled index if it's 4538 the only kind of index that the target allows. */ 4539 if (parts.step != NULL_TREE && ok_without_ratio_p) 4540 cost.complexity += 1; 4541 if (parts.base != NULL_TREE && parts.index != NULL_TREE) 4542 cost.complexity += 1; 4543 if (parts.offset != NULL_TREE && !integer_zerop (parts.offset)) 4544 cost.complexity += 1; 4545 4546 return cost; 4547 } 4548 4549 /* Scale (multiply) the computed COST (except scratch part that should be 4550 hoisted out a loop) by header->frequency / AT->frequency, which makes 4551 expected cost more accurate. */ 4552 4553 static comp_cost 4554 get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost) 4555 { 4556 int loop_freq = data->current_loop->header->count.to_frequency (cfun); 4557 int bb_freq = gimple_bb (at)->count.to_frequency (cfun); 4558 if (loop_freq != 0) 4559 { 4560 gcc_assert (cost.scratch <= cost.cost); 4561 int scaled_cost 4562 = cost.scratch + (cost.cost - cost.scratch) * bb_freq / loop_freq; 4563 4564 if (dump_file && (dump_flags & TDF_DETAILS)) 4565 fprintf (dump_file, "Scaling cost based on bb prob " 4566 "by %2.2f: %d (scratch: %d) -> %d (%d/%d)\n", 4567 1.0f * bb_freq / loop_freq, cost.cost, 4568 cost.scratch, scaled_cost, bb_freq, loop_freq); 4569 4570 cost.cost = scaled_cost; 4571 } 4572 4573 return cost; 4574 } 4575 4576 /* Determines the cost of the computation by that USE is expressed 4577 from induction variable CAND. If ADDRESS_P is true, we just need 4578 to create an address from it, otherwise we want to get it into 4579 register. A set of invariants we depend on is stored in INV_VARS. 4580 If CAN_AUTOINC is nonnull, use it to record whether autoinc 4581 addressing is likely. If INV_EXPR is nonnull, record invariant 4582 expr entry in it. */ 4583 4584 static comp_cost 4585 get_computation_cost (struct ivopts_data *data, struct iv_use *use, 4586 struct iv_cand *cand, bool address_p, bitmap *inv_vars, 4587 bool *can_autoinc, iv_inv_expr_ent **inv_expr) 4588 { 4589 gimple *at = use->stmt; 4590 tree ubase = use->iv->base, cbase = cand->iv->base; 4591 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase); 4592 tree comp_inv = NULL_TREE; 4593 HOST_WIDE_INT ratio, aratio; 4594 comp_cost cost; 4595 widest_int rat; 4596 aff_tree aff_inv, aff_var; 4597 bool speed = optimize_bb_for_speed_p (gimple_bb (at)); 4598 4599 if (inv_vars) 4600 *inv_vars = NULL; 4601 if (can_autoinc) 4602 *can_autoinc = false; 4603 if (inv_expr) 4604 *inv_expr = NULL; 4605 4606 /* Check if we have enough precision to express the values of use. */ 4607 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype)) 4608 return infinite_cost; 4609 4610 if (address_p 4611 || (use->iv->base_object 4612 && cand->iv->base_object 4613 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object)) 4614 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object)))) 4615 { 4616 /* Do not try to express address of an object with computation based 4617 on address of a different object. This may cause problems in rtl 4618 level alias analysis (that does not expect this to be happening, 4619 as this is illegal in C), and would be unlikely to be useful 4620 anyway. */ 4621 if (use->iv->base_object 4622 && cand->iv->base_object 4623 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0)) 4624 return infinite_cost; 4625 } 4626 4627 if (!get_computation_aff_1 (data->current_loop, at, use, 4628 cand, &aff_inv, &aff_var, &rat) 4629 || !wi::fits_shwi_p (rat)) 4630 return infinite_cost; 4631 4632 ratio = rat.to_shwi (); 4633 if (address_p) 4634 { 4635 cost = get_address_cost (data, use, cand, &aff_inv, &aff_var, ratio, 4636 inv_vars, inv_expr, can_autoinc, speed); 4637 return get_scaled_computation_cost_at (data, at, cost); 4638 } 4639 4640 bool simple_inv = (aff_combination_const_p (&aff_inv) 4641 || aff_combination_singleton_var_p (&aff_inv)); 4642 tree signed_type = signed_type_for (aff_combination_type (&aff_inv)); 4643 aff_combination_convert (&aff_inv, signed_type); 4644 if (!aff_combination_zero_p (&aff_inv)) 4645 comp_inv = aff_combination_to_tree (&aff_inv); 4646 4647 cost = force_var_cost (data, comp_inv, inv_vars); 4648 if (comp_inv && inv_expr && !simple_inv) 4649 { 4650 *inv_expr = get_loop_invariant_expr (data, comp_inv); 4651 /* Clear depends on. */ 4652 if (*inv_expr != NULL && inv_vars && *inv_vars) 4653 bitmap_clear (*inv_vars); 4654 4655 cost.cost = adjust_setup_cost (data, cost.cost); 4656 /* Record setup cost in scratch field. */ 4657 cost.scratch = cost.cost; 4658 } 4659 /* Cost of constant integer can be covered when adding invariant part to 4660 variant part. */ 4661 else if (comp_inv && CONSTANT_CLASS_P (comp_inv)) 4662 cost = no_cost; 4663 4664 /* Need type narrowing to represent use with cand. */ 4665 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype)) 4666 { 4667 machine_mode outer_mode = TYPE_MODE (utype); 4668 machine_mode inner_mode = TYPE_MODE (ctype); 4669 cost += comp_cost (convert_cost (outer_mode, inner_mode, speed), 0); 4670 } 4671 4672 /* Turn a + i * (-c) into a - i * c. */ 4673 if (ratio < 0 && comp_inv && !integer_zerop (comp_inv)) 4674 aratio = -ratio; 4675 else 4676 aratio = ratio; 4677 4678 if (ratio != 1) 4679 cost += mult_by_coeff_cost (aratio, TYPE_MODE (utype), speed); 4680 4681 /* TODO: We may also need to check if we can compute a + i * 4 in one 4682 instruction. */ 4683 /* Need to add up the invariant and variant parts. */ 4684 if (comp_inv && !integer_zerop (comp_inv)) 4685 cost += add_cost (speed, TYPE_MODE (utype)); 4686 4687 return get_scaled_computation_cost_at (data, at, cost); 4688 } 4689 4690 /* Determines cost of computing the use in GROUP with CAND in a generic 4691 expression. */ 4692 4693 static bool 4694 determine_group_iv_cost_generic (struct ivopts_data *data, 4695 struct iv_group *group, struct iv_cand *cand) 4696 { 4697 comp_cost cost; 4698 iv_inv_expr_ent *inv_expr = NULL; 4699 bitmap inv_vars = NULL, inv_exprs = NULL; 4700 struct iv_use *use = group->vuses[0]; 4701 4702 /* The simple case first -- if we need to express value of the preserved 4703 original biv, the cost is 0. This also prevents us from counting the 4704 cost of increment twice -- once at this use and once in the cost of 4705 the candidate. */ 4706 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt) 4707 cost = no_cost; 4708 else 4709 cost = get_computation_cost (data, use, cand, false, 4710 &inv_vars, NULL, &inv_expr); 4711 4712 if (inv_expr) 4713 { 4714 inv_exprs = BITMAP_ALLOC (NULL); 4715 bitmap_set_bit (inv_exprs, inv_expr->id); 4716 } 4717 set_group_iv_cost (data, group, cand, cost, inv_vars, 4718 NULL_TREE, ERROR_MARK, inv_exprs); 4719 return !cost.infinite_cost_p (); 4720 } 4721 4722 /* Determines cost of computing uses in GROUP with CAND in addresses. */ 4723 4724 static bool 4725 determine_group_iv_cost_address (struct ivopts_data *data, 4726 struct iv_group *group, struct iv_cand *cand) 4727 { 4728 unsigned i; 4729 bitmap inv_vars = NULL, inv_exprs = NULL; 4730 bool can_autoinc; 4731 iv_inv_expr_ent *inv_expr = NULL; 4732 struct iv_use *use = group->vuses[0]; 4733 comp_cost sum_cost = no_cost, cost; 4734 4735 cost = get_computation_cost (data, use, cand, true, 4736 &inv_vars, &can_autoinc, &inv_expr); 4737 4738 if (inv_expr) 4739 { 4740 inv_exprs = BITMAP_ALLOC (NULL); 4741 bitmap_set_bit (inv_exprs, inv_expr->id); 4742 } 4743 sum_cost = cost; 4744 if (!sum_cost.infinite_cost_p () && cand->ainc_use == use) 4745 { 4746 if (can_autoinc) 4747 sum_cost -= cand->cost_step; 4748 /* If we generated the candidate solely for exploiting autoincrement 4749 opportunities, and it turns out it can't be used, set the cost to 4750 infinity to make sure we ignore it. */ 4751 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE) 4752 sum_cost = infinite_cost; 4753 } 4754 4755 /* Uses in a group can share setup code, so only add setup cost once. */ 4756 cost -= cost.scratch; 4757 /* Compute and add costs for rest uses of this group. */ 4758 for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++) 4759 { 4760 struct iv_use *next = group->vuses[i]; 4761 4762 /* TODO: We could skip computing cost for sub iv_use when it has the 4763 same cost as the first iv_use, but the cost really depends on the 4764 offset and where the iv_use is. */ 4765 cost = get_computation_cost (data, next, cand, true, 4766 NULL, &can_autoinc, &inv_expr); 4767 if (inv_expr) 4768 { 4769 if (!inv_exprs) 4770 inv_exprs = BITMAP_ALLOC (NULL); 4771 4772 bitmap_set_bit (inv_exprs, inv_expr->id); 4773 } 4774 sum_cost += cost; 4775 } 4776 set_group_iv_cost (data, group, cand, sum_cost, inv_vars, 4777 NULL_TREE, ERROR_MARK, inv_exprs); 4778 4779 return !sum_cost.infinite_cost_p (); 4780 } 4781 4782 /* Computes value of candidate CAND at position AT in iteration NITER, and 4783 stores it to VAL. */ 4784 4785 static void 4786 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter, 4787 aff_tree *val) 4788 { 4789 aff_tree step, delta, nit; 4790 struct iv *iv = cand->iv; 4791 tree type = TREE_TYPE (iv->base); 4792 tree steptype; 4793 if (POINTER_TYPE_P (type)) 4794 steptype = sizetype; 4795 else 4796 steptype = unsigned_type_for (type); 4797 4798 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step); 4799 aff_combination_convert (&step, steptype); 4800 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit); 4801 aff_combination_convert (&nit, steptype); 4802 aff_combination_mult (&nit, &step, &delta); 4803 if (stmt_after_increment (loop, cand, at)) 4804 aff_combination_add (&delta, &step); 4805 4806 tree_to_aff_combination (iv->base, type, val); 4807 if (!POINTER_TYPE_P (type)) 4808 aff_combination_convert (val, steptype); 4809 aff_combination_add (val, &delta); 4810 } 4811 4812 /* Returns period of induction variable iv. */ 4813 4814 static tree 4815 iv_period (struct iv *iv) 4816 { 4817 tree step = iv->step, period, type; 4818 tree pow2div; 4819 4820 gcc_assert (step && TREE_CODE (step) == INTEGER_CST); 4821 4822 type = unsigned_type_for (TREE_TYPE (step)); 4823 /* Period of the iv is lcm (step, type_range)/step -1, 4824 i.e., N*type_range/step - 1. Since type range is power 4825 of two, N == (step >> num_of_ending_zeros_binary (step), 4826 so the final result is 4827 4828 (type_range >> num_of_ending_zeros_binary (step)) - 1 4829 4830 */ 4831 pow2div = num_ending_zeros (step); 4832 4833 period = build_low_bits_mask (type, 4834 (TYPE_PRECISION (type) 4835 - tree_to_uhwi (pow2div))); 4836 4837 return period; 4838 } 4839 4840 /* Returns the comparison operator used when eliminating the iv USE. */ 4841 4842 static enum tree_code 4843 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use) 4844 { 4845 struct loop *loop = data->current_loop; 4846 basic_block ex_bb; 4847 edge exit; 4848 4849 ex_bb = gimple_bb (use->stmt); 4850 exit = EDGE_SUCC (ex_bb, 0); 4851 if (flow_bb_inside_loop_p (loop, exit->dest)) 4852 exit = EDGE_SUCC (ex_bb, 1); 4853 4854 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR); 4855 } 4856 4857 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now, 4858 we only detect the situation that BASE = SOMETHING + OFFSET, where the 4859 calculation is performed in non-wrapping type. 4860 4861 TODO: More generally, we could test for the situation that 4862 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero. 4863 This would require knowing the sign of OFFSET. */ 4864 4865 static bool 4866 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset) 4867 { 4868 enum tree_code code; 4869 tree e1, e2; 4870 aff_tree aff_e1, aff_e2, aff_offset; 4871 4872 if (!nowrap_type_p (TREE_TYPE (base))) 4873 return false; 4874 4875 base = expand_simple_operations (base); 4876 4877 if (TREE_CODE (base) == SSA_NAME) 4878 { 4879 gimple *stmt = SSA_NAME_DEF_STMT (base); 4880 4881 if (gimple_code (stmt) != GIMPLE_ASSIGN) 4882 return false; 4883 4884 code = gimple_assign_rhs_code (stmt); 4885 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS) 4886 return false; 4887 4888 e1 = gimple_assign_rhs1 (stmt); 4889 e2 = gimple_assign_rhs2 (stmt); 4890 } 4891 else 4892 { 4893 code = TREE_CODE (base); 4894 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS) 4895 return false; 4896 e1 = TREE_OPERAND (base, 0); 4897 e2 = TREE_OPERAND (base, 1); 4898 } 4899 4900 /* Use affine expansion as deeper inspection to prove the equality. */ 4901 tree_to_aff_combination_expand (e2, TREE_TYPE (e2), 4902 &aff_e2, &data->name_expansion_cache); 4903 tree_to_aff_combination_expand (offset, TREE_TYPE (offset), 4904 &aff_offset, &data->name_expansion_cache); 4905 aff_combination_scale (&aff_offset, -1); 4906 switch (code) 4907 { 4908 case PLUS_EXPR: 4909 aff_combination_add (&aff_e2, &aff_offset); 4910 if (aff_combination_zero_p (&aff_e2)) 4911 return true; 4912 4913 tree_to_aff_combination_expand (e1, TREE_TYPE (e1), 4914 &aff_e1, &data->name_expansion_cache); 4915 aff_combination_add (&aff_e1, &aff_offset); 4916 return aff_combination_zero_p (&aff_e1); 4917 4918 case POINTER_PLUS_EXPR: 4919 aff_combination_add (&aff_e2, &aff_offset); 4920 return aff_combination_zero_p (&aff_e2); 4921 4922 default: 4923 return false; 4924 } 4925 } 4926 4927 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR 4928 comparison with CAND. NITER describes the number of iterations of 4929 the loops. If successful, the comparison in COMP_P is altered accordingly. 4930 4931 We aim to handle the following situation: 4932 4933 sometype *base, *p; 4934 int a, b, i; 4935 4936 i = a; 4937 p = p_0 = base + a; 4938 4939 do 4940 { 4941 bla (*p); 4942 p++; 4943 i++; 4944 } 4945 while (i < b); 4946 4947 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1. 4948 We aim to optimize this to 4949 4950 p = p_0 = base + a; 4951 do 4952 { 4953 bla (*p); 4954 p++; 4955 } 4956 while (p < p_0 - a + b); 4957 4958 This preserves the correctness, since the pointer arithmetics does not 4959 overflow. More precisely: 4960 4961 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no 4962 overflow in computing it or the values of p. 4963 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not 4964 overflow. To prove this, we use the fact that p_0 = base + a. */ 4965 4966 static bool 4967 iv_elimination_compare_lt (struct ivopts_data *data, 4968 struct iv_cand *cand, enum tree_code *comp_p, 4969 struct tree_niter_desc *niter) 4970 { 4971 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset; 4972 struct aff_tree nit, tmpa, tmpb; 4973 enum tree_code comp; 4974 HOST_WIDE_INT step; 4975 4976 /* We need to know that the candidate induction variable does not overflow. 4977 While more complex analysis may be used to prove this, for now just 4978 check that the variable appears in the original program and that it 4979 is computed in a type that guarantees no overflows. */ 4980 cand_type = TREE_TYPE (cand->iv->base); 4981 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type)) 4982 return false; 4983 4984 /* Make sure that the loop iterates till the loop bound is hit, as otherwise 4985 the calculation of the BOUND could overflow, making the comparison 4986 invalid. */ 4987 if (!data->loop_single_exit_p) 4988 return false; 4989 4990 /* We need to be able to decide whether candidate is increasing or decreasing 4991 in order to choose the right comparison operator. */ 4992 if (!cst_and_fits_in_hwi (cand->iv->step)) 4993 return false; 4994 step = int_cst_value (cand->iv->step); 4995 4996 /* Check that the number of iterations matches the expected pattern: 4997 a + 1 > b ? 0 : b - a - 1. */ 4998 mbz = niter->may_be_zero; 4999 if (TREE_CODE (mbz) == GT_EXPR) 5000 { 5001 /* Handle a + 1 > b. */ 5002 tree op0 = TREE_OPERAND (mbz, 0); 5003 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1))) 5004 { 5005 a = TREE_OPERAND (op0, 0); 5006 b = TREE_OPERAND (mbz, 1); 5007 } 5008 else 5009 return false; 5010 } 5011 else if (TREE_CODE (mbz) == LT_EXPR) 5012 { 5013 tree op1 = TREE_OPERAND (mbz, 1); 5014 5015 /* Handle b < a + 1. */ 5016 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1))) 5017 { 5018 a = TREE_OPERAND (op1, 0); 5019 b = TREE_OPERAND (mbz, 0); 5020 } 5021 else 5022 return false; 5023 } 5024 else 5025 return false; 5026 5027 /* Expected number of iterations is B - A - 1. Check that it matches 5028 the actual number, i.e., that B - A - NITER = 1. */ 5029 tree_to_aff_combination (niter->niter, nit_type, &nit); 5030 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa); 5031 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb); 5032 aff_combination_scale (&nit, -1); 5033 aff_combination_scale (&tmpa, -1); 5034 aff_combination_add (&tmpb, &tmpa); 5035 aff_combination_add (&tmpb, &nit); 5036 if (tmpb.n != 0 || maybe_ne (tmpb.offset, 1)) 5037 return false; 5038 5039 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not 5040 overflow. */ 5041 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step), 5042 cand->iv->step, 5043 fold_convert (TREE_TYPE (cand->iv->step), a)); 5044 if (!difference_cannot_overflow_p (data, cand->iv->base, offset)) 5045 return false; 5046 5047 /* Determine the new comparison operator. */ 5048 comp = step < 0 ? GT_EXPR : LT_EXPR; 5049 if (*comp_p == NE_EXPR) 5050 *comp_p = comp; 5051 else if (*comp_p == EQ_EXPR) 5052 *comp_p = invert_tree_comparison (comp, false); 5053 else 5054 gcc_unreachable (); 5055 5056 return true; 5057 } 5058 5059 /* Check whether it is possible to express the condition in USE by comparison 5060 of candidate CAND. If so, store the value compared with to BOUND, and the 5061 comparison operator to COMP. */ 5062 5063 static bool 5064 may_eliminate_iv (struct ivopts_data *data, 5065 struct iv_use *use, struct iv_cand *cand, tree *bound, 5066 enum tree_code *comp) 5067 { 5068 basic_block ex_bb; 5069 edge exit; 5070 tree period; 5071 struct loop *loop = data->current_loop; 5072 aff_tree bnd; 5073 struct tree_niter_desc *desc = NULL; 5074 5075 if (TREE_CODE (cand->iv->step) != INTEGER_CST) 5076 return false; 5077 5078 /* For now works only for exits that dominate the loop latch. 5079 TODO: extend to other conditions inside loop body. */ 5080 ex_bb = gimple_bb (use->stmt); 5081 if (use->stmt != last_stmt (ex_bb) 5082 || gimple_code (use->stmt) != GIMPLE_COND 5083 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb)) 5084 return false; 5085 5086 exit = EDGE_SUCC (ex_bb, 0); 5087 if (flow_bb_inside_loop_p (loop, exit->dest)) 5088 exit = EDGE_SUCC (ex_bb, 1); 5089 if (flow_bb_inside_loop_p (loop, exit->dest)) 5090 return false; 5091 5092 desc = niter_for_exit (data, exit); 5093 if (!desc) 5094 return false; 5095 5096 /* Determine whether we can use the variable to test the exit condition. 5097 This is the case iff the period of the induction variable is greater 5098 than the number of iterations for which the exit condition is true. */ 5099 period = iv_period (cand->iv); 5100 5101 /* If the number of iterations is constant, compare against it directly. */ 5102 if (TREE_CODE (desc->niter) == INTEGER_CST) 5103 { 5104 /* See cand_value_at. */ 5105 if (stmt_after_increment (loop, cand, use->stmt)) 5106 { 5107 if (!tree_int_cst_lt (desc->niter, period)) 5108 return false; 5109 } 5110 else 5111 { 5112 if (tree_int_cst_lt (period, desc->niter)) 5113 return false; 5114 } 5115 } 5116 5117 /* If not, and if this is the only possible exit of the loop, see whether 5118 we can get a conservative estimate on the number of iterations of the 5119 entire loop and compare against that instead. */ 5120 else 5121 { 5122 widest_int period_value, max_niter; 5123 5124 max_niter = desc->max; 5125 if (stmt_after_increment (loop, cand, use->stmt)) 5126 max_niter += 1; 5127 period_value = wi::to_widest (period); 5128 if (wi::gtu_p (max_niter, period_value)) 5129 { 5130 /* See if we can take advantage of inferred loop bound 5131 information. */ 5132 if (data->loop_single_exit_p) 5133 { 5134 if (!max_loop_iterations (loop, &max_niter)) 5135 return false; 5136 /* The loop bound is already adjusted by adding 1. */ 5137 if (wi::gtu_p (max_niter, period_value)) 5138 return false; 5139 } 5140 else 5141 return false; 5142 } 5143 } 5144 5145 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd); 5146 5147 *bound = fold_convert (TREE_TYPE (cand->iv->base), 5148 aff_combination_to_tree (&bnd)); 5149 *comp = iv_elimination_compare (data, use); 5150 5151 /* It is unlikely that computing the number of iterations using division 5152 would be more profitable than keeping the original induction variable. */ 5153 if (expression_expensive_p (*bound)) 5154 return false; 5155 5156 /* Sometimes, it is possible to handle the situation that the number of 5157 iterations may be zero unless additional assumptions by using < 5158 instead of != in the exit condition. 5159 5160 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and 5161 base the exit condition on it. However, that is often too 5162 expensive. */ 5163 if (!integer_zerop (desc->may_be_zero)) 5164 return iv_elimination_compare_lt (data, cand, comp, desc); 5165 5166 return true; 5167 } 5168 5169 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must 5170 be copied, if it is used in the loop body and DATA->body_includes_call. */ 5171 5172 static int 5173 parm_decl_cost (struct ivopts_data *data, tree bound) 5174 { 5175 tree sbound = bound; 5176 STRIP_NOPS (sbound); 5177 5178 if (TREE_CODE (sbound) == SSA_NAME 5179 && SSA_NAME_IS_DEFAULT_DEF (sbound) 5180 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL 5181 && data->body_includes_call) 5182 return COSTS_N_INSNS (1); 5183 5184 return 0; 5185 } 5186 5187 /* Determines cost of computing the use in GROUP with CAND in a condition. */ 5188 5189 static bool 5190 determine_group_iv_cost_cond (struct ivopts_data *data, 5191 struct iv_group *group, struct iv_cand *cand) 5192 { 5193 tree bound = NULL_TREE; 5194 struct iv *cmp_iv; 5195 bitmap inv_exprs = NULL; 5196 bitmap inv_vars_elim = NULL, inv_vars_express = NULL, inv_vars; 5197 comp_cost elim_cost = infinite_cost, express_cost, cost, bound_cost; 5198 enum comp_iv_rewrite rewrite_type; 5199 iv_inv_expr_ent *inv_expr_elim = NULL, *inv_expr_express = NULL, *inv_expr; 5200 tree *control_var, *bound_cst; 5201 enum tree_code comp = ERROR_MARK; 5202 struct iv_use *use = group->vuses[0]; 5203 5204 /* Extract condition operands. */ 5205 rewrite_type = extract_cond_operands (data, use->stmt, &control_var, 5206 &bound_cst, NULL, &cmp_iv); 5207 gcc_assert (rewrite_type != COMP_IV_NA); 5208 5209 /* Try iv elimination. */ 5210 if (rewrite_type == COMP_IV_ELIM 5211 && may_eliminate_iv (data, use, cand, &bound, &comp)) 5212 { 5213 elim_cost = force_var_cost (data, bound, &inv_vars_elim); 5214 if (elim_cost.cost == 0) 5215 elim_cost.cost = parm_decl_cost (data, bound); 5216 else if (TREE_CODE (bound) == INTEGER_CST) 5217 elim_cost.cost = 0; 5218 /* If we replace a loop condition 'i < n' with 'p < base + n', 5219 inv_vars_elim will have 'base' and 'n' set, which implies that both 5220 'base' and 'n' will be live during the loop. More likely, 5221 'base + n' will be loop invariant, resulting in only one live value 5222 during the loop. So in that case we clear inv_vars_elim and set 5223 inv_expr_elim instead. */ 5224 if (inv_vars_elim && bitmap_count_bits (inv_vars_elim) > 1) 5225 { 5226 inv_expr_elim = get_loop_invariant_expr (data, bound); 5227 bitmap_clear (inv_vars_elim); 5228 } 5229 /* The bound is a loop invariant, so it will be only computed 5230 once. */ 5231 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost); 5232 } 5233 5234 /* When the condition is a comparison of the candidate IV against 5235 zero, prefer this IV. 5236 5237 TODO: The constant that we're subtracting from the cost should 5238 be target-dependent. This information should be added to the 5239 target costs for each backend. */ 5240 if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */ 5241 && integer_zerop (*bound_cst) 5242 && (operand_equal_p (*control_var, cand->var_after, 0) 5243 || operand_equal_p (*control_var, cand->var_before, 0))) 5244 elim_cost -= 1; 5245 5246 express_cost = get_computation_cost (data, use, cand, false, 5247 &inv_vars_express, NULL, 5248 &inv_expr_express); 5249 if (cmp_iv != NULL) 5250 find_inv_vars (data, &cmp_iv->base, &inv_vars_express); 5251 5252 /* Count the cost of the original bound as well. */ 5253 bound_cost = force_var_cost (data, *bound_cst, NULL); 5254 if (bound_cost.cost == 0) 5255 bound_cost.cost = parm_decl_cost (data, *bound_cst); 5256 else if (TREE_CODE (*bound_cst) == INTEGER_CST) 5257 bound_cost.cost = 0; 5258 express_cost += bound_cost; 5259 5260 /* Choose the better approach, preferring the eliminated IV. */ 5261 if (elim_cost <= express_cost) 5262 { 5263 cost = elim_cost; 5264 inv_vars = inv_vars_elim; 5265 inv_vars_elim = NULL; 5266 inv_expr = inv_expr_elim; 5267 } 5268 else 5269 { 5270 cost = express_cost; 5271 inv_vars = inv_vars_express; 5272 inv_vars_express = NULL; 5273 bound = NULL_TREE; 5274 comp = ERROR_MARK; 5275 inv_expr = inv_expr_express; 5276 } 5277 5278 if (inv_expr) 5279 { 5280 inv_exprs = BITMAP_ALLOC (NULL); 5281 bitmap_set_bit (inv_exprs, inv_expr->id); 5282 } 5283 set_group_iv_cost (data, group, cand, cost, 5284 inv_vars, bound, comp, inv_exprs); 5285 5286 if (inv_vars_elim) 5287 BITMAP_FREE (inv_vars_elim); 5288 if (inv_vars_express) 5289 BITMAP_FREE (inv_vars_express); 5290 5291 return !cost.infinite_cost_p (); 5292 } 5293 5294 /* Determines cost of computing uses in GROUP with CAND. Returns false 5295 if USE cannot be represented with CAND. */ 5296 5297 static bool 5298 determine_group_iv_cost (struct ivopts_data *data, 5299 struct iv_group *group, struct iv_cand *cand) 5300 { 5301 switch (group->type) 5302 { 5303 case USE_NONLINEAR_EXPR: 5304 return determine_group_iv_cost_generic (data, group, cand); 5305 5306 case USE_REF_ADDRESS: 5307 case USE_PTR_ADDRESS: 5308 return determine_group_iv_cost_address (data, group, cand); 5309 5310 case USE_COMPARE: 5311 return determine_group_iv_cost_cond (data, group, cand); 5312 5313 default: 5314 gcc_unreachable (); 5315 } 5316 } 5317 5318 /* Return true if get_computation_cost indicates that autoincrement is 5319 a possibility for the pair of USE and CAND, false otherwise. */ 5320 5321 static bool 5322 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use, 5323 struct iv_cand *cand) 5324 { 5325 if (!address_p (use->type)) 5326 return false; 5327 5328 bool can_autoinc = false; 5329 get_computation_cost (data, use, cand, true, NULL, &can_autoinc, NULL); 5330 return can_autoinc; 5331 } 5332 5333 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a 5334 use that allows autoincrement, and set their AINC_USE if possible. */ 5335 5336 static void 5337 set_autoinc_for_original_candidates (struct ivopts_data *data) 5338 { 5339 unsigned i, j; 5340 5341 for (i = 0; i < data->vcands.length (); i++) 5342 { 5343 struct iv_cand *cand = data->vcands[i]; 5344 struct iv_use *closest_before = NULL; 5345 struct iv_use *closest_after = NULL; 5346 if (cand->pos != IP_ORIGINAL) 5347 continue; 5348 5349 for (j = 0; j < data->vgroups.length (); j++) 5350 { 5351 struct iv_group *group = data->vgroups[j]; 5352 struct iv_use *use = group->vuses[0]; 5353 unsigned uid = gimple_uid (use->stmt); 5354 5355 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)) 5356 continue; 5357 5358 if (uid < gimple_uid (cand->incremented_at) 5359 && (closest_before == NULL 5360 || uid > gimple_uid (closest_before->stmt))) 5361 closest_before = use; 5362 5363 if (uid > gimple_uid (cand->incremented_at) 5364 && (closest_after == NULL 5365 || uid < gimple_uid (closest_after->stmt))) 5366 closest_after = use; 5367 } 5368 5369 if (closest_before != NULL 5370 && autoinc_possible_for_pair (data, closest_before, cand)) 5371 cand->ainc_use = closest_before; 5372 else if (closest_after != NULL 5373 && autoinc_possible_for_pair (data, closest_after, cand)) 5374 cand->ainc_use = closest_after; 5375 } 5376 } 5377 5378 /* Relate compare use with all candidates. */ 5379 5380 static void 5381 relate_compare_use_with_all_cands (struct ivopts_data *data) 5382 { 5383 unsigned i, count = data->vcands.length (); 5384 for (i = 0; i < data->vgroups.length (); i++) 5385 { 5386 struct iv_group *group = data->vgroups[i]; 5387 5388 if (group->type == USE_COMPARE) 5389 bitmap_set_range (group->related_cands, 0, count); 5390 } 5391 } 5392 5393 /* Finds the candidates for the induction variables. */ 5394 5395 static void 5396 find_iv_candidates (struct ivopts_data *data) 5397 { 5398 /* Add commonly used ivs. */ 5399 add_standard_iv_candidates (data); 5400 5401 /* Add old induction variables. */ 5402 add_iv_candidate_for_bivs (data); 5403 5404 /* Add induction variables derived from uses. */ 5405 add_iv_candidate_for_groups (data); 5406 5407 set_autoinc_for_original_candidates (data); 5408 5409 /* Record the important candidates. */ 5410 record_important_candidates (data); 5411 5412 /* Relate compare iv_use with all candidates. */ 5413 if (!data->consider_all_candidates) 5414 relate_compare_use_with_all_cands (data); 5415 5416 if (dump_file && (dump_flags & TDF_DETAILS)) 5417 { 5418 unsigned i; 5419 5420 fprintf (dump_file, "\n<Important Candidates>:\t"); 5421 for (i = 0; i < data->vcands.length (); i++) 5422 if (data->vcands[i]->important) 5423 fprintf (dump_file, " %d,", data->vcands[i]->id); 5424 fprintf (dump_file, "\n"); 5425 5426 fprintf (dump_file, "\n<Group, Cand> Related:\n"); 5427 for (i = 0; i < data->vgroups.length (); i++) 5428 { 5429 struct iv_group *group = data->vgroups[i]; 5430 5431 if (group->related_cands) 5432 { 5433 fprintf (dump_file, " Group %d:\t", group->id); 5434 dump_bitmap (dump_file, group->related_cands); 5435 } 5436 } 5437 fprintf (dump_file, "\n"); 5438 } 5439 } 5440 5441 /* Determines costs of computing use of iv with an iv candidate. */ 5442 5443 static void 5444 determine_group_iv_costs (struct ivopts_data *data) 5445 { 5446 unsigned i, j; 5447 struct iv_cand *cand; 5448 struct iv_group *group; 5449 bitmap to_clear = BITMAP_ALLOC (NULL); 5450 5451 alloc_use_cost_map (data); 5452 5453 for (i = 0; i < data->vgroups.length (); i++) 5454 { 5455 group = data->vgroups[i]; 5456 5457 if (data->consider_all_candidates) 5458 { 5459 for (j = 0; j < data->vcands.length (); j++) 5460 { 5461 cand = data->vcands[j]; 5462 determine_group_iv_cost (data, group, cand); 5463 } 5464 } 5465 else 5466 { 5467 bitmap_iterator bi; 5468 5469 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi) 5470 { 5471 cand = data->vcands[j]; 5472 if (!determine_group_iv_cost (data, group, cand)) 5473 bitmap_set_bit (to_clear, j); 5474 } 5475 5476 /* Remove the candidates for that the cost is infinite from 5477 the list of related candidates. */ 5478 bitmap_and_compl_into (group->related_cands, to_clear); 5479 bitmap_clear (to_clear); 5480 } 5481 } 5482 5483 BITMAP_FREE (to_clear); 5484 5485 if (dump_file && (dump_flags & TDF_DETAILS)) 5486 { 5487 bitmap_iterator bi; 5488 5489 /* Dump invariant variables. */ 5490 fprintf (dump_file, "\n<Invariant Vars>:\n"); 5491 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi) 5492 { 5493 struct version_info *info = ver_info (data, i); 5494 if (info->inv_id) 5495 { 5496 fprintf (dump_file, "Inv %d:\t", info->inv_id); 5497 print_generic_expr (dump_file, info->name, TDF_SLIM); 5498 fprintf (dump_file, "%s\n", 5499 info->has_nonlin_use ? "" : "\t(eliminable)"); 5500 } 5501 } 5502 5503 /* Dump invariant expressions. */ 5504 fprintf (dump_file, "\n<Invariant Expressions>:\n"); 5505 auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ()); 5506 5507 for (hash_table<iv_inv_expr_hasher>::iterator it 5508 = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end (); 5509 ++it) 5510 list.safe_push (*it); 5511 5512 list.qsort (sort_iv_inv_expr_ent); 5513 5514 for (i = 0; i < list.length (); ++i) 5515 { 5516 fprintf (dump_file, "inv_expr %d: \t", list[i]->id); 5517 print_generic_expr (dump_file, list[i]->expr, TDF_SLIM); 5518 fprintf (dump_file, "\n"); 5519 } 5520 5521 fprintf (dump_file, "\n<Group-candidate Costs>:\n"); 5522 5523 for (i = 0; i < data->vgroups.length (); i++) 5524 { 5525 group = data->vgroups[i]; 5526 5527 fprintf (dump_file, "Group %d:\n", i); 5528 fprintf (dump_file, " cand\tcost\tcompl.\tinv.expr.\tinv.vars\n"); 5529 for (j = 0; j < group->n_map_members; j++) 5530 { 5531 if (!group->cost_map[j].cand 5532 || group->cost_map[j].cost.infinite_cost_p ()) 5533 continue; 5534 5535 fprintf (dump_file, " %d\t%d\t%d\t", 5536 group->cost_map[j].cand->id, 5537 group->cost_map[j].cost.cost, 5538 group->cost_map[j].cost.complexity); 5539 if (!group->cost_map[j].inv_exprs 5540 || bitmap_empty_p (group->cost_map[j].inv_exprs)) 5541 fprintf (dump_file, "NIL;\t"); 5542 else 5543 bitmap_print (dump_file, 5544 group->cost_map[j].inv_exprs, "", ";\t"); 5545 if (!group->cost_map[j].inv_vars 5546 || bitmap_empty_p (group->cost_map[j].inv_vars)) 5547 fprintf (dump_file, "NIL;\n"); 5548 else 5549 bitmap_print (dump_file, 5550 group->cost_map[j].inv_vars, "", "\n"); 5551 } 5552 5553 fprintf (dump_file, "\n"); 5554 } 5555 fprintf (dump_file, "\n"); 5556 } 5557 } 5558 5559 /* Determines cost of the candidate CAND. */ 5560 5561 static void 5562 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand) 5563 { 5564 comp_cost cost_base; 5565 unsigned cost, cost_step; 5566 tree base; 5567 5568 gcc_assert (cand->iv != NULL); 5569 5570 /* There are two costs associated with the candidate -- its increment 5571 and its initialization. The second is almost negligible for any loop 5572 that rolls enough, so we take it just very little into account. */ 5573 5574 base = cand->iv->base; 5575 cost_base = force_var_cost (data, base, NULL); 5576 /* It will be exceptional that the iv register happens to be initialized with 5577 the proper value at no cost. In general, there will at least be a regcopy 5578 or a const set. */ 5579 if (cost_base.cost == 0) 5580 cost_base.cost = COSTS_N_INSNS (1); 5581 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base))); 5582 5583 cost = cost_step + adjust_setup_cost (data, cost_base.cost); 5584 5585 /* Prefer the original ivs unless we may gain something by replacing it. 5586 The reason is to make debugging simpler; so this is not relevant for 5587 artificial ivs created by other optimization passes. */ 5588 if (cand->pos != IP_ORIGINAL 5589 || !SSA_NAME_VAR (cand->var_before) 5590 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before))) 5591 cost++; 5592 5593 /* Prefer not to insert statements into latch unless there are some 5594 already (so that we do not create unnecessary jumps). */ 5595 if (cand->pos == IP_END 5596 && empty_block_p (ip_end_pos (data->current_loop))) 5597 cost++; 5598 5599 cand->cost = cost; 5600 cand->cost_step = cost_step; 5601 } 5602 5603 /* Determines costs of computation of the candidates. */ 5604 5605 static void 5606 determine_iv_costs (struct ivopts_data *data) 5607 { 5608 unsigned i; 5609 5610 if (dump_file && (dump_flags & TDF_DETAILS)) 5611 { 5612 fprintf (dump_file, "<Candidate Costs>:\n"); 5613 fprintf (dump_file, " cand\tcost\n"); 5614 } 5615 5616 for (i = 0; i < data->vcands.length (); i++) 5617 { 5618 struct iv_cand *cand = data->vcands[i]; 5619 5620 determine_iv_cost (data, cand); 5621 5622 if (dump_file && (dump_flags & TDF_DETAILS)) 5623 fprintf (dump_file, " %d\t%d\n", i, cand->cost); 5624 } 5625 5626 if (dump_file && (dump_flags & TDF_DETAILS)) 5627 fprintf (dump_file, "\n"); 5628 } 5629 5630 /* Estimate register pressure for loop having N_INVS invariants and N_CANDS 5631 induction variables. Note N_INVS includes both invariant variables and 5632 invariant expressions. */ 5633 5634 static unsigned 5635 ivopts_estimate_reg_pressure (struct ivopts_data *data, unsigned n_invs, 5636 unsigned n_cands) 5637 { 5638 unsigned cost; 5639 unsigned n_old = data->regs_used, n_new = n_invs + n_cands; 5640 unsigned regs_needed = n_new + n_old, available_regs = target_avail_regs; 5641 bool speed = data->speed; 5642 5643 /* If there is a call in the loop body, the call-clobbered registers 5644 are not available for loop invariants. */ 5645 if (data->body_includes_call) 5646 available_regs = available_regs - target_clobbered_regs; 5647 5648 /* If we have enough registers. */ 5649 if (regs_needed + target_res_regs < available_regs) 5650 cost = n_new; 5651 /* If close to running out of registers, try to preserve them. */ 5652 else if (regs_needed <= available_regs) 5653 cost = target_reg_cost [speed] * regs_needed; 5654 /* If we run out of available registers but the number of candidates 5655 does not, we penalize extra registers using target_spill_cost. */ 5656 else if (n_cands <= available_regs) 5657 cost = target_reg_cost [speed] * available_regs 5658 + target_spill_cost [speed] * (regs_needed - available_regs); 5659 /* If the number of candidates runs out available registers, we penalize 5660 extra candidate registers using target_spill_cost * 2. Because it is 5661 more expensive to spill induction variable than invariant. */ 5662 else 5663 cost = target_reg_cost [speed] * available_regs 5664 + target_spill_cost [speed] * (n_cands - available_regs) * 2 5665 + target_spill_cost [speed] * (regs_needed - n_cands); 5666 5667 /* Finally, add the number of candidates, so that we prefer eliminating 5668 induction variables if possible. */ 5669 return cost + n_cands; 5670 } 5671 5672 /* For each size of the induction variable set determine the penalty. */ 5673 5674 static void 5675 determine_set_costs (struct ivopts_data *data) 5676 { 5677 unsigned j, n; 5678 gphi *phi; 5679 gphi_iterator psi; 5680 tree op; 5681 struct loop *loop = data->current_loop; 5682 bitmap_iterator bi; 5683 5684 if (dump_file && (dump_flags & TDF_DETAILS)) 5685 { 5686 fprintf (dump_file, "<Global Costs>:\n"); 5687 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs); 5688 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs); 5689 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]); 5690 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]); 5691 } 5692 5693 n = 0; 5694 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi)) 5695 { 5696 phi = psi.phi (); 5697 op = PHI_RESULT (phi); 5698 5699 if (virtual_operand_p (op)) 5700 continue; 5701 5702 if (get_iv (data, op)) 5703 continue; 5704 5705 if (!POINTER_TYPE_P (TREE_TYPE (op)) 5706 && !INTEGRAL_TYPE_P (TREE_TYPE (op))) 5707 continue; 5708 5709 n++; 5710 } 5711 5712 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi) 5713 { 5714 struct version_info *info = ver_info (data, j); 5715 5716 if (info->inv_id && info->has_nonlin_use) 5717 n++; 5718 } 5719 5720 data->regs_used = n; 5721 if (dump_file && (dump_flags & TDF_DETAILS)) 5722 fprintf (dump_file, " regs_used %d\n", n); 5723 5724 if (dump_file && (dump_flags & TDF_DETAILS)) 5725 { 5726 fprintf (dump_file, " cost for size:\n"); 5727 fprintf (dump_file, " ivs\tcost\n"); 5728 for (j = 0; j <= 2 * target_avail_regs; j++) 5729 fprintf (dump_file, " %d\t%d\n", j, 5730 ivopts_estimate_reg_pressure (data, 0, j)); 5731 fprintf (dump_file, "\n"); 5732 } 5733 } 5734 5735 /* Returns true if A is a cheaper cost pair than B. */ 5736 5737 static bool 5738 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b) 5739 { 5740 if (!a) 5741 return false; 5742 5743 if (!b) 5744 return true; 5745 5746 if (a->cost < b->cost) 5747 return true; 5748 5749 if (b->cost < a->cost) 5750 return false; 5751 5752 /* In case the costs are the same, prefer the cheaper candidate. */ 5753 if (a->cand->cost < b->cand->cost) 5754 return true; 5755 5756 return false; 5757 } 5758 5759 /* Compare if A is a more expensive cost pair than B. Return 1, 0 and -1 5760 for more expensive, equal and cheaper respectively. */ 5761 5762 static int 5763 compare_cost_pair (struct cost_pair *a, struct cost_pair *b) 5764 { 5765 if (cheaper_cost_pair (a, b)) 5766 return -1; 5767 if (cheaper_cost_pair (b, a)) 5768 return 1; 5769 5770 return 0; 5771 } 5772 5773 /* Returns candidate by that USE is expressed in IVS. */ 5774 5775 static struct cost_pair * 5776 iv_ca_cand_for_group (struct iv_ca *ivs, struct iv_group *group) 5777 { 5778 return ivs->cand_for_group[group->id]; 5779 } 5780 5781 /* Computes the cost field of IVS structure. */ 5782 5783 static void 5784 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs) 5785 { 5786 comp_cost cost = ivs->cand_use_cost; 5787 5788 cost += ivs->cand_cost; 5789 cost += ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands); 5790 ivs->cost = cost; 5791 } 5792 5793 /* Remove use of invariants in set INVS by decreasing counter in N_INV_USES 5794 and IVS. */ 5795 5796 static void 5797 iv_ca_set_remove_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses) 5798 { 5799 bitmap_iterator bi; 5800 unsigned iid; 5801 5802 if (!invs) 5803 return; 5804 5805 gcc_assert (n_inv_uses != NULL); 5806 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi) 5807 { 5808 n_inv_uses[iid]--; 5809 if (n_inv_uses[iid] == 0) 5810 ivs->n_invs--; 5811 } 5812 } 5813 5814 /* Set USE not to be expressed by any candidate in IVS. */ 5815 5816 static void 5817 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs, 5818 struct iv_group *group) 5819 { 5820 unsigned gid = group->id, cid; 5821 struct cost_pair *cp; 5822 5823 cp = ivs->cand_for_group[gid]; 5824 if (!cp) 5825 return; 5826 cid = cp->cand->id; 5827 5828 ivs->bad_groups++; 5829 ivs->cand_for_group[gid] = NULL; 5830 ivs->n_cand_uses[cid]--; 5831 5832 if (ivs->n_cand_uses[cid] == 0) 5833 { 5834 bitmap_clear_bit (ivs->cands, cid); 5835 ivs->n_cands--; 5836 ivs->cand_cost -= cp->cand->cost; 5837 iv_ca_set_remove_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses); 5838 iv_ca_set_remove_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses); 5839 } 5840 5841 ivs->cand_use_cost -= cp->cost; 5842 iv_ca_set_remove_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses); 5843 iv_ca_set_remove_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses); 5844 iv_ca_recount_cost (data, ivs); 5845 } 5846 5847 /* Add use of invariants in set INVS by increasing counter in N_INV_USES and 5848 IVS. */ 5849 5850 static void 5851 iv_ca_set_add_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses) 5852 { 5853 bitmap_iterator bi; 5854 unsigned iid; 5855 5856 if (!invs) 5857 return; 5858 5859 gcc_assert (n_inv_uses != NULL); 5860 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi) 5861 { 5862 n_inv_uses[iid]++; 5863 if (n_inv_uses[iid] == 1) 5864 ivs->n_invs++; 5865 } 5866 } 5867 5868 /* Set cost pair for GROUP in set IVS to CP. */ 5869 5870 static void 5871 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs, 5872 struct iv_group *group, struct cost_pair *cp) 5873 { 5874 unsigned gid = group->id, cid; 5875 5876 if (ivs->cand_for_group[gid] == cp) 5877 return; 5878 5879 if (ivs->cand_for_group[gid]) 5880 iv_ca_set_no_cp (data, ivs, group); 5881 5882 if (cp) 5883 { 5884 cid = cp->cand->id; 5885 5886 ivs->bad_groups--; 5887 ivs->cand_for_group[gid] = cp; 5888 ivs->n_cand_uses[cid]++; 5889 if (ivs->n_cand_uses[cid] == 1) 5890 { 5891 bitmap_set_bit (ivs->cands, cid); 5892 ivs->n_cands++; 5893 ivs->cand_cost += cp->cand->cost; 5894 iv_ca_set_add_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses); 5895 iv_ca_set_add_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses); 5896 } 5897 5898 ivs->cand_use_cost += cp->cost; 5899 iv_ca_set_add_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses); 5900 iv_ca_set_add_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses); 5901 iv_ca_recount_cost (data, ivs); 5902 } 5903 } 5904 5905 /* Extend set IVS by expressing USE by some of the candidates in it 5906 if possible. Consider all important candidates if candidates in 5907 set IVS don't give any result. */ 5908 5909 static void 5910 iv_ca_add_group (struct ivopts_data *data, struct iv_ca *ivs, 5911 struct iv_group *group) 5912 { 5913 struct cost_pair *best_cp = NULL, *cp; 5914 bitmap_iterator bi; 5915 unsigned i; 5916 struct iv_cand *cand; 5917 5918 gcc_assert (ivs->upto >= group->id); 5919 ivs->upto++; 5920 ivs->bad_groups++; 5921 5922 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi) 5923 { 5924 cand = data->vcands[i]; 5925 cp = get_group_iv_cost (data, group, cand); 5926 if (cheaper_cost_pair (cp, best_cp)) 5927 best_cp = cp; 5928 } 5929 5930 if (best_cp == NULL) 5931 { 5932 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi) 5933 { 5934 cand = data->vcands[i]; 5935 cp = get_group_iv_cost (data, group, cand); 5936 if (cheaper_cost_pair (cp, best_cp)) 5937 best_cp = cp; 5938 } 5939 } 5940 5941 iv_ca_set_cp (data, ivs, group, best_cp); 5942 } 5943 5944 /* Get cost for assignment IVS. */ 5945 5946 static comp_cost 5947 iv_ca_cost (struct iv_ca *ivs) 5948 { 5949 /* This was a conditional expression but it triggered a bug in 5950 Sun C 5.5. */ 5951 if (ivs->bad_groups) 5952 return infinite_cost; 5953 else 5954 return ivs->cost; 5955 } 5956 5957 /* Compare if applying NEW_CP to GROUP for IVS introduces more invariants 5958 than OLD_CP. Return 1, 0 and -1 for more, equal and fewer invariants 5959 respectively. */ 5960 5961 static int 5962 iv_ca_compare_deps (struct ivopts_data *data, struct iv_ca *ivs, 5963 struct iv_group *group, struct cost_pair *old_cp, 5964 struct cost_pair *new_cp) 5965 { 5966 gcc_assert (old_cp && new_cp && old_cp != new_cp); 5967 unsigned old_n_invs = ivs->n_invs; 5968 iv_ca_set_cp (data, ivs, group, new_cp); 5969 unsigned new_n_invs = ivs->n_invs; 5970 iv_ca_set_cp (data, ivs, group, old_cp); 5971 5972 return new_n_invs > old_n_invs ? 1 : (new_n_invs < old_n_invs ? -1 : 0); 5973 } 5974 5975 /* Creates change of expressing GROUP by NEW_CP instead of OLD_CP and chains 5976 it before NEXT. */ 5977 5978 static struct iv_ca_delta * 5979 iv_ca_delta_add (struct iv_group *group, struct cost_pair *old_cp, 5980 struct cost_pair *new_cp, struct iv_ca_delta *next) 5981 { 5982 struct iv_ca_delta *change = XNEW (struct iv_ca_delta); 5983 5984 change->group = group; 5985 change->old_cp = old_cp; 5986 change->new_cp = new_cp; 5987 change->next = next; 5988 5989 return change; 5990 } 5991 5992 /* Joins two lists of changes L1 and L2. Destructive -- old lists 5993 are rewritten. */ 5994 5995 static struct iv_ca_delta * 5996 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2) 5997 { 5998 struct iv_ca_delta *last; 5999 6000 if (!l2) 6001 return l1; 6002 6003 if (!l1) 6004 return l2; 6005 6006 for (last = l1; last->next; last = last->next) 6007 continue; 6008 last->next = l2; 6009 6010 return l1; 6011 } 6012 6013 /* Reverse the list of changes DELTA, forming the inverse to it. */ 6014 6015 static struct iv_ca_delta * 6016 iv_ca_delta_reverse (struct iv_ca_delta *delta) 6017 { 6018 struct iv_ca_delta *act, *next, *prev = NULL; 6019 6020 for (act = delta; act; act = next) 6021 { 6022 next = act->next; 6023 act->next = prev; 6024 prev = act; 6025 6026 std::swap (act->old_cp, act->new_cp); 6027 } 6028 6029 return prev; 6030 } 6031 6032 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are 6033 reverted instead. */ 6034 6035 static void 6036 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs, 6037 struct iv_ca_delta *delta, bool forward) 6038 { 6039 struct cost_pair *from, *to; 6040 struct iv_ca_delta *act; 6041 6042 if (!forward) 6043 delta = iv_ca_delta_reverse (delta); 6044 6045 for (act = delta; act; act = act->next) 6046 { 6047 from = act->old_cp; 6048 to = act->new_cp; 6049 gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from); 6050 iv_ca_set_cp (data, ivs, act->group, to); 6051 } 6052 6053 if (!forward) 6054 iv_ca_delta_reverse (delta); 6055 } 6056 6057 /* Returns true if CAND is used in IVS. */ 6058 6059 static bool 6060 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand) 6061 { 6062 return ivs->n_cand_uses[cand->id] > 0; 6063 } 6064 6065 /* Returns number of induction variable candidates in the set IVS. */ 6066 6067 static unsigned 6068 iv_ca_n_cands (struct iv_ca *ivs) 6069 { 6070 return ivs->n_cands; 6071 } 6072 6073 /* Free the list of changes DELTA. */ 6074 6075 static void 6076 iv_ca_delta_free (struct iv_ca_delta **delta) 6077 { 6078 struct iv_ca_delta *act, *next; 6079 6080 for (act = *delta; act; act = next) 6081 { 6082 next = act->next; 6083 free (act); 6084 } 6085 6086 *delta = NULL; 6087 } 6088 6089 /* Allocates new iv candidates assignment. */ 6090 6091 static struct iv_ca * 6092 iv_ca_new (struct ivopts_data *data) 6093 { 6094 struct iv_ca *nw = XNEW (struct iv_ca); 6095 6096 nw->upto = 0; 6097 nw->bad_groups = 0; 6098 nw->cand_for_group = XCNEWVEC (struct cost_pair *, 6099 data->vgroups.length ()); 6100 nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ()); 6101 nw->cands = BITMAP_ALLOC (NULL); 6102 nw->n_cands = 0; 6103 nw->n_invs = 0; 6104 nw->cand_use_cost = no_cost; 6105 nw->cand_cost = 0; 6106 nw->n_inv_var_uses = XCNEWVEC (unsigned, data->max_inv_var_id + 1); 6107 nw->n_inv_expr_uses = XCNEWVEC (unsigned, data->max_inv_expr_id + 1); 6108 nw->cost = no_cost; 6109 6110 return nw; 6111 } 6112 6113 /* Free memory occupied by the set IVS. */ 6114 6115 static void 6116 iv_ca_free (struct iv_ca **ivs) 6117 { 6118 free ((*ivs)->cand_for_group); 6119 free ((*ivs)->n_cand_uses); 6120 BITMAP_FREE ((*ivs)->cands); 6121 free ((*ivs)->n_inv_var_uses); 6122 free ((*ivs)->n_inv_expr_uses); 6123 free (*ivs); 6124 *ivs = NULL; 6125 } 6126 6127 /* Dumps IVS to FILE. */ 6128 6129 static void 6130 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs) 6131 { 6132 unsigned i; 6133 comp_cost cost = iv_ca_cost (ivs); 6134 6135 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, 6136 cost.complexity); 6137 fprintf (file, " cand_cost: %d\n cand_group_cost: %d (complexity %d)\n", 6138 ivs->cand_cost, ivs->cand_use_cost.cost, 6139 ivs->cand_use_cost.complexity); 6140 bitmap_print (file, ivs->cands, " candidates: ","\n"); 6141 6142 for (i = 0; i < ivs->upto; i++) 6143 { 6144 struct iv_group *group = data->vgroups[i]; 6145 struct cost_pair *cp = iv_ca_cand_for_group (ivs, group); 6146 if (cp) 6147 fprintf (file, " group:%d --> iv_cand:%d, cost=(%d,%d)\n", 6148 group->id, cp->cand->id, cp->cost.cost, 6149 cp->cost.complexity); 6150 else 6151 fprintf (file, " group:%d --> ??\n", group->id); 6152 } 6153 6154 const char *pref = ""; 6155 fprintf (file, " invariant variables: "); 6156 for (i = 1; i <= data->max_inv_var_id; i++) 6157 if (ivs->n_inv_var_uses[i]) 6158 { 6159 fprintf (file, "%s%d", pref, i); 6160 pref = ", "; 6161 } 6162 6163 pref = ""; 6164 fprintf (file, "\n invariant expressions: "); 6165 for (i = 1; i <= data->max_inv_expr_id; i++) 6166 if (ivs->n_inv_expr_uses[i]) 6167 { 6168 fprintf (file, "%s%d", pref, i); 6169 pref = ", "; 6170 } 6171 6172 fprintf (file, "\n\n"); 6173 } 6174 6175 /* Try changing candidate in IVS to CAND for each use. Return cost of the 6176 new set, and store differences in DELTA. Number of induction variables 6177 in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true 6178 the function will try to find a solution with mimimal iv candidates. */ 6179 6180 static comp_cost 6181 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs, 6182 struct iv_cand *cand, struct iv_ca_delta **delta, 6183 unsigned *n_ivs, bool min_ncand) 6184 { 6185 unsigned i; 6186 comp_cost cost; 6187 struct iv_group *group; 6188 struct cost_pair *old_cp, *new_cp; 6189 6190 *delta = NULL; 6191 for (i = 0; i < ivs->upto; i++) 6192 { 6193 group = data->vgroups[i]; 6194 old_cp = iv_ca_cand_for_group (ivs, group); 6195 6196 if (old_cp 6197 && old_cp->cand == cand) 6198 continue; 6199 6200 new_cp = get_group_iv_cost (data, group, cand); 6201 if (!new_cp) 6202 continue; 6203 6204 if (!min_ncand) 6205 { 6206 int cmp_invs = iv_ca_compare_deps (data, ivs, group, old_cp, new_cp); 6207 /* Skip if new_cp depends on more invariants. */ 6208 if (cmp_invs > 0) 6209 continue; 6210 6211 int cmp_cost = compare_cost_pair (new_cp, old_cp); 6212 /* Skip if new_cp is not cheaper. */ 6213 if (cmp_cost > 0 || (cmp_cost == 0 && cmp_invs == 0)) 6214 continue; 6215 } 6216 6217 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta); 6218 } 6219 6220 iv_ca_delta_commit (data, ivs, *delta, true); 6221 cost = iv_ca_cost (ivs); 6222 if (n_ivs) 6223 *n_ivs = iv_ca_n_cands (ivs); 6224 iv_ca_delta_commit (data, ivs, *delta, false); 6225 6226 return cost; 6227 } 6228 6229 /* Try narrowing set IVS by removing CAND. Return the cost of 6230 the new set and store the differences in DELTA. START is 6231 the candidate with which we start narrowing. */ 6232 6233 static comp_cost 6234 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs, 6235 struct iv_cand *cand, struct iv_cand *start, 6236 struct iv_ca_delta **delta) 6237 { 6238 unsigned i, ci; 6239 struct iv_group *group; 6240 struct cost_pair *old_cp, *new_cp, *cp; 6241 bitmap_iterator bi; 6242 struct iv_cand *cnd; 6243 comp_cost cost, best_cost, acost; 6244 6245 *delta = NULL; 6246 for (i = 0; i < data->vgroups.length (); i++) 6247 { 6248 group = data->vgroups[i]; 6249 6250 old_cp = iv_ca_cand_for_group (ivs, group); 6251 if (old_cp->cand != cand) 6252 continue; 6253 6254 best_cost = iv_ca_cost (ivs); 6255 /* Start narrowing with START. */ 6256 new_cp = get_group_iv_cost (data, group, start); 6257 6258 if (data->consider_all_candidates) 6259 { 6260 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi) 6261 { 6262 if (ci == cand->id || (start && ci == start->id)) 6263 continue; 6264 6265 cnd = data->vcands[ci]; 6266 6267 cp = get_group_iv_cost (data, group, cnd); 6268 if (!cp) 6269 continue; 6270 6271 iv_ca_set_cp (data, ivs, group, cp); 6272 acost = iv_ca_cost (ivs); 6273 6274 if (acost < best_cost) 6275 { 6276 best_cost = acost; 6277 new_cp = cp; 6278 } 6279 } 6280 } 6281 else 6282 { 6283 EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi) 6284 { 6285 if (ci == cand->id || (start && ci == start->id)) 6286 continue; 6287 6288 cnd = data->vcands[ci]; 6289 6290 cp = get_group_iv_cost (data, group, cnd); 6291 if (!cp) 6292 continue; 6293 6294 iv_ca_set_cp (data, ivs, group, cp); 6295 acost = iv_ca_cost (ivs); 6296 6297 if (acost < best_cost) 6298 { 6299 best_cost = acost; 6300 new_cp = cp; 6301 } 6302 } 6303 } 6304 /* Restore to old cp for use. */ 6305 iv_ca_set_cp (data, ivs, group, old_cp); 6306 6307 if (!new_cp) 6308 { 6309 iv_ca_delta_free (delta); 6310 return infinite_cost; 6311 } 6312 6313 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta); 6314 } 6315 6316 iv_ca_delta_commit (data, ivs, *delta, true); 6317 cost = iv_ca_cost (ivs); 6318 iv_ca_delta_commit (data, ivs, *delta, false); 6319 6320 return cost; 6321 } 6322 6323 /* Try optimizing the set of candidates IVS by removing candidates different 6324 from to EXCEPT_CAND from it. Return cost of the new set, and store 6325 differences in DELTA. */ 6326 6327 static comp_cost 6328 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs, 6329 struct iv_cand *except_cand, struct iv_ca_delta **delta) 6330 { 6331 bitmap_iterator bi; 6332 struct iv_ca_delta *act_delta, *best_delta; 6333 unsigned i; 6334 comp_cost best_cost, acost; 6335 struct iv_cand *cand; 6336 6337 best_delta = NULL; 6338 best_cost = iv_ca_cost (ivs); 6339 6340 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi) 6341 { 6342 cand = data->vcands[i]; 6343 6344 if (cand == except_cand) 6345 continue; 6346 6347 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta); 6348 6349 if (acost < best_cost) 6350 { 6351 best_cost = acost; 6352 iv_ca_delta_free (&best_delta); 6353 best_delta = act_delta; 6354 } 6355 else 6356 iv_ca_delta_free (&act_delta); 6357 } 6358 6359 if (!best_delta) 6360 { 6361 *delta = NULL; 6362 return best_cost; 6363 } 6364 6365 /* Recurse to possibly remove other unnecessary ivs. */ 6366 iv_ca_delta_commit (data, ivs, best_delta, true); 6367 best_cost = iv_ca_prune (data, ivs, except_cand, delta); 6368 iv_ca_delta_commit (data, ivs, best_delta, false); 6369 *delta = iv_ca_delta_join (best_delta, *delta); 6370 return best_cost; 6371 } 6372 6373 /* Check if CAND_IDX is a candidate other than OLD_CAND and has 6374 cheaper local cost for GROUP than BEST_CP. Return pointer to 6375 the corresponding cost_pair, otherwise just return BEST_CP. */ 6376 6377 static struct cost_pair* 6378 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group, 6379 unsigned int cand_idx, struct iv_cand *old_cand, 6380 struct cost_pair *best_cp) 6381 { 6382 struct iv_cand *cand; 6383 struct cost_pair *cp; 6384 6385 gcc_assert (old_cand != NULL && best_cp != NULL); 6386 if (cand_idx == old_cand->id) 6387 return best_cp; 6388 6389 cand = data->vcands[cand_idx]; 6390 cp = get_group_iv_cost (data, group, cand); 6391 if (cp != NULL && cheaper_cost_pair (cp, best_cp)) 6392 return cp; 6393 6394 return best_cp; 6395 } 6396 6397 /* Try breaking local optimal fixed-point for IVS by replacing candidates 6398 which are used by more than one iv uses. For each of those candidates, 6399 this function tries to represent iv uses under that candidate using 6400 other ones with lower local cost, then tries to prune the new set. 6401 If the new set has lower cost, It returns the new cost after recording 6402 candidate replacement in list DELTA. */ 6403 6404 static comp_cost 6405 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs, 6406 struct iv_ca_delta **delta) 6407 { 6408 bitmap_iterator bi, bj; 6409 unsigned int i, j, k; 6410 struct iv_cand *cand; 6411 comp_cost orig_cost, acost; 6412 struct iv_ca_delta *act_delta, *tmp_delta; 6413 struct cost_pair *old_cp, *best_cp = NULL; 6414 6415 *delta = NULL; 6416 orig_cost = iv_ca_cost (ivs); 6417 6418 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi) 6419 { 6420 if (ivs->n_cand_uses[i] == 1 6421 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND) 6422 continue; 6423 6424 cand = data->vcands[i]; 6425 6426 act_delta = NULL; 6427 /* Represent uses under current candidate using other ones with 6428 lower local cost. */ 6429 for (j = 0; j < ivs->upto; j++) 6430 { 6431 struct iv_group *group = data->vgroups[j]; 6432 old_cp = iv_ca_cand_for_group (ivs, group); 6433 6434 if (old_cp->cand != cand) 6435 continue; 6436 6437 best_cp = old_cp; 6438 if (data->consider_all_candidates) 6439 for (k = 0; k < data->vcands.length (); k++) 6440 best_cp = cheaper_cost_with_cand (data, group, k, 6441 old_cp->cand, best_cp); 6442 else 6443 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj) 6444 best_cp = cheaper_cost_with_cand (data, group, k, 6445 old_cp->cand, best_cp); 6446 6447 if (best_cp == old_cp) 6448 continue; 6449 6450 act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta); 6451 } 6452 /* No need for further prune. */ 6453 if (!act_delta) 6454 continue; 6455 6456 /* Prune the new candidate set. */ 6457 iv_ca_delta_commit (data, ivs, act_delta, true); 6458 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta); 6459 iv_ca_delta_commit (data, ivs, act_delta, false); 6460 act_delta = iv_ca_delta_join (act_delta, tmp_delta); 6461 6462 if (acost < orig_cost) 6463 { 6464 *delta = act_delta; 6465 return acost; 6466 } 6467 else 6468 iv_ca_delta_free (&act_delta); 6469 } 6470 6471 return orig_cost; 6472 } 6473 6474 /* Tries to extend the sets IVS in the best possible way in order to 6475 express the GROUP. If ORIGINALP is true, prefer candidates from 6476 the original set of IVs, otherwise favor important candidates not 6477 based on any memory object. */ 6478 6479 static bool 6480 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs, 6481 struct iv_group *group, bool originalp) 6482 { 6483 comp_cost best_cost, act_cost; 6484 unsigned i; 6485 bitmap_iterator bi; 6486 struct iv_cand *cand; 6487 struct iv_ca_delta *best_delta = NULL, *act_delta; 6488 struct cost_pair *cp; 6489 6490 iv_ca_add_group (data, ivs, group); 6491 best_cost = iv_ca_cost (ivs); 6492 cp = iv_ca_cand_for_group (ivs, group); 6493 if (cp) 6494 { 6495 best_delta = iv_ca_delta_add (group, NULL, cp, NULL); 6496 iv_ca_set_no_cp (data, ivs, group); 6497 } 6498 6499 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise 6500 first try important candidates not based on any memory object. Only if 6501 this fails, try the specific ones. Rationale -- in loops with many 6502 variables the best choice often is to use just one generic biv. If we 6503 added here many ivs specific to the uses, the optimization algorithm later 6504 would be likely to get stuck in a local minimum, thus causing us to create 6505 too many ivs. The approach from few ivs to more seems more likely to be 6506 successful -- starting from few ivs, replacing an expensive use by a 6507 specific iv should always be a win. */ 6508 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi) 6509 { 6510 cand = data->vcands[i]; 6511 6512 if (originalp && cand->pos !=IP_ORIGINAL) 6513 continue; 6514 6515 if (!originalp && cand->iv->base_object != NULL_TREE) 6516 continue; 6517 6518 if (iv_ca_cand_used_p (ivs, cand)) 6519 continue; 6520 6521 cp = get_group_iv_cost (data, group, cand); 6522 if (!cp) 6523 continue; 6524 6525 iv_ca_set_cp (data, ivs, group, cp); 6526 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, 6527 true); 6528 iv_ca_set_no_cp (data, ivs, group); 6529 act_delta = iv_ca_delta_add (group, NULL, cp, act_delta); 6530 6531 if (act_cost < best_cost) 6532 { 6533 best_cost = act_cost; 6534 6535 iv_ca_delta_free (&best_delta); 6536 best_delta = act_delta; 6537 } 6538 else 6539 iv_ca_delta_free (&act_delta); 6540 } 6541 6542 if (best_cost.infinite_cost_p ()) 6543 { 6544 for (i = 0; i < group->n_map_members; i++) 6545 { 6546 cp = group->cost_map + i; 6547 cand = cp->cand; 6548 if (!cand) 6549 continue; 6550 6551 /* Already tried this. */ 6552 if (cand->important) 6553 { 6554 if (originalp && cand->pos == IP_ORIGINAL) 6555 continue; 6556 if (!originalp && cand->iv->base_object == NULL_TREE) 6557 continue; 6558 } 6559 6560 if (iv_ca_cand_used_p (ivs, cand)) 6561 continue; 6562 6563 act_delta = NULL; 6564 iv_ca_set_cp (data, ivs, group, cp); 6565 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true); 6566 iv_ca_set_no_cp (data, ivs, group); 6567 act_delta = iv_ca_delta_add (group, 6568 iv_ca_cand_for_group (ivs, group), 6569 cp, act_delta); 6570 6571 if (act_cost < best_cost) 6572 { 6573 best_cost = act_cost; 6574 6575 if (best_delta) 6576 iv_ca_delta_free (&best_delta); 6577 best_delta = act_delta; 6578 } 6579 else 6580 iv_ca_delta_free (&act_delta); 6581 } 6582 } 6583 6584 iv_ca_delta_commit (data, ivs, best_delta, true); 6585 iv_ca_delta_free (&best_delta); 6586 6587 return !best_cost.infinite_cost_p (); 6588 } 6589 6590 /* Finds an initial assignment of candidates to uses. */ 6591 6592 static struct iv_ca * 6593 get_initial_solution (struct ivopts_data *data, bool originalp) 6594 { 6595 unsigned i; 6596 struct iv_ca *ivs = iv_ca_new (data); 6597 6598 for (i = 0; i < data->vgroups.length (); i++) 6599 if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp)) 6600 { 6601 iv_ca_free (&ivs); 6602 return NULL; 6603 } 6604 6605 return ivs; 6606 } 6607 6608 /* Tries to improve set of induction variables IVS. TRY_REPLACE_P 6609 points to a bool variable, this function tries to break local 6610 optimal fixed-point by replacing candidates in IVS if it's true. */ 6611 6612 static bool 6613 try_improve_iv_set (struct ivopts_data *data, 6614 struct iv_ca *ivs, bool *try_replace_p) 6615 { 6616 unsigned i, n_ivs; 6617 comp_cost acost, best_cost = iv_ca_cost (ivs); 6618 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta; 6619 struct iv_cand *cand; 6620 6621 /* Try extending the set of induction variables by one. */ 6622 for (i = 0; i < data->vcands.length (); i++) 6623 { 6624 cand = data->vcands[i]; 6625 6626 if (iv_ca_cand_used_p (ivs, cand)) 6627 continue; 6628 6629 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false); 6630 if (!act_delta) 6631 continue; 6632 6633 /* If we successfully added the candidate and the set is small enough, 6634 try optimizing it by removing other candidates. */ 6635 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND) 6636 { 6637 iv_ca_delta_commit (data, ivs, act_delta, true); 6638 acost = iv_ca_prune (data, ivs, cand, &tmp_delta); 6639 iv_ca_delta_commit (data, ivs, act_delta, false); 6640 act_delta = iv_ca_delta_join (act_delta, tmp_delta); 6641 } 6642 6643 if (acost < best_cost) 6644 { 6645 best_cost = acost; 6646 iv_ca_delta_free (&best_delta); 6647 best_delta = act_delta; 6648 } 6649 else 6650 iv_ca_delta_free (&act_delta); 6651 } 6652 6653 if (!best_delta) 6654 { 6655 /* Try removing the candidates from the set instead. */ 6656 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta); 6657 6658 if (!best_delta && *try_replace_p) 6659 { 6660 *try_replace_p = false; 6661 /* So far candidate selecting algorithm tends to choose fewer IVs 6662 so that it can handle cases in which loops have many variables 6663 but the best choice is often to use only one general biv. One 6664 weakness is it can't handle opposite cases, in which different 6665 candidates should be chosen with respect to each use. To solve 6666 the problem, we replace candidates in a manner described by the 6667 comments of iv_ca_replace, thus give general algorithm a chance 6668 to break local optimal fixed-point in these cases. */ 6669 best_cost = iv_ca_replace (data, ivs, &best_delta); 6670 } 6671 6672 if (!best_delta) 6673 return false; 6674 } 6675 6676 iv_ca_delta_commit (data, ivs, best_delta, true); 6677 gcc_assert (best_cost == iv_ca_cost (ivs)); 6678 iv_ca_delta_free (&best_delta); 6679 return true; 6680 } 6681 6682 /* Attempts to find the optimal set of induction variables. We do simple 6683 greedy heuristic -- we try to replace at most one candidate in the selected 6684 solution and remove the unused ivs while this improves the cost. */ 6685 6686 static struct iv_ca * 6687 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp) 6688 { 6689 struct iv_ca *set; 6690 bool try_replace_p = true; 6691 6692 /* Get the initial solution. */ 6693 set = get_initial_solution (data, originalp); 6694 if (!set) 6695 { 6696 if (dump_file && (dump_flags & TDF_DETAILS)) 6697 fprintf (dump_file, "Unable to substitute for ivs, failed.\n"); 6698 return NULL; 6699 } 6700 6701 if (dump_file && (dump_flags & TDF_DETAILS)) 6702 { 6703 fprintf (dump_file, "Initial set of candidates:\n"); 6704 iv_ca_dump (data, dump_file, set); 6705 } 6706 6707 while (try_improve_iv_set (data, set, &try_replace_p)) 6708 { 6709 if (dump_file && (dump_flags & TDF_DETAILS)) 6710 { 6711 fprintf (dump_file, "Improved to:\n"); 6712 iv_ca_dump (data, dump_file, set); 6713 } 6714 } 6715 6716 return set; 6717 } 6718 6719 static struct iv_ca * 6720 find_optimal_iv_set (struct ivopts_data *data) 6721 { 6722 unsigned i; 6723 comp_cost cost, origcost; 6724 struct iv_ca *set, *origset; 6725 6726 /* Determine the cost based on a strategy that starts with original IVs, 6727 and try again using a strategy that prefers candidates not based 6728 on any IVs. */ 6729 origset = find_optimal_iv_set_1 (data, true); 6730 set = find_optimal_iv_set_1 (data, false); 6731 6732 if (!origset && !set) 6733 return NULL; 6734 6735 origcost = origset ? iv_ca_cost (origset) : infinite_cost; 6736 cost = set ? iv_ca_cost (set) : infinite_cost; 6737 6738 if (dump_file && (dump_flags & TDF_DETAILS)) 6739 { 6740 fprintf (dump_file, "Original cost %d (complexity %d)\n\n", 6741 origcost.cost, origcost.complexity); 6742 fprintf (dump_file, "Final cost %d (complexity %d)\n\n", 6743 cost.cost, cost.complexity); 6744 } 6745 6746 /* Choose the one with the best cost. */ 6747 if (origcost <= cost) 6748 { 6749 if (set) 6750 iv_ca_free (&set); 6751 set = origset; 6752 } 6753 else if (origset) 6754 iv_ca_free (&origset); 6755 6756 for (i = 0; i < data->vgroups.length (); i++) 6757 { 6758 struct iv_group *group = data->vgroups[i]; 6759 group->selected = iv_ca_cand_for_group (set, group)->cand; 6760 } 6761 6762 return set; 6763 } 6764 6765 /* Creates a new induction variable corresponding to CAND. */ 6766 6767 static void 6768 create_new_iv (struct ivopts_data *data, struct iv_cand *cand) 6769 { 6770 gimple_stmt_iterator incr_pos; 6771 tree base; 6772 struct iv_use *use; 6773 struct iv_group *group; 6774 bool after = false; 6775 6776 gcc_assert (cand->iv != NULL); 6777 6778 switch (cand->pos) 6779 { 6780 case IP_NORMAL: 6781 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop)); 6782 break; 6783 6784 case IP_END: 6785 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop)); 6786 after = true; 6787 break; 6788 6789 case IP_AFTER_USE: 6790 after = true; 6791 /* fall through */ 6792 case IP_BEFORE_USE: 6793 incr_pos = gsi_for_stmt (cand->incremented_at); 6794 break; 6795 6796 case IP_ORIGINAL: 6797 /* Mark that the iv is preserved. */ 6798 name_info (data, cand->var_before)->preserve_biv = true; 6799 name_info (data, cand->var_after)->preserve_biv = true; 6800 6801 /* Rewrite the increment so that it uses var_before directly. */ 6802 use = find_interesting_uses_op (data, cand->var_after); 6803 group = data->vgroups[use->group_id]; 6804 group->selected = cand; 6805 return; 6806 } 6807 6808 gimple_add_tmp_var (cand->var_before); 6809 6810 base = unshare_expr (cand->iv->base); 6811 6812 create_iv (base, unshare_expr (cand->iv->step), 6813 cand->var_before, data->current_loop, 6814 &incr_pos, after, &cand->var_before, &cand->var_after); 6815 } 6816 6817 /* Creates new induction variables described in SET. */ 6818 6819 static void 6820 create_new_ivs (struct ivopts_data *data, struct iv_ca *set) 6821 { 6822 unsigned i; 6823 struct iv_cand *cand; 6824 bitmap_iterator bi; 6825 6826 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi) 6827 { 6828 cand = data->vcands[i]; 6829 create_new_iv (data, cand); 6830 } 6831 6832 if (dump_file && (dump_flags & TDF_DETAILS)) 6833 { 6834 fprintf (dump_file, "Selected IV set for loop %d", 6835 data->current_loop->num); 6836 if (data->loop_loc != UNKNOWN_LOCATION) 6837 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc), 6838 LOCATION_LINE (data->loop_loc)); 6839 fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters", 6840 avg_loop_niter (data->current_loop)); 6841 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands)); 6842 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi) 6843 { 6844 cand = data->vcands[i]; 6845 dump_cand (dump_file, cand); 6846 } 6847 fprintf (dump_file, "\n"); 6848 } 6849 } 6850 6851 /* Rewrites USE (definition of iv used in a nonlinear expression) 6852 using candidate CAND. */ 6853 6854 static void 6855 rewrite_use_nonlinear_expr (struct ivopts_data *data, 6856 struct iv_use *use, struct iv_cand *cand) 6857 { 6858 gassign *ass; 6859 gimple_stmt_iterator bsi; 6860 tree comp, type = get_use_type (use), tgt; 6861 6862 /* An important special case -- if we are asked to express value of 6863 the original iv by itself, just exit; there is no need to 6864 introduce a new computation (that might also need casting the 6865 variable to unsigned and back). */ 6866 if (cand->pos == IP_ORIGINAL 6867 && cand->incremented_at == use->stmt) 6868 { 6869 tree op = NULL_TREE; 6870 enum tree_code stmt_code; 6871 6872 gcc_assert (is_gimple_assign (use->stmt)); 6873 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after); 6874 6875 /* Check whether we may leave the computation unchanged. 6876 This is the case only if it does not rely on other 6877 computations in the loop -- otherwise, the computation 6878 we rely upon may be removed in remove_unused_ivs, 6879 thus leading to ICE. */ 6880 stmt_code = gimple_assign_rhs_code (use->stmt); 6881 if (stmt_code == PLUS_EXPR 6882 || stmt_code == MINUS_EXPR 6883 || stmt_code == POINTER_PLUS_EXPR) 6884 { 6885 if (gimple_assign_rhs1 (use->stmt) == cand->var_before) 6886 op = gimple_assign_rhs2 (use->stmt); 6887 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before) 6888 op = gimple_assign_rhs1 (use->stmt); 6889 } 6890 6891 if (op != NULL_TREE) 6892 { 6893 if (expr_invariant_in_loop_p (data->current_loop, op)) 6894 return; 6895 if (TREE_CODE (op) == SSA_NAME) 6896 { 6897 struct iv *iv = get_iv (data, op); 6898 if (iv != NULL && integer_zerop (iv->step)) 6899 return; 6900 } 6901 } 6902 } 6903 6904 switch (gimple_code (use->stmt)) 6905 { 6906 case GIMPLE_PHI: 6907 tgt = PHI_RESULT (use->stmt); 6908 6909 /* If we should keep the biv, do not replace it. */ 6910 if (name_info (data, tgt)->preserve_biv) 6911 return; 6912 6913 bsi = gsi_after_labels (gimple_bb (use->stmt)); 6914 break; 6915 6916 case GIMPLE_ASSIGN: 6917 tgt = gimple_assign_lhs (use->stmt); 6918 bsi = gsi_for_stmt (use->stmt); 6919 break; 6920 6921 default: 6922 gcc_unreachable (); 6923 } 6924 6925 aff_tree aff_inv, aff_var; 6926 if (!get_computation_aff_1 (data->current_loop, use->stmt, 6927 use, cand, &aff_inv, &aff_var)) 6928 gcc_unreachable (); 6929 6930 unshare_aff_combination (&aff_inv); 6931 unshare_aff_combination (&aff_var); 6932 /* Prefer CSE opportunity than loop invariant by adding offset at last 6933 so that iv_uses have different offsets can be CSEed. */ 6934 poly_widest_int offset = aff_inv.offset; 6935 aff_inv.offset = 0; 6936 6937 gimple_seq stmt_list = NULL, seq = NULL; 6938 tree comp_op1 = aff_combination_to_tree (&aff_inv); 6939 tree comp_op2 = aff_combination_to_tree (&aff_var); 6940 gcc_assert (comp_op1 && comp_op2); 6941 6942 comp_op1 = force_gimple_operand (comp_op1, &seq, true, NULL); 6943 gimple_seq_add_seq (&stmt_list, seq); 6944 comp_op2 = force_gimple_operand (comp_op2, &seq, true, NULL); 6945 gimple_seq_add_seq (&stmt_list, seq); 6946 6947 if (POINTER_TYPE_P (TREE_TYPE (comp_op2))) 6948 std::swap (comp_op1, comp_op2); 6949 6950 if (POINTER_TYPE_P (TREE_TYPE (comp_op1))) 6951 { 6952 comp = fold_build_pointer_plus (comp_op1, 6953 fold_convert (sizetype, comp_op2)); 6954 comp = fold_build_pointer_plus (comp, 6955 wide_int_to_tree (sizetype, offset)); 6956 } 6957 else 6958 { 6959 comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp_op1, 6960 fold_convert (TREE_TYPE (comp_op1), comp_op2)); 6961 comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp, 6962 wide_int_to_tree (TREE_TYPE (comp_op1), offset)); 6963 } 6964 6965 comp = fold_convert (type, comp); 6966 if (!valid_gimple_rhs_p (comp) 6967 || (gimple_code (use->stmt) != GIMPLE_PHI 6968 /* We can't allow re-allocating the stmt as it might be pointed 6969 to still. */ 6970 && (get_gimple_rhs_num_ops (TREE_CODE (comp)) 6971 >= gimple_num_ops (gsi_stmt (bsi))))) 6972 { 6973 comp = force_gimple_operand (comp, &seq, true, NULL); 6974 gimple_seq_add_seq (&stmt_list, seq); 6975 if (POINTER_TYPE_P (TREE_TYPE (tgt))) 6976 { 6977 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt)); 6978 /* As this isn't a plain copy we have to reset alignment 6979 information. */ 6980 if (SSA_NAME_PTR_INFO (comp)) 6981 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp)); 6982 } 6983 } 6984 6985 gsi_insert_seq_before (&bsi, stmt_list, GSI_SAME_STMT); 6986 if (gimple_code (use->stmt) == GIMPLE_PHI) 6987 { 6988 ass = gimple_build_assign (tgt, comp); 6989 gsi_insert_before (&bsi, ass, GSI_SAME_STMT); 6990 6991 bsi = gsi_for_stmt (use->stmt); 6992 remove_phi_node (&bsi, false); 6993 } 6994 else 6995 { 6996 gimple_assign_set_rhs_from_tree (&bsi, comp); 6997 use->stmt = gsi_stmt (bsi); 6998 } 6999 } 7000 7001 /* Performs a peephole optimization to reorder the iv update statement with 7002 a mem ref to enable instruction combining in later phases. The mem ref uses 7003 the iv value before the update, so the reordering transformation requires 7004 adjustment of the offset. CAND is the selected IV_CAND. 7005 7006 Example: 7007 7008 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset 7009 iv2 = iv1 + 1; 7010 7011 if (t < val) (1) 7012 goto L; 7013 goto Head; 7014 7015 7016 directly propagating t over to (1) will introduce overlapping live range 7017 thus increase register pressure. This peephole transform it into: 7018 7019 7020 iv2 = iv1 + 1; 7021 t = MEM_REF (base, iv2, 8, 8); 7022 if (t < val) 7023 goto L; 7024 goto Head; 7025 */ 7026 7027 static void 7028 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use) 7029 { 7030 tree var_after; 7031 gimple *iv_update, *stmt; 7032 basic_block bb; 7033 gimple_stmt_iterator gsi, gsi_iv; 7034 7035 if (cand->pos != IP_NORMAL) 7036 return; 7037 7038 var_after = cand->var_after; 7039 iv_update = SSA_NAME_DEF_STMT (var_after); 7040 7041 bb = gimple_bb (iv_update); 7042 gsi = gsi_last_nondebug_bb (bb); 7043 stmt = gsi_stmt (gsi); 7044 7045 /* Only handle conditional statement for now. */ 7046 if (gimple_code (stmt) != GIMPLE_COND) 7047 return; 7048 7049 gsi_prev_nondebug (&gsi); 7050 stmt = gsi_stmt (gsi); 7051 if (stmt != iv_update) 7052 return; 7053 7054 gsi_prev_nondebug (&gsi); 7055 if (gsi_end_p (gsi)) 7056 return; 7057 7058 stmt = gsi_stmt (gsi); 7059 if (gimple_code (stmt) != GIMPLE_ASSIGN) 7060 return; 7061 7062 if (stmt != use->stmt) 7063 return; 7064 7065 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME) 7066 return; 7067 7068 if (dump_file && (dump_flags & TDF_DETAILS)) 7069 { 7070 fprintf (dump_file, "Reordering \n"); 7071 print_gimple_stmt (dump_file, iv_update, 0); 7072 print_gimple_stmt (dump_file, use->stmt, 0); 7073 fprintf (dump_file, "\n"); 7074 } 7075 7076 gsi = gsi_for_stmt (use->stmt); 7077 gsi_iv = gsi_for_stmt (iv_update); 7078 gsi_move_before (&gsi_iv, &gsi); 7079 7080 cand->pos = IP_BEFORE_USE; 7081 cand->incremented_at = use->stmt; 7082 } 7083 7084 /* Return the alias pointer type that should be used for a MEM_REF 7085 associated with USE, which has type USE_PTR_ADDRESS. */ 7086 7087 static tree 7088 get_alias_ptr_type_for_ptr_address (iv_use *use) 7089 { 7090 gcall *call = as_a <gcall *> (use->stmt); 7091 switch (gimple_call_internal_fn (call)) 7092 { 7093 case IFN_MASK_LOAD: 7094 case IFN_MASK_STORE: 7095 /* The second argument contains the correct alias type. */ 7096 gcc_assert (use->op_p = gimple_call_arg_ptr (call, 0)); 7097 return TREE_TYPE (gimple_call_arg (call, 1)); 7098 7099 default: 7100 gcc_unreachable (); 7101 } 7102 } 7103 7104 7105 /* Rewrites USE (address that is an iv) using candidate CAND. */ 7106 7107 static void 7108 rewrite_use_address (struct ivopts_data *data, 7109 struct iv_use *use, struct iv_cand *cand) 7110 { 7111 aff_tree aff; 7112 bool ok; 7113 7114 adjust_iv_update_pos (cand, use); 7115 ok = get_computation_aff (data->current_loop, use->stmt, use, cand, &aff); 7116 gcc_assert (ok); 7117 unshare_aff_combination (&aff); 7118 7119 /* To avoid undefined overflow problems, all IV candidates use unsigned 7120 integer types. The drawback is that this makes it impossible for 7121 create_mem_ref to distinguish an IV that is based on a memory object 7122 from one that represents simply an offset. 7123 7124 To work around this problem, we pass a hint to create_mem_ref that 7125 indicates which variable (if any) in aff is an IV based on a memory 7126 object. Note that we only consider the candidate. If this is not 7127 based on an object, the base of the reference is in some subexpression 7128 of the use -- but these will use pointer types, so they are recognized 7129 by the create_mem_ref heuristics anyway. */ 7130 tree iv = var_at_stmt (data->current_loop, cand, use->stmt); 7131 tree base_hint = (cand->iv->base_object) ? iv : NULL_TREE; 7132 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt); 7133 tree type = use->mem_type; 7134 tree alias_ptr_type; 7135 if (use->type == USE_PTR_ADDRESS) 7136 alias_ptr_type = get_alias_ptr_type_for_ptr_address (use); 7137 else 7138 { 7139 gcc_assert (type == TREE_TYPE (*use->op_p)); 7140 unsigned int align = get_object_alignment (*use->op_p); 7141 if (align != TYPE_ALIGN (type)) 7142 type = build_aligned_type (type, align); 7143 alias_ptr_type = reference_alias_ptr_type (*use->op_p); 7144 } 7145 tree ref = create_mem_ref (&bsi, type, &aff, alias_ptr_type, 7146 iv, base_hint, data->speed); 7147 7148 if (use->type == USE_PTR_ADDRESS) 7149 { 7150 ref = fold_build1 (ADDR_EXPR, build_pointer_type (use->mem_type), ref); 7151 ref = fold_convert (get_use_type (use), ref); 7152 ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE, 7153 true, GSI_SAME_STMT); 7154 } 7155 else 7156 copy_ref_info (ref, *use->op_p); 7157 7158 *use->op_p = ref; 7159 } 7160 7161 /* Rewrites USE (the condition such that one of the arguments is an iv) using 7162 candidate CAND. */ 7163 7164 static void 7165 rewrite_use_compare (struct ivopts_data *data, 7166 struct iv_use *use, struct iv_cand *cand) 7167 { 7168 tree comp, op, bound; 7169 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt); 7170 enum tree_code compare; 7171 struct iv_group *group = data->vgroups[use->group_id]; 7172 struct cost_pair *cp = get_group_iv_cost (data, group, cand); 7173 7174 bound = cp->value; 7175 if (bound) 7176 { 7177 tree var = var_at_stmt (data->current_loop, cand, use->stmt); 7178 tree var_type = TREE_TYPE (var); 7179 gimple_seq stmts; 7180 7181 if (dump_file && (dump_flags & TDF_DETAILS)) 7182 { 7183 fprintf (dump_file, "Replacing exit test: "); 7184 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM); 7185 } 7186 compare = cp->comp; 7187 bound = unshare_expr (fold_convert (var_type, bound)); 7188 op = force_gimple_operand (bound, &stmts, true, NULL_TREE); 7189 if (stmts) 7190 gsi_insert_seq_on_edge_immediate ( 7191 loop_preheader_edge (data->current_loop), 7192 stmts); 7193 7194 gcond *cond_stmt = as_a <gcond *> (use->stmt); 7195 gimple_cond_set_lhs (cond_stmt, var); 7196 gimple_cond_set_code (cond_stmt, compare); 7197 gimple_cond_set_rhs (cond_stmt, op); 7198 return; 7199 } 7200 7201 /* The induction variable elimination failed; just express the original 7202 giv. */ 7203 comp = get_computation_at (data->current_loop, use->stmt, use, cand); 7204 gcc_assert (comp != NULL_TREE); 7205 gcc_assert (use->op_p != NULL); 7206 *use->op_p = force_gimple_operand_gsi (&bsi, comp, true, 7207 SSA_NAME_VAR (*use->op_p), 7208 true, GSI_SAME_STMT); 7209 } 7210 7211 /* Rewrite the groups using the selected induction variables. */ 7212 7213 static void 7214 rewrite_groups (struct ivopts_data *data) 7215 { 7216 unsigned i, j; 7217 7218 for (i = 0; i < data->vgroups.length (); i++) 7219 { 7220 struct iv_group *group = data->vgroups[i]; 7221 struct iv_cand *cand = group->selected; 7222 7223 gcc_assert (cand); 7224 7225 if (group->type == USE_NONLINEAR_EXPR) 7226 { 7227 for (j = 0; j < group->vuses.length (); j++) 7228 { 7229 rewrite_use_nonlinear_expr (data, group->vuses[j], cand); 7230 update_stmt (group->vuses[j]->stmt); 7231 } 7232 } 7233 else if (address_p (group->type)) 7234 { 7235 for (j = 0; j < group->vuses.length (); j++) 7236 { 7237 rewrite_use_address (data, group->vuses[j], cand); 7238 update_stmt (group->vuses[j]->stmt); 7239 } 7240 } 7241 else 7242 { 7243 gcc_assert (group->type == USE_COMPARE); 7244 7245 for (j = 0; j < group->vuses.length (); j++) 7246 { 7247 rewrite_use_compare (data, group->vuses[j], cand); 7248 update_stmt (group->vuses[j]->stmt); 7249 } 7250 } 7251 } 7252 } 7253 7254 /* Removes the ivs that are not used after rewriting. */ 7255 7256 static void 7257 remove_unused_ivs (struct ivopts_data *data) 7258 { 7259 unsigned j; 7260 bitmap_iterator bi; 7261 bitmap toremove = BITMAP_ALLOC (NULL); 7262 7263 /* Figure out an order in which to release SSA DEFs so that we don't 7264 release something that we'd have to propagate into a debug stmt 7265 afterwards. */ 7266 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi) 7267 { 7268 struct version_info *info; 7269 7270 info = ver_info (data, j); 7271 if (info->iv 7272 && !integer_zerop (info->iv->step) 7273 && !info->inv_id 7274 && !info->iv->nonlin_use 7275 && !info->preserve_biv) 7276 { 7277 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name)); 7278 7279 tree def = info->iv->ssa_name; 7280 7281 if (MAY_HAVE_DEBUG_BIND_STMTS && SSA_NAME_DEF_STMT (def)) 7282 { 7283 imm_use_iterator imm_iter; 7284 use_operand_p use_p; 7285 gimple *stmt; 7286 int count = 0; 7287 7288 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def) 7289 { 7290 if (!gimple_debug_bind_p (stmt)) 7291 continue; 7292 7293 /* We just want to determine whether to do nothing 7294 (count == 0), to substitute the computed 7295 expression into a single use of the SSA DEF by 7296 itself (count == 1), or to use a debug temp 7297 because the SSA DEF is used multiple times or as 7298 part of a larger expression (count > 1). */ 7299 count++; 7300 if (gimple_debug_bind_get_value (stmt) != def) 7301 count++; 7302 7303 if (count > 1) 7304 BREAK_FROM_IMM_USE_STMT (imm_iter); 7305 } 7306 7307 if (!count) 7308 continue; 7309 7310 struct iv_use dummy_use; 7311 struct iv_cand *best_cand = NULL, *cand; 7312 unsigned i, best_pref = 0, cand_pref; 7313 7314 memset (&dummy_use, 0, sizeof (dummy_use)); 7315 dummy_use.iv = info->iv; 7316 for (i = 0; i < data->vgroups.length () && i < 64; i++) 7317 { 7318 cand = data->vgroups[i]->selected; 7319 if (cand == best_cand) 7320 continue; 7321 cand_pref = operand_equal_p (cand->iv->step, 7322 info->iv->step, 0) 7323 ? 4 : 0; 7324 cand_pref 7325 += TYPE_MODE (TREE_TYPE (cand->iv->base)) 7326 == TYPE_MODE (TREE_TYPE (info->iv->base)) 7327 ? 2 : 0; 7328 cand_pref 7329 += TREE_CODE (cand->iv->base) == INTEGER_CST 7330 ? 1 : 0; 7331 if (best_cand == NULL || best_pref < cand_pref) 7332 { 7333 best_cand = cand; 7334 best_pref = cand_pref; 7335 } 7336 } 7337 7338 if (!best_cand) 7339 continue; 7340 7341 tree comp = get_computation_at (data->current_loop, 7342 SSA_NAME_DEF_STMT (def), 7343 &dummy_use, best_cand); 7344 if (!comp) 7345 continue; 7346 7347 if (count > 1) 7348 { 7349 tree vexpr = make_node (DEBUG_EXPR_DECL); 7350 DECL_ARTIFICIAL (vexpr) = 1; 7351 TREE_TYPE (vexpr) = TREE_TYPE (comp); 7352 if (SSA_NAME_VAR (def)) 7353 SET_DECL_MODE (vexpr, DECL_MODE (SSA_NAME_VAR (def))); 7354 else 7355 SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (vexpr))); 7356 gdebug *def_temp 7357 = gimple_build_debug_bind (vexpr, comp, NULL); 7358 gimple_stmt_iterator gsi; 7359 7360 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI) 7361 gsi = gsi_after_labels (gimple_bb 7362 (SSA_NAME_DEF_STMT (def))); 7363 else 7364 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def)); 7365 7366 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT); 7367 comp = vexpr; 7368 } 7369 7370 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def) 7371 { 7372 if (!gimple_debug_bind_p (stmt)) 7373 continue; 7374 7375 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter) 7376 SET_USE (use_p, comp); 7377 7378 update_stmt (stmt); 7379 } 7380 } 7381 } 7382 } 7383 7384 release_defs_bitset (toremove); 7385 7386 BITMAP_FREE (toremove); 7387 } 7388 7389 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback 7390 for hash_map::traverse. */ 7391 7392 bool 7393 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *) 7394 { 7395 free (value); 7396 return true; 7397 } 7398 7399 /* Frees data allocated by the optimization of a single loop. */ 7400 7401 static void 7402 free_loop_data (struct ivopts_data *data) 7403 { 7404 unsigned i, j; 7405 bitmap_iterator bi; 7406 tree obj; 7407 7408 if (data->niters) 7409 { 7410 data->niters->traverse<void *, free_tree_niter_desc> (NULL); 7411 delete data->niters; 7412 data->niters = NULL; 7413 } 7414 7415 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi) 7416 { 7417 struct version_info *info; 7418 7419 info = ver_info (data, i); 7420 info->iv = NULL; 7421 info->has_nonlin_use = false; 7422 info->preserve_biv = false; 7423 info->inv_id = 0; 7424 } 7425 bitmap_clear (data->relevant); 7426 bitmap_clear (data->important_candidates); 7427 7428 for (i = 0; i < data->vgroups.length (); i++) 7429 { 7430 struct iv_group *group = data->vgroups[i]; 7431 7432 for (j = 0; j < group->vuses.length (); j++) 7433 free (group->vuses[j]); 7434 group->vuses.release (); 7435 7436 BITMAP_FREE (group->related_cands); 7437 for (j = 0; j < group->n_map_members; j++) 7438 { 7439 if (group->cost_map[j].inv_vars) 7440 BITMAP_FREE (group->cost_map[j].inv_vars); 7441 if (group->cost_map[j].inv_exprs) 7442 BITMAP_FREE (group->cost_map[j].inv_exprs); 7443 } 7444 7445 free (group->cost_map); 7446 free (group); 7447 } 7448 data->vgroups.truncate (0); 7449 7450 for (i = 0; i < data->vcands.length (); i++) 7451 { 7452 struct iv_cand *cand = data->vcands[i]; 7453 7454 if (cand->inv_vars) 7455 BITMAP_FREE (cand->inv_vars); 7456 if (cand->inv_exprs) 7457 BITMAP_FREE (cand->inv_exprs); 7458 free (cand); 7459 } 7460 data->vcands.truncate (0); 7461 7462 if (data->version_info_size < num_ssa_names) 7463 { 7464 data->version_info_size = 2 * num_ssa_names; 7465 free (data->version_info); 7466 data->version_info = XCNEWVEC (struct version_info, data->version_info_size); 7467 } 7468 7469 data->max_inv_var_id = 0; 7470 data->max_inv_expr_id = 0; 7471 7472 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj) 7473 SET_DECL_RTL (obj, NULL_RTX); 7474 7475 decl_rtl_to_reset.truncate (0); 7476 7477 data->inv_expr_tab->empty (); 7478 7479 data->iv_common_cand_tab->empty (); 7480 data->iv_common_cands.truncate (0); 7481 } 7482 7483 /* Finalizes data structures used by the iv optimization pass. LOOPS is the 7484 loop tree. */ 7485 7486 static void 7487 tree_ssa_iv_optimize_finalize (struct ivopts_data *data) 7488 { 7489 free_loop_data (data); 7490 free (data->version_info); 7491 BITMAP_FREE (data->relevant); 7492 BITMAP_FREE (data->important_candidates); 7493 7494 decl_rtl_to_reset.release (); 7495 data->vgroups.release (); 7496 data->vcands.release (); 7497 delete data->inv_expr_tab; 7498 data->inv_expr_tab = NULL; 7499 free_affine_expand_cache (&data->name_expansion_cache); 7500 if (data->base_object_map) 7501 delete data->base_object_map; 7502 delete data->iv_common_cand_tab; 7503 data->iv_common_cand_tab = NULL; 7504 data->iv_common_cands.release (); 7505 obstack_free (&data->iv_obstack, NULL); 7506 } 7507 7508 /* Returns true if the loop body BODY includes any function calls. */ 7509 7510 static bool 7511 loop_body_includes_call (basic_block *body, unsigned num_nodes) 7512 { 7513 gimple_stmt_iterator gsi; 7514 unsigned i; 7515 7516 for (i = 0; i < num_nodes; i++) 7517 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi)) 7518 { 7519 gimple *stmt = gsi_stmt (gsi); 7520 if (is_gimple_call (stmt) 7521 && !gimple_call_internal_p (stmt) 7522 && !is_inexpensive_builtin (gimple_call_fndecl (stmt))) 7523 return true; 7524 } 7525 return false; 7526 } 7527 7528 /* Optimizes the LOOP. Returns true if anything changed. */ 7529 7530 static bool 7531 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop) 7532 { 7533 bool changed = false; 7534 struct iv_ca *iv_ca; 7535 edge exit = single_dom_exit (loop); 7536 basic_block *body; 7537 7538 gcc_assert (!data->niters); 7539 data->current_loop = loop; 7540 data->loop_loc = find_loop_location (loop); 7541 data->speed = optimize_loop_for_speed_p (loop); 7542 7543 if (dump_file && (dump_flags & TDF_DETAILS)) 7544 { 7545 fprintf (dump_file, "Processing loop %d", loop->num); 7546 if (data->loop_loc != UNKNOWN_LOCATION) 7547 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc), 7548 LOCATION_LINE (data->loop_loc)); 7549 fprintf (dump_file, "\n"); 7550 7551 if (exit) 7552 { 7553 fprintf (dump_file, " single exit %d -> %d, exit condition ", 7554 exit->src->index, exit->dest->index); 7555 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM); 7556 fprintf (dump_file, "\n"); 7557 } 7558 7559 fprintf (dump_file, "\n"); 7560 } 7561 7562 body = get_loop_body (loop); 7563 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes); 7564 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes); 7565 free (body); 7566 7567 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit); 7568 7569 /* For each ssa name determines whether it behaves as an induction variable 7570 in some loop. */ 7571 if (!find_induction_variables (data)) 7572 goto finish; 7573 7574 /* Finds interesting uses (item 1). */ 7575 find_interesting_uses (data); 7576 if (data->vgroups.length () > MAX_CONSIDERED_GROUPS) 7577 goto finish; 7578 7579 /* Finds candidates for the induction variables (item 2). */ 7580 find_iv_candidates (data); 7581 7582 /* Calculates the costs (item 3, part 1). */ 7583 determine_iv_costs (data); 7584 determine_group_iv_costs (data); 7585 determine_set_costs (data); 7586 7587 /* Find the optimal set of induction variables (item 3, part 2). */ 7588 iv_ca = find_optimal_iv_set (data); 7589 if (!iv_ca) 7590 goto finish; 7591 changed = true; 7592 7593 /* Create the new induction variables (item 4, part 1). */ 7594 create_new_ivs (data, iv_ca); 7595 iv_ca_free (&iv_ca); 7596 7597 /* Rewrite the uses (item 4, part 2). */ 7598 rewrite_groups (data); 7599 7600 /* Remove the ivs that are unused after rewriting. */ 7601 remove_unused_ivs (data); 7602 7603 /* We have changed the structure of induction variables; it might happen 7604 that definitions in the scev database refer to some of them that were 7605 eliminated. */ 7606 scev_reset (); 7607 7608 finish: 7609 free_loop_data (data); 7610 7611 return changed; 7612 } 7613 7614 /* Main entry point. Optimizes induction variables in loops. */ 7615 7616 void 7617 tree_ssa_iv_optimize (void) 7618 { 7619 struct loop *loop; 7620 struct ivopts_data data; 7621 7622 tree_ssa_iv_optimize_init (&data); 7623 7624 /* Optimize the loops starting with the innermost ones. */ 7625 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST) 7626 { 7627 if (dump_file && (dump_flags & TDF_DETAILS)) 7628 flow_loop_dump (loop, dump_file, NULL, 1); 7629 7630 tree_ssa_iv_optimize_loop (&data, loop); 7631 } 7632 7633 tree_ssa_iv_optimize_finalize (&data); 7634 } 7635 7636 #include "gt-tree-ssa-loop-ivopts.h" 7637