/* Induction variable optimizations.
   Copyright (C) 2003-2019 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does so in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

      Note that the interesting uses are categorized and handled in groups.
      Generally, address type uses are grouped together if their iv bases
      differ only by a constant offset.

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         groups/uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The group/use costs.  Each of the interesting groups/uses chooses
         the best induction variable in the set and adds its cost to the sum.
         The cost reflects the time spent on modifying the induction
         variable's value to be usable for the given purpose (adding base
         and offset for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, and the dead code
      is removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible; it might give better performance and enable us to decide costs
   more precisely, but getting all the interactions right would be
   complicated.
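
   As a rough illustration (a sketch of the intended effect, not an excerpt
   from the pass itself), consider a loop such as

     for (i = 0; i < n; i++)
       a[i] = 0;

   where every iteration computes the address a + (sizetype) i * 4.  After
   strength reduction and induction variable elimination, the counter i can
   be replaced by a single pointer candidate that is merely incremented and
   compared against a precomputed bound:

     for (p = a; p < a + n; p++)
       *p = 0;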
*/ 68 69 #include "config.h" 70 #include "system.h" 71 #include "coretypes.h" 72 #include "backend.h" 73 #include "rtl.h" 74 #include "tree.h" 75 #include "gimple.h" 76 #include "cfghooks.h" 77 #include "tree-pass.h" 78 #include "memmodel.h" 79 #include "tm_p.h" 80 #include "ssa.h" 81 #include "expmed.h" 82 #include "insn-config.h" 83 #include "emit-rtl.h" 84 #include "recog.h" 85 #include "cgraph.h" 86 #include "gimple-pretty-print.h" 87 #include "alias.h" 88 #include "fold-const.h" 89 #include "stor-layout.h" 90 #include "tree-eh.h" 91 #include "gimplify.h" 92 #include "gimple-iterator.h" 93 #include "gimplify-me.h" 94 #include "tree-cfg.h" 95 #include "tree-ssa-loop-ivopts.h" 96 #include "tree-ssa-loop-manip.h" 97 #include "tree-ssa-loop-niter.h" 98 #include "tree-ssa-loop.h" 99 #include "explow.h" 100 #include "expr.h" 101 #include "tree-dfa.h" 102 #include "tree-ssa.h" 103 #include "cfgloop.h" 104 #include "tree-scalar-evolution.h" 105 #include "params.h" 106 #include "tree-affine.h" 107 #include "tree-ssa-propagate.h" 108 #include "tree-ssa-address.h" 109 #include "builtins.h" 110 #include "tree-vectorizer.h" 111 112 /* FIXME: Expressions are expanded to RTL in this pass to determine the 113 cost of different addressing modes. This should be moved to a TBD 114 interface between the GIMPLE and RTL worlds. */ 115 116 /* The infinite cost. */ 117 #define INFTY 10000000 118 119 /* Returns the expected number of loop iterations for LOOP. 120 The average trip count is computed from profile data if it 121 exists. */ 122 123 static inline HOST_WIDE_INT 124 avg_loop_niter (struct loop *loop) 125 { 126 HOST_WIDE_INT niter = estimated_stmt_executions_int (loop); 127 if (niter == -1) 128 { 129 niter = likely_max_stmt_executions_int (loop); 130 131 if (niter == -1 || niter > PARAM_VALUE (PARAM_AVG_LOOP_NITER)) 132 return PARAM_VALUE (PARAM_AVG_LOOP_NITER); 133 } 134 135 return niter; 136 } 137 138 struct iv_use; 139 140 /* Representation of the induction variable. */ 141 struct iv 142 { 143 tree base; /* Initial value of the iv. */ 144 tree base_object; /* A memory object to that the induction variable points. */ 145 tree step; /* Step of the iv (constant only). */ 146 tree ssa_name; /* The ssa name with the value. */ 147 struct iv_use *nonlin_use; /* The identifier in the use if it is the case. */ 148 bool biv_p; /* Is it a biv? */ 149 bool no_overflow; /* True if the iv doesn't overflow. */ 150 bool have_address_use;/* For biv, indicate if it's used in any address 151 type use. */ 152 }; 153 154 /* Per-ssa version information (induction variable descriptions, etc.). */ 155 struct version_info 156 { 157 tree name; /* The ssa name. */ 158 struct iv *iv; /* Induction variable description. */ 159 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in 160 an expression that is not an induction variable. */ 161 bool preserve_biv; /* For the original biv, whether to preserve it. */ 162 unsigned inv_id; /* Id of an invariant. */ 163 }; 164 165 /* Types of uses. */ 166 enum use_type 167 { 168 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */ 169 USE_REF_ADDRESS, /* Use is an address for an explicit memory 170 reference. */ 171 USE_PTR_ADDRESS, /* Use is a pointer argument to a function in 172 cases where the expansion of the function 173 will turn the argument into a normal address. */ 174 USE_COMPARE /* Use is a compare. */ 175 }; 176 177 /* Cost of a computation. 
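
   The pair is compared primarily on the runtime COST field; COMPLEXITY only
   breaks ties between equal costs (see operator< below).  As an illustrative
   example (numbers invented, not taken from any target): {cost = 4,
   complexity = 1} is preferred over {cost = 4, complexity = 2}, while
   {cost = 3, complexity = 5} beats both, because the runtime cost dominates.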
*/ 178 struct comp_cost 179 { 180 comp_cost (): cost (0), complexity (0), scratch (0) 181 {} 182 183 comp_cost (int cost, unsigned complexity, int scratch = 0) 184 : cost (cost), complexity (complexity), scratch (scratch) 185 {} 186 187 /* Returns true if COST is infinite. */ 188 bool infinite_cost_p (); 189 190 /* Adds costs COST1 and COST2. */ 191 friend comp_cost operator+ (comp_cost cost1, comp_cost cost2); 192 193 /* Adds COST to the comp_cost. */ 194 comp_cost operator+= (comp_cost cost); 195 196 /* Adds constant C to this comp_cost. */ 197 comp_cost operator+= (HOST_WIDE_INT c); 198 199 /* Subtracts constant C to this comp_cost. */ 200 comp_cost operator-= (HOST_WIDE_INT c); 201 202 /* Divide the comp_cost by constant C. */ 203 comp_cost operator/= (HOST_WIDE_INT c); 204 205 /* Multiply the comp_cost by constant C. */ 206 comp_cost operator*= (HOST_WIDE_INT c); 207 208 /* Subtracts costs COST1 and COST2. */ 209 friend comp_cost operator- (comp_cost cost1, comp_cost cost2); 210 211 /* Subtracts COST from this comp_cost. */ 212 comp_cost operator-= (comp_cost cost); 213 214 /* Returns true if COST1 is smaller than COST2. */ 215 friend bool operator< (comp_cost cost1, comp_cost cost2); 216 217 /* Returns true if COST1 and COST2 are equal. */ 218 friend bool operator== (comp_cost cost1, comp_cost cost2); 219 220 /* Returns true if COST1 is smaller or equal than COST2. */ 221 friend bool operator<= (comp_cost cost1, comp_cost cost2); 222 223 int cost; /* The runtime cost. */ 224 unsigned complexity; /* The estimate of the complexity of the code for 225 the computation (in no concrete units -- 226 complexity field should be larger for more 227 complex expressions and addressing modes). */ 228 int scratch; /* Scratch used during cost computation. */ 229 }; 230 231 static const comp_cost no_cost; 232 static const comp_cost infinite_cost (INFTY, INFTY, INFTY); 233 234 bool 235 comp_cost::infinite_cost_p () 236 { 237 return cost == INFTY; 238 } 239 240 comp_cost 241 operator+ (comp_cost cost1, comp_cost cost2) 242 { 243 if (cost1.infinite_cost_p () || cost2.infinite_cost_p ()) 244 return infinite_cost; 245 246 cost1.cost += cost2.cost; 247 cost1.complexity += cost2.complexity; 248 249 return cost1; 250 } 251 252 comp_cost 253 operator- (comp_cost cost1, comp_cost cost2) 254 { 255 if (cost1.infinite_cost_p ()) 256 return infinite_cost; 257 258 gcc_assert (!cost2.infinite_cost_p ()); 259 260 cost1.cost -= cost2.cost; 261 cost1.complexity -= cost2.complexity; 262 263 return cost1; 264 } 265 266 comp_cost 267 comp_cost::operator+= (comp_cost cost) 268 { 269 *this = *this + cost; 270 return *this; 271 } 272 273 comp_cost 274 comp_cost::operator+= (HOST_WIDE_INT c) 275 { 276 if (infinite_cost_p ()) 277 return *this; 278 279 this->cost += c; 280 281 return *this; 282 } 283 284 comp_cost 285 comp_cost::operator-= (HOST_WIDE_INT c) 286 { 287 if (infinite_cost_p ()) 288 return *this; 289 290 this->cost -= c; 291 292 return *this; 293 } 294 295 comp_cost 296 comp_cost::operator/= (HOST_WIDE_INT c) 297 { 298 if (infinite_cost_p ()) 299 return *this; 300 301 this->cost /= c; 302 303 return *this; 304 } 305 306 comp_cost 307 comp_cost::operator*= (HOST_WIDE_INT c) 308 { 309 if (infinite_cost_p ()) 310 return *this; 311 312 this->cost *= c; 313 314 return *this; 315 } 316 317 comp_cost 318 comp_cost::operator-= (comp_cost cost) 319 { 320 *this = *this - cost; 321 return *this; 322 } 323 324 bool 325 operator< (comp_cost cost1, comp_cost cost2) 326 { 327 if (cost1.cost == cost2.cost) 328 return 
cost1.complexity < cost2.complexity; 329 330 return cost1.cost < cost2.cost; 331 } 332 333 bool 334 operator== (comp_cost cost1, comp_cost cost2) 335 { 336 return cost1.cost == cost2.cost 337 && cost1.complexity == cost2.complexity; 338 } 339 340 bool 341 operator<= (comp_cost cost1, comp_cost cost2) 342 { 343 return cost1 < cost2 || cost1 == cost2; 344 } 345 346 struct iv_inv_expr_ent; 347 348 /* The candidate - cost pair. */ 349 struct cost_pair 350 { 351 struct iv_cand *cand; /* The candidate. */ 352 comp_cost cost; /* The cost. */ 353 enum tree_code comp; /* For iv elimination, the comparison. */ 354 bitmap inv_vars; /* The list of invariant ssa_vars that have to be 355 preserved when representing iv_use with iv_cand. */ 356 bitmap inv_exprs; /* The list of newly created invariant expressions 357 when representing iv_use with iv_cand. */ 358 tree value; /* For final value elimination, the expression for 359 the final value of the iv. For iv elimination, 360 the new bound to compare with. */ 361 }; 362 363 /* Use. */ 364 struct iv_use 365 { 366 unsigned id; /* The id of the use. */ 367 unsigned group_id; /* The group id the use belongs to. */ 368 enum use_type type; /* Type of the use. */ 369 tree mem_type; /* The memory type to use when testing whether an 370 address is legitimate, and what the address's 371 cost is. */ 372 struct iv *iv; /* The induction variable it is based on. */ 373 gimple *stmt; /* Statement in that it occurs. */ 374 tree *op_p; /* The place where it occurs. */ 375 376 tree addr_base; /* Base address with const offset stripped. */ 377 poly_uint64_pod addr_offset; 378 /* Const offset stripped from base address. */ 379 }; 380 381 /* Group of uses. */ 382 struct iv_group 383 { 384 /* The id of the group. */ 385 unsigned id; 386 /* Uses of the group are of the same type. */ 387 enum use_type type; 388 /* The set of "related" IV candidates, plus the important ones. */ 389 bitmap related_cands; 390 /* Number of IV candidates in the cost_map. */ 391 unsigned n_map_members; 392 /* The costs wrto the iv candidates. */ 393 struct cost_pair *cost_map; 394 /* The selected candidate for the group. */ 395 struct iv_cand *selected; 396 /* Uses in the group. */ 397 vec<struct iv_use *> vuses; 398 }; 399 400 /* The position where the iv is computed. */ 401 enum iv_position 402 { 403 IP_NORMAL, /* At the end, just before the exit condition. */ 404 IP_END, /* At the end of the latch block. */ 405 IP_BEFORE_USE, /* Immediately before a specific use. */ 406 IP_AFTER_USE, /* Immediately after a specific use. */ 407 IP_ORIGINAL /* The original biv. */ 408 }; 409 410 /* The induction variable candidate. */ 411 struct iv_cand 412 { 413 unsigned id; /* The number of the candidate. */ 414 bool important; /* Whether this is an "important" candidate, i.e. such 415 that it should be considered by all uses. */ 416 ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */ 417 gimple *incremented_at;/* For original biv, the statement where it is 418 incremented. */ 419 tree var_before; /* The variable used for it before increment. */ 420 tree var_after; /* The variable used for it after increment. */ 421 struct iv *iv; /* The value of the candidate. NULL for 422 "pseudocandidate" used to indicate the possibility 423 to replace the final value of an iv by direct 424 computation of the value. */ 425 unsigned cost; /* Cost of the candidate. */ 426 unsigned cost_step; /* Cost of the candidate's increment operation. 
*/ 427 struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place 428 where it is incremented. */ 429 bitmap inv_vars; /* The list of invariant ssa_vars used in step of the 430 iv_cand. */ 431 bitmap inv_exprs; /* If step is more complicated than a single ssa_var, 432 hanlde it as a new invariant expression which will 433 be hoisted out of loop. */ 434 struct iv *orig_iv; /* The original iv if this cand is added from biv with 435 smaller type. */ 436 }; 437 438 /* Hashtable entry for common candidate derived from iv uses. */ 439 struct iv_common_cand 440 { 441 tree base; 442 tree step; 443 /* IV uses from which this common candidate is derived. */ 444 auto_vec<struct iv_use *> uses; 445 hashval_t hash; 446 }; 447 448 /* Hashtable helpers. */ 449 450 struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand> 451 { 452 static inline hashval_t hash (const iv_common_cand *); 453 static inline bool equal (const iv_common_cand *, const iv_common_cand *); 454 }; 455 456 /* Hash function for possible common candidates. */ 457 458 inline hashval_t 459 iv_common_cand_hasher::hash (const iv_common_cand *ccand) 460 { 461 return ccand->hash; 462 } 463 464 /* Hash table equality function for common candidates. */ 465 466 inline bool 467 iv_common_cand_hasher::equal (const iv_common_cand *ccand1, 468 const iv_common_cand *ccand2) 469 { 470 return (ccand1->hash == ccand2->hash 471 && operand_equal_p (ccand1->base, ccand2->base, 0) 472 && operand_equal_p (ccand1->step, ccand2->step, 0) 473 && (TYPE_PRECISION (TREE_TYPE (ccand1->base)) 474 == TYPE_PRECISION (TREE_TYPE (ccand2->base)))); 475 } 476 477 /* Loop invariant expression hashtable entry. */ 478 479 struct iv_inv_expr_ent 480 { 481 /* Tree expression of the entry. */ 482 tree expr; 483 /* Unique indentifier. */ 484 int id; 485 /* Hash value. */ 486 hashval_t hash; 487 }; 488 489 /* Sort iv_inv_expr_ent pair A and B by id field. */ 490 491 static int 492 sort_iv_inv_expr_ent (const void *a, const void *b) 493 { 494 const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a); 495 const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b); 496 497 unsigned id1 = (*e1)->id; 498 unsigned id2 = (*e2)->id; 499 500 if (id1 < id2) 501 return -1; 502 else if (id1 > id2) 503 return 1; 504 else 505 return 0; 506 } 507 508 /* Hashtable helpers. */ 509 510 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent> 511 { 512 static inline hashval_t hash (const iv_inv_expr_ent *); 513 static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *); 514 }; 515 516 /* Return true if uses of type TYPE represent some form of address. */ 517 518 inline bool 519 address_p (use_type type) 520 { 521 return type == USE_REF_ADDRESS || type == USE_PTR_ADDRESS; 522 } 523 524 /* Hash function for loop invariant expressions. */ 525 526 inline hashval_t 527 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr) 528 { 529 return expr->hash; 530 } 531 532 /* Hash table equality function for expressions. */ 533 534 inline bool 535 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1, 536 const iv_inv_expr_ent *expr2) 537 { 538 return expr1->hash == expr2->hash 539 && operand_equal_p (expr1->expr, expr2->expr, 0); 540 } 541 542 struct ivopts_data 543 { 544 /* The currently optimized loop. */ 545 struct loop *current_loop; 546 location_t loop_loc; 547 548 /* Numbers of iterations for all exits of the current loop. */ 549 hash_map<edge, tree_niter_desc *> *niters; 550 551 /* Number of registers used in it. 
*/ 552 unsigned regs_used; 553 554 /* The size of version_info array allocated. */ 555 unsigned version_info_size; 556 557 /* The array of information for the ssa names. */ 558 struct version_info *version_info; 559 560 /* The hashtable of loop invariant expressions created 561 by ivopt. */ 562 hash_table<iv_inv_expr_hasher> *inv_expr_tab; 563 564 /* The bitmap of indices in version_info whose value was changed. */ 565 bitmap relevant; 566 567 /* The uses of induction variables. */ 568 vec<iv_group *> vgroups; 569 570 /* The candidates. */ 571 vec<iv_cand *> vcands; 572 573 /* A bitmap of important candidates. */ 574 bitmap important_candidates; 575 576 /* Cache used by tree_to_aff_combination_expand. */ 577 hash_map<tree, name_expansion *> *name_expansion_cache; 578 579 /* The hashtable of common candidates derived from iv uses. */ 580 hash_table<iv_common_cand_hasher> *iv_common_cand_tab; 581 582 /* The common candidates. */ 583 vec<iv_common_cand *> iv_common_cands; 584 585 /* Hash map recording base object information of tree exp. */ 586 hash_map<tree, tree> *base_object_map; 587 588 /* The maximum invariant variable id. */ 589 unsigned max_inv_var_id; 590 591 /* The maximum invariant expression id. */ 592 unsigned max_inv_expr_id; 593 594 /* Number of no_overflow BIVs which are not used in memory address. */ 595 unsigned bivs_not_used_in_addr; 596 597 /* Obstack for iv structure. */ 598 struct obstack iv_obstack; 599 600 /* Whether to consider just related and important candidates when replacing a 601 use. */ 602 bool consider_all_candidates; 603 604 /* Are we optimizing for speed? */ 605 bool speed; 606 607 /* Whether the loop body includes any function calls. */ 608 bool body_includes_call; 609 610 /* Whether the loop body can only be exited via single exit. */ 611 bool loop_single_exit_p; 612 }; 613 614 /* An assignment of iv candidates to uses. */ 615 616 struct iv_ca 617 { 618 /* The number of uses covered by the assignment. */ 619 unsigned upto; 620 621 /* Number of uses that cannot be expressed by the candidates in the set. */ 622 unsigned bad_groups; 623 624 /* Candidate assigned to a use, together with the related costs. */ 625 struct cost_pair **cand_for_group; 626 627 /* Number of times each candidate is used. */ 628 unsigned *n_cand_uses; 629 630 /* The candidates used. */ 631 bitmap cands; 632 633 /* The number of candidates in the set. */ 634 unsigned n_cands; 635 636 /* The number of invariants needed, including both invariant variants and 637 invariant expressions. */ 638 unsigned n_invs; 639 640 /* Total cost of expressing uses. */ 641 comp_cost cand_use_cost; 642 643 /* Total cost of candidates. */ 644 unsigned cand_cost; 645 646 /* Number of times each invariant variable is used. */ 647 unsigned *n_inv_var_uses; 648 649 /* Number of times each invariant expression is used. */ 650 unsigned *n_inv_expr_uses; 651 652 /* Total cost of the assignment. */ 653 comp_cost cost; 654 }; 655 656 /* Difference of two iv candidate assignments. */ 657 658 struct iv_ca_delta 659 { 660 /* Changed group. */ 661 struct iv_group *group; 662 663 /* An old assignment (for rollback purposes). */ 664 struct cost_pair *old_cp; 665 666 /* A new assignment. */ 667 struct cost_pair *new_cp; 668 669 /* Next change in the list. */ 670 struct iv_ca_delta *next; 671 }; 672 673 /* Bound on number of candidates below that all candidates are considered. 
*/ 674 675 #define CONSIDER_ALL_CANDIDATES_BOUND \ 676 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND)) 677 678 /* If there are more iv occurrences, we just give up (it is quite unlikely that 679 optimizing such a loop would help, and it would take ages). */ 680 681 #define MAX_CONSIDERED_GROUPS \ 682 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES)) 683 684 /* If there are at most this number of ivs in the set, try removing unnecessary 685 ivs from the set always. */ 686 687 #define ALWAYS_PRUNE_CAND_SET_BOUND \ 688 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND)) 689 690 /* The list of trees for that the decl_rtl field must be reset is stored 691 here. */ 692 693 static vec<tree> decl_rtl_to_reset; 694 695 static comp_cost force_expr_to_var_cost (tree, bool); 696 697 /* The single loop exit if it dominates the latch, NULL otherwise. */ 698 699 edge 700 single_dom_exit (struct loop *loop) 701 { 702 edge exit = single_exit (loop); 703 704 if (!exit) 705 return NULL; 706 707 if (!just_once_each_iteration_p (loop, exit->src)) 708 return NULL; 709 710 return exit; 711 } 712 713 /* Dumps information about the induction variable IV to FILE. Don't dump 714 variable's name if DUMP_NAME is FALSE. The information is dumped with 715 preceding spaces indicated by INDENT_LEVEL. */ 716 717 void 718 dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level) 719 { 720 const char *p; 721 const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'}; 722 723 if (indent_level > 4) 724 indent_level = 4; 725 p = spaces + 8 - (indent_level << 1); 726 727 fprintf (file, "%sIV struct:\n", p); 728 if (iv->ssa_name && dump_name) 729 { 730 fprintf (file, "%s SSA_NAME:\t", p); 731 print_generic_expr (file, iv->ssa_name, TDF_SLIM); 732 fprintf (file, "\n"); 733 } 734 735 fprintf (file, "%s Type:\t", p); 736 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM); 737 fprintf (file, "\n"); 738 739 fprintf (file, "%s Base:\t", p); 740 print_generic_expr (file, iv->base, TDF_SLIM); 741 fprintf (file, "\n"); 742 743 fprintf (file, "%s Step:\t", p); 744 print_generic_expr (file, iv->step, TDF_SLIM); 745 fprintf (file, "\n"); 746 747 if (iv->base_object) 748 { 749 fprintf (file, "%s Object:\t", p); 750 print_generic_expr (file, iv->base_object, TDF_SLIM); 751 fprintf (file, "\n"); 752 } 753 754 fprintf (file, "%s Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N'); 755 756 fprintf (file, "%s Overflowness wrto loop niter:\t%s\n", 757 p, iv->no_overflow ? "No-overflow" : "Overflow"); 758 } 759 760 /* Dumps information about the USE to FILE. */ 761 762 void 763 dump_use (FILE *file, struct iv_use *use) 764 { 765 fprintf (file, " Use %d.%d:\n", use->group_id, use->id); 766 fprintf (file, " At stmt:\t"); 767 print_gimple_stmt (file, use->stmt, 0); 768 fprintf (file, " At pos:\t"); 769 if (use->op_p) 770 print_generic_expr (file, *use->op_p, TDF_SLIM); 771 fprintf (file, "\n"); 772 dump_iv (file, use->iv, false, 2); 773 } 774 775 /* Dumps information about the uses to FILE. 
*/ 776 777 void 778 dump_groups (FILE *file, struct ivopts_data *data) 779 { 780 unsigned i, j; 781 struct iv_group *group; 782 783 for (i = 0; i < data->vgroups.length (); i++) 784 { 785 group = data->vgroups[i]; 786 fprintf (file, "Group %d:\n", group->id); 787 if (group->type == USE_NONLINEAR_EXPR) 788 fprintf (file, " Type:\tGENERIC\n"); 789 else if (group->type == USE_REF_ADDRESS) 790 fprintf (file, " Type:\tREFERENCE ADDRESS\n"); 791 else if (group->type == USE_PTR_ADDRESS) 792 fprintf (file, " Type:\tPOINTER ARGUMENT ADDRESS\n"); 793 else 794 { 795 gcc_assert (group->type == USE_COMPARE); 796 fprintf (file, " Type:\tCOMPARE\n"); 797 } 798 for (j = 0; j < group->vuses.length (); j++) 799 dump_use (file, group->vuses[j]); 800 } 801 } 802 803 /* Dumps information about induction variable candidate CAND to FILE. */ 804 805 void 806 dump_cand (FILE *file, struct iv_cand *cand) 807 { 808 struct iv *iv = cand->iv; 809 810 fprintf (file, "Candidate %d:\n", cand->id); 811 if (cand->inv_vars) 812 { 813 fprintf (file, " Depend on inv.vars: "); 814 dump_bitmap (file, cand->inv_vars); 815 } 816 if (cand->inv_exprs) 817 { 818 fprintf (file, " Depend on inv.exprs: "); 819 dump_bitmap (file, cand->inv_exprs); 820 } 821 822 if (cand->var_before) 823 { 824 fprintf (file, " Var befor: "); 825 print_generic_expr (file, cand->var_before, TDF_SLIM); 826 fprintf (file, "\n"); 827 } 828 if (cand->var_after) 829 { 830 fprintf (file, " Var after: "); 831 print_generic_expr (file, cand->var_after, TDF_SLIM); 832 fprintf (file, "\n"); 833 } 834 835 switch (cand->pos) 836 { 837 case IP_NORMAL: 838 fprintf (file, " Incr POS: before exit test\n"); 839 break; 840 841 case IP_BEFORE_USE: 842 fprintf (file, " Incr POS: before use %d\n", cand->ainc_use->id); 843 break; 844 845 case IP_AFTER_USE: 846 fprintf (file, " Incr POS: after use %d\n", cand->ainc_use->id); 847 break; 848 849 case IP_END: 850 fprintf (file, " Incr POS: at end\n"); 851 break; 852 853 case IP_ORIGINAL: 854 fprintf (file, " Incr POS: orig biv\n"); 855 break; 856 } 857 858 dump_iv (file, iv, false, 1); 859 } 860 861 /* Returns the info for ssa version VER. */ 862 863 static inline struct version_info * 864 ver_info (struct ivopts_data *data, unsigned ver) 865 { 866 return data->version_info + ver; 867 } 868 869 /* Returns the info for ssa name NAME. */ 870 871 static inline struct version_info * 872 name_info (struct ivopts_data *data, tree name) 873 { 874 return ver_info (data, SSA_NAME_VERSION (name)); 875 } 876 877 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be 878 emitted in LOOP. */ 879 880 static bool 881 stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt) 882 { 883 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt); 884 885 gcc_assert (bb); 886 887 if (sbb == loop->latch) 888 return true; 889 890 if (sbb != bb) 891 return false; 892 893 return stmt == last_stmt (bb); 894 } 895 896 /* Returns true if STMT if after the place where the original induction 897 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true 898 if the positions are identical. 
*/ 899 900 static bool 901 stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal) 902 { 903 basic_block cand_bb = gimple_bb (cand->incremented_at); 904 basic_block stmt_bb = gimple_bb (stmt); 905 906 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb)) 907 return false; 908 909 if (stmt_bb != cand_bb) 910 return true; 911 912 if (true_if_equal 913 && gimple_uid (stmt) == gimple_uid (cand->incremented_at)) 914 return true; 915 return gimple_uid (stmt) > gimple_uid (cand->incremented_at); 916 } 917 918 /* Returns true if STMT if after the place where the induction variable 919 CAND is incremented in LOOP. */ 920 921 static bool 922 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt) 923 { 924 switch (cand->pos) 925 { 926 case IP_END: 927 return false; 928 929 case IP_NORMAL: 930 return stmt_after_ip_normal_pos (loop, stmt); 931 932 case IP_ORIGINAL: 933 case IP_AFTER_USE: 934 return stmt_after_inc_pos (cand, stmt, false); 935 936 case IP_BEFORE_USE: 937 return stmt_after_inc_pos (cand, stmt, true); 938 939 default: 940 gcc_unreachable (); 941 } 942 } 943 944 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */ 945 946 static bool 947 abnormal_ssa_name_p (tree exp) 948 { 949 if (!exp) 950 return false; 951 952 if (TREE_CODE (exp) != SSA_NAME) 953 return false; 954 955 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0; 956 } 957 958 /* Returns false if BASE or INDEX contains a ssa name that occurs in an 959 abnormal phi node. Callback for for_each_index. */ 960 961 static bool 962 idx_contains_abnormal_ssa_name_p (tree base, tree *index, 963 void *data ATTRIBUTE_UNUSED) 964 { 965 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF) 966 { 967 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2))) 968 return false; 969 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3))) 970 return false; 971 } 972 973 return !abnormal_ssa_name_p (*index); 974 } 975 976 /* Returns true if EXPR contains a ssa name that occurs in an 977 abnormal phi node. */ 978 979 bool 980 contains_abnormal_ssa_name_p (tree expr) 981 { 982 enum tree_code code; 983 enum tree_code_class codeclass; 984 985 if (!expr) 986 return false; 987 988 code = TREE_CODE (expr); 989 codeclass = TREE_CODE_CLASS (code); 990 991 if (code == CALL_EXPR) 992 { 993 tree arg; 994 call_expr_arg_iterator iter; 995 FOR_EACH_CALL_EXPR_ARG (arg, iter, expr) 996 if (contains_abnormal_ssa_name_p (arg)) 997 return true; 998 return false; 999 } 1000 1001 if (code == SSA_NAME) 1002 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0; 1003 1004 if (code == INTEGER_CST 1005 || is_gimple_min_invariant (expr)) 1006 return false; 1007 1008 if (code == ADDR_EXPR) 1009 return !for_each_index (&TREE_OPERAND (expr, 0), 1010 idx_contains_abnormal_ssa_name_p, 1011 NULL); 1012 1013 if (code == COND_EXPR) 1014 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)) 1015 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)) 1016 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2)); 1017 1018 switch (codeclass) 1019 { 1020 case tcc_binary: 1021 case tcc_comparison: 1022 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))) 1023 return true; 1024 1025 /* Fallthru. 
*/ 1026 case tcc_unary: 1027 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))) 1028 return true; 1029 1030 break; 1031 1032 default: 1033 gcc_unreachable (); 1034 } 1035 1036 return false; 1037 } 1038 1039 /* Returns the structure describing number of iterations determined from 1040 EXIT of DATA->current_loop, or NULL if something goes wrong. */ 1041 1042 static struct tree_niter_desc * 1043 niter_for_exit (struct ivopts_data *data, edge exit) 1044 { 1045 struct tree_niter_desc *desc; 1046 tree_niter_desc **slot; 1047 1048 if (!data->niters) 1049 { 1050 data->niters = new hash_map<edge, tree_niter_desc *>; 1051 slot = NULL; 1052 } 1053 else 1054 slot = data->niters->get (exit); 1055 1056 if (!slot) 1057 { 1058 /* Try to determine number of iterations. We cannot safely work with ssa 1059 names that appear in phi nodes on abnormal edges, so that we do not 1060 create overlapping life ranges for them (PR 27283). */ 1061 desc = XNEW (struct tree_niter_desc); 1062 if (!number_of_iterations_exit (data->current_loop, 1063 exit, desc, true) 1064 || contains_abnormal_ssa_name_p (desc->niter)) 1065 { 1066 XDELETE (desc); 1067 desc = NULL; 1068 } 1069 data->niters->put (exit, desc); 1070 } 1071 else 1072 desc = *slot; 1073 1074 return desc; 1075 } 1076 1077 /* Returns the structure describing number of iterations determined from 1078 single dominating exit of DATA->current_loop, or NULL if something 1079 goes wrong. */ 1080 1081 static struct tree_niter_desc * 1082 niter_for_single_dom_exit (struct ivopts_data *data) 1083 { 1084 edge exit = single_dom_exit (data->current_loop); 1085 1086 if (!exit) 1087 return NULL; 1088 1089 return niter_for_exit (data, exit); 1090 } 1091 1092 /* Initializes data structures used by the iv optimization pass, stored 1093 in DATA. */ 1094 1095 static void 1096 tree_ssa_iv_optimize_init (struct ivopts_data *data) 1097 { 1098 data->version_info_size = 2 * num_ssa_names; 1099 data->version_info = XCNEWVEC (struct version_info, data->version_info_size); 1100 data->relevant = BITMAP_ALLOC (NULL); 1101 data->important_candidates = BITMAP_ALLOC (NULL); 1102 data->max_inv_var_id = 0; 1103 data->max_inv_expr_id = 0; 1104 data->niters = NULL; 1105 data->vgroups.create (20); 1106 data->vcands.create (20); 1107 data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10); 1108 data->name_expansion_cache = NULL; 1109 data->base_object_map = NULL; 1110 data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10); 1111 data->iv_common_cands.create (20); 1112 decl_rtl_to_reset.create (20); 1113 gcc_obstack_init (&data->iv_obstack); 1114 } 1115 1116 /* walk_tree callback for determine_base_object. */ 1117 1118 static tree 1119 determine_base_object_1 (tree *tp, int *walk_subtrees, void *wdata) 1120 { 1121 tree_code code = TREE_CODE (*tp); 1122 tree obj = NULL_TREE; 1123 if (code == ADDR_EXPR) 1124 { 1125 tree base = get_base_address (TREE_OPERAND (*tp, 0)); 1126 if (!base) 1127 obj = *tp; 1128 else if (TREE_CODE (base) != MEM_REF) 1129 obj = fold_convert (ptr_type_node, build_fold_addr_expr (base)); 1130 } 1131 else if (code == SSA_NAME && POINTER_TYPE_P (TREE_TYPE (*tp))) 1132 obj = fold_convert (ptr_type_node, *tp); 1133 1134 if (!obj) 1135 { 1136 if (!EXPR_P (*tp)) 1137 *walk_subtrees = 0; 1138 1139 return NULL_TREE; 1140 } 1141 /* Record special node for multiple base objects and stop. 
*/ 1142 if (*static_cast<tree *> (wdata)) 1143 { 1144 *static_cast<tree *> (wdata) = integer_zero_node; 1145 return integer_zero_node; 1146 } 1147 /* Record the base object and continue looking. */ 1148 *static_cast<tree *> (wdata) = obj; 1149 return NULL_TREE; 1150 } 1151 1152 /* Returns a memory object to that EXPR points with caching. Return NULL if we 1153 are able to determine that it does not point to any such object; specially 1154 return integer_zero_node if EXPR contains multiple base objects. */ 1155 1156 static tree 1157 determine_base_object (struct ivopts_data *data, tree expr) 1158 { 1159 tree *slot, obj = NULL_TREE; 1160 if (data->base_object_map) 1161 { 1162 if ((slot = data->base_object_map->get(expr)) != NULL) 1163 return *slot; 1164 } 1165 else 1166 data->base_object_map = new hash_map<tree, tree>; 1167 1168 (void) walk_tree_without_duplicates (&expr, determine_base_object_1, &obj); 1169 data->base_object_map->put (expr, obj); 1170 return obj; 1171 } 1172 1173 /* Return true if address expression with non-DECL_P operand appears 1174 in EXPR. */ 1175 1176 static bool 1177 contain_complex_addr_expr (tree expr) 1178 { 1179 bool res = false; 1180 1181 STRIP_NOPS (expr); 1182 switch (TREE_CODE (expr)) 1183 { 1184 case POINTER_PLUS_EXPR: 1185 case PLUS_EXPR: 1186 case MINUS_EXPR: 1187 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0)); 1188 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1)); 1189 break; 1190 1191 case ADDR_EXPR: 1192 return (!DECL_P (TREE_OPERAND (expr, 0))); 1193 1194 default: 1195 return false; 1196 } 1197 1198 return res; 1199 } 1200 1201 /* Allocates an induction variable with given initial value BASE and step STEP 1202 for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */ 1203 1204 static struct iv * 1205 alloc_iv (struct ivopts_data *data, tree base, tree step, 1206 bool no_overflow = false) 1207 { 1208 tree expr = base; 1209 struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack, 1210 sizeof (struct iv)); 1211 gcc_assert (step != NULL_TREE); 1212 1213 /* Lower address expression in base except ones with DECL_P as operand. 1214 By doing this: 1215 1) More accurate cost can be computed for address expressions; 1216 2) Duplicate candidates won't be created for bases in different 1217 forms, like &a[0] and &a. */ 1218 STRIP_NOPS (expr); 1219 if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0))) 1220 || contain_complex_addr_expr (expr)) 1221 { 1222 aff_tree comb; 1223 tree_to_aff_combination (expr, TREE_TYPE (expr), &comb); 1224 base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb)); 1225 } 1226 1227 iv->base = base; 1228 iv->base_object = determine_base_object (data, base); 1229 iv->step = step; 1230 iv->biv_p = false; 1231 iv->nonlin_use = NULL; 1232 iv->ssa_name = NULL_TREE; 1233 if (!no_overflow 1234 && !iv_can_overflow_p (data->current_loop, TREE_TYPE (base), 1235 base, step)) 1236 no_overflow = true; 1237 iv->no_overflow = no_overflow; 1238 iv->have_address_use = false; 1239 1240 return iv; 1241 } 1242 1243 /* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV 1244 doesn't overflow. 
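
   For example (purely illustrative), the counter of a loop written as
   "for (i = 10; i < n; i += 2)" would be recorded with BASE 10 and STEP 2.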
*/ 1245 1246 static void 1247 set_iv (struct ivopts_data *data, tree iv, tree base, tree step, 1248 bool no_overflow) 1249 { 1250 struct version_info *info = name_info (data, iv); 1251 1252 gcc_assert (!info->iv); 1253 1254 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv)); 1255 info->iv = alloc_iv (data, base, step, no_overflow); 1256 info->iv->ssa_name = iv; 1257 } 1258 1259 /* Finds induction variable declaration for VAR. */ 1260 1261 static struct iv * 1262 get_iv (struct ivopts_data *data, tree var) 1263 { 1264 basic_block bb; 1265 tree type = TREE_TYPE (var); 1266 1267 if (!POINTER_TYPE_P (type) 1268 && !INTEGRAL_TYPE_P (type)) 1269 return NULL; 1270 1271 if (!name_info (data, var)->iv) 1272 { 1273 bb = gimple_bb (SSA_NAME_DEF_STMT (var)); 1274 1275 if (!bb 1276 || !flow_bb_inside_loop_p (data->current_loop, bb)) 1277 set_iv (data, var, var, build_int_cst (type, 0), true); 1278 } 1279 1280 return name_info (data, var)->iv; 1281 } 1282 1283 /* Return the first non-invariant ssa var found in EXPR. */ 1284 1285 static tree 1286 extract_single_var_from_expr (tree expr) 1287 { 1288 int i, n; 1289 tree tmp; 1290 enum tree_code code; 1291 1292 if (!expr || is_gimple_min_invariant (expr)) 1293 return NULL; 1294 1295 code = TREE_CODE (expr); 1296 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code))) 1297 { 1298 n = TREE_OPERAND_LENGTH (expr); 1299 for (i = 0; i < n; i++) 1300 { 1301 tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i)); 1302 1303 if (tmp) 1304 return tmp; 1305 } 1306 } 1307 return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL; 1308 } 1309 1310 /* Finds basic ivs. */ 1311 1312 static bool 1313 find_bivs (struct ivopts_data *data) 1314 { 1315 gphi *phi; 1316 affine_iv iv; 1317 tree step, type, base, stop; 1318 bool found = false; 1319 struct loop *loop = data->current_loop; 1320 gphi_iterator psi; 1321 1322 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi)) 1323 { 1324 phi = psi.phi (); 1325 1326 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi))) 1327 continue; 1328 1329 if (virtual_operand_p (PHI_RESULT (phi))) 1330 continue; 1331 1332 if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true)) 1333 continue; 1334 1335 if (integer_zerop (iv.step)) 1336 continue; 1337 1338 step = iv.step; 1339 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop)); 1340 /* Stop expanding iv base at the first ssa var referred by iv step. 1341 Ideally we should stop at any ssa var, because that's expensive 1342 and unusual to happen, we just do it on the first one. 1343 1344 See PR64705 for the rationale. */ 1345 stop = extract_single_var_from_expr (step); 1346 base = expand_simple_operations (base, stop); 1347 if (contains_abnormal_ssa_name_p (base) 1348 || contains_abnormal_ssa_name_p (step)) 1349 continue; 1350 1351 type = TREE_TYPE (PHI_RESULT (phi)); 1352 base = fold_convert (type, base); 1353 if (step) 1354 { 1355 if (POINTER_TYPE_P (type)) 1356 step = convert_to_ptrofftype (step); 1357 else 1358 step = fold_convert (type, step); 1359 } 1360 1361 set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow); 1362 found = true; 1363 } 1364 1365 return found; 1366 } 1367 1368 /* Marks basic ivs. 
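
   A biv here is an induction variable whose value is carried around the loop
   by a PHI node in the loop header.  For instance (illustrative only), given
   "i_1 = PHI <0(preheader), i_7(latch)>" and "i_7 = i_1 + 1" in the loop
   body, both i_1 and its increment result i_7 get biv_p set.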
*/ 1369 1370 static void 1371 mark_bivs (struct ivopts_data *data) 1372 { 1373 gphi *phi; 1374 gimple *def; 1375 tree var; 1376 struct iv *iv, *incr_iv; 1377 struct loop *loop = data->current_loop; 1378 basic_block incr_bb; 1379 gphi_iterator psi; 1380 1381 data->bivs_not_used_in_addr = 0; 1382 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi)) 1383 { 1384 phi = psi.phi (); 1385 1386 iv = get_iv (data, PHI_RESULT (phi)); 1387 if (!iv) 1388 continue; 1389 1390 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop)); 1391 def = SSA_NAME_DEF_STMT (var); 1392 /* Don't mark iv peeled from other one as biv. */ 1393 if (def 1394 && gimple_code (def) == GIMPLE_PHI 1395 && gimple_bb (def) == loop->header) 1396 continue; 1397 1398 incr_iv = get_iv (data, var); 1399 if (!incr_iv) 1400 continue; 1401 1402 /* If the increment is in the subloop, ignore it. */ 1403 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var)); 1404 if (incr_bb->loop_father != data->current_loop 1405 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP)) 1406 continue; 1407 1408 iv->biv_p = true; 1409 incr_iv->biv_p = true; 1410 if (iv->no_overflow) 1411 data->bivs_not_used_in_addr++; 1412 if (incr_iv->no_overflow) 1413 data->bivs_not_used_in_addr++; 1414 } 1415 } 1416 1417 /* Checks whether STMT defines a linear induction variable and stores its 1418 parameters to IV. */ 1419 1420 static bool 1421 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv) 1422 { 1423 tree lhs, stop; 1424 struct loop *loop = data->current_loop; 1425 1426 iv->base = NULL_TREE; 1427 iv->step = NULL_TREE; 1428 1429 if (gimple_code (stmt) != GIMPLE_ASSIGN) 1430 return false; 1431 1432 lhs = gimple_assign_lhs (stmt); 1433 if (TREE_CODE (lhs) != SSA_NAME) 1434 return false; 1435 1436 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true)) 1437 return false; 1438 1439 /* Stop expanding iv base at the first ssa var referred by iv step. 1440 Ideally we should stop at any ssa var, because that's expensive 1441 and unusual to happen, we just do it on the first one. 1442 1443 See PR64705 for the rationale. */ 1444 stop = extract_single_var_from_expr (iv->step); 1445 iv->base = expand_simple_operations (iv->base, stop); 1446 if (contains_abnormal_ssa_name_p (iv->base) 1447 || contains_abnormal_ssa_name_p (iv->step)) 1448 return false; 1449 1450 /* If STMT could throw, then do not consider STMT as defining a GIV. 1451 While this will suppress optimizations, we cannot safely delete this 1452 GIV and associated statements, even if it appears it is not used. */ 1453 if (stmt_could_throw_p (cfun, stmt)) 1454 return false; 1455 1456 return true; 1457 } 1458 1459 /* Finds general ivs in statement STMT. */ 1460 1461 static void 1462 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt) 1463 { 1464 affine_iv iv; 1465 1466 if (!find_givs_in_stmt_scev (data, stmt, &iv)) 1467 return; 1468 1469 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow); 1470 } 1471 1472 /* Finds general ivs in basic block BB. */ 1473 1474 static void 1475 find_givs_in_bb (struct ivopts_data *data, basic_block bb) 1476 { 1477 gimple_stmt_iterator bsi; 1478 1479 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi)) 1480 find_givs_in_stmt (data, gsi_stmt (bsi)); 1481 } 1482 1483 /* Finds general ivs. 
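
   General ivs (givs) are SSA names whose value is an affine function of a
   biv, e.g. (illustrative) "p_5 = base_2 + (sizetype) i_1 * 4", which is an
   iv with step four times the step of i_1.  They are found by scanning every
   statement of the loop body with the scalar evolution machinery.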
*/ 1484 1485 static void 1486 find_givs (struct ivopts_data *data) 1487 { 1488 struct loop *loop = data->current_loop; 1489 basic_block *body = get_loop_body_in_dom_order (loop); 1490 unsigned i; 1491 1492 for (i = 0; i < loop->num_nodes; i++) 1493 find_givs_in_bb (data, body[i]); 1494 free (body); 1495 } 1496 1497 /* For each ssa name defined in LOOP determines whether it is an induction 1498 variable and if so, its initial value and step. */ 1499 1500 static bool 1501 find_induction_variables (struct ivopts_data *data) 1502 { 1503 unsigned i; 1504 bitmap_iterator bi; 1505 1506 if (!find_bivs (data)) 1507 return false; 1508 1509 find_givs (data); 1510 mark_bivs (data); 1511 1512 if (dump_file && (dump_flags & TDF_DETAILS)) 1513 { 1514 struct tree_niter_desc *niter = niter_for_single_dom_exit (data); 1515 1516 if (niter) 1517 { 1518 fprintf (dump_file, " number of iterations "); 1519 print_generic_expr (dump_file, niter->niter, TDF_SLIM); 1520 if (!integer_zerop (niter->may_be_zero)) 1521 { 1522 fprintf (dump_file, "; zero if "); 1523 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM); 1524 } 1525 fprintf (dump_file, "\n"); 1526 }; 1527 1528 fprintf (dump_file, "\n<Induction Vars>:\n"); 1529 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi) 1530 { 1531 struct version_info *info = ver_info (data, i); 1532 if (info->iv && info->iv->step && !integer_zerop (info->iv->step)) 1533 dump_iv (dump_file, ver_info (data, i)->iv, true, 0); 1534 } 1535 } 1536 1537 return true; 1538 } 1539 1540 /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP. 1541 For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET 1542 is the const offset stripped from IV base and MEM_TYPE is the type 1543 of the memory being addressed. For uses of other types, ADDR_BASE 1544 and ADDR_OFFSET are zero by default and MEM_TYPE is NULL_TREE. */ 1545 1546 static struct iv_use * 1547 record_use (struct iv_group *group, tree *use_p, struct iv *iv, 1548 gimple *stmt, enum use_type type, tree mem_type, 1549 tree addr_base, poly_uint64 addr_offset) 1550 { 1551 struct iv_use *use = XCNEW (struct iv_use); 1552 1553 use->id = group->vuses.length (); 1554 use->group_id = group->id; 1555 use->type = type; 1556 use->mem_type = mem_type; 1557 use->iv = iv; 1558 use->stmt = stmt; 1559 use->op_p = use_p; 1560 use->addr_base = addr_base; 1561 use->addr_offset = addr_offset; 1562 1563 group->vuses.safe_push (use); 1564 return use; 1565 } 1566 1567 /* Checks whether OP is a loop-level invariant and if so, records it. 1568 NONLINEAR_USE is true if the invariant is used in a way we do not 1569 handle specially. */ 1570 1571 static void 1572 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use) 1573 { 1574 basic_block bb; 1575 struct version_info *info; 1576 1577 if (TREE_CODE (op) != SSA_NAME 1578 || virtual_operand_p (op)) 1579 return; 1580 1581 bb = gimple_bb (SSA_NAME_DEF_STMT (op)); 1582 if (bb 1583 && flow_bb_inside_loop_p (data->current_loop, bb)) 1584 return; 1585 1586 info = name_info (data, op); 1587 info->name = op; 1588 info->has_nonlin_use |= nonlinear_use; 1589 if (!info->inv_id) 1590 info->inv_id = ++data->max_inv_var_id; 1591 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op)); 1592 } 1593 1594 /* Record a group of TYPE. 
*/ 1595 1596 static struct iv_group * 1597 record_group (struct ivopts_data *data, enum use_type type) 1598 { 1599 struct iv_group *group = XCNEW (struct iv_group); 1600 1601 group->id = data->vgroups.length (); 1602 group->type = type; 1603 group->related_cands = BITMAP_ALLOC (NULL); 1604 group->vuses.create (1); 1605 1606 data->vgroups.safe_push (group); 1607 return group; 1608 } 1609 1610 /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group. 1611 New group will be created if there is no existing group for the use. 1612 MEM_TYPE is the type of memory being addressed, or NULL if this 1613 isn't an address reference. */ 1614 1615 static struct iv_use * 1616 record_group_use (struct ivopts_data *data, tree *use_p, 1617 struct iv *iv, gimple *stmt, enum use_type type, 1618 tree mem_type) 1619 { 1620 tree addr_base = NULL; 1621 struct iv_group *group = NULL; 1622 poly_uint64 addr_offset = 0; 1623 1624 /* Record non address type use in a new group. */ 1625 if (address_p (type)) 1626 { 1627 unsigned int i; 1628 1629 addr_base = strip_offset (iv->base, &addr_offset); 1630 for (i = 0; i < data->vgroups.length (); i++) 1631 { 1632 struct iv_use *use; 1633 1634 group = data->vgroups[i]; 1635 use = group->vuses[0]; 1636 if (!address_p (use->type)) 1637 continue; 1638 1639 /* Check if it has the same stripped base and step. */ 1640 if (operand_equal_p (iv->base_object, use->iv->base_object, 0) 1641 && operand_equal_p (iv->step, use->iv->step, 0) 1642 && operand_equal_p (addr_base, use->addr_base, 0)) 1643 break; 1644 } 1645 if (i == data->vgroups.length ()) 1646 group = NULL; 1647 } 1648 1649 if (!group) 1650 group = record_group (data, type); 1651 1652 return record_use (group, use_p, iv, stmt, type, mem_type, 1653 addr_base, addr_offset); 1654 } 1655 1656 /* Checks whether the use OP is interesting and if so, records it. */ 1657 1658 static struct iv_use * 1659 find_interesting_uses_op (struct ivopts_data *data, tree op) 1660 { 1661 struct iv *iv; 1662 gimple *stmt; 1663 struct iv_use *use; 1664 1665 if (TREE_CODE (op) != SSA_NAME) 1666 return NULL; 1667 1668 iv = get_iv (data, op); 1669 if (!iv) 1670 return NULL; 1671 1672 if (iv->nonlin_use) 1673 { 1674 gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR); 1675 return iv->nonlin_use; 1676 } 1677 1678 if (integer_zerop (iv->step)) 1679 { 1680 record_invariant (data, op, true); 1681 return NULL; 1682 } 1683 1684 stmt = SSA_NAME_DEF_STMT (op); 1685 gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt)); 1686 1687 use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR, NULL_TREE); 1688 iv->nonlin_use = use; 1689 return use; 1690 } 1691 1692 /* Indicate how compare type iv_use can be handled. */ 1693 enum comp_iv_rewrite 1694 { 1695 COMP_IV_NA, 1696 /* We may rewrite compare type iv_use by expressing value of the iv_use. */ 1697 COMP_IV_EXPR, 1698 /* We may rewrite compare type iv_uses on both sides of comparison by 1699 expressing value of each iv_use. */ 1700 COMP_IV_EXPR_2, 1701 /* We may rewrite compare type iv_use by expressing value of the iv_use 1702 or by eliminating it with other iv_cand. */ 1703 COMP_IV_ELIM 1704 }; 1705 1706 /* Given a condition in statement STMT, checks whether it is a compare 1707 of an induction variable and an invariant. If this is the case, 1708 CONTROL_VAR is set to location of the iv, BOUND to the location of 1709 the invariant, IV_VAR and IV_BOUND are set to the corresponding 1710 induction variable descriptions, and true is returned. 
If this is not 1711 the case, CONTROL_VAR and BOUND are set to the arguments of the 1712 condition and false is returned. */ 1713 1714 static enum comp_iv_rewrite 1715 extract_cond_operands (struct ivopts_data *data, gimple *stmt, 1716 tree **control_var, tree **bound, 1717 struct iv **iv_var, struct iv **iv_bound) 1718 { 1719 /* The objects returned when COND has constant operands. */ 1720 static struct iv const_iv; 1721 static tree zero; 1722 tree *op0 = &zero, *op1 = &zero; 1723 struct iv *iv0 = &const_iv, *iv1 = &const_iv; 1724 enum comp_iv_rewrite rewrite_type = COMP_IV_NA; 1725 1726 if (gimple_code (stmt) == GIMPLE_COND) 1727 { 1728 gcond *cond_stmt = as_a <gcond *> (stmt); 1729 op0 = gimple_cond_lhs_ptr (cond_stmt); 1730 op1 = gimple_cond_rhs_ptr (cond_stmt); 1731 } 1732 else 1733 { 1734 op0 = gimple_assign_rhs1_ptr (stmt); 1735 op1 = gimple_assign_rhs2_ptr (stmt); 1736 } 1737 1738 zero = integer_zero_node; 1739 const_iv.step = integer_zero_node; 1740 1741 if (TREE_CODE (*op0) == SSA_NAME) 1742 iv0 = get_iv (data, *op0); 1743 if (TREE_CODE (*op1) == SSA_NAME) 1744 iv1 = get_iv (data, *op1); 1745 1746 /* If both sides of comparison are IVs. We can express ivs on both end. */ 1747 if (iv0 && iv1 && !integer_zerop (iv0->step) && !integer_zerop (iv1->step)) 1748 { 1749 rewrite_type = COMP_IV_EXPR_2; 1750 goto end; 1751 } 1752 1753 /* If none side of comparison is IV. */ 1754 if ((!iv0 || integer_zerop (iv0->step)) 1755 && (!iv1 || integer_zerop (iv1->step))) 1756 goto end; 1757 1758 /* Control variable may be on the other side. */ 1759 if (!iv0 || integer_zerop (iv0->step)) 1760 { 1761 std::swap (op0, op1); 1762 std::swap (iv0, iv1); 1763 } 1764 /* If one side is IV and the other side isn't loop invariant. */ 1765 if (!iv1) 1766 rewrite_type = COMP_IV_EXPR; 1767 /* If one side is IV and the other side is loop invariant. */ 1768 else if (!integer_zerop (iv0->step) && integer_zerop (iv1->step)) 1769 rewrite_type = COMP_IV_ELIM; 1770 1771 end: 1772 if (control_var) 1773 *control_var = op0; 1774 if (iv_var) 1775 *iv_var = iv0; 1776 if (bound) 1777 *bound = op1; 1778 if (iv_bound) 1779 *iv_bound = iv1; 1780 1781 return rewrite_type; 1782 } 1783 1784 /* Checks whether the condition in STMT is interesting and if so, 1785 records it. */ 1786 1787 static void 1788 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt) 1789 { 1790 tree *var_p, *bound_p; 1791 struct iv *var_iv, *bound_iv; 1792 enum comp_iv_rewrite ret; 1793 1794 ret = extract_cond_operands (data, stmt, 1795 &var_p, &bound_p, &var_iv, &bound_iv); 1796 if (ret == COMP_IV_NA) 1797 { 1798 find_interesting_uses_op (data, *var_p); 1799 find_interesting_uses_op (data, *bound_p); 1800 return; 1801 } 1802 1803 record_group_use (data, var_p, var_iv, stmt, USE_COMPARE, NULL_TREE); 1804 /* Record compare type iv_use for iv on the other side of comparison. */ 1805 if (ret == COMP_IV_EXPR_2) 1806 record_group_use (data, bound_p, bound_iv, stmt, USE_COMPARE, NULL_TREE); 1807 } 1808 1809 /* Returns the outermost loop EXPR is obviously invariant in 1810 relative to the loop LOOP, i.e. if all its operands are defined 1811 outside of the returned loop. Returns NULL if EXPR is not 1812 even obviously invariant in LOOP. 
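
   For instance (illustrative), if EXPR is "n_4 + 8" and n_4 is defined
   outside all loops, the root of the loop tree is returned; if n_4 is
   defined in an enclosing loop of LOOP, the returned loop is the outermost
   ancestor of LOOP that is still nested inside that enclosing loop.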
*/ 1813 1814 struct loop * 1815 outermost_invariant_loop_for_expr (struct loop *loop, tree expr) 1816 { 1817 basic_block def_bb; 1818 unsigned i, len; 1819 1820 if (is_gimple_min_invariant (expr)) 1821 return current_loops->tree_root; 1822 1823 if (TREE_CODE (expr) == SSA_NAME) 1824 { 1825 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr)); 1826 if (def_bb) 1827 { 1828 if (flow_bb_inside_loop_p (loop, def_bb)) 1829 return NULL; 1830 return superloop_at_depth (loop, 1831 loop_depth (def_bb->loop_father) + 1); 1832 } 1833 1834 return current_loops->tree_root; 1835 } 1836 1837 if (!EXPR_P (expr)) 1838 return NULL; 1839 1840 unsigned maxdepth = 0; 1841 len = TREE_OPERAND_LENGTH (expr); 1842 for (i = 0; i < len; i++) 1843 { 1844 struct loop *ivloop; 1845 if (!TREE_OPERAND (expr, i)) 1846 continue; 1847 1848 ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i)); 1849 if (!ivloop) 1850 return NULL; 1851 maxdepth = MAX (maxdepth, loop_depth (ivloop)); 1852 } 1853 1854 return superloop_at_depth (loop, maxdepth); 1855 } 1856 1857 /* Returns true if expression EXPR is obviously invariant in LOOP, 1858 i.e. if all its operands are defined outside of the LOOP. LOOP 1859 should not be the function body. */ 1860 1861 bool 1862 expr_invariant_in_loop_p (struct loop *loop, tree expr) 1863 { 1864 basic_block def_bb; 1865 unsigned i, len; 1866 1867 gcc_assert (loop_depth (loop) > 0); 1868 1869 if (is_gimple_min_invariant (expr)) 1870 return true; 1871 1872 if (TREE_CODE (expr) == SSA_NAME) 1873 { 1874 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr)); 1875 if (def_bb 1876 && flow_bb_inside_loop_p (loop, def_bb)) 1877 return false; 1878 1879 return true; 1880 } 1881 1882 if (!EXPR_P (expr)) 1883 return false; 1884 1885 len = TREE_OPERAND_LENGTH (expr); 1886 for (i = 0; i < len; i++) 1887 if (TREE_OPERAND (expr, i) 1888 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i))) 1889 return false; 1890 1891 return true; 1892 } 1893 1894 /* Given expression EXPR which computes inductive values with respect 1895 to loop recorded in DATA, this function returns biv from which EXPR 1896 is derived by tracing definition chains of ssa variables in EXPR. */ 1897 1898 static struct iv* 1899 find_deriving_biv_for_expr (struct ivopts_data *data, tree expr) 1900 { 1901 struct iv *iv; 1902 unsigned i, n; 1903 tree e2, e1; 1904 enum tree_code code; 1905 gimple *stmt; 1906 1907 if (expr == NULL_TREE) 1908 return NULL; 1909 1910 if (is_gimple_min_invariant (expr)) 1911 return NULL; 1912 1913 code = TREE_CODE (expr); 1914 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code))) 1915 { 1916 n = TREE_OPERAND_LENGTH (expr); 1917 for (i = 0; i < n; i++) 1918 { 1919 iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i)); 1920 if (iv) 1921 return iv; 1922 } 1923 } 1924 1925 /* Stop if it's not ssa name. */ 1926 if (code != SSA_NAME) 1927 return NULL; 1928 1929 iv = get_iv (data, expr); 1930 if (!iv || integer_zerop (iv->step)) 1931 return NULL; 1932 else if (iv->biv_p) 1933 return iv; 1934 1935 stmt = SSA_NAME_DEF_STMT (expr); 1936 if (gphi *phi = dyn_cast <gphi *> (stmt)) 1937 { 1938 ssa_op_iter iter; 1939 use_operand_p use_p; 1940 basic_block phi_bb = gimple_bb (phi); 1941 1942 /* Skip loop header PHI that doesn't define biv. 
*/ 1943 if (phi_bb->loop_father == data->current_loop) 1944 return NULL; 1945 1946 if (virtual_operand_p (gimple_phi_result (phi))) 1947 return NULL; 1948 1949 FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE) 1950 { 1951 tree use = USE_FROM_PTR (use_p); 1952 iv = find_deriving_biv_for_expr (data, use); 1953 if (iv) 1954 return iv; 1955 } 1956 return NULL; 1957 } 1958 if (gimple_code (stmt) != GIMPLE_ASSIGN) 1959 return NULL; 1960 1961 e1 = gimple_assign_rhs1 (stmt); 1962 code = gimple_assign_rhs_code (stmt); 1963 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS) 1964 return find_deriving_biv_for_expr (data, e1); 1965 1966 switch (code) 1967 { 1968 case MULT_EXPR: 1969 case PLUS_EXPR: 1970 case MINUS_EXPR: 1971 case POINTER_PLUS_EXPR: 1972 /* Increments, decrements and multiplications by a constant 1973 are simple. */ 1974 e2 = gimple_assign_rhs2 (stmt); 1975 iv = find_deriving_biv_for_expr (data, e2); 1976 if (iv) 1977 return iv; 1978 gcc_fallthrough (); 1979 1980 CASE_CONVERT: 1981 /* Casts are simple. */ 1982 return find_deriving_biv_for_expr (data, e1); 1983 1984 default: 1985 break; 1986 } 1987 1988 return NULL; 1989 } 1990 1991 /* Record BIV, its predecessor and successor that they are used in 1992 address type uses. */ 1993 1994 static void 1995 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv) 1996 { 1997 unsigned i; 1998 tree type, base_1, base_2; 1999 bitmap_iterator bi; 2000 2001 if (!biv || !biv->biv_p || integer_zerop (biv->step) 2002 || biv->have_address_use || !biv->no_overflow) 2003 return; 2004 2005 type = TREE_TYPE (biv->base); 2006 if (!INTEGRAL_TYPE_P (type)) 2007 return; 2008 2009 biv->have_address_use = true; 2010 data->bivs_not_used_in_addr--; 2011 base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step); 2012 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi) 2013 { 2014 struct iv *iv = ver_info (data, i)->iv; 2015 2016 if (!iv || !iv->biv_p || integer_zerop (iv->step) 2017 || iv->have_address_use || !iv->no_overflow) 2018 continue; 2019 2020 if (type != TREE_TYPE (iv->base) 2021 || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base))) 2022 continue; 2023 2024 if (!operand_equal_p (biv->step, iv->step, 0)) 2025 continue; 2026 2027 base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step); 2028 if (operand_equal_p (base_1, iv->base, 0) 2029 || operand_equal_p (base_2, biv->base, 0)) 2030 { 2031 iv->have_address_use = true; 2032 data->bivs_not_used_in_addr--; 2033 } 2034 } 2035 } 2036 2037 /* Cumulates the steps of indices into DATA and replaces their values with the 2038 initial ones. Returns false when the value of the index cannot be determined. 2039 Callback for for_each_index. */ 2040 2041 struct ifs_ivopts_data 2042 { 2043 struct ivopts_data *ivopts_data; 2044 gimple *stmt; 2045 tree step; 2046 }; 2047 2048 static bool 2049 idx_find_step (tree base, tree *idx, void *data) 2050 { 2051 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data; 2052 struct iv *iv; 2053 bool use_overflow_semantics = false; 2054 tree step, iv_base, iv_step, lbound, off; 2055 struct loop *loop = dta->ivopts_data->current_loop; 2056 2057 /* If base is a component ref, require that the offset of the reference 2058 be invariant. */ 2059 if (TREE_CODE (base) == COMPONENT_REF) 2060 { 2061 off = component_ref_field_offset (base); 2062 return expr_invariant_in_loop_p (loop, off); 2063 } 2064 2065 /* If base is array, first check whether we will be able to move the 2066 reference out of the loop (in order to take its address in strength 2067 reduction). 
In order for this to work we need both lower bound 2068 and step to be loop invariants. */ 2069 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF) 2070 { 2071 /* Moreover, for a range, the size needs to be invariant as well. */ 2072 if (TREE_CODE (base) == ARRAY_RANGE_REF 2073 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base)))) 2074 return false; 2075 2076 step = array_ref_element_size (base); 2077 lbound = array_ref_low_bound (base); 2078 2079 if (!expr_invariant_in_loop_p (loop, step) 2080 || !expr_invariant_in_loop_p (loop, lbound)) 2081 return false; 2082 } 2083 2084 if (TREE_CODE (*idx) != SSA_NAME) 2085 return true; 2086 2087 iv = get_iv (dta->ivopts_data, *idx); 2088 if (!iv) 2089 return false; 2090 2091 /* XXX We produce for a base of *D42 with iv->base being &x[0] 2092 *&x[0], which is not folded and does not trigger the 2093 ARRAY_REF path below. */ 2094 *idx = iv->base; 2095 2096 if (integer_zerop (iv->step)) 2097 return true; 2098 2099 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF) 2100 { 2101 step = array_ref_element_size (base); 2102 2103 /* We only handle addresses whose step is an integer constant. */ 2104 if (TREE_CODE (step) != INTEGER_CST) 2105 return false; 2106 } 2107 else 2108 /* The step for pointer arithmetics already is 1 byte. */ 2109 step = size_one_node; 2110 2111 iv_base = iv->base; 2112 iv_step = iv->step; 2113 if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step))) 2114 use_overflow_semantics = true; 2115 2116 if (!convert_affine_scev (dta->ivopts_data->current_loop, 2117 sizetype, &iv_base, &iv_step, dta->stmt, 2118 use_overflow_semantics)) 2119 { 2120 /* The index might wrap. */ 2121 return false; 2122 } 2123 2124 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step); 2125 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step); 2126 2127 if (dta->ivopts_data->bivs_not_used_in_addr) 2128 { 2129 if (!iv->biv_p) 2130 iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name); 2131 2132 record_biv_for_address_use (dta->ivopts_data, iv); 2133 } 2134 return true; 2135 } 2136 2137 /* Records use in index IDX. Callback for for_each_index. Ivopts data 2138 object is passed to it in DATA. */ 2139 2140 static bool 2141 idx_record_use (tree base, tree *idx, 2142 void *vdata) 2143 { 2144 struct ivopts_data *data = (struct ivopts_data *) vdata; 2145 find_interesting_uses_op (data, *idx); 2146 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF) 2147 { 2148 find_interesting_uses_op (data, array_ref_element_size (base)); 2149 find_interesting_uses_op (data, array_ref_low_bound (base)); 2150 } 2151 return true; 2152 } 2153 2154 /* If we can prove that TOP = cst * BOT for some constant cst, 2155 store cst to MUL and return true. Otherwise return false. 2156 The returned value is always sign-extended, regardless of the 2157 signedness of TOP and BOT. 
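   For illustration (hypothetical operands): with TOP = i_1 * 4 - i_1 and
   BOT = i_1 this stores 3 to MUL; with TOP = 12 and BOT = 4 it also stores 3;
   with TOP = 14 and BOT = 4 it fails because the division is not exact.
   Within this pass the function is used to compute the ratio of a use's step
   to a candidate's step.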
*/ 2158 2159 static bool 2160 constant_multiple_of (tree top, tree bot, widest_int *mul) 2161 { 2162 tree mby; 2163 enum tree_code code; 2164 unsigned precision = TYPE_PRECISION (TREE_TYPE (top)); 2165 widest_int res, p0, p1; 2166 2167 STRIP_NOPS (top); 2168 STRIP_NOPS (bot); 2169 2170 if (operand_equal_p (top, bot, 0)) 2171 { 2172 *mul = 1; 2173 return true; 2174 } 2175 2176 code = TREE_CODE (top); 2177 switch (code) 2178 { 2179 case MULT_EXPR: 2180 mby = TREE_OPERAND (top, 1); 2181 if (TREE_CODE (mby) != INTEGER_CST) 2182 return false; 2183 2184 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res)) 2185 return false; 2186 2187 *mul = wi::sext (res * wi::to_widest (mby), precision); 2188 return true; 2189 2190 case PLUS_EXPR: 2191 case MINUS_EXPR: 2192 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0) 2193 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1)) 2194 return false; 2195 2196 if (code == MINUS_EXPR) 2197 p1 = -p1; 2198 *mul = wi::sext (p0 + p1, precision); 2199 return true; 2200 2201 case INTEGER_CST: 2202 if (TREE_CODE (bot) != INTEGER_CST) 2203 return false; 2204 2205 p0 = widest_int::from (wi::to_wide (top), SIGNED); 2206 p1 = widest_int::from (wi::to_wide (bot), SIGNED); 2207 if (p1 == 0) 2208 return false; 2209 *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision); 2210 return res == 0; 2211 2212 default: 2213 if (POLY_INT_CST_P (top) 2214 && POLY_INT_CST_P (bot) 2215 && constant_multiple_p (wi::to_poly_widest (top), 2216 wi::to_poly_widest (bot), mul)) 2217 return true; 2218 2219 return false; 2220 } 2221 } 2222 2223 /* Return true if memory reference REF with step STEP may be unaligned. */ 2224 2225 static bool 2226 may_be_unaligned_p (tree ref, tree step) 2227 { 2228 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target, 2229 thus they are not misaligned. */ 2230 if (TREE_CODE (ref) == TARGET_MEM_REF) 2231 return false; 2232 2233 unsigned int align = TYPE_ALIGN (TREE_TYPE (ref)); 2234 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align) 2235 align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))); 2236 2237 unsigned HOST_WIDE_INT bitpos; 2238 unsigned int ref_align; 2239 get_object_alignment_1 (ref, &ref_align, &bitpos); 2240 if (ref_align < align 2241 || (bitpos % align) != 0 2242 || (bitpos % BITS_PER_UNIT) != 0) 2243 return true; 2244 2245 unsigned int trailing_zeros = tree_ctz (step); 2246 if (trailing_zeros < HOST_BITS_PER_INT 2247 && (1U << trailing_zeros) * BITS_PER_UNIT < align) 2248 return true; 2249 2250 return false; 2251 } 2252 2253 /* Return true if EXPR may be non-addressable. */ 2254 2255 bool 2256 may_be_nonaddressable_p (tree expr) 2257 { 2258 switch (TREE_CODE (expr)) 2259 { 2260 case VAR_DECL: 2261 /* Check if it's a register variable. */ 2262 return DECL_HARD_REGISTER (expr); 2263 2264 case TARGET_MEM_REF: 2265 /* TARGET_MEM_REFs are translated directly to valid MEMs on the 2266 target, thus they are always addressable. */ 2267 return false; 2268 2269 case MEM_REF: 2270 /* Likewise for MEM_REFs, modulo the storage order. 
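   A plain MEM_REF is always addressable; only a reference with reverse
   storage order is reported as possibly non-addressable here.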
*/ 2271 return REF_REVERSE_STORAGE_ORDER (expr); 2272 2273 case BIT_FIELD_REF: 2274 if (REF_REVERSE_STORAGE_ORDER (expr)) 2275 return true; 2276 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0)); 2277 2278 case COMPONENT_REF: 2279 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0)))) 2280 return true; 2281 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1)) 2282 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0)); 2283 2284 case ARRAY_REF: 2285 case ARRAY_RANGE_REF: 2286 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0)))) 2287 return true; 2288 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0)); 2289 2290 case VIEW_CONVERT_EXPR: 2291 /* This kind of view-conversions may wrap non-addressable objects 2292 and make them look addressable. After some processing the 2293 non-addressability may be uncovered again, causing ADDR_EXPRs 2294 of inappropriate objects to be built. */ 2295 if (is_gimple_reg (TREE_OPERAND (expr, 0)) 2296 || !is_gimple_addressable (TREE_OPERAND (expr, 0))) 2297 return true; 2298 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0)); 2299 2300 CASE_CONVERT: 2301 return true; 2302 2303 default: 2304 break; 2305 } 2306 2307 return false; 2308 } 2309 2310 /* Finds addresses in *OP_P inside STMT. */ 2311 2312 static void 2313 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt, 2314 tree *op_p) 2315 { 2316 tree base = *op_p, step = size_zero_node; 2317 struct iv *civ; 2318 struct ifs_ivopts_data ifs_ivopts_data; 2319 2320 /* Do not play with volatile memory references. A bit too conservative, 2321 perhaps, but safe. */ 2322 if (gimple_has_volatile_ops (stmt)) 2323 goto fail; 2324 2325 /* Ignore bitfields for now. Not really something terribly complicated 2326 to handle. TODO. */ 2327 if (TREE_CODE (base) == BIT_FIELD_REF) 2328 goto fail; 2329 2330 base = unshare_expr (base); 2331 2332 if (TREE_CODE (base) == TARGET_MEM_REF) 2333 { 2334 tree type = build_pointer_type (TREE_TYPE (base)); 2335 tree astep; 2336 2337 if (TMR_BASE (base) 2338 && TREE_CODE (TMR_BASE (base)) == SSA_NAME) 2339 { 2340 civ = get_iv (data, TMR_BASE (base)); 2341 if (!civ) 2342 goto fail; 2343 2344 TMR_BASE (base) = civ->base; 2345 step = civ->step; 2346 } 2347 if (TMR_INDEX2 (base) 2348 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME) 2349 { 2350 civ = get_iv (data, TMR_INDEX2 (base)); 2351 if (!civ) 2352 goto fail; 2353 2354 TMR_INDEX2 (base) = civ->base; 2355 step = civ->step; 2356 } 2357 if (TMR_INDEX (base) 2358 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME) 2359 { 2360 civ = get_iv (data, TMR_INDEX (base)); 2361 if (!civ) 2362 goto fail; 2363 2364 TMR_INDEX (base) = civ->base; 2365 astep = civ->step; 2366 2367 if (astep) 2368 { 2369 if (TMR_STEP (base)) 2370 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep); 2371 2372 step = fold_build2 (PLUS_EXPR, type, step, astep); 2373 } 2374 } 2375 2376 if (integer_zerop (step)) 2377 goto fail; 2378 base = tree_mem_ref_addr (type, base); 2379 } 2380 else 2381 { 2382 ifs_ivopts_data.ivopts_data = data; 2383 ifs_ivopts_data.stmt = stmt; 2384 ifs_ivopts_data.step = size_zero_node; 2385 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data) 2386 || integer_zerop (ifs_ivopts_data.step)) 2387 goto fail; 2388 step = ifs_ivopts_data.step; 2389 2390 /* Check that the base expression is addressable. This needs 2391 to be done after substituting bases of IVs into it. 
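   (Substituting the IV bases can expose a non-addressable object, e.g. a
   register variable wrapped in a VIEW_CONVERT_EXPR, that was hidden behind
   an SSA name before the substitution.)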
*/ 2392 if (may_be_nonaddressable_p (base)) 2393 goto fail; 2394 2395 /* Moreover, on strict alignment platforms, check that it is 2396 sufficiently aligned. */ 2397 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step)) 2398 goto fail; 2399 2400 base = build_fold_addr_expr (base); 2401 2402 /* Substituting bases of IVs into the base expression might 2403 have caused folding opportunities. */ 2404 if (TREE_CODE (base) == ADDR_EXPR) 2405 { 2406 tree *ref = &TREE_OPERAND (base, 0); 2407 while (handled_component_p (*ref)) 2408 ref = &TREE_OPERAND (*ref, 0); 2409 if (TREE_CODE (*ref) == MEM_REF) 2410 { 2411 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref), 2412 TREE_OPERAND (*ref, 0), 2413 TREE_OPERAND (*ref, 1)); 2414 if (tem) 2415 *ref = tem; 2416 } 2417 } 2418 } 2419 2420 civ = alloc_iv (data, base, step); 2421 /* Fail if base object of this memory reference is unknown. */ 2422 if (civ->base_object == NULL_TREE) 2423 goto fail; 2424 2425 record_group_use (data, op_p, civ, stmt, USE_REF_ADDRESS, TREE_TYPE (*op_p)); 2426 return; 2427 2428 fail: 2429 for_each_index (op_p, idx_record_use, data); 2430 } 2431 2432 /* Finds and records invariants used in STMT. */ 2433 2434 static void 2435 find_invariants_stmt (struct ivopts_data *data, gimple *stmt) 2436 { 2437 ssa_op_iter iter; 2438 use_operand_p use_p; 2439 tree op; 2440 2441 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE) 2442 { 2443 op = USE_FROM_PTR (use_p); 2444 record_invariant (data, op, false); 2445 } 2446 } 2447 2448 /* CALL calls an internal function. If operand *OP_P will become an 2449 address when the call is expanded, return the type of the memory 2450 being addressed, otherwise return null. */ 2451 2452 static tree 2453 get_mem_type_for_internal_fn (gcall *call, tree *op_p) 2454 { 2455 switch (gimple_call_internal_fn (call)) 2456 { 2457 case IFN_MASK_LOAD: 2458 if (op_p == gimple_call_arg_ptr (call, 0)) 2459 return TREE_TYPE (gimple_call_lhs (call)); 2460 return NULL_TREE; 2461 2462 case IFN_MASK_STORE: 2463 if (op_p == gimple_call_arg_ptr (call, 0)) 2464 return TREE_TYPE (gimple_call_arg (call, 3)); 2465 return NULL_TREE; 2466 2467 default: 2468 return NULL_TREE; 2469 } 2470 } 2471 2472 /* IV is a (non-address) iv that describes operand *OP_P of STMT. 2473 Return true if the operand will become an address when STMT 2474 is expanded and record the associated address use if so. */ 2475 2476 static bool 2477 find_address_like_use (struct ivopts_data *data, gimple *stmt, tree *op_p, 2478 struct iv *iv) 2479 { 2480 /* Fail if base object of this memory reference is unknown. */ 2481 if (iv->base_object == NULL_TREE) 2482 return false; 2483 2484 tree mem_type = NULL_TREE; 2485 if (gcall *call = dyn_cast <gcall *> (stmt)) 2486 if (gimple_call_internal_p (call)) 2487 mem_type = get_mem_type_for_internal_fn (call, op_p); 2488 if (mem_type) 2489 { 2490 iv = alloc_iv (data, iv->base, iv->step); 2491 record_group_use (data, op_p, iv, stmt, USE_PTR_ADDRESS, mem_type); 2492 return true; 2493 } 2494 return false; 2495 } 2496 2497 /* Finds interesting uses of induction variables in the statement STMT. 
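   Conditions (GIMPLE_CONDs and comparisons) are recorded as compare uses,
   memory references as address uses, pointer arguments of internal calls
   such as masked loads/stores as address-like uses, and remaining operands
   as nonlinear uses; statements that merely define another induction
   variable are skipped.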
*/ 2498 2499 static void 2500 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt) 2501 { 2502 struct iv *iv; 2503 tree op, *lhs, *rhs; 2504 ssa_op_iter iter; 2505 use_operand_p use_p; 2506 enum tree_code code; 2507 2508 find_invariants_stmt (data, stmt); 2509 2510 if (gimple_code (stmt) == GIMPLE_COND) 2511 { 2512 find_interesting_uses_cond (data, stmt); 2513 return; 2514 } 2515 2516 if (is_gimple_assign (stmt)) 2517 { 2518 lhs = gimple_assign_lhs_ptr (stmt); 2519 rhs = gimple_assign_rhs1_ptr (stmt); 2520 2521 if (TREE_CODE (*lhs) == SSA_NAME) 2522 { 2523 /* If the statement defines an induction variable, the uses are not 2524 interesting by themselves. */ 2525 2526 iv = get_iv (data, *lhs); 2527 2528 if (iv && !integer_zerop (iv->step)) 2529 return; 2530 } 2531 2532 code = gimple_assign_rhs_code (stmt); 2533 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS 2534 && (REFERENCE_CLASS_P (*rhs) 2535 || is_gimple_val (*rhs))) 2536 { 2537 if (REFERENCE_CLASS_P (*rhs)) 2538 find_interesting_uses_address (data, stmt, rhs); 2539 else 2540 find_interesting_uses_op (data, *rhs); 2541 2542 if (REFERENCE_CLASS_P (*lhs)) 2543 find_interesting_uses_address (data, stmt, lhs); 2544 return; 2545 } 2546 else if (TREE_CODE_CLASS (code) == tcc_comparison) 2547 { 2548 find_interesting_uses_cond (data, stmt); 2549 return; 2550 } 2551 2552 /* TODO -- we should also handle address uses of type 2553 2554 memory = call (whatever); 2555 2556 and 2557 2558 call (memory). */ 2559 } 2560 2561 if (gimple_code (stmt) == GIMPLE_PHI 2562 && gimple_bb (stmt) == data->current_loop->header) 2563 { 2564 iv = get_iv (data, PHI_RESULT (stmt)); 2565 2566 if (iv && !integer_zerop (iv->step)) 2567 return; 2568 } 2569 2570 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE) 2571 { 2572 op = USE_FROM_PTR (use_p); 2573 2574 if (TREE_CODE (op) != SSA_NAME) 2575 continue; 2576 2577 iv = get_iv (data, op); 2578 if (!iv) 2579 continue; 2580 2581 if (!find_address_like_use (data, stmt, use_p->use, iv)) 2582 find_interesting_uses_op (data, op); 2583 } 2584 } 2585 2586 /* Finds interesting uses of induction variables outside of loops 2587 on loop exit edge EXIT. */ 2588 2589 static void 2590 find_interesting_uses_outside (struct ivopts_data *data, edge exit) 2591 { 2592 gphi *phi; 2593 gphi_iterator psi; 2594 tree def; 2595 2596 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi)) 2597 { 2598 phi = psi.phi (); 2599 def = PHI_ARG_DEF_FROM_EDGE (phi, exit); 2600 if (!virtual_operand_p (def)) 2601 find_interesting_uses_op (data, def); 2602 } 2603 } 2604 2605 /* Return TRUE if OFFSET is within the range of [base + offset] addressing 2606 mode for memory reference represented by USE. 
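   This is checked by building a (reg + offset) address in the address space
   and memory mode of USE and asking memory_address_addr_space_p whether the
   target accepts it; the RTL template is cached in ADDR_LIST, indexed by
   address space and memory mode.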
*/
2607
2608 static GTY (()) vec<rtx, va_gc> *addr_list;
2609
2610 static bool
2611 addr_offset_valid_p (struct iv_use *use, poly_int64 offset)
2612 {
2613 rtx reg, addr;
2614 unsigned list_index;
2615 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2616 machine_mode addr_mode, mem_mode = TYPE_MODE (use->mem_type);
2617
2618 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2619 if (list_index >= vec_safe_length (addr_list))
2620 vec_safe_grow_cleared (addr_list, list_index + MAX_MACHINE_MODE);
2621
2622 addr = (*addr_list)[list_index];
2623 if (!addr)
2624 {
2625 addr_mode = targetm.addr_space.address_mode (as);
2626 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2627 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2628 (*addr_list)[list_index] = addr;
2629 }
2630 else
2631 addr_mode = GET_MODE (addr);
2632
2633 XEXP (addr, 1) = gen_int_mode (offset, addr_mode);
2634 return (memory_address_addr_space_p (mem_mode, addr, as));
2635 }
2636
2637 /* Comparison function to sort a group in ascending order of addr_offset. */
2638
2639 static int
2640 group_compare_offset (const void *a, const void *b)
2641 {
2642 const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2643 const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2644
2645 return compare_sizes_for_sort ((*u1)->addr_offset, (*u2)->addr_offset);
2646 }
2647
2648 /* Check if small groups should be split. Return true if no group
2649 contains more than two uses with distinct addr_offsets. Return
2650 false otherwise. We want to split such groups because:
2651
2652 1) Small groups don't have much benefit and may interfere with
2653 general candidate selection.
2654 2) A problem consisting only of small groups is usually small itself,
2655 so the general algorithm can handle it well.
2656
2657 TODO -- The above claim may not hold once we want to merge memory
2658 accesses with consecutive addresses. */
2659
2660 static bool
2661 split_small_address_groups_p (struct ivopts_data *data)
2662 {
2663 unsigned int i, j, distinct = 1;
2664 struct iv_use *pre;
2665 struct iv_group *group;
2666
2667 for (i = 0; i < data->vgroups.length (); i++)
2668 {
2669 group = data->vgroups[i];
2670 if (group->vuses.length () == 1)
2671 continue;
2672
2673 gcc_assert (address_p (group->type));
2674 if (group->vuses.length () == 2)
2675 {
2676 if (compare_sizes_for_sort (group->vuses[0]->addr_offset,
2677 group->vuses[1]->addr_offset) > 0)
2678 std::swap (group->vuses[0], group->vuses[1]);
2679 }
2680 else
2681 group->vuses.qsort (group_compare_offset);
2682
2683 if (distinct > 2)
2684 continue;
2685
2686 distinct = 1;
2687 for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
2688 {
2689 if (maybe_ne (group->vuses[j]->addr_offset, pre->addr_offset))
2690 {
2691 pre = group->vuses[j];
2692 distinct++;
2693 }
2694
2695 if (distinct > 2)
2696 break;
2697 }
2698 }
2699
2700 return (distinct <= 2);
2701 }
2702
2703 /* For each group of address type uses, this function further groups
2704 these uses according to the maximum offset supported by the target's
2705 [base + offset] addressing mode. */
2706
2707 static void
2708 split_address_groups (struct ivopts_data *data)
2709 {
2710 unsigned int i, j;
2711 /* Whether to split every group unconditionally; see
 split_small_address_groups_p.
*/ 2712 bool split_p = split_small_address_groups_p (data); 2713 2714 for (i = 0; i < data->vgroups.length (); i++) 2715 { 2716 struct iv_group *new_group = NULL; 2717 struct iv_group *group = data->vgroups[i]; 2718 struct iv_use *use = group->vuses[0]; 2719 2720 use->id = 0; 2721 use->group_id = group->id; 2722 if (group->vuses.length () == 1) 2723 continue; 2724 2725 gcc_assert (address_p (use->type)); 2726 2727 for (j = 1; j < group->vuses.length ();) 2728 { 2729 struct iv_use *next = group->vuses[j]; 2730 poly_int64 offset = next->addr_offset - use->addr_offset; 2731 2732 /* Split group if aksed to, or the offset against the first 2733 use can't fit in offset part of addressing mode. IV uses 2734 having the same offset are still kept in one group. */ 2735 if (maybe_ne (offset, 0) 2736 && (split_p || !addr_offset_valid_p (use, offset))) 2737 { 2738 if (!new_group) 2739 new_group = record_group (data, group->type); 2740 group->vuses.ordered_remove (j); 2741 new_group->vuses.safe_push (next); 2742 continue; 2743 } 2744 2745 next->id = j; 2746 next->group_id = group->id; 2747 j++; 2748 } 2749 } 2750 } 2751 2752 /* Finds uses of the induction variables that are interesting. */ 2753 2754 static void 2755 find_interesting_uses (struct ivopts_data *data) 2756 { 2757 basic_block bb; 2758 gimple_stmt_iterator bsi; 2759 basic_block *body = get_loop_body (data->current_loop); 2760 unsigned i; 2761 edge e; 2762 2763 for (i = 0; i < data->current_loop->num_nodes; i++) 2764 { 2765 edge_iterator ei; 2766 bb = body[i]; 2767 2768 FOR_EACH_EDGE (e, ei, bb->succs) 2769 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) 2770 && !flow_bb_inside_loop_p (data->current_loop, e->dest)) 2771 find_interesting_uses_outside (data, e); 2772 2773 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi)) 2774 find_interesting_uses_stmt (data, gsi_stmt (bsi)); 2775 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi)) 2776 if (!is_gimple_debug (gsi_stmt (bsi))) 2777 find_interesting_uses_stmt (data, gsi_stmt (bsi)); 2778 } 2779 free (body); 2780 2781 split_address_groups (data); 2782 2783 if (dump_file && (dump_flags & TDF_DETAILS)) 2784 { 2785 fprintf (dump_file, "\n<IV Groups>:\n"); 2786 dump_groups (dump_file, data); 2787 fprintf (dump_file, "\n"); 2788 } 2789 } 2790 2791 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR 2792 is true, assume we are inside an address. If TOP_COMPREF is true, assume 2793 we are at the top-level of the processed address. */ 2794 2795 static tree 2796 strip_offset_1 (tree expr, bool inside_addr, bool top_compref, 2797 poly_int64 *offset) 2798 { 2799 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step; 2800 enum tree_code code; 2801 tree type, orig_type = TREE_TYPE (expr); 2802 poly_int64 off0, off1; 2803 HOST_WIDE_INT st; 2804 tree orig_expr = expr; 2805 2806 STRIP_NOPS (expr); 2807 2808 type = TREE_TYPE (expr); 2809 code = TREE_CODE (expr); 2810 *offset = 0; 2811 2812 switch (code) 2813 { 2814 case POINTER_PLUS_EXPR: 2815 case PLUS_EXPR: 2816 case MINUS_EXPR: 2817 op0 = TREE_OPERAND (expr, 0); 2818 op1 = TREE_OPERAND (expr, 1); 2819 2820 op0 = strip_offset_1 (op0, false, false, &off0); 2821 op1 = strip_offset_1 (op1, false, false, &off1); 2822 2823 *offset = (code == MINUS_EXPR ? 
off0 - off1 : off0 + off1); 2824 if (op0 == TREE_OPERAND (expr, 0) 2825 && op1 == TREE_OPERAND (expr, 1)) 2826 return orig_expr; 2827 2828 if (integer_zerop (op1)) 2829 expr = op0; 2830 else if (integer_zerop (op0)) 2831 { 2832 if (code == MINUS_EXPR) 2833 expr = fold_build1 (NEGATE_EXPR, type, op1); 2834 else 2835 expr = op1; 2836 } 2837 else 2838 expr = fold_build2 (code, type, op0, op1); 2839 2840 return fold_convert (orig_type, expr); 2841 2842 case MULT_EXPR: 2843 op1 = TREE_OPERAND (expr, 1); 2844 if (!cst_and_fits_in_hwi (op1)) 2845 return orig_expr; 2846 2847 op0 = TREE_OPERAND (expr, 0); 2848 op0 = strip_offset_1 (op0, false, false, &off0); 2849 if (op0 == TREE_OPERAND (expr, 0)) 2850 return orig_expr; 2851 2852 *offset = off0 * int_cst_value (op1); 2853 if (integer_zerop (op0)) 2854 expr = op0; 2855 else 2856 expr = fold_build2 (MULT_EXPR, type, op0, op1); 2857 2858 return fold_convert (orig_type, expr); 2859 2860 case ARRAY_REF: 2861 case ARRAY_RANGE_REF: 2862 if (!inside_addr) 2863 return orig_expr; 2864 2865 step = array_ref_element_size (expr); 2866 if (!cst_and_fits_in_hwi (step)) 2867 break; 2868 2869 st = int_cst_value (step); 2870 op1 = TREE_OPERAND (expr, 1); 2871 op1 = strip_offset_1 (op1, false, false, &off1); 2872 *offset = off1 * st; 2873 2874 if (top_compref 2875 && integer_zerop (op1)) 2876 { 2877 /* Strip the component reference completely. */ 2878 op0 = TREE_OPERAND (expr, 0); 2879 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0); 2880 *offset += off0; 2881 return op0; 2882 } 2883 break; 2884 2885 case COMPONENT_REF: 2886 { 2887 tree field; 2888 2889 if (!inside_addr) 2890 return orig_expr; 2891 2892 tmp = component_ref_field_offset (expr); 2893 field = TREE_OPERAND (expr, 1); 2894 if (top_compref 2895 && cst_and_fits_in_hwi (tmp) 2896 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field))) 2897 { 2898 HOST_WIDE_INT boffset, abs_off; 2899 2900 /* Strip the component reference completely. */ 2901 op0 = TREE_OPERAND (expr, 0); 2902 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0); 2903 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field)); 2904 abs_off = abs_hwi (boffset) / BITS_PER_UNIT; 2905 if (boffset < 0) 2906 abs_off = -abs_off; 2907 2908 *offset = off0 + int_cst_value (tmp) + abs_off; 2909 return op0; 2910 } 2911 } 2912 break; 2913 2914 case ADDR_EXPR: 2915 op0 = TREE_OPERAND (expr, 0); 2916 op0 = strip_offset_1 (op0, true, true, &off0); 2917 *offset += off0; 2918 2919 if (op0 == TREE_OPERAND (expr, 0)) 2920 return orig_expr; 2921 2922 expr = build_fold_addr_expr (op0); 2923 return fold_convert (orig_type, expr); 2924 2925 case MEM_REF: 2926 /* ??? Offset operand? */ 2927 inside_addr = false; 2928 break; 2929 2930 default: 2931 if (ptrdiff_tree_p (expr, offset) && maybe_ne (*offset, 0)) 2932 return build_int_cst (orig_type, 0); 2933 return orig_expr; 2934 } 2935 2936 /* Default handling of expressions for that we want to recurse into 2937 the first operand. */ 2938 op0 = TREE_OPERAND (expr, 0); 2939 op0 = strip_offset_1 (op0, inside_addr, false, &off0); 2940 *offset += off0; 2941 2942 if (op0 == TREE_OPERAND (expr, 0) 2943 && (!op1 || op1 == TREE_OPERAND (expr, 1))) 2944 return orig_expr; 2945 2946 expr = copy_node (expr); 2947 TREE_OPERAND (expr, 0) = op0; 2948 if (op1) 2949 TREE_OPERAND (expr, 1) = op1; 2950 2951 /* Inside address, we might strip the top level component references, 2952 thus changing type of the expression. Handling of ADDR_EXPR 2953 will fix that. 
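   As a rough illustration of the overall stripping (hypothetical operands):
   for &a_1[i_2 + 3] with 4-byte elements the core becomes &a_1[i_2] with
   OFFSET = 12, and for p_5 + 16 the core becomes p_5 with OFFSET = 16.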
*/
2954 expr = fold_convert (orig_type, expr);
2955
2956 return expr;
2957 }
2958
2959 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2960
2961 tree
2962 strip_offset (tree expr, poly_uint64_pod *offset)
2963 {
2964 poly_int64 off;
2965 tree core = strip_offset_1 (expr, false, false, &off);
2966 *offset = off;
2967 return core;
2968 }
2969
2970 /* Returns a variant of TYPE that can be used as the base for different uses.
2971 We return an unsigned type with the same precision, which avoids problems
2972 with overflows. */
2973
2974 static tree
2975 generic_type_for (tree type)
2976 {
2977 if (POINTER_TYPE_P (type))
2978 return unsigned_type_for (type);
2979
2980 if (TYPE_UNSIGNED (type))
2981 return type;
2982
2983 return unsigned_type_for (type);
2984 }
2985
2986 /* Private data for walk_tree. */
2987
2988 struct walk_tree_data
2989 {
2990 bitmap *inv_vars;
2991 struct ivopts_data *idata;
2992 };
2993
2994 /* Callback function for walk_tree; it records invariants and symbol
2995 references in *EXPR_P. DATA is the structure storing the result info. */
2996
2997 static tree
2998 find_inv_vars_cb (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2999 {
3000 tree op = *expr_p;
3001 struct version_info *info;
3002 struct walk_tree_data *wdata = (struct walk_tree_data*) data;
3003
3004 if (TREE_CODE (op) != SSA_NAME)
3005 return NULL_TREE;
3006
3007 info = name_info (wdata->idata, op);
3008 /* Because we expand simple operations when finding IVs, a loop-invariant
3009 variable that is not referenced by the original loop may now be used.
3010 Record such invariant variables here. */
3011 if (!info->iv)
3012 {
3013 struct ivopts_data *idata = wdata->idata;
3014 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (op));
3015
3016 if (!bb || !flow_bb_inside_loop_p (idata->current_loop, bb))
3017 {
3018 set_iv (idata, op, op, build_int_cst (TREE_TYPE (op), 0), true);
3019 record_invariant (idata, op, false);
3020 }
3021 }
3022 if (!info->inv_id || info->has_nonlin_use)
3023 return NULL_TREE;
3024
3025 if (!*wdata->inv_vars)
3026 *wdata->inv_vars = BITMAP_ALLOC (NULL);
3027 bitmap_set_bit (*wdata->inv_vars, info->inv_id);
3028
3029 return NULL_TREE;
3030 }
3031
3032 /* Records invariants in *EXPR_P. INV_VARS is the bitmap in which we
3033 should store them. */
3034
3035 static inline void
3036 find_inv_vars (struct ivopts_data *data, tree *expr_p, bitmap *inv_vars)
3037 {
3038 struct walk_tree_data wdata;
3039
3040 if (!inv_vars)
3041 return;
3042
3043 wdata.idata = data;
3044 wdata.inv_vars = inv_vars;
3045 walk_tree (expr_p, find_inv_vars_cb, &wdata, NULL);
3046 }
3047
3048 /* Get the entry from the invariant expr hash table for INV_EXPR. A new
3049 entry is recorded if it doesn't exist yet. Given the two exprs:
3050 inv_expr + cst1, inv_expr + cst2
3051 it is hard to decide whether the constant part should be stripped or
3052 not. We choose not to strip it, based on the following facts:
3053 1) We would need to count the ADD cost for the constant part if it were
3054 stripped, which isn't always trivial where this function is called.
3055 2) Stripping the constant away may conflict with the subsequent
3056 loop-invariant hoisting pass.
3057 3) Not stripping the constant results in more invariant exprs, which
3058 usually biases the decision toward lower register pressure.
*/ 3059 3060 static iv_inv_expr_ent * 3061 get_loop_invariant_expr (struct ivopts_data *data, tree inv_expr) 3062 { 3063 STRIP_NOPS (inv_expr); 3064 3065 if (poly_int_tree_p (inv_expr) 3066 || TREE_CODE (inv_expr) == SSA_NAME) 3067 return NULL; 3068 3069 /* Don't strip constant part away as we used to. */ 3070 3071 /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent. */ 3072 struct iv_inv_expr_ent ent; 3073 ent.expr = inv_expr; 3074 ent.hash = iterative_hash_expr (inv_expr, 0); 3075 struct iv_inv_expr_ent **slot = data->inv_expr_tab->find_slot (&ent, INSERT); 3076 3077 if (!*slot) 3078 { 3079 *slot = XNEW (struct iv_inv_expr_ent); 3080 (*slot)->expr = inv_expr; 3081 (*slot)->hash = ent.hash; 3082 (*slot)->id = ++data->max_inv_expr_id; 3083 } 3084 3085 return *slot; 3086 } 3087 3088 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and 3089 position to POS. If USE is not NULL, the candidate is set as related to 3090 it. If both BASE and STEP are NULL, we add a pseudocandidate for the 3091 replacement of the final value of the iv by a direct computation. */ 3092 3093 static struct iv_cand * 3094 add_candidate_1 (struct ivopts_data *data, 3095 tree base, tree step, bool important, enum iv_position pos, 3096 struct iv_use *use, gimple *incremented_at, 3097 struct iv *orig_iv = NULL) 3098 { 3099 unsigned i; 3100 struct iv_cand *cand = NULL; 3101 tree type, orig_type; 3102 3103 gcc_assert (base && step); 3104 3105 /* -fkeep-gc-roots-live means that we have to keep a real pointer 3106 live, but the ivopts code may replace a real pointer with one 3107 pointing before or after the memory block that is then adjusted 3108 into the memory block during the loop. FIXME: It would likely be 3109 better to actually force the pointer live and still use ivopts; 3110 for example, it would be enough to write the pointer into memory 3111 and keep it there until after the loop. */ 3112 if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base))) 3113 return NULL; 3114 3115 /* For non-original variables, make sure their values are computed in a type 3116 that does not invoke undefined behavior on overflows (since in general, 3117 we cannot prove that these induction variables are non-wrapping). 
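   Concretely, BASE and STEP are converted to the unsigned type of the same
   precision (see generic_type_for), whose wrap-around behavior is well
   defined.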
*/ 3118 if (pos != IP_ORIGINAL) 3119 { 3120 orig_type = TREE_TYPE (base); 3121 type = generic_type_for (orig_type); 3122 if (type != orig_type) 3123 { 3124 base = fold_convert (type, base); 3125 step = fold_convert (type, step); 3126 } 3127 } 3128 3129 for (i = 0; i < data->vcands.length (); i++) 3130 { 3131 cand = data->vcands[i]; 3132 3133 if (cand->pos != pos) 3134 continue; 3135 3136 if (cand->incremented_at != incremented_at 3137 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE) 3138 && cand->ainc_use != use)) 3139 continue; 3140 3141 if (operand_equal_p (base, cand->iv->base, 0) 3142 && operand_equal_p (step, cand->iv->step, 0) 3143 && (TYPE_PRECISION (TREE_TYPE (base)) 3144 == TYPE_PRECISION (TREE_TYPE (cand->iv->base)))) 3145 break; 3146 } 3147 3148 if (i == data->vcands.length ()) 3149 { 3150 cand = XCNEW (struct iv_cand); 3151 cand->id = i; 3152 cand->iv = alloc_iv (data, base, step); 3153 cand->pos = pos; 3154 if (pos != IP_ORIGINAL) 3155 { 3156 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp"); 3157 cand->var_after = cand->var_before; 3158 } 3159 cand->important = important; 3160 cand->incremented_at = incremented_at; 3161 data->vcands.safe_push (cand); 3162 3163 if (!poly_int_tree_p (step)) 3164 { 3165 find_inv_vars (data, &step, &cand->inv_vars); 3166 3167 iv_inv_expr_ent *inv_expr = get_loop_invariant_expr (data, step); 3168 /* Share bitmap between inv_vars and inv_exprs for cand. */ 3169 if (inv_expr != NULL) 3170 { 3171 cand->inv_exprs = cand->inv_vars; 3172 cand->inv_vars = NULL; 3173 if (cand->inv_exprs) 3174 bitmap_clear (cand->inv_exprs); 3175 else 3176 cand->inv_exprs = BITMAP_ALLOC (NULL); 3177 3178 bitmap_set_bit (cand->inv_exprs, inv_expr->id); 3179 } 3180 } 3181 3182 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE) 3183 cand->ainc_use = use; 3184 else 3185 cand->ainc_use = NULL; 3186 3187 cand->orig_iv = orig_iv; 3188 if (dump_file && (dump_flags & TDF_DETAILS)) 3189 dump_cand (dump_file, cand); 3190 } 3191 3192 cand->important |= important; 3193 3194 /* Relate candidate to the group for which it is added. */ 3195 if (use) 3196 bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i); 3197 3198 return cand; 3199 } 3200 3201 /* Returns true if incrementing the induction variable at the end of the LOOP 3202 is allowed. 3203 3204 The purpose is to avoid splitting latch edge with a biv increment, thus 3205 creating a jump, possibly confusing other optimization passes and leaving 3206 less freedom to scheduler. So we allow IP_END only if IP_NORMAL is not 3207 available (so we do not have a better alternative), or if the latch edge 3208 is already nonempty. */ 3209 3210 static bool 3211 allow_ip_end_pos_p (struct loop *loop) 3212 { 3213 if (!ip_normal_pos (loop)) 3214 return true; 3215 3216 if (!empty_block_p (ip_end_pos (loop))) 3217 return true; 3218 3219 return false; 3220 } 3221 3222 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE. 3223 Important field is set to IMPORTANT. */ 3224 3225 static void 3226 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step, 3227 bool important, struct iv_use *use) 3228 { 3229 basic_block use_bb = gimple_bb (use->stmt); 3230 machine_mode mem_mode; 3231 unsigned HOST_WIDE_INT cstepi; 3232 3233 /* If we insert the increment in any position other than the standard 3234 ones, we must ensure that it is incremented once per iteration. 3235 It must not be in an inner nested loop, or one side of an if 3236 statement. 
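   The checks below enforce this: the use's block must belong directly to the
   current loop and must dominate the loop latch, so an increment placed
   there executes exactly once per iteration; statements that may throw
   internally or have a non-constant step are rejected as well.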
*/ 3237 if (use_bb->loop_father != data->current_loop 3238 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb) 3239 || stmt_can_throw_internal (cfun, use->stmt) 3240 || !cst_and_fits_in_hwi (step)) 3241 return; 3242 3243 cstepi = int_cst_value (step); 3244 3245 mem_mode = TYPE_MODE (use->mem_type); 3246 if (((USE_LOAD_PRE_INCREMENT (mem_mode) 3247 || USE_STORE_PRE_INCREMENT (mem_mode)) 3248 && known_eq (GET_MODE_SIZE (mem_mode), cstepi)) 3249 || ((USE_LOAD_PRE_DECREMENT (mem_mode) 3250 || USE_STORE_PRE_DECREMENT (mem_mode)) 3251 && known_eq (GET_MODE_SIZE (mem_mode), -cstepi))) 3252 { 3253 enum tree_code code = MINUS_EXPR; 3254 tree new_base; 3255 tree new_step = step; 3256 3257 if (POINTER_TYPE_P (TREE_TYPE (base))) 3258 { 3259 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step); 3260 code = POINTER_PLUS_EXPR; 3261 } 3262 else 3263 new_step = fold_convert (TREE_TYPE (base), new_step); 3264 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step); 3265 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use, 3266 use->stmt); 3267 } 3268 if (((USE_LOAD_POST_INCREMENT (mem_mode) 3269 || USE_STORE_POST_INCREMENT (mem_mode)) 3270 && known_eq (GET_MODE_SIZE (mem_mode), cstepi)) 3271 || ((USE_LOAD_POST_DECREMENT (mem_mode) 3272 || USE_STORE_POST_DECREMENT (mem_mode)) 3273 && known_eq (GET_MODE_SIZE (mem_mode), -cstepi))) 3274 { 3275 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use, 3276 use->stmt); 3277 } 3278 } 3279 3280 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and 3281 position to POS. If USE is not NULL, the candidate is set as related to 3282 it. The candidate computation is scheduled before exit condition and at 3283 the end of loop. */ 3284 3285 static void 3286 add_candidate (struct ivopts_data *data, 3287 tree base, tree step, bool important, struct iv_use *use, 3288 struct iv *orig_iv = NULL) 3289 { 3290 if (ip_normal_pos (data->current_loop)) 3291 add_candidate_1 (data, base, step, important, 3292 IP_NORMAL, use, NULL, orig_iv); 3293 if (ip_end_pos (data->current_loop) 3294 && allow_ip_end_pos_p (data->current_loop)) 3295 add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv); 3296 } 3297 3298 /* Adds standard iv candidates. */ 3299 3300 static void 3301 add_standard_iv_candidates (struct ivopts_data *data) 3302 { 3303 add_candidate (data, integer_zero_node, integer_one_node, true, NULL); 3304 3305 /* The same for a double-integer type if it is still fast enough. */ 3306 if (TYPE_PRECISION 3307 (long_integer_type_node) > TYPE_PRECISION (integer_type_node) 3308 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD) 3309 add_candidate (data, build_int_cst (long_integer_type_node, 0), 3310 build_int_cst (long_integer_type_node, 1), true, NULL); 3311 3312 /* The same for a double-integer type if it is still fast enough. */ 3313 if (TYPE_PRECISION 3314 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node) 3315 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD) 3316 add_candidate (data, build_int_cst (long_long_integer_type_node, 0), 3317 build_int_cst (long_long_integer_type_node, 1), true, NULL); 3318 } 3319 3320 3321 /* Adds candidates bases on the old induction variable IV. */ 3322 3323 static void 3324 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv) 3325 { 3326 gimple *phi; 3327 tree def; 3328 struct iv_cand *cand; 3329 3330 /* Check if this biv is used in address type use. 
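   If it is, and the biv does not overflow and is narrower than sizetype,
   a candidate extended to sizetype is added as well, so that it matches the
   precision of the index part of a TARGET_MEM_REF without needing an extra
   extension in the address computation.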
*/ 3331 if (iv->no_overflow && iv->have_address_use 3332 && INTEGRAL_TYPE_P (TREE_TYPE (iv->base)) 3333 && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype)) 3334 { 3335 tree base = fold_convert (sizetype, iv->base); 3336 tree step = fold_convert (sizetype, iv->step); 3337 3338 /* Add iv cand of same precision as index part in TARGET_MEM_REF. */ 3339 add_candidate (data, base, step, true, NULL, iv); 3340 /* Add iv cand of the original type only if it has nonlinear use. */ 3341 if (iv->nonlin_use) 3342 add_candidate (data, iv->base, iv->step, true, NULL); 3343 } 3344 else 3345 add_candidate (data, iv->base, iv->step, true, NULL); 3346 3347 /* The same, but with initial value zero. */ 3348 if (POINTER_TYPE_P (TREE_TYPE (iv->base))) 3349 add_candidate (data, size_int (0), iv->step, true, NULL); 3350 else 3351 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0), 3352 iv->step, true, NULL); 3353 3354 phi = SSA_NAME_DEF_STMT (iv->ssa_name); 3355 if (gimple_code (phi) == GIMPLE_PHI) 3356 { 3357 /* Additionally record the possibility of leaving the original iv 3358 untouched. */ 3359 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop)); 3360 /* Don't add candidate if it's from another PHI node because 3361 it's an affine iv appearing in the form of PEELED_CHREC. */ 3362 phi = SSA_NAME_DEF_STMT (def); 3363 if (gimple_code (phi) != GIMPLE_PHI) 3364 { 3365 cand = add_candidate_1 (data, 3366 iv->base, iv->step, true, IP_ORIGINAL, NULL, 3367 SSA_NAME_DEF_STMT (def)); 3368 if (cand) 3369 { 3370 cand->var_before = iv->ssa_name; 3371 cand->var_after = def; 3372 } 3373 } 3374 else 3375 gcc_assert (gimple_bb (phi) == data->current_loop->header); 3376 } 3377 } 3378 3379 /* Adds candidates based on the old induction variables. */ 3380 3381 static void 3382 add_iv_candidate_for_bivs (struct ivopts_data *data) 3383 { 3384 unsigned i; 3385 struct iv *iv; 3386 bitmap_iterator bi; 3387 3388 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi) 3389 { 3390 iv = ver_info (data, i)->iv; 3391 if (iv && iv->biv_p && !integer_zerop (iv->step)) 3392 add_iv_candidate_for_biv (data, iv); 3393 } 3394 } 3395 3396 /* Record common candidate {BASE, STEP} derived from USE in hashtable. */ 3397 3398 static void 3399 record_common_cand (struct ivopts_data *data, tree base, 3400 tree step, struct iv_use *use) 3401 { 3402 struct iv_common_cand ent; 3403 struct iv_common_cand **slot; 3404 3405 ent.base = base; 3406 ent.step = step; 3407 ent.hash = iterative_hash_expr (base, 0); 3408 ent.hash = iterative_hash_expr (step, ent.hash); 3409 3410 slot = data->iv_common_cand_tab->find_slot (&ent, INSERT); 3411 if (*slot == NULL) 3412 { 3413 *slot = new iv_common_cand (); 3414 (*slot)->base = base; 3415 (*slot)->step = step; 3416 (*slot)->uses.create (8); 3417 (*slot)->hash = ent.hash; 3418 data->iv_common_cands.safe_push ((*slot)); 3419 } 3420 3421 gcc_assert (use != NULL); 3422 (*slot)->uses.safe_push (use); 3423 return; 3424 } 3425 3426 /* Comparison function used to sort common candidates. */ 3427 3428 static int 3429 common_cand_cmp (const void *p1, const void *p2) 3430 { 3431 unsigned n1, n2; 3432 const struct iv_common_cand *const *const ccand1 3433 = (const struct iv_common_cand *const *)p1; 3434 const struct iv_common_cand *const *const ccand2 3435 = (const struct iv_common_cand *const *)p2; 3436 3437 n1 = (*ccand1)->uses.length (); 3438 n2 = (*ccand2)->uses.length (); 3439 return n2 - n1; 3440 } 3441 3442 /* Adds IV candidates based on common candidated recorded. 
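   A common candidate is a {base, step} pair recorded from several uses;
   entries derived from only one use are skipped, since the per-use
   candidates added elsewhere already cover them. The candidates added here
   are not marked important, and the deriving uses are related to them.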
*/ 3443 3444 static void 3445 add_iv_candidate_derived_from_uses (struct ivopts_data *data) 3446 { 3447 unsigned i, j; 3448 struct iv_cand *cand_1, *cand_2; 3449 3450 data->iv_common_cands.qsort (common_cand_cmp); 3451 for (i = 0; i < data->iv_common_cands.length (); i++) 3452 { 3453 struct iv_common_cand *ptr = data->iv_common_cands[i]; 3454 3455 /* Only add IV candidate if it's derived from multiple uses. */ 3456 if (ptr->uses.length () <= 1) 3457 break; 3458 3459 cand_1 = NULL; 3460 cand_2 = NULL; 3461 if (ip_normal_pos (data->current_loop)) 3462 cand_1 = add_candidate_1 (data, ptr->base, ptr->step, 3463 false, IP_NORMAL, NULL, NULL); 3464 3465 if (ip_end_pos (data->current_loop) 3466 && allow_ip_end_pos_p (data->current_loop)) 3467 cand_2 = add_candidate_1 (data, ptr->base, ptr->step, 3468 false, IP_END, NULL, NULL); 3469 3470 /* Bind deriving uses and the new candidates. */ 3471 for (j = 0; j < ptr->uses.length (); j++) 3472 { 3473 struct iv_group *group = data->vgroups[ptr->uses[j]->group_id]; 3474 if (cand_1) 3475 bitmap_set_bit (group->related_cands, cand_1->id); 3476 if (cand_2) 3477 bitmap_set_bit (group->related_cands, cand_2->id); 3478 } 3479 } 3480 3481 /* Release data since it is useless from this point. */ 3482 data->iv_common_cand_tab->empty (); 3483 data->iv_common_cands.truncate (0); 3484 } 3485 3486 /* Adds candidates based on the value of USE's iv. */ 3487 3488 static void 3489 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use) 3490 { 3491 poly_uint64 offset; 3492 tree base; 3493 tree basetype; 3494 struct iv *iv = use->iv; 3495 3496 add_candidate (data, iv->base, iv->step, false, use); 3497 3498 /* Record common candidate for use in case it can be shared by others. */ 3499 record_common_cand (data, iv->base, iv->step, use); 3500 3501 /* Record common candidate with initial value zero. */ 3502 basetype = TREE_TYPE (iv->base); 3503 if (POINTER_TYPE_P (basetype)) 3504 basetype = sizetype; 3505 record_common_cand (data, build_int_cst (basetype, 0), iv->step, use); 3506 3507 /* Record common candidate with constant offset stripped in base. 3508 Like the use itself, we also add candidate directly for it. */ 3509 base = strip_offset (iv->base, &offset); 3510 if (maybe_ne (offset, 0U) || base != iv->base) 3511 { 3512 record_common_cand (data, base, iv->step, use); 3513 add_candidate (data, base, iv->step, false, use); 3514 } 3515 3516 /* Record common candidate with base_object removed in base. */ 3517 base = iv->base; 3518 STRIP_NOPS (base); 3519 if (iv->base_object != NULL && TREE_CODE (base) == POINTER_PLUS_EXPR) 3520 { 3521 tree step = iv->step; 3522 3523 STRIP_NOPS (step); 3524 base = TREE_OPERAND (base, 1); 3525 step = fold_convert (sizetype, step); 3526 record_common_cand (data, base, step, use); 3527 /* Also record common candidate with offset stripped. */ 3528 base = strip_offset (base, &offset); 3529 if (maybe_ne (offset, 0U)) 3530 record_common_cand (data, base, step, use); 3531 } 3532 3533 /* At last, add auto-incremental candidates. Make such variables 3534 important since other iv uses with same base object may be based 3535 on it. */ 3536 if (use != NULL && address_p (use->type)) 3537 add_autoinc_candidates (data, iv->base, iv->step, true, use); 3538 } 3539 3540 /* Adds candidates based on the uses. */ 3541 3542 static void 3543 add_iv_candidate_for_groups (struct ivopts_data *data) 3544 { 3545 unsigned i; 3546 3547 /* Only add candidate for the first use in group. 
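   Address uses within a group share the same base and differ only by a
   constant offset, so candidates derived from the first use serve the whole
   group.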
*/ 3548 for (i = 0; i < data->vgroups.length (); i++) 3549 { 3550 struct iv_group *group = data->vgroups[i]; 3551 3552 gcc_assert (group->vuses[0] != NULL); 3553 add_iv_candidate_for_use (data, group->vuses[0]); 3554 } 3555 add_iv_candidate_derived_from_uses (data); 3556 } 3557 3558 /* Record important candidates and add them to related_cands bitmaps. */ 3559 3560 static void 3561 record_important_candidates (struct ivopts_data *data) 3562 { 3563 unsigned i; 3564 struct iv_group *group; 3565 3566 for (i = 0; i < data->vcands.length (); i++) 3567 { 3568 struct iv_cand *cand = data->vcands[i]; 3569 3570 if (cand->important) 3571 bitmap_set_bit (data->important_candidates, i); 3572 } 3573 3574 data->consider_all_candidates = (data->vcands.length () 3575 <= CONSIDER_ALL_CANDIDATES_BOUND); 3576 3577 /* Add important candidates to groups' related_cands bitmaps. */ 3578 for (i = 0; i < data->vgroups.length (); i++) 3579 { 3580 group = data->vgroups[i]; 3581 bitmap_ior_into (group->related_cands, data->important_candidates); 3582 } 3583 } 3584 3585 /* Allocates the data structure mapping the (use, candidate) pairs to costs. 3586 If consider_all_candidates is true, we use a two-dimensional array, otherwise 3587 we allocate a simple list to every use. */ 3588 3589 static void 3590 alloc_use_cost_map (struct ivopts_data *data) 3591 { 3592 unsigned i, size, s; 3593 3594 for (i = 0; i < data->vgroups.length (); i++) 3595 { 3596 struct iv_group *group = data->vgroups[i]; 3597 3598 if (data->consider_all_candidates) 3599 size = data->vcands.length (); 3600 else 3601 { 3602 s = bitmap_count_bits (group->related_cands); 3603 3604 /* Round up to the power of two, so that moduling by it is fast. */ 3605 size = s ? (1 << ceil_log2 (s)) : 1; 3606 } 3607 3608 group->n_map_members = size; 3609 group->cost_map = XCNEWVEC (struct cost_pair, size); 3610 } 3611 } 3612 3613 /* Sets cost of (GROUP, CAND) pair to COST and record that it depends 3614 on invariants INV_VARS and that the value used in expressing it is 3615 VALUE, and in case of iv elimination the comparison operator is COMP. */ 3616 3617 static void 3618 set_group_iv_cost (struct ivopts_data *data, 3619 struct iv_group *group, struct iv_cand *cand, 3620 comp_cost cost, bitmap inv_vars, tree value, 3621 enum tree_code comp, bitmap inv_exprs) 3622 { 3623 unsigned i, s; 3624 3625 if (cost.infinite_cost_p ()) 3626 { 3627 BITMAP_FREE (inv_vars); 3628 BITMAP_FREE (inv_exprs); 3629 return; 3630 } 3631 3632 if (data->consider_all_candidates) 3633 { 3634 group->cost_map[cand->id].cand = cand; 3635 group->cost_map[cand->id].cost = cost; 3636 group->cost_map[cand->id].inv_vars = inv_vars; 3637 group->cost_map[cand->id].inv_exprs = inv_exprs; 3638 group->cost_map[cand->id].value = value; 3639 group->cost_map[cand->id].comp = comp; 3640 return; 3641 } 3642 3643 /* n_map_members is a power of two, so this computes modulo. */ 3644 s = cand->id & (group->n_map_members - 1); 3645 for (i = s; i < group->n_map_members; i++) 3646 if (!group->cost_map[i].cand) 3647 goto found; 3648 for (i = 0; i < s; i++) 3649 if (!group->cost_map[i].cand) 3650 goto found; 3651 3652 gcc_unreachable (); 3653 3654 found: 3655 group->cost_map[i].cand = cand; 3656 group->cost_map[i].cost = cost; 3657 group->cost_map[i].inv_vars = inv_vars; 3658 group->cost_map[i].inv_exprs = inv_exprs; 3659 group->cost_map[i].value = value; 3660 group->cost_map[i].comp = comp; 3661 } 3662 3663 /* Gets cost of (GROUP, CAND) pair. 
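   Returns NULL if no cost was recorded for the pair (e.g. because it was
   infinite). When consider_all_candidates is set the map is indexed directly
   by candidate id; otherwise it is a small open-addressed hash table of
   n_map_members entries (a power of two), probed linearly starting at
   cand->id modulo the size.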
*/ 3664 3665 static struct cost_pair * 3666 get_group_iv_cost (struct ivopts_data *data, struct iv_group *group, 3667 struct iv_cand *cand) 3668 { 3669 unsigned i, s; 3670 struct cost_pair *ret; 3671 3672 if (!cand) 3673 return NULL; 3674 3675 if (data->consider_all_candidates) 3676 { 3677 ret = group->cost_map + cand->id; 3678 if (!ret->cand) 3679 return NULL; 3680 3681 return ret; 3682 } 3683 3684 /* n_map_members is a power of two, so this computes modulo. */ 3685 s = cand->id & (group->n_map_members - 1); 3686 for (i = s; i < group->n_map_members; i++) 3687 if (group->cost_map[i].cand == cand) 3688 return group->cost_map + i; 3689 else if (group->cost_map[i].cand == NULL) 3690 return NULL; 3691 for (i = 0; i < s; i++) 3692 if (group->cost_map[i].cand == cand) 3693 return group->cost_map + i; 3694 else if (group->cost_map[i].cand == NULL) 3695 return NULL; 3696 3697 return NULL; 3698 } 3699 3700 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */ 3701 static rtx 3702 produce_memory_decl_rtl (tree obj, int *regno) 3703 { 3704 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj)); 3705 machine_mode address_mode = targetm.addr_space.address_mode (as); 3706 rtx x; 3707 3708 gcc_assert (obj); 3709 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj)) 3710 { 3711 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj)); 3712 x = gen_rtx_SYMBOL_REF (address_mode, name); 3713 SET_SYMBOL_REF_DECL (x, obj); 3714 x = gen_rtx_MEM (DECL_MODE (obj), x); 3715 set_mem_addr_space (x, as); 3716 targetm.encode_section_info (obj, x, true); 3717 } 3718 else 3719 { 3720 x = gen_raw_REG (address_mode, (*regno)++); 3721 x = gen_rtx_MEM (DECL_MODE (obj), x); 3722 set_mem_addr_space (x, as); 3723 } 3724 3725 return x; 3726 } 3727 3728 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for 3729 walk_tree. DATA contains the actual fake register number. */ 3730 3731 static tree 3732 prepare_decl_rtl (tree *expr_p, int *ws, void *data) 3733 { 3734 tree obj = NULL_TREE; 3735 rtx x = NULL_RTX; 3736 int *regno = (int *) data; 3737 3738 switch (TREE_CODE (*expr_p)) 3739 { 3740 case ADDR_EXPR: 3741 for (expr_p = &TREE_OPERAND (*expr_p, 0); 3742 handled_component_p (*expr_p); 3743 expr_p = &TREE_OPERAND (*expr_p, 0)) 3744 continue; 3745 obj = *expr_p; 3746 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj)) 3747 x = produce_memory_decl_rtl (obj, regno); 3748 break; 3749 3750 case SSA_NAME: 3751 *ws = 0; 3752 obj = SSA_NAME_VAR (*expr_p); 3753 /* Defer handling of anonymous SSA_NAMEs to the expander. */ 3754 if (!obj) 3755 return NULL_TREE; 3756 if (!DECL_RTL_SET_P (obj)) 3757 x = gen_raw_REG (DECL_MODE (obj), (*regno)++); 3758 break; 3759 3760 case VAR_DECL: 3761 case PARM_DECL: 3762 case RESULT_DECL: 3763 *ws = 0; 3764 obj = *expr_p; 3765 3766 if (DECL_RTL_SET_P (obj)) 3767 break; 3768 3769 if (DECL_MODE (obj) == BLKmode) 3770 x = produce_memory_decl_rtl (obj, regno); 3771 else 3772 x = gen_raw_REG (DECL_MODE (obj), (*regno)++); 3773 3774 break; 3775 3776 default: 3777 break; 3778 } 3779 3780 if (x) 3781 { 3782 decl_rtl_to_reset.safe_push (obj); 3783 SET_DECL_RTL (obj, x); 3784 } 3785 3786 return NULL_TREE; 3787 } 3788 3789 /* Determines cost of the computation of EXPR. */ 3790 3791 static unsigned 3792 computation_cost (tree expr, bool speed) 3793 { 3794 rtx_insn *seq; 3795 rtx rslt; 3796 tree type = TREE_TYPE (expr); 3797 unsigned cost; 3798 /* Avoid using hard regs in ways which may be unsupported. 
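   Fake pseudos are therefore numbered starting just past the virtual
   registers. The expression is expanded to an RTL sequence only to measure
   its cost; the sequence itself is discarded.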
*/ 3799 int regno = LAST_VIRTUAL_REGISTER + 1; 3800 struct cgraph_node *node = cgraph_node::get (current_function_decl); 3801 enum node_frequency real_frequency = node->frequency; 3802 3803 node->frequency = NODE_FREQUENCY_NORMAL; 3804 crtl->maybe_hot_insn_p = speed; 3805 walk_tree (&expr, prepare_decl_rtl, ®no, NULL); 3806 start_sequence (); 3807 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL); 3808 seq = get_insns (); 3809 end_sequence (); 3810 default_rtl_profile (); 3811 node->frequency = real_frequency; 3812 3813 cost = seq_cost (seq, speed); 3814 if (MEM_P (rslt)) 3815 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type), 3816 TYPE_ADDR_SPACE (type), speed); 3817 else if (!REG_P (rslt)) 3818 cost += set_src_cost (rslt, TYPE_MODE (type), speed); 3819 3820 return cost; 3821 } 3822 3823 /* Returns variable containing the value of candidate CAND at statement AT. */ 3824 3825 static tree 3826 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt) 3827 { 3828 if (stmt_after_increment (loop, cand, stmt)) 3829 return cand->var_after; 3830 else 3831 return cand->var_before; 3832 } 3833 3834 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the 3835 same precision that is at least as wide as the precision of TYPE, stores 3836 BA to A and BB to B, and returns the type of BA. Otherwise, returns the 3837 type of A and B. */ 3838 3839 static tree 3840 determine_common_wider_type (tree *a, tree *b) 3841 { 3842 tree wider_type = NULL; 3843 tree suba, subb; 3844 tree atype = TREE_TYPE (*a); 3845 3846 if (CONVERT_EXPR_P (*a)) 3847 { 3848 suba = TREE_OPERAND (*a, 0); 3849 wider_type = TREE_TYPE (suba); 3850 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype)) 3851 return atype; 3852 } 3853 else 3854 return atype; 3855 3856 if (CONVERT_EXPR_P (*b)) 3857 { 3858 subb = TREE_OPERAND (*b, 0); 3859 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb))) 3860 return atype; 3861 } 3862 else 3863 return atype; 3864 3865 *a = suba; 3866 *b = subb; 3867 return wider_type; 3868 } 3869 3870 /* Determines the expression by that USE is expressed from induction variable 3871 CAND at statement AT in LOOP. The expression is stored in two parts in a 3872 decomposed form. The invariant part is stored in AFF_INV; while variant 3873 part in AFF_VAR. Store ratio of CAND.step over USE.step in PRAT if it's 3874 non-null. Returns false if USE cannot be expressed using CAND. */ 3875 3876 static bool 3877 get_computation_aff_1 (struct loop *loop, gimple *at, struct iv_use *use, 3878 struct iv_cand *cand, struct aff_tree *aff_inv, 3879 struct aff_tree *aff_var, widest_int *prat = NULL) 3880 { 3881 tree ubase = use->iv->base, ustep = use->iv->step; 3882 tree cbase = cand->iv->base, cstep = cand->iv->step; 3883 tree common_type, uutype, var, cstep_common; 3884 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase); 3885 aff_tree aff_cbase; 3886 widest_int rat; 3887 3888 /* We must have a precision to express the values of use. */ 3889 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype)) 3890 return false; 3891 3892 var = var_at_stmt (loop, cand, at); 3893 uutype = unsigned_type_for (utype); 3894 3895 /* If the conversion is not noop, perform it. 
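   That is, if the candidate type is strictly wider than the use type,
   convert CBASE, CSTEP and VAR to the unsigned variant of the use type
   before the ratio is computed.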
*/
3896 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3897 {
3898 if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3899 && (CONVERT_EXPR_P (cstep) || poly_int_tree_p (cstep)))
3900 {
3901 tree inner_base, inner_step, inner_type;
3902 inner_base = TREE_OPERAND (cbase, 0);
3903 if (CONVERT_EXPR_P (cstep))
3904 inner_step = TREE_OPERAND (cstep, 0);
3905 else
3906 inner_step = cstep;
3907
3908 inner_type = TREE_TYPE (inner_base);
3909 /* If the candidate was added from a biv whose type is smaller than
3910 ctype, we know that neither the candidate nor the biv overflows.
3911 In this case, it's safe to skip the conversion in the candidate.
3912 As an example, (unsigned short)((unsigned long)A) equals
3913 (unsigned short)A, if A has a type no larger than short. */
3914 if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3915 {
3916 cbase = inner_base;
3917 cstep = inner_step;
3918 }
3919 }
3920 cbase = fold_convert (uutype, cbase);
3921 cstep = fold_convert (uutype, cstep);
3922 var = fold_convert (uutype, var);
3923 }
3924
3925 /* The ratio is 1 when computing the value of the biv cand by itself.
3926 We can't rely on constant_multiple_of in this case because the
3927 use is created after the original biv is selected. The call
3928 could fail because of inconsistent fold behavior. See PR68021
3929 for more information. */
3930 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
3931 {
3932 gcc_assert (is_gimple_assign (use->stmt));
3933 gcc_assert (use->iv->ssa_name == cand->var_after);
3934 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
3935 rat = 1;
3936 }
3937 else if (!constant_multiple_of (ustep, cstep, &rat))
3938 return false;
3939
3940 if (prat)
3941 *prat = rat;
3942
3943 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3944 type, we achieve better folding by computing their difference in this
3945 wider type, and casting the result to UUTYPE. We do not need to worry
3946 about overflows, as all the arithmetic will in the end be performed in
3947 UUTYPE anyway. */
3948 common_type = determine_common_wider_type (&ubase, &cbase);
3949
3950 /* use = ubase - ratio * cbase + ratio * var. */
3951 tree_to_aff_combination (ubase, common_type, aff_inv);
3952 tree_to_aff_combination (cbase, common_type, &aff_cbase);
3953 tree_to_aff_combination (var, uutype, aff_var);
3954
3955 /* We need to shift the value if we are after the increment. */
3956 if (stmt_after_increment (loop, cand, at))
3957 {
3958 aff_tree cstep_aff;
3959
3960 if (common_type != uutype)
3961 cstep_common = fold_convert (common_type, cstep);
3962 else
3963 cstep_common = cstep;
3964
3965 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3966 aff_combination_add (&aff_cbase, &cstep_aff);
3967 }
3968
3969 aff_combination_scale (&aff_cbase, -rat);
3970 aff_combination_add (aff_inv, &aff_cbase);
3971 if (common_type != uutype)
3972 aff_combination_convert (aff_inv, uutype);
3973
3974 aff_combination_scale (aff_var, rat);
3975 return true;
3976 }
3977
3978 /* Determines the expression by which USE is expressed from induction variable
3979 CAND at statement AT in LOOP. The expression is stored in decomposed
3980 form in AFF. Returns false if USE cannot be expressed using CAND.
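   For instance (hypothetical values), an address use with base &a_1[0] and
   step 4, expressed from an integer counter candidate of sufficient
   precision with base 0 and step 1, decomposes as &a_1[0] + 4 * cand.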
*/ 3981 3982 static bool 3983 get_computation_aff (struct loop *loop, gimple *at, struct iv_use *use, 3984 struct iv_cand *cand, struct aff_tree *aff) 3985 { 3986 aff_tree aff_var; 3987 3988 if (!get_computation_aff_1 (loop, at, use, cand, aff, &aff_var)) 3989 return false; 3990 3991 aff_combination_add (aff, &aff_var); 3992 return true; 3993 } 3994 3995 /* Return the type of USE. */ 3996 3997 static tree 3998 get_use_type (struct iv_use *use) 3999 { 4000 tree base_type = TREE_TYPE (use->iv->base); 4001 tree type; 4002 4003 if (use->type == USE_REF_ADDRESS) 4004 { 4005 /* The base_type may be a void pointer. Create a pointer type based on 4006 the mem_ref instead. */ 4007 type = build_pointer_type (TREE_TYPE (*use->op_p)); 4008 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type)) 4009 == TYPE_ADDR_SPACE (TREE_TYPE (base_type))); 4010 } 4011 else 4012 type = base_type; 4013 4014 return type; 4015 } 4016 4017 /* Determines the expression by that USE is expressed from induction variable 4018 CAND at statement AT in LOOP. The computation is unshared. */ 4019 4020 static tree 4021 get_computation_at (struct loop *loop, gimple *at, 4022 struct iv_use *use, struct iv_cand *cand) 4023 { 4024 aff_tree aff; 4025 tree type = get_use_type (use); 4026 4027 if (!get_computation_aff (loop, at, use, cand, &aff)) 4028 return NULL_TREE; 4029 unshare_aff_combination (&aff); 4030 return fold_convert (type, aff_combination_to_tree (&aff)); 4031 } 4032 4033 /* Adjust the cost COST for being in loop setup rather than loop body. 4034 If we're optimizing for space, the loop setup overhead is constant; 4035 if we're optimizing for speed, amortize it over the per-iteration cost. 4036 If ROUND_UP_P is true, the result is round up rather than to zero when 4037 optimizing for speed. */ 4038 static unsigned 4039 adjust_setup_cost (struct ivopts_data *data, unsigned cost, 4040 bool round_up_p = false) 4041 { 4042 if (cost == INFTY) 4043 return cost; 4044 else if (optimize_loop_for_speed_p (data->current_loop)) 4045 { 4046 HOST_WIDE_INT niters = avg_loop_niter (data->current_loop); 4047 return ((HOST_WIDE_INT) cost + (round_up_p ? niters - 1 : 0)) / niters; 4048 } 4049 else 4050 return cost; 4051 } 4052 4053 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the 4054 EXPR operand holding the shift. COST0 and COST1 are the costs for 4055 calculating the operands of EXPR. Returns true if successful, and returns 4056 the cost in COST. */ 4057 4058 static bool 4059 get_shiftadd_cost (tree expr, scalar_int_mode mode, comp_cost cost0, 4060 comp_cost cost1, tree mult, bool speed, comp_cost *cost) 4061 { 4062 comp_cost res; 4063 tree op1 = TREE_OPERAND (expr, 1); 4064 tree cst = TREE_OPERAND (mult, 1); 4065 tree multop = TREE_OPERAND (mult, 0); 4066 int m = exact_log2 (int_cst_value (cst)); 4067 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode)); 4068 int as_cost, sa_cost; 4069 bool mult_in_op1; 4070 4071 if (!(m >= 0 && m < maxm)) 4072 return false; 4073 4074 STRIP_NOPS (op1); 4075 mult_in_op1 = operand_equal_p (op1, mult, 0); 4076 4077 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m); 4078 4079 /* If the target has a cheap shift-and-add or shift-and-sub instruction, 4080 use that in preference to a shift insn followed by an add insn. */ 4081 sa_cost = (TREE_CODE (expr) != MINUS_EXPR 4082 ? shiftadd_cost (speed, mode, m) 4083 : (mult_in_op1 4084 ? 
shiftsub1_cost (speed, mode, m) 4085 : shiftsub0_cost (speed, mode, m))); 4086 4087 res = comp_cost (MIN (as_cost, sa_cost), 0); 4088 res += (mult_in_op1 ? cost0 : cost1); 4089 4090 STRIP_NOPS (multop); 4091 if (!is_gimple_val (multop)) 4092 res += force_expr_to_var_cost (multop, speed); 4093 4094 *cost = res; 4095 return true; 4096 } 4097 4098 /* Estimates cost of forcing expression EXPR into a variable. */ 4099 4100 static comp_cost 4101 force_expr_to_var_cost (tree expr, bool speed) 4102 { 4103 static bool costs_initialized = false; 4104 static unsigned integer_cost [2]; 4105 static unsigned symbol_cost [2]; 4106 static unsigned address_cost [2]; 4107 tree op0, op1; 4108 comp_cost cost0, cost1, cost; 4109 machine_mode mode; 4110 scalar_int_mode int_mode; 4111 4112 if (!costs_initialized) 4113 { 4114 tree type = build_pointer_type (integer_type_node); 4115 tree var, addr; 4116 rtx x; 4117 int i; 4118 4119 var = create_tmp_var_raw (integer_type_node, "test_var"); 4120 TREE_STATIC (var) = 1; 4121 x = produce_memory_decl_rtl (var, NULL); 4122 SET_DECL_RTL (var, x); 4123 4124 addr = build1 (ADDR_EXPR, type, var); 4125 4126 4127 for (i = 0; i < 2; i++) 4128 { 4129 integer_cost[i] = computation_cost (build_int_cst (integer_type_node, 4130 2000), i); 4131 4132 symbol_cost[i] = computation_cost (addr, i) + 1; 4133 4134 address_cost[i] 4135 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1; 4136 if (dump_file && (dump_flags & TDF_DETAILS)) 4137 { 4138 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size"); 4139 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]); 4140 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]); 4141 fprintf (dump_file, " address %d\n", (int) address_cost[i]); 4142 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]); 4143 fprintf (dump_file, "\n"); 4144 } 4145 } 4146 4147 costs_initialized = true; 4148 } 4149 4150 STRIP_NOPS (expr); 4151 4152 if (SSA_VAR_P (expr)) 4153 return no_cost; 4154 4155 if (is_gimple_min_invariant (expr)) 4156 { 4157 if (poly_int_tree_p (expr)) 4158 return comp_cost (integer_cost [speed], 0); 4159 4160 if (TREE_CODE (expr) == ADDR_EXPR) 4161 { 4162 tree obj = TREE_OPERAND (expr, 0); 4163 4164 if (VAR_P (obj) 4165 || TREE_CODE (obj) == PARM_DECL 4166 || TREE_CODE (obj) == RESULT_DECL) 4167 return comp_cost (symbol_cost [speed], 0); 4168 } 4169 4170 return comp_cost (address_cost [speed], 0); 4171 } 4172 4173 switch (TREE_CODE (expr)) 4174 { 4175 case POINTER_PLUS_EXPR: 4176 case PLUS_EXPR: 4177 case MINUS_EXPR: 4178 case MULT_EXPR: 4179 case TRUNC_DIV_EXPR: 4180 case BIT_AND_EXPR: 4181 case BIT_IOR_EXPR: 4182 case LSHIFT_EXPR: 4183 case RSHIFT_EXPR: 4184 op0 = TREE_OPERAND (expr, 0); 4185 op1 = TREE_OPERAND (expr, 1); 4186 STRIP_NOPS (op0); 4187 STRIP_NOPS (op1); 4188 break; 4189 4190 CASE_CONVERT: 4191 case NEGATE_EXPR: 4192 case BIT_NOT_EXPR: 4193 op0 = TREE_OPERAND (expr, 0); 4194 STRIP_NOPS (op0); 4195 op1 = NULL_TREE; 4196 break; 4197 4198 default: 4199 /* Just an arbitrary value, FIXME. 
*/ 4200 return comp_cost (target_spill_cost[speed], 0); 4201 } 4202 4203 if (op0 == NULL_TREE 4204 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0)) 4205 cost0 = no_cost; 4206 else 4207 cost0 = force_expr_to_var_cost (op0, speed); 4208 4209 if (op1 == NULL_TREE 4210 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1)) 4211 cost1 = no_cost; 4212 else 4213 cost1 = force_expr_to_var_cost (op1, speed); 4214 4215 mode = TYPE_MODE (TREE_TYPE (expr)); 4216 switch (TREE_CODE (expr)) 4217 { 4218 case POINTER_PLUS_EXPR: 4219 case PLUS_EXPR: 4220 case MINUS_EXPR: 4221 case NEGATE_EXPR: 4222 cost = comp_cost (add_cost (speed, mode), 0); 4223 if (TREE_CODE (expr) != NEGATE_EXPR) 4224 { 4225 tree mult = NULL_TREE; 4226 comp_cost sa_cost; 4227 if (TREE_CODE (op1) == MULT_EXPR) 4228 mult = op1; 4229 else if (TREE_CODE (op0) == MULT_EXPR) 4230 mult = op0; 4231 4232 if (mult != NULL_TREE 4233 && is_a <scalar_int_mode> (mode, &int_mode) 4234 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1)) 4235 && get_shiftadd_cost (expr, int_mode, cost0, cost1, mult, 4236 speed, &sa_cost)) 4237 return sa_cost; 4238 } 4239 break; 4240 4241 CASE_CONVERT: 4242 { 4243 tree inner_mode, outer_mode; 4244 outer_mode = TREE_TYPE (expr); 4245 inner_mode = TREE_TYPE (op0); 4246 cost = comp_cost (convert_cost (TYPE_MODE (outer_mode), 4247 TYPE_MODE (inner_mode), speed), 0); 4248 } 4249 break; 4250 4251 case MULT_EXPR: 4252 if (cst_and_fits_in_hwi (op0)) 4253 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0), 4254 mode, speed), 0); 4255 else if (cst_and_fits_in_hwi (op1)) 4256 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1), 4257 mode, speed), 0); 4258 else 4259 return comp_cost (target_spill_cost [speed], 0); 4260 break; 4261 4262 case TRUNC_DIV_EXPR: 4263 /* Division by power of two is usually cheap, so we allow it. Forbid 4264 anything else. */ 4265 if (integer_pow2p (TREE_OPERAND (expr, 1))) 4266 cost = comp_cost (add_cost (speed, mode), 0); 4267 else 4268 cost = comp_cost (target_spill_cost[speed], 0); 4269 break; 4270 4271 case BIT_AND_EXPR: 4272 case BIT_IOR_EXPR: 4273 case BIT_NOT_EXPR: 4274 case LSHIFT_EXPR: 4275 case RSHIFT_EXPR: 4276 cost = comp_cost (add_cost (speed, mode), 0); 4277 break; 4278 4279 default: 4280 gcc_unreachable (); 4281 } 4282 4283 cost += cost0; 4284 cost += cost1; 4285 return cost; 4286 } 4287 4288 /* Estimates cost of forcing EXPR into a variable. INV_VARS is a set of the 4289 invariants the computation depends on. */ 4290 4291 static comp_cost 4292 force_var_cost (struct ivopts_data *data, tree expr, bitmap *inv_vars) 4293 { 4294 if (!expr) 4295 return no_cost; 4296 4297 find_inv_vars (data, &expr, inv_vars); 4298 return force_expr_to_var_cost (expr, data->speed); 4299 } 4300 4301 /* Returns cost of auto-modifying address expression in shape base + offset. 4302 AINC_STEP is step size of the address IV. AINC_OFFSET is offset of the 4303 address expression. The address expression has ADDR_MODE in addr space 4304 AS. The memory access has MEM_MODE. SPEED means we are optimizing for 4305 speed or size. */ 4306 4307 enum ainc_type 4308 { 4309 AINC_PRE_INC, /* Pre increment. */ 4310 AINC_PRE_DEC, /* Pre decrement. */ 4311 AINC_POST_INC, /* Post increment. */ 4312 AINC_POST_DEC, /* Post decrement. */ 4313 AINC_NONE /* Also the number of auto increment types. 
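   As a hedged illustration (target support is hypothetical), the four forms
   above correspond to access patterns like

     x = *p; p = p + size;      AINC_POST_INC, i.e. (mem (post_inc p))
     p = p - size; x = *p;      AINC_PRE_DEC, i.e. (mem (pre_dec p))

   where size is the width of the access.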
*/ 4314 }; 4315 4316 struct ainc_cost_data 4317 { 4318 unsigned costs[AINC_NONE]; 4319 }; 4320 4321 static comp_cost 4322 get_address_cost_ainc (poly_int64 ainc_step, poly_int64 ainc_offset, 4323 machine_mode addr_mode, machine_mode mem_mode, 4324 addr_space_t as, bool speed) 4325 { 4326 if (!USE_LOAD_PRE_DECREMENT (mem_mode) 4327 && !USE_STORE_PRE_DECREMENT (mem_mode) 4328 && !USE_LOAD_POST_DECREMENT (mem_mode) 4329 && !USE_STORE_POST_DECREMENT (mem_mode) 4330 && !USE_LOAD_PRE_INCREMENT (mem_mode) 4331 && !USE_STORE_PRE_INCREMENT (mem_mode) 4332 && !USE_LOAD_POST_INCREMENT (mem_mode) 4333 && !USE_STORE_POST_INCREMENT (mem_mode)) 4334 return infinite_cost; 4335 4336 static vec<ainc_cost_data *> ainc_cost_data_list; 4337 unsigned idx = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode; 4338 if (idx >= ainc_cost_data_list.length ()) 4339 { 4340 unsigned nsize = ((unsigned) as + 1) *MAX_MACHINE_MODE; 4341 4342 gcc_assert (nsize > idx); 4343 ainc_cost_data_list.safe_grow_cleared (nsize); 4344 } 4345 4346 ainc_cost_data *data = ainc_cost_data_list[idx]; 4347 if (data == NULL) 4348 { 4349 rtx reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1); 4350 4351 data = (ainc_cost_data *) xcalloc (1, sizeof (*data)); 4352 data->costs[AINC_PRE_DEC] = INFTY; 4353 data->costs[AINC_POST_DEC] = INFTY; 4354 data->costs[AINC_PRE_INC] = INFTY; 4355 data->costs[AINC_POST_INC] = INFTY; 4356 if (USE_LOAD_PRE_DECREMENT (mem_mode) 4357 || USE_STORE_PRE_DECREMENT (mem_mode)) 4358 { 4359 rtx addr = gen_rtx_PRE_DEC (addr_mode, reg); 4360 4361 if (memory_address_addr_space_p (mem_mode, addr, as)) 4362 data->costs[AINC_PRE_DEC] 4363 = address_cost (addr, mem_mode, as, speed); 4364 } 4365 if (USE_LOAD_POST_DECREMENT (mem_mode) 4366 || USE_STORE_POST_DECREMENT (mem_mode)) 4367 { 4368 rtx addr = gen_rtx_POST_DEC (addr_mode, reg); 4369 4370 if (memory_address_addr_space_p (mem_mode, addr, as)) 4371 data->costs[AINC_POST_DEC] 4372 = address_cost (addr, mem_mode, as, speed); 4373 } 4374 if (USE_LOAD_PRE_INCREMENT (mem_mode) 4375 || USE_STORE_PRE_INCREMENT (mem_mode)) 4376 { 4377 rtx addr = gen_rtx_PRE_INC (addr_mode, reg); 4378 4379 if (memory_address_addr_space_p (mem_mode, addr, as)) 4380 data->costs[AINC_PRE_INC] 4381 = address_cost (addr, mem_mode, as, speed); 4382 } 4383 if (USE_LOAD_POST_INCREMENT (mem_mode) 4384 || USE_STORE_POST_INCREMENT (mem_mode)) 4385 { 4386 rtx addr = gen_rtx_POST_INC (addr_mode, reg); 4387 4388 if (memory_address_addr_space_p (mem_mode, addr, as)) 4389 data->costs[AINC_POST_INC] 4390 = address_cost (addr, mem_mode, as, speed); 4391 } 4392 ainc_cost_data_list[idx] = data; 4393 } 4394 4395 poly_int64 msize = GET_MODE_SIZE (mem_mode); 4396 if (known_eq (ainc_offset, 0) && known_eq (msize, ainc_step)) 4397 return comp_cost (data->costs[AINC_POST_INC], 0); 4398 if (known_eq (ainc_offset, 0) && known_eq (msize, -ainc_step)) 4399 return comp_cost (data->costs[AINC_POST_DEC], 0); 4400 if (known_eq (ainc_offset, msize) && known_eq (msize, ainc_step)) 4401 return comp_cost (data->costs[AINC_PRE_INC], 0); 4402 if (known_eq (ainc_offset, -msize) && known_eq (msize, -ainc_step)) 4403 return comp_cost (data->costs[AINC_PRE_DEC], 0); 4404 4405 return infinite_cost; 4406 } 4407 4408 /* Return cost of computing USE's address expression by using CAND. 4409 AFF_INV and AFF_VAR represent invariant and variant parts of the 4410 address expression, respectively. 
If AFF_INV is simple, store 4411 the loop invariant variables which are depended by it in INV_VARS; 4412 if AFF_INV is complicated, handle it as a new invariant expression 4413 and record it in INV_EXPR. RATIO indicates multiple times between 4414 steps of USE and CAND. If CAN_AUTOINC is nonNULL, store boolean 4415 value to it indicating if this is an auto-increment address. */ 4416 4417 static comp_cost 4418 get_address_cost (struct ivopts_data *data, struct iv_use *use, 4419 struct iv_cand *cand, aff_tree *aff_inv, 4420 aff_tree *aff_var, HOST_WIDE_INT ratio, 4421 bitmap *inv_vars, iv_inv_expr_ent **inv_expr, 4422 bool *can_autoinc, bool speed) 4423 { 4424 rtx addr; 4425 bool simple_inv = true; 4426 tree comp_inv = NULL_TREE, type = aff_var->type; 4427 comp_cost var_cost = no_cost, cost = no_cost; 4428 struct mem_address parts = {NULL_TREE, integer_one_node, 4429 NULL_TREE, NULL_TREE, NULL_TREE}; 4430 machine_mode addr_mode = TYPE_MODE (type); 4431 machine_mode mem_mode = TYPE_MODE (use->mem_type); 4432 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base)); 4433 /* Only true if ratio != 1. */ 4434 bool ok_with_ratio_p = false; 4435 bool ok_without_ratio_p = false; 4436 4437 if (!aff_combination_const_p (aff_inv)) 4438 { 4439 parts.index = integer_one_node; 4440 /* Addressing mode "base + index". */ 4441 ok_without_ratio_p = valid_mem_ref_p (mem_mode, as, &parts); 4442 if (ratio != 1) 4443 { 4444 parts.step = wide_int_to_tree (type, ratio); 4445 /* Addressing mode "base + index << scale". */ 4446 ok_with_ratio_p = valid_mem_ref_p (mem_mode, as, &parts); 4447 if (!ok_with_ratio_p) 4448 parts.step = NULL_TREE; 4449 } 4450 if (ok_with_ratio_p || ok_without_ratio_p) 4451 { 4452 if (maybe_ne (aff_inv->offset, 0)) 4453 { 4454 parts.offset = wide_int_to_tree (sizetype, aff_inv->offset); 4455 /* Addressing mode "base + index [<< scale] + offset". */ 4456 if (!valid_mem_ref_p (mem_mode, as, &parts)) 4457 parts.offset = NULL_TREE; 4458 else 4459 aff_inv->offset = 0; 4460 } 4461 4462 move_fixed_address_to_symbol (&parts, aff_inv); 4463 /* Base is fixed address and is moved to symbol part. */ 4464 if (parts.symbol != NULL_TREE && aff_combination_zero_p (aff_inv)) 4465 parts.base = NULL_TREE; 4466 4467 /* Addressing mode "symbol + base + index [<< scale] [+ offset]". */ 4468 if (parts.symbol != NULL_TREE 4469 && !valid_mem_ref_p (mem_mode, as, &parts)) 4470 { 4471 aff_combination_add_elt (aff_inv, parts.symbol, 1); 4472 parts.symbol = NULL_TREE; 4473 /* Reset SIMPLE_INV since symbol address needs to be computed 4474 outside of address expression in this case. */ 4475 simple_inv = false; 4476 /* Symbol part is moved back to base part, it can't be NULL. */ 4477 parts.base = integer_one_node; 4478 } 4479 } 4480 else 4481 parts.index = NULL_TREE; 4482 } 4483 else 4484 { 4485 poly_int64 ainc_step; 4486 if (can_autoinc 4487 && ratio == 1 4488 && ptrdiff_tree_p (cand->iv->step, &ainc_step)) 4489 { 4490 poly_int64 ainc_offset = (aff_inv->offset).force_shwi (); 4491 4492 if (stmt_after_increment (data->current_loop, cand, use->stmt)) 4493 ainc_offset += ainc_step; 4494 cost = get_address_cost_ainc (ainc_step, ainc_offset, 4495 addr_mode, mem_mode, as, speed); 4496 if (!cost.infinite_cost_p ()) 4497 { 4498 *can_autoinc = true; 4499 return cost; 4500 } 4501 cost = no_cost; 4502 } 4503 if (!aff_combination_zero_p (aff_inv)) 4504 { 4505 parts.offset = wide_int_to_tree (sizetype, aff_inv->offset); 4506 /* Addressing mode "base + offset". 
*/ 4507 if (!valid_mem_ref_p (mem_mode, as, &parts)) 4508 parts.offset = NULL_TREE; 4509 else 4510 aff_inv->offset = 0; 4511 } 4512 } 4513
4514 if (simple_inv)
4515 simple_inv = (aff_inv == NULL
4516 || aff_combination_const_p (aff_inv)
4517 || aff_combination_singleton_var_p (aff_inv));
4518 if (!aff_combination_zero_p (aff_inv))
4519 comp_inv = aff_combination_to_tree (aff_inv);
4520 if (comp_inv != NULL_TREE)
4521 cost = force_var_cost (data, comp_inv, inv_vars);
4522 if (ratio != 1 && parts.step == NULL_TREE)
4523 var_cost += mult_by_coeff_cost (ratio, addr_mode, speed);
4524 if (comp_inv != NULL_TREE && parts.index == NULL_TREE)
4525 var_cost += add_cost (speed, addr_mode);
4526
4527 if (comp_inv && inv_expr && !simple_inv)
4528 {
4529 *inv_expr = get_loop_invariant_expr (data, comp_inv);
4530 /* Clear depends on. */
4531 if (*inv_expr != NULL && inv_vars && *inv_vars)
4532 bitmap_clear (*inv_vars);
4533
4534 /* The cost of a small invariant expression adjusted against loop niters
4535 is usually zero, which makes it hard to distinguish from a candidate
4536 based on loop invariant variables. Moreover, the generated invariant
4537 expression may not be hoisted out of the loop by a following pass.
4538 We penalize the cost by rounding up in order to neutralize such
4539 effects. */
4540 cost.cost = adjust_setup_cost (data, cost.cost, true);
4541 cost.scratch = cost.cost;
4542 }
4543
4544 cost += var_cost;
4545 addr = addr_for_mem_ref (&parts, as, false);
4546 gcc_assert (memory_address_addr_space_p (mem_mode, addr, as));
4547 cost += address_cost (addr, mem_mode, as, speed);
4548
4549 if (parts.symbol != NULL_TREE)
4550 cost.complexity += 1;
4551 /* Don't increase the complexity of adding a scaled index if it's
4552 the only kind of index that the target allows. */
4553 if (parts.step != NULL_TREE && ok_without_ratio_p)
4554 cost.complexity += 1;
4555 if (parts.base != NULL_TREE && parts.index != NULL_TREE)
4556 cost.complexity += 1;
4557 if (parts.offset != NULL_TREE && !integer_zerop (parts.offset))
4558 cost.complexity += 1;
4559
4560 return cost;
4561 }
4562
4563 /* Scale (multiply) the computed COST (except the scratch part, which should
4564 be hoisted out of the loop) by AT->frequency / header->frequency, which
4565 makes the expected cost more accurate. */
4566
4567 static comp_cost
4568 get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost)
4569 {
4570 int loop_freq = data->current_loop->header->count.to_frequency (cfun);
4571 int bb_freq = gimple_bb (at)->count.to_frequency (cfun);
4572 if (loop_freq != 0)
4573 {
4574 gcc_assert (cost.scratch <= cost.cost);
4575 int scaled_cost
4576 = cost.scratch + (cost.cost - cost.scratch) * bb_freq / loop_freq;
4577
4578 if (dump_file && (dump_flags & TDF_DETAILS))
4579 fprintf (dump_file, "Scaling cost based on bb prob "
4580 "by %2.2f: %d (scratch: %d) -> %d (%d/%d)\n",
4581 1.0f * bb_freq / loop_freq, cost.cost,
4582 cost.scratch, scaled_cost, bb_freq, loop_freq);
4583
4584 cost.cost = scaled_cost;
4585 }
4586
4587 return cost;
4588 }
4589
4590 /* Determines the cost of the computation by which USE is expressed
4591 from induction variable CAND. If ADDRESS_P is true, we just need
4592 to create an address from it, otherwise we want to get it into
4593 a register. A set of invariants we depend on is stored in INV_VARS.
4594 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4595 addressing is likely. If INV_EXPR is nonnull, record invariant
4596 expr entry in it.
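   As a hedged sketch of the non-address path (names and numbers
   hypothetical): for a use with iv {base = b + 4, step = 4} expressed by a
   candidate with iv {base = 0, step = 1}, get_computation_aff_1 yields
   ratio = 4 and invariant part b + 4, so the resulting cost is roughly
   force_var_cost (b + 4) adjusted as setup cost, plus
   mult_by_coeff_cost (4, ...) for scaling the candidate and one add_cost
   for combining the two parts, finally rescaled by the ratio of the use's
   basic block frequency to the loop header frequency.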
*/ 4597 4598 static comp_cost 4599 get_computation_cost (struct ivopts_data *data, struct iv_use *use, 4600 struct iv_cand *cand, bool address_p, bitmap *inv_vars, 4601 bool *can_autoinc, iv_inv_expr_ent **inv_expr) 4602 { 4603 gimple *at = use->stmt; 4604 tree ubase = use->iv->base, cbase = cand->iv->base; 4605 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase); 4606 tree comp_inv = NULL_TREE; 4607 HOST_WIDE_INT ratio, aratio; 4608 comp_cost cost; 4609 widest_int rat; 4610 aff_tree aff_inv, aff_var; 4611 bool speed = optimize_bb_for_speed_p (gimple_bb (at)); 4612 4613 if (inv_vars) 4614 *inv_vars = NULL; 4615 if (can_autoinc) 4616 *can_autoinc = false; 4617 if (inv_expr) 4618 *inv_expr = NULL; 4619 4620 /* Check if we have enough precision to express the values of use. */ 4621 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype)) 4622 return infinite_cost; 4623 4624 if (address_p 4625 || (use->iv->base_object 4626 && cand->iv->base_object 4627 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object)) 4628 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object)))) 4629 { 4630 /* Do not try to express address of an object with computation based 4631 on address of a different object. This may cause problems in rtl 4632 level alias analysis (that does not expect this to be happening, 4633 as this is illegal in C), and would be unlikely to be useful 4634 anyway. */ 4635 if (use->iv->base_object 4636 && cand->iv->base_object 4637 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0)) 4638 return infinite_cost; 4639 } 4640 4641 if (!get_computation_aff_1 (data->current_loop, at, use, 4642 cand, &aff_inv, &aff_var, &rat) 4643 || !wi::fits_shwi_p (rat)) 4644 return infinite_cost; 4645 4646 ratio = rat.to_shwi (); 4647 if (address_p) 4648 { 4649 cost = get_address_cost (data, use, cand, &aff_inv, &aff_var, ratio, 4650 inv_vars, inv_expr, can_autoinc, speed); 4651 return get_scaled_computation_cost_at (data, at, cost); 4652 } 4653 4654 bool simple_inv = (aff_combination_const_p (&aff_inv) 4655 || aff_combination_singleton_var_p (&aff_inv)); 4656 tree signed_type = signed_type_for (aff_combination_type (&aff_inv)); 4657 aff_combination_convert (&aff_inv, signed_type); 4658 if (!aff_combination_zero_p (&aff_inv)) 4659 comp_inv = aff_combination_to_tree (&aff_inv); 4660 4661 cost = force_var_cost (data, comp_inv, inv_vars); 4662 if (comp_inv && inv_expr && !simple_inv) 4663 { 4664 *inv_expr = get_loop_invariant_expr (data, comp_inv); 4665 /* Clear depends on. */ 4666 if (*inv_expr != NULL && inv_vars && *inv_vars) 4667 bitmap_clear (*inv_vars); 4668 4669 cost.cost = adjust_setup_cost (data, cost.cost); 4670 /* Record setup cost in scratch field. */ 4671 cost.scratch = cost.cost; 4672 } 4673 /* Cost of constant integer can be covered when adding invariant part to 4674 variant part. */ 4675 else if (comp_inv && CONSTANT_CLASS_P (comp_inv)) 4676 cost = no_cost; 4677 4678 /* Need type narrowing to represent use with cand. */ 4679 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype)) 4680 { 4681 machine_mode outer_mode = TYPE_MODE (utype); 4682 machine_mode inner_mode = TYPE_MODE (ctype); 4683 cost += comp_cost (convert_cost (outer_mode, inner_mode, speed), 0); 4684 } 4685 4686 /* Turn a + i * (-c) into a - i * c. 
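   For instance (values hypothetical), with ratio == -2 and a non-zero
   invariant part the multiplication cost below is queried for 2 rather
   than -2, matching the cheaper  inv - var * 2  form of the rewrite.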
*/ 4687 if (ratio < 0 && comp_inv && !integer_zerop (comp_inv)) 4688 aratio = -ratio; 4689 else 4690 aratio = ratio; 4691 4692 if (ratio != 1) 4693 cost += mult_by_coeff_cost (aratio, TYPE_MODE (utype), speed); 4694 4695 /* TODO: We may also need to check if we can compute a + i * 4 in one 4696 instruction. */ 4697 /* Need to add up the invariant and variant parts. */ 4698 if (comp_inv && !integer_zerop (comp_inv)) 4699 cost += add_cost (speed, TYPE_MODE (utype)); 4700 4701 return get_scaled_computation_cost_at (data, at, cost); 4702 } 4703 4704 /* Determines cost of computing the use in GROUP with CAND in a generic 4705 expression. */ 4706 4707 static bool 4708 determine_group_iv_cost_generic (struct ivopts_data *data, 4709 struct iv_group *group, struct iv_cand *cand) 4710 { 4711 comp_cost cost; 4712 iv_inv_expr_ent *inv_expr = NULL; 4713 bitmap inv_vars = NULL, inv_exprs = NULL; 4714 struct iv_use *use = group->vuses[0]; 4715 4716 /* The simple case first -- if we need to express value of the preserved 4717 original biv, the cost is 0. This also prevents us from counting the 4718 cost of increment twice -- once at this use and once in the cost of 4719 the candidate. */ 4720 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt) 4721 cost = no_cost; 4722 else 4723 cost = get_computation_cost (data, use, cand, false, 4724 &inv_vars, NULL, &inv_expr); 4725 4726 if (inv_expr) 4727 { 4728 inv_exprs = BITMAP_ALLOC (NULL); 4729 bitmap_set_bit (inv_exprs, inv_expr->id); 4730 } 4731 set_group_iv_cost (data, group, cand, cost, inv_vars, 4732 NULL_TREE, ERROR_MARK, inv_exprs); 4733 return !cost.infinite_cost_p (); 4734 } 4735 4736 /* Determines cost of computing uses in GROUP with CAND in addresses. */ 4737 4738 static bool 4739 determine_group_iv_cost_address (struct ivopts_data *data, 4740 struct iv_group *group, struct iv_cand *cand) 4741 { 4742 unsigned i; 4743 bitmap inv_vars = NULL, inv_exprs = NULL; 4744 bool can_autoinc; 4745 iv_inv_expr_ent *inv_expr = NULL; 4746 struct iv_use *use = group->vuses[0]; 4747 comp_cost sum_cost = no_cost, cost; 4748 4749 cost = get_computation_cost (data, use, cand, true, 4750 &inv_vars, &can_autoinc, &inv_expr); 4751 4752 if (inv_expr) 4753 { 4754 inv_exprs = BITMAP_ALLOC (NULL); 4755 bitmap_set_bit (inv_exprs, inv_expr->id); 4756 } 4757 sum_cost = cost; 4758 if (!sum_cost.infinite_cost_p () && cand->ainc_use == use) 4759 { 4760 if (can_autoinc) 4761 sum_cost -= cand->cost_step; 4762 /* If we generated the candidate solely for exploiting autoincrement 4763 opportunities, and it turns out it can't be used, set the cost to 4764 infinity to make sure we ignore it. */ 4765 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE) 4766 sum_cost = infinite_cost; 4767 } 4768 4769 /* Uses in a group can share setup code, so only add setup cost once. */ 4770 cost -= cost.scratch; 4771 /* Compute and add costs for rest uses of this group. */ 4772 for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++) 4773 { 4774 struct iv_use *next = group->vuses[i]; 4775 4776 /* TODO: We could skip computing cost for sub iv_use when it has the 4777 same cost as the first iv_use, but the cost really depends on the 4778 offset and where the iv_use is. 
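   For example (array and offsets hypothetical), a group covering a[i],
   a[i + 1] and a[i + 4096] normally shares one setup computation, yet the
   last member may need an extra addition if its constant offset does not
   fit the addressing mode, which is why each remaining use is re-costed
   here.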
*/ 4779 cost = get_computation_cost (data, next, cand, true, 4780 NULL, &can_autoinc, &inv_expr); 4781 if (inv_expr) 4782 { 4783 if (!inv_exprs) 4784 inv_exprs = BITMAP_ALLOC (NULL); 4785 4786 bitmap_set_bit (inv_exprs, inv_expr->id); 4787 } 4788 sum_cost += cost; 4789 } 4790 set_group_iv_cost (data, group, cand, sum_cost, inv_vars, 4791 NULL_TREE, ERROR_MARK, inv_exprs); 4792 4793 return !sum_cost.infinite_cost_p (); 4794 } 4795 4796 /* Computes value of candidate CAND at position AT in iteration NITER, and 4797 stores it to VAL. */ 4798 4799 static void 4800 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter, 4801 aff_tree *val) 4802 { 4803 aff_tree step, delta, nit; 4804 struct iv *iv = cand->iv; 4805 tree type = TREE_TYPE (iv->base); 4806 tree steptype; 4807 if (POINTER_TYPE_P (type)) 4808 steptype = sizetype; 4809 else 4810 steptype = unsigned_type_for (type); 4811 4812 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step); 4813 aff_combination_convert (&step, steptype); 4814 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit); 4815 aff_combination_convert (&nit, steptype); 4816 aff_combination_mult (&nit, &step, &delta); 4817 if (stmt_after_increment (loop, cand, at)) 4818 aff_combination_add (&delta, &step); 4819 4820 tree_to_aff_combination (iv->base, type, val); 4821 if (!POINTER_TYPE_P (type)) 4822 aff_combination_convert (val, steptype); 4823 aff_combination_add (val, &delta); 4824 } 4825 4826 /* Returns period of induction variable iv. */ 4827 4828 static tree 4829 iv_period (struct iv *iv) 4830 { 4831 tree step = iv->step, period, type; 4832 tree pow2div; 4833 4834 gcc_assert (step && TREE_CODE (step) == INTEGER_CST); 4835 4836 type = unsigned_type_for (TREE_TYPE (step)); 4837 /* Period of the iv is lcm (step, type_range)/step -1, 4838 i.e., N*type_range/step - 1. Since type range is power 4839 of two, N == (step >> num_of_ending_zeros_binary (step), 4840 so the final result is 4841 4842 (type_range >> num_of_ending_zeros_binary (step)) - 1 4843 4844 */ 4845 pow2div = num_ending_zeros (step); 4846 4847 period = build_low_bits_mask (type, 4848 (TYPE_PRECISION (type) 4849 - tree_to_uhwi (pow2div))); 4850 4851 return period; 4852 } 4853 4854 /* Returns the comparison operator used when eliminating the iv USE. */ 4855 4856 static enum tree_code 4857 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use) 4858 { 4859 struct loop *loop = data->current_loop; 4860 basic_block ex_bb; 4861 edge exit; 4862 4863 ex_bb = gimple_bb (use->stmt); 4864 exit = EDGE_SUCC (ex_bb, 0); 4865 if (flow_bb_inside_loop_p (loop, exit->dest)) 4866 exit = EDGE_SUCC (ex_bb, 1); 4867 4868 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR); 4869 } 4870 4871 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now, 4872 we only detect the situation that BASE = SOMETHING + OFFSET, where the 4873 calculation is performed in non-wrapping type. 4874 4875 TODO: More generally, we could test for the situation that 4876 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero. 4877 This would require knowing the sign of OFFSET. 
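   As a hedged example (names hypothetical): if BASE is an SSA name defined
   by  base = start + 16  (a POINTER_PLUS_EXPR in a non-wrapping pointer
   type) and OFFSET folds to 16, the affine expansion of the second operand
   cancels against OFFSET, so BASE - OFFSET is just start and provably
   cannot overflow.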
*/ 4878 4879 static bool 4880 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset) 4881 { 4882 enum tree_code code; 4883 tree e1, e2; 4884 aff_tree aff_e1, aff_e2, aff_offset; 4885 4886 if (!nowrap_type_p (TREE_TYPE (base))) 4887 return false; 4888 4889 base = expand_simple_operations (base); 4890 4891 if (TREE_CODE (base) == SSA_NAME) 4892 { 4893 gimple *stmt = SSA_NAME_DEF_STMT (base); 4894 4895 if (gimple_code (stmt) != GIMPLE_ASSIGN) 4896 return false; 4897 4898 code = gimple_assign_rhs_code (stmt); 4899 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS) 4900 return false; 4901 4902 e1 = gimple_assign_rhs1 (stmt); 4903 e2 = gimple_assign_rhs2 (stmt); 4904 } 4905 else 4906 { 4907 code = TREE_CODE (base); 4908 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS) 4909 return false; 4910 e1 = TREE_OPERAND (base, 0); 4911 e2 = TREE_OPERAND (base, 1); 4912 } 4913 4914 /* Use affine expansion as deeper inspection to prove the equality. */ 4915 tree_to_aff_combination_expand (e2, TREE_TYPE (e2), 4916 &aff_e2, &data->name_expansion_cache); 4917 tree_to_aff_combination_expand (offset, TREE_TYPE (offset), 4918 &aff_offset, &data->name_expansion_cache); 4919 aff_combination_scale (&aff_offset, -1); 4920 switch (code) 4921 { 4922 case PLUS_EXPR: 4923 aff_combination_add (&aff_e2, &aff_offset); 4924 if (aff_combination_zero_p (&aff_e2)) 4925 return true; 4926 4927 tree_to_aff_combination_expand (e1, TREE_TYPE (e1), 4928 &aff_e1, &data->name_expansion_cache); 4929 aff_combination_add (&aff_e1, &aff_offset); 4930 return aff_combination_zero_p (&aff_e1); 4931 4932 case POINTER_PLUS_EXPR: 4933 aff_combination_add (&aff_e2, &aff_offset); 4934 return aff_combination_zero_p (&aff_e2); 4935 4936 default: 4937 return false; 4938 } 4939 } 4940 4941 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR 4942 comparison with CAND. NITER describes the number of iterations of 4943 the loops. If successful, the comparison in COMP_P is altered accordingly. 4944 4945 We aim to handle the following situation: 4946 4947 sometype *base, *p; 4948 int a, b, i; 4949 4950 i = a; 4951 p = p_0 = base + a; 4952 4953 do 4954 { 4955 bla (*p); 4956 p++; 4957 i++; 4958 } 4959 while (i < b); 4960 4961 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1. 4962 We aim to optimize this to 4963 4964 p = p_0 = base + a; 4965 do 4966 { 4967 bla (*p); 4968 p++; 4969 } 4970 while (p < p_0 - a + b); 4971 4972 This preserves the correctness, since the pointer arithmetics does not 4973 overflow. More precisely: 4974 4975 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no 4976 overflow in computing it or the values of p. 4977 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not 4978 overflow. To prove this, we use the fact that p_0 = base + a. */ 4979 4980 static bool 4981 iv_elimination_compare_lt (struct ivopts_data *data, 4982 struct iv_cand *cand, enum tree_code *comp_p, 4983 struct tree_niter_desc *niter) 4984 { 4985 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset; 4986 struct aff_tree nit, tmpa, tmpb; 4987 enum tree_code comp; 4988 HOST_WIDE_INT step; 4989 4990 /* We need to know that the candidate induction variable does not overflow. 4991 While more complex analysis may be used to prove this, for now just 4992 check that the variable appears in the original program and that it 4993 is computed in a type that guarantees no overflows. 
*/ 4994 cand_type = TREE_TYPE (cand->iv->base); 4995 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type)) 4996 return false; 4997 4998 /* Make sure that the loop iterates till the loop bound is hit, as otherwise 4999 the calculation of the BOUND could overflow, making the comparison 5000 invalid. */ 5001 if (!data->loop_single_exit_p) 5002 return false; 5003 5004 /* We need to be able to decide whether candidate is increasing or decreasing 5005 in order to choose the right comparison operator. */ 5006 if (!cst_and_fits_in_hwi (cand->iv->step)) 5007 return false; 5008 step = int_cst_value (cand->iv->step); 5009 5010 /* Check that the number of iterations matches the expected pattern: 5011 a + 1 > b ? 0 : b - a - 1. */ 5012 mbz = niter->may_be_zero; 5013 if (TREE_CODE (mbz) == GT_EXPR) 5014 { 5015 /* Handle a + 1 > b. */ 5016 tree op0 = TREE_OPERAND (mbz, 0); 5017 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1))) 5018 { 5019 a = TREE_OPERAND (op0, 0); 5020 b = TREE_OPERAND (mbz, 1); 5021 } 5022 else 5023 return false; 5024 } 5025 else if (TREE_CODE (mbz) == LT_EXPR) 5026 { 5027 tree op1 = TREE_OPERAND (mbz, 1); 5028 5029 /* Handle b < a + 1. */ 5030 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1))) 5031 { 5032 a = TREE_OPERAND (op1, 0); 5033 b = TREE_OPERAND (mbz, 0); 5034 } 5035 else 5036 return false; 5037 } 5038 else 5039 return false; 5040 5041 /* Expected number of iterations is B - A - 1. Check that it matches 5042 the actual number, i.e., that B - A - NITER = 1. */ 5043 tree_to_aff_combination (niter->niter, nit_type, &nit); 5044 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa); 5045 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb); 5046 aff_combination_scale (&nit, -1); 5047 aff_combination_scale (&tmpa, -1); 5048 aff_combination_add (&tmpb, &tmpa); 5049 aff_combination_add (&tmpb, &nit); 5050 if (tmpb.n != 0 || maybe_ne (tmpb.offset, 1)) 5051 return false; 5052 5053 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not 5054 overflow. */ 5055 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step), 5056 cand->iv->step, 5057 fold_convert (TREE_TYPE (cand->iv->step), a)); 5058 if (!difference_cannot_overflow_p (data, cand->iv->base, offset)) 5059 return false; 5060 5061 /* Determine the new comparison operator. */ 5062 comp = step < 0 ? GT_EXPR : LT_EXPR; 5063 if (*comp_p == NE_EXPR) 5064 *comp_p = comp; 5065 else if (*comp_p == EQ_EXPR) 5066 *comp_p = invert_tree_comparison (comp, false); 5067 else 5068 gcc_unreachable (); 5069 5070 return true; 5071 } 5072 5073 /* Check whether it is possible to express the condition in USE by comparison 5074 of candidate CAND. If so, store the value compared with to BOUND, and the 5075 comparison operator to COMP. */ 5076 5077 static bool 5078 may_eliminate_iv (struct ivopts_data *data, 5079 struct iv_use *use, struct iv_cand *cand, tree *bound, 5080 enum tree_code *comp) 5081 { 5082 basic_block ex_bb; 5083 edge exit; 5084 tree period; 5085 struct loop *loop = data->current_loop; 5086 aff_tree bnd; 5087 struct tree_niter_desc *desc = NULL; 5088 5089 if (TREE_CODE (cand->iv->step) != INTEGER_CST) 5090 return false; 5091 5092 /* For now works only for exits that dominate the loop latch. 5093 TODO: extend to other conditions inside loop body. 
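   As a rough illustration of the period check further below (numbers
   hypothetical): for a 32-bit unsigned candidate with step 4, iv_period
   returns (1u << 30) - 1, so elimination is only attempted when the exit
   is known to be taken within that many iterations and the candidate
   value cannot wrap around before the rewritten test fires.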
*/ 5094 ex_bb = gimple_bb (use->stmt); 5095 if (use->stmt != last_stmt (ex_bb) 5096 || gimple_code (use->stmt) != GIMPLE_COND 5097 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb)) 5098 return false; 5099 5100 exit = EDGE_SUCC (ex_bb, 0); 5101 if (flow_bb_inside_loop_p (loop, exit->dest)) 5102 exit = EDGE_SUCC (ex_bb, 1); 5103 if (flow_bb_inside_loop_p (loop, exit->dest)) 5104 return false; 5105 5106 desc = niter_for_exit (data, exit); 5107 if (!desc) 5108 return false; 5109 5110 /* Determine whether we can use the variable to test the exit condition. 5111 This is the case iff the period of the induction variable is greater 5112 than the number of iterations for which the exit condition is true. */ 5113 period = iv_period (cand->iv); 5114 5115 /* If the number of iterations is constant, compare against it directly. */ 5116 if (TREE_CODE (desc->niter) == INTEGER_CST) 5117 { 5118 /* See cand_value_at. */ 5119 if (stmt_after_increment (loop, cand, use->stmt)) 5120 { 5121 if (!tree_int_cst_lt (desc->niter, period)) 5122 return false; 5123 } 5124 else 5125 { 5126 if (tree_int_cst_lt (period, desc->niter)) 5127 return false; 5128 } 5129 } 5130 5131 /* If not, and if this is the only possible exit of the loop, see whether 5132 we can get a conservative estimate on the number of iterations of the 5133 entire loop and compare against that instead. */ 5134 else 5135 { 5136 widest_int period_value, max_niter; 5137 5138 max_niter = desc->max; 5139 if (stmt_after_increment (loop, cand, use->stmt)) 5140 max_niter += 1; 5141 period_value = wi::to_widest (period); 5142 if (wi::gtu_p (max_niter, period_value)) 5143 { 5144 /* See if we can take advantage of inferred loop bound 5145 information. */ 5146 if (data->loop_single_exit_p) 5147 { 5148 if (!max_loop_iterations (loop, &max_niter)) 5149 return false; 5150 /* The loop bound is already adjusted by adding 1. */ 5151 if (wi::gtu_p (max_niter, period_value)) 5152 return false; 5153 } 5154 else 5155 return false; 5156 } 5157 } 5158 5159 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd); 5160 5161 *bound = fold_convert (TREE_TYPE (cand->iv->base), 5162 aff_combination_to_tree (&bnd)); 5163 *comp = iv_elimination_compare (data, use); 5164 5165 /* It is unlikely that computing the number of iterations using division 5166 would be more profitable than keeping the original induction variable. */ 5167 if (expression_expensive_p (*bound)) 5168 return false; 5169 5170 /* Sometimes, it is possible to handle the situation that the number of 5171 iterations may be zero unless additional assumptions by using < 5172 instead of != in the exit condition. 5173 5174 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and 5175 base the exit condition on it. However, that is often too 5176 expensive. */ 5177 if (!integer_zerop (desc->may_be_zero)) 5178 return iv_elimination_compare_lt (data, cand, comp, desc); 5179 5180 return true; 5181 } 5182 5183 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must 5184 be copied, if it is used in the loop body and DATA->body_includes_call. */ 5185 5186 static int 5187 parm_decl_cost (struct ivopts_data *data, tree bound) 5188 { 5189 tree sbound = bound; 5190 STRIP_NOPS (sbound); 5191 5192 if (TREE_CODE (sbound) == SSA_NAME 5193 && SSA_NAME_IS_DEFAULT_DEF (sbound) 5194 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL 5195 && data->body_includes_call) 5196 return COSTS_N_INSNS (1); 5197 5198 return 0; 5199 } 5200 5201 /* Determines cost of computing the use in GROUP with CAND in a condition. 
*/ 5202 5203 static bool 5204 determine_group_iv_cost_cond (struct ivopts_data *data, 5205 struct iv_group *group, struct iv_cand *cand) 5206 { 5207 tree bound = NULL_TREE; 5208 struct iv *cmp_iv; 5209 bitmap inv_exprs = NULL; 5210 bitmap inv_vars_elim = NULL, inv_vars_express = NULL, inv_vars; 5211 comp_cost elim_cost = infinite_cost, express_cost, cost, bound_cost; 5212 enum comp_iv_rewrite rewrite_type; 5213 iv_inv_expr_ent *inv_expr_elim = NULL, *inv_expr_express = NULL, *inv_expr; 5214 tree *control_var, *bound_cst; 5215 enum tree_code comp = ERROR_MARK; 5216 struct iv_use *use = group->vuses[0]; 5217 5218 /* Extract condition operands. */ 5219 rewrite_type = extract_cond_operands (data, use->stmt, &control_var, 5220 &bound_cst, NULL, &cmp_iv); 5221 gcc_assert (rewrite_type != COMP_IV_NA); 5222 5223 /* Try iv elimination. */ 5224 if (rewrite_type == COMP_IV_ELIM 5225 && may_eliminate_iv (data, use, cand, &bound, &comp)) 5226 { 5227 elim_cost = force_var_cost (data, bound, &inv_vars_elim); 5228 if (elim_cost.cost == 0) 5229 elim_cost.cost = parm_decl_cost (data, bound); 5230 else if (TREE_CODE (bound) == INTEGER_CST) 5231 elim_cost.cost = 0; 5232 /* If we replace a loop condition 'i < n' with 'p < base + n', 5233 inv_vars_elim will have 'base' and 'n' set, which implies that both 5234 'base' and 'n' will be live during the loop. More likely, 5235 'base + n' will be loop invariant, resulting in only one live value 5236 during the loop. So in that case we clear inv_vars_elim and set 5237 inv_expr_elim instead. */ 5238 if (inv_vars_elim && bitmap_count_bits (inv_vars_elim) > 1) 5239 { 5240 inv_expr_elim = get_loop_invariant_expr (data, bound); 5241 bitmap_clear (inv_vars_elim); 5242 } 5243 /* The bound is a loop invariant, so it will be only computed 5244 once. */ 5245 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost); 5246 } 5247 5248 /* When the condition is a comparison of the candidate IV against 5249 zero, prefer this IV. 5250 5251 TODO: The constant that we're subtracting from the cost should 5252 be target-dependent. This information should be added to the 5253 target costs for each backend. */ 5254 if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */ 5255 && integer_zerop (*bound_cst) 5256 && (operand_equal_p (*control_var, cand->var_after, 0) 5257 || operand_equal_p (*control_var, cand->var_before, 0))) 5258 elim_cost -= 1; 5259 5260 express_cost = get_computation_cost (data, use, cand, false, 5261 &inv_vars_express, NULL, 5262 &inv_expr_express); 5263 if (cmp_iv != NULL) 5264 find_inv_vars (data, &cmp_iv->base, &inv_vars_express); 5265 5266 /* Count the cost of the original bound as well. */ 5267 bound_cost = force_var_cost (data, *bound_cst, NULL); 5268 if (bound_cost.cost == 0) 5269 bound_cost.cost = parm_decl_cost (data, *bound_cst); 5270 else if (TREE_CODE (*bound_cst) == INTEGER_CST) 5271 bound_cost.cost = 0; 5272 express_cost += bound_cost; 5273 5274 /* Choose the better approach, preferring the eliminated IV. 
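   For example (costs hypothetical), with elim_cost == 4 and
   express_cost == 6 the exit test is rewritten in terms of BOUND and COMP;
   on a tie the comparison below still picks elimination, since dropping
   the original IV tends to free a register.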
*/ 5275 if (elim_cost <= express_cost) 5276 { 5277 cost = elim_cost; 5278 inv_vars = inv_vars_elim; 5279 inv_vars_elim = NULL; 5280 inv_expr = inv_expr_elim; 5281 } 5282 else 5283 { 5284 cost = express_cost; 5285 inv_vars = inv_vars_express; 5286 inv_vars_express = NULL; 5287 bound = NULL_TREE; 5288 comp = ERROR_MARK; 5289 inv_expr = inv_expr_express; 5290 } 5291 5292 if (inv_expr) 5293 { 5294 inv_exprs = BITMAP_ALLOC (NULL); 5295 bitmap_set_bit (inv_exprs, inv_expr->id); 5296 } 5297 set_group_iv_cost (data, group, cand, cost, 5298 inv_vars, bound, comp, inv_exprs); 5299 5300 if (inv_vars_elim) 5301 BITMAP_FREE (inv_vars_elim); 5302 if (inv_vars_express) 5303 BITMAP_FREE (inv_vars_express); 5304 5305 return !cost.infinite_cost_p (); 5306 } 5307 5308 /* Determines cost of computing uses in GROUP with CAND. Returns false 5309 if USE cannot be represented with CAND. */ 5310 5311 static bool 5312 determine_group_iv_cost (struct ivopts_data *data, 5313 struct iv_group *group, struct iv_cand *cand) 5314 { 5315 switch (group->type) 5316 { 5317 case USE_NONLINEAR_EXPR: 5318 return determine_group_iv_cost_generic (data, group, cand); 5319 5320 case USE_REF_ADDRESS: 5321 case USE_PTR_ADDRESS: 5322 return determine_group_iv_cost_address (data, group, cand); 5323 5324 case USE_COMPARE: 5325 return determine_group_iv_cost_cond (data, group, cand); 5326 5327 default: 5328 gcc_unreachable (); 5329 } 5330 } 5331 5332 /* Return true if get_computation_cost indicates that autoincrement is 5333 a possibility for the pair of USE and CAND, false otherwise. */ 5334 5335 static bool 5336 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use, 5337 struct iv_cand *cand) 5338 { 5339 if (!address_p (use->type)) 5340 return false; 5341 5342 bool can_autoinc = false; 5343 get_computation_cost (data, use, cand, true, NULL, &can_autoinc, NULL); 5344 return can_autoinc; 5345 } 5346 5347 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a 5348 use that allows autoincrement, and set their AINC_USE if possible. */ 5349 5350 static void 5351 set_autoinc_for_original_candidates (struct ivopts_data *data) 5352 { 5353 unsigned i, j; 5354 5355 for (i = 0; i < data->vcands.length (); i++) 5356 { 5357 struct iv_cand *cand = data->vcands[i]; 5358 struct iv_use *closest_before = NULL; 5359 struct iv_use *closest_after = NULL; 5360 if (cand->pos != IP_ORIGINAL) 5361 continue; 5362 5363 for (j = 0; j < data->vgroups.length (); j++) 5364 { 5365 struct iv_group *group = data->vgroups[j]; 5366 struct iv_use *use = group->vuses[0]; 5367 unsigned uid = gimple_uid (use->stmt); 5368 5369 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)) 5370 continue; 5371 5372 if (uid < gimple_uid (cand->incremented_at) 5373 && (closest_before == NULL 5374 || uid > gimple_uid (closest_before->stmt))) 5375 closest_before = use; 5376 5377 if (uid > gimple_uid (cand->incremented_at) 5378 && (closest_after == NULL 5379 || uid < gimple_uid (closest_after->stmt))) 5380 closest_after = use; 5381 } 5382 5383 if (closest_before != NULL 5384 && autoinc_possible_for_pair (data, closest_before, cand)) 5385 cand->ainc_use = closest_before; 5386 else if (closest_after != NULL 5387 && autoinc_possible_for_pair (data, closest_after, cand)) 5388 cand->ainc_use = closest_after; 5389 } 5390 } 5391 5392 /* Relate compare use with all candidates. 
*/ 5393 5394 static void 5395 relate_compare_use_with_all_cands (struct ivopts_data *data) 5396 { 5397 unsigned i, count = data->vcands.length (); 5398 for (i = 0; i < data->vgroups.length (); i++) 5399 { 5400 struct iv_group *group = data->vgroups[i]; 5401 5402 if (group->type == USE_COMPARE) 5403 bitmap_set_range (group->related_cands, 0, count); 5404 } 5405 } 5406 5407 /* Finds the candidates for the induction variables. */ 5408 5409 static void 5410 find_iv_candidates (struct ivopts_data *data) 5411 { 5412 /* Add commonly used ivs. */ 5413 add_standard_iv_candidates (data); 5414 5415 /* Add old induction variables. */ 5416 add_iv_candidate_for_bivs (data); 5417 5418 /* Add induction variables derived from uses. */ 5419 add_iv_candidate_for_groups (data); 5420 5421 set_autoinc_for_original_candidates (data); 5422 5423 /* Record the important candidates. */ 5424 record_important_candidates (data); 5425 5426 /* Relate compare iv_use with all candidates. */ 5427 if (!data->consider_all_candidates) 5428 relate_compare_use_with_all_cands (data); 5429 5430 if (dump_file && (dump_flags & TDF_DETAILS)) 5431 { 5432 unsigned i; 5433 5434 fprintf (dump_file, "\n<Important Candidates>:\t"); 5435 for (i = 0; i < data->vcands.length (); i++) 5436 if (data->vcands[i]->important) 5437 fprintf (dump_file, " %d,", data->vcands[i]->id); 5438 fprintf (dump_file, "\n"); 5439 5440 fprintf (dump_file, "\n<Group, Cand> Related:\n"); 5441 for (i = 0; i < data->vgroups.length (); i++) 5442 { 5443 struct iv_group *group = data->vgroups[i]; 5444 5445 if (group->related_cands) 5446 { 5447 fprintf (dump_file, " Group %d:\t", group->id); 5448 dump_bitmap (dump_file, group->related_cands); 5449 } 5450 } 5451 fprintf (dump_file, "\n"); 5452 } 5453 } 5454 5455 /* Determines costs of computing use of iv with an iv candidate. */ 5456 5457 static void 5458 determine_group_iv_costs (struct ivopts_data *data) 5459 { 5460 unsigned i, j; 5461 struct iv_cand *cand; 5462 struct iv_group *group; 5463 bitmap to_clear = BITMAP_ALLOC (NULL); 5464 5465 alloc_use_cost_map (data); 5466 5467 for (i = 0; i < data->vgroups.length (); i++) 5468 { 5469 group = data->vgroups[i]; 5470 5471 if (data->consider_all_candidates) 5472 { 5473 for (j = 0; j < data->vcands.length (); j++) 5474 { 5475 cand = data->vcands[j]; 5476 determine_group_iv_cost (data, group, cand); 5477 } 5478 } 5479 else 5480 { 5481 bitmap_iterator bi; 5482 5483 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi) 5484 { 5485 cand = data->vcands[j]; 5486 if (!determine_group_iv_cost (data, group, cand)) 5487 bitmap_set_bit (to_clear, j); 5488 } 5489 5490 /* Remove the candidates for that the cost is infinite from 5491 the list of related candidates. */ 5492 bitmap_and_compl_into (group->related_cands, to_clear); 5493 bitmap_clear (to_clear); 5494 } 5495 } 5496 5497 BITMAP_FREE (to_clear); 5498 5499 if (dump_file && (dump_flags & TDF_DETAILS)) 5500 { 5501 bitmap_iterator bi; 5502 5503 /* Dump invariant variables. */ 5504 fprintf (dump_file, "\n<Invariant Vars>:\n"); 5505 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi) 5506 { 5507 struct version_info *info = ver_info (data, i); 5508 if (info->inv_id) 5509 { 5510 fprintf (dump_file, "Inv %d:\t", info->inv_id); 5511 print_generic_expr (dump_file, info->name, TDF_SLIM); 5512 fprintf (dump_file, "%s\n", 5513 info->has_nonlin_use ? "" : "\t(eliminable)"); 5514 } 5515 } 5516 5517 /* Dump invariant expressions. 
*/ 5518 fprintf (dump_file, "\n<Invariant Expressions>:\n"); 5519 auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ()); 5520 5521 for (hash_table<iv_inv_expr_hasher>::iterator it 5522 = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end (); 5523 ++it) 5524 list.safe_push (*it); 5525 5526 list.qsort (sort_iv_inv_expr_ent); 5527 5528 for (i = 0; i < list.length (); ++i) 5529 { 5530 fprintf (dump_file, "inv_expr %d: \t", list[i]->id); 5531 print_generic_expr (dump_file, list[i]->expr, TDF_SLIM); 5532 fprintf (dump_file, "\n"); 5533 } 5534 5535 fprintf (dump_file, "\n<Group-candidate Costs>:\n"); 5536 5537 for (i = 0; i < data->vgroups.length (); i++) 5538 { 5539 group = data->vgroups[i]; 5540 5541 fprintf (dump_file, "Group %d:\n", i); 5542 fprintf (dump_file, " cand\tcost\tcompl.\tinv.expr.\tinv.vars\n"); 5543 for (j = 0; j < group->n_map_members; j++) 5544 { 5545 if (!group->cost_map[j].cand 5546 || group->cost_map[j].cost.infinite_cost_p ()) 5547 continue; 5548 5549 fprintf (dump_file, " %d\t%d\t%d\t", 5550 group->cost_map[j].cand->id, 5551 group->cost_map[j].cost.cost, 5552 group->cost_map[j].cost.complexity); 5553 if (!group->cost_map[j].inv_exprs 5554 || bitmap_empty_p (group->cost_map[j].inv_exprs)) 5555 fprintf (dump_file, "NIL;\t"); 5556 else 5557 bitmap_print (dump_file, 5558 group->cost_map[j].inv_exprs, "", ";\t"); 5559 if (!group->cost_map[j].inv_vars 5560 || bitmap_empty_p (group->cost_map[j].inv_vars)) 5561 fprintf (dump_file, "NIL;\n"); 5562 else 5563 bitmap_print (dump_file, 5564 group->cost_map[j].inv_vars, "", "\n"); 5565 } 5566 5567 fprintf (dump_file, "\n"); 5568 } 5569 fprintf (dump_file, "\n"); 5570 } 5571 } 5572 5573 /* Determines cost of the candidate CAND. */ 5574 5575 static void 5576 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand) 5577 { 5578 comp_cost cost_base; 5579 unsigned cost, cost_step; 5580 tree base; 5581 5582 gcc_assert (cand->iv != NULL); 5583 5584 /* There are two costs associated with the candidate -- its increment 5585 and its initialization. The second is almost negligible for any loop 5586 that rolls enough, so we take it just very little into account. */ 5587 5588 base = cand->iv->base; 5589 cost_base = force_var_cost (data, base, NULL); 5590 /* It will be exceptional that the iv register happens to be initialized with 5591 the proper value at no cost. In general, there will at least be a regcopy 5592 or a const set. */ 5593 if (cost_base.cost == 0) 5594 cost_base.cost = COSTS_N_INSNS (1); 5595 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base))); 5596 5597 cost = cost_step + adjust_setup_cost (data, cost_base.cost); 5598 5599 /* Prefer the original ivs unless we may gain something by replacing it. 5600 The reason is to make debugging simpler; so this is not relevant for 5601 artificial ivs created by other optimization passes. */ 5602 if (cand->pos != IP_ORIGINAL 5603 || !SSA_NAME_VAR (cand->var_before) 5604 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before))) 5605 cost++; 5606 5607 /* Prefer not to insert statements into latch unless there are some 5608 already (so that we do not create unnecessary jumps). */ 5609 if (cand->pos == IP_END 5610 && empty_block_p (ip_end_pos (data->current_loop))) 5611 cost++; 5612 5613 cand->cost = cost; 5614 cand->cost_step = cost_step; 5615 } 5616 5617 /* Determines costs of computation of the candidates. 
*/ 5618
5619 static void
5620 determine_iv_costs (struct ivopts_data *data)
5621 {
5622 unsigned i;
5623
5624 if (dump_file && (dump_flags & TDF_DETAILS))
5625 {
5626 fprintf (dump_file, "<Candidate Costs>:\n");
5627 fprintf (dump_file, " cand\tcost\n");
5628 }
5629
5630 for (i = 0; i < data->vcands.length (); i++)
5631 {
5632 struct iv_cand *cand = data->vcands[i];
5633
5634 determine_iv_cost (data, cand);
5635
5636 if (dump_file && (dump_flags & TDF_DETAILS))
5637 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5638 }
5639
5640 if (dump_file && (dump_flags & TDF_DETAILS))
5641 fprintf (dump_file, "\n");
5642 }
5643
5644 /* Estimate register pressure for a loop having N_INVS invariants and N_CANDS
5645 induction variables. Note N_INVS includes both invariant variables and
5646 invariant expressions. */
5647
5648 static unsigned
5649 ivopts_estimate_reg_pressure (struct ivopts_data *data, unsigned n_invs,
5650 unsigned n_cands)
5651 {
5652 unsigned cost;
5653 unsigned n_old = data->regs_used, n_new = n_invs + n_cands;
5654 unsigned regs_needed = n_new + n_old, available_regs = target_avail_regs;
5655 bool speed = data->speed;
5656
5657 /* If there is a call in the loop body, the call-clobbered registers
5658 are not available for loop invariants. */
5659 if (data->body_includes_call)
5660 available_regs = available_regs - target_clobbered_regs;
5661
5662 /* If we have enough registers. */
5663 if (regs_needed + target_res_regs < available_regs)
5664 cost = n_new;
5665 /* If close to running out of registers, try to preserve them. */
5666 else if (regs_needed <= available_regs)
5667 cost = target_reg_cost [speed] * regs_needed;
5668 /* If the required registers exceed those available but the number of
5669 candidates does not, penalize the extra registers using target_spill_cost. */
5670 else if (n_cands <= available_regs)
5671 cost = target_reg_cost [speed] * available_regs
5672 + target_spill_cost [speed] * (regs_needed - available_regs);
5673 /* If even the number of candidates exceeds the available registers, penalize
5674 the extra candidate registers using target_spill_cost * 2, because spilling
5675 an induction variable is more expensive than spilling an invariant. */
5676 else
5677 cost = target_reg_cost [speed] * available_regs
5678 + target_spill_cost [speed] * (n_cands - available_regs) * 2
5679 + target_spill_cost [speed] * (regs_needed - n_cands);
5680
5681 /* Finally, add the number of candidates, so that we prefer eliminating
5682 induction variables if possible. */
5683 return cost + n_cands;
5684 }
5685
5686 /* For each size of the induction variable set, determine the penalty.
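   As a hedged worked example for the estimate above (all numbers
   hypothetical):

     regs_used = 3, n_invs = 2, n_cands = 2   so regs_needed = 7
     available_regs = 8, target_res_regs = 3  so 7 + 3 >= 8 and 7 <= 8

   hence the second branch applies and the estimate is
   target_reg_cost[speed] * 7, plus 2 for the candidates.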
*/ 5687 5688 static void 5689 determine_set_costs (struct ivopts_data *data) 5690 { 5691 unsigned j, n; 5692 gphi *phi; 5693 gphi_iterator psi; 5694 tree op; 5695 struct loop *loop = data->current_loop; 5696 bitmap_iterator bi; 5697 5698 if (dump_file && (dump_flags & TDF_DETAILS)) 5699 { 5700 fprintf (dump_file, "<Global Costs>:\n"); 5701 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs); 5702 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs); 5703 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]); 5704 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]); 5705 } 5706 5707 n = 0; 5708 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi)) 5709 { 5710 phi = psi.phi (); 5711 op = PHI_RESULT (phi); 5712 5713 if (virtual_operand_p (op)) 5714 continue; 5715 5716 if (get_iv (data, op)) 5717 continue; 5718 5719 if (!POINTER_TYPE_P (TREE_TYPE (op)) 5720 && !INTEGRAL_TYPE_P (TREE_TYPE (op))) 5721 continue; 5722 5723 n++; 5724 } 5725 5726 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi) 5727 { 5728 struct version_info *info = ver_info (data, j); 5729 5730 if (info->inv_id && info->has_nonlin_use) 5731 n++; 5732 } 5733 5734 data->regs_used = n; 5735 if (dump_file && (dump_flags & TDF_DETAILS)) 5736 fprintf (dump_file, " regs_used %d\n", n); 5737 5738 if (dump_file && (dump_flags & TDF_DETAILS)) 5739 { 5740 fprintf (dump_file, " cost for size:\n"); 5741 fprintf (dump_file, " ivs\tcost\n"); 5742 for (j = 0; j <= 2 * target_avail_regs; j++) 5743 fprintf (dump_file, " %d\t%d\n", j, 5744 ivopts_estimate_reg_pressure (data, 0, j)); 5745 fprintf (dump_file, "\n"); 5746 } 5747 } 5748 5749 /* Returns true if A is a cheaper cost pair than B. */ 5750 5751 static bool 5752 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b) 5753 { 5754 if (!a) 5755 return false; 5756 5757 if (!b) 5758 return true; 5759 5760 if (a->cost < b->cost) 5761 return true; 5762 5763 if (b->cost < a->cost) 5764 return false; 5765 5766 /* In case the costs are the same, prefer the cheaper candidate. */ 5767 if (a->cand->cost < b->cand->cost) 5768 return true; 5769 5770 return false; 5771 } 5772 5773 /* Compare if A is a more expensive cost pair than B. Return 1, 0 and -1 5774 for more expensive, equal and cheaper respectively. */ 5775 5776 static int 5777 compare_cost_pair (struct cost_pair *a, struct cost_pair *b) 5778 { 5779 if (cheaper_cost_pair (a, b)) 5780 return -1; 5781 if (cheaper_cost_pair (b, a)) 5782 return 1; 5783 5784 return 0; 5785 } 5786 5787 /* Returns candidate by that USE is expressed in IVS. */ 5788 5789 static struct cost_pair * 5790 iv_ca_cand_for_group (struct iv_ca *ivs, struct iv_group *group) 5791 { 5792 return ivs->cand_for_group[group->id]; 5793 } 5794 5795 /* Computes the cost field of IVS structure. */ 5796 5797 static void 5798 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs) 5799 { 5800 comp_cost cost = ivs->cand_use_cost; 5801 5802 cost += ivs->cand_cost; 5803 cost += ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands); 5804 ivs->cost = cost; 5805 } 5806 5807 /* Remove use of invariants in set INVS by decreasing counter in N_INV_USES 5808 and IVS. 
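   For instance (invariant id hypothetical), if invariant 7 is referenced by
   two selected cost pairs then n_inv_uses[7] is 2; removing one reference
   leaves ivs->n_invs unchanged, and only when the counter drops to zero is
   ivs->n_invs decremented, mirroring the increment path in
   iv_ca_set_add_invs below.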
*/ 5809 5810 static void 5811 iv_ca_set_remove_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses) 5812 { 5813 bitmap_iterator bi; 5814 unsigned iid; 5815 5816 if (!invs) 5817 return; 5818 5819 gcc_assert (n_inv_uses != NULL); 5820 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi) 5821 { 5822 n_inv_uses[iid]--; 5823 if (n_inv_uses[iid] == 0) 5824 ivs->n_invs--; 5825 } 5826 } 5827 5828 /* Set USE not to be expressed by any candidate in IVS. */ 5829 5830 static void 5831 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs, 5832 struct iv_group *group) 5833 { 5834 unsigned gid = group->id, cid; 5835 struct cost_pair *cp; 5836 5837 cp = ivs->cand_for_group[gid]; 5838 if (!cp) 5839 return; 5840 cid = cp->cand->id; 5841 5842 ivs->bad_groups++; 5843 ivs->cand_for_group[gid] = NULL; 5844 ivs->n_cand_uses[cid]--; 5845 5846 if (ivs->n_cand_uses[cid] == 0) 5847 { 5848 bitmap_clear_bit (ivs->cands, cid); 5849 ivs->n_cands--; 5850 ivs->cand_cost -= cp->cand->cost; 5851 iv_ca_set_remove_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses); 5852 iv_ca_set_remove_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses); 5853 } 5854 5855 ivs->cand_use_cost -= cp->cost; 5856 iv_ca_set_remove_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses); 5857 iv_ca_set_remove_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses); 5858 iv_ca_recount_cost (data, ivs); 5859 } 5860 5861 /* Add use of invariants in set INVS by increasing counter in N_INV_USES and 5862 IVS. */ 5863 5864 static void 5865 iv_ca_set_add_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses) 5866 { 5867 bitmap_iterator bi; 5868 unsigned iid; 5869 5870 if (!invs) 5871 return; 5872 5873 gcc_assert (n_inv_uses != NULL); 5874 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi) 5875 { 5876 n_inv_uses[iid]++; 5877 if (n_inv_uses[iid] == 1) 5878 ivs->n_invs++; 5879 } 5880 } 5881 5882 /* Set cost pair for GROUP in set IVS to CP. */ 5883 5884 static void 5885 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs, 5886 struct iv_group *group, struct cost_pair *cp) 5887 { 5888 unsigned gid = group->id, cid; 5889 5890 if (ivs->cand_for_group[gid] == cp) 5891 return; 5892 5893 if (ivs->cand_for_group[gid]) 5894 iv_ca_set_no_cp (data, ivs, group); 5895 5896 if (cp) 5897 { 5898 cid = cp->cand->id; 5899 5900 ivs->bad_groups--; 5901 ivs->cand_for_group[gid] = cp; 5902 ivs->n_cand_uses[cid]++; 5903 if (ivs->n_cand_uses[cid] == 1) 5904 { 5905 bitmap_set_bit (ivs->cands, cid); 5906 ivs->n_cands++; 5907 ivs->cand_cost += cp->cand->cost; 5908 iv_ca_set_add_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses); 5909 iv_ca_set_add_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses); 5910 } 5911 5912 ivs->cand_use_cost += cp->cost; 5913 iv_ca_set_add_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses); 5914 iv_ca_set_add_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses); 5915 iv_ca_recount_cost (data, ivs); 5916 } 5917 } 5918 5919 /* Extend set IVS by expressing USE by some of the candidates in it 5920 if possible. Consider all important candidates if candidates in 5921 set IVS don't give any result. 
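   For example, if IVS so far uses, say, only candidates 2 and 7 and neither of them
   has a finite cost pair for this group, the fallback loop below scans
   data->important_candidates and picks the cheapest pair found there. If even that
   fails, the group keeps a NULL cost pair; it then counts towards bad_groups and the
   whole assignment is treated as infinite_cost until the group is expressed some
   other way.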
*/ 5922 5923 static void 5924 iv_ca_add_group (struct ivopts_data *data, struct iv_ca *ivs, 5925 struct iv_group *group) 5926 { 5927 struct cost_pair *best_cp = NULL, *cp; 5928 bitmap_iterator bi; 5929 unsigned i; 5930 struct iv_cand *cand; 5931 5932 gcc_assert (ivs->upto >= group->id); 5933 ivs->upto++; 5934 ivs->bad_groups++; 5935 5936 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi) 5937 { 5938 cand = data->vcands[i]; 5939 cp = get_group_iv_cost (data, group, cand); 5940 if (cheaper_cost_pair (cp, best_cp)) 5941 best_cp = cp; 5942 } 5943 5944 if (best_cp == NULL) 5945 { 5946 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi) 5947 { 5948 cand = data->vcands[i]; 5949 cp = get_group_iv_cost (data, group, cand); 5950 if (cheaper_cost_pair (cp, best_cp)) 5951 best_cp = cp; 5952 } 5953 } 5954 5955 iv_ca_set_cp (data, ivs, group, best_cp); 5956 } 5957 5958 /* Get cost for assignment IVS. */ 5959 5960 static comp_cost 5961 iv_ca_cost (struct iv_ca *ivs) 5962 { 5963 /* This was a conditional expression but it triggered a bug in 5964 Sun C 5.5. */ 5965 if (ivs->bad_groups) 5966 return infinite_cost; 5967 else 5968 return ivs->cost; 5969 } 5970 5971 /* Compare if applying NEW_CP to GROUP for IVS introduces more invariants 5972 than OLD_CP. Return 1, 0 and -1 for more, equal and fewer invariants 5973 respectively. */ 5974 5975 static int 5976 iv_ca_compare_deps (struct ivopts_data *data, struct iv_ca *ivs, 5977 struct iv_group *group, struct cost_pair *old_cp, 5978 struct cost_pair *new_cp) 5979 { 5980 gcc_assert (old_cp && new_cp && old_cp != new_cp); 5981 unsigned old_n_invs = ivs->n_invs; 5982 iv_ca_set_cp (data, ivs, group, new_cp); 5983 unsigned new_n_invs = ivs->n_invs; 5984 iv_ca_set_cp (data, ivs, group, old_cp); 5985 5986 return new_n_invs > old_n_invs ? 1 : (new_n_invs < old_n_invs ? -1 : 0); 5987 } 5988 5989 /* Creates change of expressing GROUP by NEW_CP instead of OLD_CP and chains 5990 it before NEXT. */ 5991 5992 static struct iv_ca_delta * 5993 iv_ca_delta_add (struct iv_group *group, struct cost_pair *old_cp, 5994 struct cost_pair *new_cp, struct iv_ca_delta *next) 5995 { 5996 struct iv_ca_delta *change = XNEW (struct iv_ca_delta); 5997 5998 change->group = group; 5999 change->old_cp = old_cp; 6000 change->new_cp = new_cp; 6001 change->next = next; 6002 6003 return change; 6004 } 6005 6006 /* Joins two lists of changes L1 and L2. Destructive -- old lists 6007 are rewritten. */ 6008 6009 static struct iv_ca_delta * 6010 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2) 6011 { 6012 struct iv_ca_delta *last; 6013 6014 if (!l2) 6015 return l1; 6016 6017 if (!l1) 6018 return l2; 6019 6020 for (last = l1; last->next; last = last->next) 6021 continue; 6022 last->next = l2; 6023 6024 return l1; 6025 } 6026 6027 /* Reverse the list of changes DELTA, forming the inverse to it. */ 6028 6029 static struct iv_ca_delta * 6030 iv_ca_delta_reverse (struct iv_ca_delta *delta) 6031 { 6032 struct iv_ca_delta *act, *next, *prev = NULL; 6033 6034 for (act = delta; act; act = next) 6035 { 6036 next = act->next; 6037 act->next = prev; 6038 prev = act; 6039 6040 std::swap (act->old_cp, act->new_cp); 6041 } 6042 6043 return prev; 6044 } 6045 6046 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are 6047 reverted instead. 
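   The deltas are what make the search loops below cheap: instead of copying the
   whole iv_ca, a tentative change is described by a list built with iv_ca_delta_add,
   applied with FORWARD = true to measure its cost, and then applied again with
   FORWARD = false to restore the previous assignment. The typical pattern, as used
   by iv_ca_extend and iv_ca_narrow, is

     delta = iv_ca_delta_add (group, old_cp, new_cp, delta);
     iv_ca_delta_commit (data, ivs, delta, true);
     cost = iv_ca_cost (ivs);
     iv_ca_delta_commit (data, ivs, delta, false);

   and only the best delta found is finally committed for real.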
*/ 6048 6049 static void 6050 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs, 6051 struct iv_ca_delta *delta, bool forward) 6052 { 6053 struct cost_pair *from, *to; 6054 struct iv_ca_delta *act; 6055 6056 if (!forward) 6057 delta = iv_ca_delta_reverse (delta); 6058 6059 for (act = delta; act; act = act->next) 6060 { 6061 from = act->old_cp; 6062 to = act->new_cp; 6063 gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from); 6064 iv_ca_set_cp (data, ivs, act->group, to); 6065 } 6066 6067 if (!forward) 6068 iv_ca_delta_reverse (delta); 6069 } 6070 6071 /* Returns true if CAND is used in IVS. */ 6072 6073 static bool 6074 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand) 6075 { 6076 return ivs->n_cand_uses[cand->id] > 0; 6077 } 6078 6079 /* Returns number of induction variable candidates in the set IVS. */ 6080 6081 static unsigned 6082 iv_ca_n_cands (struct iv_ca *ivs) 6083 { 6084 return ivs->n_cands; 6085 } 6086 6087 /* Free the list of changes DELTA. */ 6088 6089 static void 6090 iv_ca_delta_free (struct iv_ca_delta **delta) 6091 { 6092 struct iv_ca_delta *act, *next; 6093 6094 for (act = *delta; act; act = next) 6095 { 6096 next = act->next; 6097 free (act); 6098 } 6099 6100 *delta = NULL; 6101 } 6102 6103 /* Allocates new iv candidates assignment. */ 6104 6105 static struct iv_ca * 6106 iv_ca_new (struct ivopts_data *data) 6107 { 6108 struct iv_ca *nw = XNEW (struct iv_ca); 6109 6110 nw->upto = 0; 6111 nw->bad_groups = 0; 6112 nw->cand_for_group = XCNEWVEC (struct cost_pair *, 6113 data->vgroups.length ()); 6114 nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ()); 6115 nw->cands = BITMAP_ALLOC (NULL); 6116 nw->n_cands = 0; 6117 nw->n_invs = 0; 6118 nw->cand_use_cost = no_cost; 6119 nw->cand_cost = 0; 6120 nw->n_inv_var_uses = XCNEWVEC (unsigned, data->max_inv_var_id + 1); 6121 nw->n_inv_expr_uses = XCNEWVEC (unsigned, data->max_inv_expr_id + 1); 6122 nw->cost = no_cost; 6123 6124 return nw; 6125 } 6126 6127 /* Free memory occupied by the set IVS. */ 6128 6129 static void 6130 iv_ca_free (struct iv_ca **ivs) 6131 { 6132 free ((*ivs)->cand_for_group); 6133 free ((*ivs)->n_cand_uses); 6134 BITMAP_FREE ((*ivs)->cands); 6135 free ((*ivs)->n_inv_var_uses); 6136 free ((*ivs)->n_inv_expr_uses); 6137 free (*ivs); 6138 *ivs = NULL; 6139 } 6140 6141 /* Dumps IVS to FILE. 
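   In the detailed pass dump the output of this function looks roughly as follows
   (the numbers here are invented and only show the layout):

     cost: 24 (complexity 2)
     cand_cost: 8
     cand_group_cost: 10 (complexity 2)
     candidates: 1, 5
      group:0 --> iv_cand:1, cost=(4,1)
      group:1 --> iv_cand:5, cost=(6,1)
     invariant variables: 2
     invariant expressions: 1, 3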
*/ 6142 6143 static void 6144 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs) 6145 { 6146 unsigned i; 6147 comp_cost cost = iv_ca_cost (ivs); 6148 6149 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, 6150 cost.complexity); 6151 fprintf (file, " cand_cost: %d\n cand_group_cost: %d (complexity %d)\n", 6152 ivs->cand_cost, ivs->cand_use_cost.cost, 6153 ivs->cand_use_cost.complexity); 6154 bitmap_print (file, ivs->cands, " candidates: ","\n"); 6155 6156 for (i = 0; i < ivs->upto; i++) 6157 { 6158 struct iv_group *group = data->vgroups[i]; 6159 struct cost_pair *cp = iv_ca_cand_for_group (ivs, group); 6160 if (cp) 6161 fprintf (file, " group:%d --> iv_cand:%d, cost=(%d,%d)\n", 6162 group->id, cp->cand->id, cp->cost.cost, 6163 cp->cost.complexity); 6164 else 6165 fprintf (file, " group:%d --> ??\n", group->id); 6166 } 6167 6168 const char *pref = ""; 6169 fprintf (file, " invariant variables: "); 6170 for (i = 1; i <= data->max_inv_var_id; i++) 6171 if (ivs->n_inv_var_uses[i]) 6172 { 6173 fprintf (file, "%s%d", pref, i); 6174 pref = ", "; 6175 } 6176 6177 pref = ""; 6178 fprintf (file, "\n invariant expressions: "); 6179 for (i = 1; i <= data->max_inv_expr_id; i++) 6180 if (ivs->n_inv_expr_uses[i]) 6181 { 6182 fprintf (file, "%s%d", pref, i); 6183 pref = ", "; 6184 } 6185 6186 fprintf (file, "\n\n"); 6187 } 6188 6189 /* Try changing candidate in IVS to CAND for each use. Return the cost of the 6190 new set, and store the differences in DELTA. The number of induction variables 6191 in the new set is stored in N_IVS. MIN_NCAND is a flag; when it is true, 6192 the function tries to find a solution with a minimal number of iv candidates. */ 6193 6194 static comp_cost 6195 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs, 6196 struct iv_cand *cand, struct iv_ca_delta **delta, 6197 unsigned *n_ivs, bool min_ncand) 6198 { 6199 unsigned i; 6200 comp_cost cost; 6201 struct iv_group *group; 6202 struct cost_pair *old_cp, *new_cp; 6203 6204 *delta = NULL; 6205 for (i = 0; i < ivs->upto; i++) 6206 { 6207 group = data->vgroups[i]; 6208 old_cp = iv_ca_cand_for_group (ivs, group); 6209 6210 if (old_cp 6211 && old_cp->cand == cand) 6212 continue; 6213 6214 new_cp = get_group_iv_cost (data, group, cand); 6215 if (!new_cp) 6216 continue; 6217 6218 if (!min_ncand) 6219 { 6220 int cmp_invs = iv_ca_compare_deps (data, ivs, group, old_cp, new_cp); 6221 /* Skip if new_cp depends on more invariants. */ 6222 if (cmp_invs > 0) 6223 continue; 6224 6225 int cmp_cost = compare_cost_pair (new_cp, old_cp); 6226 /* Skip if new_cp is not cheaper. */ 6227 if (cmp_cost > 0 || (cmp_cost == 0 && cmp_invs == 0)) 6228 continue; 6229 } 6230 6231 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta); 6232 } 6233 6234 iv_ca_delta_commit (data, ivs, *delta, true); 6235 cost = iv_ca_cost (ivs); 6236 if (n_ivs) 6237 *n_ivs = iv_ca_n_cands (ivs); 6238 iv_ca_delta_commit (data, ivs, *delta, false); 6239 6240 return cost; 6241 } 6242 6243 /* Try narrowing set IVS by removing CAND. Return the cost of 6244 the new set and store the differences in DELTA. START is 6245 the candidate with which we start narrowing.
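   In other words, every group currently expressed by CAND has to be moved to some
   candidate that remains in IVS; START, if non-NULL, is tried as the default
   replacement, and the loops below then look for a cheaper one among the remaining
   candidates. If some group served by CAND has no finite cost pair for any remaining
   candidate, the narrowing is impossible and infinite_cost is returned. For example,
   removing candidate 4 from the set {1, 4} can only succeed if every group that
   currently uses candidate 4 can also be expressed by candidate 1.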
*/ 6246 6247 static comp_cost 6248 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs, 6249 struct iv_cand *cand, struct iv_cand *start, 6250 struct iv_ca_delta **delta) 6251 { 6252 unsigned i, ci; 6253 struct iv_group *group; 6254 struct cost_pair *old_cp, *new_cp, *cp; 6255 bitmap_iterator bi; 6256 struct iv_cand *cnd; 6257 comp_cost cost, best_cost, acost; 6258 6259 *delta = NULL; 6260 for (i = 0; i < data->vgroups.length (); i++) 6261 { 6262 group = data->vgroups[i]; 6263 6264 old_cp = iv_ca_cand_for_group (ivs, group); 6265 if (old_cp->cand != cand) 6266 continue; 6267 6268 best_cost = iv_ca_cost (ivs); 6269 /* Start narrowing with START. */ 6270 new_cp = get_group_iv_cost (data, group, start); 6271 6272 if (data->consider_all_candidates) 6273 { 6274 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi) 6275 { 6276 if (ci == cand->id || (start && ci == start->id)) 6277 continue; 6278 6279 cnd = data->vcands[ci]; 6280 6281 cp = get_group_iv_cost (data, group, cnd); 6282 if (!cp) 6283 continue; 6284 6285 iv_ca_set_cp (data, ivs, group, cp); 6286 acost = iv_ca_cost (ivs); 6287 6288 if (acost < best_cost) 6289 { 6290 best_cost = acost; 6291 new_cp = cp; 6292 } 6293 } 6294 } 6295 else 6296 { 6297 EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi) 6298 { 6299 if (ci == cand->id || (start && ci == start->id)) 6300 continue; 6301 6302 cnd = data->vcands[ci]; 6303 6304 cp = get_group_iv_cost (data, group, cnd); 6305 if (!cp) 6306 continue; 6307 6308 iv_ca_set_cp (data, ivs, group, cp); 6309 acost = iv_ca_cost (ivs); 6310 6311 if (acost < best_cost) 6312 { 6313 best_cost = acost; 6314 new_cp = cp; 6315 } 6316 } 6317 } 6318 /* Restore to old cp for use. */ 6319 iv_ca_set_cp (data, ivs, group, old_cp); 6320 6321 if (!new_cp) 6322 { 6323 iv_ca_delta_free (delta); 6324 return infinite_cost; 6325 } 6326 6327 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta); 6328 } 6329 6330 iv_ca_delta_commit (data, ivs, *delta, true); 6331 cost = iv_ca_cost (ivs); 6332 iv_ca_delta_commit (data, ivs, *delta, false); 6333 6334 return cost; 6335 } 6336 6337 /* Try optimizing the set of candidates IVS by removing candidates different 6338 from to EXCEPT_CAND from it. Return cost of the new set, and store 6339 differences in DELTA. */ 6340 6341 static comp_cost 6342 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs, 6343 struct iv_cand *except_cand, struct iv_ca_delta **delta) 6344 { 6345 bitmap_iterator bi; 6346 struct iv_ca_delta *act_delta, *best_delta; 6347 unsigned i; 6348 comp_cost best_cost, acost; 6349 struct iv_cand *cand; 6350 6351 best_delta = NULL; 6352 best_cost = iv_ca_cost (ivs); 6353 6354 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi) 6355 { 6356 cand = data->vcands[i]; 6357 6358 if (cand == except_cand) 6359 continue; 6360 6361 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta); 6362 6363 if (acost < best_cost) 6364 { 6365 best_cost = acost; 6366 iv_ca_delta_free (&best_delta); 6367 best_delta = act_delta; 6368 } 6369 else 6370 iv_ca_delta_free (&act_delta); 6371 } 6372 6373 if (!best_delta) 6374 { 6375 *delta = NULL; 6376 return best_cost; 6377 } 6378 6379 /* Recurse to possibly remove other unnecessary ivs. 
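   Removing one candidate can make further candidates redundant, because the groups
   they serve may now be re-expressed more cheaply by what is left. We therefore
   commit the best single removal found above, prune the smaller set recursively,
   and undo the commit again; joining the recursive delta with the local one below
   hands the caller a single composite change that describes all removals at once.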
*/ 6380 iv_ca_delta_commit (data, ivs, best_delta, true); 6381 best_cost = iv_ca_prune (data, ivs, except_cand, delta); 6382 iv_ca_delta_commit (data, ivs, best_delta, false); 6383 *delta = iv_ca_delta_join (best_delta, *delta); 6384 return best_cost; 6385 } 6386 6387 /* Check if CAND_IDX is a candidate other than OLD_CAND and has 6388 cheaper local cost for GROUP than BEST_CP. Return pointer to 6389 the corresponding cost_pair, otherwise just return BEST_CP. */ 6390 6391 static struct cost_pair* 6392 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group, 6393 unsigned int cand_idx, struct iv_cand *old_cand, 6394 struct cost_pair *best_cp) 6395 { 6396 struct iv_cand *cand; 6397 struct cost_pair *cp; 6398 6399 gcc_assert (old_cand != NULL && best_cp != NULL); 6400 if (cand_idx == old_cand->id) 6401 return best_cp; 6402 6403 cand = data->vcands[cand_idx]; 6404 cp = get_group_iv_cost (data, group, cand); 6405 if (cp != NULL && cheaper_cost_pair (cp, best_cp)) 6406 return cp; 6407 6408 return best_cp; 6409 } 6410 6411 /* Try breaking local optimal fixed-point for IVS by replacing candidates 6412 which are used by more than one iv uses. For each of those candidates, 6413 this function tries to represent iv uses under that candidate using 6414 other ones with lower local cost, then tries to prune the new set. 6415 If the new set has lower cost, It returns the new cost after recording 6416 candidate replacement in list DELTA. */ 6417 6418 static comp_cost 6419 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs, 6420 struct iv_ca_delta **delta) 6421 { 6422 bitmap_iterator bi, bj; 6423 unsigned int i, j, k; 6424 struct iv_cand *cand; 6425 comp_cost orig_cost, acost; 6426 struct iv_ca_delta *act_delta, *tmp_delta; 6427 struct cost_pair *old_cp, *best_cp = NULL; 6428 6429 *delta = NULL; 6430 orig_cost = iv_ca_cost (ivs); 6431 6432 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi) 6433 { 6434 if (ivs->n_cand_uses[i] == 1 6435 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND) 6436 continue; 6437 6438 cand = data->vcands[i]; 6439 6440 act_delta = NULL; 6441 /* Represent uses under current candidate using other ones with 6442 lower local cost. */ 6443 for (j = 0; j < ivs->upto; j++) 6444 { 6445 struct iv_group *group = data->vgroups[j]; 6446 old_cp = iv_ca_cand_for_group (ivs, group); 6447 6448 if (old_cp->cand != cand) 6449 continue; 6450 6451 best_cp = old_cp; 6452 if (data->consider_all_candidates) 6453 for (k = 0; k < data->vcands.length (); k++) 6454 best_cp = cheaper_cost_with_cand (data, group, k, 6455 old_cp->cand, best_cp); 6456 else 6457 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj) 6458 best_cp = cheaper_cost_with_cand (data, group, k, 6459 old_cp->cand, best_cp); 6460 6461 if (best_cp == old_cp) 6462 continue; 6463 6464 act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta); 6465 } 6466 /* No need for further prune. */ 6467 if (!act_delta) 6468 continue; 6469 6470 /* Prune the new candidate set. */ 6471 iv_ca_delta_commit (data, ivs, act_delta, true); 6472 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta); 6473 iv_ca_delta_commit (data, ivs, act_delta, false); 6474 act_delta = iv_ca_delta_join (act_delta, tmp_delta); 6475 6476 if (acost < orig_cost) 6477 { 6478 *delta = act_delta; 6479 return acost; 6480 } 6481 else 6482 iv_ca_delta_free (&act_delta); 6483 } 6484 6485 return orig_cost; 6486 } 6487 6488 /* Tries to extend the sets IVS in the best possible way in order to 6489 express the GROUP. 
If ORIGINALP is true, prefer candidates from 6490 the original set of IVs, otherwise favor important candidates not 6491 based on any memory object. */ 6492 6493 static bool 6494 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs, 6495 struct iv_group *group, bool originalp) 6496 { 6497 comp_cost best_cost, act_cost; 6498 unsigned i; 6499 bitmap_iterator bi; 6500 struct iv_cand *cand; 6501 struct iv_ca_delta *best_delta = NULL, *act_delta; 6502 struct cost_pair *cp; 6503 6504 iv_ca_add_group (data, ivs, group); 6505 best_cost = iv_ca_cost (ivs); 6506 cp = iv_ca_cand_for_group (ivs, group); 6507 if (cp) 6508 { 6509 best_delta = iv_ca_delta_add (group, NULL, cp, NULL); 6510 iv_ca_set_no_cp (data, ivs, group); 6511 } 6512 6513 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise 6514 first try important candidates not based on any memory object. Only if 6515 this fails, try the specific ones. Rationale -- in loops with many 6516 variables the best choice often is to use just one generic biv. If we 6517 added here many ivs specific to the uses, the optimization algorithm later 6518 would be likely to get stuck in a local minimum, thus causing us to create 6519 too many ivs. The approach from few ivs to more seems more likely to be 6520 successful -- starting from few ivs, replacing an expensive use by a 6521 specific iv should always be a win. */ 6522 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi) 6523 { 6524 cand = data->vcands[i]; 6525 6526 if (originalp && cand->pos !=IP_ORIGINAL) 6527 continue; 6528 6529 if (!originalp && cand->iv->base_object != NULL_TREE) 6530 continue; 6531 6532 if (iv_ca_cand_used_p (ivs, cand)) 6533 continue; 6534 6535 cp = get_group_iv_cost (data, group, cand); 6536 if (!cp) 6537 continue; 6538 6539 iv_ca_set_cp (data, ivs, group, cp); 6540 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, 6541 true); 6542 iv_ca_set_no_cp (data, ivs, group); 6543 act_delta = iv_ca_delta_add (group, NULL, cp, act_delta); 6544 6545 if (act_cost < best_cost) 6546 { 6547 best_cost = act_cost; 6548 6549 iv_ca_delta_free (&best_delta); 6550 best_delta = act_delta; 6551 } 6552 else 6553 iv_ca_delta_free (&act_delta); 6554 } 6555 6556 if (best_cost.infinite_cost_p ()) 6557 { 6558 for (i = 0; i < group->n_map_members; i++) 6559 { 6560 cp = group->cost_map + i; 6561 cand = cp->cand; 6562 if (!cand) 6563 continue; 6564 6565 /* Already tried this. */ 6566 if (cand->important) 6567 { 6568 if (originalp && cand->pos == IP_ORIGINAL) 6569 continue; 6570 if (!originalp && cand->iv->base_object == NULL_TREE) 6571 continue; 6572 } 6573 6574 if (iv_ca_cand_used_p (ivs, cand)) 6575 continue; 6576 6577 act_delta = NULL; 6578 iv_ca_set_cp (data, ivs, group, cp); 6579 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true); 6580 iv_ca_set_no_cp (data, ivs, group); 6581 act_delta = iv_ca_delta_add (group, 6582 iv_ca_cand_for_group (ivs, group), 6583 cp, act_delta); 6584 6585 if (act_cost < best_cost) 6586 { 6587 best_cost = act_cost; 6588 6589 if (best_delta) 6590 iv_ca_delta_free (&best_delta); 6591 best_delta = act_delta; 6592 } 6593 else 6594 iv_ca_delta_free (&act_delta); 6595 } 6596 } 6597 6598 iv_ca_delta_commit (data, ivs, best_delta, true); 6599 iv_ca_delta_free (&best_delta); 6600 6601 return !best_cost.infinite_cost_p (); 6602 } 6603 6604 /* Finds an initial assignment of candidates to uses. 
*/ 6605 6606 static struct iv_ca * 6607 get_initial_solution (struct ivopts_data *data, bool originalp) 6608 { 6609 unsigned i; 6610 struct iv_ca *ivs = iv_ca_new (data); 6611 6612 for (i = 0; i < data->vgroups.length (); i++) 6613 if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp)) 6614 { 6615 iv_ca_free (&ivs); 6616 return NULL; 6617 } 6618 6619 return ivs; 6620 } 6621 6622 /* Tries to improve the set of induction variables IVS. TRY_REPLACE_P 6623 points to a bool variable; if it is true, this function additionally tries to 6624 break a locally optimal fixed-point by replacing candidates in IVS. */ 6625 6626 static bool 6627 try_improve_iv_set (struct ivopts_data *data, 6628 struct iv_ca *ivs, bool *try_replace_p) 6629 { 6630 unsigned i, n_ivs; 6631 comp_cost acost, best_cost = iv_ca_cost (ivs); 6632 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta; 6633 struct iv_cand *cand; 6634 6635 /* Try extending the set of induction variables by one. */ 6636 for (i = 0; i < data->vcands.length (); i++) 6637 { 6638 cand = data->vcands[i]; 6639 6640 if (iv_ca_cand_used_p (ivs, cand)) 6641 continue; 6642 6643 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false); 6644 if (!act_delta) 6645 continue; 6646 6647 /* If we successfully added the candidate and the set is small enough, 6648 try optimizing it by removing other candidates. */ 6649 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND) 6650 { 6651 iv_ca_delta_commit (data, ivs, act_delta, true); 6652 acost = iv_ca_prune (data, ivs, cand, &tmp_delta); 6653 iv_ca_delta_commit (data, ivs, act_delta, false); 6654 act_delta = iv_ca_delta_join (act_delta, tmp_delta); 6655 } 6656 6657 if (acost < best_cost) 6658 { 6659 best_cost = acost; 6660 iv_ca_delta_free (&best_delta); 6661 best_delta = act_delta; 6662 } 6663 else 6664 iv_ca_delta_free (&act_delta); 6665 } 6666 6667 if (!best_delta) 6668 { 6669 /* Try removing the candidates from the set instead. */ 6670 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta); 6671 6672 if (!best_delta && *try_replace_p) 6673 { 6674 *try_replace_p = false; 6675 /* So far the candidate selection algorithm tends to choose fewer IVs, 6676 which lets it handle loops with many variables for which the best 6677 choice is often to use just one general biv. One weakness is that it 6678 cannot handle the opposite case, in which different candidates should 6679 be chosen for different uses. To solve this, we replace candidates in 6680 the manner described in the comment of iv_ca_replace, thus giving the 6681 general algorithm a chance to break the locally optimal fixed-point 6682 in these cases. */ 6683 best_cost = iv_ca_replace (data, ivs, &best_delta); 6684 } 6685 6686 if (!best_delta) 6687 return false; 6688 } 6689 6690 iv_ca_delta_commit (data, ivs, best_delta, true); 6691 gcc_assert (best_cost == iv_ca_cost (ivs)); 6692 iv_ca_delta_free (&best_delta); 6693 return true; 6694 } 6695 6696 /* Attempts to find the optimal set of induction variables. We use a simple 6697 greedy heuristic: we try to replace at most one candidate in the selected 6698 solution and remove the unused ivs while this improves the cost. */ 6699 6700 static struct iv_ca * 6701 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp) 6702 { 6703 struct iv_ca *set; 6704 bool try_replace_p = true; 6705 6706 /* Get the initial solution.
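   The loop below then repeatedly asks try_improve_iv_set for a cheaper assignment
   until no improving change is found. A purely hypothetical run: the initial
   solution uses candidates {0, 3} at cost 40; the first iteration extends the set
   by candidate 5 and prunes 0 and 3 away, reaching {5} at cost 28; the second
   iteration finds no delta that lowers the cost any further, so the search stops
   and {5} is returned.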
*/ 6707 set = get_initial_solution (data, originalp); 6708 if (!set) 6709 { 6710 if (dump_file && (dump_flags & TDF_DETAILS)) 6711 fprintf (dump_file, "Unable to substitute for ivs, failed.\n"); 6712 return NULL; 6713 } 6714 6715 if (dump_file && (dump_flags & TDF_DETAILS)) 6716 { 6717 fprintf (dump_file, "Initial set of candidates:\n"); 6718 iv_ca_dump (data, dump_file, set); 6719 } 6720 6721 while (try_improve_iv_set (data, set, &try_replace_p)) 6722 { 6723 if (dump_file && (dump_flags & TDF_DETAILS)) 6724 { 6725 fprintf (dump_file, "Improved to:\n"); 6726 iv_ca_dump (data, dump_file, set); 6727 } 6728 } 6729 6730 return set; 6731 } 6732 6733 static struct iv_ca * 6734 find_optimal_iv_set (struct ivopts_data *data) 6735 { 6736 unsigned i; 6737 comp_cost cost, origcost; 6738 struct iv_ca *set, *origset; 6739 6740 /* Determine the cost based on a strategy that starts with original IVs, 6741 and try again using a strategy that prefers candidates not based 6742 on any IVs. */ 6743 origset = find_optimal_iv_set_1 (data, true); 6744 set = find_optimal_iv_set_1 (data, false); 6745 6746 if (!origset && !set) 6747 return NULL; 6748 6749 origcost = origset ? iv_ca_cost (origset) : infinite_cost; 6750 cost = set ? iv_ca_cost (set) : infinite_cost; 6751 6752 if (dump_file && (dump_flags & TDF_DETAILS)) 6753 { 6754 fprintf (dump_file, "Original cost %d (complexity %d)\n\n", 6755 origcost.cost, origcost.complexity); 6756 fprintf (dump_file, "Final cost %d (complexity %d)\n\n", 6757 cost.cost, cost.complexity); 6758 } 6759 6760 /* Choose the one with the best cost. */ 6761 if (origcost <= cost) 6762 { 6763 if (set) 6764 iv_ca_free (&set); 6765 set = origset; 6766 } 6767 else if (origset) 6768 iv_ca_free (&origset); 6769 6770 for (i = 0; i < data->vgroups.length (); i++) 6771 { 6772 struct iv_group *group = data->vgroups[i]; 6773 group->selected = iv_ca_cand_for_group (set, group)->cand; 6774 } 6775 6776 return set; 6777 } 6778 6779 /* Creates a new induction variable corresponding to CAND. */ 6780 6781 static void 6782 create_new_iv (struct ivopts_data *data, struct iv_cand *cand) 6783 { 6784 gimple_stmt_iterator incr_pos; 6785 tree base; 6786 struct iv_use *use; 6787 struct iv_group *group; 6788 bool after = false; 6789 6790 gcc_assert (cand->iv != NULL); 6791 6792 switch (cand->pos) 6793 { 6794 case IP_NORMAL: 6795 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop)); 6796 break; 6797 6798 case IP_END: 6799 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop)); 6800 after = true; 6801 break; 6802 6803 case IP_AFTER_USE: 6804 after = true; 6805 /* fall through */ 6806 case IP_BEFORE_USE: 6807 incr_pos = gsi_for_stmt (cand->incremented_at); 6808 break; 6809 6810 case IP_ORIGINAL: 6811 /* Mark that the iv is preserved. */ 6812 name_info (data, cand->var_before)->preserve_biv = true; 6813 name_info (data, cand->var_after)->preserve_biv = true; 6814 6815 /* Rewrite the increment so that it uses var_before directly. */ 6816 use = find_interesting_uses_op (data, cand->var_after); 6817 group = data->vgroups[use->group_id]; 6818 group->selected = cand; 6819 return; 6820 } 6821 6822 gimple_add_tmp_var (cand->var_before); 6823 6824 base = unshare_expr (cand->iv->base); 6825 6826 create_iv (base, unshare_expr (cand->iv->step), 6827 cand->var_before, data->current_loop, 6828 &incr_pos, after, &cand->var_before, &cand->var_after); 6829 } 6830 6831 /* Creates new induction variables described in SET. 
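   For a candidate with, say, base 0 and step 4 placed at the normal position,
   create_iv called from create_new_iv above ends up materializing GIMPLE of roughly
   this shape (the names are invented):

     # ivtmp.7_1 = PHI <0(preheader), ivtmp.7_2(latch)>
     ...
     ivtmp.7_2 = ivtmp.7_1 + 4;

   with cand->var_before set to the PHI result and cand->var_after to the incremented
   name, so the rewriting code below can pick whichever value is live at a given use.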
*/ 6832 6833 static void 6834 create_new_ivs (struct ivopts_data *data, struct iv_ca *set) 6835 { 6836 unsigned i; 6837 struct iv_cand *cand; 6838 bitmap_iterator bi; 6839 6840 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi) 6841 { 6842 cand = data->vcands[i]; 6843 create_new_iv (data, cand); 6844 } 6845 6846 if (dump_file && (dump_flags & TDF_DETAILS)) 6847 { 6848 fprintf (dump_file, "Selected IV set for loop %d", 6849 data->current_loop->num); 6850 if (data->loop_loc != UNKNOWN_LOCATION) 6851 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc), 6852 LOCATION_LINE (data->loop_loc)); 6853 fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters", 6854 avg_loop_niter (data->current_loop)); 6855 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands)); 6856 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi) 6857 { 6858 cand = data->vcands[i]; 6859 dump_cand (dump_file, cand); 6860 } 6861 fprintf (dump_file, "\n"); 6862 } 6863 } 6864 6865 /* Rewrites USE (definition of iv used in a nonlinear expression) 6866 using candidate CAND. */ 6867 6868 static void 6869 rewrite_use_nonlinear_expr (struct ivopts_data *data, 6870 struct iv_use *use, struct iv_cand *cand) 6871 { 6872 gassign *ass; 6873 gimple_stmt_iterator bsi; 6874 tree comp, type = get_use_type (use), tgt; 6875 6876 /* An important special case -- if we are asked to express value of 6877 the original iv by itself, just exit; there is no need to 6878 introduce a new computation (that might also need casting the 6879 variable to unsigned and back). */ 6880 if (cand->pos == IP_ORIGINAL 6881 && cand->incremented_at == use->stmt) 6882 { 6883 tree op = NULL_TREE; 6884 enum tree_code stmt_code; 6885 6886 gcc_assert (is_gimple_assign (use->stmt)); 6887 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after); 6888 6889 /* Check whether we may leave the computation unchanged. 6890 This is the case only if it does not rely on other 6891 computations in the loop -- otherwise, the computation 6892 we rely upon may be removed in remove_unused_ivs, 6893 thus leading to ICE. */ 6894 stmt_code = gimple_assign_rhs_code (use->stmt); 6895 if (stmt_code == PLUS_EXPR 6896 || stmt_code == MINUS_EXPR 6897 || stmt_code == POINTER_PLUS_EXPR) 6898 { 6899 if (gimple_assign_rhs1 (use->stmt) == cand->var_before) 6900 op = gimple_assign_rhs2 (use->stmt); 6901 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before) 6902 op = gimple_assign_rhs1 (use->stmt); 6903 } 6904 6905 if (op != NULL_TREE) 6906 { 6907 if (expr_invariant_in_loop_p (data->current_loop, op)) 6908 return; 6909 if (TREE_CODE (op) == SSA_NAME) 6910 { 6911 struct iv *iv = get_iv (data, op); 6912 if (iv != NULL && integer_zerop (iv->step)) 6913 return; 6914 } 6915 } 6916 } 6917 6918 switch (gimple_code (use->stmt)) 6919 { 6920 case GIMPLE_PHI: 6921 tgt = PHI_RESULT (use->stmt); 6922 6923 /* If we should keep the biv, do not replace it. 
*/ 6924 if (name_info (data, tgt)->preserve_biv) 6925 return; 6926 6927 bsi = gsi_after_labels (gimple_bb (use->stmt)); 6928 break; 6929 6930 case GIMPLE_ASSIGN: 6931 tgt = gimple_assign_lhs (use->stmt); 6932 bsi = gsi_for_stmt (use->stmt); 6933 break; 6934 6935 default: 6936 gcc_unreachable (); 6937 } 6938 6939 aff_tree aff_inv, aff_var; 6940 if (!get_computation_aff_1 (data->current_loop, use->stmt, 6941 use, cand, &aff_inv, &aff_var)) 6942 gcc_unreachable (); 6943 6944 unshare_aff_combination (&aff_inv); 6945 unshare_aff_combination (&aff_var); 6946 /* Prefer CSE opportunity than loop invariant by adding offset at last 6947 so that iv_uses have different offsets can be CSEed. */ 6948 poly_widest_int offset = aff_inv.offset; 6949 aff_inv.offset = 0; 6950 6951 gimple_seq stmt_list = NULL, seq = NULL; 6952 tree comp_op1 = aff_combination_to_tree (&aff_inv); 6953 tree comp_op2 = aff_combination_to_tree (&aff_var); 6954 gcc_assert (comp_op1 && comp_op2); 6955 6956 comp_op1 = force_gimple_operand (comp_op1, &seq, true, NULL); 6957 gimple_seq_add_seq (&stmt_list, seq); 6958 comp_op2 = force_gimple_operand (comp_op2, &seq, true, NULL); 6959 gimple_seq_add_seq (&stmt_list, seq); 6960 6961 if (POINTER_TYPE_P (TREE_TYPE (comp_op2))) 6962 std::swap (comp_op1, comp_op2); 6963 6964 if (POINTER_TYPE_P (TREE_TYPE (comp_op1))) 6965 { 6966 comp = fold_build_pointer_plus (comp_op1, 6967 fold_convert (sizetype, comp_op2)); 6968 comp = fold_build_pointer_plus (comp, 6969 wide_int_to_tree (sizetype, offset)); 6970 } 6971 else 6972 { 6973 comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp_op1, 6974 fold_convert (TREE_TYPE (comp_op1), comp_op2)); 6975 comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp, 6976 wide_int_to_tree (TREE_TYPE (comp_op1), offset)); 6977 } 6978 6979 comp = fold_convert (type, comp); 6980 if (!valid_gimple_rhs_p (comp) 6981 || (gimple_code (use->stmt) != GIMPLE_PHI 6982 /* We can't allow re-allocating the stmt as it might be pointed 6983 to still. */ 6984 && (get_gimple_rhs_num_ops (TREE_CODE (comp)) 6985 >= gimple_num_ops (gsi_stmt (bsi))))) 6986 { 6987 comp = force_gimple_operand (comp, &seq, true, NULL); 6988 gimple_seq_add_seq (&stmt_list, seq); 6989 if (POINTER_TYPE_P (TREE_TYPE (tgt))) 6990 { 6991 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt)); 6992 /* As this isn't a plain copy we have to reset alignment 6993 information. */ 6994 if (SSA_NAME_PTR_INFO (comp)) 6995 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp)); 6996 } 6997 } 6998 6999 gsi_insert_seq_before (&bsi, stmt_list, GSI_SAME_STMT); 7000 if (gimple_code (use->stmt) == GIMPLE_PHI) 7001 { 7002 ass = gimple_build_assign (tgt, comp); 7003 gsi_insert_before (&bsi, ass, GSI_SAME_STMT); 7004 7005 bsi = gsi_for_stmt (use->stmt); 7006 remove_phi_node (&bsi, false); 7007 } 7008 else 7009 { 7010 gimple_assign_set_rhs_from_tree (&bsi, comp); 7011 use->stmt = gsi_stmt (bsi); 7012 } 7013 } 7014 7015 /* Performs a peephole optimization to reorder the iv update statement with 7016 a mem ref to enable instruction combining in later phases. The mem ref uses 7017 the iv value before the update, so the reordering transformation requires 7018 adjustment of the offset. CAND is the selected IV_CAND. 7019 7020 Example: 7021 7022 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset 7023 iv2 = iv1 + 1; 7024 7025 if (t < val) (1) 7026 goto L; 7027 goto Head; 7028 7029 7030 directly propagating t over to (1) will introduce overlapping live range 7031 thus increase register pressure. 
This peephole transform it into: 7032 7033 7034 iv2 = iv1 + 1; 7035 t = MEM_REF (base, iv2, 8, 8); 7036 if (t < val) 7037 goto L; 7038 goto Head; 7039 */ 7040 7041 static void 7042 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use) 7043 { 7044 tree var_after; 7045 gimple *iv_update, *stmt; 7046 basic_block bb; 7047 gimple_stmt_iterator gsi, gsi_iv; 7048 7049 if (cand->pos != IP_NORMAL) 7050 return; 7051 7052 var_after = cand->var_after; 7053 iv_update = SSA_NAME_DEF_STMT (var_after); 7054 7055 bb = gimple_bb (iv_update); 7056 gsi = gsi_last_nondebug_bb (bb); 7057 stmt = gsi_stmt (gsi); 7058 7059 /* Only handle conditional statement for now. */ 7060 if (gimple_code (stmt) != GIMPLE_COND) 7061 return; 7062 7063 gsi_prev_nondebug (&gsi); 7064 stmt = gsi_stmt (gsi); 7065 if (stmt != iv_update) 7066 return; 7067 7068 gsi_prev_nondebug (&gsi); 7069 if (gsi_end_p (gsi)) 7070 return; 7071 7072 stmt = gsi_stmt (gsi); 7073 if (gimple_code (stmt) != GIMPLE_ASSIGN) 7074 return; 7075 7076 if (stmt != use->stmt) 7077 return; 7078 7079 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME) 7080 return; 7081 7082 if (dump_file && (dump_flags & TDF_DETAILS)) 7083 { 7084 fprintf (dump_file, "Reordering \n"); 7085 print_gimple_stmt (dump_file, iv_update, 0); 7086 print_gimple_stmt (dump_file, use->stmt, 0); 7087 fprintf (dump_file, "\n"); 7088 } 7089 7090 gsi = gsi_for_stmt (use->stmt); 7091 gsi_iv = gsi_for_stmt (iv_update); 7092 gsi_move_before (&gsi_iv, &gsi); 7093 7094 cand->pos = IP_BEFORE_USE; 7095 cand->incremented_at = use->stmt; 7096 } 7097 7098 /* Return the alias pointer type that should be used for a MEM_REF 7099 associated with USE, which has type USE_PTR_ADDRESS. */ 7100 7101 static tree 7102 get_alias_ptr_type_for_ptr_address (iv_use *use) 7103 { 7104 gcall *call = as_a <gcall *> (use->stmt); 7105 switch (gimple_call_internal_fn (call)) 7106 { 7107 case IFN_MASK_LOAD: 7108 case IFN_MASK_STORE: 7109 /* The second argument contains the correct alias type. */ 7110 gcc_assert (use->op_p = gimple_call_arg_ptr (call, 0)); 7111 return TREE_TYPE (gimple_call_arg (call, 1)); 7112 7113 default: 7114 gcc_unreachable (); 7115 } 7116 } 7117 7118 7119 /* Rewrites USE (address that is an iv) using candidate CAND. */ 7120 7121 static void 7122 rewrite_use_address (struct ivopts_data *data, 7123 struct iv_use *use, struct iv_cand *cand) 7124 { 7125 aff_tree aff; 7126 bool ok; 7127 7128 adjust_iv_update_pos (cand, use); 7129 ok = get_computation_aff (data->current_loop, use->stmt, use, cand, &aff); 7130 gcc_assert (ok); 7131 unshare_aff_combination (&aff); 7132 7133 /* To avoid undefined overflow problems, all IV candidates use unsigned 7134 integer types. The drawback is that this makes it impossible for 7135 create_mem_ref to distinguish an IV that is based on a memory object 7136 from one that represents simply an offset. 7137 7138 To work around this problem, we pass a hint to create_mem_ref that 7139 indicates which variable (if any) in aff is an IV based on a memory 7140 object. Note that we only consider the candidate. If this is not 7141 based on an object, the base of the reference is in some subexpression 7142 of the use -- but these will use pointer types, so they are recognized 7143 by the create_mem_ref heuristics anyway. */ 7144 tree iv = var_at_stmt (data->current_loop, cand, use->stmt); 7145 tree base_hint = (cand->iv->base_object) ? 
iv : NULL_TREE; 7146 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt); 7147 tree type = use->mem_type; 7148 tree alias_ptr_type; 7149 if (use->type == USE_PTR_ADDRESS) 7150 alias_ptr_type = get_alias_ptr_type_for_ptr_address (use); 7151 else 7152 { 7153 gcc_assert (type == TREE_TYPE (*use->op_p)); 7154 unsigned int align = get_object_alignment (*use->op_p); 7155 if (align != TYPE_ALIGN (type)) 7156 type = build_aligned_type (type, align); 7157 alias_ptr_type = reference_alias_ptr_type (*use->op_p); 7158 } 7159 tree ref = create_mem_ref (&bsi, type, &aff, alias_ptr_type, 7160 iv, base_hint, data->speed); 7161 7162 if (use->type == USE_PTR_ADDRESS) 7163 { 7164 ref = fold_build1 (ADDR_EXPR, build_pointer_type (use->mem_type), ref); 7165 ref = fold_convert (get_use_type (use), ref); 7166 ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE, 7167 true, GSI_SAME_STMT); 7168 } 7169 else 7170 copy_ref_info (ref, *use->op_p); 7171 7172 *use->op_p = ref; 7173 } 7174 7175 /* Rewrites USE (the condition such that one of the arguments is an iv) using 7176 candidate CAND. */ 7177 7178 static void 7179 rewrite_use_compare (struct ivopts_data *data, 7180 struct iv_use *use, struct iv_cand *cand) 7181 { 7182 tree comp, op, bound; 7183 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt); 7184 enum tree_code compare; 7185 struct iv_group *group = data->vgroups[use->group_id]; 7186 struct cost_pair *cp = get_group_iv_cost (data, group, cand); 7187 7188 bound = cp->value; 7189 if (bound) 7190 { 7191 tree var = var_at_stmt (data->current_loop, cand, use->stmt); 7192 tree var_type = TREE_TYPE (var); 7193 gimple_seq stmts; 7194 7195 if (dump_file && (dump_flags & TDF_DETAILS)) 7196 { 7197 fprintf (dump_file, "Replacing exit test: "); 7198 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM); 7199 } 7200 compare = cp->comp; 7201 bound = unshare_expr (fold_convert (var_type, bound)); 7202 op = force_gimple_operand (bound, &stmts, true, NULL_TREE); 7203 if (stmts) 7204 gsi_insert_seq_on_edge_immediate ( 7205 loop_preheader_edge (data->current_loop), 7206 stmts); 7207 7208 gcond *cond_stmt = as_a <gcond *> (use->stmt); 7209 gimple_cond_set_lhs (cond_stmt, var); 7210 gimple_cond_set_code (cond_stmt, compare); 7211 gimple_cond_set_rhs (cond_stmt, op); 7212 return; 7213 } 7214 7215 /* The induction variable elimination failed; just express the original 7216 giv. */ 7217 comp = get_computation_at (data->current_loop, use->stmt, use, cand); 7218 gcc_assert (comp != NULL_TREE); 7219 gcc_assert (use->op_p != NULL); 7220 *use->op_p = force_gimple_operand_gsi (&bsi, comp, true, 7221 SSA_NAME_VAR (*use->op_p), 7222 true, GSI_SAME_STMT); 7223 } 7224 7225 /* Rewrite the groups using the selected induction variables. 
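   As an illustration of the compare case (invented names): when induction variable
   elimination succeeded for a group, rewrite_use_compare above turns an exit test
   such as

     if (i_4 < n_7)

   into a test against the selected candidate and the precomputed bound, e.g.

     if (ivtmp.7_2 != _bound_5)

   where the comparison code and the bound come from the group's cost pair; only when
   no bound was recorded is the original value of the iv recomputed explicitly.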
*/ 7226 7227 static void 7228 rewrite_groups (struct ivopts_data *data) 7229 { 7230 unsigned i, j; 7231 7232 for (i = 0; i < data->vgroups.length (); i++) 7233 { 7234 struct iv_group *group = data->vgroups[i]; 7235 struct iv_cand *cand = group->selected; 7236 7237 gcc_assert (cand); 7238 7239 if (group->type == USE_NONLINEAR_EXPR) 7240 { 7241 for (j = 0; j < group->vuses.length (); j++) 7242 { 7243 rewrite_use_nonlinear_expr (data, group->vuses[j], cand); 7244 update_stmt (group->vuses[j]->stmt); 7245 } 7246 } 7247 else if (address_p (group->type)) 7248 { 7249 for (j = 0; j < group->vuses.length (); j++) 7250 { 7251 rewrite_use_address (data, group->vuses[j], cand); 7252 update_stmt (group->vuses[j]->stmt); 7253 } 7254 } 7255 else 7256 { 7257 gcc_assert (group->type == USE_COMPARE); 7258 7259 for (j = 0; j < group->vuses.length (); j++) 7260 { 7261 rewrite_use_compare (data, group->vuses[j], cand); 7262 update_stmt (group->vuses[j]->stmt); 7263 } 7264 } 7265 } 7266 } 7267 7268 /* Removes the ivs that are not used after rewriting. */ 7269 7270 static void 7271 remove_unused_ivs (struct ivopts_data *data, bitmap toremove) 7272 { 7273 unsigned j; 7274 bitmap_iterator bi; 7275 7276 /* Figure out an order in which to release SSA DEFs so that we don't 7277 release something that we'd have to propagate into a debug stmt 7278 afterwards. */ 7279 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi) 7280 { 7281 struct version_info *info; 7282 7283 info = ver_info (data, j); 7284 if (info->iv 7285 && !integer_zerop (info->iv->step) 7286 && !info->inv_id 7287 && !info->iv->nonlin_use 7288 && !info->preserve_biv) 7289 { 7290 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name)); 7291 7292 tree def = info->iv->ssa_name; 7293 7294 if (MAY_HAVE_DEBUG_BIND_STMTS && SSA_NAME_DEF_STMT (def)) 7295 { 7296 imm_use_iterator imm_iter; 7297 use_operand_p use_p; 7298 gimple *stmt; 7299 int count = 0; 7300 7301 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def) 7302 { 7303 if (!gimple_debug_bind_p (stmt)) 7304 continue; 7305 7306 /* We just want to determine whether to do nothing 7307 (count == 0), to substitute the computed 7308 expression into a single use of the SSA DEF by 7309 itself (count == 1), or to use a debug temp 7310 because the SSA DEF is used multiple times or as 7311 part of a larger expression (count > 1). */ 7312 count++; 7313 if (gimple_debug_bind_get_value (stmt) != def) 7314 count++; 7315 7316 if (count > 1) 7317 BREAK_FROM_IMM_USE_STMT (imm_iter); 7318 } 7319 7320 if (!count) 7321 continue; 7322 7323 struct iv_use dummy_use; 7324 struct iv_cand *best_cand = NULL, *cand; 7325 unsigned i, best_pref = 0, cand_pref; 7326 7327 memset (&dummy_use, 0, sizeof (dummy_use)); 7328 dummy_use.iv = info->iv; 7329 for (i = 0; i < data->vgroups.length () && i < 64; i++) 7330 { 7331 cand = data->vgroups[i]->selected; 7332 if (cand == best_cand) 7333 continue; 7334 cand_pref = operand_equal_p (cand->iv->step, 7335 info->iv->step, 0) 7336 ? 4 : 0; 7337 cand_pref 7338 += TYPE_MODE (TREE_TYPE (cand->iv->base)) 7339 == TYPE_MODE (TREE_TYPE (info->iv->base)) 7340 ? 2 : 0; 7341 cand_pref 7342 += TREE_CODE (cand->iv->base) == INTEGER_CST 7343 ? 
1 : 0; 7344 if (best_cand == NULL || best_pref < cand_pref) 7345 { 7346 best_cand = cand; 7347 best_pref = cand_pref; 7348 } 7349 } 7350 7351 if (!best_cand) 7352 continue; 7353 7354 tree comp = get_computation_at (data->current_loop, 7355 SSA_NAME_DEF_STMT (def), 7356 &dummy_use, best_cand); 7357 if (!comp) 7358 continue; 7359 7360 if (count > 1) 7361 { 7362 tree vexpr = make_node (DEBUG_EXPR_DECL); 7363 DECL_ARTIFICIAL (vexpr) = 1; 7364 TREE_TYPE (vexpr) = TREE_TYPE (comp); 7365 if (SSA_NAME_VAR (def)) 7366 SET_DECL_MODE (vexpr, DECL_MODE (SSA_NAME_VAR (def))); 7367 else 7368 SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (vexpr))); 7369 gdebug *def_temp 7370 = gimple_build_debug_bind (vexpr, comp, NULL); 7371 gimple_stmt_iterator gsi; 7372 7373 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI) 7374 gsi = gsi_after_labels (gimple_bb 7375 (SSA_NAME_DEF_STMT (def))); 7376 else 7377 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def)); 7378 7379 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT); 7380 comp = vexpr; 7381 } 7382 7383 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def) 7384 { 7385 if (!gimple_debug_bind_p (stmt)) 7386 continue; 7387 7388 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter) 7389 SET_USE (use_p, comp); 7390 7391 update_stmt (stmt); 7392 } 7393 } 7394 } 7395 } 7396 } 7397 7398 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback 7399 for hash_map::traverse. */ 7400 7401 bool 7402 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *) 7403 { 7404 free (value); 7405 return true; 7406 } 7407 7408 /* Frees data allocated by the optimization of a single loop. */ 7409 7410 static void 7411 free_loop_data (struct ivopts_data *data) 7412 { 7413 unsigned i, j; 7414 bitmap_iterator bi; 7415 tree obj; 7416 7417 if (data->niters) 7418 { 7419 data->niters->traverse<void *, free_tree_niter_desc> (NULL); 7420 delete data->niters; 7421 data->niters = NULL; 7422 } 7423 7424 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi) 7425 { 7426 struct version_info *info; 7427 7428 info = ver_info (data, i); 7429 info->iv = NULL; 7430 info->has_nonlin_use = false; 7431 info->preserve_biv = false; 7432 info->inv_id = 0; 7433 } 7434 bitmap_clear (data->relevant); 7435 bitmap_clear (data->important_candidates); 7436 7437 for (i = 0; i < data->vgroups.length (); i++) 7438 { 7439 struct iv_group *group = data->vgroups[i]; 7440 7441 for (j = 0; j < group->vuses.length (); j++) 7442 free (group->vuses[j]); 7443 group->vuses.release (); 7444 7445 BITMAP_FREE (group->related_cands); 7446 for (j = 0; j < group->n_map_members; j++) 7447 { 7448 if (group->cost_map[j].inv_vars) 7449 BITMAP_FREE (group->cost_map[j].inv_vars); 7450 if (group->cost_map[j].inv_exprs) 7451 BITMAP_FREE (group->cost_map[j].inv_exprs); 7452 } 7453 7454 free (group->cost_map); 7455 free (group); 7456 } 7457 data->vgroups.truncate (0); 7458 7459 for (i = 0; i < data->vcands.length (); i++) 7460 { 7461 struct iv_cand *cand = data->vcands[i]; 7462 7463 if (cand->inv_vars) 7464 BITMAP_FREE (cand->inv_vars); 7465 if (cand->inv_exprs) 7466 BITMAP_FREE (cand->inv_exprs); 7467 free (cand); 7468 } 7469 data->vcands.truncate (0); 7470 7471 if (data->version_info_size < num_ssa_names) 7472 { 7473 data->version_info_size = 2 * num_ssa_names; 7474 free (data->version_info); 7475 data->version_info = XCNEWVEC (struct version_info, data->version_info_size); 7476 } 7477 7478 data->max_inv_var_id = 0; 7479 data->max_inv_expr_id = 0; 7480 7481 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj) 7482 SET_DECL_RTL (obj, NULL_RTX); 7483 
7484 decl_rtl_to_reset.truncate (0); 7485 7486 data->inv_expr_tab->empty (); 7487 7488 data->iv_common_cand_tab->empty (); 7489 data->iv_common_cands.truncate (0); 7490 } 7491 7492 /* Finalizes data structures used by the iv optimization pass. LOOPS is the 7493 loop tree. */ 7494 7495 static void 7496 tree_ssa_iv_optimize_finalize (struct ivopts_data *data) 7497 { 7498 free_loop_data (data); 7499 free (data->version_info); 7500 BITMAP_FREE (data->relevant); 7501 BITMAP_FREE (data->important_candidates); 7502 7503 decl_rtl_to_reset.release (); 7504 data->vgroups.release (); 7505 data->vcands.release (); 7506 delete data->inv_expr_tab; 7507 data->inv_expr_tab = NULL; 7508 free_affine_expand_cache (&data->name_expansion_cache); 7509 if (data->base_object_map) 7510 delete data->base_object_map; 7511 delete data->iv_common_cand_tab; 7512 data->iv_common_cand_tab = NULL; 7513 data->iv_common_cands.release (); 7514 obstack_free (&data->iv_obstack, NULL); 7515 } 7516 7517 /* Returns true if the loop body BODY includes any function calls. */ 7518 7519 static bool 7520 loop_body_includes_call (basic_block *body, unsigned num_nodes) 7521 { 7522 gimple_stmt_iterator gsi; 7523 unsigned i; 7524 7525 for (i = 0; i < num_nodes; i++) 7526 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi)) 7527 { 7528 gimple *stmt = gsi_stmt (gsi); 7529 if (is_gimple_call (stmt) 7530 && !gimple_call_internal_p (stmt) 7531 && !is_inexpensive_builtin (gimple_call_fndecl (stmt))) 7532 return true; 7533 } 7534 return false; 7535 } 7536 7537 /* Optimizes the LOOP. Returns true if anything changed. */ 7538 7539 static bool 7540 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop, 7541 bitmap toremove) 7542 { 7543 bool changed = false; 7544 struct iv_ca *iv_ca; 7545 edge exit = single_dom_exit (loop); 7546 basic_block *body; 7547 7548 gcc_assert (!data->niters); 7549 data->current_loop = loop; 7550 data->loop_loc = find_loop_location (loop).get_location_t (); 7551 data->speed = optimize_loop_for_speed_p (loop); 7552 7553 if (dump_file && (dump_flags & TDF_DETAILS)) 7554 { 7555 fprintf (dump_file, "Processing loop %d", loop->num); 7556 if (data->loop_loc != UNKNOWN_LOCATION) 7557 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc), 7558 LOCATION_LINE (data->loop_loc)); 7559 fprintf (dump_file, "\n"); 7560 7561 if (exit) 7562 { 7563 fprintf (dump_file, " single exit %d -> %d, exit condition ", 7564 exit->src->index, exit->dest->index); 7565 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM); 7566 fprintf (dump_file, "\n"); 7567 } 7568 7569 fprintf (dump_file, "\n"); 7570 } 7571 7572 body = get_loop_body (loop); 7573 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes); 7574 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes); 7575 free (body); 7576 7577 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit); 7578 7579 /* For each ssa name determines whether it behaves as an induction variable 7580 in some loop. */ 7581 if (!find_induction_variables (data)) 7582 goto finish; 7583 7584 /* Finds interesting uses (item 1). */ 7585 find_interesting_uses (data); 7586 if (data->vgroups.length () > MAX_CONSIDERED_GROUPS) 7587 goto finish; 7588 7589 /* Finds candidates for the induction variables (item 2). */ 7590 find_iv_candidates (data); 7591 7592 /* Calculates the costs (item 3, part 1). 
*/ 7593 determine_iv_costs (data); 7594 determine_group_iv_costs (data); 7595 determine_set_costs (data); 7596 7597 /* Find the optimal set of induction variables (item 3, part 2). */ 7598 iv_ca = find_optimal_iv_set (data); 7599 if (!iv_ca) 7600 goto finish; 7601 changed = true; 7602 7603 /* Create the new induction variables (item 4, part 1). */ 7604 create_new_ivs (data, iv_ca); 7605 iv_ca_free (&iv_ca); 7606 7607 /* Rewrite the uses (item 4, part 2). */ 7608 rewrite_groups (data); 7609 7610 /* Remove the ivs that are unused after rewriting. */ 7611 remove_unused_ivs (data, toremove); 7612 7613 finish: 7614 free_loop_data (data); 7615 7616 return changed; 7617 } 7618 7619 /* Main entry point. Optimizes induction variables in loops. */ 7620 7621 void 7622 tree_ssa_iv_optimize (void) 7623 { 7624 struct loop *loop; 7625 struct ivopts_data data; 7626 auto_bitmap toremove; 7627 7628 tree_ssa_iv_optimize_init (&data); 7629 7630 /* Optimize the loops starting with the innermost ones. */ 7631 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST) 7632 { 7633 if (dump_file && (dump_flags & TDF_DETAILS)) 7634 flow_loop_dump (loop, dump_file, NULL, 1); 7635 7636 tree_ssa_iv_optimize_loop (&data, loop, toremove); 7637 } 7638 7639 /* Remove eliminated IV defs. */ 7640 release_defs_bitset (toremove); 7641 7642 /* We have changed the structure of induction variables; it might happen 7643 that definitions in the scev database refer to some of them that were 7644 eliminated. */ 7645 scev_reset_htab (); 7646 /* Likewise niter and control-IV information. */ 7647 free_numbers_of_iterations_estimates (cfun); 7648 7649 tree_ssa_iv_optimize_finalize (&data); 7650 } 7651 7652 #include "gt-tree-ssa-loop-ivopts.h" 7653