/* Induction variable optimizations.
   Copyright (C) 2003-2017 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does so in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

      Note the interesting uses are categorized and handled in groups.
      Generally, address type uses are grouped together if their iv bases
      differ only by a constant offset.

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
	 groups/uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The group/use costs.  Each of the interesting groups/uses chooses
	 the best induction variable in the set and adds its cost to the sum.
	 The cost reflects the time spent on modifying the induction variable's
	 value to be usable for the given purpose (adding base and offset for
	 arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
	 reflects the costs associated with incrementing the value of the
	 variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
	 added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible; it might give better performance and enable us to decide costs
   more precisely, but getting all the interactions right would be
   complicated.
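
   As a rough illustration only (a sketch, not drawn from any particular
   testcase), a loop such as

     for (i = 0; i < n; i++)
       a[i] = 0;

   which recomputes the address &a[i] from i in each iteration, may after
   this pass effectively become

     for (p = &a[0]; p != &a[n]; p++)
       *p = 0;

   i.e. the address computation is strength-reduced to a pointer increment
   and the exit test is rewritten in terms of the new induction variable.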
*/ 68 69 #include "config.h" 70 #include "system.h" 71 #include "coretypes.h" 72 #include "backend.h" 73 #include "rtl.h" 74 #include "tree.h" 75 #include "gimple.h" 76 #include "cfghooks.h" 77 #include "tree-pass.h" 78 #include "memmodel.h" 79 #include "tm_p.h" 80 #include "ssa.h" 81 #include "expmed.h" 82 #include "insn-config.h" 83 #include "emit-rtl.h" 84 #include "recog.h" 85 #include "cgraph.h" 86 #include "gimple-pretty-print.h" 87 #include "alias.h" 88 #include "fold-const.h" 89 #include "stor-layout.h" 90 #include "tree-eh.h" 91 #include "gimplify.h" 92 #include "gimple-iterator.h" 93 #include "gimplify-me.h" 94 #include "tree-cfg.h" 95 #include "tree-ssa-loop-ivopts.h" 96 #include "tree-ssa-loop-manip.h" 97 #include "tree-ssa-loop-niter.h" 98 #include "tree-ssa-loop.h" 99 #include "explow.h" 100 #include "expr.h" 101 #include "tree-dfa.h" 102 #include "tree-ssa.h" 103 #include "cfgloop.h" 104 #include "tree-scalar-evolution.h" 105 #include "params.h" 106 #include "tree-affine.h" 107 #include "tree-ssa-propagate.h" 108 #include "tree-ssa-address.h" 109 #include "builtins.h" 110 #include "tree-vectorizer.h" 111 112 /* FIXME: Expressions are expanded to RTL in this pass to determine the 113 cost of different addressing modes. This should be moved to a TBD 114 interface between the GIMPLE and RTL worlds. */ 115 116 /* The infinite cost. */ 117 #define INFTY 10000000 118 119 /* Returns the expected number of loop iterations for LOOP. 120 The average trip count is computed from profile data if it 121 exists. */ 122 123 static inline HOST_WIDE_INT 124 avg_loop_niter (struct loop *loop) 125 { 126 HOST_WIDE_INT niter = estimated_stmt_executions_int (loop); 127 if (niter == -1) 128 { 129 niter = likely_max_stmt_executions_int (loop); 130 131 if (niter == -1 || niter > PARAM_VALUE (PARAM_AVG_LOOP_NITER)) 132 return PARAM_VALUE (PARAM_AVG_LOOP_NITER); 133 } 134 135 return niter; 136 } 137 138 struct iv_use; 139 140 /* Representation of the induction variable. */ 141 struct iv 142 { 143 tree base; /* Initial value of the iv. */ 144 tree base_object; /* A memory object to that the induction variable points. */ 145 tree step; /* Step of the iv (constant only). */ 146 tree ssa_name; /* The ssa name with the value. */ 147 struct iv_use *nonlin_use; /* The identifier in the use if it is the case. */ 148 bool biv_p; /* Is it a biv? */ 149 bool no_overflow; /* True if the iv doesn't overflow. */ 150 bool have_address_use;/* For biv, indicate if it's used in any address 151 type use. */ 152 }; 153 154 /* Per-ssa version information (induction variable descriptions, etc.). */ 155 struct version_info 156 { 157 tree name; /* The ssa name. */ 158 struct iv *iv; /* Induction variable description. */ 159 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in 160 an expression that is not an induction variable. */ 161 bool preserve_biv; /* For the original biv, whether to preserve it. */ 162 unsigned inv_id; /* Id of an invariant. */ 163 }; 164 165 /* Types of uses. */ 166 enum use_type 167 { 168 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */ 169 USE_ADDRESS, /* Use in an address. */ 170 USE_COMPARE /* Use is a compare. */ 171 }; 172 173 /* Cost of a computation. */ 174 struct comp_cost 175 { 176 comp_cost (): cost (0), complexity (0), scratch (0) 177 {} 178 179 comp_cost (int cost, unsigned complexity, int scratch = 0) 180 : cost (cost), complexity (complexity), scratch (scratch) 181 {} 182 183 /* Returns true if COST is infinite. 
*/
  bool infinite_cost_p ();

  /* Adds costs COST1 and COST2.  */
  friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);

  /* Adds COST to the comp_cost.  */
  comp_cost operator+= (comp_cost cost);

  /* Adds constant C to this comp_cost.  */
  comp_cost operator+= (HOST_WIDE_INT c);

  /* Subtracts constant C from this comp_cost.  */
  comp_cost operator-= (HOST_WIDE_INT c);

  /* Divides the comp_cost by constant C.  */
  comp_cost operator/= (HOST_WIDE_INT c);

  /* Multiplies the comp_cost by constant C.  */
  comp_cost operator*= (HOST_WIDE_INT c);

  /* Subtracts costs COST1 and COST2.  */
  friend comp_cost operator- (comp_cost cost1, comp_cost cost2);

  /* Subtracts COST from this comp_cost.  */
  comp_cost operator-= (comp_cost cost);

  /* Returns true if COST1 is smaller than COST2.  */
  friend bool operator< (comp_cost cost1, comp_cost cost2);

  /* Returns true if COST1 and COST2 are equal.  */
  friend bool operator== (comp_cost cost1, comp_cost cost2);

  /* Returns true if COST1 is smaller than or equal to COST2.  */
  friend bool operator<= (comp_cost cost1, comp_cost cost2);

  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
  int scratch;		/* Scratch used during cost computation.  */
};

static const comp_cost no_cost;
static const comp_cost infinite_cost (INFTY, INFTY, INFTY);

bool
comp_cost::infinite_cost_p ()
{
  return cost == INFTY;
}

comp_cost
operator+ (comp_cost cost1, comp_cost cost2)
{
  if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
    return infinite_cost;

  cost1.cost += cost2.cost;
  cost1.complexity += cost2.complexity;

  return cost1;
}

comp_cost
operator- (comp_cost cost1, comp_cost cost2)
{
  if (cost1.infinite_cost_p ())
    return infinite_cost;

  gcc_assert (!cost2.infinite_cost_p ());

  cost1.cost -= cost2.cost;
  cost1.complexity -= cost2.complexity;

  return cost1;
}

comp_cost
comp_cost::operator+= (comp_cost cost)
{
  *this = *this + cost;
  return *this;
}

comp_cost
comp_cost::operator+= (HOST_WIDE_INT c)
{
  if (infinite_cost_p ())
    return *this;

  this->cost += c;

  return *this;
}

comp_cost
comp_cost::operator-= (HOST_WIDE_INT c)
{
  if (infinite_cost_p ())
    return *this;

  this->cost -= c;

  return *this;
}

comp_cost
comp_cost::operator/= (HOST_WIDE_INT c)
{
  if (infinite_cost_p ())
    return *this;

  this->cost /= c;

  return *this;
}

comp_cost
comp_cost::operator*= (HOST_WIDE_INT c)
{
  if (infinite_cost_p ())
    return *this;

  this->cost *= c;

  return *this;
}

comp_cost
comp_cost::operator-= (comp_cost cost)
{
  *this = *this - cost;
  return *this;
}

bool
operator< (comp_cost cost1, comp_cost cost2)
{
  if (cost1.cost == cost2.cost)
    return cost1.complexity < cost2.complexity;

  return cost1.cost < cost2.cost;
}

bool
operator== (comp_cost cost1, comp_cost cost2)
{
  return cost1.cost == cost2.cost
    && cost1.complexity == cost2.complexity;
}

bool
operator<= (comp_cost cost1,
comp_cost cost2) 338 { 339 return cost1 < cost2 || cost1 == cost2; 340 } 341 342 struct iv_inv_expr_ent; 343 344 /* The candidate - cost pair. */ 345 struct cost_pair 346 { 347 struct iv_cand *cand; /* The candidate. */ 348 comp_cost cost; /* The cost. */ 349 enum tree_code comp; /* For iv elimination, the comparison. */ 350 bitmap depends_on; /* The list of invariants that have to be 351 preserved. */ 352 tree value; /* For final value elimination, the expression for 353 the final value of the iv. For iv elimination, 354 the new bound to compare with. */ 355 iv_inv_expr_ent *inv_expr; /* Loop invariant expression. */ 356 }; 357 358 /* Use. */ 359 struct iv_use 360 { 361 unsigned id; /* The id of the use. */ 362 unsigned group_id; /* The group id the use belongs to. */ 363 enum use_type type; /* Type of the use. */ 364 struct iv *iv; /* The induction variable it is based on. */ 365 gimple *stmt; /* Statement in that it occurs. */ 366 tree *op_p; /* The place where it occurs. */ 367 368 tree addr_base; /* Base address with const offset stripped. */ 369 unsigned HOST_WIDE_INT addr_offset; 370 /* Const offset stripped from base address. */ 371 }; 372 373 /* Group of uses. */ 374 struct iv_group 375 { 376 /* The id of the group. */ 377 unsigned id; 378 /* Uses of the group are of the same type. */ 379 enum use_type type; 380 /* The set of "related" IV candidates, plus the important ones. */ 381 bitmap related_cands; 382 /* Number of IV candidates in the cost_map. */ 383 unsigned n_map_members; 384 /* The costs wrto the iv candidates. */ 385 struct cost_pair *cost_map; 386 /* The selected candidate for the group. */ 387 struct iv_cand *selected; 388 /* Uses in the group. */ 389 vec<struct iv_use *> vuses; 390 }; 391 392 /* The position where the iv is computed. */ 393 enum iv_position 394 { 395 IP_NORMAL, /* At the end, just before the exit condition. */ 396 IP_END, /* At the end of the latch block. */ 397 IP_BEFORE_USE, /* Immediately before a specific use. */ 398 IP_AFTER_USE, /* Immediately after a specific use. */ 399 IP_ORIGINAL /* The original biv. */ 400 }; 401 402 /* The induction variable candidate. */ 403 struct iv_cand 404 { 405 unsigned id; /* The number of the candidate. */ 406 bool important; /* Whether this is an "important" candidate, i.e. such 407 that it should be considered by all uses. */ 408 ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */ 409 gimple *incremented_at;/* For original biv, the statement where it is 410 incremented. */ 411 tree var_before; /* The variable used for it before increment. */ 412 tree var_after; /* The variable used for it after increment. */ 413 struct iv *iv; /* The value of the candidate. NULL for 414 "pseudocandidate" used to indicate the possibility 415 to replace the final value of an iv by direct 416 computation of the value. */ 417 unsigned cost; /* Cost of the candidate. */ 418 unsigned cost_step; /* Cost of the candidate's increment operation. */ 419 struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place 420 where it is incremented. */ 421 bitmap depends_on; /* The list of invariants that are used in step of the 422 biv. */ 423 struct iv *orig_iv; /* The original iv if this cand is added from biv with 424 smaller type. */ 425 }; 426 427 /* Hashtable entry for common candidate derived from iv uses. */ 428 struct iv_common_cand 429 { 430 tree base; 431 tree step; 432 /* IV uses from which this common candidate is derived. 
*/
  auto_vec<struct iv_use *> uses;
  hashval_t hash;
};

/* Hashtable helpers.  */

struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
{
  static inline hashval_t hash (const iv_common_cand *);
  static inline bool equal (const iv_common_cand *, const iv_common_cand *);
};

/* Hash function for possible common candidates.  */

inline hashval_t
iv_common_cand_hasher::hash (const iv_common_cand *ccand)
{
  return ccand->hash;
}

/* Hash table equality function for common candidates.  */

inline bool
iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
			      const iv_common_cand *ccand2)
{
  return (ccand1->hash == ccand2->hash
	  && operand_equal_p (ccand1->base, ccand2->base, 0)
	  && operand_equal_p (ccand1->step, ccand2->step, 0)
	  && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
	      == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
}

/* Loop invariant expression hashtable entry.  */

struct iv_inv_expr_ent
{
  /* Tree expression of the entry.  */
  tree expr;
  /* Unique identifier.  */
  int id;
  /* Hash value.  */
  hashval_t hash;
};

/* Sort iv_inv_expr_ent pair A and B by id field.  */

static int
sort_iv_inv_expr_ent (const void *a, const void *b)
{
  const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
  const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);

  unsigned id1 = (*e1)->id;
  unsigned id2 = (*e2)->id;

  if (id1 < id2)
    return -1;
  else if (id1 > id2)
    return 1;
  else
    return 0;
}

/* Hashtable helpers.  */

struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
{
  static inline hashval_t hash (const iv_inv_expr_ent *);
  static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
};

/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
			   const iv_inv_expr_ent *expr2)
{
  return expr1->hash == expr2->hash
	 && operand_equal_p (expr1->expr, expr2->expr, 0);
}

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;
  source_location loop_loc;

  /* Numbers of iterations for all exits of the current loop.  */
  hash_map<edge, tree_niter_desc *> *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopts.  */
  hash_table<iv_inv_expr_hasher> *inv_expr_tab;

  /* Loop invariant expression id.  */
  int max_inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_group *> vgroups;

  /* The candidates.  */
  vec<iv_cand *> vcands;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Cache used by tree_to_aff_combination_expand.
*/ 561 hash_map<tree, name_expansion *> *name_expansion_cache; 562 563 /* The hashtable of common candidates derived from iv uses. */ 564 hash_table<iv_common_cand_hasher> *iv_common_cand_tab; 565 566 /* The common candidates. */ 567 vec<iv_common_cand *> iv_common_cands; 568 569 /* Hash map recording base object information of tree exp. */ 570 hash_map<tree, tree> *base_object_map; 571 572 /* The maximum invariant id. */ 573 unsigned max_inv_id; 574 575 /* Number of no_overflow BIVs which are not used in memory address. */ 576 unsigned bivs_not_used_in_addr; 577 578 /* Obstack for iv structure. */ 579 struct obstack iv_obstack; 580 581 /* Whether to consider just related and important candidates when replacing a 582 use. */ 583 bool consider_all_candidates; 584 585 /* Are we optimizing for speed? */ 586 bool speed; 587 588 /* Whether the loop body includes any function calls. */ 589 bool body_includes_call; 590 591 /* Whether the loop body can only be exited via single exit. */ 592 bool loop_single_exit_p; 593 }; 594 595 /* An assignment of iv candidates to uses. */ 596 597 struct iv_ca 598 { 599 /* The number of uses covered by the assignment. */ 600 unsigned upto; 601 602 /* Number of uses that cannot be expressed by the candidates in the set. */ 603 unsigned bad_groups; 604 605 /* Candidate assigned to a use, together with the related costs. */ 606 struct cost_pair **cand_for_group; 607 608 /* Number of times each candidate is used. */ 609 unsigned *n_cand_uses; 610 611 /* The candidates used. */ 612 bitmap cands; 613 614 /* The number of candidates in the set. */ 615 unsigned n_cands; 616 617 /* Total number of registers needed. */ 618 unsigned n_regs; 619 620 /* Total cost of expressing uses. */ 621 comp_cost cand_use_cost; 622 623 /* Total cost of candidates. */ 624 unsigned cand_cost; 625 626 /* Number of times each invariant is used. */ 627 unsigned *n_invariant_uses; 628 629 /* Hash set with used invariant expression. */ 630 hash_map <iv_inv_expr_ent *, unsigned> *used_inv_exprs; 631 632 /* Total cost of the assignment. */ 633 comp_cost cost; 634 }; 635 636 /* Difference of two iv candidate assignments. */ 637 638 struct iv_ca_delta 639 { 640 /* Changed group. */ 641 struct iv_group *group; 642 643 /* An old assignment (for rollback purposes). */ 644 struct cost_pair *old_cp; 645 646 /* A new assignment. */ 647 struct cost_pair *new_cp; 648 649 /* Next change in the list. */ 650 struct iv_ca_delta *next; 651 }; 652 653 /* Bound on number of candidates below that all candidates are considered. */ 654 655 #define CONSIDER_ALL_CANDIDATES_BOUND \ 656 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND)) 657 658 /* If there are more iv occurrences, we just give up (it is quite unlikely that 659 optimizing such a loop would help, and it would take ages). */ 660 661 #define MAX_CONSIDERED_GROUPS \ 662 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES)) 663 664 /* If there are at most this number of ivs in the set, try removing unnecessary 665 ivs from the set always. */ 666 667 #define ALWAYS_PRUNE_CAND_SET_BOUND \ 668 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND)) 669 670 /* The list of trees for that the decl_rtl field must be reset is stored 671 here. */ 672 673 static vec<tree> decl_rtl_to_reset; 674 675 static comp_cost force_expr_to_var_cost (tree, bool); 676 677 /* The single loop exit if it dominates the latch, NULL otherwise. 
*/ 678 679 edge 680 single_dom_exit (struct loop *loop) 681 { 682 edge exit = single_exit (loop); 683 684 if (!exit) 685 return NULL; 686 687 if (!just_once_each_iteration_p (loop, exit->src)) 688 return NULL; 689 690 return exit; 691 } 692 693 /* Dumps information about the induction variable IV to FILE. Don't dump 694 variable's name if DUMP_NAME is FALSE. The information is dumped with 695 preceding spaces indicated by INDENT_LEVEL. */ 696 697 void 698 dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level) 699 { 700 const char *p; 701 const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'}; 702 703 if (indent_level > 4) 704 indent_level = 4; 705 p = spaces + 8 - (indent_level << 1); 706 707 fprintf (file, "%sIV struct:\n", p); 708 if (iv->ssa_name && dump_name) 709 { 710 fprintf (file, "%s SSA_NAME:\t", p); 711 print_generic_expr (file, iv->ssa_name, TDF_SLIM); 712 fprintf (file, "\n"); 713 } 714 715 fprintf (file, "%s Type:\t", p); 716 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM); 717 fprintf (file, "\n"); 718 719 fprintf (file, "%s Base:\t", p); 720 print_generic_expr (file, iv->base, TDF_SLIM); 721 fprintf (file, "\n"); 722 723 fprintf (file, "%s Step:\t", p); 724 print_generic_expr (file, iv->step, TDF_SLIM); 725 fprintf (file, "\n"); 726 727 if (iv->base_object) 728 { 729 fprintf (file, "%s Object:\t", p); 730 print_generic_expr (file, iv->base_object, TDF_SLIM); 731 fprintf (file, "\n"); 732 } 733 734 fprintf (file, "%s Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N'); 735 736 fprintf (file, "%s Overflowness wrto loop niter:\t%s\n", 737 p, iv->no_overflow ? "No-overflow" : "Overflow"); 738 } 739 740 /* Dumps information about the USE to FILE. */ 741 742 void 743 dump_use (FILE *file, struct iv_use *use) 744 { 745 fprintf (file, " Use %d.%d:\n", use->group_id, use->id); 746 fprintf (file, " At stmt:\t"); 747 print_gimple_stmt (file, use->stmt, 0, 0); 748 fprintf (file, " At pos:\t"); 749 if (use->op_p) 750 print_generic_expr (file, *use->op_p, TDF_SLIM); 751 fprintf (file, "\n"); 752 dump_iv (file, use->iv, false, 2); 753 } 754 755 /* Dumps information about the uses to FILE. */ 756 757 void 758 dump_groups (FILE *file, struct ivopts_data *data) 759 { 760 unsigned i, j; 761 struct iv_group *group; 762 763 for (i = 0; i < data->vgroups.length (); i++) 764 { 765 group = data->vgroups[i]; 766 fprintf (file, "Group %d:\n", group->id); 767 if (group->type == USE_NONLINEAR_EXPR) 768 fprintf (file, " Type:\tGENERIC\n"); 769 else if (group->type == USE_ADDRESS) 770 fprintf (file, " Type:\tADDRESS\n"); 771 else 772 { 773 gcc_assert (group->type == USE_COMPARE); 774 fprintf (file, " Type:\tCOMPARE\n"); 775 } 776 for (j = 0; j < group->vuses.length (); j++) 777 dump_use (file, group->vuses[j]); 778 } 779 } 780 781 /* Dumps information about induction variable candidate CAND to FILE. 
*/

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "Candidate %d:\n", cand->id);
  if (cand->depends_on)
    {
      fprintf (file, " Depend on: ");
      dump_bitmap (file, cand->depends_on);
    }

  if (cand->var_before)
    {
      fprintf (file, " Var befor: ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, " Var after: ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, " Incr POS: before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, " Incr POS: before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, " Incr POS: after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, " Incr POS: at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, " Incr POS: orig biv\n");
      break;
    }

  dump_iv (file, iv, false, 1);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}

/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.
*/ 918 919 static bool 920 abnormal_ssa_name_p (tree exp) 921 { 922 if (!exp) 923 return false; 924 925 if (TREE_CODE (exp) != SSA_NAME) 926 return false; 927 928 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0; 929 } 930 931 /* Returns false if BASE or INDEX contains a ssa name that occurs in an 932 abnormal phi node. Callback for for_each_index. */ 933 934 static bool 935 idx_contains_abnormal_ssa_name_p (tree base, tree *index, 936 void *data ATTRIBUTE_UNUSED) 937 { 938 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF) 939 { 940 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2))) 941 return false; 942 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3))) 943 return false; 944 } 945 946 return !abnormal_ssa_name_p (*index); 947 } 948 949 /* Returns true if EXPR contains a ssa name that occurs in an 950 abnormal phi node. */ 951 952 bool 953 contains_abnormal_ssa_name_p (tree expr) 954 { 955 enum tree_code code; 956 enum tree_code_class codeclass; 957 958 if (!expr) 959 return false; 960 961 code = TREE_CODE (expr); 962 codeclass = TREE_CODE_CLASS (code); 963 964 if (code == SSA_NAME) 965 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0; 966 967 if (code == INTEGER_CST 968 || is_gimple_min_invariant (expr)) 969 return false; 970 971 if (code == ADDR_EXPR) 972 return !for_each_index (&TREE_OPERAND (expr, 0), 973 idx_contains_abnormal_ssa_name_p, 974 NULL); 975 976 if (code == COND_EXPR) 977 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)) 978 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)) 979 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2)); 980 981 switch (codeclass) 982 { 983 case tcc_binary: 984 case tcc_comparison: 985 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))) 986 return true; 987 988 /* Fallthru. */ 989 case tcc_unary: 990 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))) 991 return true; 992 993 break; 994 995 default: 996 gcc_unreachable (); 997 } 998 999 return false; 1000 } 1001 1002 /* Returns the structure describing number of iterations determined from 1003 EXIT of DATA->current_loop, or NULL if something goes wrong. */ 1004 1005 static struct tree_niter_desc * 1006 niter_for_exit (struct ivopts_data *data, edge exit) 1007 { 1008 struct tree_niter_desc *desc; 1009 tree_niter_desc **slot; 1010 1011 if (!data->niters) 1012 { 1013 data->niters = new hash_map<edge, tree_niter_desc *>; 1014 slot = NULL; 1015 } 1016 else 1017 slot = data->niters->get (exit); 1018 1019 if (!slot) 1020 { 1021 /* Try to determine number of iterations. We cannot safely work with ssa 1022 names that appear in phi nodes on abnormal edges, so that we do not 1023 create overlapping life ranges for them (PR 27283). */ 1024 desc = XNEW (struct tree_niter_desc); 1025 if (!number_of_iterations_exit (data->current_loop, 1026 exit, desc, true) 1027 || contains_abnormal_ssa_name_p (desc->niter)) 1028 { 1029 XDELETE (desc); 1030 desc = NULL; 1031 } 1032 data->niters->put (exit, desc); 1033 } 1034 else 1035 desc = *slot; 1036 1037 return desc; 1038 } 1039 1040 /* Returns the structure describing number of iterations determined from 1041 single dominating exit of DATA->current_loop, or NULL if something 1042 goes wrong. 
*/ 1043 1044 static struct tree_niter_desc * 1045 niter_for_single_dom_exit (struct ivopts_data *data) 1046 { 1047 edge exit = single_dom_exit (data->current_loop); 1048 1049 if (!exit) 1050 return NULL; 1051 1052 return niter_for_exit (data, exit); 1053 } 1054 1055 /* Initializes data structures used by the iv optimization pass, stored 1056 in DATA. */ 1057 1058 static void 1059 tree_ssa_iv_optimize_init (struct ivopts_data *data) 1060 { 1061 data->version_info_size = 2 * num_ssa_names; 1062 data->version_info = XCNEWVEC (struct version_info, data->version_info_size); 1063 data->relevant = BITMAP_ALLOC (NULL); 1064 data->important_candidates = BITMAP_ALLOC (NULL); 1065 data->max_inv_id = 0; 1066 data->niters = NULL; 1067 data->vgroups.create (20); 1068 data->vcands.create (20); 1069 data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10); 1070 data->max_inv_expr_id = 0; 1071 data->name_expansion_cache = NULL; 1072 data->base_object_map = NULL; 1073 data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10); 1074 data->iv_common_cands.create (20); 1075 decl_rtl_to_reset.create (20); 1076 gcc_obstack_init (&data->iv_obstack); 1077 } 1078 1079 /* walk_tree callback for determine_base_object. */ 1080 1081 static tree 1082 determine_base_object_1 (tree *tp, int *walk_subtrees, void *wdata) 1083 { 1084 tree_code code = TREE_CODE (*tp); 1085 tree obj = NULL_TREE; 1086 if (code == ADDR_EXPR) 1087 { 1088 tree base = get_base_address (TREE_OPERAND (*tp, 0)); 1089 if (!base) 1090 obj = *tp; 1091 else if (TREE_CODE (base) != MEM_REF) 1092 obj = fold_convert (ptr_type_node, build_fold_addr_expr (base)); 1093 } 1094 else if (code == SSA_NAME && POINTER_TYPE_P (TREE_TYPE (*tp))) 1095 obj = fold_convert (ptr_type_node, *tp); 1096 1097 if (!obj) 1098 { 1099 if (!EXPR_P (*tp)) 1100 *walk_subtrees = 0; 1101 1102 return NULL_TREE; 1103 } 1104 /* Record special node for multiple base objects and stop. */ 1105 if (*static_cast<tree *> (wdata)) 1106 { 1107 *static_cast<tree *> (wdata) = integer_zero_node; 1108 return integer_zero_node; 1109 } 1110 /* Record the base object and continue looking. */ 1111 *static_cast<tree *> (wdata) = obj; 1112 return NULL_TREE; 1113 } 1114 1115 /* Returns a memory object to that EXPR points with caching. Return NULL if we 1116 are able to determine that it does not point to any such object; specially 1117 return integer_zero_node if EXPR contains multiple base objects. */ 1118 1119 static tree 1120 determine_base_object (struct ivopts_data *data, tree expr) 1121 { 1122 tree *slot, obj = NULL_TREE; 1123 if (data->base_object_map) 1124 { 1125 if ((slot = data->base_object_map->get(expr)) != NULL) 1126 return *slot; 1127 } 1128 else 1129 data->base_object_map = new hash_map<tree, tree>; 1130 1131 (void) walk_tree_without_duplicates (&expr, determine_base_object_1, &obj); 1132 data->base_object_map->put (expr, obj); 1133 return obj; 1134 } 1135 1136 /* Return true if address expression with non-DECL_P operand appears 1137 in EXPR. 
*/ 1138 1139 static bool 1140 contain_complex_addr_expr (tree expr) 1141 { 1142 bool res = false; 1143 1144 STRIP_NOPS (expr); 1145 switch (TREE_CODE (expr)) 1146 { 1147 case POINTER_PLUS_EXPR: 1148 case PLUS_EXPR: 1149 case MINUS_EXPR: 1150 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0)); 1151 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1)); 1152 break; 1153 1154 case ADDR_EXPR: 1155 return (!DECL_P (TREE_OPERAND (expr, 0))); 1156 1157 default: 1158 return false; 1159 } 1160 1161 return res; 1162 } 1163 1164 /* Allocates an induction variable with given initial value BASE and step STEP 1165 for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */ 1166 1167 static struct iv * 1168 alloc_iv (struct ivopts_data *data, tree base, tree step, 1169 bool no_overflow = false) 1170 { 1171 tree expr = base; 1172 struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack, 1173 sizeof (struct iv)); 1174 gcc_assert (step != NULL_TREE); 1175 1176 /* Lower address expression in base except ones with DECL_P as operand. 1177 By doing this: 1178 1) More accurate cost can be computed for address expressions; 1179 2) Duplicate candidates won't be created for bases in different 1180 forms, like &a[0] and &a. */ 1181 STRIP_NOPS (expr); 1182 if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0))) 1183 || contain_complex_addr_expr (expr)) 1184 { 1185 aff_tree comb; 1186 tree_to_aff_combination (expr, TREE_TYPE (expr), &comb); 1187 base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb)); 1188 } 1189 1190 iv->base = base; 1191 iv->base_object = determine_base_object (data, base); 1192 iv->step = step; 1193 iv->biv_p = false; 1194 iv->nonlin_use = NULL; 1195 iv->ssa_name = NULL_TREE; 1196 if (!no_overflow 1197 && !iv_can_overflow_p (data->current_loop, TREE_TYPE (base), 1198 base, step)) 1199 no_overflow = true; 1200 iv->no_overflow = no_overflow; 1201 iv->have_address_use = false; 1202 1203 return iv; 1204 } 1205 1206 /* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV 1207 doesn't overflow. */ 1208 1209 static void 1210 set_iv (struct ivopts_data *data, tree iv, tree base, tree step, 1211 bool no_overflow) 1212 { 1213 struct version_info *info = name_info (data, iv); 1214 1215 gcc_assert (!info->iv); 1216 1217 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv)); 1218 info->iv = alloc_iv (data, base, step, no_overflow); 1219 info->iv->ssa_name = iv; 1220 } 1221 1222 /* Finds induction variable declaration for VAR. */ 1223 1224 static struct iv * 1225 get_iv (struct ivopts_data *data, tree var) 1226 { 1227 basic_block bb; 1228 tree type = TREE_TYPE (var); 1229 1230 if (!POINTER_TYPE_P (type) 1231 && !INTEGRAL_TYPE_P (type)) 1232 return NULL; 1233 1234 if (!name_info (data, var)->iv) 1235 { 1236 bb = gimple_bb (SSA_NAME_DEF_STMT (var)); 1237 1238 if (!bb 1239 || !flow_bb_inside_loop_p (data->current_loop, bb)) 1240 set_iv (data, var, var, build_int_cst (type, 0), true); 1241 } 1242 1243 return name_info (data, var)->iv; 1244 } 1245 1246 /* Return the first non-invariant ssa var found in EXPR. 
*/ 1247 1248 static tree 1249 extract_single_var_from_expr (tree expr) 1250 { 1251 int i, n; 1252 tree tmp; 1253 enum tree_code code; 1254 1255 if (!expr || is_gimple_min_invariant (expr)) 1256 return NULL; 1257 1258 code = TREE_CODE (expr); 1259 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code))) 1260 { 1261 n = TREE_OPERAND_LENGTH (expr); 1262 for (i = 0; i < n; i++) 1263 { 1264 tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i)); 1265 1266 if (tmp) 1267 return tmp; 1268 } 1269 } 1270 return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL; 1271 } 1272 1273 /* Finds basic ivs. */ 1274 1275 static bool 1276 find_bivs (struct ivopts_data *data) 1277 { 1278 gphi *phi; 1279 affine_iv iv; 1280 tree step, type, base, stop; 1281 bool found = false; 1282 struct loop *loop = data->current_loop; 1283 gphi_iterator psi; 1284 1285 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi)) 1286 { 1287 phi = psi.phi (); 1288 1289 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi))) 1290 continue; 1291 1292 if (virtual_operand_p (PHI_RESULT (phi))) 1293 continue; 1294 1295 if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true)) 1296 continue; 1297 1298 if (integer_zerop (iv.step)) 1299 continue; 1300 1301 step = iv.step; 1302 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop)); 1303 /* Stop expanding iv base at the first ssa var referred by iv step. 1304 Ideally we should stop at any ssa var, because that's expensive 1305 and unusual to happen, we just do it on the first one. 1306 1307 See PR64705 for the rationale. */ 1308 stop = extract_single_var_from_expr (step); 1309 base = expand_simple_operations (base, stop); 1310 if (contains_abnormal_ssa_name_p (base) 1311 || contains_abnormal_ssa_name_p (step)) 1312 continue; 1313 1314 type = TREE_TYPE (PHI_RESULT (phi)); 1315 base = fold_convert (type, base); 1316 if (step) 1317 { 1318 if (POINTER_TYPE_P (type)) 1319 step = convert_to_ptrofftype (step); 1320 else 1321 step = fold_convert (type, step); 1322 } 1323 1324 set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow); 1325 found = true; 1326 } 1327 1328 return found; 1329 } 1330 1331 /* Marks basic ivs. */ 1332 1333 static void 1334 mark_bivs (struct ivopts_data *data) 1335 { 1336 gphi *phi; 1337 gimple *def; 1338 tree var; 1339 struct iv *iv, *incr_iv; 1340 struct loop *loop = data->current_loop; 1341 basic_block incr_bb; 1342 gphi_iterator psi; 1343 1344 data->bivs_not_used_in_addr = 0; 1345 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi)) 1346 { 1347 phi = psi.phi (); 1348 1349 iv = get_iv (data, PHI_RESULT (phi)); 1350 if (!iv) 1351 continue; 1352 1353 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop)); 1354 def = SSA_NAME_DEF_STMT (var); 1355 /* Don't mark iv peeled from other one as biv. */ 1356 if (def 1357 && gimple_code (def) == GIMPLE_PHI 1358 && gimple_bb (def) == loop->header) 1359 continue; 1360 1361 incr_iv = get_iv (data, var); 1362 if (!incr_iv) 1363 continue; 1364 1365 /* If the increment is in the subloop, ignore it. */ 1366 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var)); 1367 if (incr_bb->loop_father != data->current_loop 1368 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP)) 1369 continue; 1370 1371 iv->biv_p = true; 1372 incr_iv->biv_p = true; 1373 if (iv->no_overflow) 1374 data->bivs_not_used_in_addr++; 1375 if (incr_iv->no_overflow) 1376 data->bivs_not_used_in_addr++; 1377 } 1378 } 1379 1380 /* Checks whether STMT defines a linear induction variable and stores its 1381 parameters to IV. 
*/ 1382 1383 static bool 1384 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv) 1385 { 1386 tree lhs, stop; 1387 struct loop *loop = data->current_loop; 1388 1389 iv->base = NULL_TREE; 1390 iv->step = NULL_TREE; 1391 1392 if (gimple_code (stmt) != GIMPLE_ASSIGN) 1393 return false; 1394 1395 lhs = gimple_assign_lhs (stmt); 1396 if (TREE_CODE (lhs) != SSA_NAME) 1397 return false; 1398 1399 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true)) 1400 return false; 1401 1402 /* Stop expanding iv base at the first ssa var referred by iv step. 1403 Ideally we should stop at any ssa var, because that's expensive 1404 and unusual to happen, we just do it on the first one. 1405 1406 See PR64705 for the rationale. */ 1407 stop = extract_single_var_from_expr (iv->step); 1408 iv->base = expand_simple_operations (iv->base, stop); 1409 if (contains_abnormal_ssa_name_p (iv->base) 1410 || contains_abnormal_ssa_name_p (iv->step)) 1411 return false; 1412 1413 /* If STMT could throw, then do not consider STMT as defining a GIV. 1414 While this will suppress optimizations, we can not safely delete this 1415 GIV and associated statements, even if it appears it is not used. */ 1416 if (stmt_could_throw_p (stmt)) 1417 return false; 1418 1419 return true; 1420 } 1421 1422 /* Finds general ivs in statement STMT. */ 1423 1424 static void 1425 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt) 1426 { 1427 affine_iv iv; 1428 1429 if (!find_givs_in_stmt_scev (data, stmt, &iv)) 1430 return; 1431 1432 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow); 1433 } 1434 1435 /* Finds general ivs in basic block BB. */ 1436 1437 static void 1438 find_givs_in_bb (struct ivopts_data *data, basic_block bb) 1439 { 1440 gimple_stmt_iterator bsi; 1441 1442 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi)) 1443 find_givs_in_stmt (data, gsi_stmt (bsi)); 1444 } 1445 1446 /* Finds general ivs. */ 1447 1448 static void 1449 find_givs (struct ivopts_data *data) 1450 { 1451 struct loop *loop = data->current_loop; 1452 basic_block *body = get_loop_body_in_dom_order (loop); 1453 unsigned i; 1454 1455 for (i = 0; i < loop->num_nodes; i++) 1456 find_givs_in_bb (data, body[i]); 1457 free (body); 1458 } 1459 1460 /* For each ssa name defined in LOOP determines whether it is an induction 1461 variable and if so, its initial value and step. */ 1462 1463 static bool 1464 find_induction_variables (struct ivopts_data *data) 1465 { 1466 unsigned i; 1467 bitmap_iterator bi; 1468 1469 if (!find_bivs (data)) 1470 return false; 1471 1472 find_givs (data); 1473 mark_bivs (data); 1474 1475 if (dump_file && (dump_flags & TDF_DETAILS)) 1476 { 1477 struct tree_niter_desc *niter = niter_for_single_dom_exit (data); 1478 1479 if (niter) 1480 { 1481 fprintf (dump_file, " number of iterations "); 1482 print_generic_expr (dump_file, niter->niter, TDF_SLIM); 1483 if (!integer_zerop (niter->may_be_zero)) 1484 { 1485 fprintf (dump_file, "; zero if "); 1486 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM); 1487 } 1488 fprintf (dump_file, "\n"); 1489 }; 1490 1491 fprintf (dump_file, "\n<Induction Vars>:\n"); 1492 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi) 1493 { 1494 struct version_info *info = ver_info (data, i); 1495 if (info->iv && info->iv->step && !integer_zerop (info->iv->step)) 1496 dump_iv (dump_file, ver_info (data, i)->iv, true, 0); 1497 } 1498 } 1499 1500 return true; 1501 } 1502 1503 /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP. 
1504 For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET 1505 is the const offset stripped from IV base; for other types use, both 1506 are zero by default. */ 1507 1508 static struct iv_use * 1509 record_use (struct iv_group *group, tree *use_p, struct iv *iv, 1510 gimple *stmt, enum use_type type, tree addr_base, 1511 unsigned HOST_WIDE_INT addr_offset) 1512 { 1513 struct iv_use *use = XCNEW (struct iv_use); 1514 1515 use->id = group->vuses.length (); 1516 use->group_id = group->id; 1517 use->type = type; 1518 use->iv = iv; 1519 use->stmt = stmt; 1520 use->op_p = use_p; 1521 use->addr_base = addr_base; 1522 use->addr_offset = addr_offset; 1523 1524 group->vuses.safe_push (use); 1525 return use; 1526 } 1527 1528 /* Checks whether OP is a loop-level invariant and if so, records it. 1529 NONLINEAR_USE is true if the invariant is used in a way we do not 1530 handle specially. */ 1531 1532 static void 1533 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use) 1534 { 1535 basic_block bb; 1536 struct version_info *info; 1537 1538 if (TREE_CODE (op) != SSA_NAME 1539 || virtual_operand_p (op)) 1540 return; 1541 1542 bb = gimple_bb (SSA_NAME_DEF_STMT (op)); 1543 if (bb 1544 && flow_bb_inside_loop_p (data->current_loop, bb)) 1545 return; 1546 1547 info = name_info (data, op); 1548 info->name = op; 1549 info->has_nonlin_use |= nonlinear_use; 1550 if (!info->inv_id) 1551 info->inv_id = ++data->max_inv_id; 1552 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op)); 1553 } 1554 1555 static tree 1556 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset); 1557 1558 /* Record a group of TYPE. */ 1559 1560 static struct iv_group * 1561 record_group (struct ivopts_data *data, enum use_type type) 1562 { 1563 struct iv_group *group = XCNEW (struct iv_group); 1564 1565 group->id = data->vgroups.length (); 1566 group->type = type; 1567 group->related_cands = BITMAP_ALLOC (NULL); 1568 group->vuses.create (1); 1569 1570 data->vgroups.safe_push (group); 1571 return group; 1572 } 1573 1574 /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group. 1575 New group will be created if there is no existing group for the use. */ 1576 1577 static struct iv_use * 1578 record_group_use (struct ivopts_data *data, tree *use_p, 1579 struct iv *iv, gimple *stmt, enum use_type type) 1580 { 1581 tree addr_base = NULL; 1582 struct iv_group *group = NULL; 1583 unsigned HOST_WIDE_INT addr_offset = 0; 1584 1585 /* Record non address type use in a new group. */ 1586 if (type == USE_ADDRESS && iv->base_object) 1587 { 1588 unsigned int i; 1589 1590 addr_base = strip_offset (iv->base, &addr_offset); 1591 for (i = 0; i < data->vgroups.length (); i++) 1592 { 1593 struct iv_use *use; 1594 1595 group = data->vgroups[i]; 1596 use = group->vuses[0]; 1597 if (use->type != USE_ADDRESS || !use->iv->base_object) 1598 continue; 1599 1600 /* Check if it has the same stripped base and step. */ 1601 if (operand_equal_p (iv->base_object, use->iv->base_object, 0) 1602 && operand_equal_p (iv->step, use->iv->step, 0) 1603 && operand_equal_p (addr_base, use->addr_base, 0)) 1604 break; 1605 } 1606 if (i == data->vgroups.length ()) 1607 group = NULL; 1608 } 1609 1610 if (!group) 1611 group = record_group (data, type); 1612 1613 return record_use (group, use_p, iv, stmt, type, addr_base, addr_offset); 1614 } 1615 1616 /* Checks whether the use OP is interesting and if so, records it. 
*/ 1617 1618 static struct iv_use * 1619 find_interesting_uses_op (struct ivopts_data *data, tree op) 1620 { 1621 struct iv *iv; 1622 gimple *stmt; 1623 struct iv_use *use; 1624 1625 if (TREE_CODE (op) != SSA_NAME) 1626 return NULL; 1627 1628 iv = get_iv (data, op); 1629 if (!iv) 1630 return NULL; 1631 1632 if (iv->nonlin_use) 1633 { 1634 gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR); 1635 return iv->nonlin_use; 1636 } 1637 1638 if (integer_zerop (iv->step)) 1639 { 1640 record_invariant (data, op, true); 1641 return NULL; 1642 } 1643 1644 stmt = SSA_NAME_DEF_STMT (op); 1645 gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt)); 1646 1647 use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR); 1648 iv->nonlin_use = use; 1649 return use; 1650 } 1651 1652 /* Given a condition in statement STMT, checks whether it is a compare 1653 of an induction variable and an invariant. If this is the case, 1654 CONTROL_VAR is set to location of the iv, BOUND to the location of 1655 the invariant, IV_VAR and IV_BOUND are set to the corresponding 1656 induction variable descriptions, and true is returned. If this is not 1657 the case, CONTROL_VAR and BOUND are set to the arguments of the 1658 condition and false is returned. */ 1659 1660 static bool 1661 extract_cond_operands (struct ivopts_data *data, gimple *stmt, 1662 tree **control_var, tree **bound, 1663 struct iv **iv_var, struct iv **iv_bound) 1664 { 1665 /* The objects returned when COND has constant operands. */ 1666 static struct iv const_iv; 1667 static tree zero; 1668 tree *op0 = &zero, *op1 = &zero; 1669 struct iv *iv0 = &const_iv, *iv1 = &const_iv; 1670 bool ret = false; 1671 1672 if (gimple_code (stmt) == GIMPLE_COND) 1673 { 1674 gcond *cond_stmt = as_a <gcond *> (stmt); 1675 op0 = gimple_cond_lhs_ptr (cond_stmt); 1676 op1 = gimple_cond_rhs_ptr (cond_stmt); 1677 } 1678 else 1679 { 1680 op0 = gimple_assign_rhs1_ptr (stmt); 1681 op1 = gimple_assign_rhs2_ptr (stmt); 1682 } 1683 1684 zero = integer_zero_node; 1685 const_iv.step = integer_zero_node; 1686 1687 if (TREE_CODE (*op0) == SSA_NAME) 1688 iv0 = get_iv (data, *op0); 1689 if (TREE_CODE (*op1) == SSA_NAME) 1690 iv1 = get_iv (data, *op1); 1691 1692 /* Exactly one of the compared values must be an iv, and the other one must 1693 be an invariant. */ 1694 if (!iv0 || !iv1) 1695 goto end; 1696 1697 if (integer_zerop (iv0->step)) 1698 { 1699 /* Control variable may be on the other side. */ 1700 std::swap (op0, op1); 1701 std::swap (iv0, iv1); 1702 } 1703 ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step); 1704 1705 end: 1706 if (control_var) 1707 *control_var = op0; 1708 if (iv_var) 1709 *iv_var = iv0; 1710 if (bound) 1711 *bound = op1; 1712 if (iv_bound) 1713 *iv_bound = iv1; 1714 1715 return ret; 1716 } 1717 1718 /* Checks whether the condition in STMT is interesting and if so, 1719 records it. */ 1720 1721 static void 1722 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt) 1723 { 1724 tree *var_p, *bound_p; 1725 struct iv *var_iv; 1726 1727 if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL)) 1728 { 1729 find_interesting_uses_op (data, *var_p); 1730 find_interesting_uses_op (data, *bound_p); 1731 return; 1732 } 1733 1734 record_group_use (data, NULL, var_iv, stmt, USE_COMPARE); 1735 } 1736 1737 /* Returns the outermost loop EXPR is obviously invariant in 1738 relative to the loop LOOP, i.e. if all its operands are defined 1739 outside of the returned loop. 
Returns NULL if EXPR is not 1740 even obviously invariant in LOOP. */ 1741 1742 struct loop * 1743 outermost_invariant_loop_for_expr (struct loop *loop, tree expr) 1744 { 1745 basic_block def_bb; 1746 unsigned i, len; 1747 1748 if (is_gimple_min_invariant (expr)) 1749 return current_loops->tree_root; 1750 1751 if (TREE_CODE (expr) == SSA_NAME) 1752 { 1753 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr)); 1754 if (def_bb) 1755 { 1756 if (flow_bb_inside_loop_p (loop, def_bb)) 1757 return NULL; 1758 return superloop_at_depth (loop, 1759 loop_depth (def_bb->loop_father) + 1); 1760 } 1761 1762 return current_loops->tree_root; 1763 } 1764 1765 if (!EXPR_P (expr)) 1766 return NULL; 1767 1768 unsigned maxdepth = 0; 1769 len = TREE_OPERAND_LENGTH (expr); 1770 for (i = 0; i < len; i++) 1771 { 1772 struct loop *ivloop; 1773 if (!TREE_OPERAND (expr, i)) 1774 continue; 1775 1776 ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i)); 1777 if (!ivloop) 1778 return NULL; 1779 maxdepth = MAX (maxdepth, loop_depth (ivloop)); 1780 } 1781 1782 return superloop_at_depth (loop, maxdepth); 1783 } 1784 1785 /* Returns true if expression EXPR is obviously invariant in LOOP, 1786 i.e. if all its operands are defined outside of the LOOP. LOOP 1787 should not be the function body. */ 1788 1789 bool 1790 expr_invariant_in_loop_p (struct loop *loop, tree expr) 1791 { 1792 basic_block def_bb; 1793 unsigned i, len; 1794 1795 gcc_assert (loop_depth (loop) > 0); 1796 1797 if (is_gimple_min_invariant (expr)) 1798 return true; 1799 1800 if (TREE_CODE (expr) == SSA_NAME) 1801 { 1802 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr)); 1803 if (def_bb 1804 && flow_bb_inside_loop_p (loop, def_bb)) 1805 return false; 1806 1807 return true; 1808 } 1809 1810 if (!EXPR_P (expr)) 1811 return false; 1812 1813 len = TREE_OPERAND_LENGTH (expr); 1814 for (i = 0; i < len; i++) 1815 if (TREE_OPERAND (expr, i) 1816 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i))) 1817 return false; 1818 1819 return true; 1820 } 1821 1822 /* Given expression EXPR which computes inductive values with respect 1823 to loop recorded in DATA, this function returns biv from which EXPR 1824 is derived by tracing definition chains of ssa variables in EXPR. */ 1825 1826 static struct iv* 1827 find_deriving_biv_for_expr (struct ivopts_data *data, tree expr) 1828 { 1829 struct iv *iv; 1830 unsigned i, n; 1831 tree e2, e1; 1832 enum tree_code code; 1833 gimple *stmt; 1834 1835 if (expr == NULL_TREE) 1836 return NULL; 1837 1838 if (is_gimple_min_invariant (expr)) 1839 return NULL; 1840 1841 code = TREE_CODE (expr); 1842 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code))) 1843 { 1844 n = TREE_OPERAND_LENGTH (expr); 1845 for (i = 0; i < n; i++) 1846 { 1847 iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i)); 1848 if (iv) 1849 return iv; 1850 } 1851 } 1852 1853 /* Stop if it's not ssa name. */ 1854 if (code != SSA_NAME) 1855 return NULL; 1856 1857 iv = get_iv (data, expr); 1858 if (!iv || integer_zerop (iv->step)) 1859 return NULL; 1860 else if (iv->biv_p) 1861 return iv; 1862 1863 stmt = SSA_NAME_DEF_STMT (expr); 1864 if (gphi *phi = dyn_cast <gphi *> (stmt)) 1865 { 1866 ssa_op_iter iter; 1867 use_operand_p use_p; 1868 basic_block phi_bb = gimple_bb (phi); 1869 1870 /* Skip loop header PHI that doesn't define biv. 
*/ 1871 if (phi_bb->loop_father == data->current_loop) 1872 return NULL; 1873 1874 if (virtual_operand_p (gimple_phi_result (phi))) 1875 return NULL; 1876 1877 FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE) 1878 { 1879 tree use = USE_FROM_PTR (use_p); 1880 iv = find_deriving_biv_for_expr (data, use); 1881 if (iv) 1882 return iv; 1883 } 1884 return NULL; 1885 } 1886 if (gimple_code (stmt) != GIMPLE_ASSIGN) 1887 return NULL; 1888 1889 e1 = gimple_assign_rhs1 (stmt); 1890 code = gimple_assign_rhs_code (stmt); 1891 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS) 1892 return find_deriving_biv_for_expr (data, e1); 1893 1894 switch (code) 1895 { 1896 case MULT_EXPR: 1897 case PLUS_EXPR: 1898 case MINUS_EXPR: 1899 case POINTER_PLUS_EXPR: 1900 /* Increments, decrements and multiplications by a constant 1901 are simple. */ 1902 e2 = gimple_assign_rhs2 (stmt); 1903 iv = find_deriving_biv_for_expr (data, e2); 1904 if (iv) 1905 return iv; 1906 gcc_fallthrough (); 1907 1908 CASE_CONVERT: 1909 /* Casts are simple. */ 1910 return find_deriving_biv_for_expr (data, e1); 1911 1912 default: 1913 break; 1914 } 1915 1916 return NULL; 1917 } 1918 1919 /* Record BIV, its predecessor and successor that they are used in 1920 address type uses. */ 1921 1922 static void 1923 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv) 1924 { 1925 unsigned i; 1926 tree type, base_1, base_2; 1927 bitmap_iterator bi; 1928 1929 if (!biv || !biv->biv_p || integer_zerop (biv->step) 1930 || biv->have_address_use || !biv->no_overflow) 1931 return; 1932 1933 type = TREE_TYPE (biv->base); 1934 if (!INTEGRAL_TYPE_P (type)) 1935 return; 1936 1937 biv->have_address_use = true; 1938 data->bivs_not_used_in_addr--; 1939 base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step); 1940 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi) 1941 { 1942 struct iv *iv = ver_info (data, i)->iv; 1943 1944 if (!iv || !iv->biv_p || integer_zerop (iv->step) 1945 || iv->have_address_use || !iv->no_overflow) 1946 continue; 1947 1948 if (type != TREE_TYPE (iv->base) 1949 || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base))) 1950 continue; 1951 1952 if (!operand_equal_p (biv->step, iv->step, 0)) 1953 continue; 1954 1955 base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step); 1956 if (operand_equal_p (base_1, iv->base, 0) 1957 || operand_equal_p (base_2, biv->base, 0)) 1958 { 1959 iv->have_address_use = true; 1960 data->bivs_not_used_in_addr--; 1961 } 1962 } 1963 } 1964 1965 /* Cumulates the steps of indices into DATA and replaces their values with the 1966 initial ones. Returns false when the value of the index cannot be determined. 1967 Callback for for_each_index. */ 1968 1969 struct ifs_ivopts_data 1970 { 1971 struct ivopts_data *ivopts_data; 1972 gimple *stmt; 1973 tree step; 1974 }; 1975 1976 static bool 1977 idx_find_step (tree base, tree *idx, void *data) 1978 { 1979 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data; 1980 struct iv *iv; 1981 bool use_overflow_semantics = false; 1982 tree step, iv_base, iv_step, lbound, off; 1983 struct loop *loop = dta->ivopts_data->current_loop; 1984 1985 /* If base is a component ref, require that the offset of the reference 1986 be invariant. */ 1987 if (TREE_CODE (base) == COMPONENT_REF) 1988 { 1989 off = component_ref_field_offset (base); 1990 return expr_invariant_in_loop_p (loop, off); 1991 } 1992 1993 /* If base is array, first check whether we will be able to move the 1994 reference out of the loop (in order to take its address in strength 1995 reduction). 
In order for this to work we need both lower bound 1996 and step to be loop invariants. */ 1997 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF) 1998 { 1999 /* Moreover, for a range, the size needs to be invariant as well. */ 2000 if (TREE_CODE (base) == ARRAY_RANGE_REF 2001 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base)))) 2002 return false; 2003 2004 step = array_ref_element_size (base); 2005 lbound = array_ref_low_bound (base); 2006 2007 if (!expr_invariant_in_loop_p (loop, step) 2008 || !expr_invariant_in_loop_p (loop, lbound)) 2009 return false; 2010 } 2011 2012 if (TREE_CODE (*idx) != SSA_NAME) 2013 return true; 2014 2015 iv = get_iv (dta->ivopts_data, *idx); 2016 if (!iv) 2017 return false; 2018 2019 /* XXX We produce for a base of *D42 with iv->base being &x[0] 2020 *&x[0], which is not folded and does not trigger the 2021 ARRAY_REF path below. */ 2022 *idx = iv->base; 2023 2024 if (integer_zerop (iv->step)) 2025 return true; 2026 2027 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF) 2028 { 2029 step = array_ref_element_size (base); 2030 2031 /* We only handle addresses whose step is an integer constant. */ 2032 if (TREE_CODE (step) != INTEGER_CST) 2033 return false; 2034 } 2035 else 2036 /* The step for pointer arithmetics already is 1 byte. */ 2037 step = size_one_node; 2038 2039 iv_base = iv->base; 2040 iv_step = iv->step; 2041 if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step))) 2042 use_overflow_semantics = true; 2043 2044 if (!convert_affine_scev (dta->ivopts_data->current_loop, 2045 sizetype, &iv_base, &iv_step, dta->stmt, 2046 use_overflow_semantics)) 2047 { 2048 /* The index might wrap. */ 2049 return false; 2050 } 2051 2052 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step); 2053 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step); 2054 2055 if (dta->ivopts_data->bivs_not_used_in_addr) 2056 { 2057 if (!iv->biv_p) 2058 iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name); 2059 2060 record_biv_for_address_use (dta->ivopts_data, iv); 2061 } 2062 return true; 2063 } 2064 2065 /* Records use in index IDX. Callback for for_each_index. Ivopts data 2066 object is passed to it in DATA. */ 2067 2068 static bool 2069 idx_record_use (tree base, tree *idx, 2070 void *vdata) 2071 { 2072 struct ivopts_data *data = (struct ivopts_data *) vdata; 2073 find_interesting_uses_op (data, *idx); 2074 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF) 2075 { 2076 find_interesting_uses_op (data, array_ref_element_size (base)); 2077 find_interesting_uses_op (data, array_ref_low_bound (base)); 2078 } 2079 return true; 2080 } 2081 2082 /* If we can prove that TOP = cst * BOT for some constant cst, 2083 store cst to MUL and return true. Otherwise return false. 2084 The returned value is always sign-extended, regardless of the 2085 signedness of TOP and BOT. 
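
   For illustration only (a sketch using a hypothetical SSA name i_1, not an
   example taken from this file): with TOP = i_1 * 8 + i_1 * 4 and BOT = i_1
   the function stores 12 to *MUL and returns true, whereas with
   TOP = i_1 + 1 and BOT = i_1 it returns false, since no such constant
   multiple exists.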
*/ 2086 2087 static bool 2088 constant_multiple_of (tree top, tree bot, widest_int *mul) 2089 { 2090 tree mby; 2091 enum tree_code code; 2092 unsigned precision = TYPE_PRECISION (TREE_TYPE (top)); 2093 widest_int res, p0, p1; 2094 2095 STRIP_NOPS (top); 2096 STRIP_NOPS (bot); 2097 2098 if (operand_equal_p (top, bot, 0)) 2099 { 2100 *mul = 1; 2101 return true; 2102 } 2103 2104 code = TREE_CODE (top); 2105 switch (code) 2106 { 2107 case MULT_EXPR: 2108 mby = TREE_OPERAND (top, 1); 2109 if (TREE_CODE (mby) != INTEGER_CST) 2110 return false; 2111 2112 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res)) 2113 return false; 2114 2115 *mul = wi::sext (res * wi::to_widest (mby), precision); 2116 return true; 2117 2118 case PLUS_EXPR: 2119 case MINUS_EXPR: 2120 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0) 2121 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1)) 2122 return false; 2123 2124 if (code == MINUS_EXPR) 2125 p1 = -p1; 2126 *mul = wi::sext (p0 + p1, precision); 2127 return true; 2128 2129 case INTEGER_CST: 2130 if (TREE_CODE (bot) != INTEGER_CST) 2131 return false; 2132 2133 p0 = widest_int::from (top, SIGNED); 2134 p1 = widest_int::from (bot, SIGNED); 2135 if (p1 == 0) 2136 return false; 2137 *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision); 2138 return res == 0; 2139 2140 default: 2141 return false; 2142 } 2143 } 2144 2145 /* Return true if memory reference REF with step STEP may be unaligned. */ 2146 2147 static bool 2148 may_be_unaligned_p (tree ref, tree step) 2149 { 2150 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target, 2151 thus they are not misaligned. */ 2152 if (TREE_CODE (ref) == TARGET_MEM_REF) 2153 return false; 2154 2155 unsigned int align = TYPE_ALIGN (TREE_TYPE (ref)); 2156 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align) 2157 align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))); 2158 2159 unsigned HOST_WIDE_INT bitpos; 2160 unsigned int ref_align; 2161 get_object_alignment_1 (ref, &ref_align, &bitpos); 2162 if (ref_align < align 2163 || (bitpos % align) != 0 2164 || (bitpos % BITS_PER_UNIT) != 0) 2165 return true; 2166 2167 unsigned int trailing_zeros = tree_ctz (step); 2168 if (trailing_zeros < HOST_BITS_PER_INT 2169 && (1U << trailing_zeros) * BITS_PER_UNIT < align) 2170 return true; 2171 2172 return false; 2173 } 2174 2175 /* Return true if EXPR may be non-addressable. */ 2176 2177 bool 2178 may_be_nonaddressable_p (tree expr) 2179 { 2180 switch (TREE_CODE (expr)) 2181 { 2182 case TARGET_MEM_REF: 2183 /* TARGET_MEM_REFs are translated directly to valid MEMs on the 2184 target, thus they are always addressable. */ 2185 return false; 2186 2187 case MEM_REF: 2188 /* Likewise for MEM_REFs, modulo the storage order. 
*/ 2189 return REF_REVERSE_STORAGE_ORDER (expr); 2190 2191 case BIT_FIELD_REF: 2192 if (REF_REVERSE_STORAGE_ORDER (expr)) 2193 return true; 2194 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0)); 2195 2196 case COMPONENT_REF: 2197 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0)))) 2198 return true; 2199 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1)) 2200 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0)); 2201 2202 case ARRAY_REF: 2203 case ARRAY_RANGE_REF: 2204 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0)))) 2205 return true; 2206 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0)); 2207 2208 case VIEW_CONVERT_EXPR: 2209 /* This kind of view-conversions may wrap non-addressable objects 2210 and make them look addressable. After some processing the 2211 non-addressability may be uncovered again, causing ADDR_EXPRs 2212 of inappropriate objects to be built. */ 2213 if (is_gimple_reg (TREE_OPERAND (expr, 0)) 2214 || !is_gimple_addressable (TREE_OPERAND (expr, 0))) 2215 return true; 2216 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0)); 2217 2218 CASE_CONVERT: 2219 return true; 2220 2221 default: 2222 break; 2223 } 2224 2225 return false; 2226 } 2227 2228 /* Finds addresses in *OP_P inside STMT. */ 2229 2230 static void 2231 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt, 2232 tree *op_p) 2233 { 2234 tree base = *op_p, step = size_zero_node; 2235 struct iv *civ; 2236 struct ifs_ivopts_data ifs_ivopts_data; 2237 2238 /* Do not play with volatile memory references. A bit too conservative, 2239 perhaps, but safe. */ 2240 if (gimple_has_volatile_ops (stmt)) 2241 goto fail; 2242 2243 /* Ignore bitfields for now. Not really something terribly complicated 2244 to handle. TODO. */ 2245 if (TREE_CODE (base) == BIT_FIELD_REF) 2246 goto fail; 2247 2248 base = unshare_expr (base); 2249 2250 if (TREE_CODE (base) == TARGET_MEM_REF) 2251 { 2252 tree type = build_pointer_type (TREE_TYPE (base)); 2253 tree astep; 2254 2255 if (TMR_BASE (base) 2256 && TREE_CODE (TMR_BASE (base)) == SSA_NAME) 2257 { 2258 civ = get_iv (data, TMR_BASE (base)); 2259 if (!civ) 2260 goto fail; 2261 2262 TMR_BASE (base) = civ->base; 2263 step = civ->step; 2264 } 2265 if (TMR_INDEX2 (base) 2266 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME) 2267 { 2268 civ = get_iv (data, TMR_INDEX2 (base)); 2269 if (!civ) 2270 goto fail; 2271 2272 TMR_INDEX2 (base) = civ->base; 2273 step = civ->step; 2274 } 2275 if (TMR_INDEX (base) 2276 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME) 2277 { 2278 civ = get_iv (data, TMR_INDEX (base)); 2279 if (!civ) 2280 goto fail; 2281 2282 TMR_INDEX (base) = civ->base; 2283 astep = civ->step; 2284 2285 if (astep) 2286 { 2287 if (TMR_STEP (base)) 2288 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep); 2289 2290 step = fold_build2 (PLUS_EXPR, type, step, astep); 2291 } 2292 } 2293 2294 if (integer_zerop (step)) 2295 goto fail; 2296 base = tree_mem_ref_addr (type, base); 2297 } 2298 else 2299 { 2300 ifs_ivopts_data.ivopts_data = data; 2301 ifs_ivopts_data.stmt = stmt; 2302 ifs_ivopts_data.step = size_zero_node; 2303 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data) 2304 || integer_zerop (ifs_ivopts_data.step)) 2305 goto fail; 2306 step = ifs_ivopts_data.step; 2307 2308 /* Check that the base expression is addressable. This needs 2309 to be done after substituting bases of IVs into it. 
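   E.g. the substitution may expose a VIEW_CONVERT_EXPR of a gimple
   register or a reverse-storage-order component at the bottom of the
   reference; may_be_nonaddressable_p detects such cases and we punt
   rather than take the address of an object whose address must not be
   taken.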
*/ 2310 if (may_be_nonaddressable_p (base)) 2311 goto fail; 2312 2313 /* Moreover, on strict alignment platforms, check that it is 2314 sufficiently aligned. */ 2315 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step)) 2316 goto fail; 2317 2318 base = build_fold_addr_expr (base); 2319 2320 /* Substituting bases of IVs into the base expression might 2321 have caused folding opportunities. */ 2322 if (TREE_CODE (base) == ADDR_EXPR) 2323 { 2324 tree *ref = &TREE_OPERAND (base, 0); 2325 while (handled_component_p (*ref)) 2326 ref = &TREE_OPERAND (*ref, 0); 2327 if (TREE_CODE (*ref) == MEM_REF) 2328 { 2329 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref), 2330 TREE_OPERAND (*ref, 0), 2331 TREE_OPERAND (*ref, 1)); 2332 if (tem) 2333 *ref = tem; 2334 } 2335 } 2336 } 2337 2338 civ = alloc_iv (data, base, step); 2339 /* Fail if base object of this memory reference is unknown. */ 2340 if (civ->base_object == NULL_TREE) 2341 goto fail; 2342 2343 record_group_use (data, op_p, civ, stmt, USE_ADDRESS); 2344 return; 2345 2346 fail: 2347 for_each_index (op_p, idx_record_use, data); 2348 } 2349 2350 /* Finds and records invariants used in STMT. */ 2351 2352 static void 2353 find_invariants_stmt (struct ivopts_data *data, gimple *stmt) 2354 { 2355 ssa_op_iter iter; 2356 use_operand_p use_p; 2357 tree op; 2358 2359 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE) 2360 { 2361 op = USE_FROM_PTR (use_p); 2362 record_invariant (data, op, false); 2363 } 2364 } 2365 2366 /* Finds interesting uses of induction variables in the statement STMT. */ 2367 2368 static void 2369 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt) 2370 { 2371 struct iv *iv; 2372 tree op, *lhs, *rhs; 2373 ssa_op_iter iter; 2374 use_operand_p use_p; 2375 enum tree_code code; 2376 2377 find_invariants_stmt (data, stmt); 2378 2379 if (gimple_code (stmt) == GIMPLE_COND) 2380 { 2381 find_interesting_uses_cond (data, stmt); 2382 return; 2383 } 2384 2385 if (is_gimple_assign (stmt)) 2386 { 2387 lhs = gimple_assign_lhs_ptr (stmt); 2388 rhs = gimple_assign_rhs1_ptr (stmt); 2389 2390 if (TREE_CODE (*lhs) == SSA_NAME) 2391 { 2392 /* If the statement defines an induction variable, the uses are not 2393 interesting by themselves. */ 2394 2395 iv = get_iv (data, *lhs); 2396 2397 if (iv && !integer_zerop (iv->step)) 2398 return; 2399 } 2400 2401 code = gimple_assign_rhs_code (stmt); 2402 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS 2403 && (REFERENCE_CLASS_P (*rhs) 2404 || is_gimple_val (*rhs))) 2405 { 2406 if (REFERENCE_CLASS_P (*rhs)) 2407 find_interesting_uses_address (data, stmt, rhs); 2408 else 2409 find_interesting_uses_op (data, *rhs); 2410 2411 if (REFERENCE_CLASS_P (*lhs)) 2412 find_interesting_uses_address (data, stmt, lhs); 2413 return; 2414 } 2415 else if (TREE_CODE_CLASS (code) == tcc_comparison) 2416 { 2417 find_interesting_uses_cond (data, stmt); 2418 return; 2419 } 2420 2421 /* TODO -- we should also handle address uses of type 2422 2423 memory = call (whatever); 2424 2425 and 2426 2427 call (memory). 
*/ 2428 } 2429 2430 if (gimple_code (stmt) == GIMPLE_PHI 2431 && gimple_bb (stmt) == data->current_loop->header) 2432 { 2433 iv = get_iv (data, PHI_RESULT (stmt)); 2434 2435 if (iv && !integer_zerop (iv->step)) 2436 return; 2437 } 2438 2439 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE) 2440 { 2441 op = USE_FROM_PTR (use_p); 2442 2443 if (TREE_CODE (op) != SSA_NAME) 2444 continue; 2445 2446 iv = get_iv (data, op); 2447 if (!iv) 2448 continue; 2449 2450 find_interesting_uses_op (data, op); 2451 } 2452 } 2453 2454 /* Finds interesting uses of induction variables outside of loops 2455 on loop exit edge EXIT. */ 2456 2457 static void 2458 find_interesting_uses_outside (struct ivopts_data *data, edge exit) 2459 { 2460 gphi *phi; 2461 gphi_iterator psi; 2462 tree def; 2463 2464 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi)) 2465 { 2466 phi = psi.phi (); 2467 def = PHI_ARG_DEF_FROM_EDGE (phi, exit); 2468 if (!virtual_operand_p (def)) 2469 find_interesting_uses_op (data, def); 2470 } 2471 } 2472 2473 /* Compute maximum offset of [base + offset] addressing mode 2474 for memory reference represented by USE. */ 2475 2476 static HOST_WIDE_INT 2477 compute_max_addr_offset (struct iv_use *use) 2478 { 2479 int width; 2480 rtx reg, addr; 2481 HOST_WIDE_INT i, off; 2482 unsigned list_index, num; 2483 addr_space_t as; 2484 machine_mode mem_mode, addr_mode; 2485 static vec<HOST_WIDE_INT> max_offset_list; 2486 2487 as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base)); 2488 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p)); 2489 2490 num = max_offset_list.length (); 2491 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode; 2492 if (list_index >= num) 2493 { 2494 max_offset_list.safe_grow (list_index + MAX_MACHINE_MODE); 2495 for (; num < max_offset_list.length (); num++) 2496 max_offset_list[num] = -1; 2497 } 2498 2499 off = max_offset_list[list_index]; 2500 if (off != -1) 2501 return off; 2502 2503 addr_mode = targetm.addr_space.address_mode (as); 2504 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1); 2505 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX); 2506 2507 width = GET_MODE_BITSIZE (addr_mode) - 1; 2508 if (width > (HOST_BITS_PER_WIDE_INT - 1)) 2509 width = HOST_BITS_PER_WIDE_INT - 1; 2510 2511 for (i = width; i > 0; i--) 2512 { 2513 off = (HOST_WIDE_INT_1U << i) - 1; 2514 XEXP (addr, 1) = gen_int_mode (off, addr_mode); 2515 if (memory_address_addr_space_p (mem_mode, addr, as)) 2516 break; 2517 2518 /* For some strict-alignment targets, the offset must be naturally 2519 aligned. Try an aligned offset if mem_mode is not QImode. */ 2520 off = (HOST_WIDE_INT_1U << i); 2521 if (off > GET_MODE_SIZE (mem_mode) && mem_mode != QImode) 2522 { 2523 off -= GET_MODE_SIZE (mem_mode); 2524 XEXP (addr, 1) = gen_int_mode (off, addr_mode); 2525 if (memory_address_addr_space_p (mem_mode, addr, as)) 2526 break; 2527 } 2528 } 2529 if (i == 0) 2530 off = 0; 2531 2532 max_offset_list[list_index] = off; 2533 return off; 2534 } 2535 2536 /* Comparison function to sort group in ascending order of addr_offset. */ 2537 2538 static int 2539 group_compare_offset (const void *a, const void *b) 2540 { 2541 const struct iv_use *const *u1 = (const struct iv_use *const *) a; 2542 const struct iv_use *const *u2 = (const struct iv_use *const *) b; 2543 2544 if ((*u1)->addr_offset != (*u2)->addr_offset) 2545 return (*u1)->addr_offset < (*u2)->addr_offset ? -1 : 1; 2546 else 2547 return 0; 2548 } 2549 2550 /* Check if small groups should be split. 
Return true if no group 2551 contains more than two uses with distinct addr_offsets. Return 2552 false otherwise. We want to split such groups because: 2553 2554 1) Small groups don't have much benefit and may interfer with 2555 general candidate selection. 2556 2) Size for problem with only small groups is usually small and 2557 general algorithm can handle it well. 2558 2559 TODO -- Above claim may not hold when we want to merge memory 2560 accesses with conseuctive addresses. */ 2561 2562 static bool 2563 split_small_address_groups_p (struct ivopts_data *data) 2564 { 2565 unsigned int i, j, distinct = 1; 2566 struct iv_use *pre; 2567 struct iv_group *group; 2568 2569 for (i = 0; i < data->vgroups.length (); i++) 2570 { 2571 group = data->vgroups[i]; 2572 if (group->vuses.length () == 1) 2573 continue; 2574 2575 gcc_assert (group->type == USE_ADDRESS); 2576 if (group->vuses.length () == 2) 2577 { 2578 if (group->vuses[0]->addr_offset > group->vuses[1]->addr_offset) 2579 std::swap (group->vuses[0], group->vuses[1]); 2580 } 2581 else 2582 group->vuses.qsort (group_compare_offset); 2583 2584 if (distinct > 2) 2585 continue; 2586 2587 distinct = 1; 2588 for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++) 2589 { 2590 if (group->vuses[j]->addr_offset != pre->addr_offset) 2591 { 2592 pre = group->vuses[j]; 2593 distinct++; 2594 } 2595 2596 if (distinct > 2) 2597 break; 2598 } 2599 } 2600 2601 return (distinct <= 2); 2602 } 2603 2604 /* For each group of address type uses, this function further groups 2605 these uses according to the maximum offset supported by target's 2606 [base + offset] addressing mode. */ 2607 2608 static void 2609 split_address_groups (struct ivopts_data *data) 2610 { 2611 unsigned int i, j; 2612 HOST_WIDE_INT max_offset = -1; 2613 2614 /* Reset max offset to split all small groups. */ 2615 if (split_small_address_groups_p (data)) 2616 max_offset = 0; 2617 2618 for (i = 0; i < data->vgroups.length (); i++) 2619 { 2620 struct iv_group *group = data->vgroups[i]; 2621 struct iv_use *use = group->vuses[0]; 2622 2623 use->id = 0; 2624 use->group_id = group->id; 2625 if (group->vuses.length () == 1) 2626 continue; 2627 2628 if (max_offset != 0) 2629 max_offset = compute_max_addr_offset (use); 2630 2631 for (j = 1; j < group->vuses.length (); j++) 2632 { 2633 struct iv_use *next = group->vuses[j]; 2634 2635 /* Only uses with offset that can fit in offset part against 2636 the first use can be grouped together. */ 2637 if (next->addr_offset - use->addr_offset 2638 > (unsigned HOST_WIDE_INT) max_offset) 2639 break; 2640 2641 next->id = j; 2642 next->group_id = group->id; 2643 } 2644 /* Split group. */ 2645 if (j < group->vuses.length ()) 2646 { 2647 struct iv_group *new_group = record_group (data, group->type); 2648 new_group->vuses.safe_splice (group->vuses); 2649 new_group->vuses.block_remove (0, j); 2650 group->vuses.truncate (j); 2651 } 2652 } 2653 } 2654 2655 /* Finds uses of the induction variables that are interesting. 
*/ 2656 2657 static void 2658 find_interesting_uses (struct ivopts_data *data) 2659 { 2660 basic_block bb; 2661 gimple_stmt_iterator bsi; 2662 basic_block *body = get_loop_body (data->current_loop); 2663 unsigned i; 2664 edge e; 2665 2666 for (i = 0; i < data->current_loop->num_nodes; i++) 2667 { 2668 edge_iterator ei; 2669 bb = body[i]; 2670 2671 FOR_EACH_EDGE (e, ei, bb->succs) 2672 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) 2673 && !flow_bb_inside_loop_p (data->current_loop, e->dest)) 2674 find_interesting_uses_outside (data, e); 2675 2676 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi)) 2677 find_interesting_uses_stmt (data, gsi_stmt (bsi)); 2678 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi)) 2679 if (!is_gimple_debug (gsi_stmt (bsi))) 2680 find_interesting_uses_stmt (data, gsi_stmt (bsi)); 2681 } 2682 2683 split_address_groups (data); 2684 2685 if (dump_file && (dump_flags & TDF_DETAILS)) 2686 { 2687 bitmap_iterator bi; 2688 2689 fprintf (dump_file, "\n<Invariant Vars>:\n"); 2690 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi) 2691 { 2692 struct version_info *info = ver_info (data, i); 2693 if (info->inv_id) 2694 { 2695 fprintf (dump_file, "Inv %d:\t", info->inv_id); 2696 print_generic_expr (dump_file, info->name, TDF_SLIM); 2697 fprintf (dump_file, "%s\n", 2698 info->has_nonlin_use ? "" : "\t(eliminable)"); 2699 } 2700 } 2701 2702 fprintf (dump_file, "\n<IV Groups>:\n"); 2703 dump_groups (dump_file, data); 2704 fprintf (dump_file, "\n"); 2705 } 2706 2707 free (body); 2708 } 2709 2710 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR 2711 is true, assume we are inside an address. If TOP_COMPREF is true, assume 2712 we are at the top-level of the processed address. */ 2713 2714 static tree 2715 strip_offset_1 (tree expr, bool inside_addr, bool top_compref, 2716 HOST_WIDE_INT *offset) 2717 { 2718 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step; 2719 enum tree_code code; 2720 tree type, orig_type = TREE_TYPE (expr); 2721 HOST_WIDE_INT off0, off1, st; 2722 tree orig_expr = expr; 2723 2724 STRIP_NOPS (expr); 2725 2726 type = TREE_TYPE (expr); 2727 code = TREE_CODE (expr); 2728 *offset = 0; 2729 2730 switch (code) 2731 { 2732 case INTEGER_CST: 2733 if (!cst_and_fits_in_hwi (expr) 2734 || integer_zerop (expr)) 2735 return orig_expr; 2736 2737 *offset = int_cst_value (expr); 2738 return build_int_cst (orig_type, 0); 2739 2740 case POINTER_PLUS_EXPR: 2741 case PLUS_EXPR: 2742 case MINUS_EXPR: 2743 op0 = TREE_OPERAND (expr, 0); 2744 op1 = TREE_OPERAND (expr, 1); 2745 2746 op0 = strip_offset_1 (op0, false, false, &off0); 2747 op1 = strip_offset_1 (op1, false, false, &off1); 2748 2749 *offset = (code == MINUS_EXPR ? 
off0 - off1 : off0 + off1); 2750 if (op0 == TREE_OPERAND (expr, 0) 2751 && op1 == TREE_OPERAND (expr, 1)) 2752 return orig_expr; 2753 2754 if (integer_zerop (op1)) 2755 expr = op0; 2756 else if (integer_zerop (op0)) 2757 { 2758 if (code == MINUS_EXPR) 2759 expr = fold_build1 (NEGATE_EXPR, type, op1); 2760 else 2761 expr = op1; 2762 } 2763 else 2764 expr = fold_build2 (code, type, op0, op1); 2765 2766 return fold_convert (orig_type, expr); 2767 2768 case MULT_EXPR: 2769 op1 = TREE_OPERAND (expr, 1); 2770 if (!cst_and_fits_in_hwi (op1)) 2771 return orig_expr; 2772 2773 op0 = TREE_OPERAND (expr, 0); 2774 op0 = strip_offset_1 (op0, false, false, &off0); 2775 if (op0 == TREE_OPERAND (expr, 0)) 2776 return orig_expr; 2777 2778 *offset = off0 * int_cst_value (op1); 2779 if (integer_zerop (op0)) 2780 expr = op0; 2781 else 2782 expr = fold_build2 (MULT_EXPR, type, op0, op1); 2783 2784 return fold_convert (orig_type, expr); 2785 2786 case ARRAY_REF: 2787 case ARRAY_RANGE_REF: 2788 if (!inside_addr) 2789 return orig_expr; 2790 2791 step = array_ref_element_size (expr); 2792 if (!cst_and_fits_in_hwi (step)) 2793 break; 2794 2795 st = int_cst_value (step); 2796 op1 = TREE_OPERAND (expr, 1); 2797 op1 = strip_offset_1 (op1, false, false, &off1); 2798 *offset = off1 * st; 2799 2800 if (top_compref 2801 && integer_zerop (op1)) 2802 { 2803 /* Strip the component reference completely. */ 2804 op0 = TREE_OPERAND (expr, 0); 2805 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0); 2806 *offset += off0; 2807 return op0; 2808 } 2809 break; 2810 2811 case COMPONENT_REF: 2812 { 2813 tree field; 2814 2815 if (!inside_addr) 2816 return orig_expr; 2817 2818 tmp = component_ref_field_offset (expr); 2819 field = TREE_OPERAND (expr, 1); 2820 if (top_compref 2821 && cst_and_fits_in_hwi (tmp) 2822 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field))) 2823 { 2824 HOST_WIDE_INT boffset, abs_off; 2825 2826 /* Strip the component reference completely. */ 2827 op0 = TREE_OPERAND (expr, 0); 2828 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0); 2829 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field)); 2830 abs_off = abs_hwi (boffset) / BITS_PER_UNIT; 2831 if (boffset < 0) 2832 abs_off = -abs_off; 2833 2834 *offset = off0 + int_cst_value (tmp) + abs_off; 2835 return op0; 2836 } 2837 } 2838 break; 2839 2840 case ADDR_EXPR: 2841 op0 = TREE_OPERAND (expr, 0); 2842 op0 = strip_offset_1 (op0, true, true, &off0); 2843 *offset += off0; 2844 2845 if (op0 == TREE_OPERAND (expr, 0)) 2846 return orig_expr; 2847 2848 expr = build_fold_addr_expr (op0); 2849 return fold_convert (orig_type, expr); 2850 2851 case MEM_REF: 2852 /* ??? Offset operand? */ 2853 inside_addr = false; 2854 break; 2855 2856 default: 2857 return orig_expr; 2858 } 2859 2860 /* Default handling of expressions for that we want to recurse into 2861 the first operand. */ 2862 op0 = TREE_OPERAND (expr, 0); 2863 op0 = strip_offset_1 (op0, inside_addr, false, &off0); 2864 *offset += off0; 2865 2866 if (op0 == TREE_OPERAND (expr, 0) 2867 && (!op1 || op1 == TREE_OPERAND (expr, 1))) 2868 return orig_expr; 2869 2870 expr = copy_node (expr); 2871 TREE_OPERAND (expr, 0) = op0; 2872 if (op1) 2873 TREE_OPERAND (expr, 1) = op1; 2874 2875 /* Inside address, we might strip the top level component references, 2876 thus changing type of the expression. Handling of ADDR_EXPR 2877 will fix that. */ 2878 expr = fold_convert (orig_type, expr); 2879 2880 return expr; 2881 } 2882 2883 /* Strips constant offsets from EXPR and stores them to OFFSET. 
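   For example (assuming 4-byte array elements), EXPR = &a[i + 3] yields
   &a[i] with *OFFSET = 12, and EXPR = p + 8 yields p with *OFFSET = 8.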
*/ 2884 2885 static tree 2886 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset) 2887 { 2888 HOST_WIDE_INT off; 2889 tree core = strip_offset_1 (expr, false, false, &off); 2890 *offset = off; 2891 return core; 2892 } 2893 2894 /* Returns variant of TYPE that can be used as base for different uses. 2895 We return unsigned type with the same precision, which avoids problems 2896 with overflows. */ 2897 2898 static tree 2899 generic_type_for (tree type) 2900 { 2901 if (POINTER_TYPE_P (type)) 2902 return unsigned_type_for (type); 2903 2904 if (TYPE_UNSIGNED (type)) 2905 return type; 2906 2907 return unsigned_type_for (type); 2908 } 2909 2910 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains 2911 the bitmap to that we should store it. */ 2912 2913 static struct ivopts_data *fd_ivopts_data; 2914 static tree 2915 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data) 2916 { 2917 bitmap *depends_on = (bitmap *) data; 2918 struct version_info *info; 2919 2920 if (TREE_CODE (*expr_p) != SSA_NAME) 2921 return NULL_TREE; 2922 info = name_info (fd_ivopts_data, *expr_p); 2923 2924 if (!info->inv_id || info->has_nonlin_use) 2925 return NULL_TREE; 2926 2927 if (!*depends_on) 2928 *depends_on = BITMAP_ALLOC (NULL); 2929 bitmap_set_bit (*depends_on, info->inv_id); 2930 2931 return NULL_TREE; 2932 } 2933 2934 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and 2935 position to POS. If USE is not NULL, the candidate is set as related to 2936 it. If both BASE and STEP are NULL, we add a pseudocandidate for the 2937 replacement of the final value of the iv by a direct computation. */ 2938 2939 static struct iv_cand * 2940 add_candidate_1 (struct ivopts_data *data, 2941 tree base, tree step, bool important, enum iv_position pos, 2942 struct iv_use *use, gimple *incremented_at, 2943 struct iv *orig_iv = NULL) 2944 { 2945 unsigned i; 2946 struct iv_cand *cand = NULL; 2947 tree type, orig_type; 2948 2949 gcc_assert (base && step); 2950 2951 /* -fkeep-gc-roots-live means that we have to keep a real pointer 2952 live, but the ivopts code may replace a real pointer with one 2953 pointing before or after the memory block that is then adjusted 2954 into the memory block during the loop. FIXME: It would likely be 2955 better to actually force the pointer live and still use ivopts; 2956 for example, it would be enough to write the pointer into memory 2957 and keep it there until after the loop. */ 2958 if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base))) 2959 return NULL; 2960 2961 /* For non-original variables, make sure their values are computed in a type 2962 that does not invoke undefined behavior on overflows (since in general, 2963 we cannot prove that these induction variables are non-wrapping). 
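   For instance, a candidate whose base and step have type int is
   converted to unsigned int by generic_type_for below, so that any
   overflow of the new variable wraps instead of being undefined.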
*/ 2964 if (pos != IP_ORIGINAL) 2965 { 2966 orig_type = TREE_TYPE (base); 2967 type = generic_type_for (orig_type); 2968 if (type != orig_type) 2969 { 2970 base = fold_convert (type, base); 2971 step = fold_convert (type, step); 2972 } 2973 } 2974 2975 for (i = 0; i < data->vcands.length (); i++) 2976 { 2977 cand = data->vcands[i]; 2978 2979 if (cand->pos != pos) 2980 continue; 2981 2982 if (cand->incremented_at != incremented_at 2983 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE) 2984 && cand->ainc_use != use)) 2985 continue; 2986 2987 if (operand_equal_p (base, cand->iv->base, 0) 2988 && operand_equal_p (step, cand->iv->step, 0) 2989 && (TYPE_PRECISION (TREE_TYPE (base)) 2990 == TYPE_PRECISION (TREE_TYPE (cand->iv->base)))) 2991 break; 2992 } 2993 2994 if (i == data->vcands.length ()) 2995 { 2996 cand = XCNEW (struct iv_cand); 2997 cand->id = i; 2998 cand->iv = alloc_iv (data, base, step); 2999 cand->pos = pos; 3000 if (pos != IP_ORIGINAL) 3001 { 3002 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp"); 3003 cand->var_after = cand->var_before; 3004 } 3005 cand->important = important; 3006 cand->incremented_at = incremented_at; 3007 data->vcands.safe_push (cand); 3008 3009 if (TREE_CODE (step) != INTEGER_CST) 3010 { 3011 fd_ivopts_data = data; 3012 walk_tree (&step, find_depends, &cand->depends_on, NULL); 3013 } 3014 3015 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE) 3016 cand->ainc_use = use; 3017 else 3018 cand->ainc_use = NULL; 3019 3020 cand->orig_iv = orig_iv; 3021 if (dump_file && (dump_flags & TDF_DETAILS)) 3022 dump_cand (dump_file, cand); 3023 } 3024 3025 cand->important |= important; 3026 3027 /* Relate candidate to the group for which it is added. */ 3028 if (use) 3029 bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i); 3030 3031 return cand; 3032 } 3033 3034 /* Returns true if incrementing the induction variable at the end of the LOOP 3035 is allowed. 3036 3037 The purpose is to avoid splitting latch edge with a biv increment, thus 3038 creating a jump, possibly confusing other optimization passes and leaving 3039 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS 3040 is not available (so we do not have a better alternative), or if the latch 3041 edge is already nonempty. */ 3042 3043 static bool 3044 allow_ip_end_pos_p (struct loop *loop) 3045 { 3046 if (!ip_normal_pos (loop)) 3047 return true; 3048 3049 if (!empty_block_p (ip_end_pos (loop))) 3050 return true; 3051 3052 return false; 3053 } 3054 3055 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE. 3056 Important field is set to IMPORTANT. */ 3057 3058 static void 3059 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step, 3060 bool important, struct iv_use *use) 3061 { 3062 basic_block use_bb = gimple_bb (use->stmt); 3063 machine_mode mem_mode; 3064 unsigned HOST_WIDE_INT cstepi; 3065 3066 /* If we insert the increment in any position other than the standard 3067 ones, we must ensure that it is incremented once per iteration. 3068 It must not be in an inner nested loop, or one side of an if 3069 statement. 
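   For example, if the access in USE has mode MEM_MODE and the step
   equals GET_MODE_SIZE (MEM_MODE) (or its negation), and the target
   supports the corresponding {pre,post}-{increment,decrement}
   addressing, we add an IP_BEFORE_USE or IP_AFTER_USE candidate that
   the use can reach through an autoincrement address.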
*/ 3070 if (use_bb->loop_father != data->current_loop 3071 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb) 3072 || stmt_could_throw_p (use->stmt) 3073 || !cst_and_fits_in_hwi (step)) 3074 return; 3075 3076 cstepi = int_cst_value (step); 3077 3078 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p)); 3079 if (((USE_LOAD_PRE_INCREMENT (mem_mode) 3080 || USE_STORE_PRE_INCREMENT (mem_mode)) 3081 && GET_MODE_SIZE (mem_mode) == cstepi) 3082 || ((USE_LOAD_PRE_DECREMENT (mem_mode) 3083 || USE_STORE_PRE_DECREMENT (mem_mode)) 3084 && GET_MODE_SIZE (mem_mode) == -cstepi)) 3085 { 3086 enum tree_code code = MINUS_EXPR; 3087 tree new_base; 3088 tree new_step = step; 3089 3090 if (POINTER_TYPE_P (TREE_TYPE (base))) 3091 { 3092 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step); 3093 code = POINTER_PLUS_EXPR; 3094 } 3095 else 3096 new_step = fold_convert (TREE_TYPE (base), new_step); 3097 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step); 3098 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use, 3099 use->stmt); 3100 } 3101 if (((USE_LOAD_POST_INCREMENT (mem_mode) 3102 || USE_STORE_POST_INCREMENT (mem_mode)) 3103 && GET_MODE_SIZE (mem_mode) == cstepi) 3104 || ((USE_LOAD_POST_DECREMENT (mem_mode) 3105 || USE_STORE_POST_DECREMENT (mem_mode)) 3106 && GET_MODE_SIZE (mem_mode) == -cstepi)) 3107 { 3108 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use, 3109 use->stmt); 3110 } 3111 } 3112 3113 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and 3114 position to POS. If USE is not NULL, the candidate is set as related to 3115 it. The candidate computation is scheduled before exit condition and at 3116 the end of loop. */ 3117 3118 static void 3119 add_candidate (struct ivopts_data *data, 3120 tree base, tree step, bool important, struct iv_use *use, 3121 struct iv *orig_iv = NULL) 3122 { 3123 if (ip_normal_pos (data->current_loop)) 3124 add_candidate_1 (data, base, step, important, 3125 IP_NORMAL, use, NULL, orig_iv); 3126 if (ip_end_pos (data->current_loop) 3127 && allow_ip_end_pos_p (data->current_loop)) 3128 add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv); 3129 } 3130 3131 /* Adds standard iv candidates. */ 3132 3133 static void 3134 add_standard_iv_candidates (struct ivopts_data *data) 3135 { 3136 add_candidate (data, integer_zero_node, integer_one_node, true, NULL); 3137 3138 /* The same for a double-integer type if it is still fast enough. */ 3139 if (TYPE_PRECISION 3140 (long_integer_type_node) > TYPE_PRECISION (integer_type_node) 3141 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD) 3142 add_candidate (data, build_int_cst (long_integer_type_node, 0), 3143 build_int_cst (long_integer_type_node, 1), true, NULL); 3144 3145 /* The same for a double-integer type if it is still fast enough. */ 3146 if (TYPE_PRECISION 3147 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node) 3148 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD) 3149 add_candidate (data, build_int_cst (long_long_integer_type_node, 0), 3150 build_int_cst (long_long_integer_type_node, 1), true, NULL); 3151 } 3152 3153 3154 /* Adds candidates bases on the old induction variable IV. */ 3155 3156 static void 3157 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv) 3158 { 3159 gimple *phi; 3160 tree def; 3161 struct iv_cand *cand; 3162 3163 /* Check if this biv is used in address type use. 
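   If it is, the biv is a narrow integer that provably does not
   overflow, and its precision is smaller than that of sizetype, we
   additionally add a sizetype-precision candidate below so it can be
   used directly as the index part of a TARGET_MEM_REF; e.g. a 32-bit
   biv on a 64-bit target.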
*/ 3164 if (iv->no_overflow && iv->have_address_use 3165 && INTEGRAL_TYPE_P (TREE_TYPE (iv->base)) 3166 && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype)) 3167 { 3168 tree base = fold_convert (sizetype, iv->base); 3169 tree step = fold_convert (sizetype, iv->step); 3170 3171 /* Add iv cand of same precision as index part in TARGET_MEM_REF. */ 3172 add_candidate (data, base, step, true, NULL, iv); 3173 /* Add iv cand of the original type only if it has nonlinear use. */ 3174 if (iv->nonlin_use) 3175 add_candidate (data, iv->base, iv->step, true, NULL); 3176 } 3177 else 3178 add_candidate (data, iv->base, iv->step, true, NULL); 3179 3180 /* The same, but with initial value zero. */ 3181 if (POINTER_TYPE_P (TREE_TYPE (iv->base))) 3182 add_candidate (data, size_int (0), iv->step, true, NULL); 3183 else 3184 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0), 3185 iv->step, true, NULL); 3186 3187 phi = SSA_NAME_DEF_STMT (iv->ssa_name); 3188 if (gimple_code (phi) == GIMPLE_PHI) 3189 { 3190 /* Additionally record the possibility of leaving the original iv 3191 untouched. */ 3192 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop)); 3193 /* Don't add candidate if it's from another PHI node because 3194 it's an affine iv appearing in the form of PEELED_CHREC. */ 3195 phi = SSA_NAME_DEF_STMT (def); 3196 if (gimple_code (phi) != GIMPLE_PHI) 3197 { 3198 cand = add_candidate_1 (data, 3199 iv->base, iv->step, true, IP_ORIGINAL, NULL, 3200 SSA_NAME_DEF_STMT (def)); 3201 if (cand) 3202 { 3203 cand->var_before = iv->ssa_name; 3204 cand->var_after = def; 3205 } 3206 } 3207 else 3208 gcc_assert (gimple_bb (phi) == data->current_loop->header); 3209 } 3210 } 3211 3212 /* Adds candidates based on the old induction variables. */ 3213 3214 static void 3215 add_iv_candidate_for_bivs (struct ivopts_data *data) 3216 { 3217 unsigned i; 3218 struct iv *iv; 3219 bitmap_iterator bi; 3220 3221 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi) 3222 { 3223 iv = ver_info (data, i)->iv; 3224 if (iv && iv->biv_p && !integer_zerop (iv->step)) 3225 add_iv_candidate_for_biv (data, iv); 3226 } 3227 } 3228 3229 /* Record common candidate {BASE, STEP} derived from USE in hashtable. */ 3230 3231 static void 3232 record_common_cand (struct ivopts_data *data, tree base, 3233 tree step, struct iv_use *use) 3234 { 3235 struct iv_common_cand ent; 3236 struct iv_common_cand **slot; 3237 3238 ent.base = base; 3239 ent.step = step; 3240 ent.hash = iterative_hash_expr (base, 0); 3241 ent.hash = iterative_hash_expr (step, ent.hash); 3242 3243 slot = data->iv_common_cand_tab->find_slot (&ent, INSERT); 3244 if (*slot == NULL) 3245 { 3246 *slot = new iv_common_cand (); 3247 (*slot)->base = base; 3248 (*slot)->step = step; 3249 (*slot)->uses.create (8); 3250 (*slot)->hash = ent.hash; 3251 data->iv_common_cands.safe_push ((*slot)); 3252 } 3253 3254 gcc_assert (use != NULL); 3255 (*slot)->uses.safe_push (use); 3256 return; 3257 } 3258 3259 /* Comparison function used to sort common candidates. */ 3260 3261 static int 3262 common_cand_cmp (const void *p1, const void *p2) 3263 { 3264 unsigned n1, n2; 3265 const struct iv_common_cand *const *const ccand1 3266 = (const struct iv_common_cand *const *)p1; 3267 const struct iv_common_cand *const *const ccand2 3268 = (const struct iv_common_cand *const *)p2; 3269 3270 n1 = (*ccand1)->uses.length (); 3271 n2 = (*ccand2)->uses.length (); 3272 return n2 - n1; 3273 } 3274 3275 /* Adds IV candidates based on common candidated recorded. 
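   For example, if two address uses p[i] and q[i] both recorded the
   common candidate {base 0, step 4} (unit-stride accesses to 4-byte
   elements), a single candidate is added for it and related to both
   groups, rather than duplicating it per use.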
*/ 3276 3277 static void 3278 add_iv_candidate_derived_from_uses (struct ivopts_data *data) 3279 { 3280 unsigned i, j; 3281 struct iv_cand *cand_1, *cand_2; 3282 3283 data->iv_common_cands.qsort (common_cand_cmp); 3284 for (i = 0; i < data->iv_common_cands.length (); i++) 3285 { 3286 struct iv_common_cand *ptr = data->iv_common_cands[i]; 3287 3288 /* Only add IV candidate if it's derived from multiple uses. */ 3289 if (ptr->uses.length () <= 1) 3290 break; 3291 3292 cand_1 = NULL; 3293 cand_2 = NULL; 3294 if (ip_normal_pos (data->current_loop)) 3295 cand_1 = add_candidate_1 (data, ptr->base, ptr->step, 3296 false, IP_NORMAL, NULL, NULL); 3297 3298 if (ip_end_pos (data->current_loop) 3299 && allow_ip_end_pos_p (data->current_loop)) 3300 cand_2 = add_candidate_1 (data, ptr->base, ptr->step, 3301 false, IP_END, NULL, NULL); 3302 3303 /* Bind deriving uses and the new candidates. */ 3304 for (j = 0; j < ptr->uses.length (); j++) 3305 { 3306 struct iv_group *group = data->vgroups[ptr->uses[j]->group_id]; 3307 if (cand_1) 3308 bitmap_set_bit (group->related_cands, cand_1->id); 3309 if (cand_2) 3310 bitmap_set_bit (group->related_cands, cand_2->id); 3311 } 3312 } 3313 3314 /* Release data since it is useless from this point. */ 3315 data->iv_common_cand_tab->empty (); 3316 data->iv_common_cands.truncate (0); 3317 } 3318 3319 /* Adds candidates based on the value of USE's iv. */ 3320 3321 static void 3322 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use) 3323 { 3324 unsigned HOST_WIDE_INT offset; 3325 tree base; 3326 tree basetype; 3327 struct iv *iv = use->iv; 3328 3329 add_candidate (data, iv->base, iv->step, false, use); 3330 3331 /* Record common candidate for use in case it can be shared by others. */ 3332 record_common_cand (data, iv->base, iv->step, use); 3333 3334 /* Record common candidate with initial value zero. */ 3335 basetype = TREE_TYPE (iv->base); 3336 if (POINTER_TYPE_P (basetype)) 3337 basetype = sizetype; 3338 record_common_cand (data, build_int_cst (basetype, 0), iv->step, use); 3339 3340 /* Record common candidate with constant offset stripped in base. 3341 Like the use itself, we also add candidate directly for it. */ 3342 base = strip_offset (iv->base, &offset); 3343 if (offset || base != iv->base) 3344 { 3345 record_common_cand (data, base, iv->step, use); 3346 add_candidate (data, base, iv->step, false, use); 3347 } 3348 3349 /* Record common candidate with base_object removed in base. */ 3350 base = iv->base; 3351 STRIP_NOPS (base); 3352 if (iv->base_object != NULL && TREE_CODE (base) == POINTER_PLUS_EXPR) 3353 { 3354 tree step = iv->step; 3355 3356 STRIP_NOPS (step); 3357 base = TREE_OPERAND (base, 1); 3358 step = fold_convert (sizetype, step); 3359 record_common_cand (data, base, step, use); 3360 /* Also record common candidate with offset stripped. */ 3361 base = strip_offset (base, &offset); 3362 if (offset) 3363 record_common_cand (data, base, step, use); 3364 } 3365 3366 /* At last, add auto-incremental candidates. Make such variables 3367 important since other iv uses with same base object may be based 3368 on it. */ 3369 if (use != NULL && use->type == USE_ADDRESS) 3370 add_autoinc_candidates (data, iv->base, iv->step, true, use); 3371 } 3372 3373 /* Adds candidates based on the uses. */ 3374 3375 static void 3376 add_iv_candidate_for_groups (struct ivopts_data *data) 3377 { 3378 unsigned i; 3379 3380 /* Only add candidate for the first use in group. 
*/ 3381 for (i = 0; i < data->vgroups.length (); i++) 3382 { 3383 struct iv_group *group = data->vgroups[i]; 3384 3385 gcc_assert (group->vuses[0] != NULL); 3386 add_iv_candidate_for_use (data, group->vuses[0]); 3387 } 3388 add_iv_candidate_derived_from_uses (data); 3389 } 3390 3391 /* Record important candidates and add them to related_cands bitmaps. */ 3392 3393 static void 3394 record_important_candidates (struct ivopts_data *data) 3395 { 3396 unsigned i; 3397 struct iv_group *group; 3398 3399 for (i = 0; i < data->vcands.length (); i++) 3400 { 3401 struct iv_cand *cand = data->vcands[i]; 3402 3403 if (cand->important) 3404 bitmap_set_bit (data->important_candidates, i); 3405 } 3406 3407 data->consider_all_candidates = (data->vcands.length () 3408 <= CONSIDER_ALL_CANDIDATES_BOUND); 3409 3410 /* Add important candidates to groups' related_cands bitmaps. */ 3411 for (i = 0; i < data->vgroups.length (); i++) 3412 { 3413 group = data->vgroups[i]; 3414 bitmap_ior_into (group->related_cands, data->important_candidates); 3415 } 3416 } 3417 3418 /* Allocates the data structure mapping the (use, candidate) pairs to costs. 3419 If consider_all_candidates is true, we use a two-dimensional array, otherwise 3420 we allocate a simple list to every use. */ 3421 3422 static void 3423 alloc_use_cost_map (struct ivopts_data *data) 3424 { 3425 unsigned i, size, s; 3426 3427 for (i = 0; i < data->vgroups.length (); i++) 3428 { 3429 struct iv_group *group = data->vgroups[i]; 3430 3431 if (data->consider_all_candidates) 3432 size = data->vcands.length (); 3433 else 3434 { 3435 s = bitmap_count_bits (group->related_cands); 3436 3437 /* Round up to the power of two, so that moduling by it is fast. */ 3438 size = s ? (1 << ceil_log2 (s)) : 1; 3439 } 3440 3441 group->n_map_members = size; 3442 group->cost_map = XCNEWVEC (struct cost_pair, size); 3443 } 3444 } 3445 3446 /* Sets cost of (GROUP, CAND) pair to COST and record that it depends 3447 on invariants DEPENDS_ON and that the value used in expressing it 3448 is VALUE, and in case of iv elimination the comparison operator is COMP. */ 3449 3450 static void 3451 set_group_iv_cost (struct ivopts_data *data, 3452 struct iv_group *group, struct iv_cand *cand, 3453 comp_cost cost, bitmap depends_on, tree value, 3454 enum tree_code comp, iv_inv_expr_ent *inv_expr) 3455 { 3456 unsigned i, s; 3457 3458 if (cost.infinite_cost_p ()) 3459 { 3460 BITMAP_FREE (depends_on); 3461 return; 3462 } 3463 3464 if (data->consider_all_candidates) 3465 { 3466 group->cost_map[cand->id].cand = cand; 3467 group->cost_map[cand->id].cost = cost; 3468 group->cost_map[cand->id].depends_on = depends_on; 3469 group->cost_map[cand->id].value = value; 3470 group->cost_map[cand->id].comp = comp; 3471 group->cost_map[cand->id].inv_expr = inv_expr; 3472 return; 3473 } 3474 3475 /* n_map_members is a power of two, so this computes modulo. */ 3476 s = cand->id & (group->n_map_members - 1); 3477 for (i = s; i < group->n_map_members; i++) 3478 if (!group->cost_map[i].cand) 3479 goto found; 3480 for (i = 0; i < s; i++) 3481 if (!group->cost_map[i].cand) 3482 goto found; 3483 3484 gcc_unreachable (); 3485 3486 found: 3487 group->cost_map[i].cand = cand; 3488 group->cost_map[i].cost = cost; 3489 group->cost_map[i].depends_on = depends_on; 3490 group->cost_map[i].value = value; 3491 group->cost_map[i].comp = comp; 3492 group->cost_map[i].inv_expr = inv_expr; 3493 } 3494 3495 /* Gets cost of (GROUP, CAND) pair. 
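   When consider_all_candidates is false, cost_map is a small
   open-addressed hash table: the probe starts at slot
   CAND->id & (n_map_members - 1) -- e.g. slot 3 for id 11 and 8
   members -- and walks forward (wrapping around) until the candidate
   or an empty slot is found.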
*/ 3496 3497 static struct cost_pair * 3498 get_group_iv_cost (struct ivopts_data *data, struct iv_group *group, 3499 struct iv_cand *cand) 3500 { 3501 unsigned i, s; 3502 struct cost_pair *ret; 3503 3504 if (!cand) 3505 return NULL; 3506 3507 if (data->consider_all_candidates) 3508 { 3509 ret = group->cost_map + cand->id; 3510 if (!ret->cand) 3511 return NULL; 3512 3513 return ret; 3514 } 3515 3516 /* n_map_members is a power of two, so this computes modulo. */ 3517 s = cand->id & (group->n_map_members - 1); 3518 for (i = s; i < group->n_map_members; i++) 3519 if (group->cost_map[i].cand == cand) 3520 return group->cost_map + i; 3521 else if (group->cost_map[i].cand == NULL) 3522 return NULL; 3523 for (i = 0; i < s; i++) 3524 if (group->cost_map[i].cand == cand) 3525 return group->cost_map + i; 3526 else if (group->cost_map[i].cand == NULL) 3527 return NULL; 3528 3529 return NULL; 3530 } 3531 3532 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */ 3533 static rtx 3534 produce_memory_decl_rtl (tree obj, int *regno) 3535 { 3536 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj)); 3537 machine_mode address_mode = targetm.addr_space.address_mode (as); 3538 rtx x; 3539 3540 gcc_assert (obj); 3541 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj)) 3542 { 3543 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj)); 3544 x = gen_rtx_SYMBOL_REF (address_mode, name); 3545 SET_SYMBOL_REF_DECL (x, obj); 3546 x = gen_rtx_MEM (DECL_MODE (obj), x); 3547 set_mem_addr_space (x, as); 3548 targetm.encode_section_info (obj, x, true); 3549 } 3550 else 3551 { 3552 x = gen_raw_REG (address_mode, (*regno)++); 3553 x = gen_rtx_MEM (DECL_MODE (obj), x); 3554 set_mem_addr_space (x, as); 3555 } 3556 3557 return x; 3558 } 3559 3560 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for 3561 walk_tree. DATA contains the actual fake register number. */ 3562 3563 static tree 3564 prepare_decl_rtl (tree *expr_p, int *ws, void *data) 3565 { 3566 tree obj = NULL_TREE; 3567 rtx x = NULL_RTX; 3568 int *regno = (int *) data; 3569 3570 switch (TREE_CODE (*expr_p)) 3571 { 3572 case ADDR_EXPR: 3573 for (expr_p = &TREE_OPERAND (*expr_p, 0); 3574 handled_component_p (*expr_p); 3575 expr_p = &TREE_OPERAND (*expr_p, 0)) 3576 continue; 3577 obj = *expr_p; 3578 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj)) 3579 x = produce_memory_decl_rtl (obj, regno); 3580 break; 3581 3582 case SSA_NAME: 3583 *ws = 0; 3584 obj = SSA_NAME_VAR (*expr_p); 3585 /* Defer handling of anonymous SSA_NAMEs to the expander. */ 3586 if (!obj) 3587 return NULL_TREE; 3588 if (!DECL_RTL_SET_P (obj)) 3589 x = gen_raw_REG (DECL_MODE (obj), (*regno)++); 3590 break; 3591 3592 case VAR_DECL: 3593 case PARM_DECL: 3594 case RESULT_DECL: 3595 *ws = 0; 3596 obj = *expr_p; 3597 3598 if (DECL_RTL_SET_P (obj)) 3599 break; 3600 3601 if (DECL_MODE (obj) == BLKmode) 3602 x = produce_memory_decl_rtl (obj, regno); 3603 else 3604 x = gen_raw_REG (DECL_MODE (obj), (*regno)++); 3605 3606 break; 3607 3608 default: 3609 break; 3610 } 3611 3612 if (x) 3613 { 3614 decl_rtl_to_reset.safe_push (obj); 3615 SET_DECL_RTL (obj, x); 3616 } 3617 3618 return NULL_TREE; 3619 } 3620 3621 /* Determines cost of the computation of EXPR. */ 3622 3623 static unsigned 3624 computation_cost (tree expr, bool speed) 3625 { 3626 rtx_insn *seq; 3627 rtx rslt; 3628 tree type = TREE_TYPE (expr); 3629 unsigned cost; 3630 /* Avoid using hard regs in ways which may be unsupported. 
*/ 3631 int regno = LAST_VIRTUAL_REGISTER + 1; 3632 struct cgraph_node *node = cgraph_node::get (current_function_decl); 3633 enum node_frequency real_frequency = node->frequency; 3634 3635 node->frequency = NODE_FREQUENCY_NORMAL; 3636 crtl->maybe_hot_insn_p = speed; 3637 walk_tree (&expr, prepare_decl_rtl, ®no, NULL); 3638 start_sequence (); 3639 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL); 3640 seq = get_insns (); 3641 end_sequence (); 3642 default_rtl_profile (); 3643 node->frequency = real_frequency; 3644 3645 cost = seq_cost (seq, speed); 3646 if (MEM_P (rslt)) 3647 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type), 3648 TYPE_ADDR_SPACE (type), speed); 3649 else if (!REG_P (rslt)) 3650 cost += set_src_cost (rslt, TYPE_MODE (type), speed); 3651 3652 return cost; 3653 } 3654 3655 /* Returns variable containing the value of candidate CAND at statement AT. */ 3656 3657 static tree 3658 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt) 3659 { 3660 if (stmt_after_increment (loop, cand, stmt)) 3661 return cand->var_after; 3662 else 3663 return cand->var_before; 3664 } 3665 3666 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the 3667 same precision that is at least as wide as the precision of TYPE, stores 3668 BA to A and BB to B, and returns the type of BA. Otherwise, returns the 3669 type of A and B. */ 3670 3671 static tree 3672 determine_common_wider_type (tree *a, tree *b) 3673 { 3674 tree wider_type = NULL; 3675 tree suba, subb; 3676 tree atype = TREE_TYPE (*a); 3677 3678 if (CONVERT_EXPR_P (*a)) 3679 { 3680 suba = TREE_OPERAND (*a, 0); 3681 wider_type = TREE_TYPE (suba); 3682 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype)) 3683 return atype; 3684 } 3685 else 3686 return atype; 3687 3688 if (CONVERT_EXPR_P (*b)) 3689 { 3690 subb = TREE_OPERAND (*b, 0); 3691 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb))) 3692 return atype; 3693 } 3694 else 3695 return atype; 3696 3697 *a = suba; 3698 *b = subb; 3699 return wider_type; 3700 } 3701 3702 /* Determines the expression by that USE is expressed from induction variable 3703 CAND at statement AT in LOOP. The expression is stored in a decomposed 3704 form into AFF. Returns false if USE cannot be expressed using CAND. */ 3705 3706 static bool 3707 get_computation_aff (struct loop *loop, 3708 struct iv_use *use, struct iv_cand *cand, gimple *at, 3709 struct aff_tree *aff) 3710 { 3711 tree ubase = use->iv->base; 3712 tree ustep = use->iv->step; 3713 tree cbase = cand->iv->base; 3714 tree cstep = cand->iv->step, cstep_common; 3715 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase); 3716 tree common_type, var; 3717 tree uutype; 3718 aff_tree cbase_aff, var_aff; 3719 widest_int rat; 3720 3721 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype)) 3722 { 3723 /* We do not have a precision to express the values of use. */ 3724 return false; 3725 } 3726 3727 var = var_at_stmt (loop, cand, at); 3728 uutype = unsigned_type_for (utype); 3729 3730 /* If the conversion is not noop, perform it. 
*/ 3731 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype)) 3732 { 3733 if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase) 3734 && (CONVERT_EXPR_P (cstep) || TREE_CODE (cstep) == INTEGER_CST)) 3735 { 3736 tree inner_base, inner_step, inner_type; 3737 inner_base = TREE_OPERAND (cbase, 0); 3738 if (CONVERT_EXPR_P (cstep)) 3739 inner_step = TREE_OPERAND (cstep, 0); 3740 else 3741 inner_step = cstep; 3742 3743 inner_type = TREE_TYPE (inner_base); 3744 /* If candidate is added from a biv whose type is smaller than 3745 ctype, we know both candidate and the biv won't overflow. 3746 In this case, it's safe to skip the convertion in candidate. 3747 As an example, (unsigned short)((unsigned long)A) equals to 3748 (unsigned short)A, if A has a type no larger than short. */ 3749 if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype)) 3750 { 3751 cbase = inner_base; 3752 cstep = inner_step; 3753 } 3754 } 3755 cstep = fold_convert (uutype, cstep); 3756 cbase = fold_convert (uutype, cbase); 3757 var = fold_convert (uutype, var); 3758 } 3759 3760 /* Ratio is 1 when computing the value of biv cand by itself. 3761 We can't rely on constant_multiple_of in this case because the 3762 use is created after the original biv is selected. The call 3763 could fail because of inconsistent fold behavior. See PR68021 3764 for more information. */ 3765 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt) 3766 { 3767 gcc_assert (is_gimple_assign (use->stmt)); 3768 gcc_assert (use->iv->ssa_name == cand->var_after); 3769 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after); 3770 rat = 1; 3771 } 3772 else if (!constant_multiple_of (ustep, cstep, &rat)) 3773 return false; 3774 3775 /* In case both UBASE and CBASE are shortened to UUTYPE from some common 3776 type, we achieve better folding by computing their difference in this 3777 wider type, and cast the result to UUTYPE. We do not need to worry about 3778 overflows, as all the arithmetics will in the end be performed in UUTYPE 3779 anyway. */ 3780 common_type = determine_common_wider_type (&ubase, &cbase); 3781 3782 /* use = ubase - ratio * cbase + ratio * var. */ 3783 tree_to_aff_combination (ubase, common_type, aff); 3784 tree_to_aff_combination (cbase, common_type, &cbase_aff); 3785 tree_to_aff_combination (var, uutype, &var_aff); 3786 3787 /* We need to shift the value if we are after the increment. */ 3788 if (stmt_after_increment (loop, cand, at)) 3789 { 3790 aff_tree cstep_aff; 3791 3792 if (common_type != uutype) 3793 cstep_common = fold_convert (common_type, cstep); 3794 else 3795 cstep_common = cstep; 3796 3797 tree_to_aff_combination (cstep_common, common_type, &cstep_aff); 3798 aff_combination_add (&cbase_aff, &cstep_aff); 3799 } 3800 3801 aff_combination_scale (&cbase_aff, -rat); 3802 aff_combination_add (aff, &cbase_aff); 3803 if (common_type != uutype) 3804 aff_combination_convert (aff, uutype); 3805 3806 aff_combination_scale (&var_aff, rat); 3807 aff_combination_add (aff, &var_aff); 3808 3809 return true; 3810 } 3811 3812 /* Return the type of USE. */ 3813 3814 static tree 3815 get_use_type (struct iv_use *use) 3816 { 3817 tree base_type = TREE_TYPE (use->iv->base); 3818 tree type; 3819 3820 if (use->type == USE_ADDRESS) 3821 { 3822 /* The base_type may be a void pointer. Create a pointer type based on 3823 the mem_ref instead. 
*/ 3824 type = build_pointer_type (TREE_TYPE (*use->op_p)); 3825 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type)) 3826 == TYPE_ADDR_SPACE (TREE_TYPE (base_type))); 3827 } 3828 else 3829 type = base_type; 3830 3831 return type; 3832 } 3833 3834 /* Determines the expression by that USE is expressed from induction variable 3835 CAND at statement AT in LOOP. The computation is unshared. */ 3836 3837 static tree 3838 get_computation_at (struct loop *loop, 3839 struct iv_use *use, struct iv_cand *cand, gimple *at) 3840 { 3841 aff_tree aff; 3842 tree type = get_use_type (use); 3843 3844 if (!get_computation_aff (loop, use, cand, at, &aff)) 3845 return NULL_TREE; 3846 unshare_aff_combination (&aff); 3847 return fold_convert (type, aff_combination_to_tree (&aff)); 3848 } 3849 3850 /* Determines the expression by that USE is expressed from induction variable 3851 CAND in LOOP. The computation is unshared. */ 3852 3853 static tree 3854 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand) 3855 { 3856 return get_computation_at (loop, use, cand, use->stmt); 3857 } 3858 3859 /* Adjust the cost COST for being in loop setup rather than loop body. 3860 If we're optimizing for space, the loop setup overhead is constant; 3861 if we're optimizing for speed, amortize it over the per-iteration cost. */ 3862 static unsigned 3863 adjust_setup_cost (struct ivopts_data *data, unsigned cost) 3864 { 3865 if (cost == INFTY) 3866 return cost; 3867 else if (optimize_loop_for_speed_p (data->current_loop)) 3868 return cost / avg_loop_niter (data->current_loop); 3869 else 3870 return cost; 3871 } 3872 3873 /* Returns true if multiplying by RATIO is allowed in an address. Test the 3874 validity for a memory reference accessing memory of mode MODE in 3875 address space AS. 
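   The result is probed once per (mode, address space) pair and cached
   in a bitmap; e.g. on typical x86-like targets, RATIO values of 1, 2,
   4 and 8 would be accepted as index scale factors while 3 would not.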
*/ 3876 3877 3878 bool 3879 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode, 3880 addr_space_t as) 3881 { 3882 #define MAX_RATIO 128 3883 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode; 3884 static vec<sbitmap> valid_mult_list; 3885 sbitmap valid_mult; 3886 3887 if (data_index >= valid_mult_list.length ()) 3888 valid_mult_list.safe_grow_cleared (data_index + 1); 3889 3890 valid_mult = valid_mult_list[data_index]; 3891 if (!valid_mult) 3892 { 3893 machine_mode address_mode = targetm.addr_space.address_mode (as); 3894 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1); 3895 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2); 3896 rtx addr, scaled; 3897 HOST_WIDE_INT i; 3898 3899 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1); 3900 bitmap_clear (valid_mult); 3901 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX); 3902 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2); 3903 for (i = -MAX_RATIO; i <= MAX_RATIO; i++) 3904 { 3905 XEXP (scaled, 1) = gen_int_mode (i, address_mode); 3906 if (memory_address_addr_space_p (mode, addr, as) 3907 || memory_address_addr_space_p (mode, scaled, as)) 3908 bitmap_set_bit (valid_mult, i + MAX_RATIO); 3909 } 3910 3911 if (dump_file && (dump_flags & TDF_DETAILS)) 3912 { 3913 fprintf (dump_file, " allowed multipliers:"); 3914 for (i = -MAX_RATIO; i <= MAX_RATIO; i++) 3915 if (bitmap_bit_p (valid_mult, i + MAX_RATIO)) 3916 fprintf (dump_file, " %d", (int) i); 3917 fprintf (dump_file, "\n"); 3918 fprintf (dump_file, "\n"); 3919 } 3920 3921 valid_mult_list[data_index] = valid_mult; 3922 } 3923 3924 if (ratio > MAX_RATIO || ratio < -MAX_RATIO) 3925 return false; 3926 3927 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO); 3928 } 3929 3930 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index. 3931 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false, 3932 variable is omitted. Compute the cost for a memory reference that accesses 3933 a memory location of mode MEM_MODE in address space AS. 3934 3935 MAY_AUTOINC is set to true if the autoincrement (increasing index by 3936 size of MEM_MODE / RATIO) is available. To make this determination, we 3937 look at the size of the increment to be made, which is given in CSTEP. 3938 CSTEP may be zero if the step is unknown. 3939 STMT_AFTER_INC is true iff the statement we're looking at is after the 3940 increment of the original biv. 3941 3942 TODO -- there must be some better way. This all is quite crude. */ 3943 3944 enum ainc_type 3945 { 3946 AINC_PRE_INC, /* Pre increment. */ 3947 AINC_PRE_DEC, /* Pre decrement. */ 3948 AINC_POST_INC, /* Post increment. */ 3949 AINC_POST_DEC, /* Post decrement. */ 3950 AINC_NONE /* Also the number of auto increment types. 
*/ 3951 }; 3952 3953 struct address_cost_data 3954 { 3955 HOST_WIDE_INT min_offset, max_offset; 3956 unsigned costs[2][2][2][2]; 3957 unsigned ainc_costs[AINC_NONE]; 3958 }; 3959 3960 3961 static comp_cost 3962 get_address_cost (bool symbol_present, bool var_present, 3963 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio, 3964 HOST_WIDE_INT cstep, machine_mode mem_mode, 3965 addr_space_t as, bool speed, 3966 bool stmt_after_inc, bool *may_autoinc) 3967 { 3968 machine_mode address_mode = targetm.addr_space.address_mode (as); 3969 static vec<address_cost_data *> address_cost_data_list; 3970 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode; 3971 address_cost_data *data; 3972 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE]; 3973 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE]; 3974 unsigned cost, acost, complexity; 3975 enum ainc_type autoinc_type; 3976 bool offset_p, ratio_p, autoinc; 3977 HOST_WIDE_INT s_offset, autoinc_offset, msize; 3978 unsigned HOST_WIDE_INT mask; 3979 unsigned bits; 3980 3981 if (data_index >= address_cost_data_list.length ()) 3982 address_cost_data_list.safe_grow_cleared (data_index + 1); 3983 3984 data = address_cost_data_list[data_index]; 3985 if (!data) 3986 { 3987 HOST_WIDE_INT i; 3988 HOST_WIDE_INT rat, off = 0; 3989 int old_cse_not_expected, width; 3990 unsigned sym_p, var_p, off_p, rat_p, add_c; 3991 rtx_insn *seq; 3992 rtx addr, base; 3993 rtx reg0, reg1; 3994 3995 data = (address_cost_data *) xcalloc (1, sizeof (*data)); 3996 3997 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1); 3998 3999 width = GET_MODE_BITSIZE (address_mode) - 1; 4000 if (width > (HOST_BITS_PER_WIDE_INT - 1)) 4001 width = HOST_BITS_PER_WIDE_INT - 1; 4002 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX); 4003 4004 for (i = width; i >= 0; i--) 4005 { 4006 off = -(HOST_WIDE_INT_1U << i); 4007 XEXP (addr, 1) = gen_int_mode (off, address_mode); 4008 if (memory_address_addr_space_p (mem_mode, addr, as)) 4009 break; 4010 } 4011 data->min_offset = (i == -1? 0 : off); 4012 4013 for (i = width; i >= 0; i--) 4014 { 4015 off = (HOST_WIDE_INT_1U << i) - 1; 4016 XEXP (addr, 1) = gen_int_mode (off, address_mode); 4017 if (memory_address_addr_space_p (mem_mode, addr, as)) 4018 break; 4019 /* For some strict-alignment targets, the offset must be naturally 4020 aligned. Try an aligned offset if mem_mode is not QImode. */ 4021 off = mem_mode != QImode 4022 ? (HOST_WIDE_INT_1U << i) 4023 - GET_MODE_SIZE (mem_mode) 4024 : 0; 4025 if (off > 0) 4026 { 4027 XEXP (addr, 1) = gen_int_mode (off, address_mode); 4028 if (memory_address_addr_space_p (mem_mode, addr, as)) 4029 break; 4030 } 4031 } 4032 if (i == -1) 4033 off = 0; 4034 data->max_offset = off; 4035 4036 if (dump_file && (dump_flags & TDF_DETAILS)) 4037 { 4038 fprintf (dump_file, "get_address_cost:\n"); 4039 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n", 4040 GET_MODE_NAME (mem_mode), 4041 data->min_offset); 4042 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n", 4043 GET_MODE_NAME (mem_mode), 4044 data->max_offset); 4045 } 4046 4047 rat = 1; 4048 for (i = 2; i <= MAX_RATIO; i++) 4049 if (multiplier_allowed_in_address_p (i, mem_mode, as)) 4050 { 4051 rat = i; 4052 break; 4053 } 4054 4055 /* Compute the cost of various addressing modes. 
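   The loop over I below enumerates all sixteen sym/var/offset/ratio
   combinations; e.g. I == 0 measures a plain register address and
   I == 15 an address of the form sym + var + cst + rat * index.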
*/ 4056 acost = 0;
4057 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
4058 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
4059
4060 if (USE_LOAD_PRE_DECREMENT (mem_mode)
4061 || USE_STORE_PRE_DECREMENT (mem_mode))
4062 {
4063 addr = gen_rtx_PRE_DEC (address_mode, reg0);
4064 has_predec[mem_mode]
4065 = memory_address_addr_space_p (mem_mode, addr, as);
4066
4067 if (has_predec[mem_mode])
4068 data->ainc_costs[AINC_PRE_DEC]
4069 = address_cost (addr, mem_mode, as, speed);
4070 }
4071 if (USE_LOAD_POST_DECREMENT (mem_mode)
4072 || USE_STORE_POST_DECREMENT (mem_mode))
4073 {
4074 addr = gen_rtx_POST_DEC (address_mode, reg0);
4075 has_postdec[mem_mode]
4076 = memory_address_addr_space_p (mem_mode, addr, as);
4077
4078 if (has_postdec[mem_mode])
4079 data->ainc_costs[AINC_POST_DEC]
4080 = address_cost (addr, mem_mode, as, speed);
4081 }
4082 if (USE_LOAD_PRE_INCREMENT (mem_mode)
4083 || USE_STORE_PRE_INCREMENT (mem_mode))
4084 {
4085 addr = gen_rtx_PRE_INC (address_mode, reg0);
4086 has_preinc[mem_mode]
4087 = memory_address_addr_space_p (mem_mode, addr, as);
4088
4089 if (has_preinc[mem_mode])
4090 data->ainc_costs[AINC_PRE_INC]
4091 = address_cost (addr, mem_mode, as, speed);
4092 }
4093 if (USE_LOAD_POST_INCREMENT (mem_mode)
4094 || USE_STORE_POST_INCREMENT (mem_mode))
4095 {
4096 addr = gen_rtx_POST_INC (address_mode, reg0);
4097 has_postinc[mem_mode]
4098 = memory_address_addr_space_p (mem_mode, addr, as);
4099
4100 if (has_postinc[mem_mode])
4101 data->ainc_costs[AINC_POST_INC]
4102 = address_cost (addr, mem_mode, as, speed);
4103 }
4104 for (i = 0; i < 16; i++)
4105 {
4106 sym_p = i & 1;
4107 var_p = (i >> 1) & 1;
4108 off_p = (i >> 2) & 1;
4109 rat_p = (i >> 3) & 1;
4110
4111 addr = reg0;
4112 if (rat_p)
4113 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
4114 gen_int_mode (rat, address_mode));
4115
4116 if (var_p)
4117 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
4118
4119 if (sym_p)
4120 {
4121 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
4122 /* ??? We can run into trouble with some backends by presenting
4123 it with symbols which haven't been properly passed through
4124 targetm.encode_section_info. By setting the local bit, we
4125 enhance the probability of things working. */
4126 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
4127
4128 if (off_p)
4129 base = gen_rtx_fmt_e (CONST, address_mode,
4130 gen_rtx_fmt_ee
4131 (PLUS, address_mode, base,
4132 gen_int_mode (off, address_mode)));
4133 }
4134 else if (off_p)
4135 base = gen_int_mode (off, address_mode);
4136 else
4137 base = NULL_RTX;
4138
4139 if (base)
4140 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
4141
4142 start_sequence ();
4143 /* To avoid splitting addressing modes, pretend that no cse will
4144 follow. */
4145 old_cse_not_expected = cse_not_expected;
4146 cse_not_expected = true;
4147 addr = memory_address_addr_space (mem_mode, addr, as);
4148 cse_not_expected = old_cse_not_expected;
4149 seq = get_insns ();
4150 end_sequence ();
4151
4152 acost = seq_cost (seq, speed);
4153 acost += address_cost (addr, mem_mode, as, speed);
4154
4155 if (!acost)
4156 acost = 1;
4157 data->costs[sym_p][var_p][off_p][rat_p] = acost;
4158 }
4159
4160 /* On some targets, it is quite expensive to load symbol to a register,
4161 which makes addresses that contain symbols look much more expensive.
4162 However, the symbol will have to be loaded in any case before the 4163 loop (and quite likely we have it in register already), so it does not 4164 make much sense to penalize them too heavily. So make some final 4165 tweaks for the SYMBOL_PRESENT modes: 4166 4167 If VAR_PRESENT is false, and the mode obtained by changing symbol to 4168 var is cheaper, use this mode with small penalty. 4169 If VAR_PRESENT is true, try whether the mode with 4170 SYMBOL_PRESENT = false is cheaper even with cost of addition, and 4171 if this is the case, use it. */ 4172 add_c = add_cost (speed, address_mode); 4173 for (i = 0; i < 8; i++) 4174 { 4175 var_p = i & 1; 4176 off_p = (i >> 1) & 1; 4177 rat_p = (i >> 2) & 1; 4178 4179 acost = data->costs[0][1][off_p][rat_p] + 1; 4180 if (var_p) 4181 acost += add_c; 4182 4183 if (acost < data->costs[1][var_p][off_p][rat_p]) 4184 data->costs[1][var_p][off_p][rat_p] = acost; 4185 } 4186 4187 if (dump_file && (dump_flags & TDF_DETAILS)) 4188 { 4189 fprintf (dump_file, "<Address Costs>:\n"); 4190 4191 for (i = 0; i < 16; i++) 4192 { 4193 sym_p = i & 1; 4194 var_p = (i >> 1) & 1; 4195 off_p = (i >> 2) & 1; 4196 rat_p = (i >> 3) & 1; 4197 4198 fprintf (dump_file, " "); 4199 if (sym_p) 4200 fprintf (dump_file, "sym + "); 4201 if (var_p) 4202 fprintf (dump_file, "var + "); 4203 if (off_p) 4204 fprintf (dump_file, "cst + "); 4205 if (rat_p) 4206 fprintf (dump_file, "rat * "); 4207 4208 acost = data->costs[sym_p][var_p][off_p][rat_p]; 4209 fprintf (dump_file, "index costs %d\n", acost); 4210 } 4211 if (has_predec[mem_mode] || has_postdec[mem_mode] 4212 || has_preinc[mem_mode] || has_postinc[mem_mode]) 4213 fprintf (dump_file, " May include autoinc/dec\n"); 4214 fprintf (dump_file, "\n"); 4215 } 4216 4217 address_cost_data_list[data_index] = data; 4218 } 4219 4220 bits = GET_MODE_BITSIZE (address_mode); 4221 mask = ~(HOST_WIDE_INT_M1U << (bits - 1) << 1); 4222 offset &= mask; 4223 if ((offset >> (bits - 1) & 1)) 4224 offset |= ~mask; 4225 s_offset = offset; 4226 4227 autoinc = false; 4228 autoinc_type = AINC_NONE; 4229 msize = GET_MODE_SIZE (mem_mode); 4230 autoinc_offset = offset; 4231 if (stmt_after_inc) 4232 autoinc_offset += ratio * cstep; 4233 if (symbol_present || var_present || ratio != 1) 4234 autoinc = false; 4235 else 4236 { 4237 if (has_postinc[mem_mode] && autoinc_offset == 0 4238 && msize == cstep) 4239 autoinc_type = AINC_POST_INC; 4240 else if (has_postdec[mem_mode] && autoinc_offset == 0 4241 && msize == -cstep) 4242 autoinc_type = AINC_POST_DEC; 4243 else if (has_preinc[mem_mode] && autoinc_offset == msize 4244 && msize == cstep) 4245 autoinc_type = AINC_PRE_INC; 4246 else if (has_predec[mem_mode] && autoinc_offset == -msize 4247 && msize == -cstep) 4248 autoinc_type = AINC_PRE_DEC; 4249 4250 if (autoinc_type != AINC_NONE) 4251 autoinc = true; 4252 } 4253 4254 cost = 0; 4255 offset_p = (s_offset != 0 4256 && data->min_offset <= s_offset 4257 && s_offset <= data->max_offset); 4258 ratio_p = (ratio != 1 4259 && multiplier_allowed_in_address_p (ratio, mem_mode, as)); 4260 4261 if (ratio != 1 && !ratio_p) 4262 cost += mult_by_coeff_cost (ratio, address_mode, speed); 4263 4264 if (s_offset && !offset_p && !symbol_present) 4265 cost += add_cost (speed, address_mode); 4266 4267 if (may_autoinc) 4268 *may_autoinc = autoinc; 4269 if (autoinc) 4270 acost = data->ainc_costs[autoinc_type]; 4271 else 4272 acost = data->costs[symbol_present][var_present][offset_p][ratio_p]; 4273 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p; 4274 return 
comp_cost (cost + acost, complexity); 4275 } 4276 4277 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the 4278 EXPR operand holding the shift. COST0 and COST1 are the costs for 4279 calculating the operands of EXPR. Returns true if successful, and returns 4280 the cost in COST. */ 4281 4282 static bool 4283 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0, 4284 comp_cost cost1, tree mult, bool speed, comp_cost *cost) 4285 { 4286 comp_cost res; 4287 tree op1 = TREE_OPERAND (expr, 1); 4288 tree cst = TREE_OPERAND (mult, 1); 4289 tree multop = TREE_OPERAND (mult, 0); 4290 int m = exact_log2 (int_cst_value (cst)); 4291 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode)); 4292 int as_cost, sa_cost; 4293 bool mult_in_op1; 4294 4295 if (!(m >= 0 && m < maxm)) 4296 return false; 4297 4298 STRIP_NOPS (op1); 4299 mult_in_op1 = operand_equal_p (op1, mult, 0); 4300 4301 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m); 4302 4303 /* If the target has a cheap shift-and-add or shift-and-sub instruction, 4304 use that in preference to a shift insn followed by an add insn. */ 4305 sa_cost = (TREE_CODE (expr) != MINUS_EXPR 4306 ? shiftadd_cost (speed, mode, m) 4307 : (mult_in_op1 4308 ? shiftsub1_cost (speed, mode, m) 4309 : shiftsub0_cost (speed, mode, m))); 4310 4311 res = comp_cost (MIN (as_cost, sa_cost), 0); 4312 res += (mult_in_op1 ? cost0 : cost1); 4313 4314 STRIP_NOPS (multop); 4315 if (!is_gimple_val (multop)) 4316 res += force_expr_to_var_cost (multop, speed); 4317 4318 *cost = res; 4319 return true; 4320 } 4321 4322 /* Estimates cost of forcing expression EXPR into a variable. */ 4323 4324 static comp_cost 4325 force_expr_to_var_cost (tree expr, bool speed) 4326 { 4327 static bool costs_initialized = false; 4328 static unsigned integer_cost [2]; 4329 static unsigned symbol_cost [2]; 4330 static unsigned address_cost [2]; 4331 tree op0, op1; 4332 comp_cost cost0, cost1, cost; 4333 machine_mode mode; 4334 4335 if (!costs_initialized) 4336 { 4337 tree type = build_pointer_type (integer_type_node); 4338 tree var, addr; 4339 rtx x; 4340 int i; 4341 4342 var = create_tmp_var_raw (integer_type_node, "test_var"); 4343 TREE_STATIC (var) = 1; 4344 x = produce_memory_decl_rtl (var, NULL); 4345 SET_DECL_RTL (var, x); 4346 4347 addr = build1 (ADDR_EXPR, type, var); 4348 4349 4350 for (i = 0; i < 2; i++) 4351 { 4352 integer_cost[i] = computation_cost (build_int_cst (integer_type_node, 4353 2000), i); 4354 4355 symbol_cost[i] = computation_cost (addr, i) + 1; 4356 4357 address_cost[i] 4358 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1; 4359 if (dump_file && (dump_flags & TDF_DETAILS)) 4360 { 4361 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? 
"speed" : "size"); 4362 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]); 4363 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]); 4364 fprintf (dump_file, " address %d\n", (int) address_cost[i]); 4365 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]); 4366 fprintf (dump_file, "\n"); 4367 } 4368 } 4369 4370 costs_initialized = true; 4371 } 4372 4373 STRIP_NOPS (expr); 4374 4375 if (SSA_VAR_P (expr)) 4376 return no_cost; 4377 4378 if (is_gimple_min_invariant (expr)) 4379 { 4380 if (TREE_CODE (expr) == INTEGER_CST) 4381 return comp_cost (integer_cost [speed], 0); 4382 4383 if (TREE_CODE (expr) == ADDR_EXPR) 4384 { 4385 tree obj = TREE_OPERAND (expr, 0); 4386 4387 if (VAR_P (obj) 4388 || TREE_CODE (obj) == PARM_DECL 4389 || TREE_CODE (obj) == RESULT_DECL) 4390 return comp_cost (symbol_cost [speed], 0); 4391 } 4392 4393 return comp_cost (address_cost [speed], 0); 4394 } 4395 4396 switch (TREE_CODE (expr)) 4397 { 4398 case POINTER_PLUS_EXPR: 4399 case PLUS_EXPR: 4400 case MINUS_EXPR: 4401 case MULT_EXPR: 4402 op0 = TREE_OPERAND (expr, 0); 4403 op1 = TREE_OPERAND (expr, 1); 4404 STRIP_NOPS (op0); 4405 STRIP_NOPS (op1); 4406 break; 4407 4408 CASE_CONVERT: 4409 case NEGATE_EXPR: 4410 op0 = TREE_OPERAND (expr, 0); 4411 STRIP_NOPS (op0); 4412 op1 = NULL_TREE; 4413 break; 4414 4415 default: 4416 /* Just an arbitrary value, FIXME. */ 4417 return comp_cost (target_spill_cost[speed], 0); 4418 } 4419 4420 if (op0 == NULL_TREE 4421 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0)) 4422 cost0 = no_cost; 4423 else 4424 cost0 = force_expr_to_var_cost (op0, speed); 4425 4426 if (op1 == NULL_TREE 4427 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1)) 4428 cost1 = no_cost; 4429 else 4430 cost1 = force_expr_to_var_cost (op1, speed); 4431 4432 mode = TYPE_MODE (TREE_TYPE (expr)); 4433 switch (TREE_CODE (expr)) 4434 { 4435 case POINTER_PLUS_EXPR: 4436 case PLUS_EXPR: 4437 case MINUS_EXPR: 4438 case NEGATE_EXPR: 4439 cost = comp_cost (add_cost (speed, mode), 0); 4440 if (TREE_CODE (expr) != NEGATE_EXPR) 4441 { 4442 tree mult = NULL_TREE; 4443 comp_cost sa_cost; 4444 if (TREE_CODE (op1) == MULT_EXPR) 4445 mult = op1; 4446 else if (TREE_CODE (op0) == MULT_EXPR) 4447 mult = op0; 4448 4449 if (mult != NULL_TREE 4450 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1)) 4451 && get_shiftadd_cost (expr, mode, cost0, cost1, mult, 4452 speed, &sa_cost)) 4453 return sa_cost; 4454 } 4455 break; 4456 4457 CASE_CONVERT: 4458 { 4459 tree inner_mode, outer_mode; 4460 outer_mode = TREE_TYPE (expr); 4461 inner_mode = TREE_TYPE (op0); 4462 cost = comp_cost (convert_cost (TYPE_MODE (outer_mode), 4463 TYPE_MODE (inner_mode), speed), 0); 4464 } 4465 break; 4466 4467 case MULT_EXPR: 4468 if (cst_and_fits_in_hwi (op0)) 4469 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0), 4470 mode, speed), 0); 4471 else if (cst_and_fits_in_hwi (op1)) 4472 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1), 4473 mode, speed), 0); 4474 else 4475 return comp_cost (target_spill_cost [speed], 0); 4476 break; 4477 4478 default: 4479 gcc_unreachable (); 4480 } 4481 4482 cost += cost0; 4483 cost += cost1; 4484 4485 /* Bound the cost by target_spill_cost. The parts of complicated 4486 computations often are either loop invariant or at least can 4487 be shared between several iv uses, so letting this grow without 4488 limits would not give reasonable results. 
*/ 4489 if (cost.cost > (int) target_spill_cost [speed]) 4490 cost.cost = target_spill_cost [speed]; 4491 4492 return cost; 4493 } 4494 4495 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the 4496 invariants the computation depends on. */ 4497 4498 static comp_cost 4499 force_var_cost (struct ivopts_data *data, 4500 tree expr, bitmap *depends_on) 4501 { 4502 if (depends_on) 4503 { 4504 fd_ivopts_data = data; 4505 walk_tree (&expr, find_depends, depends_on, NULL); 4506 } 4507 4508 return force_expr_to_var_cost (expr, data->speed); 4509 } 4510 4511 /* Estimates cost of expressing address ADDR as var + symbol + offset. The 4512 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set 4513 to false if the corresponding part is missing. DEPENDS_ON is a set of the 4514 invariants the computation depends on. */ 4515 4516 static comp_cost 4517 split_address_cost (struct ivopts_data *data, 4518 tree addr, bool *symbol_present, bool *var_present, 4519 unsigned HOST_WIDE_INT *offset, bitmap *depends_on) 4520 { 4521 tree core; 4522 HOST_WIDE_INT bitsize; 4523 HOST_WIDE_INT bitpos; 4524 tree toffset; 4525 machine_mode mode; 4526 int unsignedp, reversep, volatilep; 4527 4528 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode, 4529 &unsignedp, &reversep, &volatilep); 4530 4531 if (toffset != 0 4532 || bitpos % BITS_PER_UNIT != 0 4533 || reversep 4534 || !VAR_P (core)) 4535 { 4536 *symbol_present = false; 4537 *var_present = true; 4538 fd_ivopts_data = data; 4539 if (depends_on) 4540 walk_tree (&addr, find_depends, depends_on, NULL); 4541 4542 return comp_cost (target_spill_cost[data->speed], 0); 4543 } 4544 4545 *offset += bitpos / BITS_PER_UNIT; 4546 if (TREE_STATIC (core) 4547 || DECL_EXTERNAL (core)) 4548 { 4549 *symbol_present = true; 4550 *var_present = false; 4551 return no_cost; 4552 } 4553 4554 *symbol_present = false; 4555 *var_present = true; 4556 return no_cost; 4557 } 4558 4559 /* Estimates cost of expressing difference of addresses E1 - E2 as 4560 var + symbol + offset. The value of offset is added to OFFSET, 4561 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding 4562 part is missing. DEPENDS_ON is a set of the invariants the computation 4563 depends on. */ 4564 4565 static comp_cost 4566 ptr_difference_cost (struct ivopts_data *data, 4567 tree e1, tree e2, bool *symbol_present, bool *var_present, 4568 unsigned HOST_WIDE_INT *offset, bitmap *depends_on) 4569 { 4570 HOST_WIDE_INT diff = 0; 4571 aff_tree aff_e1, aff_e2; 4572 tree type; 4573 4574 gcc_assert (TREE_CODE (e1) == ADDR_EXPR); 4575 4576 if (ptr_difference_const (e1, e2, &diff)) 4577 { 4578 *offset += diff; 4579 *symbol_present = false; 4580 *var_present = false; 4581 return no_cost; 4582 } 4583 4584 if (integer_zerop (e2)) 4585 return split_address_cost (data, TREE_OPERAND (e1, 0), 4586 symbol_present, var_present, offset, depends_on); 4587 4588 *symbol_present = false; 4589 *var_present = true; 4590 4591 type = signed_type_for (TREE_TYPE (e1)); 4592 tree_to_aff_combination (e1, type, &aff_e1); 4593 tree_to_aff_combination (e2, type, &aff_e2); 4594 aff_combination_scale (&aff_e2, -1); 4595 aff_combination_add (&aff_e1, &aff_e2); 4596 4597 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on); 4598 } 4599 4600 /* Estimates cost of expressing difference E1 - E2 as 4601 var + symbol + offset. The value of offset is added to OFFSET, 4602 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding 4603 part is missing. 
DEPENDS_ON is a set of the invariants the computation 4604 depends on. */ 4605 4606 static comp_cost 4607 difference_cost (struct ivopts_data *data, 4608 tree e1, tree e2, bool *symbol_present, bool *var_present, 4609 unsigned HOST_WIDE_INT *offset, bitmap *depends_on) 4610 { 4611 machine_mode mode = TYPE_MODE (TREE_TYPE (e1)); 4612 unsigned HOST_WIDE_INT off1, off2; 4613 aff_tree aff_e1, aff_e2; 4614 tree type; 4615 4616 e1 = strip_offset (e1, &off1); 4617 e2 = strip_offset (e2, &off2); 4618 *offset += off1 - off2; 4619 4620 STRIP_NOPS (e1); 4621 STRIP_NOPS (e2); 4622 4623 if (TREE_CODE (e1) == ADDR_EXPR) 4624 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, 4625 offset, depends_on); 4626 *symbol_present = false; 4627 4628 if (operand_equal_p (e1, e2, 0)) 4629 { 4630 *var_present = false; 4631 return no_cost; 4632 } 4633 4634 *var_present = true; 4635 4636 if (integer_zerop (e2)) 4637 return force_var_cost (data, e1, depends_on); 4638 4639 if (integer_zerop (e1)) 4640 { 4641 comp_cost cost = force_var_cost (data, e2, depends_on); 4642 cost += mult_by_coeff_cost (-1, mode, data->speed); 4643 return cost; 4644 } 4645 4646 type = signed_type_for (TREE_TYPE (e1)); 4647 tree_to_aff_combination (e1, type, &aff_e1); 4648 tree_to_aff_combination (e2, type, &aff_e2); 4649 aff_combination_scale (&aff_e2, -1); 4650 aff_combination_add (&aff_e1, &aff_e2); 4651 4652 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on); 4653 } 4654 4655 /* Returns true if AFF1 and AFF2 are identical. */ 4656 4657 static bool 4658 compare_aff_trees (aff_tree *aff1, aff_tree *aff2) 4659 { 4660 unsigned i; 4661 4662 if (aff1->n != aff2->n) 4663 return false; 4664 4665 for (i = 0; i < aff1->n; i++) 4666 { 4667 if (aff1->elts[i].coef != aff2->elts[i].coef) 4668 return false; 4669 4670 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0)) 4671 return false; 4672 } 4673 return true; 4674 } 4675 4676 /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent. */ 4677 4678 static iv_inv_expr_ent * 4679 record_inv_expr (struct ivopts_data *data, tree expr) 4680 { 4681 struct iv_inv_expr_ent ent; 4682 struct iv_inv_expr_ent **slot; 4683 4684 ent.expr = expr; 4685 ent.hash = iterative_hash_expr (expr, 0); 4686 slot = data->inv_expr_tab->find_slot (&ent, INSERT); 4687 4688 if (!*slot) 4689 { 4690 *slot = XNEW (struct iv_inv_expr_ent); 4691 (*slot)->expr = expr; 4692 (*slot)->hash = ent.hash; 4693 (*slot)->id = data->max_inv_expr_id++; 4694 } 4695 4696 return *slot; 4697 } 4698 4699 /* Returns the invariant expression if expression UBASE - RATIO * CBASE 4700 requires a new compiler generated temporary. Returns -1 otherwise. 4701 ADDRESS_P is a flag indicating if the expression is for address 4702 computation. */ 4703 4704 static iv_inv_expr_ent * 4705 get_loop_invariant_expr (struct ivopts_data *data, tree ubase, 4706 tree cbase, HOST_WIDE_INT ratio, 4707 bool address_p) 4708 { 4709 aff_tree ubase_aff, cbase_aff; 4710 tree expr, ub, cb; 4711 4712 STRIP_NOPS (ubase); 4713 STRIP_NOPS (cbase); 4714 ub = ubase; 4715 cb = cbase; 4716 4717 if ((TREE_CODE (ubase) == INTEGER_CST) 4718 && (TREE_CODE (cbase) == INTEGER_CST)) 4719 return NULL; 4720 4721 /* Strips the constant part. */ 4722 if (TREE_CODE (ubase) == PLUS_EXPR 4723 || TREE_CODE (ubase) == MINUS_EXPR 4724 || TREE_CODE (ubase) == POINTER_PLUS_EXPR) 4725 { 4726 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST) 4727 ubase = TREE_OPERAND (ubase, 0); 4728 } 4729 4730 /* Strips the constant part. 
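   For instance, with cbase == p_7 + 8 only p_7 takes part in the checks
   that follow; when RATIO is 1 and the stripped bases match, the
   difference is a compile-time constant and no new temporary is needed.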
*/ 4731 if (TREE_CODE (cbase) == PLUS_EXPR 4732 || TREE_CODE (cbase) == MINUS_EXPR 4733 || TREE_CODE (cbase) == POINTER_PLUS_EXPR) 4734 { 4735 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST) 4736 cbase = TREE_OPERAND (cbase, 0); 4737 } 4738 4739 if (address_p) 4740 { 4741 if (((TREE_CODE (ubase) == SSA_NAME) 4742 || (TREE_CODE (ubase) == ADDR_EXPR 4743 && is_gimple_min_invariant (ubase))) 4744 && (TREE_CODE (cbase) == INTEGER_CST)) 4745 return NULL; 4746 4747 if (((TREE_CODE (cbase) == SSA_NAME) 4748 || (TREE_CODE (cbase) == ADDR_EXPR 4749 && is_gimple_min_invariant (cbase))) 4750 && (TREE_CODE (ubase) == INTEGER_CST)) 4751 return NULL; 4752 } 4753 4754 if (ratio == 1) 4755 { 4756 if (operand_equal_p (ubase, cbase, 0)) 4757 return NULL; 4758 4759 if (TREE_CODE (ubase) == ADDR_EXPR 4760 && TREE_CODE (cbase) == ADDR_EXPR) 4761 { 4762 tree usym, csym; 4763 4764 usym = TREE_OPERAND (ubase, 0); 4765 csym = TREE_OPERAND (cbase, 0); 4766 if (TREE_CODE (usym) == ARRAY_REF) 4767 { 4768 tree ind = TREE_OPERAND (usym, 1); 4769 if (TREE_CODE (ind) == INTEGER_CST 4770 && tree_fits_shwi_p (ind) 4771 && tree_to_shwi (ind) == 0) 4772 usym = TREE_OPERAND (usym, 0); 4773 } 4774 if (TREE_CODE (csym) == ARRAY_REF) 4775 { 4776 tree ind = TREE_OPERAND (csym, 1); 4777 if (TREE_CODE (ind) == INTEGER_CST 4778 && tree_fits_shwi_p (ind) 4779 && tree_to_shwi (ind) == 0) 4780 csym = TREE_OPERAND (csym, 0); 4781 } 4782 if (operand_equal_p (usym, csym, 0)) 4783 return NULL; 4784 } 4785 /* Now do more complex comparison */ 4786 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff); 4787 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff); 4788 if (compare_aff_trees (&ubase_aff, &cbase_aff)) 4789 return NULL; 4790 } 4791 4792 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff); 4793 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff); 4794 4795 aff_combination_scale (&cbase_aff, -1 * ratio); 4796 aff_combination_add (&ubase_aff, &cbase_aff); 4797 expr = aff_combination_to_tree (&ubase_aff); 4798 return record_inv_expr (data, expr); 4799 } 4800 4801 /* Scale (multiply) the computed COST (except scratch part that should be 4802 hoisted out a loop) by header->frequency / AT->frequency, 4803 which makes expected cost more accurate. */ 4804 4805 static comp_cost 4806 get_scaled_computation_cost_at (ivopts_data *data, gimple *at, iv_cand *cand, 4807 comp_cost cost) 4808 { 4809 int loop_freq = data->current_loop->header->frequency; 4810 int bb_freq = gimple_bb (at)->frequency; 4811 if (loop_freq != 0) 4812 { 4813 gcc_assert (cost.scratch <= cost.cost); 4814 int scaled_cost 4815 = cost.scratch + (cost.cost - cost.scratch) * bb_freq / loop_freq; 4816 4817 if (dump_file && (dump_flags & TDF_DETAILS)) 4818 fprintf (dump_file, "Scaling iv_use based on cand %d " 4819 "by %2.2f: %d (scratch: %d) -> %d (%d/%d)\n", 4820 cand->id, 1.0f * bb_freq / loop_freq, cost.cost, 4821 cost.scratch, scaled_cost, bb_freq, loop_freq); 4822 4823 cost.cost = scaled_cost; 4824 } 4825 4826 return cost; 4827 } 4828 4829 /* Determines the cost of the computation by that USE is expressed 4830 from induction variable CAND. If ADDRESS_P is true, we just need 4831 to create an address from it, otherwise we want to get it into 4832 register. A set of invariants we depend on is stored in 4833 DEPENDS_ON. AT is the statement at that the value is computed. 4834 If CAN_AUTOINC is nonnull, use it to record whether autoinc 4835 addressing is likely. 
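   Roughly, the use is rewritten as ubase + ratio * (cand - cbase) with
   ratio = ustep / cstep; the cost returned is the cost of producing the
   symbol/var/offset parts of that expression (plus the addressing cost
   when ADDRESS_P), scaled by the relative frequency of AT's basic block,
   and infinite_cost is returned whenever the rewrite is not possible.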
*/ 4836 4837 static comp_cost 4838 get_computation_cost_at (struct ivopts_data *data, 4839 struct iv_use *use, struct iv_cand *cand, 4840 bool address_p, bitmap *depends_on, gimple *at, 4841 bool *can_autoinc, 4842 iv_inv_expr_ent **inv_expr) 4843 { 4844 tree ubase = use->iv->base, ustep = use->iv->step; 4845 tree cbase, cstep; 4846 tree utype = TREE_TYPE (ubase), ctype; 4847 unsigned HOST_WIDE_INT cstepi, offset = 0; 4848 HOST_WIDE_INT ratio, aratio; 4849 bool var_present, symbol_present, stmt_is_after_inc; 4850 comp_cost cost; 4851 widest_int rat; 4852 bool speed = optimize_bb_for_speed_p (gimple_bb (at)); 4853 machine_mode mem_mode = (address_p 4854 ? TYPE_MODE (TREE_TYPE (*use->op_p)) 4855 : VOIDmode); 4856 4857 if (depends_on) 4858 *depends_on = NULL; 4859 4860 /* Only consider real candidates. */ 4861 if (!cand->iv) 4862 return infinite_cost; 4863 4864 cbase = cand->iv->base; 4865 cstep = cand->iv->step; 4866 ctype = TREE_TYPE (cbase); 4867 4868 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype)) 4869 { 4870 /* We do not have a precision to express the values of use. */ 4871 return infinite_cost; 4872 } 4873 4874 if (address_p 4875 || (use->iv->base_object 4876 && cand->iv->base_object 4877 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object)) 4878 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object)))) 4879 { 4880 /* Do not try to express address of an object with computation based 4881 on address of a different object. This may cause problems in rtl 4882 level alias analysis (that does not expect this to be happening, 4883 as this is illegal in C), and would be unlikely to be useful 4884 anyway. */ 4885 if (use->iv->base_object 4886 && cand->iv->base_object 4887 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0)) 4888 return infinite_cost; 4889 } 4890 4891 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype)) 4892 { 4893 /* TODO -- add direct handling of this case. */ 4894 goto fallback; 4895 } 4896 4897 /* CSTEPI is removed from the offset in case statement is after the 4898 increment. If the step is not constant, we use zero instead. 4899 This is a bit imprecise (there is the extra addition), but 4900 redundancy elimination is likely to transform the code so that 4901 it uses value of the variable before increment anyway, 4902 so it is not that much unrealistic. */ 4903 if (cst_and_fits_in_hwi (cstep)) 4904 cstepi = int_cst_value (cstep); 4905 else 4906 cstepi = 0; 4907 4908 if (!constant_multiple_of (ustep, cstep, &rat)) 4909 return infinite_cost; 4910 4911 if (wi::fits_shwi_p (rat)) 4912 ratio = rat.to_shwi (); 4913 else 4914 return infinite_cost; 4915 4916 STRIP_NOPS (cbase); 4917 ctype = TREE_TYPE (cbase); 4918 4919 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at); 4920 4921 /* use = ubase + ratio * (var - cbase). If either cbase is a constant 4922 or ratio == 1, it is better to handle this like 4923 4924 ubase - ratio * cbase + ratio * var 4925 4926 (also holds in the case ratio == -1, TODO. */ 4927 4928 if (cst_and_fits_in_hwi (cbase)) 4929 { 4930 offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase); 4931 cost = difference_cost (data, 4932 ubase, build_int_cst (utype, 0), 4933 &symbol_present, &var_present, &offset, 4934 depends_on); 4935 cost /= avg_loop_niter (data->current_loop); 4936 } 4937 else if (ratio == 1) 4938 { 4939 tree real_cbase = cbase; 4940 4941 /* Check to see if any adjustment is needed. 
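   When the use statement follows the increment of the candidate and the
   step is not a compile-time constant (cstepi == 0 above), the later
   constant-offset adjustment cannot account for the step, so fold it
   here into the base the difference is computed against.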
*/ 4942 if (cstepi == 0 && stmt_is_after_inc) 4943 { 4944 aff_tree real_cbase_aff; 4945 aff_tree cstep_aff; 4946 4947 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase), 4948 &real_cbase_aff); 4949 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff); 4950 4951 aff_combination_add (&real_cbase_aff, &cstep_aff); 4952 real_cbase = aff_combination_to_tree (&real_cbase_aff); 4953 } 4954 4955 cost = difference_cost (data, 4956 ubase, real_cbase, 4957 &symbol_present, &var_present, &offset, 4958 depends_on); 4959 cost /= avg_loop_niter (data->current_loop); 4960 } 4961 else if (address_p 4962 && !POINTER_TYPE_P (ctype) 4963 && multiplier_allowed_in_address_p 4964 (ratio, mem_mode, 4965 TYPE_ADDR_SPACE (TREE_TYPE (utype)))) 4966 { 4967 tree real_cbase = cbase; 4968 4969 if (cstepi == 0 && stmt_is_after_inc) 4970 { 4971 if (POINTER_TYPE_P (ctype)) 4972 real_cbase = fold_build2 (POINTER_PLUS_EXPR, ctype, cbase, cstep); 4973 else 4974 real_cbase = fold_build2 (PLUS_EXPR, ctype, cbase, cstep); 4975 } 4976 real_cbase = fold_build2 (MULT_EXPR, ctype, real_cbase, 4977 build_int_cst (ctype, ratio)); 4978 cost = difference_cost (data, 4979 ubase, real_cbase, 4980 &symbol_present, &var_present, &offset, 4981 depends_on); 4982 cost /= avg_loop_niter (data->current_loop); 4983 } 4984 else 4985 { 4986 cost = force_var_cost (data, cbase, depends_on); 4987 cost += difference_cost (data, ubase, build_int_cst (utype, 0), 4988 &symbol_present, &var_present, &offset, 4989 depends_on); 4990 cost /= avg_loop_niter (data->current_loop); 4991 cost += add_cost (data->speed, TYPE_MODE (ctype)); 4992 } 4993 4994 /* Record setup cost in scratch field. */ 4995 cost.scratch = cost.cost; 4996 4997 if (inv_expr && depends_on && *depends_on) 4998 { 4999 *inv_expr = get_loop_invariant_expr (data, ubase, cbase, ratio, 5000 address_p); 5001 /* Clear depends on. */ 5002 if (*inv_expr != NULL) 5003 bitmap_clear (*depends_on); 5004 } 5005 5006 /* If we are after the increment, the value of the candidate is higher by 5007 one iteration. */ 5008 if (stmt_is_after_inc) 5009 offset -= ratio * cstepi; 5010 5011 /* Now the computation is in shape symbol + var1 + const + ratio * var2. 5012 (symbol/var1/const parts may be omitted). If we are looking for an 5013 address, find the cost of addressing this. */ 5014 if (address_p) 5015 { 5016 cost += get_address_cost (symbol_present, var_present, 5017 offset, ratio, cstepi, 5018 mem_mode, 5019 TYPE_ADDR_SPACE (TREE_TYPE (utype)), 5020 speed, stmt_is_after_inc, can_autoinc); 5021 return get_scaled_computation_cost_at (data, at, cand, cost); 5022 } 5023 5024 /* Otherwise estimate the costs for computing the expression. */ 5025 if (!symbol_present && !var_present && !offset) 5026 { 5027 if (ratio != 1) 5028 cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed); 5029 return get_scaled_computation_cost_at (data, at, cand, cost); 5030 } 5031 5032 /* Symbol + offset should be compile-time computable so consider that they 5033 are added once to the variable, if present. */ 5034 if (var_present && (symbol_present || offset)) 5035 cost += adjust_setup_cost (data, 5036 add_cost (speed, TYPE_MODE (ctype))); 5037 5038 /* Having offset does not affect runtime cost in case it is added to 5039 symbol, but it increases complexity. */ 5040 if (offset) 5041 cost.complexity++; 5042 5043 cost += add_cost (speed, TYPE_MODE (ctype)); 5044 5045 aratio = ratio > 0 ? 
ratio : -ratio; 5046 if (aratio != 1) 5047 cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed); 5048 5049 return get_scaled_computation_cost_at (data, at, cand, cost); 5050 5051 fallback: 5052 if (can_autoinc) 5053 *can_autoinc = false; 5054 5055 /* Just get the expression, expand it and measure the cost. */ 5056 tree comp = get_computation_at (data->current_loop, use, cand, at); 5057 5058 if (!comp) 5059 return infinite_cost; 5060 5061 if (address_p) 5062 comp = build_simple_mem_ref (comp); 5063 5064 cost = comp_cost (computation_cost (comp, speed), 0); 5065 5066 return get_scaled_computation_cost_at (data, at, cand, cost); 5067 } 5068 5069 /* Determines the cost of the computation by that USE is expressed 5070 from induction variable CAND. If ADDRESS_P is true, we just need 5071 to create an address from it, otherwise we want to get it into 5072 register. A set of invariants we depend on is stored in 5073 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether 5074 autoinc addressing is likely. */ 5075 5076 static comp_cost 5077 get_computation_cost (struct ivopts_data *data, 5078 struct iv_use *use, struct iv_cand *cand, 5079 bool address_p, bitmap *depends_on, 5080 bool *can_autoinc, iv_inv_expr_ent **inv_expr) 5081 { 5082 return get_computation_cost_at (data, 5083 use, cand, address_p, depends_on, use->stmt, 5084 can_autoinc, inv_expr); 5085 } 5086 5087 /* Determines cost of computing the use in GROUP with CAND in a generic 5088 expression. */ 5089 5090 static bool 5091 determine_group_iv_cost_generic (struct ivopts_data *data, 5092 struct iv_group *group, struct iv_cand *cand) 5093 { 5094 comp_cost cost; 5095 iv_inv_expr_ent *inv_expr = NULL; 5096 bitmap depends_on = NULL; 5097 struct iv_use *use = group->vuses[0]; 5098 5099 /* The simple case first -- if we need to express value of the preserved 5100 original biv, the cost is 0. This also prevents us from counting the 5101 cost of increment twice -- once at this use and once in the cost of 5102 the candidate. */ 5103 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt) 5104 cost = no_cost; 5105 else 5106 cost = get_computation_cost (data, use, cand, false, 5107 &depends_on, NULL, &inv_expr); 5108 5109 set_group_iv_cost (data, group, cand, cost, depends_on, 5110 NULL_TREE, ERROR_MARK, inv_expr); 5111 return !cost.infinite_cost_p (); 5112 } 5113 5114 /* Determines cost of computing uses in GROUP with CAND in addresses. */ 5115 5116 static bool 5117 determine_group_iv_cost_address (struct ivopts_data *data, 5118 struct iv_group *group, struct iv_cand *cand) 5119 { 5120 unsigned i; 5121 bitmap depends_on; 5122 bool can_autoinc; 5123 iv_inv_expr_ent *inv_expr = NULL; 5124 struct iv_use *use = group->vuses[0]; 5125 comp_cost sum_cost = no_cost, cost; 5126 5127 cost = get_computation_cost (data, use, cand, true, 5128 &depends_on, &can_autoinc, &inv_expr); 5129 5130 sum_cost = cost; 5131 if (!sum_cost.infinite_cost_p () && cand->ainc_use == use) 5132 { 5133 if (can_autoinc) 5134 sum_cost -= cand->cost_step; 5135 /* If we generated the candidate solely for exploiting autoincrement 5136 opportunities, and it turns out it can't be used, set the cost to 5137 infinity to make sure we ignore it. */ 5138 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE) 5139 sum_cost = infinite_cost; 5140 } 5141 5142 /* Uses in a group can share setup code, so only add setup cost once. */ 5143 cost -= cost.scratch; 5144 /* Compute and add costs for rest uses of this group. 
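   The remaining uses typically differ from the first one only in their
   constant offset, so they are costed with DEPENDS_ON and INV_EXPR left
   NULL; the loop also terminates as soon as SUM_COST becomes infinite.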
*/ 5145 for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++) 5146 { 5147 struct iv_use *next = group->vuses[i]; 5148 5149 /* TODO: We could skip computing cost for sub iv_use when it has the 5150 same cost as the first iv_use, but the cost really depends on the 5151 offset and where the iv_use is. */ 5152 cost = get_computation_cost (data, next, cand, true, 5153 NULL, &can_autoinc, NULL); 5154 sum_cost += cost; 5155 } 5156 set_group_iv_cost (data, group, cand, sum_cost, depends_on, 5157 NULL_TREE, ERROR_MARK, inv_expr); 5158 5159 return !sum_cost.infinite_cost_p (); 5160 } 5161 5162 /* Computes value of candidate CAND at position AT in iteration NITER, and 5163 stores it to VAL. */ 5164 5165 static void 5166 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter, 5167 aff_tree *val) 5168 { 5169 aff_tree step, delta, nit; 5170 struct iv *iv = cand->iv; 5171 tree type = TREE_TYPE (iv->base); 5172 tree steptype; 5173 if (POINTER_TYPE_P (type)) 5174 steptype = sizetype; 5175 else 5176 steptype = unsigned_type_for (type); 5177 5178 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step); 5179 aff_combination_convert (&step, steptype); 5180 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit); 5181 aff_combination_convert (&nit, steptype); 5182 aff_combination_mult (&nit, &step, &delta); 5183 if (stmt_after_increment (loop, cand, at)) 5184 aff_combination_add (&delta, &step); 5185 5186 tree_to_aff_combination (iv->base, type, val); 5187 if (!POINTER_TYPE_P (type)) 5188 aff_combination_convert (val, steptype); 5189 aff_combination_add (val, &delta); 5190 } 5191 5192 /* Returns period of induction variable iv. */ 5193 5194 static tree 5195 iv_period (struct iv *iv) 5196 { 5197 tree step = iv->step, period, type; 5198 tree pow2div; 5199 5200 gcc_assert (step && TREE_CODE (step) == INTEGER_CST); 5201 5202 type = unsigned_type_for (TREE_TYPE (step)); 5203 /* Period of the iv is lcm (step, type_range)/step -1, 5204 i.e., N*type_range/step - 1. Since type range is power 5205 of two, N == (step >> num_of_ending_zeros_binary (step), 5206 so the final result is 5207 5208 (type_range >> num_of_ending_zeros_binary (step)) - 1 5209 5210 */ 5211 pow2div = num_ending_zeros (step); 5212 5213 period = build_low_bits_mask (type, 5214 (TYPE_PRECISION (type) 5215 - tree_to_uhwi (pow2div))); 5216 5217 return period; 5218 } 5219 5220 /* Returns the comparison operator used when eliminating the iv USE. */ 5221 5222 static enum tree_code 5223 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use) 5224 { 5225 struct loop *loop = data->current_loop; 5226 basic_block ex_bb; 5227 edge exit; 5228 5229 ex_bb = gimple_bb (use->stmt); 5230 exit = EDGE_SUCC (ex_bb, 0); 5231 if (flow_bb_inside_loop_p (loop, exit->dest)) 5232 exit = EDGE_SUCC (ex_bb, 1); 5233 5234 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR); 5235 } 5236 5237 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now, 5238 we only detect the situation that BASE = SOMETHING + OFFSET, where the 5239 calculation is performed in non-wrapping type. 5240 5241 TODO: More generally, we could test for the situation that 5242 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero. 5243 This would require knowing the sign of OFFSET. 
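   In the motivating case of iv_elimination_compare_lt below, BASE is the
   candidate base p_0 = base + a and OFFSET is a scaled by the candidate
   step, so BASE - OFFSET is just the original base and cannot wrap.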
*/ 5244 5245 static bool 5246 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset) 5247 { 5248 enum tree_code code; 5249 tree e1, e2; 5250 aff_tree aff_e1, aff_e2, aff_offset; 5251 5252 if (!nowrap_type_p (TREE_TYPE (base))) 5253 return false; 5254 5255 base = expand_simple_operations (base); 5256 5257 if (TREE_CODE (base) == SSA_NAME) 5258 { 5259 gimple *stmt = SSA_NAME_DEF_STMT (base); 5260 5261 if (gimple_code (stmt) != GIMPLE_ASSIGN) 5262 return false; 5263 5264 code = gimple_assign_rhs_code (stmt); 5265 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS) 5266 return false; 5267 5268 e1 = gimple_assign_rhs1 (stmt); 5269 e2 = gimple_assign_rhs2 (stmt); 5270 } 5271 else 5272 { 5273 code = TREE_CODE (base); 5274 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS) 5275 return false; 5276 e1 = TREE_OPERAND (base, 0); 5277 e2 = TREE_OPERAND (base, 1); 5278 } 5279 5280 /* Use affine expansion as deeper inspection to prove the equality. */ 5281 tree_to_aff_combination_expand (e2, TREE_TYPE (e2), 5282 &aff_e2, &data->name_expansion_cache); 5283 tree_to_aff_combination_expand (offset, TREE_TYPE (offset), 5284 &aff_offset, &data->name_expansion_cache); 5285 aff_combination_scale (&aff_offset, -1); 5286 switch (code) 5287 { 5288 case PLUS_EXPR: 5289 aff_combination_add (&aff_e2, &aff_offset); 5290 if (aff_combination_zero_p (&aff_e2)) 5291 return true; 5292 5293 tree_to_aff_combination_expand (e1, TREE_TYPE (e1), 5294 &aff_e1, &data->name_expansion_cache); 5295 aff_combination_add (&aff_e1, &aff_offset); 5296 return aff_combination_zero_p (&aff_e1); 5297 5298 case POINTER_PLUS_EXPR: 5299 aff_combination_add (&aff_e2, &aff_offset); 5300 return aff_combination_zero_p (&aff_e2); 5301 5302 default: 5303 return false; 5304 } 5305 } 5306 5307 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR 5308 comparison with CAND. NITER describes the number of iterations of 5309 the loops. If successful, the comparison in COMP_P is altered accordingly. 5310 5311 We aim to handle the following situation: 5312 5313 sometype *base, *p; 5314 int a, b, i; 5315 5316 i = a; 5317 p = p_0 = base + a; 5318 5319 do 5320 { 5321 bla (*p); 5322 p++; 5323 i++; 5324 } 5325 while (i < b); 5326 5327 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1. 5328 We aim to optimize this to 5329 5330 p = p_0 = base + a; 5331 do 5332 { 5333 bla (*p); 5334 p++; 5335 } 5336 while (p < p_0 - a + b); 5337 5338 This preserves the correctness, since the pointer arithmetics does not 5339 overflow. More precisely: 5340 5341 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no 5342 overflow in computing it or the values of p. 5343 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not 5344 overflow. To prove this, we use the fact that p_0 = base + a. */ 5345 5346 static bool 5347 iv_elimination_compare_lt (struct ivopts_data *data, 5348 struct iv_cand *cand, enum tree_code *comp_p, 5349 struct tree_niter_desc *niter) 5350 { 5351 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset; 5352 struct aff_tree nit, tmpa, tmpb; 5353 enum tree_code comp; 5354 HOST_WIDE_INT step; 5355 5356 /* We need to know that the candidate induction variable does not overflow. 5357 While more complex analysis may be used to prove this, for now just 5358 check that the variable appears in the original program and that it 5359 is computed in a type that guarantees no overflows. 
*/ 5360 cand_type = TREE_TYPE (cand->iv->base); 5361 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type)) 5362 return false; 5363 5364 /* Make sure that the loop iterates till the loop bound is hit, as otherwise 5365 the calculation of the BOUND could overflow, making the comparison 5366 invalid. */ 5367 if (!data->loop_single_exit_p) 5368 return false; 5369 5370 /* We need to be able to decide whether candidate is increasing or decreasing 5371 in order to choose the right comparison operator. */ 5372 if (!cst_and_fits_in_hwi (cand->iv->step)) 5373 return false; 5374 step = int_cst_value (cand->iv->step); 5375 5376 /* Check that the number of iterations matches the expected pattern: 5377 a + 1 > b ? 0 : b - a - 1. */ 5378 mbz = niter->may_be_zero; 5379 if (TREE_CODE (mbz) == GT_EXPR) 5380 { 5381 /* Handle a + 1 > b. */ 5382 tree op0 = TREE_OPERAND (mbz, 0); 5383 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1))) 5384 { 5385 a = TREE_OPERAND (op0, 0); 5386 b = TREE_OPERAND (mbz, 1); 5387 } 5388 else 5389 return false; 5390 } 5391 else if (TREE_CODE (mbz) == LT_EXPR) 5392 { 5393 tree op1 = TREE_OPERAND (mbz, 1); 5394 5395 /* Handle b < a + 1. */ 5396 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1))) 5397 { 5398 a = TREE_OPERAND (op1, 0); 5399 b = TREE_OPERAND (mbz, 0); 5400 } 5401 else 5402 return false; 5403 } 5404 else 5405 return false; 5406 5407 /* Expected number of iterations is B - A - 1. Check that it matches 5408 the actual number, i.e., that B - A - NITER = 1. */ 5409 tree_to_aff_combination (niter->niter, nit_type, &nit); 5410 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa); 5411 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb); 5412 aff_combination_scale (&nit, -1); 5413 aff_combination_scale (&tmpa, -1); 5414 aff_combination_add (&tmpb, &tmpa); 5415 aff_combination_add (&tmpb, &nit); 5416 if (tmpb.n != 0 || tmpb.offset != 1) 5417 return false; 5418 5419 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not 5420 overflow. */ 5421 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step), 5422 cand->iv->step, 5423 fold_convert (TREE_TYPE (cand->iv->step), a)); 5424 if (!difference_cannot_overflow_p (data, cand->iv->base, offset)) 5425 return false; 5426 5427 /* Determine the new comparison operator. */ 5428 comp = step < 0 ? GT_EXPR : LT_EXPR; 5429 if (*comp_p == NE_EXPR) 5430 *comp_p = comp; 5431 else if (*comp_p == EQ_EXPR) 5432 *comp_p = invert_tree_comparison (comp, false); 5433 else 5434 gcc_unreachable (); 5435 5436 return true; 5437 } 5438 5439 /* Check whether it is possible to express the condition in USE by comparison 5440 of candidate CAND. If so, store the value compared with to BOUND, and the 5441 comparison operator to COMP. */ 5442 5443 static bool 5444 may_eliminate_iv (struct ivopts_data *data, 5445 struct iv_use *use, struct iv_cand *cand, tree *bound, 5446 enum tree_code *comp) 5447 { 5448 basic_block ex_bb; 5449 edge exit; 5450 tree period; 5451 struct loop *loop = data->current_loop; 5452 aff_tree bnd; 5453 struct tree_niter_desc *desc = NULL; 5454 5455 if (TREE_CODE (cand->iv->step) != INTEGER_CST) 5456 return false; 5457 5458 /* For now works only for exits that dominate the loop latch. 5459 TODO: extend to other conditions inside loop body. 
*/ 5460 ex_bb = gimple_bb (use->stmt); 5461 if (use->stmt != last_stmt (ex_bb) 5462 || gimple_code (use->stmt) != GIMPLE_COND 5463 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb)) 5464 return false; 5465 5466 exit = EDGE_SUCC (ex_bb, 0); 5467 if (flow_bb_inside_loop_p (loop, exit->dest)) 5468 exit = EDGE_SUCC (ex_bb, 1); 5469 if (flow_bb_inside_loop_p (loop, exit->dest)) 5470 return false; 5471 5472 desc = niter_for_exit (data, exit); 5473 if (!desc) 5474 return false; 5475 5476 /* Determine whether we can use the variable to test the exit condition. 5477 This is the case iff the period of the induction variable is greater 5478 than the number of iterations for which the exit condition is true. */ 5479 period = iv_period (cand->iv); 5480 5481 /* If the number of iterations is constant, compare against it directly. */ 5482 if (TREE_CODE (desc->niter) == INTEGER_CST) 5483 { 5484 /* See cand_value_at. */ 5485 if (stmt_after_increment (loop, cand, use->stmt)) 5486 { 5487 if (!tree_int_cst_lt (desc->niter, period)) 5488 return false; 5489 } 5490 else 5491 { 5492 if (tree_int_cst_lt (period, desc->niter)) 5493 return false; 5494 } 5495 } 5496 5497 /* If not, and if this is the only possible exit of the loop, see whether 5498 we can get a conservative estimate on the number of iterations of the 5499 entire loop and compare against that instead. */ 5500 else 5501 { 5502 widest_int period_value, max_niter; 5503 5504 max_niter = desc->max; 5505 if (stmt_after_increment (loop, cand, use->stmt)) 5506 max_niter += 1; 5507 period_value = wi::to_widest (period); 5508 if (wi::gtu_p (max_niter, period_value)) 5509 { 5510 /* See if we can take advantage of inferred loop bound 5511 information. */ 5512 if (data->loop_single_exit_p) 5513 { 5514 if (!max_loop_iterations (loop, &max_niter)) 5515 return false; 5516 /* The loop bound is already adjusted by adding 1. */ 5517 if (wi::gtu_p (max_niter, period_value)) 5518 return false; 5519 } 5520 else 5521 return false; 5522 } 5523 } 5524 5525 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd); 5526 5527 *bound = fold_convert (TREE_TYPE (cand->iv->base), 5528 aff_combination_to_tree (&bnd)); 5529 *comp = iv_elimination_compare (data, use); 5530 5531 /* It is unlikely that computing the number of iterations using division 5532 would be more profitable than keeping the original induction variable. */ 5533 if (expression_expensive_p (*bound)) 5534 return false; 5535 5536 /* Sometimes, it is possible to handle the situation that the number of 5537 iterations may be zero unless additional assumptions by using < 5538 instead of != in the exit condition. 5539 5540 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and 5541 base the exit condition on it. However, that is often too 5542 expensive. */ 5543 if (!integer_zerop (desc->may_be_zero)) 5544 return iv_elimination_compare_lt (data, cand, comp, desc); 5545 5546 return true; 5547 } 5548 5549 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must 5550 be copied, if it is used in the loop body and DATA->body_includes_call. */ 5551 5552 static int 5553 parm_decl_cost (struct ivopts_data *data, tree bound) 5554 { 5555 tree sbound = bound; 5556 STRIP_NOPS (sbound); 5557 5558 if (TREE_CODE (sbound) == SSA_NAME 5559 && SSA_NAME_IS_DEFAULT_DEF (sbound) 5560 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL 5561 && data->body_includes_call) 5562 return COSTS_N_INSNS (1); 5563 5564 return 0; 5565 } 5566 5567 /* Determines cost of computing the use in GROUP with CAND in a condition. 
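   Two alternatives are costed: eliminating the original iv altogether by
   rewriting the exit test in terms of CAND (see may_eliminate_iv), and
   keeping the comparison while expressing its operands from CAND.  The
   cheaper of the two is recorded, with ties going to the elimination.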
*/ 5568 5569 static bool 5570 determine_group_iv_cost_cond (struct ivopts_data *data, 5571 struct iv_group *group, struct iv_cand *cand) 5572 { 5573 tree bound = NULL_TREE; 5574 struct iv *cmp_iv; 5575 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on; 5576 comp_cost elim_cost, express_cost, cost, bound_cost; 5577 bool ok; 5578 iv_inv_expr_ent *elim_inv_expr = NULL, *express_inv_expr = NULL, *inv_expr; 5579 tree *control_var, *bound_cst; 5580 enum tree_code comp = ERROR_MARK; 5581 struct iv_use *use = group->vuses[0]; 5582 5583 gcc_assert (cand->iv); 5584 5585 /* Try iv elimination. */ 5586 if (may_eliminate_iv (data, use, cand, &bound, &comp)) 5587 { 5588 elim_cost = force_var_cost (data, bound, &depends_on_elim); 5589 if (elim_cost.cost == 0) 5590 elim_cost.cost = parm_decl_cost (data, bound); 5591 else if (TREE_CODE (bound) == INTEGER_CST) 5592 elim_cost.cost = 0; 5593 /* If we replace a loop condition 'i < n' with 'p < base + n', 5594 depends_on_elim will have 'base' and 'n' set, which implies 5595 that both 'base' and 'n' will be live during the loop. More likely, 5596 'base + n' will be loop invariant, resulting in only one live value 5597 during the loop. So in that case we clear depends_on_elim and set 5598 elim_inv_expr_id instead. */ 5599 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1) 5600 { 5601 elim_inv_expr = record_inv_expr (data, bound); 5602 bitmap_clear (depends_on_elim); 5603 } 5604 /* The bound is a loop invariant, so it will be only computed 5605 once. */ 5606 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost); 5607 } 5608 else 5609 elim_cost = infinite_cost; 5610 5611 /* Try expressing the original giv. If it is compared with an invariant, 5612 note that we cannot get rid of it. */ 5613 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst, 5614 NULL, &cmp_iv); 5615 gcc_assert (ok); 5616 5617 /* When the condition is a comparison of the candidate IV against 5618 zero, prefer this IV. 5619 5620 TODO: The constant that we're subtracting from the cost should 5621 be target-dependent. This information should be added to the 5622 target costs for each backend. */ 5623 if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */ 5624 && integer_zerop (*bound_cst) 5625 && (operand_equal_p (*control_var, cand->var_after, 0) 5626 || operand_equal_p (*control_var, cand->var_before, 0))) 5627 elim_cost -= 1; 5628 5629 express_cost = get_computation_cost (data, use, cand, false, 5630 &depends_on_express, NULL, 5631 &express_inv_expr); 5632 fd_ivopts_data = data; 5633 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL); 5634 5635 /* Count the cost of the original bound as well. */ 5636 bound_cost = force_var_cost (data, *bound_cst, NULL); 5637 if (bound_cost.cost == 0) 5638 bound_cost.cost = parm_decl_cost (data, *bound_cst); 5639 else if (TREE_CODE (*bound_cst) == INTEGER_CST) 5640 bound_cost.cost = 0; 5641 express_cost += bound_cost; 5642 5643 /* Choose the better approach, preferring the eliminated IV. 
*/ 5644 if (elim_cost <= express_cost) 5645 { 5646 cost = elim_cost; 5647 depends_on = depends_on_elim; 5648 depends_on_elim = NULL; 5649 inv_expr = elim_inv_expr; 5650 } 5651 else 5652 { 5653 cost = express_cost; 5654 depends_on = depends_on_express; 5655 depends_on_express = NULL; 5656 bound = NULL_TREE; 5657 comp = ERROR_MARK; 5658 inv_expr = express_inv_expr; 5659 } 5660 5661 set_group_iv_cost (data, group, cand, cost, 5662 depends_on, bound, comp, inv_expr); 5663 5664 if (depends_on_elim) 5665 BITMAP_FREE (depends_on_elim); 5666 if (depends_on_express) 5667 BITMAP_FREE (depends_on_express); 5668 5669 return !cost.infinite_cost_p (); 5670 } 5671 5672 /* Determines cost of computing uses in GROUP with CAND. Returns false 5673 if USE cannot be represented with CAND. */ 5674 5675 static bool 5676 determine_group_iv_cost (struct ivopts_data *data, 5677 struct iv_group *group, struct iv_cand *cand) 5678 { 5679 switch (group->type) 5680 { 5681 case USE_NONLINEAR_EXPR: 5682 return determine_group_iv_cost_generic (data, group, cand); 5683 5684 case USE_ADDRESS: 5685 return determine_group_iv_cost_address (data, group, cand); 5686 5687 case USE_COMPARE: 5688 return determine_group_iv_cost_cond (data, group, cand); 5689 5690 default: 5691 gcc_unreachable (); 5692 } 5693 } 5694 5695 /* Return true if get_computation_cost indicates that autoincrement is 5696 a possibility for the pair of USE and CAND, false otherwise. */ 5697 5698 static bool 5699 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use, 5700 struct iv_cand *cand) 5701 { 5702 bitmap depends_on; 5703 bool can_autoinc; 5704 comp_cost cost; 5705 5706 if (use->type != USE_ADDRESS) 5707 return false; 5708 5709 cost = get_computation_cost (data, use, cand, true, &depends_on, 5710 &can_autoinc, NULL); 5711 5712 BITMAP_FREE (depends_on); 5713 5714 return !cost.infinite_cost_p () && can_autoinc; 5715 } 5716 5717 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a 5718 use that allows autoincrement, and set their AINC_USE if possible. */ 5719 5720 static void 5721 set_autoinc_for_original_candidates (struct ivopts_data *data) 5722 { 5723 unsigned i, j; 5724 5725 for (i = 0; i < data->vcands.length (); i++) 5726 { 5727 struct iv_cand *cand = data->vcands[i]; 5728 struct iv_use *closest_before = NULL; 5729 struct iv_use *closest_after = NULL; 5730 if (cand->pos != IP_ORIGINAL) 5731 continue; 5732 5733 for (j = 0; j < data->vgroups.length (); j++) 5734 { 5735 struct iv_group *group = data->vgroups[j]; 5736 struct iv_use *use = group->vuses[0]; 5737 unsigned uid = gimple_uid (use->stmt); 5738 5739 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)) 5740 continue; 5741 5742 if (uid < gimple_uid (cand->incremented_at) 5743 && (closest_before == NULL 5744 || uid > gimple_uid (closest_before->stmt))) 5745 closest_before = use; 5746 5747 if (uid > gimple_uid (cand->incremented_at) 5748 && (closest_after == NULL 5749 || uid < gimple_uid (closest_after->stmt))) 5750 closest_after = use; 5751 } 5752 5753 if (closest_before != NULL 5754 && autoinc_possible_for_pair (data, closest_before, cand)) 5755 cand->ainc_use = closest_before; 5756 else if (closest_after != NULL 5757 && autoinc_possible_for_pair (data, closest_after, cand)) 5758 cand->ainc_use = closest_after; 5759 } 5760 } 5761 5762 /* Finds the candidates for the induction variables. */ 5763 5764 static void 5765 find_iv_candidates (struct ivopts_data *data) 5766 { 5767 /* Add commonly used ivs. 
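   These are simple counters, e.g. an iv with base 0 and step 1 in a
   standard integer type, that many groups can typically be expressed from.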
*/ 5768 add_standard_iv_candidates (data); 5769 5770 /* Add old induction variables. */ 5771 add_iv_candidate_for_bivs (data); 5772 5773 /* Add induction variables derived from uses. */ 5774 add_iv_candidate_for_groups (data); 5775 5776 set_autoinc_for_original_candidates (data); 5777 5778 /* Record the important candidates. */ 5779 record_important_candidates (data); 5780 5781 if (dump_file && (dump_flags & TDF_DETAILS)) 5782 { 5783 unsigned i; 5784 5785 fprintf (dump_file, "\n<Important Candidates>:\t"); 5786 for (i = 0; i < data->vcands.length (); i++) 5787 if (data->vcands[i]->important) 5788 fprintf (dump_file, " %d,", data->vcands[i]->id); 5789 fprintf (dump_file, "\n"); 5790 5791 fprintf (dump_file, "\n<Group, Cand> Related:\n"); 5792 for (i = 0; i < data->vgroups.length (); i++) 5793 { 5794 struct iv_group *group = data->vgroups[i]; 5795 5796 if (group->related_cands) 5797 { 5798 fprintf (dump_file, " Group %d:\t", group->id); 5799 dump_bitmap (dump_file, group->related_cands); 5800 } 5801 } 5802 fprintf (dump_file, "\n"); 5803 } 5804 } 5805 5806 /* Determines costs of computing use of iv with an iv candidate. */ 5807 5808 static void 5809 determine_group_iv_costs (struct ivopts_data *data) 5810 { 5811 unsigned i, j; 5812 struct iv_cand *cand; 5813 struct iv_group *group; 5814 bitmap to_clear = BITMAP_ALLOC (NULL); 5815 5816 alloc_use_cost_map (data); 5817 5818 for (i = 0; i < data->vgroups.length (); i++) 5819 { 5820 group = data->vgroups[i]; 5821 5822 if (data->consider_all_candidates) 5823 { 5824 for (j = 0; j < data->vcands.length (); j++) 5825 { 5826 cand = data->vcands[j]; 5827 determine_group_iv_cost (data, group, cand); 5828 } 5829 } 5830 else 5831 { 5832 bitmap_iterator bi; 5833 5834 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi) 5835 { 5836 cand = data->vcands[j]; 5837 if (!determine_group_iv_cost (data, group, cand)) 5838 bitmap_set_bit (to_clear, j); 5839 } 5840 5841 /* Remove the candidates for that the cost is infinite from 5842 the list of related candidates. 
*/ 5843 bitmap_and_compl_into (group->related_cands, to_clear); 5844 bitmap_clear (to_clear); 5845 } 5846 } 5847 5848 BITMAP_FREE (to_clear); 5849 5850 if (dump_file && (dump_flags & TDF_DETAILS)) 5851 { 5852 fprintf (dump_file, "\n<Invariant Expressions>:\n"); 5853 auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ()); 5854 5855 for (hash_table<iv_inv_expr_hasher>::iterator it 5856 = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end (); 5857 ++it) 5858 list.safe_push (*it); 5859 5860 list.qsort (sort_iv_inv_expr_ent); 5861 5862 for (i = 0; i < list.length (); ++i) 5863 { 5864 fprintf (dump_file, "inv_expr %d: \t", i); 5865 print_generic_expr (dump_file, list[i]->expr, TDF_SLIM); 5866 fprintf (dump_file, "\n"); 5867 } 5868 5869 fprintf (dump_file, "\n<Group-candidate Costs>:\n"); 5870 5871 for (i = 0; i < data->vgroups.length (); i++) 5872 { 5873 group = data->vgroups[i]; 5874 5875 fprintf (dump_file, "Group %d:\n", i); 5876 fprintf (dump_file, " cand\tcost\tcompl.\tinv.ex.\tdepends on\n"); 5877 for (j = 0; j < group->n_map_members; j++) 5878 { 5879 if (!group->cost_map[j].cand 5880 || group->cost_map[j].cost.infinite_cost_p ()) 5881 continue; 5882 5883 fprintf (dump_file, " %d\t%d\t%d\t", 5884 group->cost_map[j].cand->id, 5885 group->cost_map[j].cost.cost, 5886 group->cost_map[j].cost.complexity); 5887 if (group->cost_map[j].inv_expr != NULL) 5888 fprintf (dump_file, "%d\t", 5889 group->cost_map[j].inv_expr->id); 5890 else 5891 fprintf (dump_file, "\t"); 5892 if (group->cost_map[j].depends_on) 5893 bitmap_print (dump_file, 5894 group->cost_map[j].depends_on, "",""); 5895 fprintf (dump_file, "\n"); 5896 } 5897 5898 fprintf (dump_file, "\n"); 5899 } 5900 fprintf (dump_file, "\n"); 5901 } 5902 } 5903 5904 /* Determines cost of the candidate CAND. */ 5905 5906 static void 5907 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand) 5908 { 5909 comp_cost cost_base; 5910 unsigned cost, cost_step; 5911 tree base; 5912 5913 if (!cand->iv) 5914 { 5915 cand->cost = 0; 5916 return; 5917 } 5918 5919 /* There are two costs associated with the candidate -- its increment 5920 and its initialization. The second is almost negligible for any loop 5921 that rolls enough, so we take it just very little into account. */ 5922 5923 base = cand->iv->base; 5924 cost_base = force_var_cost (data, base, NULL); 5925 /* It will be exceptional that the iv register happens to be initialized with 5926 the proper value at no cost. In general, there will at least be a regcopy 5927 or a const set. */ 5928 if (cost_base.cost == 0) 5929 cost_base.cost = COSTS_N_INSNS (1); 5930 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base))); 5931 5932 cost = cost_step + adjust_setup_cost (data, cost_base.cost); 5933 5934 /* Prefer the original ivs unless we may gain something by replacing it. 5935 The reason is to make debugging simpler; so this is not relevant for 5936 artificial ivs created by other optimization passes. */ 5937 if (cand->pos != IP_ORIGINAL 5938 || !SSA_NAME_VAR (cand->var_before) 5939 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before))) 5940 cost++; 5941 5942 /* Prefer not to insert statements into latch unless there are some 5943 already (so that we do not create unnecessary jumps). */ 5944 if (cand->pos == IP_END 5945 && empty_block_p (ip_end_pos (data->current_loop))) 5946 cost++; 5947 5948 cand->cost = cost; 5949 cand->cost_step = cost_step; 5950 } 5951 5952 /* Determines costs of computation of the candidates. 
*/ 5953 5954 static void 5955 determine_iv_costs (struct ivopts_data *data) 5956 { 5957 unsigned i; 5958 5959 if (dump_file && (dump_flags & TDF_DETAILS)) 5960 { 5961 fprintf (dump_file, "<Candidate Costs>:\n"); 5962 fprintf (dump_file, " cand\tcost\n"); 5963 } 5964 5965 for (i = 0; i < data->vcands.length (); i++) 5966 { 5967 struct iv_cand *cand = data->vcands[i]; 5968 5969 determine_iv_cost (data, cand); 5970 5971 if (dump_file && (dump_flags & TDF_DETAILS)) 5972 fprintf (dump_file, " %d\t%d\n", i, cand->cost); 5973 } 5974 5975 if (dump_file && (dump_flags & TDF_DETAILS)) 5976 fprintf (dump_file, "\n"); 5977 } 5978 5979 /* Calculates cost for having SIZE induction variables. */ 5980 5981 static unsigned 5982 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size) 5983 { 5984 /* We add size to the cost, so that we prefer eliminating ivs 5985 if possible. */ 5986 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed, 5987 data->body_includes_call); 5988 } 5989 5990 /* For each size of the induction variable set determine the penalty. */ 5991 5992 static void 5993 determine_set_costs (struct ivopts_data *data) 5994 { 5995 unsigned j, n; 5996 gphi *phi; 5997 gphi_iterator psi; 5998 tree op; 5999 struct loop *loop = data->current_loop; 6000 bitmap_iterator bi; 6001 6002 if (dump_file && (dump_flags & TDF_DETAILS)) 6003 { 6004 fprintf (dump_file, "<Global Costs>:\n"); 6005 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs); 6006 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs); 6007 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]); 6008 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]); 6009 } 6010 6011 n = 0; 6012 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi)) 6013 { 6014 phi = psi.phi (); 6015 op = PHI_RESULT (phi); 6016 6017 if (virtual_operand_p (op)) 6018 continue; 6019 6020 if (get_iv (data, op)) 6021 continue; 6022 6023 n++; 6024 } 6025 6026 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi) 6027 { 6028 struct version_info *info = ver_info (data, j); 6029 6030 if (info->inv_id && info->has_nonlin_use) 6031 n++; 6032 } 6033 6034 data->regs_used = n; 6035 if (dump_file && (dump_flags & TDF_DETAILS)) 6036 fprintf (dump_file, " regs_used %d\n", n); 6037 6038 if (dump_file && (dump_flags & TDF_DETAILS)) 6039 { 6040 fprintf (dump_file, " cost for size:\n"); 6041 fprintf (dump_file, " ivs\tcost\n"); 6042 for (j = 0; j <= 2 * target_avail_regs; j++) 6043 fprintf (dump_file, " %d\t%d\n", j, 6044 ivopts_global_cost_for_size (data, j)); 6045 fprintf (dump_file, "\n"); 6046 } 6047 } 6048 6049 /* Returns true if A is a cheaper cost pair than B. */ 6050 6051 static bool 6052 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b) 6053 { 6054 if (!a) 6055 return false; 6056 6057 if (!b) 6058 return true; 6059 6060 if (a->cost < b->cost) 6061 return true; 6062 6063 if (b->cost < a->cost) 6064 return false; 6065 6066 /* In case the costs are the same, prefer the cheaper candidate. */ 6067 if (a->cand->cost < b->cand->cost) 6068 return true; 6069 6070 return false; 6071 } 6072 6073 6074 /* Returns candidate by that USE is expressed in IVS. */ 6075 6076 static struct cost_pair * 6077 iv_ca_cand_for_group (struct iv_ca *ivs, struct iv_group *group) 6078 { 6079 return ivs->cand_for_group[group->id]; 6080 } 6081 6082 /* Computes the cost field of IVS structure. 
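The total is the sum of the group/use costs, the candidate costs and the global register pressure cost, i.e. cost = cand_use_cost + cand_cost + ivopts_global_cost_for_size (n_regs + number of used invariant expressions).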
*/ 6083 6084 static void 6085 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs) 6086 { 6087 comp_cost cost = ivs->cand_use_cost; 6088 6089 cost += ivs->cand_cost; 6090 6091 cost += ivopts_global_cost_for_size (data, 6092 ivs->n_regs 6093 + ivs->used_inv_exprs->elements ()); 6094 6095 ivs->cost = cost; 6096 } 6097 6098 /* Remove invariants in set INVS to set IVS. */ 6099 6100 static void 6101 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs) 6102 { 6103 bitmap_iterator bi; 6104 unsigned iid; 6105 6106 if (!invs) 6107 return; 6108 6109 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi) 6110 { 6111 ivs->n_invariant_uses[iid]--; 6112 if (ivs->n_invariant_uses[iid] == 0) 6113 ivs->n_regs--; 6114 } 6115 } 6116 6117 /* Set USE not to be expressed by any candidate in IVS. */ 6118 6119 static void 6120 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs, 6121 struct iv_group *group) 6122 { 6123 unsigned gid = group->id, cid; 6124 struct cost_pair *cp; 6125 6126 cp = ivs->cand_for_group[gid]; 6127 if (!cp) 6128 return; 6129 cid = cp->cand->id; 6130 6131 ivs->bad_groups++; 6132 ivs->cand_for_group[gid] = NULL; 6133 ivs->n_cand_uses[cid]--; 6134 6135 if (ivs->n_cand_uses[cid] == 0) 6136 { 6137 bitmap_clear_bit (ivs->cands, cid); 6138 /* Do not count the pseudocandidates. */ 6139 if (cp->cand->iv) 6140 ivs->n_regs--; 6141 ivs->n_cands--; 6142 ivs->cand_cost -= cp->cand->cost; 6143 6144 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on); 6145 } 6146 6147 ivs->cand_use_cost -= cp->cost; 6148 6149 iv_ca_set_remove_invariants (ivs, cp->depends_on); 6150 6151 if (cp->inv_expr != NULL) 6152 { 6153 unsigned *slot = ivs->used_inv_exprs->get (cp->inv_expr); 6154 --(*slot); 6155 if (*slot == 0) 6156 ivs->used_inv_exprs->remove (cp->inv_expr); 6157 } 6158 iv_ca_recount_cost (data, ivs); 6159 } 6160 6161 /* Add invariants in set INVS to set IVS. */ 6162 6163 static void 6164 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs) 6165 { 6166 bitmap_iterator bi; 6167 unsigned iid; 6168 6169 if (!invs) 6170 return; 6171 6172 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi) 6173 { 6174 ivs->n_invariant_uses[iid]++; 6175 if (ivs->n_invariant_uses[iid] == 1) 6176 ivs->n_regs++; 6177 } 6178 } 6179 6180 /* Set cost pair for GROUP in set IVS to CP. */ 6181 6182 static void 6183 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs, 6184 struct iv_group *group, struct cost_pair *cp) 6185 { 6186 unsigned gid = group->id, cid; 6187 6188 if (ivs->cand_for_group[gid] == cp) 6189 return; 6190 6191 if (ivs->cand_for_group[gid]) 6192 iv_ca_set_no_cp (data, ivs, group); 6193 6194 if (cp) 6195 { 6196 cid = cp->cand->id; 6197 6198 ivs->bad_groups--; 6199 ivs->cand_for_group[gid] = cp; 6200 ivs->n_cand_uses[cid]++; 6201 if (ivs->n_cand_uses[cid] == 1) 6202 { 6203 bitmap_set_bit (ivs->cands, cid); 6204 /* Do not count the pseudocandidates. */ 6205 if (cp->cand->iv) 6206 ivs->n_regs++; 6207 ivs->n_cands++; 6208 ivs->cand_cost += cp->cand->cost; 6209 6210 iv_ca_set_add_invariants (ivs, cp->cand->depends_on); 6211 } 6212 6213 ivs->cand_use_cost += cp->cost; 6214 iv_ca_set_add_invariants (ivs, cp->depends_on); 6215 6216 if (cp->inv_expr != NULL) 6217 { 6218 unsigned *slot = &ivs->used_inv_exprs->get_or_insert (cp->inv_expr); 6219 ++(*slot); 6220 } 6221 iv_ca_recount_cost (data, ivs); 6222 } 6223 } 6224 6225 /* Extend set IVS by expressing USE by some of the candidates in it 6226 if possible. Consider all important candidates if candidates in 6227 set IVS don't give any result. 
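The cheapest applicable cost pair is installed with iv_ca_set_cp; if even the important candidates cannot express the group, it is left without a candidate and counted in bad_groups, which makes the cost of the whole set infinite.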
*/ 6228 6229 static void 6230 iv_ca_add_group (struct ivopts_data *data, struct iv_ca *ivs, 6231 struct iv_group *group) 6232 { 6233 struct cost_pair *best_cp = NULL, *cp; 6234 bitmap_iterator bi; 6235 unsigned i; 6236 struct iv_cand *cand; 6237 6238 gcc_assert (ivs->upto >= group->id); 6239 ivs->upto++; 6240 ivs->bad_groups++; 6241 6242 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi) 6243 { 6244 cand = data->vcands[i]; 6245 cp = get_group_iv_cost (data, group, cand); 6246 if (cheaper_cost_pair (cp, best_cp)) 6247 best_cp = cp; 6248 } 6249 6250 if (best_cp == NULL) 6251 { 6252 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi) 6253 { 6254 cand = data->vcands[i]; 6255 cp = get_group_iv_cost (data, group, cand); 6256 if (cheaper_cost_pair (cp, best_cp)) 6257 best_cp = cp; 6258 } 6259 } 6260 6261 iv_ca_set_cp (data, ivs, group, best_cp); 6262 } 6263 6264 /* Get cost for assignment IVS. */ 6265 6266 static comp_cost 6267 iv_ca_cost (struct iv_ca *ivs) 6268 { 6269 /* This was a conditional expression but it triggered a bug in 6270 Sun C 5.5. */ 6271 if (ivs->bad_groups) 6272 return infinite_cost; 6273 else 6274 return ivs->cost; 6275 } 6276 6277 /* Returns true if all dependences of CP are among invariants in IVS. */ 6278 6279 static bool 6280 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp) 6281 { 6282 unsigned i; 6283 bitmap_iterator bi; 6284 6285 if (!cp->depends_on) 6286 return true; 6287 6288 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi) 6289 { 6290 if (ivs->n_invariant_uses[i] == 0) 6291 return false; 6292 } 6293 6294 return true; 6295 } 6296 6297 /* Creates change of expressing GROUP by NEW_CP instead of OLD_CP and chains 6298 it before NEXT. */ 6299 6300 static struct iv_ca_delta * 6301 iv_ca_delta_add (struct iv_group *group, struct cost_pair *old_cp, 6302 struct cost_pair *new_cp, struct iv_ca_delta *next) 6303 { 6304 struct iv_ca_delta *change = XNEW (struct iv_ca_delta); 6305 6306 change->group = group; 6307 change->old_cp = old_cp; 6308 change->new_cp = new_cp; 6309 change->next = next; 6310 6311 return change; 6312 } 6313 6314 /* Joins two lists of changes L1 and L2. Destructive -- old lists 6315 are rewritten. */ 6316 6317 static struct iv_ca_delta * 6318 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2) 6319 { 6320 struct iv_ca_delta *last; 6321 6322 if (!l2) 6323 return l1; 6324 6325 if (!l1) 6326 return l2; 6327 6328 for (last = l1; last->next; last = last->next) 6329 continue; 6330 last->next = l2; 6331 6332 return l1; 6333 } 6334 6335 /* Reverse the list of changes DELTA, forming the inverse to it. */ 6336 6337 static struct iv_ca_delta * 6338 iv_ca_delta_reverse (struct iv_ca_delta *delta) 6339 { 6340 struct iv_ca_delta *act, *next, *prev = NULL; 6341 6342 for (act = delta; act; act = next) 6343 { 6344 next = act->next; 6345 act->next = prev; 6346 prev = act; 6347 6348 std::swap (act->old_cp, act->new_cp); 6349 } 6350 6351 return prev; 6352 } 6353 6354 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are 6355 reverted instead. 
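Reverting works by reversing the DELTA list with the old and new cost pairs swapped in each entry, applying it, and then reversing the list back so that DELTA is left unchanged for the caller.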
*/ 6356 6357 static void 6358 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs, 6359 struct iv_ca_delta *delta, bool forward) 6360 { 6361 struct cost_pair *from, *to; 6362 struct iv_ca_delta *act; 6363 6364 if (!forward) 6365 delta = iv_ca_delta_reverse (delta); 6366 6367 for (act = delta; act; act = act->next) 6368 { 6369 from = act->old_cp; 6370 to = act->new_cp; 6371 gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from); 6372 iv_ca_set_cp (data, ivs, act->group, to); 6373 } 6374 6375 if (!forward) 6376 iv_ca_delta_reverse (delta); 6377 } 6378 6379 /* Returns true if CAND is used in IVS. */ 6380 6381 static bool 6382 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand) 6383 { 6384 return ivs->n_cand_uses[cand->id] > 0; 6385 } 6386 6387 /* Returns number of induction variable candidates in the set IVS. */ 6388 6389 static unsigned 6390 iv_ca_n_cands (struct iv_ca *ivs) 6391 { 6392 return ivs->n_cands; 6393 } 6394 6395 /* Free the list of changes DELTA. */ 6396 6397 static void 6398 iv_ca_delta_free (struct iv_ca_delta **delta) 6399 { 6400 struct iv_ca_delta *act, *next; 6401 6402 for (act = *delta; act; act = next) 6403 { 6404 next = act->next; 6405 free (act); 6406 } 6407 6408 *delta = NULL; 6409 } 6410 6411 /* Allocates new iv candidates assignment. */ 6412 6413 static struct iv_ca * 6414 iv_ca_new (struct ivopts_data *data) 6415 { 6416 struct iv_ca *nw = XNEW (struct iv_ca); 6417 6418 nw->upto = 0; 6419 nw->bad_groups = 0; 6420 nw->cand_for_group = XCNEWVEC (struct cost_pair *, 6421 data->vgroups.length ()); 6422 nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ()); 6423 nw->cands = BITMAP_ALLOC (NULL); 6424 nw->n_cands = 0; 6425 nw->n_regs = 0; 6426 nw->cand_use_cost = no_cost; 6427 nw->cand_cost = 0; 6428 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1); 6429 nw->used_inv_exprs = new hash_map <iv_inv_expr_ent *, unsigned> (13); 6430 nw->cost = no_cost; 6431 6432 return nw; 6433 } 6434 6435 /* Free memory occupied by the set IVS. */ 6436 6437 static void 6438 iv_ca_free (struct iv_ca **ivs) 6439 { 6440 free ((*ivs)->cand_for_group); 6441 free ((*ivs)->n_cand_uses); 6442 BITMAP_FREE ((*ivs)->cands); 6443 free ((*ivs)->n_invariant_uses); 6444 delete ((*ivs)->used_inv_exprs); 6445 free (*ivs); 6446 *ivs = NULL; 6447 } 6448 6449 /* Dumps IVS to FILE. 
*/ 6450 6451 static void 6452 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs) 6453 { 6454 unsigned i; 6455 comp_cost cost = iv_ca_cost (ivs); 6456 6457 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, 6458 cost.complexity); 6459 fprintf (file, " cand_cost: %d\n cand_group_cost: %d (complexity %d)\n", 6460 ivs->cand_cost, ivs->cand_use_cost.cost, 6461 ivs->cand_use_cost.complexity); 6462 bitmap_print (file, ivs->cands, " candidates: ","\n"); 6463 6464 for (i = 0; i < ivs->upto; i++) 6465 { 6466 struct iv_group *group = data->vgroups[i]; 6467 struct cost_pair *cp = iv_ca_cand_for_group (ivs, group); 6468 if (cp) 6469 fprintf (file, " group:%d --> iv_cand:%d, cost=(%d,%d)\n", 6470 group->id, cp->cand->id, cp->cost.cost, 6471 cp->cost.complexity); 6472 else 6473 fprintf (file, " group:%d --> ??\n", group->id); 6474 } 6475 6476 const char *pref = ""; 6477 fprintf (file, " invariant variables: "); 6478 for (i = 1; i <= data->max_inv_id; i++) 6479 if (ivs->n_invariant_uses[i]) 6480 { 6481 fprintf (file, "%s%d", pref, i); 6482 pref = ", "; 6483 } 6484 6485 pref = ""; 6486 fprintf (file, "\n invariant expressions: "); 6487 for (hash_map<iv_inv_expr_ent *, unsigned>::iterator it 6488 = ivs->used_inv_exprs->begin (); it != ivs->used_inv_exprs->end (); ++it) 6489 { 6490 fprintf (file, "%s%d", pref, (*it).first->id); 6491 pref = ", "; 6492 } 6493 6494 fprintf (file, "\n\n"); 6495 } 6496 6497 /* Try changing candidate in IVS to CAND for each use. Return the cost of the 6498 new set, and store the differences in DELTA. The number of induction variables 6499 in the new set is stored in N_IVS. MIN_NCAND is a flag; when it is true, 6500 the function tries to find a solution with a minimal number of iv candidates. */ 6501 6502 static comp_cost 6503 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs, 6504 struct iv_cand *cand, struct iv_ca_delta **delta, 6505 unsigned *n_ivs, bool min_ncand) 6506 { 6507 unsigned i; 6508 comp_cost cost; 6509 struct iv_group *group; 6510 struct cost_pair *old_cp, *new_cp; 6511 6512 *delta = NULL; 6513 for (i = 0; i < ivs->upto; i++) 6514 { 6515 group = data->vgroups[i]; 6516 old_cp = iv_ca_cand_for_group (ivs, group); 6517 6518 if (old_cp 6519 && old_cp->cand == cand) 6520 continue; 6521 6522 new_cp = get_group_iv_cost (data, group, cand); 6523 if (!new_cp) 6524 continue; 6525 6526 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp)) 6527 continue; 6528 6529 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp)) 6530 continue; 6531 6532 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta); 6533 } 6534 6535 iv_ca_delta_commit (data, ivs, *delta, true); 6536 cost = iv_ca_cost (ivs); 6537 if (n_ivs) 6538 *n_ivs = iv_ca_n_cands (ivs); 6539 iv_ca_delta_commit (data, ivs, *delta, false); 6540 6541 return cost; 6542 } 6543 6544 /* Try narrowing set IVS by removing CAND. Return the cost of 6545 the new set and store the differences in DELTA. START is 6546 the candidate with which we start narrowing.
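For every group currently expressed by CAND the cheapest remaining candidate in IVS is chosen as a replacement, trying START first; if some group cannot be expressed without CAND at all, DELTA is freed and infinite_cost is returned.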
*/ 6547 6548 static comp_cost 6549 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs, 6550 struct iv_cand *cand, struct iv_cand *start, 6551 struct iv_ca_delta **delta) 6552 { 6553 unsigned i, ci; 6554 struct iv_group *group; 6555 struct cost_pair *old_cp, *new_cp, *cp; 6556 bitmap_iterator bi; 6557 struct iv_cand *cnd; 6558 comp_cost cost, best_cost, acost; 6559 6560 *delta = NULL; 6561 for (i = 0; i < data->vgroups.length (); i++) 6562 { 6563 group = data->vgroups[i]; 6564 6565 old_cp = iv_ca_cand_for_group (ivs, group); 6566 if (old_cp->cand != cand) 6567 continue; 6568 6569 best_cost = iv_ca_cost (ivs); 6570 /* Start narrowing with START. */ 6571 new_cp = get_group_iv_cost (data, group, start); 6572 6573 if (data->consider_all_candidates) 6574 { 6575 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi) 6576 { 6577 if (ci == cand->id || (start && ci == start->id)) 6578 continue; 6579 6580 cnd = data->vcands[ci]; 6581 6582 cp = get_group_iv_cost (data, group, cnd); 6583 if (!cp) 6584 continue; 6585 6586 iv_ca_set_cp (data, ivs, group, cp); 6587 acost = iv_ca_cost (ivs); 6588 6589 if (acost < best_cost) 6590 { 6591 best_cost = acost; 6592 new_cp = cp; 6593 } 6594 } 6595 } 6596 else 6597 { 6598 EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi) 6599 { 6600 if (ci == cand->id || (start && ci == start->id)) 6601 continue; 6602 6603 cnd = data->vcands[ci]; 6604 6605 cp = get_group_iv_cost (data, group, cnd); 6606 if (!cp) 6607 continue; 6608 6609 iv_ca_set_cp (data, ivs, group, cp); 6610 acost = iv_ca_cost (ivs); 6611 6612 if (acost < best_cost) 6613 { 6614 best_cost = acost; 6615 new_cp = cp; 6616 } 6617 } 6618 } 6619 /* Restore to old cp for use. */ 6620 iv_ca_set_cp (data, ivs, group, old_cp); 6621 6622 if (!new_cp) 6623 { 6624 iv_ca_delta_free (delta); 6625 return infinite_cost; 6626 } 6627 6628 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta); 6629 } 6630 6631 iv_ca_delta_commit (data, ivs, *delta, true); 6632 cost = iv_ca_cost (ivs); 6633 iv_ca_delta_commit (data, ivs, *delta, false); 6634 6635 return cost; 6636 } 6637 6638 /* Try optimizing the set of candidates IVS by removing candidates different 6639 from to EXCEPT_CAND from it. Return cost of the new set, and store 6640 differences in DELTA. */ 6641 6642 static comp_cost 6643 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs, 6644 struct iv_cand *except_cand, struct iv_ca_delta **delta) 6645 { 6646 bitmap_iterator bi; 6647 struct iv_ca_delta *act_delta, *best_delta; 6648 unsigned i; 6649 comp_cost best_cost, acost; 6650 struct iv_cand *cand; 6651 6652 best_delta = NULL; 6653 best_cost = iv_ca_cost (ivs); 6654 6655 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi) 6656 { 6657 cand = data->vcands[i]; 6658 6659 if (cand == except_cand) 6660 continue; 6661 6662 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta); 6663 6664 if (acost < best_cost) 6665 { 6666 best_cost = acost; 6667 iv_ca_delta_free (&best_delta); 6668 best_delta = act_delta; 6669 } 6670 else 6671 iv_ca_delta_free (&act_delta); 6672 } 6673 6674 if (!best_delta) 6675 { 6676 *delta = NULL; 6677 return best_cost; 6678 } 6679 6680 /* Recurse to possibly remove other unnecessary ivs. 
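Removing one candidate may make further candidates redundant, so the pruning is repeated on the narrowed set and the resulting change lists are joined into DELTA.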
*/ 6681 iv_ca_delta_commit (data, ivs, best_delta, true); 6682 best_cost = iv_ca_prune (data, ivs, except_cand, delta); 6683 iv_ca_delta_commit (data, ivs, best_delta, false); 6684 *delta = iv_ca_delta_join (best_delta, *delta); 6685 return best_cost; 6686 } 6687 6688 /* Check if CAND_IDX is a candidate other than OLD_CAND and has 6689 cheaper local cost for GROUP than BEST_CP. Return pointer to 6690 the corresponding cost_pair, otherwise just return BEST_CP. */ 6691 6692 static struct cost_pair* 6693 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group, 6694 unsigned int cand_idx, struct iv_cand *old_cand, 6695 struct cost_pair *best_cp) 6696 { 6697 struct iv_cand *cand; 6698 struct cost_pair *cp; 6699 6700 gcc_assert (old_cand != NULL && best_cp != NULL); 6701 if (cand_idx == old_cand->id) 6702 return best_cp; 6703 6704 cand = data->vcands[cand_idx]; 6705 cp = get_group_iv_cost (data, group, cand); 6706 if (cp != NULL && cheaper_cost_pair (cp, best_cp)) 6707 return cp; 6708 6709 return best_cp; 6710 } 6711 6712 /* Try breaking local optimal fixed-point for IVS by replacing candidates 6713 which are used by more than one iv uses. For each of those candidates, 6714 this function tries to represent iv uses under that candidate using 6715 other ones with lower local cost, then tries to prune the new set. 6716 If the new set has lower cost, It returns the new cost after recording 6717 candidate replacement in list DELTA. */ 6718 6719 static comp_cost 6720 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs, 6721 struct iv_ca_delta **delta) 6722 { 6723 bitmap_iterator bi, bj; 6724 unsigned int i, j, k; 6725 struct iv_cand *cand; 6726 comp_cost orig_cost, acost; 6727 struct iv_ca_delta *act_delta, *tmp_delta; 6728 struct cost_pair *old_cp, *best_cp = NULL; 6729 6730 *delta = NULL; 6731 orig_cost = iv_ca_cost (ivs); 6732 6733 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi) 6734 { 6735 if (ivs->n_cand_uses[i] == 1 6736 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND) 6737 continue; 6738 6739 cand = data->vcands[i]; 6740 6741 act_delta = NULL; 6742 /* Represent uses under current candidate using other ones with 6743 lower local cost. */ 6744 for (j = 0; j < ivs->upto; j++) 6745 { 6746 struct iv_group *group = data->vgroups[j]; 6747 old_cp = iv_ca_cand_for_group (ivs, group); 6748 6749 if (old_cp->cand != cand) 6750 continue; 6751 6752 best_cp = old_cp; 6753 if (data->consider_all_candidates) 6754 for (k = 0; k < data->vcands.length (); k++) 6755 best_cp = cheaper_cost_with_cand (data, group, k, 6756 old_cp->cand, best_cp); 6757 else 6758 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj) 6759 best_cp = cheaper_cost_with_cand (data, group, k, 6760 old_cp->cand, best_cp); 6761 6762 if (best_cp == old_cp) 6763 continue; 6764 6765 act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta); 6766 } 6767 /* No need for further prune. */ 6768 if (!act_delta) 6769 continue; 6770 6771 /* Prune the new candidate set. */ 6772 iv_ca_delta_commit (data, ivs, act_delta, true); 6773 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta); 6774 iv_ca_delta_commit (data, ivs, act_delta, false); 6775 act_delta = iv_ca_delta_join (act_delta, tmp_delta); 6776 6777 if (acost < orig_cost) 6778 { 6779 *delta = act_delta; 6780 return acost; 6781 } 6782 else 6783 iv_ca_delta_free (&act_delta); 6784 } 6785 6786 return orig_cost; 6787 } 6788 6789 /* Tries to extend the sets IVS in the best possible way in order to 6790 express the GROUP. 
If ORIGINALP is true, prefer candidates from 6791 the original set of IVs, otherwise favor important candidates not 6792 based on any memory object. */ 6793 6794 static bool 6795 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs, 6796 struct iv_group *group, bool originalp) 6797 { 6798 comp_cost best_cost, act_cost; 6799 unsigned i; 6800 bitmap_iterator bi; 6801 struct iv_cand *cand; 6802 struct iv_ca_delta *best_delta = NULL, *act_delta; 6803 struct cost_pair *cp; 6804 6805 iv_ca_add_group (data, ivs, group); 6806 best_cost = iv_ca_cost (ivs); 6807 cp = iv_ca_cand_for_group (ivs, group); 6808 if (cp) 6809 { 6810 best_delta = iv_ca_delta_add (group, NULL, cp, NULL); 6811 iv_ca_set_no_cp (data, ivs, group); 6812 } 6813 6814 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise 6815 first try important candidates not based on any memory object. Only if 6816 this fails, try the specific ones. Rationale -- in loops with many 6817 variables the best choice often is to use just one generic biv. If we 6818 added here many ivs specific to the uses, the optimization algorithm later 6819 would be likely to get stuck in a local minimum, thus causing us to create 6820 too many ivs. The approach from few ivs to more seems more likely to be 6821 successful -- starting from few ivs, replacing an expensive use by a 6822 specific iv should always be a win. */ 6823 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi) 6824 { 6825 cand = data->vcands[i]; 6826 6827 if (originalp && cand->pos !=IP_ORIGINAL) 6828 continue; 6829 6830 if (!originalp && cand->iv->base_object != NULL_TREE) 6831 continue; 6832 6833 if (iv_ca_cand_used_p (ivs, cand)) 6834 continue; 6835 6836 cp = get_group_iv_cost (data, group, cand); 6837 if (!cp) 6838 continue; 6839 6840 iv_ca_set_cp (data, ivs, group, cp); 6841 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, 6842 true); 6843 iv_ca_set_no_cp (data, ivs, group); 6844 act_delta = iv_ca_delta_add (group, NULL, cp, act_delta); 6845 6846 if (act_cost < best_cost) 6847 { 6848 best_cost = act_cost; 6849 6850 iv_ca_delta_free (&best_delta); 6851 best_delta = act_delta; 6852 } 6853 else 6854 iv_ca_delta_free (&act_delta); 6855 } 6856 6857 if (best_cost.infinite_cost_p ()) 6858 { 6859 for (i = 0; i < group->n_map_members; i++) 6860 { 6861 cp = group->cost_map + i; 6862 cand = cp->cand; 6863 if (!cand) 6864 continue; 6865 6866 /* Already tried this. */ 6867 if (cand->important) 6868 { 6869 if (originalp && cand->pos == IP_ORIGINAL) 6870 continue; 6871 if (!originalp && cand->iv->base_object == NULL_TREE) 6872 continue; 6873 } 6874 6875 if (iv_ca_cand_used_p (ivs, cand)) 6876 continue; 6877 6878 act_delta = NULL; 6879 iv_ca_set_cp (data, ivs, group, cp); 6880 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true); 6881 iv_ca_set_no_cp (data, ivs, group); 6882 act_delta = iv_ca_delta_add (group, 6883 iv_ca_cand_for_group (ivs, group), 6884 cp, act_delta); 6885 6886 if (act_cost < best_cost) 6887 { 6888 best_cost = act_cost; 6889 6890 if (best_delta) 6891 iv_ca_delta_free (&best_delta); 6892 best_delta = act_delta; 6893 } 6894 else 6895 iv_ca_delta_free (&act_delta); 6896 } 6897 } 6898 6899 iv_ca_delta_commit (data, ivs, best_delta, true); 6900 iv_ca_delta_free (&best_delta); 6901 6902 return !best_cost.infinite_cost_p (); 6903 } 6904 6905 /* Finds an initial assignment of candidates to uses. 
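The groups are added one by one with try_add_cand_for; if some group cannot be expressed by any candidate with finite cost, the partial assignment is freed and NULL is returned.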
*/ 6906 6907 static struct iv_ca * 6908 get_initial_solution (struct ivopts_data *data, bool originalp) 6909 { 6910 unsigned i; 6911 struct iv_ca *ivs = iv_ca_new (data); 6912 6913 for (i = 0; i < data->vgroups.length (); i++) 6914 if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp)) 6915 { 6916 iv_ca_free (&ivs); 6917 return NULL; 6918 } 6919 6920 return ivs; 6921 } 6922 6923 /* Tries to improve set of induction variables IVS. TRY_REPLACE_P 6924 points to a bool variable, this function tries to break local 6925 optimal fixed-point by replacing candidates in IVS if it's true. */ 6926 6927 static bool 6928 try_improve_iv_set (struct ivopts_data *data, 6929 struct iv_ca *ivs, bool *try_replace_p) 6930 { 6931 unsigned i, n_ivs; 6932 comp_cost acost, best_cost = iv_ca_cost (ivs); 6933 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta; 6934 struct iv_cand *cand; 6935 6936 /* Try extending the set of induction variables by one. */ 6937 for (i = 0; i < data->vcands.length (); i++) 6938 { 6939 cand = data->vcands[i]; 6940 6941 if (iv_ca_cand_used_p (ivs, cand)) 6942 continue; 6943 6944 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false); 6945 if (!act_delta) 6946 continue; 6947 6948 /* If we successfully added the candidate and the set is small enough, 6949 try optimizing it by removing other candidates. */ 6950 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND) 6951 { 6952 iv_ca_delta_commit (data, ivs, act_delta, true); 6953 acost = iv_ca_prune (data, ivs, cand, &tmp_delta); 6954 iv_ca_delta_commit (data, ivs, act_delta, false); 6955 act_delta = iv_ca_delta_join (act_delta, tmp_delta); 6956 } 6957 6958 if (acost < best_cost) 6959 { 6960 best_cost = acost; 6961 iv_ca_delta_free (&best_delta); 6962 best_delta = act_delta; 6963 } 6964 else 6965 iv_ca_delta_free (&act_delta); 6966 } 6967 6968 if (!best_delta) 6969 { 6970 /* Try removing the candidates from the set instead. */ 6971 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta); 6972 6973 if (!best_delta && *try_replace_p) 6974 { 6975 *try_replace_p = false; 6976 /* So far candidate selecting algorithm tends to choose fewer IVs 6977 so that it can handle cases in which loops have many variables 6978 but the best choice is often to use only one general biv. One 6979 weakness is it can't handle opposite cases, in which different 6980 candidates should be chosen with respect to each use. To solve 6981 the problem, we replace candidates in a manner described by the 6982 comments of iv_ca_replace, thus give general algorithm a chance 6983 to break local optimal fixed-point in these cases. */ 6984 best_cost = iv_ca_replace (data, ivs, &best_delta); 6985 } 6986 6987 if (!best_delta) 6988 return false; 6989 } 6990 6991 iv_ca_delta_commit (data, ivs, best_delta, true); 6992 gcc_assert (best_cost == iv_ca_cost (ivs)); 6993 iv_ca_delta_free (&best_delta); 6994 return true; 6995 } 6996 6997 /* Attempts to find the optimal set of induction variables. We do simple 6998 greedy heuristic -- we try to replace at most one candidate in the selected 6999 solution and remove the unused ivs while this improves the cost. */ 7000 7001 static struct iv_ca * 7002 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp) 7003 { 7004 struct iv_ca *set; 7005 bool try_replace_p = true; 7006 7007 /* Get the initial solution. 
*/ 7008 set = get_initial_solution (data, originalp); 7009 if (!set) 7010 { 7011 if (dump_file && (dump_flags & TDF_DETAILS)) 7012 fprintf (dump_file, "Unable to substitute for ivs, failed.\n"); 7013 return NULL; 7014 } 7015 7016 if (dump_file && (dump_flags & TDF_DETAILS)) 7017 { 7018 fprintf (dump_file, "Initial set of candidates:\n"); 7019 iv_ca_dump (data, dump_file, set); 7020 } 7021 7022 while (try_improve_iv_set (data, set, &try_replace_p)) 7023 { 7024 if (dump_file && (dump_flags & TDF_DETAILS)) 7025 { 7026 fprintf (dump_file, "Improved to:\n"); 7027 iv_ca_dump (data, dump_file, set); 7028 } 7029 } 7030 7031 return set; 7032 } 7033 7034 static struct iv_ca * 7035 find_optimal_iv_set (struct ivopts_data *data) 7036 { 7037 unsigned i; 7038 comp_cost cost, origcost; 7039 struct iv_ca *set, *origset; 7040 7041 /* Determine the cost based on a strategy that starts with original IVs, 7042 and try again using a strategy that prefers candidates not based 7043 on any IVs. */ 7044 origset = find_optimal_iv_set_1 (data, true); 7045 set = find_optimal_iv_set_1 (data, false); 7046 7047 if (!origset && !set) 7048 return NULL; 7049 7050 origcost = origset ? iv_ca_cost (origset) : infinite_cost; 7051 cost = set ? iv_ca_cost (set) : infinite_cost; 7052 7053 if (dump_file && (dump_flags & TDF_DETAILS)) 7054 { 7055 fprintf (dump_file, "Original cost %d (complexity %d)\n\n", 7056 origcost.cost, origcost.complexity); 7057 fprintf (dump_file, "Final cost %d (complexity %d)\n\n", 7058 cost.cost, cost.complexity); 7059 } 7060 7061 /* Choose the one with the best cost. */ 7062 if (origcost <= cost) 7063 { 7064 if (set) 7065 iv_ca_free (&set); 7066 set = origset; 7067 } 7068 else if (origset) 7069 iv_ca_free (&origset); 7070 7071 for (i = 0; i < data->vgroups.length (); i++) 7072 { 7073 struct iv_group *group = data->vgroups[i]; 7074 group->selected = iv_ca_cand_for_group (set, group)->cand; 7075 } 7076 7077 return set; 7078 } 7079 7080 /* Creates a new induction variable corresponding to CAND. */ 7081 7082 static void 7083 create_new_iv (struct ivopts_data *data, struct iv_cand *cand) 7084 { 7085 gimple_stmt_iterator incr_pos; 7086 tree base; 7087 struct iv_use *use; 7088 struct iv_group *group; 7089 bool after = false; 7090 7091 if (!cand->iv) 7092 return; 7093 7094 switch (cand->pos) 7095 { 7096 case IP_NORMAL: 7097 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop)); 7098 break; 7099 7100 case IP_END: 7101 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop)); 7102 after = true; 7103 break; 7104 7105 case IP_AFTER_USE: 7106 after = true; 7107 /* fall through */ 7108 case IP_BEFORE_USE: 7109 incr_pos = gsi_for_stmt (cand->incremented_at); 7110 break; 7111 7112 case IP_ORIGINAL: 7113 /* Mark that the iv is preserved. */ 7114 name_info (data, cand->var_before)->preserve_biv = true; 7115 name_info (data, cand->var_after)->preserve_biv = true; 7116 7117 /* Rewrite the increment so that it uses var_before directly. */ 7118 use = find_interesting_uses_op (data, cand->var_after); 7119 group = data->vgroups[use->group_id]; 7120 group->selected = cand; 7121 return; 7122 } 7123 7124 gimple_add_tmp_var (cand->var_before); 7125 7126 base = unshare_expr (cand->iv->base); 7127 7128 create_iv (base, unshare_expr (cand->iv->step), 7129 cand->var_before, data->current_loop, 7130 &incr_pos, after, &cand->var_before, &cand->var_after); 7131 } 7132 7133 /* Creates new induction variables described in SET. 
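Each candidate in the set is materialized by create_new_iv; with detailed dumping enabled, the selected set is printed together with the average number of iterations and the number of invariant expressions it uses.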
*/ 7134 7135 static void 7136 create_new_ivs (struct ivopts_data *data, struct iv_ca *set) 7137 { 7138 unsigned i; 7139 struct iv_cand *cand; 7140 bitmap_iterator bi; 7141 7142 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi) 7143 { 7144 cand = data->vcands[i]; 7145 create_new_iv (data, cand); 7146 } 7147 7148 if (dump_file && (dump_flags & TDF_DETAILS)) 7149 { 7150 fprintf (dump_file, "Selected IV set for loop %d", 7151 data->current_loop->num); 7152 if (data->loop_loc != UNKNOWN_LOCATION) 7153 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc), 7154 LOCATION_LINE (data->loop_loc)); 7155 fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters", 7156 avg_loop_niter (data->current_loop)); 7157 fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_UNSIGNED " expressions", 7158 (unsigned HOST_WIDE_INT) set->used_inv_exprs->elements ()); 7159 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands)); 7160 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi) 7161 { 7162 cand = data->vcands[i]; 7163 dump_cand (dump_file, cand); 7164 } 7165 fprintf (dump_file, "\n"); 7166 } 7167 } 7168 7169 /* Rewrites USE (definition of iv used in a nonlinear expression) 7170 using candidate CAND. */ 7171 7172 static void 7173 rewrite_use_nonlinear_expr (struct ivopts_data *data, 7174 struct iv_use *use, struct iv_cand *cand) 7175 { 7176 tree comp; 7177 tree tgt; 7178 gassign *ass; 7179 gimple_stmt_iterator bsi; 7180 7181 /* An important special case -- if we are asked to express value of 7182 the original iv by itself, just exit; there is no need to 7183 introduce a new computation (that might also need casting the 7184 variable to unsigned and back). */ 7185 if (cand->pos == IP_ORIGINAL 7186 && cand->incremented_at == use->stmt) 7187 { 7188 tree op = NULL_TREE; 7189 enum tree_code stmt_code; 7190 7191 gcc_assert (is_gimple_assign (use->stmt)); 7192 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after); 7193 7194 /* Check whether we may leave the computation unchanged. 7195 This is the case only if it does not rely on other 7196 computations in the loop -- otherwise, the computation 7197 we rely upon may be removed in remove_unused_ivs, 7198 thus leading to ICE. */ 7199 stmt_code = gimple_assign_rhs_code (use->stmt); 7200 if (stmt_code == PLUS_EXPR 7201 || stmt_code == MINUS_EXPR 7202 || stmt_code == POINTER_PLUS_EXPR) 7203 { 7204 if (gimple_assign_rhs1 (use->stmt) == cand->var_before) 7205 op = gimple_assign_rhs2 (use->stmt); 7206 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before) 7207 op = gimple_assign_rhs1 (use->stmt); 7208 } 7209 7210 if (op != NULL_TREE) 7211 { 7212 if (expr_invariant_in_loop_p (data->current_loop, op)) 7213 return; 7214 if (TREE_CODE (op) == SSA_NAME) 7215 { 7216 struct iv *iv = get_iv (data, op); 7217 if (iv != NULL && integer_zerop (iv->step)) 7218 return; 7219 } 7220 } 7221 } 7222 7223 comp = get_computation (data->current_loop, use, cand); 7224 gcc_assert (comp != NULL_TREE); 7225 7226 switch (gimple_code (use->stmt)) 7227 { 7228 case GIMPLE_PHI: 7229 tgt = PHI_RESULT (use->stmt); 7230 7231 /* If we should keep the biv, do not replace it. 
*/ 7232 if (name_info (data, tgt)->preserve_biv) 7233 return; 7234 7235 bsi = gsi_after_labels (gimple_bb (use->stmt)); 7236 break; 7237 7238 case GIMPLE_ASSIGN: 7239 tgt = gimple_assign_lhs (use->stmt); 7240 bsi = gsi_for_stmt (use->stmt); 7241 break; 7242 7243 default: 7244 gcc_unreachable (); 7245 } 7246 7247 if (!valid_gimple_rhs_p (comp) 7248 || (gimple_code (use->stmt) != GIMPLE_PHI 7249 /* We can't allow re-allocating the stmt as it might be pointed 7250 to still. */ 7251 && (get_gimple_rhs_num_ops (TREE_CODE (comp)) 7252 >= gimple_num_ops (gsi_stmt (bsi))))) 7253 { 7254 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE, 7255 true, GSI_SAME_STMT); 7256 if (POINTER_TYPE_P (TREE_TYPE (tgt))) 7257 { 7258 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt)); 7259 /* As this isn't a plain copy we have to reset alignment 7260 information. */ 7261 if (SSA_NAME_PTR_INFO (comp)) 7262 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp)); 7263 } 7264 } 7265 7266 if (gimple_code (use->stmt) == GIMPLE_PHI) 7267 { 7268 ass = gimple_build_assign (tgt, comp); 7269 gsi_insert_before (&bsi, ass, GSI_SAME_STMT); 7270 7271 bsi = gsi_for_stmt (use->stmt); 7272 remove_phi_node (&bsi, false); 7273 } 7274 else 7275 { 7276 gimple_assign_set_rhs_from_tree (&bsi, comp); 7277 use->stmt = gsi_stmt (bsi); 7278 } 7279 } 7280 7281 /* Performs a peephole optimization to reorder the iv update statement with 7282 a mem ref to enable instruction combining in later phases. The mem ref uses 7283 the iv value before the update, so the reordering transformation requires 7284 adjustment of the offset. CAND is the selected IV_CAND. 7285 7286 Example: 7287 7288 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset 7289 iv2 = iv1 + 1; 7290 7291 if (t < val) (1) 7292 goto L; 7293 goto Head; 7294 7295 7296 Directly propagating t over to (1) would introduce an overlapping live range 7297 and thus increase register pressure. This peephole transforms it into: 7298 7299 7300 iv2 = iv1 + 1; 7301 t = MEM_REF (base, iv2, 8, 8); 7302 if (t < val) 7303 goto L; 7304 goto Head; 7305 */ 7306 7307 static void 7308 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use) 7309 { 7310 tree var_after; 7311 gimple *iv_update, *stmt; 7312 basic_block bb; 7313 gimple_stmt_iterator gsi, gsi_iv; 7314 7315 if (cand->pos != IP_NORMAL) 7316 return; 7317 7318 var_after = cand->var_after; 7319 iv_update = SSA_NAME_DEF_STMT (var_after); 7320 7321 bb = gimple_bb (iv_update); 7322 gsi = gsi_last_nondebug_bb (bb); 7323 stmt = gsi_stmt (gsi); 7324 7325 /* Only handle conditional statements for now.
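The reordering is applied only when the block ends in a GIMPLE_COND that is immediately preceded by the iv update, which in turn is immediately preceded by the use statement (an assignment to an SSA name); any other shape is left untouched.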
*/ 7326 if (gimple_code (stmt) != GIMPLE_COND) 7327 return; 7328 7329 gsi_prev_nondebug (&gsi); 7330 stmt = gsi_stmt (gsi); 7331 if (stmt != iv_update) 7332 return; 7333 7334 gsi_prev_nondebug (&gsi); 7335 if (gsi_end_p (gsi)) 7336 return; 7337 7338 stmt = gsi_stmt (gsi); 7339 if (gimple_code (stmt) != GIMPLE_ASSIGN) 7340 return; 7341 7342 if (stmt != use->stmt) 7343 return; 7344 7345 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME) 7346 return; 7347 7348 if (dump_file && (dump_flags & TDF_DETAILS)) 7349 { 7350 fprintf (dump_file, "Reordering \n"); 7351 print_gimple_stmt (dump_file, iv_update, 0, 0); 7352 print_gimple_stmt (dump_file, use->stmt, 0, 0); 7353 fprintf (dump_file, "\n"); 7354 } 7355 7356 gsi = gsi_for_stmt (use->stmt); 7357 gsi_iv = gsi_for_stmt (iv_update); 7358 gsi_move_before (&gsi_iv, &gsi); 7359 7360 cand->pos = IP_BEFORE_USE; 7361 cand->incremented_at = use->stmt; 7362 } 7363 7364 /* Rewrites USE (address that is an iv) using candidate CAND. */ 7365 7366 static void 7367 rewrite_use_address (struct ivopts_data *data, 7368 struct iv_use *use, struct iv_cand *cand) 7369 { 7370 aff_tree aff; 7371 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt); 7372 tree base_hint = NULL_TREE; 7373 tree ref, iv; 7374 bool ok; 7375 7376 adjust_iv_update_pos (cand, use); 7377 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff); 7378 gcc_assert (ok); 7379 unshare_aff_combination (&aff); 7380 7381 /* To avoid undefined overflow problems, all IV candidates use unsigned 7382 integer types. The drawback is that this makes it impossible for 7383 create_mem_ref to distinguish an IV that is based on a memory object 7384 from one that represents simply an offset. 7385 7386 To work around this problem, we pass a hint to create_mem_ref that 7387 indicates which variable (if any) in aff is an IV based on a memory 7388 object. Note that we only consider the candidate. If this is not 7389 based on an object, the base of the reference is in some subexpression 7390 of the use -- but these will use pointer types, so they are recognized 7391 by the create_mem_ref heuristics anyway. */ 7392 if (cand->iv->base_object) 7393 base_hint = var_at_stmt (data->current_loop, cand, use->stmt); 7394 7395 iv = var_at_stmt (data->current_loop, cand, use->stmt); 7396 tree type = TREE_TYPE (*use->op_p); 7397 unsigned int align = get_object_alignment (*use->op_p); 7398 if (align != TYPE_ALIGN (type)) 7399 type = build_aligned_type (type, align); 7400 ref = create_mem_ref (&bsi, type, &aff, 7401 reference_alias_ptr_type (*use->op_p), 7402 iv, base_hint, data->speed); 7403 copy_ref_info (ref, *use->op_p); 7404 *use->op_p = ref; 7405 } 7406 7407 /* Rewrites USE (the condition such that one of the arguments is an iv) using 7408 candidate CAND. 
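If the chosen cost pair recorded a bound (i.e. induction variable elimination succeeded), the exit test is rewritten to compare the candidate against that bound, with the bound computed on the preheader edge; otherwise the original value is recomputed from CAND and substituted into the condition.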
*/ 7409 7410 static void 7411 rewrite_use_compare (struct ivopts_data *data, 7412 struct iv_use *use, struct iv_cand *cand) 7413 { 7414 tree comp, *var_p, op, bound; 7415 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt); 7416 enum tree_code compare; 7417 struct iv_group *group = data->vgroups[use->group_id]; 7418 struct cost_pair *cp = get_group_iv_cost (data, group, cand); 7419 bool ok; 7420 7421 bound = cp->value; 7422 if (bound) 7423 { 7424 tree var = var_at_stmt (data->current_loop, cand, use->stmt); 7425 tree var_type = TREE_TYPE (var); 7426 gimple_seq stmts; 7427 7428 if (dump_file && (dump_flags & TDF_DETAILS)) 7429 { 7430 fprintf (dump_file, "Replacing exit test: "); 7431 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM); 7432 } 7433 compare = cp->comp; 7434 bound = unshare_expr (fold_convert (var_type, bound)); 7435 op = force_gimple_operand (bound, &stmts, true, NULL_TREE); 7436 if (stmts) 7437 gsi_insert_seq_on_edge_immediate ( 7438 loop_preheader_edge (data->current_loop), 7439 stmts); 7440 7441 gcond *cond_stmt = as_a <gcond *> (use->stmt); 7442 gimple_cond_set_lhs (cond_stmt, var); 7443 gimple_cond_set_code (cond_stmt, compare); 7444 gimple_cond_set_rhs (cond_stmt, op); 7445 return; 7446 } 7447 7448 /* The induction variable elimination failed; just express the original 7449 giv. */ 7450 comp = get_computation (data->current_loop, use, cand); 7451 gcc_assert (comp != NULL_TREE); 7452 7453 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL); 7454 gcc_assert (ok); 7455 7456 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p), 7457 true, GSI_SAME_STMT); 7458 } 7459 7460 /* Rewrite the groups using the selected induction variables. */ 7461 7462 static void 7463 rewrite_groups (struct ivopts_data *data) 7464 { 7465 unsigned i, j; 7466 7467 for (i = 0; i < data->vgroups.length (); i++) 7468 { 7469 struct iv_group *group = data->vgroups[i]; 7470 struct iv_cand *cand = group->selected; 7471 7472 gcc_assert (cand); 7473 7474 if (group->type == USE_NONLINEAR_EXPR) 7475 { 7476 for (j = 0; j < group->vuses.length (); j++) 7477 { 7478 rewrite_use_nonlinear_expr (data, group->vuses[j], cand); 7479 update_stmt (group->vuses[j]->stmt); 7480 } 7481 } 7482 else if (group->type == USE_ADDRESS) 7483 { 7484 for (j = 0; j < group->vuses.length (); j++) 7485 { 7486 rewrite_use_address (data, group->vuses[j], cand); 7487 update_stmt (group->vuses[j]->stmt); 7488 } 7489 } 7490 else 7491 { 7492 gcc_assert (group->type == USE_COMPARE); 7493 7494 for (j = 0; j < group->vuses.length (); j++) 7495 { 7496 rewrite_use_compare (data, group->vuses[j], cand); 7497 update_stmt (group->vuses[j]->stmt); 7498 } 7499 } 7500 } 7501 } 7502 7503 /* Removes the ivs that are not used after rewriting. */ 7504 7505 static void 7506 remove_unused_ivs (struct ivopts_data *data) 7507 { 7508 unsigned j; 7509 bitmap_iterator bi; 7510 bitmap toremove = BITMAP_ALLOC (NULL); 7511 7512 /* Figure out an order in which to release SSA DEFs so that we don't 7513 release something that we'd have to propagate into a debug stmt 7514 afterwards. 
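The ivs to remove are therefore only collected in TOREMOVE here and released together through release_defs_bitset at the end; when debug statements still reference a removed iv, its value is first re-expressed in terms of the most similar remaining candidate, using a debug temporary if it has several debug uses.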
*/ 7515 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi) 7516 { 7517 struct version_info *info; 7518 7519 info = ver_info (data, j); 7520 if (info->iv 7521 && !integer_zerop (info->iv->step) 7522 && !info->inv_id 7523 && !info->iv->nonlin_use 7524 && !info->preserve_biv) 7525 { 7526 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name)); 7527 7528 tree def = info->iv->ssa_name; 7529 7530 if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def)) 7531 { 7532 imm_use_iterator imm_iter; 7533 use_operand_p use_p; 7534 gimple *stmt; 7535 int count = 0; 7536 7537 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def) 7538 { 7539 if (!gimple_debug_bind_p (stmt)) 7540 continue; 7541 7542 /* We just want to determine whether to do nothing 7543 (count == 0), to substitute the computed 7544 expression into a single use of the SSA DEF by 7545 itself (count == 1), or to use a debug temp 7546 because the SSA DEF is used multiple times or as 7547 part of a larger expression (count > 1). */ 7548 count++; 7549 if (gimple_debug_bind_get_value (stmt) != def) 7550 count++; 7551 7552 if (count > 1) 7553 BREAK_FROM_IMM_USE_STMT (imm_iter); 7554 } 7555 7556 if (!count) 7557 continue; 7558 7559 struct iv_use dummy_use; 7560 struct iv_cand *best_cand = NULL, *cand; 7561 unsigned i, best_pref = 0, cand_pref; 7562 7563 memset (&dummy_use, 0, sizeof (dummy_use)); 7564 dummy_use.iv = info->iv; 7565 for (i = 0; i < data->vgroups.length () && i < 64; i++) 7566 { 7567 cand = data->vgroups[i]->selected; 7568 if (cand == best_cand) 7569 continue; 7570 cand_pref = operand_equal_p (cand->iv->step, 7571 info->iv->step, 0) 7572 ? 4 : 0; 7573 cand_pref 7574 += TYPE_MODE (TREE_TYPE (cand->iv->base)) 7575 == TYPE_MODE (TREE_TYPE (info->iv->base)) 7576 ? 2 : 0; 7577 cand_pref 7578 += TREE_CODE (cand->iv->base) == INTEGER_CST 7579 ? 1 : 0; 7580 if (best_cand == NULL || best_pref < cand_pref) 7581 { 7582 best_cand = cand; 7583 best_pref = cand_pref; 7584 } 7585 } 7586 7587 if (!best_cand) 7588 continue; 7589 7590 tree comp = get_computation_at (data->current_loop, 7591 &dummy_use, best_cand, 7592 SSA_NAME_DEF_STMT (def)); 7593 if (!comp) 7594 continue; 7595 7596 if (count > 1) 7597 { 7598 tree vexpr = make_node (DEBUG_EXPR_DECL); 7599 DECL_ARTIFICIAL (vexpr) = 1; 7600 TREE_TYPE (vexpr) = TREE_TYPE (comp); 7601 if (SSA_NAME_VAR (def)) 7602 SET_DECL_MODE (vexpr, DECL_MODE (SSA_NAME_VAR (def))); 7603 else 7604 SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (vexpr))); 7605 gdebug *def_temp 7606 = gimple_build_debug_bind (vexpr, comp, NULL); 7607 gimple_stmt_iterator gsi; 7608 7609 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI) 7610 gsi = gsi_after_labels (gimple_bb 7611 (SSA_NAME_DEF_STMT (def))); 7612 else 7613 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def)); 7614 7615 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT); 7616 comp = vexpr; 7617 } 7618 7619 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def) 7620 { 7621 if (!gimple_debug_bind_p (stmt)) 7622 continue; 7623 7624 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter) 7625 SET_USE (use_p, comp); 7626 7627 update_stmt (stmt); 7628 } 7629 } 7630 } 7631 } 7632 7633 release_defs_bitset (toremove); 7634 7635 BITMAP_FREE (toremove); 7636 } 7637 7638 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback 7639 for hash_map::traverse. */ 7640 7641 bool 7642 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *) 7643 { 7644 free (value); 7645 return true; 7646 } 7647 7648 /* Frees data allocated by the optimization of a single loop. 
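Only the per-loop state (niter descriptions, use groups, candidates, per-ssa version info, ...) is cleared here; the pass-wide structures are released later in tree_ssa_iv_optimize_finalize.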
*/ 7649 7650 static void 7651 free_loop_data (struct ivopts_data *data) 7652 { 7653 unsigned i, j; 7654 bitmap_iterator bi; 7655 tree obj; 7656 7657 if (data->niters) 7658 { 7659 data->niters->traverse<void *, free_tree_niter_desc> (NULL); 7660 delete data->niters; 7661 data->niters = NULL; 7662 } 7663 7664 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi) 7665 { 7666 struct version_info *info; 7667 7668 info = ver_info (data, i); 7669 info->iv = NULL; 7670 info->has_nonlin_use = false; 7671 info->preserve_biv = false; 7672 info->inv_id = 0; 7673 } 7674 bitmap_clear (data->relevant); 7675 bitmap_clear (data->important_candidates); 7676 7677 for (i = 0; i < data->vgroups.length (); i++) 7678 { 7679 struct iv_group *group = data->vgroups[i]; 7680 7681 for (j = 0; j < group->vuses.length (); j++) 7682 free (group->vuses[j]); 7683 group->vuses.release (); 7684 7685 BITMAP_FREE (group->related_cands); 7686 for (j = 0; j < group->n_map_members; j++) 7687 if (group->cost_map[j].depends_on) 7688 BITMAP_FREE (group->cost_map[j].depends_on); 7689 7690 free (group->cost_map); 7691 free (group); 7692 } 7693 data->vgroups.truncate (0); 7694 7695 for (i = 0; i < data->vcands.length (); i++) 7696 { 7697 struct iv_cand *cand = data->vcands[i]; 7698 7699 if (cand->depends_on) 7700 BITMAP_FREE (cand->depends_on); 7701 free (cand); 7702 } 7703 data->vcands.truncate (0); 7704 7705 if (data->version_info_size < num_ssa_names) 7706 { 7707 data->version_info_size = 2 * num_ssa_names; 7708 free (data->version_info); 7709 data->version_info = XCNEWVEC (struct version_info, data->version_info_size); 7710 } 7711 7712 data->max_inv_id = 0; 7713 7714 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj) 7715 SET_DECL_RTL (obj, NULL_RTX); 7716 7717 decl_rtl_to_reset.truncate (0); 7718 7719 data->inv_expr_tab->empty (); 7720 data->max_inv_expr_id = 0; 7721 7722 data->iv_common_cand_tab->empty (); 7723 data->iv_common_cands.truncate (0); 7724 } 7725 7726 /* Finalizes data structures used by the iv optimization pass. LOOPS is the 7727 loop tree. */ 7728 7729 static void 7730 tree_ssa_iv_optimize_finalize (struct ivopts_data *data) 7731 { 7732 free_loop_data (data); 7733 free (data->version_info); 7734 BITMAP_FREE (data->relevant); 7735 BITMAP_FREE (data->important_candidates); 7736 7737 decl_rtl_to_reset.release (); 7738 data->vgroups.release (); 7739 data->vcands.release (); 7740 delete data->inv_expr_tab; 7741 data->inv_expr_tab = NULL; 7742 free_affine_expand_cache (&data->name_expansion_cache); 7743 if (data->base_object_map) 7744 delete data->base_object_map; 7745 delete data->iv_common_cand_tab; 7746 data->iv_common_cand_tab = NULL; 7747 data->iv_common_cands.release (); 7748 obstack_free (&data->iv_obstack, NULL); 7749 } 7750 7751 /* Returns true if the loop body BODY includes any function calls. */ 7752 7753 static bool 7754 loop_body_includes_call (basic_block *body, unsigned num_nodes) 7755 { 7756 gimple_stmt_iterator gsi; 7757 unsigned i; 7758 7759 for (i = 0; i < num_nodes; i++) 7760 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi)) 7761 { 7762 gimple *stmt = gsi_stmt (gsi); 7763 if (is_gimple_call (stmt) 7764 && !gimple_call_internal_p (stmt) 7765 && !is_inexpensive_builtin (gimple_call_fndecl (stmt))) 7766 return true; 7767 } 7768 return false; 7769 } 7770 7771 /* Optimizes the LOOP. Returns true if anything changed. 
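The loop is left unchanged (returning false) when no induction variables are found, when the number of use groups exceeds MAX_CONSIDERED_GROUPS, or when no iv set with finite cost can be found.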
*/ 7772 7773 static bool 7774 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop) 7775 { 7776 bool changed = false; 7777 struct iv_ca *iv_ca; 7778 edge exit = single_dom_exit (loop); 7779 basic_block *body; 7780 7781 gcc_assert (!data->niters); 7782 data->current_loop = loop; 7783 data->loop_loc = find_loop_location (loop); 7784 data->speed = optimize_loop_for_speed_p (loop); 7785 7786 if (dump_file && (dump_flags & TDF_DETAILS)) 7787 { 7788 fprintf (dump_file, "Processing loop %d", loop->num); 7789 if (data->loop_loc != UNKNOWN_LOCATION) 7790 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc), 7791 LOCATION_LINE (data->loop_loc)); 7792 fprintf (dump_file, "\n"); 7793 7794 if (exit) 7795 { 7796 fprintf (dump_file, " single exit %d -> %d, exit condition ", 7797 exit->src->index, exit->dest->index); 7798 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM); 7799 fprintf (dump_file, "\n"); 7800 } 7801 7802 fprintf (dump_file, "\n"); 7803 } 7804 7805 body = get_loop_body (loop); 7806 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes); 7807 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes); 7808 free (body); 7809 7810 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit); 7811 7812 /* For each ssa name determines whether it behaves as an induction variable 7813 in some loop. */ 7814 if (!find_induction_variables (data)) 7815 goto finish; 7816 7817 /* Finds interesting uses (item 1). */ 7818 find_interesting_uses (data); 7819 if (data->vgroups.length () > MAX_CONSIDERED_GROUPS) 7820 goto finish; 7821 7822 /* Finds candidates for the induction variables (item 2). */ 7823 find_iv_candidates (data); 7824 7825 /* Calculates the costs (item 3, part 1). */ 7826 determine_iv_costs (data); 7827 determine_group_iv_costs (data); 7828 determine_set_costs (data); 7829 7830 /* Find the optimal set of induction variables (item 3, part 2). */ 7831 iv_ca = find_optimal_iv_set (data); 7832 if (!iv_ca) 7833 goto finish; 7834 changed = true; 7835 7836 /* Create the new induction variables (item 4, part 1). */ 7837 create_new_ivs (data, iv_ca); 7838 iv_ca_free (&iv_ca); 7839 7840 /* Rewrite the uses (item 4, part 2). */ 7841 rewrite_groups (data); 7842 7843 /* Remove the ivs that are unused after rewriting. */ 7844 remove_unused_ivs (data); 7845 7846 /* We have changed the structure of induction variables; it might happen 7847 that definitions in the scev database refer to some of them that were 7848 eliminated. */ 7849 scev_reset (); 7850 7851 finish: 7852 free_loop_data (data); 7853 7854 return changed; 7855 } 7856 7857 /* Main entry point. Optimizes induction variables in loops. */ 7858 7859 void 7860 tree_ssa_iv_optimize (void) 7861 { 7862 struct loop *loop; 7863 struct ivopts_data data; 7864 7865 tree_ssa_iv_optimize_init (&data); 7866 7867 /* Optimize the loops starting with the innermost ones. */ 7868 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST) 7869 { 7870 if (dump_file && (dump_flags & TDF_DETAILS)) 7871 flow_loop_dump (loop, dump_file, NULL, 1); 7872 7873 tree_ssa_iv_optimize_loop (&data, loop); 7874 } 7875 7876 tree_ssa_iv_optimize_finalize (&data); 7877 } 7878