/* Induction variable optimizations.
   Copyright (C) 2003-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does this in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
	 uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
	 induction variable in the set and adds its cost to the sum.  The cost
	 reflects the time spent on modifying the induction variable's value
	 to be usable for the given purpose (adding base and offset for
	 arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
	 reflects the costs associated with incrementing the value of the
	 variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
	 added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, and the dead code
      is removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible; it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.
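
   As a purely illustrative example, for a loop

     for (i = 0; i < 100; i++)
       a[i] = 0;

   on a target where pointer increments are cheap the pass may replace the
   index i by a pointer induction variable, producing the equivalent of

     for (p = &a[0]; p != &a[100]; p++)
       *p = 0;

   thus strength-reducing the multiplication hidden in a[i] and eliminating
   the original induction variable.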
 */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "hash-set.h"
#include "machmode.h"
#include "vec.h"
#include "double-int.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tm_p.h"
#include "predict.h"
#include "hard-reg-set.h"
#include "function.h"
#include "dominance.h"
#include "cfg.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "hash-map.h"
#include "hash-table.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "plugin-api.h"
#include "ipa-ref.h"
#include "cgraph.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "hashtab.h"
#include "rtl.h"
#include "flags.h"
#include "statistics.h"
#include "real.h"
#include "fixed-value.h"
#include "insn-config.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "calls.h"
#include "emit-rtl.h"
#include "varasm.h"
#include "stmt.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
#include "tree-ssa-address.h"
#include "builtins.h"
#include "tree-vectorizer.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "recog.h"

/* The infinite cost.  */
#define INFTY 10000000

/* The number of iterations assumed for a loop when no profile estimate
   is available.  */
#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}

/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to which the induction variable
			   points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  unsigned use_id;	/* The identifier in the use if it is the case.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.
			   */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
  enum tree_code comp;	/* For iv elimination, the comparison.  */
  int inv_expr_id;	/* Loop invariant expression id.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs wrto the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
};

/* Loop invariant expression hashtable entry.
 */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;

typedef struct iv_cand *iv_cand_p;

/* Hashtable helpers.  */

struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
{
  typedef iv_inv_expr_ent value_type;
  typedef iv_inv_expr_ent compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
};

/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const value_type *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const value_type *expr1, const compare_type *expr2)
{
  return expr1->hash == expr2->hash
	 && operand_equal_p (expr1->expr, expr2->expr, 0);
}

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;
  source_location loop_loc;

  /* Numbers of iterations for all exits of the current loop.  */
  hash_map<edge, tree_niter_desc *> *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  hash_table<iv_inv_expr_hasher> *inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use_p> iv_uses;

  /* The candidates.  */
  vec<iv_cand_p> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Cache used by tree_to_aff_combination_expand.  */
  hash_map<tree, name_expansion *> *name_expansion_cache;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via a single exit.  */
  bool loop_single_exit_p;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.
 */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on the number of candidates below which all candidates are
   considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.
 */

void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, " type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, " base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, " step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, " invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, " base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, " is a biv\n");
}

/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, " generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, " address\n");
      break;

    case USE_COMPARE:
      fprintf (file, " compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, " in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, " at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, " related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, " depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, " final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, " var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, " var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, " incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, " incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, " incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, " incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, " original biv\n");
      break;
    }

  dump_iv (file, iv);
}

/* Returns the info for ssa version VER.
 */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}

/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.
 */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  tree_niter_desc **slot;

  if (!data->niters)
    {
      data->niters = new hash_map<edge, tree_niter_desc *>;
      slot = NULL;
    }
  else
    slot = data->niters->get (exit);

  if (!slot)
    {
      /* Try to determine the number of iterations.  We cannot safely work
	 with ssa names that appear in phi nodes on abnormal edges, as that
	 could create overlapping live ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
				      exit, desc, true)
	  || contains_abnormal_ssa_name_p (desc->niter))
	{
	  XDELETE (desc);
	  desc = NULL;
	}
      data->niters->put (exit, desc);
    }
  else
    desc = *slot;

  return desc;
}

/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
  data->inv_expr_id = 0;
  data->name_expansion_cache = NULL;
  decl_rtl_to_reset.create (20);
}

/* Returns the memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.
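   E.g., for EXPR = &a.b + 4 the returned object is &a, while for an
   expression of non-pointer type the result is NULL.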
 */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == MEM_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}

/* Return true if an address expression with a non-DECL_P operand appears
   in EXPR.  */

static bool
contain_complex_addr_expr (tree expr)
{
  bool res = false;

  STRIP_NOPS (expr);
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
      break;

    case ADDR_EXPR:
      return (!DECL_P (TREE_OPERAND (expr, 0)));

    default:
      return false;
    }

  return res;
}

/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  tree expr = base;
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  /* Lower address expressions in base, except for ones whose operand
     is DECL_P.  By doing this:
       1) More accurate cost can be computed for address expressions;
       2) Duplicate candidates won't be created for bases in different
	  forms, like &a[0] and &a.  */
  STRIP_NOPS (expr);
  if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
      || contain_complex_addr_expr (expr))
    {
      aff_tree comb;
      tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds the induction variable declaration for VAR.
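   Note that an ssa name defined outside of the loop is lazily recorded as
   an iv with a zero step, i.e. as a loop invariant.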
 */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gphi *phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (virtual_operand_p (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}

/* Return the first non-invariant ssa var found in EXPR.  */

static tree
extract_single_var_from_expr (tree expr)
{
  int i, n;
  tree tmp;
  enum tree_code code;

  if (!expr || is_gimple_min_invariant (expr))
    return NULL;

  code = TREE_CODE (expr);
  if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
    {
      n = TREE_OPERAND_LENGTH (expr);
      for (i = 0; i < n; i++)
	{
	  tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));

	  if (tmp)
	    return tmp;
	}
    }
  return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
}

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gphi *phi;
  tree step, type, base, stop;
  bool found = false;
  struct loop *loop = data->current_loop;
  gphi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      step = determine_biv_step (phi);
      if (!step)
	continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      /* Stop expanding the iv base at the first ssa var referred to by the
	 iv step.  Ideally we should stop expanding at any ssa var, but since
	 that is expensive and such cases are unusual, we only do it for the
	 first one.

	 See PR64705 for the rationale.  */
      stop = extract_single_var_from_expr (step);
      base = expand_simple_operations (base, stop);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  if (POINTER_TYPE_P (type))
	    step = convert_to_ptrofftype (step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

/* Marks basic ivs.
 */

static void
mark_bivs (struct ivopts_data *data)
{
  gphi *phi;
  gimple def;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gphi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      def = SSA_NAME_DEF_STMT (var);
      /* Don't mark an iv peeled from another one as a biv.  */
      if (def
	  && gimple_code (def) == GIMPLE_PHI
	  && gimple_bb (def) == loop->header)
	continue;

      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in a subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs, stop;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;

  /* Stop expanding the iv base at the first ssa var referred to by the iv
     step.  Ideally we should stop expanding at any ssa var, but since that
     is expensive and such cases are unusual, we only do it for the first
     one.

     See PR64705 for the rationale.  */
  stop = extract_single_var_from_expr (iv->step);
  iv->base = expand_simple_operations (iv->base, stop);
  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.
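   A general iv is an ssa name defined inside the loop whose value is a
   simple affine function of the iteration, e.g. j = i * 4 + 2 for a biv i.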
 */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, " number of iterations ");
	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
	  if (!integer_zerop (niter->may_be_zero))
	    {
	      fprintf (dump_file, "; zero if ");
	      print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
	    }
	  fprintf (dump_file, "\n\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv);
	}
    }

  return true;
}

/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* Reset the ssa name so that it is not shown in the dumps, if it was
     not already reset by the caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  data->iv_uses.safe_push (use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.
 */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
	      || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to the location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      gcond *cond_stmt = as_a <gcond *> (stmt);
      op0 = gimple_cond_lhs_ptr (cond_stmt);
      op1 = gimple_cond_rhs_ptr (cond_stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}

/* Checks whether the condition in STMT is interesting and if so,
   records it.
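   The condition is interesting when it compares an induction variable with
   a loop invariant, e.g. i < n where i is an iv of the loop and n is
   defined outside of it.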
 */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}

/* Returns the outermost loop EXPR is obviously invariant in
   relative to the loop LOOP, i.e. if all its operands are defined
   outside of the returned loop.  Returns NULL if EXPR is not
   even obviously invariant in LOOP.  */

struct loop *
outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return current_loops->tree_root;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb)
	{
	  if (flow_bb_inside_loop_p (loop, def_bb))
	    return NULL;
	  return superloop_at_depth (loop,
				     loop_depth (def_bb->loop_father) + 1);
	}

      return current_loops->tree_root;
    }

  if (!EXPR_P (expr))
    return NULL;

  unsigned maxdepth = 0;
  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    {
      struct loop *ivloop;
      if (!TREE_OPERAND (expr, i))
	continue;

      ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
      if (!ivloop)
	return NULL;
      maxdepth = MAX (maxdepth, loop_depth (ivloop));
    }

  return superloop_at_depth (loop, maxdepth);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
	&& !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Accumulates the steps of indices into DATA and replaces their values with
   the initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.
 */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is an array ref, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both the lower bound
     and the step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
	return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetic is already 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}

/* Records use in index IDX.  Callback for for_each_index.  The ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.
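
   For example, for TOP = n * 6 - n * 2 and BOT = n, 4 is stored to *MUL and
   true is returned; for TOP = n + 1 and BOT = n, false is returned.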
 */

static bool
constant_multiple_of (tree top, tree bot, widest_int *mul)
{
  tree mby;
  enum tree_code code;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
  widest_int res, p0, p1;

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = 1;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = wi::sext (res * wi::to_widest (mby), precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = -p1;
      *mul = wi::sext (p0 + p1, precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = widest_int::from (top, SIGNED);
      p1 = widest_int::from (bot, SIGNED);
      if (p1 == 0)
	return false;
      *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
      return res == 0;

    default:
      return false;
    }
}

/* Return true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
  if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
    align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));

  unsigned HOST_WIDE_INT bitpos;
  unsigned int ref_align;
  get_object_alignment_1 (ref, &ref_align, &bitpos);
  if (ref_align < align
      || (bitpos % align) != 0
      || (bitpos % BITS_PER_UNIT) != 0)
    return true;

  unsigned int trailing_zeros = tree_ctz (step);
  if (trailing_zeros < HOST_BITS_PER_INT
      && (1U << trailing_zeros) * BITS_PER_UNIT < align)
    return true;

  return false;
}

/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
	 target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversion may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
	return true;

      /* ... fall through ...  */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}

/* Finds addresses in *OP_P inside STMT.
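   For example, the memory reference a[i] in a store a[i] = 0 gives rise to
   an address use whose step is the step of i multiplied by the element
   size.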
 */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX2 (base)
	  && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX2 (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX2 (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      step = fold_build2 (PLUS_EXPR, type, step, astep);
	    }
	}

      if (integer_zerop (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || integer_zerop (ifs_ivopts_data.step))
	goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
	 to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
	goto fail;

      /* Moreover, on strict alignment platforms, check that it is
	 sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
	goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
	 have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
	{
	  tree *ref = &TREE_OPERAND (base, 0);
	  while (handled_component_p (*ref))
	    ref = &TREE_OPERAND (*ref, 0);
	  if (TREE_CODE (*ref) == MEM_REF)
	    {
	      tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
				      TREE_OPERAND (*ref, 0),
				      TREE_OPERAND (*ref, 1));
	      if (tem)
		*ref = tem;
	    }
	}
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}

/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.
 */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, *lhs);

	  if (iv && !integer_zerop (iv->step))
	    return;
	}

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
	  && (REFERENCE_CLASS_P (*rhs)
	      || is_gimple_val (*rhs)))
	{
	  if (REFERENCE_CLASS_P (*rhs))
	    find_interesting_uses_address (data, stmt, rhs);
	  else
	    find_interesting_uses_op (data, *rhs);

	  if (REFERENCE_CLASS_P (*lhs))
	    find_interesting_uses_address (data, stmt, lhs);
	  return;
	}
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
	{
	  find_interesting_uses_cond (data, stmt);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
	return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      find_interesting_uses_op (data, op);
    }
}

/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gphi *phi;
  gphi_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (!virtual_operand_p (def))
	find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.
*/ 2081 2082 static void 2083 find_interesting_uses (struct ivopts_data *data) 2084 { 2085 basic_block bb; 2086 gimple_stmt_iterator bsi; 2087 basic_block *body = get_loop_body (data->current_loop); 2088 unsigned i; 2089 struct version_info *info; 2090 edge e; 2091 2092 if (dump_file && (dump_flags & TDF_DETAILS)) 2093 fprintf (dump_file, "Uses:\n\n"); 2094 2095 for (i = 0; i < data->current_loop->num_nodes; i++) 2096 { 2097 edge_iterator ei; 2098 bb = body[i]; 2099 2100 FOR_EACH_EDGE (e, ei, bb->succs) 2101 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) 2102 && !flow_bb_inside_loop_p (data->current_loop, e->dest)) 2103 find_interesting_uses_outside (data, e); 2104 2105 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi)) 2106 find_interesting_uses_stmt (data, gsi_stmt (bsi)); 2107 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi)) 2108 if (!is_gimple_debug (gsi_stmt (bsi))) 2109 find_interesting_uses_stmt (data, gsi_stmt (bsi)); 2110 } 2111 2112 if (dump_file && (dump_flags & TDF_DETAILS)) 2113 { 2114 bitmap_iterator bi; 2115 2116 fprintf (dump_file, "\n"); 2117 2118 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi) 2119 { 2120 info = ver_info (data, i); 2121 if (info->inv_id) 2122 { 2123 fprintf (dump_file, " "); 2124 print_generic_expr (dump_file, info->name, TDF_SLIM); 2125 fprintf (dump_file, " is invariant (%d)%s\n", 2126 info->inv_id, info->has_nonlin_use ? "" : ", eliminable"); 2127 } 2128 } 2129 2130 fprintf (dump_file, "\n"); 2131 } 2132 2133 free (body); 2134 } 2135 2136 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR 2137 is true, assume we are inside an address. If TOP_COMPREF is true, assume 2138 we are at the top-level of the processed address. */ 2139 2140 static tree 2141 strip_offset_1 (tree expr, bool inside_addr, bool top_compref, 2142 HOST_WIDE_INT *offset) 2143 { 2144 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step; 2145 enum tree_code code; 2146 tree type, orig_type = TREE_TYPE (expr); 2147 HOST_WIDE_INT off0, off1, st; 2148 tree orig_expr = expr; 2149 2150 STRIP_NOPS (expr); 2151 2152 type = TREE_TYPE (expr); 2153 code = TREE_CODE (expr); 2154 *offset = 0; 2155 2156 switch (code) 2157 { 2158 case INTEGER_CST: 2159 if (!cst_and_fits_in_hwi (expr) 2160 || integer_zerop (expr)) 2161 return orig_expr; 2162 2163 *offset = int_cst_value (expr); 2164 return build_int_cst (orig_type, 0); 2165 2166 case POINTER_PLUS_EXPR: 2167 case PLUS_EXPR: 2168 case MINUS_EXPR: 2169 op0 = TREE_OPERAND (expr, 0); 2170 op1 = TREE_OPERAND (expr, 1); 2171 2172 op0 = strip_offset_1 (op0, false, false, &off0); 2173 op1 = strip_offset_1 (op1, false, false, &off1); 2174 2175 *offset = (code == MINUS_EXPR ? 
off0 - off1 : off0 + off1); 2176 if (op0 == TREE_OPERAND (expr, 0) 2177 && op1 == TREE_OPERAND (expr, 1)) 2178 return orig_expr; 2179 2180 if (integer_zerop (op1)) 2181 expr = op0; 2182 else if (integer_zerop (op0)) 2183 { 2184 if (code == MINUS_EXPR) 2185 expr = fold_build1 (NEGATE_EXPR, type, op1); 2186 else 2187 expr = op1; 2188 } 2189 else 2190 expr = fold_build2 (code, type, op0, op1); 2191 2192 return fold_convert (orig_type, expr); 2193 2194 case MULT_EXPR: 2195 op1 = TREE_OPERAND (expr, 1); 2196 if (!cst_and_fits_in_hwi (op1)) 2197 return orig_expr; 2198 2199 op0 = TREE_OPERAND (expr, 0); 2200 op0 = strip_offset_1 (op0, false, false, &off0); 2201 if (op0 == TREE_OPERAND (expr, 0)) 2202 return orig_expr; 2203 2204 *offset = off0 * int_cst_value (op1); 2205 if (integer_zerop (op0)) 2206 expr = op0; 2207 else 2208 expr = fold_build2 (MULT_EXPR, type, op0, op1); 2209 2210 return fold_convert (orig_type, expr); 2211 2212 case ARRAY_REF: 2213 case ARRAY_RANGE_REF: 2214 if (!inside_addr) 2215 return orig_expr; 2216 2217 step = array_ref_element_size (expr); 2218 if (!cst_and_fits_in_hwi (step)) 2219 break; 2220 2221 st = int_cst_value (step); 2222 op1 = TREE_OPERAND (expr, 1); 2223 op1 = strip_offset_1 (op1, false, false, &off1); 2224 *offset = off1 * st; 2225 2226 if (top_compref 2227 && integer_zerop (op1)) 2228 { 2229 /* Strip the component reference completely. */ 2230 op0 = TREE_OPERAND (expr, 0); 2231 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0); 2232 *offset += off0; 2233 return op0; 2234 } 2235 break; 2236 2237 case COMPONENT_REF: 2238 { 2239 tree field; 2240 2241 if (!inside_addr) 2242 return orig_expr; 2243 2244 tmp = component_ref_field_offset (expr); 2245 field = TREE_OPERAND (expr, 1); 2246 if (top_compref 2247 && cst_and_fits_in_hwi (tmp) 2248 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field))) 2249 { 2250 HOST_WIDE_INT boffset, abs_off; 2251 2252 /* Strip the component reference completely. */ 2253 op0 = TREE_OPERAND (expr, 0); 2254 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0); 2255 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field)); 2256 abs_off = abs_hwi (boffset) / BITS_PER_UNIT; 2257 if (boffset < 0) 2258 abs_off = -abs_off; 2259 2260 *offset = off0 + int_cst_value (tmp) + abs_off; 2261 return op0; 2262 } 2263 } 2264 break; 2265 2266 case ADDR_EXPR: 2267 op0 = TREE_OPERAND (expr, 0); 2268 op0 = strip_offset_1 (op0, true, true, &off0); 2269 *offset += off0; 2270 2271 if (op0 == TREE_OPERAND (expr, 0)) 2272 return orig_expr; 2273 2274 expr = build_fold_addr_expr (op0); 2275 return fold_convert (orig_type, expr); 2276 2277 case MEM_REF: 2278 /* ??? Offset operand? */ 2279 inside_addr = false; 2280 break; 2281 2282 default: 2283 return orig_expr; 2284 } 2285 2286 /* Default handling of expressions for that we want to recurse into 2287 the first operand. */ 2288 op0 = TREE_OPERAND (expr, 0); 2289 op0 = strip_offset_1 (op0, inside_addr, false, &off0); 2290 *offset += off0; 2291 2292 if (op0 == TREE_OPERAND (expr, 0) 2293 && (!op1 || op1 == TREE_OPERAND (expr, 1))) 2294 return orig_expr; 2295 2296 expr = copy_node (expr); 2297 TREE_OPERAND (expr, 0) = op0; 2298 if (op1) 2299 TREE_OPERAND (expr, 1) = op1; 2300 2301 /* Inside address, we might strip the top level component references, 2302 thus changing type of the expression. Handling of ADDR_EXPR 2303 will fix that. */ 2304 expr = fold_convert (orig_type, expr); 2305 2306 return expr; 2307 } 2308 2309 /* Strips constant offsets from EXPR and stores them to OFFSET. 
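   E.g. (an illustrative example only): for EXPR = p + 7 the result is p
   with *OFFSET set to 7, and for EXPR = &a[i + 4], with 4-byte array
   elements, the result is &a[i] with *OFFSET set to 16.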
*/ 2310 2311 static tree 2312 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset) 2313 { 2314 HOST_WIDE_INT off; 2315 tree core = strip_offset_1 (expr, false, false, &off); 2316 *offset = off; 2317 return core; 2318 } 2319 2320 /* Returns variant of TYPE that can be used as base for different uses. 2321 We return unsigned type with the same precision, which avoids problems 2322 with overflows. */ 2323 2324 static tree 2325 generic_type_for (tree type) 2326 { 2327 if (POINTER_TYPE_P (type)) 2328 return unsigned_type_for (type); 2329 2330 if (TYPE_UNSIGNED (type)) 2331 return type; 2332 2333 return unsigned_type_for (type); 2334 } 2335 2336 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains 2337 the bitmap to that we should store it. */ 2338 2339 static struct ivopts_data *fd_ivopts_data; 2340 static tree 2341 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data) 2342 { 2343 bitmap *depends_on = (bitmap *) data; 2344 struct version_info *info; 2345 2346 if (TREE_CODE (*expr_p) != SSA_NAME) 2347 return NULL_TREE; 2348 info = name_info (fd_ivopts_data, *expr_p); 2349 2350 if (!info->inv_id || info->has_nonlin_use) 2351 return NULL_TREE; 2352 2353 if (!*depends_on) 2354 *depends_on = BITMAP_ALLOC (NULL); 2355 bitmap_set_bit (*depends_on, info->inv_id); 2356 2357 return NULL_TREE; 2358 } 2359 2360 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and 2361 position to POS. If USE is not NULL, the candidate is set as related to 2362 it. If both BASE and STEP are NULL, we add a pseudocandidate for the 2363 replacement of the final value of the iv by a direct computation. */ 2364 2365 static struct iv_cand * 2366 add_candidate_1 (struct ivopts_data *data, 2367 tree base, tree step, bool important, enum iv_position pos, 2368 struct iv_use *use, gimple incremented_at) 2369 { 2370 unsigned i; 2371 struct iv_cand *cand = NULL; 2372 tree type, orig_type; 2373 2374 /* For non-original variables, make sure their values are computed in a type 2375 that does not invoke undefined behavior on overflows (since in general, 2376 we cannot prove that these induction variables are non-wrapping). 
*/ 2377 if (pos != IP_ORIGINAL) 2378 { 2379 orig_type = TREE_TYPE (base); 2380 type = generic_type_for (orig_type); 2381 if (type != orig_type) 2382 { 2383 base = fold_convert (type, base); 2384 step = fold_convert (type, step); 2385 } 2386 } 2387 2388 for (i = 0; i < n_iv_cands (data); i++) 2389 { 2390 cand = iv_cand (data, i); 2391 2392 if (cand->pos != pos) 2393 continue; 2394 2395 if (cand->incremented_at != incremented_at 2396 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE) 2397 && cand->ainc_use != use)) 2398 continue; 2399 2400 if (!cand->iv) 2401 { 2402 if (!base && !step) 2403 break; 2404 2405 continue; 2406 } 2407 2408 if (!base && !step) 2409 continue; 2410 2411 if (operand_equal_p (base, cand->iv->base, 0) 2412 && operand_equal_p (step, cand->iv->step, 0) 2413 && (TYPE_PRECISION (TREE_TYPE (base)) 2414 == TYPE_PRECISION (TREE_TYPE (cand->iv->base)))) 2415 break; 2416 } 2417 2418 if (i == n_iv_cands (data)) 2419 { 2420 cand = XCNEW (struct iv_cand); 2421 cand->id = i; 2422 2423 if (!base && !step) 2424 cand->iv = NULL; 2425 else 2426 cand->iv = alloc_iv (base, step); 2427 2428 cand->pos = pos; 2429 if (pos != IP_ORIGINAL && cand->iv) 2430 { 2431 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp"); 2432 cand->var_after = cand->var_before; 2433 } 2434 cand->important = important; 2435 cand->incremented_at = incremented_at; 2436 data->iv_candidates.safe_push (cand); 2437 2438 if (step 2439 && TREE_CODE (step) != INTEGER_CST) 2440 { 2441 fd_ivopts_data = data; 2442 walk_tree (&step, find_depends, &cand->depends_on, NULL); 2443 } 2444 2445 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE) 2446 cand->ainc_use = use; 2447 else 2448 cand->ainc_use = NULL; 2449 2450 if (dump_file && (dump_flags & TDF_DETAILS)) 2451 dump_cand (dump_file, cand); 2452 } 2453 2454 if (important && !cand->important) 2455 { 2456 cand->important = true; 2457 if (dump_file && (dump_flags & TDF_DETAILS)) 2458 fprintf (dump_file, "Candidate %d is important\n", cand->id); 2459 } 2460 2461 if (use) 2462 { 2463 bitmap_set_bit (use->related_cands, i); 2464 if (dump_file && (dump_flags & TDF_DETAILS)) 2465 fprintf (dump_file, "Candidate %d is related to use %d\n", 2466 cand->id, use->id); 2467 } 2468 2469 return cand; 2470 } 2471 2472 /* Returns true if incrementing the induction variable at the end of the LOOP 2473 is allowed. 2474 2475 The purpose is to avoid splitting latch edge with a biv increment, thus 2476 creating a jump, possibly confusing other optimization passes and leaving 2477 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS 2478 is not available (so we do not have a better alternative), or if the latch 2479 edge is already nonempty. */ 2480 2481 static bool 2482 allow_ip_end_pos_p (struct loop *loop) 2483 { 2484 if (!ip_normal_pos (loop)) 2485 return true; 2486 2487 if (!empty_block_p (ip_end_pos (loop))) 2488 return true; 2489 2490 return false; 2491 } 2492 2493 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE. 2494 Important field is set to IMPORTANT. */ 2495 2496 static void 2497 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step, 2498 bool important, struct iv_use *use) 2499 { 2500 basic_block use_bb = gimple_bb (use->stmt); 2501 machine_mode mem_mode; 2502 unsigned HOST_WIDE_INT cstepi; 2503 2504 /* If we insert the increment in any position other than the standard 2505 ones, we must ensure that it is incremented once per iteration. 2506 It must not be in an inner nested loop, or one side of an if 2507 statement. 
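   E.g. (illustration): for a use inside

     if (cond)
       *p = 0;

   no autoinc candidate is added, since an increment embedded in the store
   would only be executed on the iterations where cond holds.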
*/ 2508 if (use_bb->loop_father != data->current_loop 2509 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb) 2510 || stmt_could_throw_p (use->stmt) 2511 || !cst_and_fits_in_hwi (step)) 2512 return; 2513 2514 cstepi = int_cst_value (step); 2515 2516 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p)); 2517 if (((USE_LOAD_PRE_INCREMENT (mem_mode) 2518 || USE_STORE_PRE_INCREMENT (mem_mode)) 2519 && GET_MODE_SIZE (mem_mode) == cstepi) 2520 || ((USE_LOAD_PRE_DECREMENT (mem_mode) 2521 || USE_STORE_PRE_DECREMENT (mem_mode)) 2522 && GET_MODE_SIZE (mem_mode) == -cstepi)) 2523 { 2524 enum tree_code code = MINUS_EXPR; 2525 tree new_base; 2526 tree new_step = step; 2527 2528 if (POINTER_TYPE_P (TREE_TYPE (base))) 2529 { 2530 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step); 2531 code = POINTER_PLUS_EXPR; 2532 } 2533 else 2534 new_step = fold_convert (TREE_TYPE (base), new_step); 2535 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step); 2536 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use, 2537 use->stmt); 2538 } 2539 if (((USE_LOAD_POST_INCREMENT (mem_mode) 2540 || USE_STORE_POST_INCREMENT (mem_mode)) 2541 && GET_MODE_SIZE (mem_mode) == cstepi) 2542 || ((USE_LOAD_POST_DECREMENT (mem_mode) 2543 || USE_STORE_POST_DECREMENT (mem_mode)) 2544 && GET_MODE_SIZE (mem_mode) == -cstepi)) 2545 { 2546 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use, 2547 use->stmt); 2548 } 2549 } 2550 2551 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and 2552 position to POS. If USE is not NULL, the candidate is set as related to 2553 it. The candidate computation is scheduled on all available positions. */ 2554 2555 static void 2556 add_candidate (struct ivopts_data *data, 2557 tree base, tree step, bool important, struct iv_use *use) 2558 { 2559 if (ip_normal_pos (data->current_loop)) 2560 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL); 2561 if (ip_end_pos (data->current_loop) 2562 && allow_ip_end_pos_p (data->current_loop)) 2563 add_candidate_1 (data, base, step, important, IP_END, use, NULL); 2564 2565 if (use != NULL && use->type == USE_ADDRESS) 2566 add_autoinc_candidates (data, base, step, important, use); 2567 } 2568 2569 /* Adds standard iv candidates. */ 2570 2571 static void 2572 add_standard_iv_candidates (struct ivopts_data *data) 2573 { 2574 add_candidate (data, integer_zero_node, integer_one_node, true, NULL); 2575 2576 /* The same for a double-integer type if it is still fast enough. */ 2577 if (TYPE_PRECISION 2578 (long_integer_type_node) > TYPE_PRECISION (integer_type_node) 2579 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD) 2580 add_candidate (data, build_int_cst (long_integer_type_node, 0), 2581 build_int_cst (long_integer_type_node, 1), true, NULL); 2582 2583 /* The same for a double-integer type if it is still fast enough. */ 2584 if (TYPE_PRECISION 2585 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node) 2586 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD) 2587 add_candidate (data, build_int_cst (long_long_integer_type_node, 0), 2588 build_int_cst (long_long_integer_type_node, 1), true, NULL); 2589 } 2590 2591 2592 /* Adds candidates based on the old induction variable IV.
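   For a biv with base B and step S this adds the candidates {B, +, S} and
   {0, +, S} (the zero-based variant is generic enough to serve many uses),
   and, where possible, a candidate that leaves the original variable
   untouched.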
*/ 2593 2594 static void 2595 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv) 2596 { 2597 gimple phi; 2598 tree def; 2599 struct iv_cand *cand; 2600 2601 add_candidate (data, iv->base, iv->step, true, NULL); 2602 2603 /* The same, but with initial value zero. */ 2604 if (POINTER_TYPE_P (TREE_TYPE (iv->base))) 2605 add_candidate (data, size_int (0), iv->step, true, NULL); 2606 else 2607 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0), 2608 iv->step, true, NULL); 2609 2610 phi = SSA_NAME_DEF_STMT (iv->ssa_name); 2611 if (gimple_code (phi) == GIMPLE_PHI) 2612 { 2613 /* Additionally record the possibility of leaving the original iv 2614 untouched. */ 2615 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop)); 2616 /* Don't add candidate if it's from another PHI node because 2617 it's an affine iv appearing in the form of PEELED_CHREC. */ 2618 phi = SSA_NAME_DEF_STMT (def); 2619 if (gimple_code (phi) != GIMPLE_PHI) 2620 { 2621 cand = add_candidate_1 (data, 2622 iv->base, iv->step, true, IP_ORIGINAL, NULL, 2623 SSA_NAME_DEF_STMT (def)); 2624 cand->var_before = iv->ssa_name; 2625 cand->var_after = def; 2626 } 2627 else 2628 gcc_assert (gimple_bb (phi) == data->current_loop->header); 2629 } 2630 } 2631 2632 /* Adds candidates based on the old induction variables. */ 2633 2634 static void 2635 add_old_ivs_candidates (struct ivopts_data *data) 2636 { 2637 unsigned i; 2638 struct iv *iv; 2639 bitmap_iterator bi; 2640 2641 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi) 2642 { 2643 iv = ver_info (data, i)->iv; 2644 if (iv && iv->biv_p && !integer_zerop (iv->step)) 2645 add_old_iv_candidates (data, iv); 2646 } 2647 } 2648 2649 /* Adds candidates based on the value of the induction variable IV and USE. */ 2650 2651 static void 2652 add_iv_value_candidates (struct ivopts_data *data, 2653 struct iv *iv, struct iv_use *use) 2654 { 2655 unsigned HOST_WIDE_INT offset; 2656 tree base; 2657 tree basetype; 2658 2659 add_candidate (data, iv->base, iv->step, false, use); 2660 2661 /* The same, but with initial value zero. Make such variable important, 2662 since it is generic enough so that possibly many uses may be based 2663 on it. */ 2664 basetype = TREE_TYPE (iv->base); 2665 if (POINTER_TYPE_P (basetype)) 2666 basetype = sizetype; 2667 add_candidate (data, build_int_cst (basetype, 0), 2668 iv->step, true, use); 2669 2670 /* Third, try removing the constant offset. Make sure to even 2671 add a candidate for &a[0] vs. (T *)&a. */ 2672 base = strip_offset (iv->base, &offset); 2673 if (offset 2674 || base != iv->base) 2675 add_candidate (data, base, iv->step, false, use); 2676 } 2677 2678 /* Adds candidates based on the uses. */ 2679 2680 static void 2681 add_derived_ivs_candidates (struct ivopts_data *data) 2682 { 2683 unsigned i; 2684 2685 for (i = 0; i < n_iv_uses (data); i++) 2686 { 2687 struct iv_use *use = iv_use (data, i); 2688 2689 if (!use) 2690 continue; 2691 2692 switch (use->type) 2693 { 2694 case USE_NONLINEAR_EXPR: 2695 case USE_COMPARE: 2696 case USE_ADDRESS: 2697 /* Just add the ivs based on the value of the iv used here. */ 2698 add_iv_value_candidates (data, use->iv, use); 2699 break; 2700 2701 default: 2702 gcc_unreachable (); 2703 } 2704 } 2705 } 2706 2707 /* Record important candidates and add them to related_cands bitmaps 2708 if needed. 
*/ 2709 2710 static void 2711 record_important_candidates (struct ivopts_data *data) 2712 { 2713 unsigned i; 2714 struct iv_use *use; 2715 2716 for (i = 0; i < n_iv_cands (data); i++) 2717 { 2718 struct iv_cand *cand = iv_cand (data, i); 2719 2720 if (cand->important) 2721 bitmap_set_bit (data->important_candidates, i); 2722 } 2723 2724 data->consider_all_candidates = (n_iv_cands (data) 2725 <= CONSIDER_ALL_CANDIDATES_BOUND); 2726 2727 if (data->consider_all_candidates) 2728 { 2729 /* We will not need "related_cands" bitmaps in this case, 2730 so release them to decrease peak memory consumption. */ 2731 for (i = 0; i < n_iv_uses (data); i++) 2732 { 2733 use = iv_use (data, i); 2734 BITMAP_FREE (use->related_cands); 2735 } 2736 } 2737 else 2738 { 2739 /* Add important candidates to the related_cands bitmaps. */ 2740 for (i = 0; i < n_iv_uses (data); i++) 2741 bitmap_ior_into (iv_use (data, i)->related_cands, 2742 data->important_candidates); 2743 } 2744 } 2745 2746 /* Allocates the data structure mapping the (use, candidate) pairs to costs. 2747 If consider_all_candidates is true, we use a two-dimensional array, otherwise 2748 we allocate a simple list to every use. */ 2749 2750 static void 2751 alloc_use_cost_map (struct ivopts_data *data) 2752 { 2753 unsigned i, size, s; 2754 2755 for (i = 0; i < n_iv_uses (data); i++) 2756 { 2757 struct iv_use *use = iv_use (data, i); 2758 2759 if (data->consider_all_candidates) 2760 size = n_iv_cands (data); 2761 else 2762 { 2763 s = bitmap_count_bits (use->related_cands); 2764 2765 /* Round up to the power of two, so that moduling by it is fast. */ 2766 size = s ? (1 << ceil_log2 (s)) : 1; 2767 } 2768 2769 use->n_map_members = size; 2770 use->cost_map = XCNEWVEC (struct cost_pair, size); 2771 } 2772 } 2773 2774 /* Returns description of computation cost of expression whose runtime 2775 cost is RUNTIME and complexity corresponds to COMPLEXITY. */ 2776 2777 static comp_cost 2778 new_cost (unsigned runtime, unsigned complexity) 2779 { 2780 comp_cost cost; 2781 2782 cost.cost = runtime; 2783 cost.complexity = complexity; 2784 2785 return cost; 2786 } 2787 2788 /* Adds costs COST1 and COST2. */ 2789 2790 static comp_cost 2791 add_costs (comp_cost cost1, comp_cost cost2) 2792 { 2793 cost1.cost += cost2.cost; 2794 cost1.complexity += cost2.complexity; 2795 2796 return cost1; 2797 } 2798 /* Subtracts costs COST1 and COST2. */ 2799 2800 static comp_cost 2801 sub_costs (comp_cost cost1, comp_cost cost2) 2802 { 2803 cost1.cost -= cost2.cost; 2804 cost1.complexity -= cost2.complexity; 2805 2806 return cost1; 2807 } 2808 2809 /* Returns a negative number if COST1 < COST2, a positive number if 2810 COST1 > COST2, and 0 if COST1 = COST2. */ 2811 2812 static int 2813 compare_costs (comp_cost cost1, comp_cost cost2) 2814 { 2815 if (cost1.cost == cost2.cost) 2816 return cost1.complexity - cost2.complexity; 2817 2818 return cost1.cost - cost2.cost; 2819 } 2820 2821 /* Returns true if COST is infinite. */ 2822 2823 static bool 2824 infinite_cost_p (comp_cost cost) 2825 { 2826 return cost.cost == INFTY; 2827 } 2828 2829 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends 2830 on invariants DEPENDS_ON and that the value used in expressing it 2831 is VALUE, and in case of iv elimination the comparison operator is COMP. 
*/ 2832 2833 static void 2834 set_use_iv_cost (struct ivopts_data *data, 2835 struct iv_use *use, struct iv_cand *cand, 2836 comp_cost cost, bitmap depends_on, tree value, 2837 enum tree_code comp, int inv_expr_id) 2838 { 2839 unsigned i, s; 2840 2841 if (infinite_cost_p (cost)) 2842 { 2843 BITMAP_FREE (depends_on); 2844 return; 2845 } 2846 2847 if (data->consider_all_candidates) 2848 { 2849 use->cost_map[cand->id].cand = cand; 2850 use->cost_map[cand->id].cost = cost; 2851 use->cost_map[cand->id].depends_on = depends_on; 2852 use->cost_map[cand->id].value = value; 2853 use->cost_map[cand->id].comp = comp; 2854 use->cost_map[cand->id].inv_expr_id = inv_expr_id; 2855 return; 2856 } 2857 2858 /* n_map_members is a power of two, so this computes modulo. */ 2859 s = cand->id & (use->n_map_members - 1); 2860 for (i = s; i < use->n_map_members; i++) 2861 if (!use->cost_map[i].cand) 2862 goto found; 2863 for (i = 0; i < s; i++) 2864 if (!use->cost_map[i].cand) 2865 goto found; 2866 2867 gcc_unreachable (); 2868 2869 found: 2870 use->cost_map[i].cand = cand; 2871 use->cost_map[i].cost = cost; 2872 use->cost_map[i].depends_on = depends_on; 2873 use->cost_map[i].value = value; 2874 use->cost_map[i].comp = comp; 2875 use->cost_map[i].inv_expr_id = inv_expr_id; 2876 } 2877 2878 /* Gets cost of (USE, CANDIDATE) pair. */ 2879 2880 static struct cost_pair * 2881 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use, 2882 struct iv_cand *cand) 2883 { 2884 unsigned i, s; 2885 struct cost_pair *ret; 2886 2887 if (!cand) 2888 return NULL; 2889 2890 if (data->consider_all_candidates) 2891 { 2892 ret = use->cost_map + cand->id; 2893 if (!ret->cand) 2894 return NULL; 2895 2896 return ret; 2897 } 2898 2899 /* n_map_members is a power of two, so this computes modulo. */ 2900 s = cand->id & (use->n_map_members - 1); 2901 for (i = s; i < use->n_map_members; i++) 2902 if (use->cost_map[i].cand == cand) 2903 return use->cost_map + i; 2904 else if (use->cost_map[i].cand == NULL) 2905 return NULL; 2906 for (i = 0; i < s; i++) 2907 if (use->cost_map[i].cand == cand) 2908 return use->cost_map + i; 2909 else if (use->cost_map[i].cand == NULL) 2910 return NULL; 2911 2912 return NULL; 2913 } 2914 2915 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */ 2916 static rtx 2917 produce_memory_decl_rtl (tree obj, int *regno) 2918 { 2919 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj)); 2920 machine_mode address_mode = targetm.addr_space.address_mode (as); 2921 rtx x; 2922 2923 gcc_assert (obj); 2924 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj)) 2925 { 2926 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj)); 2927 x = gen_rtx_SYMBOL_REF (address_mode, name); 2928 SET_SYMBOL_REF_DECL (x, obj); 2929 x = gen_rtx_MEM (DECL_MODE (obj), x); 2930 set_mem_addr_space (x, as); 2931 targetm.encode_section_info (obj, x, true); 2932 } 2933 else 2934 { 2935 x = gen_raw_REG (address_mode, (*regno)++); 2936 x = gen_rtx_MEM (DECL_MODE (obj), x); 2937 set_mem_addr_space (x, as); 2938 } 2939 2940 return x; 2941 } 2942 2943 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for 2944 walk_tree. DATA contains the actual fake register number. 
*/ 2946 static tree 2947 prepare_decl_rtl (tree *expr_p, int *ws, void *data) 2948 { 2949 tree obj = NULL_TREE; 2950 rtx x = NULL_RTX; 2951 int *regno = (int *) data; 2952 2953 switch (TREE_CODE (*expr_p)) 2954 { 2955 case ADDR_EXPR: 2956 for (expr_p = &TREE_OPERAND (*expr_p, 0); 2957 handled_component_p (*expr_p); 2958 expr_p = &TREE_OPERAND (*expr_p, 0)) 2959 continue; 2960 obj = *expr_p; 2961 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj)) 2962 x = produce_memory_decl_rtl (obj, regno); 2963 break; 2964 2965 case SSA_NAME: 2966 *ws = 0; 2967 obj = SSA_NAME_VAR (*expr_p); 2968 /* Defer handling of anonymous SSA_NAMEs to the expander. */ 2969 if (!obj) 2970 return NULL_TREE; 2971 if (!DECL_RTL_SET_P (obj)) 2972 x = gen_raw_REG (DECL_MODE (obj), (*regno)++); 2973 break; 2974 2975 case VAR_DECL: 2976 case PARM_DECL: 2977 case RESULT_DECL: 2978 *ws = 0; 2979 obj = *expr_p; 2980 2981 if (DECL_RTL_SET_P (obj)) 2982 break; 2983 2984 if (DECL_MODE (obj) == BLKmode) 2985 x = produce_memory_decl_rtl (obj, regno); 2986 else 2987 x = gen_raw_REG (DECL_MODE (obj), (*regno)++); 2988 2989 break; 2990 2991 default: 2992 break; 2993 } 2994 2995 if (x) 2996 { 2997 decl_rtl_to_reset.safe_push (obj); 2998 SET_DECL_RTL (obj, x); 2999 } 3000 3001 return NULL_TREE; 3002 } 3003 3004 /* Determines cost of the computation of EXPR. */ 3005 3006 static unsigned 3007 computation_cost (tree expr, bool speed) 3008 { 3009 rtx_insn *seq; 3010 rtx rslt; 3011 tree type = TREE_TYPE (expr); 3012 unsigned cost; 3013 /* Avoid using hard regs in ways which may be unsupported. */ 3014 int regno = LAST_VIRTUAL_REGISTER + 1; 3015 struct cgraph_node *node = cgraph_node::get (current_function_decl); 3016 enum node_frequency real_frequency = node->frequency; 3017 3018 node->frequency = NODE_FREQUENCY_NORMAL; 3019 crtl->maybe_hot_insn_p = speed; 3020 walk_tree (&expr, prepare_decl_rtl, &regno, NULL); 3021 start_sequence (); 3022 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL); 3023 seq = get_insns (); 3024 end_sequence (); 3025 default_rtl_profile (); 3026 node->frequency = real_frequency; 3027 3028 cost = seq_cost (seq, speed); 3029 if (MEM_P (rslt)) 3030 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type), 3031 TYPE_ADDR_SPACE (type), speed); 3032 else if (!REG_P (rslt)) 3033 cost += set_src_cost (rslt, speed); 3034 3035 return cost; 3036 } 3037 3038 /* Returns variable containing the value of candidate CAND at statement AT. */ 3039 3040 static tree 3041 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt) 3042 { 3043 if (stmt_after_increment (loop, cand, stmt)) 3044 return cand->var_after; 3045 else 3046 return cand->var_before; 3047 } 3048 3049 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the 3050 same precision that is at least as wide as the precision of TYPE, stores 3051 BA to A and BB to B, and returns the type of BA. Otherwise, returns the 3052 type of A and B.
*/ 3053 3054 static tree 3055 determine_common_wider_type (tree *a, tree *b) 3056 { 3057 tree wider_type = NULL; 3058 tree suba, subb; 3059 tree atype = TREE_TYPE (*a); 3060 3061 if (CONVERT_EXPR_P (*a)) 3062 { 3063 suba = TREE_OPERAND (*a, 0); 3064 wider_type = TREE_TYPE (suba); 3065 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype)) 3066 return atype; 3067 } 3068 else 3069 return atype; 3070 3071 if (CONVERT_EXPR_P (*b)) 3072 { 3073 subb = TREE_OPERAND (*b, 0); 3074 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb))) 3075 return atype; 3076 } 3077 else 3078 return atype; 3079 3080 *a = suba; 3081 *b = subb; 3082 return wider_type; 3083 } 3084 3085 /* Determines the expression by that USE is expressed from induction variable 3086 CAND at statement AT in LOOP. The expression is stored in a decomposed 3087 form into AFF. Returns false if USE cannot be expressed using CAND. */ 3088 3089 static bool 3090 get_computation_aff (struct loop *loop, 3091 struct iv_use *use, struct iv_cand *cand, gimple at, 3092 struct aff_tree *aff) 3093 { 3094 tree ubase = use->iv->base; 3095 tree ustep = use->iv->step; 3096 tree cbase = cand->iv->base; 3097 tree cstep = cand->iv->step, cstep_common; 3098 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase); 3099 tree common_type, var; 3100 tree uutype; 3101 aff_tree cbase_aff, var_aff; 3102 widest_int rat; 3103 3104 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype)) 3105 { 3106 /* We do not have a precision to express the values of use. */ 3107 return false; 3108 } 3109 3110 var = var_at_stmt (loop, cand, at); 3111 uutype = unsigned_type_for (utype); 3112 3113 /* If the conversion is not noop, perform it. */ 3114 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype)) 3115 { 3116 cstep = fold_convert (uutype, cstep); 3117 cbase = fold_convert (uutype, cbase); 3118 var = fold_convert (uutype, var); 3119 } 3120 3121 /* Ratio is 1 when computing the value of biv cand by itself. 3122 We can't rely on constant_multiple_of in this case because the 3123 use is created after the original biv is selected. The call 3124 could fail because of inconsistent fold behavior. See PR68021 3125 for more information. */ 3126 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt) 3127 { 3128 gcc_assert (is_gimple_assign (use->stmt)); 3129 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after); 3130 rat = 1; 3131 } 3132 else if (!constant_multiple_of (ustep, cstep, &rat)) 3133 return false; 3134 3135 /* In case both UBASE and CBASE are shortened to UUTYPE from some common 3136 type, we achieve better folding by computing their difference in this 3137 wider type, and cast the result to UUTYPE. We do not need to worry about 3138 overflows, as all the arithmetics will in the end be performed in UUTYPE 3139 anyway. */ 3140 common_type = determine_common_wider_type (&ubase, &cbase); 3141 3142 /* use = ubase - ratio * cbase + ratio * var. */ 3143 tree_to_aff_combination (ubase, common_type, aff); 3144 tree_to_aff_combination (cbase, common_type, &cbase_aff); 3145 tree_to_aff_combination (var, uutype, &var_aff); 3146 3147 /* We need to shift the value if we are after the increment. 
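   That is, at such a statement the candidate variable already holds
   base + (i + 1) * step rather than base + i * step, so one extra CSTEP
   is folded into CBASE before the difference is taken.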
*/ 3148 if (stmt_after_increment (loop, cand, at)) 3149 { 3150 aff_tree cstep_aff; 3151 3152 if (common_type != uutype) 3153 cstep_common = fold_convert (common_type, cstep); 3154 else 3155 cstep_common = cstep; 3156 3157 tree_to_aff_combination (cstep_common, common_type, &cstep_aff); 3158 aff_combination_add (&cbase_aff, &cstep_aff); 3159 } 3160 3161 aff_combination_scale (&cbase_aff, -rat); 3162 aff_combination_add (aff, &cbase_aff); 3163 if (common_type != uutype) 3164 aff_combination_convert (aff, uutype); 3165 3166 aff_combination_scale (&var_aff, rat); 3167 aff_combination_add (aff, &var_aff); 3168 3169 return true; 3170 } 3171 3172 /* Return the type of USE. */ 3173 3174 static tree 3175 get_use_type (struct iv_use *use) 3176 { 3177 tree base_type = TREE_TYPE (use->iv->base); 3178 tree type; 3179 3180 if (use->type == USE_ADDRESS) 3181 { 3182 /* The base_type may be a void pointer. Create a pointer type based on 3183 the mem_ref instead. */ 3184 type = build_pointer_type (TREE_TYPE (*use->op_p)); 3185 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type)) 3186 == TYPE_ADDR_SPACE (TREE_TYPE (base_type))); 3187 } 3188 else 3189 type = base_type; 3190 3191 return type; 3192 } 3193 3194 /* Determines the expression by that USE is expressed from induction variable 3195 CAND at statement AT in LOOP. The computation is unshared. */ 3196 3197 static tree 3198 get_computation_at (struct loop *loop, 3199 struct iv_use *use, struct iv_cand *cand, gimple at) 3200 { 3201 aff_tree aff; 3202 tree type = get_use_type (use); 3203 3204 if (!get_computation_aff (loop, use, cand, at, &aff)) 3205 return NULL_TREE; 3206 unshare_aff_combination (&aff); 3207 return fold_convert (type, aff_combination_to_tree (&aff)); 3208 } 3209 3210 /* Determines the expression by that USE is expressed from induction variable 3211 CAND in LOOP. The computation is unshared. */ 3212 3213 static tree 3214 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand) 3215 { 3216 return get_computation_at (loop, use, cand, use->stmt); 3217 } 3218 3219 /* Adjust the cost COST for being in loop setup rather than loop body. 3220 If we're optimizing for space, the loop setup overhead is constant; 3221 if we're optimizing for speed, amortize it over the per-iteration cost. */ 3222 static unsigned 3223 adjust_setup_cost (struct ivopts_data *data, unsigned cost) 3224 { 3225 if (cost == INFTY) 3226 return cost; 3227 else if (optimize_loop_for_speed_p (data->current_loop)) 3228 return cost / avg_loop_niter (data->current_loop); 3229 else 3230 return cost; 3231 } 3232 3233 /* Returns true if multiplying by RATIO is allowed in an address. Test the 3234 validity for a memory reference accessing memory of mode MODE in 3235 address space AS. 
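   E.g. (illustration): on a target whose addressing modes support index
   scales of 1, 2, 4 and 8, such as x86_64, this returns true for those
   ratios and false for, say, 3.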
*/ 3236 3237 3238 bool 3239 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode, 3240 addr_space_t as) 3241 { 3242 #define MAX_RATIO 128 3243 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode; 3244 static vec<sbitmap> valid_mult_list; 3245 sbitmap valid_mult; 3246 3247 if (data_index >= valid_mult_list.length ()) 3248 valid_mult_list.safe_grow_cleared (data_index + 1); 3249 3250 valid_mult = valid_mult_list[data_index]; 3251 if (!valid_mult) 3252 { 3253 machine_mode address_mode = targetm.addr_space.address_mode (as); 3254 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1); 3255 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2); 3256 rtx addr, scaled; 3257 HOST_WIDE_INT i; 3258 3259 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1); 3260 bitmap_clear (valid_mult); 3261 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX); 3262 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2); 3263 for (i = -MAX_RATIO; i <= MAX_RATIO; i++) 3264 { 3265 XEXP (scaled, 1) = gen_int_mode (i, address_mode); 3266 if (memory_address_addr_space_p (mode, addr, as) 3267 || memory_address_addr_space_p (mode, scaled, as)) 3268 bitmap_set_bit (valid_mult, i + MAX_RATIO); 3269 } 3270 3271 if (dump_file && (dump_flags & TDF_DETAILS)) 3272 { 3273 fprintf (dump_file, " allowed multipliers:"); 3274 for (i = -MAX_RATIO; i <= MAX_RATIO; i++) 3275 if (bitmap_bit_p (valid_mult, i + MAX_RATIO)) 3276 fprintf (dump_file, " %d", (int) i); 3277 fprintf (dump_file, "\n"); 3278 fprintf (dump_file, "\n"); 3279 } 3280 3281 valid_mult_list[data_index] = valid_mult; 3282 } 3283 3284 if (ratio > MAX_RATIO || ratio < -MAX_RATIO) 3285 return false; 3286 3287 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO); 3288 } 3289 3290 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index. 3291 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false, 3292 variable is omitted. Compute the cost for a memory reference that accesses 3293 a memory location of mode MEM_MODE in address space AS. 3294 3295 MAY_AUTOINC is set to true if the autoincrement (increasing index by 3296 size of MEM_MODE / RATIO) is available. To make this determination, we 3297 look at the size of the increment to be made, which is given in CSTEP. 3298 CSTEP may be zero if the step is unknown. 3299 STMT_AFTER_INC is true iff the statement we're looking at is after the 3300 increment of the original biv. 3301 3302 TODO -- there must be some better way. This all is quite crude. */ 3303 3304 enum ainc_type 3305 { 3306 AINC_PRE_INC, /* Pre increment. */ 3307 AINC_PRE_DEC, /* Pre decrement. */ 3308 AINC_POST_INC, /* Post increment. */ 3309 AINC_POST_DEC, /* Post decrement. */ 3310 AINC_NONE /* Also the number of auto increment types. 
*/ 3311 }; 3312 3313 typedef struct address_cost_data_s 3314 { 3315 HOST_WIDE_INT min_offset, max_offset; 3316 unsigned costs[2][2][2][2]; 3317 unsigned ainc_costs[AINC_NONE]; 3318 } *address_cost_data; 3319 3320 3321 static comp_cost 3322 get_address_cost (bool symbol_present, bool var_present, 3323 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio, 3324 HOST_WIDE_INT cstep, machine_mode mem_mode, 3325 addr_space_t as, bool speed, 3326 bool stmt_after_inc, bool *may_autoinc) 3327 { 3328 machine_mode address_mode = targetm.addr_space.address_mode (as); 3329 static vec<address_cost_data> address_cost_data_list; 3330 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode; 3331 address_cost_data data; 3332 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE]; 3333 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE]; 3334 unsigned cost, acost, complexity; 3335 enum ainc_type autoinc_type; 3336 bool offset_p, ratio_p, autoinc; 3337 HOST_WIDE_INT s_offset, autoinc_offset, msize; 3338 unsigned HOST_WIDE_INT mask; 3339 unsigned bits; 3340 3341 if (data_index >= address_cost_data_list.length ()) 3342 address_cost_data_list.safe_grow_cleared (data_index + 1); 3343 3344 data = address_cost_data_list[data_index]; 3345 if (!data) 3346 { 3347 HOST_WIDE_INT i; 3348 HOST_WIDE_INT rat, off = 0; 3349 int old_cse_not_expected, width; 3350 unsigned sym_p, var_p, off_p, rat_p, add_c; 3351 rtx_insn *seq; 3352 rtx addr, base; 3353 rtx reg0, reg1; 3354 3355 data = (address_cost_data) xcalloc (1, sizeof (*data)); 3356 3357 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1); 3358 3359 width = GET_MODE_BITSIZE (address_mode) - 1; 3360 if (width > (HOST_BITS_PER_WIDE_INT - 1)) 3361 width = HOST_BITS_PER_WIDE_INT - 1; 3362 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX); 3363 3364 for (i = width; i >= 0; i--) 3365 { 3366 off = -((unsigned HOST_WIDE_INT) 1 << i); 3367 XEXP (addr, 1) = gen_int_mode (off, address_mode); 3368 if (memory_address_addr_space_p (mem_mode, addr, as)) 3369 break; 3370 } 3371 data->min_offset = (i == -1? 0 : off); 3372 3373 for (i = width; i >= 0; i--) 3374 { 3375 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1; 3376 XEXP (addr, 1) = gen_int_mode (off, address_mode); 3377 if (memory_address_addr_space_p (mem_mode, addr, as)) 3378 break; 3379 /* For some strict-alignment targets, the offset must be naturally 3380 aligned. Try an aligned offset if mem_mode is not QImode. */ 3381 off = mem_mode != QImode 3382 ? ((unsigned HOST_WIDE_INT) 1 << i) 3383 - GET_MODE_SIZE (mem_mode) 3384 : 0; 3385 if (off > 0) 3386 { 3387 XEXP (addr, 1) = gen_int_mode (off, address_mode); 3388 if (memory_address_addr_space_p (mem_mode, addr, as)) 3389 break; 3390 } 3391 } 3392 if (i == -1) 3393 off = 0; 3394 data->max_offset = off; 3395 3396 if (dump_file && (dump_flags & TDF_DETAILS)) 3397 { 3398 fprintf (dump_file, "get_address_cost:\n"); 3399 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n", 3400 GET_MODE_NAME (mem_mode), 3401 data->min_offset); 3402 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n", 3403 GET_MODE_NAME (mem_mode), 3404 data->max_offset); 3405 } 3406 3407 rat = 1; 3408 for (i = 2; i <= MAX_RATIO; i++) 3409 if (multiplier_allowed_in_address_p (i, mem_mode, as)) 3410 { 3411 rat = i; 3412 break; 3413 } 3414 3415 /* Compute the cost of various addressing modes. 
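   First the four auto-increment forms are probed, then each of the 16
   combinations of {symbol, var, offset, ratio} is expanded to RTL and its
   target cost recorded; e.g. the combination var_p = off_p = rat_p = 1
   with sym_p = 0 stands for an address of the shape reg0 * rat + reg1 + off.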
*/ 3416 acost = 0; 3417 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1); 3418 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2); 3419 3420 if (USE_LOAD_PRE_DECREMENT (mem_mode) 3421 || USE_STORE_PRE_DECREMENT (mem_mode)) 3422 { 3423 addr = gen_rtx_PRE_DEC (address_mode, reg0); 3424 has_predec[mem_mode] 3425 = memory_address_addr_space_p (mem_mode, addr, as); 3426 3427 if (has_predec[mem_mode]) 3428 data->ainc_costs[AINC_PRE_DEC] 3429 = address_cost (addr, mem_mode, as, speed); 3430 } 3431 if (USE_LOAD_POST_DECREMENT (mem_mode) 3432 || USE_STORE_POST_DECREMENT (mem_mode)) 3433 { 3434 addr = gen_rtx_POST_DEC (address_mode, reg0); 3435 has_postdec[mem_mode] 3436 = memory_address_addr_space_p (mem_mode, addr, as); 3437 3438 if (has_postdec[mem_mode]) 3439 data->ainc_costs[AINC_POST_DEC] 3440 = address_cost (addr, mem_mode, as, speed); 3441 } 3442 if (USE_LOAD_PRE_INCREMENT (mem_mode) 3443 || USE_STORE_PRE_INCREMENT (mem_mode)) 3444 { 3445 addr = gen_rtx_PRE_INC (address_mode, reg0); 3446 has_preinc[mem_mode] 3447 = memory_address_addr_space_p (mem_mode, addr, as); 3448 3449 if (has_preinc[mem_mode]) 3450 data->ainc_costs[AINC_PRE_INC] 3451 = address_cost (addr, mem_mode, as, speed); 3452 } 3453 if (USE_LOAD_POST_INCREMENT (mem_mode) 3454 || USE_STORE_POST_INCREMENT (mem_mode)) 3455 { 3456 addr = gen_rtx_POST_INC (address_mode, reg0); 3457 has_postinc[mem_mode] 3458 = memory_address_addr_space_p (mem_mode, addr, as); 3459 3460 if (has_postinc[mem_mode]) 3461 data->ainc_costs[AINC_POST_INC] 3462 = address_cost (addr, mem_mode, as, speed); 3463 } 3464 for (i = 0; i < 16; i++) 3465 { 3466 sym_p = i & 1; 3467 var_p = (i >> 1) & 1; 3468 off_p = (i >> 2) & 1; 3469 rat_p = (i >> 3) & 1; 3470 3471 addr = reg0; 3472 if (rat_p) 3473 addr = gen_rtx_fmt_ee (MULT, address_mode, addr, 3474 gen_int_mode (rat, address_mode)); 3475 3476 if (var_p) 3477 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1); 3478 3479 if (sym_p) 3480 { 3481 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup ("")); 3482 /* ??? We can run into trouble with some backends by presenting 3483 it with symbols which haven't been properly passed through 3484 targetm.encode_section_info. By setting the local bit, we 3485 enhance the probability of things working. */ 3486 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL; 3487 3488 if (off_p) 3489 base = gen_rtx_fmt_e (CONST, address_mode, 3490 gen_rtx_fmt_ee 3491 (PLUS, address_mode, base, 3492 gen_int_mode (off, address_mode))); 3493 } 3494 else if (off_p) 3495 base = gen_int_mode (off, address_mode); 3496 else 3497 base = NULL_RTX; 3498 3499 if (base) 3500 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base); 3501 3502 start_sequence (); 3503 /* To avoid splitting addressing modes, pretend that no cse will 3504 follow. */ 3505 old_cse_not_expected = cse_not_expected; 3506 cse_not_expected = true; 3507 addr = memory_address_addr_space (mem_mode, addr, as); 3508 cse_not_expected = old_cse_not_expected; 3509 seq = get_insns (); 3510 end_sequence (); 3511 3512 acost = seq_cost (seq, speed); 3513 acost += address_cost (addr, mem_mode, as, speed); 3514 3515 if (!acost) 3516 acost = 1; 3517 data->costs[sym_p][var_p][off_p][rat_p] = acost; 3518 } 3519 3520 /* On some targets, it is quite expensive to load a symbol into a register, 3521 which makes addresses that contain symbols look much more expensive.
3522 However, the symbol will have to be loaded in any case before the 3523 loop (and quite likely we have it in register already), so it does not 3524 make much sense to penalize them too heavily. So make some final 3525 tweaks for the SYMBOL_PRESENT modes: 3526 3527 If VAR_PRESENT is false, and the mode obtained by changing symbol to 3528 var is cheaper, use this mode with small penalty. 3529 If VAR_PRESENT is true, try whether the mode with 3530 SYMBOL_PRESENT = false is cheaper even with cost of addition, and 3531 if this is the case, use it. */ 3532 add_c = add_cost (speed, address_mode); 3533 for (i = 0; i < 8; i++) 3534 { 3535 var_p = i & 1; 3536 off_p = (i >> 1) & 1; 3537 rat_p = (i >> 2) & 1; 3538 3539 acost = data->costs[0][1][off_p][rat_p] + 1; 3540 if (var_p) 3541 acost += add_c; 3542 3543 if (acost < data->costs[1][var_p][off_p][rat_p]) 3544 data->costs[1][var_p][off_p][rat_p] = acost; 3545 } 3546 3547 if (dump_file && (dump_flags & TDF_DETAILS)) 3548 { 3549 fprintf (dump_file, "Address costs:\n"); 3550 3551 for (i = 0; i < 16; i++) 3552 { 3553 sym_p = i & 1; 3554 var_p = (i >> 1) & 1; 3555 off_p = (i >> 2) & 1; 3556 rat_p = (i >> 3) & 1; 3557 3558 fprintf (dump_file, " "); 3559 if (sym_p) 3560 fprintf (dump_file, "sym + "); 3561 if (var_p) 3562 fprintf (dump_file, "var + "); 3563 if (off_p) 3564 fprintf (dump_file, "cst + "); 3565 if (rat_p) 3566 fprintf (dump_file, "rat * "); 3567 3568 acost = data->costs[sym_p][var_p][off_p][rat_p]; 3569 fprintf (dump_file, "index costs %d\n", acost); 3570 } 3571 if (has_predec[mem_mode] || has_postdec[mem_mode] 3572 || has_preinc[mem_mode] || has_postinc[mem_mode]) 3573 fprintf (dump_file, " May include autoinc/dec\n"); 3574 fprintf (dump_file, "\n"); 3575 } 3576 3577 address_cost_data_list[data_index] = data; 3578 } 3579 3580 bits = GET_MODE_BITSIZE (address_mode); 3581 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1); 3582 offset &= mask; 3583 if ((offset >> (bits - 1) & 1)) 3584 offset |= ~mask; 3585 s_offset = offset; 3586 3587 autoinc = false; 3588 autoinc_type = AINC_NONE; 3589 msize = GET_MODE_SIZE (mem_mode); 3590 autoinc_offset = offset; 3591 if (stmt_after_inc) 3592 autoinc_offset += ratio * cstep; 3593 if (symbol_present || var_present || ratio != 1) 3594 autoinc = false; 3595 else 3596 { 3597 if (has_postinc[mem_mode] && autoinc_offset == 0 3598 && msize == cstep) 3599 autoinc_type = AINC_POST_INC; 3600 else if (has_postdec[mem_mode] && autoinc_offset == 0 3601 && msize == -cstep) 3602 autoinc_type = AINC_POST_DEC; 3603 else if (has_preinc[mem_mode] && autoinc_offset == msize 3604 && msize == cstep) 3605 autoinc_type = AINC_PRE_INC; 3606 else if (has_predec[mem_mode] && autoinc_offset == -msize 3607 && msize == -cstep) 3608 autoinc_type = AINC_PRE_DEC; 3609 3610 if (autoinc_type != AINC_NONE) 3611 autoinc = true; 3612 } 3613 3614 cost = 0; 3615 offset_p = (s_offset != 0 3616 && data->min_offset <= s_offset 3617 && s_offset <= data->max_offset); 3618 ratio_p = (ratio != 1 3619 && multiplier_allowed_in_address_p (ratio, mem_mode, as)); 3620 3621 if (ratio != 1 && !ratio_p) 3622 cost += mult_by_coeff_cost (ratio, address_mode, speed); 3623 3624 if (s_offset && !offset_p && !symbol_present) 3625 cost += add_cost (speed, address_mode); 3626 3627 if (may_autoinc) 3628 *may_autoinc = autoinc; 3629 if (autoinc) 3630 acost = data->ainc_costs[autoinc_type]; 3631 else 3632 acost = data->costs[symbol_present][var_present][offset_p][ratio_p]; 3633 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p; 3634 
return new_cost (cost + acost, complexity); 3635 } 3636 3637 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is 3638 the EXPR operand holding the shift. COST0 and COST1 are the costs for 3639 calculating the operands of EXPR. Returns true if successful, and returns 3640 the cost in COST. */ 3641 3642 static bool 3643 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0, 3644 comp_cost cost1, tree mult, bool speed, comp_cost *cost) 3645 { 3646 comp_cost res; 3647 tree op1 = TREE_OPERAND (expr, 1); 3648 tree cst = TREE_OPERAND (mult, 1); 3649 tree multop = TREE_OPERAND (mult, 0); 3650 int m = exact_log2 (int_cst_value (cst)); 3651 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode)); 3652 int as_cost, sa_cost; 3653 bool mult_in_op1; 3654 3655 if (!(m >= 0 && m < maxm)) 3656 return false; 3657 3658 mult_in_op1 = operand_equal_p (op1, mult, 0); 3659 3660 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m); 3661 3662 /* If the target has a cheap shift-and-add or shift-and-sub instruction, 3663 use that in preference to a shift insn followed by an add insn. */ 3664 sa_cost = (TREE_CODE (expr) != MINUS_EXPR 3665 ? shiftadd_cost (speed, mode, m) 3666 : (mult_in_op1 3667 ? shiftsub1_cost (speed, mode, m) 3668 : shiftsub0_cost (speed, mode, m))); 3669 3670 res = new_cost (MIN (as_cost, sa_cost), 0); 3671 res = add_costs (res, mult_in_op1 ? cost0 : cost1); 3672 3673 STRIP_NOPS (multop); 3674 if (!is_gimple_val (multop)) 3675 res = add_costs (res, force_expr_to_var_cost (multop, speed)); 3676 3677 *cost = res; 3678 return true; 3679 } 3680 3681 /* Estimates cost of forcing expression EXPR into a variable. */ 3682 3683 static comp_cost 3684 force_expr_to_var_cost (tree expr, bool speed) 3685 { 3686 static bool costs_initialized = false; 3687 static unsigned integer_cost [2]; 3688 static unsigned symbol_cost [2]; 3689 static unsigned address_cost [2]; 3690 tree op0, op1; 3691 comp_cost cost0, cost1, cost; 3692 machine_mode mode; 3693 3694 if (!costs_initialized) 3695 { 3696 tree type = build_pointer_type (integer_type_node); 3697 tree var, addr; 3698 rtx x; 3699 int i; 3700 3701 var = create_tmp_var_raw (integer_type_node, "test_var"); 3702 TREE_STATIC (var) = 1; 3703 x = produce_memory_decl_rtl (var, NULL); 3704 SET_DECL_RTL (var, x); 3705 3706 addr = build1 (ADDR_EXPR, type, var); 3707 3708 3709 for (i = 0; i < 2; i++) 3710 { 3711 integer_cost[i] = computation_cost (build_int_cst (integer_type_node, 3712 2000), i); 3713 3714 symbol_cost[i] = computation_cost (addr, i) + 1; 3715 3716 address_cost[i] 3717 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1; 3718 if (dump_file && (dump_flags & TDF_DETAILS)) 3719 { 3720 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ?
"speed" : "size"); 3721 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]); 3722 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]); 3723 fprintf (dump_file, " address %d\n", (int) address_cost[i]); 3724 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]); 3725 fprintf (dump_file, "\n"); 3726 } 3727 } 3728 3729 costs_initialized = true; 3730 } 3731 3732 STRIP_NOPS (expr); 3733 3734 if (SSA_VAR_P (expr)) 3735 return no_cost; 3736 3737 if (is_gimple_min_invariant (expr)) 3738 { 3739 if (TREE_CODE (expr) == INTEGER_CST) 3740 return new_cost (integer_cost [speed], 0); 3741 3742 if (TREE_CODE (expr) == ADDR_EXPR) 3743 { 3744 tree obj = TREE_OPERAND (expr, 0); 3745 3746 if (TREE_CODE (obj) == VAR_DECL 3747 || TREE_CODE (obj) == PARM_DECL 3748 || TREE_CODE (obj) == RESULT_DECL) 3749 return new_cost (symbol_cost [speed], 0); 3750 } 3751 3752 return new_cost (address_cost [speed], 0); 3753 } 3754 3755 switch (TREE_CODE (expr)) 3756 { 3757 case POINTER_PLUS_EXPR: 3758 case PLUS_EXPR: 3759 case MINUS_EXPR: 3760 case MULT_EXPR: 3761 op0 = TREE_OPERAND (expr, 0); 3762 op1 = TREE_OPERAND (expr, 1); 3763 STRIP_NOPS (op0); 3764 STRIP_NOPS (op1); 3765 break; 3766 3767 CASE_CONVERT: 3768 case NEGATE_EXPR: 3769 op0 = TREE_OPERAND (expr, 0); 3770 STRIP_NOPS (op0); 3771 op1 = NULL_TREE; 3772 break; 3773 3774 default: 3775 /* Just an arbitrary value, FIXME. */ 3776 return new_cost (target_spill_cost[speed], 0); 3777 } 3778 3779 if (op0 == NULL_TREE 3780 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0)) 3781 cost0 = no_cost; 3782 else 3783 cost0 = force_expr_to_var_cost (op0, speed); 3784 3785 if (op1 == NULL_TREE 3786 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1)) 3787 cost1 = no_cost; 3788 else 3789 cost1 = force_expr_to_var_cost (op1, speed); 3790 3791 mode = TYPE_MODE (TREE_TYPE (expr)); 3792 switch (TREE_CODE (expr)) 3793 { 3794 case POINTER_PLUS_EXPR: 3795 case PLUS_EXPR: 3796 case MINUS_EXPR: 3797 case NEGATE_EXPR: 3798 cost = new_cost (add_cost (speed, mode), 0); 3799 if (TREE_CODE (expr) != NEGATE_EXPR) 3800 { 3801 tree mult = NULL_TREE; 3802 comp_cost sa_cost; 3803 if (TREE_CODE (op1) == MULT_EXPR) 3804 mult = op1; 3805 else if (TREE_CODE (op0) == MULT_EXPR) 3806 mult = op0; 3807 3808 if (mult != NULL_TREE 3809 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1)) 3810 && get_shiftadd_cost (expr, mode, cost0, cost1, mult, 3811 speed, &sa_cost)) 3812 return sa_cost; 3813 } 3814 break; 3815 3816 CASE_CONVERT: 3817 { 3818 tree inner_mode, outer_mode; 3819 outer_mode = TREE_TYPE (expr); 3820 inner_mode = TREE_TYPE (op0); 3821 cost = new_cost (convert_cost (TYPE_MODE (outer_mode), 3822 TYPE_MODE (inner_mode), speed), 0); 3823 } 3824 break; 3825 3826 case MULT_EXPR: 3827 if (cst_and_fits_in_hwi (op0)) 3828 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0), 3829 mode, speed), 0); 3830 else if (cst_and_fits_in_hwi (op1)) 3831 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1), 3832 mode, speed), 0); 3833 else 3834 return new_cost (target_spill_cost [speed], 0); 3835 break; 3836 3837 default: 3838 gcc_unreachable (); 3839 } 3840 3841 cost = add_costs (cost, cost0); 3842 cost = add_costs (cost, cost1); 3843 3844 /* Bound the cost by target_spill_cost. The parts of complicated 3845 computations often are either loop invariant or at least can 3846 be shared between several iv uses, so letting this grow without 3847 limits would not give reasonable results. 
*/ 3848 if (cost.cost > (int) target_spill_cost [speed]) 3849 cost.cost = target_spill_cost [speed]; 3850 3851 return cost; 3852 } 3853 3854 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the 3855 invariants the computation depends on. */ 3856 3857 static comp_cost 3858 force_var_cost (struct ivopts_data *data, 3859 tree expr, bitmap *depends_on) 3860 { 3861 if (depends_on) 3862 { 3863 fd_ivopts_data = data; 3864 walk_tree (&expr, find_depends, depends_on, NULL); 3865 } 3866 3867 return force_expr_to_var_cost (expr, data->speed); 3868 } 3869 3870 /* Estimates cost of expressing address ADDR as var + symbol + offset. The 3871 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set 3872 to false if the corresponding part is missing. DEPENDS_ON is a set of the 3873 invariants the computation depends on. */ 3874 3875 static comp_cost 3876 split_address_cost (struct ivopts_data *data, 3877 tree addr, bool *symbol_present, bool *var_present, 3878 unsigned HOST_WIDE_INT *offset, bitmap *depends_on) 3879 { 3880 tree core; 3881 HOST_WIDE_INT bitsize; 3882 HOST_WIDE_INT bitpos; 3883 tree toffset; 3884 machine_mode mode; 3885 int unsignedp, volatilep; 3886 3887 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode, 3888 &unsignedp, &volatilep, false); 3889 3890 if (toffset != 0 3891 || bitpos % BITS_PER_UNIT != 0 3892 || TREE_CODE (core) != VAR_DECL) 3893 { 3894 *symbol_present = false; 3895 *var_present = true; 3896 fd_ivopts_data = data; 3897 walk_tree (&addr, find_depends, depends_on, NULL); 3898 return new_cost (target_spill_cost[data->speed], 0); 3899 } 3900 3901 *offset += bitpos / BITS_PER_UNIT; 3902 if (TREE_STATIC (core) 3903 || DECL_EXTERNAL (core)) 3904 { 3905 *symbol_present = true; 3906 *var_present = false; 3907 return no_cost; 3908 } 3909 3910 *symbol_present = false; 3911 *var_present = true; 3912 return no_cost; 3913 } 3914 3915 /* Estimates cost of expressing difference of addresses E1 - E2 as 3916 var + symbol + offset. The value of offset is added to OFFSET, 3917 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding 3918 part is missing. DEPENDS_ON is a set of the invariants the computation 3919 depends on. */ 3920 3921 static comp_cost 3922 ptr_difference_cost (struct ivopts_data *data, 3923 tree e1, tree e2, bool *symbol_present, bool *var_present, 3924 unsigned HOST_WIDE_INT *offset, bitmap *depends_on) 3925 { 3926 HOST_WIDE_INT diff = 0; 3927 aff_tree aff_e1, aff_e2; 3928 tree type; 3929 3930 gcc_assert (TREE_CODE (e1) == ADDR_EXPR); 3931 3932 if (ptr_difference_const (e1, e2, &diff)) 3933 { 3934 *offset += diff; 3935 *symbol_present = false; 3936 *var_present = false; 3937 return no_cost; 3938 } 3939 3940 if (integer_zerop (e2)) 3941 return split_address_cost (data, TREE_OPERAND (e1, 0), 3942 symbol_present, var_present, offset, depends_on); 3943 3944 *symbol_present = false; 3945 *var_present = true; 3946 3947 type = signed_type_for (TREE_TYPE (e1)); 3948 tree_to_aff_combination (e1, type, &aff_e1); 3949 tree_to_aff_combination (e2, type, &aff_e2); 3950 aff_combination_scale (&aff_e2, -1); 3951 aff_combination_add (&aff_e1, &aff_e2); 3952 3953 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on); 3954 } 3955 3956 /* Estimates cost of expressing difference E1 - E2 as 3957 var + symbol + offset. The value of offset is added to OFFSET, 3958 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding 3959 part is missing. 
DEPENDS_ON is a set of the invariants the computation 3960 depends on. */ 3961 3962 static comp_cost 3963 difference_cost (struct ivopts_data *data, 3964 tree e1, tree e2, bool *symbol_present, bool *var_present, 3965 unsigned HOST_WIDE_INT *offset, bitmap *depends_on) 3966 { 3967 machine_mode mode = TYPE_MODE (TREE_TYPE (e1)); 3968 unsigned HOST_WIDE_INT off1, off2; 3969 aff_tree aff_e1, aff_e2; 3970 tree type; 3971 3972 e1 = strip_offset (e1, &off1); 3973 e2 = strip_offset (e2, &off2); 3974 *offset += off1 - off2; 3975 3976 STRIP_NOPS (e1); 3977 STRIP_NOPS (e2); 3978 3979 if (TREE_CODE (e1) == ADDR_EXPR) 3980 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, 3981 offset, depends_on); 3982 *symbol_present = false; 3983 3984 if (operand_equal_p (e1, e2, 0)) 3985 { 3986 *var_present = false; 3987 return no_cost; 3988 } 3989 3990 *var_present = true; 3991 3992 if (integer_zerop (e2)) 3993 return force_var_cost (data, e1, depends_on); 3994 3995 if (integer_zerop (e1)) 3996 { 3997 comp_cost cost = force_var_cost (data, e2, depends_on); 3998 cost.cost += mult_by_coeff_cost (-1, mode, data->speed); 3999 return cost; 4000 } 4001 4002 type = signed_type_for (TREE_TYPE (e1)); 4003 tree_to_aff_combination (e1, type, &aff_e1); 4004 tree_to_aff_combination (e2, type, &aff_e2); 4005 aff_combination_scale (&aff_e2, -1); 4006 aff_combination_add (&aff_e1, &aff_e2); 4007 4008 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on); 4009 } 4010 4011 /* Returns true if AFF1 and AFF2 are identical. */ 4012 4013 static bool 4014 compare_aff_trees (aff_tree *aff1, aff_tree *aff2) 4015 { 4016 unsigned i; 4017 4018 if (aff1->n != aff2->n) 4019 return false; 4020 4021 for (i = 0; i < aff1->n; i++) 4022 { 4023 if (aff1->elts[i].coef != aff2->elts[i].coef) 4024 return false; 4025 4026 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0)) 4027 return false; 4028 } 4029 return true; 4030 } 4031 4032 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */ 4033 4034 static int 4035 get_expr_id (struct ivopts_data *data, tree expr) 4036 { 4037 struct iv_inv_expr_ent ent; 4038 struct iv_inv_expr_ent **slot; 4039 4040 ent.expr = expr; 4041 ent.hash = iterative_hash_expr (expr, 0); 4042 slot = data->inv_expr_tab->find_slot (&ent, INSERT); 4043 if (*slot) 4044 return (*slot)->id; 4045 4046 *slot = XNEW (struct iv_inv_expr_ent); 4047 (*slot)->expr = expr; 4048 (*slot)->hash = ent.hash; 4049 (*slot)->id = data->inv_expr_id++; 4050 return (*slot)->id; 4051 } 4052 4053 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE 4054 requires a new compiler generated temporary. Returns -1 otherwise. 4055 ADDRESS_P is a flag indicating if the expression is for address 4056 computation. */ 4057 4058 static int 4059 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase, 4060 tree cbase, HOST_WIDE_INT ratio, 4061 bool address_p) 4062 { 4063 aff_tree ubase_aff, cbase_aff; 4064 tree expr, ub, cb; 4065 4066 STRIP_NOPS (ubase); 4067 STRIP_NOPS (cbase); 4068 ub = ubase; 4069 cb = cbase; 4070 4071 if ((TREE_CODE (ubase) == INTEGER_CST) 4072 && (TREE_CODE (cbase) == INTEGER_CST)) 4073 return -1; 4074 4075 /* Strips the constant part. */ 4076 if (TREE_CODE (ubase) == PLUS_EXPR 4077 || TREE_CODE (ubase) == MINUS_EXPR 4078 || TREE_CODE (ubase) == POINTER_PLUS_EXPR) 4079 { 4080 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST) 4081 ubase = TREE_OPERAND (ubase, 0); 4082 } 4083 4084 /* Strips the constant part. 
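E.g., a base of the form p + 16 is reduced here to p; only the variable parts of UBASE and CBASE matter for deciding whether a new temporary is needed.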
*/ 4085 if (TREE_CODE (cbase) == PLUS_EXPR 4086 || TREE_CODE (cbase) == MINUS_EXPR 4087 || TREE_CODE (cbase) == POINTER_PLUS_EXPR) 4088 { 4089 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST) 4090 cbase = TREE_OPERAND (cbase, 0); 4091 } 4092 4093 if (address_p) 4094 { 4095 if (((TREE_CODE (ubase) == SSA_NAME) 4096 || (TREE_CODE (ubase) == ADDR_EXPR 4097 && is_gimple_min_invariant (ubase))) 4098 && (TREE_CODE (cbase) == INTEGER_CST)) 4099 return -1; 4100 4101 if (((TREE_CODE (cbase) == SSA_NAME) 4102 || (TREE_CODE (cbase) == ADDR_EXPR 4103 && is_gimple_min_invariant (cbase))) 4104 && (TREE_CODE (ubase) == INTEGER_CST)) 4105 return -1; 4106 } 4107 4108 if (ratio == 1) 4109 { 4110 if (operand_equal_p (ubase, cbase, 0)) 4111 return -1; 4112 4113 if (TREE_CODE (ubase) == ADDR_EXPR 4114 && TREE_CODE (cbase) == ADDR_EXPR) 4115 { 4116 tree usym, csym; 4117 4118 usym = TREE_OPERAND (ubase, 0); 4119 csym = TREE_OPERAND (cbase, 0); 4120 if (TREE_CODE (usym) == ARRAY_REF) 4121 { 4122 tree ind = TREE_OPERAND (usym, 1); 4123 if (TREE_CODE (ind) == INTEGER_CST 4124 && tree_fits_shwi_p (ind) 4125 && tree_to_shwi (ind) == 0) 4126 usym = TREE_OPERAND (usym, 0); 4127 } 4128 if (TREE_CODE (csym) == ARRAY_REF) 4129 { 4130 tree ind = TREE_OPERAND (csym, 1); 4131 if (TREE_CODE (ind) == INTEGER_CST 4132 && tree_fits_shwi_p (ind) 4133 && tree_to_shwi (ind) == 0) 4134 csym = TREE_OPERAND (csym, 0); 4135 } 4136 if (operand_equal_p (usym, csym, 0)) 4137 return -1; 4138 } 4139 /* Now do more complex comparison */ 4140 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff); 4141 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff); 4142 if (compare_aff_trees (&ubase_aff, &cbase_aff)) 4143 return -1; 4144 } 4145 4146 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff); 4147 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff); 4148 4149 aff_combination_scale (&cbase_aff, -1 * ratio); 4150 aff_combination_add (&ubase_aff, &cbase_aff); 4151 expr = aff_combination_to_tree (&ubase_aff); 4152 return get_expr_id (data, expr); 4153 } 4154 4155 4156 4157 /* Determines the cost of the computation by that USE is expressed 4158 from induction variable CAND. If ADDRESS_P is true, we just need 4159 to create an address from it, otherwise we want to get it into 4160 register. A set of invariants we depend on is stored in 4161 DEPENDS_ON. AT is the statement at that the value is computed. 4162 If CAN_AUTOINC is nonnull, use it to record whether autoinc 4163 addressing is likely. */ 4164 4165 static comp_cost 4166 get_computation_cost_at (struct ivopts_data *data, 4167 struct iv_use *use, struct iv_cand *cand, 4168 bool address_p, bitmap *depends_on, gimple at, 4169 bool *can_autoinc, 4170 int *inv_expr_id) 4171 { 4172 tree ubase = use->iv->base, ustep = use->iv->step; 4173 tree cbase, cstep; 4174 tree utype = TREE_TYPE (ubase), ctype; 4175 unsigned HOST_WIDE_INT cstepi, offset = 0; 4176 HOST_WIDE_INT ratio, aratio; 4177 bool var_present, symbol_present, stmt_is_after_inc; 4178 comp_cost cost; 4179 widest_int rat; 4180 bool speed = optimize_bb_for_speed_p (gimple_bb (at)); 4181 machine_mode mem_mode = (address_p 4182 ? TYPE_MODE (TREE_TYPE (*use->op_p)) 4183 : VOIDmode); 4184 4185 *depends_on = NULL; 4186 4187 /* Only consider real candidates. 
*/
4188 if (!cand->iv)
4189 return infinite_cost;
4190
4191 cbase = cand->iv->base;
4192 cstep = cand->iv->step;
4193 ctype = TREE_TYPE (cbase);
4194
4195 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4196 {
4197 /* We do not have enough precision to express the values of the use. */
4198 return infinite_cost;
4199 }
4200
4201 if (address_p
4202 || (use->iv->base_object
4203 && cand->iv->base_object
4204 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4205 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4206 {
4207 /* Do not try to express address of an object with computation based
4208 on address of a different object. This may cause problems in rtl
4209 level alias analysis (that does not expect this to be happening,
4210 as this is illegal in C), and would be unlikely to be useful
4211 anyway. */
4212 if (use->iv->base_object
4213 && cand->iv->base_object
4214 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4215 return infinite_cost;
4216 }
4217
4218 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4219 {
4220 /* TODO -- add direct handling of this case. */
4221 goto fallback;
4222 }
4223
4224 /* CSTEPI is removed from the offset in case the statement is after the
4225 increment. If the step is not constant, we use zero instead.
4226 This is a bit imprecise (there is the extra addition), but
4227 redundancy elimination is likely to transform the code so that
4228 it uses the value of the variable before the increment anyway,
4229 so this is not that unrealistic. */
4230 if (cst_and_fits_in_hwi (cstep))
4231 cstepi = int_cst_value (cstep);
4232 else
4233 cstepi = 0;
4234
4235 if (!constant_multiple_of (ustep, cstep, &rat))
4236 return infinite_cost;
4237
4238 if (wi::fits_shwi_p (rat))
4239 ratio = rat.to_shwi ();
4240 else
4241 return infinite_cost;
4242
4243 STRIP_NOPS (cbase);
4244 ctype = TREE_TYPE (cbase);
4245
4246 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4247
4248 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4249 or ratio == 1, it is better to handle this like
4250
4251 ubase - ratio * cbase + ratio * var
4252
4253 (this also holds in the case ratio == -1, TODO). */
4254
4255 if (cst_and_fits_in_hwi (cbase))
4256 {
4257 offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
4258 cost = difference_cost (data,
4259 ubase, build_int_cst (utype, 0),
4260 &symbol_present, &var_present, &offset,
4261 depends_on);
4262 cost.cost /= avg_loop_niter (data->current_loop);
4263 }
4264 else if (ratio == 1)
4265 {
4266 tree real_cbase = cbase;
4267
4268 /* Check to see if any adjustment is needed.
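When the use statement follows the increment of the candidate and the step is not a compile-time constant (CSTEPI was set to zero above), the value of the candidate at the use is cbase + cstep rather than cbase, so the step is folded into the base before the difference is taken.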
*/ 4269 if (cstepi == 0 && stmt_is_after_inc) 4270 { 4271 aff_tree real_cbase_aff; 4272 aff_tree cstep_aff; 4273 4274 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase), 4275 &real_cbase_aff); 4276 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff); 4277 4278 aff_combination_add (&real_cbase_aff, &cstep_aff); 4279 real_cbase = aff_combination_to_tree (&real_cbase_aff); 4280 } 4281 4282 cost = difference_cost (data, 4283 ubase, real_cbase, 4284 &symbol_present, &var_present, &offset, 4285 depends_on); 4286 cost.cost /= avg_loop_niter (data->current_loop); 4287 } 4288 else if (address_p 4289 && !POINTER_TYPE_P (ctype) 4290 && multiplier_allowed_in_address_p 4291 (ratio, mem_mode, 4292 TYPE_ADDR_SPACE (TREE_TYPE (utype)))) 4293 { 4294 cbase 4295 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio)); 4296 cost = difference_cost (data, 4297 ubase, cbase, 4298 &symbol_present, &var_present, &offset, 4299 depends_on); 4300 cost.cost /= avg_loop_niter (data->current_loop); 4301 } 4302 else 4303 { 4304 cost = force_var_cost (data, cbase, depends_on); 4305 cost = add_costs (cost, 4306 difference_cost (data, 4307 ubase, build_int_cst (utype, 0), 4308 &symbol_present, &var_present, 4309 &offset, depends_on)); 4310 cost.cost /= avg_loop_niter (data->current_loop); 4311 cost.cost += add_cost (data->speed, TYPE_MODE (ctype)); 4312 } 4313 4314 if (inv_expr_id) 4315 { 4316 *inv_expr_id = 4317 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p); 4318 /* Clear depends on. */ 4319 if (*inv_expr_id != -1 && depends_on && *depends_on) 4320 bitmap_clear (*depends_on); 4321 } 4322 4323 /* If we are after the increment, the value of the candidate is higher by 4324 one iteration. */ 4325 if (stmt_is_after_inc) 4326 offset -= ratio * cstepi; 4327 4328 /* Now the computation is in shape symbol + var1 + const + ratio * var2. 4329 (symbol/var1/const parts may be omitted). If we are looking for an 4330 address, find the cost of addressing this. */ 4331 if (address_p) 4332 return add_costs (cost, 4333 get_address_cost (symbol_present, var_present, 4334 offset, ratio, cstepi, 4335 mem_mode, 4336 TYPE_ADDR_SPACE (TREE_TYPE (utype)), 4337 speed, stmt_is_after_inc, 4338 can_autoinc)); 4339 4340 /* Otherwise estimate the costs for computing the expression. */ 4341 if (!symbol_present && !var_present && !offset) 4342 { 4343 if (ratio != 1) 4344 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed); 4345 return cost; 4346 } 4347 4348 /* Symbol + offset should be compile-time computable so consider that they 4349 are added once to the variable, if present. */ 4350 if (var_present && (symbol_present || offset)) 4351 cost.cost += adjust_setup_cost (data, 4352 add_cost (speed, TYPE_MODE (ctype))); 4353 4354 /* Having offset does not affect runtime cost in case it is added to 4355 symbol, but it increases complexity. */ 4356 if (offset) 4357 cost.complexity++; 4358 4359 cost.cost += add_cost (speed, TYPE_MODE (ctype)); 4360 4361 aratio = ratio > 0 ? ratio : -ratio; 4362 if (aratio != 1) 4363 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed); 4364 return cost; 4365 4366 fallback: 4367 if (can_autoinc) 4368 *can_autoinc = false; 4369 4370 { 4371 /* Just get the expression, expand it and measure the cost. 
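(computation_cost expands the expression to RTL and sums the costs of the generated insns; this is precise but relatively slow, hence it is used only as the fallback.)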
*/ 4372 tree comp = get_computation_at (data->current_loop, use, cand, at); 4373 4374 if (!comp) 4375 return infinite_cost; 4376 4377 if (address_p) 4378 comp = build_simple_mem_ref (comp); 4379 4380 return new_cost (computation_cost (comp, speed), 0); 4381 } 4382 } 4383 4384 /* Determines the cost of the computation by that USE is expressed 4385 from induction variable CAND. If ADDRESS_P is true, we just need 4386 to create an address from it, otherwise we want to get it into 4387 register. A set of invariants we depend on is stored in 4388 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether 4389 autoinc addressing is likely. */ 4390 4391 static comp_cost 4392 get_computation_cost (struct ivopts_data *data, 4393 struct iv_use *use, struct iv_cand *cand, 4394 bool address_p, bitmap *depends_on, 4395 bool *can_autoinc, int *inv_expr_id) 4396 { 4397 return get_computation_cost_at (data, 4398 use, cand, address_p, depends_on, use->stmt, 4399 can_autoinc, inv_expr_id); 4400 } 4401 4402 /* Determines cost of basing replacement of USE on CAND in a generic 4403 expression. */ 4404 4405 static bool 4406 determine_use_iv_cost_generic (struct ivopts_data *data, 4407 struct iv_use *use, struct iv_cand *cand) 4408 { 4409 bitmap depends_on; 4410 comp_cost cost; 4411 int inv_expr_id = -1; 4412 4413 /* The simple case first -- if we need to express value of the preserved 4414 original biv, the cost is 0. This also prevents us from counting the 4415 cost of increment twice -- once at this use and once in the cost of 4416 the candidate. */ 4417 if (cand->pos == IP_ORIGINAL 4418 && cand->incremented_at == use->stmt) 4419 { 4420 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE, 4421 ERROR_MARK, -1); 4422 return true; 4423 } 4424 4425 cost = get_computation_cost (data, use, cand, false, &depends_on, 4426 NULL, &inv_expr_id); 4427 4428 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK, 4429 inv_expr_id); 4430 4431 return !infinite_cost_p (cost); 4432 } 4433 4434 /* Determines cost of basing replacement of USE on CAND in an address. */ 4435 4436 static bool 4437 determine_use_iv_cost_address (struct ivopts_data *data, 4438 struct iv_use *use, struct iv_cand *cand) 4439 { 4440 bitmap depends_on; 4441 bool can_autoinc; 4442 int inv_expr_id = -1; 4443 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on, 4444 &can_autoinc, &inv_expr_id); 4445 4446 if (cand->ainc_use == use) 4447 { 4448 if (can_autoinc) 4449 cost.cost -= cand->cost_step; 4450 /* If we generated the candidate solely for exploiting autoincrement 4451 opportunities, and it turns out it can't be used, set the cost to 4452 infinity to make sure we ignore it. */ 4453 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE) 4454 cost = infinite_cost; 4455 } 4456 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK, 4457 inv_expr_id); 4458 4459 return !infinite_cost_p (cost); 4460 } 4461 4462 /* Computes value of candidate CAND at position AT in iteration NITER, and 4463 stores it to VAL. 
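That is, VAL = BASE + NITER * STEP, plus one more STEP when AT is after the increment of CAND; the arithmetic is carried out in an unsigned type so that it wraps rather than invoking undefined overflow.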
*/
4464
4465 static void
4466 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4467 aff_tree *val)
4468 {
4469 aff_tree step, delta, nit;
4470 struct iv *iv = cand->iv;
4471 tree type = TREE_TYPE (iv->base);
4472 tree steptype = type;
4473 if (POINTER_TYPE_P (type))
4474 steptype = sizetype;
4475 steptype = unsigned_type_for (type);
4476
4477 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4478 aff_combination_convert (&step, steptype);
4479 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4480 aff_combination_convert (&nit, steptype);
4481 aff_combination_mult (&nit, &step, &delta);
4482 if (stmt_after_increment (loop, cand, at))
4483 aff_combination_add (&delta, &step);
4484
4485 tree_to_aff_combination (iv->base, type, val);
4486 if (!POINTER_TYPE_P (type))
4487 aff_combination_convert (val, steptype);
4488 aff_combination_add (val, &delta);
4489 }
4490
4491 /* Returns the period of induction variable IV. */
4492
4493 static tree
4494 iv_period (struct iv *iv)
4495 {
4496 tree step = iv->step, period, type;
4497 tree pow2div;
4498
4499 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4500
4501 type = unsigned_type_for (TREE_TYPE (step));
4502 /* The period of the iv is lcm (step, type_range)/step - 1,
4503 i.e., N*type_range/step - 1. Since the type range is a power
4504 of two, N == (step >> num_of_ending_zeros_binary (step)),
4505 so the final result is
4506
4507 (type_range >> num_of_ending_zeros_binary (step)) - 1
4508
4509 */
4510 pow2div = num_ending_zeros (step);
4511
4512 period = build_low_bits_mask (type,
4513 (TYPE_PRECISION (type)
4514 - tree_to_uhwi (pow2div)));
4515
4516 return period;
4517 }
4518
4519 /* Returns the comparison operator used when eliminating the iv USE. */
4520
4521 static enum tree_code
4522 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4523 {
4524 struct loop *loop = data->current_loop;
4525 basic_block ex_bb;
4526 edge exit;
4527
4528 ex_bb = gimple_bb (use->stmt);
4529 exit = EDGE_SUCC (ex_bb, 0);
4530 if (flow_bb_inside_loop_p (loop, exit->dest))
4531 exit = EDGE_SUCC (ex_bb, 1);
4532
4533 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4534 }
4535
4536 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4537 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4538 calculation is performed in a non-wrapping type.
4539
4540 TODO: More generally, we could test for the situation that
4541 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4542 This would require knowing the sign of OFFSET.
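For example, if BASE is the SSA name defined by q_1 = q_0 + n and OFFSET is n, then BASE - OFFSET is exactly q_0 and cannot overflow; this is the situation created by iv_elimination_compare_lt below.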
*/
4543
4544 static bool
4545 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4546 {
4547 enum tree_code code;
4548 tree e1, e2;
4549 aff_tree aff_e1, aff_e2, aff_offset;
4550
4551 if (!nowrap_type_p (TREE_TYPE (base)))
4552 return false;
4553
4554 base = expand_simple_operations (base);
4555
4556 if (TREE_CODE (base) == SSA_NAME)
4557 {
4558 gimple stmt = SSA_NAME_DEF_STMT (base);
4559
4560 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4561 return false;
4562
4563 code = gimple_assign_rhs_code (stmt);
4564 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4565 return false;
4566
4567 e1 = gimple_assign_rhs1 (stmt);
4568 e2 = gimple_assign_rhs2 (stmt);
4569 }
4570 else
4571 {
4572 code = TREE_CODE (base);
4573 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4574 return false;
4575 e1 = TREE_OPERAND (base, 0);
4576 e2 = TREE_OPERAND (base, 1);
4577 }
4578
4579 /* Use affine expansion as a deeper inspection to prove the equality. */
4580 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4581 &aff_e2, &data->name_expansion_cache);
4582 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4583 &aff_offset, &data->name_expansion_cache);
4584 aff_combination_scale (&aff_offset, -1);
4585 switch (code)
4586 {
4587 case PLUS_EXPR:
4588 aff_combination_add (&aff_e2, &aff_offset);
4589 if (aff_combination_zero_p (&aff_e2))
4590 return true;
4591
4592 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4593 &aff_e1, &data->name_expansion_cache);
4594 aff_combination_add (&aff_e1, &aff_offset);
4595 return aff_combination_zero_p (&aff_e1);
4596
4597 case POINTER_PLUS_EXPR:
4598 aff_combination_add (&aff_e2, &aff_offset);
4599 return aff_combination_zero_p (&aff_e2);
4600
4601 default:
4602 return false;
4603 }
4604 }
4605
4606 /* Tries to replace the loop exit by one formulated in terms of a LT_EXPR
4607 comparison with CAND. NITER describes the number of iterations of
4608 the loop. If successful, the comparison in COMP_P is altered accordingly.
4609
4610 We aim to handle the following situation:
4611
4612 sometype *base, *p;
4613 int a, b, i;
4614
4615 i = a;
4616 p = p_0 = base + a;
4617
4618 do
4619 {
4620 bla (*p);
4621 p++;
4622 i++;
4623 }
4624 while (i < b);
4625
4626 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4627 We aim to optimize this to
4628
4629 p = p_0 = base + a;
4630 do
4631 {
4632 bla (*p);
4633 p++;
4634 }
4635 while (p < p_0 - a + b);
4636
4637 This preserves correctness, since the pointer arithmetic does not
4638 overflow. More precisely:
4639
4640 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4641 overflow in computing it or the values of p.
4642 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4643 overflow. To prove this, we use the fact that p_0 = base + a. */
4644
4645 static bool
4646 iv_elimination_compare_lt (struct ivopts_data *data,
4647 struct iv_cand *cand, enum tree_code *comp_p,
4648 struct tree_niter_desc *niter)
4649 {
4650 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4651 struct aff_tree nit, tmpa, tmpb;
4652 enum tree_code comp;
4653 HOST_WIDE_INT step;
4654
4655 /* We need to know that the candidate induction variable does not overflow.
4656 While more complex analysis may be used to prove this, for now just
4657 check that the variable appears in the original program and that it
4658 is computed in a type that guarantees no overflows.
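Such a guarantee is what nowrap_type_p checks for: pointer types, and integral types whose overflow is undefined.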
*/ 4659 cand_type = TREE_TYPE (cand->iv->base); 4660 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type)) 4661 return false; 4662 4663 /* Make sure that the loop iterates till the loop bound is hit, as otherwise 4664 the calculation of the BOUND could overflow, making the comparison 4665 invalid. */ 4666 if (!data->loop_single_exit_p) 4667 return false; 4668 4669 /* We need to be able to decide whether candidate is increasing or decreasing 4670 in order to choose the right comparison operator. */ 4671 if (!cst_and_fits_in_hwi (cand->iv->step)) 4672 return false; 4673 step = int_cst_value (cand->iv->step); 4674 4675 /* Check that the number of iterations matches the expected pattern: 4676 a + 1 > b ? 0 : b - a - 1. */ 4677 mbz = niter->may_be_zero; 4678 if (TREE_CODE (mbz) == GT_EXPR) 4679 { 4680 /* Handle a + 1 > b. */ 4681 tree op0 = TREE_OPERAND (mbz, 0); 4682 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1))) 4683 { 4684 a = TREE_OPERAND (op0, 0); 4685 b = TREE_OPERAND (mbz, 1); 4686 } 4687 else 4688 return false; 4689 } 4690 else if (TREE_CODE (mbz) == LT_EXPR) 4691 { 4692 tree op1 = TREE_OPERAND (mbz, 1); 4693 4694 /* Handle b < a + 1. */ 4695 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1))) 4696 { 4697 a = TREE_OPERAND (op1, 0); 4698 b = TREE_OPERAND (mbz, 0); 4699 } 4700 else 4701 return false; 4702 } 4703 else 4704 return false; 4705 4706 /* Expected number of iterations is B - A - 1. Check that it matches 4707 the actual number, i.e., that B - A - NITER = 1. */ 4708 tree_to_aff_combination (niter->niter, nit_type, &nit); 4709 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa); 4710 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb); 4711 aff_combination_scale (&nit, -1); 4712 aff_combination_scale (&tmpa, -1); 4713 aff_combination_add (&tmpb, &tmpa); 4714 aff_combination_add (&tmpb, &nit); 4715 if (tmpb.n != 0 || tmpb.offset != 1) 4716 return false; 4717 4718 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not 4719 overflow. */ 4720 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step), 4721 cand->iv->step, 4722 fold_convert (TREE_TYPE (cand->iv->step), a)); 4723 if (!difference_cannot_overflow_p (data, cand->iv->base, offset)) 4724 return false; 4725 4726 /* Determine the new comparison operator. */ 4727 comp = step < 0 ? GT_EXPR : LT_EXPR; 4728 if (*comp_p == NE_EXPR) 4729 *comp_p = comp; 4730 else if (*comp_p == EQ_EXPR) 4731 *comp_p = invert_tree_comparison (comp, false); 4732 else 4733 gcc_unreachable (); 4734 4735 return true; 4736 } 4737 4738 /* Check whether it is possible to express the condition in USE by comparison 4739 of candidate CAND. If so, store the value compared with to BOUND, and the 4740 comparison operator to COMP. */ 4741 4742 static bool 4743 may_eliminate_iv (struct ivopts_data *data, 4744 struct iv_use *use, struct iv_cand *cand, tree *bound, 4745 enum tree_code *comp) 4746 { 4747 basic_block ex_bb; 4748 edge exit; 4749 tree period; 4750 struct loop *loop = data->current_loop; 4751 aff_tree bnd; 4752 struct tree_niter_desc *desc = NULL; 4753 4754 if (TREE_CODE (cand->iv->step) != INTEGER_CST) 4755 return false; 4756 4757 /* For now works only for exits that dominate the loop latch. 4758 TODO: extend to other conditions inside loop body. 
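(An exit dominating the latch is tested in every iteration, so the value of the candidate at the exit statement is determined by the iteration count alone.)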
*/
4759 ex_bb = gimple_bb (use->stmt);
4760 if (use->stmt != last_stmt (ex_bb)
4761 || gimple_code (use->stmt) != GIMPLE_COND
4762 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4763 return false;
4764
4765 exit = EDGE_SUCC (ex_bb, 0);
4766 if (flow_bb_inside_loop_p (loop, exit->dest))
4767 exit = EDGE_SUCC (ex_bb, 1);
4768 if (flow_bb_inside_loop_p (loop, exit->dest))
4769 return false;
4770
4771 desc = niter_for_exit (data, exit);
4772 if (!desc)
4773 return false;
4774
4775 /* Determine whether we can use the variable to test the exit condition.
4776 This is the case iff the period of the induction variable is greater
4777 than the number of iterations for which the exit condition is true. */
4778 period = iv_period (cand->iv);
4779
4780 /* If the number of iterations is constant, compare against it directly. */
4781 if (TREE_CODE (desc->niter) == INTEGER_CST)
4782 {
4783 /* See cand_value_at. */
4784 if (stmt_after_increment (loop, cand, use->stmt))
4785 {
4786 if (!tree_int_cst_lt (desc->niter, period))
4787 return false;
4788 }
4789 else
4790 {
4791 if (tree_int_cst_lt (period, desc->niter))
4792 return false;
4793 }
4794 }
4795
4796 /* If not, and if this is the only possible exit of the loop, see whether
4797 we can get a conservative estimate on the number of iterations of the
4798 entire loop and compare against that instead. */
4799 else
4800 {
4801 widest_int period_value, max_niter;
4802
4803 max_niter = desc->max;
4804 if (stmt_after_increment (loop, cand, use->stmt))
4805 max_niter += 1;
4806 period_value = wi::to_widest (period);
4807 if (wi::gtu_p (max_niter, period_value))
4808 {
4809 /* See if we can take advantage of inferred loop bound information. */
4810 if (data->loop_single_exit_p)
4811 {
4812 if (!max_loop_iterations (loop, &max_niter))
4813 return false;
4814 /* The loop bound is already adjusted by adding 1. */
4815 if (wi::gtu_p (max_niter, period_value))
4816 return false;
4817 }
4818 else
4819 return false;
4820 }
4821 }
4822
4823 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4824
4825 *bound = fold_convert (TREE_TYPE (cand->iv->base),
4826 aff_combination_to_tree (&bnd));
4827 *comp = iv_elimination_compare (data, use);
4828
4829 /* It is unlikely that computing the number of iterations using division
4830 would be more profitable than keeping the original induction variable. */
4831 if (expression_expensive_p (*bound))
4832 return false;
4833
4834 /* Sometimes it is possible to handle the situation that the number of
4835 iterations may be zero (unless additional assumptions hold) by using <
4836 instead of != in the exit condition.
4837
4838 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4839 base the exit condition on it. However, that is often too
4840 expensive. */
4841 if (!integer_zerop (desc->may_be_zero))
4842 return iv_elimination_compare_lt (data, cand, comp, desc);
4843
4844 return true;
4845 }
4846
4847 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
4848 be copied, if it is used in the loop body and DATA->body_includes_call. */
4849
4850 static int
4851 parm_decl_cost (struct ivopts_data *data, tree bound)
4852 {
4853 tree sbound = bound;
4854 STRIP_NOPS (sbound);
4855
4856 if (TREE_CODE (sbound) == SSA_NAME
4857 && SSA_NAME_IS_DEFAULT_DEF (sbound)
4858 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4859 && data->body_includes_call)
4860 return COSTS_N_INSNS (1);
4861
4862 return 0;
4863 }
4864
4865 /* Determines cost of basing replacement of USE on CAND in a condition.
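Two alternatives are compared: eliminating the use entirely by rewriting the exit test in terms of CAND (see may_eliminate_iv above), and expressing the value of the original induction variable from CAND like any other use; the cheaper alternative is recorded.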
*/ 4866 4867 static bool 4868 determine_use_iv_cost_condition (struct ivopts_data *data, 4869 struct iv_use *use, struct iv_cand *cand) 4870 { 4871 tree bound = NULL_TREE; 4872 struct iv *cmp_iv; 4873 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on; 4874 comp_cost elim_cost, express_cost, cost, bound_cost; 4875 bool ok; 4876 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id; 4877 tree *control_var, *bound_cst; 4878 enum tree_code comp = ERROR_MARK; 4879 4880 /* Only consider real candidates. */ 4881 if (!cand->iv) 4882 { 4883 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE, 4884 ERROR_MARK, -1); 4885 return false; 4886 } 4887 4888 /* Try iv elimination. */ 4889 if (may_eliminate_iv (data, use, cand, &bound, &comp)) 4890 { 4891 elim_cost = force_var_cost (data, bound, &depends_on_elim); 4892 if (elim_cost.cost == 0) 4893 elim_cost.cost = parm_decl_cost (data, bound); 4894 else if (TREE_CODE (bound) == INTEGER_CST) 4895 elim_cost.cost = 0; 4896 /* If we replace a loop condition 'i < n' with 'p < base + n', 4897 depends_on_elim will have 'base' and 'n' set, which implies 4898 that both 'base' and 'n' will be live during the loop. More likely, 4899 'base + n' will be loop invariant, resulting in only one live value 4900 during the loop. So in that case we clear depends_on_elim and set 4901 elim_inv_expr_id instead. */ 4902 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1) 4903 { 4904 elim_inv_expr_id = get_expr_id (data, bound); 4905 bitmap_clear (depends_on_elim); 4906 } 4907 /* The bound is a loop invariant, so it will be only computed 4908 once. */ 4909 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost); 4910 } 4911 else 4912 elim_cost = infinite_cost; 4913 4914 /* Try expressing the original giv. If it is compared with an invariant, 4915 note that we cannot get rid of it. */ 4916 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst, 4917 NULL, &cmp_iv); 4918 gcc_assert (ok); 4919 4920 /* When the condition is a comparison of the candidate IV against 4921 zero, prefer this IV. 4922 4923 TODO: The constant that we're subtracting from the cost should 4924 be target-dependent. This information should be added to the 4925 target costs for each backend. */ 4926 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */ 4927 && integer_zerop (*bound_cst) 4928 && (operand_equal_p (*control_var, cand->var_after, 0) 4929 || operand_equal_p (*control_var, cand->var_before, 0))) 4930 elim_cost.cost -= 1; 4931 4932 express_cost = get_computation_cost (data, use, cand, false, 4933 &depends_on_express, NULL, 4934 &express_inv_expr_id); 4935 fd_ivopts_data = data; 4936 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL); 4937 4938 /* Count the cost of the original bound as well. */ 4939 bound_cost = force_var_cost (data, *bound_cst, NULL); 4940 if (bound_cost.cost == 0) 4941 bound_cost.cost = parm_decl_cost (data, *bound_cst); 4942 else if (TREE_CODE (*bound_cst) == INTEGER_CST) 4943 bound_cost.cost = 0; 4944 express_cost.cost += bound_cost.cost; 4945 4946 /* Choose the better approach, preferring the eliminated IV. 
*/ 4947 if (compare_costs (elim_cost, express_cost) <= 0) 4948 { 4949 cost = elim_cost; 4950 depends_on = depends_on_elim; 4951 depends_on_elim = NULL; 4952 inv_expr_id = elim_inv_expr_id; 4953 } 4954 else 4955 { 4956 cost = express_cost; 4957 depends_on = depends_on_express; 4958 depends_on_express = NULL; 4959 bound = NULL_TREE; 4960 comp = ERROR_MARK; 4961 inv_expr_id = express_inv_expr_id; 4962 } 4963 4964 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id); 4965 4966 if (depends_on_elim) 4967 BITMAP_FREE (depends_on_elim); 4968 if (depends_on_express) 4969 BITMAP_FREE (depends_on_express); 4970 4971 return !infinite_cost_p (cost); 4972 } 4973 4974 /* Determines cost of basing replacement of USE on CAND. Returns false 4975 if USE cannot be based on CAND. */ 4976 4977 static bool 4978 determine_use_iv_cost (struct ivopts_data *data, 4979 struct iv_use *use, struct iv_cand *cand) 4980 { 4981 switch (use->type) 4982 { 4983 case USE_NONLINEAR_EXPR: 4984 return determine_use_iv_cost_generic (data, use, cand); 4985 4986 case USE_ADDRESS: 4987 return determine_use_iv_cost_address (data, use, cand); 4988 4989 case USE_COMPARE: 4990 return determine_use_iv_cost_condition (data, use, cand); 4991 4992 default: 4993 gcc_unreachable (); 4994 } 4995 } 4996 4997 /* Return true if get_computation_cost indicates that autoincrement is 4998 a possibility for the pair of USE and CAND, false otherwise. */ 4999 5000 static bool 5001 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use, 5002 struct iv_cand *cand) 5003 { 5004 bitmap depends_on; 5005 bool can_autoinc; 5006 comp_cost cost; 5007 5008 if (use->type != USE_ADDRESS) 5009 return false; 5010 5011 cost = get_computation_cost (data, use, cand, true, &depends_on, 5012 &can_autoinc, NULL); 5013 5014 BITMAP_FREE (depends_on); 5015 5016 return !infinite_cost_p (cost) && can_autoinc; 5017 } 5018 5019 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a 5020 use that allows autoincrement, and set their AINC_USE if possible. */ 5021 5022 static void 5023 set_autoinc_for_original_candidates (struct ivopts_data *data) 5024 { 5025 unsigned i, j; 5026 5027 for (i = 0; i < n_iv_cands (data); i++) 5028 { 5029 struct iv_cand *cand = iv_cand (data, i); 5030 struct iv_use *closest_before = NULL; 5031 struct iv_use *closest_after = NULL; 5032 if (cand->pos != IP_ORIGINAL) 5033 continue; 5034 5035 for (j = 0; j < n_iv_uses (data); j++) 5036 { 5037 struct iv_use *use = iv_use (data, j); 5038 unsigned uid = gimple_uid (use->stmt); 5039 5040 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)) 5041 continue; 5042 5043 if (uid < gimple_uid (cand->incremented_at) 5044 && (closest_before == NULL 5045 || uid > gimple_uid (closest_before->stmt))) 5046 closest_before = use; 5047 5048 if (uid > gimple_uid (cand->incremented_at) 5049 && (closest_after == NULL 5050 || uid < gimple_uid (closest_after->stmt))) 5051 closest_after = use; 5052 } 5053 5054 if (closest_before != NULL 5055 && autoinc_possible_for_pair (data, closest_before, cand)) 5056 cand->ainc_use = closest_before; 5057 else if (closest_after != NULL 5058 && autoinc_possible_for_pair (data, closest_after, cand)) 5059 cand->ainc_use = closest_after; 5060 } 5061 } 5062 5063 /* Finds the candidates for the induction variables. */ 5064 5065 static void 5066 find_iv_candidates (struct ivopts_data *data) 5067 { 5068 /* Add commonly used ivs. */ 5069 add_standard_iv_candidates (data); 5070 5071 /* Add old induction variables. 
*/ 5072 add_old_ivs_candidates (data); 5073 5074 /* Add induction variables derived from uses. */ 5075 add_derived_ivs_candidates (data); 5076 5077 set_autoinc_for_original_candidates (data); 5078 5079 /* Record the important candidates. */ 5080 record_important_candidates (data); 5081 } 5082 5083 /* Determines costs of basing the use of the iv on an iv candidate. */ 5084 5085 static void 5086 determine_use_iv_costs (struct ivopts_data *data) 5087 { 5088 unsigned i, j; 5089 struct iv_use *use; 5090 struct iv_cand *cand; 5091 bitmap to_clear = BITMAP_ALLOC (NULL); 5092 5093 alloc_use_cost_map (data); 5094 5095 for (i = 0; i < n_iv_uses (data); i++) 5096 { 5097 use = iv_use (data, i); 5098 5099 if (data->consider_all_candidates) 5100 { 5101 for (j = 0; j < n_iv_cands (data); j++) 5102 { 5103 cand = iv_cand (data, j); 5104 determine_use_iv_cost (data, use, cand); 5105 } 5106 } 5107 else 5108 { 5109 bitmap_iterator bi; 5110 5111 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi) 5112 { 5113 cand = iv_cand (data, j); 5114 if (!determine_use_iv_cost (data, use, cand)) 5115 bitmap_set_bit (to_clear, j); 5116 } 5117 5118 /* Remove the candidates for that the cost is infinite from 5119 the list of related candidates. */ 5120 bitmap_and_compl_into (use->related_cands, to_clear); 5121 bitmap_clear (to_clear); 5122 } 5123 } 5124 5125 BITMAP_FREE (to_clear); 5126 5127 if (dump_file && (dump_flags & TDF_DETAILS)) 5128 { 5129 fprintf (dump_file, "Use-candidate costs:\n"); 5130 5131 for (i = 0; i < n_iv_uses (data); i++) 5132 { 5133 use = iv_use (data, i); 5134 5135 fprintf (dump_file, "Use %d:\n", i); 5136 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n"); 5137 for (j = 0; j < use->n_map_members; j++) 5138 { 5139 if (!use->cost_map[j].cand 5140 || infinite_cost_p (use->cost_map[j].cost)) 5141 continue; 5142 5143 fprintf (dump_file, " %d\t%d\t%d\t", 5144 use->cost_map[j].cand->id, 5145 use->cost_map[j].cost.cost, 5146 use->cost_map[j].cost.complexity); 5147 if (use->cost_map[j].depends_on) 5148 bitmap_print (dump_file, 5149 use->cost_map[j].depends_on, "",""); 5150 if (use->cost_map[j].inv_expr_id != -1) 5151 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id); 5152 fprintf (dump_file, "\n"); 5153 } 5154 5155 fprintf (dump_file, "\n"); 5156 } 5157 fprintf (dump_file, "\n"); 5158 } 5159 } 5160 5161 /* Determines cost of the candidate CAND. */ 5162 5163 static void 5164 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand) 5165 { 5166 comp_cost cost_base; 5167 unsigned cost, cost_step; 5168 tree base; 5169 5170 if (!cand->iv) 5171 { 5172 cand->cost = 0; 5173 return; 5174 } 5175 5176 /* There are two costs associated with the candidate -- its increment 5177 and its initialization. The second is almost negligible for any loop 5178 that rolls enough, so we take it just very little into account. */ 5179 5180 base = cand->iv->base; 5181 cost_base = force_var_cost (data, base, NULL); 5182 /* It will be exceptional that the iv register happens to be initialized with 5183 the proper value at no cost. In general, there will at least be a regcopy 5184 or a const set. */ 5185 if (cost_base.cost == 0) 5186 cost_base.cost = COSTS_N_INSNS (1); 5187 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base))); 5188 5189 cost = cost_step + adjust_setup_cost (data, cost_base.cost); 5190 5191 /* Prefer the original ivs unless we may gain something by replacing it. 
5192 The reason is to make debugging simpler; so this is not relevant for 5193 artificial ivs created by other optimization passes. */ 5194 if (cand->pos != IP_ORIGINAL 5195 || !SSA_NAME_VAR (cand->var_before) 5196 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before))) 5197 cost++; 5198 5199 /* Prefer not to insert statements into latch unless there are some 5200 already (so that we do not create unnecessary jumps). */ 5201 if (cand->pos == IP_END 5202 && empty_block_p (ip_end_pos (data->current_loop))) 5203 cost++; 5204 5205 cand->cost = cost; 5206 cand->cost_step = cost_step; 5207 } 5208 5209 /* Determines costs of computation of the candidates. */ 5210 5211 static void 5212 determine_iv_costs (struct ivopts_data *data) 5213 { 5214 unsigned i; 5215 5216 if (dump_file && (dump_flags & TDF_DETAILS)) 5217 { 5218 fprintf (dump_file, "Candidate costs:\n"); 5219 fprintf (dump_file, " cand\tcost\n"); 5220 } 5221 5222 for (i = 0; i < n_iv_cands (data); i++) 5223 { 5224 struct iv_cand *cand = iv_cand (data, i); 5225 5226 determine_iv_cost (data, cand); 5227 5228 if (dump_file && (dump_flags & TDF_DETAILS)) 5229 fprintf (dump_file, " %d\t%d\n", i, cand->cost); 5230 } 5231 5232 if (dump_file && (dump_flags & TDF_DETAILS)) 5233 fprintf (dump_file, "\n"); 5234 } 5235 5236 /* Calculates cost for having SIZE induction variables. */ 5237 5238 static unsigned 5239 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size) 5240 { 5241 /* We add size to the cost, so that we prefer eliminating ivs 5242 if possible. */ 5243 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed, 5244 data->body_includes_call); 5245 } 5246 5247 /* For each size of the induction variable set determine the penalty. */ 5248 5249 static void 5250 determine_set_costs (struct ivopts_data *data) 5251 { 5252 unsigned j, n; 5253 gphi *phi; 5254 gphi_iterator psi; 5255 tree op; 5256 struct loop *loop = data->current_loop; 5257 bitmap_iterator bi; 5258 5259 if (dump_file && (dump_flags & TDF_DETAILS)) 5260 { 5261 fprintf (dump_file, "Global costs:\n"); 5262 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs); 5263 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs); 5264 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]); 5265 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]); 5266 } 5267 5268 n = 0; 5269 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi)) 5270 { 5271 phi = psi.phi (); 5272 op = PHI_RESULT (phi); 5273 5274 if (virtual_operand_p (op)) 5275 continue; 5276 5277 if (get_iv (data, op)) 5278 continue; 5279 5280 n++; 5281 } 5282 5283 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi) 5284 { 5285 struct version_info *info = ver_info (data, j); 5286 5287 if (info->inv_id && info->has_nonlin_use) 5288 n++; 5289 } 5290 5291 data->regs_used = n; 5292 if (dump_file && (dump_flags & TDF_DETAILS)) 5293 fprintf (dump_file, " regs_used %d\n", n); 5294 5295 if (dump_file && (dump_flags & TDF_DETAILS)) 5296 { 5297 fprintf (dump_file, " cost for size:\n"); 5298 fprintf (dump_file, " ivs\tcost\n"); 5299 for (j = 0; j <= 2 * target_avail_regs; j++) 5300 fprintf (dump_file, " %d\t%d\n", j, 5301 ivopts_global_cost_for_size (data, j)); 5302 fprintf (dump_file, "\n"); 5303 } 5304 } 5305 5306 /* Returns true if A is a cheaper cost pair than B. 
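A missing pair (NULL) is treated as infinitely expensive, so any existing pair is cheaper than a missing one.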
*/ 5307 5308 static bool 5309 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b) 5310 { 5311 int cmp; 5312 5313 if (!a) 5314 return false; 5315 5316 if (!b) 5317 return true; 5318 5319 cmp = compare_costs (a->cost, b->cost); 5320 if (cmp < 0) 5321 return true; 5322 5323 if (cmp > 0) 5324 return false; 5325 5326 /* In case the costs are the same, prefer the cheaper candidate. */ 5327 if (a->cand->cost < b->cand->cost) 5328 return true; 5329 5330 return false; 5331 } 5332 5333 5334 /* Returns candidate by that USE is expressed in IVS. */ 5335 5336 static struct cost_pair * 5337 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use) 5338 { 5339 return ivs->cand_for_use[use->id]; 5340 } 5341 5342 /* Computes the cost field of IVS structure. */ 5343 5344 static void 5345 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs) 5346 { 5347 comp_cost cost = ivs->cand_use_cost; 5348 5349 cost.cost += ivs->cand_cost; 5350 5351 cost.cost += ivopts_global_cost_for_size (data, 5352 ivs->n_regs + ivs->num_used_inv_expr); 5353 5354 ivs->cost = cost; 5355 } 5356 5357 /* Remove invariants in set INVS to set IVS. */ 5358 5359 static void 5360 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs) 5361 { 5362 bitmap_iterator bi; 5363 unsigned iid; 5364 5365 if (!invs) 5366 return; 5367 5368 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi) 5369 { 5370 ivs->n_invariant_uses[iid]--; 5371 if (ivs->n_invariant_uses[iid] == 0) 5372 ivs->n_regs--; 5373 } 5374 } 5375 5376 /* Set USE not to be expressed by any candidate in IVS. */ 5377 5378 static void 5379 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs, 5380 struct iv_use *use) 5381 { 5382 unsigned uid = use->id, cid; 5383 struct cost_pair *cp; 5384 5385 cp = ivs->cand_for_use[uid]; 5386 if (!cp) 5387 return; 5388 cid = cp->cand->id; 5389 5390 ivs->bad_uses++; 5391 ivs->cand_for_use[uid] = NULL; 5392 ivs->n_cand_uses[cid]--; 5393 5394 if (ivs->n_cand_uses[cid] == 0) 5395 { 5396 bitmap_clear_bit (ivs->cands, cid); 5397 /* Do not count the pseudocandidates. */ 5398 if (cp->cand->iv) 5399 ivs->n_regs--; 5400 ivs->n_cands--; 5401 ivs->cand_cost -= cp->cand->cost; 5402 5403 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on); 5404 } 5405 5406 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost); 5407 5408 iv_ca_set_remove_invariants (ivs, cp->depends_on); 5409 5410 if (cp->inv_expr_id != -1) 5411 { 5412 ivs->used_inv_expr[cp->inv_expr_id]--; 5413 if (ivs->used_inv_expr[cp->inv_expr_id] == 0) 5414 ivs->num_used_inv_expr--; 5415 } 5416 iv_ca_recount_cost (data, ivs); 5417 } 5418 5419 /* Add invariants in set INVS to set IVS. */ 5420 5421 static void 5422 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs) 5423 { 5424 bitmap_iterator bi; 5425 unsigned iid; 5426 5427 if (!invs) 5428 return; 5429 5430 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi) 5431 { 5432 ivs->n_invariant_uses[iid]++; 5433 if (ivs->n_invariant_uses[iid] == 1) 5434 ivs->n_regs++; 5435 } 5436 } 5437 5438 /* Set cost pair for USE in set IVS to CP. 
*/ 5439 5440 static void 5441 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs, 5442 struct iv_use *use, struct cost_pair *cp) 5443 { 5444 unsigned uid = use->id, cid; 5445 5446 if (ivs->cand_for_use[uid] == cp) 5447 return; 5448 5449 if (ivs->cand_for_use[uid]) 5450 iv_ca_set_no_cp (data, ivs, use); 5451 5452 if (cp) 5453 { 5454 cid = cp->cand->id; 5455 5456 ivs->bad_uses--; 5457 ivs->cand_for_use[uid] = cp; 5458 ivs->n_cand_uses[cid]++; 5459 if (ivs->n_cand_uses[cid] == 1) 5460 { 5461 bitmap_set_bit (ivs->cands, cid); 5462 /* Do not count the pseudocandidates. */ 5463 if (cp->cand->iv) 5464 ivs->n_regs++; 5465 ivs->n_cands++; 5466 ivs->cand_cost += cp->cand->cost; 5467 5468 iv_ca_set_add_invariants (ivs, cp->cand->depends_on); 5469 } 5470 5471 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost); 5472 iv_ca_set_add_invariants (ivs, cp->depends_on); 5473 5474 if (cp->inv_expr_id != -1) 5475 { 5476 ivs->used_inv_expr[cp->inv_expr_id]++; 5477 if (ivs->used_inv_expr[cp->inv_expr_id] == 1) 5478 ivs->num_used_inv_expr++; 5479 } 5480 iv_ca_recount_cost (data, ivs); 5481 } 5482 } 5483 5484 /* Extend set IVS by expressing USE by some of the candidates in it 5485 if possible. Consider all important candidates if candidates in 5486 set IVS don't give any result. */ 5487 5488 static void 5489 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs, 5490 struct iv_use *use) 5491 { 5492 struct cost_pair *best_cp = NULL, *cp; 5493 bitmap_iterator bi; 5494 unsigned i; 5495 struct iv_cand *cand; 5496 5497 gcc_assert (ivs->upto >= use->id); 5498 ivs->upto++; 5499 ivs->bad_uses++; 5500 5501 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi) 5502 { 5503 cand = iv_cand (data, i); 5504 cp = get_use_iv_cost (data, use, cand); 5505 if (cheaper_cost_pair (cp, best_cp)) 5506 best_cp = cp; 5507 } 5508 5509 if (best_cp == NULL) 5510 { 5511 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi) 5512 { 5513 cand = iv_cand (data, i); 5514 cp = get_use_iv_cost (data, use, cand); 5515 if (cheaper_cost_pair (cp, best_cp)) 5516 best_cp = cp; 5517 } 5518 } 5519 5520 iv_ca_set_cp (data, ivs, use, best_cp); 5521 } 5522 5523 /* Get cost for assignment IVS. */ 5524 5525 static comp_cost 5526 iv_ca_cost (struct iv_ca *ivs) 5527 { 5528 /* This was a conditional expression but it triggered a bug in 5529 Sun C 5.5. */ 5530 if (ivs->bad_uses) 5531 return infinite_cost; 5532 else 5533 return ivs->cost; 5534 } 5535 5536 /* Returns true if all dependences of CP are among invariants in IVS. */ 5537 5538 static bool 5539 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp) 5540 { 5541 unsigned i; 5542 bitmap_iterator bi; 5543 5544 if (!cp->depends_on) 5545 return true; 5546 5547 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi) 5548 { 5549 if (ivs->n_invariant_uses[i] == 0) 5550 return false; 5551 } 5552 5553 return true; 5554 } 5555 5556 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains 5557 it before NEXT_CHANGE. */ 5558 5559 static struct iv_ca_delta * 5560 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp, 5561 struct cost_pair *new_cp, struct iv_ca_delta *next_change) 5562 { 5563 struct iv_ca_delta *change = XNEW (struct iv_ca_delta); 5564 5565 change->use = use; 5566 change->old_cp = old_cp; 5567 change->new_cp = new_cp; 5568 change->next_change = next_change; 5569 5570 return change; 5571 } 5572 5573 /* Joins two lists of changes L1 and L2. Destructive -- old lists 5574 are rewritten. 
*/ 5575 5576 static struct iv_ca_delta * 5577 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2) 5578 { 5579 struct iv_ca_delta *last; 5580 5581 if (!l2) 5582 return l1; 5583 5584 if (!l1) 5585 return l2; 5586 5587 for (last = l1; last->next_change; last = last->next_change) 5588 continue; 5589 last->next_change = l2; 5590 5591 return l1; 5592 } 5593 5594 /* Reverse the list of changes DELTA, forming the inverse to it. */ 5595 5596 static struct iv_ca_delta * 5597 iv_ca_delta_reverse (struct iv_ca_delta *delta) 5598 { 5599 struct iv_ca_delta *act, *next, *prev = NULL; 5600 struct cost_pair *tmp; 5601 5602 for (act = delta; act; act = next) 5603 { 5604 next = act->next_change; 5605 act->next_change = prev; 5606 prev = act; 5607 5608 tmp = act->old_cp; 5609 act->old_cp = act->new_cp; 5610 act->new_cp = tmp; 5611 } 5612 5613 return prev; 5614 } 5615 5616 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are 5617 reverted instead. */ 5618 5619 static void 5620 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs, 5621 struct iv_ca_delta *delta, bool forward) 5622 { 5623 struct cost_pair *from, *to; 5624 struct iv_ca_delta *act; 5625 5626 if (!forward) 5627 delta = iv_ca_delta_reverse (delta); 5628 5629 for (act = delta; act; act = act->next_change) 5630 { 5631 from = act->old_cp; 5632 to = act->new_cp; 5633 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from); 5634 iv_ca_set_cp (data, ivs, act->use, to); 5635 } 5636 5637 if (!forward) 5638 iv_ca_delta_reverse (delta); 5639 } 5640 5641 /* Returns true if CAND is used in IVS. */ 5642 5643 static bool 5644 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand) 5645 { 5646 return ivs->n_cand_uses[cand->id] > 0; 5647 } 5648 5649 /* Returns number of induction variable candidates in the set IVS. */ 5650 5651 static unsigned 5652 iv_ca_n_cands (struct iv_ca *ivs) 5653 { 5654 return ivs->n_cands; 5655 } 5656 5657 /* Free the list of changes DELTA. */ 5658 5659 static void 5660 iv_ca_delta_free (struct iv_ca_delta **delta) 5661 { 5662 struct iv_ca_delta *act, *next; 5663 5664 for (act = *delta; act; act = next) 5665 { 5666 next = act->next_change; 5667 free (act); 5668 } 5669 5670 *delta = NULL; 5671 } 5672 5673 /* Allocates new iv candidates assignment. */ 5674 5675 static struct iv_ca * 5676 iv_ca_new (struct ivopts_data *data) 5677 { 5678 struct iv_ca *nw = XNEW (struct iv_ca); 5679 5680 nw->upto = 0; 5681 nw->bad_uses = 0; 5682 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data)); 5683 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data)); 5684 nw->cands = BITMAP_ALLOC (NULL); 5685 nw->n_cands = 0; 5686 nw->n_regs = 0; 5687 nw->cand_use_cost = no_cost; 5688 nw->cand_cost = 0; 5689 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1); 5690 nw->cost = no_cost; 5691 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1); 5692 nw->num_used_inv_expr = 0; 5693 5694 return nw; 5695 } 5696 5697 /* Free memory occupied by the set IVS. */ 5698 5699 static void 5700 iv_ca_free (struct iv_ca **ivs) 5701 { 5702 free ((*ivs)->cand_for_use); 5703 free ((*ivs)->n_cand_uses); 5704 BITMAP_FREE ((*ivs)->cands); 5705 free ((*ivs)->n_invariant_uses); 5706 free ((*ivs)->used_inv_expr); 5707 free (*ivs); 5708 *ivs = NULL; 5709 } 5710 5711 /* Dumps IVS to FILE. 
*/ 5712 5713 static void 5714 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs) 5715 { 5716 const char *pref = " invariants "; 5717 unsigned i; 5718 comp_cost cost = iv_ca_cost (ivs); 5719 5720 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity); 5721 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n", 5722 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity); 5723 bitmap_print (file, ivs->cands, " candidates: ","\n"); 5724 5725 for (i = 0; i < ivs->upto; i++) 5726 { 5727 struct iv_use *use = iv_use (data, i); 5728 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use); 5729 if (cp) 5730 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n", 5731 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity); 5732 else 5733 fprintf (file, " use:%d --> ??\n", use->id); 5734 } 5735 5736 for (i = 1; i <= data->max_inv_id; i++) 5737 if (ivs->n_invariant_uses[i]) 5738 { 5739 fprintf (file, "%s%d", pref, i); 5740 pref = ", "; 5741 } 5742 fprintf (file, "\n\n"); 5743 } 5744 5745 /* Try changing candidate in IVS to CAND for each use. Return cost of the 5746 new set, and store differences in DELTA. Number of induction variables 5747 in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true 5748 the function will try to find a solution with mimimal iv candidates. */ 5749 5750 static comp_cost 5751 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs, 5752 struct iv_cand *cand, struct iv_ca_delta **delta, 5753 unsigned *n_ivs, bool min_ncand) 5754 { 5755 unsigned i; 5756 comp_cost cost; 5757 struct iv_use *use; 5758 struct cost_pair *old_cp, *new_cp; 5759 5760 *delta = NULL; 5761 for (i = 0; i < ivs->upto; i++) 5762 { 5763 use = iv_use (data, i); 5764 old_cp = iv_ca_cand_for_use (ivs, use); 5765 5766 if (old_cp 5767 && old_cp->cand == cand) 5768 continue; 5769 5770 new_cp = get_use_iv_cost (data, use, cand); 5771 if (!new_cp) 5772 continue; 5773 5774 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp)) 5775 continue; 5776 5777 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp)) 5778 continue; 5779 5780 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta); 5781 } 5782 5783 iv_ca_delta_commit (data, ivs, *delta, true); 5784 cost = iv_ca_cost (ivs); 5785 if (n_ivs) 5786 *n_ivs = iv_ca_n_cands (ivs); 5787 iv_ca_delta_commit (data, ivs, *delta, false); 5788 5789 return cost; 5790 } 5791 5792 /* Try narrowing set IVS by removing CAND. Return the cost of 5793 the new set and store the differences in DELTA. START is 5794 the candidate with which we start narrowing. */ 5795 5796 static comp_cost 5797 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs, 5798 struct iv_cand *cand, struct iv_cand *start, 5799 struct iv_ca_delta **delta) 5800 { 5801 unsigned i, ci; 5802 struct iv_use *use; 5803 struct cost_pair *old_cp, *new_cp, *cp; 5804 bitmap_iterator bi; 5805 struct iv_cand *cnd; 5806 comp_cost cost, best_cost, acost; 5807 5808 *delta = NULL; 5809 for (i = 0; i < n_iv_uses (data); i++) 5810 { 5811 use = iv_use (data, i); 5812 5813 old_cp = iv_ca_cand_for_use (ivs, use); 5814 if (old_cp->cand != cand) 5815 continue; 5816 5817 best_cost = iv_ca_cost (ivs); 5818 /* Start narrowing with START. 
*/
5819 new_cp = get_use_iv_cost (data, use, start);
5820
5821 if (data->consider_all_candidates)
5822 {
5823 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5824 {
5825 if (ci == cand->id || (start && ci == start->id))
5826 continue;
5827
5828 cnd = iv_cand (data, ci);
5829
5830 cp = get_use_iv_cost (data, use, cnd);
5831 if (!cp)
5832 continue;
5833
5834 iv_ca_set_cp (data, ivs, use, cp);
5835 acost = iv_ca_cost (ivs);
5836
5837 if (compare_costs (acost, best_cost) < 0)
5838 {
5839 best_cost = acost;
5840 new_cp = cp;
5841 }
5842 }
5843 }
5844 else
5845 {
5846 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5847 {
5848 if (ci == cand->id || (start && ci == start->id))
5849 continue;
5850
5851 cnd = iv_cand (data, ci);
5852
5853 cp = get_use_iv_cost (data, use, cnd);
5854 if (!cp)
5855 continue;
5856
5857 iv_ca_set_cp (data, ivs, use, cp);
5858 acost = iv_ca_cost (ivs);
5859
5860 if (compare_costs (acost, best_cost) < 0)
5861 {
5862 best_cost = acost;
5863 new_cp = cp;
5864 }
5865 }
5866 }
5867 /* Restore the old cp for this use. */
5868 iv_ca_set_cp (data, ivs, use, old_cp);
5869
5870 if (!new_cp)
5871 {
5872 iv_ca_delta_free (delta);
5873 return infinite_cost;
5874 }
5875
5876 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5877 }
5878
5879 iv_ca_delta_commit (data, ivs, *delta, true);
5880 cost = iv_ca_cost (ivs);
5881 iv_ca_delta_commit (data, ivs, *delta, false);
5882
5883 return cost;
5884 }
5885
5886 /* Try optimizing the set of candidates IVS by removing candidates different
5887 from EXCEPT_CAND from it. Return cost of the new set, and store
5888 differences in DELTA. */
5889
5890 static comp_cost
5891 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5892 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5893 {
5894 bitmap_iterator bi;
5895 struct iv_ca_delta *act_delta, *best_delta;
5896 unsigned i;
5897 comp_cost best_cost, acost;
5898 struct iv_cand *cand;
5899
5900 best_delta = NULL;
5901 best_cost = iv_ca_cost (ivs);
5902
5903 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5904 {
5905 cand = iv_cand (data, i);
5906
5907 if (cand == except_cand)
5908 continue;
5909
5910 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
5911
5912 if (compare_costs (acost, best_cost) < 0)
5913 {
5914 best_cost = acost;
5915 iv_ca_delta_free (&best_delta);
5916 best_delta = act_delta;
5917 }
5918 else
5919 iv_ca_delta_free (&act_delta);
5920 }
5921
5922 if (!best_delta)
5923 {
5924 *delta = NULL;
5925 return best_cost;
5926 }
5927
5928 /* Recurse to possibly remove other unnecessary ivs. */
5929 iv_ca_delta_commit (data, ivs, best_delta, true);
5930 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5931 iv_ca_delta_commit (data, ivs, best_delta, false);
5932 *delta = iv_ca_delta_join (best_delta, *delta);
5933 return best_cost;
5934 }
5935
5936 /* Check if CAND_IDX is a candidate other than OLD_CAND and has a
5937 cheaper local cost for USE than BEST_CP. If so, return a pointer to
5938 the corresponding cost_pair; otherwise just return BEST_CP.
/* Check if CAND_IDX is a candidate other than OLD_CAND that has a cheaper
   local cost for USE than BEST_CP.  If so, return a pointer to the
   corresponding cost_pair; otherwise just return BEST_CP.  */

static struct cost_pair*
cheaper_cost_with_cand (struct ivopts_data *data, struct iv_use *use,
			unsigned int cand_idx, struct iv_cand *old_cand,
			struct cost_pair *best_cp)
{
  struct iv_cand *cand;
  struct cost_pair *cp;

  gcc_assert (old_cand != NULL && best_cp != NULL);
  if (cand_idx == old_cand->id)
    return best_cp;

  cand = iv_cand (data, cand_idx);
  cp = get_use_iv_cost (data, use, cand);
  if (cp != NULL && cheaper_cost_pair (cp, best_cp))
    return cp;

  return best_cp;
}

/* Try breaking the locally optimal fixed-point for IVS by replacing
   candidates that are used by more than one iv use.  For each such
   candidate, this function tries to represent the iv uses under that
   candidate using other candidates with lower local cost, then tries to
   prune the new set.  If the new set has a lower cost, it returns the new
   cost after recording the candidate replacement in the list DELTA.  */

static comp_cost
iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
	       struct iv_ca_delta **delta)
{
  bitmap_iterator bi, bj;
  unsigned int i, j, k;
  struct iv_use *use;
  struct iv_cand *cand;
  comp_cost orig_cost, acost;
  struct iv_ca_delta *act_delta, *tmp_delta;
  struct cost_pair *old_cp, *best_cp = NULL;

  *delta = NULL;
  orig_cost = iv_ca_cost (ivs);

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      if (ivs->n_cand_uses[i] == 1
	  || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
	continue;

      cand = iv_cand (data, i);

      act_delta = NULL;
      /* Represent uses under current candidate using other ones with
	 lower local cost.  */
      for (j = 0; j < ivs->upto; j++)
	{
	  use = iv_use (data, j);
	  old_cp = iv_ca_cand_for_use (ivs, use);

	  if (old_cp->cand != cand)
	    continue;

	  best_cp = old_cp;
	  if (data->consider_all_candidates)
	    for (k = 0; k < n_iv_cands (data); k++)
	      best_cp = cheaper_cost_with_cand (data, use, k,
						old_cp->cand, best_cp);
	  else
	    EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, k, bj)
	      best_cp = cheaper_cost_with_cand (data, use, k,
						old_cp->cand, best_cp);

	  if (best_cp == old_cp)
	    continue;

	  act_delta = iv_ca_delta_add (use, old_cp, best_cp, act_delta);
	}
      /* No need for further prune.  */
      if (!act_delta)
	continue;

      /* Prune the new candidate set.  */
      iv_ca_delta_commit (data, ivs, act_delta, true);
      acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
      iv_ca_delta_commit (data, ivs, act_delta, false);
      act_delta = iv_ca_delta_join (act_delta, tmp_delta);

      if (compare_costs (acost, orig_cost) < 0)
	{
	  *delta = act_delta;
	  return acost;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  return orig_cost;
}
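/* A worked example (made-up costs) of the local optimum iv_ca_replace can
   escape: suppose uses u1 and u2 are both expressed by candidate A (use
   cost 2 each, candidate cost 5, total 9), u1 is locally cheaper on B
   (cost 1) and u2 on C (cost 1), each with candidate cost 2, and u2 cannot
   be expressed by B nor u1 by C.  Extending the set by B alone leaves u2 on
   the still-needed A (1 + 2 + 5 + 2 = 10 > 9), and symmetrically for C, so
   no single extension pays off and the greedy loop is stuck.  Replacing
   both uses at once gives 1 + 1 + 2 + 2 = 6 once the subsequent prune
   removes the now-unused A -- a strictly cheaper set that only this
   combined replacement step can reach.  */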
/* Tries to extend the set IVS in the best possible way in order to
   express the USE.  If ORIGINALP is true, prefer candidates from
   the original set of IVs, otherwise favor important candidates not
   based on any memory object.  */

static bool
try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
		  struct iv_use *use, bool originalp)
{
  comp_cost best_cost, act_cost;
  unsigned i;
  bitmap_iterator bi;
  struct iv_cand *cand;
  struct iv_ca_delta *best_delta = NULL, *act_delta;
  struct cost_pair *cp;

  iv_ca_add_use (data, ivs, use);
  best_cost = iv_ca_cost (ivs);
  cp = iv_ca_cand_for_use (ivs, use);
  if (cp)
    {
      best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
      iv_ca_set_no_cp (data, ivs, use);
    }

  /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
     first try important candidates not based on any memory object.  Only if
     this fails, try the specific ones.  Rationale -- in loops with many
     variables the best choice often is to use just one generic biv.  If we
     added many ivs specific to the uses here, the optimization algorithm
     later would be likely to get stuck in a local minimum, causing us to
     create too many ivs.  The approach from few ivs to more seems more
     likely to be successful -- starting from few ivs, replacing an expensive
     use by a specific iv should always be a win.  */
  EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (originalp && cand->pos != IP_ORIGINAL)
	continue;

      if (!originalp && cand->iv->base_object != NULL_TREE)
	continue;

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      cp = get_use_iv_cost (data, use, cand);
      if (!cp)
	continue;

      iv_ca_set_cp (data, ivs, use, cp);
      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
			       true);
      iv_ca_set_no_cp (data, ivs, use);
      act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);

      if (compare_costs (act_cost, best_cost) < 0)
	{
	  best_cost = act_cost;

	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (infinite_cost_p (best_cost))
    {
      for (i = 0; i < use->n_map_members; i++)
	{
	  cp = use->cost_map + i;
	  cand = cp->cand;
	  if (!cand)
	    continue;

	  /* Already tried this.  */
	  if (cand->important)
	    {
	      if (originalp && cand->pos == IP_ORIGINAL)
		continue;
	      if (!originalp && cand->iv->base_object == NULL_TREE)
		continue;
	    }

	  if (iv_ca_cand_used_p (ivs, cand))
	    continue;

	  act_delta = NULL;
	  iv_ca_set_cp (data, ivs, use, cp);
	  act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
	  iv_ca_set_no_cp (data, ivs, use);
	  act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
				       cp, act_delta);

	  if (compare_costs (act_cost, best_cost) < 0)
	    {
	      best_cost = act_cost;

	      if (best_delta)
		iv_ca_delta_free (&best_delta);
	      best_delta = act_delta;
	    }
	  else
	    iv_ca_delta_free (&act_delta);
	}
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  iv_ca_delta_free (&best_delta);

  return !infinite_cost_p (best_cost);
}
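/* Illustrative sketch (not part of the pass): the shape of try_add_cand_for
   above is a two-phase search -- scan a preferred subset first, and fall
   back to the full table only when no preferred choice yields a finite
   cost.  With hypothetical names, and INFTY standing for "no finite cost":

     static int
     two_phase_pick (const int *cost, const bool *preferred, unsigned n)
     {
       int best = INFTY;
       int best_idx = -1;

       // Phase 1: only the preferred subset.
       for (unsigned i = 0; i < n; i++)
         if (preferred[i] && cost[i] < best)
           {
             best = cost[i];
             best_idx = (int) i;
           }

       // Phase 2: everything, but only if phase 1 found nothing finite.
       if (best_idx < 0)
         for (unsigned i = 0; i < n; i++)
           if (cost[i] < best)
             {
               best = cost[i];
               best_idx = (int) i;
             }

       return best_idx;   // -1 if the use cannot be expressed at all
     }

   Here "preferred" plays the role of data->important_candidates filtered by
   ORIGINALP, and the fallback loop corresponds to walking use->cost_map.  */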
/* Finds an initial assignment of candidates to uses.  */

static struct iv_ca *
get_initial_solution (struct ivopts_data *data, bool originalp)
{
  struct iv_ca *ivs = iv_ca_new (data);
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
      {
	iv_ca_free (&ivs);
	return NULL;
      }

  return ivs;
}

/* Tries to improve the set of induction variables IVS.  TRY_REPLACE_P
   points to a bool variable; if it is true, this function tries to break
   a locally optimal fixed-point by replacing candidates in IVS.  */

static bool
try_improve_iv_set (struct ivopts_data *data,
		    struct iv_ca *ivs, bool *try_replace_p)
{
  unsigned i, n_ivs;
  comp_cost acost, best_cost = iv_ca_cost (ivs);
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;

  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
      if (!act_delta)
	continue;

      /* If we successfully added the candidate and the set is small enough,
	 try optimizing it by removing other candidates.  */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
	{
	  iv_ca_delta_commit (data, ivs, act_delta, true);
	  acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
	  iv_ca_delta_commit (data, ivs, act_delta, false);
	  act_delta = iv_ca_delta_join (act_delta, tmp_delta);
	}

      if (compare_costs (acost, best_cost) < 0)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* Try removing the candidates from the set instead.  */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      if (!best_delta && *try_replace_p)
	{
	  *try_replace_p = false;
	  /* So far the candidate selection algorithm tends to choose few
	     IVs, which handles the cases in which a loop has many variables
	     but the best choice is to use only one general biv.  One
	     weakness is that it cannot handle the opposite case, in which
	     different candidates should be chosen for different uses.  To
	     solve that problem, we replace candidates in the manner
	     described in the comment of iv_ca_replace, giving the general
	     algorithm a chance to break the locally optimal fixed-point in
	     these cases.  */
	  best_cost = iv_ca_replace (data, ivs, &best_delta);
	}

      if (!best_delta)
	return false;
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
  iv_ca_delta_free (&best_delta);
  return true;
}

/* Attempts to find the optimal set of induction variables.  We use a
   simple greedy heuristic: we try to replace at most one candidate in
   the selected solution and remove the unused ivs while this improves
   the cost.  */

static struct iv_ca *
find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
{
  struct iv_ca *set;
  bool try_replace_p = true;

  /* Get the initial solution.  */
  set = get_initial_solution (data, originalp);
  if (!set)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
      return NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial set of candidates:\n");
      iv_ca_dump (data, dump_file, set);
    }

  while (try_improve_iv_set (data, set, &try_replace_p))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Improved to:\n");
	  iv_ca_dump (data, dump_file, set);
	}
    }

  return set;
}
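/* Illustrative sketch (not part of the pass): the contract of
   try_improve_iv_set above is "make one strictly improving move, or report
   failure", with progressively more aggressive moves tried in order:
   extension first, then pruning, then (at most once) replacement.  The
   driver above it just iterates that to a fixed point.  With hypothetical
   callbacks, the control structure is:

     static void
     improve_to_fixed_point (void *state,
                             bool (*extend) (void *),
                             bool (*prune) (void *),
                             bool (*replace) (void *))
     {
       bool allow_replace = true;   // mirrors *TRY_REPLACE_P

       for (;;)
         {
           if (extend (state))      // cheapest move first
             continue;
           if (prune (state))       // then try shrinking the set
             continue;
           if (allow_replace)
             {
               allow_replace = false;   // attempted at most once
               if (replace (state))
                 continue;
             }
           break;                   // no move improves: local optimum
         }
     }

   Because every accepted move strictly lowers a nonnegative cost, the
   iteration terminates.  */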
static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  unsigned i;
  struct iv_ca *set, *origset;
  struct iv_use *use;
  comp_cost cost, origcost;

  /* Determine the cost based on a strategy that starts with original IVs,
     and try again using a strategy that prefers candidates not based
     on any IVs.  */
  origset = find_optimal_iv_set_1 (data, true);
  set = find_optimal_iv_set_1 (data, false);

  if (!origset && !set)
    return NULL;

  origcost = origset ? iv_ca_cost (origset) : infinite_cost;
  cost = set ? iv_ca_cost (set) : infinite_cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
	       origcost.cost, origcost.complexity);
      fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
	       cost.cost, cost.complexity);
    }

  /* Choose the one with the best cost.  */
  if (compare_costs (origcost, cost) <= 0)
    {
      if (set)
	iv_ca_free (&set);
      set = origset;
    }
  else if (origset)
    iv_ca_free (&origset);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      use->selected = iv_ca_cand_for_use (set, use)->cand;
    }

  return set;
}

/* Creates a new induction variable corresponding to CAND.  */

static void
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
{
  gimple_stmt_iterator incr_pos;
  tree base;
  bool after = false;

  if (!cand->iv)
    return;

  switch (cand->pos)
    {
    case IP_NORMAL:
      incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
      break;

    case IP_END:
      incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
      after = true;
      break;

    case IP_AFTER_USE:
      after = true;
      /* fall through */
    case IP_BEFORE_USE:
      incr_pos = gsi_for_stmt (cand->incremented_at);
      break;

    case IP_ORIGINAL:
      /* Mark that the iv is preserved.  */
      name_info (data, cand->var_before)->preserve_biv = true;
      name_info (data, cand->var_after)->preserve_biv = true;

      /* Rewrite the increment so that it uses var_before directly.  */
      find_interesting_uses_op (data, cand->var_after)->selected = cand;
      return;
    }

  gimple_add_tmp_var (cand->var_before);

  base = unshare_expr (cand->iv->base);

  create_iv (base, unshare_expr (cand->iv->step),
	     cand->var_before, data->current_loop,
	     &incr_pos, after, &cand->var_before, &cand->var_after);
}
/* Creates new induction variables described in SET.  */

static void
create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
{
  unsigned i;
  struct iv_cand *cand;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);
      create_new_iv (data, cand);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Selected IV set for loop %d",
	       data->current_loop->num);
      if (data->loop_loc != UNKNOWN_LOCATION)
	fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
		 LOCATION_LINE (data->loop_loc));
      fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
      EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
	{
	  cand = iv_cand (data, i);
	  dump_cand (dump_file, cand);
	}
      fprintf (dump_file, "\n");
    }
}

/* Rewrites USE (definition of iv used in a nonlinear expression)
   using candidate CAND.  */

static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
			    struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree op, tgt;
  gassign *ass;
  gimple_stmt_iterator bsi;

  /* An important special case -- if we are asked to express value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back).  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      enum tree_code stmt_code;

      gcc_assert (is_gimple_assign (use->stmt));
      gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);

      /* Check whether we may leave the computation unchanged.
	 This is the case only if it does not rely on other
	 computations in the loop -- otherwise, the computation
	 we rely upon may be removed in remove_unused_ivs,
	 thus leading to ICE.  */
      stmt_code = gimple_assign_rhs_code (use->stmt);
      if (stmt_code == PLUS_EXPR
	  || stmt_code == MINUS_EXPR
	  || stmt_code == POINTER_PLUS_EXPR)
	{
	  if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
	    op = gimple_assign_rhs2 (use->stmt);
	  else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
	    op = gimple_assign_rhs1 (use->stmt);
	  else
	    op = NULL_TREE;
	}
      else
	op = NULL_TREE;

      if (op && expr_invariant_in_loop_p (data->current_loop, op))
	return;
    }

  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  switch (gimple_code (use->stmt))
    {
    case GIMPLE_PHI:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
	return;

      bsi = gsi_after_labels (gimple_bb (use->stmt));
      break;

    case GIMPLE_ASSIGN:
      tgt = gimple_assign_lhs (use->stmt);
      bsi = gsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  if (!valid_gimple_rhs_p (comp)
      || (gimple_code (use->stmt) != GIMPLE_PHI
	  /* We can't allow re-allocating the stmt as it might be pointed
	     to still.  */
	  && (get_gimple_rhs_num_ops (TREE_CODE (comp))
	      >= gimple_num_ops (gsi_stmt (bsi)))))
    {
      comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
				       true, GSI_SAME_STMT);
      if (POINTER_TYPE_P (TREE_TYPE (tgt)))
	{
	  duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
	  /* As this isn't a plain copy we have to reset alignment
	     information.  */
	  if (SSA_NAME_PTR_INFO (comp))
	    mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
	}
    }

  if (gimple_code (use->stmt) == GIMPLE_PHI)
    {
      ass = gimple_build_assign (tgt, comp);
      gsi_insert_before (&bsi, ass, GSI_SAME_STMT);

      bsi = gsi_for_stmt (use->stmt);
      remove_phi_node (&bsi, false);
    }
  else
    {
      gimple_assign_set_rhs_from_tree (&bsi, comp);
      use->stmt = gsi_stmt (bsi);
    }
}
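/* For instance (schematic GIMPLE, made-up SSA names): if the original biv
   increment

     i_7 = i_1 + 1;

   is itself the use being rewritten, the candidate was preserved with
   pos == IP_ORIGINAL, and cand->incremented_at is that very statement,
   then re-expressing i_7 through the candidate would only reproduce the
   same computation (possibly with casts to unsigned and back), so the
   early return above leaves the statement untouched.  */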
/* Performs a peephole optimization to reorder the iv update statement with
   a mem ref to enable instruction combining in later phases.  The mem ref
   uses the iv value before the update, so the reordering transformation
   requires adjustment of the offset.  CAND is the selected IV_CAND.

   Example:

   t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
   iv2 = iv1 + 1;

   if (t < val)      (1)
     goto L;
   goto Head;

   Directly propagating t over to (1) would introduce an overlapping live
   range and thus increase register pressure.  This peephole transforms
   the code into:

   iv2 = iv1 + 1;
   t = MEM_REF (base, iv2, 8, 8);
   if (t < val)
     goto L;
   goto Head;  */

static void
adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
{
  tree var_after;
  gimple iv_update, stmt;
  basic_block bb;
  gimple_stmt_iterator gsi, gsi_iv;

  if (cand->pos != IP_NORMAL)
    return;

  var_after = cand->var_after;
  iv_update = SSA_NAME_DEF_STMT (var_after);

  bb = gimple_bb (iv_update);
  gsi = gsi_last_nondebug_bb (bb);
  stmt = gsi_stmt (gsi);

  /* Only handle conditional statement for now.  */
  if (gimple_code (stmt) != GIMPLE_COND)
    return;

  gsi_prev_nondebug (&gsi);
  stmt = gsi_stmt (gsi);
  if (stmt != iv_update)
    return;

  gsi_prev_nondebug (&gsi);
  if (gsi_end_p (gsi))
    return;

  stmt = gsi_stmt (gsi);
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return;

  if (stmt != use->stmt)
    return;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Reordering \n");
      print_gimple_stmt (dump_file, iv_update, 0, 0);
      print_gimple_stmt (dump_file, use->stmt, 0, 0);
      fprintf (dump_file, "\n");
    }

  gsi = gsi_for_stmt (use->stmt);
  gsi_iv = gsi_for_stmt (iv_update);
  gsi_move_before (&gsi_iv, &gsi);

  cand->pos = IP_BEFORE_USE;
  cand->incremented_at = use->stmt;
}
/* Rewrites USE (address that is an iv) using candidate CAND.  */

static void
rewrite_use_address (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  tree base_hint = NULL_TREE;
  tree ref, iv;
  bool ok;

  adjust_iv_update_pos (cand, use);
  ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  /* To avoid undefined overflow problems, all IV candidates use unsigned
     integer types.  The drawback is that this makes it impossible for
     create_mem_ref to distinguish an IV that is based on a memory object
     from one that represents simply an offset.

     To work around this problem, we pass a hint to create_mem_ref that
     indicates which variable (if any) in aff is an IV based on a memory
     object.  Note that we only consider the candidate.  If this is not
     based on an object, the base of the reference is in some subexpression
     of the use -- but these will use pointer types, so they are recognized
     by the create_mem_ref heuristics anyway.  */
  if (cand->iv->base_object)
    base_hint = var_at_stmt (data->current_loop, cand, use->stmt);

  iv = var_at_stmt (data->current_loop, cand, use->stmt);
  tree type = TREE_TYPE (*use->op_p);
  unsigned int align = get_object_alignment (*use->op_p);
  if (align != TYPE_ALIGN (type))
    type = build_aligned_type (type, align);
  ref = create_mem_ref (&bsi, type, &aff,
			reference_alias_ptr_type (*use->op_p),
			iv, base_hint, data->speed);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}

/* Rewrites USE (the condition such that one of the arguments is an iv) using
   candidate CAND.  */

static void
rewrite_use_compare (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, *var_p, op, bound;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
  bool ok;

  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);
      gimple_seq stmts;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Replacing exit test: ");
	  print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
	}
      compare = cp->comp;
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
      if (stmts)
	gsi_insert_seq_on_edge_immediate (
		loop_preheader_edge (data->current_loop),
		stmts);

      gcond *cond_stmt = as_a <gcond *> (use->stmt);
      gimple_cond_set_lhs (cond_stmt, var);
      gimple_cond_set_code (cond_stmt, compare);
      gimple_cond_set_rhs (cond_stmt, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
  gcc_assert (ok);

  *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
				     true, GSI_SAME_STMT);
}
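/* For example (schematic GIMPLE dump syntax, made-up SSA names): an address
   use such as

     MEM[(int *)&a + (sizetype) i_1 * 4] = 0;

   is re-expressed through the selected candidate as an affine combination
   and handed to create_mem_ref, which might emit

     MEM[symbol: a, index: ivtmp_9, step: 4] = 0;

   where ivtmp_9 is the candidate's unsigned IV.  The base hint is what
   tells create_mem_ref that ivtmp_9 stands for a pointer into "a" when the
   candidate is object-based; its unsigned type alone could not convey
   that.  */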
/* Rewrites USE using candidate CAND.  */

static void
rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      rewrite_use_nonlinear_expr (data, use, cand);
      break;

    case USE_ADDRESS:
      rewrite_use_address (data, use, cand);
      break;

    case USE_COMPARE:
      rewrite_use_compare (data, use, cand);
      break;

    default:
      gcc_unreachable ();
    }

  update_stmt (use->stmt);
}

/* Rewrite the uses using the selected induction variables.  */

static void
rewrite_uses (struct ivopts_data *data)
{
  unsigned i;
  struct iv_cand *cand;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      cand = use->selected;
      gcc_assert (cand);

      rewrite_use (data, use, cand);
    }
}

/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;
  bitmap toremove = BITMAP_ALLOC (NULL);

  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
	  && !integer_zerop (info->iv->step)
	  && !info->inv_id
	  && !info->iv->have_use_for
	  && !info->preserve_biv)
	{
	  bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));

	  tree def = info->iv->ssa_name;

	  if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
	    {
	      imm_use_iterator imm_iter;
	      use_operand_p use_p;
	      gimple stmt;
	      int count = 0;

	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
		{
		  if (!gimple_debug_bind_p (stmt))
		    continue;

		  /* We just want to determine whether to do nothing
		     (count == 0), to substitute the computed
		     expression into a single use of the SSA DEF by
		     itself (count == 1), or to use a debug temp
		     because the SSA DEF is used multiple times or as
		     part of a larger expression (count > 1).  */
		  count++;
		  if (gimple_debug_bind_get_value (stmt) != def)
		    count++;

		  if (count > 1)
		    BREAK_FROM_IMM_USE_STMT (imm_iter);
		}

	      if (!count)
		continue;

	      struct iv_use dummy_use;
	      struct iv_cand *best_cand = NULL, *cand;
	      unsigned i, best_pref = 0, cand_pref;

	      memset (&dummy_use, 0, sizeof (dummy_use));
	      dummy_use.iv = info->iv;
	      for (i = 0; i < n_iv_uses (data) && i < 64; i++)
		{
		  cand = iv_use (data, i)->selected;
		  if (cand == best_cand)
		    continue;
		  cand_pref = operand_equal_p (cand->iv->step,
					       info->iv->step, 0)
			      ? 4 : 0;
		  cand_pref
		    += TYPE_MODE (TREE_TYPE (cand->iv->base))
		       == TYPE_MODE (TREE_TYPE (info->iv->base))
		       ? 2 : 0;
		  cand_pref
		    += TREE_CODE (cand->iv->base) == INTEGER_CST
		       ? 1 : 0;
		  if (best_cand == NULL || best_pref < cand_pref)
		    {
		      best_cand = cand;
		      best_pref = cand_pref;
		    }
		}

	      if (!best_cand)
		continue;

	      tree comp = get_computation_at (data->current_loop,
					      &dummy_use, best_cand,
					      SSA_NAME_DEF_STMT (def));
	      if (!comp)
		continue;

	      if (count > 1)
		{
		  tree vexpr = make_node (DEBUG_EXPR_DECL);
		  DECL_ARTIFICIAL (vexpr) = 1;
		  TREE_TYPE (vexpr) = TREE_TYPE (comp);
		  if (SSA_NAME_VAR (def))
		    DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
		  else
		    DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
		  gdebug *def_temp
		    = gimple_build_debug_bind (vexpr, comp, NULL);
		  gimple_stmt_iterator gsi;

		  if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
		    gsi = gsi_after_labels (gimple_bb
					    (SSA_NAME_DEF_STMT (def)));
		  else
		    gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));

		  gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
		  comp = vexpr;
		}

	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
		{
		  if (!gimple_debug_bind_p (stmt))
		    continue;

		  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
		    SET_USE (use_p, comp);

		  update_stmt (stmt);
		}
	    }
	}
    }

  release_defs_bitset (toremove);

  BITMAP_FREE (toremove);
}
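/* Illustrative sketch (not part of the pass): the COUNT computation in
   remove_unused_ivs above is a three-way classification of how a dying SSA
   definition appears in debug binds: not at all, exactly once as the whole
   bound value, or anything more.  As a stand-alone function over the values
   of the binds that mention DEF (hypothetical representation):

     enum debug_subst_kind { SUBST_NONE, SUBST_INLINE, SUBST_VIA_TEMP };

     static enum debug_subst_kind
     classify_debug_uses (const int *bind_values, unsigned n_binds, int def)
     {
       unsigned count = 0;

       for (unsigned i = 0; i < n_binds; i++)
         {
           count++;                     // one debug bind mentions DEF
           if (bind_values[i] != def)
             count++;                   // DEF is buried in a larger value
           if (count > 1)
             break;                     // already know a temp is needed
         }

       if (count == 0)
         return SUBST_NONE;             // nothing to do
       if (count == 1)
         return SUBST_INLINE;           // substitute the expression directly
       return SUBST_VIA_TEMP;           // introduce a DEBUG_EXPR_DECL temp
     }

   Using a debug temp in the multi-use case keeps the replacement expression
   from being duplicated into every bind.  */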
/* Frees memory occupied by struct tree_niter_desc in *VALUE.  Callback
   for hash_map::traverse.  */

bool
free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
{
  free (value);
  return true;
}

/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      data->niters->traverse<void *, free_tree_niter_desc> (NULL);
      delete data->niters;
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
	if (use->cost_map[j].depends_on)
	  BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  data->iv_uses.truncate (0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      free (cand->iv);
      if (cand->depends_on)
	BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  data->iv_candidates.truncate (0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info,
				     data->version_info_size);
    }

  data->max_inv_id = 0;

  FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
    SET_DECL_RTL (obj, NULL_RTX);

  decl_rtl_to_reset.truncate (0);

  data->inv_expr_tab->empty ();
  data->inv_expr_id = 0;
}
/* Finalizes data structures used by the iv optimization pass.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  decl_rtl_to_reset.release ();
  data->iv_uses.release ();
  data->iv_candidates.release ();
  delete data->inv_expr_tab;
  data->inv_expr_tab = NULL;
  free_affine_expand_cache (&data->name_expansion_cache);
}

/* Returns true if the loop body BODY includes any function calls.  */

static bool
loop_body_includes_call (basic_block *body, unsigned num_nodes)
{
  gimple_stmt_iterator gsi;
  unsigned i;

  for (i = 0; i < num_nodes; i++)
    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple stmt = gsi_stmt (gsi);
	if (is_gimple_call (stmt)
	    && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
	  return true;
      }
  return false;
}

/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit = single_dom_exit (loop);
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->loop_loc = find_loop_location (loop);
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d", loop->num);
      if (data->loop_loc != UNKNOWN_LOCATION)
	fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
		 LOCATION_LINE (data->loop_loc));
      fprintf (dump_file, "\n");

      if (exit)
	{
	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
		   exit->src->index, exit->dest->index);
	  print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
	  fprintf (dump_file, "\n");
	}

      fprintf (dump_file, "\n");
    }

  body = get_loop_body (loop);
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

  data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);

  /* For each ssa name determines whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_use_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);
  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}

/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}