/* Induction variable optimizations.
   Copyright (C) 2003-2013 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does so in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
         induction variable in the set and adds its cost to the sum.  The cost
         reflects the time spent on modifying the induction variable's value
         to be usable for the given purpose (adding base and offset for
         arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
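/* As a minimal illustration (not part of the original commentary; variable
   names are made up): given a loop such as

     for (i = 0; i < n; i++)
       a[i] = 0;

   the pass may strength-reduce the address computation &a[i] into a pointer
   induction variable and eliminate the counter I entirely, yielding code
   equivalent to

     for (p = &a[0]; p != &a[n]; p++)
       *p = 0;

   Which candidate set is chosen depends on the target-specific costs
   described above.  */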
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "tm_p.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "pointer-set.h"
#include "hashtab.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
#include "expmed.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "expr.h"
#include "recog.h"

/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}

/* Representation of the induction variable.  */
struct iv
{
  tree base;            /* Initial value of the iv.  */
  tree base_object;     /* A memory object to which the induction variable
                           points.  */
  tree step;            /* Step of the iv (constant only).  */
  tree ssa_name;        /* The ssa name with the value.  */
  bool biv_p;           /* Is it a biv?  */
  bool have_use_for;    /* Do we already have a use for it?  */
  unsigned use_id;      /* The identifier of that use, if so.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;            /* The ssa name.  */
  struct iv *iv;        /* Induction variable description.  */
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
                           an expression that is not an induction variable.  */
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
  unsigned inv_id;      /* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
  USE_ADDRESS,          /* Use in an address.  */
  USE_COMPARE           /* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;             /* The runtime cost.  */
  unsigned complexity;  /* The estimate of the complexity of the code for
                           the computation (in no concrete units --
                           complexity field should be larger for more
                           complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};
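/* An illustrative reading of the two fields (not from the original sources):
   a use that can consume a candidate's value directly may be charged no_cost,
   a use that a candidate cannot express at all gets infinite_cost, and
   COMPLEXITY serves to prefer simpler expressions and addressing modes among
   otherwise equally cheap alternatives.  */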
/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand; /* The candidate.  */
  comp_cost cost;       /* The cost.  */
  bitmap depends_on;    /* The list of invariants that have to be
                           preserved.  */
  tree value;           /* For final value elimination, the expression for
                           the final value of the iv.  For iv elimination,
                           the new bound to compare with.  */
  enum tree_code comp;  /* For iv elimination, the comparison.  */
  int inv_expr_id;      /* Loop invariant expression id.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;          /* The id of the use.  */
  enum use_type type;   /* Type of the use.  */
  struct iv *iv;        /* The induction variable it is based on.  */
  gimple stmt;          /* Statement in which it occurs.  */
  tree *op_p;           /* The place where it occurs.  */
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
                           important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
                        /* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
                        /* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,            /* At the end, just before the exit condition.  */
  IP_END,               /* At the end of the latch block.  */
  IP_BEFORE_USE,        /* Immediately before a specific use.  */
  IP_AFTER_USE,         /* Immediately after a specific use.  */
  IP_ORIGINAL           /* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;          /* The number of the candidate.  */
  bool important;       /* Whether this is an "important" candidate, i.e. such
                           that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
                           incremented.  */
  tree var_before;      /* The variable used for it before increment.  */
  tree var_after;       /* The variable used for it after increment.  */
  struct iv *iv;        /* The value of the candidate.  NULL for
                           "pseudocandidate" used to indicate the possibility
                           to replace the final value of an iv by direct
                           computation of the value.  */
  unsigned cost;        /* Cost of the candidate.  */
  unsigned cost_step;   /* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
                              where it is incremented.  */
  bitmap depends_on;    /* The list of invariants that are used in step of the
                           biv.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;

typedef struct iv_cand *iv_cand_p;

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  htab_t inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use_p> iv_uses;

  /* The candidates.  */
  vec<iv_cand_p> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider all candidates (as opposed to just related and
     important ones) when replacing a use.  */
  bool consider_all_candidates;
  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via a single exit.  */
  bool loop_single_exit_p;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on the number of candidates below which all candidates are
   considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}
/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

extern void dump_uses (FILE *, struct ivopts_data *);
void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
           cand->id, cand->important ? " (important)" : "");
  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}
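/* An illustrative reading (not part of the original commentary): for an
   IP_NORMAL candidate, which is incremented just before the exit test, the
   exit test itself and the statements in the latch already see the
   incremented value, whereas for an IP_END candidate, incremented at the end
   of the latch, no statement in the loop body does.  */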
/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
                                  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
        return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
        return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
                            idx_contains_abnormal_ssa_name_p,
                            NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
        return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
        return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Returns the structure describing the number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine the number of iterations.  We cannot safely work
         with ssa names that appear in phi nodes on abnormal edges, so that
         we do not create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
                                      exit, desc, true)
          || contains_abnormal_ssa_name_p (desc->niter))
        {
          XDELETE (desc);
          desc = NULL;
        }
      slot = pointer_map_insert (data->niters, exit);
      *slot = desc;
    }
  else
    desc = (struct tree_niter_desc *) *slot;

  return desc;
}

/* Returns the structure describing the number of iterations determined from
   the single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}
/* Hash table equality function for expressions.  */

static int
htab_inv_expr_eq (const void *ent1, const void *ent2)
{
  const struct iv_inv_expr_ent *expr1 =
      (const struct iv_inv_expr_ent *)ent1;
  const struct iv_inv_expr_ent *expr2 =
      (const struct iv_inv_expr_ent *)ent2;

  return expr1->hash == expr2->hash
         && operand_equal_p (expr1->expr, expr2->expr, 0);
}

/* Hash function for loop invariant expressions.  */

static hashval_t
htab_inv_expr_hash (const void *ent)
{
  const struct iv_inv_expr_ent *expr =
      (const struct iv_inv_expr_ent *)ent;
  return expr->hash;
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab = htab_create (10, htab_inv_expr_hash,
                                    htab_inv_expr_eq, free);
  data->inv_expr_id = 0;
  decl_rtl_to_reset.create (20);
}

/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
        return expr;

      if (TREE_CODE (base) == MEM_REF)
        return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
                           build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
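/* For instance (illustrative only), determine_base_object maps &a[i] back to
   the address of A, maps p + 4 to the base object of P, and maps an integer
   constant cast to a pointer to NULL_TREE.  */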
/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds the induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
          || !flow_bb_inside_loop_p (data->current_loop, bb))
        set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (virtual_operand_p (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
        continue;

      step = determine_biv_step (phi);
      if (!step)
        continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
          || contains_abnormal_ssa_name_p (step))
        continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
        {
          if (POINTER_TYPE_P (type))
            step = convert_to_ptrofftype (step);
          else
            step = fold_convert (type, step);
        }

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
        continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
        continue;

      /* If the increment is in a subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
          || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
        continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}
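/* An illustrative distinction (names made up): in

     i = 0;
     while (i < n) { j = 4 * i; ...; i = i + 1; }

   the phi-defined variable I is a basic iv (biv) with base 0 and step 1,
   while J is a general iv (giv) with base 0 and step 4 derived from it;
   find_bivs above discovers the former, find_givs below the latter.  */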
/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
        {
          fprintf (dump_file, "  number of iterations ");
          print_generic_expr (dump_file, niter->niter, TDF_SLIM);
          if (!integer_zerop (niter->may_be_zero))
            {
              fprintf (dump_file, "; zero if ");
              print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
            }
          fprintf (dump_file, "\n\n");
        }

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          if (ver_info (data, i)->iv)
            dump_iv (dump_file, ver_info (data, i)->iv);
        }
    }

  return true;
}
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
            gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* Reset the ssa name, so that it is not shown in the dumps, if it was
     not reset by the caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  data->iv_uses.safe_push (use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
              || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to the location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
                       tree **control_var, tree **bound,
                       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* The control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}

/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
          && flow_bb_inside_loop_p (loop, def_bb))
        return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
        && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Returns true if statement STMT is obviously invariant in LOOP,
   i.e. if all its operands on the RHS are defined outside of the LOOP.
   LOOP should not be the function body.  */

bool
stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
{
  unsigned i;
  tree lhs;

  gcc_assert (loop_depth (loop) > 0);

  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); i++)
    {
      tree op = gimple_op (stmt, i);
      if (op != lhs && !expr_invariant_in_loop_p (loop, op))
        return false;
    }

  return true;
}
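/* E.g. (illustrative): for a reference a[i] with 4-byte elements where I is
   an iv with step 2, the idx_find_step callback below adds 8 to the step
   accumulated in DATA and rewrites the index to the initial value of I.  */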
/* Accumulates the steps of indices into DATA and replaces their values with
   the initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is an array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both the lower bound
     and the step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
          && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
        return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
          || !expr_invariant_in_loop_p (loop, lbound))
        return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  For a base of *D42 with iv->base being &x[0], we produce
     *&x[0], which is not folded and does not trigger the
     ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
        return false;
    }
  else
    /* The step for pointer arithmetic already is 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
                            sizetype, &iv_base, &iv_step, dta->stmt,
                            false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
                void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */
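/* E.g. (illustrative values): for TOP = a * 6 + a * 2 and BOT = a the
   function below stores 8 to *MUL and returns true, while for TOP = a + 1
   and BOT = a it returns false.  */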
static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
        return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
        return false;

      *mul = (res * tree_to_double_int (mby)).sext (precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
          || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
        return false;

      if (code == MINUS_EXPR)
        p1 = -p1;
      *mul = (p0 + p1).sext (precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
        return false;

      p0 = tree_to_double_int (top).sext (precision);
      p1 = tree_to_double_int (bot).sext (precision);
      if (p1.is_zero ())
        return false;
      *mul = p0.sdivmod (p1, FLOOR_DIV_EXPR, &res).sext (precision);
      return res.is_zero ();

    default:
      return false;
    }
}

/* Returns true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically a copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
                              &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = get_object_alignment (base);
  base_align = MAX (base_align, TYPE_ALIGN (base_type));

  if (mode != BLKmode)
    {
      unsigned mode_align = GET_MODE_ALIGNMENT (mode);

      if (base_align < mode_align
          || (bitpos % mode_align) != 0
          || (bitpos % BITS_PER_UNIT) != 0)
        return true;

      if (toffset
          && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
        return true;

      if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
        return true;
    }

  return false;
}

/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
         target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
             || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversion may wrap non-addressable objects
         and make them look addressable.
         After some processing the
         non-addressability may be uncovered again, causing ADDR_EXPRs
         of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
          || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
        return true;

      /* ... fall through ... */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}

/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
          && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_BASE (base));
          if (!civ)
            goto fail;

          TMR_BASE (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX2 (base)
          && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX2 (base));
          if (!civ)
            goto fail;

          TMR_INDEX2 (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX (base)
          && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX (base));
          if (!civ)
            goto fail;

          TMR_INDEX (base) = civ->base;
          astep = civ->step;

          if (astep)
            {
              if (TMR_STEP (base))
                astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

              step = fold_build2 (PLUS_EXPR, type, step, astep);
            }
        }

      if (integer_zerop (step))
        goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
          || integer_zerop (ifs_ivopts_data.step))
        goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
         to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
        goto fail;

      /* Moreover, on strict alignment platforms, check that it is
         sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
        goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
         have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
        {
          tree *ref = &TREE_OPERAND (base, 0);
          while (handled_component_p (*ref))
            ref = &TREE_OPERAND (*ref, 0);
          if (TREE_CODE (*ref) == MEM_REF)
            {
              tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
                                      TREE_OPERAND (*ref, 0),
                                      TREE_OPERAND (*ref, 1));
              if (tem)
                *ref = tem;
            }
        }
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}

/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
        {
          /* If the statement defines an induction variable, the uses are not
             interesting by themselves.  */

          iv = get_iv (data, *lhs);

          if (iv && !integer_zerop (iv->step))
            return;
        }

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
          && (REFERENCE_CLASS_P (*rhs)
              || is_gimple_val (*rhs)))
        {
          if (REFERENCE_CLASS_P (*rhs))
            find_interesting_uses_address (data, stmt, rhs);
          else
            find_interesting_uses_op (data, *rhs);

          if (REFERENCE_CLASS_P (*lhs))
            find_interesting_uses_address (data, stmt, lhs);
          return;
        }
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
        {
          find_interesting_uses_cond (data, stmt);
          return;
        }

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
        return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
        continue;

      iv = get_iv (data, op);
      if (!iv)
        continue;

      find_interesting_uses_op (data, op);
    }
}
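/* An illustrative classification (names made up): in a loop body containing

     tmp = a[i];         // recorded as a USE_ADDRESS of &a[i]
     k = i * i;          // I recorded as a USE_NONLINEAR_EXPR
     if (i < n) ...      // recorded as a USE_COMPARE

   each category is costed differently when the candidates are evaluated.  */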
/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (!virtual_operand_p (def))
        find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
        if (e->dest != EXIT_BLOCK_PTR
            && !flow_bb_inside_loop_p (data->current_loop, e->dest))
          find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        if (!is_gimple_debug (gsi_stmt (bsi)))
          find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          info = ver_info (data, i);
          if (info->inv_id)
            {
              fprintf (dump_file, "  ");
              print_generic_expr (dump_file, info->name, TDF_SLIM);
              fprintf (dump_file, " is invariant (%d)%s\n",
                       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
            }
        }

      fprintf (dump_file, "\n");
    }

  free (body);
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
                unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
          || integer_zerop (expr))
        return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
      if (op0 == TREE_OPERAND (expr, 0)
          && op1 == TREE_OPERAND (expr, 1))
        return orig_expr;

      if (integer_zerop (op1))
        expr = op0;
      else if (integer_zerop (op0))
        {
          if (code == MINUS_EXPR)
            expr = fold_build1 (NEGATE_EXPR, type, op1);
          else
            expr = op1;
        }
      else
        expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case MULT_EXPR:
      op1 = TREE_OPERAND (expr, 1);
      if (!cst_and_fits_in_hwi (op1))
        return orig_expr;

      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, false, false, &off0);
      if (op0 == TREE_OPERAND (expr, 0))
        return orig_expr;

      *offset = off0 * int_cst_value (op1);
      if (integer_zerop (op0))
        expr = op0;
      else
        expr = fold_build2 (MULT_EXPR, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (!inside_addr)
        return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
        break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
          && integer_zerop (op1))
        {
          /* Strip the component reference completely.  */
          op0 = TREE_OPERAND (expr, 0);
          op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
          *offset += off0;
          return op0;
        }
      break;

    case COMPONENT_REF:
      if (!inside_addr)
        return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
          && cst_and_fits_in_hwi (tmp))
        {
          /* Strip the component reference completely.  */
          op0 = TREE_OPERAND (expr, 0);
          op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
          *offset = off0 + int_cst_value (tmp);
          return op0;
        }
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
        return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case MEM_REF:
      /* ??? Offset operand?  */
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside an address, we might strip the top level component references,
     thus changing the type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  */

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  return strip_offset_1 (expr, false, false, offset);
}
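/* E.g. (illustrative): applied to &a[i + 3] with a 4-byte element type,
   strip_offset stores 12 to *OFFSET and returns &a[i].  */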
*/ 2155 2156 static tree 2157 generic_type_for (tree type) 2158 { 2159 if (POINTER_TYPE_P (type)) 2160 return unsigned_type_for (type); 2161 2162 if (TYPE_UNSIGNED (type)) 2163 return type; 2164 2165 return unsigned_type_for (type); 2166 } 2167 2168 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains 2169 the bitmap to that we should store it. */ 2170 2171 static struct ivopts_data *fd_ivopts_data; 2172 static tree 2173 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data) 2174 { 2175 bitmap *depends_on = (bitmap *) data; 2176 struct version_info *info; 2177 2178 if (TREE_CODE (*expr_p) != SSA_NAME) 2179 return NULL_TREE; 2180 info = name_info (fd_ivopts_data, *expr_p); 2181 2182 if (!info->inv_id || info->has_nonlin_use) 2183 return NULL_TREE; 2184 2185 if (!*depends_on) 2186 *depends_on = BITMAP_ALLOC (NULL); 2187 bitmap_set_bit (*depends_on, info->inv_id); 2188 2189 return NULL_TREE; 2190 } 2191 2192 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and 2193 position to POS. If USE is not NULL, the candidate is set as related to 2194 it. If both BASE and STEP are NULL, we add a pseudocandidate for the 2195 replacement of the final value of the iv by a direct computation. */ 2196 2197 static struct iv_cand * 2198 add_candidate_1 (struct ivopts_data *data, 2199 tree base, tree step, bool important, enum iv_position pos, 2200 struct iv_use *use, gimple incremented_at) 2201 { 2202 unsigned i; 2203 struct iv_cand *cand = NULL; 2204 tree type, orig_type; 2205 2206 /* For non-original variables, make sure their values are computed in a type 2207 that does not invoke undefined behavior on overflows (since in general, 2208 we cannot prove that these induction variables are non-wrapping). */ 2209 if (pos != IP_ORIGINAL) 2210 { 2211 orig_type = TREE_TYPE (base); 2212 type = generic_type_for (orig_type); 2213 if (type != orig_type) 2214 { 2215 base = fold_convert (type, base); 2216 step = fold_convert (type, step); 2217 } 2218 } 2219 2220 for (i = 0; i < n_iv_cands (data); i++) 2221 { 2222 cand = iv_cand (data, i); 2223 2224 if (cand->pos != pos) 2225 continue; 2226 2227 if (cand->incremented_at != incremented_at 2228 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE) 2229 && cand->ainc_use != use)) 2230 continue; 2231 2232 if (!cand->iv) 2233 { 2234 if (!base && !step) 2235 break; 2236 2237 continue; 2238 } 2239 2240 if (!base && !step) 2241 continue; 2242 2243 if (operand_equal_p (base, cand->iv->base, 0) 2244 && operand_equal_p (step, cand->iv->step, 0) 2245 && (TYPE_PRECISION (TREE_TYPE (base)) 2246 == TYPE_PRECISION (TREE_TYPE (cand->iv->base)))) 2247 break; 2248 } 2249 2250 if (i == n_iv_cands (data)) 2251 { 2252 cand = XCNEW (struct iv_cand); 2253 cand->id = i; 2254 2255 if (!base && !step) 2256 cand->iv = NULL; 2257 else 2258 cand->iv = alloc_iv (base, step); 2259 2260 cand->pos = pos; 2261 if (pos != IP_ORIGINAL && cand->iv) 2262 { 2263 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp"); 2264 cand->var_after = cand->var_before; 2265 } 2266 cand->important = important; 2267 cand->incremented_at = incremented_at; 2268 data->iv_candidates.safe_push (cand); 2269 2270 if (step 2271 && TREE_CODE (step) != INTEGER_CST) 2272 { 2273 fd_ivopts_data = data; 2274 walk_tree (&step, find_depends, &cand->depends_on, NULL); 2275 } 2276 2277 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE) 2278 cand->ainc_use = use; 2279 else 2280 cand->ainc_use = NULL; 2281 2282 if (dump_file && (dump_flags & TDF_DETAILS)) 2283 dump_cand (dump_file, 
cand); 2284 } 2285 2286 if (important && !cand->important) 2287 { 2288 cand->important = true; 2289 if (dump_file && (dump_flags & TDF_DETAILS)) 2290 fprintf (dump_file, "Candidate %d is important\n", cand->id); 2291 } 2292 2293 if (use) 2294 { 2295 bitmap_set_bit (use->related_cands, i); 2296 if (dump_file && (dump_flags & TDF_DETAILS)) 2297 fprintf (dump_file, "Candidate %d is related to use %d\n", 2298 cand->id, use->id); 2299 } 2300 2301 return cand; 2302 } 2303 2304 /* Returns true if incrementing the induction variable at the end of the LOOP 2305 is allowed. 2306 2307 The purpose is to avoid splitting latch edge with a biv increment, thus 2308 creating a jump, possibly confusing other optimization passes and leaving 2309 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS 2310 is not available (so we do not have a better alternative), or if the latch 2311 edge is already nonempty. */ 2312 2313 static bool 2314 allow_ip_end_pos_p (struct loop *loop) 2315 { 2316 if (!ip_normal_pos (loop)) 2317 return true; 2318 2319 if (!empty_block_p (ip_end_pos (loop))) 2320 return true; 2321 2322 return false; 2323 } 2324 2325 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE. 2326 Important field is set to IMPORTANT. */ 2327 2328 static void 2329 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step, 2330 bool important, struct iv_use *use) 2331 { 2332 basic_block use_bb = gimple_bb (use->stmt); 2333 enum machine_mode mem_mode; 2334 unsigned HOST_WIDE_INT cstepi; 2335 2336 /* If we insert the increment in any position other than the standard 2337 ones, we must ensure that it is incremented once per iteration. 2338 It must not be in an inner nested loop, or one side of an if 2339 statement. */ 2340 if (use_bb->loop_father != data->current_loop 2341 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb) 2342 || stmt_could_throw_p (use->stmt) 2343 || !cst_and_fits_in_hwi (step)) 2344 return; 2345 2346 cstepi = int_cst_value (step); 2347 2348 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p)); 2349 if (((USE_LOAD_PRE_INCREMENT (mem_mode) 2350 || USE_STORE_PRE_INCREMENT (mem_mode)) 2351 && GET_MODE_SIZE (mem_mode) == cstepi) 2352 || ((USE_LOAD_PRE_DECREMENT (mem_mode) 2353 || USE_STORE_PRE_DECREMENT (mem_mode)) 2354 && GET_MODE_SIZE (mem_mode) == -cstepi)) 2355 { 2356 enum tree_code code = MINUS_EXPR; 2357 tree new_base; 2358 tree new_step = step; 2359 2360 if (POINTER_TYPE_P (TREE_TYPE (base))) 2361 { 2362 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step); 2363 code = POINTER_PLUS_EXPR; 2364 } 2365 else 2366 new_step = fold_convert (TREE_TYPE (base), new_step); 2367 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step); 2368 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use, 2369 use->stmt); 2370 } 2371 if (((USE_LOAD_POST_INCREMENT (mem_mode) 2372 || USE_STORE_POST_INCREMENT (mem_mode)) 2373 && GET_MODE_SIZE (mem_mode) == cstepi) 2374 || ((USE_LOAD_POST_DECREMENT (mem_mode) 2375 || USE_STORE_POST_DECREMENT (mem_mode)) 2376 && GET_MODE_SIZE (mem_mode) == -cstepi)) 2377 { 2378 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use, 2379 use->stmt); 2380 } 2381 } 2382 2383 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and 2384 position to POS. If USE is not NULL, the candidate is set as related to 2385 it. The candidate computation is scheduled on all available positions. 
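That is, one candidate is added at the normal position (just before the exit test) if it exists, one incremented at the end of the latch when allow_ip_end_pos_p permits it, and, for address uses, the pre/post increment and decrement variants via add_autoinc_candidates.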
*/
2386
2387 static void
2388 add_candidate (struct ivopts_data *data,
2389 tree base, tree step, bool important, struct iv_use *use)
2390 {
2391 if (ip_normal_pos (data->current_loop))
2392 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2393 if (ip_end_pos (data->current_loop)
2394 && allow_ip_end_pos_p (data->current_loop))
2395 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2396
2397 if (use != NULL && use->type == USE_ADDRESS)
2398 add_autoinc_candidates (data, base, step, important, use);
2399 }
2400
2401 /* Adds standard iv candidates. */
2402
2403 static void
2404 add_standard_iv_candidates (struct ivopts_data *data)
2405 {
2406 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2407
2408 /* The same for the `long' type, if it is still fast enough. */
2409 if (TYPE_PRECISION
2410 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2411 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2412 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2413 build_int_cst (long_integer_type_node, 1), true, NULL);
2414
2415 /* And for the `long long' type, if it is still fast enough. */
2416 if (TYPE_PRECISION
2417 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2418 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2419 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2420 build_int_cst (long_long_integer_type_node, 1), true, NULL);
2421 }
2422
2423
2424 /* Adds candidates based on the old induction variable IV. */
2425
2426 static void
2427 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2428 {
2429 gimple phi;
2430 tree def;
2431 struct iv_cand *cand;
2432
2433 add_candidate (data, iv->base, iv->step, true, NULL);
2434
2435 /* The same, but with initial value zero. */
2436 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2437 add_candidate (data, size_int (0), iv->step, true, NULL);
2438 else
2439 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2440 iv->step, true, NULL);
2441
2442 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2443 if (gimple_code (phi) == GIMPLE_PHI)
2444 {
2445 /* Additionally record the possibility of leaving the original iv
2446 untouched. */
2447 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2448 cand = add_candidate_1 (data,
2449 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2450 SSA_NAME_DEF_STMT (def));
2451 cand->var_before = iv->ssa_name;
2452 cand->var_after = def;
2453 }
2454 }
2455
2456 /* Adds candidates based on the old induction variables. */
2457
2458 static void
2459 add_old_ivs_candidates (struct ivopts_data *data)
2460 {
2461 unsigned i;
2462 struct iv *iv;
2463 bitmap_iterator bi;
2464
2465 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2466 {
2467 iv = ver_info (data, i)->iv;
2468 if (iv && iv->biv_p && !integer_zerop (iv->step))
2469 add_old_iv_candidates (data, iv);
2470 }
2471 }
2472
2473 /* Adds candidates based on the value of the induction variable IV and USE. */
2474
2475 static void
2476 add_iv_value_candidates (struct ivopts_data *data,
2477 struct iv *iv, struct iv_use *use)
2478 {
2479 unsigned HOST_WIDE_INT offset;
2480 tree base;
2481 tree basetype;
2482
2483 add_candidate (data, iv->base, iv->step, false, use);
2484
2485 /* The same, but with initial value zero. Make such variable important,
2486 since it is generic enough so that possibly many uses may be based
2487 on it.
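For example, a use with iv {base = &a + 4, step = 4} also yields the candidate {base = 0, step = 4}; uses whose step is a constant multiple of 4 can then typically be expressed in terms of this single candidate.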
*/ 2488 basetype = TREE_TYPE (iv->base); 2489 if (POINTER_TYPE_P (basetype)) 2490 basetype = sizetype; 2491 add_candidate (data, build_int_cst (basetype, 0), 2492 iv->step, true, use); 2493 2494 /* Third, try removing the constant offset. Make sure to even 2495 add a candidate for &a[0] vs. (T *)&a. */ 2496 base = strip_offset (iv->base, &offset); 2497 if (offset 2498 || base != iv->base) 2499 add_candidate (data, base, iv->step, false, use); 2500 } 2501 2502 /* Adds candidates based on the uses. */ 2503 2504 static void 2505 add_derived_ivs_candidates (struct ivopts_data *data) 2506 { 2507 unsigned i; 2508 2509 for (i = 0; i < n_iv_uses (data); i++) 2510 { 2511 struct iv_use *use = iv_use (data, i); 2512 2513 if (!use) 2514 continue; 2515 2516 switch (use->type) 2517 { 2518 case USE_NONLINEAR_EXPR: 2519 case USE_COMPARE: 2520 case USE_ADDRESS: 2521 /* Just add the ivs based on the value of the iv used here. */ 2522 add_iv_value_candidates (data, use->iv, use); 2523 break; 2524 2525 default: 2526 gcc_unreachable (); 2527 } 2528 } 2529 } 2530 2531 /* Record important candidates and add them to related_cands bitmaps 2532 if needed. */ 2533 2534 static void 2535 record_important_candidates (struct ivopts_data *data) 2536 { 2537 unsigned i; 2538 struct iv_use *use; 2539 2540 for (i = 0; i < n_iv_cands (data); i++) 2541 { 2542 struct iv_cand *cand = iv_cand (data, i); 2543 2544 if (cand->important) 2545 bitmap_set_bit (data->important_candidates, i); 2546 } 2547 2548 data->consider_all_candidates = (n_iv_cands (data) 2549 <= CONSIDER_ALL_CANDIDATES_BOUND); 2550 2551 if (data->consider_all_candidates) 2552 { 2553 /* We will not need "related_cands" bitmaps in this case, 2554 so release them to decrease peak memory consumption. */ 2555 for (i = 0; i < n_iv_uses (data); i++) 2556 { 2557 use = iv_use (data, i); 2558 BITMAP_FREE (use->related_cands); 2559 } 2560 } 2561 else 2562 { 2563 /* Add important candidates to the related_cands bitmaps. */ 2564 for (i = 0; i < n_iv_uses (data); i++) 2565 bitmap_ior_into (iv_use (data, i)->related_cands, 2566 data->important_candidates); 2567 } 2568 } 2569 2570 /* Allocates the data structure mapping the (use, candidate) pairs to costs. 2571 If consider_all_candidates is true, we use a two-dimensional array, otherwise 2572 we allocate a simple list to every use. */ 2573 2574 static void 2575 alloc_use_cost_map (struct ivopts_data *data) 2576 { 2577 unsigned i, size, s; 2578 2579 for (i = 0; i < n_iv_uses (data); i++) 2580 { 2581 struct iv_use *use = iv_use (data, i); 2582 2583 if (data->consider_all_candidates) 2584 size = n_iv_cands (data); 2585 else 2586 { 2587 s = bitmap_count_bits (use->related_cands); 2588 2589 /* Round up to the power of two, so that moduling by it is fast. */ 2590 size = s ? (1 << ceil_log2 (s)) : 1; 2591 } 2592 2593 use->n_map_members = size; 2594 use->cost_map = XCNEWVEC (struct cost_pair, size); 2595 } 2596 } 2597 2598 /* Returns description of computation cost of expression whose runtime 2599 cost is RUNTIME and complexity corresponds to COMPLEXITY. */ 2600 2601 static comp_cost 2602 new_cost (unsigned runtime, unsigned complexity) 2603 { 2604 comp_cost cost; 2605 2606 cost.cost = runtime; 2607 cost.complexity = complexity; 2608 2609 return cost; 2610 } 2611 2612 /* Adds costs COST1 and COST2. */ 2613 2614 static comp_cost 2615 add_costs (comp_cost cost1, comp_cost cost2) 2616 { 2617 cost1.cost += cost2.cost; 2618 cost1.complexity += cost2.complexity; 2619 2620 return cost1; 2621 } 2622 /* Subtracts costs COST1 and COST2. 
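The subtraction is performed componentwise, i.e. both the runtime cost and the complexity of COST2 are subtracted from those of COST1.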
*/ 2623 2624 static comp_cost 2625 sub_costs (comp_cost cost1, comp_cost cost2) 2626 { 2627 cost1.cost -= cost2.cost; 2628 cost1.complexity -= cost2.complexity; 2629 2630 return cost1; 2631 } 2632 2633 /* Returns a negative number if COST1 < COST2, a positive number if 2634 COST1 > COST2, and 0 if COST1 = COST2. */ 2635 2636 static int 2637 compare_costs (comp_cost cost1, comp_cost cost2) 2638 { 2639 if (cost1.cost == cost2.cost) 2640 return cost1.complexity - cost2.complexity; 2641 2642 return cost1.cost - cost2.cost; 2643 } 2644 2645 /* Returns true if COST is infinite. */ 2646 2647 static bool 2648 infinite_cost_p (comp_cost cost) 2649 { 2650 return cost.cost == INFTY; 2651 } 2652 2653 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends 2654 on invariants DEPENDS_ON and that the value used in expressing it 2655 is VALUE, and in case of iv elimination the comparison operator is COMP. */ 2656 2657 static void 2658 set_use_iv_cost (struct ivopts_data *data, 2659 struct iv_use *use, struct iv_cand *cand, 2660 comp_cost cost, bitmap depends_on, tree value, 2661 enum tree_code comp, int inv_expr_id) 2662 { 2663 unsigned i, s; 2664 2665 if (infinite_cost_p (cost)) 2666 { 2667 BITMAP_FREE (depends_on); 2668 return; 2669 } 2670 2671 if (data->consider_all_candidates) 2672 { 2673 use->cost_map[cand->id].cand = cand; 2674 use->cost_map[cand->id].cost = cost; 2675 use->cost_map[cand->id].depends_on = depends_on; 2676 use->cost_map[cand->id].value = value; 2677 use->cost_map[cand->id].comp = comp; 2678 use->cost_map[cand->id].inv_expr_id = inv_expr_id; 2679 return; 2680 } 2681 2682 /* n_map_members is a power of two, so this computes modulo. */ 2683 s = cand->id & (use->n_map_members - 1); 2684 for (i = s; i < use->n_map_members; i++) 2685 if (!use->cost_map[i].cand) 2686 goto found; 2687 for (i = 0; i < s; i++) 2688 if (!use->cost_map[i].cand) 2689 goto found; 2690 2691 gcc_unreachable (); 2692 2693 found: 2694 use->cost_map[i].cand = cand; 2695 use->cost_map[i].cost = cost; 2696 use->cost_map[i].depends_on = depends_on; 2697 use->cost_map[i].value = value; 2698 use->cost_map[i].comp = comp; 2699 use->cost_map[i].inv_expr_id = inv_expr_id; 2700 } 2701 2702 /* Gets cost of (USE, CANDIDATE) pair. */ 2703 2704 static struct cost_pair * 2705 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use, 2706 struct iv_cand *cand) 2707 { 2708 unsigned i, s; 2709 struct cost_pair *ret; 2710 2711 if (!cand) 2712 return NULL; 2713 2714 if (data->consider_all_candidates) 2715 { 2716 ret = use->cost_map + cand->id; 2717 if (!ret->cand) 2718 return NULL; 2719 2720 return ret; 2721 } 2722 2723 /* n_map_members is a power of two, so this computes modulo. */ 2724 s = cand->id & (use->n_map_members - 1); 2725 for (i = s; i < use->n_map_members; i++) 2726 if (use->cost_map[i].cand == cand) 2727 return use->cost_map + i; 2728 else if (use->cost_map[i].cand == NULL) 2729 return NULL; 2730 for (i = 0; i < s; i++) 2731 if (use->cost_map[i].cand == cand) 2732 return use->cost_map + i; 2733 else if (use->cost_map[i].cand == NULL) 2734 return NULL; 2735 2736 return NULL; 2737 } 2738 2739 /* Returns estimate on cost of computing SEQ. 
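Every single-set insn in SEQ contributes the cost of its SET_SRC; any other insn is charged 1. E.g. a two-insn sequence of a shift set followed by an add set costs the sum of the two SET_SRC costs.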
*/ 2740 2741 static unsigned 2742 seq_cost (rtx seq, bool speed) 2743 { 2744 unsigned cost = 0; 2745 rtx set; 2746 2747 for (; seq; seq = NEXT_INSN (seq)) 2748 { 2749 set = single_set (seq); 2750 if (set) 2751 cost += set_src_cost (SET_SRC (set), speed); 2752 else 2753 cost++; 2754 } 2755 2756 return cost; 2757 } 2758 2759 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */ 2760 static rtx 2761 produce_memory_decl_rtl (tree obj, int *regno) 2762 { 2763 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj)); 2764 enum machine_mode address_mode = targetm.addr_space.address_mode (as); 2765 rtx x; 2766 2767 gcc_assert (obj); 2768 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj)) 2769 { 2770 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj)); 2771 x = gen_rtx_SYMBOL_REF (address_mode, name); 2772 SET_SYMBOL_REF_DECL (x, obj); 2773 x = gen_rtx_MEM (DECL_MODE (obj), x); 2774 set_mem_addr_space (x, as); 2775 targetm.encode_section_info (obj, x, true); 2776 } 2777 else 2778 { 2779 x = gen_raw_REG (address_mode, (*regno)++); 2780 x = gen_rtx_MEM (DECL_MODE (obj), x); 2781 set_mem_addr_space (x, as); 2782 } 2783 2784 return x; 2785 } 2786 2787 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for 2788 walk_tree. DATA contains the actual fake register number. */ 2789 2790 static tree 2791 prepare_decl_rtl (tree *expr_p, int *ws, void *data) 2792 { 2793 tree obj = NULL_TREE; 2794 rtx x = NULL_RTX; 2795 int *regno = (int *) data; 2796 2797 switch (TREE_CODE (*expr_p)) 2798 { 2799 case ADDR_EXPR: 2800 for (expr_p = &TREE_OPERAND (*expr_p, 0); 2801 handled_component_p (*expr_p); 2802 expr_p = &TREE_OPERAND (*expr_p, 0)) 2803 continue; 2804 obj = *expr_p; 2805 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj)) 2806 x = produce_memory_decl_rtl (obj, regno); 2807 break; 2808 2809 case SSA_NAME: 2810 *ws = 0; 2811 obj = SSA_NAME_VAR (*expr_p); 2812 /* Defer handling of anonymous SSA_NAMEs to the expander. */ 2813 if (!obj) 2814 return NULL_TREE; 2815 if (!DECL_RTL_SET_P (obj)) 2816 x = gen_raw_REG (DECL_MODE (obj), (*regno)++); 2817 break; 2818 2819 case VAR_DECL: 2820 case PARM_DECL: 2821 case RESULT_DECL: 2822 *ws = 0; 2823 obj = *expr_p; 2824 2825 if (DECL_RTL_SET_P (obj)) 2826 break; 2827 2828 if (DECL_MODE (obj) == BLKmode) 2829 x = produce_memory_decl_rtl (obj, regno); 2830 else 2831 x = gen_raw_REG (DECL_MODE (obj), (*regno)++); 2832 2833 break; 2834 2835 default: 2836 break; 2837 } 2838 2839 if (x) 2840 { 2841 decl_rtl_to_reset.safe_push (obj); 2842 SET_DECL_RTL (obj, x); 2843 } 2844 2845 return NULL_TREE; 2846 } 2847 2848 /* Determines cost of the computation of EXPR. */ 2849 2850 static unsigned 2851 computation_cost (tree expr, bool speed) 2852 { 2853 rtx seq, rslt; 2854 tree type = TREE_TYPE (expr); 2855 unsigned cost; 2856 /* Avoid using hard regs in ways which may be unsupported. 
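To that end we hand out fake register numbers starting just past the last virtual register; prepare_decl_rtl gives every decl referenced by EXPR such a scratch pseudo (or a fake MEM when the object lives in memory) before the expression is expanded.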
*/ 2857 int regno = LAST_VIRTUAL_REGISTER + 1; 2858 struct cgraph_node *node = cgraph_get_node (current_function_decl); 2859 enum node_frequency real_frequency = node->frequency; 2860 2861 node->frequency = NODE_FREQUENCY_NORMAL; 2862 crtl->maybe_hot_insn_p = speed; 2863 walk_tree (&expr, prepare_decl_rtl, ®no, NULL); 2864 start_sequence (); 2865 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL); 2866 seq = get_insns (); 2867 end_sequence (); 2868 default_rtl_profile (); 2869 node->frequency = real_frequency; 2870 2871 cost = seq_cost (seq, speed); 2872 if (MEM_P (rslt)) 2873 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type), 2874 TYPE_ADDR_SPACE (type), speed); 2875 else if (!REG_P (rslt)) 2876 cost += set_src_cost (rslt, speed); 2877 2878 return cost; 2879 } 2880 2881 /* Returns variable containing the value of candidate CAND at statement AT. */ 2882 2883 static tree 2884 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt) 2885 { 2886 if (stmt_after_increment (loop, cand, stmt)) 2887 return cand->var_after; 2888 else 2889 return cand->var_before; 2890 } 2891 2892 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the 2893 same precision that is at least as wide as the precision of TYPE, stores 2894 BA to A and BB to B, and returns the type of BA. Otherwise, returns the 2895 type of A and B. */ 2896 2897 static tree 2898 determine_common_wider_type (tree *a, tree *b) 2899 { 2900 tree wider_type = NULL; 2901 tree suba, subb; 2902 tree atype = TREE_TYPE (*a); 2903 2904 if (CONVERT_EXPR_P (*a)) 2905 { 2906 suba = TREE_OPERAND (*a, 0); 2907 wider_type = TREE_TYPE (suba); 2908 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype)) 2909 return atype; 2910 } 2911 else 2912 return atype; 2913 2914 if (CONVERT_EXPR_P (*b)) 2915 { 2916 subb = TREE_OPERAND (*b, 0); 2917 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb))) 2918 return atype; 2919 } 2920 else 2921 return atype; 2922 2923 *a = suba; 2924 *b = subb; 2925 return wider_type; 2926 } 2927 2928 /* Determines the expression by that USE is expressed from induction variable 2929 CAND at statement AT in LOOP. The expression is stored in a decomposed 2930 form into AFF. Returns false if USE cannot be expressed using CAND. */ 2931 2932 static bool 2933 get_computation_aff (struct loop *loop, 2934 struct iv_use *use, struct iv_cand *cand, gimple at, 2935 struct affine_tree_combination *aff) 2936 { 2937 tree ubase = use->iv->base; 2938 tree ustep = use->iv->step; 2939 tree cbase = cand->iv->base; 2940 tree cstep = cand->iv->step, cstep_common; 2941 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase); 2942 tree common_type, var; 2943 tree uutype; 2944 aff_tree cbase_aff, var_aff; 2945 double_int rat; 2946 2947 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype)) 2948 { 2949 /* We do not have a precision to express the values of use. */ 2950 return false; 2951 } 2952 2953 var = var_at_stmt (loop, cand, at); 2954 uutype = unsigned_type_for (utype); 2955 2956 /* If the conversion is not noop, perform it. 
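This is the case where the use is strictly narrower than the candidate: the candidate's base, step and current value are truncated to UUTYPE. The opposite case, a use wider than the candidate, was already rejected above.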
*/ 2957 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype)) 2958 { 2959 cstep = fold_convert (uutype, cstep); 2960 cbase = fold_convert (uutype, cbase); 2961 var = fold_convert (uutype, var); 2962 } 2963 2964 if (!constant_multiple_of (ustep, cstep, &rat)) 2965 return false; 2966 2967 /* In case both UBASE and CBASE are shortened to UUTYPE from some common 2968 type, we achieve better folding by computing their difference in this 2969 wider type, and cast the result to UUTYPE. We do not need to worry about 2970 overflows, as all the arithmetics will in the end be performed in UUTYPE 2971 anyway. */ 2972 common_type = determine_common_wider_type (&ubase, &cbase); 2973 2974 /* use = ubase - ratio * cbase + ratio * var. */ 2975 tree_to_aff_combination (ubase, common_type, aff); 2976 tree_to_aff_combination (cbase, common_type, &cbase_aff); 2977 tree_to_aff_combination (var, uutype, &var_aff); 2978 2979 /* We need to shift the value if we are after the increment. */ 2980 if (stmt_after_increment (loop, cand, at)) 2981 { 2982 aff_tree cstep_aff; 2983 2984 if (common_type != uutype) 2985 cstep_common = fold_convert (common_type, cstep); 2986 else 2987 cstep_common = cstep; 2988 2989 tree_to_aff_combination (cstep_common, common_type, &cstep_aff); 2990 aff_combination_add (&cbase_aff, &cstep_aff); 2991 } 2992 2993 aff_combination_scale (&cbase_aff, -rat); 2994 aff_combination_add (aff, &cbase_aff); 2995 if (common_type != uutype) 2996 aff_combination_convert (aff, uutype); 2997 2998 aff_combination_scale (&var_aff, rat); 2999 aff_combination_add (aff, &var_aff); 3000 3001 return true; 3002 } 3003 3004 /* Return the type of USE. */ 3005 3006 static tree 3007 get_use_type (struct iv_use *use) 3008 { 3009 tree base_type = TREE_TYPE (use->iv->base); 3010 tree type; 3011 3012 if (use->type == USE_ADDRESS) 3013 { 3014 /* The base_type may be a void pointer. Create a pointer type based on 3015 the mem_ref instead. */ 3016 type = build_pointer_type (TREE_TYPE (*use->op_p)); 3017 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type)) 3018 == TYPE_ADDR_SPACE (TREE_TYPE (base_type))); 3019 } 3020 else 3021 type = base_type; 3022 3023 return type; 3024 } 3025 3026 /* Determines the expression by that USE is expressed from induction variable 3027 CAND at statement AT in LOOP. The computation is unshared. */ 3028 3029 static tree 3030 get_computation_at (struct loop *loop, 3031 struct iv_use *use, struct iv_cand *cand, gimple at) 3032 { 3033 aff_tree aff; 3034 tree type = get_use_type (use); 3035 3036 if (!get_computation_aff (loop, use, cand, at, &aff)) 3037 return NULL_TREE; 3038 unshare_aff_combination (&aff); 3039 return fold_convert (type, aff_combination_to_tree (&aff)); 3040 } 3041 3042 /* Determines the expression by that USE is expressed from induction variable 3043 CAND in LOOP. The computation is unshared. */ 3044 3045 static tree 3046 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand) 3047 { 3048 return get_computation_at (loop, use, cand, use->stmt); 3049 } 3050 3051 /* Adjust the cost COST for being in loop setup rather than loop body. 3052 If we're optimizing for space, the loop setup overhead is constant; 3053 if we're optimizing for speed, amortize it over the per-iteration cost. 
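E.g. with an expected trip count of 10, a setup cost of 30 contributes 3 per iteration when optimizing the loop for speed, while for size it is accounted at its full value.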
*/ 3054 static unsigned 3055 adjust_setup_cost (struct ivopts_data *data, unsigned cost) 3056 { 3057 if (cost == INFTY) 3058 return cost; 3059 else if (optimize_loop_for_speed_p (data->current_loop)) 3060 return cost / avg_loop_niter (data->current_loop); 3061 else 3062 return cost; 3063 } 3064 3065 /* Returns true if multiplying by RATIO is allowed in an address. Test the 3066 validity for a memory reference accessing memory of mode MODE in 3067 address space AS. */ 3068 3069 3070 bool 3071 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode, 3072 addr_space_t as) 3073 { 3074 #define MAX_RATIO 128 3075 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode; 3076 static vec<sbitmap> valid_mult_list; 3077 sbitmap valid_mult; 3078 3079 if (data_index >= valid_mult_list.length ()) 3080 valid_mult_list.safe_grow_cleared (data_index + 1); 3081 3082 valid_mult = valid_mult_list[data_index]; 3083 if (!valid_mult) 3084 { 3085 enum machine_mode address_mode = targetm.addr_space.address_mode (as); 3086 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1); 3087 rtx addr; 3088 HOST_WIDE_INT i; 3089 3090 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1); 3091 bitmap_clear (valid_mult); 3092 addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX); 3093 for (i = -MAX_RATIO; i <= MAX_RATIO; i++) 3094 { 3095 XEXP (addr, 1) = gen_int_mode (i, address_mode); 3096 if (memory_address_addr_space_p (mode, addr, as)) 3097 bitmap_set_bit (valid_mult, i + MAX_RATIO); 3098 } 3099 3100 if (dump_file && (dump_flags & TDF_DETAILS)) 3101 { 3102 fprintf (dump_file, " allowed multipliers:"); 3103 for (i = -MAX_RATIO; i <= MAX_RATIO; i++) 3104 if (bitmap_bit_p (valid_mult, i + MAX_RATIO)) 3105 fprintf (dump_file, " %d", (int) i); 3106 fprintf (dump_file, "\n"); 3107 fprintf (dump_file, "\n"); 3108 } 3109 3110 valid_mult_list[data_index] = valid_mult; 3111 } 3112 3113 if (ratio > MAX_RATIO || ratio < -MAX_RATIO) 3114 return false; 3115 3116 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO); 3117 } 3118 3119 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index. 3120 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false, 3121 variable is omitted. Compute the cost for a memory reference that accesses 3122 a memory location of mode MEM_MODE in address space AS. 3123 3124 MAY_AUTOINC is set to true if the autoincrement (increasing index by 3125 size of MEM_MODE / RATIO) is available. To make this determination, we 3126 look at the size of the increment to be made, which is given in CSTEP. 3127 CSTEP may be zero if the step is unknown. 3128 STMT_AFTER_INC is true iff the statement we're looking at is after the 3129 increment of the original biv. 3130 3131 TODO -- there must be some better way. This all is quite crude. 
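The results are cached per (address space, memory mode) pair in the structure below: for each of the 16 possible combinations of the symbol, var, offset and ratio parts we expand one representative address and record the cost of the resulting sequence.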
*/ 3132 3133 typedef struct address_cost_data_s 3134 { 3135 HOST_WIDE_INT min_offset, max_offset; 3136 unsigned costs[2][2][2][2]; 3137 } *address_cost_data; 3138 3139 3140 static comp_cost 3141 get_address_cost (bool symbol_present, bool var_present, 3142 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio, 3143 HOST_WIDE_INT cstep, enum machine_mode mem_mode, 3144 addr_space_t as, bool speed, 3145 bool stmt_after_inc, bool *may_autoinc) 3146 { 3147 enum machine_mode address_mode = targetm.addr_space.address_mode (as); 3148 static vec<address_cost_data> address_cost_data_list; 3149 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode; 3150 address_cost_data data; 3151 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE]; 3152 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE]; 3153 unsigned cost, acost, complexity; 3154 bool offset_p, ratio_p, autoinc; 3155 HOST_WIDE_INT s_offset, autoinc_offset, msize; 3156 unsigned HOST_WIDE_INT mask; 3157 unsigned bits; 3158 3159 if (data_index >= address_cost_data_list.length ()) 3160 address_cost_data_list.safe_grow_cleared (data_index + 1); 3161 3162 data = address_cost_data_list[data_index]; 3163 if (!data) 3164 { 3165 HOST_WIDE_INT i; 3166 HOST_WIDE_INT rat, off = 0; 3167 int old_cse_not_expected, width; 3168 unsigned sym_p, var_p, off_p, rat_p, add_c; 3169 rtx seq, addr, base; 3170 rtx reg0, reg1; 3171 3172 data = (address_cost_data) xcalloc (1, sizeof (*data)); 3173 3174 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1); 3175 3176 width = GET_MODE_BITSIZE (address_mode) - 1; 3177 if (width > (HOST_BITS_PER_WIDE_INT - 1)) 3178 width = HOST_BITS_PER_WIDE_INT - 1; 3179 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX); 3180 3181 for (i = width; i >= 0; i--) 3182 { 3183 off = -((unsigned HOST_WIDE_INT) 1 << i); 3184 XEXP (addr, 1) = gen_int_mode (off, address_mode); 3185 if (memory_address_addr_space_p (mem_mode, addr, as)) 3186 break; 3187 } 3188 data->min_offset = (i == -1? 0 : off); 3189 3190 for (i = width; i >= 0; i--) 3191 { 3192 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1; 3193 XEXP (addr, 1) = gen_int_mode (off, address_mode); 3194 if (memory_address_addr_space_p (mem_mode, addr, as)) 3195 break; 3196 } 3197 if (i == -1) 3198 off = 0; 3199 data->max_offset = off; 3200 3201 if (dump_file && (dump_flags & TDF_DETAILS)) 3202 { 3203 fprintf (dump_file, "get_address_cost:\n"); 3204 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n", 3205 GET_MODE_NAME (mem_mode), 3206 data->min_offset); 3207 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n", 3208 GET_MODE_NAME (mem_mode), 3209 data->max_offset); 3210 } 3211 3212 rat = 1; 3213 for (i = 2; i <= MAX_RATIO; i++) 3214 if (multiplier_allowed_in_address_p (i, mem_mode, as)) 3215 { 3216 rat = i; 3217 break; 3218 } 3219 3220 /* Compute the cost of various addressing modes. 
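Each index I of the main loop below encodes one combination of address parts: bit 0 says whether a symbol is present, bit 1 a variable, bit 2 a constant offset and bit 3 a ratio. E.g. I == 6 measures an address of the shape `var + cst'.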
*/
3221 acost = 0;
3222 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3223 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3224
3225 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3226 || USE_STORE_PRE_DECREMENT (mem_mode))
3227 {
3228 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3229 has_predec[mem_mode]
3230 = memory_address_addr_space_p (mem_mode, addr, as);
3231 }
3232 if (USE_LOAD_POST_DECREMENT (mem_mode)
3233 || USE_STORE_POST_DECREMENT (mem_mode))
3234 {
3235 addr = gen_rtx_POST_DEC (address_mode, reg0);
3236 has_postdec[mem_mode]
3237 = memory_address_addr_space_p (mem_mode, addr, as);
3238 }
3239 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3240 || USE_STORE_PRE_INCREMENT (mem_mode))
3241 {
3242 addr = gen_rtx_PRE_INC (address_mode, reg0);
3243 has_preinc[mem_mode]
3244 = memory_address_addr_space_p (mem_mode, addr, as);
3245 }
3246 if (USE_LOAD_POST_INCREMENT (mem_mode)
3247 || USE_STORE_POST_INCREMENT (mem_mode))
3248 {
3249 addr = gen_rtx_POST_INC (address_mode, reg0);
3250 has_postinc[mem_mode]
3251 = memory_address_addr_space_p (mem_mode, addr, as);
3252 }
3253 for (i = 0; i < 16; i++)
3254 {
3255 sym_p = i & 1;
3256 var_p = (i >> 1) & 1;
3257 off_p = (i >> 2) & 1;
3258 rat_p = (i >> 3) & 1;
3259
3260 addr = reg0;
3261 if (rat_p)
3262 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3263 gen_int_mode (rat, address_mode));
3264
3265 if (var_p)
3266 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3267
3268 if (sym_p)
3269 {
3270 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3271 /* ??? We can run into trouble with some backends by presenting
3272 it with symbols which haven't been properly passed through
3273 targetm.encode_section_info. By setting the local bit, we
3274 enhance the probability of things working. */
3275 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3276
3277 if (off_p)
3278 base = gen_rtx_fmt_e (CONST, address_mode,
3279 gen_rtx_fmt_ee
3280 (PLUS, address_mode, base,
3281 gen_int_mode (off, address_mode)));
3282 }
3283 else if (off_p)
3284 base = gen_int_mode (off, address_mode);
3285 else
3286 base = NULL_RTX;
3287
3288 if (base)
3289 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3290
3291 start_sequence ();
3292 /* To avoid splitting addressing modes, pretend that no cse will
3293 follow. */
3294 old_cse_not_expected = cse_not_expected;
3295 cse_not_expected = true;
3296 addr = memory_address_addr_space (mem_mode, addr, as);
3297 cse_not_expected = old_cse_not_expected;
3298 seq = get_insns ();
3299 end_sequence ();
3300
3301 acost = seq_cost (seq, speed);
3302 acost += address_cost (addr, mem_mode, as, speed);
3303
3304 if (!acost)
3305 acost = 1;
3306 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3307 }
3308
3309 /* On some targets, it is quite expensive to load a symbol into a register,
3310 which makes addresses that contain symbols look much more expensive.
3311 However, the symbol will have to be loaded in any case before the
3312 loop (and quite likely we have it in a register already), so it does not
3313 make much sense to penalize them too heavily. So make some final
3314 tweaks for the SYMBOL_PRESENT modes:
3315
3316 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3317 var is cheaper, use this mode with a small penalty.
3318 If VAR_PRESENT is true, try whether the mode with
3319 SYMBOL_PRESENT = false is cheaper even with the cost of the addition, and
3320 if this is the case, use it.
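For example, if `sym + cst' was measured at 8 and `var + cst' at 3 (illustrative numbers), the recorded cost of `sym + cst' becomes 3 + 1 = 4; when a variable is present as well, the cost of the addition needed to combine it with the symbol is added on top.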
*/
3321 add_c = add_cost (speed, address_mode);
3322 for (i = 0; i < 8; i++)
3323 {
3324 var_p = i & 1;
3325 off_p = (i >> 1) & 1;
3326 rat_p = (i >> 2) & 1;
3327
3328 acost = data->costs[0][1][off_p][rat_p] + 1;
3329 if (var_p)
3330 acost += add_c;
3331
3332 if (acost < data->costs[1][var_p][off_p][rat_p])
3333 data->costs[1][var_p][off_p][rat_p] = acost;
3334 }
3335
3336 if (dump_file && (dump_flags & TDF_DETAILS))
3337 {
3338 fprintf (dump_file, "Address costs:\n");
3339
3340 for (i = 0; i < 16; i++)
3341 {
3342 sym_p = i & 1;
3343 var_p = (i >> 1) & 1;
3344 off_p = (i >> 2) & 1;
3345 rat_p = (i >> 3) & 1;
3346
3347 fprintf (dump_file, " ");
3348 if (sym_p)
3349 fprintf (dump_file, "sym + ");
3350 if (var_p)
3351 fprintf (dump_file, "var + ");
3352 if (off_p)
3353 fprintf (dump_file, "cst + ");
3354 if (rat_p)
3355 fprintf (dump_file, "rat * ");
3356
3357 acost = data->costs[sym_p][var_p][off_p][rat_p];
3358 fprintf (dump_file, "index costs %d\n", acost);
3359 }
3360 if (has_predec[mem_mode] || has_postdec[mem_mode]
3361 || has_preinc[mem_mode] || has_postinc[mem_mode])
3362 fprintf (dump_file, " May include autoinc/dec\n");
3363 fprintf (dump_file, "\n");
3364 }
3365
3366 address_cost_data_list[data_index] = data;
3367 }
3368
3369 bits = GET_MODE_BITSIZE (address_mode);
3370 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3371 offset &= mask;
3372 if ((offset >> (bits - 1) & 1))
3373 offset |= ~mask;
3374 s_offset = offset;
3375
3376 autoinc = false;
3377 msize = GET_MODE_SIZE (mem_mode);
3378 autoinc_offset = offset;
3379 if (stmt_after_inc)
3380 autoinc_offset += ratio * cstep;
3381 if (symbol_present || var_present || ratio != 1)
3382 autoinc = false;
3383 else if ((has_postinc[mem_mode] && autoinc_offset == 0
3384 && msize == cstep)
3385 || (has_postdec[mem_mode] && autoinc_offset == 0
3386 && msize == -cstep)
3387 || (has_preinc[mem_mode] && autoinc_offset == msize
3388 && msize == cstep)
3389 || (has_predec[mem_mode] && autoinc_offset == -msize
3390 && msize == -cstep))
3391 autoinc = true;
3392
3393 cost = 0;
3394 offset_p = (s_offset != 0
3395 && data->min_offset <= s_offset
3396 && s_offset <= data->max_offset);
3397 ratio_p = (ratio != 1
3398 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3399
3400 if (ratio != 1 && !ratio_p)
3401 cost += mult_by_coeff_cost (ratio, address_mode, speed);
3402
3403 if (s_offset && !offset_p && !symbol_present)
3404 cost += add_cost (speed, address_mode);
3405
3406 if (may_autoinc)
3407 *may_autoinc = autoinc;
3408 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3409 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3410 return new_cost (cost + acost, complexity);
3411 }
3412
3413 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
3414 EXPR operand holding the shift. COST0 and COST1 are the costs for
3415 calculating the operands of EXPR. Returns true if successful, and returns
3416 the cost in COST. */
3417
3418 static bool
3419 get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
3420 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3421 {
3422 comp_cost res;
3423 tree op1 = TREE_OPERAND (expr, 1);
3424 tree cst = TREE_OPERAND (mult, 1);
3425 tree multop = TREE_OPERAND (mult, 0);
3426 int m = exact_log2 (int_cst_value (cst));
3427 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3428 int sa_cost;
3429
3430 if (!(m >= 0 && m < maxm))
3431 return false;
3432
3433 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3434 ?
shiftadd_cost (speed, mode, m) 3435 : (mult == op1 3436 ? shiftsub1_cost (speed, mode, m) 3437 : shiftsub0_cost (speed, mode, m))); 3438 res = new_cost (sa_cost, 0); 3439 res = add_costs (res, mult == op1 ? cost0 : cost1); 3440 3441 STRIP_NOPS (multop); 3442 if (!is_gimple_val (multop)) 3443 res = add_costs (res, force_expr_to_var_cost (multop, speed)); 3444 3445 *cost = res; 3446 return true; 3447 } 3448 3449 /* Estimates cost of forcing expression EXPR into a variable. */ 3450 3451 static comp_cost 3452 force_expr_to_var_cost (tree expr, bool speed) 3453 { 3454 static bool costs_initialized = false; 3455 static unsigned integer_cost [2]; 3456 static unsigned symbol_cost [2]; 3457 static unsigned address_cost [2]; 3458 tree op0, op1; 3459 comp_cost cost0, cost1, cost; 3460 enum machine_mode mode; 3461 3462 if (!costs_initialized) 3463 { 3464 tree type = build_pointer_type (integer_type_node); 3465 tree var, addr; 3466 rtx x; 3467 int i; 3468 3469 var = create_tmp_var_raw (integer_type_node, "test_var"); 3470 TREE_STATIC (var) = 1; 3471 x = produce_memory_decl_rtl (var, NULL); 3472 SET_DECL_RTL (var, x); 3473 3474 addr = build1 (ADDR_EXPR, type, var); 3475 3476 3477 for (i = 0; i < 2; i++) 3478 { 3479 integer_cost[i] = computation_cost (build_int_cst (integer_type_node, 3480 2000), i); 3481 3482 symbol_cost[i] = computation_cost (addr, i) + 1; 3483 3484 address_cost[i] 3485 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1; 3486 if (dump_file && (dump_flags & TDF_DETAILS)) 3487 { 3488 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size"); 3489 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]); 3490 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]); 3491 fprintf (dump_file, " address %d\n", (int) address_cost[i]); 3492 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]); 3493 fprintf (dump_file, "\n"); 3494 } 3495 } 3496 3497 costs_initialized = true; 3498 } 3499 3500 STRIP_NOPS (expr); 3501 3502 if (SSA_VAR_P (expr)) 3503 return no_cost; 3504 3505 if (is_gimple_min_invariant (expr)) 3506 { 3507 if (TREE_CODE (expr) == INTEGER_CST) 3508 return new_cost (integer_cost [speed], 0); 3509 3510 if (TREE_CODE (expr) == ADDR_EXPR) 3511 { 3512 tree obj = TREE_OPERAND (expr, 0); 3513 3514 if (TREE_CODE (obj) == VAR_DECL 3515 || TREE_CODE (obj) == PARM_DECL 3516 || TREE_CODE (obj) == RESULT_DECL) 3517 return new_cost (symbol_cost [speed], 0); 3518 } 3519 3520 return new_cost (address_cost [speed], 0); 3521 } 3522 3523 switch (TREE_CODE (expr)) 3524 { 3525 case POINTER_PLUS_EXPR: 3526 case PLUS_EXPR: 3527 case MINUS_EXPR: 3528 case MULT_EXPR: 3529 op0 = TREE_OPERAND (expr, 0); 3530 op1 = TREE_OPERAND (expr, 1); 3531 STRIP_NOPS (op0); 3532 STRIP_NOPS (op1); 3533 3534 if (is_gimple_val (op0)) 3535 cost0 = no_cost; 3536 else 3537 cost0 = force_expr_to_var_cost (op0, speed); 3538 3539 if (is_gimple_val (op1)) 3540 cost1 = no_cost; 3541 else 3542 cost1 = force_expr_to_var_cost (op1, speed); 3543 3544 break; 3545 3546 case NEGATE_EXPR: 3547 op0 = TREE_OPERAND (expr, 0); 3548 STRIP_NOPS (op0); 3549 op1 = NULL_TREE; 3550 3551 if (is_gimple_val (op0)) 3552 cost0 = no_cost; 3553 else 3554 cost0 = force_expr_to_var_cost (op0, speed); 3555 3556 cost1 = no_cost; 3557 break; 3558 3559 default: 3560 /* Just an arbitrary value, FIXME. 
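E.g. a division appearing in the expression ends up here; we charge the register spill cost as a crude stand-in for `expensive to recompute'.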
*/ 3561 return new_cost (target_spill_cost[speed], 0); 3562 } 3563 3564 mode = TYPE_MODE (TREE_TYPE (expr)); 3565 switch (TREE_CODE (expr)) 3566 { 3567 case POINTER_PLUS_EXPR: 3568 case PLUS_EXPR: 3569 case MINUS_EXPR: 3570 case NEGATE_EXPR: 3571 cost = new_cost (add_cost (speed, mode), 0); 3572 if (TREE_CODE (expr) != NEGATE_EXPR) 3573 { 3574 tree mult = NULL_TREE; 3575 comp_cost sa_cost; 3576 if (TREE_CODE (op1) == MULT_EXPR) 3577 mult = op1; 3578 else if (TREE_CODE (op0) == MULT_EXPR) 3579 mult = op0; 3580 3581 if (mult != NULL_TREE 3582 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1)) 3583 && get_shiftadd_cost (expr, mode, cost0, cost1, mult, 3584 speed, &sa_cost)) 3585 return sa_cost; 3586 } 3587 break; 3588 3589 case MULT_EXPR: 3590 if (cst_and_fits_in_hwi (op0)) 3591 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0), 3592 mode, speed), 0); 3593 else if (cst_and_fits_in_hwi (op1)) 3594 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1), 3595 mode, speed), 0); 3596 else 3597 return new_cost (target_spill_cost [speed], 0); 3598 break; 3599 3600 default: 3601 gcc_unreachable (); 3602 } 3603 3604 cost = add_costs (cost, cost0); 3605 cost = add_costs (cost, cost1); 3606 3607 /* Bound the cost by target_spill_cost. The parts of complicated 3608 computations often are either loop invariant or at least can 3609 be shared between several iv uses, so letting this grow without 3610 limits would not give reasonable results. */ 3611 if (cost.cost > (int) target_spill_cost [speed]) 3612 cost.cost = target_spill_cost [speed]; 3613 3614 return cost; 3615 } 3616 3617 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the 3618 invariants the computation depends on. */ 3619 3620 static comp_cost 3621 force_var_cost (struct ivopts_data *data, 3622 tree expr, bitmap *depends_on) 3623 { 3624 if (depends_on) 3625 { 3626 fd_ivopts_data = data; 3627 walk_tree (&expr, find_depends, depends_on, NULL); 3628 } 3629 3630 return force_expr_to_var_cost (expr, data->speed); 3631 } 3632 3633 /* Estimates cost of expressing address ADDR as var + symbol + offset. The 3634 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set 3635 to false if the corresponding part is missing. DEPENDS_ON is a set of the 3636 invariants the computation depends on. */ 3637 3638 static comp_cost 3639 split_address_cost (struct ivopts_data *data, 3640 tree addr, bool *symbol_present, bool *var_present, 3641 unsigned HOST_WIDE_INT *offset, bitmap *depends_on) 3642 { 3643 tree core; 3644 HOST_WIDE_INT bitsize; 3645 HOST_WIDE_INT bitpos; 3646 tree toffset; 3647 enum machine_mode mode; 3648 int unsignedp, volatilep; 3649 3650 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode, 3651 &unsignedp, &volatilep, false); 3652 3653 if (toffset != 0 3654 || bitpos % BITS_PER_UNIT != 0 3655 || TREE_CODE (core) != VAR_DECL) 3656 { 3657 *symbol_present = false; 3658 *var_present = true; 3659 fd_ivopts_data = data; 3660 walk_tree (&addr, find_depends, depends_on, NULL); 3661 return new_cost (target_spill_cost[data->speed], 0); 3662 } 3663 3664 *offset += bitpos / BITS_PER_UNIT; 3665 if (TREE_STATIC (core) 3666 || DECL_EXTERNAL (core)) 3667 { 3668 *symbol_present = true; 3669 *var_present = false; 3670 return no_cost; 3671 } 3672 3673 *symbol_present = false; 3674 *var_present = true; 3675 return no_cost; 3676 } 3677 3678 /* Estimates cost of expressing difference of addresses E1 - E2 as 3679 var + symbol + offset. 
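For example, &a[6] - &a[2] folds to a pure constant offset and costs nothing.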
The value of offset is added to OFFSET, 3680 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding 3681 part is missing. DEPENDS_ON is a set of the invariants the computation 3682 depends on. */ 3683 3684 static comp_cost 3685 ptr_difference_cost (struct ivopts_data *data, 3686 tree e1, tree e2, bool *symbol_present, bool *var_present, 3687 unsigned HOST_WIDE_INT *offset, bitmap *depends_on) 3688 { 3689 HOST_WIDE_INT diff = 0; 3690 aff_tree aff_e1, aff_e2; 3691 tree type; 3692 3693 gcc_assert (TREE_CODE (e1) == ADDR_EXPR); 3694 3695 if (ptr_difference_const (e1, e2, &diff)) 3696 { 3697 *offset += diff; 3698 *symbol_present = false; 3699 *var_present = false; 3700 return no_cost; 3701 } 3702 3703 if (integer_zerop (e2)) 3704 return split_address_cost (data, TREE_OPERAND (e1, 0), 3705 symbol_present, var_present, offset, depends_on); 3706 3707 *symbol_present = false; 3708 *var_present = true; 3709 3710 type = signed_type_for (TREE_TYPE (e1)); 3711 tree_to_aff_combination (e1, type, &aff_e1); 3712 tree_to_aff_combination (e2, type, &aff_e2); 3713 aff_combination_scale (&aff_e2, double_int_minus_one); 3714 aff_combination_add (&aff_e1, &aff_e2); 3715 3716 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on); 3717 } 3718 3719 /* Estimates cost of expressing difference E1 - E2 as 3720 var + symbol + offset. The value of offset is added to OFFSET, 3721 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding 3722 part is missing. DEPENDS_ON is a set of the invariants the computation 3723 depends on. */ 3724 3725 static comp_cost 3726 difference_cost (struct ivopts_data *data, 3727 tree e1, tree e2, bool *symbol_present, bool *var_present, 3728 unsigned HOST_WIDE_INT *offset, bitmap *depends_on) 3729 { 3730 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1)); 3731 unsigned HOST_WIDE_INT off1, off2; 3732 aff_tree aff_e1, aff_e2; 3733 tree type; 3734 3735 e1 = strip_offset (e1, &off1); 3736 e2 = strip_offset (e2, &off2); 3737 *offset += off1 - off2; 3738 3739 STRIP_NOPS (e1); 3740 STRIP_NOPS (e2); 3741 3742 if (TREE_CODE (e1) == ADDR_EXPR) 3743 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, 3744 offset, depends_on); 3745 *symbol_present = false; 3746 3747 if (operand_equal_p (e1, e2, 0)) 3748 { 3749 *var_present = false; 3750 return no_cost; 3751 } 3752 3753 *var_present = true; 3754 3755 if (integer_zerop (e2)) 3756 return force_var_cost (data, e1, depends_on); 3757 3758 if (integer_zerop (e1)) 3759 { 3760 comp_cost cost = force_var_cost (data, e2, depends_on); 3761 cost.cost += mult_by_coeff_cost (-1, mode, data->speed); 3762 return cost; 3763 } 3764 3765 type = signed_type_for (TREE_TYPE (e1)); 3766 tree_to_aff_combination (e1, type, &aff_e1); 3767 tree_to_aff_combination (e2, type, &aff_e2); 3768 aff_combination_scale (&aff_e2, double_int_minus_one); 3769 aff_combination_add (&aff_e1, &aff_e2); 3770 3771 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on); 3772 } 3773 3774 /* Returns true if AFF1 and AFF2 are identical. */ 3775 3776 static bool 3777 compare_aff_trees (aff_tree *aff1, aff_tree *aff2) 3778 { 3779 unsigned i; 3780 3781 if (aff1->n != aff2->n) 3782 return false; 3783 3784 for (i = 0; i < aff1->n; i++) 3785 { 3786 if (aff1->elts[i].coef != aff2->elts[i].coef) 3787 return false; 3788 3789 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0)) 3790 return false; 3791 } 3792 return true; 3793 } 3794 3795 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. 
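Structurally equal expressions receive the same id, so an invariant expression shared by several uses is counted only once when register pressure is estimated later.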
*/ 3796 3797 static int 3798 get_expr_id (struct ivopts_data *data, tree expr) 3799 { 3800 struct iv_inv_expr_ent ent; 3801 struct iv_inv_expr_ent **slot; 3802 3803 ent.expr = expr; 3804 ent.hash = iterative_hash_expr (expr, 0); 3805 slot = (struct iv_inv_expr_ent **) htab_find_slot (data->inv_expr_tab, 3806 &ent, INSERT); 3807 if (*slot) 3808 return (*slot)->id; 3809 3810 *slot = XNEW (struct iv_inv_expr_ent); 3811 (*slot)->expr = expr; 3812 (*slot)->hash = ent.hash; 3813 (*slot)->id = data->inv_expr_id++; 3814 return (*slot)->id; 3815 } 3816 3817 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE 3818 requires a new compiler generated temporary. Returns -1 otherwise. 3819 ADDRESS_P is a flag indicating if the expression is for address 3820 computation. */ 3821 3822 static int 3823 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase, 3824 tree cbase, HOST_WIDE_INT ratio, 3825 bool address_p) 3826 { 3827 aff_tree ubase_aff, cbase_aff; 3828 tree expr, ub, cb; 3829 3830 STRIP_NOPS (ubase); 3831 STRIP_NOPS (cbase); 3832 ub = ubase; 3833 cb = cbase; 3834 3835 if ((TREE_CODE (ubase) == INTEGER_CST) 3836 && (TREE_CODE (cbase) == INTEGER_CST)) 3837 return -1; 3838 3839 /* Strips the constant part. */ 3840 if (TREE_CODE (ubase) == PLUS_EXPR 3841 || TREE_CODE (ubase) == MINUS_EXPR 3842 || TREE_CODE (ubase) == POINTER_PLUS_EXPR) 3843 { 3844 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST) 3845 ubase = TREE_OPERAND (ubase, 0); 3846 } 3847 3848 /* Strips the constant part. */ 3849 if (TREE_CODE (cbase) == PLUS_EXPR 3850 || TREE_CODE (cbase) == MINUS_EXPR 3851 || TREE_CODE (cbase) == POINTER_PLUS_EXPR) 3852 { 3853 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST) 3854 cbase = TREE_OPERAND (cbase, 0); 3855 } 3856 3857 if (address_p) 3858 { 3859 if (((TREE_CODE (ubase) == SSA_NAME) 3860 || (TREE_CODE (ubase) == ADDR_EXPR 3861 && is_gimple_min_invariant (ubase))) 3862 && (TREE_CODE (cbase) == INTEGER_CST)) 3863 return -1; 3864 3865 if (((TREE_CODE (cbase) == SSA_NAME) 3866 || (TREE_CODE (cbase) == ADDR_EXPR 3867 && is_gimple_min_invariant (cbase))) 3868 && (TREE_CODE (ubase) == INTEGER_CST)) 3869 return -1; 3870 } 3871 3872 if (ratio == 1) 3873 { 3874 if(operand_equal_p (ubase, cbase, 0)) 3875 return -1; 3876 3877 if (TREE_CODE (ubase) == ADDR_EXPR 3878 && TREE_CODE (cbase) == ADDR_EXPR) 3879 { 3880 tree usym, csym; 3881 3882 usym = TREE_OPERAND (ubase, 0); 3883 csym = TREE_OPERAND (cbase, 0); 3884 if (TREE_CODE (usym) == ARRAY_REF) 3885 { 3886 tree ind = TREE_OPERAND (usym, 1); 3887 if (TREE_CODE (ind) == INTEGER_CST 3888 && host_integerp (ind, 0) 3889 && TREE_INT_CST_LOW (ind) == 0) 3890 usym = TREE_OPERAND (usym, 0); 3891 } 3892 if (TREE_CODE (csym) == ARRAY_REF) 3893 { 3894 tree ind = TREE_OPERAND (csym, 1); 3895 if (TREE_CODE (ind) == INTEGER_CST 3896 && host_integerp (ind, 0) 3897 && TREE_INT_CST_LOW (ind) == 0) 3898 csym = TREE_OPERAND (csym, 0); 3899 } 3900 if (operand_equal_p (usym, csym, 0)) 3901 return -1; 3902 } 3903 /* Now do more complex comparison */ 3904 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff); 3905 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff); 3906 if (compare_aff_trees (&ubase_aff, &cbase_aff)) 3907 return -1; 3908 } 3909 3910 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff); 3911 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff); 3912 3913 aff_combination_scale (&cbase_aff, double_int::from_shwi (-1 * ratio)); 3914 aff_combination_add (&ubase_aff, &cbase_aff); 3915 expr = 
aff_combination_to_tree (&ubase_aff); 3916 return get_expr_id (data, expr); 3917 } 3918 3919 3920 3921 /* Determines the cost of the computation by that USE is expressed 3922 from induction variable CAND. If ADDRESS_P is true, we just need 3923 to create an address from it, otherwise we want to get it into 3924 register. A set of invariants we depend on is stored in 3925 DEPENDS_ON. AT is the statement at that the value is computed. 3926 If CAN_AUTOINC is nonnull, use it to record whether autoinc 3927 addressing is likely. */ 3928 3929 static comp_cost 3930 get_computation_cost_at (struct ivopts_data *data, 3931 struct iv_use *use, struct iv_cand *cand, 3932 bool address_p, bitmap *depends_on, gimple at, 3933 bool *can_autoinc, 3934 int *inv_expr_id) 3935 { 3936 tree ubase = use->iv->base, ustep = use->iv->step; 3937 tree cbase, cstep; 3938 tree utype = TREE_TYPE (ubase), ctype; 3939 unsigned HOST_WIDE_INT cstepi, offset = 0; 3940 HOST_WIDE_INT ratio, aratio; 3941 bool var_present, symbol_present, stmt_is_after_inc; 3942 comp_cost cost; 3943 double_int rat; 3944 bool speed = optimize_bb_for_speed_p (gimple_bb (at)); 3945 enum machine_mode mem_mode = (address_p 3946 ? TYPE_MODE (TREE_TYPE (*use->op_p)) 3947 : VOIDmode); 3948 3949 *depends_on = NULL; 3950 3951 /* Only consider real candidates. */ 3952 if (!cand->iv) 3953 return infinite_cost; 3954 3955 cbase = cand->iv->base; 3956 cstep = cand->iv->step; 3957 ctype = TREE_TYPE (cbase); 3958 3959 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype)) 3960 { 3961 /* We do not have a precision to express the values of use. */ 3962 return infinite_cost; 3963 } 3964 3965 if (address_p 3966 || (use->iv->base_object 3967 && cand->iv->base_object 3968 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object)) 3969 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object)))) 3970 { 3971 /* Do not try to express address of an object with computation based 3972 on address of a different object. This may cause problems in rtl 3973 level alias analysis (that does not expect this to be happening, 3974 as this is illegal in C), and would be unlikely to be useful 3975 anyway. */ 3976 if (use->iv->base_object 3977 && cand->iv->base_object 3978 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0)) 3979 return infinite_cost; 3980 } 3981 3982 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype)) 3983 { 3984 /* TODO -- add direct handling of this case. */ 3985 goto fallback; 3986 } 3987 3988 /* CSTEPI is removed from the offset in case statement is after the 3989 increment. If the step is not constant, we use zero instead. 3990 This is a bit imprecise (there is the extra addition), but 3991 redundancy elimination is likely to transform the code so that 3992 it uses value of the variable before increment anyway, 3993 so it is not that much unrealistic. */ 3994 if (cst_and_fits_in_hwi (cstep)) 3995 cstepi = int_cst_value (cstep); 3996 else 3997 cstepi = 0; 3998 3999 if (!constant_multiple_of (ustep, cstep, &rat)) 4000 return infinite_cost; 4001 4002 if (rat.fits_shwi ()) 4003 ratio = rat.to_shwi (); 4004 else 4005 return infinite_cost; 4006 4007 STRIP_NOPS (cbase); 4008 ctype = TREE_TYPE (cbase); 4009 4010 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at); 4011 4012 /* use = ubase + ratio * (var - cbase). If either cbase is a constant 4013 or ratio == 1, it is better to handle this like 4014 4015 ubase - ratio * cbase + ratio * var 4016 4017 (also holds in the case ratio == -1, TODO. 
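For instance, for a use with {base = b + 4, step = 8} and a candidate with {base = b, step = 2} we get ratio == 4, and the use is computed as (b + 4) - 4 * b + 4 * var.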
*/ 4018 4019 if (cst_and_fits_in_hwi (cbase)) 4020 { 4021 offset = - ratio * int_cst_value (cbase); 4022 cost = difference_cost (data, 4023 ubase, build_int_cst (utype, 0), 4024 &symbol_present, &var_present, &offset, 4025 depends_on); 4026 cost.cost /= avg_loop_niter (data->current_loop); 4027 } 4028 else if (ratio == 1) 4029 { 4030 tree real_cbase = cbase; 4031 4032 /* Check to see if any adjustment is needed. */ 4033 if (cstepi == 0 && stmt_is_after_inc) 4034 { 4035 aff_tree real_cbase_aff; 4036 aff_tree cstep_aff; 4037 4038 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase), 4039 &real_cbase_aff); 4040 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff); 4041 4042 aff_combination_add (&real_cbase_aff, &cstep_aff); 4043 real_cbase = aff_combination_to_tree (&real_cbase_aff); 4044 } 4045 4046 cost = difference_cost (data, 4047 ubase, real_cbase, 4048 &symbol_present, &var_present, &offset, 4049 depends_on); 4050 cost.cost /= avg_loop_niter (data->current_loop); 4051 } 4052 else if (address_p 4053 && !POINTER_TYPE_P (ctype) 4054 && multiplier_allowed_in_address_p 4055 (ratio, mem_mode, 4056 TYPE_ADDR_SPACE (TREE_TYPE (utype)))) 4057 { 4058 cbase 4059 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio)); 4060 cost = difference_cost (data, 4061 ubase, cbase, 4062 &symbol_present, &var_present, &offset, 4063 depends_on); 4064 cost.cost /= avg_loop_niter (data->current_loop); 4065 } 4066 else 4067 { 4068 cost = force_var_cost (data, cbase, depends_on); 4069 cost = add_costs (cost, 4070 difference_cost (data, 4071 ubase, build_int_cst (utype, 0), 4072 &symbol_present, &var_present, 4073 &offset, depends_on)); 4074 cost.cost /= avg_loop_niter (data->current_loop); 4075 cost.cost += add_cost (data->speed, TYPE_MODE (ctype)); 4076 } 4077 4078 if (inv_expr_id) 4079 { 4080 *inv_expr_id = 4081 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p); 4082 /* Clear depends on. */ 4083 if (*inv_expr_id != -1 && depends_on && *depends_on) 4084 bitmap_clear (*depends_on); 4085 } 4086 4087 /* If we are after the increment, the value of the candidate is higher by 4088 one iteration. */ 4089 if (stmt_is_after_inc) 4090 offset -= ratio * cstepi; 4091 4092 /* Now the computation is in shape symbol + var1 + const + ratio * var2. 4093 (symbol/var1/const parts may be omitted). If we are looking for an 4094 address, find the cost of addressing this. */ 4095 if (address_p) 4096 return add_costs (cost, 4097 get_address_cost (symbol_present, var_present, 4098 offset, ratio, cstepi, 4099 mem_mode, 4100 TYPE_ADDR_SPACE (TREE_TYPE (utype)), 4101 speed, stmt_is_after_inc, 4102 can_autoinc)); 4103 4104 /* Otherwise estimate the costs for computing the expression. */ 4105 if (!symbol_present && !var_present && !offset) 4106 { 4107 if (ratio != 1) 4108 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed); 4109 return cost; 4110 } 4111 4112 /* Symbol + offset should be compile-time computable so consider that they 4113 are added once to the variable, if present. */ 4114 if (var_present && (symbol_present || offset)) 4115 cost.cost += adjust_setup_cost (data, 4116 add_cost (speed, TYPE_MODE (ctype))); 4117 4118 /* Having offset does not affect runtime cost in case it is added to 4119 symbol, but it increases complexity. */ 4120 if (offset) 4121 cost.complexity++; 4122 4123 cost.cost += add_cost (speed, TYPE_MODE (ctype)); 4124 4125 aratio = ratio > 0 ? 
ratio : -ratio; 4126 if (aratio != 1) 4127 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed); 4128 return cost; 4129 4130 fallback: 4131 if (can_autoinc) 4132 *can_autoinc = false; 4133 4134 { 4135 /* Just get the expression, expand it and measure the cost. */ 4136 tree comp = get_computation_at (data->current_loop, use, cand, at); 4137 4138 if (!comp) 4139 return infinite_cost; 4140 4141 if (address_p) 4142 comp = build_simple_mem_ref (comp); 4143 4144 return new_cost (computation_cost (comp, speed), 0); 4145 } 4146 } 4147 4148 /* Determines the cost of the computation by that USE is expressed 4149 from induction variable CAND. If ADDRESS_P is true, we just need 4150 to create an address from it, otherwise we want to get it into 4151 register. A set of invariants we depend on is stored in 4152 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether 4153 autoinc addressing is likely. */ 4154 4155 static comp_cost 4156 get_computation_cost (struct ivopts_data *data, 4157 struct iv_use *use, struct iv_cand *cand, 4158 bool address_p, bitmap *depends_on, 4159 bool *can_autoinc, int *inv_expr_id) 4160 { 4161 return get_computation_cost_at (data, 4162 use, cand, address_p, depends_on, use->stmt, 4163 can_autoinc, inv_expr_id); 4164 } 4165 4166 /* Determines cost of basing replacement of USE on CAND in a generic 4167 expression. */ 4168 4169 static bool 4170 determine_use_iv_cost_generic (struct ivopts_data *data, 4171 struct iv_use *use, struct iv_cand *cand) 4172 { 4173 bitmap depends_on; 4174 comp_cost cost; 4175 int inv_expr_id = -1; 4176 4177 /* The simple case first -- if we need to express value of the preserved 4178 original biv, the cost is 0. This also prevents us from counting the 4179 cost of increment twice -- once at this use and once in the cost of 4180 the candidate. */ 4181 if (cand->pos == IP_ORIGINAL 4182 && cand->incremented_at == use->stmt) 4183 { 4184 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE, 4185 ERROR_MARK, -1); 4186 return true; 4187 } 4188 4189 cost = get_computation_cost (data, use, cand, false, &depends_on, 4190 NULL, &inv_expr_id); 4191 4192 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK, 4193 inv_expr_id); 4194 4195 return !infinite_cost_p (cost); 4196 } 4197 4198 /* Determines cost of basing replacement of USE on CAND in an address. */ 4199 4200 static bool 4201 determine_use_iv_cost_address (struct ivopts_data *data, 4202 struct iv_use *use, struct iv_cand *cand) 4203 { 4204 bitmap depends_on; 4205 bool can_autoinc; 4206 int inv_expr_id = -1; 4207 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on, 4208 &can_autoinc, &inv_expr_id); 4209 4210 if (cand->ainc_use == use) 4211 { 4212 if (can_autoinc) 4213 cost.cost -= cand->cost_step; 4214 /* If we generated the candidate solely for exploiting autoincrement 4215 opportunities, and it turns out it can't be used, set the cost to 4216 infinity to make sure we ignore it. */ 4217 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE) 4218 cost = infinite_cost; 4219 } 4220 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK, 4221 inv_expr_id); 4222 4223 return !infinite_cost_p (cost); 4224 } 4225 4226 /* Computes value of candidate CAND at position AT in iteration NITER, and 4227 stores it to VAL. 
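
   The value is BASE + NITER * STEP, plus one more STEP when AT comes
   after the increment of CAND; the computation is carried out in an
   unsigned type so that the multiplication cannot cause undefined
   overflow.  A minimal sketch of the same computation on plain
   integers (illustration only, hypothetical names):

	unsigned long
	value_at (unsigned long base, unsigned long step,
		  unsigned long niter, int after_increment)
	{
	  unsigned long delta = niter * step;
	  if (after_increment)
	    delta += step;
	  return base + delta;
	}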
*/ 4228 4229 static void 4230 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter, 4231 aff_tree *val) 4232 { 4233 aff_tree step, delta, nit; 4234 struct iv *iv = cand->iv; 4235 tree type = TREE_TYPE (iv->base); 4236 tree steptype = type; 4237 if (POINTER_TYPE_P (type)) 4238 steptype = sizetype; 4239 steptype = unsigned_type_for (type); 4240 4241 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step); 4242 aff_combination_convert (&step, steptype); 4243 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit); 4244 aff_combination_convert (&nit, steptype); 4245 aff_combination_mult (&nit, &step, &delta); 4246 if (stmt_after_increment (loop, cand, at)) 4247 aff_combination_add (&delta, &step); 4248 4249 tree_to_aff_combination (iv->base, type, val); 4250 if (!POINTER_TYPE_P (type)) 4251 aff_combination_convert (val, steptype); 4252 aff_combination_add (val, &delta); 4253 } 4254 4255 /* Returns period of induction variable iv. */ 4256 4257 static tree 4258 iv_period (struct iv *iv) 4259 { 4260 tree step = iv->step, period, type; 4261 tree pow2div; 4262 4263 gcc_assert (step && TREE_CODE (step) == INTEGER_CST); 4264 4265 type = unsigned_type_for (TREE_TYPE (step)); 4266 /* Period of the iv is lcm (step, type_range)/step -1, 4267 i.e., N*type_range/step - 1. Since type range is power 4268 of two, N == (step >> num_of_ending_zeros_binary (step), 4269 so the final result is 4270 4271 (type_range >> num_of_ending_zeros_binary (step)) - 1 4272 4273 */ 4274 pow2div = num_ending_zeros (step); 4275 4276 period = build_low_bits_mask (type, 4277 (TYPE_PRECISION (type) 4278 - tree_low_cst (pow2div, 1))); 4279 4280 return period; 4281 } 4282 4283 /* Returns the comparison operator used when eliminating the iv USE. */ 4284 4285 static enum tree_code 4286 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use) 4287 { 4288 struct loop *loop = data->current_loop; 4289 basic_block ex_bb; 4290 edge exit; 4291 4292 ex_bb = gimple_bb (use->stmt); 4293 exit = EDGE_SUCC (ex_bb, 0); 4294 if (flow_bb_inside_loop_p (loop, exit->dest)) 4295 exit = EDGE_SUCC (ex_bb, 1); 4296 4297 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR); 4298 } 4299 4300 static tree 4301 strip_wrap_conserving_type_conversions (tree exp) 4302 { 4303 while (tree_ssa_useless_type_conversion (exp) 4304 && (nowrap_type_p (TREE_TYPE (exp)) 4305 == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp, 0))))) 4306 exp = TREE_OPERAND (exp, 0); 4307 return exp; 4308 } 4309 4310 /* Walk the SSA form and check whether E == WHAT. Fairly simplistic, we 4311 check for an exact match. */ 4312 4313 static bool 4314 expr_equal_p (tree e, tree what) 4315 { 4316 gimple stmt; 4317 enum tree_code code; 4318 4319 e = strip_wrap_conserving_type_conversions (e); 4320 what = strip_wrap_conserving_type_conversions (what); 4321 4322 code = TREE_CODE (what); 4323 if (TREE_TYPE (e) != TREE_TYPE (what)) 4324 return false; 4325 4326 if (operand_equal_p (e, what, 0)) 4327 return true; 4328 4329 if (TREE_CODE (e) != SSA_NAME) 4330 return false; 4331 4332 stmt = SSA_NAME_DEF_STMT (e); 4333 if (gimple_code (stmt) != GIMPLE_ASSIGN 4334 || gimple_assign_rhs_code (stmt) != code) 4335 return false; 4336 4337 switch (get_gimple_rhs_class (code)) 4338 { 4339 case GIMPLE_BINARY_RHS: 4340 if (!expr_equal_p (gimple_assign_rhs2 (stmt), TREE_OPERAND (what, 1))) 4341 return false; 4342 /* Fallthru. 
*/ 4343 4344 case GIMPLE_UNARY_RHS: 4345 case GIMPLE_SINGLE_RHS: 4346 return expr_equal_p (gimple_assign_rhs1 (stmt), TREE_OPERAND (what, 0)); 4347 default: 4348 return false; 4349 } 4350 } 4351 4352 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now, 4353 we only detect the situation that BASE = SOMETHING + OFFSET, where the 4354 calculation is performed in non-wrapping type. 4355 4356 TODO: More generally, we could test for the situation that 4357 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero. 4358 This would require knowing the sign of OFFSET. 4359 4360 Also, we only look for the first addition in the computation of BASE. 4361 More complex analysis would be better, but introducing it just for 4362 this optimization seems like an overkill. */ 4363 4364 static bool 4365 difference_cannot_overflow_p (tree base, tree offset) 4366 { 4367 enum tree_code code; 4368 tree e1, e2; 4369 4370 if (!nowrap_type_p (TREE_TYPE (base))) 4371 return false; 4372 4373 base = expand_simple_operations (base); 4374 4375 if (TREE_CODE (base) == SSA_NAME) 4376 { 4377 gimple stmt = SSA_NAME_DEF_STMT (base); 4378 4379 if (gimple_code (stmt) != GIMPLE_ASSIGN) 4380 return false; 4381 4382 code = gimple_assign_rhs_code (stmt); 4383 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS) 4384 return false; 4385 4386 e1 = gimple_assign_rhs1 (stmt); 4387 e2 = gimple_assign_rhs2 (stmt); 4388 } 4389 else 4390 { 4391 code = TREE_CODE (base); 4392 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS) 4393 return false; 4394 e1 = TREE_OPERAND (base, 0); 4395 e2 = TREE_OPERAND (base, 1); 4396 } 4397 4398 /* TODO: deeper inspection may be necessary to prove the equality. */ 4399 switch (code) 4400 { 4401 case PLUS_EXPR: 4402 return expr_equal_p (e1, offset) || expr_equal_p (e2, offset); 4403 case POINTER_PLUS_EXPR: 4404 return expr_equal_p (e2, offset); 4405 4406 default: 4407 return false; 4408 } 4409 } 4410 4411 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR 4412 comparison with CAND. NITER describes the number of iterations of 4413 the loops. If successful, the comparison in COMP_P is altered accordingly. 4414 4415 We aim to handle the following situation: 4416 4417 sometype *base, *p; 4418 int a, b, i; 4419 4420 i = a; 4421 p = p_0 = base + a; 4422 4423 do 4424 { 4425 bla (*p); 4426 p++; 4427 i++; 4428 } 4429 while (i < b); 4430 4431 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1. 4432 We aim to optimize this to 4433 4434 p = p_0 = base + a; 4435 do 4436 { 4437 bla (*p); 4438 p++; 4439 } 4440 while (p < p_0 - a + b); 4441 4442 This preserves the correctness, since the pointer arithmetics does not 4443 overflow. More precisely: 4444 4445 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no 4446 overflow in computing it or the values of p. 4447 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not 4448 overflow. To prove this, we use the fact that p_0 = base + a. */ 4449 4450 static bool 4451 iv_elimination_compare_lt (struct ivopts_data *data, 4452 struct iv_cand *cand, enum tree_code *comp_p, 4453 struct tree_niter_desc *niter) 4454 { 4455 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset; 4456 struct affine_tree_combination nit, tmpa, tmpb; 4457 enum tree_code comp; 4458 HOST_WIDE_INT step; 4459 4460 /* We need to know that the candidate induction variable does not overflow. 
4461 While more complex analysis may be used to prove this, for now just 4462 check that the variable appears in the original program and that it 4463 is computed in a type that guarantees no overflows. */ 4464 cand_type = TREE_TYPE (cand->iv->base); 4465 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type)) 4466 return false; 4467 4468 /* Make sure that the loop iterates till the loop bound is hit, as otherwise 4469 the calculation of the BOUND could overflow, making the comparison 4470 invalid. */ 4471 if (!data->loop_single_exit_p) 4472 return false; 4473 4474 /* We need to be able to decide whether candidate is increasing or decreasing 4475 in order to choose the right comparison operator. */ 4476 if (!cst_and_fits_in_hwi (cand->iv->step)) 4477 return false; 4478 step = int_cst_value (cand->iv->step); 4479 4480 /* Check that the number of iterations matches the expected pattern: 4481 a + 1 > b ? 0 : b - a - 1. */ 4482 mbz = niter->may_be_zero; 4483 if (TREE_CODE (mbz) == GT_EXPR) 4484 { 4485 /* Handle a + 1 > b. */ 4486 tree op0 = TREE_OPERAND (mbz, 0); 4487 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1))) 4488 { 4489 a = TREE_OPERAND (op0, 0); 4490 b = TREE_OPERAND (mbz, 1); 4491 } 4492 else 4493 return false; 4494 } 4495 else if (TREE_CODE (mbz) == LT_EXPR) 4496 { 4497 tree op1 = TREE_OPERAND (mbz, 1); 4498 4499 /* Handle b < a + 1. */ 4500 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1))) 4501 { 4502 a = TREE_OPERAND (op1, 0); 4503 b = TREE_OPERAND (mbz, 0); 4504 } 4505 else 4506 return false; 4507 } 4508 else 4509 return false; 4510 4511 /* Expected number of iterations is B - A - 1. Check that it matches 4512 the actual number, i.e., that B - A - NITER = 1. */ 4513 tree_to_aff_combination (niter->niter, nit_type, &nit); 4514 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa); 4515 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb); 4516 aff_combination_scale (&nit, double_int_minus_one); 4517 aff_combination_scale (&tmpa, double_int_minus_one); 4518 aff_combination_add (&tmpb, &tmpa); 4519 aff_combination_add (&tmpb, &nit); 4520 if (tmpb.n != 0 || tmpb.offset != double_int_one) 4521 return false; 4522 4523 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not 4524 overflow. */ 4525 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step), 4526 cand->iv->step, 4527 fold_convert (TREE_TYPE (cand->iv->step), a)); 4528 if (!difference_cannot_overflow_p (cand->iv->base, offset)) 4529 return false; 4530 4531 /* Determine the new comparison operator. */ 4532 comp = step < 0 ? GT_EXPR : LT_EXPR; 4533 if (*comp_p == NE_EXPR) 4534 *comp_p = comp; 4535 else if (*comp_p == EQ_EXPR) 4536 *comp_p = invert_tree_comparison (comp, false); 4537 else 4538 gcc_unreachable (); 4539 4540 return true; 4541 } 4542 4543 /* Check whether it is possible to express the condition in USE by comparison 4544 of candidate CAND. If so, store the value compared with to BOUND, and the 4545 comparison operator to COMP. */ 4546 4547 static bool 4548 may_eliminate_iv (struct ivopts_data *data, 4549 struct iv_use *use, struct iv_cand *cand, tree *bound, 4550 enum tree_code *comp) 4551 { 4552 basic_block ex_bb; 4553 edge exit; 4554 tree period; 4555 struct loop *loop = data->current_loop; 4556 aff_tree bnd; 4557 struct tree_niter_desc *desc = NULL; 4558 4559 if (TREE_CODE (cand->iv->step) != INTEGER_CST) 4560 return false; 4561 4562 /* For now works only for exits that dominate the loop latch. 
     TODO: extend to other conditions inside loop body.  */
  ex_bb = gimple_bb (use->stmt);
  if (use->stmt != last_stmt (ex_bb)
      || gimple_code (use->stmt) != GIMPLE_COND
      || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
    return false;

  exit = EDGE_SUCC (ex_bb, 0);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    exit = EDGE_SUCC (ex_bb, 1);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    return false;

  desc = niter_for_exit (data, exit);
  if (!desc)
    return false;

  /* Determine whether we can use the variable to test the exit condition.
     This is the case iff the period of the induction variable is greater
     than the number of iterations for which the exit condition is true.  */
  period = iv_period (cand->iv);

  /* If the number of iterations is constant, compare against it directly.  */
  if (TREE_CODE (desc->niter) == INTEGER_CST)
    {
      /* See cand_value_at.  */
      if (stmt_after_increment (loop, cand, use->stmt))
	{
	  if (!tree_int_cst_lt (desc->niter, period))
	    return false;
	}
      else
	{
	  if (tree_int_cst_lt (period, desc->niter))
	    return false;
	}
    }

  /* If not, and if this is the only possible exit of the loop, see whether
     we can get a conservative estimate on the number of iterations of the
     entire loop and compare against that instead.  */
  else
    {
      double_int period_value, max_niter;

      max_niter = desc->max;
      if (stmt_after_increment (loop, cand, use->stmt))
	max_niter += double_int_one;
      period_value = tree_to_double_int (period);
      if (max_niter.ugt (period_value))
	{
	  /* See if we can take advantage of inferred loop bound information.  */
	  if (data->loop_single_exit_p)
	    {
	      if (!max_loop_iterations (loop, &max_niter))
		return false;
	      /* The loop bound is already adjusted by adding 1.  */
	      if (max_niter.ugt (period_value))
		return false;
	    }
	  else
	    return false;
	}
    }

  cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);

  *bound = fold_convert (TREE_TYPE (cand->iv->base),
			 aff_combination_to_tree (&bnd));
  *comp = iv_elimination_compare (data, use);

  /* It is unlikely that computing the number of iterations using division
     would be more profitable than keeping the original induction
     variable.  */
  if (expression_expensive_p (*bound))
    return false;

  /* Sometimes, it is possible to handle the situation that the number of
     iterations may be zero unless additional assumptions hold, by using <
     instead of != in the exit condition.

     TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
	   base the exit condition on it.  However, that is often too
	   expensive.  */
  if (!integer_zerop (desc->may_be_zero))
    return iv_elimination_compare_lt (data, cand, comp, desc);

  return true;
}

/* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
   be copied, if it is used in the loop body and DATA->body_includes_call.
*/ 4654 4655 static int 4656 parm_decl_cost (struct ivopts_data *data, tree bound) 4657 { 4658 tree sbound = bound; 4659 STRIP_NOPS (sbound); 4660 4661 if (TREE_CODE (sbound) == SSA_NAME 4662 && SSA_NAME_IS_DEFAULT_DEF (sbound) 4663 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL 4664 && data->body_includes_call) 4665 return COSTS_N_INSNS (1); 4666 4667 return 0; 4668 } 4669 4670 /* Determines cost of basing replacement of USE on CAND in a condition. */ 4671 4672 static bool 4673 determine_use_iv_cost_condition (struct ivopts_data *data, 4674 struct iv_use *use, struct iv_cand *cand) 4675 { 4676 tree bound = NULL_TREE; 4677 struct iv *cmp_iv; 4678 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on; 4679 comp_cost elim_cost, express_cost, cost, bound_cost; 4680 bool ok; 4681 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id; 4682 tree *control_var, *bound_cst; 4683 enum tree_code comp = ERROR_MARK; 4684 4685 /* Only consider real candidates. */ 4686 if (!cand->iv) 4687 { 4688 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE, 4689 ERROR_MARK, -1); 4690 return false; 4691 } 4692 4693 /* Try iv elimination. */ 4694 if (may_eliminate_iv (data, use, cand, &bound, &comp)) 4695 { 4696 elim_cost = force_var_cost (data, bound, &depends_on_elim); 4697 if (elim_cost.cost == 0) 4698 elim_cost.cost = parm_decl_cost (data, bound); 4699 else if (TREE_CODE (bound) == INTEGER_CST) 4700 elim_cost.cost = 0; 4701 /* If we replace a loop condition 'i < n' with 'p < base + n', 4702 depends_on_elim will have 'base' and 'n' set, which implies 4703 that both 'base' and 'n' will be live during the loop. More likely, 4704 'base + n' will be loop invariant, resulting in only one live value 4705 during the loop. So in that case we clear depends_on_elim and set 4706 elim_inv_expr_id instead. */ 4707 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1) 4708 { 4709 elim_inv_expr_id = get_expr_id (data, bound); 4710 bitmap_clear (depends_on_elim); 4711 } 4712 /* The bound is a loop invariant, so it will be only computed 4713 once. */ 4714 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost); 4715 } 4716 else 4717 elim_cost = infinite_cost; 4718 4719 /* Try expressing the original giv. If it is compared with an invariant, 4720 note that we cannot get rid of it. */ 4721 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst, 4722 NULL, &cmp_iv); 4723 gcc_assert (ok); 4724 4725 /* When the condition is a comparison of the candidate IV against 4726 zero, prefer this IV. 4727 4728 TODO: The constant that we're subtracting from the cost should 4729 be target-dependent. This information should be added to the 4730 target costs for each backend. */ 4731 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */ 4732 && integer_zerop (*bound_cst) 4733 && (operand_equal_p (*control_var, cand->var_after, 0) 4734 || operand_equal_p (*control_var, cand->var_before, 0))) 4735 elim_cost.cost -= 1; 4736 4737 express_cost = get_computation_cost (data, use, cand, false, 4738 &depends_on_express, NULL, 4739 &express_inv_expr_id); 4740 fd_ivopts_data = data; 4741 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL); 4742 4743 /* Count the cost of the original bound as well. 
*/ 4744 bound_cost = force_var_cost (data, *bound_cst, NULL); 4745 if (bound_cost.cost == 0) 4746 bound_cost.cost = parm_decl_cost (data, *bound_cst); 4747 else if (TREE_CODE (*bound_cst) == INTEGER_CST) 4748 bound_cost.cost = 0; 4749 express_cost.cost += bound_cost.cost; 4750 4751 /* Choose the better approach, preferring the eliminated IV. */ 4752 if (compare_costs (elim_cost, express_cost) <= 0) 4753 { 4754 cost = elim_cost; 4755 depends_on = depends_on_elim; 4756 depends_on_elim = NULL; 4757 inv_expr_id = elim_inv_expr_id; 4758 } 4759 else 4760 { 4761 cost = express_cost; 4762 depends_on = depends_on_express; 4763 depends_on_express = NULL; 4764 bound = NULL_TREE; 4765 comp = ERROR_MARK; 4766 inv_expr_id = express_inv_expr_id; 4767 } 4768 4769 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id); 4770 4771 if (depends_on_elim) 4772 BITMAP_FREE (depends_on_elim); 4773 if (depends_on_express) 4774 BITMAP_FREE (depends_on_express); 4775 4776 return !infinite_cost_p (cost); 4777 } 4778 4779 /* Determines cost of basing replacement of USE on CAND. Returns false 4780 if USE cannot be based on CAND. */ 4781 4782 static bool 4783 determine_use_iv_cost (struct ivopts_data *data, 4784 struct iv_use *use, struct iv_cand *cand) 4785 { 4786 switch (use->type) 4787 { 4788 case USE_NONLINEAR_EXPR: 4789 return determine_use_iv_cost_generic (data, use, cand); 4790 4791 case USE_ADDRESS: 4792 return determine_use_iv_cost_address (data, use, cand); 4793 4794 case USE_COMPARE: 4795 return determine_use_iv_cost_condition (data, use, cand); 4796 4797 default: 4798 gcc_unreachable (); 4799 } 4800 } 4801 4802 /* Return true if get_computation_cost indicates that autoincrement is 4803 a possibility for the pair of USE and CAND, false otherwise. */ 4804 4805 static bool 4806 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use, 4807 struct iv_cand *cand) 4808 { 4809 bitmap depends_on; 4810 bool can_autoinc; 4811 comp_cost cost; 4812 4813 if (use->type != USE_ADDRESS) 4814 return false; 4815 4816 cost = get_computation_cost (data, use, cand, true, &depends_on, 4817 &can_autoinc, NULL); 4818 4819 BITMAP_FREE (depends_on); 4820 4821 return !infinite_cost_p (cost) && can_autoinc; 4822 } 4823 4824 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a 4825 use that allows autoincrement, and set their AINC_USE if possible. */ 4826 4827 static void 4828 set_autoinc_for_original_candidates (struct ivopts_data *data) 4829 { 4830 unsigned i, j; 4831 4832 for (i = 0; i < n_iv_cands (data); i++) 4833 { 4834 struct iv_cand *cand = iv_cand (data, i); 4835 struct iv_use *closest = NULL; 4836 if (cand->pos != IP_ORIGINAL) 4837 continue; 4838 for (j = 0; j < n_iv_uses (data); j++) 4839 { 4840 struct iv_use *use = iv_use (data, j); 4841 unsigned uid = gimple_uid (use->stmt); 4842 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at) 4843 || uid > gimple_uid (cand->incremented_at)) 4844 continue; 4845 if (closest == NULL || uid > gimple_uid (closest->stmt)) 4846 closest = use; 4847 } 4848 if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand)) 4849 continue; 4850 cand->ainc_use = closest; 4851 } 4852 } 4853 4854 /* Finds the candidates for the induction variables. */ 4855 4856 static void 4857 find_iv_candidates (struct ivopts_data *data) 4858 { 4859 /* Add commonly used ivs. */ 4860 add_standard_iv_candidates (data); 4861 4862 /* Add old induction variables. 
*/ 4863 add_old_ivs_candidates (data); 4864 4865 /* Add induction variables derived from uses. */ 4866 add_derived_ivs_candidates (data); 4867 4868 set_autoinc_for_original_candidates (data); 4869 4870 /* Record the important candidates. */ 4871 record_important_candidates (data); 4872 } 4873 4874 /* Determines costs of basing the use of the iv on an iv candidate. */ 4875 4876 static void 4877 determine_use_iv_costs (struct ivopts_data *data) 4878 { 4879 unsigned i, j; 4880 struct iv_use *use; 4881 struct iv_cand *cand; 4882 bitmap to_clear = BITMAP_ALLOC (NULL); 4883 4884 alloc_use_cost_map (data); 4885 4886 for (i = 0; i < n_iv_uses (data); i++) 4887 { 4888 use = iv_use (data, i); 4889 4890 if (data->consider_all_candidates) 4891 { 4892 for (j = 0; j < n_iv_cands (data); j++) 4893 { 4894 cand = iv_cand (data, j); 4895 determine_use_iv_cost (data, use, cand); 4896 } 4897 } 4898 else 4899 { 4900 bitmap_iterator bi; 4901 4902 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi) 4903 { 4904 cand = iv_cand (data, j); 4905 if (!determine_use_iv_cost (data, use, cand)) 4906 bitmap_set_bit (to_clear, j); 4907 } 4908 4909 /* Remove the candidates for that the cost is infinite from 4910 the list of related candidates. */ 4911 bitmap_and_compl_into (use->related_cands, to_clear); 4912 bitmap_clear (to_clear); 4913 } 4914 } 4915 4916 BITMAP_FREE (to_clear); 4917 4918 if (dump_file && (dump_flags & TDF_DETAILS)) 4919 { 4920 fprintf (dump_file, "Use-candidate costs:\n"); 4921 4922 for (i = 0; i < n_iv_uses (data); i++) 4923 { 4924 use = iv_use (data, i); 4925 4926 fprintf (dump_file, "Use %d:\n", i); 4927 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n"); 4928 for (j = 0; j < use->n_map_members; j++) 4929 { 4930 if (!use->cost_map[j].cand 4931 || infinite_cost_p (use->cost_map[j].cost)) 4932 continue; 4933 4934 fprintf (dump_file, " %d\t%d\t%d\t", 4935 use->cost_map[j].cand->id, 4936 use->cost_map[j].cost.cost, 4937 use->cost_map[j].cost.complexity); 4938 if (use->cost_map[j].depends_on) 4939 bitmap_print (dump_file, 4940 use->cost_map[j].depends_on, "",""); 4941 if (use->cost_map[j].inv_expr_id != -1) 4942 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id); 4943 fprintf (dump_file, "\n"); 4944 } 4945 4946 fprintf (dump_file, "\n"); 4947 } 4948 fprintf (dump_file, "\n"); 4949 } 4950 } 4951 4952 /* Determines cost of the candidate CAND. */ 4953 4954 static void 4955 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand) 4956 { 4957 comp_cost cost_base; 4958 unsigned cost, cost_step; 4959 tree base; 4960 4961 if (!cand->iv) 4962 { 4963 cand->cost = 0; 4964 return; 4965 } 4966 4967 /* There are two costs associated with the candidate -- its increment 4968 and its initialization. The second is almost negligible for any loop 4969 that rolls enough, so we take it just very little into account. */ 4970 4971 base = cand->iv->base; 4972 cost_base = force_var_cost (data, base, NULL); 4973 /* It will be exceptional that the iv register happens to be initialized with 4974 the proper value at no cost. In general, there will at least be a regcopy 4975 or a const set. */ 4976 if (cost_base.cost == 0) 4977 cost_base.cost = COSTS_N_INSNS (1); 4978 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base))); 4979 4980 cost = cost_step + adjust_setup_cost (data, cost_base.cost); 4981 4982 /* Prefer the original ivs unless we may gain something by replacing it. 
4983 The reason is to make debugging simpler; so this is not relevant for 4984 artificial ivs created by other optimization passes. */ 4985 if (cand->pos != IP_ORIGINAL 4986 || !SSA_NAME_VAR (cand->var_before) 4987 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before))) 4988 cost++; 4989 4990 /* Prefer not to insert statements into latch unless there are some 4991 already (so that we do not create unnecessary jumps). */ 4992 if (cand->pos == IP_END 4993 && empty_block_p (ip_end_pos (data->current_loop))) 4994 cost++; 4995 4996 cand->cost = cost; 4997 cand->cost_step = cost_step; 4998 } 4999 5000 /* Determines costs of computation of the candidates. */ 5001 5002 static void 5003 determine_iv_costs (struct ivopts_data *data) 5004 { 5005 unsigned i; 5006 5007 if (dump_file && (dump_flags & TDF_DETAILS)) 5008 { 5009 fprintf (dump_file, "Candidate costs:\n"); 5010 fprintf (dump_file, " cand\tcost\n"); 5011 } 5012 5013 for (i = 0; i < n_iv_cands (data); i++) 5014 { 5015 struct iv_cand *cand = iv_cand (data, i); 5016 5017 determine_iv_cost (data, cand); 5018 5019 if (dump_file && (dump_flags & TDF_DETAILS)) 5020 fprintf (dump_file, " %d\t%d\n", i, cand->cost); 5021 } 5022 5023 if (dump_file && (dump_flags & TDF_DETAILS)) 5024 fprintf (dump_file, "\n"); 5025 } 5026 5027 /* Calculates cost for having SIZE induction variables. */ 5028 5029 static unsigned 5030 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size) 5031 { 5032 /* We add size to the cost, so that we prefer eliminating ivs 5033 if possible. */ 5034 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed, 5035 data->body_includes_call); 5036 } 5037 5038 /* For each size of the induction variable set determine the penalty. */ 5039 5040 static void 5041 determine_set_costs (struct ivopts_data *data) 5042 { 5043 unsigned j, n; 5044 gimple phi; 5045 gimple_stmt_iterator psi; 5046 tree op; 5047 struct loop *loop = data->current_loop; 5048 bitmap_iterator bi; 5049 5050 if (dump_file && (dump_flags & TDF_DETAILS)) 5051 { 5052 fprintf (dump_file, "Global costs:\n"); 5053 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs); 5054 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs); 5055 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]); 5056 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]); 5057 } 5058 5059 n = 0; 5060 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi)) 5061 { 5062 phi = gsi_stmt (psi); 5063 op = PHI_RESULT (phi); 5064 5065 if (virtual_operand_p (op)) 5066 continue; 5067 5068 if (get_iv (data, op)) 5069 continue; 5070 5071 n++; 5072 } 5073 5074 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi) 5075 { 5076 struct version_info *info = ver_info (data, j); 5077 5078 if (info->inv_id && info->has_nonlin_use) 5079 n++; 5080 } 5081 5082 data->regs_used = n; 5083 if (dump_file && (dump_flags & TDF_DETAILS)) 5084 fprintf (dump_file, " regs_used %d\n", n); 5085 5086 if (dump_file && (dump_flags & TDF_DETAILS)) 5087 { 5088 fprintf (dump_file, " cost for size:\n"); 5089 fprintf (dump_file, " ivs\tcost\n"); 5090 for (j = 0; j <= 2 * target_avail_regs; j++) 5091 fprintf (dump_file, " %d\t%d\n", j, 5092 ivopts_global_cost_for_size (data, j)); 5093 fprintf (dump_file, "\n"); 5094 } 5095 } 5096 5097 /* Returns true if A is a cheaper cost pair than B. 
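   A NULL pair is treated as infinitely expensive: any valid pair is
   cheaper than NULL, and NULL is never cheaper than anything.  Ties in
   the pair cost are broken by preferring the pair whose candidate is
   itself cheaper to maintain.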
*/ 5098 5099 static bool 5100 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b) 5101 { 5102 int cmp; 5103 5104 if (!a) 5105 return false; 5106 5107 if (!b) 5108 return true; 5109 5110 cmp = compare_costs (a->cost, b->cost); 5111 if (cmp < 0) 5112 return true; 5113 5114 if (cmp > 0) 5115 return false; 5116 5117 /* In case the costs are the same, prefer the cheaper candidate. */ 5118 if (a->cand->cost < b->cand->cost) 5119 return true; 5120 5121 return false; 5122 } 5123 5124 5125 /* Returns candidate by that USE is expressed in IVS. */ 5126 5127 static struct cost_pair * 5128 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use) 5129 { 5130 return ivs->cand_for_use[use->id]; 5131 } 5132 5133 /* Computes the cost field of IVS structure. */ 5134 5135 static void 5136 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs) 5137 { 5138 comp_cost cost = ivs->cand_use_cost; 5139 5140 cost.cost += ivs->cand_cost; 5141 5142 cost.cost += ivopts_global_cost_for_size (data, 5143 ivs->n_regs + ivs->num_used_inv_expr); 5144 5145 ivs->cost = cost; 5146 } 5147 5148 /* Remove invariants in set INVS to set IVS. */ 5149 5150 static void 5151 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs) 5152 { 5153 bitmap_iterator bi; 5154 unsigned iid; 5155 5156 if (!invs) 5157 return; 5158 5159 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi) 5160 { 5161 ivs->n_invariant_uses[iid]--; 5162 if (ivs->n_invariant_uses[iid] == 0) 5163 ivs->n_regs--; 5164 } 5165 } 5166 5167 /* Set USE not to be expressed by any candidate in IVS. */ 5168 5169 static void 5170 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs, 5171 struct iv_use *use) 5172 { 5173 unsigned uid = use->id, cid; 5174 struct cost_pair *cp; 5175 5176 cp = ivs->cand_for_use[uid]; 5177 if (!cp) 5178 return; 5179 cid = cp->cand->id; 5180 5181 ivs->bad_uses++; 5182 ivs->cand_for_use[uid] = NULL; 5183 ivs->n_cand_uses[cid]--; 5184 5185 if (ivs->n_cand_uses[cid] == 0) 5186 { 5187 bitmap_clear_bit (ivs->cands, cid); 5188 /* Do not count the pseudocandidates. */ 5189 if (cp->cand->iv) 5190 ivs->n_regs--; 5191 ivs->n_cands--; 5192 ivs->cand_cost -= cp->cand->cost; 5193 5194 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on); 5195 } 5196 5197 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost); 5198 5199 iv_ca_set_remove_invariants (ivs, cp->depends_on); 5200 5201 if (cp->inv_expr_id != -1) 5202 { 5203 ivs->used_inv_expr[cp->inv_expr_id]--; 5204 if (ivs->used_inv_expr[cp->inv_expr_id] == 0) 5205 ivs->num_used_inv_expr--; 5206 } 5207 iv_ca_recount_cost (data, ivs); 5208 } 5209 5210 /* Add invariants in set INVS to set IVS. */ 5211 5212 static void 5213 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs) 5214 { 5215 bitmap_iterator bi; 5216 unsigned iid; 5217 5218 if (!invs) 5219 return; 5220 5221 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi) 5222 { 5223 ivs->n_invariant_uses[iid]++; 5224 if (ivs->n_invariant_uses[iid] == 1) 5225 ivs->n_regs++; 5226 } 5227 } 5228 5229 /* Set cost pair for USE in set IVS to CP. 
*/ 5230 5231 static void 5232 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs, 5233 struct iv_use *use, struct cost_pair *cp) 5234 { 5235 unsigned uid = use->id, cid; 5236 5237 if (ivs->cand_for_use[uid] == cp) 5238 return; 5239 5240 if (ivs->cand_for_use[uid]) 5241 iv_ca_set_no_cp (data, ivs, use); 5242 5243 if (cp) 5244 { 5245 cid = cp->cand->id; 5246 5247 ivs->bad_uses--; 5248 ivs->cand_for_use[uid] = cp; 5249 ivs->n_cand_uses[cid]++; 5250 if (ivs->n_cand_uses[cid] == 1) 5251 { 5252 bitmap_set_bit (ivs->cands, cid); 5253 /* Do not count the pseudocandidates. */ 5254 if (cp->cand->iv) 5255 ivs->n_regs++; 5256 ivs->n_cands++; 5257 ivs->cand_cost += cp->cand->cost; 5258 5259 iv_ca_set_add_invariants (ivs, cp->cand->depends_on); 5260 } 5261 5262 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost); 5263 iv_ca_set_add_invariants (ivs, cp->depends_on); 5264 5265 if (cp->inv_expr_id != -1) 5266 { 5267 ivs->used_inv_expr[cp->inv_expr_id]++; 5268 if (ivs->used_inv_expr[cp->inv_expr_id] == 1) 5269 ivs->num_used_inv_expr++; 5270 } 5271 iv_ca_recount_cost (data, ivs); 5272 } 5273 } 5274 5275 /* Extend set IVS by expressing USE by some of the candidates in it 5276 if possible. All important candidates will be considered 5277 if IMPORTANT_CANDIDATES is true. */ 5278 5279 static void 5280 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs, 5281 struct iv_use *use, bool important_candidates) 5282 { 5283 struct cost_pair *best_cp = NULL, *cp; 5284 bitmap_iterator bi; 5285 bitmap cands; 5286 unsigned i; 5287 5288 gcc_assert (ivs->upto >= use->id); 5289 5290 if (ivs->upto == use->id) 5291 { 5292 ivs->upto++; 5293 ivs->bad_uses++; 5294 } 5295 5296 cands = (important_candidates ? data->important_candidates : ivs->cands); 5297 EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi) 5298 { 5299 struct iv_cand *cand = iv_cand (data, i); 5300 5301 cp = get_use_iv_cost (data, use, cand); 5302 5303 if (cheaper_cost_pair (cp, best_cp)) 5304 best_cp = cp; 5305 } 5306 5307 iv_ca_set_cp (data, ivs, use, best_cp); 5308 } 5309 5310 /* Get cost for assignment IVS. */ 5311 5312 static comp_cost 5313 iv_ca_cost (struct iv_ca *ivs) 5314 { 5315 /* This was a conditional expression but it triggered a bug in 5316 Sun C 5.5. */ 5317 if (ivs->bad_uses) 5318 return infinite_cost; 5319 else 5320 return ivs->cost; 5321 } 5322 5323 /* Returns true if all dependences of CP are among invariants in IVS. */ 5324 5325 static bool 5326 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp) 5327 { 5328 unsigned i; 5329 bitmap_iterator bi; 5330 5331 if (!cp->depends_on) 5332 return true; 5333 5334 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi) 5335 { 5336 if (ivs->n_invariant_uses[i] == 0) 5337 return false; 5338 } 5339 5340 return true; 5341 } 5342 5343 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains 5344 it before NEXT_CHANGE. */ 5345 5346 static struct iv_ca_delta * 5347 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp, 5348 struct cost_pair *new_cp, struct iv_ca_delta *next_change) 5349 { 5350 struct iv_ca_delta *change = XNEW (struct iv_ca_delta); 5351 5352 change->use = use; 5353 change->old_cp = old_cp; 5354 change->new_cp = new_cp; 5355 change->next_change = next_change; 5356 5357 return change; 5358 } 5359 5360 /* Joins two lists of changes L1 and L2. Destructive -- old lists 5361 are rewritten. 
*/ 5362 5363 static struct iv_ca_delta * 5364 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2) 5365 { 5366 struct iv_ca_delta *last; 5367 5368 if (!l2) 5369 return l1; 5370 5371 if (!l1) 5372 return l2; 5373 5374 for (last = l1; last->next_change; last = last->next_change) 5375 continue; 5376 last->next_change = l2; 5377 5378 return l1; 5379 } 5380 5381 /* Reverse the list of changes DELTA, forming the inverse to it. */ 5382 5383 static struct iv_ca_delta * 5384 iv_ca_delta_reverse (struct iv_ca_delta *delta) 5385 { 5386 struct iv_ca_delta *act, *next, *prev = NULL; 5387 struct cost_pair *tmp; 5388 5389 for (act = delta; act; act = next) 5390 { 5391 next = act->next_change; 5392 act->next_change = prev; 5393 prev = act; 5394 5395 tmp = act->old_cp; 5396 act->old_cp = act->new_cp; 5397 act->new_cp = tmp; 5398 } 5399 5400 return prev; 5401 } 5402 5403 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are 5404 reverted instead. */ 5405 5406 static void 5407 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs, 5408 struct iv_ca_delta *delta, bool forward) 5409 { 5410 struct cost_pair *from, *to; 5411 struct iv_ca_delta *act; 5412 5413 if (!forward) 5414 delta = iv_ca_delta_reverse (delta); 5415 5416 for (act = delta; act; act = act->next_change) 5417 { 5418 from = act->old_cp; 5419 to = act->new_cp; 5420 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from); 5421 iv_ca_set_cp (data, ivs, act->use, to); 5422 } 5423 5424 if (!forward) 5425 iv_ca_delta_reverse (delta); 5426 } 5427 5428 /* Returns true if CAND is used in IVS. */ 5429 5430 static bool 5431 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand) 5432 { 5433 return ivs->n_cand_uses[cand->id] > 0; 5434 } 5435 5436 /* Returns number of induction variable candidates in the set IVS. */ 5437 5438 static unsigned 5439 iv_ca_n_cands (struct iv_ca *ivs) 5440 { 5441 return ivs->n_cands; 5442 } 5443 5444 /* Free the list of changes DELTA. */ 5445 5446 static void 5447 iv_ca_delta_free (struct iv_ca_delta **delta) 5448 { 5449 struct iv_ca_delta *act, *next; 5450 5451 for (act = *delta; act; act = next) 5452 { 5453 next = act->next_change; 5454 free (act); 5455 } 5456 5457 *delta = NULL; 5458 } 5459 5460 /* Allocates new iv candidates assignment. */ 5461 5462 static struct iv_ca * 5463 iv_ca_new (struct ivopts_data *data) 5464 { 5465 struct iv_ca *nw = XNEW (struct iv_ca); 5466 5467 nw->upto = 0; 5468 nw->bad_uses = 0; 5469 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data)); 5470 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data)); 5471 nw->cands = BITMAP_ALLOC (NULL); 5472 nw->n_cands = 0; 5473 nw->n_regs = 0; 5474 nw->cand_use_cost = no_cost; 5475 nw->cand_cost = 0; 5476 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1); 5477 nw->cost = no_cost; 5478 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1); 5479 nw->num_used_inv_expr = 0; 5480 5481 return nw; 5482 } 5483 5484 /* Free memory occupied by the set IVS. */ 5485 5486 static void 5487 iv_ca_free (struct iv_ca **ivs) 5488 { 5489 free ((*ivs)->cand_for_use); 5490 free ((*ivs)->n_cand_uses); 5491 BITMAP_FREE ((*ivs)->cands); 5492 free ((*ivs)->n_invariant_uses); 5493 free ((*ivs)->used_inv_expr); 5494 free (*ivs); 5495 *ivs = NULL; 5496 } 5497 5498 /* Dumps IVS to FILE. 
 */

static void
iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
{
  const char *pref = "  invariants ";
  unsigned i;
  comp_cost cost = iv_ca_cost (ivs);

  fprintf (file, "  cost: %d (complexity %d)\n", cost.cost, cost.complexity);
  fprintf (file, "  cand_cost: %d\n  cand_use_cost: %d (complexity %d)\n",
	   ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
  bitmap_print (file, ivs->cands, "  candidates: ","\n");

  for (i = 0; i < ivs->upto; i++)
    {
      struct iv_use *use = iv_use (data, i);
      struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
      if (cp)
        fprintf (file, "   use:%d --> iv_cand:%d, cost=(%d,%d)\n",
		 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
      else
	fprintf (file, "   use:%d --> ??\n", use->id);
    }

  for (i = 1; i <= data->max_inv_id; i++)
    if (ivs->n_invariant_uses[i])
      {
	fprintf (file, "%s%d", pref, i);
	pref = ", ";
      }
  fprintf (file, "\n\n");
}

/* Try changing the candidate in IVS to CAND for each use.  Return the cost
   of the new set, and store the differences in DELTA.  The number of
   induction variables in the new set is stored to N_IVS.  If MIN_NCAND is
   true, the function tries to find a solution with a minimal number of iv
   candidates.  */

static comp_cost
iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_cand *cand, struct iv_ca_delta **delta,
	      unsigned *n_ivs, bool min_ncand)
{
  unsigned i;
  comp_cost cost;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp;

  *delta = NULL;
  for (i = 0; i < ivs->upto; i++)
    {
      use = iv_use (data, i);
      old_cp = iv_ca_cand_for_use (ivs, use);

      if (old_cp
	  && old_cp->cand == cand)
	continue;

      new_cp = get_use_iv_cost (data, use, cand);
      if (!new_cp)
	continue;

      if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
	continue;

      if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
	continue;

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  if (n_ivs)
    *n_ivs = iv_ca_n_cands (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}

/* Try narrowing set IVS by removing CAND.  Return the cost of
   the new set and store the differences in DELTA.
*/ 5581 5582 static comp_cost 5583 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs, 5584 struct iv_cand *cand, struct iv_ca_delta **delta) 5585 { 5586 unsigned i, ci; 5587 struct iv_use *use; 5588 struct cost_pair *old_cp, *new_cp, *cp; 5589 bitmap_iterator bi; 5590 struct iv_cand *cnd; 5591 comp_cost cost; 5592 5593 *delta = NULL; 5594 for (i = 0; i < n_iv_uses (data); i++) 5595 { 5596 use = iv_use (data, i); 5597 5598 old_cp = iv_ca_cand_for_use (ivs, use); 5599 if (old_cp->cand != cand) 5600 continue; 5601 5602 new_cp = NULL; 5603 5604 if (data->consider_all_candidates) 5605 { 5606 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi) 5607 { 5608 if (ci == cand->id) 5609 continue; 5610 5611 cnd = iv_cand (data, ci); 5612 5613 cp = get_use_iv_cost (data, use, cnd); 5614 if (!cp) 5615 continue; 5616 5617 if (!iv_ca_has_deps (ivs, cp)) 5618 continue; 5619 5620 if (!cheaper_cost_pair (cp, new_cp)) 5621 continue; 5622 5623 new_cp = cp; 5624 } 5625 } 5626 else 5627 { 5628 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi) 5629 { 5630 if (ci == cand->id) 5631 continue; 5632 5633 cnd = iv_cand (data, ci); 5634 5635 cp = get_use_iv_cost (data, use, cnd); 5636 if (!cp) 5637 continue; 5638 if (!iv_ca_has_deps (ivs, cp)) 5639 continue; 5640 5641 if (!cheaper_cost_pair (cp, new_cp)) 5642 continue; 5643 5644 new_cp = cp; 5645 } 5646 } 5647 5648 if (!new_cp) 5649 { 5650 iv_ca_delta_free (delta); 5651 return infinite_cost; 5652 } 5653 5654 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta); 5655 } 5656 5657 iv_ca_delta_commit (data, ivs, *delta, true); 5658 cost = iv_ca_cost (ivs); 5659 iv_ca_delta_commit (data, ivs, *delta, false); 5660 5661 return cost; 5662 } 5663 5664 /* Try optimizing the set of candidates IVS by removing candidates different 5665 from to EXCEPT_CAND from it. Return cost of the new set, and store 5666 differences in DELTA. */ 5667 5668 static comp_cost 5669 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs, 5670 struct iv_cand *except_cand, struct iv_ca_delta **delta) 5671 { 5672 bitmap_iterator bi; 5673 struct iv_ca_delta *act_delta, *best_delta; 5674 unsigned i; 5675 comp_cost best_cost, acost; 5676 struct iv_cand *cand; 5677 5678 best_delta = NULL; 5679 best_cost = iv_ca_cost (ivs); 5680 5681 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi) 5682 { 5683 cand = iv_cand (data, i); 5684 5685 if (cand == except_cand) 5686 continue; 5687 5688 acost = iv_ca_narrow (data, ivs, cand, &act_delta); 5689 5690 if (compare_costs (acost, best_cost) < 0) 5691 { 5692 best_cost = acost; 5693 iv_ca_delta_free (&best_delta); 5694 best_delta = act_delta; 5695 } 5696 else 5697 iv_ca_delta_free (&act_delta); 5698 } 5699 5700 if (!best_delta) 5701 { 5702 *delta = NULL; 5703 return best_cost; 5704 } 5705 5706 /* Recurse to possibly remove other unnecessary ivs. */ 5707 iv_ca_delta_commit (data, ivs, best_delta, true); 5708 best_cost = iv_ca_prune (data, ivs, except_cand, delta); 5709 iv_ca_delta_commit (data, ivs, best_delta, false); 5710 *delta = iv_ca_delta_join (best_delta, *delta); 5711 return best_cost; 5712 } 5713 5714 /* Tries to extend the sets IVS in the best possible way in order 5715 to express the USE. If ORIGINALP is true, prefer candidates from 5716 the original set of IVs, otherwise favor important candidates not 5717 based on any memory object. 
*/ 5718 5719 static bool 5720 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs, 5721 struct iv_use *use, bool originalp) 5722 { 5723 comp_cost best_cost, act_cost; 5724 unsigned i; 5725 bitmap_iterator bi; 5726 struct iv_cand *cand; 5727 struct iv_ca_delta *best_delta = NULL, *act_delta; 5728 struct cost_pair *cp; 5729 5730 iv_ca_add_use (data, ivs, use, false); 5731 best_cost = iv_ca_cost (ivs); 5732 5733 cp = iv_ca_cand_for_use (ivs, use); 5734 if (!cp) 5735 { 5736 ivs->upto--; 5737 ivs->bad_uses--; 5738 iv_ca_add_use (data, ivs, use, true); 5739 best_cost = iv_ca_cost (ivs); 5740 cp = iv_ca_cand_for_use (ivs, use); 5741 } 5742 if (cp) 5743 { 5744 best_delta = iv_ca_delta_add (use, NULL, cp, NULL); 5745 iv_ca_set_no_cp (data, ivs, use); 5746 } 5747 5748 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise 5749 first try important candidates not based on any memory object. Only if 5750 this fails, try the specific ones. Rationale -- in loops with many 5751 variables the best choice often is to use just one generic biv. If we 5752 added here many ivs specific to the uses, the optimization algorithm later 5753 would be likely to get stuck in a local minimum, thus causing us to create 5754 too many ivs. The approach from few ivs to more seems more likely to be 5755 successful -- starting from few ivs, replacing an expensive use by a 5756 specific iv should always be a win. */ 5757 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi) 5758 { 5759 cand = iv_cand (data, i); 5760 5761 if (originalp && cand->pos !=IP_ORIGINAL) 5762 continue; 5763 5764 if (!originalp && cand->iv->base_object != NULL_TREE) 5765 continue; 5766 5767 if (iv_ca_cand_used_p (ivs, cand)) 5768 continue; 5769 5770 cp = get_use_iv_cost (data, use, cand); 5771 if (!cp) 5772 continue; 5773 5774 iv_ca_set_cp (data, ivs, use, cp); 5775 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, 5776 true); 5777 iv_ca_set_no_cp (data, ivs, use); 5778 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta); 5779 5780 if (compare_costs (act_cost, best_cost) < 0) 5781 { 5782 best_cost = act_cost; 5783 5784 iv_ca_delta_free (&best_delta); 5785 best_delta = act_delta; 5786 } 5787 else 5788 iv_ca_delta_free (&act_delta); 5789 } 5790 5791 if (infinite_cost_p (best_cost)) 5792 { 5793 for (i = 0; i < use->n_map_members; i++) 5794 { 5795 cp = use->cost_map + i; 5796 cand = cp->cand; 5797 if (!cand) 5798 continue; 5799 5800 /* Already tried this. */ 5801 if (cand->important) 5802 { 5803 if (originalp && cand->pos == IP_ORIGINAL) 5804 continue; 5805 if (!originalp && cand->iv->base_object == NULL_TREE) 5806 continue; 5807 } 5808 5809 if (iv_ca_cand_used_p (ivs, cand)) 5810 continue; 5811 5812 act_delta = NULL; 5813 iv_ca_set_cp (data, ivs, use, cp); 5814 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true); 5815 iv_ca_set_no_cp (data, ivs, use); 5816 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use), 5817 cp, act_delta); 5818 5819 if (compare_costs (act_cost, best_cost) < 0) 5820 { 5821 best_cost = act_cost; 5822 5823 if (best_delta) 5824 iv_ca_delta_free (&best_delta); 5825 best_delta = act_delta; 5826 } 5827 else 5828 iv_ca_delta_free (&act_delta); 5829 } 5830 } 5831 5832 iv_ca_delta_commit (data, ivs, best_delta, true); 5833 iv_ca_delta_free (&best_delta); 5834 5835 return !infinite_cost_p (best_cost); 5836 } 5837 5838 /* Finds an initial assignment of candidates to uses. 
*/ 5839 5840 static struct iv_ca * 5841 get_initial_solution (struct ivopts_data *data, bool originalp) 5842 { 5843 struct iv_ca *ivs = iv_ca_new (data); 5844 unsigned i; 5845 5846 for (i = 0; i < n_iv_uses (data); i++) 5847 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp)) 5848 { 5849 iv_ca_free (&ivs); 5850 return NULL; 5851 } 5852 5853 return ivs; 5854 } 5855 5856 /* Tries to improve set of induction variables IVS. */ 5857 5858 static bool 5859 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs) 5860 { 5861 unsigned i, n_ivs; 5862 comp_cost acost, best_cost = iv_ca_cost (ivs); 5863 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta; 5864 struct iv_cand *cand; 5865 5866 /* Try extending the set of induction variables by one. */ 5867 for (i = 0; i < n_iv_cands (data); i++) 5868 { 5869 cand = iv_cand (data, i); 5870 5871 if (iv_ca_cand_used_p (ivs, cand)) 5872 continue; 5873 5874 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false); 5875 if (!act_delta) 5876 continue; 5877 5878 /* If we successfully added the candidate and the set is small enough, 5879 try optimizing it by removing other candidates. */ 5880 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND) 5881 { 5882 iv_ca_delta_commit (data, ivs, act_delta, true); 5883 acost = iv_ca_prune (data, ivs, cand, &tmp_delta); 5884 iv_ca_delta_commit (data, ivs, act_delta, false); 5885 act_delta = iv_ca_delta_join (act_delta, tmp_delta); 5886 } 5887 5888 if (compare_costs (acost, best_cost) < 0) 5889 { 5890 best_cost = acost; 5891 iv_ca_delta_free (&best_delta); 5892 best_delta = act_delta; 5893 } 5894 else 5895 iv_ca_delta_free (&act_delta); 5896 } 5897 5898 if (!best_delta) 5899 { 5900 /* Try removing the candidates from the set instead. */ 5901 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta); 5902 5903 /* Nothing more we can do. */ 5904 if (!best_delta) 5905 return false; 5906 } 5907 5908 iv_ca_delta_commit (data, ivs, best_delta, true); 5909 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0); 5910 iv_ca_delta_free (&best_delta); 5911 return true; 5912 } 5913 5914 /* Attempts to find the optimal set of induction variables. We do simple 5915 greedy heuristic -- we try to replace at most one candidate in the selected 5916 solution and remove the unused ivs while this improves the cost. */ 5917 5918 static struct iv_ca * 5919 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp) 5920 { 5921 struct iv_ca *set; 5922 5923 /* Get the initial solution. */ 5924 set = get_initial_solution (data, originalp); 5925 if (!set) 5926 { 5927 if (dump_file && (dump_flags & TDF_DETAILS)) 5928 fprintf (dump_file, "Unable to substitute for ivs, failed.\n"); 5929 return NULL; 5930 } 5931 5932 if (dump_file && (dump_flags & TDF_DETAILS)) 5933 { 5934 fprintf (dump_file, "Initial set of candidates:\n"); 5935 iv_ca_dump (data, dump_file, set); 5936 } 5937 5938 while (try_improve_iv_set (data, set)) 5939 { 5940 if (dump_file && (dump_flags & TDF_DETAILS)) 5941 { 5942 fprintf (dump_file, "Improved to:\n"); 5943 iv_ca_dump (data, dump_file, set); 5944 } 5945 } 5946 5947 return set; 5948 } 5949 5950 static struct iv_ca * 5951 find_optimal_iv_set (struct ivopts_data *data) 5952 { 5953 unsigned i; 5954 struct iv_ca *set, *origset; 5955 struct iv_use *use; 5956 comp_cost cost, origcost; 5957 5958 /* Determine the cost based on a strategy that starts with original IVs, 5959 and try again using a strategy that prefers candidates not based 5960 on any IVs. 
*/ 5961 origset = find_optimal_iv_set_1 (data, true); 5962 set = find_optimal_iv_set_1 (data, false); 5963 5964 if (!origset && !set) 5965 return NULL; 5966 5967 origcost = origset ? iv_ca_cost (origset) : infinite_cost; 5968 cost = set ? iv_ca_cost (set) : infinite_cost; 5969 5970 if (dump_file && (dump_flags & TDF_DETAILS)) 5971 { 5972 fprintf (dump_file, "Original cost %d (complexity %d)\n\n", 5973 origcost.cost, origcost.complexity); 5974 fprintf (dump_file, "Final cost %d (complexity %d)\n\n", 5975 cost.cost, cost.complexity); 5976 } 5977 5978 /* Choose the one with the best cost. */ 5979 if (compare_costs (origcost, cost) <= 0) 5980 { 5981 if (set) 5982 iv_ca_free (&set); 5983 set = origset; 5984 } 5985 else if (origset) 5986 iv_ca_free (&origset); 5987 5988 for (i = 0; i < n_iv_uses (data); i++) 5989 { 5990 use = iv_use (data, i); 5991 use->selected = iv_ca_cand_for_use (set, use)->cand; 5992 } 5993 5994 return set; 5995 } 5996 5997 /* Creates a new induction variable corresponding to CAND. */ 5998 5999 static void 6000 create_new_iv (struct ivopts_data *data, struct iv_cand *cand) 6001 { 6002 gimple_stmt_iterator incr_pos; 6003 tree base; 6004 bool after = false; 6005 6006 if (!cand->iv) 6007 return; 6008 6009 switch (cand->pos) 6010 { 6011 case IP_NORMAL: 6012 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop)); 6013 break; 6014 6015 case IP_END: 6016 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop)); 6017 after = true; 6018 break; 6019 6020 case IP_AFTER_USE: 6021 after = true; 6022 /* fall through */ 6023 case IP_BEFORE_USE: 6024 incr_pos = gsi_for_stmt (cand->incremented_at); 6025 break; 6026 6027 case IP_ORIGINAL: 6028 /* Mark that the iv is preserved. */ 6029 name_info (data, cand->var_before)->preserve_biv = true; 6030 name_info (data, cand->var_after)->preserve_biv = true; 6031 6032 /* Rewrite the increment so that it uses var_before directly. */ 6033 find_interesting_uses_op (data, cand->var_after)->selected = cand; 6034 return; 6035 } 6036 6037 gimple_add_tmp_var (cand->var_before); 6038 6039 base = unshare_expr (cand->iv->base); 6040 6041 create_iv (base, unshare_expr (cand->iv->step), 6042 cand->var_before, data->current_loop, 6043 &incr_pos, after, &cand->var_before, &cand->var_after); 6044 } 6045 6046 /* Creates new induction variables described in SET. */ 6047 6048 static void 6049 create_new_ivs (struct ivopts_data *data, struct iv_ca *set) 6050 { 6051 unsigned i; 6052 struct iv_cand *cand; 6053 bitmap_iterator bi; 6054 6055 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi) 6056 { 6057 cand = iv_cand (data, i); 6058 create_new_iv (data, cand); 6059 } 6060 6061 if (dump_file && (dump_flags & TDF_DETAILS)) 6062 { 6063 fprintf (dump_file, "\nSelected IV set: \n"); 6064 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi) 6065 { 6066 cand = iv_cand (data, i); 6067 dump_cand (dump_file, cand); 6068 } 6069 fprintf (dump_file, "\n"); 6070 } 6071 } 6072 6073 /* Rewrites USE (definition of iv used in a nonlinear expression) 6074 using candidate CAND. */ 6075 6076 static void 6077 rewrite_use_nonlinear_expr (struct ivopts_data *data, 6078 struct iv_use *use, struct iv_cand *cand) 6079 { 6080 tree comp; 6081 tree op, tgt; 6082 gimple ass; 6083 gimple_stmt_iterator bsi; 6084 6085 /* An important special case -- if we are asked to express value of 6086 the original iv by itself, just exit; there is no need to 6087 introduce a new computation (that might also need casting the 6088 variable to unsigned and back). 
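     For example, for a preserved biv incremented as i_2 = i_1 + 1 (names
     hypothetical), the increment statement is left untouched as long as
     the step operand is invariant in the loop.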
*/ 6089 if (cand->pos == IP_ORIGINAL 6090 && cand->incremented_at == use->stmt) 6091 { 6092 enum tree_code stmt_code; 6093 6094 gcc_assert (is_gimple_assign (use->stmt)); 6095 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after); 6096 6097 /* Check whether we may leave the computation unchanged. 6098 This is the case only if it does not rely on other 6099 computations in the loop -- otherwise, the computation 6100 we rely upon may be removed in remove_unused_ivs, 6101 thus leading to ICE. */ 6102 stmt_code = gimple_assign_rhs_code (use->stmt); 6103 if (stmt_code == PLUS_EXPR 6104 || stmt_code == MINUS_EXPR 6105 || stmt_code == POINTER_PLUS_EXPR) 6106 { 6107 if (gimple_assign_rhs1 (use->stmt) == cand->var_before) 6108 op = gimple_assign_rhs2 (use->stmt); 6109 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before) 6110 op = gimple_assign_rhs1 (use->stmt); 6111 else 6112 op = NULL_TREE; 6113 } 6114 else 6115 op = NULL_TREE; 6116 6117 if (op && expr_invariant_in_loop_p (data->current_loop, op)) 6118 return; 6119 } 6120 6121 comp = get_computation (data->current_loop, use, cand); 6122 gcc_assert (comp != NULL_TREE); 6123 6124 switch (gimple_code (use->stmt)) 6125 { 6126 case GIMPLE_PHI: 6127 tgt = PHI_RESULT (use->stmt); 6128 6129 /* If we should keep the biv, do not replace it. */ 6130 if (name_info (data, tgt)->preserve_biv) 6131 return; 6132 6133 bsi = gsi_after_labels (gimple_bb (use->stmt)); 6134 break; 6135 6136 case GIMPLE_ASSIGN: 6137 tgt = gimple_assign_lhs (use->stmt); 6138 bsi = gsi_for_stmt (use->stmt); 6139 break; 6140 6141 default: 6142 gcc_unreachable (); 6143 } 6144 6145 if (!valid_gimple_rhs_p (comp) 6146 || (gimple_code (use->stmt) != GIMPLE_PHI 6147 /* We can't allow re-allocating the stmt as it might be pointed 6148 to still. */ 6149 && (get_gimple_rhs_num_ops (TREE_CODE (comp)) 6150 >= gimple_num_ops (gsi_stmt (bsi))))) 6151 { 6152 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE, 6153 true, GSI_SAME_STMT); 6154 if (POINTER_TYPE_P (TREE_TYPE (tgt))) 6155 { 6156 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt)); 6157 /* As this isn't a plain copy we have to reset alignment 6158 information. */ 6159 if (SSA_NAME_PTR_INFO (comp)) 6160 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp)); 6161 } 6162 } 6163 6164 if (gimple_code (use->stmt) == GIMPLE_PHI) 6165 { 6166 ass = gimple_build_assign (tgt, comp); 6167 gsi_insert_before (&bsi, ass, GSI_SAME_STMT); 6168 6169 bsi = gsi_for_stmt (use->stmt); 6170 remove_phi_node (&bsi, false); 6171 } 6172 else 6173 { 6174 gimple_assign_set_rhs_from_tree (&bsi, comp); 6175 use->stmt = gsi_stmt (bsi); 6176 } 6177 } 6178 6179 /* Performs a peephole optimization to reorder the iv update statement with 6180 a mem ref to enable instruction combining in later phases. The mem ref uses 6181 the iv value before the update, so the reordering transformation requires 6182 adjustment of the offset. CAND is the selected IV_CAND. 6183 6184 Example: 6185 6186 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset 6187 iv2 = iv1 + 1; 6188 6189 if (t < val) (1) 6190 goto L; 6191 goto Head; 6192 6193 6194 directly propagating t over to (1) will introduce overlapping live range 6195 thus increase register pressure. 
   This peephole transforms it into:

     iv2 = iv1 + 1;
     t = MEM_REF (base, iv2, 8, 8);
     if (t < val)
       goto L;
     goto Head;
*/

static void
adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
{
  tree var_after;
  gimple iv_update, stmt;
  basic_block bb;
  gimple_stmt_iterator gsi, gsi_iv;

  if (cand->pos != IP_NORMAL)
    return;

  var_after = cand->var_after;
  iv_update = SSA_NAME_DEF_STMT (var_after);

  bb = gimple_bb (iv_update);
  gsi = gsi_last_nondebug_bb (bb);
  stmt = gsi_stmt (gsi);

  /* Only handle conditional statements for now. */
  if (gimple_code (stmt) != GIMPLE_COND)
    return;

  gsi_prev_nondebug (&gsi);
  stmt = gsi_stmt (gsi);
  if (stmt != iv_update)
    return;

  gsi_prev_nondebug (&gsi);
  if (gsi_end_p (gsi))
    return;

  stmt = gsi_stmt (gsi);
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return;

  if (stmt != use->stmt)
    return;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Reordering \n");
      print_gimple_stmt (dump_file, iv_update, 0, 0);
      print_gimple_stmt (dump_file, use->stmt, 0, 0);
      fprintf (dump_file, "\n");
    }

  gsi = gsi_for_stmt (use->stmt);
  gsi_iv = gsi_for_stmt (iv_update);
  gsi_move_before (&gsi_iv, &gsi);

  cand->pos = IP_BEFORE_USE;
  cand->incremented_at = use->stmt;
}

/* Rewrites USE (address that is an iv) using candidate CAND. */

static void
rewrite_use_address (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  tree base_hint = NULL_TREE;
  tree ref, iv;
  bool ok;

  adjust_iv_update_pos (cand, use);
  ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  /* To avoid undefined overflow problems, all IV candidates use unsigned
     integer types. The drawback is that this makes it impossible for
     create_mem_ref to distinguish an IV that is based on a memory object
     from one that represents simply an offset.

     To work around this problem, we pass a hint to create_mem_ref that
     indicates which variable (if any) in aff is an IV based on a memory
     object. Note that we only consider the candidate. If this is not
     based on an object, the base of the reference is in some subexpression
     of the use -- but these will use pointer types, so they are recognized
     by the create_mem_ref heuristics anyway. */
  if (cand->iv->base_object)
    base_hint = var_at_stmt (data->current_loop, cand, use->stmt);

  iv = var_at_stmt (data->current_loop, cand, use->stmt);
  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
			reference_alias_ptr_type (*use->op_p),
			iv, base_hint, data->speed);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}

/* Rewrites USE (a condition in which one of the arguments is an iv) using
   candidate CAND.
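
   For example (with illustrative SSA names), when iv elimination succeeded,
   an exit test such as

     if (i_23 < n_6)

   may be rewritten to compare the candidate against the precomputed bound
   CP->value, e.g.

     if (ivtmp_17 != bound_9)

   where bound_9 is computed on the preheader edge.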
*/

static void
rewrite_use_compare (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, *var_p, op, bound;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
  bool ok;

  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);
      gimple_seq stmts;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Replacing exit test: ");
	  print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
	}
      compare = cp->comp;
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
      if (stmts)
	gsi_insert_seq_on_edge_immediate (
		loop_preheader_edge (data->current_loop),
		stmts);

      gimple_cond_set_lhs (use->stmt, var);
      gimple_cond_set_code (use->stmt, compare);
      gimple_cond_set_rhs (use->stmt, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv. */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
  gcc_assert (ok);

  *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
				     true, GSI_SAME_STMT);
}

/* Rewrites USE using candidate CAND. */

static void
rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      rewrite_use_nonlinear_expr (data, use, cand);
      break;

    case USE_ADDRESS:
      rewrite_use_address (data, use, cand);
      break;

    case USE_COMPARE:
      rewrite_use_compare (data, use, cand);
      break;

    default:
      gcc_unreachable ();
    }

  update_stmt (use->stmt);
}

/* Rewrite the uses using the selected induction variables. */

static void
rewrite_uses (struct ivopts_data *data)
{
  unsigned i;
  struct iv_cand *cand;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      cand = use->selected;
      gcc_assert (cand);

      rewrite_use (data, use, cand);
    }
}

/* Removes the ivs that are not used after rewriting. */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;
  bitmap toremove = BITMAP_ALLOC (NULL);

  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.
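
     When an iv in TOREMOVE still has debug uses, its value is first
     re-expressed in terms of one of the selected candidates, e.g.
     (schematically)

       # DEBUG i => i_3

     becomes, when i_3 has several debug uses,

       D#1 = <i_3 expressed using the chosen candidate>
       # DEBUG i => D#1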
*/
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
	  && !integer_zerop (info->iv->step)
	  && !info->inv_id
	  && !info->iv->have_use_for
	  && !info->preserve_biv)
	{
	  bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));

	  tree def = info->iv->ssa_name;

	  if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
	    {
	      imm_use_iterator imm_iter;
	      use_operand_p use_p;
	      gimple stmt;
	      int count = 0;

	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
		{
		  if (!gimple_debug_bind_p (stmt))
		    continue;

		  /* We just want to determine whether to do nothing
		     (count == 0), to substitute the computed
		     expression into a single use of the SSA DEF by
		     itself (count == 1), or to use a debug temp
		     because the SSA DEF is used multiple times or as
		     part of a larger expression (count > 1). */
		  count++;
		  if (gimple_debug_bind_get_value (stmt) != def)
		    count++;

		  if (count > 1)
		    BREAK_FROM_IMM_USE_STMT (imm_iter);
		}

	      if (!count)
		continue;

	      struct iv_use dummy_use;
	      struct iv_cand *best_cand = NULL, *cand;
	      unsigned i, best_pref = 0, cand_pref;

	      memset (&dummy_use, 0, sizeof (dummy_use));
	      dummy_use.iv = info->iv;
	      for (i = 0; i < n_iv_uses (data) && i < 64; i++)
		{
		  cand = iv_use (data, i)->selected;
		  if (cand == best_cand)
		    continue;
		  cand_pref = operand_equal_p (cand->iv->step,
					       info->iv->step, 0)
		    ? 4 : 0;
		  cand_pref
		    += TYPE_MODE (TREE_TYPE (cand->iv->base))
		    == TYPE_MODE (TREE_TYPE (info->iv->base))
		    ? 2 : 0;
		  cand_pref
		    += TREE_CODE (cand->iv->base) == INTEGER_CST
		    ? 1 : 0;
		  if (best_cand == NULL || best_pref < cand_pref)
		    {
		      best_cand = cand;
		      best_pref = cand_pref;
		    }
		}

	      if (!best_cand)
		continue;

	      tree comp = get_computation_at (data->current_loop,
					      &dummy_use, best_cand,
					      SSA_NAME_DEF_STMT (def));
	      if (!comp)
		continue;

	      if (count > 1)
		{
		  tree vexpr = make_node (DEBUG_EXPR_DECL);
		  DECL_ARTIFICIAL (vexpr) = 1;
		  TREE_TYPE (vexpr) = TREE_TYPE (comp);
		  if (SSA_NAME_VAR (def))
		    DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
		  else
		    DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
		  gimple def_temp = gimple_build_debug_bind (vexpr, comp, NULL);
		  gimple_stmt_iterator gsi;

		  if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
		    gsi = gsi_after_labels (gimple_bb
					    (SSA_NAME_DEF_STMT (def)));
		  else
		    gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));

		  gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
		  comp = vexpr;
		}

	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
		{
		  if (!gimple_debug_bind_p (stmt))
		    continue;

		  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
		    SET_USE (use_p, comp);

		  update_stmt (stmt);
		}
	    }
	}
    }

  release_defs_bitset (toremove);

  BITMAP_FREE (toremove);
}

/* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
   for pointer_map_traverse.
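   Always returns true so that pointer_map_traverse keeps iterating over
   the remaining entries.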
*/

static bool
free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
		      void *data ATTRIBUTE_UNUSED)
{
  struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;

  free (niter);
  return true;
}

/* Frees data allocated by the optimization of a single loop. */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
      pointer_map_destroy (data->niters);
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
	if (use->cost_map[j].depends_on)
	  BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  data->iv_uses.truncate (0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      free (cand->iv);
      if (cand->depends_on)
	BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  data->iv_candidates.truncate (0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
    }

  data->max_inv_id = 0;

  FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
    SET_DECL_RTL (obj, NULL_RTX);

  decl_rtl_to_reset.truncate (0);

  htab_empty (data->inv_expr_tab);
  data->inv_expr_id = 0;
}

/* Finalizes data structures used by the iv optimization pass. */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  decl_rtl_to_reset.release ();
  data->iv_uses.release ();
  data->iv_candidates.release ();
  htab_delete (data->inv_expr_tab);
}

/* Returns true if the loop body BODY includes any function calls. */

static bool
loop_body_includes_call (basic_block *body, unsigned num_nodes)
{
  gimple_stmt_iterator gsi;
  unsigned i;

  for (i = 0; i < num_nodes; i++)
    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple stmt = gsi_stmt (gsi);
	if (is_gimple_call (stmt)
	    && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
	  return true;
      }
  return false;
}

/* Optimizes the LOOP. Returns true if anything changed.
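
   Schematically (the actual outcome depends on the target's cost model
   and addressing modes), a loop such as

     for (i = 0; i < n; i++)
       a[i] = 0;

   may have its index iv strength-reduced to a single pointer iv:

     for (p = &a[0]; p != &a[n]; p++)
       *p = 0;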
*/

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit = single_dom_exit (loop);
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      if (exit)
	{
	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
		   exit->src->index, exit->dest->index);
	  print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
	  fprintf (dump_file, "\n");
	}

      fprintf (dump_file, "\n");
    }

  body = get_loop_body (loop);
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

  data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);

  /* For each ssa name, determine whether it behaves as an induction variable
     in some loop. */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1). */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2). */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1). */
  determine_iv_costs (data);
  determine_use_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2). */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1). */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2). */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting. */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated. */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}

/* Main entry point. Optimizes induction variables in loops. */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;
  loop_iterator li;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones. */
  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}