/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does so in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
	 uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
	 induction variable in the set and adds its cost to the sum.  The cost
	 reflects the time spent on modifying the induction variable's value
	 to be usable for the given purpose (adding base and offset for
	 arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
	 reflects the costs associated with incrementing the value of the
	 variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
	 added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
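/* As an illustrative sketch of the transformations above (an example only,
   not part of the implementation), strength reduction rewrites a loop such
   as

     int i;
     for (i = 0; i < n; i++)
       a[i] = 0;

   into something like

     int *p;
     for (p = a; p < a + n; p++)
       *p = 0;

   replacing the multiplication implicit in the array indexing
   (i * sizeof (int)) with a cheap pointer increment.  Induction variable
   elimination then lets the exit test use P directly, so I can be removed
   altogether.  */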
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "rtl.h"
#include "tm_p.h"
#include "hard-reg-set.h"
#include "basic-block.h"
#include "output.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "varray.h"
#include "expr.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "recog.h"
#include "pointer-set.h"
#include "hashtab.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"

/* The infinite cost.  */
#define INFTY 10000000

/* The expected number of loop iterations.  TODO -- use profiling instead of
   this.  */
#define AVG_LOOP_NITER(LOOP) 5


/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to which the induction variable
			   points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  unsigned use_id;	/* The identifier in the use if it is the case.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  unsigned inv_id;	/* Id of an invariant.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};
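/* For illustration (an example only), consider

     for (i = 0; i < n; i++)
       {
	 sum += i;	-- use of I in a nonlinear expression
	 a[i] = 0;	-- use of I in the address of a[i]
       }		-- the exit test i < n compares I with an invariant

   The uses of I roughly correspond to USE_NONLINEAR_EXPR (sum += i),
   USE_ADDRESS (the address a + i * sizeof (*a)) and USE_COMPARE
   (i < n).  */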
/* Cost of a computation.  */
typedef struct
{
  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units -- the
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost zero_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs with respect to the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  enum iv_position pos;	/* Where it is computed.  */
  gimple incremented_at;/* For the original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;
};
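/* As a sketch of the cost model described above (not code the pass runs),
   the cost of a candidate set S for the set of uses U is

     cost (S) =   sum over u in U of  min over c in S of  cost (u, c)
		+ sum over c in S of  cost (c)
		+ a register-pressure cost depending on the number of ivs,

   which is what an iv_ca assignment below accumulates in its cand_use_cost,
   cand_cost and cost fields.  */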
/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on the number of candidates below which all candidates are
   considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return VEC_length (iv_use_p, data->iv_uses);
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_use_p, data->iv_uses, i);
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return VEC_length (iv_cand_p, data->iv_candidates);
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_cand_p, data->iv_candidates, i);
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}
/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

extern void dump_uses (FILE *, struct ivopts_data *);
void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}

/* Returns the info for ssa version VER.  */
static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}

/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */
bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Returns a tree describing the number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static tree
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc desc;
  tree niter;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine the number of iterations.  We must know it
	 unconditionally (i.e., without possibility of # of iterations
	 being zero).  Also, we cannot safely work with ssa names that
	 appear in phi nodes on abnormal edges, so that we do not create
	 overlapping life ranges for them (PR 27283).  */
      if (number_of_iterations_exit (data->current_loop,
				     exit, &desc, true)
	  && integer_zerop (desc.may_be_zero)
	  && !contains_abnormal_ssa_name_p (desc.niter))
	niter = desc.niter;
      else
	niter = NULL_TREE;

      *pointer_map_insert (data->niters, exit) = niter;
    }
  else
    niter = (tree) *slot;

  return niter;
}

/* Returns a tree describing the number of iterations determined from
   the single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static tree
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
}

/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == INDIRECT_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
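/* For example (purely as an illustration), given an array A and a pointer P,
   determine_base_object returns &a for the address &a[i] (the ADDR_EXPR
   case), recurses to the base object of P for p + 4 (the POINTER_PLUS_EXPR
   case), and returns NULL_TREE for an integer constant, which points to no
   object.  */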
/* Allocates an induction variable with the given initial value BASE and
   step STEP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds the induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (!is_gimple_reg (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}
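/* To make the biv shape concrete (an example only), a basic induction
   variable in gimple form looks like

     # i_1 = PHI <0(preheader), i_2(latch)>
     ...
     i_2 = i_1 + 1;

   and determine_biv_step applied to the phi defining i_1 returns the step 1,
   whereas for an invariant phi such as

     # j_1 = PHI <j_0(preheader), j_1(latch)>

   it returns NULL_TREE, since the step is zero.  */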
/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      step = determine_biv_step (phi);
      if (!step)
	continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  if (POINTER_TYPE_P (type))
	    step = fold_convert (sizetype, step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}
/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      tree niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter, TDF_SLIM);
	  fprintf (dump_file, "\n\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv);
	}
    }

  return true;
}

/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* To avoid showing the ssa name in the dumps, if it was not reset by the
     caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  VEC_safe_push (iv_use_p, heap, data->iv_uses, use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || !is_gimple_reg (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}
/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
	      || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to the location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
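/* As an example (illustration only), for the exit condition

     if (i_3 < n_7)

   where i_3 is an induction variable and n_7 is loop invariant,
   extract_cond_operands returns true with *CONTROL_VAR pointing to i_3 and
   *BOUND pointing to n_7; for n_7 < i_3 the operands are swapped first, and
   for a comparison of two invariants (or of two ivs) it returns false.  */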
/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Returns true if statement STMT is obviously invariant in LOOP,
   i.e. if all its operands on the RHS are defined outside of the LOOP.
   LOOP should not be the function body.  */

bool
stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
{
  unsigned i;
  tree lhs;

  gcc_assert (loop_depth (loop) > 0);

  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); i++)
    {
      tree op = gimple_op (stmt, i);
      if (op != lhs && !expr_invariant_in_loop_p (loop, op))
	return false;
    }

  return true;
}

/* Accumulates the steps of indices into DATA and replaces their values with
   the initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
      || TREE_CODE (base) == ALIGN_INDIRECT_REF)
    return false;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is an array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both the lower bound
     and the step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
	return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetic already is 1 byte.  */
    step = build_int_cst (sizetype, 1);

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}
/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
			      precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = double_int_neg (p1);
      *mul = double_int_sext (double_int_add (p0, p1), precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = double_int_sext (tree_to_double_int (top), precision);
      p1 = double_int_sext (tree_to_double_int (bot), precision);
      if (double_int_zero_p (p1))
	return false;
      *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
			      precision);
      return double_int_zero_p (res);

    default:
      return false;
    }
}
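/* Worked examples (illustration only): with TOP = n_5 * 8 and BOT = n_5,
   the MULT_EXPR case yields MUL = 8; with TOP = n_5 * 8 - n_5 the
   MINUS_EXPR case combines the two recursive results into MUL = 7; with
   TOP = 12 and BOT = 4 the INTEGER_CST case yields MUL = 3; and with
   TOP = 13 and BOT = 4 it fails, since the division leaves a nonzero
   remainder.  */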
/* Returns true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically a copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
			      &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = TYPE_ALIGN (base_type);

  if (mode != BLKmode)
    {
      unsigned mode_align = GET_MODE_ALIGNMENT (mode);

      if (base_align < mode_align
	  || (bitpos % mode_align) != 0
	  || (bitpos % BITS_PER_UNIT) != 0)
	return true;

      if (toffset
	  && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
	return true;

      if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
	return true;
    }

  return false;
}

/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
	 target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversion may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
	return true;

      /* ... fall through ... */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}

/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = build_int_cst (sizetype, 0);
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      step = fold_build2 (PLUS_EXPR, type, step, astep);
	    }
	}

      if (integer_zerop (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = build_int_cst (sizetype, 0);
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || integer_zerop (ifs_ivopts_data.step))
	goto fail;
      step = ifs_ivopts_data.step;

      gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
      gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);

      /* Check that the base expression is addressable.  This needs
	 to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
	goto fail;

      /* Moreover, on strict alignment platforms, check that it is
	 sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
	goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
	 have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
	{
	  tree *ref = &TREE_OPERAND (base, 0);
	  while (handled_component_p (*ref))
	    ref = &TREE_OPERAND (*ref, 0);
	  if (TREE_CODE (*ref) == INDIRECT_REF)
	    {
	      tree tem = gimple_fold_indirect_ref (TREE_OPERAND (*ref, 0));
	      if (tem)
		*ref = tem;
	    }
	}
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}

/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, *lhs);

	  if (iv && !integer_zerop (iv->step))
	    return;
	}

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
	  && (REFERENCE_CLASS_P (*rhs)
	      || is_gimple_val (*rhs)))
	{
	  if (REFERENCE_CLASS_P (*rhs))
	    find_interesting_uses_address (data, stmt, rhs);
	  else
	    find_interesting_uses_op (data, *rhs);

	  if (REFERENCE_CLASS_P (*lhs))
	    find_interesting_uses_address (data, stmt, lhs);
	  return;
	}
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
	{
	  find_interesting_uses_cond (data, stmt);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
	return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      find_interesting_uses_op (data, op);
    }
}

/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */
static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (is_gimple_reg (def))
	find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR
	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
	  find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	if (!is_gimple_debug (gsi_stmt (bsi)))
	  find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  info = ver_info (data, i);
	  if (info->inv_id)
	    {
	      fprintf (dump_file, "  ");
	      print_generic_expr (dump_file, info->name, TDF_SLIM);
	      fprintf (dump_file, " is invariant (%d)%s\n",
		       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
	    }
	}

      fprintf (dump_file, "\n");
    }

  free (body);
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
		unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
	  || integer_zerop (expr))
	return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
      if (op0 == TREE_OPERAND (expr, 0)
	  && op1 == TREE_OPERAND (expr, 1))
	return orig_expr;

      if (integer_zerop (op1))
	expr = op0;
      else if (integer_zerop (op0))
	{
	  if (code == MINUS_EXPR)
	    expr = fold_build1 (NEGATE_EXPR, type, op1);
	  else
	    expr = op1;
	}
      else
	expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case MULT_EXPR:
      op1 = TREE_OPERAND (expr, 1);
      if (!cst_and_fits_in_hwi (op1))
	return orig_expr;

      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, false, false, &off0);
      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      *offset = off0 * int_cst_value (op1);
      if (integer_zerop (op0))
	expr = op0;
      else
	expr = fold_build2 (MULT_EXPR, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (!inside_addr)
	return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
	break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
	  && integer_zerop (op1))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset += off0;
	  return op0;
	}
      break;

    case COMPONENT_REF:
      if (!inside_addr)
	return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
	  && cst_and_fits_in_hwi (tmp))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset = off0 + int_cst_value (tmp);
	  return op0;
	}
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case INDIRECT_REF:
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside an address, we might strip the top level component references,
     thus changing the type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  */

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  return strip_offset_1 (expr, false, false, offset);
}
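/* For instance (illustration only), if A is an array of 4-byte integers,
   strip_offset applied to the address &a[4] returns the stripped address
   &a and stores 16 to *OFFSET, while applied to p_7 + 8 it returns p_7
   and stores 8 to *OFFSET.  */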
/* Returns a variant of TYPE that can be used as a base for different uses.
   We return an unsigned type with the same precision, which avoids problems
   with overflows.  */

static tree
generic_type_for (tree type)
{
  if (POINTER_TYPE_P (type))
    return unsigned_type_for (type);

  if (TYPE_UNSIGNED (type))
    return type;

  return unsigned_type_for (type);
}

/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap in which we should store them.  */

static struct ivopts_data *fd_ivopts_data;
static tree
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  bitmap *depends_on = (bitmap *) data;
  struct version_info *info;

  if (TREE_CODE (*expr_p) != SSA_NAME)
    return NULL_TREE;
  info = name_info (fd_ivopts_data, *expr_p);

  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  if (!*depends_on)
    *depends_on = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*depends_on, info->inv_id);

  return NULL_TREE;
}

/* Adds a candidate BASE + STEP * i.  The important field is set to IMPORTANT
   and the position to POS.  If USE is not NULL, the candidate is set as
   related to it.  If both BASE and STEP are NULL, we add a pseudocandidate
   for the replacement of the final value of the iv by a direct computation.  */

static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
		 tree base, tree step, bool important, enum iv_position pos,
		 struct iv_use *use, gimple incremented_at)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  if (base)
    {
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      if (type != orig_type)
	{
	  base = fold_convert (type, base);
	  step = fold_convert (type, step);
	}
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (cand->pos != pos)
	continue;

      if (cand->incremented_at != incremented_at
	  || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
	      && cand->ainc_use != use))
	continue;

      if (!cand->iv)
	{
	  if (!base && !step)
	    break;

	  continue;
	}

      if (!base && !step)
	continue;

      if (operand_equal_p (base, cand->iv->base, 0)
	  && operand_equal_p (step, cand->iv->step, 0))
	break;
    }

  if (i == n_iv_cands (data))
    {
      cand = XCNEW (struct iv_cand);
      cand->id = i;

      if (!base && !step)
	cand->iv = NULL;
      else
	cand->iv = alloc_iv (base, step);

      cand->pos = pos;
      if (pos != IP_ORIGINAL && cand->iv)
	{
	  cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
	  cand->var_after = cand->var_before;
	}
      cand->important = important;
      cand->incremented_at = incremented_at;
      VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);

      if (step
	  && TREE_CODE (step) != INTEGER_CST)
	{
	  fd_ivopts_data = data;
	  walk_tree (&step, find_depends, &cand->depends_on, NULL);
	}

      if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
	cand->ainc_use = use;
      else
	cand->ainc_use = NULL;

      if (dump_file && (dump_flags & TDF_DETAILS))
	dump_cand (dump_file, cand);
    }

  if (important && !cand->important)
    {
      cand->important = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is important\n", cand->id);
    }

  if (use)
    {
      bitmap_set_bit (use->related_cands, i);
        fprintf (dump_file, "Candidate %d is related to use %d\n",
                 cand->id, use->id);
    }

  return cand;
}

/* Returns true if incrementing the induction variable at the end of the LOOP
   is allowed.

   The purpose is to avoid splitting latch edge with a biv increment, thus
   creating a jump, possibly confusing other optimization passes and leaving
   less freedom to the scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
   is not available (so we do not have a better alternative), or if the latch
   edge is already nonempty.  */

static bool
allow_ip_end_pos_p (struct loop *loop)
{
  if (!ip_normal_pos (loop))
    return true;

  if (!empty_block_p (ip_end_pos (loop)))
    return true;

  return false;
}

/* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
   Important field is set to IMPORTANT.  */

static void
add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
                        bool important, struct iv_use *use)
{
  basic_block use_bb = gimple_bb (use->stmt);
  enum machine_mode mem_mode;
  unsigned HOST_WIDE_INT cstepi;

  /* If we insert the increment in any position other than the standard
     ones, we must ensure that it is incremented once per iteration.
     It must not be in an inner nested loop, or one side of an if
     statement.  */
  if (use_bb->loop_father != data->current_loop
      || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
      || stmt_could_throw_p (use->stmt)
      || !cst_and_fits_in_hwi (step))
    return;

  cstepi = int_cst_value (step);

  mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
  if ((HAVE_PRE_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
      || (HAVE_PRE_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
    {
      enum tree_code code = MINUS_EXPR;
      tree new_base;
      tree new_step = step;

      if (POINTER_TYPE_P (TREE_TYPE (base)))
        {
          new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
          code = POINTER_PLUS_EXPR;
        }
      else
        new_step = fold_convert (TREE_TYPE (base), new_step);
      new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
      add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
                       use->stmt);
    }
  if ((HAVE_POST_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
      || (HAVE_POST_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
    {
      add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
                       use->stmt);
    }
}

/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  The candidate computation is scheduled on all available positions.  */

static void
add_candidate (struct ivopts_data *data,
               tree base, tree step, bool important, struct iv_use *use)
{
  if (ip_normal_pos (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
  if (ip_end_pos (data->current_loop)
      && allow_ip_end_pos_p (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_END, use, NULL);

  if (use != NULL && use->type == USE_ADDRESS)
    add_autoinc_candidates (data, base, step, important, use);
}
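/* A minimal illustrative sketch (not part of the pass): the kind of source
   loop the autoincrement candidates above are aimed at.  Assuming 4-byte
   "int" elements and thus a step of 4, the accesses through "src" and "dst"
   match GET_MODE_SIZE and can use POST_INC addressing on targets that have
   it, so an IP_AFTER_USE candidate placed at the memory access makes the
   separate pointer increments unnecessary.  */
#if 0
void
copy_ints (int *dst, const int *src, int n)
{
  int i;

  for (i = 0; i < n; i++)
    *dst++ = *src++;    /* Both pointers step by the access size.  */
}
#endif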
/* Add a standard "0 + 1 * iteration" iv candidate for a
   type with SIZE bits.  */

static void
add_standard_iv_candidates_for_size (struct ivopts_data *data,
                                     unsigned int size)
{
  tree type = lang_hooks.types.type_for_size (size, true);
  add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
                 true, NULL);
}

/* Adds standard iv candidates.  */

static void
add_standard_iv_candidates (struct ivopts_data *data)
{
  add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);

  /* The same for a double-integer type if it is still fast enough.  */
  if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
    add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
}


/* Adds candidates based on the old induction variable IV.  */

static void
add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
{
  gimple phi;
  tree def;
  struct iv_cand *cand;

  add_candidate (data, iv->base, iv->step, true, NULL);

  /* The same, but with initial value zero.  */
  if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
    add_candidate (data, size_int (0), iv->step, true, NULL);
  else
    add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
                   iv->step, true, NULL);

  phi = SSA_NAME_DEF_STMT (iv->ssa_name);
  if (gimple_code (phi) == GIMPLE_PHI)
    {
      /* Additionally record the possibility of leaving the original iv
         untouched.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
      cand = add_candidate_1 (data,
                              iv->base, iv->step, true, IP_ORIGINAL, NULL,
                              SSA_NAME_DEF_STMT (def));
      cand->var_before = iv->ssa_name;
      cand->var_after = def;
    }
}

/* Adds candidates based on the old induction variables.  */

static void
add_old_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv *iv;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      iv = ver_info (data, i)->iv;
      if (iv && iv->biv_p && !integer_zerop (iv->step))
        add_old_iv_candidates (data, iv);
    }
}

/* Adds candidates based on the value of the induction variable IV and USE.  */

static void
add_iv_value_candidates (struct ivopts_data *data,
                         struct iv *iv, struct iv_use *use)
{
  unsigned HOST_WIDE_INT offset;
  tree base;
  tree basetype;

  add_candidate (data, iv->base, iv->step, false, use);

  /* The same, but with initial value zero.  Make such variable important,
     since it is generic enough so that possibly many uses may be based
     on it.  */
  basetype = TREE_TYPE (iv->base);
  if (POINTER_TYPE_P (basetype))
    basetype = sizetype;
  add_candidate (data, build_int_cst (basetype, 0),
                 iv->step, true, use);

  /* Third, try removing the constant offset.  Make sure to even
     add a candidate for &a[0] vs. (T *)&a.  */
  base = strip_offset (iv->base, &offset);
  if (offset
      || base != iv->base)
    add_candidate (data, base, iv->step, false, use);
}
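/* For example (illustrative only): a use "a[i + 1]" with 4-byte elements has
   iv base "&a + 4".  strip_offset peels the constant, yielding base "&a" and
   offset 4, so a candidate rooted at "&a" is added as well; uses that differ
   only in their constant offsets can then share this one candidate.  */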
/* Adds candidates based on the uses.  */

static void
add_derived_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      if (!use)
        continue;

      switch (use->type)
        {
        case USE_NONLINEAR_EXPR:
        case USE_COMPARE:
        case USE_ADDRESS:
          /* Just add the ivs based on the value of the iv used here.  */
          add_iv_value_candidates (data, use->iv, use);
          break;

        default:
          gcc_unreachable ();
        }
    }
}

/* Record important candidates and add them to related_cands bitmaps
   if needed.  */

static void
record_important_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->important)
        bitmap_set_bit (data->important_candidates, i);
    }

  data->consider_all_candidates = (n_iv_cands (data)
                                   <= CONSIDER_ALL_CANDIDATES_BOUND);

  if (data->consider_all_candidates)
    {
      /* We will not need "related_cands" bitmaps in this case,
         so release them to decrease peak memory consumption.  */
      for (i = 0; i < n_iv_uses (data); i++)
        {
          use = iv_use (data, i);
          BITMAP_FREE (use->related_cands);
        }
    }
  else
    {
      /* Add important candidates to the related_cands bitmaps.  */
      for (i = 0; i < n_iv_uses (data); i++)
        bitmap_ior_into (iv_use (data, i)->related_cands,
                         data->important_candidates);
    }
}

/* Allocates the data structure mapping the (use, candidate) pairs to costs.
   If consider_all_candidates is true, we use a two-dimensional array;
   otherwise we allocate a simple list for every use.  */

static void
alloc_use_cost_map (struct ivopts_data *data)
{
  unsigned i, size, s, j;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);
      bitmap_iterator bi;

      if (data->consider_all_candidates)
        size = n_iv_cands (data);
      else
        {
          s = 0;
          EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
            {
              s++;
            }

          /* Round up to the nearest power of two, so that taking the
             remainder modulo it is fast.  */
          for (size = 1; size < s; size <<= 1)
            continue;
        }

      use->n_map_members = size;
      use->cost_map = XCNEWVEC (struct cost_pair, size);
    }
}

/* Returns description of computation cost of expression whose runtime
   cost is RUNTIME and complexity corresponds to COMPLEXITY.  */

static comp_cost
new_cost (unsigned runtime, unsigned complexity)
{
  comp_cost cost;

  cost.cost = runtime;
  cost.complexity = complexity;

  return cost;
}

/* Adds costs COST1 and COST2.  */

static comp_cost
add_costs (comp_cost cost1, comp_cost cost2)
{
  cost1.cost += cost2.cost;
  cost1.complexity += cost2.complexity;

  return cost1;
}

/* Subtracts costs COST1 and COST2.  */

static comp_cost
sub_costs (comp_cost cost1, comp_cost cost2)
{
  cost1.cost -= cost2.cost;
  cost1.complexity -= cost2.complexity;

  return cost1;
}
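/* A minimal illustrative sketch (not part of the pass): rounding the bitmap
   population count S up to a power of two, as alloc_use_cost_map does above,
   turns the modulo in set_use_iv_cost into a cheap mask.  For instance,
   s == 5 rounds to size == 8, so "id % 8" can be computed as "id & 7".  */
#if 0
static unsigned
round_up_pow2 (unsigned s)
{
  unsigned size;

  for (size = 1; size < s; size <<= 1)
    continue;
  return size;
}
#endif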
/* Returns a negative number if COST1 < COST2, a positive number if
   COST1 > COST2, and 0 if COST1 = COST2.  */

static int
compare_costs (comp_cost cost1, comp_cost cost2)
{
  if (cost1.cost == cost2.cost)
    return cost1.complexity - cost2.complexity;

  return cost1.cost - cost2.cost;
}

/* Returns true if COST is infinite.  */

static bool
infinite_cost_p (comp_cost cost)
{
  return cost.cost == INFTY;
}

/* Sets cost of (USE, CANDIDATE) pair to COST and records that it depends
   on invariants DEPENDS_ON and that the value used in expressing it
   is VALUE.  */

static void
set_use_iv_cost (struct ivopts_data *data,
                 struct iv_use *use, struct iv_cand *cand,
                 comp_cost cost, bitmap depends_on, tree value)
{
  unsigned i, s;

  if (infinite_cost_p (cost))
    {
      BITMAP_FREE (depends_on);
      return;
    }

  if (data->consider_all_candidates)
    {
      use->cost_map[cand->id].cand = cand;
      use->cost_map[cand->id].cost = cost;
      use->cost_map[cand->id].depends_on = depends_on;
      use->cost_map[cand->id].value = value;
      return;
    }

  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (!use->cost_map[i].cand)
      goto found;
  for (i = 0; i < s; i++)
    if (!use->cost_map[i].cand)
      goto found;

  gcc_unreachable ();

found:
  use->cost_map[i].cand = cand;
  use->cost_map[i].cost = cost;
  use->cost_map[i].depends_on = depends_on;
  use->cost_map[i].value = value;
}

/* Gets cost of (USE, CANDIDATE) pair.  */

static struct cost_pair *
get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
                 struct iv_cand *cand)
{
  unsigned i, s;
  struct cost_pair *ret;

  if (!cand)
    return NULL;

  if (data->consider_all_candidates)
    {
      ret = use->cost_map + cand->id;
      if (!ret->cand)
        return NULL;

      return ret;
    }

  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;

  for (i = 0; i < s; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;

  return NULL;
}

/* Returns estimate on cost of computing SEQ.  */

static unsigned
seq_cost (rtx seq, bool speed)
{
  unsigned cost = 0;
  rtx set;

  for (; seq; seq = NEXT_INSN (seq))
    {
      set = single_set (seq);
      if (set)
        cost += rtx_cost (set, SET, speed);
      else
        cost++;
    }

  return cost;
}
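/* A minimal illustrative sketch (not part of the pass) of the open-addressed
   probe that set_use_iv_cost and get_use_iv_cost above share.  N is a power
   of two, so the start slot is "id & (n - 1)", and the probe walks forward
   and then wraps around once; -1 stands in for the gcc_unreachable case of a
   full table.  */
#if 0
static int
probe_free_slot (struct cost_pair *map, unsigned n, unsigned id)
{
  unsigned i, s = id & (n - 1);

  for (i = s; i < n; i++)
    if (!map[i].cand)
      return (int) i;
  for (i = 0; i < s; i++)
    if (!map[i].cand)
      return (int) i;
  return -1;
}
#endif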
/* Produce DECL_RTL for object OBJ so it looks like it is stored in memory.  */

static rtx
produce_memory_decl_rtl (tree obj, int *regno)
{
  addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
  enum machine_mode address_mode = targetm.addr_space.address_mode (as);
  rtx x;

  gcc_assert (obj);
  if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
    {
      const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
      x = gen_rtx_SYMBOL_REF (address_mode, name);
      SET_SYMBOL_REF_DECL (x, obj);
      x = gen_rtx_MEM (DECL_MODE (obj), x);
      set_mem_addr_space (x, as);
      targetm.encode_section_info (obj, x, true);
    }
  else
    {
      x = gen_raw_REG (address_mode, (*regno)++);
      x = gen_rtx_MEM (DECL_MODE (obj), x);
      set_mem_addr_space (x, as);
    }

  return x;
}

/* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
   walk_tree.  DATA contains the actual fake register number.  */

static tree
prepare_decl_rtl (tree *expr_p, int *ws, void *data)
{
  tree obj = NULL_TREE;
  rtx x = NULL_RTX;
  int *regno = (int *) data;

  switch (TREE_CODE (*expr_p))
    {
    case ADDR_EXPR:
      for (expr_p = &TREE_OPERAND (*expr_p, 0);
           handled_component_p (*expr_p);
           expr_p = &TREE_OPERAND (*expr_p, 0))
        continue;
      obj = *expr_p;
      if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
        x = produce_memory_decl_rtl (obj, regno);
      break;

    case SSA_NAME:
      *ws = 0;
      obj = SSA_NAME_VAR (*expr_p);
      if (!DECL_RTL_SET_P (obj))
        x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
      break;

    case VAR_DECL:
    case PARM_DECL:
    case RESULT_DECL:
      *ws = 0;
      obj = *expr_p;

      if (DECL_RTL_SET_P (obj))
        break;

      if (DECL_MODE (obj) == BLKmode)
        x = produce_memory_decl_rtl (obj, regno);
      else
        x = gen_raw_REG (DECL_MODE (obj), (*regno)++);

      break;

    default:
      break;
    }

  if (x)
    {
      VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
      SET_DECL_RTL (obj, x);
    }

  return NULL_TREE;
}

/* Determines cost of the computation of EXPR.  */

static unsigned
computation_cost (tree expr, bool speed)
{
  rtx seq, rslt;
  tree type = TREE_TYPE (expr);
  unsigned cost;
  /* Avoid using hard regs in ways which may be unsupported.  */
  int regno = LAST_VIRTUAL_REGISTER + 1;
  enum function_frequency real_frequency = cfun->function_frequency;

  cfun->function_frequency = FUNCTION_FREQUENCY_NORMAL;
  crtl->maybe_hot_insn_p = speed;
  walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
  start_sequence ();
  rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
  seq = get_insns ();
  end_sequence ();
  default_rtl_profile ();
  cfun->function_frequency = real_frequency;

  cost = seq_cost (seq, speed);
  if (MEM_P (rslt))
    cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
                          TYPE_ADDR_SPACE (type), speed);

  return cost;
}

/* Returns variable containing the value of candidate CAND at statement AT.  */

static tree
var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  if (stmt_after_increment (loop, cand, stmt))
    return cand->var_after;
  else
    return cand->var_before;
}

/* Return the most significant (sign) bit of T.  Similar to tree_int_cst_msb,
   but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE.  */
int
tree_int_cst_sign_bit (const_tree t)
{
  unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
  unsigned HOST_WIDE_INT w;

  if (bitno < HOST_BITS_PER_WIDE_INT)
    w = TREE_INT_CST_LOW (t);
  else
    {
      w = TREE_INT_CST_HIGH (t);
      bitno -= HOST_BITS_PER_WIDE_INT;
    }

  return (w >> bitno) & 1;
}

/* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
   same precision that is at least as wide as the precision of TYPE, stores
   BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
   type of A and B.  */

static tree
determine_common_wider_type (tree *a, tree *b)
{
  tree wider_type = NULL;
  tree suba, subb;
  tree atype = TREE_TYPE (*a);

  if (CONVERT_EXPR_P (*a))
    {
      suba = TREE_OPERAND (*a, 0);
      wider_type = TREE_TYPE (suba);
      if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
        return atype;
    }
  else
    return atype;

  if (CONVERT_EXPR_P (*b))
    {
      subb = TREE_OPERAND (*b, 0);
      if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
        return atype;
    }
  else
    return atype;

  *a = suba;
  *b = subb;
  return wider_type;
}

/* Determines the expression by which USE is expressed from induction variable
   CAND at statement AT in LOOP.  The expression is stored in a decomposed
   form into AFF.  Returns false if USE cannot be expressed using CAND.  */

static bool
get_computation_aff (struct loop *loop,
                     struct iv_use *use, struct iv_cand *cand, gimple at,
                     struct affine_tree_combination *aff)
{
  tree ubase = use->iv->base;
  tree ustep = use->iv->step;
  tree cbase = cand->iv->base;
  tree cstep = cand->iv->step, cstep_common;
  tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
  tree common_type, var;
  tree uutype;
  aff_tree cbase_aff, var_aff;
  double_int rat;

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return false;
    }

  var = var_at_stmt (loop, cand, at);
  uutype = unsigned_type_for (utype);

  /* If the conversion is not noop, perform it.  */
  if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
    {
      cstep = fold_convert (uutype, cstep);
      cbase = fold_convert (uutype, cbase);
      var = fold_convert (uutype, var);
    }

  if (!constant_multiple_of (ustep, cstep, &rat))
    return false;

  /* In case both UBASE and CBASE are shortened to UUTYPE from some common
     type, we achieve better folding by computing their difference in this
     wider type, and cast the result to UUTYPE.  We do not need to worry about
     overflows, as all the arithmetics will in the end be performed in UUTYPE
     anyway.  */
  common_type = determine_common_wider_type (&ubase, &cbase);

  /* use = ubase - ratio * cbase + ratio * var.  */
  tree_to_aff_combination (ubase, common_type, aff);
  tree_to_aff_combination (cbase, common_type, &cbase_aff);
  tree_to_aff_combination (var, uutype, &var_aff);
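  /* For example (illustrative values only): for a use {ubase, +, 8} based on
     a candidate {cbase, +, 4}, constant_multiple_of yields rat == 2 above,
     and the use's value is rebuilt as ubase - 2 * cbase + 2 * var.  */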
  /* We need to shift the value if we are after the increment.  */
  if (stmt_after_increment (loop, cand, at))
    {
      aff_tree cstep_aff;

      if (common_type != uutype)
        cstep_common = fold_convert (common_type, cstep);
      else
        cstep_common = cstep;

      tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
      aff_combination_add (&cbase_aff, &cstep_aff);
    }

  aff_combination_scale (&cbase_aff, double_int_neg (rat));
  aff_combination_add (aff, &cbase_aff);
  if (common_type != uutype)
    aff_combination_convert (aff, uutype);

  aff_combination_scale (&var_aff, rat);
  aff_combination_add (aff, &var_aff);

  return true;
}

/* Determines the expression by which USE is expressed from induction variable
   CAND at statement AT in LOOP.  The computation is unshared.  */

static tree
get_computation_at (struct loop *loop,
                    struct iv_use *use, struct iv_cand *cand, gimple at)
{
  aff_tree aff;
  tree type = TREE_TYPE (use->iv->base);

  if (!get_computation_aff (loop, use, cand, at, &aff))
    return NULL_TREE;
  unshare_aff_combination (&aff);
  return fold_convert (type, aff_combination_to_tree (&aff));
}

/* Determines the expression by which USE is expressed from induction variable
   CAND in LOOP.  The computation is unshared.  */

static tree
get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
{
  return get_computation_at (loop, use, cand, use->stmt);
}

/* Returns cost of addition in MODE.  */

static unsigned
add_cost (enum machine_mode mode, bool speed)
{
  static unsigned costs[NUM_MACHINE_MODES];
  rtx seq;
  unsigned cost;

  if (costs[mode])
    return costs[mode];

  start_sequence ();
  force_operand (gen_rtx_fmt_ee (PLUS, mode,
                                 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
                                 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
                 NULL_RTX);
  seq = get_insns ();
  end_sequence ();

  cost = seq_cost (seq, speed);
  if (!cost)
    cost = 1;

  costs[mode] = cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Addition in %s costs %d\n",
             GET_MODE_NAME (mode), cost);
  return cost;
}

/* Entry in a hashtable of already known costs for multiplication.  */
struct mbc_entry
{
  HOST_WIDE_INT cst;            /* The constant to multiply by.  */
  enum machine_mode mode;       /* In mode.  */
  unsigned cost;                /* The cost.  */
};

/* Counts hash value for the ENTRY.  */

static hashval_t
mbc_entry_hash (const void *entry)
{
  const struct mbc_entry *e = (const struct mbc_entry *) entry;

  return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
}

/* Compares the hash table entries ENTRY1 and ENTRY2.  */

static int
mbc_entry_eq (const void *entry1, const void *entry2)
{
  const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
  const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;

  return (e1->mode == e2->mode
          && e1->cst == e2->cst);
}
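/* A minimal illustrative sketch (not part of the pass) of how the (mode,
   constant) memoization below behaves: the first query for a given pair
   expands the multiplication and caches its cost, so a repeated query is a
   pure hash table hit.  The helper name is hypothetical.  */
#if 0
static unsigned
example_mult_cost_memoization (void)
{
  unsigned c1 = multiply_by_cost (4, SImode, true);  /* Expands and caches.  */
  unsigned c2 = multiply_by_cost (4, SImode, true);  /* Hash table hit.  */

  gcc_assert (c1 == c2);
  return c1;
}
#endif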
/* Returns cost of multiplication by constant CST in MODE.  */

unsigned
multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
{
  static htab_t costs;
  struct mbc_entry **cached, act;
  rtx seq;
  unsigned cost;

  if (!costs)
    costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);

  act.mode = mode;
  act.cst = cst;
  cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
  if (*cached)
    return (*cached)->cost;

  *cached = XNEW (struct mbc_entry);
  (*cached)->mode = mode;
  (*cached)->cst = cst;

  start_sequence ();
  expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
               gen_int_mode (cst, mode), NULL_RTX, 0);
  seq = get_insns ();
  end_sequence ();

  cost = seq_cost (seq, speed);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
             (int) cst, GET_MODE_NAME (mode), cost);

  (*cached)->cost = cost;

  return cost;
}

/* Returns true if multiplying by RATIO is allowed in an address.  Test the
   validity for a memory reference accessing memory of mode MODE in
   address space AS.  */

DEF_VEC_P (sbitmap);
DEF_VEC_ALLOC_P (sbitmap, heap);

bool
multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
                                 addr_space_t as)
{
#define MAX_RATIO 128
  unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
  static VEC (sbitmap, heap) *valid_mult_list;
  sbitmap valid_mult;

  if (data_index >= VEC_length (sbitmap, valid_mult_list))
    VEC_safe_grow_cleared (sbitmap, heap, valid_mult_list, data_index + 1);

  valid_mult = VEC_index (sbitmap, valid_mult_list, data_index);
  if (!valid_mult)
    {
      enum machine_mode address_mode = targetm.addr_space.address_mode (as);
      rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
      rtx addr;
      HOST_WIDE_INT i;

      valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
      sbitmap_zero (valid_mult);
      addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
      for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
        {
          XEXP (addr, 1) = gen_int_mode (i, address_mode);
          if (memory_address_addr_space_p (mode, addr, as))
            SET_BIT (valid_mult, i + MAX_RATIO);
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "  allowed multipliers:");
          for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
            if (TEST_BIT (valid_mult, i + MAX_RATIO))
              fprintf (dump_file, " %d", (int) i);
          fprintf (dump_file, "\n");
          fprintf (dump_file, "\n");
        }

      VEC_replace (sbitmap, valid_mult_list, data_index, valid_mult);
    }

  if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
    return false;

  return TEST_BIT (valid_mult, ratio + MAX_RATIO);
}
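/* For instance, on x86 targets index scaling by 1, 2, 4 and 8 is valid in an
   address, so those are typically the only bits set out of the
   2 * MAX_RATIO + 1 ratios probed for an integer mode above.  */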
/* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
   If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
   variable is omitted.  Compute the cost for a memory reference that accesses
   a memory location of mode MEM_MODE in address space AS.

   MAY_AUTOINC is set to true if the autoincrement (increasing index by
   size of MEM_MODE / RATIO) is available.  To make this determination, we
   look at the size of the increment to be made, which is given in CSTEP.
   CSTEP may be zero if the step is unknown.
   STMT_AFTER_INC is true iff the statement we're looking at is after the
   increment of the original biv.

   TODO -- there must be some better way.  This all is quite crude.  */

typedef struct
{
  HOST_WIDE_INT min_offset, max_offset;
  unsigned costs[2][2][2][2];
} *address_cost_data;

DEF_VEC_P (address_cost_data);
DEF_VEC_ALLOC_P (address_cost_data, heap);

static comp_cost
get_address_cost (bool symbol_present, bool var_present,
                  unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
                  HOST_WIDE_INT cstep, enum machine_mode mem_mode,
                  addr_space_t as, bool speed,
                  bool stmt_after_inc, bool *may_autoinc)
{
  enum machine_mode address_mode = targetm.addr_space.address_mode (as);
  static VEC(address_cost_data, heap) *address_cost_data_list;
  unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
  address_cost_data data;
  static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
  static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
  unsigned cost, acost, complexity;
  bool offset_p, ratio_p, autoinc;
  HOST_WIDE_INT s_offset, autoinc_offset, msize;
  unsigned HOST_WIDE_INT mask;
  unsigned bits;

  if (data_index >= VEC_length (address_cost_data, address_cost_data_list))
    VEC_safe_grow_cleared (address_cost_data, heap, address_cost_data_list,
                           data_index + 1);

  data = VEC_index (address_cost_data, address_cost_data_list, data_index);
  if (!data)
    {
      HOST_WIDE_INT i;
      HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
      HOST_WIDE_INT rat, off;
      int old_cse_not_expected;
      unsigned sym_p, var_p, off_p, rat_p, add_c;
      rtx seq, addr, base;
      rtx reg0, reg1;

      data = (address_cost_data) xcalloc (1, sizeof (*data));

      reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);

      addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
      for (i = start; i <= 1 << 20; i <<= 1)
        {
          XEXP (addr, 1) = gen_int_mode (i, address_mode);
          if (!memory_address_addr_space_p (mem_mode, addr, as))
            break;
        }
      data->max_offset = i == start ? 0 : i >> 1;
      off = data->max_offset;

      for (i = start; i <= 1 << 20; i <<= 1)
        {
          XEXP (addr, 1) = gen_int_mode (-i, address_mode);
          if (!memory_address_addr_space_p (mem_mode, addr, as))
            break;
        }
      data->min_offset = i == start ? 0 : -(i >> 1);

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "get_address_cost:\n");
          fprintf (dump_file, "  min offset %s %d\n",
                   GET_MODE_NAME (mem_mode),
                   (int) data->min_offset);
          fprintf (dump_file, "  max offset %s %d\n",
                   GET_MODE_NAME (mem_mode),
                   (int) data->max_offset);
        }

      rat = 1;
      for (i = 2; i <= MAX_RATIO; i++)
        if (multiplier_allowed_in_address_p (i, mem_mode, as))
          {
            rat = i;
            break;
          }
      /* Compute the cost of various addressing modes.  */
      acost = 0;
      reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
      reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);

      if (HAVE_PRE_DECREMENT)
        {
          addr = gen_rtx_PRE_DEC (address_mode, reg0);
          has_predec[mem_mode]
            = memory_address_addr_space_p (mem_mode, addr, as);
        }
      if (HAVE_POST_DECREMENT)
        {
          addr = gen_rtx_POST_DEC (address_mode, reg0);
          has_postdec[mem_mode]
            = memory_address_addr_space_p (mem_mode, addr, as);
        }
      if (HAVE_PRE_INCREMENT)
        {
          addr = gen_rtx_PRE_INC (address_mode, reg0);
          has_preinc[mem_mode]
            = memory_address_addr_space_p (mem_mode, addr, as);
        }
      if (HAVE_POST_INCREMENT)
        {
          addr = gen_rtx_POST_INC (address_mode, reg0);
          has_postinc[mem_mode]
            = memory_address_addr_space_p (mem_mode, addr, as);
        }
      for (i = 0; i < 16; i++)
        {
          sym_p = i & 1;
          var_p = (i >> 1) & 1;
          off_p = (i >> 2) & 1;
          rat_p = (i >> 3) & 1;

          addr = reg0;
          if (rat_p)
            addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
                                   gen_int_mode (rat, address_mode));

          if (var_p)
            addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);

          if (sym_p)
            {
              base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
              /* ??? We can run into trouble with some backends by presenting
                 it with symbols which haven't been properly passed through
                 targetm.encode_section_info.  By setting the local bit, we
                 enhance the probability of things working.  */
              SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;

              if (off_p)
                base = gen_rtx_fmt_e (CONST, address_mode,
                                      gen_rtx_fmt_ee
                                      (PLUS, address_mode, base,
                                       gen_int_mode (off, address_mode)));
            }
          else if (off_p)
            base = gen_int_mode (off, address_mode);
          else
            base = NULL_RTX;

          if (base)
            addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);

          start_sequence ();
          /* To avoid splitting addressing modes, pretend that no cse will
             follow.  */
          old_cse_not_expected = cse_not_expected;
          cse_not_expected = true;
          addr = memory_address_addr_space (mem_mode, addr, as);
          cse_not_expected = old_cse_not_expected;
          seq = get_insns ();
          end_sequence ();

          acost = seq_cost (seq, speed);
          acost += address_cost (addr, mem_mode, as, speed);

          if (!acost)
            acost = 1;
          data->costs[sym_p][var_p][off_p][rat_p] = acost;
        }

      /* On some targets, it is quite expensive to load symbol to a register,
         which makes addresses that contain symbols look much more expensive.
         However, the symbol will have to be loaded in any case before the
         loop (and quite likely we have it in register already), so it does not
         make much sense to penalize them too heavily.  So make some final
         tweaks for the SYMBOL_PRESENT modes:

         If VAR_PRESENT is false, and the mode obtained by changing symbol to
         var is cheaper, use this mode with small penalty.
         If VAR_PRESENT is true, try whether the mode with
         SYMBOL_PRESENT = false is cheaper even with cost of addition, and
         if this is the case, use it.  */
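      /* For instance (illustrative numbers only): if "sym + cst" was measured
         at cost 10 while "var + cst" costs 4, the loop below lowers the
         "sym + cst" entry to 4 + 1 = 5, reflecting that the symbol load is
         hoisted before the loop and only a small penalty remains.  */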
      add_c = add_cost (address_mode, speed);
      for (i = 0; i < 8; i++)
        {
          var_p = i & 1;
          off_p = (i >> 1) & 1;
          rat_p = (i >> 2) & 1;

          acost = data->costs[0][1][off_p][rat_p] + 1;
          if (var_p)
            acost += add_c;

          if (acost < data->costs[1][var_p][off_p][rat_p])
            data->costs[1][var_p][off_p][rat_p] = acost;
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Address costs:\n");

          for (i = 0; i < 16; i++)
            {
              sym_p = i & 1;
              var_p = (i >> 1) & 1;
              off_p = (i >> 2) & 1;
              rat_p = (i >> 3) & 1;

              fprintf (dump_file, "  ");
              if (sym_p)
                fprintf (dump_file, "sym + ");
              if (var_p)
                fprintf (dump_file, "var + ");
              if (off_p)
                fprintf (dump_file, "cst + ");
              if (rat_p)
                fprintf (dump_file, "rat * ");

              acost = data->costs[sym_p][var_p][off_p][rat_p];
              fprintf (dump_file, "index costs %d\n", acost);
            }
          if (has_predec[mem_mode] || has_postdec[mem_mode]
              || has_preinc[mem_mode] || has_postinc[mem_mode])
            fprintf (dump_file, "  May include autoinc/dec\n");
          fprintf (dump_file, "\n");
        }

      VEC_replace (address_cost_data, address_cost_data_list,
                   data_index, data);
    }

  bits = GET_MODE_BITSIZE (address_mode);
  mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
  offset &= mask;
  if ((offset >> (bits - 1) & 1))
    offset |= ~mask;
  s_offset = offset;

  autoinc = false;
  msize = GET_MODE_SIZE (mem_mode);
  autoinc_offset = offset;
  if (stmt_after_inc)
    autoinc_offset += ratio * cstep;
  if (symbol_present || var_present || ratio != 1)
    autoinc = false;
  else if ((has_postinc[mem_mode] && autoinc_offset == 0
            && msize == cstep)
           || (has_postdec[mem_mode] && autoinc_offset == 0
               && msize == -cstep)
           || (has_preinc[mem_mode] && autoinc_offset == msize
               && msize == cstep)
           || (has_predec[mem_mode] && autoinc_offset == -msize
               && msize == -cstep))
    autoinc = true;

  cost = 0;
  offset_p = (s_offset != 0
              && data->min_offset <= s_offset
              && s_offset <= data->max_offset);
  ratio_p = (ratio != 1
             && multiplier_allowed_in_address_p (ratio, mem_mode, as));

  if (ratio != 1 && !ratio_p)
    cost += multiply_by_cost (ratio, address_mode, speed);

  if (s_offset && !offset_p && !symbol_present)
    cost += add_cost (address_mode, speed);

  if (may_autoinc)
    *may_autoinc = autoinc;
  acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
  complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
  return new_cost (cost + acost, complexity);
}
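/* A minimal illustrative sketch (not part of the pass): the sign-extension
   idiom used in get_address_cost above to reduce OFFSET to the width of the
   address mode.  For bits == 8, mask == 0xff, so an incoming 0xfe becomes -2.
   The double shift "<< (bits - 1) << 1" avoids the undefined behavior of
   shifting by the full width when bits equals HOST_BITS_PER_WIDE_INT.  */
#if 0
static HOST_WIDE_INT
sign_extend_to_width (unsigned HOST_WIDE_INT x, unsigned bits)
{
  unsigned HOST_WIDE_INT mask
    = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);

  x &= mask;
  if ((x >> (bits - 1)) & 1)
    x |= ~mask;
  return (HOST_WIDE_INT) x;
}
#endif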
/* Estimates cost of forcing expression EXPR into a variable.  */

static comp_cost
force_expr_to_var_cost (tree expr, bool speed)
{
  static bool costs_initialized = false;
  static unsigned integer_cost [2];
  static unsigned symbol_cost [2];
  static unsigned address_cost [2];
  tree op0, op1;
  comp_cost cost0, cost1, cost;
  enum machine_mode mode;

  if (!costs_initialized)
    {
      tree type = build_pointer_type (integer_type_node);
      tree var, addr;
      rtx x;
      int i;

      var = create_tmp_var_raw (integer_type_node, "test_var");
      TREE_STATIC (var) = 1;
      x = produce_memory_decl_rtl (var, NULL);
      SET_DECL_RTL (var, x);

      addr = build1 (ADDR_EXPR, type, var);

      for (i = 0; i < 2; i++)
        {
          integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
                                                             2000), i);

          symbol_cost[i] = computation_cost (addr, i) + 1;

          address_cost[i]
            = computation_cost (build2 (POINTER_PLUS_EXPR, type,
                                        addr,
                                        build_int_cst (sizetype, 2000)), i) + 1;
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "force_expr_to_var_cost %s costs:\n",
                       i ? "speed" : "size");
              fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
              fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
              fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
              fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
              fprintf (dump_file, "\n");
            }
        }

      costs_initialized = true;
    }

  STRIP_NOPS (expr);

  if (SSA_VAR_P (expr))
    return zero_cost;

  if (is_gimple_min_invariant (expr))
    {
      if (TREE_CODE (expr) == INTEGER_CST)
        return new_cost (integer_cost [speed], 0);

      if (TREE_CODE (expr) == ADDR_EXPR)
        {
          tree obj = TREE_OPERAND (expr, 0);

          if (TREE_CODE (obj) == VAR_DECL
              || TREE_CODE (obj) == PARM_DECL
              || TREE_CODE (obj) == RESULT_DECL)
            return new_cost (symbol_cost [speed], 0);
        }

      return new_cost (address_cost [speed], 0);
    }

  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case MULT_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);
      STRIP_NOPS (op0);
      STRIP_NOPS (op1);

      if (is_gimple_val (op0))
        cost0 = zero_cost;
      else
        cost0 = force_expr_to_var_cost (op0, speed);

      if (is_gimple_val (op1))
        cost1 = zero_cost;
      else
        cost1 = force_expr_to_var_cost (op1, speed);

      break;

    case NEGATE_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      STRIP_NOPS (op0);
      op1 = NULL_TREE;

      if (is_gimple_val (op0))
        cost0 = zero_cost;
      else
        cost0 = force_expr_to_var_cost (op0, speed);

      cost1 = zero_cost;
      break;

    default:
      /* Just an arbitrary value, FIXME.  */
      return new_cost (target_spill_cost[speed], 0);
    }

  mode = TYPE_MODE (TREE_TYPE (expr));
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case NEGATE_EXPR:
      cost = new_cost (add_cost (mode, speed), 0);
      break;

    case MULT_EXPR:
      if (cst_and_fits_in_hwi (op0))
        cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed),
                         0);
      else if (cst_and_fits_in_hwi (op1))
        cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed),
                         0);
      else
        return new_cost (target_spill_cost [speed], 0);
      break;

    default:
      gcc_unreachable ();
    }

  cost = add_costs (cost, cost0);
  cost = add_costs (cost, cost1);

  /* Bound the cost by target_spill_cost.  The parts of complicated
     computations often are either loop invariant or at least can
     be shared between several iv uses, so letting this grow without
     limits would not give reasonable results.  */
  if (cost.cost > (int) target_spill_cost [speed])
    cost.cost = target_spill_cost [speed];

  return cost;
}

/* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
   invariants the computation depends on.  */

static comp_cost
force_var_cost (struct ivopts_data *data,
                tree expr, bitmap *depends_on)
{
  if (depends_on)
    {
      fd_ivopts_data = data;
      walk_tree (&expr, find_depends, depends_on, NULL);
    }

  return force_expr_to_var_cost (expr, data->speed);
}

/* Estimates cost of expressing address ADDR as var + symbol + offset.  The
   value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
   to false if the corresponding part is missing.  DEPENDS_ON is a set of the
   invariants the computation depends on.  */

static comp_cost
split_address_cost (struct ivopts_data *data,
                    tree addr, bool *symbol_present, bool *var_present,
                    unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  tree core;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;

  core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
                              &unsignedp, &volatilep, false);

  if (toffset != 0
      || bitpos % BITS_PER_UNIT != 0
      || TREE_CODE (core) != VAR_DECL)
    {
      *symbol_present = false;
      *var_present = true;
      fd_ivopts_data = data;
      walk_tree (&addr, find_depends, depends_on, NULL);
      return new_cost (target_spill_cost[data->speed], 0);
    }

  *offset += bitpos / BITS_PER_UNIT;
  if (TREE_STATIC (core)
      || DECL_EXTERNAL (core))
    {
      *symbol_present = true;
      *var_present = false;
      return zero_cost;
    }

  *symbol_present = false;
  *var_present = true;
  return zero_cost;
}

/* Estimates cost of expressing difference of addresses E1 - E2 as
   var + symbol + offset.  The value of offset is added to OFFSET,
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  */
static comp_cost
ptr_difference_cost (struct ivopts_data *data,
                     tree e1, tree e2, bool *symbol_present, bool *var_present,
                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  HOST_WIDE_INT diff = 0;
  aff_tree aff_e1, aff_e2;
  tree type;

  gcc_assert (TREE_CODE (e1) == ADDR_EXPR);

  if (ptr_difference_const (e1, e2, &diff))
    {
      *offset += diff;
      *symbol_present = false;
      *var_present = false;
      return zero_cost;
    }

  if (integer_zerop (e2))
    return split_address_cost (data, TREE_OPERAND (e1, 0),
                               symbol_present, var_present, offset,
                               depends_on);

  *symbol_present = false;
  *var_present = true;

  type = signed_type_for (TREE_TYPE (e1));
  tree_to_aff_combination (e1, type, &aff_e1);
  tree_to_aff_combination (e2, type, &aff_e2);
  aff_combination_scale (&aff_e2, double_int_minus_one);
  aff_combination_add (&aff_e1, &aff_e2);

  return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
}

/* Estimates cost of expressing difference E1 - E2 as
   var + symbol + offset.  The value of offset is added to OFFSET,
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  */

static comp_cost
difference_cost (struct ivopts_data *data,
                 tree e1, tree e2, bool *symbol_present, bool *var_present,
                 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
  unsigned HOST_WIDE_INT off1, off2;
  aff_tree aff_e1, aff_e2;
  tree type;

  e1 = strip_offset (e1, &off1);
  e2 = strip_offset (e2, &off2);
  *offset += off1 - off2;

  STRIP_NOPS (e1);
  STRIP_NOPS (e2);

  if (TREE_CODE (e1) == ADDR_EXPR)
    return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
                                offset, depends_on);
  *symbol_present = false;

  if (operand_equal_p (e1, e2, 0))
    {
      *var_present = false;
      return zero_cost;
    }

  *var_present = true;

  if (integer_zerop (e2))
    return force_var_cost (data, e1, depends_on);

  if (integer_zerop (e1))
    {
      comp_cost cost = force_var_cost (data, e2, depends_on);
      cost.cost += multiply_by_cost (-1, mode, data->speed);
      return cost;
    }

  type = signed_type_for (TREE_TYPE (e1));
  tree_to_aff_combination (e1, type, &aff_e1);
  tree_to_aff_combination (e2, type, &aff_e2);
  aff_combination_scale (&aff_e2, double_int_minus_one);
  aff_combination_add (&aff_e1, &aff_e2);

  return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
}
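/* A minimal illustrative sketch (not part of the pass) of the affine idiom
   shared by ptr_difference_cost and difference_cost above: scaling E2 by -1
   and adding expresses E1 - E2 as a single affine combination, which folds
   common terms and constant offsets better than building a MINUS_EXPR
   directly.  The helper name is hypothetical.  */
#if 0
static tree
affine_difference (tree e1, tree e2)
{
  aff_tree aff_e1, aff_e2;
  tree type = signed_type_for (TREE_TYPE (e1));

  tree_to_aff_combination (e1, type, &aff_e1);
  tree_to_aff_combination (e2, type, &aff_e2);
  aff_combination_scale (&aff_e2, double_int_minus_one);
  aff_combination_add (&aff_e1, &aff_e2);
  return aff_combination_to_tree (&aff_e1);
}
#endif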
/* Determines the cost of the computation by which USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into a
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  AT is the statement at which the value is computed.
   If CAN_AUTOINC is nonnull, use it to record whether autoinc
   addressing is likely.  */

static comp_cost
get_computation_cost_at (struct ivopts_data *data,
                         struct iv_use *use, struct iv_cand *cand,
                         bool address_p, bitmap *depends_on, gimple at,
                         bool *can_autoinc)
{
  tree ubase = use->iv->base, ustep = use->iv->step;
  tree cbase, cstep;
  tree utype = TREE_TYPE (ubase), ctype;
  unsigned HOST_WIDE_INT cstepi, offset = 0;
  HOST_WIDE_INT ratio, aratio;
  bool var_present, symbol_present, stmt_is_after_inc;
  comp_cost cost;
  double_int rat;
  bool speed = optimize_bb_for_speed_p (gimple_bb (at));

  *depends_on = NULL;

  /* Only consider real candidates.  */
  if (!cand->iv)
    return infinite_cost;

  cbase = cand->iv->base;
  cstep = cand->iv->step;
  ctype = TREE_TYPE (cbase);

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return infinite_cost;
    }

  if (address_p)
    {
      /* Do not try to express address of an object with computation based
         on address of a different object.  This may cause problems in rtl
         level alias analysis (that does not expect this to be happening,
         as this is illegal in C), and would be unlikely to be useful
         anyway.  */
      if (use->iv->base_object
          && cand->iv->base_object
          && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
        return infinite_cost;
    }

  if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
    {
      /* TODO -- add direct handling of this case.  */
      goto fallback;
    }

  /* CSTEPI is removed from the offset in case statement is after the
     increment.  If the step is not constant, we use zero instead.
     This is a bit imprecise (there is the extra addition), but
     redundancy elimination is likely to transform the code so that
     it uses value of the variable before increment anyway,
     so it is not that unrealistic.  */
  if (cst_and_fits_in_hwi (cstep))
    cstepi = int_cst_value (cstep);
  else
    cstepi = 0;

  if (!constant_multiple_of (ustep, cstep, &rat))
    return infinite_cost;

  if (double_int_fits_in_shwi_p (rat))
    ratio = double_int_to_shwi (rat);
  else
    return infinite_cost;

  STRIP_NOPS (cbase);
  ctype = TREE_TYPE (cbase);

  /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
     or ratio == 1, it is better to handle this like

       ubase - ratio * cbase + ratio * var

     (this also holds for the case ratio == -1, TODO).  */
  if (cst_and_fits_in_hwi (cbase))
    {
      offset = - ratio * int_cst_value (cbase);
      cost = difference_cost (data,
                              ubase, build_int_cst (utype, 0),
                              &symbol_present, &var_present, &offset,
                              depends_on);
    }
  else if (ratio == 1)
    {
      cost = difference_cost (data,
                              ubase, cbase,
                              &symbol_present, &var_present, &offset,
                              depends_on);
    }
  else if (address_p
           && !POINTER_TYPE_P (ctype)
           && multiplier_allowed_in_address_p
                (ratio, TYPE_MODE (TREE_TYPE (utype)),
                 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
    {
      cbase
        = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
      cost = difference_cost (data,
                              ubase, cbase,
                              &symbol_present, &var_present, &offset,
                              depends_on);
    }
  else
    {
      cost = force_var_cost (data, cbase, depends_on);
      cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
      cost = add_costs (cost,
                        difference_cost (data,
                                         ubase, build_int_cst (utype, 0),
                                         &symbol_present, &var_present,
                                         &offset, depends_on));
    }

  /* If we are after the increment, the value of the candidate is higher by
     one iteration.  */
  stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
  if (stmt_is_after_inc)
    offset -= ratio * cstepi;

  /* Now the computation is in shape symbol + var1 + const + ratio * var2.
     (symbol/var1/const parts may be omitted).  If we are looking for an
     address, find the cost of addressing this.  */
  if (address_p)
    return add_costs (cost,
                      get_address_cost (symbol_present, var_present,
                                        offset, ratio, cstepi,
                                        TYPE_MODE (TREE_TYPE (utype)),
                                        TYPE_ADDR_SPACE (TREE_TYPE (utype)),
                                        speed, stmt_is_after_inc,
                                        can_autoinc));

  /* Otherwise estimate the costs for computing the expression.  */
  if (!symbol_present && !var_present && !offset)
    {
      if (ratio != 1)
        cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype), speed);
      return cost;
    }

  /* Symbol + offset should be compile-time computable so consider that they
     are added once to the variable, if present.  */
  if (var_present && (symbol_present || offset))
    cost.cost += add_cost (TYPE_MODE (ctype), speed)
                 / AVG_LOOP_NITER (data->current_loop);

  /* Having offset does not affect runtime cost in case it is added to
     symbol, but it increases complexity.  */
  if (offset)
    cost.complexity++;

  cost.cost += add_cost (TYPE_MODE (ctype), speed);

  aratio = ratio > 0 ? ratio : -ratio;
  if (aratio != 1)
    cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed);
  return cost;

fallback:
  if (can_autoinc)
    *can_autoinc = false;

  {
    /* Just get the expression, expand it and measure the cost.  */
    tree comp = get_computation_at (data->current_loop, use, cand, at);

    if (!comp)
      return infinite_cost;

    if (address_p)
      comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);

    return new_cost (computation_cost (comp, speed), 0);
  }
}

/* Determines the cost of the computation by which USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into a
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
   autoinc addressing is likely.  */
static comp_cost
get_computation_cost (struct ivopts_data *data,
                      struct iv_use *use, struct iv_cand *cand,
                      bool address_p, bitmap *depends_on, bool *can_autoinc)
{
  return get_computation_cost_at (data,
                                  use, cand, address_p, depends_on, use->stmt,
                                  can_autoinc);
}

/* Determines cost of basing replacement of USE on CAND in a generic
   expression.  */

static bool
determine_use_iv_cost_generic (struct ivopts_data *data,
                               struct iv_use *use, struct iv_cand *cand)
{
  bitmap depends_on;
  comp_cost cost;

  /* The simple case first -- if we need to express value of the preserved
     original biv, the cost is 0.  This also prevents us from counting the
     cost of increment twice -- once at this use and once in the cost of
     the candidate.  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE);
      return true;
    }

  cost = get_computation_cost (data, use, cand, false, &depends_on, NULL);
  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);

  return !infinite_cost_p (cost);
}

/* Determines cost of basing replacement of USE on CAND in an address.  */

static bool
determine_use_iv_cost_address (struct ivopts_data *data,
                               struct iv_use *use, struct iv_cand *cand)
{
  bitmap depends_on;
  bool can_autoinc;
  comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
                                         &can_autoinc);

  if (cand->ainc_use == use)
    {
      if (can_autoinc)
        cost.cost -= cand->cost_step;
      /* If we generated the candidate solely for exploiting autoincrement
         opportunities, and it turns out it can't be used, set the cost to
         infinity to make sure we ignore it.  */
      else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
        cost = infinite_cost;
    }
  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);

  return !infinite_cost_p (cost);
}

/* Computes value of candidate CAND at position AT in iteration NITER, and
   stores it to VAL.  */

static void
cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
               aff_tree *val)
{
  aff_tree step, delta, nit;
  struct iv *iv = cand->iv;
  tree type = TREE_TYPE (iv->base);
  tree steptype = type;
  if (POINTER_TYPE_P (type))
    steptype = sizetype;

  tree_to_aff_combination (iv->step, steptype, &step);
  tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
  aff_combination_convert (&nit, steptype);
  aff_combination_mult (&nit, &step, &delta);
  if (stmt_after_increment (loop, cand, at))
    aff_combination_add (&delta, &step);

  tree_to_aff_combination (iv->base, type, val);
  aff_combination_add (val, &delta);
}

/* Returns period of induction variable IV.  */

static tree
iv_period (struct iv *iv)
{
  tree step = iv->step, period, type;
  tree pow2div;

  gcc_assert (step && TREE_CODE (step) == INTEGER_CST);

  /* Period of the iv is gcd (step, type range).  Since type range is a
     power of two, it suffices to determine the maximum power of two that
     divides step.  */
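  /* For example (illustrative values only): for a 32-bit iv with step 4,
     num_ending_zeros gives 2, so the period is the low-bits mask 2^30 - 1;
     the iv takes on 2^30 distinct values before wrapping around to its
     initial one.  */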
  pow2div = num_ending_zeros (step);
  type = unsigned_type_for (TREE_TYPE (step));

  period = build_low_bits_mask (type,
                                (TYPE_PRECISION (type)
                                 - tree_low_cst (pow2div, 1)));

  return period;
}

/* Returns the comparison operator used when eliminating the iv USE.  */

static enum tree_code
iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
{
  struct loop *loop = data->current_loop;
  basic_block ex_bb;
  edge exit;

  ex_bb = gimple_bb (use->stmt);
  exit = EDGE_SUCC (ex_bb, 0);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    exit = EDGE_SUCC (ex_bb, 1);

  return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
}

/* Check whether it is possible to express the condition in USE by comparison
   of candidate CAND.  If so, store the value compared with to BOUND.  */

static bool
may_eliminate_iv (struct ivopts_data *data,
                  struct iv_use *use, struct iv_cand *cand, tree *bound)
{
  basic_block ex_bb;
  edge exit;
  tree nit, period;
  struct loop *loop = data->current_loop;
  aff_tree bnd;

  if (TREE_CODE (cand->iv->step) != INTEGER_CST)
    return false;

  /* For now works only for exits that dominate the loop latch.
     TODO: extend to other conditions inside loop body.  */
  ex_bb = gimple_bb (use->stmt);
  if (use->stmt != last_stmt (ex_bb)
      || gimple_code (use->stmt) != GIMPLE_COND
      || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
    return false;

  exit = EDGE_SUCC (ex_bb, 0);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    exit = EDGE_SUCC (ex_bb, 1);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    return false;

  nit = niter_for_exit (data, exit);
  if (!nit)
    return false;

  /* Determine whether we can use the variable to test the exit condition.
     This is the case iff the period of the induction variable is greater
     than the number of iterations for which the exit condition is true.  */
  period = iv_period (cand->iv);

  /* If the number of iterations is constant, compare against it directly.  */
  if (TREE_CODE (nit) == INTEGER_CST)
    {
      if (!tree_int_cst_lt (nit, period))
        return false;
    }

  /* If not, and if this is the only possible exit of the loop, see whether
     we can get a conservative estimate on the number of iterations of the
     entire loop and compare against that instead.  */
  else if (loop_only_exit_p (loop, exit))
    {
      double_int period_value, max_niter;
      if (!estimated_loop_iterations (loop, true, &max_niter))
        return false;
      period_value = tree_to_double_int (period);
      if (double_int_ucmp (max_niter, period_value) >= 0)
        return false;
    }

  /* Otherwise, punt.  */
  else
    return false;

  cand_value_at (loop, cand, use->stmt, nit, &bnd);

  *bound = aff_combination_to_tree (&bnd);
  /* It is unlikely that computing the number of iterations using division
     would be more profitable than keeping the original induction variable.  */
  if (expression_expensive_p (*bound))
    return false;
  return true;
}
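/* An illustrative example (not from the source) of what may_eliminate_iv
   enables: in

     for (i = 0; i < n; i++)
       p[i] = 0;

   the exit test on "i" can be rewritten against a pointer candidate as
   "q != p + n", where "p + n" is the BOUND that cand_value_at computes as
   the candidate's value after NITER iterations; the counter "i" can then
   be eliminated entirely.  */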
   */

static bool
determine_use_iv_cost_condition (struct ivopts_data *data,
				 struct iv_use *use, struct iv_cand *cand)
{
  tree bound = NULL_TREE;
  struct iv *cmp_iv;
  bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
  comp_cost elim_cost, express_cost, cost;
  bool ok;
  tree *control_var, *bound_cst;

  /* Only consider real candidates.  */
  if (!cand->iv)
    {
      set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE);
      return false;
    }

  /* Try iv elimination.  */
  if (may_eliminate_iv (data, use, cand, &bound))
    {
      elim_cost = force_var_cost (data, bound, &depends_on_elim);
      /* The bound is a loop invariant, so it will only be computed
	 once.  */
      elim_cost.cost /= AVG_LOOP_NITER (data->current_loop);
    }
  else
    elim_cost = infinite_cost;

  /* Try expressing the original giv.  If it is compared with an invariant,
     note that we cannot get rid of it.  */
  ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
			      NULL, &cmp_iv);
  gcc_assert (ok);

  /* When the condition is a comparison of the candidate IV against
     zero, prefer this IV.

     TODO: The constant that we're subtracting from the cost should
     be target-dependent.  This information should be added to the
     target costs for each backend.  */
  if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite!  */
      && integer_zerop (*bound_cst)
      && (operand_equal_p (*control_var, cand->var_after, 0)
	  || operand_equal_p (*control_var, cand->var_before, 0)))
    elim_cost.cost -= 1;

  express_cost = get_computation_cost (data, use, cand, false,
				       &depends_on_express, NULL);
  fd_ivopts_data = data;
  walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);

  /* Choose the better approach, preferring the eliminated IV. */
  if (compare_costs (elim_cost, express_cost) <= 0)
    {
      cost = elim_cost;
      depends_on = depends_on_elim;
      depends_on_elim = NULL;
    }
  else
    {
      cost = express_cost;
      depends_on = depends_on_express;
      depends_on_express = NULL;
      bound = NULL_TREE;
    }

  set_use_iv_cost (data, use, cand, cost, depends_on, bound);

  if (depends_on_elim)
    BITMAP_FREE (depends_on_elim);
  if (depends_on_express)
    BITMAP_FREE (depends_on_express);

  return !infinite_cost_p (cost);
}

/* Determines cost of basing replacement of USE on CAND.  Returns false
   if USE cannot be based on CAND.  */

static bool
determine_use_iv_cost (struct ivopts_data *data,
		       struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      return determine_use_iv_cost_generic (data, use, cand);

    case USE_ADDRESS:
      return determine_use_iv_cost_address (data, use, cand);

    case USE_COMPARE:
      return determine_use_iv_cost_condition (data, use, cand);

    default:
      gcc_unreachable ();
    }
}

/* Return true if get_computation_cost indicates that autoincrement is
   a possibility for the pair of USE and CAND, false otherwise.
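   Only address-type uses can profit from autoincrement; the DEPENDS_ON
   bitmap that get_computation_cost builds as a side effect is not needed
   here and is freed again.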
   */

static bool
autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
			   struct iv_cand *cand)
{
  bitmap depends_on;
  bool can_autoinc;
  comp_cost cost;

  if (use->type != USE_ADDRESS)
    return false;

  cost = get_computation_cost (data, use, cand, true, &depends_on,
			       &can_autoinc);

  BITMAP_FREE (depends_on);

  return !infinite_cost_p (cost) && can_autoinc;
}

/* Examine IP_ORIGINAL candidates to see if they are incremented next to a
   use that allows autoincrement, and set their AINC_USE if possible.  */

static void
set_autoinc_for_original_candidates (struct ivopts_data *data)
{
  unsigned i, j;

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);
      struct iv_use *closest = NULL;
      if (cand->pos != IP_ORIGINAL)
	continue;
      for (j = 0; j < n_iv_uses (data); j++)
	{
	  struct iv_use *use = iv_use (data, j);
	  unsigned uid = gimple_uid (use->stmt);
	  if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
	      || uid > gimple_uid (cand->incremented_at))
	    continue;
	  if (closest == NULL || uid > gimple_uid (closest->stmt))
	    closest = use;
	}
      if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
	continue;
      cand->ainc_use = closest;
    }
}

/* Finds the candidates for the induction variables.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_old_ivs_candidates (data);

  /* Add induction variables derived from uses.  */
  add_derived_ivs_candidates (data);

  set_autoinc_for_original_candidates (data);

  /* Record the important candidates.  */
  record_important_candidates (data);
}

/* Determines costs of basing the use of the iv on an iv candidate.  */

static void
determine_use_iv_costs (struct ivopts_data *data)
{
  unsigned i, j;
  struct iv_use *use;
  struct iv_cand *cand;
  bitmap to_clear = BITMAP_ALLOC (NULL);

  alloc_use_cost_map (data);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      if (data->consider_all_candidates)
	{
	  for (j = 0; j < n_iv_cands (data); j++)
	    {
	      cand = iv_cand (data, j);
	      determine_use_iv_cost (data, use, cand);
	    }
	}
      else
	{
	  bitmap_iterator bi;

	  EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
	    {
	      cand = iv_cand (data, j);
	      if (!determine_use_iv_cost (data, use, cand))
		bitmap_set_bit (to_clear, j);
	    }

	  /* Remove the candidates for which the cost is infinite from
	     the list of related candidates.
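	     Such pairs could never be selected anyway, and dropping them
	     also keeps the related_cands bitmaps smaller for the search
	     below.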
	     */
	  bitmap_and_compl_into (use->related_cands, to_clear);
	  bitmap_clear (to_clear);
	}
    }

  BITMAP_FREE (to_clear);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Use-candidate costs:\n");

      for (i = 0; i < n_iv_uses (data); i++)
	{
	  use = iv_use (data, i);

	  fprintf (dump_file, "Use %d:\n", i);
	  fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
	  for (j = 0; j < use->n_map_members; j++)
	    {
	      if (!use->cost_map[j].cand
		  || infinite_cost_p (use->cost_map[j].cost))
		continue;

	      fprintf (dump_file, "  %d\t%d\t%d\t",
		       use->cost_map[j].cand->id,
		       use->cost_map[j].cost.cost,
		       use->cost_map[j].cost.complexity);
	      if (use->cost_map[j].depends_on)
		bitmap_print (dump_file,
			      use->cost_map[j].depends_on, "","");
	      fprintf (dump_file, "\n");
	    }

	  fprintf (dump_file, "\n");
	}
      fprintf (dump_file, "\n");
    }
}

/* Determines cost of the candidate CAND.  */

static void
determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
{
  comp_cost cost_base;
  unsigned cost, cost_step;
  tree base;

  if (!cand->iv)
    {
      cand->cost = 0;
      return;
    }

  /* There are two costs associated with the candidate -- its increment
     and its initialization.  The second is almost negligible for any loop
     that rolls enough, so we take it into account only a little.  */

  base = cand->iv->base;
  cost_base = force_var_cost (data, base, NULL);
  cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);

  cost = cost_step + cost_base.cost / AVG_LOOP_NITER (current_loop);

  /* Prefer the original ivs unless we may gain something by replacing them.
     The reason is to make debugging simpler; so this is not relevant for
     artificial ivs created by other optimization passes.  */
  if (cand->pos != IP_ORIGINAL
      || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
    cost++;

  /* Prefer not to insert statements into latch unless there are some
     already (so that we do not create unnecessary jumps).  */
  if (cand->pos == IP_END
      && empty_block_p (ip_end_pos (data->current_loop)))
    cost++;

  cand->cost = cost;
  cand->cost_step = cost_step;
}

/* Determines costs of computation of the candidates.  */

static void
determine_iv_costs (struct ivopts_data *data)
{
  unsigned i;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Candidate costs:\n");
      fprintf (dump_file, "  cand\tcost\n");
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      determine_iv_cost (data, cand);

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\n");
}

/* Calculates cost for having SIZE induction variables.  */

static unsigned
ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
{
  /* We add size to the cost, so that we prefer eliminating ivs
     if possible.  */
  return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed);
}

/* For each size of the induction variable set determine the penalty.
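   E.g., with SIZE ivs the function above returns
   SIZE + estimate_reg_pressure_cost (SIZE, regs_used, speed), so each
   extra iv costs at least one unit even when registers are plentiful.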
   */

static void
determine_set_costs (struct ivopts_data *data)
{
  unsigned j, n;
  gimple phi;
  gimple_stmt_iterator psi;
  tree op;
  struct loop *loop = data->current_loop;
  bitmap_iterator bi;

  /* We use the following model (definitely improvable, especially the
     cost function -- TODO):

     We estimate the number of registers available (using MD data), name it A.

     We estimate the number of registers used by the loop, name it U.  This
     number is obtained as the number of loop phi nodes (not counting virtual
     registers and bivs) + the number of variables from outside of the loop.

     We set a reserve R (free regs that are used for temporary computations,
     etc.).  For now the reserve is a constant 3.

     Let I be the number of induction variables.

     -- if U + I + R <= A, the cost is I * SMALL_COST (just so as not to
	encourage making a lot of ivs without a reason).
     -- if A - R < U + I <= A, the cost is I * PRES_COST
     -- if U + I > A, the cost is I * PRES_COST and
	number of uses * SPILL_COST * (U + I - A) / (U + I) is added.  */

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Global costs:\n");
      fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
      fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
      fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
    }

  n = 0;
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      op = PHI_RESULT (phi);

      if (!is_gimple_reg (op))
	continue;

      if (get_iv (data, op))
	continue;

      n++;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info = ver_info (data, j);

      if (info->inv_id && info->has_nonlin_use)
	n++;
    }

  data->regs_used = n;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "  regs_used %d\n", n);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  cost for size:\n");
      fprintf (dump_file, "  ivs\tcost\n");
      for (j = 0; j <= 2 * target_avail_regs; j++)
	fprintf (dump_file, "  %d\t%d\n", j,
		 ivopts_global_cost_for_size (data, j));
      fprintf (dump_file, "\n");
    }
}

/* Returns true if A is a cheaper cost pair than B.  */

static bool
cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
{
  int cmp;

  if (!a)
    return false;

  if (!b)
    return true;

  cmp = compare_costs (a->cost, b->cost);
  if (cmp < 0)
    return true;

  if (cmp > 0)
    return false;

  /* In case the costs are the same, prefer the cheaper candidate.  */
  if (a->cand->cost < b->cand->cost)
    return true;

  return false;
}

/* Computes the cost field of IVS structure.  */

static void
iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
{
  comp_cost cost = ivs->cand_use_cost;
  cost.cost += ivs->cand_cost;
  cost.cost += ivopts_global_cost_for_size (data, ivs->n_regs);

  ivs->cost = cost;
}

/* Remove invariants in set INVS from set IVS.
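   An invariant stops occupying a register, and n_regs is decremented,
   once its last use in the assignment disappears.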
*/ 4531 4532 static void 4533 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs) 4534 { 4535 bitmap_iterator bi; 4536 unsigned iid; 4537 4538 if (!invs) 4539 return; 4540 4541 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi) 4542 { 4543 ivs->n_invariant_uses[iid]--; 4544 if (ivs->n_invariant_uses[iid] == 0) 4545 ivs->n_regs--; 4546 } 4547 } 4548 4549 /* Set USE not to be expressed by any candidate in IVS. */ 4550 4551 static void 4552 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs, 4553 struct iv_use *use) 4554 { 4555 unsigned uid = use->id, cid; 4556 struct cost_pair *cp; 4557 4558 cp = ivs->cand_for_use[uid]; 4559 if (!cp) 4560 return; 4561 cid = cp->cand->id; 4562 4563 ivs->bad_uses++; 4564 ivs->cand_for_use[uid] = NULL; 4565 ivs->n_cand_uses[cid]--; 4566 4567 if (ivs->n_cand_uses[cid] == 0) 4568 { 4569 bitmap_clear_bit (ivs->cands, cid); 4570 /* Do not count the pseudocandidates. */ 4571 if (cp->cand->iv) 4572 ivs->n_regs--; 4573 ivs->n_cands--; 4574 ivs->cand_cost -= cp->cand->cost; 4575 4576 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on); 4577 } 4578 4579 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost); 4580 4581 iv_ca_set_remove_invariants (ivs, cp->depends_on); 4582 iv_ca_recount_cost (data, ivs); 4583 } 4584 4585 /* Add invariants in set INVS to set IVS. */ 4586 4587 static void 4588 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs) 4589 { 4590 bitmap_iterator bi; 4591 unsigned iid; 4592 4593 if (!invs) 4594 return; 4595 4596 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi) 4597 { 4598 ivs->n_invariant_uses[iid]++; 4599 if (ivs->n_invariant_uses[iid] == 1) 4600 ivs->n_regs++; 4601 } 4602 } 4603 4604 /* Set cost pair for USE in set IVS to CP. */ 4605 4606 static void 4607 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs, 4608 struct iv_use *use, struct cost_pair *cp) 4609 { 4610 unsigned uid = use->id, cid; 4611 4612 if (ivs->cand_for_use[uid] == cp) 4613 return; 4614 4615 if (ivs->cand_for_use[uid]) 4616 iv_ca_set_no_cp (data, ivs, use); 4617 4618 if (cp) 4619 { 4620 cid = cp->cand->id; 4621 4622 ivs->bad_uses--; 4623 ivs->cand_for_use[uid] = cp; 4624 ivs->n_cand_uses[cid]++; 4625 if (ivs->n_cand_uses[cid] == 1) 4626 { 4627 bitmap_set_bit (ivs->cands, cid); 4628 /* Do not count the pseudocandidates. */ 4629 if (cp->cand->iv) 4630 ivs->n_regs++; 4631 ivs->n_cands++; 4632 ivs->cand_cost += cp->cand->cost; 4633 4634 iv_ca_set_add_invariants (ivs, cp->cand->depends_on); 4635 } 4636 4637 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost); 4638 iv_ca_set_add_invariants (ivs, cp->depends_on); 4639 iv_ca_recount_cost (data, ivs); 4640 } 4641 } 4642 4643 /* Extend set IVS by expressing USE by some of the candidates in it 4644 if possible. */ 4645 4646 static void 4647 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs, 4648 struct iv_use *use) 4649 { 4650 struct cost_pair *best_cp = NULL, *cp; 4651 bitmap_iterator bi; 4652 unsigned i; 4653 4654 gcc_assert (ivs->upto >= use->id); 4655 4656 if (ivs->upto == use->id) 4657 { 4658 ivs->upto++; 4659 ivs->bad_uses++; 4660 } 4661 4662 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi) 4663 { 4664 cp = get_use_iv_cost (data, use, iv_cand (data, i)); 4665 4666 if (cheaper_cost_pair (cp, best_cp)) 4667 best_cp = cp; 4668 } 4669 4670 iv_ca_set_cp (data, ivs, use, best_cp); 4671 } 4672 4673 /* Get cost for assignment IVS. */ 4674 4675 static comp_cost 4676 iv_ca_cost (struct iv_ca *ivs) 4677 { 4678 /* This was a conditional expression but it triggered a bug in 4679 Sun C 5.5. 
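     bad_uses counts the uses that currently have no candidate assigned;
     any such use makes the whole assignment unusable, hence the infinite
     cost.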
     */
  if (ivs->bad_uses)
    return infinite_cost;
  else
    return ivs->cost;
}

/* Returns true if all dependences of CP are among invariants in IVS.  */

static bool
iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
{
  unsigned i;
  bitmap_iterator bi;

  if (!cp->depends_on)
    return true;

  EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
    {
      if (ivs->n_invariant_uses[i] == 0)
	return false;
    }

  return true;
}

/* Creates a change of expressing USE by NEW_CP instead of OLD_CP, and
   chains it before NEXT_CHANGE.  */

static struct iv_ca_delta *
iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
		 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
{
  struct iv_ca_delta *change = XNEW (struct iv_ca_delta);

  change->use = use;
  change->old_cp = old_cp;
  change->new_cp = new_cp;
  change->next_change = next_change;

  return change;
}

/* Joins two lists of changes L1 and L2.  Destructive -- old lists
   are rewritten.  */

static struct iv_ca_delta *
iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
{
  struct iv_ca_delta *last;

  if (!l2)
    return l1;

  if (!l1)
    return l2;

  for (last = l1; last->next_change; last = last->next_change)
    continue;
  last->next_change = l2;

  return l1;
}

/* Returns the candidate by which USE is expressed in IVS.  */

static struct cost_pair *
iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
{
  return ivs->cand_for_use[use->id];
}

/* Reverse the list of changes DELTA, forming the inverse to it.  */

static struct iv_ca_delta *
iv_ca_delta_reverse (struct iv_ca_delta *delta)
{
  struct iv_ca_delta *act, *next, *prev = NULL;
  struct cost_pair *tmp;

  for (act = delta; act; act = next)
    {
      next = act->next_change;
      act->next_change = prev;
      prev = act;

      tmp = act->old_cp;
      act->old_cp = act->new_cp;
      act->new_cp = tmp;
    }

  return prev;
}

/* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
   reverted instead.  */

static void
iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
		    struct iv_ca_delta *delta, bool forward)
{
  struct cost_pair *from, *to;
  struct iv_ca_delta *act;

  if (!forward)
    delta = iv_ca_delta_reverse (delta);

  for (act = delta; act; act = act->next_change)
    {
      from = act->old_cp;
      to = act->new_cp;
      gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
      iv_ca_set_cp (data, ivs, act->use, to);
    }

  if (!forward)
    iv_ca_delta_reverse (delta);
}

/* Returns true if CAND is used in IVS.  */

static bool
iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
{
  return ivs->n_cand_uses[cand->id] > 0;
}

/* Returns number of induction variable candidates in the set IVS.  */

static unsigned
iv_ca_n_cands (struct iv_ca *ivs)
{
  return ivs->n_cands;
}

/* Free the list of changes DELTA.
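   *DELTA is set to NULL afterwards, so the list cannot be freed twice.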
*/ 4816 4817 static void 4818 iv_ca_delta_free (struct iv_ca_delta **delta) 4819 { 4820 struct iv_ca_delta *act, *next; 4821 4822 for (act = *delta; act; act = next) 4823 { 4824 next = act->next_change; 4825 free (act); 4826 } 4827 4828 *delta = NULL; 4829 } 4830 4831 /* Allocates new iv candidates assignment. */ 4832 4833 static struct iv_ca * 4834 iv_ca_new (struct ivopts_data *data) 4835 { 4836 struct iv_ca *nw = XNEW (struct iv_ca); 4837 4838 nw->upto = 0; 4839 nw->bad_uses = 0; 4840 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data)); 4841 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data)); 4842 nw->cands = BITMAP_ALLOC (NULL); 4843 nw->n_cands = 0; 4844 nw->n_regs = 0; 4845 nw->cand_use_cost = zero_cost; 4846 nw->cand_cost = 0; 4847 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1); 4848 nw->cost = zero_cost; 4849 4850 return nw; 4851 } 4852 4853 /* Free memory occupied by the set IVS. */ 4854 4855 static void 4856 iv_ca_free (struct iv_ca **ivs) 4857 { 4858 free ((*ivs)->cand_for_use); 4859 free ((*ivs)->n_cand_uses); 4860 BITMAP_FREE ((*ivs)->cands); 4861 free ((*ivs)->n_invariant_uses); 4862 free (*ivs); 4863 *ivs = NULL; 4864 } 4865 4866 /* Dumps IVS to FILE. */ 4867 4868 static void 4869 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs) 4870 { 4871 const char *pref = " invariants "; 4872 unsigned i; 4873 comp_cost cost = iv_ca_cost (ivs); 4874 4875 fprintf (file, " cost %d (complexity %d)\n", cost.cost, cost.complexity); 4876 bitmap_print (file, ivs->cands, " candidates ","\n"); 4877 4878 for (i = 1; i <= data->max_inv_id; i++) 4879 if (ivs->n_invariant_uses[i]) 4880 { 4881 fprintf (file, "%s%d", pref, i); 4882 pref = ", "; 4883 } 4884 fprintf (file, "\n"); 4885 } 4886 4887 /* Try changing candidate in IVS to CAND for each use. Return cost of the 4888 new set, and store differences in DELTA. Number of induction variables 4889 in the new set is stored to N_IVS. */ 4890 4891 static comp_cost 4892 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs, 4893 struct iv_cand *cand, struct iv_ca_delta **delta, 4894 unsigned *n_ivs) 4895 { 4896 unsigned i; 4897 comp_cost cost; 4898 struct iv_use *use; 4899 struct cost_pair *old_cp, *new_cp; 4900 4901 *delta = NULL; 4902 for (i = 0; i < ivs->upto; i++) 4903 { 4904 use = iv_use (data, i); 4905 old_cp = iv_ca_cand_for_use (ivs, use); 4906 4907 if (old_cp 4908 && old_cp->cand == cand) 4909 continue; 4910 4911 new_cp = get_use_iv_cost (data, use, cand); 4912 if (!new_cp) 4913 continue; 4914 4915 if (!iv_ca_has_deps (ivs, new_cp)) 4916 continue; 4917 4918 if (!cheaper_cost_pair (new_cp, old_cp)) 4919 continue; 4920 4921 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta); 4922 } 4923 4924 iv_ca_delta_commit (data, ivs, *delta, true); 4925 cost = iv_ca_cost (ivs); 4926 if (n_ivs) 4927 *n_ivs = iv_ca_n_cands (ivs); 4928 iv_ca_delta_commit (data, ivs, *delta, false); 4929 4930 return cost; 4931 } 4932 4933 /* Try narrowing set IVS by removing CAND. Return the cost of 4934 the new set and store the differences in DELTA. 
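   If some use can be expressed only by CAND, the narrowing is impossible
   and infinite_cost is returned.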
   */

static comp_cost
iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_cand *cand, struct iv_ca_delta **delta)
{
  unsigned i, ci;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp, *cp;
  bitmap_iterator bi;
  struct iv_cand *cnd;
  comp_cost cost;

  *delta = NULL;
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      old_cp = iv_ca_cand_for_use (ivs, use);
      if (old_cp->cand != cand)
	continue;

      new_cp = NULL;

      if (data->consider_all_candidates)
	{
	  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
	    {
	      if (ci == cand->id)
		continue;

	      cnd = iv_cand (data, ci);

	      cp = get_use_iv_cost (data, use, cnd);
	      if (!cp)
		continue;
	      if (!iv_ca_has_deps (ivs, cp))
		continue;

	      if (!cheaper_cost_pair (cp, new_cp))
		continue;

	      new_cp = cp;
	    }
	}
      else
	{
	  EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
	    {
	      if (ci == cand->id)
		continue;

	      cnd = iv_cand (data, ci);

	      cp = get_use_iv_cost (data, use, cnd);
	      if (!cp)
		continue;
	      if (!iv_ca_has_deps (ivs, cp))
		continue;

	      if (!cheaper_cost_pair (cp, new_cp))
		continue;

	      new_cp = cp;
	    }
	}

      if (!new_cp)
	{
	  iv_ca_delta_free (delta);
	  return infinite_cost;
	}

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}

/* Try optimizing the set of candidates IVS by removing candidates other
   than EXCEPT_CAND from it.  Return cost of the new set, and store
   differences in DELTA.  */

static comp_cost
iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
	     struct iv_cand *except_cand, struct iv_ca_delta **delta)
{
  bitmap_iterator bi;
  struct iv_ca_delta *act_delta, *best_delta;
  unsigned i;
  comp_cost best_cost, acost;
  struct iv_cand *cand;

  best_delta = NULL;
  best_cost = iv_ca_cost (ivs);

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (cand == except_cand)
	continue;

      acost = iv_ca_narrow (data, ivs, cand, &act_delta);

      if (compare_costs (acost, best_cost) < 0)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      *delta = NULL;
      return best_cost;
    }

  /* Recurse to possibly remove other unnecessary ivs.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  best_cost = iv_ca_prune (data, ivs, except_cand, delta);
  iv_ca_delta_commit (data, ivs, best_delta, false);
  *delta = iv_ca_delta_join (best_delta, *delta);
  return best_cost;
}

/* Tries to extend the set IVS in the best possible way in order
   to express the USE.
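   Returns false if USE cannot be expressed by any candidate at a finite
   cost.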
   */

static bool
try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
		  struct iv_use *use)
{
  comp_cost best_cost, act_cost;
  unsigned i;
  bitmap_iterator bi;
  struct iv_cand *cand;
  struct iv_ca_delta *best_delta = NULL, *act_delta;
  struct cost_pair *cp;

  iv_ca_add_use (data, ivs, use);
  best_cost = iv_ca_cost (ivs);

  cp = iv_ca_cand_for_use (ivs, use);
  if (cp)
    {
      best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
      iv_ca_set_no_cp (data, ivs, use);
    }

  /* First try important candidates not based on any memory object.  Only
     if this fails do we try the specific ones.  Rationale -- in loops with
     many variables the best choice often is to use just one generic biv.
     If we added many ivs specific to the uses here, the optimization
     algorithm would later be likely to get stuck in a local minimum,
     causing us to create too many ivs.  Going from few ivs to more seems
     more likely to succeed -- starting from a few ivs, replacing an
     expensive use by a specific iv should always be a win.  */
  EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (cand->iv->base_object != NULL_TREE)
	continue;

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      cp = get_use_iv_cost (data, use, cand);
      if (!cp)
	continue;

      iv_ca_set_cp (data, ivs, use, cp);
      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
      iv_ca_set_no_cp (data, ivs, use);
      act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);

      if (compare_costs (act_cost, best_cost) < 0)
	{
	  best_cost = act_cost;

	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (infinite_cost_p (best_cost))
    {
      for (i = 0; i < use->n_map_members; i++)
	{
	  cp = use->cost_map + i;
	  cand = cp->cand;
	  if (!cand)
	    continue;

	  /* Already tried this.  */
	  if (cand->important && cand->iv->base_object == NULL_TREE)
	    continue;

	  if (iv_ca_cand_used_p (ivs, cand))
	    continue;

	  act_delta = NULL;
	  iv_ca_set_cp (data, ivs, use, cp);
	  act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
	  iv_ca_set_no_cp (data, ivs, use);
	  act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
				       cp, act_delta);

	  if (compare_costs (act_cost, best_cost) < 0)
	    {
	      best_cost = act_cost;

	      if (best_delta)
		iv_ca_delta_free (&best_delta);
	      best_delta = act_delta;
	    }
	  else
	    iv_ca_delta_free (&act_delta);
	}
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  iv_ca_delta_free (&best_delta);

  return !infinite_cost_p (best_cost);
}

/* Finds an initial assignment of candidates to uses.  */

static struct iv_ca *
get_initial_solution (struct ivopts_data *data)
{
  struct iv_ca *ivs = iv_ca_new (data);
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    if (!try_add_cand_for (data, ivs, iv_use (data, i)))
      {
	iv_ca_free (&ivs);
	return NULL;
      }

  return ivs;
}

/* Tries to improve the set of induction variables IVS.
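   Returns true if the cost of the set decreased, i.e., whether another
   iteration of the greedy loop in find_optimal_iv_set is worthwhile.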
   */

static bool
try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
{
  unsigned i, n_ivs;
  comp_cost acost, best_cost = iv_ca_cost (ivs);
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;

  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
      if (!act_delta)
	continue;

      /* If we successfully added the candidate and the set is small enough,
	 try optimizing it by removing other candidates.  */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
	{
	  iv_ca_delta_commit (data, ivs, act_delta, true);
	  acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
	  iv_ca_delta_commit (data, ivs, act_delta, false);
	  act_delta = iv_ca_delta_join (act_delta, tmp_delta);
	}

      if (compare_costs (acost, best_cost) < 0)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* Try removing the candidates from the set instead.  */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      /* Nothing more we can do.  */
      if (!best_delta)
	return false;
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
  iv_ca_delta_free (&best_delta);
  return true;
}

/* Attempts to find the optimal set of induction variables.  We use a
   simple greedy heuristic -- we try to replace at most one candidate in
   the selected solution and remove the unused ivs while this improves
   the cost.  */

static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  unsigned i;
  struct iv_ca *set;
  struct iv_use *use;

  /* Get the initial solution.  */
  set = get_initial_solution (data);
  if (!set)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
      return NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial set of candidates:\n");
      iv_ca_dump (data, dump_file, set);
    }

  while (try_improve_iv_set (data, set))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Improved to:\n");
	  iv_ca_dump (data, dump_file, set);
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      comp_cost cost = iv_ca_cost (set);
      fprintf (dump_file, "Final cost %d (complexity %d)\n\n", cost.cost, cost.complexity);
    }

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      use->selected = iv_ca_cand_for_use (set, use)->cand;
    }

  return set;
}

/* Creates a new induction variable corresponding to CAND.
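   The increment is inserted just before the exit test (IP_NORMAL), at the
   end of the latch (IP_END), or next to the use the candidate was created
   for (IP_BEFORE_USE/IP_AFTER_USE); for IP_ORIGINAL candidates no new code
   is emitted and the original biv is preserved instead.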
*/ 5298 5299 static void 5300 create_new_iv (struct ivopts_data *data, struct iv_cand *cand) 5301 { 5302 gimple_stmt_iterator incr_pos; 5303 tree base; 5304 bool after = false; 5305 5306 if (!cand->iv) 5307 return; 5308 5309 switch (cand->pos) 5310 { 5311 case IP_NORMAL: 5312 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop)); 5313 break; 5314 5315 case IP_END: 5316 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop)); 5317 after = true; 5318 break; 5319 5320 case IP_AFTER_USE: 5321 after = true; 5322 /* fall through */ 5323 case IP_BEFORE_USE: 5324 incr_pos = gsi_for_stmt (cand->incremented_at); 5325 break; 5326 5327 case IP_ORIGINAL: 5328 /* Mark that the iv is preserved. */ 5329 name_info (data, cand->var_before)->preserve_biv = true; 5330 name_info (data, cand->var_after)->preserve_biv = true; 5331 5332 /* Rewrite the increment so that it uses var_before directly. */ 5333 find_interesting_uses_op (data, cand->var_after)->selected = cand; 5334 5335 return; 5336 } 5337 5338 gimple_add_tmp_var (cand->var_before); 5339 add_referenced_var (cand->var_before); 5340 5341 base = unshare_expr (cand->iv->base); 5342 5343 create_iv (base, unshare_expr (cand->iv->step), 5344 cand->var_before, data->current_loop, 5345 &incr_pos, after, &cand->var_before, &cand->var_after); 5346 } 5347 5348 /* Creates new induction variables described in SET. */ 5349 5350 static void 5351 create_new_ivs (struct ivopts_data *data, struct iv_ca *set) 5352 { 5353 unsigned i; 5354 struct iv_cand *cand; 5355 bitmap_iterator bi; 5356 5357 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi) 5358 { 5359 cand = iv_cand (data, i); 5360 create_new_iv (data, cand); 5361 } 5362 } 5363 5364 5365 /* Rewrites USE (definition of iv used in a nonlinear expression) 5366 using candidate CAND. */ 5367 5368 static void 5369 rewrite_use_nonlinear_expr (struct ivopts_data *data, 5370 struct iv_use *use, struct iv_cand *cand) 5371 { 5372 tree comp; 5373 tree op, tgt; 5374 gimple ass; 5375 gimple_stmt_iterator bsi; 5376 5377 /* An important special case -- if we are asked to express value of 5378 the original iv by itself, just exit; there is no need to 5379 introduce a new computation (that might also need casting the 5380 variable to unsigned and back). */ 5381 if (cand->pos == IP_ORIGINAL 5382 && cand->incremented_at == use->stmt) 5383 { 5384 tree step, ctype, utype; 5385 enum tree_code incr_code = PLUS_EXPR, old_code; 5386 5387 gcc_assert (is_gimple_assign (use->stmt)); 5388 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after); 5389 5390 step = cand->iv->step; 5391 ctype = TREE_TYPE (step); 5392 utype = TREE_TYPE (cand->var_after); 5393 if (TREE_CODE (step) == NEGATE_EXPR) 5394 { 5395 incr_code = MINUS_EXPR; 5396 step = TREE_OPERAND (step, 0); 5397 } 5398 5399 /* Check whether we may leave the computation unchanged. 5400 This is the case only if it does not rely on other 5401 computations in the loop -- otherwise, the computation 5402 we rely upon may be removed in remove_unused_ivs, 5403 thus leading to ICE. 
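	 In other words, the increment is left alone only if it adds a
	 constant or the step itself, e.g. i_after = i_before + 1;
	 anything else is rewritten in terms of var_before below.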
*/ 5404 old_code = gimple_assign_rhs_code (use->stmt); 5405 if (old_code == PLUS_EXPR 5406 || old_code == MINUS_EXPR 5407 || old_code == POINTER_PLUS_EXPR) 5408 { 5409 if (gimple_assign_rhs1 (use->stmt) == cand->var_before) 5410 op = gimple_assign_rhs2 (use->stmt); 5411 else if (old_code != MINUS_EXPR 5412 && gimple_assign_rhs2 (use->stmt) == cand->var_before) 5413 op = gimple_assign_rhs1 (use->stmt); 5414 else 5415 op = NULL_TREE; 5416 } 5417 else 5418 op = NULL_TREE; 5419 5420 if (op 5421 && (TREE_CODE (op) == INTEGER_CST 5422 || operand_equal_p (op, step, 0))) 5423 return; 5424 5425 /* Otherwise, add the necessary computations to express 5426 the iv. */ 5427 op = fold_convert (ctype, cand->var_before); 5428 comp = fold_convert (utype, 5429 build2 (incr_code, ctype, op, 5430 unshare_expr (step))); 5431 } 5432 else 5433 { 5434 comp = get_computation (data->current_loop, use, cand); 5435 gcc_assert (comp != NULL_TREE); 5436 } 5437 5438 switch (gimple_code (use->stmt)) 5439 { 5440 case GIMPLE_PHI: 5441 tgt = PHI_RESULT (use->stmt); 5442 5443 /* If we should keep the biv, do not replace it. */ 5444 if (name_info (data, tgt)->preserve_biv) 5445 return; 5446 5447 bsi = gsi_after_labels (gimple_bb (use->stmt)); 5448 break; 5449 5450 case GIMPLE_ASSIGN: 5451 tgt = gimple_assign_lhs (use->stmt); 5452 bsi = gsi_for_stmt (use->stmt); 5453 break; 5454 5455 default: 5456 gcc_unreachable (); 5457 } 5458 5459 op = force_gimple_operand_gsi (&bsi, comp, false, SSA_NAME_VAR (tgt), 5460 true, GSI_SAME_STMT); 5461 5462 if (gimple_code (use->stmt) == GIMPLE_PHI) 5463 { 5464 ass = gimple_build_assign (tgt, op); 5465 gsi_insert_before (&bsi, ass, GSI_SAME_STMT); 5466 5467 bsi = gsi_for_stmt (use->stmt); 5468 remove_phi_node (&bsi, false); 5469 } 5470 else 5471 { 5472 gimple_assign_set_rhs_from_tree (&bsi, op); 5473 use->stmt = gsi_stmt (bsi); 5474 } 5475 } 5476 5477 /* Replaces ssa name in index IDX by its basic variable. Callback for 5478 for_each_index. */ 5479 5480 static bool 5481 idx_remove_ssa_names (tree base, tree *idx, 5482 void *data ATTRIBUTE_UNUSED) 5483 { 5484 tree *op; 5485 5486 if (TREE_CODE (*idx) == SSA_NAME) 5487 *idx = SSA_NAME_VAR (*idx); 5488 5489 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF) 5490 { 5491 op = &TREE_OPERAND (base, 2); 5492 if (*op 5493 && TREE_CODE (*op) == SSA_NAME) 5494 *op = SSA_NAME_VAR (*op); 5495 op = &TREE_OPERAND (base, 3); 5496 if (*op 5497 && TREE_CODE (*op) == SSA_NAME) 5498 *op = SSA_NAME_VAR (*op); 5499 } 5500 5501 return true; 5502 } 5503 5504 /* Unshares REF and replaces ssa names inside it by their basic variables. */ 5505 5506 static tree 5507 unshare_and_remove_ssa_names (tree ref) 5508 { 5509 ref = unshare_expr (ref); 5510 for_each_index (&ref, idx_remove_ssa_names, NULL); 5511 5512 return ref; 5513 } 5514 5515 /* Copies the reference information from OLD_REF to NEW_REF. */ 5516 5517 static void 5518 copy_ref_info (tree new_ref, tree old_ref) 5519 { 5520 if (TREE_CODE (old_ref) == TARGET_MEM_REF) 5521 copy_mem_ref_info (new_ref, old_ref); 5522 else 5523 { 5524 TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref); 5525 TREE_SIDE_EFFECTS (new_ref) = TREE_SIDE_EFFECTS (old_ref); 5526 TREE_THIS_VOLATILE (new_ref) = TREE_THIS_VOLATILE (old_ref); 5527 } 5528 } 5529 5530 /* Rewrites USE (address that is an iv) using candidate CAND. 
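   The new address is first built as an affine combination of CAND and
   invariants, and create_mem_ref then folds that combination into a
   TARGET_MEM_REF valid for the target's addressing modes.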
*/ 5531 5532 static void 5533 rewrite_use_address (struct ivopts_data *data, 5534 struct iv_use *use, struct iv_cand *cand) 5535 { 5536 aff_tree aff; 5537 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt); 5538 tree base_hint = NULL_TREE; 5539 tree ref; 5540 bool ok; 5541 5542 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff); 5543 gcc_assert (ok); 5544 unshare_aff_combination (&aff); 5545 5546 /* To avoid undefined overflow problems, all IV candidates use unsigned 5547 integer types. The drawback is that this makes it impossible for 5548 create_mem_ref to distinguish an IV that is based on a memory object 5549 from one that represents simply an offset. 5550 5551 To work around this problem, we pass a hint to create_mem_ref that 5552 indicates which variable (if any) in aff is an IV based on a memory 5553 object. Note that we only consider the candidate. If this is not 5554 based on an object, the base of the reference is in some subexpression 5555 of the use -- but these will use pointer types, so they are recognized 5556 by the create_mem_ref heuristics anyway. */ 5557 if (cand->iv->base_object) 5558 base_hint = var_at_stmt (data->current_loop, cand, use->stmt); 5559 5560 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff, base_hint, 5561 data->speed); 5562 copy_ref_info (ref, *use->op_p); 5563 *use->op_p = ref; 5564 } 5565 5566 /* Rewrites USE (the condition such that one of the arguments is an iv) using 5567 candidate CAND. */ 5568 5569 static void 5570 rewrite_use_compare (struct ivopts_data *data, 5571 struct iv_use *use, struct iv_cand *cand) 5572 { 5573 tree comp, *var_p, op, bound; 5574 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt); 5575 enum tree_code compare; 5576 struct cost_pair *cp = get_use_iv_cost (data, use, cand); 5577 bool ok; 5578 5579 bound = cp->value; 5580 if (bound) 5581 { 5582 tree var = var_at_stmt (data->current_loop, cand, use->stmt); 5583 tree var_type = TREE_TYPE (var); 5584 gimple_seq stmts; 5585 5586 compare = iv_elimination_compare (data, use); 5587 bound = unshare_expr (fold_convert (var_type, bound)); 5588 op = force_gimple_operand (bound, &stmts, true, NULL_TREE); 5589 if (stmts) 5590 gsi_insert_seq_on_edge_immediate ( 5591 loop_preheader_edge (data->current_loop), 5592 stmts); 5593 5594 gimple_cond_set_lhs (use->stmt, var); 5595 gimple_cond_set_code (use->stmt, compare); 5596 gimple_cond_set_rhs (use->stmt, op); 5597 return; 5598 } 5599 5600 /* The induction variable elimination failed; just express the original 5601 giv. */ 5602 comp = get_computation (data->current_loop, use, cand); 5603 gcc_assert (comp != NULL_TREE); 5604 5605 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL); 5606 gcc_assert (ok); 5607 5608 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p), 5609 true, GSI_SAME_STMT); 5610 } 5611 5612 /* Rewrites USE using candidate CAND. */ 5613 5614 static void 5615 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand) 5616 { 5617 switch (use->type) 5618 { 5619 case USE_NONLINEAR_EXPR: 5620 rewrite_use_nonlinear_expr (data, use, cand); 5621 break; 5622 5623 case USE_ADDRESS: 5624 rewrite_use_address (data, use, cand); 5625 break; 5626 5627 case USE_COMPARE: 5628 rewrite_use_compare (data, use, cand); 5629 break; 5630 5631 default: 5632 gcc_unreachable (); 5633 } 5634 5635 update_stmt (use->stmt); 5636 } 5637 5638 /* Rewrite the uses using the selected induction variables. 
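   Each use was assigned its candidate (use->selected) in
   find_optimal_iv_set.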
   */

static void
rewrite_uses (struct ivopts_data *data)
{
  unsigned i;
  struct iv_cand *cand;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      cand = use->selected;
      gcc_assert (cand);

      rewrite_use (data, use, cand);
    }
}

/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;
  bitmap toremove = BITMAP_ALLOC (NULL);

  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
	  && !integer_zerop (info->iv->step)
	  && !info->inv_id
	  && !info->iv->have_use_for
	  && !info->preserve_biv)
	bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
    }

  release_defs_bitset (toremove);

  BITMAP_FREE (toremove);
}

/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      pointer_map_destroy (data->niters);
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      if (info->iv)
	free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
	if (use->cost_map[j].depends_on)
	  BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  VEC_truncate (iv_use_p, data->iv_uses, 0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->iv)
	free (cand->iv);
      if (cand->depends_on)
	BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  VEC_truncate (iv_cand_p, data->iv_candidates, 0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
    }

  data->max_inv_id = 0;

  for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
    SET_DECL_RTL (obj, NULL_RTX);

  VEC_truncate (tree, decl_rtl_to_reset, 0);
}

/* Finalizes data structures used by the iv optimization pass.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  VEC_free (tree, heap, decl_rtl_to_reset);
  VEC_free (iv_use_p, heap, data->iv_uses);
  VEC_free (iv_cand_p, heap, data->iv_candidates);
}

/* Optimizes the LOOP.
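   The individual steps performed here correspond to the items of the
   overview comment at the start of the file.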
Returns true if anything changed. */ 5775 5776 static bool 5777 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop) 5778 { 5779 bool changed = false; 5780 struct iv_ca *iv_ca; 5781 edge exit; 5782 basic_block *body; 5783 5784 gcc_assert (!data->niters); 5785 data->current_loop = loop; 5786 data->speed = optimize_loop_for_speed_p (loop); 5787 5788 if (dump_file && (dump_flags & TDF_DETAILS)) 5789 { 5790 fprintf (dump_file, "Processing loop %d\n", loop->num); 5791 5792 exit = single_dom_exit (loop); 5793 if (exit) 5794 { 5795 fprintf (dump_file, " single exit %d -> %d, exit condition ", 5796 exit->src->index, exit->dest->index); 5797 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM); 5798 fprintf (dump_file, "\n"); 5799 } 5800 5801 fprintf (dump_file, "\n"); 5802 } 5803 5804 body = get_loop_body (loop); 5805 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes); 5806 free (body); 5807 5808 /* For each ssa name determines whether it behaves as an induction variable 5809 in some loop. */ 5810 if (!find_induction_variables (data)) 5811 goto finish; 5812 5813 /* Finds interesting uses (item 1). */ 5814 find_interesting_uses (data); 5815 if (n_iv_uses (data) > MAX_CONSIDERED_USES) 5816 goto finish; 5817 5818 /* Finds candidates for the induction variables (item 2). */ 5819 find_iv_candidates (data); 5820 5821 /* Calculates the costs (item 3, part 1). */ 5822 determine_iv_costs (data); 5823 determine_use_iv_costs (data); 5824 determine_set_costs (data); 5825 5826 /* Find the optimal set of induction variables (item 3, part 2). */ 5827 iv_ca = find_optimal_iv_set (data); 5828 if (!iv_ca) 5829 goto finish; 5830 changed = true; 5831 5832 /* Create the new induction variables (item 4, part 1). */ 5833 create_new_ivs (data, iv_ca); 5834 iv_ca_free (&iv_ca); 5835 5836 /* Rewrite the uses (item 4, part 2). */ 5837 rewrite_uses (data); 5838 5839 /* Remove the ivs that are unused after rewriting. */ 5840 remove_unused_ivs (data); 5841 5842 /* We have changed the structure of induction variables; it might happen 5843 that definitions in the scev database refer to some of them that were 5844 eliminated. */ 5845 scev_reset (); 5846 5847 finish: 5848 free_loop_data (data); 5849 5850 return changed; 5851 } 5852 5853 /* Main entry point. Optimizes induction variables in loops. */ 5854 5855 void 5856 tree_ssa_iv_optimize (void) 5857 { 5858 struct loop *loop; 5859 struct ivopts_data data; 5860 loop_iterator li; 5861 5862 tree_ssa_iv_optimize_init (&data); 5863 5864 /* Optimize the loops starting with the innermost ones. */ 5865 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST) 5866 { 5867 if (dump_file && (dump_flags & TDF_DETAILS)) 5868 flow_loop_dump (loop, dump_file, NULL, 1); 5869 5870 tree_ssa_iv_optimize_loop (&data, loop); 5871 } 5872 5873 tree_ssa_iv_optimize_finalize (&data); 5874 } 5875