/* RTL dead store elimination.
   Copyright (C) 2005-2020 Free Software Foundation, Inc.

   Contributed by Richard Sandiford <rsandifor@codesourcery.com>
   and Kenneth Zadeck <zadeck@naturalbridge.com>

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it under
   the terms of the GNU General Public License as published by the Free
   Software Foundation; either version 3, or (at your option) any later
   version.

   GCC is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "predict.h"
#include "df.h"
#include "memmodel.h"
#include "tm_p.h"
#include "gimple-ssa.h"
#include "expmed.h"
#include "optabs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "alias.h"
#include "stor-layout.h"
#include "cfgrtl.h"
#include "cselib.h"
#include "tree-pass.h"
#include "explow.h"
#include "expr.h"
#include "dbgcnt.h"
#include "rtl-iter.h"
#include "cfgcleanup.h"
#include "calls.h"

/* This file contains three techniques for performing Dead Store
   Elimination (dse).

   * The first technique performs dse locally on any base address.  It
   is based on cselib, which is a local value numbering technique.
   This technique is local to a basic block but deals with fairly
   general addresses.

   * The second technique performs dse globally but is restricted to
   base addresses that are either constant or are relative to the
   frame_pointer.

   * The third technique (which is only done after register allocation)
   processes the spill slots.  This differs from the second
   technique because it takes advantage of the fact that spilling is
   completely free from the effects of aliasing.

   Logically, dse is a backwards dataflow problem.  A store can be
   deleted if it cannot be reached in the backward direction by any
   use of the value being stored.  However, the local technique uses a
   forwards scan of the basic block because cselib requires that the
   block be processed in that order.

   The pass is logically broken into 7 steps:

   0) Initialization.

   1) The local algorithm, as well as scanning the insns for the two
   global algorithms.

   2) Analysis to see if the global algs are necessary.  In the case
   of stores based on a constant address, there must be at least two
   stores to that address, to make it possible to delete some of the
   stores.  In the case of stores off of the frame or spill related
   stores, only one store to an address is necessary because those
   stores die at the end of the function.

   3) Set up the global dataflow equations based on processing the
   info parsed in the first step.

   4) Solve the dataflow equations.

   5) Delete the insns that the global analysis has indicated are
   unnecessary.

   6) Delete insns that store the same value as a preceding store
   where the earlier store couldn't be eliminated.

   7) Cleanup.
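
   As a concrete example (illustrative only), in

   A <- r100
   ...          (no intervening reads or clobbers of A)
   A <- r200

   the first store to A is dead and can be removed, because its value
   is overwritten before it can ever be read.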

   The first step uses cselib and canon_rtx to build the largest expression
   possible for each address.  This pass is a forwards pass through
   each basic block.  From the point of view of the global technique,
   the first pass could examine a block in either direction.  The
   forwards ordering is to accommodate cselib.

   We make a simplifying assumption: addresses fall into four broad
   categories:

   1) base has rtx_varies_p == false, offset is constant.
   2) base has rtx_varies_p == false, offset is variable.
   3) base has rtx_varies_p == true, offset is constant.
   4) base has rtx_varies_p == true, offset is variable.

   The local passes are able to process all 4 kinds of addresses.  The
   global pass only handles 1).

   The global problem is formulated as follows:

     A store, S1, to address A, where A is not relative to the stack
     frame, can be eliminated if all paths from S1 to the end of the
     function contain another store to A before a read of A.

     If the address A is relative to the stack frame, a store S2 to A
     can be eliminated if there are no paths from S2 that reach the
     end of the function that read A before another store to A.  In
     this case S2 can be deleted even if there are paths from S2 to the
     end of the function that have no reads or writes to A.  This
     second case allows stores to the stack frame to be deleted that
     would otherwise die when the function returns.  This cannot be
     done if stores_off_frame_dead_at_return is not true.  See the
     comment above that variable for when it is false.

   The global problem is formulated as a backwards set union
   dataflow problem where the stores are the gens and reads are the
   kills.  Set union problems are rare and require some special
   handling given our representation of bitmaps.  A straightforward
   implementation requires a lot of bitmaps filled with 1s.
   These are expensive and cumbersome in our bitmap formulation so
   care has been taken to avoid large vectors filled with 1s.  See
   the comments in bb_info and in the dataflow confluence functions
   for details.

   There are two places for further enhancements to this algorithm:

   1) The original dse which was embedded in a pass called flow also
   did local address forwarding.  For example in

   A <- r100
   ... <- A

   flow would replace the right hand side of the second insn with a
   reference to r100.  Most of the information is available to add this
   to this pass.  It has not been done because it is a lot of work in
   the case that either r100 is assigned to between the first and
   second insn and/or the second insn is a load of part of the value
   stored by the first insn.

   insn 5 in gcc.c-torture/compile/990203-1.c simple case.
   insn 15 in gcc.c-torture/execute/20001017-2.c simple case.
   insn 25 in gcc.c-torture/execute/20001026-1.c simple case.
   insn 44 in gcc.c-torture/execute/20010910-1.c simple case.

   2) The cleaning up of spill code is quite profitable.  It currently
   depends on reading tea leaves and chicken entrails left by reload.
   This pass depends on reload creating a singleton alias set for each
   spill slot and telling the next dse pass which of these alias sets
   are the singletons.  Rather than analyze the addresses of the
   spills, dse's spill processing just does analysis of the loads and
   stores that use those alias sets.
   There are three cases where this falls short:

   a) Reload sometimes creates the slot for one mode of access, and
   then inserts loads and/or stores for a smaller mode.  In this
   case, the current code just punts on the slot.  The proper thing
   to do is to back out and use one bit vector position for each
   byte of the entity associated with the slot.  This depends on
   KNOWING that reload always generates the accesses for each of the
   bytes in some canonical (read: easy to understand several
   passes after reload happens) way.

   b) Reload sometimes decides that the spill slot it allocated was not
   large enough for the mode and goes back and allocates more slots
   with the same mode and alias set.  The backout in this case is a
   little more graceful than (a).  In this case the slot is unmarked
   as being a spill slot and if the final address comes out to be based
   off the frame pointer, the global algorithm handles this slot.

   c) For any pass that may prespill, there is currently no
   mechanism to tell the dse pass that the slot being used has the
   special properties that reload uses.  It may be that all that is
   required is to have those passes make the same calls that reload
   does, assuming that the alias sets can be manipulated in the same
   way.  */

/* There are limits to the size of constant offsets we model for the
   global problem.  There are certainly test cases that exceed this
   limit; however, it is unlikely that there are important programs
   that really have constant offsets this size.  */
#define MAX_OFFSET (64 * 1024)

/* Obstack for the DSE dataflow bitmaps.  We don't want to put these
   on the default obstack because these bitmaps can grow quite large
   (~2GB for the small (!) test case of PR54146) and we'll hold on to
   all that memory until the end of the compiler run.
   As a bonus, delete_tree_live_info can destroy all the bitmaps by just
   releasing the whole obstack.  */
static bitmap_obstack dse_bitmap_obstack;

/* Obstack for other data.  As for above: Kinda nice to be able to
   throw it all away at the end in one big sweep.  */
static struct obstack dse_obstack;

/* Scratch bitmap for cselib's cselib_expand_value_rtx.  */
static bitmap scratch = NULL;

struct insn_info_type;

/* This structure holds information about a candidate store.  */
class store_info
{
public:

  /* False means this is a clobber.  */
  bool is_set;

  /* False if a single HOST_WIDE_INT bitmap is used for positions_needed.  */
  bool is_large;

  /* The id of the mem group of the base address.  If rtx_varies_p is
     true, this is -1.  Otherwise, it is the index into the group
     table.  */
  int group_id;

  /* This is the cselib value.  */
  cselib_val *cse_base;

  /* The canonized mem.  */
  rtx mem;

  /* Canonized MEM address for use by canon_true_dependence.  */
  rtx mem_addr;

  /* The offset of the first byte associated with the operation.  */
  poly_int64 offset;

  /* The number of bytes covered by the operation.  This is always exact
     and known (rather than -1).  */
  poly_int64 width;

  union
    {
      /* A bitmask as wide as the number of bytes in the word that
         contains a 1 if the byte may be needed.  The store is unused if
         all of the bits are 0.  This is used if IS_LARGE is false.  */
      unsigned HOST_WIDE_INT small_bitmask;

      struct
        {
          /* A bitmap with one bit per byte, or null if the number of
             bytes isn't known at compile time.  A cleared bit means
             the position is needed.  Used if IS_LARGE is true.  */
          bitmap bmap;

          /* When BITMAP is nonnull, this counts the number of set bits
             (i.e. unneeded bytes) in the bitmap.  If it is equal to
             WIDTH, the whole store is unused.

             When BITMAP is null:
             - the store is definitely not needed when COUNT == 1
             - all the store is needed when COUNT == 0 and RHS is nonnull
             - otherwise we don't know which parts of the store are needed.  */
          int count;
        } large;
    } positions_needed;

  /* The next store info for this insn.  */
  class store_info *next;

  /* The right hand side of the store.  This is used if there is a
     subsequent reload of the mem's address somewhere later in the
     basic block.  */
  rtx rhs;

  /* If rhs is or holds a constant, this contains that constant,
     otherwise NULL.  */
  rtx const_rhs;

  /* Set if this store stores the same constant value as the insn
     REDUNDANT_REASON stored.  These aren't eliminated early, because
     doing that might prevent the earlier, larger store from being
     eliminated.  */
  struct insn_info_type *redundant_reason;
};

/* Return a bitmask with the first N low bits set.  */

static unsigned HOST_WIDE_INT
lowpart_bitmask (int n)
{
  unsigned HOST_WIDE_INT mask = HOST_WIDE_INT_M1U;
  gcc_assert (n >= 0 && n <= HOST_BITS_PER_WIDE_INT);
  /* A shift by the full width of the type would be undefined.  */
  if (n == 0)
    return 0;
  return mask >> (HOST_BITS_PER_WIDE_INT - n);
}

static object_allocator<store_info> cse_store_info_pool ("cse_store_info_pool");

static object_allocator<store_info> rtx_store_info_pool ("rtx_store_info_pool");

/* This structure holds information about a load.  These are only
   built for rtx bases.  */
class read_info_type
{
public:
  /* The id of the mem group of the base address.  */
  int group_id;

  /* The offset of the first byte associated with the operation.  */
  poly_int64 offset;

  /* The number of bytes covered by the operation, or -1 if not known.  */
  poly_int64 width;

  /* The mem being read.  */
  rtx mem;

  /* The next read_info for this insn.  */
  class read_info_type *next;
};
typedef class read_info_type *read_info_t;

static object_allocator<read_info_type> read_info_type_pool ("read_info_pool");

/* One of these records is created for each insn.  */

struct insn_info_type
{
  /* Set true if the insn contains a store but the insn itself cannot
     be deleted.  This is set if the insn is a parallel and there is
     more than one non-dead output or if the insn is in some way
     volatile.  */
  bool cannot_delete;

  /* This field is only used by the global algorithm.  It is set true
     if the insn contains any read of mem except for one of kind 1)
     above.  This is also set if the insn is a call or contains a mem
     clobber.  If the insn contains a wild read, the read_rec will be
     null.  */
  bool wild_read;

  /* This is true only for CALL instructions which could potentially read
     any non-frame memory location.  This field is used by the global
     algorithm.  */
  bool non_frame_wild_read;

  /* This field is only used for the processing of const functions.
     These functions cannot read memory, but they can read the stack
     because that is where they may get their parms.  We need to be
     this conservative because, like the store motion pass, we don't
     consider CALL_INSN_FUNCTION_USAGE when processing call insns.
     Moreover, we need to distinguish two cases:
     1. Before reload (register elimination), the stores related to
        outgoing arguments are stack pointer based and thus deemed
        of non-constant base in this pass.  This requires special
        handling but also means that the frame pointer based stores
        need not be killed upon encountering a const function call.
     2. After reload, the stores related to outgoing arguments can be
        either stack pointer or hard frame pointer based.  This means
        that we have no other choice than also killing all the frame
        pointer based stores upon encountering a const function call.
     This field is set after reload for const function calls and before
     reload for const tail function calls on targets where the arg
     pointer is the frame pointer.  Having this set is less severe than
     a wild read; it just means that all the frame related stores are
     killed rather than all the stores.  */
  bool frame_read;

  /* This field is only used for the processing of const functions.
     It is set if the insn may contain a stack pointer based store.  */
  bool stack_pointer_based;

  /* This is true if any of the sets within the store contains a
     cselib base.  Such stores can only be deleted by the local
     algorithm.  */
  bool contains_cselib_groups;

  /* The insn.  */
  rtx_insn *insn;

  /* The list of mem sets or mem clobbers that are contained in this
     insn.  If the insn is deletable, it contains only one mem set.
     But it could also contain clobbers.  Insns that contain more than
     one mem set are not deletable, but each of those mems is here in
     order to provide info to delete other insns.  */
  store_info *store_rec;

  /* The linked list of mem uses in this insn.  Only the reads from
     rtx bases are listed here.  The reads from cselib bases are
     completely processed during the first scan and so are never
     created.  */
  read_info_t read_rec;

  /* The live fixed registers.  We assume only fixed registers can
     cause trouble by being clobbered from an expanded pattern;
     storing only the live fixed registers (rather than all registers)
     means less memory needs to be allocated / copied for the individual
     stores.  */
  regset fixed_regs_live;

  /* The prev insn in the basic block.  */
  struct insn_info_type * prev_insn;

  /* The linked list of insns that are in consideration for removal in
     the forwards pass through the basic block.  This pointer may be
     trash as it is not cleared when a wild read occurs.  The only
     time it is guaranteed to be correct is when the traversal starts
     at active_local_stores.  */
  struct insn_info_type * next_local_store;
};
typedef struct insn_info_type *insn_info_t;

static object_allocator<insn_info_type> insn_info_type_pool ("insn_info_pool");

/* The linked list of stores that are under consideration in this
   basic block.  */
static insn_info_t active_local_stores;
static int active_local_stores_len;
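
/* A sketch of the global dataflow (illustrative; the transfer and
   confluence functions are authoritative):

     IN (bb)  = GEN (bb) U (OUT (bb) - KILL (bb))
     OUT (bb) = intersection of IN (s) over successors s of bb

   computed backwards, with stores acting as gens and reads as kills.  */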

struct dse_bb_info_type
{
  /* Pointer to the insn info for the last insn in the block.  These
     are linked so this is how all of the insns are reached.  During
     scanning this is the current insn being scanned.  */
  insn_info_t last_insn;

  /* The info for the global dataflow problem.  */


  /* This is set if the transfer function should AND in the wild_read
     bitmap before applying the kill and gen sets.  That vector knocks
     out most of the bits in the bitmap and thus speeds up the
     operations.  */
  bool apply_wild_read;

  /* The following 4 bitvectors hold information about which positions
     of which stores are live or dead.  They are indexed by
     get_bitmap_index.  */

  /* The set of store positions that exist in this block before a wild read.  */
  bitmap gen;

  /* The set of load positions that exist in this block above the
     same position of a store.  */
  bitmap kill;

  /* The set of stores that reach the top of the block without being
     killed by a read.

     Do not represent the in if it is all ones.  Note that this is
     what the bitvector should logically be initialized to for a set
     intersection problem.  However, like the kill set, this is too
     expensive.  So initially, the in set will only be created for the
     exit block and any block that contains a wild read.  */
  bitmap in;

  /* The set of stores that reach the bottom of the block from its
     successors.

     Do not represent the out if it is all ones.  Note that this is
     what the bitvector should logically be initialized to for a set
     intersection problem.  However, like the kill and in sets, this is
     too expensive.  So what is done is that the confluence operator
     just initializes the vector from one of the out sets of the
     successors of the block.  */
  bitmap out;

  /* The following bitvector is indexed by the reg number.  It
     contains the set of regs that are live at the current instruction
     being processed.  While it contains info for all of the
     registers, only the hard registers are actually examined.  It is used
     to assure that shift and/or add sequences that are inserted do not
     accidentally clobber live hard regs.  */
  bitmap regs_live;
};

typedef struct dse_bb_info_type *bb_info_t;

static object_allocator<dse_bb_info_type> dse_bb_info_type_pool
  ("bb_info_pool");

/* Table to hold all bb_infos.  */
static bb_info_t *bb_table;

/* There is a group_info for each rtx base that is used to reference
   memory.  There are also not many rtx bases because they are very
   limited in scope.  */

struct group_info
{
  /* The actual base of the address.  */
  rtx rtx_base;

  /* The sequential id of the base.  This allows us to have a
     canonical ordering of these that is not based on addresses.  */
  int id;

  /* True if there are any positions that are to be processed
     globally.  */
  bool process_globally;

  /* True if the base of this group is either the frame_pointer or
     hard_frame_pointer.  */
  bool frame_related;

  /* A mem wrapped around the base pointer for the group in order to do
     read dependency.  It must be given BLKmode in order to encompass all
     the possible offsets from the base.  */
  rtx base_mem;

  /* Canonized version of base_mem's address.  */
  rtx canon_base_addr;
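
  /* For example (illustrative): the first store in this group to
     offset -4 sets bit 4 in store1_n below; a second store to the same
     offset then sets bit 4 in store2_n, which is what makes the offset
     interesting to the global problem.  */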

  /* These two sets of two bitmaps are used to keep track of how many
     stores are actually referencing that position from this base.  We
     only do this for rtx bases as this will be used to assign
     positions in the bitmaps for the global problem.  Bit N is set in
     store1 on the first store for offset N.  Bit N is set in store2
     for the second store to offset N.  This is all we need since we
     only care about offsets that have two or more stores for them.

     The "_n" suffix is for offsets less than 0 and the "_p" suffix is
     for 0 and greater offsets.

     There is one special case here, for stores into the stack frame:
     we will OR store1 into store2 before deciding which stores to look
     at globally.  This is because stores to the stack frame that have
     no other reads before the end of the function can also be
     deleted.  */
  bitmap store1_n, store1_p, store2_n, store2_p;

  /* These bitmaps keep track of the offsets in this group that escape
     this function.  An offset escapes if it corresponds to a named
     variable whose addressable flag is set.  */
  bitmap escaped_n, escaped_p;

  /* The positions in this bitmap have the same assignments as the in,
     out, gen and kill bitmaps.  This bitmap is all zeros except for
     the positions that are occupied by stores for this group.  */
  bitmap group_kill;

  /* The offset_map is used to map the offsets from this base into
     positions in the global bitmaps.  It is only created after all of
     the stores have been scanned and we know which ones we
     care about.  */
  int *offset_map_n, *offset_map_p;
  int offset_map_size_n, offset_map_size_p;
};

static object_allocator<group_info> group_info_pool ("rtx_group_info_pool");

/* Index into the rtx_group_vec.  */
static int rtx_group_next_id;


static vec<group_info *> rtx_group_vec;


/* This structure holds the set of changes that are being deferred
   when removing a read operation.  See replace_read.  */
struct deferred_change
{

  /* The mem that is being replaced.  */
  rtx *loc;

  /* The reg it is being replaced with.  */
  rtx reg;

  struct deferred_change *next;
};

static object_allocator<deferred_change> deferred_change_pool
  ("deferred_change_pool");

static deferred_change *deferred_change_list = NULL;

/* This is true except if cfun->stdarg -- i.e. we cannot treat stores
   off the frame as dead at return for vararg functions because they
   play games with the frame.  */
static bool stores_off_frame_dead_at_return;

/* Counters for stats.  */
static int globally_deleted;
static int locally_deleted;

static bitmap all_blocks;

/* Locations that are killed by calls in the global phase.  */
static bitmap kill_on_calls;

/* The number of bits used in the global bitmaps.  */
static unsigned int current_position;

/* Print offset range [OFFSET, OFFSET + WIDTH) to FILE.  */

static void
print_range (FILE *file, poly_int64 offset, poly_int64 width)
{
  fprintf (file, "[");
  print_dec (offset, file, SIGNED);
  fprintf (file, "..");
  print_dec (offset + width, file, SIGNED);
  fprintf (file, ")");
}

/*----------------------------------------------------------------------------
   Zeroth step.

   Initialization.
----------------------------------------------------------------------------*/


/* Hashtable callbacks for maintaining the "rtx_base" field of
   group_info, given that the addresses are function invariants.  */

struct invariant_group_base_hasher : nofree_ptr_hash <group_info>
{
  static inline hashval_t hash (const group_info *);
  static inline bool equal (const group_info *, const group_info *);
};

inline bool
invariant_group_base_hasher::equal (const group_info *gi1,
                                    const group_info *gi2)
{
  return rtx_equal_p (gi1->rtx_base, gi2->rtx_base);
}

inline hashval_t
invariant_group_base_hasher::hash (const group_info *gi)
{
  int do_not_record;
  return hash_rtx (gi->rtx_base, Pmode, &do_not_record, NULL, false);
}

/* Table of group_info structures, hashed by base value.  */
static hash_table<invariant_group_base_hasher> *rtx_group_table;


/* Get the GROUP for BASE.  Add a new group if it is not there.  */

static group_info *
get_group_info (rtx base)
{
  struct group_info tmp_gi;
  group_info *gi;
  group_info **slot;

  gcc_assert (base != NULL_RTX);

  /* Find the store_base_info structure for BASE, creating a new one
     if necessary.  */
  tmp_gi.rtx_base = base;
  slot = rtx_group_table->find_slot (&tmp_gi, INSERT);
  gi = *slot;

  if (gi == NULL)
    {
      *slot = gi = group_info_pool.allocate ();
      gi->rtx_base = base;
      gi->id = rtx_group_next_id++;
      gi->base_mem = gen_rtx_MEM (BLKmode, base);
      gi->canon_base_addr = canon_rtx (base);
      gi->store1_n = BITMAP_ALLOC (&dse_bitmap_obstack);
      gi->store1_p = BITMAP_ALLOC (&dse_bitmap_obstack);
      gi->store2_n = BITMAP_ALLOC (&dse_bitmap_obstack);
      gi->store2_p = BITMAP_ALLOC (&dse_bitmap_obstack);
      gi->escaped_p = BITMAP_ALLOC (&dse_bitmap_obstack);
      gi->escaped_n = BITMAP_ALLOC (&dse_bitmap_obstack);
      gi->group_kill = BITMAP_ALLOC (&dse_bitmap_obstack);
      gi->process_globally = false;
      gi->frame_related =
        (base == frame_pointer_rtx) || (base == hard_frame_pointer_rtx);
      gi->offset_map_size_n = 0;
      gi->offset_map_size_p = 0;
      gi->offset_map_n = NULL;
      gi->offset_map_p = NULL;
      rtx_group_vec.safe_push (gi);
    }

  return gi;
}


/* Initialization of data structures.  */

static void
dse_step0 (void)
{
  locally_deleted = 0;
  globally_deleted = 0;

  bitmap_obstack_initialize (&dse_bitmap_obstack);
  gcc_obstack_init (&dse_obstack);

  scratch = BITMAP_ALLOC (&reg_obstack);
  kill_on_calls = BITMAP_ALLOC (&dse_bitmap_obstack);


  rtx_group_table = new hash_table<invariant_group_base_hasher> (11);

  bb_table = XNEWVEC (bb_info_t, last_basic_block_for_fn (cfun));
  rtx_group_next_id = 0;

  stores_off_frame_dead_at_return = !cfun->stdarg;

  init_alias_analysis ();
}



/*----------------------------------------------------------------------------
   First step.

   Scan all of the insns.  Any random ordering of the blocks is fine.
   Each block is scanned in forward order to accommodate cselib which
   is used to remove stores with non-constant bases.
----------------------------------------------------------------------------*/

/* Delete all of the store_info recs from INSN_INFO.  */

static void
free_store_info (insn_info_t insn_info)
{
  store_info *cur = insn_info->store_rec;
  while (cur)
    {
      store_info *next = cur->next;
      if (cur->is_large)
        BITMAP_FREE (cur->positions_needed.large.bmap);
      if (cur->cse_base)
        cse_store_info_pool.remove (cur);
      else
        rtx_store_info_pool.remove (cur);
      cur = next;
    }

  insn_info->cannot_delete = true;
  insn_info->contains_cselib_groups = false;
  insn_info->store_rec = NULL;
}

struct note_add_store_info
{
  rtx_insn *first, *current;
  regset fixed_regs_live;
  bool failure;
};

/* Callback for emit_inc_dec_insn_before via note_stores.
   Check if a register is clobbered that is live afterwards.  */

static void
note_add_store (rtx loc, const_rtx expr ATTRIBUTE_UNUSED, void *data)
{
  rtx_insn *insn;
  note_add_store_info *info = (note_add_store_info *) data;

  if (!REG_P (loc))
    return;

  /* If this register is referenced by the current or an earlier insn,
     that's OK.  E.g. this applies to the register that is being
     incremented with this addition.  */
  for (insn = info->first;
       insn != NEXT_INSN (info->current);
       insn = NEXT_INSN (insn))
    if (reg_referenced_p (loc, PATTERN (insn)))
      return;

  /* If we come here, we have a clobber of a register that's only OK
     if that register is not live.  If we don't have liveness information
     available, fail now.  */
  if (!info->fixed_regs_live)
    {
      info->failure = true;
      return;
    }
  /* Now check if this is a live fixed register.  */
  unsigned int end_regno = END_REGNO (loc);
  for (unsigned int regno = REGNO (loc); regno < end_regno; ++regno)
    if (REGNO_REG_SET_P (info->fixed_regs_live, regno))
      info->failure = true;
}

/* Callback for for_each_inc_dec that emits an INSN that sets DEST to
   SRC + SRCOFF before insn ARG.  */

static int
emit_inc_dec_insn_before (rtx mem ATTRIBUTE_UNUSED,
                          rtx op ATTRIBUTE_UNUSED,
                          rtx dest, rtx src, rtx srcoff, void *arg)
{
  insn_info_t insn_info = (insn_info_t) arg;
  rtx_insn *insn = insn_info->insn, *new_insn, *cur;
  note_add_store_info info;

  /* We can reuse all operands without copying, because we are about
     to delete the insn that contained them.  */
  if (srcoff)
    {
      start_sequence ();
      emit_insn (gen_add3_insn (dest, src, srcoff));
      new_insn = get_insns ();
      end_sequence ();
    }
  else
    new_insn = gen_move_insn (dest, src);
  info.first = new_insn;
  info.fixed_regs_live = insn_info->fixed_regs_live;
  info.failure = false;
  for (cur = new_insn; cur; cur = NEXT_INSN (cur))
    {
      info.current = cur;
      note_stores (cur, note_add_store, &info);
    }

  /* If a failure was flagged above, return 1 so that for_each_inc_dec will
     return it immediately, communicating the failure to its caller.  */
  if (info.failure)
    return 1;

  emit_insn_before (new_insn, insn);

  return 0;
}
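
/* For example (illustrative): before a dead store such as
   (set (mem (post_inc (reg R))) (reg V)) can be deleted, the increment
   side effect must be preserved, so the routines below split it out as
   an explicit add of the access size to R first.  */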

/* Before we delete INSN_INFO->INSN, make sure that the auto inc/dec, if it
   is there, is split into a separate insn.
   Return true on success (or if there was nothing to do), false on failure.  */

static bool
check_for_inc_dec_1 (insn_info_t insn_info)
{
  rtx_insn *insn = insn_info->insn;
  rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
  if (note)
    return for_each_inc_dec (PATTERN (insn), emit_inc_dec_insn_before,
                             insn_info) == 0;

  /* Punt on stack pushes, those don't have REG_INC notes and we are
     unprepared to deal with distribution of REG_ARGS_SIZE notes etc.  */
  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
    {
      const_rtx x = *iter;
      if (GET_RTX_CLASS (GET_CODE (x)) == RTX_AUTOINC)
        return false;
    }

  return true;
}


/* Entry point for postreload.  If you work on reload_cse, or you need this
   anywhere else, consider if you can provide register liveness information
   and add a parameter to this function so that it can be passed down in
   insn_info.fixed_regs_live.  */
bool
check_for_inc_dec (rtx_insn *insn)
{
  insn_info_type insn_info;
  rtx note;

  insn_info.insn = insn;
  insn_info.fixed_regs_live = NULL;
  note = find_reg_note (insn, REG_INC, NULL_RTX);
  if (note)
    return for_each_inc_dec (PATTERN (insn), emit_inc_dec_insn_before,
                             &insn_info) == 0;

  /* Punt on stack pushes, those don't have REG_INC notes and we are
     unprepared to deal with distribution of REG_ARGS_SIZE notes etc.  */
  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
    {
      const_rtx x = *iter;
      if (GET_RTX_CLASS (GET_CODE (x)) == RTX_AUTOINC)
        return false;
    }

  return true;
}

/* Delete the insn and free all of the fields inside INSN_INFO.  */

static void
delete_dead_store_insn (insn_info_t insn_info)
{
  read_info_t read_info;

  if (!dbg_cnt (dse))
    return;

  if (!check_for_inc_dec_1 (insn_info))
    return;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Locally deleting insn %d\n",
             INSN_UID (insn_info->insn));

  free_store_info (insn_info);
  read_info = insn_info->read_rec;

  while (read_info)
    {
      read_info_t next = read_info->next;
      read_info_type_pool.remove (read_info);
      read_info = next;
    }
  insn_info->read_rec = NULL;

  delete_insn (insn_info->insn);
  locally_deleted++;
  insn_info->insn = NULL;

  insn_info->wild_read = false;
}

/* Return whether DECL, a local variable, can possibly escape the current
   function scope.  */

static bool
local_variable_can_escape (tree decl)
{
  if (TREE_ADDRESSABLE (decl))
    return true;

  /* If this is a partitioned variable, we need to consider all the variables
     in the partition.  This is necessary because a store into one of them can
     be replaced with a store into another and this may not change the outcome
     of the escape analysis.  */
  if (cfun->gimple_df->decls_to_pointers != NULL)
    {
      tree *namep = cfun->gimple_df->decls_to_pointers->get (decl);
      if (namep)
        return TREE_ADDRESSABLE (*namep);
    }

  return false;
}
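
/* For example (illustrative): a store to a local array whose address
   has been passed to another function cannot be treated as dead just
   because the current function never reads it again; TREE_ADDRESSABLE
   above and can_escape below are what catch that case.  */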

/* Return whether EXPR can possibly escape the current function scope.  */

static bool
can_escape (tree expr)
{
  tree base;
  if (!expr)
    return true;
  base = get_base_address (expr);
  if (DECL_P (base)
      && !may_be_aliased (base)
      && !(VAR_P (base)
           && !DECL_EXTERNAL (base)
           && !TREE_STATIC (base)
           && local_variable_can_escape (base)))
    return false;
  return true;
}

/* Set the store* bitmaps and the offset_map_size* fields in GROUP based
   on OFFSET and WIDTH.  */

static void
set_usage_bits (group_info *group, poly_int64 offset, poly_int64 width,
                tree expr)
{
  /* Non-constant offsets and widths act as global kills, so there's no point
     trying to use them to derive global DSE candidates.  */
  HOST_WIDE_INT i, const_offset, const_width;
  bool expr_escapes = can_escape (expr);
  if (offset.is_constant (&const_offset)
      && width.is_constant (&const_width)
      && const_offset > -MAX_OFFSET
      && const_offset + const_width < MAX_OFFSET)
    for (i = const_offset; i < const_offset + const_width; ++i)
      {
        bitmap store1;
        bitmap store2;
        bitmap escaped;
        int ai;
        if (i < 0)
          {
            store1 = group->store1_n;
            store2 = group->store2_n;
            escaped = group->escaped_n;
            ai = -i;
          }
        else
          {
            store1 = group->store1_p;
            store2 = group->store2_p;
            escaped = group->escaped_p;
            ai = i;
          }

        if (!bitmap_set_bit (store1, ai))
          bitmap_set_bit (store2, ai);
        else
          {
            if (i < 0)
              {
                if (group->offset_map_size_n < ai)
                  group->offset_map_size_n = ai;
              }
            else
              {
                if (group->offset_map_size_p < ai)
                  group->offset_map_size_p = ai;
              }
          }
        if (expr_escapes)
          bitmap_set_bit (escaped, ai);
      }
}

static void
reset_active_stores (void)
{
  active_local_stores = NULL;
  active_local_stores_len = 0;
}

/* Free all the READ_RECs of the LAST_INSN of BB_INFO.  */

static void
free_read_records (bb_info_t bb_info)
{
  insn_info_t insn_info = bb_info->last_insn;
  read_info_t *ptr = &insn_info->read_rec;
  while (*ptr)
    {
      read_info_t next = (*ptr)->next;
      read_info_type_pool.remove (*ptr);
      *ptr = next;
    }
}

/* Set the BB_INFO so that the last insn is marked as a wild read.  */

static void
add_wild_read (bb_info_t bb_info)
{
  insn_info_t insn_info = bb_info->last_insn;
  insn_info->wild_read = true;
  free_read_records (bb_info);
  reset_active_stores ();
}

/* Set the BB_INFO so that the last insn is marked as a wild read of
   non-frame locations.  */

static void
add_non_frame_wild_read (bb_info_t bb_info)
{
  insn_info_t insn_info = bb_info->last_insn;
  insn_info->non_frame_wild_read = true;
  free_read_records (bb_info);
  reset_active_stores ();
}

/* Return true if X is a constant or one of the registers that behave
   as a constant over the life of a function.  This is equivalent to
   !rtx_varies_p for memory addresses.  */

static bool
const_or_frame_p (rtx x)
{
  if (CONSTANT_P (x))
    return true;

  if (GET_CODE (x) == REG)
    {
      /* Note that we have to test for the actual rtx used for the frame
         and arg pointers and not just the register number in case we have
         eliminated the frame and/or arg pointer and are using it
         for pseudos.  */
      if (x == frame_pointer_rtx || x == hard_frame_pointer_rtx
          /* The arg pointer varies if it is not a fixed register.  */
          || (x == arg_pointer_rtx && fixed_regs[ARG_POINTER_REGNUM])
          || x == pic_offset_table_rtx)
        return true;
      return false;
    }

  return false;
}

/* Take all reasonable action to put the address of MEM into the form
   that we can do analysis on.

   The gold standard is to get the address into the form: address +
   OFFSET where address is something that rtx_varies_p considers a
   constant.  When we can get the address in this form, we can do
   global analysis on it.  Note that for constant bases, address is
   not actually returned, only the group_id.  The address can be
   obtained from that.

   If that fails, we try cselib to get a value we can at least use
   locally.  If that fails we return false.

   The GROUP_ID is set to -1 for cselib bases and the index of the
   group for non_varying bases.  */

static bool
canon_address (rtx mem,
               int *group_id,
               poly_int64 *offset,
               cselib_val **base)
{
  machine_mode address_mode = get_address_mode (mem);
  rtx mem_address = XEXP (mem, 0);
  rtx expanded_address, address;
  int expanded;

  cselib_lookup (mem_address, address_mode, 1, GET_MODE (mem));

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " mem: ");
      print_inline_rtx (dump_file, mem_address, 0);
      fprintf (dump_file, "\n");
    }

  /* First see if just canon_rtx (mem_address) is const or frame,
     if not, try cselib_expand_value_rtx and call canon_rtx on that.  */
  address = NULL_RTX;
  for (expanded = 0; expanded < 2; expanded++)
    {
      if (expanded)
        {
          /* Use cselib to replace all of the reg references with the full
             expression.  This will take care of the case where we have

             r_x = base + offset;
             val = *r_x;

             by making it into

             val = *(base + offset);  */

          expanded_address = cselib_expand_value_rtx (mem_address,
                                                      scratch, 5);

          /* If this fails, just go with the address from first
             iteration.  */
          if (!expanded_address)
            break;
        }
      else
        expanded_address = mem_address;
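
      /* For example (illustrative): the canonicalization below splits
         an address such as (plus (reg/f frame) (const_int 8)) into
         BASE == (reg/f frame) and OFFSET == 8, a form const_or_frame_p
         can accept for the global problem.  */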

      /* Split the address into canonical BASE + OFFSET terms.  */
      address = canon_rtx (expanded_address);

      *offset = 0;

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          if (expanded)
            {
              fprintf (dump_file, "\n after cselib_expand address: ");
              print_inline_rtx (dump_file, expanded_address, 0);
              fprintf (dump_file, "\n");
            }

          fprintf (dump_file, "\n after canon_rtx address: ");
          print_inline_rtx (dump_file, address, 0);
          fprintf (dump_file, "\n");
        }

      if (GET_CODE (address) == CONST)
        address = XEXP (address, 0);

      address = strip_offset_and_add (address, offset);

      if (ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (mem))
          && const_or_frame_p (address))
        {
          group_info *group = get_group_info (address);

          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, " gid=%d offset=", group->id);
              print_dec (*offset, dump_file);
              fprintf (dump_file, "\n");
            }
          *base = NULL;
          *group_id = group->id;
          return true;
        }
    }

  *base = cselib_lookup (address, address_mode, true, GET_MODE (mem));
  *group_id = -1;

  if (*base == NULL)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, " no cselib val - should be a wild read.\n");
      return false;
    }
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " varying cselib base=%u:%u offset = ",
               (*base)->uid, (*base)->hash);
      print_dec (*offset, dump_file);
      fprintf (dump_file, "\n");
    }
  return true;
}


/* Clear the rhs field from the active_local_stores array.  */

static void
clear_rhs_from_active_local_stores (void)
{
  insn_info_t ptr = active_local_stores;

  while (ptr)
    {
      store_info *store_info = ptr->store_rec;
      /* Skip the clobbers.  */
      while (!store_info->is_set)
        store_info = store_info->next;

      store_info->rhs = NULL;
      store_info->const_rhs = NULL;

      ptr = ptr->next_local_store;
    }
}


/* Mark the byte POS bytes from the beginning of store S_INFO as unneeded.  */

static inline void
set_position_unneeded (store_info *s_info, int pos)
{
  if (__builtin_expect (s_info->is_large, false))
    {
      if (bitmap_set_bit (s_info->positions_needed.large.bmap, pos))
        s_info->positions_needed.large.count++;
    }
  else
    s_info->positions_needed.small_bitmask
      &= ~(HOST_WIDE_INT_1U << pos);
}

/* Mark the whole store S_INFO as unneeded.  */

static inline void
set_all_positions_unneeded (store_info *s_info)
{
  if (__builtin_expect (s_info->is_large, false))
    {
      HOST_WIDE_INT width;
      if (s_info->width.is_constant (&width))
        {
          bitmap_set_range (s_info->positions_needed.large.bmap, 0, width);
          s_info->positions_needed.large.count = width;
        }
      else
        {
          gcc_checking_assert (!s_info->positions_needed.large.bmap);
          s_info->positions_needed.large.count = 1;
        }
    }
  else
    s_info->positions_needed.small_bitmask = HOST_WIDE_INT_0U;
}
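
/* For example (illustrative): a store of 8 bytes with IS_LARGE false
   starts with small_bitmask == 0xff; clearing bits one at a time via
   set_position_unneeded leaves the store fully dead once the mask
   reaches 0, which any_positions_needed_p below reports.  */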

/* Return TRUE if any bytes from S_INFO store are needed.  */

static inline bool
any_positions_needed_p (store_info *s_info)
{
  if (__builtin_expect (s_info->is_large, false))
    {
      HOST_WIDE_INT width;
      if (s_info->width.is_constant (&width))
        {
          gcc_checking_assert (s_info->positions_needed.large.bmap);
          return s_info->positions_needed.large.count < width;
        }
      else
        {
          gcc_checking_assert (!s_info->positions_needed.large.bmap);
          return s_info->positions_needed.large.count == 0;
        }
    }
  else
    return (s_info->positions_needed.small_bitmask != HOST_WIDE_INT_0U);
}

/* Return TRUE if all bytes START through START+WIDTH-1 from S_INFO
   store are known to be needed.  */

static inline bool
all_positions_needed_p (store_info *s_info, poly_int64 start,
                        poly_int64 width)
{
  gcc_assert (s_info->rhs);
  if (!s_info->width.is_constant ())
    {
      gcc_assert (s_info->is_large
                  && !s_info->positions_needed.large.bmap);
      return s_info->positions_needed.large.count == 0;
    }

  /* Otherwise, if START and WIDTH are non-constant, we're asking about
     a non-constant region of a constant-sized store.  We can't say for
     sure that all positions are needed.  */
  HOST_WIDE_INT const_start, const_width;
  if (!start.is_constant (&const_start)
      || !width.is_constant (&const_width))
    return false;

  if (__builtin_expect (s_info->is_large, false))
    {
      for (HOST_WIDE_INT i = const_start; i < const_start + const_width; ++i)
        if (bitmap_bit_p (s_info->positions_needed.large.bmap, i))
          return false;
      return true;
    }
  else
    {
      unsigned HOST_WIDE_INT mask
        = lowpart_bitmask (const_width) << const_start;
      return (s_info->positions_needed.small_bitmask & mask) == mask;
    }
}


static rtx get_stored_val (store_info *, machine_mode, poly_int64,
                           poly_int64, basic_block, bool);


/* BODY is an instruction pattern that belongs to INSN.  Return 1 if
   there is a candidate store, after adding it to the appropriate
   local store group if so.  */

static int
record_store (rtx body, bb_info_t bb_info)
{
  rtx mem, rhs, const_rhs, mem_addr;
  poly_int64 offset = 0;
  poly_int64 width = 0;
  insn_info_t insn_info = bb_info->last_insn;
  store_info *store_info = NULL;
  int group_id;
  cselib_val *base = NULL;
  insn_info_t ptr, last, redundant_reason;
  bool store_is_unused;

  if (GET_CODE (body) != SET && GET_CODE (body) != CLOBBER)
    return 0;

  mem = SET_DEST (body);

  /* If this is not used, then this cannot be used to keep the insn
     from being deleted.  On the other hand, it does provide something
     that can be used to prove that another store is dead.  */
  store_is_unused
    = (find_reg_note (insn_info->insn, REG_UNUSED, mem) != NULL);

  /* Check whether that value is a suitable memory location.  */
  if (!MEM_P (mem))
    {
      /* If the set or clobber is unused, then it does not affect our
         ability to get rid of the entire insn.  */
      if (!store_is_unused)
        insn_info->cannot_delete = true;
      return 0;
    }

  /* At this point we know mem is a mem.  */
  if (GET_MODE (mem) == BLKmode)
    {
      HOST_WIDE_INT const_size;
      if (GET_CODE (XEXP (mem, 0)) == SCRATCH)
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, " adding wild read for (clobber (mem:BLK (scratch)))\n");
          add_wild_read (bb_info);
          insn_info->cannot_delete = true;
          return 0;
        }
      /* Handle (set (mem:BLK (addr) [... S36 ...]) (const_int 0))
         as memset (addr, 0, 36);  */
      else if (!MEM_SIZE_KNOWN_P (mem)
               || maybe_le (MEM_SIZE (mem), 0)
               /* This is a limit on the bitmap size, which is only relevant
                  for constant-sized MEMs.  */
               || (MEM_SIZE (mem).is_constant (&const_size)
                   && const_size > MAX_OFFSET)
               || GET_CODE (body) != SET
               || !CONST_INT_P (SET_SRC (body)))
        {
          if (!store_is_unused)
            {
              /* If the set or clobber is unused, then it does not affect our
                 ability to get rid of the entire insn.  */
              insn_info->cannot_delete = true;
              clear_rhs_from_active_local_stores ();
            }
          return 0;
        }
    }

  /* We can still process a volatile mem, we just cannot delete it.  */
  if (MEM_VOLATILE_P (mem))
    insn_info->cannot_delete = true;

  if (!canon_address (mem, &group_id, &offset, &base))
    {
      clear_rhs_from_active_local_stores ();
      return 0;
    }

  if (GET_MODE (mem) == BLKmode)
    width = MEM_SIZE (mem);
  else
    width = GET_MODE_SIZE (GET_MODE (mem));

  if (!endpoint_representable_p (offset, width))
    {
      clear_rhs_from_active_local_stores ();
      return 0;
    }

  if (known_eq (width, 0))
    return 0;

  if (group_id >= 0)
    {
      /* In the restrictive case where the base is a constant or the
         frame pointer we can do global analysis.  */

      group_info *group
        = rtx_group_vec[group_id];
      tree expr = MEM_EXPR (mem);

      store_info = rtx_store_info_pool.allocate ();
      set_usage_bits (group, offset, width, expr);

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, " processing const base store gid=%d",
                   group_id);
          print_range (dump_file, offset, width);
          fprintf (dump_file, "\n");
        }
    }
  else
    {
      if (may_be_sp_based_p (XEXP (mem, 0)))
        insn_info->stack_pointer_based = true;
      insn_info->contains_cselib_groups = true;

      store_info = cse_store_info_pool.allocate ();
      group_id = -1;

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, " processing cselib store ");
          print_range (dump_file, offset, width);
          fprintf (dump_file, "\n");
        }
    }

  const_rhs = rhs = NULL_RTX;
  if (GET_CODE (body) == SET
      /* No place to keep the value after ra.  */
      && !reload_completed
      && (REG_P (SET_SRC (body))
          || GET_CODE (SET_SRC (body)) == SUBREG
          || CONSTANT_P (SET_SRC (body)))
      && !MEM_VOLATILE_P (mem)
      /* Sometimes the store and reload is used for truncation and
         rounding.  */
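      /* For example (illustrative): with -ffloat-store on x87 targets,
         storing a double rounds away excess precision, so the register
         RHS must not be reused in place of the reloaded value.  */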
      && !(FLOAT_MODE_P (GET_MODE (mem)) && (flag_float_store)))
    {
      rhs = SET_SRC (body);
      if (CONSTANT_P (rhs))
        const_rhs = rhs;
      else if (body == PATTERN (insn_info->insn))
        {
          rtx tem = find_reg_note (insn_info->insn, REG_EQUAL, NULL_RTX);
          if (tem && CONSTANT_P (XEXP (tem, 0)))
            const_rhs = XEXP (tem, 0);
        }
      if (const_rhs == NULL_RTX && REG_P (rhs))
        {
          rtx tem = cselib_expand_value_rtx (rhs, scratch, 5);

          if (tem && CONSTANT_P (tem))
            const_rhs = tem;
        }
    }

  /* Check to see if this store causes some other stores to be
     dead.  */
  ptr = active_local_stores;
  last = NULL;
  redundant_reason = NULL;
  mem = canon_rtx (mem);

  if (group_id < 0)
    mem_addr = base->val_rtx;
  else
    {
      group_info *group = rtx_group_vec[group_id];
      mem_addr = group->canon_base_addr;
    }
  if (maybe_ne (offset, 0))
    mem_addr = plus_constant (get_address_mode (mem), mem_addr, offset);

  while (ptr)
    {
      insn_info_t next = ptr->next_local_store;
      class store_info *s_info = ptr->store_rec;
      bool del = true;

      /* Skip the clobbers.  We delete the active insn if this insn
         shadows the set.  To have been put on the active list, it
         has exactly one set.  */
      while (!s_info->is_set)
        s_info = s_info->next;

      if (s_info->group_id == group_id && s_info->cse_base == base)
        {
          HOST_WIDE_INT i;
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, " trying store in insn=%d gid=%d",
                       INSN_UID (ptr->insn), s_info->group_id);
              print_range (dump_file, s_info->offset, s_info->width);
              fprintf (dump_file, "\n");
            }

          /* Even if PTR won't be eliminated as unneeded, if both
             PTR and this insn store the same constant value, we might
             eliminate this insn instead.  */
          if (s_info->const_rhs
              && const_rhs
              && known_subrange_p (offset, width,
                                   s_info->offset, s_info->width)
              && all_positions_needed_p (s_info, offset - s_info->offset,
                                         width)
              /* We can only remove the later store if the earlier store
                 aliases at least all the accesses of the later one.  */
              && ((MEM_ALIAS_SET (mem) == MEM_ALIAS_SET (s_info->mem)
                   || alias_set_subset_of (MEM_ALIAS_SET (mem),
                                           MEM_ALIAS_SET (s_info->mem)))
                  && (!MEM_EXPR (s_info->mem)
                      || refs_same_for_tbaa_p (MEM_EXPR (s_info->mem),
                                               MEM_EXPR (mem)))))
            {
              if (GET_MODE (mem) == BLKmode)
                {
                  if (GET_MODE (s_info->mem) == BLKmode
                      && s_info->const_rhs == const_rhs)
                    redundant_reason = ptr;
                }
              else if (s_info->const_rhs == const0_rtx
                       && const_rhs == const0_rtx)
                redundant_reason = ptr;
              else
                {
                  rtx val;
                  start_sequence ();
                  val = get_stored_val (s_info, GET_MODE (mem), offset, width,
                                        BLOCK_FOR_INSN (insn_info->insn),
                                        true);
                  if (get_insns () != NULL)
                    val = NULL_RTX;
                  end_sequence ();
                  if (val && rtx_equal_p (val, const_rhs))
                    redundant_reason = ptr;
                }
            }

          HOST_WIDE_INT begin_unneeded, const_s_width, const_width;
          if (known_subrange_p (s_info->offset, s_info->width, offset, width))
            /* The new store touches every byte that S_INFO does.  */
            set_all_positions_unneeded (s_info);
          else if ((offset - s_info->offset).is_constant (&begin_unneeded)
                   && s_info->width.is_constant (&const_s_width)
                   && width.is_constant (&const_width))
            {
              HOST_WIDE_INT end_unneeded = begin_unneeded + const_width;
              begin_unneeded = MAX (begin_unneeded, 0);
              end_unneeded = MIN (end_unneeded, const_s_width);
              for (i = begin_unneeded; i < end_unneeded; ++i)
                set_position_unneeded (s_info, i);
            }
          else
            {
              /* We don't know which parts of S_INFO are needed and
                 which aren't, so invalidate the RHS.  */
              s_info->rhs = NULL;
              s_info->const_rhs = NULL;
            }
        }
      else if (s_info->rhs)
        /* Need to see if it is possible for this store to overwrite
           the value of store_info.  If it is, set the rhs to NULL to
           keep it from being used to remove a load.  */
        {
          if (canon_output_dependence (s_info->mem, true,
                                       mem, GET_MODE (mem),
                                       mem_addr))
            {
              s_info->rhs = NULL;
              s_info->const_rhs = NULL;
            }
        }

      /* An insn can be deleted if every position of every one of
         its s_infos is zero.  */
      if (any_positions_needed_p (s_info))
        del = false;

      if (del)
        {
          insn_info_t insn_to_delete = ptr;

          active_local_stores_len--;
          if (last)
            last->next_local_store = ptr->next_local_store;
          else
            active_local_stores = ptr->next_local_store;

          if (!insn_to_delete->cannot_delete)
            delete_dead_store_insn (insn_to_delete);
        }
      else
        last = ptr;

      ptr = next;
    }

  /* Finish filling in the store_info.  */
  store_info->next = insn_info->store_rec;
  insn_info->store_rec = store_info;
  store_info->mem = mem;
  store_info->mem_addr = mem_addr;
  store_info->cse_base = base;
  HOST_WIDE_INT const_width;
  if (!width.is_constant (&const_width))
    {
      store_info->is_large = true;
      store_info->positions_needed.large.count = 0;
      store_info->positions_needed.large.bmap = NULL;
    }
  else if (const_width > HOST_BITS_PER_WIDE_INT)
    {
      store_info->is_large = true;
      store_info->positions_needed.large.count = 0;
      store_info->positions_needed.large.bmap
        = BITMAP_ALLOC (&dse_bitmap_obstack);
    }
  else
    {
      store_info->is_large = false;
      store_info->positions_needed.small_bitmask
        = lowpart_bitmask (const_width);
    }
  store_info->group_id = group_id;
  store_info->offset = offset;
  store_info->width = width;
  store_info->is_set = GET_CODE (body) == SET;
  store_info->rhs = rhs;
  store_info->const_rhs = const_rhs;
  store_info->redundant_reason = redundant_reason;

  /* If this is a clobber, we return 0.  We will only be able to
     delete this insn if there is only one USED store, but we
     can use the clobber to delete other stores earlier.  */
  return store_info->is_set ? 1 : 0;
}


static void
dump_insn_info (const char * start, insn_info_t insn_info)
{
  fprintf (dump_file, "%s insn=%d %s\n", start,
           INSN_UID (insn_info->insn),
           insn_info->store_rec ? "has store" : "naked");
}
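
/* For example (illustrative): replace_read, via find_shift_sequence
   below, can turn a DImode store feeding a later SImode load of its
   high half on a little-endian target into a 32-bit right shift of the
   stored register, avoiding the memory read entirely.  */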

/* If the modes are different and the value's source and target do not
   line up, we need to extract the value from the lower part of the rhs
   of the store, shift it, and then put it into a form that can be shoved
   into the read_insn.  This function generates a right SHIFT of a
   value that is at least ACCESS_SIZE bytes wide of READ_MODE.  The
   shift sequence is returned or NULL if we failed to find a
   shift.  */

static rtx
find_shift_sequence (poly_int64 access_size,
                     store_info *store_info,
                     machine_mode read_mode,
                     poly_int64 shift, bool speed, bool require_cst)
{
  machine_mode store_mode = GET_MODE (store_info->mem);
  scalar_int_mode new_mode;
  rtx read_reg = NULL;

  /* Some machines like the x86 have shift insns for each size of
     operand.  Other machines like the ppc or the ia-64 may only have
     shift insns that shift values within 32 or 64 bit registers.
     This loop tries to find the smallest shift insn that will right
     justify the value we want to read but is available in one insn on
     the machine.  */

  opt_scalar_int_mode new_mode_iter;
  FOR_EACH_MODE_IN_CLASS (new_mode_iter, MODE_INT)
    {
      rtx target, new_reg, new_lhs;
      rtx_insn *shift_seq, *insn;
      int cost;

      new_mode = new_mode_iter.require ();
      if (GET_MODE_BITSIZE (new_mode) > BITS_PER_WORD)
        break;
      if (maybe_lt (GET_MODE_SIZE (new_mode), access_size))
        continue;

      /* If a constant was stored into memory, try to simplify it here,
         otherwise the cost of the shift might preclude this optimization
         e.g. at -Os, even when no actual shift will be needed.  */
      if (store_info->const_rhs)
        {
          poly_uint64 byte = subreg_lowpart_offset (new_mode, store_mode);
          rtx ret = simplify_subreg (new_mode, store_info->const_rhs,
                                     store_mode, byte);
          if (ret && CONSTANT_P (ret))
            {
              rtx shift_rtx = gen_int_shift_amount (new_mode, shift);
              ret = simplify_const_binary_operation (LSHIFTRT, new_mode,
                                                     ret, shift_rtx);
              if (ret && CONSTANT_P (ret))
                {
                  byte = subreg_lowpart_offset (read_mode, new_mode);
                  ret = simplify_subreg (read_mode, ret, new_mode, byte);
                  if (ret && CONSTANT_P (ret)
                      && (set_src_cost (ret, read_mode, speed)
                          <= COSTS_N_INSNS (1)))
                    return ret;
                }
            }
        }

      if (require_cst)
        return NULL_RTX;

      /* Try a wider mode if truncating the store mode to NEW_MODE
         requires a real instruction.  */
      if (maybe_lt (GET_MODE_SIZE (new_mode), GET_MODE_SIZE (store_mode))
          && !TRULY_NOOP_TRUNCATION_MODES_P (new_mode, store_mode))
        continue;

      /* Also try a wider mode if the necessary punning is either not
         desirable or not possible.  */
      if (!CONSTANT_P (store_info->rhs)
          && !targetm.modes_tieable_p (new_mode, store_mode))
        continue;

      new_reg = gen_reg_rtx (new_mode);

      start_sequence ();

      /* In theory we could also check for an ashr.  Ian Taylor knows
         of one dsp where the cost of these two was not the same.  But
         this really is a rare case anyway.  */
      target = expand_binop (new_mode, lshr_optab, new_reg,
                             gen_int_shift_amount (new_mode, shift),
                             new_reg, 1, OPTAB_DIRECT);

      shift_seq = get_insns ();
      end_sequence ();

      if (target != new_reg || shift_seq == NULL)
        continue;

      cost = 0;
      for (insn = shift_seq; insn != NULL_RTX; insn = NEXT_INSN (insn))
        if (INSN_P (insn))
          cost += insn_cost (insn, speed);

      /* The computation up to here is essentially independent
         of the arguments and could be precomputed.  It may
         not be worth doing so.
	 We could precompute if
	 worthwhile or at least cache the results.  The result
	 technically depends on both SHIFT and ACCESS_SIZE,
	 but in practice the answer will depend only on ACCESS_SIZE.  */

      if (cost > COSTS_N_INSNS (1))
	continue;

      new_lhs = extract_low_bits (new_mode, store_mode,
				  copy_rtx (store_info->rhs));
      if (new_lhs == NULL_RTX)
	continue;

      /* We found an acceptable shift.  Generate a move to
	 take the value from the store and put it into the
	 shift pseudo, then shift it, then generate another
	 move to put it into the target of the read.  */
      emit_move_insn (new_reg, new_lhs);
      emit_insn (shift_seq);
      read_reg = extract_low_bits (read_mode, new_mode, new_reg);
      break;
    }

  return read_reg;
}


/* Callback for note_stores to find the hard regs set or clobbered by
   insn.  DATA is a bitmap of the hardregs set so far.  */

static void
look_for_hardregs (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
{
  bitmap regs_set = (bitmap) data;

  if (REG_P (x)
      && HARD_REGISTER_P (x))
    bitmap_set_range (regs_set, REGNO (x), REG_NREGS (x));
}

/* Helper function for replace_read and record_store.
   Attempt to return a value of mode READ_MODE stored in STORE_INFO,
   consisting of READ_WIDTH bytes starting from READ_OFFSET.  Return
   NULL if not successful.  If REQUIRE_CST is true, only return a
   constant.  */

static rtx
get_stored_val (store_info *store_info, machine_mode read_mode,
		poly_int64 read_offset, poly_int64 read_width,
		basic_block bb, bool require_cst)
{
  machine_mode store_mode = GET_MODE (store_info->mem);
  poly_int64 gap;
  rtx read_reg;

  /* To get here the read is within the boundaries of the write so
     shift will never be negative.  Start out with the shift being in
     bytes.  */
  if (store_mode == BLKmode)
    gap = 0;
  else if (BYTES_BIG_ENDIAN)
    gap = ((store_info->offset + store_info->width)
	   - (read_offset + read_width));
  else
    gap = read_offset - store_info->offset;

  if (gap.is_constant () && maybe_ne (gap, 0))
    {
      poly_int64 shift = gap * BITS_PER_UNIT;
      poly_int64 access_size = GET_MODE_SIZE (read_mode) + gap;
      read_reg = find_shift_sequence (access_size, store_info, read_mode,
				      shift, optimize_bb_for_speed_p (bb),
				      require_cst);
    }
  else if (store_mode == BLKmode)
    {
      /* The store is a memset (addr, const_val, const_size).
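	 For example (purely illustrative): memset (p, 0xab, n) is
	 recorded as a BLKmode store whose rhs is (const_int 0xab).
	 With BITS_PER_UNIT == 8, the doubling loop below widens the
	 pattern:
	     0xab -> 0xabab -> 0xabababab -> 0xabababababababab
	 so a later SImode read can be rewritten to use 0xabababab.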
	 */
      gcc_assert (CONST_INT_P (store_info->rhs));
      scalar_int_mode int_store_mode;
      if (!int_mode_for_mode (read_mode).exists (&int_store_mode))
	read_reg = NULL_RTX;
      else if (store_info->rhs == const0_rtx)
	read_reg = extract_low_bits (read_mode, int_store_mode, const0_rtx);
      else if (GET_MODE_BITSIZE (int_store_mode) > HOST_BITS_PER_WIDE_INT
	       || BITS_PER_UNIT >= HOST_BITS_PER_WIDE_INT)
	read_reg = NULL_RTX;
      else
	{
	  unsigned HOST_WIDE_INT c
	    = INTVAL (store_info->rhs)
	      & ((HOST_WIDE_INT_1 << BITS_PER_UNIT) - 1);
	  int shift = BITS_PER_UNIT;
	  while (shift < HOST_BITS_PER_WIDE_INT)
	    {
	      c |= (c << shift);
	      shift <<= 1;
	    }
	  read_reg = gen_int_mode (c, int_store_mode);
	  read_reg = extract_low_bits (read_mode, int_store_mode, read_reg);
	}
    }
  else if (store_info->const_rhs
	   && (require_cst
	       || GET_MODE_CLASS (read_mode) != GET_MODE_CLASS (store_mode)))
    read_reg = extract_low_bits (read_mode, store_mode,
				 copy_rtx (store_info->const_rhs));
  else
    read_reg = extract_low_bits (read_mode, store_mode,
				 copy_rtx (store_info->rhs));
  if (require_cst && read_reg && !CONSTANT_P (read_reg))
    read_reg = NULL_RTX;
  return read_reg;
}

/* Take a sequence of:
     A <- r1
     ...
     ... <- A

   and change it into
     r2 <- r1
     A <- r1
     ...
     ... <- r2

   or

     r3 <- extract (r1)
     r3 <- r3 >> shift
     r2 <- extract (r3)
     ... <- r2

   or

     r2 <- extract (r1)
     ... <- r2

   depending on the alignment and the mode of the store and
   subsequent load.

   The STORE_INFO and STORE_INSN are for the store and READ_INFO
   and READ_INSN are for the read.  Return true if the replacement
   went ok.  */

static bool
replace_read (store_info *store_info, insn_info_t store_insn,
	      read_info_t read_info, insn_info_t read_insn, rtx *loc)
{
  machine_mode store_mode = GET_MODE (store_info->mem);
  machine_mode read_mode = GET_MODE (read_info->mem);
  rtx_insn *insns, *this_insn;
  rtx read_reg;
  basic_block bb;

  if (!dbg_cnt (dse))
    return false;

  /* Create a sequence of instructions to set up the read register.
     This sequence goes immediately before the store and its result
     is read by the load.

     We need to keep this in perspective.  We are replacing a read
     with a sequence of insns, but the read will almost certainly be
     in cache, so it is not going to be an expensive one.  Thus, we
     are not willing to do a multi insn shift or worse a subroutine
     call to get rid of the read.  */
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "trying to replace %smode load in insn %d"
	     " from %smode store in insn %d\n",
	     GET_MODE_NAME (read_mode), INSN_UID (read_insn->insn),
	     GET_MODE_NAME (store_mode), INSN_UID (store_insn->insn));
  start_sequence ();
  bb = BLOCK_FOR_INSN (read_insn->insn);
  read_reg = get_stored_val (store_info,
			     read_mode, read_info->offset, read_info->width,
			     bb, false);
  if (read_reg == NULL_RTX)
    {
      end_sequence ();
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, " -- could not extract bits of stored value\n");
      return false;
    }
  /* Force the value into a new register so that it won't be clobbered
     between the store and the load.
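     (The sequence built here is emitted immediately before the store,
     so it must not clobber any hard register that is live at that
     point; that is checked below.)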
     */
  read_reg = copy_to_mode_reg (read_mode, read_reg);
  insns = get_insns ();
  end_sequence ();

  if (insns != NULL_RTX)
    {
      /* Now we have to scan the set of new instructions to see if the
	 sequence contains any sets of hardregs that happened to be
	 live at this point.  For instance, this can happen if one of
	 the insns sets the CC and the CC happened to be live at that
	 point.  This does occasionally happen, see PR 37922.  */
      bitmap regs_set = BITMAP_ALLOC (&reg_obstack);

      for (this_insn = insns;
	   this_insn != NULL_RTX; this_insn = NEXT_INSN (this_insn))
	{
	  if (insn_invalid_p (this_insn, false))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, " -- replacing the loaded MEM with ");
		  print_simple_rtl (dump_file, read_reg);
		  fprintf (dump_file, " led to an invalid instruction\n");
		}
	      BITMAP_FREE (regs_set);
	      return false;
	    }
	  note_stores (this_insn, look_for_hardregs, regs_set);
	}

      if (store_insn->fixed_regs_live)
	bitmap_and_into (regs_set, store_insn->fixed_regs_live);
      if (!bitmap_empty_p (regs_set))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "abandoning replacement because sequence "
		       "clobbers live hardregs:");
	      df_print_regset (dump_file, regs_set);
	    }

	  BITMAP_FREE (regs_set);
	  return false;
	}
      BITMAP_FREE (regs_set);
    }

  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, *loc, NONCONST)
    {
      const_rtx x = *iter;
      if (GET_RTX_CLASS (GET_CODE (x)) == RTX_AUTOINC)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " -- replacing the MEM failed due to address "
		     "side-effects\n");
	  return false;
	}
    }

  if (validate_change (read_insn->insn, loc, read_reg, 0))
    {
      deferred_change *change = deferred_change_pool.allocate ();

      /* Insert this right before the store insn where it will be safe
	 from later insns that might change it before the read.  */
      emit_insn_before (insns, store_insn->insn);

      /* And now for the kludge part: cselib croaks if you just
	 return at this point.  There are two reasons for this:

	 1) Cselib has an idea of how many pseudos there are and
	 that does not include the new ones we just added.

	 2) Cselib does not know about the move insn we added
	 above the store_info, and there is no way to tell it
	 about it, because it has "moved on".

	 Problem (1) is fixable with a certain amount of engineering.
	 Problem (2) requires starting the bb from scratch, which
	 could be expensive.

	 So we are just going to have to lie.  The move/extraction
	 insns are not really an issue, cselib did not see them.  But
	 the use of the new pseudo in read_insn is a real problem
	 because cselib has not scanned this insn.  The way that we
	 solve this problem is that we are just going to put the mem
	 back for now and when we are finished with the block, we undo
	 this.  We keep a table of mems to get rid of.  At the end of
	 the basic block we can put them back.  */

      *loc = read_info->mem;
      change->next = deferred_change_list;
      deferred_change_list = change;
      change->loc = loc;
      change->reg = read_reg;

      /* Get rid of the read_info; from the point of view of the
	 rest of dse, pretend this read never happened.
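	 (The deferred_change record keeps LOC and the replacement
	 pseudo; dse_step1 applies all recorded changes once cselib is
	 finished with the block.)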
	 */
      read_insn->read_rec = read_info->next;
      read_info_type_pool.remove (read_info);
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, " -- replaced the loaded MEM with ");
	  print_simple_rtl (dump_file, read_reg);
	  fprintf (dump_file, "\n");
	}
      return true;
    }
  else
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, " -- replacing the loaded MEM with ");
	  print_simple_rtl (dump_file, read_reg);
	  fprintf (dump_file, " led to an invalid instruction\n");
	}
      return false;
    }
}

/* Check the address of MEM *LOC and kill any appropriate stores that
   may be active.  */

static void
check_mem_read_rtx (rtx *loc, bb_info_t bb_info)
{
  rtx mem = *loc, mem_addr;
  insn_info_t insn_info;
  poly_int64 offset = 0;
  poly_int64 width = 0;
  cselib_val *base = NULL;
  int group_id;
  read_info_t read_info;

  insn_info = bb_info->last_insn;

  if ((MEM_ALIAS_SET (mem) == ALIAS_SET_MEMORY_BARRIER)
      || MEM_VOLATILE_P (mem))
    {
      if (crtl->stack_protect_guard
	  && (MEM_EXPR (mem) == crtl->stack_protect_guard
	      || (crtl->stack_protect_guard_decl
		  && MEM_EXPR (mem) == crtl->stack_protect_guard_decl))
	  && MEM_VOLATILE_P (mem))
	{
	  /* This is either the stack protector canary on the stack
	     (which ought to be written by a MEM_VOLATILE_P store and
	     thus shouldn't be deleted, and is read at the very end of
	     the function, but shouldn't conflict with any other store),
	     or it is the __stack_chk_guard variable, or TLS, or whatever
	     else MEM holds the canary value, which really shouldn't
	     ever be modified in -fstack-protector* protected functions,
	     otherwise the prologue store wouldn't match the epilogue
	     check.  */
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " stack protector canary read ignored.\n");
	  insn_info->cannot_delete = true;
	  return;
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, " adding wild read, volatile or barrier.\n");
      add_wild_read (bb_info);
      insn_info->cannot_delete = true;
      return;
    }

  /* If it is reading readonly mem, then there can be no conflict with
     another write.  */
  if (MEM_READONLY_P (mem))
    return;

  if (!canon_address (mem, &group_id, &offset, &base))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, " adding wild read, canon_address failure.\n");
      add_wild_read (bb_info);
      return;
    }

  if (GET_MODE (mem) == BLKmode)
    width = -1;
  else
    width = GET_MODE_SIZE (GET_MODE (mem));

  if (!endpoint_representable_p (offset, known_eq (width, -1) ? 1 : width))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, " adding wild read, due to overflow.\n");
      add_wild_read (bb_info);
      return;
    }

  read_info = read_info_type_pool.allocate ();
  read_info->group_id = group_id;
  read_info->mem = mem;
  read_info->offset = offset;
  read_info->width = width;
  read_info->next = insn_info->read_rec;
  insn_info->read_rec = read_info;
  if (group_id < 0)
    mem_addr = base->val_rtx;
  else
    {
      group_info *group = rtx_group_vec[group_id];
      mem_addr = group->canon_base_addr;
    }
  if (maybe_ne (offset, 0))
    mem_addr = plus_constant (get_address_mode (mem), mem_addr, offset);
  /* Avoid passing VALUE RTXen as mem_addr to canon_true_dependence
     which will over and over re-create proper RTL and re-apply the
     offset above.  See PR80960 where we almost allocate 1.6GB of PLUS
     RTXen that way.  */
  mem_addr = get_addr (mem_addr);

  if (group_id >= 0)
    {
      /* This is the restricted case where the base is a constant or
	 the frame pointer and offset is a constant.  */
      insn_info_t i_ptr = active_local_stores;
      insn_info_t last = NULL;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  if (!known_size_p (width))
	    fprintf (dump_file, " processing const load gid=%d[BLK]\n",
		     group_id);
	  else
	    {
	      fprintf (dump_file, " processing const load gid=%d", group_id);
	      print_range (dump_file, offset, width);
	      fprintf (dump_file, "\n");
	    }
	}

      while (i_ptr)
	{
	  bool remove = false;
	  store_info *store_info = i_ptr->store_rec;

	  /* Skip the clobbers.  */
	  while (!store_info->is_set)
	    store_info = store_info->next;

	  /* There are three cases here.  */
	  if (store_info->group_id < 0)
	    /* We have a cselib store followed by a read from a
	       const base.  */
	    remove
	      = canon_true_dependence (store_info->mem,
				       GET_MODE (store_info->mem),
				       store_info->mem_addr,
				       mem, mem_addr);

	  else if (group_id == store_info->group_id)
	    {
	      /* This is a block mode load.  We may get lucky and
		 canon_true_dependence may save the day.  */
	      if (!known_size_p (width))
		remove
		  = canon_true_dependence (store_info->mem,
					   GET_MODE (store_info->mem),
					   store_info->mem_addr,
					   mem, mem_addr);

	      /* If this read is just reading back something that we just
		 stored, rewrite the read.  */
	      else
		{
		  if (store_info->rhs
		      && known_subrange_p (offset, width, store_info->offset,
					   store_info->width)
		      && all_positions_needed_p (store_info,
						 offset - store_info->offset,
						 width)
		      && replace_read (store_info, i_ptr, read_info,
				       insn_info, loc))
		    return;

		  /* The bases are the same, just see if the offsets
		     could overlap.  */
		  if (ranges_maybe_overlap_p (offset, width,
					      store_info->offset,
					      store_info->width))
		    remove = true;
		}
	    }

	  /* else: the missing case is that the bases are constant but
	     different.  There is nothing to do here because there is
	     no overlap.
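	     For example (illustrative), a store based on symbol `a'
	     and a read based on symbol `b' belong to different groups
	     with distinct constant bases, so their address ranges can
	     never overlap.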
	     */

	  if (remove)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		dump_insn_info ("removing from active", i_ptr);

	      active_local_stores_len--;
	      if (last)
		last->next_local_store = i_ptr->next_local_store;
	      else
		active_local_stores = i_ptr->next_local_store;
	    }
	  else
	    last = i_ptr;
	  i_ptr = i_ptr->next_local_store;
	}
    }
  else
    {
      insn_info_t i_ptr = active_local_stores;
      insn_info_t last = NULL;
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, " processing cselib load mem:");
	  print_inline_rtx (dump_file, mem, 0);
	  fprintf (dump_file, "\n");
	}

      while (i_ptr)
	{
	  bool remove = false;
	  store_info *store_info = i_ptr->store_rec;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " processing cselib load against insn %d\n",
		     INSN_UID (i_ptr->insn));

	  /* Skip the clobbers.  */
	  while (!store_info->is_set)
	    store_info = store_info->next;

	  /* If this read is just reading back something that we just
	     stored, rewrite the read.  */
	  if (store_info->rhs
	      && store_info->group_id == -1
	      && store_info->cse_base == base
	      && known_subrange_p (offset, width, store_info->offset,
				   store_info->width)
	      && all_positions_needed_p (store_info,
					 offset - store_info->offset, width)
	      && replace_read (store_info, i_ptr, read_info, insn_info, loc))
	    return;

	  remove = canon_true_dependence (store_info->mem,
					  GET_MODE (store_info->mem),
					  store_info->mem_addr,
					  mem, mem_addr);

	  if (remove)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		dump_insn_info ("removing from active", i_ptr);

	      active_local_stores_len--;
	      if (last)
		last->next_local_store = i_ptr->next_local_store;
	      else
		active_local_stores = i_ptr->next_local_store;
	    }
	  else
	    last = i_ptr;
	  i_ptr = i_ptr->next_local_store;
	}
    }
}

/* A note_uses callback: DATA points to the BB_INFO that
   check_mem_read_rtx needs.  Apply check_mem_read_rtx to every MEM
   found within *LOC.  */

static void
check_mem_read_use (rtx *loc, void *data)
{
  subrtx_ptr_iterator::array_type array;
  FOR_EACH_SUBRTX_PTR (iter, array, loc, NONCONST)
    {
      rtx *loc = *iter;
      if (MEM_P (*loc))
	check_mem_read_rtx (loc, (bb_info_t) data);
    }
}


/* Get arguments passed to CALL_INSN.  Return TRUE if successful.
   So far it only handles arguments passed in registers.
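   For example (an illustrative sketch, assuming the x86_64 SysV
   convention): for memset (p, 0xab, 40), the three arguments arrive
   in registers di, si and dx, and each appears as a (use ...) entry
   in CALL_INSN_FUNCTION_USAGE, which is how the loop below finds
   them.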
   */

static bool
get_call_args (rtx call_insn, tree fn, rtx *args, int nargs)
{
  CUMULATIVE_ARGS args_so_far_v;
  cumulative_args_t args_so_far;
  tree arg;
  int idx;

  INIT_CUMULATIVE_ARGS (args_so_far_v, TREE_TYPE (fn), NULL_RTX, 0, 3);
  args_so_far = pack_cumulative_args (&args_so_far_v);

  arg = TYPE_ARG_TYPES (TREE_TYPE (fn));
  for (idx = 0;
       arg != void_list_node && idx < nargs;
       arg = TREE_CHAIN (arg), idx++)
    {
      scalar_int_mode mode;
      rtx reg, link, tmp;

      if (!is_int_mode (TYPE_MODE (TREE_VALUE (arg)), &mode))
	return false;

      function_arg_info arg (mode, /*named=*/true);
      reg = targetm.calls.function_arg (args_so_far, arg);
      if (!reg || !REG_P (reg) || GET_MODE (reg) != mode)
	return false;

      for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
	   link;
	   link = XEXP (link, 1))
	if (GET_CODE (XEXP (link, 0)) == USE)
	  {
	    scalar_int_mode arg_mode;
	    args[idx] = XEXP (XEXP (link, 0), 0);
	    if (REG_P (args[idx])
		&& REGNO (args[idx]) == REGNO (reg)
		&& (GET_MODE (args[idx]) == mode
		    || (is_int_mode (GET_MODE (args[idx]), &arg_mode)
			&& (GET_MODE_SIZE (arg_mode) <= UNITS_PER_WORD)
			&& (GET_MODE_SIZE (arg_mode) > GET_MODE_SIZE (mode)))))
	      break;
	  }
      if (!link)
	return false;

      tmp = cselib_expand_value_rtx (args[idx], scratch, 5);
      if (GET_MODE (args[idx]) != mode)
	{
	  if (!tmp || !CONST_INT_P (tmp))
	    return false;
	  tmp = gen_int_mode (INTVAL (tmp), mode);
	}
      if (tmp)
	args[idx] = tmp;

      targetm.calls.function_arg_advance (args_so_far, arg);
    }
  if (arg != void_list_node || idx != nargs)
    return false;
  return true;
}

/* Return a bitmap of the fixed registers contained in IN.  */

static bitmap
copy_fixed_regs (const_bitmap in)
{
  bitmap ret;

  ret = ALLOC_REG_SET (NULL);
  bitmap_and (ret, in, bitmap_view<HARD_REG_SET> (fixed_reg_set));
  return ret;
}

/* Apply record_store to all candidate stores in INSN.  Mark INSN
   if some part of it is not a candidate store and assigns to a
   non-register target.  */

static void
scan_insn (bb_info_t bb_info, rtx_insn *insn, int max_active_local_stores)
{
  rtx body;
  insn_info_type *insn_info = insn_info_type_pool.allocate ();
  int mems_found = 0;
  memset (insn_info, 0, sizeof (struct insn_info_type));

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\n**scanning insn=%d\n",
	     INSN_UID (insn));

  insn_info->prev_insn = bb_info->last_insn;
  insn_info->insn = insn;
  bb_info->last_insn = insn_info;

  if (DEBUG_INSN_P (insn))
    {
      insn_info->cannot_delete = true;
      return;
    }

  /* Look at all of the uses in the insn.  */
  note_uses (&PATTERN (insn), check_mem_read_use, bb_info);

  if (CALL_P (insn))
    {
      bool const_call;
      rtx call, sym;
      tree memset_call = NULL_TREE;

      insn_info->cannot_delete = true;

      /* Const functions cannot do anything bad, i.e. read memory;
	 however, they can read their parameters, which may have
	 been pushed onto the stack.
	 memset and bzero don't read memory either.
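	 That is why a memset call can be modelled below as an
	 ordinary BLKmode store of its constant fill value.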
	 */
      const_call = RTL_CONST_CALL_P (insn);
      if (!const_call
	  && (call = get_call_rtx_from (insn))
	  && (sym = XEXP (XEXP (call, 0), 0))
	  && GET_CODE (sym) == SYMBOL_REF
	  && SYMBOL_REF_DECL (sym)
	  && TREE_CODE (SYMBOL_REF_DECL (sym)) == FUNCTION_DECL
	  && fndecl_built_in_p (SYMBOL_REF_DECL (sym), BUILT_IN_MEMSET))
	memset_call = SYMBOL_REF_DECL (sym);

      if (const_call || memset_call)
	{
	  insn_info_t i_ptr = active_local_stores;
	  insn_info_t last = NULL;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "%s call %d\n",
		     const_call ? "const" : "memset", INSN_UID (insn));

	  /* See the head comment of the frame_read field.  */
	  if (reload_completed
	      /* Tail calls are storing their arguments using
		 arg pointer.  If it is a frame pointer on the target,
		 even before reload we need to kill frame pointer based
		 stores.  */
	      || (SIBLING_CALL_P (insn)
		  && HARD_FRAME_POINTER_IS_ARG_POINTER))
	    insn_info->frame_read = true;

	  /* Loop over the active stores and remove those which are
	     killed by the const function call.  */
	  while (i_ptr)
	    {
	      bool remove_store = false;

	      /* The stack pointer based stores are always killed.  */
	      if (i_ptr->stack_pointer_based)
		remove_store = true;

	      /* If the frame is read, the frame related stores are killed.  */
	      else if (insn_info->frame_read)
		{
		  store_info *store_info = i_ptr->store_rec;

		  /* Skip the clobbers.  */
		  while (!store_info->is_set)
		    store_info = store_info->next;

		  if (store_info->group_id >= 0
		      && rtx_group_vec[store_info->group_id]->frame_related)
		    remove_store = true;
		}

	      if (remove_store)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    dump_insn_info ("removing from active", i_ptr);

		  active_local_stores_len--;
		  if (last)
		    last->next_local_store = i_ptr->next_local_store;
		  else
		    active_local_stores = i_ptr->next_local_store;
		}
	      else
		last = i_ptr;

	      i_ptr = i_ptr->next_local_store;
	    }

	  if (memset_call)
	    {
	      rtx args[3];
	      if (get_call_args (insn, memset_call, args, 3)
		  && CONST_INT_P (args[1])
		  && CONST_INT_P (args[2])
		  && INTVAL (args[2]) > 0)
		{
		  rtx mem = gen_rtx_MEM (BLKmode, args[0]);
		  set_mem_size (mem, INTVAL (args[2]));
		  body = gen_rtx_SET (mem, args[1]);
		  mems_found += record_store (body, bb_info);
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "handling memset as BLKmode store\n");
		  if (mems_found == 1)
		    {
		      if (active_local_stores_len++ >= max_active_local_stores)
			{
			  active_local_stores_len = 1;
			  active_local_stores = NULL;
			}
		      insn_info->fixed_regs_live
			= copy_fixed_regs (bb_info->regs_live);
		      insn_info->next_local_store = active_local_stores;
		      active_local_stores = insn_info;
		    }
		}
	      else
		clear_rhs_from_active_local_stores ();
	    }
	}
      else if (SIBLING_CALL_P (insn)
	       && (reload_completed || HARD_FRAME_POINTER_IS_ARG_POINTER))
	/* Arguments for a sibling call that are pushed to memory are passed
	   using the incoming argument pointer of the current function.  After
	   reload that might be (and likely is) frame pointer based.  And, if
	   it is a frame pointer on the target, even before reload we need to
	   kill frame pointer based stores.
	   */
	add_wild_read (bb_info);
      else
	/* Every other call, including pure functions, may read any memory
	   that is not relative to the frame.  */
	add_non_frame_wild_read (bb_info);

      return;
    }

  /* Assuming that there are sets in these insns, we cannot delete
     them.  */
  if ((GET_CODE (PATTERN (insn)) == CLOBBER)
      || volatile_refs_p (PATTERN (insn))
      || (!cfun->can_delete_dead_exceptions && !insn_nothrow_p (insn))
      || (RTX_FRAME_RELATED_P (insn))
      || find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX))
    insn_info->cannot_delete = true;

  body = PATTERN (insn);
  if (GET_CODE (body) == PARALLEL)
    {
      int i;
      for (i = 0; i < XVECLEN (body, 0); i++)
	mems_found += record_store (XVECEXP (body, 0, i), bb_info);
    }
  else
    mems_found += record_store (body, bb_info);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "mems_found = %d, cannot_delete = %s\n",
	     mems_found, insn_info->cannot_delete ? "true" : "false");

  /* If we found some sets of mems, add the insn into the
     active_local_stores so that it can be locally deleted if found
     dead, or used for replace_read and redundant constant store
     elimination.  Otherwise mark it as cannot delete.  This simplifies
     the processing later.  */
  if (mems_found == 1)
    {
      if (active_local_stores_len++ >= max_active_local_stores)
	{
	  active_local_stores_len = 1;
	  active_local_stores = NULL;
	}
      insn_info->fixed_regs_live = copy_fixed_regs (bb_info->regs_live);
      insn_info->next_local_store = active_local_stores;
      active_local_stores = insn_info;
    }
  else
    insn_info->cannot_delete = true;
}


/* Remove BASE from the set of active_local_stores.  This is a
   callback from cselib that is used to get rid of the stores in
   active_local_stores.  */

static void
remove_useless_values (cselib_val *base)
{
  insn_info_t insn_info = active_local_stores;
  insn_info_t last = NULL;

  while (insn_info)
    {
      store_info *store_info = insn_info->store_rec;
      bool del = false;

      /* If ANY of the store_infos match the cselib group that is
	 being deleted, then the insn cannot be deleted.  */
      while (store_info)
	{
	  if ((store_info->group_id == -1)
	      && (store_info->cse_base == base))
	    {
	      del = true;
	      break;
	    }
	  store_info = store_info->next;
	}

      if (del)
	{
	  active_local_stores_len--;
	  if (last)
	    last->next_local_store = insn_info->next_local_store;
	  else
	    active_local_stores = insn_info->next_local_store;
	  free_store_info (insn_info);
	}
      else
	last = insn_info;

      insn_info = insn_info->next_local_store;
    }
}


/* Do all of step 1.  */

static void
dse_step1 (void)
{
  basic_block bb;
  bitmap regs_live = BITMAP_ALLOC (&reg_obstack);

  cselib_init (0);
  all_blocks = BITMAP_ALLOC (NULL);
  bitmap_set_bit (all_blocks, ENTRY_BLOCK);
  bitmap_set_bit (all_blocks, EXIT_BLOCK);

  /* For -O1 reduce the maximum number of active local stores for RTL DSE
     since this can consume huge amounts of memory (PR89115).
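     Each incoming store is checked against every store on the active
     list, so compile time and memory grow with its length; a lower
     cap keeps -O1 cheap.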
     */
  int max_active_local_stores = param_max_dse_active_local_stores;
  if (optimize < 2)
    max_active_local_stores /= 10;

  FOR_ALL_BB_FN (bb, cfun)
    {
      insn_info_t ptr;
      bb_info_t bb_info = dse_bb_info_type_pool.allocate ();

      memset (bb_info, 0, sizeof (dse_bb_info_type));
      bitmap_set_bit (all_blocks, bb->index);
      bb_info->regs_live = regs_live;

      bitmap_copy (regs_live, DF_LR_IN (bb));
      df_simulate_initialize_forwards (bb, regs_live);

      bb_table[bb->index] = bb_info;
      cselib_discard_hook = remove_useless_values;

      if (bb->index >= NUM_FIXED_BLOCKS)
	{
	  rtx_insn *insn;

	  active_local_stores = NULL;
	  active_local_stores_len = 0;
	  cselib_clear_table ();

	  /* Scan the insns.  */
	  FOR_BB_INSNS (bb, insn)
	    {
	      if (INSN_P (insn))
		scan_insn (bb_info, insn, max_active_local_stores);
	      cselib_process_insn (insn);
	      if (INSN_P (insn))
		df_simulate_one_insn_forwards (bb, insn, regs_live);
	    }

	  /* This is something of a hack, because the global algorithm
	     is supposed to take care of the case where stores go dead
	     at the end of the function.  However, the global
	     algorithm must take a more conservative view of block
	     mode reads than the local alg does.  So to get the case
	     where you have a store to the frame followed by a
	     non-overlapping block mode read, we look at the active
	     local stores at the end of the function and delete all of
	     the frame and spill based ones.  */
	  if (stores_off_frame_dead_at_return
	      && (EDGE_COUNT (bb->succs) == 0
		  || (single_succ_p (bb)
		      && single_succ (bb) == EXIT_BLOCK_PTR_FOR_FN (cfun)
		      && ! crtl->calls_eh_return)))
	    {
	      insn_info_t i_ptr = active_local_stores;
	      while (i_ptr)
		{
		  store_info *store_info = i_ptr->store_rec;

		  /* Skip the clobbers.  */
		  while (!store_info->is_set)
		    store_info = store_info->next;
		  if (store_info->group_id >= 0)
		    {
		      group_info *group = rtx_group_vec[store_info->group_id];
		      if (group->frame_related && !i_ptr->cannot_delete)
			delete_dead_store_insn (i_ptr);
		    }

		  i_ptr = i_ptr->next_local_store;
		}
	    }

	  /* Get rid of the loads that were discovered in
	     replace_read.  Cselib is finished with this block.  */
	  while (deferred_change_list)
	    {
	      deferred_change *next = deferred_change_list->next;

	      /* There is no reason to validate this change.  That was
		 done earlier.  */
	      *deferred_change_list->loc = deferred_change_list->reg;
	      deferred_change_pool.remove (deferred_change_list);
	      deferred_change_list = next;
	    }

	  /* Get rid of all of the cselib based store_infos in this
	     block and mark the containing insns as not being
	     deletable.
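	     Cselib values are local to the block (cselib_clear_table
	     is called at the start of each new block), so these
	     store_infos would dangle if they survived.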
	     */
	  ptr = bb_info->last_insn;
	  while (ptr)
	    {
	      if (ptr->contains_cselib_groups)
		{
		  store_info *s_info = ptr->store_rec;
		  while (s_info && !s_info->is_set)
		    s_info = s_info->next;
		  if (s_info
		      && s_info->redundant_reason
		      && s_info->redundant_reason->insn
		      && !ptr->cannot_delete)
		    {
		      if (dump_file && (dump_flags & TDF_DETAILS))
			fprintf (dump_file, "Locally deleting insn %d "
				 "because insn %d stores the "
				 "same value and couldn't be "
				 "eliminated\n",
				 INSN_UID (ptr->insn),
				 INSN_UID (s_info->redundant_reason->insn));
		      delete_dead_store_insn (ptr);
		    }
		  free_store_info (ptr);
		}
	      else
		{
		  store_info *s_info;

		  /* Free at least positions_needed bitmaps.  */
		  for (s_info = ptr->store_rec; s_info; s_info = s_info->next)
		    if (s_info->is_large)
		      {
			BITMAP_FREE (s_info->positions_needed.large.bmap);
			s_info->is_large = false;
		      }
		}
	      ptr = ptr->prev_insn;
	    }

	  cse_store_info_pool.release ();
	}
      bb_info->regs_live = NULL;
    }

  BITMAP_FREE (regs_live);
  cselib_finish ();
  rtx_group_table->empty ();
}


/*----------------------------------------------------------------------------
   Second step.

   Assign each byte position in the stores that we are going to
   analyze globally to a position in the bitmaps.  Returns true if
   there are any bit positions assigned.
----------------------------------------------------------------------------*/

static void
dse_step2_init (void)
{
  unsigned int i;
  group_info *group;

  FOR_EACH_VEC_ELT (rtx_group_vec, i, group)
    {
      /* For all non stack related bases, we only consider a store to
	 be deletable if there are two or more stores for that
	 position.  This is because it takes one store to make the
	 other store redundant.  However, for the stores that are
	 stack related, we consider them if there is only one store
	 for the position.  We do this because the stack related
	 stores can be deleted if there is no read between them and
	 the end of the function.

	 To make this work in the current framework, we take the stack
	 related bases and add all of the bits from store1 into store2.
	 This has the effect of making them eligible even if there is
	 only one store.  */

      if (stores_off_frame_dead_at_return && group->frame_related)
	{
	  bitmap_ior_into (group->store2_n, group->store1_n);
	  bitmap_ior_into (group->store2_p, group->store1_p);
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "group %d is frame related ", i);
	}

      group->offset_map_size_n++;
      group->offset_map_n = XOBNEWVEC (&dse_obstack, int,
				       group->offset_map_size_n);
      group->offset_map_size_p++;
      group->offset_map_p = XOBNEWVEC (&dse_obstack, int,
				       group->offset_map_size_p);
      group->process_globally = false;
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "group %d(%d+%d): ", i,
		   (int) bitmap_count_bits (group->store2_n),
		   (int) bitmap_count_bits (group->store2_p));
	  bitmap_print (dump_file, group->store2_n, "n ", " ");
	  bitmap_print (dump_file, group->store2_p, "p ", "\n");
	}
    }
}


/* Init the offset tables.
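   Every (group, offset) pair that survives the store2 filter above is
   assigned a distinct bit position; for example (with illustrative
   numbering), stores at offsets 4 and 8 of some group might become
   bits 7 and 8 of the global dataflow bitmaps.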
   */

static bool
dse_step2 (void)
{
  unsigned int i;
  group_info *group;
  /* Position 0 is unused because 0 is used in the maps to mean
     unused.  */
  current_position = 1;
  FOR_EACH_VEC_ELT (rtx_group_vec, i, group)
    {
      bitmap_iterator bi;
      unsigned int j;

      memset (group->offset_map_n, 0, sizeof (int) * group->offset_map_size_n);
      memset (group->offset_map_p, 0, sizeof (int) * group->offset_map_size_p);
      bitmap_clear (group->group_kill);

      EXECUTE_IF_SET_IN_BITMAP (group->store2_n, 0, j, bi)
	{
	  bitmap_set_bit (group->group_kill, current_position);
	  if (bitmap_bit_p (group->escaped_n, j))
	    bitmap_set_bit (kill_on_calls, current_position);
	  group->offset_map_n[j] = current_position++;
	  group->process_globally = true;
	}
      EXECUTE_IF_SET_IN_BITMAP (group->store2_p, 0, j, bi)
	{
	  bitmap_set_bit (group->group_kill, current_position);
	  if (bitmap_bit_p (group->escaped_p, j))
	    bitmap_set_bit (kill_on_calls, current_position);
	  group->offset_map_p[j] = current_position++;
	  group->process_globally = true;
	}
    }
  return current_position != 1;
}



/*----------------------------------------------------------------------------
   Third step.

   Build the bit vectors for the transfer functions.
----------------------------------------------------------------------------*/


/* Look up the bitmap index for OFFSET in GROUP_INFO.  If it is not
   there, return 0.  */

static int
get_bitmap_index (group_info *group_info, HOST_WIDE_INT offset)
{
  if (offset < 0)
    {
      HOST_WIDE_INT offset_p = -offset;
      if (offset_p >= group_info->offset_map_size_n)
	return 0;
      return group_info->offset_map_n[offset_p];
    }
  else
    {
      if (offset >= group_info->offset_map_size_p)
	return 0;
      return group_info->offset_map_p[offset];
    }
}


/* Process the STORE_INFOs into the bitmaps GEN and KILL.  KILL
   may be NULL.  */

static void
scan_stores (store_info *store_info, bitmap gen, bitmap kill)
{
  while (store_info)
    {
      HOST_WIDE_INT i, offset, width;
      group_info *group_info
	= rtx_group_vec[store_info->group_id];
      /* We can (conservatively) ignore stores whose bounds aren't known;
	 they simply don't generate new global dse opportunities.  */
      if (group_info->process_globally
	  && store_info->offset.is_constant (&offset)
	  && store_info->width.is_constant (&width))
	{
	  HOST_WIDE_INT end = offset + width;
	  for (i = offset; i < end; i++)
	    {
	      int index = get_bitmap_index (group_info, i);
	      if (index != 0)
		{
		  bitmap_set_bit (gen, index);
		  if (kill)
		    bitmap_clear_bit (kill, index);
		}
	    }
	}
      store_info = store_info->next;
    }
}


/* Process the READ_INFOs into the bitmaps GEN and KILL.  KILL
   may be NULL.  */

static void
scan_reads (insn_info_t insn_info, bitmap gen, bitmap kill)
{
  read_info_t read_info = insn_info->read_rec;
  int i;
  group_info *group;

  /* If this insn reads the frame, kill all the frame related stores.
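     (scan_insn sets frame_read on const and memset calls, either
     after reload or, for sibling calls, when the target's argument
     pointer is the frame pointer.)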
     */
  if (insn_info->frame_read)
    {
      FOR_EACH_VEC_ELT (rtx_group_vec, i, group)
	if (group->process_globally && group->frame_related)
	  {
	    if (kill)
	      bitmap_ior_into (kill, group->group_kill);
	    bitmap_and_compl_into (gen, group->group_kill);
	  }
    }
  if (insn_info->non_frame_wild_read)
    {
      /* Kill all non-frame related stores.  Kill all stores of variables that
	 escape.  */
      if (kill)
	bitmap_ior_into (kill, kill_on_calls);
      bitmap_and_compl_into (gen, kill_on_calls);
      FOR_EACH_VEC_ELT (rtx_group_vec, i, group)
	if (group->process_globally && !group->frame_related)
	  {
	    if (kill)
	      bitmap_ior_into (kill, group->group_kill);
	    bitmap_and_compl_into (gen, group->group_kill);
	  }
    }
  while (read_info)
    {
      FOR_EACH_VEC_ELT (rtx_group_vec, i, group)
	{
	  if (group->process_globally)
	    {
	      if (i == read_info->group_id)
		{
		  HOST_WIDE_INT offset, width;
		  /* Reads with non-constant size kill all DSE opportunities
		     in the group.  */
		  if (!read_info->offset.is_constant (&offset)
		      || !read_info->width.is_constant (&width)
		      || !known_size_p (width))
		    {
		      /* Handle block mode reads.  */
		      if (kill)
			bitmap_ior_into (kill, group->group_kill);
		      bitmap_and_compl_into (gen, group->group_kill);
		    }
		  else
		    {
		      /* The groups are the same, just process the
			 offsets.  */
		      HOST_WIDE_INT j;
		      HOST_WIDE_INT end = offset + width;
		      for (j = offset; j < end; j++)
			{
			  int index = get_bitmap_index (group, j);
			  if (index != 0)
			    {
			      if (kill)
				bitmap_set_bit (kill, index);
			      bitmap_clear_bit (gen, index);
			    }
			}
		    }
		}
	      else
		{
		  /* The groups are different, if the alias sets
		     conflict, clear the entire group.  We only need
		     to apply this test if the read_info is a cselib
		     read.  Anything with a constant base cannot alias
		     something else with a different constant
		     base.  */
		  if ((read_info->group_id < 0)
		      && canon_true_dependence (group->base_mem,
						GET_MODE (group->base_mem),
						group->canon_base_addr,
						read_info->mem, NULL_RTX))
		    {
		      if (kill)
			bitmap_ior_into (kill, group->group_kill);
		      bitmap_and_compl_into (gen, group->group_kill);
		    }
		}
	    }
	}

      read_info = read_info->next;
    }
}


/* Return the insn in BB_INFO before the first wild read or, if there
   are no wild reads in the block, return the last insn.  */

static insn_info_t
find_insn_before_first_wild_read (bb_info_t bb_info)
{
  insn_info_t insn_info = bb_info->last_insn;
  insn_info_t last_wild_read = NULL;

  while (insn_info)
    {
      if (insn_info->wild_read)
	{
	  last_wild_read = insn_info->prev_insn;
	  /* Block starts with wild read.  */
	  if (!last_wild_read)
	    return NULL;
	}

      insn_info = insn_info->prev_insn;
    }

  if (last_wild_read)
    return last_wild_read;
  else
    return bb_info->last_insn;
}


/* Scan the insns in BB_INFO starting at PTR and going to the top of
   the block in order to build the gen and kill sets for the block.
   We start at ptr which may be the last insn in the block or may be
   the first insn with a wild read.  In the latter case we are able to
   skip the rest of the block because it just does not matter:
   anything that happens is hidden by the wild read.
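   For example (illustrative): if a block contains insns I1 ... I9 and
   I5 performs a wild read (say, a non-const call), only I1 ... I4 are
   scanned; the block's kill set is dropped, which the transfer
   function treats as killing everything.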
   */

static void
dse_step3_scan (basic_block bb)
{
  bb_info_t bb_info = bb_table[bb->index];
  insn_info_t insn_info;

  insn_info = find_insn_before_first_wild_read (bb_info);

  /* In the spill case, or in the no_spill case when there is no wild
     read in the block, we will need a kill set.  */
  if (insn_info == bb_info->last_insn)
    {
      if (bb_info->kill)
	bitmap_clear (bb_info->kill);
      else
	bb_info->kill = BITMAP_ALLOC (&dse_bitmap_obstack);
    }
  else
    if (bb_info->kill)
      BITMAP_FREE (bb_info->kill);

  while (insn_info)
    {
      /* There may have been code deleted by the dce pass run before
	 this phase.  */
      if (insn_info->insn && INSN_P (insn_info->insn))
	{
	  scan_stores (insn_info->store_rec, bb_info->gen, bb_info->kill);
	  scan_reads (insn_info, bb_info->gen, bb_info->kill);
	}

      insn_info = insn_info->prev_insn;
    }
}


/* Set the gen set of the exit block, and also any block with no
   successors that does not have a wild read.  */

static void
dse_step3_exit_block_scan (bb_info_t bb_info)
{
  /* The gen set is all 0's for the exit block except for the
     frame_pointer_group.  */

  if (stores_off_frame_dead_at_return)
    {
      unsigned int i;
      group_info *group;

      FOR_EACH_VEC_ELT (rtx_group_vec, i, group)
	{
	  if (group->process_globally && group->frame_related)
	    bitmap_ior_into (bb_info->gen, group->group_kill);
	}
    }
}


/* Find all of the blocks that are not backwards reachable from the
   exit block or any block with no successors (BB).  These are the
   infinite loops or infinite self loops.  These blocks will still
   have their bits set in UNREACHABLE_BLOCKS.  */

static void
mark_reachable_blocks (sbitmap unreachable_blocks, basic_block bb)
{
  edge e;
  edge_iterator ei;

  if (bitmap_bit_p (unreachable_blocks, bb->index))
    {
      bitmap_clear_bit (unreachable_blocks, bb->index);
      FOR_EACH_EDGE (e, ei, bb->preds)
	{
	  mark_reachable_blocks (unreachable_blocks, e->src);
	}
    }
}

/* Build the transfer functions for the function.  */

static void
dse_step3 ()
{
  basic_block bb;
  sbitmap_iterator sbi;
  bitmap all_ones = NULL;
  unsigned int i;

  auto_sbitmap unreachable_blocks (last_basic_block_for_fn (cfun));
  bitmap_ones (unreachable_blocks);

  FOR_ALL_BB_FN (bb, cfun)
    {
      bb_info_t bb_info = bb_table[bb->index];
      if (bb_info->gen)
	bitmap_clear (bb_info->gen);
      else
	bb_info->gen = BITMAP_ALLOC (&dse_bitmap_obstack);

      if (bb->index == ENTRY_BLOCK)
	;
      else if (bb->index == EXIT_BLOCK)
	dse_step3_exit_block_scan (bb_info);
      else
	dse_step3_scan (bb);
      if (EDGE_COUNT (bb->succs) == 0)
	mark_reachable_blocks (unreachable_blocks, bb);

      /* If this is the second time dataflow is run, delete the old
	 sets.  */
      if (bb_info->in)
	BITMAP_FREE (bb_info->in);
      if (bb_info->out)
	BITMAP_FREE (bb_info->out);
    }

  /* For any block in an infinite loop, we must initialize the out set
     to all ones.  This could be expensive, but almost never occurs in
     practice.  However, it is common in regression tests.
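     (For example, a function whose body is just `for (;;);' produces
     a block with no path to the exit block.)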
     */
  EXECUTE_IF_SET_IN_BITMAP (unreachable_blocks, 0, i, sbi)
    {
      if (bitmap_bit_p (all_blocks, i))
	{
	  bb_info_t bb_info = bb_table[i];
	  if (!all_ones)
	    {
	      unsigned int j;
	      group_info *group;

	      all_ones = BITMAP_ALLOC (&dse_bitmap_obstack);
	      FOR_EACH_VEC_ELT (rtx_group_vec, j, group)
		bitmap_ior_into (all_ones, group->group_kill);
	    }
	  if (!bb_info->out)
	    {
	      bb_info->out = BITMAP_ALLOC (&dse_bitmap_obstack);
	      bitmap_copy (bb_info->out, all_ones);
	    }
	}
    }

  if (all_ones)
    BITMAP_FREE (all_ones);
}



/*----------------------------------------------------------------------------
   Fourth step.

   Solve the bitvector equations.
----------------------------------------------------------------------------*/


/* Confluence function for blocks with no successors.  Create an out
   set from the gen set of the exit block.  This block logically has
   the exit block as a successor.  */

static void
dse_confluence_0 (basic_block bb)
{
  bb_info_t bb_info = bb_table[bb->index];

  if (bb->index == EXIT_BLOCK)
    return;

  if (!bb_info->out)
    {
      bb_info->out = BITMAP_ALLOC (&dse_bitmap_obstack);
      bitmap_copy (bb_info->out, bb_table[EXIT_BLOCK]->gen);
    }
}

/* Propagate the information from the in set of the dest of E to the
   out set of the src of E.  If the various in or out sets are not
   there, that means they are all ones.  */

static bool
dse_confluence_n (edge e)
{
  bb_info_t src_info = bb_table[e->src->index];
  bb_info_t dest_info = bb_table[e->dest->index];

  if (dest_info->in)
    {
      if (src_info->out)
	bitmap_and_into (src_info->out, dest_info->in);
      else
	{
	  src_info->out = BITMAP_ALLOC (&dse_bitmap_obstack);
	  bitmap_copy (src_info->out, dest_info->in);
	}
    }
  return true;
}


/* Propagate the info from the out to the in set of BB_INDEX's basic
   block.  There are three cases:

   1) The block has no kill set.  In this case the kill set is all
   ones.  It does not matter what the out set of the block is, none of
   the info can reach the top.  The only thing that reaches the top is
   the gen set and we just copy the set.

   2) There is a kill set but no out set and bb has successors.  In
   this case we just return.  Eventually an out set will be created and
   it is better to wait than to create a set of ones.

   3) There is both a kill and out set.  We apply the obvious transfer
   function.  */

static bool
dse_transfer_function (int bb_index)
{
  bb_info_t bb_info = bb_table[bb_index];

  if (bb_info->kill)
    {
      if (bb_info->out)
	{
	  /* Case 3 above.  */
	  if (bb_info->in)
	    return bitmap_ior_and_compl (bb_info->in, bb_info->gen,
					 bb_info->out, bb_info->kill);
	  else
	    {
	      bb_info->in = BITMAP_ALLOC (&dse_bitmap_obstack);
	      bitmap_ior_and_compl (bb_info->in, bb_info->gen,
				    bb_info->out, bb_info->kill);
	      return true;
	    }
	}
      else
	/* Case 2 above.  */
	return false;
    }
  else
    {
      /* Case 1 above.  If there is already an in set, nothing
	 happens.
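	 IN depends only on GEN in this case, and GEN does not change
	 during the iteration, so there is never anything to update.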
	 */
      if (bb_info->in)
	return false;
      else
	{
	  bb_info->in = BITMAP_ALLOC (&dse_bitmap_obstack);
	  bitmap_copy (bb_info->in, bb_info->gen);
	  return true;
	}
    }
}

/* Solve the dataflow equations.  */

static void
dse_step4 (void)
{
  df_simple_dataflow (DF_BACKWARD, NULL, dse_confluence_0,
		      dse_confluence_n, dse_transfer_function,
		      all_blocks, df_get_postorder (DF_BACKWARD),
		      df_get_n_blocks (DF_BACKWARD));
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      basic_block bb;

      fprintf (dump_file, "\n\n*** Global dataflow info after analysis.\n");
      FOR_ALL_BB_FN (bb, cfun)
	{
	  bb_info_t bb_info = bb_table[bb->index];

	  df_print_bb_index (bb, dump_file);
	  if (bb_info->in)
	    bitmap_print (dump_file, bb_info->in, " in: ", "\n");
	  else
	    fprintf (dump_file, " in: *MISSING*\n");
	  if (bb_info->gen)
	    bitmap_print (dump_file, bb_info->gen, " gen: ", "\n");
	  else
	    fprintf (dump_file, " gen: *MISSING*\n");
	  if (bb_info->kill)
	    bitmap_print (dump_file, bb_info->kill, " kill: ", "\n");
	  else
	    fprintf (dump_file, " kill: *MISSING*\n");
	  if (bb_info->out)
	    bitmap_print (dump_file, bb_info->out, " out: ", "\n");
	  else
	    fprintf (dump_file, " out: *MISSING*\n\n");
	}
    }
}



/*----------------------------------------------------------------------------
   Fifth step.

   Delete the stores that can only be deleted using the global information.
----------------------------------------------------------------------------*/


static void
dse_step5 (void)
{
  basic_block bb;
  FOR_EACH_BB_FN (bb, cfun)
    {
      bb_info_t bb_info = bb_table[bb->index];
      insn_info_t insn_info = bb_info->last_insn;
      bitmap v = bb_info->out;

      while (insn_info)
	{
	  bool deleted = false;
	  if (dump_file && insn_info->insn)
	    {
	      fprintf (dump_file, "starting to process insn %d\n",
		       INSN_UID (insn_info->insn));
	      bitmap_print (dump_file, v, " v: ", "\n");
	    }

	  /* There may have been code deleted by the dce pass run before
	     this phase.  */
	  if (insn_info->insn
	      && INSN_P (insn_info->insn)
	      && (!insn_info->cannot_delete)
	      && (!bitmap_empty_p (v)))
	    {
	      store_info *store_info = insn_info->store_rec;

	      /* Try to delete the current insn.  */
	      deleted = true;

	      /* Skip the clobbers.
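		 (Clobbers were recorded as store_infos with is_set
		 clear; only real SETs are candidates for deletion.)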
		 */
	      while (!store_info->is_set)
		store_info = store_info->next;

	      HOST_WIDE_INT i, offset, width;
	      group_info *group_info = rtx_group_vec[store_info->group_id];

	      if (!store_info->offset.is_constant (&offset)
		  || !store_info->width.is_constant (&width))
		deleted = false;
	      else
		{
		  HOST_WIDE_INT end = offset + width;
		  for (i = offset; i < end; i++)
		    {
		      int index = get_bitmap_index (group_info, i);

		      if (dump_file && (dump_flags & TDF_DETAILS))
			fprintf (dump_file, "i = %d, index = %d\n",
				 (int) i, index);
		      if (index == 0 || !bitmap_bit_p (v, index))
			{
			  if (dump_file && (dump_flags & TDF_DETAILS))
			    fprintf (dump_file, "failing at i = %d\n",
				     (int) i);
			  deleted = false;
			  break;
			}
		    }
		}
	      if (deleted)
		{
		  if (dbg_cnt (dse)
		      && check_for_inc_dec_1 (insn_info))
		    {
		      delete_insn (insn_info->insn);
		      insn_info->insn = NULL;
		      globally_deleted++;
		    }
		}
	    }
	  /* We do not want to process the local info if the insn was
	     deleted.  For instance, if the insn did a wild read, we
	     no longer need to trash the info.  */
	  if (insn_info->insn
	      && INSN_P (insn_info->insn)
	      && (!deleted))
	    {
	      scan_stores (insn_info->store_rec, v, NULL);
	      if (insn_info->wild_read)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "wild read\n");
		  bitmap_clear (v);
		}
	      else if (insn_info->read_rec
		       || insn_info->non_frame_wild_read
		       || insn_info->frame_read)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    {
		      if (!insn_info->non_frame_wild_read
			  && !insn_info->frame_read)
			fprintf (dump_file, "regular read\n");
		      if (insn_info->non_frame_wild_read)
			fprintf (dump_file, "non-frame wild read\n");
		      if (insn_info->frame_read)
			fprintf (dump_file, "frame read\n");
		    }
		  scan_reads (insn_info, v, NULL);
		}
	    }

	  insn_info = insn_info->prev_insn;
	}
    }
}



/*----------------------------------------------------------------------------
   Sixth step.

   Delete stores made redundant by earlier stores (which store the same
   value) that couldn't be eliminated.
----------------------------------------------------------------------------*/

static void
dse_step6 (void)
{
  basic_block bb;

  FOR_ALL_BB_FN (bb, cfun)
    {
      bb_info_t bb_info = bb_table[bb->index];
      insn_info_t insn_info = bb_info->last_insn;

      while (insn_info)
	{
	  /* There may have been code deleted by the dce pass run before
	     this phase.
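	     (Such insns no longer satisfy INSN_P, or had their insn
	     field cleared when dse_step5 deleted them.)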
	     */
	  if (insn_info->insn
	      && INSN_P (insn_info->insn)
	      && !insn_info->cannot_delete)
	    {
	      store_info *s_info = insn_info->store_rec;

	      while (s_info && !s_info->is_set)
		s_info = s_info->next;
	      if (s_info
		  && s_info->redundant_reason
		  && s_info->redundant_reason->insn
		  && INSN_P (s_info->redundant_reason->insn))
		{
		  rtx_insn *rinsn = s_info->redundant_reason->insn;
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Locally deleting insn %d "
			     "because insn %d stores the "
			     "same value and couldn't be "
			     "eliminated\n",
			     INSN_UID (insn_info->insn),
			     INSN_UID (rinsn));
		  delete_dead_store_insn (insn_info);
		}
	    }
	  insn_info = insn_info->prev_insn;
	}
    }
}

/*----------------------------------------------------------------------------
   Seventh step.

   Destroy everything left standing.
----------------------------------------------------------------------------*/

static void
dse_step7 (void)
{
  bitmap_obstack_release (&dse_bitmap_obstack);
  obstack_free (&dse_obstack, NULL);

  end_alias_analysis ();
  free (bb_table);
  delete rtx_group_table;
  rtx_group_table = NULL;
  rtx_group_vec.release ();
  BITMAP_FREE (all_blocks);
  BITMAP_FREE (scratch);

  rtx_store_info_pool.release ();
  read_info_type_pool.release ();
  insn_info_type_pool.release ();
  dse_bb_info_type_pool.release ();
  group_info_pool.release ();
  deferred_change_pool.release ();
}


/* -------------------------------------------------------------------------
   DSE
   ------------------------------------------------------------------------- */

/* Callback for running pass_rtl_dse.  */

static unsigned int
rest_of_handle_dse (void)
{
  df_set_flags (DF_DEFER_INSN_RESCAN);

  /* Need the notes since we must track live hardregs in the forwards
     direction.  */
  df_note_add_problem ();
  df_analyze ();

  dse_step0 ();
  dse_step1 ();
  dse_step2_init ();
  if (dse_step2 ())
    {
      df_set_flags (DF_LR_RUN_DCE);
      df_analyze ();
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "doing global processing\n");
      dse_step3 ();
      dse_step4 ();
      dse_step5 ();
    }

  dse_step6 ();
  dse_step7 ();

  if (dump_file)
    fprintf (dump_file, "dse: local deletions = %d, global deletions = %d\n",
	     locally_deleted, globally_deleted);

  /* DSE can eliminate potentially-trapping MEMs.
     Remove any EH edges associated with them.
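     (This matters only when -fnon-call-exceptions is in effect, where
     deleting a trapping store can leave behind a dead EH edge.)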
     */
  if ((locally_deleted || globally_deleted)
      && cfun->can_throw_non_call_exceptions
      && purge_all_dead_edges ())
    {
      free_dominance_info (CDI_DOMINATORS);
      cleanup_cfg (0);
    }

  return 0;
}

namespace {

const pass_data pass_data_rtl_dse1 =
{
  RTL_PASS, /* type */
  "dse1", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_DSE1, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

class pass_rtl_dse1 : public rtl_opt_pass
{
public:
  pass_rtl_dse1 (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_rtl_dse1, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return optimize > 0 && flag_dse && dbg_cnt (dse1);
    }

  virtual unsigned int execute (function *) { return rest_of_handle_dse (); }

}; // class pass_rtl_dse1

} // anon namespace

rtl_opt_pass *
make_pass_rtl_dse1 (gcc::context *ctxt)
{
  return new pass_rtl_dse1 (ctxt);
}

namespace {

const pass_data pass_data_rtl_dse2 =
{
  RTL_PASS, /* type */
  "dse2", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_DSE2, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

class pass_rtl_dse2 : public rtl_opt_pass
{
public:
  pass_rtl_dse2 (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_rtl_dse2, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return optimize > 0 && flag_dse && dbg_cnt (dse2);
    }

  virtual unsigned int execute (function *) { return rest_of_handle_dse (); }

}; // class pass_rtl_dse2

} // anon namespace

rtl_opt_pass *
make_pass_rtl_dse2 (gcc::context *ctxt)
{
  return new pass_rtl_dse2 (ctxt);
}