/* Partial redundancy elimination / Hoisting for RTL.
   Copyright (C) 1997-2020 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* TODO
   - reordering of memory allocation and freeing to be more space efficient
   - calc rough register pressure information and use the info to drive all
     kinds of code motion (including code hoisting) in a unified way.
*/

/* References searched while implementing this.

   Compilers Principles, Techniques and Tools
   Aho, Sethi, Ullman
   Addison-Wesley, 1988

   Global Optimization by Suppression of Partial Redundancies
   E. Morel, C. Renvoise
   Communications of the ACM, Vol. 22, Num. 2, Feb. 1979

   A Portable Machine-Independent Global Optimizer - Design and Measurements
   Frederick Chow
   Stanford Ph.D. thesis, Dec. 1983

   A Fast Algorithm for Code Movement Optimization
   D.M. Dhamdhere
   ACM SIGPLAN Notices, Vol. 23, Num. 10, Oct. 1988

   A Solution to a Problem with Morel and Renvoise's
   Global Optimization by Suppression of Partial Redundancies
   K-H Drechsler, M.P. Stadel
   ACM TOPLAS, Vol. 10, Num. 4, Oct. 1988

   Practical Adaptation of the Global Optimization
   Algorithm of Morel and Renvoise
   D.M. Dhamdhere
   ACM TOPLAS, Vol. 13, Num. 2, Apr. 1991

   Efficiently Computing Static Single Assignment Form and the Control
   Dependence Graph
   R. Cytron, J. Ferrante, B.K. Rosen, M.N. Wegman, and F.K. Zadeck
   ACM TOPLAS, Vol. 13, Num. 4, Oct. 1991

   Lazy Code Motion
   J. Knoop, O. Ruthing, B. Steffen
   ACM SIGPLAN Notices, Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI

   What's In a Region? Or Computing Control Dependence Regions in Near-Linear
   Time for Reducible Flow Control
   Thomas Ball
   ACM Letters on Programming Languages and Systems,
   Vol. 2, Num. 1-4, Mar-Dec 1993

   An Efficient Representation for Sparse Sets
   Preston Briggs, Linda Torczon
   ACM Letters on Programming Languages and Systems,
   Vol. 2, Num. 1-4, Mar-Dec 1993

   A Variation of Knoop, Ruthing, and Steffen's Lazy Code Motion
   K-H Drechsler, M.P. Stadel
   ACM SIGPLAN Notices, Vol. 28, Num. 5, May 1993

   Partial Dead Code Elimination
   J. Knoop, O. Ruthing, B. Steffen
   ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994

   Effective Partial Redundancy Elimination
   P. Briggs, K.D. Cooper
   ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994

   The Program Structure Tree: Computing Control Regions in Linear Time
   R. Johnson, D. Pearson, K. Pingali
   ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994

   Optimal Code Motion: Theory and Practice
   J. Knoop, O. Ruthing, B. Steffen
   ACM TOPLAS, Vol. 16, Num. 4, Jul. 1994

   The Power of Assignment Motion
   J. Knoop, O. Ruthing, B. Steffen
   ACM SIGPLAN Notices, Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI

   Global Code Motion / Global Value Numbering
   C. Click
   ACM SIGPLAN Notices, Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI

   Value Driven Redundancy Elimination
   L.T. Simpson
   Rice University Ph.D. thesis, Apr. 1996

   Value Numbering
   L.T. Simpson
   Massively Scalar Compiler Project, Rice University, Sep. 1996

   High Performance Compilers for Parallel Computing
   Michael Wolfe
   Addison-Wesley, 1996

   Advanced Compiler Design and Implementation
   Steven Muchnick
   Morgan Kaufmann, 1997

   Building an Optimizing Compiler
   Robert Morgan
   Digital Press, 1998

   People wishing to speed up the code here should read:
     Elimination Algorithms for Data Flow Analysis
     B.G. Ryder, M.C. Paull
     ACM Computing Surveys, Vol. 18, Num. 3, Sep. 1986

     How to Analyze Large Programs Efficiently and Informatively
     D.M. Dhamdhere, B.K. Rosen, F.K. Zadeck
     ACM SIGPLAN Notices, Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI

   People wishing to do something different can find various possibilities
   in the above papers and elsewhere.
*/

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "predict.h"
#include "df.h"
#include "memmodel.h"
#include "tm_p.h"
#include "insn-config.h"
#include "print-rtl.h"
#include "regs.h"
#include "ira.h"
#include "recog.h"
#include "diagnostic-core.h"
#include "cfgrtl.h"
#include "cfganal.h"
#include "lcm.h"
#include "cfgcleanup.h"
#include "expr.h"
#include "intl.h"
#include "tree-pass.h"
#include "dbgcnt.h"
#include "gcse.h"
#include "gcse-common.h"
#include "function-abi.h"

/* We support GCSE via Partial Redundancy Elimination.  PRE optimizations
   are a superset of those done by classic GCSE.

   Two passes of copy/constant propagation are done around PRE or hoisting
   because the first one enables more GCSE and the second one helps to clean
   up the copies that PRE and HOIST create.  This is needed more for PRE than
   for HOIST because code hoisting will try to use an existing register
   containing the common subexpression rather than create a new one.  This is
   harder to do for PRE because of the code motion (which HOIST doesn't do).

   Expressions we are interested in GCSE-ing are of the form
   (set (pseudo-reg) (expression)).
   Function want_to_gcse_p says what these are.

   In addition, expressions in REG_EQUAL notes are candidates for GCSE-ing.
   This allows PRE to hoist expressions that are expressed in multiple insns,
   such as complex address calculations (e.g. for PIC code, or loads with a
   high part and a low part).

   PRE handles moving invariant expressions out of loops (by treating them as
   partially redundant).

   **********************

   We used to support multiple passes but there are diminishing returns in
   doing so.  The first pass usually makes 90% of the changes that are doable.
   A second pass can make a few more changes made possible by the first pass.
   Experiments show any further passes don't make enough changes to justify
   the expense.

   A study of spec92 using an unlimited number of passes:
   [1 pass] = 1208 substitutions, [2] = 577, [3] = 202, [4] = 192, [5] = 83,
   [6] = 34, [7] = 17, [8] = 9, [9] = 4, [10] = 4, [11] = 2,
   [12] = 2, [13] = 1, [15] = 1, [16] = 2, [41] = 1

   It was found that doing copy propagation between each pass enables further
   substitutions.

   This study was done before expressions in REG_EQUAL notes were added as
   candidate expressions for optimization, and before the GIMPLE optimizers
   were added.  Probably, multiple passes are even less efficient now than
   at the time when the study was conducted.

   PRE is quite expensive in complicated functions because the DFA can take
   a while to converge.  Hence we only perform one pass.

   **********************

   The steps for PRE are:

   1) Build the hash table of expressions we wish to GCSE (expr_hash_table).

   2) Perform the data flow analysis for PRE.

   3) Delete the redundant instructions.

   4) Insert the required copies [if any] that make the partially
      redundant instructions fully redundant.

   5) For other reaching expressions, insert an instruction to copy the value
      to a newly created pseudo that will reach the redundant instruction.

   The deletion is done first so that when we do insertions we
   know which pseudo reg to use.

   Various papers have argued that PRE DFA is expensive (O(n^2)) and others
   argue it is not.  The number of iterations for the algorithm to converge
   is typically 2-4 so I don't view it as that expensive (relatively speaking).

   PRE GCSE depends heavily on the second CPROP pass to clean up the copies
   we create.  To make an expression reach the place where it's redundant,
   the result of the expression is copied to a new register, and the redundant
   expression is deleted by replacing it with this new register.  Classic GCSE
   doesn't have this problem as much as it computes the reaching defs of
   each register in each block and thus can try to use an existing
   register.  */
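
/* An illustrative sketch of the PRE transformation (ours, not from the
   original comments; T stands for the new pseudo recorded in
   "reaching_reg"):

	before PRE			after PRE

	if (cond)			if (cond)
	  a = x + y;			  { a = x + y; t = a; }
	else				else
	  b = 1;			  { b = 1; t = x + y; }
	c = x + y;			c = t;

   The computation of x + y reaching C is partially redundant: it is
   available on the THEN path but not on the ELSE path.  Inserting the
   expression on the ELSE path makes it fully redundant, after which the
   original computation is replaced by a copy from T.  */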

/* GCSE global vars.  */

struct target_gcse default_target_gcse;
#if SWITCHABLE_TARGET
struct target_gcse *this_target_gcse = &default_target_gcse;
#endif

/* Set to non-zero if CSE should run after all GCSE optimizations are done.  */
int flag_rerun_cse_after_global_opts;

/* An obstack for our working variables.  */
static struct obstack gcse_obstack;

/* Hash table of expressions.  */

struct gcse_expr
{
  /* The expression.  */
  rtx expr;
  /* Index in the available expression bitmaps.  */
  int bitmap_index;
  /* Next entry with the same hash.  */
  struct gcse_expr *next_same_hash;
  /* List of anticipatable occurrences in basic blocks in the function.
     An "anticipatable occurrence" is one that is the first occurrence in the
     basic block, the operands are not modified in the basic block prior
     to the occurrence and the output is not used between the start of
     the block and the occurrence.  */
  struct gcse_occr *antic_occr;
  /* List of available occurrences in basic blocks in the function.
     An "available occurrence" is one that is the last occurrence in the
     basic block and the operands are not modified by following statements in
     the basic block [including this insn].  */
  struct gcse_occr *avail_occr;
  /* Non-null if the computation is PRE redundant.
     The value is the newly created pseudo-reg to record a copy of the
     expression in all the places that reach the redundant copy.  */
  rtx reaching_reg;
  /* Maximum distance in instructions this expression can travel.
     We avoid moving simple expressions for more than a few instructions
     to keep register pressure under control.
     A value of "0" removes restrictions on how far the expression can
     travel.  */
  HOST_WIDE_INT max_distance;
};
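
/* A hedged illustration of the two occurrence kinds above (our example,
   not from the original sources), for the expression x + y inside one
   basic block:

	a = x + y;	<- anticipatable: first occurrence, operands not
	x = ...;	   modified earlier in the block
	b = x + y;	<- available: last occurrence, operands not modified
			   between here and the end of the block

   A single occurrence can be both anticipatable and available if it is
   the only one in the block and its operands are unchanged throughout.  */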

/* Occurrence of an expression.
   There is one per basic block.  If a pattern appears more than once the
   last appearance is used [or first for anticipatable expressions].  */

struct gcse_occr
{
  /* Next occurrence of this expression.  */
  struct gcse_occr *next;
  /* The insn that computes the expression.  */
  rtx_insn *insn;
  /* Nonzero if this [anticipatable] occurrence has been deleted.  */
  char deleted_p;
  /* Nonzero if this [available] occurrence has been copied to
     reaching_reg.  */
  /* ??? This is mutually exclusive with deleted_p, so they could share
     the same byte.  */
  char copied_p;
};

typedef struct gcse_occr *occr_t;

/* Expression hash tables.
   Each hash table is an array of buckets.
   ??? It is known that if it were an array of entries, structure elements
   `next_same_hash' and `bitmap_index' wouldn't be necessary.  However, it is
   not clear whether in the final analysis a sufficient amount of memory would
   be saved as the size of the available expression bitmaps would be larger
   [one could build a mapping table without holes afterwards though].
   Someday I'll perform the computation and figure it out.  */

struct gcse_hash_table_d
{
  /* The table itself.
     This is an array of `expr_hash_table_size' elements.  */
  struct gcse_expr **table;

  /* Size of the hash table, in elements.  */
  unsigned int size;

  /* Number of hash table elements.  */
  unsigned int n_elems;
};

/* Expression hash table.  */
static struct gcse_hash_table_d expr_hash_table;

/* This is a list of expressions which are MEMs and will be used by load
   or store motion.
   Load motion tracks MEMs which aren't killed by anything except itself,
   i.e. loads and stores to a single location.
   We can then allow movement of these MEM refs with a little special
   allowance.  (All stores copy the same value to the reaching reg used
   for the loads.)  This means all values used to store into memory must have
   no side effects so we can re-issue the setter value.  */

struct ls_expr
{
  struct gcse_expr *expr;	/* Gcse expression reference for LM.  */
  rtx pattern;			/* Pattern of this mem.  */
  rtx pattern_regs;		/* List of registers mentioned by the mem.  */
  vec<rtx_insn *> stores;	/* INSN list of stores seen.  */
  struct ls_expr *next;		/* Next in the list.  */
  int invalid;			/* Invalid for some reason.  */
  int index;			/* If it maps to a bitmap index.  */
  unsigned int hash_index;	/* Index when in a hash table.  */
  rtx reaching_reg;		/* Register to use when re-writing.  */
};
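
/* For illustration only (hypothetical source code, not from this file):
   load motion would track a MEM such as *p in

	while (...)
	  {
	    tmp = *p;		<- load of the tracked location
	    ...
	    *p = tmp2;		<- store; tmp2 must be free of side effects
	  }

   where nothing else can kill *p, so the loads and stores can all be
   rewritten through a single reaching register.  */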

/* Head of the list of load/store memory refs.  */
static struct ls_expr *pre_ldst_mems = NULL;

struct pre_ldst_expr_hasher : nofree_ptr_hash <ls_expr>
{
  typedef value_type compare_type;
  static inline hashval_t hash (const ls_expr *);
  static inline bool equal (const ls_expr *, const ls_expr *);
};

/* Hashtable helpers.  */
inline hashval_t
pre_ldst_expr_hasher::hash (const ls_expr *x)
{
  int do_not_record_p = 0;
  return
    hash_rtx (x->pattern, GET_MODE (x->pattern), &do_not_record_p, NULL, false);
}

static int expr_equiv_p (const_rtx, const_rtx);

inline bool
pre_ldst_expr_hasher::equal (const ls_expr *ptr1,
			     const ls_expr *ptr2)
{
  return expr_equiv_p (ptr1->pattern, ptr2->pattern);
}

/* Hashtable for the load/store memory refs.  */
static hash_table<pre_ldst_expr_hasher> *pre_ldst_table;

/* Bitmap containing one bit for each register in the program.
   Used when performing GCSE to track which registers have been set since
   the start of the basic block.  */
static regset reg_set_bitmap;

/* Array, indexed by basic block number for a list of insns which modify
   memory within that block.  */
static vec<rtx_insn *> *modify_mem_list;
static bitmap modify_mem_list_set;

/* This array parallels modify_mem_list, except that it stores MEMs
   being set and their canonicalized memory addresses.  */
static vec<modify_pair> *canon_modify_mem_list;

/* Bitmap indexed by block numbers to record which blocks contain
   function calls.  */
static bitmap blocks_with_calls;

/* Various variables for statistics gathering.  */

/* Memory used in a pass.
   This isn't intended to be absolutely precise.  Its intent is only
   to keep an eye on memory usage.  */
static int bytes_used;

/* GCSE substitutions made.  */
static int gcse_subst_count;
/* Number of copy instructions created.  */
static int gcse_create_count;

/* Doing code hoisting.  */
static bool doing_code_hoisting_p = false;

/* For available exprs.  */
static sbitmap *ae_kill;

/* Data stored for each basic block.  */
struct bb_data
{
  /* Maximal register pressure inside basic block for given register class
     (defined only for the pressure classes).  */
  int max_reg_pressure[N_REG_CLASSES];
  /* Recorded register pressure of basic block before trying to hoist
     an expression.  Will be used to restore the register pressure
     if the expression should not be hoisted.  */
  int old_pressure;
  /* Recorded register live_in info of basic block during code hoisting
     process.  BACKUP is used to record live_in info before trying to
     hoist an expression, and will be used to restore LIVE_IN if the
     expression should not be hoisted.  */
  bitmap live_in, backup;
};

#define BB_DATA(bb) ((struct bb_data *) (bb)->aux)

static basic_block curr_bb;

/* Current register pressure for each pressure class.  */
static int curr_reg_pressure[N_REG_CLASSES];


static void compute_can_copy (void);
static void *gmalloc (size_t) ATTRIBUTE_MALLOC;
static void *gcalloc (size_t, size_t) ATTRIBUTE_MALLOC;
static void *gcse_alloc (unsigned long);
static void alloc_gcse_mem (void);
static void free_gcse_mem (void);
static void hash_scan_insn (rtx_insn *, struct gcse_hash_table_d *);
static void hash_scan_set (rtx, rtx_insn *, struct gcse_hash_table_d *);
static void hash_scan_clobber (rtx, rtx_insn *, struct gcse_hash_table_d *);
static void hash_scan_call (rtx, rtx_insn *, struct gcse_hash_table_d *);
static int oprs_unchanged_p (const_rtx, const rtx_insn *, int);
static int oprs_anticipatable_p (const_rtx, const rtx_insn *);
static int oprs_available_p (const_rtx, const rtx_insn *);
static void insert_expr_in_table (rtx, machine_mode, rtx_insn *, int, int,
				  HOST_WIDE_INT, struct gcse_hash_table_d *);
static unsigned int hash_expr (const_rtx, machine_mode, int *, int);
static void record_last_reg_set_info (rtx_insn *, int);
static void record_last_mem_set_info (rtx_insn *);
static void record_last_set_info (rtx, const_rtx, void *);
static void compute_hash_table (struct gcse_hash_table_d *);
static void alloc_hash_table (struct gcse_hash_table_d *);
static void free_hash_table (struct gcse_hash_table_d *);
static void compute_hash_table_work (struct gcse_hash_table_d *);
static void dump_hash_table (FILE *, const char *, struct gcse_hash_table_d *);
static void compute_local_properties (sbitmap *, sbitmap *, sbitmap *,
				      struct gcse_hash_table_d *);
static void mems_conflict_for_gcse_p (rtx, const_rtx, void *);
static int load_killed_in_block_p (const_basic_block, int, const_rtx, int);
static void alloc_pre_mem (int, int);
static void free_pre_mem (void);
static struct edge_list *compute_pre_data (void);
static int pre_expr_reaches_here_p (basic_block, struct gcse_expr *,
				    basic_block);
static void insert_insn_end_basic_block (struct gcse_expr *, basic_block);
static void pre_insert_copy_insn (struct gcse_expr *, rtx_insn *);
static void pre_insert_copies (void);
static int pre_delete (void);
static int pre_gcse (struct edge_list *);
static int one_pre_gcse_pass (void);
static void add_label_notes (rtx, rtx_insn *);
static void alloc_code_hoist_mem (int, int);
static void free_code_hoist_mem (void);
static void compute_code_hoist_vbeinout (void);
static void compute_code_hoist_data (void);
static int should_hoist_expr_to_dom (basic_block, struct gcse_expr *,
				     basic_block,
				     sbitmap, HOST_WIDE_INT, int *,
				     enum reg_class,
				     int *, bitmap, rtx_insn *);
static int hoist_code (void);
static enum reg_class get_regno_pressure_class (int regno, int *nregs);
static enum reg_class get_pressure_class_and_nregs (rtx_insn *insn, int *nregs);
static int one_code_hoisting_pass (void);
static rtx_insn *process_insert_insn (struct gcse_expr *);
static int pre_edge_insert (struct edge_list *, struct gcse_expr **);
static int pre_expr_reaches_here_p_work (basic_block, struct gcse_expr *,
					 basic_block, char *);
static struct ls_expr *ldst_entry (rtx);
static void free_ldst_entry (struct ls_expr *);
static void free_ld_motion_mems (void);
static void print_ldst_list (FILE *);
static struct ls_expr *find_rtx_in_ldst (rtx);
static int simple_mem (const_rtx);
static void invalidate_any_buried_refs (rtx);
static void compute_ld_motion_mems (void);
static void trim_ld_motion_mems (void);
static void update_ld_motion_stores (struct gcse_expr *);
static void clear_modify_mem_tables (void);
static void free_modify_mem_tables (void);

#define GNEW(T)			((T *) gmalloc (sizeof (T)))
#define GCNEW(T)		((T *) gcalloc (1, sizeof (T)))

#define GNEWVEC(T, N)		((T *) gmalloc (sizeof (T) * (N)))
#define GCNEWVEC(T, N)		((T *) gcalloc ((N), sizeof (T)))

#define GNEWVAR(T, S)		((T *) gmalloc ((S)))
#define GCNEWVAR(T, S)		((T *) gcalloc (1, (S)))

#define GOBNEW(T)		((T *) gcse_alloc (sizeof (T)))
#define GOBNEWVAR(T, S)		((T *) gcse_alloc ((S)))

/* Misc. utilities.  */

#define can_copy \
  (this_target_gcse->x_can_copy)
#define can_copy_init_p \
  (this_target_gcse->x_can_copy_init_p)

/* Compute which modes support reg/reg copy operations.  */

static void
compute_can_copy (void)
{
  int i;
#ifndef AVOID_CCMODE_COPIES
  rtx reg;
  rtx_insn *insn;
#endif
  memset (can_copy, 0, NUM_MACHINE_MODES);

  start_sequence ();
  for (i = 0; i < NUM_MACHINE_MODES; i++)
    if (GET_MODE_CLASS (i) == MODE_CC)
      {
#ifdef AVOID_CCMODE_COPIES
	can_copy[i] = 0;
#else
	reg = gen_rtx_REG ((machine_mode) i, LAST_VIRTUAL_REGISTER + 1);
	insn = emit_insn (gen_rtx_SET (reg, reg));
	if (recog (PATTERN (insn), insn, NULL) >= 0)
	  can_copy[i] = 1;
#endif
      }
    else
      can_copy[i] = 1;

  end_sequence ();
}

/* Returns whether the mode supports reg/reg copy operations.  */

bool
can_copy_p (machine_mode mode)
{
  if (! can_copy_init_p)
    {
      compute_can_copy ();
      can_copy_init_p = true;
    }

  return can_copy[mode] != 0;
}

/* Cover function to xmalloc to record bytes allocated.  */

static void *
gmalloc (size_t size)
{
  bytes_used += size;
  return xmalloc (size);
}

/* Cover function to xcalloc to record bytes allocated.  */

static void *
gcalloc (size_t nelem, size_t elsize)
{
  bytes_used += nelem * elsize;
  return xcalloc (nelem, elsize);
}

/* Cover function to obstack_alloc.  */

static void *
gcse_alloc (unsigned long size)
{
  bytes_used += size;
  return obstack_alloc (&gcse_obstack, size);
}

/* Allocate memory for the reg/memory set tracking tables.
   This is called at the start of each pass.  */

static void
alloc_gcse_mem (void)
{
  /* Allocate vars to track sets of regs.  */
  reg_set_bitmap = ALLOC_REG_SET (NULL);

  /* Allocate array to keep a list of insns which modify memory in each
     basic block.  The two typedefs are needed to work around the
     pre-processor limitation with template types in macro arguments.  */
  typedef vec<rtx_insn *> vec_rtx_heap;
  typedef vec<modify_pair> vec_modify_pair_heap;
  modify_mem_list = GCNEWVEC (vec_rtx_heap, last_basic_block_for_fn (cfun));
  canon_modify_mem_list = GCNEWVEC (vec_modify_pair_heap,
				    last_basic_block_for_fn (cfun));
  modify_mem_list_set = BITMAP_ALLOC (NULL);
  blocks_with_calls = BITMAP_ALLOC (NULL);
}

/* Free memory allocated by alloc_gcse_mem.  */

static void
free_gcse_mem (void)
{
  FREE_REG_SET (reg_set_bitmap);

  free_modify_mem_tables ();
  BITMAP_FREE (modify_mem_list_set);
  BITMAP_FREE (blocks_with_calls);
}

/* Compute the local properties of each recorded expression.

   Local properties are those that are defined by the block, irrespective of
   other blocks.

   An expression is transparent in a block if its operands are not modified
   in the block.

   An expression is computed (locally available) in a block if it is computed
   at least once and the expression would contain the same value if the
   computation was moved to the end of the block.

   An expression is locally anticipatable in a block if it is computed at
   least once and the expression would contain the same value if the
   computation was moved to the beginning of the block.

   We call this routine for PRE and code hoisting.  They both compute
   basically the same information and thus can easily share this code.

   TRANSP, COMP, and ANTLOC are destination sbitmaps for recording local
   properties.  If NULL, then it is not necessary to compute or record that
   particular property.

   TABLE controls which hash table to look at.  */

static void
compute_local_properties (sbitmap *transp, sbitmap *comp, sbitmap *antloc,
			  struct gcse_hash_table_d *table)
{
  unsigned int i;

  /* Initialize any bitmaps that were passed in.  */
  if (transp)
    {
      bitmap_vector_ones (transp, last_basic_block_for_fn (cfun));
    }

  if (comp)
    bitmap_vector_clear (comp, last_basic_block_for_fn (cfun));
  if (antloc)
    bitmap_vector_clear (antloc, last_basic_block_for_fn (cfun));

  for (i = 0; i < table->size; i++)
    {
      struct gcse_expr *expr;

      for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash)
	{
	  int indx = expr->bitmap_index;
	  struct gcse_occr *occr;

	  /* The expression is transparent in this block if it is not killed.
	     We start by assuming all are transparent [none are killed], and
	     then reset the bits for those that are.  */
	  if (transp)
	    compute_transp (expr->expr, indx, transp,
			    blocks_with_calls,
			    modify_mem_list_set,
			    canon_modify_mem_list);

	  /* The occurrences recorded in antic_occr are exactly those that
	     we want to set to nonzero in ANTLOC.  */
	  if (antloc)
	    for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
	      {
		bitmap_set_bit (antloc[BLOCK_FOR_INSN (occr->insn)->index], indx);

		/* While we're scanning the table, this is a good place to
		   initialize this.  */
		occr->deleted_p = 0;
	      }

	  /* The occurrences recorded in avail_occr are exactly those that
	     we want to set to nonzero in COMP.  */
	  if (comp)
	    for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
	      {
		bitmap_set_bit (comp[BLOCK_FOR_INSN (occr->insn)->index], indx);

		/* While we're scanning the table, this is a good place to
		   initialize this.  */
		occr->copied_p = 0;
	      }

	  /* While we're scanning the table, this is a good place to
	     initialize this.  */
	  expr->reaching_reg = 0;
	}
    }
}
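
/* A small worked example of these bits (ours, not from the original
   comments).  For the expression x + y and a block B containing

	x = ...;
	a = x + y;

   TRANSP[B] is 0 (an operand of x + y is modified in B), ANTLOC[B] is 0
   (moving the computation to the start of B would yield a different
   value), and COMP[B] is 1 (nothing after the computation modifies x or
   y, so moving it to the end of B preserves the value).  */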

/* Hash table support.  */

struct reg_avail_info
{
  basic_block last_bb;
  int first_set;
  int last_set;
};

static struct reg_avail_info *reg_avail_info;
static basic_block current_bb;

/* See whether X, the source of a set, is something we want to consider for
   GCSE.  */

static int
want_to_gcse_p (rtx x, machine_mode mode, HOST_WIDE_INT *max_distance_ptr)
{
#ifdef STACK_REGS
  /* On register stack architectures, don't GCSE constants from the
     constant pool, as the benefits are often swamped by the overhead
     of shuffling the register stack between basic blocks.  */
  if (IS_STACK_MODE (GET_MODE (x)))
    x = avoid_constant_pool_reference (x);
#endif

  /* GCSE'ing constants:

     We do not specifically distinguish between constant and non-constant
     expressions in PRE and Hoist.  We use set_src_cost below to limit
     the maximum distance simple expressions can travel.

     Nevertheless, constants are much easier to GCSE, and, hence,
     it is easy to overdo the optimizations.  Usually, excessive PRE and
     Hoisting of constants leads to increased register pressure.

     RA can deal with this by rematerializing some of the constants.
     Therefore, it is important that the back-end generates sets of constants
     in a way that allows reload to rematerialize them under high register
     pressure, i.e., a pseudo register with REG_EQUAL to constant
     is set only once.  Failing to do so will result in IRA/reload
     spilling such constants under high register pressure instead of
     rematerializing them.  */

  switch (GET_CODE (x))
    {
    case REG:
    case SUBREG:
    case CALL:
      return 0;

    CASE_CONST_ANY:
      if (!doing_code_hoisting_p)
	/* Do not PRE constants.  */
	return 0;

      /* FALLTHRU */

    default:
      if (doing_code_hoisting_p)
	/* PRE doesn't implement max_distance restriction.  */
	{
	  int cost;
	  HOST_WIDE_INT max_distance;

	  gcc_assert (!optimize_function_for_speed_p (cfun)
		      && optimize_function_for_size_p (cfun));
	  cost = set_src_cost (x, mode, 0);

	  if (cost < COSTS_N_INSNS (param_gcse_unrestricted_cost))
	    {
	      max_distance
		= ((HOST_WIDE_INT) param_gcse_cost_distance_ratio * cost) / 10;
	      if (max_distance == 0)
		return 0;

	      gcc_assert (max_distance > 0);
	    }
	  else
	    max_distance = 0;

	  if (max_distance_ptr)
	    *max_distance_ptr = max_distance;
	}

      return can_assign_to_reg_without_clobbers_p (x, mode);
    }
}
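
/* A worked example of the distance computation above (the parameter
   values are assumptions; see params.opt for the authoritative
   defaults).  With --param gcse-cost-distance-ratio=10 and a simple
   expression whose set_src_cost is COSTS_N_INSNS (2), i.e. 8 under the
   usual COSTS_N_INSNS (N) == (N) * 4 definition:

	max_distance = (10 * 8) / 10 = 8

   so the expression may travel at most 8 instructions.  Expressions
   costing COSTS_N_INSNS (param_gcse_unrestricted_cost) or more instead
   get max_distance == 0, which removes the restriction entirely.  */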

/* Used internally by can_assign_to_reg_without_clobbers_p.  */

static GTY(()) rtx_insn *test_insn;

/* Return true if we can assign X to a pseudo register of mode MODE
   such that the resulting insn does not result in clobbering a hard
   register as a side-effect.

   Additionally, if the target requires it, check that the resulting insn
   can be copied.  If it cannot, this means that X is special and probably
   has hidden side-effects we don't want to mess with.

   This function is typically used by code motion passes, to verify
   that it is safe to insert an insn without worrying about clobbering
   maybe live hard regs.  */

bool
can_assign_to_reg_without_clobbers_p (rtx x, machine_mode mode)
{
  int num_clobbers = 0;
  int icode;
  bool can_assign = false;

  /* If this is a valid operand, we are OK.  If it's VOIDmode, we aren't.  */
  if (general_operand (x, mode))
    return 1;
  else if (GET_MODE (x) == VOIDmode)
    return 0;

  /* Otherwise, check if we can make a valid insn from it.  First initialize
     our test insn if we haven't already.  */
  if (test_insn == 0)
    {
      test_insn
	= make_insn_raw (gen_rtx_SET (gen_rtx_REG (word_mode,
						   FIRST_PSEUDO_REGISTER * 2),
				      const0_rtx));
      SET_NEXT_INSN (test_insn) = SET_PREV_INSN (test_insn) = 0;
      INSN_LOCATION (test_insn) = UNKNOWN_LOCATION;
    }

  /* Now make an insn like the one we would make when GCSE'ing and see if
     valid.  */
  PUT_MODE (SET_DEST (PATTERN (test_insn)), mode);
  SET_SRC (PATTERN (test_insn)) = x;

  icode = recog (PATTERN (test_insn), test_insn, &num_clobbers);

  /* If the test insn is valid and doesn't need clobbers, and the target also
     has no objections, we're good.  */
  if (icode >= 0
      && (num_clobbers == 0 || !added_clobbers_hard_reg_p (icode))
      && ! (targetm.cannot_copy_insn_p
	    && targetm.cannot_copy_insn_p (test_insn)))
    can_assign = true;

  /* Make sure test_insn doesn't have any pointers into GC space.  */
  SET_SRC (PATTERN (test_insn)) = NULL_RTX;

  return can_assign;
}
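
/* A hedged usage sketch (hypothetical caller, not part of this file).
   A code motion pass deciding whether the source of a candidate SET can
   live in a fresh pseudo might do:

	rtx src = SET_SRC (pat);
	machine_mode mode = GET_MODE (SET_DEST (pat));
	if (can_assign_to_reg_without_clobbers_p (src, mode))
	  ... safe to emit (set (reg:MODE new_pseudo) src) ...

   This mirrors how want_to_gcse_p above uses the predicate.  */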

/* Return nonzero if the operands of expression X are unchanged from the
   start of INSN's basic block up to but not including INSN (if AVAIL_P == 0),
   or from INSN to the end of INSN's basic block (if AVAIL_P != 0).  */

static int
oprs_unchanged_p (const_rtx x, const rtx_insn *insn, int avail_p)
{
  int i, j;
  enum rtx_code code;
  const char *fmt;

  if (x == 0)
    return 1;

  code = GET_CODE (x);
  switch (code)
    {
    case REG:
      {
	struct reg_avail_info *info = &reg_avail_info[REGNO (x)];

	if (info->last_bb != current_bb)
	  return 1;
	if (avail_p)
	  return info->last_set < DF_INSN_LUID (insn);
	else
	  return info->first_set >= DF_INSN_LUID (insn);
      }

    case MEM:
      if (! flag_gcse_lm
	  || load_killed_in_block_p (current_bb, DF_INSN_LUID (insn),
				     x, avail_p))
	return 0;
      else
	return oprs_unchanged_p (XEXP (x, 0), insn, avail_p);

    case PRE_DEC:
    case PRE_INC:
    case POST_DEC:
    case POST_INC:
    case PRE_MODIFY:
    case POST_MODIFY:
      return 0;

    case PC:
    case CC0: /*FIXME*/
    case CONST:
    CASE_CONST_ANY:
    case SYMBOL_REF:
    case LABEL_REF:
    case ADDR_VEC:
    case ADDR_DIFF_VEC:
      return 1;

    default:
      break;
    }

  for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
    {
      if (fmt[i] == 'e')
	{
	  /* If we are about to do the last recursive call needed at this
	     level, change it into iteration.  This function is called enough
	     to be worth it.  */
	  if (i == 0)
	    return oprs_unchanged_p (XEXP (x, i), insn, avail_p);

	  else if (! oprs_unchanged_p (XEXP (x, i), insn, avail_p))
	    return 0;
	}
      else if (fmt[i] == 'E')
	for (j = 0; j < XVECLEN (x, i); j++)
	  if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, avail_p))
	    return 0;
    }

  return 1;
}

/* Info passed from load_killed_in_block_p to mems_conflict_for_gcse_p.  */

struct mem_conflict_info
{
  /* A memory reference for a load instruction, mems_conflict_for_gcse_p will
     see if a memory store conflicts with this memory load.  */
  const_rtx mem;

  /* True if mems_conflict_for_gcse_p finds a conflict between two memory
     references.  */
  bool conflict;
};

/* DEST is the output of an instruction.  If it is a memory reference and
   possibly conflicts with the load found in DATA, then communicate this
   information back through DATA.  */

static void
mems_conflict_for_gcse_p (rtx dest, const_rtx setter ATTRIBUTE_UNUSED,
			  void *data)
{
  struct mem_conflict_info *mci = (struct mem_conflict_info *) data;

  while (GET_CODE (dest) == SUBREG
	 || GET_CODE (dest) == ZERO_EXTRACT
	 || GET_CODE (dest) == STRICT_LOW_PART)
    dest = XEXP (dest, 0);

  /* If DEST is not a MEM, then it will not conflict with the load.  Note
     that function calls are assumed to clobber memory, but are handled
     elsewhere.  */
  if (! MEM_P (dest))
    return;

  /* If we are setting a MEM in our list of specially recognized MEMs,
     don't mark as killed this time.  */
  if (pre_ldst_mems != NULL && expr_equiv_p (dest, mci->mem))
    {
      if (!find_rtx_in_ldst (dest))
	mci->conflict = true;
      return;
    }

  if (true_dependence (dest, GET_MODE (dest), mci->mem))
    mci->conflict = true;
}

/* Return nonzero if the expression in X (a memory reference) is killed
   in block BB before or after the insn with the LUID in UID_LIMIT.
   AVAIL_P is nonzero for kills after UID_LIMIT, and zero for kills
   before UID_LIMIT.

   To check the entire block, set UID_LIMIT to max_uid + 1 and
   AVAIL_P to 0.  */

static int
load_killed_in_block_p (const_basic_block bb, int uid_limit, const_rtx x,
			int avail_p)
{
  vec<rtx_insn *> list = modify_mem_list[bb->index];
  rtx_insn *setter;
  unsigned ix;

  /* If this is a read-only MEM then we aren't going to be changing it.  */
  if (MEM_READONLY_P (x))
    return 0;

  FOR_EACH_VEC_ELT_REVERSE (list, ix, setter)
    {
      struct mem_conflict_info mci;

      /* Ignore entries in the list that do not apply.  */
      if ((avail_p
	   && DF_INSN_LUID (setter) < uid_limit)
	  || (! avail_p
	      && DF_INSN_LUID (setter) > uid_limit))
	continue;

      /* If SETTER is a call everything is clobbered.  Note that calls
	 to pure functions are never put on the list, so we need not
	 worry about them.  */
      if (CALL_P (setter))
	return 1;

      /* SETTER must be an INSN of some kind that sets memory.  Call
	 note_stores to examine each hunk of memory that is modified.  */
      mci.mem = x;
      mci.conflict = false;
      note_stores (setter, mems_conflict_for_gcse_p, &mci);
      if (mci.conflict)
	return 1;
    }
  return 0;
}
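
/* A worked illustration of the UID_LIMIT/AVAIL_P convention (ours, not
   from the original comments).  For a block whose insns have LUIDs 1..9
   and a load appearing at LUID 5:

	load_killed_in_block_p (bb, 5, x, 0) considers stores with
	LUID <= 5, i.e. kills between the block start and the load;

	load_killed_in_block_p (bb, 5, x, 1) considers stores with
	LUID >= 5, i.e. kills between the load and the block end.  */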

/* Return nonzero if the operands of expression X are unchanged from
   the start of INSN's basic block up to but not including INSN.  */

static int
oprs_anticipatable_p (const_rtx x, const rtx_insn *insn)
{
  return oprs_unchanged_p (x, insn, 0);
}

/* Return nonzero if the operands of expression X are unchanged from
   INSN to the end of INSN's basic block.  */

static int
oprs_available_p (const_rtx x, const rtx_insn *insn)
{
  return oprs_unchanged_p (x, insn, 1);
}

/* Hash expression X.

   MODE is only used if X is a CONST_INT.  DO_NOT_RECORD_P is a boolean
   indicating if a volatile operand is found or if the expression contains
   something we don't want to insert in the table.  HASH_TABLE_SIZE is
   the current size of the hash table to be probed.  */

static unsigned int
hash_expr (const_rtx x, machine_mode mode, int *do_not_record_p,
	   int hash_table_size)
{
  unsigned int hash;

  *do_not_record_p = 0;

  hash = hash_rtx (x, mode, do_not_record_p, NULL, /*have_reg_qty=*/false);
  return hash % hash_table_size;
}

/* Return nonzero if exp1 is equivalent to exp2.  */

static int
expr_equiv_p (const_rtx x, const_rtx y)
{
  return exp_equiv_p (x, y, 0, true);
}

11041debfc3dSmrg /* Insert expression X in INSN in the hash TABLE.
11051debfc3dSmrg If it is already present, record it as the last occurrence in INSN's
11061debfc3dSmrg basic block.
11071debfc3dSmrg
11081debfc3dSmrg MODE is the mode of the value X is being stored into.
11091debfc3dSmrg It is only used if X is a CONST_INT.
11101debfc3dSmrg
11111debfc3dSmrg ANTIC_P is nonzero if X is an anticipatable expression.
11121debfc3dSmrg AVAIL_P is nonzero if X is an available expression.
11131debfc3dSmrg
11141debfc3dSmrg MAX_DISTANCE is the maximum distance in instructions this expression can
11151debfc3dSmrg be moved. */
11161debfc3dSmrg
11171debfc3dSmrg static void
insert_expr_in_table(rtx x,machine_mode mode,rtx_insn * insn,int antic_p,int avail_p,HOST_WIDE_INT max_distance,struct gcse_hash_table_d * table)11181debfc3dSmrg insert_expr_in_table (rtx x, machine_mode mode, rtx_insn *insn,
11191debfc3dSmrg int antic_p,
11201debfc3dSmrg int avail_p, HOST_WIDE_INT max_distance,
11211debfc3dSmrg struct gcse_hash_table_d *table)
11221debfc3dSmrg {
11231debfc3dSmrg int found, do_not_record_p;
11241debfc3dSmrg unsigned int hash;
11251debfc3dSmrg struct gcse_expr *cur_expr, *last_expr = NULL;
11261debfc3dSmrg struct gcse_occr *antic_occr, *avail_occr;
11271debfc3dSmrg
11281debfc3dSmrg hash = hash_expr (x, mode, &do_not_record_p, table->size);
11291debfc3dSmrg
11301debfc3dSmrg /* Do not insert expression in table if it contains volatile operands,
11311debfc3dSmrg or if hash_expr determines the expression is something we don't want
11321debfc3dSmrg to or can't handle. */
11331debfc3dSmrg if (do_not_record_p)
11341debfc3dSmrg return;
11351debfc3dSmrg
11361debfc3dSmrg cur_expr = table->table[hash];
11371debfc3dSmrg found = 0;
11381debfc3dSmrg
1139a2dc1f3fSmrg while (cur_expr && (found = expr_equiv_p (cur_expr->expr, x)) == 0)
11401debfc3dSmrg {
11411debfc3dSmrg /* If the expression isn't found, save a pointer to the end of
11421debfc3dSmrg the list. */
11431debfc3dSmrg last_expr = cur_expr;
11441debfc3dSmrg cur_expr = cur_expr->next_same_hash;
11451debfc3dSmrg }
11461debfc3dSmrg
11471debfc3dSmrg if (! found)
11481debfc3dSmrg {
11491debfc3dSmrg cur_expr = GOBNEW (struct gcse_expr);
11501debfc3dSmrg bytes_used += sizeof (struct gcse_expr);
11511debfc3dSmrg if (table->table[hash] == NULL)
11521debfc3dSmrg /* This is the first pattern that hashed to this index. */
11531debfc3dSmrg table->table[hash] = cur_expr;
11541debfc3dSmrg else
11551debfc3dSmrg /* Add EXPR to end of this hash chain. */
11561debfc3dSmrg last_expr->next_same_hash = cur_expr;
11571debfc3dSmrg
11581debfc3dSmrg /* Set the fields of the expr element. */
11591debfc3dSmrg cur_expr->expr = x;
11601debfc3dSmrg cur_expr->bitmap_index = table->n_elems++;
11611debfc3dSmrg cur_expr->next_same_hash = NULL;
11621debfc3dSmrg cur_expr->antic_occr = NULL;
11631debfc3dSmrg cur_expr->avail_occr = NULL;
11641debfc3dSmrg gcc_assert (max_distance >= 0);
11651debfc3dSmrg cur_expr->max_distance = max_distance;
11661debfc3dSmrg }
11671debfc3dSmrg else
11681debfc3dSmrg gcc_assert (cur_expr->max_distance == max_distance);
11691debfc3dSmrg
11701debfc3dSmrg /* Now record the occurrence(s). */
11711debfc3dSmrg if (antic_p)
11721debfc3dSmrg {
11731debfc3dSmrg antic_occr = cur_expr->antic_occr;
11741debfc3dSmrg
11751debfc3dSmrg if (antic_occr
11761debfc3dSmrg && BLOCK_FOR_INSN (antic_occr->insn) != BLOCK_FOR_INSN (insn))
11771debfc3dSmrg antic_occr = NULL;
11781debfc3dSmrg
11791debfc3dSmrg if (antic_occr)
11801debfc3dSmrg /* Found another instance of the expression in the same basic block.
11811debfc3dSmrg Prefer the currently recorded one. We want the first one in the
11821debfc3dSmrg block and the block is scanned from start to end. */
11831debfc3dSmrg ; /* nothing to do */
11841debfc3dSmrg else
11851debfc3dSmrg {
11861debfc3dSmrg /* First occurrence of this expression in this basic block. */
11871debfc3dSmrg antic_occr = GOBNEW (struct gcse_occr);
11881debfc3dSmrg bytes_used += sizeof (struct gcse_occr);
11891debfc3dSmrg antic_occr->insn = insn;
11901debfc3dSmrg antic_occr->next = cur_expr->antic_occr;
11911debfc3dSmrg antic_occr->deleted_p = 0;
11921debfc3dSmrg cur_expr->antic_occr = antic_occr;
11931debfc3dSmrg }
11941debfc3dSmrg }
11951debfc3dSmrg
11961debfc3dSmrg if (avail_p)
11971debfc3dSmrg {
11981debfc3dSmrg avail_occr = cur_expr->avail_occr;
11991debfc3dSmrg
12001debfc3dSmrg if (avail_occr
12011debfc3dSmrg && BLOCK_FOR_INSN (avail_occr->insn) == BLOCK_FOR_INSN (insn))
12021debfc3dSmrg {
12031debfc3dSmrg /* Found another instance of the expression in the same basic block.
12041debfc3dSmrg Prefer this occurrence to the currently recorded one. We want
12051debfc3dSmrg the last one in the block and the block is scanned from start
12061debfc3dSmrg to end. */
12071debfc3dSmrg avail_occr->insn = insn;
12081debfc3dSmrg }
12091debfc3dSmrg else
12101debfc3dSmrg {
12111debfc3dSmrg /* First occurrence of this expression in this basic block. */
12121debfc3dSmrg avail_occr = GOBNEW (struct gcse_occr);
12131debfc3dSmrg bytes_used += sizeof (struct gcse_occr);
12141debfc3dSmrg avail_occr->insn = insn;
12151debfc3dSmrg avail_occr->next = cur_expr->avail_occr;
12161debfc3dSmrg avail_occr->deleted_p = 0;
12171debfc3dSmrg cur_expr->avail_occr = avail_occr;
12181debfc3dSmrg }
12191debfc3dSmrg }
12201debfc3dSmrg }
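/* A sketch of the resulting structure (illustrative, not normative):
   expressions that collide in the hash are chained through
   next_same_hash; each entry carries an antic_occr list holding the
   first occurrence per block and an avail_occr list holding the last
   occurrence per block.  */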
12211debfc3dSmrg
12221debfc3dSmrg /* Scan SET present in INSN and add an entry to the hash TABLE. */
12231debfc3dSmrg
12241debfc3dSmrg static void
12251debfc3dSmrg hash_scan_set (rtx set, rtx_insn *insn, struct gcse_hash_table_d *table)
12261debfc3dSmrg {
12271debfc3dSmrg rtx src = SET_SRC (set);
12281debfc3dSmrg rtx dest = SET_DEST (set);
12291debfc3dSmrg rtx note;
12301debfc3dSmrg
12311debfc3dSmrg if (GET_CODE (src) == CALL)
12321debfc3dSmrg hash_scan_call (src, insn, table);
12331debfc3dSmrg
12341debfc3dSmrg else if (REG_P (dest))
12351debfc3dSmrg {
12361debfc3dSmrg unsigned int regno = REGNO (dest);
12371debfc3dSmrg HOST_WIDE_INT max_distance = 0;
12381debfc3dSmrg
12391debfc3dSmrg /* See if a REG_EQUAL note shows this equivalent to a simpler expression.
12401debfc3dSmrg
12411debfc3dSmrg This allows us to do a single GCSE pass and still eliminate
12421debfc3dSmrg redundant constants, addresses or other expressions that are
12431debfc3dSmrg constructed with multiple instructions.
12441debfc3dSmrg
12451debfc3dSmrg However, keep the original SRC if INSN is a simple reg-reg move.
12461debfc3dSmrg In this case, there will almost always be a REG_EQUAL note on the
12471debfc3dSmrg insn that sets SRC. By recording the REG_EQUAL value here as SRC
12481debfc3dSmrg for INSN, we miss copy propagation opportunities and we perform the
12491debfc3dSmrg same PRE GCSE operation repeatedly on the same REG_EQUAL value if we
12501debfc3dSmrg do more than one PRE GCSE pass.
12511debfc3dSmrg
12521debfc3dSmrg Note that this does not impede profitable constant propagations. We
12531debfc3dSmrg "look through" reg-reg sets in lookup_avail_set. */
12541debfc3dSmrg note = find_reg_equal_equiv_note (insn);
12551debfc3dSmrg if (note != 0
12561debfc3dSmrg && REG_NOTE_KIND (note) == REG_EQUAL
12571debfc3dSmrg && !REG_P (src)
12581debfc3dSmrg && want_to_gcse_p (XEXP (note, 0), GET_MODE (dest), NULL))
12591debfc3dSmrg src = XEXP (note, 0), set = gen_rtx_SET (dest, src);
12601debfc3dSmrg
12611debfc3dSmrg /* Only record sets of pseudo-regs in the hash table. */
12621debfc3dSmrg if (regno >= FIRST_PSEUDO_REGISTER
12631debfc3dSmrg /* Don't GCSE something if we can't do a reg/reg copy. */
12641debfc3dSmrg && can_copy_p (GET_MODE (dest))
12651debfc3dSmrg /* GCSE commonly inserts instructions after the insn. We can't
12661debfc3dSmrg do that easily for EH edges so disable GCSE on these for now. */
12671debfc3dSmrg /* ??? We can now easily create new EH landing pads at the
12681debfc3dSmrg gimple level, for splitting edges; there's no reason we
12691debfc3dSmrg can't do the same thing at the rtl level. */
12701debfc3dSmrg && !can_throw_internal (insn)
12711debfc3dSmrg /* Is SET_SRC something we want to gcse? */
12721debfc3dSmrg && want_to_gcse_p (src, GET_MODE (dest), &max_distance)
12731debfc3dSmrg /* Don't CSE a nop. */
12741debfc3dSmrg && ! set_noop_p (set)
12751debfc3dSmrg /* Don't GCSE if it has an attached REG_EQUIV note.
12761debfc3dSmrg At this point only function parameters should have
12771debfc3dSmrg REG_EQUIV notes, and if the argument slot is used somewhere
12781debfc3dSmrg explicitly, it means the address of the parameter has been taken,
12791debfc3dSmrg so we should not extend the lifetime of the pseudo. */
12801debfc3dSmrg && (note == NULL_RTX || ! MEM_P (XEXP (note, 0))))
12811debfc3dSmrg {
12821debfc3dSmrg /* An expression is not anticipatable if its operands are
12831debfc3dSmrg modified before this insn or if this is not the only SET in
12841debfc3dSmrg this insn. The latter condition does not have to mean that
12851debfc3dSmrg SRC itself is not anticipatable, but we just will not be
12861debfc3dSmrg able to handle code motion of insns with multiple sets. */
12871debfc3dSmrg int antic_p = oprs_anticipatable_p (src, insn)
12881debfc3dSmrg && !multiple_sets (insn);
12891debfc3dSmrg /* An expression is not available if its operands are
12901debfc3dSmrg subsequently modified, including this insn. It's also not
12911debfc3dSmrg available if this is a branch, because we can't insert
12921debfc3dSmrg a set after the branch. */
12931debfc3dSmrg int avail_p = (oprs_available_p (src, insn)
12941debfc3dSmrg && ! JUMP_P (insn));
12951debfc3dSmrg
12961debfc3dSmrg insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p,
12971debfc3dSmrg max_distance, table);
12981debfc3dSmrg }
12991debfc3dSmrg }
13001debfc3dSmrg /* In the case of a store we want to consider the memory value as
13011debfc3dSmrg available in the REG stored in that memory. This makes it possible
13021debfc3dSmrg to remove redundant loads due to stores to the same location. */
13031debfc3dSmrg else if (flag_gcse_las && REG_P (src) && MEM_P (dest))
13041debfc3dSmrg {
13051debfc3dSmrg unsigned int regno = REGNO (src);
13061debfc3dSmrg HOST_WIDE_INT max_distance = 0;
13071debfc3dSmrg
13081debfc3dSmrg /* Only record sets of pseudo-regs in the hash table. */
13091debfc3dSmrg if (regno >= FIRST_PSEUDO_REGISTER
13101debfc3dSmrg /* Don't GCSE something if we can't do a reg/reg copy. */
13111debfc3dSmrg && can_copy_p (GET_MODE (src))
13121debfc3dSmrg /* GCSE commonly inserts instructions after the insn. We can't
13131debfc3dSmrg do that easily for EH edges so disable GCSE on these for now. */
13141debfc3dSmrg && !can_throw_internal (insn)
13151debfc3dSmrg /* Is SET_DEST something we want to gcse? */
13161debfc3dSmrg && want_to_gcse_p (dest, GET_MODE (dest), &max_distance)
13171debfc3dSmrg /* Don't CSE a nop. */
13181debfc3dSmrg && ! set_noop_p (set)
13191debfc3dSmrg /* Don't GCSE if it has an attached REG_EQUIV note.
13201debfc3dSmrg At this point only function parameters should have
13211debfc3dSmrg REG_EQUIV notes, and if the argument slot is used somewhere
13221debfc3dSmrg explicitly, it means the address of the parameter has been taken,
13231debfc3dSmrg so we should not extend the lifetime of the pseudo. */
13241debfc3dSmrg && ((note = find_reg_note (insn, REG_EQUIV, NULL_RTX)) == 0
13251debfc3dSmrg || ! MEM_P (XEXP (note, 0))))
13261debfc3dSmrg {
13271debfc3dSmrg /* Stores are never anticipatable. */
13281debfc3dSmrg int antic_p = 0;
13291debfc3dSmrg /* An expression is not available if its operands are
13301debfc3dSmrg subsequently modified, including this insn. It's also not
13311debfc3dSmrg available if this is a branch, because we can't insert
13321debfc3dSmrg a set after the branch. */
13331debfc3dSmrg int avail_p = oprs_available_p (dest, insn) && ! JUMP_P (insn);
13341debfc3dSmrg
13351debfc3dSmrg /* Record the memory expression (DEST) in the hash table. */
13361debfc3dSmrg insert_expr_in_table (dest, GET_MODE (dest), insn,
13371debfc3dSmrg antic_p, avail_p, max_distance, table);
13381debfc3dSmrg }
13391debfc3dSmrg }
13401debfc3dSmrg }
13411debfc3dSmrg
13421debfc3dSmrg static void
13431debfc3dSmrg hash_scan_clobber (rtx x ATTRIBUTE_UNUSED, rtx_insn *insn ATTRIBUTE_UNUSED,
13441debfc3dSmrg struct gcse_hash_table_d *table ATTRIBUTE_UNUSED)
13451debfc3dSmrg {
13461debfc3dSmrg /* Currently nothing to do. */
13471debfc3dSmrg }
13481debfc3dSmrg
13491debfc3dSmrg static void
13501debfc3dSmrg hash_scan_call (rtx x ATTRIBUTE_UNUSED, rtx_insn *insn ATTRIBUTE_UNUSED,
13511debfc3dSmrg struct gcse_hash_table_d *table ATTRIBUTE_UNUSED)
13521debfc3dSmrg {
13531debfc3dSmrg /* Currently nothing to do. */
13541debfc3dSmrg }
13551debfc3dSmrg
13561debfc3dSmrg /* Process INSN and add hash table entries as appropriate. */
13571debfc3dSmrg
13581debfc3dSmrg static void
13591debfc3dSmrg hash_scan_insn (rtx_insn *insn, struct gcse_hash_table_d *table)
13601debfc3dSmrg {
13611debfc3dSmrg rtx pat = PATTERN (insn);
13621debfc3dSmrg int i;
13631debfc3dSmrg
13641debfc3dSmrg /* Pick out the sets of INSN and, for other forms of instructions,
13651debfc3dSmrg record what's been modified. */
13661debfc3dSmrg
13671debfc3dSmrg if (GET_CODE (pat) == SET)
13681debfc3dSmrg hash_scan_set (pat, insn, table);
13691debfc3dSmrg
13701debfc3dSmrg else if (GET_CODE (pat) == CLOBBER)
13711debfc3dSmrg hash_scan_clobber (pat, insn, table);
13721debfc3dSmrg
13731debfc3dSmrg else if (GET_CODE (pat) == CALL)
13741debfc3dSmrg hash_scan_call (pat, insn, table);
13751debfc3dSmrg
13761debfc3dSmrg else if (GET_CODE (pat) == PARALLEL)
13771debfc3dSmrg for (i = 0; i < XVECLEN (pat, 0); i++)
13781debfc3dSmrg {
13791debfc3dSmrg rtx x = XVECEXP (pat, 0, i);
13801debfc3dSmrg
13811debfc3dSmrg if (GET_CODE (x) == SET)
13821debfc3dSmrg hash_scan_set (x, insn, table);
13831debfc3dSmrg else if (GET_CODE (x) == CLOBBER)
13841debfc3dSmrg hash_scan_clobber (x, insn, table);
13851debfc3dSmrg else if (GET_CODE (x) == CALL)
13861debfc3dSmrg hash_scan_call (x, insn, table);
13871debfc3dSmrg }
13881debfc3dSmrg }
13891debfc3dSmrg
13901debfc3dSmrg /* Dump the hash table TABLE to file FILE under the name NAME. */
13911debfc3dSmrg
13921debfc3dSmrg static void
13931debfc3dSmrg dump_hash_table (FILE *file, const char *name, struct gcse_hash_table_d *table)
13941debfc3dSmrg {
13951debfc3dSmrg int i;
13961debfc3dSmrg /* Flattened out table, so it's printed in proper order. */
13971debfc3dSmrg struct gcse_expr **flat_table;
13981debfc3dSmrg unsigned int *hash_val;
13991debfc3dSmrg struct gcse_expr *expr;
14001debfc3dSmrg
14011debfc3dSmrg flat_table = XCNEWVEC (struct gcse_expr *, table->n_elems);
14021debfc3dSmrg hash_val = XNEWVEC (unsigned int, table->n_elems);
14031debfc3dSmrg
14041debfc3dSmrg for (i = 0; i < (int) table->size; i++)
14051debfc3dSmrg for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash)
14061debfc3dSmrg {
14071debfc3dSmrg flat_table[expr->bitmap_index] = expr;
14081debfc3dSmrg hash_val[expr->bitmap_index] = i;
14091debfc3dSmrg }
14101debfc3dSmrg
14111debfc3dSmrg fprintf (file, "%s hash table (%d buckets, %d entries)\n",
14121debfc3dSmrg name, table->size, table->n_elems);
14131debfc3dSmrg
14141debfc3dSmrg for (i = 0; i < (int) table->n_elems; i++)
14151debfc3dSmrg if (flat_table[i] != 0)
14161debfc3dSmrg {
14171debfc3dSmrg expr = flat_table[i];
14181debfc3dSmrg fprintf (file, "Index %d (hash value %d; max distance "
14191debfc3dSmrg HOST_WIDE_INT_PRINT_DEC ")\n ",
14201debfc3dSmrg expr->bitmap_index, hash_val[i], expr->max_distance);
14211debfc3dSmrg print_rtl (file, expr->expr);
14221debfc3dSmrg fprintf (file, "\n");
14231debfc3dSmrg }
14241debfc3dSmrg
14251debfc3dSmrg fprintf (file, "\n");
14261debfc3dSmrg
14271debfc3dSmrg free (flat_table);
14281debfc3dSmrg free (hash_val);
14291debfc3dSmrg }
14301debfc3dSmrg
14311debfc3dSmrg /* Record register first/last/block set information for REGNO in INSN.
14321debfc3dSmrg
14331debfc3dSmrg first_set records the first place in the block where the register
14341debfc3dSmrg is set and is used to compute "anticipatability".
14351debfc3dSmrg
14361debfc3dSmrg last_set records the last place in the block where the register
14371debfc3dSmrg is set and is used to compute "availability".
14381debfc3dSmrg
14391debfc3dSmrg last_bb records the block for which first_set and last_set are
14401debfc3dSmrg valid, as a quick test to invalidate them. */
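/* A rough example: if r5 is set by the insns with luids 3 and 9 in the
   current block, then first_set == 3 and last_set == 9; an occurrence
   of an expression using r5 is anticipatable only before luid 3 and
   available only after luid 9.  */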
14411debfc3dSmrg
14421debfc3dSmrg static void
14431debfc3dSmrg record_last_reg_set_info (rtx_insn *insn, int regno)
14441debfc3dSmrg {
14451debfc3dSmrg struct reg_avail_info *info = &reg_avail_info[regno];
14461debfc3dSmrg int luid = DF_INSN_LUID (insn);
14471debfc3dSmrg
14481debfc3dSmrg info->last_set = luid;
14491debfc3dSmrg if (info->last_bb != current_bb)
14501debfc3dSmrg {
14511debfc3dSmrg info->last_bb = current_bb;
14521debfc3dSmrg info->first_set = luid;
14531debfc3dSmrg }
14541debfc3dSmrg }
14551debfc3dSmrg
14561debfc3dSmrg /* Record memory modification information for INSN. We do not actually care
14571debfc3dSmrg about the memory location(s) that are set, or even how they are set (consider
14581debfc3dSmrg a CALL_INSN). We merely need to record which insns modify memory. */
14591debfc3dSmrg
14601debfc3dSmrg static void
14611debfc3dSmrg record_last_mem_set_info (rtx_insn *insn)
14621debfc3dSmrg {
14631debfc3dSmrg if (! flag_gcse_lm)
14641debfc3dSmrg return;
14651debfc3dSmrg
14661debfc3dSmrg record_last_mem_set_info_common (insn, modify_mem_list,
14671debfc3dSmrg canon_modify_mem_list,
14681debfc3dSmrg modify_mem_list_set,
14691debfc3dSmrg blocks_with_calls);
14701debfc3dSmrg }
14711debfc3dSmrg
14721debfc3dSmrg /* Called from compute_hash_table via note_stores to handle one
14731debfc3dSmrg SET or CLOBBER in an insn. DATA is really the instruction in which
14741debfc3dSmrg the SET is taking place. */
14751debfc3dSmrg
14761debfc3dSmrg static void
14771debfc3dSmrg record_last_set_info (rtx dest, const_rtx setter ATTRIBUTE_UNUSED, void *data)
14781debfc3dSmrg {
14791debfc3dSmrg rtx_insn *last_set_insn = (rtx_insn *) data;
14801debfc3dSmrg
14811debfc3dSmrg if (GET_CODE (dest) == SUBREG)
14821debfc3dSmrg dest = SUBREG_REG (dest);
14831debfc3dSmrg
14841debfc3dSmrg if (REG_P (dest))
14851debfc3dSmrg record_last_reg_set_info (last_set_insn, REGNO (dest));
14861debfc3dSmrg else if (MEM_P (dest)
14871debfc3dSmrg /* Ignore pushes, they clobber nothing. */
14881debfc3dSmrg && ! push_operand (dest, GET_MODE (dest)))
14891debfc3dSmrg record_last_mem_set_info (last_set_insn);
14901debfc3dSmrg }
14911debfc3dSmrg
14921debfc3dSmrg /* Top level function to create an expression hash table.
14931debfc3dSmrg
14941debfc3dSmrg Expression entries are placed in the hash table if
14951debfc3dSmrg - they are of the form (set (pseudo-reg) src),
14961debfc3dSmrg - src is something we want to perform GCSE on,
14971debfc3dSmrg - none of the operands are subsequently modified in the block
14981debfc3dSmrg
14991debfc3dSmrg SRC may be any expression that want_to_gcse_p accepts (historically just a pseudo-reg or a const_int).
15001debfc3dSmrg
15011debfc3dSmrg TABLE is the table computed. */
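/* For example (illustrative only), the insn

       (set (reg:SI 105) (plus:SI (reg:SI 106) (const_int 4)))

   gets an entry keyed on its SET_SRC; a set of a hard register, or a
   store to memory without -fgcse-las, does not.  */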
15021debfc3dSmrg
15031debfc3dSmrg static void
15041debfc3dSmrg compute_hash_table_work (struct gcse_hash_table_d *table)
15051debfc3dSmrg {
15061debfc3dSmrg int i;
15071debfc3dSmrg
15081debfc3dSmrg /* re-Cache any INSN_LIST nodes we have allocated. */
15091debfc3dSmrg clear_modify_mem_tables ();
15101debfc3dSmrg /* Some working arrays used to track first and last set in each block. */
15111debfc3dSmrg reg_avail_info = GNEWVEC (struct reg_avail_info, max_reg_num ());
15121debfc3dSmrg
15131debfc3dSmrg for (i = 0; i < max_reg_num (); ++i)
15141debfc3dSmrg reg_avail_info[i].last_bb = NULL;
15151debfc3dSmrg
15161debfc3dSmrg FOR_EACH_BB_FN (current_bb, cfun)
15171debfc3dSmrg {
15181debfc3dSmrg rtx_insn *insn;
15191debfc3dSmrg unsigned int regno;
15201debfc3dSmrg
15211debfc3dSmrg /* First pass over the instructions records information used to
15221debfc3dSmrg determine when registers and memory are first and last set. */
15231debfc3dSmrg FOR_BB_INSNS (current_bb, insn)
15241debfc3dSmrg {
15251debfc3dSmrg if (!NONDEBUG_INSN_P (insn))
15261debfc3dSmrg continue;
15271debfc3dSmrg
15281debfc3dSmrg if (CALL_P (insn))
15291debfc3dSmrg {
15301debfc3dSmrg hard_reg_set_iterator hrsi;
1531*8feb0f0bSmrg
1532*8feb0f0bSmrg /* We don't track modes of hard registers, so we need
1533*8feb0f0bSmrg to be conservative and assume that partial kills
1534*8feb0f0bSmrg are full kills. */
1535*8feb0f0bSmrg HARD_REG_SET callee_clobbers
1536*8feb0f0bSmrg = insn_callee_abi (insn).full_and_partial_reg_clobbers ();
1537*8feb0f0bSmrg EXECUTE_IF_SET_IN_HARD_REG_SET (callee_clobbers, 0, regno, hrsi)
15381debfc3dSmrg record_last_reg_set_info (insn, regno);
15391debfc3dSmrg
1540a05ac97eSmrg if (! RTL_CONST_OR_PURE_CALL_P (insn)
1541a05ac97eSmrg || RTL_LOOPING_CONST_OR_PURE_CALL_P (insn))
15421debfc3dSmrg record_last_mem_set_info (insn);
15431debfc3dSmrg }
15441debfc3dSmrg
1545*8feb0f0bSmrg note_stores (insn, record_last_set_info, insn);
15461debfc3dSmrg }
15471debfc3dSmrg
15481debfc3dSmrg /* The next pass builds the hash table. */
15491debfc3dSmrg FOR_BB_INSNS (current_bb, insn)
15501debfc3dSmrg if (NONDEBUG_INSN_P (insn))
15511debfc3dSmrg hash_scan_insn (insn, table);
15521debfc3dSmrg }
15531debfc3dSmrg
15541debfc3dSmrg free (reg_avail_info);
15551debfc3dSmrg reg_avail_info = NULL;
15561debfc3dSmrg }
15571debfc3dSmrg
15581debfc3dSmrg /* Allocate space for the set/expr hash TABLE.
15591debfc3dSmrg The function's maximum insn count is used to determine the number of buckets. */
15601debfc3dSmrg
15611debfc3dSmrg static void
15621debfc3dSmrg alloc_hash_table (struct gcse_hash_table_d *table)
15631debfc3dSmrg {
15641debfc3dSmrg int n;
15651debfc3dSmrg
15661debfc3dSmrg n = get_max_insn_count ();
15671debfc3dSmrg
15681debfc3dSmrg table->size = n / 4;
15691debfc3dSmrg if (table->size < 11)
15701debfc3dSmrg table->size = 11;
15711debfc3dSmrg
15721debfc3dSmrg /* Attempt to maintain efficient use of hash table.
15731debfc3dSmrg Making it an odd number is simplest for now.
15741debfc3dSmrg ??? Later take some measurements. */
15751debfc3dSmrg table->size |= 1;
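  /* As an illustration, a function with 1000 insns gets 1000 / 4 = 250
     buckets, made odd to 251; small functions bottom out at the
     11-bucket minimum.  */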
15761debfc3dSmrg n = table->size * sizeof (struct gcse_expr *);
15771debfc3dSmrg table->table = GNEWVAR (struct gcse_expr *, n);
15781debfc3dSmrg }
15791debfc3dSmrg
15801debfc3dSmrg /* Free things allocated by alloc_hash_table. */
15811debfc3dSmrg
15821debfc3dSmrg static void
15831debfc3dSmrg free_hash_table (struct gcse_hash_table_d *table)
15841debfc3dSmrg {
15851debfc3dSmrg free (table->table);
15861debfc3dSmrg }
15871debfc3dSmrg
15881debfc3dSmrg /* Compute the expression hash table TABLE. */
15891debfc3dSmrg
15901debfc3dSmrg static void
15911debfc3dSmrg compute_hash_table (struct gcse_hash_table_d *table)
15921debfc3dSmrg {
15931debfc3dSmrg /* Initialize count of number of entries in hash table. */
15941debfc3dSmrg table->n_elems = 0;
15951debfc3dSmrg memset (table->table, 0, table->size * sizeof (struct gcse_expr *));
15961debfc3dSmrg
15971debfc3dSmrg compute_hash_table_work (table);
15981debfc3dSmrg }
15991debfc3dSmrg
16001debfc3dSmrg /* Expression tracking support. */
16011debfc3dSmrg
16021debfc3dSmrg /* Clear canon_modify_mem_list and modify_mem_list tables. */
16031debfc3dSmrg static void
16041debfc3dSmrg clear_modify_mem_tables (void)
16051debfc3dSmrg {
16061debfc3dSmrg unsigned i;
16071debfc3dSmrg bitmap_iterator bi;
16081debfc3dSmrg
16091debfc3dSmrg EXECUTE_IF_SET_IN_BITMAP (modify_mem_list_set, 0, i, bi)
16101debfc3dSmrg {
16111debfc3dSmrg modify_mem_list[i].release ();
16121debfc3dSmrg canon_modify_mem_list[i].release ();
16131debfc3dSmrg }
16141debfc3dSmrg bitmap_clear (modify_mem_list_set);
16151debfc3dSmrg bitmap_clear (blocks_with_calls);
16161debfc3dSmrg }
16171debfc3dSmrg
16181debfc3dSmrg /* Release memory used by modify_mem_list_set. */
16191debfc3dSmrg
16201debfc3dSmrg static void
16211debfc3dSmrg free_modify_mem_tables (void)
16221debfc3dSmrg {
16231debfc3dSmrg clear_modify_mem_tables ();
16241debfc3dSmrg free (modify_mem_list);
16251debfc3dSmrg free (canon_modify_mem_list);
16261debfc3dSmrg modify_mem_list = 0;
16271debfc3dSmrg canon_modify_mem_list = 0;
16281debfc3dSmrg }
16291debfc3dSmrg
16301debfc3dSmrg /* Compute PRE+LCM working variables. */
16311debfc3dSmrg
16321debfc3dSmrg /* Local properties of expressions. */
16331debfc3dSmrg
16341debfc3dSmrg /* Nonzero for expressions that are transparent in the block. */
16351debfc3dSmrg static sbitmap *transp;
16361debfc3dSmrg
16371debfc3dSmrg /* Nonzero for expressions that are computed (available) in the block. */
16381debfc3dSmrg static sbitmap *comp;
16391debfc3dSmrg
16401debfc3dSmrg /* Nonzero for expressions that are locally anticipatable in the block. */
16411debfc3dSmrg static sbitmap *antloc;
16421debfc3dSmrg
16431debfc3dSmrg /* Nonzero for expressions where this block is an optimal computation
16441debfc3dSmrg point. */
16451debfc3dSmrg static sbitmap *pre_optimal;
16461debfc3dSmrg
16471debfc3dSmrg /* Nonzero for expressions which are redundant in a particular block. */
16481debfc3dSmrg static sbitmap *pre_redundant;
16491debfc3dSmrg
16501debfc3dSmrg /* Nonzero for expressions which should be inserted on a specific edge. */
16511debfc3dSmrg static sbitmap *pre_insert_map;
16521debfc3dSmrg
16531debfc3dSmrg /* Nonzero for expressions which should be deleted in a specific block. */
16541debfc3dSmrg static sbitmap *pre_delete_map;
16551debfc3dSmrg
16561debfc3dSmrg /* Allocate vars used for PRE analysis. */
16571debfc3dSmrg
16581debfc3dSmrg static void
16591debfc3dSmrg alloc_pre_mem (int n_blocks, int n_exprs)
16601debfc3dSmrg {
16611debfc3dSmrg transp = sbitmap_vector_alloc (n_blocks, n_exprs);
16621debfc3dSmrg comp = sbitmap_vector_alloc (n_blocks, n_exprs);
16631debfc3dSmrg antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
16641debfc3dSmrg
16651debfc3dSmrg pre_optimal = NULL;
16661debfc3dSmrg pre_redundant = NULL;
16671debfc3dSmrg pre_insert_map = NULL;
16681debfc3dSmrg pre_delete_map = NULL;
16691debfc3dSmrg ae_kill = sbitmap_vector_alloc (n_blocks, n_exprs);
16701debfc3dSmrg
16711debfc3dSmrg /* pre_insert and pre_delete are allocated later. */
16721debfc3dSmrg }
16731debfc3dSmrg
16741debfc3dSmrg /* Free vars used for PRE analysis. */
16751debfc3dSmrg
16761debfc3dSmrg static void
16771debfc3dSmrg free_pre_mem (void)
16781debfc3dSmrg {
16791debfc3dSmrg sbitmap_vector_free (transp);
16801debfc3dSmrg sbitmap_vector_free (comp);
16811debfc3dSmrg
16821debfc3dSmrg /* ANTLOC and AE_KILL are freed just after pre_lcm finishes. */
16831debfc3dSmrg
16841debfc3dSmrg if (pre_optimal)
16851debfc3dSmrg sbitmap_vector_free (pre_optimal);
16861debfc3dSmrg if (pre_redundant)
16871debfc3dSmrg sbitmap_vector_free (pre_redundant);
16881debfc3dSmrg if (pre_insert_map)
16891debfc3dSmrg sbitmap_vector_free (pre_insert_map);
16901debfc3dSmrg if (pre_delete_map)
16911debfc3dSmrg sbitmap_vector_free (pre_delete_map);
16921debfc3dSmrg
16931debfc3dSmrg transp = comp = NULL;
16941debfc3dSmrg pre_optimal = pre_redundant = pre_insert_map = pre_delete_map = NULL;
16951debfc3dSmrg }
16961debfc3dSmrg
16971debfc3dSmrg /* Remove certain expressions from the anticipatable and transparent
16981debfc3dSmrg sets of basic blocks that have an incoming abnormal edge.
16991debfc3dSmrg For PRE, remove potentially trapping expressions to avoid placing
17001debfc3dSmrg them on abnormal edges. For hoisting, remove memory references that
17011debfc3dSmrg can be clobbered by calls. */
17021debfc3dSmrg
17031debfc3dSmrg static void
17041debfc3dSmrg prune_expressions (bool pre_p)
17051debfc3dSmrg {
17061debfc3dSmrg struct gcse_expr *expr;
17071debfc3dSmrg unsigned int ui;
17081debfc3dSmrg basic_block bb;
17091debfc3dSmrg
17101debfc3dSmrg auto_sbitmap prune_exprs (expr_hash_table.n_elems);
17111debfc3dSmrg bitmap_clear (prune_exprs);
17121debfc3dSmrg for (ui = 0; ui < expr_hash_table.size; ui++)
17131debfc3dSmrg {
17141debfc3dSmrg for (expr = expr_hash_table.table[ui]; expr; expr = expr->next_same_hash)
17151debfc3dSmrg {
17161debfc3dSmrg /* Note potentially trapping expressions. */
17171debfc3dSmrg if (may_trap_p (expr->expr))
17181debfc3dSmrg {
17191debfc3dSmrg bitmap_set_bit (prune_exprs, expr->bitmap_index);
17201debfc3dSmrg continue;
17211debfc3dSmrg }
17221debfc3dSmrg
17231debfc3dSmrg if (!pre_p && contains_mem_rtx_p (expr->expr))
17241debfc3dSmrg /* Note memory references that can be clobbered by a call.
17251debfc3dSmrg We do not split abnormal edges in hoisting, so if a
17261debfc3dSmrg memory reference were hoisted along an abnormal edge,
17271debfc3dSmrg it would be placed /before/ the call. Therefore, only
17281debfc3dSmrg constant memory references can be hoisted along abnormal
17291debfc3dSmrg edges. */
17301debfc3dSmrg {
17311debfc3dSmrg rtx x = expr->expr;
17321debfc3dSmrg
17331debfc3dSmrg /* Common cases where we might find the MEM which may allow us
17341debfc3dSmrg to avoid pruning the expression. */
17351debfc3dSmrg while (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
17361debfc3dSmrg x = XEXP (x, 0);
17371debfc3dSmrg
17381debfc3dSmrg /* If we found the MEM, go ahead and look at it to see if it has
17391debfc3dSmrg properties that allow us to avoid pruning its expression out
17401debfc3dSmrg of the tables. */
17411debfc3dSmrg if (MEM_P (x))
17421debfc3dSmrg {
17431debfc3dSmrg if (GET_CODE (XEXP (x, 0)) == SYMBOL_REF
17441debfc3dSmrg && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)))
17451debfc3dSmrg continue;
17461debfc3dSmrg
17471debfc3dSmrg if (MEM_READONLY_P (x)
17481debfc3dSmrg && !MEM_VOLATILE_P (x)
17491debfc3dSmrg && MEM_NOTRAP_P (x))
17501debfc3dSmrg /* Constant memory reference, e.g., a PIC address. */
17511debfc3dSmrg continue;
17521debfc3dSmrg }
17531debfc3dSmrg
17541debfc3dSmrg /* ??? Optimally, we would use interprocedural alias
17551debfc3dSmrg analysis to determine if this mem is actually killed
17561debfc3dSmrg by this call. */
17571debfc3dSmrg
17581debfc3dSmrg bitmap_set_bit (prune_exprs, expr->bitmap_index);
17591debfc3dSmrg }
17601debfc3dSmrg }
17611debfc3dSmrg }
17621debfc3dSmrg
17631debfc3dSmrg FOR_EACH_BB_FN (bb, cfun)
17641debfc3dSmrg {
17651debfc3dSmrg edge e;
17661debfc3dSmrg edge_iterator ei;
17671debfc3dSmrg
17681debfc3dSmrg /* If the current block is the destination of an abnormal edge, we
17691debfc3dSmrg kill all trapping (for PRE) and memory (for hoist) expressions
17701debfc3dSmrg because we won't be able to properly place the instruction on
17711debfc3dSmrg the edge. So make them neither anticipatable nor transparent.
17721debfc3dSmrg This is fairly conservative.
17731debfc3dSmrg
17741debfc3dSmrg ??? For hoisting it may be necessary to check for set-and-jump
17751debfc3dSmrg instructions here, not just for abnormal edges. The general problem
17761debfc3dSmrg is that when an expression cannot be placed right at the end of
17771debfc3dSmrg a basic block, we should account for any side effects of subsequent
17781debfc3dSmrg jump instructions that could clobber the expression. It would
17791debfc3dSmrg be best to implement this check along the lines of
17801debfc3dSmrg should_hoist_expr_to_dom where the target block is already known
17811debfc3dSmrg and, hence, there's no need to conservatively prune expressions on
17821debfc3dSmrg "intermediate" set-and-jump instructions. */
17831debfc3dSmrg FOR_EACH_EDGE (e, ei, bb->preds)
17841debfc3dSmrg if ((e->flags & EDGE_ABNORMAL)
17851debfc3dSmrg && (pre_p || CALL_P (BB_END (e->src))))
17861debfc3dSmrg {
17871debfc3dSmrg bitmap_and_compl (antloc[bb->index],
17881debfc3dSmrg antloc[bb->index], prune_exprs);
17891debfc3dSmrg bitmap_and_compl (transp[bb->index],
17901debfc3dSmrg transp[bb->index], prune_exprs);
17911debfc3dSmrg break;
17921debfc3dSmrg }
17931debfc3dSmrg }
17941debfc3dSmrg }
17951debfc3dSmrg
17961debfc3dSmrg /* It may be necessary to insert a large number of insns on edges to
17971debfc3dSmrg make the existing occurrences of expressions fully redundant. This
17981debfc3dSmrg routine examines the set of insertions and deletions and if the ratio
17991debfc3dSmrg of insertions to deletions is too high for a particular expression, then
18001debfc3dSmrg the expression is removed from the insertion/deletion sets.
18011debfc3dSmrg
18021debfc3dSmrg N_ELEMS is the number of elements in the hash table. */
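/* A hedged example: if an expression would have to be inserted on 64
   edges to delete only two computations, the ratio 64 / 2 = 32 likely
   exceeds --param max-gcse-insertion-ratio and the expression is
   dropped from both maps.  */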
18031debfc3dSmrg
18041debfc3dSmrg static void
18051debfc3dSmrg prune_insertions_deletions (int n_elems)
18061debfc3dSmrg {
18071debfc3dSmrg sbitmap_iterator sbi;
18081debfc3dSmrg
18091debfc3dSmrg /* We always use I to iterate over blocks/edges and J to iterate over
18101debfc3dSmrg expressions. */
18111debfc3dSmrg unsigned int i, j;
18121debfc3dSmrg
18131debfc3dSmrg /* Counts for the number of times an expression needs to be inserted and
18141debfc3dSmrg number of times an expression can be removed as a result. */
18151debfc3dSmrg int *insertions = GCNEWVEC (int, n_elems);
18161debfc3dSmrg int *deletions = GCNEWVEC (int, n_elems);
18171debfc3dSmrg
18181debfc3dSmrg /* Set of expressions which require too many insertions relative to
18191debfc3dSmrg the number of deletions achieved. We will prune these out of the
18201debfc3dSmrg insertion/deletion sets. */
18211debfc3dSmrg auto_sbitmap prune_exprs (n_elems);
18221debfc3dSmrg bitmap_clear (prune_exprs);
18231debfc3dSmrg
18241debfc3dSmrg /* Iterate over the edges counting the number of times each expression
18251debfc3dSmrg needs to be inserted. */
18261debfc3dSmrg for (i = 0; i < (unsigned) n_edges_for_fn (cfun); i++)
18271debfc3dSmrg {
18281debfc3dSmrg EXECUTE_IF_SET_IN_BITMAP (pre_insert_map[i], 0, j, sbi)
18291debfc3dSmrg insertions[j]++;
18301debfc3dSmrg }
18311debfc3dSmrg
18321debfc3dSmrg /* Similarly for deletions, but those occur in blocks rather than on
18331debfc3dSmrg edges. */
18341debfc3dSmrg for (i = 0; i < (unsigned) last_basic_block_for_fn (cfun); i++)
18351debfc3dSmrg {
18361debfc3dSmrg EXECUTE_IF_SET_IN_BITMAP (pre_delete_map[i], 0, j, sbi)
18371debfc3dSmrg deletions[j]++;
18381debfc3dSmrg }
18391debfc3dSmrg
18401debfc3dSmrg /* Now that we have accurate counts, iterate over the elements in the
18411debfc3dSmrg hash table and see if any need too many insertions relative to the
18421debfc3dSmrg number of evaluations that can be removed. If so, mark them in
18431debfc3dSmrg PRUNE_EXPRS. */
18441debfc3dSmrg for (j = 0; j < (unsigned) n_elems; j++)
18451debfc3dSmrg if (deletions[j]
1846*8feb0f0bSmrg && (insertions[j] / deletions[j]) > param_max_gcse_insertion_ratio)
18471debfc3dSmrg bitmap_set_bit (prune_exprs, j);
18481debfc3dSmrg
18491debfc3dSmrg /* Now prune PRE_INSERT_MAP and PRE_DELETE_MAP based on PRUNE_EXPRS. */
18501debfc3dSmrg EXECUTE_IF_SET_IN_BITMAP (prune_exprs, 0, j, sbi)
18511debfc3dSmrg {
18521debfc3dSmrg for (i = 0; i < (unsigned) n_edges_for_fn (cfun); i++)
18531debfc3dSmrg bitmap_clear_bit (pre_insert_map[i], j);
18541debfc3dSmrg
18551debfc3dSmrg for (i = 0; i < (unsigned) last_basic_block_for_fn (cfun); i++)
18561debfc3dSmrg bitmap_clear_bit (pre_delete_map[i], j);
18571debfc3dSmrg }
18581debfc3dSmrg
18591debfc3dSmrg free (insertions);
18601debfc3dSmrg free (deletions);
18611debfc3dSmrg }
18621debfc3dSmrg
18631debfc3dSmrg /* Top level routine to do the dataflow analysis needed by PRE. */
18641debfc3dSmrg
18651debfc3dSmrg static struct edge_list *
18661debfc3dSmrg compute_pre_data (void)
18671debfc3dSmrg {
18681debfc3dSmrg struct edge_list *edge_list;
18691debfc3dSmrg basic_block bb;
18701debfc3dSmrg
18711debfc3dSmrg compute_local_properties (transp, comp, antloc, &expr_hash_table);
18721debfc3dSmrg prune_expressions (true);
18731debfc3dSmrg bitmap_vector_clear (ae_kill, last_basic_block_for_fn (cfun));
18741debfc3dSmrg
18751debfc3dSmrg /* Compute ae_kill for each basic block using:
18761debfc3dSmrg
18771debfc3dSmrg ~(TRANSP | COMP)
18781debfc3dSmrg */
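  /* In other words, an expression is killed in a block exactly when
     the block neither leaves it intact (transparent) nor computes a
     fresh copy of it (computed); e.g. a block that clobbers r6 without
     recomputing "r6 + r7" kills that expression.  */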
18791debfc3dSmrg
18801debfc3dSmrg FOR_EACH_BB_FN (bb, cfun)
18811debfc3dSmrg {
18821debfc3dSmrg bitmap_ior (ae_kill[bb->index], transp[bb->index], comp[bb->index]);
18831debfc3dSmrg bitmap_not (ae_kill[bb->index], ae_kill[bb->index]);
18841debfc3dSmrg }
18851debfc3dSmrg
18861debfc3dSmrg edge_list = pre_edge_lcm (expr_hash_table.n_elems, transp, comp, antloc,
18871debfc3dSmrg ae_kill, &pre_insert_map, &pre_delete_map);
18881debfc3dSmrg sbitmap_vector_free (antloc);
18891debfc3dSmrg antloc = NULL;
18901debfc3dSmrg sbitmap_vector_free (ae_kill);
18911debfc3dSmrg ae_kill = NULL;
18921debfc3dSmrg
18931debfc3dSmrg prune_insertions_deletions (expr_hash_table.n_elems);
18941debfc3dSmrg
18951debfc3dSmrg return edge_list;
18961debfc3dSmrg }
18971debfc3dSmrg
18981debfc3dSmrg /* PRE utilities */
18991debfc3dSmrg
19001debfc3dSmrg /* Return nonzero if an occurrence of expression EXPR in OCCR_BB would reach
19011debfc3dSmrg block BB.
19021debfc3dSmrg
19031debfc3dSmrg VISITED is a pointer to a working buffer for tracking which BB's have
19041debfc3dSmrg been visited. It is NULL for the top-level call.
19051debfc3dSmrg
19061debfc3dSmrg We treat reaching expressions that go through blocks containing the same
19071debfc3dSmrg reaching expression as "not reaching". E.g. if EXPR is generated in blocks
19081debfc3dSmrg 2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
19091debfc3dSmrg 2 as not reaching. The intent is to improve the probability of finding
19101debfc3dSmrg only one reaching expression and to reduce register lifetimes by picking
19111debfc3dSmrg the closest such expression. */
19121debfc3dSmrg
19131debfc3dSmrg static int
19141debfc3dSmrg pre_expr_reaches_here_p_work (basic_block occr_bb, struct gcse_expr *expr,
19151debfc3dSmrg basic_block bb, char *visited)
19161debfc3dSmrg {
19171debfc3dSmrg edge pred;
19181debfc3dSmrg edge_iterator ei;
19191debfc3dSmrg
19201debfc3dSmrg FOR_EACH_EDGE (pred, ei, bb->preds)
19211debfc3dSmrg {
19221debfc3dSmrg basic_block pred_bb = pred->src;
19231debfc3dSmrg
19241debfc3dSmrg if (pred->src == ENTRY_BLOCK_PTR_FOR_FN (cfun)
19251debfc3dSmrg /* Has this predecessor already been visited? */
19261debfc3dSmrg || visited[pred_bb->index])
19271debfc3dSmrg ;/* Nothing to do. */
19281debfc3dSmrg
19291debfc3dSmrg /* Does this predecessor generate this expression? */
19301debfc3dSmrg else if (bitmap_bit_p (comp[pred_bb->index], expr->bitmap_index))
19311debfc3dSmrg {
19321debfc3dSmrg /* Is this the occurrence we're looking for?
19331debfc3dSmrg Note that there's only one generating occurrence per block
19341debfc3dSmrg so we just need to check the block number. */
19351debfc3dSmrg if (occr_bb == pred_bb)
19361debfc3dSmrg return 1;
19371debfc3dSmrg
19381debfc3dSmrg visited[pred_bb->index] = 1;
19391debfc3dSmrg }
19401debfc3dSmrg /* Ignore this predecessor if it kills the expression. */
19411debfc3dSmrg else if (! bitmap_bit_p (transp[pred_bb->index], expr->bitmap_index))
19421debfc3dSmrg visited[pred_bb->index] = 1;
19431debfc3dSmrg
19441debfc3dSmrg /* Neither gen nor kill. */
19451debfc3dSmrg else
19461debfc3dSmrg {
19471debfc3dSmrg visited[pred_bb->index] = 1;
19481debfc3dSmrg if (pre_expr_reaches_here_p_work (occr_bb, expr, pred_bb, visited))
19491debfc3dSmrg return 1;
19501debfc3dSmrg }
19511debfc3dSmrg }
19521debfc3dSmrg
19531debfc3dSmrg /* All paths have been checked. */
19541debfc3dSmrg return 0;
19551debfc3dSmrg }
19561debfc3dSmrg
19571debfc3dSmrg /* The wrapper for pre_expr_reaches_here_p_work that ensures that any
19581debfc3dSmrg memory allocated for that function is freed. */
19591debfc3dSmrg
19601debfc3dSmrg static int
19611debfc3dSmrg pre_expr_reaches_here_p (basic_block occr_bb, struct gcse_expr *expr, basic_block bb)
19621debfc3dSmrg {
19631debfc3dSmrg int rval;
19641debfc3dSmrg char *visited = XCNEWVEC (char, last_basic_block_for_fn (cfun));
19651debfc3dSmrg
19661debfc3dSmrg rval = pre_expr_reaches_here_p_work (occr_bb, expr, bb, visited);
19671debfc3dSmrg
19681debfc3dSmrg free (visited);
19691debfc3dSmrg return rval;
19701debfc3dSmrg }
19711debfc3dSmrg
1972a05ac97eSmrg /* Generate RTL to copy EXP to REG and return it. */
19731debfc3dSmrg
1974a05ac97eSmrg rtx_insn *
1975a05ac97eSmrg prepare_copy_insn (rtx reg, rtx exp)
19761debfc3dSmrg {
19771debfc3dSmrg rtx_insn *pat;
19781debfc3dSmrg
19791debfc3dSmrg start_sequence ();
19801debfc3dSmrg
19811debfc3dSmrg /* If the expression is something that's an operand, like a constant,
19821debfc3dSmrg just copy it to a register. */
19831debfc3dSmrg if (general_operand (exp, GET_MODE (reg)))
19841debfc3dSmrg emit_move_insn (reg, exp);
19851debfc3dSmrg
19861debfc3dSmrg /* Otherwise, make a new insn to compute this expression and make sure the
19871debfc3dSmrg insn will be recognized (this also adds any needed CLOBBERs). */
19881debfc3dSmrg else
19891debfc3dSmrg {
19901debfc3dSmrg rtx_insn *insn = emit_insn (gen_rtx_SET (reg, exp));
19911debfc3dSmrg
19921debfc3dSmrg if (insn_invalid_p (insn, false))
19931debfc3dSmrg gcc_unreachable ();
19941debfc3dSmrg }
19951debfc3dSmrg
19961debfc3dSmrg pat = get_insns ();
19971debfc3dSmrg end_sequence ();
19981debfc3dSmrg
19991debfc3dSmrg return pat;
20001debfc3dSmrg }
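/* Illustrative uses (a sketch, not from the sources): copying
   (const_int 5) to REG passes the general_operand test and emits a
   plain move, while copying (mult:SI (reg:SI 101) (reg:SI 102))
   emits a full insn that must be recognized and may pick up extra
   CLOBBERs, e.g. of a flags register on some targets.  */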
20011debfc3dSmrg
2002a05ac97eSmrg /* Generate RTL to copy an EXPR to its `reaching_reg' and return it. */
2003a05ac97eSmrg
2004a05ac97eSmrg static rtx_insn *
2005a05ac97eSmrg process_insert_insn (struct gcse_expr *expr)
2006a05ac97eSmrg {
2007a05ac97eSmrg rtx reg = expr->reaching_reg;
2008a05ac97eSmrg /* Copy the expression to make sure we don't have any sharing issues. */
2009a05ac97eSmrg rtx exp = copy_rtx (expr->expr);
2010a05ac97eSmrg
2011a05ac97eSmrg return prepare_copy_insn (reg, exp);
2012a05ac97eSmrg }
2013a05ac97eSmrg
20141debfc3dSmrg /* Add EXPR to the end of basic block BB.
20151debfc3dSmrg
20161debfc3dSmrg This is used by both PRE and code hoisting. */
20171debfc3dSmrg
20181debfc3dSmrg static void
20191debfc3dSmrg insert_insn_end_basic_block (struct gcse_expr *expr, basic_block bb)
20201debfc3dSmrg {
20211debfc3dSmrg rtx_insn *insn = BB_END (bb);
20221debfc3dSmrg rtx_insn *new_insn;
20231debfc3dSmrg rtx reg = expr->reaching_reg;
20241debfc3dSmrg int regno = REGNO (reg);
20251debfc3dSmrg rtx_insn *pat, *pat_end;
20261debfc3dSmrg
20271debfc3dSmrg pat = process_insert_insn (expr);
20281debfc3dSmrg gcc_assert (pat && INSN_P (pat));
20291debfc3dSmrg
20301debfc3dSmrg pat_end = pat;
20311debfc3dSmrg while (NEXT_INSN (pat_end) != NULL_RTX)
20321debfc3dSmrg pat_end = NEXT_INSN (pat_end);
20331debfc3dSmrg
20341debfc3dSmrg /* If the last insn is a jump, insert EXPR in front [taking care to
20351debfc3dSmrg handle cc0, etc. properly]. Similarly, we need to take care of
20361debfc3dSmrg trapping instructions in the presence of non-call exceptions. */
20371debfc3dSmrg
20381debfc3dSmrg if (JUMP_P (insn)
20391debfc3dSmrg || (NONJUMP_INSN_P (insn)
20401debfc3dSmrg && (!single_succ_p (bb)
20411debfc3dSmrg || single_succ_edge (bb)->flags & EDGE_ABNORMAL)))
20421debfc3dSmrg {
20431debfc3dSmrg /* FIXME: 'twould be nice to call prev_cc0_setter here but it aborts
20441debfc3dSmrg if cc0 isn't set. */
20451debfc3dSmrg if (HAVE_cc0)
20461debfc3dSmrg {
20471debfc3dSmrg rtx note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
20481debfc3dSmrg if (note)
20491debfc3dSmrg insn = safe_as_a <rtx_insn *> (XEXP (note, 0));
20501debfc3dSmrg else
20511debfc3dSmrg {
20521debfc3dSmrg rtx_insn *maybe_cc0_setter = prev_nonnote_insn (insn);
20531debfc3dSmrg if (maybe_cc0_setter
20541debfc3dSmrg && INSN_P (maybe_cc0_setter)
20551debfc3dSmrg && sets_cc0_p (PATTERN (maybe_cc0_setter)))
20561debfc3dSmrg insn = maybe_cc0_setter;
20571debfc3dSmrg }
20581debfc3dSmrg }
20591debfc3dSmrg
20601debfc3dSmrg /* FIXME: What if something in cc0/jump uses value set in new insn? */
20611debfc3dSmrg new_insn = emit_insn_before_noloc (pat, insn, bb);
20621debfc3dSmrg }
20631debfc3dSmrg
20641debfc3dSmrg /* Likewise if the last insn is a call, as will happen in the presence
20651debfc3dSmrg of exception handling. */
20661debfc3dSmrg else if (CALL_P (insn)
20671debfc3dSmrg && (!single_succ_p (bb)
20681debfc3dSmrg || single_succ_edge (bb)->flags & EDGE_ABNORMAL))
20691debfc3dSmrg {
20701debfc3dSmrg /* Keeping in mind targets with small register classes and parameters
20711debfc3dSmrg in registers, we search backward and place the instructions before
20721debfc3dSmrg the first parameter is loaded. Do this for all targets for consistency,
20731debfc3dSmrg and on the presumption that we'll get better code elsewhere as well. */
20741debfc3dSmrg
20751debfc3dSmrg /* Since different machines initialize their parameter registers
20761debfc3dSmrg in different orders, assume nothing. Collect the set of all
20771debfc3dSmrg parameter registers. */
20781debfc3dSmrg insn = find_first_parameter_load (insn, BB_HEAD (bb));
20791debfc3dSmrg
20801debfc3dSmrg /* If we found all the parameter loads, then we want to insert
20811debfc3dSmrg before the first parameter load.
20821debfc3dSmrg
20831debfc3dSmrg If we did not find all the parameter loads, then we might have
20841debfc3dSmrg stopped on the head of the block, which could be a CODE_LABEL.
20851debfc3dSmrg If we inserted before the CODE_LABEL, then we would be putting
20861debfc3dSmrg the insn in the wrong basic block. In that case, put the insn
20871debfc3dSmrg after the CODE_LABEL. Also, respect NOTE_INSN_BASIC_BLOCK. */
20881debfc3dSmrg while (LABEL_P (insn)
20891debfc3dSmrg || NOTE_INSN_BASIC_BLOCK_P (insn))
20901debfc3dSmrg insn = NEXT_INSN (insn);
20911debfc3dSmrg
20921debfc3dSmrg new_insn = emit_insn_before_noloc (pat, insn, bb);
20931debfc3dSmrg }
20941debfc3dSmrg else
20951debfc3dSmrg new_insn = emit_insn_after_noloc (pat, insn, bb);
20961debfc3dSmrg
20971debfc3dSmrg while (1)
20981debfc3dSmrg {
20991debfc3dSmrg if (INSN_P (pat))
21001debfc3dSmrg add_label_notes (PATTERN (pat), new_insn);
21011debfc3dSmrg if (pat == pat_end)
21021debfc3dSmrg break;
21031debfc3dSmrg pat = NEXT_INSN (pat);
21041debfc3dSmrg }
21051debfc3dSmrg
21061debfc3dSmrg gcse_create_count++;
21071debfc3dSmrg
21081debfc3dSmrg if (dump_file)
21091debfc3dSmrg {
21101debfc3dSmrg fprintf (dump_file, "PRE/HOIST: end of bb %d, insn %d, ",
21111debfc3dSmrg bb->index, INSN_UID (new_insn));
21121debfc3dSmrg fprintf (dump_file, "copying expression %d to reg %d\n",
21131debfc3dSmrg expr->bitmap_index, regno);
21141debfc3dSmrg }
21151debfc3dSmrg }
21161debfc3dSmrg
21171debfc3dSmrg /* Insert partially redundant expressions on edges in the CFG to make
21181debfc3dSmrg the expressions fully redundant. */
21191debfc3dSmrg
21201debfc3dSmrg static int
21211debfc3dSmrg pre_edge_insert (struct edge_list *edge_list, struct gcse_expr **index_map)
21221debfc3dSmrg {
21231debfc3dSmrg int e, i, j, num_edges, set_size, did_insert = 0;
21241debfc3dSmrg sbitmap *inserted;
21251debfc3dSmrg
21261debfc3dSmrg /* Where PRE_INSERT_MAP is nonzero, we add the expression on that edge
21271debfc3dSmrg if it reaches any of the deleted expressions. */
21281debfc3dSmrg
21291debfc3dSmrg set_size = pre_insert_map[0]->size;
21301debfc3dSmrg num_edges = NUM_EDGES (edge_list);
21311debfc3dSmrg inserted = sbitmap_vector_alloc (num_edges, expr_hash_table.n_elems);
21321debfc3dSmrg bitmap_vector_clear (inserted, num_edges);
21331debfc3dSmrg
21341debfc3dSmrg for (e = 0; e < num_edges; e++)
21351debfc3dSmrg {
21361debfc3dSmrg int indx;
21371debfc3dSmrg basic_block bb = INDEX_EDGE_PRED_BB (edge_list, e);
21381debfc3dSmrg
21391debfc3dSmrg for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS)
21401debfc3dSmrg {
21411debfc3dSmrg SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i];
21421debfc3dSmrg
21431debfc3dSmrg for (j = indx;
21441debfc3dSmrg insert && j < (int) expr_hash_table.n_elems;
21451debfc3dSmrg j++, insert >>= 1)
21461debfc3dSmrg if ((insert & 1) != 0 && index_map[j]->reaching_reg != NULL_RTX)
21471debfc3dSmrg {
21481debfc3dSmrg struct gcse_expr *expr = index_map[j];
21491debfc3dSmrg struct gcse_occr *occr;
21501debfc3dSmrg
21511debfc3dSmrg /* Now look at each deleted occurrence of this expression. */
21521debfc3dSmrg for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
21531debfc3dSmrg {
21541debfc3dSmrg if (! occr->deleted_p)
21551debfc3dSmrg continue;
21561debfc3dSmrg
21571debfc3dSmrg /* Insert this expression on this edge if it would
21581debfc3dSmrg reach the deleted occurrence in BB. */
21591debfc3dSmrg if (!bitmap_bit_p (inserted[e], j))
21601debfc3dSmrg {
21611debfc3dSmrg rtx_insn *insn;
21621debfc3dSmrg edge eg = INDEX_EDGE (edge_list, e);
21631debfc3dSmrg
21641debfc3dSmrg /* We can't insert anything on an abnormal and
21651debfc3dSmrg critical edge, so we insert the insn at the end of
21661debfc3dSmrg the previous block. There are several alternatives
21671debfc3dSmrg detailed in Morgan's book, p. 277 (sec. 10.5), for
21681debfc3dSmrg handling this situation. This one is easiest for
21691debfc3dSmrg now. */
21701debfc3dSmrg
21711debfc3dSmrg if (eg->flags & EDGE_ABNORMAL)
21721debfc3dSmrg insert_insn_end_basic_block (index_map[j], bb);
21731debfc3dSmrg else
21741debfc3dSmrg {
21751debfc3dSmrg insn = process_insert_insn (index_map[j]);
21761debfc3dSmrg insert_insn_on_edge (insn, eg);
21771debfc3dSmrg }
21781debfc3dSmrg
21791debfc3dSmrg if (dump_file)
21801debfc3dSmrg {
21811debfc3dSmrg fprintf (dump_file, "PRE: edge (%d,%d), ",
21821debfc3dSmrg bb->index,
21831debfc3dSmrg INDEX_EDGE_SUCC_BB (edge_list, e)->index);
21841debfc3dSmrg fprintf (dump_file, "copy expression %d\n",
21851debfc3dSmrg expr->bitmap_index);
21861debfc3dSmrg }
21871debfc3dSmrg
21881debfc3dSmrg update_ld_motion_stores (expr);
21891debfc3dSmrg bitmap_set_bit (inserted[e], j);
21901debfc3dSmrg did_insert = 1;
21911debfc3dSmrg gcse_create_count++;
21921debfc3dSmrg }
21931debfc3dSmrg }
21941debfc3dSmrg }
21951debfc3dSmrg }
21961debfc3dSmrg }
21971debfc3dSmrg
21981debfc3dSmrg sbitmap_vector_free (inserted);
21991debfc3dSmrg return did_insert;
22001debfc3dSmrg }
22011debfc3dSmrg
22021debfc3dSmrg /* Copy the result of EXPR->EXPR generated by INSN to EXPR->REACHING_REG.
22031debfc3dSmrg Given "old_reg <- expr" (INSN), instead of adding after it
22041debfc3dSmrg reaching_reg <- old_reg
22051debfc3dSmrg it's better to do the following:
22061debfc3dSmrg reaching_reg <- expr
22071debfc3dSmrg old_reg <- reaching_reg
22081debfc3dSmrg because this way copy propagation can discover additional PRE
22091debfc3dSmrg opportunities. But if this fails, we try the old way.
22101debfc3dSmrg When "expr" is a store, i.e.
22111debfc3dSmrg given "MEM <- old_reg", instead of adding after it
22121debfc3dSmrg reaching_reg <- old_reg
22131debfc3dSmrg it's better to add it before as follows:
22141debfc3dSmrg reaching_reg <- old_reg
22151debfc3dSmrg MEM <- reaching_reg. */
22161debfc3dSmrg
22171debfc3dSmrg static void
22181debfc3dSmrg pre_insert_copy_insn (struct gcse_expr *expr, rtx_insn *insn)
22191debfc3dSmrg {
22201debfc3dSmrg rtx reg = expr->reaching_reg;
22211debfc3dSmrg int regno = REGNO (reg);
22221debfc3dSmrg int indx = expr->bitmap_index;
22231debfc3dSmrg rtx pat = PATTERN (insn);
22241debfc3dSmrg rtx set, first_set;
22251debfc3dSmrg rtx_insn *new_insn;
22261debfc3dSmrg rtx old_reg;
22271debfc3dSmrg int i;
22281debfc3dSmrg
22291debfc3dSmrg /* This block matches the logic in hash_scan_insn. */
22301debfc3dSmrg switch (GET_CODE (pat))
22311debfc3dSmrg {
22321debfc3dSmrg case SET:
22331debfc3dSmrg set = pat;
22341debfc3dSmrg break;
22351debfc3dSmrg
22361debfc3dSmrg case PARALLEL:
22371debfc3dSmrg /* Search through the parallel looking for the set whose
22381debfc3dSmrg source was the expression that we're interested in. */
22391debfc3dSmrg first_set = NULL_RTX;
22401debfc3dSmrg set = NULL_RTX;
22411debfc3dSmrg for (i = 0; i < XVECLEN (pat, 0); i++)
22421debfc3dSmrg {
22431debfc3dSmrg rtx x = XVECEXP (pat, 0, i);
22441debfc3dSmrg if (GET_CODE (x) == SET)
22451debfc3dSmrg {
22461debfc3dSmrg /* If the source was a REG_EQUAL or REG_EQUIV note, we
22471debfc3dSmrg may not find an equivalent expression, but in this
22481debfc3dSmrg case the PARALLEL will have a single set. */
22491debfc3dSmrg if (first_set == NULL_RTX)
22501debfc3dSmrg first_set = x;
22511debfc3dSmrg if (expr_equiv_p (SET_SRC (x), expr->expr))
22521debfc3dSmrg {
22531debfc3dSmrg set = x;
22541debfc3dSmrg break;
22551debfc3dSmrg }
22561debfc3dSmrg }
22571debfc3dSmrg }
22581debfc3dSmrg
22591debfc3dSmrg gcc_assert (first_set);
22601debfc3dSmrg if (set == NULL_RTX)
22611debfc3dSmrg set = first_set;
22621debfc3dSmrg break;
22631debfc3dSmrg
22641debfc3dSmrg default:
22651debfc3dSmrg gcc_unreachable ();
22661debfc3dSmrg }
22671debfc3dSmrg
22681debfc3dSmrg if (REG_P (SET_DEST (set)))
22691debfc3dSmrg {
22701debfc3dSmrg old_reg = SET_DEST (set);
22711debfc3dSmrg /* Check if we can modify the set destination in the original insn. */
22721debfc3dSmrg if (validate_change (insn, &SET_DEST (set), reg, 0))
22731debfc3dSmrg {
22741debfc3dSmrg new_insn = gen_move_insn (old_reg, reg);
22751debfc3dSmrg new_insn = emit_insn_after (new_insn, insn);
22761debfc3dSmrg }
22771debfc3dSmrg else
22781debfc3dSmrg {
22791debfc3dSmrg new_insn = gen_move_insn (reg, old_reg);
22801debfc3dSmrg new_insn = emit_insn_after (new_insn, insn);
22811debfc3dSmrg }
22821debfc3dSmrg }
22831debfc3dSmrg else /* This is possible only in case of a store to memory. */
22841debfc3dSmrg {
22851debfc3dSmrg old_reg = SET_SRC (set);
22861debfc3dSmrg new_insn = gen_move_insn (reg, old_reg);
22871debfc3dSmrg
22881debfc3dSmrg /* Check if we can modify the set source in the original insn. */
22891debfc3dSmrg if (validate_change (insn, &SET_SRC (set), reg, 0))
22901debfc3dSmrg new_insn = emit_insn_before (new_insn, insn);
22911debfc3dSmrg else
22921debfc3dSmrg new_insn = emit_insn_after (new_insn, insn);
22931debfc3dSmrg }
22941debfc3dSmrg
22951debfc3dSmrg gcse_create_count++;
22961debfc3dSmrg
22971debfc3dSmrg if (dump_file)
22981debfc3dSmrg fprintf (dump_file,
22991debfc3dSmrg "PRE: bb %d, insn %d, copy expression %d in insn %d to reg %d\n",
23001debfc3dSmrg BLOCK_FOR_INSN (insn)->index, INSN_UID (new_insn), indx,
23011debfc3dSmrg INSN_UID (insn), regno);
23021debfc3dSmrg }
23031debfc3dSmrg
23041debfc3dSmrg /* Copy available expressions that reach the redundant expression
23051debfc3dSmrg to `reaching_reg'. */
23061debfc3dSmrg
23071debfc3dSmrg static void
23081debfc3dSmrg pre_insert_copies (void)
23091debfc3dSmrg {
23101debfc3dSmrg unsigned int i, added_copy;
23111debfc3dSmrg struct gcse_expr *expr;
23121debfc3dSmrg struct gcse_occr *occr;
23131debfc3dSmrg struct gcse_occr *avail;
23141debfc3dSmrg
23151debfc3dSmrg /* For each available expression in the table, copy the result to
23161debfc3dSmrg `reaching_reg' if the expression reaches a deleted one.
23171debfc3dSmrg
23181debfc3dSmrg ??? The current algorithm is rather brute force.
23191debfc3dSmrg Need to do some profiling. */
23201debfc3dSmrg
23211debfc3dSmrg for (i = 0; i < expr_hash_table.size; i++)
23221debfc3dSmrg for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
23231debfc3dSmrg {
23241debfc3dSmrg /* If the basic block isn't reachable, PPOUT will be TRUE. However,
23251debfc3dSmrg we don't want to insert a copy here because the expression may not
23261debfc3dSmrg really be redundant. So only insert an insn if the expression was
23271debfc3dSmrg deleted. This test also avoids further processing if the
23281debfc3dSmrg expression wasn't deleted anywhere. */
23291debfc3dSmrg if (expr->reaching_reg == NULL)
23301debfc3dSmrg continue;
23311debfc3dSmrg
23321debfc3dSmrg /* Set when we add a copy for that expression. */
23331debfc3dSmrg added_copy = 0;
23341debfc3dSmrg
23351debfc3dSmrg for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
23361debfc3dSmrg {
23371debfc3dSmrg if (! occr->deleted_p)
23381debfc3dSmrg continue;
23391debfc3dSmrg
23401debfc3dSmrg for (avail = expr->avail_occr; avail != NULL; avail = avail->next)
23411debfc3dSmrg {
23421debfc3dSmrg rtx_insn *insn = avail->insn;
23431debfc3dSmrg
23441debfc3dSmrg /* No need to handle this one if handled already. */
23451debfc3dSmrg if (avail->copied_p)
23461debfc3dSmrg continue;
23471debfc3dSmrg
23481debfc3dSmrg /* Don't handle this one if it's a redundant one. */
23491debfc3dSmrg if (insn->deleted ())
23501debfc3dSmrg continue;
23511debfc3dSmrg
23521debfc3dSmrg /* Or if the expression doesn't reach the deleted one. */
23531debfc3dSmrg if (! pre_expr_reaches_here_p (BLOCK_FOR_INSN (avail->insn),
23541debfc3dSmrg expr,
23551debfc3dSmrg BLOCK_FOR_INSN (occr->insn)))
23561debfc3dSmrg continue;
23571debfc3dSmrg
23581debfc3dSmrg added_copy = 1;
23591debfc3dSmrg
23601debfc3dSmrg /* Copy the result of avail to reaching_reg. */
23611debfc3dSmrg pre_insert_copy_insn (expr, insn);
23621debfc3dSmrg avail->copied_p = 1;
23631debfc3dSmrg }
23641debfc3dSmrg }
23651debfc3dSmrg
23661debfc3dSmrg if (added_copy)
23671debfc3dSmrg update_ld_motion_stores (expr);
23681debfc3dSmrg }
23691debfc3dSmrg }
23701debfc3dSmrg
23711debfc3dSmrg struct set_data
23721debfc3dSmrg {
23731debfc3dSmrg rtx_insn *insn;
23741debfc3dSmrg const_rtx set;
23751debfc3dSmrg int nsets;
23761debfc3dSmrg };
23771debfc3dSmrg
23781debfc3dSmrg /* Increment number of sets and record set in DATA. */
23791debfc3dSmrg
23801debfc3dSmrg static void
23811debfc3dSmrg record_set_data (rtx dest, const_rtx set, void *data)
23821debfc3dSmrg {
23831debfc3dSmrg struct set_data *s = (struct set_data *)data;
23841debfc3dSmrg
23851debfc3dSmrg if (GET_CODE (set) == SET)
23861debfc3dSmrg {
23871debfc3dSmrg /* We allow insns having multiple sets, where all but one are
23881debfc3dSmrg dead, to count as single-set insns. In the common case only
23891debfc3dSmrg a single set is present, so we want to avoid checking for
23901debfc3dSmrg REG_UNUSED notes unless necessary. */
23911debfc3dSmrg if (s->nsets == 1
23921debfc3dSmrg && find_reg_note (s->insn, REG_UNUSED, SET_DEST (s->set))
23931debfc3dSmrg && !side_effects_p (s->set))
23941debfc3dSmrg s->nsets = 0;
23951debfc3dSmrg
23961debfc3dSmrg if (!s->nsets)
23971debfc3dSmrg {
23981debfc3dSmrg /* Record this set. */
23991debfc3dSmrg s->nsets += 1;
24001debfc3dSmrg s->set = set;
24011debfc3dSmrg }
24021debfc3dSmrg else if (!find_reg_note (s->insn, REG_UNUSED, dest)
24031debfc3dSmrg || side_effects_p (set))
24041debfc3dSmrg s->nsets += 1;
24051debfc3dSmrg }
24061debfc3dSmrg }
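/* For example (illustrative only), a divmod-style insn

       (parallel [(set (reg 100) (div (reg 102) (reg 103)))
		  (set (reg 101) (mod (reg 102) (reg 103)))])

   where reg 101 carries a REG_UNUSED note still counts as a single
   set of reg 100 here.  */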
24071debfc3dSmrg
24081debfc3dSmrg static const_rtx
24091debfc3dSmrg single_set_gcse (rtx_insn *insn)
24101debfc3dSmrg {
24111debfc3dSmrg struct set_data s;
24121debfc3dSmrg rtx pattern;
24131debfc3dSmrg
24141debfc3dSmrg gcc_assert (INSN_P (insn));
24151debfc3dSmrg
24161debfc3dSmrg /* Optimize common case. */
24171debfc3dSmrg pattern = PATTERN (insn);
24181debfc3dSmrg if (GET_CODE (pattern) == SET)
24191debfc3dSmrg return pattern;
24201debfc3dSmrg
24211debfc3dSmrg s.insn = insn;
24221debfc3dSmrg s.nsets = 0;
2423*8feb0f0bSmrg note_pattern_stores (pattern, record_set_data, &s);
24241debfc3dSmrg
24251debfc3dSmrg /* Considered invariant insns have exactly one set. */
24261debfc3dSmrg gcc_assert (s.nsets == 1);
24271debfc3dSmrg return s.set;
24281debfc3dSmrg }
24291debfc3dSmrg
24301debfc3dSmrg /* Emit move from SRC to DEST noting the equivalence with expression computed
24311debfc3dSmrg in INSN. */
24321debfc3dSmrg
24331debfc3dSmrg static rtx_insn *
24341debfc3dSmrg gcse_emit_move_after (rtx dest, rtx src, rtx_insn *insn)
24351debfc3dSmrg {
24361debfc3dSmrg rtx_insn *new_rtx;
24371debfc3dSmrg const_rtx set = single_set_gcse (insn);
24381debfc3dSmrg rtx set2;
24391debfc3dSmrg rtx note;
24401debfc3dSmrg rtx eqv = NULL_RTX;
24411debfc3dSmrg
24421debfc3dSmrg /* This should never fail since we're creating a reg->reg copy
24431debfc3dSmrg we've verified to be valid. */
24441debfc3dSmrg
24451debfc3dSmrg new_rtx = emit_insn_after (gen_move_insn (dest, src), insn);
24461debfc3dSmrg
24471debfc3dSmrg /* Note the equivalence for local CSE pass. Take the note from the old
24481debfc3dSmrg set if there was one. Otherwise record the SET_SRC from the old set
24491debfc3dSmrg unless DEST is also an operand of the SET_SRC. */
24501debfc3dSmrg set2 = single_set (new_rtx);
24511debfc3dSmrg if (!set2 || !rtx_equal_p (SET_DEST (set2), dest))
24521debfc3dSmrg return new_rtx;
24531debfc3dSmrg if ((note = find_reg_equal_equiv_note (insn)))
24541debfc3dSmrg eqv = XEXP (note, 0);
24551debfc3dSmrg else if (! REG_P (dest)
24561debfc3dSmrg || ! reg_mentioned_p (dest, SET_SRC (set)))
24571debfc3dSmrg eqv = SET_SRC (set);
24581debfc3dSmrg
24591debfc3dSmrg if (eqv != NULL_RTX)
24601debfc3dSmrg set_unique_reg_note (new_rtx, REG_EQUAL, copy_insn_1 (eqv));
24611debfc3dSmrg
24621debfc3dSmrg return new_rtx;
24631debfc3dSmrg }
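/* A sketch of the effect: when PRE deletes the computation in
   "r7 = r2 * r3" and replaces it with "r7 = r100" (the reaching reg),
   the new copy gets a REG_EQUAL note (mult (reg 2) (reg 3)) so local
   CSE can still see the value, unless r7 itself appeared in the
   source.  */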

/* Delete redundant computations.
   Deletion is done by changing the insn to copy the `reaching_reg' of
   the expression into the result of the SET.  It is left to later passes
   to propagate the copy or eliminate it.

   Return nonzero if a change is made.  */

static int
pre_delete (void)
{
  unsigned int i;
  int changed;
  struct gcse_expr *expr;
  struct gcse_occr *occr;

  changed = 0;
  for (i = 0; i < expr_hash_table.size; i++)
    for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
      {
	int indx = expr->bitmap_index;

	/* We only need to search antic_occr since we require ANTLOC != 0.  */
	for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
	  {
	    rtx_insn *insn = occr->insn;
	    rtx set;
	    basic_block bb = BLOCK_FOR_INSN (insn);

	    /* We only delete insns that have a single_set.  */
	    if (bitmap_bit_p (pre_delete_map[bb->index], indx)
		&& (set = single_set (insn)) != 0
		&& dbg_cnt (pre_insn))
	      {
		/* Create a pseudo-reg to store the result of reaching
		   expressions into.  Get the mode for the new pseudo from
		   the mode of the original destination pseudo.  */
		if (expr->reaching_reg == NULL)
		  expr->reaching_reg = gen_reg_rtx_and_attrs (SET_DEST (set));

		gcse_emit_move_after (SET_DEST (set), expr->reaching_reg, insn);
		delete_insn (insn);
		occr->deleted_p = 1;
		changed = 1;
		gcse_subst_count++;

		if (dump_file)
		  {
		    fprintf (dump_file,
			     "PRE: redundant insn %d (expression %d) in ",
			     INSN_UID (insn), indx);
		    fprintf (dump_file, "bb %d, reaching reg is %d\n",
			     bb->index, REGNO (expr->reaching_reg));
		  }
	      }
	  }
      }

  return changed;
}
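
/* Editor's sketch (illustration only, not compiled): in a diamond where
   both predecessors compute x + y and the join block computes it again,
   pre_delete turns the redundant join-block insn

       (set (reg 130) (plus (reg 131) (reg 132)))

   into the copy (set (reg 130) (reg 140)) from the reaching register;
   for partial redundancies, pre_edge_insert later places
   (set (reg 140) (plus ...)) on the edges where the expression was not
   yet available, making the redundancy full.  */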

/* Perform GCSE optimizations using PRE.
   This is called by one_pre_gcse_pass after all the dataflow analysis
   has been done.

   This is based on the original Morel-Renvoise paper, Fred Chow's thesis,
   and lazy code motion from Knoop, Ruthing and Steffen as described in
   Advanced Compiler Design and Implementation.

   ??? A new pseudo reg is created to hold the reaching expression.  The
   nice thing about the classical approach is that it would try to use an
   existing reg.  If the register can't be adequately optimized [i.e. we
   introduce reload problems], one could add a pass here to propagate the
   new register through the block.

   ??? We don't handle single sets in PARALLELs because we're [currently]
   not able to copy the rest of the parallel when we insert copies to
   create full redundancies from partial redundancies.  However, there's
   no reason why we can't handle PARALLELs in the cases where there are
   no partial redundancies.  */

static int
pre_gcse (struct edge_list *edge_list)
{
  unsigned int i;
  int did_insert, changed;
  struct gcse_expr **index_map;
  struct gcse_expr *expr;

  /* Compute a mapping from expression number (`bitmap_index') to
     hash table entry.  */

  index_map = XCNEWVEC (struct gcse_expr *, expr_hash_table.n_elems);
  for (i = 0; i < expr_hash_table.size; i++)
    for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
      index_map[expr->bitmap_index] = expr;

  /* Delete the redundant insns first so that
     - we know what register to use for the new insns and for the other
       ones with reaching expressions
     - we know which insns are redundant when we go to create copies  */

  changed = pre_delete ();
  did_insert = pre_edge_insert (edge_list, index_map);

  /* In other places with reaching expressions, copy the expression to the
     specially allocated pseudo-reg that reaches the redundant expr.  */
  pre_insert_copies ();
  if (did_insert)
    {
      commit_edge_insertions ();
      changed = 1;
    }

  free (index_map);
  return changed;
}

/* Top level routine to perform one PRE GCSE pass.

   Return nonzero if a change was made.  */

static int
one_pre_gcse_pass (void)
{
  int changed = 0;

  gcse_subst_count = 0;
  gcse_create_count = 0;

  /* Return if there's nothing to do, or it is too expensive.  */
  if (n_basic_blocks_for_fn (cfun) <= NUM_FIXED_BLOCKS + 1
      || gcse_or_cprop_is_too_expensive (_("PRE disabled")))
    return 0;

  /* We need alias.  */
  init_alias_analysis ();

  bytes_used = 0;
  gcc_obstack_init (&gcse_obstack);
  alloc_gcse_mem ();

  alloc_hash_table (&expr_hash_table);
  add_noreturn_fake_exit_edges ();
  if (flag_gcse_lm)
    compute_ld_motion_mems ();

  compute_hash_table (&expr_hash_table);
  if (flag_gcse_lm)
    trim_ld_motion_mems ();
  if (dump_file)
    dump_hash_table (dump_file, "Expression", &expr_hash_table);

  if (expr_hash_table.n_elems > 0)
    {
      struct edge_list *edge_list;
      alloc_pre_mem (last_basic_block_for_fn (cfun), expr_hash_table.n_elems);
      edge_list = compute_pre_data ();
      changed |= pre_gcse (edge_list);
      free_edge_list (edge_list);
      free_pre_mem ();
    }

  if (flag_gcse_lm)
    free_ld_motion_mems ();
  remove_fake_exit_edges ();
  free_hash_table (&expr_hash_table);

  free_gcse_mem ();
  obstack_free (&gcse_obstack, NULL);

  /* We are finished with alias.  */
  end_alias_analysis ();

  if (dump_file)
    {
      fprintf (dump_file, "PRE GCSE of %s, %d basic blocks, %d bytes needed, ",
	       current_function_name (), n_basic_blocks_for_fn (cfun),
	       bytes_used);
      fprintf (dump_file, "%d substs, %d insns created\n",
	       gcse_subst_count, gcse_create_count);
    }

  return changed;
}

/* If X contains any LABEL_REF's, add REG_LABEL_OPERAND notes for them
   to INSN.  If such notes are added to an insn which references a
   CODE_LABEL, the LABEL_NUSES count is incremented.  We have to add
   that note, because the following loop optimization pass requires
   them.  */

/* ??? If there was a jump optimization pass after gcse and before loop,
   then we would not need to do this here, because jump would add the
   necessary REG_LABEL_OPERAND and REG_LABEL_TARGET notes.  */

static void
add_label_notes (rtx x, rtx_insn *insn)
{
  enum rtx_code code = GET_CODE (x);
  int i, j;
  const char *fmt;

  if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
    {
      /* This code used to ignore labels that referred to dispatch tables to
	 avoid flow generating (slightly) worse code.

	 We no longer ignore such label references (see LABEL_REF handling in
	 mark_jump_label for additional information).  */

      /* There's no reason for current users to emit jump-insns with
	 such a LABEL_REF, so we don't have to handle REG_LABEL_TARGET
	 notes.  */
      gcc_assert (!JUMP_P (insn));
      add_reg_note (insn, REG_LABEL_OPERAND, label_ref_label (x));

      if (LABEL_P (label_ref_label (x)))
	LABEL_NUSES (label_ref_label (x))++;

      return;
    }

  for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
    {
      if (fmt[i] == 'e')
	add_label_notes (XEXP (x, i), insn);
      else if (fmt[i] == 'E')
	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	  add_label_notes (XVECEXP (x, i, j), insn);
    }
}

/* Code Hoisting variables and subroutines.  */

/* Very busy expressions.  */
static sbitmap *hoist_vbein;
static sbitmap *hoist_vbeout;

/* ??? We could compute post dominators and run this algorithm in
   reverse to perform tail merging; doing so would probably be
   more effective than the tail merging code in jump.c.

   It's unclear if tail merging could be run in parallel with
   code hoisting.  It would be nice.  */

/* Allocate vars used for code hoisting analysis.  */

static void
alloc_code_hoist_mem (int n_blocks, int n_exprs)
{
  antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
  transp = sbitmap_vector_alloc (n_blocks, n_exprs);
  comp = sbitmap_vector_alloc (n_blocks, n_exprs);

  hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs);
  hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs);
}

/* Free vars used for code hoisting analysis.  */

static void
free_code_hoist_mem (void)
{
  sbitmap_vector_free (antloc);
  sbitmap_vector_free (transp);
  sbitmap_vector_free (comp);

  sbitmap_vector_free (hoist_vbein);
  sbitmap_vector_free (hoist_vbeout);

  free_dominance_info (CDI_DOMINATORS);
}

/* Compute the very busy expressions at entry/exit from each block.

   An expression is very busy if all paths from a given point
   compute the expression.  */
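
/* Editor's note: the fixed point computed below is the standard very-busy
   expression dataflow system, written here for reference:

       VBEout(b) = (intersection over successors s of VBEin(s))
		   union COMP(b)
       VBEin(b)  = ANTLOC(b) union (VBEout(b) intersect TRANSP(b))

   with VBEout empty for the block preceding EXIT.  The loop below
   iterates over the blocks in reverse program order until no bitmap
   changes.  */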

static void
compute_code_hoist_vbeinout (void)
{
  int changed, passes;
  basic_block bb;

  bitmap_vector_clear (hoist_vbeout, last_basic_block_for_fn (cfun));
  bitmap_vector_clear (hoist_vbein, last_basic_block_for_fn (cfun));

  passes = 0;
  changed = 1;

  while (changed)
    {
      changed = 0;

      /* We scan the blocks in reverse order to speed up
	 the convergence.  */
      FOR_EACH_BB_REVERSE_FN (bb, cfun)
	{
	  if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (cfun))
	    {
	      bitmap_intersection_of_succs (hoist_vbeout[bb->index],
					    hoist_vbein, bb);

	      /* Include expressions in VBEout that are calculated
		 in BB and available at its end.  */
	      bitmap_ior (hoist_vbeout[bb->index],
			  hoist_vbeout[bb->index], comp[bb->index]);
	    }

	  changed |= bitmap_or_and (hoist_vbein[bb->index],
				    antloc[bb->index],
				    hoist_vbeout[bb->index],
				    transp[bb->index]);
	}

      passes++;
    }

  if (dump_file)
    {
      fprintf (dump_file, "hoisting vbeinout computation: %d passes\n", passes);

      FOR_EACH_BB_FN (bb, cfun)
	{
	  fprintf (dump_file, "vbein (%d): ", bb->index);
	  dump_bitmap_file (dump_file, hoist_vbein[bb->index]);
	  fprintf (dump_file, "vbeout(%d): ", bb->index);
	  dump_bitmap_file (dump_file, hoist_vbeout[bb->index]);
	}
    }
}

/* Top level routine to do the dataflow analysis needed by code hoisting.  */

static void
compute_code_hoist_data (void)
{
  compute_local_properties (transp, comp, antloc, &expr_hash_table);
  prune_expressions (false);
  compute_code_hoist_vbeinout ();
  calculate_dominance_info (CDI_DOMINATORS);
  if (dump_file)
    fprintf (dump_file, "\n");
}

/* Update register pressure for BB when hoisting an expression from
   instruction FROM, if the live ranges of inputs are shrunk.  Also
   maintain live_in information if the live range of a register
   referred to in FROM is shrunk.

   Return 0 if register pressure doesn't change, otherwise return
   the number by which register pressure is decreased.

   NOTE: Register pressure won't be increased in this function.  */

static int
update_bb_reg_pressure (basic_block bb, rtx_insn *from)
{
  rtx dreg;
  rtx_insn *insn;
  basic_block succ_bb;
  df_ref use, op_ref;
  edge succ;
  edge_iterator ei;
  int decreased_pressure = 0;
  int nregs;
  enum reg_class pressure_class;

  FOR_EACH_INSN_USE (use, from)
    {
      dreg = DF_REF_REAL_REG (use);
      /* The live range of the register is shrunk only if it isn't:
	 1. referred to on any path from the end of this block to EXIT, or
	 2. referred to by insns other than FROM in this block.  */
      FOR_EACH_EDGE (succ, ei, bb->succs)
	{
	  succ_bb = succ->dest;
	  if (succ_bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
	    continue;

	  if (bitmap_bit_p (BB_DATA (succ_bb)->live_in, REGNO (dreg)))
	    break;
	}
      if (succ != NULL)
	continue;

      op_ref = DF_REG_USE_CHAIN (REGNO (dreg));
      for (; op_ref; op_ref = DF_REF_NEXT_REG (op_ref))
	{
	  if (!DF_REF_INSN_INFO (op_ref))
	    continue;

	  insn = DF_REF_INSN (op_ref);
	  if (BLOCK_FOR_INSN (insn) == bb
	      && NONDEBUG_INSN_P (insn) && insn != from)
	    break;
	}

      pressure_class = get_regno_pressure_class (REGNO (dreg), &nregs);
      /* Decrease register pressure and update live_in information for
	 this block.  */
      if (!op_ref && pressure_class != NO_REGS)
	{
	  decreased_pressure += nregs;
	  BB_DATA (bb)->max_reg_pressure[pressure_class] -= nregs;
	  bitmap_clear_bit (BB_DATA (bb)->live_in, REGNO (dreg));
	}
    }
  return decreased_pressure;
}
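
/* Editor's note on the function above (illustration, not from the
   sources): if FROM is the only insn in BB using reg 150 and reg 150 is
   not live into any non-EXIT successor, hoisting FROM out of BB ends
   reg 150's live range there, so the block's maximum pressure for
   reg 150's pressure class drops by the number of hard registers the
   pseudo needs, and reg 150 is removed from BB's live_in set.  */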

/* Determine if the expression EXPR should be hoisted to EXPR_BB up in
   the flow graph, if it can reach BB unimpaired.  Stop the search if the
   expression would need to be moved more than DISTANCE instructions.

   DISTANCE is the number of instructions through which EXPR can be
   hoisted up in the flow graph.

   BB_SIZE points to an array which contains the number of instructions
   for each basic block.

   PRESSURE_CLASS and NREGS are the register class and number of hard
   registers for storing EXPR.

   HOISTED_BBS points to a bitmap indicating basic blocks through which
   EXPR is hoisted.

   FROM is the instruction from which EXPR is hoisted.

   It's unclear exactly what Muchnick meant by "unimpaired".  It seems
   to me that the expression must either be computed or transparent in
   *every* block in the path(s) from EXPR_BB to BB.  Any other definition
   would allow the expression to be hoisted out of loops, even if
   the expression wasn't a loop invariant.

   Contrast this to reachability for PRE, where an expression is
   considered reachable if *any* path reaches it instead of *all*
   paths.  */

static int
should_hoist_expr_to_dom (basic_block expr_bb, struct gcse_expr *expr,
			  basic_block bb, sbitmap visited,
			  HOST_WIDE_INT distance,
			  int *bb_size, enum reg_class pressure_class,
			  int *nregs, bitmap hoisted_bbs, rtx_insn *from)
{
  unsigned int i;
  edge pred;
  edge_iterator ei;
  sbitmap_iterator sbi;
  int visited_allocated_locally = 0;
  int decreased_pressure = 0;

  if (flag_ira_hoist_pressure)
    {
      /* Record the old information of basic block BB when it is
	 visited for the first time.  */
      if (!bitmap_bit_p (hoisted_bbs, bb->index))
	{
	  struct bb_data *data = BB_DATA (bb);
	  bitmap_copy (data->backup, data->live_in);
	  data->old_pressure = data->max_reg_pressure[pressure_class];
	}
      decreased_pressure = update_bb_reg_pressure (bb, from);
    }
  /* Terminate the search if the distance over which EXPR is allowed
     to move is exhausted.  */
  if (distance > 0)
    {
      if (flag_ira_hoist_pressure)
	{
	  /* Prefer to hoist EXPR if register pressure is decreased.  */
	  if (decreased_pressure > *nregs)
	    distance += bb_size[bb->index];
	  /* Let EXPR be hoisted through a basic block at no cost if one
	     of the following conditions is satisfied:

	     1. The basic block has low register pressure.
	     2. Register pressure won't be increased after hoisting EXPR.

	     Constant expressions are handled conservatively, because
	     hoisting constant expressions aggressively results in worse
	     code.  This decision is based on observations of CSiBE on
	     the ARM target, while it has no obvious effect on other
	     targets like x86, x86_64, mips and powerpc.  */
	  else if (CONST_INT_P (expr->expr)
		   || (BB_DATA (bb)->max_reg_pressure[pressure_class]
		       >= ira_class_hard_regs_num[pressure_class]
		       && decreased_pressure < *nregs))
	    distance -= bb_size[bb->index];
	}
      else
	distance -= bb_size[bb->index];

      if (distance <= 0)
	return 0;
    }
  else
    gcc_assert (distance == 0);

  if (visited == NULL)
    {
      visited_allocated_locally = 1;
      visited = sbitmap_alloc (last_basic_block_for_fn (cfun));
      bitmap_clear (visited);
    }

  FOR_EACH_EDGE (pred, ei, bb->preds)
    {
      basic_block pred_bb = pred->src;

      if (pred->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
	break;
      else if (pred_bb == expr_bb)
	continue;
      else if (bitmap_bit_p (visited, pred_bb->index))
	continue;
      else if (! bitmap_bit_p (transp[pred_bb->index], expr->bitmap_index))
	break;
      /* Not killed.  */
      else
	{
	  bitmap_set_bit (visited, pred_bb->index);
	  if (! should_hoist_expr_to_dom (expr_bb, expr, pred_bb,
					  visited, distance, bb_size,
					  pressure_class, nregs,
					  hoisted_bbs, from))
	    break;
	}
    }
  if (visited_allocated_locally)
    {
      /* If EXPR can be hoisted to expr_bb, record the basic blocks
	 through which EXPR is hoisted in hoisted_bbs.  */
      if (flag_ira_hoist_pressure && !pred)
	{
	  /* Record the basic block from which EXPR is hoisted.  */
	  bitmap_set_bit (visited, bb->index);
	  EXECUTE_IF_SET_IN_BITMAP (visited, 0, i, sbi)
	    bitmap_set_bit (hoisted_bbs, i);
	}
      sbitmap_free (visited);
    }

  return (pred == NULL);
}

/* Find occurrence in BB.  */

static struct gcse_occr *
find_occr_in_bb (struct gcse_occr *occr, basic_block bb)
{
  /* Find the right occurrence of this expression.  */
  while (occr && BLOCK_FOR_INSN (occr->insn) != bb)
    occr = occr->next;

  return occr;
}

/* Actually perform code hoisting.

   The code hoisting pass can hoist multiple computations of the same
   expression along dominated paths to a dominating basic block, like
   from b2/b3 to b1 as depicted below:

	      b1      ------
	      /\         |
	     /  \        |
	    bx   by   distance
	   /      \      |
	  /        \     |
	 b2        b3 ------

   Unfortunately code hoisting generally extends the live range of an
   output pseudo register, which increases register pressure and hurts
   register allocation.  To address this issue, an attribute MAX_DISTANCE
   is computed and attached to each expression.  The attribute is computed
   from the rtx cost of the corresponding expression and is used to
   control how far the expression can be hoisted up the flow graph.  As
   the expression is hoisted up the flow graph, GCC decreases its DISTANCE
   and stops the hoist if DISTANCE reaches 0.  Code hoisting can decrease
   register pressure if the live ranges of inputs are shrunk.

   Option "-fira-hoist-pressure" implements register pressure directed
   hoisting based on the above method.  The rationale is:
     1. Calculate register pressure for each basic block by reusing IRA
	facilities.
     2. When an expression is hoisted through one basic block, GCC checks
	the change of live ranges for inputs/output.  The basic block's
	register pressure will be increased because of the extended live
	range of the output.  However, register pressure will be decreased
	if the live ranges of inputs are shrunk.
     3. After knowing how hoisting affects register pressure, GCC prefers
	to hoist the expression if it can decrease register pressure, by
	increasing DISTANCE of the corresponding expression.
     4. If hoisting the expression increases register pressure, GCC checks
	the register pressure of the basic block and decreases DISTANCE
	only if the register pressure is high.  In other words, the
	expression will be hoisted through at no cost if the basic block
	has low register pressure.
     5. Update register pressure information for basic blocks through
	which the expression is hoisted.  */
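
/* Editor's worked example of the distance bookkeeping (illustration
   only, the numbers are made up): suppose an expression has
   max_distance 40 and its occurrence sits 10 insns into a dominated
   block D of size 25.  hoist_code first credits the unexecuted tail of
   D, giving 40 + (25 - 10) = 55.  should_hoist_expr_to_dom then
   subtracts the size of each block walked, so after D itself (25) and
   one intervening block of size 20 the budget is 55 - 25 - 20 = 10,
   still positive, and the hoist to the dominator is allowed.  */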

static int
hoist_code (void)
{
  basic_block bb, dominated;
  vec<basic_block> dom_tree_walk;
  unsigned int dom_tree_walk_index;
  vec<basic_block> domby;
  unsigned int i, j, k;
  struct gcse_expr **index_map;
  struct gcse_expr *expr;
  int *to_bb_head;
  int *bb_size;
  int changed = 0;
  struct bb_data *data;
  /* Basic blocks that have occurrences reachable from BB.  */
  bitmap from_bbs;
  /* Basic blocks through which expr is hoisted.  */
  bitmap hoisted_bbs = NULL;
  bitmap_iterator bi;

  /* Compute a mapping from expression number (`bitmap_index') to
     hash table entry.  */

  index_map = XCNEWVEC (struct gcse_expr *, expr_hash_table.n_elems);
  for (i = 0; i < expr_hash_table.size; i++)
    for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
      index_map[expr->bitmap_index] = expr;

  /* Calculate sizes of basic blocks and note how far
     each instruction is from the start of its block.  We then use this
     data to restrict distance an expression can travel.  */

  to_bb_head = XCNEWVEC (int, get_max_uid ());
  bb_size = XCNEWVEC (int, last_basic_block_for_fn (cfun));

  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *insn;
      int to_head;

      to_head = 0;
      FOR_BB_INSNS (bb, insn)
	{
	  /* Don't count debug instructions to avoid them affecting
	     decision choices.  */
	  if (NONDEBUG_INSN_P (insn))
	    to_bb_head[INSN_UID (insn)] = to_head++;
	}

      bb_size[bb->index] = to_head;
    }

  gcc_assert (EDGE_COUNT (ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs) == 1
	      && (EDGE_SUCC (ENTRY_BLOCK_PTR_FOR_FN (cfun), 0)->dest
		  == ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb));

  from_bbs = BITMAP_ALLOC (NULL);
  if (flag_ira_hoist_pressure)
    hoisted_bbs = BITMAP_ALLOC (NULL);

  dom_tree_walk = get_all_dominated_blocks (CDI_DOMINATORS,
					    ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb);
  /* Walk over each basic block looking for potentially hoistable
     expressions; nothing gets hoisted from the entry block.  */
  FOR_EACH_VEC_ELT (dom_tree_walk, dom_tree_walk_index, bb)
    {
      domby = get_dominated_to_depth (CDI_DOMINATORS, bb,
				      param_max_hoist_depth);

      if (domby.length () == 0)
	continue;

      /* Examine each expression that is very busy at the exit of this
	 block.  These are the potentially hoistable expressions.  */
      for (i = 0; i < SBITMAP_SIZE (hoist_vbeout[bb->index]); i++)
	{
	  if (bitmap_bit_p (hoist_vbeout[bb->index], i))
	    {
	      int nregs = 0;
	      enum reg_class pressure_class = NO_REGS;
	      /* Current expression.  */
	      struct gcse_expr *expr = index_map[i];
	      /* Number of occurrences of EXPR that can be hoisted to BB.  */
	      int hoistable = 0;
	      /* Occurrences reachable from BB.  */
	      vec<occr_t> occrs_to_hoist = vNULL;
	      /* We want to insert the expression into BB only once, so
		 note when we've inserted it.  */
	      int insn_inserted_p;
	      occr_t occr;

	      /* If an expression is computed in BB and is available at the
		 end of BB, hoist all occurrences dominated by BB to BB.  */
	      if (bitmap_bit_p (comp[bb->index], i))
		{
		  occr = find_occr_in_bb (expr->antic_occr, bb);

		  if (occr)
		    {
		      /* An occurrence might already have been deleted
			 while processing a dominator of BB.  */
		      if (!occr->deleted_p)
			{
			  gcc_assert (NONDEBUG_INSN_P (occr->insn));
			  hoistable++;
			}
		    }
		  else
		    hoistable++;
		}

	      /* We've found a potentially hoistable expression, now
		 we look at every block BB dominates to see if it
		 computes the expression.  */
	      FOR_EACH_VEC_ELT (domby, j, dominated)
		{
		  HOST_WIDE_INT max_distance;

		  /* Ignore self dominance.  */
		  if (bb == dominated)
		    continue;
		  /* We've found a dominated block, now see if it computes
		     the busy expression and whether or not moving that
		     expression to the "beginning" of that block is safe.  */
		  if (!bitmap_bit_p (antloc[dominated->index], i))
		    continue;

		  occr = find_occr_in_bb (expr->antic_occr, dominated);
		  gcc_assert (occr);

		  /* An occurrence might already have been deleted
		     while processing a dominator of BB.  */
		  if (occr->deleted_p)
		    continue;
		  gcc_assert (NONDEBUG_INSN_P (occr->insn));

		  max_distance = expr->max_distance;
		  if (max_distance > 0)
		    /* Adjust MAX_DISTANCE to account for the fact that
		       OCCR won't have to travel all of DOMINATED, but
		       only part of it.  */
		    max_distance += (bb_size[dominated->index]
				     - to_bb_head[INSN_UID (occr->insn)]);

		  pressure_class = get_pressure_class_and_nregs (occr->insn,
								 &nregs);

		  /* Note if the expression should be hoisted from the
		     dominated block to BB if it can reach DOMINATED
		     unimpaired.

		     Keep track of how many times this expression is hoistable
		     from a dominated block into BB.  */
		  if (should_hoist_expr_to_dom (bb, expr, dominated, NULL,
						max_distance, bb_size,
						pressure_class, &nregs,
						hoisted_bbs, occr->insn))
		    {
		      hoistable++;
		      occrs_to_hoist.safe_push (occr);
		      bitmap_set_bit (from_bbs, dominated->index);
		    }
		}
	      /* If we found more than one hoistable occurrence of this
		 expression, then note it in the vector of expressions to
		 hoist.  It makes no sense to hoist things which are computed
		 in only one BB, and doing so tends to pessimize register
		 allocation.  One could increase this value to try harder
		 to avoid any possible code expansion due to register
		 allocation issues; however experiments have shown that
		 the vast majority of hoistable expressions are only movable
		 from two successors, so raising this threshold is likely
		 to nullify any benefit we get from code hoisting.  */
	      if (hoistable > 1 && dbg_cnt (hoist_insn))
		{
		  /* If (hoistable != vec::length), then there is
		     an occurrence of EXPR in BB itself.  Don't waste
		     time looking for LCA in this case.  */
		  if ((unsigned) hoistable == occrs_to_hoist.length ())
		    {
		      basic_block lca;

		      lca = nearest_common_dominator_for_set (CDI_DOMINATORS,
							      from_bbs);
		      if (lca != bb)
			/* Punt, it's better to hoist these occurrences to
			   LCA.  */
			occrs_to_hoist.release ();
		    }
		}
	      else
		/* Punt, no point hoisting a single occurrence.  */
		occrs_to_hoist.release ();

	      if (flag_ira_hoist_pressure
		  && !occrs_to_hoist.is_empty ())
		{
		  /* Increase register pressure of basic blocks to which
		     expr is hoisted because of the extended live range of
		     the output.  */
		  data = BB_DATA (bb);
		  data->max_reg_pressure[pressure_class] += nregs;
		  EXECUTE_IF_SET_IN_BITMAP (hoisted_bbs, 0, k, bi)
		    {
		      data = BB_DATA (BASIC_BLOCK_FOR_FN (cfun, k));
		      data->max_reg_pressure[pressure_class] += nregs;
		    }
		}
	      else if (flag_ira_hoist_pressure)
		{
		  /* Restore register pressure and live_in info for basic
		     blocks recorded in hoisted_bbs when expr will not be
		     hoisted.  */
		  EXECUTE_IF_SET_IN_BITMAP (hoisted_bbs, 0, k, bi)
		    {
		      data = BB_DATA (BASIC_BLOCK_FOR_FN (cfun, k));
		      bitmap_copy (data->live_in, data->backup);
		      data->max_reg_pressure[pressure_class]
			= data->old_pressure;
		    }
		}

	      if (flag_ira_hoist_pressure)
		bitmap_clear (hoisted_bbs);

	      insn_inserted_p = 0;

	      /* Walk through occurrences of the I'th expression we want
		 to hoist to BB and make the transformations.  */
	      FOR_EACH_VEC_ELT (occrs_to_hoist, j, occr)
		{
		  rtx_insn *insn;
		  const_rtx set;

		  gcc_assert (!occr->deleted_p);

		  insn = occr->insn;
		  set = single_set_gcse (insn);

		  /* Create a pseudo-reg to store the result of reaching
		     expressions into.  Get the mode for the new pseudo
		     from the mode of the original destination pseudo.

		     It is important to use new pseudos whenever we
		     emit a set.  This will allow reload to use
		     rematerialization for such registers.  */
		  if (!insn_inserted_p)
		    expr->reaching_reg
		      = gen_reg_rtx_and_attrs (SET_DEST (set));

		  gcse_emit_move_after (SET_DEST (set), expr->reaching_reg,
					insn);
		  delete_insn (insn);
		  occr->deleted_p = 1;
		  changed = 1;
		  gcse_subst_count++;

		  if (!insn_inserted_p)
		    {
		      insert_insn_end_basic_block (expr, bb);
		      insn_inserted_p = 1;
		    }
		}

	      occrs_to_hoist.release ();
	      bitmap_clear (from_bbs);
	    }
	}
      domby.release ();
    }

  dom_tree_walk.release ();
  BITMAP_FREE (from_bbs);
  if (flag_ira_hoist_pressure)
    BITMAP_FREE (hoisted_bbs);

  free (bb_size);
  free (to_bb_head);
  free (index_map);

  return changed;
}

/* Return pressure class and number of needed hard registers (through
   *NREGS) of register REGNO.  */
static enum reg_class
get_regno_pressure_class (int regno, int *nregs)
{
  if (regno >= FIRST_PSEUDO_REGISTER)
    {
      enum reg_class pressure_class;

      pressure_class = reg_allocno_class (regno);
      pressure_class = ira_pressure_class_translate[pressure_class];
      *nregs
	= ira_reg_class_max_nregs[pressure_class][PSEUDO_REGNO_MODE (regno)];
      return pressure_class;
    }
  else if (! TEST_HARD_REG_BIT (ira_no_alloc_regs, regno)
	   && ! TEST_HARD_REG_BIT (eliminable_regset, regno))
    {
      *nregs = 1;
      return ira_pressure_class_translate[REGNO_REG_CLASS (regno)];
    }
  else
    {
      *nregs = 0;
      return NO_REGS;
    }
}

/* Return pressure class and number of hard registers (through *NREGS)
   for the destination of INSN.  */
static enum reg_class
get_pressure_class_and_nregs (rtx_insn *insn, int *nregs)
{
  rtx reg;
  enum reg_class pressure_class;
  const_rtx set = single_set_gcse (insn);

  reg = SET_DEST (set);
  if (GET_CODE (reg) == SUBREG)
    reg = SUBREG_REG (reg);
  if (MEM_P (reg))
    {
      *nregs = 0;
      pressure_class = NO_REGS;
    }
  else
    {
      gcc_assert (REG_P (reg));
      pressure_class = reg_allocno_class (REGNO (reg));
      pressure_class = ira_pressure_class_translate[pressure_class];
      *nregs
	= ira_reg_class_max_nregs[pressure_class][GET_MODE (SET_SRC (set))];
    }
  return pressure_class;
}

/* Increase (if INCR_P) or decrease current register pressure for
   register REGNO.  */
static void
change_pressure (int regno, bool incr_p)
{
  int nregs;
  enum reg_class pressure_class;

  pressure_class = get_regno_pressure_class (regno, &nregs);
  if (! incr_p)
    curr_reg_pressure[pressure_class] -= nregs;
  else
    {
      curr_reg_pressure[pressure_class] += nregs;
      if (BB_DATA (curr_bb)->max_reg_pressure[pressure_class]
	  < curr_reg_pressure[pressure_class])
	BB_DATA (curr_bb)->max_reg_pressure[pressure_class]
	  = curr_reg_pressure[pressure_class];
    }
}

/* Calculate register pressure for each basic block by walking insns
   from last to first.  */
static void
calculate_bb_reg_pressure (void)
{
  int i;
  unsigned int j;
  rtx_insn *insn;
  basic_block bb;
  bitmap curr_regs_live;
  bitmap_iterator bi;

  ira_setup_eliminable_regset ();
  curr_regs_live = BITMAP_ALLOC (&reg_obstack);
  FOR_EACH_BB_FN (bb, cfun)
    {
      curr_bb = bb;
      BB_DATA (bb)->live_in = BITMAP_ALLOC (NULL);
      BB_DATA (bb)->backup = BITMAP_ALLOC (NULL);
      bitmap_copy (BB_DATA (bb)->live_in, df_get_live_in (bb));
      bitmap_copy (curr_regs_live, df_get_live_out (bb));
      for (i = 0; i < ira_pressure_classes_num; i++)
	curr_reg_pressure[ira_pressure_classes[i]] = 0;
      EXECUTE_IF_SET_IN_BITMAP (curr_regs_live, 0, j, bi)
	change_pressure (j, true);

      FOR_BB_INSNS_REVERSE (bb, insn)
	{
	  rtx dreg;
	  int regno;
	  df_ref def, use;

	  if (! NONDEBUG_INSN_P (insn))
	    continue;

	  FOR_EACH_INSN_DEF (def, insn)
	    {
	      dreg = DF_REF_REAL_REG (def);
	      gcc_assert (REG_P (dreg));
	      regno = REGNO (dreg);
	      if (!(DF_REF_FLAGS (def)
		    & (DF_REF_PARTIAL | DF_REF_CONDITIONAL)))
		{
		  if (bitmap_clear_bit (curr_regs_live, regno))
		    change_pressure (regno, false);
		}
	    }

	  FOR_EACH_INSN_USE (use, insn)
	    {
	      dreg = DF_REF_REAL_REG (use);
	      gcc_assert (REG_P (dreg));
	      regno = REGNO (dreg);
	      if (bitmap_set_bit (curr_regs_live, regno))
		change_pressure (regno, true);
	    }
	}
    }
  BITMAP_FREE (curr_regs_live);

  if (dump_file == NULL)
    return;

  fprintf (dump_file, "\nRegister Pressure: \n");
  FOR_EACH_BB_FN (bb, cfun)
    {
      fprintf (dump_file, "  Basic block %d: \n", bb->index);
      for (i = 0; (int) i < ira_pressure_classes_num; i++)
	{
	  enum reg_class pressure_class;

	  pressure_class = ira_pressure_classes[i];
	  if (BB_DATA (bb)->max_reg_pressure[pressure_class] == 0)
	    continue;

	  fprintf (dump_file, "    %s=%d\n", reg_class_names[pressure_class],
		   BB_DATA (bb)->max_reg_pressure[pressure_class]);
	}
    }
  fprintf (dump_file, "\n");
}

/* Top level routine to perform one code hoisting (aka unification) pass.

   Return nonzero if a change was made.  */

static int
one_code_hoisting_pass (void)
{
  int changed = 0;

  gcse_subst_count = 0;
  gcse_create_count = 0;

  /* Return if there's nothing to do, or it is too expensive.  */
  if (n_basic_blocks_for_fn (cfun) <= NUM_FIXED_BLOCKS + 1
      || gcse_or_cprop_is_too_expensive (_("GCSE disabled")))
    return 0;

  doing_code_hoisting_p = true;

  /* Calculate register pressure for each basic block.  */
  if (flag_ira_hoist_pressure)
    {
      regstat_init_n_sets_and_refs ();
      ira_set_pseudo_classes (false, dump_file);
      alloc_aux_for_blocks (sizeof (struct bb_data));
      calculate_bb_reg_pressure ();
      regstat_free_n_sets_and_refs ();
    }

  /* We need alias.  */
  init_alias_analysis ();

  bytes_used = 0;
  gcc_obstack_init (&gcse_obstack);
  alloc_gcse_mem ();

  alloc_hash_table (&expr_hash_table);
  compute_hash_table (&expr_hash_table);
  if (dump_file)
    dump_hash_table (dump_file, "Code Hoisting Expressions", &expr_hash_table);

  if (expr_hash_table.n_elems > 0)
    {
      alloc_code_hoist_mem (last_basic_block_for_fn (cfun),
			    expr_hash_table.n_elems);
      compute_code_hoist_data ();
      changed = hoist_code ();
      free_code_hoist_mem ();
    }

  if (flag_ira_hoist_pressure)
    {
      free_aux_for_blocks ();
      free_reg_info ();
    }
  free_hash_table (&expr_hash_table);
  free_gcse_mem ();
  obstack_free (&gcse_obstack, NULL);

  /* We are finished with alias.  */
  end_alias_analysis ();

  if (dump_file)
    {
      fprintf (dump_file, "HOIST of %s, %d basic blocks, %d bytes needed, ",
	       current_function_name (), n_basic_blocks_for_fn (cfun),
	       bytes_used);
      fprintf (dump_file, "%d substs, %d insns created\n",
	       gcse_subst_count, gcse_create_count);
    }

  doing_code_hoisting_p = false;

  return changed;
}

/* Here we provide the things required to do store motion towards the exit.
   In order for this to be effective, gcse also needed to be taught how to
   move a load when it is killed only by a store to itself.

	    int i;
	    float a[10];

	    void foo(float scale)
	    {
	      for (i=0; i<10; i++)
		a[i] *= scale;
	    }

   'i' is both loaded and stored to in the loop.  Normally, gcse cannot move
   the load out since it's live around the loop, and stored at the bottom
   of the loop.

   The 'Load Motion' referred to and implemented in this file is
   an enhancement to gcse which, when using edge based LCM, recognizes
   this situation and allows gcse to move the load out of the loop.

   Once gcse has hoisted the load, store motion can then push this
   load towards the exit, and we end up with no loads or stores of 'i'
   in the loop.  */
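
/* Editor's sketch of the intended result (illustration only, not from
   the sources): after load motion hoists the load of 'i' and store
   motion sinks the store, the loop above behaves as if it were

	    int tmp;
	    for (tmp = 0; tmp < 10; tmp++)
	      a[tmp] *= scale;
	    i = tmp;

   so the loop body touches 'i' only through a register.  */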

/* This will search the ldst list for a matching expression.  If it
   doesn't find one, we create one and initialize it.  */

static struct ls_expr *
ldst_entry (rtx x)
{
  int do_not_record_p = 0;
  struct ls_expr * ptr;
  unsigned int hash;
  ls_expr **slot;
  struct ls_expr e;

  hash = hash_rtx (x, GET_MODE (x), &do_not_record_p,
		   NULL, /*have_reg_qty=*/false);

  e.pattern = x;
  slot = pre_ldst_table->find_slot_with_hash (&e, hash, INSERT);
  if (*slot)
    return *slot;

  ptr = XNEW (struct ls_expr);

  ptr->next = pre_ldst_mems;
  ptr->expr = NULL;
  ptr->pattern = x;
  ptr->pattern_regs = NULL_RTX;
  ptr->stores.create (0);
  ptr->reaching_reg = NULL_RTX;
  ptr->invalid = 0;
  ptr->index = 0;
  ptr->hash_index = hash;
  pre_ldst_mems = ptr;
  *slot = ptr;

  return ptr;
}

/* Free up an individual ldst entry.  */

static void
free_ldst_entry (struct ls_expr * ptr)
{
  ptr->stores.release ();

  free (ptr);
}

/* Free up all memory associated with the ldst list.  */

static void
free_ld_motion_mems (void)
{
  delete pre_ldst_table;
  pre_ldst_table = NULL;

  while (pre_ldst_mems)
    {
      struct ls_expr * tmp = pre_ldst_mems;

      pre_ldst_mems = pre_ldst_mems->next;

      free_ldst_entry (tmp);
    }

  pre_ldst_mems = NULL;
}

/* Dump debugging info about the ldst list.  */

static void
print_ldst_list (FILE * file)
{
  struct ls_expr * ptr;

  fprintf (file, "LDST list: \n");

  for (ptr = pre_ldst_mems; ptr != NULL; ptr = ptr->next)
    {
      fprintf (file, "  Pattern (%3d): ", ptr->index);

      print_rtl (file, ptr->pattern);

      fprintf (file, "\n	 Stores : ");
      print_rtx_insn_vec (file, ptr->stores);

      fprintf (file, "\n\n");
    }

  fprintf (file, "\n");
}

/* Return the entry for X in the ldst list if it is there and has not
   been invalidated, otherwise NULL.  */

static struct ls_expr *
find_rtx_in_ldst (rtx x)
{
  struct ls_expr e;
  ls_expr **slot;
  if (!pre_ldst_table)
    return NULL;
  e.pattern = x;
  slot = pre_ldst_table->find_slot (&e, NO_INSERT);
  if (!slot || (*slot)->invalid)
    return NULL;
  return *slot;
}

/* Load Motion for loads which only kill themselves.  */

/* Return true if X, a MEM, is a simple access with no side effects.
   These are the types of loads we consider for the ld_motion list,
   otherwise we let the usual aliasing take care of it.  */

static int
simple_mem (const_rtx x)
{
  if (MEM_VOLATILE_P (x))
    return 0;

  if (GET_MODE (x) == BLKmode)
    return 0;

  /* If we are handling exceptions, we must be careful with memory references
     that may trap.  If we are not, the behavior is undefined, so we may just
     continue.  */
  if (cfun->can_throw_non_call_exceptions && may_trap_p (x))
    return 0;

  if (side_effects_p (x))
    return 0;

  /* Do not consider function arguments passed on the stack.  */
  if (reg_mentioned_p (stack_pointer_rtx, x))
    return 0;

  if (flag_float_store && FLOAT_MODE_P (GET_MODE (x)))
    return 0;

  return 1;
}
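
/* Editor's examples (illustration only, not compiled): a load such as
   (mem:SI (symbol_ref "x")) is simple, while
   (mem:SI (post_inc:SI (reg ...))) fails the side_effects_p check, a
   volatile MEM must not be reordered, and any MEM mentioning the stack
   pointer is rejected because incoming stack arguments are handled
   elsewhere; all of those fall back to the ordinary aliasing path.  */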

/* Make sure there isn't a buried reference in this pattern anywhere.
   If there is, invalidate the entry for it since we're not capable
   of fixing it up just yet.  We have to be sure we know about ALL
   loads since the aliasing code will allow all entries in the
   ld_motion list to not-alias one another.  If we miss a load, we
   will get the wrong value since gcse might common it and we won't
   know to fix it up.  */

static void
invalidate_any_buried_refs (rtx x)
{
  const char * fmt;
  int i, j;
  struct ls_expr * ptr;

  /* Invalidate it in the list.  */
  if (MEM_P (x) && simple_mem (x))
    {
      ptr = ldst_entry (x);
      ptr->invalid = 1;
    }

  /* Recursively process the insn.  */
  fmt = GET_RTX_FORMAT (GET_CODE (x));

  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'e')
	invalidate_any_buried_refs (XEXP (x, i));
      else if (fmt[i] == 'E')
	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	  invalidate_any_buried_refs (XVECEXP (x, i, j));
    }
}
37881debfc3dSmrg
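/* An illustrative example (hypothetical RTL, for exposition only): in

     (set (reg:SI 100) (plus:SI (reg:SI 101)
				(mem:SI (symbol_ref:SI ("x")))))

   the MEM is buried inside the PLUS rather than being the whole
   SET_SRC, so it cannot be tracked as a simple load; the recursion
   above finds it and marks its load/store entry invalid.  */
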
/* Find all the 'simple' MEMs which are used in LOADs and STORES.  Simple
   means MEM loads and stores to symbols, with no side effects and no
   registers in the expression.  For a MEM destination, we also check
   that the insn is still valid if we replace the destination with a
   REG, as is done in update_ld_motion_stores.  Any uses/defs that do
   not match these criteria are invalidated and trimmed out later.  */

static void
compute_ld_motion_mems (void)
{
  struct ls_expr * ptr;
  basic_block bb;
  rtx_insn *insn;

  pre_ldst_mems = NULL;
  pre_ldst_table = new hash_table<pre_ldst_expr_hasher> (13);

  FOR_EACH_BB_FN (bb, cfun)
    {
      FOR_BB_INSNS (bb, insn)
	{
	  if (NONDEBUG_INSN_P (insn))
	    {
	      if (GET_CODE (PATTERN (insn)) == SET)
		{
		  rtx src = SET_SRC (PATTERN (insn));
		  rtx dest = SET_DEST (PATTERN (insn));

		  /* Check for a simple load.  */
		  if (MEM_P (src) && simple_mem (src))
		    {
		      ptr = ldst_entry (src);
		      if (!REG_P (dest))
			ptr->invalid = 1;
		    }
		  else
		    {
		      /* Make sure there isn't a buried load somewhere.  */
		      invalidate_any_buried_refs (src);
		    }

		  /* Check for a simple load through a REG_EQUAL note.  */
		  rtx note = find_reg_equal_equiv_note (insn), src_eq;
		  if (note
		      && REG_NOTE_KIND (note) == REG_EQUAL
		      && (src_eq = XEXP (note, 0))
		      && !(MEM_P (src_eq) && simple_mem (src_eq)))
		    invalidate_any_buried_refs (src_eq);

		  /* Check for stores.  Don't worry about aliased ones; they
		     will block any movement we might do later.  We only care
		     about this exact pattern, since those are the only
		     circumstances in which we will ignore the aliasing
		     info.  */
		  if (MEM_P (dest) && simple_mem (dest))
		    {
		      ptr = ldst_entry (dest);
		      machine_mode src_mode = GET_MODE (src);
		      if (! MEM_P (src)
			  && GET_CODE (src) != ASM_OPERANDS
			  /* Check for REG manually since want_to_gcse_p
			     returns 0 for all REGs.  */
			  && can_assign_to_reg_without_clobbers_p (src,
								   src_mode))
			ptr->stores.safe_push (insn);
		      else
			ptr->invalid = 1;
		    }
		}
	      else
		{
		  /* Invalidate all MEMs in the pattern and...  */
		  invalidate_any_buried_refs (PATTERN (insn));

		  /* ...in REG_EQUAL notes for PARALLELs with single SET.  */
		  rtx note = find_reg_equal_equiv_note (insn), src_eq;
		  if (note
		      && REG_NOTE_KIND (note) == REG_EQUAL
		      && (src_eq = XEXP (note, 0)))
		    invalidate_any_buried_refs (src_eq);
		}
	    }
	}
    }
}

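/* To sketch the intended outcome (hypothetical RTL, for illustration
   only): given the two insns

     (set (reg:SI 100) (mem:SI (symbol_ref:SI ("x"))))   ; simple load
     (set (mem:SI (symbol_ref:SI ("x"))) (reg:SI 101))   ; simple store

   both share one ls_expr entry for the MEM pattern; the store insn is
   pushed onto ptr->stores, while a load whose destination is not a
   REG, or a store whose source cannot be copied into a register
   without clobbers, would instead mark the entry invalid.  */
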
/* Remove any references that have been either invalidated or are not in the
   expression list for pre gcse.  */

static void
trim_ld_motion_mems (void)
{
  struct ls_expr * * last = & pre_ldst_mems;
  struct ls_expr * ptr = pre_ldst_mems;

  while (ptr != NULL)
    {
      struct gcse_expr * expr;

      /* Delete if entry has been made invalid.  */
      if (! ptr->invalid)
	{
	  /* Delete if we cannot find this mem in the expression list.  */
	  unsigned int hash = ptr->hash_index % expr_hash_table.size;

	  for (expr = expr_hash_table.table[hash];
	       expr != NULL;
	       expr = expr->next_same_hash)
	    if (expr_equiv_p (expr->expr, ptr->pattern))
	      break;
	}
      else
	expr = (struct gcse_expr *) 0;

      if (expr)
	{
	  /* Set the expression field if we are keeping it.  */
	  ptr->expr = expr;
	  last = & ptr->next;
	  ptr = ptr->next;
	}
      else
	{
	  *last = ptr->next;
	  pre_ldst_table->remove_elt_with_hash (ptr, ptr->hash_index);
	  free_ldst_entry (ptr);
	  ptr = * last;
	}
    }

  /* Show the world what we've found.  */
  if (dump_file && pre_ldst_mems != NULL)
    print_ldst_list (dump_file);
}

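/* For example (a sketch of the intent, not from the original sources):
   an entry whose MEM appeared inside a non-SET pattern was marked
   invalid above and is unlinked here; likewise a still-valid entry
   whose pattern never made it into expr_hash_table is dropped.  Only
   entries with a matching gcse_expr survive, with ptr->expr set.  */
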
/* This routine will take an expression which we are replacing with
   a reaching register, and update any stores that are needed if
   that expression is in the ld_motion list.  Stores are updated by
   copying their SRC to the reaching register, and then storing
   the reaching register into the store location.  This keeps the
   correct value in the reaching register for the loads.  */

static void
update_ld_motion_stores (struct gcse_expr * expr)
{
  struct ls_expr * mem_ptr;

  if ((mem_ptr = find_rtx_in_ldst (expr->expr)))
    {
      /* We can try to find just the REACHED stores, but it shouldn't
	 matter to set the reaching reg everywhere...  some might be
	 dead and should be eliminated later.  */

      /* We replace (set mem expr) with (set reg expr) (set mem reg)
	 where reg is the reaching reg used in the load.  We checked in
	 compute_ld_motion_mems that we can replace (set mem expr) with
	 (set reg expr) in that insn.  */
      rtx_insn *insn;
      unsigned int i;
      FOR_EACH_VEC_ELT_REVERSE (mem_ptr->stores, i, insn)
	{
	  rtx pat = PATTERN (insn);
	  rtx src = SET_SRC (pat);
	  rtx reg = expr->reaching_reg;

	  /* If we've already copied it, continue.  */
	  if (expr->reaching_reg == src)
	    continue;

	  if (dump_file)
	    {
	      fprintf (dump_file, "PRE: store updated with reaching reg ");
	      print_rtl (dump_file, reg);
	      fprintf (dump_file, ":\n ");
	      print_inline_rtx (dump_file, insn, 8);
	      fprintf (dump_file, "\n");
	    }

	  rtx_insn *copy = gen_move_insn (reg, copy_rtx (SET_SRC (pat)));
	  emit_insn_before (copy, insn);
	  SET_SRC (pat) = reg;
	  df_insn_rescan (insn);

	  /* Un-recognize this pattern since it's probably different now.  */
	  INSN_CODE (insn) = -1;
	  gcse_create_count++;
	}
    }
}

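/* Concretely (an illustrative sketch; register numbers are made up):
   if the load's reaching register is (reg:SI 200), a store

     (set (mem:SI (symbol_ref:SI ("x"))) (plus:SI (reg:SI 1) (reg:SI 2)))

   becomes the pair

     (set (reg:SI 200) (plus:SI (reg:SI 1) (reg:SI 2)))
     (set (mem:SI (symbol_ref:SI ("x"))) (reg:SI 200))

   so any load of "x" that was commoned to (reg:SI 200) still observes
   the stored value.  */
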
/* Return true if the graph is too expensive to optimize.  PASS is the
   optimization about to be performed.  */

bool
gcse_or_cprop_is_too_expensive (const char *pass)
{
  unsigned HOST_WIDE_INT memory_request
    = ((unsigned HOST_WIDE_INT)n_basic_blocks_for_fn (cfun)
       * SBITMAP_SET_SIZE (max_reg_num ()) * sizeof (SBITMAP_ELT_TYPE));

  /* Trying to perform global optimizations on flow graphs which have
     a high connectivity will take a long time and is unlikely to be
     particularly useful.

     In normal circumstances a cfg should have about twice as many
     edges as blocks.  But we do not want to punish small functions
     which have a couple of switch statements.  Rather than simply
     thresholding the number of blocks, we use something with more
     graceful degradation.  */
  if (n_edges_for_fn (cfun) > 20000 + n_basic_blocks_for_fn (cfun) * 4)
    {
      warning (OPT_Wdisabled_optimization,
	       "%s: %d basic blocks and %d edges/basic block",
	       pass, n_basic_blocks_for_fn (cfun),
	       n_edges_for_fn (cfun) / n_basic_blocks_for_fn (cfun));

      return true;
    }

  /* If allocating memory for the dataflow bitmaps would take up too much
     storage it's better just to disable the optimization.  */
  if (memory_request > (unsigned HOST_WIDE_INT)param_max_gcse_memory)
    {
      warning (OPT_Wdisabled_optimization,
	       "%s: %d basic blocks and %d registers; "
	       "increase %<--param max-gcse-memory%> above "
	       HOST_WIDE_INT_PRINT_UNSIGNED,
	       pass, n_basic_blocks_for_fn (cfun), max_reg_num (),
	       memory_request);

      return true;
    }

  return false;
}
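
/* A worked example of the heuristics above (all numbers illustrative):
   a function with 1000 basic blocks trips the connectivity check once
   it has more than 20000 + 1000 * 4 = 24000 edges, i.e. an average of
   24 edges per block, far above the ~2 expected of a normal CFG.
   Likewise, with 1000 blocks and enough pseudos that SBITMAP_SET_SIZE
   yields 128 elements of 8 bytes each, memory_request is
   1000 * 128 * 8 = 1024000 bytes, compared against
   --param max-gcse-memory.  */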

static unsigned int
execute_rtl_pre (void)
{
  int changed;
  delete_unreachable_blocks ();
  df_analyze ();
  changed = one_pre_gcse_pass ();
  flag_rerun_cse_after_global_opts |= changed;
  if (changed)
    cleanup_cfg (0);
  return 0;
}

static unsigned int
execute_rtl_hoist (void)
{
  int changed;
  delete_unreachable_blocks ();
  df_analyze ();
  changed = one_code_hoisting_pass ();
  flag_rerun_cse_after_global_opts |= changed;
  if (changed)
    cleanup_cfg (0);
  return 0;
}

namespace {

const pass_data pass_data_rtl_pre =
{
  RTL_PASS, /* type */
  "rtl pre", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_PRE, /* tv_id */
  PROP_cfglayout, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

class pass_rtl_pre : public rtl_opt_pass
{
public:
  pass_rtl_pre (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_rtl_pre, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *);
  virtual unsigned int execute (function *) { return execute_rtl_pre (); }

}; // class pass_rtl_pre

/* We do not construct an accurate cfg in functions which call
   setjmp, so none of these passes runs if the function calls
   setjmp.
   FIXME: Should just handle setjmp via REG_SETJMP notes.  */

bool
pass_rtl_pre::gate (function *fun)
{
  return optimize > 0 && flag_gcse
    && !fun->calls_setjmp
    && optimize_function_for_speed_p (fun)
    && dbg_cnt (pre);
}

} // anon namespace

rtl_opt_pass *
make_pass_rtl_pre (gcc::context *ctxt)
{
  return new pass_rtl_pre (ctxt);
}

namespace {

const pass_data pass_data_rtl_hoist =
{
  RTL_PASS, /* type */
  "hoist", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_HOIST, /* tv_id */
  PROP_cfglayout, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

class pass_rtl_hoist : public rtl_opt_pass
{
public:
  pass_rtl_hoist (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_rtl_hoist, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *);
  virtual unsigned int execute (function *) { return execute_rtl_hoist (); }

}; // class pass_rtl_hoist

bool
pass_rtl_hoist::gate (function *)
{
  return optimize > 0 && flag_gcse
    && !cfun->calls_setjmp
    /* It does not make sense to run code hoisting unless we are optimizing
       for code size -- it rarely makes programs faster, and can make them
       bigger if we did PRE (when optimizing for space, we don't run PRE).  */
    && optimize_function_for_size_p (cfun)
    && dbg_cnt (hoist);
}

} // anon namespace

rtl_opt_pass *
make_pass_rtl_hoist (gcc::context *ctxt)
{
  return new pass_rtl_hoist (ctxt);
}

/* Reset all state within gcse.c so that we can rerun the compiler
   within the same process.  For use by toplev::finalize.  */

void
gcse_c_finalize (void)
{
  test_insn = NULL;
}

#include "gt-gcse.h"