/* Loop versioning pass.
   Copyright (C) 2018-2020 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "gimple-iterator.h"
#include "tree-pass.h"
#include "gimplify-me.h"
#include "cfgloop.h"
#include "tree-ssa-loop.h"
#include "ssa.h"
#include "tree-scalar-evolution.h"
#include "tree-chrec.h"
#include "tree-ssa-loop-ivopts.h"
#include "fold-const.h"
#include "tree-ssa-propagate.h"
#include "tree-inline.h"
#include "domwalk.h"
#include "alloc-pool.h"
#include "vr-values.h"
#include "gimple-ssa-evrp-analyze.h"
#include "tree-vectorizer.h"
#include "omp-general.h"
#include "predict.h"
#include "tree-into-ssa.h"

namespace {

/* This pass looks for loops that could be simplified if certain loop
   invariant conditions were true.  It is effectively a form of loop
   splitting in which the pass produces the split conditions itself,
   instead of using ones that are already present in the IL.

   Versioning for when strides are 1
   ---------------------------------

   At the moment the only thing the pass looks for are memory references
   like:

     for (auto i : ...)
       ...x[i * stride]...

   It considers changing such loops to:

     if (stride == 1)
       for (auto i : ...)    [A]
         ...x[i]...
     else
       for (auto i : ...)    [B]
         ...x[i * stride]...

   This can have several benefits:

   (1) [A] is often easier or cheaper to vectorize than [B].

   (2) The scalar code in [A] is simpler than the scalar code in [B]
       (if the loops cannot be vectorized or need an epilogue loop).

   (3) We might recognize [A] as a pattern, such as a memcpy or memset.

   (4) [A] has simpler address evolutions, which can help other passes
       like loop interchange.

   The optimization is particularly useful for assumed-shape arrays in
   Fortran, where the stride of the innermost dimension depends on the
   array descriptor but is often equal to 1 in practice.  For example:

     subroutine f1(x)
       real :: x(:)
       x(:) = 100
     end subroutine f1

   generates the equivalent of:

     raw_stride = *x.dim[0].stride;
     stride = raw_stride != 0 ? raw_stride : 1;
     x_base = *x.data;
     ...
     tmp1 = stride * S;
     tmp2 = tmp1 - stride;
     *x_base[tmp2] = 1.0e+2;

   but in the common case that stride == 1, the last three statements
   simplify to:

     tmp3 = S + -1;
     *x_base[tmp3] = 1.0e+2;

   The optimization is in principle very simple.  The difficult parts are:

   (a) deciding which parts of a general address calculation correspond
       to the inner dimension of an array, since this usually isn't explicit
       in the IL, and for C often isn't even explicit in the source code

   (b) estimating when the transformation is worthwhile

   Structure
   ---------

   The pass has four phases:

   (1) Walk through the statements looking for and recording potential
       versioning opportunities.  Stop if there are none.

   (2) Use context-sensitive range information to see whether any versioning
       conditions are impossible in practice.  Remove them if so, and stop
       if no opportunities remain.

       (We do this only after (1) to keep compile time down when no
       versioning opportunities exist.)

   (3) Apply the cost model.  Decide which versioning opportunities are
       worthwhile and at which nesting level they should be applied.

   (4) Attempt to version all the loops selected by (3), so that:

         for (...)
           ...

       becomes:

         if (!cond)
           for (...) // Original loop
             ...
         else
           for (...) // New loop
             ...

       Use the version condition COND to simplify the new loop.  */
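
/* As a concrete C-level illustration of the transformation described
   above (a hypothetical example rather than a real testcase), a function
   such as:

     void f (float *x, int stride, int n)
     {
       for (int i = 0; i < n; ++i)
         x[i * stride] = 100.0f;
     }

   would conceptually become:

     if (stride == 1)
       for (int i = 0; i < n; ++i)    // contiguous accesses; often easier
         x[i] = 100.0f;               // to vectorize or pattern-match
     else
       for (int i = 0; i < n; ++i)    // original strided form
         x[i * stride] = 100.0f;

   with the stride == 1 check hoisted to the outermost loop level that
   all the version conditions allow.  */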

/* Enumerates the likelihood that a particular value indexes the inner
   dimension of an array.  */
enum inner_likelihood {
  INNER_UNLIKELY,
  INNER_DONT_KNOW,
  INNER_LIKELY
};

/* Information about one term of an address_info.  */
struct address_term_info
{
  /* The value of the term is EXPR * MULTIPLIER.  */
  tree expr;
  unsigned HOST_WIDE_INT multiplier;

  /* The stride applied by EXPR in each iteration of some unrecorded loop,
     or null if no stride has been identified.  */
  tree stride;

  /* Enumerates the likelihood that EXPR indexes the inner dimension
     of an array.  */
  enum inner_likelihood inner_likelihood;

  /* True if STRIDE == 1 is a versioning opportunity when considered
     in isolation.  */
  bool versioning_opportunity_p;
};

/* Information about an address calculation, and the range of constant
   offsets applied to it.  */
class address_info
{
public:
  static const unsigned int MAX_TERMS = 8;

  /* One statement that calculates the address.  If multiple statements
     share the same address, we only record the first.  */
  gimple *stmt;

  /* The loop containing STMT (cached for convenience).  If multiple
     statements share the same address, they all belong to this loop.  */
  class loop *loop;

  /* A decomposition of the calculation into a sum of terms plus an
     optional base.  When BASE is provided, it is never an SSA name.
     Once initialization is complete, all members of TERMS are SSA names.  */
  tree base;
  auto_vec<address_term_info, MAX_TERMS> terms;

  /* All bytes accessed from the address fall in the offset range
     [MIN_OFFSET, MAX_OFFSET).  */
  HOST_WIDE_INT min_offset, max_offset;
};
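
/* In other words, the address being analyzed is treated as having the
   shape:

     BASE + TERMS[0].expr * TERMS[0].multiplier
          + ...
          + TERMS[N-1].expr * TERMS[N-1].multiplier

   with every byte accessed through it falling in [MIN_OFFSET, MAX_OFFSET).
   As an illustrative sketch (not taken from a real dump), a 4-byte access
   whose address is x_1 + tmp_2 * 4, where tmp_2 holds i * stride_3, would
   have one term for x_1 and one for tmp_2, with the latter's STRIDE
   recorded as stride_3 once the analysis below identifies it.  */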

/* Stores addresses based on their base and terms (ignoring the offsets).  */
struct address_info_hasher : nofree_ptr_hash <address_info>
{
  static hashval_t hash (const address_info *);
  static bool equal (const address_info *, const address_info *);
};

/* Information about the versioning we'd like to apply to a loop.  */
class loop_info
{
public:
  bool worth_versioning_p () const;

  /* True if we've decided not to version this loop.  The remaining
     fields are meaningless if so.  */
  bool rejected_p;

  /* True if at least one subloop of this loop benefits from versioning.  */
  bool subloops_benefit_p;

  /* An estimate of the total number of instructions in the loop,
     excluding those in subloops that benefit from versioning.  */
  unsigned int num_insns;

  /* The outermost loop that can handle all the version checks
     described below.  */
  class loop *outermost;

  /* The first entry in the list of blocks that belong to this loop
     (and not to subloops).  m_next_block_in_loop provides the chain
     pointers for the list.  */
  basic_block block_list;

  /* We'd like to version the loop for the case in which these SSA names
     (keyed off their SSA_NAME_VERSION) are all equal to 1 at runtime.  */
  bitmap_head unity_names;

  /* If versioning succeeds, this points to the version of the loop that
     assumes the version conditions hold.  */
  class loop *optimized_loop;
};

/* The main pass structure.  */
class loop_versioning
{
public:
  loop_versioning (function *);
  ~loop_versioning ();
  unsigned int run ();

private:
  /* Used to walk the dominator tree to find loop versioning conditions
     that are always false.  */
  class lv_dom_walker : public dom_walker
  {
  public:
    lv_dom_walker (loop_versioning &);

    edge before_dom_children (basic_block) FINAL OVERRIDE;
    void after_dom_children (basic_block) FINAL OVERRIDE;

  private:
    /* The parent pass.  */
    loop_versioning &m_lv;

    /* Used to build context-dependent range information.  */
    evrp_range_analyzer m_range_analyzer;
  };

  /* Used to simplify statements based on conditions that are established
     by the version checks.  */
  class name_prop : public substitute_and_fold_engine
  {
  public:
    name_prop (loop_info &li) : m_li (li) {}
    tree get_value (tree) FINAL OVERRIDE;

  private:
    /* Information about the versioning we've performed on the loop.  */
    loop_info &m_li;
  };

  loop_info &get_loop_info (class loop *loop) { return m_loops[loop->num]; }

  unsigned int max_insns_for_loop (class loop *);
  bool expensive_stmt_p (gimple *);

  void version_for_unity (gimple *, tree);
  bool acceptable_multiplier_p (tree, unsigned HOST_WIDE_INT,
				unsigned HOST_WIDE_INT * = 0);
  bool acceptable_type_p (tree, unsigned HOST_WIDE_INT *);
  bool multiply_term_by (address_term_info &, tree);
  inner_likelihood get_inner_likelihood (tree, unsigned HOST_WIDE_INT);
  void dump_inner_likelihood (address_info &, address_term_info &);
  void analyze_stride (address_info &, address_term_info &,
		       tree, class loop *);
  bool find_per_loop_multiplication (address_info &, address_term_info &);
  bool analyze_term_using_scevs (address_info &, address_term_info &);
  void analyze_arbitrary_term (address_info &, address_term_info &);
  void analyze_address_fragment (address_info &);
  void record_address_fragment (gimple *, unsigned HOST_WIDE_INT,
				tree, unsigned HOST_WIDE_INT, HOST_WIDE_INT);
  void analyze_expr (gimple *, tree);
  bool analyze_block (basic_block);
  bool analyze_blocks ();

  void prune_loop_conditions (class loop *, vr_values *);
  bool prune_conditions ();

  void merge_loop_info (class loop *, class loop *);
  void add_loop_to_queue (class loop *);
  bool decide_whether_loop_is_versionable (class loop *);
  bool make_versioning_decisions ();

  bool version_loop (class loop *);
  void implement_versioning_decisions ();

  /* The function we're optimizing.  */
  function *m_fn;

  /* The obstack to use for all pass-specific bitmaps.  */
  bitmap_obstack m_bitmap_obstack;

  /* An obstack to use for general allocation.  */
  obstack m_obstack;

  /* The number of loops in the function.  */
  unsigned int m_nloops;

  /* The total number of loop version conditions we've found.  */
  unsigned int m_num_conditions;

  /* Assume that an address fragment of the form i * stride * scale
     (for variable stride and constant scale) will not benefit from
     versioning for stride == 1 when scale is greater than this value.  */
  unsigned HOST_WIDE_INT m_maximum_scale;

  /* Information about each loop.  */
  auto_vec<loop_info> m_loops;

  /* Used to form a linked list of blocks that belong to a loop,
     started by loop_info::block_list.  */
  auto_vec<basic_block> m_next_block_in_loop;

  /* The list of loops that we've decided to version.  */
  auto_vec<class loop *> m_loops_to_version;

  /* A table of addresses in the current loop, keyed off their values
     but not their offsets.  */
  hash_table <address_info_hasher> m_address_table;

  /* A list of all addresses in M_ADDRESS_TABLE, in a predictable order.  */
  auto_vec <address_info *, 32> m_address_list;
};
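
/* A rough sketch of how the phases listed in the header comment are
   expected to map onto the members above (the actual driver is the
   run () member; the mapping shown here is indicative only):

     loop_versioning lv (fn);
     (1) lv.analyze_blocks ();			  -- record opportunities
     (2) lv.prune_conditions ();		  -- drop impossible conditions
     (3) lv.make_versioning_decisions ();	  -- apply the cost model
     (4) lv.implement_versioning_decisions ();	  -- version the chosen loops  */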

/* If EXPR is an SSA name and not a default definition, return the
   defining statement, otherwise return null.  */

static gimple *
maybe_get_stmt (tree expr)
{
  if (TREE_CODE (expr) == SSA_NAME && !SSA_NAME_IS_DEFAULT_DEF (expr))
    return SSA_NAME_DEF_STMT (expr);
  return NULL;
}

/* Like maybe_get_stmt, but also return null if the defining
   statement isn't an assignment.  */

static gassign *
maybe_get_assign (tree expr)
{
  return safe_dyn_cast <gassign *> (maybe_get_stmt (expr));
}

/* Return true if this pass should look through a cast of expression FROM
   to type TYPE when analyzing pieces of an address.  */

static bool
look_through_cast_p (tree type, tree from)
{
  return (INTEGRAL_TYPE_P (TREE_TYPE (from)) == INTEGRAL_TYPE_P (type)
	  && POINTER_TYPE_P (TREE_TYPE (from)) == POINTER_TYPE_P (type));
}

/* Strip all conversions of integers or pointers from EXPR, regardless
   of whether the conversions are nops.  This is useful in the context
   of this pass because we're not trying to fold or simulate the
   expression; we just want to see how it's structured.  */

static tree
strip_casts (tree expr)
{
  const unsigned int MAX_NITERS = 4;

  tree type = TREE_TYPE (expr);
  while (CONVERT_EXPR_P (expr)
	 && look_through_cast_p (type, TREE_OPERAND (expr, 0)))
    expr = TREE_OPERAND (expr, 0);

  for (unsigned int niters = 0; niters < MAX_NITERS; ++niters)
    {
      gassign *assign = maybe_get_assign (expr);
      if (assign
	  && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (assign))
	  && look_through_cast_p (type, gimple_assign_rhs1 (assign)))
	expr = gimple_assign_rhs1 (assign);
      else
	break;
    }
  return expr;
}
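
/* For example (using illustrative SSA names), if _1 is defined by
   _1 = (sizetype) i_2, then strip_casts (_1) returns i_2.  Conversions
   that appear directly in the expression are stripped first, and up to
   MAX_NITERS further statement-level conversions are looked through.  */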

/* Compare two address_term_infos in the same address_info.  */

static int
compare_address_terms (const void *a_uncast, const void *b_uncast)
{
  const address_term_info *a = (const address_term_info *) a_uncast;
  const address_term_info *b = (const address_term_info *) b_uncast;

  if (a->expr != b->expr)
    return SSA_NAME_VERSION (a->expr) < SSA_NAME_VERSION (b->expr) ? -1 : 1;

  if (a->multiplier != b->multiplier)
    return a->multiplier < b->multiplier ? -1 : 1;

  return 0;
}

/* Dump ADDRESS using flags FLAGS.  */

static void
dump_address_info (dump_flags_t flags, address_info &address)
{
  if (address.base)
    dump_printf (flags, "%T + ", address.base);
  for (unsigned int i = 0; i < address.terms.length (); ++i)
    {
      if (i != 0)
	dump_printf (flags, " + ");
      dump_printf (flags, "%T", address.terms[i].expr);
      if (address.terms[i].multiplier != 1)
	dump_printf (flags, " * %wd", address.terms[i].multiplier);
    }
  dump_printf (flags, " + [%wd, %wd]",
	       address.min_offset, address.max_offset - 1);
}

/* Hash an address_info based on its base and terms.  */

hashval_t
address_info_hasher::hash (const address_info *info)
{
  inchash::hash hash;
  hash.add_int (info->base ? TREE_CODE (info->base) : 0);
  hash.add_int (info->terms.length ());
  for (unsigned int i = 0; i < info->terms.length (); ++i)
    {
      hash.add_int (SSA_NAME_VERSION (info->terms[i].expr));
      hash.add_hwi (info->terms[i].multiplier);
    }
  return hash.end ();
}

/* Return true if two address_infos have equal bases and terms.  Other
   properties might be different (such as the statement or constant
   offset range).  */

bool
address_info_hasher::equal (const address_info *a, const address_info *b)
{
  if (a->base != b->base
      && (!a->base || !b->base || !operand_equal_p (a->base, b->base, 0)))
    return false;

  if (a->terms.length () != b->terms.length ())
    return false;

  for (unsigned int i = 0; i < a->terms.length (); ++i)
    if (a->terms[i].expr != b->terms[i].expr
	|| a->terms[i].multiplier != b->terms[i].multiplier)
      return false;

  return true;
}

/* Return true if we want to version the loop, i.e. if we have a
   specific reason for doing so and no specific reason not to.  */

bool
loop_info::worth_versioning_p () const
{
  return (!rejected_p
	  && (!bitmap_empty_p (&unity_names) || subloops_benefit_p));
}

loop_versioning::lv_dom_walker::lv_dom_walker (loop_versioning &lv)
  : dom_walker (CDI_DOMINATORS), m_lv (lv), m_range_analyzer (false)
{
}

/* Process BB before processing the blocks it dominates.  */

edge
loop_versioning::lv_dom_walker::before_dom_children (basic_block bb)
{
  m_range_analyzer.enter (bb);

  if (bb == bb->loop_father->header)
    m_lv.prune_loop_conditions (bb->loop_father,
				m_range_analyzer.get_vr_values ());

  for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
       gsi_next (&si))
    m_range_analyzer.record_ranges_from_stmt (gsi_stmt (si), false);

  return NULL;
}

/* Process BB after processing the blocks it dominates.  */

void
loop_versioning::lv_dom_walker::after_dom_children (basic_block bb)
{
  m_range_analyzer.leave (bb);
}

/* Decide whether to replace VAL with a new value in a versioned loop.
   Return the new value if so, otherwise return null.  */

tree
loop_versioning::name_prop::get_value (tree val)
{
  if (TREE_CODE (val) == SSA_NAME
      && bitmap_bit_p (&m_li.unity_names, SSA_NAME_VERSION (val)))
    return build_one_cst (TREE_TYPE (val));
  return NULL_TREE;
}
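
/* In other words, within the loop copy that assumes the version
   conditions, every SSA name recorded in unity_names is treated as the
   constant 1, which lets the substitute-and-fold machinery simplify
   expressions (such as multiplications by that name) in the new loop.  */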

/* Initialize the structure to optimize FN.  */

loop_versioning::loop_versioning (function *fn)
  : m_fn (fn),
    m_nloops (number_of_loops (fn)),
    m_num_conditions (0),
    m_address_table (31)
{
  bitmap_obstack_initialize (&m_bitmap_obstack);
  gcc_obstack_init (&m_obstack);

  /* Initialize the loop information.  */
  m_loops.safe_grow_cleared (m_nloops);
  for (unsigned int i = 0; i < m_nloops; ++i)
    {
      m_loops[i].outermost = get_loop (m_fn, 0);
      bitmap_initialize (&m_loops[i].unity_names, &m_bitmap_obstack);
    }

  /* Initialize the list of blocks that belong to each loop.  */
  unsigned int nbbs = last_basic_block_for_fn (fn);
  m_next_block_in_loop.safe_grow (nbbs);
  basic_block bb;
  FOR_EACH_BB_FN (bb, fn)
    {
      loop_info &li = get_loop_info (bb->loop_father);
      m_next_block_in_loop[bb->index] = li.block_list;
      li.block_list = bb;
    }

  /* MAX_FIXED_MODE_SIZE should be a reasonable maximum scale for
     unvectorizable code, since it is the largest size that can be
     handled efficiently by scalar code.  omp_max_vf calculates the
     maximum number of bytes in a vector, when such a value is relevant
     to loop optimization.  */
  m_maximum_scale = estimated_poly_value (omp_max_vf ());
  m_maximum_scale = MAX (m_maximum_scale, MAX_FIXED_MODE_SIZE);
}

loop_versioning::~loop_versioning ()
{
  bitmap_obstack_release (&m_bitmap_obstack);
  obstack_free (&m_obstack, NULL);
}

/* Return the maximum number of instructions allowed in LOOP before
   it becomes too big for versioning.

   There are separate limits for inner and outer loops.  The limit for
   inner loops applies only to loops that benefit directly from versioning.
   The limit for outer loops applies to all code in the outer loop and
   its subloops that *doesn't* benefit directly from versioning; such code
   would be "taken along for the ride".  The idea is that if the cost of
   the latter is small, it is better to version outer loops rather than
   inner loops, both to reduce the number of repeated checks and to enable
   more of the loop nest to be optimized as a natural nest (e.g. by loop
   interchange or outer-loop vectorization).  */

unsigned int
loop_versioning::max_insns_for_loop (class loop *loop)
{
  return (loop->inner
	  ? param_loop_versioning_max_outer_insns
	  : param_loop_versioning_max_inner_insns);
}
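
/* These limits correspond to --param knobs, so they can be tuned from
   the command line.  An illustrative invocation (the values shown are
   examples, not necessarily the defaults) might be:

     gcc -O3 -fversion-loops-for-strides \
	 --param loop-versioning-max-inner-insns=200 \
	 --param loop-versioning-max-outer-insns=100 ...  */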

/* Return true if for cost reasons we should avoid versioning any loop
   that contains STMT.

   Note that we don't need to check whether versioning is invalid for
   correctness reasons, since the versioning process does that for us.
   The conditions involved are too rare to be worth duplicating here.  */

bool
loop_versioning::expensive_stmt_p (gimple *stmt)
{
  if (gcall *call = dyn_cast <gcall *> (stmt))
    /* Assume for now that the time spent in an "expensive" call would
       overwhelm any saving from versioning.  */
    return !gimple_inexpensive_call_p (call);
  return false;
}

/* Record that we want to version the loop that contains STMT for the
   case in which SSA name NAME is equal to 1.  We already know that NAME
   is invariant in the loop.  */

void
loop_versioning::version_for_unity (gimple *stmt, tree name)
{
  class loop *loop = loop_containing_stmt (stmt);
  loop_info &li = get_loop_info (loop);

  if (bitmap_set_bit (&li.unity_names, SSA_NAME_VERSION (name)))
    {
      /* This is the first time we've wanted to version LOOP for NAME.
	 Keep track of the outermost loop that can handle all versioning
	 checks in LI.  */
      class loop *outermost
	= outermost_invariant_loop_for_expr (loop, name);
      if (loop_depth (li.outermost) < loop_depth (outermost))
	li.outermost = outermost;

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, stmt, "want to version containing loop"
			   " for when %T == 1", name);
	  if (outermost == loop)
	    dump_printf (MSG_NOTE, "; cannot hoist check further");
	  else
	    {
	      dump_printf (MSG_NOTE, "; could implement the check at loop"
			   " depth %d", loop_depth (outermost));
	      if (loop_depth (li.outermost) > loop_depth (outermost))
		dump_printf (MSG_NOTE, ", but other checks only allow"
			     " a depth of %d", loop_depth (li.outermost));
	    }
	  dump_printf (MSG_NOTE, "\n");
	}

      m_num_conditions += 1;
    }
  else
    {
      /* This is a duplicate request.  */
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, stmt, "already asked to version containing"
			 " loop for when %T == 1\n", name);
    }
}

/* Return true if OP1_TREE is constant and if in principle it is worth
   versioning an address fragment of the form:

     i * OP1_TREE * OP2 * stride

   for the case in which stride == 1.  This in practice means testing
   whether:

     OP1_TREE * OP2 <= M_MAXIMUM_SCALE.

   If RESULT is nonnull, store OP1_TREE * OP2 there when returning true.  */

bool
loop_versioning::acceptable_multiplier_p (tree op1_tree,
					  unsigned HOST_WIDE_INT op2,
					  unsigned HOST_WIDE_INT *result)
{
  if (tree_fits_uhwi_p (op1_tree))
    {
      unsigned HOST_WIDE_INT op1 = tree_to_uhwi (op1_tree);
      /* The first part checks for overflow.  */
      if (op1 * op2 >= op2 && op1 * op2 <= m_maximum_scale)
	{
	  if (result)
	    *result = op1 * op2;
	  return true;
	}
    }
  return false;
}
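
/* As a worked example of the check above: if M_MAXIMUM_SCALE happened to
   be 64, then an OP1_TREE of 4 combined with OP2 == 8 would be accepted
   (storing 32 in *RESULT), whereas OP2 == 32 would be rejected because
   4 * 32 == 128 exceeds the maximum scale.  The numbers are purely
   illustrative.  */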

/* Return true if it is worth using loop versioning on a memory access
   of type TYPE.  Store the size of the access in *SIZE if so.  */

bool
loop_versioning::acceptable_type_p (tree type, unsigned HOST_WIDE_INT *size)
{
  return (TYPE_SIZE_UNIT (type)
	  && acceptable_multiplier_p (TYPE_SIZE_UNIT (type), 1, size));
}

/* See whether OP is constant and whether we can multiply TERM by that
   constant without exceeding M_MAXIMUM_SCALE.  Return true and update
   TERM if so.  */

bool
loop_versioning::multiply_term_by (address_term_info &term, tree op)
{
  return acceptable_multiplier_p (op, term.multiplier, &term.multiplier);
}

/* Decide whether an address fragment of the form STRIDE * MULTIPLIER
   is likely to be indexing an innermost dimension, returning the result
   as an INNER_* probability.  */

inner_likelihood
loop_versioning::get_inner_likelihood (tree stride,
				       unsigned HOST_WIDE_INT multiplier)
{
  const unsigned int MAX_NITERS = 8;

  /* Iterate over possible values of STRIDE.  Return INNER_LIKELY if at
     least one of those values is likely to be for the innermost dimension.
     Record in UNLIKELY_P if at least one of those values is unlikely to be
     for the innermost dimension.

     E.g. for:

       stride = cond ? a * b : 1

     we should treat STRIDE as being a likely inner dimension, since
     we know that it is 1 under at least some circumstances.  (See the
     Fortran example below.)  However:

       stride = a * b

     on its own is unlikely to be for the innermost dimension, since
     that would require both a and b to be 1 at runtime.  */
  bool unlikely_p = false;
  tree worklist[MAX_NITERS];
  unsigned int length = 0;
  worklist[length++] = stride;
  for (unsigned int i = 0; i < length; ++i)
    {
      tree expr = worklist[i];

      if (CONSTANT_CLASS_P (expr))
	{
	  /* See if EXPR * MULTIPLIER would be consistent with an individual
	     access or a small grouped access.  */
	  if (acceptable_multiplier_p (expr, multiplier))
	    return INNER_LIKELY;
	  else
	    unlikely_p = true;
	}
      else if (gimple *stmt = maybe_get_stmt (expr))
	{
	  /* If EXPR is set by a PHI node, queue its arguments in case
	     we find one that is consistent with an inner dimension.

	     An important instance of this is the Fortran handling of array
	     descriptors, which calculates the stride of the inner dimension
	     using a PHI equivalent of:

		raw_stride = a.dim[0].stride;
		stride = raw_stride != 0 ? raw_stride : 1;

	     (Strides for outer dimensions do not treat 0 specially.)  */
	  if (gphi *phi = dyn_cast <gphi *> (stmt))
	    {
	      unsigned int nargs = gimple_phi_num_args (phi);
	      for (unsigned int j = 0; j < nargs && length < MAX_NITERS; ++j)
		worklist[length++] = strip_casts (gimple_phi_arg_def (phi, j));
	    }
	  /* If the value is set by an assignment, expect it to be read
	     from memory (such as an array descriptor) rather than be
	     calculated.  */
	  else if (gassign *assign = dyn_cast <gassign *> (stmt))
	    {
	      if (!gimple_assign_load_p (assign))
		unlikely_p = true;
	    }
	  /* Things like calls don't really tell us anything.  */
	}
    }

  /* We didn't find any possible values of STRIDE that were likely to be
     for the innermost dimension.  If we found one that was actively
     unlikely to be for the innermost dimension, assume that that applies
     to STRIDE too.  */
  return unlikely_p ? INNER_UNLIKELY : INNER_DONT_KNOW;
}

/* Dump the likelihood that TERM's stride is for the innermost dimension.
   ADDRESS is the address that contains TERM.  */

void
loop_versioning::dump_inner_likelihood (address_info &address,
					address_term_info &term)
{
  if (term.inner_likelihood == INNER_LIKELY)
    dump_printf_loc (MSG_NOTE, address.stmt, "%T is likely to be the"
		     " innermost dimension\n", term.stride);
  else if (term.inner_likelihood == INNER_UNLIKELY)
    dump_printf_loc (MSG_NOTE, address.stmt, "%T is probably not the"
		     " innermost dimension\n", term.stride);
  else
    dump_printf_loc (MSG_NOTE, address.stmt, "cannot tell whether %T"
		     " is the innermost dimension\n", term.stride);
}

/* The caller has identified that STRIDE is the stride of interest
   in TERM, and that the stride is applied in OP_LOOP.  Record this
   information in TERM, deciding whether STRIDE is likely to be for
   the innermost dimension of an array and whether it represents a
   versioning opportunity.  ADDRESS is the address that contains TERM.  */

void
loop_versioning::analyze_stride (address_info &address,
				 address_term_info &term,
				 tree stride, class loop *op_loop)
{
  term.stride = stride;

  term.inner_likelihood = get_inner_likelihood (stride, term.multiplier);
  if (dump_enabled_p ())
    dump_inner_likelihood (address, term);

  /* To be a versioning opportunity we require:

     - The multiplier applied by TERM is equal to the access size,
       so that when STRIDE is 1, the accesses in successive loop
       iterations are consecutive.

       This is deliberately conservative.  We could relax it to handle
       other cases (such as those with gaps between iterations) if we
       find any real testcases for which it's useful.

     - the stride is applied in the same loop as STMT rather than
       in an outer loop.  Although versioning for strides applied in
       outer loops could help in some cases -- such as enabling
       more loop interchange -- the savings are much lower than for
       inner loops.

     - the stride is an SSA name that is invariant in STMT's loop,
       since otherwise versioning isn't possible.  */
  unsigned HOST_WIDE_INT access_size = address.max_offset - address.min_offset;
  if (term.multiplier == access_size
      && address.loop == op_loop
      && TREE_CODE (stride) == SSA_NAME
      && expr_invariant_in_loop_p (address.loop, stride))
    {
      term.versioning_opportunity_p = true;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, address.stmt, "%T == 1 is a versioning"
			 " opportunity\n", stride);
    }
}
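
/* As an illustration of the conditions above (a hypothetical case, not
   taken from a dump): for a 4-byte access x[i * stride] whose
   multiplication happens in the same loop as the access, the term's
   multiplier is 4, the access size (MAX_OFFSET - MIN_OFFSET) is also 4,
   and STRIDE is a loop-invariant SSA name, so STRIDE == 1 is recorded
   as a versioning opportunity.  */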

/* See whether address term TERM (which belongs to ADDRESS) is the result
   of multiplying a varying SSA name by a loop-invariant SSA name.
   Return true and update TERM if so.

   This handles both cases that SCEV might handle, such as:

     for (int i = 0; i < n; ++i)
       res += a[i * stride];

   and ones in which the term varies arbitrarily between iterations, such as:

     for (int i = 0; i < n; ++i)
       res += a[index[i] * stride];  */

bool
loop_versioning::find_per_loop_multiplication (address_info &address,
					       address_term_info &term)
{
  gassign *mult = maybe_get_assign (term.expr);
  if (!mult || gimple_assign_rhs_code (mult) != MULT_EXPR)
    return false;

  class loop *mult_loop = loop_containing_stmt (mult);
  if (!loop_outer (mult_loop))
    return false;

  tree op1 = strip_casts (gimple_assign_rhs1 (mult));
  tree op2 = strip_casts (gimple_assign_rhs2 (mult));
  if (TREE_CODE (op1) != SSA_NAME || TREE_CODE (op2) != SSA_NAME)
    return false;

  bool invariant1_p = expr_invariant_in_loop_p (mult_loop, op1);
  bool invariant2_p = expr_invariant_in_loop_p (mult_loop, op2);
  if (invariant1_p == invariant2_p)
    return false;

  /* Make sure that the loop invariant is OP2 rather than OP1.  */
  if (invariant1_p)
    std::swap (op1, op2);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, address.stmt, "address term %T = varying %T"
		     " * loop-invariant %T\n", term.expr, op1, op2);
  analyze_stride (address, term, op2, mult_loop);
  return true;
}

/* Try to use scalar evolutions to find an address stride for TERM,
   which belongs to ADDRESS.  Return true and update TERM if so.

   Here we are interested in any evolution information we can find,
   not just evolutions wrt ADDRESS->LOOP.  For example, if we find that
   an outer loop obviously iterates over the inner dimension of an array,
   that information can help us eliminate worthless versioning opportunities
   in inner loops.  */

bool
loop_versioning::analyze_term_using_scevs (address_info &address,
					   address_term_info &term)
{
  gimple *setter = maybe_get_stmt (term.expr);
  if (!setter)
    return false;

  class loop *wrt_loop = loop_containing_stmt (setter);
  if (!loop_outer (wrt_loop))
    return false;

  tree chrec = strip_casts (analyze_scalar_evolution (wrt_loop, term.expr));
  if (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, address.stmt,
			 "address term %T = %T\n", term.expr, chrec);

      /* Peel casts and accumulate constant multiplications, up to the
	 limit allowed by M_MAXIMUM_SCALE.  */
      tree stride = strip_casts (CHREC_RIGHT (chrec));
      while (TREE_CODE (stride) == MULT_EXPR
	     && multiply_term_by (term, TREE_OPERAND (stride, 1)))
	stride = strip_casts (TREE_OPERAND (stride, 0));

      gassign *assign;
      while ((assign = maybe_get_assign (stride))
	     && gimple_assign_rhs_code (assign) == MULT_EXPR
	     && multiply_term_by (term, gimple_assign_rhs2 (assign)))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, address.stmt,
			     "looking through %G", assign);
	  stride = strip_casts (gimple_assign_rhs1 (assign));
	}

      analyze_stride (address, term, stride, get_chrec_loop (chrec));
      return true;
    }

  return false;
}

/* Address term TERM is an arbitrary term that provides no versioning
   opportunities.  Analyze it to see whether it contains any likely
   inner strides, so that we don't mistakenly version for other
   (less likely) candidates.

   This copes with invariant innermost indices such as:

     x(i, :) = 100

   where the "i" component of the address is invariant in the loop
   but provides the real inner stride.

   ADDRESS is the address that contains TERM.  */

void
loop_versioning::analyze_arbitrary_term (address_info &address,
					 address_term_info &term)

{
  /* A multiplication offers two potential strides.  Pick the one that
     is most likely to be an innermost stride.  */
  tree expr = term.expr, alt = NULL_TREE;
  gassign *mult = maybe_get_assign (expr);
  if (mult && gimple_assign_rhs_code (mult) == MULT_EXPR)
    {
      expr = strip_casts (gimple_assign_rhs1 (mult));
      alt = strip_casts (gimple_assign_rhs2 (mult));
    }
  term.stride = expr;
  term.inner_likelihood = get_inner_likelihood (expr, term.multiplier);
  if (alt)
    {
      inner_likelihood alt_l = get_inner_likelihood (alt, term.multiplier);
      if (alt_l > term.inner_likelihood)
	{
	  term.stride = alt;
	  term.inner_likelihood = alt_l;
	}
    }
  if (dump_enabled_p ())
    dump_inner_likelihood (address, term);
}

/* Try to identify loop strides in ADDRESS and try to choose realistic
   versioning opportunities based on these strides.

   The main difficulty here isn't finding strides that could be used
   in a version check (that's pretty easy).  The problem instead is to
   avoid versioning for some stride S that is unlikely ever to be 1 at
   runtime.  Versioning for S == 1 on its own would lead to unnecessary
   code bloat, while adding S == 1 to more realistic version conditions
   would lose the optimisation opportunity offered by those other conditions.

   For example, versioning for a stride of 1 in the Fortran code:

     integer :: a(:,:)
     a(1,:) = 1

   is not usually a good idea, since the assignment is iterating over
   an outer dimension and is relatively unlikely to have a stride of 1.
   (It isn't impossible, since the inner dimension might be 1, or the
   array might be transposed.)  Similarly, in:

     integer :: a(:,:), b(:,:)
     b(:,1) = a(1,:)

   b(:,1) is relatively likely to have a stride of 1 while a(1,:) isn't.
   Versioning for when both strides are 1 would lose most of the benefit
   of versioning for b's access.

   The approach we take is as follows:

   - Analyze each term to see whether it has an identifiable stride,
     regardless of which loop applies the stride.

   - Evaluate the likelihood that each such stride is for the innermost
     dimension of an array, on the scale "likely", "don't know" or "unlikely".

   - If there is a single "likely" innermost stride, and that stride is
     applied in the loop that contains STMT, version the loop for when the
     stride is 1.  This deals with the cases in which we're fairly
     confident of doing the right thing, such as the b(:,1) reference above.

   - If there are no "likely" innermost strides, and the loop that contains
     STMT uses a stride that we rated as "don't know", version for when
     that stride is 1.  This is principally used for C code such as:

       for (int i = 0; i < n; ++i)
	 a[i * x] = ...;

     and:

       for (int j = 0; j < n; ++j)
	 for (int i = 0; i < n; ++i)
	   a[i * x + j * y] = ...;

     where nothing in the way "x" and "y" are set gives a hint as to
     whether "i" iterates over the innermost dimension of the array.
     In these situations it seems reasonable to assume the
     programmer has nested the loops appropriately (although of course
     there are examples like GEMM in which this assumption doesn't hold
     for all accesses in the loop).

     This case is also useful for the Fortran equivalent of the
     above C code.  */

void
loop_versioning::analyze_address_fragment (address_info &address)
{
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, address.stmt, "analyzing address fragment ");
      dump_address_info (MSG_NOTE, address);
      dump_printf (MSG_NOTE, "\n");
    }

  /* Analyze each component of the sum to see whether it involves an
     apparent stride.

     There is an overlap between the addresses that
     find_per_loop_multiplication and analyze_term_using_scevs can handle,
     but the former is much cheaper than SCEV analysis, so try it first.  */
  for (unsigned int i = 0; i < address.terms.length (); ++i)
    if (!find_per_loop_multiplication (address, address.terms[i])
	&& !analyze_term_using_scevs (address, address.terms[i])
	&& !POINTER_TYPE_P (TREE_TYPE (address.terms[i].expr)))
      analyze_arbitrary_term (address, address.terms[i]);

  /* Check for strides that are likely to be for the innermost dimension.

     1. If there is a single likely inner stride, if it is an SSA name,
	and if it is worth versioning the loop for when the SSA name
	equals 1, record that we want to do so.

     2. Otherwise, if there are any likely inner strides, bail out.  This means
	one of:

	(a) There are multiple likely inner strides.  This suggests we're
	    confused and can't be confident of doing the right thing.
1114627f7eb2Smrg 
1115627f7eb2Smrg 	(b) There is a single likely inner stride and it is a constant
1116627f7eb2Smrg 	    rather than an SSA name.  This can mean either that the access
1117627f7eb2Smrg 	    is a natural one without any variable strides, such as:
1118627f7eb2Smrg 
1119627f7eb2Smrg 	      for (int i = 0; i < n; ++i)
1120627f7eb2Smrg 		a[i] += 1;
1121627f7eb2Smrg 
1122627f7eb2Smrg 	    or that a variable stride is applied to an outer dimension,
1123627f7eb2Smrg 	    such as:
1124627f7eb2Smrg 
1125627f7eb2Smrg 	      for (int i = 0; i < n; ++i)
1126627f7eb2Smrg 		for (int j = 0; j < n; ++j)
1127627f7eb2Smrg 		  a[j * stride][i] += 1;
1128627f7eb2Smrg 
1129627f7eb2Smrg 	(c) There is a single likely inner stride, and it is an SSA name,
1130627f7eb2Smrg 	    but it isn't a worthwhile versioning opportunity.  This usually
1131627f7eb2Smrg 	    means that the variable stride is applied by an outer loop,
1132627f7eb2Smrg 	    such as:
1133627f7eb2Smrg 
1134627f7eb2Smrg 	      for (int i = 0; i < n; ++i)
1135627f7eb2Smrg 		for (int j = 0; j < n; ++j)
1136627f7eb2Smrg 		  a[j][i * stride] += 1;
1137627f7eb2Smrg 
1138627f7eb2Smrg 	    or (using an example with a more natural loop nesting):
1139627f7eb2Smrg 
1140627f7eb2Smrg 	      for (int i = 0; i < n; ++i)
1141627f7eb2Smrg 		for (int j = 0; j < n; ++j)
1142627f7eb2Smrg 		  a[i][j] += b[i * stride];
1143627f7eb2Smrg 
1144627f7eb2Smrg 	    in cases where b[i * stride] cannot (yet) be hoisted for
1145627f7eb2Smrg 	    aliasing reasons.
1146627f7eb2Smrg 
1147627f7eb2Smrg      3. If there are no likely inner strides, fall through to the next
1148627f7eb2Smrg 	set of checks.
1149627f7eb2Smrg 
1150627f7eb2Smrg      Pointer equality is enough to check for uniqueness in (1), since we
1151627f7eb2Smrg      only care about SSA names.  */
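  /* For illustration only (a hypothetical instance of case (2a) above):
     an access such as

       a[i * stride1 + j * stride2]

     produces two terms that can both look like likely inner strides,
     in which case we cannot tell which one should be 1 and give up.  */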
1152627f7eb2Smrg   tree chosen_stride = NULL_TREE;
1153627f7eb2Smrg   tree version_stride = NULL_TREE;
1154627f7eb2Smrg   for (unsigned int i = 0; i < address.terms.length (); ++i)
1155627f7eb2Smrg     if (chosen_stride != address.terms[i].stride
1156627f7eb2Smrg 	&& address.terms[i].inner_likelihood == INNER_LIKELY)
1157627f7eb2Smrg       {
1158627f7eb2Smrg 	if (chosen_stride)
1159627f7eb2Smrg 	  return;
1160627f7eb2Smrg 	chosen_stride = address.terms[i].stride;
1161627f7eb2Smrg 	if (address.terms[i].versioning_opportunity_p)
1162627f7eb2Smrg 	  version_stride = chosen_stride;
1163627f7eb2Smrg       }
1164627f7eb2Smrg 
1165627f7eb2Smrg   /* If there are no likely inner strides, see if there is a single
1166627f7eb2Smrg      versioning opportunity for a stride that was rated as INNER_DONT_KNOW.
1167627f7eb2Smrg      See the comment above the function for the cases that this code
1168627f7eb2Smrg      handles.  */
1169627f7eb2Smrg   if (!chosen_stride)
1170627f7eb2Smrg     for (unsigned int i = 0; i < address.terms.length (); ++i)
1171627f7eb2Smrg       if (version_stride != address.terms[i].stride
1172627f7eb2Smrg 	  && address.terms[i].inner_likelihood == INNER_DONT_KNOW
1173627f7eb2Smrg 	  && address.terms[i].versioning_opportunity_p)
1174627f7eb2Smrg 	{
1175627f7eb2Smrg 	  if (version_stride)
1176627f7eb2Smrg 	    return;
1177627f7eb2Smrg 	  version_stride = address.terms[i].stride;
1178627f7eb2Smrg 	}
1179627f7eb2Smrg 
1180627f7eb2Smrg   if (version_stride)
1181627f7eb2Smrg     version_for_unity (address.stmt, version_stride);
1182627f7eb2Smrg }
1183627f7eb2Smrg 
1184627f7eb2Smrg /* Treat EXPR * MULTIPLIER + OFFSET as a fragment of an address that addresses
1185627f7eb2Smrg    TYPE_SIZE bytes and record this address fragment for later processing.
1186627f7eb2Smrg    STMT is the statement that contains the address.  */
1187627f7eb2Smrg 
1188627f7eb2Smrg void
1189627f7eb2Smrg loop_versioning::record_address_fragment (gimple *stmt,
1190627f7eb2Smrg 					  unsigned HOST_WIDE_INT type_size,
1191627f7eb2Smrg 					  tree expr,
1192627f7eb2Smrg 					  unsigned HOST_WIDE_INT multiplier,
1193627f7eb2Smrg 					  HOST_WIDE_INT offset)
1194627f7eb2Smrg {
1195627f7eb2Smrg   /* We're only interested in computed values.  */
1196627f7eb2Smrg   if (TREE_CODE (expr) != SSA_NAME)
1197627f7eb2Smrg     return;
1198627f7eb2Smrg 
1199627f7eb2Smrg   /* Quick exit if no part of the address is calculated in STMT's loop,
1200627f7eb2Smrg      since such addresses have no versioning opportunities.  */
1201*4c3eb207Smrg   class loop *loop = loop_containing_stmt (stmt);
1202627f7eb2Smrg   if (expr_invariant_in_loop_p (loop, expr))
1203627f7eb2Smrg     return;
1204627f7eb2Smrg 
1205627f7eb2Smrg   /* Set up an address_info for EXPR * MULTIPLIER.  */
1206627f7eb2Smrg   address_info *address = XOBNEW (&m_obstack, address_info);
1207627f7eb2Smrg   new (address) address_info;
1208627f7eb2Smrg   address->stmt = stmt;
1209627f7eb2Smrg   address->loop = loop;
1210627f7eb2Smrg   address->base = NULL_TREE;
1211627f7eb2Smrg   address->terms.quick_grow (1);
1212627f7eb2Smrg   address->terms[0].expr = expr;
1213627f7eb2Smrg   address->terms[0].multiplier = multiplier;
1214627f7eb2Smrg   address->terms[0].stride = NULL_TREE;
1215627f7eb2Smrg   address->terms[0].inner_likelihood = INNER_UNLIKELY;
1216627f7eb2Smrg   address->terms[0].versioning_opportunity_p = false;
1217627f7eb2Smrg   address->min_offset = offset;
1218627f7eb2Smrg 
1219627f7eb2Smrg   /* Peel apart the expression into a sum of address_terms, where each
1220627f7eb2Smrg      term is multiplied by a constant.  Treat a + b and a - b the same,
1221627f7eb2Smrg      since it doesn't matter for our purposes whether an address is
1222627f7eb2Smrg      increasing or decreasing.  Distribute (a + b) * constant into
1223627f7eb2Smrg      a * constant + b * constant.
1224627f7eb2Smrg 
1225627f7eb2Smrg      We don't care which loop each term belongs to, since we want to
1226627f7eb2Smrg      examine as many candidate strides as possible when determining
1227627f7eb2Smrg      which is likely to be for the innermost dimension.  We therefore
1228627f7eb2Smrg      don't limit the search to statements in STMT's loop.  */
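  /* A hypothetical example of this decomposition: if the initial term is
     T * 8 and the IL computes

       T = a + b * 3 + 4

     then the loop below ends up with the terms { a * 8, b * 24 } and
     adds 4 * 8 = 32 to MIN_OFFSET.  */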
1229627f7eb2Smrg   for (unsigned int i = 0; i < address->terms.length (); )
1230627f7eb2Smrg     {
1231627f7eb2Smrg       if (gassign *assign = maybe_get_assign (address->terms[i].expr))
1232627f7eb2Smrg 	{
1233627f7eb2Smrg 	  tree_code code = gimple_assign_rhs_code (assign);
1234627f7eb2Smrg 	  if (code == PLUS_EXPR
1235627f7eb2Smrg 	      || code == POINTER_PLUS_EXPR
1236627f7eb2Smrg 	      || code == MINUS_EXPR)
1237627f7eb2Smrg 	    {
1238627f7eb2Smrg 	      tree op1 = gimple_assign_rhs1 (assign);
1239627f7eb2Smrg 	      tree op2 = gimple_assign_rhs2 (assign);
1240627f7eb2Smrg 	      if (TREE_CODE (op2) == INTEGER_CST)
1241627f7eb2Smrg 		{
1242627f7eb2Smrg 		  address->terms[i].expr = strip_casts (op1);
1243627f7eb2Smrg 		  /* This is heuristic only, so don't worry about truncation
1244627f7eb2Smrg 		     or overflow.  */
1245627f7eb2Smrg 		  address->min_offset += (TREE_INT_CST_LOW (op2)
1246627f7eb2Smrg 					  * address->terms[i].multiplier);
1247627f7eb2Smrg 		  continue;
1248627f7eb2Smrg 		}
1249627f7eb2Smrg 	      else if (address->terms.length () < address_info::MAX_TERMS)
1250627f7eb2Smrg 		{
1251627f7eb2Smrg 		  unsigned int j = address->terms.length ();
1252627f7eb2Smrg 		  address->terms.quick_push (address->terms[i]);
1253627f7eb2Smrg 		  address->terms[i].expr = strip_casts (op1);
1254627f7eb2Smrg 		  address->terms[j].expr = strip_casts (op2);
1255627f7eb2Smrg 		  continue;
1256627f7eb2Smrg 		}
1257627f7eb2Smrg 	    }
1258627f7eb2Smrg 	  if (code == MULT_EXPR)
1259627f7eb2Smrg 	    {
1260627f7eb2Smrg 	      tree op1 = gimple_assign_rhs1 (assign);
1261627f7eb2Smrg 	      tree op2 = gimple_assign_rhs2 (assign);
1262627f7eb2Smrg 	      if (multiply_term_by (address->terms[i], op2))
1263627f7eb2Smrg 		{
1264627f7eb2Smrg 		  address->terms[i].expr = strip_casts (op1);
1265627f7eb2Smrg 		  continue;
1266627f7eb2Smrg 		}
1267627f7eb2Smrg 	    }
1268*4c3eb207Smrg 	  if (CONVERT_EXPR_CODE_P (code))
1269*4c3eb207Smrg 	    {
1270*4c3eb207Smrg 	      tree op1 = gimple_assign_rhs1 (assign);
1271*4c3eb207Smrg 	      address->terms[i].expr = strip_casts (op1);
1272*4c3eb207Smrg 	      continue;
1273*4c3eb207Smrg 	    }
1274627f7eb2Smrg 	}
1275627f7eb2Smrg       i += 1;
1276627f7eb2Smrg     }
1277627f7eb2Smrg 
1278627f7eb2Smrg   /* Peel off any symbolic pointer.  */
1279627f7eb2Smrg   if (TREE_CODE (address->terms[0].expr) != SSA_NAME
1280627f7eb2Smrg       && address->terms[0].multiplier == 1)
1281627f7eb2Smrg     {
1282627f7eb2Smrg       if (address->terms.length () == 1)
1283627f7eb2Smrg 	{
1284627f7eb2Smrg 	  obstack_free (&m_obstack, address);
1285627f7eb2Smrg 	  return;
1286627f7eb2Smrg 	}
1287627f7eb2Smrg       address->base = address->terms[0].expr;
1288627f7eb2Smrg       address->terms.ordered_remove (0);
1289627f7eb2Smrg     }
1290627f7eb2Smrg 
1291627f7eb2Smrg   /* Require all remaining terms to be SSA names.  (This could be false
1292627f7eb2Smrg      for unfolded statements, but they aren't worth dealing with.)  */
1293627f7eb2Smrg   for (unsigned int i = 0; i < address->terms.length (); ++i)
1294627f7eb2Smrg     if (TREE_CODE (address->terms[i].expr) != SSA_NAME)
1295627f7eb2Smrg       {
1296627f7eb2Smrg 	obstack_free (&m_obstack, address);
1297627f7eb2Smrg 	return;
1298627f7eb2Smrg       }
1299627f7eb2Smrg 
1300627f7eb2Smrg   /* The loop above set MIN_OFFSET based on the first byte of the
1301627f7eb2Smrg      referenced data.  Calculate the end + 1.  */
1302627f7eb2Smrg   address->max_offset = address->min_offset + type_size;
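
  /* For example (illustrative): a 4-byte access whose constant
     displacements summed to 32 above covers the byte range [32, 36).  */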
1303627f7eb2Smrg 
1304627f7eb2Smrg   /* Put the terms into a canonical order for the hash table lookup below.  */
1305627f7eb2Smrg   address->terms.qsort (compare_address_terms);
1306627f7eb2Smrg 
1307627f7eb2Smrg   if (dump_enabled_p ())
1308627f7eb2Smrg     {
1309627f7eb2Smrg       dump_printf_loc (MSG_NOTE, stmt, "recording address fragment %T", expr);
1310627f7eb2Smrg       if (multiplier != 1)
1311627f7eb2Smrg 	dump_printf (MSG_NOTE, " * %wd", multiplier);
1312627f7eb2Smrg       dump_printf (MSG_NOTE, " = ");
1313627f7eb2Smrg       dump_address_info (MSG_NOTE, *address);
1314627f7eb2Smrg       dump_printf (MSG_NOTE, "\n");
1315627f7eb2Smrg     }
1316627f7eb2Smrg 
1317627f7eb2Smrg   /* Pool address information with the same terms (but potentially
1318627f7eb2Smrg      different offsets).  */
1319627f7eb2Smrg   address_info **slot = m_address_table.find_slot (address, INSERT);
1320627f7eb2Smrg   if (address_info *old_address = *slot)
1321627f7eb2Smrg     {
1322627f7eb2Smrg       /* We've already seen an address with the same terms.  Extend the
1323627f7eb2Smrg 	 offset range to account for this access.  Doing this can paper
1324627f7eb2Smrg 	 over gaps, such as in:
1325627f7eb2Smrg 
1326627f7eb2Smrg 	   a[i * stride * 4] + a[i * stride * 4 + 3];
1327627f7eb2Smrg 
1328627f7eb2Smrg 	 where nothing references "+ 1" or "+ 2".  However, the vectorizer
1329627f7eb2Smrg 	 handles such gapped accesses without problems, so it's not worth
1330627f7eb2Smrg 	 trying to exclude them.  */
1331627f7eb2Smrg       if (old_address->min_offset > address->min_offset)
1332627f7eb2Smrg 	old_address->min_offset = address->min_offset;
1333627f7eb2Smrg       if (old_address->max_offset < address->max_offset)
1334627f7eb2Smrg 	old_address->max_offset = address->max_offset;
1335627f7eb2Smrg       obstack_free (&m_obstack, address);
1336627f7eb2Smrg     }
1337627f7eb2Smrg   else
1338627f7eb2Smrg     {
1339627f7eb2Smrg       /* This is the first time we've seen an address with these terms.  */
1340627f7eb2Smrg       *slot = address;
1341627f7eb2Smrg       m_address_list.safe_push (address);
1342627f7eb2Smrg     }
1343627f7eb2Smrg }
1344627f7eb2Smrg 
1345627f7eb2Smrg /* Analyze expression EXPR, which occurs in STMT.  */
1346627f7eb2Smrg 
1347627f7eb2Smrg void
1348627f7eb2Smrg loop_versioning::analyze_expr (gimple *stmt, tree expr)
1349627f7eb2Smrg {
1350627f7eb2Smrg   unsigned HOST_WIDE_INT type_size;
1351627f7eb2Smrg 
1352627f7eb2Smrg   while (handled_component_p (expr))
1353627f7eb2Smrg     {
1354627f7eb2Smrg       /* See whether we can use versioning to avoid a multiplication
1355627f7eb2Smrg 	 in an array index.  */
1356627f7eb2Smrg       if (TREE_CODE (expr) == ARRAY_REF
1357627f7eb2Smrg 	  && acceptable_type_p (TREE_TYPE (expr), &type_size))
1358627f7eb2Smrg 	record_address_fragment (stmt, type_size,
1359627f7eb2Smrg 				 TREE_OPERAND (expr, 1), type_size, 0);
1360627f7eb2Smrg       expr = TREE_OPERAND (expr, 0);
1361627f7eb2Smrg     }
1362627f7eb2Smrg 
1363627f7eb2Smrg   /* See whether we can use versioning to avoid a multiplication
1364627f7eb2Smrg      in the pointer calculation of a MEM_REF.  */
1365627f7eb2Smrg   if (TREE_CODE (expr) == MEM_REF
1366627f7eb2Smrg       && acceptable_type_p (TREE_TYPE (expr), &type_size))
1367627f7eb2Smrg     record_address_fragment (stmt, type_size, TREE_OPERAND (expr, 0), 1,
1368627f7eb2Smrg 			     /* This is heuristic only, so don't worry
1369627f7eb2Smrg 				about truncation or overflow.  */
1370627f7eb2Smrg 			     TREE_INT_CST_LOW (TREE_OPERAND (expr, 1)));
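
  /* To illustrate the two cases above with hypothetical accesses: for
     "int x[N]", the ARRAY_REF x[i] records index i with the 4-byte
     element size as its multiplier, whereas a dereference such as
     MEM[p + 16] records p with multiplier 1 and constant offset 16.  */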
1371627f7eb2Smrg 
1372627f7eb2Smrg   /* These would be easy to handle if they existed at this stage.  */
1373627f7eb2Smrg   gcc_checking_assert (TREE_CODE (expr) != TARGET_MEM_REF);
1374627f7eb2Smrg }
1375627f7eb2Smrg 
1376627f7eb2Smrg /* Analyze all the statements in BB looking for useful version checks.
1377627f7eb2Smrg    Return true on success, false if something prevents the block from
1378627f7eb2Smrg    being versioned.  */
1379627f7eb2Smrg 
1380627f7eb2Smrg bool
1381627f7eb2Smrg loop_versioning::analyze_block (basic_block bb)
1382627f7eb2Smrg {
1383*4c3eb207Smrg   class loop *loop = bb->loop_father;
1384627f7eb2Smrg   loop_info &li = get_loop_info (loop);
1385627f7eb2Smrg   for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
1386627f7eb2Smrg        gsi_next (&gsi))
1387627f7eb2Smrg     {
1388627f7eb2Smrg       gimple *stmt = gsi_stmt (gsi);
1389627f7eb2Smrg       if (is_gimple_debug (stmt))
1390627f7eb2Smrg 	continue;
1391627f7eb2Smrg 
1392627f7eb2Smrg       if (expensive_stmt_p (stmt))
1393627f7eb2Smrg 	{
1394627f7eb2Smrg 	  if (dump_enabled_p ())
1395627f7eb2Smrg 	    dump_printf_loc (MSG_NOTE, stmt, "expensive statement"
1396627f7eb2Smrg 			     " prevents versioning: %G", stmt);
1397627f7eb2Smrg 	  return false;
1398627f7eb2Smrg 	}
1399627f7eb2Smrg 
1400627f7eb2Smrg       /* Only look for direct versioning opportunities in inner loops
1401627f7eb2Smrg 	 since the benefit tends to be much smaller for outer loops.  */
1402627f7eb2Smrg       if (!loop->inner)
1403627f7eb2Smrg 	{
1404627f7eb2Smrg 	  unsigned int nops = gimple_num_ops (stmt);
1405627f7eb2Smrg 	  for (unsigned int i = 0; i < nops; ++i)
1406627f7eb2Smrg 	    if (tree op = gimple_op (stmt, i))
1407627f7eb2Smrg 	      analyze_expr (stmt, op);
1408627f7eb2Smrg 	}
1409627f7eb2Smrg 
1410627f7eb2Smrg       /* The point of the instruction limit is to prevent excessive
1411627f7eb2Smrg 	 code growth, so this is a size-based estimate even though
1412627f7eb2Smrg 	 the optimization is aimed at speed.  */
1413627f7eb2Smrg       li.num_insns += estimate_num_insns (stmt, &eni_size_weights);
1414627f7eb2Smrg     }
1415627f7eb2Smrg 
1416627f7eb2Smrg   return true;
1417627f7eb2Smrg }
1418627f7eb2Smrg 
1419627f7eb2Smrg /* Analyze all the blocks in the function, looking for useful version checks.
1420627f7eb2Smrg    Return true if we found one.  */
1421627f7eb2Smrg 
1422627f7eb2Smrg bool
1423627f7eb2Smrg loop_versioning::analyze_blocks ()
1424627f7eb2Smrg {
1425627f7eb2Smrg   AUTO_DUMP_SCOPE ("analyze_blocks",
1426627f7eb2Smrg 		   dump_user_location_t::from_function_decl (m_fn->decl));
1427627f7eb2Smrg 
1428627f7eb2Smrg   /* For now we don't try to version the whole function, although
1429627f7eb2Smrg      versioning at that level could be useful in some cases.  */
1430627f7eb2Smrg   get_loop_info (get_loop (m_fn, 0)).rejected_p = true;
1431627f7eb2Smrg 
1432*4c3eb207Smrg   class loop *loop;
1433627f7eb2Smrg   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
1434627f7eb2Smrg     {
1435627f7eb2Smrg       loop_info &linfo = get_loop_info (loop);
1436627f7eb2Smrg 
1437627f7eb2Smrg       /* Ignore cold loops.  */
1438627f7eb2Smrg       if (!optimize_loop_for_speed_p (loop))
1439627f7eb2Smrg 	linfo.rejected_p = true;
1440627f7eb2Smrg 
1441627f7eb2Smrg       /* See whether an inner loop prevents versioning of this loop.  */
1442627f7eb2Smrg       if (!linfo.rejected_p)
1443*4c3eb207Smrg 	for (class loop *inner = loop->inner; inner; inner = inner->next)
1444627f7eb2Smrg 	  if (get_loop_info (inner).rejected_p)
1445627f7eb2Smrg 	    {
1446627f7eb2Smrg 	      linfo.rejected_p = true;
1447627f7eb2Smrg 	      break;
1448627f7eb2Smrg 	    }
1449627f7eb2Smrg 
1450627f7eb2Smrg       /* If versioning the loop is still a possibility, examine the
1451627f7eb2Smrg 	 statements in the loop to look for versioning opportunities.  */
1452627f7eb2Smrg       if (!linfo.rejected_p)
1453627f7eb2Smrg 	{
1454627f7eb2Smrg 	  void *start_point = obstack_alloc (&m_obstack, 0);
1455627f7eb2Smrg 
1456627f7eb2Smrg 	  for (basic_block bb = linfo.block_list; bb;
1457627f7eb2Smrg 	       bb = m_next_block_in_loop[bb->index])
1458627f7eb2Smrg 	    if (!analyze_block (bb))
1459627f7eb2Smrg 	      {
1460627f7eb2Smrg 		linfo.rejected_p = true;
1461627f7eb2Smrg 		break;
1462627f7eb2Smrg 	      }
1463627f7eb2Smrg 
1464627f7eb2Smrg 	  if (!linfo.rejected_p)
1465627f7eb2Smrg 	    {
1466627f7eb2Smrg 	      /* Process any queued address fragments, now that we have
1467627f7eb2Smrg 		 complete grouping information.  */
1468627f7eb2Smrg 	      address_info *address;
1469627f7eb2Smrg 	      unsigned int i;
1470627f7eb2Smrg 	      FOR_EACH_VEC_ELT (m_address_list, i, address)
1471627f7eb2Smrg 		analyze_address_fragment (*address);
1472627f7eb2Smrg 	    }
1473627f7eb2Smrg 
1474627f7eb2Smrg 	  m_address_table.empty ();
1475627f7eb2Smrg 	  m_address_list.truncate (0);
1476627f7eb2Smrg 	  obstack_free (&m_obstack, start_point);
1477627f7eb2Smrg 	}
1478627f7eb2Smrg     }
1479627f7eb2Smrg 
1480627f7eb2Smrg   return m_num_conditions != 0;
1481627f7eb2Smrg }
1482627f7eb2Smrg 
1483627f7eb2Smrg /* Use the ranges in VRS to remove impossible versioning conditions from
1484627f7eb2Smrg    LOOP.  */
1485627f7eb2Smrg 
1486627f7eb2Smrg void
1487*4c3eb207Smrg loop_versioning::prune_loop_conditions (class loop *loop, vr_values *vrs)
1488627f7eb2Smrg {
1489627f7eb2Smrg   loop_info &li = get_loop_info (loop);
1490627f7eb2Smrg 
1491627f7eb2Smrg   int to_remove = -1;
1492627f7eb2Smrg   bitmap_iterator bi;
1493627f7eb2Smrg   unsigned int i;
1494627f7eb2Smrg   EXECUTE_IF_SET_IN_BITMAP (&li.unity_names, 0, i, bi)
1495627f7eb2Smrg     {
1496627f7eb2Smrg       tree name = ssa_name (i);
1497*4c3eb207Smrg       const value_range_equiv *vr = vrs->get_value_range (name);
1498*4c3eb207Smrg       if (vr && !vr->may_contain_p (build_one_cst (TREE_TYPE (name))))
1499627f7eb2Smrg 	{
1500627f7eb2Smrg 	  if (dump_enabled_p ())
1501627f7eb2Smrg 	    dump_printf_loc (MSG_NOTE, find_loop_location (loop),
1502627f7eb2Smrg 			     "%T can never be 1 in this loop\n", name);
1503627f7eb2Smrg 
1504627f7eb2Smrg 	  if (to_remove >= 0)
1505627f7eb2Smrg 	    bitmap_clear_bit (&li.unity_names, to_remove);
1506627f7eb2Smrg 	  to_remove = i;
1507627f7eb2Smrg 	  m_num_conditions -= 1;
1508627f7eb2Smrg 	}
1509627f7eb2Smrg     }
1510627f7eb2Smrg   if (to_remove >= 0)
1511627f7eb2Smrg     bitmap_clear_bit (&li.unity_names, to_remove);
1512627f7eb2Smrg }
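
/* For example (illustrative): if the versioned access sits under a guard
   such as "if (stride >= 2)", the ranges collected during the dominator
   walk exclude 1 for STRIDE at the loop, so the scheduled "stride == 1"
   check above is discarded rather than emitted and never taken.  */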
1513627f7eb2Smrg 
1514627f7eb2Smrg /* Remove any scheduled loop version conditions that will never be true.
1515627f7eb2Smrg    Return true if any remain.  */
1516627f7eb2Smrg 
1517627f7eb2Smrg bool
1518627f7eb2Smrg loop_versioning::prune_conditions ()
1519627f7eb2Smrg {
1520627f7eb2Smrg   AUTO_DUMP_SCOPE ("prune_loop_conditions",
1521627f7eb2Smrg 		   dump_user_location_t::from_function_decl (m_fn->decl));
1522627f7eb2Smrg 
1523627f7eb2Smrg   calculate_dominance_info (CDI_DOMINATORS);
1524627f7eb2Smrg   lv_dom_walker dom_walker (*this);
1525627f7eb2Smrg   dom_walker.walk (ENTRY_BLOCK_PTR_FOR_FN (m_fn));
1526627f7eb2Smrg   return m_num_conditions != 0;
1527627f7eb2Smrg }
1528627f7eb2Smrg 
1529627f7eb2Smrg /* Merge the version checks for INNER into immediately-enclosing loop
1530627f7eb2Smrg    OUTER.  */
1531627f7eb2Smrg 
1532627f7eb2Smrg void
1533*4c3eb207Smrg loop_versioning::merge_loop_info (class loop *outer, class loop *inner)
1534627f7eb2Smrg {
1535627f7eb2Smrg   loop_info &inner_li = get_loop_info (inner);
1536627f7eb2Smrg   loop_info &outer_li = get_loop_info (outer);
1537627f7eb2Smrg 
1538627f7eb2Smrg   if (dump_enabled_p ())
1539627f7eb2Smrg     {
1540627f7eb2Smrg       bitmap_iterator bi;
1541627f7eb2Smrg       unsigned int i;
1542627f7eb2Smrg       EXECUTE_IF_SET_IN_BITMAP (&inner_li.unity_names, 0, i, bi)
1543627f7eb2Smrg 	if (!bitmap_bit_p (&outer_li.unity_names, i))
1544627f7eb2Smrg 	  dump_printf_loc (MSG_NOTE, find_loop_location (inner),
1545627f7eb2Smrg 			   "hoisting check that %T == 1 to outer loop\n",
1546627f7eb2Smrg 			   ssa_name (i));
1547627f7eb2Smrg     }
1548627f7eb2Smrg 
1549627f7eb2Smrg   bitmap_ior_into (&outer_li.unity_names, &inner_li.unity_names);
1550627f7eb2Smrg   if (loop_depth (outer_li.outermost) < loop_depth (inner_li.outermost))
1551627f7eb2Smrg     outer_li.outermost = inner_li.outermost;
1552627f7eb2Smrg }
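
/* For instance (illustrative): once the checks for an inner j loop are
   merged into the enclosing i loop, a single "stride == 1" test emitted
   for the outer loop can guard the whole nest, instead of the inner loop
   being versioned separately on every outer iteration.  */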
1553627f7eb2Smrg 
1554627f7eb2Smrg /* Add LOOP to the queue of loops to version.  */
1555627f7eb2Smrg 
1556627f7eb2Smrg void
1557*4c3eb207Smrg loop_versioning::add_loop_to_queue (class loop *loop)
1558627f7eb2Smrg {
1559627f7eb2Smrg   loop_info &li = get_loop_info (loop);
1560627f7eb2Smrg 
1561627f7eb2Smrg   if (dump_enabled_p ())
1562627f7eb2Smrg     dump_printf_loc (MSG_NOTE, find_loop_location (loop),
1563627f7eb2Smrg 		     "queuing this loop for versioning\n");
1564627f7eb2Smrg   m_loops_to_version.safe_push (loop);
1565627f7eb2Smrg 
1566627f7eb2Smrg   /* Don't try to version superloops.  */
1567627f7eb2Smrg   li.rejected_p = true;
1568627f7eb2Smrg }
1569627f7eb2Smrg 
1570627f7eb2Smrg /* Decide whether the cost model would allow us to version LOOP,
1571627f7eb2Smrg    either directly or as part of a parent loop, and return true if so.
1572627f7eb2Smrg    This does not imply that the loop is actually worth versioning in its
1573627f7eb2Smrg    own right, just that it would be valid to version it if something
1574627f7eb2Smrg    benefited.
1575627f7eb2Smrg 
1576627f7eb2Smrg    We have already made this decision for all inner loops of LOOP.  */
1577627f7eb2Smrg 
1578627f7eb2Smrg bool
1579*4c3eb207Smrg loop_versioning::decide_whether_loop_is_versionable (class loop *loop)
1580627f7eb2Smrg {
1581627f7eb2Smrg   loop_info &li = get_loop_info (loop);
1582627f7eb2Smrg 
1583627f7eb2Smrg   if (li.rejected_p)
1584627f7eb2Smrg     return false;
1585627f7eb2Smrg 
1586627f7eb2Smrg   /* Examine the decisions made for inner loops.  */
1587*4c3eb207Smrg   for (class loop *inner = loop->inner; inner; inner = inner->next)
1588627f7eb2Smrg     {
1589627f7eb2Smrg       loop_info &inner_li = get_loop_info (inner);
1590627f7eb2Smrg       if (inner_li.rejected_p)
1591627f7eb2Smrg 	{
1592627f7eb2Smrg 	  if (dump_enabled_p ())
1593627f7eb2Smrg 	    dump_printf_loc (MSG_NOTE, find_loop_location (loop),
1594627f7eb2Smrg 			     "not versioning this loop because one of its"
1595627f7eb2Smrg 			     " inner loops should not be versioned\n");
1596627f7eb2Smrg 	  return false;
1597627f7eb2Smrg 	}
1598627f7eb2Smrg 
1599627f7eb2Smrg       if (inner_li.worth_versioning_p ())
1600627f7eb2Smrg 	li.subloops_benefit_p = true;
1601627f7eb2Smrg 
1602627f7eb2Smrg       /* Accumulate the number of instructions from subloops that are not
1603627f7eb2Smrg 	 the innermost, or that don't benefit from versioning.  Only the
1604627f7eb2Smrg 	 instructions from innermost loops that benefit from versioning
1605627f7eb2Smrg 	 should be weighed against loop-versioning-max-inner-insns;
1606627f7eb2Smrg 	 everything else should be weighed against
1607627f7eb2Smrg 	 loop-versioning-max-outer-insns.  */
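      /* (Both limits are --param knobs; for example, passing something
	 like "--param loop-versioning-max-inner-insns=N" on the command
	 line adjusts the inner-loop budget.  The defaults depend on the
	 GCC version.)  */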
1608627f7eb2Smrg       if (!inner_li.worth_versioning_p () || inner->inner)
1609627f7eb2Smrg 	{
1610627f7eb2Smrg 	  if (dump_enabled_p ())
1611627f7eb2Smrg 	    dump_printf_loc (MSG_NOTE, find_loop_location (loop),
1612627f7eb2Smrg 			     "counting %d instructions from this loop"
1613627f7eb2Smrg 			     " against its parent loop\n", inner_li.num_insns);
1614627f7eb2Smrg 	  li.num_insns += inner_li.num_insns;
1615627f7eb2Smrg 	}
1616627f7eb2Smrg     }
1617627f7eb2Smrg 
1618627f7eb2Smrg   /* Enforce the size limits.  */
1619627f7eb2Smrg   if (li.worth_versioning_p ())
1620627f7eb2Smrg     {
1621627f7eb2Smrg       unsigned int max_num_insns = max_insns_for_loop (loop);
1622627f7eb2Smrg       if (dump_enabled_p ())
1623627f7eb2Smrg 	dump_printf_loc (MSG_NOTE, find_loop_location (loop),
1624627f7eb2Smrg 			 "this loop has %d instructions, against"
1625627f7eb2Smrg 			 " a versioning limit of %d\n",
1626627f7eb2Smrg 			 li.num_insns, max_num_insns);
1627627f7eb2Smrg       if (li.num_insns > max_num_insns)
1628627f7eb2Smrg 	{
1629627f7eb2Smrg 	  if (dump_enabled_p ())
1630627f7eb2Smrg 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION
1631627f7eb2Smrg 			     | MSG_PRIORITY_USER_FACING,
1632627f7eb2Smrg 			     find_loop_location (loop),
1633627f7eb2Smrg 			     "this loop is too big to version");
1634627f7eb2Smrg 	  return false;
1635627f7eb2Smrg 	}
1636627f7eb2Smrg     }
1637627f7eb2Smrg 
1638627f7eb2Smrg   /* Hoist all version checks from subloops to this loop.  */
1639*4c3eb207Smrg   for (class loop *subloop = loop->inner; subloop; subloop = subloop->next)
1640627f7eb2Smrg     merge_loop_info (loop, subloop);
1641627f7eb2Smrg 
1642627f7eb2Smrg   return true;
1643627f7eb2Smrg }
1644627f7eb2Smrg 
1645627f7eb2Smrg /* Decide which loops to version and add them to the versioning queue.
1646627f7eb2Smrg    Return true if there are any loops to version.  */
1647627f7eb2Smrg 
1648627f7eb2Smrg bool
1649627f7eb2Smrg loop_versioning::make_versioning_decisions ()
1650627f7eb2Smrg {
1651627f7eb2Smrg   AUTO_DUMP_SCOPE ("make_versioning_decisions",
1652627f7eb2Smrg 		   dump_user_location_t::from_function_decl (m_fn->decl));
1653627f7eb2Smrg 
1654*4c3eb207Smrg   class loop *loop;
1655627f7eb2Smrg   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
1656627f7eb2Smrg     {
1657627f7eb2Smrg       loop_info &linfo = get_loop_info (loop);
1658627f7eb2Smrg       if (decide_whether_loop_is_versionable (loop))
1659627f7eb2Smrg 	{
1660627f7eb2Smrg 	  /* Commit to versioning LOOP directly if we can't hoist the
1661627f7eb2Smrg 	     version checks any further.  */
1662627f7eb2Smrg 	  if (linfo.worth_versioning_p ()
1663627f7eb2Smrg 	      && (loop_depth (loop) == 1 || linfo.outermost == loop))
1664627f7eb2Smrg 	    add_loop_to_queue (loop);
1665627f7eb2Smrg 	}
1666627f7eb2Smrg       else
1667627f7eb2Smrg 	{
1668627f7eb2Smrg 	  /* We can't version this loop, so individually version any
1669627f7eb2Smrg 	     subloops that would benefit and haven't been versioned yet.  */
1670627f7eb2Smrg 	  linfo.rejected_p = true;
1671*4c3eb207Smrg 	  for (class loop *subloop = loop->inner; subloop;
1672627f7eb2Smrg 	       subloop = subloop->next)
1673627f7eb2Smrg 	    if (get_loop_info (subloop).worth_versioning_p ())
1674627f7eb2Smrg 	      add_loop_to_queue (subloop);
1675627f7eb2Smrg 	}
1676627f7eb2Smrg     }
1677627f7eb2Smrg 
1678627f7eb2Smrg   return !m_loops_to_version.is_empty ();
1679627f7eb2Smrg }
1680627f7eb2Smrg 
1681627f7eb2Smrg /* Attempt to implement loop versioning for LOOP, using the information
1682627f7eb2Smrg    cached in the associated loop_info.  Return true on success.  */
1683627f7eb2Smrg 
1684627f7eb2Smrg bool
1685*4c3eb207Smrg loop_versioning::version_loop (class loop *loop)
1686627f7eb2Smrg {
1687627f7eb2Smrg   loop_info &li = get_loop_info (loop);
1688627f7eb2Smrg 
1689627f7eb2Smrg   /* Build up a condition that selects the original loop instead of
1690627f7eb2Smrg      the simplified loop.  */
1691627f7eb2Smrg   tree cond = boolean_false_node;
1692627f7eb2Smrg   bitmap_iterator bi;
1693627f7eb2Smrg   unsigned int i;
1694627f7eb2Smrg   EXECUTE_IF_SET_IN_BITMAP (&li.unity_names, 0, i, bi)
1695627f7eb2Smrg     {
1696627f7eb2Smrg       tree name = ssa_name (i);
1697627f7eb2Smrg       tree ne_one = fold_build2 (NE_EXPR, boolean_type_node, name,
1698627f7eb2Smrg 				 build_one_cst (TREE_TYPE (name)));
1699627f7eb2Smrg       cond = fold_build2 (TRUTH_OR_EXPR, boolean_type_node, cond, ne_one);
1700627f7eb2Smrg     }
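
  /* For example (illustrative): with two queued names STRIDE1 and STRIDE2,
     COND is built as "false || STRIDE1 != 1 || STRIDE2 != 1", which the
     folder should reduce to "STRIDE1 != 1 || STRIDE2 != 1".  The original
     loop runs when this is true and the simplified copy when it is false.  */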
1701627f7eb2Smrg 
1702627f7eb2Smrg   /* Convert the condition into a suitable gcond.  */
1703627f7eb2Smrg   gimple_seq stmts = NULL;
1704627f7eb2Smrg   cond = force_gimple_operand_1 (cond, &stmts, is_gimple_condexpr, NULL_TREE);
1705627f7eb2Smrg 
1706627f7eb2Smrg   /* Version the loop.  */
1707627f7eb2Smrg   initialize_original_copy_tables ();
1708627f7eb2Smrg   basic_block cond_bb;
1709627f7eb2Smrg   li.optimized_loop = loop_version (loop, cond, &cond_bb,
1710627f7eb2Smrg 				    profile_probability::unlikely (),
1711627f7eb2Smrg 				    profile_probability::likely (),
1712627f7eb2Smrg 				    profile_probability::unlikely (),
1713627f7eb2Smrg 				    profile_probability::likely (), true);
1714627f7eb2Smrg   free_original_copy_tables ();
1715627f7eb2Smrg   if (!li.optimized_loop)
1716627f7eb2Smrg     {
1717627f7eb2Smrg       if (dump_enabled_p ())
1718627f7eb2Smrg 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, find_loop_location (loop),
1719627f7eb2Smrg 			 "tried but failed to version this loop for when"
1720627f7eb2Smrg 			 " certain strides are 1\n");
1721627f7eb2Smrg       return false;
1722627f7eb2Smrg     }
1723627f7eb2Smrg 
1724627f7eb2Smrg   if (dump_enabled_p ())
1725627f7eb2Smrg     dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, find_loop_location (loop),
1726627f7eb2Smrg 		     "versioned this loop for when certain strides are 1\n");
1727627f7eb2Smrg 
1728627f7eb2Smrg   /* Insert the statements that feed COND.  */
1729627f7eb2Smrg   if (stmts)
1730627f7eb2Smrg     {
1731627f7eb2Smrg       gimple_stmt_iterator gsi = gsi_last_bb (cond_bb);
1732627f7eb2Smrg       gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
1733627f7eb2Smrg     }
1734627f7eb2Smrg 
1735627f7eb2Smrg   return true;
1736627f7eb2Smrg }
1737627f7eb2Smrg 
1738627f7eb2Smrg /* Attempt to version all loops in the versioning queue.  */
1739627f7eb2Smrg 
1740627f7eb2Smrg void
1741627f7eb2Smrg loop_versioning::implement_versioning_decisions ()
1742627f7eb2Smrg {
1743627f7eb2Smrg   /* No AUTO_DUMP_SCOPE here since all messages are top-level and
1744627f7eb2Smrg      user-facing at this point.  */
1745627f7eb2Smrg 
1746627f7eb2Smrg   bool any_succeeded_p = false;
1747*4c3eb207Smrg   class loop *loop;
1748627f7eb2Smrg   unsigned int i;
1749627f7eb2Smrg   FOR_EACH_VEC_ELT (m_loops_to_version, i, loop)
1750627f7eb2Smrg     if (version_loop (loop))
1751627f7eb2Smrg       any_succeeded_p = true;
1752627f7eb2Smrg   if (!any_succeeded_p)
1753627f7eb2Smrg     return;
1754627f7eb2Smrg 
1755627f7eb2Smrg   update_ssa (TODO_update_ssa);
1756627f7eb2Smrg 
1757627f7eb2Smrg   /* Simplify the new loop, which is used when COND is false.  */
1758627f7eb2Smrg   FOR_EACH_VEC_ELT (m_loops_to_version, i, loop)
1759627f7eb2Smrg     {
1760627f7eb2Smrg       loop_info &linfo = get_loop_info (loop);
1761627f7eb2Smrg       if (linfo.optimized_loop)
1762627f7eb2Smrg 	name_prop (linfo).substitute_and_fold (linfo.optimized_loop->header);
1763627f7eb2Smrg     }
1764627f7eb2Smrg }
1765627f7eb2Smrg 
1766627f7eb2Smrg /* Run the pass and return a set of TODO_* flags.  */
1767627f7eb2Smrg 
1768627f7eb2Smrg unsigned int
1769627f7eb2Smrg loop_versioning::run ()
1770627f7eb2Smrg {
1771627f7eb2Smrg   gcc_assert (scev_initialized_p ());
1772627f7eb2Smrg 
1773627f7eb2Smrg   if (analyze_blocks ()
1774627f7eb2Smrg       && prune_conditions ()
1775627f7eb2Smrg       && make_versioning_decisions ())
1776627f7eb2Smrg     implement_versioning_decisions ();
1777627f7eb2Smrg 
1778627f7eb2Smrg   return 0;
1779627f7eb2Smrg }
1780627f7eb2Smrg 
1781627f7eb2Smrg /* Loop versioning pass.  */
1782627f7eb2Smrg 
1783627f7eb2Smrg const pass_data pass_data_loop_versioning =
1784627f7eb2Smrg {
1785627f7eb2Smrg   GIMPLE_PASS, /* type */
1786627f7eb2Smrg   "lversion", /* name */
1787627f7eb2Smrg   OPTGROUP_LOOP, /* optinfo_flags */
1788627f7eb2Smrg   TV_LOOP_VERSIONING, /* tv_id */
1789627f7eb2Smrg   PROP_cfg, /* properties_required */
1790627f7eb2Smrg   0, /* properties_provided */
1791627f7eb2Smrg   0, /* properties_destroyed */
1792627f7eb2Smrg   0, /* todo_flags_start */
1793627f7eb2Smrg   0, /* todo_flags_finish */
1794627f7eb2Smrg };
1795627f7eb2Smrg 
1796627f7eb2Smrg class pass_loop_versioning : public gimple_opt_pass
1797627f7eb2Smrg {
1798627f7eb2Smrg public:
1799627f7eb2Smrg   pass_loop_versioning (gcc::context *ctxt)
1800627f7eb2Smrg     : gimple_opt_pass (pass_data_loop_versioning, ctxt)
1801627f7eb2Smrg   {}
1802627f7eb2Smrg 
1803627f7eb2Smrg   /* opt_pass methods: */
1804627f7eb2Smrg   virtual bool gate (function *) { return flag_version_loops_for_strides; }
1805627f7eb2Smrg   virtual unsigned int execute (function *);
1806627f7eb2Smrg };
1807627f7eb2Smrg 
1808627f7eb2Smrg unsigned int
1809627f7eb2Smrg pass_loop_versioning::execute (function *fn)
1810627f7eb2Smrg {
1811627f7eb2Smrg   if (number_of_loops (fn) <= 1)
1812627f7eb2Smrg     return 0;
1813627f7eb2Smrg 
1814627f7eb2Smrg   return loop_versioning (fn).run ();
1815627f7eb2Smrg }
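
/* Usage note (illustrative, not part of the original sources): the pass is
   gated on -fversion-loops-for-strides, and its decisions can be inspected
   through the usual per-pass dump machinery, e.g.

     gcc -O2 -fversion-loops-for-strides -fdump-tree-lversion-details test.c

   where "lversion" matches the pass name in pass_data_loop_versioning.  */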
1816627f7eb2Smrg 
1817627f7eb2Smrg } // anon namespace
1818627f7eb2Smrg 
1819627f7eb2Smrg gimple_opt_pass *
1820627f7eb2Smrg make_pass_loop_versioning (gcc::context *ctxt)
1821627f7eb2Smrg {
1822627f7eb2Smrg   return new pass_loop_versioning (ctxt);
1823627f7eb2Smrg }
1824