/* Loop versioning pass.
   Copyright (C) 2018-2020 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "gimple-iterator.h"
#include "tree-pass.h"
#include "gimplify-me.h"
#include "cfgloop.h"
#include "tree-ssa-loop.h"
#include "ssa.h"
#include "tree-scalar-evolution.h"
#include "tree-chrec.h"
#include "tree-ssa-loop-ivopts.h"
#include "fold-const.h"
#include "tree-ssa-propagate.h"
#include "tree-inline.h"
#include "domwalk.h"
#include "alloc-pool.h"
#include "vr-values.h"
#include "gimple-ssa-evrp-analyze.h"
#include "tree-vectorizer.h"
#include "omp-general.h"
#include "predict.h"
#include "tree-into-ssa.h"

namespace {

/* This pass looks for loops that could be simplified if certain loop
   invariant conditions were true.  It is effectively a form of loop
   splitting in which the pass produces the split conditions itself,
   instead of using ones that are already present in the IL.

   Versioning for when strides are 1
   ---------------------------------

   At the moment the only thing the pass looks for are memory references
   like:

     for (auto i : ...)
       ...x[i * stride]...

   It considers changing such loops to:

     if (stride == 1)
       for (auto i : ...)    [A]
         ...x[i]...
     else
       for (auto i : ...)    [B]
         ...x[i * stride]...

   This can have several benefits:

   (1) [A] is often easier or cheaper to vectorize than [B].

   (2) The scalar code in [A] is simpler than the scalar code in [B]
       (if the loops cannot be vectorized or need an epilogue loop).

   (3) We might recognize [A] as a pattern, such as a memcpy or memset.

   (4) [A] has simpler address evolutions, which can help other passes
       like loop interchange.

   The optimization is particularly useful for assumed-shape arrays in
   Fortran, where the stride of the innermost dimension depends on the
   array descriptor but is often equal to 1 in practice.  For example:

     subroutine f1(x)
       real :: x(:)
       x(:) = 100
     end subroutine f1

   generates the equivalent of:

     raw_stride = *x.dim[0].stride;
     stride = raw_stride != 0 ? raw_stride : 1;
     x_base = *x.data;
     ...
     tmp1 = stride * S;
     tmp2 = tmp1 - stride;
     *x_base[tmp2] = 1.0e+2;

   but in the common case that stride == 1, the last three statements
   simplify to:

     tmp3 = S + -1;
     *x_base[tmp3] = 1.0e+2;

   The optimization is in principle very simple.  The difficult parts are:

   (a) deciding which parts of a general address calculation correspond
       to the inner dimension of an array, since this usually isn't explicit
       in the IL, and for C often isn't even explicit in the source code

   (b) estimating when the transformation is worthwhile

   Structure
   ---------

   The pass has four phases:

   (1) Walk through the statements looking for and recording potential
       versioning opportunities.  Stop if there are none.

   (2) Use context-sensitive range information to see whether any versioning
       conditions are impossible in practice.  Remove them if so, and stop
       if no opportunities remain.

       (We do this only after (1) to keep compile time down when no
       versioning opportunities exist.)

   (3) Apply the cost model.  Decide which versioning opportunities are
       worthwhile and at which nesting level they should be applied.

   (4) Attempt to version all the loops selected by (3), so that:

         for (...)
           ...

       becomes:

         if (!cond)
           for (...) // Original loop
             ...
         else
           for (...) // New loop
             ...

       Use the version condition COND to simplify the new loop.  */

/* Enumerates the likelihood that a particular value indexes the inner
   dimension of an array.  */
enum inner_likelihood {
  INNER_UNLIKELY,
  INNER_DONT_KNOW,
  INNER_LIKELY
};

/* Information about one term of an address_info.  */
struct address_term_info
{
  /* The value of the term is EXPR * MULTIPLIER.  */
  tree expr;
  unsigned HOST_WIDE_INT multiplier;

  /* The stride applied by EXPR in each iteration of some unrecorded loop,
     or null if no stride has been identified.  */
  tree stride;

  /* Enumerates the likelihood that EXPR indexes the inner dimension
     of an array.  */
  enum inner_likelihood inner_likelihood;

  /* True if STRIDE == 1 is a versioning opportunity when considered
     in isolation.  */
  bool versioning_opportunity_p;
};

/* Information about an address calculation, and the range of constant
   offsets applied to it.  */
class address_info
{
public:
  static const unsigned int MAX_TERMS = 8;

  /* One statement that calculates the address.  If multiple statements
     share the same address, we only record the first.  */
  gimple *stmt;

  /* The loop containing STMT (cached for convenience).  If multiple
     statements share the same address, they all belong to this loop.  */
  class loop *loop;

  /* A decomposition of the calculation into a sum of terms plus an
     optional base.  When BASE is provided, it is never an SSA name.
     Once initialization is complete, all members of TERMs are SSA names.  */
  tree base;
  auto_vec<address_term_info, MAX_TERMS> terms;

  /* All bytes accessed from the address fall in the offset range
     [MIN_OFFSET, MAX_OFFSET).  */
  HOST_WIDE_INT min_offset, max_offset;
};
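
/* As a rough illustration (not taken from any particular testcase, and
   with made-up SSA names): for a 4-byte access like x[i * stride] whose
   address is computed as

     _1 = i * stride;
     _2 = _1 * 4;
     _3 = x + _2;

   we would typically end up with no BASE (x is itself an SSA name and so
   stays a term), the two terms { x, 1 } and { _1, 4 }, and an offset
   range of roughly [0, 4).  */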

/* Stores addresses based on their base and terms (ignoring the offsets).  */
struct address_info_hasher : nofree_ptr_hash <address_info>
{
  static hashval_t hash (const address_info *);
  static bool equal (const address_info *, const address_info *);
};
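
/* For example (hypothetical), accesses to a[i] and a[i + 1] in the same
   loop decompose into the same base and terms and differ only in their
   constant offsets, so they key to the same slot here; only the
   [MIN_OFFSET, MAX_OFFSET) range distinguishes them.  */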

/* Information about the versioning we'd like to apply to a loop.  */
class loop_info
{
public:
  bool worth_versioning_p () const;

  /* True if we've decided not to version this loop.  The remaining
     fields are meaningless if so.  */
  bool rejected_p;

  /* True if at least one subloop of this loop benefits from versioning.  */
  bool subloops_benefit_p;

  /* An estimate of the total number of instructions in the loop,
     excluding those in subloops that benefit from versioning.  */
  unsigned int num_insns;

  /* The outermost loop that can handle all the version checks
     described below.  */
  class loop *outermost;

  /* The first entry in the list of blocks that belong to this loop
     (and not to subloops).  m_next_block_in_loop provides the chain
     pointers for the list.  */
  basic_block block_list;

  /* We'd like to version the loop for the case in which these SSA names
     (keyed off their SSA_NAME_VERSION) are all equal to 1 at runtime.  */
  bitmap_head unity_names;

  /* If versioning succeeds, this points to the version of the loop that
     assumes the version conditions hold.  */
  class loop *optimized_loop;
};

/* The main pass structure.  */
class loop_versioning
{
public:
  loop_versioning (function *);
  ~loop_versioning ();
  unsigned int run ();

private:
  /* Used to walk the dominator tree to find loop versioning conditions
     that are always false.  */
  class lv_dom_walker : public dom_walker
  {
  public:
    lv_dom_walker (loop_versioning &);

    edge before_dom_children (basic_block) FINAL OVERRIDE;
    void after_dom_children (basic_block) FINAL OVERRIDE;

  private:
    /* The parent pass.  */
    loop_versioning &m_lv;

    /* Used to build context-dependent range information.  */
    evrp_range_analyzer m_range_analyzer;
  };

  /* Used to simplify statements based on conditions that are established
     by the version checks.  */
  class name_prop : public substitute_and_fold_engine
  {
  public:
    name_prop (loop_info &li) : m_li (li) {}
    tree get_value (tree) FINAL OVERRIDE;

  private:
    /* Information about the versioning we've performed on the loop.  */
    loop_info &m_li;
  };

  loop_info &get_loop_info (class loop *loop) { return m_loops[loop->num]; }

  unsigned int max_insns_for_loop (class loop *);
  bool expensive_stmt_p (gimple *);

  void version_for_unity (gimple *, tree);
  bool acceptable_multiplier_p (tree, unsigned HOST_WIDE_INT,
				unsigned HOST_WIDE_INT * = 0);
  bool acceptable_type_p (tree, unsigned HOST_WIDE_INT *);
  bool multiply_term_by (address_term_info &, tree);
  inner_likelihood get_inner_likelihood (tree, unsigned HOST_WIDE_INT);
  void dump_inner_likelihood (address_info &, address_term_info &);
  void analyze_stride (address_info &, address_term_info &,
		       tree, class loop *);
  bool find_per_loop_multiplication (address_info &, address_term_info &);
  bool analyze_term_using_scevs (address_info &, address_term_info &);
  void analyze_arbitrary_term (address_info &, address_term_info &);
  void analyze_address_fragment (address_info &);
  void record_address_fragment (gimple *, unsigned HOST_WIDE_INT,
				tree, unsigned HOST_WIDE_INT, HOST_WIDE_INT);
  void analyze_expr (gimple *, tree);
  bool analyze_block (basic_block);
  bool analyze_blocks ();

  void prune_loop_conditions (class loop *, vr_values *);
  bool prune_conditions ();

  void merge_loop_info (class loop *, class loop *);
  void add_loop_to_queue (class loop *);
  bool decide_whether_loop_is_versionable (class loop *);
  bool make_versioning_decisions ();

  bool version_loop (class loop *);
  void implement_versioning_decisions ();

  /* The function we're optimizing.  */
  function *m_fn;

  /* The obstack to use for all pass-specific bitmaps.  */
  bitmap_obstack m_bitmap_obstack;

  /* An obstack to use for general allocation.  */
  obstack m_obstack;

  /* The number of loops in the function.  */
  unsigned int m_nloops;

  /* The total number of loop version conditions we've found.  */
  unsigned int m_num_conditions;

  /* Assume that an address fragment of the form i * stride * scale
     (for variable stride and constant scale) will not benefit from
     versioning for stride == 1 when scale is greater than this value.  */
  unsigned HOST_WIDE_INT m_maximum_scale;

  /* Information about each loop.  */
  auto_vec<loop_info> m_loops;

  /* Used to form a linked list of blocks that belong to a loop,
     started by loop_info::block_list.  */
  auto_vec<basic_block> m_next_block_in_loop;

  /* The list of loops that we've decided to version.  */
  auto_vec<class loop *> m_loops_to_version;

  /* A table of addresses in the current loop, keyed off their values
     but not their offsets.  */
  hash_table <address_info_hasher> m_address_table;

  /* A list of all addresses in M_ADDRESS_TABLE, in a predictable order.  */
  auto_vec <address_info *, 32> m_address_list;
};

/* If EXPR is an SSA name and not a default definition, return the
   defining statement, otherwise return null.  */

static gimple *
maybe_get_stmt (tree expr)
{
  if (TREE_CODE (expr) == SSA_NAME && !SSA_NAME_IS_DEFAULT_DEF (expr))
    return SSA_NAME_DEF_STMT (expr);
  return NULL;
}

/* Like maybe_get_stmt, but also return null if the defining
   statement isn't an assignment.  */

static gassign *
maybe_get_assign (tree expr)
{
  return safe_dyn_cast <gassign *> (maybe_get_stmt (expr));
}

/* Return true if this pass should look through a cast of expression FROM
   to type TYPE when analyzing pieces of an address.  */

static bool
look_through_cast_p (tree type, tree from)
{
  return (INTEGRAL_TYPE_P (TREE_TYPE (from)) == INTEGRAL_TYPE_P (type)
	  && POINTER_TYPE_P (TREE_TYPE (from)) == POINTER_TYPE_P (type));
}

/* Strip all conversions of integers or pointers from EXPR, regardless
   of whether the conversions are nops.  This is useful in the context
   of this pass because we're not trying to fold or simulate the
   expression; we just want to see how it's structured.  */
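
/* For instance (with made-up SSA names), given:

     _1 = (int) n_5(D);
     _2 = (sizetype) _1;

   strip_casts (_2) walks back through both integer conversions and
   returns n_5(D), even though the first conversion is not a nop.  */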

static tree
strip_casts (tree expr)
{
  const unsigned int MAX_NITERS = 4;

  tree type = TREE_TYPE (expr);
  while (CONVERT_EXPR_P (expr)
	 && look_through_cast_p (type, TREE_OPERAND (expr, 0)))
    expr = TREE_OPERAND (expr, 0);

  for (unsigned int niters = 0; niters < MAX_NITERS; ++niters)
    {
      gassign *assign = maybe_get_assign (expr);
      if (assign
	  && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (assign))
	  && look_through_cast_p (type, gimple_assign_rhs1 (assign)))
	expr = gimple_assign_rhs1 (assign);
      else
	break;
    }
  return expr;
}

/* Compare two address_term_infos in the same address_info.  */

static int
compare_address_terms (const void *a_uncast, const void *b_uncast)
{
  const address_term_info *a = (const address_term_info *) a_uncast;
  const address_term_info *b = (const address_term_info *) b_uncast;

  if (a->expr != b->expr)
    return SSA_NAME_VERSION (a->expr) < SSA_NAME_VERSION (b->expr) ? -1 : 1;

  if (a->multiplier != b->multiplier)
    return a->multiplier < b->multiplier ? -1 : 1;

  return 0;
}

/* Dump ADDRESS using flags FLAGS.  */

static void
dump_address_info (dump_flags_t flags, address_info &address)
{
  if (address.base)
    dump_printf (flags, "%T + ", address.base);
  for (unsigned int i = 0; i < address.terms.length (); ++i)
    {
      if (i != 0)
	dump_printf (flags, " + ");
      dump_printf (flags, "%T", address.terms[i].expr);
      if (address.terms[i].multiplier != 1)
	dump_printf (flags, " * %wd", address.terms[i].multiplier);
    }
  dump_printf (flags, " + [%wd, %wd]",
	       address.min_offset, address.max_offset - 1);
}

/* Hash an address_info based on its base and terms.  */

hashval_t
address_info_hasher::hash (const address_info *info)
{
  inchash::hash hash;
  hash.add_int (info->base ? TREE_CODE (info->base) : 0);
  hash.add_int (info->terms.length ());
  for (unsigned int i = 0; i < info->terms.length (); ++i)
    {
      hash.add_int (SSA_NAME_VERSION (info->terms[i].expr));
      hash.add_hwi (info->terms[i].multiplier);
    }
  return hash.end ();
}

/* Return true if two address_infos have equal bases and terms.  Other
   properties might be different (such as the statement or constant
   offset range).  */

bool
address_info_hasher::equal (const address_info *a, const address_info *b)
{
  if (a->base != b->base
      && (!a->base || !b->base || !operand_equal_p (a->base, b->base, 0)))
    return false;

  if (a->terms.length () != b->terms.length ())
    return false;

  for (unsigned int i = 0; i < a->terms.length (); ++i)
    if (a->terms[i].expr != b->terms[i].expr
	|| a->terms[i].multiplier != b->terms[i].multiplier)
      return false;

  return true;
}

/* Return true if we want to version the loop, i.e. if we have a
   specific reason for doing so and no specific reason not to.  */

bool
loop_info::worth_versioning_p () const
{
  return (!rejected_p
	  && (!bitmap_empty_p (&unity_names) || subloops_benefit_p));
}

loop_versioning::lv_dom_walker::lv_dom_walker (loop_versioning &lv)
  : dom_walker (CDI_DOMINATORS), m_lv (lv), m_range_analyzer (false)
{
}

/* Process BB before processing the blocks it dominates.  */

edge
loop_versioning::lv_dom_walker::before_dom_children (basic_block bb)
{
  m_range_analyzer.enter (bb);

  if (bb == bb->loop_father->header)
    m_lv.prune_loop_conditions (bb->loop_father,
				m_range_analyzer.get_vr_values ());

  for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
       gsi_next (&si))
    m_range_analyzer.record_ranges_from_stmt (gsi_stmt (si), false);

  return NULL;
}

/* Process BB after processing the blocks it dominates.  */

void
loop_versioning::lv_dom_walker::after_dom_children (basic_block bb)
{
  m_range_analyzer.leave (bb);
}

/* Decide whether to replace VAL with a new value in a versioned loop.
   Return the new value if so, otherwise return null.  */

tree
loop_versioning::name_prop::get_value (tree val)
{
  if (TREE_CODE (val) == SSA_NAME
      && bitmap_bit_p (&m_li.unity_names, SSA_NAME_VERSION (val)))
    return build_one_cst (TREE_TYPE (val));
  return NULL_TREE;
}

/* Initialize the structure to optimize FN.  */

loop_versioning::loop_versioning (function *fn)
  : m_fn (fn),
    m_nloops (number_of_loops (fn)),
    m_num_conditions (0),
    m_address_table (31)
{
  bitmap_obstack_initialize (&m_bitmap_obstack);
  gcc_obstack_init (&m_obstack);

  /* Initialize the loop information.  */
  m_loops.safe_grow_cleared (m_nloops);
  for (unsigned int i = 0; i < m_nloops; ++i)
    {
      m_loops[i].outermost = get_loop (m_fn, 0);
      bitmap_initialize (&m_loops[i].unity_names, &m_bitmap_obstack);
    }

  /* Initialize the list of blocks that belong to each loop.  */
  unsigned int nbbs = last_basic_block_for_fn (fn);
  m_next_block_in_loop.safe_grow (nbbs);
  basic_block bb;
  FOR_EACH_BB_FN (bb, fn)
    {
      loop_info &li = get_loop_info (bb->loop_father);
      m_next_block_in_loop[bb->index] = li.block_list;
      li.block_list = bb;
    }

  /* MAX_FIXED_MODE_SIZE should be a reasonable maximum scale for
     unvectorizable code, since it is the largest size that can be
     handled efficiently by scalar code.  omp_max_vf calculates the
     maximum number of bytes in a vector, when such a value is relevant
     to loop optimization.  */
  m_maximum_scale = estimated_poly_value (omp_max_vf ());
  m_maximum_scale = MAX (m_maximum_scale, MAX_FIXED_MODE_SIZE);
}

loop_versioning::~loop_versioning ()
{
  bitmap_obstack_release (&m_bitmap_obstack);
  obstack_free (&m_obstack, NULL);
}

/* Return the maximum number of instructions allowed in LOOP before
   it becomes too big for versioning.

   There are separate limits for inner and outer loops.  The limit for
   inner loops applies only to loops that benefit directly from versioning.
   The limit for outer loops applies to all code in the outer loop and
   its subloops that *doesn't* benefit directly from versioning; such code
   would be "taken along for the ride".  The idea is that if the cost of
   the latter is small, it is better to version outer loops rather than
   inner loops, both to reduce the number of repeated checks and to enable
   more of the loop nest to be optimized as a natural nest (e.g. by loop
   interchange or outer-loop vectorization).  */

unsigned int
loop_versioning::max_insns_for_loop (class loop *loop)
{
  return (loop->inner
	  ? param_loop_versioning_max_outer_insns
	  : param_loop_versioning_max_inner_insns);
}

/* Return true if for cost reasons we should avoid versioning any loop
   that contains STMT.

   Note that we don't need to check whether versioning is invalid for
   correctness reasons, since the versioning process does that for us.
   The conditions involved are too rare to be worth duplicating here.  */

bool
loop_versioning::expensive_stmt_p (gimple *stmt)
{
  if (gcall *call = dyn_cast <gcall *> (stmt))
    /* Assume for now that the time spent in an "expensive" call would
       overwhelm any saving from versioning.  */
    return !gimple_inexpensive_call_p (call);
  return false;
}

/* Record that we want to version the loop that contains STMT for the
   case in which SSA name NAME is equal to 1.  We already know that NAME
   is invariant in the loop.  */

void
loop_versioning::version_for_unity (gimple *stmt, tree name)
{
  class loop *loop = loop_containing_stmt (stmt);
  loop_info &li = get_loop_info (loop);

  if (bitmap_set_bit (&li.unity_names, SSA_NAME_VERSION (name)))
    {
      /* This is the first time we've wanted to version LOOP for NAME.
	 Keep track of the outermost loop that can handle all versioning
	 checks in LI.  */
      class loop *outermost
	= outermost_invariant_loop_for_expr (loop, name);
      if (loop_depth (li.outermost) < loop_depth (outermost))
	li.outermost = outermost;

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, stmt, "want to version containing loop"
			   " for when %T == 1", name);
	  if (outermost == loop)
	    dump_printf (MSG_NOTE, "; cannot hoist check further");
	  else
	    {
	      dump_printf (MSG_NOTE, "; could implement the check at loop"
			   " depth %d", loop_depth (outermost));
	      if (loop_depth (li.outermost) > loop_depth (outermost))
		dump_printf (MSG_NOTE, ", but other checks only allow"
			     " a depth of %d", loop_depth (li.outermost));
	    }
	  dump_printf (MSG_NOTE, "\n");
	}

      m_num_conditions += 1;
    }
  else
    {
      /* This is a duplicate request.  */
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, stmt, "already asked to version containing"
			 " loop for when %T == 1\n", name);
    }
}

/* Return true if OP1_TREE is constant and if in principle it is worth
   versioning an address fragment of the form:

     i * OP1_TREE * OP2 * stride

   for the case in which stride == 1.  This in practice means testing
   whether:

     OP1_TREE * OP2 <= M_MAXIMUM_SCALE.

   If RESULT is nonnull, store OP1_TREE * OP2 there when returning true.  */

bool
loop_versioning::acceptable_multiplier_p (tree op1_tree,
					  unsigned HOST_WIDE_INT op2,
					  unsigned HOST_WIDE_INT *result)
{
  if (tree_fits_uhwi_p (op1_tree))
    {
      unsigned HOST_WIDE_INT op1 = tree_to_uhwi (op1_tree);
      /* The first part checks for overflow.  */
      if (op1 * op2 >= op2 && op1 * op2 <= m_maximum_scale)
	{
	  if (result)
	    *result = op1 * op2;
	  return true;
	}
    }
  return false;
}

/* Return true if it is worth using loop versioning on a memory access
   of type TYPE.  Store the size of the access in *SIZE if so.  */

bool
loop_versioning::acceptable_type_p (tree type, unsigned HOST_WIDE_INT *size)
{
  return (TYPE_SIZE_UNIT (type)
	  && acceptable_multiplier_p (TYPE_SIZE_UNIT (type), 1, size));
}

/* See whether OP is constant and whether we can multiply TERM by that
   constant without exceeding M_MAXIMUM_SCALE.  Return true and update
   TERM if so.  */

bool
loop_versioning::multiply_term_by (address_term_info &term, tree op)
{
  return acceptable_multiplier_p (op, term.multiplier, &term.multiplier);
}

/* Decide whether an address fragment of the form STRIDE * MULTIPLIER
   is likely to be indexing an innermost dimension, returning the result
   as an INNER_* probability.  */

inner_likelihood
loop_versioning::get_inner_likelihood (tree stride,
				       unsigned HOST_WIDE_INT multiplier)
{
  const unsigned int MAX_NITERS = 8;

  /* Iterate over possible values of STRIDE.  Return INNER_LIKELY if at
     least one of those values is likely to be for the innermost dimension.
     Record in UNLIKELY_P if at least one of those values is unlikely to be
     for the innermost dimension.

     E.g. for:

       stride = cond ? a * b : 1

     we should treat STRIDE as being a likely inner dimension, since
     we know that it is 1 under at least some circumstances.  (See the
     Fortran example below.)  However:

       stride = a * b

     on its own is unlikely to be for the innermost dimension, since
     that would require both a and b to be 1 at runtime.  */
  bool unlikely_p = false;
  tree worklist[MAX_NITERS];
  unsigned int length = 0;
  worklist[length++] = stride;
  for (unsigned int i = 0; i < length; ++i)
    {
      tree expr = worklist[i];

      if (CONSTANT_CLASS_P (expr))
	{
	  /* See if EXPR * MULTIPLIER would be consistent with an individual
	     access or a small grouped access.  */
	  if (acceptable_multiplier_p (expr, multiplier))
	    return INNER_LIKELY;
	  else
	    unlikely_p = true;
	}
      else if (gimple *stmt = maybe_get_stmt (expr))
	{
	  /* If EXPR is set by a PHI node, queue its arguments in case
	     we find one that is consistent with an inner dimension.

	     An important instance of this is the Fortran handling of array
	     descriptors, which calculates the stride of the inner dimension
	     using a PHI equivalent of:

		 raw_stride = a.dim[0].stride;
		 stride = raw_stride != 0 ? raw_stride : 1;

	     (Strides for outer dimensions do not treat 0 specially.)  */
	  if (gphi *phi = dyn_cast <gphi *> (stmt))
	    {
	      unsigned int nargs = gimple_phi_num_args (phi);
	      for (unsigned int j = 0; j < nargs && length < MAX_NITERS; ++j)
		worklist[length++] = strip_casts (gimple_phi_arg_def (phi, j));
	    }
	  /* If the value is set by an assignment, expect it to be read
	     from memory (such as an array descriptor) rather than be
	     calculated.  */
	  else if (gassign *assign = dyn_cast <gassign *> (stmt))
	    {
	      if (!gimple_assign_load_p (assign))
		unlikely_p = true;
	    }
	  /* Things like calls don't really tell us anything.  */
	}
    }

  /* We didn't find any possible values of STRIDE that were likely to be
     for the innermost dimension.  If we found one that was actively
     unlikely to be for the innermost dimension, assume that that applies
     to STRIDE too.  */
  return unlikely_p ? INNER_UNLIKELY : INNER_DONT_KNOW;
}

/* Dump the likelihood that TERM's stride is for the innermost dimension.
   ADDRESS is the address that contains TERM.  */

void
loop_versioning::dump_inner_likelihood (address_info &address,
					address_term_info &term)
{
  if (term.inner_likelihood == INNER_LIKELY)
    dump_printf_loc (MSG_NOTE, address.stmt, "%T is likely to be the"
		     " innermost dimension\n", term.stride);
  else if (term.inner_likelihood == INNER_UNLIKELY)
    dump_printf_loc (MSG_NOTE, address.stmt, "%T is probably not the"
		     " innermost dimension\n", term.stride);
  else
    dump_printf_loc (MSG_NOTE, address.stmt, "cannot tell whether %T"
		     " is the innermost dimension\n", term.stride);
}

/* The caller has identified that STRIDE is the stride of interest
   in TERM, and that the stride is applied in OP_LOOP.  Record this
   information in TERM, deciding whether STRIDE is likely to be for
   the innermost dimension of an array and whether it represents a
   versioning opportunity.  ADDRESS is the address that contains TERM.  */

void
loop_versioning::analyze_stride (address_info &address,
				 address_term_info &term,
				 tree stride, class loop *op_loop)
{
  term.stride = stride;

  term.inner_likelihood = get_inner_likelihood (stride, term.multiplier);
  if (dump_enabled_p ())
    dump_inner_likelihood (address, term);

  /* To be a versioning opportunity we require:

     - The multiplier applied by TERM is equal to the access size,
       so that when STRIDE is 1, the accesses in successive loop
       iterations are consecutive.

       This is deliberately conservative.  We could relax it to handle
       other cases (such as those with gaps between iterations) if we
       find any real testcases for which it's useful.

     - the stride is applied in the same loop as STMT rather than
       in an outer loop.  Although versioning for strides applied in
       outer loops could help in some cases -- such as enabling
       more loop interchange -- the savings are much lower than for
       inner loops.

     - the stride is an SSA name that is invariant in STMT's loop,
       since otherwise versioning isn't possible.  */
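  /* As an illustrative example only: for a 4-byte access like
     x[i * stride], with STRIDE applied in the same loop as STMT, the
     accumulated multiplier is the element size 4 and the access size is
     also 4, so the first condition holds; if STRIDE is additionally a
     loop-invariant SSA name, all three conditions are satisfied.  */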
  unsigned HOST_WIDE_INT access_size = address.max_offset - address.min_offset;
  if (term.multiplier == access_size
      && address.loop == op_loop
      && TREE_CODE (stride) == SSA_NAME
      && expr_invariant_in_loop_p (address.loop, stride))
    {
      term.versioning_opportunity_p = true;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, address.stmt, "%T == 1 is a versioning"
			 " opportunity\n", stride);
    }
}

/* See whether address term TERM (which belongs to ADDRESS) is the result
   of multiplying a varying SSA name by a loop-invariant SSA name.
   Return true and update TERM if so.

   This handles both cases that SCEV might handle, such as:

     for (int i = 0; i < n; ++i)
       res += a[i * stride];

   and ones in which the term varies arbitrarily between iterations, such as:

     for (int i = 0; i < n; ++i)
       res += a[index[i] * stride];  */

bool
loop_versioning::find_per_loop_multiplication (address_info &address,
					       address_term_info &term)
{
  gassign *mult = maybe_get_assign (term.expr);
  if (!mult || gimple_assign_rhs_code (mult) != MULT_EXPR)
    return false;

  class loop *mult_loop = loop_containing_stmt (mult);
  if (!loop_outer (mult_loop))
    return false;

  tree op1 = strip_casts (gimple_assign_rhs1 (mult));
  tree op2 = strip_casts (gimple_assign_rhs2 (mult));
  if (TREE_CODE (op1) != SSA_NAME || TREE_CODE (op2) != SSA_NAME)
    return false;

  bool invariant1_p = expr_invariant_in_loop_p (mult_loop, op1);
  bool invariant2_p = expr_invariant_in_loop_p (mult_loop, op2);
  if (invariant1_p == invariant2_p)
    return false;

  /* Make sure that the loop invariant is OP2 rather than OP1.  */
  if (invariant1_p)
    std::swap (op1, op2);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, address.stmt, "address term %T = varying %T"
		     " * loop-invariant %T\n", term.expr, op1, op2);
  analyze_stride (address, term, op2, mult_loop);
  return true;
}

/* Try to use scalar evolutions to find an address stride for TERM,
   which belongs to ADDRESS.  Return true and update TERM if so.

   Here we are interested in any evolution information we can find,
   not just evolutions wrt ADDRESS->LOOP.  For example, if we find that
   an outer loop obviously iterates over the inner dimension of an array,
   that information can help us eliminate worthless versioning opportunities
   in inner loops.  */

bool
loop_versioning::analyze_term_using_scevs (address_info &address,
					   address_term_info &term)
{
  gimple *setter = maybe_get_stmt (term.expr);
  if (!setter)
    return false;

  class loop *wrt_loop = loop_containing_stmt (setter);
  if (!loop_outer (wrt_loop))
    return false;

  tree chrec = strip_casts (analyze_scalar_evolution (wrt_loop, term.expr));
  if (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, address.stmt,
			 "address term %T = %T\n", term.expr, chrec);

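      /* As a hypothetical example (SSA names made up): if TERM.expr is
	 the index computed for a[i * stride] and i is the IV of loop 1,
	 the chrec might print as {0, +, stride_10}_1, so CHREC_RIGHT
	 gives the candidate stride and get_chrec_loop gives the loop
	 that applies it.  */
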
      /* Peel casts and accumulate constant multiplications, up to the
	 limit allowed by M_MAXIMUM_SCALE.  */
      tree stride = strip_casts (CHREC_RIGHT (chrec));
      while (TREE_CODE (stride) == MULT_EXPR
	     && multiply_term_by (term, TREE_OPERAND (stride, 1)))
	stride = strip_casts (TREE_OPERAND (stride, 0));

      gassign *assign;
      while ((assign = maybe_get_assign (stride))
	     && gimple_assign_rhs_code (assign) == MULT_EXPR
	     && multiply_term_by (term, gimple_assign_rhs2 (assign)))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, address.stmt,
			     "looking through %G", assign);
	  stride = strip_casts (gimple_assign_rhs1 (assign));
	}

      analyze_stride (address, term, stride, get_chrec_loop (chrec));
      return true;
    }

  return false;
}

/* Address term TERM is an arbitrary term that provides no versioning
   opportunities.  Analyze it to see whether it contains any likely
   inner strides, so that we don't mistakenly version for other
   (less likely) candidates.

   This copes with invariant innermost indices such as:

     x(i, :) = 100

   where the "i" component of the address is invariant in the loop
   but provides the real inner stride.

   ADDRESS is the address that contains TERM.  */

void
loop_versioning::analyze_arbitrary_term (address_info &address,
					 address_term_info &term)

{
  /* A multiplication offers two potential strides.  Pick the one that
     is most likely to be an innermost stride.  */
  tree expr = term.expr, alt = NULL_TREE;
  gassign *mult = maybe_get_assign (expr);
  if (mult && gimple_assign_rhs_code (mult) == MULT_EXPR)
    {
      expr = strip_casts (gimple_assign_rhs1 (mult));
      alt = strip_casts (gimple_assign_rhs2 (mult));
    }
  term.stride = expr;
  term.inner_likelihood = get_inner_likelihood (expr, term.multiplier);
  if (alt)
    {
      inner_likelihood alt_l = get_inner_likelihood (alt, term.multiplier);
      if (alt_l > term.inner_likelihood)
	{
	  term.stride = alt;
	  term.inner_likelihood = alt_l;
	}
    }
  if (dump_enabled_p ())
    dump_inner_likelihood (address, term);
}

/* Try to identify loop strides in ADDRESS and try to choose realistic
   versioning opportunities based on these strides.

   The main difficulty here isn't finding strides that could be used
   in a version check (that's pretty easy).  The problem instead is to
   avoid versioning for some stride S that is unlikely ever to be 1 at
   runtime.  Versioning for S == 1 on its own would lead to unnecessary
   code bloat, while adding S == 1 to more realistic version conditions
   would lose the optimisation opportunity offered by those other conditions.

   For example, versioning for a stride of 1 in the Fortran code:

     integer :: a(:,:)
     a(1,:) = 1

   is not usually a good idea, since the assignment is iterating over
   an outer dimension and is relatively unlikely to have a stride of 1.
   (It isn't impossible, since the inner dimension might be 1, or the
   array might be transposed.)  Similarly, in:

     integer :: a(:,:), b(:,:)
     b(:,1) = a(1,:)

   b(:,1) is relatively likely to have a stride of 1 while a(1,:) isn't.
   Versioning for when both strides are 1 would lose most of the benefit
   of versioning for b's access.

   The approach we take is as follows:

   - Analyze each term to see whether it has an identifiable stride,
     regardless of which loop applies the stride.

   - Evaluate the likelihood that each such stride is for the innermost
     dimension of an array, on the scale "likely", "don't know" or "unlikely".

   - If there is a single "likely" innermost stride, and that stride is
     applied in the loop that contains STMT, version the loop for when the
     stride is 1.  This deals with the cases in which we're fairly
     confident of doing the right thing, such as the b(:,1) reference above.

   - If there are no "likely" innermost strides, and the loop that contains
     STMT uses a stride that we rated as "don't know", version for when
     that stride is 1.  This is principally used for C code such as:

       for (int i = 0; i < n; ++i)
	 a[i * x] = ...;

     and:

       for (int j = 0; j < n; ++j)
	 for (int i = 0; i < n; ++i)
	   a[i * x + j * y] = ...;

     where nothing in the way "x" and "y" are set gives a hint as to
     whether "i" iterates over the innermost dimension of the array.
     In these situations it seems reasonable to assume the
     programmer has nested the loops appropriately (although of course
     there are examples like GEMM in which this assumption doesn't hold
     for all accesses in the loop).

     This case is also useful for the Fortran equivalent of the
     above C code.  */

void
loop_versioning::analyze_address_fragment (address_info &address)
{
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, address.stmt, "analyzing address fragment ");
      dump_address_info (MSG_NOTE, address);
      dump_printf (MSG_NOTE, "\n");
    }

  /* Analyze each component of the sum to see whether it involves an
     apparent stride.

     There is an overlap between the addresses that
     find_per_loop_multiplication and analyze_term_using_scevs can handle,
     but the former is much cheaper than SCEV analysis, so try it first.  */
  for (unsigned int i = 0; i < address.terms.length (); ++i)
    if (!find_per_loop_multiplication (address, address.terms[i])
	&& !analyze_term_using_scevs (address, address.terms[i])
	&& !POINTER_TYPE_P (TREE_TYPE (address.terms[i].expr)))
      analyze_arbitrary_term (address, address.terms[i]);

  /* Check for strides that are likely to be for the innermost dimension.

     1. If there is a single likely inner stride, if it is an SSA name,
	and if it is worth versioning the loop for when the SSA name
	equals 1, record that we want to do so.

     2. Otherwise, if there are any likely inner strides, bail out.
	This means one of:

	(a) There are multiple likely inner strides.  This suggests we're
	    confused and can't be confident of doing the right thing.
1114627f7eb2Smrg
1115627f7eb2Smrg (b) There is a single likely inner stride and it is a constant
1116627f7eb2Smrg rather than an SSA name. This can mean either that the access
1117627f7eb2Smrg is a natural one without any variable strides, such as:
1118627f7eb2Smrg
1119627f7eb2Smrg for (int i = 0; i < n; ++i)
1120627f7eb2Smrg a[i] += 1;
1121627f7eb2Smrg
1122627f7eb2Smrg or that a variable stride is applied to an outer dimension,
1123627f7eb2Smrg such as:
1124627f7eb2Smrg
1125627f7eb2Smrg for (int i = 0; i < n; ++i)
1126627f7eb2Smrg for (int j = 0; j < n; ++j)
1127627f7eb2Smrg a[j * stride][i] += 1;
1128627f7eb2Smrg
1129627f7eb2Smrg (c) There is a single likely inner stride, and it is an SSA name,
1130627f7eb2Smrg but it isn't a worthwhile versioning opportunity. This usually
1131627f7eb2Smrg means that the variable stride is applied by an outer loop,
1132627f7eb2Smrg such as:
1133627f7eb2Smrg
1134627f7eb2Smrg for (int i = 0; i < n; ++i)
1135627f7eb2Smrg for (int j = 0; j < n; ++j)
1136627f7eb2Smrg a[j][i * stride] += 1;
1137627f7eb2Smrg
1138627f7eb2Smrg or (using an example with a more natural loop nesting):
1139627f7eb2Smrg
1140627f7eb2Smrg for (int i = 0; i < n; ++i)
1141627f7eb2Smrg for (int j = 0; j < n; ++j)
1142627f7eb2Smrg a[i][j] += b[i * stride];
1143627f7eb2Smrg
1144627f7eb2Smrg in cases where b[i * stride] cannot (yet) be hoisted for
1145627f7eb2Smrg aliasing reasons.
1146627f7eb2Smrg
1147627f7eb2Smrg 3. If there are no likely inner strides, fall through to the next
1148627f7eb2Smrg set of checks.
1149627f7eb2Smrg
1150627f7eb2Smrg Pointer equality is enough to check for uniqueness in (1), since we
1151627f7eb2Smrg only care about SSA names. */
1152627f7eb2Smrg tree chosen_stride = NULL_TREE;
1153627f7eb2Smrg tree version_stride = NULL_TREE;
1154627f7eb2Smrg for (unsigned int i = 0; i < address.terms.length (); ++i)
1155627f7eb2Smrg if (chosen_stride != address.terms[i].stride
1156627f7eb2Smrg && address.terms[i].inner_likelihood == INNER_LIKELY)
1157627f7eb2Smrg {
1158627f7eb2Smrg if (chosen_stride)
1159627f7eb2Smrg return;
1160627f7eb2Smrg chosen_stride = address.terms[i].stride;
1161627f7eb2Smrg if (address.terms[i].versioning_opportunity_p)
1162627f7eb2Smrg version_stride = chosen_stride;
1163627f7eb2Smrg }
1164627f7eb2Smrg
1165627f7eb2Smrg /* If there are no likely inner strides, see if there is a single
1166627f7eb2Smrg versioning opportunity for a stride that was rated as INNER_DONT_KNOW.
1167627f7eb2Smrg See the comment above the function for the cases that this code
1168627f7eb2Smrg handles. */
1169627f7eb2Smrg if (!chosen_stride)
1170627f7eb2Smrg for (unsigned int i = 0; i < address.terms.length (); ++i)
1171627f7eb2Smrg if (version_stride != address.terms[i].stride
1172627f7eb2Smrg && address.terms[i].inner_likelihood == INNER_DONT_KNOW
1173627f7eb2Smrg && address.terms[i].versioning_opportunity_p)
1174627f7eb2Smrg {
1175627f7eb2Smrg if (version_stride)
1176627f7eb2Smrg return;
1177627f7eb2Smrg version_stride = address.terms[i].stride;
1178627f7eb2Smrg }
1179627f7eb2Smrg
1180627f7eb2Smrg if (version_stride)
1181627f7eb2Smrg version_for_unity (address.stmt, version_stride);
1182627f7eb2Smrg }
1183627f7eb2Smrg
1184627f7eb2Smrg /* Treat EXPR * MULTIPLIER + OFFSET as a fragment of an address that addresses
1185627f7eb2Smrg TYPE_SIZE bytes and record this address fragment for later processing.
1186627f7eb2Smrg STMT is the statement that contains the address. */
1187627f7eb2Smrg
1188627f7eb2Smrg void
record_address_fragment(gimple * stmt,unsigned HOST_WIDE_INT type_size,tree expr,unsigned HOST_WIDE_INT multiplier,HOST_WIDE_INT offset)1189627f7eb2Smrg loop_versioning::record_address_fragment (gimple *stmt,
1190627f7eb2Smrg unsigned HOST_WIDE_INT type_size,
1191627f7eb2Smrg tree expr,
1192627f7eb2Smrg unsigned HOST_WIDE_INT multiplier,
1193627f7eb2Smrg HOST_WIDE_INT offset)
1194627f7eb2Smrg {
1195627f7eb2Smrg /* We're only interested in computed values. */
1196627f7eb2Smrg if (TREE_CODE (expr) != SSA_NAME)
1197627f7eb2Smrg return;
1198627f7eb2Smrg
1199627f7eb2Smrg /* Quick exit if no part of the address is calculated in STMT's loop,
1200627f7eb2Smrg since such addresses have no versioning opportunities. */
1201*4c3eb207Smrg class loop *loop = loop_containing_stmt (stmt);
1202627f7eb2Smrg if (expr_invariant_in_loop_p (loop, expr))
1203627f7eb2Smrg return;
1204627f7eb2Smrg
1205627f7eb2Smrg /* Set up an address_info for EXPR * MULTIPLIER. */
1206627f7eb2Smrg address_info *address = XOBNEW (&m_obstack, address_info);
1207627f7eb2Smrg new (address) address_info;
1208627f7eb2Smrg address->stmt = stmt;
1209627f7eb2Smrg address->loop = loop;
1210627f7eb2Smrg address->base = NULL_TREE;
1211627f7eb2Smrg address->terms.quick_grow (1);
1212627f7eb2Smrg address->terms[0].expr = expr;
1213627f7eb2Smrg address->terms[0].multiplier = multiplier;
1214627f7eb2Smrg address->terms[0].stride = NULL_TREE;
1215627f7eb2Smrg address->terms[0].inner_likelihood = INNER_UNLIKELY;
1216627f7eb2Smrg address->terms[0].versioning_opportunity_p = false;
1217627f7eb2Smrg address->min_offset = offset;
1218627f7eb2Smrg
1219627f7eb2Smrg /* Peel apart the expression into a sum of address_terms, where each
1220627f7eb2Smrg term is multiplied by a constant. Treat a + b and a - b the same,
1221627f7eb2Smrg since it doesn't matter for our purposes whether an address is
1222627f7eb2Smrg increasing or decreasing. Distribute (a + b) * constant into
1223627f7eb2Smrg a * constant + b * constant.
1224627f7eb2Smrg
1225627f7eb2Smrg We don't care which loop each term belongs to, since we want to
1226627f7eb2Smrg examine as many candidate strides as possible when determining
1227627f7eb2Smrg which is likely to be for the innermost dimension. We therefore
1228627f7eb2Smrg don't limit the search to statements in STMT's loop. */
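  /* A worked sketch with made-up SSA names: if the initial term is
     {_3, 4} and the defining statements are

       _1 = (sizetype) i_10;
       _2 = _1 * 100;
       _3 = _2 + 7;

     then the constant addend is folded into MIN_OFFSET (7 * 4 = 28),
     the constant factor is absorbed into the multiplier (assuming
     multiply_term_by accepts it), and the conversion is stripped,
     leaving the single term {i_10, 400}.  */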
1229627f7eb2Smrg for (unsigned int i = 0; i < address->terms.length (); )
1230627f7eb2Smrg {
1231627f7eb2Smrg if (gassign *assign = maybe_get_assign (address->terms[i].expr))
1232627f7eb2Smrg {
1233627f7eb2Smrg tree_code code = gimple_assign_rhs_code (assign);
1234627f7eb2Smrg if (code == PLUS_EXPR
1235627f7eb2Smrg || code == POINTER_PLUS_EXPR
1236627f7eb2Smrg || code == MINUS_EXPR)
1237627f7eb2Smrg {
1238627f7eb2Smrg tree op1 = gimple_assign_rhs1 (assign);
1239627f7eb2Smrg tree op2 = gimple_assign_rhs2 (assign);
1240627f7eb2Smrg if (TREE_CODE (op2) == INTEGER_CST)
1241627f7eb2Smrg {
1242627f7eb2Smrg address->terms[i].expr = strip_casts (op1);
1243627f7eb2Smrg /* This is only a heuristic, so don't worry about truncation
1244627f7eb2Smrg or overflow. */
1245627f7eb2Smrg address->min_offset += (TREE_INT_CST_LOW (op2)
1246627f7eb2Smrg * address->terms[i].multiplier);
1247627f7eb2Smrg continue;
1248627f7eb2Smrg }
1249627f7eb2Smrg else if (address->terms.length () < address_info::MAX_TERMS)
1250627f7eb2Smrg {
1251627f7eb2Smrg unsigned int j = address->terms.length ();
1252627f7eb2Smrg address->terms.quick_push (address->terms[i]);
1253627f7eb2Smrg address->terms[i].expr = strip_casts (op1);
1254627f7eb2Smrg address->terms[j].expr = strip_casts (op2);
1255627f7eb2Smrg continue;
1256627f7eb2Smrg }
1257627f7eb2Smrg }
1258627f7eb2Smrg if (code == MULT_EXPR)
1259627f7eb2Smrg {
1260627f7eb2Smrg tree op1 = gimple_assign_rhs1 (assign);
1261627f7eb2Smrg tree op2 = gimple_assign_rhs2 (assign);
1262627f7eb2Smrg if (multiply_term_by (address->terms[i], op2))
1263627f7eb2Smrg {
1264627f7eb2Smrg address->terms[i].expr = strip_casts (op1);
1265627f7eb2Smrg continue;
1266627f7eb2Smrg }
1267627f7eb2Smrg }
1268*4c3eb207Smrg if (CONVERT_EXPR_CODE_P (code))
1269*4c3eb207Smrg {
1270*4c3eb207Smrg tree op1 = gimple_assign_rhs1 (assign);
1271*4c3eb207Smrg address->terms[i].expr = strip_casts (op1);
1272*4c3eb207Smrg continue;
1273*4c3eb207Smrg }
1274627f7eb2Smrg }
1275627f7eb2Smrg i += 1;
1276627f7eb2Smrg }
1277627f7eb2Smrg
1278627f7eb2Smrg /* Peel off any symbolic pointer. */
1279627f7eb2Smrg if (TREE_CODE (address->terms[0].expr) != SSA_NAME
1280627f7eb2Smrg && address->terms[0].multiplier == 1)
1281627f7eb2Smrg {
1282627f7eb2Smrg if (address->terms.length () == 1)
1283627f7eb2Smrg {
1284627f7eb2Smrg obstack_free (&m_obstack, address);
1285627f7eb2Smrg return;
1286627f7eb2Smrg }
1287627f7eb2Smrg address->base = address->terms[0].expr;
1288627f7eb2Smrg address->terms.ordered_remove (0);
1289627f7eb2Smrg }
1290627f7eb2Smrg
1291627f7eb2Smrg /* Require all remaining terms to be SSA names. (This could be false
1292627f7eb2Smrg for unfolded statements, but they aren't worth dealing with.) */
1293627f7eb2Smrg for (unsigned int i = 0; i < address->terms.length (); ++i)
1294627f7eb2Smrg if (TREE_CODE (address->terms[i].expr) != SSA_NAME)
1295627f7eb2Smrg {
1296627f7eb2Smrg obstack_free (&m_obstack, address);
1297627f7eb2Smrg return;
1298627f7eb2Smrg }
1299627f7eb2Smrg
1300627f7eb2Smrg /* The loop above set MIN_OFFSET based on the first byte of the
1301627f7eb2Smrg referenced data. Calculate the end + 1. */
1302627f7eb2Smrg address->max_offset = address->min_offset + type_size;
1303627f7eb2Smrg
1304627f7eb2Smrg /* Put the terms into a canonical order for the hash table lookup below. */
1305627f7eb2Smrg address->terms.qsort (compare_address_terms);
1306627f7eb2Smrg
1307627f7eb2Smrg if (dump_enabled_p ())
1308627f7eb2Smrg {
1309627f7eb2Smrg dump_printf_loc (MSG_NOTE, stmt, "recording address fragment %T", expr);
1310627f7eb2Smrg if (multiplier != 1)
1311627f7eb2Smrg dump_printf (MSG_NOTE, " * %wd", multiplier);
1312627f7eb2Smrg dump_printf (MSG_NOTE, " = ");
1313627f7eb2Smrg dump_address_info (MSG_NOTE, *address);
1314627f7eb2Smrg dump_printf (MSG_NOTE, "\n");
1315627f7eb2Smrg }
1316627f7eb2Smrg
1317627f7eb2Smrg /* Pool address information with the same terms (but potentially
1318627f7eb2Smrg different offsets). */
1319627f7eb2Smrg address_info **slot = m_address_table.find_slot (address, INSERT);
1320627f7eb2Smrg if (address_info *old_address = *slot)
1321627f7eb2Smrg {
1322627f7eb2Smrg /* We've already seen an address with the same terms. Extend the
1323627f7eb2Smrg offset range to account for this access. Doing this can paper
1324627f7eb2Smrg over gaps, such as in:
1325627f7eb2Smrg
1326627f7eb2Smrg a[i * stride * 4] + a[i * stride * 4 + 3];
1327627f7eb2Smrg
1328627f7eb2Smrg where nothing references "+ 1" or "+ 2". However, the vectorizer
1329627f7eb2Smrg handles such gapped accesses without problems, so it's not worth
1330627f7eb2Smrg trying to exclude them. */
1331627f7eb2Smrg if (old_address->min_offset > address->min_offset)
1332627f7eb2Smrg old_address->min_offset = address->min_offset;
1333627f7eb2Smrg if (old_address->max_offset < address->max_offset)
1334627f7eb2Smrg old_address->max_offset = address->max_offset;
1335627f7eb2Smrg obstack_free (&m_obstack, address);
1336627f7eb2Smrg }
1337627f7eb2Smrg else
1338627f7eb2Smrg {
1339627f7eb2Smrg /* This is the first time we've seen an address with these terms. */
1340627f7eb2Smrg *slot = address;
1341627f7eb2Smrg m_address_list.safe_push (address);
1342627f7eb2Smrg }
1343627f7eb2Smrg }
1344627f7eb2Smrg
1345627f7eb2Smrg /* Analyze expression EXPR, which occurs in STMT. */
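/* As a hedged illustration of the two cases handled below: an int
   ARRAY_REF such as "x[i * stride]" records a fragment for the index
   with MULTIPLIER == TYPE_SIZE == 4, while a MEM_REF such as
   MEM[base_1 + 16] records a fragment for the pointer operand with
   MULTIPLIER 1 and OFFSET 16.  */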
1346627f7eb2Smrg
1347627f7eb2Smrg void
1348627f7eb2Smrg loop_versioning::analyze_expr (gimple *stmt, tree expr)
1349627f7eb2Smrg {
1350627f7eb2Smrg unsigned HOST_WIDE_INT type_size;
1351627f7eb2Smrg
1352627f7eb2Smrg while (handled_component_p (expr))
1353627f7eb2Smrg {
1354627f7eb2Smrg /* See whether we can use versioning to avoid a multiplication
1355627f7eb2Smrg in an array index. */
1356627f7eb2Smrg if (TREE_CODE (expr) == ARRAY_REF
1357627f7eb2Smrg && acceptable_type_p (TREE_TYPE (expr), &type_size))
1358627f7eb2Smrg record_address_fragment (stmt, type_size,
1359627f7eb2Smrg TREE_OPERAND (expr, 1), type_size, 0);
1360627f7eb2Smrg expr = TREE_OPERAND (expr, 0);
1361627f7eb2Smrg }
1362627f7eb2Smrg
1363627f7eb2Smrg /* See whether we can use versioning to avoid a multiplication
1364627f7eb2Smrg in the pointer calculation of a MEM_REF. */
1365627f7eb2Smrg if (TREE_CODE (expr) == MEM_REF
1366627f7eb2Smrg && acceptable_type_p (TREE_TYPE (expr), &type_size))
1367627f7eb2Smrg record_address_fragment (stmt, type_size, TREE_OPERAND (expr, 0), 1,
1368627f7eb2Smrg 				     /* This is only a heuristic, so don't worry
1369627f7eb2Smrg about truncation or overflow. */
1370627f7eb2Smrg TREE_INT_CST_LOW (TREE_OPERAND (expr, 1)));
1371627f7eb2Smrg
1372627f7eb2Smrg /* These would be easy to handle if they existed at this stage. */
1373627f7eb2Smrg gcc_checking_assert (TREE_CODE (expr) != TARGET_MEM_REF);
1374627f7eb2Smrg }
1375627f7eb2Smrg
1376627f7eb2Smrg /* Analyze all the statements in BB looking for useful version checks.
1377627f7eb2Smrg Return true on success, false if something prevents the block from
1378627f7eb2Smrg being versioned. */
1379627f7eb2Smrg
1380627f7eb2Smrg bool
1381627f7eb2Smrg loop_versioning::analyze_block (basic_block bb)
1382627f7eb2Smrg {
1383*4c3eb207Smrg class loop *loop = bb->loop_father;
1384627f7eb2Smrg loop_info &li = get_loop_info (loop);
1385627f7eb2Smrg for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
1386627f7eb2Smrg gsi_next (&gsi))
1387627f7eb2Smrg {
1388627f7eb2Smrg gimple *stmt = gsi_stmt (gsi);
1389627f7eb2Smrg if (is_gimple_debug (stmt))
1390627f7eb2Smrg continue;
1391627f7eb2Smrg
1392627f7eb2Smrg if (expensive_stmt_p (stmt))
1393627f7eb2Smrg {
1394627f7eb2Smrg if (dump_enabled_p ())
1395627f7eb2Smrg dump_printf_loc (MSG_NOTE, stmt, "expensive statement"
1396627f7eb2Smrg " prevents versioning: %G", stmt);
1397627f7eb2Smrg return false;
1398627f7eb2Smrg }
1399627f7eb2Smrg
1400627f7eb2Smrg /* Only look for direct versioning opportunities in inner loops
1401627f7eb2Smrg since the benefit tends to be much smaller for outer loops. */
1402627f7eb2Smrg if (!loop->inner)
1403627f7eb2Smrg {
1404627f7eb2Smrg unsigned int nops = gimple_num_ops (stmt);
1405627f7eb2Smrg for (unsigned int i = 0; i < nops; ++i)
1406627f7eb2Smrg if (tree op = gimple_op (stmt, i))
1407627f7eb2Smrg analyze_expr (stmt, op);
1408627f7eb2Smrg }
1409627f7eb2Smrg
1410627f7eb2Smrg /* The point of the instruction limit is to prevent excessive
1411627f7eb2Smrg code growth, so this is a size-based estimate even though
1412627f7eb2Smrg the optimization is aimed at speed. */
1413627f7eb2Smrg li.num_insns += estimate_num_insns (stmt, &eni_size_weights);
1414627f7eb2Smrg }
1415627f7eb2Smrg
1416627f7eb2Smrg return true;
1417627f7eb2Smrg }
1418627f7eb2Smrg
1419627f7eb2Smrg /* Analyze all the blocks in the function, looking for useful version checks.
1420627f7eb2Smrg Return true if we found one. */
1421627f7eb2Smrg
1422627f7eb2Smrg bool
1423627f7eb2Smrg loop_versioning::analyze_blocks ()
1424627f7eb2Smrg {
1425627f7eb2Smrg AUTO_DUMP_SCOPE ("analyze_blocks",
1426627f7eb2Smrg dump_user_location_t::from_function_decl (m_fn->decl));
1427627f7eb2Smrg
1428627f7eb2Smrg /* For now we don't try to version the whole function, although
1429627f7eb2Smrg versioning at that level could be useful in some cases. */
1430627f7eb2Smrg get_loop_info (get_loop (m_fn, 0)).rejected_p = true;
1431627f7eb2Smrg
1432*4c3eb207Smrg class loop *loop;
1433627f7eb2Smrg FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
1434627f7eb2Smrg {
1435627f7eb2Smrg loop_info &linfo = get_loop_info (loop);
1436627f7eb2Smrg
1437627f7eb2Smrg /* Ignore cold loops. */
1438627f7eb2Smrg if (!optimize_loop_for_speed_p (loop))
1439627f7eb2Smrg linfo.rejected_p = true;
1440627f7eb2Smrg
1441627f7eb2Smrg /* See whether an inner loop prevents versioning of this loop. */
1442627f7eb2Smrg if (!linfo.rejected_p)
1443*4c3eb207Smrg for (class loop *inner = loop->inner; inner; inner = inner->next)
1444627f7eb2Smrg if (get_loop_info (inner).rejected_p)
1445627f7eb2Smrg {
1446627f7eb2Smrg linfo.rejected_p = true;
1447627f7eb2Smrg break;
1448627f7eb2Smrg }
1449627f7eb2Smrg
1450627f7eb2Smrg /* If versioning the loop is still a possibility, examine the
1451627f7eb2Smrg statements in the loop to look for versioning opportunities. */
1452627f7eb2Smrg if (!linfo.rejected_p)
1453627f7eb2Smrg {
1454627f7eb2Smrg void *start_point = obstack_alloc (&m_obstack, 0);
1455627f7eb2Smrg
1456627f7eb2Smrg for (basic_block bb = linfo.block_list; bb;
1457627f7eb2Smrg bb = m_next_block_in_loop[bb->index])
1458627f7eb2Smrg if (!analyze_block (bb))
1459627f7eb2Smrg {
1460627f7eb2Smrg linfo.rejected_p = true;
1461627f7eb2Smrg break;
1462627f7eb2Smrg }
1463627f7eb2Smrg
1464627f7eb2Smrg if (!linfo.rejected_p)
1465627f7eb2Smrg {
1466627f7eb2Smrg /* Process any queued address fragments, now that we have
1467627f7eb2Smrg complete grouping information. */
1468627f7eb2Smrg address_info *address;
1469627f7eb2Smrg unsigned int i;
1470627f7eb2Smrg FOR_EACH_VEC_ELT (m_address_list, i, address)
1471627f7eb2Smrg analyze_address_fragment (*address);
1472627f7eb2Smrg }
1473627f7eb2Smrg
1474627f7eb2Smrg m_address_table.empty ();
1475627f7eb2Smrg m_address_list.truncate (0);
1476627f7eb2Smrg obstack_free (&m_obstack, start_point);
1477627f7eb2Smrg }
1478627f7eb2Smrg }
1479627f7eb2Smrg
1480627f7eb2Smrg return m_num_conditions != 0;
1481627f7eb2Smrg }
1482627f7eb2Smrg
1483627f7eb2Smrg /* Use the ranges in VRS to remove impossible versioning conditions from
1484627f7eb2Smrg LOOP. */
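/* For example (a sketch rather than a specific test case): if an earlier
   guard already ensures "stride_1 > 1" on entry to LOOP, the range
   recorded for stride_1 cannot contain 1, so the scheduled
   "stride_1 == 1" check is removed below and m_num_conditions drops
   accordingly.  */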
1485627f7eb2Smrg
1486627f7eb2Smrg void
1487*4c3eb207Smrg loop_versioning::prune_loop_conditions (class loop *loop, vr_values *vrs)
1488627f7eb2Smrg {
1489627f7eb2Smrg loop_info &li = get_loop_info (loop);
1490627f7eb2Smrg
1491627f7eb2Smrg int to_remove = -1;
1492627f7eb2Smrg bitmap_iterator bi;
1493627f7eb2Smrg unsigned int i;
1494627f7eb2Smrg EXECUTE_IF_SET_IN_BITMAP (&li.unity_names, 0, i, bi)
1495627f7eb2Smrg {
1496627f7eb2Smrg tree name = ssa_name (i);
1497*4c3eb207Smrg const value_range_equiv *vr = vrs->get_value_range (name);
1498*4c3eb207Smrg if (vr && !vr->may_contain_p (build_one_cst (TREE_TYPE (name))))
1499627f7eb2Smrg {
1500627f7eb2Smrg if (dump_enabled_p ())
1501627f7eb2Smrg dump_printf_loc (MSG_NOTE, find_loop_location (loop),
1502627f7eb2Smrg "%T can never be 1 in this loop\n", name);
1503627f7eb2Smrg
1504627f7eb2Smrg if (to_remove >= 0)
1505627f7eb2Smrg bitmap_clear_bit (&li.unity_names, to_remove);
1506627f7eb2Smrg to_remove = i;
1507627f7eb2Smrg m_num_conditions -= 1;
1508627f7eb2Smrg }
1509627f7eb2Smrg }
1510627f7eb2Smrg if (to_remove >= 0)
1511627f7eb2Smrg bitmap_clear_bit (&li.unity_names, to_remove);
1512627f7eb2Smrg }
1513627f7eb2Smrg
1514627f7eb2Smrg /* Remove any scheduled loop version conditions that will never be true.
1515627f7eb2Smrg Return true if any remain. */
1516627f7eb2Smrg
1517627f7eb2Smrg bool
1518627f7eb2Smrg loop_versioning::prune_conditions ()
1519627f7eb2Smrg {
1520627f7eb2Smrg AUTO_DUMP_SCOPE ("prune_loop_conditions",
1521627f7eb2Smrg dump_user_location_t::from_function_decl (m_fn->decl));
1522627f7eb2Smrg
1523627f7eb2Smrg calculate_dominance_info (CDI_DOMINATORS);
1524627f7eb2Smrg lv_dom_walker dom_walker (*this);
1525627f7eb2Smrg dom_walker.walk (ENTRY_BLOCK_PTR_FOR_FN (m_fn));
1526627f7eb2Smrg return m_num_conditions != 0;
1527627f7eb2Smrg }
1528627f7eb2Smrg
1529627f7eb2Smrg /* Merge the version checks for INNER into immediately-enclosing loop
1530627f7eb2Smrg OUTER. */
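/* Illustrative example: if INNER requires "stride_1 == 1" and OUTER so
   far requires "stride_2 == 1", the union below leaves OUTER requiring
   both, so one versioned copy of OUTER can serve all of its subloops.  */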
1531627f7eb2Smrg
1532627f7eb2Smrg void
1533*4c3eb207Smrg loop_versioning::merge_loop_info (class loop *outer, class loop *inner)
1534627f7eb2Smrg {
1535627f7eb2Smrg loop_info &inner_li = get_loop_info (inner);
1536627f7eb2Smrg loop_info &outer_li = get_loop_info (outer);
1537627f7eb2Smrg
1538627f7eb2Smrg if (dump_enabled_p ())
1539627f7eb2Smrg {
1540627f7eb2Smrg bitmap_iterator bi;
1541627f7eb2Smrg unsigned int i;
1542627f7eb2Smrg EXECUTE_IF_SET_IN_BITMAP (&inner_li.unity_names, 0, i, bi)
1543627f7eb2Smrg if (!bitmap_bit_p (&outer_li.unity_names, i))
1544627f7eb2Smrg dump_printf_loc (MSG_NOTE, find_loop_location (inner),
1545627f7eb2Smrg "hoisting check that %T == 1 to outer loop\n",
1546627f7eb2Smrg ssa_name (i));
1547627f7eb2Smrg }
1548627f7eb2Smrg
1549627f7eb2Smrg bitmap_ior_into (&outer_li.unity_names, &inner_li.unity_names);
1550627f7eb2Smrg if (loop_depth (outer_li.outermost) < loop_depth (inner_li.outermost))
1551627f7eb2Smrg outer_li.outermost = inner_li.outermost;
1552627f7eb2Smrg }
1553627f7eb2Smrg
1554627f7eb2Smrg /* Add LOOP to the queue of loops to version. */
1555627f7eb2Smrg
1556627f7eb2Smrg void
1557*4c3eb207Smrg loop_versioning::add_loop_to_queue (class loop *loop)
1558627f7eb2Smrg {
1559627f7eb2Smrg loop_info &li = get_loop_info (loop);
1560627f7eb2Smrg
1561627f7eb2Smrg if (dump_enabled_p ())
1562627f7eb2Smrg dump_printf_loc (MSG_NOTE, find_loop_location (loop),
1563627f7eb2Smrg "queuing this loop for versioning\n");
1564627f7eb2Smrg m_loops_to_version.safe_push (loop);
1565627f7eb2Smrg
1566627f7eb2Smrg /* Don't try to version superloops. */
1567627f7eb2Smrg li.rejected_p = true;
1568627f7eb2Smrg }
1569627f7eb2Smrg
1570627f7eb2Smrg /* Decide whether the cost model would allow us to version LOOP,
1571627f7eb2Smrg either directly or as part of a parent loop, and return true if so.
1572627f7eb2Smrg This does not imply that the loop is actually worth versioning in its
1573627f7eb2Smrg own right, just that it would be valid to version it if something
1574627f7eb2Smrg benefited.
1575627f7eb2Smrg
1576627f7eb2Smrg We have already made this decision for all inner loops of LOOP. */
1577627f7eb2Smrg
1578627f7eb2Smrg bool
1579*4c3eb207Smrg loop_versioning::decide_whether_loop_is_versionable (class loop *loop)
1580627f7eb2Smrg {
1581627f7eb2Smrg loop_info &li = get_loop_info (loop);
1582627f7eb2Smrg
1583627f7eb2Smrg if (li.rejected_p)
1584627f7eb2Smrg return false;
1585627f7eb2Smrg
1586627f7eb2Smrg /* Examine the decisions made for inner loops. */
1587*4c3eb207Smrg for (class loop *inner = loop->inner; inner; inner = inner->next)
1588627f7eb2Smrg {
1589627f7eb2Smrg loop_info &inner_li = get_loop_info (inner);
1590627f7eb2Smrg if (inner_li.rejected_p)
1591627f7eb2Smrg {
1592627f7eb2Smrg if (dump_enabled_p ())
1593627f7eb2Smrg dump_printf_loc (MSG_NOTE, find_loop_location (loop),
1594627f7eb2Smrg "not versioning this loop because one of its"
1595627f7eb2Smrg " inner loops should not be versioned\n");
1596627f7eb2Smrg return false;
1597627f7eb2Smrg }
1598627f7eb2Smrg
1599627f7eb2Smrg if (inner_li.worth_versioning_p ())
1600627f7eb2Smrg li.subloops_benefit_p = true;
1601627f7eb2Smrg
1602627f7eb2Smrg /* Accumulate the number of instructions from subloops that are not
1603627f7eb2Smrg the innermost, or that don't benefit from versioning. Only the
1604627f7eb2Smrg instructions from innermost loops that benefit from versioning
1605627f7eb2Smrg should be weighed against loop-versioning-max-inner-insns;
1606627f7eb2Smrg everything else should be weighed against
1607627f7eb2Smrg loop-versioning-max-outer-insns. */
1608627f7eb2Smrg if (!inner_li.worth_versioning_p () || inner->inner)
1609627f7eb2Smrg {
1610627f7eb2Smrg if (dump_enabled_p ())
1611627f7eb2Smrg dump_printf_loc (MSG_NOTE, find_loop_location (loop),
1612627f7eb2Smrg "counting %d instructions from this loop"
1613627f7eb2Smrg " against its parent loop\n", inner_li.num_insns);
1614627f7eb2Smrg li.num_insns += inner_li.num_insns;
1615627f7eb2Smrg }
1616627f7eb2Smrg }
1617627f7eb2Smrg
1618627f7eb2Smrg /* Enforce the size limits. */
1619627f7eb2Smrg if (li.worth_versioning_p ())
1620627f7eb2Smrg {
1621627f7eb2Smrg unsigned int max_num_insns = max_insns_for_loop (loop);
1622627f7eb2Smrg if (dump_enabled_p ())
1623627f7eb2Smrg dump_printf_loc (MSG_NOTE, find_loop_location (loop),
1624627f7eb2Smrg "this loop has %d instructions, against"
1625627f7eb2Smrg " a versioning limit of %d\n",
1626627f7eb2Smrg li.num_insns, max_num_insns);
1627627f7eb2Smrg if (li.num_insns > max_num_insns)
1628627f7eb2Smrg {
1629627f7eb2Smrg if (dump_enabled_p ())
1630627f7eb2Smrg dump_printf_loc (MSG_MISSED_OPTIMIZATION
1631627f7eb2Smrg | MSG_PRIORITY_USER_FACING,
1632627f7eb2Smrg find_loop_location (loop),
1633627f7eb2Smrg "this loop is too big to version");
1634627f7eb2Smrg return false;
1635627f7eb2Smrg }
1636627f7eb2Smrg }
1637627f7eb2Smrg
1638627f7eb2Smrg /* Hoist all version checks from subloops to this loop. */
1639*4c3eb207Smrg for (class loop *subloop = loop->inner; subloop; subloop = subloop->next)
1640627f7eb2Smrg merge_loop_info (loop, subloop);
1641627f7eb2Smrg
1642627f7eb2Smrg return true;
1643627f7eb2Smrg }
1644627f7eb2Smrg
1645627f7eb2Smrg /* Decide which loops to version and add them to the versioning queue.
1646627f7eb2Smrg Return true if there are any loops to version. */
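/* Sketch of the possible outcomes for a hypothetical loop nest: if the
   checks for a profitable innermost loop can be hoisted no further than
   its parent, the parent is queued and versioned once; if the parent is
   rejected (say because it is cold), each profitable subloop is queued
   individually instead.  */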
1647627f7eb2Smrg
1648627f7eb2Smrg bool
1649627f7eb2Smrg loop_versioning::make_versioning_decisions ()
1650627f7eb2Smrg {
1651627f7eb2Smrg AUTO_DUMP_SCOPE ("make_versioning_decisions",
1652627f7eb2Smrg dump_user_location_t::from_function_decl (m_fn->decl));
1653627f7eb2Smrg
1654*4c3eb207Smrg class loop *loop;
1655627f7eb2Smrg FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
1656627f7eb2Smrg {
1657627f7eb2Smrg loop_info &linfo = get_loop_info (loop);
1658627f7eb2Smrg if (decide_whether_loop_is_versionable (loop))
1659627f7eb2Smrg {
1660627f7eb2Smrg /* Commit to versioning LOOP directly if we can't hoist the
1661627f7eb2Smrg version checks any further. */
1662627f7eb2Smrg if (linfo.worth_versioning_p ()
1663627f7eb2Smrg && (loop_depth (loop) == 1 || linfo.outermost == loop))
1664627f7eb2Smrg add_loop_to_queue (loop);
1665627f7eb2Smrg }
1666627f7eb2Smrg else
1667627f7eb2Smrg {
1668627f7eb2Smrg /* We can't version this loop, so individually version any
1669627f7eb2Smrg subloops that would benefit and haven't been versioned yet. */
1670627f7eb2Smrg linfo.rejected_p = true;
1671*4c3eb207Smrg for (class loop *subloop = loop->inner; subloop;
1672627f7eb2Smrg subloop = subloop->next)
1673627f7eb2Smrg if (get_loop_info (subloop).worth_versioning_p ())
1674627f7eb2Smrg add_loop_to_queue (subloop);
1675627f7eb2Smrg }
1676627f7eb2Smrg }
1677627f7eb2Smrg
1678627f7eb2Smrg return !m_loops_to_version.is_empty ();
1679627f7eb2Smrg }
1680627f7eb2Smrg
1681627f7eb2Smrg /* Attempt to implement loop versioning for LOOP, using the information
1682627f7eb2Smrg cached in the associated loop_info. Return true on success. */
1683627f7eb2Smrg
1684627f7eb2Smrg bool
1685*4c3eb207Smrg loop_versioning::version_loop (class loop *loop)
1686627f7eb2Smrg {
1687627f7eb2Smrg loop_info &li = get_loop_info (loop);
1688627f7eb2Smrg
1689627f7eb2Smrg /* Build up a condition that selects the original loop instead of
1690627f7eb2Smrg the simplified loop. */
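  /* For example, with hypothetical SSA names stride_1 and stride_2 in
     UNITY_NAMES the loop below builds

       cond = (false || stride_1 != 1) || stride_2 != 1

     which, once gimplified, guards the unmodified copy of the loop.  */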
1691627f7eb2Smrg tree cond = boolean_false_node;
1692627f7eb2Smrg bitmap_iterator bi;
1693627f7eb2Smrg unsigned int i;
1694627f7eb2Smrg EXECUTE_IF_SET_IN_BITMAP (&li.unity_names, 0, i, bi)
1695627f7eb2Smrg {
1696627f7eb2Smrg tree name = ssa_name (i);
1697627f7eb2Smrg tree ne_one = fold_build2 (NE_EXPR, boolean_type_node, name,
1698627f7eb2Smrg build_one_cst (TREE_TYPE (name)));
1699627f7eb2Smrg cond = fold_build2 (TRUTH_OR_EXPR, boolean_type_node, cond, ne_one);
1700627f7eb2Smrg }
1701627f7eb2Smrg
1702627f7eb2Smrg /* Convert the condition into a suitable gcond. */
1703627f7eb2Smrg gimple_seq stmts = NULL;
1704627f7eb2Smrg cond = force_gimple_operand_1 (cond, &stmts, is_gimple_condexpr, NULL_TREE);
1705627f7eb2Smrg
1706627f7eb2Smrg /* Version the loop. */
1707627f7eb2Smrg initialize_original_copy_tables ();
1708627f7eb2Smrg basic_block cond_bb;
1709627f7eb2Smrg li.optimized_loop = loop_version (loop, cond, &cond_bb,
1710627f7eb2Smrg profile_probability::unlikely (),
1711627f7eb2Smrg profile_probability::likely (),
1712627f7eb2Smrg profile_probability::unlikely (),
1713627f7eb2Smrg profile_probability::likely (), true);
1714627f7eb2Smrg free_original_copy_tables ();
1715627f7eb2Smrg if (!li.optimized_loop)
1716627f7eb2Smrg {
1717627f7eb2Smrg if (dump_enabled_p ())
1718627f7eb2Smrg dump_printf_loc (MSG_MISSED_OPTIMIZATION, find_loop_location (loop),
1719627f7eb2Smrg "tried but failed to version this loop for when"
1720627f7eb2Smrg " certain strides are 1\n");
1721627f7eb2Smrg return false;
1722627f7eb2Smrg }
1723627f7eb2Smrg
1724627f7eb2Smrg if (dump_enabled_p ())
1725627f7eb2Smrg dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, find_loop_location (loop),
1726627f7eb2Smrg "versioned this loop for when certain strides are 1\n");
1727627f7eb2Smrg
1728627f7eb2Smrg /* Insert the statements that feed COND. */
1729627f7eb2Smrg if (stmts)
1730627f7eb2Smrg {
1731627f7eb2Smrg gimple_stmt_iterator gsi = gsi_last_bb (cond_bb);
1732627f7eb2Smrg gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
1733627f7eb2Smrg }
1734627f7eb2Smrg
1735627f7eb2Smrg return true;
1736627f7eb2Smrg }
1737627f7eb2Smrg
1738627f7eb2Smrg /* Attempt to version all loops in the versioning queue. */
1739627f7eb2Smrg
1740627f7eb2Smrg void
1741627f7eb2Smrg loop_versioning::implement_versioning_decisions ()
1742627f7eb2Smrg {
1743627f7eb2Smrg /* No AUTO_DUMP_SCOPE here since all messages are top-level and
1744627f7eb2Smrg user-facing at this point. */
1745627f7eb2Smrg
1746627f7eb2Smrg bool any_succeeded_p = false;
1747*4c3eb207Smrg class loop *loop;
1748627f7eb2Smrg unsigned int i;
1749627f7eb2Smrg FOR_EACH_VEC_ELT (m_loops_to_version, i, loop)
1750627f7eb2Smrg if (version_loop (loop))
1751627f7eb2Smrg any_succeeded_p = true;
1752627f7eb2Smrg if (!any_succeeded_p)
1753627f7eb2Smrg return;
1754627f7eb2Smrg
1755627f7eb2Smrg update_ssa (TODO_update_ssa);
1756627f7eb2Smrg
1757627f7eb2Smrg /* Simplify the new loop, which is used when COND is false. */
1758627f7eb2Smrg FOR_EACH_VEC_ELT (m_loops_to_version, i, loop)
1759627f7eb2Smrg {
1760627f7eb2Smrg loop_info &linfo = get_loop_info (loop);
1761627f7eb2Smrg if (linfo.optimized_loop)
1762627f7eb2Smrg name_prop (linfo).substitute_and_fold (linfo.optimized_loop->header);
1763627f7eb2Smrg }
1764627f7eb2Smrg }
1765627f7eb2Smrg
1766627f7eb2Smrg /* Run the pass and return a set of TODO_* flags. */
1767627f7eb2Smrg
1768627f7eb2Smrg unsigned int
1769627f7eb2Smrg loop_versioning::run ()
1770627f7eb2Smrg {
1771627f7eb2Smrg gcc_assert (scev_initialized_p ());
1772627f7eb2Smrg
1773627f7eb2Smrg if (analyze_blocks ()
1774627f7eb2Smrg && prune_conditions ()
1775627f7eb2Smrg && make_versioning_decisions ())
1776627f7eb2Smrg implement_versioning_decisions ();
1777627f7eb2Smrg
1778627f7eb2Smrg return 0;
1779627f7eb2Smrg }
1780627f7eb2Smrg
1781627f7eb2Smrg /* Loop versioning pass. */
1782627f7eb2Smrg
1783627f7eb2Smrg const pass_data pass_data_loop_versioning =
1784627f7eb2Smrg {
1785627f7eb2Smrg GIMPLE_PASS, /* type */
1786627f7eb2Smrg "lversion", /* name */
1787627f7eb2Smrg OPTGROUP_LOOP, /* optinfo_flags */
1788627f7eb2Smrg TV_LOOP_VERSIONING, /* tv_id */
1789627f7eb2Smrg PROP_cfg, /* properties_required */
1790627f7eb2Smrg 0, /* properties_provided */
1791627f7eb2Smrg 0, /* properties_destroyed */
1792627f7eb2Smrg 0, /* todo_flags_start */
1793627f7eb2Smrg 0, /* todo_flags_finish */
1794627f7eb2Smrg };
1795627f7eb2Smrg
1796627f7eb2Smrg class pass_loop_versioning : public gimple_opt_pass
1797627f7eb2Smrg {
1798627f7eb2Smrg public:
1799627f7eb2Smrg   pass_loop_versioning (gcc::context *ctxt)
1800627f7eb2Smrg : gimple_opt_pass (pass_data_loop_versioning, ctxt)
1801627f7eb2Smrg {}
1802627f7eb2Smrg
1803627f7eb2Smrg /* opt_pass methods: */
1804627f7eb2Smrg   virtual bool gate (function *) { return flag_version_loops_for_strides; }
1805627f7eb2Smrg virtual unsigned int execute (function *);
1806627f7eb2Smrg };
1807627f7eb2Smrg
1808627f7eb2Smrg unsigned int
1809627f7eb2Smrg pass_loop_versioning::execute (function *fn)
1810627f7eb2Smrg {
1811627f7eb2Smrg if (number_of_loops (fn) <= 1)
1812627f7eb2Smrg return 0;
1813627f7eb2Smrg
1814627f7eb2Smrg return loop_versioning (fn).run ();
1815627f7eb2Smrg }
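/* Usage sketch (an assumption about the command line, not part of the
   pass): since the gate keys off -fversion-loops-for-strides, a run such
   as

     gcc -O2 -fversion-loops-for-strides -fdump-tree-lversion-details foo.c

   should exercise the pass and write its notes to the "lversion" dump
   file named by pass_data_loop_versioning above.  */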
1816627f7eb2Smrg
1817627f7eb2Smrg } // anon namespace
1818627f7eb2Smrg
1819627f7eb2Smrg gimple_opt_pass *
1820627f7eb2Smrg make_pass_loop_versioning (gcc::context *ctxt)
1821627f7eb2Smrg {
1822627f7eb2Smrg return new pass_loop_versioning (ctxt);
1823627f7eb2Smrg }
1824