11debfc3dSmrg /* Vectorizer
2*8feb0f0bSmrg Copyright (C) 2003-2020 Free Software Foundation, Inc.
31debfc3dSmrg Contributed by Dorit Naishlos <dorit@il.ibm.com>
41debfc3dSmrg
51debfc3dSmrg This file is part of GCC.
61debfc3dSmrg
71debfc3dSmrg GCC is free software; you can redistribute it and/or modify it under
81debfc3dSmrg the terms of the GNU General Public License as published by the Free
91debfc3dSmrg Software Foundation; either version 3, or (at your option) any later
101debfc3dSmrg version.
111debfc3dSmrg
121debfc3dSmrg GCC is distributed in the hope that it will be useful, but WITHOUT ANY
131debfc3dSmrg WARRANTY; without even the implied warranty of MERCHANTABILITY or
141debfc3dSmrg FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
151debfc3dSmrg for more details.
161debfc3dSmrg
171debfc3dSmrg You should have received a copy of the GNU General Public License
181debfc3dSmrg along with GCC; see the file COPYING3. If not see
191debfc3dSmrg <http://www.gnu.org/licenses/>. */
201debfc3dSmrg
211debfc3dSmrg /* Loop and basic block vectorizer.
221debfc3dSmrg
231debfc3dSmrg This file contains drivers for the three vectorizers:
241debfc3dSmrg (1) loop vectorizer (inter-iteration parallelism),
251debfc3dSmrg (2) loop-aware SLP (intra-iteration parallelism) (invoked by the loop
261debfc3dSmrg vectorizer)
271debfc3dSmrg (3) BB vectorizer (out-of-loops), aka SLP
281debfc3dSmrg
291debfc3dSmrg The rest of the vectorizer's code is organized as follows:
301debfc3dSmrg - tree-vect-loop.c - loop specific parts such as reductions, etc. These are
311debfc3dSmrg used by drivers (1) and (2).
321debfc3dSmrg - tree-vect-loop-manip.c - vectorizer's loop control-flow utilities, used by
331debfc3dSmrg drivers (1) and (2).
341debfc3dSmrg - tree-vect-slp.c - BB vectorization specific analysis and transformation,
351debfc3dSmrg used by drivers (2) and (3).
361debfc3dSmrg - tree-vect-stmts.c - statements analysis and transformation (used by all).
371debfc3dSmrg - tree-vect-data-refs.c - vectorizer specific data-refs analysis and
381debfc3dSmrg manipulations (used by all).
391debfc3dSmrg - tree-vect-patterns.c - vectorizable code patterns detector (used by all)
401debfc3dSmrg
411debfc3dSmrg Here's a poor attempt at illustrating that:
421debfc3dSmrg
431debfc3dSmrg tree-vectorizer.c:
441debfc3dSmrg loop_vect() loop_aware_slp() slp_vect()
451debfc3dSmrg | / \ /
461debfc3dSmrg | / \ /
471debfc3dSmrg tree-vect-loop.c tree-vect-slp.c
481debfc3dSmrg | \ \ / / |
491debfc3dSmrg | \ \/ / |
501debfc3dSmrg | \ /\ / |
511debfc3dSmrg | \ / \ / |
521debfc3dSmrg tree-vect-stmts.c tree-vect-data-refs.c
531debfc3dSmrg \ /
541debfc3dSmrg tree-vect-patterns.c
551debfc3dSmrg */
561debfc3dSmrg
571debfc3dSmrg #include "config.h"
581debfc3dSmrg #include "system.h"
591debfc3dSmrg #include "coretypes.h"
601debfc3dSmrg #include "backend.h"
611debfc3dSmrg #include "tree.h"
621debfc3dSmrg #include "gimple.h"
631debfc3dSmrg #include "predict.h"
641debfc3dSmrg #include "tree-pass.h"
651debfc3dSmrg #include "ssa.h"
661debfc3dSmrg #include "cgraph.h"
671debfc3dSmrg #include "fold-const.h"
681debfc3dSmrg #include "stor-layout.h"
691debfc3dSmrg #include "gimple-iterator.h"
701debfc3dSmrg #include "gimple-walk.h"
711debfc3dSmrg #include "tree-ssa-loop-manip.h"
721debfc3dSmrg #include "tree-ssa-loop-niter.h"
731debfc3dSmrg #include "tree-cfg.h"
741debfc3dSmrg #include "cfgloop.h"
751debfc3dSmrg #include "tree-vectorizer.h"
761debfc3dSmrg #include "tree-ssa-propagate.h"
771debfc3dSmrg #include "dbgcnt.h"
781debfc3dSmrg #include "tree-scalar-evolution.h"
79a2dc1f3fSmrg #include "stringpool.h"
80a2dc1f3fSmrg #include "attribs.h"
81c0a68be4Smrg #include "gimple-pretty-print.h"
82c0a68be4Smrg #include "opt-problem.h"
83c0a68be4Smrg #include "internal-fn.h"
841debfc3dSmrg
851debfc3dSmrg
86c0a68be4Smrg /* Loop or bb location, with hotness information. */
87c0a68be4Smrg dump_user_location_t vect_location;
881debfc3dSmrg
89c0a68be4Smrg /* auto_purge_vect_location's dtor: reset the vect_location
90c0a68be4Smrg global, to avoid stale location_t values that could reference
91c0a68be4Smrg GC-ed blocks. */
92c0a68be4Smrg
~auto_purge_vect_location()93c0a68be4Smrg auto_purge_vect_location::~auto_purge_vect_location ()
94c0a68be4Smrg {
95c0a68be4Smrg vect_location = dump_user_location_t ();
96c0a68be4Smrg }
97c0a68be4Smrg
98c0a68be4Smrg /* Dump a cost entry according to args to F. */
99c0a68be4Smrg
100c0a68be4Smrg void
dump_stmt_cost(FILE * f,void * data,int count,enum vect_cost_for_stmt kind,stmt_vec_info stmt_info,int misalign,unsigned cost,enum vect_cost_model_location where)101c0a68be4Smrg dump_stmt_cost (FILE *f, void *data, int count, enum vect_cost_for_stmt kind,
102c0a68be4Smrg stmt_vec_info stmt_info, int misalign, unsigned cost,
103c0a68be4Smrg enum vect_cost_model_location where)
104c0a68be4Smrg {
105c0a68be4Smrg fprintf (f, "%p ", data);
106c0a68be4Smrg if (stmt_info)
107c0a68be4Smrg {
108c0a68be4Smrg print_gimple_expr (f, STMT_VINFO_STMT (stmt_info), 0, TDF_SLIM);
109c0a68be4Smrg fprintf (f, " ");
110c0a68be4Smrg }
111c0a68be4Smrg else
112c0a68be4Smrg fprintf (f, "<unknown> ");
113c0a68be4Smrg fprintf (f, "%d times ", count);
114c0a68be4Smrg const char *ks = "unknown";
115c0a68be4Smrg switch (kind)
116c0a68be4Smrg {
117c0a68be4Smrg case scalar_stmt:
118c0a68be4Smrg ks = "scalar_stmt";
119c0a68be4Smrg break;
120c0a68be4Smrg case scalar_load:
121c0a68be4Smrg ks = "scalar_load";
122c0a68be4Smrg break;
123c0a68be4Smrg case scalar_store:
124c0a68be4Smrg ks = "scalar_store";
125c0a68be4Smrg break;
126c0a68be4Smrg case vector_stmt:
127c0a68be4Smrg ks = "vector_stmt";
128c0a68be4Smrg break;
129c0a68be4Smrg case vector_load:
130c0a68be4Smrg ks = "vector_load";
131c0a68be4Smrg break;
132c0a68be4Smrg case vector_gather_load:
133c0a68be4Smrg ks = "vector_gather_load";
134c0a68be4Smrg break;
135c0a68be4Smrg case unaligned_load:
136c0a68be4Smrg ks = "unaligned_load";
137c0a68be4Smrg break;
138c0a68be4Smrg case unaligned_store:
139c0a68be4Smrg ks = "unaligned_store";
140c0a68be4Smrg break;
141c0a68be4Smrg case vector_store:
142c0a68be4Smrg ks = "vector_store";
143c0a68be4Smrg break;
144c0a68be4Smrg case vector_scatter_store:
145c0a68be4Smrg ks = "vector_scatter_store";
146c0a68be4Smrg break;
147c0a68be4Smrg case vec_to_scalar:
148c0a68be4Smrg ks = "vec_to_scalar";
149c0a68be4Smrg break;
150c0a68be4Smrg case scalar_to_vec:
151c0a68be4Smrg ks = "scalar_to_vec";
152c0a68be4Smrg break;
153c0a68be4Smrg case cond_branch_not_taken:
154c0a68be4Smrg ks = "cond_branch_not_taken";
155c0a68be4Smrg break;
156c0a68be4Smrg case cond_branch_taken:
157c0a68be4Smrg ks = "cond_branch_taken";
158c0a68be4Smrg break;
159c0a68be4Smrg case vec_perm:
160c0a68be4Smrg ks = "vec_perm";
161c0a68be4Smrg break;
162c0a68be4Smrg case vec_promote_demote:
163c0a68be4Smrg ks = "vec_promote_demote";
164c0a68be4Smrg break;
165c0a68be4Smrg case vec_construct:
166c0a68be4Smrg ks = "vec_construct";
167c0a68be4Smrg break;
168c0a68be4Smrg }
169c0a68be4Smrg fprintf (f, "%s ", ks);
170c0a68be4Smrg if (kind == unaligned_load || kind == unaligned_store)
171c0a68be4Smrg fprintf (f, "(misalign %d) ", misalign);
172c0a68be4Smrg fprintf (f, "costs %u ", cost);
173c0a68be4Smrg const char *ws = "unknown";
174c0a68be4Smrg switch (where)
175c0a68be4Smrg {
176c0a68be4Smrg case vect_prologue:
177c0a68be4Smrg ws = "prologue";
178c0a68be4Smrg break;
179c0a68be4Smrg case vect_body:
180c0a68be4Smrg ws = "body";
181c0a68be4Smrg break;
182c0a68be4Smrg case vect_epilogue:
183c0a68be4Smrg ws = "epilogue";
184c0a68be4Smrg break;
185c0a68be4Smrg }
186c0a68be4Smrg fprintf (f, "in %s\n", ws);
187c0a68be4Smrg }
1881debfc3dSmrg
1891debfc3dSmrg /* For mapping simduid to vectorization factor. */
1901debfc3dSmrg
191*8feb0f0bSmrg class simduid_to_vf : public free_ptr_hash<simduid_to_vf>
1921debfc3dSmrg {
193*8feb0f0bSmrg public:
1941debfc3dSmrg unsigned int simduid;
195a2dc1f3fSmrg poly_uint64 vf;
1961debfc3dSmrg
1971debfc3dSmrg /* hash_table support. */
1981debfc3dSmrg static inline hashval_t hash (const simduid_to_vf *);
1991debfc3dSmrg static inline int equal (const simduid_to_vf *, const simduid_to_vf *);
2001debfc3dSmrg };
2011debfc3dSmrg
2021debfc3dSmrg inline hashval_t
hash(const simduid_to_vf * p)2031debfc3dSmrg simduid_to_vf::hash (const simduid_to_vf *p)
2041debfc3dSmrg {
2051debfc3dSmrg return p->simduid;
2061debfc3dSmrg }
2071debfc3dSmrg
2081debfc3dSmrg inline int
equal(const simduid_to_vf * p1,const simduid_to_vf * p2)2091debfc3dSmrg simduid_to_vf::equal (const simduid_to_vf *p1, const simduid_to_vf *p2)
2101debfc3dSmrg {
2111debfc3dSmrg return p1->simduid == p2->simduid;
2121debfc3dSmrg }
2131debfc3dSmrg
2141debfc3dSmrg /* This hash maps the OMP simd array to the corresponding simduid used
2151debfc3dSmrg to index into it. Like thus,
2161debfc3dSmrg
2171debfc3dSmrg _7 = GOMP_SIMD_LANE (simduid.0)
2181debfc3dSmrg ...
2191debfc3dSmrg ...
2201debfc3dSmrg D.1737[_7] = stuff;
2211debfc3dSmrg
2221debfc3dSmrg
2231debfc3dSmrg This hash maps from the OMP simd array (D.1737[]) to DECL_UID of
2241debfc3dSmrg simduid.0. */
2251debfc3dSmrg
2261debfc3dSmrg struct simd_array_to_simduid : free_ptr_hash<simd_array_to_simduid>
2271debfc3dSmrg {
2281debfc3dSmrg tree decl;
2291debfc3dSmrg unsigned int simduid;
2301debfc3dSmrg
2311debfc3dSmrg /* hash_table support. */
2321debfc3dSmrg static inline hashval_t hash (const simd_array_to_simduid *);
2331debfc3dSmrg static inline int equal (const simd_array_to_simduid *,
2341debfc3dSmrg const simd_array_to_simduid *);
2351debfc3dSmrg };
2361debfc3dSmrg
2371debfc3dSmrg inline hashval_t
hash(const simd_array_to_simduid * p)2381debfc3dSmrg simd_array_to_simduid::hash (const simd_array_to_simduid *p)
2391debfc3dSmrg {
2401debfc3dSmrg return DECL_UID (p->decl);
2411debfc3dSmrg }
2421debfc3dSmrg
2431debfc3dSmrg inline int
equal(const simd_array_to_simduid * p1,const simd_array_to_simduid * p2)2441debfc3dSmrg simd_array_to_simduid::equal (const simd_array_to_simduid *p1,
2451debfc3dSmrg const simd_array_to_simduid *p2)
2461debfc3dSmrg {
2471debfc3dSmrg return p1->decl == p2->decl;
2481debfc3dSmrg }
2491debfc3dSmrg
2501debfc3dSmrg /* Fold IFN_GOMP_SIMD_LANE, IFN_GOMP_SIMD_VF, IFN_GOMP_SIMD_LAST_LANE,
2511debfc3dSmrg into their corresponding constants and remove
2521debfc3dSmrg IFN_GOMP_SIMD_ORDERED_{START,END}. */
2531debfc3dSmrg
2541debfc3dSmrg static void
adjust_simduid_builtins(hash_table<simduid_to_vf> * htab)2551debfc3dSmrg adjust_simduid_builtins (hash_table<simduid_to_vf> *htab)
2561debfc3dSmrg {
2571debfc3dSmrg basic_block bb;
2581debfc3dSmrg
2591debfc3dSmrg FOR_EACH_BB_FN (bb, cfun)
2601debfc3dSmrg {
2611debfc3dSmrg gimple_stmt_iterator i;
2621debfc3dSmrg
2631debfc3dSmrg for (i = gsi_start_bb (bb); !gsi_end_p (i); )
2641debfc3dSmrg {
265a2dc1f3fSmrg poly_uint64 vf = 1;
2661debfc3dSmrg enum internal_fn ifn;
2671debfc3dSmrg gimple *stmt = gsi_stmt (i);
2681debfc3dSmrg tree t;
2691debfc3dSmrg if (!is_gimple_call (stmt)
2701debfc3dSmrg || !gimple_call_internal_p (stmt))
2711debfc3dSmrg {
2721debfc3dSmrg gsi_next (&i);
2731debfc3dSmrg continue;
2741debfc3dSmrg }
2751debfc3dSmrg ifn = gimple_call_internal_fn (stmt);
2761debfc3dSmrg switch (ifn)
2771debfc3dSmrg {
2781debfc3dSmrg case IFN_GOMP_SIMD_LANE:
2791debfc3dSmrg case IFN_GOMP_SIMD_VF:
2801debfc3dSmrg case IFN_GOMP_SIMD_LAST_LANE:
2811debfc3dSmrg break;
2821debfc3dSmrg case IFN_GOMP_SIMD_ORDERED_START:
2831debfc3dSmrg case IFN_GOMP_SIMD_ORDERED_END:
2841debfc3dSmrg if (integer_onep (gimple_call_arg (stmt, 0)))
2851debfc3dSmrg {
2861debfc3dSmrg enum built_in_function bcode
2871debfc3dSmrg = (ifn == IFN_GOMP_SIMD_ORDERED_START
2881debfc3dSmrg ? BUILT_IN_GOMP_ORDERED_START
2891debfc3dSmrg : BUILT_IN_GOMP_ORDERED_END);
2901debfc3dSmrg gimple *g
2911debfc3dSmrg = gimple_build_call (builtin_decl_explicit (bcode), 0);
292*8feb0f0bSmrg gimple_move_vops (g, stmt);
2931debfc3dSmrg gsi_replace (&i, g, true);
2941debfc3dSmrg continue;
2951debfc3dSmrg }
2961debfc3dSmrg gsi_remove (&i, true);
2971debfc3dSmrg unlink_stmt_vdef (stmt);
2981debfc3dSmrg continue;
2991debfc3dSmrg default:
3001debfc3dSmrg gsi_next (&i);
3011debfc3dSmrg continue;
3021debfc3dSmrg }
3031debfc3dSmrg tree arg = gimple_call_arg (stmt, 0);
3041debfc3dSmrg gcc_assert (arg != NULL_TREE);
3051debfc3dSmrg gcc_assert (TREE_CODE (arg) == SSA_NAME);
3061debfc3dSmrg simduid_to_vf *p = NULL, data;
3071debfc3dSmrg data.simduid = DECL_UID (SSA_NAME_VAR (arg));
3081debfc3dSmrg /* Need to nullify loop safelen field since it's value is not
3091debfc3dSmrg valid after transformation. */
3101debfc3dSmrg if (bb->loop_father && bb->loop_father->safelen > 0)
3111debfc3dSmrg bb->loop_father->safelen = 0;
3121debfc3dSmrg if (htab)
3131debfc3dSmrg {
3141debfc3dSmrg p = htab->find (&data);
3151debfc3dSmrg if (p)
3161debfc3dSmrg vf = p->vf;
3171debfc3dSmrg }
3181debfc3dSmrg switch (ifn)
3191debfc3dSmrg {
3201debfc3dSmrg case IFN_GOMP_SIMD_VF:
3211debfc3dSmrg t = build_int_cst (unsigned_type_node, vf);
3221debfc3dSmrg break;
3231debfc3dSmrg case IFN_GOMP_SIMD_LANE:
3241debfc3dSmrg t = build_int_cst (unsigned_type_node, 0);
3251debfc3dSmrg break;
3261debfc3dSmrg case IFN_GOMP_SIMD_LAST_LANE:
3271debfc3dSmrg t = gimple_call_arg (stmt, 1);
3281debfc3dSmrg break;
3291debfc3dSmrg default:
3301debfc3dSmrg gcc_unreachable ();
3311debfc3dSmrg }
332a2dc1f3fSmrg tree lhs = gimple_call_lhs (stmt);
333a2dc1f3fSmrg if (lhs)
334a2dc1f3fSmrg replace_uses_by (lhs, t);
335a2dc1f3fSmrg release_defs (stmt);
336a2dc1f3fSmrg gsi_remove (&i, true);
3371debfc3dSmrg }
3381debfc3dSmrg }
3391debfc3dSmrg }
3401debfc3dSmrg
3411debfc3dSmrg /* Helper structure for note_simd_array_uses. */
3421debfc3dSmrg
3431debfc3dSmrg struct note_simd_array_uses_struct
3441debfc3dSmrg {
3451debfc3dSmrg hash_table<simd_array_to_simduid> **htab;
3461debfc3dSmrg unsigned int simduid;
3471debfc3dSmrg };
3481debfc3dSmrg
3491debfc3dSmrg /* Callback for note_simd_array_uses, called through walk_gimple_op. */
3501debfc3dSmrg
3511debfc3dSmrg static tree
note_simd_array_uses_cb(tree * tp,int * walk_subtrees,void * data)3521debfc3dSmrg note_simd_array_uses_cb (tree *tp, int *walk_subtrees, void *data)
3531debfc3dSmrg {
3541debfc3dSmrg struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
3551debfc3dSmrg struct note_simd_array_uses_struct *ns
3561debfc3dSmrg = (struct note_simd_array_uses_struct *) wi->info;
3571debfc3dSmrg
3581debfc3dSmrg if (TYPE_P (*tp))
3591debfc3dSmrg *walk_subtrees = 0;
3601debfc3dSmrg else if (VAR_P (*tp)
3611debfc3dSmrg && lookup_attribute ("omp simd array", DECL_ATTRIBUTES (*tp))
3621debfc3dSmrg && DECL_CONTEXT (*tp) == current_function_decl)
3631debfc3dSmrg {
3641debfc3dSmrg simd_array_to_simduid data;
3651debfc3dSmrg if (!*ns->htab)
3661debfc3dSmrg *ns->htab = new hash_table<simd_array_to_simduid> (15);
3671debfc3dSmrg data.decl = *tp;
3681debfc3dSmrg data.simduid = ns->simduid;
3691debfc3dSmrg simd_array_to_simduid **slot = (*ns->htab)->find_slot (&data, INSERT);
3701debfc3dSmrg if (*slot == NULL)
3711debfc3dSmrg {
3721debfc3dSmrg simd_array_to_simduid *p = XNEW (simd_array_to_simduid);
3731debfc3dSmrg *p = data;
3741debfc3dSmrg *slot = p;
3751debfc3dSmrg }
3761debfc3dSmrg else if ((*slot)->simduid != ns->simduid)
3771debfc3dSmrg (*slot)->simduid = -1U;
3781debfc3dSmrg *walk_subtrees = 0;
3791debfc3dSmrg }
3801debfc3dSmrg return NULL_TREE;
3811debfc3dSmrg }
3821debfc3dSmrg
3831debfc3dSmrg /* Find "omp simd array" temporaries and map them to corresponding
3841debfc3dSmrg simduid. */
3851debfc3dSmrg
3861debfc3dSmrg static void
note_simd_array_uses(hash_table<simd_array_to_simduid> ** htab)3871debfc3dSmrg note_simd_array_uses (hash_table<simd_array_to_simduid> **htab)
3881debfc3dSmrg {
3891debfc3dSmrg basic_block bb;
3901debfc3dSmrg gimple_stmt_iterator gsi;
3911debfc3dSmrg struct walk_stmt_info wi;
3921debfc3dSmrg struct note_simd_array_uses_struct ns;
3931debfc3dSmrg
3941debfc3dSmrg memset (&wi, 0, sizeof (wi));
3951debfc3dSmrg wi.info = &ns;
3961debfc3dSmrg ns.htab = htab;
3971debfc3dSmrg
3981debfc3dSmrg FOR_EACH_BB_FN (bb, cfun)
3991debfc3dSmrg for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4001debfc3dSmrg {
4011debfc3dSmrg gimple *stmt = gsi_stmt (gsi);
4021debfc3dSmrg if (!is_gimple_call (stmt) || !gimple_call_internal_p (stmt))
4031debfc3dSmrg continue;
4041debfc3dSmrg switch (gimple_call_internal_fn (stmt))
4051debfc3dSmrg {
4061debfc3dSmrg case IFN_GOMP_SIMD_LANE:
4071debfc3dSmrg case IFN_GOMP_SIMD_VF:
4081debfc3dSmrg case IFN_GOMP_SIMD_LAST_LANE:
4091debfc3dSmrg break;
4101debfc3dSmrg default:
4111debfc3dSmrg continue;
4121debfc3dSmrg }
4131debfc3dSmrg tree lhs = gimple_call_lhs (stmt);
4141debfc3dSmrg if (lhs == NULL_TREE)
4151debfc3dSmrg continue;
4161debfc3dSmrg imm_use_iterator use_iter;
4171debfc3dSmrg gimple *use_stmt;
4181debfc3dSmrg ns.simduid = DECL_UID (SSA_NAME_VAR (gimple_call_arg (stmt, 0)));
4191debfc3dSmrg FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, lhs)
4201debfc3dSmrg if (!is_gimple_debug (use_stmt))
4211debfc3dSmrg walk_gimple_op (use_stmt, note_simd_array_uses_cb, &wi);
4221debfc3dSmrg }
4231debfc3dSmrg }
4241debfc3dSmrg
4251debfc3dSmrg /* Shrink arrays with "omp simd array" attribute to the corresponding
4261debfc3dSmrg vectorization factor. */
4271debfc3dSmrg
4281debfc3dSmrg static void
shrink_simd_arrays(hash_table<simd_array_to_simduid> * simd_array_to_simduid_htab,hash_table<simduid_to_vf> * simduid_to_vf_htab)4291debfc3dSmrg shrink_simd_arrays
4301debfc3dSmrg (hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab,
4311debfc3dSmrg hash_table<simduid_to_vf> *simduid_to_vf_htab)
4321debfc3dSmrg {
4331debfc3dSmrg for (hash_table<simd_array_to_simduid>::iterator iter
4341debfc3dSmrg = simd_array_to_simduid_htab->begin ();
4351debfc3dSmrg iter != simd_array_to_simduid_htab->end (); ++iter)
4361debfc3dSmrg if ((*iter)->simduid != -1U)
4371debfc3dSmrg {
4381debfc3dSmrg tree decl = (*iter)->decl;
439a2dc1f3fSmrg poly_uint64 vf = 1;
4401debfc3dSmrg if (simduid_to_vf_htab)
4411debfc3dSmrg {
4421debfc3dSmrg simduid_to_vf *p = NULL, data;
4431debfc3dSmrg data.simduid = (*iter)->simduid;
4441debfc3dSmrg p = simduid_to_vf_htab->find (&data);
4451debfc3dSmrg if (p)
4461debfc3dSmrg vf = p->vf;
4471debfc3dSmrg }
4481debfc3dSmrg tree atype
4491debfc3dSmrg = build_array_type_nelts (TREE_TYPE (TREE_TYPE (decl)), vf);
4501debfc3dSmrg TREE_TYPE (decl) = atype;
4511debfc3dSmrg relayout_decl (decl);
4521debfc3dSmrg }
4531debfc3dSmrg
4541debfc3dSmrg delete simd_array_to_simduid_htab;
4551debfc3dSmrg }
4561debfc3dSmrg
457a2dc1f3fSmrg /* Initialize the vec_info with kind KIND_IN and target cost data
458a2dc1f3fSmrg TARGET_COST_DATA_IN. */
4591debfc3dSmrg
vec_info(vec_info::vec_kind kind_in,void * target_cost_data_in,vec_info_shared * shared_)460c0a68be4Smrg vec_info::vec_info (vec_info::vec_kind kind_in, void *target_cost_data_in,
461c0a68be4Smrg vec_info_shared *shared_)
462a2dc1f3fSmrg : kind (kind_in),
463c0a68be4Smrg shared (shared_),
464a2dc1f3fSmrg target_cost_data (target_cost_data_in)
4651debfc3dSmrg {
466c0a68be4Smrg stmt_vec_infos.create (50);
467a2dc1f3fSmrg }
468a2dc1f3fSmrg
~vec_info()469a2dc1f3fSmrg vec_info::~vec_info ()
470a2dc1f3fSmrg {
471a2dc1f3fSmrg slp_instance instance;
4721debfc3dSmrg unsigned int i;
4731debfc3dSmrg
474c0a68be4Smrg FOR_EACH_VEC_ELT (slp_instances, i, instance)
475c0a68be4Smrg vect_free_slp_instance (instance, true);
476c0a68be4Smrg
477c0a68be4Smrg destroy_cost_data (target_cost_data);
478c0a68be4Smrg free_stmt_vec_infos ();
4791debfc3dSmrg }
4801debfc3dSmrg
vec_info_shared()481c0a68be4Smrg vec_info_shared::vec_info_shared ()
482c0a68be4Smrg : datarefs (vNULL),
483c0a68be4Smrg datarefs_copy (vNULL),
484c0a68be4Smrg ddrs (vNULL)
485c0a68be4Smrg {
486c0a68be4Smrg }
487a2dc1f3fSmrg
~vec_info_shared()488c0a68be4Smrg vec_info_shared::~vec_info_shared ()
489c0a68be4Smrg {
490a2dc1f3fSmrg free_data_refs (datarefs);
491a2dc1f3fSmrg free_dependence_relations (ddrs);
492c0a68be4Smrg datarefs_copy.release ();
493c0a68be4Smrg }
494c0a68be4Smrg
495c0a68be4Smrg void
save_datarefs()496c0a68be4Smrg vec_info_shared::save_datarefs ()
497c0a68be4Smrg {
498c0a68be4Smrg if (!flag_checking)
499c0a68be4Smrg return;
500c0a68be4Smrg datarefs_copy.reserve_exact (datarefs.length ());
501c0a68be4Smrg for (unsigned i = 0; i < datarefs.length (); ++i)
502c0a68be4Smrg datarefs_copy.quick_push (*datarefs[i]);
503c0a68be4Smrg }
504c0a68be4Smrg
505c0a68be4Smrg void
check_datarefs()506c0a68be4Smrg vec_info_shared::check_datarefs ()
507c0a68be4Smrg {
508c0a68be4Smrg if (!flag_checking)
509c0a68be4Smrg return;
510c0a68be4Smrg gcc_assert (datarefs.length () == datarefs_copy.length ());
511c0a68be4Smrg for (unsigned i = 0; i < datarefs.length (); ++i)
512c0a68be4Smrg if (memcmp (&datarefs_copy[i], datarefs[i], sizeof (data_reference)) != 0)
513c0a68be4Smrg gcc_unreachable ();
514c0a68be4Smrg }
515c0a68be4Smrg
516c0a68be4Smrg /* Record that STMT belongs to the vectorizable region. Create and return
517c0a68be4Smrg an associated stmt_vec_info. */
518c0a68be4Smrg
519c0a68be4Smrg stmt_vec_info
add_stmt(gimple * stmt)520c0a68be4Smrg vec_info::add_stmt (gimple *stmt)
521c0a68be4Smrg {
522c0a68be4Smrg stmt_vec_info res = new_stmt_vec_info (stmt);
523c0a68be4Smrg set_vinfo_for_stmt (stmt, res);
524c0a68be4Smrg return res;
525c0a68be4Smrg }
526c0a68be4Smrg
527c0a68be4Smrg /* If STMT has an associated stmt_vec_info, return that vec_info, otherwise
528c0a68be4Smrg return null. It is safe to call this function on any statement, even if
529c0a68be4Smrg it might not be part of the vectorizable region. */
530c0a68be4Smrg
531c0a68be4Smrg stmt_vec_info
lookup_stmt(gimple * stmt)532c0a68be4Smrg vec_info::lookup_stmt (gimple *stmt)
533c0a68be4Smrg {
534c0a68be4Smrg unsigned int uid = gimple_uid (stmt);
535c0a68be4Smrg if (uid > 0 && uid - 1 < stmt_vec_infos.length ())
536c0a68be4Smrg {
537c0a68be4Smrg stmt_vec_info res = stmt_vec_infos[uid - 1];
538c0a68be4Smrg if (res && res->stmt == stmt)
539c0a68be4Smrg return res;
540c0a68be4Smrg }
541c0a68be4Smrg return NULL;
542c0a68be4Smrg }
543c0a68be4Smrg
544c0a68be4Smrg /* If NAME is an SSA_NAME and its definition has an associated stmt_vec_info,
545c0a68be4Smrg return that stmt_vec_info, otherwise return null. It is safe to call
546c0a68be4Smrg this on arbitrary operands. */
547c0a68be4Smrg
548c0a68be4Smrg stmt_vec_info
lookup_def(tree name)549c0a68be4Smrg vec_info::lookup_def (tree name)
550c0a68be4Smrg {
551c0a68be4Smrg if (TREE_CODE (name) == SSA_NAME
552c0a68be4Smrg && !SSA_NAME_IS_DEFAULT_DEF (name))
553c0a68be4Smrg return lookup_stmt (SSA_NAME_DEF_STMT (name));
554c0a68be4Smrg return NULL;
555c0a68be4Smrg }
556c0a68be4Smrg
557c0a68be4Smrg /* See whether there is a single non-debug statement that uses LHS and
558c0a68be4Smrg whether that statement has an associated stmt_vec_info. Return the
559c0a68be4Smrg stmt_vec_info if so, otherwise return null. */
560c0a68be4Smrg
561c0a68be4Smrg stmt_vec_info
lookup_single_use(tree lhs)562c0a68be4Smrg vec_info::lookup_single_use (tree lhs)
563c0a68be4Smrg {
564c0a68be4Smrg use_operand_p dummy;
565c0a68be4Smrg gimple *use_stmt;
566c0a68be4Smrg if (single_imm_use (lhs, &dummy, &use_stmt))
567c0a68be4Smrg return lookup_stmt (use_stmt);
568c0a68be4Smrg return NULL;
569c0a68be4Smrg }
570c0a68be4Smrg
571c0a68be4Smrg /* Return vectorization information about DR. */
572c0a68be4Smrg
573c0a68be4Smrg dr_vec_info *
lookup_dr(data_reference * dr)574c0a68be4Smrg vec_info::lookup_dr (data_reference *dr)
575c0a68be4Smrg {
576c0a68be4Smrg stmt_vec_info stmt_info = lookup_stmt (DR_STMT (dr));
577c0a68be4Smrg /* DR_STMT should never refer to a stmt in a pattern replacement. */
578c0a68be4Smrg gcc_checking_assert (!is_pattern_stmt_p (stmt_info));
579c0a68be4Smrg return STMT_VINFO_DR_INFO (stmt_info->dr_aux.stmt);
580c0a68be4Smrg }
581c0a68be4Smrg
582c0a68be4Smrg /* Record that NEW_STMT_INFO now implements the same data reference
583c0a68be4Smrg as OLD_STMT_INFO. */
584c0a68be4Smrg
585c0a68be4Smrg void
move_dr(stmt_vec_info new_stmt_info,stmt_vec_info old_stmt_info)586c0a68be4Smrg vec_info::move_dr (stmt_vec_info new_stmt_info, stmt_vec_info old_stmt_info)
587c0a68be4Smrg {
588c0a68be4Smrg gcc_assert (!is_pattern_stmt_p (old_stmt_info));
589c0a68be4Smrg STMT_VINFO_DR_INFO (old_stmt_info)->stmt = new_stmt_info;
590c0a68be4Smrg new_stmt_info->dr_aux = old_stmt_info->dr_aux;
591c0a68be4Smrg STMT_VINFO_DR_WRT_VEC_LOOP (new_stmt_info)
592c0a68be4Smrg = STMT_VINFO_DR_WRT_VEC_LOOP (old_stmt_info);
593c0a68be4Smrg STMT_VINFO_GATHER_SCATTER_P (new_stmt_info)
594c0a68be4Smrg = STMT_VINFO_GATHER_SCATTER_P (old_stmt_info);
595c0a68be4Smrg }
596c0a68be4Smrg
597c0a68be4Smrg /* Permanently remove the statement described by STMT_INFO from the
598c0a68be4Smrg function. */
599c0a68be4Smrg
600c0a68be4Smrg void
remove_stmt(stmt_vec_info stmt_info)601c0a68be4Smrg vec_info::remove_stmt (stmt_vec_info stmt_info)
602c0a68be4Smrg {
603c0a68be4Smrg gcc_assert (!stmt_info->pattern_stmt_p);
604c0a68be4Smrg set_vinfo_for_stmt (stmt_info->stmt, NULL);
605c0a68be4Smrg gimple_stmt_iterator si = gsi_for_stmt (stmt_info->stmt);
606c0a68be4Smrg unlink_stmt_vdef (stmt_info->stmt);
607c0a68be4Smrg gsi_remove (&si, true);
608c0a68be4Smrg release_defs (stmt_info->stmt);
609c0a68be4Smrg free_stmt_vec_info (stmt_info);
610c0a68be4Smrg }
611c0a68be4Smrg
612c0a68be4Smrg /* Replace the statement at GSI by NEW_STMT, both the vectorization
613c0a68be4Smrg information and the function itself. STMT_INFO describes the statement
614c0a68be4Smrg at GSI. */
615c0a68be4Smrg
616c0a68be4Smrg void
replace_stmt(gimple_stmt_iterator * gsi,stmt_vec_info stmt_info,gimple * new_stmt)617c0a68be4Smrg vec_info::replace_stmt (gimple_stmt_iterator *gsi, stmt_vec_info stmt_info,
618c0a68be4Smrg gimple *new_stmt)
619c0a68be4Smrg {
620c0a68be4Smrg gimple *old_stmt = stmt_info->stmt;
621c0a68be4Smrg gcc_assert (!stmt_info->pattern_stmt_p && old_stmt == gsi_stmt (*gsi));
622c0a68be4Smrg set_vinfo_for_stmt (old_stmt, NULL);
623c0a68be4Smrg set_vinfo_for_stmt (new_stmt, stmt_info);
624c0a68be4Smrg stmt_info->stmt = new_stmt;
625c0a68be4Smrg gsi_replace (gsi, new_stmt, true);
626c0a68be4Smrg }
627c0a68be4Smrg
628c0a68be4Smrg /* Create and initialize a new stmt_vec_info struct for STMT. */
629c0a68be4Smrg
630c0a68be4Smrg stmt_vec_info
new_stmt_vec_info(gimple * stmt)631c0a68be4Smrg vec_info::new_stmt_vec_info (gimple *stmt)
632c0a68be4Smrg {
633*8feb0f0bSmrg stmt_vec_info res = XCNEW (class _stmt_vec_info);
634c0a68be4Smrg res->vinfo = this;
635c0a68be4Smrg res->stmt = stmt;
636c0a68be4Smrg
637c0a68be4Smrg STMT_VINFO_TYPE (res) = undef_vec_info_type;
638c0a68be4Smrg STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
639c0a68be4Smrg STMT_VINFO_VECTORIZABLE (res) = true;
640*8feb0f0bSmrg STMT_VINFO_REDUC_TYPE (res) = TREE_CODE_REDUCTION;
641*8feb0f0bSmrg STMT_VINFO_REDUC_CODE (res) = ERROR_MARK;
642*8feb0f0bSmrg STMT_VINFO_REDUC_FN (res) = IFN_LAST;
643*8feb0f0bSmrg STMT_VINFO_REDUC_IDX (res) = -1;
644*8feb0f0bSmrg STMT_VINFO_SLP_VECT_ONLY (res) = false;
645c0a68be4Smrg
646c0a68be4Smrg if (gimple_code (stmt) == GIMPLE_PHI
647c0a68be4Smrg && is_loop_header_bb_p (gimple_bb (stmt)))
648c0a68be4Smrg STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
649c0a68be4Smrg else
650c0a68be4Smrg STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
651c0a68be4Smrg
652c0a68be4Smrg STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
653c0a68be4Smrg STMT_SLP_TYPE (res) = loop_vect;
654c0a68be4Smrg
655c0a68be4Smrg /* This is really "uninitialized" until vect_compute_data_ref_alignment. */
656c0a68be4Smrg res->dr_aux.misalignment = DR_MISALIGNMENT_UNINITIALIZED;
657c0a68be4Smrg
658c0a68be4Smrg return res;
659c0a68be4Smrg }
660c0a68be4Smrg
661c0a68be4Smrg /* Associate STMT with INFO. */
662c0a68be4Smrg
663c0a68be4Smrg void
set_vinfo_for_stmt(gimple * stmt,stmt_vec_info info)664c0a68be4Smrg vec_info::set_vinfo_for_stmt (gimple *stmt, stmt_vec_info info)
665c0a68be4Smrg {
666c0a68be4Smrg unsigned int uid = gimple_uid (stmt);
667c0a68be4Smrg if (uid == 0)
668c0a68be4Smrg {
669c0a68be4Smrg gcc_checking_assert (info);
670c0a68be4Smrg uid = stmt_vec_infos.length () + 1;
671c0a68be4Smrg gimple_set_uid (stmt, uid);
672c0a68be4Smrg stmt_vec_infos.safe_push (info);
673c0a68be4Smrg }
674c0a68be4Smrg else
675c0a68be4Smrg {
676c0a68be4Smrg gcc_checking_assert (info == NULL);
677c0a68be4Smrg stmt_vec_infos[uid - 1] = info;
678c0a68be4Smrg }
679c0a68be4Smrg }
680c0a68be4Smrg
681c0a68be4Smrg /* Free the contents of stmt_vec_infos. */
682c0a68be4Smrg
683c0a68be4Smrg void
free_stmt_vec_infos(void)684c0a68be4Smrg vec_info::free_stmt_vec_infos (void)
685c0a68be4Smrg {
686c0a68be4Smrg unsigned int i;
687c0a68be4Smrg stmt_vec_info info;
688c0a68be4Smrg FOR_EACH_VEC_ELT (stmt_vec_infos, i, info)
689c0a68be4Smrg if (info != NULL)
690c0a68be4Smrg free_stmt_vec_info (info);
691c0a68be4Smrg stmt_vec_infos.release ();
692c0a68be4Smrg }
693c0a68be4Smrg
694c0a68be4Smrg /* Free STMT_INFO. */
695c0a68be4Smrg
696c0a68be4Smrg void
free_stmt_vec_info(stmt_vec_info stmt_info)697c0a68be4Smrg vec_info::free_stmt_vec_info (stmt_vec_info stmt_info)
698c0a68be4Smrg {
699c0a68be4Smrg if (stmt_info->pattern_stmt_p)
700c0a68be4Smrg {
701c0a68be4Smrg gimple_set_bb (stmt_info->stmt, NULL);
702c0a68be4Smrg tree lhs = gimple_get_lhs (stmt_info->stmt);
703c0a68be4Smrg if (lhs && TREE_CODE (lhs) == SSA_NAME)
704c0a68be4Smrg release_ssa_name (lhs);
705c0a68be4Smrg }
706c0a68be4Smrg
707c0a68be4Smrg STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
708c0a68be4Smrg STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
709c0a68be4Smrg free (stmt_info);
7101debfc3dSmrg }
7111debfc3dSmrg
7121debfc3dSmrg /* A helper function to free scev and LOOP niter information, as well as
7131debfc3dSmrg clear loop constraint LOOP_C_FINITE. */
7141debfc3dSmrg
7151debfc3dSmrg void
vect_free_loop_info_assumptions(class loop * loop)716*8feb0f0bSmrg vect_free_loop_info_assumptions (class loop *loop)
7171debfc3dSmrg {
7181debfc3dSmrg scev_reset_htab ();
7191debfc3dSmrg /* We need to explicitly reset upper bound information since they are
720a2dc1f3fSmrg used even after free_numbers_of_iterations_estimates. */
7211debfc3dSmrg loop->any_upper_bound = false;
7221debfc3dSmrg loop->any_likely_upper_bound = false;
723a2dc1f3fSmrg free_numbers_of_iterations_estimates (loop);
7241debfc3dSmrg loop_constraint_clear (loop, LOOP_C_FINITE);
7251debfc3dSmrg }
7261debfc3dSmrg
7271debfc3dSmrg /* If LOOP has been versioned during ifcvt, return the internal call
7281debfc3dSmrg guarding it. */
7291debfc3dSmrg
730*8feb0f0bSmrg gimple *
vect_loop_vectorized_call(class loop * loop,gcond ** cond)731*8feb0f0bSmrg vect_loop_vectorized_call (class loop *loop, gcond **cond)
7321debfc3dSmrg {
7331debfc3dSmrg basic_block bb = loop_preheader_edge (loop)->src;
7341debfc3dSmrg gimple *g;
7351debfc3dSmrg do
7361debfc3dSmrg {
7371debfc3dSmrg g = last_stmt (bb);
7381debfc3dSmrg if (g)
7391debfc3dSmrg break;
7401debfc3dSmrg if (!single_pred_p (bb))
7411debfc3dSmrg break;
7421debfc3dSmrg bb = single_pred (bb);
7431debfc3dSmrg }
7441debfc3dSmrg while (1);
7451debfc3dSmrg if (g && gimple_code (g) == GIMPLE_COND)
7461debfc3dSmrg {
747*8feb0f0bSmrg if (cond)
748*8feb0f0bSmrg *cond = as_a <gcond *> (g);
7491debfc3dSmrg gimple_stmt_iterator gsi = gsi_for_stmt (g);
7501debfc3dSmrg gsi_prev (&gsi);
7511debfc3dSmrg if (!gsi_end_p (gsi))
7521debfc3dSmrg {
7531debfc3dSmrg g = gsi_stmt (gsi);
7541debfc3dSmrg if (gimple_call_internal_p (g, IFN_LOOP_VECTORIZED)
7551debfc3dSmrg && (tree_to_shwi (gimple_call_arg (g, 0)) == loop->num
7561debfc3dSmrg || tree_to_shwi (gimple_call_arg (g, 1)) == loop->num))
7571debfc3dSmrg return g;
7581debfc3dSmrg }
7591debfc3dSmrg }
7601debfc3dSmrg return NULL;
7611debfc3dSmrg }
7621debfc3dSmrg
/* If LOOP has been versioned during loop distribution, return the guarding
   internal call.  Returns NULL if LOOP was not versioned or the original
   loop no longer exists.  */

static gimple *
vect_loop_dist_alias_call (class loop *loop)
{
  basic_block bb;
  basic_block entry;
  class loop *outer, *orig;
  gimple_stmt_iterator gsi;
  gimple *g;

  /* orig_loop_num == 0 means LOOP was never versioned by distribution.  */
  if (loop->orig_loop_num == 0)
    return NULL;

  orig = get_loop (cfun, loop->orig_loop_num);
  if (orig == NULL)
    {
      /* The original loop is somehow destroyed.  Clear the information.  */
      loop->orig_loop_num = 0;
      return NULL;
    }

  /* The guarding condition dominates both versions, so start the search
     at their nearest common dominator; when LOOP is the original itself,
     start just above it.  */
  if (loop != orig)
    bb = nearest_common_dominator (CDI_DOMINATORS, loop->header, orig->header);
  else
    bb = loop_preheader_edge (loop)->src;

  outer = bb->loop_father;
  entry = ENTRY_BLOCK_PTR_FOR_FN (cfun);

  /* Look upward in dominance tree, but do not leave the loop that
     contains both versions.  */
  for (; bb != entry && flow_bb_inside_loop_p (outer, bb);
       bb = get_immediate_dominator (CDI_DOMINATORS, bb))
    {
      g = last_stmt (bb);
      if (g == NULL || gimple_code (g) != GIMPLE_COND)
	continue;

      /* As with LOOP_VECTORIZED, the call precedes the condition that
	 tests its result.  */
      gsi = gsi_for_stmt (g);
      gsi_prev (&gsi);
      if (gsi_end_p (gsi))
	continue;

      g = gsi_stmt (gsi);
      /* The guarding internal function call must have the same distribution
	 alias id.  */
      if (gimple_call_internal_p (g, IFN_LOOP_DIST_ALIAS)
	  && (tree_to_shwi (gimple_call_arg (g, 0)) == loop->orig_loop_num))
	return g;
    }
  return NULL;
}
8161debfc3dSmrg
8171debfc3dSmrg /* Set the uids of all the statements in basic blocks inside loop
8181debfc3dSmrg represented by LOOP_VINFO. LOOP_VECTORIZED_CALL is the internal
8191debfc3dSmrg call guarding the loop which has been if converted. */
8201debfc3dSmrg static void
set_uid_loop_bbs(loop_vec_info loop_vinfo,gimple * loop_vectorized_call)8211debfc3dSmrg set_uid_loop_bbs (loop_vec_info loop_vinfo, gimple *loop_vectorized_call)
8221debfc3dSmrg {
8231debfc3dSmrg tree arg = gimple_call_arg (loop_vectorized_call, 1);
8241debfc3dSmrg basic_block *bbs;
8251debfc3dSmrg unsigned int i;
826*8feb0f0bSmrg class loop *scalar_loop = get_loop (cfun, tree_to_shwi (arg));
8271debfc3dSmrg
8281debfc3dSmrg LOOP_VINFO_SCALAR_LOOP (loop_vinfo) = scalar_loop;
8291debfc3dSmrg gcc_checking_assert (vect_loop_vectorized_call (scalar_loop)
8301debfc3dSmrg == loop_vectorized_call);
8311debfc3dSmrg /* If we are going to vectorize outer loop, prevent vectorization
8321debfc3dSmrg of the inner loop in the scalar loop - either the scalar loop is
8331debfc3dSmrg thrown away, so it is a wasted work, or is used only for
8341debfc3dSmrg a few iterations. */
8351debfc3dSmrg if (scalar_loop->inner)
8361debfc3dSmrg {
8371debfc3dSmrg gimple *g = vect_loop_vectorized_call (scalar_loop->inner);
8381debfc3dSmrg if (g)
8391debfc3dSmrg {
8401debfc3dSmrg arg = gimple_call_arg (g, 0);
8411debfc3dSmrg get_loop (cfun, tree_to_shwi (arg))->dont_vectorize = true;
842a2dc1f3fSmrg fold_loop_internal_call (g, boolean_false_node);
8431debfc3dSmrg }
8441debfc3dSmrg }
8451debfc3dSmrg bbs = get_loop_body (scalar_loop);
8461debfc3dSmrg for (i = 0; i < scalar_loop->num_nodes; i++)
8471debfc3dSmrg {
8481debfc3dSmrg basic_block bb = bbs[i];
8491debfc3dSmrg gimple_stmt_iterator gsi;
8501debfc3dSmrg for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
8511debfc3dSmrg {
8521debfc3dSmrg gimple *phi = gsi_stmt (gsi);
8531debfc3dSmrg gimple_set_uid (phi, 0);
8541debfc3dSmrg }
8551debfc3dSmrg for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
8561debfc3dSmrg {
8571debfc3dSmrg gimple *stmt = gsi_stmt (gsi);
8581debfc3dSmrg gimple_set_uid (stmt, 0);
8591debfc3dSmrg }
8601debfc3dSmrg }
8611debfc3dSmrg free (bbs);
8621debfc3dSmrg }
8631debfc3dSmrg
/* Try to vectorize LOOP.  LOOP_VECTORIZED_CALL and LOOP_DIST_ALIAS_CALL
   are the IFN_LOOP_VECTORIZED resp. IFN_LOOP_DIST_ALIAS calls guarding
   LOOP's version, or NULL.  On success *NUM_VECTORIZED_LOOPS is
   incremented (epilogue loops are not counted) and, for simd loops, the
   vectorization factor is recorded in SIMDUID_TO_VF_HTAB (allocated on
   demand).  Returns a mask of TODO_* flags for the caller.  */

static unsigned
try_vectorize_loop_1 (hash_table<simduid_to_vf> *&simduid_to_vf_htab,
		      unsigned *num_vectorized_loops, loop_p loop,
		      gimple *loop_vectorized_call,
		      gimple *loop_dist_alias_call)
{
  unsigned ret = 0;
  vec_info_shared shared;
  /* Reset vect_location when leaving this function, however it ends.  */
  auto_purge_vect_location sentinel;
  vect_location = find_loop_location (loop);

  if (LOCATION_LOCUS (vect_location.get_location_t ()) != UNKNOWN_LOCATION
      && dump_enabled_p ())
    dump_printf (MSG_NOTE | MSG_PRIORITY_INTERNALS,
		 "\nAnalyzing loop at %s:%d\n",
		 LOCATION_FILE (vect_location.get_location_t ()),
		 LOCATION_LINE (vect_location.get_location_t ()));

  opt_loop_vec_info loop_vinfo = opt_loop_vec_info::success (NULL);
  /* In the case of epilogue vectorization the loop already has its
     loop_vec_info set, we do not require to analyze the loop in this case.  */
  if (loop_vec_info vinfo = loop_vec_info_for_loop (loop))
    loop_vinfo = opt_loop_vec_info::success (vinfo);
  else
    {
      /* Try to analyze the loop, retaining an opt_problem if dump_enabled_p.  */
      loop_vinfo = vect_analyze_loop (loop, &shared);
      loop->aux = loop_vinfo;
    }

  /* Report why the analysis failed, if it did and we can say.  */
  if (!loop_vinfo)
    if (dump_enabled_p ())
      if (opt_problem *problem = loop_vinfo.get_problem ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "couldn't vectorize loop\n");
	  problem->emit_and_clear ();
	}

  if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
    {
      /* Free existing information if loop is analyzed with some
	 assumptions.  */
      if (loop_constraint_set_p (loop, LOOP_C_FINITE))
	vect_free_loop_info_assumptions (loop);

      /* If we applied if-conversion then try to vectorize the
	 BB of innermost loops.
	 ??? Ideally BB vectorization would learn to vectorize
	 control flow by applying if-conversion on-the-fly, the
	 following retains the if-converted loop body even when
	 only non-if-converted parts took part in BB vectorization.  */
      if (flag_tree_slp_vectorize != 0
	  && loop_vectorized_call
	  && ! loop->inner)
	{
	  basic_block bb = loop->header;
	  bool require_loop_vectorize = false;
	  for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
	       !gsi_end_p (gsi); gsi_next (&gsi))
	    {
	      gimple *stmt = gsi_stmt (gsi);
	      gcall *call = dyn_cast <gcall *> (stmt);
	      if (call && gimple_call_internal_p (call))
		{
		  internal_fn ifn = gimple_call_internal_fn (call);
		  if (ifn == IFN_MASK_LOAD || ifn == IFN_MASK_STORE
		      /* Don't keep the if-converted parts when the ifn with
			 specific type is not supported by the backend.  */
		      || (direct_internal_fn_p (ifn)
			  && !direct_internal_fn_supported_p
						(call, OPTIMIZE_FOR_SPEED)))
		    {
		      require_loop_vectorize = true;
		      break;
		    }
		}
	      /* Prepare the statement for BB vectorization.  */
	      gimple_set_uid (stmt, -1);
	      gimple_set_visited (stmt, false);
	    }
	  if (!require_loop_vectorize && vect_slp_bb (bb))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "basic block vectorized\n");
	      /* Commit to the if-converted version of the loop.  */
	      fold_loop_internal_call (loop_vectorized_call,
				       boolean_true_node);
	      loop_vectorized_call = NULL;
	      ret |= TODO_cleanup_cfg | TODO_update_ssa_only_virtuals;
	    }
	}
      /* If outer loop vectorization fails for LOOP_VECTORIZED guarded
	 loop, don't vectorize its inner loop; we'll attempt to
	 vectorize LOOP_VECTORIZED guarded inner loop of the scalar
	 loop version.  */
      if (loop_vectorized_call && loop->inner)
	loop->inner->dont_vectorize = true;
      return ret;
    }

  /* Debug-counter hook: allow bisecting which loop vectorization causes
     a problem.  */
  if (!dbg_cnt (vect_loop))
    {
      /* Free existing information if loop is analyzed with some
	 assumptions.  */
      if (loop_constraint_set_p (loop, LOOP_C_FINITE))
	vect_free_loop_info_assumptions (loop);
      return ret;
    }

  if (loop_vectorized_call)
    set_uid_loop_bbs (loop_vinfo, loop_vectorized_call);

  unsigned HOST_WIDE_INT bytes;
  if (dump_enabled_p ())
    {
      if (GET_MODE_SIZE (loop_vinfo->vector_mode).is_constant (&bytes))
	dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
			 "loop vectorized using %wu byte vectors\n", bytes);
      else
	dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
			 "loop vectorized using variable length vectors\n");
    }

  /* Transform the loop; a non-NULL result is an epilogue loop that
     itself must be vectorized (handled below).  */
  loop_p new_loop = vect_transform_loop (loop_vinfo,
					 loop_vectorized_call);
  (*num_vectorized_loops)++;
  /* Now that the loop has been vectorized, allow it to be unrolled
     etc.  */
  loop->force_vectorize = false;

  /* Record the vectorization factor for simd-clone lowering.  */
  if (loop->simduid)
    {
      simduid_to_vf *simduid_to_vf_data = XNEW (simduid_to_vf);
      if (!simduid_to_vf_htab)
	simduid_to_vf_htab = new hash_table<simduid_to_vf> (15);
      simduid_to_vf_data->simduid = DECL_UID (loop->simduid);
      simduid_to_vf_data->vf = loop_vinfo->vectorization_factor;
      *simduid_to_vf_htab->find_slot (simduid_to_vf_data, INSERT)
	= simduid_to_vf_data;
    }

  /* Fold the guards to select the vectorized versions.  */
  if (loop_vectorized_call)
    {
      fold_loop_internal_call (loop_vectorized_call, boolean_true_node);
      loop_vectorized_call = NULL;
      ret |= TODO_cleanup_cfg;
    }
  if (loop_dist_alias_call)
    {
      tree value = gimple_call_arg (loop_dist_alias_call, 1);
      fold_loop_internal_call (loop_dist_alias_call, value);
      loop_dist_alias_call = NULL;
      ret |= TODO_cleanup_cfg;
    }

  /* Epilogue of vectorized loop must be vectorized too.  */
  if (new_loop)
    {
      /* Don't include vectorized epilogues in the "vectorized loops" count.
       */
      unsigned dont_count = *num_vectorized_loops;
      ret |= try_vectorize_loop_1 (simduid_to_vf_htab, &dont_count,
				   new_loop, NULL, NULL);
    }

  return ret;
}
1033c0a68be4Smrg
1034c0a68be4Smrg /* Try to vectorize LOOP. */
1035c0a68be4Smrg
1036c0a68be4Smrg static unsigned
try_vectorize_loop(hash_table<simduid_to_vf> * & simduid_to_vf_htab,unsigned * num_vectorized_loops,loop_p loop)1037c0a68be4Smrg try_vectorize_loop (hash_table<simduid_to_vf> *&simduid_to_vf_htab,
1038c0a68be4Smrg unsigned *num_vectorized_loops, loop_p loop)
1039c0a68be4Smrg {
1040c0a68be4Smrg if (!((flag_tree_loop_vectorize
1041c0a68be4Smrg && optimize_loop_nest_for_speed_p (loop))
1042c0a68be4Smrg || loop->force_vectorize))
1043c0a68be4Smrg return 0;
1044c0a68be4Smrg
1045*8feb0f0bSmrg return try_vectorize_loop_1 (simduid_to_vf_htab, num_vectorized_loops, loop,
1046c0a68be4Smrg vect_loop_vectorized_call (loop),
1047c0a68be4Smrg vect_loop_dist_alias_call (loop));
1048c0a68be4Smrg }
1049c0a68be4Smrg
1050c0a68be4Smrg
10511debfc3dSmrg /* Function vectorize_loops.
10521debfc3dSmrg
10531debfc3dSmrg Entry point to loop vectorization phase. */
10541debfc3dSmrg
10551debfc3dSmrg unsigned
vectorize_loops(void)10561debfc3dSmrg vectorize_loops (void)
10571debfc3dSmrg {
10581debfc3dSmrg unsigned int i;
10591debfc3dSmrg unsigned int num_vectorized_loops = 0;
10601debfc3dSmrg unsigned int vect_loops_num;
1061*8feb0f0bSmrg class loop *loop;
10621debfc3dSmrg hash_table<simduid_to_vf> *simduid_to_vf_htab = NULL;
10631debfc3dSmrg hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab = NULL;
10641debfc3dSmrg bool any_ifcvt_loops = false;
10651debfc3dSmrg unsigned ret = 0;
10661debfc3dSmrg
10671debfc3dSmrg vect_loops_num = number_of_loops (cfun);
10681debfc3dSmrg
10691debfc3dSmrg /* Bail out if there are no loops. */
10701debfc3dSmrg if (vect_loops_num <= 1)
10711debfc3dSmrg return 0;
10721debfc3dSmrg
10731debfc3dSmrg if (cfun->has_simduid_loops)
10741debfc3dSmrg note_simd_array_uses (&simd_array_to_simduid_htab);
10751debfc3dSmrg
10761debfc3dSmrg /* ----------- Analyze loops. ----------- */
10771debfc3dSmrg
10781debfc3dSmrg /* If some loop was duplicated, it gets bigger number
10791debfc3dSmrg than all previously defined loops. This fact allows us to run
10801debfc3dSmrg only over initial loops skipping newly generated ones. */
10811debfc3dSmrg FOR_EACH_LOOP (loop, 0)
10821debfc3dSmrg if (loop->dont_vectorize)
10831debfc3dSmrg {
10841debfc3dSmrg any_ifcvt_loops = true;
10851debfc3dSmrg /* If-conversion sometimes versions both the outer loop
10861debfc3dSmrg (for the case when outer loop vectorization might be
10871debfc3dSmrg desirable) as well as the inner loop in the scalar version
10881debfc3dSmrg of the loop. So we have:
10891debfc3dSmrg if (LOOP_VECTORIZED (1, 3))
10901debfc3dSmrg {
10911debfc3dSmrg loop1
10921debfc3dSmrg loop2
10931debfc3dSmrg }
10941debfc3dSmrg else
10951debfc3dSmrg loop3 (copy of loop1)
10961debfc3dSmrg if (LOOP_VECTORIZED (4, 5))
10971debfc3dSmrg loop4 (copy of loop2)
10981debfc3dSmrg else
10991debfc3dSmrg loop5 (copy of loop4)
11001debfc3dSmrg If FOR_EACH_LOOP gives us loop3 first (which has
11011debfc3dSmrg dont_vectorize set), make sure to process loop1 before loop4;
11021debfc3dSmrg so that we can prevent vectorization of loop4 if loop1
11031debfc3dSmrg is successfully vectorized. */
11041debfc3dSmrg if (loop->inner)
11051debfc3dSmrg {
11061debfc3dSmrg gimple *loop_vectorized_call
11071debfc3dSmrg = vect_loop_vectorized_call (loop);
11081debfc3dSmrg if (loop_vectorized_call
11091debfc3dSmrg && vect_loop_vectorized_call (loop->inner))
11101debfc3dSmrg {
11111debfc3dSmrg tree arg = gimple_call_arg (loop_vectorized_call, 0);
1112*8feb0f0bSmrg class loop *vector_loop
11131debfc3dSmrg = get_loop (cfun, tree_to_shwi (arg));
11141debfc3dSmrg if (vector_loop && vector_loop != loop)
11151debfc3dSmrg {
11161debfc3dSmrg /* Make sure we don't vectorize it twice. */
1117c0a68be4Smrg vector_loop->dont_vectorize = true;
1118c0a68be4Smrg ret |= try_vectorize_loop (simduid_to_vf_htab,
1119c0a68be4Smrg &num_vectorized_loops,
1120c0a68be4Smrg vector_loop);
11211debfc3dSmrg }
11221debfc3dSmrg }
11231debfc3dSmrg }
11241debfc3dSmrg }
11251debfc3dSmrg else
1126c0a68be4Smrg ret |= try_vectorize_loop (simduid_to_vf_htab, &num_vectorized_loops,
1127c0a68be4Smrg loop);
11281debfc3dSmrg
1129c0a68be4Smrg vect_location = dump_user_location_t ();
11301debfc3dSmrg
11311debfc3dSmrg statistics_counter_event (cfun, "Vectorized loops", num_vectorized_loops);
11321debfc3dSmrg if (dump_enabled_p ()
11331debfc3dSmrg || (num_vectorized_loops > 0 && dump_enabled_p ()))
11341debfc3dSmrg dump_printf_loc (MSG_NOTE, vect_location,
11351debfc3dSmrg "vectorized %u loops in function.\n",
11361debfc3dSmrg num_vectorized_loops);
11371debfc3dSmrg
11381debfc3dSmrg /* ----------- Finalize. ----------- */
11391debfc3dSmrg
11401debfc3dSmrg if (any_ifcvt_loops)
1141c0a68be4Smrg for (i = 1; i < number_of_loops (cfun); i++)
11421debfc3dSmrg {
11431debfc3dSmrg loop = get_loop (cfun, i);
11441debfc3dSmrg if (loop && loop->dont_vectorize)
11451debfc3dSmrg {
11461debfc3dSmrg gimple *g = vect_loop_vectorized_call (loop);
11471debfc3dSmrg if (g)
11481debfc3dSmrg {
1149a2dc1f3fSmrg fold_loop_internal_call (g, boolean_false_node);
1150a2dc1f3fSmrg ret |= TODO_cleanup_cfg;
1151a2dc1f3fSmrg g = NULL;
1152a2dc1f3fSmrg }
1153a2dc1f3fSmrg else
1154a2dc1f3fSmrg g = vect_loop_dist_alias_call (loop);
1155a2dc1f3fSmrg
1156a2dc1f3fSmrg if (g)
1157a2dc1f3fSmrg {
1158a2dc1f3fSmrg fold_loop_internal_call (g, boolean_false_node);
11591debfc3dSmrg ret |= TODO_cleanup_cfg;
11601debfc3dSmrg }
11611debfc3dSmrg }
11621debfc3dSmrg }
11631debfc3dSmrg
1164c0a68be4Smrg for (i = 1; i < number_of_loops (cfun); i++)
11651debfc3dSmrg {
11661debfc3dSmrg loop_vec_info loop_vinfo;
11671debfc3dSmrg bool has_mask_store;
11681debfc3dSmrg
11691debfc3dSmrg loop = get_loop (cfun, i);
1170c0a68be4Smrg if (!loop || !loop->aux)
11711debfc3dSmrg continue;
11721debfc3dSmrg loop_vinfo = (loop_vec_info) loop->aux;
11731debfc3dSmrg has_mask_store = LOOP_VINFO_HAS_MASK_STORE (loop_vinfo);
1174a2dc1f3fSmrg delete loop_vinfo;
1175a2dc1f3fSmrg if (has_mask_store
1176a2dc1f3fSmrg && targetm.vectorize.empty_mask_is_expensive (IFN_MASK_STORE))
11771debfc3dSmrg optimize_mask_stores (loop);
11781debfc3dSmrg loop->aux = NULL;
11791debfc3dSmrg }
11801debfc3dSmrg
11811debfc3dSmrg /* Fold IFN_GOMP_SIMD_{VF,LANE,LAST_LANE,ORDERED_{START,END}} builtins. */
11821debfc3dSmrg if (cfun->has_simduid_loops)
11831debfc3dSmrg adjust_simduid_builtins (simduid_to_vf_htab);
11841debfc3dSmrg
11851debfc3dSmrg /* Shrink any "omp array simd" temporary arrays to the
11861debfc3dSmrg actual vectorization factors. */
11871debfc3dSmrg if (simd_array_to_simduid_htab)
11881debfc3dSmrg shrink_simd_arrays (simd_array_to_simduid_htab, simduid_to_vf_htab);
11891debfc3dSmrg delete simduid_to_vf_htab;
11901debfc3dSmrg cfun->has_simduid_loops = false;
11911debfc3dSmrg
11921debfc3dSmrg if (num_vectorized_loops > 0)
11931debfc3dSmrg {
11941debfc3dSmrg /* If we vectorized any loop only virtual SSA form needs to be updated.
11951debfc3dSmrg ??? Also while we try hard to update loop-closed SSA form we fail
11961debfc3dSmrg to properly do this in some corner-cases (see PR56286). */
11971debfc3dSmrg rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa_only_virtuals);
11981debfc3dSmrg return TODO_cleanup_cfg;
11991debfc3dSmrg }
12001debfc3dSmrg
12011debfc3dSmrg return ret;
12021debfc3dSmrg }
12031debfc3dSmrg
12041debfc3dSmrg
/* Entry point to the simduid cleanup pass.  Runs when the vectorizer
   itself did not, to lower the IFN_GOMP_SIMD_* builtins that were left
   behind by omp-simd lowering.  */

namespace {

const pass_data pass_data_simduid_cleanup =
{
  GIMPLE_PASS, /* type */
  "simduid", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  ( PROP_ssa | PROP_cfg ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_simduid_cleanup : public gimple_opt_pass
{
public:
  pass_simduid_cleanup (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_simduid_cleanup, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_simduid_cleanup (m_ctxt); }
  /* Only run in functions that actually contain simduid loops.  */
  virtual bool gate (function *fun) { return fun->has_simduid_loops; }
  virtual unsigned int execute (function *);

}; // class pass_simduid_cleanup

unsigned int
pass_simduid_cleanup::execute (function *fun)
{
  hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab = NULL;

  note_simd_array_uses (&simd_array_to_simduid_htab);

  /* Fold IFN_GOMP_SIMD_{VF,LANE,LAST_LANE,ORDERED_{START,END}} builtins.
     Passing NULL means no loop was vectorized, i.e. VF is 1.  */
  adjust_simduid_builtins (NULL);

  /* Shrink any "omp array simd" temporary arrays to the
     actual vectorization factors.  */
  if (simd_array_to_simduid_htab)
    shrink_simd_arrays (simd_array_to_simduid_htab, NULL);
  fun->has_simduid_loops = false;
  return 0;
}

}  // anon namespace
12551debfc3dSmrg
12561debfc3dSmrg gimple_opt_pass *
make_pass_simduid_cleanup(gcc::context * ctxt)12571debfc3dSmrg make_pass_simduid_cleanup (gcc::context *ctxt)
12581debfc3dSmrg {
12591debfc3dSmrg return new pass_simduid_cleanup (ctxt);
12601debfc3dSmrg }
12611debfc3dSmrg
12621debfc3dSmrg
/* Entry point to basic block SLP phase.  */

namespace {

const pass_data pass_data_slp_vectorize =
{
  GIMPLE_PASS, /* type */
  "slp", /* name */
  OPTGROUP_LOOP | OPTGROUP_VEC, /* optinfo_flags */
  TV_TREE_SLP_VECTORIZATION, /* tv_id */
  ( PROP_ssa | PROP_cfg ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};

class pass_slp_vectorize : public gimple_opt_pass
{
public:
  pass_slp_vectorize (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_slp_vectorize, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_slp_vectorize (m_ctxt); }
  virtual bool gate (function *) { return flag_tree_slp_vectorize != 0; }
  virtual unsigned int execute (function *);

}; // class pass_slp_vectorize

unsigned int
pass_slp_vectorize::execute (function *fun)
{
  /* Reset vect_location on exit.  */
  auto_purge_vect_location sentinel;
  basic_block bb;

  /* When run inside the loop pipeline, loop structures and SCEV are
     already initialized; otherwise set them up (and tear down) here.  */
  bool in_loop_pipeline = scev_initialized_p ();
  if (!in_loop_pipeline)
    {
      loop_optimizer_init (LOOPS_NORMAL);
      scev_initialize ();
    }

  /* Mark all stmts as not belonging to the current region and unvisited.  */
  FOR_EACH_BB_FN (bb, fun)
    {
      for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  gimple_set_uid (stmt, -1);
	  gimple_set_visited (stmt, false);
	}
    }

  /* Attempt SLP vectorization on every basic block.  */
  FOR_EACH_BB_FN (bb, fun)
    {
      if (vect_slp_bb (bb))
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location, "basic block vectorized\n");
    }

  if (!in_loop_pipeline)
    {
      scev_finalize ();
      loop_optimizer_finalize ();
    }

  return 0;
}

}  // anon namespace
13361debfc3dSmrg
13371debfc3dSmrg gimple_opt_pass *
make_pass_slp_vectorize(gcc::context * ctxt)13381debfc3dSmrg make_pass_slp_vectorize (gcc::context *ctxt)
13391debfc3dSmrg {
13401debfc3dSmrg return new pass_slp_vectorize (ctxt);
13411debfc3dSmrg }
13421debfc3dSmrg
13431debfc3dSmrg
/* Increase alignment of global arrays to improve vectorization potential.
   TODO:
   - Consider also structs that have an array field.
   - Use ipa analysis to prune arrays that can't be vectorized?
     This should involve global alignment analysis and in the future also
     array padding.  */

static unsigned get_vec_alignment_for_type (tree);
/* Memoization cache for get_vec_alignment_for_record_type; lives only for
   the duration of increase_alignment.  */
static hash_map<tree, unsigned> *type_align_map;

/* Return alignment of array's vector type corresponding to scalar type.
   0 if no vector type exists, or the array is smaller than one vector.  */
static unsigned
get_vec_alignment_for_array_type (tree type)
{
  gcc_assert (TREE_CODE (type) == ARRAY_TYPE);
  poly_uint64 array_size, vector_size;

  /* Strip (possibly nested) array dimensions to reach the element type.  */
  tree scalar_type = strip_array_types (type);
  tree vectype = get_related_vectype_for_scalar_type (VOIDmode, scalar_type);
  if (!vectype
      || !poly_int_tree_p (TYPE_SIZE (type), &array_size)
      || !poly_int_tree_p (TYPE_SIZE (vectype), &vector_size)
      || maybe_lt (array_size, vector_size))
    return 0;

  return TYPE_ALIGN (vectype);
}
13721debfc3dSmrg
/* Return alignment of field having maximum alignment of vector type
   corresponding to it's scalar type. For now, we only consider fields whose
   offset is a multiple of it's vector alignment.
   0 if no suitable field is found.  Results are memoized in
   type_align_map.  */
static unsigned
get_vec_alignment_for_record_type (tree type)
{
  gcc_assert (TREE_CODE (type) == RECORD_TYPE);

  unsigned max_align = 0, alignment;
  HOST_WIDE_INT offset;
  tree offset_tree;

  /* Packed structs have no padding to exploit.  */
  if (TYPE_PACKED (type))
    return 0;

  /* Return the cached answer if this type was seen before.  */
  unsigned *slot = type_align_map->get (type);
  if (slot)
    return *slot;

  for (tree field = first_field (type);
       field != NULL_TREE;
       field = DECL_CHAIN (field))
    {
      /* Skip if not FIELD_DECL or if alignment is set by user.  */
      if (TREE_CODE (field) != FIELD_DECL
	  || DECL_USER_ALIGN (field)
	  || DECL_ARTIFICIAL (field))
	continue;

      /* We don't need to process the type further if offset is variable,
	 since the offsets of remaining members will also be variable.  */
      if (TREE_CODE (DECL_FIELD_OFFSET (field)) != INTEGER_CST
	  || TREE_CODE (DECL_FIELD_BIT_OFFSET (field)) != INTEGER_CST)
	break;

      /* Similarly stop processing the type if offset_tree
	 does not fit in unsigned HOST_WIDE_INT.  */
      offset_tree = bit_position (field);
      if (!tree_fits_uhwi_p (offset_tree))
	break;

      offset = tree_to_uhwi (offset_tree);
      alignment = get_vec_alignment_for_type (TREE_TYPE (field));

      /* Get maximum alignment of vectorized field/array among those members
	 whose offset is multiple of the vector alignment.  */
      if (alignment
	  && (offset % alignment == 0)
	  && (alignment > max_align))
	max_align = alignment;
    }

  type_align_map->put (type, max_align);
  return max_align;
}
14291debfc3dSmrg
14301debfc3dSmrg /* Return alignment of vector type corresponding to decl's scalar type
14311debfc3dSmrg or 0 if it doesn't exist or the vector alignment is lesser than
14321debfc3dSmrg decl's alignment. */
14331debfc3dSmrg static unsigned
get_vec_alignment_for_type(tree type)14341debfc3dSmrg get_vec_alignment_for_type (tree type)
14351debfc3dSmrg {
14361debfc3dSmrg if (type == NULL_TREE)
14371debfc3dSmrg return 0;
14381debfc3dSmrg
14391debfc3dSmrg gcc_assert (TYPE_P (type));
14401debfc3dSmrg
14411debfc3dSmrg static unsigned alignment = 0;
14421debfc3dSmrg switch (TREE_CODE (type))
14431debfc3dSmrg {
14441debfc3dSmrg case ARRAY_TYPE:
14451debfc3dSmrg alignment = get_vec_alignment_for_array_type (type);
14461debfc3dSmrg break;
14471debfc3dSmrg case RECORD_TYPE:
14481debfc3dSmrg alignment = get_vec_alignment_for_record_type (type);
14491debfc3dSmrg break;
14501debfc3dSmrg default:
14511debfc3dSmrg alignment = 0;
14521debfc3dSmrg break;
14531debfc3dSmrg }
14541debfc3dSmrg
14551debfc3dSmrg return (alignment > TYPE_ALIGN (type)) ? alignment : 0;
14561debfc3dSmrg }
14571debfc3dSmrg
/* Entry point to increase_alignment pass.  Raises the alignment of
   global variables whose types could profit from vectorization.  */
static unsigned int
increase_alignment (void)
{
  varpool_node *vnode;

  vect_location = dump_user_location_t ();
  /* Memoization cache used by get_vec_alignment_for_record_type.  */
  type_align_map = new hash_map<tree, unsigned>;

  /* Increase the alignment of all global arrays for vectorization.  */
  FOR_EACH_DEFINED_VARIABLE (vnode)
    {
      tree decl = vnode->decl;
      unsigned int alignment;

      /* Skip symbols whose alignment cannot be changed, and decls with
	 user-specified or compiler-generated alignment.  */
      if ((decl_in_symtab_p (decl)
	  && !symtab_node::get (decl)->can_increase_alignment_p ())
	  || DECL_USER_ALIGN (decl) || DECL_ARTIFICIAL (decl))
	continue;

      alignment = get_vec_alignment_for_type (TREE_TYPE (decl));
      if (alignment && vect_can_force_dr_alignment_p (decl, alignment))
        {
	  vnode->increase_alignment (alignment);
	  if (dump_enabled_p ())
	    dump_printf (MSG_NOTE, "Increasing alignment of decl: %T\n", decl);
        }
    }

  delete type_align_map;
  return 0;
}
14901debfc3dSmrg
14911debfc3dSmrg
/* Pass wrapper for increase_alignment.  */

namespace {

const pass_data pass_data_ipa_increase_alignment =
{
  SIMPLE_IPA_PASS, /* type */
  "increase_alignment", /* name */
  OPTGROUP_LOOP | OPTGROUP_VEC, /* optinfo_flags */
  TV_IPA_OPT, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_ipa_increase_alignment : public simple_ipa_opt_pass
{
public:
  pass_ipa_increase_alignment (gcc::context *ctxt)
    : simple_ipa_opt_pass (pass_data_ipa_increase_alignment, ctxt)
  {}

  /* opt_pass methods: */
  /* Raising alignment only pays off when section anchors are in use and
     loop vectorization is enabled.  */
  virtual bool gate (function *)
  {
    return flag_section_anchors && flag_tree_loop_vectorize;
  }

  virtual unsigned int execute (function *) { return increase_alignment (); }

}; // class pass_ipa_increase_alignment

}  // anon namespace
15251debfc3dSmrg
15261debfc3dSmrg simple_ipa_opt_pass *
make_pass_ipa_increase_alignment(gcc::context * ctxt)15271debfc3dSmrg make_pass_ipa_increase_alignment (gcc::context *ctxt)
15281debfc3dSmrg {
15291debfc3dSmrg return new pass_ipa_increase_alignment (ctxt);
15301debfc3dSmrg }
1531*8feb0f0bSmrg
1532*8feb0f0bSmrg /* If the condition represented by T is a comparison or the SSA name
1533*8feb0f0bSmrg result of a comparison, extract the comparison's operands. Represent
1534*8feb0f0bSmrg T as NE_EXPR <T, 0> otherwise. */
1535*8feb0f0bSmrg
1536*8feb0f0bSmrg void
get_cond_ops_from_tree(tree t)1537*8feb0f0bSmrg scalar_cond_masked_key::get_cond_ops_from_tree (tree t)
1538*8feb0f0bSmrg {
1539*8feb0f0bSmrg if (TREE_CODE_CLASS (TREE_CODE (t)) == tcc_comparison)
1540*8feb0f0bSmrg {
1541*8feb0f0bSmrg this->code = TREE_CODE (t);
1542*8feb0f0bSmrg this->op0 = TREE_OPERAND (t, 0);
1543*8feb0f0bSmrg this->op1 = TREE_OPERAND (t, 1);
1544*8feb0f0bSmrg return;
1545*8feb0f0bSmrg }
1546*8feb0f0bSmrg
1547*8feb0f0bSmrg if (TREE_CODE (t) == SSA_NAME)
1548*8feb0f0bSmrg if (gassign *stmt = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (t)))
1549*8feb0f0bSmrg {
1550*8feb0f0bSmrg tree_code code = gimple_assign_rhs_code (stmt);
1551*8feb0f0bSmrg if (TREE_CODE_CLASS (code) == tcc_comparison)
1552*8feb0f0bSmrg {
1553*8feb0f0bSmrg this->code = code;
1554*8feb0f0bSmrg this->op0 = gimple_assign_rhs1 (stmt);
1555*8feb0f0bSmrg this->op1 = gimple_assign_rhs2 (stmt);
1556*8feb0f0bSmrg return;
1557*8feb0f0bSmrg }
1558*8feb0f0bSmrg }
1559*8feb0f0bSmrg
1560*8feb0f0bSmrg this->code = NE_EXPR;
1561*8feb0f0bSmrg this->op0 = t;
1562*8feb0f0bSmrg this->op1 = build_zero_cst (TREE_TYPE (t));
1563*8feb0f0bSmrg }
1564