xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/tree-vectorizer.c (revision 8feb0f0b7eaff0608f8350bbfa3098827b4bb91b)
11debfc3dSmrg /* Vectorizer
2*8feb0f0bSmrg    Copyright (C) 2003-2020 Free Software Foundation, Inc.
31debfc3dSmrg    Contributed by Dorit Naishlos <dorit@il.ibm.com>
41debfc3dSmrg 
51debfc3dSmrg This file is part of GCC.
61debfc3dSmrg 
71debfc3dSmrg GCC is free software; you can redistribute it and/or modify it under
81debfc3dSmrg the terms of the GNU General Public License as published by the Free
91debfc3dSmrg Software Foundation; either version 3, or (at your option) any later
101debfc3dSmrg version.
111debfc3dSmrg 
121debfc3dSmrg GCC is distributed in the hope that it will be useful, but WITHOUT ANY
131debfc3dSmrg WARRANTY; without even the implied warranty of MERCHANTABILITY or
141debfc3dSmrg FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
151debfc3dSmrg for more details.
161debfc3dSmrg 
171debfc3dSmrg You should have received a copy of the GNU General Public License
181debfc3dSmrg along with GCC; see the file COPYING3.  If not see
191debfc3dSmrg <http://www.gnu.org/licenses/>.  */
201debfc3dSmrg 
211debfc3dSmrg /* Loop and basic block vectorizer.
221debfc3dSmrg 
231debfc3dSmrg   This file contains drivers for the three vectorizers:
241debfc3dSmrg   (1) loop vectorizer (inter-iteration parallelism),
251debfc3dSmrg   (2) loop-aware SLP (intra-iteration parallelism) (invoked by the loop
261debfc3dSmrg       vectorizer)
271debfc3dSmrg   (3) BB vectorizer (out-of-loops), aka SLP
281debfc3dSmrg 
291debfc3dSmrg   The rest of the vectorizer's code is organized as follows:
301debfc3dSmrg   - tree-vect-loop.c - loop specific parts such as reductions, etc. These are
311debfc3dSmrg     used by drivers (1) and (2).
321debfc3dSmrg   - tree-vect-loop-manip.c - vectorizer's loop control-flow utilities, used by
331debfc3dSmrg     drivers (1) and (2).
341debfc3dSmrg   - tree-vect-slp.c - BB vectorization specific analysis and transformation,
351debfc3dSmrg     used by drivers (2) and (3).
361debfc3dSmrg   - tree-vect-stmts.c - statements analysis and transformation (used by all).
371debfc3dSmrg   - tree-vect-data-refs.c - vectorizer specific data-refs analysis and
381debfc3dSmrg     manipulations (used by all).
391debfc3dSmrg   - tree-vect-patterns.c - vectorizable code patterns detector (used by all)
401debfc3dSmrg 
411debfc3dSmrg   Here's a poor attempt at illustrating that:
421debfc3dSmrg 
431debfc3dSmrg      tree-vectorizer.c:
441debfc3dSmrg      loop_vect()  loop_aware_slp()  slp_vect()
451debfc3dSmrg           |        /           \          /
461debfc3dSmrg           |       /             \        /
471debfc3dSmrg           tree-vect-loop.c  tree-vect-slp.c
481debfc3dSmrg                 | \      \  /      /   |
491debfc3dSmrg                 |  \      \/      /    |
501debfc3dSmrg                 |   \     /\     /     |
511debfc3dSmrg                 |    \   /  \   /      |
521debfc3dSmrg          tree-vect-stmts.c  tree-vect-data-refs.c
531debfc3dSmrg                        \      /
541debfc3dSmrg                     tree-vect-patterns.c
551debfc3dSmrg */
561debfc3dSmrg 
571debfc3dSmrg #include "config.h"
581debfc3dSmrg #include "system.h"
591debfc3dSmrg #include "coretypes.h"
601debfc3dSmrg #include "backend.h"
611debfc3dSmrg #include "tree.h"
621debfc3dSmrg #include "gimple.h"
631debfc3dSmrg #include "predict.h"
641debfc3dSmrg #include "tree-pass.h"
651debfc3dSmrg #include "ssa.h"
661debfc3dSmrg #include "cgraph.h"
671debfc3dSmrg #include "fold-const.h"
681debfc3dSmrg #include "stor-layout.h"
691debfc3dSmrg #include "gimple-iterator.h"
701debfc3dSmrg #include "gimple-walk.h"
711debfc3dSmrg #include "tree-ssa-loop-manip.h"
721debfc3dSmrg #include "tree-ssa-loop-niter.h"
731debfc3dSmrg #include "tree-cfg.h"
741debfc3dSmrg #include "cfgloop.h"
751debfc3dSmrg #include "tree-vectorizer.h"
761debfc3dSmrg #include "tree-ssa-propagate.h"
771debfc3dSmrg #include "dbgcnt.h"
781debfc3dSmrg #include "tree-scalar-evolution.h"
79a2dc1f3fSmrg #include "stringpool.h"
80a2dc1f3fSmrg #include "attribs.h"
81c0a68be4Smrg #include "gimple-pretty-print.h"
82c0a68be4Smrg #include "opt-problem.h"
83c0a68be4Smrg #include "internal-fn.h"
841debfc3dSmrg 
851debfc3dSmrg 
86c0a68be4Smrg /* Loop or bb location, with hotness information.  */
87c0a68be4Smrg dump_user_location_t vect_location;
881debfc3dSmrg 
89c0a68be4Smrg /* auto_purge_vect_location's dtor: reset the vect_location
90c0a68be4Smrg    global, to avoid stale location_t values that could reference
91c0a68be4Smrg    GC-ed blocks.  */
92c0a68be4Smrg 
/* Overwrite the global with a default-constructed (empty) location so no
   stale location_t survives past the scope of the guard object.  */
~auto_purge_vect_location()93c0a68be4Smrg auto_purge_vect_location::~auto_purge_vect_location ()
94c0a68be4Smrg {
95c0a68be4Smrg   vect_location = dump_user_location_t ();
96c0a68be4Smrg }
97c0a68be4Smrg 
98c0a68be4Smrg /* Dump a cost entry according to args to F.  */
99c0a68be4Smrg 
100c0a68be4Smrg void
dump_stmt_cost(FILE * f,void * data,int count,enum vect_cost_for_stmt kind,stmt_vec_info stmt_info,int misalign,unsigned cost,enum vect_cost_model_location where)101c0a68be4Smrg dump_stmt_cost (FILE *f, void *data, int count, enum vect_cost_for_stmt kind,
102c0a68be4Smrg 		stmt_vec_info stmt_info, int misalign, unsigned cost,
103c0a68be4Smrg 		enum vect_cost_model_location where)
104c0a68be4Smrg {
  /* Print DATA as an opaque pointer first so entries belonging to different
     cost accumulators can be told apart in the dump.  */
105c0a68be4Smrg   fprintf (f, "%p ", data);
106c0a68be4Smrg   if (stmt_info)
107c0a68be4Smrg     {
108c0a68be4Smrg       print_gimple_expr (f, STMT_VINFO_STMT (stmt_info), 0, TDF_SLIM);
109c0a68be4Smrg       fprintf (f, " ");
110c0a68be4Smrg     }
111c0a68be4Smrg   else
112c0a68be4Smrg     fprintf (f, "<unknown> ");
113c0a68be4Smrg   fprintf (f, "%d times ", count);
  /* Translate the cost-kind enumerator into a printable name; "unknown"
     covers any enumerator not listed below.  */
114c0a68be4Smrg   const char *ks = "unknown";
115c0a68be4Smrg   switch (kind)
116c0a68be4Smrg     {
117c0a68be4Smrg     case scalar_stmt:
118c0a68be4Smrg       ks = "scalar_stmt";
119c0a68be4Smrg       break;
120c0a68be4Smrg     case scalar_load:
121c0a68be4Smrg       ks = "scalar_load";
122c0a68be4Smrg       break;
123c0a68be4Smrg     case scalar_store:
124c0a68be4Smrg       ks = "scalar_store";
125c0a68be4Smrg       break;
126c0a68be4Smrg     case vector_stmt:
127c0a68be4Smrg       ks = "vector_stmt";
128c0a68be4Smrg       break;
129c0a68be4Smrg     case vector_load:
130c0a68be4Smrg       ks = "vector_load";
131c0a68be4Smrg       break;
132c0a68be4Smrg     case vector_gather_load:
133c0a68be4Smrg       ks = "vector_gather_load";
134c0a68be4Smrg       break;
135c0a68be4Smrg     case unaligned_load:
136c0a68be4Smrg       ks = "unaligned_load";
137c0a68be4Smrg       break;
138c0a68be4Smrg     case unaligned_store:
139c0a68be4Smrg       ks = "unaligned_store";
140c0a68be4Smrg       break;
141c0a68be4Smrg     case vector_store:
142c0a68be4Smrg       ks = "vector_store";
143c0a68be4Smrg       break;
144c0a68be4Smrg     case vector_scatter_store:
145c0a68be4Smrg       ks = "vector_scatter_store";
146c0a68be4Smrg       break;
147c0a68be4Smrg     case vec_to_scalar:
148c0a68be4Smrg       ks = "vec_to_scalar";
149c0a68be4Smrg       break;
150c0a68be4Smrg     case scalar_to_vec:
151c0a68be4Smrg       ks = "scalar_to_vec";
152c0a68be4Smrg       break;
153c0a68be4Smrg     case cond_branch_not_taken:
154c0a68be4Smrg       ks = "cond_branch_not_taken";
155c0a68be4Smrg       break;
156c0a68be4Smrg     case cond_branch_taken:
157c0a68be4Smrg       ks = "cond_branch_taken";
158c0a68be4Smrg       break;
159c0a68be4Smrg     case vec_perm:
160c0a68be4Smrg       ks = "vec_perm";
161c0a68be4Smrg       break;
162c0a68be4Smrg     case vec_promote_demote:
163c0a68be4Smrg       ks = "vec_promote_demote";
164c0a68be4Smrg       break;
165c0a68be4Smrg     case vec_construct:
166c0a68be4Smrg       ks = "vec_construct";
167c0a68be4Smrg       break;
168c0a68be4Smrg     }
169c0a68be4Smrg   fprintf (f, "%s ", ks);
  /* MISALIGN is only meaningful for the unaligned access kinds.  */
170c0a68be4Smrg   if (kind == unaligned_load || kind == unaligned_store)
171c0a68be4Smrg     fprintf (f, "(misalign %d) ", misalign);
172c0a68be4Smrg   fprintf (f, "costs %u ", cost);
  /* Translate the cost-model location into a printable name.  */
173c0a68be4Smrg   const char *ws = "unknown";
174c0a68be4Smrg   switch (where)
175c0a68be4Smrg     {
176c0a68be4Smrg     case vect_prologue:
177c0a68be4Smrg       ws = "prologue";
178c0a68be4Smrg       break;
179c0a68be4Smrg     case vect_body:
180c0a68be4Smrg       ws = "body";
181c0a68be4Smrg       break;
182c0a68be4Smrg     case vect_epilogue:
183c0a68be4Smrg       ws = "epilogue";
184c0a68be4Smrg       break;
185c0a68be4Smrg     }
186c0a68be4Smrg   fprintf (f, "in %s\n", ws);
187c0a68be4Smrg }
1881debfc3dSmrg 
1891debfc3dSmrg /* For mapping simduid to vectorization factor.  */
1901debfc3dSmrg 
191*8feb0f0bSmrg class simduid_to_vf : public free_ptr_hash<simduid_to_vf>
1921debfc3dSmrg {
193*8feb0f0bSmrg public:
  /* DECL_UID of the simduid variable (the hash key).  */
1941debfc3dSmrg   unsigned int simduid;
  /* Vectorization factor recorded for that simduid.  */
195a2dc1f3fSmrg   poly_uint64 vf;
1961debfc3dSmrg 
1971debfc3dSmrg   /* hash_table support.  */
1981debfc3dSmrg   static inline hashval_t hash (const simduid_to_vf *);
1991debfc3dSmrg   static inline int equal (const simduid_to_vf *, const simduid_to_vf *);
2001debfc3dSmrg };
2011debfc3dSmrg 
/* Hash directly on the simduid DECL_UID.  */
2021debfc3dSmrg inline hashval_t
hash(const simduid_to_vf * p)2031debfc3dSmrg simduid_to_vf::hash (const simduid_to_vf *p)
2041debfc3dSmrg {
2051debfc3dSmrg   return p->simduid;
2061debfc3dSmrg }
2071debfc3dSmrg 
/* Two entries are equal iff they refer to the same simduid.  */
2081debfc3dSmrg inline int
equal(const simduid_to_vf * p1,const simduid_to_vf * p2)2091debfc3dSmrg simduid_to_vf::equal (const simduid_to_vf *p1, const simduid_to_vf *p2)
2101debfc3dSmrg {
2111debfc3dSmrg   return p1->simduid == p2->simduid;
2121debfc3dSmrg }
2131debfc3dSmrg 
2141debfc3dSmrg /* This hash maps the OMP simd array to the corresponding simduid used
2151debfc3dSmrg    to index into it.  Like thus,
2161debfc3dSmrg 
2171debfc3dSmrg         _7 = GOMP_SIMD_LANE (simduid.0)
2181debfc3dSmrg         ...
2191debfc3dSmrg         ...
2201debfc3dSmrg         D.1737[_7] = stuff;
2211debfc3dSmrg 
2221debfc3dSmrg 
2231debfc3dSmrg    This hash maps from the OMP simd array (D.1737[]) to DECL_UID of
2241debfc3dSmrg    simduid.0.  */
2251debfc3dSmrg 
2261debfc3dSmrg struct simd_array_to_simduid : free_ptr_hash<simd_array_to_simduid>
2271debfc3dSmrg {
  /* The "omp simd array" VAR_DECL (the hash key).  */
2281debfc3dSmrg   tree decl;
  /* DECL_UID of the simduid used to index DECL; -1U once the array is
     seen with more than one simduid (see note_simd_array_uses_cb).  */
2291debfc3dSmrg   unsigned int simduid;
2301debfc3dSmrg 
2311debfc3dSmrg   /* hash_table support.  */
2321debfc3dSmrg   static inline hashval_t hash (const simd_array_to_simduid *);
2331debfc3dSmrg   static inline int equal (const simd_array_to_simduid *,
2341debfc3dSmrg 			   const simd_array_to_simduid *);
2351debfc3dSmrg };
2361debfc3dSmrg 
/* Hash on the DECL_UID of the array decl.  */
2371debfc3dSmrg inline hashval_t
hash(const simd_array_to_simduid * p)2381debfc3dSmrg simd_array_to_simduid::hash (const simd_array_to_simduid *p)
2391debfc3dSmrg {
2401debfc3dSmrg   return DECL_UID (p->decl);
2411debfc3dSmrg }
2421debfc3dSmrg 
/* Two entries are equal iff they describe the same array decl.  */
2431debfc3dSmrg inline int
equal(const simd_array_to_simduid * p1,const simd_array_to_simduid * p2)2441debfc3dSmrg simd_array_to_simduid::equal (const simd_array_to_simduid *p1,
2451debfc3dSmrg 			      const simd_array_to_simduid *p2)
2461debfc3dSmrg {
2471debfc3dSmrg   return p1->decl == p2->decl;
2481debfc3dSmrg }
2491debfc3dSmrg 
2501debfc3dSmrg /* Fold IFN_GOMP_SIMD_LANE, IFN_GOMP_SIMD_VF, IFN_GOMP_SIMD_LAST_LANE,
2511debfc3dSmrg    into their corresponding constants and remove
2521debfc3dSmrg    IFN_GOMP_SIMD_ORDERED_{START,END}.  */
2531debfc3dSmrg 
2541debfc3dSmrg static void
adjust_simduid_builtins(hash_table<simduid_to_vf> * htab)2551debfc3dSmrg adjust_simduid_builtins (hash_table<simduid_to_vf> *htab)
2561debfc3dSmrg {
2571debfc3dSmrg   basic_block bb;
2581debfc3dSmrg 
2591debfc3dSmrg   FOR_EACH_BB_FN (bb, cfun)
2601debfc3dSmrg     {
2611debfc3dSmrg       gimple_stmt_iterator i;
2621debfc3dSmrg 
      /* The iterator is advanced manually: removal/replacement paths below
	 leave I pointing at the next statement themselves.  */
2631debfc3dSmrg       for (i = gsi_start_bb (bb); !gsi_end_p (i); )
2641debfc3dSmrg 	{
265a2dc1f3fSmrg 	  poly_uint64 vf = 1;
2661debfc3dSmrg 	  enum internal_fn ifn;
2671debfc3dSmrg 	  gimple *stmt = gsi_stmt (i);
2681debfc3dSmrg 	  tree t;
2691debfc3dSmrg 	  if (!is_gimple_call (stmt)
2701debfc3dSmrg 	      || !gimple_call_internal_p (stmt))
2711debfc3dSmrg 	    {
2721debfc3dSmrg 	      gsi_next (&i);
2731debfc3dSmrg 	      continue;
2741debfc3dSmrg 	    }
2751debfc3dSmrg 	  ifn = gimple_call_internal_fn (stmt);
2761debfc3dSmrg 	  switch (ifn)
2771debfc3dSmrg 	    {
2781debfc3dSmrg 	    case IFN_GOMP_SIMD_LANE:
2791debfc3dSmrg 	    case IFN_GOMP_SIMD_VF:
2801debfc3dSmrg 	    case IFN_GOMP_SIMD_LAST_LANE:
2811debfc3dSmrg 	      break;
2821debfc3dSmrg 	    case IFN_GOMP_SIMD_ORDERED_START:
2831debfc3dSmrg 	    case IFN_GOMP_SIMD_ORDERED_END:
	      /* With a "threads" clause (arg 0 == 1) the ordered region must
		 become a real GOMP_ordered_{start,end} call; otherwise the
		 marker is simply removed.  */
2841debfc3dSmrg 	      if (integer_onep (gimple_call_arg (stmt, 0)))
2851debfc3dSmrg 		{
2861debfc3dSmrg 		  enum built_in_function bcode
2871debfc3dSmrg 		    = (ifn == IFN_GOMP_SIMD_ORDERED_START
2881debfc3dSmrg 		       ? BUILT_IN_GOMP_ORDERED_START
2891debfc3dSmrg 		       : BUILT_IN_GOMP_ORDERED_END);
2901debfc3dSmrg 		  gimple *g
2911debfc3dSmrg 		    = gimple_build_call (builtin_decl_explicit (bcode), 0);
292*8feb0f0bSmrg 		  gimple_move_vops (g, stmt);
2931debfc3dSmrg 		  gsi_replace (&i, g, true);
2941debfc3dSmrg 		  continue;
2951debfc3dSmrg 		}
2961debfc3dSmrg 	      gsi_remove (&i, true);
2971debfc3dSmrg 	      unlink_stmt_vdef (stmt);
2981debfc3dSmrg 	      continue;
2991debfc3dSmrg 	    default:
3001debfc3dSmrg 	      gsi_next (&i);
3011debfc3dSmrg 	      continue;
3021debfc3dSmrg 	    }
3031debfc3dSmrg 	  tree arg = gimple_call_arg (stmt, 0);
3041debfc3dSmrg 	  gcc_assert (arg != NULL_TREE);
3051debfc3dSmrg 	  gcc_assert (TREE_CODE (arg) == SSA_NAME);
3061debfc3dSmrg 	  simduid_to_vf *p = NULL, data;
3071debfc3dSmrg 	  data.simduid = DECL_UID (SSA_NAME_VAR (arg));
3081debfc3dSmrg 	  /* Need to nullify loop safelen field since its value is not
3091debfc3dSmrg 	     valid after transformation.  */
3101debfc3dSmrg 	  if (bb->loop_father && bb->loop_father->safelen > 0)
3111debfc3dSmrg 	    bb->loop_father->safelen = 0;
	  /* Look up the vectorization factor recorded for this simduid,
	     if any; otherwise VF stays 1.  */
3121debfc3dSmrg 	  if (htab)
3131debfc3dSmrg 	    {
3141debfc3dSmrg 	      p = htab->find (&data);
3151debfc3dSmrg 	      if (p)
3161debfc3dSmrg 		vf = p->vf;
3171debfc3dSmrg 	    }
	  /* Fold the remaining GOMP_SIMD_* calls to constants / their
	     last-lane argument.  */
3181debfc3dSmrg 	  switch (ifn)
3191debfc3dSmrg 	    {
3201debfc3dSmrg 	    case IFN_GOMP_SIMD_VF:
3211debfc3dSmrg 	      t = build_int_cst (unsigned_type_node, vf);
3221debfc3dSmrg 	      break;
3231debfc3dSmrg 	    case IFN_GOMP_SIMD_LANE:
3241debfc3dSmrg 	      t = build_int_cst (unsigned_type_node, 0);
3251debfc3dSmrg 	      break;
3261debfc3dSmrg 	    case IFN_GOMP_SIMD_LAST_LANE:
3271debfc3dSmrg 	      t = gimple_call_arg (stmt, 1);
3281debfc3dSmrg 	      break;
3291debfc3dSmrg 	    default:
3301debfc3dSmrg 	      gcc_unreachable ();
3311debfc3dSmrg 	    }
332a2dc1f3fSmrg 	  tree lhs = gimple_call_lhs (stmt);
333a2dc1f3fSmrg 	  if (lhs)
334a2dc1f3fSmrg 	    replace_uses_by (lhs, t);
335a2dc1f3fSmrg 	  release_defs (stmt);
336a2dc1f3fSmrg 	  gsi_remove (&i, true);
3371debfc3dSmrg 	}
3381debfc3dSmrg     }
3391debfc3dSmrg }
3401debfc3dSmrg 
3411debfc3dSmrg /* Helper structure for note_simd_array_uses.  */
3421debfc3dSmrg 
3431debfc3dSmrg struct note_simd_array_uses_struct
3441debfc3dSmrg {
  /* Hash table being populated; allocated lazily on first hit.  */
3451debfc3dSmrg   hash_table<simd_array_to_simduid> **htab;
  /* DECL_UID of the simduid of the statement currently being walked.  */
3461debfc3dSmrg   unsigned int simduid;
3471debfc3dSmrg };
3481debfc3dSmrg 
3491debfc3dSmrg /* Callback for note_simd_array_uses, called through walk_gimple_op.  */
3501debfc3dSmrg 
3511debfc3dSmrg static tree
note_simd_array_uses_cb(tree * tp,int * walk_subtrees,void * data)3521debfc3dSmrg note_simd_array_uses_cb (tree *tp, int *walk_subtrees, void *data)
3531debfc3dSmrg {
3541debfc3dSmrg   struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
3551debfc3dSmrg   struct note_simd_array_uses_struct *ns
3561debfc3dSmrg     = (struct note_simd_array_uses_struct *) wi->info;
3571debfc3dSmrg 
3581debfc3dSmrg   if (TYPE_P (*tp))
3591debfc3dSmrg     *walk_subtrees = 0;
3601debfc3dSmrg   else if (VAR_P (*tp)
3611debfc3dSmrg 	   && lookup_attribute ("omp simd array", DECL_ATTRIBUTES (*tp))
3621debfc3dSmrg 	   && DECL_CONTEXT (*tp) == current_function_decl)
3631debfc3dSmrg     {
3641debfc3dSmrg       simd_array_to_simduid data;
3651debfc3dSmrg       if (!*ns->htab)
3661debfc3dSmrg 	*ns->htab = new hash_table<simd_array_to_simduid> (15);
3671debfc3dSmrg       data.decl = *tp;
3681debfc3dSmrg       data.simduid = ns->simduid;
3691debfc3dSmrg       simd_array_to_simduid **slot = (*ns->htab)->find_slot (&data, INSERT);
3701debfc3dSmrg       if (*slot == NULL)
3711debfc3dSmrg 	{
3721debfc3dSmrg 	  simd_array_to_simduid *p = XNEW (simd_array_to_simduid);
3731debfc3dSmrg 	  *p = data;
3741debfc3dSmrg 	  *slot = p;
3751debfc3dSmrg 	}
      /* Same array used with a different simduid: mark it with -1U so
	 shrink_simd_arrays will leave it alone.  */
3761debfc3dSmrg       else if ((*slot)->simduid != ns->simduid)
3771debfc3dSmrg 	(*slot)->simduid = -1U;
3781debfc3dSmrg       *walk_subtrees = 0;
3791debfc3dSmrg     }
3801debfc3dSmrg   return NULL_TREE;
3811debfc3dSmrg }
3821debfc3dSmrg 
3831debfc3dSmrg /* Find "omp simd array" temporaries and map them to corresponding
3841debfc3dSmrg    simduid.  */
3851debfc3dSmrg 
3861debfc3dSmrg static void
note_simd_array_uses(hash_table<simd_array_to_simduid> ** htab)3871debfc3dSmrg note_simd_array_uses (hash_table<simd_array_to_simduid> **htab)
3881debfc3dSmrg {
3891debfc3dSmrg   basic_block bb;
3901debfc3dSmrg   gimple_stmt_iterator gsi;
3911debfc3dSmrg   struct walk_stmt_info wi;
3921debfc3dSmrg   struct note_simd_array_uses_struct ns;
3931debfc3dSmrg 
3941debfc3dSmrg   memset (&wi, 0, sizeof (wi));
3951debfc3dSmrg   wi.info = &ns;
3961debfc3dSmrg   ns.htab = htab;
3971debfc3dSmrg 
  /* Scan every statement for the GOMP_SIMD_{LANE,VF,LAST_LANE} internal
     calls; their first argument names the simduid variable.  */
3981debfc3dSmrg   FOR_EACH_BB_FN (bb, cfun)
3991debfc3dSmrg     for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4001debfc3dSmrg       {
4011debfc3dSmrg 	gimple *stmt = gsi_stmt (gsi);
4021debfc3dSmrg 	if (!is_gimple_call (stmt) || !gimple_call_internal_p (stmt))
4031debfc3dSmrg 	  continue;
4041debfc3dSmrg 	switch (gimple_call_internal_fn (stmt))
4051debfc3dSmrg 	  {
4061debfc3dSmrg 	  case IFN_GOMP_SIMD_LANE:
4071debfc3dSmrg 	  case IFN_GOMP_SIMD_VF:
4081debfc3dSmrg 	  case IFN_GOMP_SIMD_LAST_LANE:
4091debfc3dSmrg 	    break;
4101debfc3dSmrg 	  default:
4111debfc3dSmrg 	    continue;
4121debfc3dSmrg 	  }
4131debfc3dSmrg 	tree lhs = gimple_call_lhs (stmt);
4141debfc3dSmrg 	if (lhs == NULL_TREE)
4151debfc3dSmrg 	  continue;
4161debfc3dSmrg 	imm_use_iterator use_iter;
4171debfc3dSmrg 	gimple *use_stmt;
4181debfc3dSmrg 	ns.simduid = DECL_UID (SSA_NAME_VAR (gimple_call_arg (stmt, 0)));
	/* Walk every non-debug use of the call's result to find the
	   "omp simd array" decls indexed by it.  */
4191debfc3dSmrg 	FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, lhs)
4201debfc3dSmrg 	  if (!is_gimple_debug (use_stmt))
4211debfc3dSmrg 	    walk_gimple_op (use_stmt, note_simd_array_uses_cb, &wi);
4221debfc3dSmrg       }
4231debfc3dSmrg }
4241debfc3dSmrg 
4251debfc3dSmrg /* Shrink arrays with "omp simd array" attribute to the corresponding
4261debfc3dSmrg    vectorization factor.  */
4271debfc3dSmrg 
4281debfc3dSmrg static void
shrink_simd_arrays(hash_table<simd_array_to_simduid> * simd_array_to_simduid_htab,hash_table<simduid_to_vf> * simduid_to_vf_htab)4291debfc3dSmrg shrink_simd_arrays
4301debfc3dSmrg   (hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab,
4311debfc3dSmrg    hash_table<simduid_to_vf> *simduid_to_vf_htab)
4321debfc3dSmrg {
  /* Entries marked -1U were used with more than one simduid and are
     skipped (see note_simd_array_uses_cb).  */
4331debfc3dSmrg   for (hash_table<simd_array_to_simduid>::iterator iter
4341debfc3dSmrg 	 = simd_array_to_simduid_htab->begin ();
4351debfc3dSmrg        iter != simd_array_to_simduid_htab->end (); ++iter)
4361debfc3dSmrg     if ((*iter)->simduid != -1U)
4371debfc3dSmrg       {
4381debfc3dSmrg 	tree decl = (*iter)->decl;
	/* No recorded VF means the loop was not vectorized; the array
	   shrinks to a single element.  */
439a2dc1f3fSmrg 	poly_uint64 vf = 1;
4401debfc3dSmrg 	if (simduid_to_vf_htab)
4411debfc3dSmrg 	  {
4421debfc3dSmrg 	    simduid_to_vf *p = NULL, data;
4431debfc3dSmrg 	    data.simduid = (*iter)->simduid;
4441debfc3dSmrg 	    p = simduid_to_vf_htab->find (&data);
4451debfc3dSmrg 	    if (p)
4461debfc3dSmrg 	      vf = p->vf;
4471debfc3dSmrg 	  }
	/* Give the decl a VF-element array type and recompute its layout.  */
4481debfc3dSmrg 	tree atype
4491debfc3dSmrg 	  = build_array_type_nelts (TREE_TYPE (TREE_TYPE (decl)), vf);
4501debfc3dSmrg 	TREE_TYPE (decl) = atype;
4511debfc3dSmrg 	relayout_decl (decl);
4521debfc3dSmrg       }
4531debfc3dSmrg 
4541debfc3dSmrg   delete simd_array_to_simduid_htab;
4551debfc3dSmrg }
4561debfc3dSmrg 
457a2dc1f3fSmrg /* Initialize the vec_info with kind KIND_IN and target cost data
458a2dc1f3fSmrg    TARGET_COST_DATA_IN.  */
4591debfc3dSmrg 
vec_info(vec_info::vec_kind kind_in,void * target_cost_data_in,vec_info_shared * shared_)460c0a68be4Smrg vec_info::vec_info (vec_info::vec_kind kind_in, void *target_cost_data_in,
461c0a68be4Smrg 		    vec_info_shared *shared_)
462a2dc1f3fSmrg   : kind (kind_in),
463c0a68be4Smrg     shared (shared_),
464a2dc1f3fSmrg     target_cost_data (target_cost_data_in)
4651debfc3dSmrg {
466c0a68be4Smrg   stmt_vec_infos.create (50);
467a2dc1f3fSmrg }
468a2dc1f3fSmrg 
/* Release all SLP instances, the target cost data and the per-statement
   vectorization information owned by this vec_info.  */
~vec_info()469a2dc1f3fSmrg vec_info::~vec_info ()
470a2dc1f3fSmrg {
471a2dc1f3fSmrg   slp_instance instance;
4721debfc3dSmrg   unsigned int i;
4731debfc3dSmrg 
474c0a68be4Smrg   FOR_EACH_VEC_ELT (slp_instances, i, instance)
475c0a68be4Smrg     vect_free_slp_instance (instance, true);
476c0a68be4Smrg 
477c0a68be4Smrg   destroy_cost_data (target_cost_data);
478c0a68be4Smrg   free_stmt_vec_infos ();
4791debfc3dSmrg }
4801debfc3dSmrg 
/* Start with empty data-reference and dependence vectors.  */
vec_info_shared()481c0a68be4Smrg vec_info_shared::vec_info_shared ()
482c0a68be4Smrg   : datarefs (vNULL),
483c0a68be4Smrg     datarefs_copy (vNULL),
484c0a68be4Smrg     ddrs (vNULL)
485c0a68be4Smrg {
486c0a68be4Smrg }
487a2dc1f3fSmrg 
/* Free the data references, the dependence relations and the checking
   copy of the datarefs.  */
~vec_info_shared()488c0a68be4Smrg vec_info_shared::~vec_info_shared ()
489c0a68be4Smrg {
490a2dc1f3fSmrg   free_data_refs (datarefs);
491a2dc1f3fSmrg   free_dependence_relations (ddrs);
492c0a68be4Smrg   datarefs_copy.release ();
493c0a68be4Smrg }
494c0a68be4Smrg 
/* With checking enabled, snapshot the current datarefs so a later
   check_datarefs call can verify they were not modified.  */
495c0a68be4Smrg void
save_datarefs()496c0a68be4Smrg vec_info_shared::save_datarefs ()
497c0a68be4Smrg {
498c0a68be4Smrg   if (!flag_checking)
499c0a68be4Smrg     return;
500c0a68be4Smrg   datarefs_copy.reserve_exact (datarefs.length ());
501c0a68be4Smrg   for (unsigned i = 0; i < datarefs.length (); ++i)
502c0a68be4Smrg     datarefs_copy.quick_push (*datarefs[i]);
503c0a68be4Smrg }
504c0a68be4Smrg 
/* With checking enabled, verify the datarefs are bitwise identical to the
   snapshot taken by save_datarefs; abort if anything changed.  */
505c0a68be4Smrg void
check_datarefs()506c0a68be4Smrg vec_info_shared::check_datarefs ()
507c0a68be4Smrg {
508c0a68be4Smrg   if (!flag_checking)
509c0a68be4Smrg     return;
510c0a68be4Smrg   gcc_assert (datarefs.length () == datarefs_copy.length ());
511c0a68be4Smrg   for (unsigned i = 0; i < datarefs.length (); ++i)
512c0a68be4Smrg     if (memcmp (&datarefs_copy[i], datarefs[i], sizeof (data_reference)) != 0)
513c0a68be4Smrg       gcc_unreachable ();
514c0a68be4Smrg }
515c0a68be4Smrg 
516c0a68be4Smrg /* Record that STMT belongs to the vectorizable region.  Create and return
517c0a68be4Smrg    an associated stmt_vec_info.  */
518c0a68be4Smrg 
519c0a68be4Smrg stmt_vec_info
add_stmt(gimple * stmt)520c0a68be4Smrg vec_info::add_stmt (gimple *stmt)
521c0a68be4Smrg {
522c0a68be4Smrg   stmt_vec_info res = new_stmt_vec_info (stmt);
523c0a68be4Smrg   set_vinfo_for_stmt (stmt, res);
524c0a68be4Smrg   return res;
525c0a68be4Smrg }
526c0a68be4Smrg 
527c0a68be4Smrg /* If STMT has an associated stmt_vec_info, return that vec_info, otherwise
528c0a68be4Smrg    return null.  It is safe to call this function on any statement, even if
529c0a68be4Smrg    it might not be part of the vectorizable region.  */
530c0a68be4Smrg 
531c0a68be4Smrg stmt_vec_info
lookup_stmt(gimple * stmt)532c0a68be4Smrg vec_info::lookup_stmt (gimple *stmt)
533c0a68be4Smrg {
  /* Statement UIDs are 1-based indices into stmt_vec_infos; 0 means the
     statement was never registered (see set_vinfo_for_stmt).  The
     res->stmt == stmt check guards against stale UIDs left by other
     passes.  */
534c0a68be4Smrg   unsigned int uid = gimple_uid (stmt);
535c0a68be4Smrg   if (uid > 0 && uid - 1 < stmt_vec_infos.length ())
536c0a68be4Smrg     {
537c0a68be4Smrg       stmt_vec_info res = stmt_vec_infos[uid - 1];
538c0a68be4Smrg       if (res && res->stmt == stmt)
539c0a68be4Smrg 	return res;
540c0a68be4Smrg     }
541c0a68be4Smrg   return NULL;
542c0a68be4Smrg }
543c0a68be4Smrg 
544c0a68be4Smrg /* If NAME is an SSA_NAME and its definition has an associated stmt_vec_info,
545c0a68be4Smrg    return that stmt_vec_info, otherwise return null.  It is safe to call
546c0a68be4Smrg    this on arbitrary operands.  */
547c0a68be4Smrg 
548c0a68be4Smrg stmt_vec_info
lookup_def(tree name)549c0a68be4Smrg vec_info::lookup_def (tree name)
550c0a68be4Smrg {
551c0a68be4Smrg   if (TREE_CODE (name) == SSA_NAME
552c0a68be4Smrg       && !SSA_NAME_IS_DEFAULT_DEF (name))
553c0a68be4Smrg     return lookup_stmt (SSA_NAME_DEF_STMT (name));
554c0a68be4Smrg   return NULL;
555c0a68be4Smrg }
556c0a68be4Smrg 
557c0a68be4Smrg /* See whether there is a single non-debug statement that uses LHS and
558c0a68be4Smrg    whether that statement has an associated stmt_vec_info.  Return the
559c0a68be4Smrg    stmt_vec_info if so, otherwise return null.  */
560c0a68be4Smrg 
561c0a68be4Smrg stmt_vec_info
lookup_single_use(tree lhs)562c0a68be4Smrg vec_info::lookup_single_use (tree lhs)
563c0a68be4Smrg {
564c0a68be4Smrg   use_operand_p dummy;
565c0a68be4Smrg   gimple *use_stmt;
566c0a68be4Smrg   if (single_imm_use (lhs, &dummy, &use_stmt))
567c0a68be4Smrg     return lookup_stmt (use_stmt);
568c0a68be4Smrg   return NULL;
569c0a68be4Smrg }
570c0a68be4Smrg 
571c0a68be4Smrg /* Return vectorization information about DR.  */
572c0a68be4Smrg 
573c0a68be4Smrg dr_vec_info *
lookup_dr(data_reference * dr)574c0a68be4Smrg vec_info::lookup_dr (data_reference *dr)
575c0a68be4Smrg {
576c0a68be4Smrg   stmt_vec_info stmt_info = lookup_stmt (DR_STMT (dr));
577c0a68be4Smrg   /* DR_STMT should never refer to a stmt in a pattern replacement.  */
578c0a68be4Smrg   gcc_checking_assert (!is_pattern_stmt_p (stmt_info));
  /* dr_aux.stmt tracks the stmt_vec_info that currently owns the data
     reference (it may have been moved by move_dr).  */
579c0a68be4Smrg   return STMT_VINFO_DR_INFO (stmt_info->dr_aux.stmt);
580c0a68be4Smrg }
581c0a68be4Smrg 
582c0a68be4Smrg /* Record that NEW_STMT_INFO now implements the same data reference
583c0a68be4Smrg    as OLD_STMT_INFO.  */
584c0a68be4Smrg 
585c0a68be4Smrg void
move_dr(stmt_vec_info new_stmt_info,stmt_vec_info old_stmt_info)586c0a68be4Smrg vec_info::move_dr (stmt_vec_info new_stmt_info, stmt_vec_info old_stmt_info)
587c0a68be4Smrg {
588c0a68be4Smrg   gcc_assert (!is_pattern_stmt_p (old_stmt_info));
  /* Point the dr_vec_info back at its new owner, then copy the auxiliary
     data-reference fields across.  */
589c0a68be4Smrg   STMT_VINFO_DR_INFO (old_stmt_info)->stmt = new_stmt_info;
590c0a68be4Smrg   new_stmt_info->dr_aux = old_stmt_info->dr_aux;
591c0a68be4Smrg   STMT_VINFO_DR_WRT_VEC_LOOP (new_stmt_info)
592c0a68be4Smrg     = STMT_VINFO_DR_WRT_VEC_LOOP (old_stmt_info);
593c0a68be4Smrg   STMT_VINFO_GATHER_SCATTER_P (new_stmt_info)
594c0a68be4Smrg     = STMT_VINFO_GATHER_SCATTER_P (old_stmt_info);
595c0a68be4Smrg }
596c0a68be4Smrg 
597c0a68be4Smrg /* Permanently remove the statement described by STMT_INFO from the
598c0a68be4Smrg    function.  */
599c0a68be4Smrg 
600c0a68be4Smrg void
remove_stmt(stmt_vec_info stmt_info)601c0a68be4Smrg vec_info::remove_stmt (stmt_vec_info stmt_info)
602c0a68be4Smrg {
603c0a68be4Smrg   gcc_assert (!stmt_info->pattern_stmt_p);
  /* Drop the vinfo mapping first, then unlink the statement from the IL
     and release its SSA definitions.  */
604c0a68be4Smrg   set_vinfo_for_stmt (stmt_info->stmt, NULL);
605c0a68be4Smrg   gimple_stmt_iterator si = gsi_for_stmt (stmt_info->stmt);
606c0a68be4Smrg   unlink_stmt_vdef (stmt_info->stmt);
607c0a68be4Smrg   gsi_remove (&si, true);
608c0a68be4Smrg   release_defs (stmt_info->stmt);
609c0a68be4Smrg   free_stmt_vec_info (stmt_info);
610c0a68be4Smrg }
611c0a68be4Smrg 
612c0a68be4Smrg /* Replace the statement at GSI by NEW_STMT, both the vectorization
613c0a68be4Smrg    information and the function itself.  STMT_INFO describes the statement
614c0a68be4Smrg    at GSI.  */
615c0a68be4Smrg 
616c0a68be4Smrg void
replace_stmt(gimple_stmt_iterator * gsi,stmt_vec_info stmt_info,gimple * new_stmt)617c0a68be4Smrg vec_info::replace_stmt (gimple_stmt_iterator *gsi, stmt_vec_info stmt_info,
618c0a68be4Smrg 			gimple *new_stmt)
619c0a68be4Smrg {
620c0a68be4Smrg   gimple *old_stmt = stmt_info->stmt;
621c0a68be4Smrg   gcc_assert (!stmt_info->pattern_stmt_p && old_stmt == gsi_stmt (*gsi));
  /* Re-target the existing stmt_vec_info at NEW_STMT before swapping the
     statement in the IL.  */
622c0a68be4Smrg   set_vinfo_for_stmt (old_stmt, NULL);
623c0a68be4Smrg   set_vinfo_for_stmt (new_stmt, stmt_info);
624c0a68be4Smrg   stmt_info->stmt = new_stmt;
625c0a68be4Smrg   gsi_replace (gsi, new_stmt, true);
626c0a68be4Smrg }
627c0a68be4Smrg 
628c0a68be4Smrg /* Create and initialize a new stmt_vec_info struct for STMT.  */
629c0a68be4Smrg 
630c0a68be4Smrg stmt_vec_info
new_stmt_vec_info(gimple * stmt)631c0a68be4Smrg vec_info::new_stmt_vec_info (gimple *stmt)
632c0a68be4Smrg {
633*8feb0f0bSmrg   stmt_vec_info res = XCNEW (class _stmt_vec_info);
634c0a68be4Smrg   res->vinfo = this;
635c0a68be4Smrg   res->stmt = stmt;
636c0a68be4Smrg 
637c0a68be4Smrg   STMT_VINFO_TYPE (res) = undef_vec_info_type;
638c0a68be4Smrg   STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
639c0a68be4Smrg   STMT_VINFO_VECTORIZABLE (res) = true;
640*8feb0f0bSmrg   STMT_VINFO_REDUC_TYPE (res) = TREE_CODE_REDUCTION;
641*8feb0f0bSmrg   STMT_VINFO_REDUC_CODE (res) = ERROR_MARK;
642*8feb0f0bSmrg   STMT_VINFO_REDUC_FN (res) = IFN_LAST;
643*8feb0f0bSmrg   STMT_VINFO_REDUC_IDX (res) = -1;
644*8feb0f0bSmrg   STMT_VINFO_SLP_VECT_ONLY (res) = false;
645c0a68be4Smrg 
  /* Loop-header PHIs may turn out to be inductions or reductions; that is
     only known after analysis, so start them as "unknown".  */
646c0a68be4Smrg   if (gimple_code (stmt) == GIMPLE_PHI
647c0a68be4Smrg       && is_loop_header_bb_p (gimple_bb (stmt)))
648c0a68be4Smrg     STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
649c0a68be4Smrg   else
650c0a68be4Smrg     STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
651c0a68be4Smrg 
652c0a68be4Smrg   STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
653c0a68be4Smrg   STMT_SLP_TYPE (res) = loop_vect;
654c0a68be4Smrg 
655c0a68be4Smrg   /* This is really "uninitialized" until vect_compute_data_ref_alignment.  */
656c0a68be4Smrg   res->dr_aux.misalignment = DR_MISALIGNMENT_UNINITIALIZED;
657c0a68be4Smrg 
658c0a68be4Smrg   return res;
659c0a68be4Smrg }
660c0a68be4Smrg 
661c0a68be4Smrg /* Associate STMT with INFO.  */
662c0a68be4Smrg 
663c0a68be4Smrg void
set_vinfo_for_stmt(gimple * stmt,stmt_vec_info info)664c0a68be4Smrg vec_info::set_vinfo_for_stmt (gimple *stmt, stmt_vec_info info)
665c0a68be4Smrg {
666c0a68be4Smrg   unsigned int uid = gimple_uid (stmt);
  /* UID 0 means the statement has no entry yet: allocate the next 1-based
     slot.  A nonzero UID may only be used to clear an existing entry.  */
667c0a68be4Smrg   if (uid == 0)
668c0a68be4Smrg     {
669c0a68be4Smrg       gcc_checking_assert (info);
670c0a68be4Smrg       uid = stmt_vec_infos.length () + 1;
671c0a68be4Smrg       gimple_set_uid (stmt, uid);
672c0a68be4Smrg       stmt_vec_infos.safe_push (info);
673c0a68be4Smrg     }
674c0a68be4Smrg   else
675c0a68be4Smrg     {
676c0a68be4Smrg       gcc_checking_assert (info == NULL);
677c0a68be4Smrg       stmt_vec_infos[uid - 1] = info;
678c0a68be4Smrg     }
679c0a68be4Smrg }
680c0a68be4Smrg 
681c0a68be4Smrg /* Free the contents of stmt_vec_infos.  */
682c0a68be4Smrg 
683c0a68be4Smrg void
free_stmt_vec_infos(void)684c0a68be4Smrg vec_info::free_stmt_vec_infos (void)
685c0a68be4Smrg {
686c0a68be4Smrg   unsigned int i;
687c0a68be4Smrg   stmt_vec_info info;
  /* Slots cleared by set_vinfo_for_stmt are NULL; skip those.  */
688c0a68be4Smrg   FOR_EACH_VEC_ELT (stmt_vec_infos, i, info)
689c0a68be4Smrg     if (info != NULL)
690c0a68be4Smrg       free_stmt_vec_info (info);
691c0a68be4Smrg   stmt_vec_infos.release ();
692c0a68be4Smrg }
693c0a68be4Smrg 
694c0a68be4Smrg /* Free STMT_INFO.  */
695c0a68be4Smrg 
696c0a68be4Smrg void
free_stmt_vec_info(stmt_vec_info stmt_info)697c0a68be4Smrg vec_info::free_stmt_vec_info (stmt_vec_info stmt_info)
698c0a68be4Smrg {
  /* Pattern statements were never inserted into the IL, so their bb link
     and SSA lhs must be released here instead of by stmt removal.  */
699c0a68be4Smrg   if (stmt_info->pattern_stmt_p)
700c0a68be4Smrg     {
701c0a68be4Smrg       gimple_set_bb (stmt_info->stmt, NULL);
702c0a68be4Smrg       tree lhs = gimple_get_lhs (stmt_info->stmt);
703c0a68be4Smrg       if (lhs && TREE_CODE (lhs) == SSA_NAME)
704c0a68be4Smrg 	release_ssa_name (lhs);
705c0a68be4Smrg     }
706c0a68be4Smrg 
707c0a68be4Smrg   STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
708c0a68be4Smrg   STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
709c0a68be4Smrg   free (stmt_info);
7101debfc3dSmrg }
7111debfc3dSmrg 
7121debfc3dSmrg /* A helper function to free scev and LOOP niter information, as well as
7131debfc3dSmrg    clear loop constraint LOOP_C_FINITE.  */
7141debfc3dSmrg 
7151debfc3dSmrg void
vect_free_loop_info_assumptions(class loop * loop)716*8feb0f0bSmrg vect_free_loop_info_assumptions (class loop *loop)
7171debfc3dSmrg {
7181debfc3dSmrg   scev_reset_htab ();
7191debfc3dSmrg   /* We need to explicitly reset the upper bound information since it is
720a2dc1f3fSmrg      used even after free_numbers_of_iterations_estimates.  */
7211debfc3dSmrg   loop->any_upper_bound = false;
7221debfc3dSmrg   loop->any_likely_upper_bound = false;
723a2dc1f3fSmrg   free_numbers_of_iterations_estimates (loop);
  /* Forget the finiteness assumption recorded for LOOP.  */
7241debfc3dSmrg   loop_constraint_clear (loop, LOOP_C_FINITE);
7251debfc3dSmrg }
7261debfc3dSmrg 
7271debfc3dSmrg /* If LOOP has been versioned during ifcvt, return the internal call
7281debfc3dSmrg    guarding it.  */
7291debfc3dSmrg 
730*8feb0f0bSmrg gimple *
vect_loop_vectorized_call(class loop * loop,gcond ** cond)731*8feb0f0bSmrg vect_loop_vectorized_call (class loop *loop, gcond **cond)
7321debfc3dSmrg {
7331debfc3dSmrg   basic_block bb = loop_preheader_edge (loop)->src;
7341debfc3dSmrg   gimple *g;
7351debfc3dSmrg   do
7361debfc3dSmrg     {
7371debfc3dSmrg       g = last_stmt (bb);
7381debfc3dSmrg       if (g)
7391debfc3dSmrg 	break;
7401debfc3dSmrg       if (!single_pred_p (bb))
7411debfc3dSmrg 	break;
7421debfc3dSmrg       bb = single_pred (bb);
7431debfc3dSmrg     }
7441debfc3dSmrg   while (1);
7451debfc3dSmrg   if (g && gimple_code (g) == GIMPLE_COND)
7461debfc3dSmrg     {
747*8feb0f0bSmrg       if (cond)
748*8feb0f0bSmrg 	*cond = as_a <gcond *> (g);
7491debfc3dSmrg       gimple_stmt_iterator gsi = gsi_for_stmt (g);
7501debfc3dSmrg       gsi_prev (&gsi);
7511debfc3dSmrg       if (!gsi_end_p (gsi))
7521debfc3dSmrg 	{
7531debfc3dSmrg 	  g = gsi_stmt (gsi);
7541debfc3dSmrg 	  if (gimple_call_internal_p (g, IFN_LOOP_VECTORIZED)
7551debfc3dSmrg 	      && (tree_to_shwi (gimple_call_arg (g, 0)) == loop->num
7561debfc3dSmrg 		  || tree_to_shwi (gimple_call_arg (g, 1)) == loop->num))
7571debfc3dSmrg 	    return g;
7581debfc3dSmrg 	}
7591debfc3dSmrg     }
7601debfc3dSmrg   return NULL;
7611debfc3dSmrg }
7621debfc3dSmrg 
/* If LOOP has been versioned during loop distribution, return the guarding
   internal call.  Returns NULL when LOOP was not produced by loop
   distribution versioning, or when no matching IFN_LOOP_DIST_ALIAS call
   can be found.  */

static gimple *
vect_loop_dist_alias_call (class loop *loop)
{
  basic_block bb;
  basic_block entry;
  class loop *outer, *orig;
  gimple_stmt_iterator gsi;
  gimple *g;

  /* A zero orig_loop_num means LOOP is not a loop-distribution copy.  */
  if (loop->orig_loop_num == 0)
    return NULL;

  orig = get_loop (cfun, loop->orig_loop_num);
  if (orig == NULL)
    {
      /* The original loop is somehow destroyed.  Clear the information.  */
      loop->orig_loop_num = 0;
      return NULL;
    }

  /* Start the search at a block dominating both copies when they are
     distinct, otherwise at LOOP's own preheader source.  */
  if (loop != orig)
    bb = nearest_common_dominator (CDI_DOMINATORS, loop->header, orig->header);
  else
    bb = loop_preheader_edge (loop)->src;

  outer = bb->loop_father;
  entry = ENTRY_BLOCK_PTR_FOR_FN (cfun);

  /* Look upward in dominance tree.  The walk stays inside the loop that
     contained the starting block and stops at the function entry.  */
  for (; bb != entry && flow_bb_inside_loop_p (outer, bb);
       bb = get_immediate_dominator (CDI_DOMINATORS, bb))
    {
      g = last_stmt (bb);
      if (g == NULL || gimple_code (g) != GIMPLE_COND)
	continue;

      /* The candidate call is the statement immediately before the
	 versioning GIMPLE_COND.  */
      gsi = gsi_for_stmt (g);
      gsi_prev (&gsi);
      if (gsi_end_p (gsi))
	continue;

      g = gsi_stmt (gsi);
      /* The guarding internal function call must have the same distribution
	 alias id.  */
      if (gimple_call_internal_p (g, IFN_LOOP_DIST_ALIAS)
	  && (tree_to_shwi (gimple_call_arg (g, 0)) == loop->orig_loop_num))
	return g;
    }
  return NULL;
}
8161debfc3dSmrg 
8171debfc3dSmrg /* Set the uids of all the statements in basic blocks inside loop
8181debfc3dSmrg    represented by LOOP_VINFO. LOOP_VECTORIZED_CALL is the internal
8191debfc3dSmrg    call guarding the loop which has been if converted.  */
8201debfc3dSmrg static void
set_uid_loop_bbs(loop_vec_info loop_vinfo,gimple * loop_vectorized_call)8211debfc3dSmrg set_uid_loop_bbs (loop_vec_info loop_vinfo, gimple *loop_vectorized_call)
8221debfc3dSmrg {
8231debfc3dSmrg   tree arg = gimple_call_arg (loop_vectorized_call, 1);
8241debfc3dSmrg   basic_block *bbs;
8251debfc3dSmrg   unsigned int i;
826*8feb0f0bSmrg   class loop *scalar_loop = get_loop (cfun, tree_to_shwi (arg));
8271debfc3dSmrg 
8281debfc3dSmrg   LOOP_VINFO_SCALAR_LOOP (loop_vinfo) = scalar_loop;
8291debfc3dSmrg   gcc_checking_assert (vect_loop_vectorized_call (scalar_loop)
8301debfc3dSmrg 		       == loop_vectorized_call);
8311debfc3dSmrg   /* If we are going to vectorize outer loop, prevent vectorization
8321debfc3dSmrg      of the inner loop in the scalar loop - either the scalar loop is
8331debfc3dSmrg      thrown away, so it is a wasted work, or is used only for
8341debfc3dSmrg      a few iterations.  */
8351debfc3dSmrg   if (scalar_loop->inner)
8361debfc3dSmrg     {
8371debfc3dSmrg       gimple *g = vect_loop_vectorized_call (scalar_loop->inner);
8381debfc3dSmrg       if (g)
8391debfc3dSmrg 	{
8401debfc3dSmrg 	  arg = gimple_call_arg (g, 0);
8411debfc3dSmrg 	  get_loop (cfun, tree_to_shwi (arg))->dont_vectorize = true;
842a2dc1f3fSmrg 	  fold_loop_internal_call (g, boolean_false_node);
8431debfc3dSmrg 	}
8441debfc3dSmrg     }
8451debfc3dSmrg   bbs = get_loop_body (scalar_loop);
8461debfc3dSmrg   for (i = 0; i < scalar_loop->num_nodes; i++)
8471debfc3dSmrg     {
8481debfc3dSmrg       basic_block bb = bbs[i];
8491debfc3dSmrg       gimple_stmt_iterator gsi;
8501debfc3dSmrg       for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
8511debfc3dSmrg 	{
8521debfc3dSmrg 	  gimple *phi = gsi_stmt (gsi);
8531debfc3dSmrg 	  gimple_set_uid (phi, 0);
8541debfc3dSmrg 	}
8551debfc3dSmrg       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
8561debfc3dSmrg 	{
8571debfc3dSmrg 	  gimple *stmt = gsi_stmt (gsi);
8581debfc3dSmrg 	  gimple_set_uid (stmt, 0);
8591debfc3dSmrg 	}
8601debfc3dSmrg     }
8611debfc3dSmrg   free (bbs);
8621debfc3dSmrg }
8631debfc3dSmrg 
864c0a68be4Smrg /* Try to vectorize LOOP.  */
865c0a68be4Smrg 
866c0a68be4Smrg static unsigned
try_vectorize_loop_1(hash_table<simduid_to_vf> * & simduid_to_vf_htab,unsigned * num_vectorized_loops,loop_p loop,gimple * loop_vectorized_call,gimple * loop_dist_alias_call)867c0a68be4Smrg try_vectorize_loop_1 (hash_table<simduid_to_vf> *&simduid_to_vf_htab,
868*8feb0f0bSmrg 		      unsigned *num_vectorized_loops, loop_p loop,
869c0a68be4Smrg 		      gimple *loop_vectorized_call,
870c0a68be4Smrg 		      gimple *loop_dist_alias_call)
871c0a68be4Smrg {
872c0a68be4Smrg   unsigned ret = 0;
873c0a68be4Smrg   vec_info_shared shared;
874c0a68be4Smrg   auto_purge_vect_location sentinel;
875c0a68be4Smrg   vect_location = find_loop_location (loop);
876*8feb0f0bSmrg 
877c0a68be4Smrg   if (LOCATION_LOCUS (vect_location.get_location_t ()) != UNKNOWN_LOCATION
878c0a68be4Smrg       && dump_enabled_p ())
879c0a68be4Smrg     dump_printf (MSG_NOTE | MSG_PRIORITY_INTERNALS,
880c0a68be4Smrg 		 "\nAnalyzing loop at %s:%d\n",
881c0a68be4Smrg 		 LOCATION_FILE (vect_location.get_location_t ()),
882c0a68be4Smrg 		 LOCATION_LINE (vect_location.get_location_t ()));
883c0a68be4Smrg 
884*8feb0f0bSmrg   opt_loop_vec_info loop_vinfo = opt_loop_vec_info::success (NULL);
885*8feb0f0bSmrg   /* In the case of epilogue vectorization the loop already has its
886*8feb0f0bSmrg      loop_vec_info set, we do not require to analyze the loop in this case.  */
887*8feb0f0bSmrg   if (loop_vec_info vinfo = loop_vec_info_for_loop (loop))
888*8feb0f0bSmrg     loop_vinfo = opt_loop_vec_info::success (vinfo);
889*8feb0f0bSmrg   else
890*8feb0f0bSmrg     {
891c0a68be4Smrg       /* Try to analyze the loop, retaining an opt_problem if dump_enabled_p.  */
892*8feb0f0bSmrg       loop_vinfo = vect_analyze_loop (loop, &shared);
893c0a68be4Smrg       loop->aux = loop_vinfo;
894*8feb0f0bSmrg     }
895c0a68be4Smrg 
896c0a68be4Smrg   if (!loop_vinfo)
897c0a68be4Smrg     if (dump_enabled_p ())
898c0a68be4Smrg       if (opt_problem *problem = loop_vinfo.get_problem ())
899c0a68be4Smrg 	{
900c0a68be4Smrg 	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
901c0a68be4Smrg 			   "couldn't vectorize loop\n");
902c0a68be4Smrg 	  problem->emit_and_clear ();
903c0a68be4Smrg 	}
904c0a68be4Smrg 
905c0a68be4Smrg   if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
906c0a68be4Smrg     {
907c0a68be4Smrg       /* Free existing information if loop is analyzed with some
908c0a68be4Smrg 	 assumptions.  */
909c0a68be4Smrg       if (loop_constraint_set_p (loop, LOOP_C_FINITE))
910c0a68be4Smrg 	vect_free_loop_info_assumptions (loop);
911c0a68be4Smrg 
912c0a68be4Smrg       /* If we applied if-conversion then try to vectorize the
913c0a68be4Smrg 	 BB of innermost loops.
914c0a68be4Smrg 	 ???  Ideally BB vectorization would learn to vectorize
915c0a68be4Smrg 	 control flow by applying if-conversion on-the-fly, the
916c0a68be4Smrg 	 following retains the if-converted loop body even when
917c0a68be4Smrg 	 only non-if-converted parts took part in BB vectorization.  */
918c0a68be4Smrg       if (flag_tree_slp_vectorize != 0
919c0a68be4Smrg 	  && loop_vectorized_call
920c0a68be4Smrg 	  && ! loop->inner)
921c0a68be4Smrg 	{
922c0a68be4Smrg 	  basic_block bb = loop->header;
923c0a68be4Smrg 	  bool require_loop_vectorize = false;
924c0a68be4Smrg 	  for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
925c0a68be4Smrg 	       !gsi_end_p (gsi); gsi_next (&gsi))
926c0a68be4Smrg 	    {
927c0a68be4Smrg 	      gimple *stmt = gsi_stmt (gsi);
928c0a68be4Smrg 	      gcall *call = dyn_cast <gcall *> (stmt);
929c0a68be4Smrg 	      if (call && gimple_call_internal_p (call))
930c0a68be4Smrg 		{
931c0a68be4Smrg 		  internal_fn ifn = gimple_call_internal_fn (call);
932c0a68be4Smrg 		  if (ifn == IFN_MASK_LOAD || ifn == IFN_MASK_STORE
933c0a68be4Smrg 		      /* Don't keep the if-converted parts when the ifn with
934c0a68be4Smrg 			 specifc type is not supported by the backend.  */
935c0a68be4Smrg 		      || (direct_internal_fn_p (ifn)
936c0a68be4Smrg 			  && !direct_internal_fn_supported_p
937c0a68be4Smrg 			  (call, OPTIMIZE_FOR_SPEED)))
938c0a68be4Smrg 		    {
939c0a68be4Smrg 		      require_loop_vectorize = true;
940c0a68be4Smrg 		      break;
941c0a68be4Smrg 		    }
942c0a68be4Smrg 		}
943c0a68be4Smrg 	      gimple_set_uid (stmt, -1);
944c0a68be4Smrg 	      gimple_set_visited (stmt, false);
945c0a68be4Smrg 	    }
946c0a68be4Smrg 	  if (!require_loop_vectorize && vect_slp_bb (bb))
947c0a68be4Smrg 	    {
948c0a68be4Smrg 	      if (dump_enabled_p ())
949c0a68be4Smrg 		dump_printf_loc (MSG_NOTE, vect_location,
950c0a68be4Smrg 				 "basic block vectorized\n");
951c0a68be4Smrg 	      fold_loop_internal_call (loop_vectorized_call,
952c0a68be4Smrg 				       boolean_true_node);
953c0a68be4Smrg 	      loop_vectorized_call = NULL;
954c0a68be4Smrg 	      ret |= TODO_cleanup_cfg | TODO_update_ssa_only_virtuals;
955c0a68be4Smrg 	    }
956c0a68be4Smrg 	}
957c0a68be4Smrg       /* If outer loop vectorization fails for LOOP_VECTORIZED guarded
958c0a68be4Smrg 	 loop, don't vectorize its inner loop; we'll attempt to
959c0a68be4Smrg 	 vectorize LOOP_VECTORIZED guarded inner loop of the scalar
960c0a68be4Smrg 	 loop version.  */
961c0a68be4Smrg       if (loop_vectorized_call && loop->inner)
962c0a68be4Smrg 	loop->inner->dont_vectorize = true;
963c0a68be4Smrg       return ret;
964c0a68be4Smrg     }
965c0a68be4Smrg 
966c0a68be4Smrg   if (!dbg_cnt (vect_loop))
967c0a68be4Smrg     {
968c0a68be4Smrg       /* Free existing information if loop is analyzed with some
969c0a68be4Smrg 	 assumptions.  */
970c0a68be4Smrg       if (loop_constraint_set_p (loop, LOOP_C_FINITE))
971c0a68be4Smrg 	vect_free_loop_info_assumptions (loop);
972c0a68be4Smrg       return ret;
973c0a68be4Smrg     }
974c0a68be4Smrg 
975c0a68be4Smrg   if (loop_vectorized_call)
976c0a68be4Smrg     set_uid_loop_bbs (loop_vinfo, loop_vectorized_call);
977c0a68be4Smrg 
978c0a68be4Smrg   unsigned HOST_WIDE_INT bytes;
979c0a68be4Smrg   if (dump_enabled_p ())
980c0a68be4Smrg     {
981*8feb0f0bSmrg       if (GET_MODE_SIZE (loop_vinfo->vector_mode).is_constant (&bytes))
982c0a68be4Smrg 	dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
983c0a68be4Smrg 			 "loop vectorized using %wu byte vectors\n", bytes);
984c0a68be4Smrg       else
985c0a68be4Smrg 	dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
986c0a68be4Smrg 			 "loop vectorized using variable length vectors\n");
987c0a68be4Smrg     }
988c0a68be4Smrg 
989*8feb0f0bSmrg   loop_p new_loop = vect_transform_loop (loop_vinfo,
990*8feb0f0bSmrg 					 loop_vectorized_call);
991c0a68be4Smrg   (*num_vectorized_loops)++;
992c0a68be4Smrg   /* Now that the loop has been vectorized, allow it to be unrolled
993c0a68be4Smrg      etc.  */
994c0a68be4Smrg   loop->force_vectorize = false;
995c0a68be4Smrg 
996c0a68be4Smrg   if (loop->simduid)
997c0a68be4Smrg     {
998c0a68be4Smrg       simduid_to_vf *simduid_to_vf_data = XNEW (simduid_to_vf);
999c0a68be4Smrg       if (!simduid_to_vf_htab)
1000c0a68be4Smrg 	simduid_to_vf_htab = new hash_table<simduid_to_vf> (15);
1001c0a68be4Smrg       simduid_to_vf_data->simduid = DECL_UID (loop->simduid);
1002c0a68be4Smrg       simduid_to_vf_data->vf = loop_vinfo->vectorization_factor;
1003c0a68be4Smrg       *simduid_to_vf_htab->find_slot (simduid_to_vf_data, INSERT)
1004c0a68be4Smrg 	  = simduid_to_vf_data;
1005c0a68be4Smrg     }
1006c0a68be4Smrg 
1007c0a68be4Smrg   if (loop_vectorized_call)
1008c0a68be4Smrg     {
1009c0a68be4Smrg       fold_loop_internal_call (loop_vectorized_call, boolean_true_node);
1010c0a68be4Smrg       loop_vectorized_call = NULL;
1011c0a68be4Smrg       ret |= TODO_cleanup_cfg;
1012c0a68be4Smrg     }
1013c0a68be4Smrg   if (loop_dist_alias_call)
1014c0a68be4Smrg     {
1015c0a68be4Smrg       tree value = gimple_call_arg (loop_dist_alias_call, 1);
1016c0a68be4Smrg       fold_loop_internal_call (loop_dist_alias_call, value);
1017c0a68be4Smrg       loop_dist_alias_call = NULL;
1018c0a68be4Smrg       ret |= TODO_cleanup_cfg;
1019c0a68be4Smrg     }
1020c0a68be4Smrg 
1021c0a68be4Smrg   /* Epilogue of vectorized loop must be vectorized too.  */
1022c0a68be4Smrg   if (new_loop)
1023*8feb0f0bSmrg     {
1024*8feb0f0bSmrg       /* Don't include vectorized epilogues in the "vectorized loops" count.
1025*8feb0f0bSmrg        */
1026*8feb0f0bSmrg       unsigned dont_count = *num_vectorized_loops;
1027*8feb0f0bSmrg       ret |= try_vectorize_loop_1 (simduid_to_vf_htab, &dont_count,
1028*8feb0f0bSmrg 				   new_loop, NULL, NULL);
1029*8feb0f0bSmrg     }
1030c0a68be4Smrg 
1031c0a68be4Smrg   return ret;
1032c0a68be4Smrg }
1033c0a68be4Smrg 
1034c0a68be4Smrg /* Try to vectorize LOOP.  */
1035c0a68be4Smrg 
1036c0a68be4Smrg static unsigned
try_vectorize_loop(hash_table<simduid_to_vf> * & simduid_to_vf_htab,unsigned * num_vectorized_loops,loop_p loop)1037c0a68be4Smrg try_vectorize_loop (hash_table<simduid_to_vf> *&simduid_to_vf_htab,
1038c0a68be4Smrg 		    unsigned *num_vectorized_loops, loop_p loop)
1039c0a68be4Smrg {
1040c0a68be4Smrg   if (!((flag_tree_loop_vectorize
1041c0a68be4Smrg 	 && optimize_loop_nest_for_speed_p (loop))
1042c0a68be4Smrg 	|| loop->force_vectorize))
1043c0a68be4Smrg     return 0;
1044c0a68be4Smrg 
1045*8feb0f0bSmrg   return try_vectorize_loop_1 (simduid_to_vf_htab, num_vectorized_loops, loop,
1046c0a68be4Smrg 			       vect_loop_vectorized_call (loop),
1047c0a68be4Smrg 			       vect_loop_dist_alias_call (loop));
1048c0a68be4Smrg }
1049c0a68be4Smrg 
1050c0a68be4Smrg 
10511debfc3dSmrg /* Function vectorize_loops.
10521debfc3dSmrg 
10531debfc3dSmrg    Entry point to loop vectorization phase.  */
10541debfc3dSmrg 
10551debfc3dSmrg unsigned
vectorize_loops(void)10561debfc3dSmrg vectorize_loops (void)
10571debfc3dSmrg {
10581debfc3dSmrg   unsigned int i;
10591debfc3dSmrg   unsigned int num_vectorized_loops = 0;
10601debfc3dSmrg   unsigned int vect_loops_num;
1061*8feb0f0bSmrg   class loop *loop;
10621debfc3dSmrg   hash_table<simduid_to_vf> *simduid_to_vf_htab = NULL;
10631debfc3dSmrg   hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab = NULL;
10641debfc3dSmrg   bool any_ifcvt_loops = false;
10651debfc3dSmrg   unsigned ret = 0;
10661debfc3dSmrg 
10671debfc3dSmrg   vect_loops_num = number_of_loops (cfun);
10681debfc3dSmrg 
10691debfc3dSmrg   /* Bail out if there are no loops.  */
10701debfc3dSmrg   if (vect_loops_num <= 1)
10711debfc3dSmrg     return 0;
10721debfc3dSmrg 
10731debfc3dSmrg   if (cfun->has_simduid_loops)
10741debfc3dSmrg     note_simd_array_uses (&simd_array_to_simduid_htab);
10751debfc3dSmrg 
10761debfc3dSmrg   /*  ----------- Analyze loops. -----------  */
10771debfc3dSmrg 
10781debfc3dSmrg   /* If some loop was duplicated, it gets bigger number
10791debfc3dSmrg      than all previously defined loops.  This fact allows us to run
10801debfc3dSmrg      only over initial loops skipping newly generated ones.  */
10811debfc3dSmrg   FOR_EACH_LOOP (loop, 0)
10821debfc3dSmrg     if (loop->dont_vectorize)
10831debfc3dSmrg       {
10841debfc3dSmrg 	any_ifcvt_loops = true;
10851debfc3dSmrg 	/* If-conversion sometimes versions both the outer loop
10861debfc3dSmrg 	   (for the case when outer loop vectorization might be
10871debfc3dSmrg 	   desirable) as well as the inner loop in the scalar version
10881debfc3dSmrg 	   of the loop.  So we have:
10891debfc3dSmrg 	    if (LOOP_VECTORIZED (1, 3))
10901debfc3dSmrg 	      {
10911debfc3dSmrg 		loop1
10921debfc3dSmrg 		  loop2
10931debfc3dSmrg 	      }
10941debfc3dSmrg 	    else
10951debfc3dSmrg 	      loop3 (copy of loop1)
10961debfc3dSmrg 		if (LOOP_VECTORIZED (4, 5))
10971debfc3dSmrg 		  loop4 (copy of loop2)
10981debfc3dSmrg 		else
10991debfc3dSmrg 		  loop5 (copy of loop4)
11001debfc3dSmrg 	   If FOR_EACH_LOOP gives us loop3 first (which has
11011debfc3dSmrg 	   dont_vectorize set), make sure to process loop1 before loop4;
11021debfc3dSmrg 	   so that we can prevent vectorization of loop4 if loop1
11031debfc3dSmrg 	   is successfully vectorized.  */
11041debfc3dSmrg 	if (loop->inner)
11051debfc3dSmrg 	  {
11061debfc3dSmrg 	    gimple *loop_vectorized_call
11071debfc3dSmrg 	      = vect_loop_vectorized_call (loop);
11081debfc3dSmrg 	    if (loop_vectorized_call
11091debfc3dSmrg 		&& vect_loop_vectorized_call (loop->inner))
11101debfc3dSmrg 	      {
11111debfc3dSmrg 		tree arg = gimple_call_arg (loop_vectorized_call, 0);
1112*8feb0f0bSmrg 		class loop *vector_loop
11131debfc3dSmrg 		  = get_loop (cfun, tree_to_shwi (arg));
11141debfc3dSmrg 		if (vector_loop && vector_loop != loop)
11151debfc3dSmrg 		  {
11161debfc3dSmrg 		    /* Make sure we don't vectorize it twice.  */
1117c0a68be4Smrg 		    vector_loop->dont_vectorize = true;
1118c0a68be4Smrg 		    ret |= try_vectorize_loop (simduid_to_vf_htab,
1119c0a68be4Smrg 					       &num_vectorized_loops,
1120c0a68be4Smrg 					       vector_loop);
11211debfc3dSmrg 		  }
11221debfc3dSmrg 	      }
11231debfc3dSmrg 	  }
11241debfc3dSmrg       }
11251debfc3dSmrg     else
1126c0a68be4Smrg       ret |= try_vectorize_loop (simduid_to_vf_htab, &num_vectorized_loops,
1127c0a68be4Smrg 				 loop);
11281debfc3dSmrg 
1129c0a68be4Smrg   vect_location = dump_user_location_t ();
11301debfc3dSmrg 
11311debfc3dSmrg   statistics_counter_event (cfun, "Vectorized loops", num_vectorized_loops);
11321debfc3dSmrg   if (dump_enabled_p ()
11331debfc3dSmrg       || (num_vectorized_loops > 0 && dump_enabled_p ()))
11341debfc3dSmrg     dump_printf_loc (MSG_NOTE, vect_location,
11351debfc3dSmrg                      "vectorized %u loops in function.\n",
11361debfc3dSmrg                      num_vectorized_loops);
11371debfc3dSmrg 
11381debfc3dSmrg   /*  ----------- Finalize. -----------  */
11391debfc3dSmrg 
11401debfc3dSmrg   if (any_ifcvt_loops)
1141c0a68be4Smrg     for (i = 1; i < number_of_loops (cfun); i++)
11421debfc3dSmrg       {
11431debfc3dSmrg 	loop = get_loop (cfun, i);
11441debfc3dSmrg 	if (loop && loop->dont_vectorize)
11451debfc3dSmrg 	  {
11461debfc3dSmrg 	    gimple *g = vect_loop_vectorized_call (loop);
11471debfc3dSmrg 	    if (g)
11481debfc3dSmrg 	      {
1149a2dc1f3fSmrg 		fold_loop_internal_call (g, boolean_false_node);
1150a2dc1f3fSmrg 		ret |= TODO_cleanup_cfg;
1151a2dc1f3fSmrg 		g = NULL;
1152a2dc1f3fSmrg 	      }
1153a2dc1f3fSmrg 	    else
1154a2dc1f3fSmrg 	      g = vect_loop_dist_alias_call (loop);
1155a2dc1f3fSmrg 
1156a2dc1f3fSmrg 	    if (g)
1157a2dc1f3fSmrg 	      {
1158a2dc1f3fSmrg 		fold_loop_internal_call (g, boolean_false_node);
11591debfc3dSmrg 		ret |= TODO_cleanup_cfg;
11601debfc3dSmrg 	      }
11611debfc3dSmrg 	  }
11621debfc3dSmrg       }
11631debfc3dSmrg 
1164c0a68be4Smrg   for (i = 1; i < number_of_loops (cfun); i++)
11651debfc3dSmrg     {
11661debfc3dSmrg       loop_vec_info loop_vinfo;
11671debfc3dSmrg       bool has_mask_store;
11681debfc3dSmrg 
11691debfc3dSmrg       loop = get_loop (cfun, i);
1170c0a68be4Smrg       if (!loop || !loop->aux)
11711debfc3dSmrg 	continue;
11721debfc3dSmrg       loop_vinfo = (loop_vec_info) loop->aux;
11731debfc3dSmrg       has_mask_store = LOOP_VINFO_HAS_MASK_STORE (loop_vinfo);
1174a2dc1f3fSmrg       delete loop_vinfo;
1175a2dc1f3fSmrg       if (has_mask_store
1176a2dc1f3fSmrg 	  && targetm.vectorize.empty_mask_is_expensive (IFN_MASK_STORE))
11771debfc3dSmrg 	optimize_mask_stores (loop);
11781debfc3dSmrg       loop->aux = NULL;
11791debfc3dSmrg     }
11801debfc3dSmrg 
11811debfc3dSmrg   /* Fold IFN_GOMP_SIMD_{VF,LANE,LAST_LANE,ORDERED_{START,END}} builtins.  */
11821debfc3dSmrg   if (cfun->has_simduid_loops)
11831debfc3dSmrg     adjust_simduid_builtins (simduid_to_vf_htab);
11841debfc3dSmrg 
11851debfc3dSmrg   /* Shrink any "omp array simd" temporary arrays to the
11861debfc3dSmrg      actual vectorization factors.  */
11871debfc3dSmrg   if (simd_array_to_simduid_htab)
11881debfc3dSmrg     shrink_simd_arrays (simd_array_to_simduid_htab, simduid_to_vf_htab);
11891debfc3dSmrg   delete simduid_to_vf_htab;
11901debfc3dSmrg   cfun->has_simduid_loops = false;
11911debfc3dSmrg 
11921debfc3dSmrg   if (num_vectorized_loops > 0)
11931debfc3dSmrg     {
11941debfc3dSmrg       /* If we vectorized any loop only virtual SSA form needs to be updated.
11951debfc3dSmrg 	 ???  Also while we try hard to update loop-closed SSA form we fail
11961debfc3dSmrg 	 to properly do this in some corner-cases (see PR56286).  */
11971debfc3dSmrg       rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa_only_virtuals);
11981debfc3dSmrg       return TODO_cleanup_cfg;
11991debfc3dSmrg     }
12001debfc3dSmrg 
12011debfc3dSmrg   return ret;
12021debfc3dSmrg }
12031debfc3dSmrg 
12041debfc3dSmrg 
12051debfc3dSmrg /* Entry point to the simduid cleanup pass.  */
12061debfc3dSmrg 
12071debfc3dSmrg namespace {
12081debfc3dSmrg 
12091debfc3dSmrg const pass_data pass_data_simduid_cleanup =
12101debfc3dSmrg {
12111debfc3dSmrg   GIMPLE_PASS, /* type */
12121debfc3dSmrg   "simduid", /* name */
12131debfc3dSmrg   OPTGROUP_NONE, /* optinfo_flags */
12141debfc3dSmrg   TV_NONE, /* tv_id */
12151debfc3dSmrg   ( PROP_ssa | PROP_cfg ), /* properties_required */
12161debfc3dSmrg   0, /* properties_provided */
12171debfc3dSmrg   0, /* properties_destroyed */
12181debfc3dSmrg   0, /* todo_flags_start */
12191debfc3dSmrg   0, /* todo_flags_finish */
12201debfc3dSmrg };
12211debfc3dSmrg 
12221debfc3dSmrg class pass_simduid_cleanup : public gimple_opt_pass
12231debfc3dSmrg {
12241debfc3dSmrg public:
pass_simduid_cleanup(gcc::context * ctxt)12251debfc3dSmrg   pass_simduid_cleanup (gcc::context *ctxt)
12261debfc3dSmrg     : gimple_opt_pass (pass_data_simduid_cleanup, ctxt)
12271debfc3dSmrg   {}
12281debfc3dSmrg 
12291debfc3dSmrg   /* opt_pass methods: */
clone()12301debfc3dSmrg   opt_pass * clone () { return new pass_simduid_cleanup (m_ctxt); }
gate(function * fun)12311debfc3dSmrg   virtual bool gate (function *fun) { return fun->has_simduid_loops; }
12321debfc3dSmrg   virtual unsigned int execute (function *);
12331debfc3dSmrg 
12341debfc3dSmrg }; // class pass_simduid_cleanup
12351debfc3dSmrg 
12361debfc3dSmrg unsigned int
execute(function * fun)12371debfc3dSmrg pass_simduid_cleanup::execute (function *fun)
12381debfc3dSmrg {
12391debfc3dSmrg   hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab = NULL;
12401debfc3dSmrg 
12411debfc3dSmrg   note_simd_array_uses (&simd_array_to_simduid_htab);
12421debfc3dSmrg 
12431debfc3dSmrg   /* Fold IFN_GOMP_SIMD_{VF,LANE,LAST_LANE,ORDERED_{START,END}} builtins.  */
12441debfc3dSmrg   adjust_simduid_builtins (NULL);
12451debfc3dSmrg 
12461debfc3dSmrg   /* Shrink any "omp array simd" temporary arrays to the
12471debfc3dSmrg      actual vectorization factors.  */
12481debfc3dSmrg   if (simd_array_to_simduid_htab)
12491debfc3dSmrg     shrink_simd_arrays (simd_array_to_simduid_htab, NULL);
12501debfc3dSmrg   fun->has_simduid_loops = false;
12511debfc3dSmrg   return 0;
12521debfc3dSmrg }
12531debfc3dSmrg 
12541debfc3dSmrg }  // anon namespace
12551debfc3dSmrg 
12561debfc3dSmrg gimple_opt_pass *
make_pass_simduid_cleanup(gcc::context * ctxt)12571debfc3dSmrg make_pass_simduid_cleanup (gcc::context *ctxt)
12581debfc3dSmrg {
12591debfc3dSmrg   return new pass_simduid_cleanup (ctxt);
12601debfc3dSmrg }
12611debfc3dSmrg 
12621debfc3dSmrg 
12631debfc3dSmrg /*  Entry point to basic block SLP phase.  */
12641debfc3dSmrg 
12651debfc3dSmrg namespace {
12661debfc3dSmrg 
12671debfc3dSmrg const pass_data pass_data_slp_vectorize =
12681debfc3dSmrg {
12691debfc3dSmrg   GIMPLE_PASS, /* type */
12701debfc3dSmrg   "slp", /* name */
12711debfc3dSmrg   OPTGROUP_LOOP | OPTGROUP_VEC, /* optinfo_flags */
12721debfc3dSmrg   TV_TREE_SLP_VECTORIZATION, /* tv_id */
12731debfc3dSmrg   ( PROP_ssa | PROP_cfg ), /* properties_required */
12741debfc3dSmrg   0, /* properties_provided */
12751debfc3dSmrg   0, /* properties_destroyed */
12761debfc3dSmrg   0, /* todo_flags_start */
12771debfc3dSmrg   TODO_update_ssa, /* todo_flags_finish */
12781debfc3dSmrg };
12791debfc3dSmrg 
12801debfc3dSmrg class pass_slp_vectorize : public gimple_opt_pass
12811debfc3dSmrg {
12821debfc3dSmrg public:
pass_slp_vectorize(gcc::context * ctxt)12831debfc3dSmrg   pass_slp_vectorize (gcc::context *ctxt)
12841debfc3dSmrg     : gimple_opt_pass (pass_data_slp_vectorize, ctxt)
12851debfc3dSmrg   {}
12861debfc3dSmrg 
12871debfc3dSmrg   /* opt_pass methods: */
clone()12881debfc3dSmrg   opt_pass * clone () { return new pass_slp_vectorize (m_ctxt); }
gate(function *)12891debfc3dSmrg   virtual bool gate (function *) { return flag_tree_slp_vectorize != 0; }
12901debfc3dSmrg   virtual unsigned int execute (function *);
12911debfc3dSmrg 
12921debfc3dSmrg }; // class pass_slp_vectorize
12931debfc3dSmrg 
12941debfc3dSmrg unsigned int
execute(function * fun)12951debfc3dSmrg pass_slp_vectorize::execute (function *fun)
12961debfc3dSmrg {
1297c0a68be4Smrg   auto_purge_vect_location sentinel;
12981debfc3dSmrg   basic_block bb;
12991debfc3dSmrg 
13001debfc3dSmrg   bool in_loop_pipeline = scev_initialized_p ();
13011debfc3dSmrg   if (!in_loop_pipeline)
13021debfc3dSmrg     {
13031debfc3dSmrg       loop_optimizer_init (LOOPS_NORMAL);
13041debfc3dSmrg       scev_initialize ();
13051debfc3dSmrg     }
13061debfc3dSmrg 
13071debfc3dSmrg   /* Mark all stmts as not belonging to the current region and unvisited.  */
13081debfc3dSmrg   FOR_EACH_BB_FN (bb, fun)
13091debfc3dSmrg     {
13101debfc3dSmrg       for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
13111debfc3dSmrg 	   gsi_next (&gsi))
13121debfc3dSmrg 	{
13131debfc3dSmrg 	  gimple *stmt = gsi_stmt (gsi);
13141debfc3dSmrg 	  gimple_set_uid (stmt, -1);
13151debfc3dSmrg 	  gimple_set_visited (stmt, false);
13161debfc3dSmrg 	}
13171debfc3dSmrg     }
13181debfc3dSmrg 
13191debfc3dSmrg   FOR_EACH_BB_FN (bb, fun)
13201debfc3dSmrg     {
13211debfc3dSmrg       if (vect_slp_bb (bb))
1322c0a68be4Smrg 	if (dump_enabled_p ())
1323c0a68be4Smrg 	  dump_printf_loc (MSG_NOTE, vect_location, "basic block vectorized\n");
13241debfc3dSmrg     }
13251debfc3dSmrg 
13261debfc3dSmrg   if (!in_loop_pipeline)
13271debfc3dSmrg     {
13281debfc3dSmrg       scev_finalize ();
13291debfc3dSmrg       loop_optimizer_finalize ();
13301debfc3dSmrg     }
13311debfc3dSmrg 
13321debfc3dSmrg   return 0;
13331debfc3dSmrg }
13341debfc3dSmrg 
13351debfc3dSmrg } // anon namespace
13361debfc3dSmrg 
13371debfc3dSmrg gimple_opt_pass *
make_pass_slp_vectorize(gcc::context * ctxt)13381debfc3dSmrg make_pass_slp_vectorize (gcc::context *ctxt)
13391debfc3dSmrg {
13401debfc3dSmrg   return new pass_slp_vectorize (ctxt);
13411debfc3dSmrg }
13421debfc3dSmrg 
13431debfc3dSmrg 
13441debfc3dSmrg /* Increase alignment of global arrays to improve vectorization potential.
13451debfc3dSmrg    TODO:
13461debfc3dSmrg    - Consider also structs that have an array field.
13471debfc3dSmrg    - Use ipa analysis to prune arrays that can't be vectorized?
13481debfc3dSmrg      This should involve global alignment analysis and in the future also
13491debfc3dSmrg      array padding.  */
13501debfc3dSmrg 
13511debfc3dSmrg static unsigned get_vec_alignment_for_type (tree);
13521debfc3dSmrg static hash_map<tree, unsigned> *type_align_map;
13531debfc3dSmrg 
13541debfc3dSmrg /* Return alignment of array's vector type corresponding to scalar type.
13551debfc3dSmrg    0 if no vector type exists.  */
13561debfc3dSmrg static unsigned
get_vec_alignment_for_array_type(tree type)13571debfc3dSmrg get_vec_alignment_for_array_type (tree type)
13581debfc3dSmrg {
13591debfc3dSmrg   gcc_assert (TREE_CODE (type) == ARRAY_TYPE);
1360a2dc1f3fSmrg   poly_uint64 array_size, vector_size;
13611debfc3dSmrg 
1362*8feb0f0bSmrg   tree scalar_type = strip_array_types (type);
1363*8feb0f0bSmrg   tree vectype = get_related_vectype_for_scalar_type (VOIDmode, scalar_type);
13641debfc3dSmrg   if (!vectype
1365a2dc1f3fSmrg       || !poly_int_tree_p (TYPE_SIZE (type), &array_size)
1366a2dc1f3fSmrg       || !poly_int_tree_p (TYPE_SIZE (vectype), &vector_size)
1367a2dc1f3fSmrg       || maybe_lt (array_size, vector_size))
13681debfc3dSmrg     return 0;
13691debfc3dSmrg 
13701debfc3dSmrg   return TYPE_ALIGN (vectype);
13711debfc3dSmrg }
13721debfc3dSmrg 
13731debfc3dSmrg /* Return alignment of field having maximum alignment of vector type
13741debfc3dSmrg    corresponding to it's scalar type. For now, we only consider fields whose
13751debfc3dSmrg    offset is a multiple of it's vector alignment.
13761debfc3dSmrg    0 if no suitable field is found.  */
13771debfc3dSmrg static unsigned
get_vec_alignment_for_record_type(tree type)13781debfc3dSmrg get_vec_alignment_for_record_type (tree type)
13791debfc3dSmrg {
13801debfc3dSmrg   gcc_assert (TREE_CODE (type) == RECORD_TYPE);
13811debfc3dSmrg 
13821debfc3dSmrg   unsigned max_align = 0, alignment;
13831debfc3dSmrg   HOST_WIDE_INT offset;
13841debfc3dSmrg   tree offset_tree;
13851debfc3dSmrg 
13861debfc3dSmrg   if (TYPE_PACKED (type))
13871debfc3dSmrg     return 0;
13881debfc3dSmrg 
13891debfc3dSmrg   unsigned *slot = type_align_map->get (type);
13901debfc3dSmrg   if (slot)
13911debfc3dSmrg     return *slot;
13921debfc3dSmrg 
13931debfc3dSmrg   for (tree field = first_field (type);
13941debfc3dSmrg        field != NULL_TREE;
13951debfc3dSmrg        field = DECL_CHAIN (field))
13961debfc3dSmrg     {
13971debfc3dSmrg       /* Skip if not FIELD_DECL or if alignment is set by user.  */
13981debfc3dSmrg       if (TREE_CODE (field) != FIELD_DECL
13991debfc3dSmrg 	  || DECL_USER_ALIGN (field)
14001debfc3dSmrg 	  || DECL_ARTIFICIAL (field))
14011debfc3dSmrg 	continue;
14021debfc3dSmrg 
14031debfc3dSmrg       /* We don't need to process the type further if offset is variable,
14041debfc3dSmrg 	 since the offsets of remaining members will also be variable.  */
14051debfc3dSmrg       if (TREE_CODE (DECL_FIELD_OFFSET (field)) != INTEGER_CST
14061debfc3dSmrg 	  || TREE_CODE (DECL_FIELD_BIT_OFFSET (field)) != INTEGER_CST)
14071debfc3dSmrg 	break;
14081debfc3dSmrg 
14091debfc3dSmrg       /* Similarly stop processing the type if offset_tree
14101debfc3dSmrg 	 does not fit in unsigned HOST_WIDE_INT.  */
14111debfc3dSmrg       offset_tree = bit_position (field);
14121debfc3dSmrg       if (!tree_fits_uhwi_p (offset_tree))
14131debfc3dSmrg 	break;
14141debfc3dSmrg 
14151debfc3dSmrg       offset = tree_to_uhwi (offset_tree);
14161debfc3dSmrg       alignment = get_vec_alignment_for_type (TREE_TYPE (field));
14171debfc3dSmrg 
14181debfc3dSmrg       /* Get maximum alignment of vectorized field/array among those members
14191debfc3dSmrg 	 whose offset is multiple of the vector alignment.  */
14201debfc3dSmrg       if (alignment
14211debfc3dSmrg 	  && (offset % alignment == 0)
14221debfc3dSmrg 	  && (alignment > max_align))
14231debfc3dSmrg 	max_align = alignment;
14241debfc3dSmrg     }
14251debfc3dSmrg 
14261debfc3dSmrg   type_align_map->put (type, max_align);
14271debfc3dSmrg   return max_align;
14281debfc3dSmrg }
14291debfc3dSmrg 
14301debfc3dSmrg /* Return alignment of vector type corresponding to decl's scalar type
14311debfc3dSmrg    or 0 if it doesn't exist or the vector alignment is lesser than
14321debfc3dSmrg    decl's alignment.  */
14331debfc3dSmrg static unsigned
get_vec_alignment_for_type(tree type)14341debfc3dSmrg get_vec_alignment_for_type (tree type)
14351debfc3dSmrg {
14361debfc3dSmrg   if (type == NULL_TREE)
14371debfc3dSmrg     return 0;
14381debfc3dSmrg 
14391debfc3dSmrg   gcc_assert (TYPE_P (type));
14401debfc3dSmrg 
14411debfc3dSmrg   static unsigned alignment = 0;
14421debfc3dSmrg   switch (TREE_CODE (type))
14431debfc3dSmrg     {
14441debfc3dSmrg       case ARRAY_TYPE:
14451debfc3dSmrg 	alignment = get_vec_alignment_for_array_type (type);
14461debfc3dSmrg 	break;
14471debfc3dSmrg       case RECORD_TYPE:
14481debfc3dSmrg 	alignment = get_vec_alignment_for_record_type (type);
14491debfc3dSmrg 	break;
14501debfc3dSmrg       default:
14511debfc3dSmrg 	alignment = 0;
14521debfc3dSmrg 	break;
14531debfc3dSmrg     }
14541debfc3dSmrg 
14551debfc3dSmrg   return (alignment > TYPE_ALIGN (type)) ? alignment : 0;
14561debfc3dSmrg }
14571debfc3dSmrg 
14581debfc3dSmrg /* Entry point to increase_alignment pass.  */
14591debfc3dSmrg static unsigned int
increase_alignment(void)14601debfc3dSmrg increase_alignment (void)
14611debfc3dSmrg {
14621debfc3dSmrg   varpool_node *vnode;
14631debfc3dSmrg 
1464c0a68be4Smrg   vect_location = dump_user_location_t ();
14651debfc3dSmrg   type_align_map = new hash_map<tree, unsigned>;
14661debfc3dSmrg 
14671debfc3dSmrg   /* Increase the alignment of all global arrays for vectorization.  */
14681debfc3dSmrg   FOR_EACH_DEFINED_VARIABLE (vnode)
14691debfc3dSmrg     {
14701debfc3dSmrg       tree decl = vnode->decl;
14711debfc3dSmrg       unsigned int alignment;
14721debfc3dSmrg 
14731debfc3dSmrg       if ((decl_in_symtab_p (decl)
14741debfc3dSmrg 	  && !symtab_node::get (decl)->can_increase_alignment_p ())
14751debfc3dSmrg 	  || DECL_USER_ALIGN (decl) || DECL_ARTIFICIAL (decl))
14761debfc3dSmrg 	continue;
14771debfc3dSmrg 
14781debfc3dSmrg       alignment = get_vec_alignment_for_type (TREE_TYPE (decl));
14791debfc3dSmrg       if (alignment && vect_can_force_dr_alignment_p (decl, alignment))
14801debfc3dSmrg         {
14811debfc3dSmrg 	  vnode->increase_alignment (alignment);
1482c0a68be4Smrg 	  if (dump_enabled_p ())
1483c0a68be4Smrg 	    dump_printf (MSG_NOTE, "Increasing alignment of decl: %T\n", decl);
14841debfc3dSmrg         }
14851debfc3dSmrg     }
14861debfc3dSmrg 
14871debfc3dSmrg   delete type_align_map;
14881debfc3dSmrg   return 0;
14891debfc3dSmrg }
14901debfc3dSmrg 
14911debfc3dSmrg 
namespace {

/* Pass descriptor for the increase_alignment simple IPA pass.  */
const pass_data pass_data_ipa_increase_alignment =
{
  SIMPLE_IPA_PASS, /* type */
  "increase_alignment", /* name */
  OPTGROUP_LOOP | OPTGROUP_VEC, /* optinfo_flags */
  TV_IPA_OPT, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

/* Simple IPA pass wrapper that invokes increase_alignment ().  */
class pass_ipa_increase_alignment : public simple_ipa_opt_pass
{
public:
  pass_ipa_increase_alignment (gcc::context *ctxt)
    : simple_ipa_opt_pass (pass_data_ipa_increase_alignment, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      /* Only run when both section anchors and loop vectorization
	 are enabled.  */
      return flag_section_anchors && flag_tree_loop_vectorize;
    }

  virtual unsigned int execute (function *) { return increase_alignment (); }

}; // class pass_ipa_increase_alignment

} // anon namespace
15251debfc3dSmrg 
15261debfc3dSmrg simple_ipa_opt_pass *
make_pass_ipa_increase_alignment(gcc::context * ctxt)15271debfc3dSmrg make_pass_ipa_increase_alignment (gcc::context *ctxt)
15281debfc3dSmrg {
15291debfc3dSmrg   return new pass_ipa_increase_alignment (ctxt);
15301debfc3dSmrg }
1531*8feb0f0bSmrg 
1532*8feb0f0bSmrg /* If the condition represented by T is a comparison or the SSA name
1533*8feb0f0bSmrg    result of a comparison, extract the comparison's operands.  Represent
1534*8feb0f0bSmrg    T as NE_EXPR <T, 0> otherwise.  */
1535*8feb0f0bSmrg 
1536*8feb0f0bSmrg void
get_cond_ops_from_tree(tree t)1537*8feb0f0bSmrg scalar_cond_masked_key::get_cond_ops_from_tree (tree t)
1538*8feb0f0bSmrg {
1539*8feb0f0bSmrg   if (TREE_CODE_CLASS (TREE_CODE (t)) == tcc_comparison)
1540*8feb0f0bSmrg     {
1541*8feb0f0bSmrg       this->code = TREE_CODE (t);
1542*8feb0f0bSmrg       this->op0 = TREE_OPERAND (t, 0);
1543*8feb0f0bSmrg       this->op1 = TREE_OPERAND (t, 1);
1544*8feb0f0bSmrg       return;
1545*8feb0f0bSmrg     }
1546*8feb0f0bSmrg 
1547*8feb0f0bSmrg   if (TREE_CODE (t) == SSA_NAME)
1548*8feb0f0bSmrg     if (gassign *stmt = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (t)))
1549*8feb0f0bSmrg       {
1550*8feb0f0bSmrg 	tree_code code = gimple_assign_rhs_code (stmt);
1551*8feb0f0bSmrg 	if (TREE_CODE_CLASS (code) == tcc_comparison)
1552*8feb0f0bSmrg 	  {
1553*8feb0f0bSmrg 	    this->code = code;
1554*8feb0f0bSmrg 	    this->op0 = gimple_assign_rhs1 (stmt);
1555*8feb0f0bSmrg 	    this->op1 = gimple_assign_rhs2 (stmt);
1556*8feb0f0bSmrg 	    return;
1557*8feb0f0bSmrg 	  }
1558*8feb0f0bSmrg       }
1559*8feb0f0bSmrg 
1560*8feb0f0bSmrg   this->code = NE_EXPR;
1561*8feb0f0bSmrg   this->op0 = t;
1562*8feb0f0bSmrg   this->op1 = build_zero_cst (TREE_TYPE (t));
1563*8feb0f0bSmrg }
1564