/* Vectorizer
   Copyright (C) 2003-2017 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* Loop and basic block vectorizer.

  This file contains drivers for the three vectorizers:
  (1) loop vectorizer (inter-iteration parallelism),
  (2) loop-aware SLP (intra-iteration parallelism) (invoked by the loop
      vectorizer)
  (3) BB vectorizer (out-of-loops), aka SLP

  The rest of the vectorizer's code is organized as follows:
  - tree-vect-loop.c - loop specific parts such as reductions, etc. These are
    used by drivers (1) and (2).
  - tree-vect-loop-manip.c - vectorizer's loop control-flow utilities, used by
    drivers (1) and (2).
  - tree-vect-slp.c - BB vectorization specific analysis and transformation,
    used by drivers (2) and (3).
  - tree-vect-stmts.c - statement analysis and transformation (used by all).
  - tree-vect-data-refs.c - vectorizer specific data-refs analysis and
    manipulations (used by all).
  - tree-vect-patterns.c - vectorizable code patterns detector (used by all)
  Here's a poor attempt at illustrating that:

     tree-vectorizer.c:
     loop_vect()  loop_aware_slp()  slp_vect()
          |        /           \          /
          |       /             \        /
          tree-vect-loop.c  tree-vect-slp.c
                | \      \  /      /   |
                |  \      \/      /    |
                |   \     /\     /     |
                |    \   /  \   /      |
         tree-vect-stmts.c  tree-vect-data-refs.c
                       \      /
                    tree-vect-patterns.c
*/

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "predict.h"
#include "tree-pass.h"
#include "ssa.h"
#include "cgraph.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "gimple-walk.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-cfg.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "tree-ssa-propagate.h"
#include "dbgcnt.h"
#include "tree-scalar-evolution.h"


/* Loop or bb location.  */
source_location vect_location;

/* Vector mapping GIMPLE stmt to stmt_vec_info.  */
vec<stmt_vec_info> stmt_vec_info_vec;

/* For mapping simduid to vectorization factor.  */

struct simduid_to_vf : free_ptr_hash<simduid_to_vf>
{
  unsigned int simduid;
  int vf;

  /* hash_table support.  */
  static inline hashval_t hash (const simduid_to_vf *);
  static inline int equal (const simduid_to_vf *, const simduid_to_vf *);
};

inline hashval_t
simduid_to_vf::hash (const simduid_to_vf *p)
{
  return p->simduid;
}

inline int
simduid_to_vf::equal (const simduid_to_vf *p1, const simduid_to_vf *p2)
{
  return p1->simduid == p2->simduid;
}

/* This hash maps an OMP simd array to the simduid used to index into
   it.  For example, given

        _7 = GOMP_SIMD_LANE (simduid.0)
        ...
        ...
        D.1737[_7] = stuff;

   the hash maps the OMP simd array (D.1737[]) to the DECL_UID of
   simduid.0.  */

struct simd_array_to_simduid : free_ptr_hash<simd_array_to_simduid>
{
  tree decl;
  unsigned int simduid;

  /* hash_table support.  */
  static inline hashval_t hash (const simd_array_to_simduid *);
  static inline int equal (const simd_array_to_simduid *,
			   const simd_array_to_simduid *);
};

inline hashval_t
simd_array_to_simduid::hash (const simd_array_to_simduid *p)
{
  return DECL_UID (p->decl);
}

inline int
simd_array_to_simduid::equal (const simd_array_to_simduid *p1,
			      const simd_array_to_simduid *p2)
{
  return p1->decl == p2->decl;
}

/* Fold IFN_GOMP_SIMD_LANE, IFN_GOMP_SIMD_VF and IFN_GOMP_SIMD_LAST_LANE
   into their corresponding constants and remove
   IFN_GOMP_SIMD_ORDERED_{START,END}.  */
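/* An illustrative sketch of the folding (the SSA names and the factor 8
   below are made up, assuming HTAB maps simduid.0's DECL_UID to vf == 8):

	_5 = GOMP_SIMD_VF (simduid.0);		   ->  _5 = 8;
	_7 = GOMP_SIMD_LANE (simduid.0);	   ->  _7 = 0;
	_9 = GOMP_SIMD_LAST_LANE (simduid.0, _3);  ->  _9 = _3;

   If HTAB is NULL or has no entry for the simduid, vf defaults to 1.  */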

static void
adjust_simduid_builtins (hash_table<simduid_to_vf> *htab)
{
  basic_block bb;

  FOR_EACH_BB_FN (bb, cfun)
    {
      gimple_stmt_iterator i;

      for (i = gsi_start_bb (bb); !gsi_end_p (i); )
	{
	  unsigned int vf = 1;
	  enum internal_fn ifn;
	  gimple *stmt = gsi_stmt (i);
	  tree t;
	  if (!is_gimple_call (stmt)
	      || !gimple_call_internal_p (stmt))
	    {
	      gsi_next (&i);
	      continue;
	    }
	  ifn = gimple_call_internal_fn (stmt);
	  switch (ifn)
	    {
	    case IFN_GOMP_SIMD_LANE:
	    case IFN_GOMP_SIMD_VF:
	    case IFN_GOMP_SIMD_LAST_LANE:
	      break;
	    case IFN_GOMP_SIMD_ORDERED_START:
	    case IFN_GOMP_SIMD_ORDERED_END:
	      if (integer_onep (gimple_call_arg (stmt, 0)))
		{
		  enum built_in_function bcode
		    = (ifn == IFN_GOMP_SIMD_ORDERED_START
		       ? BUILT_IN_GOMP_ORDERED_START
		       : BUILT_IN_GOMP_ORDERED_END);
		  gimple *g
		    = gimple_build_call (builtin_decl_explicit (bcode), 0);
		  tree vdef = gimple_vdef (stmt);
		  gimple_set_vdef (g, vdef);
		  SSA_NAME_DEF_STMT (vdef) = g;
		  gimple_set_vuse (g, gimple_vuse (stmt));
		  gsi_replace (&i, g, true);
		  continue;
		}
	      gsi_remove (&i, true);
	      unlink_stmt_vdef (stmt);
	      continue;
	    default:
	      gsi_next (&i);
	      continue;
	    }
	  tree arg = gimple_call_arg (stmt, 0);
	  gcc_assert (arg != NULL_TREE);
	  gcc_assert (TREE_CODE (arg) == SSA_NAME);
	  simduid_to_vf *p = NULL, data;
	  data.simduid = DECL_UID (SSA_NAME_VAR (arg));
	  /* Need to nullify the loop safelen field since its value is not
	     valid after the transformation.  */
	  if (bb->loop_father && bb->loop_father->safelen > 0)
	    bb->loop_father->safelen = 0;
	  if (htab)
	    {
	      p = htab->find (&data);
	      if (p)
		vf = p->vf;
	    }
	  switch (ifn)
	    {
	    case IFN_GOMP_SIMD_VF:
	      t = build_int_cst (unsigned_type_node, vf);
	      break;
	    case IFN_GOMP_SIMD_LANE:
	      t = build_int_cst (unsigned_type_node, 0);
	      break;
	    case IFN_GOMP_SIMD_LAST_LANE:
	      t = gimple_call_arg (stmt, 1);
	      break;
	    default:
	      gcc_unreachable ();
	    }
	  update_call_from_tree (&i, t);
	  gsi_next (&i);
	}
    }
}

/* Helper structure for note_simd_array_uses.  */

struct note_simd_array_uses_struct
{
  hash_table<simd_array_to_simduid> **htab;
  unsigned int simduid;
};

/* Callback for note_simd_array_uses, called through walk_gimple_op.  */

static tree
note_simd_array_uses_cb (tree *tp, int *walk_subtrees, void *data)
{
  struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
  struct note_simd_array_uses_struct *ns
    = (struct note_simd_array_uses_struct *) wi->info;

  if (TYPE_P (*tp))
    *walk_subtrees = 0;
  else if (VAR_P (*tp)
	   && lookup_attribute ("omp simd array", DECL_ATTRIBUTES (*tp))
	   && DECL_CONTEXT (*tp) == current_function_decl)
    {
      simd_array_to_simduid data;
      if (!*ns->htab)
	*ns->htab = new hash_table<simd_array_to_simduid> (15);
      data.decl = *tp;
      data.simduid = ns->simduid;
      simd_array_to_simduid **slot = (*ns->htab)->find_slot (&data, INSERT);
      if (*slot == NULL)
	{
	  simd_array_to_simduid *p = XNEW (simd_array_to_simduid);
	  *p = data;
	  *slot = p;
	}
      else if ((*slot)->simduid != ns->simduid)
	(*slot)->simduid = -1U;
      *walk_subtrees = 0;
    }
  return NULL_TREE;
}

/* Find "omp simd array" temporaries and map them to the corresponding
   simduid.  */
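/* For the GIMPLE snippet shown before simd_array_to_simduid above, this
   would record the mapping D.1737 -> DECL_UID (simduid.0) (illustrative
   names); an array reached from two different simduids has its entry
   invalidated to -1U by the callback.  */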

static void
note_simd_array_uses (hash_table<simd_array_to_simduid> **htab)
{
  basic_block bb;
  gimple_stmt_iterator gsi;
  struct walk_stmt_info wi;
  struct note_simd_array_uses_struct ns;

  memset (&wi, 0, sizeof (wi));
  wi.info = &ns;
  ns.htab = htab;

  FOR_EACH_BB_FN (bb, cfun)
    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple *stmt = gsi_stmt (gsi);
	if (!is_gimple_call (stmt) || !gimple_call_internal_p (stmt))
	  continue;
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_GOMP_SIMD_LANE:
	  case IFN_GOMP_SIMD_VF:
	  case IFN_GOMP_SIMD_LAST_LANE:
	    break;
	  default:
	    continue;
	  }
	tree lhs = gimple_call_lhs (stmt);
	if (lhs == NULL_TREE)
	  continue;
	imm_use_iterator use_iter;
	gimple *use_stmt;
	ns.simduid = DECL_UID (SSA_NAME_VAR (gimple_call_arg (stmt, 0)));
	FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, lhs)
	  if (!is_gimple_debug (use_stmt))
	    walk_gimple_op (use_stmt, note_simd_array_uses_cb, &wi);
      }
}

/* Shrink arrays with "omp simd array" attribute to the corresponding
   vectorization factor.  */
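/* With the illustrative names used earlier and a recorded vf of 8, the
   temporary D.1737[] would be re-laid out as an 8-element array; only
   the number of elements changes (via build_array_type_nelts and
   relayout_decl), not the element type.  */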

static void
shrink_simd_arrays
  (hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab,
   hash_table<simduid_to_vf> *simduid_to_vf_htab)
{
  for (hash_table<simd_array_to_simduid>::iterator iter
	 = simd_array_to_simduid_htab->begin ();
       iter != simd_array_to_simduid_htab->end (); ++iter)
    if ((*iter)->simduid != -1U)
      {
	tree decl = (*iter)->decl;
	int vf = 1;
	if (simduid_to_vf_htab)
	  {
	    simduid_to_vf *p = NULL, data;
	    data.simduid = (*iter)->simduid;
	    p = simduid_to_vf_htab->find (&data);
	    if (p)
	      vf = p->vf;
	  }
	tree atype
	  = build_array_type_nelts (TREE_TYPE (TREE_TYPE (decl)), vf);
	TREE_TYPE (decl) = atype;
	relayout_decl (decl);
      }

  delete simd_array_to_simduid_htab;
}

/* A helper function to free data refs.  */

void
vect_destroy_datarefs (vec_info *vinfo)
{
  struct data_reference *dr;
  unsigned int i;

  FOR_EACH_VEC_ELT (vinfo->datarefs, i, dr)
    if (dr->aux)
      {
        free (dr->aux);
        dr->aux = NULL;
      }

  free_data_refs (vinfo->datarefs);
}

/* A helper function to free scev and LOOP niter information, as well as
   clear loop constraint LOOP_C_FINITE.  */

void
vect_free_loop_info_assumptions (struct loop *loop)
{
  scev_reset_htab ();
  /* We need to explicitly reset upper bound information since it is
     used even after free_numbers_of_iterations_estimates_loop.  */
  loop->any_upper_bound = false;
  loop->any_likely_upper_bound = false;
  free_numbers_of_iterations_estimates_loop (loop);
  loop_constraint_clear (loop, LOOP_C_FINITE);
}

/* Return whether STMT is inside the region we try to vectorize.  */

bool
vect_stmt_in_region_p (vec_info *vinfo, gimple *stmt)
{
  if (!gimple_bb (stmt))
    return false;

  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
    {
      struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
      if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
	return false;
    }
  else
    {
      bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
      if (gimple_bb (stmt) != BB_VINFO_BB (bb_vinfo)
	  || gimple_uid (stmt) == -1U
	  || gimple_code (stmt) == GIMPLE_PHI)
	return false;
    }

  return true;
}


/* If LOOP has been versioned during ifcvt, return the internal call
   guarding it.  */
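/* A sketch of the shape being matched, on illustrative GIMPLE (block and
   SSA names are made up):

	_1 = LOOP_VECTORIZED (2, 3);
	if (_1 != 0)
	  goto <loop 2 preheader>;	// to-be-vectorized version
	else
	  goto <loop 3 preheader>;	// scalar version

   We walk up from LOOP's preheader through empty single-predecessor
   blocks to the block ending in a GIMPLE_COND, and return the call
   immediately preceding that condition if one of its arguments names
   LOOP.  */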

static gimple *
vect_loop_vectorized_call (struct loop *loop)
{
  basic_block bb = loop_preheader_edge (loop)->src;
  gimple *g;
  do
    {
      g = last_stmt (bb);
      if (g)
	break;
      if (!single_pred_p (bb))
	break;
      bb = single_pred (bb);
    }
  while (1);
  if (g && gimple_code (g) == GIMPLE_COND)
    {
      gimple_stmt_iterator gsi = gsi_for_stmt (g);
      gsi_prev (&gsi);
      if (!gsi_end_p (gsi))
	{
	  g = gsi_stmt (gsi);
	  if (gimple_call_internal_p (g, IFN_LOOP_VECTORIZED)
	      && (tree_to_shwi (gimple_call_arg (g, 0)) == loop->num
		  || tree_to_shwi (gimple_call_arg (g, 1)) == loop->num))
	    return g;
	}
    }
  return NULL;
}

/* Fold LOOP_VECTORIZED internal call G to VALUE and
   update any immediate uses of its LHS.  */
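/* E.g. folding G to boolean_false_node turns the guard sketched above
   into "if (0 != 0)", so the subsequent CFG cleanup requested by the
   callers (via TODO_cleanup_cfg) can delete the then-dead loop copy.  */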

static void
fold_loop_vectorized_call (gimple *g, tree value)
{
  tree lhs = gimple_call_lhs (g);
  use_operand_p use_p;
  imm_use_iterator iter;
  gimple *use_stmt;
  gimple_stmt_iterator gsi = gsi_for_stmt (g);

  update_call_from_tree (&gsi, value);
  FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
    {
      FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
	SET_USE (use_p, value);
      update_stmt (use_stmt);
    }
}

/* Set the uids of all the statements in basic blocks inside loop
   represented by LOOP_VINFO.  LOOP_VECTORIZED_CALL is the internal
   call guarding the loop which has been if-converted.  */
static void
set_uid_loop_bbs (loop_vec_info loop_vinfo, gimple *loop_vectorized_call)
{
  tree arg = gimple_call_arg (loop_vectorized_call, 1);
  basic_block *bbs;
  unsigned int i;
  struct loop *scalar_loop = get_loop (cfun, tree_to_shwi (arg));

  LOOP_VINFO_SCALAR_LOOP (loop_vinfo) = scalar_loop;
  gcc_checking_assert (vect_loop_vectorized_call (scalar_loop)
		       == loop_vectorized_call);
  /* If we are going to vectorize the outer loop, prevent vectorization
     of the inner loop in the scalar loop - either the scalar loop is
     thrown away, making that work wasted, or it is used only for
     a few iterations.  */
  if (scalar_loop->inner)
    {
      gimple *g = vect_loop_vectorized_call (scalar_loop->inner);
      if (g)
	{
	  arg = gimple_call_arg (g, 0);
	  get_loop (cfun, tree_to_shwi (arg))->dont_vectorize = true;
	  fold_loop_vectorized_call (g, boolean_false_node);
	}
    }
  bbs = get_loop_body (scalar_loop);
  for (i = 0; i < scalar_loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *phi = gsi_stmt (gsi);
	  gimple_set_uid (phi, 0);
	}
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  gimple_set_uid (stmt, 0);
	}
    }
  free (bbs);
}

/* Function vectorize_loops.

   Entry point to loop vectorization phase.  */

unsigned
vectorize_loops (void)
{
  unsigned int i;
  unsigned int num_vectorized_loops = 0;
  unsigned int vect_loops_num;
  struct loop *loop;
  hash_table<simduid_to_vf> *simduid_to_vf_htab = NULL;
  hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab = NULL;
  bool any_ifcvt_loops = false;
  unsigned ret = 0;
  struct loop *new_loop;

  vect_loops_num = number_of_loops (cfun);

  /* Bail out if there are no loops.  */
  if (vect_loops_num <= 1)
    return 0;

  if (cfun->has_simduid_loops)
    note_simd_array_uses (&simd_array_to_simduid_htab);

  init_stmt_vec_info_vec ();

  /*  ----------- Analyze loops. -----------  */

  /* If some loop was duplicated, it gets a bigger number
     than all previously defined loops.  This fact allows us to run
     only over the initial loops, skipping newly generated ones.  */
  FOR_EACH_LOOP (loop, 0)
    if (loop->dont_vectorize)
      {
	any_ifcvt_loops = true;
	/* If-conversion sometimes versions both the outer loop
	   (for the case when outer loop vectorization might be
	   desirable) and the inner loop in the scalar version
	   of the loop.  So we have:
	    if (LOOP_VECTORIZED (1, 3))
	      {
		loop1
		  loop2
	      }
	    else
	      loop3 (copy of loop1)
		if (LOOP_VECTORIZED (4, 5))
		  loop4 (copy of loop2)
		else
		  loop5 (copy of loop4)
	   If FOR_EACH_LOOP gives us loop3 first (which has
	   dont_vectorize set), make sure to process loop1 before loop4,
	   so that we can prevent vectorization of loop4 if loop1
	   is successfully vectorized.  */
	if (loop->inner)
	  {
	    gimple *loop_vectorized_call
	      = vect_loop_vectorized_call (loop);
	    if (loop_vectorized_call
		&& vect_loop_vectorized_call (loop->inner))
	      {
		tree arg = gimple_call_arg (loop_vectorized_call, 0);
		struct loop *vector_loop
		  = get_loop (cfun, tree_to_shwi (arg));
		if (vector_loop && vector_loop != loop)
		  {
		    loop = vector_loop;
		    /* Make sure we don't vectorize it twice.  */
		    loop->dont_vectorize = true;
		    goto try_vectorize;
		  }
	      }
	  }
      }
    else
      {
	loop_vec_info loop_vinfo, orig_loop_vinfo;
	gimple *loop_vectorized_call;
       try_vectorize:
	if (!((flag_tree_loop_vectorize
	       && optimize_loop_nest_for_speed_p (loop))
	      || loop->force_vectorize))
	  continue;
	orig_loop_vinfo = NULL;
	loop_vectorized_call = vect_loop_vectorized_call (loop);
       vectorize_epilogue:
	vect_location = find_loop_location (loop);
        if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
	    && dump_enabled_p ())
	  dump_printf (MSG_NOTE, "\nAnalyzing loop at %s:%d\n",
                       LOCATION_FILE (vect_location),
		       LOCATION_LINE (vect_location));

	loop_vinfo = vect_analyze_loop (loop, orig_loop_vinfo);
	loop->aux = loop_vinfo;

	if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
	  {
	    /* Free existing information if the loop was analyzed with some
	       assumptions.  */
	    if (loop_constraint_set_p (loop, LOOP_C_FINITE))
	      vect_free_loop_info_assumptions (loop);

	    /* If we applied if-conversion, then try to vectorize the
	       BB of innermost loops.
	       ???  Ideally BB vectorization would learn to vectorize
	       control flow by applying if-conversion on the fly; the
	       following retains the if-converted loop body even when
	       only non-if-converted parts took part in BB vectorization.  */
	    if (flag_tree_slp_vectorize != 0
		&& loop_vectorized_call
		&& ! loop->inner)
	      {
		basic_block bb = loop->header;
		bool has_mask_load_store = false;
		for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
		     !gsi_end_p (gsi); gsi_next (&gsi))
		  {
		    gimple *stmt = gsi_stmt (gsi);
		    if (is_gimple_call (stmt)
			&& gimple_call_internal_p (stmt)
			&& (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
			    || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
		      {
			has_mask_load_store = true;
			break;
		      }
		    gimple_set_uid (stmt, -1);
		    gimple_set_visited (stmt, false);
		  }
		if (! has_mask_load_store && vect_slp_bb (bb))
		  {
		    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
				     "basic block vectorized\n");
		    fold_loop_vectorized_call (loop_vectorized_call,
					       boolean_true_node);
		    loop_vectorized_call = NULL;
		    ret |= TODO_cleanup_cfg;
		  }
	      }
	    /* If outer loop vectorization fails for a LOOP_VECTORIZED guarded
	       loop, don't vectorize its inner loop; we'll attempt to
	       vectorize the LOOP_VECTORIZED guarded inner loop of the scalar
	       loop version instead.  */
	    if (loop_vectorized_call && loop->inner)
	      loop->inner->dont_vectorize = true;
	    continue;
	  }

        if (!dbg_cnt (vect_loop))
	  {
	    /* We may miss some if-converted loops due to the
	       debug counter.  Set any_ifcvt_loops to visit
	       them at finalization.  */
	    any_ifcvt_loops = true;
	    /* Free existing information if the loop was analyzed with some
	       assumptions.  */
	    if (loop_constraint_set_p (loop, LOOP_C_FINITE))
	      vect_free_loop_info_assumptions (loop);

	    break;
	  }

	if (loop_vectorized_call)
	  set_uid_loop_bbs (loop_vinfo, loop_vectorized_call);
        if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
	    && dump_enabled_p ())
          dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                           "loop vectorized\n");
	new_loop = vect_transform_loop (loop_vinfo);
	num_vectorized_loops++;
	/* Now that the loop has been vectorized, allow it to be unrolled
	   etc.  */
	loop->force_vectorize = false;

	if (loop->simduid)
	  {
	    simduid_to_vf *simduid_to_vf_data = XNEW (simduid_to_vf);
	    if (!simduid_to_vf_htab)
	      simduid_to_vf_htab = new hash_table<simduid_to_vf> (15);
	    simduid_to_vf_data->simduid = DECL_UID (loop->simduid);
	    simduid_to_vf_data->vf = loop_vinfo->vectorization_factor;
	    *simduid_to_vf_htab->find_slot (simduid_to_vf_data, INSERT)
	      = simduid_to_vf_data;
	  }

	if (loop_vectorized_call)
	  {
	    fold_loop_vectorized_call (loop_vectorized_call, boolean_true_node);
	    loop_vectorized_call = NULL;
	    ret |= TODO_cleanup_cfg;
	  }

	if (new_loop)
	  {
	    /* The epilogue of a vectorized loop must be vectorized too.  */
	    vect_loops_num = number_of_loops (cfun);
	    loop = new_loop;
	    orig_loop_vinfo = loop_vinfo;  /* To pass to vect_analyze_loop.  */
	    goto vectorize_epilogue;
	  }
      }

  vect_location = UNKNOWN_LOCATION;

  statistics_counter_event (cfun, "Vectorized loops", num_vectorized_loops);
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vectorized %u loops in function.\n",
                     num_vectorized_loops);

  /*  ----------- Finalize. -----------  */

  if (any_ifcvt_loops)
    for (i = 1; i < vect_loops_num; i++)
      {
	loop = get_loop (cfun, i);
	if (loop && loop->dont_vectorize)
	  {
	    gimple *g = vect_loop_vectorized_call (loop);
	    if (g)
	      {
		fold_loop_vectorized_call (g, boolean_false_node);
		ret |= TODO_cleanup_cfg;
	      }
	  }
      }

  for (i = 1; i < vect_loops_num; i++)
    {
      loop_vec_info loop_vinfo;
      bool has_mask_store;

      loop = get_loop (cfun, i);
      if (!loop)
	continue;
      loop_vinfo = (loop_vec_info) loop->aux;
      has_mask_store = false;
      if (loop_vinfo)
	has_mask_store = LOOP_VINFO_HAS_MASK_STORE (loop_vinfo);
      destroy_loop_vec_info (loop_vinfo, true);
      if (has_mask_store)
	optimize_mask_stores (loop);
      loop->aux = NULL;
    }

  free_stmt_vec_info_vec ();

  /* Fold IFN_GOMP_SIMD_{VF,LANE,LAST_LANE,ORDERED_{START,END}} builtins.  */
  if (cfun->has_simduid_loops)
    adjust_simduid_builtins (simduid_to_vf_htab);

  /* Shrink any "omp simd array" temporary arrays to the
     actual vectorization factors.  */
  if (simd_array_to_simduid_htab)
    shrink_simd_arrays (simd_array_to_simduid_htab, simduid_to_vf_htab);
  delete simduid_to_vf_htab;
  cfun->has_simduid_loops = false;

  if (num_vectorized_loops > 0)
    {
      /* If we vectorized any loop, only virtual SSA form needs to be updated.
	 ???  Also, while we try hard to update loop-closed SSA form, we fail
	 to do this properly in some corner cases (see PR56286).  */
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa_only_virtuals);
      return TODO_cleanup_cfg;
    }

  return ret;
}


/* Entry point to the simduid cleanup pass.  */

namespace {

const pass_data pass_data_simduid_cleanup =
{
  GIMPLE_PASS, /* type */
  "simduid", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  ( PROP_ssa | PROP_cfg ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_simduid_cleanup : public gimple_opt_pass
{
public:
  pass_simduid_cleanup (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_simduid_cleanup, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_simduid_cleanup (m_ctxt); }
  virtual bool gate (function *fun) { return fun->has_simduid_loops; }
  virtual unsigned int execute (function *);

}; // class pass_simduid_cleanup

unsigned int
pass_simduid_cleanup::execute (function *fun)
{
  hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab = NULL;

  note_simd_array_uses (&simd_array_to_simduid_htab);

  /* Fold IFN_GOMP_SIMD_{VF,LANE,LAST_LANE,ORDERED_{START,END}} builtins.  */
  adjust_simduid_builtins (NULL);

  /* Shrink any "omp simd array" temporary arrays to the
     actual vectorization factors.  */
  if (simd_array_to_simduid_htab)
    shrink_simd_arrays (simd_array_to_simduid_htab, NULL);
  fun->has_simduid_loops = false;
  return 0;
}

}  // anon namespace

gimple_opt_pass *
make_pass_simduid_cleanup (gcc::context *ctxt)
{
  return new pass_simduid_cleanup (ctxt);
}


/* Entry point to basic block SLP phase.  */

namespace {

const pass_data pass_data_slp_vectorize =
{
  GIMPLE_PASS, /* type */
  "slp", /* name */
  OPTGROUP_LOOP | OPTGROUP_VEC, /* optinfo_flags */
  TV_TREE_SLP_VECTORIZATION, /* tv_id */
  ( PROP_ssa | PROP_cfg ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};

class pass_slp_vectorize : public gimple_opt_pass
{
public:
  pass_slp_vectorize (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_slp_vectorize, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_slp_vectorize (m_ctxt); }
  virtual bool gate (function *) { return flag_tree_slp_vectorize != 0; }
  virtual unsigned int execute (function *);

}; // class pass_slp_vectorize

unsigned int
pass_slp_vectorize::execute (function *fun)
{
  basic_block bb;

  bool in_loop_pipeline = scev_initialized_p ();
  if (!in_loop_pipeline)
    {
      loop_optimizer_init (LOOPS_NORMAL);
      scev_initialize ();
    }

  /* Mark all stmts as not belonging to the current region and unvisited.  */
  FOR_EACH_BB_FN (bb, fun)
    {
      for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  gimple_set_uid (stmt, -1);
	  gimple_set_visited (stmt, false);
	}
    }

  init_stmt_vec_info_vec ();

  FOR_EACH_BB_FN (bb, fun)
    {
      if (vect_slp_bb (bb))
	dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
			 "basic block vectorized\n");
    }

  free_stmt_vec_info_vec ();

  if (!in_loop_pipeline)
    {
      scev_finalize ();
      loop_optimizer_finalize ();
    }

  return 0;
}

} // anon namespace

gimple_opt_pass *
make_pass_slp_vectorize (gcc::context *ctxt)
{
  return new pass_slp_vectorize (ctxt);
}


/* Increase alignment of global arrays to improve vectorization potential.
   TODO:
   - Consider also structs that have an array field.
   - Use ipa analysis to prune arrays that can't be vectorized?
     This should involve global alignment analysis and in the future also
     array padding.  */

static unsigned get_vec_alignment_for_type (tree);
static hash_map<tree, unsigned> *type_align_map;

/* Return the alignment of the vector type corresponding to the array's
   scalar element type, or 0 if no such vector type exists or the array
   is smaller than one vector.  */
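/* Illustrative, target-dependent example: for int a[256] on a target
   providing a 128-bit V4SI vector type this returns TYPE_ALIGN of V4SI,
   i.e. 128; for int a[2] it returns 0 because the whole array is
   smaller than one vector.  */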
static unsigned
get_vec_alignment_for_array_type (tree type)
{
  gcc_assert (TREE_CODE (type) == ARRAY_TYPE);

  tree vectype = get_vectype_for_scalar_type (strip_array_types (type));
  if (!vectype
      || !TYPE_SIZE (type)
      || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
      || tree_int_cst_lt (TYPE_SIZE (type), TYPE_SIZE (vectype)))
    return 0;

  return TYPE_ALIGN (vectype);
}

/* Return the maximum alignment, over the record's fields, of the vector
   type corresponding to each field's scalar type.  For now, we only
   consider fields whose offset is a multiple of their vector alignment.
   Return 0 if no suitable field is found.  */
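/* Illustrative example: in struct { double d[4]; long long x; }, field d
   sits at bit offset 0, so if the (hypothetical) vector type for double
   has 128-bit alignment, d qualifies (0 % 128 == 0) and 128 becomes the
   candidate maximum; a field whose bit offset is not a multiple of its
   vector alignment is ignored.  */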
static unsigned
get_vec_alignment_for_record_type (tree type)
{
  gcc_assert (TREE_CODE (type) == RECORD_TYPE);

  unsigned max_align = 0, alignment;
  HOST_WIDE_INT offset;
  tree offset_tree;

  if (TYPE_PACKED (type))
    return 0;

  unsigned *slot = type_align_map->get (type);
  if (slot)
    return *slot;

  for (tree field = first_field (type);
       field != NULL_TREE;
       field = DECL_CHAIN (field))
    {
      /* Skip if not FIELD_DECL or if alignment is set by user.  */
      if (TREE_CODE (field) != FIELD_DECL
	  || DECL_USER_ALIGN (field)
	  || DECL_ARTIFICIAL (field))
	continue;

      /* We don't need to process the type further if offset is variable,
	 since the offsets of remaining members will also be variable.  */
      if (TREE_CODE (DECL_FIELD_OFFSET (field)) != INTEGER_CST
	  || TREE_CODE (DECL_FIELD_BIT_OFFSET (field)) != INTEGER_CST)
	break;

      /* Similarly stop processing the type if offset_tree
	 does not fit in unsigned HOST_WIDE_INT.  */
      offset_tree = bit_position (field);
      if (!tree_fits_uhwi_p (offset_tree))
	break;

      offset = tree_to_uhwi (offset_tree);
      alignment = get_vec_alignment_for_type (TREE_TYPE (field));

      /* Get the maximum alignment of vectorized field/array among those
	 members whose offset is a multiple of the vector alignment.  */
      if (alignment
	  && (offset % alignment == 0)
	  && (alignment > max_align))
	max_align = alignment;
    }

  type_align_map->put (type, max_align);
  return max_align;
}

/* Return the alignment of the vector type corresponding to TYPE's scalar
   type, or 0 if no such vector type exists or its alignment is not
   greater than TYPE's alignment.  */
static unsigned
get_vec_alignment_for_type (tree type)
{
  if (type == NULL_TREE)
    return 0;

  gcc_assert (TYPE_P (type));

  static unsigned alignment = 0;
  switch (TREE_CODE (type))
    {
      case ARRAY_TYPE:
	alignment = get_vec_alignment_for_array_type (type);
	break;
      case RECORD_TYPE:
	alignment = get_vec_alignment_for_record_type (type);
	break;
      default:
	alignment = 0;
	break;
    }

  return (alignment > TYPE_ALIGN (type)) ? alignment : 0;
}

/* Entry point to increase_alignment pass.  */
static unsigned int
increase_alignment (void)
{
  varpool_node *vnode;

  vect_location = UNKNOWN_LOCATION;
  type_align_map = new hash_map<tree, unsigned>;

  /* Increase the alignment of all global arrays for vectorization.  */
  FOR_EACH_DEFINED_VARIABLE (vnode)
    {
      tree decl = vnode->decl;
      unsigned int alignment;

      if ((decl_in_symtab_p (decl)
	  && !symtab_node::get (decl)->can_increase_alignment_p ())
	  || DECL_USER_ALIGN (decl) || DECL_ARTIFICIAL (decl))
	continue;

      alignment = get_vec_alignment_for_type (TREE_TYPE (decl));
      if (alignment && vect_can_force_dr_alignment_p (decl, alignment))
        {
	  vnode->increase_alignment (alignment);
          dump_printf (MSG_NOTE, "Increasing alignment of decl: ");
          dump_generic_expr (MSG_NOTE, TDF_SLIM, decl);
          dump_printf (MSG_NOTE, "\n");
        }
    }

  delete type_align_map;
  return 0;
}


namespace {

const pass_data pass_data_ipa_increase_alignment =
{
  SIMPLE_IPA_PASS, /* type */
  "increase_alignment", /* name */
  OPTGROUP_LOOP | OPTGROUP_VEC, /* optinfo_flags */
  TV_IPA_OPT, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_ipa_increase_alignment : public simple_ipa_opt_pass
{
public:
  pass_ipa_increase_alignment (gcc::context *ctxt)
    : simple_ipa_opt_pass (pass_data_ipa_increase_alignment, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return flag_section_anchors && flag_tree_loop_vectorize;
    }

  virtual unsigned int execute (function *) { return increase_alignment (); }

}; // class pass_ipa_increase_alignment

} // anon namespace

simple_ipa_opt_pass *
make_pass_ipa_increase_alignment (gcc::context *ctxt)
{
  return new pass_ipa_increase_alignment (ctxt);
}
1119