/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2020 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
#include "gimple-walk.h"
#include "dbgcnt.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "gimple-fold.h"
#include "internal-fn.h"


/* Recursively free the memory allocated for the SLP tree rooted at NODE.
   FINAL_P is true if we have vectorized the instance or if we have
   made a final decision not to vectorize the statements in any way.  */

static void
vect_free_slp_tree (slp_tree node, bool final_p)
{
  int i;
  slp_tree child;

  if (--node->refcnt != 0)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree (child, final_p);

  /* Don't update STMT_VINFO_NUM_SLP_USES if it isn't relevant.
     Some statements might no longer exist, after having been
     removed by vect_transform_stmt.  Updating the remaining
     statements would be redundant.  */
  if (!final_p)
    {
      stmt_vec_info stmt_info;
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
	{
	  gcc_assert (STMT_VINFO_NUM_SLP_USES (stmt_info) > 0);
	  STMT_VINFO_NUM_SLP_USES (stmt_info)--;
	}
    }

  SLP_TREE_CHILDREN (node).release ();
  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_SCALAR_OPS (node).release ();
  SLP_TREE_VEC_STMTS (node).release ();
  SLP_TREE_LOAD_PERMUTATION (node).release ();

  free (node);
}

/* Free the memory allocated for the SLP instance.  FINAL_P is true if we
   have vectorized the instance or if we have made a final decision not
   to vectorize the statements in any way.  */

void
vect_free_slp_instance (slp_instance instance, bool final_p)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance), final_p);
  SLP_INSTANCE_LOADS (instance).release ();
  free (instance);
}


/* Create an SLP node for SCALAR_STMTS.  */

static slp_tree
vect_create_new_slp_node (vec<stmt_vec_info> scalar_stmts)
{
  slp_tree node;
  stmt_vec_info stmt_info = scalar_stmts[0];
  unsigned int nops;

  if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
    nops = gimple_call_num_args (stmt);
  else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
	nops++;
    }
  else if (is_a <gphi *> (stmt_info->stmt))
    nops = 0;
  else
    return NULL;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_SCALAR_OPS (node) = vNULL;
  SLP_TREE_VEC_STMTS (node).create (0);
  SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
  SLP_TREE_CHILDREN (node).create (nops);
  SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
  SLP_TREE_TWO_OPERATORS (node) = false;
  SLP_TREE_DEF_TYPE (node) = vect_internal_def;
  node->refcnt = 1;
  node->max_nunits = 1;

  unsigned i;
  FOR_EACH_VEC_ELT (scalar_stmts, i, stmt_info)
    STMT_VINFO_NUM_SLP_USES (stmt_info)++;

  return node;
}

/* Create an SLP node for OPS.  */

static slp_tree
vect_create_new_slp_node (vec<tree> ops)
{
  slp_tree node;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = vNULL;
  SLP_TREE_SCALAR_OPS (node) = ops;
  SLP_TREE_VEC_STMTS (node).create (0);
  SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
  SLP_TREE_CHILDREN (node) = vNULL;
  SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
  SLP_TREE_TWO_OPERATORS (node) = false;
  SLP_TREE_DEF_TYPE (node) = vect_external_def;
  node->refcnt = 1;
  node->max_nunits = 1;

  return node;
}


/* This structure is used in creation of an SLP tree.  Each instance
   corresponds to the same operand in a group of scalar stmts in an SLP
   node.  */
typedef struct _slp_oprnd_info
{
  /* Def-stmts for the operands.  */
  vec<stmt_vec_info> def_stmts;
  /* Operands.  */
  vec<tree> ops;
  /* Information about the first statement, its vector def-type, type, the
     operand itself in case it's constant, and an indication if it's a pattern
     stmt.  */
  tree first_op_type;
  enum vect_def_type first_dt;
  bool any_pattern;
} *slp_oprnd_info;


/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */
static vec<slp_oprnd_info>
vect_create_oprnd_info (int nops, int group_size)
{
  int i;
  slp_oprnd_info oprnd_info;
  vec<slp_oprnd_info> oprnds_info;

  oprnds_info.create (nops);
  for (i = 0; i < nops; i++)
    {
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts.create (group_size);
      oprnd_info->ops.create (group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_op_type = NULL_TREE;
      oprnd_info->any_pattern = false;
      oprnds_info.quick_push (oprnd_info);
    }

  return oprnds_info;
}


/* Free operands info.  */

static void
vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
{
  int i;
  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      oprnd_info->def_stmts.release ();
      oprnd_info->ops.release ();
      XDELETE (oprnd_info);
    }

  oprnds_info.release ();
}


/* Return true if STMTS contains a pattern statement.  */

static bool
vect_contains_pattern_stmt_p (vec<stmt_vec_info> stmts)
{
  stmt_vec_info stmt_info;
  unsigned int i;
  FOR_EACH_VEC_ELT (stmts, i, stmt_info)
    if (is_pattern_stmt_p (stmt_info))
      return true;
  return false;
}

/* Find the place of the data-ref in STMT_INFO in the interleaving chain
   that starts from FIRST_STMT_INFO.  Return -1 if the data-ref is not a part
   of the chain.  */
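/* For example, in a chain of loads from a[0], a[1] and a[3] (with
   DR_GROUP_GAP 1 and 2 on the second and third member), the places
   returned would be 0, 1 and 3.  This is only an illustrative sketch
   of the accumulation below.  */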

int
vect_get_place_in_interleaving_chain (stmt_vec_info stmt_info,
				      stmt_vec_info first_stmt_info)
{
  stmt_vec_info next_stmt_info = first_stmt_info;
  int result = 0;

  if (first_stmt_info != DR_GROUP_FIRST_ELEMENT (stmt_info))
    return -1;

  do
    {
      if (next_stmt_info == stmt_info)
	return result;
      next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
      if (next_stmt_info)
	result += DR_GROUP_GAP (next_stmt_info);
    }
  while (next_stmt_info);

  return -1;
}

/* Check whether it is possible to load COUNT elements of type ELT_TYPE
   using the method implemented by duplicate_and_interleave.  Return true
   if so, returning the number of intermediate vectors in *NVECTORS_OUT
   (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT
   (if nonnull).  */

bool
can_duplicate_and_interleave_p (vec_info *vinfo, unsigned int count,
				tree elt_type, unsigned int *nvectors_out,
				tree *vector_type_out,
				tree *permutes)
{
  tree base_vector_type = get_vectype_for_scalar_type (vinfo, elt_type, count);
  if (!base_vector_type || !VECTOR_MODE_P (TYPE_MODE (base_vector_type)))
    return false;

  machine_mode base_vector_mode = TYPE_MODE (base_vector_type);
  poly_int64 elt_bytes = count * GET_MODE_UNIT_SIZE (base_vector_mode);
  unsigned int nvectors = 1;
  for (;;)
    {
      scalar_int_mode int_mode;
      poly_int64 elt_bits = elt_bytes * BITS_PER_UNIT;
      if (int_mode_for_size (elt_bits, 1).exists (&int_mode))
	{
	  /* Get the natural vector type for this SLP group size.  */
	  tree int_type = build_nonstandard_integer_type
	    (GET_MODE_BITSIZE (int_mode), 1);
	  tree vector_type
	    = get_vectype_for_scalar_type (vinfo, int_type, count);
	  if (vector_type
	      && VECTOR_MODE_P (TYPE_MODE (vector_type))
	      && known_eq (GET_MODE_SIZE (TYPE_MODE (vector_type)),
			   GET_MODE_SIZE (base_vector_mode)))
	    {
	      /* Try fusing consecutive sequences of COUNT / NVECTORS elements
		 together into elements of type INT_TYPE and using the result
		 to build NVECTORS vectors.  */
	      poly_uint64 nelts = GET_MODE_NUNITS (TYPE_MODE (vector_type));
	      vec_perm_builder sel1 (nelts, 2, 3);
	      vec_perm_builder sel2 (nelts, 2, 3);
	      poly_int64 half_nelts = exact_div (nelts, 2);
	      for (unsigned int i = 0; i < 3; ++i)
		{
		  sel1.quick_push (i);
		  sel1.quick_push (i + nelts);
		  sel2.quick_push (half_nelts + i);
		  sel2.quick_push (half_nelts + i + nelts);
		}
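	      /* Illustrative sketch: with eight-element vectors the
		 encodings above expand to the low/high interleaves
		   indices1 = { 0, 8, 1, 9, 2, 10, 3, 11 }
		   indices2 = { 4, 12, 5, 13, 6, 14, 7, 15 }
		 i.e. the two permutes that merge two input vectors
		 element-wise.  */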
	      vec_perm_indices indices1 (sel1, 2, nelts);
	      vec_perm_indices indices2 (sel2, 2, nelts);
	      if (can_vec_perm_const_p (TYPE_MODE (vector_type), indices1)
		  && can_vec_perm_const_p (TYPE_MODE (vector_type), indices2))
		{
		  if (nvectors_out)
		    *nvectors_out = nvectors;
		  if (vector_type_out)
		    *vector_type_out = vector_type;
		  if (permutes)
		    {
		      permutes[0] = vect_gen_perm_mask_checked (vector_type,
								indices1);
		      permutes[1] = vect_gen_perm_mask_checked (vector_type,
								indices2);
		    }
		  return true;
		}
	    }
	}
      if (!multiple_p (elt_bytes, 2, &elt_bytes))
	return false;
      nvectors *= 2;
    }
}

/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  This function tries to match stmts
   by swapping operands of STMTS[STMT_NUM] when possible.  A non-zero *SWAP
   indicates that a swap is required for cond_expr stmts.  Specifically,
   *SWAP is 1 if STMT is a cond and the operands of the comparison need to
   be swapped; *SWAP is 2 if STMT is a cond and the code of the comparison
   needs to be inverted.  If any operands are swapped in this function,
   *SWAP is set to a non-zero value.
   If there was a fatal error return -1; if the error could be corrected by
   swapping operands of this node's parent, return 1; if everything is
   ok return 0.  */
static int
vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
			     vec<stmt_vec_info> stmts, unsigned stmt_num,
			     vec<slp_oprnd_info> *oprnds_info)
{
  stmt_vec_info stmt_info = stmts[stmt_num];
  tree oprnd;
  unsigned int i, number_of_oprnds;
  enum vect_def_type dt = vect_uninitialized_def;
  slp_oprnd_info oprnd_info;
  int first_op_idx = 1;
  unsigned int commutative_op = -1U;
  bool first_op_cond = false;
  bool first = stmt_num == 0;

  if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
    {
      number_of_oprnds = gimple_call_num_args (stmt);
      first_op_idx = 3;
      if (gimple_call_internal_p (stmt))
	{
	  internal_fn ifn = gimple_call_internal_fn (stmt);
	  commutative_op = first_commutative_argument (ifn);

	  /* Masked load, only look at mask.  */
	  if (ifn == IFN_MASK_LOAD)
	    {
	      number_of_oprnds = 1;
	      /* Mask operand index.  */
	      first_op_idx = 5;
	    }
	}
    }
  else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
    {
      enum tree_code code = gimple_assign_rhs_code (stmt);
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      /* Swapping can only be done for cond_expr if asked to, otherwise we
	 could end up with a different comparison code from the first stmt.  */
      if (code == COND_EXPR
	  && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
	{
	  first_op_cond = true;
	  number_of_oprnds++;
	}
      else
	commutative_op = commutative_tree_code (code) ? 0U : -1U;
    }
  else
    return -1;

  bool swapped = (*swap != 0);
  gcc_assert (!swapped || first_op_cond);
  for (i = 0; i < number_of_oprnds; i++)
    {
again:
      if (first_op_cond)
	{
	  /* Map indicating how operands of cond_expr should be swapped.  */
	  int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
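	  /* Sketch of the rows: maps[0] leaves the operands alone,
	     maps[1] (*swap == 1) exchanges the two comparison operands,
	     and maps[2] (*swap == 2) exchanges the two arms, pairing
	     with the inverted comparison code described above.  */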
	  int *map = maps[*swap];

	  if (i < 2)
	    oprnd = TREE_OPERAND (gimple_op (stmt_info->stmt,
					     first_op_idx), map[i]);
	  else
	    oprnd = gimple_op (stmt_info->stmt, map[i]);
	}
      else
	oprnd = gimple_op (stmt_info->stmt, first_op_idx + (swapped ? !i : i));
      if (TREE_CODE (oprnd) == VIEW_CONVERT_EXPR)
	oprnd = TREE_OPERAND (oprnd, 0);

      oprnd_info = (*oprnds_info)[i];

      stmt_vec_info def_stmt_info;
      if (!vect_is_simple_use (oprnd, vinfo, &dt, &def_stmt_info))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "Build SLP failed: can't analyze def for %T\n",
			     oprnd);

	  return -1;
	}

      if (def_stmt_info && is_pattern_stmt_p (def_stmt_info))
	oprnd_info->any_pattern = true;

      if (first)
	{
	  /* For the swapping logic below force vect_reduction_def
	     for the reduction op in an SLP reduction group.  */
	  if (!STMT_VINFO_DATA_REF (stmt_info)
	      && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
	      && (int)i == STMT_VINFO_REDUC_IDX (stmt_info)
	      && def_stmt_info)
	    dt = vect_reduction_def;
	  oprnd_info->first_dt = dt;
	  oprnd_info->first_op_type = TREE_TYPE (oprnd);
	}
      else
	{
	  /* Not first stmt of the group, check that the def-stmt/s match
	     the def-stmt/s of the first stmt.  Allow different definition
	     types for reduction chains: the first stmt must be a
	     vect_reduction_def (a phi node), and the rest
	     end in the reduction chain.  */
	  tree type = TREE_TYPE (oprnd);
	  if ((oprnd_info->first_dt != dt
	       && !(oprnd_info->first_dt == vect_reduction_def
		    && !STMT_VINFO_DATA_REF (stmt_info)
		    && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
		    && def_stmt_info
		    && !STMT_VINFO_DATA_REF (def_stmt_info)
		    && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info)
			== REDUC_GROUP_FIRST_ELEMENT (stmt_info)))
	       && !((oprnd_info->first_dt == vect_external_def
		     || oprnd_info->first_dt == vect_constant_def)
		    && (dt == vect_external_def
			|| dt == vect_constant_def)))
	      || !types_compatible_p (oprnd_info->first_op_type, type)
	      || (!STMT_VINFO_DATA_REF (stmt_info)
		  && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
		  && ((!def_stmt_info
		       || STMT_VINFO_DATA_REF (def_stmt_info)
		       || (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info)
			   != REDUC_GROUP_FIRST_ELEMENT (stmt_info)))
		      != (oprnd_info->first_dt != vect_reduction_def))))
	    {
	      /* Try swapping operands if we got a mismatch.  */
	      if (i == commutative_op && !swapped)
		{
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_NOTE, vect_location,
				     "trying swapped operands\n");
		  swapped = true;
		  goto again;
		}

	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: different types\n");

	      return 1;
	    }
	  if ((dt == vect_constant_def
	       || dt == vect_external_def)
	      && !GET_MODE_SIZE (vinfo->vector_mode).is_constant ()
	      && (TREE_CODE (type) == BOOLEAN_TYPE
		  || !can_duplicate_and_interleave_p (vinfo, stmts.length (),
						      type)))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: invalid type of def "
				 "for variable-length SLP %T\n", oprnd);
	      return -1;
	    }
	}

      /* Check the types of the definitions.  */
      switch (dt)
	{
	case vect_external_def:
	  /* Make sure to demote the overall operand to external.  */
	  oprnd_info->first_dt = vect_external_def;
	  /* Fallthru.  */
	case vect_constant_def:
	  oprnd_info->def_stmts.quick_push (NULL);
	  oprnd_info->ops.quick_push (oprnd);
	  break;

	case vect_internal_def:
	case vect_reduction_def:
	  if (oprnd_info->first_dt == vect_reduction_def
	      && !STMT_VINFO_DATA_REF (stmt_info)
	      && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
	      && !STMT_VINFO_DATA_REF (def_stmt_info)
	      && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info)
		  == REDUC_GROUP_FIRST_ELEMENT (stmt_info)))
	    {
	      /* For an SLP reduction chain we want to duplicate the
		 reduction to each of the chain members.  That gets
		 us a sane SLP graph (even though the stmts are not
		 100% correct with regard to the initial values).  */
	      gcc_assert (!first);
	      oprnd_info->def_stmts.quick_push (oprnd_info->def_stmts[0]);
	      oprnd_info->ops.quick_push (oprnd_info->ops[0]);
	      break;
	    }
	  /* Fallthru.  */
	case vect_induction_def:
	  oprnd_info->def_stmts.quick_push (def_stmt_info);
	  oprnd_info->ops.quick_push (oprnd);
	  break;

	default:
	  /* FORNOW: Not supported.  */
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "Build SLP failed: illegal type of def %T\n",
			     oprnd);

	  return -1;
	}
    }

  /* Swap operands.  */
  if (swapped)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "swapped operands to match def types in %G",
			 stmt_info->stmt);
    }

  *swap = swapped;
  return 0;
}

/* Try to assign vector type VECTYPE to STMT_INFO for BB vectorization.
   Return true if we can, meaning that this choice doesn't conflict with
   existing SLP nodes that use STMT_INFO.  */

static bool
vect_update_shared_vectype (stmt_vec_info stmt_info, tree vectype)
{
  tree old_vectype = STMT_VINFO_VECTYPE (stmt_info);
  if (old_vectype && useless_type_conversion_p (vectype, old_vectype))
    return true;

  if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
    {
      /* We maintain the invariant that if any statement in the group is
	 used, all other members of the group have the same vector type.  */
      stmt_vec_info first_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
      stmt_vec_info member_info = first_info;
      for (; member_info; member_info = DR_GROUP_NEXT_ELEMENT (member_info))
	if (STMT_VINFO_NUM_SLP_USES (member_info) > 0
	    || is_pattern_stmt_p (member_info))
	  break;

      if (!member_info)
	{
	  for (member_info = first_info; member_info;
	       member_info = DR_GROUP_NEXT_ELEMENT (member_info))
	    STMT_VINFO_VECTYPE (member_info) = vectype;
	  return true;
	}
    }
  else if (STMT_VINFO_NUM_SLP_USES (stmt_info) == 0
	   && !is_pattern_stmt_p (stmt_info))
    {
      STMT_VINFO_VECTYPE (stmt_info) = vectype;
      return true;
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
		       "Build SLP failed: incompatible vector"
		       " types for: %G", stmt_info->stmt);
      dump_printf_loc (MSG_NOTE, vect_location,
		       "    old vector type: %T\n", old_vectype);
      dump_printf_loc (MSG_NOTE, vect_location,
		       "    new vector type: %T\n", vectype);
    }
  return false;
}

/* Try to infer and assign a vector type to all the statements in STMTS.
   Used only for BB vectorization.  */

static bool
vect_update_all_shared_vectypes (vec<stmt_vec_info> stmts)
{
  tree vectype, nunits_vectype;
  if (!vect_get_vector_types_for_stmt (stmts[0], &vectype,
				       &nunits_vectype, stmts.length ()))
    return false;

  stmt_vec_info stmt_info;
  unsigned int i;
  FOR_EACH_VEC_ELT (stmts, i, stmt_info)
    if (!vect_update_shared_vectype (stmt_info, vectype))
      return false;

  return true;
}

/* Return true if call statements CALL1 and CALL2 are similar enough
   to be combined into the same SLP group.  */

static bool
compatible_calls_p (gcall *call1, gcall *call2)
{
  unsigned int nargs = gimple_call_num_args (call1);
  if (nargs != gimple_call_num_args (call2))
    return false;

  if (gimple_call_combined_fn (call1) != gimple_call_combined_fn (call2))
    return false;

  if (gimple_call_internal_p (call1))
    {
      if (!types_compatible_p (TREE_TYPE (gimple_call_lhs (call1)),
			       TREE_TYPE (gimple_call_lhs (call2))))
	return false;
      for (unsigned int i = 0; i < nargs; ++i)
	if (!types_compatible_p (TREE_TYPE (gimple_call_arg (call1, i)),
				 TREE_TYPE (gimple_call_arg (call2, i))))
	  return false;
    }
  else
    {
      if (!operand_equal_p (gimple_call_fn (call1),
			    gimple_call_fn (call2), 0))
	return false;

      if (gimple_call_fntype (call1) != gimple_call_fntype (call2))
	return false;
    }
  return true;
}

/* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
   caller's attempt to find the vector type in STMT_INFO with the narrowest
   element type.  Return true if VECTYPE is nonnull and if it is valid
   for STMT_INFO.  When returning true, update MAX_NUNITS to reflect the
   number of units in VECTYPE.  GROUP_SIZE and MAX_NUNITS are as for
   vect_build_slp_tree.  */

static bool
vect_record_max_nunits (stmt_vec_info stmt_info, unsigned int group_size,
			tree vectype, poly_uint64 *max_nunits)
{
  if (!vectype)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Build SLP failed: unsupported data-type in %G\n",
			 stmt_info->stmt);
      /* Fatal mismatch.  */
      return false;
    }

  /* If populating the vector type requires unrolling then fail
     before adjusting *max_nunits for basic-block vectorization.  */
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  unsigned HOST_WIDE_INT const_nunits;
  if (STMT_VINFO_BB_VINFO (stmt_info)
      && (!nunits.is_constant (&const_nunits)
	  || const_nunits > group_size))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Build SLP failed: unrolling required "
			 "in basic block SLP\n");
      /* Fatal mismatch.  */
      return false;
    }

  /* In case of multiple types we need to detect the smallest type.  */
  vect_update_max_nunits (max_nunits, vectype);
  return true;
}

/* STMTS is a group of GROUP_SIZE SLP statements in which some
   statements do the same operation as the first statement and in which
   the others do ALT_STMT_CODE.  Return true if we can take one vector
   of the first operation and one vector of the second and permute them
   to get the required result.  VECTYPE is the type of the vector that
   would be permuted.  */
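/* A minimal sketch of the check below: for the classic addsub pattern

     a[0] = b[0] + c[0];
     a[1] = b[1] - c[1];

   with a four-element vector type (and MINUS_EXPR as the alternate
   code), the permute asked for is { 0, 5, 2, 7 }, i.e. even lanes from
   the PLUS vector and odd lanes from the MINUS vector.  */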

static bool
vect_two_operations_perm_ok_p (vec<stmt_vec_info> stmts,
			       unsigned int group_size, tree vectype,
			       tree_code alt_stmt_code)
{
  unsigned HOST_WIDE_INT count;
  if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&count))
    return false;

  vec_perm_builder sel (count, count, 1);
  for (unsigned int i = 0; i < count; ++i)
    {
      unsigned int elt = i;
      gassign *stmt = as_a <gassign *> (stmts[i % group_size]->stmt);
      if (gimple_assign_rhs_code (stmt) == alt_stmt_code)
	elt += count;
      sel.quick_push (elt);
    }
  vec_perm_indices indices (sel, 2, count);
  return can_vec_perm_const_p (TYPE_MODE (vectype), indices);
}

/* Verify whether the scalar stmts STMTS are isomorphic, require data
   permutation or are of unsupported types of operation.  Return
   true if they are, otherwise return false and indicate in *MATCHES
   which stmts are not isomorphic to the first one.  If MATCHES[0]
   is false then this indicates the comparison could not be
   carried out or the stmts will never be vectorized by SLP.

   Note COND_EXPR is possibly isomorphic to another one after swapping its
   operands.  Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
   the first stmt by swapping the two operands of comparison; set SWAP[i]
   to 2 if stmt I is isomorphic to the first stmt by inverting the code
   of comparison.  Take A1 >= B1 ? X1 : Y1 as an example; it can be swapped
   to (B1 <= A1 ? X1 : Y1) or be inverted to (A1 < B1) ? Y1 : X1.  */

static bool
vect_build_slp_tree_1 (unsigned char *swap,
		       vec<stmt_vec_info> stmts, unsigned int group_size,
		       poly_uint64 *max_nunits, bool *matches,
		       bool *two_operators)
{
  unsigned int i;
  stmt_vec_info first_stmt_info = stmts[0];
  enum tree_code first_stmt_code = ERROR_MARK;
  enum tree_code alt_stmt_code = ERROR_MARK;
  enum tree_code rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;
  tree lhs;
  bool need_same_oprnds = false;
  tree vectype = NULL_TREE, first_op1 = NULL_TREE;
  optab optab;
  int icode;
  machine_mode optab_op2_mode;
  machine_mode vec_mode;
  stmt_vec_info first_load = NULL, prev_first_load = NULL;
  bool load_p = false;

  /* For every stmt in NODE find its def stmt/s.  */
  stmt_vec_info stmt_info;
  FOR_EACH_VEC_ELT (stmts, i, stmt_info)
    {
      vec_info *vinfo = stmt_info->vinfo;
      gimple *stmt = stmt_info->stmt;
      swap[i] = 0;
      matches[i] = false;

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for %G", stmt);

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (stmt_info))
        {
          if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "Build SLP failed: unvectorizable statement %G",
			     stmt);
	  /* Fatal mismatch.  */
	  matches[0] = false;
          return false;
        }

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "Build SLP failed: not GIMPLE_ASSIGN nor "
			     "GIMPLE_CALL %G", stmt);
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      tree nunits_vectype;
      if (!vect_get_vector_types_for_stmt (stmt_info, &vectype,
					   &nunits_vectype, group_size)
	  || (nunits_vectype
	      && !vect_record_max_nunits (stmt_info, group_size,
					  nunits_vectype, max_nunits)))
	{
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      gcc_assert (vectype);

      if (is_a <bb_vec_info> (vinfo)
	  && !vect_update_shared_vectype (stmt_info, vectype))
	continue;

      gcall *call_stmt = dyn_cast <gcall *> (stmt);
      if (call_stmt)
	{
	  rhs_code = CALL_EXPR;

	  if (gimple_call_internal_p (stmt, IFN_MASK_LOAD))
	    load_p = true;
	  else if ((gimple_call_internal_p (call_stmt)
		    && (!vectorizable_internal_fn_p
			(gimple_call_internal_fn (call_stmt))))
		   || gimple_call_tail_p (call_stmt)
		   || gimple_call_noreturn_p (call_stmt)
		   || !gimple_call_nothrow_p (call_stmt)
		   || gimple_call_chain (call_stmt))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: unsupported call type %G",
				 call_stmt);
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }
	}
      else
	{
	  rhs_code = gimple_assign_rhs_code (stmt);
	  load_p = TREE_CODE_CLASS (rhs_code) == tcc_reference;
	}

      /* Check the operation.  */
      if (i == 0)
	{
	  first_stmt_code = rhs_code;

	  /* Shift arguments should be equal in all the packed stmts for a
	     vector shift with scalar shift operand.  */
	  if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
	      || rhs_code == LROTATE_EXPR
	      || rhs_code == RROTATE_EXPR)
	    {
	      vec_mode = TYPE_MODE (vectype);

	      /* First see if we have a vector/vector shift.  */
	      optab = optab_for_tree_code (rhs_code, vectype,
					   optab_vector);

	      if (!optab
		  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
		{
		  /* No vector/vector shift, try for a vector/scalar shift.  */
		  optab = optab_for_tree_code (rhs_code, vectype,
					       optab_scalar);

		  if (!optab)
		    {
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
					 "Build SLP failed: no optab.\n");
		      /* Fatal mismatch.  */
		      matches[0] = false;
		      return false;
		    }
		  icode = (int) optab_handler (optab, vec_mode);
		  if (icode == CODE_FOR_nothing)
		    {
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
					 "Build SLP failed: "
					 "op not supported by target.\n");
		      /* Fatal mismatch.  */
		      matches[0] = false;
		      return false;
		    }
		  optab_op2_mode = insn_data[icode].operand[2].mode;
		  if (!VECTOR_MODE_P (optab_op2_mode))
		    {
		      need_same_oprnds = true;
		      first_op1 = gimple_assign_rhs2 (stmt);
		    }
		}
	    }
	  else if (rhs_code == WIDEN_LSHIFT_EXPR)
            {
              need_same_oprnds = true;
              first_op1 = gimple_assign_rhs2 (stmt);
            }
	  else if (call_stmt
		   && gimple_call_internal_p (call_stmt, IFN_DIV_POW2))
	    {
	      need_same_oprnds = true;
	      first_op1 = gimple_call_arg (call_stmt, 1);
	    }
	}
      else
	{
	  if (first_stmt_code != rhs_code
	      && alt_stmt_code == ERROR_MARK)
	    alt_stmt_code = rhs_code;
	  if (first_stmt_code != rhs_code
	      && (first_stmt_code != IMAGPART_EXPR
		  || rhs_code != REALPART_EXPR)
	      && (first_stmt_code != REALPART_EXPR
		  || rhs_code != IMAGPART_EXPR)
	      /* Handle mismatches in plus/minus by computing both
		 and merging the results.  */
	      && !((first_stmt_code == PLUS_EXPR
		    || first_stmt_code == MINUS_EXPR)
		   && (alt_stmt_code == PLUS_EXPR
		       || alt_stmt_code == MINUS_EXPR)
		   && rhs_code == alt_stmt_code)
	      && !(STMT_VINFO_GROUPED_ACCESS (stmt_info)
                   && (first_stmt_code == ARRAY_REF
                       || first_stmt_code == BIT_FIELD_REF
                       || first_stmt_code == INDIRECT_REF
                       || first_stmt_code == COMPONENT_REF
                       || first_stmt_code == MEM_REF)))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: different operation "
				   "in stmt %G", stmt);
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "original stmt %G", first_stmt_info->stmt);
		}
	      /* Mismatch.  */
	      continue;
	    }

	  if (!load_p && rhs_code == CALL_EXPR)
	    {
	      if (!compatible_calls_p (as_a <gcall *> (stmts[0]->stmt),
				       as_a <gcall *> (stmt)))
		{
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "Build SLP failed: different calls in %G",
				     stmt);
		  /* Mismatch.  */
		  continue;
		}
	    }

	  if (need_same_oprnds)
	    {
	      tree other_op1 = (call_stmt
				? gimple_call_arg (call_stmt, 1)
				: gimple_assign_rhs2 (stmt));
	      if (!operand_equal_p (first_op1, other_op1, 0))
		{
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "Build SLP failed: different shift "
				     "arguments in %G", stmt);
		  /* Mismatch.  */
		  continue;
		}
	    }
	}

      /* Grouped store or load.  */
      if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
	{
	  if (REFERENCE_CLASS_P (lhs))
	    {
	      /* Store.  */
	      ;
	    }
	  else
	    {
	      /* Load.  */
	      first_load = DR_GROUP_FIRST_ELEMENT (stmt_info);
              if (prev_first_load)
                {
                  /* Check that there are no loads from different interleaving
                     chains in the same node.  */
                  if (prev_first_load != first_load)
                    {
                      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION,
					 vect_location,
					 "Build SLP failed: different "
					 "interleaving chains in one node %G",
					 stmt);
		      /* Mismatch.  */
		      continue;
                    }
                }
              else
                prev_first_load = first_load;
           }
        } /* Grouped access.  */
      else
	{
	  if (load_p)
	    {
	      /* Non-grouped load.  */
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: not grouped load %G", stmt);

	      /* FORNOW: Non-grouped loads are not supported.  */
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }

	  /* Not a memory operation.  */
	  if (TREE_CODE_CLASS (rhs_code) != tcc_binary
	      && TREE_CODE_CLASS (rhs_code) != tcc_unary
	      && TREE_CODE_CLASS (rhs_code) != tcc_expression
	      && TREE_CODE_CLASS (rhs_code) != tcc_comparison
	      && rhs_code != VIEW_CONVERT_EXPR
	      && rhs_code != CALL_EXPR)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: operation unsupported %G",
				 stmt);
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }

	  if (rhs_code == COND_EXPR)
	    {
	      tree cond_expr = gimple_assign_rhs1 (stmt);
	      enum tree_code cond_code = TREE_CODE (cond_expr);
	      enum tree_code swap_code = ERROR_MARK;
	      enum tree_code invert_code = ERROR_MARK;

	      if (i == 0)
		first_cond_code = TREE_CODE (cond_expr);
	      else if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
		{
		  bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
		  swap_code = swap_tree_comparison (cond_code);
		  invert_code = invert_tree_comparison (cond_code, honor_nans);
		}

	      if (first_cond_code == cond_code)
		;
	      /* Isomorphism can be achieved by swapping.  */
	      else if (first_cond_code == swap_code)
		swap[i] = 1;
	      /* Isomorphism can be achieved by inverting.  */
	      else if (first_cond_code == invert_code)
		swap[i] = 2;
	      else
		{
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "Build SLP failed: different"
				     " operation %G", stmt);
		  /* Mismatch.  */
		  continue;
		}
	    }
	}

      matches[i] = true;
    }

  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      return false;

  /* If we allowed a two-operation SLP node verify the target can cope
     with the permute we are going to use.  */
  if (alt_stmt_code != ERROR_MARK
      && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
    {
      if (!vect_two_operations_perm_ok_p (stmts, group_size,
					  vectype, alt_stmt_code))
	{
	  for (i = 0; i < group_size; ++i)
	    if (gimple_assign_rhs_code (stmts[i]->stmt) == alt_stmt_code)
	      {
		matches[i] = false;
		if (dump_enabled_p ())
		  {
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "Build SLP failed: different operation "
				     "in stmt %G", stmts[i]->stmt);
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "original stmt %G", first_stmt_info->stmt);
		  }
	      }
	  return false;
	}
      *two_operators = true;
    }

  return true;
}

/* Traits for the hash_set to record failed SLP builds for a stmt set.
   Note that entries are only removed at destruction time, so we do not
   need a special value for deleted that differs from empty.  */
struct bst_traits
{
  typedef vec <stmt_vec_info> value_type;
  typedef vec <stmt_vec_info> compare_type;
  static inline hashval_t hash (value_type);
  static inline bool equal (value_type existing, value_type candidate);
  static inline bool is_empty (value_type x) { return !x.exists (); }
  static inline bool is_deleted (value_type x) { return !x.exists (); }
  static const bool empty_zero_p = true;
  static inline void mark_empty (value_type &x) { x.release (); }
  static inline void mark_deleted (value_type &x) { x.release (); }
  static inline void remove (value_type &x) { x.release (); }
};
inline hashval_t
bst_traits::hash (value_type x)
{
  inchash::hash h;
  for (unsigned i = 0; i < x.length (); ++i)
    h.add_int (gimple_uid (x[i]->stmt));
  return h.end ();
}
inline bool
bst_traits::equal (value_type existing, value_type candidate)
{
  if (existing.length () != candidate.length ())
    return false;
  for (unsigned i = 0; i < existing.length (); ++i)
    if (existing[i] != candidate[i])
      return false;
  return true;
}

typedef hash_map <vec <gimple *>, slp_tree,
		  simple_hashmap_traits <bst_traits, slp_tree> >
  scalar_stmts_to_slp_tree_map_t;
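/* Note the map also caches failures: vect_build_slp_tree stores a NULL
   slp_tree for stmt sets whose build failed, so a later lookup can skip
   re-analyzing them.  */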

static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo,
		       vec<stmt_vec_info> stmts, unsigned int group_size,
		       poly_uint64 *max_nunits,
		       bool *matches, unsigned *npermutes, unsigned *tree_size,
		       scalar_stmts_to_slp_tree_map_t *bst_map);

static slp_tree
vect_build_slp_tree (vec_info *vinfo,
		     vec<stmt_vec_info> stmts, unsigned int group_size,
		     poly_uint64 *max_nunits,
		     bool *matches, unsigned *npermutes, unsigned *tree_size,
		     scalar_stmts_to_slp_tree_map_t *bst_map)
{
  if (slp_tree *leader = bst_map->get (stmts))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "re-using %sSLP tree %p\n",
			 *leader ? "" : "failed ", *leader);
      if (*leader)
	{
	  (*leader)->refcnt++;
	  vect_update_max_nunits (max_nunits, (*leader)->max_nunits);
	}
      return *leader;
    }
  poly_uint64 this_max_nunits = 1;
  slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size,
					&this_max_nunits,
					matches, npermutes, tree_size, bst_map);
  if (res)
    {
      res->max_nunits = this_max_nunits;
      vect_update_max_nunits (max_nunits, this_max_nunits);
      /* Keep a reference for the bst_map use.  */
      res->refcnt++;
    }
  bst_map->put (stmts.copy (), res);
  return res;
}

/* Recursively build an SLP tree starting from NODE.
   Fail (and return NULL) if def-stmts are not isomorphic, require data
   permutation or are of unsupported types of operation.  Otherwise,
   return the built SLP node.  */

static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo,
		       vec<stmt_vec_info> stmts, unsigned int group_size,
		       poly_uint64 *max_nunits,
		       bool *matches, unsigned *npermutes, unsigned *tree_size,
		       scalar_stmts_to_slp_tree_map_t *bst_map)
{
  unsigned nops, i, this_tree_size = 0;
  poly_uint64 this_max_nunits = *max_nunits;
  slp_tree node;

  matches[0] = false;

  stmt_vec_info stmt_info = stmts[0];
  if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
    nops = gimple_call_num_args (stmt);
  else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
	nops++;
    }
  else if (is_a <gphi *> (stmt_info->stmt))
    nops = 0;
  else
    return NULL;

  /* If the SLP node is a PHI (induction or reduction), terminate
     the recursion.  */
  if (gphi *stmt = dyn_cast <gphi *> (stmt_info->stmt))
    {
      tree scalar_type = TREE_TYPE (PHI_RESULT (stmt));
      tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
      if (!vect_record_max_nunits (stmt_info, group_size, vectype, max_nunits))
	return NULL;

      vect_def_type def_type = STMT_VINFO_DEF_TYPE (stmt_info);
      /* Induction from different IVs is not supported.  */
      if (def_type == vect_induction_def)
	{
	  stmt_vec_info other_info;
	  FOR_EACH_VEC_ELT (stmts, i, other_info)
	    if (stmt_info != other_info)
	      return NULL;
	}
      else if (def_type == vect_reduction_def
	       || def_type == vect_double_reduction_def
	       || def_type == vect_nested_cycle)
	{
	  /* Else def types have to match.  */
	  stmt_vec_info other_info;
	  FOR_EACH_VEC_ELT (stmts, i, other_info)
	    if (STMT_VINFO_DEF_TYPE (other_info) != def_type)
	      return NULL;
	}
      else
	return NULL;
      (*tree_size)++;
      node = vect_create_new_slp_node (stmts);
      return node;
    }


  bool two_operators = false;
  unsigned char *swap = XALLOCAVEC (unsigned char, group_size);
  if (!vect_build_slp_tree_1 (swap, stmts, group_size,
			      &this_max_nunits, matches, &two_operators))
    return NULL;

  /* If the SLP node is a load, terminate the recursion unless masked.  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
    {
      if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
	{
	  /* Masked load.  */
	  gcc_assert (gimple_call_internal_p (stmt, IFN_MASK_LOAD));
	  nops = 1;
	}
      else
	{
	  *max_nunits = this_max_nunits;
	  (*tree_size)++;
	  node = vect_create_new_slp_node (stmts);
	  /* And compute the load permutation.  Whether it is actually
	     a permutation depends on the unrolling factor which is
	     decided later.  */
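	  /* E.g. an SLP node whose stmts load a[1] and a[0] from the
	     chain starting at a[0] records the load permutation
	     { 1, 0 }.  */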
	  vec<unsigned> load_permutation;
	  int j;
	  stmt_vec_info load_info;
	  load_permutation.create (group_size);
	  stmt_vec_info first_stmt_info
	    = DR_GROUP_FIRST_ELEMENT (SLP_TREE_SCALAR_STMTS (node)[0]);
	  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info)
	    {
	      int load_place = vect_get_place_in_interleaving_chain
		  (load_info, first_stmt_info);
	      gcc_assert (load_place != -1);
	      load_permutation.safe_push (load_place);
	    }
	  SLP_TREE_LOAD_PERMUTATION (node) = load_permutation;
	  return node;
	}
    }

  /* Get at the operands, verifying they are compatible.  */
  vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
  slp_oprnd_info oprnd_info;
  FOR_EACH_VEC_ELT (stmts, i, stmt_info)
    {
      int res = vect_get_and_check_slp_defs (vinfo, &swap[i],
					     stmts, i, &oprnds_info);
      if (res != 0)
	matches[(res == -1) ? 0 : i] = false;
      if (!matches[0])
	break;
    }
  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      {
	vect_free_oprnd_info (oprnds_info);
	return NULL;
      }

  auto_vec<slp_tree, 4> children;

  stmt_info = stmts[0];

  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      slp_tree child;
      unsigned old_tree_size = this_tree_size;
      unsigned int j;

      if (oprnd_info->first_dt == vect_uninitialized_def)
	{
	  /* COND_EXPRs have one operand too many here if the condition
	     is an SSA name rather than a comparison.  */
	  gcc_assert (i == 3 && nops == 4);
	  continue;
	}

      if (oprnd_info->first_dt != vect_internal_def
	  && oprnd_info->first_dt != vect_reduction_def
	  && oprnd_info->first_dt != vect_induction_def)
	{
	  slp_tree invnode = vect_create_new_slp_node (oprnd_info->ops);
	  SLP_TREE_DEF_TYPE (invnode) = oprnd_info->first_dt;
	  oprnd_info->ops = vNULL;
	  children.safe_push (invnode);
	  continue;
	}

      if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
					group_size, &this_max_nunits,
					matches, npermutes,
					&this_tree_size, bst_map)) != NULL)
	{
	  /* If all children of a non-unary child were built up from
	     scalars then just throw that away and build this node up
	     from scalars instead.  */
	  if (is_a <bb_vec_info> (vinfo)
	      && SLP_TREE_CHILDREN (child).length () > 1
	      /* ???  Rejecting patterns this way doesn't work.  We'd have to
		 do extra work to cancel the pattern so the uses see the
		 scalar version.  */
	      && !oprnd_info->any_pattern)
	    {
	      slp_tree grandchild;

	      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
		if (SLP_TREE_DEF_TYPE (grandchild) != vect_external_def)
		  break;
	      if (!grandchild
		  && vect_update_all_shared_vectypes (oprnd_info->def_stmts))
		{
		  /* Roll back.  */
		  this_tree_size = old_tree_size;
		  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
		    vect_free_slp_tree (grandchild, false);
		  SLP_TREE_CHILDREN (child).truncate (0);

		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_NOTE, vect_location,
				     "Building parent vector operands from "
				     "scalars instead\n");
		  oprnd_info->def_stmts = vNULL;
		  SLP_TREE_DEF_TYPE (child) = vect_external_def;
		  SLP_TREE_SCALAR_OPS (child) = oprnd_info->ops;
		  oprnd_info->ops = vNULL;
		  ++this_tree_size;
		  children.safe_push (child);
		  continue;
		}
	    }

	  oprnd_info->def_stmts = vNULL;
	  children.safe_push (child);
	  continue;
	}

      /* If the SLP build failed fatally and we analyze a basic-block
	 simply treat nodes we fail to build as externally defined
	 (and thus build vectors from the scalar defs).
	 The cost model will reject outright expensive cases.
	 ???  This doesn't treat cases where permutation ultimately
	 fails (or we don't try permutation below).  Ideally we'd
	 even compute a permutation that will end up with the maximum
	 SLP tree size...  */
      if (is_a <bb_vec_info> (vinfo)
	  && !matches[0]
	  /* ???  Rejecting patterns this way doesn't work.  We'd have to
	     do extra work to cancel the pattern so the uses see the
	     scalar version.  */
	  && !is_pattern_stmt_p (stmt_info)
	  && !oprnd_info->any_pattern
	  && vect_update_all_shared_vectypes (oprnd_info->def_stmts))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "Building vector operands from scalars\n");
	  this_tree_size++;
	  child = vect_create_new_slp_node (oprnd_info->def_stmts);
	  SLP_TREE_DEF_TYPE (child) = vect_external_def;
	  SLP_TREE_SCALAR_OPS (child) = oprnd_info->ops;
	  children.safe_push (child);
	  oprnd_info->ops = vNULL;
	  oprnd_info->def_stmts = vNULL;
	  continue;
	}

      /* If the SLP build for operand zero failed and operands zero
	 and one can be commuted, try that for the scalar stmts
	 that failed the match.  */
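      /* Sketch: if stmt 0 computes a + b while the mismatched stmt 1
	 computes d + c, and only { a, c } / { b, d } form buildable
	 subtrees, swapping the operands of stmt 1 can make the match
	 succeed (the names here are purely illustrative).  */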
      if (i == 0
	  /* A first scalar stmt mismatch signals a fatal mismatch.  */
	  && matches[0]
	  /* ???  For COND_EXPRs we can swap the comparison operands
	     as well as the arms under some constraints.  */
	  && nops == 2
	  && oprnds_info[1]->first_dt == vect_internal_def
	  && is_gimple_assign (stmt_info->stmt)
	  /* Swapping operands for reductions breaks assumptions later on.  */
	  && STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def
	  && STMT_VINFO_DEF_TYPE (stmt_info) != vect_double_reduction_def
	  /* Do so only if the number of unsuccessful permutes was no more
	     than a cut-off as re-trying the recursive match on
	     possibly each level of the tree would expose exponential
	     behavior.  */
	  && *npermutes < 4)
	{
	  /* See whether we can swap the matching or the non-matching
	     stmt operands.  */
	  bool swap_not_matching = true;
	  do
	    {
	      for (j = 0; j < group_size; ++j)
		{
		  if (matches[j] != !swap_not_matching)
		    continue;
		  stmt_vec_info stmt_info = stmts[j];
		  /* Verify if we can swap operands of this stmt.  */
		  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
		  if (!stmt
		      || !commutative_tree_code (gimple_assign_rhs_code (stmt)))
		    {
		      if (!swap_not_matching)
			goto fail;
		      swap_not_matching = false;
		      break;
		    }
		}
	    }
	  while (j != group_size);

	  /* Swap mismatched definition stmts.  */
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "Re-trying with swapped operands of stmts ");
	  for (j = 0; j < group_size; ++j)
	    if (matches[j] == !swap_not_matching)
	      {
		std::swap (oprnds_info[0]->def_stmts[j],
			   oprnds_info[1]->def_stmts[j]);
		std::swap (oprnds_info[0]->ops[j],
			   oprnds_info[1]->ops[j]);
		if (dump_enabled_p ())
		  dump_printf (MSG_NOTE, "%d ", j);
	      }
	  if (dump_enabled_p ())
	    dump_printf (MSG_NOTE, "\n");
	  /* After swapping some operands we lost track of whether an
	     operand has any pattern defs, so be conservative here.  */
	  if (oprnds_info[0]->any_pattern || oprnds_info[1]->any_pattern)
	    oprnds_info[0]->any_pattern = oprnds_info[1]->any_pattern = true;
	  /* And try again with scratch 'matches' ... */
	  bool *tem = XALLOCAVEC (bool, group_size);
	  if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
					    group_size, &this_max_nunits,
					    tem, npermutes,
					    &this_tree_size, bst_map)) != NULL)
	    {
	      /* If all children of a non-unary child were built up from
		 scalars then just throw that away and build this node up
		 from scalars instead.  */
1532 	      if (is_a <bb_vec_info> (vinfo)
1533 		  && SLP_TREE_CHILDREN (child).length () > 1
1534 		  /* ???  Rejecting patterns this way doesn't work.  We'd have
1535 		     to do extra work to cancel the pattern so the uses see the
1536 		     scalar version.  */
1537 		  && !oprnd_info->any_pattern)
1538 		{
1539 		  unsigned int j;
1540 		  slp_tree grandchild;
1541 
1542 		  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
1543 		    if (SLP_TREE_DEF_TYPE (grandchild) != vect_external_def)
1544 		      break;
1545 		  if (!grandchild
1546 		      && (vect_update_all_shared_vectypes
1547 			  (oprnd_info->def_stmts)))
1548 		    {
1549 		      /* Roll back.  */
1550 		      this_tree_size = old_tree_size;
1551 		      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
1552 			vect_free_slp_tree (grandchild, false);
1553 		      SLP_TREE_CHILDREN (child).truncate (0);
1554 
1555 		      if (dump_enabled_p ())
1556 			dump_printf_loc (MSG_NOTE, vect_location,
1557 					 "Building parent vector operands from "
1558 					 "scalars instead\n");
1559 		      oprnd_info->def_stmts = vNULL;
1560 		      SLP_TREE_DEF_TYPE (child) = vect_external_def;
1561 		      SLP_TREE_SCALAR_OPS (child) = oprnd_info->ops;
1562 		      oprnd_info->ops = vNULL;
1563 		      ++this_tree_size;
1564 		      children.safe_push (child);
1565 		      continue;
1566 		    }
1567 		}
1568 
1569 	      oprnd_info->def_stmts = vNULL;
1570 	      children.safe_push (child);
1571 	      continue;
1572 	    }
1573 
1574 	  ++*npermutes;
1575 	}
1576 
1577 fail:
1578       gcc_assert (child == NULL);
1579       FOR_EACH_VEC_ELT (children, j, child)
1580 	vect_free_slp_tree (child, false);
1581       vect_free_oprnd_info (oprnds_info);
1582       return NULL;
1583     }
1584 
1585   vect_free_oprnd_info (oprnds_info);
1586 
1587   *tree_size += this_tree_size + 1;
1588   *max_nunits = this_max_nunits;
1589 
1590   node = vect_create_new_slp_node (stmts);
1591   SLP_TREE_TWO_OPERATORS (node) = two_operators;
1592   SLP_TREE_CHILDREN (node).splice (children);
1593   return node;
1594 }
1595 
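/* Illustrative sketch of the operand swapping above (array names made
   up for the example): for a group of two commutative adds

     a[0] = b[0] + c[0];
     a[1] = c[1] + b[1];

   the second stmt initially fails to match.  Swapping the operands of
   the mismatched stmts makes both rows read { b, c }, and the build is
   retried with a scratch 'matches' array, bounded by the *npermutes
   cut-off.  */
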
1596 /* Dump a slp tree NODE using flags specified in DUMP_KIND.  */
1597 
1598 static void
1599 vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
1600 		     slp_tree node, hash_set<slp_tree> &visited)
1601 {
1602   unsigned i, j;
1603   stmt_vec_info stmt_info;
1604   slp_tree child;
1605   tree op;
1606 
1607   if (visited.add (node))
1608     return;
1609 
1610   dump_metadata_t metadata (dump_kind, loc.get_impl_location ());
1611   dump_user_location_t user_loc = loc.get_user_location ();
1612   dump_printf_loc (metadata, user_loc, "node%s %p (max_nunits=%u, refcnt=%u)\n",
1613 		   SLP_TREE_DEF_TYPE (node) == vect_external_def
1614 		   ? " (external)"
1615 		   : (SLP_TREE_DEF_TYPE (node) == vect_constant_def
1616 		      ? " (constant)"
1617 		      : ""), node,
1618 		   estimated_poly_value (node->max_nunits), node->refcnt);
1619   if (SLP_TREE_SCALAR_STMTS (node).exists ())
1620     FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
1621       dump_printf_loc (metadata, user_loc, "\tstmt %u %G", i, stmt_info->stmt);
1622   else
1623     {
1624       dump_printf_loc (metadata, user_loc, "\t{ ");
1625       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
1626 	dump_printf (metadata, "%T%s ", op,
1627 		     i < SLP_TREE_SCALAR_OPS (node).length () - 1 ? "," : "");
1628       dump_printf (metadata, "}\n");
1629     }
1630   if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
1631     {
1632       dump_printf_loc (metadata, user_loc, "\tload permutation {");
1633       FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), i, j)
1634 	dump_printf (dump_kind, " %u", j);
1635       dump_printf (dump_kind, " }\n");
1636     }
1637   if (SLP_TREE_CHILDREN (node).is_empty ())
1638     return;
1639   dump_printf_loc (metadata, user_loc, "\tchildren");
1640   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1641     dump_printf (dump_kind, " %p", (void *)child);
1642   dump_printf (dump_kind, "\n");
1643   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1644     vect_print_slp_tree (dump_kind, loc, child, visited);
1645 }
1646 
1647 static void
1648 vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
1649 		     slp_tree node)
1650 {
1651   hash_set<slp_tree> visited;
1652   vect_print_slp_tree (dump_kind, loc, node, visited);
1653 }
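
/* Shape of the dump produced above (pointer values and stmts purely
   illustrative, derived from the format strings):

     node 0x2f00 (max_nunits=4, refcnt=1)
	stmt 0 a[0] = _1;
	stmt 1 a[1] = _2;
	children 0x2f80
     node (external) 0x2f80 (max_nunits=4, refcnt=1)
	{ _1, _2 }
   */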
1654 
1655 /* Mark the tree rooted at NODE with PURE_SLP.  */
1656 
1657 static void
1658 vect_mark_slp_stmts (slp_tree node, hash_set<slp_tree> &visited)
1659 {
1660   int i;
1661   stmt_vec_info stmt_info;
1662   slp_tree child;
1663 
1664   if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
1665     return;
1666 
1667   if (visited.add (node))
1668     return;
1669 
1670   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
1671     STMT_SLP_TYPE (stmt_info) = pure_slp;
1672 
1673   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1674     vect_mark_slp_stmts (child, visited);
1675 }
1676 
1677 static void
1678 vect_mark_slp_stmts (slp_tree node)
1679 {
1680   hash_set<slp_tree> visited;
1681   vect_mark_slp_stmts (node, visited);
1682 }
1683 
1684 /* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */
1685 
1686 static void
1687 vect_mark_slp_stmts_relevant (slp_tree node, hash_set<slp_tree> &visited)
1688 {
1689   int i;
1690   stmt_vec_info stmt_info;
1691   slp_tree child;
1692 
1693   if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
1694     return;
1695 
1696   if (visited.add (node))
1697     return;
1698 
1699   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
1700     {
1701       gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
1702                   || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
1703       STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
1704     }
1705 
1706   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1707     vect_mark_slp_stmts_relevant (child, visited);
1708 }
1709 
1710 static void
1711 vect_mark_slp_stmts_relevant (slp_tree node)
1712 {
1713   hash_set<slp_tree> visited;
1714   vect_mark_slp_stmts_relevant (node, visited);
1715 }
1716 
1717 /* Copy the SLP subtree rooted at NODE.  */
1718 
1719 static slp_tree
1720 slp_copy_subtree (slp_tree node, hash_map<slp_tree, slp_tree> &map)
1721 {
1722   unsigned i;
1723 
1724   bool existed_p;
1725   slp_tree &copy_ref = map.get_or_insert (node, &existed_p);
1726   if (existed_p)
1727     return copy_ref;
1728 
1729   copy_ref = XNEW (_slp_tree);
1730   slp_tree copy = copy_ref;
1731   memcpy (copy, node, sizeof (_slp_tree));
1732   if (SLP_TREE_SCALAR_STMTS (node).exists ())
1733     {
1734       SLP_TREE_SCALAR_STMTS (copy) = SLP_TREE_SCALAR_STMTS (node).copy ();
1735       stmt_vec_info stmt_info;
1736       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
1737 	STMT_VINFO_NUM_SLP_USES (stmt_info)++;
1738     }
1739   if (SLP_TREE_SCALAR_OPS (node).exists ())
1740     SLP_TREE_SCALAR_OPS (copy) = SLP_TREE_SCALAR_OPS (node).copy ();
1741   if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
1742     SLP_TREE_LOAD_PERMUTATION (copy) = SLP_TREE_LOAD_PERMUTATION (node).copy ();
1743   if (SLP_TREE_CHILDREN (node).exists ())
1744     SLP_TREE_CHILDREN (copy) = SLP_TREE_CHILDREN (node).copy ();
1745   gcc_assert (!SLP_TREE_VEC_STMTS (node).exists ());
1746   copy->refcnt = 0;
1747 
1748   slp_tree child;
1749   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (copy), i, child)
1750     {
1751       SLP_TREE_CHILDREN (copy)[i] = slp_copy_subtree (child, map);
1752       SLP_TREE_CHILDREN (copy)[i]->refcnt++;
1753     }
1754   return copy;
1755 }
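
/* A minimal usage sketch (mirroring the caller below).  MAP both
   memoizes copies and preserves sharing: a node referenced by two
   parents is cloned exactly once, with refcnt bumped once per parent
   edge.

     hash_map<slp_tree, slp_tree> map;
     slp_tree copy = slp_copy_subtree (root, map);
     copy->refcnt++;   // the caller owns one reference
   */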
1756 
1757 /* Rearrange the statements of NODE according to PERMUTATION.  */
1758 
1759 static void
1760 vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
1761                           vec<unsigned> permutation,
1762 			  hash_set<slp_tree> &visited)
1763 {
1764   unsigned int i;
1765   slp_tree child;
1766 
1767   if (visited.add (node))
1768     return;
1769 
1770   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1771     vect_slp_rearrange_stmts (child, group_size, permutation, visited);
1772 
1773   if (SLP_TREE_SCALAR_STMTS (node).exists ())
1774     {
1775       gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
1776       /* ???  Computation nodes are isomorphic and need no rearrangement.
1777 	 This is a quick hack to cover those where rearrangement breaks
1778 	 semantics because only the first stmt is guaranteed to have the
1779 	 correct operation code due to others being swapped or inverted.  */
1780       stmt_vec_info first = SLP_TREE_SCALAR_STMTS (node)[0];
1781       if (is_gimple_assign (first->stmt)
1782 	  && gimple_assign_rhs_code (first->stmt) == COND_EXPR)
1783 	return;
1784       vec<stmt_vec_info> tmp_stmts;
1785       tmp_stmts.create (group_size);
1786       tmp_stmts.quick_grow (group_size);
1787       stmt_vec_info stmt_info;
1788       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
1789 	tmp_stmts[permutation[i]] = stmt_info;
1790       SLP_TREE_SCALAR_STMTS (node).release ();
1791       SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
1792     }
1793   if (SLP_TREE_SCALAR_OPS (node).exists ())
1794     {
1795       gcc_assert (group_size == SLP_TREE_SCALAR_OPS (node).length ());
1796       vec<tree> tmp_ops;
1797       tmp_ops.create (group_size);
1798       tmp_ops.quick_grow (group_size);
1799       tree op;
1800       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
1801 	tmp_ops[permutation[i]] = op;
1802       SLP_TREE_SCALAR_OPS (node).release ();
1803       SLP_TREE_SCALAR_OPS (node) = tmp_ops;
1804     }
1805 }
1806 
1807 
1808 /* Attempt to reorder stmts in a reduction chain so that we don't
1809    require any load permutation.  Return true if that was possible,
1810    otherwise return false.  */
1811 
1812 static bool
1813 vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
1814 {
1815   unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1816   unsigned int i, j;
1817   unsigned int lidx;
1818   slp_tree node, load;
1819 
1820   /* Compare all the permutation sequences to the first one.  We know
1821      that at least one load is permuted.  */
1822   node = SLP_INSTANCE_LOADS (slp_instn)[0];
1823   if (!node->load_permutation.exists ())
1824     return false;
1825   for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
1826     {
1827       if (!load->load_permutation.exists ())
1828 	return false;
1829       FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
1830 	if (lidx != node->load_permutation[j])
1831 	  return false;
1832     }
1833 
1834   /* Check that the loads in the first sequence are different and there
1835      are no gaps between them.  */
1836   auto_sbitmap load_index (group_size);
1837   bitmap_clear (load_index);
1838   FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
1839     {
1840       if (lidx >= group_size)
1841 	return false;
1842       if (bitmap_bit_p (load_index, lidx))
1843 	return false;
1844 
1845       bitmap_set_bit (load_index, lidx);
1846     }
1847   for (i = 0; i < group_size; i++)
1848     if (!bitmap_bit_p (load_index, i))
1849       return false;
1850 
1851   /* This permutation is valid for reduction.  Since the order of the
1852      statements in the nodes is not important unless they are memory
1853      accesses, we can rearrange the statements in all the nodes
1854      according to the order of the loads.  */
1855 
1856   /* We have to unshare the SLP tree we modify.  */
1857   hash_map<slp_tree, slp_tree> map;
1858   slp_tree unshared = slp_copy_subtree (SLP_INSTANCE_TREE (slp_instn), map);
1859   vect_free_slp_tree (SLP_INSTANCE_TREE (slp_instn), false);
1860   unshared->refcnt++;
1861   SLP_INSTANCE_TREE (slp_instn) = unshared;
1862   FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1863     SLP_INSTANCE_LOADS (slp_instn)[i] = *map.get (node);
1864   node = SLP_INSTANCE_LOADS (slp_instn)[0];
1865 
1866   /* Do the actual re-arrangement.  */
1867   hash_set<slp_tree> visited;
1868   vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
1869 			    node->load_permutation, visited);
1870 
1871   /* We are done, no actual permutations need to be generated.  */
1872   poly_uint64 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
1873   FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1874     {
1875       stmt_vec_info first_stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
1876       first_stmt_info = DR_GROUP_FIRST_ELEMENT (first_stmt_info);
1877       /* But we have to keep those permutations that are required because
1878          of handling of gaps.  */
1879       if (known_eq (unrolling_factor, 1U)
1880 	  || (group_size == DR_GROUP_SIZE (first_stmt_info)
1881 	      && DR_GROUP_GAP (first_stmt_info) == 0))
1882 	SLP_TREE_LOAD_PERMUTATION (node).release ();
1883       else
1884 	for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j)
1885 	  SLP_TREE_LOAD_PERMUTATION (node)[j] = j;
1886     }
1887 
1888   return true;
1889 }
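
/* Worked example (illustrative): an SLP reduction whose two lanes load
   elements in reverse order,

     sum0 += a[2*i + 1];
     sum1 += a[2*i + 0];

   has load permutation { 1 0 }.  Because reduction order is irrelevant
   the scalar stmts of every node are rearranged to follow the load
   order and the permutation is dropped, or turned into the identity
   where gaps still require one.  */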
1890 
1891 /* Gather loads in the SLP graph NODE and populate the INST loads array.  */
1892 
1893 static void
1894 vect_gather_slp_loads (slp_instance inst, slp_tree node,
1895 		       hash_set<slp_tree> &visited)
1896 {
1897   if (visited.add (node))
1898     return;
1899 
1900   if (SLP_TREE_CHILDREN (node).length () == 0)
1901     {
1902       if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
1903 	return;
1904       stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
1905       if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1906 	  && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
1907 	SLP_INSTANCE_LOADS (inst).safe_push (node);
1908     }
1909   else
1910     {
1911       unsigned i;
1912       slp_tree child;
1913       FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1914 	vect_gather_slp_loads (inst, child, visited);
1915     }
1916 }
1917 
1918 static void
1919 vect_gather_slp_loads (slp_instance inst, slp_tree node)
1920 {
1921   hash_set<slp_tree> visited;
1922   vect_gather_slp_loads (inst, node, visited);
1923 }
1924 
1925 /* Check if the required load permutations in the SLP instance
1926    SLP_INSTN are supported.  */
1927 
1928 static bool
1929 vect_supported_load_permutation_p (slp_instance slp_instn)
1930 {
1931   unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1932   unsigned int i, j, k, next;
1933   slp_tree node;
1934 
1935   if (dump_enabled_p ())
1936     {
1937       dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
1938       FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1939 	if (node->load_permutation.exists ())
1940 	  FOR_EACH_VEC_ELT (node->load_permutation, j, next)
1941 	    dump_printf (MSG_NOTE, "%d ", next);
1942 	else
1943 	  for (k = 0; k < group_size; ++k)
1944 	    dump_printf (MSG_NOTE, "%d ", k);
1945       dump_printf (MSG_NOTE, "\n");
1946     }
1947 
1948   /* In case of reduction every load permutation is allowed, since the order
1949      of the reduction statements is not important (as opposed to the case of
1950      grouped stores).  The only condition we need to check is that all the
1951      load nodes are of the same size and have the same permutation (and then
1952      rearrange all the nodes of the SLP instance according to this
1953      permutation).  */
1954 
1955   /* Check that all the load nodes are of the same size.  */
1956   /* ???  Can't we assert this? */
1957   FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1958     if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
1959       return false;
1960 
1961   node = SLP_INSTANCE_TREE (slp_instn);
1962   stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
1963 
1964   /* Reduction (there are no data-refs in the root).
1965      In reduction chain the order of the loads is not important.  */
1966   if (!STMT_VINFO_DATA_REF (stmt_info)
1967       && !REDUC_GROUP_FIRST_ELEMENT (stmt_info)
1968       && !SLP_INSTANCE_ROOT_STMT (slp_instn))
1969     vect_attempt_slp_rearrange_stmts (slp_instn);
1970 
1971   /* In basic block vectorization we allow any subchain of an interleaving
1972      chain.
1973      FORNOW: not supported in loop SLP because of realignment complications.  */
1974   if (STMT_VINFO_BB_VINFO (stmt_info))
1975     {
1976       /* Check whether the loads in an instance form a subchain and thus
1977          no permutation is necessary.  */
1978       FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1979         {
1980 	  if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
1981 	    continue;
1982 	  bool subchain_p = true;
1983 	  stmt_vec_info next_load_info = NULL;
1984 	  stmt_vec_info load_info;
1985 	  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info)
1986 	    {
1987 	      if (j != 0
1988 		  && (next_load_info != load_info
1989 		      || DR_GROUP_GAP (load_info) != 1))
1990 		{
1991 		  subchain_p = false;
1992 		  break;
1993 		}
1994 	      next_load_info = DR_GROUP_NEXT_ELEMENT (load_info);
1995 	    }
1996 	  if (subchain_p)
1997 	    SLP_TREE_LOAD_PERMUTATION (node).release ();
1998 	  else
1999 	    {
2000 	      stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (node)[0];
2001 	      group_info = DR_GROUP_FIRST_ELEMENT (group_info);
2002 	      unsigned HOST_WIDE_INT nunits;
2003 	      unsigned k, maxk = 0;
2004 	      FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k)
2005 		if (k > maxk)
2006 		  maxk = k;
2007 	      /* In BB vectorization we may not actually use a loaded vector
2008 		 accessing elements in excess of DR_GROUP_SIZE.  */
2009 	      tree vectype = STMT_VINFO_VECTYPE (group_info);
2010 	      if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
2011 		  || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
2012 		{
2013 		  if (dump_enabled_p ())
2014 		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2015 				     "BB vectorization with gaps at the end of "
2016 				     "a load is not supported\n");
2017 		  return false;
2018 		}
2019 
2020 	      /* Verify the permutation can be generated.  */
2021 	      vec<tree> tem;
2022 	      unsigned n_perms;
2023 	      if (!vect_transform_slp_perm_load (node, tem, NULL,
2024 						 1, slp_instn, true, &n_perms))
2025 		{
2026 		  if (dump_enabled_p ())
2027 		    dump_printf_loc (MSG_MISSED_OPTIMIZATION,
2028 				     vect_location,
2029 				     "unsupported load permutation\n");
2030 		  return false;
2031 		}
2032 	    }
2033         }
2034       return true;
2035     }
2036 
2037   /* For loop vectorization verify we can generate the permutation.  Be
2038      conservative about the vectorization factor, there are permutations
2039      that will use three vector inputs only starting from a specific factor
2040      and the vectorization factor is not yet final.
2041      ???  The SLP instance unrolling factor might not be the maximum one.  */
2042   unsigned n_perms;
2043   poly_uint64 test_vf
2044     = force_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn),
2045 			     LOOP_VINFO_VECT_FACTOR
2046 			     (STMT_VINFO_LOOP_VINFO (stmt_info)));
2047   FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
2048     if (node->load_permutation.exists ()
2049 	&& !vect_transform_slp_perm_load (node, vNULL, NULL, test_vf,
2050 					  slp_instn, true, &n_perms))
2051       return false;
2052 
2053   return true;
2054 }
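
/* Subchain example (illustrative): given an interleaving chain
   b[0], b[1], b[2], b[3], a BB SLP node loading { b[2], b[3] } is a
   gap-free subchain and its load permutation is simply released,
   whereas { b[3], b[2] } has to be verified with
   vect_transform_slp_perm_load above.  */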
2055 
2056 
2057 /* Find the last scalar stmt in the SLP tree rooted at NODE.  */
2058 
2059 stmt_vec_info
2060 vect_find_last_scalar_stmt_in_slp (slp_tree node)
2061 {
2062   stmt_vec_info last = NULL;
2063   stmt_vec_info stmt_vinfo;
2064 
2065   for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt_vinfo); i++)
2066     {
2067       stmt_vinfo = vect_orig_stmt (stmt_vinfo);
2068       last = last ? get_later_stmt (stmt_vinfo, last) : stmt_vinfo;
2069     }
2070 
2071   return last;
2072 }
2073 
2074 /* Splits a group of stores, currently beginning at FIRST_VINFO, into
2075    two groups: one (still beginning at FIRST_VINFO) of size GROUP1_SIZE
2076    (also containing the first GROUP1_SIZE stmts, since stores are
2077    consecutive), the second containing the remainder.
2078    Return the first stmt in the second group.  */
2079 
2080 static stmt_vec_info
2081 vect_split_slp_store_group (stmt_vec_info first_vinfo, unsigned group1_size)
2082 {
2083   gcc_assert (DR_GROUP_FIRST_ELEMENT (first_vinfo) == first_vinfo);
2084   gcc_assert (group1_size > 0);
2085   int group2_size = DR_GROUP_SIZE (first_vinfo) - group1_size;
2086   gcc_assert (group2_size > 0);
2087   DR_GROUP_SIZE (first_vinfo) = group1_size;
2088 
2089   stmt_vec_info stmt_info = first_vinfo;
2090   for (unsigned i = group1_size; i > 1; i--)
2091     {
2092       stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2093       gcc_assert (DR_GROUP_GAP (stmt_info) == 1);
2094     }
2095   /* STMT_INFO is now the last element of the first group.  */
2096   stmt_vec_info group2 = DR_GROUP_NEXT_ELEMENT (stmt_info);
2097   DR_GROUP_NEXT_ELEMENT (stmt_info) = 0;
2098 
2099   DR_GROUP_SIZE (group2) = group2_size;
2100   for (stmt_info = group2; stmt_info;
2101        stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info))
2102     {
2103       DR_GROUP_FIRST_ELEMENT (stmt_info) = group2;
2104       gcc_assert (DR_GROUP_GAP (stmt_info) == 1);
2105     }
2106 
2107   /* For the second group, the DR_GROUP_GAP is that before the original group,
2108      plus skipping over the first vector.  */
2109   DR_GROUP_GAP (group2) = DR_GROUP_GAP (first_vinfo) + group1_size;
2110 
2111   /* DR_GROUP_GAP of the first group now has to skip over the second group too.  */
2112   DR_GROUP_GAP (first_vinfo) += group2_size;
2113 
2114   if (dump_enabled_p ())
2115     dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
2116 		     group1_size, group2_size);
2117 
2118   return group2;
2119 }
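
/* For example (illustrative), splitting a six-store group a[0..5] with
   GROUP1_SIZE == 4 yields a[0..3] and a[4..5]: the second group's
   DR_GROUP_GAP becomes the original gap plus 4 (skipping the first
   group) and the first group's gap grows by 2 (skipping the second).  */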
2120 
2121 /* Calculate the unrolling factor for an SLP instance with GROUP_SIZE
2122    statements and a vector of NUNITS elements.  */
2123 
2124 static poly_uint64
2125 calculate_unrolling_factor (poly_uint64 nunits, unsigned int group_size)
2126 {
2127   return exact_div (common_multiple (nunits, group_size), group_size);
2128 }
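
/* E.g. for NUNITS == 4 and GROUP_SIZE == 6 this yields
   common_multiple (4, 6) / 6 == 12 / 6 == 2: two copies of the group
   fill exactly three vectors.  When GROUP_SIZE is a multiple of NUNITS
   the factor is 1.  */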
2129 
2130 /* Analyze an SLP instance starting from a group of grouped stores.  Call
2131    vect_build_slp_tree to build a tree of packed stmts if possible.
2132    Return FALSE if it's impossible to SLP any stmt in the loop.  */
2133 
2134 static bool
2135 vect_analyze_slp_instance (vec_info *vinfo,
2136 			   scalar_stmts_to_slp_tree_map_t *bst_map,
2137 			   stmt_vec_info stmt_info, unsigned max_tree_size)
2138 {
2139   slp_instance new_instance;
2140   slp_tree node;
2141   unsigned int group_size;
2142   tree vectype, scalar_type = NULL_TREE;
2143   unsigned int i;
2144   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2145   vec<stmt_vec_info> scalar_stmts;
2146   bool constructor = false;
2147 
2148   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2149     {
2150       scalar_type = TREE_TYPE (DR_REF (dr));
2151       group_size = DR_GROUP_SIZE (stmt_info);
2152       vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
2153     }
2154   else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
2155     {
2156       gcc_assert (is_a <loop_vec_info> (vinfo));
2157       vectype = STMT_VINFO_VECTYPE (stmt_info);
2158       group_size = REDUC_GROUP_SIZE (stmt_info);
2159     }
2160   else if (is_gimple_assign (stmt_info->stmt)
2161 	    && gimple_assign_rhs_code (stmt_info->stmt) == CONSTRUCTOR)
2162     {
2163       vectype = TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt));
2164       group_size = CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt_info->stmt));
2165       constructor = true;
2166     }
2167   else
2168     {
2169       gcc_assert (is_a <loop_vec_info> (vinfo));
2170       vectype = STMT_VINFO_VECTYPE (stmt_info);
2171       group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();
2172     }
2173 
2174   if (!vectype)
2175     {
2176       if (dump_enabled_p ())
2177 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2178 			 "Build SLP failed: unsupported data-type %T\n",
2179 			 scalar_type);
2180 
2181       return false;
2182     }
2183   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2184 
2185   /* Create a node (a root of the SLP tree) for the packed grouped stores.  */
2186   scalar_stmts.create (group_size);
2187   stmt_vec_info next_info = stmt_info;
2188   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2189     {
2190       /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
2191       while (next_info)
2192         {
2193 	  scalar_stmts.safe_push (vect_stmt_to_vectorize (next_info));
2194 	  next_info = DR_GROUP_NEXT_ELEMENT (next_info);
2195         }
2196     }
2197   else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
2198     {
2199       /* Collect the reduction stmts and store them in
2200 	 SLP_TREE_SCALAR_STMTS.  */
2201       while (next_info)
2202         {
2203 	  scalar_stmts.safe_push (vect_stmt_to_vectorize (next_info));
2204 	  next_info = REDUC_GROUP_NEXT_ELEMENT (next_info);
2205         }
2206       /* Mark the first element of the reduction chain as reduction to properly
2207 	 transform the node.  In the reduction analysis phase only the last
2208 	 element of the chain is marked as reduction.  */
2209       STMT_VINFO_DEF_TYPE (stmt_info)
2210 	= STMT_VINFO_DEF_TYPE (scalar_stmts.last ());
2211       STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info))
2212 	= STMT_VINFO_REDUC_DEF (vect_orig_stmt (scalar_stmts.last ()));
2213     }
2214   else if (constructor)
2215     {
2216       tree rhs = gimple_assign_rhs1 (stmt_info->stmt);
2217       tree val;
2218       FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (rhs), i, val)
2219 	{
2220 	  if (TREE_CODE (val) == SSA_NAME)
2221 	    {
2222 	      gimple* def = SSA_NAME_DEF_STMT (val);
2223 	      stmt_vec_info def_info = vinfo->lookup_stmt (def);
2224 	      /* Value is defined in another basic block.  */
2225 	      if (!def_info)
2226 		return false;
2227 	      def_info = vect_stmt_to_vectorize (def_info);
2228 	      scalar_stmts.safe_push (def_info);
2229 	    }
2230 	  else
2231 	    return false;
2232 	}
2233       if (dump_enabled_p ())
2234 	dump_printf_loc (MSG_NOTE, vect_location,
2235 			 "Analyzing vectorizable constructor: %G\n",
2236 			 stmt_info->stmt);
2237     }
2238   else
2239     {
2240       /* Collect reduction statements.  */
2241       vec<stmt_vec_info> reductions = as_a <loop_vec_info> (vinfo)->reductions;
2242       for (i = 0; reductions.iterate (i, &next_info); i++)
2243 	scalar_stmts.safe_push (next_info);
2244     }
2245 
2246   /* Build the tree for the SLP instance.  */
2247   bool *matches = XALLOCAVEC (bool, group_size);
2248   unsigned npermutes = 0;
2249   poly_uint64 max_nunits = nunits;
2250   unsigned tree_size = 0;
2251   node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
2252 			      &max_nunits, matches, &npermutes,
2253 			      &tree_size, bst_map);
2254   if (node != NULL)
2255     {
2256       /* Calculate the unrolling factor based on the smallest type.  */
2257       poly_uint64 unrolling_factor
2258 	= calculate_unrolling_factor (max_nunits, group_size);
2259 
2260       if (maybe_ne (unrolling_factor, 1U)
2261 	  && is_a <bb_vec_info> (vinfo))
2262 	{
2263 	  unsigned HOST_WIDE_INT const_max_nunits;
2264 	  if (!max_nunits.is_constant (&const_max_nunits)
2265 	      || const_max_nunits > group_size)
2266 	    {
2267 	      if (dump_enabled_p ())
2268 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2269 				 "Build SLP failed: store group "
2270 				 "size not a multiple of the vector size "
2271 				 "in basic block SLP\n");
2272 	      vect_free_slp_tree (node, false);
2273 	      return false;
2274 	    }
2275 	  /* Fatal mismatch.  */
2276 	  matches[group_size / const_max_nunits * const_max_nunits] = false;
2277 	  vect_free_slp_tree (node, false);
2278 	}
2279       else
2280 	{
2281 	  /* Create a new SLP instance.  */
2282 	  new_instance = XNEW (class _slp_instance);
2283 	  SLP_INSTANCE_TREE (new_instance) = node;
2284 	  SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
2285 	  SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
2286 	  SLP_INSTANCE_LOADS (new_instance) = vNULL;
2287 	  SLP_INSTANCE_ROOT_STMT (new_instance) = constructor ? stmt_info : NULL;
2288 	  new_instance->reduc_phis = NULL;
2289 
2290 	  vect_gather_slp_loads (new_instance, node);
2291 	  if (dump_enabled_p ())
2292 	    dump_printf_loc (MSG_NOTE, vect_location,
2293 			     "SLP size %u vs. limit %u.\n",
2294 			     tree_size, max_tree_size);
2295 
2296 	  /* Compute the load permutation.  */
2297 	  slp_tree load_node;
2298 	  bool loads_permuted = false;
2299 	  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance), i, load_node)
2300 	    {
2301 	      if (!SLP_TREE_LOAD_PERMUTATION (load_node).exists ())
2302 		continue;
2303 	      unsigned j;
2304 	      stmt_vec_info load_info;
2305 	      bool this_load_permuted = false;
2306 	      stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT
2307 		  (SLP_TREE_SCALAR_STMTS (load_node)[0]);
2308 	      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load_info)
2309 		if (SLP_TREE_LOAD_PERMUTATION (load_node)[j] != j)
2310 		  {
2311 		    this_load_permuted = true;
2312 		    break;
2313 		  }
2314 	      if (!this_load_permuted
2315 		  /* The load requires permutation when unrolling exposes
2316 		     a gap either because the group is larger than the SLP
2317 		     group-size or because there is a gap between the groups.  */
2318 		  && group_size == DR_GROUP_SIZE (first_stmt_info)
2319 		  && DR_GROUP_GAP (first_stmt_info) == 0)
2320 		{
2321 		  SLP_TREE_LOAD_PERMUTATION (load_node).release ();
2322 		  continue;
2323 		}
2324 	      loads_permuted = true;
2325 	    }
2326 
2327 	  if (loads_permuted)
2328 	    {
2329 	      if (!vect_supported_load_permutation_p (new_instance))
2330 		{
2331 		  if (dump_enabled_p ())
2332 		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2333 				     "Build SLP failed: unsupported load "
2334 				     "permutation %G", stmt_info->stmt);
2335 		  vect_free_slp_instance (new_instance, false);
2336 		  return false;
2337 		}
2338 	    }
2339 
2340 	  /* If the loads and stores can be handled with load/store-lanes
2341 	     instructions, do not generate this SLP instance.  */
2342 	  if (is_a <loop_vec_info> (vinfo)
2343 	      && loads_permuted
2344 	      && dr && vect_store_lanes_supported (vectype, group_size, false))
2345 	    {
2346 	      slp_tree load_node;
2347 	      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance), i, load_node)
2348 		{
2349 		  stmt_vec_info stmt_vinfo = DR_GROUP_FIRST_ELEMENT
2350 		      (SLP_TREE_SCALAR_STMTS (load_node)[0]);
2351 		  /* Use SLP for strided accesses (or if we can't load-lanes).  */
2352 		  if (STMT_VINFO_STRIDED_P (stmt_vinfo)
2353 		      || ! vect_load_lanes_supported
2354 		      (STMT_VINFO_VECTYPE (stmt_vinfo),
2355 		       DR_GROUP_SIZE (stmt_vinfo), false))
2356 		    break;
2357 		}
2358 	      if (i == SLP_INSTANCE_LOADS (new_instance).length ())
2359 		{
2360 		  if (dump_enabled_p ())
2361 		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2362 				     "Built SLP cancelled: can use "
2363 				     "load/store-lanes\n");
2364 		  vect_free_slp_instance (new_instance, false);
2365 		  return false;
2366 		}
2367 	    }
2368 
2369 	  /* If this is a reduction chain with a conversion in front
2370 	     amend the SLP tree with a node for that.  */
2371 	  if (!dr
2372 	      && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
2373 	      && STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)
2374 	    {
2375 	      /* Get at the conversion stmt - we know it's the single use
2376 		 of the last stmt of the reduction chain.  */
2377 	      gimple *tem = vect_orig_stmt (scalar_stmts[group_size - 1])->stmt;
2378 	      use_operand_p use_p;
2379 	      gimple *use_stmt;
2380 	      bool r = single_imm_use (gimple_assign_lhs (tem),
2381 				       &use_p, &use_stmt);
2382 	      gcc_assert (r);
2383 	      next_info = vinfo->lookup_stmt (use_stmt);
2384 	      next_info = vect_stmt_to_vectorize (next_info);
2385 	      scalar_stmts = vNULL;
2386 	      scalar_stmts.create (group_size);
2387 	      for (unsigned i = 0; i < group_size; ++i)
2388 		scalar_stmts.quick_push (next_info);
2389 	      slp_tree conv = vect_create_new_slp_node (scalar_stmts);
2390 	      SLP_TREE_CHILDREN (conv).quick_push (node);
2391 	      SLP_INSTANCE_TREE (new_instance) = conv;
2392 	      /* We also have to fake this conversion stmt as SLP reduction
2393 		 group so we don't have to mess with too much code
2394 		 elsewhere.  */
2395 	      REDUC_GROUP_FIRST_ELEMENT (next_info) = next_info;
2396 	      REDUC_GROUP_NEXT_ELEMENT (next_info) = NULL;
2397 	    }
2398 
2399 	  vinfo->slp_instances.safe_push (new_instance);
2400 
2401 	  if (dump_enabled_p ())
2402 	    {
2403 	      dump_printf_loc (MSG_NOTE, vect_location,
2404 			       "Final SLP tree for instance:\n");
2405 	      vect_print_slp_tree (MSG_NOTE, vect_location,
2406 				   SLP_INSTANCE_TREE (new_instance));
2407 	    }
2408 
2409 	  return true;
2410 	}
2411     }
2412   else
2413     {
2414       /* Failed to SLP.  */
2415       /* Free the allocated memory.  */
2416       scalar_stmts.release ();
2417     }
2418 
2419   /* For basic block SLP, try to break the group up into multiples of the
2420      vector size.  */
2421   unsigned HOST_WIDE_INT const_nunits;
2422   if (is_a <bb_vec_info> (vinfo)
2423       && STMT_VINFO_GROUPED_ACCESS (stmt_info)
2424       && DR_GROUP_FIRST_ELEMENT (stmt_info)
2425       && nunits.is_constant (&const_nunits))
2426     {
2427       /* We consider breaking the group only on VF boundaries from the existing
2428 	 start.  */
2429       for (i = 0; i < group_size; i++)
2430 	if (!matches[i]) break;
2431 
2432       if (i >= const_nunits && i < group_size)
2433 	{
2434 	  /* Split into two groups at the first vector boundary before i.  */
2435 	  gcc_assert ((const_nunits & (const_nunits - 1)) == 0);
2436 	  unsigned group1_size = i & ~(const_nunits - 1);
2437 
2438 	  stmt_vec_info rest = vect_split_slp_store_group (stmt_info,
2439 							   group1_size);
2440 	  bool res = vect_analyze_slp_instance (vinfo, bst_map, stmt_info,
2441 						max_tree_size);
2442 	  /* If the first non-match was in the middle of a vector,
2443 	     skip the rest of that vector.  */
2444 	  if (group1_size < i)
2445 	    {
2446 	      i = group1_size + const_nunits;
2447 	      if (i < group_size)
2448 		rest = vect_split_slp_store_group (rest, const_nunits);
2449 	    }
2450 	  if (i < group_size)
2451 	    res |= vect_analyze_slp_instance (vinfo, bst_map,
2452 					      rest, max_tree_size);
2453 	  return res;
2454 	}
2455       /* Even though the first vector did not all match, we might be able to SLP
2456 	 (some) of the remainder.  FORNOW ignore this possibility.  */
2457     }
2458 
2459   return false;
2460 }
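
/* Splitting walk-through (illustrative): a BB store group of size 12
   with const_nunits == 4 and the first mismatch at i == 5 splits at
   group1_size == (5 & ~3) == 4.  Stmts 0-3 are re-analyzed as their own
   instance, the rest of the mismatching vector (stmts 4-7) is skipped,
   and analysis resumes with a new group starting at stmt 8.  */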
2461 
2462 
2463 /* Check if there are stmts in the loop that can be vectorized using SLP.
2464    Build SLP trees of packed scalar stmts if SLP is possible.  */
2465 
2466 opt_result
2467 vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
2468 {
2469   unsigned int i;
2470   stmt_vec_info first_element;
2471 
2472   DUMP_VECT_SCOPE ("vect_analyze_slp");
2473 
2474   scalar_stmts_to_slp_tree_map_t *bst_map
2475     = new scalar_stmts_to_slp_tree_map_t ();
2476 
2477   /* Find SLP sequences starting from groups of grouped stores.  */
2478   FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
2479     vect_analyze_slp_instance (vinfo, bst_map, first_element, max_tree_size);
2480 
2481   if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2482     {
2483       if (loop_vinfo->reduction_chains.length () > 0)
2484 	{
2485 	  /* Find SLP sequences starting from reduction chains.  */
2486 	  FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
2487 	    if (! vect_analyze_slp_instance (vinfo, bst_map, first_element,
2488 					     max_tree_size))
2489 	      {
2490 		/* Dissolve reduction chain group.  */
2491 		stmt_vec_info vinfo = first_element;
2492 		stmt_vec_info last = NULL;
2493 		while (vinfo)
2494 		  {
2495 		    stmt_vec_info next = REDUC_GROUP_NEXT_ELEMENT (vinfo);
2496 		    REDUC_GROUP_FIRST_ELEMENT (vinfo) = NULL;
2497 		    REDUC_GROUP_NEXT_ELEMENT (vinfo) = NULL;
2498 		    last = vinfo;
2499 		    vinfo = next;
2500 		  }
2501 		STMT_VINFO_DEF_TYPE (first_element) = vect_internal_def;
2502 		/* It can be still vectorized as part of an SLP reduction.  */
2503 		loop_vinfo->reductions.safe_push (last);
2504 	      }
2505 	}
2506 
2507       /* Find SLP sequences starting from groups of reductions.  */
2508       if (loop_vinfo->reductions.length () > 1)
2509 	vect_analyze_slp_instance (vinfo, bst_map, loop_vinfo->reductions[0],
2510 				   max_tree_size);
2511     }
2512 
2513   /* The map keeps a reference on SLP nodes built, release that.  */
2514   for (scalar_stmts_to_slp_tree_map_t::iterator it = bst_map->begin ();
2515        it != bst_map->end (); ++it)
2516     if ((*it).second)
2517       vect_free_slp_tree ((*it).second, false);
2518   delete bst_map;
2519 
2520   return opt_result::success ();
2521 }
2522 
2523 
2524 /* For each possible SLP instance decide whether to SLP it and calculate the
2525    overall unrolling factor needed to SLP the loop.  Return TRUE if we decided
2526    to SLP at least one instance.  */
2527 
2528 bool
2529 vect_make_slp_decision (loop_vec_info loop_vinfo)
2530 {
2531   unsigned int i;
2532   poly_uint64 unrolling_factor = 1;
2533   vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2534   slp_instance instance;
2535   int decided_to_slp = 0;
2536 
2537   DUMP_VECT_SCOPE ("vect_make_slp_decision");
2538 
2539   FOR_EACH_VEC_ELT (slp_instances, i, instance)
2540     {
2541       /* FORNOW: SLP if you can.  */
2542       /* All unroll factors have the form:
2543 
2544 	   GET_MODE_SIZE (vinfo->vector_mode) * X
2545 
2546 	 for some rational X, so they must have a common multiple.  */
2547       unrolling_factor
2548 	= force_common_multiple (unrolling_factor,
2549 				 SLP_INSTANCE_UNROLLING_FACTOR (instance));
2550 
2551       /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later we
2552 	 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
2553 	 loop-based vectorization.  Such stmts will be marked as HYBRID.  */
2554       vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance));
2555       decided_to_slp++;
2556     }
2557 
2558   LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
2559 
2560   if (decided_to_slp && dump_enabled_p ())
2561     {
2562       dump_printf_loc (MSG_NOTE, vect_location,
2563 		       "Decided to SLP %d instances. Unrolling factor ",
2564 		       decided_to_slp);
2565       dump_dec (MSG_NOTE, unrolling_factor);
2566       dump_printf (MSG_NOTE, "\n");
2567     }
2568 
2569   return (decided_to_slp > 0);
2570 }
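
/* E.g. (illustrative) two instances with unrolling factors 2 and 3
   force an overall SLP unrolling factor of
   force_common_multiple (2, 3) == 6 for the loop.  */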
2571 
2572 
2573 /* Private data for vect_detect_hybrid_slp.  */
2574 struct vdhs_data
2575 {
2576   loop_vec_info loop_vinfo;
2577   vec<stmt_vec_info> *worklist;
2578 };
2579 
2580 /* Walker for walk_gimple_op.  */
2581 
2582 static tree
2583 vect_detect_hybrid_slp (tree *tp, int *, void *data)
2584 {
2585   walk_stmt_info *wi = (walk_stmt_info *)data;
2586   vdhs_data *dat = (vdhs_data *)wi->info;
2587 
2588   if (wi->is_lhs)
2589     return NULL_TREE;
2590 
2591   stmt_vec_info def_stmt_info = dat->loop_vinfo->lookup_def (*tp);
2592   if (!def_stmt_info)
2593     return NULL_TREE;
2594   def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
2595   if (PURE_SLP_STMT (def_stmt_info))
2596     {
2597       if (dump_enabled_p ())
2598 	dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: %G",
2599 			 def_stmt_info->stmt);
2600       STMT_SLP_TYPE (def_stmt_info) = hybrid;
2601       dat->worklist->safe_push (def_stmt_info);
2602     }
2603 
2604   return NULL_TREE;
2605 }
2606 
2607 /* Find stmts that must be both vectorized and SLPed.  */
2608 
2609 void
2610 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
2611 {
2612   DUMP_VECT_SCOPE ("vect_detect_hybrid_slp");
2613 
2614   /* All stmts participating in SLP are marked pure_slp, all other
2615      stmts are loop_vect.
2616      First collect all loop_vect stmts into a worklist.  */
2617   auto_vec<stmt_vec_info> worklist;
2618   for (unsigned i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2619     {
2620       basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2621       for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
2622 	   gsi_next (&gsi))
2623 	{
2624 	  gphi *phi = gsi.phi ();
2625 	  stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (phi);
2626 	  if (!STMT_SLP_TYPE (stmt_info) && STMT_VINFO_RELEVANT (stmt_info))
2627 	    worklist.safe_push (stmt_info);
2628 	}
2629       for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
2630 	   gsi_next (&gsi))
2631 	{
2632 	  gimple *stmt = gsi_stmt (gsi);
2633 	  stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
2634 	  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2635 	    {
2636 	      for (gimple_stmt_iterator gsi2
2637 		     = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
2638 		   !gsi_end_p (gsi2); gsi_next (&gsi2))
2639 		{
2640 		  stmt_vec_info patt_info
2641 		    = loop_vinfo->lookup_stmt (gsi_stmt (gsi2));
2642 		  if (!STMT_SLP_TYPE (patt_info)
2643 		      && STMT_VINFO_RELEVANT (patt_info))
2644 		    worklist.safe_push (patt_info);
2645 		}
2646 	      stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
2647 	    }
2648 	  if (!STMT_SLP_TYPE (stmt_info) && STMT_VINFO_RELEVANT (stmt_info))
2649 	    worklist.safe_push (stmt_info);
2650 	}
2651     }
2652 
2653   /* Now we have a worklist of non-SLP stmts, follow use->def chains and
2654      mark any SLP vectorized stmt as hybrid.
2655      ???  We're visiting def stmts N times (once for each non-SLP and
2656      once for each hybrid-SLP use).  */
2657   walk_stmt_info wi;
2658   vdhs_data dat;
2659   dat.worklist = &worklist;
2660   dat.loop_vinfo = loop_vinfo;
2661   memset (&wi, 0, sizeof (wi));
2662   wi.info = (void *)&dat;
2663   while (!worklist.is_empty ())
2664     {
2665       stmt_vec_info stmt_info = worklist.pop ();
2666       /* Since SSA operands are not set up for pattern stmts we need
2667 	 to use walk_gimple_op.  */
2668       wi.is_lhs = 0;
2669       walk_gimple_op (stmt_info->stmt, vect_detect_hybrid_slp, &wi);
2670     }
2671 }
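
/* Hybrid example (illustrative): if a pure-SLP def also feeds a
   non-SLP stmt,

     x_1 = b[i] + c[i];     <-- pure_slp, part of an SLP tree
     _2 = x_1 * d_3;        <-- loop_vect only

   the walker reaches x_1 from the non-SLP use and marks its def stmt
   hybrid, so the loop-based path also produces a vectorized def for
   that use.  */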
2672 
2673 
2674 /* Initialize a bb_vec_info struct for the statements between
2675    REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive).  */
2676 
2677 _bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in,
2678 			    gimple_stmt_iterator region_end_in,
2679 			    vec_info_shared *shared)
2680   : vec_info (vec_info::bb, init_cost (NULL), shared),
2681     bb (gsi_bb (region_begin_in)),
2682     region_begin (region_begin_in),
2683     region_end (region_end_in)
2684 {
2685   gimple_stmt_iterator gsi;
2686 
2687   for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end);
2688        gsi_next (&gsi))
2689     {
2690       gimple *stmt = gsi_stmt (gsi);
2691       gimple_set_uid (stmt, 0);
2692       add_stmt (stmt);
2693     }
2694 
2695   bb->aux = this;
2696 }
2697 
2698 
2699 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2700    stmts in the basic block.  */
2701 
2702 _bb_vec_info::~_bb_vec_info ()
2703 {
2704   for (gimple_stmt_iterator si = region_begin;
2705        gsi_stmt (si) != gsi_stmt (region_end); gsi_next (&si))
2706     /* Reset region marker.  */
2707     gimple_set_uid (gsi_stmt (si), -1);
2708 
2709   bb->aux = NULL;
2710 }
2711 
2712 /* Subroutine of vect_slp_analyze_node_operations.  Handle the root of NODE,
2713    given that its child nodes have already been processed and that
2714    their def types currently match their SLP node's def type.  */
2715 
2716 static bool
2717 vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node,
2718 				    slp_instance node_instance,
2719 				    stmt_vector_for_cost *cost_vec)
2720 {
2721   stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
2722   gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
2723 
2724   /* Calculate the number of vector statements to be created for the
2725      scalar stmts in this node.  For SLP reductions it is equal to the
2726      number of vector statements in the children (which has already been
2727      calculated by the recursive call).  Otherwise it is the number of
2728      scalar elements in one scalar iteration (DR_GROUP_SIZE) multiplied by
2729      VF divided by the number of elements in a vector.  */
2730   if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
2731       && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
2732     {
2733       for (unsigned i = 0; i < SLP_TREE_CHILDREN (node).length (); ++i)
2734 	if (SLP_TREE_DEF_TYPE (SLP_TREE_CHILDREN (node)[i]) == vect_internal_def)
2735 	  {
2736 	    SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2737 	      = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[i]);
2738 	    break;
2739 	  }
2740     }
2741   else
2742     {
2743       poly_uint64 vf;
2744       if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2745 	vf = loop_vinfo->vectorization_factor;
2746       else
2747 	vf = 1;
2748       unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (node_instance);
2749       tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2750       SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2751 	= vect_get_num_vectors (vf * group_size, vectype);
2752     }
2753 
2754   bool dummy;
2755   return vect_analyze_stmt (stmt_info, &dummy, node, node_instance, cost_vec);
2756 }
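
/* E.g. (illustrative) for a store group with DR_GROUP_SIZE == 8,
   VF == 2 and a four-element vectype this computes
   vect_get_num_vectors (2 * 8, vectype) == 16 / 4 == 4 vector stmts
   for the node.  */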
2757 
2758 /* Try to build NODE from scalars, returning true on success.
2759    NODE_INSTANCE is the SLP instance that contains NODE.  */
2760 
2761 static bool
2762 vect_slp_convert_to_external (vec_info *vinfo, slp_tree node,
2763 			      slp_instance node_instance)
2764 {
2765   stmt_vec_info stmt_info;
2766   unsigned int i;
2767 
2768   if (!is_a <bb_vec_info> (vinfo)
2769       || node == SLP_INSTANCE_TREE (node_instance)
2770       || vect_contains_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (node)))
2771     return false;
2772 
2773   if (dump_enabled_p ())
2774     dump_printf_loc (MSG_NOTE, vect_location,
2775 		     "Building vector operands from scalars instead\n");
2776 
2777   /* Don't remove and free the child nodes here, since they could be
2778      referenced by other structures.  The analysis and scheduling phases
2779      (need to) ignore child nodes of anything that isn't vect_internal_def.  */
2780   unsigned int group_size = SLP_TREE_SCALAR_STMTS (node).length ();
2781   SLP_TREE_DEF_TYPE (node) = vect_external_def;
2782   SLP_TREE_SCALAR_OPS (node).safe_grow (group_size);
2783   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
2784     {
2785       tree lhs = gimple_get_lhs (vect_orig_stmt (stmt_info)->stmt);
2786       SLP_TREE_SCALAR_OPS (node)[i] = lhs;
2787     }
2788   return true;
2789 }
2790 
2791 /* Analyze statements contained in SLP tree NODE after recursively analyzing
2792    the subtree.  NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
2793 
2794    Return true if the operations are supported.  */
2795 
2796 static bool
2797 vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
2798 				  slp_instance node_instance,
2799 				  hash_set<slp_tree> &visited,
2800 				  hash_set<slp_tree> &lvisited,
2801 				  stmt_vector_for_cost *cost_vec)
2802 {
2803   int i, j;
2804   slp_tree child;
2805 
2806   if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
2807     return true;
2808 
2809   /* If we already analyzed the exact same set of scalar stmts we're done.
2810      We share the generated vector stmts for those.
2811      The SLP graph is acyclic so not caching whether we failed or succeeded
2812      doesn't result in any issue since we throw away the lvisited set
2813      when we fail.  */
2814   if (visited.contains (node)
2815       || lvisited.add (node))
2816     return true;
2817 
2818   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2819     if (!vect_slp_analyze_node_operations (vinfo, child, node_instance,
2820 					   visited, lvisited, cost_vec))
2821       return false;
2822 
2823   /* ???  We have to catch late the case where the same first scalar stmt
2824      appears in multiple SLP children with different def types, and fail.
2825      Remember the original def types first since SLP_TREE_DEF_TYPE doesn't
2826      necessarily match them when that is vect_internal_def.  */
2827   auto_vec<vect_def_type, 4> dt;
2828   dt.safe_grow (SLP_TREE_CHILDREN (node).length ());
2829   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2830     if (SLP_TREE_SCALAR_STMTS (child).length () != 0)
2831       dt[j] = STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]);
2832 
2833   /* Push SLP node def-type to stmt operands.  */
2834   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2835     if (SLP_TREE_DEF_TYPE (child) != vect_internal_def
2836 	&& SLP_TREE_SCALAR_STMTS (child).length () != 0)
2837       STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
2838 	= SLP_TREE_DEF_TYPE (child);
2839 
2840   /* Check everything worked out.  */
2841   bool res = true;
2842   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2843       if (SLP_TREE_SCALAR_STMTS (child).length () != 0)
2844 	{
2845 	  if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2846 	    {
2847 	      if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
2848 		  != SLP_TREE_DEF_TYPE (child))
2849 		res = false;
2850 	    }
2851 	  else if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
2852 		   != dt[j])
2853 	    res = false;
2854 	}
2855   if (!res && dump_enabled_p ())
2856     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2857 		     "not vectorized: same operand with different "
2858 		     "def type in stmt.\n");
2859 
2860   if (res)
2861     res = vect_slp_analyze_node_operations_1 (vinfo, node, node_instance,
2862 					      cost_vec);
2863 
2864   /* Restore def-types.  */
2865   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2866     if (SLP_TREE_SCALAR_STMTS (child).length () != 0)
2867       STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]) = dt[j];
2868 
2869   /* If this node can't be vectorized, try pruning the tree here rather
2870      than felling the whole thing.  */
2871   if (!res && vect_slp_convert_to_external (vinfo, node, node_instance))
2872     res = true;
2873 
2874   return res;
2875 }
2876 
2877 
2878 /* Analyze statements in SLP instances of VINFO.  Return true if the
2879    operations are supported. */
2880 
2881 bool
2882 vect_slp_analyze_operations (vec_info *vinfo)
2883 {
2884   slp_instance instance;
2885   int i;
2886 
2887   DUMP_VECT_SCOPE ("vect_slp_analyze_operations");
2888 
2889   hash_set<slp_tree> visited;
2890   for (i = 0; vinfo->slp_instances.iterate (i, &instance); )
2891     {
2892       hash_set<slp_tree> lvisited;
2893       stmt_vector_for_cost cost_vec;
2894       cost_vec.create (2);
2895       if (!vect_slp_analyze_node_operations (vinfo,
2896 					     SLP_INSTANCE_TREE (instance),
2897 					     instance, visited, lvisited,
2898 					     &cost_vec)
2899 	  /* Instances with a root stmt require vectorized defs for the
2900 	     SLP tree root.  */
2901 	  || (SLP_INSTANCE_ROOT_STMT (instance)
2902 	      && (SLP_TREE_DEF_TYPE (SLP_INSTANCE_TREE (instance))
2903 		  != vect_internal_def)))
2904         {
2905 	  slp_tree node = SLP_INSTANCE_TREE (instance);
2906 	  stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
2907 	  if (dump_enabled_p ())
2908 	    dump_printf_loc (MSG_NOTE, vect_location,
2909 			     "removing SLP instance operations starting from: %G",
2910 			     stmt_info->stmt);
2911 	  vect_free_slp_instance (instance, false);
2912           vinfo->slp_instances.ordered_remove (i);
2913 	  cost_vec.release ();
2914 	}
2915       else
2916 	{
2917 	  for (hash_set<slp_tree>::iterator x = lvisited.begin();
2918 	       x != lvisited.end(); ++x)
2919 	    visited.add (*x);
2920 	  i++;
2921 
2922 	  add_stmt_costs (vinfo->target_cost_data, &cost_vec);
2923 	  cost_vec.release ();
2924 	}
2925     }
2926 
2927   return !vinfo->slp_instances.is_empty ();
2928 }
2929 
2930 
2931 /* Compute the scalar cost of the SLP node NODE and its children
2932    and record it in COST_VEC.  Do not account defs that are marked in LIFE
2933    and update LIFE according to uses of NODE.  */
2934 
2935 static void
2936 vect_bb_slp_scalar_cost (basic_block bb,
2937 			 slp_tree node, vec<bool, va_heap> *life,
2938 			 stmt_vector_for_cost *cost_vec,
2939 			 hash_set<slp_tree> &visited)
2940 {
2941   unsigned i;
2942   stmt_vec_info stmt_info;
2943   slp_tree child;
2944 
2945   if (visited.add (node))
2946     return;
2947 
2948   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
2949     {
2950       gimple *stmt = stmt_info->stmt;
2951       vec_info *vinfo = stmt_info->vinfo;
2952       ssa_op_iter op_iter;
2953       def_operand_p def_p;
2954 
2955       if ((*life)[i])
2956 	continue;
2957 
2958       /* If there is a non-vectorized use of the defs then the scalar
2959          stmt is kept live in which case we do not account it or any
2960 	 required defs in the SLP children in the scalar cost.  This
2961 	 way we make the vectorization more costly when compared to
2962 	 the scalar cost.  */
2963       FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
2964 	{
2965 	  imm_use_iterator use_iter;
2966 	  gimple *use_stmt;
2967 	  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
2968 	    if (!is_gimple_debug (use_stmt))
2969 	      {
2970 		stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
2971 		if (!use_stmt_info || !PURE_SLP_STMT (use_stmt_info))
2972 		  {
2973 		    (*life)[i] = true;
2974 		    BREAK_FROM_IMM_USE_STMT (use_iter);
2975 		  }
2976 	      }
2977 	}
2978       if ((*life)[i])
2979 	continue;
2980 
2981       /* Count scalar stmts only once.  */
2982       if (gimple_visited_p (stmt))
2983 	continue;
2984       gimple_set_visited (stmt, true);
2985 
2986       vect_cost_for_stmt kind;
2987       if (STMT_VINFO_DATA_REF (stmt_info))
2988         {
2989           if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
2990 	    kind = scalar_load;
2991           else
2992 	    kind = scalar_store;
2993         }
2994       else if (vect_nop_conversion_p (stmt_info))
2995 	continue;
2996       else
2997 	kind = scalar_stmt;
2998       record_stmt_cost (cost_vec, 1, kind, stmt_info, 0, vect_body);
2999     }
3000 
3001   auto_vec<bool, 20> subtree_life;
3002   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3003     {
3004       if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
3005 	{
3006 	  /* Do not directly pass LIFE to the recursive call, copy it to
3007 	     confine changes in the callee to the current child/subtree.  */
3008 	  subtree_life.safe_splice (*life);
3009 	  vect_bb_slp_scalar_cost (bb, child, &subtree_life, cost_vec,
3010 				   visited);
3011 	  subtree_life.truncate (0);
3012 	}
3013     }
3014 }
3015 
3016 /* Check if vectorization of the basic block is profitable.  */
3017 
3018 static bool
3019 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
3020 {
3021   vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
3022   slp_instance instance;
3023   int i;
3024   unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
3025   unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
3026 
3027   /* Calculate scalar cost.  */
3028   stmt_vector_for_cost scalar_costs;
3029   scalar_costs.create (0);
3030   hash_set<slp_tree> visited;
3031   FOR_EACH_VEC_ELT (slp_instances, i, instance)
3032     {
3033       auto_vec<bool, 20> life;
3034       life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
3035       vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
3036 			       SLP_INSTANCE_TREE (instance),
3037 			       &life, &scalar_costs, visited);
3038     }
3039   void *target_cost_data = init_cost (NULL);
3040   add_stmt_costs (target_cost_data, &scalar_costs);
3041   scalar_costs.release ();
3042   unsigned dummy;
3043   finish_cost (target_cost_data, &dummy, &scalar_cost, &dummy);
3044   destroy_cost_data (target_cost_data);
3045 
3046   /* Unset visited flag.  */
3047   for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
3048        gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
3049     gimple_set_visited (gsi_stmt (gsi), false);
3050 
3051   /* Complete the target-specific cost calculation.  */
3052   finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
3053 	       &vec_inside_cost, &vec_epilogue_cost);
3054 
3055   vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
3056 
3057   if (dump_enabled_p ())
3058     {
3059       dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
3060       dump_printf (MSG_NOTE, "  Vector inside of basic block cost: %d\n",
3061 		   vec_inside_cost);
3062       dump_printf (MSG_NOTE, "  Vector prologue cost: %d\n", vec_prologue_cost);
3063       dump_printf (MSG_NOTE, "  Vector epilogue cost: %d\n", vec_epilogue_cost);
3064       dump_printf (MSG_NOTE, "  Scalar cost of basic block: %d\n", scalar_cost);
3065     }
3066 
3067   /* Vectorization is profitable if its cost is no more than the cost of
3068      the scalar version.  Note that we err on the vector side for equal
3069      cost because the cost estimate is otherwise quite pessimistic
3070      (constant uses are free on the scalar side but cost a load on the
3071      vector side, for example).  */
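  /* Editor's note (illustration, hypothetical numbers): with
     vec_prologue_cost = 4, vec_epilogue_cost = 0 and vec_inside_cost = 8,
     the check below compares 4 + 0 + 8 = 12 against SCALAR_COST; a
     scalar cost of 12 is accepted (12 > 12 is false, erring on the
     vector side), while a scalar cost of 10 rejects vectorization.  */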
3072   if (vec_outside_cost + vec_inside_cost > scalar_cost)
3073     return false;
3074 
3075   return true;
3076 }
3077 
3078 /* Find any vectorizable constructors and add them to the grouped_stores
3079    array.  */
3080 
3081 static void
3082 vect_slp_check_for_constructors (bb_vec_info bb_vinfo)
3083 {
3084   gimple_stmt_iterator gsi;
3085 
3086   for (gsi = bb_vinfo->region_begin;
3087        gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
3088     {
3089       gassign *stmt = dyn_cast <gassign *> (gsi_stmt (gsi));
3090       if (!stmt || gimple_assign_rhs_code (stmt) != CONSTRUCTOR)
3091 	continue;
3092 
3093       tree rhs = gimple_assign_rhs1 (stmt);
3094       if (!VECTOR_TYPE_P (TREE_TYPE (rhs))
3095 	  || maybe_ne (TYPE_VECTOR_SUBPARTS (TREE_TYPE (rhs)),
3096 		       CONSTRUCTOR_NELTS (rhs))
3097 	  || VECTOR_TYPE_P (TREE_TYPE (CONSTRUCTOR_ELT (rhs, 0)->value))
3098 	  || uniform_vector_p (rhs))
3099 	continue;
3100 
3101       stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (stmt);
3102       BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (stmt_info);
3103     }
3104 }
3105 
3106 /* Check if the region described by BB_VINFO can be vectorized, returning
3107    true if so.  When returning false, set FATAL to true if the same failure
3108    would prevent vectorization at other vector sizes, false if it is still
3109    worth trying other sizes.  N_STMTS is the number of statements in the
3110    region.  */
3111 
3112 static bool
3113 vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal)
3114 {
3115   DUMP_VECT_SCOPE ("vect_slp_analyze_bb");
3116 
3117   slp_instance instance;
3118   int i;
3119   poly_uint64 min_vf = 2;
3120 
3121   /* The first group of checks is independent of the vector size.  */
3122   fatal = true;
3123 
3124   /* Analyze the data references.  */
3125 
3126   if (!vect_analyze_data_refs (bb_vinfo, &min_vf, NULL))
3127     {
3128       if (dump_enabled_p ())
3129         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3130 			 "not vectorized: unhandled data-ref in basic "
3131 			 "block.\n");
3132       return false;
3133     }
3134 
3135   if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
3136     {
3137       if (dump_enabled_p ())
3138         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3139 			 "not vectorized: not enough data-refs in "
3140 			 "basic block.\n");
3141       return false;
3142     }
3143 
3144   if (!vect_analyze_data_ref_accesses (bb_vinfo))
3145     {
3146       if (dump_enabled_p ())
3147         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3148 			 "not vectorized: unhandled data access in "
3149 			 "basic block.\n");
3150       return false;
3151     }
3152 
3153   vect_slp_check_for_constructors (bb_vinfo);
3154 
3155   /* If there are no grouped stores in the region there is no need
3156      to continue with pattern recog as vect_analyze_slp will fail
3157      anyway.  */
3158   if (bb_vinfo->grouped_stores.is_empty ())
3159     {
3160       if (dump_enabled_p ())
3161 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3162 			 "not vectorized: no grouped stores in "
3163 			 "basic block.\n");
3164       return false;
3165     }
3166 
3167   /* The rest of the analysis below depends on the vector size in some way.  */
3168   fatal = false;
3169 
3170   vect_pattern_recog (bb_vinfo);
3171 
3172   /* Check the SLP opportunities in the basic block, analyze and build SLP
3173      trees.  */
3174   if (!vect_analyze_slp (bb_vinfo, n_stmts))
3175     {
3176       if (dump_enabled_p ())
3177 	{
3178 	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3179 			   "Failed to SLP the basic block.\n");
3180 	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3181 			   "not vectorized: failed to find SLP opportunities "
3182 			   "in basic block.\n");
3183 	}
3184       return false;
3185     }
3186 
3187   vect_record_base_alignments (bb_vinfo);
3188 
3189   /* Analyze and verify the alignment of data references and the
3190      dependence in the SLP instances.  */
3191   for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
3192     {
3193       if (! vect_slp_analyze_and_verify_instance_alignment (instance)
3194 	  || ! vect_slp_analyze_instance_dependence (instance))
3195 	{
3196 	  slp_tree node = SLP_INSTANCE_TREE (instance);
3197 	  stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
3198 	  if (dump_enabled_p ())
3199 	    dump_printf_loc (MSG_NOTE, vect_location,
3200 			     "removing SLP instance operations starting from: %G",
3201 			     stmt_info->stmt);
3202 	  vect_free_slp_instance (instance, false);
3203 	  BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
3204 	  continue;
3205 	}
3206 
3207       /* Mark all the statements that we want to vectorize as pure SLP and
3208 	 relevant.  */
3209       vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance));
3210       vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
3211       if (SLP_INSTANCE_ROOT_STMT (instance))
3212 	STMT_SLP_TYPE (SLP_INSTANCE_ROOT_STMT (instance)) = pure_slp;
3213 
3214       i++;
3215     }
3216   if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
3217     return false;
3218 
3219   if (!vect_slp_analyze_operations (bb_vinfo))
3220     {
3221       if (dump_enabled_p ())
3222         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3223 			 "not vectorized: bad operation in basic block.\n");
3224       return false;
3225     }
3226 
3227   /* Cost model: check if the vectorization is worthwhile.  */
3228   if (!unlimited_cost_model (NULL)
3229       && !vect_bb_vectorization_profitable_p (bb_vinfo))
3230     {
3231       if (dump_enabled_p ())
3232         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3233 			 "not vectorized: vectorization is not "
3234 			 "profitable.\n");
3235       return false;
3236     }
3237 
3238   if (dump_enabled_p ())
3239     dump_printf_loc (MSG_NOTE, vect_location,
3240 		     "Basic block will be vectorized using SLP\n");
3241   return true;
3242 }
3243 
3244 /* Subroutine of vect_slp_bb.  Try to vectorize the statements between
3245    REGION_BEGIN (inclusive) and REGION_END (exclusive), returning true
3246    on success.  The region has N_STMTS statements and has the datarefs
3247    given by DATAREFS.  */
3248 
3249 static bool
3250 vect_slp_bb_region (gimple_stmt_iterator region_begin,
3251 		    gimple_stmt_iterator region_end,
3252 		    vec<data_reference_p> datarefs,
3253 		    unsigned int n_stmts)
3254 {
3255   bb_vec_info bb_vinfo;
3256   auto_vector_modes vector_modes;
3257 
3258   /* Autodetect the first vector mode to try.  */
3259   machine_mode next_vector_mode = VOIDmode;
3260   targetm.vectorize.autovectorize_vector_modes (&vector_modes, false);
3261   unsigned int mode_i = 0;
3262 
3263   vec_info_shared shared;
3264 
3265   machine_mode autodetected_vector_mode = VOIDmode;
3266   while (1)
3267     {
3268       bool vectorized = false;
3269       bool fatal = false;
3270       bb_vinfo = new _bb_vec_info (region_begin, region_end, &shared);
3271 
3272       bool first_time_p = shared.datarefs.is_empty ();
3273       BB_VINFO_DATAREFS (bb_vinfo) = datarefs;
3274       if (first_time_p)
3275 	bb_vinfo->shared->save_datarefs ();
3276       else
3277 	bb_vinfo->shared->check_datarefs ();
3278       bb_vinfo->vector_mode = next_vector_mode;
3279 
3280       if (vect_slp_analyze_bb_1 (bb_vinfo, n_stmts, fatal)
3281 	  && dbg_cnt (vect_slp))
3282 	{
3283 	  if (dump_enabled_p ())
3284 	    {
3285 	      dump_printf_loc (MSG_NOTE, vect_location,
3286 			       "***** Analysis succeeded with vector mode"
3287 			       " %s\n", GET_MODE_NAME (bb_vinfo->vector_mode));
3288 	      dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");
3289 	    }
3290 
3291 	  bb_vinfo->shared->check_datarefs ();
3292 	  vect_schedule_slp (bb_vinfo);
3293 
3294 	  unsigned HOST_WIDE_INT bytes;
3295 	  if (dump_enabled_p ())
3296 	    {
3297 	      if (GET_MODE_SIZE (bb_vinfo->vector_mode).is_constant (&bytes))
3298 		dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
3299 				 "basic block part vectorized using %wu byte "
3300 				 "vectors\n", bytes);
3301 	      else
3302 		dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
3303 				 "basic block part vectorized using variable "
3304 				 "length vectors\n");
3305 	    }
3306 
3307 	  vectorized = true;
3308 	}
3309       else
3310 	{
3311 	  if (dump_enabled_p ())
3312 	    dump_printf_loc (MSG_NOTE, vect_location,
3313 			     "***** Analysis failed with vector mode %s\n",
3314 			     GET_MODE_NAME (bb_vinfo->vector_mode));
3315 	}
3316 
3317       if (mode_i == 0)
3318 	autodetected_vector_mode = bb_vinfo->vector_mode;
3319 
3320       if (!fatal)
3321 	while (mode_i < vector_modes.length ()
3322 	       && vect_chooses_same_modes_p (bb_vinfo, vector_modes[mode_i]))
3323 	  {
3324 	    if (dump_enabled_p ())
3325 	      dump_printf_loc (MSG_NOTE, vect_location,
3326 			       "***** The result for vector mode %s would"
3327 			       " be the same\n",
3328 			       GET_MODE_NAME (vector_modes[mode_i]));
3329 	    mode_i += 1;
3330 	  }
3331 
3332       delete bb_vinfo;
3333 
3334       if (mode_i < vector_modes.length ()
3335 	  && VECTOR_MODE_P (autodetected_vector_mode)
3336 	  && (related_vector_mode (vector_modes[mode_i],
3337 				   GET_MODE_INNER (autodetected_vector_mode))
3338 	      == autodetected_vector_mode)
3339 	  && (related_vector_mode (autodetected_vector_mode,
3340 				   GET_MODE_INNER (vector_modes[mode_i]))
3341 	      == vector_modes[mode_i]))
3342 	{
3343 	  if (dump_enabled_p ())
3344 	    dump_printf_loc (MSG_NOTE, vect_location,
3345 			     "***** Skipping vector mode %s, which would"
3346 			     " repeat the analysis for %s\n",
3347 			     GET_MODE_NAME (vector_modes[mode_i]),
3348 			     GET_MODE_NAME (autodetected_vector_mode));
3349 	  mode_i += 1;
3350 	}
3351 
3352       if (vectorized
3353 	  || mode_i == vector_modes.length ()
3354 	  || autodetected_vector_mode == VOIDmode
3355 	  /* If vect_slp_analyze_bb_1 signaled that analysis for all
3356 	     vector sizes will fail do not bother iterating.  */
3357 	  || fatal)
3358 	return vectorized;
3359 
3360       /* Try the next vector mode in the list.  */
3361       next_vector_mode = vector_modes[mode_i++];
3362       if (dump_enabled_p ())
3363 	dump_printf_loc (MSG_NOTE, vect_location,
3364 			 "***** Re-trying analysis with vector mode %s\n",
3365 			 GET_MODE_NAME (next_vector_mode));
3366     }
3367 }
3368 
3369 /* Main entry for the BB vectorizer.  Analyze and transform BB, returning
3370    true if anything in the basic block was vectorized.  */
3371 
3372 bool
3373 vect_slp_bb (basic_block bb)
3374 {
3375   gimple_stmt_iterator gsi;
3376   bool any_vectorized = false;
3377 
3378   gsi = gsi_start_bb (bb);
3379   while (!gsi_end_p (gsi))
3380     {
3381       gimple_stmt_iterator region_begin = gsi;
3382       vec<data_reference_p> datarefs = vNULL;
3383       int insns = 0;
3384 
3385       for (; !gsi_end_p (gsi); gsi_next (&gsi))
3386 	{
3387 	  gimple *stmt = gsi_stmt (gsi);
3388 	  if (is_gimple_debug (stmt))
3389 	    continue;
3390 	  insns++;
3391 
3392 	  if (gimple_location (stmt) != UNKNOWN_LOCATION)
3393 	    vect_location = stmt;
3394 
3395 	  if (!vect_find_stmt_data_reference (NULL, stmt, &datarefs))
3396 	    break;
3397 	}
3398 
3399       /* Skip leading unhandled stmts.  */
3400       if (gsi_stmt (region_begin) == gsi_stmt (gsi))
3401 	{
3402 	  gsi_next (&gsi);
3403 	  continue;
3404 	}
3405 
3406       gimple_stmt_iterator region_end = gsi;
3407 
3408       if (insns > param_slp_max_insns_in_bb)
3409 	{
3410 	  if (dump_enabled_p ())
3411 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3412 			     "not vectorized: too many instructions in "
3413 			     "basic block.\n");
3414 	}
3415       else if (vect_slp_bb_region (region_begin, region_end, datarefs, insns))
3416 	any_vectorized = true;
3417 
3418       if (gsi_end_p (region_end))
3419 	break;
3420 
3421       /* Skip the unhandled stmt.  */
3422       gsi_next (&gsi);
3423     }
3424 
3425   return any_vectorized;
3426 }
3427 
3428 
3429 /* Return true if operand OP_NUM of STMT_VINFO requires a boolean vector.  */
3430 
3431 static bool
3432 vect_mask_constant_operand_p (stmt_vec_info stmt_vinfo, unsigned op_num)
3433 {
3434   enum tree_code code = gimple_expr_code (stmt_vinfo->stmt);
3435   tree op, vectype;
3436   enum vect_def_type dt;
3437 
3438   /* For comparisons and COND_EXPR the type is chosen depending
3439      on the other, non-constant comparison operand.  */
3440   if (TREE_CODE_CLASS (code) == tcc_comparison)
3441     {
3442       gassign *stmt = as_a <gassign *> (stmt_vinfo->stmt);
3443       op = gimple_assign_rhs1 (stmt);
3444 
3445       if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &dt, &vectype))
3446 	gcc_unreachable ();
3447 
3448       return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3449     }
3450 
3451   if (code == COND_EXPR)
3452     {
3453       gassign *stmt = as_a <gassign *> (stmt_vinfo->stmt);
3454       tree cond = gimple_assign_rhs1 (stmt);
3455 
3456       if (TREE_CODE (cond) == SSA_NAME)
3457 	{
3458 	  if (op_num > 0)
3459 	    return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
3460 	  op = cond;
3461 	}
3462       else
3463 	{
3464 	  if (op_num > 1)
3465 	    return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
3466 	  op = TREE_OPERAND (cond, 0);
3467 	}
3468 
3469       if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &dt, &vectype))
3470 	gcc_unreachable ();
3471 
3472       return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3473     }
3474 
3475   return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
3476 }
3477 
3478 /* Build a variable-length vector in which the elements in ELTS are repeated
3479    to fill NRESULTS vectors of type VECTOR_TYPE.  Store the vectors in
3480    RESULTS and add any new instructions to SEQ.
3481 
3482    The approach we use is:
3483 
3484    (1) Find a vector mode VM with integer elements of mode IM.
3485 
3486    (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3487        ELTS' has mode IM.  This involves creating NELTS' VIEW_CONVERT_EXPRs
3488        from small vectors to IM.
3489 
3490    (3) Duplicate each ELTS'[I] into a vector of mode VM.
3491 
3492    (4) Use a tree of interleaving VEC_PERM_EXPRs to create VMs with the
3493        correct byte contents.
3494 
3495    (5) Use VIEW_CONVERT_EXPR to cast the final VMs to the required type.
3496 
3497    We try to find the largest IM for which this sequence works, in order
3498    to cut down on the number of interleaves.  */
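/* Editor's worked example (hypothetical sizes): NELTS = 4 32-bit
   elements a,b,c,d with IM = DImode and VM = V2DImode, so NELTS' = 2
   and nvectors = 2.  Step (2) view-converts {a,b} and {c,d} into two
   DI values ab and cd; step (3) duplicates them into {ab,ab} and
   {cd,cd}; step (4) interleaves those into {ab,cd}; step (5)
   view-converts back, yielding {a,b,c,d} in the required type.  */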
3499 
3500 void
3501 duplicate_and_interleave (vec_info *vinfo, gimple_seq *seq, tree vector_type,
3502 			  vec<tree> elts, unsigned int nresults,
3503 			  vec<tree> &results)
3504 {
3505   unsigned int nelts = elts.length ();
3506   tree element_type = TREE_TYPE (vector_type);
3507 
3508   /* (1) Find a vector mode VM with integer elements of mode IM.  */
3509   unsigned int nvectors = 1;
3510   tree new_vector_type;
3511   tree permutes[2];
3512   if (!can_duplicate_and_interleave_p (vinfo, nelts, element_type,
3513 				       &nvectors, &new_vector_type,
3514 				       permutes))
3515     gcc_unreachable ();
3516 
3517   /* Get a vector type that holds ELTS[0:NELTS/NELTS'].  */
3518   unsigned int partial_nelts = nelts / nvectors;
3519   tree partial_vector_type = build_vector_type (element_type, partial_nelts);
3520 
3521   tree_vector_builder partial_elts;
3522   auto_vec<tree, 32> pieces (nvectors * 2);
3523   pieces.quick_grow_cleared (nvectors * 2);
3524   for (unsigned int i = 0; i < nvectors; ++i)
3525     {
3526       /* (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3527 	     ELTS' has mode IM.  */
3528       partial_elts.new_vector (partial_vector_type, partial_nelts, 1);
3529       for (unsigned int j = 0; j < partial_nelts; ++j)
3530 	partial_elts.quick_push (elts[i * partial_nelts + j]);
3531       tree t = gimple_build_vector (seq, &partial_elts);
3532       t = gimple_build (seq, VIEW_CONVERT_EXPR,
3533 			TREE_TYPE (new_vector_type), t);
3534 
3535       /* (3) Duplicate each ELTS'[I] into a vector of mode VM.  */
3536       pieces[i] = gimple_build_vector_from_val (seq, new_vector_type, t);
3537     }
3538 
3539   /* (4) Use a tree of VEC_PERM_EXPRs to create a single VM with the
3540 	 correct byte contents.
3541 
3542      Conceptually, we need to repeat the following operation log2(nvectors)
3543      times, where hi_start = nvectors / 2:
3544 
3545 	out[i * 2] = VEC_PERM_EXPR (in[i], in[i + hi_start], lo_permute);
3546 	out[i * 2 + 1] = VEC_PERM_EXPR (in[i], in[i + hi_start], hi_permute);
3547 
3548      However, if each input repeats every N elements and the VF is
3549      a multiple of N * 2, the HI result is the same as the LO result.
3550      This will be true for the first N1 iterations of the outer loop,
3551      followed by N2 iterations for which both the LO and HI results
3552      are needed.  I.e.:
3553 
3554 	N1 + N2 = log2(nvectors)
3555 
3556      Each "N1 iteration" doubles the number of redundant vectors and the
3557      effect of the process as a whole is to have a sequence of nvectors/2**N1
3558      vectors that repeats 2**N1 times.  Rather than generate these redundant
3559      vectors, we halve the number of vectors for each N1 iteration.  */
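  /* Editor's illustration (hypothetical nvectors = 4): if on the first
     iteration the vector length is a multiple of 2 * in_repeat, the HI
     permutes are skipped and new_nvectors drops from 4 to 2 (an N1
     iteration); the second iteration keeps both LO and HI outputs (an
     N2 iteration), so N1 + N2 = 1 + 1 = log2 (4).  */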
3560   unsigned int in_start = 0;
3561   unsigned int out_start = nvectors;
3562   unsigned int new_nvectors = nvectors;
3563   for (unsigned int in_repeat = 1; in_repeat < nvectors; in_repeat *= 2)
3564     {
3565       unsigned int hi_start = new_nvectors / 2;
3566       unsigned int out_i = 0;
3567       for (unsigned int in_i = 0; in_i < new_nvectors; ++in_i)
3568 	{
3569 	  if ((in_i & 1) != 0
3570 	      && multiple_p (TYPE_VECTOR_SUBPARTS (new_vector_type),
3571 			     2 * in_repeat))
3572 	    continue;
3573 
3574 	  tree output = make_ssa_name (new_vector_type);
3575 	  tree input1 = pieces[in_start + (in_i / 2)];
3576 	  tree input2 = pieces[in_start + (in_i / 2) + hi_start];
3577 	  gassign *stmt = gimple_build_assign (output, VEC_PERM_EXPR,
3578 					       input1, input2,
3579 					       permutes[in_i & 1]);
3580 	  gimple_seq_add_stmt (seq, stmt);
3581 	  pieces[out_start + out_i] = output;
3582 	  out_i += 1;
3583 	}
3584       std::swap (in_start, out_start);
3585       new_nvectors = out_i;
3586     }
3587 
3588   /* (5) Use VIEW_CONVERT_EXPR to cast the final VM to the required type.  */
3589   results.reserve (nresults);
3590   for (unsigned int i = 0; i < nresults; ++i)
3591     if (i < new_nvectors)
3592       results.quick_push (gimple_build (seq, VIEW_CONVERT_EXPR, vector_type,
3593 					pieces[in_start + i]));
3594     else
3595       results.quick_push (results[i - new_nvectors]);
3596 }
3597 
3598 
3599 /* For constant and loop invariant defs of SLP_NODE this function returns
3600    (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
3601    OP_NODE determines the node for the operand containing the scalar
3602    operands.  */
3603 
3604 static void
3605 vect_get_constant_vectors (slp_tree slp_node, unsigned op_num,
3606                            vec<tree> *vec_oprnds)
3607 {
3608   slp_tree op_node = SLP_TREE_CHILDREN (slp_node)[op_num];
3609   stmt_vec_info stmt_vinfo = SLP_TREE_SCALAR_STMTS (slp_node)[0];
3610   vec_info *vinfo = stmt_vinfo->vinfo;
3611   unsigned HOST_WIDE_INT nunits;
3612   tree vec_cst;
3613   unsigned j, number_of_places_left_in_vector;
3614   tree vector_type;
3615   tree vop;
3616   int group_size = op_node->ops.length ();
3617   unsigned int vec_num, i;
3618   unsigned number_of_copies = 1;
3619   bool constant_p;
3620   tree neutral_op = NULL;
3621   gimple_seq ctor_seq = NULL;
3622   auto_vec<tree, 16> permute_results;
3623 
3624   /* ???  SLP analysis should compute the vector type for the
3625      constant / invariant and store it in the SLP node.  */
3626   tree op = op_node->ops[0];
3627   /* Check if vector type is a boolean vector.  */
3628   tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3629   if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
3630       && vect_mask_constant_operand_p (stmt_vinfo, op_num))
3631     vector_type = truth_type_for (stmt_vectype);
3632   else
3633     vector_type = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), op_node);
3634 
3635   /* ???  For lane-reducing ops we should also have the required number
3636      of vector stmts initialized rather than second-guessing here.  */
3637   unsigned int number_of_vectors;
3638   if (is_gimple_assign (stmt_vinfo->stmt)
3639       && (gimple_assign_rhs_code (stmt_vinfo->stmt) == SAD_EXPR
3640 	  || gimple_assign_rhs_code (stmt_vinfo->stmt) == DOT_PROD_EXPR
3641 	  || gimple_assign_rhs_code (stmt_vinfo->stmt) == WIDEN_SUM_EXPR))
3642     number_of_vectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3643   else
3644     number_of_vectors
3645       = vect_get_num_vectors (SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3646 			      * TYPE_VECTOR_SUBPARTS (stmt_vectype),
3647 			      vector_type);
3648   vec_oprnds->create (number_of_vectors);
3649   auto_vec<tree> voprnds (number_of_vectors);
3650 
3651   /* NUMBER_OF_COPIES is the number of times we need to use the same values in
3652      created vectors. It is greater than 1 if unrolling is performed.
3653 
3654      For example, we have two scalar operands, s1 and s2 (e.g., group of
3655      strided accesses of size two), while NUNITS is four (i.e., four scalars
3656      of this type can be packed in a vector).  The output vector will contain
3657      two copies of each scalar operand: {s1, s2, s1, s2}.  (NUMBER_OF_COPIES
3658      will be 2).
3659 
3660      If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
3661      containing the operands.
3662 
3663      For example, NUNITS is four as before, and the group size is 8
3664      (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
3665      {s5, s6, s7, s8}.  */
3666 
3667   /* When using duplicate_and_interleave, we just need one element for
3668      each scalar statement.  */
3669   if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
3670     nunits = group_size;
3671 
3672   number_of_copies = nunits * number_of_vectors / group_size;
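  /* Editor's note: with the first example above (NUNITS = 4,
     GROUP_SIZE = 2 and a single vector to create) this gives
     4 * 1 / 2 = 2 copies, matching {s1, s2, s1, s2}.  */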
3673 
3674   number_of_places_left_in_vector = nunits;
3675   constant_p = true;
3676   tree_vector_builder elts (vector_type, nunits, 1);
3677   elts.quick_grow (nunits);
3678   bool place_after_defs = false;
3679   for (j = 0; j < number_of_copies; j++)
3680     {
3681       for (i = group_size - 1; op_node->ops.iterate (i, &op); i--)
3682         {
3683           /* Create 'vect_ = {op0,op1,...,opn}'.  */
3684           number_of_places_left_in_vector--;
3685 	  tree orig_op = op;
3686 	  if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
3687 	    {
3688 	      if (CONSTANT_CLASS_P (op))
3689 		{
3690 		  if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3691 		    {
3692 		      /* Can't use VIEW_CONVERT_EXPR for booleans because
3693 			 of possibly different sizes of scalar value and
3694 			 vector element.  */
3695 		      if (integer_zerop (op))
3696 			op = build_int_cst (TREE_TYPE (vector_type), 0);
3697 		      else if (integer_onep (op))
3698 			op = build_all_ones_cst (TREE_TYPE (vector_type));
3699 		      else
3700 			gcc_unreachable ();
3701 		    }
3702 		  else
3703 		    op = fold_unary (VIEW_CONVERT_EXPR,
3704 				     TREE_TYPE (vector_type), op);
3705 		  gcc_assert (op && CONSTANT_CLASS_P (op));
3706 		}
3707 	      else
3708 		{
3709 		  tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
3710 		  gimple *init_stmt;
3711 		  if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3712 		    {
3713 		      tree true_val
3714 			= build_all_ones_cst (TREE_TYPE (vector_type));
3715 		      tree false_val
3716 			= build_zero_cst (TREE_TYPE (vector_type));
3717 		      gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op)));
3718 		      init_stmt = gimple_build_assign (new_temp, COND_EXPR,
3719 						       op, true_val,
3720 						       false_val);
3721 		    }
3722 		  else
3723 		    {
3724 		      op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
3725 				   op);
3726 		      init_stmt
3727 			= gimple_build_assign (new_temp, VIEW_CONVERT_EXPR,
3728 					       op);
3729 		    }
3730 		  gimple_seq_add_stmt (&ctor_seq, init_stmt);
3731 		  op = new_temp;
3732 		}
3733 	    }
3734 	  elts[number_of_places_left_in_vector] = op;
3735 	  if (!CONSTANT_CLASS_P (op))
3736 	    constant_p = false;
3737 	  if (TREE_CODE (orig_op) == SSA_NAME
3738 	      && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
3739 	      && STMT_VINFO_BB_VINFO (stmt_vinfo)
3740 	      && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
3741 		  == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
3742 	    place_after_defs = true;
3743 
3744           if (number_of_places_left_in_vector == 0)
3745             {
3746 	      if (constant_p
3747 		  ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits)
3748 		  : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits))
3749 		vec_cst = gimple_build_vector (&ctor_seq, &elts);
3750 	      else
3751 		{
3752 		  if (permute_results.is_empty ())
3753 		    duplicate_and_interleave (vinfo, &ctor_seq, vector_type,
3754 					      elts, number_of_vectors,
3755 					      permute_results);
3756 		  vec_cst = permute_results[number_of_vectors - j - 1];
3757 		}
3758 	      tree init;
3759 	      gimple_stmt_iterator gsi;
3760 	      if (place_after_defs)
3761 		{
3762 		  stmt_vec_info last_stmt_info
3763 		    = vect_find_last_scalar_stmt_in_slp (slp_node);
3764 		  gsi = gsi_for_stmt (last_stmt_info->stmt);
3765 		  init = vect_init_vector (stmt_vinfo, vec_cst, vector_type,
3766 					   &gsi);
3767 		}
3768 	      else
3769 		init = vect_init_vector (stmt_vinfo, vec_cst, vector_type,
3770 					 NULL);
3771 	      if (ctor_seq != NULL)
3772 		{
3773 		  gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
3774 		  gsi_insert_seq_before (&gsi, ctor_seq, GSI_SAME_STMT);
3775 		  ctor_seq = NULL;
3776 		}
3777 	      voprnds.quick_push (init);
3778 	      place_after_defs = false;
3779               number_of_places_left_in_vector = nunits;
3780 	      constant_p = true;
3781 	      elts.new_vector (vector_type, nunits, 1);
3782 	      elts.quick_grow (nunits);
3783             }
3784         }
3785     }
3786 
3787   /* Since the vectors are created in the reverse order, we should invert
3788      them.  */
3789   vec_num = voprnds.length ();
3790   for (j = vec_num; j != 0; j--)
3791     {
3792       vop = voprnds[j - 1];
3793       vec_oprnds->quick_push (vop);
3794     }
3795 
3796   /* In case that VF is greater than the unrolling factor needed for the SLP
3797      group of stmts, NUMBER_OF_VECTORS to be created is greater than
3798      NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
3799      to replicate the vectors.  */
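  /* Editor's note (hypothetical case): if only two distinct operand
     vectors v0 and v1 were built but NUMBER_OF_VECTORS is 4, the loop
     below cycles through them again, producing {v0, v1, v0, v1}.  */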
3800   while (number_of_vectors > vec_oprnds->length ())
3801     {
3802       tree neutral_vec = NULL;
3803 
3804       if (neutral_op)
3805         {
3806           if (!neutral_vec)
3807 	    neutral_vec = build_vector_from_val (vector_type, neutral_op);
3808 
3809           vec_oprnds->quick_push (neutral_vec);
3810         }
3811       else
3812         {
3813           for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
3814             vec_oprnds->quick_push (vop);
3815         }
3816     }
3817 }
3818 
3819 
3820 /* Get vectorized definitions from SLP_NODE, which contains the
3821    corresponding vectorized def-stmts.  */
3822 
3823 static void
3824 vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
3825 {
3826   stmt_vec_info vec_def_stmt_info;
3827   unsigned int i;
3828 
3829   gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
3830 
3831   FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt_info)
3832     vec_oprnds->quick_push (gimple_get_lhs (vec_def_stmt_info->stmt));
3833 }
3834 
3835 
3836 /* Get N vectorized definitions for SLP_NODE.
3837    If the scalar definitions are loop invariants or constants, collect them and
3838    call vect_get_constant_vectors() to create vector stmts.
3839    Otherwise, the def-stmts must be already vectorized and the vectorized stmts
3840    must be stored in the corresponding child of SLP_NODE, and we call
3841    vect_get_slp_vect_defs () to retrieve them.  */
3842 
3843 void
3844 vect_get_slp_defs (slp_tree slp_node, vec<vec<tree> > *vec_oprnds, unsigned n)
3845 {
3846   if (n == -1U)
3847     n = SLP_TREE_CHILDREN (slp_node).length ();
3848 
3849   for (unsigned i = 0; i < n; ++i)
3850     {
3851       slp_tree child = SLP_TREE_CHILDREN (slp_node)[i];
3852 
3853       vec<tree> vec_defs = vNULL;
3854 
3855       /* For each operand we check if it has vectorized definitions in a child
3856 	 node or we need to create them (for invariants and constants).  */
3857       if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
3858 	{
3859 	  vec_defs.create (SLP_TREE_NUMBER_OF_VEC_STMTS (child));
3860 	  vect_get_slp_vect_defs (child, &vec_defs);
3861 	}
3862       else
3863 	vect_get_constant_vectors (slp_node, i, &vec_defs);
3864 
3865       vec_oprnds->quick_push (vec_defs);
3866     }
3867 }
3868 
3869 /* Generate vector permute statements from a list of loads in DR_CHAIN.
3870    If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
3871    permute statements for the SLP node NODE of the SLP instance
3872    SLP_NODE_INSTANCE.  */
3873 
3874 bool
3875 vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
3876 			      gimple_stmt_iterator *gsi, poly_uint64 vf,
3877 			      slp_instance slp_node_instance, bool analyze_only,
3878 			      unsigned *n_perms)
3879 {
3880   stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
3881   vec_info *vinfo = stmt_info->vinfo;
3882   int vec_index = 0;
3883   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3884   unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
3885   unsigned int mask_element;
3886   machine_mode mode;
3887 
3888   if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
3889     return false;
3890 
3891   stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
3892 
3893   mode = TYPE_MODE (vectype);
3894   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3895 
3896   /* Initialize the vect stmts of NODE to properly insert the generated
3897      stmts later.  */
3898   if (! analyze_only)
3899     for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
3900 	 i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
3901       SLP_TREE_VEC_STMTS (node).quick_push (NULL);
3902 
3903   /* Generate permutation masks for every NODE. Number of masks for each NODE
3904      is equal to GROUP_SIZE.
3905      E.g., we have a group of three nodes with three loads from the same
3906      location in each node, and the vector size is 4.  I.e., we have an
3907      a0b0c0a1b1c1... sequence and we need to create the following vectors:
3908      for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3909      for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3910      ...
3911 
3912      The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
3913      The last mask is illegal since we assume two operands for permute
3914      operation, and the mask element values can't be outside that range.
3915      Hence, the last mask must be converted into {2,5,5,5}.
3916      For the first two permutations we need the first and the second input
3917      vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
3918      we need the second and the third vectors: {b1,c1,a2,b2} and
3919      {c2,a3,b3,c3}.  */
3920 
3921   int vect_stmts_counter = 0;
3922   unsigned int index = 0;
3923   int first_vec_index = -1;
3924   int second_vec_index = -1;
3925   bool noop_p = true;
3926   *n_perms = 0;
3927 
3928   vec_perm_builder mask;
3929   unsigned int nelts_to_build;
3930   unsigned int nvectors_per_build;
3931   bool repeating_p = (group_size == DR_GROUP_SIZE (stmt_info)
3932 		      && multiple_p (nunits, group_size));
3933   if (repeating_p)
3934     {
3935       /* A single vector contains a whole number of copies of the node, so:
3936 	 (a) all permutes can use the same mask; and
3937 	 (b) the permutes only need a single vector input.  */
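      /* Editor's note (hypothetical shapes): with group_size = 2 and
	 nunits = 8 one vector holds four copies of the group, so the
	 mask below is encoded as group_size patterns of three elements
	 each and reused for every permute.  */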
3938       mask.new_vector (nunits, group_size, 3);
3939       nelts_to_build = mask.encoded_nelts ();
3940       nvectors_per_build = SLP_TREE_VEC_STMTS (node).length ();
3941     }
3942   else
3943     {
3944       /* We need to construct a separate mask for each vector statement.  */
3945       unsigned HOST_WIDE_INT const_nunits, const_vf;
3946       if (!nunits.is_constant (&const_nunits)
3947 	  || !vf.is_constant (&const_vf))
3948 	return false;
3949       mask.new_vector (const_nunits, const_nunits, 1);
3950       nelts_to_build = const_vf * group_size;
3951       nvectors_per_build = 1;
3952     }
3953 
3954   unsigned int count = mask.encoded_nelts ();
3955   mask.quick_grow (count);
3956   vec_perm_indices indices;
3957 
3958   for (unsigned int j = 0; j < nelts_to_build; j++)
3959     {
3960       unsigned int iter_num = j / group_size;
3961       unsigned int stmt_num = j % group_size;
3962       unsigned int i = (iter_num * DR_GROUP_SIZE (stmt_info)
3963 			+ SLP_TREE_LOAD_PERMUTATION (node)[stmt_num]);
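      /* Editor's note: in the a0b0c0... example above (DR group size 3,
	 load permutation {0,0,0} for the a's, const_nunits = 4) this
	 yields I = 0,0,0,3,3,3,6,6,6,9,9,9, which chunks into the
	 documented masks {0,0,0,3} {3,3,6,6} {6,9,9,9}.  */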
3964       if (repeating_p)
3965 	{
3966 	  first_vec_index = 0;
3967 	  mask_element = i;
3968 	}
3969       else
3970 	{
3971 	  /* Enforced before the loop when !repeating_p.  */
3972 	  unsigned int const_nunits = nunits.to_constant ();
3973 	  vec_index = i / const_nunits;
3974 	  mask_element = i % const_nunits;
3975 	  if (vec_index == first_vec_index
3976 	      || first_vec_index == -1)
3977 	    {
3978 	      first_vec_index = vec_index;
3979 	    }
3980 	  else if (vec_index == second_vec_index
3981 		   || second_vec_index == -1)
3982 	    {
3983 	      second_vec_index = vec_index;
3984 	      mask_element += const_nunits;
3985 	    }
3986 	  else
3987 	    {
3988 	      if (dump_enabled_p ())
3989 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3990 				 "permutation requires at "
3991 				 "least three vectors %G",
3992 				 stmt_info->stmt);
3993 	      gcc_assert (analyze_only);
3994 	      return false;
3995 	    }
3996 
3997 	  gcc_assert (mask_element < 2 * const_nunits);
3998 	}
3999 
4000       if (mask_element != index)
4001 	noop_p = false;
4002       mask[index++] = mask_element;
4003 
4004       if (index == count && !noop_p)
4005 	{
4006 	  indices.new_vector (mask, second_vec_index == -1 ? 1 : 2, nunits);
4007 	  if (!can_vec_perm_const_p (mode, indices))
4008 	    {
4009 	      if (dump_enabled_p ())
4010 		{
4011 		  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
4012 				   vect_location,
4013 				   "unsupported vect permute { ");
4014 		  for (i = 0; i < count; ++i)
4015 		    {
4016 		      dump_dec (MSG_MISSED_OPTIMIZATION, mask[i]);
4017 		      dump_printf (MSG_MISSED_OPTIMIZATION, " ");
4018 		    }
4019 		  dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
4020 		}
4021 	      gcc_assert (analyze_only);
4022 	      return false;
4023 	    }
4024 
4025 	  ++*n_perms;
4026 	}
4027 
4028       if (index == count)
4029 	{
4030 	  if (!analyze_only)
4031 	    {
4032 	      tree mask_vec = NULL_TREE;
4033 
4034 	      if (! noop_p)
4035 		mask_vec = vect_gen_perm_mask_checked (vectype, indices);
4036 
4037 	      if (second_vec_index == -1)
4038 		second_vec_index = first_vec_index;
4039 
4040 	      for (unsigned int ri = 0; ri < nvectors_per_build; ++ri)
4041 		{
4042 		  /* Generate the permute statement if necessary.  */
4043 		  tree first_vec = dr_chain[first_vec_index + ri];
4044 		  tree second_vec = dr_chain[second_vec_index + ri];
4045 		  stmt_vec_info perm_stmt_info;
4046 		  if (! noop_p)
4047 		    {
4048 		      gassign *stmt = as_a <gassign *> (stmt_info->stmt);
4049 		      tree perm_dest
4050 			= vect_create_destination_var (gimple_assign_lhs (stmt),
4051 						       vectype);
4052 		      perm_dest = make_ssa_name (perm_dest);
4053 		      gassign *perm_stmt
4054 			= gimple_build_assign (perm_dest, VEC_PERM_EXPR,
4055 					       first_vec, second_vec,
4056 					       mask_vec);
4057 		      perm_stmt_info
4058 			= vect_finish_stmt_generation (stmt_info, perm_stmt,
4059 						       gsi);
4060 		    }
4061 		  else
4062 		    /* If mask was NULL_TREE generate the requested
4063 		       identity transform.  */
4064 		    perm_stmt_info = vinfo->lookup_def (first_vec);
4065 
4066 		  /* Store the vector statement in NODE.  */
4067 		  SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++]
4068 		    = perm_stmt_info;
4069 		}
4070 	    }
4071 
4072 	  index = 0;
4073 	  first_vec_index = -1;
4074 	  second_vec_index = -1;
4075 	  noop_p = true;
4076 	}
4077     }
4078 
4079   return true;
4080 }
4081 
4082 /* Vectorize SLP instance tree in postorder.  */
4083 
4084 static void
4085 vect_schedule_slp_instance (slp_tree node, slp_instance instance)
4086 {
4087   gimple_stmt_iterator si;
4088   stmt_vec_info stmt_info;
4089   unsigned int group_size;
4090   tree vectype;
4091   int i, j;
4092   slp_tree child;
4093 
4094   if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
4095     return;
4096 
4097   /* See if we have already vectorized the node in the graph of the
4098      SLP instance.  */
4099   if (SLP_TREE_VEC_STMTS (node).exists ())
4100     return;
4101 
4102   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
4103     vect_schedule_slp_instance (child, instance);
4104 
4105   /* Push SLP node def-type to stmts.  */
4106   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
4107     if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
4108       {
4109 	stmt_vec_info child_stmt_info;
4110 	FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, child_stmt_info)
4111 	  STMT_VINFO_DEF_TYPE (child_stmt_info) = SLP_TREE_DEF_TYPE (child);
4112       }
4113 
4114   stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
4115 
4116   /* VECTYPE is the type of the destination.  */
4117   vectype = STMT_VINFO_VECTYPE (stmt_info);
4118   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4119   group_size = SLP_INSTANCE_GROUP_SIZE (instance);
4120 
4121   gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0);
4122   SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));
4123 
4124   if (dump_enabled_p ())
4125     dump_printf_loc (MSG_NOTE, vect_location,
4126 		     "------>vectorizing SLP node starting from: %G",
4127 		     stmt_info->stmt);
4128 
4129   /* Vectorized stmts go before the last scalar stmt which is where
4130      all uses are ready.  */
4131   stmt_vec_info last_stmt_info = vect_find_last_scalar_stmt_in_slp (node);
4132   si = gsi_for_stmt (last_stmt_info->stmt);
4133 
4134   /* Handle two-operation SLP nodes by vectorizing the group with
4135      both operations and then performing a merge.  */
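  /* Editor's sketch (hypothetical group): for scalar stmts
     { a0 + b0, a1 - b1, a2 + b2, a3 - b3 } we get code0 = PLUS_EXPR,
     ocode = MINUS_EXPR and mask = {0,1,0,1}; the node is vectorized
     once as all-plus (v0) and once as all-minus (v1), and with
     const_nunits = 4 the VEC_PERM_EXPR selector {0,5,2,7} merges the
     even lanes of v0 with the odd lanes of v1.  */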
4136   bool done_p = false;
4137   if (SLP_TREE_TWO_OPERATORS (node))
4138     {
4139       gassign *stmt = as_a <gassign *> (stmt_info->stmt);
4140       enum tree_code code0 = gimple_assign_rhs_code (stmt);
4141       enum tree_code ocode = ERROR_MARK;
4142       stmt_vec_info ostmt_info;
4143       vec_perm_builder mask (group_size, group_size, 1);
4144       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt_info)
4145 	{
4146 	  gassign *ostmt = as_a <gassign *> (ostmt_info->stmt);
4147 	  if (gimple_assign_rhs_code (ostmt) != code0)
4148 	    {
4149 	      mask.quick_push (1);
4150 	      ocode = gimple_assign_rhs_code (ostmt);
4151 	    }
4152 	  else
4153 	    mask.quick_push (0);
4154 	}
4155       if (ocode != ERROR_MARK)
4156 	{
4157 	  vec<stmt_vec_info> v0;
4158 	  vec<stmt_vec_info> v1;
4159 	  unsigned j;
4160 	  tree tmask = NULL_TREE;
4161 	  vect_transform_stmt (stmt_info, &si, node, instance);
4162 	  v0 = SLP_TREE_VEC_STMTS (node).copy ();
4163 	  SLP_TREE_VEC_STMTS (node).truncate (0);
4164 	  gimple_assign_set_rhs_code (stmt, ocode);
4165 	  vect_transform_stmt (stmt_info, &si, node, instance);
4166 	  gimple_assign_set_rhs_code (stmt, code0);
4167 	  v1 = SLP_TREE_VEC_STMTS (node).copy ();
4168 	  SLP_TREE_VEC_STMTS (node).truncate (0);
4169 	  tree meltype = build_nonstandard_integer_type
4170 	      (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype))), 1);
4171 	  tree mvectype = get_same_sized_vectype (meltype, vectype);
4172 	  unsigned k = 0, l;
4173 	  for (j = 0; j < v0.length (); ++j)
4174 	    {
4175 	      /* Enforced by vect_build_slp_tree, which rejects variable-length
4176 		 vectors for SLP_TREE_TWO_OPERATORS.  */
4177 	      unsigned int const_nunits = nunits.to_constant ();
4178 	      tree_vector_builder melts (mvectype, const_nunits, 1);
4179 	      for (l = 0; l < const_nunits; ++l)
4180 		{
4181 		  if (k >= group_size)
4182 		    k = 0;
4183 		  tree t = build_int_cst (meltype,
4184 					  mask[k++] * const_nunits + l);
4185 		  melts.quick_push (t);
4186 		}
4187 	      tmask = melts.build ();
4188 
4189 	      /* ???  Not all targets support a VEC_PERM_EXPR with a
4190 	         constant mask that would translate to a vec_merge RTX
4191 		 (with their vec_perm_const_ok).  We can either not
4192 		 vectorize in that case or let veclower do its job.
4193 		 Unfortunately that isn't too great and at least for
4194 		 plus/minus we'd eventually like to match targets
4195 		 vector addsub instructions.  */
4196 	      gimple *vstmt;
4197 	      vstmt = gimple_build_assign (make_ssa_name (vectype),
4198 					   VEC_PERM_EXPR,
4199 					   gimple_assign_lhs (v0[j]->stmt),
4200 					   gimple_assign_lhs (v1[j]->stmt),
4201 					   tmask);
4202 	      SLP_TREE_VEC_STMTS (node).quick_push
4203 		(vect_finish_stmt_generation (stmt_info, vstmt, &si));
4204 	    }
4205 	  v0.release ();
4206 	  v1.release ();
4207 	  done_p = true;
4208 	}
4209     }
4210   if (!done_p)
4211     vect_transform_stmt (stmt_info, &si, node, instance);
4212 
4213   /* Restore stmt def-types.  */
4214   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
4215     if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
4216       {
4217 	stmt_vec_info child_stmt_info;
4218 	FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, child_stmt_info)
4219 	  STMT_VINFO_DEF_TYPE (child_stmt_info) = vect_internal_def;
4220       }
4221 }
4222 
4223 /* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
4224    For loop vectorization this is done in vectorizable_call, but for SLP
4225    it needs to be deferred until end of vect_schedule_slp, because multiple
4226    SLP instances may refer to the same scalar stmt.  */
4227 
4228 static void
4229 vect_remove_slp_scalar_calls (slp_tree node, hash_set<slp_tree> &visited)
4230 {
4231   gimple *new_stmt;
4232   gimple_stmt_iterator gsi;
4233   int i;
4234   slp_tree child;
4235   tree lhs;
4236   stmt_vec_info stmt_info;
4237 
4238   if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
4239     return;
4240 
4241   if (visited.add (node))
4242     return;
4243 
4244   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
4245     vect_remove_slp_scalar_calls (child, visited);
4246 
4247   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
4248     {
4249       gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
4250       if (!stmt || gimple_bb (stmt) == NULL)
4251 	continue;
4252       if (is_pattern_stmt_p (stmt_info)
4253 	  || !PURE_SLP_STMT (stmt_info))
4254 	continue;
4255       lhs = gimple_call_lhs (stmt);
4256       new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
4257       gsi = gsi_for_stmt (stmt);
4258       stmt_info->vinfo->replace_stmt (&gsi, stmt_info, new_stmt);
4259       SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
4260     }
4261 }
4262 
4263 static void
4264 vect_remove_slp_scalar_calls (slp_tree node)
4265 {
4266   hash_set<slp_tree> visited;
4267   vect_remove_slp_scalar_calls (node, visited);
4268 }
4269 
4270 /* Vectorize the instance root.  */
4271 
4272 void
4273 vectorize_slp_instance_root_stmt (slp_tree node, slp_instance instance)
4274 {
4275   gassign *rstmt = NULL;
4276 
4277   if (SLP_TREE_NUMBER_OF_VEC_STMTS (node) == 1)
4278     {
4279       stmt_vec_info child_stmt_info;
4280       int j;
4281 
4282       FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (node), j, child_stmt_info)
4283 	{
4284 	  tree vect_lhs = gimple_get_lhs (child_stmt_info->stmt);
4285 	  tree root_lhs = gimple_get_lhs (instance->root_stmt->stmt);
4286 	  if (!useless_type_conversion_p (TREE_TYPE (root_lhs),
4287 					  TREE_TYPE (vect_lhs)))
4288 	    vect_lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (root_lhs),
4289 			       vect_lhs);
4290 	  rstmt = gimple_build_assign (root_lhs, vect_lhs);
4291 	  break;
4292 	}
4293     }
4294   else if (SLP_TREE_NUMBER_OF_VEC_STMTS (node) > 1)
4295     {
4296       int nelts = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
4297       stmt_vec_info child_stmt_info;
4298       int j;
4299       vec<constructor_elt, va_gc> *v;
4300       vec_alloc (v, nelts);
4301 
4302       FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (node), j, child_stmt_info)
4303 	{
4304 	  CONSTRUCTOR_APPEND_ELT (v,
4305 				  NULL_TREE,
4306 				  gimple_get_lhs (child_stmt_info->stmt));
4307 	}
4308       tree lhs = gimple_get_lhs (instance->root_stmt->stmt);
4309       tree rtype = TREE_TYPE (gimple_assign_rhs1 (instance->root_stmt->stmt));
4310       tree r_constructor = build_constructor (rtype, v);
4311       rstmt = gimple_build_assign (lhs, r_constructor);
4312     }
4313 
4314   gcc_assert (rstmt);
4315 
4316   gimple_stmt_iterator rgsi = gsi_for_stmt (instance->root_stmt->stmt);
4317   gsi_replace (&rgsi, rstmt, true);
4318 }
4319 
4320 /* Generate vector code for all SLP instances in the loop/basic block.  */
4321 
4322 void
4323 vect_schedule_slp (vec_info *vinfo)
4324 {
4325   vec<slp_instance> slp_instances;
4326   slp_instance instance;
4327   unsigned int i;
4328 
4329   slp_instances = vinfo->slp_instances;
4330   FOR_EACH_VEC_ELT (slp_instances, i, instance)
4331     {
4332       slp_tree node = SLP_INSTANCE_TREE (instance);
4333       /* Schedule the tree of INSTANCE.  */
4334       vect_schedule_slp_instance (node, instance);
4335 
4336       if (SLP_INSTANCE_ROOT_STMT (instance))
4337 	vectorize_slp_instance_root_stmt (node, instance);
4338 
4339       if (dump_enabled_p ())
4340 	dump_printf_loc (MSG_NOTE, vect_location,
4341                          "vectorizing stmts using SLP.\n");
4342     }
4343 
4344   FOR_EACH_VEC_ELT (slp_instances, i, instance)
4345     {
4346       slp_tree root = SLP_INSTANCE_TREE (instance);
4347       stmt_vec_info store_info;
4348       unsigned int j;
4349 
4350       /* For reductions set the latch values of the vectorized PHIs.  */
4351       if (instance->reduc_phis
4352 	  && STMT_VINFO_REDUC_TYPE (SLP_TREE_SCALAR_STMTS
4353 			(instance->reduc_phis)[0]) != FOLD_LEFT_REDUCTION
4354 	  && STMT_VINFO_REDUC_TYPE (SLP_TREE_SCALAR_STMTS
4355 			(instance->reduc_phis)[0]) != EXTRACT_LAST_REDUCTION)
4356 	{
4357 	  slp_tree slp_node = root;
4358 	  slp_tree phi_node = instance->reduc_phis;
4359 	  gphi *phi = as_a <gphi *> (SLP_TREE_SCALAR_STMTS (phi_node)[0]->stmt);
4360 	  edge e = loop_latch_edge (gimple_bb (phi)->loop_father);
4361 	  gcc_assert (SLP_TREE_VEC_STMTS (phi_node).length ()
4362 		      == SLP_TREE_VEC_STMTS (slp_node).length ());
4363 	  for (unsigned j = 0; j < SLP_TREE_VEC_STMTS (phi_node).length (); ++j)
4364 	    add_phi_arg (as_a <gphi *> (SLP_TREE_VEC_STMTS (phi_node)[j]->stmt),
4365 			 gimple_get_lhs (SLP_TREE_VEC_STMTS (slp_node)[j]->stmt),
4366 			 e, gimple_phi_arg_location (phi, e->dest_idx));
4367 	}
4368 
4369       /* Remove scalar call stmts.  Do not do this for basic-block
4370 	 vectorization as not all uses may be vectorized.
4371 	 ???  Why should this be necessary?  DCE should be able to
4372 	 remove the stmts itself.
4373 	 ???  For BB vectorization we can as well remove scalar
4374 	 stmts starting from the SLP tree root if they have no
4375 	 uses.  */
4376       if (is_a <loop_vec_info> (vinfo))
4377 	vect_remove_slp_scalar_calls (root);
4378 
4379       for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store_info)
4380                   && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
4381         {
4382 	  if (!STMT_VINFO_DATA_REF (store_info))
4383 	    break;
4384 
4385 	  if (SLP_INSTANCE_ROOT_STMT (instance))
4386 	    continue;
4387 
4388 	  store_info = vect_orig_stmt (store_info);
4389 	  /* Free the attached stmt_vec_info and remove the stmt.  */
4390 	  vinfo->remove_stmt (store_info);
4391         }
4392     }
4393 }
4394