/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2017 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "params.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
#include "gimple-walk.h"
#include "dbgcnt.h"


/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */

static void
vect_free_slp_tree (slp_tree node)
{
  int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree (child);

  gimple *stmt;
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    /* After the transform some stmts are removed and thus their vinfo is
       gone.  */
    if (vinfo_for_stmt (stmt))
      {
	gcc_assert (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) > 0);
	STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))--;
      }

  SLP_TREE_CHILDREN (node).release ();
  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_VEC_STMTS (node).release ();
  SLP_TREE_LOAD_PERMUTATION (node).release ();

  free (node);
}


/* Free the memory allocated for the SLP instance.  */

void
vect_free_slp_instance (slp_instance instance)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  SLP_INSTANCE_LOADS (instance).release ();
  free (instance);
}


/* Create an SLP node for SCALAR_STMTS.  */

static slp_tree
vect_create_new_slp_node (vec<gimple *> scalar_stmts)
{
  slp_tree node;
  gimple *stmt = scalar_stmts[0];
  unsigned int nops;

  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
	nops++;
    }
  else
    return NULL;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_VEC_STMTS (node).create (0);
  SLP_TREE_CHILDREN (node).create (nops);
  SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
  SLP_TREE_TWO_OPERATORS (node) = false;
  SLP_TREE_DEF_TYPE (node) = vect_internal_def;

  unsigned i;
  FOR_EACH_VEC_ELT (scalar_stmts, i, stmt)
    STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))++;

  return node;
}


/* This structure is used in creation of an SLP tree.  Each instance
   corresponds to the same operand in a group of scalar stmts in an SLP
   node.  */
typedef struct _slp_oprnd_info
{
  /* Def-stmts for the operands.  */
  vec<gimple *> def_stmts;
  /* Information about the first statement: its vector def-type and the
     type of its operand, plus whether the defs of the first and second
     stmts of the group are pattern stmts.  */
  tree first_op_type;
  enum vect_def_type first_dt;
  bool first_pattern;
  bool second_pattern;
} *slp_oprnd_info;


/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */
static vec<slp_oprnd_info>
vect_create_oprnd_info (int nops, int group_size)
{
  int i;
  slp_oprnd_info oprnd_info;
  vec<slp_oprnd_info> oprnds_info;

  oprnds_info.create (nops);
  for (i = 0; i < nops; i++)
    {
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts.create (group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_op_type = NULL_TREE;
      oprnd_info->first_pattern = false;
      oprnd_info->second_pattern = false;
      oprnds_info.quick_push (oprnd_info);
    }

  return oprnds_info;
}


/* Free operands info.  */

static void
vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
{
  int i;
  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      oprnd_info->def_stmts.release ();
      XDELETE (oprnd_info);
    }

  oprnds_info.release ();
}


/* Find the place of the data-ref in STMT in the interleaving chain that starts
   from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */
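/* E.g. for a contiguous chain S1 -> S2 -> S3 in which every element after
   the first has GROUP_GAP == 1, the place of S3 is 2, i.e. simply the
   element's index within the chain.  */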

static int
vect_get_place_in_interleaving_chain (gimple *stmt, gimple *first_stmt)
{
  gimple *next_stmt = first_stmt;
  int result = 0;

  if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    return -1;

  do
    {
      if (next_stmt == stmt)
	return result;
      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
      if (next_stmt)
	result += GROUP_GAP (vinfo_for_stmt (next_stmt));
    }
  while (next_stmt);

  return -1;
}


/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  This function tries to match stmts
   by swapping operands of STMT when possible.  A non-zero *SWAP on entry
   indicates that a swap is required for cond_expr stmts.  Specifically, *SWAP
   is 1 if STMT is a cond_expr and the operands of its comparison need to be
   swapped; *SWAP is 2 if STMT is a cond_expr and the code of its comparison
   needs to be inverted.  If any operands are swapped in this function, *SWAP
   is set to a non-zero value on return.
   Return -1 on a fatal error; return 1 if the error could be corrected by
   swapping operands of this node's father node; return 0 if everything is
   ok.  */

static int
vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
			     gimple *stmt, unsigned stmt_num,
			     vec<slp_oprnd_info> *oprnds_info)
{
  tree oprnd;
  unsigned int i, number_of_oprnds;
  gimple *def_stmt;
  enum vect_def_type dt = vect_uninitialized_def;
  bool pattern = false;
  slp_oprnd_info oprnd_info;
  int first_op_idx = 1;
  bool commutative = false;
  bool first_op_cond = false;
  bool first = stmt_num == 0;
  bool second = stmt_num == 1;

  if (is_gimple_call (stmt))
    {
      number_of_oprnds = gimple_call_num_args (stmt);
      first_op_idx = 3;
    }
  else if (is_gimple_assign (stmt))
    {
      enum tree_code code = gimple_assign_rhs_code (stmt);
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      /* Swapping can only be done for cond_expr if asked to, otherwise we
	 could end up with a different comparison code from the first stmt.  */
      if (gimple_assign_rhs_code (stmt) == COND_EXPR
	  && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
	{
	  first_op_cond = true;
	  number_of_oprnds++;
	}
      else
	commutative = commutative_tree_code (code);
    }
  else
    return -1;

  bool swapped = (*swap != 0);
  gcc_assert (!swapped || first_op_cond);
  for (i = 0; i < number_of_oprnds; i++)
    {
again:
      if (first_op_cond)
	{
	  /* Map indicating how operands of cond_expr should be swapped.  */
	  int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
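	  /* maps[0] is the identity.  maps[1] reads the two comparison
	     operands swapped (a < b is fetched as b, a; the comparison
	     code itself is swapped further below).  maps[2] reads the
	     then/else arms swapped, matching an inverted comparison.  */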
	  int *map = maps[*swap];

	  if (i < 2)
	    oprnd = TREE_OPERAND (gimple_op (stmt, first_op_idx), map[i]);
	  else
	    oprnd = gimple_op (stmt, map[i]);
	}
      else
	oprnd = gimple_op (stmt, first_op_idx + (swapped ? !i : i));

      oprnd_info = (*oprnds_info)[i];

      if (!vect_is_simple_use (oprnd, vinfo, &def_stmt, &dt))
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: can't analyze def for ");
	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
	      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	    }

	  return -1;
	}

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
	 from the pattern.  Check that all the stmts of the node are in the
	 pattern.  */
      if (def_stmt && gimple_bb (def_stmt)
	  && vect_stmt_in_region_p (vinfo, def_stmt)
	  && vinfo_for_stmt (def_stmt)
	  && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
	  && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
	  && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
	{
	  pattern = true;
	  if (!first && !oprnd_info->first_pattern
	      /* Allow different pattern state for the defs of the
		 first stmt in reduction chains.  */
	      && (oprnd_info->first_dt != vect_reduction_def
		  || (!second && !oprnd_info->second_pattern)))
	    {
	      if (i == 0
		  && !swapped
		  && commutative)
		{
		  swapped = true;
		  goto again;
		}

	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: some of the stmts"
				   " are in a pattern, and others are not ");
		  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
		  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
		}

	      return 1;
	    }

	  def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
	  dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

	  if (dt == vect_unknown_def_type)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Unsupported pattern.\n");
	      return -1;
	    }

	  switch (gimple_code (def_stmt))
	    {
	    case GIMPLE_PHI:
	    case GIMPLE_ASSIGN:
	      break;

	    default:
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported defining stmt:\n");
	      return -1;
	    }
	}

      if (second)
	oprnd_info->second_pattern = pattern;

      if (first)
	{
	  oprnd_info->first_dt = dt;
	  oprnd_info->first_pattern = pattern;
	  oprnd_info->first_op_type = TREE_TYPE (oprnd);
	}
      else
	{
	  /* Not the first stmt of the group, check that the def-stmt/s match
	     the def-stmt/s of the first stmt.  Allow different definition
	     types for reduction chains: the first stmt must be a
	     vect_reduction_def (a phi node), and the rest
	     vect_internal_def.  */
	  if (((oprnd_info->first_dt != dt
		&& !(oprnd_info->first_dt == vect_reduction_def
		     && dt == vect_internal_def)
		&& !((oprnd_info->first_dt == vect_external_def
		      || oprnd_info->first_dt == vect_constant_def)
		     && (dt == vect_external_def
			 || dt == vect_constant_def)))
	       || !types_compatible_p (oprnd_info->first_op_type,
				       TREE_TYPE (oprnd))))
	    {
	      /* Try swapping operands if we got a mismatch.  */
	      if (i == 0
		  && !swapped
		  && commutative)
		{
		  swapped = true;
		  goto again;
		}

	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: different types\n");

	      return 1;
	    }
	}

      /* Check the types of the definitions.  */
      switch (dt)
	{
	case vect_constant_def:
	case vect_external_def:
	case vect_reduction_def:
	  break;

	case vect_internal_def:
	  oprnd_info->def_stmts.quick_push (def_stmt);
	  break;

	default:
	  /* FORNOW: Not supported.  */
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: illegal type of def ");
	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
	      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	    }

	  return -1;
	}
    }

  /* Swap operands.  */
  if (swapped)
    {
      /* If there are already uses of this stmt in an SLP instance then
	 we've committed to the operand order and can't swap it.  */
      if (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) != 0)
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: cannot swap operands of "
			       "shared stmt ");
	      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	    }
	  return -1;
	}

      if (first_op_cond)
	{
	  tree cond = gimple_assign_rhs1 (stmt);
	  enum tree_code code = TREE_CODE (cond);

	  /* Swap.  */
	  if (*swap == 1)
	    {
	      swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
				 &TREE_OPERAND (cond, 1));
	      TREE_SET_CODE (cond, swap_tree_comparison (code));
	    }
	  /* Invert.  */
	  else
	    {
	      swap_ssa_operands (stmt, gimple_assign_rhs2_ptr (stmt),
				 gimple_assign_rhs3_ptr (stmt));
	      bool honor_nans = HONOR_NANS (TREE_OPERAND (cond, 0));
	      code = invert_tree_comparison (TREE_CODE (cond), honor_nans);
	      gcc_assert (code != ERROR_MARK);
	      TREE_SET_CODE (cond, code);
	    }
	}
      else
	swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
			   gimple_assign_rhs2_ptr (stmt));
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "swapped operands to match def types in ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	}
    }

  *swap = swapped;
  return 0;
}


/* Verify that the scalar stmts STMTS are isomorphic, do not require data
   permutation, and are not of unsupported operation types.  Return true
   if they are; otherwise return false and indicate in *MATCHES which
   stmts are not isomorphic to the first one.  If MATCHES[0] is false then
   this indicates the comparison could not be carried out or the stmts
   will never be vectorized by SLP.

   Note COND_EXPR is possibly isomorphic to another one after swapping its
   operands.  Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
   the first stmt by swapping the two operands of its comparison; set SWAP[i]
   to 2 if stmt I is isomorphic to the first stmt by inverting the code
   of its comparison.  Take A1 >= B1 ? X1 : Y1 as an example: it can be
   swapped to (B1 <= A1 ? X1 : Y1), or inverted to (A1 < B1) ? Y1 : X1.  */
static bool
vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
		       vec<gimple *> stmts, unsigned int group_size,
		       unsigned nops, unsigned int *max_nunits,
		       bool *matches, bool *two_operators)
{
  unsigned int i;
  gimple *first_stmt = stmts[0], *stmt = stmts[0];
  enum tree_code first_stmt_code = ERROR_MARK;
  enum tree_code alt_stmt_code = ERROR_MARK;
  enum tree_code rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;
  tree lhs;
  bool need_same_oprnds = false;
  tree vectype = NULL_TREE, scalar_type, first_op1 = NULL_TREE;
  optab optab;
  int icode;
  machine_mode optab_op2_mode;
  machine_mode vec_mode;
  HOST_WIDE_INT dummy;
  gimple *first_load = NULL, *prev_first_load = NULL;

  /* For every stmt in STMTS find its def stmt/s.  */
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      swap[i] = 0;
      matches[i] = false;

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	}

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: unvectorizable statement ");
	      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	    }
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: not GIMPLE_ASSIGN nor "
			       "GIMPLE_CALL ");
	      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	    }
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: unsupported data-type ");
	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				 scalar_type);
	      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	    }
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      /* If populating the vector type requires unrolling then fail
	 before adjusting *max_nunits for basic-block vectorization.  */
      if (is_a <bb_vec_info> (vinfo)
	  && TYPE_VECTOR_SUBPARTS (vectype) > group_size)
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "Build SLP failed: unrolling required "
			   "in basic block SLP\n");
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      /* In case of multiple types we need to detect the smallest type.  */
      if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
	*max_nunits = TYPE_VECTOR_SUBPARTS (vectype);

      if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
	{
	  rhs_code = CALL_EXPR;
	  if (gimple_call_internal_p (call_stmt)
	      || gimple_call_tail_p (call_stmt)
	      || gimple_call_noreturn_p (call_stmt)
	      || !gimple_call_nothrow_p (call_stmt)
	      || gimple_call_chain (call_stmt))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: unsupported call type ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				    call_stmt, 0);
		}
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }
	}
      else
	rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
	{
	  first_stmt_code = rhs_code;

	  /* Shift arguments should be equal in all the packed stmts for a
	     vector shift with scalar shift operand.  */
	  if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
	      || rhs_code == LROTATE_EXPR
	      || rhs_code == RROTATE_EXPR)
	    {
	      vec_mode = TYPE_MODE (vectype);

	      /* First see if we have a vector/vector shift.  */
	      optab = optab_for_tree_code (rhs_code, vectype,
					   optab_vector);

	      if (!optab
		  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
		{
		  /* No vector/vector shift, try for a vector/scalar shift.  */
		  optab = optab_for_tree_code (rhs_code, vectype,
					       optab_scalar);

		  if (!optab)
		    {
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
					 "Build SLP failed: no optab.\n");
		      /* Fatal mismatch.  */
		      matches[0] = false;
		      return false;
		    }
		  icode = (int) optab_handler (optab, vec_mode);
		  if (icode == CODE_FOR_nothing)
		    {
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
					 "Build SLP failed: "
					 "op not supported by target.\n");
		      /* Fatal mismatch.  */
		      matches[0] = false;
		      return false;
		    }
		  optab_op2_mode = insn_data[icode].operand[2].mode;
		  if (!VECTOR_MODE_P (optab_op2_mode))
		    {
		      need_same_oprnds = true;
		      first_op1 = gimple_assign_rhs2 (stmt);
		    }
		}
	    }
	  else if (rhs_code == WIDEN_LSHIFT_EXPR)
	    {
	      need_same_oprnds = true;
	      first_op1 = gimple_assign_rhs2 (stmt);
	    }
	}
      else
	{
	  if (first_stmt_code != rhs_code
	      && alt_stmt_code == ERROR_MARK)
	    alt_stmt_code = rhs_code;
	  if (first_stmt_code != rhs_code
	      && (first_stmt_code != IMAGPART_EXPR
		  || rhs_code != REALPART_EXPR)
	      && (first_stmt_code != REALPART_EXPR
		  || rhs_code != IMAGPART_EXPR)
	      /* Handle mismatches in plus/minus by computing both
		 and merging the results.  */
	      && !((first_stmt_code == PLUS_EXPR
		    || first_stmt_code == MINUS_EXPR)
		   && (alt_stmt_code == PLUS_EXPR
		       || alt_stmt_code == MINUS_EXPR)
		   && rhs_code == alt_stmt_code)
	      && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
		   && (first_stmt_code == ARRAY_REF
		       || first_stmt_code == BIT_FIELD_REF
		       || first_stmt_code == INDIRECT_REF
		       || first_stmt_code == COMPONENT_REF
		       || first_stmt_code == MEM_REF)))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: different operation "
				   "in stmt ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "original stmt ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				    first_stmt, 0);
		}
	      /* Mismatch.  */
	      continue;
	    }

	  if (need_same_oprnds
	      && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: different shift "
				   "arguments in ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
		}
	      /* Mismatch.  */
	      continue;
	    }

	  if (rhs_code == CALL_EXPR)
	    {
	      gimple *first_stmt = stmts[0];
	      if (gimple_call_num_args (stmt) != nops
		  || !operand_equal_p (gimple_call_fn (first_stmt),
				       gimple_call_fn (stmt), 0)
		  || gimple_call_fntype (first_stmt)
		     != gimple_call_fntype (stmt))
		{
		  if (dump_enabled_p ())
		    {
		      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				       "Build SLP failed: different calls in ");
		      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					stmt, 0);
		    }
		  /* Mismatch.  */
		  continue;
		}
	    }
	}

      /* Grouped store or load.  */
      if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
	{
	  if (REFERENCE_CLASS_P (lhs))
	    {
	      /* Store.  */
	      ;
	    }
	  else
	    {
	      /* Load.  */
	      first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
	      if (prev_first_load)
		{
		  /* Check that there are no loads from different interleaving
		     chains in the same node.  */
		  if (prev_first_load != first_load)
		    {
		      if (dump_enabled_p ())
			{
			  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
					   vect_location,
					   "Build SLP failed: different "
					   "interleaving chains in one node ");
			  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					    stmt, 0);
			}
		      /* Mismatch.  */
		      continue;
		    }
		}
	      else
		prev_first_load = first_load;
	    }
	} /* Grouped access.  */
      else
	{
	  if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
	    {
	      /* Non-grouped load.  */
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: not grouped load ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
		}

	      /* FORNOW: Non-grouped loads are not supported.  */
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }

	  /* Not a memory operation.  */
	  if (TREE_CODE_CLASS (rhs_code) != tcc_binary
	      && TREE_CODE_CLASS (rhs_code) != tcc_unary
	      && TREE_CODE_CLASS (rhs_code) != tcc_expression
	      && TREE_CODE_CLASS (rhs_code) != tcc_comparison
	      && rhs_code != CALL_EXPR)
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: operation");
		  dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
		}
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }

	  if (rhs_code == COND_EXPR)
	    {
	      tree cond_expr = gimple_assign_rhs1 (stmt);
	      enum tree_code cond_code = TREE_CODE (cond_expr);
	      enum tree_code swap_code = ERROR_MARK;
	      enum tree_code invert_code = ERROR_MARK;

	      if (i == 0)
		first_cond_code = TREE_CODE (cond_expr);
	      else if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
		{
		  bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
		  swap_code = swap_tree_comparison (cond_code);
		  invert_code = invert_tree_comparison (cond_code, honor_nans);
		}

	      if (first_cond_code == cond_code)
		;
	      /* Isomorphism can be achieved by swapping.  */
	      else if (first_cond_code == swap_code)
		swap[i] = 1;
	      /* Isomorphism can be achieved by inverting.  */
	      else if (first_cond_code == invert_code)
		swap[i] = 2;
	      else
		{
		  if (dump_enabled_p ())
		    {
		      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				       "Build SLP failed: different"
				       " operation");
		      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					stmt, 0);
		    }
		  /* Mismatch.  */
		  continue;
		}
	    }
	}

      matches[i] = true;
    }

  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      return false;

  /* If we allowed a two-operation SLP node verify the target can cope
     with the permute we are going to use.  */
  if (alt_stmt_code != ERROR_MARK
      && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
    {
      unsigned char *sel
	= XALLOCAVEC (unsigned char, TYPE_VECTOR_SUBPARTS (vectype));
      for (i = 0; i < TYPE_VECTOR_SUBPARTS (vectype); ++i)
	{
	  sel[i] = i;
	  if (gimple_assign_rhs_code (stmts[i % group_size]) == alt_stmt_code)
	    sel[i] += TYPE_VECTOR_SUBPARTS (vectype);
	}
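      /* E.g. for group_size 2, nunits 4 and stmts {plus, minus} this
	 builds sel = {0, 5, 2, 7}: even lanes are taken from the vector
	 computed with the first operation, odd lanes from the vector
	 computed with the alternate operation.  */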
      if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
	{
	  for (i = 0; i < group_size; ++i)
	    if (gimple_assign_rhs_code (stmts[i]) == alt_stmt_code)
	      {
		matches[i] = false;
		if (dump_enabled_p ())
		  {
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "Build SLP failed: different operation "
				     "in stmt ");
		    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				      stmts[i], 0);
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "original stmt ");
		    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				      first_stmt, 0);
		  }
	      }
	  return false;
	}
      *two_operators = true;
    }

  return true;
}

/* Traits for the hash_set to record failed SLP builds for a stmt set.
   Note we never remove entries except at destruction time, so we do not
   need a special value for deleted that differs from empty.  */
struct bst_traits
{
  typedef vec <gimple *> value_type;
  typedef vec <gimple *> compare_type;
  static inline hashval_t hash (value_type);
  static inline bool equal (value_type existing, value_type candidate);
  static inline bool is_empty (value_type x) { return !x.exists (); }
  static inline bool is_deleted (value_type x) { return !x.exists (); }
  static inline void mark_empty (value_type &x) { x.release (); }
  static inline void mark_deleted (value_type &x) { x.release (); }
  static inline void remove (value_type &x) { x.release (); }
};
inline hashval_t
bst_traits::hash (value_type x)
{
  inchash::hash h;
  for (unsigned i = 0; i < x.length (); ++i)
    h.add_int (gimple_uid (x[i]));
  return h.end ();
}
inline bool
bst_traits::equal (value_type existing, value_type candidate)
{
  if (existing.length () != candidate.length ())
    return false;
  for (unsigned i = 0; i < existing.length (); ++i)
    if (existing[i] != candidate[i])
      return false;
  return true;
}

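/* Set of stmt vectors for which the SLP build failed; consulted and
   extended by vect_build_slp_tree below.  The table owns its entries:
   they are copies spliced from the original stmt vectors and are
   released via bst_traits::remove.  */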
static hash_set <vec <gimple *>, bst_traits> *bst_fail;

static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo,
		       vec<gimple *> stmts, unsigned int group_size,
		       unsigned int *max_nunits,
		       vec<slp_tree> *loads,
		       bool *matches, unsigned *npermutes, unsigned *tree_size,
		       unsigned max_tree_size);

static slp_tree
vect_build_slp_tree (vec_info *vinfo,
		     vec<gimple *> stmts, unsigned int group_size,
		     unsigned int *max_nunits,
		     vec<slp_tree> *loads,
		     bool *matches, unsigned *npermutes, unsigned *tree_size,
		     unsigned max_tree_size)
{
  if (bst_fail->contains (stmts))
    return NULL;
  slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size, max_nunits,
					loads, matches, npermutes, tree_size,
					max_tree_size);
  /* When the SLP build fails for STMTS record this, otherwise the SLP
     build can take exponential time when we allow building parts from
     scalars; see PR81723.  */
  if (! res)
    {
      vec <gimple *> x;
      x.create (stmts.length ());
      x.splice (stmts);
      bst_fail->add (x);
    }
  return res;
}

/* Recursively build an SLP tree for the group of stmts STMTS.  Fail and
   return NULL if the def-stmts are not isomorphic, require data
   permutation, or are of unsupported operation types; in that case
   *MATCHES indicates which stmts failed to match up.  */

static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo,
		       vec<gimple *> stmts, unsigned int group_size,
		       unsigned int *max_nunits,
		       vec<slp_tree> *loads,
		       bool *matches, unsigned *npermutes, unsigned *tree_size,
		       unsigned max_tree_size)
{
  unsigned nops, i, this_tree_size = 0, this_max_nunits = *max_nunits;
  gimple *stmt;
  slp_tree node;

  matches[0] = false;

  stmt = stmts[0];
  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
	nops++;
    }
  else
    return NULL;

  bool two_operators = false;
  unsigned char *swap = XALLOCAVEC (unsigned char, group_size);
  if (!vect_build_slp_tree_1 (vinfo, swap,
			      stmts, group_size, nops,
			      &this_max_nunits, matches, &two_operators))
    return NULL;

  /* If the SLP node is a load, terminate the recursion.  */
  if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
      && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
    {
      *max_nunits = this_max_nunits;
      node = vect_create_new_slp_node (stmts);
      loads->safe_push (node);
      return node;
    }

  /* Get at the operands, verifying they are compatible.  */
  vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
  slp_oprnd_info oprnd_info;
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      int res = vect_get_and_check_slp_defs (vinfo, &swap[i],
					     stmt, i, &oprnds_info);
      if (res != 0)
	matches[(res == -1) ? 0 : i] = false;
      if (!matches[0])
	break;
    }
  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      {
	vect_free_oprnd_info (oprnds_info);
	return NULL;
      }

  auto_vec<slp_tree, 4> children;
  auto_vec<slp_tree> this_loads;

  stmt = stmts[0];

  if (tree_size)
    max_tree_size -= *tree_size;

  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      slp_tree child;
      unsigned old_nloads = this_loads.length ();
      unsigned old_tree_size = this_tree_size;
      unsigned int j;

      if (oprnd_info->first_dt != vect_internal_def)
	continue;

      if (++this_tree_size > max_tree_size)
	{
	  FOR_EACH_VEC_ELT (children, j, child)
	    vect_free_slp_tree (child);
	  vect_free_oprnd_info (oprnds_info);
	  return NULL;
	}

      if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
					group_size, &this_max_nunits,
					&this_loads, matches, npermutes,
					&this_tree_size,
					max_tree_size)) != NULL)
	{
	  /* If all children of this child were built up from scalars then
	     just throw that away and build this node up from scalars
	     instead.  */
	  if (!SLP_TREE_CHILDREN (child).is_empty ()
	      /* ???  Rejecting patterns this way doesn't work.  We'd have to
		 do extra work to cancel the pattern so the uses see the
		 scalar version.  */
	      && !is_pattern_stmt_p
	            (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
	    {
	      slp_tree grandchild;

	      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
		if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
		  break;
	      if (!grandchild)
		{
		  /* Roll back.  */
		  this_loads.truncate (old_nloads);
		  this_tree_size = old_tree_size;
		  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
		    vect_free_slp_tree (grandchild);
		  SLP_TREE_CHILDREN (child).truncate (0);

		  dump_printf_loc (MSG_NOTE, vect_location,
				   "Building parent vector operands from "
				   "scalars instead\n");
		  oprnd_info->def_stmts = vNULL;
		  SLP_TREE_DEF_TYPE (child) = vect_external_def;
		  children.safe_push (child);
		  continue;
		}
	    }

	  oprnd_info->def_stmts = vNULL;
	  children.safe_push (child);
	  continue;
	}

      /* If the SLP build failed fatally and we analyze a basic-block
	 simply treat nodes we fail to build as externally defined
	 (and thus build vectors from the scalar defs).
	 The cost model will reject outright expensive cases.
	 ???  This doesn't treat cases where permutation ultimately
	 fails (or we don't try permutation below).  Ideally we'd
	 even compute a permutation that will end up with the maximum
	 SLP tree size...  */
      if (is_a <bb_vec_info> (vinfo)
	  && !matches[0]
	  /* ???  Rejecting patterns this way doesn't work.  We'd have to
	     do extra work to cancel the pattern so the uses see the
	     scalar version.  */
	  && !is_pattern_stmt_p (vinfo_for_stmt (stmt)))
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "Building vector operands from scalars\n");
	  child = vect_create_new_slp_node (oprnd_info->def_stmts);
	  SLP_TREE_DEF_TYPE (child) = vect_external_def;
	  children.safe_push (child);
	  oprnd_info->def_stmts = vNULL;
	  continue;
	}

      /* If the SLP build for operand zero failed and operands zero
	 and one can be commuted, try that for the scalar stmts
	 that failed the match.  */
      if (i == 0
	  /* A first scalar stmt mismatch signals a fatal mismatch.  */
	  && matches[0]
	  /* ???  For COND_EXPRs we can swap the comparison operands
	     as well as the arms under some constraints.  */
	  && nops == 2
	  && oprnds_info[1]->first_dt == vect_internal_def
	  && is_gimple_assign (stmt)
	  && commutative_tree_code (gimple_assign_rhs_code (stmt))
	  && ! two_operators
	  /* Do so only if the number of unsuccessful permutes was not
	     more than a cut-off, as re-trying the recursive match on
	     possibly each level of the tree would expose exponential
	     behavior.  */
	  && *npermutes < 4)
	{
	  /* Verify if we can safely swap or if we committed to a specific
	     operand order already.  */
	  for (j = 0; j < group_size; ++j)
	    if (!matches[j]
		&& (swap[j] != 0
		    || STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmts[j]))))
	      {
		if (dump_enabled_p ())
		  {
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "Build SLP failed: cannot swap operands "
				     "of shared stmt ");
		    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				      stmts[j], 0);
		  }
		goto fail;
	      }

	  /* Swap mismatched definition stmts.  */
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "Re-trying with swapped operands of stmts ");
	  for (j = 0; j < group_size; ++j)
	    if (!matches[j])
	      {
		std::swap (oprnds_info[0]->def_stmts[j],
			   oprnds_info[1]->def_stmts[j]);
		dump_printf (MSG_NOTE, "%d ", j);
	      }
	  dump_printf (MSG_NOTE, "\n");
	  /* And try again with scratch 'matches' ... */
	  bool *tem = XALLOCAVEC (bool, group_size);
	  if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
					    group_size, &this_max_nunits,
					    &this_loads, tem, npermutes,
					    &this_tree_size,
					    max_tree_size)) != NULL)
	    {
	      /* ... so if successful we can apply the operand swapping
		 to the GIMPLE IL.  This is necessary because for example
		 vect_get_slp_defs uses operand indexes and thus expects
		 canonical operand order.  This is also necessary even
		 if we end up building the operand from scalars as
		 we'll continue to process swapped operand two.  */
	      for (j = 0; j < group_size; ++j)
		{
		  gimple *stmt = stmts[j];
		  gimple_set_plf (stmt, GF_PLF_1, false);
		}
	      for (j = 0; j < group_size; ++j)
		{
		  gimple *stmt = stmts[j];
		  if (!matches[j])
		    {
		      /* Avoid swapping operands twice.  */
		      if (gimple_plf (stmt, GF_PLF_1))
			continue;
		      swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
					 gimple_assign_rhs2_ptr (stmt));
		      gimple_set_plf (stmt, GF_PLF_1, true);
		    }
		}
	      /* Verify we swap all duplicates or none.  */
	      if (flag_checking)
		for (j = 0; j < group_size; ++j)
		  {
		    gimple *stmt = stmts[j];
		    gcc_assert (gimple_plf (stmt, GF_PLF_1) == ! matches[j]);
		  }

	      /* If all children of this child were built up from scalars
		 then just throw that away and build this node up from
		 scalars instead.  */
	      if (!SLP_TREE_CHILDREN (child).is_empty ()
		  /* ???  Rejecting patterns this way doesn't work.  We'd have
		     to do extra work to cancel the pattern so the uses see the
		     scalar version.  */
		  && !is_pattern_stmt_p
			(vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
		{
		  unsigned int j;
		  slp_tree grandchild;

		  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
		    if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
		      break;
		  if (!grandchild)
		    {
		      /* Roll back.  */
		      this_loads.truncate (old_nloads);
		      this_tree_size = old_tree_size;
		      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
			vect_free_slp_tree (grandchild);
		      SLP_TREE_CHILDREN (child).truncate (0);

		      dump_printf_loc (MSG_NOTE, vect_location,
				       "Building parent vector operands from "
				       "scalars instead\n");
		      oprnd_info->def_stmts = vNULL;
		      SLP_TREE_DEF_TYPE (child) = vect_external_def;
		      children.safe_push (child);
		      continue;
		    }
		}

	      oprnd_info->def_stmts = vNULL;
	      children.safe_push (child);
	      continue;
	    }

	  ++*npermutes;
	}

fail:
      gcc_assert (child == NULL);
      FOR_EACH_VEC_ELT (children, j, child)
	vect_free_slp_tree (child);
      vect_free_oprnd_info (oprnds_info);
      return NULL;
    }

  vect_free_oprnd_info (oprnds_info);

  if (tree_size)
    *tree_size += this_tree_size;
  *max_nunits = this_max_nunits;
  loads->safe_splice (this_loads);

  node = vect_create_new_slp_node (stmts);
  SLP_TREE_TWO_OPERATORS (node) = two_operators;
  SLP_TREE_CHILDREN (node).splice (children);
  return node;
}

/* Dump a slp tree NODE using flags specified in DUMP_KIND.  */

static void
vect_print_slp_tree (int dump_kind, location_t loc, slp_tree node)
{
  int i;
  gimple *stmt;
  slp_tree child;

  dump_printf_loc (dump_kind, loc, "node%s\n",
		   SLP_TREE_DEF_TYPE (node) != vect_internal_def
		   ? " (external)" : "");
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      dump_printf_loc (dump_kind, loc, "\tstmt %d ", i);
      dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
    }
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_print_slp_tree (dump_kind, loc, child);
}


/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
   stmts in NODE are to be marked.  */

static void
vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
{
  int i;
  gimple *stmt;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (j < 0 || i == j)
      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts (child, mark, j);
}


/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  int i;
  gimple *stmt;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
		  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts_relevant (child);
}


/* Rearrange the statements of NODE according to PERMUTATION.  */
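/* The stmt at index I is moved to position PERMUTATION[I]; e.g. with
   PERMUTATION = {2, 0, 1}, scalar stmts {A, B, C} become {B, C, A}.  */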

static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
			  vec<unsigned> permutation)
{
  gimple *stmt;
  vec<gimple *> tmp_stmts;
  unsigned int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_slp_rearrange_stmts (child, group_size, permutation);

  gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
  tmp_stmts.create (group_size);
  tmp_stmts.quick_grow_cleared (group_size);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    tmp_stmts[permutation[i]] = stmt;

  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
}


/* Attempt to reorder stmts in a reduction chain so that we don't
   require any load permutation.  Return true if that was possible,
   otherwise return false.  */

static bool
vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j;
  unsigned int lidx;
  slp_tree node, load;

  /* Compare all the permutation sequences to the first one.  We know
     that at least one load is permuted.  */
  node = SLP_INSTANCE_LOADS (slp_instn)[0];
  if (!node->load_permutation.exists ())
    return false;
  for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
    {
      if (!load->load_permutation.exists ())
	return false;
      FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
	if (lidx != node->load_permutation[j])
	  return false;
    }

  /* Check that the loads in the first sequence are different and there
     are no gaps between them.  */
  auto_sbitmap load_index (group_size);
  bitmap_clear (load_index);
  FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
    {
      if (lidx >= group_size)
	return false;
      if (bitmap_bit_p (load_index, lidx))
	return false;

      bitmap_set_bit (load_index, lidx);
    }
  for (i = 0; i < group_size; i++)
    if (!bitmap_bit_p (load_index, i))
      return false;

  /* This permutation is valid for reduction.  Since the order of the
     statements in the nodes is not important unless they are memory
     accesses, we can rearrange the statements in all the nodes
     according to the order of the loads.  */
  vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
			    node->load_permutation);

  /* We are done, no actual permutations need to be generated.  */
  unsigned int unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    {
      gimple *first_stmt = SLP_TREE_SCALAR_STMTS (node)[0];
      first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt));
      /* But we have to keep those permutations that are required because
	 of handling of gaps.  */
      if (unrolling_factor == 1
	  || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt))
	      && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0))
	SLP_TREE_LOAD_PERMUTATION (node).release ();
      else
	for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j)
	  SLP_TREE_LOAD_PERMUTATION (node)[j] = j;
    }

  return true;
}

/* Check if the required load permutations in the SLP instance
   SLP_INSTN are supported.  */

static bool
vect_supported_load_permutation_p (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j, k, next;
  slp_tree node;
  gimple *stmt, *load, *next_load;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
	if (node->load_permutation.exists ())
	  FOR_EACH_VEC_ELT (node->load_permutation, j, next)
	    dump_printf (MSG_NOTE, "%d ", next);
	else
	  for (k = 0; k < group_size; ++k)
	    dump_printf (MSG_NOTE, "%d ", k);
      dump_printf (MSG_NOTE, "\n");
    }

  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     grouped stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */

  /* Check that all the load nodes are of the same size.  */
  /* ???  Can't we assert this? */
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
      return false;

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];

  /* Reduction (there are no data-refs in the root).
     In reduction chains the order of the loads is not important.  */
  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
      && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    vect_attempt_slp_rearrange_stmts (slp_instn);

  /* In basic block vectorization we allow any subchain of an interleaving
     chain.
     FORNOW: not supported in loop SLP because of realignment complications.  */
  if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)))
    {
      /* Check whether the loads in an instance form a subchain and thus
	 no permutation is necessary.  */
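      /* E.g. loads of a[2] and a[3], taken in that order from an
	 interleaving chain starting at a[0], form a subchain: consecutive
	 node stmts follow GROUP_NEXT_ELEMENT with unit gaps, so no
	 permutation is needed.  */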
1514       FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1515         {
1516 	  if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
1517 	    continue;
1518 	  bool subchain_p = true;
1519           next_load = NULL;
1520           FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
1521             {
1522               if (j != 0
1523 		  && (next_load != load
1524 		      || GROUP_GAP (vinfo_for_stmt (load)) != 1))
1525 		{
1526 		  subchain_p = false;
1527 		  break;
1528 		}
1529               next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
1530             }
1531 	  if (subchain_p)
1532 	    SLP_TREE_LOAD_PERMUTATION (node).release ();
1533 	  else
1534 	    {
1535 	      stmt_vec_info group_info
1536 		= vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
1537 	      group_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (group_info));
1538 	      unsigned nunits
1539 		= TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (group_info));
1540 	      unsigned k, maxk = 0;
1541 	      FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k)
1542 		if (k > maxk)
1543 		  maxk = k;
1544 	      /* In BB vectorization we may not actually use a loaded vector
1545 		 accessing elements in excess of GROUP_SIZE.  */
1546 	      if (maxk >= (GROUP_SIZE (group_info) & ~(nunits - 1)))
1547 		{
1548 		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1549 				   "BB vectorization with gaps at the end of "
1550 				   "a load is not supported\n");
1551 		  return false;
1552 		}
1553 
1554 	      /* Verify the permutation can be generated.  */
1555 	      vec<tree> tem;
1556 	      unsigned n_perms;
1557 	      if (!vect_transform_slp_perm_load (node, tem, NULL,
1558 						 1, slp_instn, true, &n_perms))
1559 		{
1560 		  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
1561 				   vect_location,
1562 				   "unsupported load permutation\n");
1563 		  return false;
1564 		}
1565 	    }
1566         }
1567       return true;
1568     }
1569 
1570   /* For loop vectorization verify we can generate the permutation.  Be
1571      conservative about the vectorization factor, there are permutations
1572      that will use three vector inputs only starting from a specific factor
1573      and the vectorization factor is not yet final.
1574      ???  The SLP instance unrolling factor might not be the maximum one.  */
1575   unsigned n_perms;
1576   unsigned test_vf
1577     = least_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn),
1578 			     LOOP_VINFO_VECT_FACTOR
1579 			       (STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt))));
1580   FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1581     if (node->load_permutation.exists ()
1582 	&& !vect_transform_slp_perm_load (node, vNULL, NULL, test_vf,
1583 					  slp_instn, true, &n_perms))
1584       return false;
1585 
1586   return true;
1587 }
1588 
1589 
1590 /* Find the last store in SLP INSTANCE.  */
1591 
1592 gimple *
1593 vect_find_last_scalar_stmt_in_slp (slp_tree node)
1594 {
1595   gimple *last = NULL, *stmt;
1596 
1597   for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt); i++)
1598     {
1599       stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1600       if (is_pattern_stmt_p (stmt_vinfo))
1601 	last = get_later_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo), last);
1602       else
1603 	last = get_later_stmt (stmt, last);
1604     }
1605 
1606   return last;
1607 }
1608 
1609 /* Compute the cost for the SLP node NODE in the SLP instance INSTANCE.  */
1610 
1611 static void
1612 vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node,
1613 			 stmt_vector_for_cost *prologue_cost_vec,
1614 			 stmt_vector_for_cost *body_cost_vec,
1615 			 unsigned ncopies_for_cost)
1616 {
1617   unsigned i, j;
1618   slp_tree child;
1619   gimple *stmt;
1620   stmt_vec_info stmt_info;
1621   tree lhs;
1622 
1623   /* Recurse down the SLP tree.  */
1624   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1625     if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
1626       vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
1627 			       body_cost_vec, ncopies_for_cost);
1628 
1629   /* Look at the first scalar stmt to determine the cost.  */
1630   stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1631   stmt_info = vinfo_for_stmt (stmt);
1632   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1633     {
1634       vect_memory_access_type memory_access_type
1635 	= (STMT_VINFO_STRIDED_P (stmt_info)
1636 	   ? VMAT_STRIDED_SLP
1637 	   : VMAT_CONTIGUOUS);
1638       if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
1639 	vect_model_store_cost (stmt_info, ncopies_for_cost,
1640 			       memory_access_type, vect_uninitialized_def,
1641 			       node, prologue_cost_vec, body_cost_vec);
1642       else
1643 	{
1644 	  gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
1645 	  if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
1646 	    {
1647 	      /* If the load is permuted then the alignment is determined by
1648 		 the first group element not by the first scalar stmt DR.  */
1649 	      stmt = GROUP_FIRST_ELEMENT (stmt_info);
1650 	      stmt_info = vinfo_for_stmt (stmt);
1651 	      /* Record the cost for the permutation.  */
1652 	      unsigned n_perms;
1653 	      vect_transform_slp_perm_load (node, vNULL, NULL,
1654 					    ncopies_for_cost, instance, true,
1655 					    &n_perms);
1656 	      record_stmt_cost (body_cost_vec, n_perms, vec_perm,
1657 				stmt_info, 0, vect_body);
1658 	      unsigned nunits
1659 		= TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
1660 	      /* And adjust the number of loads performed.  This handles
1661 	         redundancies as well as loads that are later dead.  */
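	      /* E.g. with nunits 2, GROUP_SIZE 4 and the load permutation
		 { 0, 0, 1 } only the first of the two load vectors is
		 used, so a single vector load is accounted for.  */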
1662 	      auto_sbitmap perm (GROUP_SIZE (stmt_info));
1663 	      bitmap_clear (perm);
1664 	      for (i = 0; i < SLP_TREE_LOAD_PERMUTATION (node).length (); ++i)
1665 		bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (node)[i]);
1666 	      ncopies_for_cost = 0;
1667 	      bool load_seen = false;
1668 	      for (i = 0; i < GROUP_SIZE (stmt_info); ++i)
1669 		{
1670 		  if (i % nunits == 0)
1671 		    {
1672 		      if (load_seen)
1673 			ncopies_for_cost++;
1674 		      load_seen = false;
1675 		    }
1676 		  if (bitmap_bit_p (perm, i))
1677 		    load_seen = true;
1678 		}
1679 	      if (load_seen)
1680 		ncopies_for_cost++;
1681 	      gcc_assert (ncopies_for_cost
1682 			  <= (GROUP_SIZE (stmt_info) - GROUP_GAP (stmt_info)
1683 			      + nunits - 1) / nunits);
1684 	      ncopies_for_cost *= SLP_INSTANCE_UNROLLING_FACTOR (instance);
1685 	    }
1686 	  /* Record the cost for the vector loads.  */
1687 	  vect_model_load_cost (stmt_info, ncopies_for_cost,
1688 				memory_access_type, node, prologue_cost_vec,
1689 				body_cost_vec);
1690 	  return;
1691 	}
1692     }
1693   else
1694     {
1695       record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
1696 			stmt_info, 0, vect_body);
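      /* A two-operator node, e.g. alternating { a0 + b0, a1 - b1, ... },
	 is implemented as two vector stmts whose results are blended by
	 a permute, so account for one more vector stmt and the
	 permutation.  */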
1697       if (SLP_TREE_TWO_OPERATORS (node))
1698 	{
1699 	  record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
1700 			    stmt_info, 0, vect_body);
1701 	  record_stmt_cost (body_cost_vec, ncopies_for_cost, vec_perm,
1702 			    stmt_info, 0, vect_body);
1703 	}
1704     }
1705 
1706   /* Push SLP node def-type to stmts.  */
1707   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1708     if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
1709       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
1710 	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);
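  /* This makes vect_is_simple_use below classify those operands as
     constant or external as seen from this SLP node.  */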
1711 
1712   /* Scan operands and account for prologue cost of constants/externals.
1713      ???  This over-estimates cost for multiple uses and should be
1714      re-engineered.  */
1715   stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1716   lhs = gimple_get_lhs (stmt);
1717   for (i = 0; i < gimple_num_ops (stmt); ++i)
1718     {
1719       tree op = gimple_op (stmt, i);
1720       gimple *def_stmt;
1721       enum vect_def_type dt;
1722       if (!op || op == lhs)
1723 	continue;
1724       if (vect_is_simple_use (op, stmt_info->vinfo, &def_stmt, &dt))
1725 	{
1726 	  /* Without looking at the actual initializer a vector of
1727 	     constants can be implemented as load from the constant pool.
1728 	     ???  We need to pass down stmt_info for a vector type
1729 	     even if it points to the wrong stmt.  */
1730 	  if (dt == vect_constant_def)
1731 	    record_stmt_cost (prologue_cost_vec, 1, vector_load,
1732 			      stmt_info, 0, vect_prologue);
1733 	  else if (dt == vect_external_def)
1734 	    record_stmt_cost (prologue_cost_vec, 1, vec_construct,
1735 			      stmt_info, 0, vect_prologue);
1736 	}
1737     }
1738 
1739   /* Restore stmt def-types.  */
1740   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1741     if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
1742       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
1743 	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
1744 }
1745 
1746 /* Compute the cost for the SLP instance INSTANCE.  */
1747 
1748 static void
1749 vect_analyze_slp_cost (slp_instance instance, void *data)
1750 {
1751   stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
1752   unsigned ncopies_for_cost;
1753   stmt_info_for_cost *si;
1754   unsigned i;
1755 
1756   if (dump_enabled_p ())
1757     dump_printf_loc (MSG_NOTE, vect_location,
1758 		     "=== vect_analyze_slp_cost ===\n");
1759 
1760   /* Calculate the number of vector stmts to create based on the unrolling
1761      factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1762      GROUP_SIZE / NUNITS otherwise).  */
1763   unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
1764   slp_tree node = SLP_INSTANCE_TREE (instance);
1765   stmt_vec_info stmt_info = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
1766   /* Adjust the group_size by the vectorization factor which is always one
1767      for basic-block vectorization.  */
1768   if (STMT_VINFO_LOOP_VINFO (stmt_info))
1769     group_size *= LOOP_VINFO_VECT_FACTOR (STMT_VINFO_LOOP_VINFO (stmt_info));
1770   unsigned nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
1771   /* For reductions look at a reduction operand in case the reduction
1772      operation is widening like DOT_PROD or SAD.  */
1773   if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1774     {
1775       gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1776       switch (gimple_assign_rhs_code (stmt))
1777 	{
1778 	case DOT_PROD_EXPR:
1779 	case SAD_EXPR:
1780 	  nunits = TYPE_VECTOR_SUBPARTS (get_vectype_for_scalar_type
1781 				(TREE_TYPE (gimple_assign_rhs1 (stmt))));
1782 	  break;
1783 	default:;
1784 	}
1785     }
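  /* E.g. a group of 6 scalars with nunits 4 needs
     least_common_multiple (4, 6) / 4 == 3 vector stmts.  */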
1786   ncopies_for_cost = least_common_multiple (nunits, group_size) / nunits;
1787 
1788   prologue_cost_vec.create (10);
1789   body_cost_vec.create (10);
1790   vect_analyze_slp_cost_1 (instance, SLP_INSTANCE_TREE (instance),
1791 			   &prologue_cost_vec, &body_cost_vec,
1792 			   ncopies_for_cost);
1793 
1794   /* Record the prologue costs, which were delayed until we were
1795      sure that SLP was successful.  */
1796   FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
1797     {
1798       struct _stmt_vec_info *stmt_info
1799 	= si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1800       (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1801 			    si->misalign, vect_prologue);
1802     }
1803 
1804   /* Record the instance's instructions in the target cost model.  */
1805   FOR_EACH_VEC_ELT (body_cost_vec, i, si)
1806     {
1807       struct _stmt_vec_info *stmt_info
1808 	= si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1809       (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1810 			    si->misalign, vect_body);
1811     }
1812 
1813   prologue_cost_vec.release ();
1814   body_cost_vec.release ();
1815 }
1816 
1817 /* Splits a group of stores, currently beginning at FIRST_STMT, into two groups:
1818    one (still beginning at FIRST_STMT) of size GROUP1_SIZE (also containing
1819    the first GROUP1_SIZE stmts, since stores are consecutive), the second
1820    containing the remainder.
1821    Return the first stmt in the second group.  */
1822 
1823 static gimple *
1824 vect_split_slp_store_group (gimple *first_stmt, unsigned group1_size)
1825 {
1826   stmt_vec_info first_vinfo = vinfo_for_stmt (first_stmt);
1827   gcc_assert (GROUP_FIRST_ELEMENT (first_vinfo) == first_stmt);
1828   gcc_assert (group1_size > 0);
1829   int group2_size = GROUP_SIZE (first_vinfo) - group1_size;
1830   gcc_assert (group2_size > 0);
1831   GROUP_SIZE (first_vinfo) = group1_size;
1832 
1833   gimple *stmt = first_stmt;
1834   for (unsigned i = group1_size; i > 1; i--)
1835     {
1836       stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
1837       gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
1838     }
1839   /* STMT is now the last element of the first group.  */
1840   gimple *group2 = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
1841   GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)) = 0;
1842 
1843   GROUP_SIZE (vinfo_for_stmt (group2)) = group2_size;
1844   for (stmt = group2; stmt; stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)))
1845     {
1846       GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = group2;
1847       gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
1848     }
1849 
1850   /* For the second group, the GROUP_GAP is that before the original group,
1851      plus skipping over the first vector.  */
1852   GROUP_GAP (vinfo_for_stmt (group2)) =
1853     GROUP_GAP (first_vinfo) + group1_size;
1854 
1855   /* GROUP_GAP of the first group now has to skip over the second group too.  */
1856   GROUP_GAP (first_vinfo) += group2_size;
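  /* E.g. splitting an originally gapless group of 8 stores into 4 + 4
     gives both halves a GROUP_GAP of 4, so each half still spans the
     original footprint of 8 scalars.  */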
1857 
1858   if (dump_enabled_p ())
1859     dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
1860 		     group1_size, group2_size);
1861 
1862   return group2;
1863 }
1864 
1865 /* Analyze an SLP instance starting from a group of grouped stores.  Call
1866    vect_build_slp_tree to build a tree of packed stmts if possible.
1867    Return FALSE if it's impossible to SLP any stmt in the loop.  */
1868 
1869 static bool
1870 vect_analyze_slp_instance (vec_info *vinfo,
1871 			   gimple *stmt, unsigned max_tree_size)
1872 {
1873   slp_instance new_instance;
1874   slp_tree node;
1875   unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1876   unsigned int unrolling_factor = 1, nunits;
1877   tree vectype, scalar_type = NULL_TREE;
1878   gimple *next;
1879   unsigned int i;
1880   unsigned int max_nunits = 0;
1881   vec<slp_tree> loads;
1882   struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
1883   vec<gimple *> scalar_stmts;
1884 
1885   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1886     {
1887       if (dr)
1888         {
1889           scalar_type = TREE_TYPE (DR_REF (dr));
1890           vectype = get_vectype_for_scalar_type (scalar_type);
1891         }
1892       else
1893         {
1894           gcc_assert (is_a <loop_vec_info> (vinfo));
1895           vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1896         }
1897 
1898       group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1899     }
1900   else
1901     {
1902       gcc_assert (is_a <loop_vec_info> (vinfo));
1903       vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1904       group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();
1905     }
1906 
1907   if (!vectype)
1908     {
1909       if (dump_enabled_p ())
1910         {
1911           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1912 			   "Build SLP failed: unsupported data-type ");
1913           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
1914           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
1915         }
1916 
1917       return false;
1918     }
1919   nunits = TYPE_VECTOR_SUBPARTS (vectype);
1920 
1921   /* Create a node (a root of the SLP tree) for the packed grouped stores.  */
1922   scalar_stmts.create (group_size);
1923   next = stmt;
1924   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1925     {
1926       /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
1927       while (next)
1928         {
1929 	  if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
1930 	      && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
1931 	    scalar_stmts.safe_push
1932 	      (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
1933 	  else
1934 	    scalar_stmts.safe_push (next);
1935           next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
1936         }
1937       /* Mark the first element of the reduction chain as reduction to properly
1938 	 transform the node.  In the reduction analysis phase only the last
1939 	 element of the chain is marked as reduction.  */
1940       if (!STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
1941 	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_reduction_def;
1942     }
1943   else
1944     {
1945       /* Collect reduction statements.  */
1946       vec<gimple *> reductions = as_a <loop_vec_info> (vinfo)->reductions;
1947       for (i = 0; reductions.iterate (i, &next); i++)
1948 	scalar_stmts.safe_push (next);
1949     }
1950 
1951   loads.create (group_size);
1952 
1953   /* Build the tree for the SLP instance.  */
1954   bool *matches = XALLOCAVEC (bool, group_size);
1955   unsigned npermutes = 0;
1956   bst_fail = new hash_set <vec <gimple *>, bst_traits> ();
1957   node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
1958 			      &max_nunits, &loads, matches, &npermutes,
1959 			      NULL, max_tree_size);
1960   delete bst_fail;
1961   if (node != NULL)
1962     {
1963       /* Calculate the unrolling factor based on the smallest type.  */
1964       unrolling_factor
1965 	= least_common_multiple (max_nunits, group_size) / group_size;
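      /* The factor is 1 when GROUP_SIZE is a multiple of MAX_NUNITS;
	 e.g. max_nunits 8 with a group_size of 6 yields
	 least_common_multiple (8, 6) / 6 == 4.  */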
1966 
1967       if (unrolling_factor != 1
1968 	  && is_a <bb_vec_info> (vinfo))
1969 	{
1971 	  if (max_nunits > group_size)
1972 	    {
1973 	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1974 			       "Build SLP failed: store group "
1975 			       "size not a multiple of the vector size "
1976 			       "in basic block SLP\n");
1977 	      vect_free_slp_tree (node);
1978 	      loads.release ();
1979 	      return false;
1980 	    }
1981 	  /* Fatal mismatch.  */
1982 	  matches[group_size / max_nunits * max_nunits] = false;
1983 	  vect_free_slp_tree (node);
1984 	  loads.release ();
1985 	}
1986       else
1987 	{
1988 	  /* Create a new SLP instance.  */
1989 	  new_instance = XNEW (struct _slp_instance);
1990 	  SLP_INSTANCE_TREE (new_instance) = node;
1991 	  SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
1992 	  SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
1993 	  SLP_INSTANCE_LOADS (new_instance) = loads;
1994 
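	  /* A load permutation records, for each scalar load of the node,
	     its position in the interleaving chain that starts at the
	     group's first element; e.g. loads of a[1], a[0] in that
	     order yield the permutation { 1, 0 }.  */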
1995 	  /* Compute the load permutation.  */
1996 	  slp_tree load_node;
1997 	  bool loads_permuted = false;
1998 	  FOR_EACH_VEC_ELT (loads, i, load_node)
1999 	    {
2000 	      vec<unsigned> load_permutation;
2001 	      int j;
2002 	      gimple *load, *first_stmt;
2003 	      bool this_load_permuted = false;
2004 	      load_permutation.create (group_size);
2005 	      first_stmt = GROUP_FIRST_ELEMENT
2006 		  (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
2007 	      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
2008 		{
2009 		  int load_place = vect_get_place_in_interleaving_chain
2010 		      (load, first_stmt);
2011 		  gcc_assert (load_place != -1);
2012 		  if (load_place != j)
2013 		    this_load_permuted = true;
2014 		  load_permutation.safe_push (load_place);
2015 		}
2016 	      if (!this_load_permuted
2017 		  /* The load requires permutation when unrolling exposes
2018 		     a gap either because the group is larger than the SLP
2019 		     group-size or because there is a gap between the groups.  */
2020 		  && (unrolling_factor == 1
2021 		      || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt))
2022 			  && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0)))
2023 		{
2024 		  load_permutation.release ();
2025 		  continue;
2026 		}
2027 	      SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
2028 	      loads_permuted = true;
2029 	    }
2030 
2031 	  if (loads_permuted)
2032 	    {
2033 	      if (!vect_supported_load_permutation_p (new_instance))
2034 		{
2035 		  if (dump_enabled_p ())
2036 		    {
2037 		      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2038 				       "Build SLP failed: unsupported load "
2039 				       "permutation ");
2040 		      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,
2041 					TDF_SLIM, stmt, 0);
2042 		    }
2043 		  vect_free_slp_instance (new_instance);
2044 		  return false;
2045 		}
2046 	    }
2047 
2048 	  /* If the loads and stores can be handled with load/store-lanes
2049 	     instructions, do not generate this SLP instance.  */
2050 	  if (is_a <loop_vec_info> (vinfo)
2051 	      && loads_permuted
2052 	      && dr && vect_store_lanes_supported (vectype, group_size))
2053 	    {
2054 	      slp_tree load_node;
2055 	      FOR_EACH_VEC_ELT (loads, i, load_node)
2056 		{
2057 		  gimple *first_stmt = GROUP_FIRST_ELEMENT
2058 		      (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
2059 		  stmt_vec_info stmt_vinfo = vinfo_for_stmt (first_stmt);
2060 		  /* Use SLP for strided accesses (or if we
2061 		     can't load-lanes).  */
2062 		  if (STMT_VINFO_STRIDED_P (stmt_vinfo)
2063 		      || ! vect_load_lanes_supported
2064 			    (STMT_VINFO_VECTYPE (stmt_vinfo),
2065 			     GROUP_SIZE (stmt_vinfo)))
2066 		    break;
2067 		}
2068 	      if (i == loads.length ())
2069 		{
2070 		  if (dump_enabled_p ())
2071 		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2072 				     "Built SLP cancelled: can use "
2073 				     "load/store-lanes\n");
2074 		  vect_free_slp_instance (new_instance);
2075 		  return false;
2076 		}
2077 	    }
2078 
2079 	  vinfo->slp_instances.safe_push (new_instance);
2080 
2081 	  if (dump_enabled_p ())
2082 	    {
2083 	      dump_printf_loc (MSG_NOTE, vect_location,
2084 			       "Final SLP tree for instance:\n");
2085 	      vect_print_slp_tree (MSG_NOTE, vect_location, node);
2086 	    }
2087 
2088 	  return true;
2089 	}
2090     }
2091   else
2092     {
2093       /* Failed to SLP.  Free the allocated memory.  */
2095       scalar_stmts.release ();
2096       loads.release ();
2097     }
2098 
2099   /* For basic block SLP, try to break the group up into multiples of the
2100      vector size.  */
2101   if (is_a <bb_vec_info> (vinfo)
2102       && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2103       && STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
2104     {
2105       /* We consider breaking the group only on VF boundaries from the existing
2106 	 start.  */
2107       for (i = 0; i < group_size; i++)
2108 	if (!matches[i]) break;
2109 
2110       if (i >= nunits && i < group_size)
2111 	{
2112 	  /* Split into two groups at the first vector boundary before i.  */
2113 	  gcc_assert ((nunits & (nunits - 1)) == 0);
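	  /* E.g. a first mismatch at i == 5 with nunits 4 gives
	     GROUP1_SIZE 4.  */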
2114 	  unsigned group1_size = i & ~(nunits - 1);
2115 
2116 	  gimple *rest = vect_split_slp_store_group (stmt, group1_size);
2117 	  bool res = vect_analyze_slp_instance (vinfo, stmt, max_tree_size);
2118 	  /* If the first non-match was in the middle of a vector,
2119 	     skip the rest of that vector.  */
2120 	  if (group1_size < i)
2121 	    {
2122 	      i = group1_size + nunits;
2123 	      if (i < group_size)
2124 		rest = vect_split_slp_store_group (rest, nunits);
2125 	    }
2126 	  if (i < group_size)
2127 	    res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size);
2128 	  return res;
2129 	}
2130       /* Even though the first vector did not all match, we might be able to SLP
2131 	 (some) of the remainder.  FORNOW ignore this possibility.  */
2132     }
2133 
2134   return false;
2135 }
2136 
2137 
2138 /* Check if there are stmts in the loop that can be vectorized using SLP.
2139    Build SLP trees of packed scalar stmts if SLP is possible.  */
2140 
2141 bool
2142 vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
2143 {
2144   unsigned int i;
2145   gimple *first_element;
2146   bool ok = false;
2147 
2148   if (dump_enabled_p ())
2149     dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===\n");
2150 
2151   /* Find SLP sequences starting from groups of grouped stores.  */
2152   FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
2153     if (vect_analyze_slp_instance (vinfo, first_element, max_tree_size))
2154       ok = true;
2155 
2156   if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2157     {
2158       if (loop_vinfo->reduction_chains.length () > 0)
2159 	{
2160 	  /* Find SLP sequences starting from reduction chains.  */
2161 	  FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
2162 	    if (vect_analyze_slp_instance (vinfo, first_element,
2163 					   max_tree_size))
2164 	      ok = true;
2165 	    else
2166 	      return false;
2167 
2168 	  /* Don't try to vectorize SLP reductions if reduction chain was
2169 	     detected.  */
2170 	  return ok;
2171 	}
2172 
2173       /* Find SLP sequences starting from groups of reductions.  */
2174       if (loop_vinfo->reductions.length () > 1
2175 	  && vect_analyze_slp_instance (vinfo, loop_vinfo->reductions[0],
2176 					max_tree_size))
2177 	ok = true;
2178     }
2179 
2180   return true;
2181 }
2182 
2183 
2184 /* For each possible SLP instance decide whether to SLP it and calculate overall
2185    unrolling factor needed to SLP the loop.  Return TRUE if decided to SLP at
2186    least one instance.  */
2187 
2188 bool
2189 vect_make_slp_decision (loop_vec_info loop_vinfo)
2190 {
2191   unsigned int i, unrolling_factor = 1;
2192   vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2193   slp_instance instance;
2194   int decided_to_slp = 0;
2195 
2196   if (dump_enabled_p ())
2197     dump_printf_loc (MSG_NOTE, vect_location,
2198 		     "=== vect_make_slp_decision ===\n");
2199 
2200   FOR_EACH_VEC_ELT (slp_instances, i, instance)
2201     {
2202       /* FORNOW: SLP if you can.  */
2203       if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
2204 	unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
2205 
2206       /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later we
2207 	 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
2208 	 loop-based vectorization.  Such stmts will be marked as HYBRID.  */
2209       vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2210       decided_to_slp++;
2211     }
2212 
2213   LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
2214 
2215   if (decided_to_slp && dump_enabled_p ())
2216     dump_printf_loc (MSG_NOTE, vect_location,
2217 		     "Decided to SLP %d instances. Unrolling factor %d\n",
2218 		     decided_to_slp, unrolling_factor);
2219 
2220   return (decided_to_slp > 0);
2221 }
2222 
2223 
2224 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
2225    can't be SLPed) in the tree rooted at NODE.  Mark such stmts as HYBRID.  */
2226 
2227 static void
2228 vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
2229 {
2230   gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[i];
2231   imm_use_iterator imm_iter;
2232   gimple *use_stmt;
2233   stmt_vec_info use_vinfo, stmt_vinfo = vinfo_for_stmt (stmt);
2234   slp_tree child;
2235   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2236   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2237   int j;
2238 
2239   /* Propagate hybrid down the SLP tree.  */
2240   if (stype == hybrid)
2241     ;
2242   else if (HYBRID_SLP_STMT (stmt_vinfo))
2243     stype = hybrid;
2244   else
2245     {
2246       /* Check if a pure SLP stmt has uses in non-SLP stmts.  */
2247       gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
2248       /* If we get a pattern stmt here we have to use the LHS of the
2249          original stmt for immediate uses.  */
2250       if (! STMT_VINFO_IN_PATTERN_P (stmt_vinfo)
2251 	  && STMT_VINFO_RELATED_STMT (stmt_vinfo))
2252 	stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2253       if (TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
2254 	FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
2255 	  {
2256 	    if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2257 	      continue;
2258 	    use_vinfo = vinfo_for_stmt (use_stmt);
2259 	    if (STMT_VINFO_IN_PATTERN_P (use_vinfo)
2260 		&& STMT_VINFO_RELATED_STMT (use_vinfo))
2261 	      use_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (use_vinfo));
2262 	    if (!STMT_SLP_TYPE (use_vinfo)
2263 		&& (STMT_VINFO_RELEVANT (use_vinfo)
2264 		    || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2265 		&& !(gimple_code (use_stmt) == GIMPLE_PHI
2266 		     && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2267 	      {
2268 		if (dump_enabled_p ())
2269 		  {
2270 		    dump_printf_loc (MSG_NOTE, vect_location, "use of SLP "
2271 				     "def in non-SLP stmt: ");
2272 		    dump_gimple_stmt (MSG_NOTE, TDF_SLIM, use_stmt, 0);
2273 		  }
2274 		stype = hybrid;
2275 	      }
2276 	  }
2277     }
2278 
2279   if (stype == hybrid
2280       && !HYBRID_SLP_STMT (stmt_vinfo))
2281     {
2282       if (dump_enabled_p ())
2283 	{
2284 	  dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2285 	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
2286 	}
2287       STMT_SLP_TYPE (stmt_vinfo) = hybrid;
2288     }
2289 
2290   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2291     if (SLP_TREE_DEF_TYPE (child) != vect_external_def)
2292       vect_detect_hybrid_slp_stmts (child, i, stype);
2293 }
2294 
2295 /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses.  */
2296 
2297 static tree
2298 vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
2299 {
2300   walk_stmt_info *wi = (walk_stmt_info *)data;
2301   struct loop *loopp = (struct loop *)wi->info;
2302 
2303   if (wi->is_lhs)
2304     return NULL_TREE;
2305 
2306   if (TREE_CODE (*tp) == SSA_NAME
2307       && !SSA_NAME_IS_DEFAULT_DEF (*tp))
2308     {
2309       gimple *def_stmt = SSA_NAME_DEF_STMT (*tp);
2310       if (flow_bb_inside_loop_p (loopp, gimple_bb (def_stmt))
2311 	  && PURE_SLP_STMT (vinfo_for_stmt (def_stmt)))
2312 	{
2313 	  if (dump_enabled_p ())
2314 	    {
2315 	      dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2316 	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
2317 	    }
2318 	  STMT_SLP_TYPE (vinfo_for_stmt (def_stmt)) = hybrid;
2319 	}
2320     }
2321 
2322   return NULL_TREE;
2323 }
2324 
2325 static tree
2326 vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
2327 			  walk_stmt_info *)
2328 {
2329   /* If the stmt is in a SLP instance then this isn't a reason
2330      to mark use definitions in other SLP instances as hybrid.  */
2331   if (STMT_SLP_TYPE (vinfo_for_stmt (gsi_stmt (*gsi))) != loop_vect)
2332     *handled = true;
2333   return NULL_TREE;
2334 }
2335 
2336 /* Find stmts that must be both vectorized and SLPed.  */
2337 
2338 void
2339 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
2340 {
2341   unsigned int i;
2342   vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2343   slp_instance instance;
2344 
2345   if (dump_enabled_p ())
2346     dump_printf_loc (MSG_NOTE, vect_location,
2347 		     "=== vect_detect_hybrid_slp ===\n");
2348 
2349   /* First walk all pattern stmts in the loop and mark defs of their uses
2350      as hybrid, because immediate uses inside pattern stmts are not recorded.  */
2351   for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2352     {
2353       basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2354       for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
2355 	   gsi_next (&gsi))
2356 	{
2357 	  gimple *stmt = gsi_stmt (gsi);
2358 	  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2359 	  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2360 	    {
2361 	      walk_stmt_info wi;
2362 	      memset (&wi, 0, sizeof (wi));
2363 	      wi.info = LOOP_VINFO_LOOP (loop_vinfo);
2364 	      gimple_stmt_iterator gsi2
2365 		= gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
2366 	      walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
2367 				vect_detect_hybrid_slp_1, &wi);
2368 	      walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
2369 			       vect_detect_hybrid_slp_2,
2370 			       vect_detect_hybrid_slp_1, &wi);
2371 	    }
2372 	}
2373     }
2374 
2375   /* Then walk the SLP instance trees marking stmts with uses in
2376      non-SLP stmts as hybrid, also propagating hybrid down the
2377      SLP tree, collecting the above info on-the-fly.  */
2378   FOR_EACH_VEC_ELT (slp_instances, i, instance)
2379     {
2380       for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i)
2381 	vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
2382 				      i, pure_slp);
2383     }
2384 }
2385 
2386 
2387 /* Create and initialize a new bb_vec_info struct for BB, as well as
2388    stmt_vec_info structs for all the stmts in it.  */
2389 
2390 static bb_vec_info
2391 new_bb_vec_info (gimple_stmt_iterator region_begin,
2392 		 gimple_stmt_iterator region_end)
2393 {
2394   basic_block bb = gsi_bb (region_begin);
2395   bb_vec_info res = NULL;
2396   gimple_stmt_iterator gsi;
2397 
2398   res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
2399   res->kind = vec_info::bb;
2400   BB_VINFO_BB (res) = bb;
2401   res->region_begin = region_begin;
2402   res->region_end = region_end;
2403 
2404   for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end);
2405        gsi_next (&gsi))
2406     {
2407       gimple *stmt = gsi_stmt (gsi);
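      /* A UID of zero marks STMT as belonging to the region; it is
	 reset to -1 again in destroy_bb_vec_info.  */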
2408       gimple_set_uid (stmt, 0);
2409       set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, res));
2410     }
2411 
2412   BB_VINFO_GROUPED_STORES (res).create (10);
2413   BB_VINFO_SLP_INSTANCES (res).create (2);
2414   BB_VINFO_TARGET_COST_DATA (res) = init_cost (NULL);
2415 
2416   bb->aux = res;
2417   return res;
2418 }
2419 
2420 
2421 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2422    stmts in the basic block.  */
2423 
2424 static void
2425 destroy_bb_vec_info (bb_vec_info bb_vinfo)
2426 {
2427   slp_instance instance;
2428   unsigned i;
2429 
2430   if (!bb_vinfo)
2431     return;
2432 
2433   vect_destroy_datarefs (bb_vinfo);
2434   free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
2435   BB_VINFO_GROUPED_STORES (bb_vinfo).release ();
2436   FOR_EACH_VEC_ELT (BB_VINFO_SLP_INSTANCES (bb_vinfo), i, instance)
2437     vect_free_slp_instance (instance);
2438   BB_VINFO_SLP_INSTANCES (bb_vinfo).release ();
2439   destroy_cost_data (BB_VINFO_TARGET_COST_DATA (bb_vinfo));
2440 
2441   for (gimple_stmt_iterator si = bb_vinfo->region_begin;
2442        gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si))
2443     {
2444       gimple *stmt = gsi_stmt (si);
2445       stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2446 
2447       if (stmt_info)
2448         /* Free stmt_vec_info.  */
2449         free_stmt_vec_info (stmt);
2450 
2451       /* Reset region marker.  */
2452       gimple_set_uid (stmt, -1);
2453     }
2454 
2455   BB_VINFO_BB (bb_vinfo)->aux = NULL;
2456   free (bb_vinfo);
2457 }
2458 
2459 
2460 /* Analyze statements contained in SLP tree node after recursively analyzing
2461    the subtree. Return TRUE if the operations are supported.  */
2462 
2463 static bool
2464 vect_slp_analyze_node_operations (slp_tree node)
2465 {
2466   bool dummy;
2467   int i, j;
2468   gimple *stmt;
2469   slp_tree child;
2470 
2471   if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
2472     return true;
2473 
2474   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2475     if (!vect_slp_analyze_node_operations (child))
2476       return false;
2477 
2478   bool res = true;
2479   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
2480     {
2481       stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2482       gcc_assert (stmt_info);
2483       gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
2484 
2485       /* Push SLP node def-type to stmt operands.  */
2486       FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2487 	if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2488 	  STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[i]))
2489 	    = SLP_TREE_DEF_TYPE (child);
2490       res = vect_analyze_stmt (stmt, &dummy, node);
2491       /* Restore def-types.  */
2492       FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2493 	if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2494 	  STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[i]))
2495 	    = vect_internal_def;
2496       if (! res)
2497 	break;
2498     }
2499 
2500   return res;
2501 }
2502 
2503 
2504 /* Analyze statements in SLP instances of the basic block.  Return TRUE if the
2505    operations are supported. */
2506 
2507 bool
2508 vect_slp_analyze_operations (vec<slp_instance> slp_instances, void *data)
2509 {
2510   slp_instance instance;
2511   int i;
2512 
2513   if (dump_enabled_p ())
2514     dump_printf_loc (MSG_NOTE, vect_location,
2515 		     "=== vect_slp_analyze_operations ===\n");
2516 
2517   for (i = 0; slp_instances.iterate (i, &instance); )
2518     {
2519       if (!vect_slp_analyze_node_operations (SLP_INSTANCE_TREE (instance)))
2520         {
2521 	  dump_printf_loc (MSG_NOTE, vect_location,
2522 			   "removing SLP instance operations starting from: ");
2523 	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
2524 			    SLP_TREE_SCALAR_STMTS
2525 			      (SLP_INSTANCE_TREE (instance))[0], 0);
2526 	  vect_free_slp_instance (instance);
2527           slp_instances.ordered_remove (i);
2528 	}
2529       else
2530 	{
2531 	  /* Compute the costs of the SLP instance.  */
2532 	  vect_analyze_slp_cost (instance, data);
2533 	  i++;
2534 	}
2535     }
2536 
2537   if (!slp_instances.length ())
2538     return false;
2539 
2540   return true;
2541 }
2542 
2543 
2544 /* Compute the scalar cost of the SLP node NODE and its children
2545    and return it.  Do not account stmts that are marked in LIFE,
2546    and update LIFE according to uses of NODE.  */
2547 
2548 static unsigned
2549 vect_bb_slp_scalar_cost (basic_block bb,
2550 			 slp_tree node, vec<bool, va_heap> *life)
2551 {
2552   unsigned scalar_cost = 0;
2553   unsigned i;
2554   gimple *stmt;
2555   slp_tree child;
2556 
2557   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
2558     {
2559       unsigned stmt_cost;
2560       ssa_op_iter op_iter;
2561       def_operand_p def_p;
2562       stmt_vec_info stmt_info;
2563 
2564       if ((*life)[i])
2565 	continue;
2566 
2567       /* If there is a non-vectorized use of the defs then the scalar
2568          stmt is kept live in which case we do not account it or any
2569 	 required defs in the SLP children in the scalar cost.  This
2570 	 way we make the vectorization more costly when compared to
2571 	 the scalar cost.  */
2572       FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
2573 	{
2574 	  imm_use_iterator use_iter;
2575 	  gimple *use_stmt;
2576 	  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
2577 	    if (!is_gimple_debug (use_stmt)
2578 		&& (! vect_stmt_in_region_p (vinfo_for_stmt (stmt)->vinfo,
2579 					     use_stmt)
2580 		    || ! PURE_SLP_STMT (vinfo_for_stmt (use_stmt))))
2581 	      {
2582 		(*life)[i] = true;
2583 		BREAK_FROM_IMM_USE_STMT (use_iter);
2584 	      }
2585 	}
2586       if ((*life)[i])
2587 	continue;
2588 
2589       /* Count scalar stmts only once.  */
2590       if (gimple_visited_p (stmt))
2591 	continue;
2592       gimple_set_visited (stmt, true);
2593 
2594       stmt_info = vinfo_for_stmt (stmt);
2595       if (STMT_VINFO_DATA_REF (stmt_info))
2596         {
2597           if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
2598             stmt_cost = vect_get_stmt_cost (scalar_load);
2599           else
2600             stmt_cost = vect_get_stmt_cost (scalar_store);
2601         }
2602       else
2603         stmt_cost = vect_get_stmt_cost (scalar_stmt);
2604 
2605       scalar_cost += stmt_cost;
2606     }
2607 
2608   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2609     if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
2610       scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);
2611 
2612   return scalar_cost;
2613 }
2614 
2615 /* Check if vectorization of the basic block is profitable.  */
2616 
2617 static bool
2618 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2619 {
2620   vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2621   slp_instance instance;
2622   int i;
2623   unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
2624   unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
2625 
2626   /* Calculate scalar cost.  */
2627   FOR_EACH_VEC_ELT (slp_instances, i, instance)
2628     {
2629       auto_vec<bool, 20> life;
2630       life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
2631       scalar_cost += vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
2632 					      SLP_INSTANCE_TREE (instance),
2633 					      &life);
2634     }
2635 
2636   /* Unset visited flag.  */
2637   for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
2638        gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
2639     gimple_set_visited (gsi_stmt (gsi), false);
2640 
2641   /* Complete the target-specific cost calculation.  */
2642   finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2643 	       &vec_inside_cost, &vec_epilogue_cost);
2644 
2645   vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
2646 
2647   if (dump_enabled_p ())
2648     {
2649       dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2650       dump_printf (MSG_NOTE, "  Vector inside of basic block cost: %d\n",
2651 		   vec_inside_cost);
2652       dump_printf (MSG_NOTE, "  Vector prologue cost: %d\n", vec_prologue_cost);
2653       dump_printf (MSG_NOTE, "  Vector epilogue cost: %d\n", vec_epilogue_cost);
2654       dump_printf (MSG_NOTE, "  Scalar cost of basic block: %d\n", scalar_cost);
2655     }
2656 
2657   /* Vectorization is profitable if its cost is no more than the cost of
2658      the scalar version.  Note that we err on the vector side for equal
2659      cost because the cost estimate is otherwise quite pessimistic
2660      (constant uses are free on the scalar side but cost a load on the
2661      vector side, for example).  */
2662   if (vec_outside_cost + vec_inside_cost > scalar_cost)
2663     return false;
2664 
2665   return true;
2666 }
2667 
2668 /* Check if the basic block can be vectorized.  Returns a bb_vec_info
2669    if so, and sets FATAL to true if failure is independent of
2670    current_vector_size.  */
2671 
2672 static bb_vec_info
2673 vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin,
2674 		       gimple_stmt_iterator region_end,
2675 		       vec<data_reference_p> datarefs, int n_stmts,
2676 		       bool &fatal)
2677 {
2678   bb_vec_info bb_vinfo;
2679   slp_instance instance;
2680   int i;
2681   int min_vf = 2;
2682 
2683   /* The first group of checks is independent of the vector size.  */
2684   fatal = true;
2685 
2686   if (n_stmts > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2687     {
2688       if (dump_enabled_p ())
2689 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2690 			 "not vectorized: too many instructions in "
2691 			 "basic block.\n");
2692       free_data_refs (datarefs);
2693       return NULL;
2694     }
2695 
2696   bb_vinfo = new_bb_vec_info (region_begin, region_end);
2697   if (!bb_vinfo)
2698     return NULL;
2699 
2700   BB_VINFO_DATAREFS (bb_vinfo) = datarefs;
2701 
2702   /* Analyze the data references.  */
2703 
2704   if (!vect_analyze_data_refs (bb_vinfo, &min_vf))
2705     {
2706       if (dump_enabled_p ())
2707         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2708 			 "not vectorized: unhandled data-ref in basic "
2709 			 "block.\n");
2710 
2711       destroy_bb_vec_info (bb_vinfo);
2712       return NULL;
2713     }
2714 
2715   if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
2716     {
2717       if (dump_enabled_p ())
2718         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2719 			 "not vectorized: not enough data-refs in "
2720 			 "basic block.\n");
2721 
2722       destroy_bb_vec_info (bb_vinfo);
2723       return NULL;
2724     }
2725 
2726   if (!vect_analyze_data_ref_accesses (bb_vinfo))
2727     {
2728       if (dump_enabled_p ())
2729 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2730 			 "not vectorized: unhandled data access in "
2731 			 "basic block.\n");
2732 
2733       destroy_bb_vec_info (bb_vinfo);
2734       return NULL;
2735     }
2736 
2737   /* If there are no grouped stores in the region there is no need
2738      to continue with pattern recog as vect_analyze_slp will fail
2739      anyway.  */
2740   if (bb_vinfo->grouped_stores.is_empty ())
2741     {
2742       if (dump_enabled_p ())
2743 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2744 			 "not vectorized: no grouped stores in "
2745 			 "basic block.\n");
2746 
2747       destroy_bb_vec_info (bb_vinfo);
2748       return NULL;
2749     }
2750 
2751   /* The rest of the analysis below depends on the vector size in some way.  */
2752   fatal = false;
2753 
2754   vect_pattern_recog (bb_vinfo);
2755 
2756   /* Check the SLP opportunities in the basic block, analyze and build SLP
2757      trees.  */
2758   if (!vect_analyze_slp (bb_vinfo, n_stmts))
2759     {
2760       if (dump_enabled_p ())
2761 	{
2762 	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2763 			   "Failed to SLP the basic block.\n");
2764 	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2765 			   "not vectorized: failed to find SLP opportunities "
2766 			   "in basic block.\n");
2767 	}
2768 
2769       destroy_bb_vec_info (bb_vinfo);
2770       return NULL;
2771     }
2772 
2773   /* Analyze and verify the alignment of data references and the
2774      dependence in the SLP instances.  */
2775   for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
2776     {
2777       if (! vect_slp_analyze_and_verify_instance_alignment (instance)
2778 	  || ! vect_slp_analyze_instance_dependence (instance))
2779 	{
2780 	  dump_printf_loc (MSG_NOTE, vect_location,
2781 			   "removing SLP instance operations starting from: ");
2782 	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
2783 			    SLP_TREE_SCALAR_STMTS
2784 			      (SLP_INSTANCE_TREE (instance))[0], 0);
2785 	  vect_free_slp_instance (instance);
2786 	  BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
2787 	  continue;
2788 	}
2789 
2790       /* Mark all the statements that we want to vectorize as pure SLP and
2791 	 relevant.  */
2792       vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2793       vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2794 
2795       i++;
2796     }
2797   if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
2798     {
2799       destroy_bb_vec_info (bb_vinfo);
2800       return NULL;
2801     }
2802 
2803   if (!vect_slp_analyze_operations (BB_VINFO_SLP_INSTANCES (bb_vinfo),
2804 				    BB_VINFO_TARGET_COST_DATA (bb_vinfo)))
2805     {
2806       if (dump_enabled_p ())
2807         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2808 			 "not vectorized: bad operation in basic block.\n");
2809 
2810       destroy_bb_vec_info (bb_vinfo);
2811       return NULL;
2812     }
2813 
2814   /* Cost model: check if the vectorization is worthwhile.  */
2815   if (!unlimited_cost_model (NULL)
2816       && !vect_bb_vectorization_profitable_p (bb_vinfo))
2817     {
2818       if (dump_enabled_p ())
2819         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2820 			 "not vectorized: vectorization is not "
2821 			 "profitable.\n");
2822 
2823       destroy_bb_vec_info (bb_vinfo);
2824       return NULL;
2825     }
2826 
2827   if (dump_enabled_p ())
2828     dump_printf_loc (MSG_NOTE, vect_location,
2829 		     "Basic block will be vectorized using SLP\n");
2830 
2831   return bb_vinfo;
2832 }
2833 
2834 
2835 /* Main entry for the BB vectorizer.  Analyze and transform BB, returns
2836    true if anything in the basic-block was vectorized.  */
2837 
2838 bool
2839 vect_slp_bb (basic_block bb)
2840 {
2841   bb_vec_info bb_vinfo;
2842   gimple_stmt_iterator gsi;
2843   unsigned int vector_sizes;
2844   bool any_vectorized = false;
2845 
2846   if (dump_enabled_p ())
2847     dump_printf_loc (MSG_NOTE, vect_location, "=== vect_slp_analyze_bb ===\n");
2848 
2849   /* Autodetect first vector size we try.  */
2850   current_vector_size = 0;
2851   vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
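  /* VECTOR_SIZES is a bitmask of the vector sizes the target supports,
     e.g. 32 | 16 when both 32-byte and 16-byte vectors are available;
     the loop below retries the analysis with the next smaller size on
     failure.  */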
2852 
2853   gsi = gsi_start_bb (bb);
2854 
2855   while (1)
2856     {
2857       if (gsi_end_p (gsi))
2858 	break;
2859 
2860       gimple_stmt_iterator region_begin = gsi;
2861       vec<data_reference_p> datarefs = vNULL;
2862       int insns = 0;
2863 
2864       for (; !gsi_end_p (gsi); gsi_next (&gsi))
2865 	{
2866 	  gimple *stmt = gsi_stmt (gsi);
2867 	  if (is_gimple_debug (stmt))
2868 	    continue;
2869 	  insns++;
2870 
2871 	  if (gimple_location (stmt) != UNKNOWN_LOCATION)
2872 	    vect_location = gimple_location (stmt);
2873 
2874 	  if (!find_data_references_in_stmt (NULL, stmt, &datarefs))
2875 	    break;
2876 	}
2877 
2878       /* Skip leading unhandled stmts.  */
2879       if (gsi_stmt (region_begin) == gsi_stmt (gsi))
2880 	{
2881 	  gsi_next (&gsi);
2882 	  continue;
2883 	}
2884 
2885       gimple_stmt_iterator region_end = gsi;
2886 
2887       bool vectorized = false;
2888       bool fatal = false;
2889       bb_vinfo = vect_slp_analyze_bb_1 (region_begin, region_end,
2890 					datarefs, insns, fatal);
2891       if (bb_vinfo
2892 	  && dbg_cnt (vect_slp))
2893 	{
2894 	  if (dump_enabled_p ())
2895 	    dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");
2896 
2897 	  vect_schedule_slp (bb_vinfo);
2898 
2899 	  if (dump_enabled_p ())
2900 	    dump_printf_loc (MSG_NOTE, vect_location,
2901 			     "basic block part vectorized\n");
2902 
2903 	  destroy_bb_vec_info (bb_vinfo);
2904 
2905 	  vectorized = true;
2906 	}
2907       else
2908 	destroy_bb_vec_info (bb_vinfo);
2909 
2910       any_vectorized |= vectorized;
2911 
2912       vector_sizes &= ~current_vector_size;
2913       if (vectorized
2914 	  || vector_sizes == 0
2915 	  || current_vector_size == 0
2916 	  /* If vect_slp_analyze_bb_1 signaled that analysis for all
2917 	     vector sizes will fail do not bother iterating.  */
2918 	  || fatal)
2919 	{
2920 	  if (gsi_end_p (region_end))
2921 	    break;
2922 
2923 	  /* Skip the unhandled stmt.  */
2924 	  gsi_next (&gsi);
2925 
2926 	  /* And reset vector sizes.  */
2927 	  current_vector_size = 0;
2928 	  vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
2929 	}
2930       else
2931 	{
2932 	  /* Try the next biggest vector size.  */
2933 	  current_vector_size = 1 << floor_log2 (vector_sizes);
2934 	  if (dump_enabled_p ())
2935 	    dump_printf_loc (MSG_NOTE, vect_location,
2936 			     "***** Re-trying analysis with "
2937 			     "vector size %d\n", current_vector_size);
2938 
2939 	  /* Start over.  */
2940 	  gsi = region_begin;
2941 	}
2942     }
2943 
2944   return any_vectorized;
2945 }
2946 
2947 
2948 /* Return true if the vector type of the boolean constant that is the
2949    OPNUM operand of statement STMT is a boolean vector.  */
2950 
2951 static bool
2952 vect_mask_constant_operand_p (gimple *stmt, int opnum)
2953 {
2954   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2955   enum tree_code code = gimple_expr_code (stmt);
2956   tree op, vectype;
2957   gimple *def_stmt;
2958   enum vect_def_type dt;
2959 
2960   /* For comparisons and COND_EXPR the type is chosen depending
2961      on the other comparison operand.  */
2962   if (TREE_CODE_CLASS (code) == tcc_comparison)
2963     {
2964       if (opnum)
2965 	op = gimple_assign_rhs1 (stmt);
2966       else
2967 	op = gimple_assign_rhs2 (stmt);
2968 
2969       if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &def_stmt,
2970 			       &dt, &vectype))
2971 	gcc_unreachable ();
2972 
2973       return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
2974     }
2975 
2976   if (code == COND_EXPR)
2977     {
2978       tree cond = gimple_assign_rhs1 (stmt);
2979 
2980       if (TREE_CODE (cond) == SSA_NAME)
2981 	op = cond;
2982       else if (opnum)
2983 	op = TREE_OPERAND (cond, 1);
2984       else
2985 	op = TREE_OPERAND (cond, 0);
2986 
2987       if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &def_stmt,
2988 			       &dt, &vectype))
2989 	gcc_unreachable ();
2990 
2991       return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
2992     }
2993 
2994   return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
2995 }
2996 
2997 
2998 /* For constant and loop invariant defs of SLP_NODE this function returns
2999    (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
3000    OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
3001    scalar stmts.  NUMBER_OF_VECTORS is the number of vector defs to create.
3002    REDUC_INDEX is the index of the reduction operand in the statements, unless
3003    it is -1.  */
3004 
3005 static void
3006 vect_get_constant_vectors (tree op, slp_tree slp_node,
3007                            vec<tree> *vec_oprnds,
3008 			   unsigned int op_num, unsigned int number_of_vectors,
3009                            int reduc_index)
3010 {
3011   vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
3012   gimple *stmt = stmts[0];
3013   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3014   unsigned nunits;
3015   tree vec_cst;
3016   tree *elts;
3017   unsigned j, number_of_places_left_in_vector;
3018   tree vector_type;
3019   tree vop;
3020   int group_size = stmts.length ();
3021   unsigned int vec_num, i;
3022   unsigned number_of_copies = 1;
3023   vec<tree> voprnds;
3024   voprnds.create (number_of_vectors);
3025   bool constant_p, is_store;
3026   tree neutral_op = NULL;
3027   enum tree_code code = gimple_expr_code (stmt);
3028   gimple *def_stmt;
3029   struct loop *loop;
3030   gimple_seq ctor_seq = NULL;
3031 
3032   /* Check if vector type is a boolean vector.  */
3033   if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
3034       && vect_mask_constant_operand_p (stmt, op_num))
3035     vector_type
3036       = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo));
3037   else
3038     vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
3039   nunits = TYPE_VECTOR_SUBPARTS (vector_type);
3040 
3041   if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
3042       && reduc_index != -1)
3043     {
3044       op_num = reduc_index;
3045       op = gimple_op (stmt, op_num + 1);
3046       /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
3047          we need either neutral operands or the original operands.  See
3048          get_initial_def_for_reduction() for details.  */
3049       switch (code)
3050         {
3051           case WIDEN_SUM_EXPR:
3052           case DOT_PROD_EXPR:
3053 	  case SAD_EXPR:
3054           case PLUS_EXPR:
3055           case MINUS_EXPR:
3056           case BIT_IOR_EXPR:
3057           case BIT_XOR_EXPR:
3058              if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
3059                neutral_op = build_real (TREE_TYPE (op), dconst0);
3060              else
3061                neutral_op = build_int_cst (TREE_TYPE (op), 0);
3062 
3063              break;
3064 
3065           case MULT_EXPR:
3066              if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
3067                neutral_op = build_real (TREE_TYPE (op), dconst1);
3068              else
3069                neutral_op = build_int_cst (TREE_TYPE (op), 1);
3070 
3071              break;
3072 
3073           case BIT_AND_EXPR:
3074             neutral_op = build_int_cst (TREE_TYPE (op), -1);
3075             break;
3076 
3077 	  /* For MIN/MAX we don't have an easy neutral operand but
3078 	     the initial values can be used fine here.  Only for
3079 	     a reduction chain we have to force a neutral element.  */
3080 	  case MAX_EXPR:
3081 	  case MIN_EXPR:
3082 	    if (!GROUP_FIRST_ELEMENT (stmt_vinfo))
3083 	      neutral_op = NULL;
3084 	    else
3085 	      {
3086 		def_stmt = SSA_NAME_DEF_STMT (op);
3087 		loop = (gimple_bb (stmt))->loop_father;
3088 		neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
3089 						    loop_preheader_edge (loop));
3090 	      }
3091 	    break;
3092 
3093           default:
3094 	    gcc_assert (!GROUP_FIRST_ELEMENT (stmt_vinfo));
3095             neutral_op = NULL;
3096         }
3097     }
3098 
3099   if (STMT_VINFO_DATA_REF (stmt_vinfo))
3100     {
3101       is_store = true;
3102       op = gimple_assign_rhs1 (stmt);
3103     }
3104   else
3105     is_store = false;
3106 
3107   gcc_assert (op);
3108 
3109   if (CONSTANT_CLASS_P (op))
3110     constant_p = true;
3111   else
3112     constant_p = false;
3113 
3114   /* NUMBER_OF_COPIES is the number of times we need to use the same values in
3115      created vectors. It is greater than 1 if unrolling is performed.
3116 
3117      For example, we have two scalar operands, s1 and s2 (e.g., group of
3118      strided accesses of size two), while NUNITS is four (i.e., four scalars
3119      of this type can be packed in a vector).  The output vector will contain
3120      two copies of each scalar operand: {s1, s2, s1, s2}.  (NUMBER_OF_COPIES
3121      will be 2).
3122 
3123      If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
3124      containing the operands.
3125 
3126      For example, NUNITS is four as before, and the group size is 8
3127      (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
3128      {s5, s6, s7, s8}.  */
3129 
3130   number_of_copies = nunits * number_of_vectors / group_size;
3131 
3132   number_of_places_left_in_vector = nunits;
3133   elts = XALLOCAVEC (tree, nunits);
3134   bool place_after_defs = false;
3135   for (j = 0; j < number_of_copies; j++)
3136     {
3137       for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
3138         {
3139           if (is_store)
3140             op = gimple_assign_rhs1 (stmt);
3141           else
3142 	    {
3143 	      switch (code)
3144 		{
3145 		  case COND_EXPR:
3146 		    {
3147 		      tree cond = gimple_assign_rhs1 (stmt);
3148 		      if (TREE_CODE (cond) == SSA_NAME)
3149 			op = gimple_op (stmt, op_num + 1);
3150 		      else if (op_num == 0 || op_num == 1)
3151 			op = TREE_OPERAND (cond, op_num);
3152 		      else
3153 			{
3154 			  if (op_num == 2)
3155 			    op = gimple_assign_rhs2 (stmt);
3156 			  else
3157 			    op = gimple_assign_rhs3 (stmt);
3158 			}
3159 		    }
3160 		    break;
3161 
3162 		  case CALL_EXPR:
3163 		    op = gimple_call_arg (stmt, op_num);
3164 		    break;
3165 
3166 		  case LSHIFT_EXPR:
3167 		  case RSHIFT_EXPR:
3168 		  case LROTATE_EXPR:
3169 		  case RROTATE_EXPR:
3170 		    op = gimple_op (stmt, op_num + 1);
3171 		    /* Unlike the other binary operators, shifts/rotates have
3172 		       the shift count being int, instead of the same type as
3173 		       the lhs, so make sure the scalar is the right type if
3174 		       we are dealing with vectors of
3175 		       long long/long/short/char.  */
3176 		    if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
3177 		      op = fold_convert (TREE_TYPE (vector_type), op);
3178 		    break;
3179 
3180 		  default:
3181 		    op = gimple_op (stmt, op_num + 1);
3182 		    break;
3183 		}
3184 	    }
3185 
3186           if (reduc_index != -1)
3187             {
3188               loop = (gimple_bb (stmt))->loop_father;
3189               def_stmt = SSA_NAME_DEF_STMT (op);
3190 
3191               gcc_assert (loop);
3192 
3193               /* Get the def before the loop.  In reduction chain we have only
3194                  one initial value.  */
3195               if ((j != (number_of_copies - 1)
3196                    || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
3197                        && i != 0))
3198                   && neutral_op)
3199                 op = neutral_op;
3200               else
3201                 op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
3202                                             loop_preheader_edge (loop));
3203             }
3204 
3205           /* Create 'vect_ = {op0,op1,...,opn}'.  */
3206           number_of_places_left_in_vector--;
3207 	  tree orig_op = op;
3208 	  if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
3209 	    {
3210 	      if (CONSTANT_CLASS_P (op))
3211 		{
3212 		  if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3213 		    {
3214 		      /* Can't use VIEW_CONVERT_EXPR for booleans because
3215 			 of possibly different sizes of scalar value and
3216 			 vector element.  */
3217 		      if (integer_zerop (op))
3218 			op = build_int_cst (TREE_TYPE (vector_type), 0);
3219 		      else if (integer_onep (op))
3220 			op = build_all_ones_cst (TREE_TYPE (vector_type));
3221 		      else
3222 			gcc_unreachable ();
3223 		    }
3224 		  else
3225 		    op = fold_unary (VIEW_CONVERT_EXPR,
3226 				     TREE_TYPE (vector_type), op);
3227 		  gcc_assert (op && CONSTANT_CLASS_P (op));
3228 		}
3229 	      else
3230 		{
3231 		  tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
3232 		  gimple *init_stmt;
3233 		  if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3234 		    {
3235 		      tree true_val
3236 			= build_all_ones_cst (TREE_TYPE (vector_type));
3237 		      tree false_val
3238 			= build_zero_cst (TREE_TYPE (vector_type));
3239 		      gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op)));
3240 		      init_stmt = gimple_build_assign (new_temp, COND_EXPR,
3241 						       op, true_val,
3242 						       false_val);
3243 		    }
3244 		  else
3245 		    {
3246 		      op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
3247 				   op);
3248 		      init_stmt
3249 			= gimple_build_assign (new_temp, VIEW_CONVERT_EXPR,
3250 					       op);
3251 		    }
3252 		  gimple_seq_add_stmt (&ctor_seq, init_stmt);
3253 		  op = new_temp;
3254 		}
3255 	    }
3256 	  elts[number_of_places_left_in_vector] = op;
3257 	  if (!CONSTANT_CLASS_P (op))
3258 	    constant_p = false;
3259 	  if (TREE_CODE (orig_op) == SSA_NAME
3260 	      && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
3261 	      && STMT_VINFO_BB_VINFO (stmt_vinfo)
3262 	      && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
3263 		  == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
3264 	    place_after_defs = true;
3265 
3266           if (number_of_places_left_in_vector == 0)
3267             {
3268               number_of_places_left_in_vector = nunits;
3269 
3270 	      if (constant_p)
3271 		vec_cst = build_vector (vector_type, elts);
3272 	      else
3273 		{
3274 		  vec<constructor_elt, va_gc> *v;
3275 		  unsigned k;
3276 		  vec_alloc (v, nunits);
3277 		  for (k = 0; k < nunits; ++k)
3278 		    CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
3279 		  vec_cst = build_constructor (vector_type, v);
3280 		}
3281 	      tree init;
3282 	      gimple_stmt_iterator gsi;
3283 	      if (place_after_defs)
3284 		{
3285 		  gsi = gsi_for_stmt
3286 		          (vect_find_last_scalar_stmt_in_slp (slp_node));
3287 		  init = vect_init_vector (stmt, vec_cst, vector_type, &gsi);
3288 		}
3289 	      else
3290 		init = vect_init_vector (stmt, vec_cst, vector_type, NULL);
3291 	      if (ctor_seq != NULL)
3292 		{
3293 		  gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
3294 		  gsi_insert_seq_before_without_update (&gsi, ctor_seq,
3295 							GSI_SAME_STMT);
3296 		  ctor_seq = NULL;
3297 		}
3298 	      voprnds.quick_push (init);
3299 	      place_after_defs = false;
3300             }
3301         }
3302     }
3303 
3304   /* Since the vectors were created in reverse order, reverse them back
3305      into the original order.  */
3306   vec_num = voprnds.length ();
3307   for (j = vec_num; j != 0; j--)
3308     {
3309       vop = voprnds[j - 1];
3310       vec_oprnds->quick_push (vop);
3311     }
3312 
3313   voprnds.release ();
3314 
3315   /* If VF is greater than the unrolling factor needed for the SLP group of
3316      stmts, the NUMBER_OF_VECTORS to be created is greater than
3317      NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence the
3318      vectors built above have to be replicated.  */
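  /* For example (illustrative): if two vectors were built above but four are
     required, push two more copies of them, or neutral-value vectors for a
     reduction, until NUMBER_OF_VECTORS operands exist.  */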
3319   tree neutral_vec = NULL;
3320   while (number_of_vectors > vec_oprnds->length ())
3321     {
3322       if (neutral_op)
3323         {
3324 	  /* Build the neutral vector only once and reuse it.  */
3325           if (!neutral_vec)
3326 	    neutral_vec = build_vector_from_val (vector_type, neutral_op);
3327 
3328           vec_oprnds->quick_push (neutral_vec);
3329         }
3330       else
3331         {
3332           for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
3333             vec_oprnds->quick_push (vop);
3334         }
3335     }
3336 }
3337 
3338 
3339 /* Get the vectorized definitions from SLP_NODE, which contains the
3340    corresponding vectorized def-stmts.  */
3341 
3342 static void
3343 vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
3344 {
3345   tree vec_oprnd;
3346   gimple *vec_def_stmt;
3347   unsigned int i;
3348 
3349   gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
3350 
3351   FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
3352     {
3353       gcc_assert (vec_def_stmt);
3354       vec_oprnd = gimple_get_lhs (vec_def_stmt);
3355       vec_oprnds->quick_push (vec_oprnd);
3356     }
3357 }
3358 
3359 
3360 /* Get vectorized definitions for SLP_NODE.
3361    If the scalar definitions are loop invariants or constants, collect them and
3362    call vect_get_constant_vectors() to create vector stmts.
3363    Otherwise, the def-stmts must already be vectorized and the vectorized stmts
3364    must be stored in the corresponding child of SLP_NODE, and we call
3365    vect_get_slp_vect_defs () to retrieve them.  */
3366 
3367 void
3368 vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
3369 		   vec<vec<tree> > *vec_oprnds, int reduc_index)
3370 {
3371   gimple *first_stmt;
3372   int number_of_vects = 0, i;
3373   unsigned int child_index = 0;
3374   HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
3375   slp_tree child = NULL;
3376   vec<tree> vec_defs;
3377   tree oprnd;
3378   bool vectorized_defs;
3379   bool first_iteration = true;
3380 
3381   first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
3382   FOR_EACH_VEC_ELT (ops, i, oprnd)
3383     {
3384       if (oprnd == NULL)
3385 	{
3386 	  /* Only vectorizable_reduction will call us with a NULL op, which
3387 	     will always match the reduction operand for which we have no
3388 	     SLP child.  */
3389 	  vec_defs = vNULL;
3390 	  vec_defs.create (0);
3391 	  vec_oprnds->quick_push (vec_defs);
3392 	  continue;
3393 	}
3394 
3395       /* For each operand we check if it has vectorized definitions in a child
3396 	 node or whether we need to create them (for invariants and constants).
3397 	 We check if the LHS of the first stmt of the next child matches OPRND.
3398 	 If it does, we found the correct child.  Otherwise, we call
3399 	 vect_get_constant_vectors () and do not advance CHILD_INDEX, so that
3400 	 this child node is checked again for the next operand.  */
3401       vectorized_defs = false;
3402       if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
3403         {
3404           child = SLP_TREE_CHILDREN (slp_node)[child_index];
3405 
3406 	  /* We have to check both pattern and original def, if available.  */
3407 	  if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
3408 	    {
3409 	      gimple *first_def = SLP_TREE_SCALAR_STMTS (child)[0];
3410 	      gimple *related
3411 		= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
3412 
3413 	      if (operand_equal_p (oprnd, gimple_get_lhs (first_def), 0)
3414 		  || (related
3415 		      && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
3416 		{
3417 		  /* The number of vector defs is determined by the number of
3418 		     vector statements in the node from which we get those
3419 		     statements.  */
3420 		  number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
3421 		  vectorized_defs = true;
3422 		  child_index++;
3423 		}
3424 	    }
3425 	  else
3426 	    child_index++;
3427         }
3428 
3429       if (!vectorized_defs)
3430         {
3431           if (first_iteration)
3432             {
3433               number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3434               /* The number of vector stmts was calculated according to the LHS
3435                  in vect_schedule_slp_instance (); fix it by replacing the LHS
3436                  with the RHS, if necessary.  See vect_get_smallest_scalar_type ()
3437                  for details.  */
3438               vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
3439                                              &rhs_size_unit);
3440               if (rhs_size_unit != lhs_size_unit)
3441                 {
3442                   number_of_vects *= rhs_size_unit;
3443                   number_of_vects /= lhs_size_unit;
3444                 }
3445             }
3446         }
3447 
3448       /* Allocate memory for vectorized defs.  */
3449       vec_defs = vNULL;
3450       vec_defs.create (number_of_vects);
3451 
3452       /* For reduction defs we call vect_get_constant_vectors (), since we are
3453          looking for initial loop invariant values.  */
3454       if (vectorized_defs && reduc_index == -1)
3455         /* The defs are already vectorized.  */
3456 	vect_get_slp_vect_defs (child, &vec_defs);
3457       else
3458         /* Build vectors from scalar defs.  */
3459 	vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
3460                                    number_of_vects, reduc_index);
3461 
3462       vec_oprnds->quick_push (vec_defs);
3463 
3464       /* For reductions, we only need initial values.  */
3465       if (reduc_index != -1)
3466         return;
3467 
3468       first_iteration = false;
3469     }
3470 }
3471 
3472 /* Generate vector permute statements from a list of loads in DR_CHAIN.
3473    If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
3474    permute statements for the SLP node NODE of the SLP instance
3475    SLP_NODE_INSTANCE.  */
3476 
3477 bool
3478 vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
3479                               gimple_stmt_iterator *gsi, int vf,
3480                               slp_instance slp_node_instance, bool analyze_only,
3481 			      unsigned *n_perms)
3482 {
3483   gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
3484   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3485   tree mask_element_type = NULL_TREE, mask_type;
3486   int nunits, vec_index = 0;
3487   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3488   int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
3489   int mask_element;
3490   unsigned char *mask;
3491   machine_mode mode;
3492 
3493   if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
3494     return false;
3495 
3496   stmt_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info));
3497 
3498   mode = TYPE_MODE (vectype);
3499 
3500   /* The generic VEC_PERM_EXPR code always uses an integral type of the
3501      same size as the vector element being permuted.  */
3502   mask_element_type = lang_hooks.types.type_for_mode
3503 		(int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
3504   mask_type = get_vectype_for_scalar_type (mask_element_type);
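  /* E.g. for a V4SF vector the mask elements are 32-bit integers and
     MASK_TYPE is the corresponding four-element integer vector type
     (illustrative).  */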
3505   nunits = TYPE_VECTOR_SUBPARTS (vectype);
3506   mask = XALLOCAVEC (unsigned char, nunits);
3507 
3508   /* Initialize the vect stmts of NODE to properly insert the generated
3509      stmts later.  */
3510   if (! analyze_only)
3511     for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
3512 	 i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
3513       SLP_TREE_VEC_STMTS (node).quick_push (NULL);
3514 
3515   /* Generate permutation masks for every NODE.  The number of masks for
3516      each NODE is equal to GROUP_SIZE.
3517      E.g., we have a group of three nodes with three loads from the same
3518      location in each node, and the vector size is 4.  I.e., we have an
3519      a0b0c0a1b1c1... sequence and we need to create the following vectors:
3520      for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3521      for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3522      ...
3523 
3524      The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
3525      The last mask is illegal since we assume two input vectors for a permute
3526      operation, so the mask element values cannot go beyond that range.
3527      Hence, the last mask must be converted into {2,5,5,5}.
3528      For the first two permutations we need the first and the second input
3529      vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
3530      we need the second and the third vectors: {b1,c1,a2,b2} and
3531      {c2,a3,b3,c3}.  */
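  /* Each absolute element index I computed below selects input vector
     I / NUNITS and lane I % NUNITS, and lanes taken from the second selected
     input get NUNITS added; that is how {6,9,9,9} above becomes {2,5,5,5}
     (a worked restatement of the example, not extra logic).  */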
3532 
3533   int vect_stmts_counter = 0;
3534   int index = 0;
3535   int first_vec_index = -1;
3536   int second_vec_index = -1;
3537   bool noop_p = true;
3538   *n_perms = 0;
3539 
3540   for (int j = 0; j < vf; j++)
3541     {
3542       for (int k = 0; k < group_size; k++)
3543 	{
3544 	  int i = (SLP_TREE_LOAD_PERMUTATION (node)[k]
3545 		   + j * STMT_VINFO_GROUP_SIZE (stmt_info));
3546 	  vec_index = i / nunits;
3547 	  mask_element = i % nunits;
3548 	  if (vec_index == first_vec_index
3549 	      || first_vec_index == -1)
3550 	    {
3551 	      first_vec_index = vec_index;
3552 	    }
3553 	  else if (vec_index == second_vec_index
3554 		   || second_vec_index == -1)
3555 	    {
3556 	      second_vec_index = vec_index;
3557 	      mask_element += nunits;
3558 	    }
3559 	  else
3560 	    {
3561 	      if (dump_enabled_p ())
3562 		{
3563 		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3564 				   "permutation requires at "
3565 				   "least three vectors ");
3566 		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
3567 				    stmt, 0);
3568 		}
3569 	      gcc_assert (analyze_only);
3570 	      return false;
3571 	    }
3572 
3573 	  gcc_assert (mask_element >= 0
3574 		      && mask_element < 2 * nunits);
3575 	  if (mask_element != index)
3576 	    noop_p = false;
3577 	  mask[index++] = mask_element;
3578 
3579 	  if (index == nunits)
3580 	    {
3581 	      if (! noop_p
3582 		  && ! can_vec_perm_p (mode, false, mask))
3583 		{
3584 		  if (dump_enabled_p ())
3585 		    {
3586 		      dump_printf_loc (MSG_MISSED_OPTIMIZATION,
3587 				       vect_location,
3588 				       "unsupported vect permute { ");
3589 		      for (i = 0; i < nunits; ++i)
3590 			dump_printf (MSG_MISSED_OPTIMIZATION, "%d ", mask[i]);
3591 		      dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
3592 		    }
3593 		  gcc_assert (analyze_only);
3594 		  return false;
3595 		}
3596 
3597 	      if (! noop_p)
3598 		++*n_perms;
3599 
3600 	      if (!analyze_only)
3601 		{
3602 		  tree mask_vec = NULL_TREE;
3603 
3604 		  if (! noop_p)
3605 		    {
3606 		      tree *mask_elts = XALLOCAVEC (tree, nunits);
3607 		      for (int l = 0; l < nunits; ++l)
3608 			mask_elts[l] = build_int_cst (mask_element_type,
3609 						      mask[l]);
3610 		      mask_vec = build_vector (mask_type, mask_elts);
3611 		    }
3612 
3613 		  if (second_vec_index == -1)
3614 		    second_vec_index = first_vec_index;
3615 
3616 		  /* Generate the permute statement if necessary.  */
3617 		  tree first_vec = dr_chain[first_vec_index];
3618 		  tree second_vec = dr_chain[second_vec_index];
3619 		  gimple *perm_stmt;
3620 		  if (! noop_p)
3621 		    {
3622 		      tree perm_dest
3623 			= vect_create_destination_var (gimple_assign_lhs (stmt),
3624 						       vectype);
3625 		      perm_dest = make_ssa_name (perm_dest);
3626 		      perm_stmt = gimple_build_assign (perm_dest,
3627 						       VEC_PERM_EXPR,
3628 						       first_vec, second_vec,
3629 						       mask_vec);
3630 		      vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3631 		    }
3632 		  else
3633 		    /* MASK_VEC stayed NULL_TREE, i.e. the permutation is the
3634 		       identity, so reuse the defining stmt of the first input.  */
3635 		    perm_stmt = SSA_NAME_DEF_STMT (first_vec);
3636 
3637 		  /* Store the vector statement in NODE.  */
3638 		  SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++] = perm_stmt;
3639 		}
3640 
3641 	      index = 0;
3642 	      first_vec_index = -1;
3643 	      second_vec_index = -1;
3644 	      noop_p = true;
3645 	    }
3646 	}
3647     }
3648 
3649   return true;
3650 }
3651 
3652 
3653 
3654 /* Vectorize SLP instance tree in postorder.  */
3655 
3656 static bool
3657 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
3658                             unsigned int vectorization_factor)
3659 {
3660   gimple *stmt;
3661   bool grouped_store, is_store;
3662   gimple_stmt_iterator si;
3663   stmt_vec_info stmt_info;
3664   unsigned int vec_stmts_size, nunits, group_size;
3665   tree vectype;
3666   int i, j;
3667   slp_tree child;
3668 
3669   if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
3670     return false;
3671 
3672   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3673     vect_schedule_slp_instance (child, instance, vectorization_factor);
3674 
3675   /* Push SLP node def-type to stmts.  */
3676   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3677     if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
3678       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
3679 	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);
3680 
3681   stmt = SLP_TREE_SCALAR_STMTS (node)[0];
3682   stmt_info = vinfo_for_stmt (stmt);
3683 
3684   /* VECTYPE is the type of the destination.  */
3685   vectype = STMT_VINFO_VECTYPE (stmt_info);
3686   nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
3687   group_size = SLP_INSTANCE_GROUP_SIZE (instance);
3688 
3689   /* For each SLP instance calculate the number of vector stmts to be created
3690      for the scalar stmts in each node of the SLP tree.  The number of vector
3691      elements in one vector iteration is the number of scalar elements in
3692      one scalar iteration (GROUP_SIZE) multiplied by VF divided by the vector
3693      size.
3694      Unless this is an SLP reduction, in which case the number of vector
3695      stmts equals the number of vector stmts of the children.  */
3696   if (GROUP_FIRST_ELEMENT (stmt_info)
3697       && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
3698     vec_stmts_size = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]);
3699   else
3700     vec_stmts_size = (vectorization_factor * group_size) / nunits;
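  /* E.g. with GROUP_SIZE = 2, VF = 4 and NUNITS = 4 this yields
     4 * 2 / 4 = 2 vector stmts for the node (illustrative numbers).  */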
3701 
3702   if (!SLP_TREE_VEC_STMTS (node).exists ())
3703     {
3704       SLP_TREE_VEC_STMTS (node).create (vec_stmts_size);
3705       SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
3706     }
3707 
3708   if (dump_enabled_p ())
3709     {
3710       dump_printf_loc (MSG_NOTE, vect_location,
3711 		       "------>vectorizing SLP node starting from: ");
3712       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3713     }
3714 
3715   /* Vectorized stmts go before the last scalar stmt, which is where
3716      all uses are ready.  */
3717   si = gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node));
3718 
3719   /* Mark the first element of the reduction chain as a reduction to properly
3720      transform the node.  In the analysis phase only the last element of the
3721      chain is marked as a reduction.  */
3722   if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
3723       && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
3724     {
3725       STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
3726       STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
3727     }
3728 
3729   /* Handle two-operation SLP nodes by vectorizing the group with
3730      both operations and then performing a merge.  */
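  /* E.g. for an addsub group {a0 + b0, a1 - b1, a2 + b2, a3 - b3} the node
     is vectorized once with PLUS_EXPR and once with MINUS_EXPR, and a
     VEC_PERM_EXPR then picks each lane from the result whose opcode the
     scalar stmt in that position actually used (illustrative example).  */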
3731   if (SLP_TREE_TWO_OPERATORS (node))
3732     {
3733       enum tree_code code0 = gimple_assign_rhs_code (stmt);
3734       enum tree_code ocode = ERROR_MARK;
3735       gimple *ostmt;
3736       unsigned char *mask = XALLOCAVEC (unsigned char, group_size);
3737       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt)
3738 	if (gimple_assign_rhs_code (ostmt) != code0)
3739 	  {
3740 	    mask[i] = 1;
3741 	    ocode = gimple_assign_rhs_code (ostmt);
3742 	  }
3743 	else
3744 	  mask[i] = 0;
3745       if (ocode != ERROR_MARK)
3746 	{
3747 	  vec<gimple *> v0;
3748 	  vec<gimple *> v1;
3749 	  unsigned j;
3750 	  tree tmask = NULL_TREE;
3751 	  vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3752 	  v0 = SLP_TREE_VEC_STMTS (node).copy ();
3753 	  SLP_TREE_VEC_STMTS (node).truncate (0);
3754 	  gimple_assign_set_rhs_code (stmt, ocode);
3755 	  vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3756 	  gimple_assign_set_rhs_code (stmt, code0);
3757 	  v1 = SLP_TREE_VEC_STMTS (node).copy ();
3758 	  SLP_TREE_VEC_STMTS (node).truncate (0);
3759 	  tree meltype = build_nonstandard_integer_type
3760 	      (GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))), 1);
3761 	  tree mvectype = get_same_sized_vectype (meltype, vectype);
3762 	  unsigned k = 0, l;
3763 	  for (j = 0; j < v0.length (); ++j)
3764 	    {
3765 	      tree *melts = XALLOCAVEC (tree, TYPE_VECTOR_SUBPARTS (vectype));
3766 	      for (l = 0; l < TYPE_VECTOR_SUBPARTS (vectype); ++l)
3767 		{
3768 		  if (k >= group_size)
3769 		    k = 0;
3770 		  melts[l] = build_int_cst
3771 		      (meltype, mask[k++] * TYPE_VECTOR_SUBPARTS (vectype) + l);
3772 		}
3773 	      tmask = build_vector (mvectype, melts);
3774 
3775 	      /* ???  Not all targets support a VEC_PERM_EXPR with a
3776 	         constant mask that would translate to a vec_merge RTX
3777 		 (with their vec_perm_const_ok).  We can either not
3778 		 vectorize in that case or let veclower do its job.
3779 		 Unfortunately that isn't too great and at least for
3780 		 plus/minus we'd eventually like to match targets
3781 		 vector addsub instructions.  */
3782 	      gimple *vstmt;
3783 	      vstmt = gimple_build_assign (make_ssa_name (vectype),
3784 					   VEC_PERM_EXPR,
3785 					   gimple_assign_lhs (v0[j]),
3786 					   gimple_assign_lhs (v1[j]), tmask);
3787 	      vect_finish_stmt_generation (stmt, vstmt, &si);
3788 	      SLP_TREE_VEC_STMTS (node).quick_push (vstmt);
3789 	    }
3790 	  v0.release ();
3791 	  v1.release ();
3792 	  return false;
3793 	}
3794     }
3795   is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3796 
3797   /* Restore stmt def-types.  */
3798   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3799     if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
3800       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
3801 	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
3802 
3803   return is_store;
3804 }
3805 
3806 /* Replace the scalar calls in SLP node NODE with assignments setting their
3807    lhs to zero.  For loop vectorization this is done in vectorizable_call,
3808    but for SLP it needs to be deferred until the end of vect_schedule_slp,
3809    because multiple SLP instances may refer to the same scalar stmt.  */
3810 
3811 static void
3812 vect_remove_slp_scalar_calls (slp_tree node)
3813 {
3814   gimple *stmt, *new_stmt;
3815   gimple_stmt_iterator gsi;
3816   int i;
3817   slp_tree child;
3818   tree lhs;
3819   stmt_vec_info stmt_info;
3820 
3821   if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
3822     return;
3823 
3824   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3825     vect_remove_slp_scalar_calls (child);
3826 
3827   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
3828     {
3829       if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
3830 	continue;
3831       stmt_info = vinfo_for_stmt (stmt);
3832       if (stmt_info == NULL
3833 	  || is_pattern_stmt_p (stmt_info)
3834 	  || !PURE_SLP_STMT (stmt_info))
3835 	continue;
3836       lhs = gimple_call_lhs (stmt);
3837       new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3838       set_vinfo_for_stmt (new_stmt, stmt_info);
3839       set_vinfo_for_stmt (stmt, NULL);
3840       STMT_VINFO_STMT (stmt_info) = new_stmt;
3841       gsi = gsi_for_stmt (stmt);
3842       gsi_replace (&gsi, new_stmt, false);
3843       SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
3844     }
3845 }
3846 
3847 /* Generate vector code for all SLP instances in the loop/basic block.  */
3848 
3849 bool
3850 vect_schedule_slp (vec_info *vinfo)
3851 {
3852   vec<slp_instance> slp_instances;
3853   slp_instance instance;
3854   unsigned int i, vf;
3855   bool is_store = false;
3856 
3857   slp_instances = vinfo->slp_instances;
3858   if (is_a <loop_vec_info> (vinfo))
3859     vf = as_a <loop_vec_info> (vinfo)->vectorization_factor;
3860   else
3861     vf = 1;
3862 
3863   FOR_EACH_VEC_ELT (slp_instances, i, instance)
3864     {
3865       /* Schedule the tree of INSTANCE.  */
3866       is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
3867                                              instance, vf);
3868       if (dump_enabled_p ())
3869 	dump_printf_loc (MSG_NOTE, vect_location,
3870                          "vectorizing stmts using SLP.\n");
3871     }
3872 
3873   FOR_EACH_VEC_ELT (slp_instances, i, instance)
3874     {
3875       slp_tree root = SLP_INSTANCE_TREE (instance);
3876       gimple *store;
3877       unsigned int j;
3878       gimple_stmt_iterator gsi;
3879 
3880       /* Remove scalar call stmts.  Do not do this for basic-block
3881 	 vectorization as not all uses may be vectorized.
3882 	 ???  Why should this be necessary?  DCE should be able to
3883 	 remove the stmts itself.
3884 	 ???  For BB vectorization we can as well remove scalar
3885 	 stmts starting from the SLP tree root if they have no
3886 	 uses.  */
3887       if (is_a <loop_vec_info> (vinfo))
3888 	vect_remove_slp_scalar_calls (root);
3889 
3890       for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
3891                   && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
3892         {
3893           if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
3894             break;
3895 
3896           if (is_pattern_stmt_p (vinfo_for_stmt (store)))
3897             store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
3898           /* Free the attached stmt_vec_info and remove the stmt.  */
3899           gsi = gsi_for_stmt (store);
3900 	  unlink_stmt_vdef (store);
3901           gsi_remove (&gsi, true);
3902 	  release_defs (store);
3903           free_stmt_vec_info (store);
3904         }
3905     }
3906 
3907   return is_store;
3908 }
3909