xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/tree-vect-slp.c (revision 413d532bcc3f62d122e56d92e13ac64825a40baf)
1 /* SLP - Basic Block Vectorization
2    Copyright (C) 2007, 2008, 2009, 2010
3    Free Software Foundation, Inc.
4    Contributed by Dorit Naishlos <dorit@il.ibm.com>
5    and Ira Rosen <irar@il.ibm.com>
6 
7 This file is part of GCC.
8 
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13 
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
17 for more details.
18 
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3.  If not see
21 <http://www.gnu.org/licenses/>.  */
22 
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 #include "basic-block.h"
31 #include "diagnostic.h"
32 #include "tree-flow.h"
33 #include "tree-dump.h"
34 #include "cfgloop.h"
35 #include "cfglayout.h"
36 #include "expr.h"
37 #include "recog.h"
38 #include "optabs.h"
39 #include "tree-vectorizer.h"
40 
41 /* Extract the location of the basic block in the source code.
42    Return the basic block location if succeed and NULL if not.  */
43 
44 LOC
45 find_bb_location (basic_block bb)
46 {
47   gimple stmt = NULL;
48   gimple_stmt_iterator si;
49 
50   if (!bb)
51     return UNKNOWN_LOC;
52 
53   for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
54     {
55       stmt = gsi_stmt (si);
56       if (gimple_location (stmt) != UNKNOWN_LOC)
57         return gimple_location (stmt);
58     }
59 
60   return UNKNOWN_LOC;
61 }
62 
63 
64 /* Recursively free the memory allocated for the SLP tree rooted at NODE.  */
65 
66 static void
67 vect_free_slp_tree (slp_tree node)
68 {
69   if (!node)
70     return;
71 
72   if (SLP_TREE_LEFT (node))
73     vect_free_slp_tree (SLP_TREE_LEFT (node));
74 
75   if (SLP_TREE_RIGHT (node))
76     vect_free_slp_tree (SLP_TREE_RIGHT (node));
77 
78   VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
79 
80   if (SLP_TREE_VEC_STMTS (node))
81     VEC_free (gimple, heap, SLP_TREE_VEC_STMTS (node));
82 
83   free (node);
84 }
85 
86 
87 /* Free the memory allocated for the SLP instance.  */
88 
void
vect_free_slp_instance (slp_instance instance)
{
  /* Release the SLP tree rooted at the instance and the auxiliary
     vectors describing the instance's loads.  */
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (instance));
  VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
  /* NOTE(review): the INSTANCE struct itself is not freed here --
     presumably the caller owns that allocation; confirm against the
     destroy_*_vec_info callers.  */
}
96 
97 
98 /* Get the defs for the rhs of STMT (collect them in DEF_STMTS0/1), check that
99    they are of a legal type and that they match the defs of the first stmt of
100    the SLP group (stored in FIRST_STMT_...).  */
101 
static bool
vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                             slp_tree slp_node, gimple stmt,
			     VEC (gimple, heap) **def_stmts0,
			     VEC (gimple, heap) **def_stmts1,
			     enum vect_def_type *first_stmt_dt0,
			     enum vect_def_type *first_stmt_dt1,
			     tree *first_stmt_def0_type,
			     tree *first_stmt_def1_type,
			     tree *first_stmt_const_oprnd,
			     int ncopies_for_cost,
                             bool *pattern0, bool *pattern1)
{
  tree oprnd;
  unsigned int i, number_of_oprnds;
  tree def;
  gimple def_stmt;
  /* Def-type of each RHS operand of STMT (at most two are examined).  */
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  /* Cost analysis below is done for the first stmt of the group only.  */
  stmt_vec_info stmt_info =
    vinfo_for_stmt (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0));
  enum gimple_rhs_class rhs_class;
  struct loop *loop = NULL;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  rhs_class = get_gimple_rhs_class (gimple_assign_rhs_code (stmt));
  number_of_oprnds = gimple_num_ops (stmt) - 1;	/* RHS only */

  /* For each RHS operand: find its def, then either record it (first stmt
     of the group) or match it against the recorded first-stmt info.  */
  for (i = 0; i < number_of_oprnds; i++)
    {
      oprnd = gimple_op (stmt, i + 1);

      if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def,
                               &dt[i])
	  || (!def_stmt && dt[i] != vect_constant_def))
	{
	  if (vect_print_dump_info (REPORT_SLP))
	    {
	      fprintf (vect_dump, "Build SLP failed: can't find def for ");
	      print_generic_expr (vect_dump, oprnd, TDF_SLIM);
	    }

	  return false;
	}

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
         from the pattern. Check that all the stmts of the node are in the
         pattern.  */
      if (loop && def_stmt && gimple_bb (def_stmt)
          && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
          && vinfo_for_stmt (def_stmt)
          && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt)))
        {
          /* For the first stmt of the group just record that this operand
             is pattern-defined; the remaining stmts must agree.  */
          if (!*first_stmt_dt0)
            *pattern0 = true;
          else
            {
              if (i == 1 && !*first_stmt_dt1)
                *pattern1 = true;
              else if ((i == 0 && !*pattern0) || (i == 1 && !*pattern1))
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    {
                      fprintf (vect_dump, "Build SLP failed: some of the stmts"
                                     " are in a pattern, and others are not ");
                      print_generic_expr (vect_dump, oprnd, TDF_SLIM);
                    }

                  return false;
                }
            }

          /* Analyze the stmt the pattern was replaced with instead.  */
          def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
          dt[i] = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

          /* NOTE(review): this tests dt[0] (*dt) even though the line above
             may just have set dt[1]; an unknown dt[1] would still be
             rejected by the def-type switch at the bottom of the loop, but
             dt[i] looks like the intended operand -- confirm.  */
          if (*dt == vect_unknown_def_type)
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "Unsupported pattern.");
              return false;
            }

          switch (gimple_code (def_stmt))
            {
              case GIMPLE_PHI:
                def = gimple_phi_result (def_stmt);
                break;

              case GIMPLE_ASSIGN:
                def = gimple_assign_lhs (def_stmt);
                break;

              default:
                if (vect_print_dump_info (REPORT_DETAILS))
                  fprintf (vect_dump, "unsupported defining stmt: ");
                return false;
            }
        }

      if (!*first_stmt_dt0)
	{
	  /* op0 of the first stmt of the group - store its info.  */
	  *first_stmt_dt0 = dt[i];
	  if (def)
	    *first_stmt_def0_type = TREE_TYPE (def);
	  else
	    *first_stmt_const_oprnd = oprnd;

	  /* Analyze costs (for the first stmt of the group only).  */
	  if (rhs_class != GIMPLE_SINGLE_RHS)
	    /* Not memory operation (we don't call this functions for loads).  */
	    vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
	  else
	    /* Store.  */
	    vect_model_store_cost (stmt_info, ncopies_for_cost, dt[0], slp_node);
	}

      else
	{
	  if (!*first_stmt_dt1 && i == 1)
	    {
	      /* op1 of the first stmt of the group - store its info.  */
	      *first_stmt_dt1 = dt[i];
	      if (def)
		*first_stmt_def1_type = TREE_TYPE (def);
	      else
		{
		  /* We assume that the stmt contains only one constant
		     operand. We fail otherwise, to be on the safe side.  */
		  if (*first_stmt_const_oprnd)
		    {
		      if (vect_print_dump_info (REPORT_SLP))
			fprintf (vect_dump, "Build SLP failed: two constant "
				 "oprnds in stmt");
		      return false;
		    }
		  *first_stmt_const_oprnd = oprnd;
		}
	    }
	  else
	    {
	      /* Not first stmt of the group, check that the def-stmt/s match
		 the def-stmt/s of the first stmt.  */
	      if ((i == 0
		   && (*first_stmt_dt0 != dt[i]
		       || (*first_stmt_def0_type && def
			   && !types_compatible_p (*first_stmt_def0_type,
						   TREE_TYPE (def)))))
		  || (i == 1
		      && (*first_stmt_dt1 != dt[i]
			  || (*first_stmt_def1_type && def
			      && !types_compatible_p (*first_stmt_def1_type,
						      TREE_TYPE (def)))))
		  || (!def
		      && !types_compatible_p (TREE_TYPE (*first_stmt_const_oprnd),
					      TREE_TYPE (oprnd))))
		{
		  if (vect_print_dump_info (REPORT_SLP))
		    fprintf (vect_dump, "Build SLP failed: different types ");

		  return false;
		}
	    }
	}

      /* Check the types of the definitions.  */
      switch (dt[i])
	{
	case vect_constant_def:
	case vect_external_def:
	  break;

	case vect_internal_def:
	  /* Internal defs become the scalar stmts of the left/right SLP
	     subtree, depending on the operand position.  */
	  if (i == 0)
	    VEC_safe_push (gimple, heap, *def_stmts0, def_stmt);
	  else
	    VEC_safe_push (gimple, heap, *def_stmts1, def_stmt);
	  break;

	default:
	  /* FORNOW: Not supported.  */
	  if (vect_print_dump_info (REPORT_SLP))
	    {
	      fprintf (vect_dump, "Build SLP failed: illegal type of def ");
	      print_generic_expr (vect_dump, def, TDF_SLIM);
	    }

	  return false;
	}
    }

  return true;
}
296 
297 
298 /* Recursively build an SLP tree starting from NODE.
299    Fail (and return FALSE) if def-stmts are not isomorphic, require data
300    permutation or are of unsupported types of operation. Otherwise, return
301    TRUE.  */
302 
static bool
vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                     slp_tree *node, unsigned int group_size,
                     int *inside_cost, int *outside_cost,
                     int ncopies_for_cost, unsigned int *max_nunits,
                     VEC (int, heap) **load_permutation,
                     VEC (slp_tree, heap) **loads,
                     unsigned int vectorization_factor)
{
  /* Defs of operand 0/1 of the stmts in NODE; they become the scalar
     stmts of the left/right child nodes built at the end.
     NOTE(review): these VECs are not freed on the failure returns below --
     presumably acceptable for a compile-time pass; confirm.  */
  VEC (gimple, heap) *def_stmts0 = VEC_alloc (gimple, heap, group_size);
  VEC (gimple, heap) *def_stmts1 =  VEC_alloc (gimple, heap, group_size);
  unsigned int i;
  VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
  gimple stmt = VEC_index (gimple, stmts, 0);
  enum vect_def_type first_stmt_dt0 = vect_uninitialized_def;
  enum vect_def_type first_stmt_dt1 = vect_uninitialized_def;
  enum tree_code first_stmt_code = ERROR_MARK, rhs_code;
  tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE;
  tree lhs;
  bool stop_recursion = false, need_same_oprnds = false;
  tree vectype, scalar_type, first_op1 = NULL_TREE;
  unsigned int ncopies;
  optab optab;
  int icode;
  enum machine_mode optab_op2_mode;
  enum machine_mode vec_mode;
  tree first_stmt_const_oprnd = NULL_TREE;
  struct data_reference *first_dr;
  bool pattern0 = false, pattern1 = false;
  HOST_WIDE_INT dummy;
  bool permutation = false;
  unsigned int load_place;
  gimple first_load;

  /* For every stmt in NODE find its def stmt/s.  */
  for (i = 0; VEC_iterate (gimple, stmts, i, stmt); i++)
    {
      if (vect_print_dump_info (REPORT_SLP))
	{
	  fprintf (vect_dump, "Build SLP for ");
	  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
	}

      /* Only assignments and calls have an lhs; anything else cannot be
         part of an SLP group.  */
      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
	{
	  if (vect_print_dump_info (REPORT_SLP))
	    {
	      fprintf (vect_dump,
		       "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL");
	      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
	    }

	  return false;
	}

      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
              print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
            }
          return false;
        }

      /* NCOPIES > 1 means this stmt's vector type does not fill a whole
         vectorization factor ("multiple types").  */
      ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
      if (ncopies != 1)
        {
	  if (vect_print_dump_info (REPORT_SLP))
            fprintf (vect_dump, "SLP with multiple types ");

          /* FORNOW: multiple types are unsupported in BB SLP.  */
	  if (bb_vinfo)
	    return false;
        }

      /* In case of multiple types we need to detect the smallest type.  */
      if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
        *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);

      if (is_gimple_call (stmt))
	rhs_code = CALL_EXPR;
      else
	rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
	{
	  first_stmt_code = rhs_code;

	  /* Shift arguments should be equal in all the packed stmts for a
	     vector shift with scalar shift operand.  */
	  if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
	      || rhs_code == LROTATE_EXPR
	      || rhs_code == RROTATE_EXPR)
	    {
	      vec_mode = TYPE_MODE (vectype);

	      /* First see if we have a vector/vector shift.  */
	      optab = optab_for_tree_code (rhs_code, vectype,
					   optab_vector);

	      if (!optab
		  || (optab->handlers[(int) vec_mode].insn_code
		      == CODE_FOR_nothing))
		{
		  /* No vector/vector shift, try for a vector/scalar shift.  */
		  optab = optab_for_tree_code (rhs_code, vectype,
					       optab_scalar);

		  if (!optab)
		    {
		      if (vect_print_dump_info (REPORT_SLP))
			fprintf (vect_dump, "Build SLP failed: no optab.");
		      return false;
		    }
		  icode = (int) optab->handlers[(int) vec_mode].insn_code;
		  if (icode == CODE_FOR_nothing)
		    {
		      if (vect_print_dump_info (REPORT_SLP))
			fprintf (vect_dump, "Build SLP failed: "
				            "op not supported by target.");
		      return false;
		    }
		  /* A scalar (non-vector) second operand means all stmts in
		     the group must share one and the same shift amount.  */
		  optab_op2_mode = insn_data[icode].operand[2].mode;
		  if (!VECTOR_MODE_P (optab_op2_mode))
		    {
		      need_same_oprnds = true;
		      first_op1 = gimple_assign_rhs2 (stmt);
		    }
		}
	    }
	}
      else
	{
	  /* All stmts of the group must perform the same operation;
	     REALPART/IMAGPART pairs are the only tolerated mix.  */
	  if (first_stmt_code != rhs_code
	      && (first_stmt_code != IMAGPART_EXPR
		  || rhs_code != REALPART_EXPR)
	      && (first_stmt_code != REALPART_EXPR
		  || rhs_code != IMAGPART_EXPR))
	    {
	      if (vect_print_dump_info (REPORT_SLP))
		{
		  fprintf (vect_dump,
			   "Build SLP failed: different operation in stmt ");
		  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
		}

	      return false;
	    }

	  if (need_same_oprnds
	      && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
	    {
	      if (vect_print_dump_info (REPORT_SLP))
		{
		  fprintf (vect_dump,
			   "Build SLP failed: different shift arguments in ");
		  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
		}

	      return false;
	    }
	}

      /* Strided store or load.  */
      if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
	{
	  if (REFERENCE_CLASS_P (lhs))
	    {
	      /* Store.  */
	      if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node,
						stmt, &def_stmts0, &def_stmts1,
						&first_stmt_dt0,
						&first_stmt_dt1,
						&first_stmt_def0_type,
						&first_stmt_def1_type,
						&first_stmt_const_oprnd,
						ncopies_for_cost,
                                                &pattern0, &pattern1))
		return false;
	    }
	    else
	      {
		/* Load.  */
                /* FORNOW: Check that there is no gap between the loads.  */
                if ((DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt
                     && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
                    || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != stmt
                        && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
                  {
                    if (vect_print_dump_info (REPORT_SLP))
                      {
                        fprintf (vect_dump, "Build SLP failed: strided "
                                            "loads have gaps ");
                        print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                      }

                    return false;
                  }

                /* Check that the size of interleaved loads group is not
                   greater than the SLP group size.  */
                if (DR_GROUP_SIZE (vinfo_for_stmt (stmt))
                    > ncopies * group_size)
                  {
                    if (vect_print_dump_info (REPORT_SLP))
                      {
                        fprintf (vect_dump, "Build SLP failed: the number of "
                                            "interleaved loads is greater than"
                                            " the SLP group size ");
                        print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                      }

                    return false;
                  }

                first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));

              /* Alignment support and load cost are checked only once per
                 interleaving chain, on its first load.  */
              if (first_load == stmt)
                {
                  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
                  if (vect_supportable_dr_alignment (first_dr)
                      == dr_unaligned_unsupported)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        {
                          fprintf (vect_dump, "Build SLP failed: unsupported "
                                              "unaligned load ");
                          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                        }

                      return false;
                    }

                  /* Analyze costs (for the first stmt in the group).  */
                  vect_model_load_cost (vinfo_for_stmt (stmt),
                                        ncopies_for_cost, *node);
                }

              /* Store the place of this load in the interleaving chain. In
                 case that permutation is needed we later decide if a specific
                 permutation is supported.  */
              load_place = vect_get_place_in_interleaving_chain (stmt,
                                                                 first_load);
              if (load_place != i)
                permutation = true;

              VEC_safe_push (int, heap, *load_permutation, load_place);

              /* We stop the tree when we reach a group of loads.  */
              stop_recursion = true;
             continue;
           }
        } /* Strided access.  */
      else
	{
	  if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
	    {
	      /* Not strided load. */
	      if (vect_print_dump_info (REPORT_SLP))
		{
		  fprintf (vect_dump, "Build SLP failed: not strided load ");
		  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
		}

	      /* FORNOW: Not strided loads are not supported.  */
	      return false;
	    }

	  /* Not memory operation.  */
	  if (TREE_CODE_CLASS (rhs_code) != tcc_binary
	      && TREE_CODE_CLASS (rhs_code) != tcc_unary)
	    {
	      if (vect_print_dump_info (REPORT_SLP))
		{
		  fprintf (vect_dump, "Build SLP failed: operation");
		  fprintf (vect_dump, " unsupported ");
		  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
		}

	      return false;
	    }

	  /* Find the def-stmts.  */
	  if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt,
					    &def_stmts0, &def_stmts1,
					    &first_stmt_dt0, &first_stmt_dt1,
					    &first_stmt_def0_type,
					    &first_stmt_def1_type,
					    &first_stmt_const_oprnd,
					    ncopies_for_cost,
                                            &pattern0, &pattern1))
	    return false;
	}
    }

  /* Add the costs of the node to the overall instance costs.  */
  *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
  *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);

  /* Strided loads were reached - stop the recursion.  */
  if (stop_recursion)
    {
      /* A load node needing permutation is recorded in LOADS so the
         instance-level analysis can later verify the permutation.  */
      if (permutation)
        {
          VEC_safe_push (slp_tree, heap, *loads, *node);
          *inside_cost += TARG_VEC_PERMUTE_COST * group_size;
        }

      return true;
    }

  /* Create SLP_TREE nodes for the definition node/s.  */
  if (first_stmt_dt0 == vect_internal_def)
    {
      slp_tree left_node = XNEW (struct _slp_tree);
      SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0;
      SLP_TREE_VEC_STMTS (left_node) = NULL;
      SLP_TREE_LEFT (left_node) = NULL;
      SLP_TREE_RIGHT (left_node) = NULL;
      SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0;
      SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0;
      if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &left_node, group_size,
				inside_cost, outside_cost, ncopies_for_cost,
				max_nunits, load_permutation, loads,
				vectorization_factor))
	return false;

      SLP_TREE_LEFT (*node) = left_node;
    }

  if (first_stmt_dt1 == vect_internal_def)
    {
      slp_tree right_node = XNEW (struct _slp_tree);
      SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1;
      SLP_TREE_VEC_STMTS (right_node) = NULL;
      SLP_TREE_LEFT (right_node) = NULL;
      SLP_TREE_RIGHT (right_node) = NULL;
      SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0;
      SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0;
      if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &right_node, group_size,
				inside_cost, outside_cost, ncopies_for_cost,
				max_nunits, load_permutation, loads,
				vectorization_factor))
	return false;

      SLP_TREE_RIGHT (*node) = right_node;
    }

  return true;
}
658 
659 
660 static void
661 vect_print_slp_tree (slp_tree node)
662 {
663   int i;
664   gimple stmt;
665 
666   if (!node)
667     return;
668 
669   fprintf (vect_dump, "node ");
670   for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
671     {
672       fprintf (vect_dump, "\n\tstmt %d ", i);
673       print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
674     }
675   fprintf (vect_dump, "\n");
676 
677   vect_print_slp_tree (SLP_TREE_LEFT (node));
678   vect_print_slp_tree (SLP_TREE_RIGHT (node));
679 }
680 
681 
682 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
683    If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
684    J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
685    stmts in NODE are to be marked.  */
686 
687 static void
688 vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
689 {
690   int i;
691   gimple stmt;
692 
693   if (!node)
694     return;
695 
696   for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
697     if (j < 0 || i == j)
698       STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
699 
700   vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j);
701   vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j);
702 }
703 
704 
705 /* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */
706 
707 static void
708 vect_mark_slp_stmts_relevant (slp_tree node)
709 {
710   int i;
711   gimple stmt;
712   stmt_vec_info stmt_info;
713 
714   if (!node)
715     return;
716 
717   for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
718     {
719       stmt_info = vinfo_for_stmt (stmt);
720       gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
721                   || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
722       STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
723     }
724 
725   vect_mark_slp_stmts_relevant (SLP_TREE_LEFT (node));
726   vect_mark_slp_stmts_relevant (SLP_TREE_RIGHT (node));
727 }
728 
729 
730 /* Check if the permutation required by the SLP INSTANCE is supported.
731    Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed.  */
732 
static bool
vect_supported_slp_permutation_p (slp_instance instance)
{
  slp_tree node = VEC_index (slp_tree, SLP_INSTANCE_LOADS (instance), 0);
  gimple stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
  gimple first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
  VEC (slp_tree, heap) *sorted_loads = NULL;
  int index;
  slp_tree *tmp_loads = NULL;
  int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j;
  slp_tree load;

  /* FORNOW: The only supported loads permutation is loads from the same
     location in all the loads in the node, when the data-refs in
     nodes of LOADS constitute an interleaving chain.
     Sort the nodes according to the order of accesses in the chain.  */
  tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size);
  /* The permutation vector holds GROUP_SIZE entries per load node; the
     first entry of node J's chunk (INDEX) is its position in the
     interleaving chain, i.e. where node J belongs in sorted order.  */
  for (i = 0, j = 0;
       VEC_iterate (int, SLP_INSTANCE_LOAD_PERMUTATION (instance), i, index)
       && VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), j, load);
       i += group_size, j++)
    {
      gimple scalar_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (load), 0);
      /* Check that the loads are all in the same interleaving chain.  */
      if (DR_GROUP_FIRST_DR (vinfo_for_stmt (scalar_stmt)) != first_load)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "Build SLP failed: unsupported data "
                                   "permutation ");
              print_gimple_stmt (vect_dump, scalar_stmt, 0, TDF_SLIM);
            }

          free (tmp_loads);
          return false;
        }

      tmp_loads[index] = load;
    }

  /* Replace the instance's loads vector with the sorted copy.  */
  sorted_loads = VEC_alloc (slp_tree, heap, group_size);
  for (i = 0; i < group_size; i++)
     VEC_safe_push (slp_tree, heap, sorted_loads, tmp_loads[i]);

  VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
  SLP_INSTANCE_LOADS (instance) = sorted_loads;
  free (tmp_loads);

  /* Dry-run the permutation transformation (last argument TRUE requests
     analysis only) to check that the target supports it.  */
  if (!vect_transform_slp_perm_load (stmt, NULL, NULL,
                                     SLP_INSTANCE_UNROLLING_FACTOR (instance),
                                     instance, true))
    return false;

  return true;
}
788 
789 
790 /* Check if the required load permutation is supported.
791    LOAD_PERMUTATION contains a list of indices of the loads.
792    In SLP this permutation is relative to the order of strided stores that are
793    the base of the SLP instance.  */
794 
795 static bool
796 vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
797                                    VEC (int, heap) *load_permutation)
798 {
799   int i = 0, j, prev = -1, next, k;
800   bool supported;
801   sbitmap load_index;
802 
803   /* FORNOW: permutations are only supported in SLP.  */
804   if (!slp_instn)
805     return false;
806 
807   if (vect_print_dump_info (REPORT_SLP))
808     {
809       fprintf (vect_dump, "Load permutation ");
810       for (i = 0; VEC_iterate (int, load_permutation, i, next); i++)
811         fprintf (vect_dump, "%d ", next);
812     }
813 
814   /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
815      GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
816      well.  */
817   if (VEC_length (int, load_permutation)
818       != (unsigned int) (group_size * group_size))
819     return false;
820 
821   supported = true;
822   load_index = sbitmap_alloc (group_size);
823   sbitmap_zero (load_index);
824   for (j = 0; j < group_size; j++)
825     {
826       for (i = j * group_size, k = 0;
827            VEC_iterate (int, load_permutation, i, next) && k < group_size;
828            i++, k++)
829        {
830          if (i != j * group_size && next != prev)
831           {
832             supported = false;
833             break;
834           }
835 
836          prev = next;
837        }
838 
839       if (TEST_BIT (load_index, prev))
840         {
841           supported = false;
842           break;
843         }
844 
845       SET_BIT (load_index, prev);
846     }
847 
848   for (j = 0; j < group_size; j++)
849     if (!TEST_BIT (load_index, j))
850       return false;
851 
852   sbitmap_free (load_index);
853 
854   if (supported && i == group_size * group_size
855       && vect_supported_slp_permutation_p (slp_instn))
856     return true;
857 
858   return false;
859 }
860 
861 
862 /* Find the first load in the loop that belongs to INSTANCE.
863    When loads are in several SLP nodes, there can be a case in which the first
864    load does not appear in the first SLP node to be transformed, causing
865    incorrect order of statements. Since we generate all the loads together,
866    they must be inserted before the first load of the SLP instance and not
867    before the first load of the first node of the instance.  */
868 static gimple
869 vect_find_first_load_in_slp_instance (slp_instance instance)
870 {
871   int i, j;
872   slp_tree load_node;
873   gimple first_load = NULL, load;
874 
875   for (i = 0;
876        VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), i, load_node);
877        i++)
878     for (j = 0;
879          VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (load_node), j, load);
880          j++)
881       first_load = get_earlier_stmt (load, first_load);
882 
883   return first_load;
884 }
885 
886 
887 /* Analyze an SLP instance starting from a group of strided stores. Call
888    vect_build_slp_tree to build a tree of packed stmts if possible.
889    Return FALSE if it's impossible to SLP any stmt in the loop.  */
890 
891 static bool
892 vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
893                            gimple stmt)
894 {
895   slp_instance new_instance;
896   slp_tree node = XNEW (struct _slp_tree);
897   unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
898   unsigned int unrolling_factor = 1, nunits;
899   tree vectype, scalar_type;
900   gimple next;
901   unsigned int vectorization_factor = 0;
902   int inside_cost = 0, outside_cost = 0, ncopies_for_cost;
903   unsigned int max_nunits = 0;
904   VEC (int, heap) *load_permutation;
905   VEC (slp_tree, heap) *loads;
906 
907   scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (
908                                              vinfo_for_stmt (stmt))));
909   vectype = get_vectype_for_scalar_type (scalar_type);
910   if (!vectype)
911     {
912       if (vect_print_dump_info (REPORT_SLP))
913         {
914           fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
915           print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
916         }
917       return false;
918     }
919 
920   nunits = TYPE_VECTOR_SUBPARTS (vectype);
921   if (loop_vinfo)
922     vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
923   else
924     /* No multitypes in BB SLP.  */
925     vectorization_factor = nunits;
926 
927   /* Calculate the unrolling factor.  */
928   unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
929   if (unrolling_factor != 1 && !loop_vinfo)
930     {
931       if (vect_print_dump_info (REPORT_SLP))
932         fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
933                             " block SLP");
934 
935       return false;
936     }
937 
938   /* Create a node (a root of the SLP tree) for the packed strided stores.  */
939   SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (gimple, heap, group_size);
940   next = stmt;
941   /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
942   while (next)
943     {
944       VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
945       next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
946     }
947 
948   SLP_TREE_VEC_STMTS (node) = NULL;
949   SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
950   SLP_TREE_LEFT (node) = NULL;
951   SLP_TREE_RIGHT (node) = NULL;
952   SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
953   SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;
954 
955   /* Calculate the number of vector stmts to create based on the unrolling
956      factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
957      GROUP_SIZE / NUNITS otherwise.  */
958   ncopies_for_cost = unrolling_factor * group_size / nunits;
959 
960   load_permutation = VEC_alloc (int, heap, group_size * group_size);
961   loads = VEC_alloc (slp_tree, heap, group_size);
962 
963   /* Build the tree for the SLP instance.  */
964   if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
965                            &inside_cost, &outside_cost, ncopies_for_cost,
966 			   &max_nunits, &load_permutation, &loads,
967 			   vectorization_factor))
968     {
969       /* Create a new SLP instance.  */
970       new_instance = XNEW (struct _slp_instance);
971       SLP_INSTANCE_TREE (new_instance) = node;
972       SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
973       /* Calculate the unrolling factor based on the smallest type in the
974          loop.  */
975       if (max_nunits > nunits)
976         unrolling_factor = least_common_multiple (max_nunits, group_size)
977                            / group_size;
978 
979       SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
980       SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
981       SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
982       SLP_INSTANCE_LOADS (new_instance) = loads;
983       SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
984       SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;
985       if (VEC_length (slp_tree, loads))
986         {
987           if (!vect_supported_load_permutation_p (new_instance, group_size,
988                                                   load_permutation))
989             {
990               if (vect_print_dump_info (REPORT_SLP))
991                 {
992                   fprintf (vect_dump, "Build SLP failed: unsupported load "
993                                       "permutation ");
994                   print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
995                 }
996 
997               vect_free_slp_instance (new_instance);
998               return false;
999             }
1000 
1001           SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
1002              = vect_find_first_load_in_slp_instance (new_instance);
1003         }
1004       else
1005         VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (new_instance));
1006 
1007       if (loop_vinfo)
1008         VEC_safe_push (slp_instance, heap,
1009                        LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
1010   		       new_instance);
1011       else
1012         VEC_safe_push (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo),
1013                        new_instance);
1014 
1015       if (vect_print_dump_info (REPORT_SLP))
1016 	vect_print_slp_tree (node);
1017 
1018       return true;
1019     }
1020 
1021   /* Failed to SLP.  */
1022   /* Free the allocated memory.  */
1023   vect_free_slp_tree (node);
1024   VEC_free (int, heap, load_permutation);
1025   VEC_free (slp_tree, heap, loads);
1026 
1027   return false;
1028 }
1029 
1030 
/* Check if there are stmts in the loop that can be vectorized using SLP.
   Build SLP trees of packed scalar stmts if SLP is possible.  */
1033 
1034 bool
1035 vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1036 {
1037   unsigned int i;
1038   VEC (gimple, heap) *strided_stores;
1039   gimple store;
1040   bool ok = false;
1041 
1042   if (vect_print_dump_info (REPORT_SLP))
1043     fprintf (vect_dump, "=== vect_analyze_slp ===");
1044 
1045   if (loop_vinfo)
1046     strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
1047   else
1048     strided_stores = BB_VINFO_STRIDED_STORES (bb_vinfo);
1049 
1050   for (i = 0; VEC_iterate (gimple, strided_stores, i, store); i++)
1051     if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, store))
1052       ok = true;
1053 
1054   if (bb_vinfo && !ok)
1055     {
1056       if (vect_print_dump_info (REPORT_SLP))
1057         fprintf (vect_dump, "Failed to SLP the basic block.");
1058 
1059       return false;
1060     }
1061 
1062   return true;
1063 }
1064 
1065 
1066 /* For each possible SLP instance decide whether to SLP it and calculate overall
1067    unrolling factor needed to SLP the loop.  */
1068 
1069 void
1070 vect_make_slp_decision (loop_vec_info loop_vinfo)
1071 {
1072   unsigned int i, unrolling_factor = 1;
1073   VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1074   slp_instance instance;
1075   int decided_to_slp = 0;
1076 
1077   if (vect_print_dump_info (REPORT_SLP))
1078     fprintf (vect_dump, "=== vect_make_slp_decision ===");
1079 
1080   for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
1081     {
1082       /* FORNOW: SLP if you can.  */
1083       if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
1084 	unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
1085 
1086       /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
1087 	 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
1088 	 loop-based vectorization. Such stmts will be marked as HYBRID.  */
1089       vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1090       decided_to_slp++;
1091     }
1092 
1093   LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
1094 
1095   if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
1096     fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
1097 	     decided_to_slp, unrolling_factor);
1098 }
1099 
1100 
1101 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
1102    can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID.  */
1103 
1104 static void
1105 vect_detect_hybrid_slp_stmts (slp_tree node)
1106 {
1107   int i;
1108   gimple stmt;
1109   imm_use_iterator imm_iter;
1110   gimple use_stmt;
1111   stmt_vec_info stmt_vinfo;
1112 
1113   if (!node)
1114     return;
1115 
1116   for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
1117     if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
1118 	&& TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
1119       FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
1120 	if ((stmt_vinfo = vinfo_for_stmt (use_stmt))
1121 	    && !STMT_SLP_TYPE (stmt_vinfo)
1122             && (STMT_VINFO_RELEVANT (stmt_vinfo)
1123                 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo))))
1124 	  vect_mark_slp_stmts (node, hybrid, i);
1125 
1126   vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
1127   vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node));
1128 }
1129 
1130 
1131 /* Find stmts that must be both vectorized and SLPed.  */
1132 
1133 void
1134 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
1135 {
1136   unsigned int i;
1137   VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1138   slp_instance instance;
1139 
1140   if (vect_print_dump_info (REPORT_SLP))
1141     fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");
1142 
1143   for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
1144     vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
1145 }
1146 
1147 
1148 /* Create and initialize a new bb_vec_info struct for BB, as well as
1149    stmt_vec_info structs for all the stmts in it.  */
1150 
1151 static bb_vec_info
1152 new_bb_vec_info (basic_block bb)
1153 {
1154   bb_vec_info res = NULL;
1155   gimple_stmt_iterator gsi;
1156 
1157   res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
1158   BB_VINFO_BB (res) = bb;
1159 
1160   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1161     {
1162       gimple stmt = gsi_stmt (gsi);
1163       gimple_set_uid (stmt, 0);
1164       set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
1165     }
1166 
1167   BB_VINFO_STRIDED_STORES (res) = VEC_alloc (gimple, heap, 10);
1168   BB_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 2);
1169 
1170   bb->aux = res;
1171   return res;
1172 }
1173 
1174 
1175 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
1176    stmts in the basic block.  */
1177 
1178 static void
1179 destroy_bb_vec_info (bb_vec_info bb_vinfo)
1180 {
1181   basic_block bb;
1182   gimple_stmt_iterator si;
1183 
1184   if (!bb_vinfo)
1185     return;
1186 
1187   bb = BB_VINFO_BB (bb_vinfo);
1188 
1189   for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1190     {
1191       gimple stmt = gsi_stmt (si);
1192       stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1193 
1194       if (stmt_info)
1195         /* Free stmt_vec_info.  */
1196         free_stmt_vec_info (stmt);
1197     }
1198 
1199   VEC_free (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo));
1200   VEC_free (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo));
1201   free (bb_vinfo);
1202   bb->aux = NULL;
1203 }
1204 
1205 
1206 /* Analyze statements contained in SLP tree node after recursively analyzing
1207    the subtree. Return TRUE if the operations are supported.  */
1208 
1209 static bool
1210 vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
1211 {
1212   bool dummy;
1213   int i;
1214   gimple stmt;
1215 
1216   if (!node)
1217     return true;
1218 
1219   if (!vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_LEFT (node))
1220       || !vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_RIGHT (node)))
1221     return false;
1222 
1223   for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
1224     {
1225       stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1226       gcc_assert (stmt_info);
1227       gcc_assert (PURE_SLP_STMT (stmt_info));
1228 
1229       if (!vect_analyze_stmt (stmt, &dummy, node))
1230         return false;
1231     }
1232 
1233   return true;
1234 }
1235 
1236 
1237 /* Analyze statements in SLP instances of the basic block. Return TRUE if the
1238    operations are supported. */
1239 
1240 static bool
1241 vect_slp_analyze_operations (bb_vec_info bb_vinfo)
1242 {
1243   VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1244   slp_instance instance;
1245   int i;
1246 
1247   for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); )
1248     {
1249       if (!vect_slp_analyze_node_operations (bb_vinfo,
1250                                              SLP_INSTANCE_TREE (instance)))
1251         {
1252  	  vect_free_slp_instance (instance);
1253           VEC_ordered_remove (slp_instance, slp_instances, i);
1254 	}
1255       else
1256         i++;
1257     }
1258 
1259   if (!VEC_length (slp_instance, slp_instances))
1260     return false;
1261 
1262   return true;
1263 }
1264 
1265 
/* Check if the basic block can be vectorized.  */
1267 
bb_vec_info
vect_slp_analyze_bb (basic_block bb)
{
  bb_vec_info bb_vinfo;
  VEC (ddr_p, heap) *ddrs;
  VEC (slp_instance, heap) *slp_instances;
  slp_instance instance;
  int i, insns = 0;
  gimple_stmt_iterator gsi;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "===vect_slp_analyze_bb===\n");

  /* Count the real instructions; debug stmts, nops and labels do not
     count towards the size limit.  */
  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      if (!is_gimple_debug (stmt)
	  && !gimple_nop_p (stmt)
	  && gimple_code (stmt) != GIMPLE_LABEL)
	insns++;
    }

  /* Bail out on blocks bigger than the --param slp-max-insns-in-bb
     limit to keep compile time bounded.  */
  if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: too many instructions in basic "
                            "block.\n");

      return NULL;
    }

  /* Create stmt_vec_info for every stmt in the block; all the analysis
     phases below record their results there.  */
  bb_vinfo = new_bb_vec_info (bb);
  if (!bb_vinfo)
    return NULL;

  /* Find and analyze the data references in the block.  */
  if (!vect_analyze_data_refs (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unhandled data-ref in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* Without at least one data dependence relation there is nothing
     worth vectorizing here.  */
  ddrs = BB_VINFO_DDRS (bb_vinfo);
  if (!VEC_length (ddr_p, ddrs))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: not enough data-refs in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* Compute the alignment of each data reference.  */
  if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: bad data alignment in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* Check that the data dependences do not forbid vectorization.  */
   if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo))
    {
     if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
       fprintf (vect_dump, "not vectorized: unhandled data dependence in basic"
                           " block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* Analyze the access patterns (strided groups etc.) of the
     data references.  */
  if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
    {
     if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
       fprintf (vect_dump, "not vectorized: unhandled data access in basic "
                           "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* Verify that the alignments found above are supported by the target.  */
   if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unsupported alignment in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* Check the SLP opportunities in the basic block, analyze and build SLP
     trees.  */
  if (!vect_analyze_slp (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: failed to find SLP opportunities "
                            "in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);

  /* Mark all the statements that we want to vectorize as pure SLP and
     relevant.  */
  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
    {
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
    }

  /* Finally verify that each SLP stmt's operation is supported.  */
  if (!vect_slp_analyze_operations (bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: bad operation in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "Basic block will be vectorized using SLP\n");

  return bb_vinfo;
}
1400 
1401 
1402 /* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
1403    the number of created vector stmts depends on the unrolling factor). However,
1404    the actual number of vector stmts for every SLP node depends on VF which is
1405    set later in vect_analyze_operations(). Hence, SLP costs should be updated.
1406    In this function we assume that the inside costs calculated in
1407    vect_model_xxx_cost are linear in ncopies.  */
1408 
1409 void
1410 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
1411 {
1412   unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1413   VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1414   slp_instance instance;
1415 
1416   if (vect_print_dump_info (REPORT_SLP))
1417     fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");
1418 
1419   for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
1420     /* We assume that costs are linear in ncopies.  */
1421     SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
1422       / SLP_INSTANCE_UNROLLING_FACTOR (instance);
1423 }
1424 
1425 
1426 /* For constant and loop invariant defs of SLP_NODE this function returns
1427    (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
1428    OP_NUM determines if we gather defs for operand 0 or operand 1 of the scalar
1429    stmts. NUMBER_OF_VECTORS is the number of vector defs to create.  */
1430 
static void
vect_get_constant_vectors (tree op, slp_tree slp_node,
                           VEC(tree,heap) **vec_oprnds,
			   unsigned int op_num, unsigned int number_of_vectors)
{
  VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
  gimple stmt = VEC_index (gimple, stmts, 0);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  int nunits;
  tree vec_cst;
  tree t = NULL_TREE;
  int j, number_of_places_left_in_vector;
  tree vector_type;
  tree vop;
  int group_size = VEC_length (gimple, stmts);
  unsigned int vec_num, i;
  int number_of_copies = 1;
  VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
  bool constant_p, is_store;

  /* For a store the operand to vectorize is always the RHS of the stmt;
     the incoming OP is ignored in that case.  */
  if (STMT_VINFO_DATA_REF (stmt_vinfo))
    {
      is_store = true;
      op = gimple_assign_rhs1 (stmt);
    }
  else
    is_store = false;

  /* Constants are assembled with build_vector, other invariants with a
     CONSTRUCTOR (see below).  */
  if (CONSTANT_CLASS_P (op))
    constant_p = true;
  else
    constant_p = false;

  vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
  gcc_assert (vector_type);

  nunits = TYPE_VECTOR_SUBPARTS (vector_type);

  /* NUMBER_OF_COPIES is the number of times we need to use the same values in
     created vectors. It is greater than 1 if unrolling is performed.

     For example, we have two scalar operands, s1 and s2 (e.g., group of
     strided accesses of size two), while NUNITS is four (i.e., four scalars
     of this type can be packed in a vector). The output vector will contain
     two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
     will be 2).

     If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
     containing the operands.

     For example, NUNITS is four as before, and the group size is 8
     (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
     {s5, s6, s7, s8}.  */

  number_of_copies = least_common_multiple (nunits, group_size) / group_size;

  number_of_places_left_in_vector = nunits;
  /* Walk the group backwards because tree_cons prepends: the elements end
     up in source order inside each built vector.  Note that I is unsigned,
     so the loop terminates via VEC_iterate when I wraps past zero.  */
  for (j = 0; j < number_of_copies; j++)
    {
      for (i = group_size - 1; VEC_iterate (gimple, stmts, i, stmt); i--)
        {
          if (is_store)
            op = gimple_assign_rhs1 (stmt);
          else
            op = gimple_op (stmt, op_num + 1);

          /* Create 'vect_ = {op0,op1,...,opn}'.  */
          t = tree_cons (NULL_TREE, op, t);

          number_of_places_left_in_vector--;

          /* A full vector's worth of scalars has been collected: emit it.  */
          if (number_of_places_left_in_vector == 0)
            {
              number_of_places_left_in_vector = nunits;

	      if (constant_p)
		vec_cst = build_vector (vector_type, t);
	      else
		vec_cst = build_constructor_from_list (vector_type, t);
              VEC_quick_push (tree, voprnds,
                              vect_init_vector (stmt, vec_cst, vector_type, NULL));
              t = NULL_TREE;
            }
        }
    }

  /* Since the vectors are created in the reverse order, we should invert
     them.  */
  vec_num = VEC_length (tree, voprnds);
  for (j = vec_num - 1; j >= 0; j--)
    {
      vop = VEC_index (tree, voprnds, j);
      VEC_quick_push (tree, *vec_oprnds, vop);
    }

  VEC_free (tree, heap, voprnds);

  /* In case that VF is greater than the unrolling factor needed for the SLP
     group of stmts, NUMBER_OF_VECTORS to be created is greater than
     NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
     to replicate the vectors.  */
  while (number_of_vectors > VEC_length (tree, *vec_oprnds))
    {
      for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num; i++)
        VEC_quick_push (tree, *vec_oprnds, vop);
    }
}
1538 
1539 
1540 /* Get vectorized definitions from SLP_NODE that contains corresponding
1541    vectorized def-stmts.  */
1542 
1543 static void
1544 vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
1545 {
1546   tree vec_oprnd;
1547   gimple vec_def_stmt;
1548   unsigned int i;
1549 
1550   gcc_assert (SLP_TREE_VEC_STMTS (slp_node));
1551 
1552   for (i = 0;
1553        VEC_iterate (gimple, SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt);
1554        i++)
1555     {
1556       gcc_assert (vec_def_stmt);
1557       vec_oprnd = gimple_get_lhs (vec_def_stmt);
1558       VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
1559     }
1560 }
1561 
1562 
1563 /* Get vectorized definitions for SLP_NODE.
1564    If the scalar definitions are loop invariants or constants, collect them and
1565    call vect_get_constant_vectors() to create vector stmts.
1566    Otherwise, the def-stmts must be already vectorized and the vectorized stmts
1567    must be stored in the LEFT/RIGHT node of SLP_NODE, and we call
1568    vect_get_slp_vect_defs() to retrieve them.
1569    If VEC_OPRNDS1 is NULL, don't get vector defs for the second operand (from
1570    the right node. This is used when the second operand must remain scalar.  */
1571 
1572 void
1573 vect_get_slp_defs (tree op0, tree op1, slp_tree slp_node,
1574                    VEC (tree,heap) **vec_oprnds0,
1575                    VEC (tree,heap) **vec_oprnds1)
1576 {
1577   gimple first_stmt;
1578   enum tree_code code;
1579   int number_of_vects;
1580   HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
1581 
1582   first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
1583   /* The number of vector defs is determined by the number of vector statements
1584      in the node from which we get those statements.  */
1585   if (SLP_TREE_LEFT (slp_node))
1586     number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_LEFT (slp_node));
1587   else
1588     {
1589       number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1590       /* Number of vector stmts was calculated according to LHS in
1591          vect_schedule_slp_instance(), fix it by replacing LHS with RHS, if
1592          necessary. See vect_get_smallest_scalar_type() for details.  */
1593       vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
1594                                      &rhs_size_unit);
1595       if (rhs_size_unit != lhs_size_unit)
1596         {
1597           number_of_vects *= rhs_size_unit;
1598           number_of_vects /= lhs_size_unit;
1599         }
1600     }
1601 
1602   /* Allocate memory for vectorized defs.  */
1603   *vec_oprnds0 = VEC_alloc (tree, heap, number_of_vects);
1604 
1605   /* SLP_NODE corresponds either to a group of stores or to a group of
1606      unary/binary operations. We don't call this function for loads.  */
1607   if (SLP_TREE_LEFT (slp_node))
1608     /* The defs are already vectorized.  */
1609     vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node), vec_oprnds0);
1610   else
1611     /* Build vectors from scalar defs.  */
1612     vect_get_constant_vectors (op0, slp_node, vec_oprnds0, 0, number_of_vects);
1613 
1614   if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)))
1615     /* Since we don't call this function with loads, this is a group of
1616        stores.  */
1617     return;
1618 
1619   code = gimple_assign_rhs_code (first_stmt);
1620   if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS || !vec_oprnds1)
1621     return;
1622 
1623   /* The number of vector defs is determined by the number of vector statements
1624      in the node from which we get those statements.  */
1625   if (SLP_TREE_RIGHT (slp_node))
1626     number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_RIGHT (slp_node));
1627   else
1628     number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1629 
1630   *vec_oprnds1 = VEC_alloc (tree, heap, number_of_vects);
1631 
1632   if (SLP_TREE_RIGHT (slp_node))
1633     /* The defs are already vectorized.  */
1634     vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node), vec_oprnds1);
1635   else
1636     /* Build vectors from scalar defs.  */
1637     vect_get_constant_vectors (op1, slp_node, vec_oprnds1, 1, number_of_vects);
1638 }
1639 
1640 
1641 /* Create NCOPIES permutation statements using the mask MASK_BYTES (by
1642    building a vector of type MASK_TYPE from it) and two input vectors placed in
1643    DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
1644    shifting by STRIDE elements of DR_CHAIN for every copy.
1645    (STRIDE is the number of vectorized stmts for NODE divided by the number of
1646    copies).
1647    VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
1648    the created stmts must be inserted.  */
1649 
static inline void
vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
                           tree mask, int first_vec_indx, int second_vec_indx,
                           gimple_stmt_iterator *gsi, slp_tree node,
                           tree builtin_decl, tree vectype,
                           VEC(tree,heap) *dr_chain,
                           int ncopies, int vect_stmts_counter)
{
  tree perm_dest;
  gimple perm_stmt = NULL;
  stmt_vec_info next_stmt_info;
  int i, stride;
  tree first_vec, second_vec, data_ref;
  VEC (tree, heap) *params = NULL;

  /* STRIDE is the distance in DR_CHAIN between the input vectors of two
     consecutive copies.  */
  stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;

  /* Initialize the vect stmts of NODE to properly insert the generated
     stmts later.  */
  for (i = VEC_length (gimple, SLP_TREE_VEC_STMTS (node));
       i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
    VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (node), NULL);

  perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
  for (i = 0; i < ncopies; i++)
    {
      first_vec = VEC_index (tree, dr_chain, first_vec_indx);
      second_vec = VEC_index (tree, dr_chain, second_vec_indx);

      /* Build argument list for the vectorized call.  PARAMS from the
         previous iteration was consumed by gimple_build_call_vec, so a
         fresh vector is allocated each time (the VEC_free of NULL on the
         first iteration is a no-op).  */
      VEC_free (tree, heap, params);
      params = VEC_alloc (tree, heap, 3);
      VEC_quick_push (tree, params, first_vec);
      VEC_quick_push (tree, params, second_vec);
      VEC_quick_push (tree, params, mask);

      /* Generate the permute statement.  */
      perm_stmt = gimple_build_call_vec (builtin_decl, params);
      data_ref = make_ssa_name (perm_dest, perm_stmt);
      gimple_call_set_lhs (perm_stmt, data_ref);
      vect_finish_stmt_generation (stmt, perm_stmt, gsi);

      /* Store the vector statement in NODE.  */
      VEC_replace (gimple, SLP_TREE_VEC_STMTS (node),
                   stride * i + vect_stmts_counter, perm_stmt);

      first_vec_indx += stride;
      second_vec_indx += stride;
    }

  /* Mark the scalar stmt as vectorized, recording the last permute stmt
     as its vectorized counterpart.  */
  next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
  STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
}
1704 
1705 
1706 /* Given FIRST_MASK_ELEMENT - the mask element in element representation,
1707    return in CURRENT_MASK_ELEMENT its equivalent in target specific
1708    representation. Check that the mask is valid and return FALSE if not.
1709    Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
1710    the next vector, i.e., the current first vector is not needed.  */
1711 
static bool
vect_get_mask_element (gimple stmt, int first_mask_element, int m,
                       int mask_nunits, bool only_one_vec, int index,
                       int *mask, int *current_mask_element,
                       bool *need_next_vector, int *number_of_mask_fixes,
                       bool *mask_fixed, bool *needs_first_vector)
{
  int i;

  /* Convert to target specific representation.  */
  *current_mask_element = first_mask_element + m;
  /* Adjust the value in case it's a mask for second and third vectors.  */
  *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);

  /* An element below MASK_NUNITS selects from the first input vector, so
     that vector cannot be dropped for this mask.  */
  if (*current_mask_element < mask_nunits)
    *needs_first_vector = true;

  /* We have only one input vector to permute but the mask accesses values in
     the next vector as well.  */
  if (only_one_vec && *current_mask_element >= mask_nunits)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "permutation requires at least two vectors ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      return false;
    }

  /* The mask requires the next vector.  */
  if (*current_mask_element >= mask_nunits * 2)
    {
      if (*needs_first_vector || *mask_fixed)
        {
          /* We either need the first vector too or have already moved to the
             next vector. In both cases, this permutation needs three
             vectors.  */
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "permutation requires at "
                                  "least three vectors ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          return false;
        }

      /* We move to the next vector, dropping the first one and working with
         the second and the third - we need to adjust the values of the mask
         accordingly.  */
      *current_mask_element -= mask_nunits * *number_of_mask_fixes;

      /* Also rebase the mask elements already emitted for this mask.  */
      for (i = 0; i < index; i++)
        mask[i] -= mask_nunits * *number_of_mask_fixes;

      (*number_of_mask_fixes)++;
      *mask_fixed = true;
    }

  /* The current first vector is only dropped once the mask has been
     shifted to the next pair of vectors.  */
  *need_next_vector = *mask_fixed;

  /* This was the last element of this mask. Start a new one.  */
  if (index == mask_nunits - 1)
    {
      *number_of_mask_fixes = 1;
      *mask_fixed = false;
      *needs_first_vector = false;
    }

  return true;
}
1784 
1785 
1786 /* Generate vector permute statements from a list of loads in DR_CHAIN.
1787    If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
1788    permute statements for SLP_NODE_INSTANCE.  */
1789 bool
1790 vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain,
1791                               gimple_stmt_iterator *gsi, int vf,
1792                               slp_instance slp_node_instance, bool analyze_only)
1793 {
1794   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1795   tree mask_element_type = NULL_TREE, mask_type;
1796   int i, j, k, m, scale, mask_nunits, nunits, vec_index = 0, scalar_index;
1797   slp_tree node;
1798   tree vectype = STMT_VINFO_VECTYPE (stmt_info), builtin_decl;
1799   gimple next_scalar_stmt;
1800   int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
1801   int first_mask_element;
1802   int index, unroll_factor, *mask, current_mask_element, ncopies;
1803   bool only_one_vec = false, need_next_vector = false;
1804   int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
1805   int number_of_mask_fixes = 1;
1806   bool mask_fixed = false;
1807   bool needs_first_vector = false;
1808 
1809   if (!targetm.vectorize.builtin_vec_perm)
1810     {
1811       if (vect_print_dump_info (REPORT_DETAILS))
1812         {
1813           fprintf (vect_dump, "no builtin for vect permute for ");
1814           print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1815         }
1816 
1817        return false;
1818     }
1819 
1820   builtin_decl = targetm.vectorize.builtin_vec_perm (vectype,
1821                                                      &mask_element_type);
1822   if (!builtin_decl || !mask_element_type)
1823     {
1824       if (vect_print_dump_info (REPORT_DETAILS))
1825         {
1826           fprintf (vect_dump, "no builtin for vect permute for ");
1827           print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1828         }
1829 
1830        return false;
1831     }
1832 
1833   mask_type = get_vectype_for_scalar_type (mask_element_type);
1834   mask_nunits = TYPE_VECTOR_SUBPARTS (mask_type);
1835   mask = (int *) xmalloc (sizeof (int) * mask_nunits);
1836   nunits = TYPE_VECTOR_SUBPARTS (vectype);
1837   scale = mask_nunits / nunits;
1838   unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
1839 
1840   /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
1841      unrolling factor.  */
1842   orig_vec_stmts_num = group_size *
1843                 SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
1844   if (orig_vec_stmts_num == 1)
1845     only_one_vec = true;
1846 
1847   /* Number of copies is determined by the final vectorization factor
1848      relatively to SLP_NODE_INSTANCE unrolling factor.  */
1849   ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
1850 
1851   /* Generate permutation masks for every NODE. Number of masks for each NODE
1852      is equal to GROUP_SIZE.
1853      E.g., we have a group of three nodes with three loads from the same
1854      location in each node, and the vector size is 4. I.e., we have a
1855      a0b0c0a1b1c1... sequence and we need to create the following vectors:
1856      for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
1857      for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
1858      ...
1859 
1860      The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9} (in target
1861      scpecific type, e.g., in bytes for Altivec.
1862      The last mask is illegal since we assume two operands for permute
1863      operation, and the mask element values can't be outside that range. Hence,
1864      the last mask must be converted into {2,5,5,5}.
1865      For the first two permutations we need the first and the second input
1866      vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
1867      we need the second and the third vectors: {b1,c1,a2,b2} and
1868      {c2,a3,b3,c3}.  */
1869 
1870   for (i = 0;
1871        VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (slp_node_instance),
1872                     i, node);
1873        i++)
1874     {
1875       scalar_index = 0;
1876       index = 0;
1877       vect_stmts_counter = 0;
1878       vec_index = 0;
1879       first_vec_index = vec_index++;
1880       if (only_one_vec)
1881         second_vec_index = first_vec_index;
1882       else
1883         second_vec_index =  vec_index++;
1884 
1885       for (j = 0; j < unroll_factor; j++)
1886         {
1887           for (k = 0; k < group_size; k++)
1888             {
1889               first_mask_element = (i + j * group_size) * scale;
1890               for (m = 0; m < scale; m++)
1891                 {
1892                   if (!vect_get_mask_element (stmt, first_mask_element, m,
1893                                    mask_nunits, only_one_vec, index, mask,
1894                                    &current_mask_element, &need_next_vector,
1895                                    &number_of_mask_fixes, &mask_fixed,
1896                                    &needs_first_vector))
1897                     return false;
1898 
1899                   mask[index++] = current_mask_element;
1900                 }
1901 
1902               if (index == mask_nunits)
1903                 {
1904 		  tree mask_vec = NULL;
1905 
1906 		  while (--index >= 0)
1907 		    {
1908 		      tree t = build_int_cst (mask_element_type, mask[index]);
1909 		      mask_vec = tree_cons (NULL, t, mask_vec);
1910 		    }
1911 		  mask_vec = build_vector (mask_type, mask_vec);
1912 		  index = 0;
1913 
1914 		  if (!targetm.vectorize.builtin_vec_perm_ok (vectype,
1915 							      mask_vec))
1916 		    {
1917 		      if (vect_print_dump_info (REPORT_DETAILS))
1918 			{
1919 			  fprintf (vect_dump, "unsupported vect permute ");
1920 			  print_generic_expr (vect_dump, mask_vec, 0);
1921 			}
1922 		      free (mask);
1923 		      return false;
1924 		    }
1925 
1926                   if (!analyze_only)
1927                     {
1928                       if (need_next_vector)
1929                         {
1930                           first_vec_index = second_vec_index;
1931                           second_vec_index = vec_index;
1932                         }
1933 
1934                       next_scalar_stmt = VEC_index (gimple,
1935                                 SLP_TREE_SCALAR_STMTS (node), scalar_index++);
1936 
1937                       vect_create_mask_and_perm (stmt, next_scalar_stmt,
1938                                mask_vec, first_vec_index, second_vec_index,
1939 			       gsi, node, builtin_decl, vectype, dr_chain,
1940 			       ncopies, vect_stmts_counter++);
1941                     }
1942                 }
1943             }
1944         }
1945     }
1946 
1947   free (mask);
1948   return true;
1949 }
1950 
1951 
1952 
1953 /* Vectorize SLP instance tree in postorder.  */
1954 
1955 static bool
1956 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
1957                             unsigned int vectorization_factor)
1958 {
1959   gimple stmt;
1960   bool strided_store, is_store;
1961   gimple_stmt_iterator si;
1962   stmt_vec_info stmt_info;
1963   unsigned int vec_stmts_size, nunits, group_size;
1964   tree vectype;
1965   int i;
1966   slp_tree loads_node;
1967 
1968   if (!node)
1969     return false;
1970 
1971   vect_schedule_slp_instance (SLP_TREE_LEFT (node), instance,
1972                               vectorization_factor);
1973   vect_schedule_slp_instance (SLP_TREE_RIGHT (node), instance,
1974                               vectorization_factor);
1975 
1976   stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
1977   stmt_info = vinfo_for_stmt (stmt);
1978 
1979   /* VECTYPE is the type of the destination.  */
1980   vectype = get_vectype_for_scalar_type (TREE_TYPE (gimple_assign_lhs (stmt)));
1981   nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
1982   group_size = SLP_INSTANCE_GROUP_SIZE (instance);
1983 
1984   /* For each SLP instance calculate number of vector stmts to be created
1985      for the scalar stmts in each node of the SLP tree. Number of vector
1986      elements in one vector iteration is the number of scalar elements in
1987      one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
1988      size.  */
1989   vec_stmts_size = (vectorization_factor * group_size) / nunits;
1990 
1991   /* In case of load permutation we have to allocate vectorized statements for
1992      all the nodes that participate in that permutation.  */
1993   if (SLP_INSTANCE_LOAD_PERMUTATION (instance))
1994     {
1995       for (i = 0;
1996            VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), i, loads_node);
1997            i++)
1998         {
1999           if (!SLP_TREE_VEC_STMTS (loads_node))
2000             {
2001               SLP_TREE_VEC_STMTS (loads_node) = VEC_alloc (gimple, heap,
2002                                                            vec_stmts_size);
2003               SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node) = vec_stmts_size;
2004             }
2005         }
2006     }
2007 
2008   if (!SLP_TREE_VEC_STMTS (node))
2009     {
2010       SLP_TREE_VEC_STMTS (node) = VEC_alloc (gimple, heap, vec_stmts_size);
2011       SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
2012     }
2013 
2014   if (vect_print_dump_info (REPORT_DETAILS))
2015     {
2016       fprintf (vect_dump, "------>vectorizing SLP node starting from: ");
2017       print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2018     }
2019 
2020   /* Loads should be inserted before the first load.  */
2021   if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
2022       && STMT_VINFO_STRIDED_ACCESS (stmt_info)
2023       && !REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
2024     si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
2025   else
2026     si = gsi_for_stmt (stmt);
2027 
2028   is_store = vect_transform_stmt (stmt, &si, &strided_store, node, instance);
2029   if (is_store)
2030     {
2031       if (DR_GROUP_FIRST_DR (stmt_info))
2032 	/* If IS_STORE is TRUE, the vectorization of the
2033 	   interleaving chain was completed - free all the stores in
2034 	   the chain.  */
2035 	vect_remove_stores (DR_GROUP_FIRST_DR (stmt_info));
2036       else
2037 	/* FORNOW: SLP originates only from strided stores.  */
2038 	gcc_unreachable ();
2039 
2040       return true;
2041     }
2042 
2043   /* FORNOW: SLP originates only from strided stores.  */
2044   return false;
2045 }
2046 
2047 
2048 bool
2049 vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2050 {
2051   VEC (slp_instance, heap) *slp_instances;
2052   slp_instance instance;
2053   unsigned int i, vf;
2054   bool is_store = false;
2055 
2056   if (loop_vinfo)
2057     {
2058       slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2059       vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2060     }
2061   else
2062     {
2063       slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2064       vf = 1;
2065     }
2066 
2067   for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
2068     {
2069       /* Schedule the tree of INSTANCE.  */
2070       is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
2071                                              instance, vf);
2072       if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS)
2073 	  || vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2074 	fprintf (vect_dump, "vectorizing stmts using SLP.");
2075     }
2076 
2077   return is_store;
2078 }
2079 
2080 
2081 /* Vectorize the basic block.  */
2082 
2083 void
2084 vect_slp_transform_bb (basic_block bb)
2085 {
2086   bb_vec_info bb_vinfo = vec_info_for_bb (bb);
2087   gimple_stmt_iterator si;
2088 
2089   gcc_assert (bb_vinfo);
2090 
2091   if (vect_print_dump_info (REPORT_DETAILS))
2092     fprintf (vect_dump, "SLPing BB\n");
2093 
2094   for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2095     {
2096       gimple stmt = gsi_stmt (si);
2097       stmt_vec_info stmt_info;
2098 
2099       if (vect_print_dump_info (REPORT_DETAILS))
2100         {
2101           fprintf (vect_dump, "------>SLPing statement: ");
2102           print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2103         }
2104 
2105       stmt_info = vinfo_for_stmt (stmt);
2106       gcc_assert (stmt_info);
2107 
2108       /* Schedule all the SLP instances when the first SLP stmt is reached.  */
2109       if (STMT_SLP_TYPE (stmt_info))
2110         {
2111           vect_schedule_slp (NULL, bb_vinfo);
2112           break;
2113         }
2114     }
2115 
2116   mark_sym_for_renaming (gimple_vop (cfun));
2117   /* The memory tags and pointers in vectorized statements need to
2118      have their SSA forms updated.  FIXME, why can't this be delayed
2119      until all the loops have been transformed?  */
2120   update_ssa (TODO_update_ssa);
2121 
2122   if (vect_print_dump_info (REPORT_DETAILS))
2123     fprintf (vect_dump, "BASIC BLOCK VECTORIZED\n");
2124 
2125   destroy_bb_vec_info (bb_vinfo);
2126 }
2127 
2128