/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "cfgloop.h"
#include "cfglayout.h"
#include "expr.h"
#include "recog.h"
#include "optabs.h"
#include "toplev.h"
#include "tree-vectorizer.h"
#include "langhooks.h"


/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt,
		    enum vect_relevant relevant, bool live_p)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);

  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      gimple pattern_stmt;

      /* This is the last stmt in a sequence that was detected as a
         pattern that can potentially be vectorized.  Don't mark the stmt
         as relevant/live because it's not going to be vectorized.
         Instead mark the pattern-stmt that replaces it.  */

      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live.");
      stmt_info = vinfo_for_stmt (pattern_stmt);
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
      save_relevant = STMT_VINFO_RELEVANT (stmt_info);
      save_live_p = STMT_VINFO_LIVE_P (stmt_info);
      stmt = pattern_stmt;
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "already marked relevant/live.");
      return;
    }

  VEC_safe_push (gimple, heap, *worklist, stmt);
}


/* Function vect_stmt_relevant_p.

   Return true if STMT, in the loop represented by LOOP_VINFO, is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - it is a control stmt in the loop (other than the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */
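
/* For example, in a loop like (illustration only; names hypothetical):

     s = 0;
     for (i = 0; i < n; i++)
       {
         a[i] = b[i] + 1;   <-- has a vdef (stores to a[]): relevant
         s = s + c[i];      <-- s is used after the loop: live
       }
     ... = s;

   the store is relevant because it alters memory, and the stmt computing
   s is live because its result is used outside the loop.  */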

static bool
vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
		      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
         != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt))
      {
	if (vect_print_dump_info (REPORT_DETAILS))
	  fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
	*relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
	{
	  basic_block bb = gimple_bb (USE_STMT (use_p));
	  if (!flow_bb_inside_loop_p (loop, bb))
	    {
	      if (vect_print_dump_info (REPORT_DETAILS))
		fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");

	      if (is_gimple_debug (USE_STMT (use_p)))
		continue;

	      /* We expect all such uses to be in the loop exit phis
		 (because of loop closed form).  */
	      gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
	      gcc_assert (bb == single_exit (loop)->dest);

	      *live_p = true;
	    }
	}
    }

  return (*live_p || *relevant);
}


/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it is one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    return false;
  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}


/*
   Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling vect_mark_relevant and passing
     it the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array
     indexing), which does not need to be directly vectorized, then the
     liveness/relevance of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
     skip DEF_STMT because it has already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
     be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */
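
/* As an illustration of case 1 (SSA names hypothetical): in a stmt

     x_1 = a[i_2];

   the use of i_2 only serves the address computation of the array
   reference, so the relevance/liveness of i_2's defining stmt is left
   unchanged, whereas a use in an actual computation, e.g. 'y_3 = i_2 + 1',
   would mark the def of i_2.  */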
234 
235 static bool
236 process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
237 	     enum vect_relevant relevant, VEC(gimple,heap) **worklist)
238 {
239   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
240   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
241   stmt_vec_info dstmt_vinfo;
242   basic_block bb, def_bb;
243   tree def;
244   gimple def_stmt;
245   enum vect_def_type dt;
246 
247   /* case 1: we are only interested in uses that need to be vectorized.  Uses
248      that are used for address computation are not considered relevant.  */
249   if (!exist_non_indexing_operands_for_use_p (use, stmt))
250      return true;
251 
252   if (!vect_is_simple_use (use, loop_vinfo, NULL, &def_stmt, &def, &dt))
253     {
254       if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
255         fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
256       return false;
257     }
258 
259   if (!def_stmt || gimple_nop_p (def_stmt))
260     return true;
261 
262   def_bb = gimple_bb (def_stmt);
263   if (!flow_bb_inside_loop_p (loop, def_bb))
264     {
265       if (vect_print_dump_info (REPORT_DETAILS))
266 	fprintf (vect_dump, "def_stmt is out of loop.");
267       return true;
268     }
269 
270   /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
271      DEF_STMT must have already been processed, because this should be the
272      only way that STMT, which is a reduction-phi, was put in the worklist,
273      as there should be no other uses for DEF_STMT in the loop.  So we just
274      check that everything is as expected, and we are done.  */
275   dstmt_vinfo = vinfo_for_stmt (def_stmt);
276   bb = gimple_bb (stmt);
277   if (gimple_code (stmt) == GIMPLE_PHI
278       && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
279       && gimple_code (def_stmt) != GIMPLE_PHI
280       && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
281       && bb->loop_father == def_bb->loop_father)
282     {
283       if (vect_print_dump_info (REPORT_DETAILS))
284 	fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
285       if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
286 	dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
287       gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
288       gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
289 		  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
290       return true;
291     }
292 
293   /* case 3a: outer-loop stmt defining an inner-loop stmt:
294 	outer-loop-header-bb:
295 		d = def_stmt
296 	inner-loop:
297 		stmt # use (d)
298 	outer-loop-tail-bb:
299 		...		  */
300   if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
301     {
302       if (vect_print_dump_info (REPORT_DETAILS))
303 	fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");
304 
305       switch (relevant)
306 	{
307 	case vect_unused_in_scope:
308 	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
309 		      vect_used_in_scope : vect_unused_in_scope;
310 	  break;
311 
312 	case vect_used_in_outer_by_reduction:
313           gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
314 	  relevant = vect_used_by_reduction;
315 	  break;
316 
317 	case vect_used_in_outer:
318           gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
319 	  relevant = vect_used_in_scope;
320 	  break;
321 
322 	case vect_used_in_scope:
323 	  break;
324 
325 	default:
326 	  gcc_unreachable ();
327 	}
328     }
329 
330   /* case 3b: inner-loop stmt defining an outer-loop stmt:
331 	outer-loop-header-bb:
332 		...
333 	inner-loop:
334 		d = def_stmt
335 	outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
336 		stmt # use (d)		*/
337   else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
338     {
339       if (vect_print_dump_info (REPORT_DETAILS))
340 	fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");
341 
342       switch (relevant)
343         {
344         case vect_unused_in_scope:
345           relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
346             || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
347                       vect_used_in_outer_by_reduction : vect_unused_in_scope;
348           break;
349 
350         case vect_used_by_reduction:
351           relevant = vect_used_in_outer_by_reduction;
352           break;
353 
354         case vect_used_in_scope:
355           relevant = vect_used_in_outer;
356           break;
357 
358         default:
359           gcc_unreachable ();
360         }
361     }
362 
363   vect_mark_relevant (worklist, def_stmt, relevant, live_p);
364   return true;
365 }
366 
367 
368 /* Function vect_mark_stmts_to_be_vectorized.
369 
370    Not all stmts in the loop need to be vectorized. For example:
371 
372      for i...
373        for j...
374    1.    T0 = i + j
375    2.	 T1 = a[T0]
376 
377    3.    j = j + 1
378 
   Stmts 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  VEC(gimple,heap) *worklist;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple phi;
  bool live_p;
  enum vect_relevant relevant, tmp_relevant;
  enum vect_def_type def_type;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");

  worklist = VEC_alloc (gimple, heap, 64);

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  phi = gsi_stmt (si);
	  if (vect_print_dump_info (REPORT_DETAILS))
	    {
	      fprintf (vect_dump, "init: phi relevant? ");
	      print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
	    }

	  if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, phi, relevant, live_p);
	}
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  stmt = gsi_stmt (si);
	  if (vect_print_dump_info (REPORT_DETAILS))
	    {
	      fprintf (vect_dump, "init: stmt relevant? ");
	      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
	    }

	  if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, stmt, relevant, live_p);
	}
    }

  /* 2. Process worklist.  */
  while (VEC_length (gimple, worklist) > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = VEC_pop (gimple, worklist);
      if (vect_print_dump_info (REPORT_DETAILS))
	{
          fprintf (vect_dump, "worklist: examine stmt: ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
	}

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines
	 it (DEF_STMT) as relevant/irrelevant and live/dead according to the
	 liveness and relevance properties of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
      live_p = STMT_VINFO_LIVE_P (stmt_vinfo);

      /* Generally, the liveness and relevance properties of STMT are
	 propagated as is to the DEF_STMTs of its USEs:
	  live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
	  relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)

	 One exception is when STMT has been identified as defining a reduction
	 variable; in this case we set the liveness/relevance as follows:
	   live_p = false
	   relevant = vect_used_by_reduction
	 This is because we distinguish between two kinds of relevant stmts -
	 those that are used by a reduction computation, and those that are
	 (also) used by a regular computation.  This allows us later on to
	 identify stmts that are used solely by a reduction, and therefore the
	 order of the results that they produce does not have to be kept.  */
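
      /* For instance, in a (hypothetical) reduction

	   s_5 = s_4 + x_3;

	 the stmts feeding x_3 are marked vect_used_by_reduction rather
	 than vect_used_in_scope, and live_p is cleared, since only the
	 final value of the reduction is used after the loop and the
	 order of the partial results need not be preserved.  */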

      def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
      tmp_relevant = relevant;
      switch (def_type)
        {
          case vect_reduction_def:
	    switch (tmp_relevant)
	      {
	        case vect_unused_in_scope:
	          relevant = vect_used_by_reduction;
	          break;

	        case vect_used_by_reduction:
	          if (gimple_code (stmt) == GIMPLE_PHI)
                    break;
	          /* fall through */

	        default:
	          if (vect_print_dump_info (REPORT_DETAILS))
	            fprintf (vect_dump, "unsupported use of reduction.");

	          VEC_free (gimple, heap, worklist);
	          return false;
	      }

	    live_p = false;
	    break;

          case vect_nested_cycle:
            if (tmp_relevant != vect_unused_in_scope
                && tmp_relevant != vect_used_in_outer_by_reduction
                && tmp_relevant != vect_used_in_outer)
              {
                if (vect_print_dump_info (REPORT_DETAILS))
                  fprintf (vect_dump, "unsupported use of nested cycle.");

                VEC_free (gimple, heap, worklist);
                return false;
              }

            live_p = false;
            break;

          case vect_double_reduction_def:
            if (tmp_relevant != vect_unused_in_scope
                && tmp_relevant != vect_used_by_reduction)
              {
                if (vect_print_dump_info (REPORT_DETAILS))
                  fprintf (vect_dump, "unsupported use of double reduction.");

                VEC_free (gimple, heap, worklist);
                return false;
              }

            live_p = false;
            break;

          default:
            break;
        }

      FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
	{
	  tree op = USE_FROM_PTR (use_p);
	  if (!process_use (stmt, op, loop_vinfo, live_p, relevant, &worklist))
	    {
	      VEC_free (gimple, heap, worklist);
	      return false;
	    }
	}
    } /* while worklist */

  VEC_free (gimple, heap, worklist);
  return true;
}


int
cost_for_stmt (gimple stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (STMT_VINFO_TYPE (stmt_info))
  {
  case load_vec_info_type:
    return TARG_SCALAR_LOAD_COST;
  case store_vec_info_type:
    return TARG_SCALAR_STORE_COST;
  case shift_vec_info_type:
  case op_vec_info_type:
  case condition_vec_info_type:
  case assignment_vec_info_type:
  case reduc_vec_info_type:
  case induc_vec_info_type:
  case type_promotion_vec_info_type:
  case type_demotion_vec_info_type:
  case type_conversion_vec_info_type:
  case call_vec_info_type:
    return TARG_SCALAR_STMT_COST;
  case undef_vec_info_type:
  default:
    gcc_unreachable ();
  }
}

/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */
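
/* As a worked example (the cost macros are target-defined; values here
   are illustrative): with ncopies == 2 and one constant operand
   (dt[0] == vect_constant_def), the model below yields
     inside_cost  = 2 * TARG_VEC_STMT_COST
     outside_cost = TARG_SCALAR_TO_VEC_COST
   because the constant is broadcast to a vector once, outside the loop.  */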

void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
			enum vect_def_type *dt, slp_tree slp_node)
{
  int i;
  int inside_cost = 0, outside_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  inside_cost = ncopies * TARG_VEC_STMT_COST;

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    {
      if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
	outside_cost += TARG_SCALAR_TO_VEC_COST;
    }

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
             "outside_cost = %d .", inside_cost, outside_cost);

  /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
  stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
}


/* Function vect_cost_strided_group_size

   For strided load or store, return the group_size only if it is the first
   load or store of a group, else return 1.  This ensures that group size is
   only returned once per group.  */

static int
vect_cost_strided_group_size (stmt_vec_info stmt_info)
{
  gimple first_stmt = DR_GROUP_FIRST_DR (stmt_info);

  if (first_stmt == STMT_VINFO_STMT (stmt_info))
    return DR_GROUP_SIZE (stmt_info);

  return 1;
}


/* Function vect_model_store_cost

   Models cost for stores.  In the case of strided accesses, one access
   has the overhead of the strided access attributed to it.  */
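
/* As a worked example: for the first store of an interleaved group of
   size 4 with ncopies == 1, the model below charges
     inside_cost = 1 * exact_log2 (4) * 4 * TARG_VEC_STMT_COST
		   + 1 * TARG_VEC_STORE_COST
   i.e. 8 interleave (permute) stmts plus the store itself; the other
   stores of the group see group_size == 1 and pay only for the store.  */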

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
		       enum vect_def_type dt, slp_tree slp_node)
{
  int group_size;
  int inside_cost = 0, outside_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (dt == vect_constant_def || dt == vect_external_def)
    outside_cost = TARG_SCALAR_TO_VEC_COST;

  /* Strided access?  */
  if (DR_GROUP_FIRST_DR (stmt_info) && !slp_node)
    group_size = vect_cost_strided_group_size (stmt_info);
  /* Not a strided access.  */
  else
    group_size = 1;

  /* Is this an access in a group of stores, which provide strided access?
     If so, add in the cost of the permutes.  */
  if (group_size > 1)
    {
      /* Uses a high and low interleave operation for each needed permute.  */
      inside_cost = ncopies * exact_log2 (group_size) * group_size
	* TARG_VEC_STMT_COST;

      if (vect_print_dump_info (REPORT_COST))
        fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
                 group_size);
    }

  /* Costs of the stores.  */
  inside_cost += ncopies * TARG_VEC_STORE_COST;

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
             "outside_cost = %d .", inside_cost, outside_cost);

  /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
  stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
}


/* Function vect_model_load_cost

   Models cost for loads.  In the case of strided accesses, the last access
   has the overhead of the strided access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */
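
/* As a worked example: an unaligned load that the hardware supports
   directly (dr_unaligned_supported) with ncopies == 2 is charged
     inside_cost = 2 * TARG_VEC_UNALIGNED_LOAD_COST
   while dr_explicit_realign_optimized instead moves the address and
   mask setup (two or three TARG_VEC_STMT_COSTs) outside the loop and
   charges a load plus a realignment op per copy inside it.  */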

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, slp_tree slp_node)
{
  int group_size;
  int alignment_support_scheme;
  gimple first_stmt;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
  int inside_cost = 0, outside_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* Strided accesses?  */
  first_stmt = DR_GROUP_FIRST_DR (stmt_info);
  if (first_stmt && !slp_node)
    {
      group_size = vect_cost_strided_group_size (stmt_info);
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a strided access.  */
  else
    {
      group_size = 1;
      first_dr = dr;
    }

  alignment_support_scheme = vect_supportable_dr_alignment (first_dr);

  /* Is this an access in a group of loads providing strided access?
     If so, add in the cost of the permutes.  */
  if (group_size > 1)
    {
      /* Uses even and odd extract operations for each needed permute.  */
      inside_cost = ncopies * exact_log2 (group_size) * group_size
	* TARG_VEC_STMT_COST;

      if (vect_print_dump_info (REPORT_COST))
        fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
                 group_size);
    }

  /* The loads themselves.  */
  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        inside_cost += ncopies * TARG_VEC_LOAD_COST;

        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_load_cost: aligned.");

        break;
      }
    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned load.  */
        inside_cost += ncopies * TARG_VEC_UNALIGNED_LOAD_COST;

        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
                   "hardware.");

        break;
      }
    case dr_explicit_realign:
      {
        inside_cost += ncopies * (2 * TARG_VEC_LOAD_COST + TARG_VEC_STMT_COST);

        /* FIXME: If the misalignment remains fixed across the iterations of
           the containing loop, the following cost should be added to the
           outside costs.  */
        if (targetm.vectorize.builtin_mask_for_load)
          inside_cost += TARG_VEC_STMT_COST;

        break;
      }
    case dr_explicit_realign_optimized:
      {
        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_load_cost: unaligned software "
                   "pipelined.");

        /* Unaligned software pipeline has a load of an address, an initial
           load, and possibly a mask operation to "prime" the loop.  However,
           if this is an access in a group of loads, which provide strided
           access, then the above cost should only be considered for one
           access in the group.  Inside the loop, there is a load op
           and a realignment op.  */

        if ((!DR_GROUP_FIRST_DR (stmt_info)) || group_size > 1 || slp_node)
          {
            outside_cost = 2 * TARG_VEC_STMT_COST;
            if (targetm.vectorize.builtin_mask_for_load)
              outside_cost += TARG_VEC_STMT_COST;
          }

        inside_cost += ncopies * (TARG_VEC_LOAD_COST + TARG_VEC_STMT_COST);

        break;
      }

    default:
      gcc_unreachable ();
    }

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
             "outside_cost = %d .", inside_cost, outside_cost);

  /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
  stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
}


/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new vector variable with
   the vector elements of VECTOR_VAR.  Place the initialization at GSI if it
   is not NULL.  Otherwise, place the initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */

tree
vect_init_vector (gimple stmt, tree vector_var, tree vector_type,
		  gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  tree new_var;
  gimple init_stmt;
  tree vec_oprnd;
  edge pe;
  tree new_temp;
  basic_block new_bb;

  new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_");
  add_referenced_var (new_var);
  init_stmt = gimple_build_assign (new_var, vector_var);
  new_temp = make_ssa_name (new_var, init_stmt);
  gimple_assign_set_lhs (init_stmt, new_temp);

  if (gsi)
    vect_finish_stmt_generation (stmt, init_stmt, gsi);
  else
    {
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
        {
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

          if (nested_in_vect_loop_p (loop, stmt))
            loop = loop->inner;

	  pe = loop_preheader_edge (loop);
          new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
          gcc_assert (!new_bb);
	}
      else
        {
          bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
          basic_block bb;
          gimple_stmt_iterator gsi_bb_start;

          gcc_assert (bb_vinfo);
          bb = BB_VINFO_BB (bb_vinfo);
          gsi_bb_start = gsi_after_labels (bb);
          gsi_insert_before (&gsi_bb_start, init_stmt, GSI_SAME_STMT);
        }
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "created new init_stmt: ");
      print_gimple_stmt (vect_dump, init_stmt, 0, TDF_SLIM);
    }

  vec_oprnd = gimple_assign_lhs (init_stmt);
  return vec_oprnd;
}


/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will
   be used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector
   def needs to be introduced.  */
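
/* For example (types and SSA names hypothetical): for a scalar stmt
   'x_3 = c_2 * y_4' in the loop, a loop-invariant operand c_2 yields a
   vector {c_2, c_2, ..., c_2} built in the preheader (case 2 below),
   whereas the def for y_4, computed inside the loop, is taken from the
   STMT_VINFO_VEC_STMT of its defining stmt (case 3 below).  */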

tree
vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
{
  tree vec_oprnd;
  gimple vec_stmt;
  gimple def_stmt;
  stmt_vec_info def_stmt_info = NULL;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  unsigned int nunits;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  tree vec_inv;
  tree vec_cst;
  tree t = NULL_TREE;
  tree def;
  int i;
  enum vect_def_type dt;
  bool is_simple_use;
  tree vector_type;

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
      print_generic_expr (vect_dump, op, TDF_SLIM);
    }

  is_simple_use = vect_is_simple_use (op, loop_vinfo, NULL, &def_stmt, &def,
                                      &dt);
  gcc_assert (is_simple_use);
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      if (def)
        {
          fprintf (vect_dump, "def =  ");
          print_generic_expr (vect_dump, def, TDF_SLIM);
        }
      if (def_stmt)
        {
          fprintf (vect_dump, "  def_stmt =  ");
	  print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM);
        }
    }

  switch (dt)
    {
    /* Case 1: operand is a constant.  */
    case vect_constant_def:
      {
	vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
	gcc_assert (vector_type);
	nunits = TYPE_VECTOR_SUBPARTS (vector_type);

	if (scalar_def)
	  *scalar_def = op;

        /* Create 'vect_cst_ = {cst,cst,...,cst}'  */
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);

        for (i = nunits - 1; i >= 0; --i)
          t = tree_cons (NULL_TREE, op, t);

        vec_cst = build_vector (vector_type, t);
        return vect_init_vector (stmt, vec_cst, vector_type, NULL);
      }

    /* Case 2: operand is defined outside the loop - loop invariant.  */
    case vect_external_def:
      {
	vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
	gcc_assert (vector_type);
	nunits = TYPE_VECTOR_SUBPARTS (vector_type);

	if (scalar_def)
	  *scalar_def = def;

        /* Create 'vec_inv = {inv,inv,..,inv}'  */
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Create vector_inv.");

        for (i = nunits - 1; i >= 0; --i)
          t = tree_cons (NULL_TREE, def, t);

	/* FIXME: use build_constructor directly.  */
        vec_inv = build_constructor_from_list (vector_type, t);
        return vect_init_vector (stmt, vec_inv, vector_type, NULL);
      }

    /* Case 3: operand is defined inside the loop.  */
    case vect_internal_def:
      {
	if (scalar_def)
	  *scalar_def = NULL/* FIXME tuples: def_stmt*/;

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);
        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        gcc_assert (vec_stmt);
	if (gimple_code (vec_stmt) == GIMPLE_PHI)
	  vec_oprnd = PHI_RESULT (vec_stmt);
	else if (is_gimple_call (vec_stmt))
	  vec_oprnd = gimple_call_lhs (vec_stmt);
	else
	  vec_oprnd = gimple_assign_lhs (vec_stmt);
        return vec_oprnd;
      }

    /* Case 4: operand is defined by a loop header phi - reduction.  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
      {
	struct loop *loop;

	gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
	loop = (gimple_bb (def_stmt))->loop_father;

        /* Get the def before the loop.  */
        op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
        return get_initial_def_for_reduction (stmt, op, scalar_def);
      }

    /* Case 5: operand is defined by loop-header phi - induction.  */
    case vect_induction_def:
      {
	gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);
        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
	if (gimple_code (vec_stmt) == GIMPLE_PHI)
	  vec_oprnd = PHI_RESULT (vec_stmt);
	else
	  vec_oprnd = gimple_get_lhs (vec_stmt);
        return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}


/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
   of the stmt that defines VEC_OPRND.
   DT is the type of the vector def VEC_OPRND.

   Context:
        In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
   vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:        STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
                        VS1.1:  vx.1 = memref1      VS1.2
                        VS1.2:  vx.2 = memref2      VS1.3
                        VS1.3:  vx.3 = memref3

   S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
                        VSnew.1:  vz1 = vx.1 + ...  VSnew.2
                        VSnew.2:  vz2 = vx.2 + ...  VSnew.3
                        VSnew.3:  vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
        To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

        To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

        For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
        vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
        vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
        vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */

tree
vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
{
  gimple vec_stmt_for_operand;
  stmt_vec_info def_stmt_info;

  /* Do nothing; can reuse same def.  */
  if (dt == vect_external_def || dt == vect_constant_def)
    return vec_oprnd;

  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
  gcc_assert (def_stmt_info);
  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (vec_stmt_for_operand);
  if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
    vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
  else
    vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  return vec_oprnd;
}


/* Get vectorized definitions for the operands to create a copy of an original
   stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */

static void
vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
				 VEC(tree,heap) **vec_oprnds0,
				 VEC(tree,heap) **vec_oprnds1)
{
  tree vec_oprnd = VEC_pop (tree, *vec_oprnds0);

  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
  VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);

  if (vec_oprnds1 && *vec_oprnds1)
    {
      vec_oprnd = VEC_pop (tree, *vec_oprnds1);
      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
      VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
    }
}


/* Get vectorized definitions for OP0 and OP1 (taken from SLP_NODE when it is
   not NULL).  */

static void
vect_get_vec_defs (tree op0, tree op1, gimple stmt,
		   VEC(tree,heap) **vec_oprnds0, VEC(tree,heap) **vec_oprnds1,
		   slp_tree slp_node)
{
  if (slp_node)
    vect_get_slp_defs (op0, op1, slp_node, vec_oprnds0, vec_oprnds1);
  else
    {
      tree vec_oprnd;

      *vec_oprnds0 = VEC_alloc (tree, heap, 1);
      vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
      VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);

      if (op1)
	{
	  *vec_oprnds1 = VEC_alloc (tree, heap, 1);
	  vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
	  VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
	}
    }
}


/* Function vect_finish_stmt_generation.

   Insert a new stmt.  */

void
vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
			     gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);

  gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);

  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);

  set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
                                                   bb_vinfo));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "add new stmt: ");
      print_gimple_stmt (vect_dump, vec_stmt, 0, TDF_SLIM);
    }

  gimple_set_location (vec_stmt, gimple_location (gsi_stmt (*gsi)));
}

/* Checks if CALL can be vectorized in type VECTYPE.  Returns
   a function declaration if the target has a vectorized version
   of the function, or NULL_TREE if the function cannot be vectorized.  */
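
/* For example, on a target whose builtin_vectorized_function hook maps
   BUILT_IN_SQRTF to a vector square-root builtin, a call 'y = sqrtf (x)'
   (which is const, so it passes the flags check below) can be vectorized;
   the actual mapping is entirely target-specific.  */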

tree
vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
{
  tree fndecl = gimple_call_fndecl (call);

  /* We only handle functions that do not read or clobber memory -- i.e.
     const or novops ones.  */
  if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
    return NULL_TREE;

  if (!fndecl
      || TREE_CODE (fndecl) != FUNCTION_DECL
      || !DECL_BUILT_IN (fndecl))
    return NULL_TREE;

  return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
						        vectype_in);
}

/* Function vectorizable_call.

   Check if STMT performs a function call that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt)
{
  tree vec_dest;
  tree scalar_dest;
  tree op, type;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
  tree vectype_out, vectype_in;
  int nunits_in;
  int nunits_out;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  tree fndecl, new_temp, def, rhs_type, lhs_type;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  int ncopies, j;
  VEC(tree, heap) *vargs = NULL;
  enum { NARROW, NONE, WIDEN } modifier;
  size_t i, nargs;

  /* FORNOW: unsupported in basic block SLP.  */
  gcc_assert (loop_vinfo);

  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* FORNOW: SLP not supported.  */
  if (STMT_SLP_TYPE (stmt_info))
    return false;

  /* Is STMT a vectorizable call?  */
  if (!is_gimple_call (stmt))
    return false;

  if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
    return false;

  /* Process function arguments.  */
  rhs_type = NULL_TREE;
  nargs = gimple_call_num_args (stmt);

  /* Bail out if the function has more than two arguments; we do not
     have interesting builtin functions to vectorize with more than
     two arguments.  Having no arguments is not supported either.  */
  if (nargs == 0 || nargs > 2)
    return false;

  for (i = 0; i < nargs; i++)
    {
      op = gimple_call_arg (stmt, i);

      /* We can only handle calls with arguments of the same type.  */
      if (rhs_type
	  && !types_compatible_p (rhs_type, TREE_TYPE (op)))
	{
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "argument types differ.");
	  return false;
	}
      rhs_type = TREE_TYPE (op);

      if (!vect_is_simple_use (op, loop_vinfo, NULL, &def_stmt, &def, &dt[i]))
	{
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "use not simple.");
	  return false;
	}
    }

  vectype_in = get_vectype_for_scalar_type (rhs_type);
  if (!vectype_in)
    return false;
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);

  lhs_type = TREE_TYPE (gimple_call_lhs (stmt));
  vectype_out = get_vectype_for_scalar_type (lhs_type);
  if (!vectype_out)
    return false;
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);

  /* FORNOW */
  if (nunits_in == nunits_out / 2)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else if (nunits_out == nunits_in / 2)
    modifier = WIDEN;
  else
    return false;
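
  /* Illustration (vector types hypothetical): with V2DF arguments and a
     V4SF result, nunits_in == nunits_out / 2, so the call is a NARROW
     case; equal subparts give NONE; V4SF arguments with a V2DF result
     would be WIDEN, which no target currently implements (see below).  */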

  /* For now, we only vectorize functions if a target specific builtin
     is available.  TODO -- in some cases, it might be profitable to
     insert the calls for pieces of the vector, in order to be able
     to vectorize other operations in the loop.  */
  fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
  if (fndecl == NULL_TREE)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "function is not vectorizable.");

      return false;
    }

  gcc_assert (!gimple_vuse (stmt));

  if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "=== vectorizable_call ===");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
      return true;
    }

  /** Transform.  **/

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform operation.");

  /* Handle def.  */
  scalar_dest = gimple_call_lhs (stmt);
  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);

  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; ++j)
	{
	  /* Build argument list for the vectorized call.  */
	  if (j == 0)
	    vargs = VEC_alloc (tree, heap, nargs);
	  else
	    VEC_truncate (tree, vargs, 0);

	  for (i = 0; i < nargs; i++)
	    {
	      op = gimple_call_arg (stmt, i);
	      if (j == 0)
		vec_oprnd0
		  = vect_get_vec_def_for_operand (op, stmt, NULL);
	      else
		{
		  vec_oprnd0 = gimple_call_arg (new_stmt, i);
		  vec_oprnd0
		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
		}

	      VEC_quick_push (tree, vargs, vec_oprnd0);
	    }

	  new_stmt = gimple_build_call_vec (fndecl, vargs);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_call_set_lhs (new_stmt, new_temp);

	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  mark_symbols_for_renaming (new_stmt);

	  if (j == 0)
	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
	  else
	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

	  prev_stmt_info = vinfo_for_stmt (new_stmt);
	}

      break;

    case NARROW:
      for (j = 0; j < ncopies; ++j)
	{
	  /* Build argument list for the vectorized call.  */
	  if (j == 0)
	    vargs = VEC_alloc (tree, heap, nargs * 2);
	  else
	    VEC_truncate (tree, vargs, 0);

	  for (i = 0; i < nargs; i++)
	    {
	      op = gimple_call_arg (stmt, i);
	      if (j == 0)
		{
		  vec_oprnd0
		    = vect_get_vec_def_for_operand (op, stmt, NULL);
		  vec_oprnd1
		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
		}
	      else
		{
		  vec_oprnd1 = gimple_call_arg (new_stmt, 2 * i + 1);
		  vec_oprnd0
		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
		  vec_oprnd1
		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
		}

	      VEC_quick_push (tree, vargs, vec_oprnd0);
	      VEC_quick_push (tree, vargs, vec_oprnd1);
	    }

	  new_stmt = gimple_build_call_vec (fndecl, vargs);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_call_set_lhs (new_stmt, new_temp);

	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  mark_symbols_for_renaming (new_stmt);

	  if (j == 0)
	    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
	  else
	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

	  prev_stmt_info = vinfo_for_stmt (new_stmt);
	}

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);

      break;

    case WIDEN:
      /* No current target implements this case.  */
      return false;
    }

  VEC_free (tree, heap, vargs);

  /* Update the exception handling table with the vector stmt if necessary.  */
  if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
    gimple_purge_dead_eh_edges (gimple_bb (stmt));

  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     rhs of the statement with something harmless.  */

  type = TREE_TYPE (scalar_dest);
  new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
				  fold_convert (type, integer_zero_node));
  set_vinfo_for_stmt (new_stmt, stmt_info);
  set_vinfo_for_stmt (stmt, NULL);
  STMT_VINFO_STMT (stmt_info) = new_stmt;
  gsi_replace (gsi, new_stmt, false);
  SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;

  return true;
}

/* Function vect_gen_widened_results_half

   Create a vector stmt whose code, number of arguments, and result
   variable are CODE, OP_TYPE, and VEC_DEST, and whose arguments are
   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at GSI.
   In the case that CODE is a CALL_EXPR, this means that a call to DECL
   needs to be created (DECL is a function-decl of a target-builtin).
   STMT is the original scalar stmt that we are vectorizing.  */

static gimple
vect_gen_widened_results_half (enum tree_code code,
			       tree decl,
			       tree vec_oprnd0, tree vec_oprnd1, int op_type,
			       tree vec_dest, gimple_stmt_iterator *gsi,
			       gimple stmt)
{
  gimple new_stmt;
  tree new_temp;

  /* Generate half of the widened result:  */
  if (code == CALL_EXPR)
    {
      /* Target specific support.  */
      if (op_type == binary_op)
	new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
      else
	new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_call_set_lhs (new_stmt, new_temp);
    }
  else
    {
      /* Generic support.  */
      gcc_assert (op_type == TREE_CODE_LENGTH (code));
      if (op_type != binary_op)
	vec_oprnd1 = NULL;
      new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
					       vec_oprnd1);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
    }
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return new_stmt;
}


/* Check if STMT performs a conversion operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
1529 
1530 static bool
1531 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
1532 			 gimple *vec_stmt, slp_tree slp_node)
1533 {
1534   tree vec_dest;
1535   tree scalar_dest;
1536   tree op0;
1537   tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1538   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1539   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1540   enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
1541   tree decl1 = NULL_TREE, decl2 = NULL_TREE;
1542   tree new_temp;
1543   tree def;
1544   gimple def_stmt;
1545   enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
1546   gimple new_stmt = NULL;
1547   stmt_vec_info prev_stmt_info;
1548   int nunits_in;
1549   int nunits_out;
1550   tree vectype_out, vectype_in;
1551   int ncopies, j;
1552   tree rhs_type, lhs_type;
1553   tree builtin_decl;
1554   enum { NARROW, NONE, WIDEN } modifier;
1555   int i;
1556   VEC(tree,heap) *vec_oprnds0 = NULL;
1557   tree vop0;
1558   tree integral_type;
1559   VEC(tree,heap) *dummy = NULL;
1560   int dummy_int;
1561 
1562   /* Is STMT a vectorizable conversion?   */
1563 
1564   /* FORNOW: unsupported in basic block SLP.  */
1565   gcc_assert (loop_vinfo);
1566 
1567   if (!STMT_VINFO_RELEVANT_P (stmt_info))
1568     return false;
1569 
1570   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1571     return false;
1572 
1573   if (!is_gimple_assign (stmt))
1574     return false;
1575 
1576   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
1577     return false;
1578 
1579   code = gimple_assign_rhs_code (stmt);
1580   if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
1581     return false;
1582 
1583   /* Check types of lhs and rhs.  */
1584   op0 = gimple_assign_rhs1 (stmt);
1585   rhs_type = TREE_TYPE (op0);
1586   vectype_in = get_vectype_for_scalar_type (rhs_type);
1587   if (!vectype_in)
1588     return false;
1589   nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1590 
1591   scalar_dest = gimple_assign_lhs (stmt);
1592   lhs_type = TREE_TYPE (scalar_dest);
1593   vectype_out = get_vectype_for_scalar_type (lhs_type);
1594   if (!vectype_out)
1595     return false;
1596   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1597 
1598   /* FORNOW */
1599   if (nunits_in == nunits_out / 2)
1600     modifier = NARROW;
1601   else if (nunits_out == nunits_in)
1602     modifier = NONE;
1603   else if (nunits_out == nunits_in / 2)
1604     modifier = WIDEN;
1605   else
1606     return false;
1607 
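       /* For illustration (hypothetical vector modes): FIX_TRUNC_EXPR
          double->int maps V2DF (nunits_in == 2) to V4SI (nunits_out == 4),
          hence NARROW; FLOAT_EXPR int->float maps V4SI to V4SF, hence NONE;
          FLOAT_EXPR int->double maps V4SI to V2DF, hence WIDEN.  */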
1608   if (modifier == NONE)
1609     gcc_assert (STMT_VINFO_VECTYPE (stmt_info) == vectype_out);
1610 
1611   /* Bail out if the types are both integral or both non-integral.  */
1612   if ((INTEGRAL_TYPE_P (rhs_type) && INTEGRAL_TYPE_P (lhs_type))
1613       || (!INTEGRAL_TYPE_P (rhs_type) && !INTEGRAL_TYPE_P (lhs_type)))
1614     return false;
1615 
1616   integral_type = INTEGRAL_TYPE_P (rhs_type) ? vectype_in : vectype_out;
1617 
1618   if (modifier == NARROW)
1619     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1620   else
1621     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1622 
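       /* E.g., in the loop-based case with VF == 8 and the hypothetical modes
          above: for NARROW, ncopies == 8 / nunits_out == 2 and each copy
          consumes two input vectors; for WIDEN, ncopies == 8 / nunits_in == 2
          and each copy produces two output vectors.  */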
1623   /* FORNOW: SLP with multiple types is not supported. The SLP analysis verifies
1624      this, so we can safely override NCOPIES with 1 here.  */
1625   if (slp_node)
1626     ncopies = 1;
1627 
1628   /* Sanity check: make sure that at least one copy of the vectorized stmt
1629      needs to be generated.  */
1630   gcc_assert (ncopies >= 1);
1631 
1632   /* Check the operands of the operation.  */
1633   if (!vect_is_simple_use (op0, loop_vinfo, NULL, &def_stmt, &def, &dt[0]))
1634     {
1635       if (vect_print_dump_info (REPORT_DETAILS))
1636 	fprintf (vect_dump, "use not simple.");
1637       return false;
1638     }
1639 
1640   /* Supportable by target?  */
1641   if ((modifier == NONE
1642        && !targetm.vectorize.builtin_conversion (code, integral_type))
1643       || (modifier == WIDEN
1644 	  && !supportable_widening_operation (code, stmt, vectype_in,
1645 					      &decl1, &decl2,
1646 					      &code1, &code2,
1647                                               &dummy_int, &dummy))
1648       || (modifier == NARROW
1649 	  && !supportable_narrowing_operation (code, stmt, vectype_in,
1650 					       &code1, &dummy_int, &dummy)))
1651     {
1652       if (vect_print_dump_info (REPORT_DETAILS))
1653         fprintf (vect_dump, "conversion not supported by target.");
1654       return false;
1655     }
1656 
1657   if (modifier != NONE)
1658     {
1659       STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
1660       /* FORNOW: SLP not supported.  */
1661       if (STMT_SLP_TYPE (stmt_info))
1662 	return false;
1663     }
1664 
1665   if (!vec_stmt)		/* transformation not required.  */
1666     {
1667       STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
1668       return true;
1669     }
1670 
1671   /** Transform.  **/
1672   if (vect_print_dump_info (REPORT_DETAILS))
1673     fprintf (vect_dump, "transform conversion.");
1674 
1675   /* Handle def.  */
1676   vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1677 
1678   if (modifier == NONE && !slp_node)
1679     vec_oprnds0 = VEC_alloc (tree, heap, 1);
1680 
1681   prev_stmt_info = NULL;
1682   switch (modifier)
1683     {
1684     case NONE:
1685       for (j = 0; j < ncopies; j++)
1686 	{
1687 	  if (j == 0)
1688 	    vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
1689 	  else
1690 	    vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
1691 
1692 	  builtin_decl =
1693 	    targetm.vectorize.builtin_conversion (code, integral_type);
1694 	  for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vop0); i++)
1695 	    {
1696 	      /* Arguments are ready.  Create the new vector stmt.  */
1697 	      new_stmt = gimple_build_call (builtin_decl, 1, vop0);
1698 	      new_temp = make_ssa_name (vec_dest, new_stmt);
1699 	      gimple_call_set_lhs (new_stmt, new_temp);
1700 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
1701 	      if (slp_node)
1702 		VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
1703 	    }
1704 
1705 	  if (j == 0)
1706 	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1707 	  else
1708 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1709 	  prev_stmt_info = vinfo_for_stmt (new_stmt);
1710 	}
1711       break;
1712 
1713     case WIDEN:
1714       /* In case the vectorization factor (VF) is bigger than the number
1715 	 of elements that we can fit in a vectype (nunits), we have to
1716 	 generate more than one vector stmt - i.e., we need to "unroll"
1717 	 the vector stmt by a factor VF/nunits.  */
1718       for (j = 0; j < ncopies; j++)
1719 	{
1720 	  if (j == 0)
1721 	    vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
1722 	  else
1723 	    vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
1724 
1725 	  STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
1726 
1727 	  /* Generate first half of the widened result:  */
1728 	  new_stmt
1729 	    = vect_gen_widened_results_half (code1, decl1,
1730 					     vec_oprnd0, vec_oprnd1,
1731 					     unary_op, vec_dest, gsi, stmt);
1732 	  if (j == 0)
1733 	    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
1734 	  else
1735 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1736 	  prev_stmt_info = vinfo_for_stmt (new_stmt);
1737 
1738 	  /* Generate second half of the widened result:  */
1739 	  new_stmt
1740 	    = vect_gen_widened_results_half (code2, decl2,
1741 					     vec_oprnd0, vec_oprnd1,
1742 					     unary_op, vec_dest, gsi, stmt);
1743 	  STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1744 	  prev_stmt_info = vinfo_for_stmt (new_stmt);
1745 	}
1746       break;
1747 
1748     case NARROW:
1749       /* In case the vectorization factor (VF) is bigger than the number
1750 	 of elements that we can fit in a vectype (nunits), we have to
1751 	 generate more than one vector stmt - i.e., we need to "unroll"
1752 	 the vector stmt by a factor VF/nunits.  */
1753       for (j = 0; j < ncopies; j++)
1754 	{
1755 	  /* Handle uses.  */
1756 	  if (j == 0)
1757 	    {
1758 	      vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
1759 	      vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
1760 	    }
1761 	  else
1762 	    {
1763 	      vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd1);
1764 	      vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
1765 	    }
1766 
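     	  /* Note that copy J consumes the input vector defs 2J and 2J+1:
     	     threading the last def through VEC_OPRND1 above makes each
     	     vect_get_vec_def_for_stmt_copy call advance one step along the
     	     chain of OP0's vectorized defs.  */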
1767 	  /* Arguments are ready. Create the new vector stmt.  */
1768 	  new_stmt = gimple_build_assign_with_ops (code1, vec_dest, vec_oprnd0,
1769 						   vec_oprnd1);
1770 	  new_temp = make_ssa_name (vec_dest, new_stmt);
1771 	  gimple_assign_set_lhs (new_stmt, new_temp);
1772 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
1773 
1774 	  if (j == 0)
1775 	    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
1776 	  else
1777 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1778 
1779 	  prev_stmt_info = vinfo_for_stmt (new_stmt);
1780 	}
1781 
1782       *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
1783     }
1784 
1785   if (vec_oprnds0)
1786     VEC_free (tree, heap, vec_oprnds0);
1787 
1788   return true;
1789 }


1790 /* Function vectorizable_assignment.
1791 
1792    Check if STMT performs an assignment (copy) that can be vectorized.
1793    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1794    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1795    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
1796 
1797 static bool
1798 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
1799 			 gimple *vec_stmt, slp_tree slp_node)
1800 {
1801   tree vec_dest;
1802   tree scalar_dest;
1803   tree op;
1804   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1805   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1806   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1807   tree new_temp;
1808   tree def;
1809   gimple def_stmt;
1810   enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
1811   unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1812   int ncopies;
1813   int i, j;
1814   VEC(tree,heap) *vec_oprnds = NULL;
1815   tree vop;
1816   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1817   gimple new_stmt = NULL;
1818   stmt_vec_info prev_stmt_info = NULL;
1819   enum tree_code code;
1820   tree vectype_in, vectype_out;
1821 
1822   /* Multiple types in SLP are handled by creating the appropriate number of
1823      vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
1824      case of SLP.  */
1825   if (slp_node)
1826     ncopies = 1;
1827   else
1828     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
1829 
1830   gcc_assert (ncopies >= 1);
1831 
1832   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
1833     return false;
1834 
1835   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1836     return false;
1837 
1838   /* Is vectorizable assignment?  */
1839   if (!is_gimple_assign (stmt))
1840     return false;
1841 
1842   scalar_dest = gimple_assign_lhs (stmt);
1843   if (TREE_CODE (scalar_dest) != SSA_NAME)
1844     return false;
1845 
1846   code = gimple_assign_rhs_code (stmt);
1847   if (gimple_assign_single_p (stmt)
1848       || code == PAREN_EXPR
1849       || CONVERT_EXPR_CODE_P (code))
1850     op = gimple_assign_rhs1 (stmt);
1851   else
1852     return false;
1853 
1854   if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt[0]))
1855     {
1856       if (vect_print_dump_info (REPORT_DETAILS))
1857         fprintf (vect_dump, "use not simple.");
1858       return false;
1859     }
1860 
1861   /* We can handle NOP_EXPR conversions that do not change the number
1862      of elements or the vector size.  */
1863   vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op));
1864   vectype_out
1865     = get_vectype_for_scalar_type (TREE_TYPE (gimple_assign_lhs (stmt)));
1866   if (CONVERT_EXPR_CODE_P (code)
1867       && (!vectype_in
1868 	  || !vectype_out
1869 	  || (TYPE_VECTOR_SUBPARTS (vectype_out)
1870 	      != TYPE_VECTOR_SUBPARTS (vectype_in))
1871 	  || (GET_MODE_SIZE (TYPE_MODE (vectype_out))
1872 	      != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
1873     return false;
1874 
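       /* E.g. (hypothetical modes): a NOP_EXPR from int to unsigned int is
          accepted (the two vector types have the same number of subparts and
          the same mode size), whereas int to short is rejected because the
          number of elements would change.  */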
1875   if (!vec_stmt) /* transformation not required.  */
1876     {
1877       STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
1878       if (vect_print_dump_info (REPORT_DETAILS))
1879         fprintf (vect_dump, "=== vectorizable_assignment ===");
1880       vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
1881       return true;
1882     }
1883 
1884   /** Transform.  **/
1885   if (vect_print_dump_info (REPORT_DETAILS))
1886     fprintf (vect_dump, "transform assignment.");
1887 
1888   /* Handle def.  */
1889   vec_dest = vect_create_destination_var (scalar_dest, vectype);
1890 
1891   /* Handle use.  */
1892   for (j = 0; j < ncopies; j++)
1893     {
1894       /* Handle uses.  */
1895       if (j == 0)
1896         vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
1897       else
1898         vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
1899 
1900       /* Arguments are ready.  Create the new vector stmt.  */
1901       for (i = 0; VEC_iterate (tree, vec_oprnds, i, vop); i++)
1902        {
1903 	 if (CONVERT_EXPR_CODE_P (code))
1904 	   vop = build1 (VIEW_CONVERT_EXPR, vectype_out, vop);
1905          new_stmt = gimple_build_assign (vec_dest, vop);
1906          new_temp = make_ssa_name (vec_dest, new_stmt);
1907          gimple_assign_set_lhs (new_stmt, new_temp);
1908          vect_finish_stmt_generation (stmt, new_stmt, gsi);
1909          if (slp_node)
1910            VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
1911        }
1912 
1913       if (slp_node)
1914         continue;
1915 
1916       if (j == 0)
1917         STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1918       else
1919         STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1920 
1921       prev_stmt_info = vinfo_for_stmt (new_stmt);
1922     }
1923 
1924   VEC_free (tree, heap, vec_oprnds);
1925   return true;
1926 }
1927 
1928 
1929 /* Function vectorizable_shift.
1930 
1931    Check if STMT performs a shift operation that can be vectorized.
1932    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1933    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1934    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
1935 
1936 static bool
1937 vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
1938                     gimple *vec_stmt, slp_tree slp_node)
1939 {
1940   tree vec_dest;
1941   tree scalar_dest;
1942   tree op0, op1 = NULL;
1943   tree vec_oprnd1 = NULL_TREE;
1944   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1945   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1946   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1947   enum tree_code code;
1948   enum machine_mode vec_mode;
1949   tree new_temp;
1950   int op_type;
1951   optab optab;
1952   int icode;
1953   enum machine_mode optab_op2_mode;
1954   tree def;
1955   gimple def_stmt;
1956   enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
1957   gimple new_stmt = NULL;
1958   stmt_vec_info prev_stmt_info;
1959   int nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
1960   int nunits_out;
1961   tree vectype_out;
1962   int ncopies;
1963   int j, i;
1964   VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
1965   tree vop0, vop1;
1966   unsigned int k;
1967   bool scalar_shift_arg = false;
1968   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1969   int vf;
1970 
1971   if (loop_vinfo)
1972     vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1973   else
1974     vf = 1;
1975 
1976   /* Multiple types in SLP are handled by creating the appropriate number of
1977      vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
1978      case of SLP.  */
1979   if (slp_node)
1980     ncopies = 1;
1981   else
1982     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1983 
1984   gcc_assert (ncopies >= 1);
1985 
1986   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
1987     return false;
1988 
1989   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1990     return false;
1991 
1992   /* Is STMT a vectorizable shift?  */
1993   if (!is_gimple_assign (stmt))
1994     return false;
1995 
1996   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
1997     return false;
1998 
1999   scalar_dest = gimple_assign_lhs (stmt);
2000   vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
2001   if (!vectype_out)
2002     return false;
2003   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2004   if (nunits_out != nunits_in)
2005     return false;
2006 
2007   code = gimple_assign_rhs_code (stmt);
2008 
2009   if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
2010         || code == RROTATE_EXPR))
2011     return false;
2012 
2013   op_type = TREE_CODE_LENGTH (code);
2014   op0 = gimple_assign_rhs1 (stmt);
2015   if (!vect_is_simple_use (op0, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt[0]))
2016     {
2017       if (vect_print_dump_info (REPORT_DETAILS))
2018         fprintf (vect_dump, "use not simple.");
2019       return false;
2020     }
2021 
2022   op1 = gimple_assign_rhs2 (stmt);
2023   if (!vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt[1]))
2024     {
2025       if (vect_print_dump_info (REPORT_DETAILS))
2026         fprintf (vect_dump, "use not simple.");
2027       return false;
2028     }
2029 
2030   /* Determine whether the shift amount is a vector or a scalar.  If the
2031      shift/rotate amount is a vector, use the vector/vector shift optabs.  */
2032   /* Vector shifted by vector.  */
2033   if (dt[1] == vect_internal_def)
2034     {
2035       optab = optab_for_tree_code (code, vectype, optab_vector);
2036       if (vect_print_dump_info (REPORT_DETAILS))
2037         fprintf (vect_dump, "vector/vector shift/rotate found.");
2038     }
2039 
2040   /* See if the machine has a vector-shifted-by-scalar insn, and if not,
2041      see if it has a vector-shifted-by-vector insn.  */
2042   else if (dt[1] == vect_constant_def || dt[1] == vect_external_def)
2043     {
2044       optab = optab_for_tree_code (code, vectype, optab_scalar);
2045       if (optab
2046           && (optab_handler (optab, TYPE_MODE (vectype))->insn_code
2047               != CODE_FOR_nothing))
2048         {
2049           scalar_shift_arg = true;
2050           if (vect_print_dump_info (REPORT_DETAILS))
2051             fprintf (vect_dump, "vector/scalar shift/rotate found.");
2052         }
2053       else
2054         {
2055           optab = optab_for_tree_code (code, vectype, optab_vector);
2056           if (optab
2057               && (optab_handler (optab, TYPE_MODE (vectype))->insn_code
2058                   != CODE_FOR_nothing))
2059             {
2060               if (vect_print_dump_info (REPORT_DETAILS))
2061                 fprintf (vect_dump, "vector/vector shift/rotate found.");
2062 
2063               /* Unlike the other binary operators, shifts/rotates have
2064                  an int rhs rather than one of the same type as the lhs,
2065                  so make sure the scalar is of the right type when we are
2066                  dealing with vectors of short/char.  */
2067               if (dt[1] == vect_constant_def)
2068                 op1 = fold_convert (TREE_TYPE (vectype), op1);
2069             }
2070         }
2071     }
2072 
2073   else
2074     {
2075       if (vect_print_dump_info (REPORT_DETAILS))
2076         fprintf (vect_dump, "operand mode requires invariant argument.");
2077       return false;
2078     }
2079 
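       /* E.g., in a[i] = b[i] << c[i] the shift amount is vect_internal_def
          and the vector/vector optab is chosen above, whereas in
          a[i] = b[i] << 3 it is vect_constant_def and the vector/scalar
          optab is tried first.  */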
2080   /* Supportable by target?  */
2081   if (!optab)
2082     {
2083       if (vect_print_dump_info (REPORT_DETAILS))
2084         fprintf (vect_dump, "no optab.");
2085       return false;
2086     }
2087   vec_mode = TYPE_MODE (vectype);
2088   icode = (int) optab_handler (optab, vec_mode)->insn_code;
2089   if (icode == CODE_FOR_nothing)
2090     {
2091       if (vect_print_dump_info (REPORT_DETAILS))
2092         fprintf (vect_dump, "op not supported by target.");
2093       /* Check only during analysis.  */
2094       if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
2095           || (vf < vect_min_worthwhile_factor (code)
2096               && !vec_stmt))
2097         return false;
2098       if (vect_print_dump_info (REPORT_DETAILS))
2099         fprintf (vect_dump, "proceeding using word mode.");
2100     }
2101 
2102   /* Worthwhile without SIMD support? Check only during analysis.  */
2103   if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2104       && vf < vect_min_worthwhile_factor (code)
2105       && !vec_stmt)
2106     {
2107       if (vect_print_dump_info (REPORT_DETAILS))
2108         fprintf (vect_dump, "not worthwhile without SIMD support.");
2109       return false;
2110     }
2111 
2112   if (!vec_stmt) /* transformation not required.  */
2113     {
2114       STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
2115       if (vect_print_dump_info (REPORT_DETAILS))
2116         fprintf (vect_dump, "=== vectorizable_shift ===");
2117       vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2118       return true;
2119     }
2120 
2121   /** Transform.  **/
2122 
2123   if (vect_print_dump_info (REPORT_DETAILS))
2124     fprintf (vect_dump, "transform shift.");
2125 
2126   /* Handle def.  */
2127   vec_dest = vect_create_destination_var (scalar_dest, vectype);
2128 
2129   /* Allocate VECs for vector operands. In case of SLP, vector operands are
2130      created in the previous stages of the recursion, so no allocation is
2131      needed, except for the case of shift with scalar shift argument. In that
2132      case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
2133      be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
2134      In case of loop-based vectorization we allocate VECs of size 1 for
2135      both operands.  */
2136   if (!slp_node)
2137     {
2138       vec_oprnds0 = VEC_alloc (tree, heap, 1);
2139       vec_oprnds1 = VEC_alloc (tree, heap, 1);
2140     }
2141   else if (scalar_shift_arg)
2142     vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
2143 
2144   prev_stmt_info = NULL;
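       /* E.g., for an SLP node with VEC_STMTS_SIZE == 4 whose shift amount is
          a scalar (see below), the same scalar operand is pushed into
          VEC_OPRNDS1 four times, once per vector stmt to be created.  */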
2145   for (j = 0; j < ncopies; j++)
2146     {
2147       /* Handle uses.  */
2148       if (j == 0)
2149         {
2150           if (scalar_shift_arg)
2151             {
2152               /* Vector shl and shr insn patterns can be defined with a
2153                  scalar operand 2 (the shift operand).  In this case, use
2154                  the constant or loop-invariant op1 directly, without
2155                  extending it to vector mode first.  */
2156               optab_op2_mode = insn_data[icode].operand[2].mode;
2157               if (!VECTOR_MODE_P (optab_op2_mode))
2158                 {
2159                   if (vect_print_dump_info (REPORT_DETAILS))
2160                     fprintf (vect_dump, "operand 1 using scalar mode.");
2161                   vec_oprnd1 = op1;
2162                   VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2163                   if (slp_node)
2164                     {
2165                       /* Store vec_oprnd1 for every vector stmt to be created
2166                          for SLP_NODE. We check during the analysis that all the
2167                          shift arguments are the same.
2168                          TODO: Allow different constants for different vector
2169                          stmts generated for an SLP instance.  */
2170                       for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
2171                         VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2172                     }
2173                 }
2174             }
2175 
2176           /* vec_oprnd1 is available if operand 1 should be of a scalar type
2177              (a special case for certain kinds of vector shifts); otherwise,
2178              operand 1 should be of a vector type (the usual case).  */
2179           if (vec_oprnd1)
2180             vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2181                                slp_node);
2182           else
2183             vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
2184                                slp_node);
2185         }
2186       else
2187         vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
2188 
2189       /* Arguments are ready. Create the new vector stmt.  */
2190       for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vop0); i++)
2191         {
2192           vop1 = VEC_index (tree, vec_oprnds1, i);
2193           new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2194           new_temp = make_ssa_name (vec_dest, new_stmt);
2195           gimple_assign_set_lhs (new_stmt, new_temp);
2196           vect_finish_stmt_generation (stmt, new_stmt, gsi);
2197           if (slp_node)
2198             VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2199         }
2200 
2201       if (slp_node)
2202         continue;
2203 
2204       if (j == 0)
2205         STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2206       else
2207         STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2208       prev_stmt_info = vinfo_for_stmt (new_stmt);
2209     }
2210 
2211   VEC_free (tree, heap, vec_oprnds0);
2212   VEC_free (tree, heap, vec_oprnds1);
2213 
2214   return true;
2215 }
2216 
2217 
2218 /* Function vectorizable_operation.
2219 
2220    Check if STMT performs a binary or unary operation that can be vectorized.
2221    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2222    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2223    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
2224 
2225 static bool
2226 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
2227 			gimple *vec_stmt, slp_tree slp_node)
2228 {
2229   tree vec_dest;
2230   tree scalar_dest;
2231   tree op0, op1 = NULL;
2232   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2233   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2234   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2235   enum tree_code code;
2236   enum machine_mode vec_mode;
2237   tree new_temp;
2238   int op_type;
2239   optab optab;
2240   int icode;
2241   tree def;
2242   gimple def_stmt;
2243   enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2244   gimple new_stmt = NULL;
2245   stmt_vec_info prev_stmt_info;
2246   int nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
2247   int nunits_out;
2248   tree vectype_out;
2249   int ncopies;
2250   int j, i;
2251   VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
2252   tree vop0, vop1;
2253   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2254   int vf;
2255 
2256   if (loop_vinfo)
2257     vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2258   else
2259     vf = 1;
2260 
2261   /* Multiple types in SLP are handled by creating the appropriate number of
2262      vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2263      case of SLP.  */
2264   if (slp_node)
2265     ncopies = 1;
2266   else
2267     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2268 
2269   gcc_assert (ncopies >= 1);
2270 
2271   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2272     return false;
2273 
2274   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2275     return false;
2276 
2277   /* Is STMT a vectorizable binary/unary operation?   */
2278   if (!is_gimple_assign (stmt))
2279     return false;
2280 
2281   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2282     return false;
2283 
2284   scalar_dest = gimple_assign_lhs (stmt);
2285   vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
2286   if (!vectype_out)
2287     return false;
2288   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2289   if (nunits_out != nunits_in)
2290     return false;
2291 
2292   code = gimple_assign_rhs_code (stmt);
2293 
2294   /* For pointer addition, we should use the normal plus for
2295      the vector addition.  */
2296   if (code == POINTER_PLUS_EXPR)
2297     code = PLUS_EXPR;
2298 
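       /* E.g., a POINTER_PLUS_EXPR such as q_2 = p_1 + 4 is vectorized as an
          ordinary vector PLUS_EXPR on the pointer values.  */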
2299   /* Support only unary or binary operations.  */
2300   op_type = TREE_CODE_LENGTH (code);
2301   if (op_type != unary_op && op_type != binary_op)
2302     {
2303       if (vect_print_dump_info (REPORT_DETAILS))
2304 	fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
2305       return false;
2306     }
2307 
2308   op0 = gimple_assign_rhs1 (stmt);
2309   if (!vect_is_simple_use (op0, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt[0]))
2310     {
2311       if (vect_print_dump_info (REPORT_DETAILS))
2312         fprintf (vect_dump, "use not simple.");
2313       return false;
2314     }
2315 
2316   if (op_type == binary_op)
2317     {
2318       op1 = gimple_assign_rhs2 (stmt);
2319       if (!vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt, &def,
2320                                &dt[1]))
2321 	{
2322 	  if (vect_print_dump_info (REPORT_DETAILS))
2323 	    fprintf (vect_dump, "use not simple.");
2324 	  return false;
2325 	}
2326     }
2327 
2328   /* Shifts are handled in vectorizable_shift ().  */
2329   if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
2330       || code == RROTATE_EXPR)
2331     return false;
2332 
2333   optab = optab_for_tree_code (code, vectype, optab_default);
2334 
2335   /* Supportable by target?  */
2336   if (!optab)
2337     {
2338       if (vect_print_dump_info (REPORT_DETAILS))
2339 	fprintf (vect_dump, "no optab.");
2340       return false;
2341     }
2342   vec_mode = TYPE_MODE (vectype);
2343   icode = (int) optab_handler (optab, vec_mode)->insn_code;
2344   if (icode == CODE_FOR_nothing)
2345     {
2346       if (vect_print_dump_info (REPORT_DETAILS))
2347 	fprintf (vect_dump, "op not supported by target.");
2348       /* Check only during analysis.  */
2349       if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
2350 	  || (vf < vect_min_worthwhile_factor (code)
2351               && !vec_stmt))
2352         return false;
2353       if (vect_print_dump_info (REPORT_DETAILS))
2354 	fprintf (vect_dump, "proceeding using word mode.");
2355     }
2356 
2357   /* Worthwhile without SIMD support? Check only during analysis.  */
2358   if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2359       && vf < vect_min_worthwhile_factor (code)
2360       && !vec_stmt)
2361     {
2362       if (vect_print_dump_info (REPORT_DETAILS))
2363 	fprintf (vect_dump, "not worthwhile without SIMD support.");
2364       return false;
2365     }
2366 
2367   if (!vec_stmt) /* transformation not required.  */
2368     {
2369       STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
2370       if (vect_print_dump_info (REPORT_DETAILS))
2371         fprintf (vect_dump, "=== vectorizable_operation ===");
2372       vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2373       return true;
2374     }
2375 
2376   /** Transform.  **/
2377 
2378   if (vect_print_dump_info (REPORT_DETAILS))
2379     fprintf (vect_dump, "transform binary/unary operation.");
2380 
2381   /* Handle def.  */
2382   vec_dest = vect_create_destination_var (scalar_dest, vectype);
2383 
2384   /* Allocate VECs for vector operands. In case of SLP, vector operands are
2385      created in the previous stages of the recursion, so no allocation is
2386      needed, except for the case of shift with scalar shift argument. In that
2387      case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
2388      be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
2389      In case of loop-based vectorization we allocate VECs of size 1. We
2390      allocate VEC_OPRNDS1 only in case of binary operation.  */
2391   if (!slp_node)
2392     {
2393       vec_oprnds0 = VEC_alloc (tree, heap, 1);
2394       if (op_type == binary_op)
2395         vec_oprnds1 = VEC_alloc (tree, heap, 1);
2396     }
2397 
2398   /* In case the vectorization factor (VF) is bigger than the number
2399      of elements that we can fit in a vectype (nunits), we have to generate
2400      more than one vector stmt - i.e., we need to "unroll" the
2401      vector stmt by a factor VF/nunits. In doing so, we record a pointer
2402      from one copy of the vector stmt to the next, in the field
2403      STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
2404      stages to find the correct vector defs to be used when vectorizing
2405      stmts that use the defs of the current stmt. The example below illustrates
2406      the vectorization process when VF=16 and nunits=4 (i.e., we need to create
2407      4 vectorized stmts):
2408 
2409      before vectorization:
2410                                 RELATED_STMT    VEC_STMT
2411         S1:     x = memref      -               -
2412         S2:     z = x + 1       -               -
2413 
2414      step 1: vectorize stmt S1 (done in vectorizable_load. See more details
2415              there):
2416                                 RELATED_STMT    VEC_STMT
2417         VS1_0:  vx0 = memref0   VS1_1           -
2418         VS1_1:  vx1 = memref1   VS1_2           -
2419         VS1_2:  vx2 = memref2   VS1_3           -
2420         VS1_3:  vx3 = memref3   -               -
2421         S1:     x = load        -               VS1_0
2422         S2:     z = x + 1       -               -
2423 
2424      step2: vectorize stmt S2 (done here):
2425         To vectorize stmt S2 we first need to find the relevant vector
2426         def for the first operand 'x'. This is, as usual, obtained from
2427         the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
2428         that defines 'x' (S1). This way we find the stmt VS1_0, and the
2429         relevant vector def 'vx0'. Having found 'vx0' we can generate
2430         the vector stmt VS2_0, and as usual, record it in the
2431         STMT_VINFO_VEC_STMT of stmt S2.
2432         When creating the second copy (VS2_1), we obtain the relevant vector
2433         def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
2434         stmt VS1_0. This way we find the stmt VS1_1 and the relevant
2435         vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
2436         pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
2437         Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
2438         chain of stmts and pointers:
2439                                 RELATED_STMT    VEC_STMT
2440         VS1_0:  vx0 = memref0   VS1_1           -
2441         VS1_1:  vx1 = memref1   VS1_2           -
2442         VS1_2:  vx2 = memref2   VS1_3           -
2443         VS1_3:  vx3 = memref3   -               -
2444         S1:     x = load        -               VS1_0
2445         VS2_0:  vz0 = vx0 + v1  VS2_1           -
2446         VS2_1:  vz1 = vx1 + v1  VS2_2           -
2447         VS2_2:  vz2 = vx2 + v1  VS2_3           -
2448         VS2_3:  vz3 = vx3 + v1  -               -
2449         S2:     z = x + 1       -               VS2_0  */
2450 
2451   prev_stmt_info = NULL;
2452   for (j = 0; j < ncopies; j++)
2453     {
2454       /* Handle uses.  */
2455       if (j == 0)
2456 	{
2457 	  if (op_type == binary_op)
2458 	    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
2459 			       slp_node);
2460 	  else
2461 	    vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2462 			       slp_node);
2463 	}
2464       else
2465 	vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
2466 
2467       /* Arguments are ready. Create the new vector stmt.  */
2468       for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vop0); i++)
2469         {
2470 	  vop1 = ((op_type == binary_op)
2471 		  ? VEC_index (tree, vec_oprnds1, i) : NULL);
2472 	  new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2473 	  new_temp = make_ssa_name (vec_dest, new_stmt);
2474 	  gimple_assign_set_lhs (new_stmt, new_temp);
2475 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
2476           if (slp_node)
2477 	    VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2478         }
2479 
2480       if (slp_node)
2481         continue;
2482 
2483       if (j == 0)
2484 	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2485       else
2486 	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2487       prev_stmt_info = vinfo_for_stmt (new_stmt);
2488     }
2489 
2490   VEC_free (tree, heap, vec_oprnds0);
2491   if (vec_oprnds1)
2492     VEC_free (tree, heap, vec_oprnds1);
2493 
2494   return true;
2495 }
2496 
2497 
2498 /* Get vectorized definitions for loop-based vectorization. For the first
2499    operand we call vect_get_vec_def_for_operand() (with OPRND containing
2500    scalar operand), and for the rest we get a copy with
2501    vect_get_vec_def_for_stmt_copy() using the previous vector definition
2502    (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
2503    The vectors are collected into VEC_OPRNDS.  */
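     For example (hypothetical modes), for an int -> char demotion done via
     one intermediate type (V4SI -> V8HI -> V16QI, MULTI_STEP_CVT == 1 in the
     caller), this function is called with
     MULTI_STEP_CVT == vect_pow2 (1) - 1 == 1 and collects the four V4SI defs
     needed to produce one V16QI result.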
2504 
2505 static void
2506 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
2507                           VEC (tree, heap) **vec_oprnds, int multi_step_cvt)
2508 {
2509   tree vec_oprnd;
2510 
2511   /* Get the first vector operand.  */
2512   /* All the vector operands except the very first one (which is the
2513      scalar OPRND) are stmt copies.  */
2514   if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
2515     vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
2516   else
2517     vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
2518 
2519   VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2520 
2521   /* Get second vector operand.  */
2522   vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
2523   VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2524 
2525   *oprnd = vec_oprnd;
2526 
2527   /* For conversion in multiple steps, continue to get operands
2528      recursively.  */
2529   if (multi_step_cvt)
2530     vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds,  multi_step_cvt - 1);
2531 }
2532 
2533 
2534 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
2535    For multi-step conversions store the resulting vectors and call the function
2536    recursively.  */
2537 
2538 static void
2539 vect_create_vectorized_demotion_stmts (VEC (tree, heap) **vec_oprnds,
2540                                        int multi_step_cvt, gimple stmt,
2541                                        VEC (tree, heap) *vec_dsts,
2542                                        gimple_stmt_iterator *gsi,
2543                                        slp_tree slp_node, enum tree_code code,
2544                                        stmt_vec_info *prev_stmt_info)
2545 {
2546   unsigned int i;
2547   tree vop0, vop1, new_tmp, vec_dest;
2548   gimple new_stmt;
2549   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2550 
2551   vec_dest = VEC_pop (tree, vec_dsts);
2552 
2553   for (i = 0; i < VEC_length (tree, *vec_oprnds); i += 2)
2554     {
2555       /* Create demotion operation.  */
2556       vop0 = VEC_index (tree, *vec_oprnds, i);
2557       vop1 = VEC_index (tree, *vec_oprnds, i + 1);
2558       new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2559       new_tmp = make_ssa_name (vec_dest, new_stmt);
2560       gimple_assign_set_lhs (new_stmt, new_tmp);
2561       vect_finish_stmt_generation (stmt, new_stmt, gsi);
2562 
2563       if (multi_step_cvt)
2564         /* Store the resulting vector for the next recursive call.  */
2565         VEC_replace (tree, *vec_oprnds, i/2, new_tmp);
2566       else
2567         {
2568           /* This is the last step of the conversion sequence. Store the
2569              vectors in SLP_NODE or in vector info of the scalar statement
2570              (or in STMT_VINFO_RELATED_STMT chain).  */
2571           if (slp_node)
2572             VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2573           else
2574             {
2575               if (!*prev_stmt_info)
2576                 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2577               else
2578                 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
2579 
2580               *prev_stmt_info = vinfo_for_stmt (new_stmt);
2581             }
2582         }
2583     }
2584 
2585   /* For multi-step demotion operations we first generate demotion operations
2586      from the source type to the intermediate types, and then combine the
2587      results (stored in VEC_OPRNDS) in a demotion operation to the destination
2588      type.  */
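       /* Continuing the int -> char example: the first level above turns four
          V4SI operands into two V8HI vectors, and the recursive call below
          then combines those into the single V16QI result.  */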
2589   if (multi_step_cvt)
2590     {
2591       /* At each level of recursion we have half of the operands we had at
2592          the previous level.  */
2593       VEC_truncate (tree, *vec_oprnds, (i+1)/2);
2594       vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
2595                                              stmt, vec_dsts, gsi, slp_node,
2596                                              code, prev_stmt_info);
2597     }
2598 }
2599 
2600 
2601 /* Function vectorizable_type_demotion.
2602 
2603    Check if STMT performs a binary or unary operation that involves
2604    type demotion, and if it can be vectorized.
2605    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2606    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2607    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
2608 
2609 static bool
2610 vectorizable_type_demotion (gimple stmt, gimple_stmt_iterator *gsi,
2611 			    gimple *vec_stmt, slp_tree slp_node)
2612 {
2613   tree vec_dest;
2614   tree scalar_dest;
2615   tree op0;
2616   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2617   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2618   enum tree_code code, code1 = ERROR_MARK;
2619   tree def;
2620   gimple def_stmt;
2621   enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2622   stmt_vec_info prev_stmt_info;
2623   int nunits_in;
2624   int nunits_out;
2625   tree vectype_out;
2626   int ncopies;
2627   int j, i;
2628   tree vectype_in;
2629   int multi_step_cvt = 0;
2630   VEC (tree, heap) *vec_oprnds0 = NULL;
2631   VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
2632   tree last_oprnd, intermediate_type;
2633 
2634   /* FORNOW: not supported by basic block SLP vectorization.  */
2635   gcc_assert (loop_vinfo);
2636 
2637   if (!STMT_VINFO_RELEVANT_P (stmt_info))
2638     return false;
2639 
2640   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2641     return false;
2642 
2643   /* Is STMT a vectorizable type-demotion operation?  */
2644   if (!is_gimple_assign (stmt))
2645     return false;
2646 
2647   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2648     return false;
2649 
2650   code = gimple_assign_rhs_code (stmt);
2651   if (!CONVERT_EXPR_CODE_P (code))
2652     return false;
2653 
2654   op0 = gimple_assign_rhs1 (stmt);
2655   vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
2656   if (!vectype_in)
2657     return false;
2658   nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2659 
2660   scalar_dest = gimple_assign_lhs (stmt);
2661   vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
2662   if (!vectype_out)
2663     return false;
2664   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2665   if (nunits_in >= nunits_out)
2666     return false;
2667 
2668   /* Multiple types in SLP are handled by creating the appropriate number of
2669      vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2670      case of SLP.  */
2671   if (slp_node)
2672     ncopies = 1;
2673   else
2674     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2675   gcc_assert (ncopies >= 1);
2676 
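     /* E.g. (hypothetical modes), for a short <- int demotion with
        V4SI -> V8HI and VF == 8: ncopies == 8 / nunits_out == 1, and that
        single narrowing stmt consumes two V4SI input vectors.  */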
2677   if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2678 	  && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
2679 	 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
2680 	     && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
2681 	     && CONVERT_EXPR_CODE_P (code))))
2682     return false;
2683 
2684   /* Check the operands of the operation.  */
2685   if (!vect_is_simple_use (op0, loop_vinfo, NULL, &def_stmt, &def, &dt[0]))
2686     {
2687       if (vect_print_dump_info (REPORT_DETAILS))
2688         fprintf (vect_dump, "use not simple.");
2689       return false;
2690     }
2691 
2692   /* Supportable by target?  */
2693   if (!supportable_narrowing_operation (code, stmt, vectype_in, &code1,
2694                                         &multi_step_cvt, &interm_types))
2695     return false;
2696 
2697   STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
2698 
2699   if (!vec_stmt) /* transformation not required.  */
2700     {
2701       STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
2702       if (vect_print_dump_info (REPORT_DETAILS))
2703         fprintf (vect_dump, "=== vectorizable_demotion ===");
2704       vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2705       return true;
2706     }
2707 
2708   /** Transform.  **/
2709   if (vect_print_dump_info (REPORT_DETAILS))
2710     fprintf (vect_dump, "transform type demotion operation. ncopies = %d.",
2711 	     ncopies);
2712 
2713   /* In case of multi-step demotion, we first generate demotion operations to
2714      the intermediate types, and then from those types to the final one.
2715      We create vector destinations for the intermediate types (TYPES) received
2716      from supportable_narrowing_operation, and store them in the correct order
2717      for future use in vect_create_vectorized_demotion_stmts().  */
2718   if (multi_step_cvt)
2719     vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
2720   else
2721     vec_dsts = VEC_alloc (tree, heap, 1);
2722 
2723   vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2724   VEC_quick_push (tree, vec_dsts, vec_dest);
2725 
2726   if (multi_step_cvt)
2727     {
2728       for (i = VEC_length (tree, interm_types) - 1;
2729            VEC_iterate (tree, interm_types, i, intermediate_type); i--)
2730         {
2731           vec_dest = vect_create_destination_var (scalar_dest,
2732                                                   intermediate_type);
2733           VEC_quick_push (tree, vec_dsts, vec_dest);
2734         }
2735     }
2736 
2737   /* In case the vectorization factor (VF) is bigger than the number
2738      of elements that we can fit in a vectype (nunits), we have to generate
2739      more than one vector stmt - i.e., we need to "unroll" the
2740      vector stmt by a factor VF/nunits.   */
2741   last_oprnd = op0;
2742   prev_stmt_info = NULL;
2743   for (j = 0; j < ncopies; j++)
2744     {
2745       /* Handle uses.  */
2746       if (slp_node)
2747         vect_get_slp_defs (op0, NULL_TREE, slp_node, &vec_oprnds0, NULL);
2748       else
2749         {
2750           VEC_free (tree, heap, vec_oprnds0);
2751           vec_oprnds0 = VEC_alloc (tree, heap,
2752                         (multi_step_cvt ? vect_pow2 (multi_step_cvt) * 2 : 2));
2753           vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
2754                                     vect_pow2 (multi_step_cvt) - 1);
2755         }
2756 
2757       /* Arguments are ready. Create the new vector stmts.  */
2758       tmp_vec_dsts = VEC_copy (tree, heap, vec_dsts);
2759       vect_create_vectorized_demotion_stmts (&vec_oprnds0,
2760                                              multi_step_cvt, stmt, tmp_vec_dsts,
2761                                              gsi, slp_node, code1,
2762                                              &prev_stmt_info);
2763     }
2764 
2765   VEC_free (tree, heap, vec_oprnds0);
2766   VEC_free (tree, heap, vec_dsts);
2767   VEC_free (tree, heap, tmp_vec_dsts);
2768   VEC_free (tree, heap, interm_types);
2769 
2770   *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2771   return true;
2772 }
2773 
2774 
2775 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
2776    and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
2777    the resulting vectors and call the function recursively.  */
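   For example (hypothetical modes), when promoting V8HI to V4SI each input
   vector yields two result vectors, one from its low half and one from its
   high half, using the two codes/decls returned by
   supportable_widening_operation (e.g. VEC_UNPACK_LO_EXPR and
   VEC_UNPACK_HI_EXPR on targets that support them).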
2778 
2779 static void
2780 vect_create_vectorized_promotion_stmts (VEC (tree, heap) **vec_oprnds0,
2781                                         VEC (tree, heap) **vec_oprnds1,
2782                                         int multi_step_cvt, gimple stmt,
2783                                         VEC (tree, heap) *vec_dsts,
2784                                         gimple_stmt_iterator *gsi,
2785                                         slp_tree slp_node, enum tree_code code1,
2786                                         enum tree_code code2, tree decl1,
2787                                         tree decl2, int op_type,
2788                                         stmt_vec_info *prev_stmt_info)
2789 {
2790   int i;
2791   tree vop0, vop1, new_tmp1, new_tmp2, vec_dest;
2792   gimple new_stmt1, new_stmt2;
2793   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2794   VEC (tree, heap) *vec_tmp;
2795 
2796   vec_dest = VEC_pop (tree, vec_dsts);
2797   vec_tmp = VEC_alloc (tree, heap, VEC_length (tree, *vec_oprnds0) * 2);
2798 
2799   for (i = 0; VEC_iterate (tree, *vec_oprnds0, i, vop0); i++)
2800     {
2801       if (op_type == binary_op)
2802         vop1 = VEC_index (tree, *vec_oprnds1, i);
2803       else
2804         vop1 = NULL_TREE;
2805 
2806       /* Generate the two halves of promotion operation.  */
2807       new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
2808                                                  op_type, vec_dest, gsi, stmt);
2809       new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
2810                                                  op_type, vec_dest, gsi, stmt);
2811       if (is_gimple_call (new_stmt1))
2812         {
2813           new_tmp1 = gimple_call_lhs (new_stmt1);
2814           new_tmp2 = gimple_call_lhs (new_stmt2);
2815         }
2816       else
2817         {
2818           new_tmp1 = gimple_assign_lhs (new_stmt1);
2819           new_tmp2 = gimple_assign_lhs (new_stmt2);
2820         }
2821 
2822       if (multi_step_cvt)
2823         {
2824           /* Store the results for the recursive call.  */
2825           VEC_quick_push (tree, vec_tmp, new_tmp1);
2826           VEC_quick_push (tree, vec_tmp, new_tmp2);
2827         }
2828       else
2829         {
2830           /* Last step of the promotion sequence - store the results.  */
2831           if (slp_node)
2832             {
2833               VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt1);
2834               VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt2);
2835             }
2836           else
2837             {
2838               if (!*prev_stmt_info)
2839                 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt1;
2840               else
2841                 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt1;
2842 
2843               *prev_stmt_info = vinfo_for_stmt (new_stmt1);
2844               STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt2;
2845               *prev_stmt_info = vinfo_for_stmt (new_stmt2);
2846             }
2847         }
2848     }
2849 
2850   if (multi_step_cvt)
2851     {
2852       /* For a multi-step promotion operation we call the function
2853          recursively for every stage.  We start from the input type,
2854          create promotion operations to the intermediate types, and then
2855          create promotions to the output type.  */
2856       *vec_oprnds0 = VEC_copy (tree, heap, vec_tmp);
2857       VEC_free (tree, heap, vec_tmp);
2858       vect_create_vectorized_promotion_stmts (vec_oprnds0, vec_oprnds1,
2859                                               multi_step_cvt - 1, stmt,
2860                                               vec_dsts, gsi, slp_node, code1,
2861                                               code2, decl1, decl2, op_type,
2862                                               prev_stmt_info);
2863     }
2864 }
2865 
2866 
2867 /* Function vectorizable_type_promotion.
2868 
2869    Check if STMT performs a binary or unary operation that involves
2870    type promotion, and if it can be vectorized.
2871    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2872    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2873    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
2874 
2875 static bool
2876 vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi,
2877 			     gimple *vec_stmt, slp_tree slp_node)
2878 {
2879   tree vec_dest;
2880   tree scalar_dest;
2881   tree op0, op1 = NULL;
2882   tree vec_oprnd0=NULL, vec_oprnd1=NULL;
2883   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2884   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2885   enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
2886   tree decl1 = NULL_TREE, decl2 = NULL_TREE;
2887   int op_type;
2888   tree def;
2889   gimple def_stmt;
2890   enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2891   stmt_vec_info prev_stmt_info;
2892   int nunits_in;
2893   int nunits_out;
2894   tree vectype_out;
2895   int ncopies;
2896   int j, i;
2897   tree vectype_in;
2898   tree intermediate_type = NULL_TREE;
2899   int multi_step_cvt = 0;
2900   VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
2901   VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
2902 
2903   /* FORNOW: not supported by basic block SLP vectorization.  */
2904   gcc_assert (loop_vinfo);
2905 
2906   if (!STMT_VINFO_RELEVANT_P (stmt_info))
2907     return false;
2908 
2909   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2910     return false;
2911 
2912   /* Is STMT a vectorizable type-promotion operation?  */
2913   if (!is_gimple_assign (stmt))
2914     return false;
2915 
2916   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2917     return false;
2918 
2919   code = gimple_assign_rhs_code (stmt);
2920   if (!CONVERT_EXPR_CODE_P (code)
2921       && code != WIDEN_MULT_EXPR)
2922     return false;
2923 
2924   op0 = gimple_assign_rhs1 (stmt);
2925   vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
2926   if (!vectype_in)
2927     return false;
2928   nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2929 
2930   scalar_dest = gimple_assign_lhs (stmt);
2931   vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
2932   if (!vectype_out)
2933     return false;
2934   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2935   if (nunits_in <= nunits_out)
2936     return false;
2937 
2938   /* Multiple types in SLP are handled by creating the appropriate number of
2939      vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2940      case of SLP.  */
2941   if (slp_node)
2942     ncopies = 1;
2943   else
2944     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2945 
2946   gcc_assert (ncopies >= 1);
2947 
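     /* E.g. (hypothetical modes), for an int <- short promotion with
        V8HI -> V4SI and VF == 8: ncopies == 8 / nunits_in == 1, and that
        single copy produces two V4SI result vectors.  */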
2948   if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2949 	  && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
2950 	 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
2951 	     && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
2952 	     && CONVERT_EXPR_CODE_P (code))))
2953     return false;
2954 
2955   /* Check the operands of the operation.  */
2956   if (!vect_is_simple_use (op0, loop_vinfo, NULL, &def_stmt, &def, &dt[0]))
2957     {
2958       if (vect_print_dump_info (REPORT_DETAILS))
2959 	fprintf (vect_dump, "use not simple.");
2960       return false;
2961     }
2962 
2963   op_type = TREE_CODE_LENGTH (code);
2964   if (op_type == binary_op)
2965     {
2966       op1 = gimple_assign_rhs2 (stmt);
2967       if (!vect_is_simple_use (op1, loop_vinfo, NULL, &def_stmt, &def, &dt[1]))
2968         {
2969 	  if (vect_print_dump_info (REPORT_DETAILS))
2970 	    fprintf (vect_dump, "use not simple.");
2971           return false;
2972         }
2973     }
2974 
2975   /* Supportable by target?  */
2976   if (!supportable_widening_operation (code, stmt, vectype_in,
2977 				       &decl1, &decl2, &code1, &code2,
2978                                        &multi_step_cvt, &interm_types))
2979     return false;
2980 
2981   /* A binary widening operation can only be supported directly by the
2982      architecture, never as a multi-step conversion.  */
2983   gcc_assert (!(multi_step_cvt && op_type == binary_op));
2984 
2985   STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
2986 
2987   if (!vec_stmt) /* transformation not required.  */
2988     {
2989       STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
2990       if (vect_print_dump_info (REPORT_DETAILS))
2991         fprintf (vect_dump, "=== vectorizable_promotion ===");
2992       vect_model_simple_cost (stmt_info, 2*ncopies, dt, NULL);
2993       return true;
2994     }
2995 
2996   /** Transform.  **/
2997 
2998   if (vect_print_dump_info (REPORT_DETAILS))
2999     fprintf (vect_dump, "transform type promotion operation. ncopies = %d.",
3000                         ncopies);
3001 
3002   /* Handle def.  */
3003   /* In case of multi-step promotion, we first generate promotion operations
3004      to the intermediate types, and then from those types to the final one.
3005      We store the vector destinations in VEC_DSTS in the correct order for
3006      recursive creation of promotion operations in
3007      vect_create_vectorized_promotion_stmts().  Vector destinations are
3008      created according to TYPES received from supportable_widening_operation().  */
3009   if (multi_step_cvt)
3010     vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
3011   else
3012     vec_dsts = VEC_alloc (tree, heap, 1);
3013 
3014   vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3015   VEC_quick_push (tree, vec_dsts, vec_dest);
3016 
3017   if (multi_step_cvt)
3018     {
3019       for (i = VEC_length (tree, interm_types) - 1;
3020            VEC_iterate (tree, interm_types, i, intermediate_type); i--)
3021         {
3022           vec_dest = vect_create_destination_var (scalar_dest,
3023                                                   intermediate_type);
3024           VEC_quick_push (tree, vec_dsts, vec_dest);
3025         }
3026     }
3027 
3028   if (!slp_node)
3029     {
3030       vec_oprnds0 = VEC_alloc (tree, heap,
3031                             (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
3032       if (op_type == binary_op)
3033         vec_oprnds1 = VEC_alloc (tree, heap, 1);
3034     }
3035 
3036   /* In case the vectorization factor (VF) is bigger than the number
3037      of elements that we can fit in a vectype (nunits), we have to generate
3038      more than one vector stmt - i.e - we need to "unroll" the
3039      vector stmt by a factor VF/nunits.   */
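  /* E.g. (a sketch): with VF == 8 and nunits == 4, ncopies == 2, so the
     loop below runs twice, chaining the copies through PREV_STMT_INFO.  */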
3040 
3041   prev_stmt_info = NULL;
3042   for (j = 0; j < ncopies; j++)
3043     {
3044       /* Handle uses.  */
3045       if (j == 0)
3046         {
3047           if (slp_node)
3048               vect_get_slp_defs (op0, op1, slp_node, &vec_oprnds0, &vec_oprnds1);
3049           else
3050             {
3051               vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3052               VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
3053               if (op_type == binary_op)
3054                 {
3055                   vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
3056                   VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
3057                 }
3058             }
3059         }
3060       else
3061         {
3062           vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3063           VEC_replace (tree, vec_oprnds0, 0, vec_oprnd0);
3064           if (op_type == binary_op)
3065             {
3066               vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1);
3067               VEC_replace (tree, vec_oprnds1, 0, vec_oprnd1);
3068             }
3069         }
3070 
3071       /* Arguments are ready. Create the new vector stmts.  */
3072       tmp_vec_dsts = VEC_copy (tree, heap, vec_dsts);
3073       vect_create_vectorized_promotion_stmts (&vec_oprnds0, &vec_oprnds1,
3074                                               multi_step_cvt, stmt,
3075                                               tmp_vec_dsts,
3076                                               gsi, slp_node, code1, code2,
3077                                               decl1, decl2, op_type,
3078                                               &prev_stmt_info);
3079     }
3080 
3081   VEC_free (tree, heap, vec_dsts);
3082   VEC_free (tree, heap, tmp_vec_dsts);
3083   VEC_free (tree, heap, interm_types);
3084   VEC_free (tree, heap, vec_oprnds0);
3085   VEC_free (tree, heap, vec_oprnds1);
3086 
3087   *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3088   return true;
3089 }
3090 
3091 
3092 /* Function vectorizable_store.
3093 
3094    Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
3095    can be vectorized.
3096    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3097    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3098    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
3099 
3100 static bool
3101 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
3102 		    slp_tree slp_node)
3103 {
3104   tree scalar_dest;
3105   tree data_ref;
3106   tree op;
3107   tree vec_oprnd = NULL_TREE;
3108   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3109   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
3110   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3111   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3112   struct loop *loop = NULL;
3113   enum machine_mode vec_mode;
3114   tree dummy;
3115   enum dr_alignment_support alignment_support_scheme;
3116   tree def;
3117   gimple def_stmt;
3118   enum vect_def_type dt;
3119   stmt_vec_info prev_stmt_info = NULL;
3120   tree dataref_ptr = NULL_TREE;
3121   int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3122   int ncopies;
3123   int j;
3124   gimple next_stmt, first_stmt = NULL;
3125   bool strided_store = false;
3126   unsigned int group_size, i;
3127   VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
3128   bool inv_p;
3129   VEC(tree,heap) *vec_oprnds = NULL;
3130   bool slp = (slp_node != NULL);
3131   unsigned int vec_num;
3132   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3133 
3134   if (loop_vinfo)
3135     loop = LOOP_VINFO_LOOP (loop_vinfo);
3136 
3137   /* Multiple types in SLP are handled by creating the appropriate number of
3138      vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3139      case of SLP.  */
3140   if (slp)
3141     ncopies = 1;
3142   else
3143     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3144 
3145   gcc_assert (ncopies >= 1);
3146 
3147   /* FORNOW. This restriction should be relaxed.  */
3148   if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
3149     {
3150       if (vect_print_dump_info (REPORT_DETAILS))
3151         fprintf (vect_dump, "multiple types in nested loop.");
3152       return false;
3153     }
3154 
3155   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3156     return false;
3157 
3158   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3159     return false;
3160 
3161   /* Is vectorizable store? */
3162 
3163   if (!is_gimple_assign (stmt))
3164     return false;
3165 
3166   scalar_dest = gimple_assign_lhs (stmt);
3167   if (TREE_CODE (scalar_dest) != ARRAY_REF
3168       && TREE_CODE (scalar_dest) != INDIRECT_REF
3169       && TREE_CODE (scalar_dest) != COMPONENT_REF
3170       && TREE_CODE (scalar_dest) != IMAGPART_EXPR
3171       && TREE_CODE (scalar_dest) != REALPART_EXPR)
3172     return false;
3173 
3174   gcc_assert (gimple_assign_single_p (stmt));
3175   op = gimple_assign_rhs1 (stmt);
3176   if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt))
3177     {
3178       if (vect_print_dump_info (REPORT_DETAILS))
3179         fprintf (vect_dump, "use not simple.");
3180       return false;
3181     }
3182 
3183   /* The scalar rhs type needs to be trivially convertible to the vector
3184      component type.  This should always be the case.  */
3185   if (!useless_type_conversion_p (TREE_TYPE (vectype), TREE_TYPE (op)))
3186     {
3187       if (vect_print_dump_info (REPORT_DETAILS))
3188         fprintf (vect_dump, "???  operands of different types");
3189       return false;
3190     }
3191 
3192   vec_mode = TYPE_MODE (vectype);
3193   /* FORNOW. In some cases can vectorize even if data-type not supported
3194      (e.g. - array initialization with 0).  */
3195   if (optab_handler (mov_optab, (int)vec_mode)->insn_code == CODE_FOR_nothing)
3196     return false;
3197 
3198   if (!STMT_VINFO_DATA_REF (stmt_info))
3199     return false;
3200 
3201   if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
3202     {
3203       strided_store = true;
3204       first_stmt = DR_GROUP_FIRST_DR (stmt_info);
3205       if (!vect_strided_store_supported (vectype)
3206 	  && !PURE_SLP_STMT (stmt_info) && !slp)
3207 	return false;
3208 
3209       if (first_stmt == stmt)
3210 	{
3211           /* STMT is the leader of the group. Check the operands of all the
3212              stmts of the group.  */
3213           next_stmt = DR_GROUP_NEXT_DR (stmt_info);
3214           while (next_stmt)
3215             {
3216 	      gcc_assert (gimple_assign_single_p (next_stmt));
3217 	      op = gimple_assign_rhs1 (next_stmt);
3218               if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt,
3219                                        &def, &dt))
3220                 {
3221                   if (vect_print_dump_info (REPORT_DETAILS))
3222                     fprintf (vect_dump, "use not simple.");
3223                   return false;
3224                 }
3225               next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
3226             }
3227         }
3228     }
3229 
3230   if (!vec_stmt) /* transformation not required.  */
3231     {
3232       STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
3233       vect_model_store_cost (stmt_info, ncopies, dt, NULL);
3234       return true;
3235     }
3236 
3237   /** Transform.  **/
3238 
3239   if (strided_store)
3240     {
3241       first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3242       group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
3243 
3244       DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
3245 
3246       /* FORNOW */
3247       gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
3248 
3249       /* We vectorize all the stmts of the interleaving group when we
3250 	 reach the last stmt in the group.  */
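      /* E.g. (a sketch): for a group of four interleaved stores, the first
	 three calls into this function only bump the count and return with
	 *VEC_STMT set to NULL; the fourth call emits the vector stores for
	 the whole group.  */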
3251       if (DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
3252 	  < DR_GROUP_SIZE (vinfo_for_stmt (first_stmt))
3253 	  && !slp)
3254 	{
3255 	  *vec_stmt = NULL;
3256 	  return true;
3257 	}
3258 
3259       if (slp)
3260 	strided_store = false;
3261 
3262       /* VEC_NUM is the number of vect stmts to be created for this group.  */
3263       if (slp)
3264 	vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3265       else
3266 	vec_num = group_size;
3267     }
3268   else
3269     {
3270       first_stmt = stmt;
3271       first_dr = dr;
3272       group_size = vec_num = 1;
3273     }
3274 
3275   if (vect_print_dump_info (REPORT_DETAILS))
3276     fprintf (vect_dump, "transform store. ncopies = %d",ncopies);
3277 
3278   dr_chain = VEC_alloc (tree, heap, group_size);
3279   oprnds = VEC_alloc (tree, heap, group_size);
3280 
3281   alignment_support_scheme = vect_supportable_dr_alignment (first_dr);
3282   gcc_assert (alignment_support_scheme);
3283 
3284   /* In case the vectorization factor (VF) is bigger than the number
3285      of elements that we can fit in a vectype (nunits), we have to generate
3286      more than one vector stmt - i.e - we need to "unroll" the
3287      vector stmt by a factor VF/nunits.  For more details see documentation in
3288      vect_get_vec_def_for_copy_stmt.  */
3289 
3290   /* In case of interleaving (non-unit strided access):
3291 
3292         S1:  &base + 2 = x2
3293         S2:  &base = x0
3294         S3:  &base + 1 = x1
3295         S4:  &base + 3 = x3
3296 
3297      We create vectorized stores starting from the base address (the access of
3298      the first stmt in the chain, S2 in the above example) when the last store
3299      stmt of the chain (S4) is reached:
3300 
3301         VS1: &base = vx2
3302 	VS2: &base + vec_size*1 = vx0
3303 	VS3: &base + vec_size*2 = vx1
3304 	VS4: &base + vec_size*3 = vx3
3305 
3306      Then permutation statements are generated:
3307 
3308         VS5: vx5 = VEC_INTERLEAVE_HIGH_EXPR < vx0, vx3 >
3309         VS6: vx6 = VEC_INTERLEAVE_LOW_EXPR < vx0, vx3 >
3310 	...
3311 
3312      And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3313      (the order of the data-refs in the output of vect_permute_store_chain
3314      corresponds to the order of scalar stmts in the interleaving chain - see
3315      the documentation of vect_permute_store_chain()).
3316 
3317      In case of both multiple types and interleaving, above vector stores and
3318      permutation stmts are created for every copy. The result vector stmts are
3319      put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
3320      STMT_VINFO_RELATED_STMT for the next copies.
3321   */
3322 
3323   prev_stmt_info = NULL;
3324   for (j = 0; j < ncopies; j++)
3325     {
3326       gimple new_stmt;
3327       gimple ptr_incr;
3328 
3329       if (j == 0)
3330 	{
3331           if (slp)
3332             {
3333 	      /* Get vectorized arguments for SLP_NODE.  */
3334               vect_get_slp_defs (NULL_TREE, NULL_TREE, slp_node, &vec_oprnds,
3335                                  NULL);
3336 
3337               vec_oprnd = VEC_index (tree, vec_oprnds, 0);
3338             }
3339           else
3340             {
3341 	      /* For interleaved stores we collect vectorized defs for all the
3342 		 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
3343 		 used as an input to vect_permute_store_chain(), and OPRNDS as
3344 		 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
3345 
3346 		 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3347 		 OPRNDS are of size 1.  */
3348 	      next_stmt = first_stmt;
3349 	      for (i = 0; i < group_size; i++)
3350 		{
3351 		  /* Since gaps are not supported for interleaved stores,
3352 		     GROUP_SIZE is the exact number of stmts in the chain.
3353 		     Therefore, NEXT_STMT can't be NULL.  When there is no
3354 		     interleaving, GROUP_SIZE is 1, and only one iteration
3355 		     of the loop will be executed.  */
3356 		  gcc_assert (next_stmt
3357 			      && gimple_assign_single_p (next_stmt));
3358 		  op = gimple_assign_rhs1 (next_stmt);
3359 
3360 		  vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
3361 							    NULL);
3362 		  VEC_quick_push(tree, dr_chain, vec_oprnd);
3363 		  VEC_quick_push(tree, oprnds, vec_oprnd);
3364 		  next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
3365 		}
3366 	    }
3367 
3368 	  /* We should have caught mismatched types earlier.  */
3369 	  gcc_assert (useless_type_conversion_p (vectype,
3370 						 TREE_TYPE (vec_oprnd)));
3371 	  dataref_ptr = vect_create_data_ref_ptr (first_stmt, NULL, NULL_TREE,
3372 						  &dummy, &ptr_incr, false,
3373 						  &inv_p);
3374 	  gcc_assert (bb_vinfo || !inv_p);
3375 	}
3376       else
3377 	{
3378 	  /* For interleaved stores we created vectorized defs for all the
3379 	     defs stored in OPRNDS in the previous iteration (previous copy).
3380 	     DR_CHAIN is then used as an input to vect_permute_store_chain(),
3381 	     and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
3382 	     next copy.
3383 	     If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3384 	     OPRNDS are of size 1.  */
3385 	  for (i = 0; i < group_size; i++)
3386 	    {
3387 	      op = VEC_index (tree, oprnds, i);
3388 	      vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def,
3389 	                          &dt);
3390 	      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
3391 	      VEC_replace(tree, dr_chain, i, vec_oprnd);
3392 	      VEC_replace(tree, oprnds, i, vec_oprnd);
3393 	    }
3394 	  dataref_ptr =
3395 		bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, NULL_TREE);
3396 	}
3397 
3398       if (strided_store)
3399 	{
3400 	  result_chain = VEC_alloc (tree, heap, group_size);
3401 	  /* Permute.  */
3402 	  if (!vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
3403 					 &result_chain))
3404 	    return false;
3405 	}
3406 
3407       next_stmt = first_stmt;
3408       for (i = 0; i < vec_num; i++)
3409 	{
3410 	  if (i > 0)
3411 	    /* Bump the vector pointer.  */
3412 	    dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
3413 					   NULL_TREE);
3414 
3415 	  if (slp)
3416 	    vec_oprnd = VEC_index (tree, vec_oprnds, i);
3417 	  else if (strided_store)
3418 	    /* For strided stores vectorized defs are interleaved in
3419 	       vect_permute_store_chain().  */
3420 	    vec_oprnd = VEC_index (tree, result_chain, i);
3421 
3422           if (aligned_access_p (first_dr))
3423             data_ref = build_fold_indirect_ref (dataref_ptr);
3424           else
3425           {
3426             int mis = DR_MISALIGNMENT (first_dr);
3427             tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
3428             tmis = size_binop (MULT_EXPR, tmis, size_int (BITS_PER_UNIT));
3429             data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, dataref_ptr, tmis);
3430            }
3431 
3432 	  /* If accesses through a pointer to vectype do not alias the original
3433 	     memory reference we have a problem.  This should never happen.  */
3434 	  gcc_assert (alias_sets_conflict_p (get_alias_set (data_ref),
3435 		      get_alias_set (gimple_assign_lhs (stmt))));
3436 
3437 	  /* Arguments are ready. Create the new vector stmt.  */
3438 	  new_stmt = gimple_build_assign (data_ref, vec_oprnd);
3439 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
3440 	  mark_symbols_for_renaming (new_stmt);
3441 
3442           if (slp)
3443             continue;
3444 
3445           if (j == 0)
3446             STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt =  new_stmt;
3447 	  else
3448 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3449 
3450 	  prev_stmt_info = vinfo_for_stmt (new_stmt);
3451 	  next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
3452 	  if (!next_stmt)
3453 	    break;
3454 	}
3455     }
3456 
3457   VEC_free (tree, heap, dr_chain);
3458   VEC_free (tree, heap, oprnds);
3459   if (result_chain)
3460     VEC_free (tree, heap, result_chain);
3461 
3462   return true;
3463 }
3464 
3465 /* Function vectorizable_load.
3466 
3467    Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
3468    can be vectorized.
3469    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3470    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3471    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
3472 
3473 static bool
3474 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
3475 		   slp_tree slp_node, slp_instance slp_node_instance)
3476 {
3477   tree scalar_dest;
3478   tree vec_dest = NULL;
3479   tree data_ref = NULL;
3480   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3481   stmt_vec_info prev_stmt_info;
3482   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3483   struct loop *loop = NULL;
3484   struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
3485   bool nested_in_vect_loop = false;
3486   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
3487   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3488   tree new_temp;
3489   int mode;
3490   gimple new_stmt = NULL;
3491   tree dummy;
3492   enum dr_alignment_support alignment_support_scheme;
3493   tree dataref_ptr = NULL_TREE;
3494   gimple ptr_incr;
3495   int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3496   int ncopies;
3497   int i, j, group_size;
3498   tree msq = NULL_TREE, lsq;
3499   tree offset = NULL_TREE;
3500   tree realignment_token = NULL_TREE;
3501   gimple phi = NULL;
3502   VEC(tree,heap) *dr_chain = NULL;
3503   bool strided_load = false;
3504   gimple first_stmt;
3505   tree scalar_type;
3506   bool inv_p;
3507   bool compute_in_loop = false;
3508   struct loop *at_loop;
3509   int vec_num;
3510   bool slp = (slp_node != NULL);
3511   bool slp_perm = false;
3512   enum tree_code code;
3513   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3514   int vf;
3515 
3516   if (loop_vinfo)
3517     {
3518       loop = LOOP_VINFO_LOOP (loop_vinfo);
3519       nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3520       vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3521     }
3522   else
3523     vf = 1;
3524 
3525   /* Multiple types in SLP are handled by creating the appropriate number of
3526      vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3527      case of SLP.  */
3528   if (slp)
3529     ncopies = 1;
3530   else
3531     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3532 
3533   gcc_assert (ncopies >= 1);
3534 
3535   /* FORNOW. This restriction should be relaxed.  */
3536   if (nested_in_vect_loop && ncopies > 1)
3537     {
3538       if (vect_print_dump_info (REPORT_DETAILS))
3539         fprintf (vect_dump, "multiple types in nested loop.");
3540       return false;
3541     }
3542 
3543   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3544     return false;
3545 
3546   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3547     return false;
3548 
3549   /* Is vectorizable load? */
3550   if (!is_gimple_assign (stmt))
3551     return false;
3552 
3553   scalar_dest = gimple_assign_lhs (stmt);
3554   if (TREE_CODE (scalar_dest) != SSA_NAME)
3555     return false;
3556 
3557   code = gimple_assign_rhs_code (stmt);
3558   if (code != ARRAY_REF
3559       && code != INDIRECT_REF
3560       && code != COMPONENT_REF
3561       && code != IMAGPART_EXPR
3562       && code != REALPART_EXPR)
3563     return false;
3564 
3565   if (!STMT_VINFO_DATA_REF (stmt_info))
3566     return false;
3567 
3568   scalar_type = TREE_TYPE (DR_REF (dr));
3569   mode = (int) TYPE_MODE (vectype);
3570 
3571   /* FORNOW. In some cases can vectorize even if data-type not supported
3572     (e.g. - data copies).  */
3573   if (optab_handler (mov_optab, mode)->insn_code == CODE_FOR_nothing)
3574     {
3575       if (vect_print_dump_info (REPORT_DETAILS))
3576 	fprintf (vect_dump, "Aligned load, but unsupported type.");
3577       return false;
3578     }
3579 
3580   /* The vector component type needs to be trivially convertible to the
3581      scalar lhs.  This should always be the case.  */
3582   if (!useless_type_conversion_p (TREE_TYPE (scalar_dest), TREE_TYPE (vectype)))
3583     {
3584       if (vect_print_dump_info (REPORT_DETAILS))
3585         fprintf (vect_dump, "???  operands of different types");
3586       return false;
3587     }
3588 
3589   /* Check if the load is a part of an interleaving chain.  */
3590   if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
3591     {
3592       strided_load = true;
3593       /* FORNOW */
3594       gcc_assert (! nested_in_vect_loop);
3595 
3596       /* Check if interleaving is supported.  */
3597       if (!vect_strided_load_supported (vectype)
3598 	  && !PURE_SLP_STMT (stmt_info) && !slp)
3599 	return false;
3600     }
3601 
3602   if (!vec_stmt) /* transformation not required.  */
3603     {
3604       STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
3605       vect_model_load_cost (stmt_info, ncopies, NULL);
3606       return true;
3607     }
3608 
3609   if (vect_print_dump_info (REPORT_DETAILS))
3610     fprintf (vect_dump, "transform load.");
3611 
3612   /** Transform.  **/
3613 
3614   if (strided_load)
3615     {
3616       first_stmt = DR_GROUP_FIRST_DR (stmt_info);
3617       /* Check if the chain of loads is already vectorized.  */
3618       if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
3619 	{
3620 	  *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3621 	  return true;
3622 	}
3623       first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3624       group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
3625 
3626       /* VEC_NUM is the number of vect stmts to be created for this group.  */
3627       if (slp)
3628 	{
3629 	  strided_load = false;
3630 	  vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3631           if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance))
3632             slp_perm = true;
3633     	}
3634       else
3635 	vec_num = group_size;
3636 
3637       dr_chain = VEC_alloc (tree, heap, vec_num);
3638     }
3639   else
3640     {
3641       first_stmt = stmt;
3642       first_dr = dr;
3643       group_size = vec_num = 1;
3644     }
3645 
3646   alignment_support_scheme = vect_supportable_dr_alignment (first_dr);
3647   gcc_assert (alignment_support_scheme);
3648 
3649   /* In case the vectorization factor (VF) is bigger than the number
3650      of elements that we can fit in a vectype (nunits), we have to generate
3651      more than one vector stmt - i.e - we need to "unroll" the
3652      vector stmt by a factor VF/nunits. In doing so, we record a pointer
3653      from one copy of the vector stmt to the next, in the field
3654      STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
3655      stages to find the correct vector defs to be used when vectorizing
3656      stmts that use the defs of the current stmt. The example below illustrates
3657      the vectorization process when VF=16 and nunits=4 (i.e - we need to create
3658      4 vectorized stmts):
3659 
3660      before vectorization:
3661                                 RELATED_STMT    VEC_STMT
3662         S1:     x = memref      -               -
3663         S2:     z = x + 1       -               -
3664 
3665      step 1: vectorize stmt S1:
3666         We first create the vector stmt VS1_0, and, as usual, record a
3667         pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
3668         Next, we create the vector stmt VS1_1, and record a pointer to
3669         it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
3670         Similarly, for VS1_2 and VS1_3. This is the resulting chain of
3671         stmts and pointers:
3672                                 RELATED_STMT    VEC_STMT
3673         VS1_0:  vx0 = memref0   VS1_1           -
3674         VS1_1:  vx1 = memref1   VS1_2           -
3675         VS1_2:  vx2 = memref2   VS1_3           -
3676         VS1_3:  vx3 = memref3   -               -
3677         S1:     x = load        -               VS1_0
3678         S2:     z = x + 1       -               -
3679 
3680      See in documentation in vect_get_vec_def_for_stmt_copy for how the
3681      information we recorded in RELATED_STMT field is used to vectorize
3682      stmt S2.  */
3683 
3684   /* In case of interleaving (non-unit strided access):
3685 
3686      S1:  x2 = &base + 2
3687      S2:  x0 = &base
3688      S3:  x1 = &base + 1
3689      S4:  x3 = &base + 3
3690 
3691      Vectorized loads are created in the order of memory accesses
3692      starting from the access of the first stmt of the chain:
3693 
3694      VS1: vx0 = &base
3695      VS2: vx1 = &base + vec_size*1
3696      VS3: vx2 = &base + vec_size*2
3697      VS4: vx3 = &base + vec_size*3
3698 
3699      Then permutation statements are generated:
3700 
3701      VS5: vx5 = VEC_EXTRACT_EVEN_EXPR < vx0, vx1 >
3702      VS6: vx6 = VEC_EXTRACT_ODD_EXPR < vx0, vx1 >
3703        ...
3704 
3705      And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3706      (the order of the data-refs in the output of vect_permute_load_chain
3707      corresponds to the order of scalar stmts in the interleaving chain - see
3708      the documentation of vect_permute_load_chain()).
3709      The generation of permutation stmts and recording them in
3710      STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().
3711 
3712      In case of both multiple types and interleaving, the vector loads and
3713      permutation stmts above are created for every copy. The result vector stmts
3714      are put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
3715      STMT_VINFO_RELATED_STMT for the next copies.  */
3716 
3717   /* If the data reference is aligned (dr_aligned) or potentially unaligned
3718      on a target that supports unaligned accesses (dr_unaligned_supported)
3719      we generate the following code:
3720          p = initial_addr;
3721          indx = 0;
3722          loop {
3723 	   p = p + indx * vectype_size;
3724            vec_dest = *(p);
3725            indx = indx + 1;
3726          }
3727 
3728      Otherwise, the data reference is potentially unaligned on a target that
3729      does not support unaligned accesses (dr_explicit_realign_optimized) -
3730      then generate the following code, in which the data in each iteration is
3731      obtained by two vector loads, one from the previous iteration, and one
3732      from the current iteration:
3733          p1 = initial_addr;
3734          msq_init = *(floor(p1))
3735          p2 = initial_addr + VS - 1;
3736          realignment_token = call target_builtin;
3737          indx = 0;
3738          loop {
3739            p2 = p2 + indx * vectype_size
3740            lsq = *(floor(p2))
3741            vec_dest = realign_load (msq, lsq, realignment_token)
3742            indx = indx + 1;
3743            msq = lsq;
3744          }   */
3745 
3746   /* If the misalignment remains the same throughout the execution of the
3747      loop, we can create the init_addr and permutation mask at the loop
3748      preheader. Otherwise, it needs to be created inside the loop.
3749      This can only occur when vectorizing memory accesses in the inner-loop
3750      nested within an outer-loop that is being vectorized.  */
3751 
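  /* E.g. (an illustrative sketch): an inner-loop access whose outer-loop
     DR_STEP is 12 bytes, loaded as a 16-byte vector, has a different
     misalignment in every outer-loop iteration, so the realignment data
     cannot be hoisted to the preheader.  */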
3752   if (loop && nested_in_vect_loop_p (loop, stmt)
3753       && (TREE_INT_CST_LOW (DR_STEP (dr))
3754 	  % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
3755     {
3756       gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
3757       compute_in_loop = true;
3758     }
3759 
3760   if ((alignment_support_scheme == dr_explicit_realign_optimized
3761        || alignment_support_scheme == dr_explicit_realign)
3762       && !compute_in_loop)
3763     {
3764       msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
3765 				    alignment_support_scheme, NULL_TREE,
3766 				    &at_loop);
3767       if (alignment_support_scheme == dr_explicit_realign_optimized)
3768 	{
3769 	  phi = SSA_NAME_DEF_STMT (msq);
3770 	  offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
3771 	}
3772     }
3773   else
3774     at_loop = loop;
3775 
3776   prev_stmt_info = NULL;
3777   for (j = 0; j < ncopies; j++)
3778     {
3779       /* 1. Create the vector pointer update chain.  */
3780       if (j == 0)
3781         dataref_ptr = vect_create_data_ref_ptr (first_stmt,
3782 					        at_loop, offset,
3783 						&dummy, &ptr_incr, false,
3784 						&inv_p);
3785       else
3786         dataref_ptr =
3787 		bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, NULL_TREE);
3788 
3789       for (i = 0; i < vec_num; i++)
3790 	{
3791 	  if (i > 0)
3792 	    dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
3793 					   NULL_TREE);
3794 
3795 	  /* 2. Create the vector-load in the loop.  */
3796 	  switch (alignment_support_scheme)
3797 	    {
3798 	    case dr_aligned:
3799 	      gcc_assert (aligned_access_p (first_dr));
3800 	      data_ref = build_fold_indirect_ref (dataref_ptr);
3801 	      break;
3802 	    case dr_unaligned_supported:
3803 	      {
3804 		int mis = DR_MISALIGNMENT (first_dr);
3805 		tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
3806 
3807 		tmis = size_binop (MULT_EXPR, tmis, size_int(BITS_PER_UNIT));
3808 		data_ref =
3809 		  build2 (MISALIGNED_INDIRECT_REF, vectype, dataref_ptr, tmis);
3810 		break;
3811 	      }
3812 	    case dr_explicit_realign:
3813 	      {
3814 		tree ptr, bump;
3815 		tree vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
3816 
3817 		if (compute_in_loop)
3818 		  msq = vect_setup_realignment (first_stmt, gsi,
3819 						&realignment_token,
3820 						dr_explicit_realign,
3821 						dataref_ptr, NULL);
3822 
3823 		data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
3824 		vec_dest = vect_create_destination_var (scalar_dest, vectype);
3825 		new_stmt = gimple_build_assign (vec_dest, data_ref);
3826 		new_temp = make_ssa_name (vec_dest, new_stmt);
3827 		gimple_assign_set_lhs (new_stmt, new_temp);
3828 		gimple_set_vdef (new_stmt, gimple_vdef (stmt));
3829 		gimple_set_vuse (new_stmt, gimple_vuse (stmt));
3830 		vect_finish_stmt_generation (stmt, new_stmt, gsi);
3831 		msq = new_temp;
3832 
3833 		bump = size_binop (MULT_EXPR, vs_minus_1,
3834 				   TYPE_SIZE_UNIT (scalar_type));
3835 		ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
3836 	        data_ref = build1 (ALIGN_INDIRECT_REF, vectype, ptr);
3837 	        break;
3838 	      }
3839 	    case dr_explicit_realign_optimized:
3840 	      data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
3841 	      break;
3842 	    default:
3843 	      gcc_unreachable ();
3844 	    }
3845 	  /* If accesses through a pointer to vectype do not alias the original
3846 	     memory reference we have a problem.  This should never happen. */
3847 	  gcc_assert (alias_sets_conflict_p (get_alias_set (data_ref),
3848 		      get_alias_set (gimple_assign_rhs1 (stmt))));
3849 	  vec_dest = vect_create_destination_var (scalar_dest, vectype);
3850 	  new_stmt = gimple_build_assign (vec_dest, data_ref);
3851 	  new_temp = make_ssa_name (vec_dest, new_stmt);
3852 	  gimple_assign_set_lhs (new_stmt, new_temp);
3853 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
3854 	  mark_symbols_for_renaming (new_stmt);
3855 
3856 	  /* 3. Handle explicit realignment if necessary/supported. Create in
3857 		loop: vec_dest = realign_load (msq, lsq, realignment_token)  */
3858 	  if (alignment_support_scheme == dr_explicit_realign_optimized
3859 	      || alignment_support_scheme == dr_explicit_realign)
3860 	    {
3861 	      tree tmp;
3862 
3863 	      lsq = gimple_assign_lhs (new_stmt);
3864 	      if (!realignment_token)
3865 		realignment_token = dataref_ptr;
3866 	      vec_dest = vect_create_destination_var (scalar_dest, vectype);
3867 	      tmp = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq,
3868 			    realignment_token);
3869 	      new_stmt = gimple_build_assign (vec_dest, tmp);
3870 	      new_temp = make_ssa_name (vec_dest, new_stmt);
3871 	      gimple_assign_set_lhs (new_stmt, new_temp);
3872 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
3873 
3874 	      if (alignment_support_scheme == dr_explicit_realign_optimized)
3875 		{
3876 		  gcc_assert (phi);
3877 		  if (i == vec_num - 1 && j == ncopies - 1)
3878 		    add_phi_arg (phi, lsq, loop_latch_edge (containing_loop),
3879 				 UNKNOWN_LOCATION);
3880 		  msq = lsq;
3881 		}
3882 	    }
3883 
3884 	  /* 4. Handle invariant-load.  */
3885 	  if (inv_p && !bb_vinfo)
3886 	    {
3887 	      gcc_assert (!strided_load);
3888 	      gcc_assert (nested_in_vect_loop_p (loop, stmt));
3889 	      if (j == 0)
3890 		{
3891 		  int k;
3892 		  tree t = NULL_TREE;
3893 		  tree vec_inv, bitpos, bitsize = TYPE_SIZE (scalar_type);
3894 
3895 		  /* CHECKME: bitpos depends on endianness?  */
3896 		  bitpos = bitsize_zero_node;
3897 		  vec_inv = build3 (BIT_FIELD_REF, scalar_type, new_temp,
3898 				    bitsize, bitpos);
3899 		  vec_dest =
3900 			vect_create_destination_var (scalar_dest, NULL_TREE);
3901 		  new_stmt = gimple_build_assign (vec_dest, vec_inv);
3902                   new_temp = make_ssa_name (vec_dest, new_stmt);
3903 		  gimple_assign_set_lhs (new_stmt, new_temp);
3904 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
3905 
3906 		  for (k = nunits - 1; k >= 0; --k)
3907 		    t = tree_cons (NULL_TREE, new_temp, t);
3908 		  /* FIXME: use build_constructor directly.  */
3909 		  vec_inv = build_constructor_from_list (vectype, t);
3910 		  new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
3911 		  new_stmt = SSA_NAME_DEF_STMT (new_temp);
3912 		}
3913 	      else
3914 		gcc_unreachable (); /* FORNOW. */
3915 	    }
3916 
3917 	  /* Collect vector loads and later create their permutation in
3918 	     vect_transform_strided_load ().  */
3919           if (strided_load || slp_perm)
3920             VEC_quick_push (tree, dr_chain, new_temp);
3921 
3922          /* Store vector loads in the corresponding SLP_NODE.  */
3923 	  if (slp && !slp_perm)
3924 	    VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
3925 	}
3926 
3927       if (slp && !slp_perm)
3928 	continue;
3929 
3930       if (slp_perm)
3931         {
3932           if (!vect_transform_slp_perm_load (stmt, dr_chain, gsi, vf,
3933                                              slp_node_instance, false))
3934             {
3935               VEC_free (tree, heap, dr_chain);
3936               return false;
3937             }
3938         }
3939       else
3940         {
3941           if (strided_load)
3942   	    {
3943 	      if (!vect_transform_strided_load (stmt, dr_chain, group_size, gsi))
3944 	        return false;
3945 
3946 	      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3947               VEC_free (tree, heap, dr_chain);
3948 	      dr_chain = VEC_alloc (tree, heap, group_size);
3949 	    }
3950           else
3951 	    {
3952 	      if (j == 0)
3953 	        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3954 	      else
3955 	        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3956 	      prev_stmt_info = vinfo_for_stmt (new_stmt);
3957 	    }
3958         }
3959     }
3960 
3961   if (dr_chain)
3962     VEC_free (tree, heap, dr_chain);
3963 
3964   return true;
3965 }
3966 
3967 /* Function vect_is_simple_cond.
3968 
3969    Input:
3970    LOOP - the loop that is being vectorized.
3971    COND - Condition that is checked for simple use.
3972 
3973    Returns whether a COND can be vectorized.  Checks whether
3974    condition operands are supportable using vect_is_simple_use.  */
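/* For example (illustrative, not from the original comment): the condition
   "a_1 < 5" is simple (an SSA name compared with an integer constant),
   while "a_1 < b_2" additionally requires b_2 to satisfy
   vect_is_simple_use.  */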
3975 
3976 static bool
3977 vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo)
3978 {
3979   tree lhs, rhs;
3980   tree def;
3981   enum vect_def_type dt;
3982 
3983   if (!COMPARISON_CLASS_P (cond))
3984     return false;
3985 
3986   lhs = TREE_OPERAND (cond, 0);
3987   rhs = TREE_OPERAND (cond, 1);
3988 
3989   if (TREE_CODE (lhs) == SSA_NAME)
3990     {
3991       gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
3992       if (!vect_is_simple_use (lhs, loop_vinfo, NULL, &lhs_def_stmt, &def,
3993                                &dt))
3994 	return false;
3995     }
3996   else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
3997 	   && TREE_CODE (lhs) != FIXED_CST)
3998     return false;
3999 
4000   if (TREE_CODE (rhs) == SSA_NAME)
4001     {
4002       gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
4003       if (!vect_is_simple_use (rhs, loop_vinfo, NULL, &rhs_def_stmt, &def,
4004                                &dt))
4005 	return false;
4006     }
4007   else if (TREE_CODE (rhs) != INTEGER_CST  && TREE_CODE (rhs) != REAL_CST
4008 	   && TREE_CODE (rhs) != FIXED_CST)
4009     return false;
4010 
4011   return true;
4012 }
4013 
4014 /* Function vectorizable_condition.
4015 
4016    Check if STMT is conditional modify expression that can be vectorized.
4017    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4018    stmt using VEC_COND_EXPR  to replace it, put it in VEC_STMT, and insert it
4019    at GSI.
4020 
4021    When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
4022    to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
4023    the else clause if it is 2).
4024 
4025    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
4026 
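/* For example (a sketch, not from the original comment): the scalar stmt

       x_1 = a_2 < b_3 ? c_4 : d_5;

   is transformed into roughly

       vx = VEC_COND_EXPR <va_2 < vb_3, vc_4, vd_5>;

   with the comparison operands and both value operands in vectors of the
   same type.  */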
4027 bool
4028 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
4029 			gimple *vec_stmt, tree reduc_def, int reduc_index)
4030 {
4031   tree scalar_dest = NULL_TREE;
4032   tree vec_dest = NULL_TREE;
4033   tree op = NULL_TREE;
4034   tree cond_expr, then_clause, else_clause;
4035   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4036   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4037   tree vec_cond_lhs, vec_cond_rhs, vec_then_clause, vec_else_clause;
4038   tree vec_compare, vec_cond_expr;
4039   tree new_temp;
4040   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4041   enum machine_mode vec_mode;
4042   tree def;
4043   enum vect_def_type dt;
4044   int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4045   int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4046   enum tree_code code;
4047 
4048   /* FORNOW: unsupported in basic block SLP.  */
4049   gcc_assert (loop_vinfo);
4050 
4051   gcc_assert (ncopies >= 1);
4052   if (ncopies > 1)
4053     return false; /* FORNOW */
4054 
4055   if (!STMT_VINFO_RELEVANT_P (stmt_info))
4056     return false;
4057 
4058   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4059       && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
4060            && reduc_def))
4061     return false;
4062 
4063   /* FORNOW: SLP not supported.  */
4064   if (STMT_SLP_TYPE (stmt_info))
4065     return false;
4066 
4067   /* FORNOW: not yet supported.  */
4068   if (STMT_VINFO_LIVE_P (stmt_info))
4069     {
4070       if (vect_print_dump_info (REPORT_DETAILS))
4071         fprintf (vect_dump, "value used after loop.");
4072       return false;
4073     }
4074 
4075   /* Is vectorizable conditional operation?  */
4076   if (!is_gimple_assign (stmt))
4077     return false;
4078 
4079   code = gimple_assign_rhs_code (stmt);
4080 
4081   if (code != COND_EXPR)
4082     return false;
4083 
4084   gcc_assert (gimple_assign_single_p (stmt));
4085   op = gimple_assign_rhs1 (stmt);
4086   cond_expr = TREE_OPERAND (op, 0);
4087   then_clause = TREE_OPERAND (op, 1);
4088   else_clause = TREE_OPERAND (op, 2);
4089 
4090   if (!vect_is_simple_cond (cond_expr, loop_vinfo))
4091     return false;
4092 
4093   /* We do not handle two different vector types for the condition
4094      and the values.  */
4095   if (!types_compatible_p (TREE_TYPE (TREE_OPERAND (cond_expr, 0)),
4096 			   TREE_TYPE (vectype)))
4097     return false;
4098 
4099   if (TREE_CODE (then_clause) == SSA_NAME)
4100     {
4101       gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
4102       if (!vect_is_simple_use (then_clause, loop_vinfo, NULL,
4103 			       &then_def_stmt, &def, &dt))
4104 	return false;
4105     }
4106   else if (TREE_CODE (then_clause) != INTEGER_CST
4107 	   && TREE_CODE (then_clause) != REAL_CST
4108 	   && TREE_CODE (then_clause) != FIXED_CST)
4109     return false;
4110 
4111   if (TREE_CODE (else_clause) == SSA_NAME)
4112     {
4113       gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
4114       if (!vect_is_simple_use (else_clause, loop_vinfo, NULL,
4115 			       &else_def_stmt, &def, &dt))
4116 	return false;
4117     }
4118   else if (TREE_CODE (else_clause) != INTEGER_CST
4119 	   && TREE_CODE (else_clause) != REAL_CST
4120 	   && TREE_CODE (else_clause) != FIXED_CST)
4121     return false;
4122 
4123 
4124   vec_mode = TYPE_MODE (vectype);
4125 
4126   if (!vec_stmt)
4127     {
4128       STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
4129       return expand_vec_cond_expr_p (TREE_TYPE (op), vec_mode);
4130     }
4131 
4132   /* Transform */
4133 
4134   /* Handle def.  */
4135   scalar_dest = gimple_assign_lhs (stmt);
4136   vec_dest = vect_create_destination_var (scalar_dest, vectype);
4137 
4138   /* Handle cond expr.  */
4139   vec_cond_lhs =
4140     vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt, NULL);
4141   vec_cond_rhs =
4142     vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt, NULL);
4143   if (reduc_index == 1)
4144     vec_then_clause = reduc_def;
4145   else
4146     vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL);
4147   if (reduc_index == 2)
4148     vec_else_clause = reduc_def;
4149   else
4150     vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL);
4151 
4152   /* Arguments are ready. Create the new vector stmt.  */
4153   vec_compare = build2 (TREE_CODE (cond_expr), vectype,
4154 			vec_cond_lhs, vec_cond_rhs);
4155   vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
4156 			  vec_compare, vec_then_clause, vec_else_clause);
4157 
4158   *vec_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
4159   new_temp = make_ssa_name (vec_dest, *vec_stmt);
4160   gimple_assign_set_lhs (*vec_stmt, new_temp);
4161   vect_finish_stmt_generation (stmt, *vec_stmt, gsi);
4162 
4163   return true;
4164 }
4165 
4166 
4167 /* Make sure the statement is vectorizable.  */
4168 
4169 bool
4170 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
4171 {
4172   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4173   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4174   enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
4175   bool ok;
4176   HOST_WIDE_INT dummy;
4177   tree scalar_type, vectype;
4178 
4179   if (vect_print_dump_info (REPORT_DETAILS))
4180     {
4181       fprintf (vect_dump, "==> examining statement: ");
4182       print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
4183     }
4184 
4185   if (gimple_has_volatile_ops (stmt))
4186     {
4187       if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
4188         fprintf (vect_dump, "not vectorized: stmt has volatile operands");
4189 
4190       return false;
4191     }
4192 
4193   /* Skip stmts that do not need to be vectorized. In loops this is expected
4194      to include:
4195      - the COND_EXPR which is the loop exit condition
4196      - any LABEL_EXPRs in the loop
4197      - computations that are used only for array indexing or loop control.
4198      In basic blocks we only analyze statements that are a part of some SLP
4199      instance, therefore, all the statements are relevant.  */
4200 
4201   if (!STMT_VINFO_RELEVANT_P (stmt_info)
4202       && !STMT_VINFO_LIVE_P (stmt_info))
4203     {
4204       if (vect_print_dump_info (REPORT_DETAILS))
4205         fprintf (vect_dump, "irrelevant.");
4206 
4207       return true;
4208     }
4209 
4210   switch (STMT_VINFO_DEF_TYPE (stmt_info))
4211     {
4212       case vect_internal_def:
4213         break;
4214 
4215       case vect_reduction_def:
4216       case vect_nested_cycle:
4217          gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
4218                      || relevance == vect_used_in_outer_by_reduction
4219                      || relevance == vect_unused_in_scope));
4220          break;
4221 
4222       case vect_induction_def:
4223       case vect_constant_def:
4224       case vect_external_def:
4225       case vect_unknown_def_type:
4226       default:
4227         gcc_unreachable ();
4228     }
4229 
4230   if (bb_vinfo)
4231     {
4232       gcc_assert (PURE_SLP_STMT (stmt_info));
4233 
4234       scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
4235       if (vect_print_dump_info (REPORT_DETAILS))
4236         {
4237           fprintf (vect_dump, "get vectype for scalar type:  ");
4238           print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
4239         }
4240 
4241       vectype = get_vectype_for_scalar_type (scalar_type);
4242       if (!vectype)
4243         {
4244           if (vect_print_dump_info (REPORT_DETAILS))
4245             {
4246                fprintf (vect_dump, "not SLPed: unsupported data-type ");
4247                print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
4248             }
4249           return false;
4250         }
4251 
4252       if (vect_print_dump_info (REPORT_DETAILS))
4253         {
4254           fprintf (vect_dump, "vectype:  ");
4255           print_generic_expr (vect_dump, vectype, TDF_SLIM);
4256         }
4257 
4258       STMT_VINFO_VECTYPE (stmt_info) = vectype;
4259    }
4260 
4261   if (STMT_VINFO_RELEVANT_P (stmt_info))
4262     {
4263       gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
4264       gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
4265       *need_to_vectorize = true;
4266     }
4267 
4268    ok = true;
4269    if (!bb_vinfo
4270        && (STMT_VINFO_RELEVANT_P (stmt_info)
4271            || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
4272       ok = (vectorizable_type_promotion (stmt, NULL, NULL, NULL)
4273             || vectorizable_type_demotion (stmt, NULL, NULL, NULL)
4274             || vectorizable_conversion (stmt, NULL, NULL, NULL)
4275             || vectorizable_shift (stmt, NULL, NULL, NULL)
4276             || vectorizable_operation (stmt, NULL, NULL, NULL)
4277             || vectorizable_assignment (stmt, NULL, NULL, NULL)
4278             || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
4279             || vectorizable_call (stmt, NULL, NULL)
4280             || vectorizable_store (stmt, NULL, NULL, NULL)
4281             || vectorizable_reduction (stmt, NULL, NULL)
4282             || vectorizable_condition (stmt, NULL, NULL, NULL, 0));
4283     else
4284       {
4285         if (bb_vinfo)
4286           ok = (vectorizable_shift (stmt, NULL, NULL, node)
4287                 || vectorizable_operation (stmt, NULL, NULL, node)
4288                 || vectorizable_assignment (stmt, NULL, NULL, node)
4289                 || vectorizable_load (stmt, NULL, NULL, node, NULL)
4290                 || vectorizable_store (stmt, NULL, NULL, node));
4291       }
4292 
4293   if (!ok)
4294     {
4295       if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
4296         {
4297           fprintf (vect_dump, "not vectorized: relevant stmt not ");
4298           fprintf (vect_dump, "supported: ");
4299           print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
4300         }
4301 
4302       return false;
4303     }
4304 
4305   if (bb_vinfo)
4306     return true;
4307 
4308   /* Stmts that are (also) "live" (i.e. - that are used outside the loop)
4309      need extra handling, except for vectorizable reductions.  */
4310   if (STMT_VINFO_LIVE_P (stmt_info)
4311       && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
4312     ok = vectorizable_live_operation (stmt, NULL, NULL);
4313 
4314   if (!ok)
4315     {
4316       if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
4317         {
4318           fprintf (vect_dump, "not vectorized: live stmt not ");
4319           fprintf (vect_dump, "supported: ");
4320           print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
4321         }
4322 
4323        return false;
4324     }
4325 
4326   if (!PURE_SLP_STMT (stmt_info))
4327     {
4328       /* Groups of strided accesses whose size is not a power of 2 are not
4329          vectorizable yet using loop-vectorization. Therefore, if this stmt
4330 	 feeds non-SLP-able stmts (i.e., this stmt has to be both SLPed and
4331 	 loop-based vectorized), the loop cannot be vectorized.  */
4332       if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
4333           && exact_log2 (DR_GROUP_SIZE (vinfo_for_stmt (
4334                                         DR_GROUP_FIRST_DR (stmt_info)))) == -1)
4335         {
4336           if (vect_print_dump_info (REPORT_DETAILS))
4337             {
4338               fprintf (vect_dump, "not vectorized: the size of group "
4339                                   "of strided accesses is not a power of 2");
4340               print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
4341             }
4342 
4343           return false;
4344         }
4345     }
4346 
4347   return true;
4348 }
4349 
4350 
4351 /* Function vect_transform_stmt.
4352 
4353    Create a vectorized stmt to replace STMT, and insert it at BSI.  */
4354 
4355 bool
4356 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
4357 		     bool *strided_store, slp_tree slp_node,
4358                      slp_instance slp_node_instance)
4359 {
4360   bool is_store = false;
4361   gimple vec_stmt = NULL;
4362   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4363   gimple orig_stmt_in_pattern;
4364   bool done;
4365 
4366   switch (STMT_VINFO_TYPE (stmt_info))
4367     {
4368     case type_demotion_vec_info_type:
4369       done = vectorizable_type_demotion (stmt, gsi, &vec_stmt, slp_node);
4370       gcc_assert (done);
4371       break;
4372 
4373     case type_promotion_vec_info_type:
4374       done = vectorizable_type_promotion (stmt, gsi, &vec_stmt, slp_node);
4375       gcc_assert (done);
4376       break;
4377 
4378     case type_conversion_vec_info_type:
4379       done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
4380       gcc_assert (done);
4381       break;
4382 
4383     case induc_vec_info_type:
4384       gcc_assert (!slp_node);
4385       done = vectorizable_induction (stmt, gsi, &vec_stmt);
4386       gcc_assert (done);
4387       break;
4388 
4389     case shift_vec_info_type:
4390       done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
4391       gcc_assert (done);
4392       break;
4393 
4394     case op_vec_info_type:
4395       done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
4396       gcc_assert (done);
4397       break;
4398 
4399     case assignment_vec_info_type:
4400       done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
4401       gcc_assert (done);
4402       break;
4403 
4404     case load_vec_info_type:
4405       done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
4406                                 slp_node_instance);
4407       gcc_assert (done);
4408       break;
4409 
4410     case store_vec_info_type:
4411       done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
4412       gcc_assert (done);
4413       if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && !slp_node)
4414 	{
4415 	  /* In case of interleaving, the whole chain is vectorized when the
4416 	     last store in the chain is reached. Store stmts before the last
4417 	     one are skipped, and their vec_stmt_info shouldn't be freed
4418 	     meanwhile.  */
4419 	  *strided_store = true;
4420 	  if (STMT_VINFO_VEC_STMT (stmt_info))
4421 	    is_store = true;
4422 	  }
4423       else
4424 	is_store = true;
4425       break;
4426 
4427     case condition_vec_info_type:
4428       gcc_assert (!slp_node);
4429       done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0);
4430       gcc_assert (done);
4431       break;
4432 
4433     case call_vec_info_type:
4434       gcc_assert (!slp_node);
4435       done = vectorizable_call (stmt, gsi, &vec_stmt);
4436       break;
4437 
4438     case reduc_vec_info_type:
4439       gcc_assert (!slp_node);
4440       done = vectorizable_reduction (stmt, gsi, &vec_stmt);
4441       gcc_assert (done);
4442       break;
4443 
4444     default:
4445       if (!STMT_VINFO_LIVE_P (stmt_info))
4446 	{
4447 	  if (vect_print_dump_info (REPORT_DETAILS))
4448 	    fprintf (vect_dump, "stmt not supported.");
4449 	  gcc_unreachable ();
4450 	}
4451     }
4452 
4453   /* Handle inner-loop stmts whose DEF is used in the loop-nest that
4454      is being vectorized, but outside the immediately enclosing loop.  */
4455   if (vec_stmt
4456       && STMT_VINFO_LOOP_VINFO (stmt_info)
4457       && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
4458                                 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
4459       && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
4460       && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
4461           || STMT_VINFO_RELEVANT (stmt_info) ==
4462                                            vect_used_in_outer_by_reduction))
4463     {
4464       struct loop *innerloop = LOOP_VINFO_LOOP (
4465                                 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
4466       imm_use_iterator imm_iter;
4467       use_operand_p use_p;
4468       tree scalar_dest;
4469       gimple exit_phi;
4470 
4471       if (vect_print_dump_info (REPORT_DETAILS))
4472         fprintf (vect_dump, "Record the vdef for outer-loop vectorization.");
4473 
4474       /* Find the relevant loop-exit phi-node, and record the vec_stmt there
4475         (to be used when vectorizing outer-loop stmts that use the DEF of
4476         STMT).  */
4477       if (gimple_code (stmt) == GIMPLE_PHI)
4478         scalar_dest = PHI_RESULT (stmt);
4479       else
4480         scalar_dest = gimple_assign_lhs (stmt);
4481 
4482       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
4483        {
4484          if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
4485            {
4486              exit_phi = USE_STMT (use_p);
4487              STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
4488            }
4489        }
4490     }
4491 
4492   /* Handle stmts whose DEF is used outside the loop-nest that is
4493      being vectorized.  */
4494   if (STMT_VINFO_LIVE_P (stmt_info)
4495       && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
4496     {
4497       done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
4498       gcc_assert (done);
4499     }
4500 
4501   if (vec_stmt)
4502     {
4503       STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
4504       orig_stmt_in_pattern = STMT_VINFO_RELATED_STMT (stmt_info);
4505       if (orig_stmt_in_pattern)
4506 	{
4507 	  stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt_in_pattern);
4508 	  /* STMT was inserted by the vectorizer to replace a computation idiom.
4509 	     ORIG_STMT_IN_PATTERN is a stmt in the original sequence that
4510 	     computed this idiom.  We need to record a pointer to VEC_STMT in
4511 	     the stmt_info of ORIG_STMT_IN_PATTERN.  See more details in the
4512 	     documentation of vect_pattern_recog.  */
4513 	  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
4514 	    {
4515 	      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
4516 	      STMT_VINFO_VEC_STMT (stmt_vinfo) = vec_stmt;
4517 	    }
4518 	}
4519     }
4520 
4521   return is_store;
4522 }
4523 
4524 
4525 /* Remove a group of stores (for SLP or interleaving), free their
4526    stmt_vec_info.  */

void
vect_remove_stores (gimple first_stmt)
{
  gimple next = first_stmt;
  gimple tmp;
  gimple_stmt_iterator next_si;

  while (next)
    {
      /* Free the attached stmt_vec_info and remove the stmt.  */
      next_si = gsi_for_stmt (next);
      gsi_remove (&next_si, true);
      tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
      free_stmt_vec_info (next);
      next = tmp;
    }
}


/* Function new_stmt_vec_info.

   Create and initialize a new stmt_vec_info struct for STMT.  */

stmt_vec_info
new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
                   bb_vec_info bb_vinfo)
{
  stmt_vec_info res;
  res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));

  STMT_VINFO_TYPE (res) = undef_vec_info_type;
  STMT_VINFO_STMT (res) = stmt;
  STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
  STMT_VINFO_BB_VINFO (res) = bb_vinfo;
  STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
  STMT_VINFO_LIVE_P (res) = false;
  STMT_VINFO_VECTYPE (res) = NULL;
  STMT_VINFO_VEC_STMT (res) = NULL;
  STMT_VINFO_IN_PATTERN_P (res) = false;
  STMT_VINFO_RELATED_STMT (res) = NULL;
  STMT_VINFO_DATA_REF (res) = NULL;

  STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
  STMT_VINFO_DR_OFFSET (res) = NULL;
  STMT_VINFO_DR_INIT (res) = NULL;
  STMT_VINFO_DR_STEP (res) = NULL;
  STMT_VINFO_DR_ALIGNED_TO (res) = NULL;

  if (gimple_code (stmt) == GIMPLE_PHI
      && is_loop_header_bb_p (gimple_bb (stmt)))
    STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
  else
    STMT_VINFO_DEF_TYPE (res) = vect_internal_def;

  STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
  STMT_VINFO_INSIDE_OF_LOOP_COST (res) = 0;
  STMT_VINFO_OUTSIDE_OF_LOOP_COST (res) = 0;
  STMT_SLP_TYPE (res) = loop_vect;
  DR_GROUP_FIRST_DR (res) = NULL;
  DR_GROUP_NEXT_DR (res) = NULL;
  DR_GROUP_SIZE (res) = 0;
  DR_GROUP_STORE_COUNT (res) = 0;
  DR_GROUP_GAP (res) = 0;
  DR_GROUP_SAME_DR_STMT (res) = NULL;
  DR_GROUP_READ_WRITE_DEPENDENCE (res) = false;

  return res;
}


/* Create the vector that holds the stmt_vec_info structs.  */

void
init_stmt_vec_info_vec (void)
{
  gcc_assert (!stmt_vec_info_vec);
  stmt_vec_info_vec = VEC_alloc (vec_void_p, heap, 50);
}


/* Free the vector that holds the stmt_vec_info structs.  */

void
free_stmt_vec_info_vec (void)
{
  gcc_assert (stmt_vec_info_vec);
  VEC_free (vec_void_p, heap, stmt_vec_info_vec);
}


/* Free stmt vectorization related info.  */

void
free_stmt_vec_info (gimple stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  if (!stmt_info)
    return;

  VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
  set_vinfo_for_stmt (stmt, NULL);
  free (stmt_info);
}


/* Function get_vectype_for_scalar_type.

   Returns the vector type corresponding to SCALAR_TYPE as supported
   by the target.  */
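
/* For example (an illustration only; the actual result depends on the
   target's UNITS_PER_SIMD_WORD): with a 16-byte SIMD word, a 4-byte
   'int' gives nunits = 16 / 4 = 4, i.e. a vector of 4 ints, while an
   8-byte 'double' gives a vector of 2 doubles.  */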

tree
get_vectype_for_scalar_type (tree scalar_type)
{
  enum machine_mode inner_mode = TYPE_MODE (scalar_type);
  unsigned int nbytes = GET_MODE_SIZE (inner_mode);
  int nunits;
  tree vectype;

  if (nbytes == 0 || nbytes >= UNITS_PER_SIMD_WORD (inner_mode))
    return NULL_TREE;

  /* We can't build a vector type of elements with alignment bigger than
     their size.  */
  if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
    return NULL_TREE;

  /* If we'd build a vector type of elements whose mode precision doesn't
     match their type's precision, we'll get mismatched types on vector
     extracts via BIT_FIELD_REFs.  This effectively means we disable
     vectorization of bool and/or enum types in some languages.  */
  if (INTEGRAL_TYPE_P (scalar_type)
      && GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type))
    return NULL_TREE;

  /* FORNOW: Only a single vector size per mode (UNITS_PER_SIMD_WORD)
     is expected.  */
  nunits = UNITS_PER_SIMD_WORD (inner_mode) / nbytes;

  vectype = build_vector_type (scalar_type, nunits);
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "get vectype with %d units of type ", nunits);
      print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
    }

  if (!vectype)
    return NULL_TREE;

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vectype: ");
      print_generic_expr (vect_dump, vectype, TDF_SLIM);
    }

  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "mode not supported by target.");
      return NULL_TREE;
    }

  return vectype;
}

/* Function vect_is_simple_use.

   Input:
   LOOP_VINFO - the vect info of the loop that is being vectorized.
   BB_VINFO - the vect info of the basic block that is being vectorized.
   OPERAND - operand of a stmt in the loop or bb.
   DEF - the defining stmt in case OPERAND is an SSA_NAME.

   Returns whether a stmt with OPERAND can be vectorized.
   For loops, supportable operands are constants, loop invariants, and operands
   that are defined by the current iteration of the loop.  Unsupportable
   operands are those that are defined by a previous iteration of the loop (as
   is the case in reduction/induction computations).
   For basic blocks, supportable operands are constants and bb invariants.
   For now, operands defined outside the basic block are not supported.  */
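
/* An illustrative loop (assumed here for exposition):

       for (i = 0; i < n; i++)
         {
           b[i] = a + x[i];      'a', 'x[i]': constants/invariants or defs
                                 of the current iteration - simple uses
           sum = sum + b[i];     'sum': defined by a previous iteration
         }                       (reduction) - handled separately

   Constants are classified as vect_constant_def, invariants such as 'a'
   as vect_external_def, and SSA names defined inside the loop get their
   def-type from the stmt_vec_info of the defining stmt.  */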

bool
vect_is_simple_use (tree operand, loop_vec_info loop_vinfo,
		    bb_vec_info bb_vinfo, gimple *def_stmt,
		    tree *def, enum vect_def_type *dt)
{
  basic_block bb;
  stmt_vec_info stmt_vinfo;
  struct loop *loop = NULL;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  *def_stmt = NULL;
  *def = NULL_TREE;

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_is_simple_use: operand ");
      print_generic_expr (vect_dump, operand, TDF_SLIM);
    }

  if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
    {
      *dt = vect_constant_def;
      return true;
    }

  if (is_gimple_min_invariant (operand))
    {
      *def = operand;
      *dt = vect_external_def;
      return true;
    }

  if (TREE_CODE (operand) == PAREN_EXPR)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "non-associatable copy.");
      operand = TREE_OPERAND (operand, 0);
    }

  if (TREE_CODE (operand) != SSA_NAME)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not ssa-name.");
      return false;
    }

  *def_stmt = SSA_NAME_DEF_STMT (operand);
  if (*def_stmt == NULL)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "no def_stmt.");
      return false;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "def_stmt: ");
      print_gimple_stmt (vect_dump, *def_stmt, 0, TDF_SLIM);
    }

  /* An empty stmt is expected only in the case of a function argument
     (otherwise we expect a PHI node or a GIMPLE_ASSIGN).  */
  if (gimple_nop_p (*def_stmt))
    {
      *def = operand;
      *dt = vect_external_def;
      return true;
    }

  bb = gimple_bb (*def_stmt);

  if ((loop && !flow_bb_inside_loop_p (loop, bb))
      || (!loop && bb != BB_VINFO_BB (bb_vinfo))
      || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
    *dt = vect_external_def;
  else
    {
      stmt_vinfo = vinfo_for_stmt (*def_stmt);
      *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
    }

  if (*dt == vect_unknown_def_type)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Unsupported pattern.");
      return false;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "type of def: %d.", *dt);

  switch (gimple_code (*def_stmt))
    {
    case GIMPLE_PHI:
      *def = gimple_phi_result (*def_stmt);
      break;

    case GIMPLE_ASSIGN:
      *def = gimple_assign_lhs (*def_stmt);
      break;

    case GIMPLE_CALL:
      *def = gimple_call_lhs (*def_stmt);
      if (*def != NULL)
	break;
      /* FALLTHRU */
    default:
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "unsupported defining stmt: ");
      return false;
    }

  return true;
}


/* Function supportable_widening_operation

   Check whether an operation represented by the code CODE is a
   widening operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE).

   Widening operations we currently support are NOP (CONVERT), FLOAT
   and WIDEN_MULT.  This function checks if these operations are supported
   by the target platform either directly (via vector tree-codes), or via
   target builtins.

   Output:
   - CODE1 and CODE2 are codes of vector operations to be used when
   vectorizing the operation, if available.
   - DECL1 and DECL2 are decls of target builtin functions to be used
   when vectorizing the operation, if available.  In this case,
   CODE1 and CODE2 are CALL_EXPR.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like char->short->int - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   widening operation (short in the above example).  */
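
/* A worked instance of the multi-step case (the vector modes here are
   illustrative and target dependent): widening char->int with 16-byte
   vectors proceeds V16QI -> V8HI -> V4SI, so one intermediate step is
   needed: MULTI_STEP_CVT is 1 and INTERM_TYPES holds the vector type
   of short (V8HI).  */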

bool
supportable_widening_operation (enum tree_code code, gimple stmt, tree vectype,
                                tree *decl1, tree *decl2,
                                enum tree_code *code1, enum tree_code *code2,
                                int *multi_step_cvt,
                                VEC (tree, heap) **interm_types)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
  bool ordered_p;
  enum machine_mode vec_mode;
  enum insn_code icode1, icode2;
  optab optab1, optab2;
  tree type = gimple_expr_type (stmt);
  tree wide_vectype = get_vectype_for_scalar_type (type);
  enum tree_code c1, c2;

  /* The result of a vectorized widening operation usually requires two vectors
     (because the widened results do not fit in one vector).  The generated
     vector results would normally be expected to be generated in the same
     order as in the original scalar computation, i.e. if 8 results are
     generated in each vector iteration, they are to be organized as follows:
        vect1: [res1,res2,res3,res4], vect2: [res5,res6,res7,res8].

     However, in the special case that the result of the widening operation is
     used in a reduction computation only, the order doesn't matter (because
     when vectorizing a reduction we change the order of the computation).
     Some targets can take advantage of this and generate more efficient code.
     For example, targets like Altivec, that support widen_mult using a sequence
     of {mult_even,mult_odd}, generate the following vectors:
        vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8].

     When vectorizing outer-loops, we execute the inner-loop sequentially
     (each vectorized inner-loop iteration contributes to VF outer-loop
     iterations in parallel).  We therefore don't allow changing the order
     of the computation in the inner-loop during outer-loop vectorization.  */

  if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
      && !nested_in_vect_loop_p (vect_loop, stmt))
    ordered_p = false;
  else
    ordered_p = true;

  if (!ordered_p
      && code == WIDEN_MULT_EXPR
      && targetm.vectorize.builtin_mul_widen_even
      && targetm.vectorize.builtin_mul_widen_even (vectype)
      && targetm.vectorize.builtin_mul_widen_odd
      && targetm.vectorize.builtin_mul_widen_odd (vectype))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Unordered widening operation detected.");

      *code1 = *code2 = CALL_EXPR;
      *decl1 = targetm.vectorize.builtin_mul_widen_even (vectype);
      *decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype);
      return true;
    }

  switch (code)
    {
    case WIDEN_MULT_EXPR:
      if (BYTES_BIG_ENDIAN)
        {
          c1 = VEC_WIDEN_MULT_HI_EXPR;
          c2 = VEC_WIDEN_MULT_LO_EXPR;
        }
      else
        {
          c2 = VEC_WIDEN_MULT_HI_EXPR;
          c1 = VEC_WIDEN_MULT_LO_EXPR;
        }
      break;

    CASE_CONVERT:
      if (BYTES_BIG_ENDIAN)
        {
          c1 = VEC_UNPACK_HI_EXPR;
          c2 = VEC_UNPACK_LO_EXPR;
        }
      else
        {
          c2 = VEC_UNPACK_HI_EXPR;
          c1 = VEC_UNPACK_LO_EXPR;
        }
      break;

    case FLOAT_EXPR:
      if (BYTES_BIG_ENDIAN)
        {
          c1 = VEC_UNPACK_FLOAT_HI_EXPR;
          c2 = VEC_UNPACK_FLOAT_LO_EXPR;
        }
      else
        {
          c2 = VEC_UNPACK_FLOAT_HI_EXPR;
          c1 = VEC_UNPACK_FLOAT_LO_EXPR;
        }
      break;

    case FIX_TRUNC_EXPR:
      /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
	 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
	 computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (code == FIX_TRUNC_EXPR)
    {
      /* The signedness is determined from the output operand.  */
      optab1 = optab_for_tree_code (c1, type, optab_default);
      optab2 = optab_for_tree_code (c2, type, optab_default);
    }
  else
    {
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      optab2 = optab_for_tree_code (c2, vectype, optab_default);
    }

  if (!optab1 || !optab2)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)->insn_code) == CODE_FOR_nothing
      || (icode2 = optab_handler (optab2, vec_mode)->insn_code)
	 == CODE_FOR_nothing)
    return false;

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
  if (insn_data[icode1].operand[0].mode != TYPE_MODE (wide_vectype)
      || insn_data[icode2].operand[0].mode != TYPE_MODE (wide_vectype))
    {
      int i;
      tree prev_type = vectype, intermediate_type;
      enum machine_mode intermediate_mode, prev_mode = vec_mode;
      optab optab3, optab4;

      if (!CONVERT_EXPR_CODE_P (code))
        return false;

      *code1 = c1;
      *code2 = c2;

      /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
         intermediate steps in the promotion sequence.  We try
         MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do not.  */
      *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
      for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
        {
          intermediate_mode = insn_data[icode1].operand[0].mode;
          intermediate_type = lang_hooks.types.type_for_mode (intermediate_mode,
                                                     TYPE_UNSIGNED (prev_type));
          optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
          optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);

          if (!optab3 || !optab4
              || (icode1 = optab_handler (optab1, prev_mode)->insn_code)
                 == CODE_FOR_nothing
              || insn_data[icode1].operand[0].mode != intermediate_mode
              || (icode2 = optab_handler (optab2, prev_mode)->insn_code)
                 == CODE_FOR_nothing
              || insn_data[icode2].operand[0].mode != intermediate_mode
              || (icode1 = optab_handler (optab3, intermediate_mode)->insn_code)
                 == CODE_FOR_nothing
              || (icode2 = optab_handler (optab4, intermediate_mode)->insn_code)
                 == CODE_FOR_nothing)
            return false;

          VEC_quick_push (tree, *interm_types, intermediate_type);
          (*multi_step_cvt)++;

          if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
              && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
            return true;

          prev_type = intermediate_type;
          prev_mode = intermediate_mode;
        }

      return false;
    }

  *code1 = c1;
  *code2 = c2;
  return true;
}


/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE).

   Narrowing operations we currently support are NOP (CONVERT) and
   FIX_TRUNC.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like int->short->char - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   narrowing operation (short in the above example).  */
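
/* A worked instance, mirroring the widening example above (vector modes
   are illustrative and target dependent): narrowing int->char with
   16-byte vectors packs V4SI -> V8HI -> V16QI, so MULTI_STEP_CVT is 1
   and INTERM_TYPES holds the vector type of short (V8HI).  */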

bool
supportable_narrowing_operation (enum tree_code code,
				 const_gimple stmt, tree vectype,
				 enum tree_code *code1, int *multi_step_cvt,
				 VEC (tree, heap) **interm_types)
{
  enum machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree type = gimple_expr_type (stmt);
  tree narrow_vectype = get_vectype_for_scalar_type (type);
  enum tree_code c1;
  tree intermediate_type, prev_type;
  int i;

  switch (code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      break;

    case FLOAT_EXPR:
      /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
	 tree code and optabs used for computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (code == FIX_TRUNC_EXPR)
    /* The signedness is determined from the output operand.  */
    optab1 = optab_for_tree_code (c1, type, optab_default);
  else
    optab1 = optab_for_tree_code (c1, vectype, optab_default);

  if (!optab1)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)->insn_code)
      == CODE_FOR_nothing)
    return false;

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
  if (insn_data[icode1].operand[0].mode != TYPE_MODE (narrow_vectype))
    {
      enum machine_mode intermediate_mode, prev_mode = vec_mode;

      *code1 = c1;
      prev_type = vectype;
      /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
         intermediate steps in the narrowing sequence.  We try
         MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
      *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
      for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
        {
          intermediate_mode = insn_data[icode1].operand[0].mode;
          intermediate_type = lang_hooks.types.type_for_mode (intermediate_mode,
                                                     TYPE_UNSIGNED (prev_type));
          interm_optab = optab_for_tree_code (c1, intermediate_type,
                                              optab_default);
          if (!interm_optab
              || (icode1 = optab_handler (optab1, prev_mode)->insn_code)
                 == CODE_FOR_nothing
              || insn_data[icode1].operand[0].mode != intermediate_mode
              || (icode1 = optab_handler (interm_optab,
                                          intermediate_mode)->insn_code)
                 == CODE_FOR_nothing)
            return false;

          VEC_quick_push (tree, *interm_types, intermediate_type);
          (*multi_step_cvt)++;

          if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
            return true;

          prev_type = intermediate_type;
          prev_mode = intermediate_mode;
        }

      return false;
    }

  *code1 = c1;
  return true;
}