xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/tree-vect-stmts.c (revision bdc22b2e01993381dcefeff2bc9b56ca75a4235c)
1 /* Statement Analysis and Transformation for Vectorization
2    Copyright (C) 2003-2015 Free Software Foundation, Inc.
3    Contributed by Dorit Naishlos <dorit@il.ibm.com>
4    and Ira Rosen <irar@il.ibm.com>
5 
6 This file is part of GCC.
7 
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12 
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
16 for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3.  If not see
20 <http://www.gnu.org/licenses/>.  */
21 
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "dumpfile.h"
26 #include "tm.h"
27 #include "hash-set.h"
28 #include "machmode.h"
29 #include "vec.h"
30 #include "double-int.h"
31 #include "input.h"
32 #include "alias.h"
33 #include "symtab.h"
34 #include "wide-int.h"
35 #include "inchash.h"
36 #include "tree.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "target.h"
40 #include "predict.h"
41 #include "hard-reg-set.h"
42 #include "function.h"
43 #include "dominance.h"
44 #include "cfg.h"
45 #include "basic-block.h"
46 #include "gimple-pretty-print.h"
47 #include "tree-ssa-alias.h"
48 #include "internal-fn.h"
49 #include "tree-eh.h"
50 #include "gimple-expr.h"
51 #include "is-a.h"
52 #include "gimple.h"
53 #include "gimplify.h"
54 #include "gimple-iterator.h"
55 #include "gimplify-me.h"
56 #include "gimple-ssa.h"
57 #include "tree-cfg.h"
58 #include "tree-phinodes.h"
59 #include "ssa-iterators.h"
60 #include "stringpool.h"
61 #include "tree-ssanames.h"
62 #include "tree-ssa-loop-manip.h"
63 #include "cfgloop.h"
64 #include "tree-ssa-loop.h"
65 #include "tree-scalar-evolution.h"
66 #include "hashtab.h"
67 #include "rtl.h"
68 #include "flags.h"
69 #include "statistics.h"
70 #include "real.h"
71 #include "fixed-value.h"
72 #include "insn-config.h"
73 #include "expmed.h"
74 #include "dojump.h"
75 #include "explow.h"
76 #include "calls.h"
77 #include "emit-rtl.h"
78 #include "varasm.h"
79 #include "stmt.h"
80 #include "expr.h"
81 #include "recog.h"		/* FIXME: for insn_data */
82 #include "insn-codes.h"
83 #include "optabs.h"
84 #include "diagnostic-core.h"
85 #include "tree-vectorizer.h"
86 #include "hash-map.h"
87 #include "plugin-api.h"
88 #include "ipa-ref.h"
89 #include "cgraph.h"
90 #include "builtins.h"
91 
92 /* For lang_hooks.types.type_for_mode.  */
93 #include "langhooks.h"
94 
95 /* Return the vectorized type for the given statement.  */
96 
97 tree
98 stmt_vectype (struct _stmt_vec_info *stmt_info)
99 {
100   return STMT_VINFO_VECTYPE (stmt_info);
101 }
102 
103 /* Return TRUE iff the given statement is in an inner loop relative to
104    the loop being vectorized.  */
105 bool
106 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
107 {
108   gimple stmt = STMT_VINFO_STMT (stmt_info);
109   basic_block bb = gimple_bb (stmt);
110   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
111   struct loop* loop;
112 
113   if (!loop_vinfo)
114     return false;
115 
116   loop = LOOP_VINFO_LOOP (loop_vinfo);
117 
118   return (bb->loop_father == loop->inner);
119 }
120 
121 /* Record the cost of a statement, either by directly informing the
122    target model or by saving it in a vector for later processing.
123    Return a preliminary estimate of the statement's cost.  */
124 
125 unsigned
126 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
127 		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
128 		  int misalign, enum vect_cost_model_location where)
129 {
130   if (body_cost_vec)
131     {
132       tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
133       add_stmt_info_to_vec (body_cost_vec, count, kind,
134 			    stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
135 			    misalign);
136       return (unsigned)
137 	(builtin_vectorization_cost (kind, vectype, misalign) * count);
138 
139     }
140   else
141     {
142       loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
143       bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
144       void *target_cost_data;
145 
146       if (loop_vinfo)
147 	target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
148       else
149 	target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
150 
151       return add_stmt_cost (target_cost_data, count, kind, stmt_info,
152 			    misalign, where);
153     }
154 }
155 
156 /* Return a variable of type ELEM_TYPE[NELEMS].  */
157 
158 static tree
159 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
160 {
161   return create_tmp_var (build_array_type_nelts (elem_type, nelems),
162 			 "vect_array");
163 }
164 
165 /* ARRAY is an array of vectors created by create_vector_array.
166    Return an SSA_NAME for the vector in index N.  The reference
167    is part of the vectorization of STMT and the vector is associated
168    with scalar destination SCALAR_DEST.  */
169 
170 static tree
171 read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
172 		   tree array, unsigned HOST_WIDE_INT n)
173 {
174   tree vect_type, vect, vect_name, array_ref;
175   gimple new_stmt;
176 
177   gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
178   vect_type = TREE_TYPE (TREE_TYPE (array));
179   vect = vect_create_destination_var (scalar_dest, vect_type);
180   array_ref = build4 (ARRAY_REF, vect_type, array,
181 		      build_int_cst (size_type_node, n),
182 		      NULL_TREE, NULL_TREE);
183 
184   new_stmt = gimple_build_assign (vect, array_ref);
185   vect_name = make_ssa_name (vect, new_stmt);
186   gimple_assign_set_lhs (new_stmt, vect_name);
187   vect_finish_stmt_generation (stmt, new_stmt, gsi);
188 
189   return vect_name;
190 }
191 
192 /* ARRAY is an array of vectors created by create_vector_array.
193    Emit code to store SSA_NAME VECT in index N of the array.
194    The store is part of the vectorization of STMT.  */
195 
196 static void
197 write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
198 		    tree array, unsigned HOST_WIDE_INT n)
199 {
200   tree array_ref;
201   gimple new_stmt;
202 
203   array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
204 		      build_int_cst (size_type_node, n),
205 		      NULL_TREE, NULL_TREE);
206 
207   new_stmt = gimple_build_assign (array_ref, vect);
208   vect_finish_stmt_generation (stmt, new_stmt, gsi);
209 }
210 
211 /* PTR is a pointer to an array of type TYPE.  Return a representation
212    of *PTR.  The memory reference replaces those in FIRST_DR
213    (and its group).  */
214 
215 static tree
216 create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
217 {
218   tree mem_ref, alias_ptr_type;
219 
220   alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
221   mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
222   /* Arrays have the same alignment as their type.  */
223   set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
224   return mem_ref;
225 }
226 
227 /* Utility functions used by vect_mark_stmts_to_be_vectorized.  */
228 
229 /* Function vect_mark_relevant.
230 
231    Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */
232 
233 static void
234 vect_mark_relevant (vec<gimple> *worklist, gimple stmt,
235 		    enum vect_relevant relevant, bool live_p,
236 		    bool used_in_pattern)
237 {
238   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
239   enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
240   bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
241   gimple pattern_stmt;
242 
243   if (dump_enabled_p ())
244     dump_printf_loc (MSG_NOTE, vect_location,
245                      "mark relevant %d, live %d.\n", relevant, live_p);
246 
247   /* If this stmt is an original stmt in a pattern, we might need to mark its
248      related pattern stmt instead of the original stmt.  However, such stmts
249      may have their own uses that are not in any pattern, in such cases the
250      stmt itself should be marked.  */
251   if (STMT_VINFO_IN_PATTERN_P (stmt_info))
252     {
253       bool found = false;
254       if (!used_in_pattern)
255         {
256           imm_use_iterator imm_iter;
257           use_operand_p use_p;
258           gimple use_stmt;
259           tree lhs;
260 	  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
261 	  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
262 
263           if (is_gimple_assign (stmt))
264             lhs = gimple_assign_lhs (stmt);
265           else
266             lhs = gimple_call_lhs (stmt);
267 
268           /* This use is out of pattern use, if LHS has other uses that are
269              pattern uses, we should mark the stmt itself, and not the pattern
270              stmt.  */
271 	  if (lhs && TREE_CODE (lhs) == SSA_NAME)
272 	    FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
273 	      {
274 		if (is_gimple_debug (USE_STMT (use_p)))
275 		  continue;
276 		use_stmt = USE_STMT (use_p);
277 
278 		if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
279 		  continue;
280 
281 		if (vinfo_for_stmt (use_stmt)
282 		    && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
283 		  {
284 		    found = true;
285 		    break;
286 		  }
287 	      }
288         }
289 
290       if (!found)
291         {
292           /* This is the last stmt in a sequence that was detected as a
293              pattern that can potentially be vectorized.  Don't mark the stmt
294              as relevant/live because it's not going to be vectorized.
295              Instead mark the pattern-stmt that replaces it.  */
296 
297           pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
298 
299           if (dump_enabled_p ())
300             dump_printf_loc (MSG_NOTE, vect_location,
301                              "last stmt in pattern. don't mark"
302                              " relevant/live.\n");
303           stmt_info = vinfo_for_stmt (pattern_stmt);
304           gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
305           save_relevant = STMT_VINFO_RELEVANT (stmt_info);
306           save_live_p = STMT_VINFO_LIVE_P (stmt_info);
307           stmt = pattern_stmt;
308         }
309     }
310 
311   STMT_VINFO_LIVE_P (stmt_info) |= live_p;
312   if (relevant > STMT_VINFO_RELEVANT (stmt_info))
313     STMT_VINFO_RELEVANT (stmt_info) = relevant;
314 
315   if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
316       && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
317     {
318       if (dump_enabled_p ())
319         dump_printf_loc (MSG_NOTE, vect_location,
320                          "already marked relevant/live.\n");
321       return;
322     }
323 
324   worklist->safe_push (stmt);
325 }
326 
327 
328 /* Function vect_stmt_relevant_p.
329 
330    Return true if STMT in loop that is represented by LOOP_VINFO is
331    "relevant for vectorization".
332 
333    A stmt is considered "relevant for vectorization" if:
334    - it has uses outside the loop.
335    - it has vdefs (it alters memory).
336    - control stmts in the loop (except for the exit condition).
337 
338    CHECKME: what other side effects would the vectorizer allow?  */
339 
340 static bool
341 vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
342 		      enum vect_relevant *relevant, bool *live_p)
343 {
344   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
345   ssa_op_iter op_iter;
346   imm_use_iterator imm_iter;
347   use_operand_p use_p;
348   def_operand_p def_p;
349 
350   *relevant = vect_unused_in_scope;
351   *live_p = false;
352 
353   /* cond stmt other than loop exit cond.  */
354   if (is_ctrl_stmt (stmt)
355       && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
356          != loop_exit_ctrl_vec_info_type)
357     *relevant = vect_used_in_scope;
358 
359   /* changing memory.  */
360   if (gimple_code (stmt) != GIMPLE_PHI)
361     if (gimple_vdef (stmt)
362 	&& !gimple_clobber_p (stmt))
363       {
364 	if (dump_enabled_p ())
365 	  dump_printf_loc (MSG_NOTE, vect_location,
366                            "vec_stmt_relevant_p: stmt has vdefs.\n");
367 	*relevant = vect_used_in_scope;
368       }
369 
370   /* uses outside the loop.  */
371   FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
372     {
373       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
374 	{
375 	  basic_block bb = gimple_bb (USE_STMT (use_p));
376 	  if (!flow_bb_inside_loop_p (loop, bb))
377 	    {
378 	      if (dump_enabled_p ())
379 		dump_printf_loc (MSG_NOTE, vect_location,
380                                  "vec_stmt_relevant_p: used out of loop.\n");
381 
382 	      if (is_gimple_debug (USE_STMT (use_p)))
383 		continue;
384 
385 	      /* We expect all such uses to be in the loop exit phis
386 		 (because of loop closed form)   */
387 	      gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
388 	      gcc_assert (bb == single_exit (loop)->dest);
389 
390               *live_p = true;
391 	    }
392 	}
393     }
394 
395   return (*live_p || *relevant);
396 }
397 
398 
399 /* Function exist_non_indexing_operands_for_use_p
400 
401    USE is one of the uses attached to STMT.  Check if USE is
402    used in STMT for anything other than indexing an array.  */
403 
404 static bool
405 exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
406 {
407   tree operand;
408   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
409 
410   /* USE corresponds to some operand in STMT.  If there is no data
411      reference in STMT, then any operand that corresponds to USE
412      is not indexing an array.  */
413   if (!STMT_VINFO_DATA_REF (stmt_info))
414     return true;
415 
416   /* STMT has a data_ref. FORNOW this means that its of one of
417      the following forms:
418      -1- ARRAY_REF = var
419      -2- var = ARRAY_REF
420      (This should have been verified in analyze_data_refs).
421 
422      'var' in the second case corresponds to a def, not a use,
423      so USE cannot correspond to any operands that are not used
424      for array indexing.
425 
426      Therefore, all we need to check is if STMT falls into the
427      first case, and whether var corresponds to USE.  */
428 
429   if (!gimple_assign_copy_p (stmt))
430     {
431       if (is_gimple_call (stmt)
432 	  && gimple_call_internal_p (stmt))
433 	switch (gimple_call_internal_fn (stmt))
434 	  {
435 	  case IFN_MASK_STORE:
436 	    operand = gimple_call_arg (stmt, 3);
437 	    if (operand == use)
438 	      return true;
439 	    /* FALLTHRU */
440 	  case IFN_MASK_LOAD:
441 	    operand = gimple_call_arg (stmt, 2);
442 	    if (operand == use)
443 	      return true;
444 	    break;
445 	  default:
446 	    break;
447 	  }
448       return false;
449     }
450 
451   if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
452     return false;
453   operand = gimple_assign_rhs1 (stmt);
454   if (TREE_CODE (operand) != SSA_NAME)
455     return false;
456 
457   if (operand == use)
458     return true;
459 
460   return false;
461 }
462 
463 
464 /*
465    Function process_use.
466 
467    Inputs:
468    - a USE in STMT in a loop represented by LOOP_VINFO
469    - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
470      that defined USE.  This is done by calling mark_relevant and passing it
471      the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
472    - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
473      be performed.
474 
475    Outputs:
476    Generally, LIVE_P and RELEVANT are used to define the liveness and
477    relevance info of the DEF_STMT of this USE:
478        STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
479        STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
480    Exceptions:
481    - case 1: If USE is used only for address computations (e.g. array indexing),
482    which does not need to be directly vectorized, then the liveness/relevance
483    of the respective DEF_STMT is left unchanged.
484    - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
485    skip DEF_STMT cause it had already been processed.
486    - case 3: If DEF_STMT and STMT are in different nests, then  "relevant" will
487    be modified accordingly.
488 
489    Return true if everything is as expected. Return false otherwise.  */
490 
491 static bool
492 process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
493 	     enum vect_relevant relevant, vec<gimple> *worklist,
494 	     bool force)
495 {
496   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
497   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
498   stmt_vec_info dstmt_vinfo;
499   basic_block bb, def_bb;
500   tree def;
501   gimple def_stmt;
502   enum vect_def_type dt;
503 
504   /* case 1: we are only interested in uses that need to be vectorized.  Uses
505      that are used for address computation are not considered relevant.  */
506   if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
507      return true;
508 
509   if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
510     {
511       if (dump_enabled_p ())
512         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
513                          "not vectorized: unsupported use in stmt.\n");
514       return false;
515     }
516 
517   if (!def_stmt || gimple_nop_p (def_stmt))
518     return true;
519 
520   def_bb = gimple_bb (def_stmt);
521   if (!flow_bb_inside_loop_p (loop, def_bb))
522     {
523       if (dump_enabled_p ())
524 	dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
525       return true;
526     }
527 
528   /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
529      DEF_STMT must have already been processed, because this should be the
530      only way that STMT, which is a reduction-phi, was put in the worklist,
531      as there should be no other uses for DEF_STMT in the loop.  So we just
532      check that everything is as expected, and we are done.  */
533   dstmt_vinfo = vinfo_for_stmt (def_stmt);
534   bb = gimple_bb (stmt);
535   if (gimple_code (stmt) == GIMPLE_PHI
536       && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
537       && gimple_code (def_stmt) != GIMPLE_PHI
538       && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
539       && bb->loop_father == def_bb->loop_father)
540     {
541       if (dump_enabled_p ())
542 	dump_printf_loc (MSG_NOTE, vect_location,
543                          "reduc-stmt defining reduc-phi in the same nest.\n");
544       if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
545 	dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
546       gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
547       gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
548 		  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
549       return true;
550     }
551 
552   /* case 3a: outer-loop stmt defining an inner-loop stmt:
553 	outer-loop-header-bb:
554 		d = def_stmt
555 	inner-loop:
556 		stmt # use (d)
557 	outer-loop-tail-bb:
558 		...		  */
559   if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
560     {
561       if (dump_enabled_p ())
562 	dump_printf_loc (MSG_NOTE, vect_location,
563                          "outer-loop def-stmt defining inner-loop stmt.\n");
564 
565       switch (relevant)
566 	{
567 	case vect_unused_in_scope:
568 	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
569 		      vect_used_in_scope : vect_unused_in_scope;
570 	  break;
571 
572 	case vect_used_in_outer_by_reduction:
573           gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
574 	  relevant = vect_used_by_reduction;
575 	  break;
576 
577 	case vect_used_in_outer:
578           gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
579 	  relevant = vect_used_in_scope;
580 	  break;
581 
582 	case vect_used_in_scope:
583 	  break;
584 
585 	default:
586 	  gcc_unreachable ();
587 	}
588     }
589 
590   /* case 3b: inner-loop stmt defining an outer-loop stmt:
591 	outer-loop-header-bb:
592 		...
593 	inner-loop:
594 		d = def_stmt
595 	outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
596 		stmt # use (d)		*/
597   else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
598     {
599       if (dump_enabled_p ())
600 	dump_printf_loc (MSG_NOTE, vect_location,
601                          "inner-loop def-stmt defining outer-loop stmt.\n");
602 
603       switch (relevant)
604         {
605         case vect_unused_in_scope:
606           relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
607             || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
608                       vect_used_in_outer_by_reduction : vect_unused_in_scope;
609           break;
610 
611         case vect_used_by_reduction:
612           relevant = vect_used_in_outer_by_reduction;
613           break;
614 
615         case vect_used_in_scope:
616           relevant = vect_used_in_outer;
617           break;
618 
619         default:
620           gcc_unreachable ();
621         }
622     }
623 
624   vect_mark_relevant (worklist, def_stmt, relevant, live_p,
625                       is_pattern_stmt_p (stmt_vinfo));
626   return true;
627 }
628 
629 
630 /* Function vect_mark_stmts_to_be_vectorized.
631 
632    Not all stmts in the loop need to be vectorized. For example:
633 
634      for i...
635        for j...
636    1.    T0 = i + j
637    2.	 T1 = a[T0]
638 
639    3.    j = j + 1
640 
641    Stmt 1 and 3 do not need to be vectorized, because loop control and
642    addressing of vectorized data-refs are handled differently.
643 
644    This pass detects such stmts.  */
645 
646 bool
647 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
648 {
649   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
650   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
651   unsigned int nbbs = loop->num_nodes;
652   gimple_stmt_iterator si;
653   gimple stmt;
654   unsigned int i;
655   stmt_vec_info stmt_vinfo;
656   basic_block bb;
657   gimple phi;
658   bool live_p;
659   enum vect_relevant relevant, tmp_relevant;
660   enum vect_def_type def_type;
661 
662   if (dump_enabled_p ())
663     dump_printf_loc (MSG_NOTE, vect_location,
664                      "=== vect_mark_stmts_to_be_vectorized ===\n");
665 
666   auto_vec<gimple, 64> worklist;
667 
668   /* 1. Init worklist.  */
669   for (i = 0; i < nbbs; i++)
670     {
671       bb = bbs[i];
672       for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
673 	{
674 	  phi = gsi_stmt (si);
675 	  if (dump_enabled_p ())
676 	    {
677 	      dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
678 	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
679 	    }
680 
681 	  if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
682 	    vect_mark_relevant (&worklist, phi, relevant, live_p, false);
683 	}
684       for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
685 	{
686 	  stmt = gsi_stmt (si);
687 	  if (dump_enabled_p ())
688 	    {
689 	      dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
690 	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
691 	    }
692 
693 	  if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
694             vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
695 	}
696     }
697 
698   /* 2. Process_worklist */
699   while (worklist.length () > 0)
700     {
701       use_operand_p use_p;
702       ssa_op_iter iter;
703 
704       stmt = worklist.pop ();
705       if (dump_enabled_p ())
706 	{
707           dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
708           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
709 	}
710 
711       /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
712 	 (DEF_STMT) as relevant/irrelevant and live/dead according to the
713 	 liveness and relevance properties of STMT.  */
714       stmt_vinfo = vinfo_for_stmt (stmt);
715       relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
716       live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
717 
718       /* Generally, the liveness and relevance properties of STMT are
719 	 propagated as is to the DEF_STMTs of its USEs:
720 	  live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
721 	  relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
722 
723 	 One exception is when STMT has been identified as defining a reduction
724 	 variable; in this case we set the liveness/relevance as follows:
725 	   live_p = false
726 	   relevant = vect_used_by_reduction
727 	 This is because we distinguish between two kinds of relevant stmts -
728 	 those that are used by a reduction computation, and those that are
729 	 (also) used by a regular computation.  This allows us later on to
730 	 identify stmts that are used solely by a reduction, and therefore the
731 	 order of the results that they produce does not have to be kept.  */
732 
733       def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
734       tmp_relevant = relevant;
735       switch (def_type)
736         {
737           case vect_reduction_def:
738 	    switch (tmp_relevant)
739 	      {
740 	        case vect_unused_in_scope:
741 	          relevant = vect_used_by_reduction;
742 	          break;
743 
744 	        case vect_used_by_reduction:
745 	          if (gimple_code (stmt) == GIMPLE_PHI)
746                     break;
747   	          /* fall through */
748 
749 	        default:
750 	          if (dump_enabled_p ())
751 	            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
752                                      "unsupported use of reduction.\n");
753 	          return false;
754 	      }
755 
756 	    live_p = false;
757 	    break;
758 
759           case vect_nested_cycle:
760             if (tmp_relevant != vect_unused_in_scope
761                 && tmp_relevant != vect_used_in_outer_by_reduction
762                 && tmp_relevant != vect_used_in_outer)
763               {
764                 if (dump_enabled_p ())
765                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
766                                    "unsupported use of nested cycle.\n");
767 
768                 return false;
769               }
770 
771             live_p = false;
772             break;
773 
774           case vect_double_reduction_def:
775             if (tmp_relevant != vect_unused_in_scope
776                 && tmp_relevant != vect_used_by_reduction)
777               {
778                 if (dump_enabled_p ())
779                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
780                                    "unsupported use of double reduction.\n");
781 
782                 return false;
783               }
784 
785             live_p = false;
786             break;
787 
788           default:
789             break;
790         }
791 
792       if (is_pattern_stmt_p (stmt_vinfo))
793         {
794           /* Pattern statements are not inserted into the code, so
795              FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
796              have to scan the RHS or function arguments instead.  */
797           if (is_gimple_assign (stmt))
798             {
799 	      enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
800 	      tree op = gimple_assign_rhs1 (stmt);
801 
802 	      i = 1;
803 	      if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
804 		{
805 		  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
806 				    live_p, relevant, &worklist, false)
807 		      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
808 				       live_p, relevant, &worklist, false))
809 		    return false;
810 		  i = 2;
811 		}
812 	      for (; i < gimple_num_ops (stmt); i++)
813                 {
814 		  op = gimple_op (stmt, i);
815                   if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
816 				    &worklist, false))
817                     return false;
818                  }
819             }
820           else if (is_gimple_call (stmt))
821             {
822               for (i = 0; i < gimple_call_num_args (stmt); i++)
823                 {
824                   tree arg = gimple_call_arg (stmt, i);
825                   if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
826 				    &worklist, false))
827                     return false;
828                 }
829             }
830         }
831       else
832         FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
833           {
834             tree op = USE_FROM_PTR (use_p);
835             if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
836 			      &worklist, false))
837               return false;
838           }
839 
840       if (STMT_VINFO_GATHER_P (stmt_vinfo))
841 	{
842 	  tree off;
843 	  tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
844 	  gcc_assert (decl);
845 	  if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
846 			    &worklist, true))
847 	    return false;
848 	}
849     } /* while worklist */
850 
851   return true;
852 }
853 
854 
855 /* Function vect_model_simple_cost.
856 
857    Models cost for simple operations, i.e. those that only emit ncopies of a
858    single op.  Right now, this does not account for multiple insns that could
859    be generated for the single vector op.  We will handle that shortly.  */
860 
861 void
862 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
863 			enum vect_def_type *dt,
864 			stmt_vector_for_cost *prologue_cost_vec,
865 			stmt_vector_for_cost *body_cost_vec)
866 {
867   int i;
868   int inside_cost = 0, prologue_cost = 0;
869 
870   /* The SLP costs were already calculated during SLP tree build.  */
871   if (PURE_SLP_STMT (stmt_info))
872     return;
873 
874   /* FORNOW: Assuming maximum 2 args per stmts.  */
875   for (i = 0; i < 2; i++)
876     if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
877       prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
878 					 stmt_info, 0, vect_prologue);
879 
880   /* Pass the inside-of-loop statements to the target-specific cost model.  */
881   inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
882 				  stmt_info, 0, vect_body);
883 
884   if (dump_enabled_p ())
885     dump_printf_loc (MSG_NOTE, vect_location,
886                      "vect_model_simple_cost: inside_cost = %d, "
887                      "prologue_cost = %d .\n", inside_cost, prologue_cost);
888 }
889 
890 
891 /* Model cost for type demotion and promotion operations.  PWR is normally
892    zero for single-step promotions and demotions.  It will be one if
893    two-step promotion/demotion is required, and so on.  Each additional
894    step doubles the number of instructions required.  */
895 
896 static void
897 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
898 				    enum vect_def_type *dt, int pwr)
899 {
900   int i, tmp;
901   int inside_cost = 0, prologue_cost = 0;
902   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
903   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
904   void *target_cost_data;
905 
906   /* The SLP costs were already calculated during SLP tree build.  */
907   if (PURE_SLP_STMT (stmt_info))
908     return;
909 
910   if (loop_vinfo)
911     target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
912   else
913     target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
914 
915   for (i = 0; i < pwr + 1; i++)
916     {
917       tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
918 	(i + 1) : i;
919       inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
920 				    vec_promote_demote, stmt_info, 0,
921 				    vect_body);
922     }
923 
924   /* FORNOW: Assuming maximum 2 args per stmts.  */
925   for (i = 0; i < 2; i++)
926     if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
927       prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
928 				      stmt_info, 0, vect_prologue);
929 
930   if (dump_enabled_p ())
931     dump_printf_loc (MSG_NOTE, vect_location,
932                      "vect_model_promotion_demotion_cost: inside_cost = %d, "
933                      "prologue_cost = %d .\n", inside_cost, prologue_cost);
934 }
935 
936 /* Function vect_cost_group_size
937 
938    For grouped load or store, return the group_size only if it is the first
939    load or store of a group, else return 1.  This ensures that group size is
940    only returned once per group.  */
941 
942 static int
943 vect_cost_group_size (stmt_vec_info stmt_info)
944 {
945   gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
946 
947   if (first_stmt == STMT_VINFO_STMT (stmt_info))
948     return GROUP_SIZE (stmt_info);
949 
950   return 1;
951 }
952 
953 
954 /* Function vect_model_store_cost
955 
956    Models cost for stores.  In the case of grouped accesses, one access
957    has the overhead of the grouped access attributed to it.  */
958 
959 void
960 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
961 		       bool store_lanes_p, enum vect_def_type dt,
962 		       slp_tree slp_node,
963 		       stmt_vector_for_cost *prologue_cost_vec,
964 		       stmt_vector_for_cost *body_cost_vec)
965 {
966   int group_size;
967   unsigned int inside_cost = 0, prologue_cost = 0;
968   struct data_reference *first_dr;
969   gimple first_stmt;
970 
971   /* The SLP costs were already calculated during SLP tree build.  */
972   if (PURE_SLP_STMT (stmt_info))
973     return;
974 
975   if (dt == vect_constant_def || dt == vect_external_def)
976     prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
977 				       stmt_info, 0, vect_prologue);
978 
979   /* Grouped access?  */
980   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
981     {
982       if (slp_node)
983         {
984           first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
985           group_size = 1;
986         }
987       else
988         {
989           first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
990           group_size = vect_cost_group_size (stmt_info);
991         }
992 
993       first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
994     }
995   /* Not a grouped access.  */
996   else
997     {
998       group_size = 1;
999       first_dr = STMT_VINFO_DATA_REF (stmt_info);
1000     }
1001 
1002   /* We assume that the cost of a single store-lanes instruction is
1003      equivalent to the cost of GROUP_SIZE separate stores.  If a grouped
1004      access is instead being provided by a permute-and-store operation,
1005      include the cost of the permutes.  */
1006   if (!store_lanes_p && group_size > 1)
1007     {
1008       /* Uses a high and low interleave or shuffle operations for each
1009 	 needed permute.  */
1010       int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1011       inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1012 				      stmt_info, 0, vect_body);
1013 
1014       if (dump_enabled_p ())
1015         dump_printf_loc (MSG_NOTE, vect_location,
1016                          "vect_model_store_cost: strided group_size = %d .\n",
1017                          group_size);
1018     }
1019 
1020   /* Costs of the stores.  */
1021   vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);
1022 
1023   if (dump_enabled_p ())
1024     dump_printf_loc (MSG_NOTE, vect_location,
1025                      "vect_model_store_cost: inside_cost = %d, "
1026                      "prologue_cost = %d .\n", inside_cost, prologue_cost);
1027 }
1028 
1029 
1030 /* Calculate cost of DR's memory access.  */
1031 void
1032 vect_get_store_cost (struct data_reference *dr, int ncopies,
1033 		     unsigned int *inside_cost,
1034 		     stmt_vector_for_cost *body_cost_vec)
1035 {
1036   int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1037   gimple stmt = DR_STMT (dr);
1038   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1039 
1040   switch (alignment_support_scheme)
1041     {
1042     case dr_aligned:
1043       {
1044 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1045 					  vector_store, stmt_info, 0,
1046 					  vect_body);
1047 
1048         if (dump_enabled_p ())
1049           dump_printf_loc (MSG_NOTE, vect_location,
1050                            "vect_model_store_cost: aligned.\n");
1051         break;
1052       }
1053 
1054     case dr_unaligned_supported:
1055       {
1056         /* Here, we assign an additional cost for the unaligned store.  */
1057 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1058 					  unaligned_store, stmt_info,
1059 					  DR_MISALIGNMENT (dr), vect_body);
1060         if (dump_enabled_p ())
1061           dump_printf_loc (MSG_NOTE, vect_location,
1062                            "vect_model_store_cost: unaligned supported by "
1063                            "hardware.\n");
1064         break;
1065       }
1066 
1067     case dr_unaligned_unsupported:
1068       {
1069         *inside_cost = VECT_MAX_COST;
1070 
1071         if (dump_enabled_p ())
1072           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1073                            "vect_model_store_cost: unsupported access.\n");
1074         break;
1075       }
1076 
1077     default:
1078       gcc_unreachable ();
1079     }
1080 }
1081 
1082 
1083 /* Function vect_model_load_cost
1084 
1085    Models cost for loads.  In the case of grouped accesses, the last access
1086    has the overhead of the grouped access attributed to it.  Since unaligned
1087    accesses are supported for loads, we also account for the costs of the
1088    access scheme chosen.  */
1089 
1090 void
1091 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1092 		      bool load_lanes_p, slp_tree slp_node,
1093 		      stmt_vector_for_cost *prologue_cost_vec,
1094 		      stmt_vector_for_cost *body_cost_vec)
1095 {
1096   int group_size;
1097   gimple first_stmt;
1098   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
1099   unsigned int inside_cost = 0, prologue_cost = 0;
1100 
1101   /* The SLP costs were already calculated during SLP tree build.  */
1102   if (PURE_SLP_STMT (stmt_info))
1103     return;
1104 
1105   /* Grouped accesses?  */
1106   first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1107   if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
1108     {
1109       group_size = vect_cost_group_size (stmt_info);
1110       first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1111     }
1112   /* Not a grouped access.  */
1113   else
1114     {
1115       group_size = 1;
1116       first_dr = dr;
1117     }
1118 
1119   /* We assume that the cost of a single load-lanes instruction is
1120      equivalent to the cost of GROUP_SIZE separate loads.  If a grouped
1121      access is instead being provided by a load-and-permute operation,
1122      include the cost of the permutes.  */
1123   if (!load_lanes_p && group_size > 1)
1124     {
1125       /* Uses an even and odd extract operations or shuffle operations
1126 	 for each needed permute.  */
1127       int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1128       inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1129 				      stmt_info, 0, vect_body);
1130 
1131       if (dump_enabled_p ())
1132         dump_printf_loc (MSG_NOTE, vect_location,
1133                          "vect_model_load_cost: strided group_size = %d .\n",
1134                          group_size);
1135     }
1136 
1137   /* The loads themselves.  */
1138   if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1139     {
1140       /* N scalar loads plus gathering them into a vector.  */
1141       tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1142       inside_cost += record_stmt_cost (body_cost_vec,
1143 				       ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1144 				       scalar_load, stmt_info, 0, vect_body);
1145       inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1146 				       stmt_info, 0, vect_body);
1147     }
1148   else
1149     vect_get_load_cost (first_dr, ncopies,
1150 			((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1151 			 || group_size > 1 || slp_node),
1152 			&inside_cost, &prologue_cost,
1153 			prologue_cost_vec, body_cost_vec, true);
1154 
1155   if (dump_enabled_p ())
1156     dump_printf_loc (MSG_NOTE, vect_location,
1157                      "vect_model_load_cost: inside_cost = %d, "
1158                      "prologue_cost = %d .\n", inside_cost, prologue_cost);
1159 }
1160 
1161 
1162 /* Calculate cost of DR's memory access.  */
1163 void
1164 vect_get_load_cost (struct data_reference *dr, int ncopies,
1165 		    bool add_realign_cost, unsigned int *inside_cost,
1166 		    unsigned int *prologue_cost,
1167 		    stmt_vector_for_cost *prologue_cost_vec,
1168 		    stmt_vector_for_cost *body_cost_vec,
1169 		    bool record_prologue_costs)
1170 {
1171   int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1172   gimple stmt = DR_STMT (dr);
1173   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1174 
1175   switch (alignment_support_scheme)
1176     {
1177     case dr_aligned:
1178       {
1179 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1180 					  stmt_info, 0, vect_body);
1181 
1182         if (dump_enabled_p ())
1183           dump_printf_loc (MSG_NOTE, vect_location,
1184                            "vect_model_load_cost: aligned.\n");
1185 
1186         break;
1187       }
1188     case dr_unaligned_supported:
1189       {
1190         /* Here, we assign an additional cost for the unaligned load.  */
1191 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1192 					  unaligned_load, stmt_info,
1193 					  DR_MISALIGNMENT (dr), vect_body);
1194 
1195         if (dump_enabled_p ())
1196           dump_printf_loc (MSG_NOTE, vect_location,
1197                            "vect_model_load_cost: unaligned supported by "
1198                            "hardware.\n");
1199 
1200         break;
1201       }
1202     case dr_explicit_realign:
1203       {
1204 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1205 					  vector_load, stmt_info, 0, vect_body);
1206 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1207 					  vec_perm, stmt_info, 0, vect_body);
1208 
1209         /* FIXME: If the misalignment remains fixed across the iterations of
1210            the containing loop, the following cost should be added to the
1211            prologue costs.  */
1212         if (targetm.vectorize.builtin_mask_for_load)
1213 	  *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1214 					    stmt_info, 0, vect_body);
1215 
1216         if (dump_enabled_p ())
1217           dump_printf_loc (MSG_NOTE, vect_location,
1218                            "vect_model_load_cost: explicit realign\n");
1219 
1220         break;
1221       }
1222     case dr_explicit_realign_optimized:
1223       {
1224         if (dump_enabled_p ())
1225           dump_printf_loc (MSG_NOTE, vect_location,
1226                            "vect_model_load_cost: unaligned software "
1227                            "pipelined.\n");
1228 
1229         /* Unaligned software pipeline has a load of an address, an initial
1230            load, and possibly a mask operation to "prime" the loop.  However,
1231            if this is an access in a group of loads, which provide grouped
1232            access, then the above cost should only be considered for one
1233            access in the group.  Inside the loop, there is a load op
1234            and a realignment op.  */
1235 
1236         if (add_realign_cost && record_prologue_costs)
1237           {
1238 	    *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1239 						vector_stmt, stmt_info,
1240 						0, vect_prologue);
1241             if (targetm.vectorize.builtin_mask_for_load)
1242 	      *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1243 						  vector_stmt, stmt_info,
1244 						  0, vect_prologue);
1245           }
1246 
1247 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1248 					  stmt_info, 0, vect_body);
1249 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1250 					  stmt_info, 0, vect_body);
1251 
1252         if (dump_enabled_p ())
1253           dump_printf_loc (MSG_NOTE, vect_location,
1254                            "vect_model_load_cost: explicit realign optimized"
1255                            "\n");
1256 
1257         break;
1258       }
1259 
1260     case dr_unaligned_unsupported:
1261       {
1262         *inside_cost = VECT_MAX_COST;
1263 
1264         if (dump_enabled_p ())
1265           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1266                            "vect_model_load_cost: unsupported access.\n");
1267         break;
1268       }
1269 
1270     default:
1271       gcc_unreachable ();
1272     }
1273 }
1274 
1275 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1276    the loop preheader for the vectorized stmt STMT.  */
1277 
1278 static void
1279 vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
1280 {
1281   if (gsi)
1282     vect_finish_stmt_generation (stmt, new_stmt, gsi);
1283   else
1284     {
1285       stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1286       loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1287 
1288       if (loop_vinfo)
1289         {
1290           struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1291 	  basic_block new_bb;
1292 	  edge pe;
1293 
1294           if (nested_in_vect_loop_p (loop, stmt))
1295             loop = loop->inner;
1296 
1297 	  pe = loop_preheader_edge (loop);
1298           new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1299           gcc_assert (!new_bb);
1300 	}
1301       else
1302        {
1303           bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1304           basic_block bb;
1305           gimple_stmt_iterator gsi_bb_start;
1306 
1307           gcc_assert (bb_vinfo);
1308           bb = BB_VINFO_BB (bb_vinfo);
1309           gsi_bb_start = gsi_after_labels (bb);
1310           gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1311        }
1312     }
1313 
1314   if (dump_enabled_p ())
1315     {
1316       dump_printf_loc (MSG_NOTE, vect_location,
1317                        "created new init_stmt: ");
1318       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1319     }
1320 }
1321 
1322 /* Function vect_init_vector.
1323 
1324    Insert a new stmt (INIT_STMT) that initializes a new variable of type
1325    TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
1326    vector type a vector with all elements equal to VAL is created first.
1327    Place the initialization at BSI if it is not NULL.  Otherwise, place the
1328    initialization at the loop preheader.
1329    Return the DEF of INIT_STMT.
1330    It will be used in the vectorization of STMT.  */
1331 
1332 tree
1333 vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1334 {
1335   tree new_var;
1336   gimple init_stmt;
1337   tree vec_oprnd;
1338   tree new_temp;
1339 
1340   if (TREE_CODE (type) == VECTOR_TYPE
1341       && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
1342     {
1343       if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1344 	{
1345 	  if (CONSTANT_CLASS_P (val))
1346 	    val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
1347 	  else
1348 	    {
1349 	      new_temp = make_ssa_name (TREE_TYPE (type));
1350 	      init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1351 	      vect_init_vector_1 (stmt, init_stmt, gsi);
1352 	      val = new_temp;
1353 	    }
1354 	}
1355       val = build_vector_from_val (type, val);
1356     }
1357 
1358   new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
1359   init_stmt = gimple_build_assign  (new_var, val);
1360   new_temp = make_ssa_name (new_var, init_stmt);
1361   gimple_assign_set_lhs (init_stmt, new_temp);
1362   vect_init_vector_1 (stmt, init_stmt, gsi);
1363   vec_oprnd = gimple_assign_lhs (init_stmt);
1364   return vec_oprnd;
1365 }
1366 
1367 
1368 /* Function vect_get_vec_def_for_operand.
1369 
1370    OP is an operand in STMT.  This function returns a (vector) def that will be
1371    used in the vectorized stmt for STMT.
1372 
1373    In the case that OP is an SSA_NAME which is defined in the loop, then
1374    STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1375 
1376    In case OP is an invariant or constant, a new stmt that creates a vector def
1377    needs to be introduced.  */
1378 
1379 tree
1380 vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
1381 {
1382   tree vec_oprnd;
1383   gimple vec_stmt;
1384   gimple def_stmt;
1385   stmt_vec_info def_stmt_info = NULL;
1386   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1387   unsigned int nunits;
1388   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1389   tree def;
1390   enum vect_def_type dt;
1391   bool is_simple_use;
1392   tree vector_type;
1393 
1394   if (dump_enabled_p ())
1395     {
1396       dump_printf_loc (MSG_NOTE, vect_location,
1397                        "vect_get_vec_def_for_operand: ");
1398       dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1399       dump_printf (MSG_NOTE, "\n");
1400     }
1401 
1402   is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
1403 				      &def_stmt, &def, &dt);
1404   gcc_assert (is_simple_use);
1405   if (dump_enabled_p ())
1406     {
1407       int loc_printed = 0;
1408       if (def)
1409         {
1410           dump_printf_loc (MSG_NOTE, vect_location, "def =  ");
1411           loc_printed = 1;
1412           dump_generic_expr (MSG_NOTE, TDF_SLIM, def);
1413           dump_printf (MSG_NOTE, "\n");
1414         }
1415       if (def_stmt)
1416         {
1417           if (loc_printed)
1418             dump_printf (MSG_NOTE, "  def_stmt =  ");
1419           else
1420             dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt =  ");
1421 	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1422         }
1423     }
1424 
1425   switch (dt)
1426     {
1427     /* Case 1: operand is a constant.  */
1428     case vect_constant_def:
1429       {
1430 	vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1431 	gcc_assert (vector_type);
1432 	nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1433 
1434 	if (scalar_def)
1435 	  *scalar_def = op;
1436 
1437         /* Create 'vect_cst_ = {cst,cst,...,cst}'  */
1438         if (dump_enabled_p ())
1439           dump_printf_loc (MSG_NOTE, vect_location,
1440                            "Create vector_cst. nunits = %d\n", nunits);
1441 
1442         return vect_init_vector (stmt, op, vector_type, NULL);
1443       }
1444 
1445     /* Case 2: operand is defined outside the loop - loop invariant.  */
1446     case vect_external_def:
1447       {
1448 	vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1449 	gcc_assert (vector_type);
1450 
1451 	if (scalar_def)
1452 	  *scalar_def = def;
1453 
1454         /* Create 'vec_inv = {inv,inv,..,inv}'  */
1455         if (dump_enabled_p ())
1456           dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv.\n");
1457 
1458         return vect_init_vector (stmt, def, vector_type, NULL);
1459       }
1460 
1461     /* Case 3: operand is defined inside the loop.  */
1462     case vect_internal_def:
1463       {
1464 	if (scalar_def)
1465 	  *scalar_def = NULL/* FIXME tuples: def_stmt*/;
1466 
1467         /* Get the def from the vectorized stmt.  */
1468         def_stmt_info = vinfo_for_stmt (def_stmt);
1469 
1470         vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1471         /* Get vectorized pattern statement.  */
1472         if (!vec_stmt
1473             && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1474             && !STMT_VINFO_RELEVANT (def_stmt_info))
1475           vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1476                        STMT_VINFO_RELATED_STMT (def_stmt_info)));
1477         gcc_assert (vec_stmt);
1478 	if (gimple_code (vec_stmt) == GIMPLE_PHI)
1479 	  vec_oprnd = PHI_RESULT (vec_stmt);
1480 	else if (is_gimple_call (vec_stmt))
1481 	  vec_oprnd = gimple_call_lhs (vec_stmt);
1482 	else
1483 	  vec_oprnd = gimple_assign_lhs (vec_stmt);
1484         return vec_oprnd;
1485       }
1486 
1487     /* Case 4: operand is defined by a loop header phi - reduction  */
1488     case vect_reduction_def:
1489     case vect_double_reduction_def:
1490     case vect_nested_cycle:
1491       {
1492 	struct loop *loop;
1493 
1494 	gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1495 	loop = (gimple_bb (def_stmt))->loop_father;
1496 
1497         /* Get the def before the loop  */
1498         op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1499         return get_initial_def_for_reduction (stmt, op, scalar_def);
1500      }
1501 
1502     /* Case 5: operand is defined by loop-header phi - induction.  */
1503     case vect_induction_def:
1504       {
1505 	gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1506 
1507         /* Get the def from the vectorized stmt.  */
1508         def_stmt_info = vinfo_for_stmt (def_stmt);
1509         vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1510 	if (gimple_code (vec_stmt) == GIMPLE_PHI)
1511 	  vec_oprnd = PHI_RESULT (vec_stmt);
1512 	else
1513 	  vec_oprnd = gimple_get_lhs (vec_stmt);
1514         return vec_oprnd;
1515       }
1516 
1517     default:
1518       gcc_unreachable ();
1519     }
1520 }
1521 
1522 
1523 /* Function vect_get_vec_def_for_stmt_copy
1524 
1525    Return a vector-def for an operand.  This function is used when the
1526    vectorized stmt to be created (by the caller to this function) is a "copy"
1527    created in case the vectorized result cannot fit in one vector, and several
1528    copies of the vector-stmt are required.  In this case the vector-def is
1529    retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1530    of the stmt that defines VEC_OPRND.
1531    DT is the type of the vector def VEC_OPRND.
1532 
1533    Context:
1534         In case the vectorization factor (VF) is bigger than the number
1535    of elements that can fit in a vectype (nunits), we have to generate
1536    more than one vector stmt to vectorize the scalar stmt.  This situation
1537    arises when there are multiple data-types operated upon in the loop; the
1538    smallest data-type determines the VF, and as a result, when vectorizing
1539    stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1540    vector stmt (each computing a vector of 'nunits' results, and together
1541    computing 'VF' results in each iteration).  This function is called when
1542    vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1543    which VF=16 and nunits=4, so the number of copies required is 4):
1544 
1545    scalar stmt:         vectorized into:        STMT_VINFO_RELATED_STMT
1546 
1547    S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
1548                         VS1.1:  vx.1 = memref1      VS1.2
1549                         VS1.2:  vx.2 = memref2      VS1.3
1550                         VS1.3:  vx.3 = memref3
1551 
1552    S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
1553                         VSnew.1:  vz1 = vx.1 + ...  VSnew.2
1554                         VSnew.2:  vz2 = vx.2 + ...  VSnew.3
1555                         VSnew.3:  vz3 = vx.3 + ...
1556 
1557    The vectorization of S1 is explained in vectorizable_load.
1558    The vectorization of S2:
1559         To create the first vector-stmt out of the 4 copies - VSnew.0 -
1560    the function 'vect_get_vec_def_for_operand' is called to
1561    get the relevant vector-def for each operand of S2.  For operand x it
1562    returns  the vector-def 'vx.0'.
1563 
1564         To create the remaining copies of the vector-stmt (VSnew.j), this
1565    function is called to get the relevant vector-def for each operand.  It is
1566    obtained from the respective VS1.j stmt, which is recorded in the
1567    STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1568 
1569         For example, to obtain the vector-def 'vx.1' in order to create the
1570    vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1571    Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1572    STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1573    and return its def ('vx.1').
1574    Overall, to create the above sequence this function will be called 3 times:
1575         vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1576         vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1577         vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */
1578 
1579 tree
1580 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1581 {
1582   gimple vec_stmt_for_operand;
1583   stmt_vec_info def_stmt_info;
1584 
1585   /* Do nothing; can reuse same def.  */
1586   if (dt == vect_external_def || dt == vect_constant_def )
1587     return vec_oprnd;
1588 
1589   vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1590   def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1591   gcc_assert (def_stmt_info);
1592   vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1593   gcc_assert (vec_stmt_for_operand);
1594   vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1595   if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1596     vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1597   else
1598     vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1599   return vec_oprnd;
1600 }
1601 
1602 
1603 /* Get vectorized definitions for the operands to create a copy of an original
1604    stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */
1605 
1606 static void
1607 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1608 				 vec<tree> *vec_oprnds0,
1609 				 vec<tree> *vec_oprnds1)
1610 {
1611   tree vec_oprnd = vec_oprnds0->pop ();
1612 
1613   vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1614   vec_oprnds0->quick_push (vec_oprnd);
1615 
1616   if (vec_oprnds1 && vec_oprnds1->length ())
1617     {
1618       vec_oprnd = vec_oprnds1->pop ();
1619       vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1620       vec_oprnds1->quick_push (vec_oprnd);
1621     }
1622 }
1623 
1624 
1625 /* Get vectorized definitions for OP0 and OP1.
1626    REDUC_INDEX is the index of reduction operand in case of reduction,
1627    and -1 otherwise.  */
1628 
1629 void
1630 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1631 		   vec<tree> *vec_oprnds0,
1632 		   vec<tree> *vec_oprnds1,
1633 		   slp_tree slp_node, int reduc_index)
1634 {
1635   if (slp_node)
1636     {
1637       int nops = (op1 == NULL_TREE) ? 1 : 2;
1638       auto_vec<tree> ops (nops);
1639       auto_vec<vec<tree> > vec_defs (nops);
1640 
1641       ops.quick_push (op0);
1642       if (op1)
1643         ops.quick_push (op1);
1644 
1645       vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1646 
1647       *vec_oprnds0 = vec_defs[0];
1648       if (op1)
1649 	*vec_oprnds1 = vec_defs[1];
1650     }
1651   else
1652     {
1653       tree vec_oprnd;
1654 
1655       vec_oprnds0->create (1);
1656       vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1657       vec_oprnds0->quick_push (vec_oprnd);
1658 
1659       if (op1)
1660 	{
1661 	  vec_oprnds1->create (1);
1662 	  vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1663 	  vec_oprnds1->quick_push (vec_oprnd);
1664 	}
1665     }
1666 }
1667 
1668 
1669 /* Function vect_finish_stmt_generation.
1670 
1671    Insert a new stmt.  */
1672 
1673 void
1674 vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1675 			     gimple_stmt_iterator *gsi)
1676 {
1677   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1678   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1679   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1680 
1681   gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1682 
1683   if (!gsi_end_p (*gsi)
1684       && gimple_has_mem_ops (vec_stmt))
1685     {
1686       gimple at_stmt = gsi_stmt (*gsi);
1687       tree vuse = gimple_vuse (at_stmt);
1688       if (vuse && TREE_CODE (vuse) == SSA_NAME)
1689 	{
1690 	  tree vdef = gimple_vdef (at_stmt);
1691 	  gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1692 	  /* If we have an SSA vuse and insert a store, update virtual
1693 	     SSA form to avoid triggering the renamer.  Do so only
1694 	     if we can easily see all uses - which is what almost always
1695 	     happens with the way vectorized stmts are inserted.  */
1696 	  if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1697 	      && ((is_gimple_assign (vec_stmt)
1698 		   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1699 		  || (is_gimple_call (vec_stmt)
1700 		      && !(gimple_call_flags (vec_stmt)
1701 			   & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1702 	    {
1703 	      tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1704 	      gimple_set_vdef (vec_stmt, new_vdef);
1705 	      SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1706 	    }
1707 	}
1708     }
1709   gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1710 
1711   set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
1712                                                    bb_vinfo));
1713 
1714   if (dump_enabled_p ())
1715     {
1716       dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1717       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1718     }
1719 
1720   gimple_set_location (vec_stmt, gimple_location (stmt));
1721 
1722   /* While EH edges will generally prevent vectorization, stmt might
1723      e.g. be in a must-not-throw region.  Ensure newly created stmts
1724      that could throw are part of the same region.  */
1725   int lp_nr = lookup_stmt_eh_lp (stmt);
1726   if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1727     add_stmt_to_eh_lp (vec_stmt, lp_nr);
1728 }
1729 
1730 /* Checks if CALL can be vectorized in type VECTYPE.  Returns
1731    a function declaration if the target has a vectorized version
1732    of the function, or NULL_TREE if the function cannot be vectorized.  */
1733 
1734 tree
1735 vectorizable_function (gcall *call, tree vectype_out, tree vectype_in)
1736 {
1737   tree fndecl = gimple_call_fndecl (call);
1738 
1739   /* We only handle functions that do not read or clobber memory -- i.e.
1740      const or novops ones.  */
1741   if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1742     return NULL_TREE;
1743 
1744   if (!fndecl
1745       || TREE_CODE (fndecl) != FUNCTION_DECL
1746       || !DECL_BUILT_IN (fndecl))
1747     return NULL_TREE;
1748 
1749   return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
1750 						        vectype_in);
1751 }
1752 
1753 
1754 static tree permute_vec_elements (tree, tree, tree, gimple,
1755 				  gimple_stmt_iterator *);
1756 
1757 
1758 /* Function vectorizable_mask_load_store.
1759 
1760    Check if STMT performs a conditional load or store that can be vectorized.
1761    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1762    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1763    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
1764 
1765 static bool
1766 vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi,
1767 			      gimple *vec_stmt, slp_tree slp_node)
1768 {
1769   tree vec_dest = NULL;
1770   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1771   stmt_vec_info prev_stmt_info;
1772   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1773   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1774   bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
1775   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1776   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1777   tree elem_type;
1778   gimple new_stmt;
1779   tree dummy;
1780   tree dataref_ptr = NULL_TREE;
1781   gimple ptr_incr;
1782   int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1783   int ncopies;
1784   int i, j;
1785   bool inv_p;
1786   tree gather_base = NULL_TREE, gather_off = NULL_TREE;
1787   tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
1788   int gather_scale = 1;
1789   enum vect_def_type gather_dt = vect_unknown_def_type;
1790   bool is_store;
1791   tree mask;
1792   gimple def_stmt;
1793   tree def;
1794   enum vect_def_type dt;
1795 
1796   if (slp_node != NULL)
1797     return false;
1798 
1799   ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
1800   gcc_assert (ncopies >= 1);
1801 
1802   is_store = gimple_call_internal_fn (stmt) == IFN_MASK_STORE;
1803   mask = gimple_call_arg (stmt, 2);
1804   if (TYPE_PRECISION (TREE_TYPE (mask))
1805       != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))))
1806     return false;
1807 
1808   /* FORNOW. This restriction should be relaxed.  */
1809   if (nested_in_vect_loop && ncopies > 1)
1810     {
1811       if (dump_enabled_p ())
1812 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1813 			 "multiple types in nested loop.");
1814       return false;
1815     }
1816 
1817   if (!STMT_VINFO_RELEVANT_P (stmt_info))
1818     return false;
1819 
1820   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1821     return false;
1822 
1823   if (!STMT_VINFO_DATA_REF (stmt_info))
1824     return false;
1825 
1826   elem_type = TREE_TYPE (vectype);
1827 
1828   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1829     return false;
1830 
1831   if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1832     return false;
1833 
1834   if (STMT_VINFO_GATHER_P (stmt_info))
1835     {
1836       gimple def_stmt;
1837       tree def;
1838       gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
1839 				       &gather_off, &gather_scale);
1840       gcc_assert (gather_decl);
1841       if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, NULL,
1842 				 &def_stmt, &def, &gather_dt,
1843 				 &gather_off_vectype))
1844 	{
1845 	  if (dump_enabled_p ())
1846 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1847 			     "gather index use not simple.");
1848 	  return false;
1849 	}
1850 
1851       tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1852       tree masktype
1853 	= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
1854       if (TREE_CODE (masktype) == INTEGER_TYPE)
1855 	{
1856 	  if (dump_enabled_p ())
1857 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1858 			     "masked gather with integer mask not supported.");
1859 	  return false;
1860 	}
1861     }
1862   else if (tree_int_cst_compare (nested_in_vect_loop
1863 				 ? STMT_VINFO_DR_STEP (stmt_info)
1864 				 : DR_STEP (dr), size_zero_node) <= 0)
1865     return false;
1866   else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1867 	   || !can_vec_mask_load_store_p (TYPE_MODE (vectype), !is_store))
1868     return false;
1869 
1870   if (TREE_CODE (mask) != SSA_NAME)
1871     return false;
1872 
1873   if (!vect_is_simple_use (mask, stmt, loop_vinfo, NULL,
1874 			   &def_stmt, &def, &dt))
1875     return false;
1876 
1877   if (is_store)
1878     {
1879       tree rhs = gimple_call_arg (stmt, 3);
1880       if (!vect_is_simple_use (rhs, stmt, loop_vinfo, NULL,
1881 			       &def_stmt, &def, &dt))
1882 	return false;
1883     }
1884 
1885   if (!vec_stmt) /* transformation not required.  */
1886     {
1887       STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1888       if (is_store)
1889 	vect_model_store_cost (stmt_info, ncopies, false, dt,
1890 			       NULL, NULL, NULL);
1891       else
1892 	vect_model_load_cost (stmt_info, ncopies, false, NULL, NULL, NULL);
1893       return true;
1894     }
1895 
1896   /** Transform.  **/
1897 
1898   if (STMT_VINFO_GATHER_P (stmt_info))
1899     {
1900       tree vec_oprnd0 = NULL_TREE, op;
1901       tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1902       tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
1903       tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
1904       tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
1905       tree mask_perm_mask = NULL_TREE;
1906       edge pe = loop_preheader_edge (loop);
1907       gimple_seq seq;
1908       basic_block new_bb;
1909       enum { NARROW, NONE, WIDEN } modifier;
1910       int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
1911 
1912       rettype = TREE_TYPE (TREE_TYPE (gather_decl));
1913       srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1914       ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1915       idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1916       masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1917       scaletype = TREE_VALUE (arglist);
1918       gcc_checking_assert (types_compatible_p (srctype, rettype)
1919 			   && types_compatible_p (srctype, masktype));
1920 
1921       if (nunits == gather_off_nunits)
1922 	modifier = NONE;
1923       else if (nunits == gather_off_nunits / 2)
1924 	{
1925 	  unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
1926 	  modifier = WIDEN;
1927 
1928 	  for (i = 0; i < gather_off_nunits; ++i)
1929 	    sel[i] = i | nunits;
1930 
1931 	  perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
1932 	}
1933       else if (nunits == gather_off_nunits * 2)
1934 	{
1935 	  unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
1936 	  modifier = NARROW;
1937 
1938 	  for (i = 0; i < nunits; ++i)
1939 	    sel[i] = i < gather_off_nunits
1940 		     ? i : i + nunits - gather_off_nunits;
1941 
1942 	  perm_mask = vect_gen_perm_mask_checked (vectype, sel);
1943 	  ncopies *= 2;
1944 	  for (i = 0; i < nunits; ++i)
1945 	    sel[i] = i | gather_off_nunits;
1946 	  mask_perm_mask = vect_gen_perm_mask_checked (masktype, sel);
1947 	}
1948       else
1949 	gcc_unreachable ();
1950 
1951       vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
1952 
1953       ptr = fold_convert (ptrtype, gather_base);
1954       if (!is_gimple_min_invariant (ptr))
1955 	{
1956 	  ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
1957 	  new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
1958 	  gcc_assert (!new_bb);
1959 	}
1960 
1961       scale = build_int_cst (scaletype, gather_scale);
1962 
1963       prev_stmt_info = NULL;
1964       for (j = 0; j < ncopies; ++j)
1965 	{
1966 	  if (modifier == WIDEN && (j & 1))
1967 	    op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
1968 				       perm_mask, stmt, gsi);
1969 	  else if (j == 0)
1970 	    op = vec_oprnd0
1971 	      = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
1972 	  else
1973 	    op = vec_oprnd0
1974 	      = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
1975 
1976 	  if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
1977 	    {
1978 	      gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
1979 			  == TYPE_VECTOR_SUBPARTS (idxtype));
1980 	      var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
1981 	      var = make_ssa_name (var);
1982 	      op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
1983 	      new_stmt
1984 		= gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
1985 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
1986 	      op = var;
1987 	    }
1988 
1989 	  if (mask_perm_mask && (j & 1))
1990 	    mask_op = permute_vec_elements (mask_op, mask_op,
1991 					    mask_perm_mask, stmt, gsi);
1992 	  else
1993 	    {
1994 	      if (j == 0)
1995 		vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
1996 	      else
1997 		{
1998 		  vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL,
1999 				      &def_stmt, &def, &dt);
2000 		  vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2001 		}
2002 
2003 	      mask_op = vec_mask;
2004 	      if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2005 		{
2006 		  gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
2007 			      == TYPE_VECTOR_SUBPARTS (masktype));
2008 		  var = vect_get_new_vect_var (masktype, vect_simple_var,
2009 					       NULL);
2010 		  var = make_ssa_name (var);
2011 		  mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2012 		  new_stmt
2013 		    = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2014 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
2015 		  mask_op = var;
2016 		}
2017 	    }
2018 
2019 	  new_stmt
2020 	    = gimple_build_call (gather_decl, 5, mask_op, ptr, op, mask_op,
2021 				 scale);
2022 
2023 	  if (!useless_type_conversion_p (vectype, rettype))
2024 	    {
2025 	      gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
2026 			  == TYPE_VECTOR_SUBPARTS (rettype));
2027 	      var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
2028 	      op = make_ssa_name (var, new_stmt);
2029 	      gimple_call_set_lhs (new_stmt, op);
2030 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
2031 	      var = make_ssa_name (vec_dest);
2032 	      op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2033 	      new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2034 	    }
2035 	  else
2036 	    {
2037 	      var = make_ssa_name (vec_dest, new_stmt);
2038 	      gimple_call_set_lhs (new_stmt, var);
2039 	    }
2040 
2041 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
2042 
2043 	  if (modifier == NARROW)
2044 	    {
2045 	      if ((j & 1) == 0)
2046 		{
2047 		  prev_res = var;
2048 		  continue;
2049 		}
2050 	      var = permute_vec_elements (prev_res, var,
2051 					  perm_mask, stmt, gsi);
2052 	      new_stmt = SSA_NAME_DEF_STMT (var);
2053 	    }
2054 
2055 	  if (prev_stmt_info == NULL)
2056 	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2057 	  else
2058 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2059 	  prev_stmt_info = vinfo_for_stmt (new_stmt);
2060 	}
2061 
2062       /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2063 	 from the IL.  */
2064       tree lhs = gimple_call_lhs (stmt);
2065       new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2066       set_vinfo_for_stmt (new_stmt, stmt_info);
2067       set_vinfo_for_stmt (stmt, NULL);
2068       STMT_VINFO_STMT (stmt_info) = new_stmt;
2069       gsi_replace (gsi, new_stmt, true);
2070       return true;
2071     }
2072   else if (is_store)
2073     {
2074       tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
2075       prev_stmt_info = NULL;
2076       for (i = 0; i < ncopies; i++)
2077 	{
2078 	  unsigned align, misalign;
2079 
2080 	  if (i == 0)
2081 	    {
2082 	      tree rhs = gimple_call_arg (stmt, 3);
2083 	      vec_rhs = vect_get_vec_def_for_operand (rhs, stmt, NULL);
2084 	      vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2085 	      /* We should have catched mismatched types earlier.  */
2086 	      gcc_assert (useless_type_conversion_p (vectype,
2087 						     TREE_TYPE (vec_rhs)));
2088 	      dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2089 						      NULL_TREE, &dummy, gsi,
2090 						      &ptr_incr, false, &inv_p);
2091 	      gcc_assert (!inv_p);
2092 	    }
2093 	  else
2094 	    {
2095 	      vect_is_simple_use (vec_rhs, NULL, loop_vinfo, NULL, &def_stmt,
2096 				  &def, &dt);
2097 	      vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
2098 	      vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
2099 				  &def, &dt);
2100 	      vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2101 	      dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2102 					     TYPE_SIZE_UNIT (vectype));
2103 	    }
2104 
2105 	  align = TYPE_ALIGN_UNIT (vectype);
2106 	  if (aligned_access_p (dr))
2107 	    misalign = 0;
2108 	  else if (DR_MISALIGNMENT (dr) == -1)
2109 	    {
2110 	      align = TYPE_ALIGN_UNIT (elem_type);
2111 	      misalign = 0;
2112 	    }
2113 	  else
2114 	    misalign = DR_MISALIGNMENT (dr);
2115 	  set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2116 				  misalign);
2117 	  new_stmt
2118 	    = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
2119 					  gimple_call_arg (stmt, 1),
2120 					  vec_mask, vec_rhs);
2121 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
2122 	  if (i == 0)
2123 	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2124 	  else
2125 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2126 	  prev_stmt_info = vinfo_for_stmt (new_stmt);
2127 	}
2128     }
2129   else
2130     {
2131       tree vec_mask = NULL_TREE;
2132       prev_stmt_info = NULL;
2133       vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2134       for (i = 0; i < ncopies; i++)
2135 	{
2136 	  unsigned align, misalign;
2137 
2138 	  if (i == 0)
2139 	    {
2140 	      vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2141 	      dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2142 						      NULL_TREE, &dummy, gsi,
2143 						      &ptr_incr, false, &inv_p);
2144 	      gcc_assert (!inv_p);
2145 	    }
2146 	  else
2147 	    {
2148 	      vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
2149 				  &def, &dt);
2150 	      vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2151 	      dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2152 					     TYPE_SIZE_UNIT (vectype));
2153 	    }
2154 
2155 	  align = TYPE_ALIGN_UNIT (vectype);
2156 	  if (aligned_access_p (dr))
2157 	    misalign = 0;
2158 	  else if (DR_MISALIGNMENT (dr) == -1)
2159 	    {
2160 	      align = TYPE_ALIGN_UNIT (elem_type);
2161 	      misalign = 0;
2162 	    }
2163 	  else
2164 	    misalign = DR_MISALIGNMENT (dr);
2165 	  set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2166 				  misalign);
2167 	  new_stmt
2168 	    = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
2169 					  gimple_call_arg (stmt, 1),
2170 					  vec_mask);
2171 	  gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest));
2172 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
2173 	  if (i == 0)
2174 	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2175 	  else
2176 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2177 	  prev_stmt_info = vinfo_for_stmt (new_stmt);
2178 	}
2179     }
2180 
2181   if (!is_store)
2182     {
2183       /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2184 	 from the IL.  */
2185       tree lhs = gimple_call_lhs (stmt);
2186       new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2187       set_vinfo_for_stmt (new_stmt, stmt_info);
2188       set_vinfo_for_stmt (stmt, NULL);
2189       STMT_VINFO_STMT (stmt_info) = new_stmt;
2190       gsi_replace (gsi, new_stmt, true);
2191     }
2192 
2193   return true;
2194 }
2195 
2196 
2197 /* Function vectorizable_call.
2198 
2199    Check if GS performs a function call that can be vectorized.
2200    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2201    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2202    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
2203 
2204 static bool
2205 vectorizable_call (gimple gs, gimple_stmt_iterator *gsi, gimple *vec_stmt,
2206 		   slp_tree slp_node)
2207 {
2208   gcall *stmt;
2209   tree vec_dest;
2210   tree scalar_dest;
2211   tree op, type;
2212   tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2213   stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
2214   tree vectype_out, vectype_in;
2215   int nunits_in;
2216   int nunits_out;
2217   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2218   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2219   tree fndecl, new_temp, def, rhs_type;
2220   gimple def_stmt;
2221   enum vect_def_type dt[3]
2222     = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2223   gimple new_stmt = NULL;
2224   int ncopies, j;
2225   vec<tree> vargs = vNULL;
2226   enum { NARROW, NONE, WIDEN } modifier;
2227   size_t i, nargs;
2228   tree lhs;
2229 
2230   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2231     return false;
2232 
2233   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2234     return false;
2235 
2236   /* Is GS a vectorizable call?   */
2237   stmt = dyn_cast <gcall *> (gs);
2238   if (!stmt)
2239     return false;
2240 
2241   if (gimple_call_internal_p (stmt)
2242       && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2243 	  || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2244     return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2245 					 slp_node);
2246 
2247   if (gimple_call_lhs (stmt) == NULL_TREE
2248       || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2249     return false;
2250 
2251   gcc_checking_assert (!stmt_can_throw_internal (stmt));
2252 
2253   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2254 
2255   /* Process function arguments.  */
2256   rhs_type = NULL_TREE;
2257   vectype_in = NULL_TREE;
2258   nargs = gimple_call_num_args (stmt);
2259 
2260   /* Bail out if the function has more than three arguments, we do not have
2261      interesting builtin functions to vectorize with more than two arguments
2262      except for fma.  No arguments is also not good.  */
2263   if (nargs == 0 || nargs > 3)
2264     return false;
2265 
2266   /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic.  */
2267   if (gimple_call_internal_p (stmt)
2268       && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2269     {
2270       nargs = 0;
2271       rhs_type = unsigned_type_node;
2272     }
2273 
2274   for (i = 0; i < nargs; i++)
2275     {
2276       tree opvectype;
2277 
2278       op = gimple_call_arg (stmt, i);
2279 
2280       /* We can only handle calls with arguments of the same type.  */
2281       if (rhs_type
2282 	  && !types_compatible_p (rhs_type, TREE_TYPE (op)))
2283 	{
2284 	  if (dump_enabled_p ())
2285 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2286                              "argument types differ.\n");
2287 	  return false;
2288 	}
2289       if (!rhs_type)
2290 	rhs_type = TREE_TYPE (op);
2291 
2292       if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2293 				 &def_stmt, &def, &dt[i], &opvectype))
2294 	{
2295 	  if (dump_enabled_p ())
2296 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2297                              "use not simple.\n");
2298 	  return false;
2299 	}
2300 
2301       if (!vectype_in)
2302 	vectype_in = opvectype;
2303       else if (opvectype
2304 	       && opvectype != vectype_in)
2305 	{
2306 	  if (dump_enabled_p ())
2307 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2308                              "argument vector types differ.\n");
2309 	  return false;
2310 	}
2311     }
2312   /* If all arguments are external or constant defs use a vector type with
2313      the same size as the output vector type.  */
2314   if (!vectype_in)
2315     vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2316   if (vec_stmt)
2317     gcc_assert (vectype_in);
2318   if (!vectype_in)
2319     {
2320       if (dump_enabled_p ())
2321         {
2322           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2323                            "no vectype for scalar type ");
2324           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2325           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2326         }
2327 
2328       return false;
2329     }
2330 
2331   /* FORNOW */
2332   nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2333   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2334   if (nunits_in == nunits_out / 2)
2335     modifier = NARROW;
2336   else if (nunits_out == nunits_in)
2337     modifier = NONE;
2338   else if (nunits_out == nunits_in / 2)
2339     modifier = WIDEN;
2340   else
2341     return false;
2342 
2343   /* For now, we only vectorize functions if a target specific builtin
2344      is available.  TODO -- in some cases, it might be profitable to
2345      insert the calls for pieces of the vector, in order to be able
2346      to vectorize other operations in the loop.  */
2347   fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
2348   if (fndecl == NULL_TREE)
2349     {
2350       if (gimple_call_internal_p (stmt)
2351 	  && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
2352 	  && !slp_node
2353 	  && loop_vinfo
2354 	  && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2355 	  && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2356 	  && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2357 	     == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2358 	{
2359 	  /* We can handle IFN_GOMP_SIMD_LANE by returning a
2360 	     { 0, 1, 2, ... vf - 1 } vector.  */
2361 	  gcc_assert (nargs == 0);
2362 	}
2363       else
2364 	{
2365 	  if (dump_enabled_p ())
2366 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2367 			     "function is not vectorizable.\n");
2368 	  return false;
2369 	}
2370     }
2371 
2372   gcc_assert (!gimple_vuse (stmt));
2373 
2374   if (slp_node || PURE_SLP_STMT (stmt_info))
2375     ncopies = 1;
2376   else if (modifier == NARROW)
2377     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2378   else
2379     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2380 
2381   /* Sanity check: make sure that at least one copy of the vectorized stmt
2382      needs to be generated.  */
2383   gcc_assert (ncopies >= 1);
2384 
2385   if (!vec_stmt) /* transformation not required.  */
2386     {
2387       STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2388       if (dump_enabled_p ())
2389         dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2390                          "\n");
2391       vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2392       return true;
2393     }
2394 
2395   /** Transform.  **/
2396 
2397   if (dump_enabled_p ())
2398     dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2399 
2400   /* Handle def.  */
2401   scalar_dest = gimple_call_lhs (stmt);
2402   vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2403 
2404   prev_stmt_info = NULL;
2405   switch (modifier)
2406     {
2407     case NONE:
2408       for (j = 0; j < ncopies; ++j)
2409 	{
2410 	  /* Build argument list for the vectorized call.  */
2411 	  if (j == 0)
2412 	    vargs.create (nargs);
2413 	  else
2414 	    vargs.truncate (0);
2415 
2416 	  if (slp_node)
2417 	    {
2418 	      auto_vec<vec<tree> > vec_defs (nargs);
2419 	      vec<tree> vec_oprnds0;
2420 
2421 	      for (i = 0; i < nargs; i++)
2422 		vargs.quick_push (gimple_call_arg (stmt, i));
2423 	      vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2424 	      vec_oprnds0 = vec_defs[0];
2425 
2426 	      /* Arguments are ready.  Create the new vector stmt.  */
2427 	      FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
2428 		{
2429 		  size_t k;
2430 		  for (k = 0; k < nargs; k++)
2431 		    {
2432 		      vec<tree> vec_oprndsk = vec_defs[k];
2433 		      vargs[k] = vec_oprndsk[i];
2434 		    }
2435 		  new_stmt = gimple_build_call_vec (fndecl, vargs);
2436 		  new_temp = make_ssa_name (vec_dest, new_stmt);
2437 		  gimple_call_set_lhs (new_stmt, new_temp);
2438 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
2439 		  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2440 		}
2441 
2442 	      for (i = 0; i < nargs; i++)
2443 		{
2444 		  vec<tree> vec_oprndsi = vec_defs[i];
2445 		  vec_oprndsi.release ();
2446 		}
2447 	      continue;
2448 	    }
2449 
2450 	  for (i = 0; i < nargs; i++)
2451 	    {
2452 	      op = gimple_call_arg (stmt, i);
2453 	      if (j == 0)
2454 		vec_oprnd0
2455 		  = vect_get_vec_def_for_operand (op, stmt, NULL);
2456 	      else
2457 		{
2458 		  vec_oprnd0 = gimple_call_arg (new_stmt, i);
2459 		  vec_oprnd0
2460                     = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2461 		}
2462 
2463 	      vargs.quick_push (vec_oprnd0);
2464 	    }
2465 
2466 	  if (gimple_call_internal_p (stmt)
2467 	      && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2468 	    {
2469 	      tree *v = XALLOCAVEC (tree, nunits_out);
2470 	      int k;
2471 	      for (k = 0; k < nunits_out; ++k)
2472 		v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
2473 	      tree cst = build_vector (vectype_out, v);
2474 	      tree new_var
2475 		= vect_get_new_vect_var (vectype_out, vect_simple_var, "cst_");
2476 	      gimple init_stmt = gimple_build_assign (new_var, cst);
2477 	      new_temp = make_ssa_name (new_var, init_stmt);
2478 	      gimple_assign_set_lhs (init_stmt, new_temp);
2479 	      vect_init_vector_1 (stmt, init_stmt, NULL);
2480 	      new_temp = make_ssa_name (vec_dest);
2481 	      new_stmt = gimple_build_assign (new_temp,
2482 					      gimple_assign_lhs (init_stmt));
2483 	    }
2484 	  else
2485 	    {
2486 	      new_stmt = gimple_build_call_vec (fndecl, vargs);
2487 	      new_temp = make_ssa_name (vec_dest, new_stmt);
2488 	      gimple_call_set_lhs (new_stmt, new_temp);
2489 	    }
2490 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
2491 
2492 	  if (j == 0)
2493 	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2494 	  else
2495 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2496 
2497 	  prev_stmt_info = vinfo_for_stmt (new_stmt);
2498 	}
2499 
2500       break;
2501 
2502     case NARROW:
2503       for (j = 0; j < ncopies; ++j)
2504 	{
2505 	  /* Build argument list for the vectorized call.  */
2506 	  if (j == 0)
2507 	    vargs.create (nargs * 2);
2508 	  else
2509 	    vargs.truncate (0);
2510 
2511 	  if (slp_node)
2512 	    {
2513 	      auto_vec<vec<tree> > vec_defs (nargs);
2514 	      vec<tree> vec_oprnds0;
2515 
2516 	      for (i = 0; i < nargs; i++)
2517 		vargs.quick_push (gimple_call_arg (stmt, i));
2518 	      vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2519 	      vec_oprnds0 = vec_defs[0];
2520 
2521 	      /* Arguments are ready.  Create the new vector stmt.  */
2522 	      for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
2523 		{
2524 		  size_t k;
2525 		  vargs.truncate (0);
2526 		  for (k = 0; k < nargs; k++)
2527 		    {
2528 		      vec<tree> vec_oprndsk = vec_defs[k];
2529 		      vargs.quick_push (vec_oprndsk[i]);
2530 		      vargs.quick_push (vec_oprndsk[i + 1]);
2531 		    }
2532 		  new_stmt = gimple_build_call_vec (fndecl, vargs);
2533 		  new_temp = make_ssa_name (vec_dest, new_stmt);
2534 		  gimple_call_set_lhs (new_stmt, new_temp);
2535 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
2536 		  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2537 		}
2538 
2539 	      for (i = 0; i < nargs; i++)
2540 		{
2541 		  vec<tree> vec_oprndsi = vec_defs[i];
2542 		  vec_oprndsi.release ();
2543 		}
2544 	      continue;
2545 	    }
2546 
2547 	  for (i = 0; i < nargs; i++)
2548 	    {
2549 	      op = gimple_call_arg (stmt, i);
2550 	      if (j == 0)
2551 		{
2552 		  vec_oprnd0
2553 		    = vect_get_vec_def_for_operand (op, stmt, NULL);
2554 		  vec_oprnd1
2555 		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2556 		}
2557 	      else
2558 		{
2559 		  vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
2560 		  vec_oprnd0
2561 		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
2562 		  vec_oprnd1
2563 		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2564 		}
2565 
2566 	      vargs.quick_push (vec_oprnd0);
2567 	      vargs.quick_push (vec_oprnd1);
2568 	    }
2569 
2570 	  new_stmt = gimple_build_call_vec (fndecl, vargs);
2571 	  new_temp = make_ssa_name (vec_dest, new_stmt);
2572 	  gimple_call_set_lhs (new_stmt, new_temp);
2573 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
2574 
2575 	  if (j == 0)
2576 	    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2577 	  else
2578 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2579 
2580 	  prev_stmt_info = vinfo_for_stmt (new_stmt);
2581 	}
2582 
2583       *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2584 
2585       break;
2586 
2587     case WIDEN:
2588       /* No current target implements this case.  */
2589       return false;
2590     }
2591 
2592   vargs.release ();
2593 
2594   /* The call in STMT might prevent it from being removed in dce.
2595      We however cannot remove it here, due to the way the ssa name
2596      it defines is mapped to the new definition.  So just replace
2597      rhs of the statement with something harmless.  */
2598 
2599   if (slp_node)
2600     return true;
2601 
2602   type = TREE_TYPE (scalar_dest);
2603   if (is_pattern_stmt_p (stmt_info))
2604     lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
2605   else
2606     lhs = gimple_call_lhs (stmt);
2607   new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
2608   set_vinfo_for_stmt (new_stmt, stmt_info);
2609   set_vinfo_for_stmt (stmt, NULL);
2610   STMT_VINFO_STMT (stmt_info) = new_stmt;
2611   gsi_replace (gsi, new_stmt, false);
2612 
2613   return true;
2614 }
2615 
2616 
2617 struct simd_call_arg_info
2618 {
2619   tree vectype;
2620   tree op;
2621   enum vect_def_type dt;
2622   HOST_WIDE_INT linear_step;
2623   unsigned int align;
2624 };
2625 
2626 /* Function vectorizable_simd_clone_call.
2627 
2628    Check if STMT performs a function call that can be vectorized
2629    by calling a simd clone of the function.
2630    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2631    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2632    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
2633 
2634 static bool
2635 vectorizable_simd_clone_call (gimple stmt, gimple_stmt_iterator *gsi,
2636 			      gimple *vec_stmt, slp_tree slp_node)
2637 {
2638   tree vec_dest;
2639   tree scalar_dest;
2640   tree op, type;
2641   tree vec_oprnd0 = NULL_TREE;
2642   stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
2643   tree vectype;
2644   unsigned int nunits;
2645   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2646   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2647   struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2648   tree fndecl, new_temp, def;
2649   gimple def_stmt;
2650   gimple new_stmt = NULL;
2651   int ncopies, j;
2652   vec<simd_call_arg_info> arginfo = vNULL;
2653   vec<tree> vargs = vNULL;
2654   size_t i, nargs;
2655   tree lhs, rtype, ratype;
2656   vec<constructor_elt, va_gc> *ret_ctor_elts;
2657 
2658   /* Is STMT a vectorizable call?   */
2659   if (!is_gimple_call (stmt))
2660     return false;
2661 
2662   fndecl = gimple_call_fndecl (stmt);
2663   if (fndecl == NULL_TREE)
2664     return false;
2665 
2666   struct cgraph_node *node = cgraph_node::get (fndecl);
2667   if (node == NULL || node->simd_clones == NULL)
2668     return false;
2669 
2670   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2671     return false;
2672 
2673   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2674     return false;
2675 
2676   if (gimple_call_lhs (stmt)
2677       && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2678     return false;
2679 
2680   gcc_checking_assert (!stmt_can_throw_internal (stmt));
2681 
2682   vectype = STMT_VINFO_VECTYPE (stmt_info);
2683 
2684   if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
2685     return false;
2686 
2687   /* FORNOW */
2688   if (slp_node || PURE_SLP_STMT (stmt_info))
2689     return false;
2690 
2691   /* Process function arguments.  */
2692   nargs = gimple_call_num_args (stmt);
2693 
2694   /* Bail out if the function has zero arguments.  */
2695   if (nargs == 0)
2696     return false;
2697 
2698   arginfo.create (nargs);
2699 
2700   for (i = 0; i < nargs; i++)
2701     {
2702       simd_call_arg_info thisarginfo;
2703       affine_iv iv;
2704 
2705       thisarginfo.linear_step = 0;
2706       thisarginfo.align = 0;
2707       thisarginfo.op = NULL_TREE;
2708 
2709       op = gimple_call_arg (stmt, i);
2710       if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2711 				 &def_stmt, &def, &thisarginfo.dt,
2712 				 &thisarginfo.vectype)
2713 	  || thisarginfo.dt == vect_uninitialized_def)
2714 	{
2715 	  if (dump_enabled_p ())
2716 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2717 			     "use not simple.\n");
2718 	  arginfo.release ();
2719 	  return false;
2720 	}
2721 
2722       if (thisarginfo.dt == vect_constant_def
2723 	  || thisarginfo.dt == vect_external_def)
2724 	gcc_assert (thisarginfo.vectype == NULL_TREE);
2725       else
2726 	gcc_assert (thisarginfo.vectype != NULL_TREE);
2727 
2728       /* For linear arguments, the analyze phase should have saved
2729 	 the base and step in STMT_VINFO_SIMD_CLONE_INFO.  */
2730       if (i * 2 + 3 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
2731 	  && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2])
2732 	{
2733 	  gcc_assert (vec_stmt);
2734 	  thisarginfo.linear_step
2735 	    = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2]);
2736 	  thisarginfo.op
2737 	    = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 1];
2738 	  /* If loop has been peeled for alignment, we need to adjust it.  */
2739 	  tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
2740 	  tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
2741 	  if (n1 != n2)
2742 	    {
2743 	      tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
2744 	      tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2];
2745 	      tree opt = TREE_TYPE (thisarginfo.op);
2746 	      bias = fold_convert (TREE_TYPE (step), bias);
2747 	      bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
2748 	      thisarginfo.op
2749 		= fold_build2 (POINTER_TYPE_P (opt)
2750 			       ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
2751 			       thisarginfo.op, bias);
2752 	    }
2753 	}
2754       else if (!vec_stmt
2755 	       && thisarginfo.dt != vect_constant_def
2756 	       && thisarginfo.dt != vect_external_def
2757 	       && loop_vinfo
2758 	       && TREE_CODE (op) == SSA_NAME
2759 	       && simple_iv (loop, loop_containing_stmt (stmt), op,
2760 			     &iv, false)
2761 	       && tree_fits_shwi_p (iv.step))
2762 	{
2763 	  thisarginfo.linear_step = tree_to_shwi (iv.step);
2764 	  thisarginfo.op = iv.base;
2765 	}
2766       else if ((thisarginfo.dt == vect_constant_def
2767 		|| thisarginfo.dt == vect_external_def)
2768 	       && POINTER_TYPE_P (TREE_TYPE (op)))
2769 	thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
2770 
2771       arginfo.quick_push (thisarginfo);
2772     }
2773 
2774   unsigned int badness = 0;
2775   struct cgraph_node *bestn = NULL;
2776   if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
2777     bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
2778   else
2779     for (struct cgraph_node *n = node->simd_clones; n != NULL;
2780 	 n = n->simdclone->next_clone)
2781       {
2782 	unsigned int this_badness = 0;
2783 	if (n->simdclone->simdlen
2784 	    > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2785 	    || n->simdclone->nargs != nargs)
2786 	  continue;
2787 	if (n->simdclone->simdlen
2788 	    < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2789 	  this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2790 			   - exact_log2 (n->simdclone->simdlen)) * 1024;
2791 	if (n->simdclone->inbranch)
2792 	  this_badness += 2048;
2793 	int target_badness = targetm.simd_clone.usable (n);
2794 	if (target_badness < 0)
2795 	  continue;
2796 	this_badness += target_badness * 512;
2797 	/* FORNOW: Have to add code to add the mask argument.  */
2798 	if (n->simdclone->inbranch)
2799 	  continue;
2800 	for (i = 0; i < nargs; i++)
2801 	  {
2802 	    switch (n->simdclone->args[i].arg_type)
2803 	      {
2804 	      case SIMD_CLONE_ARG_TYPE_VECTOR:
2805 		if (!useless_type_conversion_p
2806 			(n->simdclone->args[i].orig_type,
2807 			 TREE_TYPE (gimple_call_arg (stmt, i))))
2808 		  i = -1;
2809 		else if (arginfo[i].dt == vect_constant_def
2810 			 || arginfo[i].dt == vect_external_def
2811 			 || arginfo[i].linear_step)
2812 		  this_badness += 64;
2813 		break;
2814 	      case SIMD_CLONE_ARG_TYPE_UNIFORM:
2815 		if (arginfo[i].dt != vect_constant_def
2816 		    && arginfo[i].dt != vect_external_def)
2817 		  i = -1;
2818 		break;
2819 	      case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
2820 		if (arginfo[i].dt == vect_constant_def
2821 		    || arginfo[i].dt == vect_external_def
2822 		    || (arginfo[i].linear_step
2823 			!= n->simdclone->args[i].linear_step))
2824 		  i = -1;
2825 		break;
2826 	      case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
2827 		/* FORNOW */
2828 		i = -1;
2829 		break;
2830 	      case SIMD_CLONE_ARG_TYPE_MASK:
2831 		gcc_unreachable ();
2832 	      }
2833 	    if (i == (size_t) -1)
2834 	      break;
2835 	    if (n->simdclone->args[i].alignment > arginfo[i].align)
2836 	      {
2837 		i = -1;
2838 		break;
2839 	      }
2840 	    if (arginfo[i].align)
2841 	      this_badness += (exact_log2 (arginfo[i].align)
2842 			       - exact_log2 (n->simdclone->args[i].alignment));
2843 	  }
2844 	if (i == (size_t) -1)
2845 	  continue;
2846 	if (bestn == NULL || this_badness < badness)
2847 	  {
2848 	    bestn = n;
2849 	    badness = this_badness;
2850 	  }
2851       }
2852 
2853   if (bestn == NULL)
2854     {
2855       arginfo.release ();
2856       return false;
2857     }
2858 
2859   for (i = 0; i < nargs; i++)
2860     if ((arginfo[i].dt == vect_constant_def
2861 	 || arginfo[i].dt == vect_external_def)
2862 	&& bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
2863       {
2864 	arginfo[i].vectype
2865 	  = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
2866 								     i)));
2867 	if (arginfo[i].vectype == NULL
2868 	    || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
2869 		> bestn->simdclone->simdlen))
2870 	  {
2871 	    arginfo.release ();
2872 	    return false;
2873 	  }
2874       }
2875 
2876   fndecl = bestn->decl;
2877   nunits = bestn->simdclone->simdlen;
2878   ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2879 
2880   /* If the function isn't const, only allow it in simd loops where user
2881      has asserted that at least nunits consecutive iterations can be
2882      performed using SIMD instructions.  */
2883   if ((loop == NULL || (unsigned) loop->safelen < nunits)
2884       && gimple_vuse (stmt))
2885     {
2886       arginfo.release ();
2887       return false;
2888     }
2889 
2890   /* Sanity check: make sure that at least one copy of the vectorized stmt
2891      needs to be generated.  */
2892   gcc_assert (ncopies >= 1);
2893 
2894   if (!vec_stmt) /* transformation not required.  */
2895     {
2896       STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
2897       for (i = 0; i < nargs; i++)
2898 	if (bestn->simdclone->args[i].arg_type
2899 	    == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
2900 	  {
2901 	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 2
2902 									+ 1);
2903 	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
2904 	    tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
2905 		       ? size_type_node : TREE_TYPE (arginfo[i].op);
2906 	    tree ls = build_int_cst (lst, arginfo[i].linear_step);
2907 	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
2908 	  }
2909       STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
2910       if (dump_enabled_p ())
2911 	dump_printf_loc (MSG_NOTE, vect_location,
2912 			 "=== vectorizable_simd_clone_call ===\n");
2913 /*      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
2914       arginfo.release ();
2915       return true;
2916     }
2917 
2918   /** Transform.  **/
2919 
2920   if (dump_enabled_p ())
2921     dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2922 
2923   /* Handle def.  */
2924   scalar_dest = gimple_call_lhs (stmt);
2925   vec_dest = NULL_TREE;
2926   rtype = NULL_TREE;
2927   ratype = NULL_TREE;
2928   if (scalar_dest)
2929     {
2930       vec_dest = vect_create_destination_var (scalar_dest, vectype);
2931       rtype = TREE_TYPE (TREE_TYPE (fndecl));
2932       if (TREE_CODE (rtype) == ARRAY_TYPE)
2933 	{
2934 	  ratype = rtype;
2935 	  rtype = TREE_TYPE (ratype);
2936 	}
2937     }
2938 
2939   prev_stmt_info = NULL;
2940   for (j = 0; j < ncopies; ++j)
2941     {
2942       /* Build argument list for the vectorized call.  */
2943       if (j == 0)
2944 	vargs.create (nargs);
2945       else
2946 	vargs.truncate (0);
2947 
2948       for (i = 0; i < nargs; i++)
2949 	{
2950 	  unsigned int k, l, m, o;
2951 	  tree atype;
2952 	  op = gimple_call_arg (stmt, i);
2953 	  switch (bestn->simdclone->args[i].arg_type)
2954 	    {
2955 	    case SIMD_CLONE_ARG_TYPE_VECTOR:
2956 	      atype = bestn->simdclone->args[i].vector_type;
2957 	      o = nunits / TYPE_VECTOR_SUBPARTS (atype);
2958 	      for (m = j * o; m < (j + 1) * o; m++)
2959 		{
2960 		  if (TYPE_VECTOR_SUBPARTS (atype)
2961 		      < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
2962 		    {
2963 		      unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
2964 		      k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
2965 			   / TYPE_VECTOR_SUBPARTS (atype));
2966 		      gcc_assert ((k & (k - 1)) == 0);
2967 		      if (m == 0)
2968 			vec_oprnd0
2969 			  = vect_get_vec_def_for_operand (op, stmt, NULL);
2970 		      else
2971 			{
2972 			  vec_oprnd0 = arginfo[i].op;
2973 			  if ((m & (k - 1)) == 0)
2974 			    vec_oprnd0
2975 			      = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
2976 								vec_oprnd0);
2977 			}
2978 		      arginfo[i].op = vec_oprnd0;
2979 		      vec_oprnd0
2980 			= build3 (BIT_FIELD_REF, atype, vec_oprnd0,
2981 				  size_int (prec),
2982 				  bitsize_int ((m & (k - 1)) * prec));
2983 		      new_stmt
2984 			= gimple_build_assign (make_ssa_name (atype),
2985 					       vec_oprnd0);
2986 		      vect_finish_stmt_generation (stmt, new_stmt, gsi);
2987 		      vargs.safe_push (gimple_assign_lhs (new_stmt));
2988 		    }
2989 		  else
2990 		    {
2991 		      k = (TYPE_VECTOR_SUBPARTS (atype)
2992 			   / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
2993 		      gcc_assert ((k & (k - 1)) == 0);
2994 		      vec<constructor_elt, va_gc> *ctor_elts;
2995 		      if (k != 1)
2996 			vec_alloc (ctor_elts, k);
2997 		      else
2998 			ctor_elts = NULL;
2999 		      for (l = 0; l < k; l++)
3000 			{
3001 			  if (m == 0 && l == 0)
3002 			    vec_oprnd0
3003 			      = vect_get_vec_def_for_operand (op, stmt, NULL);
3004 			  else
3005 			    vec_oprnd0
3006 			      = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3007 								arginfo[i].op);
3008 			  arginfo[i].op = vec_oprnd0;
3009 			  if (k == 1)
3010 			    break;
3011 			  CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3012 						  vec_oprnd0);
3013 			}
3014 		      if (k == 1)
3015 			vargs.safe_push (vec_oprnd0);
3016 		      else
3017 			{
3018 			  vec_oprnd0 = build_constructor (atype, ctor_elts);
3019 			  new_stmt
3020 			    = gimple_build_assign (make_ssa_name (atype),
3021 						   vec_oprnd0);
3022 			  vect_finish_stmt_generation (stmt, new_stmt, gsi);
3023 			  vargs.safe_push (gimple_assign_lhs (new_stmt));
3024 			}
3025 		    }
3026 		}
3027 	      break;
3028 	    case SIMD_CLONE_ARG_TYPE_UNIFORM:
3029 	      vargs.safe_push (op);
3030 	      break;
3031 	    case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3032 	      if (j == 0)
3033 		{
3034 		  gimple_seq stmts;
3035 		  arginfo[i].op
3036 		    = force_gimple_operand (arginfo[i].op, &stmts, true,
3037 					    NULL_TREE);
3038 		  if (stmts != NULL)
3039 		    {
3040 		      basic_block new_bb;
3041 		      edge pe = loop_preheader_edge (loop);
3042 		      new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3043 		      gcc_assert (!new_bb);
3044 		    }
3045 		  tree phi_res = copy_ssa_name (op);
3046 		  gphi *new_phi = create_phi_node (phi_res, loop->header);
3047 		  set_vinfo_for_stmt (new_phi,
3048 				      new_stmt_vec_info (new_phi, loop_vinfo,
3049 							 NULL));
3050 		  add_phi_arg (new_phi, arginfo[i].op,
3051 			       loop_preheader_edge (loop), UNKNOWN_LOCATION);
3052 		  enum tree_code code
3053 		    = POINTER_TYPE_P (TREE_TYPE (op))
3054 		      ? POINTER_PLUS_EXPR : PLUS_EXPR;
3055 		  tree type = POINTER_TYPE_P (TREE_TYPE (op))
3056 			      ? sizetype : TREE_TYPE (op);
3057 		  widest_int cst
3058 		    = wi::mul (bestn->simdclone->args[i].linear_step,
3059 			       ncopies * nunits);
3060 		  tree tcst = wide_int_to_tree (type, cst);
3061 		  tree phi_arg = copy_ssa_name (op);
3062 		  new_stmt
3063 		    = gimple_build_assign (phi_arg, code, phi_res, tcst);
3064 		  gimple_stmt_iterator si = gsi_after_labels (loop->header);
3065 		  gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3066 		  set_vinfo_for_stmt (new_stmt,
3067 				      new_stmt_vec_info (new_stmt, loop_vinfo,
3068 							 NULL));
3069 		  add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3070 			       UNKNOWN_LOCATION);
3071 		  arginfo[i].op = phi_res;
3072 		  vargs.safe_push (phi_res);
3073 		}
3074 	      else
3075 		{
3076 		  enum tree_code code
3077 		    = POINTER_TYPE_P (TREE_TYPE (op))
3078 		      ? POINTER_PLUS_EXPR : PLUS_EXPR;
3079 		  tree type = POINTER_TYPE_P (TREE_TYPE (op))
3080 			      ? sizetype : TREE_TYPE (op);
3081 		  widest_int cst
3082 		    = wi::mul (bestn->simdclone->args[i].linear_step,
3083 			       j * nunits);
3084 		  tree tcst = wide_int_to_tree (type, cst);
3085 		  new_temp = make_ssa_name (TREE_TYPE (op));
3086 		  new_stmt = gimple_build_assign (new_temp, code,
3087 						  arginfo[i].op, tcst);
3088 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
3089 		  vargs.safe_push (new_temp);
3090 		}
3091 	      break;
3092 	    case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3093 	    default:
3094 	      gcc_unreachable ();
3095 	    }
3096 	}
3097 
3098       new_stmt = gimple_build_call_vec (fndecl, vargs);
3099       if (vec_dest)
3100 	{
3101 	  gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
3102 	  if (ratype)
3103 	    new_temp = create_tmp_var (ratype);
3104 	  else if (TYPE_VECTOR_SUBPARTS (vectype)
3105 		   == TYPE_VECTOR_SUBPARTS (rtype))
3106 	    new_temp = make_ssa_name (vec_dest, new_stmt);
3107 	  else
3108 	    new_temp = make_ssa_name (rtype, new_stmt);
3109 	  gimple_call_set_lhs (new_stmt, new_temp);
3110 	}
3111       vect_finish_stmt_generation (stmt, new_stmt, gsi);
3112 
3113       if (vec_dest)
3114 	{
3115 	  if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
3116 	    {
3117 	      unsigned int k, l;
3118 	      unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3119 	      k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
3120 	      gcc_assert ((k & (k - 1)) == 0);
3121 	      for (l = 0; l < k; l++)
3122 		{
3123 		  tree t;
3124 		  if (ratype)
3125 		    {
3126 		      t = build_fold_addr_expr (new_temp);
3127 		      t = build2 (MEM_REF, vectype, t,
3128 				  build_int_cst (TREE_TYPE (t),
3129 						 l * prec / BITS_PER_UNIT));
3130 		    }
3131 		  else
3132 		    t = build3 (BIT_FIELD_REF, vectype, new_temp,
3133 				size_int (prec), bitsize_int (l * prec));
3134 		  new_stmt
3135 		    = gimple_build_assign (make_ssa_name (vectype), t);
3136 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
3137 		  if (j == 0 && l == 0)
3138 		    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3139 		  else
3140 		    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3141 
3142 		  prev_stmt_info = vinfo_for_stmt (new_stmt);
3143 		}
3144 
3145 	      if (ratype)
3146 		{
3147 		  tree clobber = build_constructor (ratype, NULL);
3148 		  TREE_THIS_VOLATILE (clobber) = 1;
3149 		  new_stmt = gimple_build_assign (new_temp, clobber);
3150 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
3151 		}
3152 	      continue;
3153 	    }
3154 	  else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
3155 	    {
3156 	      unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
3157 				/ TYPE_VECTOR_SUBPARTS (rtype));
3158 	      gcc_assert ((k & (k - 1)) == 0);
3159 	      if ((j & (k - 1)) == 0)
3160 		vec_alloc (ret_ctor_elts, k);
3161 	      if (ratype)
3162 		{
3163 		  unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
3164 		  for (m = 0; m < o; m++)
3165 		    {
3166 		      tree tem = build4 (ARRAY_REF, rtype, new_temp,
3167 					 size_int (m), NULL_TREE, NULL_TREE);
3168 		      new_stmt
3169 			= gimple_build_assign (make_ssa_name (rtype), tem);
3170 		      vect_finish_stmt_generation (stmt, new_stmt, gsi);
3171 		      CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3172 					      gimple_assign_lhs (new_stmt));
3173 		    }
3174 		  tree clobber = build_constructor (ratype, NULL);
3175 		  TREE_THIS_VOLATILE (clobber) = 1;
3176 		  new_stmt = gimple_build_assign (new_temp, clobber);
3177 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
3178 		}
3179 	      else
3180 		CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3181 	      if ((j & (k - 1)) != k - 1)
3182 		continue;
3183 	      vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3184 	      new_stmt
3185 		= gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
3186 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
3187 
3188 	      if ((unsigned) j == k - 1)
3189 		STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3190 	      else
3191 		STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3192 
3193 	      prev_stmt_info = vinfo_for_stmt (new_stmt);
3194 	      continue;
3195 	    }
3196 	  else if (ratype)
3197 	    {
3198 	      tree t = build_fold_addr_expr (new_temp);
3199 	      t = build2 (MEM_REF, vectype, t,
3200 			  build_int_cst (TREE_TYPE (t), 0));
3201 	      new_stmt
3202 		= gimple_build_assign (make_ssa_name (vec_dest), t);
3203 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
3204 	      tree clobber = build_constructor (ratype, NULL);
3205 	      TREE_THIS_VOLATILE (clobber) = 1;
3206 	      vect_finish_stmt_generation (stmt,
3207 					   gimple_build_assign (new_temp,
3208 								clobber), gsi);
3209 	    }
3210 	}
3211 
3212       if (j == 0)
3213 	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3214       else
3215 	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3216 
3217       prev_stmt_info = vinfo_for_stmt (new_stmt);
3218     }
3219 
3220   vargs.release ();
3221 
3222   /* The call in STMT might prevent it from being removed in dce.
3223      We however cannot remove it here, due to the way the ssa name
3224      it defines is mapped to the new definition.  So just replace
3225      rhs of the statement with something harmless.  */
3226 
3227   if (slp_node)
3228     return true;
3229 
3230   if (scalar_dest)
3231     {
3232       type = TREE_TYPE (scalar_dest);
3233       if (is_pattern_stmt_p (stmt_info))
3234 	lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3235       else
3236 	lhs = gimple_call_lhs (stmt);
3237       new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3238     }
3239   else
3240     new_stmt = gimple_build_nop ();
3241   set_vinfo_for_stmt (new_stmt, stmt_info);
3242   set_vinfo_for_stmt (stmt, NULL);
3243   STMT_VINFO_STMT (stmt_info) = new_stmt;
3244   gsi_replace (gsi, new_stmt, true);
3245   unlink_stmt_vdef (stmt);
3246 
3247   return true;
3248 }
3249 
3250 
3251 /* Function vect_gen_widened_results_half
3252 
3253    Create a vector stmt whose code, type, number of arguments, and result
3254    variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
3255    VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at BSI.
3256    In the case that CODE is a CALL_EXPR, this means that a call to DECL
3257    needs to be created (DECL is a function-decl of a target-builtin).
3258    STMT is the original scalar stmt that we are vectorizing.  */
3259 
3260 static gimple
3261 vect_gen_widened_results_half (enum tree_code code,
3262 			       tree decl,
3263                                tree vec_oprnd0, tree vec_oprnd1, int op_type,
3264 			       tree vec_dest, gimple_stmt_iterator *gsi,
3265 			       gimple stmt)
3266 {
3267   gimple new_stmt;
3268   tree new_temp;
3269 
3270   /* Generate half of the widened result:  */
3271   if (code == CALL_EXPR)
3272     {
3273       /* Target specific support  */
3274       if (op_type == binary_op)
3275 	new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3276       else
3277 	new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3278       new_temp = make_ssa_name (vec_dest, new_stmt);
3279       gimple_call_set_lhs (new_stmt, new_temp);
3280     }
3281   else
3282     {
3283       /* Generic support */
3284       gcc_assert (op_type == TREE_CODE_LENGTH (code));
3285       if (op_type != binary_op)
3286 	vec_oprnd1 = NULL;
3287       new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
3288       new_temp = make_ssa_name (vec_dest, new_stmt);
3289       gimple_assign_set_lhs (new_stmt, new_temp);
3290     }
3291   vect_finish_stmt_generation (stmt, new_stmt, gsi);
3292 
3293   return new_stmt;
3294 }
3295 
3296 
3297 /* Get vectorized definitions for loop-based vectorization.  For the first
3298    operand we call vect_get_vec_def_for_operand() (with OPRND containing
3299    scalar operand), and for the rest we get a copy with
3300    vect_get_vec_def_for_stmt_copy() using the previous vector definition
3301    (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3302    The vectors are collected into VEC_OPRNDS.  */
3303 
3304 static void
3305 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
3306 			  vec<tree> *vec_oprnds, int multi_step_cvt)
3307 {
3308   tree vec_oprnd;
3309 
3310   /* Get first vector operand.  */
3311   /* All the vector operands except the very first one (that is scalar oprnd)
3312      are stmt copies.  */
3313   if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
3314     vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
3315   else
3316     vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3317 
3318   vec_oprnds->quick_push (vec_oprnd);
3319 
3320   /* Get second vector operand.  */
3321   vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
3322   vec_oprnds->quick_push (vec_oprnd);
3323 
3324   *oprnd = vec_oprnd;
3325 
3326   /* For conversion in multiple steps, continue to get operands
3327      recursively.  */
3328   if (multi_step_cvt)
3329     vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds,  multi_step_cvt - 1);
3330 }
3331 
3332 
3333 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3334    For multi-step conversions store the resulting vectors and call the function
3335    recursively.  */
3336 
3337 static void
3338 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
3339 				       int multi_step_cvt, gimple stmt,
3340 				       vec<tree> vec_dsts,
3341 				       gimple_stmt_iterator *gsi,
3342 				       slp_tree slp_node, enum tree_code code,
3343 				       stmt_vec_info *prev_stmt_info)
3344 {
3345   unsigned int i;
3346   tree vop0, vop1, new_tmp, vec_dest;
3347   gimple new_stmt;
3348   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3349 
3350   vec_dest = vec_dsts.pop ();
3351 
3352   for (i = 0; i < vec_oprnds->length (); i += 2)
3353     {
3354       /* Create demotion operation.  */
3355       vop0 = (*vec_oprnds)[i];
3356       vop1 = (*vec_oprnds)[i + 1];
3357       new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
3358       new_tmp = make_ssa_name (vec_dest, new_stmt);
3359       gimple_assign_set_lhs (new_stmt, new_tmp);
3360       vect_finish_stmt_generation (stmt, new_stmt, gsi);
3361 
3362       if (multi_step_cvt)
3363 	/* Store the resulting vector for next recursive call.  */
3364 	(*vec_oprnds)[i/2] = new_tmp;
3365       else
3366 	{
3367 	  /* This is the last step of the conversion sequence. Store the
3368 	     vectors in SLP_NODE or in vector info of the scalar statement
3369 	     (or in STMT_VINFO_RELATED_STMT chain).  */
3370 	  if (slp_node)
3371 	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3372 	  else
3373 	    {
3374 	      if (!*prev_stmt_info)
3375 		STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3376 	      else
3377 		STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3378 
3379 	      *prev_stmt_info = vinfo_for_stmt (new_stmt);
3380 	    }
3381 	}
3382     }
3383 
3384   /* For multi-step demotion operations we first generate demotion operations
3385      from the source type to the intermediate types, and then combine the
3386      results (stored in VEC_OPRNDS) in demotion operation to the destination
3387      type.  */
3388   if (multi_step_cvt)
3389     {
3390       /* At each level of recursion we have half of the operands we had at the
3391 	 previous level.  */
3392       vec_oprnds->truncate ((i+1)/2);
3393       vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3394 					     stmt, vec_dsts, gsi, slp_node,
3395 					     VEC_PACK_TRUNC_EXPR,
3396 					     prev_stmt_info);
3397     }
3398 
3399   vec_dsts.quick_push (vec_dest);
3400 }
3401 
3402 
3403 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3404    and VEC_OPRNDS1 (for binary operations).  For multi-step conversions store
3405    the resulting vectors and call the function recursively.  */
3406 
3407 static void
3408 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
3409 					vec<tree> *vec_oprnds1,
3410 					gimple stmt, tree vec_dest,
3411 					gimple_stmt_iterator *gsi,
3412 					enum tree_code code1,
3413 					enum tree_code code2, tree decl1,
3414 					tree decl2, int op_type)
3415 {
3416   int i;
3417   tree vop0, vop1, new_tmp1, new_tmp2;
3418   gimple new_stmt1, new_stmt2;
3419   vec<tree> vec_tmp = vNULL;
3420 
3421   vec_tmp.create (vec_oprnds0->length () * 2);
3422   FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
3423     {
3424       if (op_type == binary_op)
3425 	vop1 = (*vec_oprnds1)[i];
3426       else
3427 	vop1 = NULL_TREE;
3428 
3429       /* Generate the two halves of promotion operation.  */
3430       new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
3431 						 op_type, vec_dest, gsi, stmt);
3432       new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3433 						 op_type, vec_dest, gsi, stmt);
3434       if (is_gimple_call (new_stmt1))
3435 	{
3436 	  new_tmp1 = gimple_call_lhs (new_stmt1);
3437 	  new_tmp2 = gimple_call_lhs (new_stmt2);
3438 	}
3439       else
3440 	{
3441 	  new_tmp1 = gimple_assign_lhs (new_stmt1);
3442 	  new_tmp2 = gimple_assign_lhs (new_stmt2);
3443 	}
3444 
3445       /* Store the results for the next step.  */
3446       vec_tmp.quick_push (new_tmp1);
3447       vec_tmp.quick_push (new_tmp2);
3448     }
3449 
3450   vec_oprnds0->release ();
3451   *vec_oprnds0 = vec_tmp;
3452 }
3453 
3454 
3455 /* Check if STMT performs a conversion operation, that can be vectorized.
3456    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3457    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3458    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
3459 
3460 static bool
3461 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
3462 			 gimple *vec_stmt, slp_tree slp_node)
3463 {
3464   tree vec_dest;
3465   tree scalar_dest;
3466   tree op0, op1 = NULL_TREE;
3467   tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3468   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3469   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3470   enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3471   enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
3472   tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3473   tree new_temp;
3474   tree def;
3475   gimple def_stmt;
3476   enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3477   gimple new_stmt = NULL;
3478   stmt_vec_info prev_stmt_info;
3479   int nunits_in;
3480   int nunits_out;
3481   tree vectype_out, vectype_in;
3482   int ncopies, i, j;
3483   tree lhs_type, rhs_type;
3484   enum { NARROW, NONE, WIDEN } modifier;
3485   vec<tree> vec_oprnds0 = vNULL;
3486   vec<tree> vec_oprnds1 = vNULL;
3487   tree vop0;
3488   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3489   int multi_step_cvt = 0;
3490   vec<tree> vec_dsts = vNULL;
3491   vec<tree> interm_types = vNULL;
3492   tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
3493   int op_type;
3494   machine_mode rhs_mode;
3495   unsigned short fltsz;
3496 
3497   /* Is STMT a vectorizable conversion?   */
3498 
3499   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3500     return false;
3501 
3502   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3503     return false;
3504 
3505   if (!is_gimple_assign (stmt))
3506     return false;
3507 
3508   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3509     return false;
3510 
3511   code = gimple_assign_rhs_code (stmt);
3512   if (!CONVERT_EXPR_CODE_P (code)
3513       && code != FIX_TRUNC_EXPR
3514       && code != FLOAT_EXPR
3515       && code != WIDEN_MULT_EXPR
3516       && code != WIDEN_LSHIFT_EXPR)
3517     return false;
3518 
3519   op_type = TREE_CODE_LENGTH (code);
3520 
3521   /* Check types of lhs and rhs.  */
3522   scalar_dest = gimple_assign_lhs (stmt);
3523   lhs_type = TREE_TYPE (scalar_dest);
3524   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3525 
3526   op0 = gimple_assign_rhs1 (stmt);
3527   rhs_type = TREE_TYPE (op0);
3528 
3529   if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3530       && !((INTEGRAL_TYPE_P (lhs_type)
3531 	    && INTEGRAL_TYPE_P (rhs_type))
3532 	   || (SCALAR_FLOAT_TYPE_P (lhs_type)
3533 	       && SCALAR_FLOAT_TYPE_P (rhs_type))))
3534     return false;
3535 
3536   if ((INTEGRAL_TYPE_P (lhs_type)
3537        && (TYPE_PRECISION (lhs_type)
3538 	   != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
3539       || (INTEGRAL_TYPE_P (rhs_type)
3540 	  && (TYPE_PRECISION (rhs_type)
3541 	      != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
3542     {
3543       if (dump_enabled_p ())
3544 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3545                          "type conversion to/from bit-precision unsupported."
3546                          "\n");
3547       return false;
3548     }
3549 
3550   /* Check the operands of the operation.  */
3551   if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3552 			     &def_stmt, &def, &dt[0], &vectype_in))
3553     {
3554       if (dump_enabled_p ())
3555 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3556                          "use not simple.\n");
3557       return false;
3558     }
3559   if (op_type == binary_op)
3560     {
3561       bool ok;
3562 
3563       op1 = gimple_assign_rhs2 (stmt);
3564       gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
3565       /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3566 	 OP1.  */
3567       if (CONSTANT_CLASS_P (op0))
3568 	ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
3569 				   &def_stmt, &def, &dt[1], &vectype_in);
3570       else
3571 	ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3572 				 &def, &dt[1]);
3573 
3574       if (!ok)
3575 	{
3576           if (dump_enabled_p ())
3577             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3578                              "use not simple.\n");
3579 	  return false;
3580 	}
3581     }
3582 
3583   /* If op0 is an external or constant defs use a vector type of
3584      the same size as the output vector type.  */
3585   if (!vectype_in)
3586     vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
3587   if (vec_stmt)
3588     gcc_assert (vectype_in);
3589   if (!vectype_in)
3590     {
3591       if (dump_enabled_p ())
3592 	{
3593 	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3594                            "no vectype for scalar type ");
3595 	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
3596           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3597 	}
3598 
3599       return false;
3600     }
3601 
3602   nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3603   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3604   if (nunits_in < nunits_out)
3605     modifier = NARROW;
3606   else if (nunits_out == nunits_in)
3607     modifier = NONE;
3608   else
3609     modifier = WIDEN;
3610 
3611   /* Multiple types in SLP are handled by creating the appropriate number of
3612      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
3613      case of SLP.  */
3614   if (slp_node || PURE_SLP_STMT (stmt_info))
3615     ncopies = 1;
3616   else if (modifier == NARROW)
3617     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3618   else
3619     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3620 
3621   /* Sanity check: make sure that at least one copy of the vectorized stmt
3622      needs to be generated.  */
3623   gcc_assert (ncopies >= 1);
3624 
3625   /* Supportable by target?  */
3626   switch (modifier)
3627     {
3628     case NONE:
3629       if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3630 	return false;
3631       if (supportable_convert_operation (code, vectype_out, vectype_in,
3632 					 &decl1, &code1))
3633 	break;
3634       /* FALLTHRU */
3635     unsupported:
3636       if (dump_enabled_p ())
3637 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3638                          "conversion not supported by target.\n");
3639       return false;
3640 
3641     case WIDEN:
3642       if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
3643 					  &code1, &code2, &multi_step_cvt,
3644 					  &interm_types))
3645 	{
3646 	  /* Binary widening operation can only be supported directly by the
3647 	     architecture.  */
3648 	  gcc_assert (!(multi_step_cvt && op_type == binary_op));
3649 	  break;
3650 	}
3651 
3652       if (code != FLOAT_EXPR
3653 	  || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3654 	      <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3655 	goto unsupported;
3656 
3657       rhs_mode = TYPE_MODE (rhs_type);
3658       fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
3659       for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
3660 	   rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
3661 	   rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
3662 	{
3663 	  cvt_type
3664 	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3665 	  cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3666 	  if (cvt_type == NULL_TREE)
3667 	    goto unsupported;
3668 
3669 	  if (GET_MODE_SIZE (rhs_mode) == fltsz)
3670 	    {
3671 	      if (!supportable_convert_operation (code, vectype_out,
3672 						  cvt_type, &decl1, &codecvt1))
3673 		goto unsupported;
3674 	    }
3675 	  else if (!supportable_widening_operation (code, stmt, vectype_out,
3676 						    cvt_type, &codecvt1,
3677 						    &codecvt2, &multi_step_cvt,
3678 						    &interm_types))
3679 	    continue;
3680 	  else
3681 	    gcc_assert (multi_step_cvt == 0);
3682 
3683 	  if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
3684 					      vectype_in, &code1, &code2,
3685 					      &multi_step_cvt, &interm_types))
3686 	    break;
3687 	}
3688 
3689       if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
3690 	goto unsupported;
3691 
3692       if (GET_MODE_SIZE (rhs_mode) == fltsz)
3693 	codecvt2 = ERROR_MARK;
3694       else
3695 	{
3696 	  multi_step_cvt++;
3697 	  interm_types.safe_push (cvt_type);
3698 	  cvt_type = NULL_TREE;
3699 	}
3700       break;
3701 
3702     case NARROW:
3703       gcc_assert (op_type == unary_op);
3704       if (supportable_narrowing_operation (code, vectype_out, vectype_in,
3705 					   &code1, &multi_step_cvt,
3706 					   &interm_types))
3707 	break;
3708 
3709       if (code != FIX_TRUNC_EXPR
3710 	  || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3711 	      >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3712 	goto unsupported;
3713 
3714       rhs_mode = TYPE_MODE (rhs_type);
3715       cvt_type
3716 	= build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3717       cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3718       if (cvt_type == NULL_TREE)
3719 	goto unsupported;
3720       if (!supportable_convert_operation (code, cvt_type, vectype_in,
3721 					  &decl1, &codecvt1))
3722 	goto unsupported;
3723       if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
3724 					   &code1, &multi_step_cvt,
3725 					   &interm_types))
3726 	break;
3727       goto unsupported;
3728 
3729     default:
3730       gcc_unreachable ();
3731     }
3732 
3733   if (!vec_stmt)		/* transformation not required.  */
3734     {
3735       if (dump_enabled_p ())
3736 	dump_printf_loc (MSG_NOTE, vect_location,
3737                          "=== vectorizable_conversion ===\n");
3738       if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
3739         {
3740 	  STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
3741 	  vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3742 	}
3743       else if (modifier == NARROW)
3744 	{
3745 	  STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
3746 	  vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3747 	}
3748       else
3749 	{
3750 	  STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
3751 	  vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3752 	}
3753       interm_types.release ();
3754       return true;
3755     }
3756 
3757   /** Transform.  **/
3758   if (dump_enabled_p ())
3759     dump_printf_loc (MSG_NOTE, vect_location,
3760                      "transform conversion. ncopies = %d.\n", ncopies);
3761 
3762   if (op_type == binary_op)
3763     {
3764       if (CONSTANT_CLASS_P (op0))
3765 	op0 = fold_convert (TREE_TYPE (op1), op0);
3766       else if (CONSTANT_CLASS_P (op1))
3767 	op1 = fold_convert (TREE_TYPE (op0), op1);
3768     }
3769 
3770   /* In case of multi-step conversion, we first generate conversion operations
3771      to the intermediate types, and then from that types to the final one.
3772      We create vector destinations for the intermediate type (TYPES) received
3773      from supportable_*_operation, and store them in the correct order
3774      for future use in vect_create_vectorized_*_stmts ().  */
3775   vec_dsts.create (multi_step_cvt + 1);
3776   vec_dest = vect_create_destination_var (scalar_dest,
3777 					  (cvt_type && modifier == WIDEN)
3778 					  ? cvt_type : vectype_out);
3779   vec_dsts.quick_push (vec_dest);
3780 
3781   if (multi_step_cvt)
3782     {
3783       for (i = interm_types.length () - 1;
3784 	   interm_types.iterate (i, &intermediate_type); i--)
3785 	{
3786 	  vec_dest = vect_create_destination_var (scalar_dest,
3787 						  intermediate_type);
3788 	  vec_dsts.quick_push (vec_dest);
3789 	}
3790     }
3791 
3792   if (cvt_type)
3793     vec_dest = vect_create_destination_var (scalar_dest,
3794 					    modifier == WIDEN
3795 					    ? vectype_out : cvt_type);
3796 
3797   if (!slp_node)
3798     {
3799       if (modifier == WIDEN)
3800 	{
3801 	  vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
3802 	  if (op_type == binary_op)
3803 	    vec_oprnds1.create (1);
3804 	}
3805       else if (modifier == NARROW)
3806 	vec_oprnds0.create (
3807 		   2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
3808     }
3809   else if (code == WIDEN_LSHIFT_EXPR)
3810     vec_oprnds1.create (slp_node->vec_stmts_size);
3811 
3812   last_oprnd = op0;
3813   prev_stmt_info = NULL;
3814   switch (modifier)
3815     {
3816     case NONE:
3817       for (j = 0; j < ncopies; j++)
3818 	{
3819 	  if (j == 0)
3820 	    vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
3821 			       -1);
3822 	  else
3823 	    vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
3824 
3825 	  FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3826 	    {
3827 	      /* Arguments are ready, create the new vector stmt.  */
3828 	      if (code1 == CALL_EXPR)
3829 		{
3830 		  new_stmt = gimple_build_call (decl1, 1, vop0);
3831 		  new_temp = make_ssa_name (vec_dest, new_stmt);
3832 		  gimple_call_set_lhs (new_stmt, new_temp);
3833 		}
3834 	      else
3835 		{
3836 		  gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
3837 		  new_stmt = gimple_build_assign (vec_dest, code1, vop0);
3838 		  new_temp = make_ssa_name (vec_dest, new_stmt);
3839 		  gimple_assign_set_lhs (new_stmt, new_temp);
3840 		}
3841 
3842 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
3843 	      if (slp_node)
3844 		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3845 	    }
3846 
3847 	  if (j == 0)
3848 	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3849 	  else
3850 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3851 	  prev_stmt_info = vinfo_for_stmt (new_stmt);
3852 	}
3853       break;
3854 
3855     case WIDEN:
3856       /* In case the vectorization factor (VF) is bigger than the number
3857 	 of elements that we can fit in a vectype (nunits), we have to
3858 	 generate more than one vector stmt - i.e - we need to "unroll"
3859 	 the vector stmt by a factor VF/nunits.  */
3860       for (j = 0; j < ncopies; j++)
3861 	{
3862 	  /* Handle uses.  */
3863 	  if (j == 0)
3864 	    {
3865 	      if (slp_node)
3866 		{
3867 		  if (code == WIDEN_LSHIFT_EXPR)
3868 		    {
3869 		      unsigned int k;
3870 
3871 		      vec_oprnd1 = op1;
3872 		      /* Store vec_oprnd1 for every vector stmt to be created
3873 			 for SLP_NODE.  We check during the analysis that all
3874 			 the shift arguments are the same.  */
3875 		      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
3876 			vec_oprnds1.quick_push (vec_oprnd1);
3877 
3878 		      vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3879 					 slp_node, -1);
3880 		    }
3881 		  else
3882 		    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
3883 				       &vec_oprnds1, slp_node, -1);
3884 		}
3885 	      else
3886 		{
3887 		  vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3888 		  vec_oprnds0.quick_push (vec_oprnd0);
3889 		  if (op_type == binary_op)
3890 		    {
3891 		      if (code == WIDEN_LSHIFT_EXPR)
3892 			vec_oprnd1 = op1;
3893 		      else
3894 			vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
3895 								   NULL);
3896 		      vec_oprnds1.quick_push (vec_oprnd1);
3897 		    }
3898 		}
3899 	    }
3900 	  else
3901 	    {
3902 	      vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3903 	      vec_oprnds0.truncate (0);
3904 	      vec_oprnds0.quick_push (vec_oprnd0);
3905 	      if (op_type == binary_op)
3906 		{
3907 		  if (code == WIDEN_LSHIFT_EXPR)
3908 		    vec_oprnd1 = op1;
3909 		  else
3910 		    vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
3911 								 vec_oprnd1);
3912 		  vec_oprnds1.truncate (0);
3913 		  vec_oprnds1.quick_push (vec_oprnd1);
3914 		}
3915 	    }
3916 
3917 	  /* Arguments are ready.  Create the new vector stmts.  */
3918 	  for (i = multi_step_cvt; i >= 0; i--)
3919 	    {
3920 	      tree this_dest = vec_dsts[i];
3921 	      enum tree_code c1 = code1, c2 = code2;
3922 	      if (i == 0 && codecvt2 != ERROR_MARK)
3923 		{
3924 		  c1 = codecvt1;
3925 		  c2 = codecvt2;
3926 		}
3927 	      vect_create_vectorized_promotion_stmts (&vec_oprnds0,
3928 						      &vec_oprnds1,
3929 						      stmt, this_dest, gsi,
3930 						      c1, c2, decl1, decl2,
3931 						      op_type);
3932 	    }
3933 
3934 	  FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3935 	    {
3936 	      if (cvt_type)
3937 		{
3938 		  if (codecvt1 == CALL_EXPR)
3939 		    {
3940 		      new_stmt = gimple_build_call (decl1, 1, vop0);
3941 		      new_temp = make_ssa_name (vec_dest, new_stmt);
3942 		      gimple_call_set_lhs (new_stmt, new_temp);
3943 		    }
3944 		  else
3945 		    {
3946 		      gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
3947 		      new_temp = make_ssa_name (vec_dest);
3948 		      new_stmt = gimple_build_assign (new_temp, codecvt1,
3949 						      vop0);
3950 		    }
3951 
3952 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
3953 		}
3954 	      else
3955 		new_stmt = SSA_NAME_DEF_STMT (vop0);
3956 
3957 	      if (slp_node)
3958 		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3959 	      else
3960 		{
3961 		  if (!prev_stmt_info)
3962 		    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3963 		  else
3964 		    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3965 		  prev_stmt_info = vinfo_for_stmt (new_stmt);
3966 		}
3967 	    }
3968 	}
3969 
3970       *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3971       break;
3972 
3973     case NARROW:
3974       /* In case the vectorization factor (VF) is bigger than the number
3975 	 of elements that we can fit in a vectype (nunits), we have to
3976 	 generate more than one vector stmt - i.e - we need to "unroll"
3977 	 the vector stmt by a factor VF/nunits.  */
3978       for (j = 0; j < ncopies; j++)
3979 	{
3980 	  /* Handle uses.  */
3981 	  if (slp_node)
3982 	    vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3983 			       slp_node, -1);
3984 	  else
3985 	    {
3986 	      vec_oprnds0.truncate (0);
3987 	      vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
3988 					vect_pow2 (multi_step_cvt) - 1);
3989 	    }
3990 
3991 	  /* Arguments are ready.  Create the new vector stmts.  */
3992 	  if (cvt_type)
3993 	    FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3994 	      {
3995 		if (codecvt1 == CALL_EXPR)
3996 		  {
3997 		    new_stmt = gimple_build_call (decl1, 1, vop0);
3998 		    new_temp = make_ssa_name (vec_dest, new_stmt);
3999 		    gimple_call_set_lhs (new_stmt, new_temp);
4000 		  }
4001 		else
4002 		  {
4003 		    gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4004 		    new_temp = make_ssa_name (vec_dest);
4005 		    new_stmt = gimple_build_assign (new_temp, codecvt1,
4006 						    vop0);
4007 		  }
4008 
4009 		vect_finish_stmt_generation (stmt, new_stmt, gsi);
4010 		vec_oprnds0[i] = new_temp;
4011 	      }
4012 
4013 	  vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
4014 						 stmt, vec_dsts, gsi,
4015 						 slp_node, code1,
4016 						 &prev_stmt_info);
4017 	}
4018 
4019       *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4020       break;
4021     }
4022 
4023   vec_oprnds0.release ();
4024   vec_oprnds1.release ();
4025   vec_dsts.release ();
4026   interm_types.release ();
4027 
4028   return true;
4029 }
4030 
4031 
4032 /* Function vectorizable_assignment.
4033 
4034    Check if STMT performs an assignment (copy) that can be vectorized.
4035    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4036    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4037    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
4038 
4039 static bool
4040 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
4041 			 gimple *vec_stmt, slp_tree slp_node)
4042 {
4043   tree vec_dest;
4044   tree scalar_dest;
4045   tree op;
4046   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4047   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4048   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4049   tree new_temp;
4050   tree def;
4051   gimple def_stmt;
4052   enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4053   unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4054   int ncopies;
4055   int i, j;
4056   vec<tree> vec_oprnds = vNULL;
4057   tree vop;
4058   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4059   gimple new_stmt = NULL;
4060   stmt_vec_info prev_stmt_info = NULL;
4061   enum tree_code code;
4062   tree vectype_in;
4063 
4064   /* Multiple types in SLP are handled by creating the appropriate number of
4065      vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4066      case of SLP.  */
4067   if (slp_node || PURE_SLP_STMT (stmt_info))
4068     ncopies = 1;
4069   else
4070     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4071 
4072   gcc_assert (ncopies >= 1);
4073 
4074   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4075     return false;
4076 
4077   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4078     return false;
4079 
4080   /* Is vectorizable assignment?  */
4081   if (!is_gimple_assign (stmt))
4082     return false;
4083 
4084   scalar_dest = gimple_assign_lhs (stmt);
4085   if (TREE_CODE (scalar_dest) != SSA_NAME)
4086     return false;
4087 
4088   code = gimple_assign_rhs_code (stmt);
4089   if (gimple_assign_single_p (stmt)
4090       || code == PAREN_EXPR
4091       || CONVERT_EXPR_CODE_P (code))
4092     op = gimple_assign_rhs1 (stmt);
4093   else
4094     return false;
4095 
4096   if (code == VIEW_CONVERT_EXPR)
4097     op = TREE_OPERAND (op, 0);
4098 
4099   if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
4100 			     &def_stmt, &def, &dt[0], &vectype_in))
4101     {
4102       if (dump_enabled_p ())
4103         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4104                          "use not simple.\n");
4105       return false;
4106     }
4107 
4108   /* We can handle NOP_EXPR conversions that do not change the number
4109      of elements or the vector size.  */
4110   if ((CONVERT_EXPR_CODE_P (code)
4111        || code == VIEW_CONVERT_EXPR)
4112       && (!vectype_in
4113 	  || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
4114 	  || (GET_MODE_SIZE (TYPE_MODE (vectype))
4115 	      != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4116     return false;
4117 
4118   /* We do not handle bit-precision changes.  */
4119   if ((CONVERT_EXPR_CODE_P (code)
4120        || code == VIEW_CONVERT_EXPR)
4121       && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4122       && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4123 	   != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4124 	  || ((TYPE_PRECISION (TREE_TYPE (op))
4125 	       != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
4126       /* But a conversion that does not change the bit-pattern is ok.  */
4127       && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4128 	    > TYPE_PRECISION (TREE_TYPE (op)))
4129 	   && TYPE_UNSIGNED (TREE_TYPE (op))))
4130     {
4131       if (dump_enabled_p ())
4132         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4133                          "type conversion to/from bit-precision "
4134                          "unsupported.\n");
4135       return false;
4136     }
4137 
4138   if (!vec_stmt) /* transformation not required.  */
4139     {
4140       STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
4141       if (dump_enabled_p ())
4142         dump_printf_loc (MSG_NOTE, vect_location,
4143                          "=== vectorizable_assignment ===\n");
4144       vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4145       return true;
4146     }
4147 
4148   /** Transform.  **/
4149   if (dump_enabled_p ())
4150     dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
4151 
4152   /* Handle def.  */
4153   vec_dest = vect_create_destination_var (scalar_dest, vectype);
4154 
4155   /* Handle use.  */
4156   for (j = 0; j < ncopies; j++)
4157     {
4158       /* Handle uses.  */
4159       if (j == 0)
4160         vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
4161       else
4162         vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4163 
4164       /* Arguments are ready. create the new vector stmt.  */
4165       FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
4166        {
4167 	 if (CONVERT_EXPR_CODE_P (code)
4168 	     || code == VIEW_CONVERT_EXPR)
4169 	   vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
4170          new_stmt = gimple_build_assign (vec_dest, vop);
4171          new_temp = make_ssa_name (vec_dest, new_stmt);
4172          gimple_assign_set_lhs (new_stmt, new_temp);
4173          vect_finish_stmt_generation (stmt, new_stmt, gsi);
4174          if (slp_node)
4175            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4176        }
4177 
4178       if (slp_node)
4179         continue;
4180 
4181       if (j == 0)
4182         STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4183       else
4184         STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4185 
4186       prev_stmt_info = vinfo_for_stmt (new_stmt);
4187     }
4188 
4189   vec_oprnds.release ();
4190   return true;
4191 }
4192 
4193 
4194 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4195    either as shift by a scalar or by a vector.  */
4196 
4197 bool
4198 vect_supportable_shift (enum tree_code code, tree scalar_type)
4199 {
4200 
4201   machine_mode vec_mode;
4202   optab optab;
4203   int icode;
4204   tree vectype;
4205 
4206   vectype = get_vectype_for_scalar_type (scalar_type);
4207   if (!vectype)
4208     return false;
4209 
4210   optab = optab_for_tree_code (code, vectype, optab_scalar);
4211   if (!optab
4212       || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4213     {
4214       optab = optab_for_tree_code (code, vectype, optab_vector);
4215       if (!optab
4216           || (optab_handler (optab, TYPE_MODE (vectype))
4217                       == CODE_FOR_nothing))
4218         return false;
4219     }
4220 
4221   vec_mode = TYPE_MODE (vectype);
4222   icode = (int) optab_handler (optab, vec_mode);
4223   if (icode == CODE_FOR_nothing)
4224     return false;
4225 
4226   return true;
4227 }
4228 
4229 
4230 /* Function vectorizable_shift.
4231 
4232    Check if STMT performs a shift operation that can be vectorized.
4233    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4234    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4235    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
4236 
4237 static bool
4238 vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
4239                     gimple *vec_stmt, slp_tree slp_node)
4240 {
4241   tree vec_dest;
4242   tree scalar_dest;
4243   tree op0, op1 = NULL;
4244   tree vec_oprnd1 = NULL_TREE;
4245   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4246   tree vectype;
4247   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4248   enum tree_code code;
4249   machine_mode vec_mode;
4250   tree new_temp;
4251   optab optab;
4252   int icode;
4253   machine_mode optab_op2_mode;
4254   tree def;
4255   gimple def_stmt;
4256   enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4257   gimple new_stmt = NULL;
4258   stmt_vec_info prev_stmt_info;
4259   int nunits_in;
4260   int nunits_out;
4261   tree vectype_out;
4262   tree op1_vectype;
4263   int ncopies;
4264   int j, i;
4265   vec<tree> vec_oprnds0 = vNULL;
4266   vec<tree> vec_oprnds1 = vNULL;
4267   tree vop0, vop1;
4268   unsigned int k;
4269   bool scalar_shift_arg = true;
4270   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4271   int vf;
4272 
4273   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4274     return false;
4275 
4276   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4277     return false;
4278 
4279   /* Is STMT a vectorizable binary/unary operation?   */
4280   if (!is_gimple_assign (stmt))
4281     return false;
4282 
4283   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4284     return false;
4285 
4286   code = gimple_assign_rhs_code (stmt);
4287 
4288   if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4289       || code == RROTATE_EXPR))
4290     return false;
4291 
4292   scalar_dest = gimple_assign_lhs (stmt);
4293   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4294   if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4295       != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4296     {
4297       if (dump_enabled_p ())
4298         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4299                          "bit-precision shifts not supported.\n");
4300       return false;
4301     }
4302 
4303   op0 = gimple_assign_rhs1 (stmt);
4304   if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
4305                              &def_stmt, &def, &dt[0], &vectype))
4306     {
4307       if (dump_enabled_p ())
4308         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4309                          "use not simple.\n");
4310       return false;
4311     }
4312   /* If op0 is an external or constant def use a vector type with
4313      the same size as the output vector type.  */
4314   if (!vectype)
4315     vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4316   if (vec_stmt)
4317     gcc_assert (vectype);
4318   if (!vectype)
4319     {
4320       if (dump_enabled_p ())
4321         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4322                          "no vectype for scalar type\n");
4323       return false;
4324     }
4325 
4326   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4327   nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4328   if (nunits_out != nunits_in)
4329     return false;
4330 
4331   op1 = gimple_assign_rhs2 (stmt);
4332   if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4333 			     &def, &dt[1], &op1_vectype))
4334     {
4335       if (dump_enabled_p ())
4336         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4337                          "use not simple.\n");
4338       return false;
4339     }
4340 
4341   if (loop_vinfo)
4342     vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4343   else
4344     vf = 1;
4345 
4346   /* Multiple types in SLP are handled by creating the appropriate number of
4347      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
4348      case of SLP.  */
4349   if (slp_node || PURE_SLP_STMT (stmt_info))
4350     ncopies = 1;
4351   else
4352     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4353 
4354   gcc_assert (ncopies >= 1);
4355 
4356   /* Determine whether the shift amount is a vector, or scalar.  If the
4357      shift/rotate amount is a vector, use the vector/vector shift optabs.  */
4358 
4359   if (dt[1] == vect_internal_def && !slp_node)
4360     scalar_shift_arg = false;
4361   else if (dt[1] == vect_constant_def
4362 	   || dt[1] == vect_external_def
4363 	   || dt[1] == vect_internal_def)
4364     {
4365       /* In SLP, need to check whether the shift count is the same,
4366 	 in loops if it is a constant or invariant, it is always
4367 	 a scalar shift.  */
4368       if (slp_node)
4369 	{
4370 	  vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4371 	  gimple slpstmt;
4372 
4373 	  FOR_EACH_VEC_ELT (stmts, k, slpstmt)
4374 	    if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
4375 	      scalar_shift_arg = false;
4376 	}
4377     }
4378   else
4379     {
4380       if (dump_enabled_p ())
4381         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4382                          "operand mode requires invariant argument.\n");
4383       return false;
4384     }
4385 
4386   /* Vector shifted by vector.  */
4387   if (!scalar_shift_arg)
4388     {
4389       optab = optab_for_tree_code (code, vectype, optab_vector);
4390       if (dump_enabled_p ())
4391         dump_printf_loc (MSG_NOTE, vect_location,
4392                          "vector/vector shift/rotate found.\n");
4393 
4394       if (!op1_vectype)
4395 	op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
4396       if (op1_vectype == NULL_TREE
4397 	  || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
4398 	{
4399 	  if (dump_enabled_p ())
4400 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4401                              "unusable type for last operand in"
4402                              " vector/vector shift/rotate.\n");
4403 	  return false;
4404 	}
4405     }
4406   /* See if the machine has a vector shifted by scalar insn and if not
4407      then see if it has a vector shifted by vector insn.  */
4408   else
4409     {
4410       optab = optab_for_tree_code (code, vectype, optab_scalar);
4411       if (optab
4412           && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
4413         {
4414           if (dump_enabled_p ())
4415             dump_printf_loc (MSG_NOTE, vect_location,
4416                              "vector/scalar shift/rotate found.\n");
4417         }
4418       else
4419         {
4420           optab = optab_for_tree_code (code, vectype, optab_vector);
4421           if (optab
4422                && (optab_handler (optab, TYPE_MODE (vectype))
4423                       != CODE_FOR_nothing))
4424             {
4425 	      scalar_shift_arg = false;
4426 
4427               if (dump_enabled_p ())
4428                 dump_printf_loc (MSG_NOTE, vect_location,
4429                                  "vector/vector shift/rotate found.\n");
4430 
4431               /* Unlike the other binary operators, shifts/rotates have
4432                  the rhs being int, instead of the same type as the lhs,
4433                  so make sure the scalar is the right type if we are
4434 		 dealing with vectors of long long/long/short/char.  */
4435               if (dt[1] == vect_constant_def)
4436                 op1 = fold_convert (TREE_TYPE (vectype), op1);
4437 	      else if (!useless_type_conversion_p (TREE_TYPE (vectype),
4438 						   TREE_TYPE (op1)))
4439 		{
4440 		  if (slp_node
4441 		      && TYPE_MODE (TREE_TYPE (vectype))
4442 			 != TYPE_MODE (TREE_TYPE (op1)))
4443 		    {
4444                       if (dump_enabled_p ())
4445                         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4446                                          "unusable type for last operand in"
4447                                          " vector/vector shift/rotate.\n");
4448 			return false;
4449 		    }
4450 		  if (vec_stmt && !slp_node)
4451 		    {
4452 		      op1 = fold_convert (TREE_TYPE (vectype), op1);
4453 		      op1 = vect_init_vector (stmt, op1,
4454 					      TREE_TYPE (vectype), NULL);
4455 		    }
4456 		}
4457             }
4458         }
4459     }
4460 
4461   /* Supportable by target?  */
4462   if (!optab)
4463     {
4464       if (dump_enabled_p ())
4465         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4466                          "no optab.\n");
4467       return false;
4468     }
4469   vec_mode = TYPE_MODE (vectype);
4470   icode = (int) optab_handler (optab, vec_mode);
4471   if (icode == CODE_FOR_nothing)
4472     {
4473       if (dump_enabled_p ())
4474         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4475                          "op not supported by target.\n");
4476       /* Check only during analysis.  */
4477       if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4478           || (vf < vect_min_worthwhile_factor (code)
4479               && !vec_stmt))
4480         return false;
4481       if (dump_enabled_p ())
4482         dump_printf_loc (MSG_NOTE, vect_location,
4483                          "proceeding using word mode.\n");
4484     }
4485 
4486   /* Worthwhile without SIMD support?  Check only during analysis.  */
4487   if (!VECTOR_MODE_P (TYPE_MODE (vectype))
4488       && vf < vect_min_worthwhile_factor (code)
4489       && !vec_stmt)
4490     {
4491       if (dump_enabled_p ())
4492         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4493                          "not worthwhile without SIMD support.\n");
4494       return false;
4495     }
4496 
4497   if (!vec_stmt) /* transformation not required.  */
4498     {
4499       STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
4500       if (dump_enabled_p ())
4501         dump_printf_loc (MSG_NOTE, vect_location,
4502                          "=== vectorizable_shift ===\n");
4503       vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4504       return true;
4505     }
4506 
4507   /** Transform.  **/
4508 
4509   if (dump_enabled_p ())
4510     dump_printf_loc (MSG_NOTE, vect_location,
4511                      "transform binary/unary operation.\n");
4512 
4513   /* Handle def.  */
4514   vec_dest = vect_create_destination_var (scalar_dest, vectype);
4515 
4516   prev_stmt_info = NULL;
4517   for (j = 0; j < ncopies; j++)
4518     {
4519       /* Handle uses.  */
4520       if (j == 0)
4521         {
4522           if (scalar_shift_arg)
4523             {
4524               /* Vector shl and shr insn patterns can be defined with scalar
4525                  operand 2 (shift operand).  In this case, use constant or loop
4526                  invariant op1 directly, without extending it to vector mode
4527                  first.  */
4528               optab_op2_mode = insn_data[icode].operand[2].mode;
4529               if (!VECTOR_MODE_P (optab_op2_mode))
4530                 {
4531                   if (dump_enabled_p ())
4532                     dump_printf_loc (MSG_NOTE, vect_location,
4533                                      "operand 1 using scalar mode.\n");
4534                   vec_oprnd1 = op1;
4535                   vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
4536                   vec_oprnds1.quick_push (vec_oprnd1);
4537                   if (slp_node)
4538                     {
4539                       /* Store vec_oprnd1 for every vector stmt to be created
4540                          for SLP_NODE.  We check during the analysis that all
4541                          the shift arguments are the same.
4542                          TODO: Allow different constants for different vector
4543                          stmts generated for an SLP instance.  */
4544                       for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4545                         vec_oprnds1.quick_push (vec_oprnd1);
4546                     }
4547                 }
4548             }
4549 
4550           /* vec_oprnd1 is available if operand 1 should be of a scalar-type
4551              (a special case for certain kind of vector shifts); otherwise,
4552              operand 1 should be of a vector type (the usual case).  */
4553           if (vec_oprnd1)
4554             vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4555                                slp_node, -1);
4556           else
4557             vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4558                                slp_node, -1);
4559         }
4560       else
4561         vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4562 
4563       /* Arguments are ready.  Create the new vector stmt.  */
4564       FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4565         {
4566           vop1 = vec_oprnds1[i];
4567 	  new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4568           new_temp = make_ssa_name (vec_dest, new_stmt);
4569           gimple_assign_set_lhs (new_stmt, new_temp);
4570           vect_finish_stmt_generation (stmt, new_stmt, gsi);
4571           if (slp_node)
4572             SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4573         }
4574 
4575       if (slp_node)
4576         continue;
4577 
4578       if (j == 0)
4579         STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4580       else
4581         STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4582       prev_stmt_info = vinfo_for_stmt (new_stmt);
4583     }
4584 
4585   vec_oprnds0.release ();
4586   vec_oprnds1.release ();
4587 
4588   return true;
4589 }
4590 
4591 
4592 /* Function vectorizable_operation.
4593 
4594    Check if STMT performs a binary, unary or ternary operation that can
4595    be vectorized.
4596    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4597    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4598    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
4599 
4600 static bool
4601 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
4602 			gimple *vec_stmt, slp_tree slp_node)
4603 {
4604   tree vec_dest;
4605   tree scalar_dest;
4606   tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
4607   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4608   tree vectype;
4609   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4610   enum tree_code code;
4611   machine_mode vec_mode;
4612   tree new_temp;
4613   int op_type;
4614   optab optab;
4615   int icode;
4616   tree def;
4617   gimple def_stmt;
4618   enum vect_def_type dt[3]
4619     = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
4620   gimple new_stmt = NULL;
4621   stmt_vec_info prev_stmt_info;
4622   int nunits_in;
4623   int nunits_out;
4624   tree vectype_out;
4625   int ncopies;
4626   int j, i;
4627   vec<tree> vec_oprnds0 = vNULL;
4628   vec<tree> vec_oprnds1 = vNULL;
4629   vec<tree> vec_oprnds2 = vNULL;
4630   tree vop0, vop1, vop2;
4631   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4632   int vf;
4633 
4634   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4635     return false;
4636 
4637   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4638     return false;
4639 
4640   /* Is STMT a vectorizable binary/unary operation?   */
4641   if (!is_gimple_assign (stmt))
4642     return false;
4643 
4644   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4645     return false;
4646 
4647   code = gimple_assign_rhs_code (stmt);
4648 
4649   /* For pointer addition, we should use the normal plus for
4650      the vector addition.  */
4651   if (code == POINTER_PLUS_EXPR)
4652     code = PLUS_EXPR;
4653 
4654   /* Support only unary or binary operations.  */
4655   op_type = TREE_CODE_LENGTH (code);
4656   if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
4657     {
4658       if (dump_enabled_p ())
4659         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4660                          "num. args = %d (not unary/binary/ternary op).\n",
4661                          op_type);
4662       return false;
4663     }
4664 
4665   scalar_dest = gimple_assign_lhs (stmt);
4666   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4667 
4668   /* Most operations cannot handle bit-precision types without extra
4669      truncations.  */
4670   if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4671        != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4672       /* Exception are bitwise binary operations.  */
4673       && code != BIT_IOR_EXPR
4674       && code != BIT_XOR_EXPR
4675       && code != BIT_AND_EXPR)
4676     {
4677       if (dump_enabled_p ())
4678         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4679                          "bit-precision arithmetic not supported.\n");
4680       return false;
4681     }
4682 
4683   op0 = gimple_assign_rhs1 (stmt);
4684   if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
4685 			     &def_stmt, &def, &dt[0], &vectype))
4686     {
4687       if (dump_enabled_p ())
4688         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4689                          "use not simple.\n");
4690       return false;
4691     }
4692   /* If op0 is an external or constant def use a vector type with
4693      the same size as the output vector type.  */
4694   if (!vectype)
4695     vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4696   if (vec_stmt)
4697     gcc_assert (vectype);
4698   if (!vectype)
4699     {
4700       if (dump_enabled_p ())
4701         {
4702           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4703                            "no vectype for scalar type ");
4704           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
4705                              TREE_TYPE (op0));
4706           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4707         }
4708 
4709       return false;
4710     }
4711 
4712   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4713   nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4714   if (nunits_out != nunits_in)
4715     return false;
4716 
4717   if (op_type == binary_op || op_type == ternary_op)
4718     {
4719       op1 = gimple_assign_rhs2 (stmt);
4720       if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4721 			       &def, &dt[1]))
4722 	{
4723 	  if (dump_enabled_p ())
4724 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4725                              "use not simple.\n");
4726 	  return false;
4727 	}
4728     }
4729   if (op_type == ternary_op)
4730     {
4731       op2 = gimple_assign_rhs3 (stmt);
4732       if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4733 			       &def, &dt[2]))
4734 	{
4735 	  if (dump_enabled_p ())
4736 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4737                              "use not simple.\n");
4738 	  return false;
4739 	}
4740     }
4741 
4742   if (loop_vinfo)
4743     vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4744   else
4745     vf = 1;
4746 
4747   /* Multiple types in SLP are handled by creating the appropriate number of
4748      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
4749      case of SLP.  */
4750   if (slp_node || PURE_SLP_STMT (stmt_info))
4751     ncopies = 1;
4752   else
4753     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4754 
4755   gcc_assert (ncopies >= 1);
4756 
4757   /* Shifts are handled in vectorizable_shift ().  */
4758   if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4759       || code == RROTATE_EXPR)
4760    return false;
4761 
4762   /* Supportable by target?  */
4763 
4764   vec_mode = TYPE_MODE (vectype);
4765   if (code == MULT_HIGHPART_EXPR)
4766     {
4767       if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
4768 	icode = LAST_INSN_CODE;
4769       else
4770 	icode = CODE_FOR_nothing;
4771     }
4772   else
4773     {
4774       optab = optab_for_tree_code (code, vectype, optab_default);
4775       if (!optab)
4776 	{
4777           if (dump_enabled_p ())
4778             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4779                              "no optab.\n");
4780 	  return false;
4781 	}
4782       icode = (int) optab_handler (optab, vec_mode);
4783     }
4784 
4785   if (icode == CODE_FOR_nothing)
4786     {
4787       if (dump_enabled_p ())
4788 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4789                          "op not supported by target.\n");
4790       /* Check only during analysis.  */
4791       if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4792 	  || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
4793         return false;
4794       if (dump_enabled_p ())
4795 	dump_printf_loc (MSG_NOTE, vect_location,
4796                          "proceeding using word mode.\n");
4797     }
4798 
4799   /* Worthwhile without SIMD support?  Check only during analysis.  */
4800   if (!VECTOR_MODE_P (vec_mode)
4801       && !vec_stmt
4802       && vf < vect_min_worthwhile_factor (code))
4803     {
4804       if (dump_enabled_p ())
4805         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4806                          "not worthwhile without SIMD support.\n");
4807       return false;
4808     }
4809 
4810   if (!vec_stmt) /* transformation not required.  */
4811     {
4812       STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
4813       if (dump_enabled_p ())
4814         dump_printf_loc (MSG_NOTE, vect_location,
4815                          "=== vectorizable_operation ===\n");
4816       vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4817       return true;
4818     }
4819 
4820   /** Transform.  **/
4821 
4822   if (dump_enabled_p ())
4823     dump_printf_loc (MSG_NOTE, vect_location,
4824                      "transform binary/unary operation.\n");
4825 
4826   /* Handle def.  */
4827   vec_dest = vect_create_destination_var (scalar_dest, vectype);
4828 
4829   /* In case the vectorization factor (VF) is bigger than the number
4830      of elements that we can fit in a vectype (nunits), we have to generate
4831      more than one vector stmt - i.e - we need to "unroll" the
4832      vector stmt by a factor VF/nunits.  In doing so, we record a pointer
4833      from one copy of the vector stmt to the next, in the field
4834      STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
4835      stages to find the correct vector defs to be used when vectorizing
4836      stmts that use the defs of the current stmt.  The example below
4837      illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
4838      we need to create 4 vectorized stmts):
4839 
4840      before vectorization:
4841                                 RELATED_STMT    VEC_STMT
4842         S1:     x = memref      -               -
4843         S2:     z = x + 1       -               -
4844 
4845      step 1: vectorize stmt S1 (done in vectorizable_load. See more details
4846              there):
4847                                 RELATED_STMT    VEC_STMT
4848         VS1_0:  vx0 = memref0   VS1_1           -
4849         VS1_1:  vx1 = memref1   VS1_2           -
4850         VS1_2:  vx2 = memref2   VS1_3           -
4851         VS1_3:  vx3 = memref3   -               -
4852         S1:     x = load        -               VS1_0
4853         S2:     z = x + 1       -               -
4854 
4855      step2: vectorize stmt S2 (done here):
4856         To vectorize stmt S2 we first need to find the relevant vector
4857         def for the first operand 'x'.  This is, as usual, obtained from
4858         the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
4859         that defines 'x' (S1).  This way we find the stmt VS1_0, and the
4860         relevant vector def 'vx0'.  Having found 'vx0' we can generate
4861         the vector stmt VS2_0, and as usual, record it in the
4862         STMT_VINFO_VEC_STMT of stmt S2.
4863         When creating the second copy (VS2_1), we obtain the relevant vector
4864         def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
4865         stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
4866         vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
4867         pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
4868         Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
4869         chain of stmts and pointers:
4870                                 RELATED_STMT    VEC_STMT
4871         VS1_0:  vx0 = memref0   VS1_1           -
4872         VS1_1:  vx1 = memref1   VS1_2           -
4873         VS1_2:  vx2 = memref2   VS1_3           -
4874         VS1_3:  vx3 = memref3   -               -
4875         S1:     x = load        -               VS1_0
4876         VS2_0:  vz0 = vx0 + v1  VS2_1           -
4877         VS2_1:  vz1 = vx1 + v1  VS2_2           -
4878         VS2_2:  vz2 = vx2 + v1  VS2_3           -
4879         VS2_3:  vz3 = vx3 + v1  -               -
4880         S2:     z = x + 1       -               VS2_0  */
4881 
4882   prev_stmt_info = NULL;
4883   for (j = 0; j < ncopies; j++)
4884     {
4885       /* Handle uses.  */
4886       if (j == 0)
4887 	{
4888 	  if (op_type == binary_op || op_type == ternary_op)
4889 	    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4890 			       slp_node, -1);
4891 	  else
4892 	    vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4893 			       slp_node, -1);
4894 	  if (op_type == ternary_op)
4895 	    {
4896 	      vec_oprnds2.create (1);
4897 	      vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
4898 		                                                    stmt,
4899 								    NULL));
4900 	    }
4901 	}
4902       else
4903 	{
4904 	  vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4905 	  if (op_type == ternary_op)
4906 	    {
4907 	      tree vec_oprnd = vec_oprnds2.pop ();
4908 	      vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
4909 							           vec_oprnd));
4910 	    }
4911 	}
4912 
4913       /* Arguments are ready.  Create the new vector stmt.  */
4914       FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4915         {
4916 	  vop1 = ((op_type == binary_op || op_type == ternary_op)
4917 		  ? vec_oprnds1[i] : NULL_TREE);
4918 	  vop2 = ((op_type == ternary_op)
4919 		  ? vec_oprnds2[i] : NULL_TREE);
4920 	  new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
4921 	  new_temp = make_ssa_name (vec_dest, new_stmt);
4922 	  gimple_assign_set_lhs (new_stmt, new_temp);
4923 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
4924           if (slp_node)
4925 	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4926         }
4927 
4928       if (slp_node)
4929         continue;
4930 
4931       if (j == 0)
4932 	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4933       else
4934 	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4935       prev_stmt_info = vinfo_for_stmt (new_stmt);
4936     }
4937 
4938   vec_oprnds0.release ();
4939   vec_oprnds1.release ();
4940   vec_oprnds2.release ();
4941 
4942   return true;
4943 }
4944 
4945 /* A helper function to ensure data reference DR's base alignment
4946    for STMT_INFO.  */
4947 
4948 static void
4949 ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
4950 {
4951   if (!dr->aux)
4952     return;
4953 
4954   if (DR_VECT_AUX (dr)->base_misaligned)
4955     {
4956       tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4957       tree base_decl = DR_VECT_AUX (dr)->base_decl;
4958 
4959       if (decl_in_symtab_p (base_decl))
4960 	symtab_node::get (base_decl)->increase_alignment (TYPE_ALIGN (vectype));
4961       else
4962 	{
4963           DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype);
4964           DECL_USER_ALIGN (base_decl) = 1;
4965 	}
4966       DR_VECT_AUX (dr)->base_misaligned = false;
4967     }
4968 }
4969 
4970 
4971 /* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
4972    reversal of the vector elements.  If that is impossible to do,
4973    returns NULL.  */
4974 
4975 static tree
4976 perm_mask_for_reverse (tree vectype)
4977 {
4978   int i, nunits;
4979   unsigned char *sel;
4980 
4981   nunits = TYPE_VECTOR_SUBPARTS (vectype);
4982   sel = XALLOCAVEC (unsigned char, nunits);
4983 
4984   for (i = 0; i < nunits; ++i)
4985     sel[i] = nunits - 1 - i;
4986 
4987   if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
4988     return NULL_TREE;
4989   return vect_gen_perm_mask_checked (vectype, sel);
4990 }
4991 
4992 /* Function vectorizable_store.
4993 
4994    Check if STMT defines a non scalar data-ref (array/pointer/structure) that
4995    can be vectorized.
4996    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4997    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4998    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
4999 
5000 static bool
5001 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
5002                     slp_tree slp_node)
5003 {
5004   tree scalar_dest;
5005   tree data_ref;
5006   tree op;
5007   tree vec_oprnd = NULL_TREE;
5008   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5009   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5010   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5011   tree elem_type;
5012   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5013   struct loop *loop = NULL;
5014   machine_mode vec_mode;
5015   tree dummy;
5016   enum dr_alignment_support alignment_support_scheme;
5017   tree def;
5018   gimple def_stmt;
5019   enum vect_def_type dt;
5020   stmt_vec_info prev_stmt_info = NULL;
5021   tree dataref_ptr = NULL_TREE;
5022   tree dataref_offset = NULL_TREE;
5023   gimple ptr_incr = NULL;
5024   int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5025   int ncopies;
5026   int j;
5027   gimple next_stmt, first_stmt = NULL;
5028   bool grouped_store = false;
5029   bool store_lanes_p = false;
5030   unsigned int group_size, i;
5031   vec<tree> dr_chain = vNULL;
5032   vec<tree> oprnds = vNULL;
5033   vec<tree> result_chain = vNULL;
5034   bool inv_p;
5035   bool negative = false;
5036   tree offset = NULL_TREE;
5037   vec<tree> vec_oprnds = vNULL;
5038   bool slp = (slp_node != NULL);
5039   unsigned int vec_num;
5040   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5041   tree aggr_type;
5042 
5043   if (loop_vinfo)
5044     loop = LOOP_VINFO_LOOP (loop_vinfo);
5045 
5046   /* Multiple types in SLP are handled by creating the appropriate number of
5047      vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5048      case of SLP.  */
5049   if (slp || PURE_SLP_STMT (stmt_info))
5050     ncopies = 1;
5051   else
5052     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5053 
5054   gcc_assert (ncopies >= 1);
5055 
5056   /* FORNOW. This restriction should be relaxed.  */
5057   if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
5058     {
5059       if (dump_enabled_p ())
5060         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5061                          "multiple types in nested loop.\n");
5062       return false;
5063     }
5064 
5065   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5066     return false;
5067 
5068   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
5069     return false;
5070 
5071   /* Is vectorizable store? */
5072 
5073   if (!is_gimple_assign (stmt))
5074     return false;
5075 
5076   scalar_dest = gimple_assign_lhs (stmt);
5077   if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5078       && is_pattern_stmt_p (stmt_info))
5079     scalar_dest = TREE_OPERAND (scalar_dest, 0);
5080   if (TREE_CODE (scalar_dest) != ARRAY_REF
5081       && TREE_CODE (scalar_dest) != BIT_FIELD_REF
5082       && TREE_CODE (scalar_dest) != INDIRECT_REF
5083       && TREE_CODE (scalar_dest) != COMPONENT_REF
5084       && TREE_CODE (scalar_dest) != IMAGPART_EXPR
5085       && TREE_CODE (scalar_dest) != REALPART_EXPR
5086       && TREE_CODE (scalar_dest) != MEM_REF)
5087     return false;
5088 
5089   gcc_assert (gimple_assign_single_p (stmt));
5090   op = gimple_assign_rhs1 (stmt);
5091   if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
5092 			   &def, &dt))
5093     {
5094       if (dump_enabled_p ())
5095         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5096                          "use not simple.\n");
5097       return false;
5098     }
5099 
5100   elem_type = TREE_TYPE (vectype);
5101   vec_mode = TYPE_MODE (vectype);
5102 
5103   /* FORNOW. In some cases can vectorize even if data-type not supported
5104      (e.g. - array initialization with 0).  */
5105   if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
5106     return false;
5107 
5108   if (!STMT_VINFO_DATA_REF (stmt_info))
5109     return false;
5110 
5111   negative =
5112     tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
5113 			  ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
5114 			  size_zero_node) < 0;
5115   if (negative && ncopies > 1)
5116     {
5117       if (dump_enabled_p ())
5118         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5119 			 "multiple types with negative step.\n");
5120       return false;
5121     }
5122 
5123   if (negative)
5124     {
5125       gcc_assert (!grouped_store);
5126       alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
5127       if (alignment_support_scheme != dr_aligned
5128 	  && alignment_support_scheme != dr_unaligned_supported)
5129 	{
5130 	  if (dump_enabled_p ())
5131 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5132 			     "negative step but alignment required.\n");
5133 	  return false;
5134 	}
5135       if (dt != vect_constant_def
5136 	  && dt != vect_external_def
5137 	  && !perm_mask_for_reverse (vectype))
5138 	{
5139 	  if (dump_enabled_p ())
5140 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5141 			     "negative step and reversing not supported.\n");
5142 	  return false;
5143 	}
5144     }
5145 
5146   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5147     {
5148       grouped_store = true;
5149       first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5150       if (!slp && !PURE_SLP_STMT (stmt_info))
5151 	{
5152 	  group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5153 	  if (vect_store_lanes_supported (vectype, group_size))
5154 	    store_lanes_p = true;
5155 	  else if (!vect_grouped_store_supported (vectype, group_size))
5156 	    return false;
5157 	}
5158 
5159       if (first_stmt == stmt)
5160 	{
5161           /* STMT is the leader of the group. Check the operands of all the
5162              stmts of the group.  */
5163           next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
5164           while (next_stmt)
5165             {
5166 	      gcc_assert (gimple_assign_single_p (next_stmt));
5167 	      op = gimple_assign_rhs1 (next_stmt);
5168               if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
5169 				       &def_stmt, &def, &dt))
5170                 {
5171                   if (dump_enabled_p ())
5172                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5173                                      "use not simple.\n");
5174                   return false;
5175                 }
5176               next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5177             }
5178         }
5179     }
5180 
5181   if (!vec_stmt) /* transformation not required.  */
5182     {
5183       STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
5184       vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
5185 			     NULL, NULL, NULL);
5186       return true;
5187     }
5188 
5189   /** Transform.  **/
5190 
5191   ensure_base_align (stmt_info, dr);
5192 
5193   if (grouped_store)
5194     {
5195       first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5196       group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5197 
5198       GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
5199 
5200       /* FORNOW */
5201       gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
5202 
5203       /* We vectorize all the stmts of the interleaving group when we
5204 	 reach the last stmt in the group.  */
5205       if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5206 	  < GROUP_SIZE (vinfo_for_stmt (first_stmt))
5207 	  && !slp)
5208 	{
5209 	  *vec_stmt = NULL;
5210 	  return true;
5211 	}
5212 
5213       if (slp)
5214         {
5215           grouped_store = false;
5216           /* VEC_NUM is the number of vect stmts to be created for this
5217              group.  */
5218           vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5219           first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
5220           first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5221 	  op = gimple_assign_rhs1 (first_stmt);
5222         }
5223       else
5224         /* VEC_NUM is the number of vect stmts to be created for this
5225            group.  */
5226 	vec_num = group_size;
5227     }
5228   else
5229     {
5230       first_stmt = stmt;
5231       first_dr = dr;
5232       group_size = vec_num = 1;
5233     }
5234 
5235   if (dump_enabled_p ())
5236     dump_printf_loc (MSG_NOTE, vect_location,
5237                      "transform store. ncopies = %d\n", ncopies);
5238 
5239   dr_chain.create (group_size);
5240   oprnds.create (group_size);
5241 
5242   alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
5243   gcc_assert (alignment_support_scheme);
5244   /* Targets with store-lane instructions must not require explicit
5245      realignment.  */
5246   gcc_assert (!store_lanes_p
5247 	      || alignment_support_scheme == dr_aligned
5248 	      || alignment_support_scheme == dr_unaligned_supported);
5249 
5250   if (negative)
5251     offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
5252 
5253   if (store_lanes_p)
5254     aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
5255   else
5256     aggr_type = vectype;
5257 
5258   /* In case the vectorization factor (VF) is bigger than the number
5259      of elements that we can fit in a vectype (nunits), we have to generate
5260      more than one vector stmt - i.e - we need to "unroll" the
5261      vector stmt by a factor VF/nunits.  For more details see documentation in
5262      vect_get_vec_def_for_copy_stmt.  */
5263 
5264   /* In case of interleaving (non-unit grouped access):
5265 
5266         S1:  &base + 2 = x2
5267         S2:  &base = x0
5268         S3:  &base + 1 = x1
5269         S4:  &base + 3 = x3
5270 
5271      We create vectorized stores starting from base address (the access of the
5272      first stmt in the chain (S2 in the above example), when the last store stmt
5273      of the chain (S4) is reached:
5274 
5275         VS1: &base = vx2
5276 	VS2: &base + vec_size*1 = vx0
5277 	VS3: &base + vec_size*2 = vx1
5278 	VS4: &base + vec_size*3 = vx3
5279 
5280      Then permutation statements are generated:
5281 
5282 	VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
5283 	VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
5284 	...
5285 
5286      And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
5287      (the order of the data-refs in the output of vect_permute_store_chain
5288      corresponds to the order of scalar stmts in the interleaving chain - see
5289      the documentation of vect_permute_store_chain()).
5290 
5291      In case of both multiple types and interleaving, above vector stores and
5292      permutation stmts are created for every copy.  The result vector stmts are
5293      put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
5294      STMT_VINFO_RELATED_STMT for the next copies.
5295   */
5296 
5297   prev_stmt_info = NULL;
5298   for (j = 0; j < ncopies; j++)
5299     {
5300       gimple new_stmt;
5301 
5302       if (j == 0)
5303 	{
5304           if (slp)
5305             {
5306 	      /* Get vectorized arguments for SLP_NODE.  */
5307               vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
5308                                  NULL, slp_node, -1);
5309 
5310               vec_oprnd = vec_oprnds[0];
5311             }
5312           else
5313             {
5314 	      /* For interleaved stores we collect vectorized defs for all the
5315 		 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
5316 		 used as an input to vect_permute_store_chain(), and OPRNDS as
5317 		 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
5318 
5319 		 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5320 		 OPRNDS are of size 1.  */
5321 	      next_stmt = first_stmt;
5322 	      for (i = 0; i < group_size; i++)
5323 		{
5324 		  /* Since gaps are not supported for interleaved stores,
5325 		     GROUP_SIZE is the exact number of stmts in the chain.
5326 		     Therefore, NEXT_STMT can't be NULL_TREE.  In case that
5327 		     there is no interleaving, GROUP_SIZE is 1, and only one
5328 		     iteration of the loop will be executed.  */
5329 		  gcc_assert (next_stmt
5330 			      && gimple_assign_single_p (next_stmt));
5331 		  op = gimple_assign_rhs1 (next_stmt);
5332 
5333 		  vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
5334 							    NULL);
5335 		  dr_chain.quick_push (vec_oprnd);
5336 		  oprnds.quick_push (vec_oprnd);
5337 		  next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5338 		}
5339 	    }
5340 
5341 	  /* We should have catched mismatched types earlier.  */
5342 	  gcc_assert (useless_type_conversion_p (vectype,
5343 						 TREE_TYPE (vec_oprnd)));
5344 	  bool simd_lane_access_p
5345 	    = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
5346 	  if (simd_lane_access_p
5347 	      && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
5348 	      && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
5349 	      && integer_zerop (DR_OFFSET (first_dr))
5350 	      && integer_zerop (DR_INIT (first_dr))
5351 	      && alias_sets_conflict_p (get_alias_set (aggr_type),
5352 					get_alias_set (DR_REF (first_dr))))
5353 	    {
5354 	      dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
5355 	      dataref_offset = build_int_cst (reference_alias_ptr_type
5356 					      (DR_REF (first_dr)), 0);
5357 	      inv_p = false;
5358 	    }
5359 	  else
5360 	    dataref_ptr
5361 	      = vect_create_data_ref_ptr (first_stmt, aggr_type,
5362 					  simd_lane_access_p ? loop : NULL,
5363 					  offset, &dummy, gsi, &ptr_incr,
5364 					  simd_lane_access_p, &inv_p);
5365 	  gcc_assert (bb_vinfo || !inv_p);
5366 	}
5367       else
5368 	{
5369 	  /* For interleaved stores we created vectorized defs for all the
5370 	     defs stored in OPRNDS in the previous iteration (previous copy).
5371 	     DR_CHAIN is then used as an input to vect_permute_store_chain(),
5372 	     and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
5373 	     next copy.
5374 	     If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5375 	     OPRNDS are of size 1.  */
5376 	  for (i = 0; i < group_size; i++)
5377 	    {
5378 	      op = oprnds[i];
5379 	      vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
5380 				  &def, &dt);
5381 	      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
5382 	      dr_chain[i] = vec_oprnd;
5383 	      oprnds[i] = vec_oprnd;
5384 	    }
5385 	  if (dataref_offset)
5386 	    dataref_offset
5387 	      = int_const_binop (PLUS_EXPR, dataref_offset,
5388 				 TYPE_SIZE_UNIT (aggr_type));
5389 	  else
5390 	    dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
5391 					   TYPE_SIZE_UNIT (aggr_type));
5392 	}
5393 
5394       if (store_lanes_p)
5395 	{
5396 	  tree vec_array;
5397 
5398 	  /* Combine all the vectors into an array.  */
5399 	  vec_array = create_vector_array (vectype, vec_num);
5400 	  for (i = 0; i < vec_num; i++)
5401 	    {
5402 	      vec_oprnd = dr_chain[i];
5403 	      write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
5404 	    }
5405 
5406 	  /* Emit:
5407 	       MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY).  */
5408 	  data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
5409 	  new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
5410 	  gimple_call_set_lhs (new_stmt, data_ref);
5411 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
5412 	}
5413       else
5414 	{
5415 	  new_stmt = NULL;
5416 	  if (grouped_store)
5417 	    {
5418 	      if (j == 0)
5419 		result_chain.create (group_size);
5420 	      /* Permute.  */
5421 	      vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
5422 					&result_chain);
5423 	    }
5424 
5425 	  next_stmt = first_stmt;
5426 	  for (i = 0; i < vec_num; i++)
5427 	    {
5428 	      unsigned align, misalign;
5429 
5430 	      if (i > 0)
5431 		/* Bump the vector pointer.  */
5432 		dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
5433 					       stmt, NULL_TREE);
5434 
5435 	      if (slp)
5436 		vec_oprnd = vec_oprnds[i];
5437 	      else if (grouped_store)
5438 		/* For grouped stores vectorized defs are interleaved in
5439 		   vect_permute_store_chain().  */
5440 		vec_oprnd = result_chain[i];
5441 
5442 	      data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
5443 				 dataref_offset
5444 				 ? dataref_offset
5445 				 : build_int_cst (reference_alias_ptr_type
5446 						  (DR_REF (first_dr)), 0));
5447 	      align = TYPE_ALIGN_UNIT (vectype);
5448 	      if (aligned_access_p (first_dr))
5449 		misalign = 0;
5450 	      else if (DR_MISALIGNMENT (first_dr) == -1)
5451 		{
5452 		  if (DR_VECT_AUX (first_dr)->base_element_aligned)
5453 		    align = TYPE_ALIGN_UNIT (elem_type);
5454 		  else
5455 		    align = get_object_alignment (DR_REF (first_dr))
5456 			/ BITS_PER_UNIT;
5457 		  misalign = 0;
5458 		  TREE_TYPE (data_ref)
5459 		    = build_aligned_type (TREE_TYPE (data_ref),
5460 					  align * BITS_PER_UNIT);
5461 		}
5462 	      else
5463 		{
5464 		  TREE_TYPE (data_ref)
5465 		    = build_aligned_type (TREE_TYPE (data_ref),
5466 					  TYPE_ALIGN (elem_type));
5467 		  misalign = DR_MISALIGNMENT (first_dr);
5468 		}
5469 	      if (dataref_offset == NULL_TREE)
5470 		set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
5471 					misalign);
5472 
5473 	      if (negative
5474 		  && dt != vect_constant_def
5475 		  && dt != vect_external_def)
5476 		{
5477 		  tree perm_mask = perm_mask_for_reverse (vectype);
5478 		  tree perm_dest
5479 		    = vect_create_destination_var (gimple_assign_rhs1 (stmt),
5480 						   vectype);
5481 		  tree new_temp = make_ssa_name (perm_dest);
5482 
5483 		  /* Generate the permute statement.  */
5484 		  gimple perm_stmt
5485 		    = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
5486 					   vec_oprnd, perm_mask);
5487 		  vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5488 
5489 		  perm_stmt = SSA_NAME_DEF_STMT (new_temp);
5490 		  vec_oprnd = new_temp;
5491 		}
5492 
5493 	      /* Arguments are ready.  Create the new vector stmt.  */
5494 	      new_stmt = gimple_build_assign (data_ref, vec_oprnd);
5495 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
5496 
5497 	      if (slp)
5498 		continue;
5499 
5500 	      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5501 	      if (!next_stmt)
5502 		break;
5503 	    }
5504 	}
5505       if (!slp)
5506 	{
5507 	  if (j == 0)
5508 	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5509 	  else
5510 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5511 	  prev_stmt_info = vinfo_for_stmt (new_stmt);
5512 	}
5513     }
5514 
5515   dr_chain.release ();
5516   oprnds.release ();
5517   result_chain.release ();
5518   vec_oprnds.release ();
5519 
5520   return true;
5521 }
5522 
5523 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
5524    VECTOR_CST mask.  No checks are made that the target platform supports the
5525    mask, so callers may wish to test can_vec_perm_p separately, or use
5526    vect_gen_perm_mask_checked.  */
5527 
5528 tree
5529 vect_gen_perm_mask_any (tree vectype, const unsigned char *sel)
5530 {
5531   tree mask_elt_type, mask_type, mask_vec, *mask_elts;
5532   int i, nunits;
5533 
5534   nunits = TYPE_VECTOR_SUBPARTS (vectype);
5535 
5536   mask_elt_type = lang_hooks.types.type_for_mode
5537 		    (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
5538   mask_type = get_vectype_for_scalar_type (mask_elt_type);
5539 
5540   mask_elts = XALLOCAVEC (tree, nunits);
5541   for (i = nunits - 1; i >= 0; i--)
5542     mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
5543   mask_vec = build_vector (mask_type, mask_elts);
5544 
5545   return mask_vec;
5546 }
5547 
5548 /* Checked version of vect_gen_perm_mask_any.  Asserts can_vec_perm_p,
5549    i.e. that the target supports the pattern _for arbitrary input vectors_.  */
5550 
5551 tree
5552 vect_gen_perm_mask_checked (tree vectype, const unsigned char *sel)
5553 {
5554   gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, sel));
5555   return vect_gen_perm_mask_any (vectype, sel);
5556 }
5557 
5558 /* Given a vector variable X and Y, that was generated for the scalar
5559    STMT, generate instructions to permute the vector elements of X and Y
5560    using permutation mask MASK_VEC, insert them at *GSI and return the
5561    permuted vector variable.  */
5562 
5563 static tree
5564 permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
5565 		      gimple_stmt_iterator *gsi)
5566 {
5567   tree vectype = TREE_TYPE (x);
5568   tree perm_dest, data_ref;
5569   gimple perm_stmt;
5570 
5571   perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
5572   data_ref = make_ssa_name (perm_dest);
5573 
5574   /* Generate the permute statement.  */
5575   perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
5576   vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5577 
5578   return data_ref;
5579 }
5580 
5581 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
5582    inserting them on the loops preheader edge.  Returns true if we
5583    were successful in doing so (and thus STMT can be moved then),
5584    otherwise returns false.  */
5585 
5586 static bool
5587 hoist_defs_of_uses (gimple stmt, struct loop *loop)
5588 {
5589   ssa_op_iter i;
5590   tree op;
5591   bool any = false;
5592 
5593   FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
5594     {
5595       gimple def_stmt = SSA_NAME_DEF_STMT (op);
5596       if (!gimple_nop_p (def_stmt)
5597 	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
5598 	{
5599 	  /* Make sure we don't need to recurse.  While we could do
5600 	     so in simple cases when there are more complex use webs
5601 	     we don't have an easy way to preserve stmt order to fulfil
5602 	     dependencies within them.  */
5603 	  tree op2;
5604 	  ssa_op_iter i2;
5605 	  if (gimple_code (def_stmt) == GIMPLE_PHI)
5606 	    return false;
5607 	  FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
5608 	    {
5609 	      gimple def_stmt2 = SSA_NAME_DEF_STMT (op2);
5610 	      if (!gimple_nop_p (def_stmt2)
5611 		  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
5612 		return false;
5613 	    }
5614 	  any = true;
5615 	}
5616     }
5617 
5618   if (!any)
5619     return true;
5620 
5621   FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
5622     {
5623       gimple def_stmt = SSA_NAME_DEF_STMT (op);
5624       if (!gimple_nop_p (def_stmt)
5625 	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
5626 	{
5627 	  gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
5628 	  gsi_remove (&gsi, false);
5629 	  gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
5630 	}
5631     }
5632 
5633   return true;
5634 }
5635 
5636 /* vectorizable_load.
5637 
5638    Check if STMT reads a non scalar data-ref (array/pointer/structure) that
5639    can be vectorized.
5640    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5641    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5642    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
5643 
5644 static bool
5645 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
5646                    slp_tree slp_node, slp_instance slp_node_instance)
5647 {
5648   tree scalar_dest;
5649   tree vec_dest = NULL;
5650   tree data_ref = NULL;
5651   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5652   stmt_vec_info prev_stmt_info;
5653   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5654   struct loop *loop = NULL;
5655   struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
5656   bool nested_in_vect_loop = false;
5657   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5658   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5659   tree elem_type;
5660   tree new_temp;
5661   machine_mode mode;
5662   gimple new_stmt = NULL;
5663   tree dummy;
5664   enum dr_alignment_support alignment_support_scheme;
5665   tree dataref_ptr = NULL_TREE;
5666   tree dataref_offset = NULL_TREE;
5667   gimple ptr_incr = NULL;
5668   int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5669   int ncopies;
5670   int i, j, group_size, group_gap;
5671   tree msq = NULL_TREE, lsq;
5672   tree offset = NULL_TREE;
5673   tree byte_offset = NULL_TREE;
5674   tree realignment_token = NULL_TREE;
5675   gphi *phi = NULL;
5676   vec<tree> dr_chain = vNULL;
5677   bool grouped_load = false;
5678   bool load_lanes_p = false;
5679   gimple first_stmt;
5680   bool inv_p;
5681   bool negative = false;
5682   bool compute_in_loop = false;
5683   struct loop *at_loop;
5684   int vec_num;
5685   bool slp = (slp_node != NULL);
5686   bool slp_perm = false;
5687   enum tree_code code;
5688   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5689   int vf;
5690   tree aggr_type;
5691   tree gather_base = NULL_TREE, gather_off = NULL_TREE;
5692   tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
5693   int gather_scale = 1;
5694   enum vect_def_type gather_dt = vect_unknown_def_type;
5695 
5696   if (loop_vinfo)
5697     {
5698       loop = LOOP_VINFO_LOOP (loop_vinfo);
5699       nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
5700       vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5701     }
5702   else
5703     vf = 1;
5704 
5705   /* Multiple types in SLP are handled by creating the appropriate number of
5706      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
5707      case of SLP.  */
5708   if (slp || PURE_SLP_STMT (stmt_info))
5709     ncopies = 1;
5710   else
5711     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5712 
5713   gcc_assert (ncopies >= 1);
5714 
5715   /* FORNOW. This restriction should be relaxed.  */
5716   if (nested_in_vect_loop && ncopies > 1)
5717     {
5718       if (dump_enabled_p ())
5719         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5720                          "multiple types in nested loop.\n");
5721       return false;
5722     }
5723 
5724   /* Invalidate assumptions made by dependence analysis when vectorization
5725      on the unrolled body effectively re-orders stmts.  */
5726   if (ncopies > 1
5727       && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
5728       && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5729 	  > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
5730     {
5731       if (dump_enabled_p ())
5732 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5733 			 "cannot perform implicit CSE when unrolling "
5734 			 "with negative dependence distance\n");
5735       return false;
5736     }
5737 
5738   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5739     return false;
5740 
5741   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
5742     return false;
5743 
5744   /* Is vectorizable load? */
5745   if (!is_gimple_assign (stmt))
5746     return false;
5747 
5748   scalar_dest = gimple_assign_lhs (stmt);
5749   if (TREE_CODE (scalar_dest) != SSA_NAME)
5750     return false;
5751 
5752   code = gimple_assign_rhs_code (stmt);
5753   if (code != ARRAY_REF
5754       && code != BIT_FIELD_REF
5755       && code != INDIRECT_REF
5756       && code != COMPONENT_REF
5757       && code != IMAGPART_EXPR
5758       && code != REALPART_EXPR
5759       && code != MEM_REF
5760       && TREE_CODE_CLASS (code) != tcc_declaration)
5761     return false;
5762 
5763   if (!STMT_VINFO_DATA_REF (stmt_info))
5764     return false;
5765 
5766   elem_type = TREE_TYPE (vectype);
5767   mode = TYPE_MODE (vectype);
5768 
5769   /* FORNOW. In some cases can vectorize even if data-type not supported
5770     (e.g. - data copies).  */
5771   if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
5772     {
5773       if (dump_enabled_p ())
5774         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5775                          "Aligned load, but unsupported type.\n");
5776       return false;
5777     }
5778 
5779   /* Check if the load is a part of an interleaving chain.  */
5780   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5781     {
5782       grouped_load = true;
5783       /* FORNOW */
5784       gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
5785 
5786       first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5787 
5788       /* If this is single-element interleaving with an element distance
5789          that leaves unused vector loads around punt - we at least create
5790 	 very sub-optimal code in that case (and blow up memory,
5791 	 see PR65518).  */
5792       if (first_stmt == stmt
5793 	  && !GROUP_NEXT_ELEMENT (stmt_info)
5794 	  && GROUP_SIZE (stmt_info) > TYPE_VECTOR_SUBPARTS (vectype))
5795 	{
5796 	  if (dump_enabled_p ())
5797 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5798 			     "single-element interleaving not supported "
5799 			     "for not adjacent vector loads\n");
5800 	  return false;
5801 	}
5802 
5803       if (!slp && !PURE_SLP_STMT (stmt_info))
5804 	{
5805 	  group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5806 	  if (vect_load_lanes_supported (vectype, group_size))
5807 	    load_lanes_p = true;
5808 	  else if (!vect_grouped_load_supported (vectype, group_size))
5809 	    return false;
5810 	}
5811 
5812       /* Invalidate assumptions made by dependence analysis when vectorization
5813 	 on the unrolled body effectively re-orders stmts.  */
5814       if (!PURE_SLP_STMT (stmt_info)
5815 	  && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
5816 	  && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5817 	      > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
5818 	{
5819 	  if (dump_enabled_p ())
5820 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5821 			     "cannot perform implicit CSE when performing "
5822 			     "group loads with negative dependence distance\n");
5823 	  return false;
5824 	}
5825 
5826       /* Similarly when the stmt is a load that is both part of a SLP
5827          instance and a loop vectorized stmt via the same-dr mechanism
5828 	 we have to give up.  */
5829       if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
5830 	  && (STMT_SLP_TYPE (stmt_info)
5831 	      != STMT_SLP_TYPE (vinfo_for_stmt
5832 				 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
5833 	{
5834 	  if (dump_enabled_p ())
5835 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5836 			     "conflicting SLP types for CSEd load\n");
5837 	  return false;
5838 	}
5839     }
5840 
5841 
5842   if (STMT_VINFO_GATHER_P (stmt_info))
5843     {
5844       gimple def_stmt;
5845       tree def;
5846       gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
5847 				       &gather_off, &gather_scale);
5848       gcc_assert (gather_decl);
5849       if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
5850 				 &def_stmt, &def, &gather_dt,
5851 				 &gather_off_vectype))
5852 	{
5853 	  if (dump_enabled_p ())
5854 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5855                              "gather index use not simple.\n");
5856 	  return false;
5857 	}
5858     }
5859   else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
5860     ;
5861   else
5862     {
5863       negative = tree_int_cst_compare (nested_in_vect_loop
5864 				       ? STMT_VINFO_DR_STEP (stmt_info)
5865 				       : DR_STEP (dr),
5866 				       size_zero_node) < 0;
5867       if (negative && ncopies > 1)
5868 	{
5869 	  if (dump_enabled_p ())
5870 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5871                              "multiple types with negative step.\n");
5872 	  return false;
5873 	}
5874 
5875       if (negative)
5876 	{
5877 	  if (grouped_load)
5878 	    {
5879 	      if (dump_enabled_p ())
5880 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5881 				 "negative step for group load not supported"
5882                                  "\n");
5883 	      return false;
5884 	    }
5885 	  alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
5886 	  if (alignment_support_scheme != dr_aligned
5887 	      && alignment_support_scheme != dr_unaligned_supported)
5888 	    {
5889               if (dump_enabled_p ())
5890                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5891                                  "negative step but alignment required.\n");
5892 	      return false;
5893 	    }
5894 	  if (!perm_mask_for_reverse (vectype))
5895 	    {
5896               if (dump_enabled_p ())
5897                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5898                                  "negative step and reversing not supported."
5899                                  "\n");
5900 	      return false;
5901 	    }
5902 	}
5903     }
5904 
5905   if (!vec_stmt) /* transformation not required.  */
5906     {
5907       STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
5908       vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL, NULL, NULL);
5909       return true;
5910     }
5911 
5912   if (dump_enabled_p ())
5913     dump_printf_loc (MSG_NOTE, vect_location,
5914                      "transform load. ncopies = %d\n", ncopies);
5915 
5916   /** Transform.  **/
5917 
5918   ensure_base_align (stmt_info, dr);
5919 
5920   if (STMT_VINFO_GATHER_P (stmt_info))
5921     {
5922       tree vec_oprnd0 = NULL_TREE, op;
5923       tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
5924       tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
5925       tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
5926       edge pe = loop_preheader_edge (loop);
5927       gimple_seq seq;
5928       basic_block new_bb;
5929       enum { NARROW, NONE, WIDEN } modifier;
5930       int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
5931 
5932       if (nunits == gather_off_nunits)
5933 	modifier = NONE;
5934       else if (nunits == gather_off_nunits / 2)
5935 	{
5936 	  unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
5937 	  modifier = WIDEN;
5938 
5939 	  for (i = 0; i < gather_off_nunits; ++i)
5940 	    sel[i] = i | nunits;
5941 
5942 	  perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
5943 	}
5944       else if (nunits == gather_off_nunits * 2)
5945 	{
5946 	  unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
5947 	  modifier = NARROW;
5948 
5949 	  for (i = 0; i < nunits; ++i)
5950 	    sel[i] = i < gather_off_nunits
5951 		     ? i : i + nunits - gather_off_nunits;
5952 
5953 	  perm_mask = vect_gen_perm_mask_checked (vectype, sel);
5954 	  ncopies *= 2;
5955 	}
5956       else
5957 	gcc_unreachable ();
5958 
5959       rettype = TREE_TYPE (TREE_TYPE (gather_decl));
5960       srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5961       ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5962       idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5963       masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5964       scaletype = TREE_VALUE (arglist);
5965       gcc_checking_assert (types_compatible_p (srctype, rettype));
5966 
5967       vec_dest = vect_create_destination_var (scalar_dest, vectype);
5968 
5969       ptr = fold_convert (ptrtype, gather_base);
5970       if (!is_gimple_min_invariant (ptr))
5971 	{
5972 	  ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
5973 	  new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
5974 	  gcc_assert (!new_bb);
5975 	}
5976 
5977       /* Currently we support only unconditional gather loads,
5978 	 so mask should be all ones.  */
5979       if (TREE_CODE (masktype) == INTEGER_TYPE)
5980 	mask = build_int_cst (masktype, -1);
5981       else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
5982 	{
5983 	  mask = build_int_cst (TREE_TYPE (masktype), -1);
5984 	  mask = build_vector_from_val (masktype, mask);
5985 	  mask = vect_init_vector (stmt, mask, masktype, NULL);
5986 	}
5987       else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
5988 	{
5989 	  REAL_VALUE_TYPE r;
5990 	  long tmp[6];
5991 	  for (j = 0; j < 6; ++j)
5992 	    tmp[j] = -1;
5993 	  real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
5994 	  mask = build_real (TREE_TYPE (masktype), r);
5995 	  mask = build_vector_from_val (masktype, mask);
5996 	  mask = vect_init_vector (stmt, mask, masktype, NULL);
5997 	}
5998       else
5999 	gcc_unreachable ();
6000 
6001       scale = build_int_cst (scaletype, gather_scale);
6002 
6003       if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
6004 	merge = build_int_cst (TREE_TYPE (rettype), 0);
6005       else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
6006 	{
6007 	  REAL_VALUE_TYPE r;
6008 	  long tmp[6];
6009 	  for (j = 0; j < 6; ++j)
6010 	    tmp[j] = 0;
6011 	  real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
6012 	  merge = build_real (TREE_TYPE (rettype), r);
6013 	}
6014       else
6015 	gcc_unreachable ();
6016       merge = build_vector_from_val (rettype, merge);
6017       merge = vect_init_vector (stmt, merge, rettype, NULL);
6018 
6019       prev_stmt_info = NULL;
6020       for (j = 0; j < ncopies; ++j)
6021 	{
6022 	  if (modifier == WIDEN && (j & 1))
6023 	    op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
6024 				       perm_mask, stmt, gsi);
6025 	  else if (j == 0)
6026 	    op = vec_oprnd0
6027 	      = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
6028 	  else
6029 	    op = vec_oprnd0
6030 	      = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
6031 
6032 	  if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6033 	    {
6034 	      gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
6035 			  == TYPE_VECTOR_SUBPARTS (idxtype));
6036 	      var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
6037 	      var = make_ssa_name (var);
6038 	      op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6039 	      new_stmt
6040 		= gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6041 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
6042 	      op = var;
6043 	    }
6044 
6045 	  new_stmt
6046 	    = gimple_build_call (gather_decl, 5, merge, ptr, op, mask, scale);
6047 
6048 	  if (!useless_type_conversion_p (vectype, rettype))
6049 	    {
6050 	      gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
6051 			  == TYPE_VECTOR_SUBPARTS (rettype));
6052 	      var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
6053 	      op = make_ssa_name (var, new_stmt);
6054 	      gimple_call_set_lhs (new_stmt, op);
6055 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
6056 	      var = make_ssa_name (vec_dest);
6057 	      op = build1 (VIEW_CONVERT_EXPR, vectype, op);
6058 	      new_stmt
6059 		= gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6060 	    }
6061 	  else
6062 	    {
6063 	      var = make_ssa_name (vec_dest, new_stmt);
6064 	      gimple_call_set_lhs (new_stmt, var);
6065 	    }
6066 
6067 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
6068 
6069 	  if (modifier == NARROW)
6070 	    {
6071 	      if ((j & 1) == 0)
6072 		{
6073 		  prev_res = var;
6074 		  continue;
6075 		}
6076 	      var = permute_vec_elements (prev_res, var,
6077 					  perm_mask, stmt, gsi);
6078 	      new_stmt = SSA_NAME_DEF_STMT (var);
6079 	    }
6080 
6081 	  if (prev_stmt_info == NULL)
6082 	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6083 	  else
6084 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6085 	  prev_stmt_info = vinfo_for_stmt (new_stmt);
6086 	}
6087       return true;
6088     }
6089   else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
6090     {
6091       gimple_stmt_iterator incr_gsi;
6092       bool insert_after;
6093       gimple incr;
6094       tree offvar;
6095       tree ivstep;
6096       tree running_off;
6097       vec<constructor_elt, va_gc> *v = NULL;
6098       gimple_seq stmts = NULL;
6099       tree stride_base, stride_step, alias_off;
6100 
6101       gcc_assert (!nested_in_vect_loop);
6102 
6103       stride_base
6104 	= fold_build_pointer_plus
6105 	    (unshare_expr (DR_BASE_ADDRESS (dr)),
6106 	     size_binop (PLUS_EXPR,
6107 			 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
6108 			 convert_to_ptrofftype (DR_INIT (dr))));
6109       stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));
6110 
6111       /* For a load with loop-invariant (but other than power-of-2)
6112          stride (i.e. not a grouped access) like so:
6113 
6114 	   for (i = 0; i < n; i += stride)
6115 	     ... = array[i];
6116 
6117 	 we generate a new induction variable and new accesses to
6118 	 form a new vector (or vectors, depending on ncopies):
6119 
6120 	   for (j = 0; ; j += VF*stride)
6121 	     tmp1 = array[j];
6122 	     tmp2 = array[j + stride];
6123 	     ...
6124 	     vectemp = {tmp1, tmp2, ...}
6125          */
6126 
6127       ivstep = stride_step;
6128       ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
6129 			    build_int_cst (TREE_TYPE (ivstep), vf));
6130 
6131       standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6132 
6133       create_iv (stride_base, ivstep, NULL,
6134 		 loop, &incr_gsi, insert_after,
6135 		 &offvar, NULL);
6136       incr = gsi_stmt (incr_gsi);
6137       set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
6138 
6139       stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
6140       if (stmts)
6141 	gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6142 
6143       prev_stmt_info = NULL;
6144       running_off = offvar;
6145       alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
6146       for (j = 0; j < ncopies; j++)
6147 	{
6148 	  tree vec_inv;
6149 
6150 	  vec_alloc (v, nunits);
6151 	  for (i = 0; i < nunits; i++)
6152 	    {
6153 	      tree newref, newoff;
6154 	      gimple incr;
6155 	      newref = build2 (MEM_REF, TREE_TYPE (vectype),
6156 			       running_off, alias_off);
6157 
6158 	      newref = force_gimple_operand_gsi (gsi, newref, true,
6159 						 NULL_TREE, true,
6160 						 GSI_SAME_STMT);
6161 	      CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
6162 	      newoff = copy_ssa_name (running_off);
6163 	      incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6164 					  running_off, stride_step);
6165 	      vect_finish_stmt_generation (stmt, incr, gsi);
6166 
6167 	      running_off = newoff;
6168 	    }
6169 
6170 	  vec_inv = build_constructor (vectype, v);
6171 	  new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
6172 	  new_stmt = SSA_NAME_DEF_STMT (new_temp);
6173 
6174 	  if (j == 0)
6175 	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6176 	  else
6177 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6178 	  prev_stmt_info = vinfo_for_stmt (new_stmt);
6179 	}
6180       return true;
6181     }
6182 
6183   if (grouped_load)
6184     {
6185       first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6186       if (slp
6187           && !SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
6188 	  && first_stmt != SLP_TREE_SCALAR_STMTS (slp_node)[0])
6189         first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6190 
6191       /* Check if the chain of loads is already vectorized.  */
6192       if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
6193 	  /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
6194 	     ???  But we can only do so if there is exactly one
6195 	     as we have no way to get at the rest.  Leave the CSE
6196 	     opportunity alone.
6197 	     ???  With the group load eventually participating
6198 	     in multiple different permutations (having multiple
6199 	     slp nodes which refer to the same group) the CSE
6200 	     is even wrong code.  See PR56270.  */
6201 	  && !slp)
6202 	{
6203 	  *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6204 	  return true;
6205 	}
6206       first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6207       group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6208 
6209       /* VEC_NUM is the number of vect stmts to be created for this group.  */
6210       if (slp)
6211 	{
6212 	  grouped_load = false;
6213 	  vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6214           if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6215             slp_perm = true;
6216 	  group_gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
6217     	}
6218       else
6219 	{
6220 	  vec_num = group_size;
6221 	  group_gap = 0;
6222 	}
6223     }
6224   else
6225     {
6226       first_stmt = stmt;
6227       first_dr = dr;
6228       group_size = vec_num = 1;
6229       group_gap = 0;
6230     }
6231 
6232   alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6233   gcc_assert (alignment_support_scheme);
6234   /* Targets with load-lane instructions must not require explicit
6235      realignment.  */
6236   gcc_assert (!load_lanes_p
6237 	      || alignment_support_scheme == dr_aligned
6238 	      || alignment_support_scheme == dr_unaligned_supported);
6239 
6240   /* In case the vectorization factor (VF) is bigger than the number
6241      of elements that we can fit in a vectype (nunits), we have to generate
6242      more than one vector stmt - i.e - we need to "unroll" the
6243      vector stmt by a factor VF/nunits.  In doing so, we record a pointer
6244      from one copy of the vector stmt to the next, in the field
6245      STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
6246      stages to find the correct vector defs to be used when vectorizing
6247      stmts that use the defs of the current stmt.  The example below
6248      illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
6249      need to create 4 vectorized stmts):
6250 
6251      before vectorization:
6252                                 RELATED_STMT    VEC_STMT
6253         S1:     x = memref      -               -
6254         S2:     z = x + 1       -               -
6255 
6256      step 1: vectorize stmt S1:
6257         We first create the vector stmt VS1_0, and, as usual, record a
6258         pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
6259         Next, we create the vector stmt VS1_1, and record a pointer to
6260         it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
6261         Similarly, for VS1_2 and VS1_3.  This is the resulting chain of
6262         stmts and pointers:
6263                                 RELATED_STMT    VEC_STMT
6264         VS1_0:  vx0 = memref0   VS1_1           -
6265         VS1_1:  vx1 = memref1   VS1_2           -
6266         VS1_2:  vx2 = memref2   VS1_3           -
6267         VS1_3:  vx3 = memref3   -               -
6268         S1:     x = load        -               VS1_0
6269         S2:     z = x + 1       -               -
6270 
6271      See in documentation in vect_get_vec_def_for_stmt_copy for how the
6272      information we recorded in RELATED_STMT field is used to vectorize
6273      stmt S2.  */
6274 
6275   /* In case of interleaving (non-unit grouped access):
6276 
6277      S1:  x2 = &base + 2
6278      S2:  x0 = &base
6279      S3:  x1 = &base + 1
6280      S4:  x3 = &base + 3
6281 
6282      Vectorized loads are created in the order of memory accesses
6283      starting from the access of the first stmt of the chain:
6284 
6285      VS1: vx0 = &base
6286      VS2: vx1 = &base + vec_size*1
6287      VS3: vx3 = &base + vec_size*2
6288      VS4: vx4 = &base + vec_size*3
6289 
6290      Then permutation statements are generated:
6291 
6292      VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
6293      VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
6294        ...
6295 
6296      And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6297      (the order of the data-refs in the output of vect_permute_load_chain
6298      corresponds to the order of scalar stmts in the interleaving chain - see
6299      the documentation of vect_permute_load_chain()).
6300      The generation of permutation stmts and recording them in
6301      STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
6302 
6303      In case of both multiple types and interleaving, the vector loads and
6304      permutation stmts above are created for every copy.  The result vector
6305      stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
6306      corresponding STMT_VINFO_RELATED_STMT for the next copies.  */
6307 
6308   /* If the data reference is aligned (dr_aligned) or potentially unaligned
6309      on a target that supports unaligned accesses (dr_unaligned_supported)
6310      we generate the following code:
6311          p = initial_addr;
6312          indx = 0;
6313          loop {
6314 	   p = p + indx * vectype_size;
6315            vec_dest = *(p);
6316            indx = indx + 1;
6317          }
6318 
6319      Otherwise, the data reference is potentially unaligned on a target that
6320      does not support unaligned accesses (dr_explicit_realign_optimized) -
6321      then generate the following code, in which the data in each iteration is
6322      obtained by two vector loads, one from the previous iteration, and one
6323      from the current iteration:
6324          p1 = initial_addr;
6325          msq_init = *(floor(p1))
6326          p2 = initial_addr + VS - 1;
6327          realignment_token = call target_builtin;
6328          indx = 0;
6329          loop {
6330            p2 = p2 + indx * vectype_size
6331            lsq = *(floor(p2))
6332            vec_dest = realign_load (msq, lsq, realignment_token)
6333            indx = indx + 1;
6334            msq = lsq;
6335          }   */
6336 
6337   /* If the misalignment remains the same throughout the execution of the
6338      loop, we can create the init_addr and permutation mask at the loop
6339      preheader.  Otherwise, it needs to be created inside the loop.
6340      This can only occur when vectorizing memory accesses in the inner-loop
6341      nested within an outer-loop that is being vectorized.  */
6342 
6343   if (nested_in_vect_loop
6344       && (TREE_INT_CST_LOW (DR_STEP (dr))
6345 	  % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
6346     {
6347       gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
6348       compute_in_loop = true;
6349     }
6350 
6351   if ((alignment_support_scheme == dr_explicit_realign_optimized
6352        || alignment_support_scheme == dr_explicit_realign)
6353       && !compute_in_loop)
6354     {
6355       msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
6356 				    alignment_support_scheme, NULL_TREE,
6357 				    &at_loop);
6358       if (alignment_support_scheme == dr_explicit_realign_optimized)
6359 	{
6360 	  phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
6361 	  byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
6362 				    size_one_node);
6363 	}
6364     }
6365   else
6366     at_loop = loop;
6367 
6368   if (negative)
6369     offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6370 
6371   if (load_lanes_p)
6372     aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6373   else
6374     aggr_type = vectype;
6375 
6376   prev_stmt_info = NULL;
6377   for (j = 0; j < ncopies; j++)
6378     {
6379       /* 1. Create the vector or array pointer update chain.  */
6380       if (j == 0)
6381 	{
6382 	  bool simd_lane_access_p
6383 	    = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6384 	  if (simd_lane_access_p
6385 	      && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6386 	      && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6387 	      && integer_zerop (DR_OFFSET (first_dr))
6388 	      && integer_zerop (DR_INIT (first_dr))
6389 	      && alias_sets_conflict_p (get_alias_set (aggr_type),
6390 					get_alias_set (DR_REF (first_dr)))
6391 	      && (alignment_support_scheme == dr_aligned
6392 		  || alignment_support_scheme == dr_unaligned_supported))
6393 	    {
6394 	      dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
6395 	      dataref_offset = build_int_cst (reference_alias_ptr_type
6396 					      (DR_REF (first_dr)), 0);
6397 	      inv_p = false;
6398 	    }
6399 	  else
6400 	    dataref_ptr
6401 	      = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
6402 					  offset, &dummy, gsi, &ptr_incr,
6403 					  simd_lane_access_p, &inv_p,
6404 					  byte_offset);
6405 	}
6406       else if (dataref_offset)
6407 	dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
6408 					  TYPE_SIZE_UNIT (aggr_type));
6409       else
6410         dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6411 				       TYPE_SIZE_UNIT (aggr_type));
6412 
6413       if (grouped_load || slp_perm)
6414 	dr_chain.create (vec_num);
6415 
6416       if (load_lanes_p)
6417 	{
6418 	  tree vec_array;
6419 
6420 	  vec_array = create_vector_array (vectype, vec_num);
6421 
6422 	  /* Emit:
6423 	       VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]).  */
6424 	  data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
6425 	  new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
6426 	  gimple_call_set_lhs (new_stmt, vec_array);
6427 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
6428 
6429 	  /* Extract each vector into an SSA_NAME.  */
6430 	  for (i = 0; i < vec_num; i++)
6431 	    {
6432 	      new_temp = read_vector_array (stmt, gsi, scalar_dest,
6433 					    vec_array, i);
6434 	      dr_chain.quick_push (new_temp);
6435 	    }
6436 
6437 	  /* Record the mapping between SSA_NAMEs and statements.  */
6438 	  vect_record_grouped_load_vectors (stmt, dr_chain);
6439 	}
6440       else
6441 	{
6442 	  for (i = 0; i < vec_num; i++)
6443 	    {
6444 	      if (i > 0)
6445 		dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6446 					       stmt, NULL_TREE);
6447 
6448 	      /* 2. Create the vector-load in the loop.  */
6449 	      switch (alignment_support_scheme)
6450 		{
6451 		case dr_aligned:
6452 		case dr_unaligned_supported:
6453 		  {
6454 		    unsigned int align, misalign;
6455 
6456 		    data_ref
6457 		      = build2 (MEM_REF, vectype, dataref_ptr,
6458 				dataref_offset
6459 				? dataref_offset
6460 				: build_int_cst (reference_alias_ptr_type
6461 						 (DR_REF (first_dr)), 0));
6462 		    align = TYPE_ALIGN_UNIT (vectype);
6463 		    if (alignment_support_scheme == dr_aligned)
6464 		      {
6465 			gcc_assert (aligned_access_p (first_dr));
6466 			misalign = 0;
6467 		      }
6468 		    else if (DR_MISALIGNMENT (first_dr) == -1)
6469 		      {
6470 			if (DR_VECT_AUX (first_dr)->base_element_aligned)
6471 			  align = TYPE_ALIGN_UNIT (elem_type);
6472 			else
6473 			  align = (get_object_alignment (DR_REF (first_dr))
6474 				   / BITS_PER_UNIT);
6475 			misalign = 0;
6476 			TREE_TYPE (data_ref)
6477 			  = build_aligned_type (TREE_TYPE (data_ref),
6478 						align * BITS_PER_UNIT);
6479 		      }
6480 		    else
6481 		      {
6482 			TREE_TYPE (data_ref)
6483 			  = build_aligned_type (TREE_TYPE (data_ref),
6484 						TYPE_ALIGN (elem_type));
6485 			misalign = DR_MISALIGNMENT (first_dr);
6486 		      }
6487 		    if (dataref_offset == NULL_TREE)
6488 		      set_ptr_info_alignment (get_ptr_info (dataref_ptr),
6489 					      align, misalign);
6490 		    break;
6491 		  }
6492 		case dr_explicit_realign:
6493 		  {
6494 		    tree ptr, bump;
6495 
6496 		    tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
6497 
6498 		    if (compute_in_loop)
6499 		      msq = vect_setup_realignment (first_stmt, gsi,
6500 						    &realignment_token,
6501 						    dr_explicit_realign,
6502 						    dataref_ptr, NULL);
6503 
6504 		    ptr = copy_ssa_name (dataref_ptr);
6505 		    new_stmt = gimple_build_assign
6506 				 (ptr, BIT_AND_EXPR, dataref_ptr,
6507 				  build_int_cst
6508 				  (TREE_TYPE (dataref_ptr),
6509 				   -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6510 		    vect_finish_stmt_generation (stmt, new_stmt, gsi);
6511 		    data_ref
6512 		      = build2 (MEM_REF, vectype, ptr,
6513 				build_int_cst (reference_alias_ptr_type
6514 						 (DR_REF (first_dr)), 0));
6515 		    vec_dest = vect_create_destination_var (scalar_dest,
6516 							    vectype);
6517 		    new_stmt = gimple_build_assign (vec_dest, data_ref);
6518 		    new_temp = make_ssa_name (vec_dest, new_stmt);
6519 		    gimple_assign_set_lhs (new_stmt, new_temp);
6520 		    gimple_set_vdef (new_stmt, gimple_vdef (stmt));
6521 		    gimple_set_vuse (new_stmt, gimple_vuse (stmt));
6522 		    vect_finish_stmt_generation (stmt, new_stmt, gsi);
6523 		    msq = new_temp;
6524 
6525 		    bump = size_binop (MULT_EXPR, vs,
6526 				       TYPE_SIZE_UNIT (elem_type));
6527 		    bump = size_binop (MINUS_EXPR, bump, size_one_node);
6528 		    ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
6529 		    new_stmt = gimple_build_assign
6530 				 (NULL_TREE, BIT_AND_EXPR, ptr,
6531 				  build_int_cst
6532 				  (TREE_TYPE (ptr),
6533 				   -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6534 		    ptr = copy_ssa_name (dataref_ptr, new_stmt);
6535 		    gimple_assign_set_lhs (new_stmt, ptr);
6536 		    vect_finish_stmt_generation (stmt, new_stmt, gsi);
6537 		    data_ref
6538 		      = build2 (MEM_REF, vectype, ptr,
6539 				build_int_cst (reference_alias_ptr_type
6540 						 (DR_REF (first_dr)), 0));
6541 		    break;
6542 		  }
6543 		case dr_explicit_realign_optimized:
6544 		  new_temp = copy_ssa_name (dataref_ptr);
6545 		  new_stmt = gimple_build_assign
6546 			       (new_temp, BIT_AND_EXPR, dataref_ptr,
6547 				build_int_cst
6548 				  (TREE_TYPE (dataref_ptr),
6549 				   -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6550 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
6551 		  data_ref
6552 		    = build2 (MEM_REF, vectype, new_temp,
6553 			      build_int_cst (reference_alias_ptr_type
6554 					       (DR_REF (first_dr)), 0));
6555 		  break;
6556 		default:
6557 		  gcc_unreachable ();
6558 		}
6559 	      vec_dest = vect_create_destination_var (scalar_dest, vectype);
6560 	      new_stmt = gimple_build_assign (vec_dest, data_ref);
6561 	      new_temp = make_ssa_name (vec_dest, new_stmt);
6562 	      gimple_assign_set_lhs (new_stmt, new_temp);
6563 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
6564 
6565 	      /* 3. Handle explicit realignment if necessary/supported.
6566 		 Create in loop:
6567 		   vec_dest = realign_load (msq, lsq, realignment_token)  */
6568 	      if (alignment_support_scheme == dr_explicit_realign_optimized
6569 		  || alignment_support_scheme == dr_explicit_realign)
6570 		{
6571 		  lsq = gimple_assign_lhs (new_stmt);
6572 		  if (!realignment_token)
6573 		    realignment_token = dataref_ptr;
6574 		  vec_dest = vect_create_destination_var (scalar_dest, vectype);
6575 		  new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
6576 						  msq, lsq, realignment_token);
6577 		  new_temp = make_ssa_name (vec_dest, new_stmt);
6578 		  gimple_assign_set_lhs (new_stmt, new_temp);
6579 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
6580 
6581 		  if (alignment_support_scheme == dr_explicit_realign_optimized)
6582 		    {
6583 		      gcc_assert (phi);
6584 		      if (i == vec_num - 1 && j == ncopies - 1)
6585 			add_phi_arg (phi, lsq,
6586 				     loop_latch_edge (containing_loop),
6587 				     UNKNOWN_LOCATION);
6588 		      msq = lsq;
6589 		    }
6590 		}
6591 
6592 	      /* 4. Handle invariant-load.  */
6593 	      if (inv_p && !bb_vinfo)
6594 		{
6595 		  gcc_assert (!grouped_load);
6596 		  /* If we have versioned for aliasing or the loop doesn't
6597 		     have any data dependencies that would preclude this,
6598 		     then we are sure this is a loop invariant load and
6599 		     thus we can insert it on the preheader edge.  */
6600 		  if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
6601 		      && !nested_in_vect_loop
6602 		      && hoist_defs_of_uses (stmt, loop))
6603 		    {
6604 		      if (dump_enabled_p ())
6605 			{
6606 			  dump_printf_loc (MSG_NOTE, vect_location,
6607 					   "hoisting out of the vectorized "
6608 					   "loop: ");
6609 			  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
6610 			}
6611 		      tree tem = copy_ssa_name (scalar_dest);
6612 		      gsi_insert_on_edge_immediate
6613 			(loop_preheader_edge (loop),
6614 			 gimple_build_assign (tem,
6615 					      unshare_expr
6616 					        (gimple_assign_rhs1 (stmt))));
6617 		      new_temp = vect_init_vector (stmt, tem, vectype, NULL);
6618 		    }
6619 		  else
6620 		    {
6621 		      gimple_stmt_iterator gsi2 = *gsi;
6622 		      gsi_next (&gsi2);
6623 		      new_temp = vect_init_vector (stmt, scalar_dest,
6624 						   vectype, &gsi2);
6625 		    }
6626 		  new_stmt = SSA_NAME_DEF_STMT (new_temp);
6627 		  set_vinfo_for_stmt (new_stmt,
6628 				      new_stmt_vec_info (new_stmt, loop_vinfo,
6629 							 bb_vinfo));
6630 		}
6631 
6632 	      if (negative)
6633 		{
6634 		  tree perm_mask = perm_mask_for_reverse (vectype);
6635 		  new_temp = permute_vec_elements (new_temp, new_temp,
6636 						   perm_mask, stmt, gsi);
6637 		  new_stmt = SSA_NAME_DEF_STMT (new_temp);
6638 		}
6639 
6640 	      /* Collect vector loads and later create their permutation in
6641 		 vect_transform_grouped_load ().  */
6642 	      if (grouped_load || slp_perm)
6643 		dr_chain.quick_push (new_temp);
6644 
6645 	      /* Store vector loads in the corresponding SLP_NODE.  */
6646 	      if (slp && !slp_perm)
6647 		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6648 	    }
6649 	  /* Bump the vector pointer to account for a gap.  */
6650 	  if (slp && group_gap != 0)
6651 	    {
6652 	      tree bump = size_binop (MULT_EXPR,
6653 				      TYPE_SIZE_UNIT (elem_type),
6654 				      size_int (group_gap));
6655 	      dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6656 					     stmt, bump);
6657 	    }
6658 	}
6659 
6660       if (slp && !slp_perm)
6661 	continue;
6662 
6663       if (slp_perm)
6664         {
6665           if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
6666                                              slp_node_instance, false))
6667             {
6668               dr_chain.release ();
6669               return false;
6670             }
6671         }
6672       else
6673         {
6674           if (grouped_load)
6675   	    {
6676 	      if (!load_lanes_p)
6677 		vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
6678 	      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6679 	    }
6680           else
6681 	    {
6682 	      if (j == 0)
6683 	        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6684 	      else
6685 	        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6686 	      prev_stmt_info = vinfo_for_stmt (new_stmt);
6687 	    }
6688         }
6689       dr_chain.release ();
6690     }
6691 
6692   return true;
6693 }
6694 
6695 /* Function vect_is_simple_cond.
6696 
6697    Input:
6698    LOOP - the loop that is being vectorized.
6699    COND - Condition that is checked for simple use.
6700 
6701    Output:
6702    *COMP_VECTYPE - the vector type for the comparison.
6703 
6704    Returns whether a COND can be vectorized.  Checks whether
6705    condition operands are supportable using vec_is_simple_use.  */
6706 
6707 static bool
6708 vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
6709 		     bb_vec_info bb_vinfo, tree *comp_vectype)
6710 {
6711   tree lhs, rhs;
6712   tree def;
6713   enum vect_def_type dt;
6714   tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
6715 
6716   if (!COMPARISON_CLASS_P (cond))
6717     return false;
6718 
6719   lhs = TREE_OPERAND (cond, 0);
6720   rhs = TREE_OPERAND (cond, 1);
6721 
6722   if (TREE_CODE (lhs) == SSA_NAME)
6723     {
6724       gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
6725       if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
6726 				 &lhs_def_stmt, &def, &dt, &vectype1))
6727 	return false;
6728     }
6729   else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
6730 	   && TREE_CODE (lhs) != FIXED_CST)
6731     return false;
6732 
6733   if (TREE_CODE (rhs) == SSA_NAME)
6734     {
6735       gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
6736       if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
6737 				 &rhs_def_stmt, &def, &dt, &vectype2))
6738 	return false;
6739     }
6740   else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
6741 	   && TREE_CODE (rhs) != FIXED_CST)
6742     return false;
6743 
6744   *comp_vectype = vectype1 ? vectype1 : vectype2;
6745   return true;
6746 }
6747 
6748 /* vectorizable_condition.
6749 
6750    Check if STMT is conditional modify expression that can be vectorized.
6751    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6752    stmt using VEC_COND_EXPR  to replace it, put it in VEC_STMT, and insert it
6753    at GSI.
6754 
6755    When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
6756    to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
6757    else caluse if it is 2).
6758 
6759    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
6760 
6761 bool
6762 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
6763 			gimple *vec_stmt, tree reduc_def, int reduc_index,
6764 			slp_tree slp_node)
6765 {
6766   tree scalar_dest = NULL_TREE;
6767   tree vec_dest = NULL_TREE;
6768   tree cond_expr, then_clause, else_clause;
6769   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6770   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6771   tree comp_vectype = NULL_TREE;
6772   tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
6773   tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
6774   tree vec_compare, vec_cond_expr;
6775   tree new_temp;
6776   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6777   tree def;
6778   enum vect_def_type dt, dts[4];
6779   int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6780   int ncopies;
6781   enum tree_code code;
6782   stmt_vec_info prev_stmt_info = NULL;
6783   int i, j;
6784   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6785   vec<tree> vec_oprnds0 = vNULL;
6786   vec<tree> vec_oprnds1 = vNULL;
6787   vec<tree> vec_oprnds2 = vNULL;
6788   vec<tree> vec_oprnds3 = vNULL;
6789   tree vec_cmp_type;
6790 
6791   if (slp_node || PURE_SLP_STMT (stmt_info))
6792     ncopies = 1;
6793   else
6794     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
6795 
6796   gcc_assert (ncopies >= 1);
6797   if (reduc_index && ncopies > 1)
6798     return false; /* FORNOW */
6799 
6800   if (reduc_index && STMT_SLP_TYPE (stmt_info))
6801     return false;
6802 
6803   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6804     return false;
6805 
6806   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6807       && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
6808            && reduc_def))
6809     return false;
6810 
6811   /* FORNOW: not yet supported.  */
6812   if (STMT_VINFO_LIVE_P (stmt_info))
6813     {
6814       if (dump_enabled_p ())
6815         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6816                          "value used after loop.\n");
6817       return false;
6818     }
6819 
6820   /* Is vectorizable conditional operation?  */
6821   if (!is_gimple_assign (stmt))
6822     return false;
6823 
6824   code = gimple_assign_rhs_code (stmt);
6825 
6826   if (code != COND_EXPR)
6827     return false;
6828 
6829   cond_expr = gimple_assign_rhs1 (stmt);
6830   then_clause = gimple_assign_rhs2 (stmt);
6831   else_clause = gimple_assign_rhs3 (stmt);
6832 
6833   if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
6834 			    &comp_vectype)
6835       || !comp_vectype)
6836     return false;
6837 
6838   if (TREE_CODE (then_clause) == SSA_NAME)
6839     {
6840       gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
6841       if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
6842 			       &then_def_stmt, &def, &dt))
6843 	return false;
6844     }
6845   else if (TREE_CODE (then_clause) != INTEGER_CST
6846 	   && TREE_CODE (then_clause) != REAL_CST
6847 	   && TREE_CODE (then_clause) != FIXED_CST)
6848     return false;
6849 
6850   if (TREE_CODE (else_clause) == SSA_NAME)
6851     {
6852       gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
6853       if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
6854 			       &else_def_stmt, &def, &dt))
6855 	return false;
6856     }
6857   else if (TREE_CODE (else_clause) != INTEGER_CST
6858 	   && TREE_CODE (else_clause) != REAL_CST
6859 	   && TREE_CODE (else_clause) != FIXED_CST)
6860     return false;
6861 
6862   unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype)));
6863   /* The result of a vector comparison should be signed type.  */
6864   tree cmp_type = build_nonstandard_integer_type (prec, 0);
6865   vec_cmp_type = get_same_sized_vectype (cmp_type, vectype);
6866   if (vec_cmp_type == NULL_TREE)
6867     return false;
6868 
6869   if (!vec_stmt)
6870     {
6871       STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
6872       return expand_vec_cond_expr_p (vectype, comp_vectype);
6873     }
6874 
6875   /* Transform.  */
6876 
6877   if (!slp_node)
6878     {
6879       vec_oprnds0.create (1);
6880       vec_oprnds1.create (1);
6881       vec_oprnds2.create (1);
6882       vec_oprnds3.create (1);
6883     }
6884 
6885   /* Handle def.  */
6886   scalar_dest = gimple_assign_lhs (stmt);
6887   vec_dest = vect_create_destination_var (scalar_dest, vectype);
6888 
6889   /* Handle cond expr.  */
6890   for (j = 0; j < ncopies; j++)
6891     {
6892       gassign *new_stmt = NULL;
6893       if (j == 0)
6894 	{
6895           if (slp_node)
6896             {
6897               auto_vec<tree, 4> ops;
6898 	      auto_vec<vec<tree>, 4> vec_defs;
6899 
6900               ops.safe_push (TREE_OPERAND (cond_expr, 0));
6901               ops.safe_push (TREE_OPERAND (cond_expr, 1));
6902               ops.safe_push (then_clause);
6903               ops.safe_push (else_clause);
6904               vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
6905 	      vec_oprnds3 = vec_defs.pop ();
6906 	      vec_oprnds2 = vec_defs.pop ();
6907 	      vec_oprnds1 = vec_defs.pop ();
6908 	      vec_oprnds0 = vec_defs.pop ();
6909 
6910               ops.release ();
6911               vec_defs.release ();
6912             }
6913           else
6914             {
6915 	      gimple gtemp;
6916 	      vec_cond_lhs =
6917 	      vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
6918 					    stmt, NULL);
6919 	      vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
6920 				  loop_vinfo, NULL, &gtemp, &def, &dts[0]);
6921 
6922 	      vec_cond_rhs =
6923 		vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
6924 						stmt, NULL);
6925 	      vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
6926 				  loop_vinfo, NULL, &gtemp, &def, &dts[1]);
6927 	      if (reduc_index == 1)
6928 		vec_then_clause = reduc_def;
6929 	      else
6930 		{
6931 		  vec_then_clause = vect_get_vec_def_for_operand (then_clause,
6932 		 		  			      stmt, NULL);
6933 	          vect_is_simple_use (then_clause, stmt, loop_vinfo,
6934 					  NULL, &gtemp, &def, &dts[2]);
6935 		}
6936 	      if (reduc_index == 2)
6937 		vec_else_clause = reduc_def;
6938 	      else
6939 		{
6940 		  vec_else_clause = vect_get_vec_def_for_operand (else_clause,
6941 							      stmt, NULL);
6942 		  vect_is_simple_use (else_clause, stmt, loop_vinfo,
6943 				  NULL, &gtemp, &def, &dts[3]);
6944 		}
6945 	    }
6946 	}
6947       else
6948 	{
6949 	  vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
6950 							 vec_oprnds0.pop ());
6951 	  vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
6952 							 vec_oprnds1.pop ());
6953 	  vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
6954 							    vec_oprnds2.pop ());
6955 	  vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
6956 							    vec_oprnds3.pop ());
6957 	}
6958 
6959       if (!slp_node)
6960         {
6961 	  vec_oprnds0.quick_push (vec_cond_lhs);
6962 	  vec_oprnds1.quick_push (vec_cond_rhs);
6963 	  vec_oprnds2.quick_push (vec_then_clause);
6964 	  vec_oprnds3.quick_push (vec_else_clause);
6965 	}
6966 
6967       /* Arguments are ready.  Create the new vector stmt.  */
6968       FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
6969         {
6970           vec_cond_rhs = vec_oprnds1[i];
6971           vec_then_clause = vec_oprnds2[i];
6972           vec_else_clause = vec_oprnds3[i];
6973 
6974 	  vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
6975 				vec_cond_lhs, vec_cond_rhs);
6976           vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
6977  		         vec_compare, vec_then_clause, vec_else_clause);
6978 
6979           new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
6980           new_temp = make_ssa_name (vec_dest, new_stmt);
6981           gimple_assign_set_lhs (new_stmt, new_temp);
6982           vect_finish_stmt_generation (stmt, new_stmt, gsi);
6983           if (slp_node)
6984             SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6985         }
6986 
6987         if (slp_node)
6988           continue;
6989 
6990         if (j == 0)
6991           STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6992         else
6993           STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6994 
6995         prev_stmt_info = vinfo_for_stmt (new_stmt);
6996     }
6997 
6998   vec_oprnds0.release ();
6999   vec_oprnds1.release ();
7000   vec_oprnds2.release ();
7001   vec_oprnds3.release ();
7002 
7003   return true;
7004 }
7005 
7006 
7007 /* Make sure the statement is vectorizable.  */
7008 
7009 bool
7010 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
7011 {
7012   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7013   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7014   enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
7015   bool ok;
7016   tree scalar_type, vectype;
7017   gimple pattern_stmt;
7018   gimple_seq pattern_def_seq;
7019 
7020   if (dump_enabled_p ())
7021     {
7022       dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
7023       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7024     }
7025 
7026   if (gimple_has_volatile_ops (stmt))
7027     {
7028       if (dump_enabled_p ())
7029         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7030                          "not vectorized: stmt has volatile operands\n");
7031 
7032       return false;
7033     }
7034 
7035   /* Skip stmts that do not need to be vectorized. In loops this is expected
7036      to include:
7037      - the COND_EXPR which is the loop exit condition
7038      - any LABEL_EXPRs in the loop
7039      - computations that are used only for array indexing or loop control.
7040      In basic blocks we only analyze statements that are a part of some SLP
7041      instance, therefore, all the statements are relevant.
7042 
7043      Pattern statement needs to be analyzed instead of the original statement
7044      if the original statement is not relevant.  Otherwise, we analyze both
7045      statements.  In basic blocks we are called from some SLP instance
7046      traversal, don't analyze pattern stmts instead, the pattern stmts
7047      already will be part of SLP instance.  */
7048 
7049   pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
7050   if (!STMT_VINFO_RELEVANT_P (stmt_info)
7051       && !STMT_VINFO_LIVE_P (stmt_info))
7052     {
7053       if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7054           && pattern_stmt
7055           && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
7056               || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
7057         {
7058           /* Analyze PATTERN_STMT instead of the original stmt.  */
7059           stmt = pattern_stmt;
7060           stmt_info = vinfo_for_stmt (pattern_stmt);
7061           if (dump_enabled_p ())
7062             {
7063               dump_printf_loc (MSG_NOTE, vect_location,
7064                                "==> examining pattern statement: ");
7065               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7066             }
7067         }
7068       else
7069         {
7070           if (dump_enabled_p ())
7071             dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
7072 
7073           return true;
7074         }
7075     }
7076   else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7077 	   && node == NULL
7078            && pattern_stmt
7079            && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
7080                || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
7081     {
7082       /* Analyze PATTERN_STMT too.  */
7083       if (dump_enabled_p ())
7084         {
7085           dump_printf_loc (MSG_NOTE, vect_location,
7086                            "==> examining pattern statement: ");
7087           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7088         }
7089 
7090       if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
7091         return false;
7092    }
7093 
7094   if (is_pattern_stmt_p (stmt_info)
7095       && node == NULL
7096       && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
7097     {
7098       gimple_stmt_iterator si;
7099 
7100       for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
7101 	{
7102 	  gimple pattern_def_stmt = gsi_stmt (si);
7103 	  if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
7104 	      || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
7105 	    {
7106 	      /* Analyze def stmt of STMT if it's a pattern stmt.  */
7107 	      if (dump_enabled_p ())
7108 		{
7109 		  dump_printf_loc (MSG_NOTE, vect_location,
7110                                    "==> examining pattern def statement: ");
7111 		  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
7112 		}
7113 
7114 	      if (!vect_analyze_stmt (pattern_def_stmt,
7115 				      need_to_vectorize, node))
7116 		return false;
7117 	    }
7118 	}
7119     }
7120 
7121   switch (STMT_VINFO_DEF_TYPE (stmt_info))
7122     {
7123       case vect_internal_def:
7124         break;
7125 
7126       case vect_reduction_def:
7127       case vect_nested_cycle:
7128          gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
7129                      || relevance == vect_used_in_outer_by_reduction
7130                      || relevance == vect_unused_in_scope));
7131          break;
7132 
7133       case vect_induction_def:
7134       case vect_constant_def:
7135       case vect_external_def:
7136       case vect_unknown_def_type:
7137       default:
7138         gcc_unreachable ();
7139     }
7140 
7141   if (bb_vinfo)
7142     {
7143       gcc_assert (PURE_SLP_STMT (stmt_info));
7144 
7145       scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
7146       if (dump_enabled_p ())
7147         {
7148           dump_printf_loc (MSG_NOTE, vect_location,
7149                            "get vectype for scalar type:  ");
7150           dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
7151           dump_printf (MSG_NOTE, "\n");
7152         }
7153 
7154       vectype = get_vectype_for_scalar_type (scalar_type);
7155       if (!vectype)
7156         {
7157           if (dump_enabled_p ())
7158             {
7159                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7160                                 "not SLPed: unsupported data-type ");
7161                dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
7162                                   scalar_type);
7163               dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7164             }
7165           return false;
7166         }
7167 
7168       if (dump_enabled_p ())
7169         {
7170           dump_printf_loc (MSG_NOTE, vect_location, "vectype:  ");
7171           dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
7172           dump_printf (MSG_NOTE, "\n");
7173         }
7174 
7175       STMT_VINFO_VECTYPE (stmt_info) = vectype;
7176    }
7177 
7178   if (STMT_VINFO_RELEVANT_P (stmt_info))
7179     {
7180       gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
7181       gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
7182 		  || (is_gimple_call (stmt)
7183 		      && gimple_call_lhs (stmt) == NULL_TREE));
7184       *need_to_vectorize = true;
7185     }
7186 
7187    ok = true;
7188    if (!bb_vinfo
7189        && (STMT_VINFO_RELEVANT_P (stmt_info)
7190            || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
7191       ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, NULL)
7192 	    || vectorizable_conversion (stmt, NULL, NULL, NULL)
7193             || vectorizable_shift (stmt, NULL, NULL, NULL)
7194             || vectorizable_operation (stmt, NULL, NULL, NULL)
7195             || vectorizable_assignment (stmt, NULL, NULL, NULL)
7196             || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
7197 	    || vectorizable_call (stmt, NULL, NULL, NULL)
7198             || vectorizable_store (stmt, NULL, NULL, NULL)
7199             || vectorizable_reduction (stmt, NULL, NULL, NULL)
7200             || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
7201     else
7202       {
7203         if (bb_vinfo)
7204 	  ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
7205 		|| vectorizable_conversion (stmt, NULL, NULL, node)
7206 		|| vectorizable_shift (stmt, NULL, NULL, node)
7207                 || vectorizable_operation (stmt, NULL, NULL, node)
7208                 || vectorizable_assignment (stmt, NULL, NULL, node)
7209                 || vectorizable_load (stmt, NULL, NULL, node, NULL)
7210 		|| vectorizable_call (stmt, NULL, NULL, node)
7211                 || vectorizable_store (stmt, NULL, NULL, node)
7212                 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
7213       }
7214 
7215   if (!ok)
7216     {
7217       if (dump_enabled_p ())
7218         {
7219           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7220                            "not vectorized: relevant stmt not ");
7221           dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
7222           dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
7223         }
7224 
7225       return false;
7226     }
7227 
7228   if (bb_vinfo)
7229     return true;
7230 
7231   /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
7232       need extra handling, except for vectorizable reductions.  */
7233   if (STMT_VINFO_LIVE_P (stmt_info)
7234       && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
7235     ok = vectorizable_live_operation (stmt, NULL, NULL);
7236 
7237   if (!ok)
7238     {
7239       if (dump_enabled_p ())
7240         {
7241           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7242                            "not vectorized: live stmt not ");
7243           dump_printf (MSG_MISSED_OPTIMIZATION,  "supported: ");
7244           dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
7245         }
7246 
7247        return false;
7248     }
7249 
7250   return true;
7251 }
7252 
7253 
7254 /* Function vect_transform_stmt.
7255 
7256    Create a vectorized stmt to replace STMT, and insert it at BSI.  */
7257 
7258 bool
7259 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
7260 		     bool *grouped_store, slp_tree slp_node,
7261                      slp_instance slp_node_instance)
7262 {
7263   bool is_store = false;
7264   gimple vec_stmt = NULL;
7265   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7266   bool done;
7267 
7268   switch (STMT_VINFO_TYPE (stmt_info))
7269     {
7270     case type_demotion_vec_info_type:
7271     case type_promotion_vec_info_type:
7272     case type_conversion_vec_info_type:
7273       done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
7274       gcc_assert (done);
7275       break;
7276 
7277     case induc_vec_info_type:
7278       gcc_assert (!slp_node);
7279       done = vectorizable_induction (stmt, gsi, &vec_stmt);
7280       gcc_assert (done);
7281       break;
7282 
7283     case shift_vec_info_type:
7284       done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
7285       gcc_assert (done);
7286       break;
7287 
7288     case op_vec_info_type:
7289       done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
7290       gcc_assert (done);
7291       break;
7292 
7293     case assignment_vec_info_type:
7294       done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
7295       gcc_assert (done);
7296       break;
7297 
7298     case load_vec_info_type:
7299       done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
7300                                 slp_node_instance);
7301       gcc_assert (done);
7302       break;
7303 
7304     case store_vec_info_type:
7305       done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
7306       gcc_assert (done);
7307       if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
7308 	{
7309 	  /* In case of interleaving, the whole chain is vectorized when the
7310 	     last store in the chain is reached.  Store stmts before the last
7311 	     one are skipped, and there vec_stmt_info shouldn't be freed
7312 	     meanwhile.  */
7313 	  *grouped_store = true;
7314 	  if (STMT_VINFO_VEC_STMT (stmt_info))
7315 	    is_store = true;
7316 	  }
7317       else
7318 	is_store = true;
7319       break;
7320 
7321     case condition_vec_info_type:
7322       done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
7323       gcc_assert (done);
7324       break;
7325 
7326     case call_vec_info_type:
7327       done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
7328       stmt = gsi_stmt (*gsi);
7329       if (is_gimple_call (stmt)
7330 	  && gimple_call_internal_p (stmt)
7331 	  && gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
7332 	is_store = true;
7333       break;
7334 
7335     case call_simd_clone_vec_info_type:
7336       done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
7337       stmt = gsi_stmt (*gsi);
7338       break;
7339 
7340     case reduc_vec_info_type:
7341       done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
7342       gcc_assert (done);
7343       break;
7344 
7345     default:
7346       if (!STMT_VINFO_LIVE_P (stmt_info))
7347 	{
7348 	  if (dump_enabled_p ())
7349 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7350                              "stmt not supported.\n");
7351 	  gcc_unreachable ();
7352 	}
7353     }
7354 
7355   /* Handle inner-loop stmts whose DEF is used in the loop-nest that
7356      is being vectorized, but outside the immediately enclosing loop.  */
7357   if (vec_stmt
7358       && STMT_VINFO_LOOP_VINFO (stmt_info)
7359       && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
7360                                 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
7361       && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
7362       && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
7363           || STMT_VINFO_RELEVANT (stmt_info) ==
7364                                            vect_used_in_outer_by_reduction))
7365     {
7366       struct loop *innerloop = LOOP_VINFO_LOOP (
7367                                 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
7368       imm_use_iterator imm_iter;
7369       use_operand_p use_p;
7370       tree scalar_dest;
7371       gimple exit_phi;
7372 
7373       if (dump_enabled_p ())
7374         dump_printf_loc (MSG_NOTE, vect_location,
7375                          "Record the vdef for outer-loop vectorization.\n");
7376 
7377       /* Find the relevant loop-exit phi-node, and reord the vec_stmt there
7378         (to be used when vectorizing outer-loop stmts that use the DEF of
7379         STMT).  */
7380       if (gimple_code (stmt) == GIMPLE_PHI)
7381         scalar_dest = PHI_RESULT (stmt);
7382       else
7383         scalar_dest = gimple_assign_lhs (stmt);
7384 
7385       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
7386        {
7387          if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
7388            {
7389              exit_phi = USE_STMT (use_p);
7390              STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
7391            }
7392        }
7393     }
7394 
7395   /* Handle stmts whose DEF is used outside the loop-nest that is
7396      being vectorized.  */
7397   if (STMT_VINFO_LIVE_P (stmt_info)
7398       && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
7399     {
7400       done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
7401       gcc_assert (done);
7402     }
7403 
7404   if (vec_stmt)
7405     STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
7406 
7407   return is_store;
7408 }
7409 
7410 
7411 /* Remove a group of stores (for SLP or interleaving), free their
7412    stmt_vec_info.  */
7413 
7414 void
7415 vect_remove_stores (gimple first_stmt)
7416 {
7417   gimple next = first_stmt;
7418   gimple tmp;
7419   gimple_stmt_iterator next_si;
7420 
7421   while (next)
7422     {
7423       stmt_vec_info stmt_info = vinfo_for_stmt (next);
7424 
7425       tmp = GROUP_NEXT_ELEMENT (stmt_info);
7426       if (is_pattern_stmt_p (stmt_info))
7427 	next = STMT_VINFO_RELATED_STMT (stmt_info);
7428       /* Free the attached stmt_vec_info and remove the stmt.  */
7429       next_si = gsi_for_stmt (next);
7430       unlink_stmt_vdef (next);
7431       gsi_remove (&next_si, true);
7432       release_defs (next);
7433       free_stmt_vec_info (next);
7434       next = tmp;
7435     }
7436 }
7437 
7438 
7439 /* Function new_stmt_vec_info.
7440 
7441    Create and initialize a new stmt_vec_info struct for STMT.  */
7442 
7443 stmt_vec_info
7444 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
7445                    bb_vec_info bb_vinfo)
7446 {
7447   stmt_vec_info res;
7448   res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
7449 
7450   STMT_VINFO_TYPE (res) = undef_vec_info_type;
7451   STMT_VINFO_STMT (res) = stmt;
7452   STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
7453   STMT_VINFO_BB_VINFO (res) = bb_vinfo;
7454   STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
7455   STMT_VINFO_LIVE_P (res) = false;
7456   STMT_VINFO_VECTYPE (res) = NULL;
7457   STMT_VINFO_VEC_STMT (res) = NULL;
7458   STMT_VINFO_VECTORIZABLE (res) = true;
7459   STMT_VINFO_IN_PATTERN_P (res) = false;
7460   STMT_VINFO_RELATED_STMT (res) = NULL;
7461   STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
7462   STMT_VINFO_DATA_REF (res) = NULL;
7463 
7464   STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
7465   STMT_VINFO_DR_OFFSET (res) = NULL;
7466   STMT_VINFO_DR_INIT (res) = NULL;
7467   STMT_VINFO_DR_STEP (res) = NULL;
7468   STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
7469 
7470   if (gimple_code (stmt) == GIMPLE_PHI
7471       && is_loop_header_bb_p (gimple_bb (stmt)))
7472     STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
7473   else
7474     STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
7475 
7476   STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
7477   STMT_SLP_TYPE (res) = loop_vect;
7478   GROUP_FIRST_ELEMENT (res) = NULL;
7479   GROUP_NEXT_ELEMENT (res) = NULL;
7480   GROUP_SIZE (res) = 0;
7481   GROUP_STORE_COUNT (res) = 0;
7482   GROUP_GAP (res) = 0;
7483   GROUP_SAME_DR_STMT (res) = NULL;
7484 
7485   return res;
7486 }
7487 
7488 
7489 /* Create a hash table for stmt_vec_info. */
7490 
7491 void
7492 init_stmt_vec_info_vec (void)
7493 {
7494   gcc_assert (!stmt_vec_info_vec.exists ());
7495   stmt_vec_info_vec.create (50);
7496 }
7497 
7498 
7499 /* Free hash table for stmt_vec_info. */
7500 
7501 void
7502 free_stmt_vec_info_vec (void)
7503 {
7504   unsigned int i;
7505   vec_void_p info;
7506   FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
7507     if (info != NULL)
7508       free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info) info));
7509   gcc_assert (stmt_vec_info_vec.exists ());
7510   stmt_vec_info_vec.release ();
7511 }
7512 
7513 
7514 /* Free stmt vectorization related info.  */
7515 
7516 void
7517 free_stmt_vec_info (gimple stmt)
7518 {
7519   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7520 
7521   if (!stmt_info)
7522     return;
7523 
7524   /* Check if this statement has a related "pattern stmt"
7525      (introduced by the vectorizer during the pattern recognition
7526      pass).  Free pattern's stmt_vec_info and def stmt's stmt_vec_info
7527      too.  */
7528   if (STMT_VINFO_IN_PATTERN_P (stmt_info))
7529     {
7530       stmt_vec_info patt_info
7531 	= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
7532       if (patt_info)
7533 	{
7534 	  gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
7535 	  gimple patt_stmt = STMT_VINFO_STMT (patt_info);
7536 	  gimple_set_bb (patt_stmt, NULL);
7537 	  tree lhs = gimple_get_lhs (patt_stmt);
7538 	  if (TREE_CODE (lhs) == SSA_NAME)
7539 	    release_ssa_name (lhs);
7540 	  if (seq)
7541 	    {
7542 	      gimple_stmt_iterator si;
7543 	      for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
7544 		{
7545 		  gimple seq_stmt = gsi_stmt (si);
7546 		  gimple_set_bb (seq_stmt, NULL);
7547 		  lhs = gimple_get_lhs (patt_stmt);
7548 		  if (TREE_CODE (lhs) == SSA_NAME)
7549 		    release_ssa_name (lhs);
7550 		  free_stmt_vec_info (seq_stmt);
7551 		}
7552 	    }
7553 	  free_stmt_vec_info (patt_stmt);
7554 	}
7555     }
7556 
7557   STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
7558   STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
7559   set_vinfo_for_stmt (stmt, NULL);
7560   free (stmt_info);
7561 }
7562 
7563 
7564 /* Function get_vectype_for_scalar_type_and_size.
7565 
7566    Returns the vector type corresponding to SCALAR_TYPE  and SIZE as supported
7567    by the target.  */
7568 
7569 static tree
7570 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
7571 {
7572   machine_mode inner_mode = TYPE_MODE (scalar_type);
7573   machine_mode simd_mode;
7574   unsigned int nbytes = GET_MODE_SIZE (inner_mode);
7575   int nunits;
7576   tree vectype;
7577 
7578   if (nbytes == 0)
7579     return NULL_TREE;
7580 
7581   if (GET_MODE_CLASS (inner_mode) != MODE_INT
7582       && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
7583     return NULL_TREE;
7584 
7585   /* For vector types of elements whose mode precision doesn't
7586      match their types precision we use a element type of mode
7587      precision.  The vectorization routines will have to make sure
7588      they support the proper result truncation/extension.
7589      We also make sure to build vector types with INTEGER_TYPE
7590      component type only.  */
7591   if (INTEGRAL_TYPE_P (scalar_type)
7592       && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
7593 	  || TREE_CODE (scalar_type) != INTEGER_TYPE))
7594     scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
7595 						  TYPE_UNSIGNED (scalar_type));
7596 
7597   /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
7598      When the component mode passes the above test simply use a type
7599      corresponding to that mode.  The theory is that any use that
7600      would cause problems with this will disable vectorization anyway.  */
7601   else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
7602 	   && !INTEGRAL_TYPE_P (scalar_type))
7603     scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
7604 
7605   /* We can't build a vector type of elements with alignment bigger than
7606      their size.  */
7607   else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
7608     scalar_type = lang_hooks.types.type_for_mode (inner_mode,
7609 						  TYPE_UNSIGNED (scalar_type));
7610 
7611   /* If we felt back to using the mode fail if there was
7612      no scalar type for it.  */
7613   if (scalar_type == NULL_TREE)
7614     return NULL_TREE;
7615 
7616   /* If no size was supplied use the mode the target prefers.   Otherwise
7617      lookup a vector mode of the specified size.  */
7618   if (size == 0)
7619     simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
7620   else
7621     simd_mode = mode_for_vector (inner_mode, size / nbytes);
7622   nunits = GET_MODE_SIZE (simd_mode) / nbytes;
7623   if (nunits <= 1)
7624     return NULL_TREE;
7625 
7626   vectype = build_vector_type (scalar_type, nunits);
7627 
7628   if (!VECTOR_MODE_P (TYPE_MODE (vectype))
7629       && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
7630     return NULL_TREE;
7631 
7632   return vectype;
7633 }
7634 
7635 unsigned int current_vector_size;
7636 
7637 /* Function get_vectype_for_scalar_type.
7638 
7639    Returns the vector type corresponding to SCALAR_TYPE as supported
7640    by the target.  */
7641 
7642 tree
7643 get_vectype_for_scalar_type (tree scalar_type)
7644 {
7645   tree vectype;
7646   vectype = get_vectype_for_scalar_type_and_size (scalar_type,
7647 						  current_vector_size);
7648   if (vectype
7649       && current_vector_size == 0)
7650     current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
7651   return vectype;
7652 }
7653 
7654 /* Function get_same_sized_vectype
7655 
7656    Returns a vector type corresponding to SCALAR_TYPE of size
7657    VECTOR_TYPE if supported by the target.  */
7658 
7659 tree
7660 get_same_sized_vectype (tree scalar_type, tree vector_type)
7661 {
7662   return get_vectype_for_scalar_type_and_size
7663 	   (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
7664 }
7665 
7666 /* Function vect_is_simple_use.
7667 
7668    Input:
7669    LOOP_VINFO - the vect info of the loop that is being vectorized.
7670    BB_VINFO - the vect info of the basic block that is being vectorized.
7671    OPERAND - operand of STMT in the loop or bb.
7672    DEF - the defining stmt in case OPERAND is an SSA_NAME.
7673 
7674    Returns whether a stmt with OPERAND can be vectorized.
7675    For loops, supportable operands are constants, loop invariants, and operands
7676    that are defined by the current iteration of the loop.  Unsupportable
7677    operands are those that are defined by a previous iteration of the loop (as
7678    is the case in reduction/induction computations).
7679    For basic blocks, supportable operands are constants and bb invariants.
7680    For now, operands defined outside the basic block are not supported.  */
7681 
7682 bool
7683 vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
7684                     bb_vec_info bb_vinfo, gimple *def_stmt,
7685 		    tree *def, enum vect_def_type *dt)
7686 {
7687   basic_block bb;
7688   stmt_vec_info stmt_vinfo;
7689   struct loop *loop = NULL;
7690 
7691   if (loop_vinfo)
7692     loop = LOOP_VINFO_LOOP (loop_vinfo);
7693 
7694   *def_stmt = NULL;
7695   *def = NULL_TREE;
7696 
7697   if (dump_enabled_p ())
7698     {
7699       dump_printf_loc (MSG_NOTE, vect_location,
7700                        "vect_is_simple_use: operand ");
7701       dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
7702       dump_printf (MSG_NOTE, "\n");
7703     }
7704 
7705   if (CONSTANT_CLASS_P (operand))
7706     {
7707       *dt = vect_constant_def;
7708       return true;
7709     }
7710 
7711   if (is_gimple_min_invariant (operand))
7712     {
7713       *def = operand;
7714       *dt = vect_external_def;
7715       return true;
7716     }
7717 
7718   if (TREE_CODE (operand) == PAREN_EXPR)
7719     {
7720       if (dump_enabled_p ())
7721         dump_printf_loc (MSG_NOTE, vect_location, "non-associatable copy.\n");
7722       operand = TREE_OPERAND (operand, 0);
7723     }
7724 
7725   if (TREE_CODE (operand) != SSA_NAME)
7726     {
7727       if (dump_enabled_p ())
7728         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7729                          "not ssa-name.\n");
7730       return false;
7731     }
7732 
7733   *def_stmt = SSA_NAME_DEF_STMT (operand);
7734   if (*def_stmt == NULL)
7735     {
7736       if (dump_enabled_p ())
7737         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7738                          "no def_stmt.\n");
7739       return false;
7740     }
7741 
7742   if (dump_enabled_p ())
7743     {
7744       dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
7745       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
7746     }
7747 
7748   /* Empty stmt is expected only in case of a function argument.
7749      (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN).  */
7750   if (gimple_nop_p (*def_stmt))
7751     {
7752       *def = operand;
7753       *dt = vect_external_def;
7754       return true;
7755     }
7756 
7757   bb = gimple_bb (*def_stmt);
7758 
7759   if ((loop && !flow_bb_inside_loop_p (loop, bb))
7760       || (!loop && bb != BB_VINFO_BB (bb_vinfo))
7761       || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
7762     *dt = vect_external_def;
7763   else
7764     {
7765       stmt_vinfo = vinfo_for_stmt (*def_stmt);
7766       *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
7767     }
7768 
7769   if (*dt == vect_unknown_def_type
7770       || (stmt
7771 	  && *dt == vect_double_reduction_def
7772 	  && gimple_code (stmt) != GIMPLE_PHI))
7773     {
7774       if (dump_enabled_p ())
7775         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7776                          "Unsupported pattern.\n");
7777       return false;
7778     }
7779 
7780   if (dump_enabled_p ())
7781     dump_printf_loc (MSG_NOTE, vect_location, "type of def: %d.\n", *dt);
7782 
7783   switch (gimple_code (*def_stmt))
7784     {
7785     case GIMPLE_PHI:
7786       *def = gimple_phi_result (*def_stmt);
7787       break;
7788 
7789     case GIMPLE_ASSIGN:
7790       *def = gimple_assign_lhs (*def_stmt);
7791       break;
7792 
7793     case GIMPLE_CALL:
7794       *def = gimple_call_lhs (*def_stmt);
7795       if (*def != NULL)
7796 	break;
7797       /* FALLTHRU */
7798     default:
7799       if (dump_enabled_p ())
7800         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7801                          "unsupported defining stmt:\n");
7802       return false;
7803     }
7804 
7805   return true;
7806 }
7807 
7808 /* Function vect_is_simple_use_1.
7809 
7810    Same as vect_is_simple_use_1 but also determines the vector operand
7811    type of OPERAND and stores it to *VECTYPE.  If the definition of
7812    OPERAND is vect_uninitialized_def, vect_constant_def or
7813    vect_external_def *VECTYPE will be set to NULL_TREE and the caller
7814    is responsible to compute the best suited vector type for the
7815    scalar operand.  */
7816 
7817 bool
7818 vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
7819 		      bb_vec_info bb_vinfo, gimple *def_stmt,
7820 		      tree *def, enum vect_def_type *dt, tree *vectype)
7821 {
7822   if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
7823 			   def, dt))
7824     return false;
7825 
7826   /* Now get a vector type if the def is internal, otherwise supply
7827      NULL_TREE and leave it up to the caller to figure out a proper
7828      type for the use stmt.  */
7829   if (*dt == vect_internal_def
7830       || *dt == vect_induction_def
7831       || *dt == vect_reduction_def
7832       || *dt == vect_double_reduction_def
7833       || *dt == vect_nested_cycle)
7834     {
7835       stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
7836 
7837       if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7838           && !STMT_VINFO_RELEVANT (stmt_info)
7839           && !STMT_VINFO_LIVE_P (stmt_info))
7840 	stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
7841 
7842       *vectype = STMT_VINFO_VECTYPE (stmt_info);
7843       gcc_assert (*vectype != NULL_TREE);
7844     }
7845   else if (*dt == vect_uninitialized_def
7846 	   || *dt == vect_constant_def
7847 	   || *dt == vect_external_def)
7848     *vectype = NULL_TREE;
7849   else
7850     gcc_unreachable ();
7851 
7852   return true;
7853 }
7854 
7855 
7856 /* Function supportable_widening_operation
7857 
7858    Check whether an operation represented by the code CODE is a
7859    widening operation that is supported by the target platform in
7860    vector form (i.e., when operating on arguments of type VECTYPE_IN
7861    producing a result of type VECTYPE_OUT).
7862 
7863    Widening operations we currently support are NOP (CONVERT), FLOAT
7864    and WIDEN_MULT.  This function checks if these operations are supported
7865    by the target platform either directly (via vector tree-codes), or via
7866    target builtins.
7867 
7868    Output:
7869    - CODE1 and CODE2 are codes of vector operations to be used when
7870    vectorizing the operation, if available.
7871    - MULTI_STEP_CVT determines the number of required intermediate steps in
7872    case of multi-step conversion (like char->short->int - in that case
7873    MULTI_STEP_CVT will be 1).
7874    - INTERM_TYPES contains the intermediate type required to perform the
7875    widening operation (short in the above example).  */
7876 
7877 bool
7878 supportable_widening_operation (enum tree_code code, gimple stmt,
7879 				tree vectype_out, tree vectype_in,
7880                                 enum tree_code *code1, enum tree_code *code2,
7881                                 int *multi_step_cvt,
7882                                 vec<tree> *interm_types)
7883 {
7884   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7885   loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
7886   struct loop *vect_loop = NULL;
7887   machine_mode vec_mode;
7888   enum insn_code icode1, icode2;
7889   optab optab1, optab2;
7890   tree vectype = vectype_in;
7891   tree wide_vectype = vectype_out;
7892   enum tree_code c1, c2;
7893   int i;
7894   tree prev_type, intermediate_type;
7895   machine_mode intermediate_mode, prev_mode;
7896   optab optab3, optab4;
7897 
7898   *multi_step_cvt = 0;
7899   if (loop_info)
7900     vect_loop = LOOP_VINFO_LOOP (loop_info);
7901 
7902   switch (code)
7903     {
7904     case WIDEN_MULT_EXPR:
7905       /* The result of a vectorized widening operation usually requires
7906 	 two vectors (because the widened results do not fit into one vector).
7907 	 The generated vector results would normally be expected to be
7908 	 generated in the same order as in the original scalar computation,
7909 	 i.e. if 8 results are generated in each vector iteration, they are
7910 	 to be organized as follows:
7911 		vect1: [res1,res2,res3,res4],
7912 		vect2: [res5,res6,res7,res8].
7913 
7914 	 However, in the special case that the result of the widening
7915 	 operation is used in a reduction computation only, the order doesn't
7916 	 matter (because when vectorizing a reduction we change the order of
7917 	 the computation).  Some targets can take advantage of this and
7918 	 generate more efficient code.  For example, targets like Altivec,
7919 	 that support widen_mult using a sequence of {mult_even,mult_odd}
7920 	 generate the following vectors:
7921 		vect1: [res1,res3,res5,res7],
7922 		vect2: [res2,res4,res6,res8].
7923 
7924 	 When vectorizing outer-loops, we execute the inner-loop sequentially
7925 	 (each vectorized inner-loop iteration contributes to VF outer-loop
7926 	 iterations in parallel).  We therefore don't allow to change the
7927 	 order of the computation in the inner-loop during outer-loop
7928 	 vectorization.  */
7929       /* TODO: Another case in which order doesn't *really* matter is when we
7930 	 widen and then contract again, e.g. (short)((int)x * y >> 8).
7931 	 Normally, pack_trunc performs an even/odd permute, whereas the
7932 	 repack from an even/odd expansion would be an interleave, which
7933 	 would be significantly simpler for e.g. AVX2.  */
7934       /* In any case, in order to avoid duplicating the code below, recurse
7935 	 on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
7936 	 are properly set up for the caller.  If we fail, we'll continue with
7937 	 a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
7938       if (vect_loop
7939 	  && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
7940 	  && !nested_in_vect_loop_p (vect_loop, stmt)
7941 	  && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
7942 					     stmt, vectype_out, vectype_in,
7943 					     code1, code2, multi_step_cvt,
7944 					     interm_types))
7945         {
7946           /* Elements in a vector with vect_used_by_reduction property cannot
7947              be reordered if the use chain with this property does not have the
7948              same operation.  One such an example is s += a * b, where elements
7949              in a and b cannot be reordered.  Here we check if the vector defined
7950              by STMT is only directly used in the reduction statement.  */
7951           tree lhs = gimple_assign_lhs (stmt);
7952           use_operand_p dummy;
7953           gimple use_stmt;
7954           stmt_vec_info use_stmt_info = NULL;
7955           if (single_imm_use (lhs, &dummy, &use_stmt)
7956               && (use_stmt_info = vinfo_for_stmt (use_stmt))
7957               && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
7958             return true;
7959         }
7960       c1 = VEC_WIDEN_MULT_LO_EXPR;
7961       c2 = VEC_WIDEN_MULT_HI_EXPR;
7962       break;
7963 
7964     case VEC_WIDEN_MULT_EVEN_EXPR:
7965       /* Support the recursion induced just above.  */
7966       c1 = VEC_WIDEN_MULT_EVEN_EXPR;
7967       c2 = VEC_WIDEN_MULT_ODD_EXPR;
7968       break;
7969 
7970     case WIDEN_LSHIFT_EXPR:
7971       c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
7972       c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
7973       break;
7974 
7975     CASE_CONVERT:
7976       c1 = VEC_UNPACK_LO_EXPR;
7977       c2 = VEC_UNPACK_HI_EXPR;
7978       break;
7979 
7980     case FLOAT_EXPR:
7981       c1 = VEC_UNPACK_FLOAT_LO_EXPR;
7982       c2 = VEC_UNPACK_FLOAT_HI_EXPR;
7983       break;
7984 
7985     case FIX_TRUNC_EXPR:
7986       /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
7987 	 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
7988 	 computing the operation.  */
7989       return false;
7990 
7991     default:
7992       gcc_unreachable ();
7993     }
7994 
7995   if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
7996     {
7997       enum tree_code ctmp = c1;
7998       c1 = c2;
7999       c2 = ctmp;
8000     }
8001 
8002   if (code == FIX_TRUNC_EXPR)
8003     {
8004       /* The signedness is determined from output operand.  */
8005       optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
8006       optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
8007     }
8008   else
8009     {
8010       optab1 = optab_for_tree_code (c1, vectype, optab_default);
8011       optab2 = optab_for_tree_code (c2, vectype, optab_default);
8012     }
8013 
8014   if (!optab1 || !optab2)
8015     return false;
8016 
8017   vec_mode = TYPE_MODE (vectype);
8018   if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
8019        || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
8020     return false;
8021 
8022   *code1 = c1;
8023   *code2 = c2;
8024 
8025   if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
8026       && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
8027     return true;
8028 
8029   /* Check if it's a multi-step conversion that can be done using intermediate
8030      types.  */
8031 
8032   prev_type = vectype;
8033   prev_mode = vec_mode;
8034 
8035   if (!CONVERT_EXPR_CODE_P (code))
8036     return false;
8037 
8038   /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
8039      intermediate steps in promotion sequence.  We try
8040      MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
8041      not.  */
8042   interm_types->create (MAX_INTERM_CVT_STEPS);
8043   for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
8044     {
8045       intermediate_mode = insn_data[icode1].operand[0].mode;
8046       intermediate_type
8047 	= lang_hooks.types.type_for_mode (intermediate_mode,
8048 					  TYPE_UNSIGNED (prev_type));
8049       optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
8050       optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
8051 
8052       if (!optab3 || !optab4
8053           || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
8054 	  || insn_data[icode1].operand[0].mode != intermediate_mode
8055 	  || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
8056 	  || insn_data[icode2].operand[0].mode != intermediate_mode
8057 	  || ((icode1 = optab_handler (optab3, intermediate_mode))
8058 	      == CODE_FOR_nothing)
8059 	  || ((icode2 = optab_handler (optab4, intermediate_mode))
8060 	      == CODE_FOR_nothing))
8061 	break;
8062 
8063       interm_types->quick_push (intermediate_type);
8064       (*multi_step_cvt)++;
8065 
8066       if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
8067 	  && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
8068 	return true;
8069 
8070       prev_type = intermediate_type;
8071       prev_mode = intermediate_mode;
8072     }
8073 
8074   interm_types->release ();
8075   return false;
8076 }
8077 
8078 
8079 /* Function supportable_narrowing_operation
8080 
8081    Check whether an operation represented by the code CODE is a
8082    narrowing operation that is supported by the target platform in
8083    vector form (i.e., when operating on arguments of type VECTYPE_IN
8084    and producing a result of type VECTYPE_OUT).
8085 
8086    Narrowing operations we currently support are NOP (CONVERT) and
8087    FIX_TRUNC.  This function checks if these operations are supported by
8088    the target platform directly via vector tree-codes.
8089 
8090    Output:
8091    - CODE1 is the code of a vector operation to be used when
8092    vectorizing the operation, if available.
8093    - MULTI_STEP_CVT determines the number of required intermediate steps in
8094    case of multi-step conversion (like int->short->char - in that case
8095    MULTI_STEP_CVT will be 1).
8096    - INTERM_TYPES contains the intermediate type required to perform the
8097    narrowing operation (short in the above example).   */
8098 
8099 bool
8100 supportable_narrowing_operation (enum tree_code code,
8101 				 tree vectype_out, tree vectype_in,
8102 				 enum tree_code *code1, int *multi_step_cvt,
8103                                  vec<tree> *interm_types)
8104 {
8105   machine_mode vec_mode;
8106   enum insn_code icode1;
8107   optab optab1, interm_optab;
8108   tree vectype = vectype_in;
8109   tree narrow_vectype = vectype_out;
8110   enum tree_code c1;
8111   tree intermediate_type;
8112   machine_mode intermediate_mode, prev_mode;
8113   int i;
8114   bool uns;
8115 
8116   *multi_step_cvt = 0;
8117   switch (code)
8118     {
8119     CASE_CONVERT:
8120       c1 = VEC_PACK_TRUNC_EXPR;
8121       break;
8122 
8123     case FIX_TRUNC_EXPR:
8124       c1 = VEC_PACK_FIX_TRUNC_EXPR;
8125       break;
8126 
8127     case FLOAT_EXPR:
8128       /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
8129 	 tree code and optabs used for computing the operation.  */
8130       return false;
8131 
8132     default:
8133       gcc_unreachable ();
8134     }
8135 
8136   if (code == FIX_TRUNC_EXPR)
8137     /* The signedness is determined from output operand.  */
8138     optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
8139   else
8140     optab1 = optab_for_tree_code (c1, vectype, optab_default);
8141 
8142   if (!optab1)
8143     return false;
8144 
8145   vec_mode = TYPE_MODE (vectype);
8146   if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
8147     return false;
8148 
8149   *code1 = c1;
8150 
8151   if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
8152     return true;
8153 
8154   /* Check if it's a multi-step conversion that can be done using intermediate
8155      types.  */
8156   prev_mode = vec_mode;
8157   if (code == FIX_TRUNC_EXPR)
8158     uns = TYPE_UNSIGNED (vectype_out);
8159   else
8160     uns = TYPE_UNSIGNED (vectype);
8161 
8162   /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
8163      conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
8164      costly than signed.  */
8165   if (code == FIX_TRUNC_EXPR && uns)
8166     {
8167       enum insn_code icode2;
8168 
8169       intermediate_type
8170 	= lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
8171       interm_optab
8172 	= optab_for_tree_code (c1, intermediate_type, optab_default);
8173       if (interm_optab != unknown_optab
8174 	  && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
8175 	  && insn_data[icode1].operand[0].mode
8176 	     == insn_data[icode2].operand[0].mode)
8177 	{
8178 	  uns = false;
8179 	  optab1 = interm_optab;
8180 	  icode1 = icode2;
8181 	}
8182     }
8183 
8184   /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
8185      intermediate steps in promotion sequence.  We try
8186      MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
8187   interm_types->create (MAX_INTERM_CVT_STEPS);
8188   for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
8189     {
8190       intermediate_mode = insn_data[icode1].operand[0].mode;
8191       intermediate_type
8192 	= lang_hooks.types.type_for_mode (intermediate_mode, uns);
8193       interm_optab
8194 	= optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
8195 			       optab_default);
8196       if (!interm_optab
8197 	  || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
8198 	  || insn_data[icode1].operand[0].mode != intermediate_mode
8199 	  || ((icode1 = optab_handler (interm_optab, intermediate_mode))
8200 	      == CODE_FOR_nothing))
8201 	break;
8202 
8203       interm_types->quick_push (intermediate_type);
8204       (*multi_step_cvt)++;
8205 
8206       if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
8207 	return true;
8208 
8209       prev_mode = intermediate_mode;
8210       optab1 = interm_optab;
8211     }
8212 
8213   interm_types->release ();
8214   return false;
8215 }
8216