xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/tree-vect-stmts.c (revision 9fb66d812c00ebfb445c0b47dea128f32aa6fe96)
1 /* Statement Analysis and Transformation for Vectorization
2    Copyright (C) 2003-2019 Free Software Foundation, Inc.
3    Contributed by Dorit Naishlos <dorit@il.ibm.com>
4    and Ira Rosen <irar@il.ibm.com>
5 
6 This file is part of GCC.
7 
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12 
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
16 for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3.  If not see
20 <http://www.gnu.org/licenses/>.  */
21 
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h"		/* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "explow.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
50 #include "builtins.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "tree-ssa-loop-niter.h"
55 #include "gimple-fold.h"
56 #include "regs.h"
57 
58 /* For lang_hooks.types.type_for_mode.  */
59 #include "langhooks.h"
60 
61 /* Return the vectorized type for the given statement.  */
62 
63 tree
64 stmt_vectype (struct _stmt_vec_info *stmt_info)
65 {
66   return STMT_VINFO_VECTYPE (stmt_info);
67 }
68 
69 /* Return TRUE iff the given statement is in an inner loop relative to
70    the loop being vectorized.  */
71 bool
72 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
73 {
74   gimple *stmt = STMT_VINFO_STMT (stmt_info);
75   basic_block bb = gimple_bb (stmt);
76   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
77   struct loop* loop;
78 
79   if (!loop_vinfo)
80     return false;
81 
82   loop = LOOP_VINFO_LOOP (loop_vinfo);
83 
84   return (bb->loop_father == loop->inner);
85 }
86 
87 /* Record the cost of a statement, either by directly informing the
88    target model or by saving it in a vector for later processing.
89    Return a preliminary estimate of the statement's cost.  */
90 
91 unsigned
92 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
93 		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
94 		  int misalign, enum vect_cost_model_location where)
95 {
96   if ((kind == vector_load || kind == unaligned_load)
97       && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
98     kind = vector_gather_load;
99   if ((kind == vector_store || kind == unaligned_store)
100       && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
101     kind = vector_scatter_store;
102 
103   stmt_info_for_cost si = { count, kind, where, stmt_info, misalign };
104   body_cost_vec->safe_push (si);
105 
106   tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
107   return (unsigned)
108       (builtin_vectorization_cost (kind, vectype, misalign) * count);
109 }
110 
111 /* Return a variable of type ELEM_TYPE[NELEMS].  */
112 
113 static tree
114 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
115 {
116   return create_tmp_var (build_array_type_nelts (elem_type, nelems),
117 			 "vect_array");
118 }
119 
120 /* ARRAY is an array of vectors created by create_vector_array.
121    Return an SSA_NAME for the vector in index N.  The reference
122    is part of the vectorization of STMT_INFO and the vector is associated
123    with scalar destination SCALAR_DEST.  */
124 
125 static tree
126 read_vector_array (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
127 		   tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
128 {
129   tree vect_type, vect, vect_name, array_ref;
130   gimple *new_stmt;
131 
132   gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
133   vect_type = TREE_TYPE (TREE_TYPE (array));
134   vect = vect_create_destination_var (scalar_dest, vect_type);
135   array_ref = build4 (ARRAY_REF, vect_type, array,
136 		      build_int_cst (size_type_node, n),
137 		      NULL_TREE, NULL_TREE);
138 
139   new_stmt = gimple_build_assign (vect, array_ref);
140   vect_name = make_ssa_name (vect, new_stmt);
141   gimple_assign_set_lhs (new_stmt, vect_name);
142   vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
143 
144   return vect_name;
145 }
146 
147 /* ARRAY is an array of vectors created by create_vector_array.
148    Emit code to store SSA_NAME VECT in index N of the array.
149    The store is part of the vectorization of STMT_INFO.  */
150 
151 static void
152 write_vector_array (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
153 		    tree vect, tree array, unsigned HOST_WIDE_INT n)
154 {
155   tree array_ref;
156   gimple *new_stmt;
157 
158   array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
159 		      build_int_cst (size_type_node, n),
160 		      NULL_TREE, NULL_TREE);
161 
162   new_stmt = gimple_build_assign (array_ref, vect);
163   vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
164 }
165 
166 /* PTR is a pointer to an array of type TYPE.  Return a representation
167    of *PTR.  The memory reference replaces those in FIRST_DR
168    (and its group).  */
169 
170 static tree
171 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
172 {
173   tree mem_ref;
174 
175   mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
176   /* Arrays have the same alignment as their type.  */
177   set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
178   return mem_ref;
179 }
180 
181 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
182    Emit the clobber before *GSI.  */
183 
184 static void
185 vect_clobber_variable (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
186 		       tree var)
187 {
188   tree clobber = build_clobber (TREE_TYPE (var));
189   gimple *new_stmt = gimple_build_assign (var, clobber);
190   vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
191 }
192 
193 /* Utility functions used by vect_mark_stmts_to_be_vectorized.  */
194 
195 /* Function vect_mark_relevant.
196 
197    Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST.  */
198 
199 static void
200 vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
201 		    enum vect_relevant relevant, bool live_p)
202 {
203   enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
204   bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
205 
206   if (dump_enabled_p ())
207     dump_printf_loc (MSG_NOTE, vect_location,
208 		     "mark relevant %d, live %d: %G", relevant, live_p,
209 		     stmt_info->stmt);
210 
211   /* If this stmt is an original stmt in a pattern, we might need to mark its
212      related pattern stmt instead of the original stmt.  However, such stmts
213      may have their own uses that are not in any pattern, in such cases the
214      stmt itself should be marked.  */
215   if (STMT_VINFO_IN_PATTERN_P (stmt_info))
216     {
217       /* This is the last stmt in a sequence that was detected as a
218 	 pattern that can potentially be vectorized.  Don't mark the stmt
219 	 as relevant/live because it's not going to be vectorized.
220 	 Instead mark the pattern-stmt that replaces it.  */
221 
222       if (dump_enabled_p ())
223 	dump_printf_loc (MSG_NOTE, vect_location,
224 			 "last stmt in pattern. don't mark"
225 			 " relevant/live.\n");
226       stmt_vec_info old_stmt_info = stmt_info;
227       stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
228       gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
229       save_relevant = STMT_VINFO_RELEVANT (stmt_info);
230       save_live_p = STMT_VINFO_LIVE_P (stmt_info);
231     }
232 
233   STMT_VINFO_LIVE_P (stmt_info) |= live_p;
234   if (relevant > STMT_VINFO_RELEVANT (stmt_info))
235     STMT_VINFO_RELEVANT (stmt_info) = relevant;
236 
237   if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
238       && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
239     {
240       if (dump_enabled_p ())
241         dump_printf_loc (MSG_NOTE, vect_location,
242                          "already marked relevant/live.\n");
243       return;
244     }
245 
246   worklist->safe_push (stmt_info);
247 }
248 
249 
250 /* Function is_simple_and_all_uses_invariant
251 
252    Return true if STMT_INFO is simple and all uses of it are invariant.  */
253 
254 bool
255 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
256 				  loop_vec_info loop_vinfo)
257 {
258   tree op;
259   ssa_op_iter iter;
260 
261   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
262   if (!stmt)
263     return false;
264 
265   FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
266     {
267       enum vect_def_type dt = vect_uninitialized_def;
268 
269       if (!vect_is_simple_use (op, loop_vinfo, &dt))
270 	{
271 	  if (dump_enabled_p ())
272 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
273 			     "use not simple.\n");
274 	  return false;
275 	}
276 
277       if (dt != vect_external_def && dt != vect_constant_def)
278 	return false;
279     }
280   return true;
281 }
282 
283 /* Function vect_stmt_relevant_p.
284 
285    Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
286    is "relevant for vectorization".
287 
288    A stmt is considered "relevant for vectorization" if:
289    - it has uses outside the loop.
290    - it has vdefs (it alters memory).
291    - control stmts in the loop (except for the exit condition).
292 
293    CHECKME: what other side effects would the vectorizer allow?  */
294 
295 static bool
296 vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
297 		      enum vect_relevant *relevant, bool *live_p)
298 {
299   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
300   ssa_op_iter op_iter;
301   imm_use_iterator imm_iter;
302   use_operand_p use_p;
303   def_operand_p def_p;
304 
305   *relevant = vect_unused_in_scope;
306   *live_p = false;
307 
308   /* cond stmt other than loop exit cond.  */
309   if (is_ctrl_stmt (stmt_info->stmt)
310       && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
311     *relevant = vect_used_in_scope;
312 
313   /* changing memory.  */
314   if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
315     if (gimple_vdef (stmt_info->stmt)
316 	&& !gimple_clobber_p (stmt_info->stmt))
317       {
318 	if (dump_enabled_p ())
319 	  dump_printf_loc (MSG_NOTE, vect_location,
320                            "vec_stmt_relevant_p: stmt has vdefs.\n");
321 	*relevant = vect_used_in_scope;
322       }
323 
324   /* uses outside the loop.  */
325   FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
326     {
327       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
328 	{
329 	  basic_block bb = gimple_bb (USE_STMT (use_p));
330 	  if (!flow_bb_inside_loop_p (loop, bb))
331 	    {
332 	      if (dump_enabled_p ())
333 		dump_printf_loc (MSG_NOTE, vect_location,
334                                  "vec_stmt_relevant_p: used out of loop.\n");
335 
336 	      if (is_gimple_debug (USE_STMT (use_p)))
337 		continue;
338 
339 	      /* We expect all such uses to be in the loop exit phis
340 		 (because of loop closed form)   */
341 	      gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
342 	      gcc_assert (bb == single_exit (loop)->dest);
343 
344               *live_p = true;
345 	    }
346 	}
347     }
348 
349   if (*live_p && *relevant == vect_unused_in_scope
350       && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
351     {
352       if (dump_enabled_p ())
353 	dump_printf_loc (MSG_NOTE, vect_location,
354 			 "vec_stmt_relevant_p: stmt live but not relevant.\n");
355       *relevant = vect_used_only_live;
356     }
357 
358   return (*live_p || *relevant);
359 }
360 
361 
362 /* Function exist_non_indexing_operands_for_use_p
363 
364    USE is one of the uses attached to STMT_INFO.  Check if USE is
365    used in STMT_INFO for anything other than indexing an array.  */
366 
367 static bool
368 exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
369 {
370   tree operand;
371 
372   /* USE corresponds to some operand in STMT.  If there is no data
373      reference in STMT, then any operand that corresponds to USE
374      is not indexing an array.  */
375   if (!STMT_VINFO_DATA_REF (stmt_info))
376     return true;
377 
378   /* STMT has a data_ref. FORNOW this means that its of one of
379      the following forms:
380      -1- ARRAY_REF = var
381      -2- var = ARRAY_REF
382      (This should have been verified in analyze_data_refs).
383 
384      'var' in the second case corresponds to a def, not a use,
385      so USE cannot correspond to any operands that are not used
386      for array indexing.
387 
388      Therefore, all we need to check is if STMT falls into the
389      first case, and whether var corresponds to USE.  */
390 
391   gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
392   if (!assign || !gimple_assign_copy_p (assign))
393     {
394       gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
395       if (call && gimple_call_internal_p (call))
396 	{
397 	  internal_fn ifn = gimple_call_internal_fn (call);
398 	  int mask_index = internal_fn_mask_index (ifn);
399 	  if (mask_index >= 0
400 	      && use == gimple_call_arg (call, mask_index))
401 	    return true;
402 	  int stored_value_index = internal_fn_stored_value_index (ifn);
403 	  if (stored_value_index >= 0
404 	      && use == gimple_call_arg (call, stored_value_index))
405 	    return true;
406 	  if (internal_gather_scatter_fn_p (ifn)
407 	      && use == gimple_call_arg (call, 1))
408 	    return true;
409 	}
410       return false;
411     }
412 
413   if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
414     return false;
415   operand = gimple_assign_rhs1 (assign);
416   if (TREE_CODE (operand) != SSA_NAME)
417     return false;
418 
419   if (operand == use)
420     return true;
421 
422   return false;
423 }
424 
425 
426 /*
427    Function process_use.
428 
429    Inputs:
430    - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
431    - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
432      that defined USE.  This is done by calling mark_relevant and passing it
433      the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
434    - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
435      be performed.
436 
437    Outputs:
438    Generally, LIVE_P and RELEVANT are used to define the liveness and
439    relevance info of the DEF_STMT of this USE:
440        STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
441        STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
442    Exceptions:
443    - case 1: If USE is used only for address computations (e.g. array indexing),
444    which does not need to be directly vectorized, then the liveness/relevance
445    of the respective DEF_STMT is left unchanged.
446    - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
447    we skip DEF_STMT cause it had already been processed.
448    - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
449    "relevant" will be modified accordingly.
450 
451    Return true if everything is as expected. Return false otherwise.  */
452 
453 static opt_result
454 process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
455 	     enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
456 	     bool force)
457 {
458   stmt_vec_info dstmt_vinfo;
459   basic_block bb, def_bb;
460   enum vect_def_type dt;
461 
462   /* case 1: we are only interested in uses that need to be vectorized.  Uses
463      that are used for address computation are not considered relevant.  */
464   if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
465     return opt_result::success ();
466 
467   if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
468     return opt_result::failure_at (stmt_vinfo->stmt,
469 				   "not vectorized:"
470 				   " unsupported use in stmt.\n");
471 
472   if (!dstmt_vinfo)
473     return opt_result::success ();
474 
475   def_bb = gimple_bb (dstmt_vinfo->stmt);
476 
477   /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
478      DSTMT_VINFO must have already been processed, because this should be the
479      only way that STMT, which is a reduction-phi, was put in the worklist,
480      as there should be no other uses for DSTMT_VINFO in the loop.  So we just
481      check that everything is as expected, and we are done.  */
482   bb = gimple_bb (stmt_vinfo->stmt);
483   if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
484       && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
485       && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
486       && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
487       && bb->loop_father == def_bb->loop_father)
488     {
489       if (dump_enabled_p ())
490 	dump_printf_loc (MSG_NOTE, vect_location,
491                          "reduc-stmt defining reduc-phi in the same nest.\n");
492       gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
493       gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
494 		  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
495       return opt_result::success ();
496     }
497 
498   /* case 3a: outer-loop stmt defining an inner-loop stmt:
499 	outer-loop-header-bb:
500 		d = dstmt_vinfo
501 	inner-loop:
502 		stmt # use (d)
503 	outer-loop-tail-bb:
504 		...		  */
505   if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
506     {
507       if (dump_enabled_p ())
508 	dump_printf_loc (MSG_NOTE, vect_location,
509                          "outer-loop def-stmt defining inner-loop stmt.\n");
510 
511       switch (relevant)
512 	{
513 	case vect_unused_in_scope:
514 	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
515 		      vect_used_in_scope : vect_unused_in_scope;
516 	  break;
517 
518 	case vect_used_in_outer_by_reduction:
519           gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
520 	  relevant = vect_used_by_reduction;
521 	  break;
522 
523 	case vect_used_in_outer:
524           gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
525 	  relevant = vect_used_in_scope;
526 	  break;
527 
528 	case vect_used_in_scope:
529 	  break;
530 
531 	default:
532 	  gcc_unreachable ();
533 	}
534     }
535 
536   /* case 3b: inner-loop stmt defining an outer-loop stmt:
537 	outer-loop-header-bb:
538 		...
539 	inner-loop:
540 		d = dstmt_vinfo
541 	outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
542 		stmt # use (d)		*/
543   else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
544     {
545       if (dump_enabled_p ())
546 	dump_printf_loc (MSG_NOTE, vect_location,
547                          "inner-loop def-stmt defining outer-loop stmt.\n");
548 
549       switch (relevant)
550         {
551         case vect_unused_in_scope:
552           relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
553             || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
554                       vect_used_in_outer_by_reduction : vect_unused_in_scope;
555           break;
556 
557         case vect_used_by_reduction:
558 	case vect_used_only_live:
559           relevant = vect_used_in_outer_by_reduction;
560           break;
561 
562         case vect_used_in_scope:
563           relevant = vect_used_in_outer;
564           break;
565 
566         default:
567           gcc_unreachable ();
568         }
569     }
570   /* We are also not interested in uses on loop PHI backedges that are
571      inductions.  Otherwise we'll needlessly vectorize the IV increment
572      and cause hybrid SLP for SLP inductions.  Unless the PHI is live
573      of course.  */
574   else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
575 	   && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
576 	   && ! STMT_VINFO_LIVE_P (stmt_vinfo)
577 	   && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
578 				      loop_latch_edge (bb->loop_father))
579 	       == use))
580     {
581       if (dump_enabled_p ())
582 	dump_printf_loc (MSG_NOTE, vect_location,
583                          "induction value on backedge.\n");
584       return opt_result::success ();
585     }
586 
587 
588   vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
589   return opt_result::success ();
590 }
591 
592 
593 /* Function vect_mark_stmts_to_be_vectorized.
594 
595    Not all stmts in the loop need to be vectorized. For example:
596 
597      for i...
598        for j...
599    1.    T0 = i + j
600    2.	 T1 = a[T0]
601 
602    3.    j = j + 1
603 
604    Stmt 1 and 3 do not need to be vectorized, because loop control and
605    addressing of vectorized data-refs are handled differently.
606 
607    This pass detects such stmts.  */
608 
609 opt_result
610 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
611 {
612   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
613   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
614   unsigned int nbbs = loop->num_nodes;
615   gimple_stmt_iterator si;
616   unsigned int i;
617   basic_block bb;
618   bool live_p;
619   enum vect_relevant relevant;
620 
621   DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
622 
623   auto_vec<stmt_vec_info, 64> worklist;
624 
625   /* 1. Init worklist.  */
626   for (i = 0; i < nbbs; i++)
627     {
628       bb = bbs[i];
629       for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
630 	{
631 	  stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
632 	  if (dump_enabled_p ())
633 	    dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
634 			     phi_info->stmt);
635 
636 	  if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
637 	    vect_mark_relevant (&worklist, phi_info, relevant, live_p);
638 	}
639       for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
640 	{
641 	  stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
642 	  if (dump_enabled_p ())
643 	      dump_printf_loc (MSG_NOTE, vect_location,
644 			       "init: stmt relevant? %G", stmt_info->stmt);
645 
646 	  if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
647 	    vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
648 	}
649     }
650 
651   /* 2. Process_worklist */
652   while (worklist.length () > 0)
653     {
654       use_operand_p use_p;
655       ssa_op_iter iter;
656 
657       stmt_vec_info stmt_vinfo = worklist.pop ();
658       if (dump_enabled_p ())
659 	dump_printf_loc (MSG_NOTE, vect_location,
660 			 "worklist: examine stmt: %G", stmt_vinfo->stmt);
661 
662       /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
663 	 (DEF_STMT) as relevant/irrelevant according to the relevance property
664 	 of STMT.  */
665       relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
666 
667       /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
668 	 propagated as is to the DEF_STMTs of its USEs.
669 
670 	 One exception is when STMT has been identified as defining a reduction
671 	 variable; in this case we set the relevance to vect_used_by_reduction.
672 	 This is because we distinguish between two kinds of relevant stmts -
673 	 those that are used by a reduction computation, and those that are
674 	 (also) used by a regular computation.  This allows us later on to
675 	 identify stmts that are used solely by a reduction, and therefore the
676 	 order of the results that they produce does not have to be kept.  */
677 
678       switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
679         {
680           case vect_reduction_def:
681 	    gcc_assert (relevant != vect_unused_in_scope);
682 	    if (relevant != vect_unused_in_scope
683 		&& relevant != vect_used_in_scope
684 		&& relevant != vect_used_by_reduction
685 		&& relevant != vect_used_only_live)
686 	      return opt_result::failure_at
687 		(stmt_vinfo->stmt, "unsupported use of reduction.\n");
688 	    break;
689 
690           case vect_nested_cycle:
691 	    if (relevant != vect_unused_in_scope
692 		&& relevant != vect_used_in_outer_by_reduction
693 		&& relevant != vect_used_in_outer)
694 	      return opt_result::failure_at
695 		(stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
696             break;
697 
698           case vect_double_reduction_def:
699 	    if (relevant != vect_unused_in_scope
700 		&& relevant != vect_used_by_reduction
701 		&& relevant != vect_used_only_live)
702 	      return opt_result::failure_at
703 		(stmt_vinfo->stmt, "unsupported use of double reduction.\n");
704             break;
705 
706           default:
707             break;
708         }
709 
710       if (is_pattern_stmt_p (stmt_vinfo))
711         {
712           /* Pattern statements are not inserted into the code, so
713              FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
714              have to scan the RHS or function arguments instead.  */
715 	  if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
716 	    {
717 	      enum tree_code rhs_code = gimple_assign_rhs_code (assign);
718 	      tree op = gimple_assign_rhs1 (assign);
719 
720 	      i = 1;
721 	      if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
722 		{
723 		  opt_result res
724 		    = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
725 				   loop_vinfo, relevant, &worklist, false);
726 		  if (!res)
727 		    return res;
728 		  res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
729 				     loop_vinfo, relevant, &worklist, false);
730 		  if (!res)
731 		    return res;
732 		  i = 2;
733 		}
734 	      for (; i < gimple_num_ops (assign); i++)
735 		{
736 		  op = gimple_op (assign, i);
737                   if (TREE_CODE (op) == SSA_NAME)
738 		    {
739 		      opt_result res
740 			= process_use (stmt_vinfo, op, loop_vinfo, relevant,
741 				       &worklist, false);
742 		      if (!res)
743 			return res;
744 		    }
745                  }
746             }
747 	  else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
748 	    {
749 	      for (i = 0; i < gimple_call_num_args (call); i++)
750 		{
751 		  tree arg = gimple_call_arg (call, i);
752 		  opt_result res
753 		    = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
754 				   &worklist, false);
755 		  if (!res)
756 		    return res;
757 		}
758 	    }
759         }
760       else
761 	FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
762           {
763             tree op = USE_FROM_PTR (use_p);
764 	    opt_result res
765 	      = process_use (stmt_vinfo, op, loop_vinfo, relevant,
766 			     &worklist, false);
767 	    if (!res)
768 	      return res;
769           }
770 
771       if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
772 	{
773 	  gather_scatter_info gs_info;
774 	  if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
775 	    gcc_unreachable ();
776 	  opt_result res
777 	    = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
778 			   &worklist, true);
779 	  if (!res)
780 	    return res;
781 	}
782     } /* while worklist */
783 
784   return opt_result::success ();
785 }
786 
787 /* Compute the prologue cost for invariant or constant operands.  */
788 
789 static unsigned
790 vect_prologue_cost_for_slp_op (slp_tree node, stmt_vec_info stmt_info,
791 			       unsigned opno, enum vect_def_type dt,
792 			       stmt_vector_for_cost *cost_vec)
793 {
794   gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
795   tree op = gimple_op (stmt, opno);
796   unsigned prologue_cost = 0;
797 
798   /* Without looking at the actual initializer a vector of
799      constants can be implemented as load from the constant pool.
800      When all elements are the same we can use a splat.  */
801   tree vectype = get_vectype_for_scalar_type (TREE_TYPE (op));
802   unsigned group_size = SLP_TREE_SCALAR_STMTS (node).length ();
803   unsigned num_vects_to_check;
804   unsigned HOST_WIDE_INT const_nunits;
805   unsigned nelt_limit;
806   if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
807       && ! multiple_p (const_nunits, group_size))
808     {
809       num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
810       nelt_limit = const_nunits;
811     }
812   else
813     {
814       /* If either the vector has variable length or the vectors
815 	 are composed of repeated whole groups we only need to
816 	 cost construction once.  All vectors will be the same.  */
817       num_vects_to_check = 1;
818       nelt_limit = group_size;
819     }
820   tree elt = NULL_TREE;
821   unsigned nelt = 0;
822   for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
823     {
824       unsigned si = j % group_size;
825       if (nelt == 0)
826 	elt = gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt, opno);
827       /* ???  We're just tracking whether all operands of a single
828 	 vector initializer are the same, ideally we'd check if
829 	 we emitted the same one already.  */
830       else if (elt != gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt,
831 				 opno))
832 	elt = NULL_TREE;
833       nelt++;
834       if (nelt == nelt_limit)
835 	{
836 	  /* ???  We need to pass down stmt_info for a vector type
837 	     even if it points to the wrong stmt.  */
838 	  prologue_cost += record_stmt_cost
839 	      (cost_vec, 1,
840 	       dt == vect_external_def
841 	       ? (elt ? scalar_to_vec : vec_construct)
842 	       : vector_load,
843 	       stmt_info, 0, vect_prologue);
844 	  nelt = 0;
845 	}
846     }
847 
848   return prologue_cost;
849 }
850 
851 /* Function vect_model_simple_cost.
852 
853    Models cost for simple operations, i.e. those that only emit ncopies of a
854    single op.  Right now, this does not account for multiple insns that could
855    be generated for the single vector op.  We will handle that shortly.  */
856 
857 static void
858 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
859 			enum vect_def_type *dt,
860 			int ndts,
861 			slp_tree node,
862 			stmt_vector_for_cost *cost_vec)
863 {
864   int inside_cost = 0, prologue_cost = 0;
865 
866   gcc_assert (cost_vec != NULL);
867 
868   /* ???  Somehow we need to fix this at the callers.  */
869   if (node)
870     ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
871 
872   if (node)
873     {
874       /* Scan operands and account for prologue cost of constants/externals.
875 	 ???  This over-estimates cost for multiple uses and should be
876 	 re-engineered.  */
877       gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
878       tree lhs = gimple_get_lhs (stmt);
879       for (unsigned i = 0; i < gimple_num_ops (stmt); ++i)
880 	{
881 	  tree op = gimple_op (stmt, i);
882 	  enum vect_def_type dt;
883 	  if (!op || op == lhs)
884 	    continue;
885 	  if (vect_is_simple_use (op, stmt_info->vinfo, &dt)
886 	      && (dt == vect_constant_def || dt == vect_external_def))
887 	    prologue_cost += vect_prologue_cost_for_slp_op (node, stmt_info,
888 							    i, dt, cost_vec);
889 	}
890     }
891   else
892     /* Cost the "broadcast" of a scalar operand in to a vector operand.
893        Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
894        cost model.  */
895     for (int i = 0; i < ndts; i++)
896       if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
897 	prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
898 					   stmt_info, 0, vect_prologue);
899 
900   /* Adjust for two-operator SLP nodes.  */
901   if (node && SLP_TREE_TWO_OPERATORS (node))
902     {
903       ncopies *= 2;
904       inside_cost += record_stmt_cost (cost_vec, ncopies, vec_perm,
905 				       stmt_info, 0, vect_body);
906     }
907 
908   /* Pass the inside-of-loop statements to the target-specific cost model.  */
909   inside_cost += record_stmt_cost (cost_vec, ncopies, vector_stmt,
910 				   stmt_info, 0, vect_body);
911 
912   if (dump_enabled_p ())
913     dump_printf_loc (MSG_NOTE, vect_location,
914                      "vect_model_simple_cost: inside_cost = %d, "
915                      "prologue_cost = %d .\n", inside_cost, prologue_cost);
916 }
917 
918 
919 /* Model cost for type demotion and promotion operations.  PWR is normally
920    zero for single-step promotions and demotions.  It will be one if
921    two-step promotion/demotion is required, and so on.  Each additional
922    step doubles the number of instructions required.  */
923 
924 static void
925 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
926 				    enum vect_def_type *dt, int pwr,
927 				    stmt_vector_for_cost *cost_vec)
928 {
929   int i, tmp;
930   int inside_cost = 0, prologue_cost = 0;
931 
932   for (i = 0; i < pwr + 1; i++)
933     {
934       tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
935 	(i + 1) : i;
936       inside_cost += record_stmt_cost (cost_vec, vect_pow2 (tmp),
937 				       vec_promote_demote, stmt_info, 0,
938 				       vect_body);
939     }
940 
941   /* FORNOW: Assuming maximum 2 args per stmts.  */
942   for (i = 0; i < 2; i++)
943     if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
944       prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
945 					 stmt_info, 0, vect_prologue);
946 
947   if (dump_enabled_p ())
948     dump_printf_loc (MSG_NOTE, vect_location,
949                      "vect_model_promotion_demotion_cost: inside_cost = %d, "
950                      "prologue_cost = %d .\n", inside_cost, prologue_cost);
951 }
952 
953 /* Returns true if the current function returns DECL.  */
954 
955 static bool
956 cfun_returns (tree decl)
957 {
958   edge_iterator ei;
959   edge e;
960   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
961     {
962       greturn *ret = safe_dyn_cast <greturn *> (last_stmt (e->src));
963       if (!ret)
964 	continue;
965       if (gimple_return_retval (ret) == decl)
966 	return true;
967       /* We often end up with an aggregate copy to the result decl,
968          handle that case as well.  First skip intermediate clobbers
969 	 though.  */
970       gimple *def = ret;
971       do
972 	{
973 	  def = SSA_NAME_DEF_STMT (gimple_vuse (def));
974 	}
975       while (gimple_clobber_p (def));
976       if (is_a <gassign *> (def)
977 	  && gimple_assign_lhs (def) == gimple_return_retval (ret)
978 	  && gimple_assign_rhs1 (def) == decl)
979 	return true;
980     }
981   return false;
982 }
983 
984 /* Function vect_model_store_cost
985 
986    Models cost for stores.  In the case of grouped accesses, one access
987    has the overhead of the grouped access attributed to it.  */
988 
989 static void
990 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
991 		       enum vect_def_type dt,
992 		       vect_memory_access_type memory_access_type,
993 		       vec_load_store_type vls_type, slp_tree slp_node,
994 		       stmt_vector_for_cost *cost_vec)
995 {
996   unsigned int inside_cost = 0, prologue_cost = 0;
997   stmt_vec_info first_stmt_info = stmt_info;
998   bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
999 
1000   /* ???  Somehow we need to fix this at the callers.  */
1001   if (slp_node)
1002     ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1003 
1004   if (vls_type == VLS_STORE_INVARIANT)
1005     {
1006       if (slp_node)
1007 	prologue_cost += vect_prologue_cost_for_slp_op (slp_node, stmt_info,
1008 							1, dt, cost_vec);
1009       else
1010 	prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
1011 					   stmt_info, 0, vect_prologue);
1012     }
1013 
1014   /* Grouped stores update all elements in the group at once,
1015      so we want the DR for the first statement.  */
1016   if (!slp_node && grouped_access_p)
1017     first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1018 
1019   /* True if we should include any once-per-group costs as well as
1020      the cost of the statement itself.  For SLP we only get called
1021      once per group anyhow.  */
1022   bool first_stmt_p = (first_stmt_info == stmt_info);
1023 
1024   /* We assume that the cost of a single store-lanes instruction is
1025      equivalent to the cost of DR_GROUP_SIZE separate stores.  If a grouped
1026      access is instead being provided by a permute-and-store operation,
1027      include the cost of the permutes.  */
1028   if (first_stmt_p
1029       && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1030     {
1031       /* Uses a high and low interleave or shuffle operations for each
1032 	 needed permute.  */
1033       int group_size = DR_GROUP_SIZE (first_stmt_info);
1034       int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1035       inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
1036 				      stmt_info, 0, vect_body);
1037 
1038       if (dump_enabled_p ())
1039         dump_printf_loc (MSG_NOTE, vect_location,
1040                          "vect_model_store_cost: strided group_size = %d .\n",
1041                          group_size);
1042     }
1043 
1044   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1045   /* Costs of the stores.  */
1046   if (memory_access_type == VMAT_ELEMENTWISE
1047       || memory_access_type == VMAT_GATHER_SCATTER)
1048     {
1049       /* N scalar stores plus extracting the elements.  */
1050       unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1051       inside_cost += record_stmt_cost (cost_vec,
1052 				       ncopies * assumed_nunits,
1053 				       scalar_store, stmt_info, 0, vect_body);
1054     }
1055   else
1056     vect_get_store_cost (stmt_info, ncopies, &inside_cost, cost_vec);
1057 
1058   if (memory_access_type == VMAT_ELEMENTWISE
1059       || memory_access_type == VMAT_STRIDED_SLP)
1060     {
1061       /* N scalar stores plus extracting the elements.  */
1062       unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1063       inside_cost += record_stmt_cost (cost_vec,
1064 				       ncopies * assumed_nunits,
1065 				       vec_to_scalar, stmt_info, 0, vect_body);
1066     }
1067 
1068   /* When vectorizing a store into the function result assign
1069      a penalty if the function returns in a multi-register location.
1070      In this case we assume we'll end up with having to spill the
1071      vector result and do piecewise loads as a conservative estimate.  */
1072   tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
1073   if (base
1074       && (TREE_CODE (base) == RESULT_DECL
1075 	  || (DECL_P (base) && cfun_returns (base)))
1076       && !aggregate_value_p (base, cfun->decl))
1077     {
1078       rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
1079       /* ???  Handle PARALLEL in some way.  */
1080       if (REG_P (reg))
1081 	{
1082 	  int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
1083 	  /* Assume that a single reg-reg move is possible and cheap,
1084 	     do not account for vector to gp register move cost.  */
1085 	  if (nregs > 1)
1086 	    {
1087 	      /* Spill.  */
1088 	      prologue_cost += record_stmt_cost (cost_vec, ncopies,
1089 						 vector_store,
1090 						 stmt_info, 0, vect_epilogue);
1091 	      /* Loads.  */
1092 	      prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
1093 						 scalar_load,
1094 						 stmt_info, 0, vect_epilogue);
1095 	    }
1096 	}
1097     }
1098 
1099   if (dump_enabled_p ())
1100     dump_printf_loc (MSG_NOTE, vect_location,
1101                      "vect_model_store_cost: inside_cost = %d, "
1102                      "prologue_cost = %d .\n", inside_cost, prologue_cost);
1103 }
1104 
1105 
1106 /* Calculate cost of DR's memory access.  */
1107 void
1108 vect_get_store_cost (stmt_vec_info stmt_info, int ncopies,
1109 		     unsigned int *inside_cost,
1110 		     stmt_vector_for_cost *body_cost_vec)
1111 {
1112   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1113   int alignment_support_scheme
1114     = vect_supportable_dr_alignment (dr_info, false);
1115 
1116   switch (alignment_support_scheme)
1117     {
1118     case dr_aligned:
1119       {
1120 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1121 					  vector_store, stmt_info, 0,
1122 					  vect_body);
1123 
1124         if (dump_enabled_p ())
1125           dump_printf_loc (MSG_NOTE, vect_location,
1126                            "vect_model_store_cost: aligned.\n");
1127         break;
1128       }
1129 
1130     case dr_unaligned_supported:
1131       {
1132         /* Here, we assign an additional cost for the unaligned store.  */
1133 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1134 					  unaligned_store, stmt_info,
1135 					  DR_MISALIGNMENT (dr_info),
1136 					  vect_body);
1137         if (dump_enabled_p ())
1138           dump_printf_loc (MSG_NOTE, vect_location,
1139                            "vect_model_store_cost: unaligned supported by "
1140                            "hardware.\n");
1141         break;
1142       }
1143 
1144     case dr_unaligned_unsupported:
1145       {
1146         *inside_cost = VECT_MAX_COST;
1147 
1148         if (dump_enabled_p ())
1149           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1150                            "vect_model_store_cost: unsupported access.\n");
1151         break;
1152       }
1153 
1154     default:
1155       gcc_unreachable ();
1156     }
1157 }
1158 
1159 
1160 /* Function vect_model_load_cost
1161 
1162    Models cost for loads.  In the case of grouped accesses, one access has
1163    the overhead of the grouped access attributed to it.  Since unaligned
1164    accesses are supported for loads, we also account for the costs of the
1165    access scheme chosen.  */
1166 
1167 static void
1168 vect_model_load_cost (stmt_vec_info stmt_info, unsigned ncopies,
1169 		      vect_memory_access_type memory_access_type,
1170 		      slp_instance instance,
1171 		      slp_tree slp_node,
1172 		      stmt_vector_for_cost *cost_vec)
1173 {
1174   unsigned int inside_cost = 0, prologue_cost = 0;
1175   bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1176 
1177   gcc_assert (cost_vec);
1178 
1179   /* ???  Somehow we need to fix this at the callers.  */
1180   if (slp_node)
1181     ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1182 
1183   if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
1184     {
1185       /* If the load is permuted then the alignment is determined by
1186 	 the first group element not by the first scalar stmt DR.  */
1187       stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1188       /* Record the cost for the permutation.  */
1189       unsigned n_perms;
1190       unsigned assumed_nunits
1191 	= vect_nunits_for_cost (STMT_VINFO_VECTYPE (first_stmt_info));
1192       unsigned slp_vf = (ncopies * assumed_nunits) / instance->group_size;
1193       vect_transform_slp_perm_load (slp_node, vNULL, NULL,
1194 				    slp_vf, instance, true,
1195 				    &n_perms);
1196       inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
1197 				       first_stmt_info, 0, vect_body);
1198       /* And adjust the number of loads performed.  This handles
1199 	 redundancies as well as loads that are later dead.  */
1200       auto_sbitmap perm (DR_GROUP_SIZE (first_stmt_info));
1201       bitmap_clear (perm);
1202       for (unsigned i = 0;
1203 	   i < SLP_TREE_LOAD_PERMUTATION (slp_node).length (); ++i)
1204 	bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (slp_node)[i]);
1205       ncopies = 0;
1206       bool load_seen = false;
1207       for (unsigned i = 0; i < DR_GROUP_SIZE (first_stmt_info); ++i)
1208 	{
1209 	  if (i % assumed_nunits == 0)
1210 	    {
1211 	      if (load_seen)
1212 		ncopies++;
1213 	      load_seen = false;
1214 	    }
1215 	  if (bitmap_bit_p (perm, i))
1216 	    load_seen = true;
1217 	}
1218       if (load_seen)
1219 	ncopies++;
1220       gcc_assert (ncopies
1221 		  <= (DR_GROUP_SIZE (first_stmt_info)
1222 		      - DR_GROUP_GAP (first_stmt_info)
1223 		      + assumed_nunits - 1) / assumed_nunits);
1224     }
1225 
1226   /* Grouped loads read all elements in the group at once,
1227      so we want the DR for the first statement.  */
1228   stmt_vec_info first_stmt_info = stmt_info;
1229   if (!slp_node && grouped_access_p)
1230     first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1231 
1232   /* True if we should include any once-per-group costs as well as
1233      the cost of the statement itself.  For SLP we only get called
1234      once per group anyhow.  */
1235   bool first_stmt_p = (first_stmt_info == stmt_info);
1236 
1237   /* We assume that the cost of a single load-lanes instruction is
1238      equivalent to the cost of DR_GROUP_SIZE separate loads.  If a grouped
1239      access is instead being provided by a load-and-permute operation,
1240      include the cost of the permutes.  */
1241   if (first_stmt_p
1242       && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1243     {
1244       /* Uses an even and odd extract operations or shuffle operations
1245 	 for each needed permute.  */
1246       int group_size = DR_GROUP_SIZE (first_stmt_info);
1247       int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1248       inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
1249 				       stmt_info, 0, vect_body);
1250 
1251       if (dump_enabled_p ())
1252         dump_printf_loc (MSG_NOTE, vect_location,
1253                          "vect_model_load_cost: strided group_size = %d .\n",
1254                          group_size);
1255     }
1256 
1257   /* The loads themselves.  */
1258   if (memory_access_type == VMAT_ELEMENTWISE
1259       || memory_access_type == VMAT_GATHER_SCATTER)
1260     {
1261       /* N scalar loads plus gathering them into a vector.  */
1262       tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1263       unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1264       inside_cost += record_stmt_cost (cost_vec,
1265 				       ncopies * assumed_nunits,
1266 				       scalar_load, stmt_info, 0, vect_body);
1267     }
1268   else
1269     vect_get_load_cost (stmt_info, ncopies, first_stmt_p,
1270 			&inside_cost, &prologue_cost,
1271 			cost_vec, cost_vec, true);
1272   if (memory_access_type == VMAT_ELEMENTWISE
1273       || memory_access_type == VMAT_STRIDED_SLP)
1274     inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
1275 				     stmt_info, 0, vect_body);
1276 
1277   if (dump_enabled_p ())
1278     dump_printf_loc (MSG_NOTE, vect_location,
1279                      "vect_model_load_cost: inside_cost = %d, "
1280                      "prologue_cost = %d .\n", inside_cost, prologue_cost);
1281 }
1282 
1283 
1284 /* Calculate cost of DR's memory access.  */
1285 void
1286 vect_get_load_cost (stmt_vec_info stmt_info, int ncopies,
1287 		    bool add_realign_cost, unsigned int *inside_cost,
1288 		    unsigned int *prologue_cost,
1289 		    stmt_vector_for_cost *prologue_cost_vec,
1290 		    stmt_vector_for_cost *body_cost_vec,
1291 		    bool record_prologue_costs)
1292 {
1293   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1294   int alignment_support_scheme
1295     = vect_supportable_dr_alignment (dr_info, false);
1296 
1297   switch (alignment_support_scheme)
1298     {
1299     case dr_aligned:
1300       {
1301 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1302 					  stmt_info, 0, vect_body);
1303 
1304         if (dump_enabled_p ())
1305           dump_printf_loc (MSG_NOTE, vect_location,
1306                            "vect_model_load_cost: aligned.\n");
1307 
1308         break;
1309       }
1310     case dr_unaligned_supported:
1311       {
1312         /* Here, we assign an additional cost for the unaligned load.  */
1313 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1314 					  unaligned_load, stmt_info,
1315 					  DR_MISALIGNMENT (dr_info),
1316 					  vect_body);
1317 
1318         if (dump_enabled_p ())
1319           dump_printf_loc (MSG_NOTE, vect_location,
1320                            "vect_model_load_cost: unaligned supported by "
1321                            "hardware.\n");
1322 
1323         break;
1324       }
1325     case dr_explicit_realign:
1326       {
1327 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1328 					  vector_load, stmt_info, 0, vect_body);
1329 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1330 					  vec_perm, stmt_info, 0, vect_body);
1331 
1332         /* FIXME: If the misalignment remains fixed across the iterations of
1333            the containing loop, the following cost should be added to the
1334            prologue costs.  */
1335         if (targetm.vectorize.builtin_mask_for_load)
1336 	  *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1337 					    stmt_info, 0, vect_body);
1338 
1339         if (dump_enabled_p ())
1340           dump_printf_loc (MSG_NOTE, vect_location,
1341                            "vect_model_load_cost: explicit realign\n");
1342 
1343         break;
1344       }
1345     case dr_explicit_realign_optimized:
1346       {
1347         if (dump_enabled_p ())
1348           dump_printf_loc (MSG_NOTE, vect_location,
1349                            "vect_model_load_cost: unaligned software "
1350                            "pipelined.\n");
1351 
1352         /* Unaligned software pipeline has a load of an address, an initial
1353            load, and possibly a mask operation to "prime" the loop.  However,
1354            if this is an access in a group of loads, which provide grouped
1355            access, then the above cost should only be considered for one
1356            access in the group.  Inside the loop, there is a load op
1357            and a realignment op.  */
1358 
1359         if (add_realign_cost && record_prologue_costs)
1360           {
1361 	    *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1362 						vector_stmt, stmt_info,
1363 						0, vect_prologue);
1364             if (targetm.vectorize.builtin_mask_for_load)
1365 	      *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1366 						  vector_stmt, stmt_info,
1367 						  0, vect_prologue);
1368           }
1369 
1370 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1371 					  stmt_info, 0, vect_body);
1372 	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1373 					  stmt_info, 0, vect_body);
1374 
1375         if (dump_enabled_p ())
1376           dump_printf_loc (MSG_NOTE, vect_location,
1377                            "vect_model_load_cost: explicit realign optimized"
1378                            "\n");
1379 
1380         break;
1381       }
1382 
1383     case dr_unaligned_unsupported:
1384       {
1385         *inside_cost = VECT_MAX_COST;
1386 
1387         if (dump_enabled_p ())
1388           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1389                            "vect_model_load_cost: unsupported access.\n");
1390         break;
1391       }
1392 
1393     default:
1394       gcc_unreachable ();
1395     }
1396 }
1397 
1398 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1399    the loop preheader for the vectorized stmt STMT_VINFO.  */
1400 
1401 static void
1402 vect_init_vector_1 (stmt_vec_info stmt_vinfo, gimple *new_stmt,
1403 		    gimple_stmt_iterator *gsi)
1404 {
1405   if (gsi)
1406     vect_finish_stmt_generation (stmt_vinfo, new_stmt, gsi);
1407   else
1408     {
1409       loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1410 
1411       if (loop_vinfo)
1412         {
1413           struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1414 	  basic_block new_bb;
1415 	  edge pe;
1416 
1417 	  if (nested_in_vect_loop_p (loop, stmt_vinfo))
1418 	    loop = loop->inner;
1419 
1420 	  pe = loop_preheader_edge (loop);
1421           new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1422           gcc_assert (!new_bb);
1423 	}
1424       else
1425        {
1426           bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1427           basic_block bb;
1428           gimple_stmt_iterator gsi_bb_start;
1429 
1430           gcc_assert (bb_vinfo);
1431           bb = BB_VINFO_BB (bb_vinfo);
1432           gsi_bb_start = gsi_after_labels (bb);
1433           gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1434        }
1435     }
1436 
1437   if (dump_enabled_p ())
1438     dump_printf_loc (MSG_NOTE, vect_location,
1439 		     "created new init_stmt: %G", new_stmt);
1440 }
1441 
1442 /* Function vect_init_vector.
1443 
1444    Insert a new stmt (INIT_STMT) that initializes a new variable of type
1445    TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
1446    vector type a vector with all elements equal to VAL is created first.
1447    Place the initialization at BSI if it is not NULL.  Otherwise, place the
1448    initialization at the loop preheader.
1449    Return the DEF of INIT_STMT.
1450    It will be used in the vectorization of STMT_INFO.  */
1451 
1452 tree
1453 vect_init_vector (stmt_vec_info stmt_info, tree val, tree type,
1454 		  gimple_stmt_iterator *gsi)
1455 {
1456   gimple *init_stmt;
1457   tree new_temp;
1458 
1459   /* We abuse this function to push sth to a SSA name with initial 'val'.  */
1460   if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1461     {
1462       gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1463       if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1464 	{
1465 	  /* Scalar boolean value should be transformed into
1466 	     all zeros or all ones value before building a vector.  */
1467 	  if (VECTOR_BOOLEAN_TYPE_P (type))
1468 	    {
1469 	      tree true_val = build_all_ones_cst (TREE_TYPE (type));
1470 	      tree false_val = build_zero_cst (TREE_TYPE (type));
1471 
1472 	      if (CONSTANT_CLASS_P (val))
1473 		val = integer_zerop (val) ? false_val : true_val;
1474 	      else
1475 		{
1476 		  new_temp = make_ssa_name (TREE_TYPE (type));
1477 		  init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1478 						   val, true_val, false_val);
1479 		  vect_init_vector_1 (stmt_info, init_stmt, gsi);
1480 		  val = new_temp;
1481 		}
1482 	    }
1483 	  else
1484 	    {
1485 	      gimple_seq stmts = NULL;
1486 	      if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1487 		val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1488 				    TREE_TYPE (type), val);
1489 	      else
1490 		/* ???  Condition vectorization expects us to do
1491 		   promotion of invariant/external defs.  */
1492 		val = gimple_convert (&stmts, TREE_TYPE (type), val);
1493 	      for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
1494 		   !gsi_end_p (gsi2); )
1495 		{
1496 		  init_stmt = gsi_stmt (gsi2);
1497 		  gsi_remove (&gsi2, false);
1498 		  vect_init_vector_1 (stmt_info, init_stmt, gsi);
1499 		}
1500 	    }
1501 	}
1502       val = build_vector_from_val (type, val);
1503     }
1504 
1505   new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1506   init_stmt = gimple_build_assign (new_temp, val);
1507   vect_init_vector_1 (stmt_info, init_stmt, gsi);
1508   return new_temp;
1509 }
1510 
1511 /* Function vect_get_vec_def_for_operand_1.
1512 
1513    For a defining stmt DEF_STMT_INFO of a scalar stmt, return a vector def
1514    with type DT that will be used in the vectorized stmt.  */
1515 
1516 tree
1517 vect_get_vec_def_for_operand_1 (stmt_vec_info def_stmt_info,
1518 				enum vect_def_type dt)
1519 {
1520   tree vec_oprnd;
1521   stmt_vec_info vec_stmt_info;
1522 
1523   switch (dt)
1524     {
1525     /* operand is a constant or a loop invariant.  */
1526     case vect_constant_def:
1527     case vect_external_def:
1528       /* Code should use vect_get_vec_def_for_operand.  */
1529       gcc_unreachable ();
1530 
1531     /* Operand is defined by a loop header phi.  In case of nested
1532        cycles we also may have uses of the backedge def.  */
1533     case vect_reduction_def:
1534     case vect_double_reduction_def:
1535     case vect_nested_cycle:
1536     case vect_induction_def:
1537       gcc_assert (gimple_code (def_stmt_info->stmt) == GIMPLE_PHI
1538 		  || dt == vect_nested_cycle);
1539       /* Fallthru.  */
1540 
1541     /* operand is defined inside the loop.  */
1542     case vect_internal_def:
1543       {
1544         /* Get the def from the vectorized stmt.  */
1545 	vec_stmt_info = STMT_VINFO_VEC_STMT (def_stmt_info);
1546 	/* Get vectorized pattern statement.  */
1547 	if (!vec_stmt_info
1548 	    && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1549 	    && !STMT_VINFO_RELEVANT (def_stmt_info))
1550 	  vec_stmt_info = (STMT_VINFO_VEC_STMT
1551 			   (STMT_VINFO_RELATED_STMT (def_stmt_info)));
1552 	gcc_assert (vec_stmt_info);
1553 	if (gphi *phi = dyn_cast <gphi *> (vec_stmt_info->stmt))
1554 	  vec_oprnd = PHI_RESULT (phi);
1555 	else
1556 	  vec_oprnd = gimple_get_lhs (vec_stmt_info->stmt);
1557 	return vec_oprnd;
1558       }
1559 
1560     default:
1561       gcc_unreachable ();
1562     }
1563 }
1564 
1565 
1566 /* Function vect_get_vec_def_for_operand.
1567 
1568    OP is an operand in STMT_VINFO.  This function returns a (vector) def
1569    that will be used in the vectorized stmt for STMT_VINFO.
1570 
1571    In the case that OP is an SSA_NAME which is defined in the loop, then
1572    STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1573 
1574    In case OP is an invariant or constant, a new stmt that creates a vector def
1575    needs to be introduced.  VECTYPE may be used to specify a required type for
1576    vector invariant.  */
1577 
1578 tree
1579 vect_get_vec_def_for_operand (tree op, stmt_vec_info stmt_vinfo, tree vectype)
1580 {
1581   gimple *def_stmt;
1582   enum vect_def_type dt;
1583   bool is_simple_use;
1584   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1585 
1586   if (dump_enabled_p ())
1587     dump_printf_loc (MSG_NOTE, vect_location,
1588 		     "vect_get_vec_def_for_operand: %T\n", op);
1589 
1590   stmt_vec_info def_stmt_info;
1591   is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1592 				      &def_stmt_info, &def_stmt);
1593   gcc_assert (is_simple_use);
1594   if (def_stmt && dump_enabled_p ())
1595     dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt =  %G", def_stmt);
1596 
1597   if (dt == vect_constant_def || dt == vect_external_def)
1598     {
1599       tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1600       tree vector_type;
1601 
1602       if (vectype)
1603 	vector_type = vectype;
1604       else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1605 	       && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1606 	vector_type = build_same_sized_truth_vector_type (stmt_vectype);
1607       else
1608 	vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1609 
1610       gcc_assert (vector_type);
1611       return vect_init_vector (stmt_vinfo, op, vector_type, NULL);
1612     }
1613   else
1614     return vect_get_vec_def_for_operand_1 (def_stmt_info, dt);
1615 }
1616 
1617 
1618 /* Function vect_get_vec_def_for_stmt_copy
1619 
1620    Return a vector-def for an operand.  This function is used when the
1621    vectorized stmt to be created (by the caller to this function) is a "copy"
1622    created in case the vectorized result cannot fit in one vector, and several
1623    copies of the vector-stmt are required.  In this case the vector-def is
1624    retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1625    of the stmt that defines VEC_OPRND.  VINFO describes the vectorization.
1626 
1627    Context:
1628         In case the vectorization factor (VF) is bigger than the number
1629    of elements that can fit in a vectype (nunits), we have to generate
1630    more than one vector stmt to vectorize the scalar stmt.  This situation
1631    arises when there are multiple data-types operated upon in the loop; the
1632    smallest data-type determines the VF, and as a result, when vectorizing
1633    stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1634    vector stmt (each computing a vector of 'nunits' results, and together
1635    computing 'VF' results in each iteration).  This function is called when
1636    vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1637    which VF=16 and nunits=4, so the number of copies required is 4):
1638 
1639    scalar stmt:         vectorized into:        STMT_VINFO_RELATED_STMT
1640 
1641    S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
1642                         VS1.1:  vx.1 = memref1      VS1.2
1643                         VS1.2:  vx.2 = memref2      VS1.3
1644                         VS1.3:  vx.3 = memref3
1645 
1646    S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
1647                         VSnew.1:  vz1 = vx.1 + ...  VSnew.2
1648                         VSnew.2:  vz2 = vx.2 + ...  VSnew.3
1649                         VSnew.3:  vz3 = vx.3 + ...
1650 
1651    The vectorization of S1 is explained in vectorizable_load.
1652    The vectorization of S2:
1653         To create the first vector-stmt out of the 4 copies - VSnew.0 -
1654    the function 'vect_get_vec_def_for_operand' is called to
1655    get the relevant vector-def for each operand of S2.  For operand x it
1656    returns  the vector-def 'vx.0'.
1657 
1658         To create the remaining copies of the vector-stmt (VSnew.j), this
1659    function is called to get the relevant vector-def for each operand.  It is
1660    obtained from the respective VS1.j stmt, which is recorded in the
1661    STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1662 
1663         For example, to obtain the vector-def 'vx.1' in order to create the
1664    vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1665    Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1666    STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1667    and return its def ('vx.1').
1668    Overall, to create the above sequence this function will be called 3 times:
1669 	vx.1 = vect_get_vec_def_for_stmt_copy (vinfo, vx.0);
1670 	vx.2 = vect_get_vec_def_for_stmt_copy (vinfo, vx.1);
1671 	vx.3 = vect_get_vec_def_for_stmt_copy (vinfo, vx.2);  */
1672 
1673 tree
1674 vect_get_vec_def_for_stmt_copy (vec_info *vinfo, tree vec_oprnd)
1675 {
1676   stmt_vec_info def_stmt_info = vinfo->lookup_def (vec_oprnd);
1677   if (!def_stmt_info)
1678     /* Do nothing; can reuse same def.  */
1679     return vec_oprnd;
1680 
1681   def_stmt_info = STMT_VINFO_RELATED_STMT (def_stmt_info);
1682   gcc_assert (def_stmt_info);
1683   if (gphi *phi = dyn_cast <gphi *> (def_stmt_info->stmt))
1684     vec_oprnd = PHI_RESULT (phi);
1685   else
1686     vec_oprnd = gimple_get_lhs (def_stmt_info->stmt);
1687   return vec_oprnd;
1688 }
1689 
1690 
1691 /* Get vectorized definitions for the operands to create a copy of an original
1692    stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */
1693 
1694 void
1695 vect_get_vec_defs_for_stmt_copy (vec_info *vinfo,
1696 				 vec<tree> *vec_oprnds0,
1697 				 vec<tree> *vec_oprnds1)
1698 {
1699   tree vec_oprnd = vec_oprnds0->pop ();
1700 
1701   vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
1702   vec_oprnds0->quick_push (vec_oprnd);
1703 
1704   if (vec_oprnds1 && vec_oprnds1->length ())
1705     {
1706       vec_oprnd = vec_oprnds1->pop ();
1707       vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
1708       vec_oprnds1->quick_push (vec_oprnd);
1709     }
1710 }
1711 
1712 
1713 /* Get vectorized definitions for OP0 and OP1.  */
1714 
1715 void
1716 vect_get_vec_defs (tree op0, tree op1, stmt_vec_info stmt_info,
1717 		   vec<tree> *vec_oprnds0,
1718 		   vec<tree> *vec_oprnds1,
1719 		   slp_tree slp_node)
1720 {
1721   if (slp_node)
1722     {
1723       int nops = (op1 == NULL_TREE) ? 1 : 2;
1724       auto_vec<tree> ops (nops);
1725       auto_vec<vec<tree> > vec_defs (nops);
1726 
1727       ops.quick_push (op0);
1728       if (op1)
1729         ops.quick_push (op1);
1730 
1731       vect_get_slp_defs (ops, slp_node, &vec_defs);
1732 
1733       *vec_oprnds0 = vec_defs[0];
1734       if (op1)
1735 	*vec_oprnds1 = vec_defs[1];
1736     }
1737   else
1738     {
1739       tree vec_oprnd;
1740 
1741       vec_oprnds0->create (1);
1742       vec_oprnd = vect_get_vec_def_for_operand (op0, stmt_info);
1743       vec_oprnds0->quick_push (vec_oprnd);
1744 
1745       if (op1)
1746 	{
1747 	  vec_oprnds1->create (1);
1748 	  vec_oprnd = vect_get_vec_def_for_operand (op1, stmt_info);
1749 	  vec_oprnds1->quick_push (vec_oprnd);
1750 	}
1751     }
1752 }
1753 
1754 /* Helper function called by vect_finish_replace_stmt and
1755    vect_finish_stmt_generation.  Set the location of the new
1756    statement and create and return a stmt_vec_info for it.  */
1757 
1758 static stmt_vec_info
1759 vect_finish_stmt_generation_1 (stmt_vec_info stmt_info, gimple *vec_stmt)
1760 {
1761   vec_info *vinfo = stmt_info->vinfo;
1762 
1763   stmt_vec_info vec_stmt_info = vinfo->add_stmt (vec_stmt);
1764 
1765   if (dump_enabled_p ())
1766     dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1767 
1768   gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1769 
1770   /* While EH edges will generally prevent vectorization, stmt might
1771      e.g. be in a must-not-throw region.  Ensure newly created stmts
1772      that could throw are part of the same region.  */
1773   int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1774   if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1775     add_stmt_to_eh_lp (vec_stmt, lp_nr);
1776 
1777   return vec_stmt_info;
1778 }
1779 
1780 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1781    which sets the same scalar result as STMT_INFO did.  Create and return a
1782    stmt_vec_info for VEC_STMT.  */
1783 
1784 stmt_vec_info
1785 vect_finish_replace_stmt (stmt_vec_info stmt_info, gimple *vec_stmt)
1786 {
1787   gcc_assert (gimple_get_lhs (stmt_info->stmt) == gimple_get_lhs (vec_stmt));
1788 
1789   gimple_stmt_iterator gsi = gsi_for_stmt (stmt_info->stmt);
1790   gsi_replace (&gsi, vec_stmt, true);
1791 
1792   return vect_finish_stmt_generation_1 (stmt_info, vec_stmt);
1793 }
1794 
1795 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1796    before *GSI.  Create and return a stmt_vec_info for VEC_STMT.  */
1797 
1798 stmt_vec_info
1799 vect_finish_stmt_generation (stmt_vec_info stmt_info, gimple *vec_stmt,
1800 			     gimple_stmt_iterator *gsi)
1801 {
1802   gcc_assert (gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1803 
1804   if (!gsi_end_p (*gsi)
1805       && gimple_has_mem_ops (vec_stmt))
1806     {
1807       gimple *at_stmt = gsi_stmt (*gsi);
1808       tree vuse = gimple_vuse (at_stmt);
1809       if (vuse && TREE_CODE (vuse) == SSA_NAME)
1810 	{
1811 	  tree vdef = gimple_vdef (at_stmt);
1812 	  gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1813 	  /* If we have an SSA vuse and insert a store, update virtual
1814 	     SSA form to avoid triggering the renamer.  Do so only
1815 	     if we can easily see all uses - which is what almost always
1816 	     happens with the way vectorized stmts are inserted.  */
1817 	  if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1818 	      && ((is_gimple_assign (vec_stmt)
1819 		   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1820 		  || (is_gimple_call (vec_stmt)
1821 		      && !(gimple_call_flags (vec_stmt)
1822 			   & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1823 	    {
1824 	      tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1825 	      gimple_set_vdef (vec_stmt, new_vdef);
1826 	      SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1827 	    }
1828 	}
1829     }
1830   gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1831   return vect_finish_stmt_generation_1 (stmt_info, vec_stmt);
1832 }
1833 
1834 /* We want to vectorize a call to combined function CFN with function
1835    decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1836    as the types of all inputs.  Check whether this is possible using
1837    an internal function, returning its code if so or IFN_LAST if not.  */
1838 
1839 static internal_fn
1840 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1841 				tree vectype_out, tree vectype_in)
1842 {
1843   internal_fn ifn;
1844   if (internal_fn_p (cfn))
1845     ifn = as_internal_fn (cfn);
1846   else
1847     ifn = associated_internal_fn (fndecl);
1848   if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1849     {
1850       const direct_internal_fn_info &info = direct_internal_fn (ifn);
1851       if (info.vectorizable)
1852 	{
1853 	  tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1854 	  tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1855 	  if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1856 					      OPTIMIZE_FOR_SPEED))
1857 	    return ifn;
1858 	}
1859     }
1860   return IFN_LAST;
1861 }
1862 
1863 
1864 static tree permute_vec_elements (tree, tree, tree, stmt_vec_info,
1865 				  gimple_stmt_iterator *);
1866 
1867 /* Check whether a load or store statement in the loop described by
1868    LOOP_VINFO is possible in a fully-masked loop.  This is testing
1869    whether the vectorizer pass has the appropriate support, as well as
1870    whether the target does.
1871 
1872    VLS_TYPE says whether the statement is a load or store and VECTYPE
1873    is the type of the vector being loaded or stored.  MEMORY_ACCESS_TYPE
1874    says how the load or store is going to be implemented and GROUP_SIZE
1875    is the number of load or store statements in the containing group.
1876    If the access is a gather load or scatter store, GS_INFO describes
1877    its arguments.
1878 
1879    Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
1880    supported, otherwise record the required mask types.  */
1881 
1882 static void
1883 check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
1884 			  vec_load_store_type vls_type, int group_size,
1885 			  vect_memory_access_type memory_access_type,
1886 			  gather_scatter_info *gs_info)
1887 {
1888   /* Invariant loads need no special support.  */
1889   if (memory_access_type == VMAT_INVARIANT)
1890     return;
1891 
1892   vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1893   machine_mode vecmode = TYPE_MODE (vectype);
1894   bool is_load = (vls_type == VLS_LOAD);
1895   if (memory_access_type == VMAT_LOAD_STORE_LANES)
1896     {
1897       if (is_load
1898 	  ? !vect_load_lanes_supported (vectype, group_size, true)
1899 	  : !vect_store_lanes_supported (vectype, group_size, true))
1900 	{
1901 	  if (dump_enabled_p ())
1902 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1903 			     "can't use a fully-masked loop because the"
1904 			     " target doesn't have an appropriate masked"
1905 			     " load/store-lanes instruction.\n");
1906 	  LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1907 	  return;
1908 	}
1909       unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1910       vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1911       return;
1912     }
1913 
1914   if (memory_access_type == VMAT_GATHER_SCATTER)
1915     {
1916       internal_fn ifn = (is_load
1917 			 ? IFN_MASK_GATHER_LOAD
1918 			 : IFN_MASK_SCATTER_STORE);
1919       tree offset_type = TREE_TYPE (gs_info->offset);
1920       if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
1921 						   gs_info->memory_type,
1922 						   TYPE_SIGN (offset_type),
1923 						   gs_info->scale))
1924 	{
1925 	  if (dump_enabled_p ())
1926 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1927 			     "can't use a fully-masked loop because the"
1928 			     " target doesn't have an appropriate masked"
1929 			     " gather load or scatter store instruction.\n");
1930 	  LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1931 	  return;
1932 	}
1933       unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1934       vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1935       return;
1936     }
1937 
1938   if (memory_access_type != VMAT_CONTIGUOUS
1939       && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1940     {
1941       /* Element X of the data must come from iteration i * VF + X of the
1942 	 scalar loop.  We need more work to support other mappings.  */
1943       if (dump_enabled_p ())
1944 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1945 			 "can't use a fully-masked loop because an access"
1946 			 " isn't contiguous.\n");
1947       LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1948       return;
1949     }
1950 
1951   machine_mode mask_mode;
1952   if (!(targetm.vectorize.get_mask_mode
1953 	(GET_MODE_NUNITS (vecmode),
1954 	 GET_MODE_SIZE (vecmode)).exists (&mask_mode))
1955       || !can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1956     {
1957       if (dump_enabled_p ())
1958 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1959 			 "can't use a fully-masked loop because the target"
1960 			 " doesn't have the appropriate masked load or"
1961 			 " store.\n");
1962       LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1963       return;
1964     }
1965   /* We might load more scalars than we need for permuting SLP loads.
1966      We checked in get_group_load_store_type that the extra elements
1967      don't leak into a new vector.  */
1968   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1969   poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1970   unsigned int nvectors;
1971   if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors))
1972     vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype);
1973   else
1974     gcc_unreachable ();
1975 }
1976 
1977 /* Return the mask input to a masked load or store.  VEC_MASK is the vectorized
1978    form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1979    that needs to be applied to all loads and stores in a vectorized loop.
1980    Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1981 
1982    MASK_TYPE is the type of both masks.  If new statements are needed,
1983    insert them before GSI.  */
1984 
1985 static tree
1986 prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
1987 			 gimple_stmt_iterator *gsi)
1988 {
1989   gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1990   if (!loop_mask)
1991     return vec_mask;
1992 
1993   gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1994   tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1995   gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1996 					  vec_mask, loop_mask);
1997   gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1998   return and_res;
1999 }
2000 
2001 /* Determine whether we can use a gather load or scatter store to vectorize
2002    strided load or store STMT_INFO by truncating the current offset to a
2003    smaller width.  We need to be able to construct an offset vector:
2004 
2005      { 0, X, X*2, X*3, ... }
2006 
2007    without loss of precision, where X is STMT_INFO's DR_STEP.
2008 
2009    Return true if this is possible, describing the gather load or scatter
2010    store in GS_INFO.  MASKED_P is true if the load or store is conditional.  */
2011 
2012 static bool
2013 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
2014 				     loop_vec_info loop_vinfo, bool masked_p,
2015 				     gather_scatter_info *gs_info)
2016 {
2017   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2018   data_reference *dr = dr_info->dr;
2019   tree step = DR_STEP (dr);
2020   if (TREE_CODE (step) != INTEGER_CST)
2021     {
2022       /* ??? Perhaps we could use range information here?  */
2023       if (dump_enabled_p ())
2024 	dump_printf_loc (MSG_NOTE, vect_location,
2025 			 "cannot truncate variable step.\n");
2026       return false;
2027     }
2028 
2029   /* Get the number of bits in an element.  */
2030   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2031   scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
2032   unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
2033 
2034   /* Set COUNT to the upper limit on the number of elements - 1.
2035      Start with the maximum vectorization factor.  */
2036   unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
2037 
2038   /* Try lowering COUNT to the number of scalar latch iterations.  */
2039   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2040   widest_int max_iters;
2041   if (max_loop_iterations (loop, &max_iters)
2042       && max_iters < count)
2043     count = max_iters.to_shwi ();
2044 
2045   /* Try scales of 1 and the element size.  */
2046   int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
2047   wi::overflow_type overflow = wi::OVF_NONE;
2048   for (int i = 0; i < 2; ++i)
2049     {
2050       int scale = scales[i];
2051       widest_int factor;
2052       if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
2053 	continue;
2054 
2055       /* See whether we can calculate (COUNT - 1) * STEP / SCALE
2056 	 in OFFSET_BITS bits.  */
2057       widest_int range = wi::mul (count, factor, SIGNED, &overflow);
2058       if (overflow)
2059 	continue;
2060       signop sign = range >= 0 ? UNSIGNED : SIGNED;
2061       if (wi::min_precision (range, sign) > element_bits)
2062 	{
2063 	  overflow = wi::OVF_UNKNOWN;
2064 	  continue;
2065 	}
2066 
2067       /* See whether the target supports the operation.  */
2068       tree memory_type = TREE_TYPE (DR_REF (dr));
2069       if (!vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p, vectype,
2070 				     memory_type, element_bits, sign, scale,
2071 				     &gs_info->ifn, &gs_info->element_type))
2072 	continue;
2073 
2074       tree offset_type = build_nonstandard_integer_type (element_bits,
2075 							 sign == UNSIGNED);
2076 
2077       gs_info->decl = NULL_TREE;
2078       /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
2079 	 but we don't need to store that here.  */
2080       gs_info->base = NULL_TREE;
2081       gs_info->offset = fold_convert (offset_type, step);
2082       gs_info->offset_dt = vect_constant_def;
2083       gs_info->offset_vectype = NULL_TREE;
2084       gs_info->scale = scale;
2085       gs_info->memory_type = memory_type;
2086       return true;
2087     }
2088 
2089   if (overflow && dump_enabled_p ())
2090     dump_printf_loc (MSG_NOTE, vect_location,
2091 		     "truncating gather/scatter offset to %d bits"
2092 		     " might change its value.\n", element_bits);
2093 
2094   return false;
2095 }
2096 
2097 /* Return true if we can use gather/scatter internal functions to
2098    vectorize STMT_INFO, which is a grouped or strided load or store.
2099    MASKED_P is true if load or store is conditional.  When returning
2100    true, fill in GS_INFO with the information required to perform the
2101    operation.  */
2102 
2103 static bool
2104 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
2105 				    loop_vec_info loop_vinfo, bool masked_p,
2106 				    gather_scatter_info *gs_info)
2107 {
2108   if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
2109       || gs_info->decl)
2110     return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
2111 						masked_p, gs_info);
2112 
2113   scalar_mode element_mode = SCALAR_TYPE_MODE (gs_info->element_type);
2114   unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
2115   tree offset_type = TREE_TYPE (gs_info->offset);
2116   unsigned int offset_bits = TYPE_PRECISION (offset_type);
2117 
2118   /* Enforced by vect_check_gather_scatter.  */
2119   gcc_assert (element_bits >= offset_bits);
2120 
2121   /* If the elements are wider than the offset, convert the offset to the
2122      same width, without changing its sign.  */
2123   if (element_bits > offset_bits)
2124     {
2125       bool unsigned_p = TYPE_UNSIGNED (offset_type);
2126       offset_type = build_nonstandard_integer_type (element_bits, unsigned_p);
2127       gs_info->offset = fold_convert (offset_type, gs_info->offset);
2128     }
2129 
2130   if (dump_enabled_p ())
2131     dump_printf_loc (MSG_NOTE, vect_location,
2132 		     "using gather/scatter for strided/grouped access,"
2133 		     " scale = %d\n", gs_info->scale);
2134 
2135   return true;
2136 }
2137 
2138 /* STMT_INFO is a non-strided load or store, meaning that it accesses
2139    elements with a known constant step.  Return -1 if that step
2140    is negative, 0 if it is zero, and 1 if it is greater than zero.  */
2141 
2142 static int
2143 compare_step_with_zero (stmt_vec_info stmt_info)
2144 {
2145   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2146   return tree_int_cst_compare (vect_dr_behavior (dr_info)->step,
2147 			       size_zero_node);
2148 }
2149 
2150 /* If the target supports a permute mask that reverses the elements in
2151    a vector of type VECTYPE, return that mask, otherwise return null.  */
2152 
2153 static tree
2154 perm_mask_for_reverse (tree vectype)
2155 {
2156   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2157 
2158   /* The encoding has a single stepped pattern.  */
2159   vec_perm_builder sel (nunits, 1, 3);
2160   for (int i = 0; i < 3; ++i)
2161     sel.quick_push (nunits - 1 - i);
2162 
2163   vec_perm_indices indices (sel, 1, nunits);
2164   if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
2165     return NULL_TREE;
2166   return vect_gen_perm_mask_checked (vectype, indices);
2167 }
2168 
2169 /* A subroutine of get_load_store_type, with a subset of the same
2170    arguments.  Handle the case where STMT_INFO is a load or store that
2171    accesses consecutive elements with a negative step.  */
2172 
2173 static vect_memory_access_type
2174 get_negative_load_store_type (stmt_vec_info stmt_info, tree vectype,
2175 			      vec_load_store_type vls_type,
2176 			      unsigned int ncopies)
2177 {
2178   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2179   dr_alignment_support alignment_support_scheme;
2180 
2181   if (ncopies > 1)
2182     {
2183       if (dump_enabled_p ())
2184 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2185 			 "multiple types with negative step.\n");
2186       return VMAT_ELEMENTWISE;
2187     }
2188 
2189   alignment_support_scheme = vect_supportable_dr_alignment (dr_info, false);
2190   if (alignment_support_scheme != dr_aligned
2191       && alignment_support_scheme != dr_unaligned_supported)
2192     {
2193       if (dump_enabled_p ())
2194 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2195 			 "negative step but alignment required.\n");
2196       return VMAT_ELEMENTWISE;
2197     }
2198 
2199   if (vls_type == VLS_STORE_INVARIANT)
2200     {
2201       if (dump_enabled_p ())
2202 	dump_printf_loc (MSG_NOTE, vect_location,
2203 			 "negative step with invariant source;"
2204 			 " no permute needed.\n");
2205       return VMAT_CONTIGUOUS_DOWN;
2206     }
2207 
2208   if (!perm_mask_for_reverse (vectype))
2209     {
2210       if (dump_enabled_p ())
2211 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2212 			 "negative step and reversing not supported.\n");
2213       return VMAT_ELEMENTWISE;
2214     }
2215 
2216   return VMAT_CONTIGUOUS_REVERSE;
2217 }
2218 
2219 /* STMT_INFO is either a masked or unconditional store.  Return the value
2220    being stored.  */
2221 
2222 tree
2223 vect_get_store_rhs (stmt_vec_info stmt_info)
2224 {
2225   if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
2226     {
2227       gcc_assert (gimple_assign_single_p (assign));
2228       return gimple_assign_rhs1 (assign);
2229     }
2230   if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2231     {
2232       internal_fn ifn = gimple_call_internal_fn (call);
2233       int index = internal_fn_stored_value_index (ifn);
2234       gcc_assert (index >= 0);
2235       return gimple_call_arg (call, index);
2236     }
2237   gcc_unreachable ();
2238 }
2239 
2240 /* A subroutine of get_load_store_type, with a subset of the same
2241    arguments.  Handle the case where STMT_INFO is part of a grouped load
2242    or store.
2243 
2244    For stores, the statements in the group are all consecutive
2245    and there is no gap at the end.  For loads, the statements in the
2246    group might not be consecutive; there can be gaps between statements
2247    as well as at the end.  */
2248 
2249 static bool
2250 get_group_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
2251 			   bool masked_p, vec_load_store_type vls_type,
2252 			   vect_memory_access_type *memory_access_type,
2253 			   gather_scatter_info *gs_info)
2254 {
2255   vec_info *vinfo = stmt_info->vinfo;
2256   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2257   struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2258   stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2259   dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2260   unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
2261   bool single_element_p = (stmt_info == first_stmt_info
2262 			   && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2263   unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
2264   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2265 
2266   /* True if the vectorized statements would access beyond the last
2267      statement in the group.  */
2268   bool overrun_p = false;
2269 
2270   /* True if we can cope with such overrun by peeling for gaps, so that
2271      there is at least one final scalar iteration after the vector loop.  */
2272   bool can_overrun_p = (!masked_p
2273 			&& vls_type == VLS_LOAD
2274 			&& loop_vinfo
2275 			&& !loop->inner);
2276 
2277   /* There can only be a gap at the end of the group if the stride is
2278      known at compile time.  */
2279   gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2280 
2281   /* Stores can't yet have gaps.  */
2282   gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
2283 
2284   if (slp)
2285     {
2286       if (STMT_VINFO_STRIDED_P (first_stmt_info))
2287 	{
2288 	  /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2289 	     separated by the stride, until we have a complete vector.
2290 	     Fall back to scalar accesses if that isn't possible.  */
2291 	  if (multiple_p (nunits, group_size))
2292 	    *memory_access_type = VMAT_STRIDED_SLP;
2293 	  else
2294 	    *memory_access_type = VMAT_ELEMENTWISE;
2295 	}
2296       else
2297 	{
2298 	  overrun_p = loop_vinfo && gap != 0;
2299 	  if (overrun_p && vls_type != VLS_LOAD)
2300 	    {
2301 	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2302 			       "Grouped store with gaps requires"
2303 			       " non-consecutive accesses\n");
2304 	      return false;
2305 	    }
2306 	  /* An overrun is fine if the trailing elements are smaller
2307 	     than the alignment boundary B.  Every vector access will
2308 	     be a multiple of B and so we are guaranteed to access a
2309 	     non-gap element in the same B-sized block.  */
2310 	  if (overrun_p
2311 	      && gap < (vect_known_alignment_in_bytes (first_dr_info)
2312 			/ vect_get_scalar_dr_size (first_dr_info)))
2313 	    overrun_p = false;
2314 	  if (overrun_p && !can_overrun_p)
2315 	    {
2316 	      if (dump_enabled_p ())
2317 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2318 				 "Peeling for outer loop is not supported\n");
2319 	      return false;
2320 	    }
2321 	  int cmp = compare_step_with_zero (stmt_info);
2322 	  if (cmp < 0)
2323 	    *memory_access_type = get_negative_load_store_type
2324 	      (stmt_info, vectype, vls_type, 1);
2325 	  else
2326 	    {
2327 	      gcc_assert (!loop_vinfo || cmp > 0);
2328 	      *memory_access_type = VMAT_CONTIGUOUS;
2329 	    }
2330 	}
2331     }
2332   else
2333     {
2334       /* We can always handle this case using elementwise accesses,
2335 	 but see if something more efficient is available.  */
2336       *memory_access_type = VMAT_ELEMENTWISE;
2337 
2338       /* If there is a gap at the end of the group then these optimizations
2339 	 would access excess elements in the last iteration.  */
2340       bool would_overrun_p = (gap != 0);
2341       /* An overrun is fine if the trailing elements are smaller than the
2342 	 alignment boundary B.  Every vector access will be a multiple of B
2343 	 and so we are guaranteed to access a non-gap element in the
2344 	 same B-sized block.  */
2345       if (would_overrun_p
2346 	  && !masked_p
2347 	  && gap < (vect_known_alignment_in_bytes (first_dr_info)
2348 		    / vect_get_scalar_dr_size (first_dr_info)))
2349 	would_overrun_p = false;
2350 
2351       if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2352 	  && (can_overrun_p || !would_overrun_p)
2353 	  && compare_step_with_zero (stmt_info) > 0)
2354 	{
2355 	  /* First cope with the degenerate case of a single-element
2356 	     vector.  */
2357 	  if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2358 	    *memory_access_type = VMAT_CONTIGUOUS;
2359 
2360 	  /* Otherwise try using LOAD/STORE_LANES.  */
2361 	  if (*memory_access_type == VMAT_ELEMENTWISE
2362 	      && (vls_type == VLS_LOAD
2363 		  ? vect_load_lanes_supported (vectype, group_size, masked_p)
2364 		  : vect_store_lanes_supported (vectype, group_size,
2365 						masked_p)))
2366 	    {
2367 	      *memory_access_type = VMAT_LOAD_STORE_LANES;
2368 	      overrun_p = would_overrun_p;
2369 	    }
2370 
2371 	  /* If that fails, try using permuting loads.  */
2372 	  if (*memory_access_type == VMAT_ELEMENTWISE
2373 	      && (vls_type == VLS_LOAD
2374 		  ? vect_grouped_load_supported (vectype, single_element_p,
2375 						 group_size)
2376 		  : vect_grouped_store_supported (vectype, group_size)))
2377 	    {
2378 	      *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2379 	      overrun_p = would_overrun_p;
2380 	    }
2381 	}
2382 
2383       /* As a last resort, trying using a gather load or scatter store.
2384 
2385 	 ??? Although the code can handle all group sizes correctly,
2386 	 it probably isn't a win to use separate strided accesses based
2387 	 on nearby locations.  Or, even if it's a win over scalar code,
2388 	 it might not be a win over vectorizing at a lower VF, if that
2389 	 allows us to use contiguous accesses.  */
2390       if (*memory_access_type == VMAT_ELEMENTWISE
2391 	  && single_element_p
2392 	  && loop_vinfo
2393 	  && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2394 						 masked_p, gs_info))
2395 	*memory_access_type = VMAT_GATHER_SCATTER;
2396     }
2397 
2398   if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2399     {
2400       /* STMT is the leader of the group. Check the operands of all the
2401 	 stmts of the group.  */
2402       stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2403       while (next_stmt_info)
2404 	{
2405 	  tree op = vect_get_store_rhs (next_stmt_info);
2406 	  enum vect_def_type dt;
2407 	  if (!vect_is_simple_use (op, vinfo, &dt))
2408 	    {
2409 	      if (dump_enabled_p ())
2410 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2411 				 "use not simple.\n");
2412 	      return false;
2413 	    }
2414 	  next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2415 	}
2416     }
2417 
2418   if (overrun_p)
2419     {
2420       gcc_assert (can_overrun_p);
2421       if (dump_enabled_p ())
2422 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2423 			 "Data access with gaps requires scalar "
2424 			 "epilogue loop\n");
2425       LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2426     }
2427 
2428   return true;
2429 }
2430 
2431 /* Analyze load or store statement STMT_INFO of type VLS_TYPE.  Return true
2432    if there is a memory access type that the vectorized form can use,
2433    storing it in *MEMORY_ACCESS_TYPE if so.  If we decide to use gathers
2434    or scatters, fill in GS_INFO accordingly.
2435 
2436    SLP says whether we're performing SLP rather than loop vectorization.
2437    MASKED_P is true if the statement is conditional on a vectorized mask.
2438    VECTYPE is the vector type that the vectorized statements will use.
2439    NCOPIES is the number of vector statements that will be needed.  */
2440 
2441 static bool
2442 get_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
2443 		     bool masked_p, vec_load_store_type vls_type,
2444 		     unsigned int ncopies,
2445 		     vect_memory_access_type *memory_access_type,
2446 		     gather_scatter_info *gs_info)
2447 {
2448   vec_info *vinfo = stmt_info->vinfo;
2449   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2450   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2451   if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2452     {
2453       *memory_access_type = VMAT_GATHER_SCATTER;
2454       if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2455 	gcc_unreachable ();
2456       else if (!vect_is_simple_use (gs_info->offset, vinfo,
2457 				    &gs_info->offset_dt,
2458 				    &gs_info->offset_vectype))
2459 	{
2460 	  if (dump_enabled_p ())
2461 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2462 			     "%s index use not simple.\n",
2463 			     vls_type == VLS_LOAD ? "gather" : "scatter");
2464 	  return false;
2465 	}
2466     }
2467   else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2468     {
2469       if (!get_group_load_store_type (stmt_info, vectype, slp, masked_p,
2470 				      vls_type, memory_access_type, gs_info))
2471 	return false;
2472     }
2473   else if (STMT_VINFO_STRIDED_P (stmt_info))
2474     {
2475       gcc_assert (!slp);
2476       if (loop_vinfo
2477 	  && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2478 						 masked_p, gs_info))
2479 	*memory_access_type = VMAT_GATHER_SCATTER;
2480       else
2481 	*memory_access_type = VMAT_ELEMENTWISE;
2482     }
2483   else
2484     {
2485       int cmp = compare_step_with_zero (stmt_info);
2486       if (cmp < 0)
2487 	*memory_access_type = get_negative_load_store_type
2488 	  (stmt_info, vectype, vls_type, ncopies);
2489       else if (cmp == 0)
2490 	{
2491 	  gcc_assert (vls_type == VLS_LOAD);
2492 	  *memory_access_type = VMAT_INVARIANT;
2493 	}
2494       else
2495 	*memory_access_type = VMAT_CONTIGUOUS;
2496     }
2497 
2498   if ((*memory_access_type == VMAT_ELEMENTWISE
2499        || *memory_access_type == VMAT_STRIDED_SLP)
2500       && !nunits.is_constant ())
2501     {
2502       if (dump_enabled_p ())
2503 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2504 			 "Not using elementwise accesses due to variable "
2505 			 "vectorization factor.\n");
2506       return false;
2507     }
2508 
2509   /* FIXME: At the moment the cost model seems to underestimate the
2510      cost of using elementwise accesses.  This check preserves the
2511      traditional behavior until that can be fixed.  */
2512   stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2513   if (!first_stmt_info)
2514     first_stmt_info = stmt_info;
2515   if (*memory_access_type == VMAT_ELEMENTWISE
2516       && !STMT_VINFO_STRIDED_P (first_stmt_info)
2517       && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2518 	   && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2519 	   && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2520     {
2521       if (dump_enabled_p ())
2522 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2523 			 "not falling back to elementwise accesses\n");
2524       return false;
2525     }
2526   return true;
2527 }
2528 
2529 /* Return true if boolean argument MASK is suitable for vectorizing
2530    conditional load or store STMT_INFO.  When returning true, store the type
2531    of the definition in *MASK_DT_OUT and the type of the vectorized mask
2532    in *MASK_VECTYPE_OUT.  */
2533 
2534 static bool
2535 vect_check_load_store_mask (stmt_vec_info stmt_info, tree mask,
2536 			    vect_def_type *mask_dt_out,
2537 			    tree *mask_vectype_out)
2538 {
2539   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2540     {
2541       if (dump_enabled_p ())
2542 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2543 			 "mask argument is not a boolean.\n");
2544       return false;
2545     }
2546 
2547   if (TREE_CODE (mask) != SSA_NAME)
2548     {
2549       if (dump_enabled_p ())
2550 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2551 			 "mask argument is not an SSA name.\n");
2552       return false;
2553     }
2554 
2555   enum vect_def_type mask_dt;
2556   tree mask_vectype;
2557   if (!vect_is_simple_use (mask, stmt_info->vinfo, &mask_dt, &mask_vectype))
2558     {
2559       if (dump_enabled_p ())
2560 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2561 			 "mask use not simple.\n");
2562       return false;
2563     }
2564 
2565   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2566   if (!mask_vectype)
2567     mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
2568 
2569   if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2570     {
2571       if (dump_enabled_p ())
2572 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2573 			 "could not find an appropriate vector mask type.\n");
2574       return false;
2575     }
2576 
2577   if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2578 		TYPE_VECTOR_SUBPARTS (vectype)))
2579     {
2580       if (dump_enabled_p ())
2581 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2582 			 "vector mask type %T"
2583 			 " does not match vector data type %T.\n",
2584 			 mask_vectype, vectype);
2585 
2586       return false;
2587     }
2588 
2589   *mask_dt_out = mask_dt;
2590   *mask_vectype_out = mask_vectype;
2591   return true;
2592 }
2593 
2594 /* Return true if stored value RHS is suitable for vectorizing store
2595    statement STMT_INFO.  When returning true, store the type of the
2596    definition in *RHS_DT_OUT, the type of the vectorized store value in
2597    *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT.  */
2598 
2599 static bool
2600 vect_check_store_rhs (stmt_vec_info stmt_info, tree rhs,
2601 		      vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2602 		      vec_load_store_type *vls_type_out)
2603 {
2604   /* In the case this is a store from a constant make sure
2605      native_encode_expr can handle it.  */
2606   if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2607     {
2608       if (dump_enabled_p ())
2609 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2610 			 "cannot encode constant as a byte sequence.\n");
2611       return false;
2612     }
2613 
2614   enum vect_def_type rhs_dt;
2615   tree rhs_vectype;
2616   if (!vect_is_simple_use (rhs, stmt_info->vinfo, &rhs_dt, &rhs_vectype))
2617     {
2618       if (dump_enabled_p ())
2619 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2620 			 "use not simple.\n");
2621       return false;
2622     }
2623 
2624   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2625   if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2626     {
2627       if (dump_enabled_p ())
2628 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2629 			 "incompatible vector types.\n");
2630       return false;
2631     }
2632 
2633   *rhs_dt_out = rhs_dt;
2634   *rhs_vectype_out = rhs_vectype;
2635   if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2636     *vls_type_out = VLS_STORE_INVARIANT;
2637   else
2638     *vls_type_out = VLS_STORE;
2639   return true;
2640 }
2641 
2642 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2643    Note that we support masks with floating-point type, in which case the
2644    floats are interpreted as a bitmask.  */
2645 
2646 static tree
2647 vect_build_all_ones_mask (stmt_vec_info stmt_info, tree masktype)
2648 {
2649   if (TREE_CODE (masktype) == INTEGER_TYPE)
2650     return build_int_cst (masktype, -1);
2651   else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2652     {
2653       tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2654       mask = build_vector_from_val (masktype, mask);
2655       return vect_init_vector (stmt_info, mask, masktype, NULL);
2656     }
2657   else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2658     {
2659       REAL_VALUE_TYPE r;
2660       long tmp[6];
2661       for (int j = 0; j < 6; ++j)
2662 	tmp[j] = -1;
2663       real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2664       tree mask = build_real (TREE_TYPE (masktype), r);
2665       mask = build_vector_from_val (masktype, mask);
2666       return vect_init_vector (stmt_info, mask, masktype, NULL);
2667     }
2668   gcc_unreachable ();
2669 }
2670 
2671 /* Build an all-zero merge value of type VECTYPE while vectorizing
2672    STMT_INFO as a gather load.  */
2673 
2674 static tree
2675 vect_build_zero_merge_argument (stmt_vec_info stmt_info, tree vectype)
2676 {
2677   tree merge;
2678   if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2679     merge = build_int_cst (TREE_TYPE (vectype), 0);
2680   else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2681     {
2682       REAL_VALUE_TYPE r;
2683       long tmp[6];
2684       for (int j = 0; j < 6; ++j)
2685 	tmp[j] = 0;
2686       real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2687       merge = build_real (TREE_TYPE (vectype), r);
2688     }
2689   else
2690     gcc_unreachable ();
2691   merge = build_vector_from_val (vectype, merge);
2692   return vect_init_vector (stmt_info, merge, vectype, NULL);
2693 }
2694 
2695 /* Build a gather load call while vectorizing STMT_INFO.  Insert new
2696    instructions before GSI and add them to VEC_STMT.  GS_INFO describes
2697    the gather load operation.  If the load is conditional, MASK is the
2698    unvectorized condition and MASK_DT is its definition type, otherwise
2699    MASK is null.  */
2700 
2701 static void
2702 vect_build_gather_load_calls (stmt_vec_info stmt_info,
2703 			      gimple_stmt_iterator *gsi,
2704 			      stmt_vec_info *vec_stmt,
2705 			      gather_scatter_info *gs_info,
2706 			      tree mask)
2707 {
2708   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2709   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2710   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2711   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2712   int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2713   edge pe = loop_preheader_edge (loop);
2714   enum { NARROW, NONE, WIDEN } modifier;
2715   poly_uint64 gather_off_nunits
2716     = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2717 
2718   tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2719   tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2720   tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2721   tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2722   tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2723   tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2724   tree scaletype = TREE_VALUE (arglist);
2725   tree real_masktype = masktype;
2726   gcc_checking_assert (types_compatible_p (srctype, rettype)
2727 		       && (!mask
2728 			   || TREE_CODE (masktype) == INTEGER_TYPE
2729 			   || types_compatible_p (srctype, masktype)));
2730   if (mask && TREE_CODE (masktype) == INTEGER_TYPE)
2731     masktype = build_same_sized_truth_vector_type (srctype);
2732 
2733   tree mask_halftype = masktype;
2734   tree perm_mask = NULL_TREE;
2735   tree mask_perm_mask = NULL_TREE;
2736   if (known_eq (nunits, gather_off_nunits))
2737     modifier = NONE;
2738   else if (known_eq (nunits * 2, gather_off_nunits))
2739     {
2740       modifier = WIDEN;
2741 
2742       /* Currently widening gathers and scatters are only supported for
2743 	 fixed-length vectors.  */
2744       int count = gather_off_nunits.to_constant ();
2745       vec_perm_builder sel (count, count, 1);
2746       for (int i = 0; i < count; ++i)
2747 	sel.quick_push (i | (count / 2));
2748 
2749       vec_perm_indices indices (sel, 1, count);
2750       perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2751 					      indices);
2752     }
2753   else if (known_eq (nunits, gather_off_nunits * 2))
2754     {
2755       modifier = NARROW;
2756 
2757       /* Currently narrowing gathers and scatters are only supported for
2758 	 fixed-length vectors.  */
2759       int count = nunits.to_constant ();
2760       vec_perm_builder sel (count, count, 1);
2761       sel.quick_grow (count);
2762       for (int i = 0; i < count; ++i)
2763 	sel[i] = i < count / 2 ? i : i + count / 2;
2764       vec_perm_indices indices (sel, 2, count);
2765       perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2766 
2767       ncopies *= 2;
2768 
2769       if (mask && masktype == real_masktype)
2770 	{
2771 	  for (int i = 0; i < count; ++i)
2772 	    sel[i] = i | (count / 2);
2773 	  indices.new_vector (sel, 2, count);
2774 	  mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2775 	}
2776       else if (mask)
2777 	mask_halftype
2778 	  = build_same_sized_truth_vector_type (gs_info->offset_vectype);
2779     }
2780   else
2781     gcc_unreachable ();
2782 
2783   tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
2784   tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
2785 
2786   tree ptr = fold_convert (ptrtype, gs_info->base);
2787   if (!is_gimple_min_invariant (ptr))
2788     {
2789       gimple_seq seq;
2790       ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2791       basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2792       gcc_assert (!new_bb);
2793     }
2794 
2795   tree scale = build_int_cst (scaletype, gs_info->scale);
2796 
2797   tree vec_oprnd0 = NULL_TREE;
2798   tree vec_mask = NULL_TREE;
2799   tree src_op = NULL_TREE;
2800   tree mask_op = NULL_TREE;
2801   tree prev_res = NULL_TREE;
2802   stmt_vec_info prev_stmt_info = NULL;
2803 
2804   if (!mask)
2805     {
2806       src_op = vect_build_zero_merge_argument (stmt_info, rettype);
2807       mask_op = vect_build_all_ones_mask (stmt_info, masktype);
2808     }
2809 
2810   for (int j = 0; j < ncopies; ++j)
2811     {
2812       tree op, var;
2813       if (modifier == WIDEN && (j & 1))
2814 	op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2815 				   perm_mask, stmt_info, gsi);
2816       else if (j == 0)
2817 	op = vec_oprnd0
2818 	  = vect_get_vec_def_for_operand (gs_info->offset, stmt_info);
2819       else
2820 	op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (loop_vinfo,
2821 							  vec_oprnd0);
2822 
2823       if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2824 	{
2825 	  gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2826 				TYPE_VECTOR_SUBPARTS (idxtype)));
2827 	  var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2828 	  op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2829 	  gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2830 	  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2831 	  op = var;
2832 	}
2833 
2834       if (mask)
2835 	{
2836 	  if (mask_perm_mask && (j & 1))
2837 	    mask_op = permute_vec_elements (mask_op, mask_op,
2838 					    mask_perm_mask, stmt_info, gsi);
2839 	  else
2840 	    {
2841 	      if (j == 0)
2842 		vec_mask = vect_get_vec_def_for_operand (mask, stmt_info);
2843 	      else if (modifier != NARROW || (j & 1) == 0)
2844 		vec_mask = vect_get_vec_def_for_stmt_copy (loop_vinfo,
2845 							   vec_mask);
2846 
2847 	      mask_op = vec_mask;
2848 	      if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2849 		{
2850 		  poly_uint64 sub1 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op));
2851 		  poly_uint64 sub2 = TYPE_VECTOR_SUBPARTS (masktype);
2852 		  gcc_assert (known_eq (sub1, sub2));
2853 		  var = vect_get_new_ssa_name (masktype, vect_simple_var);
2854 		  mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2855 		  gassign *new_stmt
2856 		    = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2857 		  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2858 		  mask_op = var;
2859 		}
2860 	    }
2861 	  if (modifier == NARROW && masktype != real_masktype)
2862 	    {
2863 	      var = vect_get_new_ssa_name (mask_halftype, vect_simple_var);
2864 	      gassign *new_stmt
2865 		= gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
2866 						    : VEC_UNPACK_LO_EXPR,
2867 				       mask_op);
2868 	      vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2869 	      mask_op = var;
2870 	    }
2871 	  src_op = mask_op;
2872 	}
2873 
2874       tree mask_arg = mask_op;
2875       if (masktype != real_masktype)
2876 	{
2877 	  tree utype, optype = TREE_TYPE (mask_op);
2878 	  if (TYPE_MODE (real_masktype) == TYPE_MODE (optype))
2879 	    utype = real_masktype;
2880 	  else
2881 	    utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2882 	  var = vect_get_new_ssa_name (utype, vect_scalar_var);
2883 	  mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op);
2884 	  gassign *new_stmt
2885 	    = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2886 	  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2887 	  mask_arg = var;
2888 	  if (!useless_type_conversion_p (real_masktype, utype))
2889 	    {
2890 	      gcc_assert (TYPE_PRECISION (utype)
2891 			  <= TYPE_PRECISION (real_masktype));
2892 	      var = vect_get_new_ssa_name (real_masktype, vect_scalar_var);
2893 	      new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2894 	      vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2895 	      mask_arg = var;
2896 	    }
2897 	  src_op = build_zero_cst (srctype);
2898 	}
2899       gcall *new_call = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2900 					   mask_arg, scale);
2901 
2902       stmt_vec_info new_stmt_info;
2903       if (!useless_type_conversion_p (vectype, rettype))
2904 	{
2905 	  gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2906 				TYPE_VECTOR_SUBPARTS (rettype)));
2907 	  op = vect_get_new_ssa_name (rettype, vect_simple_var);
2908 	  gimple_call_set_lhs (new_call, op);
2909 	  vect_finish_stmt_generation (stmt_info, new_call, gsi);
2910 	  var = make_ssa_name (vec_dest);
2911 	  op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2912 	  gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2913 	  new_stmt_info
2914 	    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2915 	}
2916       else
2917 	{
2918 	  var = make_ssa_name (vec_dest, new_call);
2919 	  gimple_call_set_lhs (new_call, var);
2920 	  new_stmt_info
2921 	    = vect_finish_stmt_generation (stmt_info, new_call, gsi);
2922 	}
2923 
2924       if (modifier == NARROW)
2925 	{
2926 	  if ((j & 1) == 0)
2927 	    {
2928 	      prev_res = var;
2929 	      continue;
2930 	    }
2931 	  var = permute_vec_elements (prev_res, var, perm_mask,
2932 				      stmt_info, gsi);
2933 	  new_stmt_info = loop_vinfo->lookup_def (var);
2934 	}
2935 
2936       if (prev_stmt_info == NULL)
2937 	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
2938       else
2939 	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
2940       prev_stmt_info = new_stmt_info;
2941     }
2942 }
2943 
2944 /* Prepare the base and offset in GS_INFO for vectorization.
2945    Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2946    to the vectorized offset argument for the first copy of STMT_INFO.
2947    STMT_INFO is the statement described by GS_INFO and LOOP is the
2948    containing loop.  */
2949 
2950 static void
2951 vect_get_gather_scatter_ops (struct loop *loop, stmt_vec_info stmt_info,
2952 			     gather_scatter_info *gs_info,
2953 			     tree *dataref_ptr, tree *vec_offset)
2954 {
2955   gimple_seq stmts = NULL;
2956   *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2957   if (stmts != NULL)
2958     {
2959       basic_block new_bb;
2960       edge pe = loop_preheader_edge (loop);
2961       new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2962       gcc_assert (!new_bb);
2963     }
2964   tree offset_type = TREE_TYPE (gs_info->offset);
2965   tree offset_vectype = get_vectype_for_scalar_type (offset_type);
2966   *vec_offset = vect_get_vec_def_for_operand (gs_info->offset, stmt_info,
2967 					      offset_vectype);
2968 }
2969 
2970 /* Prepare to implement a grouped or strided load or store using
2971    the gather load or scatter store operation described by GS_INFO.
2972    STMT_INFO is the load or store statement.
2973 
2974    Set *DATAREF_BUMP to the amount that should be added to the base
2975    address after each copy of the vectorized statement.  Set *VEC_OFFSET
2976    to an invariant offset vector in which element I has the value
2977    I * DR_STEP / SCALE.  */
2978 
2979 static void
2980 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
2981 				 loop_vec_info loop_vinfo,
2982 				 gather_scatter_info *gs_info,
2983 				 tree *dataref_bump, tree *vec_offset)
2984 {
2985   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2986   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2987   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2988   gimple_seq stmts;
2989 
2990   tree bump = size_binop (MULT_EXPR,
2991 			  fold_convert (sizetype, DR_STEP (dr)),
2992 			  size_int (TYPE_VECTOR_SUBPARTS (vectype)));
2993   *dataref_bump = force_gimple_operand (bump, &stmts, true, NULL_TREE);
2994   if (stmts)
2995     gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2996 
2997   /* The offset given in GS_INFO can have pointer type, so use the element
2998      type of the vector instead.  */
2999   tree offset_type = TREE_TYPE (gs_info->offset);
3000   tree offset_vectype = get_vectype_for_scalar_type (offset_type);
3001   offset_type = TREE_TYPE (offset_vectype);
3002 
3003   /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type.  */
3004   tree step = size_binop (EXACT_DIV_EXPR, DR_STEP (dr),
3005 			  ssize_int (gs_info->scale));
3006   step = fold_convert (offset_type, step);
3007   step = force_gimple_operand (step, &stmts, true, NULL_TREE);
3008 
3009   /* Create {0, X, X*2, X*3, ...}.  */
3010   *vec_offset = gimple_build (&stmts, VEC_SERIES_EXPR, offset_vectype,
3011 			      build_zero_cst (offset_type), step);
3012   if (stmts)
3013     gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
3014 }
3015 
3016 /* Return the amount that should be added to a vector pointer to move
3017    to the next or previous copy of AGGR_TYPE.  DR_INFO is the data reference
3018    being vectorized and MEMORY_ACCESS_TYPE describes the type of
3019    vectorization.  */
3020 
3021 static tree
3022 vect_get_data_ptr_increment (dr_vec_info *dr_info, tree aggr_type,
3023 			     vect_memory_access_type memory_access_type)
3024 {
3025   if (memory_access_type == VMAT_INVARIANT)
3026     return size_zero_node;
3027 
3028   tree iv_step = TYPE_SIZE_UNIT (aggr_type);
3029   tree step = vect_dr_behavior (dr_info)->step;
3030   if (tree_int_cst_sgn (step) == -1)
3031     iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
3032   return iv_step;
3033 }
3034 
3035 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}.  */
3036 
3037 static bool
3038 vectorizable_bswap (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3039 		    stmt_vec_info *vec_stmt, slp_tree slp_node,
3040 		    tree vectype_in, stmt_vector_for_cost *cost_vec)
3041 {
3042   tree op, vectype;
3043   gcall *stmt = as_a <gcall *> (stmt_info->stmt);
3044   vec_info *vinfo = stmt_info->vinfo;
3045   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3046   unsigned ncopies;
3047 
3048   op = gimple_call_arg (stmt, 0);
3049   vectype = STMT_VINFO_VECTYPE (stmt_info);
3050   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3051 
3052   /* Multiple types in SLP are handled by creating the appropriate number of
3053      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
3054      case of SLP.  */
3055   if (slp_node)
3056     ncopies = 1;
3057   else
3058     ncopies = vect_get_num_copies (loop_vinfo, vectype);
3059 
3060   gcc_assert (ncopies >= 1);
3061 
3062   tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
3063   if (! char_vectype)
3064     return false;
3065 
3066   poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
3067   unsigned word_bytes;
3068   if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
3069     return false;
3070 
3071   /* The encoding uses one stepped pattern for each byte in the word.  */
3072   vec_perm_builder elts (num_bytes, word_bytes, 3);
3073   for (unsigned i = 0; i < 3; ++i)
3074     for (unsigned j = 0; j < word_bytes; ++j)
3075       elts.quick_push ((i + 1) * word_bytes - j - 1);
3076 
3077   vec_perm_indices indices (elts, 1, num_bytes);
3078   if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
3079     return false;
3080 
3081   if (! vec_stmt)
3082     {
3083       STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3084       DUMP_VECT_SCOPE ("vectorizable_bswap");
3085       if (! slp_node)
3086 	{
3087 	  record_stmt_cost (cost_vec,
3088 			    1, vector_stmt, stmt_info, 0, vect_prologue);
3089 	  record_stmt_cost (cost_vec,
3090 			    ncopies, vec_perm, stmt_info, 0, vect_body);
3091 	}
3092       return true;
3093     }
3094 
3095   tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3096 
3097   /* Transform.  */
3098   vec<tree> vec_oprnds = vNULL;
3099   stmt_vec_info new_stmt_info = NULL;
3100   stmt_vec_info prev_stmt_info = NULL;
3101   for (unsigned j = 0; j < ncopies; j++)
3102     {
3103       /* Handle uses.  */
3104       if (j == 0)
3105 	vect_get_vec_defs (op, NULL, stmt_info, &vec_oprnds, NULL, slp_node);
3106       else
3107 	vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds, NULL);
3108 
3109       /* Arguments are ready. create the new vector stmt.  */
3110       unsigned i;
3111       tree vop;
3112       FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3113        {
3114 	 gimple *new_stmt;
3115 	 tree tem = make_ssa_name (char_vectype);
3116 	 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3117 						      char_vectype, vop));
3118 	 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3119 	 tree tem2 = make_ssa_name (char_vectype);
3120 	 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3121 					 tem, tem, bswap_vconst);
3122 	 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3123 	 tem = make_ssa_name (vectype);
3124 	 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3125 						      vectype, tem2));
3126 	 new_stmt_info
3127 	   = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3128          if (slp_node)
3129 	   SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3130        }
3131 
3132       if (slp_node)
3133         continue;
3134 
3135       if (j == 0)
3136 	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
3137       else
3138 	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3139 
3140       prev_stmt_info = new_stmt_info;
3141     }
3142 
3143   vec_oprnds.release ();
3144   return true;
3145 }
3146 
3147 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3148    integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3149    in a single step.  On success, store the binary pack code in
3150    *CONVERT_CODE.  */
3151 
3152 static bool
3153 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3154 			  tree_code *convert_code)
3155 {
3156   if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3157       || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3158     return false;
3159 
3160   tree_code code;
3161   int multi_step_cvt = 0;
3162   auto_vec <tree, 8> interm_types;
3163   if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3164 					&code, &multi_step_cvt,
3165 					&interm_types)
3166       || multi_step_cvt)
3167     return false;
3168 
3169   *convert_code = code;
3170   return true;
3171 }
3172 
3173 /* Function vectorizable_call.
3174 
3175    Check if STMT_INFO performs a function call that can be vectorized.
3176    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3177    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3178    Return true if STMT_INFO is vectorizable in this way.  */
3179 
3180 static bool
3181 vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3182 		   stmt_vec_info *vec_stmt, slp_tree slp_node,
3183 		   stmt_vector_for_cost *cost_vec)
3184 {
3185   gcall *stmt;
3186   tree vec_dest;
3187   tree scalar_dest;
3188   tree op;
3189   tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3190   stmt_vec_info prev_stmt_info;
3191   tree vectype_out, vectype_in;
3192   poly_uint64 nunits_in;
3193   poly_uint64 nunits_out;
3194   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3195   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3196   vec_info *vinfo = stmt_info->vinfo;
3197   tree fndecl, new_temp, rhs_type;
3198   enum vect_def_type dt[4]
3199     = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3200 	vect_unknown_def_type };
3201   tree vectypes[ARRAY_SIZE (dt)] = {};
3202   int ndts = ARRAY_SIZE (dt);
3203   int ncopies, j;
3204   auto_vec<tree, 8> vargs;
3205   auto_vec<tree, 8> orig_vargs;
3206   enum { NARROW, NONE, WIDEN } modifier;
3207   size_t i, nargs;
3208   tree lhs;
3209 
3210   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3211     return false;
3212 
3213   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3214       && ! vec_stmt)
3215     return false;
3216 
3217   /* Is STMT_INFO a vectorizable call?   */
3218   stmt = dyn_cast <gcall *> (stmt_info->stmt);
3219   if (!stmt)
3220     return false;
3221 
3222   if (gimple_call_internal_p (stmt)
3223       && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3224 	  || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3225     /* Handled by vectorizable_load and vectorizable_store.  */
3226     return false;
3227 
3228   if (gimple_call_lhs (stmt) == NULL_TREE
3229       || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3230     return false;
3231 
3232   gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3233 
3234   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3235 
3236   /* Process function arguments.  */
3237   rhs_type = NULL_TREE;
3238   vectype_in = NULL_TREE;
3239   nargs = gimple_call_num_args (stmt);
3240 
3241   /* Bail out if the function has more than three arguments, we do not have
3242      interesting builtin functions to vectorize with more than two arguments
3243      except for fma.  No arguments is also not good.  */
3244   if (nargs == 0 || nargs > 4)
3245     return false;
3246 
3247   /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic.  */
3248   combined_fn cfn = gimple_call_combined_fn (stmt);
3249   if (cfn == CFN_GOMP_SIMD_LANE)
3250     {
3251       nargs = 0;
3252       rhs_type = unsigned_type_node;
3253     }
3254 
3255   int mask_opno = -1;
3256   if (internal_fn_p (cfn))
3257     mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3258 
3259   for (i = 0; i < nargs; i++)
3260     {
3261       op = gimple_call_arg (stmt, i);
3262       if (!vect_is_simple_use (op, vinfo, &dt[i], &vectypes[i]))
3263 	{
3264 	  if (dump_enabled_p ())
3265 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3266 			     "use not simple.\n");
3267 	  return false;
3268 	}
3269 
3270       /* Skip the mask argument to an internal function.  This operand
3271 	 has been converted via a pattern if necessary.  */
3272       if ((int) i == mask_opno)
3273 	continue;
3274 
3275       /* We can only handle calls with arguments of the same type.  */
3276       if (rhs_type
3277 	  && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3278 	{
3279 	  if (dump_enabled_p ())
3280 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3281                              "argument types differ.\n");
3282 	  return false;
3283 	}
3284       if (!rhs_type)
3285 	rhs_type = TREE_TYPE (op);
3286 
3287       if (!vectype_in)
3288 	vectype_in = vectypes[i];
3289       else if (vectypes[i]
3290 	       && !types_compatible_p (vectypes[i], vectype_in))
3291 	{
3292 	  if (dump_enabled_p ())
3293 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3294                              "argument vector types differ.\n");
3295 	  return false;
3296 	}
3297     }
3298   /* If all arguments are external or constant defs use a vector type with
3299      the same size as the output vector type.  */
3300   if (!vectype_in)
3301     vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
3302   if (vec_stmt)
3303     gcc_assert (vectype_in);
3304   if (!vectype_in)
3305     {
3306       if (dump_enabled_p ())
3307 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3308 			 "no vectype for scalar type %T\n", rhs_type);
3309 
3310       return false;
3311     }
3312 
3313   /* FORNOW */
3314   nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3315   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3316   if (known_eq (nunits_in * 2, nunits_out))
3317     modifier = NARROW;
3318   else if (known_eq (nunits_out, nunits_in))
3319     modifier = NONE;
3320   else if (known_eq (nunits_out * 2, nunits_in))
3321     modifier = WIDEN;
3322   else
3323     return false;
3324 
3325   /* We only handle functions that do not read or clobber memory.  */
3326   if (gimple_vuse (stmt))
3327     {
3328       if (dump_enabled_p ())
3329 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3330 			 "function reads from or writes to memory.\n");
3331       return false;
3332     }
3333 
3334   /* For now, we only vectorize functions if a target specific builtin
3335      is available.  TODO -- in some cases, it might be profitable to
3336      insert the calls for pieces of the vector, in order to be able
3337      to vectorize other operations in the loop.  */
3338   fndecl = NULL_TREE;
3339   internal_fn ifn = IFN_LAST;
3340   tree callee = gimple_call_fndecl (stmt);
3341 
3342   /* First try using an internal function.  */
3343   tree_code convert_code = ERROR_MARK;
3344   if (cfn != CFN_LAST
3345       && (modifier == NONE
3346 	  || (modifier == NARROW
3347 	      && simple_integer_narrowing (vectype_out, vectype_in,
3348 					   &convert_code))))
3349     ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3350 					  vectype_in);
3351 
3352   /* If that fails, try asking for a target-specific built-in function.  */
3353   if (ifn == IFN_LAST)
3354     {
3355       if (cfn != CFN_LAST)
3356 	fndecl = targetm.vectorize.builtin_vectorized_function
3357 	  (cfn, vectype_out, vectype_in);
3358       else if (callee)
3359 	fndecl = targetm.vectorize.builtin_md_vectorized_function
3360 	  (callee, vectype_out, vectype_in);
3361     }
3362 
3363   if (ifn == IFN_LAST && !fndecl)
3364     {
3365       if (cfn == CFN_GOMP_SIMD_LANE
3366 	  && !slp_node
3367 	  && loop_vinfo
3368 	  && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3369 	  && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3370 	  && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3371 	     == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3372 	{
3373 	  /* We can handle IFN_GOMP_SIMD_LANE by returning a
3374 	     { 0, 1, 2, ... vf - 1 } vector.  */
3375 	  gcc_assert (nargs == 0);
3376 	}
3377       else if (modifier == NONE
3378 	       && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3379 		   || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3380 		   || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
3381 	return vectorizable_bswap (stmt_info, gsi, vec_stmt, slp_node,
3382 				   vectype_in, cost_vec);
3383       else
3384 	{
3385 	  if (dump_enabled_p ())
3386 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3387 			     "function is not vectorizable.\n");
3388 	  return false;
3389 	}
3390     }
3391 
3392   if (slp_node)
3393     ncopies = 1;
3394   else if (modifier == NARROW && ifn == IFN_LAST)
3395     ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3396   else
3397     ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3398 
3399   /* Sanity check: make sure that at least one copy of the vectorized stmt
3400      needs to be generated.  */
3401   gcc_assert (ncopies >= 1);
3402 
3403   vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3404   if (!vec_stmt) /* transformation not required.  */
3405     {
3406       STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3407       DUMP_VECT_SCOPE ("vectorizable_call");
3408       vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
3409       if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3410 	record_stmt_cost (cost_vec, ncopies / 2,
3411 			  vec_promote_demote, stmt_info, 0, vect_body);
3412 
3413       if (loop_vinfo && mask_opno >= 0)
3414 	{
3415 	  unsigned int nvectors = (slp_node
3416 				   ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3417 				   : ncopies);
3418 	  vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype_out);
3419 	}
3420       return true;
3421     }
3422 
3423   /* Transform.  */
3424 
3425   if (dump_enabled_p ())
3426     dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3427 
3428   /* Handle def.  */
3429   scalar_dest = gimple_call_lhs (stmt);
3430   vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3431 
3432   bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3433 
3434   stmt_vec_info new_stmt_info = NULL;
3435   prev_stmt_info = NULL;
3436   if (modifier == NONE || ifn != IFN_LAST)
3437     {
3438       tree prev_res = NULL_TREE;
3439       vargs.safe_grow (nargs);
3440       orig_vargs.safe_grow (nargs);
3441       for (j = 0; j < ncopies; ++j)
3442 	{
3443 	  /* Build argument list for the vectorized call.  */
3444 	  if (slp_node)
3445 	    {
3446 	      auto_vec<vec<tree> > vec_defs (nargs);
3447 	      vec<tree> vec_oprnds0;
3448 
3449 	      for (i = 0; i < nargs; i++)
3450 		vargs[i] = gimple_call_arg (stmt, i);
3451 	      vect_get_slp_defs (vargs, slp_node, &vec_defs);
3452 	      vec_oprnds0 = vec_defs[0];
3453 
3454 	      /* Arguments are ready.  Create the new vector stmt.  */
3455 	      FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3456 		{
3457 		  size_t k;
3458 		  for (k = 0; k < nargs; k++)
3459 		    {
3460 		      vec<tree> vec_oprndsk = vec_defs[k];
3461 		      vargs[k] = vec_oprndsk[i];
3462 		    }
3463 		  if (modifier == NARROW)
3464 		    {
3465 		      /* We don't define any narrowing conditional functions
3466 			 at present.  */
3467 		      gcc_assert (mask_opno < 0);
3468 		      tree half_res = make_ssa_name (vectype_in);
3469 		      gcall *call
3470 			= gimple_build_call_internal_vec (ifn, vargs);
3471 		      gimple_call_set_lhs (call, half_res);
3472 		      gimple_call_set_nothrow (call, true);
3473 		      new_stmt_info
3474 			= vect_finish_stmt_generation (stmt_info, call, gsi);
3475 		      if ((i & 1) == 0)
3476 			{
3477 			  prev_res = half_res;
3478 			  continue;
3479 			}
3480 		      new_temp = make_ssa_name (vec_dest);
3481 		      gimple *new_stmt
3482 			= gimple_build_assign (new_temp, convert_code,
3483 					       prev_res, half_res);
3484 		      new_stmt_info
3485 			= vect_finish_stmt_generation (stmt_info, new_stmt,
3486 						       gsi);
3487 		    }
3488 		  else
3489 		    {
3490 		      if (mask_opno >= 0 && masked_loop_p)
3491 			{
3492 			  unsigned int vec_num = vec_oprnds0.length ();
3493 			  /* Always true for SLP.  */
3494 			  gcc_assert (ncopies == 1);
3495 			  tree mask = vect_get_loop_mask (gsi, masks, vec_num,
3496 							  vectype_out, i);
3497 			  vargs[mask_opno] = prepare_load_store_mask
3498 			    (TREE_TYPE (mask), mask, vargs[mask_opno], gsi);
3499 			}
3500 
3501 		      gcall *call;
3502 		      if (ifn != IFN_LAST)
3503 			call = gimple_build_call_internal_vec (ifn, vargs);
3504 		      else
3505 			call = gimple_build_call_vec (fndecl, vargs);
3506 		      new_temp = make_ssa_name (vec_dest, call);
3507 		      gimple_call_set_lhs (call, new_temp);
3508 		      gimple_call_set_nothrow (call, true);
3509 		      new_stmt_info
3510 			= vect_finish_stmt_generation (stmt_info, call, gsi);
3511 		    }
3512 		  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3513 		}
3514 
3515 	      for (i = 0; i < nargs; i++)
3516 		{
3517 		  vec<tree> vec_oprndsi = vec_defs[i];
3518 		  vec_oprndsi.release ();
3519 		}
3520 	      continue;
3521 	    }
3522 
3523 	  if (mask_opno >= 0 && !vectypes[mask_opno])
3524 	    {
3525 	      gcc_assert (modifier != WIDEN);
3526 	      vectypes[mask_opno]
3527 		= build_same_sized_truth_vector_type (vectype_in);
3528 	    }
3529 
3530 	  for (i = 0; i < nargs; i++)
3531 	    {
3532 	      op = gimple_call_arg (stmt, i);
3533 	      if (j == 0)
3534 		vec_oprnd0
3535 		  = vect_get_vec_def_for_operand (op, stmt_info, vectypes[i]);
3536 	      else
3537 		vec_oprnd0
3538 		  = vect_get_vec_def_for_stmt_copy (vinfo, orig_vargs[i]);
3539 
3540 	      orig_vargs[i] = vargs[i] = vec_oprnd0;
3541 	    }
3542 
3543 	  if (mask_opno >= 0 && masked_loop_p)
3544 	    {
3545 	      tree mask = vect_get_loop_mask (gsi, masks, ncopies,
3546 					      vectype_out, j);
3547 	      vargs[mask_opno]
3548 		= prepare_load_store_mask (TREE_TYPE (mask), mask,
3549 					   vargs[mask_opno], gsi);
3550 	    }
3551 
3552 	  if (cfn == CFN_GOMP_SIMD_LANE)
3553 	    {
3554 	      tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3555 	      tree new_var
3556 		= vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3557 	      gimple *init_stmt = gimple_build_assign (new_var, cst);
3558 	      vect_init_vector_1 (stmt_info, init_stmt, NULL);
3559 	      new_temp = make_ssa_name (vec_dest);
3560 	      gimple *new_stmt = gimple_build_assign (new_temp, new_var);
3561 	      new_stmt_info
3562 		= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3563 	    }
3564 	  else if (modifier == NARROW)
3565 	    {
3566 	      /* We don't define any narrowing conditional functions at
3567 		 present.  */
3568 	      gcc_assert (mask_opno < 0);
3569 	      tree half_res = make_ssa_name (vectype_in);
3570 	      gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3571 	      gimple_call_set_lhs (call, half_res);
3572 	      gimple_call_set_nothrow (call, true);
3573 	      new_stmt_info
3574 		= vect_finish_stmt_generation (stmt_info, call, gsi);
3575 	      if ((j & 1) == 0)
3576 		{
3577 		  prev_res = half_res;
3578 		  continue;
3579 		}
3580 	      new_temp = make_ssa_name (vec_dest);
3581 	      gassign *new_stmt = gimple_build_assign (new_temp, convert_code,
3582 						       prev_res, half_res);
3583 	      new_stmt_info
3584 		= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3585 	    }
3586 	  else
3587 	    {
3588 	      gcall *call;
3589 	      if (ifn != IFN_LAST)
3590 		call = gimple_build_call_internal_vec (ifn, vargs);
3591 	      else
3592 		call = gimple_build_call_vec (fndecl, vargs);
3593 	      new_temp = make_ssa_name (vec_dest, call);
3594 	      gimple_call_set_lhs (call, new_temp);
3595 	      gimple_call_set_nothrow (call, true);
3596 	      new_stmt_info
3597 		= vect_finish_stmt_generation (stmt_info, call, gsi);
3598 	    }
3599 
3600 	  if (j == (modifier == NARROW ? 1 : 0))
3601 	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
3602 	  else
3603 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3604 
3605 	  prev_stmt_info = new_stmt_info;
3606 	}
3607     }
3608   else if (modifier == NARROW)
3609     {
3610       /* We don't define any narrowing conditional functions at present.  */
3611       gcc_assert (mask_opno < 0);
3612       for (j = 0; j < ncopies; ++j)
3613 	{
3614 	  /* Build argument list for the vectorized call.  */
3615 	  if (j == 0)
3616 	    vargs.create (nargs * 2);
3617 	  else
3618 	    vargs.truncate (0);
3619 
3620 	  if (slp_node)
3621 	    {
3622 	      auto_vec<vec<tree> > vec_defs (nargs);
3623 	      vec<tree> vec_oprnds0;
3624 
3625 	      for (i = 0; i < nargs; i++)
3626 		vargs.quick_push (gimple_call_arg (stmt, i));
3627 	      vect_get_slp_defs (vargs, slp_node, &vec_defs);
3628 	      vec_oprnds0 = vec_defs[0];
3629 
3630 	      /* Arguments are ready.  Create the new vector stmt.  */
3631 	      for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3632 		{
3633 		  size_t k;
3634 		  vargs.truncate (0);
3635 		  for (k = 0; k < nargs; k++)
3636 		    {
3637 		      vec<tree> vec_oprndsk = vec_defs[k];
3638 		      vargs.quick_push (vec_oprndsk[i]);
3639 		      vargs.quick_push (vec_oprndsk[i + 1]);
3640 		    }
3641 		  gcall *call;
3642 		  if (ifn != IFN_LAST)
3643 		    call = gimple_build_call_internal_vec (ifn, vargs);
3644 		  else
3645 		    call = gimple_build_call_vec (fndecl, vargs);
3646 		  new_temp = make_ssa_name (vec_dest, call);
3647 		  gimple_call_set_lhs (call, new_temp);
3648 		  gimple_call_set_nothrow (call, true);
3649 		  new_stmt_info
3650 		    = vect_finish_stmt_generation (stmt_info, call, gsi);
3651 		  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3652 		}
3653 
3654 	      for (i = 0; i < nargs; i++)
3655 		{
3656 		  vec<tree> vec_oprndsi = vec_defs[i];
3657 		  vec_oprndsi.release ();
3658 		}
3659 	      continue;
3660 	    }
3661 
3662 	  for (i = 0; i < nargs; i++)
3663 	    {
3664 	      op = gimple_call_arg (stmt, i);
3665 	      if (j == 0)
3666 		{
3667 		  vec_oprnd0
3668 		    = vect_get_vec_def_for_operand (op, stmt_info,
3669 						    vectypes[i]);
3670 		  vec_oprnd1
3671 		    = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
3672 		}
3673 	      else
3674 		{
3675 		  vec_oprnd1 = gimple_call_arg (new_stmt_info->stmt,
3676 						2 * i + 1);
3677 		  vec_oprnd0
3678 		    = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
3679 		  vec_oprnd1
3680 		    = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
3681 		}
3682 
3683 	      vargs.quick_push (vec_oprnd0);
3684 	      vargs.quick_push (vec_oprnd1);
3685 	    }
3686 
3687 	  gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3688 	  new_temp = make_ssa_name (vec_dest, new_stmt);
3689 	  gimple_call_set_lhs (new_stmt, new_temp);
3690 	  new_stmt_info
3691 	    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3692 
3693 	  if (j == 0)
3694 	    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
3695 	  else
3696 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3697 
3698 	  prev_stmt_info = new_stmt_info;
3699 	}
3700 
3701       *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3702     }
3703   else
3704     /* No current target implements this case.  */
3705     return false;
3706 
3707   vargs.release ();
3708 
3709   /* The call in STMT might prevent it from being removed in dce.
3710      We however cannot remove it here, due to the way the ssa name
3711      it defines is mapped to the new definition.  So just replace
3712      rhs of the statement with something harmless.  */
3713 
3714   if (slp_node)
3715     return true;
3716 
3717   stmt_info = vect_orig_stmt (stmt_info);
3718   lhs = gimple_get_lhs (stmt_info->stmt);
3719 
3720   gassign *new_stmt
3721     = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3722   vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3723 
3724   return true;
3725 }
3726 
3727 
3728 struct simd_call_arg_info
3729 {
3730   tree vectype;
3731   tree op;
3732   HOST_WIDE_INT linear_step;
3733   enum vect_def_type dt;
3734   unsigned int align;
3735   bool simd_lane_linear;
3736 };
3737 
3738 /* Helper function of vectorizable_simd_clone_call.  If OP, an SSA_NAME,
3739    is linear within simd lane (but not within whole loop), note it in
3740    *ARGINFO.  */
3741 
3742 static void
3743 vect_simd_lane_linear (tree op, struct loop *loop,
3744 		       struct simd_call_arg_info *arginfo)
3745 {
3746   gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3747 
3748   if (!is_gimple_assign (def_stmt)
3749       || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3750       || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3751     return;
3752 
3753   tree base = gimple_assign_rhs1 (def_stmt);
3754   HOST_WIDE_INT linear_step = 0;
3755   tree v = gimple_assign_rhs2 (def_stmt);
3756   while (TREE_CODE (v) == SSA_NAME)
3757     {
3758       tree t;
3759       def_stmt = SSA_NAME_DEF_STMT (v);
3760       if (is_gimple_assign (def_stmt))
3761 	switch (gimple_assign_rhs_code (def_stmt))
3762 	  {
3763 	  case PLUS_EXPR:
3764 	    t = gimple_assign_rhs2 (def_stmt);
3765 	    if (linear_step || TREE_CODE (t) != INTEGER_CST)
3766 	      return;
3767 	    base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3768 	    v = gimple_assign_rhs1 (def_stmt);
3769 	    continue;
3770 	  case MULT_EXPR:
3771 	    t = gimple_assign_rhs2 (def_stmt);
3772 	    if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3773 	      return;
3774 	    linear_step = tree_to_shwi (t);
3775 	    v = gimple_assign_rhs1 (def_stmt);
3776 	    continue;
3777 	  CASE_CONVERT:
3778 	    t = gimple_assign_rhs1 (def_stmt);
3779 	    if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3780 		|| (TYPE_PRECISION (TREE_TYPE (v))
3781 		    < TYPE_PRECISION (TREE_TYPE (t))))
3782 	      return;
3783 	    if (!linear_step)
3784 	      linear_step = 1;
3785 	    v = t;
3786 	    continue;
3787 	  default:
3788 	    return;
3789 	  }
3790       else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3791 	       && loop->simduid
3792 	       && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3793 	       && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3794 		   == loop->simduid))
3795 	{
3796 	  if (!linear_step)
3797 	    linear_step = 1;
3798 	  arginfo->linear_step = linear_step;
3799 	  arginfo->op = base;
3800 	  arginfo->simd_lane_linear = true;
3801 	  return;
3802 	}
3803     }
3804 }
3805 
3806 /* Return the number of elements in vector type VECTYPE, which is associated
3807    with a SIMD clone.  At present these vectors always have a constant
3808    length.  */
3809 
3810 static unsigned HOST_WIDE_INT
3811 simd_clone_subparts (tree vectype)
3812 {
3813   return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3814 }
3815 
3816 /* Function vectorizable_simd_clone_call.
3817 
3818    Check if STMT_INFO performs a function call that can be vectorized
3819    by calling a simd clone of the function.
3820    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3821    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3822    Return true if STMT_INFO is vectorizable in this way.  */
3823 
3824 static bool
3825 vectorizable_simd_clone_call (stmt_vec_info stmt_info,
3826 			      gimple_stmt_iterator *gsi,
3827 			      stmt_vec_info *vec_stmt, slp_tree slp_node,
3828 			      stmt_vector_for_cost *)
3829 {
3830   tree vec_dest;
3831   tree scalar_dest;
3832   tree op, type;
3833   tree vec_oprnd0 = NULL_TREE;
3834   stmt_vec_info prev_stmt_info;
3835   tree vectype;
3836   unsigned int nunits;
3837   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3838   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3839   vec_info *vinfo = stmt_info->vinfo;
3840   struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3841   tree fndecl, new_temp;
3842   int ncopies, j;
3843   auto_vec<simd_call_arg_info> arginfo;
3844   vec<tree> vargs = vNULL;
3845   size_t i, nargs;
3846   tree lhs, rtype, ratype;
3847   vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3848 
3849   /* Is STMT a vectorizable call?   */
3850   gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3851   if (!stmt)
3852     return false;
3853 
3854   fndecl = gimple_call_fndecl (stmt);
3855   if (fndecl == NULL_TREE)
3856     return false;
3857 
3858   struct cgraph_node *node = cgraph_node::get (fndecl);
3859   if (node == NULL || node->simd_clones == NULL)
3860     return false;
3861 
3862   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3863     return false;
3864 
3865   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3866       && ! vec_stmt)
3867     return false;
3868 
3869   if (gimple_call_lhs (stmt)
3870       && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3871     return false;
3872 
3873   gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3874 
3875   vectype = STMT_VINFO_VECTYPE (stmt_info);
3876 
3877   if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
3878     return false;
3879 
3880   /* FORNOW */
3881   if (slp_node)
3882     return false;
3883 
3884   /* Process function arguments.  */
3885   nargs = gimple_call_num_args (stmt);
3886 
3887   /* Bail out if the function has zero arguments.  */
3888   if (nargs == 0)
3889     return false;
3890 
3891   arginfo.reserve (nargs, true);
3892 
3893   for (i = 0; i < nargs; i++)
3894     {
3895       simd_call_arg_info thisarginfo;
3896       affine_iv iv;
3897 
3898       thisarginfo.linear_step = 0;
3899       thisarginfo.align = 0;
3900       thisarginfo.op = NULL_TREE;
3901       thisarginfo.simd_lane_linear = false;
3902 
3903       op = gimple_call_arg (stmt, i);
3904       if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
3905 			       &thisarginfo.vectype)
3906 	  || thisarginfo.dt == vect_uninitialized_def)
3907 	{
3908 	  if (dump_enabled_p ())
3909 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3910 			     "use not simple.\n");
3911 	  return false;
3912 	}
3913 
3914       if (thisarginfo.dt == vect_constant_def
3915 	  || thisarginfo.dt == vect_external_def)
3916 	gcc_assert (thisarginfo.vectype == NULL_TREE);
3917       else
3918 	{
3919 	  gcc_assert (thisarginfo.vectype != NULL_TREE);
3920 	  if (VECTOR_BOOLEAN_TYPE_P (thisarginfo.vectype))
3921 	    {
3922 	      if (dump_enabled_p ())
3923 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3924 				 "vector mask arguments are not supported\n");
3925 	      return false;
3926 	    }
3927 	}
3928 
3929       /* For linear arguments, the analyze phase should have saved
3930 	 the base and step in STMT_VINFO_SIMD_CLONE_INFO.  */
3931       if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3932 	  && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3933 	{
3934 	  gcc_assert (vec_stmt);
3935 	  thisarginfo.linear_step
3936 	    = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3937 	  thisarginfo.op
3938 	    = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3939 	  thisarginfo.simd_lane_linear
3940 	    = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3941 	       == boolean_true_node);
3942 	  /* If loop has been peeled for alignment, we need to adjust it.  */
3943 	  tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3944 	  tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3945 	  if (n1 != n2 && !thisarginfo.simd_lane_linear)
3946 	    {
3947 	      tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3948 	      tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3949 	      tree opt = TREE_TYPE (thisarginfo.op);
3950 	      bias = fold_convert (TREE_TYPE (step), bias);
3951 	      bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3952 	      thisarginfo.op
3953 		= fold_build2 (POINTER_TYPE_P (opt)
3954 			       ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3955 			       thisarginfo.op, bias);
3956 	    }
3957 	}
3958       else if (!vec_stmt
3959 	       && thisarginfo.dt != vect_constant_def
3960 	       && thisarginfo.dt != vect_external_def
3961 	       && loop_vinfo
3962 	       && TREE_CODE (op) == SSA_NAME
3963 	       && simple_iv (loop, loop_containing_stmt (stmt), op,
3964 			     &iv, false)
3965 	       && tree_fits_shwi_p (iv.step))
3966 	{
3967 	  thisarginfo.linear_step = tree_to_shwi (iv.step);
3968 	  thisarginfo.op = iv.base;
3969 	}
3970       else if ((thisarginfo.dt == vect_constant_def
3971 		|| thisarginfo.dt == vect_external_def)
3972 	       && POINTER_TYPE_P (TREE_TYPE (op)))
3973 	thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3974       /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3975 	 linear too.  */
3976       if (POINTER_TYPE_P (TREE_TYPE (op))
3977 	  && !thisarginfo.linear_step
3978 	  && !vec_stmt
3979 	  && thisarginfo.dt != vect_constant_def
3980 	  && thisarginfo.dt != vect_external_def
3981 	  && loop_vinfo
3982 	  && !slp_node
3983 	  && TREE_CODE (op) == SSA_NAME)
3984 	vect_simd_lane_linear (op, loop, &thisarginfo);
3985 
3986       arginfo.quick_push (thisarginfo);
3987     }
3988 
3989   unsigned HOST_WIDE_INT vf;
3990   if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
3991     {
3992       if (dump_enabled_p ())
3993 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3994 			 "not considering SIMD clones; not yet supported"
3995 			 " for variable-width vectors.\n");
3996       return false;
3997     }
3998 
3999   unsigned int badness = 0;
4000   struct cgraph_node *bestn = NULL;
4001   if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
4002     bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
4003   else
4004     for (struct cgraph_node *n = node->simd_clones; n != NULL;
4005 	 n = n->simdclone->next_clone)
4006       {
4007 	unsigned int this_badness = 0;
4008 	if (n->simdclone->simdlen > vf
4009 	    || n->simdclone->nargs != nargs)
4010 	  continue;
4011 	if (n->simdclone->simdlen < vf)
4012 	  this_badness += (exact_log2 (vf)
4013 			   - exact_log2 (n->simdclone->simdlen)) * 1024;
4014 	if (n->simdclone->inbranch)
4015 	  this_badness += 2048;
4016 	int target_badness = targetm.simd_clone.usable (n);
4017 	if (target_badness < 0)
4018 	  continue;
4019 	this_badness += target_badness * 512;
4020 	/* FORNOW: Have to add code to add the mask argument.  */
4021 	if (n->simdclone->inbranch)
4022 	  continue;
4023 	for (i = 0; i < nargs; i++)
4024 	  {
4025 	    switch (n->simdclone->args[i].arg_type)
4026 	      {
4027 	      case SIMD_CLONE_ARG_TYPE_VECTOR:
4028 		if (!useless_type_conversion_p
4029 			(n->simdclone->args[i].orig_type,
4030 			 TREE_TYPE (gimple_call_arg (stmt, i))))
4031 		  i = -1;
4032 		else if (arginfo[i].dt == vect_constant_def
4033 			 || arginfo[i].dt == vect_external_def
4034 			 || arginfo[i].linear_step)
4035 		  this_badness += 64;
4036 		break;
4037 	      case SIMD_CLONE_ARG_TYPE_UNIFORM:
4038 		if (arginfo[i].dt != vect_constant_def
4039 		    && arginfo[i].dt != vect_external_def)
4040 		  i = -1;
4041 		break;
4042 	      case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4043 	      case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4044 		if (arginfo[i].dt == vect_constant_def
4045 		    || arginfo[i].dt == vect_external_def
4046 		    || (arginfo[i].linear_step
4047 			!= n->simdclone->args[i].linear_step))
4048 		  i = -1;
4049 		break;
4050 	      case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4051 	      case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4052 	      case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4053 	      case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4054 	      case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4055 	      case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4056 		/* FORNOW */
4057 		i = -1;
4058 		break;
4059 	      case SIMD_CLONE_ARG_TYPE_MASK:
4060 		gcc_unreachable ();
4061 	      }
4062 	    if (i == (size_t) -1)
4063 	      break;
4064 	    if (n->simdclone->args[i].alignment > arginfo[i].align)
4065 	      {
4066 		i = -1;
4067 		break;
4068 	      }
4069 	    if (arginfo[i].align)
4070 	      this_badness += (exact_log2 (arginfo[i].align)
4071 			       - exact_log2 (n->simdclone->args[i].alignment));
4072 	  }
4073 	if (i == (size_t) -1)
4074 	  continue;
4075 	if (bestn == NULL || this_badness < badness)
4076 	  {
4077 	    bestn = n;
4078 	    badness = this_badness;
4079 	  }
4080       }
4081 
4082   if (bestn == NULL)
4083     return false;
4084 
4085   for (i = 0; i < nargs; i++)
4086     if ((arginfo[i].dt == vect_constant_def
4087 	 || arginfo[i].dt == vect_external_def)
4088 	&& bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
4089       {
4090 	arginfo[i].vectype
4091 	  = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
4092 								     i)));
4093 	if (arginfo[i].vectype == NULL
4094 	    || (simd_clone_subparts (arginfo[i].vectype)
4095 		> bestn->simdclone->simdlen))
4096 	  return false;
4097       }
4098 
4099   fndecl = bestn->decl;
4100   nunits = bestn->simdclone->simdlen;
4101   ncopies = vf / nunits;
4102 
4103   /* If the function isn't const, only allow it in simd loops where user
4104      has asserted that at least nunits consecutive iterations can be
4105      performed using SIMD instructions.  */
4106   if ((loop == NULL || (unsigned) loop->safelen < nunits)
4107       && gimple_vuse (stmt))
4108     return false;
4109 
4110   /* Sanity check: make sure that at least one copy of the vectorized stmt
4111      needs to be generated.  */
4112   gcc_assert (ncopies >= 1);
4113 
4114   if (!vec_stmt) /* transformation not required.  */
4115     {
4116       STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
4117       for (i = 0; i < nargs; i++)
4118 	if ((bestn->simdclone->args[i].arg_type
4119 	     == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
4120 	    || (bestn->simdclone->args[i].arg_type
4121 		== SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
4122 	  {
4123 	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
4124 									+ 1);
4125 	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
4126 	    tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4127 		       ? size_type_node : TREE_TYPE (arginfo[i].op);
4128 	    tree ls = build_int_cst (lst, arginfo[i].linear_step);
4129 	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
4130 	    tree sll = arginfo[i].simd_lane_linear
4131 		       ? boolean_true_node : boolean_false_node;
4132 	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
4133 	  }
4134       STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4135       DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4136 /*      vect_model_simple_cost (stmt_info, ncopies, dt, slp_node, cost_vec); */
4137       return true;
4138     }
4139 
4140   /* Transform.  */
4141 
4142   if (dump_enabled_p ())
4143     dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4144 
4145   /* Handle def.  */
4146   scalar_dest = gimple_call_lhs (stmt);
4147   vec_dest = NULL_TREE;
4148   rtype = NULL_TREE;
4149   ratype = NULL_TREE;
4150   if (scalar_dest)
4151     {
4152       vec_dest = vect_create_destination_var (scalar_dest, vectype);
4153       rtype = TREE_TYPE (TREE_TYPE (fndecl));
4154       if (TREE_CODE (rtype) == ARRAY_TYPE)
4155 	{
4156 	  ratype = rtype;
4157 	  rtype = TREE_TYPE (ratype);
4158 	}
4159     }
4160 
4161   prev_stmt_info = NULL;
4162   for (j = 0; j < ncopies; ++j)
4163     {
4164       /* Build argument list for the vectorized call.  */
4165       if (j == 0)
4166 	vargs.create (nargs);
4167       else
4168 	vargs.truncate (0);
4169 
4170       for (i = 0; i < nargs; i++)
4171 	{
4172 	  unsigned int k, l, m, o;
4173 	  tree atype;
4174 	  op = gimple_call_arg (stmt, i);
4175 	  switch (bestn->simdclone->args[i].arg_type)
4176 	    {
4177 	    case SIMD_CLONE_ARG_TYPE_VECTOR:
4178 	      atype = bestn->simdclone->args[i].vector_type;
4179 	      o = nunits / simd_clone_subparts (atype);
4180 	      for (m = j * o; m < (j + 1) * o; m++)
4181 		{
4182 		  if (simd_clone_subparts (atype)
4183 		      < simd_clone_subparts (arginfo[i].vectype))
4184 		    {
4185 		      poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4186 		      k = (simd_clone_subparts (arginfo[i].vectype)
4187 			   / simd_clone_subparts (atype));
4188 		      gcc_assert ((k & (k - 1)) == 0);
4189 		      if (m == 0)
4190 			vec_oprnd0
4191 			  = vect_get_vec_def_for_operand (op, stmt_info);
4192 		      else
4193 			{
4194 			  vec_oprnd0 = arginfo[i].op;
4195 			  if ((m & (k - 1)) == 0)
4196 			    vec_oprnd0
4197 			      = vect_get_vec_def_for_stmt_copy (vinfo,
4198 								vec_oprnd0);
4199 			}
4200 		      arginfo[i].op = vec_oprnd0;
4201 		      vec_oprnd0
4202 			= build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4203 				  bitsize_int (prec),
4204 				  bitsize_int ((m & (k - 1)) * prec));
4205 		      gassign *new_stmt
4206 			= gimple_build_assign (make_ssa_name (atype),
4207 					       vec_oprnd0);
4208 		      vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4209 		      vargs.safe_push (gimple_assign_lhs (new_stmt));
4210 		    }
4211 		  else
4212 		    {
4213 		      k = (simd_clone_subparts (atype)
4214 			   / simd_clone_subparts (arginfo[i].vectype));
4215 		      gcc_assert ((k & (k - 1)) == 0);
4216 		      vec<constructor_elt, va_gc> *ctor_elts;
4217 		      if (k != 1)
4218 			vec_alloc (ctor_elts, k);
4219 		      else
4220 			ctor_elts = NULL;
4221 		      for (l = 0; l < k; l++)
4222 			{
4223 			  if (m == 0 && l == 0)
4224 			    vec_oprnd0
4225 			      = vect_get_vec_def_for_operand (op, stmt_info);
4226 			  else
4227 			    vec_oprnd0
4228 			      = vect_get_vec_def_for_stmt_copy (vinfo,
4229 								arginfo[i].op);
4230 			  arginfo[i].op = vec_oprnd0;
4231 			  if (k == 1)
4232 			    break;
4233 			  CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4234 						  vec_oprnd0);
4235 			}
4236 		      if (k == 1)
4237 			vargs.safe_push (vec_oprnd0);
4238 		      else
4239 			{
4240 			  vec_oprnd0 = build_constructor (atype, ctor_elts);
4241 			  gassign *new_stmt
4242 			    = gimple_build_assign (make_ssa_name (atype),
4243 						   vec_oprnd0);
4244 			  vect_finish_stmt_generation (stmt_info, new_stmt,
4245 						       gsi);
4246 			  vargs.safe_push (gimple_assign_lhs (new_stmt));
4247 			}
4248 		    }
4249 		}
4250 	      break;
4251 	    case SIMD_CLONE_ARG_TYPE_UNIFORM:
4252 	      vargs.safe_push (op);
4253 	      break;
4254 	    case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4255 	    case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4256 	      if (j == 0)
4257 		{
4258 		  gimple_seq stmts;
4259 		  arginfo[i].op
4260 		    = force_gimple_operand (arginfo[i].op, &stmts, true,
4261 					    NULL_TREE);
4262 		  if (stmts != NULL)
4263 		    {
4264 		      basic_block new_bb;
4265 		      edge pe = loop_preheader_edge (loop);
4266 		      new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4267 		      gcc_assert (!new_bb);
4268 		    }
4269 		  if (arginfo[i].simd_lane_linear)
4270 		    {
4271 		      vargs.safe_push (arginfo[i].op);
4272 		      break;
4273 		    }
4274 		  tree phi_res = copy_ssa_name (op);
4275 		  gphi *new_phi = create_phi_node (phi_res, loop->header);
4276 		  loop_vinfo->add_stmt (new_phi);
4277 		  add_phi_arg (new_phi, arginfo[i].op,
4278 			       loop_preheader_edge (loop), UNKNOWN_LOCATION);
4279 		  enum tree_code code
4280 		    = POINTER_TYPE_P (TREE_TYPE (op))
4281 		      ? POINTER_PLUS_EXPR : PLUS_EXPR;
4282 		  tree type = POINTER_TYPE_P (TREE_TYPE (op))
4283 			      ? sizetype : TREE_TYPE (op);
4284 		  widest_int cst
4285 		    = wi::mul (bestn->simdclone->args[i].linear_step,
4286 			       ncopies * nunits);
4287 		  tree tcst = wide_int_to_tree (type, cst);
4288 		  tree phi_arg = copy_ssa_name (op);
4289 		  gassign *new_stmt
4290 		    = gimple_build_assign (phi_arg, code, phi_res, tcst);
4291 		  gimple_stmt_iterator si = gsi_after_labels (loop->header);
4292 		  gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4293 		  loop_vinfo->add_stmt (new_stmt);
4294 		  add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4295 			       UNKNOWN_LOCATION);
4296 		  arginfo[i].op = phi_res;
4297 		  vargs.safe_push (phi_res);
4298 		}
4299 	      else
4300 		{
4301 		  enum tree_code code
4302 		    = POINTER_TYPE_P (TREE_TYPE (op))
4303 		      ? POINTER_PLUS_EXPR : PLUS_EXPR;
4304 		  tree type = POINTER_TYPE_P (TREE_TYPE (op))
4305 			      ? sizetype : TREE_TYPE (op);
4306 		  widest_int cst
4307 		    = wi::mul (bestn->simdclone->args[i].linear_step,
4308 			       j * nunits);
4309 		  tree tcst = wide_int_to_tree (type, cst);
4310 		  new_temp = make_ssa_name (TREE_TYPE (op));
4311 		  gassign *new_stmt
4312 		    = gimple_build_assign (new_temp, code,
4313 					   arginfo[i].op, tcst);
4314 		  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4315 		  vargs.safe_push (new_temp);
4316 		}
4317 	      break;
4318 	    case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4319 	    case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4320 	    case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4321 	    case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4322 	    case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4323 	    case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4324 	    default:
4325 	      gcc_unreachable ();
4326 	    }
4327 	}
4328 
4329       gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4330       if (vec_dest)
4331 	{
4332 	  gcc_assert (ratype || simd_clone_subparts (rtype) == nunits);
4333 	  if (ratype)
4334 	    new_temp = create_tmp_var (ratype);
4335 	  else if (simd_clone_subparts (vectype)
4336 		   == simd_clone_subparts (rtype))
4337 	    new_temp = make_ssa_name (vec_dest, new_call);
4338 	  else
4339 	    new_temp = make_ssa_name (rtype, new_call);
4340 	  gimple_call_set_lhs (new_call, new_temp);
4341 	}
4342       stmt_vec_info new_stmt_info
4343 	= vect_finish_stmt_generation (stmt_info, new_call, gsi);
4344 
4345       if (vec_dest)
4346 	{
4347 	  if (simd_clone_subparts (vectype) < nunits)
4348 	    {
4349 	      unsigned int k, l;
4350 	      poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4351 	      poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4352 	      k = nunits / simd_clone_subparts (vectype);
4353 	      gcc_assert ((k & (k - 1)) == 0);
4354 	      for (l = 0; l < k; l++)
4355 		{
4356 		  tree t;
4357 		  if (ratype)
4358 		    {
4359 		      t = build_fold_addr_expr (new_temp);
4360 		      t = build2 (MEM_REF, vectype, t,
4361 				  build_int_cst (TREE_TYPE (t), l * bytes));
4362 		    }
4363 		  else
4364 		    t = build3 (BIT_FIELD_REF, vectype, new_temp,
4365 				bitsize_int (prec), bitsize_int (l * prec));
4366 		  gimple *new_stmt
4367 		    = gimple_build_assign (make_ssa_name (vectype), t);
4368 		  new_stmt_info
4369 		    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4370 
4371 		  if (j == 0 && l == 0)
4372 		    STMT_VINFO_VEC_STMT (stmt_info)
4373 		      = *vec_stmt = new_stmt_info;
4374 		  else
4375 		    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4376 
4377 		  prev_stmt_info = new_stmt_info;
4378 		}
4379 
4380 	      if (ratype)
4381 		vect_clobber_variable (stmt_info, gsi, new_temp);
4382 	      continue;
4383 	    }
4384 	  else if (simd_clone_subparts (vectype) > nunits)
4385 	    {
4386 	      unsigned int k = (simd_clone_subparts (vectype)
4387 				/ simd_clone_subparts (rtype));
4388 	      gcc_assert ((k & (k - 1)) == 0);
4389 	      if ((j & (k - 1)) == 0)
4390 		vec_alloc (ret_ctor_elts, k);
4391 	      if (ratype)
4392 		{
4393 		  unsigned int m, o = nunits / simd_clone_subparts (rtype);
4394 		  for (m = 0; m < o; m++)
4395 		    {
4396 		      tree tem = build4 (ARRAY_REF, rtype, new_temp,
4397 					 size_int (m), NULL_TREE, NULL_TREE);
4398 		      gimple *new_stmt
4399 			= gimple_build_assign (make_ssa_name (rtype), tem);
4400 		      new_stmt_info
4401 			= vect_finish_stmt_generation (stmt_info, new_stmt,
4402 						       gsi);
4403 		      CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4404 					      gimple_assign_lhs (new_stmt));
4405 		    }
4406 		  vect_clobber_variable (stmt_info, gsi, new_temp);
4407 		}
4408 	      else
4409 		CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4410 	      if ((j & (k - 1)) != k - 1)
4411 		continue;
4412 	      vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4413 	      gimple *new_stmt
4414 		= gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4415 	      new_stmt_info
4416 		= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4417 
4418 	      if ((unsigned) j == k - 1)
4419 		STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
4420 	      else
4421 		STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4422 
4423 	      prev_stmt_info = new_stmt_info;
4424 	      continue;
4425 	    }
4426 	  else if (ratype)
4427 	    {
4428 	      tree t = build_fold_addr_expr (new_temp);
4429 	      t = build2 (MEM_REF, vectype, t,
4430 			  build_int_cst (TREE_TYPE (t), 0));
4431 	      gimple *new_stmt
4432 		= gimple_build_assign (make_ssa_name (vec_dest), t);
4433 	      new_stmt_info
4434 		= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4435 	      vect_clobber_variable (stmt_info, gsi, new_temp);
4436 	    }
4437 	}
4438 
4439       if (j == 0)
4440 	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
4441       else
4442 	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4443 
4444       prev_stmt_info = new_stmt_info;
4445     }
4446 
4447   vargs.release ();
4448 
4449   /* The call in STMT might prevent it from being removed in dce.
4450      We however cannot remove it here, due to the way the ssa name
4451      it defines is mapped to the new definition.  So just replace
4452      rhs of the statement with something harmless.  */
4453 
4454   if (slp_node)
4455     return true;
4456 
4457   gimple *new_stmt;
4458   if (scalar_dest)
4459     {
4460       type = TREE_TYPE (scalar_dest);
4461       lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
4462       new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4463     }
4464   else
4465     new_stmt = gimple_build_nop ();
4466   vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
4467   unlink_stmt_vdef (stmt);
4468 
4469   return true;
4470 }
4471 
4472 
4473 /* Function vect_gen_widened_results_half
4474 
4475    Create a vector stmt whose code, type, number of arguments, and result
4476    variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
4477    VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at BSI.
4478    In the case that CODE is a CALL_EXPR, this means that a call to DECL
4479    needs to be created (DECL is a function-decl of a target-builtin).
4480    STMT_INFO is the original scalar stmt that we are vectorizing.  */
4481 
4482 static gimple *
4483 vect_gen_widened_results_half (enum tree_code code,
4484 			       tree decl,
4485                                tree vec_oprnd0, tree vec_oprnd1, int op_type,
4486 			       tree vec_dest, gimple_stmt_iterator *gsi,
4487 			       stmt_vec_info stmt_info)
4488 {
4489   gimple *new_stmt;
4490   tree new_temp;
4491 
4492   /* Generate half of the widened result:  */
4493   if (code == CALL_EXPR)
4494     {
4495       /* Target specific support  */
4496       if (op_type == binary_op)
4497 	new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
4498       else
4499 	new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
4500       new_temp = make_ssa_name (vec_dest, new_stmt);
4501       gimple_call_set_lhs (new_stmt, new_temp);
4502     }
4503   else
4504     {
4505       /* Generic support */
4506       gcc_assert (op_type == TREE_CODE_LENGTH (code));
4507       if (op_type != binary_op)
4508 	vec_oprnd1 = NULL;
4509       new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4510       new_temp = make_ssa_name (vec_dest, new_stmt);
4511       gimple_assign_set_lhs (new_stmt, new_temp);
4512     }
4513   vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4514 
4515   return new_stmt;
4516 }
4517 
4518 
4519 /* Get vectorized definitions for loop-based vectorization of STMT_INFO.
4520    For the first operand we call vect_get_vec_def_for_operand (with OPRND
4521    containing scalar operand), and for the rest we get a copy with
4522    vect_get_vec_def_for_stmt_copy() using the previous vector definition
4523    (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
4524    The vectors are collected into VEC_OPRNDS.  */
4525 
4526 static void
4527 vect_get_loop_based_defs (tree *oprnd, stmt_vec_info stmt_info,
4528 			  vec<tree> *vec_oprnds, int multi_step_cvt)
4529 {
4530   vec_info *vinfo = stmt_info->vinfo;
4531   tree vec_oprnd;
4532 
4533   /* Get first vector operand.  */
4534   /* All the vector operands except the very first one (that is scalar oprnd)
4535      are stmt copies.  */
4536   if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
4537     vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt_info);
4538   else
4539     vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, *oprnd);
4540 
4541   vec_oprnds->quick_push (vec_oprnd);
4542 
4543   /* Get second vector operand.  */
4544   vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
4545   vec_oprnds->quick_push (vec_oprnd);
4546 
4547   *oprnd = vec_oprnd;
4548 
4549   /* For conversion in multiple steps, continue to get operands
4550      recursively.  */
4551   if (multi_step_cvt)
4552     vect_get_loop_based_defs (oprnd, stmt_info, vec_oprnds,
4553 			      multi_step_cvt - 1);
4554 }
4555 
4556 
4557 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4558    For multi-step conversions store the resulting vectors and call the function
4559    recursively.  */
4560 
4561 static void
4562 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
4563 				       int multi_step_cvt,
4564 				       stmt_vec_info stmt_info,
4565 				       vec<tree> vec_dsts,
4566 				       gimple_stmt_iterator *gsi,
4567 				       slp_tree slp_node, enum tree_code code,
4568 				       stmt_vec_info *prev_stmt_info)
4569 {
4570   unsigned int i;
4571   tree vop0, vop1, new_tmp, vec_dest;
4572 
4573   vec_dest = vec_dsts.pop ();
4574 
4575   for (i = 0; i < vec_oprnds->length (); i += 2)
4576     {
4577       /* Create demotion operation.  */
4578       vop0 = (*vec_oprnds)[i];
4579       vop1 = (*vec_oprnds)[i + 1];
4580       gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4581       new_tmp = make_ssa_name (vec_dest, new_stmt);
4582       gimple_assign_set_lhs (new_stmt, new_tmp);
4583       stmt_vec_info new_stmt_info
4584 	= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4585 
4586       if (multi_step_cvt)
4587 	/* Store the resulting vector for next recursive call.  */
4588 	(*vec_oprnds)[i/2] = new_tmp;
4589       else
4590 	{
4591 	  /* This is the last step of the conversion sequence. Store the
4592 	     vectors in SLP_NODE or in vector info of the scalar statement
4593 	     (or in STMT_VINFO_RELATED_STMT chain).  */
4594 	  if (slp_node)
4595 	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
4596 	  else
4597 	    {
4598 	      if (!*prev_stmt_info)
4599 		STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
4600 	      else
4601 		STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt_info;
4602 
4603 	      *prev_stmt_info = new_stmt_info;
4604 	    }
4605 	}
4606     }
4607 
4608   /* For multi-step demotion operations we first generate demotion operations
4609      from the source type to the intermediate types, and then combine the
4610      results (stored in VEC_OPRNDS) in demotion operation to the destination
4611      type.  */
4612   if (multi_step_cvt)
4613     {
4614       /* At each level of recursion we have half of the operands we had at the
4615 	 previous level.  */
4616       vec_oprnds->truncate ((i+1)/2);
4617       vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
4618 					     stmt_info, vec_dsts, gsi,
4619 					     slp_node, VEC_PACK_TRUNC_EXPR,
4620 					     prev_stmt_info);
4621     }
4622 
4623   vec_dsts.quick_push (vec_dest);
4624 }
4625 
4626 
4627 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4628    and VEC_OPRNDS1, for a binary operation associated with scalar statement
4629    STMT_INFO.  For multi-step conversions store the resulting vectors and
4630    call the function recursively.  */
4631 
4632 static void
4633 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
4634 					vec<tree> *vec_oprnds1,
4635 					stmt_vec_info stmt_info, tree vec_dest,
4636 					gimple_stmt_iterator *gsi,
4637 					enum tree_code code1,
4638 					enum tree_code code2, tree decl1,
4639 					tree decl2, int op_type)
4640 {
4641   int i;
4642   tree vop0, vop1, new_tmp1, new_tmp2;
4643   gimple *new_stmt1, *new_stmt2;
4644   vec<tree> vec_tmp = vNULL;
4645 
4646   vec_tmp.create (vec_oprnds0->length () * 2);
4647   FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4648     {
4649       if (op_type == binary_op)
4650 	vop1 = (*vec_oprnds1)[i];
4651       else
4652 	vop1 = NULL_TREE;
4653 
4654       /* Generate the two halves of promotion operation.  */
4655       new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
4656 						 op_type, vec_dest, gsi,
4657 						 stmt_info);
4658       new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
4659 						 op_type, vec_dest, gsi,
4660 						 stmt_info);
4661       if (is_gimple_call (new_stmt1))
4662 	{
4663 	  new_tmp1 = gimple_call_lhs (new_stmt1);
4664 	  new_tmp2 = gimple_call_lhs (new_stmt2);
4665 	}
4666       else
4667 	{
4668 	  new_tmp1 = gimple_assign_lhs (new_stmt1);
4669 	  new_tmp2 = gimple_assign_lhs (new_stmt2);
4670 	}
4671 
4672       /* Store the results for the next step.  */
4673       vec_tmp.quick_push (new_tmp1);
4674       vec_tmp.quick_push (new_tmp2);
4675     }
4676 
4677   vec_oprnds0->release ();
4678   *vec_oprnds0 = vec_tmp;
4679 }
4680 
4681 
4682 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4683    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4684    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4685    Return true if STMT_INFO is vectorizable in this way.  */
4686 
4687 static bool
4688 vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
4689 			 stmt_vec_info *vec_stmt, slp_tree slp_node,
4690 			 stmt_vector_for_cost *cost_vec)
4691 {
4692   tree vec_dest;
4693   tree scalar_dest;
4694   tree op0, op1 = NULL_TREE;
4695   tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4696   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4697   enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4698   enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4699   tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4700   tree new_temp;
4701   enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4702   int ndts = 2;
4703   stmt_vec_info prev_stmt_info;
4704   poly_uint64 nunits_in;
4705   poly_uint64 nunits_out;
4706   tree vectype_out, vectype_in;
4707   int ncopies, i, j;
4708   tree lhs_type, rhs_type;
4709   enum { NARROW, NONE, WIDEN } modifier;
4710   vec<tree> vec_oprnds0 = vNULL;
4711   vec<tree> vec_oprnds1 = vNULL;
4712   tree vop0;
4713   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4714   vec_info *vinfo = stmt_info->vinfo;
4715   int multi_step_cvt = 0;
4716   vec<tree> interm_types = vNULL;
4717   tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4718   int op_type;
4719   unsigned short fltsz;
4720 
4721   /* Is STMT a vectorizable conversion?   */
4722 
4723   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4724     return false;
4725 
4726   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4727       && ! vec_stmt)
4728     return false;
4729 
4730   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
4731   if (!stmt)
4732     return false;
4733 
4734   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4735     return false;
4736 
4737   code = gimple_assign_rhs_code (stmt);
4738   if (!CONVERT_EXPR_CODE_P (code)
4739       && code != FIX_TRUNC_EXPR
4740       && code != FLOAT_EXPR
4741       && code != WIDEN_MULT_EXPR
4742       && code != WIDEN_LSHIFT_EXPR)
4743     return false;
4744 
4745   op_type = TREE_CODE_LENGTH (code);
4746 
4747   /* Check types of lhs and rhs.  */
4748   scalar_dest = gimple_assign_lhs (stmt);
4749   lhs_type = TREE_TYPE (scalar_dest);
4750   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4751 
4752   op0 = gimple_assign_rhs1 (stmt);
4753   rhs_type = TREE_TYPE (op0);
4754 
4755   if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4756       && !((INTEGRAL_TYPE_P (lhs_type)
4757 	    && INTEGRAL_TYPE_P (rhs_type))
4758 	   || (SCALAR_FLOAT_TYPE_P (lhs_type)
4759 	       && SCALAR_FLOAT_TYPE_P (rhs_type))))
4760     return false;
4761 
4762   if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4763       && ((INTEGRAL_TYPE_P (lhs_type)
4764 	   && !type_has_mode_precision_p (lhs_type))
4765 	  || (INTEGRAL_TYPE_P (rhs_type)
4766 	      && !type_has_mode_precision_p (rhs_type))))
4767     {
4768       if (dump_enabled_p ())
4769 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4770                          "type conversion to/from bit-precision unsupported."
4771                          "\n");
4772       return false;
4773     }
4774 
4775   /* Check the operands of the operation.  */
4776   if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype_in))
4777     {
4778       if (dump_enabled_p ())
4779 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4780                          "use not simple.\n");
4781       return false;
4782     }
4783   if (op_type == binary_op)
4784     {
4785       bool ok;
4786 
4787       op1 = gimple_assign_rhs2 (stmt);
4788       gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4789       /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4790 	 OP1.  */
4791       if (CONSTANT_CLASS_P (op0))
4792 	ok = vect_is_simple_use (op1, vinfo, &dt[1], &vectype_in);
4793       else
4794 	ok = vect_is_simple_use (op1, vinfo, &dt[1]);
4795 
4796       if (!ok)
4797 	{
4798           if (dump_enabled_p ())
4799             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4800                              "use not simple.\n");
4801 	  return false;
4802 	}
4803     }
4804 
4805   /* If op0 is an external or constant defs use a vector type of
4806      the same size as the output vector type.  */
4807   if (!vectype_in)
4808     vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
4809   if (vec_stmt)
4810     gcc_assert (vectype_in);
4811   if (!vectype_in)
4812     {
4813       if (dump_enabled_p ())
4814 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4815 			 "no vectype for scalar type %T\n", rhs_type);
4816 
4817       return false;
4818     }
4819 
4820   if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4821       && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4822     {
4823       if (dump_enabled_p ())
4824 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4825 			 "can't convert between boolean and non "
4826 			 "boolean vectors %T\n", rhs_type);
4827 
4828       return false;
4829     }
4830 
4831   nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4832   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4833   if (known_eq (nunits_out, nunits_in))
4834     modifier = NONE;
4835   else if (multiple_p (nunits_out, nunits_in))
4836     modifier = NARROW;
4837   else
4838     {
4839       gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4840       modifier = WIDEN;
4841     }
4842 
4843   /* Multiple types in SLP are handled by creating the appropriate number of
4844      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
4845      case of SLP.  */
4846   if (slp_node)
4847     ncopies = 1;
4848   else if (modifier == NARROW)
4849     ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4850   else
4851     ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4852 
4853   /* Sanity check: make sure that at least one copy of the vectorized stmt
4854      needs to be generated.  */
4855   gcc_assert (ncopies >= 1);
4856 
4857   bool found_mode = false;
4858   scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4859   scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4860   opt_scalar_mode rhs_mode_iter;
4861 
4862   /* Supportable by target?  */
4863   switch (modifier)
4864     {
4865     case NONE:
4866       if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4867 	return false;
4868       if (supportable_convert_operation (code, vectype_out, vectype_in,
4869 					 &decl1, &code1))
4870 	break;
4871       /* FALLTHRU */
4872     unsupported:
4873       if (dump_enabled_p ())
4874 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4875                          "conversion not supported by target.\n");
4876       return false;
4877 
4878     case WIDEN:
4879       if (supportable_widening_operation (code, stmt_info, vectype_out,
4880 					  vectype_in, &code1, &code2,
4881 					  &multi_step_cvt, &interm_types))
4882 	{
4883 	  /* Binary widening operation can only be supported directly by the
4884 	     architecture.  */
4885 	  gcc_assert (!(multi_step_cvt && op_type == binary_op));
4886 	  break;
4887 	}
4888 
4889       if (code != FLOAT_EXPR
4890 	  || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4891 	goto unsupported;
4892 
4893       fltsz = GET_MODE_SIZE (lhs_mode);
4894       FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4895 	{
4896 	  rhs_mode = rhs_mode_iter.require ();
4897 	  if (GET_MODE_SIZE (rhs_mode) > fltsz)
4898 	    break;
4899 
4900 	  cvt_type
4901 	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4902 	  cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4903 	  if (cvt_type == NULL_TREE)
4904 	    goto unsupported;
4905 
4906 	  if (GET_MODE_SIZE (rhs_mode) == fltsz)
4907 	    {
4908 	      if (!supportable_convert_operation (code, vectype_out,
4909 						  cvt_type, &decl1, &codecvt1))
4910 		goto unsupported;
4911 	    }
4912 	  else if (!supportable_widening_operation (code, stmt_info,
4913 						    vectype_out, cvt_type,
4914 						    &codecvt1, &codecvt2,
4915 						    &multi_step_cvt,
4916 						    &interm_types))
4917 	    continue;
4918 	  else
4919 	    gcc_assert (multi_step_cvt == 0);
4920 
4921 	  if (supportable_widening_operation (NOP_EXPR, stmt_info, cvt_type,
4922 					      vectype_in, &code1, &code2,
4923 					      &multi_step_cvt, &interm_types))
4924 	    {
4925 	      found_mode = true;
4926 	      break;
4927 	    }
4928 	}
4929 
4930       if (!found_mode)
4931 	goto unsupported;
4932 
4933       if (GET_MODE_SIZE (rhs_mode) == fltsz)
4934 	codecvt2 = ERROR_MARK;
4935       else
4936 	{
4937 	  multi_step_cvt++;
4938 	  interm_types.safe_push (cvt_type);
4939 	  cvt_type = NULL_TREE;
4940 	}
4941       break;
4942 
4943     case NARROW:
4944       gcc_assert (op_type == unary_op);
4945       if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4946 					   &code1, &multi_step_cvt,
4947 					   &interm_types))
4948 	break;
4949 
4950       if (code != FIX_TRUNC_EXPR
4951 	  || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4952 	goto unsupported;
4953 
4954       cvt_type
4955 	= build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4956       cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4957       if (cvt_type == NULL_TREE)
4958 	goto unsupported;
4959       if (!supportable_convert_operation (code, cvt_type, vectype_in,
4960 					  &decl1, &codecvt1))
4961 	goto unsupported;
4962       if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4963 					   &code1, &multi_step_cvt,
4964 					   &interm_types))
4965 	break;
4966       goto unsupported;
4967 
4968     default:
4969       gcc_unreachable ();
4970     }
4971 
4972   if (!vec_stmt)		/* transformation not required.  */
4973     {
4974       DUMP_VECT_SCOPE ("vectorizable_conversion");
4975       if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
4976         {
4977 	  STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4978 	  vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node,
4979 				  cost_vec);
4980 	}
4981       else if (modifier == NARROW)
4982 	{
4983 	  STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4984 	  vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
4985 					      cost_vec);
4986 	}
4987       else
4988 	{
4989 	  STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4990 	  vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
4991 					      cost_vec);
4992 	}
4993       interm_types.release ();
4994       return true;
4995     }
4996 
4997   /* Transform.  */
4998   if (dump_enabled_p ())
4999     dump_printf_loc (MSG_NOTE, vect_location,
5000                      "transform conversion. ncopies = %d.\n", ncopies);
5001 
5002   if (op_type == binary_op)
5003     {
5004       if (CONSTANT_CLASS_P (op0))
5005 	op0 = fold_convert (TREE_TYPE (op1), op0);
5006       else if (CONSTANT_CLASS_P (op1))
5007 	op1 = fold_convert (TREE_TYPE (op0), op1);
5008     }
5009 
5010   /* In case of multi-step conversion, we first generate conversion operations
5011      to the intermediate types, and then from that types to the final one.
5012      We create vector destinations for the intermediate type (TYPES) received
5013      from supportable_*_operation, and store them in the correct order
5014      for future use in vect_create_vectorized_*_stmts ().  */
5015   auto_vec<tree> vec_dsts (multi_step_cvt + 1);
5016   vec_dest = vect_create_destination_var (scalar_dest,
5017 					  (cvt_type && modifier == WIDEN)
5018 					  ? cvt_type : vectype_out);
5019   vec_dsts.quick_push (vec_dest);
5020 
5021   if (multi_step_cvt)
5022     {
5023       for (i = interm_types.length () - 1;
5024 	   interm_types.iterate (i, &intermediate_type); i--)
5025 	{
5026 	  vec_dest = vect_create_destination_var (scalar_dest,
5027 						  intermediate_type);
5028 	  vec_dsts.quick_push (vec_dest);
5029 	}
5030     }
5031 
5032   if (cvt_type)
5033     vec_dest = vect_create_destination_var (scalar_dest,
5034 					    modifier == WIDEN
5035 					    ? vectype_out : cvt_type);
5036 
5037   if (!slp_node)
5038     {
5039       if (modifier == WIDEN)
5040 	{
5041 	  vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
5042 	  if (op_type == binary_op)
5043 	    vec_oprnds1.create (1);
5044 	}
5045       else if (modifier == NARROW)
5046 	vec_oprnds0.create (
5047 		   2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
5048     }
5049   else if (code == WIDEN_LSHIFT_EXPR)
5050     vec_oprnds1.create (slp_node->vec_stmts_size);
5051 
5052   last_oprnd = op0;
5053   prev_stmt_info = NULL;
5054   switch (modifier)
5055     {
5056     case NONE:
5057       for (j = 0; j < ncopies; j++)
5058 	{
5059 	  if (j == 0)
5060 	    vect_get_vec_defs (op0, NULL, stmt_info, &vec_oprnds0,
5061 			       NULL, slp_node);
5062 	  else
5063 	    vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, NULL);
5064 
5065 	  FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5066 	    {
5067 	      stmt_vec_info new_stmt_info;
5068 	      /* Arguments are ready, create the new vector stmt.  */
5069 	      if (code1 == CALL_EXPR)
5070 		{
5071 		  gcall *new_stmt = gimple_build_call (decl1, 1, vop0);
5072 		  new_temp = make_ssa_name (vec_dest, new_stmt);
5073 		  gimple_call_set_lhs (new_stmt, new_temp);
5074 		  new_stmt_info
5075 		    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5076 		}
5077 	      else
5078 		{
5079 		  gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
5080 		  gassign *new_stmt
5081 		    = gimple_build_assign (vec_dest, code1, vop0);
5082 		  new_temp = make_ssa_name (vec_dest, new_stmt);
5083 		  gimple_assign_set_lhs (new_stmt, new_temp);
5084 		  new_stmt_info
5085 		    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5086 		}
5087 
5088 	      if (slp_node)
5089 		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5090 	      else
5091 		{
5092 		  if (!prev_stmt_info)
5093 		    STMT_VINFO_VEC_STMT (stmt_info)
5094 		      = *vec_stmt = new_stmt_info;
5095 		  else
5096 		    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5097 		  prev_stmt_info = new_stmt_info;
5098 		}
5099 	    }
5100 	}
5101       break;
5102 
5103     case WIDEN:
5104       /* In case the vectorization factor (VF) is bigger than the number
5105 	 of elements that we can fit in a vectype (nunits), we have to
5106 	 generate more than one vector stmt - i.e - we need to "unroll"
5107 	 the vector stmt by a factor VF/nunits.  */
5108       for (j = 0; j < ncopies; j++)
5109 	{
5110 	  /* Handle uses.  */
5111 	  if (j == 0)
5112 	    {
5113 	      if (slp_node)
5114 		{
5115 		  if (code == WIDEN_LSHIFT_EXPR)
5116 		    {
5117 		      unsigned int k;
5118 
5119 		      vec_oprnd1 = op1;
5120 		      /* Store vec_oprnd1 for every vector stmt to be created
5121 			 for SLP_NODE.  We check during the analysis that all
5122 			 the shift arguments are the same.  */
5123 		      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5124 			vec_oprnds1.quick_push (vec_oprnd1);
5125 
5126 		      vect_get_vec_defs (op0, NULL_TREE, stmt_info,
5127 					 &vec_oprnds0, NULL, slp_node);
5128 		    }
5129 		  else
5130 		    vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0,
5131 				       &vec_oprnds1, slp_node);
5132 		}
5133 	      else
5134 		{
5135 		  vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt_info);
5136 		  vec_oprnds0.quick_push (vec_oprnd0);
5137 		  if (op_type == binary_op)
5138 		    {
5139 		      if (code == WIDEN_LSHIFT_EXPR)
5140 			vec_oprnd1 = op1;
5141 		      else
5142 			vec_oprnd1
5143 			  = vect_get_vec_def_for_operand (op1, stmt_info);
5144 		      vec_oprnds1.quick_push (vec_oprnd1);
5145 		    }
5146 		}
5147 	    }
5148 	  else
5149 	    {
5150 	      vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
5151 	      vec_oprnds0.truncate (0);
5152 	      vec_oprnds0.quick_push (vec_oprnd0);
5153 	      if (op_type == binary_op)
5154 		{
5155 		  if (code == WIDEN_LSHIFT_EXPR)
5156 		    vec_oprnd1 = op1;
5157 		  else
5158 		    vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
5159 								 vec_oprnd1);
5160 		  vec_oprnds1.truncate (0);
5161 		  vec_oprnds1.quick_push (vec_oprnd1);
5162 		}
5163 	    }
5164 
5165 	  /* Arguments are ready.  Create the new vector stmts.  */
5166 	  for (i = multi_step_cvt; i >= 0; i--)
5167 	    {
5168 	      tree this_dest = vec_dsts[i];
5169 	      enum tree_code c1 = code1, c2 = code2;
5170 	      if (i == 0 && codecvt2 != ERROR_MARK)
5171 		{
5172 		  c1 = codecvt1;
5173 		  c2 = codecvt2;
5174 		}
5175 	      vect_create_vectorized_promotion_stmts (&vec_oprnds0,
5176 						      &vec_oprnds1, stmt_info,
5177 						      this_dest, gsi,
5178 						      c1, c2, decl1, decl2,
5179 						      op_type);
5180 	    }
5181 
5182 	  FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5183 	    {
5184 	      stmt_vec_info new_stmt_info;
5185 	      if (cvt_type)
5186 		{
5187 		  if (codecvt1 == CALL_EXPR)
5188 		    {
5189 		      gcall *new_stmt = gimple_build_call (decl1, 1, vop0);
5190 		      new_temp = make_ssa_name (vec_dest, new_stmt);
5191 		      gimple_call_set_lhs (new_stmt, new_temp);
5192 		      new_stmt_info
5193 			= vect_finish_stmt_generation (stmt_info, new_stmt,
5194 						       gsi);
5195 		    }
5196 		  else
5197 		    {
5198 		      gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5199 		      new_temp = make_ssa_name (vec_dest);
5200 		      gassign *new_stmt
5201 			= gimple_build_assign (new_temp, codecvt1, vop0);
5202 		      new_stmt_info
5203 			= vect_finish_stmt_generation (stmt_info, new_stmt,
5204 						       gsi);
5205 		    }
5206 		}
5207 	      else
5208 		new_stmt_info = vinfo->lookup_def (vop0);
5209 
5210 	      if (slp_node)
5211 		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5212 	      else
5213 		{
5214 		  if (!prev_stmt_info)
5215 		    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
5216 		  else
5217 		    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5218 		  prev_stmt_info = new_stmt_info;
5219 		}
5220 	    }
5221 	}
5222 
5223       *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5224       break;
5225 
5226     case NARROW:
5227       /* In case the vectorization factor (VF) is bigger than the number
5228 	 of elements that we can fit in a vectype (nunits), we have to
5229 	 generate more than one vector stmt - i.e - we need to "unroll"
5230 	 the vector stmt by a factor VF/nunits.  */
5231       for (j = 0; j < ncopies; j++)
5232 	{
5233 	  /* Handle uses.  */
5234 	  if (slp_node)
5235 	    vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
5236 			       slp_node);
5237 	  else
5238 	    {
5239 	      vec_oprnds0.truncate (0);
5240 	      vect_get_loop_based_defs (&last_oprnd, stmt_info, &vec_oprnds0,
5241 					vect_pow2 (multi_step_cvt) - 1);
5242 	    }
5243 
5244 	  /* Arguments are ready.  Create the new vector stmts.  */
5245 	  if (cvt_type)
5246 	    FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5247 	      {
5248 		if (codecvt1 == CALL_EXPR)
5249 		  {
5250 		    gcall *new_stmt = gimple_build_call (decl1, 1, vop0);
5251 		    new_temp = make_ssa_name (vec_dest, new_stmt);
5252 		    gimple_call_set_lhs (new_stmt, new_temp);
5253 		    vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5254 		  }
5255 		else
5256 		  {
5257 		    gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5258 		    new_temp = make_ssa_name (vec_dest);
5259 		    gassign *new_stmt
5260 		      = gimple_build_assign (new_temp, codecvt1, vop0);
5261 		    vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5262 		  }
5263 
5264 		vec_oprnds0[i] = new_temp;
5265 	      }
5266 
5267 	  vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
5268 						 stmt_info, vec_dsts, gsi,
5269 						 slp_node, code1,
5270 						 &prev_stmt_info);
5271 	}
5272 
5273       *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5274       break;
5275     }
5276 
5277   vec_oprnds0.release ();
5278   vec_oprnds1.release ();
5279   interm_types.release ();
5280 
5281   return true;
5282 }
5283 
5284 
5285 /* Function vectorizable_assignment.
5286 
5287    Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5288    If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5289    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5290    Return true if STMT_INFO is vectorizable in this way.  */
5291 
5292 static bool
5293 vectorizable_assignment (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5294 			 stmt_vec_info *vec_stmt, slp_tree slp_node,
5295 			 stmt_vector_for_cost *cost_vec)
5296 {
5297   tree vec_dest;
5298   tree scalar_dest;
5299   tree op;
5300   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5301   tree new_temp;
5302   enum vect_def_type dt[1] = {vect_unknown_def_type};
5303   int ndts = 1;
5304   int ncopies;
5305   int i, j;
5306   vec<tree> vec_oprnds = vNULL;
5307   tree vop;
5308   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5309   vec_info *vinfo = stmt_info->vinfo;
5310   stmt_vec_info prev_stmt_info = NULL;
5311   enum tree_code code;
5312   tree vectype_in;
5313 
5314   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5315     return false;
5316 
5317   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5318       && ! vec_stmt)
5319     return false;
5320 
5321   /* Is vectorizable assignment?  */
5322   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5323   if (!stmt)
5324     return false;
5325 
5326   scalar_dest = gimple_assign_lhs (stmt);
5327   if (TREE_CODE (scalar_dest) != SSA_NAME)
5328     return false;
5329 
5330   code = gimple_assign_rhs_code (stmt);
5331   if (gimple_assign_single_p (stmt)
5332       || code == PAREN_EXPR
5333       || CONVERT_EXPR_CODE_P (code))
5334     op = gimple_assign_rhs1 (stmt);
5335   else
5336     return false;
5337 
5338   if (code == VIEW_CONVERT_EXPR)
5339     op = TREE_OPERAND (op, 0);
5340 
5341   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5342   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5343 
5344   /* Multiple types in SLP are handled by creating the appropriate number of
5345      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
5346      case of SLP.  */
5347   if (slp_node)
5348     ncopies = 1;
5349   else
5350     ncopies = vect_get_num_copies (loop_vinfo, vectype);
5351 
5352   gcc_assert (ncopies >= 1);
5353 
5354   if (!vect_is_simple_use (op, vinfo, &dt[0], &vectype_in))
5355     {
5356       if (dump_enabled_p ())
5357         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5358                          "use not simple.\n");
5359       return false;
5360     }
5361 
5362   /* We can handle NOP_EXPR conversions that do not change the number
5363      of elements or the vector size.  */
5364   if ((CONVERT_EXPR_CODE_P (code)
5365        || code == VIEW_CONVERT_EXPR)
5366       && (!vectype_in
5367 	  || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5368 	  || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5369 		       GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5370     return false;
5371 
5372   /* We do not handle bit-precision changes.  */
5373   if ((CONVERT_EXPR_CODE_P (code)
5374        || code == VIEW_CONVERT_EXPR)
5375       && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5376       && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5377 	  || !type_has_mode_precision_p (TREE_TYPE (op)))
5378       /* But a conversion that does not change the bit-pattern is ok.  */
5379       && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5380 	    > TYPE_PRECISION (TREE_TYPE (op)))
5381 	   && TYPE_UNSIGNED (TREE_TYPE (op)))
5382       /* Conversion between boolean types of different sizes is
5383 	 a simple assignment in case their vectypes are same
5384 	 boolean vectors.  */
5385       && (!VECTOR_BOOLEAN_TYPE_P (vectype)
5386 	  || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
5387     {
5388       if (dump_enabled_p ())
5389         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5390                          "type conversion to/from bit-precision "
5391                          "unsupported.\n");
5392       return false;
5393     }
5394 
5395   if (!vec_stmt) /* transformation not required.  */
5396     {
5397       STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5398       DUMP_VECT_SCOPE ("vectorizable_assignment");
5399       vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
5400       return true;
5401     }
5402 
5403   /* Transform.  */
5404   if (dump_enabled_p ())
5405     dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5406 
5407   /* Handle def.  */
5408   vec_dest = vect_create_destination_var (scalar_dest, vectype);
5409 
5410   /* Handle use.  */
5411   for (j = 0; j < ncopies; j++)
5412     {
5413       /* Handle uses.  */
5414       if (j == 0)
5415 	vect_get_vec_defs (op, NULL, stmt_info, &vec_oprnds, NULL, slp_node);
5416       else
5417 	vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds, NULL);
5418 
5419       /* Arguments are ready. create the new vector stmt.  */
5420       stmt_vec_info new_stmt_info = NULL;
5421       FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5422        {
5423 	 if (CONVERT_EXPR_CODE_P (code)
5424 	     || code == VIEW_CONVERT_EXPR)
5425 	   vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5426 	 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
5427          new_temp = make_ssa_name (vec_dest, new_stmt);
5428          gimple_assign_set_lhs (new_stmt, new_temp);
5429 	 new_stmt_info
5430 	   = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5431          if (slp_node)
5432 	   SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5433        }
5434 
5435       if (slp_node)
5436         continue;
5437 
5438       if (j == 0)
5439 	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
5440       else
5441 	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5442 
5443       prev_stmt_info = new_stmt_info;
5444     }
5445 
5446   vec_oprnds.release ();
5447   return true;
5448 }
5449 
5450 
5451 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5452    either as shift by a scalar or by a vector.  */
5453 
5454 bool
5455 vect_supportable_shift (enum tree_code code, tree scalar_type)
5456 {
5457 
5458   machine_mode vec_mode;
5459   optab optab;
5460   int icode;
5461   tree vectype;
5462 
5463   vectype = get_vectype_for_scalar_type (scalar_type);
5464   if (!vectype)
5465     return false;
5466 
5467   optab = optab_for_tree_code (code, vectype, optab_scalar);
5468   if (!optab
5469       || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5470     {
5471       optab = optab_for_tree_code (code, vectype, optab_vector);
5472       if (!optab
5473           || (optab_handler (optab, TYPE_MODE (vectype))
5474                       == CODE_FOR_nothing))
5475         return false;
5476     }
5477 
5478   vec_mode = TYPE_MODE (vectype);
5479   icode = (int) optab_handler (optab, vec_mode);
5480   if (icode == CODE_FOR_nothing)
5481     return false;
5482 
5483   return true;
5484 }
5485 
5486 
5487 /* Function vectorizable_shift.
5488 
5489    Check if STMT_INFO performs a shift operation that can be vectorized.
5490    If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5491    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5492    Return true if STMT_INFO is vectorizable in this way.  */
5493 
5494 bool
5495 vectorizable_shift (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5496 		    stmt_vec_info *vec_stmt, slp_tree slp_node,
5497 		    stmt_vector_for_cost *cost_vec)
5498 {
5499   tree vec_dest;
5500   tree scalar_dest;
5501   tree op0, op1 = NULL;
5502   tree vec_oprnd1 = NULL_TREE;
5503   tree vectype;
5504   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5505   enum tree_code code;
5506   machine_mode vec_mode;
5507   tree new_temp;
5508   optab optab;
5509   int icode;
5510   machine_mode optab_op2_mode;
5511   enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5512   int ndts = 2;
5513   stmt_vec_info prev_stmt_info;
5514   poly_uint64 nunits_in;
5515   poly_uint64 nunits_out;
5516   tree vectype_out;
5517   tree op1_vectype;
5518   int ncopies;
5519   int j, i;
5520   vec<tree> vec_oprnds0 = vNULL;
5521   vec<tree> vec_oprnds1 = vNULL;
5522   tree vop0, vop1;
5523   unsigned int k;
5524   bool scalar_shift_arg = true;
5525   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5526   vec_info *vinfo = stmt_info->vinfo;
5527 
5528   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5529     return false;
5530 
5531   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5532       && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
5533       && ! vec_stmt)
5534     return false;
5535 
5536   /* Is STMT a vectorizable binary/unary operation?   */
5537   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5538   if (!stmt)
5539     return false;
5540 
5541   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5542     return false;
5543 
5544   code = gimple_assign_rhs_code (stmt);
5545 
5546   if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5547       || code == RROTATE_EXPR))
5548     return false;
5549 
5550   scalar_dest = gimple_assign_lhs (stmt);
5551   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5552   if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5553     {
5554       if (dump_enabled_p ())
5555         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5556                          "bit-precision shifts not supported.\n");
5557       return false;
5558     }
5559 
5560   op0 = gimple_assign_rhs1 (stmt);
5561   if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
5562     {
5563       if (dump_enabled_p ())
5564         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5565                          "use not simple.\n");
5566       return false;
5567     }
5568   /* If op0 is an external or constant def use a vector type with
5569      the same size as the output vector type.  */
5570   if (!vectype)
5571     vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5572   if (vec_stmt)
5573     gcc_assert (vectype);
5574   if (!vectype)
5575     {
5576       if (dump_enabled_p ())
5577         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5578                          "no vectype for scalar type\n");
5579       return false;
5580     }
5581 
5582   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5583   nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5584   if (maybe_ne (nunits_out, nunits_in))
5585     return false;
5586 
5587   op1 = gimple_assign_rhs2 (stmt);
5588   stmt_vec_info op1_def_stmt_info;
5589   if (!vect_is_simple_use (op1, vinfo, &dt[1], &op1_vectype,
5590 			   &op1_def_stmt_info))
5591     {
5592       if (dump_enabled_p ())
5593         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5594                          "use not simple.\n");
5595       return false;
5596     }
5597 
5598   /* Multiple types in SLP are handled by creating the appropriate number of
5599      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
5600      case of SLP.  */
5601   if (slp_node)
5602     ncopies = 1;
5603   else
5604     ncopies = vect_get_num_copies (loop_vinfo, vectype);
5605 
5606   gcc_assert (ncopies >= 1);
5607 
5608   /* Determine whether the shift amount is a vector, or scalar.  If the
5609      shift/rotate amount is a vector, use the vector/vector shift optabs.  */
5610 
5611   if ((dt[1] == vect_internal_def
5612        || dt[1] == vect_induction_def
5613        || dt[1] == vect_nested_cycle)
5614       && !slp_node)
5615     scalar_shift_arg = false;
5616   else if (dt[1] == vect_constant_def
5617 	   || dt[1] == vect_external_def
5618 	   || dt[1] == vect_internal_def)
5619     {
5620       /* In SLP, need to check whether the shift count is the same,
5621 	 in loops if it is a constant or invariant, it is always
5622 	 a scalar shift.  */
5623       if (slp_node)
5624 	{
5625 	  vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5626 	  stmt_vec_info slpstmt_info;
5627 
5628 	  FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
5629 	    {
5630 	      gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
5631 	      if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5632 		scalar_shift_arg = false;
5633 	    }
5634 
5635 	  /* For internal SLP defs we have to make sure we see scalar stmts
5636 	     for all vector elements.
5637 	     ???  For different vectors we could resort to a different
5638 	     scalar shift operand but code-generation below simply always
5639 	     takes the first.  */
5640 	  if (dt[1] == vect_internal_def
5641 	      && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
5642 			   stmts.length ()))
5643 	    scalar_shift_arg = false;
5644 	}
5645 
5646       /* If the shift amount is computed by a pattern stmt we cannot
5647          use the scalar amount directly thus give up and use a vector
5648 	 shift.  */
5649       if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
5650 	scalar_shift_arg = false;
5651     }
5652   else
5653     {
5654       if (dump_enabled_p ())
5655         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5656                          "operand mode requires invariant argument.\n");
5657       return false;
5658     }
5659 
5660   /* Vector shifted by vector.  */
5661   if (!scalar_shift_arg)
5662     {
5663       optab = optab_for_tree_code (code, vectype, optab_vector);
5664       if (dump_enabled_p ())
5665         dump_printf_loc (MSG_NOTE, vect_location,
5666                          "vector/vector shift/rotate found.\n");
5667 
5668       if (!op1_vectype)
5669 	op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
5670       if (op1_vectype == NULL_TREE
5671 	  || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
5672 	{
5673 	  if (dump_enabled_p ())
5674 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5675                              "unusable type for last operand in"
5676                              " vector/vector shift/rotate.\n");
5677 	  return false;
5678 	}
5679     }
5680   /* See if the machine has a vector shifted by scalar insn and if not
5681      then see if it has a vector shifted by vector insn.  */
5682   else
5683     {
5684       optab = optab_for_tree_code (code, vectype, optab_scalar);
5685       if (optab
5686           && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5687         {
5688           if (dump_enabled_p ())
5689             dump_printf_loc (MSG_NOTE, vect_location,
5690                              "vector/scalar shift/rotate found.\n");
5691         }
5692       else
5693         {
5694           optab = optab_for_tree_code (code, vectype, optab_vector);
5695           if (optab
5696                && (optab_handler (optab, TYPE_MODE (vectype))
5697                       != CODE_FOR_nothing))
5698             {
5699 	      scalar_shift_arg = false;
5700 
5701               if (dump_enabled_p ())
5702                 dump_printf_loc (MSG_NOTE, vect_location,
5703                                  "vector/vector shift/rotate found.\n");
5704 
5705               /* Unlike the other binary operators, shifts/rotates have
5706                  the rhs being int, instead of the same type as the lhs,
5707                  so make sure the scalar is the right type if we are
5708 		 dealing with vectors of long long/long/short/char.  */
5709               if (dt[1] == vect_constant_def)
5710                 op1 = fold_convert (TREE_TYPE (vectype), op1);
5711 	      else if (!useless_type_conversion_p (TREE_TYPE (vectype),
5712 						   TREE_TYPE (op1)))
5713 		{
5714 		  if (slp_node
5715 		      && TYPE_MODE (TREE_TYPE (vectype))
5716 			 != TYPE_MODE (TREE_TYPE (op1)))
5717 		    {
5718                       if (dump_enabled_p ())
5719                         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5720                                          "unusable type for last operand in"
5721                                          " vector/vector shift/rotate.\n");
5722 		      return false;
5723 		    }
5724 		  if (vec_stmt && !slp_node)
5725 		    {
5726 		      op1 = fold_convert (TREE_TYPE (vectype), op1);
5727 		      op1 = vect_init_vector (stmt_info, op1,
5728 					      TREE_TYPE (vectype), NULL);
5729 		    }
5730 		}
5731             }
5732         }
5733     }
5734 
5735   /* Supportable by target?  */
5736   if (!optab)
5737     {
5738       if (dump_enabled_p ())
5739         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5740                          "no optab.\n");
5741       return false;
5742     }
5743   vec_mode = TYPE_MODE (vectype);
5744   icode = (int) optab_handler (optab, vec_mode);
5745   if (icode == CODE_FOR_nothing)
5746     {
5747       if (dump_enabled_p ())
5748         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5749                          "op not supported by target.\n");
5750       /* Check only during analysis.  */
5751       if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5752 	  || (!vec_stmt
5753 	      && !vect_worthwhile_without_simd_p (vinfo, code)))
5754         return false;
5755       if (dump_enabled_p ())
5756         dump_printf_loc (MSG_NOTE, vect_location,
5757                          "proceeding using word mode.\n");
5758     }
5759 
5760   /* Worthwhile without SIMD support?  Check only during analysis.  */
5761   if (!vec_stmt
5762       && !VECTOR_MODE_P (TYPE_MODE (vectype))
5763       && !vect_worthwhile_without_simd_p (vinfo, code))
5764     {
5765       if (dump_enabled_p ())
5766         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5767                          "not worthwhile without SIMD support.\n");
5768       return false;
5769     }
5770 
5771   if (!vec_stmt) /* transformation not required.  */
5772     {
5773       STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5774       DUMP_VECT_SCOPE ("vectorizable_shift");
5775       vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
5776       return true;
5777     }
5778 
5779   /* Transform.  */
5780 
5781   if (dump_enabled_p ())
5782     dump_printf_loc (MSG_NOTE, vect_location,
5783                      "transform binary/unary operation.\n");
5784 
5785   /* Handle def.  */
5786   vec_dest = vect_create_destination_var (scalar_dest, vectype);
5787 
5788   prev_stmt_info = NULL;
5789   for (j = 0; j < ncopies; j++)
5790     {
5791       /* Handle uses.  */
5792       if (j == 0)
5793         {
5794           if (scalar_shift_arg)
5795             {
5796               /* Vector shl and shr insn patterns can be defined with scalar
5797                  operand 2 (shift operand).  In this case, use constant or loop
5798                  invariant op1 directly, without extending it to vector mode
5799                  first.  */
5800               optab_op2_mode = insn_data[icode].operand[2].mode;
5801               if (!VECTOR_MODE_P (optab_op2_mode))
5802                 {
5803                   if (dump_enabled_p ())
5804                     dump_printf_loc (MSG_NOTE, vect_location,
5805                                      "operand 1 using scalar mode.\n");
5806                   vec_oprnd1 = op1;
5807                   vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
5808                   vec_oprnds1.quick_push (vec_oprnd1);
5809                   if (slp_node)
5810                     {
5811                       /* Store vec_oprnd1 for every vector stmt to be created
5812                          for SLP_NODE.  We check during the analysis that all
5813                          the shift arguments are the same.
5814                          TODO: Allow different constants for different vector
5815                          stmts generated for an SLP instance.  */
5816                       for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5817                         vec_oprnds1.quick_push (vec_oprnd1);
5818                     }
5819                 }
5820             }
5821 
5822           /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5823              (a special case for certain kind of vector shifts); otherwise,
5824              operand 1 should be of a vector type (the usual case).  */
5825           if (vec_oprnd1)
5826 	    vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
5827 			       slp_node);
5828           else
5829 	    vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0, &vec_oprnds1,
5830 			       slp_node);
5831         }
5832       else
5833 	vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, &vec_oprnds1);
5834 
5835       /* Arguments are ready.  Create the new vector stmt.  */
5836       stmt_vec_info new_stmt_info = NULL;
5837       FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5838         {
5839           vop1 = vec_oprnds1[i];
5840 	  gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5841           new_temp = make_ssa_name (vec_dest, new_stmt);
5842           gimple_assign_set_lhs (new_stmt, new_temp);
5843 	  new_stmt_info
5844 	    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5845           if (slp_node)
5846 	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5847         }
5848 
5849       if (slp_node)
5850         continue;
5851 
5852       if (j == 0)
5853 	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
5854       else
5855 	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5856       prev_stmt_info = new_stmt_info;
5857     }
5858 
5859   vec_oprnds0.release ();
5860   vec_oprnds1.release ();
5861 
5862   return true;
5863 }
5864 
5865 
5866 /* Function vectorizable_operation.
5867 
5868    Check if STMT_INFO performs a binary, unary or ternary operation that can
5869    be vectorized.
5870    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5871    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5872    Return true if STMT_INFO is vectorizable in this way.  */
5873 
5874 static bool
5875 vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5876 			stmt_vec_info *vec_stmt, slp_tree slp_node,
5877 			stmt_vector_for_cost *cost_vec)
5878 {
5879   tree vec_dest;
5880   tree scalar_dest;
5881   tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5882   tree vectype;
5883   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5884   enum tree_code code, orig_code;
5885   machine_mode vec_mode;
5886   tree new_temp;
5887   int op_type;
5888   optab optab;
5889   bool target_support_p;
5890   enum vect_def_type dt[3]
5891     = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5892   int ndts = 3;
5893   stmt_vec_info prev_stmt_info;
5894   poly_uint64 nunits_in;
5895   poly_uint64 nunits_out;
5896   tree vectype_out;
5897   int ncopies;
5898   int j, i;
5899   vec<tree> vec_oprnds0 = vNULL;
5900   vec<tree> vec_oprnds1 = vNULL;
5901   vec<tree> vec_oprnds2 = vNULL;
5902   tree vop0, vop1, vop2;
5903   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5904   vec_info *vinfo = stmt_info->vinfo;
5905 
5906   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5907     return false;
5908 
5909   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5910       && ! vec_stmt)
5911     return false;
5912 
5913   /* Is STMT a vectorizable binary/unary operation?   */
5914   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5915   if (!stmt)
5916     return false;
5917 
5918   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5919     return false;
5920 
5921   orig_code = code = gimple_assign_rhs_code (stmt);
5922 
5923   /* For pointer addition and subtraction, we should use the normal
5924      plus and minus for the vector operation.  */
5925   if (code == POINTER_PLUS_EXPR)
5926     code = PLUS_EXPR;
5927   if (code == POINTER_DIFF_EXPR)
5928     code = MINUS_EXPR;
5929 
5930   /* Support only unary or binary operations.  */
5931   op_type = TREE_CODE_LENGTH (code);
5932   if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
5933     {
5934       if (dump_enabled_p ())
5935         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5936                          "num. args = %d (not unary/binary/ternary op).\n",
5937                          op_type);
5938       return false;
5939     }
5940 
5941   scalar_dest = gimple_assign_lhs (stmt);
5942   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5943 
5944   /* Most operations cannot handle bit-precision types without extra
5945      truncations.  */
5946   if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5947       && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5948       /* Exception are bitwise binary operations.  */
5949       && code != BIT_IOR_EXPR
5950       && code != BIT_XOR_EXPR
5951       && code != BIT_AND_EXPR)
5952     {
5953       if (dump_enabled_p ())
5954         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5955                          "bit-precision arithmetic not supported.\n");
5956       return false;
5957     }
5958 
5959   op0 = gimple_assign_rhs1 (stmt);
5960   if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
5961     {
5962       if (dump_enabled_p ())
5963         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5964                          "use not simple.\n");
5965       return false;
5966     }
5967   /* If op0 is an external or constant def use a vector type with
5968      the same size as the output vector type.  */
5969   if (!vectype)
5970     {
5971       /* For boolean type we cannot determine vectype by
5972 	 invariant value (don't know whether it is a vector
5973 	 of booleans or vector of integers).  We use output
5974 	 vectype because operations on boolean don't change
5975 	 type.  */
5976       if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
5977 	{
5978 	  if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
5979 	    {
5980 	      if (dump_enabled_p ())
5981 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5982 				 "not supported operation on bool value.\n");
5983 	      return false;
5984 	    }
5985 	  vectype = vectype_out;
5986 	}
5987       else
5988 	vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5989     }
5990   if (vec_stmt)
5991     gcc_assert (vectype);
5992   if (!vectype)
5993     {
5994       if (dump_enabled_p ())
5995 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5996 			 "no vectype for scalar type %T\n",
5997 			 TREE_TYPE (op0));
5998 
5999       return false;
6000     }
6001 
6002   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6003   nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
6004   if (maybe_ne (nunits_out, nunits_in))
6005     return false;
6006 
6007   if (op_type == binary_op || op_type == ternary_op)
6008     {
6009       op1 = gimple_assign_rhs2 (stmt);
6010       if (!vect_is_simple_use (op1, vinfo, &dt[1]))
6011 	{
6012 	  if (dump_enabled_p ())
6013 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6014                              "use not simple.\n");
6015 	  return false;
6016 	}
6017     }
6018   if (op_type == ternary_op)
6019     {
6020       op2 = gimple_assign_rhs3 (stmt);
6021       if (!vect_is_simple_use (op2, vinfo, &dt[2]))
6022 	{
6023 	  if (dump_enabled_p ())
6024 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6025                              "use not simple.\n");
6026 	  return false;
6027 	}
6028     }
6029 
6030   /* Multiple types in SLP are handled by creating the appropriate number of
6031      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
6032      case of SLP.  */
6033   if (slp_node)
6034     ncopies = 1;
6035   else
6036     ncopies = vect_get_num_copies (loop_vinfo, vectype);
6037 
6038   gcc_assert (ncopies >= 1);
6039 
6040   /* Shifts are handled in vectorizable_shift ().  */
6041   if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
6042       || code == RROTATE_EXPR)
6043    return false;
6044 
6045   /* Supportable by target?  */
6046 
6047   vec_mode = TYPE_MODE (vectype);
6048   if (code == MULT_HIGHPART_EXPR)
6049     target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
6050   else
6051     {
6052       optab = optab_for_tree_code (code, vectype, optab_default);
6053       if (!optab)
6054 	{
6055           if (dump_enabled_p ())
6056             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6057                              "no optab.\n");
6058 	  return false;
6059 	}
6060       target_support_p = (optab_handler (optab, vec_mode)
6061 			  != CODE_FOR_nothing);
6062     }
6063 
6064   if (!target_support_p)
6065     {
6066       if (dump_enabled_p ())
6067 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6068                          "op not supported by target.\n");
6069       /* Check only during analysis.  */
6070       if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
6071 	  || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
6072         return false;
6073       if (dump_enabled_p ())
6074 	dump_printf_loc (MSG_NOTE, vect_location,
6075                          "proceeding using word mode.\n");
6076     }
6077 
6078   /* Worthwhile without SIMD support?  Check only during analysis.  */
6079   if (!VECTOR_MODE_P (vec_mode)
6080       && !vec_stmt
6081       && !vect_worthwhile_without_simd_p (vinfo, code))
6082     {
6083       if (dump_enabled_p ())
6084         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6085                          "not worthwhile without SIMD support.\n");
6086       return false;
6087     }
6088 
6089   if (!vec_stmt) /* transformation not required.  */
6090     {
6091       STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
6092       DUMP_VECT_SCOPE ("vectorizable_operation");
6093       vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
6094       return true;
6095     }
6096 
6097   /* Transform.  */
6098 
6099   if (dump_enabled_p ())
6100     dump_printf_loc (MSG_NOTE, vect_location,
6101                      "transform binary/unary operation.\n");
6102 
6103   /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6104      vectors with unsigned elements, but the result is signed.  So, we
6105      need to compute the MINUS_EXPR into vectype temporary and
6106      VIEW_CONVERT_EXPR it into the final vectype_out result.  */
6107   tree vec_cvt_dest = NULL_TREE;
6108   if (orig_code == POINTER_DIFF_EXPR)
6109     {
6110       vec_dest = vect_create_destination_var (scalar_dest, vectype);
6111       vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6112     }
6113   /* Handle def.  */
6114   else
6115     vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6116 
6117   /* In case the vectorization factor (VF) is bigger than the number
6118      of elements that we can fit in a vectype (nunits), we have to generate
6119      more than one vector stmt - i.e - we need to "unroll" the
6120      vector stmt by a factor VF/nunits.  In doing so, we record a pointer
6121      from one copy of the vector stmt to the next, in the field
6122      STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
6123      stages to find the correct vector defs to be used when vectorizing
6124      stmts that use the defs of the current stmt.  The example below
6125      illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6126      we need to create 4 vectorized stmts):
6127 
6128      before vectorization:
6129                                 RELATED_STMT    VEC_STMT
6130         S1:     x = memref      -               -
6131         S2:     z = x + 1       -               -
6132 
6133      step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6134              there):
6135                                 RELATED_STMT    VEC_STMT
6136         VS1_0:  vx0 = memref0   VS1_1           -
6137         VS1_1:  vx1 = memref1   VS1_2           -
6138         VS1_2:  vx2 = memref2   VS1_3           -
6139         VS1_3:  vx3 = memref3   -               -
6140         S1:     x = load        -               VS1_0
6141         S2:     z = x + 1       -               -
6142 
6143      step2: vectorize stmt S2 (done here):
6144         To vectorize stmt S2 we first need to find the relevant vector
6145         def for the first operand 'x'.  This is, as usual, obtained from
6146         the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6147         that defines 'x' (S1).  This way we find the stmt VS1_0, and the
6148         relevant vector def 'vx0'.  Having found 'vx0' we can generate
6149         the vector stmt VS2_0, and as usual, record it in the
6150         STMT_VINFO_VEC_STMT of stmt S2.
6151         When creating the second copy (VS2_1), we obtain the relevant vector
6152         def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6153         stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
6154         vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
6155         pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6156         Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
6157         chain of stmts and pointers:
6158                                 RELATED_STMT    VEC_STMT
6159         VS1_0:  vx0 = memref0   VS1_1           -
6160         VS1_1:  vx1 = memref1   VS1_2           -
6161         VS1_2:  vx2 = memref2   VS1_3           -
6162         VS1_3:  vx3 = memref3   -               -
6163         S1:     x = load        -               VS1_0
6164         VS2_0:  vz0 = vx0 + v1  VS2_1           -
6165         VS2_1:  vz1 = vx1 + v1  VS2_2           -
6166         VS2_2:  vz2 = vx2 + v1  VS2_3           -
6167         VS2_3:  vz3 = vx3 + v1  -               -
6168         S2:     z = x + 1       -               VS2_0  */
6169 
6170   prev_stmt_info = NULL;
6171   for (j = 0; j < ncopies; j++)
6172     {
6173       /* Handle uses.  */
6174       if (j == 0)
6175 	{
6176 	  if (op_type == binary_op)
6177 	    vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0, &vec_oprnds1,
6178 			       slp_node);
6179 	  else if (op_type == ternary_op)
6180 	    {
6181 	      if (slp_node)
6182 		{
6183 		  auto_vec<tree> ops(3);
6184 		  ops.quick_push (op0);
6185 		  ops.quick_push (op1);
6186 		  ops.quick_push (op2);
6187 		  auto_vec<vec<tree> > vec_defs(3);
6188 		  vect_get_slp_defs (ops, slp_node, &vec_defs);
6189 		  vec_oprnds0 = vec_defs[0];
6190 		  vec_oprnds1 = vec_defs[1];
6191 		  vec_oprnds2 = vec_defs[2];
6192 		}
6193 	      else
6194 		{
6195 		  vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0,
6196 				     &vec_oprnds1, NULL);
6197 		  vect_get_vec_defs (op2, NULL_TREE, stmt_info, &vec_oprnds2,
6198 				     NULL, NULL);
6199 		}
6200 	    }
6201 	  else
6202 	    vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
6203 			       slp_node);
6204 	}
6205       else
6206 	{
6207 	  vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, &vec_oprnds1);
6208 	  if (op_type == ternary_op)
6209 	    {
6210 	      tree vec_oprnd = vec_oprnds2.pop ();
6211 	      vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (vinfo,
6212 							           vec_oprnd));
6213 	    }
6214 	}
6215 
6216       /* Arguments are ready.  Create the new vector stmt.  */
6217       stmt_vec_info new_stmt_info = NULL;
6218       FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6219         {
6220 	  vop1 = ((op_type == binary_op || op_type == ternary_op)
6221 		  ? vec_oprnds1[i] : NULL_TREE);
6222 	  vop2 = ((op_type == ternary_op)
6223 		  ? vec_oprnds2[i] : NULL_TREE);
6224 	  gassign *new_stmt = gimple_build_assign (vec_dest, code,
6225 						   vop0, vop1, vop2);
6226 	  new_temp = make_ssa_name (vec_dest, new_stmt);
6227 	  gimple_assign_set_lhs (new_stmt, new_temp);
6228 	  new_stmt_info
6229 	    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6230 	  if (vec_cvt_dest)
6231 	    {
6232 	      new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6233 	      gassign *new_stmt
6234 		= gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6235 				       new_temp);
6236 	      new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6237 	      gimple_assign_set_lhs (new_stmt, new_temp);
6238 	      new_stmt_info
6239 		= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6240 	    }
6241           if (slp_node)
6242 	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
6243         }
6244 
6245       if (slp_node)
6246         continue;
6247 
6248       if (j == 0)
6249 	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
6250       else
6251 	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
6252       prev_stmt_info = new_stmt_info;
6253     }
6254 
6255   vec_oprnds0.release ();
6256   vec_oprnds1.release ();
6257   vec_oprnds2.release ();
6258 
6259   return true;
6260 }
6261 
6262 /* A helper function to ensure data reference DR_INFO's base alignment.  */
6263 
6264 static void
6265 ensure_base_align (dr_vec_info *dr_info)
6266 {
6267   if (dr_info->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
6268     return;
6269 
6270   if (dr_info->base_misaligned)
6271     {
6272       tree base_decl = dr_info->base_decl;
6273 
6274       // We should only be able to increase the alignment of a base object if
6275       // we know what its new alignment should be at compile time.
6276       unsigned HOST_WIDE_INT align_base_to =
6277 	DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
6278 
6279       if (decl_in_symtab_p (base_decl))
6280 	symtab_node::get (base_decl)->increase_alignment (align_base_to);
6281       else
6282 	{
6283 	  SET_DECL_ALIGN (base_decl, align_base_to);
6284           DECL_USER_ALIGN (base_decl) = 1;
6285 	}
6286       dr_info->base_misaligned = false;
6287     }
6288 }
6289 
6290 
6291 /* Function get_group_alias_ptr_type.
6292 
6293    Return the alias type for the group starting at FIRST_STMT_INFO.  */
6294 
6295 static tree
6296 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
6297 {
6298   struct data_reference *first_dr, *next_dr;
6299 
6300   first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
6301   stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
6302   while (next_stmt_info)
6303     {
6304       next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
6305       if (get_alias_set (DR_REF (first_dr))
6306 	  != get_alias_set (DR_REF (next_dr)))
6307 	{
6308 	  if (dump_enabled_p ())
6309 	    dump_printf_loc (MSG_NOTE, vect_location,
6310 			     "conflicting alias set types.\n");
6311 	  return ptr_type_node;
6312 	}
6313       next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6314     }
6315   return reference_alias_ptr_type (DR_REF (first_dr));
6316 }
6317 
6318 
6319 /* Function vectorizable_store.
6320 
6321    Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure)
6322    that can be vectorized.
6323    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
6324    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6325    Return true if STMT_INFO is vectorizable in this way.  */
6326 
6327 static bool
6328 vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6329 		    stmt_vec_info *vec_stmt, slp_tree slp_node,
6330 		    stmt_vector_for_cost *cost_vec)
6331 {
6332   tree data_ref;
6333   tree op;
6334   tree vec_oprnd = NULL_TREE;
6335   tree elem_type;
6336   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6337   struct loop *loop = NULL;
6338   machine_mode vec_mode;
6339   tree dummy;
6340   enum dr_alignment_support alignment_support_scheme;
6341   enum vect_def_type rhs_dt = vect_unknown_def_type;
6342   enum vect_def_type mask_dt = vect_unknown_def_type;
6343   stmt_vec_info prev_stmt_info = NULL;
6344   tree dataref_ptr = NULL_TREE;
6345   tree dataref_offset = NULL_TREE;
6346   gimple *ptr_incr = NULL;
6347   int ncopies;
6348   int j;
6349   stmt_vec_info first_stmt_info;
6350   bool grouped_store;
6351   unsigned int group_size, i;
6352   vec<tree> oprnds = vNULL;
6353   vec<tree> result_chain = vNULL;
6354   tree offset = NULL_TREE;
6355   vec<tree> vec_oprnds = vNULL;
6356   bool slp = (slp_node != NULL);
6357   unsigned int vec_num;
6358   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6359   vec_info *vinfo = stmt_info->vinfo;
6360   tree aggr_type;
6361   gather_scatter_info gs_info;
6362   poly_uint64 vf;
6363   vec_load_store_type vls_type;
6364   tree ref_type;
6365 
6366   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6367     return false;
6368 
6369   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6370       && ! vec_stmt)
6371     return false;
6372 
6373   /* Is vectorizable store? */
6374 
6375   tree mask = NULL_TREE, mask_vectype = NULL_TREE;
6376   if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
6377     {
6378       tree scalar_dest = gimple_assign_lhs (assign);
6379       if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
6380 	  && is_pattern_stmt_p (stmt_info))
6381 	scalar_dest = TREE_OPERAND (scalar_dest, 0);
6382       if (TREE_CODE (scalar_dest) != ARRAY_REF
6383 	  && TREE_CODE (scalar_dest) != BIT_FIELD_REF
6384 	  && TREE_CODE (scalar_dest) != INDIRECT_REF
6385 	  && TREE_CODE (scalar_dest) != COMPONENT_REF
6386 	  && TREE_CODE (scalar_dest) != IMAGPART_EXPR
6387 	  && TREE_CODE (scalar_dest) != REALPART_EXPR
6388 	  && TREE_CODE (scalar_dest) != MEM_REF)
6389 	return false;
6390     }
6391   else
6392     {
6393       gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
6394       if (!call || !gimple_call_internal_p (call))
6395 	return false;
6396 
6397       internal_fn ifn = gimple_call_internal_fn (call);
6398       if (!internal_store_fn_p (ifn))
6399 	return false;
6400 
6401       if (slp_node != NULL)
6402 	{
6403 	  if (dump_enabled_p ())
6404 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6405 			     "SLP of masked stores not supported.\n");
6406 	  return false;
6407 	}
6408 
6409       int mask_index = internal_fn_mask_index (ifn);
6410       if (mask_index >= 0)
6411 	{
6412 	  mask = gimple_call_arg (call, mask_index);
6413 	  if (!vect_check_load_store_mask (stmt_info, mask, &mask_dt,
6414 					   &mask_vectype))
6415 	    return false;
6416 	}
6417     }
6418 
6419   op = vect_get_store_rhs (stmt_info);
6420 
6421   /* Cannot have hybrid store SLP -- that would mean storing to the
6422      same location twice.  */
6423   gcc_assert (slp == PURE_SLP_STMT (stmt_info));
6424 
6425   tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
6426   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
6427 
6428   if (loop_vinfo)
6429     {
6430       loop = LOOP_VINFO_LOOP (loop_vinfo);
6431       vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6432     }
6433   else
6434     vf = 1;
6435 
6436   /* Multiple types in SLP are handled by creating the appropriate number of
6437      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
6438      case of SLP.  */
6439   if (slp)
6440     ncopies = 1;
6441   else
6442     ncopies = vect_get_num_copies (loop_vinfo, vectype);
6443 
6444   gcc_assert (ncopies >= 1);
6445 
6446   /* FORNOW.  This restriction should be relaxed.  */
6447   if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
6448     {
6449       if (dump_enabled_p ())
6450 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6451 			 "multiple types in nested loop.\n");
6452       return false;
6453     }
6454 
6455   if (!vect_check_store_rhs (stmt_info, op, &rhs_dt, &rhs_vectype, &vls_type))
6456     return false;
6457 
6458   elem_type = TREE_TYPE (vectype);
6459   vec_mode = TYPE_MODE (vectype);
6460 
6461   if (!STMT_VINFO_DATA_REF (stmt_info))
6462     return false;
6463 
6464   vect_memory_access_type memory_access_type;
6465   if (!get_load_store_type (stmt_info, vectype, slp, mask, vls_type, ncopies,
6466 			    &memory_access_type, &gs_info))
6467     return false;
6468 
6469   if (mask)
6470     {
6471       if (memory_access_type == VMAT_CONTIGUOUS)
6472 	{
6473 	  if (!VECTOR_MODE_P (vec_mode)
6474 	      || !can_vec_mask_load_store_p (vec_mode,
6475 					     TYPE_MODE (mask_vectype), false))
6476 	    return false;
6477 	}
6478       else if (memory_access_type != VMAT_LOAD_STORE_LANES
6479 	       && (memory_access_type != VMAT_GATHER_SCATTER
6480 		   || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
6481 	{
6482 	  if (dump_enabled_p ())
6483 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6484 			     "unsupported access type for masked store.\n");
6485 	  return false;
6486 	}
6487     }
6488   else
6489     {
6490       /* FORNOW. In some cases can vectorize even if data-type not supported
6491 	 (e.g. - array initialization with 0).  */
6492       if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
6493 	return false;
6494     }
6495 
6496   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
6497   grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
6498 		   && memory_access_type != VMAT_GATHER_SCATTER
6499 		   && (slp || memory_access_type != VMAT_CONTIGUOUS));
6500   if (grouped_store)
6501     {
6502       first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
6503       first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
6504       group_size = DR_GROUP_SIZE (first_stmt_info);
6505     }
6506   else
6507     {
6508       first_stmt_info = stmt_info;
6509       first_dr_info = dr_info;
6510       group_size = vec_num = 1;
6511     }
6512 
6513   if (!vec_stmt) /* transformation not required.  */
6514     {
6515       STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
6516 
6517       if (loop_vinfo
6518 	  && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
6519 	check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
6520 				  memory_access_type, &gs_info);
6521 
6522       STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
6523       vect_model_store_cost (stmt_info, ncopies, rhs_dt, memory_access_type,
6524 			     vls_type, slp_node, cost_vec);
6525       return true;
6526     }
6527   gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
6528 
6529   /* Transform.  */
6530 
6531   ensure_base_align (dr_info);
6532 
6533   if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
6534     {
6535       tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
6536       tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
6537       tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6538       tree ptr, var, scale, vec_mask;
6539       tree mask_arg = NULL_TREE, mask_op = NULL_TREE, perm_mask = NULL_TREE;
6540       tree mask_halfvectype = mask_vectype;
6541       edge pe = loop_preheader_edge (loop);
6542       gimple_seq seq;
6543       basic_block new_bb;
6544       enum { NARROW, NONE, WIDEN } modifier;
6545       poly_uint64 scatter_off_nunits
6546 	= TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
6547 
6548       if (known_eq (nunits, scatter_off_nunits))
6549 	modifier = NONE;
6550       else if (known_eq (nunits * 2, scatter_off_nunits))
6551 	{
6552 	  modifier = WIDEN;
6553 
6554 	  /* Currently gathers and scatters are only supported for
6555 	     fixed-length vectors.  */
6556 	  unsigned int count = scatter_off_nunits.to_constant ();
6557 	  vec_perm_builder sel (count, count, 1);
6558 	  for (i = 0; i < (unsigned int) count; ++i)
6559 	    sel.quick_push (i | (count / 2));
6560 
6561 	  vec_perm_indices indices (sel, 1, count);
6562 	  perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
6563 						  indices);
6564 	  gcc_assert (perm_mask != NULL_TREE);
6565 	}
6566       else if (known_eq (nunits, scatter_off_nunits * 2))
6567 	{
6568 	  modifier = NARROW;
6569 
6570 	  /* Currently gathers and scatters are only supported for
6571 	     fixed-length vectors.  */
6572 	  unsigned int count = nunits.to_constant ();
6573 	  vec_perm_builder sel (count, count, 1);
6574 	  for (i = 0; i < (unsigned int) count; ++i)
6575 	    sel.quick_push (i | (count / 2));
6576 
6577 	  vec_perm_indices indices (sel, 2, count);
6578 	  perm_mask = vect_gen_perm_mask_checked (vectype, indices);
6579 	  gcc_assert (perm_mask != NULL_TREE);
6580 	  ncopies *= 2;
6581 
6582 	  if (mask)
6583 	    mask_halfvectype
6584 	      = build_same_sized_truth_vector_type (gs_info.offset_vectype);
6585 	}
6586       else
6587 	gcc_unreachable ();
6588 
6589       rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
6590       ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6591       masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6592       idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6593       srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6594       scaletype = TREE_VALUE (arglist);
6595 
6596       gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
6597 			   && TREE_CODE (rettype) == VOID_TYPE);
6598 
6599       ptr = fold_convert (ptrtype, gs_info.base);
6600       if (!is_gimple_min_invariant (ptr))
6601 	{
6602 	  ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6603 	  new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6604 	  gcc_assert (!new_bb);
6605 	}
6606 
6607       if (mask == NULL_TREE)
6608 	{
6609 	  mask_arg = build_int_cst (masktype, -1);
6610 	  mask_arg = vect_init_vector (stmt_info, mask_arg, masktype, NULL);
6611 	}
6612 
6613       scale = build_int_cst (scaletype, gs_info.scale);
6614 
6615       prev_stmt_info = NULL;
6616       for (j = 0; j < ncopies; ++j)
6617 	{
6618 	  if (j == 0)
6619 	    {
6620 	      src = vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt_info);
6621 	      op = vec_oprnd0 = vect_get_vec_def_for_operand (gs_info.offset,
6622 							      stmt_info);
6623 	      if (mask)
6624 		mask_op = vec_mask = vect_get_vec_def_for_operand (mask,
6625 								   stmt_info);
6626 	    }
6627 	  else if (modifier != NONE && (j & 1))
6628 	    {
6629 	      if (modifier == WIDEN)
6630 		{
6631 		  src
6632 		    = vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
6633 								   vec_oprnd1);
6634 		  op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
6635 					     stmt_info, gsi);
6636 		  if (mask)
6637 		    mask_op
6638 		      = vec_mask = vect_get_vec_def_for_stmt_copy (vinfo,
6639 								   vec_mask);
6640 		}
6641 	      else if (modifier == NARROW)
6642 		{
6643 		  src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
6644 					      stmt_info, gsi);
6645 		  op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo,
6646 								    vec_oprnd0);
6647 		}
6648 	      else
6649 		gcc_unreachable ();
6650 	    }
6651 	  else
6652 	    {
6653 	      src = vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
6654 								 vec_oprnd1);
6655 	      op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo,
6656 								vec_oprnd0);
6657 	      if (mask)
6658 		mask_op = vec_mask = vect_get_vec_def_for_stmt_copy (vinfo,
6659 								     vec_mask);
6660 	    }
6661 
6662 	  if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
6663 	    {
6664 	      gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
6665 				    TYPE_VECTOR_SUBPARTS (srctype)));
6666 	      var = vect_get_new_ssa_name (srctype, vect_simple_var);
6667 	      src = build1 (VIEW_CONVERT_EXPR, srctype, src);
6668 	      gassign *new_stmt
6669 		= gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
6670 	      vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6671 	      src = var;
6672 	    }
6673 
6674 	  if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6675 	    {
6676 	      gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
6677 				    TYPE_VECTOR_SUBPARTS (idxtype)));
6678 	      var = vect_get_new_ssa_name (idxtype, vect_simple_var);
6679 	      op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6680 	      gassign *new_stmt
6681 		= gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6682 	      vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6683 	      op = var;
6684 	    }
6685 
6686 	  if (mask)
6687 	    {
6688 	      tree utype;
6689 	      mask_arg = mask_op;
6690 	      if (modifier == NARROW)
6691 		{
6692 		  var = vect_get_new_ssa_name (mask_halfvectype,
6693 					       vect_simple_var);
6694 		  gassign *new_stmt
6695 		    = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
6696 							: VEC_UNPACK_LO_EXPR,
6697 					   mask_op);
6698 		  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6699 		  mask_arg = var;
6700 		}
6701 	      tree optype = TREE_TYPE (mask_arg);
6702 	      if (TYPE_MODE (masktype) == TYPE_MODE (optype))
6703 		utype = masktype;
6704 	      else
6705 		utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
6706 	      var = vect_get_new_ssa_name (utype, vect_scalar_var);
6707 	      mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
6708 	      gassign *new_stmt
6709 		= gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
6710 	      vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6711 	      mask_arg = var;
6712 	      if (!useless_type_conversion_p (masktype, utype))
6713 		{
6714 		  gcc_assert (TYPE_PRECISION (utype)
6715 			      <= TYPE_PRECISION (masktype));
6716 		  var = vect_get_new_ssa_name (masktype, vect_scalar_var);
6717 		  new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
6718 		  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6719 		  mask_arg = var;
6720 		}
6721 	    }
6722 
6723 	  gcall *new_stmt
6724 	    = gimple_build_call (gs_info.decl, 5, ptr, mask_arg, op, src, scale);
6725 	  stmt_vec_info new_stmt_info
6726 	    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6727 
6728 	  if (prev_stmt_info == NULL)
6729 	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
6730 	  else
6731 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
6732 	  prev_stmt_info = new_stmt_info;
6733 	}
6734       return true;
6735     }
6736 
6737   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
6738     DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
6739 
6740   if (grouped_store)
6741     {
6742       /* FORNOW */
6743       gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
6744 
6745       /* We vectorize all the stmts of the interleaving group when we
6746 	 reach the last stmt in the group.  */
6747       if (DR_GROUP_STORE_COUNT (first_stmt_info)
6748 	  < DR_GROUP_SIZE (first_stmt_info)
6749 	  && !slp)
6750 	{
6751 	  *vec_stmt = NULL;
6752 	  return true;
6753 	}
6754 
6755       if (slp)
6756         {
6757           grouped_store = false;
6758           /* VEC_NUM is the number of vect stmts to be created for this
6759              group.  */
6760           vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6761 	  first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6762 	  gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
6763 		      == first_stmt_info);
6764 	  first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
6765 	  op = vect_get_store_rhs (first_stmt_info);
6766         }
6767       else
6768         /* VEC_NUM is the number of vect stmts to be created for this
6769            group.  */
6770 	vec_num = group_size;
6771 
6772       ref_type = get_group_alias_ptr_type (first_stmt_info);
6773     }
6774   else
6775     ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
6776 
6777   if (dump_enabled_p ())
6778     dump_printf_loc (MSG_NOTE, vect_location,
6779                      "transform store. ncopies = %d\n", ncopies);
6780 
6781   if (memory_access_type == VMAT_ELEMENTWISE
6782       || memory_access_type == VMAT_STRIDED_SLP)
6783     {
6784       gimple_stmt_iterator incr_gsi;
6785       bool insert_after;
6786       gimple *incr;
6787       tree offvar;
6788       tree ivstep;
6789       tree running_off;
6790       tree stride_base, stride_step, alias_off;
6791       tree vec_oprnd;
6792       unsigned int g;
6793       /* Checked by get_load_store_type.  */
6794       unsigned int const_nunits = nunits.to_constant ();
6795 
6796       gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
6797       gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
6798 
6799       stride_base
6800 	= fold_build_pointer_plus
6801 	    (DR_BASE_ADDRESS (first_dr_info->dr),
6802 	     size_binop (PLUS_EXPR,
6803 			 convert_to_ptrofftype (DR_OFFSET (first_dr_info->dr)),
6804 			 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
6805       stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
6806 
6807       /* For a store with loop-invariant (but other than power-of-2)
6808          stride (i.e. not a grouped access) like so:
6809 
6810 	   for (i = 0; i < n; i += stride)
6811 	     array[i] = ...;
6812 
6813 	 we generate a new induction variable and new stores from
6814 	 the components of the (vectorized) rhs:
6815 
6816 	   for (j = 0; ; j += VF*stride)
6817 	     vectemp = ...;
6818 	     tmp1 = vectemp[0];
6819 	     array[j] = tmp1;
6820 	     tmp2 = vectemp[1];
6821 	     array[j + stride] = tmp2;
6822 	     ...
6823          */
6824 
6825       unsigned nstores = const_nunits;
6826       unsigned lnel = 1;
6827       tree ltype = elem_type;
6828       tree lvectype = vectype;
6829       if (slp)
6830 	{
6831 	  if (group_size < const_nunits
6832 	      && const_nunits % group_size == 0)
6833 	    {
6834 	      nstores = const_nunits / group_size;
6835 	      lnel = group_size;
6836 	      ltype = build_vector_type (elem_type, group_size);
6837 	      lvectype = vectype;
6838 
6839 	      /* First check if vec_extract optab doesn't support extraction
6840 		 of vector elts directly.  */
6841 	      scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
6842 	      machine_mode vmode;
6843 	      if (!mode_for_vector (elmode, group_size).exists (&vmode)
6844 		  || !VECTOR_MODE_P (vmode)
6845 		  || !targetm.vector_mode_supported_p (vmode)
6846 		  || (convert_optab_handler (vec_extract_optab,
6847 					     TYPE_MODE (vectype), vmode)
6848 		      == CODE_FOR_nothing))
6849 		{
6850 		  /* Try to avoid emitting an extract of vector elements
6851 		     by performing the extracts using an integer type of the
6852 		     same size, extracting from a vector of those and then
6853 		     re-interpreting it as the original vector type if
6854 		     supported.  */
6855 		  unsigned lsize
6856 		    = group_size * GET_MODE_BITSIZE (elmode);
6857 		  unsigned int lnunits = const_nunits / group_size;
6858 		  /* If we can't construct such a vector fall back to
6859 		     element extracts from the original vector type and
6860 		     element size stores.  */
6861 		  if (int_mode_for_size (lsize, 0).exists (&elmode)
6862 		      && mode_for_vector (elmode, lnunits).exists (&vmode)
6863 		      && VECTOR_MODE_P (vmode)
6864 		      && targetm.vector_mode_supported_p (vmode)
6865 		      && (convert_optab_handler (vec_extract_optab,
6866 						 vmode, elmode)
6867 			  != CODE_FOR_nothing))
6868 		    {
6869 		      nstores = lnunits;
6870 		      lnel = group_size;
6871 		      ltype = build_nonstandard_integer_type (lsize, 1);
6872 		      lvectype = build_vector_type (ltype, nstores);
6873 		    }
6874 		  /* Else fall back to vector extraction anyway.
6875 		     Fewer stores are more important than avoiding spilling
6876 		     of the vector we extract from.  Compared to the
6877 		     construction case in vectorizable_load no store-forwarding
6878 		     issue exists here for reasonable archs.  */
6879 		}
6880 	    }
6881 	  else if (group_size >= const_nunits
6882 		   && group_size % const_nunits == 0)
6883 	    {
6884 	      nstores = 1;
6885 	      lnel = const_nunits;
6886 	      ltype = vectype;
6887 	      lvectype = vectype;
6888 	    }
6889 	  ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
6890 	  ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6891 	}
6892 
6893       ivstep = stride_step;
6894       ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
6895 			    build_int_cst (TREE_TYPE (ivstep), vf));
6896 
6897       standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6898 
6899       stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
6900       ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
6901       create_iv (stride_base, ivstep, NULL,
6902 		 loop, &incr_gsi, insert_after,
6903 		 &offvar, NULL);
6904       incr = gsi_stmt (incr_gsi);
6905       loop_vinfo->add_stmt (incr);
6906 
6907       stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
6908 
6909       prev_stmt_info = NULL;
6910       alias_off = build_int_cst (ref_type, 0);
6911       stmt_vec_info next_stmt_info = first_stmt_info;
6912       for (g = 0; g < group_size; g++)
6913 	{
6914 	  running_off = offvar;
6915 	  if (g)
6916 	    {
6917 	      tree size = TYPE_SIZE_UNIT (ltype);
6918 	      tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
6919 				      size);
6920 	      tree newoff = copy_ssa_name (running_off, NULL);
6921 	      incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6922 					  running_off, pos);
6923 	      vect_finish_stmt_generation (stmt_info, incr, gsi);
6924 	      running_off = newoff;
6925 	    }
6926 	  unsigned int group_el = 0;
6927 	  unsigned HOST_WIDE_INT
6928 	    elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
6929 	  for (j = 0; j < ncopies; j++)
6930 	    {
6931 	      /* We've set op and dt above, from vect_get_store_rhs,
6932 		 and first_stmt_info == stmt_info.  */
6933 	      if (j == 0)
6934 		{
6935 		  if (slp)
6936 		    {
6937 		      vect_get_vec_defs (op, NULL_TREE, stmt_info,
6938 					 &vec_oprnds, NULL, slp_node);
6939 		      vec_oprnd = vec_oprnds[0];
6940 		    }
6941 		  else
6942 		    {
6943 		      op = vect_get_store_rhs (next_stmt_info);
6944 		      vec_oprnd = vect_get_vec_def_for_operand
6945 			(op, next_stmt_info);
6946 		    }
6947 		}
6948 	      else
6949 		{
6950 		  if (slp)
6951 		    vec_oprnd = vec_oprnds[j];
6952 		  else
6953 		    vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo,
6954 								vec_oprnd);
6955 		}
6956 	      /* Pun the vector to extract from if necessary.  */
6957 	      if (lvectype != vectype)
6958 		{
6959 		  tree tem = make_ssa_name (lvectype);
6960 		  gimple *pun
6961 		    = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
6962 							lvectype, vec_oprnd));
6963 		  vect_finish_stmt_generation (stmt_info, pun, gsi);
6964 		  vec_oprnd = tem;
6965 		}
6966 	      for (i = 0; i < nstores; i++)
6967 		{
6968 		  tree newref, newoff;
6969 		  gimple *incr, *assign;
6970 		  tree size = TYPE_SIZE (ltype);
6971 		  /* Extract the i'th component.  */
6972 		  tree pos = fold_build2 (MULT_EXPR, bitsizetype,
6973 					  bitsize_int (i), size);
6974 		  tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
6975 					   size, pos);
6976 
6977 		  elem = force_gimple_operand_gsi (gsi, elem, true,
6978 						   NULL_TREE, true,
6979 						   GSI_SAME_STMT);
6980 
6981 		  tree this_off = build_int_cst (TREE_TYPE (alias_off),
6982 						 group_el * elsz);
6983 		  newref = build2 (MEM_REF, ltype,
6984 				   running_off, this_off);
6985 		  vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
6986 
6987 		  /* And store it to *running_off.  */
6988 		  assign = gimple_build_assign (newref, elem);
6989 		  stmt_vec_info assign_info
6990 		    = vect_finish_stmt_generation (stmt_info, assign, gsi);
6991 
6992 		  group_el += lnel;
6993 		  if (! slp
6994 		      || group_el == group_size)
6995 		    {
6996 		      newoff = copy_ssa_name (running_off, NULL);
6997 		      incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6998 						  running_off, stride_step);
6999 		      vect_finish_stmt_generation (stmt_info, incr, gsi);
7000 
7001 		      running_off = newoff;
7002 		      group_el = 0;
7003 		    }
7004 		  if (g == group_size - 1
7005 		      && !slp)
7006 		    {
7007 		      if (j == 0 && i == 0)
7008 			STMT_VINFO_VEC_STMT (stmt_info)
7009 			    = *vec_stmt = assign_info;
7010 		      else
7011 			STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign_info;
7012 		      prev_stmt_info = assign_info;
7013 		    }
7014 		}
7015 	    }
7016 	  next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7017 	  if (slp)
7018 	    break;
7019 	}
7020 
7021       vec_oprnds.release ();
7022       return true;
7023     }
7024 
7025   auto_vec<tree> dr_chain (group_size);
7026   oprnds.create (group_size);
7027 
7028   alignment_support_scheme
7029     = vect_supportable_dr_alignment (first_dr_info, false);
7030   gcc_assert (alignment_support_scheme);
7031   vec_loop_masks *loop_masks
7032     = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7033        ? &LOOP_VINFO_MASKS (loop_vinfo)
7034        : NULL);
7035   /* Targets with store-lane instructions must not require explicit
7036      realignment.  vect_supportable_dr_alignment always returns either
7037      dr_aligned or dr_unaligned_supported for masked operations.  */
7038   gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
7039 	       && !mask
7040 	       && !loop_masks)
7041 	      || alignment_support_scheme == dr_aligned
7042 	      || alignment_support_scheme == dr_unaligned_supported);
7043 
7044   if (memory_access_type == VMAT_CONTIGUOUS_DOWN
7045       || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7046     offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
7047 
7048   tree bump;
7049   tree vec_offset = NULL_TREE;
7050   if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7051     {
7052       aggr_type = NULL_TREE;
7053       bump = NULL_TREE;
7054     }
7055   else if (memory_access_type == VMAT_GATHER_SCATTER)
7056     {
7057       aggr_type = elem_type;
7058       vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
7059 				       &bump, &vec_offset);
7060     }
7061   else
7062     {
7063       if (memory_access_type == VMAT_LOAD_STORE_LANES)
7064 	aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
7065       else
7066 	aggr_type = vectype;
7067       bump = vect_get_data_ptr_increment (dr_info, aggr_type,
7068 					  memory_access_type);
7069     }
7070 
7071   if (mask)
7072     LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
7073 
7074   /* In case the vectorization factor (VF) is bigger than the number
7075      of elements that we can fit in a vectype (nunits), we have to generate
7076      more than one vector stmt - i.e - we need to "unroll" the
7077      vector stmt by a factor VF/nunits.  For more details see documentation in
7078      vect_get_vec_def_for_copy_stmt.  */
7079 
7080   /* In case of interleaving (non-unit grouped access):
7081 
7082         S1:  &base + 2 = x2
7083         S2:  &base = x0
7084         S3:  &base + 1 = x1
7085         S4:  &base + 3 = x3
7086 
7087      We create vectorized stores starting from base address (the access of the
7088      first stmt in the chain (S2 in the above example), when the last store stmt
7089      of the chain (S4) is reached:
7090 
7091         VS1: &base = vx2
7092 	VS2: &base + vec_size*1 = vx0
7093 	VS3: &base + vec_size*2 = vx1
7094 	VS4: &base + vec_size*3 = vx3
7095 
7096      Then permutation statements are generated:
7097 
7098 	VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
7099 	VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
7100 	...
7101 
7102      And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7103      (the order of the data-refs in the output of vect_permute_store_chain
7104      corresponds to the order of scalar stmts in the interleaving chain - see
7105      the documentation of vect_permute_store_chain()).
7106 
7107      In case of both multiple types and interleaving, above vector stores and
7108      permutation stmts are created for every copy.  The result vector stmts are
7109      put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
7110      STMT_VINFO_RELATED_STMT for the next copies.
7111   */
7112 
7113   prev_stmt_info = NULL;
7114   tree vec_mask = NULL_TREE;
7115   for (j = 0; j < ncopies; j++)
7116     {
7117       stmt_vec_info new_stmt_info;
7118       if (j == 0)
7119 	{
7120           if (slp)
7121             {
7122 	      /* Get vectorized arguments for SLP_NODE.  */
7123 	      vect_get_vec_defs (op, NULL_TREE, stmt_info, &vec_oprnds,
7124 				 NULL, slp_node);
7125 
7126               vec_oprnd = vec_oprnds[0];
7127             }
7128           else
7129             {
7130 	      /* For interleaved stores we collect vectorized defs for all the
7131 		 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
7132 		 used as an input to vect_permute_store_chain(), and OPRNDS as
7133 		 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
7134 
7135 		 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
7136 		 OPRNDS are of size 1.  */
7137 	      stmt_vec_info next_stmt_info = first_stmt_info;
7138 	      for (i = 0; i < group_size; i++)
7139 		{
7140 		  /* Since gaps are not supported for interleaved stores,
7141 		     DR_GROUP_SIZE is the exact number of stmts in the chain.
7142 		     Therefore, NEXT_STMT_INFO can't be NULL_TREE.  In case
7143 		     that there is no interleaving, DR_GROUP_SIZE is 1,
7144 		     and only one iteration of the loop will be executed.  */
7145 		  op = vect_get_store_rhs (next_stmt_info);
7146 		  vec_oprnd = vect_get_vec_def_for_operand
7147 		    (op, next_stmt_info);
7148 		  dr_chain.quick_push (vec_oprnd);
7149 		  oprnds.quick_push (vec_oprnd);
7150 		  next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7151 		}
7152 	      if (mask)
7153 		vec_mask = vect_get_vec_def_for_operand (mask, stmt_info,
7154 							 mask_vectype);
7155 	    }
7156 
7157 	  /* We should have catched mismatched types earlier.  */
7158 	  gcc_assert (useless_type_conversion_p (vectype,
7159 						 TREE_TYPE (vec_oprnd)));
7160 	  bool simd_lane_access_p
7161 	    = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
7162 	  if (simd_lane_access_p
7163 	      && !loop_masks
7164 	      && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
7165 	      && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
7166 	      && integer_zerop (DR_OFFSET (first_dr_info->dr))
7167 	      && integer_zerop (DR_INIT (first_dr_info->dr))
7168 	      && alias_sets_conflict_p (get_alias_set (aggr_type),
7169 					get_alias_set (TREE_TYPE (ref_type))))
7170 	    {
7171 	      dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
7172 	      dataref_offset = build_int_cst (ref_type, 0);
7173 	    }
7174 	  else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7175 	    vect_get_gather_scatter_ops (loop, stmt_info, &gs_info,
7176 					 &dataref_ptr, &vec_offset);
7177 	  else
7178 	    dataref_ptr
7179 	      = vect_create_data_ref_ptr (first_stmt_info, aggr_type,
7180 					  simd_lane_access_p ? loop : NULL,
7181 					  offset, &dummy, gsi, &ptr_incr,
7182 					  simd_lane_access_p, NULL_TREE, bump);
7183 	}
7184       else
7185 	{
7186 	  /* For interleaved stores we created vectorized defs for all the
7187 	     defs stored in OPRNDS in the previous iteration (previous copy).
7188 	     DR_CHAIN is then used as an input to vect_permute_store_chain(),
7189 	     and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
7190 	     next copy.
7191 	     If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
7192 	     OPRNDS are of size 1.  */
7193 	  for (i = 0; i < group_size; i++)
7194 	    {
7195 	      op = oprnds[i];
7196 	      vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, op);
7197 	      dr_chain[i] = vec_oprnd;
7198 	      oprnds[i] = vec_oprnd;
7199 	    }
7200 	  if (mask)
7201 	    vec_mask = vect_get_vec_def_for_stmt_copy (vinfo, vec_mask);
7202 	  if (dataref_offset)
7203 	    dataref_offset
7204 	      = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7205 	  else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7206 	    vec_offset = vect_get_vec_def_for_stmt_copy (vinfo, vec_offset);
7207 	  else
7208 	    dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7209 					   stmt_info, bump);
7210 	}
7211 
7212       if (memory_access_type == VMAT_LOAD_STORE_LANES)
7213 	{
7214 	  tree vec_array;
7215 
7216 	  /* Get an array into which we can store the individual vectors.  */
7217 	  vec_array = create_vector_array (vectype, vec_num);
7218 
7219 	  /* Invalidate the current contents of VEC_ARRAY.  This should
7220 	     become an RTL clobber too, which prevents the vector registers
7221 	     from being upward-exposed.  */
7222 	  vect_clobber_variable (stmt_info, gsi, vec_array);
7223 
7224 	  /* Store the individual vectors into the array.  */
7225 	  for (i = 0; i < vec_num; i++)
7226 	    {
7227 	      vec_oprnd = dr_chain[i];
7228 	      write_vector_array (stmt_info, gsi, vec_oprnd, vec_array, i);
7229 	    }
7230 
7231 	  tree final_mask = NULL;
7232 	  if (loop_masks)
7233 	    final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
7234 					     vectype, j);
7235 	  if (vec_mask)
7236 	    final_mask = prepare_load_store_mask (mask_vectype, final_mask,
7237 						  vec_mask, gsi);
7238 
7239 	  gcall *call;
7240 	  if (final_mask)
7241 	    {
7242 	      /* Emit:
7243 		   MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
7244 				     VEC_ARRAY).  */
7245 	      unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
7246 	      tree alias_ptr = build_int_cst (ref_type, align);
7247 	      call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
7248 						 dataref_ptr, alias_ptr,
7249 						 final_mask, vec_array);
7250 	    }
7251 	  else
7252 	    {
7253 	      /* Emit:
7254 		   MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY).  */
7255 	      data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
7256 	      call = gimple_build_call_internal (IFN_STORE_LANES, 1,
7257 						 vec_array);
7258 	      gimple_call_set_lhs (call, data_ref);
7259 	    }
7260 	  gimple_call_set_nothrow (call, true);
7261 	  new_stmt_info = vect_finish_stmt_generation (stmt_info, call, gsi);
7262 
7263 	  /* Record that VEC_ARRAY is now dead.  */
7264 	  vect_clobber_variable (stmt_info, gsi, vec_array);
7265 	}
7266       else
7267 	{
7268 	  new_stmt_info = NULL;
7269 	  if (grouped_store)
7270 	    {
7271 	      if (j == 0)
7272 		result_chain.create (group_size);
7273 	      /* Permute.  */
7274 	      vect_permute_store_chain (dr_chain, group_size, stmt_info, gsi,
7275 					&result_chain);
7276 	    }
7277 
7278 	  stmt_vec_info next_stmt_info = first_stmt_info;
7279 	  for (i = 0; i < vec_num; i++)
7280 	    {
7281 	      unsigned misalign;
7282 	      unsigned HOST_WIDE_INT align;
7283 
7284 	      tree final_mask = NULL_TREE;
7285 	      if (loop_masks)
7286 		final_mask = vect_get_loop_mask (gsi, loop_masks,
7287 						 vec_num * ncopies,
7288 						 vectype, vec_num * j + i);
7289 	      if (vec_mask)
7290 		final_mask = prepare_load_store_mask (mask_vectype, final_mask,
7291 						      vec_mask, gsi);
7292 
7293 	      if (memory_access_type == VMAT_GATHER_SCATTER)
7294 		{
7295 		  tree scale = size_int (gs_info.scale);
7296 		  gcall *call;
7297 		  if (loop_masks)
7298 		    call = gimple_build_call_internal
7299 		      (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
7300 		       scale, vec_oprnd, final_mask);
7301 		  else
7302 		    call = gimple_build_call_internal
7303 		      (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
7304 		       scale, vec_oprnd);
7305 		  gimple_call_set_nothrow (call, true);
7306 		  new_stmt_info
7307 		    = vect_finish_stmt_generation (stmt_info, call, gsi);
7308 		  break;
7309 		}
7310 
7311 	      if (i > 0)
7312 		/* Bump the vector pointer.  */
7313 		dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7314 					       stmt_info, bump);
7315 
7316 	      if (slp)
7317 		vec_oprnd = vec_oprnds[i];
7318 	      else if (grouped_store)
7319 		/* For grouped stores vectorized defs are interleaved in
7320 		   vect_permute_store_chain().  */
7321 		vec_oprnd = result_chain[i];
7322 
7323 	      align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
7324 	      if (aligned_access_p (first_dr_info))
7325 		misalign = 0;
7326 	      else if (DR_MISALIGNMENT (first_dr_info) == -1)
7327 		{
7328 		  align = dr_alignment (vect_dr_behavior (first_dr_info));
7329 		  misalign = 0;
7330 		}
7331 	      else
7332 		misalign = DR_MISALIGNMENT (first_dr_info);
7333 	      if (dataref_offset == NULL_TREE
7334 		  && TREE_CODE (dataref_ptr) == SSA_NAME)
7335 		set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
7336 					misalign);
7337 
7338 	      if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7339 		{
7340 		  tree perm_mask = perm_mask_for_reverse (vectype);
7341 		  tree perm_dest = vect_create_destination_var
7342 		    (vect_get_store_rhs (stmt_info), vectype);
7343 		  tree new_temp = make_ssa_name (perm_dest);
7344 
7345 		  /* Generate the permute statement.  */
7346 		  gimple *perm_stmt
7347 		    = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
7348 					   vec_oprnd, perm_mask);
7349 		  vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
7350 
7351 		  perm_stmt = SSA_NAME_DEF_STMT (new_temp);
7352 		  vec_oprnd = new_temp;
7353 		}
7354 
7355 	      /* Arguments are ready.  Create the new vector stmt.  */
7356 	      if (final_mask)
7357 		{
7358 		  align = least_bit_hwi (misalign | align);
7359 		  tree ptr = build_int_cst (ref_type, align);
7360 		  gcall *call
7361 		    = gimple_build_call_internal (IFN_MASK_STORE, 4,
7362 						  dataref_ptr, ptr,
7363 						  final_mask, vec_oprnd);
7364 		  gimple_call_set_nothrow (call, true);
7365 		  new_stmt_info
7366 		    = vect_finish_stmt_generation (stmt_info, call, gsi);
7367 		}
7368 	      else
7369 		{
7370 		  data_ref = fold_build2 (MEM_REF, vectype,
7371 					  dataref_ptr,
7372 					  dataref_offset
7373 					  ? dataref_offset
7374 					  : build_int_cst (ref_type, 0));
7375 		  if (aligned_access_p (first_dr_info))
7376 		    ;
7377 		  else if (DR_MISALIGNMENT (first_dr_info) == -1)
7378 		    TREE_TYPE (data_ref)
7379 		      = build_aligned_type (TREE_TYPE (data_ref),
7380 					    align * BITS_PER_UNIT);
7381 		  else
7382 		    TREE_TYPE (data_ref)
7383 		      = build_aligned_type (TREE_TYPE (data_ref),
7384 					    TYPE_ALIGN (elem_type));
7385 		  vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
7386 		  gassign *new_stmt
7387 		    = gimple_build_assign (data_ref, vec_oprnd);
7388 		  new_stmt_info
7389 		    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7390 		}
7391 
7392 	      if (slp)
7393 		continue;
7394 
7395 	      next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7396 	      if (!next_stmt_info)
7397 		break;
7398 	    }
7399 	}
7400       if (!slp)
7401 	{
7402 	  if (j == 0)
7403 	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7404 	  else
7405 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7406 	  prev_stmt_info = new_stmt_info;
7407 	}
7408     }
7409 
7410   oprnds.release ();
7411   result_chain.release ();
7412   vec_oprnds.release ();
7413 
7414   return true;
7415 }
7416 
7417 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
7418    VECTOR_CST mask.  No checks are made that the target platform supports the
7419    mask, so callers may wish to test can_vec_perm_const_p separately, or use
7420    vect_gen_perm_mask_checked.  */
7421 
7422 tree
7423 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
7424 {
7425   tree mask_type;
7426 
7427   poly_uint64 nunits = sel.length ();
7428   gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
7429 
7430   mask_type = build_vector_type (ssizetype, nunits);
7431   return vec_perm_indices_to_tree (mask_type, sel);
7432 }
7433 
7434 /* Checked version of vect_gen_perm_mask_any.  Asserts can_vec_perm_const_p,
7435    i.e. that the target supports the pattern _for arbitrary input vectors_.  */
7436 
7437 tree
7438 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
7439 {
7440   gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
7441   return vect_gen_perm_mask_any (vectype, sel);
7442 }
7443 
7444 /* Given a vector variable X and Y, that was generated for the scalar
7445    STMT_INFO, generate instructions to permute the vector elements of X and Y
7446    using permutation mask MASK_VEC, insert them at *GSI and return the
7447    permuted vector variable.  */
7448 
7449 static tree
7450 permute_vec_elements (tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
7451 		      gimple_stmt_iterator *gsi)
7452 {
7453   tree vectype = TREE_TYPE (x);
7454   tree perm_dest, data_ref;
7455   gimple *perm_stmt;
7456 
7457   tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
7458   if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
7459     perm_dest = vect_create_destination_var (scalar_dest, vectype);
7460   else
7461     perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
7462   data_ref = make_ssa_name (perm_dest);
7463 
7464   /* Generate the permute statement.  */
7465   perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
7466   vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
7467 
7468   return data_ref;
7469 }
7470 
7471 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
7472    inserting them on the loops preheader edge.  Returns true if we
7473    were successful in doing so (and thus STMT_INFO can be moved then),
7474    otherwise returns false.  */
7475 
7476 static bool
7477 hoist_defs_of_uses (stmt_vec_info stmt_info, struct loop *loop)
7478 {
7479   ssa_op_iter i;
7480   tree op;
7481   bool any = false;
7482 
7483   FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
7484     {
7485       gimple *def_stmt = SSA_NAME_DEF_STMT (op);
7486       if (!gimple_nop_p (def_stmt)
7487 	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
7488 	{
7489 	  /* Make sure we don't need to recurse.  While we could do
7490 	     so in simple cases when there are more complex use webs
7491 	     we don't have an easy way to preserve stmt order to fulfil
7492 	     dependencies within them.  */
7493 	  tree op2;
7494 	  ssa_op_iter i2;
7495 	  if (gimple_code (def_stmt) == GIMPLE_PHI)
7496 	    return false;
7497 	  FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
7498 	    {
7499 	      gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
7500 	      if (!gimple_nop_p (def_stmt2)
7501 		  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
7502 		return false;
7503 	    }
7504 	  any = true;
7505 	}
7506     }
7507 
7508   if (!any)
7509     return true;
7510 
7511   FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
7512     {
7513       gimple *def_stmt = SSA_NAME_DEF_STMT (op);
7514       if (!gimple_nop_p (def_stmt)
7515 	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
7516 	{
7517 	  gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
7518 	  gsi_remove (&gsi, false);
7519 	  gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
7520 	}
7521     }
7522 
7523   return true;
7524 }
7525 
7526 /* vectorizable_load.
7527 
7528    Check if STMT_INFO reads a non scalar data-ref (array/pointer/structure)
7529    that can be vectorized.
7530    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7531    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7532    Return true if STMT_INFO is vectorizable in this way.  */
7533 
7534 static bool
7535 vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7536 		   stmt_vec_info *vec_stmt, slp_tree slp_node,
7537 		   slp_instance slp_node_instance,
7538 		   stmt_vector_for_cost *cost_vec)
7539 {
7540   tree scalar_dest;
7541   tree vec_dest = NULL;
7542   tree data_ref = NULL;
7543   stmt_vec_info prev_stmt_info;
7544   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7545   struct loop *loop = NULL;
7546   struct loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
7547   bool nested_in_vect_loop = false;
7548   tree elem_type;
7549   tree new_temp;
7550   machine_mode mode;
7551   tree dummy;
7552   enum dr_alignment_support alignment_support_scheme;
7553   tree dataref_ptr = NULL_TREE;
7554   tree dataref_offset = NULL_TREE;
7555   gimple *ptr_incr = NULL;
7556   int ncopies;
7557   int i, j;
7558   unsigned int group_size;
7559   poly_uint64 group_gap_adj;
7560   tree msq = NULL_TREE, lsq;
7561   tree offset = NULL_TREE;
7562   tree byte_offset = NULL_TREE;
7563   tree realignment_token = NULL_TREE;
7564   gphi *phi = NULL;
7565   vec<tree> dr_chain = vNULL;
7566   bool grouped_load = false;
7567   stmt_vec_info first_stmt_info;
7568   stmt_vec_info first_stmt_info_for_drptr = NULL;
7569   bool compute_in_loop = false;
7570   struct loop *at_loop;
7571   int vec_num;
7572   bool slp = (slp_node != NULL);
7573   bool slp_perm = false;
7574   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7575   poly_uint64 vf;
7576   tree aggr_type;
7577   gather_scatter_info gs_info;
7578   vec_info *vinfo = stmt_info->vinfo;
7579   tree ref_type;
7580   enum vect_def_type mask_dt = vect_unknown_def_type;
7581 
7582   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7583     return false;
7584 
7585   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7586       && ! vec_stmt)
7587     return false;
7588 
7589   tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7590   if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
7591     {
7592       scalar_dest = gimple_assign_lhs (assign);
7593       if (TREE_CODE (scalar_dest) != SSA_NAME)
7594 	return false;
7595 
7596       tree_code code = gimple_assign_rhs_code (assign);
7597       if (code != ARRAY_REF
7598 	  && code != BIT_FIELD_REF
7599 	  && code != INDIRECT_REF
7600 	  && code != COMPONENT_REF
7601 	  && code != IMAGPART_EXPR
7602 	  && code != REALPART_EXPR
7603 	  && code != MEM_REF
7604 	  && TREE_CODE_CLASS (code) != tcc_declaration)
7605 	return false;
7606     }
7607   else
7608     {
7609       gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
7610       if (!call || !gimple_call_internal_p (call))
7611 	return false;
7612 
7613       internal_fn ifn = gimple_call_internal_fn (call);
7614       if (!internal_load_fn_p (ifn))
7615 	return false;
7616 
7617       scalar_dest = gimple_call_lhs (call);
7618       if (!scalar_dest)
7619 	return false;
7620 
7621       if (slp_node != NULL)
7622 	{
7623 	  if (dump_enabled_p ())
7624 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7625 			     "SLP of masked loads not supported.\n");
7626 	  return false;
7627 	}
7628 
7629       int mask_index = internal_fn_mask_index (ifn);
7630       if (mask_index >= 0)
7631 	{
7632 	  mask = gimple_call_arg (call, mask_index);
7633 	  if (!vect_check_load_store_mask (stmt_info, mask, &mask_dt,
7634 					   &mask_vectype))
7635 	    return false;
7636 	}
7637     }
7638 
7639   if (!STMT_VINFO_DATA_REF (stmt_info))
7640     return false;
7641 
7642   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7643   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7644 
7645   if (loop_vinfo)
7646     {
7647       loop = LOOP_VINFO_LOOP (loop_vinfo);
7648       nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
7649       vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7650     }
7651   else
7652     vf = 1;
7653 
7654   /* Multiple types in SLP are handled by creating the appropriate number of
7655      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
7656      case of SLP.  */
7657   if (slp)
7658     ncopies = 1;
7659   else
7660     ncopies = vect_get_num_copies (loop_vinfo, vectype);
7661 
7662   gcc_assert (ncopies >= 1);
7663 
7664   /* FORNOW. This restriction should be relaxed.  */
7665   if (nested_in_vect_loop && ncopies > 1)
7666     {
7667       if (dump_enabled_p ())
7668         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7669                          "multiple types in nested loop.\n");
7670       return false;
7671     }
7672 
7673   /* Invalidate assumptions made by dependence analysis when vectorization
7674      on the unrolled body effectively re-orders stmts.  */
7675   if (ncopies > 1
7676       && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
7677       && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
7678 		   STMT_VINFO_MIN_NEG_DIST (stmt_info)))
7679     {
7680       if (dump_enabled_p ())
7681 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7682 			 "cannot perform implicit CSE when unrolling "
7683 			 "with negative dependence distance\n");
7684       return false;
7685     }
7686 
7687   elem_type = TREE_TYPE (vectype);
7688   mode = TYPE_MODE (vectype);
7689 
7690   /* FORNOW. In some cases can vectorize even if data-type not supported
7691     (e.g. - data copies).  */
7692   if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
7693     {
7694       if (dump_enabled_p ())
7695         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7696                          "Aligned load, but unsupported type.\n");
7697       return false;
7698     }
7699 
7700   /* Check if the load is a part of an interleaving chain.  */
7701   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7702     {
7703       grouped_load = true;
7704       /* FORNOW */
7705       gcc_assert (!nested_in_vect_loop);
7706       gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
7707 
7708       first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7709       group_size = DR_GROUP_SIZE (first_stmt_info);
7710 
7711       if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7712 	slp_perm = true;
7713 
7714       /* Invalidate assumptions made by dependence analysis when vectorization
7715 	 on the unrolled body effectively re-orders stmts.  */
7716       if (!PURE_SLP_STMT (stmt_info)
7717 	  && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
7718 	  && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
7719 		       STMT_VINFO_MIN_NEG_DIST (stmt_info)))
7720 	{
7721 	  if (dump_enabled_p ())
7722 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7723 			     "cannot perform implicit CSE when performing "
7724 			     "group loads with negative dependence distance\n");
7725 	  return false;
7726 	}
7727     }
7728   else
7729     group_size = 1;
7730 
7731   vect_memory_access_type memory_access_type;
7732   if (!get_load_store_type (stmt_info, vectype, slp, mask, VLS_LOAD, ncopies,
7733 			    &memory_access_type, &gs_info))
7734     return false;
7735 
7736   if (mask)
7737     {
7738       if (memory_access_type == VMAT_CONTIGUOUS)
7739 	{
7740 	  machine_mode vec_mode = TYPE_MODE (vectype);
7741 	  if (!VECTOR_MODE_P (vec_mode)
7742 	      || !can_vec_mask_load_store_p (vec_mode,
7743 					     TYPE_MODE (mask_vectype), true))
7744 	    return false;
7745 	}
7746       else if (memory_access_type != VMAT_LOAD_STORE_LANES
7747 	       && memory_access_type != VMAT_GATHER_SCATTER)
7748 	{
7749 	  if (dump_enabled_p ())
7750 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7751 			     "unsupported access type for masked load.\n");
7752 	  return false;
7753 	}
7754     }
7755 
7756   if (!vec_stmt) /* transformation not required.  */
7757     {
7758       if (!slp)
7759 	STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7760 
7761       if (loop_vinfo
7762 	  && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
7763 	check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
7764 				  memory_access_type, &gs_info);
7765 
7766       STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
7767       vect_model_load_cost (stmt_info, ncopies, memory_access_type,
7768 			    slp_node_instance, slp_node, cost_vec);
7769       return true;
7770     }
7771 
7772   if (!slp)
7773     gcc_assert (memory_access_type
7774 		== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7775 
7776   if (dump_enabled_p ())
7777     dump_printf_loc (MSG_NOTE, vect_location,
7778                      "transform load. ncopies = %d\n", ncopies);
7779 
7780   /* Transform.  */
7781 
7782   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
7783   ensure_base_align (dr_info);
7784 
7785   if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7786     {
7787       vect_build_gather_load_calls (stmt_info, gsi, vec_stmt, &gs_info, mask);
7788       return true;
7789     }
7790 
7791   if (memory_access_type == VMAT_INVARIANT)
7792     {
7793       gcc_assert (!grouped_load && !mask && !bb_vinfo);
7794       /* If we have versioned for aliasing or the loop doesn't
7795 	 have any data dependencies that would preclude this,
7796 	 then we are sure this is a loop invariant load and
7797 	 thus we can insert it on the preheader edge.  */
7798       bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
7799 		      && !nested_in_vect_loop
7800 		      && hoist_defs_of_uses (stmt_info, loop));
7801       if (hoist_p)
7802 	{
7803 	  gassign *stmt = as_a <gassign *> (stmt_info->stmt);
7804 	  if (dump_enabled_p ())
7805 	    dump_printf_loc (MSG_NOTE, vect_location,
7806 			     "hoisting out of the vectorized loop: %G", stmt);
7807 	  scalar_dest = copy_ssa_name (scalar_dest);
7808 	  tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
7809 	  gsi_insert_on_edge_immediate
7810 	    (loop_preheader_edge (loop),
7811 	     gimple_build_assign (scalar_dest, rhs));
7812 	}
7813       /* These copies are all equivalent, but currently the representation
7814 	 requires a separate STMT_VINFO_VEC_STMT for each one.  */
7815       prev_stmt_info = NULL;
7816       gimple_stmt_iterator gsi2 = *gsi;
7817       gsi_next (&gsi2);
7818       for (j = 0; j < ncopies; j++)
7819 	{
7820 	  stmt_vec_info new_stmt_info;
7821 	  if (hoist_p)
7822 	    {
7823 	      new_temp = vect_init_vector (stmt_info, scalar_dest,
7824 					   vectype, NULL);
7825 	      gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
7826 	      new_stmt_info = vinfo->add_stmt (new_stmt);
7827 	    }
7828 	  else
7829 	    {
7830 	      new_temp = vect_init_vector (stmt_info, scalar_dest,
7831 					   vectype, &gsi2);
7832 	      new_stmt_info = vinfo->lookup_def (new_temp);
7833 	    }
7834 	  if (slp)
7835 	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
7836 	  else if (j == 0)
7837 	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7838 	  else
7839 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7840 	  prev_stmt_info = new_stmt_info;
7841 	}
7842       return true;
7843     }
7844 
7845   if (memory_access_type == VMAT_ELEMENTWISE
7846       || memory_access_type == VMAT_STRIDED_SLP)
7847     {
7848       gimple_stmt_iterator incr_gsi;
7849       bool insert_after;
7850       gimple *incr;
7851       tree offvar;
7852       tree ivstep;
7853       tree running_off;
7854       vec<constructor_elt, va_gc> *v = NULL;
7855       tree stride_base, stride_step, alias_off;
7856       /* Checked by get_load_store_type.  */
7857       unsigned int const_nunits = nunits.to_constant ();
7858       unsigned HOST_WIDE_INT cst_offset = 0;
7859 
7860       gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7861       gcc_assert (!nested_in_vect_loop);
7862 
7863       if (grouped_load)
7864 	{
7865 	  first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7866 	  first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7867 	}
7868       else
7869 	{
7870 	  first_stmt_info = stmt_info;
7871 	  first_dr_info = dr_info;
7872 	}
7873       if (slp && grouped_load)
7874 	{
7875 	  group_size = DR_GROUP_SIZE (first_stmt_info);
7876 	  ref_type = get_group_alias_ptr_type (first_stmt_info);
7877 	}
7878       else
7879 	{
7880 	  if (grouped_load)
7881 	    cst_offset
7882 	      = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
7883 		 * vect_get_place_in_interleaving_chain (stmt_info,
7884 							 first_stmt_info));
7885 	  group_size = 1;
7886 	  ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
7887 	}
7888 
7889       stride_base
7890 	= fold_build_pointer_plus
7891 	    (DR_BASE_ADDRESS (first_dr_info->dr),
7892 	     size_binop (PLUS_EXPR,
7893 			 convert_to_ptrofftype (DR_OFFSET (first_dr_info->dr)),
7894 			 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
7895       stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
7896 
7897       /* For a load with loop-invariant (but other than power-of-2)
7898          stride (i.e. not a grouped access) like so:
7899 
7900 	   for (i = 0; i < n; i += stride)
7901 	     ... = array[i];
7902 
7903 	 we generate a new induction variable and new accesses to
7904 	 form a new vector (or vectors, depending on ncopies):
7905 
7906 	   for (j = 0; ; j += VF*stride)
7907 	     tmp1 = array[j];
7908 	     tmp2 = array[j + stride];
7909 	     ...
7910 	     vectemp = {tmp1, tmp2, ...}
7911          */
7912 
7913       ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
7914 			    build_int_cst (TREE_TYPE (stride_step), vf));
7915 
7916       standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7917 
7918       stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
7919       ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
7920       create_iv (stride_base, ivstep, NULL,
7921 		 loop, &incr_gsi, insert_after,
7922 		 &offvar, NULL);
7923       incr = gsi_stmt (incr_gsi);
7924       loop_vinfo->add_stmt (incr);
7925 
7926       stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
7927 
7928       prev_stmt_info = NULL;
7929       running_off = offvar;
7930       alias_off = build_int_cst (ref_type, 0);
7931       int nloads = const_nunits;
7932       int lnel = 1;
7933       tree ltype = TREE_TYPE (vectype);
7934       tree lvectype = vectype;
7935       auto_vec<tree> dr_chain;
7936       if (memory_access_type == VMAT_STRIDED_SLP)
7937 	{
7938 	  if (group_size < const_nunits)
7939 	    {
7940 	      /* First check if vec_init optab supports construction from
7941 		 vector elts directly.  */
7942 	      scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
7943 	      machine_mode vmode;
7944 	      if (mode_for_vector (elmode, group_size).exists (&vmode)
7945 		  && VECTOR_MODE_P (vmode)
7946 		  && targetm.vector_mode_supported_p (vmode)
7947 		  && (convert_optab_handler (vec_init_optab,
7948 					     TYPE_MODE (vectype), vmode)
7949 		      != CODE_FOR_nothing))
7950 		{
7951 		  nloads = const_nunits / group_size;
7952 		  lnel = group_size;
7953 		  ltype = build_vector_type (TREE_TYPE (vectype), group_size);
7954 		}
7955 	      else
7956 		{
7957 		  /* Otherwise avoid emitting a constructor of vector elements
7958 		     by performing the loads using an integer type of the same
7959 		     size, constructing a vector of those and then
7960 		     re-interpreting it as the original vector type.
7961 		     This avoids a huge runtime penalty due to the general
7962 		     inability to perform store forwarding from smaller stores
7963 		     to a larger load.  */
7964 		  unsigned lsize
7965 		    = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
7966 		  unsigned int lnunits = const_nunits / group_size;
7967 		  /* If we can't construct such a vector fall back to
7968 		     element loads of the original vector type.  */
7969 		  if (int_mode_for_size (lsize, 0).exists (&elmode)
7970 		      && mode_for_vector (elmode, lnunits).exists (&vmode)
7971 		      && VECTOR_MODE_P (vmode)
7972 		      && targetm.vector_mode_supported_p (vmode)
7973 		      && (convert_optab_handler (vec_init_optab, vmode, elmode)
7974 			  != CODE_FOR_nothing))
7975 		    {
7976 		      nloads = lnunits;
7977 		      lnel = group_size;
7978 		      ltype = build_nonstandard_integer_type (lsize, 1);
7979 		      lvectype = build_vector_type (ltype, nloads);
7980 		    }
7981 		}
7982 	    }
7983 	  else
7984 	    {
7985 	      nloads = 1;
7986 	      lnel = const_nunits;
7987 	      ltype = vectype;
7988 	    }
7989 	  ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
7990 	}
7991       /* Load vector(1) scalar_type if it's 1 element-wise vectype.  */
7992       else if (nloads == 1)
7993 	ltype = vectype;
7994 
7995       if (slp)
7996 	{
7997 	  /* For SLP permutation support we need to load the whole group,
7998 	     not only the number of vector stmts the permutation result
7999 	     fits in.  */
8000 	  if (slp_perm)
8001 	    {
8002 	      /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
8003 		 variable VF.  */
8004 	      unsigned int const_vf = vf.to_constant ();
8005 	      ncopies = CEIL (group_size * const_vf, const_nunits);
8006 	      dr_chain.create (ncopies);
8007 	    }
8008 	  else
8009 	    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8010 	}
8011       unsigned int group_el = 0;
8012       unsigned HOST_WIDE_INT
8013 	elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
8014       for (j = 0; j < ncopies; j++)
8015 	{
8016 	  if (nloads > 1)
8017 	    vec_alloc (v, nloads);
8018 	  stmt_vec_info new_stmt_info = NULL;
8019 	  for (i = 0; i < nloads; i++)
8020 	    {
8021 	      tree this_off = build_int_cst (TREE_TYPE (alias_off),
8022 					     group_el * elsz + cst_offset);
8023 	      tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
8024 	      vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8025 	      gassign *new_stmt
8026 		= gimple_build_assign (make_ssa_name (ltype), data_ref);
8027 	      new_stmt_info
8028 		= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8029 	      if (nloads > 1)
8030 		CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
8031 					gimple_assign_lhs (new_stmt));
8032 
8033 	      group_el += lnel;
8034 	      if (! slp
8035 		  || group_el == group_size)
8036 		{
8037 		  tree newoff = copy_ssa_name (running_off);
8038 		  gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8039 						      running_off, stride_step);
8040 		  vect_finish_stmt_generation (stmt_info, incr, gsi);
8041 
8042 		  running_off = newoff;
8043 		  group_el = 0;
8044 		}
8045 	    }
8046 	  if (nloads > 1)
8047 	    {
8048 	      tree vec_inv = build_constructor (lvectype, v);
8049 	      new_temp = vect_init_vector (stmt_info, vec_inv, lvectype, gsi);
8050 	      new_stmt_info = vinfo->lookup_def (new_temp);
8051 	      if (lvectype != vectype)
8052 		{
8053 		  gassign *new_stmt
8054 		    = gimple_build_assign (make_ssa_name (vectype),
8055 					   VIEW_CONVERT_EXPR,
8056 					   build1 (VIEW_CONVERT_EXPR,
8057 						   vectype, new_temp));
8058 		  new_stmt_info
8059 		    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8060 		}
8061 	    }
8062 
8063 	  if (slp)
8064 	    {
8065 	      if (slp_perm)
8066 		dr_chain.quick_push (gimple_assign_lhs (new_stmt_info->stmt));
8067 	      else
8068 		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
8069 	    }
8070 	  else
8071 	    {
8072 	      if (j == 0)
8073 		STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
8074 	      else
8075 		STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
8076 	      prev_stmt_info = new_stmt_info;
8077 	    }
8078 	}
8079       if (slp_perm)
8080 	{
8081 	  unsigned n_perms;
8082 	  vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
8083 					slp_node_instance, false, &n_perms);
8084 	}
8085       return true;
8086     }
8087 
8088   if (memory_access_type == VMAT_GATHER_SCATTER
8089       || (!slp && memory_access_type == VMAT_CONTIGUOUS))
8090     grouped_load = false;
8091 
8092   if (grouped_load)
8093     {
8094       first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8095       group_size = DR_GROUP_SIZE (first_stmt_info);
8096       /* For SLP vectorization we directly vectorize a subchain
8097          without permutation.  */
8098       if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
8099 	first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8100       /* For BB vectorization always use the first stmt to base
8101 	 the data ref pointer on.  */
8102       if (bb_vinfo)
8103 	first_stmt_info_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8104 
8105       /* Check if the chain of loads is already vectorized.  */
8106       if (STMT_VINFO_VEC_STMT (first_stmt_info)
8107 	  /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
8108 	     ???  But we can only do so if there is exactly one
8109 	     as we have no way to get at the rest.  Leave the CSE
8110 	     opportunity alone.
8111 	     ???  With the group load eventually participating
8112 	     in multiple different permutations (having multiple
8113 	     slp nodes which refer to the same group) the CSE
8114 	     is even wrong code.  See PR56270.  */
8115 	  && !slp)
8116 	{
8117 	  *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
8118 	  return true;
8119 	}
8120       first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8121       group_gap_adj = 0;
8122 
8123       /* VEC_NUM is the number of vect stmts to be created for this group.  */
8124       if (slp)
8125 	{
8126 	  grouped_load = false;
8127 	  /* If an SLP permutation is from N elements to N elements,
8128 	     and if one vector holds a whole number of N, we can load
8129 	     the inputs to the permutation in the same way as an
8130 	     unpermuted sequence.  In other cases we need to load the
8131 	     whole group, not only the number of vector stmts the
8132 	     permutation result fits in.  */
8133 	  if (slp_perm
8134 	      && (group_size != SLP_INSTANCE_GROUP_SIZE (slp_node_instance)
8135 		  || !multiple_p (nunits, group_size)))
8136 	    {
8137 	      /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
8138 		 variable VF; see vect_transform_slp_perm_load.  */
8139 	      unsigned int const_vf = vf.to_constant ();
8140 	      unsigned int const_nunits = nunits.to_constant ();
8141 	      vec_num = CEIL (group_size * const_vf, const_nunits);
8142 	      group_gap_adj = vf * group_size - nunits * vec_num;
8143 	    }
8144 	  else
8145 	    {
8146 	      vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8147 	      group_gap_adj
8148 		= group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
8149 	    }
8150     	}
8151       else
8152 	vec_num = group_size;
8153 
8154       ref_type = get_group_alias_ptr_type (first_stmt_info);
8155     }
8156   else
8157     {
8158       first_stmt_info = stmt_info;
8159       first_dr_info = dr_info;
8160       group_size = vec_num = 1;
8161       group_gap_adj = 0;
8162       ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
8163     }
8164 
8165   alignment_support_scheme
8166     = vect_supportable_dr_alignment (first_dr_info, false);
8167   gcc_assert (alignment_support_scheme);
8168   vec_loop_masks *loop_masks
8169     = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
8170        ? &LOOP_VINFO_MASKS (loop_vinfo)
8171        : NULL);
8172   /* Targets with store-lane instructions must not require explicit
8173      realignment.  vect_supportable_dr_alignment always returns either
8174      dr_aligned or dr_unaligned_supported for masked operations.  */
8175   gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8176 	       && !mask
8177 	       && !loop_masks)
8178 	      || alignment_support_scheme == dr_aligned
8179 	      || alignment_support_scheme == dr_unaligned_supported);
8180 
8181   /* In case the vectorization factor (VF) is bigger than the number
8182      of elements that we can fit in a vectype (nunits), we have to generate
8183      more than one vector stmt - i.e - we need to "unroll" the
8184      vector stmt by a factor VF/nunits.  In doing so, we record a pointer
8185      from one copy of the vector stmt to the next, in the field
8186      STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
8187      stages to find the correct vector defs to be used when vectorizing
8188      stmts that use the defs of the current stmt.  The example below
8189      illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
8190      need to create 4 vectorized stmts):
8191 
8192      before vectorization:
8193                                 RELATED_STMT    VEC_STMT
8194         S1:     x = memref      -               -
8195         S2:     z = x + 1       -               -
8196 
8197      step 1: vectorize stmt S1:
8198         We first create the vector stmt VS1_0, and, as usual, record a
8199         pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
8200         Next, we create the vector stmt VS1_1, and record a pointer to
8201         it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
8202         Similarly, for VS1_2 and VS1_3.  This is the resulting chain of
8203         stmts and pointers:
8204                                 RELATED_STMT    VEC_STMT
8205         VS1_0:  vx0 = memref0   VS1_1           -
8206         VS1_1:  vx1 = memref1   VS1_2           -
8207         VS1_2:  vx2 = memref2   VS1_3           -
8208         VS1_3:  vx3 = memref3   -               -
8209         S1:     x = load        -               VS1_0
8210         S2:     z = x + 1       -               -
8211 
8212      See in documentation in vect_get_vec_def_for_stmt_copy for how the
8213      information we recorded in RELATED_STMT field is used to vectorize
8214      stmt S2.  */
8215 
8216   /* In case of interleaving (non-unit grouped access):
8217 
8218      S1:  x2 = &base + 2
8219      S2:  x0 = &base
8220      S3:  x1 = &base + 1
8221      S4:  x3 = &base + 3
8222 
8223      Vectorized loads are created in the order of memory accesses
8224      starting from the access of the first stmt of the chain:
8225 
8226      VS1: vx0 = &base
8227      VS2: vx1 = &base + vec_size*1
8228      VS3: vx3 = &base + vec_size*2
8229      VS4: vx4 = &base + vec_size*3
8230 
8231      Then permutation statements are generated:
8232 
8233      VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
8234      VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
8235        ...
8236 
8237      And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8238      (the order of the data-refs in the output of vect_permute_load_chain
8239      corresponds to the order of scalar stmts in the interleaving chain - see
8240      the documentation of vect_permute_load_chain()).
8241      The generation of permutation stmts and recording them in
8242      STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
8243 
8244      In case of both multiple types and interleaving, the vector loads and
8245      permutation stmts above are created for every copy.  The result vector
8246      stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
8247      corresponding STMT_VINFO_RELATED_STMT for the next copies.  */
8248 
8249   /* If the data reference is aligned (dr_aligned) or potentially unaligned
8250      on a target that supports unaligned accesses (dr_unaligned_supported)
8251      we generate the following code:
8252          p = initial_addr;
8253          indx = 0;
8254          loop {
8255 	   p = p + indx * vectype_size;
8256            vec_dest = *(p);
8257            indx = indx + 1;
8258          }
8259 
8260      Otherwise, the data reference is potentially unaligned on a target that
8261      does not support unaligned accesses (dr_explicit_realign_optimized) -
8262      then generate the following code, in which the data in each iteration is
8263      obtained by two vector loads, one from the previous iteration, and one
8264      from the current iteration:
8265          p1 = initial_addr;
8266          msq_init = *(floor(p1))
8267          p2 = initial_addr + VS - 1;
8268          realignment_token = call target_builtin;
8269          indx = 0;
8270          loop {
8271            p2 = p2 + indx * vectype_size
8272            lsq = *(floor(p2))
8273            vec_dest = realign_load (msq, lsq, realignment_token)
8274            indx = indx + 1;
8275            msq = lsq;
8276          }   */
8277 
8278   /* If the misalignment remains the same throughout the execution of the
8279      loop, we can create the init_addr and permutation mask at the loop
8280      preheader.  Otherwise, it needs to be created inside the loop.
8281      This can only occur when vectorizing memory accesses in the inner-loop
8282      nested within an outer-loop that is being vectorized.  */
8283 
8284   if (nested_in_vect_loop
8285       && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
8286 		      GET_MODE_SIZE (TYPE_MODE (vectype))))
8287     {
8288       gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
8289       compute_in_loop = true;
8290     }
8291 
8292   bool diff_first_stmt_info
8293     = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr;
8294 
8295   if ((alignment_support_scheme == dr_explicit_realign_optimized
8296        || alignment_support_scheme == dr_explicit_realign)
8297       && !compute_in_loop)
8298     {
8299       /* If we have different first_stmt_info, we can't set up realignment
8300 	 here, since we can't guarantee first_stmt_info DR has been
8301 	 initialized yet, use first_stmt_info_for_drptr DR by bumping the
8302 	 distance from first_stmt_info DR instead as below.  */
8303       if (!diff_first_stmt_info)
8304 	msq = vect_setup_realignment (first_stmt_info, gsi, &realignment_token,
8305 				      alignment_support_scheme, NULL_TREE,
8306 				      &at_loop);
8307       if (alignment_support_scheme == dr_explicit_realign_optimized)
8308 	{
8309 	  phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
8310 	  byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
8311 				    size_one_node);
8312 	  gcc_assert (!first_stmt_info_for_drptr);
8313 	}
8314     }
8315   else
8316     at_loop = loop;
8317 
8318   if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8319     offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
8320 
8321   tree bump;
8322   tree vec_offset = NULL_TREE;
8323   if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8324     {
8325       aggr_type = NULL_TREE;
8326       bump = NULL_TREE;
8327     }
8328   else if (memory_access_type == VMAT_GATHER_SCATTER)
8329     {
8330       aggr_type = elem_type;
8331       vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
8332 				       &bump, &vec_offset);
8333     }
8334   else
8335     {
8336       if (memory_access_type == VMAT_LOAD_STORE_LANES)
8337 	aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8338       else
8339 	aggr_type = vectype;
8340       bump = vect_get_data_ptr_increment (dr_info, aggr_type,
8341 					  memory_access_type);
8342     }
8343 
8344   tree vec_mask = NULL_TREE;
8345   prev_stmt_info = NULL;
8346   poly_uint64 group_elt = 0;
8347   for (j = 0; j < ncopies; j++)
8348     {
8349       stmt_vec_info new_stmt_info = NULL;
8350       /* 1. Create the vector or array pointer update chain.  */
8351       if (j == 0)
8352 	{
8353 	  bool simd_lane_access_p
8354 	    = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
8355 	  if (simd_lane_access_p
8356 	      && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
8357 	      && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
8358 	      && integer_zerop (DR_OFFSET (first_dr_info->dr))
8359 	      && integer_zerop (DR_INIT (first_dr_info->dr))
8360 	      && alias_sets_conflict_p (get_alias_set (aggr_type),
8361 					get_alias_set (TREE_TYPE (ref_type)))
8362 	      && (alignment_support_scheme == dr_aligned
8363 		  || alignment_support_scheme == dr_unaligned_supported))
8364 	    {
8365 	      dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
8366 	      dataref_offset = build_int_cst (ref_type, 0);
8367 	    }
8368 	  else if (diff_first_stmt_info)
8369 	    {
8370 	      dataref_ptr
8371 		= vect_create_data_ref_ptr (first_stmt_info_for_drptr,
8372 					    aggr_type, at_loop, offset, &dummy,
8373 					    gsi, &ptr_incr, simd_lane_access_p,
8374 					    byte_offset, bump);
8375 	      /* Adjust the pointer by the difference to first_stmt.  */
8376 	      data_reference_p ptrdr
8377 		= STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
8378 	      tree diff
8379 		= fold_convert (sizetype,
8380 				size_binop (MINUS_EXPR,
8381 					    DR_INIT (first_dr_info->dr),
8382 					    DR_INIT (ptrdr)));
8383 	      dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8384 					     stmt_info, diff);
8385 	      if (alignment_support_scheme == dr_explicit_realign)
8386 		{
8387 		  msq = vect_setup_realignment (first_stmt_info_for_drptr, gsi,
8388 						&realignment_token,
8389 						alignment_support_scheme,
8390 						dataref_ptr, &at_loop);
8391 		  gcc_assert (!compute_in_loop);
8392 		}
8393 	    }
8394 	  else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8395 	    vect_get_gather_scatter_ops (loop, stmt_info, &gs_info,
8396 					 &dataref_ptr, &vec_offset);
8397 	  else
8398 	    dataref_ptr
8399 	      = vect_create_data_ref_ptr (first_stmt_info, aggr_type, at_loop,
8400 					  offset, &dummy, gsi, &ptr_incr,
8401 					  simd_lane_access_p,
8402 					  byte_offset, bump);
8403 	  if (mask)
8404 	    vec_mask = vect_get_vec_def_for_operand (mask, stmt_info,
8405 						     mask_vectype);
8406 	}
8407       else
8408 	{
8409 	  if (dataref_offset)
8410 	    dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
8411 					      bump);
8412 	  else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8413 	    vec_offset = vect_get_vec_def_for_stmt_copy (vinfo, vec_offset);
8414 	  else
8415 	    dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8416 					   stmt_info, bump);
8417 	  if (mask)
8418 	    vec_mask = vect_get_vec_def_for_stmt_copy (vinfo, vec_mask);
8419 	}
8420 
8421       if (grouped_load || slp_perm)
8422 	dr_chain.create (vec_num);
8423 
8424       if (memory_access_type == VMAT_LOAD_STORE_LANES)
8425 	{
8426 	  tree vec_array;
8427 
8428 	  vec_array = create_vector_array (vectype, vec_num);
8429 
8430 	  tree final_mask = NULL_TREE;
8431 	  if (loop_masks)
8432 	    final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8433 					     vectype, j);
8434 	  if (vec_mask)
8435 	    final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8436 						  vec_mask, gsi);
8437 
8438 	  gcall *call;
8439 	  if (final_mask)
8440 	    {
8441 	      /* Emit:
8442 		   VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
8443 		                                VEC_MASK).  */
8444 	      unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
8445 	      tree alias_ptr = build_int_cst (ref_type, align);
8446 	      call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
8447 						 dataref_ptr, alias_ptr,
8448 						 final_mask);
8449 	    }
8450 	  else
8451 	    {
8452 	      /* Emit:
8453 		   VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]).  */
8454 	      data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8455 	      call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
8456 	    }
8457 	  gimple_call_set_lhs (call, vec_array);
8458 	  gimple_call_set_nothrow (call, true);
8459 	  new_stmt_info = vect_finish_stmt_generation (stmt_info, call, gsi);
8460 
8461 	  /* Extract each vector into an SSA_NAME.  */
8462 	  for (i = 0; i < vec_num; i++)
8463 	    {
8464 	      new_temp = read_vector_array (stmt_info, gsi, scalar_dest,
8465 					    vec_array, i);
8466 	      dr_chain.quick_push (new_temp);
8467 	    }
8468 
8469 	  /* Record the mapping between SSA_NAMEs and statements.  */
8470 	  vect_record_grouped_load_vectors (stmt_info, dr_chain);
8471 
8472 	  /* Record that VEC_ARRAY is now dead.  */
8473 	  vect_clobber_variable (stmt_info, gsi, vec_array);
8474 	}
8475       else
8476 	{
8477 	  for (i = 0; i < vec_num; i++)
8478 	    {
8479 	      tree final_mask = NULL_TREE;
8480 	      if (loop_masks
8481 		  && memory_access_type != VMAT_INVARIANT)
8482 		final_mask = vect_get_loop_mask (gsi, loop_masks,
8483 						 vec_num * ncopies,
8484 						 vectype, vec_num * j + i);
8485 	      if (vec_mask)
8486 		final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8487 						      vec_mask, gsi);
8488 
8489 	      if (i > 0)
8490 		dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8491 					       stmt_info, bump);
8492 
8493 	      /* 2. Create the vector-load in the loop.  */
8494 	      gimple *new_stmt = NULL;
8495 	      switch (alignment_support_scheme)
8496 		{
8497 		case dr_aligned:
8498 		case dr_unaligned_supported:
8499 		  {
8500 		    unsigned int misalign;
8501 		    unsigned HOST_WIDE_INT align;
8502 
8503 		    if (memory_access_type == VMAT_GATHER_SCATTER)
8504 		      {
8505 			tree scale = size_int (gs_info.scale);
8506 			gcall *call;
8507 			if (loop_masks)
8508 			  call = gimple_build_call_internal
8509 			    (IFN_MASK_GATHER_LOAD, 4, dataref_ptr,
8510 			     vec_offset, scale, final_mask);
8511 			else
8512 			  call = gimple_build_call_internal
8513 			    (IFN_GATHER_LOAD, 3, dataref_ptr,
8514 			     vec_offset, scale);
8515 			gimple_call_set_nothrow (call, true);
8516 			new_stmt = call;
8517 			data_ref = NULL_TREE;
8518 			break;
8519 		      }
8520 
8521 		    align =
8522 		      known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
8523 		    if (alignment_support_scheme == dr_aligned)
8524 		      {
8525 			gcc_assert (aligned_access_p (first_dr_info));
8526 			misalign = 0;
8527 		      }
8528 		    else if (DR_MISALIGNMENT (first_dr_info) == -1)
8529 		      {
8530 			align = dr_alignment
8531 			  (vect_dr_behavior (first_dr_info));
8532 			misalign = 0;
8533 		      }
8534 		    else
8535 		      misalign = DR_MISALIGNMENT (first_dr_info);
8536 		    if (dataref_offset == NULL_TREE
8537 			&& TREE_CODE (dataref_ptr) == SSA_NAME)
8538 		      set_ptr_info_alignment (get_ptr_info (dataref_ptr),
8539 					      align, misalign);
8540 
8541 		    if (final_mask)
8542 		      {
8543 			align = least_bit_hwi (misalign | align);
8544 			tree ptr = build_int_cst (ref_type, align);
8545 			gcall *call
8546 			  = gimple_build_call_internal (IFN_MASK_LOAD, 3,
8547 							dataref_ptr, ptr,
8548 							final_mask);
8549 			gimple_call_set_nothrow (call, true);
8550 			new_stmt = call;
8551 			data_ref = NULL_TREE;
8552 		      }
8553 		    else
8554 		      {
8555 			data_ref
8556 			  = fold_build2 (MEM_REF, vectype, dataref_ptr,
8557 					 dataref_offset
8558 					 ? dataref_offset
8559 					 : build_int_cst (ref_type, 0));
8560 			if (alignment_support_scheme == dr_aligned)
8561 			  ;
8562 			else if (DR_MISALIGNMENT (first_dr_info) == -1)
8563 			  TREE_TYPE (data_ref)
8564 			    = build_aligned_type (TREE_TYPE (data_ref),
8565 						  align * BITS_PER_UNIT);
8566 			else
8567 			  TREE_TYPE (data_ref)
8568 			    = build_aligned_type (TREE_TYPE (data_ref),
8569 						  TYPE_ALIGN (elem_type));
8570 		      }
8571 		    break;
8572 		  }
8573 		case dr_explicit_realign:
8574 		  {
8575 		    tree ptr, bump;
8576 
8577 		    tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
8578 
8579 		    if (compute_in_loop)
8580 		      msq = vect_setup_realignment (first_stmt_info, gsi,
8581 						    &realignment_token,
8582 						    dr_explicit_realign,
8583 						    dataref_ptr, NULL);
8584 
8585 		    if (TREE_CODE (dataref_ptr) == SSA_NAME)
8586 		      ptr = copy_ssa_name (dataref_ptr);
8587 		    else
8588 		      ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
8589 		    // For explicit realign the target alignment should be
8590 		    // known at compile time.
8591 		    unsigned HOST_WIDE_INT align =
8592 		      DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
8593 		    new_stmt = gimple_build_assign
8594 				 (ptr, BIT_AND_EXPR, dataref_ptr,
8595 				  build_int_cst
8596 				  (TREE_TYPE (dataref_ptr),
8597 				   -(HOST_WIDE_INT) align));
8598 		    vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8599 		    data_ref
8600 		      = build2 (MEM_REF, vectype, ptr,
8601 				build_int_cst (ref_type, 0));
8602 		    vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8603 		    vec_dest = vect_create_destination_var (scalar_dest,
8604 							    vectype);
8605 		    new_stmt = gimple_build_assign (vec_dest, data_ref);
8606 		    new_temp = make_ssa_name (vec_dest, new_stmt);
8607 		    gimple_assign_set_lhs (new_stmt, new_temp);
8608 		    gimple_set_vdef (new_stmt, gimple_vdef (stmt_info->stmt));
8609 		    gimple_set_vuse (new_stmt, gimple_vuse (stmt_info->stmt));
8610 		    vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8611 		    msq = new_temp;
8612 
8613 		    bump = size_binop (MULT_EXPR, vs,
8614 				       TYPE_SIZE_UNIT (elem_type));
8615 		    bump = size_binop (MINUS_EXPR, bump, size_one_node);
8616 		    ptr = bump_vector_ptr (dataref_ptr, NULL, gsi,
8617 					   stmt_info, bump);
8618 		    new_stmt = gimple_build_assign
8619 				 (NULL_TREE, BIT_AND_EXPR, ptr,
8620 				  build_int_cst
8621 				  (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
8622 		    ptr = copy_ssa_name (ptr, new_stmt);
8623 		    gimple_assign_set_lhs (new_stmt, ptr);
8624 		    vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8625 		    data_ref
8626 		      = build2 (MEM_REF, vectype, ptr,
8627 				build_int_cst (ref_type, 0));
8628 		    break;
8629 		  }
8630 		case dr_explicit_realign_optimized:
8631 		  {
8632 		    if (TREE_CODE (dataref_ptr) == SSA_NAME)
8633 		      new_temp = copy_ssa_name (dataref_ptr);
8634 		    else
8635 		      new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
8636 		    // We should only be doing this if we know the target
8637 		    // alignment at compile time.
8638 		    unsigned HOST_WIDE_INT align =
8639 		      DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
8640 		    new_stmt = gimple_build_assign
8641 		      (new_temp, BIT_AND_EXPR, dataref_ptr,
8642 		       build_int_cst (TREE_TYPE (dataref_ptr),
8643 				     -(HOST_WIDE_INT) align));
8644 		    vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8645 		    data_ref
8646 		      = build2 (MEM_REF, vectype, new_temp,
8647 				build_int_cst (ref_type, 0));
8648 		    break;
8649 		  }
8650 		default:
8651 		  gcc_unreachable ();
8652 		}
8653 	      vec_dest = vect_create_destination_var (scalar_dest, vectype);
8654 	      /* DATA_REF is null if we've already built the statement.  */
8655 	      if (data_ref)
8656 		{
8657 		  vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8658 		  new_stmt = gimple_build_assign (vec_dest, data_ref);
8659 		}
8660 	      new_temp = make_ssa_name (vec_dest, new_stmt);
8661 	      gimple_set_lhs (new_stmt, new_temp);
8662 	      new_stmt_info
8663 		= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8664 
8665 	      /* 3. Handle explicit realignment if necessary/supported.
8666 		 Create in loop:
8667 		   vec_dest = realign_load (msq, lsq, realignment_token)  */
8668 	      if (alignment_support_scheme == dr_explicit_realign_optimized
8669 		  || alignment_support_scheme == dr_explicit_realign)
8670 		{
8671 		  lsq = gimple_assign_lhs (new_stmt);
8672 		  if (!realignment_token)
8673 		    realignment_token = dataref_ptr;
8674 		  vec_dest = vect_create_destination_var (scalar_dest, vectype);
8675 		  new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
8676 						  msq, lsq, realignment_token);
8677 		  new_temp = make_ssa_name (vec_dest, new_stmt);
8678 		  gimple_assign_set_lhs (new_stmt, new_temp);
8679 		  new_stmt_info
8680 		    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8681 
8682 		  if (alignment_support_scheme == dr_explicit_realign_optimized)
8683 		    {
8684 		      gcc_assert (phi);
8685 		      if (i == vec_num - 1 && j == ncopies - 1)
8686 			add_phi_arg (phi, lsq,
8687 				     loop_latch_edge (containing_loop),
8688 				     UNKNOWN_LOCATION);
8689 		      msq = lsq;
8690 		    }
8691 		}
8692 
8693 	      if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8694 		{
8695 		  tree perm_mask = perm_mask_for_reverse (vectype);
8696 		  new_temp = permute_vec_elements (new_temp, new_temp,
8697 						   perm_mask, stmt_info, gsi);
8698 		  new_stmt_info = vinfo->lookup_def (new_temp);
8699 		}
8700 
8701 	      /* Collect vector loads and later create their permutation in
8702 		 vect_transform_grouped_load ().  */
8703 	      if (grouped_load || slp_perm)
8704 		dr_chain.quick_push (new_temp);
8705 
8706 	      /* Store vector loads in the corresponding SLP_NODE.  */
8707 	      if (slp && !slp_perm)
8708 		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
8709 
8710 	      /* With SLP permutation we load the gaps as well, without
8711 	         we need to skip the gaps after we manage to fully load
8712 		 all elements.  group_gap_adj is DR_GROUP_SIZE here.  */
8713 	      group_elt += nunits;
8714 	      if (maybe_ne (group_gap_adj, 0U)
8715 		  && !slp_perm
8716 		  && known_eq (group_elt, group_size - group_gap_adj))
8717 		{
8718 		  poly_wide_int bump_val
8719 		    = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
8720 		       * group_gap_adj);
8721 		  tree bump = wide_int_to_tree (sizetype, bump_val);
8722 		  dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8723 						 stmt_info, bump);
8724 		  group_elt = 0;
8725 		}
8726 	    }
8727 	  /* Bump the vector pointer to account for a gap or for excess
8728 	     elements loaded for a permuted SLP load.  */
8729 	  if (maybe_ne (group_gap_adj, 0U) && slp_perm)
8730 	    {
8731 	      poly_wide_int bump_val
8732 		= (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
8733 		   * group_gap_adj);
8734 	      tree bump = wide_int_to_tree (sizetype, bump_val);
8735 	      dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8736 					     stmt_info, bump);
8737 	    }
8738 	}
8739 
8740       if (slp && !slp_perm)
8741 	continue;
8742 
8743       if (slp_perm)
8744         {
8745 	  unsigned n_perms;
8746           if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
8747                                              slp_node_instance, false,
8748 					     &n_perms))
8749             {
8750               dr_chain.release ();
8751               return false;
8752             }
8753         }
8754       else
8755         {
8756           if (grouped_load)
8757   	    {
8758 	      if (memory_access_type != VMAT_LOAD_STORE_LANES)
8759 		vect_transform_grouped_load (stmt_info, dr_chain,
8760 					     group_size, gsi);
8761 	      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
8762 	    }
8763           else
8764 	    {
8765 	      if (j == 0)
8766 	        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
8767 	      else
8768 	        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
8769 	      prev_stmt_info = new_stmt_info;
8770 	    }
8771         }
8772       dr_chain.release ();
8773     }
8774 
8775   return true;
8776 }
8777 
8778 /* Function vect_is_simple_cond.
8779 
8780    Input:
8781    LOOP - the loop that is being vectorized.
8782    COND - Condition that is checked for simple use.
8783 
8784    Output:
8785    *COMP_VECTYPE - the vector type for the comparison.
8786    *DTS - The def types for the arguments of the comparison
8787 
8788    Returns whether a COND can be vectorized.  Checks whether
8789    condition operands are supportable using vec_is_simple_use.  */
8790 
8791 static bool
8792 vect_is_simple_cond (tree cond, vec_info *vinfo,
8793 		     tree *comp_vectype, enum vect_def_type *dts,
8794 		     tree vectype)
8795 {
8796   tree lhs, rhs;
8797   tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8798 
8799   /* Mask case.  */
8800   if (TREE_CODE (cond) == SSA_NAME
8801       && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
8802     {
8803       if (!vect_is_simple_use (cond, vinfo, &dts[0], comp_vectype)
8804 	  || !*comp_vectype
8805 	  || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
8806 	return false;
8807       return true;
8808     }
8809 
8810   if (!COMPARISON_CLASS_P (cond))
8811     return false;
8812 
8813   lhs = TREE_OPERAND (cond, 0);
8814   rhs = TREE_OPERAND (cond, 1);
8815 
8816   if (TREE_CODE (lhs) == SSA_NAME)
8817     {
8818       if (!vect_is_simple_use (lhs, vinfo, &dts[0], &vectype1))
8819 	return false;
8820     }
8821   else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
8822 	   || TREE_CODE (lhs) == FIXED_CST)
8823     dts[0] = vect_constant_def;
8824   else
8825     return false;
8826 
8827   if (TREE_CODE (rhs) == SSA_NAME)
8828     {
8829       if (!vect_is_simple_use (rhs, vinfo, &dts[1], &vectype2))
8830 	return false;
8831     }
8832   else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
8833 	   || TREE_CODE (rhs) == FIXED_CST)
8834     dts[1] = vect_constant_def;
8835   else
8836     return false;
8837 
8838   if (vectype1 && vectype2
8839       && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
8840 		   TYPE_VECTOR_SUBPARTS (vectype2)))
8841     return false;
8842 
8843   *comp_vectype = vectype1 ? vectype1 : vectype2;
8844   /* Invariant comparison.  */
8845   if (! *comp_vectype && vectype)
8846     {
8847       tree scalar_type = TREE_TYPE (lhs);
8848       /* If we can widen the comparison to match vectype do so.  */
8849       if (INTEGRAL_TYPE_P (scalar_type)
8850 	  && tree_int_cst_lt (TYPE_SIZE (scalar_type),
8851 			      TYPE_SIZE (TREE_TYPE (vectype))))
8852 	scalar_type = build_nonstandard_integer_type
8853 	  (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
8854 	   TYPE_UNSIGNED (scalar_type));
8855       *comp_vectype = get_vectype_for_scalar_type (scalar_type);
8856     }
8857 
8858   return true;
8859 }
8860 
8861 /* vectorizable_condition.
8862 
8863    Check if STMT_INFO is conditional modify expression that can be vectorized.
8864    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8865    stmt using VEC_COND_EXPR  to replace it, put it in VEC_STMT, and insert it
8866    at GSI.
8867 
8868    When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
8869 
8870    Return true if STMT_INFO is vectorizable in this way.  */
8871 
8872 bool
8873 vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8874 			stmt_vec_info *vec_stmt, bool for_reduction,
8875 			slp_tree slp_node, stmt_vector_for_cost *cost_vec)
8876 {
8877   vec_info *vinfo = stmt_info->vinfo;
8878   tree scalar_dest = NULL_TREE;
8879   tree vec_dest = NULL_TREE;
8880   tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
8881   tree then_clause, else_clause;
8882   tree comp_vectype = NULL_TREE;
8883   tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
8884   tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
8885   tree vec_compare;
8886   tree new_temp;
8887   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8888   enum vect_def_type dts[4]
8889     = {vect_unknown_def_type, vect_unknown_def_type,
8890        vect_unknown_def_type, vect_unknown_def_type};
8891   int ndts = 4;
8892   int ncopies;
8893   enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
8894   stmt_vec_info prev_stmt_info = NULL;
8895   int i, j;
8896   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8897   vec<tree> vec_oprnds0 = vNULL;
8898   vec<tree> vec_oprnds1 = vNULL;
8899   vec<tree> vec_oprnds2 = vNULL;
8900   vec<tree> vec_oprnds3 = vNULL;
8901   tree vec_cmp_type;
8902   bool masked = false;
8903 
8904   if (for_reduction && STMT_SLP_TYPE (stmt_info))
8905     return false;
8906 
8907   vect_reduction_type reduction_type
8908     = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info);
8909   if (reduction_type == TREE_CODE_REDUCTION)
8910     {
8911       if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8912 	return false;
8913 
8914       if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8915 	  && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8916 	       && for_reduction))
8917 	return false;
8918 
8919       /* FORNOW: not yet supported.  */
8920       if (STMT_VINFO_LIVE_P (stmt_info))
8921 	{
8922 	  if (dump_enabled_p ())
8923 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8924 			     "value used after loop.\n");
8925 	  return false;
8926 	}
8927     }
8928 
8929   /* Is vectorizable conditional operation?  */
8930   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
8931   if (!stmt)
8932     return false;
8933 
8934   code = gimple_assign_rhs_code (stmt);
8935 
8936   if (code != COND_EXPR)
8937     return false;
8938 
8939   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8940   tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8941 
8942   if (slp_node)
8943     ncopies = 1;
8944   else
8945     ncopies = vect_get_num_copies (loop_vinfo, vectype);
8946 
8947   gcc_assert (ncopies >= 1);
8948   if (for_reduction && ncopies > 1)
8949     return false; /* FORNOW */
8950 
8951   cond_expr = gimple_assign_rhs1 (stmt);
8952   then_clause = gimple_assign_rhs2 (stmt);
8953   else_clause = gimple_assign_rhs3 (stmt);
8954 
8955   if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
8956 			    &comp_vectype, &dts[0], slp_node ? NULL : vectype)
8957       || !comp_vectype)
8958     return false;
8959 
8960   if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &dts[2], &vectype1))
8961     return false;
8962   if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &dts[3], &vectype2))
8963     return false;
8964 
8965   if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
8966     return false;
8967 
8968   if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
8969     return false;
8970 
8971   masked = !COMPARISON_CLASS_P (cond_expr);
8972   vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
8973 
8974   if (vec_cmp_type == NULL_TREE)
8975     return false;
8976 
8977   cond_code = TREE_CODE (cond_expr);
8978   if (!masked)
8979     {
8980       cond_expr0 = TREE_OPERAND (cond_expr, 0);
8981       cond_expr1 = TREE_OPERAND (cond_expr, 1);
8982     }
8983 
8984   if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
8985     {
8986       /* Boolean values may have another representation in vectors
8987 	 and therefore we prefer bit operations over comparison for
8988 	 them (which also works for scalar masks).  We store opcodes
8989 	 to use in bitop1 and bitop2.  Statement is vectorized as
8990 	 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
8991 	 depending on bitop1 and bitop2 arity.  */
8992       switch (cond_code)
8993 	{
8994 	case GT_EXPR:
8995 	  bitop1 = BIT_NOT_EXPR;
8996 	  bitop2 = BIT_AND_EXPR;
8997 	  break;
8998 	case GE_EXPR:
8999 	  bitop1 = BIT_NOT_EXPR;
9000 	  bitop2 = BIT_IOR_EXPR;
9001 	  break;
9002 	case LT_EXPR:
9003 	  bitop1 = BIT_NOT_EXPR;
9004 	  bitop2 = BIT_AND_EXPR;
9005 	  std::swap (cond_expr0, cond_expr1);
9006 	  break;
9007 	case LE_EXPR:
9008 	  bitop1 = BIT_NOT_EXPR;
9009 	  bitop2 = BIT_IOR_EXPR;
9010 	  std::swap (cond_expr0, cond_expr1);
9011 	  break;
9012 	case NE_EXPR:
9013 	  bitop1 = BIT_XOR_EXPR;
9014 	  break;
9015 	case EQ_EXPR:
9016 	  bitop1 = BIT_XOR_EXPR;
9017 	  bitop2 = BIT_NOT_EXPR;
9018 	  break;
9019 	default:
9020 	  return false;
9021 	}
9022       cond_code = SSA_NAME;
9023     }
9024 
9025   if (!vec_stmt)
9026     {
9027       if (bitop1 != NOP_EXPR)
9028 	{
9029 	  machine_mode mode = TYPE_MODE (comp_vectype);
9030 	  optab optab;
9031 
9032 	  optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
9033 	  if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9034 	    return false;
9035 
9036 	  if (bitop2 != NOP_EXPR)
9037 	    {
9038 	      optab = optab_for_tree_code (bitop2, comp_vectype,
9039 					   optab_default);
9040 	      if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9041 		return false;
9042 	    }
9043 	}
9044       if (expand_vec_cond_expr_p (vectype, comp_vectype,
9045 				     cond_code))
9046 	{
9047 	  STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
9048 	  vect_model_simple_cost (stmt_info, ncopies, dts, ndts, slp_node,
9049 				  cost_vec);
9050 	  return true;
9051 	}
9052       return false;
9053     }
9054 
9055   /* Transform.  */
9056 
9057   if (!slp_node)
9058     {
9059       vec_oprnds0.create (1);
9060       vec_oprnds1.create (1);
9061       vec_oprnds2.create (1);
9062       vec_oprnds3.create (1);
9063     }
9064 
9065   /* Handle def.  */
9066   scalar_dest = gimple_assign_lhs (stmt);
9067   if (reduction_type != EXTRACT_LAST_REDUCTION)
9068     vec_dest = vect_create_destination_var (scalar_dest, vectype);
9069 
9070   /* Handle cond expr.  */
9071   for (j = 0; j < ncopies; j++)
9072     {
9073       stmt_vec_info new_stmt_info = NULL;
9074       if (j == 0)
9075 	{
9076           if (slp_node)
9077             {
9078               auto_vec<tree, 4> ops;
9079 	      auto_vec<vec<tree>, 4> vec_defs;
9080 
9081 	      if (masked)
9082 		ops.safe_push (cond_expr);
9083 	      else
9084 		{
9085 		  ops.safe_push (cond_expr0);
9086 		  ops.safe_push (cond_expr1);
9087 		}
9088               ops.safe_push (then_clause);
9089               ops.safe_push (else_clause);
9090               vect_get_slp_defs (ops, slp_node, &vec_defs);
9091 	      vec_oprnds3 = vec_defs.pop ();
9092 	      vec_oprnds2 = vec_defs.pop ();
9093 	      if (!masked)
9094 		vec_oprnds1 = vec_defs.pop ();
9095 	      vec_oprnds0 = vec_defs.pop ();
9096             }
9097           else
9098             {
9099 	      if (masked)
9100 		{
9101 		  vec_cond_lhs
9102 		    = vect_get_vec_def_for_operand (cond_expr, stmt_info,
9103 						    comp_vectype);
9104 		}
9105 	      else
9106 		{
9107 		  vec_cond_lhs
9108 		    = vect_get_vec_def_for_operand (cond_expr0,
9109 						    stmt_info, comp_vectype);
9110 		  vec_cond_rhs
9111 		    = vect_get_vec_def_for_operand (cond_expr1,
9112 						    stmt_info, comp_vectype);
9113 		}
9114 	      vec_then_clause = vect_get_vec_def_for_operand (then_clause,
9115 							      stmt_info);
9116 	      if (reduction_type != EXTRACT_LAST_REDUCTION)
9117 		vec_else_clause = vect_get_vec_def_for_operand (else_clause,
9118 								stmt_info);
9119 	    }
9120 	}
9121       else
9122 	{
9123 	  vec_cond_lhs
9124 	    = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnds0.pop ());
9125 	  if (!masked)
9126 	    vec_cond_rhs
9127 	      = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnds1.pop ());
9128 
9129 	  vec_then_clause = vect_get_vec_def_for_stmt_copy (vinfo,
9130 							    vec_oprnds2.pop ());
9131 	  vec_else_clause = vect_get_vec_def_for_stmt_copy (vinfo,
9132 							    vec_oprnds3.pop ());
9133 	}
9134 
9135       if (!slp_node)
9136         {
9137 	  vec_oprnds0.quick_push (vec_cond_lhs);
9138 	  if (!masked)
9139 	    vec_oprnds1.quick_push (vec_cond_rhs);
9140 	  vec_oprnds2.quick_push (vec_then_clause);
9141 	  vec_oprnds3.quick_push (vec_else_clause);
9142 	}
9143 
9144       /* Arguments are ready.  Create the new vector stmt.  */
9145       FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
9146         {
9147           vec_then_clause = vec_oprnds2[i];
9148           vec_else_clause = vec_oprnds3[i];
9149 
9150 	  if (masked)
9151 	    vec_compare = vec_cond_lhs;
9152 	  else
9153 	    {
9154 	      vec_cond_rhs = vec_oprnds1[i];
9155 	      if (bitop1 == NOP_EXPR)
9156 		vec_compare = build2 (cond_code, vec_cmp_type,
9157 				      vec_cond_lhs, vec_cond_rhs);
9158 	      else
9159 		{
9160 		  new_temp = make_ssa_name (vec_cmp_type);
9161 		  gassign *new_stmt;
9162 		  if (bitop1 == BIT_NOT_EXPR)
9163 		    new_stmt = gimple_build_assign (new_temp, bitop1,
9164 						    vec_cond_rhs);
9165 		  else
9166 		    new_stmt
9167 		      = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
9168 					     vec_cond_rhs);
9169 		  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9170 		  if (bitop2 == NOP_EXPR)
9171 		    vec_compare = new_temp;
9172 		  else if (bitop2 == BIT_NOT_EXPR)
9173 		    {
9174 		      /* Instead of doing ~x ? y : z do x ? z : y.  */
9175 		      vec_compare = new_temp;
9176 		      std::swap (vec_then_clause, vec_else_clause);
9177 		    }
9178 		  else
9179 		    {
9180 		      vec_compare = make_ssa_name (vec_cmp_type);
9181 		      new_stmt
9182 			= gimple_build_assign (vec_compare, bitop2,
9183 					       vec_cond_lhs, new_temp);
9184 		      vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9185 		    }
9186 		}
9187 	    }
9188 	  if (reduction_type == EXTRACT_LAST_REDUCTION)
9189 	    {
9190 	      if (!is_gimple_val (vec_compare))
9191 		{
9192 		  tree vec_compare_name = make_ssa_name (vec_cmp_type);
9193 		  gassign *new_stmt = gimple_build_assign (vec_compare_name,
9194 							   vec_compare);
9195 		  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9196 		  vec_compare = vec_compare_name;
9197 		}
9198 	      gcall *new_stmt = gimple_build_call_internal
9199 		(IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
9200 		 vec_then_clause);
9201 	      gimple_call_set_lhs (new_stmt, scalar_dest);
9202 	      SSA_NAME_DEF_STMT (scalar_dest) = new_stmt;
9203 	      if (stmt_info->stmt == gsi_stmt (*gsi))
9204 		new_stmt_info = vect_finish_replace_stmt (stmt_info, new_stmt);
9205 	      else
9206 		{
9207 		  /* In this case we're moving the definition to later in the
9208 		     block.  That doesn't matter because the only uses of the
9209 		     lhs are in phi statements.  */
9210 		  gimple_stmt_iterator old_gsi
9211 		    = gsi_for_stmt (stmt_info->stmt);
9212 		  gsi_remove (&old_gsi, true);
9213 		  new_stmt_info
9214 		    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9215 		}
9216 	    }
9217 	  else
9218 	    {
9219 	      new_temp = make_ssa_name (vec_dest);
9220 	      gassign *new_stmt
9221 		= gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
9222 				       vec_then_clause, vec_else_clause);
9223 	      new_stmt_info
9224 		= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9225 	    }
9226           if (slp_node)
9227 	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
9228         }
9229 
9230         if (slp_node)
9231           continue;
9232 
9233 	if (j == 0)
9234 	  STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
9235 	else
9236 	  STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
9237 
9238 	prev_stmt_info = new_stmt_info;
9239     }
9240 
9241   vec_oprnds0.release ();
9242   vec_oprnds1.release ();
9243   vec_oprnds2.release ();
9244   vec_oprnds3.release ();
9245 
9246   return true;
9247 }
9248 
9249 /* vectorizable_comparison.
9250 
9251    Check if STMT_INFO is comparison expression that can be vectorized.
9252    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
9253    comparison, put it in VEC_STMT, and insert it at GSI.
9254 
9255    Return true if STMT_INFO is vectorizable in this way.  */
9256 
9257 static bool
9258 vectorizable_comparison (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9259 			 stmt_vec_info *vec_stmt,
9260 			 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
9261 {
9262   vec_info *vinfo = stmt_info->vinfo;
9263   tree lhs, rhs1, rhs2;
9264   tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9265   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
9266   tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
9267   tree new_temp;
9268   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
9269   enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
9270   int ndts = 2;
9271   poly_uint64 nunits;
9272   int ncopies;
9273   enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
9274   stmt_vec_info prev_stmt_info = NULL;
9275   int i, j;
9276   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
9277   vec<tree> vec_oprnds0 = vNULL;
9278   vec<tree> vec_oprnds1 = vNULL;
9279   tree mask_type;
9280   tree mask;
9281 
9282   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
9283     return false;
9284 
9285   if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
9286     return false;
9287 
9288   mask_type = vectype;
9289   nunits = TYPE_VECTOR_SUBPARTS (vectype);
9290 
9291   if (slp_node)
9292     ncopies = 1;
9293   else
9294     ncopies = vect_get_num_copies (loop_vinfo, vectype);
9295 
9296   gcc_assert (ncopies >= 1);
9297   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
9298     return false;
9299 
9300   if (STMT_VINFO_LIVE_P (stmt_info))
9301     {
9302       if (dump_enabled_p ())
9303 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9304 			 "value used after loop.\n");
9305       return false;
9306     }
9307 
9308   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
9309   if (!stmt)
9310     return false;
9311 
9312   code = gimple_assign_rhs_code (stmt);
9313 
9314   if (TREE_CODE_CLASS (code) != tcc_comparison)
9315     return false;
9316 
9317   rhs1 = gimple_assign_rhs1 (stmt);
9318   rhs2 = gimple_assign_rhs2 (stmt);
9319 
9320   if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &dts[0], &vectype1))
9321     return false;
9322 
9323   if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &dts[1], &vectype2))
9324     return false;
9325 
9326   if (vectype1 && vectype2
9327       && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
9328 		   TYPE_VECTOR_SUBPARTS (vectype2)))
9329     return false;
9330 
9331   vectype = vectype1 ? vectype1 : vectype2;
9332 
9333   /* Invariant comparison.  */
9334   if (!vectype)
9335     {
9336       vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
9337       if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
9338 	return false;
9339     }
9340   else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
9341     return false;
9342 
9343   /* Can't compare mask and non-mask types.  */
9344   if (vectype1 && vectype2
9345       && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
9346     return false;
9347 
9348   /* Boolean values may have another representation in vectors
9349      and therefore we prefer bit operations over comparison for
9350      them (which also works for scalar masks).  We store opcodes
9351      to use in bitop1 and bitop2.  Statement is vectorized as
9352        BITOP2 (rhs1 BITOP1 rhs2) or
9353        rhs1 BITOP2 (BITOP1 rhs2)
9354      depending on bitop1 and bitop2 arity.  */
9355   bool swap_p = false;
9356   if (VECTOR_BOOLEAN_TYPE_P (vectype))
9357     {
9358       if (code == GT_EXPR)
9359 	{
9360 	  bitop1 = BIT_NOT_EXPR;
9361 	  bitop2 = BIT_AND_EXPR;
9362 	}
9363       else if (code == GE_EXPR)
9364 	{
9365 	  bitop1 = BIT_NOT_EXPR;
9366 	  bitop2 = BIT_IOR_EXPR;
9367 	}
9368       else if (code == LT_EXPR)
9369 	{
9370 	  bitop1 = BIT_NOT_EXPR;
9371 	  bitop2 = BIT_AND_EXPR;
9372 	  swap_p = true;
9373 	}
9374       else if (code == LE_EXPR)
9375 	{
9376 	  bitop1 = BIT_NOT_EXPR;
9377 	  bitop2 = BIT_IOR_EXPR;
9378 	  swap_p = true;
9379 	}
9380       else
9381 	{
9382 	  bitop1 = BIT_XOR_EXPR;
9383 	  if (code == EQ_EXPR)
9384 	    bitop2 = BIT_NOT_EXPR;
9385 	}
9386     }
9387 
9388   if (!vec_stmt)
9389     {
9390       if (bitop1 == NOP_EXPR)
9391 	{
9392 	  if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
9393 	    return false;
9394 	}
9395       else
9396 	{
9397 	  machine_mode mode = TYPE_MODE (vectype);
9398 	  optab optab;
9399 
9400 	  optab = optab_for_tree_code (bitop1, vectype, optab_default);
9401 	  if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9402 	    return false;
9403 
9404 	  if (bitop2 != NOP_EXPR)
9405 	    {
9406 	      optab = optab_for_tree_code (bitop2, vectype, optab_default);
9407 	      if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9408 		return false;
9409 	    }
9410 	}
9411 
9412       STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
9413       vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
9414 			      dts, ndts, slp_node, cost_vec);
9415       return true;
9416     }
9417 
9418   /* Transform.  */
9419   if (!slp_node)
9420     {
9421       vec_oprnds0.create (1);
9422       vec_oprnds1.create (1);
9423     }
9424 
9425   /* Handle def.  */
9426   lhs = gimple_assign_lhs (stmt);
9427   mask = vect_create_destination_var (lhs, mask_type);
9428 
9429   /* Handle cmp expr.  */
9430   for (j = 0; j < ncopies; j++)
9431     {
9432       stmt_vec_info new_stmt_info = NULL;
9433       if (j == 0)
9434 	{
9435 	  if (slp_node)
9436 	    {
9437 	      auto_vec<tree, 2> ops;
9438 	      auto_vec<vec<tree>, 2> vec_defs;
9439 
9440 	      ops.safe_push (rhs1);
9441 	      ops.safe_push (rhs2);
9442 	      vect_get_slp_defs (ops, slp_node, &vec_defs);
9443 	      vec_oprnds1 = vec_defs.pop ();
9444 	      vec_oprnds0 = vec_defs.pop ();
9445 	      if (swap_p)
9446 		std::swap (vec_oprnds0, vec_oprnds1);
9447 	    }
9448 	  else
9449 	    {
9450 	      vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt_info,
9451 						       vectype);
9452 	      vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt_info,
9453 						       vectype);
9454 	    }
9455 	}
9456       else
9457 	{
9458 	  vec_rhs1 = vect_get_vec_def_for_stmt_copy (vinfo,
9459 						     vec_oprnds0.pop ());
9460 	  vec_rhs2 = vect_get_vec_def_for_stmt_copy (vinfo,
9461 						     vec_oprnds1.pop ());
9462 	}
9463 
9464       if (!slp_node)
9465 	{
9466 	  if (swap_p)
9467 	    std::swap (vec_rhs1, vec_rhs2);
9468 	  vec_oprnds0.quick_push (vec_rhs1);
9469 	  vec_oprnds1.quick_push (vec_rhs2);
9470 	}
9471 
9472       /* Arguments are ready.  Create the new vector stmt.  */
9473       FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
9474 	{
9475 	  vec_rhs2 = vec_oprnds1[i];
9476 
9477 	  new_temp = make_ssa_name (mask);
9478 	  if (bitop1 == NOP_EXPR)
9479 	    {
9480 	      gassign *new_stmt = gimple_build_assign (new_temp, code,
9481 						       vec_rhs1, vec_rhs2);
9482 	      new_stmt_info
9483 		= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9484 	    }
9485 	  else
9486 	    {
9487 	      gassign *new_stmt;
9488 	      if (bitop1 == BIT_NOT_EXPR)
9489 		new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
9490 	      else
9491 		new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
9492 						vec_rhs2);
9493 	      new_stmt_info
9494 		= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9495 	      if (bitop2 != NOP_EXPR)
9496 		{
9497 		  tree res = make_ssa_name (mask);
9498 		  if (bitop2 == BIT_NOT_EXPR)
9499 		    new_stmt = gimple_build_assign (res, bitop2, new_temp);
9500 		  else
9501 		    new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
9502 						    new_temp);
9503 		  new_stmt_info
9504 		    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9505 		}
9506 	    }
9507 	  if (slp_node)
9508 	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
9509 	}
9510 
9511       if (slp_node)
9512 	continue;
9513 
9514       if (j == 0)
9515 	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
9516       else
9517 	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
9518 
9519       prev_stmt_info = new_stmt_info;
9520     }
9521 
9522   vec_oprnds0.release ();
9523   vec_oprnds1.release ();
9524 
9525   return true;
9526 }
9527 
9528 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
9529    can handle all live statements in the node.  Otherwise return true
9530    if STMT_INFO is not live or if vectorizable_live_operation can handle it.
9531    GSI and VEC_STMT are as for vectorizable_live_operation.  */
9532 
9533 static bool
9534 can_vectorize_live_stmts (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9535 			  slp_tree slp_node, stmt_vec_info *vec_stmt,
9536 			  stmt_vector_for_cost *cost_vec)
9537 {
9538   if (slp_node)
9539     {
9540       stmt_vec_info slp_stmt_info;
9541       unsigned int i;
9542       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
9543 	{
9544 	  if (STMT_VINFO_LIVE_P (slp_stmt_info)
9545 	      && !vectorizable_live_operation (slp_stmt_info, gsi, slp_node, i,
9546 					       vec_stmt, cost_vec))
9547 	    return false;
9548 	}
9549     }
9550   else if (STMT_VINFO_LIVE_P (stmt_info)
9551 	   && !vectorizable_live_operation (stmt_info, gsi, slp_node, -1,
9552 					    vec_stmt, cost_vec))
9553     return false;
9554 
9555   return true;
9556 }
9557 
9558 /* Make sure the statement is vectorizable.  */
9559 
9560 opt_result
9561 vect_analyze_stmt (stmt_vec_info stmt_info, bool *need_to_vectorize,
9562 		   slp_tree node, slp_instance node_instance,
9563 		   stmt_vector_for_cost *cost_vec)
9564 {
9565   vec_info *vinfo = stmt_info->vinfo;
9566   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
9567   enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
9568   bool ok;
9569   gimple_seq pattern_def_seq;
9570 
9571   if (dump_enabled_p ())
9572     dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
9573 		     stmt_info->stmt);
9574 
9575   if (gimple_has_volatile_ops (stmt_info->stmt))
9576     return opt_result::failure_at (stmt_info->stmt,
9577 				   "not vectorized:"
9578 				   " stmt has volatile operands: %G\n",
9579 				   stmt_info->stmt);
9580 
9581   if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9582       && node == NULL
9583       && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
9584     {
9585       gimple_stmt_iterator si;
9586 
9587       for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
9588 	{
9589 	  stmt_vec_info pattern_def_stmt_info
9590 	    = vinfo->lookup_stmt (gsi_stmt (si));
9591 	  if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
9592 	      || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
9593 	    {
9594 	      /* Analyze def stmt of STMT if it's a pattern stmt.  */
9595 	      if (dump_enabled_p ())
9596 		dump_printf_loc (MSG_NOTE, vect_location,
9597 				 "==> examining pattern def statement: %G",
9598 				 pattern_def_stmt_info->stmt);
9599 
9600 	      opt_result res
9601 		= vect_analyze_stmt (pattern_def_stmt_info,
9602 				     need_to_vectorize, node, node_instance,
9603 				     cost_vec);
9604 	      if (!res)
9605 		return res;
9606 	    }
9607 	}
9608     }
9609 
9610   /* Skip stmts that do not need to be vectorized. In loops this is expected
9611      to include:
9612      - the COND_EXPR which is the loop exit condition
9613      - any LABEL_EXPRs in the loop
9614      - computations that are used only for array indexing or loop control.
9615      In basic blocks we only analyze statements that are a part of some SLP
9616      instance, therefore, all the statements are relevant.
9617 
9618      Pattern statement needs to be analyzed instead of the original statement
9619      if the original statement is not relevant.  Otherwise, we analyze both
9620      statements.  In basic blocks we are called from some SLP instance
9621      traversal, don't analyze pattern stmts instead, the pattern stmts
9622      already will be part of SLP instance.  */
9623 
9624   stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
9625   if (!STMT_VINFO_RELEVANT_P (stmt_info)
9626       && !STMT_VINFO_LIVE_P (stmt_info))
9627     {
9628       if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9629 	  && pattern_stmt_info
9630 	  && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
9631 	      || STMT_VINFO_LIVE_P (pattern_stmt_info)))
9632         {
9633           /* Analyze PATTERN_STMT instead of the original stmt.  */
9634 	  stmt_info = pattern_stmt_info;
9635           if (dump_enabled_p ())
9636 	    dump_printf_loc (MSG_NOTE, vect_location,
9637 			     "==> examining pattern statement: %G",
9638 			     stmt_info->stmt);
9639         }
9640       else
9641         {
9642           if (dump_enabled_p ())
9643             dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
9644 
9645           return opt_result::success ();
9646         }
9647     }
9648   else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9649 	   && node == NULL
9650 	   && pattern_stmt_info
9651 	   && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
9652 	       || STMT_VINFO_LIVE_P (pattern_stmt_info)))
9653     {
9654       /* Analyze PATTERN_STMT too.  */
9655       if (dump_enabled_p ())
9656 	dump_printf_loc (MSG_NOTE, vect_location,
9657 			 "==> examining pattern statement: %G",
9658 			 pattern_stmt_info->stmt);
9659 
9660       opt_result res
9661 	= vect_analyze_stmt (pattern_stmt_info, need_to_vectorize, node,
9662 			     node_instance, cost_vec);
9663       if (!res)
9664 	return res;
9665    }
9666 
9667   switch (STMT_VINFO_DEF_TYPE (stmt_info))
9668     {
9669       case vect_internal_def:
9670         break;
9671 
9672       case vect_reduction_def:
9673       case vect_nested_cycle:
9674          gcc_assert (!bb_vinfo
9675 		     && (relevance == vect_used_in_outer
9676 			 || relevance == vect_used_in_outer_by_reduction
9677 			 || relevance == vect_used_by_reduction
9678 			 || relevance == vect_unused_in_scope
9679 			 || relevance == vect_used_only_live));
9680          break;
9681 
9682       case vect_induction_def:
9683 	gcc_assert (!bb_vinfo);
9684 	break;
9685 
9686       case vect_constant_def:
9687       case vect_external_def:
9688       case vect_unknown_def_type:
9689       default:
9690         gcc_unreachable ();
9691     }
9692 
9693   if (STMT_VINFO_RELEVANT_P (stmt_info))
9694     {
9695       tree type = gimple_expr_type (stmt_info->stmt);
9696       gcc_assert (!VECTOR_MODE_P (TYPE_MODE (type)));
9697       gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
9698       gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
9699 		  || (call && gimple_call_lhs (call) == NULL_TREE));
9700       *need_to_vectorize = true;
9701     }
9702 
9703   if (PURE_SLP_STMT (stmt_info) && !node)
9704     {
9705       if (dump_enabled_p ())
9706 	dump_printf_loc (MSG_NOTE, vect_location,
9707 			 "handled only by SLP analysis\n");
9708       return opt_result::success ();
9709     }
9710 
9711   ok = true;
9712   if (!bb_vinfo
9713       && (STMT_VINFO_RELEVANT_P (stmt_info)
9714 	  || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
9715     /* Prefer vectorizable_call over vectorizable_simd_clone_call so
9716        -mveclibabi= takes preference over library functions with
9717        the simd attribute.  */
9718     ok = (vectorizable_call (stmt_info, NULL, NULL, node, cost_vec)
9719 	  || vectorizable_simd_clone_call (stmt_info, NULL, NULL, node,
9720 					   cost_vec)
9721 	  || vectorizable_conversion (stmt_info, NULL, NULL, node, cost_vec)
9722 	  || vectorizable_operation (stmt_info, NULL, NULL, node, cost_vec)
9723 	  || vectorizable_assignment (stmt_info, NULL, NULL, node, cost_vec)
9724 	  || vectorizable_load (stmt_info, NULL, NULL, node, node_instance,
9725 				cost_vec)
9726 	  || vectorizable_store (stmt_info, NULL, NULL, node, cost_vec)
9727 	  || vectorizable_reduction (stmt_info, NULL, NULL, node,
9728 				     node_instance, cost_vec)
9729 	  || vectorizable_induction (stmt_info, NULL, NULL, node, cost_vec)
9730 	  || vectorizable_shift (stmt_info, NULL, NULL, node, cost_vec)
9731 	  || vectorizable_condition (stmt_info, NULL, NULL, false, node,
9732 				     cost_vec)
9733 	  || vectorizable_comparison (stmt_info, NULL, NULL, node,
9734 				      cost_vec));
9735   else
9736     {
9737       if (bb_vinfo)
9738 	ok = (vectorizable_call (stmt_info, NULL, NULL, node, cost_vec)
9739 	      || vectorizable_simd_clone_call (stmt_info, NULL, NULL, node,
9740 					       cost_vec)
9741 	      || vectorizable_conversion (stmt_info, NULL, NULL, node,
9742 					  cost_vec)
9743 	      || vectorizable_shift (stmt_info, NULL, NULL, node, cost_vec)
9744 	      || vectorizable_operation (stmt_info, NULL, NULL, node, cost_vec)
9745 	      || vectorizable_assignment (stmt_info, NULL, NULL, node,
9746 					  cost_vec)
9747 	      || vectorizable_load (stmt_info, NULL, NULL, node, node_instance,
9748 				    cost_vec)
9749 	      || vectorizable_store (stmt_info, NULL, NULL, node, cost_vec)
9750 	      || vectorizable_condition (stmt_info, NULL, NULL, false, node,
9751 					 cost_vec)
9752 	      || vectorizable_comparison (stmt_info, NULL, NULL, node,
9753 					  cost_vec));
9754     }
9755 
9756   if (!ok)
9757     return opt_result::failure_at (stmt_info->stmt,
9758 				   "not vectorized:"
9759 				   " relevant stmt not supported: %G",
9760 				   stmt_info->stmt);
9761 
9762   /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
9763       need extra handling, except for vectorizable reductions.  */
9764   if (!bb_vinfo
9765       && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9766       && !can_vectorize_live_stmts (stmt_info, NULL, node, NULL, cost_vec))
9767     return opt_result::failure_at (stmt_info->stmt,
9768 				   "not vectorized:"
9769 				   " live stmt not supported: %G",
9770 				   stmt_info->stmt);
9771 
9772   return opt_result::success ();
9773 }
9774 
9775 
9776 /* Function vect_transform_stmt.
9777 
9778    Create a vectorized stmt to replace STMT_INFO, and insert it at BSI.  */
9779 
9780 bool
9781 vect_transform_stmt (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9782 		     slp_tree slp_node, slp_instance slp_node_instance)
9783 {
9784   vec_info *vinfo = stmt_info->vinfo;
9785   bool is_store = false;
9786   stmt_vec_info vec_stmt = NULL;
9787   bool done;
9788 
9789   gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
9790   stmt_vec_info old_vec_stmt_info = STMT_VINFO_VEC_STMT (stmt_info);
9791 
9792   bool nested_p = (STMT_VINFO_LOOP_VINFO (stmt_info)
9793 		   && nested_in_vect_loop_p
9794 		        (LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info)),
9795 			 stmt_info));
9796 
9797   gimple *stmt = stmt_info->stmt;
9798   switch (STMT_VINFO_TYPE (stmt_info))
9799     {
9800     case type_demotion_vec_info_type:
9801     case type_promotion_vec_info_type:
9802     case type_conversion_vec_info_type:
9803       done = vectorizable_conversion (stmt_info, gsi, &vec_stmt, slp_node,
9804 				      NULL);
9805       gcc_assert (done);
9806       break;
9807 
9808     case induc_vec_info_type:
9809       done = vectorizable_induction (stmt_info, gsi, &vec_stmt, slp_node,
9810 				     NULL);
9811       gcc_assert (done);
9812       break;
9813 
9814     case shift_vec_info_type:
9815       done = vectorizable_shift (stmt_info, gsi, &vec_stmt, slp_node, NULL);
9816       gcc_assert (done);
9817       break;
9818 
9819     case op_vec_info_type:
9820       done = vectorizable_operation (stmt_info, gsi, &vec_stmt, slp_node,
9821 				     NULL);
9822       gcc_assert (done);
9823       break;
9824 
9825     case assignment_vec_info_type:
9826       done = vectorizable_assignment (stmt_info, gsi, &vec_stmt, slp_node,
9827 				      NULL);
9828       gcc_assert (done);
9829       break;
9830 
9831     case load_vec_info_type:
9832       done = vectorizable_load (stmt_info, gsi, &vec_stmt, slp_node,
9833                                 slp_node_instance, NULL);
9834       gcc_assert (done);
9835       break;
9836 
9837     case store_vec_info_type:
9838       done = vectorizable_store (stmt_info, gsi, &vec_stmt, slp_node, NULL);
9839       gcc_assert (done);
9840       if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
9841 	{
9842 	  /* In case of interleaving, the whole chain is vectorized when the
9843 	     last store in the chain is reached.  Store stmts before the last
9844 	     one are skipped, and there vec_stmt_info shouldn't be freed
9845 	     meanwhile.  */
9846 	  stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9847 	  if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
9848 	    is_store = true;
9849 	}
9850       else
9851 	is_store = true;
9852       break;
9853 
9854     case condition_vec_info_type:
9855       done = vectorizable_condition (stmt_info, gsi, &vec_stmt, false,
9856 				     slp_node, NULL);
9857       gcc_assert (done);
9858       break;
9859 
9860     case comparison_vec_info_type:
9861       done = vectorizable_comparison (stmt_info, gsi, &vec_stmt,
9862 				      slp_node, NULL);
9863       gcc_assert (done);
9864       break;
9865 
9866     case call_vec_info_type:
9867       done = vectorizable_call (stmt_info, gsi, &vec_stmt, slp_node, NULL);
9868       stmt = gsi_stmt (*gsi);
9869       break;
9870 
9871     case call_simd_clone_vec_info_type:
9872       done = vectorizable_simd_clone_call (stmt_info, gsi, &vec_stmt,
9873 					   slp_node, NULL);
9874       stmt = gsi_stmt (*gsi);
9875       break;
9876 
9877     case reduc_vec_info_type:
9878       done = vectorizable_reduction (stmt_info, gsi, &vec_stmt, slp_node,
9879 				     slp_node_instance, NULL);
9880       gcc_assert (done);
9881       break;
9882 
9883     default:
9884       if (!STMT_VINFO_LIVE_P (stmt_info))
9885 	{
9886 	  if (dump_enabled_p ())
9887 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9888                              "stmt not supported.\n");
9889 	  gcc_unreachable ();
9890 	}
9891     }
9892 
9893   /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
9894      This would break hybrid SLP vectorization.  */
9895   if (slp_node)
9896     gcc_assert (!vec_stmt
9897 		&& STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt_info);
9898 
9899   /* Handle inner-loop stmts whose DEF is used in the loop-nest that
9900      is being vectorized, but outside the immediately enclosing loop.  */
9901   if (vec_stmt
9902       && nested_p
9903       && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9904       && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
9905           || STMT_VINFO_RELEVANT (stmt_info) ==
9906                                            vect_used_in_outer_by_reduction))
9907     {
9908       struct loop *innerloop = LOOP_VINFO_LOOP (
9909                                 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
9910       imm_use_iterator imm_iter;
9911       use_operand_p use_p;
9912       tree scalar_dest;
9913 
9914       if (dump_enabled_p ())
9915         dump_printf_loc (MSG_NOTE, vect_location,
9916                          "Record the vdef for outer-loop vectorization.\n");
9917 
9918       /* Find the relevant loop-exit phi-node, and reord the vec_stmt there
9919         (to be used when vectorizing outer-loop stmts that use the DEF of
9920         STMT).  */
9921       if (gimple_code (stmt) == GIMPLE_PHI)
9922         scalar_dest = PHI_RESULT (stmt);
9923       else
9924         scalar_dest = gimple_get_lhs (stmt);
9925 
9926       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
9927 	if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
9928 	  {
9929 	    stmt_vec_info exit_phi_info
9930 	      = vinfo->lookup_stmt (USE_STMT (use_p));
9931 	    STMT_VINFO_VEC_STMT (exit_phi_info) = vec_stmt;
9932 	  }
9933     }
9934 
9935   /* Handle stmts whose DEF is used outside the loop-nest that is
9936      being vectorized.  */
9937   if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
9938     {
9939       done = can_vectorize_live_stmts (stmt_info, gsi, slp_node, &vec_stmt,
9940 				       NULL);
9941       gcc_assert (done);
9942     }
9943 
9944   if (vec_stmt)
9945     STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
9946 
9947   return is_store;
9948 }
9949 
9950 
9951 /* Remove a group of stores (for SLP or interleaving), free their
9952    stmt_vec_info.  */
9953 
9954 void
9955 vect_remove_stores (stmt_vec_info first_stmt_info)
9956 {
9957   vec_info *vinfo = first_stmt_info->vinfo;
9958   stmt_vec_info next_stmt_info = first_stmt_info;
9959 
9960   while (next_stmt_info)
9961     {
9962       stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
9963       next_stmt_info = vect_orig_stmt (next_stmt_info);
9964       /* Free the attached stmt_vec_info and remove the stmt.  */
9965       vinfo->remove_stmt (next_stmt_info);
9966       next_stmt_info = tmp;
9967     }
9968 }
9969 
9970 /* Function get_vectype_for_scalar_type_and_size.
9971 
9972    Returns the vector type corresponding to SCALAR_TYPE  and SIZE as supported
9973    by the target.  */
9974 
9975 tree
9976 get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)
9977 {
9978   tree orig_scalar_type = scalar_type;
9979   scalar_mode inner_mode;
9980   machine_mode simd_mode;
9981   poly_uint64 nunits;
9982   tree vectype;
9983 
9984   if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
9985       && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
9986     return NULL_TREE;
9987 
9988   unsigned int nbytes = GET_MODE_SIZE (inner_mode);
9989 
9990   /* For vector types of elements whose mode precision doesn't
9991      match their types precision we use a element type of mode
9992      precision.  The vectorization routines will have to make sure
9993      they support the proper result truncation/extension.
9994      We also make sure to build vector types with INTEGER_TYPE
9995      component type only.  */
9996   if (INTEGRAL_TYPE_P (scalar_type)
9997       && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
9998 	  || TREE_CODE (scalar_type) != INTEGER_TYPE))
9999     scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
10000 						  TYPE_UNSIGNED (scalar_type));
10001 
10002   /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
10003      When the component mode passes the above test simply use a type
10004      corresponding to that mode.  The theory is that any use that
10005      would cause problems with this will disable vectorization anyway.  */
10006   else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
10007 	   && !INTEGRAL_TYPE_P (scalar_type))
10008     scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
10009 
10010   /* We can't build a vector type of elements with alignment bigger than
10011      their size.  */
10012   else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
10013     scalar_type = lang_hooks.types.type_for_mode (inner_mode,
10014 						  TYPE_UNSIGNED (scalar_type));
10015 
10016   /* If we felt back to using the mode fail if there was
10017      no scalar type for it.  */
10018   if (scalar_type == NULL_TREE)
10019     return NULL_TREE;
10020 
10021   /* If no size was supplied use the mode the target prefers.   Otherwise
10022      lookup a vector mode of the specified size.  */
10023   if (known_eq (size, 0U))
10024     simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
10025   else if (!multiple_p (size, nbytes, &nunits)
10026 	   || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
10027     return NULL_TREE;
10028   /* NOTE: nunits == 1 is allowed to support single element vector types.  */
10029   if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits))
10030     return NULL_TREE;
10031 
10032   vectype = build_vector_type (scalar_type, nunits);
10033 
10034   if (!VECTOR_MODE_P (TYPE_MODE (vectype))
10035       && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
10036     return NULL_TREE;
10037 
10038   /* Re-attach the address-space qualifier if we canonicalized the scalar
10039      type.  */
10040   if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
10041     return build_qualified_type
10042 	     (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
10043 
10044   return vectype;
10045 }
10046 
10047 poly_uint64 current_vector_size;
10048 
10049 /* Function get_vectype_for_scalar_type.
10050 
10051    Returns the vector type corresponding to SCALAR_TYPE as supported
10052    by the target.  */
10053 
10054 tree
10055 get_vectype_for_scalar_type (tree scalar_type)
10056 {
10057   tree vectype;
10058   vectype = get_vectype_for_scalar_type_and_size (scalar_type,
10059 						  current_vector_size);
10060   if (vectype
10061       && known_eq (current_vector_size, 0U))
10062     current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
10063   return vectype;
10064 }
10065 
10066 /* Function get_mask_type_for_scalar_type.
10067 
10068    Returns the mask type corresponding to a result of comparison
10069    of vectors of specified SCALAR_TYPE as supported by target.  */
10070 
10071 tree
10072 get_mask_type_for_scalar_type (tree scalar_type)
10073 {
10074   tree vectype = get_vectype_for_scalar_type (scalar_type);
10075 
10076   if (!vectype)
10077     return NULL;
10078 
10079   return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
10080 				  current_vector_size);
10081 }
10082 
10083 /* Function get_same_sized_vectype
10084 
10085    Returns a vector type corresponding to SCALAR_TYPE of size
10086    VECTOR_TYPE if supported by the target.  */
10087 
10088 tree
10089 get_same_sized_vectype (tree scalar_type, tree vector_type)
10090 {
10091   if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
10092     return build_same_sized_truth_vector_type (vector_type);
10093 
10094   return get_vectype_for_scalar_type_and_size
10095 	   (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
10096 }
10097 
10098 /* Function vect_is_simple_use.
10099 
10100    Input:
10101    VINFO - the vect info of the loop or basic block that is being vectorized.
10102    OPERAND - operand in the loop or bb.
10103    Output:
10104    DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
10105      case OPERAND is an SSA_NAME that is defined in the vectorizable region
10106    DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
10107      the definition could be anywhere in the function
10108    DT - the type of definition
10109 
10110    Returns whether a stmt with OPERAND can be vectorized.
10111    For loops, supportable operands are constants, loop invariants, and operands
10112    that are defined by the current iteration of the loop.  Unsupportable
10113    operands are those that are defined by a previous iteration of the loop (as
10114    is the case in reduction/induction computations).
10115    For basic blocks, supportable operands are constants and bb invariants.
10116    For now, operands defined outside the basic block are not supported.  */
10117 
10118 bool
10119 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
10120 		    stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
10121 {
10122   if (def_stmt_info_out)
10123     *def_stmt_info_out = NULL;
10124   if (def_stmt_out)
10125     *def_stmt_out = NULL;
10126   *dt = vect_unknown_def_type;
10127 
10128   if (dump_enabled_p ())
10129     {
10130       dump_printf_loc (MSG_NOTE, vect_location,
10131                        "vect_is_simple_use: operand ");
10132       if (TREE_CODE (operand) == SSA_NAME
10133 	  && !SSA_NAME_IS_DEFAULT_DEF (operand))
10134 	dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
10135       else
10136 	dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
10137     }
10138 
10139   if (CONSTANT_CLASS_P (operand))
10140     *dt = vect_constant_def;
10141   else if (is_gimple_min_invariant (operand))
10142     *dt = vect_external_def;
10143   else if (TREE_CODE (operand) != SSA_NAME)
10144     *dt = vect_unknown_def_type;
10145   else if (SSA_NAME_IS_DEFAULT_DEF (operand))
10146     *dt = vect_external_def;
10147   else
10148     {
10149       gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
10150       stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
10151       if (!stmt_vinfo)
10152 	*dt = vect_external_def;
10153       else
10154 	{
10155 	  stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
10156 	  def_stmt = stmt_vinfo->stmt;
10157 	  switch (gimple_code (def_stmt))
10158 	    {
10159 	    case GIMPLE_PHI:
10160 	    case GIMPLE_ASSIGN:
10161 	    case GIMPLE_CALL:
10162 	      *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
10163 	      break;
10164 	    default:
10165 	      *dt = vect_unknown_def_type;
10166 	      break;
10167 	    }
10168 	  if (def_stmt_info_out)
10169 	    *def_stmt_info_out = stmt_vinfo;
10170 	}
10171       if (def_stmt_out)
10172 	*def_stmt_out = def_stmt;
10173     }
10174 
10175   if (dump_enabled_p ())
10176     {
10177       dump_printf (MSG_NOTE, ", type of def: ");
10178       switch (*dt)
10179 	{
10180 	case vect_uninitialized_def:
10181 	  dump_printf (MSG_NOTE, "uninitialized\n");
10182 	  break;
10183 	case vect_constant_def:
10184 	  dump_printf (MSG_NOTE, "constant\n");
10185 	  break;
10186 	case vect_external_def:
10187 	  dump_printf (MSG_NOTE, "external\n");
10188 	  break;
10189 	case vect_internal_def:
10190 	  dump_printf (MSG_NOTE, "internal\n");
10191 	  break;
10192 	case vect_induction_def:
10193 	  dump_printf (MSG_NOTE, "induction\n");
10194 	  break;
10195 	case vect_reduction_def:
10196 	  dump_printf (MSG_NOTE, "reduction\n");
10197 	  break;
10198 	case vect_double_reduction_def:
10199 	  dump_printf (MSG_NOTE, "double reduction\n");
10200 	  break;
10201 	case vect_nested_cycle:
10202 	  dump_printf (MSG_NOTE, "nested cycle\n");
10203 	  break;
10204 	case vect_unknown_def_type:
10205 	  dump_printf (MSG_NOTE, "unknown\n");
10206 	  break;
10207 	}
10208     }
10209 
10210   if (*dt == vect_unknown_def_type)
10211     {
10212       if (dump_enabled_p ())
10213         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10214                          "Unsupported pattern.\n");
10215       return false;
10216     }
10217 
10218   return true;
10219 }
10220 
10221 /* Function vect_is_simple_use.
10222 
10223    Same as vect_is_simple_use but also determines the vector operand
10224    type of OPERAND and stores it to *VECTYPE.  If the definition of
10225    OPERAND is vect_uninitialized_def, vect_constant_def or
10226    vect_external_def *VECTYPE will be set to NULL_TREE and the caller
10227    is responsible to compute the best suited vector type for the
10228    scalar operand.  */
10229 
10230 bool
10231 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
10232 		    tree *vectype, stmt_vec_info *def_stmt_info_out,
10233 		    gimple **def_stmt_out)
10234 {
10235   stmt_vec_info def_stmt_info;
10236   gimple *def_stmt;
10237   if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
10238     return false;
10239 
10240   if (def_stmt_out)
10241     *def_stmt_out = def_stmt;
10242   if (def_stmt_info_out)
10243     *def_stmt_info_out = def_stmt_info;
10244 
10245   /* Now get a vector type if the def is internal, otherwise supply
10246      NULL_TREE and leave it up to the caller to figure out a proper
10247      type for the use stmt.  */
10248   if (*dt == vect_internal_def
10249       || *dt == vect_induction_def
10250       || *dt == vect_reduction_def
10251       || *dt == vect_double_reduction_def
10252       || *dt == vect_nested_cycle)
10253     {
10254       *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
10255       gcc_assert (*vectype != NULL_TREE);
10256       if (dump_enabled_p ())
10257 	dump_printf_loc (MSG_NOTE, vect_location,
10258 			 "vect_is_simple_use: vectype %T\n", *vectype);
10259     }
10260   else if (*dt == vect_uninitialized_def
10261 	   || *dt == vect_constant_def
10262 	   || *dt == vect_external_def)
10263     *vectype = NULL_TREE;
10264   else
10265     gcc_unreachable ();
10266 
10267   return true;
10268 }
10269 
10270 
10271 /* Function supportable_widening_operation
10272 
10273    Check whether an operation represented by the code CODE is a
10274    widening operation that is supported by the target platform in
10275    vector form (i.e., when operating on arguments of type VECTYPE_IN
10276    producing a result of type VECTYPE_OUT).
10277 
10278    Widening operations we currently support are NOP (CONVERT), FLOAT,
10279    FIX_TRUNC and WIDEN_MULT.  This function checks if these operations
10280    are supported by the target platform either directly (via vector
10281    tree-codes), or via target builtins.
10282 
10283    Output:
10284    - CODE1 and CODE2 are codes of vector operations to be used when
10285    vectorizing the operation, if available.
10286    - MULTI_STEP_CVT determines the number of required intermediate steps in
10287    case of multi-step conversion (like char->short->int - in that case
10288    MULTI_STEP_CVT will be 1).
10289    - INTERM_TYPES contains the intermediate type required to perform the
10290    widening operation (short in the above example).  */
10291 
10292 bool
10293 supportable_widening_operation (enum tree_code code, stmt_vec_info stmt_info,
10294 				tree vectype_out, tree vectype_in,
10295                                 enum tree_code *code1, enum tree_code *code2,
10296                                 int *multi_step_cvt,
10297                                 vec<tree> *interm_types)
10298 {
10299   loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
10300   struct loop *vect_loop = NULL;
10301   machine_mode vec_mode;
10302   enum insn_code icode1, icode2;
10303   optab optab1, optab2;
10304   tree vectype = vectype_in;
10305   tree wide_vectype = vectype_out;
10306   enum tree_code c1, c2;
10307   int i;
10308   tree prev_type, intermediate_type;
10309   machine_mode intermediate_mode, prev_mode;
10310   optab optab3, optab4;
10311 
10312   *multi_step_cvt = 0;
10313   if (loop_info)
10314     vect_loop = LOOP_VINFO_LOOP (loop_info);
10315 
10316   switch (code)
10317     {
10318     case WIDEN_MULT_EXPR:
10319       /* The result of a vectorized widening operation usually requires
10320 	 two vectors (because the widened results do not fit into one vector).
10321 	 The generated vector results would normally be expected to be
10322 	 generated in the same order as in the original scalar computation,
10323 	 i.e. if 8 results are generated in each vector iteration, they are
10324 	 to be organized as follows:
10325 		vect1: [res1,res2,res3,res4],
10326 		vect2: [res5,res6,res7,res8].
10327 
10328 	 However, in the special case that the result of the widening
10329 	 operation is used in a reduction computation only, the order doesn't
10330 	 matter (because when vectorizing a reduction we change the order of
10331 	 the computation).  Some targets can take advantage of this and
10332 	 generate more efficient code.  For example, targets like Altivec,
10333 	 that support widen_mult using a sequence of {mult_even,mult_odd}
10334 	 generate the following vectors:
10335 		vect1: [res1,res3,res5,res7],
10336 		vect2: [res2,res4,res6,res8].
10337 
10338 	 When vectorizing outer-loops, we execute the inner-loop sequentially
10339 	 (each vectorized inner-loop iteration contributes to VF outer-loop
10340 	 iterations in parallel).  We therefore don't allow to change the
10341 	 order of the computation in the inner-loop during outer-loop
10342 	 vectorization.  */
10343       /* TODO: Another case in which order doesn't *really* matter is when we
10344 	 widen and then contract again, e.g. (short)((int)x * y >> 8).
10345 	 Normally, pack_trunc performs an even/odd permute, whereas the
10346 	 repack from an even/odd expansion would be an interleave, which
10347 	 would be significantly simpler for e.g. AVX2.  */
10348       /* In any case, in order to avoid duplicating the code below, recurse
10349 	 on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
10350 	 are properly set up for the caller.  If we fail, we'll continue with
10351 	 a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
10352       if (vect_loop
10353 	  && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
10354 	  && !nested_in_vect_loop_p (vect_loop, stmt_info)
10355 	  && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
10356 					     stmt_info, vectype_out,
10357 					     vectype_in, code1, code2,
10358 					     multi_step_cvt, interm_types))
10359         {
10360           /* Elements in a vector with vect_used_by_reduction property cannot
10361              be reordered if the use chain with this property does not have the
10362              same operation.  One such an example is s += a * b, where elements
10363              in a and b cannot be reordered.  Here we check if the vector defined
10364              by STMT is only directly used in the reduction statement.  */
10365 	  tree lhs = gimple_assign_lhs (stmt_info->stmt);
10366 	  stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
10367 	  if (use_stmt_info
10368 	      && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
10369 	    return true;
10370         }
10371       c1 = VEC_WIDEN_MULT_LO_EXPR;
10372       c2 = VEC_WIDEN_MULT_HI_EXPR;
10373       break;
10374 
10375     case DOT_PROD_EXPR:
10376       c1 = DOT_PROD_EXPR;
10377       c2 = DOT_PROD_EXPR;
10378       break;
10379 
10380     case SAD_EXPR:
10381       c1 = SAD_EXPR;
10382       c2 = SAD_EXPR;
10383       break;
10384 
10385     case VEC_WIDEN_MULT_EVEN_EXPR:
10386       /* Support the recursion induced just above.  */
10387       c1 = VEC_WIDEN_MULT_EVEN_EXPR;
10388       c2 = VEC_WIDEN_MULT_ODD_EXPR;
10389       break;
10390 
10391     case WIDEN_LSHIFT_EXPR:
10392       c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
10393       c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
10394       break;
10395 
10396     CASE_CONVERT:
10397       c1 = VEC_UNPACK_LO_EXPR;
10398       c2 = VEC_UNPACK_HI_EXPR;
10399       break;
10400 
10401     case FLOAT_EXPR:
10402       c1 = VEC_UNPACK_FLOAT_LO_EXPR;
10403       c2 = VEC_UNPACK_FLOAT_HI_EXPR;
10404       break;
10405 
10406     case FIX_TRUNC_EXPR:
10407       c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
10408       c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
10409       break;
10410 
10411     default:
10412       gcc_unreachable ();
10413     }
10414 
10415   if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
10416     std::swap (c1, c2);
10417 
10418   if (code == FIX_TRUNC_EXPR)
10419     {
10420       /* The signedness is determined from output operand.  */
10421       optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
10422       optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
10423     }
10424   else if (CONVERT_EXPR_CODE_P (code)
10425 	   && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
10426 	   && VECTOR_BOOLEAN_TYPE_P (vectype)
10427 	   && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
10428 	   && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
10429     {
10430       /* If the input and result modes are the same, a different optab
10431 	 is needed where we pass in the number of units in vectype.  */
10432       optab1 = vec_unpacks_sbool_lo_optab;
10433       optab2 = vec_unpacks_sbool_hi_optab;
10434     }
10435   else
10436     {
10437       optab1 = optab_for_tree_code (c1, vectype, optab_default);
10438       optab2 = optab_for_tree_code (c2, vectype, optab_default);
10439     }
10440 
10441   if (!optab1 || !optab2)
10442     return false;
10443 
10444   vec_mode = TYPE_MODE (vectype);
10445   if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
10446        || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
10447     return false;
10448 
10449   *code1 = c1;
10450   *code2 = c2;
10451 
10452   if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
10453       && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
10454     {
10455       if (!VECTOR_BOOLEAN_TYPE_P (vectype))
10456 	return true;
10457       /* For scalar masks we may have different boolean
10458 	 vector types having the same QImode.  Thus we
10459 	 add additional check for elements number.  */
10460       if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
10461 		    TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
10462 	return true;
10463     }
10464 
10465   /* Check if it's a multi-step conversion that can be done using intermediate
10466      types.  */
10467 
10468   prev_type = vectype;
10469   prev_mode = vec_mode;
10470 
10471   if (!CONVERT_EXPR_CODE_P (code))
10472     return false;
10473 
10474   /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
10475      intermediate steps in promotion sequence.  We try
10476      MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
10477      not.  */
10478   interm_types->create (MAX_INTERM_CVT_STEPS);
10479   for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
10480     {
10481       intermediate_mode = insn_data[icode1].operand[0].mode;
10482       if (VECTOR_BOOLEAN_TYPE_P (prev_type))
10483 	{
10484 	  intermediate_type = vect_halve_mask_nunits (prev_type);
10485 	  if (intermediate_mode != TYPE_MODE (intermediate_type))
10486 	    return false;
10487 	}
10488       else
10489 	intermediate_type
10490 	  = lang_hooks.types.type_for_mode (intermediate_mode,
10491 					    TYPE_UNSIGNED (prev_type));
10492 
10493       if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
10494 	  && VECTOR_BOOLEAN_TYPE_P (prev_type)
10495 	  && intermediate_mode == prev_mode
10496 	  && SCALAR_INT_MODE_P (prev_mode))
10497 	{
10498 	  /* If the input and result modes are the same, a different optab
10499 	     is needed where we pass in the number of units in vectype.  */
10500 	  optab3 = vec_unpacks_sbool_lo_optab;
10501 	  optab4 = vec_unpacks_sbool_hi_optab;
10502 	}
10503       else
10504 	{
10505 	  optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
10506 	  optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
10507 	}
10508 
10509       if (!optab3 || !optab4
10510           || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
10511 	  || insn_data[icode1].operand[0].mode != intermediate_mode
10512 	  || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
10513 	  || insn_data[icode2].operand[0].mode != intermediate_mode
10514 	  || ((icode1 = optab_handler (optab3, intermediate_mode))
10515 	      == CODE_FOR_nothing)
10516 	  || ((icode2 = optab_handler (optab4, intermediate_mode))
10517 	      == CODE_FOR_nothing))
10518 	break;
10519 
10520       interm_types->quick_push (intermediate_type);
10521       (*multi_step_cvt)++;
10522 
10523       if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
10524 	  && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
10525 	{
10526 	  if (!VECTOR_BOOLEAN_TYPE_P (vectype))
10527 	    return true;
10528 	  if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
10529 			TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
10530 	    return true;
10531 	}
10532 
10533       prev_type = intermediate_type;
10534       prev_mode = intermediate_mode;
10535     }
10536 
10537   interm_types->release ();
10538   return false;
10539 }
10540 
10541 
10542 /* Function supportable_narrowing_operation
10543 
10544    Check whether an operation represented by the code CODE is a
10545    narrowing operation that is supported by the target platform in
10546    vector form (i.e., when operating on arguments of type VECTYPE_IN
10547    and producing a result of type VECTYPE_OUT).
10548 
10549    Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
10550    and FLOAT.  This function checks if these operations are supported by
10551    the target platform directly via vector tree-codes.
10552 
10553    Output:
10554    - CODE1 is the code of a vector operation to be used when
10555    vectorizing the operation, if available.
10556    - MULTI_STEP_CVT determines the number of required intermediate steps in
10557    case of multi-step conversion (like int->short->char - in that case
10558    MULTI_STEP_CVT will be 1).
10559    - INTERM_TYPES contains the intermediate type required to perform the
10560    narrowing operation (short in the above example).   */
10561 
10562 bool
10563 supportable_narrowing_operation (enum tree_code code,
10564 				 tree vectype_out, tree vectype_in,
10565 				 enum tree_code *code1, int *multi_step_cvt,
10566                                  vec<tree> *interm_types)
10567 {
10568   machine_mode vec_mode;
10569   enum insn_code icode1;
10570   optab optab1, interm_optab;
10571   tree vectype = vectype_in;
10572   tree narrow_vectype = vectype_out;
10573   enum tree_code c1;
10574   tree intermediate_type, prev_type;
10575   machine_mode intermediate_mode, prev_mode;
10576   int i;
10577   bool uns;
10578 
10579   *multi_step_cvt = 0;
10580   switch (code)
10581     {
10582     CASE_CONVERT:
10583       c1 = VEC_PACK_TRUNC_EXPR;
10584       if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
10585 	  && VECTOR_BOOLEAN_TYPE_P (vectype)
10586 	  && TYPE_MODE (narrow_vectype) == TYPE_MODE (vectype)
10587 	  && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
10588 	optab1 = vec_pack_sbool_trunc_optab;
10589       else
10590 	optab1 = optab_for_tree_code (c1, vectype, optab_default);
10591       break;
10592 
10593     case FIX_TRUNC_EXPR:
10594       c1 = VEC_PACK_FIX_TRUNC_EXPR;
10595       /* The signedness is determined from output operand.  */
10596       optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
10597       break;
10598 
10599     case FLOAT_EXPR:
10600       c1 = VEC_PACK_FLOAT_EXPR;
10601       optab1 = optab_for_tree_code (c1, vectype, optab_default);
10602       break;
10603 
10604     default:
10605       gcc_unreachable ();
10606     }
10607 
10608   if (!optab1)
10609     return false;
10610 
10611   vec_mode = TYPE_MODE (vectype);
10612   if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
10613     return false;
10614 
10615   *code1 = c1;
10616 
10617   if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
10618     {
10619       if (!VECTOR_BOOLEAN_TYPE_P (vectype))
10620 	return true;
10621       /* For scalar masks we may have different boolean
10622 	 vector types having the same QImode.  Thus we
10623 	 add additional check for elements number.  */
10624       if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
10625 		    TYPE_VECTOR_SUBPARTS (narrow_vectype)))
10626 	return true;
10627     }
10628 
10629   if (code == FLOAT_EXPR)
10630     return false;
10631 
10632   /* Check if it's a multi-step conversion that can be done using intermediate
10633      types.  */
10634   prev_mode = vec_mode;
10635   prev_type = vectype;
10636   if (code == FIX_TRUNC_EXPR)
10637     uns = TYPE_UNSIGNED (vectype_out);
10638   else
10639     uns = TYPE_UNSIGNED (vectype);
10640 
10641   /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
10642      conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
10643      costly than signed.  */
10644   if (code == FIX_TRUNC_EXPR && uns)
10645     {
10646       enum insn_code icode2;
10647 
10648       intermediate_type
10649 	= lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
10650       interm_optab
10651 	= optab_for_tree_code (c1, intermediate_type, optab_default);
10652       if (interm_optab != unknown_optab
10653 	  && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
10654 	  && insn_data[icode1].operand[0].mode
10655 	     == insn_data[icode2].operand[0].mode)
10656 	{
10657 	  uns = false;
10658 	  optab1 = interm_optab;
10659 	  icode1 = icode2;
10660 	}
10661     }
10662 
10663   /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
10664      intermediate steps in promotion sequence.  We try
10665      MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
10666   interm_types->create (MAX_INTERM_CVT_STEPS);
10667   for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
10668     {
10669       intermediate_mode = insn_data[icode1].operand[0].mode;
10670       if (VECTOR_BOOLEAN_TYPE_P (prev_type))
10671 	{
10672 	  intermediate_type = vect_double_mask_nunits (prev_type);
10673 	  if (intermediate_mode != TYPE_MODE (intermediate_type))
10674 	    return false;
10675 	}
10676       else
10677 	intermediate_type
10678 	  = lang_hooks.types.type_for_mode (intermediate_mode, uns);
10679       if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
10680 	  && VECTOR_BOOLEAN_TYPE_P (prev_type)
10681 	  && intermediate_mode == prev_mode
10682 	  && SCALAR_INT_MODE_P (prev_mode))
10683 	interm_optab = vec_pack_sbool_trunc_optab;
10684       else
10685 	interm_optab
10686 	  = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
10687 				 optab_default);
10688       if (!interm_optab
10689 	  || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
10690 	  || insn_data[icode1].operand[0].mode != intermediate_mode
10691 	  || ((icode1 = optab_handler (interm_optab, intermediate_mode))
10692 	      == CODE_FOR_nothing))
10693 	break;
10694 
10695       interm_types->quick_push (intermediate_type);
10696       (*multi_step_cvt)++;
10697 
10698       if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
10699 	{
10700 	  if (!VECTOR_BOOLEAN_TYPE_P (vectype))
10701 	    return true;
10702 	  if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
10703 			TYPE_VECTOR_SUBPARTS (narrow_vectype)))
10704 	    return true;
10705 	}
10706 
10707       prev_mode = intermediate_mode;
10708       prev_type = intermediate_type;
10709       optab1 = interm_optab;
10710     }
10711 
10712   interm_types->release ();
10713   return false;
10714 }
10715 
10716 /* Generate and return a statement that sets vector mask MASK such that
10717    MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I.  */
10718 
10719 gcall *
10720 vect_gen_while (tree mask, tree start_index, tree end_index)
10721 {
10722   tree cmp_type = TREE_TYPE (start_index);
10723   tree mask_type = TREE_TYPE (mask);
10724   gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
10725 						       cmp_type, mask_type,
10726 						       OPTIMIZE_FOR_SPEED));
10727   gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
10728 					    start_index, end_index,
10729 					    build_zero_cst (mask_type));
10730   gimple_call_set_lhs (call, mask);
10731   return call;
10732 }
10733 
10734 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
10735    J + START_INDEX < END_INDEX for all J <= I.  Add the statements to SEQ.  */
10736 
10737 tree
10738 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
10739 		    tree end_index)
10740 {
10741   tree tmp = make_ssa_name (mask_type);
10742   gcall *call = vect_gen_while (tmp, start_index, end_index);
10743   gimple_seq_add_stmt (seq, call);
10744   return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
10745 }
10746 
10747 /* Try to compute the vector types required to vectorize STMT_INFO,
10748    returning true on success and false if vectorization isn't possible.
10749 
10750    On success:
10751 
10752    - Set *STMT_VECTYPE_OUT to:
10753      - NULL_TREE if the statement doesn't need to be vectorized;
10754      - boolean_type_node if the statement is a boolean operation whose
10755        vector type can only be determined once all the other vector types
10756        are known; and
10757      - the equivalent of STMT_VINFO_VECTYPE otherwise.
10758 
10759    - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
10760      number of units needed to vectorize STMT_INFO, or NULL_TREE if the
10761      statement does not help to determine the overall number of units.  */
10762 
10763 opt_result
10764 vect_get_vector_types_for_stmt (stmt_vec_info stmt_info,
10765 				tree *stmt_vectype_out,
10766 				tree *nunits_vectype_out)
10767 {
10768   gimple *stmt = stmt_info->stmt;
10769 
10770   *stmt_vectype_out = NULL_TREE;
10771   *nunits_vectype_out = NULL_TREE;
10772 
10773   if (gimple_get_lhs (stmt) == NULL_TREE
10774       /* MASK_STORE has no lhs, but is ok.  */
10775       && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
10776     {
10777       if (is_a <gcall *> (stmt))
10778 	{
10779 	  /* Ignore calls with no lhs.  These must be calls to
10780 	     #pragma omp simd functions, and what vectorization factor
10781 	     it really needs can't be determined until
10782 	     vectorizable_simd_clone_call.  */
10783 	  if (dump_enabled_p ())
10784 	    dump_printf_loc (MSG_NOTE, vect_location,
10785 			     "defer to SIMD clone analysis.\n");
10786 	  return opt_result::success ();
10787 	}
10788 
10789       return opt_result::failure_at (stmt,
10790 				     "not vectorized: irregular stmt.%G", stmt);
10791     }
10792 
10793   if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
10794     return opt_result::failure_at (stmt,
10795 				   "not vectorized: vector stmt in loop:%G",
10796 				   stmt);
10797 
10798   tree vectype;
10799   tree scalar_type = NULL_TREE;
10800   if (STMT_VINFO_VECTYPE (stmt_info))
10801     *stmt_vectype_out = vectype = STMT_VINFO_VECTYPE (stmt_info);
10802   else
10803     {
10804       gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
10805       if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
10806 	scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
10807       else
10808 	scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
10809 
10810       /* Pure bool ops don't participate in number-of-units computation.
10811 	 For comparisons use the types being compared.  */
10812       if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type)
10813 	  && is_gimple_assign (stmt)
10814 	  && gimple_assign_rhs_code (stmt) != COND_EXPR)
10815 	{
10816 	  *stmt_vectype_out = boolean_type_node;
10817 
10818 	  tree rhs1 = gimple_assign_rhs1 (stmt);
10819 	  if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
10820 	      && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
10821 	    scalar_type = TREE_TYPE (rhs1);
10822 	  else
10823 	    {
10824 	      if (dump_enabled_p ())
10825 		dump_printf_loc (MSG_NOTE, vect_location,
10826 				 "pure bool operation.\n");
10827 	      return opt_result::success ();
10828 	    }
10829 	}
10830 
10831       if (dump_enabled_p ())
10832 	dump_printf_loc (MSG_NOTE, vect_location,
10833 			 "get vectype for scalar type:  %T\n", scalar_type);
10834       vectype = get_vectype_for_scalar_type (scalar_type);
10835       if (!vectype)
10836 	return opt_result::failure_at (stmt,
10837 				       "not vectorized:"
10838 				       " unsupported data-type %T\n",
10839 				       scalar_type);
10840 
10841       if (!*stmt_vectype_out)
10842 	*stmt_vectype_out = vectype;
10843 
10844       if (dump_enabled_p ())
10845 	dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
10846     }
10847 
10848   /* Don't try to compute scalar types if the stmt produces a boolean
10849      vector; use the existing vector type instead.  */
10850   tree nunits_vectype;
10851   if (VECTOR_BOOLEAN_TYPE_P (vectype))
10852     nunits_vectype = vectype;
10853   else
10854     {
10855       /* The number of units is set according to the smallest scalar
10856 	 type (or the largest vector size, but we only support one
10857 	 vector size per vectorization).  */
10858       if (*stmt_vectype_out != boolean_type_node)
10859 	{
10860 	  HOST_WIDE_INT dummy;
10861 	  scalar_type = vect_get_smallest_scalar_type (stmt_info,
10862 						       &dummy, &dummy);
10863 	}
10864       if (dump_enabled_p ())
10865 	dump_printf_loc (MSG_NOTE, vect_location,
10866 			 "get vectype for scalar type:  %T\n", scalar_type);
10867       nunits_vectype = get_vectype_for_scalar_type (scalar_type);
10868     }
10869   if (!nunits_vectype)
10870     return opt_result::failure_at (stmt,
10871 				   "not vectorized: unsupported data-type %T\n",
10872 				   scalar_type);
10873 
10874   if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
10875 		GET_MODE_SIZE (TYPE_MODE (nunits_vectype))))
10876     return opt_result::failure_at (stmt,
10877 				   "not vectorized: different sized vector "
10878 				   "types in statement, %T and %T\n",
10879 				   vectype, nunits_vectype);
10880 
10881   if (dump_enabled_p ())
10882     {
10883       dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n",
10884 		       nunits_vectype);
10885 
10886       dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
10887       dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
10888       dump_printf (MSG_NOTE, "\n");
10889     }
10890 
10891   *nunits_vectype_out = nunits_vectype;
10892   return opt_result::success ();
10893 }
10894 
10895 /* Try to determine the correct vector type for STMT_INFO, which is a
10896    statement that produces a scalar boolean result.  Return the vector
10897    type on success, otherwise return NULL_TREE.  */
10898 
10899 opt_tree
10900 vect_get_mask_type_for_stmt (stmt_vec_info stmt_info)
10901 {
10902   gimple *stmt = stmt_info->stmt;
10903   tree mask_type = NULL;
10904   tree vectype, scalar_type;
10905 
10906   if (is_gimple_assign (stmt)
10907       && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
10908       && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt))))
10909     {
10910       scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
10911       mask_type = get_mask_type_for_scalar_type (scalar_type);
10912 
10913       if (!mask_type)
10914 	return opt_tree::failure_at (stmt,
10915 				     "not vectorized: unsupported mask\n");
10916     }
10917   else
10918     {
10919       tree rhs;
10920       ssa_op_iter iter;
10921       enum vect_def_type dt;
10922 
10923       FOR_EACH_SSA_TREE_OPERAND (rhs, stmt, iter, SSA_OP_USE)
10924 	{
10925 	  if (!vect_is_simple_use (rhs, stmt_info->vinfo, &dt, &vectype))
10926 	    return opt_tree::failure_at (stmt,
10927 					 "not vectorized:can't compute mask"
10928 					 " type for statement, %G", stmt);
10929 
10930 	  /* No vectype probably means external definition.
10931 	     Allow it in case there is another operand which
10932 	     allows to determine mask type.  */
10933 	  if (!vectype)
10934 	    continue;
10935 
10936 	  if (!mask_type)
10937 	    mask_type = vectype;
10938 	  else if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_type),
10939 			     TYPE_VECTOR_SUBPARTS (vectype)))
10940 	    return opt_tree::failure_at (stmt,
10941 					 "not vectorized: different sized mask"
10942 					 " types in statement, %T and %T\n",
10943 					 mask_type, vectype);
10944 	  else if (VECTOR_BOOLEAN_TYPE_P (mask_type)
10945 		   != VECTOR_BOOLEAN_TYPE_P (vectype))
10946 	    return opt_tree::failure_at (stmt,
10947 					 "not vectorized: mixed mask and "
10948 					 "nonmask vector types in statement, "
10949 					 "%T and %T\n",
10950 					 mask_type, vectype);
10951 	}
10952 
10953       /* We may compare boolean value loaded as vector of integers.
10954 	 Fix mask_type in such case.  */
10955       if (mask_type
10956 	  && !VECTOR_BOOLEAN_TYPE_P (mask_type)
10957 	  && gimple_code (stmt) == GIMPLE_ASSIGN
10958 	  && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison)
10959 	mask_type = build_same_sized_truth_vector_type (mask_type);
10960     }
10961 
10962   /* No mask_type should mean loop invariant predicate.
10963      This is probably a subject for optimization in if-conversion.  */
10964   if (!mask_type)
10965     return opt_tree::failure_at (stmt,
10966 				 "not vectorized: can't compute mask type "
10967 				 "for statement: %G", stmt);
10968 
10969   return opt_tree::success (mask_type);
10970 }
10971