xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/tree-vect-data-refs.c (revision 946379e7b37692fc43f68eb0d1c10daa0a7f3b6c)
1 /* Data References Analysis and Manipulation Utilities for Vectorization.
2    Copyright (C) 2003-2013 Free Software Foundation, Inc.
3    Contributed by Dorit Naishlos <dorit@il.ibm.com>
4    and Ira Rosen <irar@il.ibm.com>
5 
6 This file is part of GCC.
7 
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12 
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
16 for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3.  If not see
20 <http://www.gnu.org/licenses/>.  */
21 
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "dumpfile.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "tm_p.h"
30 #include "target.h"
31 #include "basic-block.h"
32 #include "gimple-pretty-print.h"
33 #include "tree-flow.h"
35 #include "cfgloop.h"
36 #include "tree-chrec.h"
37 #include "tree-scalar-evolution.h"
38 #include "tree-vectorizer.h"
39 #include "diagnostic-core.h"
40 
41 /* Need to include rtl.h, expr.h, etc. for optabs.  */
42 #include "expr.h"
43 #include "optabs.h"
44 
45 /* Return true if load- or store-lanes optab OPTAB is implemented for
46    COUNT vectors of type VECTYPE.  NAME is the name of OPTAB.  */
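
/* Illustrative example (hypothetical numbers, not tied to any particular
   target): for a 128-bit vector mode and COUNT == 2, the check below looks
   for an integer mode of 2 * 128 = 256 bits to hold the whole load/store
   lanes array.  If the target provides no such mode, mode_for_size returns
   BLKmode and the optab is reported as unsupported.  */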
47 
48 static bool
49 vect_lanes_optab_supported_p (const char *name, convert_optab optab,
50 			      tree vectype, unsigned HOST_WIDE_INT count)
51 {
52   enum machine_mode mode, array_mode;
53   bool limit_p;
54 
55   mode = TYPE_MODE (vectype);
56   limit_p = !targetm.array_mode_supported_p (mode, count);
57   array_mode = mode_for_size (count * GET_MODE_BITSIZE (mode),
58 			      MODE_INT, limit_p);
59 
60   if (array_mode == BLKmode)
61     {
62       if (dump_enabled_p ())
63 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
64                          "no array mode for %s[" HOST_WIDE_INT_PRINT_DEC "]",
65                          GET_MODE_NAME (mode), count);
66       return false;
67     }
68 
69   if (convert_optab_handler (optab, array_mode, mode) == CODE_FOR_nothing)
70     {
71       if (dump_enabled_p ())
72 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
73                          "cannot use %s<%s><%s>", name,
74                          GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
75       return false;
76     }
77 
78   if (dump_enabled_p ())
79     dump_printf_loc (MSG_NOTE, vect_location,
80                      "can use %s<%s><%s>", name, GET_MODE_NAME (array_mode),
81                      GET_MODE_NAME (mode));
82 
83   return true;
84 }
85 
86 
87 /* Return the smallest scalar part of STMT.
88    This is used to determine the vectype of the stmt.  We generally set the
89    vectype according to the type of the result (lhs).  For stmts whose
90    result-type is different than the type of the arguments (e.g., demotion,
91    promotion), vectype will be reset appropriately (later).  Note that we have
92    to visit the smallest datatype in this function, because that determines the
93    VF.  If the smallest datatype in the loop is present only as the rhs of a
94    promotion operation - we'd miss it.
95    Such a case, where a variable of this datatype does not appear in the lhs
96    anywhere in the loop, can only occur if it's an invariant: e.g.:
97    'int_x = (int) short_inv', which we'd expect to have been optimized away by
98    invariant motion.  However, we cannot rely on invariant motion to always
99    take invariants out of the loop, and so in the case of promotion we also
100    have to check the rhs.
101    LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
102    types.  */
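
/* Illustrative example (hypothetical loop, for exposition only):

     short *b;  int *a;
     for (i = 0; i < n; i++)
       a[i] = (int) b[i];	# widening conversion

   The result type of the conversion statement is 'int', but the rhs type
   'short' is smaller, so the smallest scalar type returned is 'short' and
   the vectorization factor is derived from sizeof (short) rather than
   sizeof (int).  */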
103 
104 tree
105 vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
106                                HOST_WIDE_INT *rhs_size_unit)
107 {
108   tree scalar_type = gimple_expr_type (stmt);
109   HOST_WIDE_INT lhs, rhs;
110 
111   lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
112 
113   if (is_gimple_assign (stmt)
114       && (gimple_assign_cast_p (stmt)
115           || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
116           || gimple_assign_rhs_code (stmt) == WIDEN_LSHIFT_EXPR
117           || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
118     {
119       tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
120 
121       rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
122       if (rhs < lhs)
123         scalar_type = rhs_type;
124     }
125 
126   *lhs_size_unit = lhs;
127   *rhs_size_unit = rhs;
128   return scalar_type;
129 }
130 
131 
132 /* Find the place of the data-ref in STMT in the interleaving chain that starts
133    from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */
134 
135 int
136 vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
137 {
138   gimple next_stmt = first_stmt;
139   int result = 0;
140 
141   if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
142     return -1;
143 
144   while (next_stmt && next_stmt != stmt)
145     {
146       result++;
147       next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
148     }
149 
150   if (next_stmt)
151     return result;
152   else
153     return -1;
154 }
155 
156 
157 /* Function vect_insert_into_interleaving_chain.
158 
159    Insert DRA into the interleaving chain of DRB according to DRA's INIT.  */
160 
161 static void
162 vect_insert_into_interleaving_chain (struct data_reference *dra,
163 				     struct data_reference *drb)
164 {
165   gimple prev, next;
166   tree next_init;
167   stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
168   stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
169 
170   prev = GROUP_FIRST_ELEMENT (stmtinfo_b);
171   next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
172   while (next)
173     {
174       next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
175       if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
176 	{
177 	  /* Insert here.  */
178 	  GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = DR_STMT (dra);
179 	  GROUP_NEXT_ELEMENT (stmtinfo_a) = next;
180 	  return;
181 	}
182       prev = next;
183       next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
184     }
185 
186   /* We got to the end of the list. Insert here.  */
187   GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = DR_STMT (dra);
188   GROUP_NEXT_ELEMENT (stmtinfo_a) = NULL;
189 }
190 
191 
192 /* Function vect_update_interleaving_chain.
193 
194    For two data-refs DRA and DRB that are a part of a chain of interleaved data
195    accesses, update the interleaving chain.  DRB's INIT is smaller than DRA's.
196 
197    There are four possible cases:
198    1. New stmts - both DRA and DRB are not a part of any chain:
199       FIRST_DR = DRB
200       NEXT_DR (DRB) = DRA
201    2. DRB is a part of a chain and DRA is not:
202       no need to update FIRST_DR
203       no need to insert DRB
204       insert DRA according to init
205    3. DRA is a part of a chain and DRB is not:
206       if (init of FIRST_DR > init of DRB)
207           FIRST_DR = DRB
208 	  NEXT(FIRST_DR) = previous FIRST_DR
209       else
210           insert DRB according to its init
211    4. both DRA and DRB are in some interleaving chains:
212       choose the chain with the smallest init of FIRST_DR
213       insert the nodes of the second chain into the first one.  */
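
/* Illustrative example (hypothetical stores, 4-byte elements):

     for (i = 0; i < n; i++)
       {
	 p[3*i]     = x;	# DR_INIT = 0
	 p[3*i + 2] = y;	# DR_INIT = 8
	 p[3*i + 1] = z;	# DR_INIT = 4
       }

   The chain is kept sorted by increasing DR_INIT, so once all the pairs
   have been processed the chain headed by the DR_INIT = 0 store is
   0 -> 4 -> 8, regardless of the order in which the stores appear.  */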
214 
215 static void
216 vect_update_interleaving_chain (struct data_reference *drb,
217 				struct data_reference *dra)
218 {
219   stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
220   stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
221   tree next_init, init_dra_chain, init_drb_chain;
222   gimple first_a, first_b;
223   tree node_init;
224   gimple node, prev, next, first_stmt;
225 
226   /* 1. New stmts - both DRA and DRB are not a part of any chain.   */
227   if (!GROUP_FIRST_ELEMENT (stmtinfo_a) && !GROUP_FIRST_ELEMENT (stmtinfo_b))
228     {
229       GROUP_FIRST_ELEMENT (stmtinfo_a) = DR_STMT (drb);
230       GROUP_FIRST_ELEMENT (stmtinfo_b) = DR_STMT (drb);
231       GROUP_NEXT_ELEMENT (stmtinfo_b) = DR_STMT (dra);
232       return;
233     }
234 
235   /* 2. DRB is a part of a chain and DRA is not.  */
236   if (!GROUP_FIRST_ELEMENT (stmtinfo_a) && GROUP_FIRST_ELEMENT (stmtinfo_b))
237     {
238       GROUP_FIRST_ELEMENT (stmtinfo_a) = GROUP_FIRST_ELEMENT (stmtinfo_b);
239       /* Insert DRA into the chain of DRB.  */
240       vect_insert_into_interleaving_chain (dra, drb);
241       return;
242     }
243 
244   /* 3. DRA is a part of a chain and DRB is not.  */
245   if (GROUP_FIRST_ELEMENT (stmtinfo_a) && !GROUP_FIRST_ELEMENT (stmtinfo_b))
246     {
247       gimple old_first_stmt = GROUP_FIRST_ELEMENT (stmtinfo_a);
248       tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
249 							      old_first_stmt)));
250       gimple tmp;
251 
252       if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
253 	{
254 	  /* DRB's init is smaller than the init of the stmt previously marked
255 	     as the first stmt of the interleaving chain of DRA.  Therefore, we
256 	     update FIRST_STMT and put DRB in the head of the list.  */
257 	  GROUP_FIRST_ELEMENT (stmtinfo_b) = DR_STMT (drb);
258 	  GROUP_NEXT_ELEMENT (stmtinfo_b) = old_first_stmt;
259 
260 	  /* Update all the stmts in the list to point to the new FIRST_STMT.  */
261 	  tmp = old_first_stmt;
262 	  while (tmp)
263 	    {
264 	      GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) = DR_STMT (drb);
265 	      tmp = GROUP_NEXT_ELEMENT (vinfo_for_stmt (tmp));
266 	    }
267 	}
268       else
269 	{
270 	  /* Insert DRB in the list of DRA.  */
271 	  vect_insert_into_interleaving_chain (drb, dra);
272 	  GROUP_FIRST_ELEMENT (stmtinfo_b) = GROUP_FIRST_ELEMENT (stmtinfo_a);
273 	}
274       return;
275     }
276 
277   /* 4. both DRA and DRB are in some interleaving chains.  */
278   first_a = GROUP_FIRST_ELEMENT (stmtinfo_a);
279   first_b = GROUP_FIRST_ELEMENT (stmtinfo_b);
280   if (first_a == first_b)
281     return;
282   init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
283   init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));
284 
285   if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
286     {
287       /* Insert the nodes of DRA chain into the DRB chain.
288 	 After inserting a node, continue from this node of the DRB chain (don't
289          start from the beginning).  */
290       node = GROUP_FIRST_ELEMENT (stmtinfo_a);
291       prev = GROUP_FIRST_ELEMENT (stmtinfo_b);
292       first_stmt = first_b;
293     }
294   else
295     {
296       /* Insert the nodes of DRB chain into the DRA chain.
297 	 After inserting a node, continue from this node of the DRA chain (don't
298          start from the beginning).  */
299       node = GROUP_FIRST_ELEMENT (stmtinfo_b);
300       prev = GROUP_FIRST_ELEMENT (stmtinfo_a);
301       first_stmt = first_a;
302     }
303 
304   while (node)
305     {
306       node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
307       next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
308       while (next)
309 	{
310 	  next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
311 	  if (tree_int_cst_compare (next_init, node_init) > 0)
312 	    {
313 	      /* Insert here.  */
314 	      GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = node;
315 	      GROUP_NEXT_ELEMENT (vinfo_for_stmt (node)) = next;
316 	      prev = node;
317 	      break;
318 	    }
319 	  prev = next;
320 	  next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
321 	}
322       if (!next)
323 	{
324 	  /* We got to the end of the list. Insert here.  */
325 	  GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = node;
326 	  GROUP_NEXT_ELEMENT (vinfo_for_stmt (node)) = NULL;
327 	  prev = node;
328 	}
329       GROUP_FIRST_ELEMENT (vinfo_for_stmt (node)) = first_stmt;
330       node = GROUP_NEXT_ELEMENT (vinfo_for_stmt (node));
331     }
332 }
333 
334 /* Check dependence between DRA and DRB for basic block vectorization.
335    If the accesses share the same bases and offsets, we can compare their initial
336    constant offsets to decide whether they differ or not.  In case of a read-
337    write dependence we check that the load is before the store to ensure that
338    vectorization will not change the order of the accesses.  */
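
/* Illustrative example (hypothetical basic block, 4-byte elements):

     x = a[4];		# load,  DR_INIT = 16
     a[4] = y;		# store, DR_INIT = 16

   The two refs have equal bases, offsets and inits, so they access the same
   location; because the earlier statement is the load, the order of the
   accesses is preserved by basic-block vectorization and the pair is not
   treated as a blocking dependence.  */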
339 
340 static bool
341 vect_drs_dependent_in_basic_block (struct data_reference *dra,
342                                    struct data_reference *drb)
343 {
344   HOST_WIDE_INT type_size_a, type_size_b, init_a, init_b;
345   gimple earlier_stmt;
346 
347   /* We only call this function for pairs consisting of one load and one
348      store, but we verify that here.  */
349   if (DR_IS_READ (dra) == DR_IS_READ (drb))
350     {
351       if (DR_IS_READ (dra))
352         return false;
353       else
354         return true;
355     }
356 
357   /* Check that the data-refs have same bases and offsets.  If not, we can't
358      determine if they are dependent.  */
359   if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
360       || !dr_equal_offsets_p (dra, drb))
361     return true;
362 
363   /* Check the types.  */
364   type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
365   type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
366 
367   if (type_size_a != type_size_b
368       || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
369                               TREE_TYPE (DR_REF (drb))))
370     return true;
371 
372   init_a = TREE_INT_CST_LOW (DR_INIT (dra));
373   init_b = TREE_INT_CST_LOW (DR_INIT (drb));
374 
375   /* Two different locations - no dependence.  */
376   if (init_a != init_b)
377     return false;
378 
379   /* We have a read-write dependence.  Check that the load is before the store.
380      When we vectorize basic blocks, vector load can be only before
381      corresponding scalar load, and vector store can be only after its
382      corresponding scalar store.  So the order of the accesses is preserved in
383      case the load is before the store.  */
384   earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
385   if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
386     return false;
387 
388   return true;
389 }
390 
391 
392 /* Function vect_check_interleaving.
393 
394    Check if DRA and DRB are a part of interleaving.  In case they are, insert
395    DRA and DRB in an interleaving chain.  */
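
/* Illustrative example (hypothetical loads, 4-byte elements):

     for (i = 0; i < n; i++)
       {
	 x = a[2*i];		# DR_INIT = 0, DR_STEP = 8
	 y = a[2*i + 1];	# DR_INIT = 4, DR_STEP = 8
       }

   Here init_b - init_a = 4 is a multiple of the type size (4) and does not
   exceed the common step (8), so the two loads are recorded as a single
   interleaving chain.  */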
396 
397 static bool
398 vect_check_interleaving (struct data_reference *dra,
399 			 struct data_reference *drb)
400 {
401   HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;
402 
403   /* Check that the data-refs have the same first location (except init) and
404      that they are both either stores or loads (not one load and one store).  */
405   if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
406       || !dr_equal_offsets_p (dra, drb)
407       || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
408       || DR_IS_READ (dra) != DR_IS_READ (drb))
409     return false;
410 
411   /* Check:
412      1. data-refs are of the same type
413      2. their steps are equal
414      3. the step (if greater than zero) is greater than the difference between
415         data-refs' inits.  */
416   type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
417   type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
418 
419   if (type_size_a != type_size_b
420       || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb))
421       || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
422                               TREE_TYPE (DR_REF (drb))))
423     return false;
424 
425   init_a = TREE_INT_CST_LOW (DR_INIT (dra));
426   init_b = TREE_INT_CST_LOW (DR_INIT (drb));
427   step = TREE_INT_CST_LOW (DR_STEP (dra));
428 
429   if (init_a > init_b)
430     {
431       /* If init_a == init_b + the size of the type * k, we have an interleaving,
432 	 and DRB is accessed before DRA.  */
433       diff_mod_size = (init_a - init_b) % type_size_a;
434 
435       if (step && (init_a - init_b) > step)
436          return false;
437 
438       if (diff_mod_size == 0)
439 	{
440 	  vect_update_interleaving_chain (drb, dra);
441 	  if (dump_enabled_p ())
442 	    {
443 	      dump_printf_loc (MSG_NOTE, vect_location,
444                                "Detected interleaving ");
445 	      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
446 	      dump_printf (MSG_NOTE,  " and ");
447 	      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
448 	    }
449 	  return true;
450 	}
451     }
452   else
453     {
454       /* If init_b == init_a + the size of the type * k, we have an
455 	 interleaving, and DRA is accessed before DRB.  */
456       diff_mod_size = (init_b - init_a) % type_size_a;
457 
458       if (step && (init_b - init_a) > step)
459          return false;
460 
461       if (diff_mod_size == 0)
462 	{
463 	  vect_update_interleaving_chain (dra, drb);
464 	  if (dump_enabled_p ())
465 	    {
466 	      dump_printf_loc (MSG_NOTE, vect_location,
467                                "Detected interleaving ");
468 	      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
469 	      dump_printf (MSG_NOTE,  " and ");
470 	      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
471 	    }
472 	  return true;
473 	}
474     }
475 
476   return false;
477 }
478 
479 /* Check whether the data references pointed to by DR_I and DR_J are the same
480    or belong to the same interleaving group.  Return FALSE if the drs are
481    different, otherwise return TRUE.  */
482 
483 static bool
484 vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
485 {
486   gimple stmt_i = DR_STMT (dr_i);
487   gimple stmt_j = DR_STMT (dr_j);
488 
489   if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
490       || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_i))
491 	    && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_j))
492 	    && (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_i))
493 		== GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_j)))))
494     return true;
495   else
496     return false;
497 }
498 
499 /* If address ranges represented by DDR_I and DDR_J are equal,
500    return TRUE, otherwise return FALSE.  */
501 
502 static bool
503 vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
504 {
505   if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
506        && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
507       || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
508 	  && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
509     return true;
510   else
511     return false;
512 }
513 
514 /* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
515    tested at run-time.  Return TRUE if DDR was successfully inserted.
516    Return false if versioning is not supported.  */
517 
518 static bool
519 vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
520 {
521   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
522 
523   if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
524     return false;
525 
526   if (dump_enabled_p ())
527     {
528       dump_printf_loc (MSG_NOTE, vect_location,
529                        "mark for run-time aliasing test between ");
530       dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_A (ddr)));
531       dump_printf (MSG_NOTE,  " and ");
532       dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_B (ddr)));
533     }
534 
535   if (optimize_loop_nest_for_size_p (loop))
536     {
537       if (dump_enabled_p ())
538 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
539                          "versioning not supported when optimizing for size.");
540       return false;
541     }
542 
543   /* FORNOW: We don't support versioning with outer-loop vectorization.  */
544   if (loop->inner)
545     {
546       if (dump_enabled_p ())
547 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
548                          "versioning not yet supported for outer-loops.");
549       return false;
550     }
551 
552   /* FORNOW: We don't support creating runtime alias tests for non-constant
553      step.  */
554   if (TREE_CODE (DR_STEP (DDR_A (ddr))) != INTEGER_CST
555       || TREE_CODE (DR_STEP (DDR_B (ddr))) != INTEGER_CST)
556     {
557       if (dump_enabled_p ())
558 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
559                          "versioning not yet supported for non-constant "
560                          "step");
561       return false;
562     }
563 
564   LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).safe_push (ddr);
565   return true;
566 }
567 
568 
569 /* Function vect_analyze_data_ref_dependence.
570 
571    Return TRUE if there is (or might be) a dependence between a memory
572    reference DRA and a memory reference DRB.  When the dependence can
573    instead be checked at run time by versioning for alias, return FALSE.
574    Adjust *MAX_VF according to the data dependence.  */
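
/* Illustrative example (hypothetical loop):

     for (i = 2; i < n; i++)
       a[i] = a[i-2] + 1;

   The store a[i] and the load a[i-2] have dependence distance 2, so a
   vectorization factor of 2 is still safe: *MAX_VF is lowered to 2 if it
   was larger, and once the distance is >= *MAX_VF the dependence no longer
   constrains vectorization.  */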
575 
576 static bool
577 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
578                                   loop_vec_info loop_vinfo, int *max_vf)
579 {
580   unsigned int i;
581   struct loop *loop = NULL;
582   struct data_reference *dra = DDR_A (ddr);
583   struct data_reference *drb = DDR_B (ddr);
584   stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
585   stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
586   lambda_vector dist_v;
587   unsigned int loop_depth;
588 
589   /* Don't bother to analyze statements marked as unvectorizable.  */
590   if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
591       || !STMT_VINFO_VECTORIZABLE (stmtinfo_b))
592     return false;
593 
594   if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
595     {
596       /* Independent data accesses.  */
597       vect_check_interleaving (dra, drb);
598       return false;
599     }
600 
601   if (loop_vinfo)
602     loop = LOOP_VINFO_LOOP (loop_vinfo);
603 
604   if ((DR_IS_READ (dra) && DR_IS_READ (drb) && loop_vinfo) || dra == drb)
605     return false;
606 
607   if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
608     {
609       gimple earlier_stmt;
610 
611       if (loop_vinfo)
612         {
613           if (dump_enabled_p ())
614             {
615               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
616                                "versioning for alias required: "
617                                "can't determine dependence between ");
618               dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
619                                  DR_REF (dra));
620               dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
621               dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
622                                  DR_REF (drb));
623             }
624 
625           /* Add to list of ddrs that need to be tested at run-time.  */
626           return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
627         }
628 
629       /* When vectorizing a basic block, unknown dependence can still mean
630 	 grouped access.  */
631       if (vect_check_interleaving (dra, drb))
632          return false;
633 
634       /* Read-read is OK (we need this check here, after checking for
635          interleaving).  */
636       if (DR_IS_READ (dra) && DR_IS_READ (drb))
637         return false;
638 
639       if (dump_enabled_p ())
640         {
641           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
642                            "can't determine dependence between ");
643           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (dra));
644           dump_printf (MSG_MISSED_OPTIMIZATION,  " and ");
645           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (drb));
646         }
647 
648       /* We do not vectorize basic blocks with write-write dependencies.  */
649       if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
650         return true;
651 
652       /* Check that it's not a load-after-store dependence.  */
653       earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
654       if (DR_IS_WRITE (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
655         return true;
656 
657       return false;
658     }
659 
660   /* Versioning for alias is not yet supported for basic block SLP, and
661      dependence distance is not applicable; hence, in case of known data
662      dependence, basic block vectorization is impossible for now.  */
663   if (!loop_vinfo)
664     {
665       if (dra != drb && vect_check_interleaving (dra, drb))
666         return false;
667 
668       if (dump_enabled_p ())
669         {
670           dump_printf_loc (MSG_NOTE, vect_location,
671                            "determined dependence between ");
672           dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
673           dump_printf (MSG_NOTE, " and ");
674           dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
675         }
676 
677       /* Do not vectorize basic blocks with write-write dependences.  */
678       if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
679         return true;
680 
681       /* Check if this dependence is allowed in basic block vectorization.  */
682       return vect_drs_dependent_in_basic_block (dra, drb);
683     }
684 
685   /* Loop-based vectorization and known data dependence.  */
686   if (DDR_NUM_DIST_VECTS (ddr) == 0)
687     {
688       if (dump_enabled_p ())
689         {
690           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
691                            "versioning for alias required: "
692                            "bad dist vector for ");
693           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (dra));
694           dump_printf (MSG_MISSED_OPTIMIZATION,  " and ");
695           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (drb));
696         }
697       /* Add to list of ddrs that need to be tested at run-time.  */
698       return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
699     }
700 
701   loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
702   FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
703     {
704       int dist = dist_v[loop_depth];
705 
706       if (dump_enabled_p ())
707 	dump_printf_loc (MSG_NOTE, vect_location,
708                          "dependence distance  = %d.", dist);
709 
710       if (dist == 0)
711 	{
712 	  if (dump_enabled_p ())
713 	    {
714 	      dump_printf_loc (MSG_NOTE, vect_location,
715                                "dependence distance == 0 between ");
716 	      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
717 	      dump_printf (MSG_NOTE, " and ");
718 	      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
719 	    }
720 
721           /* For interleaving, mark that there is a read-write dependency if
722              necessary.  We already checked that one of the data-refs is a store.  */
723           if (DR_IS_READ (dra))
724             GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
725 	  else
726             {
727               if (DR_IS_READ (drb))
728                 GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
729 	    }
730 
731 	  continue;
732 	}
733 
734       if (dist > 0 && DDR_REVERSED_P (ddr))
735 	{
736 	  /* If DDR_REVERSED_P the order of the data-refs in DDR was
737 	     reversed (to make distance vector positive), and the actual
738 	     distance is negative.  */
739 	  if (dump_enabled_p ())
740 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
741                              "dependence distance negative.");
742 	  /* Record a negative dependence distance to later limit the
743 	     amount of stmt copying / unrolling we can perform.
744 	     Only need to handle read-after-write dependence.  */
745 	  if (DR_IS_READ (drb)
746 	      && (STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) == 0
747 		  || STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) > (unsigned)dist))
748 	    STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) = dist;
749 	  continue;
750 	}
751 
752       if (abs (dist) >= 2
753 	  && abs (dist) < *max_vf)
754 	{
755 	  /* The dependence distance requires reduction of the maximal
756 	     vectorization factor.  */
757 	  *max_vf = abs (dist);
758 	  if (dump_enabled_p ())
759 	    dump_printf_loc (MSG_NOTE, vect_location,
760                              "adjusting maximal vectorization factor to %i",
761                              *max_vf);
762 	}
763 
764       if (abs (dist) >= *max_vf)
765 	{
766 	  /* Dependence distance does not create dependence, as far as
767 	     vectorization is concerned, in this case.  */
768 	  if (dump_enabled_p ())
769 	    dump_printf_loc (MSG_NOTE, vect_location,
770                              "dependence distance >= VF.");
771 	  continue;
772 	}
773 
774       if (dump_enabled_p ())
775 	{
776 	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
777                        "not vectorized, possible dependence "
778                        "between data-refs ");
779 	  dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
780 	  dump_printf (MSG_NOTE,  " and ");
781 	  dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
782 	}
783 
784       return true;
785     }
786 
787   return false;
788 }
789 
790 /* Function vect_analyze_data_ref_dependences.
791 
792    Examine all the data references in the loop, and make sure there do not
793    exist any data dependences between them.  Set *MAX_VF according to
794    the maximum vectorization factor the data dependences allow.  */
795 
796 bool
797 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
798                                    bb_vec_info bb_vinfo, int *max_vf)
799 {
800   unsigned int i;
801   vec<ddr_p> ddrs = vNULL;
802   struct data_dependence_relation *ddr;
803 
804   if (dump_enabled_p ())
805     dump_printf_loc (MSG_NOTE, vect_location,
806                      "=== vect_analyze_dependences ===");
807   if (loop_vinfo)
808     ddrs = LOOP_VINFO_DDRS (loop_vinfo);
809   else
810     ddrs = BB_VINFO_DDRS (bb_vinfo);
811 
812   FOR_EACH_VEC_ELT (ddrs, i, ddr)
813     if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf))
814       return false;
815 
816   return true;
817 }
818 
819 
820 /* Function vect_compute_data_ref_alignment
821 
822    Compute the misalignment of the data reference DR.
823 
824    Output:
825    1. If during the misalignment computation it is found that the data reference
826       cannot be vectorized then false is returned.
827    2. DR_MISALIGNMENT (DR) is defined.
828 
829    FOR NOW: No analysis is actually performed. Misalignment is calculated
830    only for trivial cases. TODO.  */
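
/* Illustrative example (hypothetical numbers): for a data-ref whose base
   address is known to be 16-byte aligned, with DR_INIT = 4 and a 16-byte
   vector type, the code below computes

     misalign = 4 mod 16 = 4

   and sets DR_MISALIGNMENT (DR) to 4 bytes.  If the base alignment or the
   constant offset is not known, the misalignment is left as -1 (unknown).  */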
831 
832 static bool
833 vect_compute_data_ref_alignment (struct data_reference *dr)
834 {
835   gimple stmt = DR_STMT (dr);
836   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
837   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
838   struct loop *loop = NULL;
839   tree ref = DR_REF (dr);
840   tree vectype;
841   tree base, base_addr;
842   bool base_aligned;
843   tree misalign;
844   tree aligned_to, alignment;
845 
846   if (dump_enabled_p ())
847     dump_printf_loc (MSG_NOTE, vect_location,
848                      "vect_compute_data_ref_alignment:");
849 
850   if (loop_vinfo)
851     loop = LOOP_VINFO_LOOP (loop_vinfo);
852 
853   /* Initialize misalignment to unknown.  */
854   SET_DR_MISALIGNMENT (dr, -1);
855 
856   /* Strided loads perform only component accesses, misalignment information
857      is irrelevant for them.  */
858   if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
859     return true;
860 
861   misalign = DR_INIT (dr);
862   aligned_to = DR_ALIGNED_TO (dr);
863   base_addr = DR_BASE_ADDRESS (dr);
864   vectype = STMT_VINFO_VECTYPE (stmt_info);
865 
866   /* In case the dataref is in an inner-loop of the loop that is being
867      vectorized (LOOP), we use the base and misalignment information
868      relative to the outer-loop (LOOP).  This is ok only if the misalignment
869      stays the same throughout the execution of the inner-loop, which is why
870      we have to check that the stride of the dataref in the inner-loop evenly
871      divides by the vector size.  */
872   if (loop && nested_in_vect_loop_p (loop, stmt))
873     {
874       tree step = DR_STEP (dr);
875       HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
876 
877       if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
878         {
879           if (dump_enabled_p ())
880             dump_printf_loc (MSG_NOTE, vect_location,
881                              "inner step divides the vector-size.");
882 	  misalign = STMT_VINFO_DR_INIT (stmt_info);
883 	  aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
884 	  base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
885         }
886       else
887 	{
888 	  if (dump_enabled_p ())
889 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
890                              "inner step doesn't divide the vector-size.");
891 	  misalign = NULL_TREE;
892 	}
893     }
894 
895   /* Similarly, if we're doing basic-block vectorization, we can only use
896      base and misalignment information relative to an innermost loop if the
897      misalignment stays the same throughout the execution of the loop.
898      As above, this is the case if the stride of the dataref evenly divides
899      by the vector size.  */
900   if (!loop)
901     {
902       tree step = DR_STEP (dr);
903       HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
904 
905       if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0)
906 	{
907 	  if (dump_enabled_p ())
908 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
909                              "SLP: step doesn't divide the vector-size.");
910 	  misalign = NULL_TREE;
911 	}
912     }
913 
914   base = build_fold_indirect_ref (base_addr);
915   alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
916 
917   if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
918       || !misalign)
919     {
920       if (dump_enabled_p ())
921 	{
922 	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
923                            "Unknown alignment for access: ");
924 	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, base);
925 	}
926       return true;
927     }
928 
929   if ((DECL_P (base)
930        && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
931 				alignment) >= 0)
932       || (TREE_CODE (base_addr) == SSA_NAME
933 	  && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
934 						      TREE_TYPE (base_addr)))),
935 				   alignment) >= 0)
936       || (get_pointer_alignment (base_addr) >= TYPE_ALIGN (vectype)))
937     base_aligned = true;
938   else
939     base_aligned = false;
940 
941   if (!base_aligned)
942     {
943       /* Do not change the alignment of global variables here if
944 	 flag_section_anchors is enabled as we already generated
945 	 RTL for other functions.  Most global variables should
946 	 have been aligned during the IPA increase_alignment pass.  */
947       if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
948 	  || (TREE_STATIC (base) && flag_section_anchors))
949 	{
950 	  if (dump_enabled_p ())
951 	    {
952 	      dump_printf_loc (MSG_NOTE, vect_location,
953                                "can't force alignment of ref: ");
954 	      dump_generic_expr (MSG_NOTE, TDF_SLIM, ref);
955 	    }
956 	  return true;
957 	}
958 
959       /* Force the alignment of the decl.
960 	 NOTE: This is the only change to the code we make during
961 	 the analysis phase, before deciding to vectorize the loop.  */
962       if (dump_enabled_p ())
963         {
964           dump_printf_loc (MSG_NOTE, vect_location, "force alignment of ");
965           dump_generic_expr (MSG_NOTE, TDF_SLIM, ref);
966         }
967 
968       DECL_ALIGN (base) = TYPE_ALIGN (vectype);
969       DECL_USER_ALIGN (base) = 1;
970     }
971 
972   /* At this point we assume that the base is aligned.  */
973   gcc_assert (base_aligned
974 	      || (TREE_CODE (base) == VAR_DECL
975 		  && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
976 
977   /* If this is a backward running DR then first access in the larger
978      vectype actually is N-1 elements before the address in the DR.
979      Adjust misalign accordingly.  */
980   if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
981     {
982       tree offset = ssize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
983       /* DR_STEP(dr) is the same as -TYPE_SIZE of the scalar type,
984 	 otherwise we wouldn't be here.  */
985       offset = fold_build2 (MULT_EXPR, ssizetype, offset, DR_STEP (dr));
986       /* PLUS because DR_STEP was negative.  */
987       misalign = size_binop (PLUS_EXPR, misalign, offset);
988     }
989 
990   /* Modulo alignment.  */
991   misalign = size_binop (FLOOR_MOD_EXPR, misalign, alignment);
992 
993   if (!host_integerp (misalign, 1))
994     {
995       /* Negative or overflowed misalignment value.  */
996       if (dump_enabled_p ())
997 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
998                          "unexpected misalign value");
999       return false;
1000     }
1001 
1002   SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));
1003 
1004   if (dump_enabled_p ())
1005     {
1006       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1007                        "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
1008       dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, ref);
1009     }
1010 
1011   return true;
1012 }
1013 
1014 
1015 /* Function vect_compute_data_refs_alignment
1016 
1017    Compute the misalignment of data references in the loop.
1018    Return FALSE if a data reference is found that cannot be vectorized.  */
1019 
1020 static bool
1021 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo,
1022                                   bb_vec_info bb_vinfo)
1023 {
1024   vec<data_reference_p> datarefs;
1025   struct data_reference *dr;
1026   unsigned int i;
1027 
1028   if (loop_vinfo)
1029     datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1030   else
1031     datarefs = BB_VINFO_DATAREFS (bb_vinfo);
1032 
1033   FOR_EACH_VEC_ELT (datarefs, i, dr)
1034     if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
1035         && !vect_compute_data_ref_alignment (dr))
1036       {
1037         if (bb_vinfo)
1038           {
1039             /* Mark unsupported statement as unvectorizable.  */
1040             STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
1041             continue;
1042           }
1043         else
1044           return false;
1045       }
1046 
1047   return true;
1048 }
1049 
1050 
1051 /* Function vect_update_misalignment_for_peel
1052 
1053    DR - the data reference whose misalignment is to be adjusted.
1054    DR_PEEL - the data reference whose misalignment is being made
1055              zero in the vector loop by the peel.
1056    NPEEL - the number of iterations in the peel loop if the misalignment
1057            of DR_PEEL is known at compile time.  */
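
/* Illustrative example (hypothetical numbers): assume a 16-byte vector
   type and 4-byte elements, DR_PEEL with known misalignment 8 (fixed by
   peeling NPEEL = 2 scalar iterations), and DR with known misalignment 4.
   The code below then computes

     misal = 4 + 2 * 4 = 12,   12 & (16 - 1) = 12

   so DR's misalignment in the vector loop becomes 12 bytes.  */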
1058 
1059 static void
1060 vect_update_misalignment_for_peel (struct data_reference *dr,
1061                                    struct data_reference *dr_peel, int npeel)
1062 {
1063   unsigned int i;
1064   vec<dr_p> same_align_drs;
1065   struct data_reference *current_dr;
1066   int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
1067   int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
1068   stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
1069   stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
1070 
1071  /* For interleaved data accesses the step in the loop must be multiplied by
1072      the size of the interleaving group.  */
1073   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1074     dr_size *= GROUP_SIZE (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)));
1075   if (STMT_VINFO_GROUPED_ACCESS (peel_stmt_info))
1076     dr_peel_size *= GROUP_SIZE (peel_stmt_info);
1077 
1078   /* It can be assumed that the data refs with the same alignment as dr_peel
1079      are aligned in the vector loop.  */
1080   same_align_drs
1081     = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
1082   FOR_EACH_VEC_ELT (same_align_drs, i, current_dr)
1083     {
1084       if (current_dr != dr)
1085         continue;
1086       gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
1087                   DR_MISALIGNMENT (dr_peel) / dr_peel_size);
1088       SET_DR_MISALIGNMENT (dr, 0);
1089       return;
1090     }
1091 
1092   if (known_alignment_for_access_p (dr)
1093       && known_alignment_for_access_p (dr_peel))
1094     {
1095       bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
1096       int misal = DR_MISALIGNMENT (dr);
1097       tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1098       misal += negative ? -npeel * dr_size : npeel * dr_size;
1099       misal &= (TYPE_ALIGN (vectype) / BITS_PER_UNIT) - 1;
1100       SET_DR_MISALIGNMENT (dr, misal);
1101       return;
1102     }
1103 
1104   if (dump_enabled_p ())
1105     dump_printf_loc (MSG_NOTE, vect_location, "Setting misalignment to -1.");
1106   SET_DR_MISALIGNMENT (dr, -1);
1107 }
1108 
1109 
1110 /* Function vect_verify_datarefs_alignment
1111 
1112    Return TRUE if all data references in the loop can be
1113    handled with respect to alignment.  */
1114 
1115 bool
1116 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1117 {
1118   vec<data_reference_p> datarefs;
1119   struct data_reference *dr;
1120   enum dr_alignment_support supportable_dr_alignment;
1121   unsigned int i;
1122 
1123   if (loop_vinfo)
1124     datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1125   else
1126     datarefs = BB_VINFO_DATAREFS (bb_vinfo);
1127 
1128   FOR_EACH_VEC_ELT (datarefs, i, dr)
1129     {
1130       gimple stmt = DR_STMT (dr);
1131       stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1132 
1133       if (!STMT_VINFO_RELEVANT_P (stmt_info))
1134 	continue;
1135 
1136       /* For interleaving, only the alignment of the first access matters.
1137          Skip statements marked as not vectorizable.  */
1138       if ((STMT_VINFO_GROUPED_ACCESS (stmt_info)
1139            && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1140           || !STMT_VINFO_VECTORIZABLE (stmt_info))
1141         continue;
1142 
1143       /* Strided loads perform only component accesses, alignment is
1144 	 irrelevant for them.  */
1145       if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1146 	continue;
1147 
1148       supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1149       if (!supportable_dr_alignment)
1150         {
1151           if (dump_enabled_p ())
1152             {
1153               if (DR_IS_READ (dr))
1154                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1155                                  "not vectorized: unsupported unaligned load.");
1156               else
1157                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1158                                  "not vectorized: unsupported unaligned "
1159                                  "store.");
1160 
1161               dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
1162                                  DR_REF (dr));
1163             }
1164           return false;
1165         }
1166       if (supportable_dr_alignment != dr_aligned && dump_enabled_p ())
1167         dump_printf_loc (MSG_NOTE, vect_location,
1168                          "Vectorizing an unaligned access.");
1169     }
1170   return true;
1171 }
1172 
1173 /* Given a memory reference EXP, return whether its alignment is less
1174    than its size.  */
1175 
1176 static bool
1177 not_size_aligned (tree exp)
1178 {
1179   if (!host_integerp (TYPE_SIZE (TREE_TYPE (exp)), 1))
1180     return true;
1181 
1182   return (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (exp)))
1183 	  > get_object_alignment (exp));
1184 }
1185 
1186 /* Function vector_alignment_reachable_p
1187 
1188    Return true if vector alignment for DR is reachable by peeling
1189    a few loop iterations.  Return false otherwise.  */
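
/* Illustrative example (hypothetical numbers): with a 16-byte vector of
   4-byte elements, a known misalignment of 8 bytes can be removed by
   peeling (16 - 8) / 4 = 2 scalar iterations, so alignment is reachable.
   A known misalignment of 2 bytes is not a multiple of the element size,
   so no whole number of scalar iterations can reach alignment and the
   function returns false.  */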
1190 
1191 static bool
1192 vector_alignment_reachable_p (struct data_reference *dr)
1193 {
1194   gimple stmt = DR_STMT (dr);
1195   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1196   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1197 
1198   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1199     {
1200       /* For interleaved accesses we peel only if the number of iterations
1201 	 in the prolog loop (VF - misalignment) is a multiple of the
1202 	 number of interleaved accesses.  */
1203       int elem_size, mis_in_elements;
1204       int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1205 
1206       /* FORNOW: handle only known alignment.  */
1207       if (!known_alignment_for_access_p (dr))
1208 	return false;
1209 
1210       elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
1211       mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
1212 
1213       if ((nelements - mis_in_elements) % GROUP_SIZE (stmt_info))
1214 	return false;
1215     }
1216 
1217   /* If the misalignment is known at compile time then allow peeling
1218      only if natural alignment is reachable through peeling.  */
1219   if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
1220     {
1221       HOST_WIDE_INT elmsize =
1222 		int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1223       if (dump_enabled_p ())
1224 	{
1225 	  dump_printf_loc (MSG_NOTE, vect_location,
1226                            "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
1227 	  dump_printf (MSG_NOTE,
1228                        ". misalignment = %d. ", DR_MISALIGNMENT (dr));
1229 	}
1230       if (DR_MISALIGNMENT (dr) % elmsize)
1231 	{
1232 	  if (dump_enabled_p ())
1233 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1234                              "data size does not divide the misalignment.\n");
1235 	  return false;
1236 	}
1237     }
1238 
1239   if (!known_alignment_for_access_p (dr))
1240     {
1241       tree type = TREE_TYPE (DR_REF (dr));
1242       bool is_packed = not_size_aligned (DR_REF (dr));
1243       if (dump_enabled_p ())
1244 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1245                          "Unknown misalignment, is_packed = %d",is_packed);
1246       if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
1247 	return true;
1248       else
1249 	return false;
1250     }
1251 
1252   return true;
1253 }
1254 
1255 
1256 /* Calculate the cost of the memory access represented by DR.  */
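
/* Illustrative example (hypothetical numbers): with a vectorization factor
   of 8 and a vector type holding 4 elements, ncopies below is 8 / 4 = 2,
   i.e. the access is costed as the two vector loads or stores needed per
   vectorized iteration.  */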
1257 
1258 static void
1259 vect_get_data_access_cost (struct data_reference *dr,
1260                            unsigned int *inside_cost,
1261                            unsigned int *outside_cost,
1262 			   stmt_vector_for_cost *body_cost_vec)
1263 {
1264   gimple stmt = DR_STMT (dr);
1265   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1266   int nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
1267   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1268   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1269   int ncopies = vf / nunits;
1270 
1271   if (DR_IS_READ (dr))
1272     vect_get_load_cost (dr, ncopies, true, inside_cost, outside_cost,
1273 			NULL, body_cost_vec, false);
1274   else
1275     vect_get_store_cost (dr, ncopies, inside_cost, body_cost_vec);
1276 
1277   if (dump_enabled_p ())
1278     dump_printf_loc (MSG_NOTE, vect_location,
1279                      "vect_get_data_access_cost: inside_cost = %d, "
1280                      "outside_cost = %d.", *inside_cost, *outside_cost);
1281 }
1282 
1283 
1284 static hashval_t
1285 vect_peeling_hash (const void *elem)
1286 {
1287   const struct _vect_peel_info *peel_info;
1288 
1289   peel_info = (const struct _vect_peel_info *) elem;
1290   return (hashval_t) peel_info->npeel;
1291 }
1292 
1293 
1294 static int
1295 vect_peeling_hash_eq (const void *elem1, const void *elem2)
1296 {
1297   const struct _vect_peel_info *a, *b;
1298 
1299   a = (const struct _vect_peel_info *) elem1;
1300   b = (const struct _vect_peel_info *) elem2;
1301   return (a->npeel == b->npeel);
1302 }
1303 
1304 
1305 /* Insert DR into peeling hash table with NPEEL as key.  */
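
/* Illustrative example (hypothetical data-refs): if two data-refs become
   aligned by peeling 2 iterations and a third one by peeling 1 iteration,
   the table ends up holding {npeel = 2, count = 2} and
   {npeel = 1, count = 1}; without the cost model the most frequently
   useful entry (npeel = 2) is later picked by
   vect_peeling_hash_get_most_frequent.  */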
1306 
1307 static void
1308 vect_peeling_hash_insert (loop_vec_info loop_vinfo, struct data_reference *dr,
1309                           int npeel)
1310 {
1311   struct _vect_peel_info elem, *slot;
1312   void **new_slot;
1313   bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1314 
1315   elem.npeel = npeel;
1316   slot = (vect_peel_info) htab_find (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1317                                      &elem);
1318   if (slot)
1319     slot->count++;
1320   else
1321     {
1322       slot = XNEW (struct _vect_peel_info);
1323       slot->npeel = npeel;
1324       slot->dr = dr;
1325       slot->count = 1;
1326       new_slot = htab_find_slot (LOOP_VINFO_PEELING_HTAB (loop_vinfo), slot,
1327                                  INSERT);
1328       *new_slot = slot;
1329     }
1330 
1331   if (!supportable_dr_alignment && !flag_vect_cost_model)
1332     slot->count += VECT_MAX_COST;
1333 }
1334 
1335 
1336 /* Traverse the peeling hash table to find the peeling option that aligns
1337    the maximum number of data accesses.  */
1338 
1339 static int
1340 vect_peeling_hash_get_most_frequent (void **slot, void *data)
1341 {
1342   vect_peel_info elem = (vect_peel_info) *slot;
1343   vect_peel_extended_info max = (vect_peel_extended_info) data;
1344 
1345   if (elem->count > max->peel_info.count
1346       || (elem->count == max->peel_info.count
1347           && max->peel_info.npeel > elem->npeel))
1348     {
1349       max->peel_info.npeel = elem->npeel;
1350       max->peel_info.count = elem->count;
1351       max->peel_info.dr = elem->dr;
1352     }
1353 
1354   return 1;
1355 }
1356 
1357 
1358 /* Traverse the peeling hash table and calculate the cost of each peeling
1359    option.  Find the one with the lowest cost.  */
1360 
1361 static int
1362 vect_peeling_hash_get_lowest_cost (void **slot, void *data)
1363 {
1364   vect_peel_info elem = (vect_peel_info) *slot;
1365   vect_peel_extended_info min = (vect_peel_extended_info) data;
1366   int save_misalignment, dummy;
1367   unsigned int inside_cost = 0, outside_cost = 0, i;
1368   gimple stmt = DR_STMT (elem->dr);
1369   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1370   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1371   vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1372   struct data_reference *dr;
1373   stmt_vector_for_cost prologue_cost_vec, body_cost_vec, epilogue_cost_vec;
1374   int single_iter_cost;
1375 
1376   prologue_cost_vec.create (2);
1377   body_cost_vec.create (2);
1378   epilogue_cost_vec.create (2);
1379 
1380   FOR_EACH_VEC_ELT (datarefs, i, dr)
1381     {
1382       stmt = DR_STMT (dr);
1383       stmt_info = vinfo_for_stmt (stmt);
1384       /* For interleaving, only the alignment of the first access
1385          matters.  */
1386       if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1387           && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1388         continue;
1389 
1390       save_misalignment = DR_MISALIGNMENT (dr);
1391       vect_update_misalignment_for_peel (dr, elem->dr, elem->npeel);
1392       vect_get_data_access_cost (dr, &inside_cost, &outside_cost,
1393 				 &body_cost_vec);
1394       SET_DR_MISALIGNMENT (dr, save_misalignment);
1395     }
1396 
1397   single_iter_cost = vect_get_single_scalar_iteration_cost (loop_vinfo);
1398   outside_cost += vect_get_known_peeling_cost (loop_vinfo, elem->npeel,
1399 					       &dummy, single_iter_cost,
1400 					       &prologue_cost_vec,
1401 					       &epilogue_cost_vec);
1402 
1403   /* Prologue and epilogue costs are added to the target model later.
1404      These costs depend only on the scalar iteration cost, the
1405      number of peeling iterations finally chosen, and the number of
1406      misaligned statements.  So discard the information found here.  */
1407   prologue_cost_vec.release ();
1408   epilogue_cost_vec.release ();
1409 
1410   if (inside_cost < min->inside_cost
1411       || (inside_cost == min->inside_cost && outside_cost < min->outside_cost))
1412     {
1413       min->inside_cost = inside_cost;
1414       min->outside_cost = outside_cost;
1415       min->body_cost_vec.release ();
1416       min->body_cost_vec = body_cost_vec;
1417       min->peel_info.dr = elem->dr;
1418       min->peel_info.npeel = elem->npeel;
1419     }
1420   else
1421     body_cost_vec.release ();
1422 
1423   return 1;
1424 }
1425 
1426 
1427 /* Choose best peeling option by traversing peeling hash table and either
1428    choosing an option with the lowest cost (if cost model is enabled) or the
1429    option that aligns as many accesses as possible.  */
1430 
1431 static struct data_reference *
1432 vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo,
1433                                        unsigned int *npeel,
1434 				       stmt_vector_for_cost *body_cost_vec)
1435 {
1436    struct _vect_peel_extended_info res;
1437 
1438    res.peel_info.dr = NULL;
1439    res.body_cost_vec = stmt_vector_for_cost();
1440 
1441    if (flag_vect_cost_model)
1442      {
1443        res.inside_cost = INT_MAX;
1444        res.outside_cost = INT_MAX;
1445        htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1446                       vect_peeling_hash_get_lowest_cost, &res);
1447      }
1448    else
1449      {
1450        res.peel_info.count = 0;
1451        htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1452                       vect_peeling_hash_get_most_frequent, &res);
1453      }
1454 
1455    *npeel = res.peel_info.npeel;
1456    *body_cost_vec = res.body_cost_vec;
1457    return res.peel_info.dr;
1458 }
1459 
1460 
1461 /* Function vect_enhance_data_refs_alignment
1462 
1463    This pass will use loop versioning and loop peeling in order to enhance
1464    the alignment of data references in the loop.
1465 
1466    FOR NOW: we assume that whatever versioning/peeling takes place, only the
1467    original loop is to be vectorized.  Any other loops that are created by
1468    the transformations performed in this pass - are not supposed to be
1469    vectorized.  This restriction will be relaxed.
1470 
1471    This pass will require a cost model to guide it whether to apply peeling
1472    or versioning or a combination of the two.  For example, the scheme that
1473    Intel uses when given a loop with several memory accesses is as follows:
1474    choose one memory access ('p') whose alignment you want to force by doing
1475    peeling.  Then, either (1) generate a loop in which 'p' is aligned and all
1476    other accesses are not necessarily aligned, or (2) use loop versioning to
1477    generate one loop in which all accesses are aligned, and another loop in
1478    which only 'p' is necessarily aligned.
1479 
1480    ("Automatic Intra-Register Vectorization for the Intel Architecture",
1481    Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
1482    Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1483 
1484    Devising a cost model is the most critical aspect of this work.  It will
1485    guide us on which access to peel for, whether to use loop versioning, how
1486    many versions to create, etc.  The cost model will probably consist of
1487    generic considerations as well as target specific considerations (on
1488    powerpc for example, misaligned stores are more painful than misaligned
1489    loads).
1490 
1491    Here are the general steps involved in alignment enhancements:
1492 
1493      -- original loop, before alignment analysis:
1494 	for (i=0; i<N; i++){
1495 	  x = q[i];			# DR_MISALIGNMENT(q) = unknown
1496 	  p[i] = y;			# DR_MISALIGNMENT(p) = unknown
1497 	}
1498 
1499      -- After vect_compute_data_refs_alignment:
1500 	for (i=0; i<N; i++){
1501 	  x = q[i];			# DR_MISALIGNMENT(q) = 3
1502 	  p[i] = y;			# DR_MISALIGNMENT(p) = unknown
1503 	}
1504 
1505      -- Possibility 1: we do loop versioning:
1506      if (p is aligned) {
1507 	for (i=0; i<N; i++){	# loop 1A
1508 	  x = q[i];			# DR_MISALIGNMENT(q) = 3
1509 	  p[i] = y;			# DR_MISALIGNMENT(p) = 0
1510 	}
1511      }
1512      else {
1513 	for (i=0; i<N; i++){	# loop 1B
1514 	  x = q[i];			# DR_MISALIGNMENT(q) = 3
1515 	  p[i] = y;			# DR_MISALIGNMENT(p) = unaligned
1516 	}
1517      }
1518 
1519      -- Possibility 2: we do loop peeling:
1520      for (i = 0; i < 3; i++){	# (scalar loop, not to be vectorized).
1521 	x = q[i];
1522 	p[i] = y;
1523      }
1524      for (i = 3; i < N; i++){	# loop 2A
1525 	x = q[i];			# DR_MISALIGNMENT(q) = 0
1526 	p[i] = y;			# DR_MISALIGNMENT(p) = unknown
1527      }
1528 
1529      -- Possibility 3: combination of loop peeling and versioning:
1530      for (i = 0; i < 3; i++){	# (scalar loop, not to be vectorized).
1531 	x = q[i];
1532 	p[i] = y;
1533      }
1534      if (p is aligned) {
1535 	for (i = 3; i<N; i++){	# loop 3A
1536 	  x = q[i];			# DR_MISALIGNMENT(q) = 0
1537 	  p[i] = y;			# DR_MISALIGNMENT(p) = 0
1538 	}
1539      }
1540      else {
1541 	for (i = 3; i<N; i++){	# loop 3B
1542 	  x = q[i];			# DR_MISALIGNMENT(q) = 0
1543 	  p[i] = y;			# DR_MISALIGNMENT(p) = unaligned
1544 	}
1545      }
1546 
1547      These loops are later passed to loop_transform to be vectorized.  The
1548      vectorizer will use the alignment information to guide the transformation
1549      (whether to generate regular loads/stores, or with special handling for
1550      misalignment).  */
1551 
1552 bool
1553 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1554 {
1555   vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1556   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1557   enum dr_alignment_support supportable_dr_alignment;
1558   struct data_reference *dr0 = NULL, *first_store = NULL;
1559   struct data_reference *dr;
1560   unsigned int i, j;
1561   bool do_peeling = false;
1562   bool do_versioning = false;
1563   bool stat;
1564   gimple stmt;
1565   stmt_vec_info stmt_info;
1566   int vect_versioning_for_alias_required;
1567   unsigned int npeel = 0;
1568   bool all_misalignments_unknown = true;
1569   unsigned int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1570   unsigned possible_npeel_number = 1;
1571   tree vectype;
1572   unsigned int nelements, mis, same_align_drs_max = 0;
1573   stmt_vector_for_cost body_cost_vec = stmt_vector_for_cost();
1574 
1575   if (dump_enabled_p ())
1576     dump_printf_loc (MSG_NOTE, vect_location,
1577                      "=== vect_enhance_data_refs_alignment ===");
1578 
1579   /* While cost model enhancements are expected in the future, the high level
1580      view of the code at this time is as follows:
1581 
1582      A) If there is a misaligned access then see if peeling to align
1583         this access can make all data references satisfy
1584         vect_supportable_dr_alignment.  If so, update data structures
1585         as needed and return true.
1586 
1587      B) If peeling wasn't possible and there is a data reference with an
1588         unknown misalignment that does not satisfy vect_supportable_dr_alignment
1589         then see if loop versioning checks can be used to make all data
1590         references satisfy vect_supportable_dr_alignment.  If so, update
1591         data structures as needed and return true.
1592 
1593      C) If neither peeling nor versioning were successful then return false if
1594         any data reference does not satisfy vect_supportable_dr_alignment.
1595 
1596      D) Return true (all data references satisfy vect_supportable_dr_alignment).
1597 
1598      Note, Possibility 3 above (which is peeling and versioning together) is not
1599      being done at this time.  */
1600 
1601   /* (1) Peeling to force alignment.  */
1602 
1603   /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1604      Considerations:
1605      + How many accesses will become aligned due to the peeling
1606      - How many accesses will become unaligned due to the peeling,
1607        and the cost of misaligned accesses.
1608      - The cost of peeling (the extra runtime checks, the increase
1609        in code size).  */
1610 
1611   FOR_EACH_VEC_ELT (datarefs, i, dr)
1612     {
1613       stmt = DR_STMT (dr);
1614       stmt_info = vinfo_for_stmt (stmt);
1615 
1616       if (!STMT_VINFO_RELEVANT_P (stmt_info))
1617 	continue;
1618 
1619       /* For interleaving, only the alignment of the first access
1620          matters.  */
1621       if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1622           && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1623         continue;
1624 
1625       /* For invariant accesses there is nothing to enhance.  */
1626       if (integer_zerop (DR_STEP (dr)))
1627 	continue;
1628 
1629       /* Strided loads perform only component accesses, alignment is
1630 	 irrelevant for them.  */
1631       if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1632 	continue;
1633 
1634       supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1635       do_peeling = vector_alignment_reachable_p (dr);
1636       if (do_peeling)
1637         {
1638           if (known_alignment_for_access_p (dr))
1639             {
1640               unsigned int npeel_tmp;
1641 	      bool negative = tree_int_cst_compare (DR_STEP (dr),
1642 						    size_zero_node) < 0;
1643 
1644               /* Save info about DR in the hash table.  */
1645               if (!LOOP_VINFO_PEELING_HTAB (loop_vinfo))
1646                 LOOP_VINFO_PEELING_HTAB (loop_vinfo) =
1647                            htab_create (1, vect_peeling_hash,
1648                                         vect_peeling_hash_eq, free);
1649 
1650               vectype = STMT_VINFO_VECTYPE (stmt_info);
1651               nelements = TYPE_VECTOR_SUBPARTS (vectype);
1652               mis = DR_MISALIGNMENT (dr) / GET_MODE_SIZE (TYPE_MODE (
1653                                                 TREE_TYPE (DR_REF (dr))));
1654               npeel_tmp = (negative
1655 			   ? (mis - nelements) : (nelements - mis))
1656 		  & (nelements - 1);
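              /* Illustrative example (not from the original source): for a
                 4-element vector of 4-byte scalars with DR_MISALIGNMENT of
                 12 bytes, mis is 3 elements, so for a positive step
                 npeel_tmp = (4 - 3) & 3 = 1; peeling one scalar iteration
                 advances the access by 4 bytes and makes it vector
                 aligned.  */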
1657 
1658               /* For multiple types, it is possible that the bigger type access
1659                  will have more than one peeling option.  E.g., a loop with two
1660                  types: one of size (vector size / 4), and the other one of
1661                  size (vector size / 8).  Vectorization factor will be 8.  If both
1662                  accesses are misaligned by 3, the first one needs one scalar
1663                  iteration to be aligned, and the second one needs 5.  But the
1664                  first one will also be aligned by peeling 5 scalar
1665                  iterations, and in that case both accesses will be aligned.
1666                  Hence, except for the immediate peeling amount, we also want
1667                  to try to add a full vector size, as long as we do not exceed
1668                  the vectorization factor.
1669                  We do this automatically for the cost model, since we calculate
1670                  the cost for every peeling option.  */
1671               if (!flag_vect_cost_model)
1672                 possible_npeel_number = vf / nelements;
1673 
1674               /* Handle the aligned case. We may decide to align some other
1675                  access, making DR unaligned.  */
1676               if (DR_MISALIGNMENT (dr) == 0)
1677                 {
1678                   npeel_tmp = 0;
1679                   if (!flag_vect_cost_model)
1680                     possible_npeel_number++;
1681                 }
1682 
1683               for (j = 0; j < possible_npeel_number; j++)
1684                 {
1685                   gcc_assert (npeel_tmp <= vf);
1686                   vect_peeling_hash_insert (loop_vinfo, dr, npeel_tmp);
1687                   npeel_tmp += nelements;
1688                 }
1689 
1690               all_misalignments_unknown = false;
1691               /* Data-ref that was chosen for the case that all the
1692                  misalignments are unknown is not relevant anymore, since we
1693                  have a data-ref with known alignment.  */
1694               dr0 = NULL;
1695             }
1696           else
1697             {
1698               /* If we don't know all the misalignment values, we prefer
1699                 peeling for the data-ref that has the maximum number of data-refs
1700                 with the same alignment, unless the target prefers to align
1701                 stores over loads.  */
1702               if (all_misalignments_unknown)
1703                 {
1704                   if (same_align_drs_max
1705 			< STMT_VINFO_SAME_ALIGN_REFS (stmt_info).length ()
1706                       || !dr0)
1707                     {
1708                       same_align_drs_max
1709 			  = STMT_VINFO_SAME_ALIGN_REFS (stmt_info).length ();
1710                       dr0 = dr;
1711                     }
1712 
1713                   if (!first_store && DR_IS_WRITE (dr))
1714                     first_store = dr;
1715                 }
1716 
1717               /* If there are both known and unknown misaligned accesses in the
1718                  loop, we choose peeling amount according to the known
1719                  accesses.  */
1720 
1721 
1722               if (!supportable_dr_alignment)
1723                 {
1724                   dr0 = dr;
1725                   if (!first_store && DR_IS_WRITE (dr))
1726                     first_store = dr;
1727                 }
1728             }
1729         }
1730       else
1731         {
1732           if (!aligned_access_p (dr))
1733             {
1734               if (dump_enabled_p ())
1735                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1736                                  "vector alignment may not be reachable");
1737               break;
1738             }
1739         }
1740     }
1741 
1742   vect_versioning_for_alias_required
1743     = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
1744 
1745   /* Temporarily, if versioning for alias is required, we disable peeling
1746      until we support peeling and versioning.  Often peeling for alignment
1747      will require peeling for loop-bound, which in turn requires that we
1748      know how to adjust the loop ivs after the loop.  */
1749   if (vect_versioning_for_alias_required
1750       || !vect_can_advance_ivs_p (loop_vinfo)
1751       || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1752     do_peeling = false;
1753 
1754   if (do_peeling && all_misalignments_unknown
1755       && vect_supportable_dr_alignment (dr0, false))
1756     {
1757 
1758       /* Check if the target requires preferring stores over loads, i.e., if
1759          misaligned stores are more expensive than misaligned loads (taking
1760          drs with same alignment into account).  */
1761       if (first_store && DR_IS_READ (dr0))
1762         {
1763           unsigned int load_inside_cost = 0, load_outside_cost = 0;
1764           unsigned int store_inside_cost = 0, store_outside_cost = 0;
1765           unsigned int load_inside_penalty = 0, load_outside_penalty = 0;
1766           unsigned int store_inside_penalty = 0, store_outside_penalty = 0;
1767 	  stmt_vector_for_cost dummy;
1768 	  dummy.create (2);
1769 
1770           vect_get_data_access_cost (dr0, &load_inside_cost, &load_outside_cost,
1771 				     &dummy);
1772           vect_get_data_access_cost (first_store, &store_inside_cost,
1773 				     &store_outside_cost, &dummy);
1774 
1775 	  dummy.release ();
1776 
1777           /* Calculate the penalty for leaving FIRST_STORE unaligned (by
1778              aligning the load DR0).  */
1779           load_inside_penalty = store_inside_cost;
1780           load_outside_penalty = store_outside_cost;
1781           for (i = 0;
1782 	       STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (
1783 			  DR_STMT (first_store))).iterate (i, &dr);
1784                i++)
1785             if (DR_IS_READ (dr))
1786               {
1787                 load_inside_penalty += load_inside_cost;
1788                 load_outside_penalty += load_outside_cost;
1789               }
1790             else
1791               {
1792                 load_inside_penalty += store_inside_cost;
1793                 load_outside_penalty += store_outside_cost;
1794               }
1795 
1796           /* Calculate the penalty for leaving DR0 unaligned (by
1797              aligning the FIRST_STORE).  */
1798           store_inside_penalty = load_inside_cost;
1799           store_outside_penalty = load_outside_cost;
1800           for (i = 0;
1801 	       STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (
1802 		      DR_STMT (dr0))).iterate (i, &dr);
1803                i++)
1804             if (DR_IS_READ (dr))
1805               {
1806                 store_inside_penalty += load_inside_cost;
1807                 store_outside_penalty += load_outside_cost;
1808               }
1809             else
1810               {
1811                 store_inside_penalty += store_inside_cost;
1812                 store_outside_penalty += store_outside_cost;
1813               }
1814 
1815           if (load_inside_penalty > store_inside_penalty
1816               || (load_inside_penalty == store_inside_penalty
1817                   && load_outside_penalty > store_outside_penalty))
1818             dr0 = first_store;
1819         }
1820 
1821       /* In case there are only loads with different unknown misalignments, use
1822          peeling only if it may help to align other accesses in the loop.  */
1823       if (!first_store
1824 	  && !STMT_VINFO_SAME_ALIGN_REFS (
1825 		  vinfo_for_stmt (DR_STMT (dr0))).length ()
1826           && vect_supportable_dr_alignment (dr0, false)
1827               != dr_unaligned_supported)
1828         do_peeling = false;
1829     }
1830 
1831   if (do_peeling && !dr0)
1832     {
1833       /* Peeling is possible, but no data access strictly requires alignment in
1834          order to be supported.  So we try to choose the best possible peeling.  */
1835 
1836       /* We should get here only if there are drs with known misalignment.  */
1837       gcc_assert (!all_misalignments_unknown);
1838 
1839       /* Choose the best peeling from the hash table.  */
1840       dr0 = vect_peeling_hash_choose_best_peeling (loop_vinfo, &npeel,
1841 						   &body_cost_vec);
1842       if (!dr0 || !npeel)
1843         do_peeling = false;
1844     }
1845 
1846   if (do_peeling)
1847     {
1848       stmt = DR_STMT (dr0);
1849       stmt_info = vinfo_for_stmt (stmt);
1850       vectype = STMT_VINFO_VECTYPE (stmt_info);
1851       nelements = TYPE_VECTOR_SUBPARTS (vectype);
1852 
1853       if (known_alignment_for_access_p (dr0))
1854         {
1855 	  bool negative = tree_int_cst_compare (DR_STEP (dr0),
1856 						size_zero_node) < 0;
1857           if (!npeel)
1858             {
1859               /* Since it's known at compile time, compute the number of
1860                  iterations in the peeled loop (the peeling factor) for use in
1861                  updating DR_MISALIGNMENT values.  The peeling factor is the
1862                  vectorization factor minus the misalignment as an element
1863                  count.  */
1864               mis = DR_MISALIGNMENT (dr0);
1865               mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1866               npeel = ((negative ? mis - nelements : nelements - mis)
1867 		       & (nelements - 1));
1868             }
1869 
1870 	  /* For interleaved data access every iteration accesses all the
1871 	     members of the group, therefore we divide the number of iterations
1872 	     by the group size.  */
1873 	  stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1874 	  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1875 	    npeel /= GROUP_SIZE (stmt_info);
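	  /* Illustrative example (not part of the original comment): if the
	     peel amount is 6 elements and the interleaving group has 2
	     members, each scalar iteration covers 2 group elements, so only
	     3 scalar iterations need to be peeled.  */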
1876 
1877           if (dump_enabled_p ())
1878             dump_printf_loc (MSG_NOTE, vect_location,
1879                              "Try peeling by %d", npeel);
1880         }
1881 
1882       /* Ensure that all data refs can be vectorized after the peel.  */
1883       FOR_EACH_VEC_ELT (datarefs, i, dr)
1884         {
1885           int save_misalignment;
1886 
1887 	  if (dr == dr0)
1888 	    continue;
1889 
1890 	  stmt = DR_STMT (dr);
1891 	  stmt_info = vinfo_for_stmt (stmt);
1892 	  /* For interleaving, only the alignment of the first access
1893             matters.  */
1894 	  if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1895 	      && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1896 	    continue;
1897 
1898 	  /* Strided loads perform only component accesses, alignment is
1899 	     irrelevant for them.  */
1900 	  if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1901 	    continue;
1902 
1903 	  save_misalignment = DR_MISALIGNMENT (dr);
1904 	  vect_update_misalignment_for_peel (dr, dr0, npeel);
1905 	  supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1906 	  SET_DR_MISALIGNMENT (dr, save_misalignment);
1907 
1908 	  if (!supportable_dr_alignment)
1909 	    {
1910 	      do_peeling = false;
1911 	      break;
1912 	    }
1913 	}
1914 
1915       if (do_peeling && known_alignment_for_access_p (dr0) && npeel == 0)
1916         {
1917           stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1918           if (!stat)
1919             do_peeling = false;
1920           else
1921 	    {
1922 	      body_cost_vec.release ();
1923 	      return stat;
1924 	    }
1925         }
1926 
1927       if (do_peeling)
1928         {
1929 	  stmt_info_for_cost *si;
1930 	  void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
1931 
1932           /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1933              If the misalignment of DR_i is identical to that of dr0 then set
1934              DR_MISALIGNMENT (DR_i) to zero.  If the misalignment of DR_i and
1935              dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1936              by the peeling factor times the element size of DR_i (MOD the
1937              vectorization factor times the size).  Otherwise, the
1938              misalignment of DR_i must be set to unknown.  */
1939 	  FOR_EACH_VEC_ELT (datarefs, i, dr)
1940 	    if (dr != dr0)
1941 	      vect_update_misalignment_for_peel (dr, dr0, npeel);
1942 
1943           LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1944           if (npeel)
1945             LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
1946           else
1947             LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1948 	  SET_DR_MISALIGNMENT (dr0, 0);
1949 	  if (dump_enabled_p ())
1950             {
1951               dump_printf_loc (MSG_NOTE, vect_location,
1952                                "Alignment of access forced using peeling.");
1953               dump_printf_loc (MSG_NOTE, vect_location,
1954                                "Peeling for alignment will be applied.");
1955             }
1956 	  /* We've delayed passing the inside-loop peeling costs to the
1957 	     target cost model until we were sure peeling would happen.
1958 	     Do so now.  */
1959 	  if (body_cost_vec.exists ())
1960 	    {
1961 	      FOR_EACH_VEC_ELT (body_cost_vec, i, si)
1962 		{
1963 		  struct _stmt_vec_info *stmt_info
1964 		    = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1965 		  (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1966 					si->misalign, vect_body);
1967 		}
1968 	      body_cost_vec.release ();
1969 	    }
1970 
1971 	  stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1972 	  gcc_assert (stat);
1973           return stat;
1974         }
1975     }
1976 
1977   body_cost_vec.release ();
1978 
1979   /* (2) Versioning to force alignment.  */
1980 
1981   /* Try versioning if:
1982      1) flag_tree_vect_loop_version is TRUE
1983      2) optimize loop for speed
1984      3) there is at least one unsupported misaligned data ref with an unknown
1985         misalignment, and
1986      4) all misaligned data refs with a known misalignment are supported, and
1987      5) the number of runtime alignment checks is within reason.  */
1988 
1989   do_versioning =
1990 	flag_tree_vect_loop_version
1991 	&& optimize_loop_nest_for_speed_p (loop)
1992 	&& (!loop->inner); /* FORNOW */
1993 
1994   if (do_versioning)
1995     {
1996       FOR_EACH_VEC_ELT (datarefs, i, dr)
1997         {
1998 	  stmt = DR_STMT (dr);
1999 	  stmt_info = vinfo_for_stmt (stmt);
2000 
2001 	  /* For interleaving, only the alignment of the first access
2002 	     matters.  */
2003 	  if (aligned_access_p (dr)
2004 	      || (STMT_VINFO_GROUPED_ACCESS (stmt_info)
2005 		  && GROUP_FIRST_ELEMENT (stmt_info) != stmt))
2006 	    continue;
2007 
2008 	  /* Strided loads perform only component accesses, alignment is
2009 	     irrelevant for them.  */
2010 	  if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
2011 	    continue;
2012 
2013 	  supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
2014 
2015           if (!supportable_dr_alignment)
2016             {
2017               gimple stmt;
2018               int mask;
2019               tree vectype;
2020 
2021               if (known_alignment_for_access_p (dr)
2022                   || LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length ()
2023                      >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
2024                 {
2025                   do_versioning = false;
2026                   break;
2027                 }
2028 
2029               stmt = DR_STMT (dr);
2030               vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
2031               gcc_assert (vectype);
2032 
2033               /* The rightmost bits of an aligned address must be zeros.
2034                  Construct the mask needed for this test.  For example,
2035                  GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
2036                  mask must be 15 = 0xf. */
2037               mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
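              /* Illustrative note (an assumption, not from the original
                 source): the runtime guard of the versioned loop effectively
                 tests that the low bits of each such address are zero,
                 e.g. (addr & 15) == 0 for a 16-byte V4SI vector.  */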
2038 
2039               /* FORNOW: use the same mask to test all potentially unaligned
2040                  references in the loop.  The vectorizer currently supports
2041                  a single vector size, see the reference to
2042                  GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
2043                  vectorization factor is computed.  */
2044               gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
2045                           || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
2046               LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
2047               LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).safe_push (
2048 		      DR_STMT (dr));
2049             }
2050         }
2051 
2052       /* Versioning requires at least one misaligned data reference.  */
2053       if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
2054         do_versioning = false;
2055       else if (!do_versioning)
2056         LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).truncate (0);
2057     }
2058 
2059   if (do_versioning)
2060     {
2061       vec<gimple> may_misalign_stmts
2062         = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2063       gimple stmt;
2064 
2065       /* It can now be assumed that the data references in the statements
2066          in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
2067          of the loop being vectorized.  */
2068       FOR_EACH_VEC_ELT (may_misalign_stmts, i, stmt)
2069         {
2070           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2071           dr = STMT_VINFO_DATA_REF (stmt_info);
2072 	  SET_DR_MISALIGNMENT (dr, 0);
2073 	  if (dump_enabled_p ())
2074             dump_printf_loc (MSG_NOTE, vect_location,
2075                              "Alignment of access forced using versioning.");
2076         }
2077 
2078       if (dump_enabled_p ())
2079         dump_printf_loc (MSG_NOTE, vect_location,
2080                          "Versioning for alignment will be applied.");
2081 
2082       /* Peeling and versioning can't be done together at this time.  */
2083       gcc_assert (! (do_peeling && do_versioning));
2084 
2085       stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
2086       gcc_assert (stat);
2087       return stat;
2088     }
2089 
2090   /* This point is reached if neither peeling nor versioning is being done.  */
2091   gcc_assert (! (do_peeling || do_versioning));
2092 
2093   stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
2094   return stat;
2095 }
2096 
2097 
2098 /* Function vect_find_same_alignment_drs.
2099 
2100    Update group and alignment relations according to the chosen
2101    vectorization factor.  */
2102 
2103 static void
2104 vect_find_same_alignment_drs (struct data_dependence_relation *ddr,
2105 			      loop_vec_info loop_vinfo)
2106 {
2107   unsigned int i;
2108   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2109   int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2110   struct data_reference *dra = DDR_A (ddr);
2111   struct data_reference *drb = DDR_B (ddr);
2112   stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
2113   stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
2114   int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
2115   int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
2116   lambda_vector dist_v;
2117   unsigned int loop_depth;
2118 
2119   if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
2120     return;
2121 
2122   if (dra == drb)
2123     return;
2124 
2125   if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2126     return;
2127 
2128   /* Loop-based vectorization and known data dependence.  */
2129   if (DDR_NUM_DIST_VECTS (ddr) == 0)
2130     return;
2131 
2132   /* Data-dependence analysis reports a distance vector of zero
2133      for data-references that overlap only in the first iteration
2134      but have different sign step (see PR45764).
2135      So as a sanity check require equal DR_STEP.  */
2136   if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
2137     return;
2138 
2139   loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
2140   FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
2141     {
2142       int dist = dist_v[loop_depth];
2143 
2144       if (dump_enabled_p ())
2145 	dump_printf_loc (MSG_NOTE, vect_location,
2146                          "dependence distance  = %d.", dist);
2147 
2148       /* Same loop iteration.  */
2149       if (dist == 0
2150 	  || (dist % vectorization_factor == 0 && dra_size == drb_size))
2151 	{
2152 	  /* Two references with distance zero have the same alignment.  */
2153 	  STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a).safe_push (drb);
2154 	  STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b).safe_push (dra);
2155 	  if (dump_enabled_p ())
2156 	    {
2157               dump_printf_loc (MSG_NOTE, vect_location,
2158                                "accesses have the same alignment.");
2159 	      dump_printf (MSG_NOTE,
2160                            "dependence distance modulo vf == 0 between ");
2161 	      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
2162 	      dump_printf (MSG_NOTE,  " and ");
2163 	      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
2164 	    }
2165 	}
2166     }
2167 }
2168 
2169 
2170 /* Function vect_analyze_data_refs_alignment
2171 
2172    Analyze the alignment of the data-references in the loop.
2173    Return FALSE if a data reference is found that cannot be vectorized.  */
2174 
2175 bool
2176 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo,
2177                                   bb_vec_info bb_vinfo)
2178 {
2179   if (dump_enabled_p ())
2180     dump_printf_loc (MSG_NOTE, vect_location,
2181                      "=== vect_analyze_data_refs_alignment ===");
2182 
2183   /* Mark groups of data references with same alignment using
2184      data dependence information.  */
2185   if (loop_vinfo)
2186     {
2187       vec<ddr_p> ddrs = LOOP_VINFO_DDRS (loop_vinfo);
2188       struct data_dependence_relation *ddr;
2189       unsigned int i;
2190 
2191       FOR_EACH_VEC_ELT (ddrs, i, ddr)
2192 	vect_find_same_alignment_drs (ddr, loop_vinfo);
2193     }
2194 
2195   if (!vect_compute_data_refs_alignment (loop_vinfo, bb_vinfo))
2196     {
2197       if (dump_enabled_p ())
2198 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2199                          "not vectorized: can't calculate alignment "
2200                          "for data ref.");
2201       return false;
2202     }
2203 
2204   return true;
2205 }
2206 
2207 
2208 /* Analyze groups of accesses: check that DR belongs to a group of
2209    accesses of legal size, step, etc.  Detect gaps, single element
2210    interleaving, and other special cases. Set grouped access info.
2211    Collect groups of strided stores for further use in SLP analysis.  */
2212 
2213 static bool
2214 vect_analyze_group_access (struct data_reference *dr)
2215 {
2216   tree step = DR_STEP (dr);
2217   tree scalar_type = TREE_TYPE (DR_REF (dr));
2218   HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
2219   gimple stmt = DR_STMT (dr);
2220   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2221   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2222   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2223   HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2224   HOST_WIDE_INT groupsize, last_accessed_element = 1;
2225   bool slp_impossible = false;
2226   struct loop *loop = NULL;
2227 
2228   if (loop_vinfo)
2229     loop = LOOP_VINFO_LOOP (loop_vinfo);
2230 
2231   /* For interleaving, GROUPSIZE is STEP counted in elements, i.e., the
2232      size of the interleaving group (including gaps).  */
2233   groupsize = dr_step / type_size;
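  /* Illustrative example (not from the original comment): a 32-byte step
     with 4-byte scalar elements gives GROUPSIZE 8, i.e. each scalar
     iteration steps over the memory of 8 elements, gaps included.  */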
2234 
2235   /* A non-consecutive access is possible only if it is part of an interleaving group.  */
2236   if (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
2237     {
2238       /* Check if this DR is a part of interleaving, and is the single
2239	 element of the group that is accessed in the loop.  */
2240 
2241       /* Gaps are supported only for loads. STEP must be a multiple of the type
2242 	 size.  The size of the group must be a power of 2.  */
2243       if (DR_IS_READ (dr)
2244 	  && (dr_step % type_size) == 0
2245 	  && groupsize > 0
2246 	  && exact_log2 (groupsize) != -1)
2247 	{
2248 	  GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = stmt;
2249 	  GROUP_SIZE (vinfo_for_stmt (stmt)) = groupsize;
2250 	  if (dump_enabled_p ())
2251 	    {
2252 	      dump_printf_loc (MSG_NOTE, vect_location,
2253                                "Detected single element interleaving ");
2254 	      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr));
2255 	      dump_printf (MSG_NOTE, " step ");
2256 	      dump_generic_expr (MSG_NOTE, TDF_SLIM, step);
2257 	    }
2258 
2259 	  if (loop_vinfo)
2260 	    {
2261 	      if (dump_enabled_p ())
2262 		dump_printf_loc (MSG_NOTE, vect_location,
2263                                  "Data access with gaps requires scalar "
2264                                  "epilogue loop");
2265               if (loop->inner)
2266                 {
2267                   if (dump_enabled_p ())
2268                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2269                                      "Peeling for outer loop is not"
2270                                      " supported");
2271                   return false;
2272                 }
2273 
2274               LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2275 	    }
2276 
2277 	  return true;
2278 	}
2279 
2280       if (dump_enabled_p ())
2281         {
2282  	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2283                            "not consecutive access ");
2284           dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2285         }
2286 
2287       if (bb_vinfo)
2288         {
2289           /* Mark the statement as unvectorizable.  */
2290           STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2291           return true;
2292         }
2293 
2294       return false;
2295     }
2296 
2297   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt)
2298     {
2299       /* First stmt in the interleaving chain. Check the chain.  */
2300       gimple next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
2301       struct data_reference *data_ref = dr;
2302       unsigned int count = 1;
2303       tree next_step;
2304       tree prev_init = DR_INIT (data_ref);
2305       gimple prev = stmt;
2306       HOST_WIDE_INT diff, count_in_bytes, gaps = 0;
2307 
2308       while (next)
2309         {
2310           /* Skip same data-refs.  In case two or more stmts share a
2311              data-ref (supported only for loads), we vectorize only the first
2312              stmt, and the rest get their vectorized loads from the first
2313              one.  */
2314           if (!tree_int_cst_compare (DR_INIT (data_ref),
2315                                      DR_INIT (STMT_VINFO_DATA_REF (
2316 						   vinfo_for_stmt (next)))))
2317             {
2318               if (DR_IS_WRITE (data_ref))
2319                 {
2320                   if (dump_enabled_p ())
2321                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2322                                      "Two store stmts share the same dr.");
2323                   return false;
2324                 }
2325 
2326               /* Check that there are no load-store dependencies for these loads,
2327                  to prevent a load-store-load sequence to the same location.  */
2328               if (GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
2329                   || GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
2330                 {
2331                   if (dump_enabled_p ())
2332                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2333                                      "READ_WRITE dependence in interleaving.");
2334                   return false;
2335                 }
2336 
2337               /* For loads, use the same data-ref load.  */
2338               GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
2339 
2340               prev = next;
2341               next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
2342               continue;
2343             }
2344 
2345           prev = next;
2346 
2347           /* Check that all the accesses have the same STEP.  */
2348           next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
2349           if (tree_int_cst_compare (step, next_step))
2350             {
2351               if (dump_enabled_p ())
2352                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2353                                  "not consecutive access in interleaving");
2354               return false;
2355             }
2356 
2357           data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
2358           /* Check that the distance between two accesses is equal to the type
2359              size. Otherwise, we have gaps.  */
2360           diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
2361                   - TREE_INT_CST_LOW (prev_init)) / type_size;
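	  /* Illustrative example (not from the original comment): with
	     4-byte elements, DR_INITs of 0 and 8 give diff == 2, i.e. one
	     element-sized gap between the two group members.  */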
2362 	  if (diff != 1)
2363 	    {
2364 	      /* FORNOW: SLP of accesses with gaps is not supported.  */
2365 	      slp_impossible = true;
2366 	      if (DR_IS_WRITE (data_ref))
2367 		{
2368                   if (dump_enabled_p ())
2369                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2370                                      "interleaved store with gaps");
2371 		  return false;
2372 		}
2373 
2374               gaps += diff - 1;
2375 	    }
2376 
2377 	  last_accessed_element += diff;
2378 
2379           /* Store the gap from the previous member of the group. If there is no
2380              gap in the access, GROUP_GAP is always 1.  */
2381           GROUP_GAP (vinfo_for_stmt (next)) = diff;
2382 
2383           prev_init = DR_INIT (data_ref);
2384           next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
2385           /* Count the number of data-refs in the chain.  */
2386           count++;
2387         }
2388 
2389       /* COUNT is the number of accesses found, we multiply it by the size of
2390          the type to get COUNT_IN_BYTES.  */
2391       count_in_bytes = type_size * count;
2392 
2393       /* Check that the size of the interleaving (including gaps) is not
2394          greater than STEP.  */
2395       if (dr_step && dr_step < count_in_bytes + gaps * type_size)
2396         {
2397           if (dump_enabled_p ())
2398             {
2399               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2400                                "interleaving size is greater than step for ");
2401               dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (dr));
2402             }
2403           return false;
2404         }
2405 
2406       /* Check that the size of the interleaving is equal to STEP for stores,
2407          i.e., that there are no gaps.  */
2408       if (dr_step && dr_step != count_in_bytes)
2409         {
2410           if (DR_IS_READ (dr))
2411             {
2412               slp_impossible = true;
2413               /* There is a gap after the last load in the group. This gap is a
2414                  difference between the groupsize and the number of elements.
2415 		 When there is no gap, this difference should be 0.  */
2416               GROUP_GAP (vinfo_for_stmt (stmt)) = groupsize - count;
2417             }
2418           else
2419             {
2420               if (dump_enabled_p ())
2421                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2422                                  "interleaved store with gaps");
2423               return false;
2424             }
2425         }
2426 
2427       /* Check that STEP is a multiple of type size.  */
2428       if (dr_step && (dr_step % type_size) != 0)
2429         {
2430           if (dump_enabled_p ())
2431             {
2432               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2433                                "step is not a multiple of type size: step ");
2434               dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, step);
2435               dump_printf (MSG_MISSED_OPTIMIZATION, " size ");
2436               dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
2437                                  TYPE_SIZE_UNIT (scalar_type));
2438             }
2439           return false;
2440         }
2441 
2442       if (groupsize == 0)
2443         groupsize = count;
2444 
2445       GROUP_SIZE (vinfo_for_stmt (stmt)) = groupsize;
2446       if (dump_enabled_p ())
2447         dump_printf_loc (MSG_NOTE, vect_location,
2448                          "Detected interleaving of size %d", (int)groupsize);
2449 
2450       /* SLP: create an SLP data structure for every interleaving group of
2451	 stores for further analysis in vect_analyze_slp.  */
2452       if (DR_IS_WRITE (dr) && !slp_impossible)
2453         {
2454           if (loop_vinfo)
2455             LOOP_VINFO_GROUPED_STORES (loop_vinfo).safe_push (stmt);
2456           if (bb_vinfo)
2457             BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (stmt);
2458         }
2459 
2460       /* There is a gap in the end of the group.  */
2461       if (groupsize - last_accessed_element > 0 && loop_vinfo)
2462 	{
2463 	  if (dump_enabled_p ())
2464 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2465                              "Data access with gaps requires scalar "
2466                              "epilogue loop");
2467           if (loop->inner)
2468             {
2469               if (dump_enabled_p ())
2470                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2471                                  "Peeling for outer loop is not supported");
2472               return false;
2473             }
2474 
2475           LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2476 	}
2477     }
2478 
2479   return true;
2480 }
2481 
2482 
2483 /* Analyze the access pattern of the data-reference DR.
2484    In case of non-consecutive accesses call vect_analyze_group_access() to
2485    analyze groups of accesses.  */
2486 
2487 static bool
2488 vect_analyze_data_ref_access (struct data_reference *dr)
2489 {
2490   tree step = DR_STEP (dr);
2491   tree scalar_type = TREE_TYPE (DR_REF (dr));
2492   gimple stmt = DR_STMT (dr);
2493   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2494   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2495   struct loop *loop = NULL;
2496 
2497   if (loop_vinfo)
2498     loop = LOOP_VINFO_LOOP (loop_vinfo);
2499 
2500   if (loop_vinfo && !step)
2501     {
2502       if (dump_enabled_p ())
2503 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2504                          "bad data-ref access in loop");
2505       return false;
2506     }
2507 
2508   /* Allow invariant loads in non-nested loops.  */
2509   if (loop_vinfo && integer_zerop (step))
2510     {
2511       GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2512       if (nested_in_vect_loop_p (loop, stmt))
2513 	{
2514 	  if (dump_enabled_p ())
2515 	    dump_printf_loc (MSG_NOTE, vect_location,
2516 			     "zero step in inner loop of nest");
2517 	  return false;
2518 	}
2519       return DR_IS_READ (dr);
2520     }
2521 
2522   if (loop && nested_in_vect_loop_p (loop, stmt))
2523     {
2524       /* Interleaved accesses are not yet supported within outer-loop
2525         vectorization for references in the inner-loop.  */
2526       GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2527 
2528       /* For the rest of the analysis we use the outer-loop step.  */
2529       step = STMT_VINFO_DR_STEP (stmt_info);
2530       if (integer_zerop (step))
2531 	{
2532 	  if (dump_enabled_p ())
2533 	    dump_printf_loc (MSG_NOTE, vect_location,
2534                              "zero step in outer loop.");
2535 	  if (DR_IS_READ (dr))
2536   	    return true;
2537 	  else
2538 	    return false;
2539 	}
2540     }
2541 
2542   /* Consecutive?  */
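  /* Illustrative note (an assumption, not from the original comment): a
     4-byte scalar accessed with DR_STEP 4 is consecutive, and DR_STEP -4
     is the reverse-but-still-consecutive case handled by the negative-step
     branch of the test below.  */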
2543   if (TREE_CODE (step) == INTEGER_CST)
2544     {
2545       HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2546       if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
2547 	  || (dr_step < 0
2548 	      && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
2549 	{
2550 	  /* Mark that it is not interleaving.  */
2551 	  GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2552 	  return true;
2553 	}
2554     }
2555 
2556   if (loop && nested_in_vect_loop_p (loop, stmt))
2557     {
2558       if (dump_enabled_p ())
2559 	dump_printf_loc (MSG_NOTE, vect_location,
2560                          "grouped access in outer loop.");
2561       return false;
2562     }
2563 
2564   /* Assume this is a DR handled by the non-constant strided load case.  */
2565   if (TREE_CODE (step) != INTEGER_CST)
2566     return STMT_VINFO_STRIDE_LOAD_P (stmt_info);
2567 
2568   /* Not consecutive access - check if it's a part of interleaving group.  */
2569   return vect_analyze_group_access (dr);
2570 }
2571 
2572 
2573 /* Function vect_analyze_data_ref_accesses.
2574 
2575    Analyze the access pattern of all the data references in the loop.
2576 
2577    FORNOW: the only access pattern that is considered vectorizable is a
2578 	   simple step 1 (consecutive) access.
2579 
2580    FORNOW: handle only arrays and pointer accesses.  */
2581 
2582 bool
2583 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2584 {
2585   unsigned int i;
2586   vec<data_reference_p> datarefs;
2587   struct data_reference *dr;
2588 
2589   if (dump_enabled_p ())
2590     dump_printf_loc (MSG_NOTE, vect_location,
2591                      "=== vect_analyze_data_ref_accesses ===");
2592 
2593   if (loop_vinfo)
2594     datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2595   else
2596     datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2597 
2598   FOR_EACH_VEC_ELT (datarefs, i, dr)
2599     if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
2600         && !vect_analyze_data_ref_access (dr))
2601       {
2602 	if (dump_enabled_p ())
2603 	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2604                            "not vectorized: complicated access pattern.");
2605 
2606         if (bb_vinfo)
2607           {
2608             /* Mark the statement as not vectorizable.  */
2609             STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2610             continue;
2611           }
2612         else
2613           return false;
2614       }
2615 
2616   return true;
2617 }
2618 
2619 /* Function vect_prune_runtime_alias_test_list.
2620 
2621    Prune a list of ddrs to be tested at run-time by versioning for alias.
2622    Return FALSE if the resulting list of ddrs is longer than allowed by
2623    PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE.  */
2624 
2625 bool
2626 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
2627 {
2628   vec<ddr_p>  ddrs =
2629     LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2630   unsigned i, j;
2631 
2632   if (dump_enabled_p ())
2633     dump_printf_loc (MSG_NOTE, vect_location,
2634                      "=== vect_prune_runtime_alias_test_list ===");
2635 
2636   for (i = 0; i < ddrs.length (); )
2637     {
2638       bool found;
2639       ddr_p ddr_i;
2640 
2641       ddr_i = ddrs[i];
2642       found = false;
2643 
2644       for (j = 0; j < i; j++)
2645         {
2646 	  ddr_p ddr_j = ddrs[j];
2647 
2648 	  if (vect_vfa_range_equal (ddr_i, ddr_j))
2649 	    {
2650 	      if (dump_enabled_p ())
2651 		{
2652 		  dump_printf_loc (MSG_NOTE, vect_location,
2653                                    "found equal ranges ");
2654 		  dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_A (ddr_i)));
2655 		  dump_printf (MSG_NOTE,  ", ");
2656 		  dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_B (ddr_i)));
2657 		  dump_printf (MSG_NOTE,  " and ");
2658 		  dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_A (ddr_j)));
2659 		  dump_printf (MSG_NOTE,  ", ");
2660 		  dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_B (ddr_j)));
2661 		}
2662 	      found = true;
2663 	      break;
2664 	    }
2665 	}
2666 
2667       if (found)
2668       {
2669 	ddrs.ordered_remove (i);
2670 	continue;
2671       }
2672       i++;
2673     }
2674 
2675   if (ddrs.length () >
2676        (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
2677     {
2678       if (dump_enabled_p ())
2679 	{
2680 	  dump_printf_loc (MSG_MISSED_OPTIMIZATION,  vect_location,
2681                            "disable versioning for alias - max number of "
2682                            "generated checks exceeded.");
2683 	}
2684 
2685       LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).truncate (0);
2686 
2687       return false;
2688     }
2689 
2690   return true;
2691 }
2692 
2693 /* Check whether a non-affine read in stmt is suitable for gather load
2694    and if so, return a builtin decl for that operation.  */
2695 
2696 tree
2697 vect_check_gather (gimple stmt, loop_vec_info loop_vinfo, tree *basep,
2698 		   tree *offp, int *scalep)
2699 {
2700   HOST_WIDE_INT scale = 1, pbitpos, pbitsize;
2701   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2702   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2703   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2704   tree offtype = NULL_TREE;
2705   tree decl, base, off;
2706   enum machine_mode pmode;
2707   int punsignedp, pvolatilep;
2708 
2709   /* The gather builtins need address of the form
2710      loop_invariant + vector * {1, 2, 4, 8}
2711      or
2712      loop_invariant + sign_extend (vector) * { 1, 2, 4, 8 }.
2713      Unfortunately DR_BASE_ADDRESS/DR_OFFSET can be a mixture
2714      of loop invariants/SSA_NAMEs defined in the loop, with casts,
2715      multiplications and additions in it.  To get a vector, we need
2716      a single SSA_NAME that will be defined in the loop and will
2717      contain everything that is not loop invariant and that can be
2718      vectorized.  The following code attempts to find such a preexisting
2719      SSA_NAME OFF and put the loop invariants into a tree BASE
2720      that can be gimplified before the loop.  */
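  /* Illustrative example (not from the original comment): for a read such
     as p[idx[i]] with a loop-invariant pointer p and 4-byte elements, the
     intent is BASE = p (plus any constant offset), OFF = the SSA_NAME
     holding idx[i] defined inside the loop, and SCALE = 4, matching the
     loop_invariant + vector * {1, 2, 4, 8} form above.  */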
2721   base = get_inner_reference (DR_REF (dr), &pbitsize, &pbitpos, &off,
2722 			      &pmode, &punsignedp, &pvolatilep, false);
2723   gcc_assert (base != NULL_TREE && (pbitpos % BITS_PER_UNIT) == 0);
2724 
2725   if (TREE_CODE (base) == MEM_REF)
2726     {
2727       if (!integer_zerop (TREE_OPERAND (base, 1)))
2728 	{
2729 	  if (off == NULL_TREE)
2730 	    {
2731 	      double_int moff = mem_ref_offset (base);
2732 	      off = double_int_to_tree (sizetype, moff);
2733 	    }
2734 	  else
2735 	    off = size_binop (PLUS_EXPR, off,
2736 			      fold_convert (sizetype, TREE_OPERAND (base, 1)));
2737 	}
2738       base = TREE_OPERAND (base, 0);
2739     }
2740   else
2741     base = build_fold_addr_expr (base);
2742 
2743   if (off == NULL_TREE)
2744     off = size_zero_node;
2745 
2746   /* If base is not loop invariant, then, if off is 0, we start with just
2747      the constant offset in the loop invariant BASE and continue with base
2748      as OFF; otherwise give up.
2749      We could handle that case by gimplifying the addition of base + off
2750      into some SSA_NAME and using that as OFF, but for now punt.  */
2751   if (!expr_invariant_in_loop_p (loop, base))
2752     {
2753       if (!integer_zerop (off))
2754 	return NULL_TREE;
2755       off = base;
2756       base = size_int (pbitpos / BITS_PER_UNIT);
2757     }
2758   /* Otherwise put base + constant offset into the loop invariant BASE
2759      and continue with OFF.  */
2760   else
2761     {
2762       base = fold_convert (sizetype, base);
2763       base = size_binop (PLUS_EXPR, base, size_int (pbitpos / BITS_PER_UNIT));
2764     }
2765 
2766   /* OFF at this point may be either a SSA_NAME or some tree expression
2767      from get_inner_reference.  Try to peel off loop invariants from it
2768      into BASE as long as possible.  */
2769   STRIP_NOPS (off);
2770   while (offtype == NULL_TREE)
2771     {
2772       enum tree_code code;
2773       tree op0, op1, add = NULL_TREE;
2774 
2775       if (TREE_CODE (off) == SSA_NAME)
2776 	{
2777 	  gimple def_stmt = SSA_NAME_DEF_STMT (off);
2778 
2779 	  if (expr_invariant_in_loop_p (loop, off))
2780 	    return NULL_TREE;
2781 
2782 	  if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
2783 	    break;
2784 
2785 	  op0 = gimple_assign_rhs1 (def_stmt);
2786 	  code = gimple_assign_rhs_code (def_stmt);
2787 	  op1 = gimple_assign_rhs2 (def_stmt);
2788 	}
2789       else
2790 	{
2791 	  if (get_gimple_rhs_class (TREE_CODE (off)) == GIMPLE_TERNARY_RHS)
2792 	    return NULL_TREE;
2793 	  code = TREE_CODE (off);
2794 	  extract_ops_from_tree (off, &code, &op0, &op1);
2795 	}
2796       switch (code)
2797 	{
2798 	case POINTER_PLUS_EXPR:
2799 	case PLUS_EXPR:
2800 	  if (expr_invariant_in_loop_p (loop, op0))
2801 	    {
2802 	      add = op0;
2803 	      off = op1;
2804 	    do_add:
2805 	      add = fold_convert (sizetype, add);
2806 	      if (scale != 1)
2807 		add = size_binop (MULT_EXPR, add, size_int (scale));
2808 	      base = size_binop (PLUS_EXPR, base, add);
2809 	      continue;
2810 	    }
2811 	  if (expr_invariant_in_loop_p (loop, op1))
2812 	    {
2813 	      add = op1;
2814 	      off = op0;
2815 	      goto do_add;
2816 	    }
2817 	  break;
2818 	case MINUS_EXPR:
2819 	  if (expr_invariant_in_loop_p (loop, op1))
2820 	    {
2821 	      add = fold_convert (sizetype, op1);
2822 	      add = size_binop (MINUS_EXPR, size_zero_node, add);
2823 	      off = op0;
2824 	      goto do_add;
2825 	    }
2826 	  break;
2827 	case MULT_EXPR:
2828 	  if (scale == 1 && host_integerp (op1, 0))
2829 	    {
2830 	      scale = tree_low_cst (op1, 0);
2831 	      off = op0;
2832 	      continue;
2833 	    }
2834 	  break;
2835 	case SSA_NAME:
2836 	  off = op0;
2837 	  continue;
2838 	CASE_CONVERT:
2839 	  if (!POINTER_TYPE_P (TREE_TYPE (op0))
2840 	      && !INTEGRAL_TYPE_P (TREE_TYPE (op0)))
2841 	    break;
2842 	  if (TYPE_PRECISION (TREE_TYPE (op0))
2843 	      == TYPE_PRECISION (TREE_TYPE (off)))
2844 	    {
2845 	      off = op0;
2846 	      continue;
2847 	    }
2848 	  if (TYPE_PRECISION (TREE_TYPE (op0))
2849 	      < TYPE_PRECISION (TREE_TYPE (off)))
2850 	    {
2851 	      off = op0;
2852 	      offtype = TREE_TYPE (off);
2853 	      STRIP_NOPS (off);
2854 	      continue;
2855 	    }
2856 	  break;
2857 	default:
2858 	  break;
2859 	}
2860       break;
2861     }
2862 
2863   /* If at the end OFF still isn't a SSA_NAME or isn't
2864      defined in the loop, punt.  */
2865   if (TREE_CODE (off) != SSA_NAME
2866       || expr_invariant_in_loop_p (loop, off))
2867     return NULL_TREE;
2868 
2869   if (offtype == NULL_TREE)
2870     offtype = TREE_TYPE (off);
2871 
2872   decl = targetm.vectorize.builtin_gather (STMT_VINFO_VECTYPE (stmt_info),
2873 					   offtype, scale);
2874   if (decl == NULL_TREE)
2875     return NULL_TREE;
2876 
2877   if (basep)
2878     *basep = base;
2879   if (offp)
2880     *offp = off;
2881   if (scalep)
2882     *scalep = scale;
2883   return decl;
2884 }
2885 
2886 /* Check whether a non-affine load in STMT (being in the loop referred to
2887    in LOOP_VINFO) is suitable for handling as strided load.  That is the case
2888    if its address is a simple induction variable.  If so return the base
2889    of that induction variable in *BASEP and the (loop-invariant) step
2890    in *STEPP, both only when that pointer is non-zero.
2891 
2892    This handles ARRAY_REFs (with variant index) and MEM_REFs (with variant
2893    base pointer) only.  */
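/* Illustrative example (an assumption, not from the original comment): a
   load a[k] inside the loop, where k is advanced by a loop-invariant but
   non-constant stride each iteration, qualifies: the base "a" is invariant
   and k is a simple induction variable with step "stride".  */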
2894 
2895 static bool
2896 vect_check_strided_load (gimple stmt, loop_vec_info loop_vinfo)
2897 {
2898   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2899   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2900   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2901   tree base, off;
2902   affine_iv iv;
2903 
2904   if (!DR_IS_READ (dr))
2905     return false;
2906 
2907   base = DR_REF (dr);
2908 
2909   if (TREE_CODE (base) == ARRAY_REF)
2910     {
2911       off = TREE_OPERAND (base, 1);
2912       base = TREE_OPERAND (base, 0);
2913     }
2914   else if (TREE_CODE (base) == MEM_REF)
2915     {
2916       off = TREE_OPERAND (base, 0);
2917       base = TREE_OPERAND (base, 1);
2918     }
2919   else
2920     return false;
2921 
2922   if (TREE_CODE (off) != SSA_NAME)
2923     return false;
2924 
2925   if (!expr_invariant_in_loop_p (loop, base)
2926       || !simple_iv (loop, loop_containing_stmt (stmt), off, &iv, true))
2927     return false;
2928 
2929   return true;
2930 }
2931 
2932 /* Function vect_analyze_data_refs.
2933 
2934   Find all the data references in the loop or basic block.
2935 
2936    The general structure of the analysis of data refs in the vectorizer is as
2937    follows:
2938    1- vect_analyze_data_refs(loop/bb): call
2939       compute_data_dependences_for_loop/bb to find and analyze all data-refs
2940       in the loop/bb and their dependences.
2941    2- vect_analyze_dependences(): apply dependence testing using ddrs.
2942    3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
2943    4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
2944 
2945 */
2946 
2947 bool
2948 vect_analyze_data_refs (loop_vec_info loop_vinfo,
2949 			bb_vec_info bb_vinfo,
2950 			int *min_vf)
2951 {
2952   struct loop *loop = NULL;
2953   basic_block bb = NULL;
2954   unsigned int i;
2955   vec<data_reference_p> datarefs;
2956   struct data_reference *dr;
2957   tree scalar_type;
2958   bool res, stop_bb_analysis = false;
2959 
2960   if (dump_enabled_p ())
2961     dump_printf_loc (MSG_NOTE, vect_location,
2962                      "=== vect_analyze_data_refs ===\n");
2963 
2964   if (loop_vinfo)
2965     {
2966       loop = LOOP_VINFO_LOOP (loop_vinfo);
2967       res = compute_data_dependences_for_loop
2968 	(loop, true,
2969 	 &LOOP_VINFO_LOOP_NEST (loop_vinfo),
2970 	 &LOOP_VINFO_DATAREFS (loop_vinfo),
2971 	 &LOOP_VINFO_DDRS (loop_vinfo));
2972 
2973       if (!res)
2974 	{
2975 	  if (dump_enabled_p ())
2976 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2977                              "not vectorized: loop contains function calls"
2978                              " or data references that cannot be analyzed");
2979 	  return false;
2980 	}
2981 
2982       datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2983     }
2984   else
2985     {
2986       gimple_stmt_iterator gsi;
2987 
2988       bb = BB_VINFO_BB (bb_vinfo);
2989       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2990 	{
2991 	  gimple stmt = gsi_stmt (gsi);
2992 	  if (!find_data_references_in_stmt (NULL, stmt,
2993 					     &BB_VINFO_DATAREFS (bb_vinfo)))
2994 	    {
2995 	      /* Mark the rest of the basic-block as unvectorizable.  */
2996 	      for (; !gsi_end_p (gsi); gsi_next (&gsi))
2997 		{
2998 		  stmt = gsi_stmt (gsi);
2999 		  STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)) = false;
3000 		}
3001 	      break;
3002 	    }
3003 	}
3004       if (!compute_all_dependences (BB_VINFO_DATAREFS (bb_vinfo),
3005 				    &BB_VINFO_DDRS (bb_vinfo),
3006 				    vNULL, true))
3007 	{
3008 	  if (dump_enabled_p ())
3009 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3010                              "not vectorized: basic block contains function"
3011                              " calls or data references that cannot be"
3012                              " analyzed");
3013 	  return false;
3014 	}
3015 
3016       datarefs = BB_VINFO_DATAREFS (bb_vinfo);
3017     }
3018 
3019   /* Go through the data-refs, check that the analysis succeeded.  Update
3020      pointer from stmt_vec_info struct to DR and vectype.  */
3021 
3022   FOR_EACH_VEC_ELT (datarefs, i, dr)
3023     {
3024       gimple stmt;
3025       stmt_vec_info stmt_info;
3026       tree base, offset, init;
3027       bool gather = false;
3028       int vf;
3029 
3030       if (!dr || !DR_REF (dr))
3031         {
3032           if (dump_enabled_p ())
3033 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3034                              "not vectorized: unhandled data-ref ");
3035           return false;
3036         }
3037 
3038       stmt = DR_STMT (dr);
3039       stmt_info = vinfo_for_stmt (stmt);
3040 
3041       if (stop_bb_analysis)
3042         {
3043           STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3044           continue;
3045         }
3046 
3047       /* Check that analysis of the data-ref succeeded.  */
3048       if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
3049 	  || !DR_STEP (dr))
3050         {
3051 	  /* If the target supports vector gather loads, see if they can
3052 	     be used.  */
3053 	  if (loop_vinfo
3054 	      && DR_IS_READ (dr)
3055 	      && !TREE_THIS_VOLATILE (DR_REF (dr))
3056 	      && targetm.vectorize.builtin_gather != NULL
3057 	      && !nested_in_vect_loop_p (loop, stmt))
3058 	    {
3059 	      struct data_reference *newdr
3060 		= create_data_ref (NULL, loop_containing_stmt (stmt),
3061 				   DR_REF (dr), stmt, true);
3062 	      gcc_assert (newdr != NULL && DR_REF (newdr));
3063 	      if (DR_BASE_ADDRESS (newdr)
3064 		  && DR_OFFSET (newdr)
3065 		  && DR_INIT (newdr)
3066 		  && DR_STEP (newdr)
3067 		  && integer_zerop (DR_STEP (newdr)))
3068 		{
3069 		  dr = newdr;
3070 		  gather = true;
3071 		}
3072 	      else
3073 		free_data_ref (newdr);
3074 	    }
3075 
3076 	  if (!gather)
3077 	    {
3078 	      if (dump_enabled_p ())
3079 		{
3080 		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3081                                    "not vectorized: data ref analysis "
3082                                    "failed ");
3083 		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3084 		}
3085 
3086 	      if (bb_vinfo)
3087 		{
3088 		  STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3089 		  stop_bb_analysis = true;
3090 		  continue;
3091 		}
3092 
3093 	      return false;
3094 	    }
3095         }
3096 
3097       if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
3098         {
3099           if (dump_enabled_p ())
3100             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3101                              "not vectorized: base addr of dr is a "
3102                              "constant");
3103 
3104           if (bb_vinfo)
3105             {
3106               STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3107               stop_bb_analysis = true;
3108               continue;
3109             }
3110 
3111 	  if (gather)
3112 	    free_data_ref (dr);
3113 	  return false;
3114         }
3115 
3116       if (TREE_THIS_VOLATILE (DR_REF (dr)))
3117         {
3118           if (dump_enabled_p ())
3119             {
3120               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3121                                "not vectorized: volatile type ");
3122               dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3123             }
3124 
3125           if (bb_vinfo)
3126             {
3127               STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3128               stop_bb_analysis = true;
3129               continue;
3130             }
3131 
3132           return false;
3133         }
3134 
3135       if (stmt_can_throw_internal (stmt))
3136         {
3137           if (dump_enabled_p ())
3138             {
3139               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3140                                "not vectorized: statement can throw an "
3141                                "exception ");
3142               dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3143             }
3144 
3145           if (bb_vinfo)
3146             {
3147               STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3148               stop_bb_analysis = true;
3149               continue;
3150             }
3151 
3152 	  if (gather)
3153 	    free_data_ref (dr);
3154           return false;
3155         }
3156 
3157       if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF
3158 	  && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1)))
3159 	{
3160           if (dump_enabled_p ())
3161             {
3162               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3163                                "not vectorized: statement is bitfield "
3164                                "access ");
3165               dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3166             }
3167 
3168           if (bb_vinfo)
3169             {
3170               STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3171               stop_bb_analysis = true;
3172               continue;
3173             }
3174 
3175 	  if (gather)
3176 	    free_data_ref (dr);
3177           return false;
3178 	}
3179 
3180       base = unshare_expr (DR_BASE_ADDRESS (dr));
3181       offset = unshare_expr (DR_OFFSET (dr));
3182       init = unshare_expr (DR_INIT (dr));
3183 
3184       if (is_gimple_call (stmt))
3185 	{
3186 	  if (dump_enabled_p ())
3187 	    {
3188 	      dump_printf_loc (MSG_MISSED_OPTIMIZATION,  vect_location,
3189                                "not vectorized: dr in a call ");
3190 	      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3191 	    }
3192 
3193 	  if (bb_vinfo)
3194 	    {
3195 	      STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3196 	      stop_bb_analysis = true;
3197 	      continue;
3198 	    }
3199 
3200 	  if (gather)
3201 	    free_data_ref (dr);
3202 	  return false;
3203 	}
3204 
3205       /* Update DR field in stmt_vec_info struct.  */
3206 
3207       /* If the dataref is in an inner-loop of the loop that is considered
3208 	 for vectorization, we also want to analyze the access relative to
3209 	 the outer-loop (DR contains information only relative to the
3210 	 inner-most enclosing loop).  We do that by building a reference to the
3211 	 first location accessed by the inner-loop, and analyze it relative to
3212 	 the outer-loop.  */
3213       if (loop && nested_in_vect_loop_p (loop, stmt))
3214 	{
3215 	  tree outer_step, outer_base, outer_init;
3216 	  HOST_WIDE_INT pbitsize, pbitpos;
3217 	  tree poffset;
3218 	  enum machine_mode pmode;
3219 	  int punsignedp, pvolatilep;
3220 	  affine_iv base_iv, offset_iv;
3221 	  tree dinit;
3222 
3223 	  /* Build a reference to the first location accessed by the
3224 	     inner-loop: *(BASE+INIT).  (The first location is actually
3225 	     BASE+INIT+OFFSET, but we add OFFSET separately later).  */
3226           tree inner_base = build_fold_indirect_ref
3227                                 (fold_build_pointer_plus (base, init));
3228 
3229 	  if (dump_enabled_p ())
3230 	    {
3231 	      dump_printf_loc (MSG_NOTE, vect_location,
3232                                "analyze in outer-loop: ");
3233 	      dump_generic_expr (MSG_NOTE, TDF_SLIM, inner_base);
3234 	    }
3235 
3236 	  outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
3237 		          &poffset, &pmode, &punsignedp, &pvolatilep, false);
3238 	  gcc_assert (outer_base != NULL_TREE);
3239 
3240 	  if (pbitpos % BITS_PER_UNIT != 0)
3241 	    {
3242 	      if (dump_enabled_p ())
3243 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3244                                  "failed: bit offset alignment.\n");
3245 	      return false;
3246 	    }
3247 
3248 	  outer_base = build_fold_addr_expr (outer_base);
3249 	  if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
3250                           &base_iv, false))
3251 	    {
3252 	      if (dump_enabled_p ())
3253 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3254                                  "failed: evolution of base is not affine.\n");
3255 	      return false;
3256 	    }
3257 
3258 	  if (offset)
3259 	    {
3260 	      if (poffset)
3261 		poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
3262                                        poffset);
3263 	      else
3264 		poffset = offset;
3265 	    }
3266 
3267 	  if (!poffset)
3268 	    {
3269 	      offset_iv.base = ssize_int (0);
3270 	      offset_iv.step = ssize_int (0);
3271 	    }
3272 	  else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
3273                                &offset_iv, false))
3274 	    {
3275 	      if (dump_enabled_p ())
3276 	        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3277                                  "evolution of offset is not affine.\n");
3278 	      return false;
3279 	    }
3280 
3281 	  outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
3282 	  split_constant_offset (base_iv.base, &base_iv.base, &dinit);
3283 	  outer_init =  size_binop (PLUS_EXPR, outer_init, dinit);
3284 	  split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
3285 	  outer_init =  size_binop (PLUS_EXPR, outer_init, dinit);
3286 
3287 	  outer_step = size_binop (PLUS_EXPR,
3288 				fold_convert (ssizetype, base_iv.step),
3289 				fold_convert (ssizetype, offset_iv.step));
3290 
3291 	  STMT_VINFO_DR_STEP (stmt_info) = outer_step;
3292 	  /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
3293 	  STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
3294 	  STMT_VINFO_DR_INIT (stmt_info) = outer_init;
3295 	  STMT_VINFO_DR_OFFSET (stmt_info) =
3296 				fold_convert (ssizetype, offset_iv.base);
3297 	  STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
3298 				size_int (highest_pow2_factor (offset_iv.base));
3299 
3300           if (dump_enabled_p ())
3301 	    {
3302 	      dump_printf_loc (MSG_NOTE, vect_location,
3303                                "\touter base_address: ");
3304 	      dump_generic_expr (MSG_NOTE, TDF_SLIM,
3305                                  STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
3306 	      dump_printf (MSG_NOTE, "\n\touter offset from base address: ");
3307 	      dump_generic_expr (MSG_NOTE, TDF_SLIM,
3308                                  STMT_VINFO_DR_OFFSET (stmt_info));
3309 	      dump_printf (MSG_NOTE,
3310                            "\n\touter constant offset from base address: ");
3311 	      dump_generic_expr (MSG_NOTE, TDF_SLIM,
3312                                  STMT_VINFO_DR_INIT (stmt_info));
3313 	      dump_printf (MSG_NOTE, "\n\touter step: ");
3314 	      dump_generic_expr (MSG_NOTE, TDF_SLIM,
3315                                  STMT_VINFO_DR_STEP (stmt_info));
3316 	      dump_printf (MSG_NOTE, "\n\touter aligned to: ");
3317 	      dump_generic_expr (MSG_NOTE, TDF_SLIM,
3318                                  STMT_VINFO_DR_ALIGNED_TO (stmt_info));
3319 	    }
3320 	}
3321 
3322       if (STMT_VINFO_DATA_REF (stmt_info))
3323         {
3324           if (dump_enabled_p ())
3325             {
3326               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3327                                "not vectorized: more than one data ref "
3328                                "in stmt: ");
3329               dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3330             }
3331 
3332           if (bb_vinfo)
3333             {
3334               STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3335               stop_bb_analysis = true;
3336               continue;
3337             }
3338 
3339 	  if (gather)
3340 	    free_data_ref (dr);
3341           return false;
3342         }
3343 
3344       STMT_VINFO_DATA_REF (stmt_info) = dr;
3345 
3346       /* Set vectype for STMT.  */
3347       scalar_type = TREE_TYPE (DR_REF (dr));
3348       STMT_VINFO_VECTYPE (stmt_info) =
3349                 get_vectype_for_scalar_type (scalar_type);
3350       if (!STMT_VINFO_VECTYPE (stmt_info))
3351         {
3352           if (dump_enabled_p ())
3353             {
3354               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3355                                "not vectorized: no vectype for stmt: ");
3356               dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3357               dump_printf (MSG_MISSED_OPTIMIZATION, " scalar_type: ");
3358               dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_DETAILS,
3359                                  scalar_type);
3360             }
3361 
3362           if (bb_vinfo)
3363             {
3364               /* Mark the statement as not vectorizable.  */
3365               STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3366               stop_bb_analysis = true;
3367               continue;
3368             }
3369 
3370 	  if (gather)
3371 	    {
3372 	      STMT_VINFO_DATA_REF (stmt_info) = NULL;
3373 	      free_data_ref (dr);
3374 	    }
3375 	  return false;
3376         }
3377 
3378       /* Adjust the minimal vectorization factor according to the
3379 	 vector type.  */
3380       vf = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
3381       if (vf > *min_vf)
3382 	*min_vf = vf;
3383 
3384       if (gather)
3385 	{
3386 	  unsigned int j, k, n;
3387 	  struct data_reference *olddr
3388 	    = datarefs[i];
3389 	  vec<ddr_p> ddrs = LOOP_VINFO_DDRS (loop_vinfo);
3390 	  struct data_dependence_relation *ddr, *newddr;
3391 	  bool bad = false;
3392 	  tree off;
3393 	  vec<loop_p> nest = LOOP_VINFO_LOOP_NEST (loop_vinfo);
3394 
3395 	  gather = 0 != vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
3396 	  if (gather
3397 	      && get_vectype_for_scalar_type (TREE_TYPE (off)) == NULL_TREE)
3398 	    gather = false;
3399 	  if (!gather)
3400 	    {
3401 	      STMT_VINFO_DATA_REF (stmt_info) = NULL;
3402 	      free_data_ref (dr);
3403 	      if (dump_enabled_p ())
3404 		{
3405 		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3406                                    "not vectorized: not suitable for gather "
3407                                    "load ");
3408 		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3409 		}
3410 	      return false;
3411 	    }
3412 
3413 	  n = datarefs.length () - 1;
3414 	  for (j = 0, k = i - 1; j < i; j++)
3415 	    {
3416 	      ddr = ddrs[k];
3417 	      gcc_assert (DDR_B (ddr) == olddr);
3418 	      newddr = initialize_data_dependence_relation (DDR_A (ddr), dr,
3419 							    nest);
3420 	      ddrs[k] = newddr;
3421 	      free_dependence_relation (ddr);
3422 	      if (!bad
3423 		  && DR_IS_WRITE (DDR_A (newddr))
3424 		  && DDR_ARE_DEPENDENT (newddr) != chrec_known)
3425 		bad = true;
3426 	      k += --n;
3427 	    }
3428 
3429 	  k++;
3430 	  n = k + datarefs.length () - i - 1;
3431 	  for (; k < n; k++)
3432 	    {
3433 	      ddr = ddrs[k];
3434 	      gcc_assert (DDR_A (ddr) == olddr);
3435 	      newddr = initialize_data_dependence_relation (dr, DDR_B (ddr),
3436 							    nest);
3437 	      ddrs[k] = newddr;
3438 	      free_dependence_relation (ddr);
3439 	      if (!bad
3440 		  && DR_IS_WRITE (DDR_B (newddr))
3441 		  && DDR_ARE_DEPENDENT (newddr) != chrec_known)
3442 		bad = true;
3443 	    }
3444 
3445 	  k = ddrs.length ()
3446 	      - datarefs.length () + i;
3447 	  ddr = ddrs[k];
3448 	  gcc_assert (DDR_A (ddr) == olddr && DDR_B (ddr) == olddr);
3449 	  newddr = initialize_data_dependence_relation (dr, dr, nest);
3450 	  ddrs[k] = newddr;
3451 	  free_dependence_relation (ddr);
3452 	  datarefs[i] = dr;
3453 
3454 	  if (bad)
3455 	    {
3456 	      if (dump_enabled_p ())
3457 		{
3458 		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3459                                    "not vectorized: data dependence conflict"
3460                                    " prevents gather load");
3461 		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3462 		}
3463 	      return false;
3464 	    }
3465 
3466 	  STMT_VINFO_GATHER_P (stmt_info) = true;
3467 	}
3468       else if (loop_vinfo
3469 	       && TREE_CODE (DR_STEP (dr)) != INTEGER_CST)
3470 	{
3471 	  bool strided_load = false;
3472 	  if (!nested_in_vect_loop_p (loop, stmt))
3473 	    strided_load = vect_check_strided_load (stmt, loop_vinfo);
3474 	  if (!strided_load)
3475 	    {
3476 	      if (dump_enabled_p ())
3477 		{
3478 		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3479                                    "not vectorized: not suitable for strided "
3480                                    "load ");
3481 		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3482 		}
3483 	      return false;
3484 	    }
3485 	  STMT_VINFO_STRIDE_LOAD_P (stmt_info) = true;
3486 	}
3487     }
3488 
3489   return true;
3490 }
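
/* Illustrative example (added sketch; the loop and the names A, B and IDX
   are invented):

     for (i = 0; i < n; i++)
       a[i] = b[idx[i]];

   The read b[idx[i]] has no affine base/offset/init/step, so the generic
   data-ref analysis above fails for it.  If the target defines
   builtin_gather, the reference is re-created, vect_check_gather is
   consulted, the dependence relations are rebuilt against the new DR, and
   on success the statement is marked STMT_VINFO_GATHER_P.  */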
3491 
3492 
3493 /* Function vect_get_new_vect_var.
3494 
3495    Returns a new variable.  The name is formed by prepending the prefix
3496    "vect_", "stmp_" or "vect_p" (depending on the value of VAR_KIND) to
3497    NAME, if NAME is provided; otherwise the prefix alone is used for the
3498    name of the vectorizer generated variable.  */
3499 
3500 tree
3501 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
3502 {
3503   const char *prefix;
3504   tree new_vect_var;
3505 
3506   switch (var_kind)
3507   {
3508   case vect_simple_var:
3509     prefix = "vect_";
3510     break;
3511   case vect_scalar_var:
3512     prefix = "stmp_";
3513     break;
3514   case vect_pointer_var:
3515     prefix = "vect_p";
3516     break;
3517   default:
3518     gcc_unreachable ();
3519   }
3520 
3521   if (name)
3522     {
3523       char* tmp = concat (prefix, name, NULL);
3524       new_vect_var = create_tmp_reg (type, tmp);
3525       free (tmp);
3526     }
3527   else
3528     new_vect_var = create_tmp_reg (type, prefix);
3529 
3530   return new_vect_var;
3531 }
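
/* For example (illustrative only): for a scalar destination named "sum",
   vect_get_new_vect_var (vectype, vect_simple_var, "sum") returns a new
   temporary register whose name is based on "vect_sum"; with a NULL NAME
   the bare prefix "vect_" is used instead.  */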
3532 
3533 
3534 /* Function vect_create_addr_base_for_vector_ref.
3535 
3536    Create an expression that computes the address of the first memory location
3537    that will be accessed for a data reference.
3538 
3539    Input:
3540    STMT: The statement containing the data reference.
3541    NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
3542    OFFSET: Optional.  If supplied, it is added to the initial address.
3543    LOOP:    Specify relative to which loop-nest the address should be computed.
3544             For example, when the dataref is in an inner-loop nested in an
3545 	    outer-loop that is now being vectorized, LOOP can be either the
3546 	    outer-loop, or the inner-loop.  The first memory location accessed
3547 	    by the following dataref ('in' points to short):
3548 
3549 		for (i=0; i<N; i++)
3550 		   for (j=0; j<M; j++)
3551 		     s += in[i+j]
3552 
3553 	    is as follows:
3554 	    if LOOP=i_loop:	&in		(relative to i_loop)
3555 	    if LOOP=j_loop: 	&in+i*2B	(relative to j_loop)
3556    BYTE_OFFSET: Optional, defaulted to NULL.  If supplied, it is added to the
3557 	    initial address.  Unlike OFFSET, which is a number of elements to
3558 	    be added, BYTE_OFFSET is measured in bytes.
3559 
3560    Output:
3561    1. Return an SSA_NAME whose value is the address of the memory location of
3562       the first vector of the data reference.
3563    2. If new_stmt_list is not NULL_TREE after return then the caller must insert
3564       these statement(s) which define the returned SSA_NAME.
3565 
3566    FORNOW: We are only handling array accesses with step 1.  */
3567 
3568 tree
3569 vect_create_addr_base_for_vector_ref (gimple stmt,
3570 				      gimple_seq *new_stmt_list,
3571 				      tree offset,
3572 				      struct loop *loop,
3573 				      tree byte_offset)
3574 {
3575   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3576   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3577   tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
3578   const char *base_name;
3579   tree data_ref_base_var;
3580   tree vec_stmt;
3581   tree addr_base, addr_expr;
3582   tree dest;
3583   gimple_seq seq = NULL;
3584   tree base_offset = unshare_expr (DR_OFFSET (dr));
3585   tree init = unshare_expr (DR_INIT (dr));
3586   tree vect_ptr_type;
3587   tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
3588   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3589   tree base;
3590 
3591   if (loop_vinfo && loop && loop != (gimple_bb (stmt))->loop_father)
3592     {
3593       struct loop *outer_loop = LOOP_VINFO_LOOP (loop_vinfo);
3594 
3595       gcc_assert (nested_in_vect_loop_p (outer_loop, stmt));
3596 
3597       data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
3598       base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
3599       init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
3600     }
3601 
3602   if (loop_vinfo)
3603     base_name = get_name (data_ref_base);
3604   else
3605     {
3606       base_offset = ssize_int (0);
3607       init = ssize_int (0);
3608       base_name = get_name (DR_REF (dr));
3609     }
3610 
3611   data_ref_base_var = create_tmp_var (TREE_TYPE (data_ref_base), "batmp");
3612   data_ref_base = force_gimple_operand (data_ref_base, &seq, true,
3613 					data_ref_base_var);
3614   gimple_seq_add_seq (new_stmt_list, seq);
3615 
3616   /* Create base_offset */
3617   base_offset = size_binop (PLUS_EXPR,
3618 			    fold_convert (sizetype, base_offset),
3619 			    fold_convert (sizetype, init));
3620   dest = create_tmp_var (sizetype, "base_off");
3621   base_offset = force_gimple_operand (base_offset, &seq, true, dest);
3622   gimple_seq_add_seq (new_stmt_list, seq);
3623 
3624   if (offset)
3625     {
3626       tree tmp = create_tmp_var (sizetype, "offset");
3627 
3628       offset = fold_build2 (MULT_EXPR, sizetype,
3629 			    fold_convert (sizetype, offset), step);
3630       base_offset = fold_build2 (PLUS_EXPR, sizetype,
3631 				 base_offset, offset);
3632       base_offset = force_gimple_operand (base_offset, &seq, false, tmp);
3633       gimple_seq_add_seq (new_stmt_list, seq);
3634     }
3635   if (byte_offset)
3636     {
3637       tree tmp = create_tmp_var (sizetype, "offset");
3638 
3639       byte_offset = fold_convert (sizetype, byte_offset);
3640       base_offset = fold_build2 (PLUS_EXPR, sizetype,
3641 				 base_offset, byte_offset);
3642       base_offset = force_gimple_operand (base_offset, &seq, false, tmp);
3643       gimple_seq_add_seq (new_stmt_list, seq);
3644     }
3645 
3646   /* base + base_offset */
3647   if (loop_vinfo)
3648     addr_base = fold_build_pointer_plus (data_ref_base, base_offset);
3649   else
3650     {
3651       addr_base = build1 (ADDR_EXPR,
3652 			  build_pointer_type (TREE_TYPE (DR_REF (dr))),
3653 			  unshare_expr (DR_REF (dr)));
3654     }
3655 
3656   vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
3657   base = get_base_address (DR_REF (dr));
3658   if (base
3659       && TREE_CODE (base) == MEM_REF)
3660     vect_ptr_type
3661       = build_qualified_type (vect_ptr_type,
3662 			      TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
3663 
3664   vec_stmt = fold_convert (vect_ptr_type, addr_base);
3665   addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
3666                                      base_name);
3667   vec_stmt = force_gimple_operand (vec_stmt, &seq, false, addr_expr);
3668   gimple_seq_add_seq (new_stmt_list, seq);
3669 
3670   if (DR_PTR_INFO (dr)
3671       && TREE_CODE (vec_stmt) == SSA_NAME)
3672     {
3673       duplicate_ssa_name_ptr_info (vec_stmt, DR_PTR_INFO (dr));
3674       if (offset)
3675 	mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (vec_stmt));
3676     }
3677 
3678   if (dump_enabled_p ())
3679     {
3680       dump_printf_loc (MSG_NOTE, vect_location, "created ");
3681       dump_generic_expr (MSG_NOTE, TDF_SLIM, vec_stmt);
3682     }
3683 
3684   return vec_stmt;
3685 }
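
/* Illustrative sketch of the sequence built above for a loop data-ref
   (added example; the SSA names and the element count OFFSET are made up):

     batmp_1    = DR_BASE_ADDRESS;
     base_off_2 = (sizetype) DR_OFFSET + (sizetype) DR_INIT
                  + (sizetype) OFFSET * step;
     vect_pa_3  = (vectype *) (batmp_1 p+ base_off_2);

   The statements defining these names are appended to NEW_STMT_LIST and
   the final SSA name (vect_pa_3 here) is returned.  */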
3686 
3687 
3688 /* Function vect_create_data_ref_ptr.
3689 
3690    Create a new pointer-to-AGGR_TYPE variable (ap), that points to the first
3691    location accessed in the loop by STMT, along with the def-use update
3692    chain to appropriately advance the pointer through the loop iterations.
3693    Also set aliasing information for the pointer.  This pointer is used by
3694    the callers to this function to create a memory reference expression for
3695    vector load/store access.
3696 
3697    Input:
3698    1. STMT: a stmt that references memory. Expected to be of the form
3699          GIMPLE_ASSIGN <name, data-ref> or
3700 	 GIMPLE_ASSIGN <data-ref, name>.
3701    2. AGGR_TYPE: the type of the reference, which should be either a vector
3702         or an array.
3703    3. AT_LOOP: the loop where the vector memref is to be created.
3704    4. OFFSET (optional): an offset to be added to the initial address accessed
3705         by the data-ref in STMT.
3706    5. BSI: location where the new stmts are to be placed if there is no loop
3707    6. ONLY_INIT: indicate if ap is to be updated in the loop, or remain
3708         pointing to the initial address.
3709    7. BYTE_OFFSET (optional, defaults to NULL): a byte offset to be added
3710 	to the initial address accessed by the data-ref in STMT.  This is
3711 	similar to OFFSET, but OFFSET is counted in elements, while BYTE_OFFSET
3712 	in bytes.
3713 
3714    Output:
3715    1. Declare a new ptr to vector_type, and have it point to the base of the
3716       data reference (initial addressed accessed by the data reference).
3717       data reference (the initial address accessed by the data reference).
3718 
3719       v8hi *ap;
3720       ap = (v8hi *)initial_address;
3721 
3722       if OFFSET is not supplied:
3723          initial_address = &a[init];
3724       if OFFSET is supplied:
3725          initial_address = &a[init + OFFSET];
3726       if BYTE_OFFSET is supplied:
3727 	 initial_address = &a[init] + BYTE_OFFSET;
3728 
3729       Return the initial_address in INITIAL_ADDRESS.
3730 
3731    2. If ONLY_INIT is true, just return the initial pointer.  Otherwise, also
3732       update the pointer in each iteration of the loop.
3733 
3734       Return the increment stmt that updates the pointer in PTR_INCR.
3735 
3736    3. Set INV_P to true if the access pattern of the data reference in the
3737       vectorized loop is invariant.  Set it to false otherwise.
3738 
3739    4. Return the pointer.  */
3740 
3741 tree
3742 vect_create_data_ref_ptr (gimple stmt, tree aggr_type, struct loop *at_loop,
3743 			  tree offset, tree *initial_address,
3744 			  gimple_stmt_iterator *gsi, gimple *ptr_incr,
3745 			  bool only_init, bool *inv_p, tree byte_offset)
3746 {
3747   const char *base_name;
3748   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3749   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3750   struct loop *loop = NULL;
3751   bool nested_in_vect_loop = false;
3752   struct loop *containing_loop = NULL;
3753   tree aggr_ptr_type;
3754   tree aggr_ptr;
3755   tree new_temp;
3756   gimple vec_stmt;
3757   gimple_seq new_stmt_list = NULL;
3758   edge pe = NULL;
3759   basic_block new_bb;
3760   tree aggr_ptr_init;
3761   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3762   tree aptr;
3763   gimple_stmt_iterator incr_gsi;
3764   bool insert_after;
3765   bool negative;
3766   tree indx_before_incr, indx_after_incr;
3767   gimple incr;
3768   tree step;
3769   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3770   tree base;
3771 
3772   gcc_assert (TREE_CODE (aggr_type) == ARRAY_TYPE
3773 	      || TREE_CODE (aggr_type) == VECTOR_TYPE);
3774 
3775   if (loop_vinfo)
3776     {
3777       loop = LOOP_VINFO_LOOP (loop_vinfo);
3778       nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3779       containing_loop = (gimple_bb (stmt))->loop_father;
3780       pe = loop_preheader_edge (loop);
3781     }
3782   else
3783     {
3784       gcc_assert (bb_vinfo);
3785       only_init = true;
3786       *ptr_incr = NULL;
3787     }
3788 
3789   /* Check the step (evolution) of the load in LOOP, and record
3790      whether it's invariant.  */
3791   if (nested_in_vect_loop)
3792     step = STMT_VINFO_DR_STEP (stmt_info);
3793   else
3794     step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
3795 
3796   if (tree_int_cst_compare (step, size_zero_node) == 0)
3797     *inv_p = true;
3798   else
3799     *inv_p = false;
3800   negative = tree_int_cst_compare (step, size_zero_node) < 0;
3801 
3802   /* Create an expression for the first address accessed by this load
3803      in LOOP.  */
3804   base_name = get_name (DR_BASE_ADDRESS (dr));
3805 
3806   if (dump_enabled_p ())
3807     {
3808       tree dr_base_type = TREE_TYPE (DR_BASE_OBJECT (dr));
3809       dump_printf_loc (MSG_NOTE, vect_location,
3810                        "create %s-pointer variable to type: ",
3811                        tree_code_name[(int) TREE_CODE (aggr_type)]);
3812       dump_generic_expr (MSG_NOTE, TDF_SLIM, aggr_type);
3813       if (TREE_CODE (dr_base_type) == ARRAY_TYPE)
3814         dump_printf (MSG_NOTE, "  vectorizing an array ref: ");
3815       else if (TREE_CODE (dr_base_type) == RECORD_TYPE)
3816         dump_printf (MSG_NOTE, "  vectorizing a record based array ref: ");
3817       else
3818         dump_printf (MSG_NOTE, "  vectorizing a pointer ref: ");
3819       dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_BASE_OBJECT (dr));
3820     }
3821 
3822   /* (1) Create the new aggregate-pointer variable.  */
3823   aggr_ptr_type = build_pointer_type (aggr_type);
3824   base = get_base_address (DR_REF (dr));
3825   if (base
3826       && TREE_CODE (base) == MEM_REF)
3827     aggr_ptr_type
3828       = build_qualified_type (aggr_ptr_type,
3829 			      TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
3830   aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var, base_name);
3831 
3832   /* Vector and array types inherit the alias set of their component
3833      type by default so we need to use a ref-all pointer if the data
3834      reference does not conflict with the created aggregated data
3835      reference because it is not addressable.  */
3836   if (!alias_sets_conflict_p (get_deref_alias_set (aggr_ptr),
3837 			      get_alias_set (DR_REF (dr))))
3838     {
3839       aggr_ptr_type
3840 	= build_pointer_type_for_mode (aggr_type,
3841 				       TYPE_MODE (aggr_ptr_type), true);
3842       aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
3843 					base_name);
3844     }
3845 
3846   /* Likewise for any of the data references in the stmt group.  */
3847   else if (STMT_VINFO_GROUP_SIZE (stmt_info) > 1)
3848     {
3849       gimple orig_stmt = STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info);
3850       do
3851 	{
3852 	  tree lhs = gimple_assign_lhs (orig_stmt);
3853 	  if (!alias_sets_conflict_p (get_deref_alias_set (aggr_ptr),
3854 				      get_alias_set (lhs)))
3855 	    {
3856 	      aggr_ptr_type
3857 		= build_pointer_type_for_mode (aggr_type,
3858 					       TYPE_MODE (aggr_ptr_type), true);
3859 	      aggr_ptr
3860 		= vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
3861 					 base_name);
3862 	      break;
3863 	    }
3864 
3865 	  orig_stmt = STMT_VINFO_GROUP_NEXT_ELEMENT (vinfo_for_stmt (orig_stmt));
3866 	}
3867       while (orig_stmt);
3868     }
3869 
3870   /* Note: If the dataref is in an inner-loop nested in LOOP, and we are
3871      vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
3872      def-use update cycles for the pointer: one relative to the outer-loop
3873      (LOOP), which is what steps (3) and (4) below do.  The other is relative
3874      to the inner-loop (which is the inner-most loop containing the dataref),
3875      and this is done be step (5) below.
3876      and this is done by step (5) below.
3877      When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
3878      inner-most loop, and so steps (3),(4) work the same, and step (5) is
3879      redundant.  Steps (3),(4) create the following:
3880 
3881 	vp0 = &base_addr;
3882 	LOOP:	vp1 = phi(vp0,vp2)
3883 		...
3884 		...
3885 		vp2 = vp1 + step
3886 		goto LOOP
3887 
3888      If there is an inner-loop nested in loop, then step (5) will also be
3889      applied, and an additional update in the inner-loop will be created:
3890 
3891 	vp0 = &base_addr;
3892 	LOOP:   vp1 = phi(vp0,vp2)
3893 		...
3894         inner:     vp3 = phi(vp1,vp4)
3895 	           vp4 = vp3 + inner_step
3896 	           if () goto inner
3897 		...
3898 		vp2 = vp1 + step
3899 		if () goto LOOP   */
3900 
3901   /* (2) Calculate the initial address of the aggregate-pointer, and set
3902      the aggregate-pointer to point to it before the loop.  */
3903 
3904   /* Create: (&(base[init_val+offset]+byte_offset) in the loop preheader.  */
3905 
3906   new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
3907 						   offset, loop, byte_offset);
3908   if (new_stmt_list)
3909     {
3910       if (pe)
3911         {
3912           new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
3913           gcc_assert (!new_bb);
3914         }
3915       else
3916         gsi_insert_seq_before (gsi, new_stmt_list, GSI_SAME_STMT);
3917     }
3918 
3919   *initial_address = new_temp;
3920 
3921   /* Create: p = (aggr_type *) initial_base  */
3922   if (TREE_CODE (new_temp) != SSA_NAME
3923       || !useless_type_conversion_p (aggr_ptr_type, TREE_TYPE (new_temp)))
3924     {
3925       vec_stmt = gimple_build_assign (aggr_ptr,
3926 				      fold_convert (aggr_ptr_type, new_temp));
3927       aggr_ptr_init = make_ssa_name (aggr_ptr, vec_stmt);
3928       /* Copy the points-to information if it exists. */
3929       if (DR_PTR_INFO (dr))
3930 	duplicate_ssa_name_ptr_info (aggr_ptr_init, DR_PTR_INFO (dr));
3931       gimple_assign_set_lhs (vec_stmt, aggr_ptr_init);
3932       if (pe)
3933 	{
3934 	  new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt);
3935 	  gcc_assert (!new_bb);
3936 	}
3937       else
3938 	gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
3939     }
3940   else
3941     aggr_ptr_init = new_temp;
3942 
3943   /* (3) Handle the updating of the aggregate-pointer inside the loop.
3944      This is needed when ONLY_INIT is false, and also when AT_LOOP is the
3945      inner-loop nested in LOOP (during outer-loop vectorization).  */
3946 
3947   /* No update in loop is required.  */
3948   if (only_init && (!loop_vinfo || at_loop == loop))
3949     aptr = aggr_ptr_init;
3950   else
3951     {
3952       /* The step of the aggregate pointer is the type size.  */
3953       tree step = TYPE_SIZE_UNIT (aggr_type);
3954       /* One exception to the above is when the scalar step of the load in
3955 	 LOOP is zero. In this case the step here is also zero.  */
3956       if (*inv_p)
3957 	step = size_zero_node;
3958       else if (negative)
3959 	step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3960 
3961       standard_iv_increment_position (loop, &incr_gsi, &insert_after);
3962 
3963       create_iv (aggr_ptr_init,
3964 		 fold_convert (aggr_ptr_type, step),
3965 		 aggr_ptr, loop, &incr_gsi, insert_after,
3966 		 &indx_before_incr, &indx_after_incr);
3967       incr = gsi_stmt (incr_gsi);
3968       set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3969 
3970       /* Copy the points-to information if it exists. */
3971       if (DR_PTR_INFO (dr))
3972 	{
3973 	  duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3974 	  duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3975 	}
3976       if (ptr_incr)
3977 	*ptr_incr = incr;
3978 
3979       aptr = indx_before_incr;
3980     }
3981 
3982   if (!nested_in_vect_loop || only_init)
3983     return aptr;
3984 
3985 
3986   /* (4) Handle the updating of the aggregate-pointer inside the inner-loop
3987      nested in LOOP, if exists.  */
3988      nested in LOOP, if it exists.  */
3989   gcc_assert (nested_in_vect_loop);
3990   if (!only_init)
3991     {
3992       standard_iv_increment_position (containing_loop, &incr_gsi,
3993 				      &insert_after);
3994       create_iv (aptr, fold_convert (aggr_ptr_type, DR_STEP (dr)), aggr_ptr,
3995 		 containing_loop, &incr_gsi, insert_after, &indx_before_incr,
3996 		 &indx_after_incr);
3997       incr = gsi_stmt (incr_gsi);
3998       set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3999 
4000       /* Copy the points-to information if it exists. */
4001       if (DR_PTR_INFO (dr))
4002 	{
4003 	  duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
4004 	  duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
4005 	}
4006       if (ptr_incr)
4007 	*ptr_incr = incr;
4008 
4009       return indx_before_incr;
4010     }
4011   else
4012     gcc_unreachable ();
4013 }
4014 
4015 
4016 /* Function bump_vector_ptr
4017 
4018    Increment a pointer (to a vector type) by vector-size. If requested,
4019    i.e. if PTR-INCR is given, then also connect the new increment stmt
4020    to the existing def-use update-chain of the pointer, by modifying
4021    the PTR_INCR as illustrated below:
4022 
4023    The pointer def-use update-chain before this function:
4024                         DATAREF_PTR = phi (p_0, p_2)
4025                         ....
4026         PTR_INCR:       p_2 = DATAREF_PTR + step
4027 
4028    The pointer def-use update-chain after this function:
4029                         DATAREF_PTR = phi (p_0, p_2)
4030                         ....
4031                         NEW_DATAREF_PTR = DATAREF_PTR + BUMP
4032                         ....
4033         PTR_INCR:       p_2 = NEW_DATAREF_PTR + step
4034 
4035    Input:
4036    DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
4037                  in the loop.
4038    PTR_INCR - optional. The stmt that updates the pointer in each iteration of
4039 	      the loop.  The increment amount across iterations is expected
4040 	      to be vector_size.
4041    BSI - location where the new update stmt is to be placed.
4042    STMT - the original scalar memory-access stmt that is being vectorized.
4043    BUMP - optional. The offset by which to bump the pointer. If not given,
4044 	  the offset is assumed to be vector_size.
4045 
4046    Output: Return NEW_DATAREF_PTR as illustrated above.
4047 
4048 */
4049 
4050 tree
4051 bump_vector_ptr (tree dataref_ptr, gimple ptr_incr, gimple_stmt_iterator *gsi,
4052 		 gimple stmt, tree bump)
4053 {
4054   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4055   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4056   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4057   tree update = TYPE_SIZE_UNIT (vectype);
4058   gimple incr_stmt;
4059   ssa_op_iter iter;
4060   use_operand_p use_p;
4061   tree new_dataref_ptr;
4062 
4063   if (bump)
4064     update = bump;
4065 
4066   new_dataref_ptr = copy_ssa_name (dataref_ptr, NULL);
4067   incr_stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, new_dataref_ptr,
4068 					    dataref_ptr, update);
4069   vect_finish_stmt_generation (stmt, incr_stmt, gsi);
4070 
4071   /* Copy the points-to information if it exists. */
4072   if (DR_PTR_INFO (dr))
4073     {
4074       duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
4075       mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (new_dataref_ptr));
4076     }
4077 
4078   if (!ptr_incr)
4079     return new_dataref_ptr;
4080 
4081   /* Update the vector-pointer's cross-iteration increment.  */
4082   FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
4083     {
4084       tree use = USE_FROM_PTR (use_p);
4085 
4086       if (use == dataref_ptr)
4087         SET_USE (use_p, new_dataref_ptr);
4088       else
4089         gcc_assert (tree_int_cst_compare (use, update) == 0);
4090     }
4091 
4092   return new_dataref_ptr;
4093 }
4094 
4095 
4096 /* Function vect_create_destination_var.
4097 
4098    Create a new temporary of type VECTYPE.  */
4099 
4100 tree
4101 vect_create_destination_var (tree scalar_dest, tree vectype)
4102 {
4103   tree vec_dest;
4104   const char *new_name;
4105   tree type;
4106   enum vect_var_kind kind;
4107 
4108   kind = vectype ? vect_simple_var : vect_scalar_var;
4109   type = vectype ? vectype : TREE_TYPE (scalar_dest);
4110 
4111   gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
4112 
4113   new_name = get_name (scalar_dest);
4114   if (!new_name)
4115     new_name = "var_";
4116   vec_dest = vect_get_new_vect_var (type, kind, new_name);
4117 
4118   return vec_dest;
4119 }
4120 
4121 /* Function vect_grouped_store_supported.
4122 
4123    Returns TRUE if interleave high and interleave low permutations
4124    are supported, and FALSE otherwise.  */
4125 
4126 bool
4127 vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
4128 {
4129   enum machine_mode mode = TYPE_MODE (vectype);
4130 
4131   /* vect_permute_store_chain requires the group size to be a power of two.  */
4132   if (exact_log2 (count) == -1)
4133     {
4134       if (dump_enabled_p ())
4135 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4136                          "the size of the group of accesses"
4137                          " is not a power of 2");
4138       return false;
4139     }
4140 
4141   /* Check that the permutation is supported.  */
4142   if (VECTOR_MODE_P (mode))
4143     {
4144       unsigned int i, nelt = GET_MODE_NUNITS (mode);
4145       unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
4146       for (i = 0; i < nelt / 2; i++)
4147 	{
4148 	  sel[i * 2] = i;
4149 	  sel[i * 2 + 1] = i + nelt;
4150 	}
4151       if (can_vec_perm_p (mode, false, sel))
4152 	{
4153 	  for (i = 0; i < nelt; i++)
4154 	    sel[i] += nelt / 2;
4155 	  if (can_vec_perm_p (mode, false, sel))
4156 	    return true;
4157 	}
4158     }
4159 
4160   if (dump_enabled_p ())
4161     dump_printf (MSG_MISSED_OPTIMIZATION,
4162                  "interleave op not supported by target.");
4163   return false;
4164 }
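
/* For example (illustrative only), with an eight-element vector mode
   (nelt == 8) the two selectors checked above are

     { 0,  8, 1,  9, 2, 10, 3, 11 }   and   { 4, 12, 5, 13, 6, 14, 7, 15 }

   i.e. they element-interleave the first halves and the second halves,
   respectively, of the two input vectors.  */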
4165 
4166 
4167 /* Return TRUE if vec_store_lanes is available for COUNT vectors of
4168    type VECTYPE.  */
4169 
4170 bool
4171 vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
4172 {
4173   return vect_lanes_optab_supported_p ("vec_store_lanes",
4174 				       vec_store_lanes_optab,
4175 				       vectype, count);
4176 }
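
/* Illustrative note (not from the original comments): vec_store_lanes
   matches targets that can store COUNT vectors of interleaved structure
   data in one operation, e.g. NEON-style vst2/vst3/vst4 instructions;
   where the optab query above fails, grouped stores typically fall back
   to the interleaving permutes generated by vect_permute_store_chain.  */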
4177 
4178 
4179 /* Function vect_permute_store_chain.
4180 
4181    Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
4182    a power of 2, generate interleave_high/low stmts to reorder the data
4183    correctly for the stores.  Return the final references for stores in
4184    RESULT_CHAIN.
4185 
4186    E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
4187    The input is 4 vectors each containing 8 elements.  We assign a number to
4188    each element, the input sequence is:
4189 
4190    1st vec:   0  1  2  3  4  5  6  7
4191    2nd vec:   8  9 10 11 12 13 14 15
4192    3rd vec:  16 17 18 19 20 21 22 23
4193    4th vec:  24 25 26 27 28 29 30 31
4194 
4195    The output sequence should be:
4196 
4197    1st vec:  0  8 16 24  1  9 17 25
4198    2nd vec:  2 10 18 26  3 11 19 27
4199    3rd vec:  4 12 20 28  5 13 21 29
4200    4th vec:  6 14 22 30  7 15 23 31
4201 
4202    i.e., we interleave the contents of the four vectors in their order.
4203 
4204    We use interleave_high/low instructions to create such output.  The input of
4205    each interleave_high/low operation is two vectors:
4206    1st vec    2nd vec
4207    0 1 2 3    4 5 6 7
4208    the even elements of the result vector are obtained left-to-right from the
4209    high/low elements of the first vector.  The odd elements of the result are
4210    obtained left-to-right from the high/low elements of the second vector.
4211    The output of interleave_high will be:   0 4 1 5
4212    and of interleave_low:                   2 6 3 7
4213 
4214 
4215    The permutation is done in log LENGTH stages.  In each stage interleave_high
4216    and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
4217    where the first argument is taken from the first half of DR_CHAIN and the
4218    second argument from it's second half.
4219    second argument from its second half.
4220 
4221    I1: interleave_high (1st vec, 3rd vec)
4222    I2: interleave_low (1st vec, 3rd vec)
4223    I3: interleave_high (2nd vec, 4th vec)
4224    I4: interleave_low (2nd vec, 4th vec)
4225 
4226    The output for the first stage is:
4227 
4228    I1:  0 16  1 17  2 18  3 19
4229    I2:  4 20  5 21  6 22  7 23
4230    I3:  8 24  9 25 10 26 11 27
4231    I4: 12 28 13 29 14 30 15 31
4232 
4233    The output of the second stage, i.e. the final result is:
4234 
4235    I1:  0  8 16 24  1  9 17 25
4236    I2:  2 10 18 26  3 11 19 27
4237    I3:  4 12 20 28  5 13 21 29
4238    I4:  6 14 22 30  7 15 23 31.  */
4239 
4240 void
4241 vect_permute_store_chain (vec<tree> dr_chain,
4242 			  unsigned int length,
4243 			  gimple stmt,
4244 			  gimple_stmt_iterator *gsi,
4245 			  vec<tree> *result_chain)
4246 {
4247   tree vect1, vect2, high, low;
4248   gimple perm_stmt;
4249   tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
4250   tree perm_mask_low, perm_mask_high;
4251   unsigned int i, n;
4252   unsigned int j, nelt = TYPE_VECTOR_SUBPARTS (vectype);
4253   unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
4254 
4255   result_chain->quick_grow (length);
4256   memcpy (result_chain->address (), dr_chain.address (),
4257 	  length * sizeof (tree));
4258 
4259   for (i = 0, n = nelt / 2; i < n; i++)
4260     {
4261       sel[i * 2] = i;
4262       sel[i * 2 + 1] = i + nelt;
4263     }
4264   perm_mask_high = vect_gen_perm_mask (vectype, sel);
4265   gcc_assert (perm_mask_high != NULL);
4266 
4267   for (i = 0; i < nelt; i++)
4268     sel[i] += nelt / 2;
4269   perm_mask_low = vect_gen_perm_mask (vectype, sel);
4270   gcc_assert (perm_mask_low != NULL);
4271 
4272   for (i = 0, n = exact_log2 (length); i < n; i++)
4273     {
4274       for (j = 0; j < length/2; j++)
4275 	{
4276 	  vect1 = dr_chain[j];
4277 	  vect2 = dr_chain[j+length/2];
4278 
4279 	  /* Create interleaving stmt:
4280 	     high = VEC_PERM_EXPR <vect1, vect2, {0, nelt, 1, nelt+1, ...}>  */
4281 	  high = make_temp_ssa_name (vectype, NULL, "vect_inter_high");
4282 	  perm_stmt
4283 	    = gimple_build_assign_with_ops (VEC_PERM_EXPR, high,
4284 					    vect1, vect2, perm_mask_high);
4285 	  vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4286 	  (*result_chain)[2*j] = high;
4287 
4288 	  /* Create interleaving stmt:
4289 	     low = VEC_PERM_EXPR <vect1, vect2, {nelt/2, nelt*3/2, nelt/2+1,
4290 						 nelt*3/2+1, ...}>  */
4291 	  low = make_temp_ssa_name (vectype, NULL, "vect_inter_low");
4292 	  perm_stmt
4293 	    = gimple_build_assign_with_ops (VEC_PERM_EXPR, low,
4294 					    vect1, vect2, perm_mask_low);
4295 	  vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4296 	  (*result_chain)[2*j+1] = low;
4297 	}
4298       memcpy (dr_chain.address (), result_chain->address (),
4299 	      length * sizeof (tree));
4300     }
4301 }
4302 
4303 /* Function vect_setup_realignment
4304 
4305    This function is called when vectorizing an unaligned load using
4306    the dr_explicit_realign[_optimized] scheme.
4307    This function generates the following code at the loop prolog:
4308 
4309       p = initial_addr;
4310    x  msq_init = *(floor(p));   # prolog load
4311       realignment_token = call target_builtin;
4312     loop:
4313    x  msq = phi (msq_init, ---)
4314 
4315    The stmts marked with x are generated only for the case of
4316    dr_explicit_realign_optimized.
4317 
4318    The code above sets up a new (vector) pointer, pointing to the first
4319    location accessed by STMT, and a "floor-aligned" load using that pointer.
4320    It also generates code to compute the "realignment-token" (if the relevant
4321    target hook was defined), and creates a phi-node at the loop-header bb
4322    whose arguments are the result of the prolog-load (created by this
4323    function) and the result of a load that takes place in the loop (to be
4324    created by the caller to this function).
4325 
4326    For the case of dr_explicit_realign_optimized:
4327    The caller to this function uses the phi-result (msq) to create the
4328    realignment code inside the loop, and sets up the missing phi argument,
4329    as follows:
4330     loop:
4331       msq = phi (msq_init, lsq)
4332       lsq = *(floor(p'));        # load in loop
4333       result = realign_load (msq, lsq, realignment_token);
4334 
4335    For the case of dr_explicit_realign:
4336     loop:
4337       msq = *(floor(p)); 	# load in loop
4338       p' = p + (VS-1);
4339       lsq = *(floor(p'));	# load in loop
4340       result = realign_load (msq, lsq, realignment_token);
4341 
4342    Input:
4343    STMT - (scalar) load stmt to be vectorized. This load accesses
4344           a memory location that may be unaligned.
4345    BSI - place where new code is to be inserted.
4346    ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
4347 			      is used.
4348 
4349    Output:
4350    REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
4351                        target hook, if defined.
4352    Return value - the result of the loop-header phi node.  */
4353 
4354 tree
4355 vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
4356                         tree *realignment_token,
4357 			enum dr_alignment_support alignment_support_scheme,
4358 			tree init_addr,
4359 			struct loop **at_loop)
4360 {
4361   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4362   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4363   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4364   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4365   struct loop *loop = NULL;
4366   edge pe = NULL;
4367   tree scalar_dest = gimple_assign_lhs (stmt);
4368   tree vec_dest;
4369   gimple inc;
4370   tree ptr;
4371   tree data_ref;
4372   gimple new_stmt;
4373   basic_block new_bb;
4374   tree msq_init = NULL_TREE;
4375   tree new_temp;
4376   gimple phi_stmt;
4377   tree msq = NULL_TREE;
4378   gimple_seq stmts = NULL;
4379   bool inv_p;
4380   bool compute_in_loop = false;
4381   bool nested_in_vect_loop = false;
4382   struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
4383   struct loop *loop_for_initial_load = NULL;
4384 
4385   if (loop_vinfo)
4386     {
4387       loop = LOOP_VINFO_LOOP (loop_vinfo);
4388       nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4389     }
4390 
4391   gcc_assert (alignment_support_scheme == dr_explicit_realign
4392 	      || alignment_support_scheme == dr_explicit_realign_optimized);
4393 
4394   /* We need to generate three things:
4395      1. the misalignment computation
4396      2. the extra vector load (for the optimized realignment scheme).
4397      3. the phi node for the two vectors from which the realignment is
4398       done (for the optimized realignment scheme).  */
4399 
4400   /* 1. Determine where to generate the misalignment computation.
4401 
4402      If INIT_ADDR is NULL_TREE, this indicates that the misalignment
4403      calculation will be generated by this function, outside the loop (in the
4404      preheader).  Otherwise, INIT_ADDR had already been computed for us by the
4405      caller, inside the loop.
4406 
4407      Background: If the misalignment remains fixed throughout the iterations of
4408      the loop, then both realignment schemes are applicable, and also the
4409      misalignment computation can be done outside LOOP.  This is because we are
4410      vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
4411      are a multiple of VS (the Vector Size), and therefore the misalignment in
4412      different vectorized LOOP iterations is always the same.
4413      The problem arises only if the memory access is in an inner-loop nested
4414      inside LOOP, which is now being vectorized using outer-loop vectorization.
4415      This is the only case when the misalignment of the memory access may not
4416      remain fixed throughout the iterations of the inner-loop (as explained in
4417      detail in vect_supportable_dr_alignment).  In this case, not only is the
4418      optimized realignment scheme not applicable, but also the misalignment
4419      computation (and generation of the realignment token that is passed to
4420      REALIGN_LOAD) have to be done inside the loop.
4421 
4422      In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
4423      or not, which in turn determines if the misalignment is computed inside
4424      the inner-loop, or outside LOOP.  */
4425 
4426   if (init_addr != NULL_TREE || !loop_vinfo)
4427     {
4428       compute_in_loop = true;
4429       gcc_assert (alignment_support_scheme == dr_explicit_realign);
4430     }
4431 
4432 
4433   /* 2. Determine where to generate the extra vector load.
4434 
4435      For the optimized realignment scheme, instead of generating two vector
4436      loads in each iteration, we generate a single extra vector load in the
4437      preheader of the loop, and in each iteration reuse the result of the
4438      vector load from the previous iteration.  In case the memory access is in
4439      an inner-loop nested inside LOOP, which is now being vectorized using
4440      outer-loop vectorization, we need to determine whether this initial vector
4441      load should be generated at the preheader of the inner-loop, or can be
4442      generated at the preheader of LOOP.  If the memory access has no evolution
4443      in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
4444      to be generated inside LOOP (in the preheader of the inner-loop).  */
4445 
4446   if (nested_in_vect_loop)
4447     {
4448       tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
4449       bool invariant_in_outerloop =
4450             (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
4451       loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
4452     }
4453   else
4454     loop_for_initial_load = loop;
4455   if (at_loop)
4456     *at_loop = loop_for_initial_load;
4457 
4458   if (loop_for_initial_load)
4459     pe = loop_preheader_edge (loop_for_initial_load);
4460 
4461   /* 3. For the case of the optimized realignment, create the first vector
4462       load at the loop preheader.  */
4463 
4464   if (alignment_support_scheme == dr_explicit_realign_optimized)
4465     {
4466       /* Create msq_init = *(floor(p1)) in the loop preheader  */
4467 
4468       gcc_assert (!compute_in_loop);
4469       vec_dest = vect_create_destination_var (scalar_dest, vectype);
4470       ptr = vect_create_data_ref_ptr (stmt, vectype, loop_for_initial_load,
4471 				      NULL_TREE, &init_addr, NULL, &inc,
4472 				      true, &inv_p);
4473       new_temp = copy_ssa_name (ptr, NULL);
4474       new_stmt = gimple_build_assign_with_ops
4475 		   (BIT_AND_EXPR, new_temp, ptr,
4476 		    build_int_cst (TREE_TYPE (ptr),
4477 				   -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4478       new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
4479       gcc_assert (!new_bb);
4480       data_ref
4481 	= build2 (MEM_REF, TREE_TYPE (vec_dest), new_temp,
4482 		  build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0));
4483       new_stmt = gimple_build_assign (vec_dest, data_ref);
4484       new_temp = make_ssa_name (vec_dest, new_stmt);
4485       gimple_assign_set_lhs (new_stmt, new_temp);
4486       if (pe)
4487         {
4488           new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
4489           gcc_assert (!new_bb);
4490         }
4491       else
4492          gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
4493 
4494       msq_init = gimple_assign_lhs (new_stmt);
4495     }
4496 
4497   /* 4. Create realignment token using a target builtin, if available.
4498       It is done either inside the containing loop, or before LOOP (as
4499       determined above).  */
4500 
4501   if (targetm.vectorize.builtin_mask_for_load)
4502     {
4503       tree builtin_decl;
4504 
4505       /* Compute INIT_ADDR - the initial address accessed by this memref.  */
4506       if (!init_addr)
4507 	{
4508 	  /* Generate the INIT_ADDR computation outside LOOP.  */
4509 	  init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
4510 							NULL_TREE, loop);
4511           if (loop)
4512             {
4513    	      pe = loop_preheader_edge (loop);
4514 	      new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4515 	      gcc_assert (!new_bb);
4516             }
4517           else
4518              gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
4519 	}
4520 
4521       builtin_decl = targetm.vectorize.builtin_mask_for_load ();
4522       new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
4523       vec_dest =
4524 	vect_create_destination_var (scalar_dest,
4525 				     gimple_call_return_type (new_stmt));
4526       new_temp = make_ssa_name (vec_dest, new_stmt);
4527       gimple_call_set_lhs (new_stmt, new_temp);
4528 
4529       if (compute_in_loop)
4530 	gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
4531       else
4532 	{
4533 	  /* Generate the misalignment computation outside LOOP.  */
4534 	  pe = loop_preheader_edge (loop);
4535 	  new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
4536 	  gcc_assert (!new_bb);
4537 	}
4538 
4539       *realignment_token = gimple_call_lhs (new_stmt);
4540 
4541       /* The result of the CALL_EXPR to this builtin is determined solely
4542          by the value of the parameter, and no global variables are touched,
4543          which makes the builtin a "const" function.  Requiring the
4544          builtin to have the "const" attribute makes it unnecessary
4545          to call mark_call_clobbered.  */
4546       gcc_assert (TREE_READONLY (builtin_decl));
4547     }
4548 
4549   if (alignment_support_scheme == dr_explicit_realign)
4550     return msq;
4551 
4552   gcc_assert (!compute_in_loop);
4553   gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
4554 
4555 
4556   /* 5. Create msq = phi <msq_init, lsq> in loop  */
4557 
4558   pe = loop_preheader_edge (containing_loop);
4559   vec_dest = vect_create_destination_var (scalar_dest, vectype);
4560   msq = make_ssa_name (vec_dest, NULL);
4561   phi_stmt = create_phi_node (msq, containing_loop->header);
4562   add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);
4563 
4564   return msq;
4565 }
4566 
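#if 0
/* Illustrative sketch only, not built as part of this file: a scalar model
   of the dr_explicit_realign_optimized scheme that the code above sets up,
   assuming "vectors" of VS ints and using the element misalignment K as a
   stand-in for the real realignment token.  MSQ is loaded once from the
   rounded-down address before the loop (the msq_init created above); inside
   the loop only LSQ is loaded, and the two are combined the way
   REALIGN_LOAD <msq, lsq, rt> would combine them.  All names here (VS,
   model_optimized_realign, ...) are invented for the example.  */

#include <stdint.h>
#include <string.h>

#define VS 4

static void
model_optimized_realign (const int *unaligned, int nvectors, int out[][VS])
{
  /* Round the start address down to a "vector" boundary; the caller must
     make (nvectors + 1) * VS elements accessible from there.  */
  const int *base = unaligned - ((uintptr_t) unaligned / sizeof (int)) % VS;
  int k = unaligned - base;             /* element misalignment (the "token") */
  int msq[VS], lsq[VS];

  memcpy (msq, base, sizeof msq);       /* preheader load of msq_init */
  for (int v = 0; v < nvectors; v++)
    {
      memcpy (lsq, base + (v + 1) * VS, sizeof lsq);  /* one load per iter  */
      for (int i = 0; i < VS; i++)                    /* REALIGN_LOAD model */
	out[v][i] = i + k < VS ? msq[i + k] : lsq[i + k - VS];
      memcpy (msq, lsq, sizeof msq);    /* reuse lsq as the next msq (the phi) */
    }
}
#endif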
4567 
4568 /* Function vect_grouped_load_supported.
4569 
4570    Returns TRUE if the even and odd element permutations required for a
4571    grouped load of size COUNT on VECTYPE are supported, FALSE otherwise.  */
4572 
4573 bool
4574 vect_grouped_load_supported (tree vectype, unsigned HOST_WIDE_INT count)
4575 {
4576   enum machine_mode mode = TYPE_MODE (vectype);
4577 
4578   /* vect_permute_load_chain requires the group size to be a power of two.  */
4579   if (exact_log2 (count) == -1)
4580     {
4581       if (dump_enabled_p ())
4582 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4583                          "the size of the group of accesses"
4584                          " is not a power of 2");
4585       return false;
4586     }
4587 
4588   /* Check that the permutation is supported.  */
4589   if (VECTOR_MODE_P (mode))
4590     {
4591       unsigned int i, nelt = GET_MODE_NUNITS (mode);
4592       unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
4593 
4594       for (i = 0; i < nelt; i++)
4595 	sel[i] = i * 2;
4596       if (can_vec_perm_p (mode, false, sel))
4597 	{
4598 	  for (i = 0; i < nelt; i++)
4599 	    sel[i] = i * 2 + 1;
4600 	  if (can_vec_perm_p (mode, false, sel))
4601 	    return true;
4602 	}
4603     }
4604 
4605   if (dump_enabled_p ())
4606     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4607                      "extract even/odd not supported by target");
4608   return false;
4609 }
4610 
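#if 0
/* Illustrative sketch only, not built as part of this file: the two
   permutation selectors whose support is queried above, spelled out for a
   vector mode with four elements.  The indices refer to the concatenation
   of the two input vectors, so extract-even selects elements { 0, 2, 4, 6 }
   and extract-odd selects { 1, 3, 5, 7 }, exactly as the sel[] loops above
   build them.  */
static const unsigned char example_extract_even_sel[4] = { 0, 2, 4, 6 };
static const unsigned char example_extract_odd_sel[4]  = { 1, 3, 5, 7 };
#endif
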
4611 /* Return TRUE if vec_load_lanes is available for COUNT vectors of
4612    type VECTYPE.  */
4613 
4614 bool
4615 vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
4616 {
4617   return vect_lanes_optab_supported_p ("vec_load_lanes",
4618 				       vec_load_lanes_optab,
4619 				       vectype, count);
4620 }
4621 
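#if 0
/* Illustrative sketch only, not built as part of this file: a scalar model
   of the vec_load_lanes operation whose availability is queried above.  For
   COUNT vectors of NELT elements it reads COUNT * NELT consecutive scalars
   and de-interleaves them, so lane J of vector K receives element
   J * COUNT + K of the memory block.  model_load_lanes is an invented name
   for the example.  */
static void
model_load_lanes (const int *mem, unsigned count, unsigned nelt,
		  int *out /* count * nelt elements, vector-major */)
{
  for (unsigned k = 0; k < count; k++)
    for (unsigned j = 0; j < nelt; j++)
      out[k * nelt + j] = mem[j * count + k];
}
#endif
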
4622 /* Function vect_permute_load_chain.
4623 
4624    Given a chain of LENGTH interleaved loads in DR_CHAIN, where LENGTH must
4625    be a power of 2, generate extract_even/odd stmts to reorder the input data
4626    correctly.  Return the final references for loads in RESULT_CHAIN.
4627 
4628    E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
4629    The input is 4 vectors, each containing 8 elements.  We assign a number to
4630    each element; the input sequence is:
4631 
4632    1st vec:   0  1  2  3  4  5  6  7
4633    2nd vec:   8  9 10 11 12 13 14 15
4634    3rd vec:  16 17 18 19 20 21 22 23
4635    4th vec:  24 25 26 27 28 29 30 31
4636 
4637    The output sequence should be:
4638 
4639    1st vec:  0 4  8 12 16 20 24 28
4640    2nd vec:  1 5  9 13 17 21 25 29
4641    3rd vec:  2 6 10 14 18 22 26 30
4642    4th vec:  3 7 11 15 19 23 27 31
4643 
4644    i.e., the first output vector should contain the first elements of each
4645    interleaving group, etc.
4646 
4647    We use extract_even/odd instructions to create such output.  The input of
4648    each extract_even/odd operation is two vectors
4649    1st vec    2nd vec
4650    0 1 2 3    4 5 6 7
4651 
4652    and the output is the vector of extracted even/odd elements.  The output of
4653    extract_even will be:   0 2 4 6
4654    and of extract_odd:     1 3 5 7
4655 
4656 
4657    The permutation is done in log2 (LENGTH) stages.  In each stage extract_even
4658    and extract_odd stmts are created for each pair of vectors in DR_CHAIN, in
4659    order.  In our example,
4660 
4661    E1: extract_even (1st vec, 2nd vec)
4662    E2: extract_odd (1st vec, 2nd vec)
4663    E3: extract_even (3rd vec, 4th vec)
4664    E4: extract_odd (3rd vec, 4th vec)
4665 
4666    The output for the first stage will be:
4667 
4668    E1:  0  2  4  6  8 10 12 14
4669    E2:  1  3  5  7  9 11 13 15
4670    E3: 16 18 20 22 24 26 28 30
4671    E4: 17 19 21 23 25 27 29 31
4672 
4673    In order to proceed and create the correct sequence for the next stage (or
4674    for the correct output, if the second stage is the last one, as in our
4675    example), we first put the output of the extract_even operation and then the
4676    output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
4677    The input for the second stage is:
4678 
4679    1st vec (E1):  0  2  4  6  8 10 12 14
4680    2nd vec (E3): 16 18 20 22 24 26 28 30
4681    3rd vec (E2):  1  3  5  7  9 11 13 15
4682    4th vec (E4): 17 19 21 23 25 27 29 31
4683 
4684    The output of the second stage:
4685 
4686    E1: 0 4  8 12 16 20 24 28
4687    E2: 2 6 10 14 18 22 26 30
4688    E3: 1 5  9 13 17 21 25 29
4689    E4: 3 7 11 15 19 23 27 31
4690 
4691    And RESULT_CHAIN after reordering:
4692 
4693    1st vec (E1):  0 4  8 12 16 20 24 28
4694    2nd vec (E3):  1 5  9 13 17 21 25 29
4695    3rd vec (E2):  2 6 10 14 18 22 26 30
4696    4th vec (E4):  3 7 11 15 19 23 27 31.  */
4697 
4698 static void
4699 vect_permute_load_chain (vec<tree> dr_chain,
4700 			 unsigned int length,
4701 			 gimple stmt,
4702 			 gimple_stmt_iterator *gsi,
4703 			 vec<tree> *result_chain)
4704 {
4705   tree data_ref, first_vect, second_vect;
4706   tree perm_mask_even, perm_mask_odd;
4707   gimple perm_stmt;
4708   tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
4709   unsigned int i, j, log_length = exact_log2 (length);
4710   unsigned nelt = TYPE_VECTOR_SUBPARTS (vectype);
4711   unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
4712 
4713   result_chain->quick_grow (length);
4714   memcpy (result_chain->address (), dr_chain.address (),
4715 	  length * sizeof (tree));
4716 
4717   for (i = 0; i < nelt; ++i)
4718     sel[i] = i * 2;
4719   perm_mask_even = vect_gen_perm_mask (vectype, sel);
4720   gcc_assert (perm_mask_even != NULL);
4721 
4722   for (i = 0; i < nelt; ++i)
4723     sel[i] = i * 2 + 1;
4724   perm_mask_odd = vect_gen_perm_mask (vectype, sel);
4725   gcc_assert (perm_mask_odd != NULL);
4726 
4727   for (i = 0; i < log_length; i++)
4728     {
4729       for (j = 0; j < length; j += 2)
4730 	{
4731 	  first_vect = dr_chain[j];
4732 	  second_vect = dr_chain[j+1];
4733 
4734 	  /* data_ref = permute_even (first_data_ref, second_data_ref);  */
4735 	  data_ref = make_temp_ssa_name (vectype, NULL, "vect_perm_even");
4736 	  perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
4737 						    first_vect, second_vect,
4738 						    perm_mask_even);
4739 	  vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4740 	  (*result_chain)[j/2] = data_ref;
4741 
4742 	  /* data_ref = permute_odd (first_data_ref, second_data_ref);  */
4743 	  data_ref = make_temp_ssa_name (vectype, NULL, "vect_perm_odd");
4744 	  perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
4745 						    first_vect, second_vect,
4746 						    perm_mask_odd);
4747 	  vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4748 	  (*result_chain)[j/2+length/2] = data_ref;
4749 	}
4750       memcpy (dr_chain.address (), result_chain->address (),
4751 	      length * sizeof (tree));
4752     }
4753 }
4754 
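#if 0
/* Illustrative sketch only, not built as part of this file: a scalar model
   of the even/odd permutation network above, operating on element numbers
   instead of vectors.  With LENGTH = 4 chains of NELT = 8 elements it
   reproduces exactly the RESULT_CHAIN documented in the comment of
   vect_permute_load_chain (all names here are invented for the example).  */

#include <stdio.h>
#include <string.h>

#define LENGTH 4	/* must be a power of 2 */
#define NELT   8

static void
model_permute_load_chain (int chain[LENGTH][NELT])
{
  int result[LENGTH][NELT];

  for (int l = LENGTH; l > 1; l >>= 1)		/* log2 (LENGTH) stages */
    {
      for (int j = 0; j < LENGTH; j += 2)
	for (int i = 0; i < NELT; i++)
	  {
	    /* Element 2*i (resp. 2*i + 1) of the concatenation of chain[j]
	       and chain[j + 1], as perm_mask_even/perm_mask_odd select.  */
	    int e = 2 * i, o = 2 * i + 1;
	    result[j / 2][i]
	      = e < NELT ? chain[j][e] : chain[j + 1][e - NELT];
	    result[j / 2 + LENGTH / 2][i]
	      = o < NELT ? chain[j][o] : chain[j + 1][o - NELT];
	  }
      memcpy (chain, result, sizeof result);
    }
}

int
main (void)
{
  int chain[LENGTH][NELT];

  for (int j = 0; j < LENGTH; j++)
    for (int i = 0; i < NELT; i++)
      chain[j][i] = j * NELT + i;		/* 0 .. 31, as in the example */
  model_permute_load_chain (chain);
  for (int j = 0; j < LENGTH; j++)
    {
      for (int i = 0; i < NELT; i++)
	printf (" %2d", chain[j][i]);
      printf ("\n");			/* 0 4 8 ... / 1 5 9 ... / 2 6 ... / 3 7 ... */
    }
  return 0;
}
#endif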
4755 
4756 /* Function vect_transform_grouped_load.
4757 
4758    Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
4759    to perform their permutation and attach the resulting vectorized statements
4760    to the corresponding scalar statements.
4761 */
4762 
4763 void
4764 vect_transform_grouped_load (gimple stmt, vec<tree> dr_chain, int size,
4765 			     gimple_stmt_iterator *gsi)
4766 {
4767   vec<tree> result_chain = vNULL;
4768 
4769   /* DR_CHAIN contains input data-refs that are a part of the interleaving.
4770      RESULT_CHAIN is the output of vect_permute_load_chain; it contains the
4771      permuted vectors, which are ready for vector computation.  */
4772   result_chain.create (size);
4773   vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain);
4774   vect_record_grouped_load_vectors (stmt, result_chain);
4775   result_chain.release ();
4776 }
4777 
4778 /* RESULT_CHAIN contains the output of a group of grouped loads that were
4779    generated as part of the vectorization of STMT.  Assign the statement that
4780    defines each vector to the stmt_info of the associated scalar statement.  */
4781 
4782 void
4783 vect_record_grouped_load_vectors (gimple stmt, vec<tree> result_chain)
4784 {
4785   gimple first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
4786   gimple next_stmt, new_stmt;
4787   unsigned int i, gap_count;
4788   tree tmp_data_ref;
4789 
4790   /* Put a permuted data-ref in the VECTORIZED_STMT field.
4791      Since we scan the chain starting from its first node, their order
4792      corresponds to the order of data-refs in RESULT_CHAIN.  */
4793   next_stmt = first_stmt;
4794   gap_count = 1;
4795   FOR_EACH_VEC_ELT (result_chain, i, tmp_data_ref)
4796     {
4797       if (!next_stmt)
4798 	break;
4799 
4800       /* Skip the gaps.  Loads created for the gaps will be removed by the
4801        dead code elimination pass later.  No need to check for the first stmt
4802        in the group, since it always exists.
4803        GROUP_GAP is the distance, in elements, from the previous
4804        access (if there is no gap, GROUP_GAP is 1).  We skip loads that
4805        correspond to the gaps.  */
4806       if (next_stmt != first_stmt
4807           && gap_count < GROUP_GAP (vinfo_for_stmt (next_stmt)))
4808       {
4809         gap_count++;
4810         continue;
4811       }
4812 
4813       while (next_stmt)
4814         {
4815 	  new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
4816 	  /* We assume that if VEC_STMT is not NULL, this is a case of multiple
4817 	     copies, and we put the new vector statement in the first available
4818 	     RELATED_STMT.  */
4819 	  if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
4820 	    STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
4821 	  else
4822             {
4823               if (!GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
4824                 {
4825  	          gimple prev_stmt =
4826 		    STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
4827 	          gimple rel_stmt =
4828 		    STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
4829 	          while (rel_stmt)
4830 		    {
4831 		      prev_stmt = rel_stmt;
4832 		      rel_stmt =
4833                         STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
4834 		    }
4835 
4836   	          STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) =
4837                     new_stmt;
4838                 }
4839             }
4840 
4841 	  next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4842 	  gap_count = 1;
4843 	  /* If NEXT_STMT accesses the same DR as the previous statement,
4844 	     put the same TMP_DATA_REF as its vectorized statement; otherwise
4845 	     get the next data-ref from RESULT_CHAIN.  */
4846 	  if (!next_stmt || !GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
4847 	    break;
4848         }
4849     }
4850 }
4851 
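#if 0
/* Illustrative sketch only, not built as part of this file: the walk that
   vect_record_grouped_load_vectors performs over RESULT_CHAIN, reduced to
   indices and ignoring the GROUP_SAME_DR_STMT / multiple-copy handling.
   GROUP_GAPS[N] plays the role of GROUP_GAP for the N-th group member; all
   names here are invented for the example.  */

#include <stdio.h>

static void
model_record_group_vectors (const int *group_gaps, int group_size,
			    int result_chain_len)
{
  int member = 0, gap_count = 1;

  for (int vec = 0; vec < result_chain_len && member < group_size; vec++)
    {
      /* Vectors that only cover a gap before this member are skipped and
	 left to dead code elimination, exactly as in the loop above.  */
      if (member != 0 && gap_count < group_gaps[member])
	{
	  gap_count++;
	  continue;
	}
      printf ("vector %d -> group member %d\n", vec, member);
      member++;
      gap_count = 1;
    }
}
#endif
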
4852 /* Function vect_can_force_dr_alignment_p.
4853 
4854    Returns whether DECL can be forced to be aligned on an ALIGNMENT-bit
4855    boundary.  */
4856 
4857 bool
4858 vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
4859 {
4860   if (TREE_CODE (decl) != VAR_DECL)
4861     return false;
4862 
4863   /* We cannot change alignment of common or external symbols as another
4864      translation unit may contain a definition with lower alignment.
4865      The rules of common symbol linking mean that the definition
4866      will override the common symbol.  The same is true for constant
4867      pool entries which may be shared and are not properly merged
4868      by LTO.  */
4869   if (DECL_EXTERNAL (decl)
4870       || DECL_COMMON (decl)
4871       || DECL_IN_CONSTANT_POOL (decl))
4872     return false;
4873 
4874   if (TREE_ASM_WRITTEN (decl))
4875     return false;
4876 
4877   /* Do not override the alignment as specified by the ABI when the used
4878      attribute is set.  */
4879   if (DECL_PRESERVE_P (decl))
4880     return false;
4881 
4882   /* Do not override explicit alignment set by the user when an explicit
4883      section name is also used.  This is a common idiom used by many
4884      software projects.  */
4885   if (DECL_SECTION_NAME (decl) != NULL_TREE
4886       && !DECL_HAS_IMPLICIT_SECTION_NAME_P (decl))
4887     return false;
4888 
4889   if (TREE_STATIC (decl))
4890     return (alignment <= MAX_OFILE_ALIGNMENT);
4891   else
4892     return (alignment <= MAX_STACK_ALIGNMENT);
4893 }
4894 
4895 
4896 /* Return whether the data reference DR is supported with respect to its
4897    alignment.
4898    If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even
4899    if it is aligned, i.e., check if it is possible to vectorize it with a
4900    different alignment.  */
4901 
4902 enum dr_alignment_support
4903 vect_supportable_dr_alignment (struct data_reference *dr,
4904                                bool check_aligned_accesses)
4905 {
4906   gimple stmt = DR_STMT (dr);
4907   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4908   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4909   enum machine_mode mode = TYPE_MODE (vectype);
4910   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4911   struct loop *vect_loop = NULL;
4912   bool nested_in_vect_loop = false;
4913 
4914   if (aligned_access_p (dr) && !check_aligned_accesses)
4915     return dr_aligned;
4916 
4917   if (loop_vinfo)
4918     {
4919       vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
4920       nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
4921     }
4922 
4923   /* Possibly unaligned access.  */
4924 
4925   /* We can choose between using the implicit realignment scheme (generating
4926      a misaligned_move stmt) and the explicit realignment scheme (generating
4927      aligned loads with a REALIGN_LOAD).  There are two variants to the
4928      aligned loads with a REALIGN_LOAD).  There are two variants of the
4929      explicit realignment scheme: optimized and unoptimized.
4930      vector loads is equal to the vector size.  Since the vector memory
4931      accesses advance in steps of VS (Vector Size) in the vectorized loop, it
4932      is guaranteed that the misalignment amount remains the same throughout the
4933      execution of the vectorized loop.  Therefore, we can create the
4934      "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
4935      at the loop preheader.
4936 
4937      However, in the case of outer-loop vectorization, when vectorizing a
4938      memory access in the inner-loop nested within the LOOP that is now being
4939      vectorized, while it is guaranteed that the misalignment of the
4940      vectorized memory access will remain the same in different outer-loop
4941      iterations, it is *not* guaranteed that is will remain the same throughout
4942      iterations, it is *not* guaranteed that it will remain the same throughout
4943      with the original scalar step (and not in steps of VS).  If the inner-loop
4944      step happens to be a multiple of VS, then the misalignment remains fixed
4945      and we can use the optimized realignment scheme.  For example:
4946 
4947       for (i=0; i<N; i++)
4948         for (j=0; j<M; j++)
4949           s += a[i+j];
4950 
4951      When vectorizing the i-loop in the above example, the step between
4952      consecutive vector loads is 1, and so the misalignment does not remain
4953      fixed across the execution of the inner-loop, and the realignment cannot
4954      be optimized (as illustrated in the following pseudo vectorized loop):
4955 
4956       for (i=0; i<N; i+=4)
4957         for (j=0; j<M; j++){
4958           vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
4959                          // when j is {0,1,2,3,4,5,6,7,...} respectively.
4960                          // (assuming that we start from an aligned address).
4961           }
4962 
4963      We therefore have to use the unoptimized realignment scheme:
4964 
4965       for (i=0; i<N; i+=4)
4966         for (j=k; j<M; j+=4)
4967           vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
4968                          // that the misalignment of the initial address is
4969                          // 0).
4970 
4971      The loop can then be vectorized as follows:
4972 
4973       for (k=0; k<4; k++){
4974         rt = get_realignment_token (&vp[k]);
4975         for (i=0; i<N; i+=4){
4976           v1 = vp[i+k];
4977           for (j=k; j<M; j+=4){
4978             v2 = vp[i+j+VS-1];
4979             va = REALIGN_LOAD <v1,v2,rt>;
4980             vs += va;
4981             v1 = v2;
4982           }
4983         }
4984     } */
4985 
4986   if (DR_IS_READ (dr))
4987     {
4988       bool is_packed = false;
4989       tree type = (TREE_TYPE (DR_REF (dr)));
4990 
4991       if (optab_handler (vec_realign_load_optab, mode) != CODE_FOR_nothing
4992 	  && (!targetm.vectorize.builtin_mask_for_load
4993 	      || targetm.vectorize.builtin_mask_for_load ()))
4994 	{
4995 	  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4996 	  if ((nested_in_vect_loop
4997 	       && (TREE_INT_CST_LOW (DR_STEP (dr))
4998 	 	   != GET_MODE_SIZE (TYPE_MODE (vectype))))
4999               || !loop_vinfo)
5000 	    return dr_explicit_realign;
5001 	  else
5002 	    return dr_explicit_realign_optimized;
5003 	}
5004       if (!known_alignment_for_access_p (dr))
5005 	is_packed = not_size_aligned (DR_REF (dr));
5006 
5007       if (targetm.vectorize.
5008 	  support_vector_misalignment (mode, type,
5009 				       DR_MISALIGNMENT (dr), is_packed))
5010 	/* Can't software pipeline the loads, but can at least do them.  */
5011 	return dr_unaligned_supported;
5012     }
5013   else
5014     {
5015       bool is_packed = false;
5016       tree type = (TREE_TYPE (DR_REF (dr)));
5017 
5018       if (!known_alignment_for_access_p (dr))
5019 	is_packed = not_size_aligned (DR_REF (dr));
5020 
5021      if (targetm.vectorize.
5022          support_vector_misalignment (mode, type,
5023 				      DR_MISALIGNMENT (dr), is_packed))
5024        return dr_unaligned_supported;
5025     }
5026 
5027   /* Unsupported.  */
5028   return dr_unaligned_unsupported;
5029 }
5030