xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/tree-vect-data-refs.c (revision b7b7574d3bf8eeb51a1fa3977b59142ec6434a55)
1 /* Data References Analysis and Manipulation Utilities for Vectorization.
2    Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
3    Free Software Foundation, Inc.
4    Contributed by Dorit Naishlos <dorit@il.ibm.com>
5    and Ira Rosen <irar@il.ibm.com>
6 
7 This file is part of GCC.
8 
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13 
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
17 for more details.
18 
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3.  If not see
21 <http://www.gnu.org/licenses/>.  */
22 
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 #include "basic-block.h"
31 #include "diagnostic.h"
32 #include "tree-flow.h"
33 #include "tree-dump.h"
34 #include "cfgloop.h"
35 #include "expr.h"
36 #include "optabs.h"
37 #include "tree-chrec.h"
38 #include "tree-scalar-evolution.h"
39 #include "tree-vectorizer.h"
40 #include "toplev.h"
41 
42 
43 /* Return the smallest scalar part of STMT.
44    This is used to determine the vectype of the stmt. We generally set the
45    vectype according to the type of the result (lhs). For stmts whose
46    result-type is different from the type of the arguments (e.g., demotion,
47    promotion), vectype will be reset appropriately (later).  Note that we have
48    to visit the smallest datatype in this function, because that determines the
49    VF. If the smallest datatype in the loop is present only as the rhs of a
50    promotion operation - we'd miss it.
51    Such a case, where a variable of this datatype does not appear in the lhs
52    anywhere in the loop, can only occur if it's an invariant: e.g.:
53    'int_x = (int) short_inv', which we'd expect to have been optimized away by
54    invariant motion. However, we cannot rely on invariant motion to always take
55    invariants out of the loop, and so in the case of promotion we also have to
56    check the rhs.
57    LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
58    types.  */
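
/* For illustration only (assuming 16-byte vectors, e.g. V4SI/V8HI):

	int_y[i] = (int) short_x[i];	# promotion: lhs is int, rhs is short

   Looking only at the lhs would suggest 'int' (4 bytes) as the smallest
   scalar type, giving VF = 16/4 = 4; the 'short' rhs (2 bytes), however,
   requires VF = 16/2 = 8.  This is why the rhs of casts, WIDEN_MULT_EXPR
   and FLOAT_EXPR is inspected as well.  */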
59 
60 tree
61 vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
62                                HOST_WIDE_INT *rhs_size_unit)
63 {
64   tree scalar_type = gimple_expr_type (stmt);
65   HOST_WIDE_INT lhs, rhs;
66 
67   lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
68 
69   if (is_gimple_assign (stmt)
70       && (gimple_assign_cast_p (stmt)
71           || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
72           || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
73     {
74       tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
75 
76       rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
77       if (rhs < lhs)
78         scalar_type = rhs_type;
79     }
80 
81   *lhs_size_unit = lhs;
82   *rhs_size_unit = rhs;
83   return scalar_type;
84 }
85 
86 
87 /* Find the place of the data-ref in STMT in the interleaving chain that starts
88    from FIRST_STMT. Return -1 if the data-ref is not a part of the chain.  */
89 
90 int
91 vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
92 {
93   gimple next_stmt = first_stmt;
94   int result = 0;
95 
96   if (first_stmt != DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
97     return -1;
98 
99   while (next_stmt && next_stmt != stmt)
100     {
101       result++;
102       next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
103     }
104 
105   if (next_stmt)
106     return result;
107   else
108     return -1;
109 }
110 
111 
112 /* Function vect_insert_into_interleaving_chain.
113 
114    Insert DRA into the interleaving chain of DRB according to DRA's INIT.  */
115 
116 static void
117 vect_insert_into_interleaving_chain (struct data_reference *dra,
118 				     struct data_reference *drb)
119 {
120   gimple prev, next;
121   tree next_init;
122   stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
123   stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
124 
125   prev = DR_GROUP_FIRST_DR (stmtinfo_b);
126   next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
127   while (next)
128     {
129       next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
130       if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
131 	{
132 	  /* Insert here.  */
133 	  DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
134 	  DR_GROUP_NEXT_DR (stmtinfo_a) = next;
135 	  return;
136 	}
137       prev = next;
138       next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
139     }
140 
141   /* We got to the end of the list. Insert here.  */
142   DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
143   DR_GROUP_NEXT_DR (stmtinfo_a) = NULL;
144 }
145 
146 
147 /* Function vect_update_interleaving_chain.
148 
149    For two data-refs DRA and DRB that are a part of a chain of interleaved data
150    accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
151 
152    There are four possible cases:
153    1. New stmts - both DRA and DRB are not a part of any chain:
154       FIRST_DR = DRB
155       NEXT_DR (DRB) = DRA
156    2. DRB is a part of a chain and DRA is not:
157       no need to update FIRST_DR
158       no need to insert DRB
159       insert DRA according to init
160    3. DRA is a part of a chain and DRB is not:
161       if (init of FIRST_DR > init of DRB)
162           FIRST_DR = DRB
163 	  NEXT(FIRST_DR) = previous FIRST_DR
164       else
165           insert DRB according to its init
166    4. both DRA and DRB are in some interleaving chains:
167       choose the chain with the smallest init of FIRST_DR
168       insert the nodes of the second chain into the first one.  */
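
/* An illustrative example of case 4 (the inits are byte offsets from the
   common base):

	chain of DRA:  inits 4, 12	# FIRST_DR init = 4
	chain of DRB:  inits 0,  8	# FIRST_DR init = 0

   DRB's chain starts at the smaller init, so the nodes of DRA's chain are
   inserted into DRB's chain in init order, giving one chain with inits
   0, 4, 8, 12 whose FIRST_DR is DRB's first stmt.  */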
169 
170 static void
171 vect_update_interleaving_chain (struct data_reference *drb,
172 				struct data_reference *dra)
173 {
174   stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
175   stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
176   tree next_init, init_dra_chain, init_drb_chain;
177   gimple first_a, first_b;
178   tree node_init;
179   gimple node, prev, next, first_stmt;
180 
181   /* 1. New stmts - both DRA and DRB are not a part of any chain.   */
182   if (!DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
183     {
184       DR_GROUP_FIRST_DR (stmtinfo_a) = DR_STMT (drb);
185       DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
186       DR_GROUP_NEXT_DR (stmtinfo_b) = DR_STMT (dra);
187       return;
188     }
189 
190   /* 2. DRB is a part of a chain and DRA is not.  */
191   if (!DR_GROUP_FIRST_DR (stmtinfo_a) && DR_GROUP_FIRST_DR (stmtinfo_b))
192     {
193       DR_GROUP_FIRST_DR (stmtinfo_a) = DR_GROUP_FIRST_DR (stmtinfo_b);
194       /* Insert DRA into the chain of DRB.  */
195       vect_insert_into_interleaving_chain (dra, drb);
196       return;
197     }
198 
199   /* 3. DRA is a part of a chain and DRB is not.  */
200   if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
201     {
202       gimple old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a);
203       tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
204 							      old_first_stmt)));
205       gimple tmp;
206 
207       if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
208 	{
209 	  /* DRB's init is smaller than the init of the stmt previously marked
210 	     as the first stmt of the interleaving chain of DRA. Therefore, we
211 	     update FIRST_STMT and put DRB in the head of the list.  */
212 	  DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
213 	  DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;
214 
215 	  /* Update all the stmts in the list to point to the new FIRST_STMT.  */
216 	  tmp = old_first_stmt;
217 	  while (tmp)
218 	    {
219 	      DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp)) = DR_STMT (drb);
220 	      tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp));
221 	    }
222 	}
223       else
224 	{
225 	  /* Insert DRB in the list of DRA.  */
226 	  vect_insert_into_interleaving_chain (drb, dra);
227 	  DR_GROUP_FIRST_DR (stmtinfo_b) = DR_GROUP_FIRST_DR (stmtinfo_a);
228 	}
229       return;
230     }
231 
232   /* 4. both DRA and DRB are in some interleaving chains.  */
233   first_a = DR_GROUP_FIRST_DR (stmtinfo_a);
234   first_b = DR_GROUP_FIRST_DR (stmtinfo_b);
235   if (first_a == first_b)
236     return;
237   init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
238   init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));
239 
240   if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
241     {
242       /* Insert the nodes of DRA chain into the DRB chain.
243 	 After inserting a node, continue from this node of the DRB chain (don't
244          start from the beginning).  */
245       node = DR_GROUP_FIRST_DR (stmtinfo_a);
246       prev = DR_GROUP_FIRST_DR (stmtinfo_b);
247       first_stmt = first_b;
248     }
249   else
250     {
251       /* Insert the nodes of DRB chain into the DRA chain.
252 	 After inserting a node, continue from this node of the DRA chain (don't
253          start from the beginning).  */
254       node = DR_GROUP_FIRST_DR (stmtinfo_b);
255       prev = DR_GROUP_FIRST_DR (stmtinfo_a);
256       first_stmt = first_a;
257     }
258 
259   while (node)
260     {
261       node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
262       next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
263       while (next)
264 	{
265 	  next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
266 	  if (tree_int_cst_compare (next_init, node_init) > 0)
267 	    {
268 	      /* Insert here.  */
269 	      DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
270 	      DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = next;
271 	      prev = node;
272 	      break;
273 	    }
274 	  prev = next;
275 	  next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
276 	}
277       if (!next)
278 	{
279 	  /* We got to the end of the list. Insert here.  */
280 	  DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
281 	  DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = NULL;
282 	  prev = node;
283 	}
284       DR_GROUP_FIRST_DR (vinfo_for_stmt (node)) = first_stmt;
285       node = DR_GROUP_NEXT_DR (vinfo_for_stmt (node));
286     }
287 }
288 
289 
290 /* Function vect_equal_offsets.
291 
292    Check if OFFSET1 and OFFSET2 are identical expressions.  */
293 
294 static bool
295 vect_equal_offsets (tree offset1, tree offset2)
296 {
297   bool res;
298 
299   STRIP_NOPS (offset1);
300   STRIP_NOPS (offset2);
301 
302   if (offset1 == offset2)
303     return true;
304 
305   if (TREE_CODE (offset1) != TREE_CODE (offset2)
306       || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
307     return false;
308 
309   res = vect_equal_offsets (TREE_OPERAND (offset1, 0),
310 			    TREE_OPERAND (offset2, 0));
311 
312   if (!res || !BINARY_CLASS_P (offset1))
313     return res;
314 
315   res = vect_equal_offsets (TREE_OPERAND (offset1, 1),
316 			    TREE_OPERAND (offset2, 1));
317 
318   return res;
319 }
320 
321 
322 /* Function vect_check_interleaving.
323 
324    Check if DRA and DRB are a part of interleaving. In case they are, insert
325    DRA and DRB in an interleaving chain.  */
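
/* For example (illustrative, with 4-byte elements):

	for (i = 0; i < N; i++)
	  {
	    ... = a[2*i];	# DR_INIT = 0, DR_STEP = 8
	    ... = a[2*i + 1];	# DR_INIT = 4, DR_STEP = 8
	  }

   The two data-refs share base, offset and step, their inits differ by 4,
   which is a multiple of the element size and does not exceed the step, so
   they are put into one interleaving chain.  */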
326 
327 static bool
328 vect_check_interleaving (struct data_reference *dra,
329 			 struct data_reference *drb)
330 {
331   HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;
332 
333   /* Check that the data-refs have the same first location (except init) and
334      that they are both either loads or stores (not one load and one store).  */
335   if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
336        && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
337 	   || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
338 	   || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
339 	   != TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
340       || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))
341       || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
342       || DR_IS_READ (dra) != DR_IS_READ (drb))
343     return false;
344 
345   /* Check:
346      1. data-refs are of the same type
347      2. their steps are equal
348      3. the step (if greater than zero) is greater than the difference between
349         data-refs' inits.  */
350   type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
351   type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
352 
353   if (type_size_a != type_size_b
354       || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb))
355       || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
356                               TREE_TYPE (DR_REF (drb))))
357     return false;
358 
359   init_a = TREE_INT_CST_LOW (DR_INIT (dra));
360   init_b = TREE_INT_CST_LOW (DR_INIT (drb));
361   step = TREE_INT_CST_LOW (DR_STEP (dra));
362 
363   if (init_a > init_b)
364     {
365       /* If init_a == init_b + the size of the type * k, we have an interleaving,
366 	 and DRB is accessed before DRA.  */
367       diff_mod_size = (init_a - init_b) % type_size_a;
368 
369       if (step && (init_a - init_b) > step)
370          return false;
371 
372       if (diff_mod_size == 0)
373 	{
374 	  vect_update_interleaving_chain (drb, dra);
375 	  if (vect_print_dump_info (REPORT_DR_DETAILS))
376 	    {
377 	      fprintf (vect_dump, "Detected interleaving ");
378 	      print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
379 	      fprintf (vect_dump, " and ");
380 	      print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
381 	    }
382 	  return true;
383 	}
384     }
385   else
386     {
387       /* If init_b == init_a + the size of the type * k, we have an
388 	 interleaving, and DRA is accessed before DRB.  */
389       diff_mod_size = (init_b - init_a) % type_size_a;
390 
391       if (step && (init_b - init_a) > step)
392          return false;
393 
394       if (diff_mod_size == 0)
395 	{
396 	  vect_update_interleaving_chain (dra, drb);
397 	  if (vect_print_dump_info (REPORT_DR_DETAILS))
398 	    {
399 	      fprintf (vect_dump, "Detected interleaving ");
400 	      print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
401 	      fprintf (vect_dump, " and ");
402 	      print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
403 	    }
404 	  return true;
405 	}
406     }
407 
408   return false;
409 }
410 
411 /* Check if the data references pointed to by DR_I and DR_J are the same or
412    belong to the same interleaving group.  Return FALSE if the drs are
413    different, otherwise return TRUE.  */
414 
415 static bool
416 vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
417 {
418   gimple stmt_i = DR_STMT (dr_i);
419   gimple stmt_j = DR_STMT (dr_j);
420 
421   if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
422       || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
423 	    && DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j))
424 	    && (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
425 		== DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j)))))
426     return true;
427   else
428     return false;
429 }
430 
431 /* If address ranges represented by DDR_I and DDR_J are equal,
432    return TRUE, otherwise return FALSE.  */
433 
434 static bool
435 vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
436 {
437   if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
438        && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
439       || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
440 	  && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
441     return true;
442   else
443     return false;
444 }
445 
446 /* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
447    tested at run-time.  Return TRUE if DDR was successfully inserted.
448    Return FALSE if versioning is not supported.  */
449 
450 static bool
451 vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
452 {
453   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
454 
455   if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
456     return false;
457 
458   if (vect_print_dump_info (REPORT_DR_DETAILS))
459     {
460       fprintf (vect_dump, "mark for run-time aliasing test between ");
461       print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
462       fprintf (vect_dump, " and ");
463       print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
464     }
465 
466   if (optimize_loop_nest_for_size_p (loop))
467     {
468       if (vect_print_dump_info (REPORT_DR_DETAILS))
469 	fprintf (vect_dump, "versioning not supported when optimizing for size.");
470       return false;
471     }
472 
473   /* FORNOW: We don't support versioning with outer-loop vectorization.  */
474   if (loop->inner)
475     {
476       if (vect_print_dump_info (REPORT_DR_DETAILS))
477 	fprintf (vect_dump, "versioning not yet supported for outer-loops.");
478       return false;
479     }
480 
481   VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
482   return true;
483 }
484 
485 
486 /* Function vect_analyze_data_ref_dependence.
487 
488    Return TRUE if there (might) exist a dependence between a memory-reference
489    DRA and a memory-reference DRB.  When versioning for alias can check the
490    dependence at run-time, return FALSE.  */
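
/* For illustration, assuming a vectorization factor VF = 4:

	a[i + 1] = a[i] + x;	# distance 1:  1 < VF, cannot vectorize
	a[i + 5] = a[i] + x;	# distance 5:  5 >= VF, does not constrain
				# vectorization
	a[i + 4] = a[i] + x;	# distance 4:  4 % VF == 0, both refs keep the
				# same relative position and alignment

   A distance that is a multiple of VF keeps the two refs in the same
   position of every vector iteration, so only a possible read-write
   dependence for interleaving is recorded; a distance of at least VF is
   harmless for vectorization.  */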
491 
492 static bool
493 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
494                                   loop_vec_info loop_vinfo)
495 {
496   unsigned int i;
497   struct loop *loop = NULL;
498   int vectorization_factor = 0;
499   struct data_reference *dra = DDR_A (ddr);
500   struct data_reference *drb = DDR_B (ddr);
501   stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
502   stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
503   int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
504   int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
505   lambda_vector dist_v;
506   unsigned int loop_depth;
507 
508   if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
509     {
510       /* Independent data accesses.  */
511       vect_check_interleaving (dra, drb);
512       return false;
513     }
514 
515   if (loop_vinfo)
516     {
517       loop = LOOP_VINFO_LOOP (loop_vinfo);
518       vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
519     }
520 
521   if ((DR_IS_READ (dra) && DR_IS_READ (drb) && loop_vinfo) || dra == drb)
522     return false;
523 
524   if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
525     {
526       if (loop_vinfo)
527         {
528           if (vect_print_dump_info (REPORT_DR_DETAILS))
529             {
530               fprintf (vect_dump, "versioning for alias required: "
531                                   "can't determine dependence between ");
532               print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
533               fprintf (vect_dump, " and ");
534               print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
535             }
536 
537           /* Add to list of ddrs that need to be tested at run-time.  */
538           return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
539         }
540 
541       /* When vectorizing a basic block, unknown dependence can still mean
542 	 strided access.  */
543       if (vect_check_interleaving (dra, drb))
544          return false;
545 
546       if (vect_print_dump_info (REPORT_DR_DETAILS))
547         {
548           fprintf (vect_dump, "can't determine dependence between ");
549           print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
550           fprintf (vect_dump, " and ");
551           print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
552         }
553 
554       return true;
555     }
556 
557   /* Versioning for alias is not yet supported for basic block SLP, and
558      dependence distance is inapplicable; hence, in case of a known data
559      dependence, basic block vectorization is impossible for now.  */
560   if (!loop_vinfo)
561     {
562       if (dra != drb && vect_check_interleaving (dra, drb))
563         return false;
564 
565       if (vect_print_dump_info (REPORT_DR_DETAILS))
566         {
567           fprintf (vect_dump, "determined dependence between ");
568           print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
569           fprintf (vect_dump, " and ");
570           print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
571         }
572 
573       return true;
574     }
575 
576   /* Loop-based vectorization and known data dependence.  */
577   if (DDR_NUM_DIST_VECTS (ddr) == 0)
578     {
579       if (vect_print_dump_info (REPORT_DR_DETAILS))
580         {
581           fprintf (vect_dump, "versioning for alias required: bad dist vector for ");
582           print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
583           fprintf (vect_dump, " and ");
584           print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
585         }
586       /* Add to list of ddrs that need to be tested at run-time.  */
587       return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
588     }
589 
590   loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
591   for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
592     {
593       int dist = dist_v[loop_depth];
594 
595       if (vect_print_dump_info (REPORT_DR_DETAILS))
596 	fprintf (vect_dump, "dependence distance  = %d.", dist);
597 
598       /* Same loop iteration.  */
599       if (dist % vectorization_factor == 0 && dra_size == drb_size)
600 	{
601 	  /* Two references with distance zero have the same alignment.  */
602 	  VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
603 	  VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
604 	  if (vect_print_dump_info (REPORT_ALIGNMENT))
605 	    fprintf (vect_dump, "accesses have the same alignment.");
606 	  if (vect_print_dump_info (REPORT_DR_DETAILS))
607 	    {
608 	      fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
609 	      print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
610 	      fprintf (vect_dump, " and ");
611 	      print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
612 	    }
613 
614           /* For interleaving, mark that there is a read-write dependency if
615              necessary.  We have checked before that one of the data-refs is a store.  */
616           if (DR_IS_READ (dra))
617             DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
618 	  else
619             {
620               if (DR_IS_READ (drb))
621                 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
622 	    }
623 
624           continue;
625 	}
626 
627       if (abs (dist) >= vectorization_factor
628           || (dist > 0 && DDR_REVERSED_P (ddr)))
629 	{
630 	  /* Dependence distance does not create dependence, as far as
631 	     vectorization is concerned, in this case. If DDR_REVERSED_P the
632 	     order of the data-refs in DDR was reversed (to make distance
633 	     vector positive), and the actual distance is negative.  */
634 	  if (vect_print_dump_info (REPORT_DR_DETAILS))
635 	    fprintf (vect_dump, "dependence distance >= VF or negative.");
636 	  continue;
637 	}
638 
639       if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
640 	{
641 	  fprintf (vect_dump, "not vectorized, possible dependence "
642     		              "between data-refs ");
643 	  print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
644 	  fprintf (vect_dump, " and ");
645 	  print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
646 	}
647 
648       return true;
649     }
650 
651   return false;
652 }
653 
654 /* Function vect_analyze_data_ref_dependences.
655 
656    Examine all the data references in the loop, and make sure there do not
657    exist any data dependences between them.  */
658 
659 bool
660 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
661                                    bb_vec_info bb_vinfo)
662 {
663   unsigned int i;
664   VEC (ddr_p, heap) *ddrs = NULL;
665   struct data_dependence_relation *ddr;
666 
667   if (vect_print_dump_info (REPORT_DETAILS))
668     fprintf (vect_dump, "=== vect_analyze_data_ref_dependences ===");
669 
670   if (loop_vinfo)
671     ddrs = LOOP_VINFO_DDRS (loop_vinfo);
672   else
673     ddrs = BB_VINFO_DDRS (bb_vinfo);
674 
675   for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
676     if (vect_analyze_data_ref_dependence (ddr, loop_vinfo))
677       return false;
678 
679   return true;
680 }
681 
682 
683 /* Function vect_compute_data_ref_alignment
684 
685    Compute the misalignment of the data reference DR.
686 
687    Output:
688    1. If during the misalignment computation it is found that the data reference
689       cannot be vectorized then false is returned.
690    2. DR_MISALIGNMENT (DR) is defined.
691 
692    FOR NOW: No analysis is actually performed. Misalignment is calculated
693    only for trivial cases. TODO.  */
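
/* For example (illustrative; vectype V4SI with 16-byte alignment assumed):
   for an access 'a[i + 1]' where the array 'a' itself is 16-byte aligned,
   DR_INIT is 4, so the misalignment is DR_INIT modulo the vector alignment,
   i.e. 4 % 16 = 4 bytes.  */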
694 
695 static bool
696 vect_compute_data_ref_alignment (struct data_reference *dr)
697 {
698   gimple stmt = DR_STMT (dr);
699   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
700   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
701   struct loop *loop = NULL;
702   tree ref = DR_REF (dr);
703   tree vectype;
704   tree base, base_addr;
705   bool base_aligned;
706   tree misalign;
707   tree aligned_to, alignment;
708 
709   if (vect_print_dump_info (REPORT_DETAILS))
710     fprintf (vect_dump, "vect_compute_data_ref_alignment:");
711 
712   if (loop_vinfo)
713     loop = LOOP_VINFO_LOOP (loop_vinfo);
714 
715   /* Initialize misalignment to unknown.  */
716   SET_DR_MISALIGNMENT (dr, -1);
717 
718   misalign = DR_INIT (dr);
719   aligned_to = DR_ALIGNED_TO (dr);
720   base_addr = DR_BASE_ADDRESS (dr);
721   vectype = STMT_VINFO_VECTYPE (stmt_info);
722 
723   /* In case the dataref is in an inner-loop of the loop that is being
724      vectorized (LOOP), we use the base and misalignment information
725      relative to the outer-loop (LOOP). This is ok only if the misalignment
726      stays the same throughout the execution of the inner-loop, which is why
727      we have to check that the stride of the dataref in the inner-loop is
728      evenly divisible by the vector size.  */
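  /* E.g. (illustrative): with a 16-byte vectype, an inner-loop step of 32
     bytes keeps the outer-loop misalignment unchanged across the inner
     iterations, whereas a step of 4 bytes does not, and the misalignment is
     then treated as unknown.  */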
729   if (loop && nested_in_vect_loop_p (loop, stmt))
730     {
731       tree step = DR_STEP (dr);
732       HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
733 
734       if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
735         {
736           if (vect_print_dump_info (REPORT_ALIGNMENT))
737             fprintf (vect_dump, "inner step divides the vector-size.");
738 	  misalign = STMT_VINFO_DR_INIT (stmt_info);
739 	  aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
740 	  base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
741         }
742       else
743 	{
744 	  if (vect_print_dump_info (REPORT_ALIGNMENT))
745 	    fprintf (vect_dump, "inner step doesn't divide the vector-size.");
746 	  misalign = NULL_TREE;
747 	}
748     }
749 
750   base = build_fold_indirect_ref (base_addr);
751   alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
752 
753   if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
754       || !misalign)
755     {
756       if (vect_print_dump_info (REPORT_ALIGNMENT))
757 	{
758 	  fprintf (vect_dump, "Unknown alignment for access: ");
759 	  print_generic_expr (vect_dump, base, TDF_SLIM);
760 	}
761       return true;
762     }
763 
764   if ((DECL_P (base)
765        && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
766 				alignment) >= 0)
767       || (TREE_CODE (base_addr) == SSA_NAME
768 	  && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
769 						      TREE_TYPE (base_addr)))),
770 				   alignment) >= 0))
771     base_aligned = true;
772   else
773     base_aligned = false;
774 
775   if (!base_aligned)
776     {
777       /* Do not change the alignment of global variables if
778 	 flag_section_anchors is enabled.  */
779       if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
780 	  || (TREE_STATIC (base) && flag_section_anchors))
781 	{
782 	  if (vect_print_dump_info (REPORT_DETAILS))
783 	    {
784 	      fprintf (vect_dump, "can't force alignment of ref: ");
785 	      print_generic_expr (vect_dump, ref, TDF_SLIM);
786 	    }
787 	  return true;
788 	}
789 
790       /* Force the alignment of the decl.
791 	 NOTE: This is the only change to the code we make during
792 	 the analysis phase, before deciding to vectorize the loop.  */
793       if (vect_print_dump_info (REPORT_DETAILS))
794 	fprintf (vect_dump, "force alignment");
795       DECL_ALIGN (base) = TYPE_ALIGN (vectype);
796       DECL_USER_ALIGN (base) = 1;
797     }
798 
799   /* At this point we assume that the base is aligned.  */
800   gcc_assert (base_aligned
801 	      || (TREE_CODE (base) == VAR_DECL
802 		  && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
803 
804   /* Modulo alignment.  */
805   misalign = size_binop (FLOOR_MOD_EXPR, misalign, alignment);
806 
807   if (!host_integerp (misalign, 1))
808     {
809       /* Negative or overflowed misalignment value.  */
810       if (vect_print_dump_info (REPORT_DETAILS))
811 	fprintf (vect_dump, "unexpected misalign value");
812       return false;
813     }
814 
815   SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));
816 
817   if (vect_print_dump_info (REPORT_DETAILS))
818     {
819       fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
820       print_generic_expr (vect_dump, ref, TDF_SLIM);
821     }
822 
823   return true;
824 }
825 
826 
827 /* Function vect_compute_data_refs_alignment
828 
829    Compute the misalignment of data references in the loop.
830    Return FALSE if a data reference is found that cannot be vectorized.  */
831 
832 static bool
833 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo,
834                                   bb_vec_info bb_vinfo)
835 {
836   VEC (data_reference_p, heap) *datarefs;
837   struct data_reference *dr;
838   unsigned int i;
839 
840   if (loop_vinfo)
841     datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
842   else
843     datarefs = BB_VINFO_DATAREFS (bb_vinfo);
844 
845   for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
846     if (!vect_compute_data_ref_alignment (dr))
847       return false;
848 
849   return true;
850 }
851 
852 
853 /* Function vect_update_misalignment_for_peel
854 
855    DR - the data reference whose misalignment is to be adjusted.
856    DR_PEEL - the data reference whose misalignment is being made
857              zero in the vector loop by the peel.
858    NPEEL - the number of iterations in the peel loop if the misalignment
859            of DR_PEEL is known at compile time.  */
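
/* For example (illustrative; 16-byte vectors and 4-byte elements assumed):
   if NPEEL is 3 and DR has a known misalignment of 4 bytes, the peel
   advances DR by 3 * 4 = 12 bytes, so its misalignment becomes
   (4 + 12) % 16 = 0.  */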
860 
861 static void
862 vect_update_misalignment_for_peel (struct data_reference *dr,
863                                    struct data_reference *dr_peel, int npeel)
864 {
865   unsigned int i;
866   VEC(dr_p,heap) *same_align_drs;
867   struct data_reference *current_dr;
868   int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
869   int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
870   stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
871   stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
872 
873   /* For interleaved data accesses, the step in the loop must be multiplied by
874      the size of the interleaving group.  */
875   if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
876     dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
877   if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info))
878     dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);
879 
880   /* It can be assumed that the data refs with the same alignment as dr_peel
881      are aligned in the vector loop.  */
882   same_align_drs
883     = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
884   for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++)
885     {
886       if (current_dr != dr)
887         continue;
888       gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
889                   DR_MISALIGNMENT (dr_peel) / dr_peel_size);
890       SET_DR_MISALIGNMENT (dr, 0);
891       return;
892     }
893 
894   if (known_alignment_for_access_p (dr)
895       && known_alignment_for_access_p (dr_peel))
896     {
897       int misal = DR_MISALIGNMENT (dr);
898       tree vectype = STMT_VINFO_VECTYPE (stmt_info);
899       misal += npeel * dr_size;
900       misal %= GET_MODE_SIZE (TYPE_MODE (vectype));
901       SET_DR_MISALIGNMENT (dr, misal);
902       return;
903     }
904 
905   if (vect_print_dump_info (REPORT_DETAILS))
906     fprintf (vect_dump, "Setting misalignment to -1.");
907   SET_DR_MISALIGNMENT (dr, -1);
908 }
909 
910 
911 /* Function vect_verify_datarefs_alignment
912 
913    Return TRUE if all data references in the loop can be
914    handled with respect to alignment.  */
915 
916 bool
917 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
918 {
919   VEC (data_reference_p, heap) *datarefs;
920   struct data_reference *dr;
921   enum dr_alignment_support supportable_dr_alignment;
922   unsigned int i;
923 
924   if (loop_vinfo)
925     datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
926   else
927     datarefs = BB_VINFO_DATAREFS (bb_vinfo);
928 
929   for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
930     {
931       gimple stmt = DR_STMT (dr);
932       stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
933 
934       /* For interleaving, only the alignment of the first access matters.  */
935       if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
936           && DR_GROUP_FIRST_DR (stmt_info) != stmt)
937         continue;
938 
939       supportable_dr_alignment = vect_supportable_dr_alignment (dr);
940       if (!supportable_dr_alignment)
941         {
942           if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
943             {
944               if (DR_IS_READ (dr))
945                 fprintf (vect_dump,
946                          "not vectorized: unsupported unaligned load.");
947               else
948                 fprintf (vect_dump,
949                          "not vectorized: unsupported unaligned store.");
950             }
951           return false;
952         }
953       if (supportable_dr_alignment != dr_aligned
954           && vect_print_dump_info (REPORT_ALIGNMENT))
955         fprintf (vect_dump, "Vectorizing an unaligned access.");
956     }
957   return true;
958 }
959 
960 
961 /* Function vector_alignment_reachable_p
962 
963    Return true if vector alignment for DR is reachable by peeling
964    a few loop iterations.  Return false otherwise.  */
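
/* For example (illustrative; V4SI, i.e. 4 elements of 4 bytes, assumed):
   a known misalignment of 8 bytes is fixed by peeling 2 iterations, while a
   misalignment of 6 bytes is not reachable because 6 is not a multiple of
   the element size; with that same 8-byte misalignment, an interleaved
   access of group size 3 is rejected because the prolog of 4 - 2 = 2
   iterations is not a multiple of the group size.  */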
965 
966 static bool
967 vector_alignment_reachable_p (struct data_reference *dr)
968 {
969   gimple stmt = DR_STMT (dr);
970   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
971   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
972 
973   if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
974     {
975       /* For interleaved access we peel only if the number of iterations in
976 	 the prolog loop ({VF - misalignment}) is a multiple of the
977 	 number of the interleaved accesses.  */
978       int elem_size, mis_in_elements;
979       int nelements = TYPE_VECTOR_SUBPARTS (vectype);
980 
981       /* FORNOW: handle only known alignment.  */
982       if (!known_alignment_for_access_p (dr))
983 	return false;
984 
985       elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
986       mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
987 
988       if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
989 	return false;
990     }
991 
992   /* If misalignment is known at the compile time then allow peeling
993      only if natural alignment is reachable through peeling.  */
994   if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
995     {
996       HOST_WIDE_INT elmsize =
997 		int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
998       if (vect_print_dump_info (REPORT_DETAILS))
999 	{
1000 	  fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
1001 	  fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
1002 	}
1003       if (DR_MISALIGNMENT (dr) % elmsize)
1004 	{
1005 	  if (vect_print_dump_info (REPORT_DETAILS))
1006 	    fprintf (vect_dump, "data size does not divide the misalignment.\n");
1007 	  return false;
1008 	}
1009     }
1010 
1011   if (!known_alignment_for_access_p (dr))
1012     {
1013       tree type = (TREE_TYPE (DR_REF (dr)));
1014       tree ba = DR_BASE_OBJECT (dr);
1015       bool is_packed = false;
1016 
1017       if (ba)
1018 	is_packed = contains_packed_reference (ba);
1019 
1020       if (vect_print_dump_info (REPORT_DETAILS))
1021 	fprintf (vect_dump, "Unknown misalignment, is_packed = %d", is_packed);
1022       if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
1023 	return true;
1024       else
1025 	return false;
1026     }
1027 
1028   return true;
1029 }
1030 
1031 /* Function vect_enhance_data_refs_alignment
1032 
1033    This pass will use loop versioning and loop peeling in order to enhance
1034    the alignment of data references in the loop.
1035 
1036    FOR NOW: we assume that whatever versioning/peeling takes place, only the
1037    original loop is to be vectorized; Any other loops that are created by
1038    the transformations performed in this pass - are not supposed to be
1039    vectorized. This restriction will be relaxed.
1040 
1041    This pass will require a cost model to guide it whether to apply peeling
1042    or versioning or a combination of the two. For example, the scheme that
1043    Intel uses when given a loop with several memory accesses, is as follows:
1044    choose one memory access ('p') whose alignment you want to force by doing
1045    peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1046    other accesses are not necessarily aligned, or (2) use loop versioning to
1047    generate one loop in which all accesses are aligned, and another loop in
1048    which only 'p' is necessarily aligned.
1049 
1050    ("Automatic Intra-Register Vectorization for the Intel Architecture",
1051    Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
1052    Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1053 
1054    Devising a cost model is the most critical aspect of this work. It will
1055    guide us on which access to peel for, whether to use loop versioning, how
1056    many versions to create, etc. The cost model will probably consist of
1057    generic considerations as well as target specific considerations (on
1058    powerpc for example, misaligned stores are more painful than misaligned
1059    loads).
1060 
1061    Here are the general steps involved in alignment enhancements:
1062 
1063      -- original loop, before alignment analysis:
1064 	for (i=0; i<N; i++){
1065 	  x = q[i];			# DR_MISALIGNMENT(q) = unknown
1066 	  p[i] = y;			# DR_MISALIGNMENT(p) = unknown
1067 	}
1068 
1069      -- After vect_compute_data_refs_alignment:
1070 	for (i=0; i<N; i++){
1071 	  x = q[i];			# DR_MISALIGNMENT(q) = 3
1072 	  p[i] = y;			# DR_MISALIGNMENT(p) = unknown
1073 	}
1074 
1075      -- Possibility 1: we do loop versioning:
1076      if (p is aligned) {
1077 	for (i=0; i<N; i++){	# loop 1A
1078 	  x = q[i];			# DR_MISALIGNMENT(q) = 3
1079 	  p[i] = y;			# DR_MISALIGNMENT(p) = 0
1080 	}
1081      }
1082      else {
1083 	for (i=0; i<N; i++){	# loop 1B
1084 	  x = q[i];			# DR_MISALIGNMENT(q) = 3
1085 	  p[i] = y;			# DR_MISALIGNMENT(p) = unaligned
1086 	}
1087      }
1088 
1089      -- Possibility 2: we do loop peeling:
1090      for (i = 0; i < 3; i++){	# (scalar loop, not to be vectorized).
1091 	x = q[i];
1092 	p[i] = y;
1093      }
1094      for (i = 3; i < N; i++){	# loop 2A
1095 	x = q[i];			# DR_MISALIGNMENT(q) = 0
1096 	p[i] = y;			# DR_MISALIGNMENT(p) = unknown
1097      }
1098 
1099      -- Possibility 3: combination of loop peeling and versioning:
1100      for (i = 0; i < 3; i++){	# (scalar loop, not to be vectorized).
1101 	x = q[i];
1102 	p[i] = y;
1103      }
1104      if (p is aligned) {
1105 	for (i = 3; i<N; i++){	# loop 3A
1106 	  x = q[i];			# DR_MISALIGNMENT(q) = 0
1107 	  p[i] = y;			# DR_MISALIGNMENT(p) = 0
1108 	}
1109      }
1110      else {
1111 	for (i = 3; i<N; i++){	# loop 3B
1112 	  x = q[i];			# DR_MISALIGNMENT(q) = 0
1113 	  p[i] = y;			# DR_MISALIGNMENT(p) = unaligned
1114 	}
1115      }
1116 
1117      These loops are later passed to loop_transform to be vectorized. The
1118      vectorizer will use the alignment information to guide the transformation
1119      (whether to generate regular loads/stores, or with special handling for
1120      misalignment).  */
1121 
1122 bool
1123 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1124 {
1125   VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1126   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1127   enum dr_alignment_support supportable_dr_alignment;
1128   struct data_reference *dr0 = NULL;
1129   struct data_reference *dr;
1130   unsigned int i;
1131   bool do_peeling = false;
1132   bool do_versioning = false;
1133   bool stat;
1134   gimple stmt;
1135   stmt_vec_info stmt_info;
1136   int vect_versioning_for_alias_required;
1137 
1138   if (vect_print_dump_info (REPORT_DETAILS))
1139     fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1140 
1141   /* While cost model enhancements are expected in the future, the high level
1142      view of the code at this time is as follows:
1143 
1144      A) If there is a misaligned access then see if peeling to align
1145         this access can make all data references satisfy
1146         vect_supportable_dr_alignment.  If so, update data structures
1147         as needed and return true.
1148 
1149      B) If peeling wasn't possible and there is a data reference with an
1150         unknown misalignment that does not satisfy vect_supportable_dr_alignment
1151         then see if loop versioning checks can be used to make all data
1152         references satisfy vect_supportable_dr_alignment.  If so, update
1153         data structures as needed and return true.
1154 
1155      C) If neither peeling nor versioning were successful then return false if
1156         any data reference does not satisfy vect_supportable_dr_alignment.
1157 
1158      D) Return true (all data references satisfy vect_supportable_dr_alignment).
1159 
1160      Note, Possibility 3 above (which is peeling and versioning together) is not
1161      being done at this time.  */
1162 
1163   /* (1) Peeling to force alignment.  */
1164 
1165   /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1166      Considerations:
1167      + How many accesses will become aligned due to the peeling
1168      - How many accesses will become unaligned due to the peeling,
1169        and the cost of misaligned accesses.
1170      - The cost of peeling (the extra runtime checks, the increase
1171        in code size).
1172 
1173      The scheme we use FORNOW: peel to force the alignment of the first
1174      unsupported misaligned access in the loop.
1175 
1176      TODO: Use a cost model.  */
1177 
1178   for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1179     {
1180       stmt = DR_STMT (dr);
1181       stmt_info = vinfo_for_stmt (stmt);
1182 
1183       /* For interleaving, only the alignment of the first access
1184          matters.  */
1185       if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1186           && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1187         continue;
1188 
1189       if (!DR_IS_READ (dr) && !aligned_access_p (dr))
1190         {
1191 	  do_peeling = vector_alignment_reachable_p (dr);
1192 	  if (do_peeling)
1193 	    dr0 = dr;
1194 	  if (!do_peeling && vect_print_dump_info (REPORT_DETAILS))
1195             fprintf (vect_dump, "vector alignment may not be reachable");
1196 	  break;
1197 	}
1198     }
1199 
1200   vect_versioning_for_alias_required
1201     = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
1202 
1203   /* Temporarily, if versioning for alias is required, we disable peeling
1204      until we support peeling and versioning.  Often peeling for alignment
1205      will require peeling for loop-bound, which in turn requires that we
1206      know how to adjust the loop ivs after the loop.  */
1207   if (vect_versioning_for_alias_required
1208       || !vect_can_advance_ivs_p (loop_vinfo)
1209       || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1210     do_peeling = false;
1211 
1212   if (do_peeling)
1213     {
1214       int mis;
1215       int npeel = 0;
1216       gimple stmt = DR_STMT (dr0);
1217       stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1218       tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1219       int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1220 
1221       if (known_alignment_for_access_p (dr0))
1222         {
1223           /* Since it's known at compile time, compute the number of iterations
1224              in the peeled loop (the peeling factor) for use in updating
1225              DR_MISALIGNMENT values.  The peeling factor is the vectorization
1226              factor minus the misalignment as an element count.  */
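          /* E.g. (illustrative): for V4SI and an 8-byte misalignment of DR0,
             mis = 8 / 4 = 2 elements and npeel = 4 - 2 = 2.  */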
1227           mis = DR_MISALIGNMENT (dr0);
1228           mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1229           npeel = nelements - mis;
1230 
1231 	  /* For interleaved data access every iteration accesses all the
1232 	     members of the group, therefore we divide the number of iterations
1233 	     by the group size.  */
1234 	  stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1235 	  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1236 	    npeel /= DR_GROUP_SIZE (stmt_info);
1237 
1238           if (vect_print_dump_info (REPORT_DETAILS))
1239             fprintf (vect_dump, "Try peeling by %d", npeel);
1240         }
1241 
1242       /* Ensure that all data refs can be vectorized after the peel.  */
1243       for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1244         {
1245           int save_misalignment;
1246 
1247 	  if (dr == dr0)
1248 	    continue;
1249 
1250 	  stmt = DR_STMT (dr);
1251 	  stmt_info = vinfo_for_stmt (stmt);
1252 	  /* For interleaving, only the alignment of the first access
1253             matters.  */
1254 	  if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1255 	      && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1256 	    continue;
1257 
1258 	  save_misalignment = DR_MISALIGNMENT (dr);
1259 	  vect_update_misalignment_for_peel (dr, dr0, npeel);
1260 	  supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1261 	  SET_DR_MISALIGNMENT (dr, save_misalignment);
1262 
1263 	  if (!supportable_dr_alignment)
1264 	    {
1265 	      do_peeling = false;
1266 	      break;
1267 	    }
1268 	}
1269 
1270       if (do_peeling)
1271         {
1272           /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1273              If the misalignment of DR_i is identical to that of dr0 then set
1274              DR_MISALIGNMENT (DR_i) to zero.  If the misalignment of DR_i and
1275              dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1276              by the peeling factor times the element size of DR_i (MOD the
1277              vectorization factor times the size).  Otherwise, the
1278              misalignment of DR_i must be set to unknown.  */
1279 	  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1280 	    if (dr != dr0)
1281 	      vect_update_misalignment_for_peel (dr, dr0, npeel);
1282 
1283           LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1284           LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1285 	  SET_DR_MISALIGNMENT (dr0, 0);
1286 	  if (vect_print_dump_info (REPORT_ALIGNMENT))
1287             fprintf (vect_dump, "Alignment of access forced using peeling.");
1288 
1289           if (vect_print_dump_info (REPORT_DETAILS))
1290             fprintf (vect_dump, "Peeling for alignment will be applied.");
1291 
1292 	  stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1293 	  gcc_assert (stat);
1294           return stat;
1295         }
1296     }
1297 
1298 
1299   /* (2) Versioning to force alignment.  */
1300 
1301   /* Try versioning if:
1302      1) flag_tree_vect_loop_version is TRUE
1303      2) optimize loop for speed
1304      3) there is at least one unsupported misaligned data ref with an unknown
1305         misalignment, and
1306      4) all misaligned data refs with a known misalignment are supported, and
1307      5) the number of runtime alignment checks is within reason.  */
1308 
1309   do_versioning =
1310 	flag_tree_vect_loop_version
1311 	&& optimize_loop_nest_for_speed_p (loop)
1312 	&& (!loop->inner); /* FORNOW */
1313 
1314   if (do_versioning)
1315     {
1316       for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1317         {
1318 	  stmt = DR_STMT (dr);
1319 	  stmt_info = vinfo_for_stmt (stmt);
1320 
1321 	  /* For interleaving, only the alignment of the first access
1322 	     matters.  */
1323 	  if (aligned_access_p (dr)
1324 	      || (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1325 		  && DR_GROUP_FIRST_DR (stmt_info) != stmt))
1326 	    continue;
1327 
1328 	  supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1329 
1330           if (!supportable_dr_alignment)
1331             {
1332               gimple stmt;
1333               int mask;
1334               tree vectype;
1335 
1336               if (known_alignment_for_access_p (dr)
1337                   || VEC_length (gimple,
1338                                  LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1339                      >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
1340                 {
1341                   do_versioning = false;
1342                   break;
1343                 }
1344 
1345               stmt = DR_STMT (dr);
1346               vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1347               gcc_assert (vectype);
1348 
1349               /* The rightmost bits of an aligned address must be zeros.
1350                  Construct the mask needed for this test.  For example,
1351                  GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1352                  mask must be 15 = 0xf. */
1353               mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1354 
1355               /* FORNOW: use the same mask to test all potentially unaligned
1356                  references in the loop.  The vectorizer currently supports
1357                  a single vector size, see the reference to
1358                  GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1359                  vectorization factor is computed.  */
1360               gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
1361                           || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
1362               LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
1363               VEC_safe_push (gimple, heap,
1364                              LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
1365                              DR_STMT (dr));
1366             }
1367         }
1368 
1369       /* Versioning requires at least one misaligned data reference.  */
1370       if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
1371         do_versioning = false;
1372       else if (!do_versioning)
1373         VEC_truncate (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
1374     }
1375 
1376   if (do_versioning)
1377     {
1378       VEC(gimple,heap) *may_misalign_stmts
1379         = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1380       gimple stmt;
1381 
1382       /* It can now be assumed that the data references in the statements
1383          in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1384          of the loop being vectorized.  */
1385       for (i = 0; VEC_iterate (gimple, may_misalign_stmts, i, stmt); i++)
1386         {
1387           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1388           dr = STMT_VINFO_DATA_REF (stmt_info);
1389 	  SET_DR_MISALIGNMENT (dr, 0);
1390 	  if (vect_print_dump_info (REPORT_ALIGNMENT))
1391             fprintf (vect_dump, "Alignment of access forced using versioning.");
1392         }
1393 
1394       if (vect_print_dump_info (REPORT_DETAILS))
1395         fprintf (vect_dump, "Versioning for alignment will be applied.");
1396 
1397       /* Peeling and versioning can't be done together at this time.  */
1398       gcc_assert (! (do_peeling && do_versioning));
1399 
1400       stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1401       gcc_assert (stat);
1402       return stat;
1403     }
1404 
1405   /* This point is reached if neither peeling nor versioning is being done.  */
1406   gcc_assert (! (do_peeling || do_versioning));
1407 
1408   stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1409   return stat;
1410 }
1411 
1412 
1413 /* Function vect_analyze_data_refs_alignment
1414 
1415    Analyze the alignment of the data-references in the loop.
1416    Return FALSE if a data reference is found that cannot be vectorized.  */
1417 
1418 bool
1419 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo,
1420                                   bb_vec_info bb_vinfo)
1421 {
1422   if (vect_print_dump_info (REPORT_DETAILS))
1423     fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
1424 
1425   if (!vect_compute_data_refs_alignment (loop_vinfo, bb_vinfo))
1426     {
1427       if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1428 	fprintf (vect_dump,
1429 		 "not vectorized: can't calculate alignment for data ref.");
1430       return false;
1431     }
1432 
1433   return true;
1434 }
1435 
1436 
1437 /* Analyze groups of strided accesses: check that DR belongs to a group of
1438    strided accesses of legal size, step, etc. Detect gaps, single element
1439    interleaving, and other special cases. Set strided access info.
1440    Collect groups of strided stores for further use in SLP analysis.  */
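
/* For example (illustrative, with 4-byte elements):

	for (i = 0; i < N; i++)
	  {
	    ... = a[4*i];	# DR_INIT = 0
	    ... = a[4*i + 1];	# DR_INIT = 4
	    ... = a[4*i + 3];	# DR_INIT = 12
	  }

   All three loads share DR_STEP = 16, so the group stride is 16/4 = 4
   elements; the chain has 3 members and a gap (element 2), which is allowed
   for loads but rules out SLP of the group.  */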
1441 
1442 static bool
1443 vect_analyze_group_access (struct data_reference *dr)
1444 {
1445   tree step = DR_STEP (dr);
1446   tree scalar_type = TREE_TYPE (DR_REF (dr));
1447   HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
1448   gimple stmt = DR_STMT (dr);
1449   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1450   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1451   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1452   HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
1453   HOST_WIDE_INT stride, last_accessed_element = 1;
1454   bool slp_impossible = false;
1455 
1456   /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
1457      interleaving group (including gaps).  */
1458   stride = dr_step / type_size;
1459 
1460   /* Non-consecutive access is possible only if it is part of interleaving.  */
1461   if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
1462     {
1463       /* Check if this DR is a part of interleaving, and is a single
1464 	 element of the group that is accessed in the loop.  */
1465 
1466       /* Gaps are supported only for loads. STEP must be a multiple of the type
1467 	 size.  The size of the group must be a power of 2.  */
1468       if (DR_IS_READ (dr)
1469 	  && (dr_step % type_size) == 0
1470 	  && stride > 0
1471 	  && exact_log2 (stride) != -1)
1472 	{
1473 	  DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
1474 	  DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
1475 	  if (vect_print_dump_info (REPORT_DR_DETAILS))
1476 	    {
1477 	      fprintf (vect_dump, "Detected single element interleaving ");
1478 	      print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1479 	      fprintf (vect_dump, " step ");
1480 	      print_generic_expr (vect_dump, step, TDF_SLIM);
1481 	    }
1482 
1483 	  if (loop_vinfo)
1484 	    {
1485 	      LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
1486 
1487 	      if (vect_print_dump_info (REPORT_DETAILS))
1488 		fprintf (vect_dump, "Data access with gaps requires scalar "
1489 				    "epilogue loop");
1490 	    }
1491 
1492 	  return true;
1493 	}
1494       if (vect_print_dump_info (REPORT_DETAILS))
1495 	fprintf (vect_dump, "not consecutive access");
1496       return false;
1497     }
1498 
1499   if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt)
1500     {
1501       /* First stmt in the interleaving chain. Check the chain.  */
1502       gimple next = DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt));
1503       struct data_reference *data_ref = dr;
1504       unsigned int count = 1;
1505       tree next_step;
1506       tree prev_init = DR_INIT (data_ref);
1507       gimple prev = stmt;
1508       HOST_WIDE_INT diff, count_in_bytes, gaps = 0;
1509 
1510       while (next)
1511         {
1512           /* Skip identical data-refs.  If two or more stmts share a data-ref
1513              (supported only for loads), we vectorize only the first stmt, and
1514              the rest get their vectorized loads from the first one.  */
1515           if (!tree_int_cst_compare (DR_INIT (data_ref),
1516                                      DR_INIT (STMT_VINFO_DATA_REF (
1517 						   vinfo_for_stmt (next)))))
1518             {
1519               if (!DR_IS_READ (data_ref))
1520                 {
1521                   if (vect_print_dump_info (REPORT_DETAILS))
1522                     fprintf (vect_dump, "Two store stmts share the same dr.");
1523                   return false;
1524                 }
1525 
1526               /* Check that there are no load-store dependences for these loads,
1527                  to prevent a load-store-load sequence to the same location.  */
1528               if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
1529                   || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
1530                 {
1531                   if (vect_print_dump_info (REPORT_DETAILS))
1532                     fprintf (vect_dump,
1533                              "READ_WRITE dependence in interleaving.");
1534                   return false;
1535                 }
1536 
1537               /* For loads, reuse the same data-ref load.  */
1538               DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
1539 
1540               prev = next;
1541               next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
1542               continue;
1543             }
1544 
1545           prev = next;
1546 
1547           /* Check that all the accesses have the same STEP.  */
1548           next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
1549           if (tree_int_cst_compare (step, next_step))
1550             {
1551               if (vect_print_dump_info (REPORT_DETAILS))
1552                 fprintf (vect_dump, "not consecutive access in interleaving");
1553               return false;
1554             }
1555 
1556           data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
1557           /* Check that the distance between two accesses is equal to the type
1558              size. Otherwise, we have gaps.  */
1559           diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
1560                   - TREE_INT_CST_LOW (prev_init)) / type_size;
1561 	  if (diff != 1)
1562 	    {
1563 	      /* FORNOW: SLP of accesses with gaps is not supported.  */
1564 	      slp_impossible = true;
1565 	      if (!DR_IS_READ (data_ref))
1566 		{
1567 		  if (vect_print_dump_info (REPORT_DETAILS))
1568 		    fprintf (vect_dump, "interleaved store with gaps");
1569 		  return false;
1570 		}
1571 
1572               gaps += diff - 1;
1573 	    }
1574 
1575 	  last_accessed_element += diff;
1576 
1577           /* Store the gap from the previous member of the group. If there is no
1578              gap in the access, DR_GROUP_GAP is always 1.  */
1579           DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
1580 
1581           prev_init = DR_INIT (data_ref);
1582           next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
1583           /* Count the number of data-refs in the chain.  */
1584           count++;
1585         }
1586 
1587       /* COUNT is the number of accesses found; we multiply it by the size of
1588          the type to get COUNT_IN_BYTES.  */
1589       count_in_bytes = type_size * count;
1590 
1591       /* Check that the size of the interleaving (including gaps) is not
1592          greater than STEP.  */
1593       if (dr_step && dr_step < count_in_bytes + gaps * type_size)
1594         {
1595           if (vect_print_dump_info (REPORT_DETAILS))
1596             {
1597               fprintf (vect_dump, "interleaving size is greater than step for ");
1598               print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1599             }
1600           return false;
1601         }
1602 
1603       /* Check that the size of the interleaving is equal to STEP for stores,
1604          i.e., that there are no gaps.  */
1605       if (dr_step && dr_step != count_in_bytes)
1606         {
1607           if (DR_IS_READ (dr))
1608             {
1609               slp_impossible = true;
1610               /* There is a gap after the last load in the group.  This gap is the
1611                  difference between the stride and the number of elements.  When
1612                  there is no gap, this difference is 0.  */
1613               DR_GROUP_GAP (vinfo_for_stmt (stmt)) = stride - count;
1614             }
1615           else
1616             {
1617               if (vect_print_dump_info (REPORT_DETAILS))
1618                 fprintf (vect_dump, "interleaved store with gaps");
1619               return false;
1620             }
1621         }
1622 
1623       /* Check that STEP is a multiple of type size.  */
1624       if (dr_step && (dr_step % type_size) != 0)
1625         {
1626           if (vect_print_dump_info (REPORT_DETAILS))
1627             {
1628               fprintf (vect_dump, "step is not a multiple of type size: step ");
1629               print_generic_expr (vect_dump, step, TDF_SLIM);
1630               fprintf (vect_dump, " size ");
1631               print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
1632                                   TDF_SLIM);
1633             }
1634           return false;
1635         }
1636 
1637       /* FORNOW: we handle only interleaving that is a power of 2.
1638          We don't fail here if it may be still possible to vectorize the
1639          group using SLP. If not, the size of the group will be checked in
1640          vect_analyze_operations, and the vectorization will fail.  */
1641       if (exact_log2 (stride) == -1)
1642 	{
1643 	  if (vect_print_dump_info (REPORT_DETAILS))
1644 	    fprintf (vect_dump, "interleaving is not a power of 2");
1645 
1646 	  if (slp_impossible)
1647 	    return false;
1648 	}
1649 
1650       if (stride == 0)
1651         stride = count;
1652 
1653       DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
1654       if (vect_print_dump_info (REPORT_DETAILS))
1655         fprintf (vect_dump, "Detected interleaving of size %d", (int)stride);
1656 
1657       /* SLP: create an SLP data structure for every interleaving group of
1658 	 stores for further analysis in vect_analyze_slp.  */
1659       if (!DR_IS_READ (dr) && !slp_impossible)
1660         {
1661           if (loop_vinfo)
1662             VEC_safe_push (gimple, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo),
1663                            stmt);
1664           if (bb_vinfo)
1665             VEC_safe_push (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo),
1666                            stmt);
1667         }
1668 
1669       /* There is a gap at the end of the group.  */
1670       if (stride - last_accessed_element > 0 && loop_vinfo)
1671 	{
1672 	  LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
1673 	  if (vect_print_dump_info (REPORT_DETAILS))
1674 	    fprintf (vect_dump, "Data access with gaps requires scalar "
1675 				"epilogue loop");
1676 	}
1677     }
1678 
1679   return true;
1680 }
1681 
1682 
1683 /* Analyze the access pattern of the data-reference DR.
1684    In case of non-consecutive accesses call vect_analyze_group_access() to
1685    analyze groups of strided accesses.  */
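/* For example (illustrative only), with 'a' an array of ints accessed in the
   analyzed loop:

       ... = a[i];      step == sizeof (int):    consecutive access;
       ... = *p;        step == 0:               invariant, rejected in loops;
       ... = a[4*i];    step == 4 * sizeof (int): not consecutive, handed to
                                                  vect_analyze_group_access().  */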
1686 
1687 static bool
1688 vect_analyze_data_ref_access (struct data_reference *dr)
1689 {
1690   tree step = DR_STEP (dr);
1691   tree scalar_type = TREE_TYPE (DR_REF (dr));
1692   gimple stmt = DR_STMT (dr);
1693   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1694   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1695   struct loop *loop = NULL;
1696   HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
1697 
1698   if (loop_vinfo)
1699     loop = LOOP_VINFO_LOOP (loop_vinfo);
1700 
1701   if (loop_vinfo && !step)
1702     {
1703       if (vect_print_dump_info (REPORT_DETAILS))
1704 	fprintf (vect_dump, "bad data-ref access in loop");
1705       return false;
1706     }
1707 
1708   /* Don't allow invariant accesses in loops.  */
1709   if (loop_vinfo && dr_step == 0)
1710     return false;
1711 
1712   if (loop && nested_in_vect_loop_p (loop, stmt))
1713     {
1714       /* Interleaved accesses are not yet supported within outer-loop
1715         vectorization for references in the inner-loop.  */
1716       DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
1717 
1718       /* For the rest of the analysis we use the outer-loop step.  */
1719       step = STMT_VINFO_DR_STEP (stmt_info);
1720       dr_step = TREE_INT_CST_LOW (step);
1721 
1722       if (dr_step == 0)
1723 	{
1724 	  if (vect_print_dump_info (REPORT_ALIGNMENT))
1725 	    fprintf (vect_dump, "zero step in outer loop.");
1726 	  if (DR_IS_READ (dr))
1727   	    return true;
1728 	  else
1729 	    return false;
1730 	}
1731     }
1732 
1733   /* Consecutive?  */
1734   if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
1735     {
1736       /* Mark that it is not interleaving.  */
1737       DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
1738       return true;
1739     }
1740 
1741   if (loop && nested_in_vect_loop_p (loop, stmt))
1742     {
1743       if (vect_print_dump_info (REPORT_ALIGNMENT))
1744 	fprintf (vect_dump, "strided access in outer loop.");
1745       return false;
1746     }
1747 
1748   /* Not a consecutive access - check whether it is part of an interleaving group.  */
1749   return vect_analyze_group_access (dr);
1750 }
1751 
1752 
1753 /* Function vect_analyze_data_ref_accesses.
1754 
1755    Analyze the access pattern of all the data references in the loop.
1756 
1757    FORNOW: the only access pattern that is considered vectorizable is a
1758 	   simple step 1 (consecutive) access.
1759 
1760    FORNOW: handle only arrays and pointer accesses.  */
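/* For example (illustrative only), in

       for (i = 0; i < n; i++)
         c[i] = a[i] + b[i];

   the loads of a[i] and b[i] and the store to c[i] all advance by one
   element per iteration, so each call to vect_analyze_data_ref_access
   below succeeds.  */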
1761 
1762 bool
1763 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1764 {
1765   unsigned int i;
1766   VEC (data_reference_p, heap) *datarefs;
1767   struct data_reference *dr;
1768 
1769   if (vect_print_dump_info (REPORT_DETAILS))
1770     fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
1771 
1772   if (loop_vinfo)
1773     datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1774   else
1775     datarefs = BB_VINFO_DATAREFS (bb_vinfo);
1776 
1777   for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1778     if (!vect_analyze_data_ref_access (dr))
1779       {
1780 	if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1781 	  fprintf (vect_dump, "not vectorized: complicated access pattern.");
1782 	return false;
1783       }
1784 
1785   return true;
1786 }
1787 
1788 /* Function vect_prune_runtime_alias_test_list.
1789 
1790    Prune a list of ddrs to be tested at run-time by versioning for alias.
1791    Return FALSE if the resulting list of ddrs is longer than allowed by
1792    PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS; otherwise return TRUE.  */
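/* For illustration (a sketch of the versioning transformation, not code in
   this file): every ddr left in the list costs one run-time overlap check of
   roughly the form

       if (a + n <= b || b + n <= a)
         vectorized_loop ();
       else
         scalar_loop ();

   so removing ddrs whose ranges duplicate an earlier pair keeps the number
   of generated checks within the parameter's limit.  */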
1793 
1794 bool
1795 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
1796 {
1797   VEC (ddr_p, heap) * ddrs =
1798     LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
1799   unsigned i, j;
1800 
1801   if (vect_print_dump_info (REPORT_DETAILS))
1802     fprintf (vect_dump, "=== vect_prune_runtime_alias_test_list ===");
1803 
1804   for (i = 0; i < VEC_length (ddr_p, ddrs); )
1805     {
1806       bool found;
1807       ddr_p ddr_i;
1808 
1809       ddr_i = VEC_index (ddr_p, ddrs, i);
1810       found = false;
1811 
1812       for (j = 0; j < i; j++)
1813         {
1814 	  ddr_p ddr_j = VEC_index (ddr_p, ddrs, j);
1815 
1816 	  if (vect_vfa_range_equal (ddr_i, ddr_j))
1817 	    {
1818 	      if (vect_print_dump_info (REPORT_DR_DETAILS))
1819 		{
1820 		  fprintf (vect_dump, "found equal ranges ");
1821 		  print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_i)), TDF_SLIM);
1822 		  fprintf (vect_dump, ", ");
1823 		  print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_i)), TDF_SLIM);
1824 		  fprintf (vect_dump, " and ");
1825 		  print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_j)), TDF_SLIM);
1826 		  fprintf (vect_dump, ", ");
1827 		  print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_j)), TDF_SLIM);
1828 		}
1829 	      found = true;
1830 	      break;
1831 	    }
1832 	}
1833 
1834       if (found)
1835       {
1836 	VEC_ordered_remove (ddr_p, ddrs, i);
1837 	continue;
1838       }
1839       i++;
1840     }
1841 
1842   if (VEC_length (ddr_p, ddrs) >
1843        (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
1844     {
1845       if (vect_print_dump_info (REPORT_DR_DETAILS))
1846 	{
1847 	  fprintf (vect_dump,
1848 		   "disable versioning for alias - max number of generated "
1849 		   "checks exceeded.");
1850 	}
1851 
1852       VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);
1853 
1854       return false;
1855     }
1856 
1857   return true;
1858 }
1859 
1860 
1861 /* Function vect_analyze_data_refs.
1862 
1863   Find all the data references in the loop or basic block.
1864 
1865    The general structure of the analysis of data refs in the vectorizer is as
1866    follows:
1867    1- vect_analyze_data_refs(loop/bb): call
1868       compute_data_dependences_for_loop/bb to find and analyze all data-refs
1869       in the loop/bb and their dependences.
1870    2- vect_analyze_data_ref_dependences(): apply dependence testing using ddrs.
1871    3- vect_analyze_data_refs_alignment(): check that ref_stmt.alignment is ok.
1872    4- vect_analyze_data_ref_accesses(): check that ref_stmt.step is ok.
1873 
1874 */
1875 
1876 bool
1877 vect_analyze_data_refs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1878 {
1879   struct loop *loop = NULL;
1880   basic_block bb = NULL;
1881   unsigned int i;
1882   VEC (data_reference_p, heap) *datarefs;
1883   struct data_reference *dr;
1884   tree scalar_type;
1885   bool res;
1886 
1887   if (vect_print_dump_info (REPORT_DETAILS))
1888     fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
1889 
1890   if (loop_vinfo)
1891     {
1892       loop = LOOP_VINFO_LOOP (loop_vinfo);
1893       res = compute_data_dependences_for_loop
1894 	(loop, true, &LOOP_VINFO_DATAREFS (loop_vinfo),
1895 	 &LOOP_VINFO_DDRS (loop_vinfo));
1896 
1897       if (!res)
1898 	{
1899 	  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1900 	    fprintf (vect_dump, "not vectorized: loop contains function calls"
1901 		     " or data references that cannot be analyzed");
1902 	  return false;
1903 	}
1904 
1905       datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1906     }
1907   else
1908     {
1909       bb = BB_VINFO_BB (bb_vinfo);
1910       res = compute_data_dependences_for_bb (bb, true,
1911 					     &BB_VINFO_DATAREFS (bb_vinfo),
1912 					     &BB_VINFO_DDRS (bb_vinfo));
1913       if (!res)
1914 	{
1915 	  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1916 	    fprintf (vect_dump, "not vectorized: basic block contains function"
1917 		     " calls or data references that cannot be analyzed");
1918 	  return false;
1919 	}
1920 
1921       datarefs = BB_VINFO_DATAREFS (bb_vinfo);
1922     }
1923 
1924   /* Go through the data-refs, check that the analysis succeeded, and update the
1925      pointer from the stmt_vec_info struct to the DR and the vectype.  */
1926 
1927   for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1928     {
1929       gimple stmt;
1930       stmt_vec_info stmt_info;
1931       tree base, offset, init;
1932 
1933       if (!dr || !DR_REF (dr))
1934         {
1935           if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1936 	    fprintf (vect_dump, "not vectorized: unhandled data-ref ");
1937           return false;
1938         }
1939 
1940       stmt = DR_STMT (dr);
1941       stmt_info = vinfo_for_stmt (stmt);
1942 
1943       /* Check that analysis of the data-ref succeeded.  */
1944       if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
1945           || !DR_STEP (dr))
1946         {
1947           if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1948             {
1949               fprintf (vect_dump, "not vectorized: data ref analysis failed ");
1950               print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1951             }
1952           return false;
1953         }
1954 
1955       if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
1956         {
1957           if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1958             fprintf (vect_dump, "not vectorized: base addr of dr is a "
1959                      "constant");
1960           return false;
1961         }
1962 
1963       base = unshare_expr (DR_BASE_ADDRESS (dr));
1964       offset = unshare_expr (DR_OFFSET (dr));
1965       init = unshare_expr (DR_INIT (dr));
1966 
1967       /* Update DR field in stmt_vec_info struct.  */
1968 
1969       /* If the dataref is in an inner-loop of the loop that is considered
1970 	 for vectorization, we also want to analyze the access relative to
1971 	 the outer-loop (DR contains information only relative to the
1972 	 inner-most enclosing loop).  We do that by building a reference to the
1973 	 first location accessed by the inner-loop, and analyze it relative to
1974 	 the outer-loop.  */
1975       if (loop && nested_in_vect_loop_p (loop, stmt))
1976 	{
1977 	  tree outer_step, outer_base, outer_init;
1978 	  HOST_WIDE_INT pbitsize, pbitpos;
1979 	  tree poffset;
1980 	  enum machine_mode pmode;
1981 	  int punsignedp, pvolatilep;
1982 	  affine_iv base_iv, offset_iv;
1983 	  tree dinit;
1984 
1985 	  /* Build a reference to the first location accessed by the
1986 	     inner-loop: *(BASE+INIT). (The first location is actually
1987 	     BASE+INIT+OFFSET, but we add OFFSET separately later).  */
1988           tree inner_base = build_fold_indirect_ref
1989                                 (fold_build2 (POINTER_PLUS_EXPR,
1990                                               TREE_TYPE (base), base,
1991                                               fold_convert (sizetype, init)));
1992 
1993 	  if (vect_print_dump_info (REPORT_DETAILS))
1994 	    {
1995 	      fprintf (vect_dump, "analyze in outer-loop: ");
1996 	      print_generic_expr (vect_dump, inner_base, TDF_SLIM);
1997 	    }
1998 
1999 	  outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
2000 		          &poffset, &pmode, &punsignedp, &pvolatilep, false);
2001 	  gcc_assert (outer_base != NULL_TREE);
2002 
2003 	  if (pbitpos % BITS_PER_UNIT != 0)
2004 	    {
2005 	      if (vect_print_dump_info (REPORT_DETAILS))
2006 		fprintf (vect_dump, "failed: bit offset alignment.\n");
2007 	      return false;
2008 	    }
2009 
2010 	  outer_base = build_fold_addr_expr (outer_base);
2011 	  if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
2012                           &base_iv, false))
2013 	    {
2014 	      if (vect_print_dump_info (REPORT_DETAILS))
2015 		fprintf (vect_dump, "failed: evolution of base is not affine.\n");
2016 	      return false;
2017 	    }
2018 
2019 	  if (offset)
2020 	    {
2021 	      if (poffset)
2022 		poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
2023                                        poffset);
2024 	      else
2025 		poffset = offset;
2026 	    }
2027 
2028 	  if (!poffset)
2029 	    {
2030 	      offset_iv.base = ssize_int (0);
2031 	      offset_iv.step = ssize_int (0);
2032 	    }
2033 	  else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
2034                                &offset_iv, false))
2035 	    {
2036 	      if (vect_print_dump_info (REPORT_DETAILS))
2037 	        fprintf (vect_dump, "evolution of offset is not affine.\n");
2038 	      return false;
2039 	    }
2040 
2041 	  outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
2042 	  split_constant_offset (base_iv.base, &base_iv.base, &dinit);
2043 	  outer_init =  size_binop (PLUS_EXPR, outer_init, dinit);
2044 	  split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
2045 	  outer_init =  size_binop (PLUS_EXPR, outer_init, dinit);
2046 
2047 	  outer_step = size_binop (PLUS_EXPR,
2048 				fold_convert (ssizetype, base_iv.step),
2049 				fold_convert (ssizetype, offset_iv.step));
2050 
2051 	  STMT_VINFO_DR_STEP (stmt_info) = outer_step;
2052 	  /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
2053 	  STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
2054 	  STMT_VINFO_DR_INIT (stmt_info) = outer_init;
2055 	  STMT_VINFO_DR_OFFSET (stmt_info) =
2056 				fold_convert (ssizetype, offset_iv.base);
2057 	  STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
2058 				size_int (highest_pow2_factor (offset_iv.base));
2059 
2060 	  if (vect_print_dump_info (REPORT_DETAILS))
2061 	    {
2062 	      fprintf (vect_dump, "\touter base_address: ");
2063 	      print_generic_expr (vect_dump, STMT_VINFO_DR_BASE_ADDRESS (stmt_info), TDF_SLIM);
2064 	      fprintf (vect_dump, "\n\touter offset from base address: ");
2065 	      print_generic_expr (vect_dump, STMT_VINFO_DR_OFFSET (stmt_info), TDF_SLIM);
2066 	      fprintf (vect_dump, "\n\touter constant offset from base address: ");
2067 	      print_generic_expr (vect_dump, STMT_VINFO_DR_INIT (stmt_info), TDF_SLIM);
2068 	      fprintf (vect_dump, "\n\touter step: ");
2069 	      print_generic_expr (vect_dump, STMT_VINFO_DR_STEP (stmt_info), TDF_SLIM);
2070 	      fprintf (vect_dump, "\n\touter aligned to: ");
2071 	      print_generic_expr (vect_dump, STMT_VINFO_DR_ALIGNED_TO (stmt_info), TDF_SLIM);
2072 	    }
2073 	}
2074 
2075       if (STMT_VINFO_DATA_REF (stmt_info))
2076         {
2077           if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2078             {
2079               fprintf (vect_dump,
2080                        "not vectorized: more than one data ref in stmt: ");
2081               print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2082             }
2083           return false;
2084         }
2085 
2086       STMT_VINFO_DATA_REF (stmt_info) = dr;
2087 
2088       /* Set vectype for STMT.  */
2089       scalar_type = TREE_TYPE (DR_REF (dr));
2090       STMT_VINFO_VECTYPE (stmt_info) =
2091                 get_vectype_for_scalar_type (scalar_type);
2092       if (!STMT_VINFO_VECTYPE (stmt_info))
2093         {
2094           if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2095             {
2096               fprintf (vect_dump,
2097                        "not vectorized: no vectype for stmt: ");
2098               print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2099               fprintf (vect_dump, " scalar_type: ");
2100               print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
2101             }
2102           return false;
2103         }
2104     }
2105 
2106   return true;
2107 }
2108 
2109 
2110 /* Function vect_get_new_vect_var.
2111 
2112    Returns a new variable.  The current naming scheme prepends the prefix
2113    "vect_", "stmp_" or "vect_p" (depending on the value of VAR_KIND) to NAME,
2114    if provided, to form the name of the vectorizer-generated variable; with no
2115    NAME, only the prefix is used.  */
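/* E.g. (illustrative only, PTR_TYPE standing for any suitable pointer type):

       tree vp = vect_get_new_vect_var (ptr_type, vect_pointer_var, "x");

   creates a temporary whose name is based on "vect_px"; vect_simple_var and
   vect_scalar_var would yield "vect_x" and "stmp_x" respectively.  */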
2116 
2117 tree
2118 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
2119 {
2120   const char *prefix;
2121   tree new_vect_var;
2122 
2123   switch (var_kind)
2124   {
2125   case vect_simple_var:
2126     prefix = "vect_";
2127     break;
2128   case vect_scalar_var:
2129     prefix = "stmp_";
2130     break;
2131   case vect_pointer_var:
2132     prefix = "vect_p";
2133     break;
2134   default:
2135     gcc_unreachable ();
2136   }
2137 
2138   if (name)
2139     {
2140       char* tmp = concat (prefix, name, NULL);
2141       new_vect_var = create_tmp_var (type, tmp);
2142       free (tmp);
2143     }
2144   else
2145     new_vect_var = create_tmp_var (type, prefix);
2146 
2147   /* Mark vector typed variable as a gimple register variable.  */
2148   if (TREE_CODE (type) == VECTOR_TYPE)
2149     DECL_GIMPLE_REG_P (new_vect_var) = true;
2150 
2151   return new_vect_var;
2152 }
2153 
2154 
2155 /* Function vect_create_addr_base_for_vector_ref.
2156 
2157    Create an expression that computes the address of the first memory location
2158    that will be accessed for a data reference.
2159 
2160    Input:
2161    STMT: The statement containing the data reference.
2162    NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
2163    OFFSET: Optional. If supplied, it is added to the initial address.
2164    LOOP:    Specify relative to which loop-nest the address should be computed.
2165             For example, when the dataref is in an inner-loop nested in an
2166 	    outer-loop that is now being vectorized, LOOP can be either the
2167 	    outer-loop, or the inner-loop. The first memory location accessed
2168 	    by the following dataref ('in' points to short):
2169 
2170 		for (i=0; i<N; i++)
2171 		   for (j=0; j<M; j++)
2172 		     s += in[i+j]
2173 
2174 	    is as follows:
2175 	    if LOOP=i_loop:	&in		(relative to i_loop)
2176 	    if LOOP=j_loop: 	&in+i*2B	(relative to j_loop)
2177 
2178    Output:
2179    1. Return an SSA_NAME whose value is the address of the memory location of
2180       the first vector of the data reference.
2181    2. If NEW_STMT_LIST is not NULL_TREE after return, the caller must insert
2182       the statement(s) that define the returned SSA_NAME.
2183 
2184    FORNOW: We are only handling array accesses with step 1.  */
2185 
2186 tree
2187 vect_create_addr_base_for_vector_ref (gimple stmt,
2188 				      gimple_seq *new_stmt_list,
2189 				      tree offset,
2190 				      struct loop *loop)
2191 {
2192   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2193   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2194   tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
2195   tree base_name;
2196   tree data_ref_base_var;
2197   tree vec_stmt;
2198   tree addr_base, addr_expr;
2199   tree dest;
2200   gimple_seq seq = NULL;
2201   tree base_offset = unshare_expr (DR_OFFSET (dr));
2202   tree init = unshare_expr (DR_INIT (dr));
2203   tree vect_ptr_type;
2204   tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
2205   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2206 
2207   if (loop_vinfo && loop && loop != (gimple_bb (stmt))->loop_father)
2208     {
2209       struct loop *outer_loop = LOOP_VINFO_LOOP (loop_vinfo);
2210 
2211       gcc_assert (nested_in_vect_loop_p (outer_loop, stmt));
2212 
2213       data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
2214       base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
2215       init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
2216     }
2217 
2218   if (loop_vinfo)
2219     base_name = build_fold_indirect_ref (data_ref_base);
2220   else
2221     {
2222       base_offset = ssize_int (0);
2223       init = ssize_int (0);
2224       base_name = build_fold_indirect_ref (unshare_expr (DR_REF (dr)));
2225     }
2226 
2227   data_ref_base_var = create_tmp_var (TREE_TYPE (data_ref_base), "batmp");
2228   add_referenced_var (data_ref_base_var);
2229   data_ref_base = force_gimple_operand (data_ref_base, &seq, true,
2230 					data_ref_base_var);
2231   gimple_seq_add_seq (new_stmt_list, seq);
2232 
2233   /* Create base_offset.  */
2234   base_offset = size_binop (PLUS_EXPR,
2235 			    fold_convert (sizetype, base_offset),
2236 			    fold_convert (sizetype, init));
2237   dest = create_tmp_var (sizetype, "base_off");
2238   add_referenced_var (dest);
2239   base_offset = force_gimple_operand (base_offset, &seq, true, dest);
2240   gimple_seq_add_seq (new_stmt_list, seq);
2241 
2242   if (offset)
2243     {
2244       tree tmp = create_tmp_var (sizetype, "offset");
2245 
2246       add_referenced_var (tmp);
2247       offset = fold_build2 (MULT_EXPR, sizetype,
2248 			    fold_convert (sizetype, offset), step);
2249       base_offset = fold_build2 (PLUS_EXPR, sizetype,
2250 				 base_offset, offset);
2251       base_offset = force_gimple_operand (base_offset, &seq, false, tmp);
2252       gimple_seq_add_seq (new_stmt_list, seq);
2253     }
2254 
2255   /* base + base_offset */
2256   if (loop_vinfo)
2257     addr_base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (data_ref_base),
2258                              data_ref_base, base_offset);
2259   else
2260     {
2261       if (TREE_CODE (DR_REF (dr)) == INDIRECT_REF)
2262         addr_base = unshare_expr (TREE_OPERAND (DR_REF (dr), 0));
2263       else
2264         addr_base = build1 (ADDR_EXPR,
2265                             build_pointer_type (TREE_TYPE (DR_REF (dr))),
2266                             unshare_expr (DR_REF (dr)));
2267     }
2268 
2269   vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
2270 
2271   vec_stmt = fold_convert (vect_ptr_type, addr_base);
2272   addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2273                                      get_name (base_name));
2274   add_referenced_var (addr_expr);
2275   vec_stmt = force_gimple_operand (vec_stmt, &seq, false, addr_expr);
2276   gimple_seq_add_seq (new_stmt_list, seq);
2277 
2278   if (vect_print_dump_info (REPORT_DETAILS))
2279     {
2280       fprintf (vect_dump, "created ");
2281       print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
2282     }
2283 
2284   return vec_stmt;
2285 }
2286 
2287 
2288 /* Function vect_create_data_ref_ptr.
2289 
2290    Create a new pointer to vector type (vp), that points to the first location
2291    accessed in the loop by STMT, along with the def-use update chain to
2292    appropriately advance the pointer through the loop iterations. Also set
2293    aliasing information for the pointer.  This vector pointer is used by the
2294    callers to this function to create a memory reference expression for vector
2295    load/store access.
2296 
2297    Input:
2298    1. STMT: a stmt that references memory. Expected to be of the form
2299          GIMPLE_ASSIGN <name, data-ref> or
2300 	 GIMPLE_ASSIGN <data-ref, name>.
2301    2. AT_LOOP: the loop where the vector memref is to be created.
2302    3. OFFSET (optional): an offset to be added to the initial address accessed
2303         by the data-ref in STMT.
2304    4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
2305         pointing to the initial address.
2306    5. TYPE: if not NULL indicates the required type of the data-ref.
2307 
2308    Output:
2309    1. Declare a new ptr to vector_type, and have it point to the base of the
2310       data reference (the initial address accessed by the data reference).
2311       For example, for vector of type V8HI, the following code is generated:
2312 
2313       v8hi *vp;
2314       vp = (v8hi *)initial_address;
2315 
2316       if OFFSET is not supplied:
2317          initial_address = &a[init];
2318       if OFFSET is supplied:
2319          initial_address = &a[init + OFFSET];
2320 
2321       Return the initial_address in INITIAL_ADDRESS.
2322 
2323    2. If ONLY_INIT is true, just return the initial pointer.  Otherwise, also
2324       update the pointer in each iteration of the loop.
2325 
2326       Return the increment stmt that updates the pointer in PTR_INCR.
2327 
2328    3. Set INV_P to true if the access pattern of the data reference in the
2329       vectorized loop is invariant. Set it to false otherwise.
2330 
2331    4. Return the pointer.  */
2332 
2333 tree
2334 vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
2335 			  tree offset, tree *initial_address, gimple *ptr_incr,
2336 			  bool only_init, bool *inv_p)
2337 {
2338   tree base_name;
2339   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2340   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2341   struct loop *loop = NULL;
2342   bool nested_in_vect_loop = false;
2343   struct loop *containing_loop = NULL;
2344   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2345   tree vect_ptr_type;
2346   tree vect_ptr;
2347   tree new_temp;
2348   gimple vec_stmt;
2349   gimple_seq new_stmt_list = NULL;
2350   edge pe = NULL;
2351   basic_block new_bb;
2352   tree vect_ptr_init;
2353   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2354   tree vptr;
2355   gimple_stmt_iterator incr_gsi;
2356   bool insert_after;
2357   tree indx_before_incr, indx_after_incr;
2358   gimple incr;
2359   tree step;
2360   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2361   gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
2362 
2363   if (loop_vinfo)
2364     {
2365       loop = LOOP_VINFO_LOOP (loop_vinfo);
2366       nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
2367       containing_loop = (gimple_bb (stmt))->loop_father;
2368       pe = loop_preheader_edge (loop);
2369     }
2370   else
2371     {
2372       gcc_assert (bb_vinfo);
2373       only_init = true;
2374       *ptr_incr = NULL;
2375     }
2376 
2377   /* Check the step (evolution) of the load in LOOP, and record
2378      whether it's invariant.  */
2379   if (nested_in_vect_loop)
2380     step = STMT_VINFO_DR_STEP (stmt_info);
2381   else
2382     step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
2383 
2384   if (tree_int_cst_compare (step, size_zero_node) == 0)
2385     *inv_p = true;
2386   else
2387     *inv_p = false;
2388 
2389   /* Create an expression for the first address accessed by this load
2390      in LOOP.  */
2391   base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
2392 
2393   if (vect_print_dump_info (REPORT_DETAILS))
2394     {
2395       tree data_ref_base = base_name;
2396       fprintf (vect_dump, "create vector-pointer variable to type: ");
2397       print_generic_expr (vect_dump, vectype, TDF_SLIM);
2398       if (TREE_CODE (data_ref_base) == VAR_DECL
2399           || TREE_CODE (data_ref_base) == ARRAY_REF)
2400         fprintf (vect_dump, "  vectorizing an array ref: ");
2401       else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
2402         fprintf (vect_dump, "  vectorizing a record based array ref: ");
2403       else if (TREE_CODE (data_ref_base) == SSA_NAME)
2404         fprintf (vect_dump, "  vectorizing a pointer ref: ");
2405       print_generic_expr (vect_dump, base_name, TDF_SLIM);
2406     }
2407 
2408   /** (1) Create the new vector-pointer variable:  **/
2409   vect_ptr_type = build_pointer_type (vectype);
2410   vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2411                                     get_name (base_name));
2412 
2413   /* Vector types inherit the alias set of their component type by default so
2414      we need to use a ref-all pointer if the data reference does not conflict
2415      with the created vector data reference because it is not addressable.  */
2416   if (!alias_sets_conflict_p (get_deref_alias_set (vect_ptr),
2417 			      get_alias_set (DR_REF (dr))))
2418     {
2419       vect_ptr_type
2420 	= build_pointer_type_for_mode (vectype,
2421 				       TYPE_MODE (vect_ptr_type), true);
2422       vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2423 					get_name (base_name));
2424     }
2425 
2426   /* Likewise for any of the data references in the stmt group.  */
2427   else if (STMT_VINFO_DR_GROUP_SIZE (stmt_info) > 1)
2428     {
2429       gimple orig_stmt = STMT_VINFO_DR_GROUP_FIRST_DR (stmt_info);
2430       do
2431 	{
2432 	  tree lhs = gimple_assign_lhs (orig_stmt);
2433 	  if (!alias_sets_conflict_p (get_deref_alias_set (vect_ptr),
2434 				      get_alias_set (lhs)))
2435 	    {
2436 	      vect_ptr_type
2437 		= build_pointer_type_for_mode (vectype,
2438 					       TYPE_MODE (vect_ptr_type), true);
2439 	      vect_ptr
2440 		= vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2441 					 get_name (base_name));
2442 	      break;
2443 	    }
2444 
2445 	  orig_stmt = STMT_VINFO_DR_GROUP_NEXT_DR (vinfo_for_stmt (orig_stmt));
2446 	}
2447       while (orig_stmt);
2448     }
2449 
2450   add_referenced_var (vect_ptr);
2451 
2452   /** Note: If the dataref is in an inner-loop nested in LOOP, and we are
2453       vectorizing LOOP (i.e. outer-loop vectorization), we need to create two
2454       def-use update cycles for the pointer: One relative to the outer-loop
2455       (LOOP), which is what steps (3) and (4) below do. The other is relative
2456       to the inner-loop (which is the inner-most loop containing the dataref),
2457       and this is done by step (5) below.
2458 
2459       When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
2460       inner-most loop, and so steps (3),(4) work the same, and step (5) is
2461       redundant.  Steps (3),(4) create the following:
2462 
2463 	vp0 = &base_addr;
2464 	LOOP:	vp1 = phi(vp0,vp2)
2465 		...
2466 		...
2467 		vp2 = vp1 + step
2468 		goto LOOP
2469 
2470       If there is an inner-loop nested in loop, then step (5) will also be
2471       applied, and an additional update in the inner-loop will be created:
2472 
2473 	vp0 = &base_addr;
2474 	LOOP:   vp1 = phi(vp0,vp2)
2475 		...
2476         inner:     vp3 = phi(vp1,vp4)
2477 	           vp4 = vp3 + inner_step
2478 	           if () goto inner
2479 		...
2480 		vp2 = vp1 + step
2481 		if () goto LOOP   */
2482 
2483   /** (3) Calculate the initial address of the vector-pointer, and set
2484           the vector-pointer to point to it before the loop:  **/
2485 
2486   /* Create &(base[init_val+offset]) in the loop preheader.  */
2487 
2488   new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
2489                                                    offset, loop);
2490   if (new_stmt_list)
2491     {
2492       if (pe)
2493         {
2494           new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
2495           gcc_assert (!new_bb);
2496         }
2497       else
2498         gsi_insert_seq_before (&gsi, new_stmt_list, GSI_SAME_STMT);
2499     }
2500 
2501   *initial_address = new_temp;
2502 
2503   /* Create: p = (vectype *) initial_base  */
2504   vec_stmt = gimple_build_assign (vect_ptr,
2505 				  fold_convert (vect_ptr_type, new_temp));
2506   vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt);
2507   gimple_assign_set_lhs (vec_stmt, vect_ptr_init);
2508   if (pe)
2509     {
2510       new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt);
2511       gcc_assert (!new_bb);
2512     }
2513   else
2514     gsi_insert_before (&gsi, vec_stmt, GSI_SAME_STMT);
2515 
2516   /** (4) Handle the updating of the vector-pointer inside the loop.
2517 	  This is needed when ONLY_INIT is false, and also when AT_LOOP
2518 	  is the inner-loop nested in LOOP (during outer-loop vectorization).
2519    **/
2520 
2521   /* No update in loop is required.  */
2522   if (only_init && (!loop_vinfo || at_loop == loop))
2523     {
2524       /* Copy the points-to information if it exists. */
2525       if (DR_PTR_INFO (dr))
2526         duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr));
2527       vptr = vect_ptr_init;
2528     }
2529   else
2530     {
2531       /* The step of the vector pointer is the Vector Size.  */
2532       tree step = TYPE_SIZE_UNIT (vectype);
2533       /* One exception to the above is when the scalar step of the load in
2534 	 LOOP is zero. In this case the step here is also zero.  */
2535       if (*inv_p)
2536 	step = size_zero_node;
2537 
2538       standard_iv_increment_position (loop, &incr_gsi, &insert_after);
2539 
2540       create_iv (vect_ptr_init,
2541 		 fold_convert (vect_ptr_type, step),
2542 		 vect_ptr, loop, &incr_gsi, insert_after,
2543 		 &indx_before_incr, &indx_after_incr);
2544       incr = gsi_stmt (incr_gsi);
2545       set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
2546 
2547       /* Copy the points-to information if it exists. */
2548       if (DR_PTR_INFO (dr))
2549 	{
2550 	  duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
2551 	  duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
2552 	}
2553       if (ptr_incr)
2554 	*ptr_incr = incr;
2555 
2556       vptr = indx_before_incr;
2557     }
2558 
2559   if (!nested_in_vect_loop || only_init)
2560     return vptr;
2561 
2562 
2563   /** (5) Handle the updating of the vector-pointer inside the inner-loop
2564 	  nested in LOOP, if it exists: **/
2565 
2566   gcc_assert (nested_in_vect_loop);
2567   if (!only_init)
2568     {
2569       standard_iv_increment_position (containing_loop, &incr_gsi,
2570 				      &insert_after);
2571       create_iv (vptr, fold_convert (vect_ptr_type, DR_STEP (dr)), vect_ptr,
2572 		 containing_loop, &incr_gsi, insert_after, &indx_before_incr,
2573 		 &indx_after_incr);
2574       incr = gsi_stmt (incr_gsi);
2575       set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
2576 
2577       /* Copy the points-to information if it exists. */
2578       if (DR_PTR_INFO (dr))
2579 	{
2580 	  duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
2581 	  duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
2582 	}
2583       if (ptr_incr)
2584 	*ptr_incr = incr;
2585 
2586       return indx_before_incr;
2587     }
2588   else
2589     gcc_unreachable ();
2590 }
2591 
2592 
2593 /* Function bump_vector_ptr
2594 
2595    Increment a pointer (to a vector type) by vector-size. If requested,
2596    i.e. if PTR-INCR is given, then also connect the new increment stmt
2597    to the existing def-use update-chain of the pointer, by modifying
2598    the PTR_INCR as illustrated below:
2599 
2600    The pointer def-use update-chain before this function:
2601                         DATAREF_PTR = phi (p_0, p_2)
2602                         ....
2603         PTR_INCR:       p_2 = DATAREF_PTR + step
2604 
2605    The pointer def-use update-chain after this function:
2606                         DATAREF_PTR = phi (p_0, p_2)
2607                         ....
2608                         NEW_DATAREF_PTR = DATAREF_PTR + BUMP
2609                         ....
2610         PTR_INCR:       p_2 = NEW_DATAREF_PTR + step
2611 
2612    Input:
2613    DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
2614                  in the loop.
2615    PTR_INCR - optional. The stmt that updates the pointer in each iteration of
2616 	      the loop.  The increment amount across iterations is expected
2617 	      to be vector_size.
2618    BSI - location where the new update stmt is to be placed.
2619    STMT - the original scalar memory-access stmt that is being vectorized.
2620    BUMP - optional. The offset by which to bump the pointer. If not given,
2621 	  the offset is assumed to be vector_size.
2622 
2623    Output: Return NEW_DATAREF_PTR as illustrated above.
2624 
2625 */
2626 
2627 tree
2628 bump_vector_ptr (tree dataref_ptr, gimple ptr_incr, gimple_stmt_iterator *gsi,
2629 		 gimple stmt, tree bump)
2630 {
2631   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2632   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2633   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2634   tree ptr_var = SSA_NAME_VAR (dataref_ptr);
2635   tree update = TYPE_SIZE_UNIT (vectype);
2636   gimple incr_stmt;
2637   ssa_op_iter iter;
2638   use_operand_p use_p;
2639   tree new_dataref_ptr;
2640 
2641   if (bump)
2642     update = bump;
2643 
2644   incr_stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, ptr_var,
2645 					    dataref_ptr, update);
2646   new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
2647   gimple_assign_set_lhs (incr_stmt, new_dataref_ptr);
2648   vect_finish_stmt_generation (stmt, incr_stmt, gsi);
2649 
2650   /* Copy the points-to information if it exists. */
2651   if (DR_PTR_INFO (dr))
2652     duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
2653 
2654   if (!ptr_incr)
2655     return new_dataref_ptr;
2656 
2657   /* Update the vector-pointer's cross-iteration increment.  */
2658   FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
2659     {
2660       tree use = USE_FROM_PTR (use_p);
2661 
2662       if (use == dataref_ptr)
2663         SET_USE (use_p, new_dataref_ptr);
2664       else
2665         gcc_assert (tree_int_cst_compare (use, update) == 0);
2666     }
2667 
2668   return new_dataref_ptr;
2669 }
2670 
2671 
2672 /* Function vect_create_destination_var.
2673 
2674    Create a new temporary of type VECTYPE.  */
2675 
2676 tree
2677 vect_create_destination_var (tree scalar_dest, tree vectype)
2678 {
2679   tree vec_dest;
2680   const char *new_name;
2681   tree type;
2682   enum vect_var_kind kind;
2683 
2684   kind = vectype ? vect_simple_var : vect_scalar_var;
2685   type = vectype ? vectype : TREE_TYPE (scalar_dest);
2686 
2687   gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
2688 
2689   new_name = get_name (scalar_dest);
2690   if (!new_name)
2691     new_name = "var_";
2692   vec_dest = vect_get_new_vect_var (type, kind, new_name);
2693   add_referenced_var (vec_dest);
2694 
2695   return vec_dest;
2696 }
2697 
2698 /* Function vect_strided_store_supported.
2699 
2700    Returns TRUE if INTERLEAVE_HIGH and INTERLEAVE_LOW operations are supported,
2701    and FALSE otherwise.  */
2702 
2703 bool
2704 vect_strided_store_supported (tree vectype)
2705 {
2706   optab interleave_high_optab, interleave_low_optab;
2707   int mode;
2708 
2709   mode = (int) TYPE_MODE (vectype);
2710 
2711   /* Check that the operation is supported.  */
2712   interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR,
2713 					       vectype, optab_default);
2714   interleave_low_optab = optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR,
2715 					      vectype, optab_default);
2716   if (!interleave_high_optab || !interleave_low_optab)
2717     {
2718       if (vect_print_dump_info (REPORT_DETAILS))
2719 	fprintf (vect_dump, "no optab for interleave.");
2720       return false;
2721     }
2722 
2723   if (optab_handler (interleave_high_optab, mode)->insn_code
2724       == CODE_FOR_nothing
2725       || optab_handler (interleave_low_optab, mode)->insn_code
2726       == CODE_FOR_nothing)
2727     {
2728       if (vect_print_dump_info (REPORT_DETAILS))
2729 	fprintf (vect_dump, "interleave op not supported by target.");
2730       return false;
2731     }
2732 
2733   return true;
2734 }
2735 
2736 
2737 /* Function vect_permute_store_chain.
2738 
2739    Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
2740    a power of 2, generate interleave_high/low stmts to reorder the data
2741    correctly for the stores. Return the final references for stores in
2742    RESULT_CHAIN.
2743 
2744    E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
2745    The input is 4 vectors each containing 8 elements. We assign a number to each
2746    element, the input sequence is:
2747 
2748    1st vec:   0  1  2  3  4  5  6  7
2749    2nd vec:   8  9 10 11 12 13 14 15
2750    3rd vec:  16 17 18 19 20 21 22 23
2751    4th vec:  24 25 26 27 28 29 30 31
2752 
2753    The output sequence should be:
2754 
2755    1st vec:  0  8 16 24  1  9 17 25
2756    2nd vec:  2 10 18 26  3 11 19 27
2757    3rd vec:  4 12 20 28  5 13 21 29
2758    4th vec:  6 14 22 30  7 15 23 31
2759 
2760    i.e., we interleave the contents of the four vectors in their order.
2761 
2762    We use interleave_high/low instructions to create such output. The input of
2763    each interleave_high/low operation is two vectors:
2764    1st vec    2nd vec
2765    0 1 2 3    4 5 6 7
2766    the even elements of the result vector are obtained left-to-right from the
2767    high/low elements of the first vector. The odd elements of the result are
2768    obtained left-to-right from the high/low elements of the second vector.
2769    The output of interleave_high will be:   0 4 1 5
2770    and of interleave_low:                   2 6 3 7
2771 
2772 
2773    The permutation is done in log LENGTH stages. In each stage interleave_high
2774    and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
2775    where the first argument is taken from the first half of DR_CHAIN and the
2776    second argument from its second half.
2777    In our example,
2778 
2779    I1: interleave_high (1st vec, 3rd vec)
2780    I2: interleave_low (1st vec, 3rd vec)
2781    I3: interleave_high (2nd vec, 4th vec)
2782    I4: interleave_low (2nd vec, 4th vec)
2783 
2784    The output for the first stage is:
2785 
2786    I1:  0 16  1 17  2 18  3 19
2787    I2:  4 20  5 21  6 22  7 23
2788    I3:  8 24  9 25 10 26 11 27
2789    I4: 12 28 13 29 14 30 15 31
2790 
2791    The output of the second stage, i.e. the final result is:
2792 
2793    I1:  0  8 16 24  1  9 17 25
2794    I2:  2 10 18 26  3 11 19 27
2795    I3:  4 12 20 28  5 13 21 29
2796    I4:  6 14 22 30  7 15 23 31.  */
2797 
2798 bool
2799 vect_permute_store_chain (VEC(tree,heap) *dr_chain,
2800 			  unsigned int length,
2801 			  gimple stmt,
2802 			  gimple_stmt_iterator *gsi,
2803 			  VEC(tree,heap) **result_chain)
2804 {
2805   tree perm_dest, vect1, vect2, high, low;
2806   gimple perm_stmt;
2807   tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
2808   int i;
2809   unsigned int j;
2810   enum tree_code high_code, low_code;
2811 
2812   /* Check that the operation is supported.  */
2813   if (!vect_strided_store_supported (vectype))
2814     return false;
2815 
2816   *result_chain = VEC_copy (tree, heap, dr_chain);
2817 
2818   for (i = 0; i < exact_log2 (length); i++)
2819     {
2820       for (j = 0; j < length/2; j++)
2821 	{
2822 	  vect1 = VEC_index (tree, dr_chain, j);
2823 	  vect2 = VEC_index (tree, dr_chain, j+length/2);
2824 
2825 	  /* Create interleaving stmt:
2826 	     in the case of big endian:
2827                                 high = interleave_high (vect1, vect2)
2828              and in the case of little endian:
2829                                 high = interleave_low (vect1, vect2).  */
2830 	  perm_dest = create_tmp_var (vectype, "vect_inter_high");
2831 	  DECL_GIMPLE_REG_P (perm_dest) = 1;
2832 	  add_referenced_var (perm_dest);
2833           if (BYTES_BIG_ENDIAN)
2834 	    {
2835 	      high_code = VEC_INTERLEAVE_HIGH_EXPR;
2836 	      low_code = VEC_INTERLEAVE_LOW_EXPR;
2837 	    }
2838 	  else
2839 	    {
2840 	      low_code = VEC_INTERLEAVE_HIGH_EXPR;
2841 	      high_code = VEC_INTERLEAVE_LOW_EXPR;
2842 	    }
2843 	  perm_stmt = gimple_build_assign_with_ops (high_code, perm_dest,
2844 						    vect1, vect2);
2845 	  high = make_ssa_name (perm_dest, perm_stmt);
2846 	  gimple_assign_set_lhs (perm_stmt, high);
2847 	  vect_finish_stmt_generation (stmt, perm_stmt, gsi);
2848 	  VEC_replace (tree, *result_chain, 2*j, high);
2849 
2850 	  /* Create interleaving stmt:
2851              in the case of big endian:
2852                                low  = interleave_low (vect1, vect2)
2853              and in the case of little endian:
2854                                low  = interleave_high (vect1, vect2).  */
2855 	  perm_dest = create_tmp_var (vectype, "vect_inter_low");
2856 	  DECL_GIMPLE_REG_P (perm_dest) = 1;
2857 	  add_referenced_var (perm_dest);
2858 	  perm_stmt = gimple_build_assign_with_ops (low_code, perm_dest,
2859 						    vect1, vect2);
2860 	  low = make_ssa_name (perm_dest, perm_stmt);
2861 	  gimple_assign_set_lhs (perm_stmt, low);
2862 	  vect_finish_stmt_generation (stmt, perm_stmt, gsi);
2863 	  VEC_replace (tree, *result_chain, 2*j+1, low);
2864 	}
2865       dr_chain = VEC_copy (tree, heap, *result_chain);
2866     }
2867   return true;
2868 }
2869 
2870 /* Function vect_setup_realignment
2871 
2872    This function is called when vectorizing an unaligned load using
2873    the dr_explicit_realign[_optimized] scheme.
2874    This function generates the following code at the loop prolog:
2875 
2876       p = initial_addr;
2877    x  msq_init = *(floor(p));   # prolog load
2878       realignment_token = call target_builtin;
2879     loop:
2880    x  msq = phi (msq_init, ---)
2881 
2882    The stmts marked with x are generated only for the case of
2883    dr_explicit_realign_optimized.
2884 
2885    The code above sets up a new (vector) pointer, pointing to the first
2886    location accessed by STMT, and a "floor-aligned" load using that pointer.
2887    It also generates code to compute the "realignment-token" (if the relevant
2888    target hook was defined), and creates a phi-node at the loop-header bb
2889    whose arguments are the result of the prolog-load (created by this
2890    function) and the result of a load that takes place in the loop (to be
2891    created by the caller to this function).
2892 
2893    For the case of dr_explicit_realign_optimized:
2894    The caller to this function uses the phi-result (msq) to create the
2895    realignment code inside the loop, and sets up the missing phi argument,
2896    as follows:
2897     loop:
2898       msq = phi (msq_init, lsq)
2899       lsq = *(floor(p'));        # load in loop
2900       result = realign_load (msq, lsq, realignment_token);
2901 
2902    For the case of dr_explicit_realign:
2903     loop:
2904       msq = *(floor(p)); 	# load in loop
2905       p' = p + (VS-1);
2906       lsq = *(floor(p'));	# load in loop
2907       result = realign_load (msq, lsq, realignment_token);
2908 
2909    Input:
2910    STMT - (scalar) load stmt to be vectorized. This load accesses
2911           a memory location that may be unaligned.
2912    BSI - place where new code is to be inserted.
2913    ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
2914 			      is used.
2915 
2916    Output:
2917    REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
2918                        target hook, if defined.
2919    Return value - the result of the loop-header phi node.  */
2920 
2921 tree
2922 vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
2923                         tree *realignment_token,
2924 			enum dr_alignment_support alignment_support_scheme,
2925 			tree init_addr,
2926 			struct loop **at_loop)
2927 {
2928   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2929   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2930   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2931   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2932   edge pe;
2933   tree scalar_dest = gimple_assign_lhs (stmt);
2934   tree vec_dest;
2935   gimple inc;
2936   tree ptr;
2937   tree data_ref;
2938   gimple new_stmt;
2939   basic_block new_bb;
2940   tree msq_init = NULL_TREE;
2941   tree new_temp;
2942   gimple phi_stmt;
2943   tree msq = NULL_TREE;
2944   gimple_seq stmts = NULL;
2945   bool inv_p;
2946   bool compute_in_loop = false;
2947   bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
2948   struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
2949   struct loop *loop_for_initial_load;
2950 
2951   gcc_assert (alignment_support_scheme == dr_explicit_realign
2952 	      || alignment_support_scheme == dr_explicit_realign_optimized);
2953 
2954   /* We need to generate three things:
2955      1. the misalignment computation
2956      2. the extra vector load (for the optimized realignment scheme).
2957      3. the phi node for the two vectors from which the realignment is
2958       done (for the optimized realignment scheme).
2959    */
2960 
2961   /* 1. Determine where to generate the misalignment computation.
2962 
2963      If INIT_ADDR is NULL_TREE, this indicates that the misalignment
2964      calculation will be generated by this function, outside the loop (in the
2965      preheader).  Otherwise, INIT_ADDR has already been computed for us by the
2966      caller, inside the loop.
2967 
2968      Background: If the misalignment remains fixed throughout the iterations of
2969      the loop, then both realignment schemes are applicable, and also the
2970      misalignment computation can be done outside LOOP.  This is because we are
2971      vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
2972      are a multiple of VS (the Vector Size), and therefore the misalignment in
2973      different vectorized LOOP iterations is always the same.
2974      The problem arises only if the memory access is in an inner-loop nested
2975      inside LOOP, which is now being vectorized using outer-loop vectorization.
2976      This is the only case when the misalignment of the memory access may not
2977      remain fixed throughout the iterations of the inner-loop (as explained in
2978      detail in vect_supportable_dr_alignment).  In this case, not only is the
2979      optimized realignment scheme not applicable, but also the misalignment
2980      computation (and generation of the realignment token that is passed to
2981      REALIGN_LOAD) have to be done inside the loop.
2982 
2983      In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
2984      or not, which in turn determines if the misalignment is computed inside
2985      the inner-loop, or outside LOOP.  */
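
  /* For example (illustrative numbers only): with VS = 16 bytes and an initial
     address A for which A % 16 == 8, the vectorized LOOP accesses A, A+16,
     A+32, ..., all congruent to 8 modulo 16, so a realignment token computed
     once in the preheader stays valid.  If instead the access is in an
     inner-loop that advances by the scalar step (say 4 bytes), the addresses
     are A, A+4, A+8, ..., the misalignment cycles through 8, 12, 0, 4, ...,
     and the token has to be recomputed inside the loop.  */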
2986 
2987   if (init_addr != NULL_TREE)
2988     {
2989       compute_in_loop = true;
2990       gcc_assert (alignment_support_scheme == dr_explicit_realign);
2991     }
2992 
2993 
2994   /* 2. Determine where to generate the extra vector load.
2995 
2996      For the optimized realignment scheme, instead of generating two vector
2997      loads in each iteration, we generate a single extra vector load in the
2998      preheader of the loop, and in each iteration reuse the result of the
2999      vector load from the previous iteration.  In case the memory access is in
3000      an inner-loop nested inside LOOP, which is now being vectorized using
3001      outer-loop vectorization, we need to determine whether this initial vector
3002      load should be generated at the preheader of the inner-loop, or can be
3003      generated at the preheader of LOOP.  If the memory access has no evolution
3004      in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
3005      to be generated inside LOOP (in the preheader of the inner-loop).  */
3006 
3007   if (nested_in_vect_loop)
3008     {
3009       tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
3010       bool invariant_in_outerloop =
3011             (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
3012       loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
3013     }
3014   else
3015     loop_for_initial_load = loop;
3016   if (at_loop)
3017     *at_loop = loop_for_initial_load;
3018 
3019   /* 3. For the case of the optimized realignment, create the first vector
3020       load at the loop preheader.  */
3021 
3022   if (alignment_support_scheme == dr_explicit_realign_optimized)
3023     {
3024       /* Create msq_init = *(floor(p1)) in the loop preheader  */
3025 
3026       gcc_assert (!compute_in_loop);
3027       pe = loop_preheader_edge (loop_for_initial_load);
3028       vec_dest = vect_create_destination_var (scalar_dest, vectype);
3029       ptr = vect_create_data_ref_ptr (stmt, loop_for_initial_load, NULL_TREE,
3030 				      &init_addr, &inc, true, &inv_p);
3031       data_ref = build1 (ALIGN_INDIRECT_REF, vectype, ptr);
3032       new_stmt = gimple_build_assign (vec_dest, data_ref);
3033       new_temp = make_ssa_name (vec_dest, new_stmt);
3034       gimple_assign_set_lhs (new_stmt, new_temp);
3035       mark_symbols_for_renaming (new_stmt);
3036       new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3037       gcc_assert (!new_bb);
3038       msq_init = gimple_assign_lhs (new_stmt);
3039     }
3040 
3041   /* 4. Create realignment token using a target builtin, if available.
3042       It is done either inside the containing loop, or before LOOP (as
3043       determined above).  */
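
  /* What the token looks like is entirely target-specific; as an illustration
     only: on Altivec-like targets builtin_mask_for_load typically expands to an
     lvsl-style permute vector computed from the low-order address bits, and
     REALIGN_LOAD to a vperm of the two aligned loads.  */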
3044 
3045   if (targetm.vectorize.builtin_mask_for_load)
3046     {
3047       tree builtin_decl;
3048 
3049       /* Compute INIT_ADDR - the initial address accessed by this memref.  */
3050       if (compute_in_loop)
3051 	gcc_assert (init_addr); /* already computed by the caller.  */
3052       else
3053 	{
3054 	  /* Generate the INIT_ADDR computation outside LOOP.  */
3055 	  init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
3056 							NULL_TREE, loop);
3057 	  pe = loop_preheader_edge (loop);
3058 	  new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3059 	  gcc_assert (!new_bb);
3060 	}
3061 
3062       builtin_decl = targetm.vectorize.builtin_mask_for_load ();
3063       new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
3064       vec_dest =
3065 	vect_create_destination_var (scalar_dest,
3066 				     gimple_call_return_type (new_stmt));
3067       new_temp = make_ssa_name (vec_dest, new_stmt);
3068       gimple_call_set_lhs (new_stmt, new_temp);
3069 
3070       if (compute_in_loop)
3071 	gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
3072       else
3073 	{
3074 	  /* Generate the misalignment computation outside LOOP.  */
3075 	  pe = loop_preheader_edge (loop);
3076 	  new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3077 	  gcc_assert (!new_bb);
3078 	}
3079 
3080       *realignment_token = gimple_call_lhs (new_stmt);
3081 
3082       /* The result of the CALL_EXPR to this builtin is determined from
3083          the value of the parameter and no global variables are touched
3084          which makes the builtin a "const" function.  Requiring the
3085          builtin to have the "const" attribute makes it unnecessary
3086          to call mark_call_clobbered.  */
3087       gcc_assert (TREE_READONLY (builtin_decl));
3088     }
3089 
3090   if (alignment_support_scheme == dr_explicit_realign)
3091     return msq;
3092 
3093   gcc_assert (!compute_in_loop);
3094   gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
3095 
3096 
3097   /* 5. Create msq = phi <msq_init, lsq> in loop  */
3098 
3099   pe = loop_preheader_edge (containing_loop);
3100   vec_dest = vect_create_destination_var (scalar_dest, vectype);
3101   msq = make_ssa_name (vec_dest, NULL);
3102   phi_stmt = create_phi_node (msq, containing_loop->header);
3103   SSA_NAME_DEF_STMT (msq) = phi_stmt;
3104   add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);
3105 
3106   return msq;
3107 }
3108 
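/* Illustrative sketch (not part of the vectorizer): the effect of the explicit
   realignment scheme, modelled in plain C for a 4-byte "vector" of bytes
   (needs <stdint.h> for uintptr_t).  Two aligned loads bracket the misaligned
   address, and the realignment token (here simply the byte offset) selects
   which bytes survive - which is what REALIGN_LOAD does with its permutation
   mask:

     static void
     realign_load_sketch (const unsigned char *p, unsigned char out[4])
     {
       const unsigned char *lo
         = (const unsigned char *) ((uintptr_t) p & ~(uintptr_t) 3);
       const unsigned char *hi
         = (const unsigned char *) (((uintptr_t) p + 3) & ~(uintptr_t) 3);
       unsigned int off = (unsigned int) ((uintptr_t) p & 3);  // the token
       unsigned int i;
       for (i = 0; i < 4; i++)                                 // msq vs. lsq
         out[i] = (off + i < 4) ? lo[off + i] : hi[off + i - 4];
     }
*/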
3109 
3110 /* Function vect_strided_load_supported.
3111 
3112    Returns TRUE if EXTRACT_EVEN and EXTRACT_ODD operations are supported,
3113    and FALSE otherwise.  */
3114 
3115 bool
3116 vect_strided_load_supported (tree vectype)
3117 {
3118   optab perm_even_optab, perm_odd_optab;
3119   int mode;
3120 
3121   mode = (int) TYPE_MODE (vectype);
3122 
3123   perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype,
3124 					 optab_default);
3125   if (!perm_even_optab)
3126     {
3127       if (vect_print_dump_info (REPORT_DETAILS))
3128 	fprintf (vect_dump, "no optab for perm_even.");
3129       return false;
3130     }
3131 
3132   if (optab_handler (perm_even_optab, mode)->insn_code == CODE_FOR_nothing)
3133     {
3134       if (vect_print_dump_info (REPORT_DETAILS))
3135 	fprintf (vect_dump, "perm_even op not supported by target.");
3136       return false;
3137     }
3138 
3139   perm_odd_optab = optab_for_tree_code (VEC_EXTRACT_ODD_EXPR, vectype,
3140 					optab_default);
3141   if (!perm_odd_optab)
3142     {
3143       if (vect_print_dump_info (REPORT_DETAILS))
3144 	fprintf (vect_dump, "no optab for perm_odd.");
3145       return false;
3146     }
3147 
3148   if (optab_handler (perm_odd_optab, mode)->insn_code == CODE_FOR_nothing)
3149     {
3150       if (vect_print_dump_info (REPORT_DETAILS))
3151 	fprintf (vect_dump, "perm_odd op not supported by target.");
3152       return false;
3153     }
3154   return true;
3155 }
3156 
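/* Illustrative sketch (not part of the vectorizer): the semantics of the two
   operations checked above, for 4-element vectors.  Both operate on the
   concatenation of their operands; extract_even keeps the even-indexed
   elements and extract_odd the odd-indexed ones:

     // a = {a0,a1,a2,a3}, b = {b0,b1,b2,b3}
     // extract_even (a, b) = {a0,a2,b0,b2}
     // extract_odd  (a, b) = {a1,a3,b1,b3}

   or, spelled out in scalar C on arrays:

     static void
     extract_even_odd_sketch (const int a[4], const int b[4],
                              int even[4], int odd[4])
     {
       int cat[8], i;
       for (i = 0; i < 4; i++) { cat[i] = a[i]; cat[4 + i] = b[i]; }
       for (i = 0; i < 4; i++) { even[i] = cat[2 * i]; odd[i] = cat[2 * i + 1]; }
     }
*/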
3157 
3158 /* Function vect_permute_load_chain.
3159 
3160    Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
3161    a power of 2, generate extract_even/odd stmts to reorder the input data
3162    correctly. Return the final references for loads in RESULT_CHAIN.
3163 
3164    E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3165    The input is 4 vectors, each containing 8 elements.  We assign a number to
3166    each element; the input sequence is:
3167 
3168    1st vec:   0  1  2  3  4  5  6  7
3169    2nd vec:   8  9 10 11 12 13 14 15
3170    3rd vec:  16 17 18 19 20 21 22 23
3171    4th vec:  24 25 26 27 28 29 30 31
3172 
3173    The output sequence should be:
3174 
3175    1st vec:  0 4  8 12 16 20 24 28
3176    2nd vec:  1 5  9 13 17 21 25 29
3177    3rd vec:  2 6 10 14 18 22 26 30
3178    4th vec:  3 7 11 15 19 23 27 31
3179 
3180    i.e., the first output vector should contain the first elements of each
3181    interleaving group, etc.
3182 
3183    We use extract_even/odd instructions to create such output.  The input of
3184    each extract_even/odd operation is two vectors:
3185    1st vec    2nd vec
3186    0 1 2 3    4 5 6 7
3187 
3188    and the output is the vector of extracted even/odd elements. The output of
3189    extract_even will be:   0 2 4 6
3190    and of extract_odd:     1 3 5 7
3191 
3192 
3193    The permutation is done in log2(LENGTH) stages.  In each stage, extract_even
3194    and extract_odd stmts are created for each pair of vectors in DR_CHAIN, in
3195    order.  In our example,
3196 
3197    E1: extract_even (1st vec, 2nd vec)
3198    E2: extract_odd (1st vec, 2nd vec)
3199    E3: extract_even (3rd vec, 4th vec)
3200    E4: extract_odd (3rd vec, 4th vec)
3201 
3202    The output for the first stage will be:
3203 
3204    E1:  0  2  4  6  8 10 12 14
3205    E2:  1  3  5  7  9 11 13 15
3206    E3: 16 18 20 22 24 26 28 30
3207    E4: 17 19 21 23 25 27 29 31
3208 
3209    In order to proceed and create the correct sequence for the next stage (or
3210    for the correct output, if the second stage is the last one, as in our
3211    example), we first put the output of the extract_even operation and then the
3212    output of extract_odd into RESULT_CHAIN (which is then copied to DR_CHAIN).
3213    The input for the second stage is:
3214 
3215    1st vec (E1):  0  2  4  6  8 10 12 14
3216    2nd vec (E3): 16 18 20 22 24 26 28 30
3217    3rd vec (E2):  1  3  5  7  9 11 13 15
3218    4th vec (E4): 17 19 21 23 25 27 29 31
3219 
3220    The output of the second stage:
3221 
3222    E1: 0 4  8 12 16 20 24 28
3223    E2: 2 6 10 14 18 22 26 30
3224    E3: 1 5  9 13 17 21 25 29
3225    E4: 3 7 11 15 19 23 27 31
3226 
3227    And RESULT_CHAIN after reordering:
3228 
3229    1st vec (E1):  0 4  8 12 16 20 24 28
3230    2nd vec (E3):  1 5  9 13 17 21 25 29
3231    3rd vec (E2):  2 6 10 14 18 22 26 30
3232    4th vec (E4):  3 7 11 15 19 23 27 31.  */
3233 
3234 bool
3235 vect_permute_load_chain (VEC(tree,heap) *dr_chain,
3236 			 unsigned int length,
3237 			 gimple stmt,
3238 			 gimple_stmt_iterator *gsi,
3239 			 VEC(tree,heap) **result_chain)
3240 {
3241   tree perm_dest, data_ref, first_vect, second_vect;
3242   gimple perm_stmt;
3243   tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3244   int i;
3245   unsigned int j;
3246 
3247   /* Check that the operation is supported.  */
3248   if (!vect_strided_load_supported (vectype))
3249     return false;
3250 
3251   *result_chain = VEC_copy (tree, heap, dr_chain);
3252   for (i = 0; i < exact_log2 (length); i++)
3253     {
3254       for (j = 0; j < length; j +=2)
3255 	{
3256 	  first_vect = VEC_index (tree, dr_chain, j);
3257 	  second_vect = VEC_index (tree, dr_chain, j+1);
3258 
3259 	  /* data_ref = permute_even (first_data_ref, second_data_ref);  */
3260 	  perm_dest = create_tmp_var (vectype, "vect_perm_even");
3261 	  DECL_GIMPLE_REG_P (perm_dest) = 1;
3262 	  add_referenced_var (perm_dest);
3263 
3264 	  perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_EVEN_EXPR,
3265 						    perm_dest, first_vect,
3266 						    second_vect);
3267 
3268 	  data_ref = make_ssa_name (perm_dest, perm_stmt);
3269 	  gimple_assign_set_lhs (perm_stmt, data_ref);
3270 	  vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3271 	  mark_symbols_for_renaming (perm_stmt);
3272 
3273 	  VEC_replace (tree, *result_chain, j/2, data_ref);
3274 
3275 	  /* data_ref = permute_odd (first_data_ref, second_data_ref);  */
3276 	  perm_dest = create_tmp_var (vectype, "vect_perm_odd");
3277 	  DECL_GIMPLE_REG_P (perm_dest) = 1;
3278 	  add_referenced_var (perm_dest);
3279 
3280 	  perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_ODD_EXPR,
3281 						    perm_dest, first_vect,
3282 						    second_vect);
3283 	  data_ref = make_ssa_name (perm_dest, perm_stmt);
3284 	  gimple_assign_set_lhs (perm_stmt, data_ref);
3285 	  vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3286 	  mark_symbols_for_renaming (perm_stmt);
3287 
3288 	  VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
3289 	}
3290       dr_chain = VEC_copy (tree, heap, *result_chain);
3291     }
3292   return true;
3293 }
3294 
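/* Illustrative sketch (not part of the vectorizer): a scalar simulation of the
   permutation implemented above, for the 4-vectors-of-8-elements example in
   the comment before vect_permute_load_chain.  CHAIN plays the role of
   DR_CHAIN/RESULT_CHAIN; each stage pairs consecutive vectors and writes the
   extract_even result to slot j/2 and the extract_odd result to slot
   j/2 + LENGTH/2:

     static void
     permute_load_chain_sketch (int chain[4][8])
     {
       int next[4][8], stage, j, k, v;
       for (stage = 0; (1 << stage) < 4; stage++)        // log2(LENGTH) stages
         {
           for (j = 0; j < 4; j += 2)
             for (k = 0; k < 8; k++)
               {
                 // index into the concatenation of chain[j] and chain[j+1]
                 int e = 2 * k, o = 2 * k + 1;
                 next[j / 2][k]     = e < 8 ? chain[j][e] : chain[j + 1][e - 8];
                 next[j / 2 + 2][k] = o < 8 ? chain[j][o] : chain[j + 1][o - 8];
               }
           for (v = 0; v < 4; v++)                       // result becomes the
             for (k = 0; k < 8; k++)                     // next stage's input
               chain[v][k] = next[v][k];
         }
     }

   Starting from chain[v][e] == 8*v + e (the 0..31 numbering used above), two
   stages produce {0,4,...,28}, {1,5,...,29}, {2,6,...,30}, {3,7,...,31}.  */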
3295 
3296 /* Function vect_transform_strided_load.
3297 
3298    Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
3299    to perform their permutation and assign the resulting vectorized statements
3300    to the scalar statements.
3301 */
3302 
3303 bool
3304 vect_transform_strided_load (gimple stmt, VEC(tree,heap) *dr_chain, int size,
3305 			     gimple_stmt_iterator *gsi)
3306 {
3307   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3308   gimple first_stmt = DR_GROUP_FIRST_DR (stmt_info);
3309   gimple next_stmt, new_stmt;
3310   VEC(tree,heap) *result_chain = NULL;
3311   unsigned int i, gap_count;
3312   tree tmp_data_ref;
3313 
3314   /* DR_CHAIN contains input data-refs that are a part of the interleaving.
3315      RESULT_CHAIN is the output of vect_permute_load_chain; it contains the
3316      permuted vectors, ready for vector computation.  */
3317   result_chain = VEC_alloc (tree, heap, size);
3318   /* Permute.  */
3319   if (!vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain))
3320     return false;
3321 
3322   /* Put a permuted data-ref in the VECTORIZED_STMT field.
3323      Since we scan the chain starting from its first node, their order
3324      corresponds to the order of data-refs in RESULT_CHAIN.  */
3325   next_stmt = first_stmt;
3326   gap_count = 1;
3327   for (i = 0; VEC_iterate (tree, result_chain, i, tmp_data_ref); i++)
3328     {
3329       if (!next_stmt)
3330 	break;
3331 
3332       /* Skip the gaps.  Loads created for the gaps will be removed by the dead
3333        code elimination pass later.  No need to check for the first stmt in
3334        the group, since it always exists.
3335        DR_GROUP_GAP is the number of steps in elements from the previous
3336        access (if there is no gap, DR_GROUP_GAP is 1).  We skip loads that
3337        correspond to the gaps.
3338       */
3339       if (next_stmt != first_stmt
3340           && gap_count < DR_GROUP_GAP (vinfo_for_stmt (next_stmt)))
3341       {
3342         gap_count++;
3343         continue;
3344       }
3345 
3346       while (next_stmt)
3347         {
3348 	  new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
3349 	  /* We assume that if VEC_STMT is not NULL, this is a case of multiple
3350 	     copies, and we put the new vector statement in the first available
3351 	     RELATED_STMT.  */
3352 	  if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
3353 	    STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
3354 	  else
3355             {
3356               if (!DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
3357                 {
3358  	          gimple prev_stmt =
3359 		    STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
3360 	          gimple rel_stmt =
3361 		    STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
3362 	          while (rel_stmt)
3363 		    {
3364 		      prev_stmt = rel_stmt;
3365 		      rel_stmt =
3366                         STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
3367 		    }
3368 
3369   	          STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) =
3370                     new_stmt;
3371                 }
3372             }
3373 
3374 	  next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
3375 	  gap_count = 1;
3376 	  /* If NEXT_STMT accesses the same DR as the previous statement,
3377 	     put the same TMP_DATA_REF as its vectorized statement; otherwise
3378 	     get the next data-ref from RESULT_CHAIN.  */
3379 	  if (!next_stmt || !DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
3380 	    break;
3381         }
3382     }
3383 
3384   VEC_free (tree, heap, result_chain);
3385   return true;
3386 }
3387 
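/* Illustrative example (not taken from the code above): for a loop that reads
   a[4*i] and a[4*i+2], the interleaving group has size 4 with two gaps, and
   DR_GROUP_GAP of the second load is 2.  vect_permute_load_chain still
   produces four permuted vectors, but the loop above assigns only the 1st and
   3rd of them to scalar statements; the vectors at indices 1 and 3 are skipped
   via GAP_COUNT and later removed as dead code.  */
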
3388 /* Function vect_can_force_dr_alignment_p.
3389 
3390    Returns whether the alignment of DECL can be forced to an ALIGNMENT-bit
3391    boundary.  */
3392 
3393 bool
3394 vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
3395 {
3396   if (TREE_CODE (decl) != VAR_DECL)
3397     return false;
3398 
3399   if (DECL_EXTERNAL (decl))
3400     return false;
3401 
3402   if (TREE_ASM_WRITTEN (decl))
3403     return false;
3404 
3405   if (TREE_STATIC (decl))
3406     return (alignment <= MAX_OFILE_ALIGNMENT);
3407   else
3408     return (alignment <= MAX_STACK_ALIGNMENT);
3409 }
3410 
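/* For example (illustrative only): a file-scope 'static float a[N];' that has
   not yet been written out is TREE_STATIC, so its alignment can be raised up
   to MAX_OFILE_ALIGNMENT; an automatic (stack) variable is limited by
   MAX_STACK_ALIGNMENT; an 'extern' declaration, or a decl already emitted to
   the assembly file, cannot be re-aligned at all.  */
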
3411 /* Function vect_supportable_dr_alignment.
3412 
3413    Return whether the data reference DR is supported with respect to its
3414    alignment.  */
3415 
3416 enum dr_alignment_support
3417 vect_supportable_dr_alignment (struct data_reference *dr)
3418 {
3419   gimple stmt = DR_STMT (dr);
3420   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3421   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3422   enum machine_mode mode = TYPE_MODE (vectype);
3423   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3424   struct loop *vect_loop = NULL;
3425   bool nested_in_vect_loop = false;
3426 
3427   if (aligned_access_p (dr))
3428     return dr_aligned;
3429 
3430   if (!loop_vinfo)
3431     /* FORNOW: Misaligned accesses are supported only in loops.  */
3432     return dr_unaligned_unsupported;
3433 
3434   vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
3435   nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
3436 
3437   /* Possibly unaligned access.  */
3438 
3439   /* We can choose between using the implicit realignment scheme (generating
3440      a misaligned_move stmt) and the explicit realignment scheme (generating
3441      aligned loads with a REALIGN_LOAD). There are two variants to the explicit
3442      realignment scheme: optimized, and unoptimized.
3443      We can optimize the realignment only if the step between consecutive
3444      vector loads is equal to the vector size.  Since the vector memory
3445      accesses advance in steps of VS (Vector Size) in the vectorized loop, it
3446      is guaranteed that the misalignment amount remains the same throughout the
3447      execution of the vectorized loop.  Therefore, we can create the
3448      "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
3449      at the loop preheader.
3450 
3451      However, in the case of outer-loop vectorization, when vectorizing a
3452      memory access in the inner-loop nested within the LOOP that is now being
3453      vectorized, while it is guaranteed that the misalignment of the
3454      vectorized memory access will remain the same in different outer-loop
3455      iterations, it is *not* guaranteed that is will remain the same throughout
3456      the execution of the inner-loop.  This is because the inner-loop advances
3457      with the original scalar step (and not in steps of VS).  If the inner-loop
3458      step happens to be a multiple of VS, then the misalignment remains fixed
3459      and we can use the optimized realignment scheme.  For example:
3460 
3461       for (i=0; i<N; i++)
3462         for (j=0; j<M; j++)
3463           s += a[i+j];
3464 
3465      When vectorizing the i-loop in the above example, the step between
3466      consecutive vector loads is 1, and so the misalignment does not remain
3467      fixed across the execution of the inner-loop, and the realignment cannot
3468      be optimized (as illustrated in the following pseudo vectorized loop):
3469 
3470       for (i=0; i<N; i+=4)
3471         for (j=0; j<M; j++){
3472           vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
3473                          // when j is {0,1,2,3,4,5,6,7,...} respectively.
3474                          // (assuming that we start from an aligned address).
3475           }
3476 
3477      We therefore have to use the unoptimized realignment scheme:
3478 
3479       for (i=0; i<N; i+=4)
3480         for (j=k; j<M; j+=4)
3481           vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
3482                          // that the misalignment of the initial address
3483                          // is 0).
3484 
3485      The loop can then be vectorized as follows:
3486 
3487       for (k=0; k<4; k++){
3488         rt = get_realignment_token (&vp[k]);
3489         for (i=0; i<N; i+=4){
3490           v1 = vp[i+k];
3491           for (j=k; j<M; j+=4){
3492             v2 = vp[i+j+VS-1];
3493             va = REALIGN_LOAD <v1,v2,rt>;
3494             vs += va;
3495             v1 = v2;
3496           }
3497         }
3498     } */
3499 
3500   if (DR_IS_READ (dr))
3501     {
3502       bool is_packed = false;
3503       tree type = (TREE_TYPE (DR_REF (dr)));
3504 
3505       if (optab_handler (vec_realign_load_optab, mode)->insn_code !=
3506 						   	     CODE_FOR_nothing
3507 	  && (!targetm.vectorize.builtin_mask_for_load
3508 	      || targetm.vectorize.builtin_mask_for_load ()))
3509 	{
3510 	  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3511 	  if (nested_in_vect_loop
3512 	      && (TREE_INT_CST_LOW (DR_STEP (dr))
3513 		  != GET_MODE_SIZE (TYPE_MODE (vectype))))
3514 	    return dr_explicit_realign;
3515 	  else
3516 	    return dr_explicit_realign_optimized;
3517 	}
3518       if (!known_alignment_for_access_p (dr))
3519 	{
3520 	  tree ba = DR_BASE_OBJECT (dr);
3521 
3522 	  if (ba)
3523 	    is_packed = contains_packed_reference (ba);
3524 	}
3525 
3526       if (targetm.vectorize.
3527 	  builtin_support_vector_misalignment (mode, type,
3528 					       DR_MISALIGNMENT (dr), is_packed))
3529 	/* Can't software pipeline the loads, but can at least do them.  */
3530 	return dr_unaligned_supported;
3531     }
3532   else
3533     {
3534       bool is_packed = false;
3535       tree type = (TREE_TYPE (DR_REF (dr)));
3536 
3537       if (!known_alignment_for_access_p (dr))
3538 	{
3539 	  tree ba = DR_BASE_OBJECT (dr);
3540 
3541 	  if (ba)
3542 	    is_packed = contains_packed_reference (ba);
3543 	}
3544 
3545      if (targetm.vectorize.
3546          builtin_support_vector_misalignment (mode, type,
3547 					      DR_MISALIGNMENT (dr), is_packed))
3548        return dr_unaligned_supported;
3549     }
3550 
3551   /* Unsupported.  */
3552   return dr_unaligned_unsupported;
3553 }
3554
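/* Illustrative summary (not part of this file) of the choices made above for
   an access that is not known to be aligned (and is inside a loop):

     // reads, when vec_realign_load and the mask_for_load builtin (if any)
     // are available:
     //   nested in the loop being vectorized && DR_STEP != vector size
     //                               -> dr_explicit_realign
     //   otherwise                   -> dr_explicit_realign_optimized
     // reads and writes, when the target reports misaligned vector accesses
     // of this type/misalignment as supported
     //                               -> dr_unaligned_supported
     // otherwise                     -> dr_unaligned_unsupported
*/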