xref: /openbsd-src/gnu/gcc/gcc/tree-vect-analyze.c (revision 404b540a9034ac75a6199ad1a32d1bbc7a0d4210)
1 /* Analysis Utilities for Loop Vectorization.
2    Copyright (C) 2003,2004,2005,2006 Free Software Foundation, Inc.
3    Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING.  If not, write to the Free
19 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
20 02110-1301, USA.  */
21 
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "ggc.h"
27 #include "tree.h"
28 #include "basic-block.h"
29 #include "diagnostic.h"
30 #include "tree-flow.h"
31 #include "tree-dump.h"
32 #include "timevar.h"
33 #include "cfgloop.h"
34 #include "expr.h"
35 #include "optabs.h"
36 #include "params.h"
37 #include "tree-chrec.h"
38 #include "tree-data-ref.h"
39 #include "tree-scalar-evolution.h"
40 #include "tree-vectorizer.h"
41 
42 /* Main analysis functions.  */
43 static loop_vec_info vect_analyze_loop_form (struct loop *);
44 static bool vect_analyze_data_refs (loop_vec_info);
45 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
46 static void vect_analyze_scalar_cycles (loop_vec_info);
47 static bool vect_analyze_data_ref_accesses (loop_vec_info);
48 static bool vect_analyze_data_ref_dependences (loop_vec_info);
49 static bool vect_analyze_data_refs_alignment (loop_vec_info);
50 static bool vect_compute_data_refs_alignment (loop_vec_info);
51 static bool vect_enhance_data_refs_alignment (loop_vec_info);
52 static bool vect_analyze_operations (loop_vec_info);
53 static bool vect_determine_vectorization_factor (loop_vec_info);
54 
55 /* Utility functions for the analyses.  */
56 static bool exist_non_indexing_operands_for_use_p (tree, tree);
57 static void vect_mark_relevant (VEC(tree,heap) **, tree, bool, bool);
58 static bool vect_stmt_relevant_p (tree, loop_vec_info, bool *, bool *);
59 static tree vect_get_loop_niters (struct loop *, tree *);
60 static bool vect_analyze_data_ref_dependence
61   (struct data_dependence_relation *, loop_vec_info);
62 static bool vect_compute_data_ref_alignment (struct data_reference *);
63 static bool vect_analyze_data_ref_access (struct data_reference *);
64 static bool vect_can_advance_ivs_p (loop_vec_info);
65 static void vect_update_misalignment_for_peel
66   (struct data_reference *, struct data_reference *, int npeel);
67 
68 
/* Function vect_determine_vectorization_factor

   Determine the vectorization factor (VF). VF is the number of data elements
   that are operated upon in parallel in a single iteration of the vectorized
   loop. For example, when vectorizing a loop that operates on 4byte elements,
   on a target with vector size (VS) 16byte, the VF is set to 4, since 4
   elements can fit in a single vector register.

   We currently support vectorization of loops in which all types operated upon
   are of the same size. Therefore this function currently sets VF according to
   the size of the types operated upon, and fails if there are multiple sizes
   in the loop.

   VF is also the factor by which the loop iterations are strip-mined, e.g.:
   original loop:
        for (i=0; i<N; i++){
          a[i] = b[i] + c[i];
        }

   vectorized loop:
        for (i=0; i<N; i+=VF){
          a[i:VF] = b[i:VF] + c[i:VF];
        }

   Returns true and records the factor in LOOP_VINFO_VECT_FACTOR; returns
   false if some statement uses an unsupported data-type, if data-types of
   different sizes are mixed, or if no vectorizable statement was found.  */

static bool
vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  unsigned int vectorization_factor = 0;
  int i;
  tree scalar_type;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");

  /* Examine every statement in every basic block of the loop.  */
  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
        {
          tree stmt = bsi_stmt (si);
          unsigned int nunits;
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
          tree vectype;

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "==> examining statement: ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
            }

          gcc_assert (stmt_info);
          /* skip stmts which do not need to be vectorized.  */
          if (!STMT_VINFO_RELEVANT_P (stmt_info)
	      && !STMT_VINFO_LIVE_P (stmt_info))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "skip.");
              continue;
            }

          /* A statement that already produces a vector value means this
             loop has been vectorized before; bail out.  */
          if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                {
                  fprintf (vect_dump, "not vectorized: vector stmt in loop:");
                  print_generic_expr (vect_dump, stmt, TDF_SLIM);
                }
              return false;
            }

	  /* Determine the vector type for this statement: reuse a cached
	     one if available, otherwise derive it from the scalar type.  */
	  if (STMT_VINFO_VECTYPE (stmt_info))
	    {
	      vectype = STMT_VINFO_VECTYPE (stmt_info);
	      scalar_type = TREE_TYPE (vectype);
	    }
	  else
	    {
	      /* Prefer the type of the data reference if there is one;
		 otherwise use the type of the lhs (for a MODIFY_EXPR) or
		 of the statement itself.  */
	      if (STMT_VINFO_DATA_REF (stmt_info))
		scalar_type =
			TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
	      else if (TREE_CODE (stmt) == MODIFY_EXPR)
		scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
	      else
		scalar_type = TREE_TYPE (stmt);

	      if (vect_print_dump_info (REPORT_DETAILS))
		{
		  fprintf (vect_dump, "get vectype for scalar type:  ");
		  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
		}

	      vectype = get_vectype_for_scalar_type (scalar_type);
	      if (!vectype)
		{
		  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
		    {
		      fprintf (vect_dump,
			       "not vectorized: unsupported data-type ");
		      print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
		    }
		  return false;
		}
	      /* Cache the vector type for the later analysis/transform
		 phases.  */
	      STMT_VINFO_VECTYPE (stmt_info) = vectype;
            }

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "vectype: ");
              print_generic_expr (vect_dump, vectype, TDF_SLIM);
            }

          nunits = TYPE_VECTOR_SUBPARTS (vectype);
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "nunits = %d", nunits);

          if (vectorization_factor)
            {
              /* FORNOW: don't allow mixed units.
                 This restriction will be relaxed in the future.  */
              if (nunits != vectorization_factor)
                {
                  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                    fprintf (vect_dump, "not vectorized: mixed data-types");
                  return false;
                }
            }
          else
            vectorization_factor = nunits;

          /* Sanity check: scalar size times VF must fill exactly one
             vector register.  */
          gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
                        * vectorization_factor == UNITS_PER_SIMD_WORD);
        }
    }

  /* TODO: Analyze cost. Decide if worth while to vectorize.  */

  if (vectorization_factor <= 1)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump, "not vectorized: unsupported data-type");
      return false;
    }
  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;

  return true;
}
221 
222 
/* Function vect_analyze_operations.

   Scan the loop stmts and make sure they are all vectorizable.

   First examines the loop PHI nodes (live or relevant scalar cycles are
   not yet supported), then every statement in the loop body, checking
   each relevant statement against the vectorizable_* predicates.  Also
   rejects loops where nothing needs vectorization, where the known
   iteration count is below the vectorization factor, or where a required
   epilog loop cannot be created.  */

static bool
vect_analyze_operations (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  unsigned int vectorization_factor = 0;
  int i;
  bool ok;
  tree phi;
  stmt_vec_info stmt_info;
  bool need_to_vectorize = false;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_operations ===");

  /* The vectorization factor must already have been determined.  */
  gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
  vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      /* Check the PHI nodes first: cross-iteration scalar cycles that are
	 live or relevant are not handled yet.  */
      for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
        {
	  stmt_info = vinfo_for_stmt (phi);
	  if (vect_print_dump_info (REPORT_DETAILS))
	    {
	      fprintf (vect_dump, "examining phi: ");
	      print_generic_expr (vect_dump, phi, TDF_SLIM);
	    }

	  gcc_assert (stmt_info);

	  if (STMT_VINFO_LIVE_P (stmt_info))
	    {
	      /* FORNOW: not yet supported.  */
	      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
		fprintf (vect_dump, "not vectorized: value used after loop.");
	      return false;
	    }

	  if (STMT_VINFO_RELEVANT_P (stmt_info))
	    {
	      /* Most likely a reduction-like computation that is used
	         in the loop.  */
	      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
	        fprintf (vect_dump, "not vectorized: unsupported pattern.");
	      return false;
	    }
	}

      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
	{
	  tree stmt = bsi_stmt (si);
	  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

	  if (vect_print_dump_info (REPORT_DETAILS))
	    {
	      fprintf (vect_dump, "==> examining statement: ");
	      print_generic_expr (vect_dump, stmt, TDF_SLIM);
	    }

	  gcc_assert (stmt_info);

	  /* skip stmts which do not need to be vectorized.
	     this is expected to include:
	     - the COND_EXPR which is the loop exit condition
	     - any LABEL_EXPRs in the loop
	     - computations that are used only for array indexing or loop
	     control  */

	  if (!STMT_VINFO_RELEVANT_P (stmt_info)
	      && !STMT_VINFO_LIVE_P (stmt_info))
	    {
	      if (vect_print_dump_info (REPORT_DETAILS))
	        fprintf (vect_dump, "irrelevant.");
	      continue;
	    }

          if (STMT_VINFO_RELEVANT_P (stmt_info))
            {
              gcc_assert (!VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))));
              gcc_assert (STMT_VINFO_VECTYPE (stmt_info));

	      /* A relevant stmt must match one of the supported forms.  */
	      ok = (vectorizable_operation (stmt, NULL, NULL)
		    || vectorizable_assignment (stmt, NULL, NULL)
		    || vectorizable_load (stmt, NULL, NULL)
		    || vectorizable_store (stmt, NULL, NULL)
		    || vectorizable_condition (stmt, NULL, NULL));

	      if (!ok)
		{
		  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
		    {
		      fprintf (vect_dump,
			       "not vectorized: relevant stmt not supported: ");
		      print_generic_expr (vect_dump, stmt, TDF_SLIM);
		    }
		  return false;
		}
	      need_to_vectorize = true;
            }

	  if (STMT_VINFO_LIVE_P (stmt_info))
	    {
	      /* A stmt used after the loop must either be a reduction
		 (which is vectorized) or a supported live operation.  */
	      ok = vectorizable_reduction (stmt, NULL, NULL);

	      if (ok)
                need_to_vectorize = true;
              else
	        ok = vectorizable_live_operation (stmt, NULL, NULL);

	      if (!ok)
		{
		  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
		    {
		      fprintf (vect_dump,
			       "not vectorized: live stmt not supported: ");
		      print_generic_expr (vect_dump, stmt, TDF_SLIM);
		    }
		  return false;
		}
	    }
	} /* stmts in bb */
    } /* bbs */

  /* TODO: Analyze cost. Decide if worth while to vectorize.  */

  /* All operations in the loop are either irrelevant (deal with loop
     control, or dead), or only used outside the loop and can be moved
     out of the loop (e.g. invariants, inductions).  The loop can be
     optimized away by scalar optimizations.  We're better off not
     touching this loop.  */
  if (!need_to_vectorize)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump,
		 "All the computation can be taken out of the loop.");
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump,
		 "not vectorized: redundant loop. no profit to vectorize.");
      return false;
    }

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump,
        "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
        vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));

  /* Too few iterations to fill even one vector iteration.  */
  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
	fprintf (vect_dump, "not vectorized: iteration count too small.");
      return false;
    }

  /* An epilog loop is needed when the iteration count is unknown, does
     not divide evenly by VF, or when peeling for alignment is applied;
     verify that the epilog can actually be generated.  */
  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
      || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "epilog loop required.");
      if (!vect_can_advance_ivs_p (loop_vinfo))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            fprintf (vect_dump,
                     "not vectorized: can't create epilog loop 1.");
          return false;
        }
      if (!slpeel_can_duplicate_loop_p (loop, loop->single_exit))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            fprintf (vect_dump,
                     "not vectorized: can't create epilog loop 2.");
          return false;
        }
    }

  return true;
}
411 
412 
413 /* Function exist_non_indexing_operands_for_use_p
414 
415    USE is one of the uses attached to STMT. Check if USE is
416    used in STMT for anything other than indexing an array.  */
417 
418 static bool
exist_non_indexing_operands_for_use_p(tree use,tree stmt)419 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
420 {
421   tree operand;
422   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
423 
424   /* USE corresponds to some operand in STMT. If there is no data
425      reference in STMT, then any operand that corresponds to USE
426      is not indexing an array.  */
427   if (!STMT_VINFO_DATA_REF (stmt_info))
428     return true;
429 
430   /* STMT has a data_ref. FORNOW this means that its of one of
431      the following forms:
432      -1- ARRAY_REF = var
433      -2- var = ARRAY_REF
434      (This should have been verified in analyze_data_refs).
435 
436      'var' in the second case corresponds to a def, not a use,
437      so USE cannot correspond to any operands that are not used
438      for array indexing.
439 
440      Therefore, all we need to check is if STMT falls into the
441      first case, and whether var corresponds to USE.  */
442 
443   if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
444     return false;
445 
446   operand = TREE_OPERAND (stmt, 1);
447 
448   if (TREE_CODE (operand) != SSA_NAME)
449     return false;
450 
451   if (operand == use)
452     return true;
453 
454   return false;
455 }
456 
457 
/* Function vect_analyze_scalar_cycles.

   Examine the cross iteration def-use cycles of scalar variables, by
   analyzing the loop (scalar) PHIs; Classify each cycle as one of the
   following: invariant, induction, reduction, unknown.

   Some forms of scalar cycles are not yet supported.

   Example1: reduction: (unsupported yet)

              loop1:
              for (i=0; i<N; i++)
                 sum += a[i];

   Example2: induction: (unsupported yet)

              loop2:
              for (i=0; i<N; i++)
                 a[i] = i;

   Note: the following loop *is* vectorizable:

              loop3:
              for (i=0; i<N; i++)
                 a[i] = b[i];

         even though it has a def-use cycle caused by the induction variable i:

              loop: i_2 = PHI (i_0, i_1)
                    a[i_2] = ...;
                    i_1 = i_2 + 1;
                    GOTO loop;

         because the def-use cycle in loop3 is considered "not relevant" - i.e.,
         it does not need to be vectorized because it is only used for array
         indexing (see 'mark_stmts_to_be_vectorized'). The def-use cycle in
         loop2 on the other hand is relevant (it is being written to memory).

   The classification is recorded in STMT_VINFO_DEF_TYPE of each loop-header
   PHI (and, for reductions, also of the reduction statement itself).  */

static void
vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
{
  tree phi;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block bb = loop->header;
  tree dummy;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");

  for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
    {
      tree access_fn = NULL;
      tree def = PHI_RESULT (phi);
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
      tree reduc_stmt;

      if (vect_print_dump_info (REPORT_DETAILS))
	{
          fprintf (vect_dump, "Analyze phi: ");
          print_generic_expr (vect_dump, phi, TDF_SLIM);
	}

      /* Skip virtual phi's. The data dependences that are associated with
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */

      if (!is_gimple_reg (SSA_NAME_VAR (def)))
	{
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "virtual phi. skip.");
	  continue;
	}

      /* Start from "unknown"; refined below if the cycle is recognized.  */
      STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;

      /* Analyze the evolution function.  */

      access_fn = analyze_scalar_evolution (loop, def);

      if (!access_fn)
	continue;

      if (vect_print_dump_info (REPORT_DETAILS))
        {
           fprintf (vect_dump, "Access function of PHI: ");
           print_generic_expr (vect_dump, access_fn, TDF_SLIM);
        }

      /* A simple affine evolution marks an induction variable.  */
      if (vect_is_simple_iv_evolution (loop->num, access_fn, &dummy, &dummy))
	{
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "Detected induction.");
	  STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
          continue;
	}

      /* TODO: handle invariant phis  */

      /* Otherwise try to recognize a reduction cycle; if found, mark both
	 the PHI and the reduction statement.  */
      reduc_stmt = vect_is_simple_reduction (loop, phi);
      if (reduc_stmt)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Detected reduction.");
          STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
          STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
                                                        vect_reduction_def;
        }
      else
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Unknown def-use cycle pattern.");

    }

  return;
}
573 
574 
/* Function vect_analyze_data_ref_dependence.

   Return TRUE if there (might) exist a dependence between a memory-reference
   DRA and a memory-reference DRB (described by the relation DDR), that would
   prevent vectorization of the loop described by LOOP_VINFO.

   Side effect: references whose dependence distance is a multiple of the
   vectorization factor are recorded in each other's SAME_ALIGN_REFS list,
   since they are known to share the same alignment in the vector loop.  */

static bool
vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
                                  loop_vec_info loop_vinfo)
{
  unsigned int i;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
  lambda_vector dist_v;
  unsigned int loop_depth;

  /* Proven independent: no dependence.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    return false;

  /* Undetermined: must conservatively assume a dependence.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        {
          fprintf (vect_dump,
                   "not vectorized: can't determine dependence between ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }
      return true;
    }

  /* No usable distance vectors: again assume the worst.  */
  if (DDR_NUM_DIST_VECTS (ddr) == 0)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        {
          fprintf (vect_dump, "not vectorized: bad dist vector for ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }
      return true;
    }

  /* Inspect the dependence distance in the dimension of this loop.  */
  loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
  for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
    {
      int dist = dist_v[loop_depth];

      if (vect_print_dump_info (REPORT_DR_DETAILS))
	fprintf (vect_dump, "dependence distance  = %d.", dist);

      /* Same loop iteration.  */
      if (dist % vectorization_factor == 0)
	{
	  /* Two references with distance zero have the same alignment.  */
	  VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
	  VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
	  if (vect_print_dump_info (REPORT_ALIGNMENT))
	    fprintf (vect_dump, "accesses have the same alignment.");
	  if (vect_print_dump_info (REPORT_DR_DETAILS))
	    {
	      fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
	      print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
	      fprintf (vect_dump, " and ");
	      print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
	    }
	  continue;
	}

      if (abs (dist) >= vectorization_factor)
	{
	  /* Dependence distance does not create dependence, as far as vectorization
	     is concerned, in this case.  */
	  if (vect_print_dump_info (REPORT_DR_DETAILS))
	    fprintf (vect_dump, "dependence distance >= VF.");
	  continue;
	}

      /* 0 < |dist| < VF: the dependence would be violated by operating on
	 VF elements at a time.  */
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
	{
	  fprintf (vect_dump,
		   "not vectorized: possible dependence between data-refs ");
	  print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
	  fprintf (vect_dump, " and ");
	  print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
	}

      return true;
    }

  return false;
}
671 
672 
673 /* Function vect_analyze_data_ref_dependences.
674 
675    Examine all the data references in the loop, and make sure there do not
676    exist any data dependences between them.  */
677 
678 static bool
vect_analyze_data_ref_dependences(loop_vec_info loop_vinfo)679 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
680 {
681   unsigned int i;
682   VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
683   struct data_dependence_relation *ddr;
684 
685   if (vect_print_dump_info (REPORT_DETAILS))
686     fprintf (vect_dump, "=== vect_analyze_dependences ===");
687 
688   for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
689     if (vect_analyze_data_ref_dependence (ddr, loop_vinfo))
690       return false;
691 
692   return true;
693 }
694 
695 
/* Function vect_compute_data_ref_alignment

   Compute the misalignment of the data reference DR.

   Output:
   1. If during the misalignment computation it is found that the data reference
      cannot be vectorized then false is returned.
   2. DR_MISALIGNMENT (DR) is defined.

   DR_MISALIGNMENT is set to -1 when the misalignment cannot be determined,
   otherwise to the byte offset of DR from a vector-size boundary.  As a
   side effect, the alignment of the underlying decl may be forcibly
   increased (DECL_ALIGN/DECL_USER_ALIGN) to make the base aligned.

   FOR NOW: No analysis is actually performed. Misalignment is calculated
   only for trivial cases. TODO.  */

static bool
vect_compute_data_ref_alignment (struct data_reference *dr)
{
  tree stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree ref = DR_REF (dr);
  tree vectype;
  tree base, base_addr;
  bool base_aligned;
  tree misalign;
  tree aligned_to, alignment;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_compute_data_ref_alignment:");

  /* Initialize misalignment to unknown.  */
  DR_MISALIGNMENT (dr) = -1;

  misalign = DR_OFFSET_MISALIGNMENT (dr);
  aligned_to = DR_ALIGNED_TO (dr);
  base_addr = DR_BASE_ADDRESS (dr);
  base = build_fold_indirect_ref (base_addr);
  vectype = STMT_VINFO_VECTYPE (stmt_info);
  /* Required alignment, in bytes, is that of the vector type.  */
  alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);

  /* If the known alignment of the access is below the vector alignment,
     or the offset misalignment is unknown, leave DR_MISALIGNMENT at -1.
     This is not a failure (return true); the access is simply treated as
     unaligned.  */
  if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
      || !misalign)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	{
	  fprintf (vect_dump, "Unknown alignment for access: ");
	  print_generic_expr (vect_dump, base, TDF_SLIM);
	}
      return true;
    }

  /* Is the base itself aligned to the vector size?  Either the decl's
     declared alignment suffices, or the pointed-to type's alignment does.  */
  if ((DECL_P (base)
       && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
				alignment) >= 0)
      || (TREE_CODE (base_addr) == SSA_NAME
	  && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
						      TREE_TYPE (base_addr)))),
				   alignment) >= 0))
    base_aligned = true;
  else
    base_aligned = false;

  if (!base_aligned)
    {
      /* Do not change the alignment of global variables if
	 flag_section_anchors is enabled.  */
      if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
	  || (TREE_STATIC (base) && flag_section_anchors))
	{
	  if (vect_print_dump_info (REPORT_DETAILS))
	    {
	      fprintf (vect_dump, "can't force alignment of ref: ");
	      print_generic_expr (vect_dump, ref, TDF_SLIM);
	    }
	  return true;
	}

      /* Force the alignment of the decl.
	 NOTE: This is the only change to the code we make during
	 the analysis phase, before deciding to vectorize the loop.  */
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "force alignment");
      DECL_ALIGN (base) = TYPE_ALIGN (vectype);
      DECL_USER_ALIGN (base) = 1;
    }

  /* At this point we assume that the base is aligned.  */
  gcc_assert (base_aligned
	      || (TREE_CODE (base) == VAR_DECL
		  && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));

  /* Modulo alignment.  */
  misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);

  if (!host_integerp (misalign, 1))
    {
      /* Negative or overflowed misalignment value.  */
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "unexpected misalign value");
      return false;
    }

  DR_MISALIGNMENT (dr) = TREE_INT_CST_LOW (misalign);

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
      print_generic_expr (vect_dump, ref, TDF_SLIM);
    }

  return true;
}
805 
806 
807 /* Function vect_compute_data_refs_alignment
808 
809    Compute the misalignment of data references in the loop.
810    Return FALSE if a data reference is found that cannot be vectorized.  */
811 
812 static bool
vect_compute_data_refs_alignment(loop_vec_info loop_vinfo)813 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
814 {
815   VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
816   struct data_reference *dr;
817   unsigned int i;
818 
819   for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
820     if (!vect_compute_data_ref_alignment (dr))
821       return false;
822 
823   return true;
824 }
825 
826 
/* Function vect_update_misalignment_for_peel

   DR - the data reference whose misalignment is to be adjusted.
   DR_PEEL - the data reference whose misalignment is being made
             zero in the vector loop by the peel.
   NPEEL - the number of iterations in the peel loop if the misalignment
           of DR_PEEL is known at compile time.

   Updates DR_MISALIGNMENT (DR): to 0 if DR becomes aligned by the peel,
   to the recomputed value when both misalignments are known, or to -1
   (unknown) otherwise.  */

static void
vect_update_misalignment_for_peel (struct data_reference *dr,
                                   struct data_reference *dr_peel, int npeel)
{
  unsigned int i;
  int drsize;
  VEC(dr_p,heap) *same_align_drs;
  struct data_reference *current_dr;

  /* If DR has the same known misalignment as DR_PEEL, peeling aligns
     them both.  */
  if (known_alignment_for_access_p (dr)
      && DR_MISALIGNMENT (dr) == DR_MISALIGNMENT (dr_peel))
    {
      DR_MISALIGNMENT (dr) = 0;
      return;
    }

  /* It can be assumed that the data refs with the same alignment as dr_peel
     are aligned in the vector loop.  */
  same_align_drs
    = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
  /* Search for DR in DR_PEEL's same-alignment list.  */
  for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++)
    {
      if (current_dr != dr)
        continue;
      gcc_assert (DR_MISALIGNMENT (dr) == DR_MISALIGNMENT (dr_peel));
      DR_MISALIGNMENT (dr) = 0;
      return;
    }

  /* Both misalignments known at compile time: advance DR's misalignment
     by the peeled bytes, modulo the vector size.  */
  if (known_alignment_for_access_p (dr)
      && known_alignment_for_access_p (dr_peel))
    {
      drsize = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
      DR_MISALIGNMENT (dr) += npeel * drsize;
      DR_MISALIGNMENT (dr) %= UNITS_PER_SIMD_WORD;
      return;
    }

  /* Otherwise the misalignment after peeling is unknown.  */
  DR_MISALIGNMENT (dr) = -1;
}
875 
876 
877 /* Function vect_verify_datarefs_alignment
878 
879    Return TRUE if all data references in the loop can be
880    handled with respect to alignment.  */
881 
882 static bool
vect_verify_datarefs_alignment(loop_vec_info loop_vinfo)883 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo)
884 {
885   VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
886   struct data_reference *dr;
887   enum dr_alignment_support supportable_dr_alignment;
888   unsigned int i;
889 
890   for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
891     {
892       supportable_dr_alignment = vect_supportable_dr_alignment (dr);
893       if (!supportable_dr_alignment)
894         {
895           if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
896             {
897               if (DR_IS_READ (dr))
898                 fprintf (vect_dump,
899                          "not vectorized: unsupported unaligned load.");
900               else
901                 fprintf (vect_dump,
902                          "not vectorized: unsupported unaligned store.");
903             }
904           return false;
905         }
906       if (supportable_dr_alignment != dr_aligned
907           && vect_print_dump_info (REPORT_ALIGNMENT))
908         fprintf (vect_dump, "Vectorizing an unaligned access.");
909     }
910   return true;
911 }
912 
913 
914 /* Function vect_enhance_data_refs_alignment
915 
916    This pass will use loop versioning and loop peeling in order to enhance
917    the alignment of data references in the loop.
918 
919    FOR NOW: we assume that whatever versioning/peeling takes place, only the
920    original loop is to be vectorized; Any other loops that are created by
921    the transformations performed in this pass - are not supposed to be
922    vectorized. This restriction will be relaxed.
923 
924    This pass will require a cost model to guide it whether to apply peeling
925    or versioning or a combination of the two. For example, the scheme that
926    intel uses when given a loop with several memory accesses, is as follows:
927    choose one memory access ('p') which alignment you want to force by doing
928    peeling. Then, either (1) generate a loop in which 'p' is aligned and all
929    other accesses are not necessarily aligned, or (2) use loop versioning to
930    generate one loop in which all accesses are aligned, and another loop in
931    which only 'p' is necessarily aligned.
932 
933    ("Automatic Intra-Register Vectorization for the Intel Architecture",
   Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
935    Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
936 
937    Devising a cost model is the most critical aspect of this work. It will
938    guide us on which access to peel for, whether to use loop versioning, how
939    many versions to create, etc. The cost model will probably consist of
940    generic considerations as well as target specific considerations (on
941    powerpc for example, misaligned stores are more painful than misaligned
942    loads).
943 
944    Here are the general steps involved in alignment enhancements:
945 
946      -- original loop, before alignment analysis:
947 	for (i=0; i<N; i++){
948 	  x = q[i];			# DR_MISALIGNMENT(q) = unknown
949 	  p[i] = y;			# DR_MISALIGNMENT(p) = unknown
950 	}
951 
952      -- After vect_compute_data_refs_alignment:
953 	for (i=0; i<N; i++){
954 	  x = q[i];			# DR_MISALIGNMENT(q) = 3
955 	  p[i] = y;			# DR_MISALIGNMENT(p) = unknown
956 	}
957 
958      -- Possibility 1: we do loop versioning:
959      if (p is aligned) {
960 	for (i=0; i<N; i++){	# loop 1A
961 	  x = q[i];			# DR_MISALIGNMENT(q) = 3
962 	  p[i] = y;			# DR_MISALIGNMENT(p) = 0
963 	}
964      }
965      else {
966 	for (i=0; i<N; i++){	# loop 1B
967 	  x = q[i];			# DR_MISALIGNMENT(q) = 3
968 	  p[i] = y;			# DR_MISALIGNMENT(p) = unaligned
969 	}
970      }
971 
972      -- Possibility 2: we do loop peeling:
973      for (i = 0; i < 3; i++){	# (scalar loop, not to be vectorized).
974 	x = q[i];
975 	p[i] = y;
976      }
977      for (i = 3; i < N; i++){	# loop 2A
978 	x = q[i];			# DR_MISALIGNMENT(q) = 0
979 	p[i] = y;			# DR_MISALIGNMENT(p) = unknown
980      }
981 
982      -- Possibility 3: combination of loop peeling and versioning:
983      for (i = 0; i < 3; i++){	# (scalar loop, not to be vectorized).
984 	x = q[i];
985 	p[i] = y;
986      }
987      if (p is aligned) {
988 	for (i = 3; i<N; i++){	# loop 3A
989 	  x = q[i];			# DR_MISALIGNMENT(q) = 0
990 	  p[i] = y;			# DR_MISALIGNMENT(p) = 0
991 	}
992      }
993      else {
994 	for (i = 3; i<N; i++){	# loop 3B
995 	  x = q[i];			# DR_MISALIGNMENT(q) = 0
996 	  p[i] = y;			# DR_MISALIGNMENT(p) = unaligned
997 	}
998      }
999 
1000      These loops are later passed to loop_transform to be vectorized. The
1001      vectorizer will use the alignment information to guide the transformation
1002      (whether to generate regular loads/stores, or with special handling for
1003      misalignment).  */
1004 
static bool
vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
{
  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  enum dr_alignment_support supportable_dr_alignment;
  struct data_reference *dr0 = NULL;	/* The access we peel to align.  */
  struct data_reference *dr;
  unsigned int i;
  bool do_peeling = false;
  bool do_versioning = false;
  bool stat;

  /* While cost model enhancements are expected in the future, the high level
     view of the code at this time is as follows:

     A) If there is a misaligned write then see if peeling to align this write
        can make all data references satisfy vect_supportable_dr_alignment.
        If so, update data structures as needed and return true.  Note that
        at this time vect_supportable_dr_alignment is known to return false
        for a misaligned write.

     B) If peeling wasn't possible and there is a data reference with an
        unknown misalignment that does not satisfy vect_supportable_dr_alignment
        then see if loop versioning checks can be used to make all data
        references satisfy vect_supportable_dr_alignment.  If so, update
        data structures as needed and return true.

     C) If neither peeling nor versioning were successful then return false if
        any data reference does not satisfy vect_supportable_dr_alignment.

     D) Return true (all data references satisfy vect_supportable_dr_alignment).

     Note, Possibility 3 above (which is peeling and versioning together) is not
     being done at this time.  */

  /* (1) Peeling to force alignment.  */

  /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
     Considerations:
     + How many accesses will become aligned due to the peeling
     - How many accesses will become unaligned due to the peeling,
       and the cost of misaligned accesses.
     - The cost of peeling (the extra runtime checks, the increase
       in code size).

     The scheme we use FORNOW: peel to force the alignment of the first
     misaligned store in the loop.
     Rationale: misaligned stores are not yet supported.

     TODO: Use a cost model.  */

  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
    if (!DR_IS_READ (dr) && !aligned_access_p (dr))
      {
	dr0 = dr;
	do_peeling = true;
	break;
      }

  /* Often peeling for alignment will require peeling for loop-bound, which in
     turn requires that we know how to adjust the loop ivs after the loop.  */
  if (!vect_can_advance_ivs_p (loop_vinfo))
    do_peeling = false;

  if (do_peeling)
    {
      int mis;
      int npeel = 0;

      if (known_alignment_for_access_p (dr0))
        {
          /* Since it's known at compile time, compute the number of iterations
             in the peeled loop (the peeling factor) for use in updating
             DR_MISALIGNMENT values.  The peeling factor is the vectorization
             factor minus the misalignment as an element count.  */
          mis = DR_MISALIGNMENT (dr0);
          mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
          npeel = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - mis;
        }

      /* Ensure that all data refs can be vectorized after the peel.
         Each misalignment is temporarily updated as if the peel had been
         done, tested, and then restored — no state is committed unless
         peeling is accepted for every ref.  */
      for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
        {
          int save_misalignment;

	  if (dr == dr0)
	    continue;

	  save_misalignment = DR_MISALIGNMENT (dr);
	  vect_update_misalignment_for_peel (dr, dr0, npeel);
	  supportable_dr_alignment = vect_supportable_dr_alignment (dr);
	  DR_MISALIGNMENT (dr) = save_misalignment;

	  if (!supportable_dr_alignment)
	    {
	      do_peeling = false;
	      break;
	    }
	}

      if (do_peeling)
        {
          /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
             If the misalignment of DR_i is identical to that of dr0 then set
             DR_MISALIGNMENT (DR_i) to zero.  If the misalignment of DR_i and
             dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
             by the peeling factor times the element size of DR_i (MOD the
             vectorization factor times the size).  Otherwise, the
             misalignment of DR_i must be set to unknown.  */
	  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
	    if (dr != dr0)
	      vect_update_misalignment_for_peel (dr, dr0, npeel);

          LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
          LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
          /* After peeling, dr0 is aligned by construction.  */
          DR_MISALIGNMENT (dr0) = 0;
	  if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "Alignment of access forced using peeling.");

          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Peeling for alignment will be applied.");

	  stat = vect_verify_datarefs_alignment (loop_vinfo);
	  gcc_assert (stat);
          return stat;
        }
    }


  /* (2) Versioning to force alignment.  */

  /* Try versioning if:
     1) flag_tree_vect_loop_version is TRUE
     2) optimize_size is FALSE
     3) there is at least one unsupported misaligned data ref with an unknown
        misalignment, and
     4) all misaligned data refs with a known misalignment are supported, and
     5) the number of runtime alignment checks is within reason.  */

  do_versioning = flag_tree_vect_loop_version && (!optimize_size);

  if (do_versioning)
    {
      for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
        {
          if (aligned_access_p (dr))
            continue;

          supportable_dr_alignment = vect_supportable_dr_alignment (dr);

          if (!supportable_dr_alignment)
            {
              tree stmt;
              int mask;
              tree vectype;

              /* A known (compile-time) misalignment can't be fixed by a
                 runtime check; too many checks aren't worth versioning.  */
              if (known_alignment_for_access_p (dr)
                  || VEC_length (tree,
                                 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
                     >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_CHECKS))
                {
                  do_versioning = false;
                  break;
                }

              stmt = DR_STMT (dr);
              vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
              gcc_assert (vectype);

              /* The rightmost bits of an aligned address must be zeros.
                 Construct the mask needed for this test.  For example,
                 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
                 mask must be 15 = 0xf. */
              mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;

              /* FORNOW: use the same mask to test all potentially unaligned
                 references in the loop.  The vectorizer currently supports
                 a single vector size, see the reference to
                 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
                 vectorization factor is computed.  */
              gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
                          || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
              LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
              VEC_safe_push (tree, heap,
                             LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
                             DR_STMT (dr));
            }
        }

      /* Versioning requires at least one misaligned data reference.  */
      if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)) == 0)
        do_versioning = false;
      else if (!do_versioning)
        /* Versioning was abandoned midway: discard the checks queued so far.  */
        VEC_truncate (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
    }

  if (do_versioning)
    {
      VEC(tree,heap) *may_misalign_stmts
        = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
      tree stmt;

      /* It can now be assumed that the data references in the statements
         in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
         of the loop being vectorized.  */
      for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, stmt); i++)
        {
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
          dr = STMT_VINFO_DATA_REF (stmt_info);
          DR_MISALIGNMENT (dr) = 0;
	  if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "Alignment of access forced using versioning.");
        }

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Versioning for alignment will be applied.");

      /* Peeling and versioning can't be done together at this time.  */
      gcc_assert (! (do_peeling && do_versioning));

      stat = vect_verify_datarefs_alignment (loop_vinfo);
      gcc_assert (stat);
      return stat;
    }

  /* This point is reached if neither peeling nor versioning is being done.  */
  gcc_assert (! (do_peeling || do_versioning));

  stat = vect_verify_datarefs_alignment (loop_vinfo);
  return stat;
}
1236 
1237 
1238 /* Function vect_analyze_data_refs_alignment
1239 
1240    Analyze the alignment of the data-references in the loop.
1241    Return FALSE if a data reference is found that cannot be vectorized.  */
1242 
1243 static bool
vect_analyze_data_refs_alignment(loop_vec_info loop_vinfo)1244 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
1245 {
1246   if (vect_print_dump_info (REPORT_DETAILS))
1247     fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
1248 
1249   if (!vect_compute_data_refs_alignment (loop_vinfo))
1250     {
1251       if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1252 	fprintf (vect_dump,
1253 		 "not vectorized: can't calculate alignment for data ref.");
1254       return false;
1255     }
1256 
1257   return true;
1258 }
1259 
1260 
1261 /* Function vect_analyze_data_ref_access.
1262 
1263    Analyze the access pattern of the data-reference DR. For now, a data access
1264    has to be consecutive to be considered vectorizable.  */
1265 
1266 static bool
vect_analyze_data_ref_access(struct data_reference * dr)1267 vect_analyze_data_ref_access (struct data_reference *dr)
1268 {
1269   tree step = DR_STEP (dr);
1270   tree scalar_type = TREE_TYPE (DR_REF (dr));
1271 
1272   if (!step || tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
1273     {
1274       if (vect_print_dump_info (REPORT_DETAILS))
1275 	fprintf (vect_dump, "not consecutive access");
1276       return false;
1277     }
1278   return true;
1279 }
1280 
1281 
1282 /* Function vect_analyze_data_ref_accesses.
1283 
1284    Analyze the access pattern of all the data references in the loop.
1285 
1286    FORNOW: the only access pattern that is considered vectorizable is a
1287 	   simple step 1 (consecutive) access.
1288 
1289    FORNOW: handle only arrays and pointer accesses.  */
1290 
1291 static bool
vect_analyze_data_ref_accesses(loop_vec_info loop_vinfo)1292 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
1293 {
1294   unsigned int i;
1295   VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1296   struct data_reference *dr;
1297 
1298   if (vect_print_dump_info (REPORT_DETAILS))
1299     fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
1300 
1301   for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1302     if (!vect_analyze_data_ref_access (dr))
1303       {
1304 	if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1305 	  fprintf (vect_dump, "not vectorized: complicated access pattern.");
1306 	return false;
1307       }
1308 
1309   return true;
1310 }
1311 
1312 
1313 /* Function vect_analyze_data_refs.
1314 
1315   Find all the data references in the loop.
1316 
1317    The general structure of the analysis of data refs in the vectorizer is as
1318    follows:
1319    1- vect_analyze_data_refs(loop): call compute_data_dependences_for_loop to
1320       find and analyze all data-refs in the loop and their dependences.
1321    2- vect_analyze_dependences(): apply dependence testing using ddrs.
1322    3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
1323    4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
1324 
1325 */
1326 
static bool
vect_analyze_data_refs (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  unsigned int i;
  VEC (data_reference_p, heap) *datarefs;
  struct data_reference *dr;
  tree scalar_type;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_data_refs ===");

  /* Delegate discovery and analysis of all data-refs/dependences in the
     loop; results are stored directly into LOOP_VINFO.  */
  compute_data_dependences_for_loop (loop, false,
                                     &LOOP_VINFO_DATAREFS (loop_vinfo),
                                     &LOOP_VINFO_DDRS (loop_vinfo));

  /* Go through the data-refs, check that the analysis succeeded. Update pointer
     from stmt_vec_info struct to DR and vectype.  */
  datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);

  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
    {
      tree stmt;
      stmt_vec_info stmt_info;

      /* The data-ref analyzer may have produced an empty entry; treat it
         as an unhandled reference.  */
      if (!dr || !DR_REF (dr))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
	    fprintf (vect_dump, "not vectorized: unhandled data-ref ");
          return false;
        }

      /* Update DR field in stmt_vec_info struct.  */
      stmt = DR_STMT (dr);
      stmt_info = vinfo_for_stmt (stmt);

      /* FORNOW: a statement may carry at most one data reference.  */
      if (STMT_VINFO_DATA_REF (stmt_info))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            {
              fprintf (vect_dump,
                       "not vectorized: more than one data ref in stmt: ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
            }
          return false;
        }
      STMT_VINFO_DATA_REF (stmt_info) = dr;

      /* Check that analysis of the data-ref succeeded.  All four parts of
         the affine access function (base, offset, init, step) must be
         available for the vectorizer to reason about the access.  */
      if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
          || !DR_STEP (dr))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            {
              fprintf (vect_dump, "not vectorized: data ref analysis failed ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
            }
          return false;
        }
      /* A memory tag is needed for the alias information of the access.  */
      if (!DR_MEMTAG (dr))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            {
              fprintf (vect_dump, "not vectorized: no memory tag for ");
              print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
            }
          return false;
        }

      /* Set vectype for STMT.  */
      scalar_type = TREE_TYPE (DR_REF (dr));
      STMT_VINFO_VECTYPE (stmt_info) =
                get_vectype_for_scalar_type (scalar_type);
      /* No vector type exists for this scalar type on the target: fail.  */
      if (!STMT_VINFO_VECTYPE (stmt_info))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            {
              fprintf (vect_dump,
                       "not vectorized: no vectype for stmt: ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
              fprintf (vect_dump, " scalar_type: ");
              print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
            }
          return false;
        }
    }

  return true;
}
1416 
1417 
1418 /* Utility functions used by vect_mark_stmts_to_be_vectorized.  */
1419 
1420 /* Function vect_mark_relevant.
1421 
1422    Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */
1423 
static void
vect_mark_relevant (VEC(tree,heap) **worklist, tree stmt,
		    bool relevant_p, bool live_p)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  /* Snapshot the current flags so we can detect below whether OR-ing in
     the new values actually changed anything.  */
  bool save_relevant_p = STMT_VINFO_RELEVANT_P (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "mark relevant %d, live %d.",relevant_p, live_p);

  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      tree pattern_stmt;

      /* This is the last stmt in a sequence that was detected as a
         pattern that can potentially be vectorized.  Don't mark the stmt
         as relevant/live because it's not going to vectorized.
         Instead mark the pattern-stmt that replaces it.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live.");
      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
      stmt_info = vinfo_for_stmt (pattern_stmt);
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
      /* Re-snapshot from the pattern stmt's own flags, which are the ones
         being updated from here on.  */
      save_relevant_p = STMT_VINFO_RELEVANT_P (stmt_info);
      save_live_p = STMT_VINFO_LIVE_P (stmt_info);
      stmt = pattern_stmt;
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  STMT_VINFO_RELEVANT_P (stmt_info) |= relevant_p;

  if (TREE_CODE (stmt) == PHI_NODE)
    /* Don't put phi-nodes in the worklist. Phis that are marked relevant
       or live will fail vectorization later on.  */
    return;

  /* If neither flag changed, the stmt was already processed with at least
     these properties — no need to revisit it.  */
  if (STMT_VINFO_RELEVANT_P (stmt_info) == save_relevant_p
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "already marked relevant/live.");
      return;
    }

  VEC_safe_push (tree, heap, *worklist, stmt);
}
1471 
1472 
1473 /* Function vect_stmt_relevant_p.
1474 
1475    Return true if STMT in loop that is represented by LOOP_VINFO is
1476    "relevant for vectorization".
1477 
1478    A stmt is considered "relevant for vectorization" if:
1479    - it has uses outside the loop.
1480    - it has vdefs (it alters memory).
1481    - control stmts in the loop (except for the exit condition).
1482 
1483    CHECKME: what other side effects would the vectorizer allow?  */
1484 
static bool
vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo,
		      bool *relevant_p, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant_p = false;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
    *relevant_p = true;

  /* changing memory.  A stmt with virtual defs stores to memory, so it
     must be vectorized (or the loop rejected).  PHIs carry no operand
     cache, hence the guard.  */
  if (TREE_CODE (stmt) != PHI_NODE)
    if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_DEFS))
      {
	if (vect_print_dump_info (REPORT_DETAILS))
	  fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
	*relevant_p = true;
      }

  /* uses outside the loop.  A def consumed after the loop makes the stmt
     "live": its final value must survive vectorization.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
	{
	  basic_block bb = bb_for_stmt (USE_STMT (use_p));
	  if (!flow_bb_inside_loop_p (loop, bb))
	    {
	      if (vect_print_dump_info (REPORT_DETAILS))
		fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");

	      /* We expect all such uses to be in the loop exit phis
		 (because of loop closed form)   */
	      gcc_assert (TREE_CODE (USE_STMT (use_p)) == PHI_NODE);
	      gcc_assert (bb == loop->single_exit->dest);

              *live_p = true;
	    }
	}
    }

  return (*live_p || *relevant_p);
}
1534 
1535 
1536 /* Function vect_mark_stmts_to_be_vectorized.
1537 
1538    Not all stmts in the loop need to be vectorized. For example:
1539 
1540      for i...
1541        for j...
1542    1.    T0 = i + j
1543    2.	 T1 = a[T0]
1544 
1545    3.    j = j + 1
1546 
1547    Stmt 1 and 3 do not need to be vectorized, because loop control and
1548    addressing of vectorized data-refs are handled differently.
1549 
1550    This pass detects such stmts.  */
1551 
1552 static bool
vect_mark_stmts_to_be_vectorized(loop_vec_info loop_vinfo)1553 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
1554 {
1555   VEC(tree,heap) *worklist;
1556   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1557   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1558   unsigned int nbbs = loop->num_nodes;
1559   block_stmt_iterator si;
1560   tree stmt, use;
1561   stmt_ann_t ann;
1562   ssa_op_iter iter;
1563   unsigned int i;
1564   stmt_vec_info stmt_vinfo;
1565   basic_block bb;
1566   tree phi;
1567   bool relevant_p, live_p;
1568   tree def, def_stmt;
1569   enum vect_def_type dt;
1570 
1571   if (vect_print_dump_info (REPORT_DETAILS))
1572     fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
1573 
1574   worklist = VEC_alloc (tree, heap, 64);
1575 
1576   /* 1. Init worklist.  */
1577 
1578   bb = loop->header;
1579   for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1580     {
1581       if (vect_print_dump_info (REPORT_DETAILS))
1582         {
1583           fprintf (vect_dump, "init: phi relevant? ");
1584           print_generic_expr (vect_dump, phi, TDF_SLIM);
1585         }
1586 
1587       if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant_p, &live_p))
1588 	vect_mark_relevant (&worklist, phi, relevant_p, live_p);
1589     }
1590 
1591   for (i = 0; i < nbbs; i++)
1592     {
1593       bb = bbs[i];
1594       for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1595 	{
1596 	  stmt = bsi_stmt (si);
1597 
1598 	  if (vect_print_dump_info (REPORT_DETAILS))
1599 	    {
1600 	      fprintf (vect_dump, "init: stmt relevant? ");
1601 	      print_generic_expr (vect_dump, stmt, TDF_SLIM);
1602 	    }
1603 
1604 	  if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant_p, &live_p))
1605             vect_mark_relevant (&worklist, stmt, relevant_p, live_p);
1606 	}
1607     }
1608 
1609 
1610   /* 2. Process_worklist */
1611 
1612   while (VEC_length (tree, worklist) > 0)
1613     {
1614       stmt = VEC_pop (tree, worklist);
1615 
1616       if (vect_print_dump_info (REPORT_DETAILS))
1617 	{
1618           fprintf (vect_dump, "worklist: examine stmt: ");
1619           print_generic_expr (vect_dump, stmt, TDF_SLIM);
1620 	}
1621 
1622       /* Examine the USEs of STMT. For each ssa-name USE thta is defined
1623          in the loop, mark the stmt that defines it (DEF_STMT) as
1624          relevant/irrelevant and live/dead according to the liveness and
1625          relevance properties of STMT.
1626        */
1627 
1628       gcc_assert (TREE_CODE (stmt) != PHI_NODE);
1629 
1630       ann = stmt_ann (stmt);
1631       stmt_vinfo = vinfo_for_stmt (stmt);
1632 
1633       relevant_p = STMT_VINFO_RELEVANT_P (stmt_vinfo);
1634       live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
1635 
1636       /* Generally, the liveness and relevance properties of STMT are
1637          propagated to the DEF_STMTs of its USEs:
1638              STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
1639              STMT_VINFO_RELEVANT_P (DEF_STMT_info) <-- relevant_p
1640 
1641          Exceptions:
1642 
1643 	 (case 1)
1644            If USE is used only for address computations (e.g. array indexing),
1645            which does not need to be directly vectorized, then the
1646            liveness/relevance of the respective DEF_STMT is left unchanged.
1647 
1648 	 (case 2)
1649            If STMT has been identified as defining a reduction variable, then
1650 	   we have two cases:
1651 	   (case 2.1)
1652 	     The last use of STMT is the reduction-variable, which is defined
1653 	     by a loop-header-phi. We don't want to mark the phi as live or
1654 	     relevant (because it does not need to be vectorized, it is handled
1655              as part of the vectorization of the reduction), so in this case we
1656 	     skip the call to vect_mark_relevant.
1657 	   (case 2.2)
1658 	     The rest of the uses of STMT are defined in the loop body. For
1659              the def_stmt of these uses we want to set liveness/relevance
1660              as follows:
1661                STMT_VINFO_LIVE_P (DEF_STMT_info) <-- false
1662                STMT_VINFO_RELEVANT_P (DEF_STMT_info) <-- true
1663              because even though STMT is classified as live (since it defines a
1664              value that is used across loop iterations) and irrelevant (since it
1665              is not used inside the loop), it will be vectorized, and therefore
1666              the corresponding DEF_STMTs need to marked as relevant.
1667        */
1668 
1669       /* case 2.2:  */
1670       if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
1671         {
1672           gcc_assert (!relevant_p && live_p);
1673           relevant_p = true;
1674           live_p = false;
1675         }
1676 
1677       FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
1678 	{
1679 	  /* case 1: we are only interested in uses that need to be vectorized.
1680 	     Uses that are used for address computation are not considered
1681 	     relevant.
1682 	   */
1683 	  if (!exist_non_indexing_operands_for_use_p (use, stmt))
1684 	    continue;
1685 
1686 	  if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &def, &dt))
1687             {
1688               if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1689                 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
1690 	      VEC_free (tree, heap, worklist);
1691               return false;
1692             }
1693 
1694 	  if (!def_stmt || IS_EMPTY_STMT (def_stmt))
1695 	    continue;
1696 
1697           if (vect_print_dump_info (REPORT_DETAILS))
1698             {
1699               fprintf (vect_dump, "worklist: examine use %d: ", i);
1700               print_generic_expr (vect_dump, use, TDF_SLIM);
1701             }
1702 
1703 	  bb = bb_for_stmt (def_stmt);
1704           if (!flow_bb_inside_loop_p (loop, bb))
1705             continue;
1706 
1707 	  /* case 2.1: the reduction-use does not mark the defining-phi
1708 	     as relevant.  */
1709 	  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
1710 	      && TREE_CODE (def_stmt) == PHI_NODE)
1711 	    continue;
1712 
1713 	  vect_mark_relevant (&worklist, def_stmt, relevant_p, live_p);
1714 	}
1715     }				/* while worklist */
1716 
1717   VEC_free (tree, heap, worklist);
1718   return true;
1719 }
1720 
1721 
1722 /* Function vect_can_advance_ivs_p
1723 
1724    In case the number of iterations that LOOP iterates is unknown at compile
1725    time, an epilog loop will be generated, and the loop induction variables
1726    (IVs) will be "advanced" to the value they are supposed to take just before
1727    the epilog loop.  Here we check that the access function of the loop IVs
1728    and the expression that represents the loop bound are simple enough.
1729    These restrictions will be relaxed in the future.  */
1730 
1731 static bool
vect_can_advance_ivs_p(loop_vec_info loop_vinfo)1732 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
1733 {
1734   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1735   basic_block bb = loop->header;
1736   tree phi;
1737 
1738   /* Analyze phi functions of the loop header.  */
1739 
1740   if (vect_print_dump_info (REPORT_DETAILS))
1741     fprintf (vect_dump, "=== vect_can_advance_ivs_p ===");
1742 
1743   for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1744     {
1745       tree access_fn = NULL;
1746       tree evolution_part;
1747 
1748       if (vect_print_dump_info (REPORT_DETAILS))
1749 	{
1750           fprintf (vect_dump, "Analyze phi: ");
1751           print_generic_expr (vect_dump, phi, TDF_SLIM);
1752 	}
1753 
1754       /* Skip virtual phi's. The data dependences that are associated with
1755          virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
1756 
1757       if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
1758 	{
1759 	  if (vect_print_dump_info (REPORT_DETAILS))
1760 	    fprintf (vect_dump, "virtual phi. skip.");
1761 	  continue;
1762 	}
1763 
1764       /* Skip reduction phis.  */
1765 
1766       if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
1767         {
1768           if (vect_print_dump_info (REPORT_DETAILS))
1769             fprintf (vect_dump, "reduc phi. skip.");
1770           continue;
1771         }
1772 
1773       /* Analyze the evolution function.  */
1774 
1775       access_fn = instantiate_parameters
1776 	(loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
1777 
1778       if (!access_fn)
1779 	{
1780 	  if (vect_print_dump_info (REPORT_DETAILS))
1781 	    fprintf (vect_dump, "No Access function.");
1782 	  return false;
1783 	}
1784 
1785       if (vect_print_dump_info (REPORT_DETAILS))
1786         {
1787 	  fprintf (vect_dump, "Access function of PHI: ");
1788 	  print_generic_expr (vect_dump, access_fn, TDF_SLIM);
1789         }
1790 
1791       evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
1792 
1793       if (evolution_part == NULL_TREE)
1794         {
1795 	  if (vect_print_dump_info (REPORT_DETAILS))
1796 	    fprintf (vect_dump, "No evolution.");
1797 	  return false;
1798         }
1799 
1800       /* FORNOW: We do not transform initial conditions of IVs
1801 	 which evolution functions are a polynomial of degree >= 2.  */
1802 
1803       if (tree_is_chrec (evolution_part))
1804 	return false;
1805     }
1806 
1807   return true;
1808 }
1809 
1810 
1811 /* Function vect_get_loop_niters.
1812 
1813    Determine how many iterations the loop is executed.
1814    If an expression that represents the number of iterations
1815    can be constructed, place it in NUMBER_OF_ITERATIONS.
1816    Return the loop exit condition.  */
1817 
1818 static tree
vect_get_loop_niters(struct loop * loop,tree * number_of_iterations)1819 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
1820 {
1821   tree niters;
1822 
1823   if (vect_print_dump_info (REPORT_DETAILS))
1824     fprintf (vect_dump, "=== get_loop_niters ===");
1825 
1826   niters = number_of_iterations_in_loop (loop);
1827 
1828   if (niters != NULL_TREE
1829       && niters != chrec_dont_know)
1830     {
1831       *number_of_iterations = niters;
1832 
1833       if (vect_print_dump_info (REPORT_DETAILS))
1834 	{
1835 	  fprintf (vect_dump, "==> get_loop_niters:" );
1836 	  print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
1837 	}
1838     }
1839 
1840   return get_loop_exit_condition (loop);
1841 }
1842 
1843 
1844 /* Function vect_analyze_loop_form.
1845 
1846    Verify the following restrictions (some may be relaxed in the future):
1847    - it's an inner-most loop
1848    - number of BBs = 2 (which are the loop header and the latch)
1849    - the loop has a pre-header
1850    - the loop has a single entry and exit
1851    - the loop exit condition is simple enough, and the number of iterations
1852      can be analyzed (a countable loop).  */
1853 
1854 static loop_vec_info
vect_analyze_loop_form(struct loop * loop)1855 vect_analyze_loop_form (struct loop *loop)
1856 {
1857   loop_vec_info loop_vinfo;
1858   tree loop_cond;
1859   tree number_of_iterations = NULL;
1860 
1861   if (vect_print_dump_info (REPORT_DETAILS))
1862     fprintf (vect_dump, "=== vect_analyze_loop_form ===");
1863 
1864   if (loop->inner)
1865     {
1866       if (vect_print_dump_info (REPORT_OUTER_LOOPS))
1867         fprintf (vect_dump, "not vectorized: nested loop.");
1868       return NULL;
1869     }
1870 
1871   if (!loop->single_exit
1872       || loop->num_nodes != 2
1873       || EDGE_COUNT (loop->header->preds) != 2)
1874     {
1875       if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1876         {
1877           if (!loop->single_exit)
1878             fprintf (vect_dump, "not vectorized: multiple exits.");
1879           else if (loop->num_nodes != 2)
1880             fprintf (vect_dump, "not vectorized: too many BBs in loop.");
1881           else if (EDGE_COUNT (loop->header->preds) != 2)
1882             fprintf (vect_dump, "not vectorized: too many incoming edges.");
1883         }
1884 
1885       return NULL;
1886     }
1887 
1888   /* We assume that the loop exit condition is at the end of the loop. i.e,
1889      that the loop is represented as a do-while (with a proper if-guard
1890      before the loop if needed), where the loop header contains all the
1891      executable statements, and the latch is empty.  */
1892   if (!empty_block_p (loop->latch)
1893         || phi_nodes (loop->latch))
1894     {
1895       if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1896         fprintf (vect_dump, "not vectorized: unexpected loop form.");
1897       return NULL;
1898     }
1899 
1900   /* Make sure there exists a single-predecessor exit bb:  */
1901   if (!single_pred_p (loop->single_exit->dest))
1902     {
1903       edge e = loop->single_exit;
1904       if (!(e->flags & EDGE_ABNORMAL))
1905 	{
1906 	  split_loop_exit_edge (e);
1907 	  if (vect_print_dump_info (REPORT_DETAILS))
1908 	    fprintf (vect_dump, "split exit edge.");
1909 	}
1910       else
1911 	{
1912 	  if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1913 	    fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
1914 	  return NULL;
1915 	}
1916     }
1917 
1918   if (empty_block_p (loop->header))
1919     {
1920       if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1921         fprintf (vect_dump, "not vectorized: empty loop.");
1922       return NULL;
1923     }
1924 
1925   loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
1926   if (!loop_cond)
1927     {
1928       if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1929 	fprintf (vect_dump, "not vectorized: complicated exit condition.");
1930       return NULL;
1931     }
1932 
1933   if (!number_of_iterations)
1934     {
1935       if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1936 	fprintf (vect_dump,
1937 		 "not vectorized: number of iterations cannot be computed.");
1938       return NULL;
1939     }
1940 
1941   if (chrec_contains_undetermined (number_of_iterations))
1942     {
1943       if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1944         fprintf (vect_dump, "Infinite number of iterations.");
1945       return false;
1946     }
1947 
1948   loop_vinfo = new_loop_vec_info (loop);
1949   LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
1950 
1951   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
1952     {
1953       if (vect_print_dump_info (REPORT_DETAILS))
1954         {
1955           fprintf (vect_dump, "Symbolic number of iterations is ");
1956           print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
1957         }
1958     }
1959   else
1960   if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
1961     {
1962       if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1963         fprintf (vect_dump, "not vectorized: number of iterations = 0.");
1964       return NULL;
1965     }
1966 
1967   LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
1968 
1969   return loop_vinfo;
1970 }
1971 
1972 
1973 /* Function vect_analyze_loop.
1974 
1975    Apply a set of analyses on LOOP, and create a loop_vec_info struct
1976    for it. The different analyses will record information in the
1977    loop_vec_info struct.  */
1978 loop_vec_info
vect_analyze_loop(struct loop * loop)1979 vect_analyze_loop (struct loop *loop)
1980 {
1981   bool ok;
1982   loop_vec_info loop_vinfo;
1983 
1984   if (vect_print_dump_info (REPORT_DETAILS))
1985     fprintf (vect_dump, "===== analyze_loop_nest =====");
1986 
1987   /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.  */
1988 
1989   loop_vinfo = vect_analyze_loop_form (loop);
1990   if (!loop_vinfo)
1991     {
1992       if (vect_print_dump_info (REPORT_DETAILS))
1993 	fprintf (vect_dump, "bad loop form.");
1994       return NULL;
1995     }
1996 
1997   /* Find all data references in the loop (which correspond to vdefs/vuses)
1998      and analyze their evolution in the loop.
1999 
2000      FORNOW: Handle only simple, array references, which
2001      alignment can be forced, and aligned pointer-references.  */
2002 
2003   ok = vect_analyze_data_refs (loop_vinfo);
2004   if (!ok)
2005     {
2006       if (vect_print_dump_info (REPORT_DETAILS))
2007 	fprintf (vect_dump, "bad data references.");
2008       destroy_loop_vec_info (loop_vinfo);
2009       return NULL;
2010     }
2011 
2012   /* Classify all cross-iteration scalar data-flow cycles.
2013      Cross-iteration cycles caused by virtual phis are analyzed separately.  */
2014 
2015   vect_analyze_scalar_cycles (loop_vinfo);
2016 
2017   vect_pattern_recog (loop_vinfo);
2018 
2019   /* Data-flow analysis to detect stmts that do not need to be vectorized.  */
2020 
2021   ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
2022   if (!ok)
2023     {
2024       if (vect_print_dump_info (REPORT_DETAILS))
2025 	fprintf (vect_dump, "unexpected pattern.");
2026       destroy_loop_vec_info (loop_vinfo);
2027       return NULL;
2028     }
2029 
2030   /* Analyze the alignment of the data-refs in the loop.
2031      Fail if a data reference is found that cannot be vectorized.  */
2032 
2033   ok = vect_analyze_data_refs_alignment (loop_vinfo);
2034   if (!ok)
2035     {
2036       if (vect_print_dump_info (REPORT_DETAILS))
2037 	fprintf (vect_dump, "bad data alignment.");
2038       destroy_loop_vec_info (loop_vinfo);
2039       return NULL;
2040     }
2041 
2042   ok = vect_determine_vectorization_factor (loop_vinfo);
2043   if (!ok)
2044     {
2045       if (vect_print_dump_info (REPORT_DETAILS))
2046         fprintf (vect_dump, "can't determine vectorization factor.");
2047       destroy_loop_vec_info (loop_vinfo);
2048       return NULL;
2049     }
2050 
2051   /* Analyze data dependences between the data-refs in the loop.
2052      FORNOW: fail at the first data dependence that we encounter.  */
2053 
2054   ok = vect_analyze_data_ref_dependences (loop_vinfo);
2055   if (!ok)
2056     {
2057       if (vect_print_dump_info (REPORT_DETAILS))
2058 	fprintf (vect_dump, "bad data dependence.");
2059       destroy_loop_vec_info (loop_vinfo);
2060       return NULL;
2061     }
2062 
2063   /* Analyze the access patterns of the data-refs in the loop (consecutive,
2064      complex, etc.). FORNOW: Only handle consecutive access pattern.  */
2065 
2066   ok = vect_analyze_data_ref_accesses (loop_vinfo);
2067   if (!ok)
2068     {
2069       if (vect_print_dump_info (REPORT_DETAILS))
2070 	fprintf (vect_dump, "bad data access.");
2071       destroy_loop_vec_info (loop_vinfo);
2072       return NULL;
2073     }
2074 
2075   /* This pass will decide on using loop versioning and/or loop peeling in
2076      order to enhance the alignment of data references in the loop.  */
2077 
2078   ok = vect_enhance_data_refs_alignment (loop_vinfo);
2079   if (!ok)
2080     {
2081       if (vect_print_dump_info (REPORT_DETAILS))
2082 	fprintf (vect_dump, "bad data alignment.");
2083       destroy_loop_vec_info (loop_vinfo);
2084       return NULL;
2085     }
2086 
2087   /* Scan all the operations in the loop and make sure they are
2088      vectorizable.  */
2089 
2090   ok = vect_analyze_operations (loop_vinfo);
2091   if (!ok)
2092     {
2093       if (vect_print_dump_info (REPORT_DETAILS))
2094 	fprintf (vect_dump, "bad operation or unsupported loop bound.");
2095       destroy_loop_vec_info (loop_vinfo);
2096       return NULL;
2097     }
2098 
2099   LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
2100 
2101   return loop_vinfo;
2102 }
2103