1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2003,2004,2005,2006 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
20 02110-1301, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "ggc.h"
27 #include "tree.h"
28 #include "basic-block.h"
29 #include "diagnostic.h"
30 #include "tree-flow.h"
31 #include "tree-dump.h"
32 #include "timevar.h"
33 #include "cfgloop.h"
34 #include "expr.h"
35 #include "optabs.h"
36 #include "params.h"
37 #include "tree-chrec.h"
38 #include "tree-data-ref.h"
39 #include "tree-scalar-evolution.h"
40 #include "tree-vectorizer.h"
41
42 /* Main analysis functions. */
43 static loop_vec_info vect_analyze_loop_form (struct loop *);
44 static bool vect_analyze_data_refs (loop_vec_info);
45 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
46 static void vect_analyze_scalar_cycles (loop_vec_info);
47 static bool vect_analyze_data_ref_accesses (loop_vec_info);
48 static bool vect_analyze_data_ref_dependences (loop_vec_info);
49 static bool vect_analyze_data_refs_alignment (loop_vec_info);
50 static bool vect_compute_data_refs_alignment (loop_vec_info);
51 static bool vect_enhance_data_refs_alignment (loop_vec_info);
52 static bool vect_analyze_operations (loop_vec_info);
53 static bool vect_determine_vectorization_factor (loop_vec_info);
54
55 /* Utility functions for the analyses. */
56 static bool exist_non_indexing_operands_for_use_p (tree, tree);
57 static void vect_mark_relevant (VEC(tree,heap) **, tree, bool, bool);
58 static bool vect_stmt_relevant_p (tree, loop_vec_info, bool *, bool *);
59 static tree vect_get_loop_niters (struct loop *, tree *);
60 static bool vect_analyze_data_ref_dependence
61 (struct data_dependence_relation *, loop_vec_info);
62 static bool vect_compute_data_ref_alignment (struct data_reference *);
63 static bool vect_analyze_data_ref_access (struct data_reference *);
64 static bool vect_can_advance_ivs_p (loop_vec_info);
65 static void vect_update_misalignment_for_peel
66 (struct data_reference *, struct data_reference *, int npeel);
67
68
69 /* Function vect_determine_vectorization_factor
70
71 Determine the vectorization factor (VF). VF is the number of data elements
72 that are operated upon in parallel in a single iteration of the vectorized
73 loop. For example, when vectorizing a loop that operates on 4byte elements,
74 on a target with vector size (VS) 16byte, the VF is set to 4, since 4
75 elements can fit in a single vector register.
76
77 We currently support vectorization of loops in which all types operated upon
78 are of the same size. Therefore this function currently sets VF according to
79 the size of the types operated upon, and fails if there are multiple sizes
80 in the loop.
81
82 VF is also the factor by which the loop iterations are strip-mined, e.g.:
83 original loop:
84 for (i=0; i<N; i++){
85 a[i] = b[i] + c[i];
86 }
87
88 vectorized loop:
89 for (i=0; i<N; i+=VF){
90 a[i:VF] = b[i:VF] + c[i:VF];
91 }
92 */
93
94 static bool
vect_determine_vectorization_factor(loop_vec_info loop_vinfo)95 vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
96 {
97 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
98 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
99 int nbbs = loop->num_nodes;
100 block_stmt_iterator si;
101 unsigned int vectorization_factor = 0;
102 int i;
103 tree scalar_type;
104
105 if (vect_print_dump_info (REPORT_DETAILS))
106 fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");
107
108 for (i = 0; i < nbbs; i++)
109 {
110 basic_block bb = bbs[i];
111
112 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
113 {
114 tree stmt = bsi_stmt (si);
115 unsigned int nunits;
116 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
117 tree vectype;
118
119 if (vect_print_dump_info (REPORT_DETAILS))
120 {
121 fprintf (vect_dump, "==> examining statement: ");
122 print_generic_expr (vect_dump, stmt, TDF_SLIM);
123 }
124
125 gcc_assert (stmt_info);
126 /* skip stmts which do not need to be vectorized. */
127 if (!STMT_VINFO_RELEVANT_P (stmt_info)
128 && !STMT_VINFO_LIVE_P (stmt_info))
129 {
130 if (vect_print_dump_info (REPORT_DETAILS))
131 fprintf (vect_dump, "skip.");
132 continue;
133 }
134
135 if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
136 {
137 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
138 {
139 fprintf (vect_dump, "not vectorized: vector stmt in loop:");
140 print_generic_expr (vect_dump, stmt, TDF_SLIM);
141 }
142 return false;
143 }
144
145 if (STMT_VINFO_VECTYPE (stmt_info))
146 {
147 vectype = STMT_VINFO_VECTYPE (stmt_info);
148 scalar_type = TREE_TYPE (vectype);
149 }
150 else
151 {
152 if (STMT_VINFO_DATA_REF (stmt_info))
153 scalar_type =
154 TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
155 else if (TREE_CODE (stmt) == MODIFY_EXPR)
156 scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
157 else
158 scalar_type = TREE_TYPE (stmt);
159
160 if (vect_print_dump_info (REPORT_DETAILS))
161 {
162 fprintf (vect_dump, "get vectype for scalar type: ");
163 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
164 }
165
166 vectype = get_vectype_for_scalar_type (scalar_type);
167 if (!vectype)
168 {
169 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
170 {
171 fprintf (vect_dump,
172 "not vectorized: unsupported data-type ");
173 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
174 }
175 return false;
176 }
177 STMT_VINFO_VECTYPE (stmt_info) = vectype;
178 }
179
180 if (vect_print_dump_info (REPORT_DETAILS))
181 {
182 fprintf (vect_dump, "vectype: ");
183 print_generic_expr (vect_dump, vectype, TDF_SLIM);
184 }
185
186 nunits = TYPE_VECTOR_SUBPARTS (vectype);
187 if (vect_print_dump_info (REPORT_DETAILS))
188 fprintf (vect_dump, "nunits = %d", nunits);
189
190 if (vectorization_factor)
191 {
192 /* FORNOW: don't allow mixed units.
193 This restriction will be relaxed in the future. */
194 if (nunits != vectorization_factor)
195 {
196 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
197 fprintf (vect_dump, "not vectorized: mixed data-types");
198 return false;
199 }
200 }
201 else
202 vectorization_factor = nunits;
203
204 gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
205 * vectorization_factor == UNITS_PER_SIMD_WORD);
206 }
207 }
208
209 /* TODO: Analyze cost. Decide if worth while to vectorize. */
210
211 if (vectorization_factor <= 1)
212 {
213 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
214 fprintf (vect_dump, "not vectorized: unsupported data-type");
215 return false;
216 }
217 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
218
219 return true;
220 }
221
222
223 /* Function vect_analyze_operations.
224
225 Scan the loop stmts and make sure they are all vectorizable. */
226
227 static bool
vect_analyze_operations(loop_vec_info loop_vinfo)228 vect_analyze_operations (loop_vec_info loop_vinfo)
229 {
230 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
231 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
232 int nbbs = loop->num_nodes;
233 block_stmt_iterator si;
234 unsigned int vectorization_factor = 0;
235 int i;
236 bool ok;
237 tree phi;
238 stmt_vec_info stmt_info;
239 bool need_to_vectorize = false;
240
241 if (vect_print_dump_info (REPORT_DETAILS))
242 fprintf (vect_dump, "=== vect_analyze_operations ===");
243
244 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
245 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
246
247 for (i = 0; i < nbbs; i++)
248 {
249 basic_block bb = bbs[i];
250
251 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
252 {
253 stmt_info = vinfo_for_stmt (phi);
254 if (vect_print_dump_info (REPORT_DETAILS))
255 {
256 fprintf (vect_dump, "examining phi: ");
257 print_generic_expr (vect_dump, phi, TDF_SLIM);
258 }
259
260 gcc_assert (stmt_info);
261
262 if (STMT_VINFO_LIVE_P (stmt_info))
263 {
264 /* FORNOW: not yet supported. */
265 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
266 fprintf (vect_dump, "not vectorized: value used after loop.");
267 return false;
268 }
269
270 if (STMT_VINFO_RELEVANT_P (stmt_info))
271 {
272 /* Most likely a reduction-like computation that is used
273 in the loop. */
274 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
275 fprintf (vect_dump, "not vectorized: unsupported pattern.");
276 return false;
277 }
278 }
279
280 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
281 {
282 tree stmt = bsi_stmt (si);
283 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
284
285 if (vect_print_dump_info (REPORT_DETAILS))
286 {
287 fprintf (vect_dump, "==> examining statement: ");
288 print_generic_expr (vect_dump, stmt, TDF_SLIM);
289 }
290
291 gcc_assert (stmt_info);
292
293 /* skip stmts which do not need to be vectorized.
294 this is expected to include:
295 - the COND_EXPR which is the loop exit condition
296 - any LABEL_EXPRs in the loop
297 - computations that are used only for array indexing or loop
298 control */
299
300 if (!STMT_VINFO_RELEVANT_P (stmt_info)
301 && !STMT_VINFO_LIVE_P (stmt_info))
302 {
303 if (vect_print_dump_info (REPORT_DETAILS))
304 fprintf (vect_dump, "irrelevant.");
305 continue;
306 }
307
308 if (STMT_VINFO_RELEVANT_P (stmt_info))
309 {
310 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))));
311 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
312
313 ok = (vectorizable_operation (stmt, NULL, NULL)
314 || vectorizable_assignment (stmt, NULL, NULL)
315 || vectorizable_load (stmt, NULL, NULL)
316 || vectorizable_store (stmt, NULL, NULL)
317 || vectorizable_condition (stmt, NULL, NULL));
318
319 if (!ok)
320 {
321 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
322 {
323 fprintf (vect_dump,
324 "not vectorized: relevant stmt not supported: ");
325 print_generic_expr (vect_dump, stmt, TDF_SLIM);
326 }
327 return false;
328 }
329 need_to_vectorize = true;
330 }
331
332 if (STMT_VINFO_LIVE_P (stmt_info))
333 {
334 ok = vectorizable_reduction (stmt, NULL, NULL);
335
336 if (ok)
337 need_to_vectorize = true;
338 else
339 ok = vectorizable_live_operation (stmt, NULL, NULL);
340
341 if (!ok)
342 {
343 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
344 {
345 fprintf (vect_dump,
346 "not vectorized: live stmt not supported: ");
347 print_generic_expr (vect_dump, stmt, TDF_SLIM);
348 }
349 return false;
350 }
351 }
352 } /* stmts in bb */
353 } /* bbs */
354
355 /* TODO: Analyze cost. Decide if worth while to vectorize. */
356
357 /* All operations in the loop are either irrelevant (deal with loop
358 control, or dead), or only used outside the loop and can be moved
359 out of the loop (e.g. invariants, inductions). The loop can be
360 optimized away by scalar optimizations. We're better off not
361 touching this loop. */
362 if (!need_to_vectorize)
363 {
364 if (vect_print_dump_info (REPORT_DETAILS))
365 fprintf (vect_dump,
366 "All the computation can be taken out of the loop.");
367 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
368 fprintf (vect_dump,
369 "not vectorized: redundant loop. no profit to vectorize.");
370 return false;
371 }
372
373 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
374 && vect_print_dump_info (REPORT_DETAILS))
375 fprintf (vect_dump,
376 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
377 vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
378
379 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
380 && LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor)
381 {
382 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
383 fprintf (vect_dump, "not vectorized: iteration count too small.");
384 return false;
385 }
386
387 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
388 || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
389 || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
390 {
391 if (vect_print_dump_info (REPORT_DETAILS))
392 fprintf (vect_dump, "epilog loop required.");
393 if (!vect_can_advance_ivs_p (loop_vinfo))
394 {
395 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
396 fprintf (vect_dump,
397 "not vectorized: can't create epilog loop 1.");
398 return false;
399 }
400 if (!slpeel_can_duplicate_loop_p (loop, loop->single_exit))
401 {
402 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
403 fprintf (vect_dump,
404 "not vectorized: can't create epilog loop 2.");
405 return false;
406 }
407 }
408
409 return true;
410 }
411
412
413 /* Function exist_non_indexing_operands_for_use_p
414
415 USE is one of the uses attached to STMT. Check if USE is
416 used in STMT for anything other than indexing an array. */
417
418 static bool
exist_non_indexing_operands_for_use_p(tree use,tree stmt)419 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
420 {
421 tree operand;
422 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
423
424 /* USE corresponds to some operand in STMT. If there is no data
425 reference in STMT, then any operand that corresponds to USE
426 is not indexing an array. */
427 if (!STMT_VINFO_DATA_REF (stmt_info))
428 return true;
429
430 /* STMT has a data_ref. FORNOW this means that its of one of
431 the following forms:
432 -1- ARRAY_REF = var
433 -2- var = ARRAY_REF
434 (This should have been verified in analyze_data_refs).
435
436 'var' in the second case corresponds to a def, not a use,
437 so USE cannot correspond to any operands that are not used
438 for array indexing.
439
440 Therefore, all we need to check is if STMT falls into the
441 first case, and whether var corresponds to USE. */
442
443 if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
444 return false;
445
446 operand = TREE_OPERAND (stmt, 1);
447
448 if (TREE_CODE (operand) != SSA_NAME)
449 return false;
450
451 if (operand == use)
452 return true;
453
454 return false;
455 }
456
457
458 /* Function vect_analyze_scalar_cycles.
459
460 Examine the cross iteration def-use cycles of scalar variables, by
461 analyzing the loop (scalar) PHIs; Classify each cycle as one of the
462 following: invariant, induction, reduction, unknown.
463
464 Some forms of scalar cycles are not yet supported.
465
466 Example1: reduction: (unsupported yet)
467
468 loop1:
469 for (i=0; i<N; i++)
470 sum += a[i];
471
472 Example2: induction: (unsupported yet)
473
474 loop2:
475 for (i=0; i<N; i++)
476 a[i] = i;
477
478 Note: the following loop *is* vectorizable:
479
480 loop3:
481 for (i=0; i<N; i++)
482 a[i] = b[i];
483
484 even though it has a def-use cycle caused by the induction variable i:
485
486 loop: i_2 = PHI (i_0, i_1)
487 a[i_2] = ...;
488 i_1 = i_2 + 1;
489 GOTO loop;
490
491 because the def-use cycle in loop3 is considered "not relevant" - i.e.,
492 it does not need to be vectorized because it is only used for array
493 indexing (see 'mark_stmts_to_be_vectorized'). The def-use cycle in
494 loop2 on the other hand is relevant (it is being written to memory).
495 */
496
497 static void
vect_analyze_scalar_cycles(loop_vec_info loop_vinfo)498 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
499 {
500 tree phi;
501 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
502 basic_block bb = loop->header;
503 tree dummy;
504
505 if (vect_print_dump_info (REPORT_DETAILS))
506 fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");
507
508 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
509 {
510 tree access_fn = NULL;
511 tree def = PHI_RESULT (phi);
512 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
513 tree reduc_stmt;
514
515 if (vect_print_dump_info (REPORT_DETAILS))
516 {
517 fprintf (vect_dump, "Analyze phi: ");
518 print_generic_expr (vect_dump, phi, TDF_SLIM);
519 }
520
521 /* Skip virtual phi's. The data dependences that are associated with
522 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
523
524 if (!is_gimple_reg (SSA_NAME_VAR (def)))
525 {
526 if (vect_print_dump_info (REPORT_DETAILS))
527 fprintf (vect_dump, "virtual phi. skip.");
528 continue;
529 }
530
531 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;
532
533 /* Analyze the evolution function. */
534
535 access_fn = analyze_scalar_evolution (loop, def);
536
537 if (!access_fn)
538 continue;
539
540 if (vect_print_dump_info (REPORT_DETAILS))
541 {
542 fprintf (vect_dump, "Access function of PHI: ");
543 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
544 }
545
546 if (vect_is_simple_iv_evolution (loop->num, access_fn, &dummy, &dummy))
547 {
548 if (vect_print_dump_info (REPORT_DETAILS))
549 fprintf (vect_dump, "Detected induction.");
550 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
551 continue;
552 }
553
554 /* TODO: handle invariant phis */
555
556 reduc_stmt = vect_is_simple_reduction (loop, phi);
557 if (reduc_stmt)
558 {
559 if (vect_print_dump_info (REPORT_DETAILS))
560 fprintf (vect_dump, "Detected reduction.");
561 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
562 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
563 vect_reduction_def;
564 }
565 else
566 if (vect_print_dump_info (REPORT_DETAILS))
567 fprintf (vect_dump, "Unknown def-use cycle pattern.");
568
569 }
570
571 return;
572 }
573
574
575 /* Function vect_analyze_data_ref_dependence.
576
577 Return TRUE if there (might) exist a dependence between a memory-reference
578 DRA and a memory-reference DRB. */
579
580 static bool
vect_analyze_data_ref_dependence(struct data_dependence_relation * ddr,loop_vec_info loop_vinfo)581 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
582 loop_vec_info loop_vinfo)
583 {
584 unsigned int i;
585 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
586 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
587 struct data_reference *dra = DDR_A (ddr);
588 struct data_reference *drb = DDR_B (ddr);
589 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
590 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
591 lambda_vector dist_v;
592 unsigned int loop_depth;
593
594 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
595 return false;
596
597 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
598 {
599 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
600 {
601 fprintf (vect_dump,
602 "not vectorized: can't determine dependence between ");
603 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
604 fprintf (vect_dump, " and ");
605 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
606 }
607 return true;
608 }
609
610 if (DDR_NUM_DIST_VECTS (ddr) == 0)
611 {
612 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
613 {
614 fprintf (vect_dump, "not vectorized: bad dist vector for ");
615 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
616 fprintf (vect_dump, " and ");
617 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
618 }
619 return true;
620 }
621
622 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
623 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
624 {
625 int dist = dist_v[loop_depth];
626
627 if (vect_print_dump_info (REPORT_DR_DETAILS))
628 fprintf (vect_dump, "dependence distance = %d.", dist);
629
630 /* Same loop iteration. */
631 if (dist % vectorization_factor == 0)
632 {
633 /* Two references with distance zero have the same alignment. */
634 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
635 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
636 if (vect_print_dump_info (REPORT_ALIGNMENT))
637 fprintf (vect_dump, "accesses have the same alignment.");
638 if (vect_print_dump_info (REPORT_DR_DETAILS))
639 {
640 fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
641 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
642 fprintf (vect_dump, " and ");
643 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
644 }
645 continue;
646 }
647
648 if (abs (dist) >= vectorization_factor)
649 {
650 /* Dependence distance does not create dependence, as far as vectorization
651 is concerned, in this case. */
652 if (vect_print_dump_info (REPORT_DR_DETAILS))
653 fprintf (vect_dump, "dependence distance >= VF.");
654 continue;
655 }
656
657 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
658 {
659 fprintf (vect_dump,
660 "not vectorized: possible dependence between data-refs ");
661 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
662 fprintf (vect_dump, " and ");
663 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
664 }
665
666 return true;
667 }
668
669 return false;
670 }
671
672
673 /* Function vect_analyze_data_ref_dependences.
674
675 Examine all the data references in the loop, and make sure there do not
676 exist any data dependences between them. */
677
678 static bool
vect_analyze_data_ref_dependences(loop_vec_info loop_vinfo)679 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
680 {
681 unsigned int i;
682 VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
683 struct data_dependence_relation *ddr;
684
685 if (vect_print_dump_info (REPORT_DETAILS))
686 fprintf (vect_dump, "=== vect_analyze_dependences ===");
687
688 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
689 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo))
690 return false;
691
692 return true;
693 }
694
695
696 /* Function vect_compute_data_ref_alignment
697
698 Compute the misalignment of the data reference DR.
699
700 Output:
701 1. If during the misalignment computation it is found that the data reference
702 cannot be vectorized then false is returned.
703 2. DR_MISALIGNMENT (DR) is defined.
704
705 FOR NOW: No analysis is actually performed. Misalignment is calculated
706 only for trivial cases. TODO. */
707
708 static bool
vect_compute_data_ref_alignment(struct data_reference * dr)709 vect_compute_data_ref_alignment (struct data_reference *dr)
710 {
711 tree stmt = DR_STMT (dr);
712 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
713 tree ref = DR_REF (dr);
714 tree vectype;
715 tree base, base_addr;
716 bool base_aligned;
717 tree misalign;
718 tree aligned_to, alignment;
719
720 if (vect_print_dump_info (REPORT_DETAILS))
721 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
722
723 /* Initialize misalignment to unknown. */
724 DR_MISALIGNMENT (dr) = -1;
725
726 misalign = DR_OFFSET_MISALIGNMENT (dr);
727 aligned_to = DR_ALIGNED_TO (dr);
728 base_addr = DR_BASE_ADDRESS (dr);
729 base = build_fold_indirect_ref (base_addr);
730 vectype = STMT_VINFO_VECTYPE (stmt_info);
731 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
732
733 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
734 || !misalign)
735 {
736 if (vect_print_dump_info (REPORT_DETAILS))
737 {
738 fprintf (vect_dump, "Unknown alignment for access: ");
739 print_generic_expr (vect_dump, base, TDF_SLIM);
740 }
741 return true;
742 }
743
744 if ((DECL_P (base)
745 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
746 alignment) >= 0)
747 || (TREE_CODE (base_addr) == SSA_NAME
748 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
749 TREE_TYPE (base_addr)))),
750 alignment) >= 0))
751 base_aligned = true;
752 else
753 base_aligned = false;
754
755 if (!base_aligned)
756 {
757 /* Do not change the alignment of global variables if
758 flag_section_anchors is enabled. */
759 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
760 || (TREE_STATIC (base) && flag_section_anchors))
761 {
762 if (vect_print_dump_info (REPORT_DETAILS))
763 {
764 fprintf (vect_dump, "can't force alignment of ref: ");
765 print_generic_expr (vect_dump, ref, TDF_SLIM);
766 }
767 return true;
768 }
769
770 /* Force the alignment of the decl.
771 NOTE: This is the only change to the code we make during
772 the analysis phase, before deciding to vectorize the loop. */
773 if (vect_print_dump_info (REPORT_DETAILS))
774 fprintf (vect_dump, "force alignment");
775 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
776 DECL_USER_ALIGN (base) = 1;
777 }
778
779 /* At this point we assume that the base is aligned. */
780 gcc_assert (base_aligned
781 || (TREE_CODE (base) == VAR_DECL
782 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
783
784 /* Modulo alignment. */
785 misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);
786
787 if (!host_integerp (misalign, 1))
788 {
789 /* Negative or overflowed misalignment value. */
790 if (vect_print_dump_info (REPORT_DETAILS))
791 fprintf (vect_dump, "unexpected misalign value");
792 return false;
793 }
794
795 DR_MISALIGNMENT (dr) = TREE_INT_CST_LOW (misalign);
796
797 if (vect_print_dump_info (REPORT_DETAILS))
798 {
799 fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
800 print_generic_expr (vect_dump, ref, TDF_SLIM);
801 }
802
803 return true;
804 }
805
806
807 /* Function vect_compute_data_refs_alignment
808
809 Compute the misalignment of data references in the loop.
810 Return FALSE if a data reference is found that cannot be vectorized. */
811
812 static bool
vect_compute_data_refs_alignment(loop_vec_info loop_vinfo)813 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
814 {
815 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
816 struct data_reference *dr;
817 unsigned int i;
818
819 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
820 if (!vect_compute_data_ref_alignment (dr))
821 return false;
822
823 return true;
824 }
825
826
827 /* Function vect_update_misalignment_for_peel
828
829 DR - the data reference whose misalignment is to be adjusted.
830 DR_PEEL - the data reference whose misalignment is being made
831 zero in the vector loop by the peel.
832 NPEEL - the number of iterations in the peel loop if the misalignment
833 of DR_PEEL is known at compile time. */
834
835 static void
vect_update_misalignment_for_peel(struct data_reference * dr,struct data_reference * dr_peel,int npeel)836 vect_update_misalignment_for_peel (struct data_reference *dr,
837 struct data_reference *dr_peel, int npeel)
838 {
839 unsigned int i;
840 int drsize;
841 VEC(dr_p,heap) *same_align_drs;
842 struct data_reference *current_dr;
843
844 if (known_alignment_for_access_p (dr)
845 && DR_MISALIGNMENT (dr) == DR_MISALIGNMENT (dr_peel))
846 {
847 DR_MISALIGNMENT (dr) = 0;
848 return;
849 }
850
851 /* It can be assumed that the data refs with the same alignment as dr_peel
852 are aligned in the vector loop. */
853 same_align_drs
854 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
855 for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++)
856 {
857 if (current_dr != dr)
858 continue;
859 gcc_assert (DR_MISALIGNMENT (dr) == DR_MISALIGNMENT (dr_peel));
860 DR_MISALIGNMENT (dr) = 0;
861 return;
862 }
863
864 if (known_alignment_for_access_p (dr)
865 && known_alignment_for_access_p (dr_peel))
866 {
867 drsize = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
868 DR_MISALIGNMENT (dr) += npeel * drsize;
869 DR_MISALIGNMENT (dr) %= UNITS_PER_SIMD_WORD;
870 return;
871 }
872
873 DR_MISALIGNMENT (dr) = -1;
874 }
875
876
877 /* Function vect_verify_datarefs_alignment
878
879 Return TRUE if all data references in the loop can be
880 handled with respect to alignment. */
881
882 static bool
vect_verify_datarefs_alignment(loop_vec_info loop_vinfo)883 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo)
884 {
885 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
886 struct data_reference *dr;
887 enum dr_alignment_support supportable_dr_alignment;
888 unsigned int i;
889
890 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
891 {
892 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
893 if (!supportable_dr_alignment)
894 {
895 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
896 {
897 if (DR_IS_READ (dr))
898 fprintf (vect_dump,
899 "not vectorized: unsupported unaligned load.");
900 else
901 fprintf (vect_dump,
902 "not vectorized: unsupported unaligned store.");
903 }
904 return false;
905 }
906 if (supportable_dr_alignment != dr_aligned
907 && vect_print_dump_info (REPORT_ALIGNMENT))
908 fprintf (vect_dump, "Vectorizing an unaligned access.");
909 }
910 return true;
911 }
912
913
914 /* Function vect_enhance_data_refs_alignment
915
916 This pass will use loop versioning and loop peeling in order to enhance
917 the alignment of data references in the loop.
918
919 FOR NOW: we assume that whatever versioning/peeling takes place, only the
920 original loop is to be vectorized; Any other loops that are created by
921 the transformations performed in this pass - are not supposed to be
922 vectorized. This restriction will be relaxed.
923
924 This pass will require a cost model to guide it whether to apply peeling
925 or versioning or a combination of the two. For example, the scheme that
926 intel uses when given a loop with several memory accesses, is as follows:
927 choose one memory access ('p') which alignment you want to force by doing
928 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
929 other accesses are not necessarily aligned, or (2) use loop versioning to
930 generate one loop in which all accesses are aligned, and another loop in
931 which only 'p' is necessarily aligned.
932
933 ("Automatic Intra-Register Vectorization for the Intel Architecture",
934 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
935 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
936
937 Devising a cost model is the most critical aspect of this work. It will
938 guide us on which access to peel for, whether to use loop versioning, how
939 many versions to create, etc. The cost model will probably consist of
940 generic considerations as well as target specific considerations (on
941 powerpc for example, misaligned stores are more painful than misaligned
942 loads).
943
944 Here are the general steps involved in alignment enhancements:
945
946 -- original loop, before alignment analysis:
947 for (i=0; i<N; i++){
948 x = q[i]; # DR_MISALIGNMENT(q) = unknown
949 p[i] = y; # DR_MISALIGNMENT(p) = unknown
950 }
951
952 -- After vect_compute_data_refs_alignment:
953 for (i=0; i<N; i++){
954 x = q[i]; # DR_MISALIGNMENT(q) = 3
955 p[i] = y; # DR_MISALIGNMENT(p) = unknown
956 }
957
958 -- Possibility 1: we do loop versioning:
959 if (p is aligned) {
960 for (i=0; i<N; i++){ # loop 1A
961 x = q[i]; # DR_MISALIGNMENT(q) = 3
962 p[i] = y; # DR_MISALIGNMENT(p) = 0
963 }
964 }
965 else {
966 for (i=0; i<N; i++){ # loop 1B
967 x = q[i]; # DR_MISALIGNMENT(q) = 3
968 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
969 }
970 }
971
972 -- Possibility 2: we do loop peeling:
973 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
974 x = q[i];
975 p[i] = y;
976 }
977 for (i = 3; i < N; i++){ # loop 2A
978 x = q[i]; # DR_MISALIGNMENT(q) = 0
979 p[i] = y; # DR_MISALIGNMENT(p) = unknown
980 }
981
982 -- Possibility 3: combination of loop peeling and versioning:
983 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
984 x = q[i];
985 p[i] = y;
986 }
987 if (p is aligned) {
988 for (i = 3; i<N; i++){ # loop 3A
989 x = q[i]; # DR_MISALIGNMENT(q) = 0
990 p[i] = y; # DR_MISALIGNMENT(p) = 0
991 }
992 }
993 else {
994 for (i = 3; i<N; i++){ # loop 3B
995 x = q[i]; # DR_MISALIGNMENT(q) = 0
996 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
997 }
998 }
999
1000 These loops are later passed to loop_transform to be vectorized. The
1001 vectorizer will use the alignment information to guide the transformation
1002 (whether to generate regular loads/stores, or with special handling for
1003 misalignment). */
1004
1005 static bool
vect_enhance_data_refs_alignment(loop_vec_info loop_vinfo)1006 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1007 {
1008 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1009 enum dr_alignment_support supportable_dr_alignment;
1010 struct data_reference *dr0 = NULL;
1011 struct data_reference *dr;
1012 unsigned int i;
1013 bool do_peeling = false;
1014 bool do_versioning = false;
1015 bool stat;
1016
1017 /* While cost model enhancements are expected in the future, the high level
1018 view of the code at this time is as follows:
1019
1020 A) If there is a misaligned write then see if peeling to align this write
1021 can make all data references satisfy vect_supportable_dr_alignment.
1022 If so, update data structures as needed and return true. Note that
1023 at this time vect_supportable_dr_alignment is known to return false
1024 for a a misaligned write.
1025
1026 B) If peeling wasn't possible and there is a data reference with an
1027 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1028 then see if loop versioning checks can be used to make all data
1029 references satisfy vect_supportable_dr_alignment. If so, update
1030 data structures as needed and return true.
1031
1032 C) If neither peeling nor versioning were successful then return false if
1033 any data reference does not satisfy vect_supportable_dr_alignment.
1034
1035 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1036
1037 Note, Possibility 3 above (which is peeling and versioning together) is not
1038 being done at this time. */
1039
1040 /* (1) Peeling to force alignment. */
1041
1042 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1043 Considerations:
1044 + How many accesses will become aligned due to the peeling
1045 - How many accesses will become unaligned due to the peeling,
1046 and the cost of misaligned accesses.
1047 - The cost of peeling (the extra runtime checks, the increase
1048 in code size).
1049
1050 The scheme we use FORNOW: peel to force the alignment of the first
1051 misaligned store in the loop.
1052 Rationale: misaligned stores are not yet supported.
1053
1054 TODO: Use a cost model. */
1055
1056 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1057 if (!DR_IS_READ (dr) && !aligned_access_p (dr))
1058 {
1059 dr0 = dr;
1060 do_peeling = true;
1061 break;
1062 }
1063
1064 /* Often peeling for alignment will require peeling for loop-bound, which in
1065 turn requires that we know how to adjust the loop ivs after the loop. */
1066 if (!vect_can_advance_ivs_p (loop_vinfo))
1067 do_peeling = false;
1068
1069 if (do_peeling)
1070 {
1071 int mis;
1072 int npeel = 0;
1073
1074 if (known_alignment_for_access_p (dr0))
1075 {
1076 /* Since it's known at compile time, compute the number of iterations
1077 in the peeled loop (the peeling factor) for use in updating
1078 DR_MISALIGNMENT values. The peeling factor is the vectorization
1079 factor minus the misalignment as an element count. */
1080 mis = DR_MISALIGNMENT (dr0);
1081 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1082 npeel = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - mis;
1083 }
1084
1085 /* Ensure that all data refs can be vectorized after the peel. */
1086 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1087 {
1088 int save_misalignment;
1089
1090 if (dr == dr0)
1091 continue;
1092
1093 save_misalignment = DR_MISALIGNMENT (dr);
1094 vect_update_misalignment_for_peel (dr, dr0, npeel);
1095 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1096 DR_MISALIGNMENT (dr) = save_misalignment;
1097
1098 if (!supportable_dr_alignment)
1099 {
1100 do_peeling = false;
1101 break;
1102 }
1103 }
1104
1105 if (do_peeling)
1106 {
1107 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1108 If the misalignment of DR_i is identical to that of dr0 then set
1109 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1110 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1111 by the peeling factor times the element size of DR_i (MOD the
1112 vectorization factor times the size). Otherwise, the
1113 misalignment of DR_i must be set to unknown. */
1114 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1115 if (dr != dr0)
1116 vect_update_misalignment_for_peel (dr, dr0, npeel);
1117
1118 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1119 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1120 DR_MISALIGNMENT (dr0) = 0;
1121 if (vect_print_dump_info (REPORT_ALIGNMENT))
1122 fprintf (vect_dump, "Alignment of access forced using peeling.");
1123
1124 if (vect_print_dump_info (REPORT_DETAILS))
1125 fprintf (vect_dump, "Peeling for alignment will be applied.");
1126
1127 stat = vect_verify_datarefs_alignment (loop_vinfo);
1128 gcc_assert (stat);
1129 return stat;
1130 }
1131 }
1132
1133
1134 /* (2) Versioning to force alignment. */
1135
1136 /* Try versioning if:
1137 1) flag_tree_vect_loop_version is TRUE
1138 2) optimize_size is FALSE
1139 3) there is at least one unsupported misaligned data ref with an unknown
1140 misalignment, and
1141 4) all misaligned data refs with a known misalignment are supported, and
1142 5) the number of runtime alignment checks is within reason. */
1143
1144 do_versioning = flag_tree_vect_loop_version && (!optimize_size);
1145
1146 if (do_versioning)
1147 {
1148 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1149 {
1150 if (aligned_access_p (dr))
1151 continue;
1152
1153 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1154
1155 if (!supportable_dr_alignment)
1156 {
1157 tree stmt;
1158 int mask;
1159 tree vectype;
1160
1161 if (known_alignment_for_access_p (dr)
1162 || VEC_length (tree,
1163 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1164 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_CHECKS))
1165 {
1166 do_versioning = false;
1167 break;
1168 }
1169
1170 stmt = DR_STMT (dr);
1171 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1172 gcc_assert (vectype);
1173
1174 /* The rightmost bits of an aligned address must be zeros.
1175 Construct the mask needed for this test. For example,
1176 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1177 mask must be 15 = 0xf. */
1178 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1179
1180 /* FORNOW: use the same mask to test all potentially unaligned
1181 references in the loop. The vectorizer currently supports
1182 a single vector size, see the reference to
1183 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1184 vectorization factor is computed. */
1185 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
1186 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
1187 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
1188 VEC_safe_push (tree, heap,
1189 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
1190 DR_STMT (dr));
1191 }
1192 }
1193
1194 /* Versioning requires at least one misaligned data reference. */
1195 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)) == 0)
1196 do_versioning = false;
1197 else if (!do_versioning)
1198 VEC_truncate (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
1199 }
1200
1201 if (do_versioning)
1202 {
1203 VEC(tree,heap) *may_misalign_stmts
1204 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1205 tree stmt;
1206
1207 /* It can now be assumed that the data references in the statements
1208 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1209 of the loop being vectorized. */
1210 for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, stmt); i++)
1211 {
1212 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1213 dr = STMT_VINFO_DATA_REF (stmt_info);
1214 DR_MISALIGNMENT (dr) = 0;
1215 if (vect_print_dump_info (REPORT_ALIGNMENT))
1216 fprintf (vect_dump, "Alignment of access forced using versioning.");
1217 }
1218
1219 if (vect_print_dump_info (REPORT_DETAILS))
1220 fprintf (vect_dump, "Versioning for alignment will be applied.");
1221
1222 /* Peeling and versioning can't be done together at this time. */
1223 gcc_assert (! (do_peeling && do_versioning));
1224
1225 stat = vect_verify_datarefs_alignment (loop_vinfo);
1226 gcc_assert (stat);
1227 return stat;
1228 }
1229
1230 /* This point is reached if neither peeling nor versioning is being done. */
1231 gcc_assert (! (do_peeling || do_versioning));
1232
1233 stat = vect_verify_datarefs_alignment (loop_vinfo);
1234 return stat;
1235 }
1236
1237
1238 /* Function vect_analyze_data_refs_alignment
1239
1240 Analyze the alignment of the data-references in the loop.
1241 Return FALSE if a data reference is found that cannot be vectorized. */
1242
1243 static bool
vect_analyze_data_refs_alignment(loop_vec_info loop_vinfo)1244 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
1245 {
1246 if (vect_print_dump_info (REPORT_DETAILS))
1247 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
1248
1249 if (!vect_compute_data_refs_alignment (loop_vinfo))
1250 {
1251 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1252 fprintf (vect_dump,
1253 "not vectorized: can't calculate alignment for data ref.");
1254 return false;
1255 }
1256
1257 return true;
1258 }
1259
1260
1261 /* Function vect_analyze_data_ref_access.
1262
1263 Analyze the access pattern of the data-reference DR. For now, a data access
1264 has to be consecutive to be considered vectorizable. */
1265
1266 static bool
vect_analyze_data_ref_access(struct data_reference * dr)1267 vect_analyze_data_ref_access (struct data_reference *dr)
1268 {
1269 tree step = DR_STEP (dr);
1270 tree scalar_type = TREE_TYPE (DR_REF (dr));
1271
1272 if (!step || tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
1273 {
1274 if (vect_print_dump_info (REPORT_DETAILS))
1275 fprintf (vect_dump, "not consecutive access");
1276 return false;
1277 }
1278 return true;
1279 }
1280
1281
1282 /* Function vect_analyze_data_ref_accesses.
1283
1284 Analyze the access pattern of all the data references in the loop.
1285
1286 FORNOW: the only access pattern that is considered vectorizable is a
1287 simple step 1 (consecutive) access.
1288
1289 FORNOW: handle only arrays and pointer accesses. */
1290
1291 static bool
vect_analyze_data_ref_accesses(loop_vec_info loop_vinfo)1292 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
1293 {
1294 unsigned int i;
1295 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1296 struct data_reference *dr;
1297
1298 if (vect_print_dump_info (REPORT_DETAILS))
1299 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
1300
1301 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1302 if (!vect_analyze_data_ref_access (dr))
1303 {
1304 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1305 fprintf (vect_dump, "not vectorized: complicated access pattern.");
1306 return false;
1307 }
1308
1309 return true;
1310 }
1311
1312
1313 /* Function vect_analyze_data_refs.
1314
1315 Find all the data references in the loop.
1316
1317 The general structure of the analysis of data refs in the vectorizer is as
1318 follows:
1319 1- vect_analyze_data_refs(loop): call compute_data_dependences_for_loop to
1320 find and analyze all data-refs in the loop and their dependences.
1321 2- vect_analyze_dependences(): apply dependence testing using ddrs.
1322 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
1323 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
1324
1325 */
1326
1327 static bool
vect_analyze_data_refs(loop_vec_info loop_vinfo)1328 vect_analyze_data_refs (loop_vec_info loop_vinfo)
1329 {
1330 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1331 unsigned int i;
1332 VEC (data_reference_p, heap) *datarefs;
1333 struct data_reference *dr;
1334 tree scalar_type;
1335
1336 if (vect_print_dump_info (REPORT_DETAILS))
1337 fprintf (vect_dump, "=== vect_analyze_data_refs ===");
1338
1339 compute_data_dependences_for_loop (loop, false,
1340 &LOOP_VINFO_DATAREFS (loop_vinfo),
1341 &LOOP_VINFO_DDRS (loop_vinfo));
1342
1343 /* Go through the data-refs, check that the analysis succeeded. Update pointer
1344 from stmt_vec_info struct to DR and vectype. */
1345 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1346
1347 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1348 {
1349 tree stmt;
1350 stmt_vec_info stmt_info;
1351
1352 if (!dr || !DR_REF (dr))
1353 {
1354 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1355 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
1356 return false;
1357 }
1358
1359 /* Update DR field in stmt_vec_info struct. */
1360 stmt = DR_STMT (dr);
1361 stmt_info = vinfo_for_stmt (stmt);
1362
1363 if (STMT_VINFO_DATA_REF (stmt_info))
1364 {
1365 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1366 {
1367 fprintf (vect_dump,
1368 "not vectorized: more than one data ref in stmt: ");
1369 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1370 }
1371 return false;
1372 }
1373 STMT_VINFO_DATA_REF (stmt_info) = dr;
1374
1375 /* Check that analysis of the data-ref succeeded. */
1376 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
1377 || !DR_STEP (dr))
1378 {
1379 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1380 {
1381 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
1382 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1383 }
1384 return false;
1385 }
1386 if (!DR_MEMTAG (dr))
1387 {
1388 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1389 {
1390 fprintf (vect_dump, "not vectorized: no memory tag for ");
1391 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1392 }
1393 return false;
1394 }
1395
1396 /* Set vectype for STMT. */
1397 scalar_type = TREE_TYPE (DR_REF (dr));
1398 STMT_VINFO_VECTYPE (stmt_info) =
1399 get_vectype_for_scalar_type (scalar_type);
1400 if (!STMT_VINFO_VECTYPE (stmt_info))
1401 {
1402 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1403 {
1404 fprintf (vect_dump,
1405 "not vectorized: no vectype for stmt: ");
1406 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1407 fprintf (vect_dump, " scalar_type: ");
1408 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
1409 }
1410 return false;
1411 }
1412 }
1413
1414 return true;
1415 }
1416
1417
1418 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
1419
1420 /* Function vect_mark_relevant.
1421
1422 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
1423
1424 static void
vect_mark_relevant(VEC (tree,heap)** worklist,tree stmt,bool relevant_p,bool live_p)1425 vect_mark_relevant (VEC(tree,heap) **worklist, tree stmt,
1426 bool relevant_p, bool live_p)
1427 {
1428 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1429 bool save_relevant_p = STMT_VINFO_RELEVANT_P (stmt_info);
1430 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
1431
1432 if (vect_print_dump_info (REPORT_DETAILS))
1433 fprintf (vect_dump, "mark relevant %d, live %d.",relevant_p, live_p);
1434
1435 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
1436 {
1437 tree pattern_stmt;
1438
1439 /* This is the last stmt in a sequence that was detected as a
1440 pattern that can potentially be vectorized. Don't mark the stmt
1441 as relevant/live because it's not going to vectorized.
1442 Instead mark the pattern-stmt that replaces it. */
1443 if (vect_print_dump_info (REPORT_DETAILS))
1444 fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live.");
1445 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
1446 stmt_info = vinfo_for_stmt (pattern_stmt);
1447 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
1448 save_relevant_p = STMT_VINFO_RELEVANT_P (stmt_info);
1449 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
1450 stmt = pattern_stmt;
1451 }
1452
1453 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
1454 STMT_VINFO_RELEVANT_P (stmt_info) |= relevant_p;
1455
1456 if (TREE_CODE (stmt) == PHI_NODE)
1457 /* Don't put phi-nodes in the worklist. Phis that are marked relevant
1458 or live will fail vectorization later on. */
1459 return;
1460
1461 if (STMT_VINFO_RELEVANT_P (stmt_info) == save_relevant_p
1462 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
1463 {
1464 if (vect_print_dump_info (REPORT_DETAILS))
1465 fprintf (vect_dump, "already marked relevant/live.");
1466 return;
1467 }
1468
1469 VEC_safe_push (tree, heap, *worklist, stmt);
1470 }
1471
1472
1473 /* Function vect_stmt_relevant_p.
1474
1475 Return true if STMT in loop that is represented by LOOP_VINFO is
1476 "relevant for vectorization".
1477
1478 A stmt is considered "relevant for vectorization" if:
1479 - it has uses outside the loop.
1480 - it has vdefs (it alters memory).
1481 - control stmts in the loop (except for the exit condition).
1482
1483 CHECKME: what other side effects would the vectorizer allow? */
1484
1485 static bool
vect_stmt_relevant_p(tree stmt,loop_vec_info loop_vinfo,bool * relevant_p,bool * live_p)1486 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo,
1487 bool *relevant_p, bool *live_p)
1488 {
1489 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1490 ssa_op_iter op_iter;
1491 imm_use_iterator imm_iter;
1492 use_operand_p use_p;
1493 def_operand_p def_p;
1494
1495 *relevant_p = false;
1496 *live_p = false;
1497
1498 /* cond stmt other than loop exit cond. */
1499 if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
1500 *relevant_p = true;
1501
1502 /* changing memory. */
1503 if (TREE_CODE (stmt) != PHI_NODE)
1504 if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_DEFS))
1505 {
1506 if (vect_print_dump_info (REPORT_DETAILS))
1507 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
1508 *relevant_p = true;
1509 }
1510
1511 /* uses outside the loop. */
1512 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
1513 {
1514 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
1515 {
1516 basic_block bb = bb_for_stmt (USE_STMT (use_p));
1517 if (!flow_bb_inside_loop_p (loop, bb))
1518 {
1519 if (vect_print_dump_info (REPORT_DETAILS))
1520 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
1521
1522 /* We expect all such uses to be in the loop exit phis
1523 (because of loop closed form) */
1524 gcc_assert (TREE_CODE (USE_STMT (use_p)) == PHI_NODE);
1525 gcc_assert (bb == loop->single_exit->dest);
1526
1527 *live_p = true;
1528 }
1529 }
1530 }
1531
1532 return (*live_p || *relevant_p);
1533 }
1534
1535
1536 /* Function vect_mark_stmts_to_be_vectorized.
1537
1538 Not all stmts in the loop need to be vectorized. For example:
1539
1540 for i...
1541 for j...
1542 1. T0 = i + j
1543 2. T1 = a[T0]
1544
1545 3. j = j + 1
1546
1547 Stmt 1 and 3 do not need to be vectorized, because loop control and
1548 addressing of vectorized data-refs are handled differently.
1549
1550 This pass detects such stmts. */
1551
1552 static bool
vect_mark_stmts_to_be_vectorized(loop_vec_info loop_vinfo)1553 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
1554 {
1555 VEC(tree,heap) *worklist;
1556 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1557 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1558 unsigned int nbbs = loop->num_nodes;
1559 block_stmt_iterator si;
1560 tree stmt, use;
1561 stmt_ann_t ann;
1562 ssa_op_iter iter;
1563 unsigned int i;
1564 stmt_vec_info stmt_vinfo;
1565 basic_block bb;
1566 tree phi;
1567 bool relevant_p, live_p;
1568 tree def, def_stmt;
1569 enum vect_def_type dt;
1570
1571 if (vect_print_dump_info (REPORT_DETAILS))
1572 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
1573
1574 worklist = VEC_alloc (tree, heap, 64);
1575
1576 /* 1. Init worklist. */
1577
1578 bb = loop->header;
1579 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1580 {
1581 if (vect_print_dump_info (REPORT_DETAILS))
1582 {
1583 fprintf (vect_dump, "init: phi relevant? ");
1584 print_generic_expr (vect_dump, phi, TDF_SLIM);
1585 }
1586
1587 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant_p, &live_p))
1588 vect_mark_relevant (&worklist, phi, relevant_p, live_p);
1589 }
1590
1591 for (i = 0; i < nbbs; i++)
1592 {
1593 bb = bbs[i];
1594 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1595 {
1596 stmt = bsi_stmt (si);
1597
1598 if (vect_print_dump_info (REPORT_DETAILS))
1599 {
1600 fprintf (vect_dump, "init: stmt relevant? ");
1601 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1602 }
1603
1604 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant_p, &live_p))
1605 vect_mark_relevant (&worklist, stmt, relevant_p, live_p);
1606 }
1607 }
1608
1609
1610 /* 2. Process_worklist */
1611
1612 while (VEC_length (tree, worklist) > 0)
1613 {
1614 stmt = VEC_pop (tree, worklist);
1615
1616 if (vect_print_dump_info (REPORT_DETAILS))
1617 {
1618 fprintf (vect_dump, "worklist: examine stmt: ");
1619 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1620 }
1621
1622 /* Examine the USEs of STMT. For each ssa-name USE thta is defined
1623 in the loop, mark the stmt that defines it (DEF_STMT) as
1624 relevant/irrelevant and live/dead according to the liveness and
1625 relevance properties of STMT.
1626 */
1627
1628 gcc_assert (TREE_CODE (stmt) != PHI_NODE);
1629
1630 ann = stmt_ann (stmt);
1631 stmt_vinfo = vinfo_for_stmt (stmt);
1632
1633 relevant_p = STMT_VINFO_RELEVANT_P (stmt_vinfo);
1634 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
1635
1636 /* Generally, the liveness and relevance properties of STMT are
1637 propagated to the DEF_STMTs of its USEs:
1638 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
1639 STMT_VINFO_RELEVANT_P (DEF_STMT_info) <-- relevant_p
1640
1641 Exceptions:
1642
1643 (case 1)
1644 If USE is used only for address computations (e.g. array indexing),
1645 which does not need to be directly vectorized, then the
1646 liveness/relevance of the respective DEF_STMT is left unchanged.
1647
1648 (case 2)
1649 If STMT has been identified as defining a reduction variable, then
1650 we have two cases:
1651 (case 2.1)
1652 The last use of STMT is the reduction-variable, which is defined
1653 by a loop-header-phi. We don't want to mark the phi as live or
1654 relevant (because it does not need to be vectorized, it is handled
1655 as part of the vectorization of the reduction), so in this case we
1656 skip the call to vect_mark_relevant.
1657 (case 2.2)
1658 The rest of the uses of STMT are defined in the loop body. For
1659 the def_stmt of these uses we want to set liveness/relevance
1660 as follows:
1661 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- false
1662 STMT_VINFO_RELEVANT_P (DEF_STMT_info) <-- true
1663 because even though STMT is classified as live (since it defines a
1664 value that is used across loop iterations) and irrelevant (since it
1665 is not used inside the loop), it will be vectorized, and therefore
1666 the corresponding DEF_STMTs need to marked as relevant.
1667 */
1668
1669 /* case 2.2: */
1670 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
1671 {
1672 gcc_assert (!relevant_p && live_p);
1673 relevant_p = true;
1674 live_p = false;
1675 }
1676
1677 FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
1678 {
1679 /* case 1: we are only interested in uses that need to be vectorized.
1680 Uses that are used for address computation are not considered
1681 relevant.
1682 */
1683 if (!exist_non_indexing_operands_for_use_p (use, stmt))
1684 continue;
1685
1686 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &def, &dt))
1687 {
1688 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1689 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
1690 VEC_free (tree, heap, worklist);
1691 return false;
1692 }
1693
1694 if (!def_stmt || IS_EMPTY_STMT (def_stmt))
1695 continue;
1696
1697 if (vect_print_dump_info (REPORT_DETAILS))
1698 {
1699 fprintf (vect_dump, "worklist: examine use %d: ", i);
1700 print_generic_expr (vect_dump, use, TDF_SLIM);
1701 }
1702
1703 bb = bb_for_stmt (def_stmt);
1704 if (!flow_bb_inside_loop_p (loop, bb))
1705 continue;
1706
1707 /* case 2.1: the reduction-use does not mark the defining-phi
1708 as relevant. */
1709 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
1710 && TREE_CODE (def_stmt) == PHI_NODE)
1711 continue;
1712
1713 vect_mark_relevant (&worklist, def_stmt, relevant_p, live_p);
1714 }
1715 } /* while worklist */
1716
1717 VEC_free (tree, heap, worklist);
1718 return true;
1719 }
1720
1721
1722 /* Function vect_can_advance_ivs_p
1723
1724 In case the number of iterations that LOOP iterates is unknown at compile
1725 time, an epilog loop will be generated, and the loop induction variables
1726 (IVs) will be "advanced" to the value they are supposed to take just before
1727 the epilog loop. Here we check that the access function of the loop IVs
1728 and the expression that represents the loop bound are simple enough.
1729 These restrictions will be relaxed in the future. */
1730
1731 static bool
vect_can_advance_ivs_p(loop_vec_info loop_vinfo)1732 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
1733 {
1734 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1735 basic_block bb = loop->header;
1736 tree phi;
1737
1738 /* Analyze phi functions of the loop header. */
1739
1740 if (vect_print_dump_info (REPORT_DETAILS))
1741 fprintf (vect_dump, "=== vect_can_advance_ivs_p ===");
1742
1743 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1744 {
1745 tree access_fn = NULL;
1746 tree evolution_part;
1747
1748 if (vect_print_dump_info (REPORT_DETAILS))
1749 {
1750 fprintf (vect_dump, "Analyze phi: ");
1751 print_generic_expr (vect_dump, phi, TDF_SLIM);
1752 }
1753
1754 /* Skip virtual phi's. The data dependences that are associated with
1755 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
1756
1757 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
1758 {
1759 if (vect_print_dump_info (REPORT_DETAILS))
1760 fprintf (vect_dump, "virtual phi. skip.");
1761 continue;
1762 }
1763
1764 /* Skip reduction phis. */
1765
1766 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
1767 {
1768 if (vect_print_dump_info (REPORT_DETAILS))
1769 fprintf (vect_dump, "reduc phi. skip.");
1770 continue;
1771 }
1772
1773 /* Analyze the evolution function. */
1774
1775 access_fn = instantiate_parameters
1776 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
1777
1778 if (!access_fn)
1779 {
1780 if (vect_print_dump_info (REPORT_DETAILS))
1781 fprintf (vect_dump, "No Access function.");
1782 return false;
1783 }
1784
1785 if (vect_print_dump_info (REPORT_DETAILS))
1786 {
1787 fprintf (vect_dump, "Access function of PHI: ");
1788 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
1789 }
1790
1791 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
1792
1793 if (evolution_part == NULL_TREE)
1794 {
1795 if (vect_print_dump_info (REPORT_DETAILS))
1796 fprintf (vect_dump, "No evolution.");
1797 return false;
1798 }
1799
1800 /* FORNOW: We do not transform initial conditions of IVs
1801 which evolution functions are a polynomial of degree >= 2. */
1802
1803 if (tree_is_chrec (evolution_part))
1804 return false;
1805 }
1806
1807 return true;
1808 }
1809
1810
1811 /* Function vect_get_loop_niters.
1812
1813 Determine how many iterations the loop is executed.
1814 If an expression that represents the number of iterations
1815 can be constructed, place it in NUMBER_OF_ITERATIONS.
1816 Return the loop exit condition. */
1817
1818 static tree
vect_get_loop_niters(struct loop * loop,tree * number_of_iterations)1819 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
1820 {
1821 tree niters;
1822
1823 if (vect_print_dump_info (REPORT_DETAILS))
1824 fprintf (vect_dump, "=== get_loop_niters ===");
1825
1826 niters = number_of_iterations_in_loop (loop);
1827
1828 if (niters != NULL_TREE
1829 && niters != chrec_dont_know)
1830 {
1831 *number_of_iterations = niters;
1832
1833 if (vect_print_dump_info (REPORT_DETAILS))
1834 {
1835 fprintf (vect_dump, "==> get_loop_niters:" );
1836 print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
1837 }
1838 }
1839
1840 return get_loop_exit_condition (loop);
1841 }
1842
1843
1844 /* Function vect_analyze_loop_form.
1845
1846 Verify the following restrictions (some may be relaxed in the future):
1847 - it's an inner-most loop
1848 - number of BBs = 2 (which are the loop header and the latch)
1849 - the loop has a pre-header
1850 - the loop has a single entry and exit
1851 - the loop exit condition is simple enough, and the number of iterations
1852 can be analyzed (a countable loop). */
1853
1854 static loop_vec_info
vect_analyze_loop_form(struct loop * loop)1855 vect_analyze_loop_form (struct loop *loop)
1856 {
1857 loop_vec_info loop_vinfo;
1858 tree loop_cond;
1859 tree number_of_iterations = NULL;
1860
1861 if (vect_print_dump_info (REPORT_DETAILS))
1862 fprintf (vect_dump, "=== vect_analyze_loop_form ===");
1863
1864 if (loop->inner)
1865 {
1866 if (vect_print_dump_info (REPORT_OUTER_LOOPS))
1867 fprintf (vect_dump, "not vectorized: nested loop.");
1868 return NULL;
1869 }
1870
1871 if (!loop->single_exit
1872 || loop->num_nodes != 2
1873 || EDGE_COUNT (loop->header->preds) != 2)
1874 {
1875 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1876 {
1877 if (!loop->single_exit)
1878 fprintf (vect_dump, "not vectorized: multiple exits.");
1879 else if (loop->num_nodes != 2)
1880 fprintf (vect_dump, "not vectorized: too many BBs in loop.");
1881 else if (EDGE_COUNT (loop->header->preds) != 2)
1882 fprintf (vect_dump, "not vectorized: too many incoming edges.");
1883 }
1884
1885 return NULL;
1886 }
1887
1888 /* We assume that the loop exit condition is at the end of the loop. i.e,
1889 that the loop is represented as a do-while (with a proper if-guard
1890 before the loop if needed), where the loop header contains all the
1891 executable statements, and the latch is empty. */
1892 if (!empty_block_p (loop->latch)
1893 || phi_nodes (loop->latch))
1894 {
1895 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1896 fprintf (vect_dump, "not vectorized: unexpected loop form.");
1897 return NULL;
1898 }
1899
1900 /* Make sure there exists a single-predecessor exit bb: */
1901 if (!single_pred_p (loop->single_exit->dest))
1902 {
1903 edge e = loop->single_exit;
1904 if (!(e->flags & EDGE_ABNORMAL))
1905 {
1906 split_loop_exit_edge (e);
1907 if (vect_print_dump_info (REPORT_DETAILS))
1908 fprintf (vect_dump, "split exit edge.");
1909 }
1910 else
1911 {
1912 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1913 fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
1914 return NULL;
1915 }
1916 }
1917
1918 if (empty_block_p (loop->header))
1919 {
1920 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1921 fprintf (vect_dump, "not vectorized: empty loop.");
1922 return NULL;
1923 }
1924
1925 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
1926 if (!loop_cond)
1927 {
1928 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1929 fprintf (vect_dump, "not vectorized: complicated exit condition.");
1930 return NULL;
1931 }
1932
1933 if (!number_of_iterations)
1934 {
1935 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1936 fprintf (vect_dump,
1937 "not vectorized: number of iterations cannot be computed.");
1938 return NULL;
1939 }
1940
1941 if (chrec_contains_undetermined (number_of_iterations))
1942 {
1943 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1944 fprintf (vect_dump, "Infinite number of iterations.");
1945 return false;
1946 }
1947
1948 loop_vinfo = new_loop_vec_info (loop);
1949 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
1950
1951 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
1952 {
1953 if (vect_print_dump_info (REPORT_DETAILS))
1954 {
1955 fprintf (vect_dump, "Symbolic number of iterations is ");
1956 print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
1957 }
1958 }
1959 else
1960 if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
1961 {
1962 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1963 fprintf (vect_dump, "not vectorized: number of iterations = 0.");
1964 return NULL;
1965 }
1966
1967 LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
1968
1969 return loop_vinfo;
1970 }
1971
1972
1973 /* Function vect_analyze_loop.
1974
1975 Apply a set of analyses on LOOP, and create a loop_vec_info struct
1976 for it. The different analyses will record information in the
1977 loop_vec_info struct. */
1978 loop_vec_info
vect_analyze_loop(struct loop * loop)1979 vect_analyze_loop (struct loop *loop)
1980 {
1981 bool ok;
1982 loop_vec_info loop_vinfo;
1983
1984 if (vect_print_dump_info (REPORT_DETAILS))
1985 fprintf (vect_dump, "===== analyze_loop_nest =====");
1986
1987 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
1988
1989 loop_vinfo = vect_analyze_loop_form (loop);
1990 if (!loop_vinfo)
1991 {
1992 if (vect_print_dump_info (REPORT_DETAILS))
1993 fprintf (vect_dump, "bad loop form.");
1994 return NULL;
1995 }
1996
1997 /* Find all data references in the loop (which correspond to vdefs/vuses)
1998 and analyze their evolution in the loop.
1999
2000 FORNOW: Handle only simple, array references, which
2001 alignment can be forced, and aligned pointer-references. */
2002
2003 ok = vect_analyze_data_refs (loop_vinfo);
2004 if (!ok)
2005 {
2006 if (vect_print_dump_info (REPORT_DETAILS))
2007 fprintf (vect_dump, "bad data references.");
2008 destroy_loop_vec_info (loop_vinfo);
2009 return NULL;
2010 }
2011
2012 /* Classify all cross-iteration scalar data-flow cycles.
2013 Cross-iteration cycles caused by virtual phis are analyzed separately. */
2014
2015 vect_analyze_scalar_cycles (loop_vinfo);
2016
2017 vect_pattern_recog (loop_vinfo);
2018
2019 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
2020
2021 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
2022 if (!ok)
2023 {
2024 if (vect_print_dump_info (REPORT_DETAILS))
2025 fprintf (vect_dump, "unexpected pattern.");
2026 destroy_loop_vec_info (loop_vinfo);
2027 return NULL;
2028 }
2029
2030 /* Analyze the alignment of the data-refs in the loop.
2031 Fail if a data reference is found that cannot be vectorized. */
2032
2033 ok = vect_analyze_data_refs_alignment (loop_vinfo);
2034 if (!ok)
2035 {
2036 if (vect_print_dump_info (REPORT_DETAILS))
2037 fprintf (vect_dump, "bad data alignment.");
2038 destroy_loop_vec_info (loop_vinfo);
2039 return NULL;
2040 }
2041
2042 ok = vect_determine_vectorization_factor (loop_vinfo);
2043 if (!ok)
2044 {
2045 if (vect_print_dump_info (REPORT_DETAILS))
2046 fprintf (vect_dump, "can't determine vectorization factor.");
2047 destroy_loop_vec_info (loop_vinfo);
2048 return NULL;
2049 }
2050
2051 /* Analyze data dependences between the data-refs in the loop.
2052 FORNOW: fail at the first data dependence that we encounter. */
2053
2054 ok = vect_analyze_data_ref_dependences (loop_vinfo);
2055 if (!ok)
2056 {
2057 if (vect_print_dump_info (REPORT_DETAILS))
2058 fprintf (vect_dump, "bad data dependence.");
2059 destroy_loop_vec_info (loop_vinfo);
2060 return NULL;
2061 }
2062
2063 /* Analyze the access patterns of the data-refs in the loop (consecutive,
2064 complex, etc.). FORNOW: Only handle consecutive access pattern. */
2065
2066 ok = vect_analyze_data_ref_accesses (loop_vinfo);
2067 if (!ok)
2068 {
2069 if (vect_print_dump_info (REPORT_DETAILS))
2070 fprintf (vect_dump, "bad data access.");
2071 destroy_loop_vec_info (loop_vinfo);
2072 return NULL;
2073 }
2074
2075 /* This pass will decide on using loop versioning and/or loop peeling in
2076 order to enhance the alignment of data references in the loop. */
2077
2078 ok = vect_enhance_data_refs_alignment (loop_vinfo);
2079 if (!ok)
2080 {
2081 if (vect_print_dump_info (REPORT_DETAILS))
2082 fprintf (vect_dump, "bad data alignment.");
2083 destroy_loop_vec_info (loop_vinfo);
2084 return NULL;
2085 }
2086
2087 /* Scan all the operations in the loop and make sure they are
2088 vectorizable. */
2089
2090 ok = vect_analyze_operations (loop_vinfo);
2091 if (!ok)
2092 {
2093 if (vect_print_dump_info (REPORT_DETAILS))
2094 fprintf (vect_dump, "bad operation or unsupported loop bound.");
2095 destroy_loop_vec_info (loop_vinfo);
2096 return NULL;
2097 }
2098
2099 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
2100
2101 return loop_vinfo;
2102 }
2103