/* Data references and dependences detectors.
   Copyright (C) 2003-2022 Free Software Foundation, Inc.
   Contributed by Sebastian Pop <pop@cri.ensmp.fr>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass walks a given loop structure searching for array
   references.  The information about the array accesses is recorded
   in DATA_REFERENCE structures.

   The basic test for determining the dependences is:
   given two access functions chrec1 and chrec2 on the same array, and
   two vectors x and y from the iteration domain, the same element of
   the array is accessed twice at iterations x and y if and only if:
   |             chrec1 (x) == chrec2 (y).
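   For instance (an illustrative example, not part of the original
   comment): for the accesses A[2*i] and A[2*i + 1] in the same loop,
   chrec1 (x) = 2*x and chrec2 (y) = 2*y + 1, and the equation
   |             2*x == 2*y + 1
   has no integer solution, so the two accesses never touch the same
   array element.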

   The goals of this analysis are:

   - to determine independence: the relation between two independent
     accesses is qualified with chrec_known (this information enables
     loop parallelization),

   - when two data references access the same data, to qualify the
     dependence relation with the classic dependence representations:

       - distance vectors
       - direction vectors
       - loop carried level dependence
       - polyhedron dependence
     or with the chains-of-recurrences based representation,

   - to define a knowledge base for storing the data dependence
     information,

   - to define an interface to access this data.
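
   As an illustration of the classic representations above (an example
   added for exposition): in the loop
   |    for (i = 1; i < N; i++)
   |      A[i] = A[i - 1];
   the element written at iteration i is read back at iteration i + 1,
   so the loop carries a dependence with distance vector (1) and
   direction vector (+).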

   Definitions:

   - subscript: given two array accesses, a subscript is the tuple
   composed of the access functions for a given dimension.  Example:
   Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
   (f1, g1), (f2, g2), (f3, g3).

   - Diophantine equation: an equation whose coefficients and
   solutions are integer constants, for example the equation
   |   3*x + 2*y = 1
   has an integer solution x = 1 and y = -1.

   References:

   - "Advanced Compilation for High Performance Computing" by Randy
   Allen and Ken Kennedy.

   - "Practical Dependence Testing" by Gina Goff, Ken Kennedy and
   Chau-Wen Tseng.
   http://citeseer.ist.psu.edu/goff91practical.html

   - "Loop Transformations for Restructuring Compilers - The Foundations"
   by Utpal Banerjee.


*/

#define INCLUDE_ALGORITHM
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "gimple-pretty-print.h"
#include "alias.h"
#include "fold-const.h"
#include "expr.h"
#include "gimple-iterator.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "dumpfile.h"
#include "tree-affine.h"
#include "builtins.h"
#include "tree-eh.h"
#include "ssa.h"
#include "internal-fn.h"
#include "vr-values.h"
#include "range-op.h"
#include "tree-ssa-loop-ivopts.h"

static struct datadep_stats
{
  int num_dependence_tests;
  int num_dependence_dependent;
  int num_dependence_independent;
  int num_dependence_undetermined;

  int num_subscript_tests;
  int num_subscript_undetermined;
  int num_same_subscript_function;

  int num_ziv;
  int num_ziv_independent;
  int num_ziv_dependent;
  int num_ziv_unimplemented;

  int num_siv;
  int num_siv_independent;
  int num_siv_dependent;
  int num_siv_unimplemented;

  int num_miv;
  int num_miv_independent;
  int num_miv_dependent;
  int num_miv_unimplemented;
} dependence_stats;

static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
					   unsigned int, unsigned int,
					   class loop *);
/* Returns true iff A divides B.  */

static inline bool
tree_fold_divides_p (const_tree a, const_tree b)
{
  gcc_assert (TREE_CODE (a) == INTEGER_CST);
  gcc_assert (TREE_CODE (b) == INTEGER_CST);
  return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
}

/* Returns true iff A divides B.  */

static inline bool
int_divides_p (lambda_int a, lambda_int b)
{
  return ((b % a) == 0);
}

/* Return true if reference REF contains a union access.  */

static bool
ref_contains_union_access_p (tree ref)
{
  while (handled_component_p (ref))
    {
      ref = TREE_OPERAND (ref, 0);
      if (TREE_CODE (TREE_TYPE (ref)) == UNION_TYPE
	  || TREE_CODE (TREE_TYPE (ref)) == QUAL_UNION_TYPE)
	return true;
    }
  return false;
}



/* Dump into FILE all the data references from DATAREFS.  */

static void
dump_data_references (FILE *file, vec<data_reference_p> datarefs)
{
  for (data_reference *dr : datarefs)
    dump_data_reference (file, dr);
}

/* Unified dump of all the data references in REF to STDERR.  */

DEBUG_FUNCTION void
debug (vec<data_reference_p> &ref)
{
  dump_data_references (stderr, ref);
}

DEBUG_FUNCTION void
debug (vec<data_reference_p> *ptr)
{
  if (ptr)
    debug (*ptr);
  else
    fprintf (stderr, "<nil>\n");
}


/* Dump into STDERR all the data references from DATAREFS.  */

DEBUG_FUNCTION void
debug_data_references (vec<data_reference_p> datarefs)
{
  dump_data_references (stderr, datarefs);
}

/* Print to STDERR the data_reference DR.  */

DEBUG_FUNCTION void
debug_data_reference (struct data_reference *dr)
{
  dump_data_reference (stderr, dr);
}

/* Dump function for a DATA_REFERENCE structure.  */

void
dump_data_reference (FILE *outf,
		     struct data_reference *dr)
{
  unsigned int i;

  fprintf (outf, "#(Data Ref: \n");
  fprintf (outf, "#  bb: %d \n", gimple_bb (DR_STMT (dr))->index);
  fprintf (outf, "#  stmt: ");
  print_gimple_stmt (outf, DR_STMT (dr), 0);
  fprintf (outf, "#  ref: ");
  print_generic_stmt (outf, DR_REF (dr));
  fprintf (outf, "#  base_object: ");
  print_generic_stmt (outf, DR_BASE_OBJECT (dr));

  for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
    {
      fprintf (outf, "#  Access function %d: ", i);
      print_generic_stmt (outf, DR_ACCESS_FN (dr, i));
    }
  fprintf (outf, "#)\n");
}

/* Unified dump function for a DATA_REFERENCE structure.  */

DEBUG_FUNCTION void
debug (data_reference &ref)
{
  dump_data_reference (stderr, &ref);
}

DEBUG_FUNCTION void
debug (data_reference *ptr)
{
  if (ptr)
    debug (*ptr);
  else
    fprintf (stderr, "<nil>\n");
}


/* Dumps the affine function described by FN to the file OUTF.  */

DEBUG_FUNCTION void
dump_affine_function (FILE *outf, affine_fn fn)
{
  unsigned i;
  tree coef;

  print_generic_expr (outf, fn[0], TDF_SLIM);
  for (i = 1; fn.iterate (i, &coef); i++)
    {
      fprintf (outf, " + ");
      print_generic_expr (outf, coef, TDF_SLIM);
      fprintf (outf, " * x_%u", i);
    }
}

/* Dumps the conflict function CF to the file OUTF.  */

DEBUG_FUNCTION void
dump_conflict_function (FILE *outf, conflict_function *cf)
{
  unsigned i;

  if (cf->n == NO_DEPENDENCE)
    fprintf (outf, "no dependence");
  else if (cf->n == NOT_KNOWN)
    fprintf (outf, "not known");
  else
    {
      for (i = 0; i < cf->n; i++)
	{
	  if (i != 0)
	    fprintf (outf, " ");
	  fprintf (outf, "[");
	  dump_affine_function (outf, cf->fns[i]);
	  fprintf (outf, "]");
	}
    }
}

/* Dump function for a SUBSCRIPT structure.  */

DEBUG_FUNCTION void
dump_subscript (FILE *outf, struct subscript *subscript)
{
  conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);

  fprintf (outf, "\n (subscript \n");
  fprintf (outf, "  iterations_that_access_an_element_twice_in_A: ");
  dump_conflict_function (outf, cf);
  if (CF_NONTRIVIAL_P (cf))
    {
      tree last_iteration = SUB_LAST_CONFLICT (subscript);
      fprintf (outf, "\n  last_conflict: ");
      print_generic_expr (outf, last_iteration);
    }

  cf = SUB_CONFLICTS_IN_B (subscript);
  fprintf (outf, "\n  iterations_that_access_an_element_twice_in_B: ");
  dump_conflict_function (outf, cf);
  if (CF_NONTRIVIAL_P (cf))
    {
      tree last_iteration = SUB_LAST_CONFLICT (subscript);
      fprintf (outf, "\n  last_conflict: ");
      print_generic_expr (outf, last_iteration);
    }

  fprintf (outf, "\n  (Subscript distance: ");
  print_generic_expr (outf, SUB_DISTANCE (subscript));
  fprintf (outf, " ))\n");
}

/* Print the classic direction vector DIRV to OUTF.  */

DEBUG_FUNCTION void
print_direction_vector (FILE *outf,
			lambda_vector dirv,
			int length)
{
  int eq;

  for (eq = 0; eq < length; eq++)
    {
      enum data_dependence_direction dir = ((enum data_dependence_direction)
					    dirv[eq]);

      switch (dir)
	{
	case dir_positive:
	  fprintf (outf, "    +");
	  break;
	case dir_negative:
	  fprintf (outf, "    -");
	  break;
	case dir_equal:
	  fprintf (outf, "    =");
	  break;
	case dir_positive_or_equal:
	  fprintf (outf, "   +=");
	  break;
	case dir_positive_or_negative:
	  fprintf (outf, "   +-");
	  break;
	case dir_negative_or_equal:
	  fprintf (outf, "   -=");
	  break;
	case dir_star:
	  fprintf (outf, "    *");
	  break;
	default:
	  fprintf (outf, "indep");
	  break;
	}
    }
  fprintf (outf, "\n");
}

/* Print a vector of direction vectors.  */

DEBUG_FUNCTION void
print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
		   int length)
{
  for (lambda_vector v : dir_vects)
    print_direction_vector (outf, v, length);
}

/* Print out the vector VECTOR of length N to OUTFILE.  */

DEBUG_FUNCTION void
print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
{
  int i;

  for (i = 0; i < n; i++)
    fprintf (outfile, HOST_WIDE_INT_PRINT_DEC " ", vector[i]);
  fprintf (outfile, "\n");
}

/* Print a vector of distance vectors.  */

DEBUG_FUNCTION void
print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
		    int length)
{
  for (lambda_vector v : dist_vects)
    print_lambda_vector (outf, v, length);
}

/* Dump function for a DATA_DEPENDENCE_RELATION structure.  */

DEBUG_FUNCTION void
dump_data_dependence_relation (FILE *outf, const data_dependence_relation *ddr)
{
  struct data_reference *dra, *drb;

  fprintf (outf, "(Data Dep: \n");

  if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      if (ddr)
	{
	  dra = DDR_A (ddr);
	  drb = DDR_B (ddr);
	  if (dra)
	    dump_data_reference (outf, dra);
	  else
	    fprintf (outf, "    (nil)\n");
	  if (drb)
	    dump_data_reference (outf, drb);
	  else
	    fprintf (outf, "    (nil)\n");
	}
      fprintf (outf, "    (don't know)\n)\n");
      return;
    }

  dra = DDR_A (ddr);
  drb = DDR_B (ddr);
  dump_data_reference (outf, dra);
  dump_data_reference (outf, drb);

  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    fprintf (outf, "    (no dependence)\n");

  else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
    {
      unsigned int i;
      class loop *loopi;

      subscript *sub;
      FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
	{
	  fprintf (outf, "  access_fn_A: ");
	  print_generic_stmt (outf, SUB_ACCESS_FN (sub, 0));
	  fprintf (outf, "  access_fn_B: ");
	  print_generic_stmt (outf, SUB_ACCESS_FN (sub, 1));
	  dump_subscript (outf, sub);
	}

      fprintf (outf, "  loop nest: (");
      FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
	fprintf (outf, "%d ", loopi->num);
      fprintf (outf, ")\n");

      for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
	{
	  fprintf (outf, "  distance_vector: ");
	  print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
			       DDR_NB_LOOPS (ddr));
	}

      for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
	{
	  fprintf (outf, "  direction_vector: ");
	  print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
				  DDR_NB_LOOPS (ddr));
	}
    }

  fprintf (outf, ")\n");
}

/* Debug version.  */

DEBUG_FUNCTION void
debug_data_dependence_relation (const struct data_dependence_relation *ddr)
{
  dump_data_dependence_relation (stderr, ddr);
}

/* Dump into FILE all the dependence relations from DDRS.  */

DEBUG_FUNCTION void
dump_data_dependence_relations (FILE *file, const vec<ddr_p> &ddrs)
{
  for (auto ddr : ddrs)
    dump_data_dependence_relation (file, ddr);
}

DEBUG_FUNCTION void
debug (vec<ddr_p> &ref)
{
  dump_data_dependence_relations (stderr, ref);
}

DEBUG_FUNCTION void
debug (vec<ddr_p> *ptr)
{
  if (ptr)
    debug (*ptr);
  else
    fprintf (stderr, "<nil>\n");
}


/* Dump to STDERR all the dependence relations from DDRS.  */

DEBUG_FUNCTION void
debug_data_dependence_relations (vec<ddr_p> ddrs)
{
  dump_data_dependence_relations (stderr, ddrs);
}

/* Dumps the distance and direction vectors in FILE.  DDRS contains
   the dependence relations; the size of each dependence vector is
   the number of loops in the considered nest.  */

DEBUG_FUNCTION void
dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
{
  for (data_dependence_relation *ddr : ddrs)
    if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
      {
	for (lambda_vector v : DDR_DIST_VECTS (ddr))
	  {
	    fprintf (file, "DISTANCE_V (");
	    print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
	    fprintf (file, ")\n");
	  }

	for (lambda_vector v : DDR_DIR_VECTS (ddr))
	  {
	    fprintf (file, "DIRECTION_V (");
	    print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
	    fprintf (file, ")\n");
	  }
      }

  fprintf (file, "\n\n");
}

/* Dumps the data dependence relations DDRS in FILE.  */

DEBUG_FUNCTION void
dump_ddrs (FILE *file, vec<ddr_p> ddrs)
{
  for (data_dependence_relation *ddr : ddrs)
    dump_data_dependence_relation (file, ddr);

  fprintf (file, "\n\n");
}

DEBUG_FUNCTION void
debug_ddrs (vec<ddr_p> ddrs)
{
  dump_ddrs (stderr, ddrs);
}

/* If RESULT_RANGE is nonnull, set *RESULT_RANGE to the range of
   OP0 CODE OP1, where:

   - OP0 CODE OP1 has integral type TYPE
   - the range of OP0 is given by OP0_RANGE and
   - the range of OP1 is given by OP1_RANGE.

   Independently of RESULT_RANGE, try to compute:

     DELTA = ((sizetype) OP0 CODE (sizetype) OP1)
	     - (sizetype) (OP0 CODE OP1)

   as a constant and subtract DELTA from the ssizetype constant in *OFF.
   Return true on success, or false if DELTA is not known at compile time.

   Truncation and sign changes are known to distribute over CODE, i.e.

     (itype) (A CODE B) == (itype) A CODE (itype) B

   for any integral type ITYPE whose precision is no greater than the
   precision of A and B.  */
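
/* As a worked example of the DELTA computation above (added here for
   illustration): with TYPE = unsigned char, CODE = PLUS_EXPR,
   OP0 = 200 and OP1 = 100, the wide computation gives
     (sizetype) OP0 + (sizetype) OP1 == 300
   while the narrow computation wraps:
     (sizetype) (OP0 + OP1) == (sizetype) (unsigned char) 300 == 44,
   so DELTA == 300 - 44 == 256, i.e. one overflow of the 8-bit type.  */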

static bool
compute_distributive_range (tree type, value_range &op0_range,
			    tree_code code, value_range &op1_range,
			    tree *off, value_range *result_range)
{
  gcc_assert (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type));
  if (result_range)
    {
      range_operator *op = range_op_handler (code, type);
      op->fold_range (*result_range, type, op0_range, op1_range);
    }

  /* The distributive property guarantees that if TYPE is no narrower
     than SIZETYPE,

       (sizetype) (OP0 CODE OP1) == (sizetype) OP0 CODE (sizetype) OP1

     and so we can treat DELTA as zero.  */
  if (TYPE_PRECISION (type) >= TYPE_PRECISION (sizetype))
    return true;

  /* If overflow is undefined, we can assume that:

       X == (ssizetype) OP0 CODE (ssizetype) OP1

     is within the range of TYPE, i.e.:

       X == (ssizetype) (TYPE) X

     Distributing the (TYPE) truncation over X gives:

       X == (ssizetype) (OP0 CODE OP1)

     Casting both sides to sizetype and distributing the sizetype cast
     over X gives:

       (sizetype) OP0 CODE (sizetype) OP1 == (sizetype) (OP0 CODE OP1)

     and so we can treat DELTA as zero.  */
  if (TYPE_OVERFLOW_UNDEFINED (type))
    return true;

  /* Compute the range of:

       (ssizetype) OP0 CODE (ssizetype) OP1

     The distributive property guarantees that this has the same bitpattern as:

       (sizetype) OP0 CODE (sizetype) OP1

     but its range is more conducive to analysis.  */
  range_cast (op0_range, ssizetype);
  range_cast (op1_range, ssizetype);
  value_range wide_range;
  range_operator *op = range_op_handler (code, ssizetype);
  bool saved_flag_wrapv = flag_wrapv;
  flag_wrapv = 1;
  op->fold_range (wide_range, ssizetype, op0_range, op1_range);
  flag_wrapv = saved_flag_wrapv;
  if (wide_range.num_pairs () != 1 || !range_int_cst_p (&wide_range))
    return false;

  wide_int lb = wide_range.lower_bound ();
  wide_int ub = wide_range.upper_bound ();

  /* Calculate the number of times that each end of the range overflows or
     underflows TYPE.  We can only calculate DELTA if the numbers match.  */
  unsigned int precision = TYPE_PRECISION (type);
  if (!TYPE_UNSIGNED (type))
    {
      wide_int type_min = wi::mask (precision - 1, true, lb.get_precision ());
      lb -= type_min;
      ub -= type_min;
    }
  wide_int upper_bits = wi::mask (precision, true, lb.get_precision ());
  lb &= upper_bits;
  ub &= upper_bits;
  if (lb != ub)
    return false;

  /* OP0 CODE OP1 overflows exactly arshift (LB, PRECISION) times, with
     negative values indicating underflow.  The low PRECISION bits of LB
     are clear, so DELTA is therefore LB (== UB).  */
  *off = wide_int_to_tree (ssizetype, wi::to_wide (*off) - lb);
  return true;
}


/* Return true if (sizetype) OP == (sizetype) (TO_TYPE) OP,
   given that OP has type FROM_TYPE and range RANGE.  Both TO_TYPE and
   FROM_TYPE are integral types.  */

static bool
nop_conversion_for_offset_p (tree to_type, tree from_type, value_range &range)
{
  gcc_assert (INTEGRAL_TYPE_P (to_type)
	      && INTEGRAL_TYPE_P (from_type)
	      && !TYPE_OVERFLOW_TRAPS (to_type)
	      && !TYPE_OVERFLOW_TRAPS (from_type));

  /* Converting to something no narrower than sizetype and then to sizetype
     is equivalent to converting directly to sizetype.  */
  if (TYPE_PRECISION (to_type) >= TYPE_PRECISION (sizetype))
    return true;

  /* Check whether TO_TYPE can represent all values that FROM_TYPE can.  */
  if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type)
      && (TYPE_UNSIGNED (from_type) || !TYPE_UNSIGNED (to_type)))
    return true;

  /* For narrowing conversions, we could in principle test whether
     the bits in FROM_TYPE but not in TO_TYPE have a fixed value
     and apply a constant adjustment.

     For other conversions (which involve a sign change) we could
     check that the signs are always equal, and apply a constant
     adjustment if the signs are negative.

     However, both cases should be rare.  */
  return range_fits_type_p (&range, TYPE_PRECISION (to_type),
			    TYPE_SIGN (to_type));
}
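
/* For instance (an illustrative example): with FROM_TYPE = int,
   TO_TYPE = unsigned short and RANGE = [0, 100], the narrowing loses
   no information; range_fits_type_p succeeds and the conversion can be
   treated as a nop when computing offsets.  */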

static void
split_constant_offset (tree type, tree *var, tree *off,
		       value_range *result_range,
		       hash_map<tree, std::pair<tree, tree> > &cache,
		       unsigned *limit);

/* Helper function for split_constant_offset.  If TYPE is a pointer type,
   try to express OP0 CODE OP1 as:

     POINTER_PLUS <*VAR, (sizetype) *OFF>

   where:

   - *VAR has type TYPE
   - *OFF is a constant of type ssizetype.

   If TYPE is an integral type, try to express (sizetype) (OP0 CODE OP1) as:

     *VAR + (sizetype) *OFF

   where:

   - *VAR has type sizetype
   - *OFF is a constant of type ssizetype.

   In both cases, OP0 CODE OP1 has type TYPE.

   Return true on success.  A false return value indicates that we can't
   do better than set *OFF to zero.

   When returning true, set RESULT_RANGE to the range of OP0 CODE OP1,
   if RESULT_RANGE is nonnull and if we can do better than assume VR_VARYING.

   CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
   visited.  LIMIT counts down the number of SSA names that we are
   allowed to process before giving up.  */
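
/* For example (illustrative only): for a pointer TYPE with
   OP0 CODE OP1 == p_1 p+ 12, *VAR becomes the variable part p_1 (plus
   a zero offset) and *OFF becomes 12; for an integral TYPE with
   i_2 + 5, *VAR becomes (sizetype) i_2 and *OFF becomes 5.  */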

static bool
split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
			 tree *var, tree *off, value_range *result_range,
			 hash_map<tree, std::pair<tree, tree> > &cache,
			 unsigned *limit)
{
  tree var0, var1;
  tree off0, off1;
  value_range op0_range, op1_range;

  *var = NULL_TREE;
  *off = NULL_TREE;

  if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type))
    return false;

  switch (code)
    {
    case INTEGER_CST:
      *var = size_int (0);
      *off = fold_convert (ssizetype, op0);
      if (result_range)
	result_range->set (op0, op0);
      return true;

    case POINTER_PLUS_EXPR:
      split_constant_offset (op0, &var0, &off0, nullptr, cache, limit);
      split_constant_offset (op1, &var1, &off1, nullptr, cache, limit);
      *var = fold_build2 (POINTER_PLUS_EXPR, type, var0, var1);
      *off = size_binop (PLUS_EXPR, off0, off1);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
      split_constant_offset (op1, &var1, &off1, &op1_range, cache, limit);
      *off = size_binop (code, off0, off1);
      if (!compute_distributive_range (type, op0_range, code, op1_range,
				       off, result_range))
	return false;
      *var = fold_build2 (code, sizetype, var0, var1);
      return true;

    case MULT_EXPR:
      if (TREE_CODE (op1) != INTEGER_CST)
	return false;

      split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
      op1_range.set (op1, op1);
      *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
      if (!compute_distributive_range (type, op0_range, code, op1_range,
				       off, result_range))
	return false;
      *var = fold_build2 (MULT_EXPR, sizetype, var0,
			  fold_convert (sizetype, op1));
      return true;

    case ADDR_EXPR:
      {
	tree base, poffset;
	poly_int64 pbitsize, pbitpos, pbytepos;
	machine_mode pmode;
	int punsignedp, preversep, pvolatilep;

	op0 = TREE_OPERAND (op0, 0);
	base
	  = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode,
				 &punsignedp, &preversep, &pvolatilep);

	if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
	  return false;
	base = build_fold_addr_expr (base);
	off0 = ssize_int (pbytepos);

	if (poffset)
	  {
	    split_constant_offset (poffset, &poffset, &off1, nullptr,
				   cache, limit);
	    off0 = size_binop (PLUS_EXPR, off0, off1);
	    base = fold_build_pointer_plus (base, poffset);
	  }

	var0 = fold_convert (type, base);

	/* If variable length types are involved, punt: otherwise casts
	   might be converted into ARRAY_REFs in gimplify_conversion.
	   Computing such an ARRAY_REF's element size requires
	   TYPE_SIZE_UNIT, which possibly no longer appears in the
	   current GIMPLE and might resurface.  This perhaps could run
	   if (CONVERT_EXPR_P (var0))
	     {
	       gimplify_conversion (&var0);
	       // Attempt to fill in the element size of any ARRAY_REF
	       // found within var0 from the corresponding ARRAY_REF
	       // embedded in op; if unsuccessful, just punt.
	     }  */
	while (POINTER_TYPE_P (type))
	  type = TREE_TYPE (type);
	if (int_size_in_bytes (type) < 0)
	  return false;

	*var = var0;
	*off = off0;
	return true;
      }

    case SSA_NAME:
      {
	if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
	  return false;

	gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
	enum tree_code subcode;

	if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
	  return false;

	subcode = gimple_assign_rhs_code (def_stmt);

	/* We are using a cache to avoid un-CSEing large amounts of code.  */
	bool use_cache = false;
	if (!has_single_use (op0)
	    && (subcode == POINTER_PLUS_EXPR
		|| subcode == PLUS_EXPR
		|| subcode == MINUS_EXPR
		|| subcode == MULT_EXPR
		|| subcode == ADDR_EXPR
		|| CONVERT_EXPR_CODE_P (subcode)))
	  {
	    use_cache = true;
	    bool existed;
	    std::pair<tree, tree> &e = cache.get_or_insert (op0, &existed);
	    if (existed)
	      {
		if (integer_zerop (e.second))
		  return false;
		*var = e.first;
		*off = e.second;
		/* The caller sets the range in this case.  */
		return true;
	      }
	    e = std::make_pair (op0, ssize_int (0));
	  }

	if (*limit == 0)
	  return false;
	--*limit;

	var0 = gimple_assign_rhs1 (def_stmt);
	var1 = gimple_assign_rhs2 (def_stmt);

	bool res = split_constant_offset_1 (type, var0, subcode, var1,
					    var, off, nullptr, cache, limit);
	if (res && use_cache)
	  *cache.get (op0) = std::make_pair (*var, *off);
	/* The caller sets the range in this case.  */
	return res;
      }
    CASE_CONVERT:
      {
	/* We can only handle the following conversions:

	   - Conversions from one pointer type to another pointer type.

	   - Conversions from one non-trapping integral type to another
	     non-trapping integral type.  In this case, the recursive
	     call makes sure that:

	       (sizetype) OP0

	     can be expressed as a sizetype operation involving VAR and OFF,
	     and all we need to do is check whether:

	       (sizetype) OP0 == (sizetype) (TYPE) OP0

	   - Conversions from a non-trapping sizetype-sized integral type to
	     a like-sized pointer type.  In this case, the recursive call
	     makes sure that:

	       (sizetype) OP0 == *VAR + (sizetype) *OFF

	     and we can convert that to:

	       POINTER_PLUS <(TYPE) *VAR, (sizetype) *OFF>

	   - Conversions from a sizetype-sized pointer type to a like-sized
	     non-trapping integral type.  In this case, the recursive call
	     makes sure that:

	       OP0 == POINTER_PLUS <*VAR, (sizetype) *OFF>

	     where the POINTER_PLUS and *VAR have the same precision as
	     TYPE (and the same precision as sizetype).  Then:

	       (sizetype) (TYPE) OP0 == (sizetype) *VAR + (sizetype) *OFF.  */
	tree itype = TREE_TYPE (op0);
	if ((POINTER_TYPE_P (itype)
	     || (INTEGRAL_TYPE_P (itype) && !TYPE_OVERFLOW_TRAPS (itype)))
	    && (POINTER_TYPE_P (type)
		|| (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type)))
	    && (POINTER_TYPE_P (type) == POINTER_TYPE_P (itype)
		|| (TYPE_PRECISION (type) == TYPE_PRECISION (sizetype)
		    && TYPE_PRECISION (itype) == TYPE_PRECISION (sizetype))))
	  {
	    if (POINTER_TYPE_P (type))
	      {
		split_constant_offset (op0, var, off, nullptr, cache, limit);
		*var = fold_convert (type, *var);
	      }
	    else if (POINTER_TYPE_P (itype))
	      {
		split_constant_offset (op0, var, off, nullptr, cache, limit);
		*var = fold_convert (sizetype, *var);
	      }
	    else
	      {
		split_constant_offset (op0, var, off, &op0_range,
				       cache, limit);
		if (!nop_conversion_for_offset_p (type, itype, op0_range))
		  return false;
		if (result_range)
		  {
		    *result_range = op0_range;
		    range_cast (*result_range, type);
		  }
	      }
	    return true;
	  }
	return false;
      }

    default:
      return false;
    }
}

/* If EXP has pointer type, try to express it as:

     POINTER_PLUS <*VAR, (sizetype) *OFF>

   where:

   - *VAR has the same type as EXP
   - *OFF is a constant of type ssizetype.

   If EXP has an integral type, try to express (sizetype) EXP as:

     *VAR + (sizetype) *OFF

   where:

   - *VAR has type sizetype
   - *OFF is a constant of type ssizetype.

   If EXP_RANGE is nonnull, set it to the range of EXP.

   CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
   visited.  LIMIT counts down the number of SSA names that we are
   allowed to process before giving up.  */

static void
split_constant_offset (tree exp, tree *var, tree *off, value_range *exp_range,
		       hash_map<tree, std::pair<tree, tree> > &cache,
		       unsigned *limit)
{
  tree type = TREE_TYPE (exp), op0, op1;
  enum tree_code code;

  code = TREE_CODE (exp);
  if (exp_range)
    {
      *exp_range = type;
      if (code == SSA_NAME)
	{
	  value_range vr;
	  get_range_query (cfun)->range_of_expr (vr, exp);
	  if (vr.undefined_p ())
	    vr.set_varying (TREE_TYPE (exp));
	  wide_int var_min = wi::to_wide (vr.min ());
	  wide_int var_max = wi::to_wide (vr.max ());
	  value_range_kind vr_kind = vr.kind ();
	  wide_int var_nonzero = get_nonzero_bits (exp);
	  vr_kind = intersect_range_with_nonzero_bits (vr_kind,
						       &var_min, &var_max,
						       var_nonzero,
						       TYPE_SIGN (type));
	  /* This check for VR_VARYING is here because the old code
	     using get_range_info would return VR_RANGE for the entire
	     domain, instead of VR_VARYING.  The new code normalizes
	     full-domain ranges to VR_VARYING.  */
	  if (vr_kind == VR_RANGE || vr_kind == VR_VARYING)
	    *exp_range = value_range (type, var_min, var_max);
	}
    }

  if (!tree_is_chrec (exp)
      && get_gimple_rhs_class (TREE_CODE (exp)) != GIMPLE_TERNARY_RHS)
    {
      extract_ops_from_tree (exp, &code, &op0, &op1);
      if (split_constant_offset_1 (type, op0, code, op1, var, off,
				   exp_range, cache, limit))
	return;
    }

  *var = exp;
  if (INTEGRAL_TYPE_P (type))
    *var = fold_convert (sizetype, *var);
  *off = ssize_int (0);

  value_range r;
  if (exp_range && code != SSA_NAME
      && get_range_query (cfun)->range_of_expr (r, exp)
      && !r.undefined_p ())
    *exp_range = r;
}

/* Expresses EXP as VAR + OFF, where OFF is a constant.  VAR has the same
   type as EXP while OFF has type ssizetype.  */
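
/* For instance (an illustrative example): if A is an array of 4-byte
   integers, EXP = &A[4] splits into *VAR = &A and *OFF = 16, while
   EXP = i_1 + 3 splits into *VAR = i_1 and *OFF = 3.  */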

void
split_constant_offset (tree exp, tree *var, tree *off)
{
  unsigned limit = param_ssa_name_def_chain_limit;
  static hash_map<tree, std::pair<tree, tree> > *cache;
  if (!cache)
    cache = new hash_map<tree, std::pair<tree, tree> > (37);
  split_constant_offset (exp, var, off, nullptr, *cache, &limit);
  *var = fold_convert (TREE_TYPE (exp), *var);
  cache->empty ();
}

/* Returns the address ADDR of an object in a canonical shape (without nop
   casts, and with type of pointer to the object).  */

static tree
canonicalize_base_object_address (tree addr)
{
  tree orig = addr;

  STRIP_NOPS (addr);

  /* The base address may be obtained by casting from integer, in that case
     keep the cast.  */
  if (!POINTER_TYPE_P (TREE_TYPE (addr)))
    return orig;

  if (TREE_CODE (addr) != ADDR_EXPR)
    return addr;

  return build_fold_addr_expr (TREE_OPERAND (addr, 0));
}

/* Analyze the behavior of memory reference REF within STMT.
   There are two modes:

   - BB analysis.  In this case we simply split the address into base,
     init and offset components, without reference to any containing loop.
     The resulting base and offset are general expressions and they can
     vary arbitrarily from one iteration of the containing loop to the next.
     The step is always zero.

   - loop analysis.  In this case we analyze the reference both wrt LOOP
     and on the basis that the reference occurs (is "used") in LOOP;
     see the comment above analyze_scalar_evolution_in_loop for more
     information about this distinction.  The base, init, offset and
     step fields are all invariant in LOOP.

   Perform BB analysis if LOOP is null, or if LOOP is the function's
   dummy outermost loop.  In other cases perform loop analysis.

   Return true if the analysis succeeded and store the results in DRB if so.
   BB analysis can only fail for bitfield or reversed-storage accesses.  */
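
/* As an illustration (an example added here): for the reference A[i]
   in a loop over i, with A an array of 4-byte integers, loop analysis
   yields base_address = &A, offset = 0, init = 0 and step = 4.  */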

opt_result
dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
		      class loop *loop, const gimple *stmt)
{
  poly_int64 pbitsize, pbitpos;
  tree base, poffset;
  machine_mode pmode;
  int punsignedp, preversep, pvolatilep;
  affine_iv base_iv, offset_iv;
  tree init, dinit, step;
  bool in_loop = (loop && loop->num);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "analyze_innermost: ");

  base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, &pmode,
			      &punsignedp, &preversep, &pvolatilep);
  gcc_assert (base != NULL_TREE);

  poly_int64 pbytepos;
  if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
    return opt_result::failure_at (stmt,
				   "failed: bit offset alignment.\n");

  if (preversep)
    return opt_result::failure_at (stmt,
				   "failed: reverse storage order.\n");

  /* Calculate the alignment and misalignment for the inner reference.  */
  unsigned HOST_WIDE_INT bit_base_misalignment;
  unsigned int bit_base_alignment;
  get_object_alignment_1 (base, &bit_base_alignment, &bit_base_misalignment);

  /* There are no bitfield references remaining in BASE, so the values
     we got back must be whole bytes.  */
  gcc_assert (bit_base_alignment % BITS_PER_UNIT == 0
	      && bit_base_misalignment % BITS_PER_UNIT == 0);
  unsigned int base_alignment = bit_base_alignment / BITS_PER_UNIT;
  poly_int64 base_misalignment = bit_base_misalignment / BITS_PER_UNIT;

  if (TREE_CODE (base) == MEM_REF)
    {
      if (!integer_zerop (TREE_OPERAND (base, 1)))
	{
	  /* Subtract MOFF from the base and add it to POFFSET instead.
	     Adjust the misalignment to reflect the amount we subtracted.  */
	  poly_offset_int moff = mem_ref_offset (base);
	  base_misalignment -= moff.force_shwi ();
	  tree mofft = wide_int_to_tree (sizetype, moff);
	  if (!poffset)
	    poffset = mofft;
	  else
	    poffset = size_binop (PLUS_EXPR, poffset, mofft);
	}
      base = TREE_OPERAND (base, 0);
    }
  else
    base = build_fold_addr_expr (base);

  if (in_loop)
    {
      if (!simple_iv (loop, loop, base, &base_iv, true))
	return opt_result::failure_at
	  (stmt, "failed: evolution of base is not affine.\n");
    }
  else
    {
      base_iv.base = base;
      base_iv.step = ssize_int (0);
      base_iv.no_overflow = true;
    }

  if (!poffset)
    {
      offset_iv.base = ssize_int (0);
      offset_iv.step = ssize_int (0);
    }
  else
    {
      if (!in_loop)
        {
          offset_iv.base = poffset;
          offset_iv.step = ssize_int (0);
        }
      else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
	return opt_result::failure_at
	  (stmt, "failed: evolution of offset is not affine.\n");
    }

  init = ssize_int (pbytepos);

  /* Subtract any constant component from the base and add it to INIT instead.
     Adjust the misalignment to reflect the amount we subtracted.  */
  split_constant_offset (base_iv.base, &base_iv.base, &dinit);
  init = size_binop (PLUS_EXPR, init, dinit);
  base_misalignment -= TREE_INT_CST_LOW (dinit);

  split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
  init = size_binop (PLUS_EXPR, init, dinit);

  step = size_binop (PLUS_EXPR,
		     fold_convert (ssizetype, base_iv.step),
		     fold_convert (ssizetype, offset_iv.step));

  base = canonicalize_base_object_address (base_iv.base);

  /* See if get_pointer_alignment can guarantee a higher alignment than
     the one we calculated above.  */
  unsigned HOST_WIDE_INT alt_misalignment;
  unsigned int alt_alignment;
  get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);

  /* As above, these values must be whole bytes.  */
  gcc_assert (alt_alignment % BITS_PER_UNIT == 0
	      && alt_misalignment % BITS_PER_UNIT == 0);
  alt_alignment /= BITS_PER_UNIT;
  alt_misalignment /= BITS_PER_UNIT;

  if (base_alignment < alt_alignment)
    {
      base_alignment = alt_alignment;
      base_misalignment = alt_misalignment;
    }

  drb->base_address = base;
  drb->offset = fold_convert (ssizetype, offset_iv.base);
  drb->init = init;
  drb->step = step;
  if (known_misalignment (base_misalignment, base_alignment,
			  &drb->base_misalignment))
    drb->base_alignment = base_alignment;
  else
    {
      drb->base_alignment = known_alignment (base_misalignment);
      drb->base_misalignment = 0;
    }
  drb->offset_alignment = highest_pow2_factor (offset_iv.base);
  drb->step_alignment = highest_pow2_factor (step);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "success.\n");

  return opt_result::success ();
}

/* Return true if OP is a valid component reference for a DR access
   function.  This accepts a subset of what handled_component_p accepts.  */

static bool
access_fn_component_p (tree op)
{
  switch (TREE_CODE (op))
    {
    case REALPART_EXPR:
    case IMAGPART_EXPR:
    case ARRAY_REF:
      return true;

    case COMPONENT_REF:
      return TREE_CODE (TREE_TYPE (TREE_OPERAND (op, 0))) == RECORD_TYPE;

    default:
      return false;
    }
}

/* Returns whether an access with BASE as its base can contain a
   component reference satisfying access_fn_component_p.  */

static bool
base_supports_access_fn_components_p (tree base)
{
  switch (TREE_CODE (TREE_TYPE (base)))
    {
    case COMPLEX_TYPE:
    case ARRAY_TYPE:
    case RECORD_TYPE:
      return true;
    default:
      return false;
    }
}

/* Determines the base object and the list of indices of memory reference
   DR, analyzed in LOOP and instantiated before NEST.  */
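
/* For instance (an illustrative example): for A[i][j] accessed in a
   nest where i iterates in loop 1 and j in loop 2, the access
   functions are the chrecs {0, +, 1}_1 and {0, +, 1}_2 and the base
   object is A.  */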

static void
dr_analyze_indices (struct indices *dri, tree ref, edge nest, loop_p loop)
{
  /* If analyzing a basic-block there are no indices to analyze
     and thus no access functions.  */
  if (!nest)
    {
      dri->base_object = ref;
      dri->access_fns.create (0);
      return;
    }

  vec<tree> access_fns = vNULL;

  /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
     into a two element array with a constant index.  The base is
     then just the immediate underlying object.  */
  if (TREE_CODE (ref) == REALPART_EXPR)
    {
      ref = TREE_OPERAND (ref, 0);
      access_fns.safe_push (integer_zero_node);
    }
  else if (TREE_CODE (ref) == IMAGPART_EXPR)
    {
      ref = TREE_OPERAND (ref, 0);
      access_fns.safe_push (integer_one_node);
    }

  /* Analyze access functions of dimensions we know to be independent.
     The list of component references handled here should be kept in
     sync with access_fn_component_p.  */
  while (handled_component_p (ref))
    {
      if (TREE_CODE (ref) == ARRAY_REF)
	{
	  tree op = TREE_OPERAND (ref, 1);
	  tree access_fn = analyze_scalar_evolution (loop, op);
	  access_fn = instantiate_scev (nest, loop, access_fn);
	  access_fns.safe_push (access_fn);
	}
      else if (TREE_CODE (ref) == COMPONENT_REF
	       && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
	{
	  /* For COMPONENT_REFs of records (but not unions!) use the
	     FIELD_DECL offset as constant access function so we can
	     disambiguate a[i].f1 and a[i].f2.  */
	  tree off = component_ref_field_offset (ref);
	  off = size_binop (PLUS_EXPR,
			    size_binop (MULT_EXPR,
					fold_convert (bitsizetype, off),
					bitsize_int (BITS_PER_UNIT)),
			    DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
	  access_fns.safe_push (off);
	}
      else
	/* If we have an unhandled component that we could not translate
	   to an access function, stop analyzing.  We have determined
	   our base object in this case.  */
	break;

      ref = TREE_OPERAND (ref, 0);
    }

  /* If the address operand of a MEM_REF base has an evolution in the
     analyzed nest, add it as an additional independent access-function.  */
  if (TREE_CODE (ref) == MEM_REF)
    {
      tree op = TREE_OPERAND (ref, 0);
      tree access_fn = analyze_scalar_evolution (loop, op);
      access_fn = instantiate_scev (nest, loop, access_fn);
      STRIP_NOPS (access_fn);
      if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
	{
	  tree memoff = TREE_OPERAND (ref, 1);
	  tree base = initial_condition (access_fn);
	  tree orig_type = TREE_TYPE (base);
	  STRIP_USELESS_TYPE_CONVERSION (base);
	  tree off;
	  split_constant_offset (base, &base, &off);
	  STRIP_USELESS_TYPE_CONVERSION (base);
	  /* Fold the MEM_REF offset into the evolution's initial
	     value to make more bases comparable.  */
	  if (!integer_zerop (memoff))
	    {
	      off = size_binop (PLUS_EXPR, off,
				fold_convert (ssizetype, memoff));
	      memoff = build_int_cst (TREE_TYPE (memoff), 0);
	    }
	  /* Adjust the offset so it is a multiple of the access type
	     size and thus we separate bases that can possibly be used
	     to produce partial overlaps (which the access_fn machinery
	     cannot handle).  */
	  wide_int rem;
	  if (TYPE_SIZE_UNIT (TREE_TYPE (ref))
	      && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST
	      && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref))))
	    rem = wi::mod_trunc
	      (wi::to_wide (off),
	       wi::to_wide (TYPE_SIZE_UNIT (TREE_TYPE (ref))),
	       SIGNED);
	  else
	    /* If we can't compute the remainder simply force the initial
	       condition to zero.  */
	    rem = wi::to_wide (off);
	  off = wide_int_to_tree (ssizetype, wi::to_wide (off) - rem);
	  memoff = wide_int_to_tree (TREE_TYPE (memoff), rem);
	  /* And finally replace the initial condition.  */
	  access_fn = chrec_replace_initial_condition
	      (access_fn, fold_convert (orig_type, off));
	  /* ???  This is still not a suitable base object for
	     dr_may_alias_p - the base object needs to be an
	     access that covers the object as a whole.  With
	     an evolution in the pointer this cannot be
	     guaranteed.
	     As a band-aid, mark the access so we can special-case
	     it in dr_may_alias_p.  */
	  tree old = ref;
	  ref = fold_build2_loc (EXPR_LOCATION (ref),
				 MEM_REF, TREE_TYPE (ref),
				 base, memoff);
	  MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
	  MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
	  dri->unconstrained_base = true;
	  access_fns.safe_push (access_fn);
	}
    }
  else if (DECL_P (ref))
    {
      /* Canonicalize DR_BASE_OBJECT to MEM_REF form.  */
      ref = build2 (MEM_REF, TREE_TYPE (ref),
		    build_fold_addr_expr (ref),
		    build_int_cst (reference_alias_ptr_type (ref), 0));
    }

  dri->base_object = ref;
  dri->access_fns = access_fns;
}

/* Extracts the alias analysis information from the memory reference DR.  */

static void
dr_analyze_alias (struct data_reference *dr)
{
  tree ref = DR_REF (dr);
  tree base = get_base_address (ref), addr;

  if (INDIRECT_REF_P (base)
      || TREE_CODE (base) == MEM_REF)
    {
      addr = TREE_OPERAND (base, 0);
      if (TREE_CODE (addr) == SSA_NAME)
	DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
    }
}

/* Frees data reference DR.  */

void
free_data_ref (data_reference_p dr)
{
  DR_ACCESS_FNS (dr).release ();
  if (dr->alt_indices.base_object)
    dr->alt_indices.access_fns.release ();
  free (dr);
}

/* Analyze memory reference MEMREF, which is accessed in STMT.
   The reference is a read if IS_READ is true, otherwise it is a write.
   IS_CONDITIONAL_IN_STMT indicates that the reference is conditional
   within STMT, i.e. that it might not occur even if STMT is executed
   and runs to completion.

   Return the data_reference description of MEMREF.  NEST is the outermost
   loop in which the reference should be instantiated, LOOP is the loop
   in which the data reference should be analyzed.  */

struct data_reference *
create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt,
		 bool is_read, bool is_conditional_in_stmt)
{
  struct data_reference *dr;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Creating dr for ");
      print_generic_expr (dump_file, memref, TDF_SLIM);
      fprintf (dump_file, "\n");
    }

  dr = XCNEW (struct data_reference);
  DR_STMT (dr) = stmt;
  DR_REF (dr) = memref;
  DR_IS_READ (dr) = is_read;
  DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt;

  dr_analyze_innermost (&DR_INNERMOST (dr), memref,
			nest != NULL ? loop : NULL, stmt);
  dr_analyze_indices (&dr->indices, DR_REF (dr), nest, loop);
  dr_analyze_alias (dr);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      unsigned i;
      fprintf (dump_file, "\tbase_address: ");
      print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
      fprintf (dump_file, "\n\toffset from base address: ");
      print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
      fprintf (dump_file, "\n\tconstant offset from base address: ");
      print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
      fprintf (dump_file, "\n\tstep: ");
      print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
      fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr));
      fprintf (dump_file, "\n\tbase misalignment: %d",
	       DR_BASE_MISALIGNMENT (dr));
      fprintf (dump_file, "\n\toffset alignment: %d",
	       DR_OFFSET_ALIGNMENT (dr));
      fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr));
      fprintf (dump_file, "\n\tbase_object: ");
      print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
      fprintf (dump_file, "\n");
      for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
	{
	  fprintf (dump_file, "\tAccess function %d: ", i);
	  print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
	}
    }

  return dr;
}

/* A helper function that computes the order between two tree expressions
   T1 and T2.  It is used in comparator functions when sorting objects
   based on the order of tree expressions.  The function returns -1, 0,
   or 1.  */

int
data_ref_compare_tree (tree t1, tree t2)
{
  int i, cmp;
  enum tree_code code;
  char tclass;

  if (t1 == t2)
    return 0;
  if (t1 == NULL)
    return -1;
  if (t2 == NULL)
    return 1;

  STRIP_USELESS_TYPE_CONVERSION (t1);
  STRIP_USELESS_TYPE_CONVERSION (t2);
  if (t1 == t2)
    return 0;

  if (TREE_CODE (t1) != TREE_CODE (t2)
      && ! (CONVERT_EXPR_P (t1) && CONVERT_EXPR_P (t2)))
    return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;

  code = TREE_CODE (t1);
  switch (code)
    {
    case INTEGER_CST:
      return tree_int_cst_compare (t1, t2);

    case STRING_CST:
      if (TREE_STRING_LENGTH (t1) != TREE_STRING_LENGTH (t2))
	return TREE_STRING_LENGTH (t1) < TREE_STRING_LENGTH (t2) ? -1 : 1;
      return memcmp (TREE_STRING_POINTER (t1), TREE_STRING_POINTER (t2),
		     TREE_STRING_LENGTH (t1));

    case SSA_NAME:
      if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
	return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
      break;

    default:
      if (POLY_INT_CST_P (t1))
	return compare_sizes_for_sort (wi::to_poly_widest (t1),
				       wi::to_poly_widest (t2));

      tclass = TREE_CODE_CLASS (code);

      /* For decls, compare their UIDs.  */
      if (tclass == tcc_declaration)
	{
	  if (DECL_UID (t1) != DECL_UID (t2))
	    return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
	  break;
	}
      /* For expressions, compare their operands recursively.  */
      else if (IS_EXPR_CODE_CLASS (tclass))
	{
	  for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
	    {
	      cmp = data_ref_compare_tree (TREE_OPERAND (t1, i),
					   TREE_OPERAND (t2, i));
	      if (cmp != 0)
		return cmp;
	    }
	}
      else
	gcc_unreachable ();
    }

  return 0;
}

/* Return TRUE if it's possible to resolve data dependence DDR by a
   runtime alias check.  */

opt_result
runtime_alias_check_p (ddr_p ddr, class loop *loop, bool speed_p)
{
  if (dump_enabled_p ())
    dump_printf (MSG_NOTE,
		 "consider run-time aliasing test between %T and %T\n",
		 DR_REF (DDR_A (ddr)), DR_REF (DDR_B (ddr)));

  if (!speed_p)
    return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
				   "runtime alias check not supported when"
				   " optimizing for size.\n");

  /* FORNOW: We don't support versioning with outer-loop in either
     vectorization or loop distribution.  */
  if (loop != NULL && loop->inner != NULL)
    return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
				   "runtime alias check not supported for"
				   " outer loop.\n");

  /* FORNOW: We don't support handling different address spaces.  */
  if (TYPE_ADDR_SPACE (TREE_TYPE (TREE_TYPE (DR_BASE_ADDRESS (DDR_A (ddr)))))
      != TYPE_ADDR_SPACE (TREE_TYPE (TREE_TYPE (DR_BASE_ADDRESS (DDR_B (ddr))))))
    return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
				   "runtime alias check between different "
				   "address spaces not supported.\n");

  return opt_result::success ();
}
1645 
1646 /* Operator == between two dr_with_seg_len objects.
1647 
1648    This equality operator is used to make sure two data refs
1649    are the same, so that we will consider combining the
1650    aliasing checks of the two pairs of data-dependent data
1651    refs.  */
1652 
1653 static bool
1654 operator == (const dr_with_seg_len& d1,
1655 	     const dr_with_seg_len& d2)
1656 {
1657   return (operand_equal_p (DR_BASE_ADDRESS (d1.dr),
1658 			   DR_BASE_ADDRESS (d2.dr), 0)
1659 	  && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
1660 	  && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
1661 	  && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0
1662 	  && known_eq (d1.access_size, d2.access_size)
1663 	  && d1.align == d2.align);
1664 }
1665 
1666 /* Comparison function for sorting objects of dr_with_seg_len_pair_t
1667    so that we can combine aliasing checks in one scan.  */
1668 
1669 static int
1670 comp_dr_with_seg_len_pair (const void *pa_, const void *pb_)
1671 {
1672   const dr_with_seg_len_pair_t* pa = (const dr_with_seg_len_pair_t *) pa_;
1673   const dr_with_seg_len_pair_t* pb = (const dr_with_seg_len_pair_t *) pb_;
1674   const dr_with_seg_len &a1 = pa->first, &a2 = pa->second;
1675   const dr_with_seg_len &b1 = pb->first, &b2 = pb->second;
1676 
1677   /* For DR pairs (a, b) and (c, d), we only consider merging the alias checks
1678      if a and c have the same base address and step, and b and d have the same
1679      address and step.  Therefore, if either a & c or b & d don't have the
1680      same address and step, we don't care about the order after sorting.  */
1681   int comp_res;
1682 
1683   if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a1.dr),
1684 					 DR_BASE_ADDRESS (b1.dr))) != 0)
1685     return comp_res;
1686   if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a2.dr),
1687 					 DR_BASE_ADDRESS (b2.dr))) != 0)
1688     return comp_res;
1689   if ((comp_res = data_ref_compare_tree (DR_STEP (a1.dr),
1690 					 DR_STEP (b1.dr))) != 0)
1691     return comp_res;
1692   if ((comp_res = data_ref_compare_tree (DR_STEP (a2.dr),
1693 					 DR_STEP (b2.dr))) != 0)
1694     return comp_res;
1695   if ((comp_res = data_ref_compare_tree (DR_OFFSET (a1.dr),
1696 					 DR_OFFSET (b1.dr))) != 0)
1697     return comp_res;
1698   if ((comp_res = data_ref_compare_tree (DR_INIT (a1.dr),
1699 					 DR_INIT (b1.dr))) != 0)
1700     return comp_res;
1701   if ((comp_res = data_ref_compare_tree (DR_OFFSET (a2.dr),
1702 					 DR_OFFSET (b2.dr))) != 0)
1703     return comp_res;
1704   if ((comp_res = data_ref_compare_tree (DR_INIT (a2.dr),
1705 					 DR_INIT (b2.dr))) != 0)
1706     return comp_res;
1707 
1708   return 0;
1709 }
1710 
1711 /* Dump information about ALIAS_PAIR, indenting each line by INDENT.  */
1712 
1713 static void
1714 dump_alias_pair (dr_with_seg_len_pair_t *alias_pair, const char *indent)
1715 {
1716   dump_printf (MSG_NOTE, "%sreference:      %T vs. %T\n", indent,
1717 	       DR_REF (alias_pair->first.dr),
1718 	       DR_REF (alias_pair->second.dr));
1719 
1720   dump_printf (MSG_NOTE, "%ssegment length: %T", indent,
1721 	       alias_pair->first.seg_len);
1722   if (!operand_equal_p (alias_pair->first.seg_len,
1723 			alias_pair->second.seg_len, 0))
1724     dump_printf (MSG_NOTE, " vs. %T", alias_pair->second.seg_len);
1725 
1726   dump_printf (MSG_NOTE, "\n%saccess size:    ", indent);
1727   dump_dec (MSG_NOTE, alias_pair->first.access_size);
1728   if (maybe_ne (alias_pair->first.access_size, alias_pair->second.access_size))
1729     {
1730       dump_printf (MSG_NOTE, " vs. ");
1731       dump_dec (MSG_NOTE, alias_pair->second.access_size);
1732     }
1733 
1734   dump_printf (MSG_NOTE, "\n%salignment:      %d", indent,
1735 	       alias_pair->first.align);
1736   if (alias_pair->first.align != alias_pair->second.align)
1737     dump_printf (MSG_NOTE, " vs. %d", alias_pair->second.align);
1738 
1739   dump_printf (MSG_NOTE, "\n%sflags:         ", indent);
1740   if (alias_pair->flags & DR_ALIAS_RAW)
1741     dump_printf (MSG_NOTE, " RAW");
1742   if (alias_pair->flags & DR_ALIAS_WAR)
1743     dump_printf (MSG_NOTE, " WAR");
1744   if (alias_pair->flags & DR_ALIAS_WAW)
1745     dump_printf (MSG_NOTE, " WAW");
1746   if (alias_pair->flags & DR_ALIAS_ARBITRARY)
1747     dump_printf (MSG_NOTE, " ARBITRARY");
1748   if (alias_pair->flags & DR_ALIAS_SWAPPED)
1749     dump_printf (MSG_NOTE, " SWAPPED");
1750   if (alias_pair->flags & DR_ALIAS_UNSWAPPED)
1751     dump_printf (MSG_NOTE, " UNSWAPPED");
1752   if (alias_pair->flags & DR_ALIAS_MIXED_STEPS)
1753     dump_printf (MSG_NOTE, " MIXED_STEPS");
1754   if (alias_pair->flags == 0)
1755     dump_printf (MSG_NOTE, " <none>");
1756   dump_printf (MSG_NOTE, "\n");
1757 }
1758 
1759 /* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones.
1760    FACTOR is the number of iterations in which each data reference is accessed.
1761 
1762    Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0,
1763    we create an expression:
1764 
1765    ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1766    || ((load_ptr_0 + load_segment_length_0) <= store_ptr_0)
1767 
1768    for aliasing checks.  However, in some cases we can decrease the number
1769    of checks by combining two checks into one.  For example, suppose we have
1770    another pair of data refs store_ptr_0 & load_ptr_1, and if the following
1771    condition is satisfied:
1772 
1773    load_ptr_0 < load_ptr_1  &&
1774    load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0
1775 
1776    (this condition means that, in each iteration of the vectorized loop,
1777    the memory accessed through store_ptr_0 cannot lie between the memory
1778    accessed through load_ptr_0 and load_ptr_1.)
1779 
1780    we can then use only the following expression to finish the aliasing checks
1781    between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1:
1782 
1783    ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1784    || ((load_ptr_1 + load_segment_length_1) <= store_ptr_0)
1785 
1786    Note that we only consider the case in which load_ptr_0 and load_ptr_1
1787    have the same base address.  */
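/* As a hypothetical instance of the merging rule above: with
   load_ptr_1 = load_ptr_0 + 16, load_segment_length_0
   = load_segment_length_1 = 16 and store_segment_length_0 = 64, we have
   load_ptr_1 - load_ptr_0 - load_segment_length_0 = 0 < 64, so both checks
   collapse into the single test

   ((store_ptr_0 + 64) <= load_ptr_0)
   || ((load_ptr_0 + 32) <= store_ptr_0)

   since load_ptr_1 + load_segment_length_1 = load_ptr_0 + 32.  */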
1788 
1789 void
1790 prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
1791 			       poly_uint64)
1792 {
1793   if (alias_pairs->is_empty ())
1794     return;
1795 
1796   /* Canonicalize each pair so that the base components are ordered wrt
1797      data_ref_compare_tree.  This allows the loop below to merge more
1798      cases.  */
1799   unsigned int i;
1800   dr_with_seg_len_pair_t *alias_pair;
1801   FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
1802     {
1803       data_reference_p dr_a = alias_pair->first.dr;
1804       data_reference_p dr_b = alias_pair->second.dr;
1805       int comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_a),
1806 					    DR_BASE_ADDRESS (dr_b));
1807       if (comp_res == 0)
1808 	comp_res = data_ref_compare_tree (DR_OFFSET (dr_a), DR_OFFSET (dr_b));
1809       if (comp_res == 0)
1810 	comp_res = data_ref_compare_tree (DR_INIT (dr_a), DR_INIT (dr_b));
1811       if (comp_res > 0)
1812 	{
1813 	  std::swap (alias_pair->first, alias_pair->second);
1814 	  alias_pair->flags |= DR_ALIAS_SWAPPED;
1815 	}
1816       else
1817 	alias_pair->flags |= DR_ALIAS_UNSWAPPED;
1818     }
1819 
1820   /* Sort the collected data ref pairs so that we can scan them once to
1821      combine all possible aliasing checks.  */
1822   alias_pairs->qsort (comp_dr_with_seg_len_pair);
1823 
1824   /* Scan the sorted dr pairs and check if we can combine alias checks
1825      of two neighboring dr pairs.  */
1826   unsigned int last = 0;
1827   for (i = 1; i < alias_pairs->length (); ++i)
1828     {
1829       /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2).  */
1830       dr_with_seg_len_pair_t *alias_pair1 = &(*alias_pairs)[last];
1831       dr_with_seg_len_pair_t *alias_pair2 = &(*alias_pairs)[i];
1832 
1833       dr_with_seg_len *dr_a1 = &alias_pair1->first;
1834       dr_with_seg_len *dr_b1 = &alias_pair1->second;
1835       dr_with_seg_len *dr_a2 = &alias_pair2->first;
1836       dr_with_seg_len *dr_b2 = &alias_pair2->second;
1837 
1838       /* Remove duplicate data ref pairs.  */
1839       if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
1840 	{
1841 	  if (dump_enabled_p ())
1842 	    dump_printf (MSG_NOTE, "found equal ranges %T, %T and %T, %T\n",
1843 			 DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
1844 			 DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
1845 	  alias_pair1->flags |= alias_pair2->flags;
1846 	  continue;
1847 	}
1848 
1849       /* Assume that we won't be able to merge the pairs, then correct
1850 	 if we do.  */
1851       last += 1;
1852       if (last != i)
1853 	(*alias_pairs)[last] = (*alias_pairs)[i];
1854 
1855       if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
1856 	{
1857 	  /* We consider the case in which DR_B1 and DR_B2 are the same
1858 	     memrefs, and DR_A1 and DR_A2 are two consecutive memrefs.  */
1859 	  if (*dr_a1 == *dr_a2)
1860 	    {
1861 	      std::swap (dr_a1, dr_b1);
1862 	      std::swap (dr_a2, dr_b2);
1863 	    }
1864 
1865 	  poly_int64 init_a1, init_a2;
1866 	  /* Only consider cases in which the distance between the initial
1867 	     DR_A1 and the initial DR_A2 is known at compile time.  */
1868 	  if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
1869 				DR_BASE_ADDRESS (dr_a2->dr), 0)
1870 	      || !operand_equal_p (DR_OFFSET (dr_a1->dr),
1871 				   DR_OFFSET (dr_a2->dr), 0)
1872 	      || !poly_int_tree_p (DR_INIT (dr_a1->dr), &init_a1)
1873 	      || !poly_int_tree_p (DR_INIT (dr_a2->dr), &init_a2))
1874 	    continue;
1875 
1876 	  /* Don't combine if we can't tell which one comes first.  */
1877 	  if (!ordered_p (init_a1, init_a2))
1878 	    continue;
1879 
1880 	  /* Work out what the segment length would be if we did combine
1881 	     DR_A1 and DR_A2:
1882 
1883 	     - If DR_A1 and DR_A2 have equal lengths, that length is
1884 	       also the combined length.
1885 
1886 	     - If DR_A1 and DR_A2 both have negative "lengths", the combined
1887 	       length is the lower bound on those lengths.
1888 
1889 	     - If DR_A1 and DR_A2 both have positive lengths, the combined
1890 	       length is the upper bound on those lengths.
1891 
1892 	     Other cases are unlikely to give a useful combination.
1893 
1894 	     The lengths both have sizetype, so the sign is taken from
1895 	     the step instead.  */
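	  /* For example, if both direction indicators are positive and
	     the segment lengths are 12 and 16 bytes, the merged reference
	     must span both segments, so the combined length is
	     upper_bound (12, 16) = 16.  */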
1896 	  poly_uint64 new_seg_len = 0;
1897 	  bool new_seg_len_p = !operand_equal_p (dr_a1->seg_len,
1898 						 dr_a2->seg_len, 0);
1899 	  if (new_seg_len_p)
1900 	    {
1901 	      poly_uint64 seg_len_a1, seg_len_a2;
1902 	      if (!poly_int_tree_p (dr_a1->seg_len, &seg_len_a1)
1903 		  || !poly_int_tree_p (dr_a2->seg_len, &seg_len_a2))
1904 		continue;
1905 
1906 	      tree indicator_a = dr_direction_indicator (dr_a1->dr);
1907 	      if (TREE_CODE (indicator_a) != INTEGER_CST)
1908 		continue;
1909 
1910 	      tree indicator_b = dr_direction_indicator (dr_a2->dr);
1911 	      if (TREE_CODE (indicator_b) != INTEGER_CST)
1912 		continue;
1913 
1914 	      int sign_a = tree_int_cst_sgn (indicator_a);
1915 	      int sign_b = tree_int_cst_sgn (indicator_b);
1916 
1917 	      if (sign_a <= 0 && sign_b <= 0)
1918 		new_seg_len = lower_bound (seg_len_a1, seg_len_a2);
1919 	      else if (sign_a >= 0 && sign_b >= 0)
1920 		new_seg_len = upper_bound (seg_len_a1, seg_len_a2);
1921 	      else
1922 		continue;
1923 	    }
1924 	  /* At this point we're committed to merging the refs.  */
1925 
1926 	  /* Make sure dr_a1 starts left of dr_a2.  */
1927 	  if (maybe_gt (init_a1, init_a2))
1928 	    {
1929 	      std::swap (*dr_a1, *dr_a2);
1930 	      std::swap (init_a1, init_a2);
1931 	    }
1932 
1933 	  /* The DR_Bs are equal, so only the DR_As can introduce
1934 	     mixed steps.  */
1935 	  if (!operand_equal_p (DR_STEP (dr_a1->dr), DR_STEP (dr_a2->dr), 0))
1936 	    alias_pair1->flags |= DR_ALIAS_MIXED_STEPS;
1937 
1938 	  if (new_seg_len_p)
1939 	    {
1940 	      dr_a1->seg_len = build_int_cst (TREE_TYPE (dr_a1->seg_len),
1941 					      new_seg_len);
1942 	      dr_a1->align = MIN (dr_a1->align, known_alignment (new_seg_len));
1943 	    }
1944 
1945 	  /* This is always positive due to the swap above.  */
1946 	  poly_uint64 diff = init_a2 - init_a1;
1947 
1948 	  /* The new check will start at DR_A1.  Make sure that its access
1949 	     size encompasses the initial DR_A2.  */
1950 	  if (maybe_lt (dr_a1->access_size, diff + dr_a2->access_size))
1951 	    {
1952 	      dr_a1->access_size = upper_bound (dr_a1->access_size,
1953 						diff + dr_a2->access_size);
1954 	      unsigned int new_align = known_alignment (dr_a1->access_size);
1955 	      dr_a1->align = MIN (dr_a1->align, new_align);
1956 	    }
1957 	  if (dump_enabled_p ())
1958 	    dump_printf (MSG_NOTE, "merging ranges for %T, %T and %T, %T\n",
1959 			 DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
1960 			 DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
1961 	  alias_pair1->flags |= alias_pair2->flags;
1962 	  last -= 1;
1963 	}
1964     }
1965   alias_pairs->truncate (last + 1);
1966 
1967   /* Try to restore the original dr_with_seg_len order within each
1968      dr_with_seg_len_pair_t.  If we ended up combining swapped and
1969      unswapped pairs into the same check, we have to invalidate any
1970      RAW, WAR and WAW information for it.  */
1971   if (dump_enabled_p ())
1972     dump_printf (MSG_NOTE, "merged alias checks:\n");
1973   FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
1974     {
1975       unsigned int swap_mask = (DR_ALIAS_SWAPPED | DR_ALIAS_UNSWAPPED);
1976       unsigned int swapped = (alias_pair->flags & swap_mask);
1977       if (swapped == DR_ALIAS_SWAPPED)
1978 	std::swap (alias_pair->first, alias_pair->second);
1979       else if (swapped != DR_ALIAS_UNSWAPPED)
1980 	alias_pair->flags |= DR_ALIAS_ARBITRARY;
1981       alias_pair->flags &= ~swap_mask;
1982       if (dump_enabled_p ())
1983 	dump_alias_pair (alias_pair, "  ");
1984     }
1985 }
1986 
1987 /* A subroutine of create_intersect_range_checks, with a subset of the
1988    same arguments.  Try to use IFN_CHECK_RAW_PTRS and IFN_CHECK_WAR_PTRS
1989    to optimize cases in which the references form a simple RAW, WAR or
1990    WAW dependence.  */
1991 
1992 static bool
1993 create_ifn_alias_checks (tree *cond_expr,
1994 			 const dr_with_seg_len_pair_t &alias_pair)
1995 {
1996   const dr_with_seg_len& dr_a = alias_pair.first;
1997   const dr_with_seg_len& dr_b = alias_pair.second;
1998 
1999   /* Check for cases in which:
2000 
2001      (a) we have a known RAW, WAR or WAW dependence;
2002      (b) the accesses are well-ordered in both the original and new code
2003 	 (see the comment above the DR_ALIAS_* flags for details); and
2004      (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR.  */
2005   if (alias_pair.flags & ~(DR_ALIAS_RAW | DR_ALIAS_WAR | DR_ALIAS_WAW))
2006     return false;
2007 
2008   /* Make sure that both DRs access the same pattern of bytes,
2009      with a constant length and step.  */
2010   poly_uint64 seg_len;
2011   if (!operand_equal_p (dr_a.seg_len, dr_b.seg_len, 0)
2012       || !poly_int_tree_p (dr_a.seg_len, &seg_len)
2013       || maybe_ne (dr_a.access_size, dr_b.access_size)
2014       || !operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0)
2015       || !tree_fits_uhwi_p (DR_STEP (dr_a.dr)))
2016     return false;
2017 
2018   unsigned HOST_WIDE_INT bytes = tree_to_uhwi (DR_STEP (dr_a.dr));
2019   tree addr_a = DR_BASE_ADDRESS (dr_a.dr);
2020   tree addr_b = DR_BASE_ADDRESS (dr_b.dr);
2021 
2022   /* See whether the target supports what we want to do.  WAW checks are
2023      equivalent to WAR checks here.  */
2024   internal_fn ifn = (alias_pair.flags & DR_ALIAS_RAW
2025 		     ? IFN_CHECK_RAW_PTRS
2026 		     : IFN_CHECK_WAR_PTRS);
2027   unsigned int align = MIN (dr_a.align, dr_b.align);
2028   poly_uint64 full_length = seg_len + bytes;
2029   if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
2030 					   full_length, align))
2031     {
2032       full_length = seg_len + dr_a.access_size;
2033       if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
2034 					       full_length, align))
2035 	return false;
2036     }
2037 
2038   /* Commit to using this form of test.  */
2039   addr_a = fold_build_pointer_plus (addr_a, DR_OFFSET (dr_a.dr));
2040   addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));
2041 
2042   addr_b = fold_build_pointer_plus (addr_b, DR_OFFSET (dr_b.dr));
2043   addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));
2044 
2045   *cond_expr = build_call_expr_internal_loc (UNKNOWN_LOCATION,
2046 					     ifn, boolean_type_node,
2047 					     4, addr_a, addr_b,
2048 					     size_int (full_length),
2049 					     size_int (align));
2050 
2051   if (dump_enabled_p ())
2052     {
2053       if (ifn == IFN_CHECK_RAW_PTRS)
2054 	dump_printf (MSG_NOTE, "using an IFN_CHECK_RAW_PTRS test\n");
2055       else
2056 	dump_printf (MSG_NOTE, "using an IFN_CHECK_WAR_PTRS test\n");
2057     }
2058   return true;
2059 }
2060 
2061 /* Try to generate a runtime condition that is true if ALIAS_PAIR is
2062    free of aliases, using a condition based on index values instead
2063    of a condition based on addresses.  Return true on success,
2064    storing the condition in *COND_EXPR.
2065 
2066    This can only be done if the two data references in ALIAS_PAIR access
2067    the same array object and the index is the only difference.  For example,
2068    if the two data references are DR_A and DR_B:
2069 
2070                        DR_A                           DR_B
2071       data-ref         arr[i]                         arr[j]
2072       base_object      arr                            arr
2073       index            {i_0, +, 1}_loop               {j_0, +, 1}_loop
2074 
2075    The addresses and their index are like:
2076 
2077         |<- ADDR_A    ->|          |<- ADDR_B    ->|
2078      ------------------------------------------------------->
2079         |   |   |   |   |          |   |   |   |   |
2080      ------------------------------------------------------->
2081         i_0 ...         i_0+4      j_0 ...         j_0+4
2082 
2083    We can create an expression based on the index rather than the address:
2084 
2085      (unsigned) (i_0 - j_0 + 3) <= 6
2086 
2087    i.e. the indices are less than 4 apart.
2088 
2089    Note that the evolution step of the index needs to be considered in the
        comparison.  */
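/* For example, if i_0 = 10 and j_0 = 13 the indices are 3 apart and
   (unsigned) (10 - 13 + 3) = 0 <= 6, so a possible overlap is reported;
   if j_0 = 18 the subtraction wraps to a large unsigned value greater
   than 6 and the test correctly fails.  */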
2090 
2091 static bool
2092 create_intersect_range_checks_index (class loop *loop, tree *cond_expr,
2093 				     const dr_with_seg_len_pair_t &alias_pair)
2094 {
2095   const dr_with_seg_len &dr_a = alias_pair.first;
2096   const dr_with_seg_len &dr_b = alias_pair.second;
2097   if ((alias_pair.flags & DR_ALIAS_MIXED_STEPS)
2098       || integer_zerop (DR_STEP (dr_a.dr))
2099       || integer_zerop (DR_STEP (dr_b.dr))
2100       || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
2101     return false;
2102 
2103   poly_uint64 seg_len1, seg_len2;
2104   if (!poly_int_tree_p (dr_a.seg_len, &seg_len1)
2105       || !poly_int_tree_p (dr_b.seg_len, &seg_len2))
2106     return false;
2107 
2108   if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
2109     return false;
2110 
2111   if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
2112     return false;
2113 
2114   if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
2115     return false;
2116 
2117   gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
2118 
2119   bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
2120   unsigned HOST_WIDE_INT abs_step = tree_to_shwi (DR_STEP (dr_a.dr));
2121   if (neg_step)
2122     {
2123       abs_step = -abs_step;
2124       seg_len1 = (-wi::to_poly_wide (dr_a.seg_len)).force_uhwi ();
2125       seg_len2 = (-wi::to_poly_wide (dr_b.seg_len)).force_uhwi ();
2126     }
2127 
2128   /* Infer the number of iterations with which the memory segment is accessed
2129      by DR.  In other words, aliasing is checked if the memory segment
2130      accessed by DR_A in some iterations intersects with the memory segment
2131      accessed by DR_B in the same number of iterations.
2132      Note that the segment length is a linear function of the number of
2133      iterations, with DR_STEP as the coefficient.  */
2134   poly_uint64 niter_len1, niter_len2;
2135   if (!can_div_trunc_p (seg_len1 + abs_step - 1, abs_step, &niter_len1)
2136       || !can_div_trunc_p (seg_len2 + abs_step - 1, abs_step, &niter_len2))
2137     return false;
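  /* E.g. with seg_len = 10 and abs_step = 4 this computes
     niter_len = (10 + 4 - 1) / 4 = 3, i.e. the segment is covered by
     three accesses.  */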
2138 
2139   /* Divide each access size by the byte step, rounding up.  */
2140   poly_uint64 niter_access1, niter_access2;
2141   if (!can_div_trunc_p (dr_a.access_size + abs_step - 1,
2142 			abs_step, &niter_access1)
2143       || !can_div_trunc_p (dr_b.access_size + abs_step - 1,
2144 			   abs_step, &niter_access2))
2145     return false;
2146 
2147   bool waw_or_war_p = (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW)) == 0;
2148 
2149   int found = -1;
2150   for (unsigned int i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
2151     {
2152       tree access1 = DR_ACCESS_FN (dr_a.dr, i);
2153       tree access2 = DR_ACCESS_FN (dr_b.dr, i);
2154       /* Two indices must be the same if they are not SCEVs, or are not
2155 	 SCEVs wrt the current loop being vectorized.  */
2156       if (TREE_CODE (access1) != POLYNOMIAL_CHREC
2157 	  || TREE_CODE (access2) != POLYNOMIAL_CHREC
2158 	  || CHREC_VARIABLE (access1) != (unsigned)loop->num
2159 	  || CHREC_VARIABLE (access2) != (unsigned)loop->num)
2160 	{
2161 	  if (operand_equal_p (access1, access2, 0))
2162 	    continue;
2163 
2164 	  return false;
2165 	}
2166       if (found >= 0)
2167 	return false;
2168       found = i;
2169     }
2170 
2171   /* Ought not to happen in practice, since if all accesses are equal then the
2172      alias should be decidable at compile time.  */
2173   if (found < 0)
2174     return false;
2175 
2176   /* The two indices must have the same step.  */
2177   tree access1 = DR_ACCESS_FN (dr_a.dr, found);
2178   tree access2 = DR_ACCESS_FN (dr_b.dr, found);
2179   if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
2180     return false;
2181 
2182   tree idx_step = CHREC_RIGHT (access1);
2183   /* Index must have const step, otherwise DR_STEP won't be constant.  */
2184   gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
2185   /* Index must evaluate in the same direction as DR.  */
2186   gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
2187 
2188   tree min1 = CHREC_LEFT (access1);
2189   tree min2 = CHREC_LEFT (access2);
2190   if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
2191     return false;
2192 
2193   /* Ideally, aliasing could be checked against the loop's control IV,
2194      but we would need to prove a linear mapping between the control IV
2195      and the reference index.  Although that should be true, we instead
2196      check against the (array) index of the data reference.  Like the
2197      segment length, the index length is a linear function of the number
2198      of iterations, with index_step as the coefficient, i.e. niter_len * idx_step.  */
2199   offset_int abs_idx_step = offset_int::from (wi::to_wide (idx_step),
2200 					      SIGNED);
2201   if (neg_step)
2202     abs_idx_step = -abs_idx_step;
2203   poly_offset_int idx_len1 = abs_idx_step * niter_len1;
2204   poly_offset_int idx_len2 = abs_idx_step * niter_len2;
2205   poly_offset_int idx_access1 = abs_idx_step * niter_access1;
2206   poly_offset_int idx_access2 = abs_idx_step * niter_access2;
2207 
2208   gcc_assert (known_ge (idx_len1, 0)
2209 	      && known_ge (idx_len2, 0)
2210 	      && known_ge (idx_access1, 0)
2211 	      && known_ge (idx_access2, 0));
2212 
2213   /* Each access has the following pattern, with lengths measured
2214      in units of INDEX:
2215 
2216 	  <-- idx_len -->
2217 	  <--- A: -ve step --->
2218 	  +-----+-------+-----+-------+-----+
2219 	  | n-1 | ..... |  0  | ..... | n-1 |
2220 	  +-----+-------+-----+-------+-----+
2221 			<--- B: +ve step --->
2222 			<-- idx_len -->
2223 			|
2224 		       min
2225 
2226      where "n" is the number of scalar iterations covered by the segment
2227      and where each access spans idx_access units.
2228 
2229      A is the range of bytes accessed when the step is negative,
2230      B is the range when the step is positive.
2231 
2232      When checking for general overlap, we need to test whether
2233      the range:
2234 
2235        [min1 + low_offset1, min1 + high_offset1 + idx_access1 - 1]
2236 
2237      overlaps:
2238 
2239        [min2 + low_offset2, min2 + high_offset2 + idx_access2 - 1]
2240 
2241      where:
2242 
2243 	low_offsetN = +ve step ? 0 : -idx_lenN;
2244        high_offsetN = +ve step ? idx_lenN : 0;
2245 
2246      This is equivalent to testing whether:
2247 
2248        min1 + low_offset1 <= min2 + high_offset2 + idx_access2 - 1
2249        && min2 + low_offset2 <= min1 + high_offset1 + idx_access1 - 1
2250 
2251      Converting this into a single test, there is an overlap if:
2252 
2253        0 <= min2 - min1 + bias <= limit
2254 
2255      where  bias = high_offset2 + idx_access2 - 1 - low_offset1
2256 	   limit = (high_offset1 - low_offset1 + idx_access1 - 1)
2257 		 + (high_offset2 - low_offset2 + idx_access2 - 1)
2258       i.e. limit = idx_len1 + idx_access1 - 1 + idx_len2 + idx_access2 - 1
2259 
2260      Combining the tests requires limit to be computable in an unsigned
2261      form of the index type; if it isn't, we fall back to the usual
2262      pointer-based checks.
2263 
2264      We can do better if DR_B is a write and if DR_A and DR_B are
2265      well-ordered in both the original and the new code (see the
2266      comment above the DR_ALIAS_* flags for details).  In this case
2267      we know that for each i in [0, n-1], the write performed by
2268      access i of DR_B occurs after access numbers j<=i of DR_A in
2269      both the original and the new code.  Any write or anti
2270      dependencies wrt those DR_A accesses are therefore maintained.
2271 
2272      We just need to make sure that each individual write in DR_B does not
2273      overlap any higher-indexed access in DR_A; such DR_A accesses happen
2274      after the DR_B access in the original code but happen before it in
2275      the new code.
2276 
2277      We know the steps for both accesses are equal, so by induction, we
2278      just need to test whether the first write of DR_B overlaps a later
2279      access of DR_A.  In other words, we need to move min1 along by
2280      one iteration:
2281 
2282        min1' = min1 + idx_step
2283 
2284      and use the ranges:
2285 
2286        [min1' + low_offset1', min1' + high_offset1' + idx_access1 - 1]
2287 
2288      and:
2289 
2290        [min2, min2 + idx_access2 - 1]
2291 
2292      where:
2293 
2294 	low_offset1' = +ve step ? 0 : -(idx_len1 - |idx_step|)
2295        high_offset1' = +ve step ? idx_len1 - |idx_step| : 0.  */
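  /* As a hypothetical check of the algebra above (positive step, general
     overlap case): with idx_len1 = idx_len2 = 4 and idx_access1
     = idx_access2 = 1 we get bias = 4 + 1 - 1 - 0 = 4 and
     limit = 4 + 0 + 4 + 0 = 8, so the test is
     0 <= min2 - min1 + 4 <= 8, i.e. |min2 - min1| <= 4, which holds
     exactly when [min1, min1 + 4] and [min2, min2 + 4] overlap.  */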
2296   if (waw_or_war_p)
2297     idx_len1 -= abs_idx_step;
2298 
2299   poly_offset_int limit = idx_len1 + idx_access1 - 1 + idx_access2 - 1;
2300   if (!waw_or_war_p)
2301     limit += idx_len2;
2302 
2303   tree utype = unsigned_type_for (TREE_TYPE (min1));
2304   if (!wi::fits_to_tree_p (limit, utype))
2305     return false;
2306 
2307   poly_offset_int low_offset1 = neg_step ? -idx_len1 : 0;
2308   poly_offset_int high_offset2 = neg_step || waw_or_war_p ? 0 : idx_len2;
2309   poly_offset_int bias = high_offset2 + idx_access2 - 1 - low_offset1;
2310   /* Equivalent to adding IDX_STEP to MIN1.  */
2311   if (waw_or_war_p)
2312     bias -= wi::to_offset (idx_step);
2313 
2314   tree subject = fold_build2 (MINUS_EXPR, utype,
2315 			      fold_convert (utype, min2),
2316 			      fold_convert (utype, min1));
2317   subject = fold_build2 (PLUS_EXPR, utype, subject,
2318 			 wide_int_to_tree (utype, bias));
2319   tree part_cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject,
2320 				     wide_int_to_tree (utype, limit));
2321   if (*cond_expr)
2322     *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2323 			      *cond_expr, part_cond_expr);
2324   else
2325     *cond_expr = part_cond_expr;
2326   if (dump_enabled_p ())
2327     {
2328       if (waw_or_war_p)
2329 	dump_printf (MSG_NOTE, "using an index-based WAR/WAW test\n");
2330       else
2331 	dump_printf (MSG_NOTE, "using an index-based overlap test\n");
2332     }
2333   return true;
2334 }
2335 
2336 /* A subroutine of create_intersect_range_checks, with a subset of the
2337    same arguments.  Try to optimize cases in which the second access
2338    is a write and in which some overlap is valid.  */
2339 
2340 static bool
2341 create_waw_or_war_checks (tree *cond_expr,
2342 			  const dr_with_seg_len_pair_t &alias_pair)
2343 {
2344   const dr_with_seg_len& dr_a = alias_pair.first;
2345   const dr_with_seg_len& dr_b = alias_pair.second;
2346 
2347   /* Check for cases in which:
2348 
2349      (a) DR_B is always a write;
2350      (b) the accesses are well-ordered in both the original and new code
2351 	 (see the comment above the DR_ALIAS_* flags for details); and
2352      (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR.  */
2353   if (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW))
2354     return false;
2355 
2356   /* Check for equal (but possibly variable) steps.  */
2357   tree step = DR_STEP (dr_a.dr);
2358   if (!operand_equal_p (step, DR_STEP (dr_b.dr)))
2359     return false;
2360 
2361   /* Make sure that we can operate on sizetype without loss of precision.  */
2362   tree addr_type = TREE_TYPE (DR_BASE_ADDRESS (dr_a.dr));
2363   if (TYPE_PRECISION (addr_type) != TYPE_PRECISION (sizetype))
2364     return false;
2365 
2366   /* All addresses involved are known to have a common alignment ALIGN.
2367      We can therefore subtract ALIGN from an exclusive endpoint to get
2368      an inclusive endpoint.  In the best (and common) case, ALIGN is the
2369      same as the access sizes of both DRs, and so subtracting ALIGN
2370      cancels out the addition of an access size.  */
2371   unsigned int align = MIN (dr_a.align, dr_b.align);
2372   poly_uint64 last_chunk_a = dr_a.access_size - align;
2373   poly_uint64 last_chunk_b = dr_b.access_size - align;
2374 
2375   /* Get a boolean expression that is true when the step is negative.  */
2376   tree indicator = dr_direction_indicator (dr_a.dr);
2377   tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
2378 			       fold_convert (ssizetype, indicator),
2379 			       ssize_int (0));
2380 
2381   /* Get lengths in sizetype.  */
2382   tree seg_len_a
2383     = fold_convert (sizetype, rewrite_to_non_trapping_overflow (dr_a.seg_len));
2384   step = fold_convert (sizetype, rewrite_to_non_trapping_overflow (step));
2385 
2386   /* Each access has the following pattern:
2387 
2388 	  <- |seg_len| ->
2389 	  <--- A: -ve step --->
2390 	  +-----+-------+-----+-------+-----+
2391 	  | n-1 | ..... |  0  | ..... | n-1 |
2392 	  +-----+-------+-----+-------+-----+
2393 			<--- B: +ve step --->
2394 			<- |seg_len| ->
2395 			|
2396 		   base address
2397 
2398      where "n" is the number of scalar iterations covered by the segment.
2399 
2400      A is the range of bytes accessed when the step is negative,
2401      B is the range when the step is positive.
2402 
2403      We know that DR_B is a write.  We also know (from checking that
2404      DR_A and DR_B are well-ordered) that for each i in [0, n-1],
2405      the write performed by access i of DR_B occurs after access numbers
2406      j<=i of DR_A in both the original and the new code.  Any write or
2407      anti dependencies wrt those DR_A accesses are therefore maintained.
2408 
2409      We just need to make sure that each individual write in DR_B does not
2410      overlap any higher-indexed access in DR_A; such DR_A accesses happen
2411      after the DR_B access in the original code but happen before it in
2412      the new code.
2413 
2414      We know the steps for both accesses are equal, so by induction, we
2415      just need to test whether the first write of DR_B overlaps a later
2416      access of DR_A.  In other words, we need to move addr_a along by
2417      one iteration:
2418 
2419        addr_a' = addr_a + step
2420 
2421      and check whether:
2422 
2423        [addr_b, addr_b + last_chunk_b]
2424 
2425      overlaps:
2426 
2427        [addr_a' + low_offset_a, addr_a' + high_offset_a + last_chunk_a]
2428 
2429      where [low_offset_a, high_offset_a] spans accesses [1, n-1].  I.e.:
2430 
2431 	low_offset_a = +ve step ? 0 : seg_len_a - step
2432        high_offset_a = +ve step ? seg_len_a - step : 0
2433 
2434      This is equivalent to testing whether:
2435 
2436        addr_a' + low_offset_a <= addr_b + last_chunk_b
2437        && addr_b <= addr_a' + high_offset_a + last_chunk_a
2438 
2439      Converting this into a single test, there is an overlap if:
2440 
2441        0 <= addr_b + last_chunk_b - addr_a' - low_offset_a <= limit
2442 
2443      where limit = high_offset_a - low_offset_a + last_chunk_a + last_chunk_b
2444 
2445      If DR_A is performed, limit + |step| - last_chunk_b is known to be
2446      less than the size of the object underlying DR_A.  We also know
2447      that last_chunk_b <= |step|; this is checked elsewhere if it isn't
2448      guaranteed at compile time.  There can therefore be no overflow if
2449      "limit" is calculated in an unsigned type with pointer precision.  */
2450   tree addr_a = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_a.dr),
2451 					 DR_OFFSET (dr_a.dr));
2452   addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));
2453 
2454   tree addr_b = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_b.dr),
2455 					 DR_OFFSET (dr_b.dr));
2456   addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));
2457 
2458   /* Advance ADDR_A by one iteration and adjust the length to compensate.  */
2459   addr_a = fold_build_pointer_plus (addr_a, step);
2460   tree seg_len_a_minus_step = fold_build2 (MINUS_EXPR, sizetype,
2461 					   seg_len_a, step);
2462   if (!CONSTANT_CLASS_P (seg_len_a_minus_step))
2463     seg_len_a_minus_step = build1 (SAVE_EXPR, sizetype, seg_len_a_minus_step);
2464 
2465   tree low_offset_a = fold_build3 (COND_EXPR, sizetype, neg_step,
2466 				   seg_len_a_minus_step, size_zero_node);
2467   if (!CONSTANT_CLASS_P (low_offset_a))
2468     low_offset_a = build1 (SAVE_EXPR, sizetype, low_offset_a);
2469 
2470   /* We could use COND_EXPR <neg_step, size_zero_node, seg_len_a_minus_step>,
2471      but it's usually more efficient to reuse the LOW_OFFSET_A result.  */
2472   tree high_offset_a = fold_build2 (MINUS_EXPR, sizetype, seg_len_a_minus_step,
2473 				    low_offset_a);
2474 
2475   /* The amount added to addr_b - addr_a'.  */
2476   tree bias = fold_build2 (MINUS_EXPR, sizetype,
2477 			   size_int (last_chunk_b), low_offset_a);
2478 
2479   tree limit = fold_build2 (MINUS_EXPR, sizetype, high_offset_a, low_offset_a);
2480   limit = fold_build2 (PLUS_EXPR, sizetype, limit,
2481 		       size_int (last_chunk_a + last_chunk_b));
2482 
2483   tree subject = fold_build2 (POINTER_DIFF_EXPR, ssizetype, addr_b, addr_a);
2484   subject = fold_build2 (PLUS_EXPR, sizetype,
2485 			 fold_convert (sizetype, subject), bias);
2486 
2487   *cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject, limit);
2488   if (dump_enabled_p ())
2489     dump_printf (MSG_NOTE, "using an address-based WAR/WAW test\n");
2490   return true;
2491 }
2492 
2493 /* If ALIGN is nonzero, set up *SEG_MIN_OUT and *SEG_MAX_OUT so that for
2494    every address ADDR accessed by D:
2495 
2496      *SEG_MIN_OUT <= ADDR (== ADDR & -ALIGN) <= *SEG_MAX_OUT
2497 
2498    In this case, every element accessed by D is aligned to at least
2499    ALIGN bytes.
2500 
2501    If ALIGN is zero then instead set *SEG_MIN_OUT and *SEG_MAX_OUT so that:
2502 
2503      *SEG_MIN_OUT <= ADDR < *SEG_MAX_OUT.  */
2504 
2505 static void
2506 get_segment_min_max (const dr_with_seg_len &d, tree *seg_min_out,
2507 		     tree *seg_max_out, HOST_WIDE_INT align)
2508 {
2509   /* Each access has the following pattern:
2510 
2511 	  <- |seg_len| ->
2512 	  <--- A: -ve step --->
2513 	  +-----+-------+-----+-------+-----+
2514 	  | n-1 | ..... |  0  | ..... | n-1 |
2515 	  +-----+-------+-----+-------+-----+
2516 			<--- B: +ve step --->
2517 			<- |seg_len| ->
2518 			|
2519 		   base address
2520 
2521      where "n" is the number of scalar iterations covered by the segment.
2522      (This should be VF for a particular pair if we know that both steps
2523      are the same, otherwise it will be the full number of scalar loop
2524      iterations.)
2525 
2526      A is the range of bytes accessed when the step is negative,
2527      B is the range when the step is positive.
2528 
2529      If the access size is "access_size" bytes, the lowest addressed byte is:
2530 
2531 	 base + (step < 0 ? seg_len : 0)   [LB]
2532 
2533      and the highest addressed byte is always below:
2534 
2535 	 base + (step < 0 ? 0 : seg_len) + access_size   [UB]
2536 
2537      Thus:
2538 
2539 	 LB <= ADDR < UB
2540 
2541      If ALIGN is nonzero, all three values are aligned to at least ALIGN
2542      bytes, so:
2543 
2544 	 LB <= ADDR <= UB - ALIGN
2545 
2546      where "- ALIGN" folds naturally with the "+ access_size" and often
2547      cancels it out.
2548 
2549      We don't try to simplify LB and UB beyond this (e.g. by using
2550      MIN and MAX based on whether seg_len rather than the stride is
2551      negative) because it is possible for the absolute size of the
2552      segment to overflow the range of a ssize_t.
2553 
2554      Keeping the pointer_plus outside of the cond_expr should allow
2555      the cond_exprs to be shared with other alias checks.  */
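  /* For instance, with a positive step, seg_len = 12, access_size = 4 and
     align = 4, LB = base and UB = base + 12 + 4; subtracting ALIGN gives
     the inclusive maximum base + 12, the address of the last element
     accessed.  */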
2556   tree indicator = dr_direction_indicator (d.dr);
2557   tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
2558 			       fold_convert (ssizetype, indicator),
2559 			       ssize_int (0));
2560   tree addr_base = fold_build_pointer_plus (DR_BASE_ADDRESS (d.dr),
2561 					    DR_OFFSET (d.dr));
2562   addr_base = fold_build_pointer_plus (addr_base, DR_INIT (d.dr));
2563   tree seg_len
2564     = fold_convert (sizetype, rewrite_to_non_trapping_overflow (d.seg_len));
2565 
2566   tree min_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
2567 				seg_len, size_zero_node);
2568   tree max_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
2569 				size_zero_node, seg_len);
2570   max_reach = fold_build2 (PLUS_EXPR, sizetype, max_reach,
2571 			   size_int (d.access_size - align));
2572 
2573   *seg_min_out = fold_build_pointer_plus (addr_base, min_reach);
2574   *seg_max_out = fold_build_pointer_plus (addr_base, max_reach);
2575 }
2576 
2577 /* Generate a runtime condition that is true if ALIAS_PAIR is free of aliases,
2578    storing the condition in *COND_EXPR.  The fallback is to generate a
2579    a test that the two accesses do not overlap:
2580 
2581      end_a <= start_b || end_b <= start_a.  */
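/* For example, the byte segments [start_a, end_a) = [0, 16) and
   [start_b, end_b) = [16, 32) do not overlap, because end_a <= start_b
   holds.  */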
2582 
2583 static void
2584 create_intersect_range_checks (class loop *loop, tree *cond_expr,
2585 			       const dr_with_seg_len_pair_t &alias_pair)
2586 {
2587   const dr_with_seg_len& dr_a = alias_pair.first;
2588   const dr_with_seg_len& dr_b = alias_pair.second;
2589   *cond_expr = NULL_TREE;
2590   if (create_intersect_range_checks_index (loop, cond_expr, alias_pair))
2591     return;
2592 
2593   if (create_ifn_alias_checks (cond_expr, alias_pair))
2594     return;
2595 
2596   if (create_waw_or_war_checks (cond_expr, alias_pair))
2597     return;
2598 
2599   unsigned HOST_WIDE_INT min_align;
2600   tree_code cmp_code;
2601   /* We don't have to check DR_ALIAS_MIXED_STEPS here, since both versions
2602      are equivalent.  This is just an optimization heuristic.  */
2603   if (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST
2604       && TREE_CODE (DR_STEP (dr_b.dr)) == INTEGER_CST)
2605     {
2606       /* In this case adding access_size to seg_len is likely to give
2607 	 a simple X * step, where X is either the number of scalar
2608 	 iterations or the vectorization factor.  We're better off
2609 	 keeping that, rather than subtracting an alignment from it.
2610 
2611 	 In this case the maximum values are exclusive and so there is
2612 	 no alias if the maximum of one segment equals the minimum
2613 	 of another.  */
2614       min_align = 0;
2615       cmp_code = LE_EXPR;
2616     }
2617   else
2618     {
2619       /* Calculate the minimum alignment shared by all four pointers,
2620 	 then arrange for this alignment to be subtracted from the
2621 	 exclusive maximum values to get inclusive maximum values.
2622 	 This "- min_align" is cumulative with a "+ access_size"
2623 	 in the calculation of the maximum values.  In the best
2624 	 (and common) case, the two cancel each other out, leaving
2625 	 us with an inclusive bound based only on seg_len.  In the
2626 	 worst case we're simply adding a smaller number than before.
2627 
2628 	 Because the maximum values are inclusive, there is an alias
2629 	 if the maximum value of one segment is equal to the minimum
2630 	 value of the other.  */
2631       min_align = std::min (dr_a.align, dr_b.align);
2632       min_align = std::min (min_align, known_alignment (dr_a.access_size));
2633       min_align = std::min (min_align, known_alignment (dr_b.access_size));
2634       cmp_code = LT_EXPR;
2635     }
2636 
2637   tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
2638   get_segment_min_max (dr_a, &seg_a_min, &seg_a_max, min_align);
2639   get_segment_min_max (dr_b, &seg_b_min, &seg_b_max, min_align);
2640 
2641   *cond_expr
2642     = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2643 	fold_build2 (cmp_code, boolean_type_node, seg_a_max, seg_b_min),
2644 	fold_build2 (cmp_code, boolean_type_node, seg_b_max, seg_a_min));
2645   if (dump_enabled_p ())
2646     dump_printf (MSG_NOTE, "using an address-based overlap test\n");
2647 }
2648 
2649 /* Create a conditional expression that represents the run-time checks for
2650    overlapping of address ranges represented by a list of data references
2651    pairs passed in ALIAS_PAIRS.  Data references are in LOOP.  The returned
2652    COND_EXPR is the conditional expression to be used in the if statement
2653    that controls which version of the loop gets executed at runtime.  */
2654 
2655 void
2656 create_runtime_alias_checks (class loop *loop,
2657 			     const vec<dr_with_seg_len_pair_t> *alias_pairs,
2658 			     tree * cond_expr)
2659 {
2660   tree part_cond_expr;
2661 
2662   fold_defer_overflow_warnings ();
2663   for (const dr_with_seg_len_pair_t &alias_pair : alias_pairs)
2664     {
2665       gcc_assert (alias_pair.flags);
2666       if (dump_enabled_p ())
2667 	dump_printf (MSG_NOTE,
2668 		     "create runtime check for data references %T and %T\n",
2669 		     DR_REF (alias_pair.first.dr),
2670 		     DR_REF (alias_pair.second.dr));
2671 
2672       /* Create a condition expression for each pair of data references.  */
2673       create_intersect_range_checks (loop, &part_cond_expr, alias_pair);
2674       if (*cond_expr)
2675 	*cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2676 				  *cond_expr, part_cond_expr);
2677       else
2678 	*cond_expr = part_cond_expr;
2679     }
2680   fold_undefer_and_ignore_overflow_warnings ();
2681 }
2682 
2683 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
2684    expressions.  */
2685 static bool
2686 dr_equal_offsets_p1 (tree offset1, tree offset2)
2687 {
2688   bool res;
2689 
2690   STRIP_NOPS (offset1);
2691   STRIP_NOPS (offset2);
2692 
2693   if (offset1 == offset2)
2694     return true;
2695 
2696   if (TREE_CODE (offset1) != TREE_CODE (offset2)
2697       || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
2698     return false;
2699 
2700   res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
2701                              TREE_OPERAND (offset2, 0));
2702 
2703   if (!res || !BINARY_CLASS_P (offset1))
2704     return res;
2705 
2706   res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
2707                              TREE_OPERAND (offset2, 1));
2708 
2709   return res;
2710 }
2711 
2712 /* Check if DRA and DRB have equal offsets.  */
2713 bool
2714 dr_equal_offsets_p (struct data_reference *dra,
2715                     struct data_reference *drb)
2716 {
2717   tree offset1, offset2;
2718 
2719   offset1 = DR_OFFSET (dra);
2720   offset2 = DR_OFFSET (drb);
2721 
2722   return dr_equal_offsets_p1 (offset1, offset2);
2723 }
2724 
2725 /* Returns true if FNA == FNB.  */
2726 
2727 static bool
2728 affine_function_equal_p (affine_fn fna, affine_fn fnb)
2729 {
2730   unsigned i, n = fna.length ();
2731 
2732   if (n != fnb.length ())
2733     return false;
2734 
2735   for (i = 0; i < n; i++)
2736     if (!operand_equal_p (fna[i], fnb[i], 0))
2737       return false;
2738 
2739   return true;
2740 }
2741 
2742 /* If all the functions in CF are the same, returns one of them,
2743    otherwise returns NULL.  */
2744 
2745 static affine_fn
2746 common_affine_function (conflict_function *cf)
2747 {
2748   unsigned i;
2749   affine_fn comm;
2750 
2751   if (!CF_NONTRIVIAL_P (cf))
2752     return affine_fn ();
2753 
2754   comm = cf->fns[0];
2755 
2756   for (i = 1; i < cf->n; i++)
2757     if (!affine_function_equal_p (comm, cf->fns[i]))
2758       return affine_fn ();
2759 
2760   return comm;
2761 }
2762 
2763 /* Returns the base of the affine function FN.  */
2764 
2765 static tree
2766 affine_function_base (affine_fn fn)
2767 {
2768   return fn[0];
2769 }
2770 
2771 /* Returns true if FN is a constant.  */
2772 
2773 static bool
2774 affine_function_constant_p (affine_fn fn)
2775 {
2776   unsigned i;
2777   tree coef;
2778 
2779   for (i = 1; fn.iterate (i, &coef); i++)
2780     if (!integer_zerop (coef))
2781       return false;
2782 
2783   return true;
2784 }
2785 
2786 /* Returns true if FN is the zero constant function.  */
2787 
2788 static bool
2789 affine_function_zero_p (affine_fn fn)
2790 {
2791   return (integer_zerop (affine_function_base (fn))
2792 	  && affine_function_constant_p (fn));
2793 }
2794 
2795 /* Returns a signed integer type with the largest precision from TA
2796    and TB.  */
2797 
2798 static tree
2799 signed_type_for_types (tree ta, tree tb)
2800 {
2801   if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
2802     return signed_type_for (ta);
2803   else
2804     return signed_type_for (tb);
2805 }
2806 
2807 /* Applies operation OP on affine functions FNA and FNB, and returns the
2808    result.  */
2809 
2810 static affine_fn
2811 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
2812 {
2813   unsigned i, n, m;
2814   affine_fn ret;
2815   tree coef;
2816 
2817   if (fnb.length () > fna.length ())
2818     {
2819       n = fna.length ();
2820       m = fnb.length ();
2821     }
2822   else
2823     {
2824       n = fnb.length ();
2825       m = fna.length ();
2826     }
2827 
2828   ret.create (m);
2829   for (i = 0; i < n; i++)
2830     {
2831       tree type = signed_type_for_types (TREE_TYPE (fna[i]),
2832 					 TREE_TYPE (fnb[i]));
2833       ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
2834     }
2835 
2836   for (; fna.iterate (i, &coef); i++)
2837     ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2838 				 coef, integer_zero_node));
2839   for (; fnb.iterate (i, &coef); i++)
2840     ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2841 				 integer_zero_node, coef));
2842 
2843   return ret;
2844 }
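/* For example, adding FNA = [1, 2] (i.e. 1 + 2*x1) to FNB = [3, 4, 5]
   (i.e. 3 + 4*x1 + 5*x2) pads the shorter function with zero
   coefficients and yields [4, 6, 5].  */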
2845 
2846 /* Returns the sum of affine functions FNA and FNB.  */
2847 
2848 static affine_fn
2849 affine_fn_plus (affine_fn fna, affine_fn fnb)
2850 {
2851   return affine_fn_op (PLUS_EXPR, fna, fnb);
2852 }
2853 
2854 /* Returns the difference of affine functions FNA and FNB.  */
2855 
2856 static affine_fn
2857 affine_fn_minus (affine_fn fna, affine_fn fnb)
2858 {
2859   return affine_fn_op (MINUS_EXPR, fna, fnb);
2860 }
2861 
2862 /* Frees affine function FN.  */
2863 
2864 static void
2865 affine_fn_free (affine_fn fn)
2866 {
2867   fn.release ();
2868 }
2869 
2870 /* For each subscript in the data dependence relation DDR, determine
2871    the distance.  */
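/* For example, if the common affine functions are fn_a (i) = i + 10 and
   fn_b (i) = i + 3, their difference 7 is constant, so the subscript
   distance is 7; if the difference still depended on i, the distance
   would be chrec_dont_know.  */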
2872 
2873 static void
2874 compute_subscript_distance (struct data_dependence_relation *ddr)
2875 {
2876   conflict_function *cf_a, *cf_b;
2877   affine_fn fn_a, fn_b, diff;
2878 
2879   if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
2880     {
2881       unsigned int i;
2882 
2883       for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2884  	{
2885  	  struct subscript *subscript;
2886 
2887  	  subscript = DDR_SUBSCRIPT (ddr, i);
2888  	  cf_a = SUB_CONFLICTS_IN_A (subscript);
2889  	  cf_b = SUB_CONFLICTS_IN_B (subscript);
2890 
2891 	  fn_a = common_affine_function (cf_a);
2892 	  fn_b = common_affine_function (cf_b);
2893 	  if (!fn_a.exists () || !fn_b.exists ())
2894 	    {
2895 	      SUB_DISTANCE (subscript) = chrec_dont_know;
2896 	      return;
2897 	    }
2898 	  diff = affine_fn_minus (fn_a, fn_b);
2899 
2900  	  if (affine_function_constant_p (diff))
2901  	    SUB_DISTANCE (subscript) = affine_function_base (diff);
2902  	  else
2903  	    SUB_DISTANCE (subscript) = chrec_dont_know;
2904 
2905 	  affine_fn_free (diff);
2906  	}
2907     }
2908 }
2909 
2910 /* Returns the conflict function for "unknown".  */
2911 
2912 static conflict_function *
2913 conflict_fn_not_known (void)
2914 {
2915   conflict_function *fn = XCNEW (conflict_function);
2916   fn->n = NOT_KNOWN;
2917 
2918   return fn;
2919 }
2920 
2921 /* Returns the conflict function for "independent".  */
2922 
2923 static conflict_function *
2924 conflict_fn_no_dependence (void)
2925 {
2926   conflict_function *fn = XCNEW (conflict_function);
2927   fn->n = NO_DEPENDENCE;
2928 
2929   return fn;
2930 }
2931 
2932 /* Returns true if the address of OBJ is invariant in LOOP.  */
2933 
2934 static bool
2935 object_address_invariant_in_loop_p (const class loop *loop, const_tree obj)
2936 {
2937   while (handled_component_p (obj))
2938     {
2939       if (TREE_CODE (obj) == ARRAY_REF)
2940 	{
2941 	  for (int i = 1; i < 4; ++i)
2942 	    if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, i),
2943 							loop->num))
2944 	      return false;
2945 	}
2946       else if (TREE_CODE (obj) == COMPONENT_REF)
2947 	{
2948 	  if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
2949 						      loop->num))
2950 	    return false;
2951 	}
2952       obj = TREE_OPERAND (obj, 0);
2953     }
2954 
2955   if (!INDIRECT_REF_P (obj)
2956       && TREE_CODE (obj) != MEM_REF)
2957     return true;
2958 
2959   return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
2960 						  loop->num);
2961 }
2962 
2963 /* Returns false if we can prove that data references A and B do not alias,
2964    true otherwise.  If LOOP_NEST is NULL, no cross-iteration aliases are
2965    considered.  */
2966 
2967 bool
2968 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
2969 		class loop *loop_nest)
2970 {
2971   tree addr_a = DR_BASE_OBJECT (a);
2972   tree addr_b = DR_BASE_OBJECT (b);
2973 
2974   /* If we are not processing a loop nest but scalar code we
2975      do not need to care about possible cross-iteration dependences
2976      and thus can process the full original reference.  Do so,
2977      similar to how loop invariant motion applies extra offset-based
2978      disambiguation.  */
2979   if (!loop_nest)
2980     {
2981       aff_tree off1, off2;
2982       poly_widest_int size1, size2;
2983       get_inner_reference_aff (DR_REF (a), &off1, &size1);
2984       get_inner_reference_aff (DR_REF (b), &off2, &size2);
2985       aff_combination_scale (&off1, -1);
2986       aff_combination_add (&off2, &off1);
2987       if (aff_comb_cannot_overlap_p (&off2, size1, size2))
2988 	return false;
2989     }
2990 
2991   if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
2992       && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
2993       /* For cross-iteration dependences the cliques must be valid for the
2994 	 whole loop, not just individual iterations.  */
2995       && (!loop_nest
2996 	  || MR_DEPENDENCE_CLIQUE (addr_a) == 1
2997 	  || MR_DEPENDENCE_CLIQUE (addr_a) == loop_nest->owned_clique)
2998       && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
2999       && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
3000     return false;
3001 
3002   /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
3003      do not know the size of the base-object.  So we cannot do any
3004      offset/overlap based analysis but have to rely on points-to
3005      information only.  */
3006   if (TREE_CODE (addr_a) == MEM_REF
3007       && (DR_UNCONSTRAINED_BASE (a)
3008 	  || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
3009     {
3010       /* For true dependences we can apply TBAA.  */
3011       if (flag_strict_aliasing
3012 	  && DR_IS_WRITE (a) && DR_IS_READ (b)
3013 	  && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
3014 				     get_alias_set (DR_REF (b))))
3015 	return false;
3016       if (TREE_CODE (addr_b) == MEM_REF)
3017 	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3018 				       TREE_OPERAND (addr_b, 0));
3019       else
3020 	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3021 				       build_fold_addr_expr (addr_b));
3022     }
3023   else if (TREE_CODE (addr_b) == MEM_REF
3024 	   && (DR_UNCONSTRAINED_BASE (b)
3025 	       || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
3026     {
3027       /* For true dependences we can apply TBAA.  */
3028       if (flag_strict_aliasing
3029 	  && DR_IS_WRITE (a) && DR_IS_READ (b)
3030 	  && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
3031 				     get_alias_set (DR_REF (b))))
3032 	return false;
3033       if (TREE_CODE (addr_a) == MEM_REF)
3034 	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3035 				       TREE_OPERAND (addr_b, 0));
3036       else
3037 	return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
3038 				       TREE_OPERAND (addr_b, 0));
3039     }
3040 
3041   /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
3042      that is being subsetted in the loop nest.  */
3043   if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
3044     return refs_output_dependent_p (addr_a, addr_b);
3045   else if (DR_IS_READ (a) && DR_IS_WRITE (b))
3046     return refs_anti_dependent_p (addr_a, addr_b);
3047   return refs_may_alias_p (addr_a, addr_b);
3048 }
3049 
3050 /* REF_A and REF_B both satisfy access_fn_component_p.  Return true
3051    if it is meaningful to compare their associated access functions
3052    when checking for dependencies.  */
3053 
3054 static bool
3055 access_fn_components_comparable_p (tree ref_a, tree ref_b)
3056 {
3057   /* Allow pairs of component refs from the following sets:
3058 
3059        { REALPART_EXPR, IMAGPART_EXPR }
3060        { COMPONENT_REF }
3061        { ARRAY_REF }.  */
3062   tree_code code_a = TREE_CODE (ref_a);
3063   tree_code code_b = TREE_CODE (ref_b);
3064   if (code_a == IMAGPART_EXPR)
3065     code_a = REALPART_EXPR;
3066   if (code_b == IMAGPART_EXPR)
3067     code_b = REALPART_EXPR;
3068   if (code_a != code_b)
3069     return false;
3070 
3071   if (TREE_CODE (ref_a) == COMPONENT_REF)
3072     /* ??? We cannot simply use the type of operand #0 of the refs here as
3073        the Fortran compiler smuggles type punning into COMPONENT_REFs.
3074        Use the DECL_CONTEXT of the FIELD_DECLs instead.  */
3075     return (DECL_CONTEXT (TREE_OPERAND (ref_a, 1))
3076 	    == DECL_CONTEXT (TREE_OPERAND (ref_b, 1)));
3077 
3078   return types_compatible_p (TREE_TYPE (TREE_OPERAND (ref_a, 0)),
3079 			     TREE_TYPE (TREE_OPERAND (ref_b, 0)));
3080 }
3081 
3082 /* Initialize a data dependence relation RES in LOOP_NEST.  USE_ALT_INDICES
3083    is true when the main indices of A and B were not comparable so we try again
3084    with alternate indices computed on an indirect reference.  */
3085 
3086 struct data_dependence_relation *
3087 initialize_data_dependence_relation (struct data_dependence_relation *res,
3088 				     vec<loop_p> loop_nest,
3089 				     bool use_alt_indices)
3090 {
3091   struct data_reference *a = DDR_A (res);
3092   struct data_reference *b = DDR_B (res);
3093   unsigned int i;
3094 
3095   struct indices *indices_a = &a->indices;
3096   struct indices *indices_b = &b->indices;
3097   if (use_alt_indices)
3098     {
3099       if (TREE_CODE (DR_REF (a)) != MEM_REF)
3100 	indices_a = &a->alt_indices;
3101       if (TREE_CODE (DR_REF (b)) != MEM_REF)
3102 	indices_b = &b->alt_indices;
3103     }
3104   unsigned int num_dimensions_a = indices_a->access_fns.length ();
3105   unsigned int num_dimensions_b = indices_b->access_fns.length ();
3106   if (num_dimensions_a == 0 || num_dimensions_b == 0)
3107     {
3108       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3109       return res;
3110     }
3111 
3112   /* For unconstrained bases, the root (highest-indexed) subscript
3113      describes a variation in the base of the original DR_REF rather
3114      than a component access.  We have no type that accurately describes
3115      the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after*
3116      applying this subscript) so limit the search to the last real
3117      component access.
3118 
3119      E.g. for:
3120 
3121 	void
3122 	f (int a[][8], int b[][8])
3123 	{
3124 	  for (int i = 0; i < 8; ++i)
3125 	    a[i * 2][0] = b[i][0];
3126 	}
3127 
3128      the a and b accesses have a single ARRAY_REF component reference [0]
3129      but have two subscripts.  */
3130   if (indices_a->unconstrained_base)
3131     num_dimensions_a -= 1;
3132   if (indices_b->unconstrained_base)
3133     num_dimensions_b -= 1;
3134 
3135   /* These structures describe sequences of component references in
3136      DR_REF (A) and DR_REF (B).  Each component reference is tied to a
3137      specific access function.  */
3138   struct {
3139     /* The sequence starts at DR_ACCESS_FN (A, START_A) of A and
3140        DR_ACCESS_FN (B, START_B) of B (inclusive) and extends to higher
3141        indices.  In C notation, these are the indices of the rightmost
3142        component references; e.g. for a sequence .b.c.d, the start
3143        index is for .d.  */
3144     unsigned int start_a;
3145     unsigned int start_b;
3146 
3147     /* The sequence contains LENGTH consecutive access functions from
3148        each DR.  */
3149     unsigned int length;
3150 
3151     /* The enclosing objects for the A and B sequences respectively,
3152        i.e. the objects to which DR_ACCESS_FN (A, START_A + LENGTH - 1)
3153        and DR_ACCESS_FN (B, START_B + LENGTH - 1) are applied.  */
3154     tree object_a;
3155     tree object_b;
3156   } full_seq = {}, struct_seq = {};
3157 
3158   /* Before each iteration of the loop:
3159 
3160      - REF_A is what you get after applying DR_ACCESS_FN (A, INDEX_A) and
3161      - REF_B is what you get after applying DR_ACCESS_FN (B, INDEX_B).  */
3162   unsigned int index_a = 0;
3163   unsigned int index_b = 0;
3164   tree ref_a = DR_REF (a);
3165   tree ref_b = DR_REF (b);
3166 
3167   /* Now walk the component references from the final DR_REFs back up to
3168      the enclosing base objects.  Each component reference corresponds
3169      to one access function in the DR, with access function 0 being for
3170      the final DR_REF and the highest-indexed access function being the
3171      one that is applied to the base of the DR.
3172 
3173      Look for a sequence of component references whose access functions
3174      are comparable (see access_fn_components_comparable_p).  If more
3175      than one such sequence exists, pick the one nearest the base
3176      (which is the leftmost sequence in C notation).  Store this sequence
3177      in FULL_SEQ.
3178 
3179      For example, if we have:
3180 
3181 	struct foo { struct bar s; ... } (*a)[10], (*b)[10];
3182 
3183 	A: a[0][i].s.c.d
3184 	B: __real b[0][i].s.e[i].f
3185 
3186      (where d is the same type as the real component of f) then the access
3187      functions would be:
3188 
3189 			 0   1   2   3
3190 	A:              .d  .c  .s [i]
3191 
3192 		 0   1   2   3   4   5
3193 	B:  __real  .f [i]  .e  .s [i]
3194 
3195      The A0/B2 column isn't comparable, since .d is a COMPONENT_REF
3196      and [i] is an ARRAY_REF.  However, the A1/B3 column contains two
3197      COMPONENT_REF accesses for struct bar, so is comparable.  Likewise
3198      the A2/B4 column contains two COMPONENT_REF accesses for struct foo,
3199      so is comparable.  The A3/B5 column contains two ARRAY_REFs that
3200      index foo[10] arrays, so is again comparable.  The sequence is
3201      therefore:
3202 
3203         A: [1, 3]  (i.e. [i].s.c)
3204         B: [3, 5]  (i.e. [i].s.e)
3205 
3206      Also look for sequences of component references whose access
3207      functions are comparable and whose enclosing objects have the same
3208      RECORD_TYPE.  Store this sequence in STRUCT_SEQ.  In the above
3209      example, STRUCT_SEQ would be:
3210 
3211         A: [1, 2]  (i.e. s.c)
3212         B: [3, 4]  (i.e. s.e)  */
3213   while (index_a < num_dimensions_a && index_b < num_dimensions_b)
3214     {
3215       /* The alternate indices form always has a single dimension
3216 	 with unconstrained base.  */
3217       gcc_assert (!use_alt_indices);
3218 
3219       /* REF_A and REF_B must be one of the component access types
3220 	 allowed by dr_analyze_indices.  */
3221       gcc_checking_assert (access_fn_component_p (ref_a));
3222       gcc_checking_assert (access_fn_component_p (ref_b));
3223 
3224       /* Get the immediately-enclosing objects for REF_A and REF_B,
3225 	 i.e. the references *before* applying DR_ACCESS_FN (A, INDEX_A)
3226 	 and DR_ACCESS_FN (B, INDEX_B).  */
3227       tree object_a = TREE_OPERAND (ref_a, 0);
3228       tree object_b = TREE_OPERAND (ref_b, 0);
3229 
3230       tree type_a = TREE_TYPE (object_a);
3231       tree type_b = TREE_TYPE (object_b);
3232       if (access_fn_components_comparable_p (ref_a, ref_b))
3233 	{
3234 	  /* This pair of component accesses is comparable for dependence
3235 	     analysis, so we can include DR_ACCESS_FN (A, INDEX_A) and
3236 	     DR_ACCESS_FN (B, INDEX_B) in the sequence.  */
3237 	  if (full_seq.start_a + full_seq.length != index_a
3238 	      || full_seq.start_b + full_seq.length != index_b)
3239 	    {
3240 	      /* The accesses don't extend the current sequence,
3241 		 so start a new one here.  */
3242 	      full_seq.start_a = index_a;
3243 	      full_seq.start_b = index_b;
3244 	      full_seq.length = 0;
3245 	    }
3246 
3247 	  /* Add this pair of references to the sequence.  */
3248 	  full_seq.length += 1;
3249 	  full_seq.object_a = object_a;
3250 	  full_seq.object_b = object_b;
3251 
3252 	  /* If the enclosing objects are structures (and thus have the
3253 	     same RECORD_TYPE), record the new sequence in STRUCT_SEQ.  */
3254 	  if (TREE_CODE (type_a) == RECORD_TYPE)
3255 	    struct_seq = full_seq;
3256 
3257 	  /* Move to the next containing reference for both A and B.  */
3258 	  ref_a = object_a;
3259 	  ref_b = object_b;
3260 	  index_a += 1;
3261 	  index_b += 1;
3262 	  continue;
3263 	}
3264 
3265       /* Try to approach equal type sizes.  */
3266       if (!COMPLETE_TYPE_P (type_a)
3267 	  || !COMPLETE_TYPE_P (type_b)
3268 	  || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a))
3269 	  || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b)))
3270 	break;
3271 
3272       unsigned HOST_WIDE_INT size_a = tree_to_uhwi (TYPE_SIZE_UNIT (type_a));
3273       unsigned HOST_WIDE_INT size_b = tree_to_uhwi (TYPE_SIZE_UNIT (type_b));
3274       if (size_a <= size_b)
3275 	{
3276 	  index_a += 1;
3277 	  ref_a = object_a;
3278 	}
3279       if (size_b <= size_a)
3280 	{
3281 	  index_b += 1;
3282 	  ref_b = object_b;
3283 	}
3284     }
3285 
3286   /* See whether FULL_SEQ ends at the base and whether the two bases
3287      are equal.  We do not care about TBAA or alignment info so we can
3288      use OEP_ADDRESS_OF to avoid false negatives.  */
3289   tree base_a = indices_a->base_object;
3290   tree base_b = indices_b->base_object;
3291   bool same_base_p = (full_seq.start_a + full_seq.length == num_dimensions_a
3292 		      && full_seq.start_b + full_seq.length == num_dimensions_b
3293 		      && (indices_a->unconstrained_base
3294 			  == indices_b->unconstrained_base)
3295 		      && operand_equal_p (base_a, base_b, OEP_ADDRESS_OF)
3296 		      && (types_compatible_p (TREE_TYPE (base_a),
3297 					      TREE_TYPE (base_b))
3298 			  || (!base_supports_access_fn_components_p (base_a)
3299 			      && !base_supports_access_fn_components_p (base_b)
3300 			      && operand_equal_p
3301 				   (TYPE_SIZE (TREE_TYPE (base_a)),
3302 				    TYPE_SIZE (TREE_TYPE (base_b)), 0)))
3303 		      && (!loop_nest.exists ()
3304 			  || (object_address_invariant_in_loop_p
3305 			      (loop_nest[0], base_a))));
3306 
3307   /* If the bases are the same, we can include the base variation too.
3308      E.g. the b accesses in:
3309 
3310        for (int i = 0; i < n; ++i)
3311          b[i + 4][0] = b[i][0];
3312 
3313      have a definite dependence distance of 4, while for:
3314 
3315        for (int i = 0; i < n; ++i)
3316          a[i + 4][0] = b[i][0];
3317 
3318      the dependence distance depends on the gap between a and b.
3319 
3320      If the bases are different then we can only rely on the sequence
3321      rooted at a structure access, since arrays are allowed to overlap
3322      arbitrarily and change shape arbitrarily.  E.g. we treat this as
3323      valid code:
3324 
3325        int a[256];
3326        ...
3327        ((int (*)[4][3]) &a[1])[i][0] += ((int (*)[4][3]) &a[2])[i][0];
3328 
3329      where two lvalues with the same int[4][3] type overlap, and where
3330      both lvalues are distinct from the object's declared type.  */
3331   if (same_base_p)
3332     {
3333       if (indices_a->unconstrained_base)
3334 	full_seq.length += 1;
3335     }
3336   else
3337     full_seq = struct_seq;
3338 
3339   /* Punt if we didn't find a suitable sequence.  */
3340   if (full_seq.length == 0)
3341     {
3342       if (use_alt_indices
3343 	  || (TREE_CODE (DR_REF (a)) == MEM_REF
3344 	      && TREE_CODE (DR_REF (b)) == MEM_REF)
3345 	  || may_be_nonaddressable_p (DR_REF (a))
3346 	  || may_be_nonaddressable_p (DR_REF (b)))
3347 	{
3348 	  /* Fully exhausted possibilities.  */
3349 	  DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3350 	  return res;
3351 	}
3352 
3353       /* Try evaluating both DRs as dereferences of pointers.  */
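      /* For instance (an assumed illustration, not from the original
	 comment): a DR_REF such as a[i].f is rewritten as the
	 equivalent indirect reference

	   MEM <type> [(&a[i].f), 0]

	 so that dr_analyze_indices sees a single-dimension access
	 with an unconstrained base.  */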
3354       if (!a->alt_indices.base_object
3355 	  && TREE_CODE (DR_REF (a)) != MEM_REF)
3356 	{
3357 	  tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (a)),
3358 				 build1 (ADDR_EXPR, ptr_type_node, DR_REF (a)),
3359 				 build_int_cst
3360 				   (reference_alias_ptr_type (DR_REF (a)), 0));
3361 	  dr_analyze_indices (&a->alt_indices, alt_ref,
3362 			      loop_preheader_edge (loop_nest[0]),
3363 			      loop_containing_stmt (DR_STMT (a)));
3364 	}
3365       if (!b->alt_indices.base_object
3366 	  && TREE_CODE (DR_REF (b)) != MEM_REF)
3367 	{
3368 	  tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (b)),
3369 				 build1 (ADDR_EXPR, ptr_type_node, DR_REF (b)),
3370 				 build_int_cst
3371 				   (reference_alias_ptr_type (DR_REF (b)), 0));
3372 	  dr_analyze_indices (&b->alt_indices, alt_ref,
3373 			      loop_preheader_edge (loop_nest[0]),
3374 			      loop_containing_stmt (DR_STMT (b)));
3375 	}
3376       return initialize_data_dependence_relation (res, loop_nest, true);
3377     }
3378 
3379   if (!same_base_p)
3380     {
3381       /* Partial overlap is possible for different bases when strict aliasing
3382 	 is not in effect.  It's also possible if either base involves a union
3383 	 access; e.g. for:
3384 
3385 	   struct s1 { int a[2]; };
3386 	   struct s2 { struct s1 b; int c; };
3387 	   struct s3 { int d; struct s1 e; };
3388 	   union u { struct s2 f; struct s3 g; } *p, *q;
3389 
3390 	 the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at
3391 	 "p->g.e" (base "p->g") and might partially overlap the s1 at
3392 	 "q->g.e" (base "q->g").  */
3393       if (!flag_strict_aliasing
3394 	  || ref_contains_union_access_p (full_seq.object_a)
3395 	  || ref_contains_union_access_p (full_seq.object_b))
3396 	{
3397 	  DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3398 	  return res;
3399 	}
3400 
3401       DDR_COULD_BE_INDEPENDENT_P (res) = true;
3402       if (!loop_nest.exists ()
3403 	  || (object_address_invariant_in_loop_p (loop_nest[0],
3404 						  full_seq.object_a)
3405 	      && object_address_invariant_in_loop_p (loop_nest[0],
3406 						     full_seq.object_b)))
3407 	{
3408 	  DDR_OBJECT_A (res) = full_seq.object_a;
3409 	  DDR_OBJECT_B (res) = full_seq.object_b;
3410 	}
3411     }
3412 
3413   DDR_AFFINE_P (res) = true;
3414   DDR_ARE_DEPENDENT (res) = NULL_TREE;
3415   DDR_SUBSCRIPTS (res).create (full_seq.length);
3416   DDR_LOOP_NEST (res) = loop_nest;
3417   DDR_SELF_REFERENCE (res) = false;
3418 
3419   for (i = 0; i < full_seq.length; ++i)
3420     {
3421       struct subscript *subscript;
3422 
3423       subscript = XNEW (struct subscript);
3424       SUB_ACCESS_FN (subscript, 0) = indices_a->access_fns[full_seq.start_a + i];
3425       SUB_ACCESS_FN (subscript, 1) = indices_b->access_fns[full_seq.start_b + i];
3426       SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
3427       SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
3428       SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
3429       SUB_DISTANCE (subscript) = chrec_dont_know;
3430       DDR_SUBSCRIPTS (res).safe_push (subscript);
3431     }
3432 
3433   return res;
3434 }
3435 
3436 /* Initialize a data dependence relation between data accesses A and
3437    B.  LOOP_NEST is the vector of loops surrounding the references;
3438    its length gives the size of the classic distance/direction vectors.  */
3439 
3440 struct data_dependence_relation *
3441 initialize_data_dependence_relation (struct data_reference *a,
3442 				     struct data_reference *b,
3443 				     vec<loop_p> loop_nest)
3444 {
3445   data_dependence_relation *res = XCNEW (struct data_dependence_relation);
3446   DDR_A (res) = a;
3447   DDR_B (res) = b;
3448   DDR_LOOP_NEST (res).create (0);
3449   DDR_SUBSCRIPTS (res).create (0);
3450   DDR_DIR_VECTS (res).create (0);
3451   DDR_DIST_VECTS (res).create (0);
3452 
3453   if (a == NULL || b == NULL)
3454     {
3455       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3456       return res;
3457     }
3458 
3459   /* If the data references do not alias, then they are independent.  */
3460   if (!dr_may_alias_p (a, b, loop_nest.exists () ? loop_nest[0] : NULL))
3461     {
3462       DDR_ARE_DEPENDENT (res) = chrec_known;
3463       return res;
3464     }
3465 
3466   return initialize_data_dependence_relation (res, loop_nest, false);
3467 }
3468 
3469 
3470 /* Frees memory used by the conflict function F.  */
3471 
3472 static void
3473 free_conflict_function (conflict_function *f)
3474 {
3475   unsigned i;
3476 
3477   if (CF_NONTRIVIAL_P (f))
3478     {
3479       for (i = 0; i < f->n; i++)
3480 	affine_fn_free (f->fns[i]);
3481     }
3482   free (f);
3483 }
3484 
3485 /* Frees memory used by SUBSCRIPTS.  */
3486 
3487 static void
3488 free_subscripts (vec<subscript_p> subscripts)
3489 {
3490   for (subscript_p s : subscripts)
3491     {
3492       free_conflict_function (s->conflicting_iterations_in_a);
3493       free_conflict_function (s->conflicting_iterations_in_b);
3494       free (s);
3495     }
3496   subscripts.release ();
3497 }
3498 
3499 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
3500    description.  */
3501 
3502 static inline void
3503 finalize_ddr_dependent (struct data_dependence_relation *ddr,
3504 			tree chrec)
3505 {
3506   DDR_ARE_DEPENDENT (ddr) = chrec;
3507   free_subscripts (DDR_SUBSCRIPTS (ddr));
3508   DDR_SUBSCRIPTS (ddr).create (0);
3509 }
3510 
3511 /* The dependence relation DDR cannot be represented by a distance
3512    vector.  */
3513 
3514 static inline void
3515 non_affine_dependence_relation (struct data_dependence_relation *ddr)
3516 {
3517   if (dump_file && (dump_flags & TDF_DETAILS))
3518     fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
3519 
3520   DDR_AFFINE_P (ddr) = false;
3521 }
3522 
3523 
3524 
3525 /* This section contains the classic Banerjee tests.  */
3526 
3527 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
3528    variables, i.e., if the ZIV (Zero Index Variable) test is true.  */
3529 
3530 static inline bool
3531 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
3532 {
3533   return (evolution_function_is_constant_p (chrec_a)
3534 	  && evolution_function_is_constant_p (chrec_b));
3535 }
3536 
3537 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
3538    variable, i.e., if the SIV (Single Index Variable) test is true.  */
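/* Added examples (not from the original comment): the pair
   (5, {0, +, 1}_1) is SIV (constant vs. univariate), and so is
   ({2, +, 1}_1, {0, +, 3}_1), where both chrecs evolve in the same
   loop; ({0, +, 1}_1, {0, +, 1}_2) is rejected because the two
   polynomial chrecs evolve in different loops.  */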
3539 
3540 static bool
3541 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
3542 {
3543   if ((evolution_function_is_constant_p (chrec_a)
3544        && evolution_function_is_univariate_p (chrec_b))
3545       || (evolution_function_is_constant_p (chrec_b)
3546 	  && evolution_function_is_univariate_p (chrec_a)))
3547     return true;
3548 
3549   if (evolution_function_is_univariate_p (chrec_a)
3550       && evolution_function_is_univariate_p (chrec_b))
3551     {
3552       switch (TREE_CODE (chrec_a))
3553 	{
3554 	case POLYNOMIAL_CHREC:
3555 	  switch (TREE_CODE (chrec_b))
3556 	    {
3557 	    case POLYNOMIAL_CHREC:
3558 	      if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
3559 		return false;
3560 	      /* FALLTHRU */
3561 
3562 	    default:
3563 	      return true;
3564 	    }
3565 
3566 	default:
3567 	  return true;
3568 	}
3569     }
3570 
3571   return false;
3572 }
3573 
3574 /* Creates a conflict function with N dimensions.  The affine functions
3575    in each dimension follow.  */
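/* Added usage sketch: conflict_fn (1, affine_fn_cst (integer_zero_node))
   builds the one-dimensional conflict function that maps every k to
   iteration 0, the form used throughout the ZIV/SIV analyses below.  */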
3576 
3577 static conflict_function *
3578 conflict_fn (unsigned n, ...)
3579 {
3580   unsigned i;
3581   conflict_function *ret = XCNEW (conflict_function);
3582   va_list ap;
3583 
3584   gcc_assert (n > 0 && n <= MAX_DIM);
3585   va_start (ap, n);
3586 
3587   ret->n = n;
3588   for (i = 0; i < n; i++)
3589     ret->fns[i] = va_arg (ap, affine_fn);
3590   va_end (ap);
3591 
3592   return ret;
3593 }
3594 
3595 /* Returns constant affine function with value CST.  */
3596 
3597 static affine_fn
3598 affine_fn_cst (tree cst)
3599 {
3600   affine_fn fn;
3601   fn.create (1);
3602   fn.quick_push (cst);
3603   return fn;
3604 }
3605 
3606 /* Returns affine function with single variable, CST + COEF * x_DIM.  */
3607 
3608 static affine_fn
3609 affine_fn_univar (tree cst, unsigned dim, tree coef)
3610 {
3611   affine_fn fn;
3612   fn.create (dim + 1);
3613   unsigned i;
3614 
3615   gcc_assert (dim > 0);
3616   fn.quick_push (cst);
3617   for (i = 1; i < dim; i++)
3618     fn.quick_push (integer_zero_node);
3619   fn.quick_push (coef);
3620   return fn;
3621 }
3622 
3623 /* Analyze a ZIV (Zero Index Variable) subscript.  *OVERLAPS_A and
3624    *OVERLAPS_B are initialized to the functions that describe the
3625    relation between the elements accessed twice by CHREC_A and
3626    CHREC_B.  For k >= 0, the following property is verified:
3627 
3628    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
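/* Added example: for chrec_a = 5 and chrec_b = 5 the difference is the
   constant 0, so the accesses conflict on every iteration; for
   chrec_a = 5 and chrec_b = 7 the difference is the nonzero constant
   -2, so they never conflict; if either operand is symbolic the
   difference is not an INTEGER_CST and the answer is "don't know".  */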
3629 
3630 static void
3631 analyze_ziv_subscript (tree chrec_a,
3632 		       tree chrec_b,
3633 		       conflict_function **overlaps_a,
3634 		       conflict_function **overlaps_b,
3635 		       tree *last_conflicts)
3636 {
3637   tree type, difference;
3638   dependence_stats.num_ziv++;
3639 
3640   if (dump_file && (dump_flags & TDF_DETAILS))
3641     fprintf (dump_file, "(analyze_ziv_subscript \n");
3642 
3643   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3644   chrec_a = chrec_convert (type, chrec_a, NULL);
3645   chrec_b = chrec_convert (type, chrec_b, NULL);
3646   difference = chrec_fold_minus (type, chrec_a, chrec_b);
3647 
3648   switch (TREE_CODE (difference))
3649     {
3650     case INTEGER_CST:
3651       if (integer_zerop (difference))
3652 	{
3653 	  /* The difference is equal to zero: the accessed index
3654 	     overlaps for each iteration in the loop.  */
3655 	  *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3656 	  *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3657 	  *last_conflicts = chrec_dont_know;
3658 	  dependence_stats.num_ziv_dependent++;
3659 	}
3660       else
3661 	{
3662 	  /* The accesses do not overlap.  */
3663 	  *overlaps_a = conflict_fn_no_dependence ();
3664 	  *overlaps_b = conflict_fn_no_dependence ();
3665 	  *last_conflicts = integer_zero_node;
3666 	  dependence_stats.num_ziv_independent++;
3667 	}
3668       break;
3669 
3670     default:
3671       /* We're not sure whether the indexes overlap.  For the moment,
3672 	 conservatively answer "don't know".  */
3673       if (dump_file && (dump_flags & TDF_DETAILS))
3674 	fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
3675 
3676       *overlaps_a = conflict_fn_not_known ();
3677       *overlaps_b = conflict_fn_not_known ();
3678       *last_conflicts = chrec_dont_know;
3679       dependence_stats.num_ziv_unimplemented++;
3680       break;
3681     }
3682 
3683   if (dump_file && (dump_flags & TDF_DETAILS))
3684     fprintf (dump_file, ")\n");
3685 }
3686 
3687 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
3688    and only if it fits the unsigned int type.  If this is not the case,
3689    or if the bound on the number of iterations of LOOP could not be
3690    derived, returns chrec_dont_know.  */
3691 
3692 static tree
3693 max_stmt_executions_tree (class loop *loop)
3694 {
3695   widest_int nit;
3696 
3697   if (!max_stmt_executions (loop, &nit))
3698     return chrec_dont_know;
3699 
3700   if (!wi::fits_to_tree_p (nit, unsigned_type_node))
3701     return chrec_dont_know;
3702 
3703   return wide_int_to_tree (unsigned_type_node, nit);
3704 }
3705 
3706 /* Determine whether the CHREC is always positive/negative.  If the expression
3707    cannot be statically analyzed, return false, otherwise store the
3708    answer in VALUE.  */
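/* Added example: {3, +, 2}_1 has a positive initial value and a
   positive step, so it is known positive.  For {-197, +, 2}_1 the two
   signs differ, so the chrec is additionally evaluated at the last
   iteration; only when that end value keeps the sign of the initial
   value, i.e. the sign never changes inside the loop, can the sign be
   reported.  */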
3709 
3710 static bool
3711 chrec_is_positive (tree chrec, bool *value)
3712 {
3713   bool value0, value1, value2;
3714   tree end_value, nb_iter;
3715 
3716   switch (TREE_CODE (chrec))
3717     {
3718     case POLYNOMIAL_CHREC:
3719       if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
3720 	  || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
3721 	return false;
3722 
3723       /* FIXME -- overflows.  */
3724       if (value0 == value1)
3725 	{
3726 	  *value = value0;
3727 	  return true;
3728 	}
3729 
3730       /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
3731 	 and the proof consists in showing that the sign never
3732 	 changes during the execution of the loop, from 0 to
3733 	 loop->nb_iterations.  */
3734       if (!evolution_function_is_affine_p (chrec))
3735 	return false;
3736 
3737       nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
3738       if (chrec_contains_undetermined (nb_iter))
3739 	return false;
3740 
3741 #if 0
3742       /* TODO -- If the test is after the exit, we may decrease the number of
3743 	 iterations by one.  */
3744       if (after_exit)
3745 	nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
3746 #endif
3747 
3748       end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
3749 
3750       if (!chrec_is_positive (end_value, &value2))
3751 	return false;
3752 
3753       *value = value0;
3754       return value0 == value2;
3755 
3756     case INTEGER_CST:
3757       switch (tree_int_cst_sgn (chrec))
3758 	{
3759 	case -1:
3760 	  *value = false;
3761 	  break;
3762 	case 1:
3763 	  *value = true;
3764 	  break;
3765 	default:
3766 	  return false;
3767 	}
3768       return true;
3769 
3770     default:
3771       return false;
3772     }
3773 }
3774 
3775 
3776 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
3777    constant, and CHREC_B is an affine function.  *OVERLAPS_A and
3778    *OVERLAPS_B are initialized to the functions that describe the
3779    relation between the elements accessed twice by CHREC_A and
3780    CHREC_B.  For k >= 0, the following property is verified:
3781 
3782    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
3783 
3784 static void
3785 analyze_siv_subscript_cst_affine (tree chrec_a,
3786 				  tree chrec_b,
3787 				  conflict_function **overlaps_a,
3788 				  conflict_function **overlaps_b,
3789 				  tree *last_conflicts)
3790 {
3791   bool value0, value1, value2;
3792   tree type, difference, tmp;
3793 
3794   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3795   chrec_a = chrec_convert (type, chrec_a, NULL);
3796   chrec_b = chrec_convert (type, chrec_b, NULL);
3797   difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
3798 
3799   /* Special case overlap in the first iteration.  */
3800   if (integer_zerop (difference))
3801     {
3802       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3803       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3804       *last_conflicts = integer_one_node;
3805       return;
3806     }
3807 
3808   if (!chrec_is_positive (initial_condition (difference), &value0))
3809     {
3810       if (dump_file && (dump_flags & TDF_DETAILS))
3811 	fprintf (dump_file, "siv test failed: chrec is not positive.\n");
3812 
3813       dependence_stats.num_siv_unimplemented++;
3814       *overlaps_a = conflict_fn_not_known ();
3815       *overlaps_b = conflict_fn_not_known ();
3816       *last_conflicts = chrec_dont_know;
3817       return;
3818     }
3819   else
3820     {
3821       if (value0 == false)
3822 	{
3823 	  if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3824 	      || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
3825 	    {
3826 	      if (dump_file && (dump_flags & TDF_DETAILS))
3827 		fprintf (dump_file, "siv test failed: chrec not positive.\n");
3828 
3829 	      *overlaps_a = conflict_fn_not_known ();
3830 	      *overlaps_b = conflict_fn_not_known ();
3831 	      *last_conflicts = chrec_dont_know;
3832 	      dependence_stats.num_siv_unimplemented++;
3833 	      return;
3834 	    }
3835 	  else
3836 	    {
3837 	      if (value1 == true)
3838 		{
3839 		  /* Example:
3840 		     chrec_a = 12
3841 		     chrec_b = {10, +, 1}
3842 		  */
3843 
3844 		  if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3845 		    {
3846 		      HOST_WIDE_INT numiter;
3847 		      class loop *loop = get_chrec_loop (chrec_b);
3848 
3849 		      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3850 		      tmp = fold_build2 (EXACT_DIV_EXPR, type,
3851 					 fold_build1 (ABS_EXPR, type, difference),
3852 					 CHREC_RIGHT (chrec_b));
3853 		      *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3854 		      *last_conflicts = integer_one_node;
3855 
3856 
3857 		      /* Perform weak-zero siv test to see if overlap is
3858 			 outside the loop bounds.  */
3859 		      numiter = max_stmt_executions_int (loop);
3860 
3861 		      if (numiter >= 0
3862 			  && compare_tree_int (tmp, numiter) > 0)
3863 			{
3864 			  free_conflict_function (*overlaps_a);
3865 			  free_conflict_function (*overlaps_b);
3866 			  *overlaps_a = conflict_fn_no_dependence ();
3867 			  *overlaps_b = conflict_fn_no_dependence ();
3868 			  *last_conflicts = integer_zero_node;
3869 			  dependence_stats.num_siv_independent++;
3870 			  return;
3871 			}
3872 		      dependence_stats.num_siv_dependent++;
3873 		      return;
3874 		    }
3875 
3876 		  /* When the step does not divide the difference, there are
3877 		     no overlaps.  */
3878 		  else
3879 		    {
3880 		      *overlaps_a = conflict_fn_no_dependence ();
3881 		      *overlaps_b = conflict_fn_no_dependence ();
3882 		      *last_conflicts = integer_zero_node;
3883 		      dependence_stats.num_siv_independent++;
3884 		      return;
3885 		    }
3886 		}
3887 
3888 	      else
3889 		{
3890 		  /* Example:
3891 		     chrec_a = 12
3892 		     chrec_b = {10, +, -1}
3893 
3894 		     In this case, chrec_a will not overlap with chrec_b.  */
3895 		  *overlaps_a = conflict_fn_no_dependence ();
3896 		  *overlaps_b = conflict_fn_no_dependence ();
3897 		  *last_conflicts = integer_zero_node;
3898 		  dependence_stats.num_siv_independent++;
3899 		  return;
3900 		}
3901 	    }
3902 	}
3903       else
3904 	{
3905 	  if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3906 	      || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
3907 	    {
3908 	      if (dump_file && (dump_flags & TDF_DETAILS))
3909 		fprintf (dump_file, "siv test failed: chrec not positive.\n");
3910 
3911 	      *overlaps_a = conflict_fn_not_known ();
3912 	      *overlaps_b = conflict_fn_not_known ();
3913 	      *last_conflicts = chrec_dont_know;
3914 	      dependence_stats.num_siv_unimplemented++;
3915 	      return;
3916 	    }
3917 	  else
3918 	    {
3919 	      if (value2 == false)
3920 		{
3921 		  /* Example:
3922 		     chrec_a = 3
3923 		     chrec_b = {10, +, -1}
3924 		  */
3925 		  if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3926 		    {
3927 		      HOST_WIDE_INT numiter;
3928 		      class loop *loop = get_chrec_loop (chrec_b);
3929 
3930 		      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3931 		      tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
3932 					 CHREC_RIGHT (chrec_b));
3933 		      *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3934 		      *last_conflicts = integer_one_node;
3935 
3936 		      /* Perform weak-zero siv test to see if overlap is
3937 			 outside the loop bounds.  */
3938 		      numiter = max_stmt_executions_int (loop);
3939 
3940 		      if (numiter >= 0
3941 			  && compare_tree_int (tmp, numiter) > 0)
3942 			{
3943 			  free_conflict_function (*overlaps_a);
3944 			  free_conflict_function (*overlaps_b);
3945 			  *overlaps_a = conflict_fn_no_dependence ();
3946 			  *overlaps_b = conflict_fn_no_dependence ();
3947 			  *last_conflicts = integer_zero_node;
3948 			  dependence_stats.num_siv_independent++;
3949 			  return;
3950 			}
3951 		      dependence_stats.num_siv_dependent++;
3952 		      return;
3953 		    }
3954 
3955 		  /* When the step does not divide the difference, there
3956 		     are no overlaps.  */
3957 		  else
3958 		    {
3959 		      *overlaps_a = conflict_fn_no_dependence ();
3960 		      *overlaps_b = conflict_fn_no_dependence ();
3961 		      *last_conflicts = integer_zero_node;
3962 		      dependence_stats.num_siv_independent++;
3963 		      return;
3964 		    }
3965 		}
3966 	      else
3967 		{
3968 		  /* Example:
3969 		     chrec_a = 3
3970 		     chrec_b = {4, +, 1}
3971 
3972 		     In this case, chrec_a will not overlap with chrec_b.  */
3973 		  *overlaps_a = conflict_fn_no_dependence ();
3974 		  *overlaps_b = conflict_fn_no_dependence ();
3975 		  *last_conflicts = integer_zero_node;
3976 		  dependence_stats.num_siv_independent++;
3977 		  return;
3978 		}
3979 	    }
3980 	}
3981     }
3982 }
3983 
3984 /* Helper recursive function for initializing the matrix A.  Returns
3985    the initial value of CHREC.  */
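/* Added example: for CHREC = {3, +, 5}_1 with INDEX = 0 and MULT = 1
   the call stores 5 in A[0][0] and returns the initial value 3; with
   MULT = -1 (as used for the second chrec of a subscript) it stores
   -5.  A nested chrec such as {{2, +, 3}_1, +, 7}_2 fills A[0][0] = 7
   and A[1][0] = 3 and returns 2.  */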
3986 
3987 static tree
3988 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
3989 {
3990   gcc_assert (chrec);
3991 
3992   switch (TREE_CODE (chrec))
3993     {
3994     case POLYNOMIAL_CHREC:
3995       HOST_WIDE_INT chrec_right;
3996       if (!cst_and_fits_in_hwi (CHREC_RIGHT (chrec)))
3997 	return chrec_dont_know;
3998       chrec_right = int_cst_value (CHREC_RIGHT (chrec));
3999       /* We want to be able to negate without overflow.  */
4000       if (chrec_right == HOST_WIDE_INT_MIN)
4001 	return chrec_dont_know;
4002       A[index][0] = mult * chrec_right;
4003       return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
4004 
4005     case PLUS_EXPR:
4006     case MULT_EXPR:
4007     case MINUS_EXPR:
4008       {
4009 	tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
4010 	tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
4011 
4012 	return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
4013       }
4014 
4015     CASE_CONVERT:
4016       {
4017 	tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
4018 	return chrec_convert (chrec_type (chrec), op, NULL);
4019       }
4020 
4021     case BIT_NOT_EXPR:
4022       {
4023 	/* Handle ~X as -1 - X.  */
4024 	tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
4025 	return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
4026 			      build_int_cst (TREE_TYPE (chrec), -1), op);
4027       }
4028 
4029     case INTEGER_CST:
4030       return chrec;
4031 
4032     default:
4033       gcc_unreachable ();
4034       return NULL_TREE;
4035     }
4036 }
4037 
4038 #define FLOOR_DIV(x,y) ((x) / (y))
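/* Added note: C division truncates toward zero, so FLOOR_DIV computes
   a mathematical floor only when the quotient is non-negative (e.g.
   both operands positive, as in the positive-step cases below).  */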
4039 
4040 /* Solves the special case of the Diophantine equation:
4041    | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
4042 
4043    Computes the descriptions OVERLAPS_A and OVERLAPS_B.  NITER is the
4044    number of iterations that loops X and Y run.  The overlaps will be
4045    constructed as evolutions in dimension DIM.  */
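/* Added worked example: for STEP_A = 2 and STEP_B = 4 the equation
   2*x = 4*y has gcd 2, giving the solutions x = (4/2)*t = 2*t and
   y = (2/2)*t = t; OVERLAPS_A therefore evolves with step 2,
   OVERLAPS_B with step 1, and for NITER = 100 the last conflict
   happens at t = min (100/2, 100/1) = 50.  */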
4046 
4047 static void
4048 compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter,
4049 					 HOST_WIDE_INT step_a,
4050 					 HOST_WIDE_INT step_b,
4051 					 affine_fn *overlaps_a,
4052 					 affine_fn *overlaps_b,
4053 					 tree *last_conflicts, int dim)
4054 {
4055   if (((step_a > 0 && step_b > 0)
4056        || (step_a < 0 && step_b < 0)))
4057     {
4058       HOST_WIDE_INT step_overlaps_a, step_overlaps_b;
4059       HOST_WIDE_INT gcd_steps_a_b, last_conflict, tau2;
4060 
4061       gcd_steps_a_b = gcd (step_a, step_b);
4062       step_overlaps_a = step_b / gcd_steps_a_b;
4063       step_overlaps_b = step_a / gcd_steps_a_b;
4064 
4065       if (niter > 0)
4066 	{
4067 	  tau2 = FLOOR_DIV (niter, step_overlaps_a);
4068 	  tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
4069 	  last_conflict = tau2;
4070 	  *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
4071 	}
4072       else
4073 	*last_conflicts = chrec_dont_know;
4074 
4075       *overlaps_a = affine_fn_univar (integer_zero_node, dim,
4076 				      build_int_cst (NULL_TREE,
4077 						     step_overlaps_a));
4078       *overlaps_b = affine_fn_univar (integer_zero_node, dim,
4079 				      build_int_cst (NULL_TREE,
4080 						     step_overlaps_b));
4081     }
4082 
4083   else
4084     {
4085       *overlaps_a = affine_fn_cst (integer_zero_node);
4086       *overlaps_b = affine_fn_cst (integer_zero_node);
4087       *last_conflicts = integer_zero_node;
4088     }
4089 }
4090 
4091 /* Solves the special case of a Diophantine equation where CHREC_A is
4092    an affine bivariate function, and CHREC_B is an affine univariate
4093    function.  For example,
4094 
4095    | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
4096 
4097    has the following overlapping functions:
4098 
4099    | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
4100    | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
4101    | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
4102 
4103    FORNOW: This is a specialized implementation for a case occurring in
4104    a common benchmark.  Implement the general algorithm.  */
4105 
4106 static void
4107 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
4108 				      conflict_function **overlaps_a,
4109 				      conflict_function **overlaps_b,
4110 				      tree *last_conflicts)
4111 {
4112   bool xz_p, yz_p, xyz_p;
4113   HOST_WIDE_INT step_x, step_y, step_z;
4114   HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
4115   affine_fn overlaps_a_xz, overlaps_b_xz;
4116   affine_fn overlaps_a_yz, overlaps_b_yz;
4117   affine_fn overlaps_a_xyz, overlaps_b_xyz;
4118   affine_fn ova1, ova2, ovb;
4119   tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
4120 
4121   step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
4122   step_y = int_cst_value (CHREC_RIGHT (chrec_a));
4123   step_z = int_cst_value (CHREC_RIGHT (chrec_b));
4124 
4125   niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
4126   niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
4127   niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));
4128 
4129   if (niter_x < 0 || niter_y < 0 || niter_z < 0)
4130     {
4131       if (dump_file && (dump_flags & TDF_DETAILS))
4132 	fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
4133 
4134       *overlaps_a = conflict_fn_not_known ();
4135       *overlaps_b = conflict_fn_not_known ();
4136       *last_conflicts = chrec_dont_know;
4137       return;
4138     }
4139 
4140   niter = MIN (niter_x, niter_z);
4141   compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
4142 					   &overlaps_a_xz,
4143 					   &overlaps_b_xz,
4144 					   &last_conflicts_xz, 1);
4145   niter = MIN (niter_y, niter_z);
4146   compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
4147 					   &overlaps_a_yz,
4148 					   &overlaps_b_yz,
4149 					   &last_conflicts_yz, 2);
4150   niter = MIN (niter_x, niter_z);
4151   niter = MIN (niter_y, niter);
4152   compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
4153 					   &overlaps_a_xyz,
4154 					   &overlaps_b_xyz,
4155 					   &last_conflicts_xyz, 3);
4156 
4157   xz_p = !integer_zerop (last_conflicts_xz);
4158   yz_p = !integer_zerop (last_conflicts_yz);
4159   xyz_p = !integer_zerop (last_conflicts_xyz);
4160 
4161   if (xz_p || yz_p || xyz_p)
4162     {
4163       ova1 = affine_fn_cst (integer_zero_node);
4164       ova2 = affine_fn_cst (integer_zero_node);
4165       ovb = affine_fn_cst (integer_zero_node);
4166       if (xz_p)
4167 	{
4168 	  affine_fn t0 = ova1;
4169 	  affine_fn t2 = ovb;
4170 
4171 	  ova1 = affine_fn_plus (ova1, overlaps_a_xz);
4172 	  ovb = affine_fn_plus (ovb, overlaps_b_xz);
4173 	  affine_fn_free (t0);
4174 	  affine_fn_free (t2);
4175 	  *last_conflicts = last_conflicts_xz;
4176 	}
4177       if (yz_p)
4178 	{
4179 	  affine_fn t0 = ova2;
4180 	  affine_fn t2 = ovb;
4181 
4182 	  ova2 = affine_fn_plus (ova2, overlaps_a_yz);
4183 	  ovb = affine_fn_plus (ovb, overlaps_b_yz);
4184 	  affine_fn_free (t0);
4185 	  affine_fn_free (t2);
4186 	  *last_conflicts = last_conflicts_yz;
4187 	}
4188       if (xyz_p)
4189 	{
4190 	  affine_fn t0 = ova1;
4191 	  affine_fn t2 = ova2;
4192 	  affine_fn t4 = ovb;
4193 
4194 	  ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
4195 	  ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
4196 	  ovb = affine_fn_plus (ovb, overlaps_b_xyz);
4197 	  affine_fn_free (t0);
4198 	  affine_fn_free (t2);
4199 	  affine_fn_free (t4);
4200 	  *last_conflicts = last_conflicts_xyz;
4201 	}
4202       *overlaps_a = conflict_fn (2, ova1, ova2);
4203       *overlaps_b = conflict_fn (1, ovb);
4204     }
4205   else
4206     {
4207       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4208       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4209       *last_conflicts = integer_zero_node;
4210     }
4211 
4212   affine_fn_free (overlaps_a_xz);
4213   affine_fn_free (overlaps_b_xz);
4214   affine_fn_free (overlaps_a_yz);
4215   affine_fn_free (overlaps_b_yz);
4216   affine_fn_free (overlaps_a_xyz);
4217   affine_fn_free (overlaps_b_xyz);
4218 }
4219 
4220 /* Copy the elements of vector VEC1 with length SIZE to VEC2.  */
4221 
4222 static void
4223 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
4224 		    int size)
4225 {
4226   memcpy (vec2, vec1, size * sizeof (*vec1));
4227 }
4228 
4229 /* Copy the elements of M x N matrix MAT1 to MAT2.  */
4230 
4231 static void
4232 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
4233 		    int m, int n)
4234 {
4235   int i;
4236 
4237   for (i = 0; i < m; i++)
4238     lambda_vector_copy (mat1[i], mat2[i], n);
4239 }
4240 
4241 /* Store the N x N identity matrix in MAT.  */
4242 
4243 static void
4244 lambda_matrix_id (lambda_matrix mat, int size)
4245 {
4246   int i, j;
4247 
4248   for (i = 0; i < size; i++)
4249     for (j = 0; j < size; j++)
4250       mat[i][j] = (i == j) ? 1 : 0;
4251 }
4252 
4253 /* Return the index of the first nonzero element of vector VEC1 between
4254    START and N.  We must have START <= N.
4255    Returns N if VEC1 is the zero vector.  */
4256 
4257 static int
4258 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
4259 {
4260   int j = start;
4261   while (j < n && vec1[j] == 0)
4262     j++;
4263   return j;
4264 }
4265 
4266 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
4267    R2 = R2 + CONST1 * R1.  */
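/* Added note: the helper returns false on any signed overflow and also
   rejects a result equal to HOST_WIDE_INT_MIN, so that a later row
   negation (multiplication by -1) cannot overflow either.  */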
4268 
4269 static bool
4270 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2,
4271 		       lambda_int const1)
4272 {
4273   int i;
4274 
4275   if (const1 == 0)
4276     return true;
4277 
4278   for (i = 0; i < n; i++)
4279     {
4280       bool ovf;
4281       lambda_int tem = mul_hwi (mat[r1][i], const1, &ovf);
4282       if (ovf)
4283 	return false;
4284       lambda_int tem2 = add_hwi (mat[r2][i], tem, &ovf);
4285       if (ovf || tem2 == HOST_WIDE_INT_MIN)
4286 	return false;
4287       mat[r2][i] = tem2;
4288     }
4289 
4290   return true;
4291 }
4292 
4293 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
4294    and store the result in VEC2.  */
4295 
4296 static void
4297 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
4298 			  int size, lambda_int const1)
4299 {
4300   int i;
4301 
4302   if (const1 == 0)
4303     lambda_vector_clear (vec2, size);
4304   else
4305     for (i = 0; i < size; i++)
4306       vec2[i] = const1 * vec1[i];
4307 }
4308 
4309 /* Negate vector VEC1 with length SIZE and store it in VEC2.  */
4310 
4311 static void
4312 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
4313 		      int size)
4314 {
4315   lambda_vector_mult_const (vec1, vec2, size, -1);
4316 }
4317 
4318 /* Negate row R1 of matrix MAT which has N columns.  */
4319 
4320 static void
4321 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
4322 {
4323   lambda_vector_negate (mat[r1], mat[r1], n);
4324 }
4325 
4326 /* Return true if two vectors are equal.  */
4327 
4328 static bool
4329 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
4330 {
4331   int i;
4332   for (i = 0; i < size; i++)
4333     if (vec1[i] != vec2[i])
4334       return false;
4335   return true;
4336 }
4337 
4338 /* Given an M x N integer matrix A, this function determines an M x
4339    M unimodular matrix U, and an M x N echelon matrix S such that
4340    "U.A = S".  This decomposition is also known as "right Hermite".
4341 
4342    Ref: Algorithm 2.1 page 33 in "Loop Transformations for
4343    Restructuring Compilers" Utpal Banerjee.  */
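/* Added worked example (not from the reference): for the 2 x 1 matrix
   A = [[4], [6]], one valid decomposition is

     U = [[-1,  1],	S = [[2],
	  [ 3, -2]]	     [0]]

   since U.A = S, |det U| = 1 and S is echelon; S[0][0] = 2 is
   gcd (4, 6), which analyze_subscript_affine_affine reads off as
   gcd_alpha_beta.  */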
4344 
4345 static bool
4346 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
4347 			     lambda_matrix S, lambda_matrix U)
4348 {
4349   int i, j, i0 = 0;
4350 
4351   lambda_matrix_copy (A, S, m, n);
4352   lambda_matrix_id (U, m);
4353 
4354   for (j = 0; j < n; j++)
4355     {
4356       if (lambda_vector_first_nz (S[j], m, i0) < m)
4357 	{
4358 	  ++i0;
4359 	  for (i = m - 1; i >= i0; i--)
4360 	    {
4361 	      while (S[i][j] != 0)
4362 		{
4363 		  lambda_int factor, a, b;
4364 
4365 		  a = S[i-1][j];
4366 		  b = S[i][j];
4367 		  gcc_assert (a != HOST_WIDE_INT_MIN);
4368 		  factor = a / b;
4369 
4370 		  if (!lambda_matrix_row_add (S, n, i, i-1, -factor))
4371 		    return false;
4372 		  std::swap (S[i], S[i-1]);
4373 
4374 		  if (!lambda_matrix_row_add (U, m, i, i-1, -factor))
4375 		    return false;
4376 		  std::swap (U[i], U[i-1]);
4377 		}
4378 	    }
4379 	}
4380     }
4381 
4382   return true;
4383 }
4384 
4385 /* Determines the overlapping elements due to accesses CHREC_A and
4386    CHREC_B, which are affine functions.  This function cannot handle
4387    symbolic evolution functions, i.e. when the initial conditions are
4388    parameters, because it uses lambda matrices of integers.  */
4389 
4390 static void
4391 analyze_subscript_affine_affine (tree chrec_a,
4392 				 tree chrec_b,
4393 				 conflict_function **overlaps_a,
4394 				 conflict_function **overlaps_b,
4395 				 tree *last_conflicts)
4396 {
4397   unsigned nb_vars_a, nb_vars_b, dim;
4398   lambda_int gamma, gcd_alpha_beta;
4399   lambda_matrix A, U, S;
4400   struct obstack scratch_obstack;
4401 
4402   if (eq_evolutions_p (chrec_a, chrec_b))
4403     {
4404       /* The accessed index overlaps for each iteration in the
4405 	 loop.  */
4406       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4407       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4408       *last_conflicts = chrec_dont_know;
4409       return;
4410     }
4411   if (dump_file && (dump_flags & TDF_DETAILS))
4412     fprintf (dump_file, "(analyze_subscript_affine_affine \n");
4413 
4414   /* For determining the initial intersection, we have to solve a
4415      Diophantine equation.  This is the most time consuming part.
4416 
4417      To answer the question "Is there a dependence?" we have
4418      to prove that there exists a solution to the Diophantine
4419      equation, and that the solution is in the iteration domain,
4420      i.e. the solution is positive or zero, and that the solution
4421      happens before the upper bound loop.nb_iterations.  Otherwise
4422      there is no dependence.  This function outputs a description of
4423      the iterations that hold the intersections.  */
4424 
4425   nb_vars_a = nb_vars_in_chrec (chrec_a);
4426   nb_vars_b = nb_vars_in_chrec (chrec_b);
4427 
4428   gcc_obstack_init (&scratch_obstack);
4429 
4430   dim = nb_vars_a + nb_vars_b;
4431   U = lambda_matrix_new (dim, dim, &scratch_obstack);
4432   A = lambda_matrix_new (dim, 1, &scratch_obstack);
4433   S = lambda_matrix_new (dim, 1, &scratch_obstack);
4434 
4435   tree init_a = initialize_matrix_A (A, chrec_a, 0, 1);
4436   tree init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1);
4437   if (init_a == chrec_dont_know
4438       || init_b == chrec_dont_know)
4439     {
4440       if (dump_file && (dump_flags & TDF_DETAILS))
4441 	fprintf (dump_file, "affine-affine test failed: "
4442 		 "representation issue.\n");
4443       *overlaps_a = conflict_fn_not_known ();
4444       *overlaps_b = conflict_fn_not_known ();
4445       *last_conflicts = chrec_dont_know;
4446       goto end_analyze_subs_aa;
4447     }
4448   gamma = int_cst_value (init_b) - int_cst_value (init_a);
4449 
4450   /* Don't do all the hard work of solving the Diophantine equation
4451      when we already know the solution: for example,
4452      | {3, +, 1}_1
4453      | {3, +, 4}_2
4454      | gamma = 3 - 3 = 0.
4455      Then the first overlap occurs during the first iterations:
4456      | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
4457   */
4458   if (gamma == 0)
4459     {
4460       if (nb_vars_a == 1 && nb_vars_b == 1)
4461 	{
4462 	  HOST_WIDE_INT step_a, step_b;
4463 	  HOST_WIDE_INT niter, niter_a, niter_b;
4464 	  affine_fn ova, ovb;
4465 
4466 	  niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
4467 	  niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
4468 	  niter = MIN (niter_a, niter_b);
4469 	  step_a = int_cst_value (CHREC_RIGHT (chrec_a));
4470 	  step_b = int_cst_value (CHREC_RIGHT (chrec_b));
4471 
4472 	  compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
4473 						   &ova, &ovb,
4474 						   last_conflicts, 1);
4475 	  *overlaps_a = conflict_fn (1, ova);
4476 	  *overlaps_b = conflict_fn (1, ovb);
4477 	}
4478 
4479       else if (nb_vars_a == 2 && nb_vars_b == 1)
4480 	compute_overlap_steps_for_affine_1_2
4481 	  (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
4482 
4483       else if (nb_vars_a == 1 && nb_vars_b == 2)
4484 	compute_overlap_steps_for_affine_1_2
4485 	  (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
4486 
4487       else
4488 	{
4489 	  if (dump_file && (dump_flags & TDF_DETAILS))
4490 	    fprintf (dump_file, "affine-affine test failed: too many variables.\n");
4491 	  *overlaps_a = conflict_fn_not_known ();
4492 	  *overlaps_b = conflict_fn_not_known ();
4493 	  *last_conflicts = chrec_dont_know;
4494 	}
4495       goto end_analyze_subs_aa;
4496     }
4497 
4498   /* U.A = S */
4499   if (!lambda_matrix_right_hermite (A, dim, 1, S, U))
4500     {
4501       *overlaps_a = conflict_fn_not_known ();
4502       *overlaps_b = conflict_fn_not_known ();
4503       *last_conflicts = chrec_dont_know;
4504       goto end_analyze_subs_aa;
4505     }
4506 
4507   if (S[0][0] < 0)
4508     {
4509       S[0][0] *= -1;
4510       lambda_matrix_row_negate (U, dim, 0);
4511     }
4512   gcd_alpha_beta = S[0][0];
4513 
4514   /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
4515      but that is quite a strange case.  Instead of ICEing, answer
4516      don't know.  */
4517   if (gcd_alpha_beta == 0)
4518     {
4519       *overlaps_a = conflict_fn_not_known ();
4520       *overlaps_b = conflict_fn_not_known ();
4521       *last_conflicts = chrec_dont_know;
4522       goto end_analyze_subs_aa;
4523     }
4524 
4525   /* The classic "gcd-test".  */
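  /* For example (added): {0, +, 2}_1 vs. {1, +, 2}_1 gives gamma = 1
     and gcd_alpha_beta = 2; since 2 does not divide 1, the equation
     2*x - 2*y = 1 has no integer solution.  */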
4526   if (!int_divides_p (gcd_alpha_beta, gamma))
4527     {
4528       /* The "gcd-test" has determined that there is no integer
4529 	 solution, i.e. there is no dependence.  */
4530       *overlaps_a = conflict_fn_no_dependence ();
4531       *overlaps_b = conflict_fn_no_dependence ();
4532       *last_conflicts = integer_zero_node;
4533     }
4534 
4535   /* Both access functions are univariate.  This includes SIV and MIV cases.  */
4536   else if (nb_vars_a == 1 && nb_vars_b == 1)
4537     {
4538       /* Both functions should have the same evolution sign.  */
4539       if (((A[0][0] > 0 && -A[1][0] > 0)
4540 	   || (A[0][0] < 0 && -A[1][0] < 0)))
4541 	{
4542 	  /* The solutions are given by:
4543 	     |
4544 	     | [GAMMA/GCD_ALPHA_BETA  t].[u11 u12]  = [x0]
4545 	     |                           [u21 u22]    [y0]
4546 
4547 	     For a given integer t.  Using the following variables,
4548 
4549 	     | i0 = u11 * gamma / gcd_alpha_beta
4550 	     | j0 = u12 * gamma / gcd_alpha_beta
4551 	     | i1 = u21
4552 	     | j1 = u22
4553 
4554 	     the solutions are:
4555 
4556 	     | x0 = i0 + i1 * t,
4557 	     | y0 = j0 + j1 * t.  */
4558       	  HOST_WIDE_INT i0, j0, i1, j1;
4559 
4560 	  i0 = U[0][0] * gamma / gcd_alpha_beta;
4561 	  j0 = U[0][1] * gamma / gcd_alpha_beta;
4562 	  i1 = U[1][0];
4563 	  j1 = U[1][1];
4564 
4565 	  if ((i1 == 0 && i0 < 0)
4566 	      || (j1 == 0 && j0 < 0))
4567 	    {
4568 	      /* There is no solution.
4569 		 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
4570 		 falls in here, but for the moment we don't look at the
4571 		 upper bound of the iteration domain.  */
4572 	      *overlaps_a = conflict_fn_no_dependence ();
4573 	      *overlaps_b = conflict_fn_no_dependence ();
4574 	      *last_conflicts = integer_zero_node;
4575 	      goto end_analyze_subs_aa;
4576 	    }
4577 
4578 	  if (i1 > 0 && j1 > 0)
4579 	    {
4580 	      HOST_WIDE_INT niter_a
4581 		= max_stmt_executions_int (get_chrec_loop (chrec_a));
4582 	      HOST_WIDE_INT niter_b
4583 		= max_stmt_executions_int (get_chrec_loop (chrec_b));
4584 	      HOST_WIDE_INT niter = MIN (niter_a, niter_b);
4585 
4586 	      /* (X0, Y0) is a solution of the Diophantine equation:
4587 		 "chrec_a (X0) = chrec_b (Y0)".  */
4588 	      HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
4589 					CEIL (-j0, j1));
4590 	      HOST_WIDE_INT x0 = i1 * tau1 + i0;
4591 	      HOST_WIDE_INT y0 = j1 * tau1 + j0;
4592 
4593 	      /* (X1, Y1) is the smallest positive solution of the eq
4594 		 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
4595 		 first conflict occurs.  */
4596 	      HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
4597 	      HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
4598 	      HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
4599 
4600 	      if (niter > 0)
4601 		{
4602 		  /* If the overlap occurs outside of the bounds of the
4603 		     loop, there is no dependence.  */
4604 		  if (x1 >= niter_a || y1 >= niter_b)
4605 		    {
4606 		      *overlaps_a = conflict_fn_no_dependence ();
4607 		      *overlaps_b = conflict_fn_no_dependence ();
4608 		      *last_conflicts = integer_zero_node;
4609 		      goto end_analyze_subs_aa;
4610 		    }
4611 
4612 		  /* max stmt executions can get quite large, avoid
4613 		     overflows by using wide ints here.  */
4614 		  widest_int tau2
4615 		    = wi::smin (wi::sdiv_floor (wi::sub (niter_a, i0), i1),
4616 				wi::sdiv_floor (wi::sub (niter_b, j0), j1));
4617 		  widest_int last_conflict = wi::sub (tau2, (x1 - i0)/i1);
4618 		  if (wi::min_precision (last_conflict, SIGNED)
4619 		      <= TYPE_PRECISION (integer_type_node))
4620 		    *last_conflicts
4621 		       = build_int_cst (integer_type_node,
4622 					last_conflict.to_shwi ());
4623 		  else
4624 		    *last_conflicts = chrec_dont_know;
4625 		}
4626 	      else
4627 		*last_conflicts = chrec_dont_know;
4628 
4629 	      *overlaps_a
4630 		= conflict_fn (1,
4631 			       affine_fn_univar (build_int_cst (NULL_TREE, x1),
4632 						 1,
4633 						 build_int_cst (NULL_TREE, i1)));
4634 	      *overlaps_b
4635 		= conflict_fn (1,
4636 			       affine_fn_univar (build_int_cst (NULL_TREE, y1),
4637 						 1,
4638 						 build_int_cst (NULL_TREE, j1)));
4639 	    }
4640 	  else
4641 	    {
4642 	      /* FIXME: For the moment, the upper bound of the
4643 		 iteration domain for i and j is not checked.  */
4644 	      if (dump_file && (dump_flags & TDF_DETAILS))
4645 		fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4646 	      *overlaps_a = conflict_fn_not_known ();
4647 	      *overlaps_b = conflict_fn_not_known ();
4648 	      *last_conflicts = chrec_dont_know;
4649 	    }
4650 	}
4651       else
4652 	{
4653 	  if (dump_file && (dump_flags & TDF_DETAILS))
4654 	    fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4655 	  *overlaps_a = conflict_fn_not_known ();
4656 	  *overlaps_b = conflict_fn_not_known ();
4657 	  *last_conflicts = chrec_dont_know;
4658 	}
4659     }
4660   else
4661     {
4662       if (dump_file && (dump_flags & TDF_DETAILS))
4663 	fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4664       *overlaps_a = conflict_fn_not_known ();
4665       *overlaps_b = conflict_fn_not_known ();
4666       *last_conflicts = chrec_dont_know;
4667     }
4668 
4669 end_analyze_subs_aa:
4670   obstack_free (&scratch_obstack, NULL);
4671   if (dump_file && (dump_flags & TDF_DETAILS))
4672     {
4673       fprintf (dump_file, "  (overlaps_a = ");
4674       dump_conflict_function (dump_file, *overlaps_a);
4675       fprintf (dump_file, ")\n  (overlaps_b = ");
4676       dump_conflict_function (dump_file, *overlaps_b);
4677       fprintf (dump_file, "))\n");
4678     }
4679 }
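
/* A worked instance of the function above, on a hypothetical pair of
   affine functions: for chrec_a = {0, +, 4}_1 and chrec_b = {2, +, 2}_1
   the Diophantine equation is 4*x - 2*y = 2, i.e. y = 2*x - 1.  The
   first conflict in the iteration domain is (x1, y1) = (1, 1), since
   chrec_a (1) = chrec_b (1) = 4, and the conflict functions computed
   above take the shape

   | overlaps_a (t) = 1 + t
   | overlaps_b (t) = 1 + 2*t

   which indeed satisfy chrec_a (1 + t) = 4 + 4*t = chrec_b (1 + 2*t).  */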
4680 
4681 /* Returns true when analyze_subscript_affine_affine can be used for
4682    determining the dependence relation between chrec_a and chrec_b,
4683    that contain symbols.  This function modifies chrec_a and chrec_b
4684    so that the analysis result is unchanged, they no longer contain
4685    symbols, and they can then safely be passed to the analyzer.
4686 
4687    Example: The analysis of the following tuples of evolutions produce
4688    the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
4689    vs. {0, +, 1}_1
4690 
4691    {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
4692    {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
4693 */
4694 
4695 static bool
4696 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
4697 {
4698   tree diff, type, left_a, left_b, right_b;
4699 
4700   if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
4701       || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
4702     /* FIXME: For the moment not handled.  Might be refined later.  */
4703     return false;
4704 
4705   type = chrec_type (*chrec_a);
4706   left_a = CHREC_LEFT (*chrec_a);
4707   left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
4708   diff = chrec_fold_minus (type, left_a, left_b);
4709 
4710   if (!evolution_function_is_constant_p (diff))
4711     return false;
4712 
4713   if (dump_file && (dump_flags & TDF_DETAILS))
4714     fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
4715 
4716   *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
4717 				     diff, CHREC_RIGHT (*chrec_a));
4718   right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
4719   *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
4720 				     build_int_cst (type, 0),
4721 				     right_b);
4722   return true;
4723 }
4724 
4725 /* Analyze a SIV (Single Index Variable) subscript.  *OVERLAPS_A and
4726    *OVERLAPS_B are initialized to the functions that describe the
4727    relation between the elements accessed twice by CHREC_A and
4728    CHREC_B.  For k >= 0, the following property is verified:
4729 
4730    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
4731 
4732 static void
4733 analyze_siv_subscript (tree chrec_a,
4734 		       tree chrec_b,
4735 		       conflict_function **overlaps_a,
4736 		       conflict_function **overlaps_b,
4737 		       tree *last_conflicts,
4738 		       int loop_nest_num)
4739 {
4740   dependence_stats.num_siv++;
4741 
4742   if (dump_file && (dump_flags & TDF_DETAILS))
4743     fprintf (dump_file, "(analyze_siv_subscript \n");
4744 
4745   if (evolution_function_is_constant_p (chrec_a)
4746       && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
4747     analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
4748 				      overlaps_a, overlaps_b, last_conflicts);
4749 
4750   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
4751 	   && evolution_function_is_constant_p (chrec_b))
4752     analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
4753 				      overlaps_b, overlaps_a, last_conflicts);
4754 
4755   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
4756 	   && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
4757     {
4758       if (!chrec_contains_symbols (chrec_a)
4759 	  && !chrec_contains_symbols (chrec_b))
4760 	{
4761 	  analyze_subscript_affine_affine (chrec_a, chrec_b,
4762 					   overlaps_a, overlaps_b,
4763 					   last_conflicts);
4764 
4765 	  if (CF_NOT_KNOWN_P (*overlaps_a)
4766 	      || CF_NOT_KNOWN_P (*overlaps_b))
4767 	    dependence_stats.num_siv_unimplemented++;
4768 	  else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4769 		   || CF_NO_DEPENDENCE_P (*overlaps_b))
4770 	    dependence_stats.num_siv_independent++;
4771 	  else
4772 	    dependence_stats.num_siv_dependent++;
4773 	}
4774       else if (can_use_analyze_subscript_affine_affine (&chrec_a,
4775 							&chrec_b))
4776 	{
4777 	  analyze_subscript_affine_affine (chrec_a, chrec_b,
4778 					   overlaps_a, overlaps_b,
4779 					   last_conflicts);
4780 
4781 	  if (CF_NOT_KNOWN_P (*overlaps_a)
4782 	      || CF_NOT_KNOWN_P (*overlaps_b))
4783 	    dependence_stats.num_siv_unimplemented++;
4784 	  else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4785 		   || CF_NO_DEPENDENCE_P (*overlaps_b))
4786 	    dependence_stats.num_siv_independent++;
4787 	  else
4788 	    dependence_stats.num_siv_dependent++;
4789 	}
4790       else
4791 	goto siv_subscript_dontknow;
4792     }
4793 
4794   else
4795     {
4796     siv_subscript_dontknow:;
4797       if (dump_file && (dump_flags & TDF_DETAILS))
4798 	fprintf (dump_file, "  siv test failed: unimplemented");
4799       *overlaps_a = conflict_fn_not_known ();
4800       *overlaps_b = conflict_fn_not_known ();
4801       *last_conflicts = chrec_dont_know;
4802       dependence_stats.num_siv_unimplemented++;
4803     }
4804 
4805   if (dump_file && (dump_flags & TDF_DETAILS))
4806     fprintf (dump_file, ")\n");
4807 }
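
/* For example, for the hypothetical accesses A[3] and A[i], i.e.
   chrec_a = 3 and chrec_b = {0, +, 1}_1, the cst-affine helper above
   is expected to report a single conflict: overlaps_a is the constant
   affine function 0 and overlaps_b the constant affine function 3,
   because the only iteration of chrec_b touching element 3 is i = 3.  */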
4808 
4809 /* Returns false if we can prove that the greatest common divisor of the steps
4810    of CHREC does not divide CST, true otherwise.  */
4811 
4812 static bool
4813 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
4814 {
4815   HOST_WIDE_INT cd = 0, val;
4816   tree step;
4817 
4818   if (!tree_fits_shwi_p (cst))
4819     return true;
4820   val = tree_to_shwi (cst);
4821 
4822   while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
4823     {
4824       step = CHREC_RIGHT (chrec);
4825       if (!tree_fits_shwi_p (step))
4826 	return true;
4827       cd = gcd (cd, tree_to_shwi (step));
4828       chrec = CHREC_LEFT (chrec);
4829     }
4830 
4831   return val % cd == 0;
4832 }
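
/* For instance, on the chrec {{21, +, 2}_1, +, -2}_2 used by the MIV
   analysis below, the steps are -2 and 2, so CD becomes gcd (2, 2) = 2;
   with CST = 1, 1 % 2 != 0 and the function returns false: the gcd of
   the steps provably does not divide the constant difference.  */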
4833 
4834 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
4835    LOOP_NEST.  *OVERLAPS_A and *OVERLAPS_B are initialized to the
4836    functions that describe the relation between the elements accessed
4837    twice by CHREC_A and CHREC_B.  For k >= 0, the following property
4838    is verified:
4839 
4840    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
4841 
4842 static void
4843 analyze_miv_subscript (tree chrec_a,
4844 		       tree chrec_b,
4845 		       conflict_function **overlaps_a,
4846 		       conflict_function **overlaps_b,
4847 		       tree *last_conflicts,
4848 		       class loop *loop_nest)
4849 {
4850   tree type, difference;
4851 
4852   dependence_stats.num_miv++;
4853   if (dump_file && (dump_flags & TDF_DETAILS))
4854     fprintf (dump_file, "(analyze_miv_subscript \n");
4855 
4856   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
4857   chrec_a = chrec_convert (type, chrec_a, NULL);
4858   chrec_b = chrec_convert (type, chrec_b, NULL);
4859   difference = chrec_fold_minus (type, chrec_a, chrec_b);
4860 
4861   if (eq_evolutions_p (chrec_a, chrec_b))
4862     {
4863       /* Access functions are the same: all the elements are accessed
4864 	 in the same order.  */
4865       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4866       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4867       *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
4868       dependence_stats.num_miv_dependent++;
4869     }
4870 
4871   else if (evolution_function_is_constant_p (difference)
4872 	   && evolution_function_is_affine_multivariate_p (chrec_a,
4873 							   loop_nest->num)
4874 	   && !gcd_of_steps_may_divide_p (chrec_a, difference))
4875     {
4876       /* testsuite/.../ssa-chrec-33.c
4877 	 {{21, +, 2}_1, +, -2}_2  vs.  {{20, +, 2}_1, +, -2}_2
4878 
4879 	 The difference is 1, and all the evolution steps are multiples
4880 	 of 2, consequently there are no overlapping elements.  */
4881       *overlaps_a = conflict_fn_no_dependence ();
4882       *overlaps_b = conflict_fn_no_dependence ();
4883       *last_conflicts = integer_zero_node;
4884       dependence_stats.num_miv_independent++;
4885     }
4886 
4887   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest->num)
4888 	   && !chrec_contains_symbols (chrec_a, loop_nest)
4889 	   && evolution_function_is_affine_in_loop (chrec_b, loop_nest->num)
4890 	   && !chrec_contains_symbols (chrec_b, loop_nest))
4891     {
4892       /* testsuite/.../ssa-chrec-35.c
4893 	 {0, +, 1}_2  vs.  {0, +, 1}_3
4894 	 the overlapping elements are respectively located at iterations:
4895 	 {0, +, 1}_x and {0, +, 1}_x,
4896 	 in other words, we have the equality:
4897 	 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
4898 
4899 	 Other examples:
4900 	 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
4901 	 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
4902 
4903 	 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
4904 	 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
4905       */
4906       analyze_subscript_affine_affine (chrec_a, chrec_b,
4907 				       overlaps_a, overlaps_b, last_conflicts);
4908 
4909       if (CF_NOT_KNOWN_P (*overlaps_a)
4910  	  || CF_NOT_KNOWN_P (*overlaps_b))
4911 	dependence_stats.num_miv_unimplemented++;
4912       else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4913 	       || CF_NO_DEPENDENCE_P (*overlaps_b))
4914 	dependence_stats.num_miv_independent++;
4915       else
4916 	dependence_stats.num_miv_dependent++;
4917     }
4918 
4919   else
4920     {
4921       /* When the analysis is too difficult, answer "don't know".  */
4922       if (dump_file && (dump_flags & TDF_DETAILS))
4923 	fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
4924 
4925       *overlaps_a = conflict_fn_not_known ();
4926       *overlaps_b = conflict_fn_not_known ();
4927       *last_conflicts = chrec_dont_know;
4928       dependence_stats.num_miv_unimplemented++;
4929     }
4930 
4931   if (dump_file && (dump_flags & TDF_DETAILS))
4932     fprintf (dump_file, ")\n");
4933 }
4934 
4935 /* Determines the iterations for which CHREC_A is equal to CHREC_B
4936    with respect to LOOP_NEST.  OVERLAP_ITERATIONS_A and
4937    OVERLAP_ITERATIONS_B are initialized with two functions that
4938    describe the iterations that contain conflicting elements.
4939 
4940    Remark: For an integer k >= 0, the following equality is true:
4941 
4942    CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
4943 */
4944 
4945 static void
4946 analyze_overlapping_iterations (tree chrec_a,
4947 				tree chrec_b,
4948 				conflict_function **overlap_iterations_a,
4949 				conflict_function **overlap_iterations_b,
4950 				tree *last_conflicts, class loop *loop_nest)
4951 {
4952   unsigned int lnn = loop_nest->num;
4953 
4954   dependence_stats.num_subscript_tests++;
4955 
4956   if (dump_file && (dump_flags & TDF_DETAILS))
4957     {
4958       fprintf (dump_file, "(analyze_overlapping_iterations \n");
4959       fprintf (dump_file, "  (chrec_a = ");
4960       print_generic_expr (dump_file, chrec_a);
4961       fprintf (dump_file, ")\n  (chrec_b = ");
4962       print_generic_expr (dump_file, chrec_b);
4963       fprintf (dump_file, ")\n");
4964     }
4965 
4966   if (chrec_a == NULL_TREE
4967       || chrec_b == NULL_TREE
4968       || chrec_contains_undetermined (chrec_a)
4969       || chrec_contains_undetermined (chrec_b))
4970     {
4971       dependence_stats.num_subscript_undetermined++;
4972 
4973       *overlap_iterations_a = conflict_fn_not_known ();
4974       *overlap_iterations_b = conflict_fn_not_known ();
4975     }
4976 
4977   /* If they are the same chrec, and are affine, they overlap
4978      on every iteration.  */
4979   else if (eq_evolutions_p (chrec_a, chrec_b)
4980 	   && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
4981 	       || operand_equal_p (chrec_a, chrec_b, 0)))
4982     {
4983       dependence_stats.num_same_subscript_function++;
4984       *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4985       *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4986       *last_conflicts = chrec_dont_know;
4987     }
4988 
4989   /* If they aren't the same, and aren't affine, we can't do anything
4990      yet.  */
4991   else if ((chrec_contains_symbols (chrec_a)
4992 	    || chrec_contains_symbols (chrec_b))
4993 	   && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
4994 	       || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
4995     {
4996       dependence_stats.num_subscript_undetermined++;
4997       *overlap_iterations_a = conflict_fn_not_known ();
4998       *overlap_iterations_b = conflict_fn_not_known ();
4999     }
5000 
5001   else if (ziv_subscript_p (chrec_a, chrec_b))
5002     analyze_ziv_subscript (chrec_a, chrec_b,
5003 			   overlap_iterations_a, overlap_iterations_b,
5004 			   last_conflicts);
5005 
5006   else if (siv_subscript_p (chrec_a, chrec_b))
5007     analyze_siv_subscript (chrec_a, chrec_b,
5008 			   overlap_iterations_a, overlap_iterations_b,
5009 			   last_conflicts, lnn);
5010 
5011   else
5012     analyze_miv_subscript (chrec_a, chrec_b,
5013 			   overlap_iterations_a, overlap_iterations_b,
5014 			   last_conflicts, loop_nest);
5015 
5016   if (dump_file && (dump_flags & TDF_DETAILS))
5017     {
5018       fprintf (dump_file, "  (overlap_iterations_a = ");
5019       dump_conflict_function (dump_file, *overlap_iterations_a);
5020       fprintf (dump_file, ")\n  (overlap_iterations_b = ");
5021       dump_conflict_function (dump_file, *overlap_iterations_b);
5022       fprintf (dump_file, "))\n");
5023     }
5024 }
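
/* To illustrate the dispatch above on small examples: 3 vs. 5 is a ZIV
   subscript (both functions are constant), 3 vs. {0, +, 1}_1 is a SIV
   subscript (a single index variable), and {{0, +, 1}_1, +, 2}_2 vs.
   {0, +, 1}_1 is a MIV subscript (several index variables).  */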
5025 
5026 /* Helper function for uniquely inserting distance vectors.  */
5027 
5028 static void
5029 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
5030 {
5031   for (lambda_vector v : DDR_DIST_VECTS (ddr))
5032     if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
5033       return;
5034 
5035   DDR_DIST_VECTS (ddr).safe_push (dist_v);
5036 }
5037 
5038 /* Helper function for uniquely inserting direction vectors.  */
5039 
5040 static void
5041 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
5042 {
5043   for (lambda_vector v : DDR_DIR_VECTS (ddr))
5044     if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
5045       return;
5046 
5047   DDR_DIR_VECTS (ddr).safe_push (dir_v);
5048 }
5049 
5050 /* Add a distance of 1 for all the loops outside the loop at position
5051    INDEX: for each such outer loop, push a new distance vector composed
5052    of the previous distance and a distance of 1 for that outer loop.
5053    Example:
5054 
5055    | loop_1
5056    |   loop_2
5057    |     A[10]
5058    |   endloop_2
5059    | endloop_1
5060 
5061    Saved vectors are of the form (dist_in_1, dist_in_2).  First, we
5062    save (0, 1), then we have to save (1, 0).  */
5063 
5064 static void
5065 add_outer_distances (struct data_dependence_relation *ddr,
5066 		     lambda_vector dist_v, int index)
5067 {
5068   /* For each outer loop where init_v is not set, the accesses are
5069      in dependence of distance 1 in the loop.  */
5070   while (--index >= 0)
5071     {
5072       lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5073       lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
5074       save_v[index] = 1;
5075       save_dist_v (ddr, save_v);
5076     }
5077 }
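
/* For instance, with DDR_NB_LOOPS = 3, DIST_V = (0, 0, 1) and
   INDEX = 2, the loop above pushes the two additional vectors
   (0, 1, 1) and (1, 0, 1).  */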
5078 
5079 /* Return false when we fail to represent the data dependence as a
5080    distance vector.  A_INDEX is the index of the first reference
5081    (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the
5082    second reference.  INIT_B is set to true when a component has been
5083    added to the distance vector DIST_V.  INDEX_CARRY is then set to
5084    the index in DIST_V that carries the dependence.  */
5085 
5086 static bool
5087 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
5088 			     unsigned int a_index, unsigned int b_index,
5089 			     lambda_vector dist_v, bool *init_b,
5090 			     int *index_carry)
5091 {
5092   unsigned i;
5093   lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5094   class loop *loop = DDR_LOOP_NEST (ddr)[0];
5095 
5096   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
5097     {
5098       tree access_fn_a, access_fn_b;
5099       struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
5100 
5101       if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
5102 	{
5103 	  non_affine_dependence_relation (ddr);
5104 	  return false;
5105 	}
5106 
5107       access_fn_a = SUB_ACCESS_FN (subscript, a_index);
5108       access_fn_b = SUB_ACCESS_FN (subscript, b_index);
5109 
5110       if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
5111 	  && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
5112 	{
5113 	  HOST_WIDE_INT dist;
5114 	  int index;
5115 	  int var_a = CHREC_VARIABLE (access_fn_a);
5116 	  int var_b = CHREC_VARIABLE (access_fn_b);
5117 
5118 	  if (var_a != var_b
5119 	      || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
5120 	    {
5121 	      non_affine_dependence_relation (ddr);
5122 	      return false;
5123 	    }
5124 
5125 	  /* When data references are collected in a loop while data
5126 	     dependences are analyzed in a loop nest nested inside that
5127 	     loop, there are more access functions than loops.  Skip the
5128 	     access functions of loops not in the loop nest.
5129 
5130 	     See PR89725 for more information.  */
5131 	  if (flow_loop_nested_p (get_loop (cfun, var_a), loop))
5132 	    continue;
5133 
5134 	  dist = int_cst_value (SUB_DISTANCE (subscript));
5135 	  index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
5136 	  *index_carry = MIN (index, *index_carry);
5137 
5138 	  /* This is the subscript coupling test.  If we have already
5139 	     recorded a distance for this loop (a distance coming from
5140 	     another subscript), it should be the same.  For example,
5141 	     in the following code, there is no dependence:
5142 
5143 	     | loop i = 0, N, 1
5144 	     |   T[i+1][i] = ...
5145 	     |   ... = T[i][i]
5146 	     | endloop
5147 	  */
5148 	  if (init_v[index] != 0 && dist_v[index] != dist)
5149 	    {
5150 	      finalize_ddr_dependent (ddr, chrec_known);
5151 	      return false;
5152 	    }
5153 
5154 	  dist_v[index] = dist;
5155 	  init_v[index] = 1;
5156 	  *init_b = true;
5157 	}
5158       else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
5159 	{
5160 	  /* This can be for example an affine vs. constant dependence
5161 	     (T[i] vs. T[3]) that is not an affine dependence and is
5162 	     not representable as a distance vector.  */
5163 	  non_affine_dependence_relation (ddr);
5164 	  return false;
5165 	}
5166       else
5167 	*init_b = true;
5168     }
5169 
5170   return true;
5171 }
5172 
5173 /* Return true when the DDR contains only invariant access functions
5174    with respect to loop number LNUM.  */
5175 
5176 static bool
5177 invariant_access_functions (const struct data_dependence_relation *ddr,
5178 			    int lnum)
5179 {
5180   for (subscript *sub : DDR_SUBSCRIPTS (ddr))
5181     if (!evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 0), lnum)
5182 	|| !evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 1), lnum))
5183       return false;
5184 
5185   return true;
5186 }
5187 
5188 /* Helper function for the case where DDR_A and DDR_B are the same
5189    multivariate access function with a constant step.  For an example
5190    see pr34635-1.c.  */
5191 
5192 static void
5193 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
5194 {
5195   int x_1, x_2;
5196   tree c_1 = CHREC_LEFT (c_2);
5197   tree c_0 = CHREC_LEFT (c_1);
5198   lambda_vector dist_v;
5199   HOST_WIDE_INT v1, v2, cd;
5200 
5201   /* Polynomials with more than 2 variables are not handled yet.  When
5202      the evolution steps are parameters, it is not possible to
5203      represent the dependence using classical distance vectors.  */
5204   if (TREE_CODE (c_0) != INTEGER_CST
5205       || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
5206       || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
5207     {
5208       DDR_AFFINE_P (ddr) = false;
5209       return;
5210     }
5211 
5212   x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
5213   x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
5214 
5215   /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2).  */
5216   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5217   v1 = int_cst_value (CHREC_RIGHT (c_1));
5218   v2 = int_cst_value (CHREC_RIGHT (c_2));
5219   cd = gcd (v1, v2);
5220   v1 /= cd;
5221   v2 /= cd;
5222 
5223   if (v2 < 0)
5224     {
5225       v2 = -v2;
5226       v1 = -v1;
5227     }
5228 
5229   dist_v[x_1] = v2;
5230   dist_v[x_2] = -v1;
5231   save_dist_v (ddr, dist_v);
5232 
5233   add_outer_distances (ddr, dist_v, x_1);
5234 }
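
/* Checking the example from the comment above: the access function
   f (i1, i2) = 2*i1 + 3*i2 of "{{0, +, 2}_1, +, 3}_2" satisfies
   f (i1 + 3, i2 - 2) = f (i1, i2) + 3*2 - 2*3 = f (i1, i2), which is
   exactly the self dependence that the distance vector (3, -2)
   expresses.  */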
5235 
5236 /* Helper function for the case where DDR_A and DDR_B are the same
5237    access functions.  */
5238 
5239 static void
5240 add_other_self_distances (struct data_dependence_relation *ddr)
5241 {
5242   lambda_vector dist_v;
5243   unsigned i;
5244   int index_carry = DDR_NB_LOOPS (ddr);
5245   subscript *sub;
5246   class loop *loop = DDR_LOOP_NEST (ddr)[0];
5247 
5248   FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
5249     {
5250       tree access_fun = SUB_ACCESS_FN (sub, 0);
5251 
5252       if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
5253 	{
5254 	  if (!evolution_function_is_univariate_p (access_fun, loop->num))
5255 	    {
5256 	      if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
5257 		{
5258 		  DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
5259 		  return;
5260 		}
5261 
5262 	      access_fun = SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr, 0), 0);
5263 
5264 	      if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
5265 		add_multivariate_self_dist (ddr, access_fun);
5266 	      else
5267 		/* The evolution step is not constant: it varies in
5268 		   the outer loop, so this cannot be represented by a
5269 		   distance vector.  For example in pr34635.c the
5270 		   evolution is {0, +, {0, +, 4}_1}_2.  */
5271 		DDR_AFFINE_P (ddr) = false;
5272 
5273 	      return;
5274 	    }
5275 
5276 	  /* When data references are collected in a loop while data
5277 	     dependences are analyzed in a loop nest nested inside that
5278 	     loop, there are more access functions than loops.  Skip the
5279 	     access functions of loops not in the loop nest.
5280 
5281 	     See PR89725 for more information.  */
5282 	  if (flow_loop_nested_p (get_loop (cfun, CHREC_VARIABLE (access_fun)),
5283 				  loop))
5284 	    continue;
5285 
5286 	  index_carry = MIN (index_carry,
5287 			     index_in_loop_nest (CHREC_VARIABLE (access_fun),
5288 						 DDR_LOOP_NEST (ddr)));
5289 	}
5290     }
5291 
5292   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5293   add_outer_distances (ddr, dist_v, index_carry);
5294 }
5295 
5296 static void
5297 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
5298 {
5299   lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5300 
5301   dist_v[0] = 1;
5302   save_dist_v (ddr, dist_v);
5303 }
5304 
5305 /* Adds a unit distance vector to DDR when there is a 0 overlap.  This
5306    is the case for example when access functions are the same and
5307    equal to a constant, as in:
5308 
5309    | loop_1
5310    |   A[3] = ...
5311    |   ... = A[3]
5312    | endloop_1
5313 
5314    in which case the distance vectors are (0) and (1).  */
5315 
5316 static void
5317 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
5318 {
5319   unsigned i, j;
5320 
5321   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
5322     {
5323       subscript_p sub = DDR_SUBSCRIPT (ddr, i);
5324       conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
5325       conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
5326 
5327       for (j = 0; j < ca->n; j++)
5328 	if (affine_function_zero_p (ca->fns[j]))
5329 	  {
5330 	    insert_innermost_unit_dist_vector (ddr);
5331 	    return;
5332 	  }
5333 
5334       for (j = 0; j < cb->n; j++)
5335 	if (affine_function_zero_p (cb->fns[j]))
5336 	  {
5337 	    insert_innermost_unit_dist_vector (ddr);
5338 	    return;
5339 	  }
5340     }
5341 }
5342 
5343 /* Return true when the DDR contains two data references that have the
5344    same access functions.  */
5345 
5346 static inline bool
5347 same_access_functions (const struct data_dependence_relation *ddr)
5348 {
5349   for (subscript *sub : DDR_SUBSCRIPTS (ddr))
5350     if (!eq_evolutions_p (SUB_ACCESS_FN (sub, 0),
5351 			  SUB_ACCESS_FN (sub, 1)))
5352       return false;
5353 
5354   return true;
5355 }
5356 
5357 /* Compute the classic per loop distance vector.  DDR is the data
5358    dependence relation to build a vector from.  Return false when we
5359    fail to represent the data dependence as a distance vector.  */
5360 
5361 static bool
5362 build_classic_dist_vector (struct data_dependence_relation *ddr,
5363 			   class loop *loop_nest)
5364 {
5365   bool init_b = false;
5366   int index_carry = DDR_NB_LOOPS (ddr);
5367   lambda_vector dist_v;
5368 
5369   if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
5370     return false;
5371 
5372   if (same_access_functions (ddr))
5373     {
5374       /* Save the 0 vector.  */
5375       dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5376       save_dist_v (ddr, dist_v);
5377 
5378       if (invariant_access_functions (ddr, loop_nest->num))
5379 	add_distance_for_zero_overlaps (ddr);
5380 
5381       if (DDR_NB_LOOPS (ddr) > 1)
5382 	add_other_self_distances (ddr);
5383 
5384       return true;
5385     }
5386 
5387   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5388   if (!build_classic_dist_vector_1 (ddr, 0, 1, dist_v, &init_b, &index_carry))
5389     return false;
5390 
5391   /* Save the distance vector if we initialized one.  */
5392   if (init_b)
5393     {
5394       /* Verify a basic constraint: classic distance vectors should
5395 	 always be lexicographically positive.
5396 
5397 	 Data references are collected in the order of execution of
5398 	 the program, thus for the following loop
5399 
5400 	 | for (i = 1; i < 100; i++)
5401 	 |   for (j = 1; j < 100; j++)
5402 	 |     {
5403 	 |       t = T[j+1][i-1];  // A
5404 	 |       T[j][i] = t + 2;  // B
5405 	 |     }
5406 
5407 	 references are collected following the direction of the wind:
5408 	 A then B.  The data dependence tests are performed also
5409 	 following this order, such that we're looking at the distance
5410 	 separating the elements accessed by A from the elements later
5411 	 accessed by B.  But in this example, the distance returned by
5412 	 test_dep (A, B) is lexicographically negative (-1, 1), that
5413 	 means that the access A occurs later than B with respect to
5414 	 the outer loop, i.e. we're actually looking upwind.  In this
5415 	 case we solve test_dep (B, A) looking downwind to the
5416 	 lexicographically positive solution, that returns the
5417 	 distance vector (1, -1).  */
5418       if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
5419 	{
5420 	  lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5421 	  if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
5422 	    return false;
5423 	  compute_subscript_distance (ddr);
5424 	  if (!build_classic_dist_vector_1 (ddr, 1, 0, save_v, &init_b,
5425 					    &index_carry))
5426 	    return false;
5427 	  save_dist_v (ddr, save_v);
5428 	  DDR_REVERSED_P (ddr) = true;
5429 
5430 	  /* In this case there is a dependence forward for all the
5431 	     outer loops:
5432 
5433 	     | for (k = 1; k < 100; k++)
5434 	     |  for (i = 1; i < 100; i++)
5435 	     |   for (j = 1; j < 100; j++)
5436 	     |     {
5437 	     |       t = T[j+1][i-1];  // A
5438 	     |       T[j][i] = t + 2;  // B
5439 	     |     }
5440 
5441 	     the vectors are:
5442 	     (0,  1, -1)
5443 	     (1,  1, -1)
5444 	     (1, -1,  1)
5445 	  */
5446 	  if (DDR_NB_LOOPS (ddr) > 1)
5447 	    {
5448 	      add_outer_distances (ddr, save_v, index_carry);
5449 	      add_outer_distances (ddr, dist_v, index_carry);
5450 	    }
5451 	}
5452       else
5453 	{
5454 	  lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5455 	  lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
5456 
5457 	  if (DDR_NB_LOOPS (ddr) > 1)
5458 	    {
5459 	      lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5460 
5461 	      if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
5462 		return false;
5463 	      compute_subscript_distance (ddr);
5464 	      if (!build_classic_dist_vector_1 (ddr, 1, 0, opposite_v, &init_b,
5465 						&index_carry))
5466 		return false;
5467 
5468 	      save_dist_v (ddr, save_v);
5469 	      add_outer_distances (ddr, dist_v, index_carry);
5470 	      add_outer_distances (ddr, opposite_v, index_carry);
5471 	    }
5472 	  else
5473 	    save_dist_v (ddr, save_v);
5474 	}
5475     }
5476   else
5477     {
5478       /* There is a distance of 1 on all the outer loops: Example:
5479 	 there is a dependence of distance 1 on loop_1 for the array A.
5480 
5481 	 | loop_1
5482 	 |   A[5] = ...
5483 	 | endloop
5484       */
5485       add_outer_distances (ddr, dist_v,
5486 			   lambda_vector_first_nz (dist_v,
5487 						   DDR_NB_LOOPS (ddr), 0));
5488     }
5489 
5490   if (dump_file && (dump_flags & TDF_DETAILS))
5491     {
5492       unsigned i;
5493 
5494       fprintf (dump_file, "(build_classic_dist_vector\n");
5495       for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
5496 	{
5497 	  fprintf (dump_file, "  dist_vector = (");
5498 	  print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
5499 			       DDR_NB_LOOPS (ddr));
5500 	  fprintf (dump_file, "  )\n");
5501 	}
5502       fprintf (dump_file, ")\n");
5503     }
5504 
5505   return true;
5506 }
5507 
5508 /* Return the direction for a given distance.
5509    FIXME: Computing dir this way is suboptimal, since dir can catch
5510    cases that dist is unable to represent.  */
5511 
5512 static inline enum data_dependence_direction
5513 dir_from_dist (int dist)
5514 {
5515   if (dist > 0)
5516     return dir_positive;
5517   else if (dist < 0)
5518     return dir_negative;
5519   else
5520     return dir_equal;
5521 }
5522 
5523 /* Compute the classic per loop direction vector.  DDR is the data
5524    dependence relation to build a vector from.  */
5525 
5526 static void
5527 build_classic_dir_vector (struct data_dependence_relation *ddr)
5528 {
5529   unsigned i, j;
5530   lambda_vector dist_v;
5531 
5532   FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
5533     {
5534       lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5535 
5536       for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
5537 	dir_v[j] = dir_from_dist (dist_v[j]);
5538 
5539       save_dir_v (ddr, dir_v);
5540     }
5541 }
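
/* For example, the distance vector (2, 0, -1) is translated into the
   direction vector (dir_positive, dir_equal, dir_negative), written
   (+, =, -) in the classic notation.  */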
5542 
5543 /* Helper function.  Returns true when there is a dependence between the
5544    data references.  A_INDEX is the index of the first reference (0 for
5545    DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference.  */
5546 
5547 static bool
5548 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
5549 			       unsigned int a_index, unsigned int b_index,
5550 			       class loop *loop_nest)
5551 {
5552   unsigned int i;
5553   tree last_conflicts;
5554   struct subscript *subscript;
5555   tree res = NULL_TREE;
5556 
5557   for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
5558     {
5559       conflict_function *overlaps_a, *overlaps_b;
5560 
5561       analyze_overlapping_iterations (SUB_ACCESS_FN (subscript, a_index),
5562 				      SUB_ACCESS_FN (subscript, b_index),
5563 				      &overlaps_a, &overlaps_b,
5564 				      &last_conflicts, loop_nest);
5565 
5566       if (SUB_CONFLICTS_IN_A (subscript))
5567 	free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
5568       if (SUB_CONFLICTS_IN_B (subscript))
5569 	free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
5570 
5571       SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
5572       SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
5573       SUB_LAST_CONFLICT (subscript) = last_conflicts;
5574 
5575       /* If there is any undetermined conflict function we have to
5576          give a conservative answer in case we cannot prove that
5577 	 no dependence exists when analyzing another subscript.  */
5578       if (CF_NOT_KNOWN_P (overlaps_a)
5579  	  || CF_NOT_KNOWN_P (overlaps_b))
5580  	{
5581 	  res = chrec_dont_know;
5582 	  continue;
5583  	}
5584 
5585       /* When there is a subscript with no dependence we can stop.  */
5586       else if (CF_NO_DEPENDENCE_P (overlaps_a)
5587  	       || CF_NO_DEPENDENCE_P (overlaps_b))
5588  	{
5589 	  res = chrec_known;
5590 	  break;
5591  	}
5592     }
5593 
5594   if (res == NULL_TREE)
5595     return true;
5596 
5597   if (res == chrec_known)
5598     dependence_stats.num_dependence_independent++;
5599   else
5600     dependence_stats.num_dependence_undetermined++;
5601   finalize_ddr_dependent (ddr, res);
5602   return false;
5603 }
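
/* As an illustration, for the hypothetical references A[i][0] and
   A[i][1]: the first subscript (i, i) is found dependent, but the
   second subscript (0, 1) is a ZIV subscript with no conflict, so the
   loop above breaks with res == chrec_known and the whole relation is
   finalized as independent.  */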
5604 
5605 /* Computes the conflicting iterations in LOOP_NEST, and initializes DDR.  */
5606 
5607 static void
5608 subscript_dependence_tester (struct data_dependence_relation *ddr,
5609 			     class loop *loop_nest)
5610 {
5611   if (subscript_dependence_tester_1 (ddr, 0, 1, loop_nest))
5612     dependence_stats.num_dependence_dependent++;
5613 
5614   compute_subscript_distance (ddr);
5615   if (build_classic_dist_vector (ddr, loop_nest))
5616     build_classic_dir_vector (ddr);
5617 }
5618 
5619 /* Returns true when all the access functions of A are affine or
5620    constant with respect to LOOP_NEST.  */
5621 
5622 static bool
5623 access_functions_are_affine_or_constant_p (const struct data_reference *a,
5624 					   const class loop *loop_nest)
5625 {
5626   vec<tree> fns = DR_ACCESS_FNS (a);
5627   for (tree t : fns)
5628     if (!evolution_function_is_invariant_p (t, loop_nest->num)
5629 	&& !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
5630       return false;
5631 
5632   return true;
5633 }
5634 
5635 /* This computes the affine dependence relation between A and B with
5636    respect to LOOP_NEST.  CHREC_KNOWN is used for representing the
5637    independence between two accesses, while CHREC_DONT_KNOW is used
5638    for representing the unknown relation.
5639 
5640    Note that it is possible to stop the computation of the dependence
5641    relation the first time we detect a CHREC_KNOWN element for a given
5642    subscript.  */
5643 
5644 void
5645 compute_affine_dependence (struct data_dependence_relation *ddr,
5646 			   class loop *loop_nest)
5647 {
5648   struct data_reference *dra = DDR_A (ddr);
5649   struct data_reference *drb = DDR_B (ddr);
5650 
5651   if (dump_file && (dump_flags & TDF_DETAILS))
5652     {
5653       fprintf (dump_file, "(compute_affine_dependence\n");
5654       fprintf (dump_file, "  ref_a: ");
5655       print_generic_expr (dump_file, DR_REF (dra));
5656       fprintf (dump_file, ", stmt_a: ");
5657       print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
5658       fprintf (dump_file, "  ref_b: ");
5659       print_generic_expr (dump_file, DR_REF (drb));
5660       fprintf (dump_file, ", stmt_b: ");
5661       print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
5662     }
5663 
5664   /* Analyze only when the dependence relation is not yet known.  */
5665   if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
5666     {
5667       dependence_stats.num_dependence_tests++;
5668 
5669       if (access_functions_are_affine_or_constant_p (dra, loop_nest)
5670 	  && access_functions_are_affine_or_constant_p (drb, loop_nest))
5671 	subscript_dependence_tester (ddr, loop_nest);
5672 
5673       /* As a last case, if the dependence cannot be determined, or if
5674 	 the dependence is considered too difficult to determine, answer
5675 	 "don't know".  */
5676       else
5677 	{
5678 	  dependence_stats.num_dependence_undetermined++;
5679 
5680 	  if (dump_file && (dump_flags & TDF_DETAILS))
5681 	    {
5682 	      fprintf (dump_file, "Data ref a:\n");
5683 	      dump_data_reference (dump_file, dra);
5684 	      fprintf (dump_file, "Data ref b:\n");
5685 	      dump_data_reference (dump_file, drb);
5686 	      fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
5687 	    }
5688 	  finalize_ddr_dependent (ddr, chrec_dont_know);
5689 	}
5690     }
5691 
5692   if (dump_file && (dump_flags & TDF_DETAILS))
5693     {
5694       if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
5695 	fprintf (dump_file, ") -> no dependence\n");
5696       else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
5697 	fprintf (dump_file, ") -> dependence analysis failed\n");
5698       else
5699 	fprintf (dump_file, ")\n");
5700     }
5701 }
5702 
5703 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
5704    the data references in DATAREFS, in the LOOP_NEST.  When
5705    COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
5706    relations.  Return true when successful, i.e. when the number of
5707    data references is small enough to be handled.  */
5708 
5709 bool
5710 compute_all_dependences (const vec<data_reference_p> &datarefs,
5711 			 vec<ddr_p> *dependence_relations,
5712 			 const vec<loop_p> &loop_nest,
5713 			 bool compute_self_and_rr)
5714 {
5715   struct data_dependence_relation *ddr;
5716   struct data_reference *a, *b;
5717   unsigned int i, j;
5718 
5719   if ((int) datarefs.length ()
5720       > param_loop_max_datarefs_for_datadeps)
5721     {
5722       struct data_dependence_relation *ddr;
5723 
5724       /* Insert a single relation into dependence_relations:
5725 	 chrec_dont_know.  */
5726       ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
5727       dependence_relations->safe_push (ddr);
5728       return false;
5729     }
5730 
5731   FOR_EACH_VEC_ELT (datarefs, i, a)
5732     for (j = i + 1; datarefs.iterate (j, &b); j++)
5733       if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
5734 	{
5735 	  ddr = initialize_data_dependence_relation (a, b, loop_nest);
5736 	  dependence_relations->safe_push (ddr);
5737           if (loop_nest.exists ())
5738    	    compute_affine_dependence (ddr, loop_nest[0]);
5739 	}
5740 
5741   if (compute_self_and_rr)
5742     FOR_EACH_VEC_ELT (datarefs, i, a)
5743       {
5744 	ddr = initialize_data_dependence_relation (a, a, loop_nest);
5745 	dependence_relations->safe_push (ddr);
5746         if (loop_nest.exists ())
5747    	  compute_affine_dependence (ddr, loop_nest[0]);
5748       }
5749 
5750   return true;
5751 }
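
/* A minimal usage sketch of the function above, mirroring what
   compute_data_dependences_for_loop does below (the variable names are
   illustrative only):

   | auto_vec<loop_p> nest;
   | auto_vec<data_reference_p> datarefs;
   | auto_vec<ddr_p> ddrs;
   | if (find_loop_nest (loop, &nest)
   |     && (find_data_references_in_loop (loop, &datarefs)
   |         != chrec_dont_know))
   |   compute_all_dependences (datarefs, &ddrs, nest, false);
*/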
5752 
5753 /* Describes a location of a memory reference.  */
5754 
5755 struct data_ref_loc
5756 {
5757   /* The memory reference.  */
5758   tree ref;
5759 
5760   /* True if the memory reference is read.  */
5761   bool is_read;
5762 
5763   /* True if the data reference is conditional within the containing
5764      statement, i.e. if it might not occur even when the statement
5765      is executed and runs to completion.  */
5766   bool is_conditional_in_stmt;
5767 };
5768 
5769 
5770 /* Stores the locations of memory references in STMT to REFERENCES.  Returns
5771    true if STMT clobbers memory, false otherwise.  */
5772 
5773 static bool
5774 get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
5775 {
5776   bool clobbers_memory = false;
5777   data_ref_loc ref;
5778   tree op0, op1;
5779   enum gimple_code stmt_code = gimple_code (stmt);
5780 
5781   /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
5782      As we cannot model data references to accesses that are not
5783      spelled out, give up if such accesses may occur.  */
5784   if (stmt_code == GIMPLE_CALL
5785       && !(gimple_call_flags (stmt) & ECF_CONST))
5786     {
5787       /* Allow IFN_GOMP_SIMD_LANE in their own loops.  */
5788       if (gimple_call_internal_p (stmt))
5789 	switch (gimple_call_internal_fn (stmt))
5790 	  {
5791 	  case IFN_GOMP_SIMD_LANE:
5792 	    {
5793 	      class loop *loop = gimple_bb (stmt)->loop_father;
5794 	      tree uid = gimple_call_arg (stmt, 0);
5795 	      gcc_assert (TREE_CODE (uid) == SSA_NAME);
5796 	      if (loop == NULL
5797 		  || loop->simduid != SSA_NAME_VAR (uid))
5798 		clobbers_memory = true;
5799 	      break;
5800 	    }
5801 	  case IFN_MASK_LOAD:
5802 	  case IFN_MASK_STORE:
5803 	    break;
5804 	  default:
5805 	    clobbers_memory = true;
5806 	    break;
5807 	  }
5808       else
5809 	clobbers_memory = true;
5810     }
5811   else if (stmt_code == GIMPLE_ASM
5812 	   && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
5813 	       || gimple_vuse (stmt)))
5814     clobbers_memory = true;
5815 
5816   if (!gimple_vuse (stmt))
5817     return clobbers_memory;
5818 
5819   if (stmt_code == GIMPLE_ASSIGN)
5820     {
5821       tree base;
5822       op0 = gimple_assign_lhs (stmt);
5823       op1 = gimple_assign_rhs1 (stmt);
5824 
5825       if (DECL_P (op1)
5826 	  || (REFERENCE_CLASS_P (op1)
5827 	      && (base = get_base_address (op1))
5828 	      && TREE_CODE (base) != SSA_NAME
5829 	      && !is_gimple_min_invariant (base)))
5830 	{
5831 	  ref.ref = op1;
5832 	  ref.is_read = true;
5833 	  ref.is_conditional_in_stmt = false;
5834 	  references->safe_push (ref);
5835 	}
5836     }
5837   else if (stmt_code == GIMPLE_CALL)
5838     {
5839       unsigned i, n;
5840       tree ptr, type;
5841       unsigned int align;
5842 
5843       ref.is_read = false;
5844       if (gimple_call_internal_p (stmt))
5845 	switch (gimple_call_internal_fn (stmt))
5846 	  {
5847 	  case IFN_MASK_LOAD:
5848 	    if (gimple_call_lhs (stmt) == NULL_TREE)
5849 	      break;
5850 	    ref.is_read = true;
5851 	    /* FALLTHRU */
5852 	  case IFN_MASK_STORE:
5853 	    ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0);
5854 	    align = tree_to_shwi (gimple_call_arg (stmt, 1));
5855 	    if (ref.is_read)
5856 	      type = TREE_TYPE (gimple_call_lhs (stmt));
5857 	    else
5858 	      type = TREE_TYPE (gimple_call_arg (stmt, 3));
5859 	    if (TYPE_ALIGN (type) != align)
5860 	      type = build_aligned_type (type, align);
5861 	    ref.is_conditional_in_stmt = true;
5862 	    ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0),
5863 				   ptr);
5864 	    references->safe_push (ref);
5865 	    return false;
5866 	  default:
5867 	    break;
5868 	  }
5869 
5870       op0 = gimple_call_lhs (stmt);
5871       n = gimple_call_num_args (stmt);
5872       for (i = 0; i < n; i++)
5873 	{
5874 	  op1 = gimple_call_arg (stmt, i);
5875 
5876 	  if (DECL_P (op1)
5877 	      || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
5878 	    {
5879 	      ref.ref = op1;
5880 	      ref.is_read = true;
5881 	      ref.is_conditional_in_stmt = false;
5882 	      references->safe_push (ref);
5883 	    }
5884 	}
5885     }
5886   else
5887     return clobbers_memory;
5888 
5889   if (op0
5890       && (DECL_P (op0)
5891 	  || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
5892     {
5893       ref.ref = op0;
5894       ref.is_read = false;
5895       ref.is_conditional_in_stmt = false;
5896       references->safe_push (ref);
5897     }
5898   return clobbers_memory;
5899 }
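
/* On GIMPLE assignments the function above records at most one read
   and one write.  For the hypothetical statements

   | _1 = b[i_2];
   | a[i_2] = _3;

   the first yields b[i_2] as a read reference (op1) and the second
   yields a[i_2] as a write reference (op0); plain SSA names are never
   pushed as references.  */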
5900 
5901 
5902 /* Returns true if the loop-nest has any data reference.  */
5903 
5904 bool
5905 loop_nest_has_data_refs (loop_p loop)
5906 {
5907   basic_block *bbs = get_loop_body (loop);
5908   auto_vec<data_ref_loc, 3> references;
5909 
5910   for (unsigned i = 0; i < loop->num_nodes; i++)
5911     {
5912       basic_block bb = bbs[i];
5913       gimple_stmt_iterator bsi;
5914 
5915       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5916 	{
5917 	  gimple *stmt = gsi_stmt (bsi);
5918 	  get_references_in_stmt (stmt, &references);
5919 	  if (references.length ())
5920 	    {
5921 	      free (bbs);
5922 	      return true;
5923 	    }
5924 	}
5925     }
5926   free (bbs);
5927   return false;
5928 }
5929 
5930 /* Stores the data references in STMT to DATAREFS.  If there is an
5931    unanalyzable reference, returns a failure, otherwise success.  NEST
5932    is the outermost loop of the loop nest in which the references should be analyzed.  */
5933 
5934 opt_result
5935 find_data_references_in_stmt (class loop *nest, gimple *stmt,
5936 			      vec<data_reference_p> *datarefs)
5937 {
5938   auto_vec<data_ref_loc, 2> references;
5939   data_reference_p dr;
5940 
5941   if (get_references_in_stmt (stmt, &references))
5942     return opt_result::failure_at (stmt, "statement clobbers memory: %G",
5943 				   stmt);
5944 
5945   for (const data_ref_loc &ref : references)
5946     {
5947       dr = create_data_ref (nest ? loop_preheader_edge (nest) : NULL,
5948 			    loop_containing_stmt (stmt), ref.ref,
5949 			    stmt, ref.is_read, ref.is_conditional_in_stmt);
5950       gcc_assert (dr != NULL);
5951       datarefs->safe_push (dr);
5952     }
5953 
5954   return opt_result::success ();
5955 }
5956 
5957 /* Stores the data references in STMT to DATAREFS.  If there is an
5958    unanalyzable reference, returns false, otherwise returns true.
5959    NEST is the outermost loop of the loop nest in which the references
5960    should be instantiated, LOOP is the loop in which the references
5961    should be analyzed.  */
5962 
5963 bool
5964 graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt,
5965 				       vec<data_reference_p> *datarefs)
5966 {
5967   auto_vec<data_ref_loc, 2> references;
5968   bool ret = true;
5969   data_reference_p dr;
5970 
5971   if (get_references_in_stmt (stmt, &references))
5972     return false;
5973 
5974   for (const data_ref_loc &ref : references)
5975     {
5976       dr = create_data_ref (nest, loop, ref.ref, stmt, ref.is_read,
5977 			    ref.is_conditional_in_stmt);
5978       gcc_assert (dr != NULL);
5979       datarefs->safe_push (dr);
5980     }
5981 
5982   return ret;
5983 }
5984 
5985 /* Search the data references in basic block BB of LOOP, and record
5986    the information into DATAREFS.  Returns chrec_dont_know when
5987    failing to analyze a difficult case, returns NULL_TREE otherwise.  */
5988 
5989 tree
5990 find_data_references_in_bb (class loop *loop, basic_block bb,
5991                             vec<data_reference_p> *datarefs)
5992 {
5993   gimple_stmt_iterator bsi;
5994 
5995   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5996     {
5997       gimple *stmt = gsi_stmt (bsi);
5998 
5999       if (!find_data_references_in_stmt (loop, stmt, datarefs))
6000         {
6001           struct data_reference *res;
6002           res = XCNEW (struct data_reference);
6003           datarefs->safe_push (res);
6004 
6005           return chrec_dont_know;
6006         }
6007     }
6008 
6009   return NULL_TREE;
6010 }
6011 
6012 /* Search the data references in LOOP, and record the information into
6013    DATAREFS.  Returns chrec_dont_know when failing to analyze a
6014    difficult case, returns NULL_TREE otherwise.
6015 
6016    TODO: This function should be made smarter so that it can handle
6017    address arithmetic as if it were an array access, etc.  */
6018 
6019 tree
6020 find_data_references_in_loop (class loop *loop,
6021 			      vec<data_reference_p> *datarefs)
6022 {
6023   basic_block bb, *bbs;
6024   unsigned int i;
6025 
6026   bbs = get_loop_body_in_dom_order (loop);
6027 
6028   for (i = 0; i < loop->num_nodes; i++)
6029     {
6030       bb = bbs[i];
6031 
6032       if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
6033         {
6034           free (bbs);
6035           return chrec_dont_know;
6036         }
6037     }
6038   free (bbs);
6039 
6040   return NULL_TREE;
6041 }
6042 
6043 /* Return the alignment in bytes that DRB is guaranteed to have at all
6044    times.  */
6045 
6046 unsigned int
6047 dr_alignment (innermost_loop_behavior *drb)
6048 {
6049   /* Get the alignment of BASE_ADDRESS + INIT.  */
6050   unsigned int alignment = drb->base_alignment;
6051   unsigned int misalignment = (drb->base_misalignment
6052 			       + TREE_INT_CST_LOW (drb->init));
6053   if (misalignment != 0)
6054     alignment = MIN (alignment, misalignment & -misalignment);
6055 
6056   /* Cap it to the alignment of OFFSET.  */
6057   if (!integer_zerop (drb->offset))
6058     alignment = MIN (alignment, drb->offset_alignment);
6059 
6060   /* Cap it to the alignment of STEP.  */
6061   if (!integer_zerop (drb->step))
6062     alignment = MIN (alignment, drb->step_alignment);
6063 
6064   return alignment;
6065 }
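
/* A worked instance of the computation above, with hypothetical
   values: for base_alignment = 16, base_misalignment = 4 and init = 4,
   the combined misalignment is 8, so the alignment drops to
   MIN (16, 8 & -8) = 8; a nonzero offset with offset_alignment = 4
   then caps the result at 4 bytes, assuming the step alignment does
   not lower it further.  */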
6066 
6067 /* If BASE is a pointer-typed SSA name, try to find the object that it
6068    is based on.  Return this object X on success and store the alignment
6069    in bytes of BASE - &X in *ALIGNMENT_OUT.  */
6070 
6071 static tree
6072 get_base_for_alignment_1 (tree base, unsigned int *alignment_out)
6073 {
6074   if (TREE_CODE (base) != SSA_NAME || !POINTER_TYPE_P (TREE_TYPE (base)))
6075     return NULL_TREE;
6076 
6077   gimple *def = SSA_NAME_DEF_STMT (base);
6078   base = analyze_scalar_evolution (loop_containing_stmt (def), base);
6079 
6080   /* Peel chrecs and record the minimum alignment preserved by
6081      all steps.  */
6082   unsigned int alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
6083   while (TREE_CODE (base) == POLYNOMIAL_CHREC)
6084     {
6085       unsigned int step_alignment = highest_pow2_factor (CHREC_RIGHT (base));
6086       alignment = MIN (alignment, step_alignment);
6087       base = CHREC_LEFT (base);
6088     }
6089 
6090   /* Punt if the expression is too complicated to handle.  */
6091   if (tree_contains_chrecs (base, NULL) || !POINTER_TYPE_P (TREE_TYPE (base)))
6092     return NULL_TREE;
6093 
6094   /* The only useful cases are those for which a dereference folds to something
6095      other than an INDIRECT_REF.  */
6096   tree ref_type = TREE_TYPE (TREE_TYPE (base));
6097   tree ref = fold_indirect_ref_1 (UNKNOWN_LOCATION, ref_type, base);
6098   if (!ref)
6099     return NULL_TREE;
6100 
6101   /* Analyze the base to which the steps we peeled were applied.  */
6102   poly_int64 bitsize, bitpos, bytepos;
6103   machine_mode mode;
6104   int unsignedp, reversep, volatilep;
6105   tree offset;
6106   base = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
6107 			      &unsignedp, &reversep, &volatilep);
6108   if (!base || !multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
6109     return NULL_TREE;
6110 
6111   /* Restrict the alignment to that guaranteed by the offsets.  */
6112   unsigned int bytepos_alignment = known_alignment (bytepos);
6113   if (bytepos_alignment != 0)
6114     alignment = MIN (alignment, bytepos_alignment);
6115   if (offset)
6116     {
6117       unsigned int offset_alignment = highest_pow2_factor (offset);
6118       alignment = MIN (alignment, offset_alignment);
6119     }
6120 
6121   *alignment_out = alignment;
6122   return base;
6123 }
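
/* For instance, on the hypothetical evolution {&a[1], +, 8}_1 where
   "a" is an array of 4-byte elements: the peeling loop above records a
   step alignment of 8, the dereference folds to a[1], and the byte
   position 4 of a[1] restricts the result, so *ALIGNMENT_OUT becomes
   MIN (8, 4) = 4 and the object "a" is returned.  */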
6124 
6125 /* Return the object whose alignment would need to be changed in order
6126    to increase the alignment of ADDR.  Store the maximum achievable
6127    alignment in *MAX_ALIGNMENT.  */
6128 
6129 tree
6130 get_base_for_alignment (tree addr, unsigned int *max_alignment)
6131 {
6132   tree base = get_base_for_alignment_1 (addr, max_alignment);
6133   if (base)
6134     return base;
6135 
6136   if (TREE_CODE (addr) == ADDR_EXPR)
6137     addr = TREE_OPERAND (addr, 0);
6138   *max_alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
6139   return addr;
6140 }
6141 
6142 /* Recursive helper function.  */
6143 
6144 static bool
6145 find_loop_nest_1 (class loop *loop, vec<loop_p> *loop_nest)
6146 {
6147   /* Inner loops of the nest should not contain siblings.  Example:
6148      when there are two consecutive loops,
6149 
6150      | loop_0
6151      |   loop_1
6152      |     A[{0, +, 1}_1]
6153      |   endloop_1
6154      |   loop_2
6155      |     A[{0, +, 1}_2]
6156      |   endloop_2
6157      | endloop_0
6158 
6159      the dependence relation cannot be captured by the distance
6160      abstraction.  */
6161   if (loop->next)
6162     return false;
6163 
6164   loop_nest->safe_push (loop);
6165   if (loop->inner)
6166     return find_loop_nest_1 (loop->inner, loop_nest);
6167   return true;
6168 }
6169 
6170 /* Return false when LOOP is not well nested.  Otherwise return true
6171    and insert into LOOP_NEST the loops of the nest, ordered from the
6172    outermost to the innermost, as they will appear in the classic
6173    distance vector.  */
6174 
6175 bool
6176 find_loop_nest (class loop *loop, vec<loop_p> *loop_nest)
6177 {
6178   loop_nest->safe_push (loop);
6179   if (loop->inner)
6180     return find_loop_nest_1 (loop->inner, loop_nest);
6181   return true;
6182 }
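
/* Usage sketch (editor's illustration, not part of GCC): collect the
   nest rooted at LOOP and walk it from the outermost loop inwards.  */

static void
sketch_walk_nest (class loop *loop)
{
  auto_vec<loop_p> nest;
  if (!find_loop_nest (loop, &nest))
    return;	/* Some loop in the nest has a sibling; give up.  */

  /* NEST holds the loops outermost-first.  */
  for (loop_p l : nest)
    (void) l;	/* Process L here.  */
}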
6183 
6184 /* Returns true when the data dependences have been computed, false otherwise.
6185    Given a loop nest LOOP, the following vectors are returned:
6186    DATAREFS is initialized to all the data references contained in this loop,
6187    DEPENDENCE_RELATIONS contains the relations between those data references.
6188    Read-read and self relations are also computed when
6189    COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE.  */
6190 
6191 bool
6192 compute_data_dependences_for_loop (class loop *loop,
6193 				   bool compute_self_and_read_read_dependences,
6194 				   vec<loop_p> *loop_nest,
6195 				   vec<data_reference_p> *datarefs,
6196 				   vec<ddr_p> *dependence_relations)
6197 {
6198   bool res = true;
6199 
6200   memset (&dependence_stats, 0, sizeof (dependence_stats));
6201 
6202   /* If the loop nest is not well formed, or one of the data references
6203      is not computable, give up without spending time computing the
6204      other dependences.  */
6205   if (!loop
6206       || !find_loop_nest (loop, loop_nest)
6207       || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
6208       || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
6209 				   compute_self_and_read_read_dependences))
6210     res = false;
6211 
6212   if (dump_file && (dump_flags & TDF_STATS))
6213     {
6214       fprintf (dump_file, "Dependence tester statistics:\n");
6215 
6216       fprintf (dump_file, "Number of dependence tests: %d\n",
6217 	       dependence_stats.num_dependence_tests);
6218       fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
6219 	       dependence_stats.num_dependence_dependent);
6220       fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
6221 	       dependence_stats.num_dependence_independent);
6222       fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
6223 	       dependence_stats.num_dependence_undetermined);
6224 
6225       fprintf (dump_file, "Number of subscript tests: %d\n",
6226 	       dependence_stats.num_subscript_tests);
6227       fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
6228 	       dependence_stats.num_subscript_undetermined);
6229       fprintf (dump_file, "Number of same subscript function: %d\n",
6230 	       dependence_stats.num_same_subscript_function);
6231 
6232       fprintf (dump_file, "Number of ziv tests: %d\n",
6233 	       dependence_stats.num_ziv);
6234       fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
6235 	       dependence_stats.num_ziv_dependent);
6236       fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
6237 	       dependence_stats.num_ziv_independent);
6238       fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
6239 	       dependence_stats.num_ziv_unimplemented);
6240 
6241       fprintf (dump_file, "Number of siv tests: %d\n",
6242 	       dependence_stats.num_siv);
6243       fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
6244 	       dependence_stats.num_siv_dependent);
6245       fprintf (dump_file, "Number of siv tests returning independent: %d\n",
6246 	       dependence_stats.num_siv_independent);
6247       fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
6248 	       dependence_stats.num_siv_unimplemented);
6249 
6250       fprintf (dump_file, "Number of miv tests: %d\n",
6251 	       dependence_stats.num_miv);
6252       fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
6253 	       dependence_stats.num_miv_dependent);
6254       fprintf (dump_file, "Number of miv tests returning independent: %d\n",
6255 	       dependence_stats.num_miv_independent);
6256       fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
6257 	       dependence_stats.num_miv_unimplemented);
6258     }
6259 
6260   return res;
6261 }
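
/* Usage sketch (editor's illustration, not part of GCC): run the
   analysis on LOOP and release everything afterwards.  Returns true if
   the analysis succeeded and found a possible dependence.  */

static bool
sketch_loop_has_dependence (class loop *loop)
{
  auto_vec<loop_p> loop_nest;
  auto_vec<data_reference_p> datarefs;
  auto_vec<ddr_p> dependence_relations;

  bool computed
    = compute_data_dependences_for_loop (loop, false, &loop_nest,
					 &datarefs, &dependence_relations);
  bool found = false;
  if (computed)
    for (ddr_p ddr : dependence_relations)
      /* chrec_known marks relations that were proved independent.  */
      if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
	found = true;

  free_dependence_relations (dependence_relations);
  free_data_refs (datarefs);
  return computed && found;
}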
6262 
6263 /* Free the memory used by a data dependence relation DDR.  */
6264 
6265 void
6266 free_dependence_relation (struct data_dependence_relation *ddr)
6267 {
6268   if (ddr == NULL)
6269     return;
6270 
6271   if (DDR_SUBSCRIPTS (ddr).exists ())
6272     free_subscripts (DDR_SUBSCRIPTS (ddr));
6273   DDR_DIST_VECTS (ddr).release ();
6274   DDR_DIR_VECTS (ddr).release ();
6275 
6276   free (ddr);
6277 }
6278 
6279 /* Free the memory used by the data dependence relations from
6280    DEPENDENCE_RELATIONS.  */
6281 
6282 void
6283 free_dependence_relations (vec<ddr_p>& dependence_relations)
6284 {
6285   for (data_dependence_relation *ddr : dependence_relations)
6286     if (ddr)
6287       free_dependence_relation (ddr);
6288 
6289   dependence_relations.release ();
6290 }
6291 
6292 /* Free the memory used by the data references from DATAREFS.  */
6293 
6294 void
6295 free_data_refs (vec<data_reference_p>& datarefs)
6296 {
6297   for (data_reference *dr : datarefs)
6298     free_data_ref (dr);
6299   datarefs.release ();
6300 }
6301 
6302 /* Common routine implementing both dr_direction_indicator and
6303    dr_zero_step_indicator.  Return USEFUL_MIN if the indicator is known
6304    to be >= USEFUL_MIN and -1 if the indicator is known to be negative.
6305    Return the step as the indicator otherwise.  */
6306 
6307 static tree
6308 dr_step_indicator (struct data_reference *dr, int useful_min)
6309 {
6310   tree step = DR_STEP (dr);
6311   if (!step)
6312     return NULL_TREE;
6313   STRIP_NOPS (step);
6314   /* Look for cases where the step is scaled by a positive constant
6315      integer, which will often be the access size.  If the multiplication
6316      doesn't change the sign (due to overflow effects) then we can
6317      test the unscaled value instead.  */
6318   if (TREE_CODE (step) == MULT_EXPR
6319       && TREE_CODE (TREE_OPERAND (step, 1)) == INTEGER_CST
6320       && tree_int_cst_sgn (TREE_OPERAND (step, 1)) > 0)
6321     {
6322       tree factor = TREE_OPERAND (step, 1);
6323       step = TREE_OPERAND (step, 0);
6324 
6325       /* Strip widening and truncating conversions as well as nops.  */
6326       if (CONVERT_EXPR_P (step)
6327 	  && INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (step, 0))))
6328 	step = TREE_OPERAND (step, 0);
6329       tree type = TREE_TYPE (step);
6330 
6331       /* Get the range of step values that would not cause overflow.  */
6332       widest_int minv = (wi::to_widest (TYPE_MIN_VALUE (ssizetype))
6333 			 / wi::to_widest (factor));
6334       widest_int maxv = (wi::to_widest (TYPE_MAX_VALUE (ssizetype))
6335 			 / wi::to_widest (factor));
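      /* For instance (hypothetical 64-bit target, FACTOR == 4): MINV is
	 -2^63 / 4 == -2^61 and MAXV is (2^63 - 1) / 4 == 2^61 - 1, so
	 any unscaled step in [-2^61, 2^61 - 1] can be multiplied by 4
	 without overflowing ssizetype.  */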
6336 
6337       /* Get the range of values that the unconverted step actually has.  */
6338       wide_int step_min, step_max;
6339       value_range vr;
6340       if (TREE_CODE (step) != SSA_NAME
6341 	  || !get_range_query (cfun)->range_of_expr (vr, step)
6342 	  || vr.kind () != VR_RANGE)
6343 	{
6344 	  step_min = wi::to_wide (TYPE_MIN_VALUE (type));
6345 	  step_max = wi::to_wide (TYPE_MAX_VALUE (type));
6346 	}
6347       else
6348 	{
6349 	  step_min = vr.lower_bound ();
6350 	  step_max = vr.upper_bound ();
6351 	}
6352 
6353       /* Check whether the unconverted step has an acceptable range.  */
6354       signop sgn = TYPE_SIGN (type);
6355       if (wi::les_p (minv, widest_int::from (step_min, sgn))
6356 	  && wi::ges_p (maxv, widest_int::from (step_max, sgn)))
6357 	{
6358 	  if (wi::ge_p (step_min, useful_min, sgn))
6359 	    return ssize_int (useful_min);
6360 	  else if (wi::lt_p (step_max, 0, sgn))
6361 	    return ssize_int (-1);
6362 	  else
6363 	    return fold_convert (ssizetype, step);
6364 	}
6365     }
6366   return DR_STEP (dr);
6367 }
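
/* Worked example (editor's note, hypothetical values): if DR_STEP (dr)
   is (ssizetype) n_5 * 4 and the range recorded for n_5 is [1, 100],
   then dr_step_indicator (dr, 1) returns ssize_int (1): scaling by 4
   cannot overflow ssizetype, and the unscaled step is known to be
   >= USEFUL_MIN.  */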
6368 
6369 /* Return a value that is negative iff DR has a negative step.  */
6370 
6371 tree
6372 dr_direction_indicator (struct data_reference *dr)
6373 {
6374   return dr_step_indicator (dr, 0);
6375 }
6376 
6377 /* Return a value that is zero iff DR has a zero step.  */
6378 
6379 tree
6380 dr_zero_step_indicator (struct data_reference *dr)
6381 {
6382   return dr_step_indicator (dr, 1);
6383 }
6384 
6385 /* Return true if DR is known to have a nonnegative (but possibly zero)
6386    step.  */
6387 
6388 bool
6389 dr_known_forward_stride_p (struct data_reference *dr)
6390 {
6391   tree indicator = dr_direction_indicator (dr);
6392   tree neg_step_val = fold_binary (LT_EXPR, boolean_type_node,
6393 				   fold_convert (ssizetype, indicator),
6394 				   ssize_int (0));
6395   return neg_step_val && integer_zerop (neg_step_val);
6396 }
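
/* For example: if dr_direction_indicator returns ssize_int (0), the
   folded "indicator < 0" test is boolean_false_node, integer_zerop
   holds, and the stride is treated as forward.  If the indicator is a
   variable whose sign is unknown, fold_binary cannot fold the
   comparison and returns NULL_TREE, so the function conservatively
   answers false.  */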
6397