/* Data references and dependences detectors.
   Copyright (C) 2003-2020 Free Software Foundation, Inc.
   Contributed by Sebastian Pop <pop@cri.ensmp.fr>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass walks a given loop structure searching for array
   references.  The information about the array accesses is recorded
   in DATA_REFERENCE structures.

   The basic test for determining the dependences is:
   given two access functions chrec1 and chrec2 to the same array, and
   x and y two vectors from the iteration domain, the same element of
   the array is accessed twice at iterations x and y if and only if:
   |             chrec1 (x) == chrec2 (y).

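   For instance, for the loop
   |   for (i = 0; i < N; i++)  A[2*i + 1] = A[2*i];
   the write has the access function {1, +, 2}_1 and the read
   {0, +, 2}_1.  The test asks for integer solutions of
   |             2*x + 1 == 2*y,
   and since the left-hand side is always odd while the right-hand
   side is always even, there are none: the accesses are independent.
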
   The goals of this analysis are:

   - to determine independence: the relation between two independent
     accesses is qualified with chrec_known (this information enables
     loop parallelization),

   - when two data references access the same data, to qualify the
     dependence relation with classic dependence representations:

       - distance vectors
       - direction vectors
       - loop carried level dependence
       - polyhedron dependence
     or with the chains of recurrences based representation,

   - to define a knowledge base for storing the data dependence
     information,

   - to define an interface to access this data.


   Definitions:

   - subscript: given two array accesses, a subscript is the tuple
   composed of the access functions for a given dimension.  Example:
   Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
   (f1, g1), (f2, g2), (f3, g3).

   - Diophantine equation: an equation whose coefficients and
   solutions are integer constants, for example the equation
   |   3*x + 2*y = 1
   has an integer solution x = 1 and y = -1.

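   Dependence testing reduces to such equations: for a subscript pair
   with equal steps ({c1, +, s}, {c2, +, s}), the conflict equation
   |   s*x + c1 == s*y + c2
   has an integer solution iff s divides c2 - c1, in which case
   (c2 - c1)/s is the dependence distance.  This divisibility question
   is what tree_fold_divides_p and int_divides_p below answer.
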
   References:

   - "Advanced Compilation for High Performance Computing" by Randy
   Allen and Ken Kennedy.

   - "Practical Dependence Testing" by Gina Goff, Ken Kennedy and
   Chau-Wen Tseng.
   http://citeseer.ist.psu.edu/goff91practical.html

   - "Loop Transformations for Restructuring Compilers - The Foundations"
   by Utpal Banerjee.


*/

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "gimple-pretty-print.h"
#include "alias.h"
#include "fold-const.h"
#include "expr.h"
#include "gimple-iterator.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "dumpfile.h"
#include "tree-affine.h"
#include "builtins.h"
#include "tree-eh.h"
#include "ssa.h"
#include "internal-fn.h"

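/* Counters summarizing the work done by the subscript dependence tests
   (ZIV/SIV/MIV); they are only written while dependence testing runs
   and are intended for the statistics dump output.  */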
static struct datadep_stats
{
  int num_dependence_tests;
  int num_dependence_dependent;
  int num_dependence_independent;
  int num_dependence_undetermined;

  int num_subscript_tests;
  int num_subscript_undetermined;
  int num_same_subscript_function;

  int num_ziv;
  int num_ziv_independent;
  int num_ziv_dependent;
  int num_ziv_unimplemented;

  int num_siv;
  int num_siv_independent;
  int num_siv_dependent;
  int num_siv_unimplemented;

  int num_miv;
  int num_miv_independent;
  int num_miv_dependent;
  int num_miv_unimplemented;
} dependence_stats;

static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
					   unsigned int, unsigned int,
					   class loop *);

/* Returns true iff A divides B.  */

static inline bool
tree_fold_divides_p (const_tree a, const_tree b)
{
  gcc_assert (TREE_CODE (a) == INTEGER_CST);
  gcc_assert (TREE_CODE (b) == INTEGER_CST);
  return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
}

/* Returns true iff A divides B.  */

static inline bool
int_divides_p (lambda_int a, lambda_int b)
{
  return ((b % a) == 0);
}

/* Return true if reference REF contains a union access.  */

static bool
ref_contains_union_access_p (tree ref)
{
  while (handled_component_p (ref))
    {
      ref = TREE_OPERAND (ref, 0);
      if (TREE_CODE (TREE_TYPE (ref)) == UNION_TYPE
	  || TREE_CODE (TREE_TYPE (ref)) == QUAL_UNION_TYPE)
	return true;
    }
  return false;
}
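
/* Union accesses need this special treatment because all fields of a
   union share storage: the FIELD_DECL-offset disambiguation used for
   RECORD_TYPE COMPONENT_REFs in dr_analyze_indices below would wrongly
   conclude that u.f1 and u.f2 cannot overlap.  */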



/* Dump into FILE all the data references from DATAREFS.  */

static void
dump_data_references (FILE *file, vec<data_reference_p> datarefs)
{
  unsigned int i;
  struct data_reference *dr;

  FOR_EACH_VEC_ELT (datarefs, i, dr)
    dump_data_reference (file, dr);
}

/* Unified dump functions for a vec of data references; print to
   stderr.  */

DEBUG_FUNCTION void
debug (vec<data_reference_p> &ref)
{
  dump_data_references (stderr, ref);
}

DEBUG_FUNCTION void
debug (vec<data_reference_p> *ptr)
{
  if (ptr)
    debug (*ptr);
  else
    fprintf (stderr, "<nil>\n");
}


/* Dump into STDERR all the data references from DATAREFS.  */

DEBUG_FUNCTION void
debug_data_references (vec<data_reference_p> datarefs)
{
  dump_data_references (stderr, datarefs);
}

/* Print to STDERR the data_reference DR.  */

DEBUG_FUNCTION void
debug_data_reference (struct data_reference *dr)
{
  dump_data_reference (stderr, dr);
}

/* Dump function for a DATA_REFERENCE structure.  */

void
dump_data_reference (FILE *outf,
		     struct data_reference *dr)
{
  unsigned int i;

  fprintf (outf, "#(Data Ref: \n");
  fprintf (outf, "#  bb: %d \n", gimple_bb (DR_STMT (dr))->index);
  fprintf (outf, "#  stmt: ");
  print_gimple_stmt (outf, DR_STMT (dr), 0);
  fprintf (outf, "#  ref: ");
  print_generic_stmt (outf, DR_REF (dr));
  fprintf (outf, "#  base_object: ");
  print_generic_stmt (outf, DR_BASE_OBJECT (dr));

  for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
    {
      fprintf (outf, "#  Access function %d: ", i);
      print_generic_stmt (outf, DR_ACCESS_FN (dr, i));
    }
  fprintf (outf, "#)\n");
}

/* Unified dump function for a DATA_REFERENCE structure.  */

DEBUG_FUNCTION void
debug (data_reference &ref)
{
  dump_data_reference (stderr, &ref);
}

DEBUG_FUNCTION void
debug (data_reference *ptr)
{
  if (ptr)
    debug (*ptr);
  else
    fprintf (stderr, "<nil>\n");
}


/* Dumps the affine function described by FN to the file OUTF.  */

DEBUG_FUNCTION void
dump_affine_function (FILE *outf, affine_fn fn)
{
  unsigned i;
  tree coef;

  print_generic_expr (outf, fn[0], TDF_SLIM);
  for (i = 1; fn.iterate (i, &coef); i++)
    {
      fprintf (outf, " + ");
      print_generic_expr (outf, coef, TDF_SLIM);
      fprintf (outf, " * x_%u", i);
    }
}

/* Dumps the conflict function CF to the file OUTF.  */

DEBUG_FUNCTION void
dump_conflict_function (FILE *outf, conflict_function *cf)
{
  unsigned i;

  if (cf->n == NO_DEPENDENCE)
    fprintf (outf, "no dependence");
  else if (cf->n == NOT_KNOWN)
    fprintf (outf, "not known");
  else
    {
      for (i = 0; i < cf->n; i++)
	{
	  if (i != 0)
	    fprintf (outf, " ");
	  fprintf (outf, "[");
	  dump_affine_function (outf, cf->fns[i]);
	  fprintf (outf, "]");
	}
    }
}

/* Dump function for a SUBSCRIPT structure.  */

DEBUG_FUNCTION void
dump_subscript (FILE *outf, struct subscript *subscript)
{
  conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);

  fprintf (outf, "\n (subscript \n");
  fprintf (outf, "  iterations_that_access_an_element_twice_in_A: ");
  dump_conflict_function (outf, cf);
  if (CF_NONTRIVIAL_P (cf))
    {
      tree last_iteration = SUB_LAST_CONFLICT (subscript);
      fprintf (outf, "\n  last_conflict: ");
      print_generic_expr (outf, last_iteration);
    }

  cf = SUB_CONFLICTS_IN_B (subscript);
  fprintf (outf, "\n  iterations_that_access_an_element_twice_in_B: ");
  dump_conflict_function (outf, cf);
  if (CF_NONTRIVIAL_P (cf))
    {
      tree last_iteration = SUB_LAST_CONFLICT (subscript);
      fprintf (outf, "\n  last_conflict: ");
      print_generic_expr (outf, last_iteration);
    }

  fprintf (outf, "\n  (Subscript distance: ");
  print_generic_expr (outf, SUB_DISTANCE (subscript));
  fprintf (outf, " ))\n");
}

/* Print the classic direction vector DIRV to OUTF.  */

DEBUG_FUNCTION void
print_direction_vector (FILE *outf,
			lambda_vector dirv,
			int length)
{
  int eq;

  for (eq = 0; eq < length; eq++)
    {
      enum data_dependence_direction dir = ((enum data_dependence_direction)
					    dirv[eq]);

      switch (dir)
	{
	case dir_positive:
	  fprintf (outf, "    +");
	  break;
	case dir_negative:
	  fprintf (outf, "    -");
	  break;
	case dir_equal:
	  fprintf (outf, "    =");
	  break;
	case dir_positive_or_equal:
	  fprintf (outf, "   +=");
	  break;
	case dir_positive_or_negative:
	  fprintf (outf, "   +-");
	  break;
	case dir_negative_or_equal:
	  fprintf (outf, "   -=");
	  break;
	case dir_star:
	  fprintf (outf, "    *");
	  break;
	default:
	  fprintf (outf, "indep");
	  break;
	}
    }
  fprintf (outf, "\n");
}

/* Print a vector of direction vectors.  */

DEBUG_FUNCTION void
print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
		   int length)
{
  unsigned j;
  lambda_vector v;

  FOR_EACH_VEC_ELT (dir_vects, j, v)
    print_direction_vector (outf, v, length);
}

/* Print out a vector VEC of length N to OUTFILE.  */

DEBUG_FUNCTION void
print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
{
  int i;

  for (i = 0; i < n; i++)
    fprintf (outfile, HOST_WIDE_INT_PRINT_DEC " ", vector[i]);
  fprintf (outfile, "\n");
}

/* Print a vector of distance vectors.  */

DEBUG_FUNCTION void
print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
		    int length)
{
  unsigned j;
  lambda_vector v;

  FOR_EACH_VEC_ELT (dist_vects, j, v)
    print_lambda_vector (outf, v, length);
}

/* Dump function for a DATA_DEPENDENCE_RELATION structure.  */

DEBUG_FUNCTION void
dump_data_dependence_relation (FILE *outf,
			       struct data_dependence_relation *ddr)
{
  struct data_reference *dra, *drb;

  fprintf (outf, "(Data Dep: \n");

  if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      if (ddr)
	{
	  dra = DDR_A (ddr);
	  drb = DDR_B (ddr);
	  if (dra)
	    dump_data_reference (outf, dra);
	  else
	    fprintf (outf, "    (nil)\n");
	  if (drb)
	    dump_data_reference (outf, drb);
	  else
	    fprintf (outf, "    (nil)\n");
	}
      fprintf (outf, "    (don't know)\n)\n");
      return;
    }

  dra = DDR_A (ddr);
  drb = DDR_B (ddr);
  dump_data_reference (outf, dra);
  dump_data_reference (outf, drb);

  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    fprintf (outf, "    (no dependence)\n");

  else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
    {
      unsigned int i;
      class loop *loopi;

      subscript *sub;
      FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
	{
	  fprintf (outf, "  access_fn_A: ");
	  print_generic_stmt (outf, SUB_ACCESS_FN (sub, 0));
	  fprintf (outf, "  access_fn_B: ");
	  print_generic_stmt (outf, SUB_ACCESS_FN (sub, 1));
	  dump_subscript (outf, sub);
	}

      fprintf (outf, "  loop nest: (");
      FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
	fprintf (outf, "%d ", loopi->num);
      fprintf (outf, ")\n");

      for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
	{
	  fprintf (outf, "  distance_vector: ");
	  print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
			       DDR_NB_LOOPS (ddr));
	}

      for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
	{
	  fprintf (outf, "  direction_vector: ");
	  print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
				  DDR_NB_LOOPS (ddr));
	}
    }

  fprintf (outf, ")\n");
}

/* Debug version.  */

DEBUG_FUNCTION void
debug_data_dependence_relation (struct data_dependence_relation *ddr)
{
  dump_data_dependence_relation (stderr, ddr);
}

/* Dump into FILE all the dependence relations from DDRS.  */

DEBUG_FUNCTION void
dump_data_dependence_relations (FILE *file,
				vec<ddr_p> ddrs)
{
  unsigned int i;
  struct data_dependence_relation *ddr;

  FOR_EACH_VEC_ELT (ddrs, i, ddr)
    dump_data_dependence_relation (file, ddr);
}

DEBUG_FUNCTION void
debug (vec<ddr_p> &ref)
{
  dump_data_dependence_relations (stderr, ref);
}

DEBUG_FUNCTION void
debug (vec<ddr_p> *ptr)
{
  if (ptr)
    debug (*ptr);
  else
    fprintf (stderr, "<nil>\n");
}


/* Dump to STDERR all the dependence relations from DDRS.  */

DEBUG_FUNCTION void
debug_data_dependence_relations (vec<ddr_p> ddrs)
{
  dump_data_dependence_relations (stderr, ddrs);
}

/* Dumps in FILE the distance and direction vectors of the dependence
   relations in DDRS.  The size of each dependence vector, in other
   words the number of loops in the considered nest, is given by
   DDR_NB_LOOPS.  */

DEBUG_FUNCTION void
dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
{
  unsigned int i, j;
  struct data_dependence_relation *ddr;
  lambda_vector v;

  FOR_EACH_VEC_ELT (ddrs, i, ddr)
    if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
      {
	FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), j, v)
	  {
	    fprintf (file, "DISTANCE_V (");
	    print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
	    fprintf (file, ")\n");
	  }

	FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), j, v)
	  {
	    fprintf (file, "DIRECTION_V (");
	    print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
	    fprintf (file, ")\n");
	  }
      }

  fprintf (file, "\n\n");
}
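
/* As an illustrative reading of the vectors printed above: for
   |   for (i) A[i + 1] = A[i];
   the flow dependence from the write to the read has distance vector
   (1) and direction vector (+): each iteration reads the element that
   was written one iteration earlier.  */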

/* Dumps the data dependence relations DDRS in FILE.  */

DEBUG_FUNCTION void
dump_ddrs (FILE *file, vec<ddr_p> ddrs)
{
  unsigned int i;
  struct data_dependence_relation *ddr;

  FOR_EACH_VEC_ELT (ddrs, i, ddr)
    dump_data_dependence_relation (file, ddr);

  fprintf (file, "\n\n");
}

DEBUG_FUNCTION void
debug_ddrs (vec<ddr_p> ddrs)
{
  dump_ddrs (stderr, ddrs);
}

static void
split_constant_offset (tree exp, tree *var, tree *off,
		       hash_map<tree, std::pair<tree, tree> > &cache,
		       unsigned *limit);

/* Helper function for split_constant_offset.  Expresses OP0 CODE OP1
   (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
   constant of type ssizetype, and returns true.  If we cannot do this
   with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
   is returned.  */

static bool
split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
			 tree *var, tree *off,
			 hash_map<tree, std::pair<tree, tree> > &cache,
			 unsigned *limit)
{
  tree var0, var1;
  tree off0, off1;
  enum tree_code ocode = code;

  *var = NULL_TREE;
  *off = NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      *var = build_int_cst (type, 0);
      *off = fold_convert (ssizetype, op0);
      return true;

    case POINTER_PLUS_EXPR:
      ocode = PLUS_EXPR;
      /* FALLTHROUGH */
    case PLUS_EXPR:
    case MINUS_EXPR:
      if (TREE_CODE (op1) == INTEGER_CST)
	{
	  split_constant_offset (op0, &var0, &off0, cache, limit);
	  *var = var0;
	  *off = size_binop (ocode, off0, fold_convert (ssizetype, op1));
	  return true;
	}
      split_constant_offset (op0, &var0, &off0, cache, limit);
      split_constant_offset (op1, &var1, &off1, cache, limit);
      *var = fold_build2 (code, type, var0, var1);
      *off = size_binop (ocode, off0, off1);
      return true;

    case MULT_EXPR:
      if (TREE_CODE (op1) != INTEGER_CST)
	return false;

      split_constant_offset (op0, &var0, &off0, cache, limit);
      *var = fold_build2 (MULT_EXPR, type, var0, op1);
      *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
      return true;

    case ADDR_EXPR:
      {
	tree base, poffset;
	poly_int64 pbitsize, pbitpos, pbytepos;
	machine_mode pmode;
	int punsignedp, preversep, pvolatilep;

	op0 = TREE_OPERAND (op0, 0);
	base
	  = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode,
				 &punsignedp, &preversep, &pvolatilep);

	if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
	  return false;
	base = build_fold_addr_expr (base);
	off0 = ssize_int (pbytepos);

	if (poffset)
	  {
	    split_constant_offset (poffset, &poffset, &off1, cache, limit);
	    off0 = size_binop (PLUS_EXPR, off0, off1);
	    if (POINTER_TYPE_P (TREE_TYPE (base)))
	      base = fold_build_pointer_plus (base, poffset);
	    else
	      base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
				  fold_convert (TREE_TYPE (base), poffset));
	  }

	var0 = fold_convert (type, base);

	/* If variable length types are involved, punt, otherwise casts
	   might be converted into ARRAY_REFs in gimplify_conversion.
	   To compute that ARRAY_REF's element size, TYPE_SIZE_UNIT, which
	   possibly no longer appears in current GIMPLE, might resurface.
	   This perhaps could run
	   if (CONVERT_EXPR_P (var0))
	     {
	       gimplify_conversion (&var0);
	       // Attempt to fill in the element size of any ARRAY_REF
	       // found within var0 from the corresponding ARRAY_REF
	       // embedded in op0; if unsuccessful, just punt.
	     }  */
	while (POINTER_TYPE_P (type))
	  type = TREE_TYPE (type);
	if (int_size_in_bytes (type) < 0)
	  return false;

	*var = var0;
	*off = off0;
	return true;
      }

    case SSA_NAME:
      {
	if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
	  return false;

	gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
	enum tree_code subcode;

	if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
	  return false;

	subcode = gimple_assign_rhs_code (def_stmt);

	/* We are using a cache to avoid un-CSEing large amounts of code.  */
	bool use_cache = false;
	if (!has_single_use (op0)
	    && (subcode == POINTER_PLUS_EXPR
		|| subcode == PLUS_EXPR
		|| subcode == MINUS_EXPR
		|| subcode == MULT_EXPR
		|| subcode == ADDR_EXPR
		|| CONVERT_EXPR_CODE_P (subcode)))
	  {
	    use_cache = true;
	    bool existed;
	    std::pair<tree, tree> &e = cache.get_or_insert (op0, &existed);
	    if (existed)
	      {
		if (integer_zerop (e.second))
		  return false;
		*var = e.first;
		*off = e.second;
		return true;
	      }
	    e = std::make_pair (op0, ssize_int (0));
	  }

	if (*limit == 0)
	  return false;
	--*limit;

	var0 = gimple_assign_rhs1 (def_stmt);
	var1 = gimple_assign_rhs2 (def_stmt);

	bool res = split_constant_offset_1 (type, var0, subcode, var1,
					    var, off, cache, limit);
	if (res && use_cache)
	  *cache.get (op0) = std::make_pair (*var, *off);
	return res;
      }
    CASE_CONVERT:
      {
	/* We must not introduce undefined overflow, and we must not change
	   the value.  Hence we're okay if the inner type doesn't overflow
	   to start with (pointer or signed), the outer type also is an
	   integer or pointer and the outer precision is at least as large
	   as the inner.  */
	tree itype = TREE_TYPE (op0);
	if ((POINTER_TYPE_P (itype)
	     || (INTEGRAL_TYPE_P (itype) && !TYPE_OVERFLOW_TRAPS (itype)))
	    && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
	    && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
	  {
	    if (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_WRAPS (itype))
	      {
		/* Split the unconverted operand and try to prove that
		   wrapping isn't a problem.  */
		tree tmp_var, tmp_off;
		split_constant_offset (op0, &tmp_var, &tmp_off, cache, limit);

		/* See whether we have an SSA_NAME whose range is known
		   to be [A, B].  */
		if (TREE_CODE (tmp_var) != SSA_NAME)
		  return false;
		wide_int var_min, var_max;
		value_range_kind vr_type = get_range_info (tmp_var, &var_min,
							   &var_max);
		wide_int var_nonzero = get_nonzero_bits (tmp_var);
		signop sgn = TYPE_SIGN (itype);
		if (intersect_range_with_nonzero_bits (vr_type, &var_min,
						       &var_max, var_nonzero,
						       sgn) != VR_RANGE)
		  return false;

		/* See whether the range of OP0 (i.e. TMP_VAR + TMP_OFF)
		   is known to be [A + TMP_OFF, B + TMP_OFF], with all
		   operations done in ITYPE.  The addition must overflow
		   at both ends of the range or at neither.  */
		wi::overflow_type overflow[2];
		unsigned int prec = TYPE_PRECISION (itype);
		wide_int woff = wi::to_wide (tmp_off, prec);
		wide_int op0_min = wi::add (var_min, woff, sgn, &overflow[0]);
		wi::add (var_max, woff, sgn, &overflow[1]);
		if ((overflow[0] != wi::OVF_NONE) != (overflow[1] != wi::OVF_NONE))
		  return false;

		/* Calculate (ssizetype) OP0 - (ssizetype) TMP_VAR.  */
		widest_int diff = (widest_int::from (op0_min, sgn)
				   - widest_int::from (var_min, sgn));
		var0 = tmp_var;
		*off = wide_int_to_tree (ssizetype, diff);
	      }
	    else
	      split_constant_offset (op0, &var0, off, cache, limit);
	    *var = fold_convert (type, var0);
	    return true;
	  }
	return false;
      }

    default:
      return false;
    }
}

/* Expresses EXP as VAR + OFF, where OFF is a constant.  The type of OFF
   will be ssizetype.  */

static void
split_constant_offset (tree exp, tree *var, tree *off,
		       hash_map<tree, std::pair<tree, tree> > &cache,
		       unsigned *limit)
{
  tree type = TREE_TYPE (exp), op0, op1, e, o;
  enum tree_code code;

  *var = exp;
  *off = ssize_int (0);

  if (tree_is_chrec (exp)
      || get_gimple_rhs_class (TREE_CODE (exp)) == GIMPLE_TERNARY_RHS)
    return;

  code = TREE_CODE (exp);
  extract_ops_from_tree (exp, &code, &op0, &op1);
  if (split_constant_offset_1 (type, op0, code, op1, &e, &o, cache, limit))
    {
      *var = e;
      *off = o;
    }
}

void
split_constant_offset (tree exp, tree *var, tree *off)
{
  unsigned limit = param_ssa_name_def_chain_limit;
  static hash_map<tree, std::pair<tree, tree> > *cache;
  if (!cache)
    cache = new hash_map<tree, std::pair<tree, tree> > (37);
  split_constant_offset (exp, var, off, *cache, &limit);
  cache->empty ();
}
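
/* A sketch of the decomposition this performs (assuming 4-byte ints
   and an SSA name i_1):

     split_constant_offset (&a[i_1 + 4], &var, &off);

   sets VAR to &a[i_1] and OFF to the ssizetype constant 16, so that
   callers like dr_analyze_innermost below can fold all compile-time
   constant bytes of an address into DR_INIT.  */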

/* Returns the address ADDR of an object in a canonical shape (without
   nop casts, and with the type of a pointer to the object).  */

static tree
canonicalize_base_object_address (tree addr)
{
  tree orig = addr;

  STRIP_NOPS (addr);

  /* The base address may be obtained by casting from integer, in that case
     keep the cast.  */
  if (!POINTER_TYPE_P (TREE_TYPE (addr)))
    return orig;

  if (TREE_CODE (addr) != ADDR_EXPR)
    return addr;

  return build_fold_addr_expr (TREE_OPERAND (addr, 0));
}
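
/* For example, an address written as (int *) &x is canonicalized back
   to &x: after this, two addresses of the same object compare equal
   with operand_equal_p regardless of intervening nop casts, while a
   genuine integer-to-pointer cast is kept untouched.  */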

/* Analyze the behavior of memory reference REF within STMT.
   There are two modes:

   - BB analysis.  In this case we simply split the address into base,
     init and offset components, without reference to any containing loop.
     The resulting base and offset are general expressions and they can
     vary arbitrarily from one iteration of the containing loop to the next.
     The step is always zero.

   - loop analysis.  In this case we analyze the reference both wrt LOOP
     and on the basis that the reference occurs (is "used") in LOOP;
     see the comment above analyze_scalar_evolution_in_loop for more
     information about this distinction.  The base, init, offset and
     step fields are all invariant in LOOP.

   Perform BB analysis if LOOP is null, or if LOOP is the function's
   dummy outermost loop.  In other cases perform loop analysis.

   Return true if the analysis succeeded and store the results in DRB if so.
   BB analysis can only fail for bitfield or reversed-storage accesses.  */

opt_result
dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
		      class loop *loop, const gimple *stmt)
{
  poly_int64 pbitsize, pbitpos;
  tree base, poffset;
  machine_mode pmode;
  int punsignedp, preversep, pvolatilep;
  affine_iv base_iv, offset_iv;
  tree init, dinit, step;
  bool in_loop = (loop && loop->num);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "analyze_innermost: ");

  base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, &pmode,
			      &punsignedp, &preversep, &pvolatilep);
  gcc_assert (base != NULL_TREE);

  poly_int64 pbytepos;
  if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
    return opt_result::failure_at (stmt,
				   "failed: bit offset alignment.\n");

  if (preversep)
    return opt_result::failure_at (stmt,
				   "failed: reverse storage order.\n");

  /* Calculate the alignment and misalignment for the inner reference.  */
  unsigned HOST_WIDE_INT bit_base_misalignment;
  unsigned int bit_base_alignment;
  get_object_alignment_1 (base, &bit_base_alignment, &bit_base_misalignment);

  /* There are no bitfield references remaining in BASE, so the values
     we got back must be whole bytes.  */
  gcc_assert (bit_base_alignment % BITS_PER_UNIT == 0
	      && bit_base_misalignment % BITS_PER_UNIT == 0);
  unsigned int base_alignment = bit_base_alignment / BITS_PER_UNIT;
  poly_int64 base_misalignment = bit_base_misalignment / BITS_PER_UNIT;

  if (TREE_CODE (base) == MEM_REF)
    {
      if (!integer_zerop (TREE_OPERAND (base, 1)))
	{
	  /* Subtract MOFF from the base and add it to POFFSET instead.
	     Adjust the misalignment to reflect the amount we subtracted.  */
	  poly_offset_int moff = mem_ref_offset (base);
	  base_misalignment -= moff.force_shwi ();
	  tree mofft = wide_int_to_tree (sizetype, moff);
	  if (!poffset)
	    poffset = mofft;
	  else
	    poffset = size_binop (PLUS_EXPR, poffset, mofft);
	}
      base = TREE_OPERAND (base, 0);
    }
  else
    base = build_fold_addr_expr (base);

  if (in_loop)
    {
      if (!simple_iv (loop, loop, base, &base_iv, true))
	return opt_result::failure_at
	  (stmt, "failed: evolution of base is not affine.\n");
    }
  else
    {
      base_iv.base = base;
      base_iv.step = ssize_int (0);
      base_iv.no_overflow = true;
    }

  if (!poffset)
    {
      offset_iv.base = ssize_int (0);
      offset_iv.step = ssize_int (0);
    }
  else
    {
      if (!in_loop)
	{
	  offset_iv.base = poffset;
	  offset_iv.step = ssize_int (0);
	}
      else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
	return opt_result::failure_at
	  (stmt, "failed: evolution of offset is not affine.\n");
    }

  init = ssize_int (pbytepos);

  /* Subtract any constant component from the base and add it to INIT instead.
     Adjust the misalignment to reflect the amount we subtracted.  */
  split_constant_offset (base_iv.base, &base_iv.base, &dinit);
  init = size_binop (PLUS_EXPR, init, dinit);
  base_misalignment -= TREE_INT_CST_LOW (dinit);

  split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
  init = size_binop (PLUS_EXPR, init, dinit);

  step = size_binop (PLUS_EXPR,
		     fold_convert (ssizetype, base_iv.step),
		     fold_convert (ssizetype, offset_iv.step));

  base = canonicalize_base_object_address (base_iv.base);

  /* See if get_pointer_alignment can guarantee a higher alignment than
     the one we calculated above.  */
  unsigned HOST_WIDE_INT alt_misalignment;
  unsigned int alt_alignment;
  get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);

  /* As above, these values must be whole bytes.  */
  gcc_assert (alt_alignment % BITS_PER_UNIT == 0
	      && alt_misalignment % BITS_PER_UNIT == 0);
  alt_alignment /= BITS_PER_UNIT;
  alt_misalignment /= BITS_PER_UNIT;

  if (base_alignment < alt_alignment)
    {
      base_alignment = alt_alignment;
      base_misalignment = alt_misalignment;
    }

  drb->base_address = base;
  drb->offset = fold_convert (ssizetype, offset_iv.base);
  drb->init = init;
  drb->step = step;
  if (known_misalignment (base_misalignment, base_alignment,
			  &drb->base_misalignment))
    drb->base_alignment = base_alignment;
  else
    {
      drb->base_alignment = known_alignment (base_misalignment);
      drb->base_misalignment = 0;
    }
  drb->offset_alignment = highest_pow2_factor (offset_iv.base);
  drb->step_alignment = highest_pow2_factor (step);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "success.\n");

  return opt_result::success ();
}
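
/* An illustrative result (a sketch): for an access whose address
   evolves as {p_1 + 4, +, 4} in LOOP, e.g. p_1[i + 1] with 4-byte
   elements, loop analysis records

     base_address: p_1    offset: 0    init: 4    step: 4

   i.e. the compile-time constant bytes end up in DR_INIT and the
   per-iteration increment in DR_STEP.  */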

/* Return true if OP is a valid component reference for a DR access
   function.  This accepts a subset of what handled_component_p accepts.  */

static bool
access_fn_component_p (tree op)
{
  switch (TREE_CODE (op))
    {
    case REALPART_EXPR:
    case IMAGPART_EXPR:
    case ARRAY_REF:
      return true;

    case COMPONENT_REF:
      return TREE_CODE (TREE_TYPE (TREE_OPERAND (op, 0))) == RECORD_TYPE;

    default:
      return false;
    }
}

/* Determines the base object and the list of indices of memory reference
   DR, analyzed in LOOP and instantiated before NEST.  */

static void
dr_analyze_indices (struct data_reference *dr, edge nest, loop_p loop)
{
  vec<tree> access_fns = vNULL;
  tree ref, op;
  tree base, off, access_fn;

  /* If analyzing a basic-block there are no indices to analyze
     and thus no access functions.  */
  if (!nest)
    {
      DR_BASE_OBJECT (dr) = DR_REF (dr);
      DR_ACCESS_FNS (dr).create (0);
      return;
    }

  ref = DR_REF (dr);

  /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
     into a two element array with a constant index.  The base is
     then just the immediate underlying object.  */
  if (TREE_CODE (ref) == REALPART_EXPR)
    {
      ref = TREE_OPERAND (ref, 0);
      access_fns.safe_push (integer_zero_node);
    }
  else if (TREE_CODE (ref) == IMAGPART_EXPR)
    {
      ref = TREE_OPERAND (ref, 0);
      access_fns.safe_push (integer_one_node);
    }

  /* Analyze access functions of dimensions we know to be independent.
     The list of component references handled here should be kept in
     sync with access_fn_component_p.  */
  while (handled_component_p (ref))
    {
      if (TREE_CODE (ref) == ARRAY_REF)
	{
	  op = TREE_OPERAND (ref, 1);
	  access_fn = analyze_scalar_evolution (loop, op);
	  access_fn = instantiate_scev (nest, loop, access_fn);
	  access_fns.safe_push (access_fn);
	}
      else if (TREE_CODE (ref) == COMPONENT_REF
	       && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
	{
	  /* For COMPONENT_REFs of records (but not unions!) use the
	     FIELD_DECL offset as constant access function so we can
	     disambiguate a[i].f1 and a[i].f2.  */
	  tree off = component_ref_field_offset (ref);
	  off = size_binop (PLUS_EXPR,
			    size_binop (MULT_EXPR,
					fold_convert (bitsizetype, off),
					bitsize_int (BITS_PER_UNIT)),
			    DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
	  access_fns.safe_push (off);
	}
      else
	/* If we have an unhandled component that we could not translate
	   to an access function, stop analyzing.  We have determined
	   our base object in this case.  */
	break;

      ref = TREE_OPERAND (ref, 0);
    }

  /* If the address operand of a MEM_REF base has an evolution in the
     analyzed nest, add it as an additional independent access-function.  */
  if (TREE_CODE (ref) == MEM_REF)
    {
      op = TREE_OPERAND (ref, 0);
      access_fn = analyze_scalar_evolution (loop, op);
      access_fn = instantiate_scev (nest, loop, access_fn);
      if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
	{
	  tree orig_type;
	  tree memoff = TREE_OPERAND (ref, 1);
	  base = initial_condition (access_fn);
	  orig_type = TREE_TYPE (base);
	  STRIP_USELESS_TYPE_CONVERSION (base);
	  split_constant_offset (base, &base, &off);
	  STRIP_USELESS_TYPE_CONVERSION (base);
	  /* Fold the MEM_REF offset into the evolution's initial
	     value to make more bases comparable.  */
	  if (!integer_zerop (memoff))
	    {
	      off = size_binop (PLUS_EXPR, off,
				fold_convert (ssizetype, memoff));
	      memoff = build_int_cst (TREE_TYPE (memoff), 0);
	    }
	  /* Adjust the offset so it is a multiple of the access type
	     size, so that we separate bases that can possibly be used
	     to produce partial overlaps (which the access_fn machinery
	     cannot handle).  */
	  wide_int rem;
	  if (TYPE_SIZE_UNIT (TREE_TYPE (ref))
	      && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST
	      && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref))))
	    rem = wi::mod_trunc
	      (wi::to_wide (off),
	       wi::to_wide (TYPE_SIZE_UNIT (TREE_TYPE (ref))),
	       SIGNED);
	  else
	    /* If we can't compute the remainder, simply force the initial
	       condition to zero.  */
	    rem = wi::to_wide (off);
	  off = wide_int_to_tree (ssizetype, wi::to_wide (off) - rem);
	  memoff = wide_int_to_tree (TREE_TYPE (memoff), rem);
	  /* And finally replace the initial condition.  */
	  access_fn = chrec_replace_initial_condition
	      (access_fn, fold_convert (orig_type, off));
	  /* ???  This is still not a suitable base object for
	     dr_may_alias_p - the base object needs to be an
	     access that covers the object as whole.  With
	     an evolution in the pointer this cannot be
	     guaranteed.
	     As a band-aid, mark the access so we can special-case
	     it in dr_may_alias_p.  */
	  tree old = ref;
	  ref = fold_build2_loc (EXPR_LOCATION (ref),
				 MEM_REF, TREE_TYPE (ref),
				 base, memoff);
	  MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
	  MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
	  DR_UNCONSTRAINED_BASE (dr) = true;
	  access_fns.safe_push (access_fn);
	}
    }
  else if (DECL_P (ref))
    {
      /* Canonicalize DR_BASE_OBJECT to MEM_REF form.  */
      ref = build2 (MEM_REF, TREE_TYPE (ref),
		    build_fold_addr_expr (ref),
		    build_int_cst (reference_alias_ptr_type (ref), 0));
    }

  DR_BASE_OBJECT (dr) = ref;
  DR_ACCESS_FNS (dr) = access_fns;
}
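
/* For example (a sketch), for A[i][j] analyzed in the nest of the i-
   and j-loops, the subscript i is recorded as the access function
   {0, +, 1}_1 and the subscript j as {0, +, 1}_2, while DR_BASE_OBJECT
   is the MEM_REF form of A built above.  */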

/* Extracts the alias analysis information from the memory reference DR.  */

static void
dr_analyze_alias (struct data_reference *dr)
{
  tree ref = DR_REF (dr);
  tree base = get_base_address (ref), addr;

  if (INDIRECT_REF_P (base)
      || TREE_CODE (base) == MEM_REF)
    {
      addr = TREE_OPERAND (base, 0);
      if (TREE_CODE (addr) == SSA_NAME)
	DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
    }
}

/* Frees data reference DR.  */

void
free_data_ref (data_reference_p dr)
{
  DR_ACCESS_FNS (dr).release ();
  free (dr);
}

/* Analyze memory reference MEMREF, which is accessed in STMT.
   The reference is a read if IS_READ is true, otherwise it is a write.
   IS_CONDITIONAL_IN_STMT indicates that the reference is conditional
   within STMT, i.e. that it might not occur even if STMT is executed
   and runs to completion.

   Return the data_reference description of MEMREF.  NEST is the outermost
   loop in which the reference should be instantiated, LOOP is the loop
   in which the data reference should be analyzed.  */

struct data_reference *
create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt,
		 bool is_read, bool is_conditional_in_stmt)
{
  struct data_reference *dr;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Creating dr for ");
      print_generic_expr (dump_file, memref, TDF_SLIM);
      fprintf (dump_file, "\n");
    }

  dr = XCNEW (struct data_reference);
  DR_STMT (dr) = stmt;
  DR_REF (dr) = memref;
  DR_IS_READ (dr) = is_read;
  DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt;

  dr_analyze_innermost (&DR_INNERMOST (dr), memref,
			nest != NULL ? loop : NULL, stmt);
  dr_analyze_indices (dr, nest, loop);
  dr_analyze_alias (dr);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      unsigned i;
      fprintf (dump_file, "\tbase_address: ");
      print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
      fprintf (dump_file, "\n\toffset from base address: ");
      print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
      fprintf (dump_file, "\n\tconstant offset from base address: ");
      print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
      fprintf (dump_file, "\n\tstep: ");
      print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
      fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr));
      fprintf (dump_file, "\n\tbase misalignment: %d",
	       DR_BASE_MISALIGNMENT (dr));
      fprintf (dump_file, "\n\toffset alignment: %d",
	       DR_OFFSET_ALIGNMENT (dr));
      fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr));
      fprintf (dump_file, "\n\tbase_object: ");
      print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
      fprintf (dump_file, "\n");
      for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
	{
	  fprintf (dump_file, "\tAccess function %d: ", i);
	  print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
	}
    }

  return dr;
}
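
/* A typical call (a sketch; the instantiation edge is usually the
   preheader edge of the outermost analyzed loop):

     data_reference *dr
       = create_data_ref (loop_preheader_edge (loop_nest), loop,
			  memref, stmt, is_read, false);

   Passing a NULL NEST instead requests plain basic-block analysis,
   with no access functions (see dr_analyze_indices above).  */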

/* A helper function that computes the order between two tree expressions
   T1 and T2.  It is used in comparator functions when sorting objects
   based on the order of tree expressions.  The function returns -1, 0,
   or 1.  */

int
data_ref_compare_tree (tree t1, tree t2)
{
  int i, cmp;
  enum tree_code code;
  char tclass;

  if (t1 == t2)
    return 0;
  if (t1 == NULL)
    return -1;
  if (t2 == NULL)
    return 1;

  STRIP_USELESS_TYPE_CONVERSION (t1);
  STRIP_USELESS_TYPE_CONVERSION (t2);
  if (t1 == t2)
    return 0;

  if (TREE_CODE (t1) != TREE_CODE (t2)
      && ! (CONVERT_EXPR_P (t1) && CONVERT_EXPR_P (t2)))
    return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;

  code = TREE_CODE (t1);
  switch (code)
    {
    case INTEGER_CST:
      return tree_int_cst_compare (t1, t2);

    case STRING_CST:
      if (TREE_STRING_LENGTH (t1) != TREE_STRING_LENGTH (t2))
	return TREE_STRING_LENGTH (t1) < TREE_STRING_LENGTH (t2) ? -1 : 1;
      return memcmp (TREE_STRING_POINTER (t1), TREE_STRING_POINTER (t2),
		     TREE_STRING_LENGTH (t1));

    case SSA_NAME:
      if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
	return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
      break;

    default:
      if (POLY_INT_CST_P (t1))
	return compare_sizes_for_sort (wi::to_poly_widest (t1),
				       wi::to_poly_widest (t2));

      tclass = TREE_CODE_CLASS (code);

      /* For decls, compare their UIDs.  */
      if (tclass == tcc_declaration)
	{
	  if (DECL_UID (t1) != DECL_UID (t2))
	    return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
	  break;
	}
      /* For expressions, compare their operands recursively.  */
      else if (IS_EXPR_CODE_CLASS (tclass))
	{
	  for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
	    {
	      cmp = data_ref_compare_tree (TREE_OPERAND (t1, i),
					   TREE_OPERAND (t2, i));
	      if (cmp != 0)
		return cmp;
	    }
	}
      else
	gcc_unreachable ();
    }

  return 0;
}

/* Return TRUE if it's possible to resolve data dependence DDR by a
   runtime alias check.  */

opt_result
runtime_alias_check_p (ddr_p ddr, class loop *loop, bool speed_p)
{
  if (dump_enabled_p ())
    dump_printf (MSG_NOTE,
		 "consider run-time aliasing test between %T and %T\n",
		 DR_REF (DDR_A (ddr)), DR_REF (DDR_B (ddr)));

  if (!speed_p)
    return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
				   "runtime alias check not supported when"
				   " optimizing for size.\n");

  /* FORNOW: We don't support versioning with outer-loop in either
     vectorization or loop distribution.  */
  if (loop != NULL && loop->inner != NULL)
    return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
				   "runtime alias check not supported for"
				   " outer loop.\n");

  return opt_result::success ();
}
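
/* When the check is possible, the condition that is eventually emitted
   has the shape described before prune_runtime_alias_test_list below:

     (addr_a + seg_len_a <= addr_b) || (addr_b + seg_len_b <= addr_a)

   i.e. the two accessed segments must not overlap at runtime.  */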

/* Operator == between two dr_with_seg_len objects.

   This equality operator is used to make sure two data refs
   are the same one, so that we will consider combining the
   aliasing checks of those two pairs of data dependent data
   refs.  */

static bool
operator == (const dr_with_seg_len& d1,
	     const dr_with_seg_len& d2)
{
  return (operand_equal_p (DR_BASE_ADDRESS (d1.dr),
			   DR_BASE_ADDRESS (d2.dr), 0)
	  && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
	  && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
	  && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0
	  && known_eq (d1.access_size, d2.access_size)
	  && d1.align == d2.align);
}

/* Comparison function for sorting objects of dr_with_seg_len_pair_t
   so that we can combine aliasing checks in one scan.  */

static int
comp_dr_with_seg_len_pair (const void *pa_, const void *pb_)
{
  const dr_with_seg_len_pair_t* pa = (const dr_with_seg_len_pair_t *) pa_;
  const dr_with_seg_len_pair_t* pb = (const dr_with_seg_len_pair_t *) pb_;
  const dr_with_seg_len &a1 = pa->first, &a2 = pa->second;
  const dr_with_seg_len &b1 = pb->first, &b2 = pb->second;

  /* For DR pairs (a, b) and (c, d), we only consider merging the alias
     checks if a and c have the same base address and step, and b and d
     have the same address and step.  Therefore, if either a&c or b&d
     don't have the same address and step, we don't care about the order
     of those two pairs after sorting.  */
  int comp_res;

  if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a1.dr),
					 DR_BASE_ADDRESS (b1.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a2.dr),
					 DR_BASE_ADDRESS (b2.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_STEP (a1.dr),
					 DR_STEP (b1.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_STEP (a2.dr),
					 DR_STEP (b2.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_OFFSET (a1.dr),
					 DR_OFFSET (b1.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_INIT (a1.dr),
					 DR_INIT (b1.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_OFFSET (a2.dr),
					 DR_OFFSET (b2.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_INIT (a2.dr),
					 DR_INIT (b2.dr))) != 0)
    return comp_res;

  return 0;
}

/* Dump information about ALIAS_PAIR, indenting each line by INDENT.  */

static void
dump_alias_pair (dr_with_seg_len_pair_t *alias_pair, const char *indent)
{
  dump_printf (MSG_NOTE, "%sreference:      %T vs. %T\n", indent,
	       DR_REF (alias_pair->first.dr),
	       DR_REF (alias_pair->second.dr));

  dump_printf (MSG_NOTE, "%ssegment length: %T", indent,
	       alias_pair->first.seg_len);
  if (!operand_equal_p (alias_pair->first.seg_len,
			alias_pair->second.seg_len, 0))
    dump_printf (MSG_NOTE, " vs. %T", alias_pair->second.seg_len);

  dump_printf (MSG_NOTE, "\n%saccess size:    ", indent);
  dump_dec (MSG_NOTE, alias_pair->first.access_size);
  if (maybe_ne (alias_pair->first.access_size, alias_pair->second.access_size))
    {
      dump_printf (MSG_NOTE, " vs. ");
      dump_dec (MSG_NOTE, alias_pair->second.access_size);
    }

  dump_printf (MSG_NOTE, "\n%salignment:      %d", indent,
	       alias_pair->first.align);
  if (alias_pair->first.align != alias_pair->second.align)
    dump_printf (MSG_NOTE, " vs. %d", alias_pair->second.align);

  dump_printf (MSG_NOTE, "\n%sflags:         ", indent);
  if (alias_pair->flags & DR_ALIAS_RAW)
    dump_printf (MSG_NOTE, " RAW");
  if (alias_pair->flags & DR_ALIAS_WAR)
    dump_printf (MSG_NOTE, " WAR");
  if (alias_pair->flags & DR_ALIAS_WAW)
    dump_printf (MSG_NOTE, " WAW");
  if (alias_pair->flags & DR_ALIAS_ARBITRARY)
    dump_printf (MSG_NOTE, " ARBITRARY");
  if (alias_pair->flags & DR_ALIAS_SWAPPED)
    dump_printf (MSG_NOTE, " SWAPPED");
  if (alias_pair->flags & DR_ALIAS_UNSWAPPED)
    dump_printf (MSG_NOTE, " UNSWAPPED");
  if (alias_pair->flags & DR_ALIAS_MIXED_STEPS)
    dump_printf (MSG_NOTE, " MIXED_STEPS");
  if (alias_pair->flags == 0)
    dump_printf (MSG_NOTE, " <none>");
  dump_printf (MSG_NOTE, "\n");
}

/* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones.
   FACTOR is the number of iterations with which each data reference is
   accessed.

   Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0,
   we create an expression:

   ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
   || ((load_ptr_0 + load_segment_length_0) <= store_ptr_0)

   for the aliasing checks.  However, in some cases we can decrease the
   number of checks by combining two checks into one.  For example, suppose
   we have another pair of data refs store_ptr_0 & load_ptr_1, and if the
   following condition is satisfied:

   load_ptr_0 < load_ptr_1  &&
   load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0

   (this condition means that in each iteration of the vectorized loop, the
   accessed memory of store_ptr_0 cannot be between the memory of load_ptr_0
   and load_ptr_1),

   then we can use only the following expression to finish the aliasing
   checks between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1:

   ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
   || ((load_ptr_1 + load_segment_length_1) <= store_ptr_0)

   Note that we only consider this merge when load_ptr_0 and load_ptr_1
   have the same base address.  */
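
/* A concrete instance of the merge below (a sketch): the checks
   (store_p vs. load_p, both with segment length 16) and
   (store_p vs. load_p + 8, segment length 16) share their DR_B, and
   their DR_As have the compile-time distance 8, so they collapse into
   a single check of store_p against a segment that starts at load_p
   and whose access size is grown to also cover the load_p + 8 access.  */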
1533 
1534 void
prune_runtime_alias_test_list(vec<dr_with_seg_len_pair_t> * alias_pairs,poly_uint64)1535 prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
1536 			       poly_uint64)
1537 {
1538   if (alias_pairs->is_empty ())
1539     return;
1540 
1541   /* Canonicalize each pair so that the base components are ordered wrt
1542      data_ref_compare_tree.  This allows the loop below to merge more
1543      cases.  */
1544   unsigned int i;
1545   dr_with_seg_len_pair_t *alias_pair;
1546   FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
1547     {
1548       data_reference_p dr_a = alias_pair->first.dr;
1549       data_reference_p dr_b = alias_pair->second.dr;
1550       int comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_a),
1551 					    DR_BASE_ADDRESS (dr_b));
1552       if (comp_res == 0)
1553 	comp_res = data_ref_compare_tree (DR_OFFSET (dr_a), DR_OFFSET (dr_b));
1554       if (comp_res == 0)
1555 	comp_res = data_ref_compare_tree (DR_INIT (dr_a), DR_INIT (dr_b));
1556       if (comp_res > 0)
1557 	{
1558 	  std::swap (alias_pair->first, alias_pair->second);
1559 	  alias_pair->flags |= DR_ALIAS_SWAPPED;
1560 	}
1561       else
1562 	alias_pair->flags |= DR_ALIAS_UNSWAPPED;
1563     }
1564 
1565   /* Sort the collected data ref pairs so that we can scan them once to
1566      combine all possible aliasing checks.  */
1567   alias_pairs->qsort (comp_dr_with_seg_len_pair);
1568 
1569   /* Scan the sorted dr pairs and check if we can combine alias checks
1570      of two neighboring dr pairs.  */
1571   unsigned int last = 0;
1572   for (i = 1; i < alias_pairs->length (); ++i)
1573     {
1574       /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2).  */
1575       dr_with_seg_len_pair_t *alias_pair1 = &(*alias_pairs)[last];
1576       dr_with_seg_len_pair_t *alias_pair2 = &(*alias_pairs)[i];
1577 
1578       dr_with_seg_len *dr_a1 = &alias_pair1->first;
1579       dr_with_seg_len *dr_b1 = &alias_pair1->second;
1580       dr_with_seg_len *dr_a2 = &alias_pair2->first;
1581       dr_with_seg_len *dr_b2 = &alias_pair2->second;
1582 
1583       /* Remove duplicate data ref pairs.  */
1584       if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
1585 	{
1586 	  if (dump_enabled_p ())
1587 	    dump_printf (MSG_NOTE, "found equal ranges %T, %T and %T, %T\n",
1588 			 DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
1589 			 DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
1590 	  alias_pair1->flags |= alias_pair2->flags;
1591 	  continue;
1592 	}
1593 
1594       /* Assume that we won't be able to merge the pairs, then correct
1595 	 if we do.  */
1596       last += 1;
1597       if (last != i)
1598 	(*alias_pairs)[last] = (*alias_pairs)[i];
1599 
1600       if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
1601 	{
1602 	  /* We consider the case that DR_B1 and DR_B2 are same memrefs,
1603 	     and DR_A1 and DR_A2 are two consecutive memrefs.  */
1604 	  if (*dr_a1 == *dr_a2)
1605 	    {
1606 	      std::swap (dr_a1, dr_b1);
1607 	      std::swap (dr_a2, dr_b2);
1608 	    }
1609 
1610 	  poly_int64 init_a1, init_a2;
1611 	  /* Only consider cases in which the distance between the initial
1612 	     DR_A1 and the initial DR_A2 is known at compile time.  */
1613 	  if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
1614 				DR_BASE_ADDRESS (dr_a2->dr), 0)
1615 	      || !operand_equal_p (DR_OFFSET (dr_a1->dr),
1616 				   DR_OFFSET (dr_a2->dr), 0)
1617 	      || !poly_int_tree_p (DR_INIT (dr_a1->dr), &init_a1)
1618 	      || !poly_int_tree_p (DR_INIT (dr_a2->dr), &init_a2))
1619 	    continue;
1620 
1621 	  /* Don't combine if we can't tell which one comes first.  */
1622 	  if (!ordered_p (init_a1, init_a2))
1623 	    continue;
1624 
1625 	  /* Work out what the segment length would be if we did combine
1626 	     DR_A1 and DR_A2:
1627 
1628 	     - If DR_A1 and DR_A2 have equal lengths, that length is
1629 	       also the combined length.
1630 
1631 	     - If DR_A1 and DR_A2 both have negative "lengths", the combined
1632 	       length is the lower bound on those lengths.
1633 
1634 	     - If DR_A1 and DR_A2 both have positive lengths, the combined
1635 	       length is the upper bound on those lengths.
1636 
1637 	     Other cases are unlikely to give a useful combination.
1638 
1639 	     The lengths both have sizetype, so the sign is taken from
1640 	     the step instead.  */
1641 	  poly_uint64 new_seg_len = 0;
1642 	  bool new_seg_len_p = !operand_equal_p (dr_a1->seg_len,
1643 						 dr_a2->seg_len, 0);
1644 	  if (new_seg_len_p)
1645 	    {
1646 	      poly_uint64 seg_len_a1, seg_len_a2;
1647 	      if (!poly_int_tree_p (dr_a1->seg_len, &seg_len_a1)
1648 		  || !poly_int_tree_p (dr_a2->seg_len, &seg_len_a2))
1649 		continue;
1650 
1651 	      tree indicator_a = dr_direction_indicator (dr_a1->dr);
1652 	      if (TREE_CODE (indicator_a) != INTEGER_CST)
1653 		continue;
1654 
1655 	      tree indicator_b = dr_direction_indicator (dr_a2->dr);
1656 	      if (TREE_CODE (indicator_b) != INTEGER_CST)
1657 		continue;
1658 
1659 	      int sign_a = tree_int_cst_sgn (indicator_a);
1660 	      int sign_b = tree_int_cst_sgn (indicator_b);
1661 
1662 	      if (sign_a <= 0 && sign_b <= 0)
1663 		new_seg_len = lower_bound (seg_len_a1, seg_len_a2);
1664 	      else if (sign_a >= 0 && sign_b >= 0)
1665 		new_seg_len = upper_bound (seg_len_a1, seg_len_a2);
1666 	      else
1667 		continue;
1668 	    }
1669 	  /* At this point we're committed to merging the refs.  */
1670 
1671 	  /* Make sure dr_a1 starts left of dr_a2.  */
1672 	  if (maybe_gt (init_a1, init_a2))
1673 	    {
1674 	      std::swap (*dr_a1, *dr_a2);
1675 	      std::swap (init_a1, init_a2);
1676 	    }
1677 
1678 	  /* The DR_Bs are equal, so only the DR_As can introduce
1679 	     mixed steps.  */
1680 	  if (!operand_equal_p (DR_STEP (dr_a1->dr), DR_STEP (dr_a2->dr), 0))
1681 	    alias_pair1->flags |= DR_ALIAS_MIXED_STEPS;
1682 
1683 	  if (new_seg_len_p)
1684 	    {
1685 	      dr_a1->seg_len = build_int_cst (TREE_TYPE (dr_a1->seg_len),
1686 					      new_seg_len);
1687 	      dr_a1->align = MIN (dr_a1->align, known_alignment (new_seg_len));
1688 	    }
1689 
1690 	  /* This is never negative due to the swap above.  */
1691 	  poly_uint64 diff = init_a2 - init_a1;
1692 
1693 	  /* The new check will start at DR_A1.  Make sure that its access
1694 	     size encompasses the initial DR_A2.  */
1695 	  if (maybe_lt (dr_a1->access_size, diff + dr_a2->access_size))
1696 	    {
1697 	      dr_a1->access_size = upper_bound (dr_a1->access_size,
1698 						diff + dr_a2->access_size);
1699 	      unsigned int new_align = known_alignment (dr_a1->access_size);
1700 	      dr_a1->align = MIN (dr_a1->align, new_align);
1701 	    }
1702 	  if (dump_enabled_p ())
1703 	    dump_printf (MSG_NOTE, "merging ranges for %T, %T and %T, %T\n",
1704 			 DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
1705 			 DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
1706 	  alias_pair1->flags |= alias_pair2->flags;
1707 	  last -= 1;
1708 	}
1709     }
1710   alias_pairs->truncate (last + 1);
1711 
1712   /* Try to restore the original dr_with_seg_len order within each
1713      dr_with_seg_len_pair_t.  If we ended up combining swapped and
1714      unswapped pairs into the same check, we have to invalidate any
1715      RAW, WAR and WAW information for it.  */
1716   if (dump_enabled_p ())
1717     dump_printf (MSG_NOTE, "merged alias checks:\n");
1718   FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
1719     {
1720       unsigned int swap_mask = (DR_ALIAS_SWAPPED | DR_ALIAS_UNSWAPPED);
1721       unsigned int swapped = (alias_pair->flags & swap_mask);
1722       if (swapped == DR_ALIAS_SWAPPED)
1723 	std::swap (alias_pair->first, alias_pair->second);
1724       else if (swapped != DR_ALIAS_UNSWAPPED)
1725 	alias_pair->flags |= DR_ALIAS_ARBITRARY;
1726       alias_pair->flags &= ~swap_mask;
1727       if (dump_enabled_p ())
1728 	dump_alias_pair (alias_pair, "  ");
1729     }
1730 }
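
/* Editorial illustration (values assumed, not taken from the code above):
   suppose the sorted list contains the pairs (A1, B) and (A2, B), where A1
   covers a[0..15], A2 covers a[4..19], B is the same reference in both
   pairs and both steps are equal and positive.  The merge above keeps one
   pair whose first reference starts at a[0], whose access size is widened
   to cover the initial a[4] access (diff + access_size), and whose segment
   length is the upper bound of the two lengths, so a single runtime check
   subsumes both original checks.  */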
1731 
1732 /* A subroutine of create_intersect_range_checks, with a subset of the
1733    same arguments.  Try to use IFN_CHECK_RAW_PTRS and IFN_CHECK_WAR_PTRS
1734    to optimize cases in which the references form a simple RAW, WAR or
1735    WAW dependence.  */
1736 
1737 static bool
1738 create_ifn_alias_checks (tree *cond_expr,
1739 			 const dr_with_seg_len_pair_t &alias_pair)
1740 {
1741   const dr_with_seg_len& dr_a = alias_pair.first;
1742   const dr_with_seg_len& dr_b = alias_pair.second;
1743 
1744   /* Check for cases in which:
1745 
1746      (a) we have a known RAW, WAR or WAW dependence;
1747      (b) the accesses are well-ordered in both the original and new code
1748 	 (see the comment above the DR_ALIAS_* flags for details); and
1749      (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR.  */
1750   if (alias_pair.flags & ~(DR_ALIAS_RAW | DR_ALIAS_WAR | DR_ALIAS_WAW))
1751     return false;
1752 
1753   /* Make sure that both DRs access the same pattern of bytes,
1754      with a constant length and step.  */
1755   poly_uint64 seg_len;
1756   if (!operand_equal_p (dr_a.seg_len, dr_b.seg_len, 0)
1757       || !poly_int_tree_p (dr_a.seg_len, &seg_len)
1758       || maybe_ne (dr_a.access_size, dr_b.access_size)
1759       || !operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0)
1760       || !tree_fits_uhwi_p (DR_STEP (dr_a.dr)))
1761     return false;
1762 
1763   unsigned HOST_WIDE_INT bytes = tree_to_uhwi (DR_STEP (dr_a.dr));
1764   tree addr_a = DR_BASE_ADDRESS (dr_a.dr);
1765   tree addr_b = DR_BASE_ADDRESS (dr_b.dr);
1766 
1767   /* See whether the target supports what we want to do.  WAW checks are
1768      equivalent to WAR checks here.  */
1769   internal_fn ifn = (alias_pair.flags & DR_ALIAS_RAW
1770 		     ? IFN_CHECK_RAW_PTRS
1771 		     : IFN_CHECK_WAR_PTRS);
1772   unsigned int align = MIN (dr_a.align, dr_b.align);
1773   poly_uint64 full_length = seg_len + bytes;
1774   if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
1775 					   full_length, align))
1776     {
1777       full_length = seg_len + dr_a.access_size;
1778       if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
1779 					       full_length, align))
1780 	return false;
1781     }
1782 
1783   /* Commit to using this form of test.  */
1784   addr_a = fold_build_pointer_plus (addr_a, DR_OFFSET (dr_a.dr));
1785   addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));
1786 
1787   addr_b = fold_build_pointer_plus (addr_b, DR_OFFSET (dr_b.dr));
1788   addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));
1789 
1790   *cond_expr = build_call_expr_internal_loc (UNKNOWN_LOCATION,
1791 					     ifn, boolean_type_node,
1792 					     4, addr_a, addr_b,
1793 					     size_int (full_length),
1794 					     size_int (align));
1795 
1796   if (dump_enabled_p ())
1797     {
1798       if (ifn == IFN_CHECK_RAW_PTRS)
1799 	dump_printf (MSG_NOTE, "using an IFN_CHECK_RAW_PTRS test\n");
1800       else
1801 	dump_printf (MSG_NOTE, "using an IFN_CHECK_WAR_PTRS test\n");
1802     }
1803   return true;
1804 }
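
/* Editorial illustration (hypothetical loop, assuming the target supports
   the internal function for the required length and alignment):

     for (int i = 0; i < n; ++i)
       b[i] = a[i] + 1;

   has a read from a[] and a write to b[] in each scalar iteration.  If the
   only dependence recorded for the pair is DR_ALIAS_RAW, the code above
   replaces the usual address-range comparison with a single call

     CHECK_RAW_PTRS (addr_a, addr_b, full_length, align)

   whose result feeds the loop-versioning condition.  */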
1805 
1806 /* Try to generate a runtime condition that is true if ALIAS_PAIR is
1807    free of aliases, using a condition based on index values instead
1808    of a condition based on addresses.  Return true on success,
1809    storing the condition in *COND_EXPR.
1810 
1811    This can only be done if the two data references in ALIAS_PAIR access
1812    the same array object and the index is the only difference.  For example,
1813    if the two data references are DR_A and DR_B:
1814 
1815                        DR_A                           DR_B
1816       data-ref         arr[i]                         arr[j]
1817       base_object      arr                            arr
1818       index            {i_0, +, 1}_loop               {j_0, +, 1}_loop
1819 
1820    The addresses and their indices look like this:
1821 
1822         |<- ADDR_A    ->|          |<- ADDR_B    ->|
1823      ------------------------------------------------------->
1824         |   |   |   |   |          |   |   |   |   |
1825      ------------------------------------------------------->
1826         i_0 ...         i_0+4      j_0 ...         j_0+4
1827 
1828    We can create an expression based on the indices rather than the addresses:
1829 
1830      (unsigned) (i_0 - j_0 + 3) <= 6
1831 
1832    i.e. the indices are less than 4 apart.
1833 
1834    Note that the index's evolution step must be considered in the comparison.  */
1835 
1836 static bool
1837 create_intersect_range_checks_index (class loop *loop, tree *cond_expr,
1838 				     const dr_with_seg_len_pair_t &alias_pair)
1839 {
1840   const dr_with_seg_len &dr_a = alias_pair.first;
1841   const dr_with_seg_len &dr_b = alias_pair.second;
1842   if ((alias_pair.flags & DR_ALIAS_MIXED_STEPS)
1843       || integer_zerop (DR_STEP (dr_a.dr))
1844       || integer_zerop (DR_STEP (dr_b.dr))
1845       || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
1846     return false;
1847 
1848   poly_uint64 seg_len1, seg_len2;
1849   if (!poly_int_tree_p (dr_a.seg_len, &seg_len1)
1850       || !poly_int_tree_p (dr_b.seg_len, &seg_len2))
1851     return false;
1852 
1853   if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
1854     return false;
1855 
1856   if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
1857     return false;
1858 
1859   if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
1860     return false;
1861 
1862   gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
1863 
1864   bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
1865   unsigned HOST_WIDE_INT abs_step = tree_to_shwi (DR_STEP (dr_a.dr));
1866   if (neg_step)
1867     {
1868       abs_step = -abs_step;
1869       seg_len1 = (-wi::to_poly_wide (dr_a.seg_len)).force_uhwi ();
1870       seg_len2 = (-wi::to_poly_wide (dr_b.seg_len)).force_uhwi ();
1871     }
1872 
1873   /* Infer the number of iterations with which the memory segment is accessed
1874      by each DR.  In other words, aliasing is checked if the memory segment
1875      accessed by DR_A in some number of iterations intersects with the memory
1876      segment accessed by DR_B in the same number of iterations.
1877      Note that the segment length is a linear function of the number of
1878      iterations, with DR_STEP as the coefficient.  */
1879   poly_uint64 niter_len1, niter_len2;
1880   if (!can_div_trunc_p (seg_len1 + abs_step - 1, abs_step, &niter_len1)
1881       || !can_div_trunc_p (seg_len2 + abs_step - 1, abs_step, &niter_len2))
1882     return false;
1883 
1884   /* Divide each access size by the byte step, rounding up.  */
1885   poly_uint64 niter_access1, niter_access2;
1886   if (!can_div_trunc_p (dr_a.access_size + abs_step - 1,
1887 			abs_step, &niter_access1)
1888       || !can_div_trunc_p (dr_b.access_size + abs_step - 1,
1889 			   abs_step, &niter_access2))
1890     return false;
1891 
1892   bool waw_or_war_p = (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW)) == 0;
1893 
1894   int found = -1;
1895   for (unsigned int i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
1896     {
1897       tree access1 = DR_ACCESS_FN (dr_a.dr, i);
1898       tree access2 = DR_ACCESS_FN (dr_b.dr, i);
1899       /* The two indices must be the same if they are not SCEVs, or are not
1900 	 SCEVs with respect to the current loop being vectorized.  */
1901       if (TREE_CODE (access1) != POLYNOMIAL_CHREC
1902 	  || TREE_CODE (access2) != POLYNOMIAL_CHREC
1903 	  || CHREC_VARIABLE (access1) != (unsigned)loop->num
1904 	  || CHREC_VARIABLE (access2) != (unsigned)loop->num)
1905 	{
1906 	  if (operand_equal_p (access1, access2, 0))
1907 	    continue;
1908 
1909 	  return false;
1910 	}
1911       if (found >= 0)
1912 	return false;
1913       found = i;
1914     }
1915 
1916   /* Ought not to happen in practice, since if all accesses are equal then the
1917      alias should be decidable at compile time.  */
1918   if (found < 0)
1919     return false;
1920 
1921   /* The two indices must have the same step.  */
1922   tree access1 = DR_ACCESS_FN (dr_a.dr, found);
1923   tree access2 = DR_ACCESS_FN (dr_b.dr, found);
1924   if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
1925     return false;
1926 
1927   tree idx_step = CHREC_RIGHT (access1);
1928   /* Index must have const step, otherwise DR_STEP won't be constant.  */
1929   gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
1930   /* Index must evaluate in the same direction as DR.  */
1931   gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
1932 
1933   tree min1 = CHREC_LEFT (access1);
1934   tree min2 = CHREC_LEFT (access2);
1935   if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
1936     return false;
1937 
1938   /* Ideally, the alias could be checked against the loop's control IV, but
1939      we would need to prove a linear mapping between the control IV and the
1940      reference index.  Although that should hold, we check against the
1941      (array) index of the data reference instead.  Like the segment length,
1942      the index length is a linear function of the number of iterations, with
1943      idx_step as the coefficient, i.e. niter_len * idx_step.  */
1944   offset_int abs_idx_step = offset_int::from (wi::to_wide (idx_step),
1945 					      SIGNED);
1946   if (neg_step)
1947     abs_idx_step = -abs_idx_step;
1948   poly_offset_int idx_len1 = abs_idx_step * niter_len1;
1949   poly_offset_int idx_len2 = abs_idx_step * niter_len2;
1950   poly_offset_int idx_access1 = abs_idx_step * niter_access1;
1951   poly_offset_int idx_access2 = abs_idx_step * niter_access2;
1952 
1953   gcc_assert (known_ge (idx_len1, 0)
1954 	      && known_ge (idx_len2, 0)
1955 	      && known_ge (idx_access1, 0)
1956 	      && known_ge (idx_access2, 0));
1957 
1958   /* Each access has the following pattern, with lengths measured
1959      in units of INDEX:
1960 
1961 	  <-- idx_len -->
1962 	  <--- A: -ve step --->
1963 	  +-----+-------+-----+-------+-----+
1964 	  | n-1 | ..... |  0  | ..... | n-1 |
1965 	  +-----+-------+-----+-------+-----+
1966 			<--- B: +ve step --->
1967 			<-- idx_len -->
1968 			|
1969 		       min
1970 
1971      where "n" is the number of scalar iterations covered by the segment
1972      and where each access spans idx_access units.
1973 
1974      A is the range of bytes accessed when the step is negative,
1975      B is the range when the step is positive.
1976 
1977      When checking for general overlap, we need to test whether
1978      the range:
1979 
1980        [min1 + low_offset1, min1 + high_offset1 + idx_access1 - 1]
1981 
1982      overlaps:
1983 
1984        [min2 + low_offset2, min2 + high_offset2 + idx_access2 - 1]
1985 
1986      where:
1987 
1988 	low_offsetN = +ve step ? 0 : -idx_lenN;
1989        high_offsetN = +ve step ? idx_lenN : 0;
1990 
1991      This is equivalent to testing whether:
1992 
1993        min1 + low_offset1 <= min2 + high_offset2 + idx_access2 - 1
1994        && min2 + low_offset2 <= min1 + high_offset1 + idx_access1 - 1
1995 
1996      Converting this into a single test, there is an overlap if:
1997 
1998        0 <= min2 - min1 + bias <= limit
1999 
2000      where  bias = high_offset2 + idx_access2 - 1 - low_offset1
2001 	   limit = (high_offset1 - low_offset1 + idx_access1 - 1)
2002 		 + (high_offset2 - low_offset2 + idx_access2 - 1)
2003       i.e. limit = idx_len1 + idx_access1 - 1 + idx_len2 + idx_access2 - 1
2004 
2005      Combining the tests requires limit to be computable in an unsigned
2006      form of the index type; if it isn't, we fall back to the usual
2007      pointer-based checks.
2008 
2009      We can do better if DR_B is a write and if DR_A and DR_B are
2010      well-ordered in both the original and the new code (see the
2011      comment above the DR_ALIAS_* flags for details).  In this case
2012      we know that for each i in [0, n-1], the write performed by
2013      access i of DR_B occurs after access numbers j<=i of DR_A in
2014      both the original and the new code.  Any write or anti
2015      dependencies wrt those DR_A accesses are therefore maintained.
2016 
2017      We just need to make sure that each individual write in DR_B does not
2018      overlap any higher-indexed access in DR_A; such DR_A accesses happen
2019      after the DR_B access in the original code but happen before it in
2020      the new code.
2021 
2022      We know the steps for both accesses are equal, so by induction, we
2023      just need to test whether the first write of DR_B overlaps a later
2024      access of DR_A.  In other words, we need to move min1 along by
2025      one iteration:
2026 
2027        min1' = min1 + idx_step
2028 
2029      and use the ranges:
2030 
2031        [min1' + low_offset1', min1' + high_offset1' + idx_access1 - 1]
2032 
2033      and:
2034 
2035        [min2, min2 + idx_access2 - 1]
2036 
2037      where:
2038 
2039 	low_offset1' = +ve step ? 0 : -(idx_len1 - |idx_step|)
2040        high_offset1' = +ve_step ? idx_len1 - |idx_step| : 0.  */
2041   if (waw_or_war_p)
2042     idx_len1 -= abs_idx_step;
2043 
2044   poly_offset_int limit = idx_len1 + idx_access1 - 1 + idx_access2 - 1;
2045   if (!waw_or_war_p)
2046     limit += idx_len2;
2047 
2048   tree utype = unsigned_type_for (TREE_TYPE (min1));
2049   if (!wi::fits_to_tree_p (limit, utype))
2050     return false;
2051 
2052   poly_offset_int low_offset1 = neg_step ? -idx_len1 : 0;
2053   poly_offset_int high_offset2 = neg_step || waw_or_war_p ? 0 : idx_len2;
2054   poly_offset_int bias = high_offset2 + idx_access2 - 1 - low_offset1;
2055   /* Equivalent to adding IDX_STEP to MIN1.  */
2056   if (waw_or_war_p)
2057     bias -= wi::to_offset (idx_step);
2058 
2059   tree subject = fold_build2 (MINUS_EXPR, utype,
2060 			      fold_convert (utype, min2),
2061 			      fold_convert (utype, min1));
2062   subject = fold_build2 (PLUS_EXPR, utype, subject,
2063 			 wide_int_to_tree (utype, bias));
2064   tree part_cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject,
2065 				     wide_int_to_tree (utype, limit));
2066   if (*cond_expr)
2067     *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2068 			      *cond_expr, part_cond_expr);
2069   else
2070     *cond_expr = part_cond_expr;
2071   if (dump_enabled_p ())
2072     {
2073       if (waw_or_war_p)
2074 	dump_printf (MSG_NOTE, "using an index-based WAR/WAW test\n");
2075       else
2076 	dump_printf (MSG_NOTE, "using an index-based overlap test\n");
2077     }
2078   return true;
2079 }
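
/* Editorial worked example (values assumed) tying the formulas above to the
   arr[i]/arr[j] example in the function comment: with a positive step,
   idx_len1 = idx_len2 = 3 and idx_access1 = idx_access2 = 1 we get

     low_offset1 = 0, high_offset2 = idx_len2 = 3
     bias  = 3 + 1 - 1 - 0 = 3
     limit = 3 + 1 - 1 + 3 + 1 - 1 = 6

   so the emitted no-alias condition is (unsigned) (j_0 - i_0 + 3) > 6,
   which, by the symmetry of the bounds, is the complement of the overlap
   test (unsigned) (i_0 - j_0 + 3) <= 6 shown earlier.  */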
2080 
2081 /* A subroutine of create_intersect_range_checks, with a subset of the
2082    same arguments.  Try to optimize cases in which the second access
2083    is a write and in which some overlap is valid.  */
2084 
2085 static bool
2086 create_waw_or_war_checks (tree *cond_expr,
2087 			  const dr_with_seg_len_pair_t &alias_pair)
2088 {
2089   const dr_with_seg_len& dr_a = alias_pair.first;
2090   const dr_with_seg_len& dr_b = alias_pair.second;
2091 
2092   /* Check for cases in which:
2093 
2094      (a) DR_B is always a write;
2095      (b) the accesses are well-ordered in both the original and new code
2096 	 (see the comment above the DR_ALIAS_* flags for details); and
2097      (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR.  */
2098   if (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW))
2099     return false;
2100 
2101   /* Check for equal (but possibly variable) steps.  */
2102   tree step = DR_STEP (dr_a.dr);
2103   if (!operand_equal_p (step, DR_STEP (dr_b.dr)))
2104     return false;
2105 
2106   /* Make sure that we can operate on sizetype without loss of precision.  */
2107   tree addr_type = TREE_TYPE (DR_BASE_ADDRESS (dr_a.dr));
2108   if (TYPE_PRECISION (addr_type) != TYPE_PRECISION (sizetype))
2109     return false;
2110 
2111   /* All addresses involved are known to have a common alignment ALIGN.
2112      We can therefore subtract ALIGN from an exclusive endpoint to get
2113      an inclusive endpoint.  In the best (and common) case, ALIGN is the
2114      same as the access sizes of both DRs, and so subtracting ALIGN
2115      cancels out the addition of an access size.  */
2116   unsigned int align = MIN (dr_a.align, dr_b.align);
2117   poly_uint64 last_chunk_a = dr_a.access_size - align;
2118   poly_uint64 last_chunk_b = dr_b.access_size - align;
2119 
2120   /* Get a boolean expression that is true when the step is negative.  */
2121   tree indicator = dr_direction_indicator (dr_a.dr);
2122   tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
2123 			       fold_convert (ssizetype, indicator),
2124 			       ssize_int (0));
2125 
2126   /* Get lengths in sizetype.  */
2127   tree seg_len_a
2128     = fold_convert (sizetype, rewrite_to_non_trapping_overflow (dr_a.seg_len));
2129   step = fold_convert (sizetype, rewrite_to_non_trapping_overflow (step));
2130 
2131   /* Each access has the following pattern:
2132 
2133 	  <- |seg_len| ->
2134 	  <--- A: -ve step --->
2135 	  +-----+-------+-----+-------+-----+
2136 	  | n-1 | ..... |  0  | ..... | n-1 |
2137 	  +-----+-------+-----+-------+-----+
2138 			<--- B: +ve step --->
2139 			<- |seg_len| ->
2140 			|
2141 		   base address
2142 
2143      where "n" is the number of scalar iterations covered by the segment.
2144 
2145      A is the range of bytes accessed when the step is negative,
2146      B is the range when the step is positive.
2147 
2148      We know that DR_B is a write.  We also know (from checking that
2149      DR_A and DR_B are well-ordered) that for each i in [0, n-1],
2150      the write performed by access i of DR_B occurs after access numbers
2151      j<=i of DR_A in both the original and the new code.  Any write or
2152      anti dependencies wrt those DR_A accesses are therefore maintained.
2153 
2154      We just need to make sure that each individual write in DR_B does not
2155      overlap any higher-indexed access in DR_A; such DR_A accesses happen
2156      after the DR_B access in the original code but happen before it in
2157      the new code.
2158 
2159      We know the steps for both accesses are equal, so by induction, we
2160      just need to test whether the first write of DR_B overlaps a later
2161      access of DR_A.  In other words, we need to move addr_a along by
2162      one iteration:
2163 
2164        addr_a' = addr_a + step
2165 
2166      and check whether:
2167 
2168        [addr_b, addr_b + last_chunk_b]
2169 
2170      overlaps:
2171 
2172        [addr_a' + low_offset_a, addr_a' + high_offset_a + last_chunk_a]
2173 
2174      where [low_offset_a, high_offset_a] spans accesses [1, n-1].  I.e.:
2175 
2176 	low_offset_a = +ve step ? 0 : seg_len_a - step
2177        high_offset_a = +ve step ? seg_len_a - step : 0
2178 
2179      This is equivalent to testing whether:
2180 
2181        addr_a' + low_offset_a <= addr_b + last_chunk_b
2182        && addr_b <= addr_a' + high_offset_a + last_chunk_a
2183 
2184      Converting this into a single test, there is an overlap if:
2185 
2186        0 <= addr_b + last_chunk_b - addr_a' - low_offset_a <= limit
2187 
2188      where limit = high_offset_a - low_offset_a + last_chunk_a + last_chunk_b
2189 
2190      If DR_A is performed, limit + |step| - last_chunk_b is known to be
2191      less than the size of the object underlying DR_A.  We also know
2192      that last_chunk_b <= |step|; this is checked elsewhere if it isn't
2193      guaranteed at compile time.  There can therefore be no overflow if
2194      "limit" is calculated in an unsigned type with pointer precision.  */
2195   tree addr_a = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_a.dr),
2196 					 DR_OFFSET (dr_a.dr));
2197   addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));
2198 
2199   tree addr_b = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_b.dr),
2200 					 DR_OFFSET (dr_b.dr));
2201   addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));
2202 
2203   /* Advance ADDR_A by one iteration and adjust the length to compensate.  */
2204   addr_a = fold_build_pointer_plus (addr_a, step);
2205   tree seg_len_a_minus_step = fold_build2 (MINUS_EXPR, sizetype,
2206 					   seg_len_a, step);
2207   if (!CONSTANT_CLASS_P (seg_len_a_minus_step))
2208     seg_len_a_minus_step = build1 (SAVE_EXPR, sizetype, seg_len_a_minus_step);
2209 
2210   tree low_offset_a = fold_build3 (COND_EXPR, sizetype, neg_step,
2211 				   seg_len_a_minus_step, size_zero_node);
2212   if (!CONSTANT_CLASS_P (low_offset_a))
2213     low_offset_a = build1 (SAVE_EXPR, sizetype, low_offset_a);
2214 
2215   /* We could use COND_EXPR <neg_step, size_zero_node, seg_len_a_minus_step>,
2216      but it's usually more efficient to reuse the LOW_OFFSET_A result.  */
2217   tree high_offset_a = fold_build2 (MINUS_EXPR, sizetype, seg_len_a_minus_step,
2218 				    low_offset_a);
2219 
2220   /* The amount added to addr_b - addr_a'.  */
2221   tree bias = fold_build2 (MINUS_EXPR, sizetype,
2222 			   size_int (last_chunk_b), low_offset_a);
2223 
2224   tree limit = fold_build2 (MINUS_EXPR, sizetype, high_offset_a, low_offset_a);
2225   limit = fold_build2 (PLUS_EXPR, sizetype, limit,
2226 		       size_int (last_chunk_a + last_chunk_b));
2227 
2228   tree subject = fold_build2 (POINTER_DIFF_EXPR, ssizetype, addr_b, addr_a);
2229   subject = fold_build2 (PLUS_EXPR, sizetype,
2230 			 fold_convert (sizetype, subject), bias);
2231 
2232   *cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject, limit);
2233   if (dump_enabled_p ())
2234     dump_printf (MSG_NOTE, "using an address-based WAR/WAW test\n");
2235   return true;
2236 }
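
/* Editorial sketch (assuming a positive constant step) of the test built
   above: low_offset_a folds to 0 and high_offset_a to seg_len_a - step, so
   no overlap is reported when

     (sizetype) (addr_b - (addr_a + step)) + last_chunk_b
       > seg_len_a - step + last_chunk_a + last_chunk_b

   i.e. the first write of DR_B lies outside every later access of DR_A.  */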
2237 
2238 /* If ALIGN is nonzero, set up *SEG_MIN_OUT and *SEG_MAX_OUT so that for
2239    every address ADDR accessed by D:
2240 
2241      *SEG_MIN_OUT <= ADDR (== ADDR & -ALIGN) <= *SEG_MAX_OUT
2242 
2243    In this case, every element accessed by D is aligned to at least
2244    ALIGN bytes.
2245 
2246    If ALIGN is zero then instead set *SEG_MIN_OUT and *SEG_MAX_OUT so that:
2247 
2248      *SEG_MIN_OUT <= ADDR < *SEG_MAX_OUT.  */
2249 
2250 static void
2251 get_segment_min_max (const dr_with_seg_len &d, tree *seg_min_out,
2252 		     tree *seg_max_out, HOST_WIDE_INT align)
2253 {
2254   /* Each access has the following pattern:
2255 
2256 	  <- |seg_len| ->
2257 	  <--- A: -ve step --->
2258 	  +-----+-------+-----+-------+-----+
2259 	  | n-1 | ..... |  0  | ..... | n-1 |
2260 	  +-----+-------+-----+-------+-----+
2261 			<--- B: +ve step --->
2262 			<- |seg_len| ->
2263 			|
2264 		   base address
2265 
2266      where "n" is the number of scalar iterations covered by the segment.
2267      (This should be VF for a particular pair if we know that both steps
2268      are the same, otherwise it will be the full number of scalar loop
2269      iterations.)
2270 
2271      A is the range of bytes accessed when the step is negative,
2272      B is the range when the step is positive.
2273 
2274      If the access size is "access_size" bytes, the lowest addressed byte is:
2275 
2276 	 base + (step < 0 ? seg_len : 0)   [LB]
2277 
2278      and the highest addressed byte is always below:
2279 
2280 	 base + (step < 0 ? 0 : seg_len) + access_size   [UB]
2281 
2282      Thus:
2283 
2284 	 LB <= ADDR < UB
2285 
2286      If ALIGN is nonzero, all three values are aligned to at least ALIGN
2287      bytes, so:
2288 
2289 	 LB <= ADDR <= UB - ALIGN
2290 
2291      where "- ALIGN" folds naturally with the "+ access_size" and often
2292      cancels it out.
2293 
2294      We don't try to simplify LB and UB beyond this (e.g. by using
2295      MIN and MAX based on whether seg_len rather than the stride is
2296      negative) because it is possible for the absolute size of the
2297      segment to overflow the range of a ssize_t.
2298 
2299      Keeping the pointer_plus outside of the cond_expr should allow
2300      the cond_exprs to be shared with other alias checks.  */
2301   tree indicator = dr_direction_indicator (d.dr);
2302   tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
2303 			       fold_convert (ssizetype, indicator),
2304 			       ssize_int (0));
2305   tree addr_base = fold_build_pointer_plus (DR_BASE_ADDRESS (d.dr),
2306 					    DR_OFFSET (d.dr));
2307   addr_base = fold_build_pointer_plus (addr_base, DR_INIT (d.dr));
2308   tree seg_len
2309     = fold_convert (sizetype, rewrite_to_non_trapping_overflow (d.seg_len));
2310 
2311   tree min_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
2312 				seg_len, size_zero_node);
2313   tree max_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
2314 				size_zero_node, seg_len);
2315   max_reach = fold_build2 (PLUS_EXPR, sizetype, max_reach,
2316 			   size_int (d.access_size - align));
2317 
2318   *seg_min_out = fold_build_pointer_plus (addr_base, min_reach);
2319   *seg_max_out = fold_build_pointer_plus (addr_base, max_reach);
2320 }
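
/* Editorial sketch (assuming a positive step and ALIGN nonzero): NEG_STEP
   folds to false, so MIN_REACH is 0 and MAX_REACH is
   seg_len + access_size - align, giving

     *seg_min_out = base
     *seg_max_out = base + seg_len + access_size - align

   which are exactly the [LB, UB - ALIGN] bounds described above.  */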
2321 
2322 /* Generate a runtime condition that is true if ALIAS_PAIR is free of aliases,
2323    storing the condition in *COND_EXPR.  The fallback is to generate
2324    a test that the two accesses do not overlap:
2325 
2326      end_a <= start_b || end_b <= start_a.  */
2327 
2328 static void
2329 create_intersect_range_checks (class loop *loop, tree *cond_expr,
2330 			       const dr_with_seg_len_pair_t &alias_pair)
2331 {
2332   const dr_with_seg_len& dr_a = alias_pair.first;
2333   const dr_with_seg_len& dr_b = alias_pair.second;
2334   *cond_expr = NULL_TREE;
2335   if (create_intersect_range_checks_index (loop, cond_expr, alias_pair))
2336     return;
2337 
2338   if (create_ifn_alias_checks (cond_expr, alias_pair))
2339     return;
2340 
2341   if (create_waw_or_war_checks (cond_expr, alias_pair))
2342     return;
2343 
2344   unsigned HOST_WIDE_INT min_align;
2345   tree_code cmp_code;
2346   /* We don't have to check DR_ALIAS_MIXED_STEPS here, since both versions
2347      are equivalent.  This is just an optimization heuristic.  */
2348   if (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST
2349       && TREE_CODE (DR_STEP (dr_b.dr)) == INTEGER_CST)
2350     {
2351       /* In this case adding access_size to seg_len is likely to give
2352 	 a simple X * step, where X is either the number of scalar
2353 	 iterations or the vectorization factor.  We're better off
2354 	 keeping that, rather than subtracting an alignment from it.
2355 
2356 	 In this case the maximum values are exclusive and so there is
2357 	 no alias if the maximum of one segment equals the minimum
2358 	 of another.  */
2359       min_align = 0;
2360       cmp_code = LE_EXPR;
2361     }
2362   else
2363     {
2364       /* Calculate the minimum alignment shared by all four pointers,
2365 	 then arrange for this alignment to be subtracted from the
2366 	 exclusive maximum values to get inclusive maximum values.
2367 	 This "- min_align" is cumulative with a "+ access_size"
2368 	 in the calculation of the maximum values.  In the best
2369 	 (and common) case, the two cancel each other out, leaving
2370 	 us with an inclusive bound based only on seg_len.  In the
2371 	 worst case we're simply adding a smaller number than before.
2372 
2373 	 Because the maximum values are inclusive, there is an alias
2374 	 if the maximum value of one segment is equal to the minimum
2375 	 value of the other.  */
2376       min_align = MIN (dr_a.align, dr_b.align);
2377       cmp_code = LT_EXPR;
2378     }
2379 
2380   tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
2381   get_segment_min_max (dr_a, &seg_a_min, &seg_a_max, min_align);
2382   get_segment_min_max (dr_b, &seg_b_min, &seg_b_max, min_align);
2383 
2384   *cond_expr
2385     = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2386 	fold_build2 (cmp_code, boolean_type_node, seg_a_max, seg_b_min),
2387 	fold_build2 (cmp_code, boolean_type_node, seg_b_max, seg_a_min));
2388   if (dump_enabled_p ())
2389     dump_printf (MSG_NOTE, "using an address-based overlap test\n");
2390 }
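
/* Editorial illustration (hypothetical accesses): for two arrays read and
   written with constant steps over N iterations, min_align is 0, cmp_code
   is LE_EXPR and the emitted fallback condition is conceptually

     &a[N] <= &b[0] || &b[N] <= &a[0]

   i.e. the two exclusive address ranges are disjoint.  */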
2391 
2392 /* Create a conditional expression that represents the run-time checks for
2393    overlapping of address ranges represented by a list of data references
2394    pairs passed in ALIAS_PAIRS.  Data references are in LOOP.  The returned
2395    COND_EXPR is the conditional expression to be used in the if statement
2396    that controls which version of the loop gets executed at runtime.  */
2397 
2398 void
2399 create_runtime_alias_checks (class loop *loop,
2400 			     vec<dr_with_seg_len_pair_t> *alias_pairs,
2401 			     tree * cond_expr)
2402 {
2403   tree part_cond_expr;
2404 
2405   fold_defer_overflow_warnings ();
2406   dr_with_seg_len_pair_t *alias_pair;
2407   unsigned int i;
2408   FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
2409     {
2410       gcc_assert (alias_pair->flags);
2411       if (dump_enabled_p ())
2412 	dump_printf (MSG_NOTE,
2413 		     "create runtime check for data references %T and %T\n",
2414 		     DR_REF (alias_pair->first.dr),
2415 		     DR_REF (alias_pair->second.dr));
2416 
2417       /* Create the condition expression for each pair of data references.  */
2418       create_intersect_range_checks (loop, &part_cond_expr, *alias_pair);
2419       if (*cond_expr)
2420 	*cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2421 				  *cond_expr, part_cond_expr);
2422       else
2423 	*cond_expr = part_cond_expr;
2424     }
2425   fold_undefer_and_ignore_overflow_warnings ();
2426 }
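
/* Editorial sketch of how the combined condition is used by loop
   versioning (conceptual, not code from this file):

     if (cond_expr)      /- true: no data-ref pair can overlap -/
       vectorized_loop ();
     else
       scalar_loop ();
*/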
2427 
2428 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
2429    expressions.  */
2430 static bool
2431 dr_equal_offsets_p1 (tree offset1, tree offset2)
2432 {
2433   bool res;
2434 
2435   STRIP_NOPS (offset1);
2436   STRIP_NOPS (offset2);
2437 
2438   if (offset1 == offset2)
2439     return true;
2440 
2441   if (TREE_CODE (offset1) != TREE_CODE (offset2)
2442       || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
2443     return false;
2444 
2445   res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
2446                              TREE_OPERAND (offset2, 0));
2447 
2448   if (!res || !BINARY_CLASS_P (offset1))
2449     return res;
2450 
2451   res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
2452                              TREE_OPERAND (offset2, 1));
2453 
2454   return res;
2455 }
2456 
2457 /* Check if DRA and DRB have equal offsets.  */
2458 bool
2459 dr_equal_offsets_p (struct data_reference *dra,
2460                     struct data_reference *drb)
2461 {
2462   tree offset1, offset2;
2463 
2464   offset1 = DR_OFFSET (dra);
2465   offset2 = DR_OFFSET (drb);
2466 
2467   return dr_equal_offsets_p1 (offset1, offset2);
2468 }
2469 
2470 /* Returns true if FNA == FNB.  */
2471 
2472 static bool
2473 affine_function_equal_p (affine_fn fna, affine_fn fnb)
2474 {
2475   unsigned i, n = fna.length ();
2476 
2477   if (n != fnb.length ())
2478     return false;
2479 
2480   for (i = 0; i < n; i++)
2481     if (!operand_equal_p (fna[i], fnb[i], 0))
2482       return false;
2483 
2484   return true;
2485 }
2486 
2487 /* If all the functions in CF are the same, returns one of them,
2488    otherwise returns NULL.  */
2489 
2490 static affine_fn
2491 common_affine_function (conflict_function *cf)
2492 {
2493   unsigned i;
2494   affine_fn comm;
2495 
2496   if (!CF_NONTRIVIAL_P (cf))
2497     return affine_fn ();
2498 
2499   comm = cf->fns[0];
2500 
2501   for (i = 1; i < cf->n; i++)
2502     if (!affine_function_equal_p (comm, cf->fns[i]))
2503       return affine_fn ();
2504 
2505   return comm;
2506 }
2507 
2508 /* Returns the base of the affine function FN.  */
2509 
2510 static tree
2511 affine_function_base (affine_fn fn)
2512 {
2513   return fn[0];
2514 }
2515 
2516 /* Returns true if FN is a constant.  */
2517 
2518 static bool
2519 affine_function_constant_p (affine_fn fn)
2520 {
2521   unsigned i;
2522   tree coef;
2523 
2524   for (i = 1; fn.iterate (i, &coef); i++)
2525     if (!integer_zerop (coef))
2526       return false;
2527 
2528   return true;
2529 }
2530 
2531 /* Returns true if FN is the zero constant function.  */
2532 
2533 static bool
2534 affine_function_zero_p (affine_fn fn)
2535 {
2536   return (integer_zerop (affine_function_base (fn))
2537 	  && affine_function_constant_p (fn));
2538 }
2539 
2540 /* Returns a signed integer type with the largest precision from TA
2541    and TB.  */
2542 
2543 static tree
2544 signed_type_for_types (tree ta, tree tb)
2545 {
2546   if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
2547     return signed_type_for (ta);
2548   else
2549     return signed_type_for (tb);
2550 }
2551 
2552 /* Applies operation OP on affine functions FNA and FNB, and returns the
2553    result.  */
2554 
2555 static affine_fn
2556 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
2557 {
2558   unsigned i, n, m;
2559   affine_fn ret;
2560   tree coef;
2561 
2562   if (fnb.length () > fna.length ())
2563     {
2564       n = fna.length ();
2565       m = fnb.length ();
2566     }
2567   else
2568     {
2569       n = fnb.length ();
2570       m = fna.length ();
2571     }
2572 
2573   ret.create (m);
2574   for (i = 0; i < n; i++)
2575     {
2576       tree type = signed_type_for_types (TREE_TYPE (fna[i]),
2577 					 TREE_TYPE (fnb[i]));
2578       ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
2579     }
2580 
2581   for (; fna.iterate (i, &coef); i++)
2582     ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2583 				 coef, integer_zero_node));
2584   for (; fnb.iterate (i, &coef); i++)
2585     ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2586 				 integer_zero_node, coef));
2587 
2588   return ret;
2589 }
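
/* Editorial note: an affine_fn is stored as a coefficient vector
   { base, c1, c2, ... } denoting base + c1 * x1 + c2 * x2 + ...
   For example (values assumed), with FNA = {1, 2} (i.e. 1 + 2*x) and
   FNB = {3, 2} (i.e. 3 + 2*x), affine_fn_op (MINUS_EXPR, FNA, FNB)
   yields {-2, 0}: a constant function.  */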
2590 
2591 /* Returns the sum of affine functions FNA and FNB.  */
2592 
2593 static affine_fn
2594 affine_fn_plus (affine_fn fna, affine_fn fnb)
2595 {
2596   return affine_fn_op (PLUS_EXPR, fna, fnb);
2597 }
2598 
2599 /* Returns the difference of affine functions FNA and FNB.  */
2600 
2601 static affine_fn
2602 affine_fn_minus (affine_fn fna, affine_fn fnb)
2603 {
2604   return affine_fn_op (MINUS_EXPR, fna, fnb);
2605 }
2606 
2607 /* Frees affine function FN.  */
2608 
2609 static void
2610 affine_fn_free (affine_fn fn)
2611 {
2612   fn.release ();
2613 }
2614 
2615 /* For each subscript in the data dependence relation DDR, determine
2616    the distance.  */
2617 
2618 static void
2619 compute_subscript_distance (struct data_dependence_relation *ddr)
2620 {
2621   conflict_function *cf_a, *cf_b;
2622   affine_fn fn_a, fn_b, diff;
2623 
2624   if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
2625     {
2626       unsigned int i;
2627 
2628       for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2629  	{
2630  	  struct subscript *subscript;
2631 
2632  	  subscript = DDR_SUBSCRIPT (ddr, i);
2633  	  cf_a = SUB_CONFLICTS_IN_A (subscript);
2634  	  cf_b = SUB_CONFLICTS_IN_B (subscript);
2635 
2636 	  fn_a = common_affine_function (cf_a);
2637 	  fn_b = common_affine_function (cf_b);
2638 	  if (!fn_a.exists () || !fn_b.exists ())
2639 	    {
2640 	      SUB_DISTANCE (subscript) = chrec_dont_know;
2641 	      return;
2642 	    }
2643 	  diff = affine_fn_minus (fn_a, fn_b);
2644 
2645  	  if (affine_function_constant_p (diff))
2646  	    SUB_DISTANCE (subscript) = affine_function_base (diff);
2647  	  else
2648  	    SUB_DISTANCE (subscript) = chrec_dont_know;
2649 
2650 	  affine_fn_free (diff);
2651  	}
2652     }
2653 }
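
/* Editorial worked example (hypothetical accesses): for a[i+1] versus a[i],
   the common affine functions are FN_A = {1, 1} and FN_B = {0, 1}; their
   difference is {1, 0}, which is a constant function, so SUB_DISTANCE is
   set to the base value 1.  */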
2654 
2655 /* Returns the conflict function for "unknown".  */
2656 
2657 static conflict_function *
2658 conflict_fn_not_known (void)
2659 {
2660   conflict_function *fn = XCNEW (conflict_function);
2661   fn->n = NOT_KNOWN;
2662 
2663   return fn;
2664 }
2665 
2666 /* Returns the conflict function for "independent".  */
2667 
2668 static conflict_function *
2669 conflict_fn_no_dependence (void)
2670 {
2671   conflict_function *fn = XCNEW (conflict_function);
2672   fn->n = NO_DEPENDENCE;
2673 
2674   return fn;
2675 }
2676 
2677 /* Returns true if the address of OBJ is invariant in LOOP.  */
2678 
2679 static bool
2680 object_address_invariant_in_loop_p (const class loop *loop, const_tree obj)
2681 {
2682   while (handled_component_p (obj))
2683     {
2684       if (TREE_CODE (obj) == ARRAY_REF)
2685 	{
2686 	  for (int i = 1; i < 4; ++i)
2687 	    if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, i),
2688 							loop->num))
2689 	      return false;
2690 	}
2691       else if (TREE_CODE (obj) == COMPONENT_REF)
2692 	{
2693 	  if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
2694 						      loop->num))
2695 	    return false;
2696 	}
2697       obj = TREE_OPERAND (obj, 0);
2698     }
2699 
2700   if (!INDIRECT_REF_P (obj)
2701       && TREE_CODE (obj) != MEM_REF)
2702     return true;
2703 
2704   return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
2705 						  loop->num);
2706 }
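
/* Editorial illustration (hypothetical code): in

     for (i = 0; i < n; i++)
       {
	 sum += p->x;
	 p = p->next;
       }

   the reference p->x is based on a pointer redefined inside the loop, so
   the operand of the MEM_REF contains symbols defined in the loop and the
   function returns false; a reference through a pointer that is invariant
   in the loop would return true.  */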
2707 
2708 /* Returns false if we can prove that data references A and B do not alias,
2709    true otherwise.  If LOOP_NEST is NULL, no cross-iteration aliases are
2710    considered.  */
2711 
2712 bool
2713 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
2714 		class loop *loop_nest)
2715 {
2716   tree addr_a = DR_BASE_OBJECT (a);
2717   tree addr_b = DR_BASE_OBJECT (b);
2718 
2719   /* If we are not processing a loop nest but scalar code we
2720      do not need to care about possible cross-iteration dependences
2721      and thus can process the full original reference.  Do so,
2722      similar to how loop invariant motion applies extra offset-based
2723      disambiguation.  */
2724   if (!loop_nest)
2725     {
2726       aff_tree off1, off2;
2727       poly_widest_int size1, size2;
2728       get_inner_reference_aff (DR_REF (a), &off1, &size1);
2729       get_inner_reference_aff (DR_REF (b), &off2, &size2);
2730       aff_combination_scale (&off1, -1);
2731       aff_combination_add (&off2, &off1);
2732       if (aff_comb_cannot_overlap_p (&off2, size1, size2))
2733 	return false;
2734     }
2735 
2736   if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
2737       && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
2738       /* For cross-iteration dependences the cliques must be valid for the
2739 	 whole loop, not just individual iterations.  */
2740       && (!loop_nest
2741 	  || MR_DEPENDENCE_CLIQUE (addr_a) == 1
2742 	  || MR_DEPENDENCE_CLIQUE (addr_a) == loop_nest->owned_clique)
2743       && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
2744       && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
2745     return false;
2746 
2747   /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
2748      do not know the size of the base-object.  So we cannot do any
2749      offset/overlap based analysis but have to rely on points-to
2750      information only.  */
2751   if (TREE_CODE (addr_a) == MEM_REF
2752       && (DR_UNCONSTRAINED_BASE (a)
2753 	  || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
2754     {
2755       /* For true dependences we can apply TBAA.  */
2756       if (flag_strict_aliasing
2757 	  && DR_IS_WRITE (a) && DR_IS_READ (b)
2758 	  && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
2759 				     get_alias_set (DR_REF (b))))
2760 	return false;
2761       if (TREE_CODE (addr_b) == MEM_REF)
2762 	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
2763 				       TREE_OPERAND (addr_b, 0));
2764       else
2765 	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
2766 				       build_fold_addr_expr (addr_b));
2767     }
2768   else if (TREE_CODE (addr_b) == MEM_REF
2769 	   && (DR_UNCONSTRAINED_BASE (b)
2770 	       || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
2771     {
2772       /* For true dependences we can apply TBAA.  */
2773       if (flag_strict_aliasing
2774 	  && DR_IS_WRITE (a) && DR_IS_READ (b)
2775 	  && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
2776 				     get_alias_set (DR_REF (b))))
2777 	return false;
2778       if (TREE_CODE (addr_a) == MEM_REF)
2779 	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
2780 				       TREE_OPERAND (addr_b, 0));
2781       else
2782 	return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
2783 				       TREE_OPERAND (addr_b, 0));
2784     }
2785 
2786   /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
2787      that is being subsetted in the loop nest.  */
2788   if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
2789     return refs_output_dependent_p (addr_a, addr_b);
2790   else if (DR_IS_READ (a) && DR_IS_WRITE (b))
2791     return refs_anti_dependent_p (addr_a, addr_b);
2792   return refs_may_alias_p (addr_a, addr_b);
2793 }
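
/* Editorial illustration of the TBAA case above: for a write through an
   "int *" (DR_IS_WRITE (a)) and a read through a "float *" (DR_IS_READ (b)),
   alias_sets_conflict_p is false under -fstrict-aliasing, so the true
   dependence is ruled out and dr_may_alias_p returns false without needing
   a points-to query.  */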
2794 
2795 /* REF_A and REF_B both satisfy access_fn_component_p.  Return true
2796    if it is meaningful to compare their associated access functions
2797    when checking for dependencies.  */
2798 
2799 static bool
2800 access_fn_components_comparable_p (tree ref_a, tree ref_b)
2801 {
2802   /* Allow pairs of component refs from the following sets:
2803 
2804        { REALPART_EXPR, IMAGPART_EXPR }
2805        { COMPONENT_REF }
2806        { ARRAY_REF }.  */
2807   tree_code code_a = TREE_CODE (ref_a);
2808   tree_code code_b = TREE_CODE (ref_b);
2809   if (code_a == IMAGPART_EXPR)
2810     code_a = REALPART_EXPR;
2811   if (code_b == IMAGPART_EXPR)
2812     code_b = REALPART_EXPR;
2813   if (code_a != code_b)
2814     return false;
2815 
2816   if (TREE_CODE (ref_a) == COMPONENT_REF)
2817     /* ??? We cannot simply use the type of operand #0 of the refs here as
2818        the Fortran compiler smuggles type punning into COMPONENT_REFs.
2819        Use the DECL_CONTEXT of the FIELD_DECLs instead.  */
2820     return (DECL_CONTEXT (TREE_OPERAND (ref_a, 1))
2821 	    == DECL_CONTEXT (TREE_OPERAND (ref_b, 1)));
2822 
2823   return types_compatible_p (TREE_TYPE (TREE_OPERAND (ref_a, 0)),
2824 			     TREE_TYPE (TREE_OPERAND (ref_b, 0)));
2825 }
2826 
2827 /* Initialize a data dependence relation between data accesses A and
2828    B.  NB_LOOPS is the number of loops surrounding the references: the
2829    size of the classic distance/direction vectors.  */
2830 
2831 struct data_dependence_relation *
2832 initialize_data_dependence_relation (struct data_reference *a,
2833 				     struct data_reference *b,
2834  				     vec<loop_p> loop_nest)
2835 {
2836   struct data_dependence_relation *res;
2837   unsigned int i;
2838 
2839   res = XCNEW (struct data_dependence_relation);
2840   DDR_A (res) = a;
2841   DDR_B (res) = b;
2842   DDR_LOOP_NEST (res).create (0);
2843   DDR_SUBSCRIPTS (res).create (0);
2844   DDR_DIR_VECTS (res).create (0);
2845   DDR_DIST_VECTS (res).create (0);
2846 
2847   if (a == NULL || b == NULL)
2848     {
2849       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2850       return res;
2851     }
2852 
2853   /* If the data references do not alias, then they are independent.  */
2854   if (!dr_may_alias_p (a, b, loop_nest.exists () ? loop_nest[0] : NULL))
2855     {
2856       DDR_ARE_DEPENDENT (res) = chrec_known;
2857       return res;
2858     }
2859 
2860   unsigned int num_dimensions_a = DR_NUM_DIMENSIONS (a);
2861   unsigned int num_dimensions_b = DR_NUM_DIMENSIONS (b);
2862   if (num_dimensions_a == 0 || num_dimensions_b == 0)
2863     {
2864       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2865       return res;
2866     }
2867 
2868   /* For unconstrained bases, the root (highest-indexed) subscript
2869      describes a variation in the base of the original DR_REF rather
2870      than a component access.  We have no type that accurately describes
2871      the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after*
2872      applying this subscript) so limit the search to the last real
2873      component access.
2874 
2875      E.g. for:
2876 
2877 	void
2878 	f (int a[][8], int b[][8])
2879 	{
2880 	  for (int i = 0; i < 8; ++i)
2881 	    a[i * 2][0] = b[i][0];
2882 	}
2883 
2884      the a and b accesses have a single ARRAY_REF component reference [0]
2885      but have two subscripts.  */
2886   if (DR_UNCONSTRAINED_BASE (a))
2887     num_dimensions_a -= 1;
2888   if (DR_UNCONSTRAINED_BASE (b))
2889     num_dimensions_b -= 1;
2890 
2891   /* These structures describe sequences of component references in
2892      DR_REF (A) and DR_REF (B).  Each component reference is tied to a
2893      specific access function.  */
2894   struct {
2895     /* The sequence starts at DR_ACCESS_FN (A, START_A) of A and
2896        DR_ACCESS_FN (B, START_B) of B (inclusive) and extends to higher
2897        indices.  In C notation, these are the indices of the rightmost
2898        component references; e.g. for a sequence .b.c.d, the start
2899        index is for .d.  */
2900     unsigned int start_a;
2901     unsigned int start_b;
2902 
2903     /* The sequence contains LENGTH consecutive access functions from
2904        each DR.  */
2905     unsigned int length;
2906 
2907     /* The enclosing objects for the A and B sequences respectively,
2908        i.e. the objects to which DR_ACCESS_FN (A, START_A + LENGTH - 1)
2909        and DR_ACCESS_FN (B, START_B + LENGTH - 1) are applied.  */
2910     tree object_a;
2911     tree object_b;
2912   } full_seq = {}, struct_seq = {};
2913 
2914   /* Before each iteration of the loop:
2915 
2916      - REF_A is what you get after applying DR_ACCESS_FN (A, INDEX_A) and
2917      - REF_B is what you get after applying DR_ACCESS_FN (B, INDEX_B).  */
2918   unsigned int index_a = 0;
2919   unsigned int index_b = 0;
2920   tree ref_a = DR_REF (a);
2921   tree ref_b = DR_REF (b);
2922 
2923   /* Now walk the component references from the final DR_REFs back up to
2924      the enclosing base objects.  Each component reference corresponds
2925      to one access function in the DR, with access function 0 being for
2926      the final DR_REF and the highest-indexed access function being the
2927      one that is applied to the base of the DR.
2928 
2929      Look for a sequence of component references whose access functions
2930      are comparable (see access_fn_components_comparable_p).  If more
2931      than one such sequence exists, pick the one nearest the base
2932      (which is the leftmost sequence in C notation).  Store this sequence
2933      in FULL_SEQ.
2934 
2935      For example, if we have:
2936 
2937 	struct foo { struct bar s; ... } (*a)[10], (*b)[10];
2938 
2939 	A: a[0][i].s.c.d
2940 	B: __real b[0][i].s.e[i].f
2941 
2942      (where d is the same type as the real component of f) then the access
2943      functions would be:
2944 
2945 			 0   1   2   3
2946 	A:              .d  .c  .s [i]
2947 
2948 		 0   1   2   3   4   5
2949 	B:  __real  .f [i]  .e  .s [i]
2950 
2951      The A0/B2 column isn't comparable, since .d is a COMPONENT_REF
2952      and [i] is an ARRAY_REF.  However, the A1/B3 column contains two
2953      COMPONENT_REF accesses for struct bar, so is comparable.  Likewise
2954      the A2/B4 column contains two COMPONENT_REF accesses for struct foo,
2955      so is comparable.  The A3/B5 column contains two ARRAY_REFs that
2956      index foo[10] arrays, so is again comparable.  The sequence is
2957      therefore:
2958 
2959         A: [1, 3]  (i.e. [i].s.c)
2960         B: [3, 5]  (i.e. [i].s.e)
2961 
2962      Also look for sequences of component references whose access
2963      functions are comparable and whose enclosing objects have the same
2964      RECORD_TYPE.  Store this sequence in STRUCT_SEQ.  In the above
2965      example, STRUCT_SEQ would be:
2966 
2967         A: [1, 2]  (i.e. s.c)
2968         B: [3, 4]  (i.e. s.e)  */
2969   while (index_a < num_dimensions_a && index_b < num_dimensions_b)
2970     {
2971       /* REF_A and REF_B must be one of the component access types
2972 	 allowed by dr_analyze_indices.  */
2973       gcc_checking_assert (access_fn_component_p (ref_a));
2974       gcc_checking_assert (access_fn_component_p (ref_b));
2975 
2976       /* Get the immediately-enclosing objects for REF_A and REF_B,
2977 	 i.e. the references *before* applying DR_ACCESS_FN (A, INDEX_A)
2978 	 and DR_ACCESS_FN (B, INDEX_B).  */
2979       tree object_a = TREE_OPERAND (ref_a, 0);
2980       tree object_b = TREE_OPERAND (ref_b, 0);
2981 
2982       tree type_a = TREE_TYPE (object_a);
2983       tree type_b = TREE_TYPE (object_b);
2984       if (access_fn_components_comparable_p (ref_a, ref_b))
2985 	{
2986 	  /* This pair of component accesses is comparable for dependence
2987 	     analysis, so we can include DR_ACCESS_FN (A, INDEX_A) and
2988 	     DR_ACCESS_FN (B, INDEX_B) in the sequence.  */
2989 	  if (full_seq.start_a + full_seq.length != index_a
2990 	      || full_seq.start_b + full_seq.length != index_b)
2991 	    {
2992 	      /* The accesses don't extend the current sequence,
2993 		 so start a new one here.  */
2994 	      full_seq.start_a = index_a;
2995 	      full_seq.start_b = index_b;
2996 	      full_seq.length = 0;
2997 	    }
2998 
2999 	  /* Add this pair of references to the sequence.  */
3000 	  full_seq.length += 1;
3001 	  full_seq.object_a = object_a;
3002 	  full_seq.object_b = object_b;
3003 
3004 	  /* If the enclosing objects are structures (and thus have the
3005 	     same RECORD_TYPE), record the new sequence in STRUCT_SEQ.  */
3006 	  if (TREE_CODE (type_a) == RECORD_TYPE)
3007 	    struct_seq = full_seq;
3008 
3009 	  /* Move to the next containing reference for both A and B.  */
3010 	  ref_a = object_a;
3011 	  ref_b = object_b;
3012 	  index_a += 1;
3013 	  index_b += 1;
3014 	  continue;
3015 	}
3016 
3017       /* Not comparable; step outward from the smaller reference to try to
	 approach equal type sizes.  */
3018       if (!COMPLETE_TYPE_P (type_a)
3019 	  || !COMPLETE_TYPE_P (type_b)
3020 	  || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a))
3021 	  || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b)))
3022 	break;
3023 
3024       unsigned HOST_WIDE_INT size_a = tree_to_uhwi (TYPE_SIZE_UNIT (type_a));
3025       unsigned HOST_WIDE_INT size_b = tree_to_uhwi (TYPE_SIZE_UNIT (type_b));
3026       if (size_a <= size_b)
3027 	{
3028 	  index_a += 1;
3029 	  ref_a = object_a;
3030 	}
3031       if (size_b <= size_a)
3032 	{
3033 	  index_b += 1;
3034 	  ref_b = object_b;
3035 	}
3036     }
3037 
3038   /* See whether FULL_SEQ ends at the base and whether the two bases
3039      are equal.  We do not care about TBAA or alignment info so we can
3040      use OEP_ADDRESS_OF to avoid false negatives.  */
3041   tree base_a = DR_BASE_OBJECT (a);
3042   tree base_b = DR_BASE_OBJECT (b);
3043   bool same_base_p = (full_seq.start_a + full_seq.length == num_dimensions_a
3044 		      && full_seq.start_b + full_seq.length == num_dimensions_b
3045 		      && DR_UNCONSTRAINED_BASE (a) == DR_UNCONSTRAINED_BASE (b)
3046 		      && operand_equal_p (base_a, base_b, OEP_ADDRESS_OF)
3047 		      && types_compatible_p (TREE_TYPE (base_a),
3048 					     TREE_TYPE (base_b))
3049 		      && (!loop_nest.exists ()
3050 			  || (object_address_invariant_in_loop_p
3051 			      (loop_nest[0], base_a))));
3052 
3053   /* If the bases are the same, we can include the base variation too.
3054      E.g. the b accesses in:
3055 
3056        for (int i = 0; i < n; ++i)
3057          b[i + 4][0] = b[i][0];
3058 
3059      have a definite dependence distance of 4, while for:
3060 
3061        for (int i = 0; i < n; ++i)
3062          a[i + 4][0] = b[i][0];
3063 
3064      the dependence distance depends on the gap between a and b.
3065 
3066      If the bases are different then we can only rely on the sequence
3067      rooted at a structure access, since arrays are allowed to overlap
3068      arbitrarily and change shape arbitrarily.  E.g. we treat this as
3069      valid code:
3070 
3071        int a[256];
3072        ...
3073        ((int (*)[4][3]) &a[1])[i][0] += ((int (*)[4][3]) &a[2])[i][0];
3074 
3075      where two lvalues with the same int[4][3] type overlap, and where
3076      both lvalues are distinct from the object's declared type.  */
3077   if (same_base_p)
3078     {
3079       if (DR_UNCONSTRAINED_BASE (a))
3080 	full_seq.length += 1;
3081     }
3082   else
3083     full_seq = struct_seq;
3084 
3085   /* Punt if we didn't find a suitable sequence.  */
3086   if (full_seq.length == 0)
3087     {
3088       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3089       return res;
3090     }
3091 
3092   if (!same_base_p)
3093     {
3094       /* Partial overlap is possible for different bases when strict aliasing
3095 	 is not in effect.  It's also possible if either base involves a union
3096 	 access; e.g. for:
3097 
3098 	   struct s1 { int a[2]; };
3099 	   struct s2 { struct s1 b; int c; };
3100 	   struct s3 { int d; struct s1 e; };
3101 	   union u { struct s2 f; struct s3 g; } *p, *q;
3102 
3103 	 the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at
3104 	 "p->g.e" (base "p->g") and might partially overlap the s1 at
3105 	 "q->g.e" (base "q->g").  */
3106       if (!flag_strict_aliasing
3107 	  || ref_contains_union_access_p (full_seq.object_a)
3108 	  || ref_contains_union_access_p (full_seq.object_b))
3109 	{
3110 	  DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3111 	  return res;
3112 	}
3113 
3114       DDR_COULD_BE_INDEPENDENT_P (res) = true;
3115       if (!loop_nest.exists ()
3116 	  || (object_address_invariant_in_loop_p (loop_nest[0],
3117 						  full_seq.object_a)
3118 	      && object_address_invariant_in_loop_p (loop_nest[0],
3119 						     full_seq.object_b)))
3120 	{
3121 	  DDR_OBJECT_A (res) = full_seq.object_a;
3122 	  DDR_OBJECT_B (res) = full_seq.object_b;
3123 	}
3124     }
3125 
3126   DDR_AFFINE_P (res) = true;
3127   DDR_ARE_DEPENDENT (res) = NULL_TREE;
3128   DDR_SUBSCRIPTS (res).create (full_seq.length);
3129   DDR_LOOP_NEST (res) = loop_nest;
3130   DDR_SELF_REFERENCE (res) = false;
3131 
3132   for (i = 0; i < full_seq.length; ++i)
3133     {
3134       struct subscript *subscript;
3135 
3136       subscript = XNEW (struct subscript);
3137       SUB_ACCESS_FN (subscript, 0) = DR_ACCESS_FN (a, full_seq.start_a + i);
3138       SUB_ACCESS_FN (subscript, 1) = DR_ACCESS_FN (b, full_seq.start_b + i);
3139       SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
3140       SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
3141       SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
3142       SUB_DISTANCE (subscript) = chrec_dont_know;
3143       DDR_SUBSCRIPTS (res).safe_push (subscript);
3144     }
3145 
3146   return res;
3147 }
3148 
3149 /* Frees memory used by the conflict function F.  */
3150 
3151 static void
3152 free_conflict_function (conflict_function *f)
3153 {
3154   unsigned i;
3155 
3156   if (CF_NONTRIVIAL_P (f))
3157     {
3158       for (i = 0; i < f->n; i++)
3159 	affine_fn_free (f->fns[i]);
3160     }
3161   free (f);
3162 }
3163 
3164 /* Frees memory used by SUBSCRIPTS.  */
3165 
3166 static void
3167 free_subscripts (vec<subscript_p> subscripts)
3168 {
3169   unsigned i;
3170   subscript_p s;
3171 
3172   FOR_EACH_VEC_ELT (subscripts, i, s)
3173     {
3174       free_conflict_function (s->conflicting_iterations_in_a);
3175       free_conflict_function (s->conflicting_iterations_in_b);
3176       free (s);
3177     }
3178   subscripts.release ();
3179 }
3180 
3181 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
3182    description.  */
3183 
3184 static inline void
3185 finalize_ddr_dependent (struct data_dependence_relation *ddr,
3186 			tree chrec)
3187 {
3188   DDR_ARE_DEPENDENT (ddr) = chrec;
3189   free_subscripts (DDR_SUBSCRIPTS (ddr));
3190   DDR_SUBSCRIPTS (ddr).create (0);
3191 }
3192 
3193 /* The dependence relation DDR cannot be represented by a distance
3194    vector.  */
3195 
3196 static inline void
3197 non_affine_dependence_relation (struct data_dependence_relation *ddr)
3198 {
3199   if (dump_file && (dump_flags & TDF_DETAILS))
3200     fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
3201 
3202   DDR_AFFINE_P (ddr) = false;
3203 }
3204 
3205 
3206 
3207 /* This section contains the classic Banerjee tests.  */
3208 
3209 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
3210    variables, i.e., if the ZIV (Zero Index Variable) test is true.  */
3211 
3212 static inline bool
3213 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
3214 {
3215   return (evolution_function_is_constant_p (chrec_a)
3216 	  && evolution_function_is_constant_p (chrec_b));
3217 }
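
/* A small worked example, added for illustration: in

     for (i = 0; i < n; i++)
       a[3] = a[5] + 1;

   both access functions are the constants 3 and 5.  Neither depends
   on an index variable, so the pair is classified as ZIV.  */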
3218 
3219 /* Returns true iff CHREC_A and CHREC_B are dependent on a single
3220    index variable, i.e., if the SIV (Single Index Variable) test is true.  */
3221 
3222 static bool
3223 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
3224 {
3225   if ((evolution_function_is_constant_p (chrec_a)
3226        && evolution_function_is_univariate_p (chrec_b))
3227       || (evolution_function_is_constant_p (chrec_b)
3228 	  && evolution_function_is_univariate_p (chrec_a)))
3229     return true;
3230 
3231   if (evolution_function_is_univariate_p (chrec_a)
3232       && evolution_function_is_univariate_p (chrec_b))
3233     {
3234       switch (TREE_CODE (chrec_a))
3235 	{
3236 	case POLYNOMIAL_CHREC:
3237 	  switch (TREE_CODE (chrec_b))
3238 	    {
3239 	    case POLYNOMIAL_CHREC:
3240 	      if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
3241 		return false;
3242 	      /* FALLTHRU */
3243 
3244 	    default:
3245 	      return true;
3246 	    }
3247 
3248 	default:
3249 	  return true;
3250 	}
3251     }
3252 
3253   return false;
3254 }
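
/* A small worked example, added for illustration: in

     for (i = 0; i < n; i++)
       a[i] = a[i + 1];

   the access functions are {0, +, 1}_1 for the write and {1, +, 1}_1
   for the read.  Both are univariate polynomial chrecs of the same
   loop, so the pair is classified as SIV.  */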
3255 
3256 /* Creates a conflict function with N dimensions.  The affine functions
3257    in each dimension follow.  */
3258 
3259 static conflict_function *
3260 conflict_fn (unsigned n, ...)
3261 {
3262   unsigned i;
3263   conflict_function *ret = XCNEW (conflict_function);
3264   va_list ap;
3265 
3266   gcc_assert (n > 0 && n <= MAX_DIM);
3267   va_start (ap, n);
3268 
3269   ret->n = n;
3270   for (i = 0; i < n; i++)
3271     ret->fns[i] = va_arg (ap, affine_fn);
3272   va_end (ap);
3273 
3274   return ret;
3275 }
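
/* For instance, conflict_fn (1, affine_fn_cst (integer_zero_node))
   builds the one-dimensional conflict function whose single affine
   function is the constant 0, i.e. "the conflicting iteration is
   always iteration 0".  */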
3276 
3277 /* Returns constant affine function with value CST.  */
3278 
3279 static affine_fn
3280 affine_fn_cst (tree cst)
3281 {
3282   affine_fn fn;
3283   fn.create (1);
3284   fn.quick_push (cst);
3285   return fn;
3286 }
3287 
3288 /* Returns affine function with single variable, CST + COEF * x_DIM.  */
3289 
3290 static affine_fn
3291 affine_fn_univar (tree cst, unsigned dim, tree coef)
3292 {
3293   affine_fn fn;
3294   fn.create (dim + 1);
3295   unsigned i;
3296 
3297   gcc_assert (dim > 0);
3298   fn.quick_push (cst);
3299   for (i = 1; i < dim; i++)
3300     fn.quick_push (integer_zero_node);
3301   fn.quick_push (coef);
3302   return fn;
3303 }
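
/* For instance, a call with CST = 2, DIM = 3 and COEF = 5 builds the
   coefficient vector [2, 0, 0, 5], which represents the affine
   function 2 + 5 * x_3.  */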
3304 
3305 /* Analyze a ZIV (Zero Index Variable) subscript.  *OVERLAPS_A and
3306    *OVERLAPS_B are initialized to the functions that describe the
3307    relation between the elements accessed twice by CHREC_A and
3308    CHREC_B.  For k >= 0, the following property is verified:
3309 
3310    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
3311 
3312 static void
3313 analyze_ziv_subscript (tree chrec_a,
3314 		       tree chrec_b,
3315 		       conflict_function **overlaps_a,
3316 		       conflict_function **overlaps_b,
3317 		       tree *last_conflicts)
3318 {
3319   tree type, difference;
3320   dependence_stats.num_ziv++;
3321 
3322   if (dump_file && (dump_flags & TDF_DETAILS))
3323     fprintf (dump_file, "(analyze_ziv_subscript \n");
3324 
3325   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3326   chrec_a = chrec_convert (type, chrec_a, NULL);
3327   chrec_b = chrec_convert (type, chrec_b, NULL);
3328   difference = chrec_fold_minus (type, chrec_a, chrec_b);
3329 
3330   switch (TREE_CODE (difference))
3331     {
3332     case INTEGER_CST:
3333       if (integer_zerop (difference))
3334 	{
3335 	  /* The difference is equal to zero: the accessed index
3336 	     overlaps for each iteration in the loop.  */
3337 	  *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3338 	  *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3339 	  *last_conflicts = chrec_dont_know;
3340 	  dependence_stats.num_ziv_dependent++;
3341 	}
3342       else
3343 	{
3344 	  /* The accesses do not overlap.  */
3345 	  *overlaps_a = conflict_fn_no_dependence ();
3346 	  *overlaps_b = conflict_fn_no_dependence ();
3347 	  *last_conflicts = integer_zero_node;
3348 	  dependence_stats.num_ziv_independent++;
3349 	}
3350       break;
3351 
3352     default:
3353       /* We're not sure whether the indexes overlap.  For the moment,
3354 	 conservatively answer "don't know".  */
3355       if (dump_file && (dump_flags & TDF_DETAILS))
3356 	fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
3357 
3358       *overlaps_a = conflict_fn_not_known ();
3359       *overlaps_b = conflict_fn_not_known ();
3360       *last_conflicts = chrec_dont_know;
3361       dependence_stats.num_ziv_unimplemented++;
3362       break;
3363     }
3364 
3365   if (dump_file && (dump_flags & TDF_DETAILS))
3366     fprintf (dump_file, ")\n");
3367 }
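
/* Worked example, for illustration: for chrec_a = 3 and chrec_b = 5
   the difference folds to the INTEGER_CST -2, so the accesses never
   overlap; for chrec_a = chrec_b = 3 it folds to 0 and the accesses
   conflict on every iteration.  A symbolic pair such as chrec_a = n
   and chrec_b = n + m does not fold to an INTEGER_CST, so the answer
   is the conservative "don't know".  */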
3368 
3369 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
3370    and only if it fits the unsigned int type.  If this is not the case,
3371    or the bound on the number of iterations of LOOP could not be derived,
3372    returns chrec_dont_know.  */
3373 
3374 static tree
3375 max_stmt_executions_tree (class loop *loop)
3376 {
3377   widest_int nit;
3378 
3379   if (!max_stmt_executions (loop, &nit))
3380     return chrec_dont_know;
3381 
3382   if (!wi::fits_to_tree_p (nit, unsigned_type_node))
3383     return chrec_dont_know;
3384 
3385   return wide_int_to_tree (unsigned_type_node, nit);
3386 }
3387 
3388 /* Determine whether the CHREC is always positive/negative.  If the expression
3389    cannot be statically analyzed, return false, otherwise set the answer into
3390    VALUE.  */
3391 
3392 static bool
3393 chrec_is_positive (tree chrec, bool *value)
3394 {
3395   bool value0, value1, value2;
3396   tree end_value, nb_iter;
3397 
3398   switch (TREE_CODE (chrec))
3399     {
3400     case POLYNOMIAL_CHREC:
3401       if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
3402 	  || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
3403 	return false;
3404 
3405       /* FIXME -- overflows.  */
3406       if (value0 == value1)
3407 	{
3408 	  *value = value0;
3409 	  return true;
3410 	}
3411 
3412       /* Otherwise the chrec has the form "{-197, +, 2}_1",
3413 	 and the proof consists of showing that the sign never
3414 	 changes during the execution of the loop, from 0 to
3415 	 loop->nb_iterations.  */
3416       if (!evolution_function_is_affine_p (chrec))
3417 	return false;
3418 
3419       nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
3420       if (chrec_contains_undetermined (nb_iter))
3421 	return false;
3422 
3423 #if 0
3424       /* TODO -- If the test is after the exit, we may decrease the number of
3425 	 iterations by one.  */
3426       if (after_exit)
3427 	nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
3428 #endif
3429 
3430       end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
3431 
3432       if (!chrec_is_positive (end_value, &value2))
3433 	return false;
3434 
3435       *value = value0;
3436       return value0 == value2;
3437 
3438     case INTEGER_CST:
3439       switch (tree_int_cst_sgn (chrec))
3440 	{
3441 	case -1:
3442 	  *value = false;
3443 	  break;
3444 	case 1:
3445 	  *value = true;
3446 	  break;
3447 	default:
3448 	  return false;
3449 	}
3450       return true;
3451 
3452     default:
3453       return false;
3454     }
3455 }
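
/* For example, {2, +, 3}_1 has a positive initial value and a
   positive step, so *VALUE is set to true.  For {-197, +, 2}_1 the
   signs differ: the sign is proved invariant only when the end value
   -197 + 2 * nb_iterations is shown to be negative as well, in which
   case *VALUE is set to false.  */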
3456 
3457 
3458 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
3459    constant, and CHREC_B is an affine function.  *OVERLAPS_A and
3460    *OVERLAPS_B are initialized to the functions that describe the
3461    relation between the elements accessed twice by CHREC_A and
3462    CHREC_B.  For k >= 0, the following property is verified:
3463 
3464    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
3465 
3466 static void
3467 analyze_siv_subscript_cst_affine (tree chrec_a,
3468 				  tree chrec_b,
3469 				  conflict_function **overlaps_a,
3470 				  conflict_function **overlaps_b,
3471 				  tree *last_conflicts)
3472 {
3473   bool value0, value1, value2;
3474   tree type, difference, tmp;
3475 
3476   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3477   chrec_a = chrec_convert (type, chrec_a, NULL);
3478   chrec_b = chrec_convert (type, chrec_b, NULL);
3479   difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
3480 
3481   /* Special case overlap in the first iteration.  */
3482   if (integer_zerop (difference))
3483     {
3484       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3485       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3486       *last_conflicts = integer_one_node;
3487       return;
3488     }
3489 
3490   if (!chrec_is_positive (initial_condition (difference), &value0))
3491     {
3492       if (dump_file && (dump_flags & TDF_DETAILS))
3493 	fprintf (dump_file, "siv test failed: chrec is not positive.\n");
3494 
3495       dependence_stats.num_siv_unimplemented++;
3496       *overlaps_a = conflict_fn_not_known ();
3497       *overlaps_b = conflict_fn_not_known ();
3498       *last_conflicts = chrec_dont_know;
3499       return;
3500     }
3501   else
3502     {
3503       if (value0 == false)
3504 	{
3505 	  if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3506 	      || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
3507 	    {
3508 	      if (dump_file && (dump_flags & TDF_DETAILS))
3509 		fprintf (dump_file, "siv test failed: chrec not positive.\n");
3510 
3511 	      *overlaps_a = conflict_fn_not_known ();
3512 	      *overlaps_b = conflict_fn_not_known ();
3513 	      *last_conflicts = chrec_dont_know;
3514 	      dependence_stats.num_siv_unimplemented++;
3515 	      return;
3516 	    }
3517 	  else
3518 	    {
3519 	      if (value1 == true)
3520 		{
3521 		  /* Example:
3522 		     chrec_a = 12
3523 		     chrec_b = {10, +, 1}
3524 		  */
3525 
3526 		  if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3527 		    {
3528 		      HOST_WIDE_INT numiter;
3529 		      class loop *loop = get_chrec_loop (chrec_b);
3530 
3531 		      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3532 		      tmp = fold_build2 (EXACT_DIV_EXPR, type,
3533 					 fold_build1 (ABS_EXPR, type, difference),
3534 					 CHREC_RIGHT (chrec_b));
3535 		      *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3536 		      *last_conflicts = integer_one_node;
3537 
3538 
3539 		      /* Perform weak-zero siv test to see if overlap is
3540 			 outside the loop bounds.  */
3541 		      numiter = max_stmt_executions_int (loop);
3542 
3543 		      if (numiter >= 0
3544 			  && compare_tree_int (tmp, numiter) > 0)
3545 			{
3546 			  free_conflict_function (*overlaps_a);
3547 			  free_conflict_function (*overlaps_b);
3548 			  *overlaps_a = conflict_fn_no_dependence ();
3549 			  *overlaps_b = conflict_fn_no_dependence ();
3550 			  *last_conflicts = integer_zero_node;
3551 			  dependence_stats.num_siv_independent++;
3552 			  return;
3553 			}
3554 		      dependence_stats.num_siv_dependent++;
3555 		      return;
3556 		    }
3557 
3558 		  /* When the step does not divide the difference, there are
3559 		     no overlaps.  */
3560 		  else
3561 		    {
3562 		      *overlaps_a = conflict_fn_no_dependence ();
3563 		      *overlaps_b = conflict_fn_no_dependence ();
3564 		      *last_conflicts = integer_zero_node;
3565 		      dependence_stats.num_siv_independent++;
3566 		      return;
3567 		    }
3568 		}
3569 
3570 	      else
3571 		{
3572 		  /* Example:
3573 		     chrec_a = 12
3574 		     chrec_b = {10, +, -1}
3575 
3576 		     In this case, chrec_a will not overlap with chrec_b.  */
3577 		  *overlaps_a = conflict_fn_no_dependence ();
3578 		  *overlaps_b = conflict_fn_no_dependence ();
3579 		  *last_conflicts = integer_zero_node;
3580 		  dependence_stats.num_siv_independent++;
3581 		  return;
3582 		}
3583 	    }
3584 	}
3585       else
3586 	{
3587 	  if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3588 	      || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
3589 	    {
3590 	      if (dump_file && (dump_flags & TDF_DETAILS))
3591 		fprintf (dump_file, "siv test failed: chrec not positive.\n");
3592 
3593 	      *overlaps_a = conflict_fn_not_known ();
3594 	      *overlaps_b = conflict_fn_not_known ();
3595 	      *last_conflicts = chrec_dont_know;
3596 	      dependence_stats.num_siv_unimplemented++;
3597 	      return;
3598 	    }
3599 	  else
3600 	    {
3601 	      if (value2 == false)
3602 		{
3603 		  /* Example:
3604 		     chrec_a = 3
3605 		     chrec_b = {10, +, -1}
3606 		  */
3607 		  if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3608 		    {
3609 		      HOST_WIDE_INT numiter;
3610 		      class loop *loop = get_chrec_loop (chrec_b);
3611 
3612 		      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3613 		      tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
3614 					 CHREC_RIGHT (chrec_b));
3615 		      *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3616 		      *last_conflicts = integer_one_node;
3617 
3618 		      /* Perform weak-zero siv test to see if overlap is
3619 			 outside the loop bounds.  */
3620 		      numiter = max_stmt_executions_int (loop);
3621 
3622 		      if (numiter >= 0
3623 			  && compare_tree_int (tmp, numiter) > 0)
3624 			{
3625 			  free_conflict_function (*overlaps_a);
3626 			  free_conflict_function (*overlaps_b);
3627 			  *overlaps_a = conflict_fn_no_dependence ();
3628 			  *overlaps_b = conflict_fn_no_dependence ();
3629 			  *last_conflicts = integer_zero_node;
3630 			  dependence_stats.num_siv_independent++;
3631 			  return;
3632 			}
3633 		      dependence_stats.num_siv_dependent++;
3634 		      return;
3635 		    }
3636 
3637 		  /* When the step does not divide the difference, there
3638 		     are no overlaps.  */
3639 		  else
3640 		    {
3641 		      *overlaps_a = conflict_fn_no_dependence ();
3642 		      *overlaps_b = conflict_fn_no_dependence ();
3643 		      *last_conflicts = integer_zero_node;
3644 		      dependence_stats.num_siv_independent++;
3645 		      return;
3646 		    }
3647 		}
3648 	      else
3649 		{
3650 		  /* Example:
3651 		     chrec_a = 3
3652 		     chrec_b = {4, +, 1}
3653 
3654 		     In this case, chrec_a will not overlap with chrec_b.  */
3655 		  *overlaps_a = conflict_fn_no_dependence ();
3656 		  *overlaps_b = conflict_fn_no_dependence ();
3657 		  *last_conflicts = integer_zero_node;
3658 		  dependence_stats.num_siv_independent++;
3659 		  return;
3660 		}
3661 	    }
3662 	}
3663     }
3664 }
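
/* Worked example, for illustration: for chrec_a = 12 and
   chrec_b = {10, +, 1}_1, the difference of the initial conditions
   is 10 - 12 = -2 and the step +1 divides it, so the single conflict
   happens at iteration |-2| / 1 = 2 of loop 1 (10 + 2 * 1 == 12).
   If 2 exceeds the maximum number of iterations, the weak-zero test
   above instead reports independence.  */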
3665 
3666 /* Helper recursive function for initializing the matrix A.  Returns
3667    the initial value of CHREC.  */
3668 
3669 static tree
3670 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
3671 {
3672   gcc_assert (chrec);
3673 
3674   switch (TREE_CODE (chrec))
3675     {
3676     case POLYNOMIAL_CHREC:
3677       if (!cst_and_fits_in_hwi (CHREC_RIGHT (chrec)))
3678 	return chrec_dont_know;
3679       A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
3680       return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
3681 
3682     case PLUS_EXPR:
3683     case MULT_EXPR:
3684     case MINUS_EXPR:
3685       {
3686 	tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3687 	tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
3688 
3689 	return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
3690       }
3691 
3692     CASE_CONVERT:
3693       {
3694 	tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3695 	return chrec_convert (chrec_type (chrec), op, NULL);
3696       }
3697 
3698     case BIT_NOT_EXPR:
3699       {
3700 	/* Handle ~X as -1 - X.  */
3701 	tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3702 	return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
3703 			      build_int_cst (TREE_TYPE (chrec), -1), op);
3704       }
3705 
3706     case INTEGER_CST:
3707       return chrec;
3708 
3709     default:
3710       gcc_unreachable ();
3711       return NULL_TREE;
3712     }
3713 }
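
/* For instance (a sketch of the recursion), initializing from the
   chrec {{3, +, 5}_1, +, 7}_2 with INDEX = 0 and MULT = 1 stores
   A[0][0] = 7 and A[1][0] = 5, and returns the initial value 3.  */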
3714 
/* Truncating integer division; it is used below only with nonnegative
   operands, for which it coincides with the floor.  */
3715 #define FLOOR_DIV(x,y) ((x) / (y))
3716 
3717 /* Solves the special case of the Diophantine equation:
3718    | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
3719 
3720    Computes the descriptions OVERLAPS_A and OVERLAPS_B.  NITER is the
3721    number of iterations that loops X and Y run.  The overlaps will be
3722    constructed as evolutions in dimension DIM.  */
3723 
3724 static void
3725 compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter,
3726 					 HOST_WIDE_INT step_a,
3727 					 HOST_WIDE_INT step_b,
3728 					 affine_fn *overlaps_a,
3729 					 affine_fn *overlaps_b,
3730 					 tree *last_conflicts, int dim)
3731 {
3732   if (((step_a > 0 && step_b > 0)
3733        || (step_a < 0 && step_b < 0)))
3734     {
3735       HOST_WIDE_INT step_overlaps_a, step_overlaps_b;
3736       HOST_WIDE_INT gcd_steps_a_b, last_conflict, tau2;
3737 
3738       gcd_steps_a_b = gcd (step_a, step_b);
3739       step_overlaps_a = step_b / gcd_steps_a_b;
3740       step_overlaps_b = step_a / gcd_steps_a_b;
3741 
3742       if (niter > 0)
3743 	{
3744 	  tau2 = FLOOR_DIV (niter, step_overlaps_a);
3745 	  tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
3746 	  last_conflict = tau2;
3747 	  *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
3748 	}
3749       else
3750 	*last_conflicts = chrec_dont_know;
3751 
3752       *overlaps_a = affine_fn_univar (integer_zero_node, dim,
3753 				      build_int_cst (NULL_TREE,
3754 						     step_overlaps_a));
3755       *overlaps_b = affine_fn_univar (integer_zero_node, dim,
3756 				      build_int_cst (NULL_TREE,
3757 						     step_overlaps_b));
3758     }
3759 
3760   else
3761     {
3762       *overlaps_a = affine_fn_cst (integer_zero_node);
3763       *overlaps_b = affine_fn_cst (integer_zero_node);
3764       *last_conflicts = integer_zero_node;
3765     }
3766 }
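
/* Worked example, for illustration: for STEP_A = 2 and STEP_B = 3
   the gcd is 1, so STEP_OVERLAPS_A = 3 and STEP_OVERLAPS_B = 2: the
   conflicts happen at x = 3 * t and y = 2 * t, because
   2 * (3 * t) == 3 * (2 * t) for every t >= 0.  The last conflict is
   bounded by MIN (NITER / 3, NITER / 2).  */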
3767 
3768 /* Solves the special case of a Diophantine equation where CHREC_A is
3769    an affine bivariate function, and CHREC_B is an affine univariate
3770    function.  For example,
3771 
3772    | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
3773 
3774    has the following overlapping functions:
3775 
3776    | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
3777    | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
3778    | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
3779 
3780    FORNOW: This is a specialized implementation for a case occurring in
3781    a common benchmark.  Implement the general algorithm.  */
3782 
3783 static void
3784 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
3785 				      conflict_function **overlaps_a,
3786 				      conflict_function **overlaps_b,
3787 				      tree *last_conflicts)
3788 {
3789   bool xz_p, yz_p, xyz_p;
3790   HOST_WIDE_INT step_x, step_y, step_z;
3791   HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
3792   affine_fn overlaps_a_xz, overlaps_b_xz;
3793   affine_fn overlaps_a_yz, overlaps_b_yz;
3794   affine_fn overlaps_a_xyz, overlaps_b_xyz;
3795   affine_fn ova1, ova2, ovb;
3796   tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
3797 
3798   step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
3799   step_y = int_cst_value (CHREC_RIGHT (chrec_a));
3800   step_z = int_cst_value (CHREC_RIGHT (chrec_b));
3801 
3802   niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
3803   niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
3804   niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));
3805 
3806   if (niter_x < 0 || niter_y < 0 || niter_z < 0)
3807     {
3808       if (dump_file && (dump_flags & TDF_DETAILS))
3809 	fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
3810 
3811       *overlaps_a = conflict_fn_not_known ();
3812       *overlaps_b = conflict_fn_not_known ();
3813       *last_conflicts = chrec_dont_know;
3814       return;
3815     }
3816 
3817   niter = MIN (niter_x, niter_z);
3818   compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
3819 					   &overlaps_a_xz,
3820 					   &overlaps_b_xz,
3821 					   &last_conflicts_xz, 1);
3822   niter = MIN (niter_y, niter_z);
3823   compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
3824 					   &overlaps_a_yz,
3825 					   &overlaps_b_yz,
3826 					   &last_conflicts_yz, 2);
3827   niter = MIN (niter_x, niter_z);
3828   niter = MIN (niter_y, niter);
3829   compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
3830 					   &overlaps_a_xyz,
3831 					   &overlaps_b_xyz,
3832 					   &last_conflicts_xyz, 3);
3833 
3834   xz_p = !integer_zerop (last_conflicts_xz);
3835   yz_p = !integer_zerop (last_conflicts_yz);
3836   xyz_p = !integer_zerop (last_conflicts_xyz);
3837 
3838   if (xz_p || yz_p || xyz_p)
3839     {
3840       ova1 = affine_fn_cst (integer_zero_node);
3841       ova2 = affine_fn_cst (integer_zero_node);
3842       ovb = affine_fn_cst (integer_zero_node);
3843       if (xz_p)
3844 	{
3845 	  affine_fn t0 = ova1;
3846 	  affine_fn t2 = ovb;
3847 
3848 	  ova1 = affine_fn_plus (ova1, overlaps_a_xz);
3849 	  ovb = affine_fn_plus (ovb, overlaps_b_xz);
3850 	  affine_fn_free (t0);
3851 	  affine_fn_free (t2);
3852 	  *last_conflicts = last_conflicts_xz;
3853 	}
3854       if (yz_p)
3855 	{
3856 	  affine_fn t0 = ova2;
3857 	  affine_fn t2 = ovb;
3858 
3859 	  ova2 = affine_fn_plus (ova2, overlaps_a_yz);
3860 	  ovb = affine_fn_plus (ovb, overlaps_b_yz);
3861 	  affine_fn_free (t0);
3862 	  affine_fn_free (t2);
3863 	  *last_conflicts = last_conflicts_yz;
3864 	}
3865       if (xyz_p)
3866 	{
3867 	  affine_fn t0 = ova1;
3868 	  affine_fn t2 = ova2;
3869 	  affine_fn t4 = ovb;
3870 
3871 	  ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
3872 	  ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
3873 	  ovb = affine_fn_plus (ovb, overlaps_b_xyz);
3874 	  affine_fn_free (t0);
3875 	  affine_fn_free (t2);
3876 	  affine_fn_free (t4);
3877 	  *last_conflicts = last_conflicts_xyz;
3878 	}
3879       *overlaps_a = conflict_fn (2, ova1, ova2);
3880       *overlaps_b = conflict_fn (1, ovb);
3881     }
3882   else
3883     {
3884       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3885       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3886       *last_conflicts = integer_zero_node;
3887     }
3888 
3889   affine_fn_free (overlaps_a_xz);
3890   affine_fn_free (overlaps_b_xz);
3891   affine_fn_free (overlaps_a_yz);
3892   affine_fn_free (overlaps_b_yz);
3893   affine_fn_free (overlaps_a_xyz);
3894   affine_fn_free (overlaps_b_xyz);
3895 }
3896 
3897 /* Copy the elements of vector VEC1 with length SIZE to VEC2.  */
3898 
3899 static void
3900 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
3901 		    int size)
3902 {
3903   memcpy (vec2, vec1, size * sizeof (*vec1));
3904 }
3905 
3906 /* Copy the elements of M x N matrix MAT1 to MAT2.  */
3907 
3908 static void
3909 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
3910 		    int m, int n)
3911 {
3912   int i;
3913 
3914   for (i = 0; i < m; i++)
3915     lambda_vector_copy (mat1[i], mat2[i], n);
3916 }
3917 
3918 /* Store the SIZE x SIZE identity matrix in MAT.  */
3919 
3920 static void
3921 lambda_matrix_id (lambda_matrix mat, int size)
3922 {
3923   int i, j;
3924 
3925   for (i = 0; i < size; i++)
3926     for (j = 0; j < size; j++)
3927       mat[i][j] = (i == j) ? 1 : 0;
3928 }
3929 
3930 /* Return the index of the first nonzero element of vector VEC1 between
3931    START and N.  We must have START <= N.
3932    Returns N if VEC1 is the zero vector.  */
3933 
3934 static int
3935 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
3936 {
3937   int j = start;
3938   while (j < n && vec1[j] == 0)
3939     j++;
3940   return j;
3941 }
3942 
3943 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
3944    R2 = R2 + CONST1 * R1.  */
3945 
3946 static void
3947 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2,
3948 		       lambda_int const1)
3949 {
3950   int i;
3951 
3952   if (const1 == 0)
3953     return;
3954 
3955   for (i = 0; i < n; i++)
3956     mat[r2][i] += const1 * mat[r1][i];
3957 }
3958 
3959 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
3960    and store the result in VEC2.  */
3961 
3962 static void
3963 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
3964 			  int size, lambda_int const1)
3965 {
3966   int i;
3967 
3968   if (const1 == 0)
3969     lambda_vector_clear (vec2, size);
3970   else
3971     for (i = 0; i < size; i++)
3972       vec2[i] = const1 * vec1[i];
3973 }
3974 
3975 /* Negate vector VEC1 with length SIZE and store it in VEC2.  */
3976 
3977 static void
3978 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
3979 		      int size)
3980 {
3981   lambda_vector_mult_const (vec1, vec2, size, -1);
3982 }
3983 
3984 /* Negate row R1 of matrix MAT which has N columns.  */
3985 
3986 static void
3987 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
3988 {
3989   lambda_vector_negate (mat[r1], mat[r1], n);
3990 }
3991 
3992 /* Return true if two vectors are equal.  */
3993 
3994 static bool
3995 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
3996 {
3997   int i;
3998   for (i = 0; i < size; i++)
3999     if (vec1[i] != vec2[i])
4000       return false;
4001   return true;
4002 }
4003 
4004 /* Given an M x N integer matrix A, this function determines an M x
4005    M unimodular matrix U, and an M x N echelon matrix S such that
4006    "U.A = S".  This decomposition is also known as "right Hermite".
4007 
4008    Ref: Algorithm 2.1 page 33 in "Loop Transformations for
4009    Restructuring Compilers" Utpal Banerjee.  */
4010 
4011 static void
4012 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
4013 			     lambda_matrix S, lambda_matrix U)
4014 {
4015   int i, j, i0 = 0;
4016 
4017   lambda_matrix_copy (A, S, m, n);
4018   lambda_matrix_id (U, m);
4019 
4020   for (j = 0; j < n; j++)
4021     {
4022       if (lambda_vector_first_nz (S[j], m, i0) < m)
4023 	{
4024 	  ++i0;
4025 	  for (i = m - 1; i >= i0; i--)
4026 	    {
4027 	      while (S[i][j] != 0)
4028 		{
4029 		  lambda_int sigma, factor, a, b;
4030 
4031 		  a = S[i-1][j];
4032 		  b = S[i][j];
4033 		  sigma = ((a < 0) ^ (b < 0)) ? -1: 1;
4034 		  unsigned HOST_WIDE_INT abs_a = absu_hwi (a);
4035 		  unsigned HOST_WIDE_INT abs_b = absu_hwi (b);
4036 		  factor = sigma * (lambda_int)(abs_a / abs_b);
4037 
4038 		  lambda_matrix_row_add (S, n, i, i-1, -factor);
4039 		  std::swap (S[i], S[i-1]);
4040 
4041 		  lambda_matrix_row_add (U, m, i, i-1, -factor);
4042 		  std::swap (U[i], U[i-1]);
4043 		}
4044 	    }
4045 	}
4046     }
4047 }
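
/* A small worked example, for illustration: for the 2 x 1 matrix
   A = [[4], [6]] the loop above performs Euclid's gcd computation on
   the rows and produces S = [[2], [0]] together with the unimodular
   matrix U = [[-1, 1], [3, -2]], so that U.A = S:
   -1 * 4 + 1 * 6 = 2, 3 * 4 - 2 * 6 = 0, and det (U) = -1.  */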
4048 
4049 /* Determines the overlapping elements due to accesses CHREC_A and
4050    CHREC_B, which are affine functions.  This function cannot handle
4051    symbolic evolution functions, i.e. when initial conditions are
4052    parameters, because it uses lambda matrices of integers.  */
4053 
4054 static void
4055 analyze_subscript_affine_affine (tree chrec_a,
4056 				 tree chrec_b,
4057 				 conflict_function **overlaps_a,
4058 				 conflict_function **overlaps_b,
4059 				 tree *last_conflicts)
4060 {
4061   unsigned nb_vars_a, nb_vars_b, dim;
4062   lambda_int gamma, gcd_alpha_beta;
4063   lambda_matrix A, U, S;
4064   struct obstack scratch_obstack;
4065 
4066   if (eq_evolutions_p (chrec_a, chrec_b))
4067     {
4068       /* The accessed index overlaps for each iteration in the
4069 	 loop.  */
4070       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4071       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4072       *last_conflicts = chrec_dont_know;
4073       return;
4074     }
4075   if (dump_file && (dump_flags & TDF_DETAILS))
4076     fprintf (dump_file, "(analyze_subscript_affine_affine \n");
4077 
4078   /* For determining the initial intersection, we have to solve a
4079      Diophantine equation.  This is the most time-consuming part.
4080 
4081      To answer the question "Is there a dependence?" we have
4082      to prove that there exists a solution to the Diophantine
4083      equation, and that the solution is in the iteration domain,
4084      i.e. the solution is positive or zero, and that the solution
4085      happens before the upper bound loop.nb_iterations.  Otherwise
4086      there is no dependence.  This function outputs a description of
4087      the iterations that hold the intersections.  */
4088 
4089   nb_vars_a = nb_vars_in_chrec (chrec_a);
4090   nb_vars_b = nb_vars_in_chrec (chrec_b);
4091 
4092   gcc_obstack_init (&scratch_obstack);
4093 
4094   dim = nb_vars_a + nb_vars_b;
4095   U = lambda_matrix_new (dim, dim, &scratch_obstack);
4096   A = lambda_matrix_new (dim, 1, &scratch_obstack);
4097   S = lambda_matrix_new (dim, 1, &scratch_obstack);
4098 
4099   tree init_a = initialize_matrix_A (A, chrec_a, 0, 1);
4100   tree init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1);
4101   if (init_a == chrec_dont_know
4102       || init_b == chrec_dont_know)
4103     {
4104       if (dump_file && (dump_flags & TDF_DETAILS))
4105 	fprintf (dump_file, "affine-affine test failed: "
4106 		 "representation issue.\n");
4107       *overlaps_a = conflict_fn_not_known ();
4108       *overlaps_b = conflict_fn_not_known ();
4109       *last_conflicts = chrec_dont_know;
4110       goto end_analyze_subs_aa;
4111     }
4112   gamma = int_cst_value (init_b) - int_cst_value (init_a);
4113 
4114   /* Don't do all the hard work of solving the Diophantine equation
4115      when we already know the solution: for example,
4116      | {3, +, 1}_1
4117      | {3, +, 4}_2
4118      | gamma = 3 - 3 = 0.
4119      Then the first overlap occurs during the first iterations:
4120      | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
4121   */
4122   if (gamma == 0)
4123     {
4124       if (nb_vars_a == 1 && nb_vars_b == 1)
4125 	{
4126 	  HOST_WIDE_INT step_a, step_b;
4127 	  HOST_WIDE_INT niter, niter_a, niter_b;
4128 	  affine_fn ova, ovb;
4129 
4130 	  niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
4131 	  niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
4132 	  niter = MIN (niter_a, niter_b);
4133 	  step_a = int_cst_value (CHREC_RIGHT (chrec_a));
4134 	  step_b = int_cst_value (CHREC_RIGHT (chrec_b));
4135 
4136 	  compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
4137 						   &ova, &ovb,
4138 						   last_conflicts, 1);
4139 	  *overlaps_a = conflict_fn (1, ova);
4140 	  *overlaps_b = conflict_fn (1, ovb);
4141 	}
4142 
4143       else if (nb_vars_a == 2 && nb_vars_b == 1)
4144 	compute_overlap_steps_for_affine_1_2
4145 	  (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
4146 
4147       else if (nb_vars_a == 1 && nb_vars_b == 2)
4148 	compute_overlap_steps_for_affine_1_2
4149 	  (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
4150 
4151       else
4152 	{
4153 	  if (dump_file && (dump_flags & TDF_DETAILS))
4154 	    fprintf (dump_file, "affine-affine test failed: too many variables.\n");
4155 	  *overlaps_a = conflict_fn_not_known ();
4156 	  *overlaps_b = conflict_fn_not_known ();
4157 	  *last_conflicts = chrec_dont_know;
4158 	}
4159       goto end_analyze_subs_aa;
4160     }
4161 
4162   /* U.A = S */
4163   lambda_matrix_right_hermite (A, dim, 1, S, U);
4164 
4165   if (S[0][0] < 0)
4166     {
4167       S[0][0] *= -1;
4168       lambda_matrix_row_negate (U, dim, 0);
4169     }
4170   gcd_alpha_beta = S[0][0];
4171 
4172   /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
4173      but that is quite a strange case.  Instead of ICEing, answer
4174      don't know.  */
4175   if (gcd_alpha_beta == 0)
4176     {
4177       *overlaps_a = conflict_fn_not_known ();
4178       *overlaps_b = conflict_fn_not_known ();
4179       *last_conflicts = chrec_dont_know;
4180       goto end_analyze_subs_aa;
4181     }
4182 
4183   /* The classic "gcd-test".  */
4184   if (!int_divides_p (gcd_alpha_beta, gamma))
4185     {
4186       /* The "gcd-test" has determined that there is no integer
4187 	 solution, i.e. there is no dependence.  */
4188       *overlaps_a = conflict_fn_no_dependence ();
4189       *overlaps_b = conflict_fn_no_dependence ();
4190       *last_conflicts = integer_zero_node;
4191     }
4192 
4193   /* Both access functions are univariate.  This includes SIV and MIV cases.  */
4194   else if (nb_vars_a == 1 && nb_vars_b == 1)
4195     {
4196       /* Both functions should have the same evolution sign.  */
4197       if (((A[0][0] > 0 && -A[1][0] > 0)
4198 	   || (A[0][0] < 0 && -A[1][0] < 0)))
4199 	{
4200 	  /* The solutions are given by:
4201 	     |
4202 	     | [GAMMA/GCD_ALPHA_BETA  t].[u11 u12]  = [x0]
4203 	     |                           [u21 u22]    [y0]
4204 
4205 	     For a given integer t.  Using the following variables,
4206 
4207 	     | i0 = u11 * gamma / gcd_alpha_beta
4208 	     | j0 = u12 * gamma / gcd_alpha_beta
4209 	     | i1 = u21
4210 	     | j1 = u22
4211 
4212 	     the solutions are:
4213 
4214 	     | x0 = i0 + i1 * t,
4215 	     | y0 = j0 + j1 * t.  */
4216       	  HOST_WIDE_INT i0, j0, i1, j1;
4217 
4218 	  i0 = U[0][0] * gamma / gcd_alpha_beta;
4219 	  j0 = U[0][1] * gamma / gcd_alpha_beta;
4220 	  i1 = U[1][0];
4221 	  j1 = U[1][1];
4222 
4223 	  if ((i1 == 0 && i0 < 0)
4224 	      || (j1 == 0 && j0 < 0))
4225 	    {
4226 	      /* There is no solution.
4227 		 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
4228 		 falls in here, but for the moment we don't look at the
4229 		 upper bound of the iteration domain.  */
4230 	      *overlaps_a = conflict_fn_no_dependence ();
4231 	      *overlaps_b = conflict_fn_no_dependence ();
4232 	      *last_conflicts = integer_zero_node;
4233 	      goto end_analyze_subs_aa;
4234 	    }
4235 
4236 	  if (i1 > 0 && j1 > 0)
4237 	    {
4238 	      HOST_WIDE_INT niter_a
4239 		= max_stmt_executions_int (get_chrec_loop (chrec_a));
4240 	      HOST_WIDE_INT niter_b
4241 		= max_stmt_executions_int (get_chrec_loop (chrec_b));
4242 	      HOST_WIDE_INT niter = MIN (niter_a, niter_b);
4243 
4244 	      /* (X0, Y0) is a solution of the Diophantine equation:
4245 		 "chrec_a (X0) = chrec_b (Y0)".  */
4246 	      HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
4247 					CEIL (-j0, j1));
4248 	      HOST_WIDE_INT x0 = i1 * tau1 + i0;
4249 	      HOST_WIDE_INT y0 = j1 * tau1 + j0;
4250 
4251 	      /* (X1, Y1) is the smallest positive solution of the eq
4252 		 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
4253 		 first conflict occurs.  */
4254 	      HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
4255 	      HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
4256 	      HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
4257 
4258 	      if (niter > 0)
4259 		{
4260 		  /* If the overlap occurs outside of the bounds of the
4261 		     loop, there is no dependence.  */
4262 		  if (x1 >= niter_a || y1 >= niter_b)
4263 		    {
4264 		      *overlaps_a = conflict_fn_no_dependence ();
4265 		      *overlaps_b = conflict_fn_no_dependence ();
4266 		      *last_conflicts = integer_zero_node;
4267 		      goto end_analyze_subs_aa;
4268 		    }
4269 
4270 		  /* max stmt executions can get quite large, avoid
4271 		     overflows by using wide ints here.  */
4272 		  widest_int tau2
4273 		    = wi::smin (wi::sdiv_floor (wi::sub (niter_a, i0), i1),
4274 				wi::sdiv_floor (wi::sub (niter_b, j0), j1));
4275 		  widest_int last_conflict = wi::sub (tau2, (x1 - i0)/i1);
4276 		  if (wi::min_precision (last_conflict, SIGNED)
4277 		      <= TYPE_PRECISION (integer_type_node))
4278 		    *last_conflicts
4279 		       = build_int_cst (integer_type_node,
4280 					last_conflict.to_shwi ());
4281 		  else
4282 		    *last_conflicts = chrec_dont_know;
4283 		}
4284 	      else
4285 		*last_conflicts = chrec_dont_know;
4286 
4287 	      *overlaps_a
4288 		= conflict_fn (1,
4289 			       affine_fn_univar (build_int_cst (NULL_TREE, x1),
4290 						 1,
4291 						 build_int_cst (NULL_TREE, i1)));
4292 	      *overlaps_b
4293 		= conflict_fn (1,
4294 			       affine_fn_univar (build_int_cst (NULL_TREE, y1),
4295 						 1,
4296 						 build_int_cst (NULL_TREE, j1)));
4297 	    }
4298 	  else
4299 	    {
4300 	      /* FIXME: For the moment, the upper bound of the
4301 		 iteration domain for i and j is not checked.  */
4302 	      if (dump_file && (dump_flags & TDF_DETAILS))
4303 		fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4304 	      *overlaps_a = conflict_fn_not_known ();
4305 	      *overlaps_b = conflict_fn_not_known ();
4306 	      *last_conflicts = chrec_dont_know;
4307 	    }
4308 	}
4309       else
4310 	{
4311 	  if (dump_file && (dump_flags & TDF_DETAILS))
4312 	    fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4313 	  *overlaps_a = conflict_fn_not_known ();
4314 	  *overlaps_b = conflict_fn_not_known ();
4315 	  *last_conflicts = chrec_dont_know;
4316 	}
4317     }
4318   else
4319     {
4320       if (dump_file && (dump_flags & TDF_DETAILS))
4321 	fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4322       *overlaps_a = conflict_fn_not_known ();
4323       *overlaps_b = conflict_fn_not_known ();
4324       *last_conflicts = chrec_dont_know;
4325     }
4326 
4327 end_analyze_subs_aa:
4328   obstack_free (&scratch_obstack, NULL);
4329   if (dump_file && (dump_flags & TDF_DETAILS))
4330     {
4331       fprintf (dump_file, "  (overlaps_a = ");
4332       dump_conflict_function (dump_file, *overlaps_a);
4333       fprintf (dump_file, ")\n  (overlaps_b = ");
4334       dump_conflict_function (dump_file, *overlaps_b);
4335       fprintf (dump_file, "))\n");
4336     }
4337 }
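
/* Worked example of the gcd-test above, for illustration: for
   chrec_a = {0, +, 4}_1 and chrec_b = {2, +, 4}_1 we get
   gamma = 2 - 0 = 2 and gcd_alpha_beta = 4.  Since 4 does not divide
   2, the Diophantine equation 4 * x - 4 * y = 2 has no integer
   solution, hence no dependence.  */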
4338 
4339 /* Returns true when analyze_subscript_affine_affine can be used for
4340    determining the dependence relation between chrec_a and chrec_b,
4341    which contain symbols.  This function modifies chrec_a and chrec_b
4342    such that the analysis result is the same, and such that they don't
4343    contain symbols, and then can safely be passed to the analyzer.
4344 
4345    Example: The analysis of the following tuples of evolutions produce
4346    the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
4347    vs. {0, +, 1}_1
4348 
4349    {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
4350    {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
4351 */
4352 
4353 static bool
4354 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
4355 {
4356   tree diff, type, left_a, left_b, right_b;
4357 
4358   if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
4359       || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
4360     /* FIXME: For the moment not handled.  Might be refined later.  */
4361     return false;
4362 
4363   type = chrec_type (*chrec_a);
4364   left_a = CHREC_LEFT (*chrec_a);
4365   left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
4366   diff = chrec_fold_minus (type, left_a, left_b);
4367 
4368   if (!evolution_function_is_constant_p (diff))
4369     return false;
4370 
4371   if (dump_file && (dump_flags & TDF_DETAILS))
4372     fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
4373 
4374   *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
4375 				     diff, CHREC_RIGHT (*chrec_a));
4376   right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
4377   *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
4378 				     build_int_cst (type, 0),
4379 				     right_b);
4380   return true;
4381 }
4382 
4383 /* Analyze a SIV (Single Index Variable) subscript.  *OVERLAPS_A and
4384    *OVERLAPS_B are initialized to the functions that describe the
4385    relation between the elements accessed twice by CHREC_A and
4386    CHREC_B.  For k >= 0, the following property is verified:
4387 
4388    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
4389 
4390 static void
4391 analyze_siv_subscript (tree chrec_a,
4392 		       tree chrec_b,
4393 		       conflict_function **overlaps_a,
4394 		       conflict_function **overlaps_b,
4395 		       tree *last_conflicts,
4396 		       int loop_nest_num)
4397 {
4398   dependence_stats.num_siv++;
4399 
4400   if (dump_file && (dump_flags & TDF_DETAILS))
4401     fprintf (dump_file, "(analyze_siv_subscript \n");
4402 
4403   if (evolution_function_is_constant_p (chrec_a)
4404       && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
4405     analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
4406 				      overlaps_a, overlaps_b, last_conflicts);
4407 
4408   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
4409 	   && evolution_function_is_constant_p (chrec_b))
4410     analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
4411 				      overlaps_b, overlaps_a, last_conflicts);
4412 
4413   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
4414 	   && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
4415     {
4416       if (!chrec_contains_symbols (chrec_a)
4417 	  && !chrec_contains_symbols (chrec_b))
4418 	{
4419 	  analyze_subscript_affine_affine (chrec_a, chrec_b,
4420 					   overlaps_a, overlaps_b,
4421 					   last_conflicts);
4422 
4423 	  if (CF_NOT_KNOWN_P (*overlaps_a)
4424 	      || CF_NOT_KNOWN_P (*overlaps_b))
4425 	    dependence_stats.num_siv_unimplemented++;
4426 	  else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4427 		   || CF_NO_DEPENDENCE_P (*overlaps_b))
4428 	    dependence_stats.num_siv_independent++;
4429 	  else
4430 	    dependence_stats.num_siv_dependent++;
4431 	}
4432       else if (can_use_analyze_subscript_affine_affine (&chrec_a,
4433 							&chrec_b))
4434 	{
4435 	  analyze_subscript_affine_affine (chrec_a, chrec_b,
4436 					   overlaps_a, overlaps_b,
4437 					   last_conflicts);
4438 
4439 	  if (CF_NOT_KNOWN_P (*overlaps_a)
4440 	      || CF_NOT_KNOWN_P (*overlaps_b))
4441 	    dependence_stats.num_siv_unimplemented++;
4442 	  else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4443 		   || CF_NO_DEPENDENCE_P (*overlaps_b))
4444 	    dependence_stats.num_siv_independent++;
4445 	  else
4446 	    dependence_stats.num_siv_dependent++;
4447 	}
4448       else
4449 	goto siv_subscript_dontknow;
4450     }
4451 
4452   else
4453     {
4454     siv_subscript_dontknow:;
4455       if (dump_file && (dump_flags & TDF_DETAILS))
4456 	fprintf (dump_file, "  siv test failed: unimplemented");
4457       *overlaps_a = conflict_fn_not_known ();
4458       *overlaps_b = conflict_fn_not_known ();
4459       *last_conflicts = chrec_dont_know;
4460       dependence_stats.num_siv_unimplemented++;
4461     }
4462 
4463   if (dump_file && (dump_flags & TDF_DETAILS))
4464     fprintf (dump_file, ")\n");
4465 }
4466 
4467 /* Returns false if we can prove that the greatest common divisor of the steps
4468    of CHREC does not divide CST, true otherwise.  */
4469 
4470 static bool
4471 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
4472 {
4473   HOST_WIDE_INT cd = 0, val;
4474   tree step;
4475 
4476   if (!tree_fits_shwi_p (cst))
4477     return true;
4478   val = tree_to_shwi (cst);
4479 
4480   while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
4481     {
4482       step = CHREC_RIGHT (chrec);
4483       if (!tree_fits_shwi_p (step))
4484 	return true;
4485       cd = gcd (cd, tree_to_shwi (step));
4486       chrec = CHREC_LEFT (chrec);
4487     }
4488 
4489   return val % cd == 0;
4490 }
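
/* For example, for CHREC = {{21, +, 2}_1, +, -2}_2 and CST = 1 the
   gcd of the steps is gcd (2, -2) = 2; since 2 does not divide 1,
   the function returns false and the caller can rule out a
   dependence.  */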
4491 
4492 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
4493    LOOP_NEST.  *OVERLAPS_A and *OVERLAPS_B are initialized to the
4494    functions that describe the relation between the elements accessed
4495    twice by CHREC_A and CHREC_B.  For k >= 0, the following property
4496    is verified:
4497 
4498    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
4499 
4500 static void
4501 analyze_miv_subscript (tree chrec_a,
4502 		       tree chrec_b,
4503 		       conflict_function **overlaps_a,
4504 		       conflict_function **overlaps_b,
4505 		       tree *last_conflicts,
4506 		       class loop *loop_nest)
4507 {
4508   tree type, difference;
4509 
4510   dependence_stats.num_miv++;
4511   if (dump_file && (dump_flags & TDF_DETAILS))
4512     fprintf (dump_file, "(analyze_miv_subscript \n");
4513 
4514   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
4515   chrec_a = chrec_convert (type, chrec_a, NULL);
4516   chrec_b = chrec_convert (type, chrec_b, NULL);
4517   difference = chrec_fold_minus (type, chrec_a, chrec_b);
4518 
4519   if (eq_evolutions_p (chrec_a, chrec_b))
4520     {
4521       /* Access functions are the same: all the elements are accessed
4522 	 in the same order.  */
4523       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4524       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4525       *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
4526       dependence_stats.num_miv_dependent++;
4527     }
4528 
4529   else if (evolution_function_is_constant_p (difference)
4530 	   && evolution_function_is_affine_multivariate_p (chrec_a,
4531 							   loop_nest->num)
4532 	   && !gcd_of_steps_may_divide_p (chrec_a, difference))
4533     {
4534       /* testsuite/.../ssa-chrec-33.c
4535 	 {{21, +, 2}_1, +, -2}_2  vs.  {{20, +, 2}_1, +, -2}_2
4536 
4537 	 The difference is 1, and all the evolution steps are multiples
4538 	 of 2, consequently there are no overlapping elements.  */
4539       *overlaps_a = conflict_fn_no_dependence ();
4540       *overlaps_b = conflict_fn_no_dependence ();
4541       *last_conflicts = integer_zero_node;
4542       dependence_stats.num_miv_independent++;
4543     }
4544 
4545   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest->num)
4546 	   && !chrec_contains_symbols (chrec_a, loop_nest)
4547 	   && evolution_function_is_affine_in_loop (chrec_b, loop_nest->num)
4548 	   && !chrec_contains_symbols (chrec_b, loop_nest))
4549     {
4550       /* testsuite/.../ssa-chrec-35.c
4551 	 {0, +, 1}_2  vs.  {0, +, 1}_3
4552 	 the overlapping elements are respectively located at iterations:
4553 	 {0, +, 1}_x and {0, +, 1}_x,
4554 	 in other words, we have the equality:
4555 	 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
4556 
4557 	 Other examples:
4558 	 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
4559 	 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
4560 
4561 	 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
4562 	 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
4563       */
4564       analyze_subscript_affine_affine (chrec_a, chrec_b,
4565 				       overlaps_a, overlaps_b, last_conflicts);
4566 
4567       if (CF_NOT_KNOWN_P (*overlaps_a)
4568  	  || CF_NOT_KNOWN_P (*overlaps_b))
4569 	dependence_stats.num_miv_unimplemented++;
4570       else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4571 	       || CF_NO_DEPENDENCE_P (*overlaps_b))
4572 	dependence_stats.num_miv_independent++;
4573       else
4574 	dependence_stats.num_miv_dependent++;
4575     }
4576 
4577   else
4578     {
4579       /* When the analysis is too difficult, answer "don't know".  */
4580       if (dump_file && (dump_flags & TDF_DETAILS))
4581 	fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
4582 
4583       *overlaps_a = conflict_fn_not_known ();
4584       *overlaps_b = conflict_fn_not_known ();
4585       *last_conflicts = chrec_dont_know;
4586       dependence_stats.num_miv_unimplemented++;
4587     }
4588 
4589   if (dump_file && (dump_flags & TDF_DETAILS))
4590     fprintf (dump_file, ")\n");
4591 }
4592 
4593 /* Determines the iterations for which CHREC_A is equal to CHREC_B
4594    with respect to LOOP_NEST.  OVERLAP_ITERATIONS_A and
4595    OVERLAP_ITERATIONS_B are initialized with two functions that
4596    describe the iterations that contain conflicting elements.
4597 
4598    Remark: For an integer k >= 0, the following equality is true:
4599 
4600    CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
4601 */
4602 
4603 static void
4604 analyze_overlapping_iterations (tree chrec_a,
4605 				tree chrec_b,
4606 				conflict_function **overlap_iterations_a,
4607 				conflict_function **overlap_iterations_b,
4608 				tree *last_conflicts, class loop *loop_nest)
4609 {
4610   unsigned int lnn = loop_nest->num;
4611 
4612   dependence_stats.num_subscript_tests++;
4613 
4614   if (dump_file && (dump_flags & TDF_DETAILS))
4615     {
4616       fprintf (dump_file, "(analyze_overlapping_iterations \n");
4617       fprintf (dump_file, "  (chrec_a = ");
4618       print_generic_expr (dump_file, chrec_a);
4619       fprintf (dump_file, ")\n  (chrec_b = ");
4620       print_generic_expr (dump_file, chrec_b);
4621       fprintf (dump_file, ")\n");
4622     }
4623 
4624   if (chrec_a == NULL_TREE
4625       || chrec_b == NULL_TREE
4626       || chrec_contains_undetermined (chrec_a)
4627       || chrec_contains_undetermined (chrec_b))
4628     {
4629       dependence_stats.num_subscript_undetermined++;
4630 
4631       *overlap_iterations_a = conflict_fn_not_known ();
4632       *overlap_iterations_b = conflict_fn_not_known ();
4633     }
4634 
4635   /* If they are the same chrec, and are affine, they overlap
4636      on every iteration.  */
4637   else if (eq_evolutions_p (chrec_a, chrec_b)
4638 	   && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
4639 	       || operand_equal_p (chrec_a, chrec_b, 0)))
4640     {
4641       dependence_stats.num_same_subscript_function++;
4642       *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4643       *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4644       *last_conflicts = chrec_dont_know;
4645     }
4646 
4647   /* If they aren't the same, and aren't affine, we can't do anything
4648      yet.  */
4649   else if ((chrec_contains_symbols (chrec_a)
4650 	    || chrec_contains_symbols (chrec_b))
4651 	   && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
4652 	       || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
4653     {
4654       dependence_stats.num_subscript_undetermined++;
4655       *overlap_iterations_a = conflict_fn_not_known ();
4656       *overlap_iterations_b = conflict_fn_not_known ();
4657     }
4658 
4659   else if (ziv_subscript_p (chrec_a, chrec_b))
4660     analyze_ziv_subscript (chrec_a, chrec_b,
4661 			   overlap_iterations_a, overlap_iterations_b,
4662 			   last_conflicts);
4663 
4664   else if (siv_subscript_p (chrec_a, chrec_b))
4665     analyze_siv_subscript (chrec_a, chrec_b,
4666 			   overlap_iterations_a, overlap_iterations_b,
4667 			   last_conflicts, lnn);
4668 
4669   else
4670     analyze_miv_subscript (chrec_a, chrec_b,
4671 			   overlap_iterations_a, overlap_iterations_b,
4672 			   last_conflicts, loop_nest);
4673 
4674   if (dump_file && (dump_flags & TDF_DETAILS))
4675     {
4676       fprintf (dump_file, "  (overlap_iterations_a = ");
4677       dump_conflict_function (dump_file, *overlap_iterations_a);
4678       fprintf (dump_file, ")\n  (overlap_iterations_b = ");
4679       dump_conflict_function (dump_file, *overlap_iterations_b);
4680       fprintf (dump_file, "))\n");
4681     }
4682 }
4683 
4684 /* Helper function for uniquely inserting distance vectors.  */
4685 
4686 static void
4687 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
4688 {
4689   unsigned i;
4690   lambda_vector v;
4691 
4692   FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, v)
4693     if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
4694       return;
4695 
4696   DDR_DIST_VECTS (ddr).safe_push (dist_v);
4697 }
4698 
4699 /* Helper function for uniquely inserting direction vectors.  */
4700 
4701 static void
4702 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
4703 {
4704   unsigned i;
4705   lambda_vector v;
4706 
4707   FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), i, v)
4708     if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
4709       return;
4710 
4711   DDR_DIR_VECTS (ddr).safe_push (dir_v);
4712 }
4713 
4714 /* Add a distance of 1 on all the loops outer than the loop at INDEX.
4715    For each such outer loop where we haven't yet determined a distance,
4716    push a new distance vector composed of the previous distance and a
4717    distance of 1 for that outer loop.  Example:
4718 
4719    | loop_1
4720    |   loop_2
4721    |     A[10]
4722    |   endloop_2
4723    | endloop_1
4724 
4725    Saved vectors are of the form (dist_in_1, dist_in_2).  First, we
4726    save (0, 1), then we have to save (1, 0).  */
4727 
4728 static void
4729 add_outer_distances (struct data_dependence_relation *ddr,
4730 		     lambda_vector dist_v, int index)
4731 {
4732   /* For each outer loop where init_v is not set, the accesses are
4733      in a dependence of distance 1 in that loop.  */
4734   while (--index >= 0)
4735     {
4736       lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4737       lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
4738       save_v[index] = 1;
4739       save_dist_v (ddr, save_v);
4740     }
4741 }
4742 
4743 /* Return false when we fail to represent the data dependence as a
4744    distance vector.  A_INDEX is the index of the first reference
4745    (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the
4746    second reference.  INIT_B is set to true when a component has been
4747    added to the distance vector DIST_V.  INDEX_CARRY is then set to
4748    the index in DIST_V that carries the dependence.  */
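
/* As an illustration (a made-up loop, not from a testcase): for

   | loop_1 (i)
   |   A[i + 1] = ...
   |   ... = A[i]
   | endloop_1

   the access functions are {1, +, 1}_1 and {0, +, 1}_1, the dependence
   distance in loop_1 is one iteration, so DIST_V becomes (1), *INIT_B
   is set to true, and *INDEX_CARRY becomes the index of loop_1 in the
   loop nest.  */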
4749 
4750 static bool
4751 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
4752 			     unsigned int a_index, unsigned int b_index,
4753 			     lambda_vector dist_v, bool *init_b,
4754 			     int *index_carry)
4755 {
4756   unsigned i;
4757   lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4758   class loop *loop = DDR_LOOP_NEST (ddr)[0];
4759 
4760   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
4761     {
4762       tree access_fn_a, access_fn_b;
4763       struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
4764 
4765       if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
4766 	{
4767 	  non_affine_dependence_relation (ddr);
4768 	  return false;
4769 	}
4770 
4771       access_fn_a = SUB_ACCESS_FN (subscript, a_index);
4772       access_fn_b = SUB_ACCESS_FN (subscript, b_index);
4773 
4774       if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
4775 	  && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
4776 	{
4777 	  HOST_WIDE_INT dist;
4778 	  int index;
4779 	  int var_a = CHREC_VARIABLE (access_fn_a);
4780 	  int var_b = CHREC_VARIABLE (access_fn_b);
4781 
4782 	  if (var_a != var_b
4783 	      || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
4784 	    {
4785 	      non_affine_dependence_relation (ddr);
4786 	      return false;
4787 	    }
4788 
4789 	  /* When data references are collected in a loop while data
4790 	     dependences are analyzed in a loop nest nested in that loop,
4791 	     there can be more access functions than loops.  Skip access
4792 	     functions of loops not in the loop nest.
4793 
4794 	     See PR89725 for more information.  */
4795 	  if (flow_loop_nested_p (get_loop (cfun, var_a), loop))
4796 	    continue;
4797 
4798 	  dist = int_cst_value (SUB_DISTANCE (subscript));
4799 	  index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
4800 	  *index_carry = MIN (index, *index_carry);
4801 
4802 	  /* This is the subscript coupling test.  If we have already
4803 	     recorded a distance for this loop (a distance coming from
4804 	     another subscript), it should be the same.  For example,
4805 	     in the following code, there is no dependence:
4806 
4807 	     | loop i = 0, N, 1
4808 	     |   T[i+1][i] = ...
4809 	     |   ... = T[i][i]
4810 	     | endloop
4811 	  */
4812 	  if (init_v[index] != 0 && dist_v[index] != dist)
4813 	    {
4814 	      finalize_ddr_dependent (ddr, chrec_known);
4815 	      return false;
4816 	    }
4817 
4818 	  dist_v[index] = dist;
4819 	  init_v[index] = 1;
4820 	  *init_b = true;
4821 	}
4822       else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
4823 	{
4824 	  /* This can be for example an affine vs. constant dependence
4825 	     (T[i] vs. T[3]) that is not an affine dependence and is
4826 	     not representable as a distance vector.  */
4827 	  non_affine_dependence_relation (ddr);
4828 	  return false;
4829 	}
4830       else
4831 	*init_b = true;
4832     }
4833 
4834   return true;
4835 }
4836 
4837 /* Return true when the DDR contains only access functions that are
4838    invariant with respect to loop number LNUM.  */
4839 
4840 static bool
4841 invariant_access_functions (const struct data_dependence_relation *ddr,
4842 			    int lnum)
4843 {
4844   unsigned i;
4845   subscript *sub;
4846 
4847   FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4848     if (!evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 0), lnum)
4849 	|| !evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 1), lnum))
4850       return false;
4851 
4852   return true;
4853 }
4854 
4855 /* Helper function for the case where DDR_A and DDR_B are the same
4856    multivariate access function with a constant step.  For an example
4857    see pr34635-1.c.  */
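
/* As a worked illustration of the example used below: for the access
   function {{0, +, 2}_1, +, 3}_2, moving 3 iterations forward in
   loop_1 and 2 iterations backward in loop_2 changes the subscript by
   3 * 2 + (-2) * 3 == 0, so the same element is accessed again and the
   self distance vector is (3, -2).  */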
4858 
4859 static void
4860 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
4861 {
4862   int x_1, x_2;
4863   tree c_1 = CHREC_LEFT (c_2);
4864   tree c_0 = CHREC_LEFT (c_1);
4865   lambda_vector dist_v;
4866   HOST_WIDE_INT v1, v2, cd;
4867 
4868   /* Polynomials with more than 2 variables are not handled yet.  When
4869      the evolution steps are parameters, it is not possible to
4870      represent the dependence using classical distance vectors.  */
4871   if (TREE_CODE (c_0) != INTEGER_CST
4872       || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
4873       || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
4874     {
4875       DDR_AFFINE_P (ddr) = false;
4876       return;
4877     }
4878 
4879   x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
4880   x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
4881 
4882   /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2).  */
4883   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4884   v1 = int_cst_value (CHREC_RIGHT (c_1));
4885   v2 = int_cst_value (CHREC_RIGHT (c_2));
4886   cd = gcd (v1, v2);
4887   v1 /= cd;
4888   v2 /= cd;
4889 
4890   if (v2 < 0)
4891     {
4892       v2 = -v2;
4893       v1 = -v1;
4894     }
4895 
4896   dist_v[x_1] = v2;
4897   dist_v[x_2] = -v1;
4898   save_dist_v (ddr, dist_v);
4899 
4900   add_outer_distances (ddr, dist_v, x_1);
4901 }
4902 
4903 /* Helper function for the case where DDR_A and DDR_B are the same
4904    access functions.  */
4905 
4906 static void
4907 add_other_self_distances (struct data_dependence_relation *ddr)
4908 {
4909   lambda_vector dist_v;
4910   unsigned i;
4911   int index_carry = DDR_NB_LOOPS (ddr);
4912   subscript *sub;
4913   class loop *loop = DDR_LOOP_NEST (ddr)[0];
4914 
4915   FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4916     {
4917       tree access_fun = SUB_ACCESS_FN (sub, 0);
4918 
4919       if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
4920 	{
4921 	  if (!evolution_function_is_univariate_p (access_fun, loop->num))
4922 	    {
4923 	      if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
4924 		{
4925 		  DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
4926 		  return;
4927 		}
4928 
4929 	      access_fun = SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr, 0), 0);
4930 
4931 	      if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
4932 		add_multivariate_self_dist (ddr, access_fun);
4933 	      else
4934 		/* The evolution step is not constant: it varies in
4935 		   the outer loop, so this cannot be represented by a
4936 		   distance vector.  For example in pr34635.c the
4937 		   evolution is {0, +, {0, +, 4}_1}_2.  */
4938 		DDR_AFFINE_P (ddr) = false;
4939 
4940 	      return;
4941 	    }
4942 
4943 	  /* When data references are collected in a loop while data
4944 	     dependences are analyzed in a loop nest nested in that loop,
4945 	     there can be more access functions than loops.  Skip access
4946 	     functions of loops not in the loop nest.
4947 
4948 	     See PR89725 for more information.  */
4949 	  if (flow_loop_nested_p (get_loop (cfun, CHREC_VARIABLE (access_fun)),
4950 				  loop))
4951 	    continue;
4952 
4953 	  index_carry = MIN (index_carry,
4954 			     index_in_loop_nest (CHREC_VARIABLE (access_fun),
4955 						 DDR_LOOP_NEST (ddr)));
4956 	}
4957     }
4958 
4959   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4960   add_outer_distances (ddr, dist_v, index_carry);
4961 }
4962 
4963 static void
4964 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
4965 {
4966   lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4967 
4968   dist_v[0] = 1;
4969   save_dist_v (ddr, dist_v);
4970 }
4971 
4972 /* Adds a unit distance vector to DDR when there is a 0 overlap.  This
4973    is the case for example when access functions are the same and
4974    equal to a constant, as in:
4975 
4976    | loop_1
4977    |   A[3] = ...
4978    |   ... = A[3]
4979    | endloop_1
4980 
4981    in which case the distance vectors are (0) and (1).  */
4982 
4983 static void
4984 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
4985 {
4986   unsigned i, j;
4987 
4988   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
4989     {
4990       subscript_p sub = DDR_SUBSCRIPT (ddr, i);
4991       conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
4992       conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
4993 
4994       for (j = 0; j < ca->n; j++)
4995 	if (affine_function_zero_p (ca->fns[j]))
4996 	  {
4997 	    insert_innermost_unit_dist_vector (ddr);
4998 	    return;
4999 	  }
5000 
5001       for (j = 0; j < cb->n; j++)
5002 	if (affine_function_zero_p (cb->fns[j]))
5003 	  {
5004 	    insert_innermost_unit_dist_vector (ddr);
5005 	    return;
5006 	  }
5007     }
5008 }
5009 
5010 /* Return true when the DDR contains two data references that have the
5011    same access functions.  */
5012 
5013 static inline bool
5014 same_access_functions (const struct data_dependence_relation *ddr)
5015 {
5016   unsigned i;
5017   subscript *sub;
5018 
5019   FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
5020     if (!eq_evolutions_p (SUB_ACCESS_FN (sub, 0),
5021 			  SUB_ACCESS_FN (sub, 1)))
5022       return false;
5023 
5024   return true;
5025 }
5026 
5027 /* Compute the classic per loop distance vector.  DDR is the data
5028    dependence relation to build a vector from.  Return false when we
5029    fail to represent the data dependence as a distance vector.  */
5030 
5031 static bool
5032 build_classic_dist_vector (struct data_dependence_relation *ddr,
5033 			   class loop *loop_nest)
5034 {
5035   bool init_b = false;
5036   int index_carry = DDR_NB_LOOPS (ddr);
5037   lambda_vector dist_v;
5038 
5039   if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
5040     return false;
5041 
5042   if (same_access_functions (ddr))
5043     {
5044       /* Save the 0 vector.  */
5045       dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5046       save_dist_v (ddr, dist_v);
5047 
5048       if (invariant_access_functions (ddr, loop_nest->num))
5049 	add_distance_for_zero_overlaps (ddr);
5050 
5051       if (DDR_NB_LOOPS (ddr) > 1)
5052 	add_other_self_distances (ddr);
5053 
5054       return true;
5055     }
5056 
5057   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5058   if (!build_classic_dist_vector_1 (ddr, 0, 1, dist_v, &init_b, &index_carry))
5059     return false;
5060 
5061   /* Save the distance vector if we initialized one.  */
5062   if (init_b)
5063     {
5064       /* Verify a basic constraint: classic distance vectors should
5065 	 always be lexicographically positive.
5066 
5067 	 Data references are collected in the order of execution of
5068 	 the program, thus for the following loop
5069 
5070 	 | for (i = 1; i < 100; i++)
5071 	 |   for (j = 1; j < 100; j++)
5072 	 |     {
5073 	 |       t = T[j+1][i-1];  // A
5074 	 |       T[j][i] = t + 2;  // B
5075 	 |     }
5076 
5077 	 references are collected following the direction of the wind:
5078 	 A then B.  The data dependence tests are also performed in this
5079 	 order, such that we're looking at the distance separating the
5080 	 elements accessed by A from the elements later accessed by B.
5081 	 But in this example, the distance returned by test_dep (A, B) is
5082 	 lexicographically negative (-1, 1), which means that the access A
5083 	 occurs later than B with respect to the outer loop, i.e. we're
5084 	 actually looking upwind.  In this
5085 	 case we solve test_dep (B, A), looking downwind for the
5086 	 lexicographically positive solution, which returns the
5087 	 distance vector (1, -1).  */
5088       if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
5089 	{
5090 	  lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5091 	  if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
5092 	    return false;
5093 	  compute_subscript_distance (ddr);
5094 	  if (!build_classic_dist_vector_1 (ddr, 1, 0, save_v, &init_b,
5095 					    &index_carry))
5096 	    return false;
5097 	  save_dist_v (ddr, save_v);
5098 	  DDR_REVERSED_P (ddr) = true;
5099 
5100 	  /* In this case there is a dependence forward for all the
5101 	     outer loops:
5102 
5103 	     | for (k = 1; k < 100; k++)
5104 	     |  for (i = 1; i < 100; i++)
5105 	     |   for (j = 1; j < 100; j++)
5106 	     |     {
5107 	     |       t = T[j+1][i-1];  // A
5108 	     |       T[j][i] = t + 2;  // B
5109 	     |     }
5110 
5111 	     the vectors are:
5112 	     (0,  1, -1)
5113 	     (1,  1, -1)
5114 	     (1, -1,  1)
5115 	  */
5116 	  if (DDR_NB_LOOPS (ddr) > 1)
5117 	    {
5118  	      add_outer_distances (ddr, save_v, index_carry);
5119 	      add_outer_distances (ddr, dist_v, index_carry);
5120 	    }
5121 	}
5122       else
5123 	{
5124 	  lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5125 	  lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
5126 
5127 	  if (DDR_NB_LOOPS (ddr) > 1)
5128 	    {
5129 	      lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5130 
5131 	      if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
5132 		return false;
5133 	      compute_subscript_distance (ddr);
5134 	      if (!build_classic_dist_vector_1 (ddr, 1, 0, opposite_v, &init_b,
5135 						&index_carry))
5136 		return false;
5137 
5138 	      save_dist_v (ddr, save_v);
5139 	      add_outer_distances (ddr, dist_v, index_carry);
5140 	      add_outer_distances (ddr, opposite_v, index_carry);
5141 	    }
5142 	  else
5143 	    save_dist_v (ddr, save_v);
5144 	}
5145     }
5146   else
5147     {
5148       /* There is a distance of 1 on all the outer loops: Example:
5149 	 there is a dependence of distance 1 on loop_1 for the array A.
5150 
5151 	 | loop_1
5152 	 |   A[5] = ...
5153 	 | endloop
5154       */
5155       add_outer_distances (ddr, dist_v,
5156 			   lambda_vector_first_nz (dist_v,
5157 						   DDR_NB_LOOPS (ddr), 0));
5158     }
5159 
5160   if (dump_file && (dump_flags & TDF_DETAILS))
5161     {
5162       unsigned i;
5163 
5164       fprintf (dump_file, "(build_classic_dist_vector\n");
5165       for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
5166 	{
5167 	  fprintf (dump_file, "  dist_vector = (");
5168 	  print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
5169 			       DDR_NB_LOOPS (ddr));
5170 	  fprintf (dump_file, "  )\n");
5171 	}
5172       fprintf (dump_file, ")\n");
5173     }
5174 
5175   return true;
5176 }
5177 
5178 /* Return the direction for a given distance.
5179    FIXME: Computing dir this way is suboptimal, since dir can catch
5180    cases that dist is unable to represent.  */
5181 
5182 static inline enum data_dependence_direction
5183 dir_from_dist (int dist)
5184 {
5185   if (dist > 0)
5186     return dir_positive;
5187   else if (dist < 0)
5188     return dir_negative;
5189   else
5190     return dir_equal;
5191 }
5192 
5193 /* Compute the classic per loop direction vector.  DDR is the data
5194    dependence relation to build a vector from.  */
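
/* For instance (illustration only): applying dir_from_dist
   componentwise, the distance vectors (1, -1) and (0, 2) yield the
   direction vectors (+, -) and (=, +).  */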
5195 
5196 static void
5197 build_classic_dir_vector (struct data_dependence_relation *ddr)
5198 {
5199   unsigned i, j;
5200   lambda_vector dist_v;
5201 
5202   FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
5203     {
5204       lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5205 
5206       for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
5207 	dir_v[j] = dir_from_dist (dist_v[j]);
5208 
5209       save_dir_v (ddr, dir_v);
5210     }
5211 }
5212 
5213 /* Helper function.  Returns true when there is a dependence between the
5214    data references.  A_INDEX is the index of the first reference (0 for
5215    DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference.  */
5216 
5217 static bool
5218 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
5219 			       unsigned int a_index, unsigned int b_index,
5220 			       class loop *loop_nest)
5221 {
5222   unsigned int i;
5223   tree last_conflicts;
5224   struct subscript *subscript;
5225   tree res = NULL_TREE;
5226 
5227   for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
5228     {
5229       conflict_function *overlaps_a, *overlaps_b;
5230 
5231       analyze_overlapping_iterations (SUB_ACCESS_FN (subscript, a_index),
5232 				      SUB_ACCESS_FN (subscript, b_index),
5233 				      &overlaps_a, &overlaps_b,
5234 				      &last_conflicts, loop_nest);
5235 
5236       if (SUB_CONFLICTS_IN_A (subscript))
5237 	free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
5238       if (SUB_CONFLICTS_IN_B (subscript))
5239 	free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
5240 
5241       SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
5242       SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
5243       SUB_LAST_CONFLICT (subscript) = last_conflicts;
5244 
5245       /* If there is any undetermined conflict function we have to
5246          give a conservative answer in case we cannot prove that
5247 	 no dependence exists when analyzing another subscript.  */
5248       if (CF_NOT_KNOWN_P (overlaps_a)
5249  	  || CF_NOT_KNOWN_P (overlaps_b))
5250  	{
5251 	  res = chrec_dont_know;
5252 	  continue;
5253  	}
5254 
5255       /* When there is a subscript with no dependence we can stop.  */
5256       else if (CF_NO_DEPENDENCE_P (overlaps_a)
5257  	       || CF_NO_DEPENDENCE_P (overlaps_b))
5258  	{
5259 	  res = chrec_known;
5260 	  break;
5261  	}
5262     }
5263 
5264   if (res == NULL_TREE)
5265     return true;
5266 
5267   if (res == chrec_known)
5268     dependence_stats.num_dependence_independent++;
5269   else
5270     dependence_stats.num_dependence_undetermined++;
5271   finalize_ddr_dependent (ddr, res);
5272   return false;
5273 }
5274 
5275 /* Computes the conflicting iterations in LOOP_NEST, and initializes DDR.  */
5276 
5277 static void
5278 subscript_dependence_tester (struct data_dependence_relation *ddr,
5279 			     class loop *loop_nest)
5280 {
5281   if (subscript_dependence_tester_1 (ddr, 0, 1, loop_nest))
5282     dependence_stats.num_dependence_dependent++;
5283 
5284   compute_subscript_distance (ddr);
5285   if (build_classic_dist_vector (ddr, loop_nest))
5286     build_classic_dir_vector (ddr);
5287 }
5288 
5289 /* Returns true when all the access functions of A are affine or
5290    constant with respect to LOOP_NEST.  */
5291 
5292 static bool
5293 access_functions_are_affine_or_constant_p (const struct data_reference *a,
5294 					   const class loop *loop_nest)
5295 {
5296   unsigned int i;
5297   vec<tree> fns = DR_ACCESS_FNS (a);
5298   tree t;
5299 
5300   FOR_EACH_VEC_ELT (fns, i, t)
5301     if (!evolution_function_is_invariant_p (t, loop_nest->num)
5302 	&& !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
5303       return false;
5304 
5305   return true;
5306 }
5307 
5308 /* This computes the affine dependence relation between A and B with
5309    respect to LOOP_NEST.  CHREC_KNOWN is used for representing the
5310    independence between two accesses, while CHREC_DONT_KNOW is used
5311    for representing the unknown relation.
5312 
5313    Note that it is possible to stop the computation of the dependence
5314    relation the first time we detect a CHREC_KNOWN element for a given
5315    subscript.  */
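
/* For example (hypothetical references): A[2*i] and A[2*i + 1] can
   never touch the same element, so the relation is CHREC_KNOWN
   (independence); if one of the access functions is not affine, the
   conservative answer is CHREC_DONT_KNOW.  */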
5316 
5317 void
5318 compute_affine_dependence (struct data_dependence_relation *ddr,
5319 			   class loop *loop_nest)
5320 {
5321   struct data_reference *dra = DDR_A (ddr);
5322   struct data_reference *drb = DDR_B (ddr);
5323 
5324   if (dump_file && (dump_flags & TDF_DETAILS))
5325     {
5326       fprintf (dump_file, "(compute_affine_dependence\n");
5327       fprintf (dump_file, "  ref_a: ");
5328       print_generic_expr (dump_file, DR_REF (dra));
5329       fprintf (dump_file, ", stmt_a: ");
5330       print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
5331       fprintf (dump_file, "  ref_b: ");
5332       print_generic_expr (dump_file, DR_REF (drb));
5333       fprintf (dump_file, ", stmt_b: ");
5334       print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
5335     }
5336 
5337   /* Analyze only when the dependence relation is not yet known.  */
5338   if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
5339     {
5340       dependence_stats.num_dependence_tests++;
5341 
5342       if (access_functions_are_affine_or_constant_p (dra, loop_nest)
5343 	  && access_functions_are_affine_or_constant_p (drb, loop_nest))
5344 	subscript_dependence_tester (ddr, loop_nest);
5345 
5346       /* As a last case, if the dependence cannot be determined, or if
5347 	 the dependence is considered too difficult to determine, answer
5348 	 "don't know".  */
5349       else
5350 	{
5351 	  dependence_stats.num_dependence_undetermined++;
5352 
5353 	  if (dump_file && (dump_flags & TDF_DETAILS))
5354 	    {
5355 	      fprintf (dump_file, "Data ref a:\n");
5356 	      dump_data_reference (dump_file, dra);
5357 	      fprintf (dump_file, "Data ref b:\n");
5358 	      dump_data_reference (dump_file, drb);
5359 	      fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
5360 	    }
5361 	  finalize_ddr_dependent (ddr, chrec_dont_know);
5362 	}
5363     }
5364 
5365   if (dump_file && (dump_flags & TDF_DETAILS))
5366     {
5367       if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
5368 	fprintf (dump_file, ") -> no dependence\n");
5369       else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
5370 	fprintf (dump_file, ") -> dependence analysis failed\n");
5371       else
5372 	fprintf (dump_file, ")\n");
5373     }
5374 }
5375 
5376 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
5377    the data references in DATAREFS, in the LOOP_NEST.  When
5378    COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
5379    relations.  Return true when successful, i.e. data references number
5380    relations.  Return true when successful, i.e. when the number of
5381    data references is small enough to be handled.  */
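
/* A minimal usage sketch (hypothetical driver code, error handling
   omitted; compute_data_dependences_for_loop below is the real
   driver):

   | auto_vec<loop_p, 3> loop_nest;
   | auto_vec<data_reference_p, 10> datarefs;
   | auto_vec<ddr_p, 10> ddrs;
   | if (find_loop_nest (loop, &loop_nest)
   |     && find_data_references_in_loop (loop, &datarefs) != chrec_dont_know)
   |   compute_all_dependences (datarefs, &ddrs, loop_nest, false);
   | ...
   | free_dependence_relations (ddrs);
   | free_data_refs (datarefs);  */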
5382 bool
5383 compute_all_dependences (vec<data_reference_p> datarefs,
5384 			 vec<ddr_p> *dependence_relations,
5385 			 vec<loop_p> loop_nest,
5386 			 bool compute_self_and_rr)
5387 {
5388   struct data_dependence_relation *ddr;
5389   struct data_reference *a, *b;
5390   unsigned int i, j;
5391 
5392   if ((int) datarefs.length ()
5393       > param_loop_max_datarefs_for_datadeps)
5394     {
5395       struct data_dependence_relation *ddr;
5396 
5397       /* Insert a single relation into dependence_relations:
5398 	 chrec_dont_know.  */
5399       ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
5400       dependence_relations->safe_push (ddr);
5401       return false;
5402     }
5403 
5404   FOR_EACH_VEC_ELT (datarefs, i, a)
5405     for (j = i + 1; datarefs.iterate (j, &b); j++)
5406       if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
5407 	{
5408 	  ddr = initialize_data_dependence_relation (a, b, loop_nest);
5409 	  dependence_relations->safe_push (ddr);
5410           if (loop_nest.exists ())
5411    	    compute_affine_dependence (ddr, loop_nest[0]);
5412 	}
5413 
5414   if (compute_self_and_rr)
5415     FOR_EACH_VEC_ELT (datarefs, i, a)
5416       {
5417 	ddr = initialize_data_dependence_relation (a, a, loop_nest);
5418 	dependence_relations->safe_push (ddr);
5419         if (loop_nest.exists ())
5420    	  compute_affine_dependence (ddr, loop_nest[0]);
5421       }
5422 
5423   return true;
5424 }
5425 
5426 /* Describes a location of a memory reference.  */
5427 
5428 struct data_ref_loc
5429 {
5430   /* The memory reference.  */
5431   tree ref;
5432 
5433   /* True if the memory reference is read.  */
5434   bool is_read;
5435 
5436   /* True if the data reference is conditional within the containing
5437      statement, i.e. if it might not occur even when the statement
5438      is executed and runs to completion.  */
5439   bool is_conditional_in_stmt;
5440 };
5441 
5442 
5443 /* Stores the locations of memory references in STMT to REFERENCES.  Returns
5444    true if STMT clobbers memory, false otherwise.  */
5445 
5446 static bool
5447 get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
5448 {
5449   bool clobbers_memory = false;
5450   data_ref_loc ref;
5451   tree op0, op1;
5452   enum gimple_code stmt_code = gimple_code (stmt);
5453 
5454   /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
5455      As we cannot model data references to accesses that are not
5456      spelled out, give up if they may occur.  */
5457   if (stmt_code == GIMPLE_CALL
5458       && !(gimple_call_flags (stmt) & ECF_CONST))
5459     {
5460       /* Allow IFN_GOMP_SIMD_LANE in their own loops.  */
5461       if (gimple_call_internal_p (stmt))
5462 	switch (gimple_call_internal_fn (stmt))
5463 	  {
5464 	  case IFN_GOMP_SIMD_LANE:
5465 	    {
5466 	      class loop *loop = gimple_bb (stmt)->loop_father;
5467 	      tree uid = gimple_call_arg (stmt, 0);
5468 	      gcc_assert (TREE_CODE (uid) == SSA_NAME);
5469 	      if (loop == NULL
5470 		  || loop->simduid != SSA_NAME_VAR (uid))
5471 		clobbers_memory = true;
5472 	      break;
5473 	    }
5474 	  case IFN_MASK_LOAD:
5475 	  case IFN_MASK_STORE:
5476 	    break;
5477 	  default:
5478 	    clobbers_memory = true;
5479 	    break;
5480 	  }
5481       else
5482 	clobbers_memory = true;
5483     }
5484   else if (stmt_code == GIMPLE_ASM
5485 	   && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
5486 	       || gimple_vuse (stmt)))
5487     clobbers_memory = true;
5488 
5489   if (!gimple_vuse (stmt))
5490     return clobbers_memory;
5491 
5492   if (stmt_code == GIMPLE_ASSIGN)
5493     {
5494       tree base;
5495       op0 = gimple_assign_lhs (stmt);
5496       op1 = gimple_assign_rhs1 (stmt);
5497 
5498       if (DECL_P (op1)
5499 	  || (REFERENCE_CLASS_P (op1)
5500 	      && (base = get_base_address (op1))
5501 	      && TREE_CODE (base) != SSA_NAME
5502 	      && !is_gimple_min_invariant (base)))
5503 	{
5504 	  ref.ref = op1;
5505 	  ref.is_read = true;
5506 	  ref.is_conditional_in_stmt = false;
5507 	  references->safe_push (ref);
5508 	}
5509     }
5510   else if (stmt_code == GIMPLE_CALL)
5511     {
5512       unsigned i, n;
5513       tree ptr, type;
5514       unsigned int align;
5515 
5516       ref.is_read = false;
5517       if (gimple_call_internal_p (stmt))
5518 	switch (gimple_call_internal_fn (stmt))
5519 	  {
5520 	  case IFN_MASK_LOAD:
5521 	    if (gimple_call_lhs (stmt) == NULL_TREE)
5522 	      break;
5523 	    ref.is_read = true;
5524 	    /* FALLTHRU */
5525 	  case IFN_MASK_STORE:
5526 	    ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0);
5527 	    align = tree_to_shwi (gimple_call_arg (stmt, 1));
5528 	    if (ref.is_read)
5529 	      type = TREE_TYPE (gimple_call_lhs (stmt));
5530 	    else
5531 	      type = TREE_TYPE (gimple_call_arg (stmt, 3));
5532 	    if (TYPE_ALIGN (type) != align)
5533 	      type = build_aligned_type (type, align);
5534 	    ref.is_conditional_in_stmt = true;
5535 	    ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0),
5536 				   ptr);
5537 	    references->safe_push (ref);
5538 	    return false;
5539 	  default:
5540 	    break;
5541 	  }
5542 
5543       op0 = gimple_call_lhs (stmt);
5544       n = gimple_call_num_args (stmt);
5545       for (i = 0; i < n; i++)
5546 	{
5547 	  op1 = gimple_call_arg (stmt, i);
5548 
5549 	  if (DECL_P (op1)
5550 	      || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
5551 	    {
5552 	      ref.ref = op1;
5553 	      ref.is_read = true;
5554 	      ref.is_conditional_in_stmt = false;
5555 	      references->safe_push (ref);
5556 	    }
5557 	}
5558     }
5559   else
5560     return clobbers_memory;
5561 
5562   if (op0
5563       && (DECL_P (op0)
5564 	  || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
5565     {
5566       ref.ref = op0;
5567       ref.is_read = false;
5568       ref.is_conditional_in_stmt = false;
5569       references->safe_push (ref);
5570     }
5571   return clobbers_memory;
5572 }
5573 
5574 
5575 /* Returns true if the loop-nest has any data reference.  */
5576 
5577 bool
5578 loop_nest_has_data_refs (loop_p loop)
5579 {
5580   basic_block *bbs = get_loop_body (loop);
5581   auto_vec<data_ref_loc, 3> references;
5582 
5583   for (unsigned i = 0; i < loop->num_nodes; i++)
5584     {
5585       basic_block bb = bbs[i];
5586       gimple_stmt_iterator bsi;
5587 
5588       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5589 	{
5590 	  gimple *stmt = gsi_stmt (bsi);
5591 	  get_references_in_stmt (stmt, &references);
5592 	  if (references.length ())
5593 	    {
5594 	      free (bbs);
5595 	      return true;
5596 	    }
5597 	}
5598     }
5599   free (bbs);
5600   return false;
5601 }
5602 
5603 /* Stores the data references in STMT to DATAREFS.  Returns failure if
5604    there is an unanalyzable reference and success otherwise.  NEST is the
5605    outermost loop of the loop nest in which the references should be analyzed.  */
5606 
5607 opt_result
5608 find_data_references_in_stmt (class loop *nest, gimple *stmt,
5609 			      vec<data_reference_p> *datarefs)
5610 {
5611   unsigned i;
5612   auto_vec<data_ref_loc, 2> references;
5613   data_ref_loc *ref;
5614   data_reference_p dr;
5615 
5616   if (get_references_in_stmt (stmt, &references))
5617     return opt_result::failure_at (stmt, "statement clobbers memory: %G",
5618 				   stmt);
5619 
5620   FOR_EACH_VEC_ELT (references, i, ref)
5621     {
5622       dr = create_data_ref (nest ? loop_preheader_edge (nest) : NULL,
5623 			    loop_containing_stmt (stmt), ref->ref,
5624 			    stmt, ref->is_read, ref->is_conditional_in_stmt);
5625       gcc_assert (dr != NULL);
5626       datarefs->safe_push (dr);
5627     }
5628 
5629   return opt_result::success ();
5630 }
5631 
5632 /* Stores the data references in STMT to DATAREFS.  If there is an
5633    unanalyzable reference, returns false, otherwise returns true.
5634    NEST is the outermost loop of the loop nest in which the references
5635    should be instantiated, LOOP is the loop in which the references
5636    should be analyzed.  */
5637 
5638 bool
5639 graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt,
5640 				       vec<data_reference_p> *datarefs)
5641 {
5642   unsigned i;
5643   auto_vec<data_ref_loc, 2> references;
5644   data_ref_loc *ref;
5645   bool ret = true;
5646   data_reference_p dr;
5647 
5648   if (get_references_in_stmt (stmt, &references))
5649     return false;
5650 
5651   FOR_EACH_VEC_ELT (references, i, ref)
5652     {
5653       dr = create_data_ref (nest, loop, ref->ref, stmt, ref->is_read,
5654 			    ref->is_conditional_in_stmt);
5655       gcc_assert (dr != NULL);
5656       datarefs->safe_push (dr);
5657     }
5658 
5659   return ret;
5660 }
5661 
5662 /* Search the data references in the basic block BB, and record the
5663    information into DATAREFS.  Returns chrec_dont_know when failing to
5664    analyze a difficult case, returns NULL_TREE otherwise.  */
5665 
5666 tree
5667 find_data_references_in_bb (class loop *loop, basic_block bb,
5668                             vec<data_reference_p> *datarefs)
5669 {
5670   gimple_stmt_iterator bsi;
5671 
5672   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5673     {
5674       gimple *stmt = gsi_stmt (bsi);
5675 
5676       if (!find_data_references_in_stmt (loop, stmt, datarefs))
5677         {
5678           struct data_reference *res;
5679           res = XCNEW (struct data_reference);
5680           datarefs->safe_push (res);
5681 
5682           return chrec_dont_know;
5683         }
5684     }
5685 
5686   return NULL_TREE;
5687 }
5688 
5689 /* Search the data references in LOOP, and record the information into
5690    DATAREFS.  Returns chrec_dont_know when failing to analyze a
5691    difficult case, returns NULL_TREE otherwise.
5692 
5693    TODO: This function should be made smarter so that it can handle
5694    address arithmetic as if it were an array access, etc.  */
5695 
5696 tree
5697 find_data_references_in_loop (class loop *loop,
5698 			      vec<data_reference_p> *datarefs)
5699 {
5700   basic_block bb, *bbs;
5701   unsigned int i;
5702 
5703   bbs = get_loop_body_in_dom_order (loop);
5704 
5705   for (i = 0; i < loop->num_nodes; i++)
5706     {
5707       bb = bbs[i];
5708 
5709       if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
5710         {
5711           free (bbs);
5712           return chrec_dont_know;
5713         }
5714     }
5715   free (bbs);
5716 
5717   return NULL_TREE;
5718 }
5719 
5720 /* Return the alignment in bytes that DRB is guaranteed to have at all
5721    times.  */
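
/* For instance (made-up numbers): with a BASE_ALIGNMENT of 16 bytes
   and BASE_MISALIGNMENT + INIT equal to 4, the largest power of two
   dividing the misalignment (4 & -4 == 4) caps the result at 4 bytes;
   a nonzero OFFSET or STEP can only lower the result further.  */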
5722 
5723 unsigned int
5724 dr_alignment (innermost_loop_behavior *drb)
5725 {
5726   /* Get the alignment of BASE_ADDRESS + INIT.  */
5727   unsigned int alignment = drb->base_alignment;
5728   unsigned int misalignment = (drb->base_misalignment
5729 			       + TREE_INT_CST_LOW (drb->init));
5730   if (misalignment != 0)
5731     alignment = MIN (alignment, misalignment & -misalignment);
5732 
5733   /* Cap it to the alignment of OFFSET.  */
5734   if (!integer_zerop (drb->offset))
5735     alignment = MIN (alignment, drb->offset_alignment);
5736 
5737   /* Cap it to the alignment of STEP.  */
5738   if (!integer_zerop (drb->step))
5739     alignment = MIN (alignment, drb->step_alignment);
5740 
5741   return alignment;
5742 }
5743 
5744 /* If BASE is a pointer-typed SSA name, try to find the object that it
5745    is based on.  Return this object X on success and store the alignment
5746    in bytes of BASE - &X in *ALIGNMENT_OUT.  */
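
/* For example (a hypothetical case): if BASE is an SSA name whose
   scalar evolution is {&s.f, +, 8}_1, where the field F sits at byte
   offset 4 in S, then each step preserves 8-byte alignment, the peeled
   base &s.f dereferences to s.f at byte position 4, and the function
   returns S with *ALIGNMENT_OUT == MIN (8, 4) == 4.  */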
5747 
5748 static tree
5749 get_base_for_alignment_1 (tree base, unsigned int *alignment_out)
5750 {
5751   if (TREE_CODE (base) != SSA_NAME || !POINTER_TYPE_P (TREE_TYPE (base)))
5752     return NULL_TREE;
5753 
5754   gimple *def = SSA_NAME_DEF_STMT (base);
5755   base = analyze_scalar_evolution (loop_containing_stmt (def), base);
5756 
5757   /* Peel chrecs and record the minimum alignment preserved by
5758      all steps.  */
5759   unsigned int alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
5760   while (TREE_CODE (base) == POLYNOMIAL_CHREC)
5761     {
5762       unsigned int step_alignment = highest_pow2_factor (CHREC_RIGHT (base));
5763       alignment = MIN (alignment, step_alignment);
5764       base = CHREC_LEFT (base);
5765     }
5766 
5767   /* Punt if the expression is too complicated to handle.  */
5768   if (tree_contains_chrecs (base, NULL) || !POINTER_TYPE_P (TREE_TYPE (base)))
5769     return NULL_TREE;
5770 
5771   /* The only useful cases are those for which a dereference folds to something
5772      other than an INDIRECT_REF.  */
5773   tree ref_type = TREE_TYPE (TREE_TYPE (base));
5774   tree ref = fold_indirect_ref_1 (UNKNOWN_LOCATION, ref_type, base);
5775   if (!ref)
5776     return NULL_TREE;
5777 
5778   /* Analyze the base to which the steps we peeled were applied.  */
5779   poly_int64 bitsize, bitpos, bytepos;
5780   machine_mode mode;
5781   int unsignedp, reversep, volatilep;
5782   tree offset;
5783   base = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
5784 			      &unsignedp, &reversep, &volatilep);
5785   if (!base || !multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
5786     return NULL_TREE;
5787 
5788   /* Restrict the alignment to that guaranteed by the offsets.  */
5789   unsigned int bytepos_alignment = known_alignment (bytepos);
5790   if (bytepos_alignment != 0)
5791     alignment = MIN (alignment, bytepos_alignment);
5792   if (offset)
5793     {
5794       unsigned int offset_alignment = highest_pow2_factor (offset);
5795       alignment = MIN (alignment, offset_alignment);
5796     }
5797 
5798   *alignment_out = alignment;
5799   return base;
5800 }
5801 
5802 /* Return the object whose alignment would need to be changed in order
5803    to increase the alignment of ADDR.  Store the maximum achievable
5804    alignment in *MAX_ALIGNMENT.  */
5805 
5806 tree
5807 get_base_for_alignment (tree addr, unsigned int *max_alignment)
5808 {
5809   tree base = get_base_for_alignment_1 (addr, max_alignment);
5810   if (base)
5811     return base;
5812 
5813   if (TREE_CODE (addr) == ADDR_EXPR)
5814     addr = TREE_OPERAND (addr, 0);
5815   *max_alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
5816   return addr;
5817 }
5818 
5819 /* Recursive helper function.  */
5820 
5821 static bool
5822 find_loop_nest_1 (class loop *loop, vec<loop_p> *loop_nest)
5823 {
5824   /* Inner loops of the nest should not contain siblings.  Example:
5825      when there are two consecutive loops,
5826 
5827      | loop_0
5828      |   loop_1
5829      |     A[{0, +, 1}_1]
5830      |   endloop_1
5831      |   loop_2
5832      |     A[{0, +, 1}_2]
5833      |   endloop_2
5834      | endloop_0
5835 
5836      the dependence relation cannot be captured by the distance
5837      abstraction.  */
5838   if (loop->next)
5839     return false;
5840 
5841   loop_nest->safe_push (loop);
5842   if (loop->inner)
5843     return find_loop_nest_1 (loop->inner, loop_nest);
5844   return true;
5845 }
5846 
5847 /* Return false when the LOOP is not well nested.  Otherwise return
5848    true and insert in LOOP_NEST the loops of the nest.  LOOP_NEST will
5849    contain the loops from the outermost to the innermost, as they will
5850    appear in the classic distance vector.  */
5851 
5852 bool
5853 find_loop_nest (class loop *loop, vec<loop_p> *loop_nest)
5854 {
5855   loop_nest->safe_push (loop);
5856   if (loop->inner)
5857     return find_loop_nest_1 (loop->inner, loop_nest);
5858   return true;
5859 }
5860 
5861 /* Returns true when the data dependences have been computed, false otherwise.
5862    Given a loop nest LOOP, the following vectors are returned:
5863    DATAREFS is initialized to all the data references contained in this loop,
5864    DEPENDENCE_RELATIONS contains the relations between the data references.
5865    Compute read-read and self relations if
5866    COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE.  */
5867 
5868 bool
5869 compute_data_dependences_for_loop (class loop *loop,
5870 				   bool compute_self_and_read_read_dependences,
5871 				   vec<loop_p> *loop_nest,
5872 				   vec<data_reference_p> *datarefs,
5873 				   vec<ddr_p> *dependence_relations)
5874 {
5875   bool res = true;
5876 
5877   memset (&dependence_stats, 0, sizeof (dependence_stats));
5878 
5879   /* If the loop nest is not well formed, or one of the data references
5880      is not computable, give up without spending time to compute other
5881      dependences.  */
5882   if (!loop
5883       || !find_loop_nest (loop, loop_nest)
5884       || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
5885       || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
5886 				   compute_self_and_read_read_dependences))
5887     res = false;
5888 
5889   if (dump_file && (dump_flags & TDF_STATS))
5890     {
5891       fprintf (dump_file, "Dependence tester statistics:\n");
5892 
5893       fprintf (dump_file, "Number of dependence tests: %d\n",
5894 	       dependence_stats.num_dependence_tests);
5895       fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
5896 	       dependence_stats.num_dependence_dependent);
5897       fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
5898 	       dependence_stats.num_dependence_independent);
5899       fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
5900 	       dependence_stats.num_dependence_undetermined);
5901 
5902       fprintf (dump_file, "Number of subscript tests: %d\n",
5903 	       dependence_stats.num_subscript_tests);
5904       fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
5905 	       dependence_stats.num_subscript_undetermined);
5906       fprintf (dump_file, "Number of same subscript function: %d\n",
5907 	       dependence_stats.num_same_subscript_function);
5908 
5909       fprintf (dump_file, "Number of ziv tests: %d\n",
5910 	       dependence_stats.num_ziv);
5911       fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
5912 	       dependence_stats.num_ziv_dependent);
5913       fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
5914 	       dependence_stats.num_ziv_independent);
5915       fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
5916 	       dependence_stats.num_ziv_unimplemented);
5917 
5918       fprintf (dump_file, "Number of siv tests: %d\n",
5919 	       dependence_stats.num_siv);
5920       fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
5921 	       dependence_stats.num_siv_dependent);
5922       fprintf (dump_file, "Number of siv tests returning independent: %d\n",
5923 	       dependence_stats.num_siv_independent);
5924       fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
5925 	       dependence_stats.num_siv_unimplemented);
5926 
5927       fprintf (dump_file, "Number of miv tests: %d\n",
5928 	       dependence_stats.num_miv);
5929       fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
5930 	       dependence_stats.num_miv_dependent);
5931       fprintf (dump_file, "Number of miv tests returning independent: %d\n",
5932 	       dependence_stats.num_miv_independent);
5933       fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
5934 	       dependence_stats.num_miv_unimplemented);
5935     }
5936 
5937   return res;
5938 }
5939 
5940 /* Free the memory used by a data dependence relation DDR.  */
5941 
5942 void
5943 free_dependence_relation (struct data_dependence_relation *ddr)
5944 {
5945   if (ddr == NULL)
5946     return;
5947 
5948   if (DDR_SUBSCRIPTS (ddr).exists ())
5949     free_subscripts (DDR_SUBSCRIPTS (ddr));
5950   DDR_DIST_VECTS (ddr).release ();
5951   DDR_DIR_VECTS (ddr).release ();
5952 
5953   free (ddr);
5954 }
5955 
5956 /* Free the memory used by the data dependence relations from
5957    DEPENDENCE_RELATIONS.  */
5958 
5959 void
5960 free_dependence_relations (vec<ddr_p> dependence_relations)
5961 {
5962   unsigned int i;
5963   struct data_dependence_relation *ddr;
5964 
5965   FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
5966     if (ddr)
5967       free_dependence_relation (ddr);
5968 
5969   dependence_relations.release ();
5970 }
5971 
5972 /* Free the memory used by the data references from DATAREFS.  */
5973 
5974 void
5975 free_data_refs (vec<data_reference_p> datarefs)
5976 {
5977   unsigned int i;
5978   struct data_reference *dr;
5979 
5980   FOR_EACH_VEC_ELT (datarefs, i, dr)
5981     free_data_ref (dr);
5982   datarefs.release ();
5983 }
5984 
5985 /* Common routine implementing both dr_direction_indicator and
5986    dr_zero_step_indicator.  Return USEFUL_MIN if the indicator is known
5987    to be >= USEFUL_MIN and -1 if the indicator is known to be negative.
5988    Return the step as the indicator otherwise.  */
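
/* For example (a hypothetical DR): if DR_STEP is n * 4 for a signed N
   whose value range is known to be [1, 100], the unscaled N is tested
   instead, so with USEFUL_MIN == 1 the function returns ssize_int (1)
   even though the scaled step is not a compile-time constant.  */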
5989 
5990 static tree
5991 dr_step_indicator (struct data_reference *dr, int useful_min)
5992 {
5993   tree step = DR_STEP (dr);
5994   if (!step)
5995     return NULL_TREE;
5996   STRIP_NOPS (step);
5997   /* Look for cases where the step is scaled by a positive constant
5998      integer, which will often be the access size.  If the multiplication
5999      doesn't change the sign (due to overflow effects) then we can
6000      test the unscaled value instead.  */
6001   if (TREE_CODE (step) == MULT_EXPR
6002       && TREE_CODE (TREE_OPERAND (step, 1)) == INTEGER_CST
6003       && tree_int_cst_sgn (TREE_OPERAND (step, 1)) > 0)
6004     {
6005       tree factor = TREE_OPERAND (step, 1);
6006       step = TREE_OPERAND (step, 0);
6007 
6008       /* Strip widening and truncating conversions as well as nops.  */
6009       if (CONVERT_EXPR_P (step)
6010 	  && INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (step, 0))))
6011 	step = TREE_OPERAND (step, 0);
6012       tree type = TREE_TYPE (step);
6013 
6014       /* Get the range of step values that would not cause overflow.  */
6015       widest_int minv = (wi::to_widest (TYPE_MIN_VALUE (ssizetype))
6016 			 / wi::to_widest (factor));
6017       widest_int maxv = (wi::to_widest (TYPE_MAX_VALUE (ssizetype))
6018 			 / wi::to_widest (factor));
6019 
6020       /* Get the range of values that the unconverted step actually has.  */
6021       wide_int step_min, step_max;
6022       if (TREE_CODE (step) != SSA_NAME
6023 	  || get_range_info (step, &step_min, &step_max) != VR_RANGE)
6024 	{
6025 	  step_min = wi::to_wide (TYPE_MIN_VALUE (type));
6026 	  step_max = wi::to_wide (TYPE_MAX_VALUE (type));
6027 	}
6028 
6029       /* Check whether the unconverted step has an acceptable range.  */
6030       signop sgn = TYPE_SIGN (type);
6031       if (wi::les_p (minv, widest_int::from (step_min, sgn))
6032 	  && wi::ges_p (maxv, widest_int::from (step_max, sgn)))
6033 	{
6034 	  if (wi::ge_p (step_min, useful_min, sgn))
6035 	    return ssize_int (useful_min);
6036 	  else if (wi::lt_p (step_max, 0, sgn))
6037 	    return ssize_int (-1);
6038 	  else
6039 	    return fold_convert (ssizetype, step);
6040 	}
6041     }
6042   return DR_STEP (dr);
6043 }
6044 
6045 /* Return a value that is negative iff DR has a negative step.  */
6046 
6047 tree
6048 dr_direction_indicator (struct data_reference *dr)
6049 {
6050   return dr_step_indicator (dr, 0);
6051 }
6052 
6053 /* Return a value that is zero iff DR has a zero step.  */
6054 
6055 tree
6056 dr_zero_step_indicator (struct data_reference *dr)
6057 {
6058   return dr_step_indicator (dr, 1);
6059 }
6060 
6061 /* Return true if DR is known to have a nonnegative (but possibly zero)
6062    step.  */
6063 
6064 bool
6065 dr_known_forward_stride_p (struct data_reference *dr)
6066 {
6067   tree indicator = dr_direction_indicator (dr);
6068   tree neg_step_val = fold_binary (LT_EXPR, boolean_type_node,
6069 				   fold_convert (ssizetype, indicator),
6070 				   ssize_int (0));
6071   return neg_step_val && integer_zerop (neg_step_val);
6072 }
6073