/* Data references and dependences detectors.
   Copyright (C) 2003-2017 Free Software Foundation, Inc.
   Contributed by Sebastian Pop <pop@cri.ensmp.fr>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass walks a given loop structure searching for array
   references.  The information about the array accesses is recorded
   in DATA_REFERENCE structures.

   The basic test for determining the dependences is:
   given two access functions chrec1 and chrec2 to the same array, and
   x and y two vectors from the iteration domain, the same element of
   the array is accessed twice at iterations x and y if and only if:
   |   chrec1 (x) == chrec2 (y).

   The goals of this analysis are:

   - to determine the independence: the relation between two
     independent accesses is qualified with the chrec_known (this
     information allows a loop parallelization),

   - when two data references access the same data, to qualify the
     dependence relation with classic dependence representations:

       - distance vectors
       - direction vectors
       - loop carried level dependence
       - polyhedron dependence
     or with the chains of recurrences based representation,

   - to define a knowledge base for storing the data dependence
     information,

   - to define an interface to access this data.


   Definitions:

   - subscript: given two array accesses a subscript is the tuple
   composed of the access functions for a given dimension.  Example:
   Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
   (f1, g1), (f2, g2), (f3, g3).

   - Diophantine equation: an equation whose coefficients and
   solutions are integer constants, for example the equation
   |   3*x + 2*y = 1
   has an integer solution x = 1 and y = -1.

   References:

   - "Advanced Compilation for High Performance Computing" by Randy
   Allen and Ken Kennedy.
   http://citeseer.ist.psu.edu/goff91practical.html

   - "Loop Transformations for Restructuring Compilers - The Foundations"
   by Utpal Banerjee.
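
   As a concrete illustration of the basic test described above,
   consider the loop

   | for (i = 0; i < N; i++)
   |   A[i + 1] = A[i] + 1;

   The write access function is chrec1 = {1, +, 1}_1 and the read
   access function is chrec2 = {0, +, 1}_1.  The equation
   chrec1 (x) == chrec2 (y) has the integer solutions y = x + 1, so
   the element written at iteration x is read again at iteration
   x + 1: a dependence of distance 1 and direction (+) carried by
   loop 1.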


*/

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "gimple-pretty-print.h"
#include "alias.h"
#include "fold-const.h"
#include "expr.h"
#include "gimple-iterator.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "dumpfile.h"
#include "tree-affine.h"
#include "params.h"

static struct datadep_stats
{
  int num_dependence_tests;
  int num_dependence_dependent;
  int num_dependence_independent;
  int num_dependence_undetermined;

  int num_subscript_tests;
  int num_subscript_undetermined;
  int num_same_subscript_function;

  int num_ziv;
  int num_ziv_independent;
  int num_ziv_dependent;
  int num_ziv_unimplemented;

  int num_siv;
  int num_siv_independent;
  int num_siv_dependent;
  int num_siv_unimplemented;

  int num_miv;
  int num_miv_independent;
  int num_miv_dependent;
  int num_miv_unimplemented;
} dependence_stats;

static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
                                           struct data_reference *,
                                           struct data_reference *,
                                           struct loop *);

/* Returns true iff A divides B.  */

static inline bool
tree_fold_divides_p (const_tree a, const_tree b)
{
  gcc_assert (TREE_CODE (a) == INTEGER_CST);
  gcc_assert (TREE_CODE (b) == INTEGER_CST);
  return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
}

/* Returns true iff A divides B.  */

static inline bool
int_divides_p (int a, int b)
{
  return ((b % a) == 0);
}



/* Dump into FILE all the data references from DATAREFS.  */

static void
dump_data_references (FILE *file, vec<data_reference_p> datarefs)
{
  unsigned int i;
  struct data_reference *dr;

  FOR_EACH_VEC_ELT (datarefs, i, dr)
    dump_data_reference (file, dr);
}

/* Unified dump function for all the data references in REF; dumps to
   STDERR.  */

DEBUG_FUNCTION void
debug (vec<data_reference_p> &ref)
{
  dump_data_references (stderr, ref);
}

DEBUG_FUNCTION void
debug (vec<data_reference_p> *ptr)
{
  if (ptr)
    debug (*ptr);
  else
    fprintf (stderr, "<nil>\n");
}


/* Dump into STDERR all the data references from DATAREFS.  */

DEBUG_FUNCTION void
debug_data_references (vec<data_reference_p> datarefs)
{
  dump_data_references (stderr, datarefs);
}

/* Print to STDERR the data_reference DR.  */

DEBUG_FUNCTION void
debug_data_reference (struct data_reference *dr)
{
  dump_data_reference (stderr, dr);
}

/* Dump function for a DATA_REFERENCE structure.
   */

void
dump_data_reference (FILE *outf,
                     struct data_reference *dr)
{
  unsigned int i;

  fprintf (outf, "#(Data Ref: \n");
  fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index);
  fprintf (outf, "# stmt: ");
  print_gimple_stmt (outf, DR_STMT (dr), 0, 0);
  fprintf (outf, "# ref: ");
  print_generic_stmt (outf, DR_REF (dr), 0);
  fprintf (outf, "# base_object: ");
  print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);

  for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
    {
      fprintf (outf, "# Access function %d: ", i);
      print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
    }
  fprintf (outf, "#)\n");
}

/* Unified dump function for a DATA_REFERENCE structure.  */

DEBUG_FUNCTION void
debug (data_reference &ref)
{
  dump_data_reference (stderr, &ref);
}

DEBUG_FUNCTION void
debug (data_reference *ptr)
{
  if (ptr)
    debug (*ptr);
  else
    fprintf (stderr, "<nil>\n");
}


/* Dumps the affine function described by FN to the file OUTF.  */

DEBUG_FUNCTION void
dump_affine_function (FILE *outf, affine_fn fn)
{
  unsigned i;
  tree coef;

  print_generic_expr (outf, fn[0], TDF_SLIM);
  for (i = 1; fn.iterate (i, &coef); i++)
    {
      fprintf (outf, " + ");
      print_generic_expr (outf, coef, TDF_SLIM);
      fprintf (outf, " * x_%u", i);
    }
}

/* Dumps the conflict function CF to the file OUTF.  */

DEBUG_FUNCTION void
dump_conflict_function (FILE *outf, conflict_function *cf)
{
  unsigned i;

  if (cf->n == NO_DEPENDENCE)
    fprintf (outf, "no dependence");
  else if (cf->n == NOT_KNOWN)
    fprintf (outf, "not known");
  else
    {
      for (i = 0; i < cf->n; i++)
        {
          if (i != 0)
            fprintf (outf, " ");
          fprintf (outf, "[");
          dump_affine_function (outf, cf->fns[i]);
          fprintf (outf, "]");
        }
    }
}

/* Dump function for a SUBSCRIPT structure.  */

DEBUG_FUNCTION void
dump_subscript (FILE *outf, struct subscript *subscript)
{
  conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);

  fprintf (outf, "\n (subscript \n");
  fprintf (outf, "  iterations_that_access_an_element_twice_in_A: ");
  dump_conflict_function (outf, cf);
  if (CF_NONTRIVIAL_P (cf))
    {
      tree last_iteration = SUB_LAST_CONFLICT (subscript);
      fprintf (outf, "\n  last_conflict: ");
      print_generic_expr (outf, last_iteration, 0);
    }

  cf = SUB_CONFLICTS_IN_B (subscript);
  fprintf (outf, "\n  iterations_that_access_an_element_twice_in_B: ");
  dump_conflict_function (outf, cf);
  if (CF_NONTRIVIAL_P (cf))
    {
      tree last_iteration = SUB_LAST_CONFLICT (subscript);
      fprintf (outf, "\n  last_conflict: ");
      print_generic_expr (outf, last_iteration, 0);
    }

  fprintf (outf, "\n (Subscript distance: ");
  print_generic_expr (outf, SUB_DISTANCE (subscript), 0);
  fprintf (outf, " ))\n");
}

/* Print the classic direction vector DIRV to OUTF.
   */

DEBUG_FUNCTION void
print_direction_vector (FILE *outf,
                        lambda_vector dirv,
                        int length)
{
  int eq;

  for (eq = 0; eq < length; eq++)
    {
      enum data_dependence_direction dir = ((enum data_dependence_direction)
                                            dirv[eq]);

      switch (dir)
        {
        case dir_positive:
          fprintf (outf, " +");
          break;
        case dir_negative:
          fprintf (outf, " -");
          break;
        case dir_equal:
          fprintf (outf, " =");
          break;
        case dir_positive_or_equal:
          fprintf (outf, " +=");
          break;
        case dir_positive_or_negative:
          fprintf (outf, " +-");
          break;
        case dir_negative_or_equal:
          fprintf (outf, " -=");
          break;
        case dir_star:
          fprintf (outf, " *");
          break;
        default:
          fprintf (outf, "indep");
          break;
        }
    }
  fprintf (outf, "\n");
}

/* Print a vector of direction vectors.  */

DEBUG_FUNCTION void
print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
                   int length)
{
  unsigned j;
  lambda_vector v;

  FOR_EACH_VEC_ELT (dir_vects, j, v)
    print_direction_vector (outf, v, length);
}

/* Print out a vector VEC of length N to OUTFILE.  */

DEBUG_FUNCTION void
print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
{
  int i;

  for (i = 0; i < n; i++)
    fprintf (outfile, "%3d ", vector[i]);
  fprintf (outfile, "\n");
}

/* Print a vector of distance vectors.  */

DEBUG_FUNCTION void
print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
                    int length)
{
  unsigned j;
  lambda_vector v;

  FOR_EACH_VEC_ELT (dist_vects, j, v)
    print_lambda_vector (outf, v, length);
}

/* Dump function for a DATA_DEPENDENCE_RELATION structure.
   */

DEBUG_FUNCTION void
dump_data_dependence_relation (FILE *outf,
                               struct data_dependence_relation *ddr)
{
  struct data_reference *dra, *drb;

  fprintf (outf, "(Data Dep: \n");

  if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      if (ddr)
        {
          dra = DDR_A (ddr);
          drb = DDR_B (ddr);
          if (dra)
            dump_data_reference (outf, dra);
          else
            fprintf (outf, " (nil)\n");
          if (drb)
            dump_data_reference (outf, drb);
          else
            fprintf (outf, " (nil)\n");
        }
      fprintf (outf, " (don't know)\n)\n");
      return;
    }

  dra = DDR_A (ddr);
  drb = DDR_B (ddr);
  dump_data_reference (outf, dra);
  dump_data_reference (outf, drb);

  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    fprintf (outf, " (no dependence)\n");

  else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
    {
      unsigned int i;
      struct loop *loopi;

      for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
        {
          fprintf (outf, " access_fn_A: ");
          print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0);
          fprintf (outf, " access_fn_B: ");
          print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0);
          dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
        }

      fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
      fprintf (outf, " loop nest: (");
      FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
        fprintf (outf, "%d ", loopi->num);
      fprintf (outf, ")\n");

      for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
        {
          fprintf (outf, " distance_vector: ");
          print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
                               DDR_NB_LOOPS (ddr));
        }

      for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
        {
          fprintf (outf, " direction_vector: ");
          print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
                                  DDR_NB_LOOPS (ddr));
        }
    }

  fprintf (outf, ")\n");
}

/* Debug version.  */

DEBUG_FUNCTION void
debug_data_dependence_relation (struct data_dependence_relation *ddr)
{
  dump_data_dependence_relation (stderr, ddr);
}

/* Dump into FILE all the dependence relations from DDRS.  */

DEBUG_FUNCTION void
dump_data_dependence_relations (FILE *file,
                                vec<ddr_p> ddrs)
{
  unsigned int i;
  struct data_dependence_relation *ddr;

  FOR_EACH_VEC_ELT (ddrs, i, ddr)
    dump_data_dependence_relation (file, ddr);
}

DEBUG_FUNCTION void
debug (vec<ddr_p> &ref)
{
  dump_data_dependence_relations (stderr, ref);
}

DEBUG_FUNCTION void
debug (vec<ddr_p> *ptr)
{
  if (ptr)
    debug (*ptr);
  else
    fprintf (stderr, "<nil>\n");
}


/* Dump to STDERR all the dependence relations from DDRS.  */

DEBUG_FUNCTION void
debug_data_dependence_relations (vec<ddr_p> ddrs)
{
  dump_data_dependence_relations (stderr, ddrs);
}

/* Dumps the distance and direction vectors in FILE.  DDRS contains
   the dependence relations; the size of the dependence vectors is the
   number of loops in the considered nest.
   */

DEBUG_FUNCTION void
dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
{
  unsigned int i, j;
  struct data_dependence_relation *ddr;
  lambda_vector v;

  FOR_EACH_VEC_ELT (ddrs, i, ddr)
    if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
      {
        FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), j, v)
          {
            fprintf (file, "DISTANCE_V (");
            print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
            fprintf (file, ")\n");
          }

        FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), j, v)
          {
            fprintf (file, "DIRECTION_V (");
            print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
            fprintf (file, ")\n");
          }
      }

  fprintf (file, "\n\n");
}

/* Dumps the data dependence relations DDRS in FILE.  */

DEBUG_FUNCTION void
dump_ddrs (FILE *file, vec<ddr_p> ddrs)
{
  unsigned int i;
  struct data_dependence_relation *ddr;

  FOR_EACH_VEC_ELT (ddrs, i, ddr)
    dump_data_dependence_relation (file, ddr);

  fprintf (file, "\n\n");
}

DEBUG_FUNCTION void
debug_ddrs (vec<ddr_p> ddrs)
{
  dump_ddrs (stderr, ddrs);
}

/* Helper function for split_constant_offset.  Expresses OP0 CODE OP1
   (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
   constant of type ssizetype, and returns true.  If we cannot do this
   with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
   is returned.  */

static bool
split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
                         tree *var, tree *off)
{
  tree var0, var1;
  tree off0, off1;
  enum tree_code ocode = code;

  *var = NULL_TREE;
  *off = NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      *var = build_int_cst (type, 0);
      *off = fold_convert (ssizetype, op0);
      return true;

    case POINTER_PLUS_EXPR:
      ocode = PLUS_EXPR;
      /* FALLTHROUGH */
    case PLUS_EXPR:
    case MINUS_EXPR:
      split_constant_offset (op0, &var0, &off0);
      split_constant_offset (op1, &var1, &off1);
      *var = fold_build2 (code, type, var0, var1);
      *off = size_binop (ocode, off0, off1);
      return true;

    case MULT_EXPR:
      if (TREE_CODE (op1) != INTEGER_CST)
        return false;

      split_constant_offset (op0, &var0, &off0);
      *var = fold_build2 (MULT_EXPR, type, var0, op1);
      *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
      return true;

    case ADDR_EXPR:
      {
        tree base, poffset;
        HOST_WIDE_INT pbitsize, pbitpos;
        machine_mode pmode;
        int punsignedp, preversep, pvolatilep;

        op0 = TREE_OPERAND (op0, 0);
        base
          = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode,
                                 &punsignedp, &preversep, &pvolatilep);

        if (pbitpos % BITS_PER_UNIT != 0)
          return false;
        base = build_fold_addr_expr (base);
        off0 = ssize_int (pbitpos / BITS_PER_UNIT);

        if (poffset)
          {
            split_constant_offset (poffset, &poffset, &off1);
            off0 = size_binop (PLUS_EXPR, off0, off1);
            if (POINTER_TYPE_P (TREE_TYPE (base)))
              base = fold_build_pointer_plus (base, poffset);
            else
              base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
                                  fold_convert (TREE_TYPE (base), poffset));
          }

        var0 = fold_convert (type, base);

        /* If variable length types are involved, punt, otherwise casts
           might be converted into ARRAY_REFs in gimplify_conversion.
           To compute that ARRAY_REF's element size, TYPE_SIZE_UNIT, which
           possibly no longer appears in the current GIMPLE, might need to
           resurface.  This perhaps could run
           if (CONVERT_EXPR_P (var0))
             {
               gimplify_conversion (&var0);
               // Attempt to fill in the element size of any ARRAY_REF
               // found within var0 from the corresponding ARRAY_REF
               // embedded in op; if unsuccessful, just punt.
             }  */
        while (POINTER_TYPE_P (type))
          type = TREE_TYPE (type);
        if (int_size_in_bytes (type) < 0)
          return false;

        *var = var0;
        *off = off0;
        return true;
      }

    case SSA_NAME:
      {
        if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
          return false;

        gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
        enum tree_code subcode;

        if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
          return false;

        var0 = gimple_assign_rhs1 (def_stmt);
        subcode = gimple_assign_rhs_code (def_stmt);
        var1 = gimple_assign_rhs2 (def_stmt);

        return split_constant_offset_1 (type, var0, subcode, var1, var, off);
      }
    CASE_CONVERT:
      {
        /* We must not introduce undefined overflow, and we must not change
           the value.  Hence we're okay if the inner type doesn't overflow
           to start with (pointer or signed), the outer type also is an
           integer or pointer and the outer precision is at least as large
           as the inner.  */
        tree itype = TREE_TYPE (op0);
        if ((POINTER_TYPE_P (itype)
             || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
            && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
            && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
          {
            split_constant_offset (op0, &var0, off);
            *var = fold_convert (type, var0);
            return true;
          }
        return false;
      }

    default:
      return false;
    }
}

/* Expresses EXP as VAR + OFF, where OFF is a constant.  The type of OFF
   will be ssizetype.  */

void
split_constant_offset (tree exp, tree *var, tree *off)
{
  tree type = TREE_TYPE (exp), otype, op0, op1, e, o;
  enum tree_code code;

  *var = exp;
  *off = ssize_int (0);
  STRIP_NOPS (exp);

  if (tree_is_chrec (exp)
      || get_gimple_rhs_class (TREE_CODE (exp)) == GIMPLE_TERNARY_RHS)
    return;

  otype = TREE_TYPE (exp);
  code = TREE_CODE (exp);
  extract_ops_from_tree (exp, &code, &op0, &op1);
  if (split_constant_offset_1 (otype, op0, code, op1, &e, &o))
    {
      *var = fold_convert (type, e);
      *off = o;
    }
}

/* Returns the address ADDR of an object in a canonical shape (without nop
   casts, and with type of pointer to the object).  */

static tree
canonicalize_base_object_address (tree addr)
{
  tree orig = addr;

  STRIP_NOPS (addr);

  /* The base address may be obtained by casting from integer, in that case
     keep the cast.  */
  if (!POINTER_TYPE_P (TREE_TYPE (addr)))
    return orig;

  if (TREE_CODE (addr) != ADDR_EXPR)
    return addr;

  return build_fold_addr_expr (TREE_OPERAND (addr, 0));
}

/* Analyzes the behavior of the memory reference DR in the innermost loop or
   basic block that contains it.  Returns true if the analysis succeeds,
   false otherwise.
   */

bool
dr_analyze_innermost (struct data_reference *dr, struct loop *nest)
{
  gimple *stmt = DR_STMT (dr);
  struct loop *loop = loop_containing_stmt (stmt);
  tree ref = DR_REF (dr);
  HOST_WIDE_INT pbitsize, pbitpos;
  tree base, poffset;
  machine_mode pmode;
  int punsignedp, preversep, pvolatilep;
  affine_iv base_iv, offset_iv;
  tree init, dinit, step;
  bool in_loop = (loop && loop->num);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "analyze_innermost: ");

  base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, &pmode,
                              &punsignedp, &preversep, &pvolatilep);
  gcc_assert (base != NULL_TREE);

  if (pbitpos % BITS_PER_UNIT != 0)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "failed: bit offset alignment.\n");
      return false;
    }

  if (preversep)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "failed: reverse storage order.\n");
      return false;
    }

  if (TREE_CODE (base) == MEM_REF)
    {
      if (!integer_zerop (TREE_OPERAND (base, 1)))
        {
          offset_int moff = mem_ref_offset (base);
          tree mofft = wide_int_to_tree (sizetype, moff);
          if (!poffset)
            poffset = mofft;
          else
            poffset = size_binop (PLUS_EXPR, poffset, mofft);
        }
      base = TREE_OPERAND (base, 0);
    }
  else
    base = build_fold_addr_expr (base);

  if (in_loop)
    {
      if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv,
                      nest ? true : false))
        {
          if (nest)
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, "failed: evolution of base is not"
                         " affine.\n");
              return false;
            }
          else
            {
              base_iv.base = base;
              base_iv.step = ssize_int (0);
              base_iv.no_overflow = true;
            }
        }
    }
  else
    {
      base_iv.base = base;
      base_iv.step = ssize_int (0);
      base_iv.no_overflow = true;
    }

  if (!poffset)
    {
      offset_iv.base = ssize_int (0);
      offset_iv.step = ssize_int (0);
    }
  else
    {
      if (!in_loop)
        {
          offset_iv.base = poffset;
          offset_iv.step = ssize_int (0);
        }
      else if (!simple_iv (loop, loop_containing_stmt (stmt),
                           poffset, &offset_iv,
                           nest ? true : false))
        {
          if (nest)
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, "failed: evolution of offset is not"
                         " affine.\n");
              return false;
            }
          else
            {
              offset_iv.base = poffset;
              offset_iv.step = ssize_int (0);
            }
        }
    }

  init = ssize_int (pbitpos / BITS_PER_UNIT);
  split_constant_offset (base_iv.base, &base_iv.base, &dinit);
  init = size_binop (PLUS_EXPR, init, dinit);
  split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
  init = size_binop (PLUS_EXPR, init, dinit);

  step = size_binop (PLUS_EXPR,
                     fold_convert (ssizetype, base_iv.step),
                     fold_convert (ssizetype, offset_iv.step));

  DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);

  DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
  DR_INIT (dr) = init;
  DR_STEP (dr) = step;

  DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base));

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "success.\n");

  return true;
}

/* Determines the base object and the list of indices of memory reference
   DR, analyzed in LOOP and instantiated in loop nest NEST.
   */

static void
dr_analyze_indices (struct data_reference *dr, loop_p nest, loop_p loop)
{
  vec<tree> access_fns = vNULL;
  tree ref, op;
  tree base, off, access_fn;
  basic_block before_loop;

  /* If analyzing a basic-block there are no indices to analyze
     and thus no access functions.  */
  if (!nest)
    {
      DR_BASE_OBJECT (dr) = DR_REF (dr);
      DR_ACCESS_FNS (dr).create (0);
      return;
    }

  ref = DR_REF (dr);
  before_loop = block_before_loop (nest);

  /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
     into a two element array with a constant index.  The base is
     then just the immediate underlying object.  */
  if (TREE_CODE (ref) == REALPART_EXPR)
    {
      ref = TREE_OPERAND (ref, 0);
      access_fns.safe_push (integer_zero_node);
    }
  else if (TREE_CODE (ref) == IMAGPART_EXPR)
    {
      ref = TREE_OPERAND (ref, 0);
      access_fns.safe_push (integer_one_node);
    }

  /* Analyze access functions of dimensions we know to be independent.  */
  while (handled_component_p (ref))
    {
      if (TREE_CODE (ref) == ARRAY_REF)
        {
          op = TREE_OPERAND (ref, 1);
          access_fn = analyze_scalar_evolution (loop, op);
          access_fn = instantiate_scev (before_loop, loop, access_fn);
          access_fns.safe_push (access_fn);
        }
      else if (TREE_CODE (ref) == COMPONENT_REF
               && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
        {
          /* For COMPONENT_REFs of records (but not unions!) use the
             FIELD_DECL offset as constant access function so we can
             disambiguate a[i].f1 and a[i].f2.  */
          tree off = component_ref_field_offset (ref);
          off = size_binop (PLUS_EXPR,
                            size_binop (MULT_EXPR,
                                        fold_convert (bitsizetype, off),
                                        bitsize_int (BITS_PER_UNIT)),
                            DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
          access_fns.safe_push (off);
        }
      else
        /* If we have an unhandled component we could not translate
           to an access function stop analyzing.  We have determined
           our base object in this case.  */
        break;

      ref = TREE_OPERAND (ref, 0);
    }

  /* If the address operand of a MEM_REF base has an evolution in the
     analyzed nest, add it as an additional independent access-function.  */
  if (TREE_CODE (ref) == MEM_REF)
    {
      op = TREE_OPERAND (ref, 0);
      access_fn = analyze_scalar_evolution (loop, op);
      access_fn = instantiate_scev (before_loop, loop, access_fn);
      if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
        {
          tree orig_type;
          tree memoff = TREE_OPERAND (ref, 1);
          base = initial_condition (access_fn);
          orig_type = TREE_TYPE (base);
          STRIP_USELESS_TYPE_CONVERSION (base);
          split_constant_offset (base, &base, &off);
          STRIP_USELESS_TYPE_CONVERSION (base);
          /* Fold the MEM_REF offset into the evolution's initial
             value to make more bases comparable.  */
          if (!integer_zerop (memoff))
            {
              off = size_binop (PLUS_EXPR, off,
                                fold_convert (ssizetype, memoff));
              memoff = build_int_cst (TREE_TYPE (memoff), 0);
            }
          /* Adjust the offset so it is a multiple of the access type
             size and thus we separate bases that can possibly be used
             to produce partial overlaps (which the access_fn machinery
             cannot handle).
             */
          wide_int rem;
          if (TYPE_SIZE_UNIT (TREE_TYPE (ref))
              && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST
              && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref))))
            rem = wi::mod_trunc (off, TYPE_SIZE_UNIT (TREE_TYPE (ref)), SIGNED);
          else
            /* If we can't compute the remainder simply force the initial
               condition to zero.  */
            rem = off;
          off = wide_int_to_tree (ssizetype, wi::sub (off, rem));
          memoff = wide_int_to_tree (TREE_TYPE (memoff), rem);
          /* And finally replace the initial condition.  */
          access_fn = chrec_replace_initial_condition
              (access_fn, fold_convert (orig_type, off));
          /* ???  This is still not a suitable base object for
             dr_may_alias_p - the base object needs to be an
             access that covers the object as whole.  With
             an evolution in the pointer this cannot be
             guaranteed.
             As a band-aid, mark the access so we can special-case
             it in dr_may_alias_p.  */
          tree old = ref;
          ref = fold_build2_loc (EXPR_LOCATION (ref),
                                 MEM_REF, TREE_TYPE (ref),
                                 base, memoff);
          MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
          MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
          DR_UNCONSTRAINED_BASE (dr) = true;
          access_fns.safe_push (access_fn);
        }
    }
  else if (DECL_P (ref))
    {
      /* Canonicalize DR_BASE_OBJECT to MEM_REF form.  */
      ref = build2 (MEM_REF, TREE_TYPE (ref),
                    build_fold_addr_expr (ref),
                    build_int_cst (reference_alias_ptr_type (ref), 0));
    }

  DR_BASE_OBJECT (dr) = ref;
  DR_ACCESS_FNS (dr) = access_fns;
}

/* Extracts the alias analysis information from the memory reference DR.  */

static void
dr_analyze_alias (struct data_reference *dr)
{
  tree ref = DR_REF (dr);
  tree base = get_base_address (ref), addr;

  if (INDIRECT_REF_P (base)
      || TREE_CODE (base) == MEM_REF)
    {
      addr = TREE_OPERAND (base, 0);
      if (TREE_CODE (addr) == SSA_NAME)
        DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
    }
}

/* Frees data reference DR.  */

void
free_data_ref (data_reference_p dr)
{
  DR_ACCESS_FNS (dr).release ();
  free (dr);
}

/* Analyzes memory reference MEMREF accessed in STMT.  The reference
   is read if IS_READ is true, write otherwise.  Returns the
   data_reference description of MEMREF.  NEST is the outermost loop
   in which the reference should be instantiated, LOOP is the loop in
   which the data reference should be analyzed.
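
   For instance (an illustration only, assuming an int array A and a
   loop index i that evolves as {0, +, 1}_1), a reference A[i]
   typically yields a data_reference with DR_BASE_OBJECT A in MEM_REF
   form, a single access function {0, +, 1}_1, DR_INIT 0 and DR_STEP 4
   (one 4-byte element per iteration).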
   */

struct data_reference *
create_data_ref (loop_p nest, loop_p loop, tree memref, gimple *stmt,
                 bool is_read)
{
  struct data_reference *dr;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Creating dr for ");
      print_generic_expr (dump_file, memref, TDF_SLIM);
      fprintf (dump_file, "\n");
    }

  dr = XCNEW (struct data_reference);
  DR_STMT (dr) = stmt;
  DR_REF (dr) = memref;
  DR_IS_READ (dr) = is_read;

  dr_analyze_innermost (dr, nest);
  dr_analyze_indices (dr, nest, loop);
  dr_analyze_alias (dr);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      unsigned i;
      fprintf (dump_file, "\tbase_address: ");
      print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
      fprintf (dump_file, "\n\toffset from base address: ");
      print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
      fprintf (dump_file, "\n\tconstant offset from base address: ");
      print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
      fprintf (dump_file, "\n\tstep: ");
      print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
      fprintf (dump_file, "\n\taligned to: ");
      print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
      fprintf (dump_file, "\n\tbase_object: ");
      print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
      fprintf (dump_file, "\n");
      for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
        {
          fprintf (dump_file, "\tAccess function %d: ", i);
          print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
        }
    }

  return dr;
}

/* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
   expressions.  */

static bool
dr_equal_offsets_p1 (tree offset1, tree offset2)
{
  bool res;

  STRIP_NOPS (offset1);
  STRIP_NOPS (offset2);

  if (offset1 == offset2)
    return true;

  if (TREE_CODE (offset1) != TREE_CODE (offset2)
      || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
    return false;

  res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
                             TREE_OPERAND (offset2, 0));

  if (!res || !BINARY_CLASS_P (offset1))
    return res;

  res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
                             TREE_OPERAND (offset2, 1));

  return res;
}

/* Check if DRA and DRB have equal offsets.  */

bool
dr_equal_offsets_p (struct data_reference *dra,
                    struct data_reference *drb)
{
  tree offset1, offset2;

  offset1 = DR_OFFSET (dra);
  offset2 = DR_OFFSET (drb);

  return dr_equal_offsets_p1 (offset1, offset2);
}

/* Returns true if FNA == FNB.  */

static bool
affine_function_equal_p (affine_fn fna, affine_fn fnb)
{
  unsigned i, n = fna.length ();

  if (n != fnb.length ())
    return false;

  for (i = 0; i < n; i++)
    if (!operand_equal_p (fna[i], fnb[i], 0))
      return false;

  return true;
}

/* If all the functions in CF are the same, returns one of them,
   otherwise returns NULL.
   */

static affine_fn
common_affine_function (conflict_function *cf)
{
  unsigned i;
  affine_fn comm;

  if (!CF_NONTRIVIAL_P (cf))
    return affine_fn ();

  comm = cf->fns[0];

  for (i = 1; i < cf->n; i++)
    if (!affine_function_equal_p (comm, cf->fns[i]))
      return affine_fn ();

  return comm;
}

/* Returns the base of the affine function FN.  */

static tree
affine_function_base (affine_fn fn)
{
  return fn[0];
}

/* Returns true if FN is a constant.  */

static bool
affine_function_constant_p (affine_fn fn)
{
  unsigned i;
  tree coef;

  for (i = 1; fn.iterate (i, &coef); i++)
    if (!integer_zerop (coef))
      return false;

  return true;
}

/* Returns true if FN is the zero constant function.  */

static bool
affine_function_zero_p (affine_fn fn)
{
  return (integer_zerop (affine_function_base (fn))
          && affine_function_constant_p (fn));
}

/* Returns a signed integer type with the largest precision from TA
   and TB.  */

static tree
signed_type_for_types (tree ta, tree tb)
{
  if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
    return signed_type_for (ta);
  else
    return signed_type_for (tb);
}

/* Applies operation OP on affine functions FNA and FNB, and returns the
   result.  */

static affine_fn
affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
{
  unsigned i, n, m;
  affine_fn ret;
  tree coef;

  if (fnb.length () > fna.length ())
    {
      n = fna.length ();
      m = fnb.length ();
    }
  else
    {
      n = fnb.length ();
      m = fna.length ();
    }

  ret.create (m);
  for (i = 0; i < n; i++)
    {
      tree type = signed_type_for_types (TREE_TYPE (fna[i]),
                                         TREE_TYPE (fnb[i]));
      ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
    }

  for (; fna.iterate (i, &coef); i++)
    ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
                                 coef, integer_zero_node));
  for (; fnb.iterate (i, &coef); i++)
    ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
                                 integer_zero_node, coef));

  return ret;
}

/* Returns the sum of affine functions FNA and FNB.  */

static affine_fn
affine_fn_plus (affine_fn fna, affine_fn fnb)
{
  return affine_fn_op (PLUS_EXPR, fna, fnb);
}

/* Returns the difference of affine functions FNA and FNB.  */

static affine_fn
affine_fn_minus (affine_fn fna, affine_fn fnb)
{
  return affine_fn_op (MINUS_EXPR, fna, fnb);
}

/* Frees affine function FN.  */

static void
affine_fn_free (affine_fn fn)
{
  fn.release ();
}

/* Determine for each subscript in the data dependence relation DDR
   the distance.
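
   For instance, if both conflict functions of a subscript reduce to
   the common affine forms fn_a (k) = k + 2 and fn_b (k) = k, their
   difference is the constant 2, which is recorded as the subscript
   distance; when the difference is not constant, chrec_dont_know is
   recorded instead.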
   */

static void
compute_subscript_distance (struct data_dependence_relation *ddr)
{
  conflict_function *cf_a, *cf_b;
  affine_fn fn_a, fn_b, diff;

  if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
    {
      unsigned int i;

      for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
        {
          struct subscript *subscript;

          subscript = DDR_SUBSCRIPT (ddr, i);
          cf_a = SUB_CONFLICTS_IN_A (subscript);
          cf_b = SUB_CONFLICTS_IN_B (subscript);

          fn_a = common_affine_function (cf_a);
          fn_b = common_affine_function (cf_b);
          if (!fn_a.exists () || !fn_b.exists ())
            {
              SUB_DISTANCE (subscript) = chrec_dont_know;
              return;
            }
          diff = affine_fn_minus (fn_a, fn_b);

          if (affine_function_constant_p (diff))
            SUB_DISTANCE (subscript) = affine_function_base (diff);
          else
            SUB_DISTANCE (subscript) = chrec_dont_know;

          affine_fn_free (diff);
        }
    }
}

/* Returns the conflict function for "unknown".  */

static conflict_function *
conflict_fn_not_known (void)
{
  conflict_function *fn = XCNEW (conflict_function);
  fn->n = NOT_KNOWN;

  return fn;
}

/* Returns the conflict function for "independent".  */

static conflict_function *
conflict_fn_no_dependence (void)
{
  conflict_function *fn = XCNEW (conflict_function);
  fn->n = NO_DEPENDENCE;

  return fn;
}

/* Returns true if the address of OBJ is invariant in LOOP.  */

static bool
object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
{
  while (handled_component_p (obj))
    {
      if (TREE_CODE (obj) == ARRAY_REF)
        {
          /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
             need to check the stride and the lower bound of the reference.  */
          if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
                                                      loop->num)
              || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
                                                         loop->num))
            return false;
        }
      else if (TREE_CODE (obj) == COMPONENT_REF)
        {
          if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
                                                      loop->num))
            return false;
        }
      obj = TREE_OPERAND (obj, 0);
    }

  if (!INDIRECT_REF_P (obj)
      && TREE_CODE (obj) != MEM_REF)
    return true;

  return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
                                                  loop->num);
}

/* Returns false if we can prove that data references A and B do not alias,
   true otherwise.  If LOOP_NEST is false no cross-iteration aliases are
   considered.  */

bool
dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
                struct loop *loop_nest)
{
  tree addr_a = DR_BASE_OBJECT (a);
  tree addr_b = DR_BASE_OBJECT (b);

  /* If we are not processing a loop nest but scalar code we
     do not need to care about possible cross-iteration dependences
     and thus can process the full original reference.  Do so,
     similar to how loop invariant motion applies extra offset-based
     disambiguation.
     */
  if (!loop_nest)
    {
      aff_tree off1, off2;
      widest_int size1, size2;
      get_inner_reference_aff (DR_REF (a), &off1, &size1);
      get_inner_reference_aff (DR_REF (b), &off2, &size2);
      aff_combination_scale (&off1, -1);
      aff_combination_add (&off2, &off1);
      if (aff_comb_cannot_overlap_p (&off2, size1, size2))
        return false;
    }

  if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
      && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
      /* For cross-iteration dependences the cliques must be valid for the
         whole loop, not just individual iterations.  */
      && (!loop_nest
          || MR_DEPENDENCE_CLIQUE (addr_a) == 1
          || MR_DEPENDENCE_CLIQUE (addr_a) == loop_nest->owned_clique)
      && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
      && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
    return false;

  /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
     do not know the size of the base-object.  So we cannot do any
     offset/overlap based analysis but have to rely on points-to
     information only.  */
  if (TREE_CODE (addr_a) == MEM_REF
      && (DR_UNCONSTRAINED_BASE (a)
          || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
    {
      /* For true dependences we can apply TBAA.  */
      if (flag_strict_aliasing
          && DR_IS_WRITE (a) && DR_IS_READ (b)
          && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
                                     get_alias_set (DR_REF (b))))
        return false;
      if (TREE_CODE (addr_b) == MEM_REF)
        return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
                                       TREE_OPERAND (addr_b, 0));
      else
        return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
                                       build_fold_addr_expr (addr_b));
    }
  else if (TREE_CODE (addr_b) == MEM_REF
           && (DR_UNCONSTRAINED_BASE (b)
               || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
    {
      /* For true dependences we can apply TBAA.  */
      if (flag_strict_aliasing
          && DR_IS_WRITE (a) && DR_IS_READ (b)
          && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
                                     get_alias_set (DR_REF (b))))
        return false;
      if (TREE_CODE (addr_a) == MEM_REF)
        return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
                                       TREE_OPERAND (addr_b, 0));
      else
        return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
                                       TREE_OPERAND (addr_b, 0));
    }

  /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
     that is being subsetted in the loop nest.  */
  if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
    return refs_output_dependent_p (addr_a, addr_b);
  else if (DR_IS_READ (a) && DR_IS_WRITE (b))
    return refs_anti_dependent_p (addr_a, addr_b);
  return refs_may_alias_p (addr_a, addr_b);
}

/* Initialize a data dependence relation between data accesses A and
   B.  LOOP_NEST is the vector of loops surrounding the references;
   its length is the size of the classic distance/direction vectors.
   */

struct data_dependence_relation *
initialize_data_dependence_relation (struct data_reference *a,
                                     struct data_reference *b,
                                     vec<loop_p> loop_nest)
{
  struct data_dependence_relation *res;
  unsigned int i;

  res = XNEW (struct data_dependence_relation);
  DDR_A (res) = a;
  DDR_B (res) = b;
  DDR_LOOP_NEST (res).create (0);
  DDR_REVERSED_P (res) = false;
  DDR_SUBSCRIPTS (res).create (0);
  DDR_DIR_VECTS (res).create (0);
  DDR_DIST_VECTS (res).create (0);

  if (a == NULL || b == NULL)
    {
      DDR_ARE_DEPENDENT (res) = chrec_dont_know;
      return res;
    }

  /* If the data references do not alias, then they are independent.  */
  if (!dr_may_alias_p (a, b, loop_nest.exists () ? loop_nest[0] : NULL))
    {
      DDR_ARE_DEPENDENT (res) = chrec_known;
      return res;
    }

  /* The case where the references are exactly the same.  */
  if (operand_equal_p (DR_REF (a), DR_REF (b), 0))
    {
      if ((loop_nest.exists ()
           && !object_address_invariant_in_loop_p (loop_nest[0],
                                                   DR_BASE_OBJECT (a)))
          || DR_NUM_DIMENSIONS (a) == 0)
        {
          DDR_ARE_DEPENDENT (res) = chrec_dont_know;
          return res;
        }
      DDR_AFFINE_P (res) = true;
      DDR_ARE_DEPENDENT (res) = NULL_TREE;
      DDR_SUBSCRIPTS (res).create (DR_NUM_DIMENSIONS (a));
      DDR_LOOP_NEST (res) = loop_nest;
      DDR_INNER_LOOP (res) = 0;
      DDR_SELF_REFERENCE (res) = true;
      for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
        {
          struct subscript *subscript;

          subscript = XNEW (struct subscript);
          SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
          SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
          SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
          SUB_DISTANCE (subscript) = chrec_dont_know;
          DDR_SUBSCRIPTS (res).safe_push (subscript);
        }
      return res;
    }

  /* If the references do not access the same object, we do not know
     whether they alias or not.  We do not care about TBAA or alignment
     info so we can use OEP_ADDRESS_OF to avoid false negatives.
     But the accesses have to use compatible types as otherwise the
     built indices would not match.  */
  if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), OEP_ADDRESS_OF)
      || !types_compatible_p (TREE_TYPE (DR_BASE_OBJECT (a)),
                              TREE_TYPE (DR_BASE_OBJECT (b))))
    {
      DDR_ARE_DEPENDENT (res) = chrec_dont_know;
      return res;
    }

  /* If the base of the object is not invariant in the loop nest, we cannot
     analyze it.  TODO -- in fact, it would suffice to record that there may
     be arbitrary dependences in the loops where the base object varies.  */
  if ((loop_nest.exists ()
       && !object_address_invariant_in_loop_p (loop_nest[0], DR_BASE_OBJECT (a)))
      || DR_NUM_DIMENSIONS (a) == 0)
    {
      DDR_ARE_DEPENDENT (res) = chrec_dont_know;
      return res;
    }

  /* If the numbers of dimensions of the accesses do not agree, we can have
     a pointer access to a component of the array element type and an
     array access while the base-objects are still the same.  Punt.
     */
  if (DR_NUM_DIMENSIONS (a) != DR_NUM_DIMENSIONS (b))
    {
      DDR_ARE_DEPENDENT (res) = chrec_dont_know;
      return res;
    }

  DDR_AFFINE_P (res) = true;
  DDR_ARE_DEPENDENT (res) = NULL_TREE;
  DDR_SUBSCRIPTS (res).create (DR_NUM_DIMENSIONS (a));
  DDR_LOOP_NEST (res) = loop_nest;
  DDR_INNER_LOOP (res) = 0;
  DDR_SELF_REFERENCE (res) = false;

  for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
    {
      struct subscript *subscript;

      subscript = XNEW (struct subscript);
      SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
      SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
      SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
      SUB_DISTANCE (subscript) = chrec_dont_know;
      DDR_SUBSCRIPTS (res).safe_push (subscript);
    }

  return res;
}

/* Frees memory used by the conflict function F.  */

static void
free_conflict_function (conflict_function *f)
{
  unsigned i;

  if (CF_NONTRIVIAL_P (f))
    {
      for (i = 0; i < f->n; i++)
        affine_fn_free (f->fns[i]);
    }
  free (f);
}

/* Frees memory used by SUBSCRIPTS.  */

static void
free_subscripts (vec<subscript_p> subscripts)
{
  unsigned i;
  subscript_p s;

  FOR_EACH_VEC_ELT (subscripts, i, s)
    {
      free_conflict_function (s->conflicting_iterations_in_a);
      free_conflict_function (s->conflicting_iterations_in_b);
      free (s);
    }
  subscripts.release ();
}

/* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
   description.  */

static inline void
finalize_ddr_dependent (struct data_dependence_relation *ddr,
                        tree chrec)
{
  DDR_ARE_DEPENDENT (ddr) = chrec;
  free_subscripts (DDR_SUBSCRIPTS (ddr));
  DDR_SUBSCRIPTS (ddr).create (0);
}

/* The dependence relation DDR cannot be represented by a distance
   vector.  */

static inline void
non_affine_dependence_relation (struct data_dependence_relation *ddr)
{
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");

  DDR_AFFINE_P (ddr) = false;
}



/* This section contains the classic Banerjee tests.  */

/* Returns true iff CHREC_A and CHREC_B are not dependent on any index
   variables, i.e., if the ZIV (Zero Index Variable) test is true.  */

static inline bool
ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
{
  return (evolution_function_is_constant_p (chrec_a)
          && evolution_function_is_constant_p (chrec_b));
}

/* Returns true iff CHREC_A and CHREC_B are dependent on an index
   variable, i.e., if the SIV (Single Index Variable) test is true.
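
   For instance, the pair (5, {0, +, 1}_1) is an SIV subscript, and so
   is ({0, +, 1}_1, {2, +, 2}_1), since both evolutions iterate in the
   same loop; ({0, +, 1}_1, {0, +, 1}_2) is not, because the two
   univariate evolutions belong to different loops.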
   */

static bool
siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
{
  if ((evolution_function_is_constant_p (chrec_a)
       && evolution_function_is_univariate_p (chrec_b))
      || (evolution_function_is_constant_p (chrec_b)
          && evolution_function_is_univariate_p (chrec_a)))
    return true;

  if (evolution_function_is_univariate_p (chrec_a)
      && evolution_function_is_univariate_p (chrec_b))
    {
      switch (TREE_CODE (chrec_a))
        {
        case POLYNOMIAL_CHREC:
          switch (TREE_CODE (chrec_b))
            {
            case POLYNOMIAL_CHREC:
              if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
                return false;
              /* FALLTHRU */

            default:
              return true;
            }

        default:
          return true;
        }
    }

  return false;
}

/* Creates a conflict function with N dimensions.  The affine functions
   in each dimension follow.  */

static conflict_function *
conflict_fn (unsigned n, ...)
{
  unsigned i;
  conflict_function *ret = XCNEW (conflict_function);
  va_list ap;

  gcc_assert (0 < n && n <= MAX_DIM);
  va_start (ap, n);

  ret->n = n;
  for (i = 0; i < n; i++)
    ret->fns[i] = va_arg (ap, affine_fn);
  va_end (ap);

  return ret;
}

/* Returns constant affine function with value CST.  */

static affine_fn
affine_fn_cst (tree cst)
{
  affine_fn fn;
  fn.create (1);
  fn.quick_push (cst);
  return fn;
}

/* Returns affine function with single variable, CST + COEF * x_DIM.  */

static affine_fn
affine_fn_univar (tree cst, unsigned dim, tree coef)
{
  affine_fn fn;
  fn.create (dim + 1);
  unsigned i;

  gcc_assert (dim > 0);
  fn.quick_push (cst);
  for (i = 1; i < dim; i++)
    fn.quick_push (integer_zero_node);
  fn.quick_push (coef);
  return fn;
}

/* Analyze a ZIV (Zero Index Variable) subscript.  *OVERLAPS_A and
   *OVERLAPS_B are initialized to the functions that describe the
   relation between the elements accessed twice by CHREC_A and
   CHREC_B.  For k >= 0, the following property is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */

static void
analyze_ziv_subscript (tree chrec_a,
                       tree chrec_b,
                       conflict_function **overlaps_a,
                       conflict_function **overlaps_b,
                       tree *last_conflicts)
{
  tree type, difference;
  dependence_stats.num_ziv++;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(analyze_ziv_subscript \n");

  type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
  chrec_a = chrec_convert (type, chrec_a, NULL);
  chrec_b = chrec_convert (type, chrec_b, NULL);
  difference = chrec_fold_minus (type, chrec_a, chrec_b);

  switch (TREE_CODE (difference))
    {
    case INTEGER_CST:
      if (integer_zerop (difference))
        {
          /* The difference is equal to zero: the accessed index
             overlaps for each iteration in the loop.  */
          *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
          *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
          *last_conflicts = chrec_dont_know;
          dependence_stats.num_ziv_dependent++;
        }
      else
        {
          /* The accesses do not overlap.
             */
          *overlaps_a = conflict_fn_no_dependence ();
          *overlaps_b = conflict_fn_no_dependence ();
          *last_conflicts = integer_zero_node;
          dependence_stats.num_ziv_independent++;
        }
      break;

    default:
      /* We're not sure whether the indexes overlap.  For the moment,
         conservatively answer "don't know".  */
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "ziv test failed: difference is non-integer.\n");

      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      dependence_stats.num_ziv_unimplemented++;
      break;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, ")\n");
}

/* Similar to max_stmt_executions_int, but returns the bound as a tree,
   and only if it fits the unsigned int type.  If this is not the case,
   or the bound on the number of iterations of LOOP could not be derived,
   returns chrec_dont_know.  */

static tree
max_stmt_executions_tree (struct loop *loop)
{
  widest_int nit;

  if (!max_stmt_executions (loop, &nit))
    return chrec_dont_know;

  if (!wi::fits_to_tree_p (nit, unsigned_type_node))
    return chrec_dont_know;

  return wide_int_to_tree (unsigned_type_node, nit);
}

/* Determine whether the CHREC is always positive/negative.  If the expression
   cannot be statically analyzed, return false, otherwise set the answer into
   VALUE.  */

static bool
chrec_is_positive (tree chrec, bool *value)
{
  bool value0, value1, value2;
  tree end_value, nb_iter;

  switch (TREE_CODE (chrec))
    {
    case POLYNOMIAL_CHREC:
      if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
          || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
        return false;

      /* FIXME -- overflows.  */
      if (value0 == value1)
        {
          *value = value0;
          return true;
        }

      /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
         and the proof consists in showing that the sign never
         changes during the execution of the loop, from 0 to
         loop->nb_iterations.  */
      if (!evolution_function_is_affine_p (chrec))
        return false;

      nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
      if (chrec_contains_undetermined (nb_iter))
        return false;

#if 0
      /* TODO -- If the test is after the exit, we may decrease the number of
         iterations by one.  */
      if (after_exit)
        nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
#endif

      end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);

      if (!chrec_is_positive (end_value, &value2))
        return false;

      *value = value0;
      return value0 == value1;

    case INTEGER_CST:
      switch (tree_int_cst_sgn (chrec))
        {
        case -1:
          *value = false;
          break;
        case 1:
          *value = true;
          break;
        default:
          return false;
        }
      return true;

    default:
      return false;
    }
}


/* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
   constant, and CHREC_B is an affine function.  *OVERLAPS_A and
   *OVERLAPS_B are initialized to the functions that describe the
   relation between the elements accessed twice by CHREC_A and
   CHREC_B.  For k >= 0, the following property is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).
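
   For instance, with CHREC_A = 12 and CHREC_B = {10, +, 1}_1, the only
   conflict is 12 == {10, +, 1}_1 (2), so *OVERLAPS_A is the constant
   function 0, *OVERLAPS_B is the constant function 2 and
   *LAST_CONFLICTS is 1, provided the loop bound does not rule the
   conflicting iteration out.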
   */

static void
analyze_siv_subscript_cst_affine (tree chrec_a,
                                  tree chrec_b,
                                  conflict_function **overlaps_a,
                                  conflict_function **overlaps_b,
                                  tree *last_conflicts)
{
  bool value0, value1, value2;
  tree type, difference, tmp;

  type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
  chrec_a = chrec_convert (type, chrec_a, NULL);
  chrec_b = chrec_convert (type, chrec_b, NULL);
  difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);

  /* Special case overlap in the first iteration.  */
  if (integer_zerop (difference))
    {
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = integer_one_node;
      return;
    }

  if (!chrec_is_positive (initial_condition (difference), &value0))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "siv test failed: chrec is not positive.\n");

      dependence_stats.num_siv_unimplemented++;
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      return;
    }
  else
    {
      if (value0 == false)
        {
          if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, "siv test failed: chrec not positive.\n");

              *overlaps_a = conflict_fn_not_known ();
              *overlaps_b = conflict_fn_not_known ();
              *last_conflicts = chrec_dont_know;
              dependence_stats.num_siv_unimplemented++;
              return;
            }
          else
            {
              if (value1 == true)
                {
                  /* Example:
                     chrec_a = 12
                     chrec_b = {10, +, 1}
                  */

                  if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
                    {
                      HOST_WIDE_INT numiter;
                      struct loop *loop = get_chrec_loop (chrec_b);

                      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
                      tmp = fold_build2 (EXACT_DIV_EXPR, type,
                                         fold_build1 (ABS_EXPR, type, difference),
                                         CHREC_RIGHT (chrec_b));
                      *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
                      *last_conflicts = integer_one_node;


                      /* Perform weak-zero siv test to see if overlap is
                         outside the loop bounds.  */
                      numiter = max_stmt_executions_int (loop);

                      if (numiter >= 0
                          && compare_tree_int (tmp, numiter) > 0)
                        {
                          free_conflict_function (*overlaps_a);
                          free_conflict_function (*overlaps_b);
                          *overlaps_a = conflict_fn_no_dependence ();
                          *overlaps_b = conflict_fn_no_dependence ();
                          *last_conflicts = integer_zero_node;
                          dependence_stats.num_siv_independent++;
                          return;
                        }
                      dependence_stats.num_siv_dependent++;
                      return;
                    }

                  /* When the step does not divide the difference, there are
                     no overlaps.  */
                  else
                    {
                      *overlaps_a = conflict_fn_no_dependence ();
                      *overlaps_b = conflict_fn_no_dependence ();
                      *last_conflicts = integer_zero_node;
                      dependence_stats.num_siv_independent++;
                      return;
                    }
                }

              else
                {
                  /* Example:
                     chrec_a = 12
                     chrec_b = {10, +, -1}

                     In this case, chrec_a will not overlap with chrec_b.
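                     The chrec {10, +, -1} only takes the values
                     10, 9, 8, ... and thus never reaches 12.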
*/ 2027 *overlaps_a = conflict_fn_no_dependence (); 2028 *overlaps_b = conflict_fn_no_dependence (); 2029 *last_conflicts = integer_zero_node; 2030 dependence_stats.num_siv_independent++; 2031 return; 2032 } 2033 } 2034 } 2035 else 2036 { 2037 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2)) 2038 { 2039 if (dump_file && (dump_flags & TDF_DETAILS)) 2040 fprintf (dump_file, "siv test failed: chrec not positive.\n"); 2041 2042 *overlaps_a = conflict_fn_not_known (); 2043 *overlaps_b = conflict_fn_not_known (); 2044 *last_conflicts = chrec_dont_know; 2045 dependence_stats.num_siv_unimplemented++; 2046 return; 2047 } 2048 else 2049 { 2050 if (value2 == false) 2051 { 2052 /* Example: 2053 chrec_a = 3 2054 chrec_b = {10, +, -1} 2055 */ 2056 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference)) 2057 { 2058 HOST_WIDE_INT numiter; 2059 struct loop *loop = get_chrec_loop (chrec_b); 2060 2061 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node)); 2062 tmp = fold_build2 (EXACT_DIV_EXPR, type, difference, 2063 CHREC_RIGHT (chrec_b)); 2064 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp)); 2065 *last_conflicts = integer_one_node; 2066 2067 /* Perform weak-zero siv test to see if overlap is 2068 outside the loop bounds. */ 2069 numiter = max_stmt_executions_int (loop); 2070 2071 if (numiter >= 0 2072 && compare_tree_int (tmp, numiter) > 0) 2073 { 2074 free_conflict_function (*overlaps_a); 2075 free_conflict_function (*overlaps_b); 2076 *overlaps_a = conflict_fn_no_dependence (); 2077 *overlaps_b = conflict_fn_no_dependence (); 2078 *last_conflicts = integer_zero_node; 2079 dependence_stats.num_siv_independent++; 2080 return; 2081 } 2082 dependence_stats.num_siv_dependent++; 2083 return; 2084 } 2085 2086 /* When the step does not divide the difference, there 2087 are no overlaps. */ 2088 else 2089 { 2090 *overlaps_a = conflict_fn_no_dependence (); 2091 *overlaps_b = conflict_fn_no_dependence (); 2092 *last_conflicts = integer_zero_node; 2093 dependence_stats.num_siv_independent++; 2094 return; 2095 } 2096 } 2097 else 2098 { 2099 /* Example: 2100 chrec_a = 3 2101 chrec_b = {4, +, 1} 2102 2103 In this case, chrec_a will not overlap with chrec_b. */ 2104 *overlaps_a = conflict_fn_no_dependence (); 2105 *overlaps_b = conflict_fn_no_dependence (); 2106 *last_conflicts = integer_zero_node; 2107 dependence_stats.num_siv_independent++; 2108 return; 2109 } 2110 } 2111 } 2112 } 2113 } 2114 2115 /* Helper recursive function for initializing the matrix A. Returns 2116 the initial value of CHREC. */ 2117 2118 static tree 2119 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult) 2120 { 2121 gcc_assert (chrec); 2122 2123 switch (TREE_CODE (chrec)) 2124 { 2125 case POLYNOMIAL_CHREC: 2126 if (!cst_and_fits_in_hwi (CHREC_RIGHT (chrec))) 2127 return chrec_dont_know; 2128 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec)); 2129 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult); 2130 2131 case PLUS_EXPR: 2132 case MULT_EXPR: 2133 case MINUS_EXPR: 2134 { 2135 tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult); 2136 tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult); 2137 2138 return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1); 2139 } 2140 2141 CASE_CONVERT: 2142 { 2143 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult); 2144 return chrec_convert (chrec_type (chrec), op, NULL); 2145 } 2146 2147 case BIT_NOT_EXPR: 2148 { 2149 /* Handle ~X as -1 - X. 
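   In two's complement arithmetic ~X is equal to -X - 1, i.e. -1 - X;
   for instance ~5 = -6 = -1 - 5.  So the chrec of ~X is obtained by
   folding the constant -1 of the chrec type minus the chrec of X.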
*/ 2150 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult); 2151 return chrec_fold_op (MINUS_EXPR, chrec_type (chrec), 2152 build_int_cst (TREE_TYPE (chrec), -1), op); 2153 } 2154 2155 case INTEGER_CST: 2156 return chrec; 2157 2158 default: 2159 gcc_unreachable (); 2160 return NULL_TREE; 2161 } 2162 } 2163 2164 #define FLOOR_DIV(x,y) ((x) / (y)) 2165 2166 /* Solves the special case of the Diophantine equation: 2167 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B) 2168 2169 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the 2170 number of iterations that loops X and Y run. The overlaps will be 2171 constructed as evolutions in dimension DIM. */ 2172 2173 static void 2174 compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter, 2175 HOST_WIDE_INT step_a, 2176 HOST_WIDE_INT step_b, 2177 affine_fn *overlaps_a, 2178 affine_fn *overlaps_b, 2179 tree *last_conflicts, int dim) 2180 { 2181 if (((step_a > 0 && step_b > 0) 2182 || (step_a < 0 && step_b < 0))) 2183 { 2184 HOST_WIDE_INT step_overlaps_a, step_overlaps_b; 2185 HOST_WIDE_INT gcd_steps_a_b, last_conflict, tau2; 2186 2187 gcd_steps_a_b = gcd (step_a, step_b); 2188 step_overlaps_a = step_b / gcd_steps_a_b; 2189 step_overlaps_b = step_a / gcd_steps_a_b; 2190 2191 if (niter > 0) 2192 { 2193 tau2 = FLOOR_DIV (niter, step_overlaps_a); 2194 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b)); 2195 last_conflict = tau2; 2196 *last_conflicts = build_int_cst (NULL_TREE, last_conflict); 2197 } 2198 else 2199 *last_conflicts = chrec_dont_know; 2200 2201 *overlaps_a = affine_fn_univar (integer_zero_node, dim, 2202 build_int_cst (NULL_TREE, 2203 step_overlaps_a)); 2204 *overlaps_b = affine_fn_univar (integer_zero_node, dim, 2205 build_int_cst (NULL_TREE, 2206 step_overlaps_b)); 2207 } 2208 2209 else 2210 { 2211 *overlaps_a = affine_fn_cst (integer_zero_node); 2212 *overlaps_b = affine_fn_cst (integer_zero_node); 2213 *last_conflicts = integer_zero_node; 2214 } 2215 } 2216 2217 /* Solves the special case of a Diophantine equation where CHREC_A is 2218 an affine bivariate function, and CHREC_B is an affine univariate 2219 function. For example, 2220 2221 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z 2222 2223 has the following overlapping functions: 2224 2225 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v 2226 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v 2227 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v 2228 2229 FORNOW: This is a specialized implementation for a case occurring in 2230 a common benchmark. Implement the general algorithm. 
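   As a spot check of the example above (illustration only), the point
   (t, u, v) = (1, 0, 0) gives x = 1336, y = 0 and z = 1, and indeed
   {{0, +, 1}_x, +, 1335}_y evaluated at (1336, 0) equals 1336, the
   same value as {0, +, 1336}_z evaluated at 1.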
*/ 2231 2232 static void 2233 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b, 2234 conflict_function **overlaps_a, 2235 conflict_function **overlaps_b, 2236 tree *last_conflicts) 2237 { 2238 bool xz_p, yz_p, xyz_p; 2239 HOST_WIDE_INT step_x, step_y, step_z; 2240 HOST_WIDE_INT niter_x, niter_y, niter_z, niter; 2241 affine_fn overlaps_a_xz, overlaps_b_xz; 2242 affine_fn overlaps_a_yz, overlaps_b_yz; 2243 affine_fn overlaps_a_xyz, overlaps_b_xyz; 2244 affine_fn ova1, ova2, ovb; 2245 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz; 2246 2247 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a))); 2248 step_y = int_cst_value (CHREC_RIGHT (chrec_a)); 2249 step_z = int_cst_value (CHREC_RIGHT (chrec_b)); 2250 2251 niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a))); 2252 niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a)); 2253 niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b)); 2254 2255 if (niter_x < 0 || niter_y < 0 || niter_z < 0) 2256 { 2257 if (dump_file && (dump_flags & TDF_DETAILS)) 2258 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n"); 2259 2260 *overlaps_a = conflict_fn_not_known (); 2261 *overlaps_b = conflict_fn_not_known (); 2262 *last_conflicts = chrec_dont_know; 2263 return; 2264 } 2265 2266 niter = MIN (niter_x, niter_z); 2267 compute_overlap_steps_for_affine_univar (niter, step_x, step_z, 2268 &overlaps_a_xz, 2269 &overlaps_b_xz, 2270 &last_conflicts_xz, 1); 2271 niter = MIN (niter_y, niter_z); 2272 compute_overlap_steps_for_affine_univar (niter, step_y, step_z, 2273 &overlaps_a_yz, 2274 &overlaps_b_yz, 2275 &last_conflicts_yz, 2); 2276 niter = MIN (niter_x, niter_z); 2277 niter = MIN (niter_y, niter); 2278 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z, 2279 &overlaps_a_xyz, 2280 &overlaps_b_xyz, 2281 &last_conflicts_xyz, 3); 2282 2283 xz_p = !integer_zerop (last_conflicts_xz); 2284 yz_p = !integer_zerop (last_conflicts_yz); 2285 xyz_p = !integer_zerop (last_conflicts_xyz); 2286 2287 if (xz_p || yz_p || xyz_p) 2288 { 2289 ova1 = affine_fn_cst (integer_zero_node); 2290 ova2 = affine_fn_cst (integer_zero_node); 2291 ovb = affine_fn_cst (integer_zero_node); 2292 if (xz_p) 2293 { 2294 affine_fn t0 = ova1; 2295 affine_fn t2 = ovb; 2296 2297 ova1 = affine_fn_plus (ova1, overlaps_a_xz); 2298 ovb = affine_fn_plus (ovb, overlaps_b_xz); 2299 affine_fn_free (t0); 2300 affine_fn_free (t2); 2301 *last_conflicts = last_conflicts_xz; 2302 } 2303 if (yz_p) 2304 { 2305 affine_fn t0 = ova2; 2306 affine_fn t2 = ovb; 2307 2308 ova2 = affine_fn_plus (ova2, overlaps_a_yz); 2309 ovb = affine_fn_plus (ovb, overlaps_b_yz); 2310 affine_fn_free (t0); 2311 affine_fn_free (t2); 2312 *last_conflicts = last_conflicts_yz; 2313 } 2314 if (xyz_p) 2315 { 2316 affine_fn t0 = ova1; 2317 affine_fn t2 = ova2; 2318 affine_fn t4 = ovb; 2319 2320 ova1 = affine_fn_plus (ova1, overlaps_a_xyz); 2321 ova2 = affine_fn_plus (ova2, overlaps_a_xyz); 2322 ovb = affine_fn_plus (ovb, overlaps_b_xyz); 2323 affine_fn_free (t0); 2324 affine_fn_free (t2); 2325 affine_fn_free (t4); 2326 *last_conflicts = last_conflicts_xyz; 2327 } 2328 *overlaps_a = conflict_fn (2, ova1, ova2); 2329 *overlaps_b = conflict_fn (1, ovb); 2330 } 2331 else 2332 { 2333 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node)); 2334 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node)); 2335 *last_conflicts = integer_zero_node; 2336 } 2337 2338 affine_fn_free (overlaps_a_xz); 2339 affine_fn_free (overlaps_b_xz); 2340 
affine_fn_free (overlaps_a_yz); 2341 affine_fn_free (overlaps_b_yz); 2342 affine_fn_free (overlaps_a_xyz); 2343 affine_fn_free (overlaps_b_xyz); 2344 } 2345 2346 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */ 2347 2348 static void 2349 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2, 2350 int size) 2351 { 2352 memcpy (vec2, vec1, size * sizeof (*vec1)); 2353 } 2354 2355 /* Copy the elements of M x N matrix MAT1 to MAT2. */ 2356 2357 static void 2358 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2, 2359 int m, int n) 2360 { 2361 int i; 2362 2363 for (i = 0; i < m; i++) 2364 lambda_vector_copy (mat1[i], mat2[i], n); 2365 } 2366 2367 /* Store the N x N identity matrix in MAT. */ 2368 2369 static void 2370 lambda_matrix_id (lambda_matrix mat, int size) 2371 { 2372 int i, j; 2373 2374 for (i = 0; i < size; i++) 2375 for (j = 0; j < size; j++) 2376 mat[i][j] = (i == j) ? 1 : 0; 2377 } 2378 2379 /* Return the first nonzero element of vector VEC1 between START and N. 2380 We must have START <= N. Returns N if VEC1 is the zero vector. */ 2381 2382 static int 2383 lambda_vector_first_nz (lambda_vector vec1, int n, int start) 2384 { 2385 int j = start; 2386 while (j < n && vec1[j] == 0) 2387 j++; 2388 return j; 2389 } 2390 2391 /* Add a multiple of row R1 of matrix MAT with N columns to row R2: 2392 R2 = R2 + CONST1 * R1. */ 2393 2394 static void 2395 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1) 2396 { 2397 int i; 2398 2399 if (const1 == 0) 2400 return; 2401 2402 for (i = 0; i < n; i++) 2403 mat[r2][i] += const1 * mat[r1][i]; 2404 } 2405 2406 /* Multiply vector VEC1 of length SIZE by a constant CONST1, 2407 and store the result in VEC2. */ 2408 2409 static void 2410 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2, 2411 int size, int const1) 2412 { 2413 int i; 2414 2415 if (const1 == 0) 2416 lambda_vector_clear (vec2, size); 2417 else 2418 for (i = 0; i < size; i++) 2419 vec2[i] = const1 * vec1[i]; 2420 } 2421 2422 /* Negate vector VEC1 with length SIZE and store it in VEC2. */ 2423 2424 static void 2425 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2, 2426 int size) 2427 { 2428 lambda_vector_mult_const (vec1, vec2, size, -1); 2429 } 2430 2431 /* Negate row R1 of matrix MAT which has N columns. */ 2432 2433 static void 2434 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1) 2435 { 2436 lambda_vector_negate (mat[r1], mat[r1], n); 2437 } 2438 2439 /* Return true if two vectors are equal. */ 2440 2441 static bool 2442 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size) 2443 { 2444 int i; 2445 for (i = 0; i < size; i++) 2446 if (vec1[i] != vec2[i]) 2447 return false; 2448 return true; 2449 } 2450 2451 /* Given an M x N integer matrix A, this function determines an M x 2452 M unimodular matrix U, and an M x N echelon matrix S such that 2453 "U.A = S". This decomposition is also known as "right Hermite". 2454 2455 Ref: Algorithm 2.1 page 33 in "Loop Transformations for 2456 Restructuring Compilers" Utpal Banerjee. 
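   For instance (one possible decomposition, the factors are not
   unique), for the 2 x 1 matrix A = (8, -12)^T, taking

   | U = ( -1  -1 )      S = ( 4 )
   |     ( -3  -2 )          ( 0 )

   gives U.A = S with det (U) = -1, so U is unimodular and S is in
   echelon form.  For the dim x 1 matrices used by
   analyze_subscript_affine_affine below, S[0][0] is then, up to its
   sign, the gcd of the entries of A.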
*/ 2457 2458 static void 2459 lambda_matrix_right_hermite (lambda_matrix A, int m, int n, 2460 lambda_matrix S, lambda_matrix U) 2461 { 2462 int i, j, i0 = 0; 2463 2464 lambda_matrix_copy (A, S, m, n); 2465 lambda_matrix_id (U, m); 2466 2467 for (j = 0; j < n; j++) 2468 { 2469 if (lambda_vector_first_nz (S[j], m, i0) < m) 2470 { 2471 ++i0; 2472 for (i = m - 1; i >= i0; i--) 2473 { 2474 while (S[i][j] != 0) 2475 { 2476 int sigma, factor, a, b; 2477 2478 a = S[i-1][j]; 2479 b = S[i][j]; 2480 sigma = (a * b < 0) ? -1: 1; 2481 a = abs (a); 2482 b = abs (b); 2483 factor = sigma * (a / b); 2484 2485 lambda_matrix_row_add (S, n, i, i-1, -factor); 2486 std::swap (S[i], S[i-1]); 2487 2488 lambda_matrix_row_add (U, m, i, i-1, -factor); 2489 std::swap (U[i], U[i-1]); 2490 } 2491 } 2492 } 2493 } 2494 } 2495 2496 /* Determines the overlapping elements due to accesses CHREC_A and 2497 CHREC_B, that are affine functions. This function cannot handle 2498 symbolic evolution functions, ie. when initial conditions are 2499 parameters, because it uses lambda matrices of integers. */ 2500 2501 static void 2502 analyze_subscript_affine_affine (tree chrec_a, 2503 tree chrec_b, 2504 conflict_function **overlaps_a, 2505 conflict_function **overlaps_b, 2506 tree *last_conflicts) 2507 { 2508 unsigned nb_vars_a, nb_vars_b, dim; 2509 HOST_WIDE_INT gamma, gcd_alpha_beta; 2510 lambda_matrix A, U, S; 2511 struct obstack scratch_obstack; 2512 2513 if (eq_evolutions_p (chrec_a, chrec_b)) 2514 { 2515 /* The accessed index overlaps for each iteration in the 2516 loop. */ 2517 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node)); 2518 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node)); 2519 *last_conflicts = chrec_dont_know; 2520 return; 2521 } 2522 if (dump_file && (dump_flags & TDF_DETAILS)) 2523 fprintf (dump_file, "(analyze_subscript_affine_affine \n"); 2524 2525 /* For determining the initial intersection, we have to solve a 2526 Diophantine equation. This is the most time consuming part. 2527 2528 For answering to the question: "Is there a dependence?" we have 2529 to prove that there exists a solution to the Diophantine 2530 equation, and that the solution is in the iteration domain, 2531 i.e. the solution is positive or zero, and that the solution 2532 happens before the upper bound loop.nb_iterations. Otherwise 2533 there is no dependence. This function outputs a description of 2534 the iterations that hold the intersections. 
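   For instance, for the accesses {0, +, 4}_1 and {2, +, 6}_1 the
   equation to solve is 4 * x - 6 * y = 2.  Since gcd (4, 6) = 2
   divides 2, integer solutions exist, and the smallest nonnegative
   one, (x, y) = (2, 1), is the first intersection:
   0 + 4 * 2 = 2 + 6 * 1 = 8.  It must additionally fall below the
   loop bounds to be a real dependence.  The existence part of this
   reasoning is just the gcd test; a minimal standalone sketch (not
   GCC code, the helper name is made up for the illustration):

   | bool gcd_test_sketch (HOST_WIDE_INT step_a, HOST_WIDE_INT step_b,
   |                       HOST_WIDE_INT gamma)
   | {
   |   // Solutions exist only when gcd (step_a, step_b) divides gamma,
   |   // the difference of the initial values.
   |   return gamma % gcd (step_a, step_b) == 0;
   | }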
*/ 2535 2536 nb_vars_a = nb_vars_in_chrec (chrec_a); 2537 nb_vars_b = nb_vars_in_chrec (chrec_b); 2538 2539 gcc_obstack_init (&scratch_obstack); 2540 2541 dim = nb_vars_a + nb_vars_b; 2542 U = lambda_matrix_new (dim, dim, &scratch_obstack); 2543 A = lambda_matrix_new (dim, 1, &scratch_obstack); 2544 S = lambda_matrix_new (dim, 1, &scratch_obstack); 2545 2546 tree init_a = initialize_matrix_A (A, chrec_a, 0, 1); 2547 tree init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1); 2548 if (init_a == chrec_dont_know 2549 || init_b == chrec_dont_know) 2550 { 2551 if (dump_file && (dump_flags & TDF_DETAILS)) 2552 fprintf (dump_file, "affine-affine test failed: " 2553 "representation issue.\n"); 2554 *overlaps_a = conflict_fn_not_known (); 2555 *overlaps_b = conflict_fn_not_known (); 2556 *last_conflicts = chrec_dont_know; 2557 goto end_analyze_subs_aa; 2558 } 2559 gamma = int_cst_value (init_b) - int_cst_value (init_a); 2560 2561 /* Don't do all the hard work of solving the Diophantine equation 2562 when we already know the solution: for example, 2563 | {3, +, 1}_1 2564 | {3, +, 4}_2 2565 | gamma = 3 - 3 = 0. 2566 Then the first overlap occurs during the first iterations: 2567 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x) 2568 */ 2569 if (gamma == 0) 2570 { 2571 if (nb_vars_a == 1 && nb_vars_b == 1) 2572 { 2573 HOST_WIDE_INT step_a, step_b; 2574 HOST_WIDE_INT niter, niter_a, niter_b; 2575 affine_fn ova, ovb; 2576 2577 niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a)); 2578 niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b)); 2579 niter = MIN (niter_a, niter_b); 2580 step_a = int_cst_value (CHREC_RIGHT (chrec_a)); 2581 step_b = int_cst_value (CHREC_RIGHT (chrec_b)); 2582 2583 compute_overlap_steps_for_affine_univar (niter, step_a, step_b, 2584 &ova, &ovb, 2585 last_conflicts, 1); 2586 *overlaps_a = conflict_fn (1, ova); 2587 *overlaps_b = conflict_fn (1, ovb); 2588 } 2589 2590 else if (nb_vars_a == 2 && nb_vars_b == 1) 2591 compute_overlap_steps_for_affine_1_2 2592 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts); 2593 2594 else if (nb_vars_a == 1 && nb_vars_b == 2) 2595 compute_overlap_steps_for_affine_1_2 2596 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts); 2597 2598 else 2599 { 2600 if (dump_file && (dump_flags & TDF_DETAILS)) 2601 fprintf (dump_file, "affine-affine test failed: too many variables.\n"); 2602 *overlaps_a = conflict_fn_not_known (); 2603 *overlaps_b = conflict_fn_not_known (); 2604 *last_conflicts = chrec_dont_know; 2605 } 2606 goto end_analyze_subs_aa; 2607 } 2608 2609 /* U.A = S */ 2610 lambda_matrix_right_hermite (A, dim, 1, S, U); 2611 2612 if (S[0][0] < 0) 2613 { 2614 S[0][0] *= -1; 2615 lambda_matrix_row_negate (U, dim, 0); 2616 } 2617 gcd_alpha_beta = S[0][0]; 2618 2619 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5, 2620 but that is a quite strange case. Instead of ICEing, answer 2621 don't know. */ 2622 if (gcd_alpha_beta == 0) 2623 { 2624 *overlaps_a = conflict_fn_not_known (); 2625 *overlaps_b = conflict_fn_not_known (); 2626 *last_conflicts = chrec_dont_know; 2627 goto end_analyze_subs_aa; 2628 } 2629 2630 /* The classic "gcd-test". */ 2631 if (!int_divides_p (gcd_alpha_beta, gamma)) 2632 { 2633 /* The "gcd-test" has determined that there is no integer 2634 solution, i.e. there is no dependence. */ 2635 *overlaps_a = conflict_fn_no_dependence (); 2636 *overlaps_b = conflict_fn_no_dependence (); 2637 *last_conflicts = integer_zero_node; 2638 } 2639 2640 /* Both access functions are univariate. 
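   Each one evolves in a single loop, although not necessarily in the
   same loop for both.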
This includes SIV and MIV cases. */ 2641 else if (nb_vars_a == 1 && nb_vars_b == 1) 2642 { 2643 /* Both functions should have the same evolution sign. */ 2644 if (((A[0][0] > 0 && -A[1][0] > 0) 2645 || (A[0][0] < 0 && -A[1][0] < 0))) 2646 { 2647 /* The solutions are given by: 2648 | 2649 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0] 2650 | [u21 u22] [y0] 2651 2652 For a given integer t. Using the following variables, 2653 2654 | i0 = u11 * gamma / gcd_alpha_beta 2655 | j0 = u12 * gamma / gcd_alpha_beta 2656 | i1 = u21 2657 | j1 = u22 2658 2659 the solutions are: 2660 2661 | x0 = i0 + i1 * t, 2662 | y0 = j0 + j1 * t. */ 2663 HOST_WIDE_INT i0, j0, i1, j1; 2664 2665 i0 = U[0][0] * gamma / gcd_alpha_beta; 2666 j0 = U[0][1] * gamma / gcd_alpha_beta; 2667 i1 = U[1][0]; 2668 j1 = U[1][1]; 2669 2670 if ((i1 == 0 && i0 < 0) 2671 || (j1 == 0 && j0 < 0)) 2672 { 2673 /* There is no solution. 2674 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations" 2675 falls in here, but for the moment we don't look at the 2676 upper bound of the iteration domain. */ 2677 *overlaps_a = conflict_fn_no_dependence (); 2678 *overlaps_b = conflict_fn_no_dependence (); 2679 *last_conflicts = integer_zero_node; 2680 goto end_analyze_subs_aa; 2681 } 2682 2683 if (i1 > 0 && j1 > 0) 2684 { 2685 HOST_WIDE_INT niter_a 2686 = max_stmt_executions_int (get_chrec_loop (chrec_a)); 2687 HOST_WIDE_INT niter_b 2688 = max_stmt_executions_int (get_chrec_loop (chrec_b)); 2689 HOST_WIDE_INT niter = MIN (niter_a, niter_b); 2690 2691 /* (X0, Y0) is a solution of the Diophantine equation: 2692 "chrec_a (X0) = chrec_b (Y0)". */ 2693 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1), 2694 CEIL (-j0, j1)); 2695 HOST_WIDE_INT x0 = i1 * tau1 + i0; 2696 HOST_WIDE_INT y0 = j1 * tau1 + j0; 2697 2698 /* (X1, Y1) is the smallest positive solution of the eq 2699 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the 2700 first conflict occurs. */ 2701 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1); 2702 HOST_WIDE_INT x1 = x0 - i1 * min_multiple; 2703 HOST_WIDE_INT y1 = y0 - j1 * min_multiple; 2704 2705 if (niter > 0) 2706 { 2707 HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter_a - i0, i1), 2708 FLOOR_DIV (niter_b - j0, j1)); 2709 HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1; 2710 2711 /* If the overlap occurs outside of the bounds of the 2712 loop, there is no dependence. */ 2713 if (x1 >= niter_a || y1 >= niter_b) 2714 { 2715 *overlaps_a = conflict_fn_no_dependence (); 2716 *overlaps_b = conflict_fn_no_dependence (); 2717 *last_conflicts = integer_zero_node; 2718 goto end_analyze_subs_aa; 2719 } 2720 else 2721 *last_conflicts = build_int_cst (NULL_TREE, last_conflict); 2722 } 2723 else 2724 *last_conflicts = chrec_dont_know; 2725 2726 *overlaps_a 2727 = conflict_fn (1, 2728 affine_fn_univar (build_int_cst (NULL_TREE, x1), 2729 1, 2730 build_int_cst (NULL_TREE, i1))); 2731 *overlaps_b 2732 = conflict_fn (1, 2733 affine_fn_univar (build_int_cst (NULL_TREE, y1), 2734 1, 2735 build_int_cst (NULL_TREE, j1))); 2736 } 2737 else 2738 { 2739 /* FIXME: For the moment, the upper bound of the 2740 iteration domain for i and j is not checked. 
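   Instead of reasoning about those bounds, the code below
   conservatively answers "don't know"; this can only lose precision,
   it never produces a wrong dependence result.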
*/ 2741 if (dump_file && (dump_flags & TDF_DETAILS)) 2742 fprintf (dump_file, "affine-affine test failed: unimplemented.\n"); 2743 *overlaps_a = conflict_fn_not_known (); 2744 *overlaps_b = conflict_fn_not_known (); 2745 *last_conflicts = chrec_dont_know; 2746 } 2747 } 2748 else 2749 { 2750 if (dump_file && (dump_flags & TDF_DETAILS)) 2751 fprintf (dump_file, "affine-affine test failed: unimplemented.\n"); 2752 *overlaps_a = conflict_fn_not_known (); 2753 *overlaps_b = conflict_fn_not_known (); 2754 *last_conflicts = chrec_dont_know; 2755 } 2756 } 2757 else 2758 { 2759 if (dump_file && (dump_flags & TDF_DETAILS)) 2760 fprintf (dump_file, "affine-affine test failed: unimplemented.\n"); 2761 *overlaps_a = conflict_fn_not_known (); 2762 *overlaps_b = conflict_fn_not_known (); 2763 *last_conflicts = chrec_dont_know; 2764 } 2765 2766 end_analyze_subs_aa: 2767 obstack_free (&scratch_obstack, NULL); 2768 if (dump_file && (dump_flags & TDF_DETAILS)) 2769 { 2770 fprintf (dump_file, " (overlaps_a = "); 2771 dump_conflict_function (dump_file, *overlaps_a); 2772 fprintf (dump_file, ")\n (overlaps_b = "); 2773 dump_conflict_function (dump_file, *overlaps_b); 2774 fprintf (dump_file, "))\n"); 2775 } 2776 } 2777 2778 /* Returns true when analyze_subscript_affine_affine can be used for 2779 determining the dependence relation between chrec_a and chrec_b, 2780 that contain symbols. This function modifies chrec_a and chrec_b 2781 such that the analysis result is the same, and such that they don't 2782 contain symbols, and then can safely be passed to the analyzer. 2783 2784 Example: The analysis of the following tuples of evolutions produce 2785 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1 2786 vs. {0, +, 1}_1 2787 2788 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1) 2789 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1) 2790 */ 2791 2792 static bool 2793 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b) 2794 { 2795 tree diff, type, left_a, left_b, right_b; 2796 2797 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a)) 2798 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b))) 2799 /* FIXME: For the moment not handled. Might be refined later. */ 2800 return false; 2801 2802 type = chrec_type (*chrec_a); 2803 left_a = CHREC_LEFT (*chrec_a); 2804 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL); 2805 diff = chrec_fold_minus (type, left_a, left_b); 2806 2807 if (!evolution_function_is_constant_p (diff)) 2808 return false; 2809 2810 if (dump_file && (dump_flags & TDF_DETAILS)) 2811 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n"); 2812 2813 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a), 2814 diff, CHREC_RIGHT (*chrec_a)); 2815 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL); 2816 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b), 2817 build_int_cst (type, 0), 2818 right_b); 2819 return true; 2820 } 2821 2822 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and 2823 *OVERLAPS_B are initialized to the functions that describe the 2824 relation between the elements accessed twice by CHREC_A and 2825 CHREC_B. For k >= 0, the following property is verified: 2826 2827 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). 
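   For instance, for CHREC_A = {1, +, 2}_1 and CHREC_B = {3, +, 2}_1
   the property is verified with *OVERLAPS_A (k) = k + 1 and
   *OVERLAPS_B (k) = k, since 1 + 2 * (k + 1) = 3 + 2 * k for every
   k >= 0.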
*/ 2828 2829 static void 2830 analyze_siv_subscript (tree chrec_a, 2831 tree chrec_b, 2832 conflict_function **overlaps_a, 2833 conflict_function **overlaps_b, 2834 tree *last_conflicts, 2835 int loop_nest_num) 2836 { 2837 dependence_stats.num_siv++; 2838 2839 if (dump_file && (dump_flags & TDF_DETAILS)) 2840 fprintf (dump_file, "(analyze_siv_subscript \n"); 2841 2842 if (evolution_function_is_constant_p (chrec_a) 2843 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num)) 2844 analyze_siv_subscript_cst_affine (chrec_a, chrec_b, 2845 overlaps_a, overlaps_b, last_conflicts); 2846 2847 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num) 2848 && evolution_function_is_constant_p (chrec_b)) 2849 analyze_siv_subscript_cst_affine (chrec_b, chrec_a, 2850 overlaps_b, overlaps_a, last_conflicts); 2851 2852 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num) 2853 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num)) 2854 { 2855 if (!chrec_contains_symbols (chrec_a) 2856 && !chrec_contains_symbols (chrec_b)) 2857 { 2858 analyze_subscript_affine_affine (chrec_a, chrec_b, 2859 overlaps_a, overlaps_b, 2860 last_conflicts); 2861 2862 if (CF_NOT_KNOWN_P (*overlaps_a) 2863 || CF_NOT_KNOWN_P (*overlaps_b)) 2864 dependence_stats.num_siv_unimplemented++; 2865 else if (CF_NO_DEPENDENCE_P (*overlaps_a) 2866 || CF_NO_DEPENDENCE_P (*overlaps_b)) 2867 dependence_stats.num_siv_independent++; 2868 else 2869 dependence_stats.num_siv_dependent++; 2870 } 2871 else if (can_use_analyze_subscript_affine_affine (&chrec_a, 2872 &chrec_b)) 2873 { 2874 analyze_subscript_affine_affine (chrec_a, chrec_b, 2875 overlaps_a, overlaps_b, 2876 last_conflicts); 2877 2878 if (CF_NOT_KNOWN_P (*overlaps_a) 2879 || CF_NOT_KNOWN_P (*overlaps_b)) 2880 dependence_stats.num_siv_unimplemented++; 2881 else if (CF_NO_DEPENDENCE_P (*overlaps_a) 2882 || CF_NO_DEPENDENCE_P (*overlaps_b)) 2883 dependence_stats.num_siv_independent++; 2884 else 2885 dependence_stats.num_siv_dependent++; 2886 } 2887 else 2888 goto siv_subscript_dontknow; 2889 } 2890 2891 else 2892 { 2893 siv_subscript_dontknow:; 2894 if (dump_file && (dump_flags & TDF_DETAILS)) 2895 fprintf (dump_file, " siv test failed: unimplemented"); 2896 *overlaps_a = conflict_fn_not_known (); 2897 *overlaps_b = conflict_fn_not_known (); 2898 *last_conflicts = chrec_dont_know; 2899 dependence_stats.num_siv_unimplemented++; 2900 } 2901 2902 if (dump_file && (dump_flags & TDF_DETAILS)) 2903 fprintf (dump_file, ")\n"); 2904 } 2905 2906 /* Returns false if we can prove that the greatest common divisor of the steps 2907 of CHREC does not divide CST, false otherwise. */ 2908 2909 static bool 2910 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst) 2911 { 2912 HOST_WIDE_INT cd = 0, val; 2913 tree step; 2914 2915 if (!tree_fits_shwi_p (cst)) 2916 return true; 2917 val = tree_to_shwi (cst); 2918 2919 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC) 2920 { 2921 step = CHREC_RIGHT (chrec); 2922 if (!tree_fits_shwi_p (step)) 2923 return true; 2924 cd = gcd (cd, tree_to_shwi (step)); 2925 chrec = CHREC_LEFT (chrec); 2926 } 2927 2928 return val % cd == 0; 2929 } 2930 2931 /* Analyze a MIV (Multiple Index Variable) subscript with respect to 2932 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the 2933 functions that describe the relation between the elements accessed 2934 twice by CHREC_A and CHREC_B. For k >= 0, the following property 2935 is verified: 2936 2937 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). 
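   For instance, for CHREC_A = {0, +, 1}_1 and CHREC_B = {0, +, 2}_2,
   which vary in different loops, the conflicting iterations are
   exactly the pairs described by *OVERLAPS_A (k) = 2 * k and
   *OVERLAPS_B (k) = k.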
*/ 2938 2939 static void 2940 analyze_miv_subscript (tree chrec_a, 2941 tree chrec_b, 2942 conflict_function **overlaps_a, 2943 conflict_function **overlaps_b, 2944 tree *last_conflicts, 2945 struct loop *loop_nest) 2946 { 2947 tree type, difference; 2948 2949 dependence_stats.num_miv++; 2950 if (dump_file && (dump_flags & TDF_DETAILS)) 2951 fprintf (dump_file, "(analyze_miv_subscript \n"); 2952 2953 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b)); 2954 chrec_a = chrec_convert (type, chrec_a, NULL); 2955 chrec_b = chrec_convert (type, chrec_b, NULL); 2956 difference = chrec_fold_minus (type, chrec_a, chrec_b); 2957 2958 if (eq_evolutions_p (chrec_a, chrec_b)) 2959 { 2960 /* Access functions are the same: all the elements are accessed 2961 in the same order. */ 2962 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node)); 2963 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node)); 2964 *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a)); 2965 dependence_stats.num_miv_dependent++; 2966 } 2967 2968 else if (evolution_function_is_constant_p (difference) 2969 /* For the moment, the following is verified: 2970 evolution_function_is_affine_multivariate_p (chrec_a, 2971 loop_nest->num) */ 2972 && !gcd_of_steps_may_divide_p (chrec_a, difference)) 2973 { 2974 /* testsuite/.../ssa-chrec-33.c 2975 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2 2976 2977 The difference is 1, and all the evolution steps are multiples 2978 of 2, consequently there are no overlapping elements. */ 2979 *overlaps_a = conflict_fn_no_dependence (); 2980 *overlaps_b = conflict_fn_no_dependence (); 2981 *last_conflicts = integer_zero_node; 2982 dependence_stats.num_miv_independent++; 2983 } 2984 2985 else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num) 2986 && !chrec_contains_symbols (chrec_a) 2987 && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num) 2988 && !chrec_contains_symbols (chrec_b)) 2989 { 2990 /* testsuite/.../ssa-chrec-35.c 2991 {0, +, 1}_2 vs. {0, +, 1}_3 2992 the overlapping elements are respectively located at iterations: 2993 {0, +, 1}_x and {0, +, 1}_x, 2994 in other words, we have the equality: 2995 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x) 2996 2997 Other examples: 2998 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) = 2999 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y) 3000 3001 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) = 3002 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) 3003 */ 3004 analyze_subscript_affine_affine (chrec_a, chrec_b, 3005 overlaps_a, overlaps_b, last_conflicts); 3006 3007 if (CF_NOT_KNOWN_P (*overlaps_a) 3008 || CF_NOT_KNOWN_P (*overlaps_b)) 3009 dependence_stats.num_miv_unimplemented++; 3010 else if (CF_NO_DEPENDENCE_P (*overlaps_a) 3011 || CF_NO_DEPENDENCE_P (*overlaps_b)) 3012 dependence_stats.num_miv_independent++; 3013 else 3014 dependence_stats.num_miv_dependent++; 3015 } 3016 3017 else 3018 { 3019 /* When the analysis is too difficult, answer "don't know". 
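   This happens for instance when the evolution steps themselves are
   symbolic parameters, as in {0, +, n}_1 vs. {3, +, m}_1.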
*/ 3020 if (dump_file && (dump_flags & TDF_DETAILS)) 3021 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n"); 3022 3023 *overlaps_a = conflict_fn_not_known (); 3024 *overlaps_b = conflict_fn_not_known (); 3025 *last_conflicts = chrec_dont_know; 3026 dependence_stats.num_miv_unimplemented++; 3027 } 3028 3029 if (dump_file && (dump_flags & TDF_DETAILS)) 3030 fprintf (dump_file, ")\n"); 3031 } 3032 3033 /* Determines the iterations for which CHREC_A is equal to CHREC_B in 3034 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and 3035 OVERLAP_ITERATIONS_B are initialized with two functions that 3036 describe the iterations that contain conflicting elements. 3037 3038 Remark: For an integer k >= 0, the following equality is true: 3039 3040 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)). 3041 */ 3042 3043 static void 3044 analyze_overlapping_iterations (tree chrec_a, 3045 tree chrec_b, 3046 conflict_function **overlap_iterations_a, 3047 conflict_function **overlap_iterations_b, 3048 tree *last_conflicts, struct loop *loop_nest) 3049 { 3050 unsigned int lnn = loop_nest->num; 3051 3052 dependence_stats.num_subscript_tests++; 3053 3054 if (dump_file && (dump_flags & TDF_DETAILS)) 3055 { 3056 fprintf (dump_file, "(analyze_overlapping_iterations \n"); 3057 fprintf (dump_file, " (chrec_a = "); 3058 print_generic_expr (dump_file, chrec_a, 0); 3059 fprintf (dump_file, ")\n (chrec_b = "); 3060 print_generic_expr (dump_file, chrec_b, 0); 3061 fprintf (dump_file, ")\n"); 3062 } 3063 3064 if (chrec_a == NULL_TREE 3065 || chrec_b == NULL_TREE 3066 || chrec_contains_undetermined (chrec_a) 3067 || chrec_contains_undetermined (chrec_b)) 3068 { 3069 dependence_stats.num_subscript_undetermined++; 3070 3071 *overlap_iterations_a = conflict_fn_not_known (); 3072 *overlap_iterations_b = conflict_fn_not_known (); 3073 } 3074 3075 /* If they are the same chrec, and are affine, they overlap 3076 on every iteration. */ 3077 else if (eq_evolutions_p (chrec_a, chrec_b) 3078 && (evolution_function_is_affine_multivariate_p (chrec_a, lnn) 3079 || operand_equal_p (chrec_a, chrec_b, 0))) 3080 { 3081 dependence_stats.num_same_subscript_function++; 3082 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node)); 3083 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node)); 3084 *last_conflicts = chrec_dont_know; 3085 } 3086 3087 /* If they aren't the same, and aren't affine, we can't do anything 3088 yet. 
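   More precisely, the test below gives up when one of the chrecs
   contains symbols and one of them is not affine multivariate in
   LOOP_NEST.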
*/ 3089 else if ((chrec_contains_symbols (chrec_a) 3090 || chrec_contains_symbols (chrec_b)) 3091 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn) 3092 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn))) 3093 { 3094 dependence_stats.num_subscript_undetermined++; 3095 *overlap_iterations_a = conflict_fn_not_known (); 3096 *overlap_iterations_b = conflict_fn_not_known (); 3097 } 3098 3099 else if (ziv_subscript_p (chrec_a, chrec_b)) 3100 analyze_ziv_subscript (chrec_a, chrec_b, 3101 overlap_iterations_a, overlap_iterations_b, 3102 last_conflicts); 3103 3104 else if (siv_subscript_p (chrec_a, chrec_b)) 3105 analyze_siv_subscript (chrec_a, chrec_b, 3106 overlap_iterations_a, overlap_iterations_b, 3107 last_conflicts, lnn); 3108 3109 else 3110 analyze_miv_subscript (chrec_a, chrec_b, 3111 overlap_iterations_a, overlap_iterations_b, 3112 last_conflicts, loop_nest); 3113 3114 if (dump_file && (dump_flags & TDF_DETAILS)) 3115 { 3116 fprintf (dump_file, " (overlap_iterations_a = "); 3117 dump_conflict_function (dump_file, *overlap_iterations_a); 3118 fprintf (dump_file, ")\n (overlap_iterations_b = "); 3119 dump_conflict_function (dump_file, *overlap_iterations_b); 3120 fprintf (dump_file, "))\n"); 3121 } 3122 } 3123 3124 /* Helper function for uniquely inserting distance vectors. */ 3125 3126 static void 3127 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v) 3128 { 3129 unsigned i; 3130 lambda_vector v; 3131 3132 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, v) 3133 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr))) 3134 return; 3135 3136 DDR_DIST_VECTS (ddr).safe_push (dist_v); 3137 } 3138 3139 /* Helper function for uniquely inserting direction vectors. */ 3140 3141 static void 3142 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v) 3143 { 3144 unsigned i; 3145 lambda_vector v; 3146 3147 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), i, v) 3148 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr))) 3149 return; 3150 3151 DDR_DIR_VECTS (ddr).safe_push (dir_v); 3152 } 3153 3154 /* Add a distance of 1 on all the loops outer than INDEX. If we 3155 haven't yet determined a distance for this outer loop, push a new 3156 distance vector composed of the previous distance, and a distance 3157 of 1 for this outer loop. Example: 3158 3159 | loop_1 3160 | loop_2 3161 | A[10] 3162 | endloop_2 3163 | endloop_1 3164 3165 Saved vectors are of the form (dist_in_1, dist_in_2). First, we 3166 save (0, 1), then we have to save (1, 0). */ 3167 3168 static void 3169 add_outer_distances (struct data_dependence_relation *ddr, 3170 lambda_vector dist_v, int index) 3171 { 3172 /* For each outer loop where init_v is not set, the accesses are 3173 in dependence of distance 1 in the loop. */ 3174 while (--index >= 0) 3175 { 3176 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); 3177 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr)); 3178 save_v[index] = 1; 3179 save_dist_v (ddr, save_v); 3180 } 3181 } 3182 3183 /* Return false when fail to represent the data dependence as a 3184 distance vector. INIT_B is set to true when a component has been 3185 added to the distance vector DIST_V. INDEX_CARRY is then set to 3186 the index in DIST_V that carries the dependence. 
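   For instance, when a subscript pair has a known SUB_DISTANCE of 2
   carried by the outermost loop of the nest, the corresponding
   component of DIST_V becomes 2, *INIT_B is set to true, and
   *INDEX_CARRY becomes 0, the index of that loop.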
*/ 3187 3188 static bool 3189 build_classic_dist_vector_1 (struct data_dependence_relation *ddr, 3190 struct data_reference *ddr_a, 3191 struct data_reference *ddr_b, 3192 lambda_vector dist_v, bool *init_b, 3193 int *index_carry) 3194 { 3195 unsigned i; 3196 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); 3197 3198 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++) 3199 { 3200 tree access_fn_a, access_fn_b; 3201 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i); 3202 3203 if (chrec_contains_undetermined (SUB_DISTANCE (subscript))) 3204 { 3205 non_affine_dependence_relation (ddr); 3206 return false; 3207 } 3208 3209 access_fn_a = DR_ACCESS_FN (ddr_a, i); 3210 access_fn_b = DR_ACCESS_FN (ddr_b, i); 3211 3212 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC 3213 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC) 3214 { 3215 HOST_WIDE_INT dist; 3216 int index; 3217 int var_a = CHREC_VARIABLE (access_fn_a); 3218 int var_b = CHREC_VARIABLE (access_fn_b); 3219 3220 if (var_a != var_b 3221 || chrec_contains_undetermined (SUB_DISTANCE (subscript))) 3222 { 3223 non_affine_dependence_relation (ddr); 3224 return false; 3225 } 3226 3227 dist = int_cst_value (SUB_DISTANCE (subscript)); 3228 index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr)); 3229 *index_carry = MIN (index, *index_carry); 3230 3231 /* This is the subscript coupling test. If we have already 3232 recorded a distance for this loop (a distance coming from 3233 another subscript), it should be the same. For example, 3234 in the following code, there is no dependence: 3235 3236 | loop i = 0, N, 1 3237 | T[i+1][i] = ... 3238 | ... = T[i][i] 3239 | endloop 3240 */ 3241 if (init_v[index] != 0 && dist_v[index] != dist) 3242 { 3243 finalize_ddr_dependent (ddr, chrec_known); 3244 return false; 3245 } 3246 3247 dist_v[index] = dist; 3248 init_v[index] = 1; 3249 *init_b = true; 3250 } 3251 else if (!operand_equal_p (access_fn_a, access_fn_b, 0)) 3252 { 3253 /* This can be for example an affine vs. constant dependence 3254 (T[i] vs. T[3]) that is not an affine dependence and is 3255 not representable as a distance vector. */ 3256 non_affine_dependence_relation (ddr); 3257 return false; 3258 } 3259 } 3260 3261 return true; 3262 } 3263 3264 /* Return true when the DDR contains only constant access functions. */ 3265 3266 static bool 3267 constant_access_functions (const struct data_dependence_relation *ddr) 3268 { 3269 unsigned i; 3270 3271 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++) 3272 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i)) 3273 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i))) 3274 return false; 3275 3276 return true; 3277 } 3278 3279 /* Helper function for the case where DDR_A and DDR_B are the same 3280 multivariate access function with a constant step. For an example 3281 see pr34635-1.c. */ 3282 3283 static void 3284 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2) 3285 { 3286 int x_1, x_2; 3287 tree c_1 = CHREC_LEFT (c_2); 3288 tree c_0 = CHREC_LEFT (c_1); 3289 lambda_vector dist_v; 3290 HOST_WIDE_INT v1, v2, cd; 3291 3292 /* Polynomials with more than 2 variables are not handled yet. When 3293 the evolution steps are parameters, it is not possible to 3294 represent the dependence using classical distance vectors. 
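   For instance, {{0, +, n}_1, +, 4}_2, whose inner step is the
   parameter n, falls in this category: the code below only marks the
   relation as non-affine.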
*/ 3295 if (TREE_CODE (c_0) != INTEGER_CST 3296 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST 3297 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST) 3298 { 3299 DDR_AFFINE_P (ddr) = false; 3300 return; 3301 } 3302 3303 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr)); 3304 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr)); 3305 3306 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */ 3307 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); 3308 v1 = int_cst_value (CHREC_RIGHT (c_1)); 3309 v2 = int_cst_value (CHREC_RIGHT (c_2)); 3310 cd = gcd (v1, v2); 3311 v1 /= cd; 3312 v2 /= cd; 3313 3314 if (v2 < 0) 3315 { 3316 v2 = -v2; 3317 v1 = -v1; 3318 } 3319 3320 dist_v[x_1] = v2; 3321 dist_v[x_2] = -v1; 3322 save_dist_v (ddr, dist_v); 3323 3324 add_outer_distances (ddr, dist_v, x_1); 3325 } 3326 3327 /* Helper function for the case where DDR_A and DDR_B are the same 3328 access functions. */ 3329 3330 static void 3331 add_other_self_distances (struct data_dependence_relation *ddr) 3332 { 3333 lambda_vector dist_v; 3334 unsigned i; 3335 int index_carry = DDR_NB_LOOPS (ddr); 3336 3337 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++) 3338 { 3339 tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i); 3340 3341 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC) 3342 { 3343 if (!evolution_function_is_univariate_p (access_fun)) 3344 { 3345 if (DDR_NUM_SUBSCRIPTS (ddr) != 1) 3346 { 3347 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know; 3348 return; 3349 } 3350 3351 access_fun = DR_ACCESS_FN (DDR_A (ddr), 0); 3352 3353 if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC) 3354 add_multivariate_self_dist (ddr, access_fun); 3355 else 3356 /* The evolution step is not constant: it varies in 3357 the outer loop, so this cannot be represented by a 3358 distance vector. For example in pr34635.c the 3359 evolution is {0, +, {0, +, 4}_1}_2. */ 3360 DDR_AFFINE_P (ddr) = false; 3361 3362 return; 3363 } 3364 3365 index_carry = MIN (index_carry, 3366 index_in_loop_nest (CHREC_VARIABLE (access_fun), 3367 DDR_LOOP_NEST (ddr))); 3368 } 3369 } 3370 3371 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); 3372 add_outer_distances (ddr, dist_v, index_carry); 3373 } 3374 3375 static void 3376 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr) 3377 { 3378 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); 3379 3380 dist_v[DDR_INNER_LOOP (ddr)] = 1; 3381 save_dist_v (ddr, dist_v); 3382 } 3383 3384 /* Adds a unit distance vector to DDR when there is a 0 overlap. This 3385 is the case for example when access functions are the same and 3386 equal to a constant, as in: 3387 3388 | loop_1 3389 | A[3] = ... 3390 | ... = A[3] 3391 | endloop_1 3392 3393 in which case the distance vectors are (0) and (1). */ 3394 3395 static void 3396 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr) 3397 { 3398 unsigned i, j; 3399 3400 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++) 3401 { 3402 subscript_p sub = DDR_SUBSCRIPT (ddr, i); 3403 conflict_function *ca = SUB_CONFLICTS_IN_A (sub); 3404 conflict_function *cb = SUB_CONFLICTS_IN_B (sub); 3405 3406 for (j = 0; j < ca->n; j++) 3407 if (affine_function_zero_p (ca->fns[j])) 3408 { 3409 insert_innermost_unit_dist_vector (ddr); 3410 return; 3411 } 3412 3413 for (j = 0; j < cb->n; j++) 3414 if (affine_function_zero_p (cb->fns[j])) 3415 { 3416 insert_innermost_unit_dist_vector (ddr); 3417 return; 3418 } 3419 } 3420 } 3421 3422 /* Compute the classic per loop distance vector. 
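   For instance, in

   | loop_1
   |   A[i + 2] = ...
   |   ... = A[i]
   | endloop_1

   the element stored at iteration i of loop_1 is loaded again at
   iteration i + 2, so the dependence has a distance of 2 in loop_1.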
DDR is the data 3423 dependence relation to build a vector from. Return false when fail 3424 to represent the data dependence as a distance vector. */ 3425 3426 static bool 3427 build_classic_dist_vector (struct data_dependence_relation *ddr, 3428 struct loop *loop_nest) 3429 { 3430 bool init_b = false; 3431 int index_carry = DDR_NB_LOOPS (ddr); 3432 lambda_vector dist_v; 3433 3434 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE) 3435 return false; 3436 3437 if (same_access_functions (ddr)) 3438 { 3439 /* Save the 0 vector. */ 3440 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); 3441 save_dist_v (ddr, dist_v); 3442 3443 if (constant_access_functions (ddr)) 3444 add_distance_for_zero_overlaps (ddr); 3445 3446 if (DDR_NB_LOOPS (ddr) > 1) 3447 add_other_self_distances (ddr); 3448 3449 return true; 3450 } 3451 3452 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); 3453 if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr), 3454 dist_v, &init_b, &index_carry)) 3455 return false; 3456 3457 /* Save the distance vector if we initialized one. */ 3458 if (init_b) 3459 { 3460 /* Verify a basic constraint: classic distance vectors should 3461 always be lexicographically positive. 3462 3463 Data references are collected in the order of execution of 3464 the program, thus for the following loop 3465 3466 | for (i = 1; i < 100; i++) 3467 | for (j = 1; j < 100; j++) 3468 | { 3469 | t = T[j+1][i-1]; // A 3470 | T[j][i] = t + 2; // B 3471 | } 3472 3473 references are collected following the direction of the wind: 3474 A then B. The data dependence tests are performed also 3475 following this order, such that we're looking at the distance 3476 separating the elements accessed by A from the elements later 3477 accessed by B. But in this example, the distance returned by 3478 test_dep (A, B) is lexicographically negative (-1, 1), that 3479 means that the access A occurs later than B with respect to 3480 the outer loop, ie. we're actually looking upwind. In this 3481 case we solve test_dep (B, A) looking downwind to the 3482 lexicographically positive solution, that returns the 3483 distance vector (1, -1). 
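   A vector is lexicographically positive when its first nonzero
   component, if any, is positive: (1, -1) qualifies, while (-1, 1)
   does not.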
*/ 3484 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr))) 3485 { 3486 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); 3487 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr), 3488 loop_nest)) 3489 return false; 3490 compute_subscript_distance (ddr); 3491 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr), 3492 save_v, &init_b, &index_carry)) 3493 return false; 3494 save_dist_v (ddr, save_v); 3495 DDR_REVERSED_P (ddr) = true; 3496 3497 /* In this case there is a dependence forward for all the 3498 outer loops: 3499 3500 | for (k = 1; k < 100; k++) 3501 | for (i = 1; i < 100; i++) 3502 | for (j = 1; j < 100; j++) 3503 | { 3504 | t = T[j+1][i-1]; // A 3505 | T[j][i] = t + 2; // B 3506 | } 3507 3508 the vectors are: 3509 (0, 1, -1) 3510 (1, 1, -1) 3511 (1, -1, 1) 3512 */ 3513 if (DDR_NB_LOOPS (ddr) > 1) 3514 { 3515 add_outer_distances (ddr, save_v, index_carry); 3516 add_outer_distances (ddr, dist_v, index_carry); 3517 } 3518 } 3519 else 3520 { 3521 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); 3522 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr)); 3523 3524 if (DDR_NB_LOOPS (ddr) > 1) 3525 { 3526 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); 3527 3528 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), 3529 DDR_A (ddr), loop_nest)) 3530 return false; 3531 compute_subscript_distance (ddr); 3532 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr), 3533 opposite_v, &init_b, 3534 &index_carry)) 3535 return false; 3536 3537 save_dist_v (ddr, save_v); 3538 add_outer_distances (ddr, dist_v, index_carry); 3539 add_outer_distances (ddr, opposite_v, index_carry); 3540 } 3541 else 3542 save_dist_v (ddr, save_v); 3543 } 3544 } 3545 else 3546 { 3547 /* There is a distance of 1 on all the outer loops: Example: 3548 there is a dependence of distance 1 on loop_1 for the array A. 3549 3550 | loop_1 3551 | A[5] = ... 3552 | endloop 3553 */ 3554 add_outer_distances (ddr, dist_v, 3555 lambda_vector_first_nz (dist_v, 3556 DDR_NB_LOOPS (ddr), 0)); 3557 } 3558 3559 if (dump_file && (dump_flags & TDF_DETAILS)) 3560 { 3561 unsigned i; 3562 3563 fprintf (dump_file, "(build_classic_dist_vector\n"); 3564 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++) 3565 { 3566 fprintf (dump_file, " dist_vector = ("); 3567 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i), 3568 DDR_NB_LOOPS (ddr)); 3569 fprintf (dump_file, " )\n"); 3570 } 3571 fprintf (dump_file, ")\n"); 3572 } 3573 3574 return true; 3575 } 3576 3577 /* Return the direction for a given distance. 3578 FIXME: Computing dir this way is suboptimal, since dir can catch 3579 cases that dist is unable to represent. */ 3580 3581 static inline enum data_dependence_direction 3582 dir_from_dist (int dist) 3583 { 3584 if (dist > 0) 3585 return dir_positive; 3586 else if (dist < 0) 3587 return dir_negative; 3588 else 3589 return dir_equal; 3590 } 3591 3592 /* Compute the classic per loop direction vector. DDR is the data 3593 dependence relation to build a vector from. */ 3594 3595 static void 3596 build_classic_dir_vector (struct data_dependence_relation *ddr) 3597 { 3598 unsigned i, j; 3599 lambda_vector dist_v; 3600 3601 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v) 3602 { 3603 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); 3604 3605 for (j = 0; j < DDR_NB_LOOPS (ddr); j++) 3606 dir_v[j] = dir_from_dist (dist_v[j]); 3607 3608 save_dir_v (ddr, dir_v); 3609 } 3610 } 3611 3612 /* Helper function. 
Returns true when there is a dependence between 3613 data references DRA and DRB. */ 3614 3615 static bool 3616 subscript_dependence_tester_1 (struct data_dependence_relation *ddr, 3617 struct data_reference *dra, 3618 struct data_reference *drb, 3619 struct loop *loop_nest) 3620 { 3621 unsigned int i; 3622 tree last_conflicts; 3623 struct subscript *subscript; 3624 tree res = NULL_TREE; 3625 3626 for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++) 3627 { 3628 conflict_function *overlaps_a, *overlaps_b; 3629 3630 analyze_overlapping_iterations (DR_ACCESS_FN (dra, i), 3631 DR_ACCESS_FN (drb, i), 3632 &overlaps_a, &overlaps_b, 3633 &last_conflicts, loop_nest); 3634 3635 if (SUB_CONFLICTS_IN_A (subscript)) 3636 free_conflict_function (SUB_CONFLICTS_IN_A (subscript)); 3637 if (SUB_CONFLICTS_IN_B (subscript)) 3638 free_conflict_function (SUB_CONFLICTS_IN_B (subscript)); 3639 3640 SUB_CONFLICTS_IN_A (subscript) = overlaps_a; 3641 SUB_CONFLICTS_IN_B (subscript) = overlaps_b; 3642 SUB_LAST_CONFLICT (subscript) = last_conflicts; 3643 3644 /* If there is any undetermined conflict function we have to 3645 give a conservative answer in case we cannot prove that 3646 no dependence exists when analyzing another subscript. */ 3647 if (CF_NOT_KNOWN_P (overlaps_a) 3648 || CF_NOT_KNOWN_P (overlaps_b)) 3649 { 3650 res = chrec_dont_know; 3651 continue; 3652 } 3653 3654 /* When there is a subscript with no dependence we can stop. */ 3655 else if (CF_NO_DEPENDENCE_P (overlaps_a) 3656 || CF_NO_DEPENDENCE_P (overlaps_b)) 3657 { 3658 res = chrec_known; 3659 break; 3660 } 3661 } 3662 3663 if (res == NULL_TREE) 3664 return true; 3665 3666 if (res == chrec_known) 3667 dependence_stats.num_dependence_independent++; 3668 else 3669 dependence_stats.num_dependence_undetermined++; 3670 finalize_ddr_dependent (ddr, res); 3671 return false; 3672 } 3673 3674 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */ 3675 3676 static void 3677 subscript_dependence_tester (struct data_dependence_relation *ddr, 3678 struct loop *loop_nest) 3679 { 3680 if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest)) 3681 dependence_stats.num_dependence_dependent++; 3682 3683 compute_subscript_distance (ddr); 3684 if (build_classic_dist_vector (ddr, loop_nest)) 3685 build_classic_dir_vector (ddr); 3686 } 3687 3688 /* Returns true when all the access functions of A are affine or 3689 constant with respect to LOOP_NEST. */ 3690 3691 static bool 3692 access_functions_are_affine_or_constant_p (const struct data_reference *a, 3693 const struct loop *loop_nest) 3694 { 3695 unsigned int i; 3696 vec<tree> fns = DR_ACCESS_FNS (a); 3697 tree t; 3698 3699 FOR_EACH_VEC_ELT (fns, i, t) 3700 if (!evolution_function_is_invariant_p (t, loop_nest->num) 3701 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num)) 3702 return false; 3703 3704 return true; 3705 } 3706 3707 /* This computes the affine dependence relation between A and B with 3708 respect to LOOP_NEST. CHREC_KNOWN is used for representing the 3709 independence between two accesses, while CHREC_DONT_KNOW is used 3710 for representing the unknown relation. 3711 3712 Note that it is possible to stop the computation of the dependence 3713 relation the first time we detect a CHREC_KNOWN element for a given 3714 subscript. 
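   For instance, for the pair of references A[i][0] and A[i][1] the
   second subscript compares the constants 0 and 1, which is enough
   to conclude that the accesses are independent, whatever the first
   subscript says.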
*/ 3715 3716 void 3717 compute_affine_dependence (struct data_dependence_relation *ddr, 3718 struct loop *loop_nest) 3719 { 3720 struct data_reference *dra = DDR_A (ddr); 3721 struct data_reference *drb = DDR_B (ddr); 3722 3723 if (dump_file && (dump_flags & TDF_DETAILS)) 3724 { 3725 fprintf (dump_file, "(compute_affine_dependence\n"); 3726 fprintf (dump_file, " stmt_a: "); 3727 print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM); 3728 fprintf (dump_file, " stmt_b: "); 3729 print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM); 3730 } 3731 3732 /* Analyze only when the dependence relation is not yet known. */ 3733 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE) 3734 { 3735 dependence_stats.num_dependence_tests++; 3736 3737 if (access_functions_are_affine_or_constant_p (dra, loop_nest) 3738 && access_functions_are_affine_or_constant_p (drb, loop_nest)) 3739 subscript_dependence_tester (ddr, loop_nest); 3740 3741 /* As a last case, if the dependence cannot be determined, or if 3742 the dependence is considered too difficult to determine, answer 3743 "don't know". */ 3744 else 3745 { 3746 dependence_stats.num_dependence_undetermined++; 3747 3748 if (dump_file && (dump_flags & TDF_DETAILS)) 3749 { 3750 fprintf (dump_file, "Data ref a:\n"); 3751 dump_data_reference (dump_file, dra); 3752 fprintf (dump_file, "Data ref b:\n"); 3753 dump_data_reference (dump_file, drb); 3754 fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n"); 3755 } 3756 finalize_ddr_dependent (ddr, chrec_dont_know); 3757 } 3758 } 3759 3760 if (dump_file && (dump_flags & TDF_DETAILS)) 3761 { 3762 if (DDR_ARE_DEPENDENT (ddr) == chrec_known) 3763 fprintf (dump_file, ") -> no dependence\n"); 3764 else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know) 3765 fprintf (dump_file, ") -> dependence analysis failed\n"); 3766 else 3767 fprintf (dump_file, ")\n"); 3768 } 3769 } 3770 3771 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all 3772 the data references in DATAREFS, in the LOOP_NEST. When 3773 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self 3774 relations. Return true when successful, i.e. data references number 3775 is small enough to be handled. */ 3776 3777 bool 3778 compute_all_dependences (vec<data_reference_p> datarefs, 3779 vec<ddr_p> *dependence_relations, 3780 vec<loop_p> loop_nest, 3781 bool compute_self_and_rr) 3782 { 3783 struct data_dependence_relation *ddr; 3784 struct data_reference *a, *b; 3785 unsigned int i, j; 3786 3787 if ((int) datarefs.length () 3788 > PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS)) 3789 { 3790 struct data_dependence_relation *ddr; 3791 3792 /* Insert a single relation into dependence_relations: 3793 chrec_dont_know. 
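   This keeps the cost of the analysis bounded: when the number of
   data references exceeds PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS, the
   callers see a single conservative "don't know" relation instead of
   a quadratic number of dependence tests.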
*/ 3794 ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest); 3795 dependence_relations->safe_push (ddr); 3796 return false; 3797 } 3798 3799 FOR_EACH_VEC_ELT (datarefs, i, a) 3800 for (j = i + 1; datarefs.iterate (j, &b); j++) 3801 if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr) 3802 { 3803 ddr = initialize_data_dependence_relation (a, b, loop_nest); 3804 dependence_relations->safe_push (ddr); 3805 if (loop_nest.exists ()) 3806 compute_affine_dependence (ddr, loop_nest[0]); 3807 } 3808 3809 if (compute_self_and_rr) 3810 FOR_EACH_VEC_ELT (datarefs, i, a) 3811 { 3812 ddr = initialize_data_dependence_relation (a, a, loop_nest); 3813 dependence_relations->safe_push (ddr); 3814 if (loop_nest.exists ()) 3815 compute_affine_dependence (ddr, loop_nest[0]); 3816 } 3817 3818 return true; 3819 } 3820 3821 /* Describes a location of a memory reference. */ 3822 3823 struct data_ref_loc 3824 { 3825 /* The memory reference. */ 3826 tree ref; 3827 3828 /* True if the memory reference is read. */ 3829 bool is_read; 3830 }; 3831 3832 3833 /* Stores the locations of memory references in STMT to REFERENCES. Returns 3834 true if STMT clobbers memory, false otherwise. */ 3835 3836 static bool 3837 get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references) 3838 { 3839 bool clobbers_memory = false; 3840 data_ref_loc ref; 3841 tree op0, op1; 3842 enum gimple_code stmt_code = gimple_code (stmt); 3843 3844 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects. 3845 As we cannot model data-references to not spelled out 3846 accesses give up if they may occur. */ 3847 if (stmt_code == GIMPLE_CALL 3848 && !(gimple_call_flags (stmt) & ECF_CONST)) 3849 { 3850 /* Allow IFN_GOMP_SIMD_LANE in their own loops. */ 3851 if (gimple_call_internal_p (stmt)) 3852 switch (gimple_call_internal_fn (stmt)) 3853 { 3854 case IFN_GOMP_SIMD_LANE: 3855 { 3856 struct loop *loop = gimple_bb (stmt)->loop_father; 3857 tree uid = gimple_call_arg (stmt, 0); 3858 gcc_assert (TREE_CODE (uid) == SSA_NAME); 3859 if (loop == NULL 3860 || loop->simduid != SSA_NAME_VAR (uid)) 3861 clobbers_memory = true; 3862 break; 3863 } 3864 case IFN_MASK_LOAD: 3865 case IFN_MASK_STORE: 3866 break; 3867 default: 3868 clobbers_memory = true; 3869 break; 3870 } 3871 else 3872 clobbers_memory = true; 3873 } 3874 else if (stmt_code == GIMPLE_ASM 3875 && (gimple_asm_volatile_p (as_a <gasm *> (stmt)) 3876 || gimple_vuse (stmt))) 3877 clobbers_memory = true; 3878 3879 if (!gimple_vuse (stmt)) 3880 return clobbers_memory; 3881 3882 if (stmt_code == GIMPLE_ASSIGN) 3883 { 3884 tree base; 3885 op0 = gimple_assign_lhs (stmt); 3886 op1 = gimple_assign_rhs1 (stmt); 3887 3888 if (DECL_P (op1) 3889 || (REFERENCE_CLASS_P (op1) 3890 && (base = get_base_address (op1)) 3891 && TREE_CODE (base) != SSA_NAME 3892 && !is_gimple_min_invariant (base))) 3893 { 3894 ref.ref = op1; 3895 ref.is_read = true; 3896 references->safe_push (ref); 3897 } 3898 } 3899 else if (stmt_code == GIMPLE_CALL) 3900 { 3901 unsigned i, n; 3902 tree ptr, type; 3903 unsigned int align; 3904 3905 ref.is_read = false; 3906 if (gimple_call_internal_p (stmt)) 3907 switch (gimple_call_internal_fn (stmt)) 3908 { 3909 case IFN_MASK_LOAD: 3910 if (gimple_call_lhs (stmt) == NULL_TREE) 3911 break; 3912 ref.is_read = true; 3913 /* FALLTHRU */ 3914 case IFN_MASK_STORE: 3915 ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0); 3916 align = tree_to_shwi (gimple_call_arg (stmt, 1)); 3917 if (ref.is_read) 3918 type = TREE_TYPE (gimple_call_lhs (stmt)); 3919 else 3920 
/* Describes a location of a memory reference.  */

struct data_ref_loc
{
  /* The memory reference.  */
  tree ref;

  /* True if the memory reference is read.  */
  bool is_read;
};


/* Stores the locations of memory references in STMT to REFERENCES.  Returns
   true if STMT clobbers memory, false otherwise.  */

static bool
get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
{
  bool clobbers_memory = false;
  data_ref_loc ref;
  tree op0, op1;
  enum gimple_code stmt_code = gimple_code (stmt);

  /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
     As we cannot model data-references to accesses that are not spelled
     out explicitly, give up if they may occur.  */
  if (stmt_code == GIMPLE_CALL
      && !(gimple_call_flags (stmt) & ECF_CONST))
    {
      /* Allow IFN_GOMP_SIMD_LANE in their own loops.  */
      if (gimple_call_internal_p (stmt))
        switch (gimple_call_internal_fn (stmt))
          {
          case IFN_GOMP_SIMD_LANE:
            {
              struct loop *loop = gimple_bb (stmt)->loop_father;
              tree uid = gimple_call_arg (stmt, 0);
              gcc_assert (TREE_CODE (uid) == SSA_NAME);
              if (loop == NULL
                  || loop->simduid != SSA_NAME_VAR (uid))
                clobbers_memory = true;
              break;
            }
          case IFN_MASK_LOAD:
          case IFN_MASK_STORE:
            break;
          default:
            clobbers_memory = true;
            break;
          }
      else
        clobbers_memory = true;
    }
  else if (stmt_code == GIMPLE_ASM
           && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
               || gimple_vuse (stmt)))
    clobbers_memory = true;

  if (!gimple_vuse (stmt))
    return clobbers_memory;

  if (stmt_code == GIMPLE_ASSIGN)
    {
      tree base;
      op0 = gimple_assign_lhs (stmt);
      op1 = gimple_assign_rhs1 (stmt);

      if (DECL_P (op1)
          || (REFERENCE_CLASS_P (op1)
              && (base = get_base_address (op1))
              && TREE_CODE (base) != SSA_NAME
              && !is_gimple_min_invariant (base)))
        {
          ref.ref = op1;
          ref.is_read = true;
          references->safe_push (ref);
        }
    }
  else if (stmt_code == GIMPLE_CALL)
    {
      unsigned i, n;
      tree ptr, type;
      unsigned int align;

      ref.is_read = false;
      if (gimple_call_internal_p (stmt))
        switch (gimple_call_internal_fn (stmt))
          {
          case IFN_MASK_LOAD:
            if (gimple_call_lhs (stmt) == NULL_TREE)
              break;
            ref.is_read = true;
            /* FALLTHRU */
          case IFN_MASK_STORE:
            ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0);
            align = tree_to_shwi (gimple_call_arg (stmt, 1));
            if (ref.is_read)
              type = TREE_TYPE (gimple_call_lhs (stmt));
            else
              type = TREE_TYPE (gimple_call_arg (stmt, 3));
            if (TYPE_ALIGN (type) != align)
              type = build_aligned_type (type, align);
            ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0),
                                   ptr);
            references->safe_push (ref);
            return false;
          default:
            break;
          }

      op0 = gimple_call_lhs (stmt);
      n = gimple_call_num_args (stmt);
      for (i = 0; i < n; i++)
        {
          op1 = gimple_call_arg (stmt, i);

          if (DECL_P (op1)
              || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
            {
              ref.ref = op1;
              ref.is_read = true;
              references->safe_push (ref);
            }
        }
    }
  else
    return clobbers_memory;

  if (op0
      && (DECL_P (op0)
          || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
    {
      ref.ref = op0;
      ref.is_read = false;
      references->safe_push (ref);
    }
  return clobbers_memory;
}


/* Returns true if the loop-nest has any data reference.  */

bool
loop_nest_has_data_refs (loop_p loop)
{
  basic_block *bbs = get_loop_body (loop);
  auto_vec<data_ref_loc, 3> references;

  for (unsigned i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];
      gimple_stmt_iterator bsi;

      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        {
          gimple *stmt = gsi_stmt (bsi);
          get_references_in_stmt (stmt, &references);
          if (references.length ())
            {
              free (bbs);
              return true;
            }
        }
    }
  free (bbs);

  if (loop->inner)
    {
      loop = loop->inner;
      while (loop)
        {
          if (loop_nest_has_data_refs (loop))
            return true;
          loop = loop->next;
        }
    }
  return false;
}

/* Stores the data references in STMT to DATAREFS.  If there is an
   unanalyzable reference, returns false, otherwise returns true.
   NEST is the outermost loop of the loop nest in which the references
   should be analyzed.  */

bool
find_data_references_in_stmt (struct loop *nest, gimple *stmt,
                              vec<data_reference_p> *datarefs)
{
  unsigned i;
  auto_vec<data_ref_loc, 2> references;
  data_ref_loc *ref;
  bool ret = true;
  data_reference_p dr;

  if (get_references_in_stmt (stmt, &references))
    return false;

  FOR_EACH_VEC_ELT (references, i, ref)
    {
      dr = create_data_ref (nest, loop_containing_stmt (stmt),
                            ref->ref, stmt, ref->is_read);
      gcc_assert (dr != NULL);
      datarefs->safe_push (dr);
    }

  return ret;
}

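/* Illustrative sketch only: a per-statement client collects the
   references of STMT into a caller-owned vector and gives up on the
   statement when the function returns false, i.e. when STMT clobbers
   memory or contains an unanalyzable reference.  Any data references
   pushed remain owned by the caller and are released with
   free_data_refs.

   | auto_vec<data_reference_p, 2> refs;
   | if (!find_data_references_in_stmt (loop, stmt, &refs))
   |   treat the statement as an unanalyzable memory access;  */
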
/* Stores the data references in STMT to DATAREFS.  If there is an
   unanalyzable reference, returns false, otherwise returns true.
   NEST is the outermost loop of the loop nest in which the references
   should be instantiated, LOOP is the loop in which the references
   should be analyzed.  */

bool
graphite_find_data_references_in_stmt (loop_p nest, loop_p loop, gimple *stmt,
                                       vec<data_reference_p> *datarefs)
{
  unsigned i;
  auto_vec<data_ref_loc, 2> references;
  data_ref_loc *ref;
  bool ret = true;
  data_reference_p dr;

  if (get_references_in_stmt (stmt, &references))
    return false;

  FOR_EACH_VEC_ELT (references, i, ref)
    {
      dr = create_data_ref (nest, loop, ref->ref, stmt, ref->is_read);
      gcc_assert (dr != NULL);
      datarefs->safe_push (dr);
    }

  return ret;
}

/* Search the data references in basic block BB, and record the
   information into DATAREFS.  Returns chrec_dont_know when failing to
   analyze a difficult case, returns NULL_TREE otherwise.  */

tree
find_data_references_in_bb (struct loop *loop, basic_block bb,
                            vec<data_reference_p> *datarefs)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    {
      gimple *stmt = gsi_stmt (bsi);

      if (!find_data_references_in_stmt (loop, stmt, datarefs))
        {
          struct data_reference *res;
          res = XCNEW (struct data_reference);
          datarefs->safe_push (res);

          return chrec_dont_know;
        }
    }

  return NULL_TREE;
}

/* Search the data references in LOOP, and record the information into
   DATAREFS.  Returns chrec_dont_know when failing to analyze a
   difficult case, returns NULL_TREE otherwise.

   TODO: This function should be made smarter so that it can handle
   address arithmetic as if it were an array access, etc.  */

tree
find_data_references_in_loop (struct loop *loop,
                              vec<data_reference_p> *datarefs)
{
  basic_block bb, *bbs;
  unsigned int i;

  bbs = get_loop_body_in_dom_order (loop);

  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = bbs[i];

      if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
        {
          free (bbs);
          return chrec_dont_know;
        }
    }
  free (bbs);

  return NULL_TREE;
}

/* Recursive helper function.  */

static bool
find_loop_nest_1 (struct loop *loop, vec<loop_p> *loop_nest)
{
  /* Inner loops of the nest should not contain siblings.  Example:
     when there are two consecutive loops,

     | loop_0
     |   loop_1
     |     A[{0, +, 1}_1]
     |   endloop_1
     |   loop_2
     |     A[{0, +, 1}_2]
     |   endloop_2
     | endloop_0

     the dependence relation cannot be captured by the distance
     abstraction.  */
  if (loop->next)
    return false;

  loop_nest->safe_push (loop);
  if (loop->inner)
    return find_loop_nest_1 (loop->inner, loop_nest);
  return true;
}

/* Return false when the LOOP is not well nested.  Otherwise return
   true and insert into LOOP_NEST the loops of the nest.  LOOP_NEST will
   contain the loops from the outermost to the innermost, as they will
   appear in the classic distance vector.  */

bool
find_loop_nest (struct loop *loop, vec<loop_p> *loop_nest)
{
  loop_nest->safe_push (loop);
  if (loop->inner)
    return find_loop_nest_1 (loop->inner, loop_nest);
  return true;
}

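/* Sketch only: a typical caller builds the loop nest vector before
   running the dependence analysis and falls back to a conservative
   answer when the nest contains sibling loops, since distance vectors
   cannot describe such a nest.

   | auto_vec<loop_p, 3> loop_nest;
   | if (!find_loop_nest (loop, &loop_nest))
   |   bail out: the nest is not usable for dependence analysis.  */
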
/* Returns true when the data dependences have been computed, false otherwise.
   Given a loop nest LOOP, the following vectors are returned:
   DATAREFS is initialized to all the data references contained in this loop,
   DEPENDENCE_RELATIONS contains the relations between the data references.
   Compute read-read and self relations if
   COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE.  */

bool
compute_data_dependences_for_loop (struct loop *loop,
                                   bool compute_self_and_read_read_dependences,
                                   vec<loop_p> *loop_nest,
                                   vec<data_reference_p> *datarefs,
                                   vec<ddr_p> *dependence_relations)
{
  bool res = true;

  memset (&dependence_stats, 0, sizeof (dependence_stats));

  /* If the loop nest is not well formed, or one of the data references
     is not computable, give up without spending time to compute other
     dependences.  */
  if (!loop
      || !find_loop_nest (loop, loop_nest)
      || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
      || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
                                   compute_self_and_read_read_dependences))
    res = false;

  if (dump_file && (dump_flags & TDF_STATS))
    {
      fprintf (dump_file, "Dependence tester statistics:\n");

      fprintf (dump_file, "Number of dependence tests: %d\n",
               dependence_stats.num_dependence_tests);
      fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
               dependence_stats.num_dependence_dependent);
      fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
               dependence_stats.num_dependence_independent);
      fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
               dependence_stats.num_dependence_undetermined);

      fprintf (dump_file, "Number of subscript tests: %d\n",
               dependence_stats.num_subscript_tests);
      fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
               dependence_stats.num_subscript_undetermined);
      fprintf (dump_file, "Number of same subscript function: %d\n",
               dependence_stats.num_same_subscript_function);

      fprintf (dump_file, "Number of ziv tests: %d\n",
               dependence_stats.num_ziv);
      fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
               dependence_stats.num_ziv_dependent);
      fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
               dependence_stats.num_ziv_independent);
      fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
               dependence_stats.num_ziv_unimplemented);

      fprintf (dump_file, "Number of siv tests: %d\n",
               dependence_stats.num_siv);
      fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
               dependence_stats.num_siv_dependent);
      fprintf (dump_file, "Number of siv tests returning independent: %d\n",
               dependence_stats.num_siv_independent);
      fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
               dependence_stats.num_siv_unimplemented);

      fprintf (dump_file, "Number of miv tests: %d\n",
               dependence_stats.num_miv);
      fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
               dependence_stats.num_miv_dependent);
      fprintf (dump_file, "Number of miv tests returning independent: %d\n",
               dependence_stats.num_miv_independent);
      fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
               dependence_stats.num_miv_unimplemented);
    }

  return res;
}

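/* End-to-end usage sketch only; the variable names are hypothetical and
   LOOP is any loop owned by the caller:

   | vec<loop_p> loop_nest = vNULL;
   | vec<data_reference_p> datarefs = vNULL;
   | vec<ddr_p> dependence_relations = vNULL;
   |
   | if (!compute_data_dependences_for_loop (loop, true, &loop_nest,
   |                                         &datarefs,
   |                                         &dependence_relations))
   |   treat every pair of references as possibly dependent;
   | else
   |   walk DEPENDENCE_RELATIONS and inspect DDR_ARE_DEPENDENT on each
   |   relation, as in the sketch after compute_affine_dependence above.  */
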
/* Free the memory used by a data dependence relation DDR.  */

void
free_dependence_relation (struct data_dependence_relation *ddr)
{
  if (ddr == NULL)
    return;

  if (DDR_SUBSCRIPTS (ddr).exists ())
    free_subscripts (DDR_SUBSCRIPTS (ddr));
  DDR_DIST_VECTS (ddr).release ();
  DDR_DIR_VECTS (ddr).release ();

  free (ddr);
}

/* Free the memory used by the data dependence relations from
   DEPENDENCE_RELATIONS.  */

void
free_dependence_relations (vec<ddr_p> dependence_relations)
{
  unsigned int i;
  struct data_dependence_relation *ddr;

  FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
    if (ddr)
      free_dependence_relation (ddr);

  dependence_relations.release ();
}

/* Free the memory used by the data references from DATAREFS.  */

void
free_data_refs (vec<data_reference_p> datarefs)
{
  unsigned int i;
  struct data_reference *dr;

  FOR_EACH_VEC_ELT (datarefs, i, dr)
    free_data_ref (dr);
  datarefs.release ();
}

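/* Teardown sketch (illustrative only), continuing the
   compute_data_dependences_for_loop example above.  Freeing the
   relations does not free the data references they refer to, so both
   vectors need their own cleanup, and the loop nest vector is simply
   released:

   | free_dependence_relations (dependence_relations);
   | free_data_refs (datarefs);
   | loop_nest.release ();  */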