/* Data reference and data dependence detectors.
   Copyright (C) 2003-2013 Free Software Foundation, Inc.
   Contributed by Sebastian Pop <pop@cri.ensmp.fr>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass walks a given loop structure searching for array
   references.  The information about the array accesses is recorded
   in DATA_REFERENCE structures.

   The basic test for determining the dependences is:
   given two access functions chrec1 and chrec2 for the same array, and
   two vectors x and y from the iteration domain, the same element of
   the array is accessed twice at iterations x and y if and only if:
   |   chrec1 (x) == chrec2 (y).

   The goals of this analysis are:

   - to determine independence: the relation between two independent
     accesses is qualified with chrec_known (this information enables
     loop parallelization),

   - when two data references access the same data, to qualify the
     dependence relation with the classic dependence representations:

       - distance vectors
       - direction vectors
       - loop-carried level dependences
       - polyhedron dependences
     or with the chains-of-recurrences based representation,

   - to define a knowledge base for storing the data dependence
     information,

   - to define an interface to access this data.


   Definitions:

   - subscript: given two array accesses, a subscript is the tuple
     composed of the access functions for a given dimension.  Example:
     given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
     (f1, g1), (f2, g2), (f3, g3).

   - Diophantine equation: an equation whose coefficients and
     solutions are integer constants, for example the equation
     |   3*x + 2*y = 1
     has the integer solution x = 1 and y = -1.

   References:

   - "Advanced Compilation for High Performance Computing" by Randy
     Allen and Ken Kennedy.
     http://citeseer.ist.psu.edu/goff91practical.html

   - "Loop Transformations for Restructuring Compilers - The Foundations"
     by Utpal Banerjee.
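
   Note on solvability: a linear Diophantine equation a*x + b*y = c has
   integer solutions if and only if gcd (a, b) divides c.  For instance
   |   3*x + 2*y = 1
   is solvable because gcd (3, 2) = 1 divides 1, whereas
   |   2*x + 6*y = 3
   has no integer solutions because gcd (2, 6) = 2 does not divide 3.
   The SIV and MIV subscript tests below rely on divisibility facts of
   this kind (see e.g. the uses of tree_fold_divides_p).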


*/

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "cfgloop.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "dumpfile.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "params.h"

static struct datadep_stats
{
  int num_dependence_tests;
  int num_dependence_dependent;
  int num_dependence_independent;
  int num_dependence_undetermined;

  int num_subscript_tests;
  int num_subscript_undetermined;
  int num_same_subscript_function;

  int num_ziv;
  int num_ziv_independent;
  int num_ziv_dependent;
  int num_ziv_unimplemented;

  int num_siv;
  int num_siv_independent;
  int num_siv_dependent;
  int num_siv_unimplemented;

  int num_miv;
  int num_miv_independent;
  int num_miv_dependent;
  int num_miv_unimplemented;
} dependence_stats;

static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
					    struct data_reference *,
					    struct data_reference *,
					    struct loop *);

/* Returns true iff A divides B.  */

static inline bool
tree_fold_divides_p (const_tree a, const_tree b)
{
  gcc_assert (TREE_CODE (a) == INTEGER_CST);
  gcc_assert (TREE_CODE (b) == INTEGER_CST);
  return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
}

/* Returns true iff A divides B.  */

static inline bool
int_divides_p (int a, int b)
{
  return ((b % a) == 0);
}



/* Dump into FILE all the data references from DATAREFS.  */

static void
dump_data_references (FILE *file, vec<data_reference_p> datarefs)
{
  unsigned int i;
  struct data_reference *dr;

  FOR_EACH_VEC_ELT (datarefs, i, dr)
    dump_data_reference (file, dr);
}

/* Dump into STDERR all the data references from DATAREFS.  */

DEBUG_FUNCTION void
debug_data_references (vec<data_reference_p> datarefs)
{
  dump_data_references (stderr, datarefs);
}

/* Print to STDERR the data_reference DR.  */

DEBUG_FUNCTION void
debug_data_reference (struct data_reference *dr)
{
  dump_data_reference (stderr, dr);
}

/* Dump function for a DATA_REFERENCE structure.  */

void
dump_data_reference (FILE *outf,
		     struct data_reference *dr)
{
  unsigned int i;

  fprintf (outf, "#(Data Ref: \n");
  fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index);
  fprintf (outf, "# stmt: ");
  print_gimple_stmt (outf, DR_STMT (dr), 0, 0);
  fprintf (outf, "# ref: ");
  print_generic_stmt (outf, DR_REF (dr), 0);
  fprintf (outf, "# base_object: ");
  print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);

  for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
    {
      fprintf (outf, "# Access function %d: ", i);
      print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
    }
  fprintf (outf, "#)\n");
}

/* Dumps the affine function described by FN to the file OUTF.  */

static void
dump_affine_function (FILE *outf, affine_fn fn)
{
  unsigned i;
  tree coef;

  print_generic_expr (outf, fn[0], TDF_SLIM);
  for (i = 1; fn.iterate (i, &coef); i++)
    {
      fprintf (outf, " + ");
      print_generic_expr (outf, coef, TDF_SLIM);
      fprintf (outf, " * x_%u", i);
    }
}

/* Dumps the conflict function CF to the file OUTF.
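
   A conflict function is either the special value "not known" or "no
   dependence", or a small set of affine functions.  Each affine function
   is stored as the vector of its coefficients: for instance the vector
   [10, 2] stands for the function 10 + 2 * x_1, is printed by
   dump_affine_function as "10 + 2 * x_1", and the whole conflict
   function is dumped as "[10 + 2 * x_1]".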
*/ 211 212 static void 213 dump_conflict_function (FILE *outf, conflict_function *cf) 214 { 215 unsigned i; 216 217 if (cf->n == NO_DEPENDENCE) 218 fprintf (outf, "no dependence"); 219 else if (cf->n == NOT_KNOWN) 220 fprintf (outf, "not known"); 221 else 222 { 223 for (i = 0; i < cf->n; i++) 224 { 225 if (i != 0) 226 fprintf (outf, " "); 227 fprintf (outf, "["); 228 dump_affine_function (outf, cf->fns[i]); 229 fprintf (outf, "]"); 230 } 231 } 232 } 233 234 /* Dump function for a SUBSCRIPT structure. */ 235 236 static void 237 dump_subscript (FILE *outf, struct subscript *subscript) 238 { 239 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript); 240 241 fprintf (outf, "\n (subscript \n"); 242 fprintf (outf, " iterations_that_access_an_element_twice_in_A: "); 243 dump_conflict_function (outf, cf); 244 if (CF_NONTRIVIAL_P (cf)) 245 { 246 tree last_iteration = SUB_LAST_CONFLICT (subscript); 247 fprintf (outf, "\n last_conflict: "); 248 print_generic_expr (outf, last_iteration, 0); 249 } 250 251 cf = SUB_CONFLICTS_IN_B (subscript); 252 fprintf (outf, "\n iterations_that_access_an_element_twice_in_B: "); 253 dump_conflict_function (outf, cf); 254 if (CF_NONTRIVIAL_P (cf)) 255 { 256 tree last_iteration = SUB_LAST_CONFLICT (subscript); 257 fprintf (outf, "\n last_conflict: "); 258 print_generic_expr (outf, last_iteration, 0); 259 } 260 261 fprintf (outf, "\n (Subscript distance: "); 262 print_generic_expr (outf, SUB_DISTANCE (subscript), 0); 263 fprintf (outf, " ))\n"); 264 } 265 266 /* Print the classic direction vector DIRV to OUTF. */ 267 268 static void 269 print_direction_vector (FILE *outf, 270 lambda_vector dirv, 271 int length) 272 { 273 int eq; 274 275 for (eq = 0; eq < length; eq++) 276 { 277 enum data_dependence_direction dir = ((enum data_dependence_direction) 278 dirv[eq]); 279 280 switch (dir) 281 { 282 case dir_positive: 283 fprintf (outf, " +"); 284 break; 285 case dir_negative: 286 fprintf (outf, " -"); 287 break; 288 case dir_equal: 289 fprintf (outf, " ="); 290 break; 291 case dir_positive_or_equal: 292 fprintf (outf, " +="); 293 break; 294 case dir_positive_or_negative: 295 fprintf (outf, " +-"); 296 break; 297 case dir_negative_or_equal: 298 fprintf (outf, " -="); 299 break; 300 case dir_star: 301 fprintf (outf, " *"); 302 break; 303 default: 304 fprintf (outf, "indep"); 305 break; 306 } 307 } 308 fprintf (outf, "\n"); 309 } 310 311 /* Print a vector of direction vectors. */ 312 313 static void 314 print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects, 315 int length) 316 { 317 unsigned j; 318 lambda_vector v; 319 320 FOR_EACH_VEC_ELT (dir_vects, j, v) 321 print_direction_vector (outf, v, length); 322 } 323 324 /* Print out a vector VEC of length N to OUTFILE. */ 325 326 static inline void 327 print_lambda_vector (FILE * outfile, lambda_vector vector, int n) 328 { 329 int i; 330 331 for (i = 0; i < n; i++) 332 fprintf (outfile, "%3d ", vector[i]); 333 fprintf (outfile, "\n"); 334 } 335 336 /* Print a vector of distance vectors. */ 337 338 static void 339 print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects, 340 int length) 341 { 342 unsigned j; 343 lambda_vector v; 344 345 FOR_EACH_VEC_ELT (dist_vects, j, v) 346 print_lambda_vector (outf, v, length); 347 } 348 349 /* Dump function for a DATA_DEPENDENCE_RELATION structure. 
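
   As an illustration, for the loop
   |   for (i = 0; i < N; i++)
   |     A[i + 1] = A[i] + 1;
   the dependence between the write to A[i + 1] and the read from A[i]
   has distance vector (1) and direction vector (+): the element written
   at iteration i is read again one iteration later.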
*/ 350 351 static void 352 dump_data_dependence_relation (FILE *outf, 353 struct data_dependence_relation *ddr) 354 { 355 struct data_reference *dra, *drb; 356 357 fprintf (outf, "(Data Dep: \n"); 358 359 if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know) 360 { 361 if (ddr) 362 { 363 dra = DDR_A (ddr); 364 drb = DDR_B (ddr); 365 if (dra) 366 dump_data_reference (outf, dra); 367 else 368 fprintf (outf, " (nil)\n"); 369 if (drb) 370 dump_data_reference (outf, drb); 371 else 372 fprintf (outf, " (nil)\n"); 373 } 374 fprintf (outf, " (don't know)\n)\n"); 375 return; 376 } 377 378 dra = DDR_A (ddr); 379 drb = DDR_B (ddr); 380 dump_data_reference (outf, dra); 381 dump_data_reference (outf, drb); 382 383 if (DDR_ARE_DEPENDENT (ddr) == chrec_known) 384 fprintf (outf, " (no dependence)\n"); 385 386 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE) 387 { 388 unsigned int i; 389 struct loop *loopi; 390 391 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++) 392 { 393 fprintf (outf, " access_fn_A: "); 394 print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0); 395 fprintf (outf, " access_fn_B: "); 396 print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0); 397 dump_subscript (outf, DDR_SUBSCRIPT (ddr, i)); 398 } 399 400 fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr)); 401 fprintf (outf, " loop nest: ("); 402 FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi) 403 fprintf (outf, "%d ", loopi->num); 404 fprintf (outf, ")\n"); 405 406 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++) 407 { 408 fprintf (outf, " distance_vector: "); 409 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i), 410 DDR_NB_LOOPS (ddr)); 411 } 412 413 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++) 414 { 415 fprintf (outf, " direction_vector: "); 416 print_direction_vector (outf, DDR_DIR_VECT (ddr, i), 417 DDR_NB_LOOPS (ddr)); 418 } 419 } 420 421 fprintf (outf, ")\n"); 422 } 423 424 /* Debug version. */ 425 426 DEBUG_FUNCTION void 427 debug_data_dependence_relation (struct data_dependence_relation *ddr) 428 { 429 dump_data_dependence_relation (stderr, ddr); 430 } 431 432 /* Dump into FILE all the dependence relations from DDRS. */ 433 434 void 435 dump_data_dependence_relations (FILE *file, 436 vec<ddr_p> ddrs) 437 { 438 unsigned int i; 439 struct data_dependence_relation *ddr; 440 441 FOR_EACH_VEC_ELT (ddrs, i, ddr) 442 dump_data_dependence_relation (file, ddr); 443 } 444 445 /* Dump to STDERR all the dependence relations from DDRS. */ 446 447 DEBUG_FUNCTION void 448 debug_data_dependence_relations (vec<ddr_p> ddrs) 449 { 450 dump_data_dependence_relations (stderr, ddrs); 451 } 452 453 /* Dumps the distance and direction vectors in FILE. DDRS contains 454 the dependence relations, and VECT_SIZE is the size of the 455 dependence vectors, or in other words the number of loops in the 456 considered nest. 
*/ 457 458 static void 459 dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs) 460 { 461 unsigned int i, j; 462 struct data_dependence_relation *ddr; 463 lambda_vector v; 464 465 FOR_EACH_VEC_ELT (ddrs, i, ddr) 466 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr)) 467 { 468 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), j, v) 469 { 470 fprintf (file, "DISTANCE_V ("); 471 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr)); 472 fprintf (file, ")\n"); 473 } 474 475 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), j, v) 476 { 477 fprintf (file, "DIRECTION_V ("); 478 print_direction_vector (file, v, DDR_NB_LOOPS (ddr)); 479 fprintf (file, ")\n"); 480 } 481 } 482 483 fprintf (file, "\n\n"); 484 } 485 486 /* Dumps the data dependence relations DDRS in FILE. */ 487 488 static void 489 dump_ddrs (FILE *file, vec<ddr_p> ddrs) 490 { 491 unsigned int i; 492 struct data_dependence_relation *ddr; 493 494 FOR_EACH_VEC_ELT (ddrs, i, ddr) 495 dump_data_dependence_relation (file, ddr); 496 497 fprintf (file, "\n\n"); 498 } 499 500 DEBUG_FUNCTION void 501 debug_ddrs (vec<ddr_p> ddrs) 502 { 503 dump_ddrs (stderr, ddrs); 504 } 505 506 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1 507 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero 508 constant of type ssizetype, and returns true. If we cannot do this 509 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false 510 is returned. */ 511 512 static bool 513 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1, 514 tree *var, tree *off) 515 { 516 tree var0, var1; 517 tree off0, off1; 518 enum tree_code ocode = code; 519 520 *var = NULL_TREE; 521 *off = NULL_TREE; 522 523 switch (code) 524 { 525 case INTEGER_CST: 526 *var = build_int_cst (type, 0); 527 *off = fold_convert (ssizetype, op0); 528 return true; 529 530 case POINTER_PLUS_EXPR: 531 ocode = PLUS_EXPR; 532 /* FALLTHROUGH */ 533 case PLUS_EXPR: 534 case MINUS_EXPR: 535 split_constant_offset (op0, &var0, &off0); 536 split_constant_offset (op1, &var1, &off1); 537 *var = fold_build2 (code, type, var0, var1); 538 *off = size_binop (ocode, off0, off1); 539 return true; 540 541 case MULT_EXPR: 542 if (TREE_CODE (op1) != INTEGER_CST) 543 return false; 544 545 split_constant_offset (op0, &var0, &off0); 546 *var = fold_build2 (MULT_EXPR, type, var0, op1); 547 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1)); 548 return true; 549 550 case ADDR_EXPR: 551 { 552 tree base, poffset; 553 HOST_WIDE_INT pbitsize, pbitpos; 554 enum machine_mode pmode; 555 int punsignedp, pvolatilep; 556 557 op0 = TREE_OPERAND (op0, 0); 558 base = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, 559 &pmode, &punsignedp, &pvolatilep, false); 560 561 if (pbitpos % BITS_PER_UNIT != 0) 562 return false; 563 base = build_fold_addr_expr (base); 564 off0 = ssize_int (pbitpos / BITS_PER_UNIT); 565 566 if (poffset) 567 { 568 split_constant_offset (poffset, &poffset, &off1); 569 off0 = size_binop (PLUS_EXPR, off0, off1); 570 if (POINTER_TYPE_P (TREE_TYPE (base))) 571 base = fold_build_pointer_plus (base, poffset); 572 else 573 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base, 574 fold_convert (TREE_TYPE (base), poffset)); 575 } 576 577 var0 = fold_convert (type, base); 578 579 /* If variable length types are involved, punt, otherwise casts 580 might be converted into ARRAY_REFs in gimplify_conversion. 581 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which 582 possibly no longer appears in current GIMPLE, might resurface. 
583 This perhaps could run 584 if (CONVERT_EXPR_P (var0)) 585 { 586 gimplify_conversion (&var0); 587 // Attempt to fill in any within var0 found ARRAY_REF's 588 // element size from corresponding op embedded ARRAY_REF, 589 // if unsuccessful, just punt. 590 } */ 591 while (POINTER_TYPE_P (type)) 592 type = TREE_TYPE (type); 593 if (int_size_in_bytes (type) < 0) 594 return false; 595 596 *var = var0; 597 *off = off0; 598 return true; 599 } 600 601 case SSA_NAME: 602 { 603 gimple def_stmt = SSA_NAME_DEF_STMT (op0); 604 enum tree_code subcode; 605 606 if (gimple_code (def_stmt) != GIMPLE_ASSIGN) 607 return false; 608 609 var0 = gimple_assign_rhs1 (def_stmt); 610 subcode = gimple_assign_rhs_code (def_stmt); 611 var1 = gimple_assign_rhs2 (def_stmt); 612 613 return split_constant_offset_1 (type, var0, subcode, var1, var, off); 614 } 615 CASE_CONVERT: 616 { 617 /* We must not introduce undefined overflow, and we must not change the value. 618 Hence we're okay if the inner type doesn't overflow to start with 619 (pointer or signed), the outer type also is an integer or pointer 620 and the outer precision is at least as large as the inner. */ 621 tree itype = TREE_TYPE (op0); 622 if ((POINTER_TYPE_P (itype) 623 || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype))) 624 && TYPE_PRECISION (type) >= TYPE_PRECISION (itype) 625 && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type))) 626 { 627 split_constant_offset (op0, &var0, off); 628 *var = fold_convert (type, var0); 629 return true; 630 } 631 return false; 632 } 633 634 default: 635 return false; 636 } 637 } 638 639 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF 640 will be ssizetype. */ 641 642 void 643 split_constant_offset (tree exp, tree *var, tree *off) 644 { 645 tree type = TREE_TYPE (exp), otype, op0, op1, e, o; 646 enum tree_code code; 647 648 *var = exp; 649 *off = ssize_int (0); 650 STRIP_NOPS (exp); 651 652 if (tree_is_chrec (exp) 653 || get_gimple_rhs_class (TREE_CODE (exp)) == GIMPLE_TERNARY_RHS) 654 return; 655 656 otype = TREE_TYPE (exp); 657 code = TREE_CODE (exp); 658 extract_ops_from_tree (exp, &code, &op0, &op1); 659 if (split_constant_offset_1 (otype, op0, code, op1, &e, &o)) 660 { 661 *var = fold_convert (type, e); 662 *off = o; 663 } 664 } 665 666 /* Returns the address ADDR of an object in a canonical shape (without nop 667 casts, and with type of pointer to the object). */ 668 669 static tree 670 canonicalize_base_object_address (tree addr) 671 { 672 tree orig = addr; 673 674 STRIP_NOPS (addr); 675 676 /* The base address may be obtained by casting from integer, in that case 677 keep the cast. */ 678 if (!POINTER_TYPE_P (TREE_TYPE (addr))) 679 return orig; 680 681 if (TREE_CODE (addr) != ADDR_EXPR) 682 return addr; 683 684 return build_fold_addr_expr (TREE_OPERAND (addr, 0)); 685 } 686 687 /* Analyzes the behavior of the memory reference DR in the innermost loop or 688 basic block that contains it. Returns true if analysis succeed or false 689 otherwise. 
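
   The analyzed behavior is the decomposition DR_BASE_ADDRESS + DR_OFFSET
   + DR_INIT, with the address advancing by DR_STEP at each iteration of
   the innermost loop.  As a rough example, for an access a[i + 2] to an
   array of 4-byte elements, with i the counter of the analyzed loop, one
   expects base_address = &a, offset = 0, init = 8 and step = 4 (the
   exact trees depend on the types and on how the address is written in
   the IL).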
*/ 690 691 bool 692 dr_analyze_innermost (struct data_reference *dr, struct loop *nest) 693 { 694 gimple stmt = DR_STMT (dr); 695 struct loop *loop = loop_containing_stmt (stmt); 696 tree ref = DR_REF (dr); 697 HOST_WIDE_INT pbitsize, pbitpos; 698 tree base, poffset; 699 enum machine_mode pmode; 700 int punsignedp, pvolatilep; 701 affine_iv base_iv, offset_iv; 702 tree init, dinit, step; 703 bool in_loop = (loop && loop->num); 704 705 if (dump_file && (dump_flags & TDF_DETAILS)) 706 fprintf (dump_file, "analyze_innermost: "); 707 708 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, 709 &pmode, &punsignedp, &pvolatilep, false); 710 gcc_assert (base != NULL_TREE); 711 712 if (pbitpos % BITS_PER_UNIT != 0) 713 { 714 if (dump_file && (dump_flags & TDF_DETAILS)) 715 fprintf (dump_file, "failed: bit offset alignment.\n"); 716 return false; 717 } 718 719 if (TREE_CODE (base) == MEM_REF) 720 { 721 if (!integer_zerop (TREE_OPERAND (base, 1))) 722 { 723 double_int moff = mem_ref_offset (base); 724 tree mofft = double_int_to_tree (sizetype, moff); 725 if (!poffset) 726 poffset = mofft; 727 else 728 poffset = size_binop (PLUS_EXPR, poffset, mofft); 729 } 730 base = TREE_OPERAND (base, 0); 731 } 732 else 733 base = build_fold_addr_expr (base); 734 735 if (in_loop) 736 { 737 if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv, 738 nest ? true : false)) 739 { 740 if (nest) 741 { 742 if (dump_file && (dump_flags & TDF_DETAILS)) 743 fprintf (dump_file, "failed: evolution of base is not" 744 " affine.\n"); 745 return false; 746 } 747 else 748 { 749 base_iv.base = base; 750 base_iv.step = ssize_int (0); 751 base_iv.no_overflow = true; 752 } 753 } 754 } 755 else 756 { 757 base_iv.base = base; 758 base_iv.step = ssize_int (0); 759 base_iv.no_overflow = true; 760 } 761 762 if (!poffset) 763 { 764 offset_iv.base = ssize_int (0); 765 offset_iv.step = ssize_int (0); 766 } 767 else 768 { 769 if (!in_loop) 770 { 771 offset_iv.base = poffset; 772 offset_iv.step = ssize_int (0); 773 } 774 else if (!simple_iv (loop, loop_containing_stmt (stmt), 775 poffset, &offset_iv, 776 nest ? true : false)) 777 { 778 if (nest) 779 { 780 if (dump_file && (dump_flags & TDF_DETAILS)) 781 fprintf (dump_file, "failed: evolution of offset is not" 782 " affine.\n"); 783 return false; 784 } 785 else 786 { 787 offset_iv.base = poffset; 788 offset_iv.step = ssize_int (0); 789 } 790 } 791 } 792 793 init = ssize_int (pbitpos / BITS_PER_UNIT); 794 split_constant_offset (base_iv.base, &base_iv.base, &dinit); 795 init = size_binop (PLUS_EXPR, init, dinit); 796 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit); 797 init = size_binop (PLUS_EXPR, init, dinit); 798 799 step = size_binop (PLUS_EXPR, 800 fold_convert (ssizetype, base_iv.step), 801 fold_convert (ssizetype, offset_iv.step)); 802 803 DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base); 804 805 DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base); 806 DR_INIT (dr) = init; 807 DR_STEP (dr) = step; 808 809 DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base)); 810 811 if (dump_file && (dump_flags & TDF_DETAILS)) 812 fprintf (dump_file, "success.\n"); 813 814 return true; 815 } 816 817 /* Determines the base object and the list of indices of memory reference 818 DR, analyzed in LOOP and instantiated in loop nest NEST. 
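
   For example, for the reference a[i][3] analyzed in the nest of loop i
   (assume its loop number is 1), the base object is a and the access
   functions are collected from the innermost subscript outward: access
   function 0 is 3 and access function 1 is {0, +, 1}_1.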
*/ 819 820 static void 821 dr_analyze_indices (struct data_reference *dr, loop_p nest, loop_p loop) 822 { 823 vec<tree> access_fns = vNULL; 824 tree ref, op; 825 tree base, off, access_fn; 826 basic_block before_loop; 827 828 /* If analyzing a basic-block there are no indices to analyze 829 and thus no access functions. */ 830 if (!nest) 831 { 832 DR_BASE_OBJECT (dr) = DR_REF (dr); 833 DR_ACCESS_FNS (dr).create (0); 834 return; 835 } 836 837 ref = DR_REF (dr); 838 before_loop = block_before_loop (nest); 839 840 /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses 841 into a two element array with a constant index. The base is 842 then just the immediate underlying object. */ 843 if (TREE_CODE (ref) == REALPART_EXPR) 844 { 845 ref = TREE_OPERAND (ref, 0); 846 access_fns.safe_push (integer_zero_node); 847 } 848 else if (TREE_CODE (ref) == IMAGPART_EXPR) 849 { 850 ref = TREE_OPERAND (ref, 0); 851 access_fns.safe_push (integer_one_node); 852 } 853 854 /* Analyze access functions of dimensions we know to be independent. */ 855 while (handled_component_p (ref)) 856 { 857 if (TREE_CODE (ref) == ARRAY_REF) 858 { 859 op = TREE_OPERAND (ref, 1); 860 access_fn = analyze_scalar_evolution (loop, op); 861 access_fn = instantiate_scev (before_loop, loop, access_fn); 862 access_fns.safe_push (access_fn); 863 } 864 else if (TREE_CODE (ref) == COMPONENT_REF 865 && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE) 866 { 867 /* For COMPONENT_REFs of records (but not unions!) use the 868 FIELD_DECL offset as constant access function so we can 869 disambiguate a[i].f1 and a[i].f2. */ 870 tree off = component_ref_field_offset (ref); 871 off = size_binop (PLUS_EXPR, 872 size_binop (MULT_EXPR, 873 fold_convert (bitsizetype, off), 874 bitsize_int (BITS_PER_UNIT)), 875 DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1))); 876 access_fns.safe_push (off); 877 } 878 else 879 /* If we have an unhandled component we could not translate 880 to an access function stop analyzing. We have determined 881 our base object in this case. */ 882 break; 883 884 ref = TREE_OPERAND (ref, 0); 885 } 886 887 /* If the address operand of a MEM_REF base has an evolution in the 888 analyzed nest, add it as an additional independent access-function. */ 889 if (TREE_CODE (ref) == MEM_REF) 890 { 891 op = TREE_OPERAND (ref, 0); 892 access_fn = analyze_scalar_evolution (loop, op); 893 access_fn = instantiate_scev (before_loop, loop, access_fn); 894 if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC) 895 { 896 tree orig_type; 897 tree memoff = TREE_OPERAND (ref, 1); 898 base = initial_condition (access_fn); 899 orig_type = TREE_TYPE (base); 900 STRIP_USELESS_TYPE_CONVERSION (base); 901 split_constant_offset (base, &base, &off); 902 /* Fold the MEM_REF offset into the evolutions initial 903 value to make more bases comparable. */ 904 if (!integer_zerop (memoff)) 905 { 906 off = size_binop (PLUS_EXPR, off, 907 fold_convert (ssizetype, memoff)); 908 memoff = build_int_cst (TREE_TYPE (memoff), 0); 909 } 910 access_fn = chrec_replace_initial_condition 911 (access_fn, fold_convert (orig_type, off)); 912 /* ??? This is still not a suitable base object for 913 dr_may_alias_p - the base object needs to be an 914 access that covers the object as whole. With 915 an evolution in the pointer this cannot be 916 guaranteed. 917 As a band-aid, mark the access so we can special-case 918 it in dr_may_alias_p. 
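
   For example (modulo type conversions), MEM[p_1, off: 4] where p_1
   evolves as {p_0 + 8, +, 16}_1 is rewritten to the base object
   MEM[p_0, off: 0] with access function {12, +, 16}_1: both the
   constant 8 from the evolution's initial value and the MEM_REF offset
   4 are folded into the initial value of the access function.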
*/ 919 ref = fold_build2_loc (EXPR_LOCATION (ref), 920 MEM_REF, TREE_TYPE (ref), 921 base, memoff); 922 DR_UNCONSTRAINED_BASE (dr) = true; 923 access_fns.safe_push (access_fn); 924 } 925 } 926 else if (DECL_P (ref)) 927 { 928 /* Canonicalize DR_BASE_OBJECT to MEM_REF form. */ 929 ref = build2 (MEM_REF, TREE_TYPE (ref), 930 build_fold_addr_expr (ref), 931 build_int_cst (reference_alias_ptr_type (ref), 0)); 932 } 933 934 DR_BASE_OBJECT (dr) = ref; 935 DR_ACCESS_FNS (dr) = access_fns; 936 } 937 938 /* Extracts the alias analysis information from the memory reference DR. */ 939 940 static void 941 dr_analyze_alias (struct data_reference *dr) 942 { 943 tree ref = DR_REF (dr); 944 tree base = get_base_address (ref), addr; 945 946 if (INDIRECT_REF_P (base) 947 || TREE_CODE (base) == MEM_REF) 948 { 949 addr = TREE_OPERAND (base, 0); 950 if (TREE_CODE (addr) == SSA_NAME) 951 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr); 952 } 953 } 954 955 /* Frees data reference DR. */ 956 957 void 958 free_data_ref (data_reference_p dr) 959 { 960 DR_ACCESS_FNS (dr).release (); 961 free (dr); 962 } 963 964 /* Analyzes memory reference MEMREF accessed in STMT. The reference 965 is read if IS_READ is true, write otherwise. Returns the 966 data_reference description of MEMREF. NEST is the outermost loop 967 in which the reference should be instantiated, LOOP is the loop in 968 which the data reference should be analyzed. */ 969 970 struct data_reference * 971 create_data_ref (loop_p nest, loop_p loop, tree memref, gimple stmt, 972 bool is_read) 973 { 974 struct data_reference *dr; 975 976 if (dump_file && (dump_flags & TDF_DETAILS)) 977 { 978 fprintf (dump_file, "Creating dr for "); 979 print_generic_expr (dump_file, memref, TDF_SLIM); 980 fprintf (dump_file, "\n"); 981 } 982 983 dr = XCNEW (struct data_reference); 984 DR_STMT (dr) = stmt; 985 DR_REF (dr) = memref; 986 DR_IS_READ (dr) = is_read; 987 988 dr_analyze_innermost (dr, nest); 989 dr_analyze_indices (dr, nest, loop); 990 dr_analyze_alias (dr); 991 992 if (dump_file && (dump_flags & TDF_DETAILS)) 993 { 994 unsigned i; 995 fprintf (dump_file, "\tbase_address: "); 996 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM); 997 fprintf (dump_file, "\n\toffset from base address: "); 998 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM); 999 fprintf (dump_file, "\n\tconstant offset from base address: "); 1000 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM); 1001 fprintf (dump_file, "\n\tstep: "); 1002 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM); 1003 fprintf (dump_file, "\n\taligned to: "); 1004 print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM); 1005 fprintf (dump_file, "\n\tbase_object: "); 1006 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM); 1007 fprintf (dump_file, "\n"); 1008 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++) 1009 { 1010 fprintf (dump_file, "\tAccess function %d: ", i); 1011 print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM); 1012 } 1013 } 1014 1015 return dr; 1016 } 1017 1018 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical 1019 expressions. 
*/ 1020 static bool 1021 dr_equal_offsets_p1 (tree offset1, tree offset2) 1022 { 1023 bool res; 1024 1025 STRIP_NOPS (offset1); 1026 STRIP_NOPS (offset2); 1027 1028 if (offset1 == offset2) 1029 return true; 1030 1031 if (TREE_CODE (offset1) != TREE_CODE (offset2) 1032 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1))) 1033 return false; 1034 1035 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0), 1036 TREE_OPERAND (offset2, 0)); 1037 1038 if (!res || !BINARY_CLASS_P (offset1)) 1039 return res; 1040 1041 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1), 1042 TREE_OPERAND (offset2, 1)); 1043 1044 return res; 1045 } 1046 1047 /* Check if DRA and DRB have equal offsets. */ 1048 bool 1049 dr_equal_offsets_p (struct data_reference *dra, 1050 struct data_reference *drb) 1051 { 1052 tree offset1, offset2; 1053 1054 offset1 = DR_OFFSET (dra); 1055 offset2 = DR_OFFSET (drb); 1056 1057 return dr_equal_offsets_p1 (offset1, offset2); 1058 } 1059 1060 /* Returns true if FNA == FNB. */ 1061 1062 static bool 1063 affine_function_equal_p (affine_fn fna, affine_fn fnb) 1064 { 1065 unsigned i, n = fna.length (); 1066 1067 if (n != fnb.length ()) 1068 return false; 1069 1070 for (i = 0; i < n; i++) 1071 if (!operand_equal_p (fna[i], fnb[i], 0)) 1072 return false; 1073 1074 return true; 1075 } 1076 1077 /* If all the functions in CF are the same, returns one of them, 1078 otherwise returns NULL. */ 1079 1080 static affine_fn 1081 common_affine_function (conflict_function *cf) 1082 { 1083 unsigned i; 1084 affine_fn comm; 1085 1086 if (!CF_NONTRIVIAL_P (cf)) 1087 return affine_fn(); 1088 1089 comm = cf->fns[0]; 1090 1091 for (i = 1; i < cf->n; i++) 1092 if (!affine_function_equal_p (comm, cf->fns[i])) 1093 return affine_fn(); 1094 1095 return comm; 1096 } 1097 1098 /* Returns the base of the affine function FN. */ 1099 1100 static tree 1101 affine_function_base (affine_fn fn) 1102 { 1103 return fn[0]; 1104 } 1105 1106 /* Returns true if FN is a constant. */ 1107 1108 static bool 1109 affine_function_constant_p (affine_fn fn) 1110 { 1111 unsigned i; 1112 tree coef; 1113 1114 for (i = 1; fn.iterate (i, &coef); i++) 1115 if (!integer_zerop (coef)) 1116 return false; 1117 1118 return true; 1119 } 1120 1121 /* Returns true if FN is the zero constant function. */ 1122 1123 static bool 1124 affine_function_zero_p (affine_fn fn) 1125 { 1126 return (integer_zerop (affine_function_base (fn)) 1127 && affine_function_constant_p (fn)); 1128 } 1129 1130 /* Returns a signed integer type with the largest precision from TA 1131 and TB. */ 1132 1133 static tree 1134 signed_type_for_types (tree ta, tree tb) 1135 { 1136 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb)) 1137 return signed_type_for (ta); 1138 else 1139 return signed_type_for (tb); 1140 } 1141 1142 /* Applies operation OP on affine functions FNA and FNB, and returns the 1143 result. 
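
   The shorter coefficient vector is implicitly padded with zeros: for
   instance, adding [1, 2] (the function 1 + 2 * x_1) and [3, 0, 5] (the
   function 3 + 5 * x_2) yields [4, 2, 5], the function
   4 + 2 * x_1 + 5 * x_2.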
*/ 1144 1145 static affine_fn 1146 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb) 1147 { 1148 unsigned i, n, m; 1149 affine_fn ret; 1150 tree coef; 1151 1152 if (fnb.length () > fna.length ()) 1153 { 1154 n = fna.length (); 1155 m = fnb.length (); 1156 } 1157 else 1158 { 1159 n = fnb.length (); 1160 m = fna.length (); 1161 } 1162 1163 ret.create (m); 1164 for (i = 0; i < n; i++) 1165 { 1166 tree type = signed_type_for_types (TREE_TYPE (fna[i]), 1167 TREE_TYPE (fnb[i])); 1168 ret.quick_push (fold_build2 (op, type, fna[i], fnb[i])); 1169 } 1170 1171 for (; fna.iterate (i, &coef); i++) 1172 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)), 1173 coef, integer_zero_node)); 1174 for (; fnb.iterate (i, &coef); i++) 1175 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)), 1176 integer_zero_node, coef)); 1177 1178 return ret; 1179 } 1180 1181 /* Returns the sum of affine functions FNA and FNB. */ 1182 1183 static affine_fn 1184 affine_fn_plus (affine_fn fna, affine_fn fnb) 1185 { 1186 return affine_fn_op (PLUS_EXPR, fna, fnb); 1187 } 1188 1189 /* Returns the difference of affine functions FNA and FNB. */ 1190 1191 static affine_fn 1192 affine_fn_minus (affine_fn fna, affine_fn fnb) 1193 { 1194 return affine_fn_op (MINUS_EXPR, fna, fnb); 1195 } 1196 1197 /* Frees affine function FN. */ 1198 1199 static void 1200 affine_fn_free (affine_fn fn) 1201 { 1202 fn.release (); 1203 } 1204 1205 /* Determine for each subscript in the data dependence relation DDR 1206 the distance. */ 1207 1208 static void 1209 compute_subscript_distance (struct data_dependence_relation *ddr) 1210 { 1211 conflict_function *cf_a, *cf_b; 1212 affine_fn fn_a, fn_b, diff; 1213 1214 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE) 1215 { 1216 unsigned int i; 1217 1218 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++) 1219 { 1220 struct subscript *subscript; 1221 1222 subscript = DDR_SUBSCRIPT (ddr, i); 1223 cf_a = SUB_CONFLICTS_IN_A (subscript); 1224 cf_b = SUB_CONFLICTS_IN_B (subscript); 1225 1226 fn_a = common_affine_function (cf_a); 1227 fn_b = common_affine_function (cf_b); 1228 if (!fn_a.exists () || !fn_b.exists ()) 1229 { 1230 SUB_DISTANCE (subscript) = chrec_dont_know; 1231 return; 1232 } 1233 diff = affine_fn_minus (fn_a, fn_b); 1234 1235 if (affine_function_constant_p (diff)) 1236 SUB_DISTANCE (subscript) = affine_function_base (diff); 1237 else 1238 SUB_DISTANCE (subscript) = chrec_dont_know; 1239 1240 affine_fn_free (diff); 1241 } 1242 } 1243 } 1244 1245 /* Returns the conflict function for "unknown". */ 1246 1247 static conflict_function * 1248 conflict_fn_not_known (void) 1249 { 1250 conflict_function *fn = XCNEW (conflict_function); 1251 fn->n = NOT_KNOWN; 1252 1253 return fn; 1254 } 1255 1256 /* Returns the conflict function for "independent". */ 1257 1258 static conflict_function * 1259 conflict_fn_no_dependence (void) 1260 { 1261 conflict_function *fn = XCNEW (conflict_function); 1262 fn->n = NO_DEPENDENCE; 1263 1264 return fn; 1265 } 1266 1267 /* Returns true if the address of OBJ is invariant in LOOP. */ 1268 1269 static bool 1270 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj) 1271 { 1272 while (handled_component_p (obj)) 1273 { 1274 if (TREE_CODE (obj) == ARRAY_REF) 1275 { 1276 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only 1277 need to check the stride and the lower bound of the reference. 
*/ 1278 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2), 1279 loop->num) 1280 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3), 1281 loop->num)) 1282 return false; 1283 } 1284 else if (TREE_CODE (obj) == COMPONENT_REF) 1285 { 1286 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2), 1287 loop->num)) 1288 return false; 1289 } 1290 obj = TREE_OPERAND (obj, 0); 1291 } 1292 1293 if (!INDIRECT_REF_P (obj) 1294 && TREE_CODE (obj) != MEM_REF) 1295 return true; 1296 1297 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0), 1298 loop->num); 1299 } 1300 1301 /* Returns false if we can prove that data references A and B do not alias, 1302 true otherwise. If LOOP_NEST is false no cross-iteration aliases are 1303 considered. */ 1304 1305 bool 1306 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b, 1307 bool loop_nest) 1308 { 1309 tree addr_a = DR_BASE_OBJECT (a); 1310 tree addr_b = DR_BASE_OBJECT (b); 1311 1312 /* If we are not processing a loop nest but scalar code we 1313 do not need to care about possible cross-iteration dependences 1314 and thus can process the full original reference. Do so, 1315 similar to how loop invariant motion applies extra offset-based 1316 disambiguation. */ 1317 if (!loop_nest) 1318 { 1319 aff_tree off1, off2; 1320 double_int size1, size2; 1321 get_inner_reference_aff (DR_REF (a), &off1, &size1); 1322 get_inner_reference_aff (DR_REF (b), &off2, &size2); 1323 aff_combination_scale (&off1, double_int_minus_one); 1324 aff_combination_add (&off2, &off1); 1325 if (aff_comb_cannot_overlap_p (&off2, size1, size2)) 1326 return false; 1327 } 1328 1329 /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we 1330 do not know the size of the base-object. So we cannot do any 1331 offset/overlap based analysis but have to rely on points-to 1332 information only. */ 1333 if (TREE_CODE (addr_a) == MEM_REF 1334 && (DR_UNCONSTRAINED_BASE (a) 1335 || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME)) 1336 { 1337 /* For true dependences we can apply TBAA. */ 1338 if (flag_strict_aliasing 1339 && DR_IS_WRITE (a) && DR_IS_READ (b) 1340 && !alias_sets_conflict_p (get_alias_set (DR_REF (a)), 1341 get_alias_set (DR_REF (b)))) 1342 return false; 1343 if (TREE_CODE (addr_b) == MEM_REF) 1344 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0), 1345 TREE_OPERAND (addr_b, 0)); 1346 else 1347 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0), 1348 build_fold_addr_expr (addr_b)); 1349 } 1350 else if (TREE_CODE (addr_b) == MEM_REF 1351 && (DR_UNCONSTRAINED_BASE (b) 1352 || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME)) 1353 { 1354 /* For true dependences we can apply TBAA. */ 1355 if (flag_strict_aliasing 1356 && DR_IS_WRITE (a) && DR_IS_READ (b) 1357 && !alias_sets_conflict_p (get_alias_set (DR_REF (a)), 1358 get_alias_set (DR_REF (b)))) 1359 return false; 1360 if (TREE_CODE (addr_a) == MEM_REF) 1361 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0), 1362 TREE_OPERAND (addr_b, 0)); 1363 else 1364 return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a), 1365 TREE_OPERAND (addr_b, 0)); 1366 } 1367 1368 /* Otherwise DR_BASE_OBJECT is an access that covers the whole object 1369 that is being subsetted in the loop nest. 
*/ 1370 if (DR_IS_WRITE (a) && DR_IS_WRITE (b)) 1371 return refs_output_dependent_p (addr_a, addr_b); 1372 else if (DR_IS_READ (a) && DR_IS_WRITE (b)) 1373 return refs_anti_dependent_p (addr_a, addr_b); 1374 return refs_may_alias_p (addr_a, addr_b); 1375 } 1376 1377 /* Initialize a data dependence relation between data accesses A and 1378 B. NB_LOOPS is the number of loops surrounding the references: the 1379 size of the classic distance/direction vectors. */ 1380 1381 struct data_dependence_relation * 1382 initialize_data_dependence_relation (struct data_reference *a, 1383 struct data_reference *b, 1384 vec<loop_p> loop_nest) 1385 { 1386 struct data_dependence_relation *res; 1387 unsigned int i; 1388 1389 res = XNEW (struct data_dependence_relation); 1390 DDR_A (res) = a; 1391 DDR_B (res) = b; 1392 DDR_LOOP_NEST (res).create (0); 1393 DDR_REVERSED_P (res) = false; 1394 DDR_SUBSCRIPTS (res).create (0); 1395 DDR_DIR_VECTS (res).create (0); 1396 DDR_DIST_VECTS (res).create (0); 1397 1398 if (a == NULL || b == NULL) 1399 { 1400 DDR_ARE_DEPENDENT (res) = chrec_dont_know; 1401 return res; 1402 } 1403 1404 /* If the data references do not alias, then they are independent. */ 1405 if (!dr_may_alias_p (a, b, loop_nest.exists ())) 1406 { 1407 DDR_ARE_DEPENDENT (res) = chrec_known; 1408 return res; 1409 } 1410 1411 /* The case where the references are exactly the same. */ 1412 if (operand_equal_p (DR_REF (a), DR_REF (b), 0)) 1413 { 1414 if (loop_nest.exists () 1415 && !object_address_invariant_in_loop_p (loop_nest[0], 1416 DR_BASE_OBJECT (a))) 1417 { 1418 DDR_ARE_DEPENDENT (res) = chrec_dont_know; 1419 return res; 1420 } 1421 DDR_AFFINE_P (res) = true; 1422 DDR_ARE_DEPENDENT (res) = NULL_TREE; 1423 DDR_SUBSCRIPTS (res).create (DR_NUM_DIMENSIONS (a)); 1424 DDR_LOOP_NEST (res) = loop_nest; 1425 DDR_INNER_LOOP (res) = 0; 1426 DDR_SELF_REFERENCE (res) = true; 1427 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++) 1428 { 1429 struct subscript *subscript; 1430 1431 subscript = XNEW (struct subscript); 1432 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known (); 1433 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known (); 1434 SUB_LAST_CONFLICT (subscript) = chrec_dont_know; 1435 SUB_DISTANCE (subscript) = chrec_dont_know; 1436 DDR_SUBSCRIPTS (res).safe_push (subscript); 1437 } 1438 return res; 1439 } 1440 1441 /* If the references do not access the same object, we do not know 1442 whether they alias or not. */ 1443 if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0)) 1444 { 1445 DDR_ARE_DEPENDENT (res) = chrec_dont_know; 1446 return res; 1447 } 1448 1449 /* If the base of the object is not invariant in the loop nest, we cannot 1450 analyze it. TODO -- in fact, it would suffice to record that there may 1451 be arbitrary dependences in the loops where the base object varies. */ 1452 if (loop_nest.exists () 1453 && !object_address_invariant_in_loop_p (loop_nest[0], 1454 DR_BASE_OBJECT (a))) 1455 { 1456 DDR_ARE_DEPENDENT (res) = chrec_dont_know; 1457 return res; 1458 } 1459 1460 /* If the number of dimensions of the access to not agree we can have 1461 a pointer access to a component of the array element type and an 1462 array access while the base-objects are still the same. Punt. 
*/ 1463 if (DR_NUM_DIMENSIONS (a) != DR_NUM_DIMENSIONS (b)) 1464 { 1465 DDR_ARE_DEPENDENT (res) = chrec_dont_know; 1466 return res; 1467 } 1468 1469 DDR_AFFINE_P (res) = true; 1470 DDR_ARE_DEPENDENT (res) = NULL_TREE; 1471 DDR_SUBSCRIPTS (res).create (DR_NUM_DIMENSIONS (a)); 1472 DDR_LOOP_NEST (res) = loop_nest; 1473 DDR_INNER_LOOP (res) = 0; 1474 DDR_SELF_REFERENCE (res) = false; 1475 1476 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++) 1477 { 1478 struct subscript *subscript; 1479 1480 subscript = XNEW (struct subscript); 1481 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known (); 1482 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known (); 1483 SUB_LAST_CONFLICT (subscript) = chrec_dont_know; 1484 SUB_DISTANCE (subscript) = chrec_dont_know; 1485 DDR_SUBSCRIPTS (res).safe_push (subscript); 1486 } 1487 1488 return res; 1489 } 1490 1491 /* Frees memory used by the conflict function F. */ 1492 1493 static void 1494 free_conflict_function (conflict_function *f) 1495 { 1496 unsigned i; 1497 1498 if (CF_NONTRIVIAL_P (f)) 1499 { 1500 for (i = 0; i < f->n; i++) 1501 affine_fn_free (f->fns[i]); 1502 } 1503 free (f); 1504 } 1505 1506 /* Frees memory used by SUBSCRIPTS. */ 1507 1508 static void 1509 free_subscripts (vec<subscript_p> subscripts) 1510 { 1511 unsigned i; 1512 subscript_p s; 1513 1514 FOR_EACH_VEC_ELT (subscripts, i, s) 1515 { 1516 free_conflict_function (s->conflicting_iterations_in_a); 1517 free_conflict_function (s->conflicting_iterations_in_b); 1518 free (s); 1519 } 1520 subscripts.release (); 1521 } 1522 1523 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap 1524 description. */ 1525 1526 static inline void 1527 finalize_ddr_dependent (struct data_dependence_relation *ddr, 1528 tree chrec) 1529 { 1530 DDR_ARE_DEPENDENT (ddr) = chrec; 1531 free_subscripts (DDR_SUBSCRIPTS (ddr)); 1532 DDR_SUBSCRIPTS (ddr).create (0); 1533 } 1534 1535 /* The dependence relation DDR cannot be represented by a distance 1536 vector. */ 1537 1538 static inline void 1539 non_affine_dependence_relation (struct data_dependence_relation *ddr) 1540 { 1541 if (dump_file && (dump_flags & TDF_DETAILS)) 1542 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n"); 1543 1544 DDR_AFFINE_P (ddr) = false; 1545 } 1546 1547 1548 1549 /* This section contains the classic Banerjee tests. */ 1550 1551 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index 1552 variables, i.e., if the ZIV (Zero Index Variable) test is true. */ 1553 1554 static inline bool 1555 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b) 1556 { 1557 return (evolution_function_is_constant_p (chrec_a) 1558 && evolution_function_is_constant_p (chrec_b)); 1559 } 1560 1561 /* Returns true iff CHREC_A and CHREC_B are dependent on an index 1562 variable, i.e., if the SIV (Single Index Variable) test is true. 
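
   For example, the subscript pair (5, 6) is ZIV (both functions are
   constant), the pairs ({0, +, 1}_1, 5) and ({0, +, 2}_1, {3, +, 2}_1)
   are SIV (a single loop index is involved), while ({0, +, 1}_1,
   {0, +, 1}_2) and any pair involving a multivariate chrec are
   classified as MIV.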
*/ 1563 1564 static bool 1565 siv_subscript_p (const_tree chrec_a, const_tree chrec_b) 1566 { 1567 if ((evolution_function_is_constant_p (chrec_a) 1568 && evolution_function_is_univariate_p (chrec_b)) 1569 || (evolution_function_is_constant_p (chrec_b) 1570 && evolution_function_is_univariate_p (chrec_a))) 1571 return true; 1572 1573 if (evolution_function_is_univariate_p (chrec_a) 1574 && evolution_function_is_univariate_p (chrec_b)) 1575 { 1576 switch (TREE_CODE (chrec_a)) 1577 { 1578 case POLYNOMIAL_CHREC: 1579 switch (TREE_CODE (chrec_b)) 1580 { 1581 case POLYNOMIAL_CHREC: 1582 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b)) 1583 return false; 1584 1585 default: 1586 return true; 1587 } 1588 1589 default: 1590 return true; 1591 } 1592 } 1593 1594 return false; 1595 } 1596 1597 /* Creates a conflict function with N dimensions. The affine functions 1598 in each dimension follow. */ 1599 1600 static conflict_function * 1601 conflict_fn (unsigned n, ...) 1602 { 1603 unsigned i; 1604 conflict_function *ret = XCNEW (conflict_function); 1605 va_list ap; 1606 1607 gcc_assert (0 < n && n <= MAX_DIM); 1608 va_start(ap, n); 1609 1610 ret->n = n; 1611 for (i = 0; i < n; i++) 1612 ret->fns[i] = va_arg (ap, affine_fn); 1613 va_end(ap); 1614 1615 return ret; 1616 } 1617 1618 /* Returns constant affine function with value CST. */ 1619 1620 static affine_fn 1621 affine_fn_cst (tree cst) 1622 { 1623 affine_fn fn; 1624 fn.create (1); 1625 fn.quick_push (cst); 1626 return fn; 1627 } 1628 1629 /* Returns affine function with single variable, CST + COEF * x_DIM. */ 1630 1631 static affine_fn 1632 affine_fn_univar (tree cst, unsigned dim, tree coef) 1633 { 1634 affine_fn fn; 1635 fn.create (dim + 1); 1636 unsigned i; 1637 1638 gcc_assert (dim > 0); 1639 fn.quick_push (cst); 1640 for (i = 1; i < dim; i++) 1641 fn.quick_push (integer_zero_node); 1642 fn.quick_push (coef); 1643 return fn; 1644 } 1645 1646 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and 1647 *OVERLAPS_B are initialized to the functions that describe the 1648 relation between the elements accessed twice by CHREC_A and 1649 CHREC_B. For k >= 0, the following property is verified: 1650 1651 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */ 1652 1653 static void 1654 analyze_ziv_subscript (tree chrec_a, 1655 tree chrec_b, 1656 conflict_function **overlaps_a, 1657 conflict_function **overlaps_b, 1658 tree *last_conflicts) 1659 { 1660 tree type, difference; 1661 dependence_stats.num_ziv++; 1662 1663 if (dump_file && (dump_flags & TDF_DETAILS)) 1664 fprintf (dump_file, "(analyze_ziv_subscript \n"); 1665 1666 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b)); 1667 chrec_a = chrec_convert (type, chrec_a, NULL); 1668 chrec_b = chrec_convert (type, chrec_b, NULL); 1669 difference = chrec_fold_minus (type, chrec_a, chrec_b); 1670 1671 switch (TREE_CODE (difference)) 1672 { 1673 case INTEGER_CST: 1674 if (integer_zerop (difference)) 1675 { 1676 /* The difference is equal to zero: the accessed index 1677 overlaps for each iteration in the loop. */ 1678 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node)); 1679 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node)); 1680 *last_conflicts = chrec_dont_know; 1681 dependence_stats.num_ziv_dependent++; 1682 } 1683 else 1684 { 1685 /* The accesses do not overlap. 
*/ 1686 *overlaps_a = conflict_fn_no_dependence (); 1687 *overlaps_b = conflict_fn_no_dependence (); 1688 *last_conflicts = integer_zero_node; 1689 dependence_stats.num_ziv_independent++; 1690 } 1691 break; 1692 1693 default: 1694 /* We're not sure whether the indexes overlap. For the moment, 1695 conservatively answer "don't know". */ 1696 if (dump_file && (dump_flags & TDF_DETAILS)) 1697 fprintf (dump_file, "ziv test failed: difference is non-integer.\n"); 1698 1699 *overlaps_a = conflict_fn_not_known (); 1700 *overlaps_b = conflict_fn_not_known (); 1701 *last_conflicts = chrec_dont_know; 1702 dependence_stats.num_ziv_unimplemented++; 1703 break; 1704 } 1705 1706 if (dump_file && (dump_flags & TDF_DETAILS)) 1707 fprintf (dump_file, ")\n"); 1708 } 1709 1710 /* Similar to max_stmt_executions_int, but returns the bound as a tree, 1711 and only if it fits to the int type. If this is not the case, or the 1712 bound on the number of iterations of LOOP could not be derived, returns 1713 chrec_dont_know. */ 1714 1715 static tree 1716 max_stmt_executions_tree (struct loop *loop) 1717 { 1718 double_int nit; 1719 1720 if (!max_stmt_executions (loop, &nit)) 1721 return chrec_dont_know; 1722 1723 if (!double_int_fits_to_tree_p (unsigned_type_node, nit)) 1724 return chrec_dont_know; 1725 1726 return double_int_to_tree (unsigned_type_node, nit); 1727 } 1728 1729 /* Determine whether the CHREC is always positive/negative. If the expression 1730 cannot be statically analyzed, return false, otherwise set the answer into 1731 VALUE. */ 1732 1733 static bool 1734 chrec_is_positive (tree chrec, bool *value) 1735 { 1736 bool value0, value1, value2; 1737 tree end_value, nb_iter; 1738 1739 switch (TREE_CODE (chrec)) 1740 { 1741 case POLYNOMIAL_CHREC: 1742 if (!chrec_is_positive (CHREC_LEFT (chrec), &value0) 1743 || !chrec_is_positive (CHREC_RIGHT (chrec), &value1)) 1744 return false; 1745 1746 /* FIXME -- overflows. */ 1747 if (value0 == value1) 1748 { 1749 *value = value0; 1750 return true; 1751 } 1752 1753 /* Otherwise the chrec is under the form: "{-197, +, 2}_1", 1754 and the proof consists in showing that the sign never 1755 changes during the execution of the loop, from 0 to 1756 loop->nb_iterations. */ 1757 if (!evolution_function_is_affine_p (chrec)) 1758 return false; 1759 1760 nb_iter = number_of_latch_executions (get_chrec_loop (chrec)); 1761 if (chrec_contains_undetermined (nb_iter)) 1762 return false; 1763 1764 #if 0 1765 /* TODO -- If the test is after the exit, we may decrease the number of 1766 iterations by one. */ 1767 if (after_exit) 1768 nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1)); 1769 #endif 1770 1771 end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter); 1772 1773 if (!chrec_is_positive (end_value, &value2)) 1774 return false; 1775 1776 *value = value0; 1777 return value0 == value1; 1778 1779 case INTEGER_CST: 1780 switch (tree_int_cst_sgn (chrec)) 1781 { 1782 case -1: 1783 *value = false; 1784 break; 1785 case 1: 1786 *value = true; 1787 break; 1788 default: 1789 return false; 1790 } 1791 return true; 1792 1793 default: 1794 return false; 1795 } 1796 } 1797 1798 1799 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a 1800 constant, and CHREC_B is an affine function. *OVERLAPS_A and 1801 *OVERLAPS_B are initialized to the functions that describe the 1802 relation between the elements accessed twice by CHREC_A and 1803 CHREC_B. 
For k >= 0, the following property is verified: 1804 1805 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */ 1806 1807 static void 1808 analyze_siv_subscript_cst_affine (tree chrec_a, 1809 tree chrec_b, 1810 conflict_function **overlaps_a, 1811 conflict_function **overlaps_b, 1812 tree *last_conflicts) 1813 { 1814 bool value0, value1, value2; 1815 tree type, difference, tmp; 1816 1817 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b)); 1818 chrec_a = chrec_convert (type, chrec_a, NULL); 1819 chrec_b = chrec_convert (type, chrec_b, NULL); 1820 difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a); 1821 1822 /* Special case overlap in the first iteration. */ 1823 if (integer_zerop (difference)) 1824 { 1825 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node)); 1826 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node)); 1827 *last_conflicts = integer_one_node; 1828 return; 1829 } 1830 1831 if (!chrec_is_positive (initial_condition (difference), &value0)) 1832 { 1833 if (dump_file && (dump_flags & TDF_DETAILS)) 1834 fprintf (dump_file, "siv test failed: chrec is not positive.\n"); 1835 1836 dependence_stats.num_siv_unimplemented++; 1837 *overlaps_a = conflict_fn_not_known (); 1838 *overlaps_b = conflict_fn_not_known (); 1839 *last_conflicts = chrec_dont_know; 1840 return; 1841 } 1842 else 1843 { 1844 if (value0 == false) 1845 { 1846 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1)) 1847 { 1848 if (dump_file && (dump_flags & TDF_DETAILS)) 1849 fprintf (dump_file, "siv test failed: chrec not positive.\n"); 1850 1851 *overlaps_a = conflict_fn_not_known (); 1852 *overlaps_b = conflict_fn_not_known (); 1853 *last_conflicts = chrec_dont_know; 1854 dependence_stats.num_siv_unimplemented++; 1855 return; 1856 } 1857 else 1858 { 1859 if (value1 == true) 1860 { 1861 /* Example: 1862 chrec_a = 12 1863 chrec_b = {10, +, 1} 1864 */ 1865 1866 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference)) 1867 { 1868 HOST_WIDE_INT numiter; 1869 struct loop *loop = get_chrec_loop (chrec_b); 1870 1871 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node)); 1872 tmp = fold_build2 (EXACT_DIV_EXPR, type, 1873 fold_build1 (ABS_EXPR, type, difference), 1874 CHREC_RIGHT (chrec_b)); 1875 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp)); 1876 *last_conflicts = integer_one_node; 1877 1878 1879 /* Perform weak-zero siv test to see if overlap is 1880 outside the loop bounds. */ 1881 numiter = max_stmt_executions_int (loop); 1882 1883 if (numiter >= 0 1884 && compare_tree_int (tmp, numiter) > 0) 1885 { 1886 free_conflict_function (*overlaps_a); 1887 free_conflict_function (*overlaps_b); 1888 *overlaps_a = conflict_fn_no_dependence (); 1889 *overlaps_b = conflict_fn_no_dependence (); 1890 *last_conflicts = integer_zero_node; 1891 dependence_stats.num_siv_independent++; 1892 return; 1893 } 1894 dependence_stats.num_siv_dependent++; 1895 return; 1896 } 1897 1898 /* When the step does not divide the difference, there are 1899 no overlaps. */ 1900 else 1901 { 1902 *overlaps_a = conflict_fn_no_dependence (); 1903 *overlaps_b = conflict_fn_no_dependence (); 1904 *last_conflicts = integer_zero_node; 1905 dependence_stats.num_siv_independent++; 1906 return; 1907 } 1908 } 1909 1910 else 1911 { 1912 /* Example: 1913 chrec_a = 12 1914 chrec_b = {10, +, -1} 1915 1916 In this case, chrec_a will not overlap with chrec_b. 
*/ 1917 *overlaps_a = conflict_fn_no_dependence (); 1918 *overlaps_b = conflict_fn_no_dependence (); 1919 *last_conflicts = integer_zero_node; 1920 dependence_stats.num_siv_independent++; 1921 return; 1922 } 1923 } 1924 } 1925 else 1926 { 1927 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2)) 1928 { 1929 if (dump_file && (dump_flags & TDF_DETAILS)) 1930 fprintf (dump_file, "siv test failed: chrec not positive.\n"); 1931 1932 *overlaps_a = conflict_fn_not_known (); 1933 *overlaps_b = conflict_fn_not_known (); 1934 *last_conflicts = chrec_dont_know; 1935 dependence_stats.num_siv_unimplemented++; 1936 return; 1937 } 1938 else 1939 { 1940 if (value2 == false) 1941 { 1942 /* Example: 1943 chrec_a = 3 1944 chrec_b = {10, +, -1} 1945 */ 1946 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference)) 1947 { 1948 HOST_WIDE_INT numiter; 1949 struct loop *loop = get_chrec_loop (chrec_b); 1950 1951 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node)); 1952 tmp = fold_build2 (EXACT_DIV_EXPR, type, difference, 1953 CHREC_RIGHT (chrec_b)); 1954 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp)); 1955 *last_conflicts = integer_one_node; 1956 1957 /* Perform weak-zero siv test to see if overlap is 1958 outside the loop bounds. */ 1959 numiter = max_stmt_executions_int (loop); 1960 1961 if (numiter >= 0 1962 && compare_tree_int (tmp, numiter) > 0) 1963 { 1964 free_conflict_function (*overlaps_a); 1965 free_conflict_function (*overlaps_b); 1966 *overlaps_a = conflict_fn_no_dependence (); 1967 *overlaps_b = conflict_fn_no_dependence (); 1968 *last_conflicts = integer_zero_node; 1969 dependence_stats.num_siv_independent++; 1970 return; 1971 } 1972 dependence_stats.num_siv_dependent++; 1973 return; 1974 } 1975 1976 /* When the step does not divide the difference, there 1977 are no overlaps. */ 1978 else 1979 { 1980 *overlaps_a = conflict_fn_no_dependence (); 1981 *overlaps_b = conflict_fn_no_dependence (); 1982 *last_conflicts = integer_zero_node; 1983 dependence_stats.num_siv_independent++; 1984 return; 1985 } 1986 } 1987 else 1988 { 1989 /* Example: 1990 chrec_a = 3 1991 chrec_b = {4, +, 1} 1992 1993 In this case, chrec_a will not overlap with chrec_b. */ 1994 *overlaps_a = conflict_fn_no_dependence (); 1995 *overlaps_b = conflict_fn_no_dependence (); 1996 *last_conflicts = integer_zero_node; 1997 dependence_stats.num_siv_independent++; 1998 return; 1999 } 2000 } 2001 } 2002 } 2003 } 2004 2005 /* Helper recursive function for initializing the matrix A. Returns 2006 the initial value of CHREC. */ 2007 2008 static tree 2009 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult) 2010 { 2011 gcc_assert (chrec); 2012 2013 switch (TREE_CODE (chrec)) 2014 { 2015 case POLYNOMIAL_CHREC: 2016 gcc_assert (TREE_CODE (CHREC_RIGHT (chrec)) == INTEGER_CST); 2017 2018 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec)); 2019 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult); 2020 2021 case PLUS_EXPR: 2022 case MULT_EXPR: 2023 case MINUS_EXPR: 2024 { 2025 tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult); 2026 tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult); 2027 2028 return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1); 2029 } 2030 2031 case NOP_EXPR: 2032 { 2033 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult); 2034 return chrec_convert (chrec_type (chrec), op, NULL); 2035 } 2036 2037 case BIT_NOT_EXPR: 2038 { 2039 /* Handle ~X as -1 - X. 
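   For instance, ~5 == -1 - 5 == -6, so the returned initial value is
   -1 minus the initial value of the operand.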
*/ 2040 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult); 2041 return chrec_fold_op (MINUS_EXPR, chrec_type (chrec), 2042 build_int_cst (TREE_TYPE (chrec), -1), op); 2043 } 2044 2045 case INTEGER_CST: 2046 return chrec; 2047 2048 default: 2049 gcc_unreachable (); 2050 return NULL_TREE; 2051 } 2052 } 2053 2054 #define FLOOR_DIV(x,y) ((x) / (y)) 2055 2056 /* Solves the special case of the Diophantine equation: 2057 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B) 2058 2059 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the 2060 number of iterations that loops X and Y run. The overlaps will be 2061 constructed as evolutions in dimension DIM. */ 2062 2063 static void 2064 compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b, 2065 affine_fn *overlaps_a, 2066 affine_fn *overlaps_b, 2067 tree *last_conflicts, int dim) 2068 { 2069 if (((step_a > 0 && step_b > 0) 2070 || (step_a < 0 && step_b < 0))) 2071 { 2072 int step_overlaps_a, step_overlaps_b; 2073 int gcd_steps_a_b, last_conflict, tau2; 2074 2075 gcd_steps_a_b = gcd (step_a, step_b); 2076 step_overlaps_a = step_b / gcd_steps_a_b; 2077 step_overlaps_b = step_a / gcd_steps_a_b; 2078 2079 if (niter > 0) 2080 { 2081 tau2 = FLOOR_DIV (niter, step_overlaps_a); 2082 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b)); 2083 last_conflict = tau2; 2084 *last_conflicts = build_int_cst (NULL_TREE, last_conflict); 2085 } 2086 else 2087 *last_conflicts = chrec_dont_know; 2088 2089 *overlaps_a = affine_fn_univar (integer_zero_node, dim, 2090 build_int_cst (NULL_TREE, 2091 step_overlaps_a)); 2092 *overlaps_b = affine_fn_univar (integer_zero_node, dim, 2093 build_int_cst (NULL_TREE, 2094 step_overlaps_b)); 2095 } 2096 2097 else 2098 { 2099 *overlaps_a = affine_fn_cst (integer_zero_node); 2100 *overlaps_b = affine_fn_cst (integer_zero_node); 2101 *last_conflicts = integer_zero_node; 2102 } 2103 } 2104 2105 /* Solves the special case of a Diophantine equation where CHREC_A is 2106 an affine bivariate function, and CHREC_B is an affine univariate 2107 function. For example, 2108 2109 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z 2110 2111 has the following overlapping functions: 2112 2113 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v 2114 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v 2115 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v 2116 2117 FORNOW: This is a specialized implementation for a case occurring in 2118 a common benchmark. Implement the general algorithm. 
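As an illustrative check (this numeric example is not part of the original comment): evaluating the overlapping functions above at (t, u, v) = (1, 1, 1) gives x = y = z = 1337, and indeed 1337 + 1335 * 1337 == 1336 * 1337, i.e. both accesses touch the same element at these iterations.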
*/ 2119 2120 static void 2121 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b, 2122 conflict_function **overlaps_a, 2123 conflict_function **overlaps_b, 2124 tree *last_conflicts) 2125 { 2126 bool xz_p, yz_p, xyz_p; 2127 int step_x, step_y, step_z; 2128 HOST_WIDE_INT niter_x, niter_y, niter_z, niter; 2129 affine_fn overlaps_a_xz, overlaps_b_xz; 2130 affine_fn overlaps_a_yz, overlaps_b_yz; 2131 affine_fn overlaps_a_xyz, overlaps_b_xyz; 2132 affine_fn ova1, ova2, ovb; 2133 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz; 2134 2135 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a))); 2136 step_y = int_cst_value (CHREC_RIGHT (chrec_a)); 2137 step_z = int_cst_value (CHREC_RIGHT (chrec_b)); 2138 2139 niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a))); 2140 niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a)); 2141 niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b)); 2142 2143 if (niter_x < 0 || niter_y < 0 || niter_z < 0) 2144 { 2145 if (dump_file && (dump_flags & TDF_DETAILS)) 2146 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n"); 2147 2148 *overlaps_a = conflict_fn_not_known (); 2149 *overlaps_b = conflict_fn_not_known (); 2150 *last_conflicts = chrec_dont_know; 2151 return; 2152 } 2153 2154 niter = MIN (niter_x, niter_z); 2155 compute_overlap_steps_for_affine_univar (niter, step_x, step_z, 2156 &overlaps_a_xz, 2157 &overlaps_b_xz, 2158 &last_conflicts_xz, 1); 2159 niter = MIN (niter_y, niter_z); 2160 compute_overlap_steps_for_affine_univar (niter, step_y, step_z, 2161 &overlaps_a_yz, 2162 &overlaps_b_yz, 2163 &last_conflicts_yz, 2); 2164 niter = MIN (niter_x, niter_z); 2165 niter = MIN (niter_y, niter); 2166 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z, 2167 &overlaps_a_xyz, 2168 &overlaps_b_xyz, 2169 &last_conflicts_xyz, 3); 2170 2171 xz_p = !integer_zerop (last_conflicts_xz); 2172 yz_p = !integer_zerop (last_conflicts_yz); 2173 xyz_p = !integer_zerop (last_conflicts_xyz); 2174 2175 if (xz_p || yz_p || xyz_p) 2176 { 2177 ova1 = affine_fn_cst (integer_zero_node); 2178 ova2 = affine_fn_cst (integer_zero_node); 2179 ovb = affine_fn_cst (integer_zero_node); 2180 if (xz_p) 2181 { 2182 affine_fn t0 = ova1; 2183 affine_fn t2 = ovb; 2184 2185 ova1 = affine_fn_plus (ova1, overlaps_a_xz); 2186 ovb = affine_fn_plus (ovb, overlaps_b_xz); 2187 affine_fn_free (t0); 2188 affine_fn_free (t2); 2189 *last_conflicts = last_conflicts_xz; 2190 } 2191 if (yz_p) 2192 { 2193 affine_fn t0 = ova2; 2194 affine_fn t2 = ovb; 2195 2196 ova2 = affine_fn_plus (ova2, overlaps_a_yz); 2197 ovb = affine_fn_plus (ovb, overlaps_b_yz); 2198 affine_fn_free (t0); 2199 affine_fn_free (t2); 2200 *last_conflicts = last_conflicts_yz; 2201 } 2202 if (xyz_p) 2203 { 2204 affine_fn t0 = ova1; 2205 affine_fn t2 = ova2; 2206 affine_fn t4 = ovb; 2207 2208 ova1 = affine_fn_plus (ova1, overlaps_a_xyz); 2209 ova2 = affine_fn_plus (ova2, overlaps_a_xyz); 2210 ovb = affine_fn_plus (ovb, overlaps_b_xyz); 2211 affine_fn_free (t0); 2212 affine_fn_free (t2); 2213 affine_fn_free (t4); 2214 *last_conflicts = last_conflicts_xyz; 2215 } 2216 *overlaps_a = conflict_fn (2, ova1, ova2); 2217 *overlaps_b = conflict_fn (1, ovb); 2218 } 2219 else 2220 { 2221 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node)); 2222 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node)); 2223 *last_conflicts = integer_zero_node; 2224 } 2225 2226 affine_fn_free (overlaps_a_xz); 2227 affine_fn_free (overlaps_b_xz); 2228 
affine_fn_free (overlaps_a_yz); 2229 affine_fn_free (overlaps_b_yz); 2230 affine_fn_free (overlaps_a_xyz); 2231 affine_fn_free (overlaps_b_xyz); 2232 } 2233 2234 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */ 2235 2236 static void 2237 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2, 2238 int size) 2239 { 2240 memcpy (vec2, vec1, size * sizeof (*vec1)); 2241 } 2242 2243 /* Copy the elements of M x N matrix MAT1 to MAT2. */ 2244 2245 static void 2246 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2, 2247 int m, int n) 2248 { 2249 int i; 2250 2251 for (i = 0; i < m; i++) 2252 lambda_vector_copy (mat1[i], mat2[i], n); 2253 } 2254 2255 /* Store the N x N identity matrix in MAT. */ 2256 2257 static void 2258 lambda_matrix_id (lambda_matrix mat, int size) 2259 { 2260 int i, j; 2261 2262 for (i = 0; i < size; i++) 2263 for (j = 0; j < size; j++) 2264 mat[i][j] = (i == j) ? 1 : 0; 2265 } 2266 2267 /* Return the first nonzero element of vector VEC1 between START and N. 2268 We must have START <= N. Returns N if VEC1 is the zero vector. */ 2269 2270 static int 2271 lambda_vector_first_nz (lambda_vector vec1, int n, int start) 2272 { 2273 int j = start; 2274 while (j < n && vec1[j] == 0) 2275 j++; 2276 return j; 2277 } 2278 2279 /* Add a multiple of row R1 of matrix MAT with N columns to row R2: 2280 R2 = R2 + CONST1 * R1. */ 2281 2282 static void 2283 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1) 2284 { 2285 int i; 2286 2287 if (const1 == 0) 2288 return; 2289 2290 for (i = 0; i < n; i++) 2291 mat[r2][i] += const1 * mat[r1][i]; 2292 } 2293 2294 /* Swap rows R1 and R2 in matrix MAT. */ 2295 2296 static void 2297 lambda_matrix_row_exchange (lambda_matrix mat, int r1, int r2) 2298 { 2299 lambda_vector row; 2300 2301 row = mat[r1]; 2302 mat[r1] = mat[r2]; 2303 mat[r2] = row; 2304 } 2305 2306 /* Multiply vector VEC1 of length SIZE by a constant CONST1, 2307 and store the result in VEC2. */ 2308 2309 static void 2310 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2, 2311 int size, int const1) 2312 { 2313 int i; 2314 2315 if (const1 == 0) 2316 lambda_vector_clear (vec2, size); 2317 else 2318 for (i = 0; i < size; i++) 2319 vec2[i] = const1 * vec1[i]; 2320 } 2321 2322 /* Negate vector VEC1 with length SIZE and store it in VEC2. */ 2323 2324 static void 2325 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2, 2326 int size) 2327 { 2328 lambda_vector_mult_const (vec1, vec2, size, -1); 2329 } 2330 2331 /* Negate row R1 of matrix MAT which has N columns. */ 2332 2333 static void 2334 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1) 2335 { 2336 lambda_vector_negate (mat[r1], mat[r1], n); 2337 } 2338 2339 /* Return true if two vectors are equal. */ 2340 2341 static bool 2342 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size) 2343 { 2344 int i; 2345 for (i = 0; i < size; i++) 2346 if (vec1[i] != vec2[i]) 2347 return false; 2348 return true; 2349 } 2350 2351 /* Given an M x N integer matrix A, this function determines an M x 2352 M unimodular matrix U, and an M x N echelon matrix S such that 2353 "U.A = S". This decomposition is also known as "right Hermite". 2354 2355 Ref: Algorithm 2.1 page 33 in "Loop Transformations for 2356 Restructuring Compilers" Utpal Banerjee. 
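For illustration (this example is not taken from the reference): for the 2 x 1 matrix A = (4, 6)^T, one valid right Hermite decomposition is U = ((-1, 1), (3, -2)), giving S = U.A = (2, 0)^T, where det (U) = -1 makes U unimodular and S[0][0] = gcd (4, 6) = 2. The code below may compute a different, equally valid unimodular U.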
*/ 2357 2358 static void 2359 lambda_matrix_right_hermite (lambda_matrix A, int m, int n, 2360 lambda_matrix S, lambda_matrix U) 2361 { 2362 int i, j, i0 = 0; 2363 2364 lambda_matrix_copy (A, S, m, n); 2365 lambda_matrix_id (U, m); 2366 2367 for (j = 0; j < n; j++) 2368 { 2369 if (lambda_vector_first_nz (S[j], m, i0) < m) 2370 { 2371 ++i0; 2372 for (i = m - 1; i >= i0; i--) 2373 { 2374 while (S[i][j] != 0) 2375 { 2376 int sigma, factor, a, b; 2377 2378 a = S[i-1][j]; 2379 b = S[i][j]; 2380 sigma = (a * b < 0) ? -1: 1; 2381 a = abs (a); 2382 b = abs (b); 2383 factor = sigma * (a / b); 2384 2385 lambda_matrix_row_add (S, n, i, i-1, -factor); 2386 lambda_matrix_row_exchange (S, i, i-1); 2387 2388 lambda_matrix_row_add (U, m, i, i-1, -factor); 2389 lambda_matrix_row_exchange (U, i, i-1); 2390 } 2391 } 2392 } 2393 } 2394 } 2395 2396 /* Determines the overlapping elements due to accesses CHREC_A and 2397 CHREC_B, that are affine functions. This function cannot handle 2398 symbolic evolution functions, ie. when initial conditions are 2399 parameters, because it uses lambda matrices of integers. */ 2400 2401 static void 2402 analyze_subscript_affine_affine (tree chrec_a, 2403 tree chrec_b, 2404 conflict_function **overlaps_a, 2405 conflict_function **overlaps_b, 2406 tree *last_conflicts) 2407 { 2408 unsigned nb_vars_a, nb_vars_b, dim; 2409 HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta; 2410 lambda_matrix A, U, S; 2411 struct obstack scratch_obstack; 2412 2413 if (eq_evolutions_p (chrec_a, chrec_b)) 2414 { 2415 /* The accessed index overlaps for each iteration in the 2416 loop. */ 2417 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node)); 2418 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node)); 2419 *last_conflicts = chrec_dont_know; 2420 return; 2421 } 2422 if (dump_file && (dump_flags & TDF_DETAILS)) 2423 fprintf (dump_file, "(analyze_subscript_affine_affine \n"); 2424 2425 /* For determining the initial intersection, we have to solve a 2426 Diophantine equation. This is the most time consuming part. 2427 2428 For answering to the question: "Is there a dependence?" we have 2429 to prove that there exists a solution to the Diophantine 2430 equation, and that the solution is in the iteration domain, 2431 i.e. the solution is positive or zero, and that the solution 2432 happens before the upper bound loop.nb_iterations. Otherwise 2433 there is no dependence. This function outputs a description of 2434 the iterations that hold the intersections. */ 2435 2436 nb_vars_a = nb_vars_in_chrec (chrec_a); 2437 nb_vars_b = nb_vars_in_chrec (chrec_b); 2438 2439 gcc_obstack_init (&scratch_obstack); 2440 2441 dim = nb_vars_a + nb_vars_b; 2442 U = lambda_matrix_new (dim, dim, &scratch_obstack); 2443 A = lambda_matrix_new (dim, 1, &scratch_obstack); 2444 S = lambda_matrix_new (dim, 1, &scratch_obstack); 2445 2446 init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1)); 2447 init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1)); 2448 gamma = init_b - init_a; 2449 2450 /* Don't do all the hard work of solving the Diophantine equation 2451 when we already know the solution: for example, 2452 | {3, +, 1}_1 2453 | {3, +, 4}_2 2454 | gamma = 3 - 3 = 0. 
2455 Then the first overlap occurs during the first iterations: 2456 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x) 2457 */ 2458 if (gamma == 0) 2459 { 2460 if (nb_vars_a == 1 && nb_vars_b == 1) 2461 { 2462 HOST_WIDE_INT step_a, step_b; 2463 HOST_WIDE_INT niter, niter_a, niter_b; 2464 affine_fn ova, ovb; 2465 2466 niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a)); 2467 niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b)); 2468 niter = MIN (niter_a, niter_b); 2469 step_a = int_cst_value (CHREC_RIGHT (chrec_a)); 2470 step_b = int_cst_value (CHREC_RIGHT (chrec_b)); 2471 2472 compute_overlap_steps_for_affine_univar (niter, step_a, step_b, 2473 &ova, &ovb, 2474 last_conflicts, 1); 2475 *overlaps_a = conflict_fn (1, ova); 2476 *overlaps_b = conflict_fn (1, ovb); 2477 } 2478 2479 else if (nb_vars_a == 2 && nb_vars_b == 1) 2480 compute_overlap_steps_for_affine_1_2 2481 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts); 2482 2483 else if (nb_vars_a == 1 && nb_vars_b == 2) 2484 compute_overlap_steps_for_affine_1_2 2485 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts); 2486 2487 else 2488 { 2489 if (dump_file && (dump_flags & TDF_DETAILS)) 2490 fprintf (dump_file, "affine-affine test failed: too many variables.\n"); 2491 *overlaps_a = conflict_fn_not_known (); 2492 *overlaps_b = conflict_fn_not_known (); 2493 *last_conflicts = chrec_dont_know; 2494 } 2495 goto end_analyze_subs_aa; 2496 } 2497 2498 /* U.A = S */ 2499 lambda_matrix_right_hermite (A, dim, 1, S, U); 2500 2501 if (S[0][0] < 0) 2502 { 2503 S[0][0] *= -1; 2504 lambda_matrix_row_negate (U, dim, 0); 2505 } 2506 gcd_alpha_beta = S[0][0]; 2507 2508 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5, 2509 but that is a quite strange case. Instead of ICEing, answer 2510 don't know. */ 2511 if (gcd_alpha_beta == 0) 2512 { 2513 *overlaps_a = conflict_fn_not_known (); 2514 *overlaps_b = conflict_fn_not_known (); 2515 *last_conflicts = chrec_dont_know; 2516 goto end_analyze_subs_aa; 2517 } 2518 2519 /* The classic "gcd-test". */ 2520 if (!int_divides_p (gcd_alpha_beta, gamma)) 2521 { 2522 /* The "gcd-test" has determined that there is no integer 2523 solution, i.e. there is no dependence. */ 2524 *overlaps_a = conflict_fn_no_dependence (); 2525 *overlaps_b = conflict_fn_no_dependence (); 2526 *last_conflicts = integer_zero_node; 2527 } 2528 2529 /* Both access functions are univariate. This includes SIV and MIV cases. */ 2530 else if (nb_vars_a == 1 && nb_vars_b == 1) 2531 { 2532 /* Both functions should have the same evolution sign. */ 2533 if (((A[0][0] > 0 && -A[1][0] > 0) 2534 || (A[0][0] < 0 && -A[1][0] < 0))) 2535 { 2536 /* The solutions are given by: 2537 | 2538 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0] 2539 | [u21 u22] [y0] 2540 2541 For a given integer t. Using the following variables, 2542 2543 | i0 = u11 * gamma / gcd_alpha_beta 2544 | j0 = u12 * gamma / gcd_alpha_beta 2545 | i1 = u21 2546 | j1 = u22 2547 2548 the solutions are: 2549 2550 | x0 = i0 + i1 * t, 2551 | y0 = j0 + j1 * t. */ 2552 HOST_WIDE_INT i0, j0, i1, j1; 2553 2554 i0 = U[0][0] * gamma / gcd_alpha_beta; 2555 j0 = U[0][1] * gamma / gcd_alpha_beta; 2556 i1 = U[1][0]; 2557 j1 = U[1][1]; 2558 2559 if ((i1 == 0 && i0 < 0) 2560 || (j1 == 0 && j0 < 0)) 2561 { 2562 /* There is no solution. 2563 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations" 2564 falls in here, but for the moment we don't look at the 2565 upper bound of the iteration domain. 
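For instance (illustration only), i1 == 0 with i0 == -2 describes x0 == -2 for every t, which can never be a valid iteration number, hence the no-dependence answer below.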
*/ 2566 *overlaps_a = conflict_fn_no_dependence (); 2567 *overlaps_b = conflict_fn_no_dependence (); 2568 *last_conflicts = integer_zero_node; 2569 goto end_analyze_subs_aa; 2570 } 2571 2572 if (i1 > 0 && j1 > 0) 2573 { 2574 HOST_WIDE_INT niter_a 2575 = max_stmt_executions_int (get_chrec_loop (chrec_a)); 2576 HOST_WIDE_INT niter_b 2577 = max_stmt_executions_int (get_chrec_loop (chrec_b)); 2578 HOST_WIDE_INT niter = MIN (niter_a, niter_b); 2579 2580 /* (X0, Y0) is a solution of the Diophantine equation: 2581 "chrec_a (X0) = chrec_b (Y0)". */ 2582 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1), 2583 CEIL (-j0, j1)); 2584 HOST_WIDE_INT x0 = i1 * tau1 + i0; 2585 HOST_WIDE_INT y0 = j1 * tau1 + j0; 2586 2587 /* (X1, Y1) is the smallest positive solution of the eq 2588 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the 2589 first conflict occurs. */ 2590 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1); 2591 HOST_WIDE_INT x1 = x0 - i1 * min_multiple; 2592 HOST_WIDE_INT y1 = y0 - j1 * min_multiple; 2593 2594 if (niter > 0) 2595 { 2596 HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter - i0, i1), 2597 FLOOR_DIV (niter - j0, j1)); 2598 HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1; 2599 2600 /* If the overlap occurs outside of the bounds of the 2601 loop, there is no dependence. */ 2602 if (x1 >= niter || y1 >= niter) 2603 { 2604 *overlaps_a = conflict_fn_no_dependence (); 2605 *overlaps_b = conflict_fn_no_dependence (); 2606 *last_conflicts = integer_zero_node; 2607 goto end_analyze_subs_aa; 2608 } 2609 else 2610 *last_conflicts = build_int_cst (NULL_TREE, last_conflict); 2611 } 2612 else 2613 *last_conflicts = chrec_dont_know; 2614 2615 *overlaps_a 2616 = conflict_fn (1, 2617 affine_fn_univar (build_int_cst (NULL_TREE, x1), 2618 1, 2619 build_int_cst (NULL_TREE, i1))); 2620 *overlaps_b 2621 = conflict_fn (1, 2622 affine_fn_univar (build_int_cst (NULL_TREE, y1), 2623 1, 2624 build_int_cst (NULL_TREE, j1))); 2625 } 2626 else 2627 { 2628 /* FIXME: For the moment, the upper bound of the 2629 iteration domain for i and j is not checked. */ 2630 if (dump_file && (dump_flags & TDF_DETAILS)) 2631 fprintf (dump_file, "affine-affine test failed: unimplemented.\n"); 2632 *overlaps_a = conflict_fn_not_known (); 2633 *overlaps_b = conflict_fn_not_known (); 2634 *last_conflicts = chrec_dont_know; 2635 } 2636 } 2637 else 2638 { 2639 if (dump_file && (dump_flags & TDF_DETAILS)) 2640 fprintf (dump_file, "affine-affine test failed: unimplemented.\n"); 2641 *overlaps_a = conflict_fn_not_known (); 2642 *overlaps_b = conflict_fn_not_known (); 2643 *last_conflicts = chrec_dont_know; 2644 } 2645 } 2646 else 2647 { 2648 if (dump_file && (dump_flags & TDF_DETAILS)) 2649 fprintf (dump_file, "affine-affine test failed: unimplemented.\n"); 2650 *overlaps_a = conflict_fn_not_known (); 2651 *overlaps_b = conflict_fn_not_known (); 2652 *last_conflicts = chrec_dont_know; 2653 } 2654 2655 end_analyze_subs_aa: 2656 obstack_free (&scratch_obstack, NULL); 2657 if (dump_file && (dump_flags & TDF_DETAILS)) 2658 { 2659 fprintf (dump_file, " (overlaps_a = "); 2660 dump_conflict_function (dump_file, *overlaps_a); 2661 fprintf (dump_file, ")\n (overlaps_b = "); 2662 dump_conflict_function (dump_file, *overlaps_b); 2663 fprintf (dump_file, "))\n"); 2664 } 2665 } 2666 2667 /* Returns true when analyze_subscript_affine_affine can be used for 2668 determining the dependence relation between chrec_a and chrec_b, 2669 that contain symbols. 
This function modifies chrec_a and chrec_b 2670 such that the analysis result is the same, and such that they don't 2671 contain symbols, and can then safely be passed to the analyzer. 2672 2673 Example: The analysis of the following tuples of evolutions produces 2674 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1 2675 vs. {0, +, 1}_1 2676 2677 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1) 2678 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1) 2679 */ 2680 2681 static bool 2682 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b) 2683 { 2684 tree diff, type, left_a, left_b, right_b; 2685 2686 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a)) 2687 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b))) 2688 /* FIXME: For the moment not handled. Might be refined later. */ 2689 return false; 2690 2691 type = chrec_type (*chrec_a); 2692 left_a = CHREC_LEFT (*chrec_a); 2693 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL); 2694 diff = chrec_fold_minus (type, left_a, left_b); 2695 2696 if (!evolution_function_is_constant_p (diff)) 2697 return false; 2698 2699 if (dump_file && (dump_flags & TDF_DETAILS)) 2700 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n"); 2701 2702 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a), 2703 diff, CHREC_RIGHT (*chrec_a)); 2704 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL); 2705 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b), 2706 build_int_cst (type, 0), 2707 right_b); 2708 return true; 2709 } 2710 2711 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and 2712 *OVERLAPS_B are initialized to the functions that describe the 2713 relation between the elements accessed twice by CHREC_A and 2714 CHREC_B. For k >= 0, the following property is verified: 2715 2716 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).
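For illustration (this example is not in the original comment): with CHREC_A = {0, +, 2}_1 and CHREC_B = {2, +, 2}_1, the property is satisfied by *OVERLAPS_A (k) = k + 1 and *OVERLAPS_B (k) = k, since 2 * (k + 1) == 2 + 2 * k.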
*/ 2717 2718 static void 2719 analyze_siv_subscript (tree chrec_a, 2720 tree chrec_b, 2721 conflict_function **overlaps_a, 2722 conflict_function **overlaps_b, 2723 tree *last_conflicts, 2724 int loop_nest_num) 2725 { 2726 dependence_stats.num_siv++; 2727 2728 if (dump_file && (dump_flags & TDF_DETAILS)) 2729 fprintf (dump_file, "(analyze_siv_subscript \n"); 2730 2731 if (evolution_function_is_constant_p (chrec_a) 2732 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num)) 2733 analyze_siv_subscript_cst_affine (chrec_a, chrec_b, 2734 overlaps_a, overlaps_b, last_conflicts); 2735 2736 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num) 2737 && evolution_function_is_constant_p (chrec_b)) 2738 analyze_siv_subscript_cst_affine (chrec_b, chrec_a, 2739 overlaps_b, overlaps_a, last_conflicts); 2740 2741 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num) 2742 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num)) 2743 { 2744 if (!chrec_contains_symbols (chrec_a) 2745 && !chrec_contains_symbols (chrec_b)) 2746 { 2747 analyze_subscript_affine_affine (chrec_a, chrec_b, 2748 overlaps_a, overlaps_b, 2749 last_conflicts); 2750 2751 if (CF_NOT_KNOWN_P (*overlaps_a) 2752 || CF_NOT_KNOWN_P (*overlaps_b)) 2753 dependence_stats.num_siv_unimplemented++; 2754 else if (CF_NO_DEPENDENCE_P (*overlaps_a) 2755 || CF_NO_DEPENDENCE_P (*overlaps_b)) 2756 dependence_stats.num_siv_independent++; 2757 else 2758 dependence_stats.num_siv_dependent++; 2759 } 2760 else if (can_use_analyze_subscript_affine_affine (&chrec_a, 2761 &chrec_b)) 2762 { 2763 analyze_subscript_affine_affine (chrec_a, chrec_b, 2764 overlaps_a, overlaps_b, 2765 last_conflicts); 2766 2767 if (CF_NOT_KNOWN_P (*overlaps_a) 2768 || CF_NOT_KNOWN_P (*overlaps_b)) 2769 dependence_stats.num_siv_unimplemented++; 2770 else if (CF_NO_DEPENDENCE_P (*overlaps_a) 2771 || CF_NO_DEPENDENCE_P (*overlaps_b)) 2772 dependence_stats.num_siv_independent++; 2773 else 2774 dependence_stats.num_siv_dependent++; 2775 } 2776 else 2777 goto siv_subscript_dontknow; 2778 } 2779 2780 else 2781 { 2782 siv_subscript_dontknow:; 2783 if (dump_file && (dump_flags & TDF_DETAILS)) 2784 fprintf (dump_file, " siv test failed: unimplemented"); 2785 *overlaps_a = conflict_fn_not_known (); 2786 *overlaps_b = conflict_fn_not_known (); 2787 *last_conflicts = chrec_dont_know; 2788 dependence_stats.num_siv_unimplemented++; 2789 } 2790 2791 if (dump_file && (dump_flags & TDF_DETAILS)) 2792 fprintf (dump_file, ")\n"); 2793 } 2794 2795 /* Returns false if we can prove that the greatest common divisor of the steps 2796 of CHREC does not divide CST, true otherwise. */ 2797 2798 static bool 2799 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst) 2800 { 2801 HOST_WIDE_INT cd = 0, val; 2802 tree step; 2803 2804 if (!host_integerp (cst, 0)) 2805 return true; 2806 val = tree_low_cst (cst, 0); 2807 2808 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC) 2809 { 2810 step = CHREC_RIGHT (chrec); 2811 if (!host_integerp (step, 0)) 2812 return true; 2813 cd = gcd (cd, tree_low_cst (step, 0)); 2814 chrec = CHREC_LEFT (chrec); 2815 } 2816 2817 return val % cd == 0; 2818 } 2819 2820 /* Analyze a MIV (Multiple Index Variable) subscript with respect to 2821 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the 2822 functions that describe the relation between the elements accessed 2823 twice by CHREC_A and CHREC_B. For k >= 0, the following property 2824 is verified: 2825 2826 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).
*/ 2827 2828 static void 2829 analyze_miv_subscript (tree chrec_a, 2830 tree chrec_b, 2831 conflict_function **overlaps_a, 2832 conflict_function **overlaps_b, 2833 tree *last_conflicts, 2834 struct loop *loop_nest) 2835 { 2836 tree type, difference; 2837 2838 dependence_stats.num_miv++; 2839 if (dump_file && (dump_flags & TDF_DETAILS)) 2840 fprintf (dump_file, "(analyze_miv_subscript \n"); 2841 2842 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b)); 2843 chrec_a = chrec_convert (type, chrec_a, NULL); 2844 chrec_b = chrec_convert (type, chrec_b, NULL); 2845 difference = chrec_fold_minus (type, chrec_a, chrec_b); 2846 2847 if (eq_evolutions_p (chrec_a, chrec_b)) 2848 { 2849 /* Access functions are the same: all the elements are accessed 2850 in the same order. */ 2851 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node)); 2852 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node)); 2853 *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a)); 2854 dependence_stats.num_miv_dependent++; 2855 } 2856 2857 else if (evolution_function_is_constant_p (difference) 2858 /* For the moment, the following is verified: 2859 evolution_function_is_affine_multivariate_p (chrec_a, 2860 loop_nest->num) */ 2861 && !gcd_of_steps_may_divide_p (chrec_a, difference)) 2862 { 2863 /* testsuite/.../ssa-chrec-33.c 2864 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2 2865 2866 The difference is 1, and all the evolution steps are multiples 2867 of 2, consequently there are no overlapping elements. */ 2868 *overlaps_a = conflict_fn_no_dependence (); 2869 *overlaps_b = conflict_fn_no_dependence (); 2870 *last_conflicts = integer_zero_node; 2871 dependence_stats.num_miv_independent++; 2872 } 2873 2874 else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num) 2875 && !chrec_contains_symbols (chrec_a) 2876 && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num) 2877 && !chrec_contains_symbols (chrec_b)) 2878 { 2879 /* testsuite/.../ssa-chrec-35.c 2880 {0, +, 1}_2 vs. {0, +, 1}_3 2881 the overlapping elements are respectively located at iterations: 2882 {0, +, 1}_x and {0, +, 1}_x, 2883 in other words, we have the equality: 2884 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x) 2885 2886 Other examples: 2887 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) = 2888 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y) 2889 2890 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) = 2891 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) 2892 */ 2893 analyze_subscript_affine_affine (chrec_a, chrec_b, 2894 overlaps_a, overlaps_b, last_conflicts); 2895 2896 if (CF_NOT_KNOWN_P (*overlaps_a) 2897 || CF_NOT_KNOWN_P (*overlaps_b)) 2898 dependence_stats.num_miv_unimplemented++; 2899 else if (CF_NO_DEPENDENCE_P (*overlaps_a) 2900 || CF_NO_DEPENDENCE_P (*overlaps_b)) 2901 dependence_stats.num_miv_independent++; 2902 else 2903 dependence_stats.num_miv_dependent++; 2904 } 2905 2906 else 2907 { 2908 /* When the analysis is too difficult, answer "don't know". 
*/ 2909 if (dump_file && (dump_flags & TDF_DETAILS)) 2910 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n"); 2911 2912 *overlaps_a = conflict_fn_not_known (); 2913 *overlaps_b = conflict_fn_not_known (); 2914 *last_conflicts = chrec_dont_know; 2915 dependence_stats.num_miv_unimplemented++; 2916 } 2917 2918 if (dump_file && (dump_flags & TDF_DETAILS)) 2919 fprintf (dump_file, ")\n"); 2920 } 2921 2922 /* Determines the iterations for which CHREC_A is equal to CHREC_B 2923 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and 2924 OVERLAP_ITERATIONS_B are initialized with two functions that 2925 describe the iterations that contain conflicting elements. 2926 2927 Remark: For an integer k >= 0, the following equality is true: 2928 2929 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)). 2930 */ 2931 2932 static void 2933 analyze_overlapping_iterations (tree chrec_a, 2934 tree chrec_b, 2935 conflict_function **overlap_iterations_a, 2936 conflict_function **overlap_iterations_b, 2937 tree *last_conflicts, struct loop *loop_nest) 2938 { 2939 unsigned int lnn = loop_nest->num; 2940 2941 dependence_stats.num_subscript_tests++; 2942 2943 if (dump_file && (dump_flags & TDF_DETAILS)) 2944 { 2945 fprintf (dump_file, "(analyze_overlapping_iterations \n"); 2946 fprintf (dump_file, " (chrec_a = "); 2947 print_generic_expr (dump_file, chrec_a, 0); 2948 fprintf (dump_file, ")\n (chrec_b = "); 2949 print_generic_expr (dump_file, chrec_b, 0); 2950 fprintf (dump_file, ")\n"); 2951 } 2952 2953 if (chrec_a == NULL_TREE 2954 || chrec_b == NULL_TREE 2955 || chrec_contains_undetermined (chrec_a) 2956 || chrec_contains_undetermined (chrec_b)) 2957 { 2958 dependence_stats.num_subscript_undetermined++; 2959 2960 *overlap_iterations_a = conflict_fn_not_known (); 2961 *overlap_iterations_b = conflict_fn_not_known (); 2962 } 2963 2964 /* If they are the same chrec, and are affine, they overlap 2965 on every iteration. */ 2966 else if (eq_evolutions_p (chrec_a, chrec_b) 2967 && (evolution_function_is_affine_multivariate_p (chrec_a, lnn) 2968 || operand_equal_p (chrec_a, chrec_b, 0))) 2969 { 2970 dependence_stats.num_same_subscript_function++; 2971 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node)); 2972 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node)); 2973 *last_conflicts = chrec_dont_know; 2974 } 2975 2976 /* If they aren't the same, and aren't affine, we can't do anything 2977 yet.
*/ 2978 else if ((chrec_contains_symbols (chrec_a) 2979 || chrec_contains_symbols (chrec_b)) 2980 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn) 2981 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn))) 2982 { 2983 dependence_stats.num_subscript_undetermined++; 2984 *overlap_iterations_a = conflict_fn_not_known (); 2985 *overlap_iterations_b = conflict_fn_not_known (); 2986 } 2987 2988 else if (ziv_subscript_p (chrec_a, chrec_b)) 2989 analyze_ziv_subscript (chrec_a, chrec_b, 2990 overlap_iterations_a, overlap_iterations_b, 2991 last_conflicts); 2992 2993 else if (siv_subscript_p (chrec_a, chrec_b)) 2994 analyze_siv_subscript (chrec_a, chrec_b, 2995 overlap_iterations_a, overlap_iterations_b, 2996 last_conflicts, lnn); 2997 2998 else 2999 analyze_miv_subscript (chrec_a, chrec_b, 3000 overlap_iterations_a, overlap_iterations_b, 3001 last_conflicts, loop_nest); 3002 3003 if (dump_file && (dump_flags & TDF_DETAILS)) 3004 { 3005 fprintf (dump_file, " (overlap_iterations_a = "); 3006 dump_conflict_function (dump_file, *overlap_iterations_a); 3007 fprintf (dump_file, ")\n (overlap_iterations_b = "); 3008 dump_conflict_function (dump_file, *overlap_iterations_b); 3009 fprintf (dump_file, "))\n"); 3010 } 3011 } 3012 3013 /* Helper function for uniquely inserting distance vectors. */ 3014 3015 static void 3016 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v) 3017 { 3018 unsigned i; 3019 lambda_vector v; 3020 3021 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, v) 3022 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr))) 3023 return; 3024 3025 DDR_DIST_VECTS (ddr).safe_push (dist_v); 3026 } 3027 3028 /* Helper function for uniquely inserting direction vectors. */ 3029 3030 static void 3031 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v) 3032 { 3033 unsigned i; 3034 lambda_vector v; 3035 3036 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), i, v) 3037 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr))) 3038 return; 3039 3040 DDR_DIR_VECTS (ddr).safe_push (dir_v); 3041 } 3042 3043 /* Add a distance of 1 on all the loops outer than INDEX. If we 3044 haven't yet determined a distance for this outer loop, push a new 3045 distance vector composed of the previous distance, and a distance 3046 of 1 for this outer loop. Example: 3047 3048 | loop_1 3049 | loop_2 3050 | A[10] 3051 | endloop_2 3052 | endloop_1 3053 3054 Saved vectors are of the form (dist_in_1, dist_in_2). First, we 3055 save (0, 1), then we have to save (1, 0). */ 3056 3057 static void 3058 add_outer_distances (struct data_dependence_relation *ddr, 3059 lambda_vector dist_v, int index) 3060 { 3061 /* For each outer loop where init_v is not set, the accesses are 3062 in dependence of distance 1 in the loop. */ 3063 while (--index >= 0) 3064 { 3065 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); 3066 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr)); 3067 save_v[index] = 1; 3068 save_dist_v (ddr, save_v); 3069 } 3070 } 3071 3072 /* Return false when fail to represent the data dependence as a 3073 distance vector. INIT_B is set to true when a component has been 3074 added to the distance vector DIST_V. INDEX_CARRY is then set to 3075 the index in DIST_V that carries the dependence. 
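For illustration (this example is not in the original comment), in

| loop_1 (i)
| A[i+3] = ...
| ... = A[i]
| endloop_1

the element written at iteration i is read again at iteration i + 3, so the subscript distance is 3 and the resulting distance vector is (3).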
*/ 3076 3077 static bool 3078 build_classic_dist_vector_1 (struct data_dependence_relation *ddr, 3079 struct data_reference *ddr_a, 3080 struct data_reference *ddr_b, 3081 lambda_vector dist_v, bool *init_b, 3082 int *index_carry) 3083 { 3084 unsigned i; 3085 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); 3086 3087 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++) 3088 { 3089 tree access_fn_a, access_fn_b; 3090 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i); 3091 3092 if (chrec_contains_undetermined (SUB_DISTANCE (subscript))) 3093 { 3094 non_affine_dependence_relation (ddr); 3095 return false; 3096 } 3097 3098 access_fn_a = DR_ACCESS_FN (ddr_a, i); 3099 access_fn_b = DR_ACCESS_FN (ddr_b, i); 3100 3101 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC 3102 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC) 3103 { 3104 int dist, index; 3105 int var_a = CHREC_VARIABLE (access_fn_a); 3106 int var_b = CHREC_VARIABLE (access_fn_b); 3107 3108 if (var_a != var_b 3109 || chrec_contains_undetermined (SUB_DISTANCE (subscript))) 3110 { 3111 non_affine_dependence_relation (ddr); 3112 return false; 3113 } 3114 3115 dist = int_cst_value (SUB_DISTANCE (subscript)); 3116 index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr)); 3117 *index_carry = MIN (index, *index_carry); 3118 3119 /* This is the subscript coupling test. If we have already 3120 recorded a distance for this loop (a distance coming from 3121 another subscript), it should be the same. For example, 3122 in the following code, there is no dependence: 3123 3124 | loop i = 0, N, 1 3125 | T[i+1][i] = ... 3126 | ... = T[i][i] 3127 | endloop 3128 */ 3129 if (init_v[index] != 0 && dist_v[index] != dist) 3130 { 3131 finalize_ddr_dependent (ddr, chrec_known); 3132 return false; 3133 } 3134 3135 dist_v[index] = dist; 3136 init_v[index] = 1; 3137 *init_b = true; 3138 } 3139 else if (!operand_equal_p (access_fn_a, access_fn_b, 0)) 3140 { 3141 /* This can be for example an affine vs. constant dependence 3142 (T[i] vs. T[3]) that is not an affine dependence and is 3143 not representable as a distance vector. */ 3144 non_affine_dependence_relation (ddr); 3145 return false; 3146 } 3147 } 3148 3149 return true; 3150 } 3151 3152 /* Return true when the DDR contains only constant access functions. */ 3153 3154 static bool 3155 constant_access_functions (const struct data_dependence_relation *ddr) 3156 { 3157 unsigned i; 3158 3159 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++) 3160 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i)) 3161 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i))) 3162 return false; 3163 3164 return true; 3165 } 3166 3167 /* Helper function for the case where DDR_A and DDR_B are the same 3168 multivariate access function with a constant step. For an example 3169 see pr34635-1.c. */ 3170 3171 static void 3172 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2) 3173 { 3174 int x_1, x_2; 3175 tree c_1 = CHREC_LEFT (c_2); 3176 tree c_0 = CHREC_LEFT (c_1); 3177 lambda_vector dist_v; 3178 int v1, v2, cd; 3179 3180 /* Polynomials with more than 2 variables are not handled yet. When 3181 the evolution steps are parameters, it is not possible to 3182 represent the dependence using classical distance vectors. 
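For illustration of the second point: with evolution steps (n, 4), where n is a loop-invariant parameter, the separation between conflicting iterations depends on the run-time value of n, so no compile-time constant distance vector can describe the dependence.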
*/ 3183 if (TREE_CODE (c_0) != INTEGER_CST 3184 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST 3185 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST) 3186 { 3187 DDR_AFFINE_P (ddr) = false; 3188 return; 3189 } 3190 3191 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr)); 3192 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr)); 3193 3194 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */ 3195 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); 3196 v1 = int_cst_value (CHREC_RIGHT (c_1)); 3197 v2 = int_cst_value (CHREC_RIGHT (c_2)); 3198 cd = gcd (v1, v2); 3199 v1 /= cd; 3200 v2 /= cd; 3201 3202 if (v2 < 0) 3203 { 3204 v2 = -v2; 3205 v1 = -v1; 3206 } 3207 3208 dist_v[x_1] = v2; 3209 dist_v[x_2] = -v1; 3210 save_dist_v (ddr, dist_v); 3211 3212 add_outer_distances (ddr, dist_v, x_1); 3213 } 3214 3215 /* Helper function for the case where DDR_A and DDR_B are the same 3216 access functions. */ 3217 3218 static void 3219 add_other_self_distances (struct data_dependence_relation *ddr) 3220 { 3221 lambda_vector dist_v; 3222 unsigned i; 3223 int index_carry = DDR_NB_LOOPS (ddr); 3224 3225 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++) 3226 { 3227 tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i); 3228 3229 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC) 3230 { 3231 if (!evolution_function_is_univariate_p (access_fun)) 3232 { 3233 if (DDR_NUM_SUBSCRIPTS (ddr) != 1) 3234 { 3235 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know; 3236 return; 3237 } 3238 3239 access_fun = DR_ACCESS_FN (DDR_A (ddr), 0); 3240 3241 if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC) 3242 add_multivariate_self_dist (ddr, access_fun); 3243 else 3244 /* The evolution step is not constant: it varies in 3245 the outer loop, so this cannot be represented by a 3246 distance vector. For example in pr34635.c the 3247 evolution is {0, +, {0, +, 4}_1}_2. */ 3248 DDR_AFFINE_P (ddr) = false; 3249 3250 return; 3251 } 3252 3253 index_carry = MIN (index_carry, 3254 index_in_loop_nest (CHREC_VARIABLE (access_fun), 3255 DDR_LOOP_NEST (ddr))); 3256 } 3257 } 3258 3259 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); 3260 add_outer_distances (ddr, dist_v, index_carry); 3261 } 3262 3263 static void 3264 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr) 3265 { 3266 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); 3267 3268 dist_v[DDR_INNER_LOOP (ddr)] = 1; 3269 save_dist_v (ddr, dist_v); 3270 } 3271 3272 /* Adds a unit distance vector to DDR when there is a 0 overlap. This 3273 is the case for example when access functions are the same and 3274 equal to a constant, as in: 3275 3276 | loop_1 3277 | A[3] = ... 3278 | ... = A[3] 3279 | endloop_1 3280 3281 in which case the distance vectors are (0) and (1). */ 3282 3283 static void 3284 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr) 3285 { 3286 unsigned i, j; 3287 3288 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++) 3289 { 3290 subscript_p sub = DDR_SUBSCRIPT (ddr, i); 3291 conflict_function *ca = SUB_CONFLICTS_IN_A (sub); 3292 conflict_function *cb = SUB_CONFLICTS_IN_B (sub); 3293 3294 for (j = 0; j < ca->n; j++) 3295 if (affine_function_zero_p (ca->fns[j])) 3296 { 3297 insert_innermost_unit_dist_vector (ddr); 3298 return; 3299 } 3300 3301 for (j = 0; j < cb->n; j++) 3302 if (affine_function_zero_p (cb->fns[j])) 3303 { 3304 insert_innermost_unit_dist_vector (ddr); 3305 return; 3306 } 3307 } 3308 } 3309 3310 /* Compute the classic per loop distance vector. 
DDR is the data 3311 dependence relation to build a vector from. Return false when fail 3312 to represent the data dependence as a distance vector. */ 3313 3314 static bool 3315 build_classic_dist_vector (struct data_dependence_relation *ddr, 3316 struct loop *loop_nest) 3317 { 3318 bool init_b = false; 3319 int index_carry = DDR_NB_LOOPS (ddr); 3320 lambda_vector dist_v; 3321 3322 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE) 3323 return false; 3324 3325 if (same_access_functions (ddr)) 3326 { 3327 /* Save the 0 vector. */ 3328 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); 3329 save_dist_v (ddr, dist_v); 3330 3331 if (constant_access_functions (ddr)) 3332 add_distance_for_zero_overlaps (ddr); 3333 3334 if (DDR_NB_LOOPS (ddr) > 1) 3335 add_other_self_distances (ddr); 3336 3337 return true; 3338 } 3339 3340 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); 3341 if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr), 3342 dist_v, &init_b, &index_carry)) 3343 return false; 3344 3345 /* Save the distance vector if we initialized one. */ 3346 if (init_b) 3347 { 3348 /* Verify a basic constraint: classic distance vectors should 3349 always be lexicographically positive. 3350 3351 Data references are collected in the order of execution of 3352 the program, thus for the following loop 3353 3354 | for (i = 1; i < 100; i++) 3355 | for (j = 1; j < 100; j++) 3356 | { 3357 | t = T[j+1][i-1]; // A 3358 | T[j][i] = t + 2; // B 3359 | } 3360 3361 references are collected following the direction of the wind: 3362 A then B. The data dependence tests are performed also 3363 following this order, such that we're looking at the distance 3364 separating the elements accessed by A from the elements later 3365 accessed by B. But in this example, the distance returned by 3366 test_dep (A, B) is lexicographically negative (-1, 1), that 3367 means that the access A occurs later than B with respect to 3368 the outer loop, ie. we're actually looking upwind. In this 3369 case we solve test_dep (B, A) looking downwind to the 3370 lexicographically positive solution, that returns the 3371 distance vector (1, -1). 
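As a quick numeric check of the example above (the concrete numbers are only an illustration): the element T[5][2] written by B at (i, j) = (2, 5) is read by A at (i, j) = (3, 4), and the distance from B to A is indeed (3 - 2, 4 - 5) = (1, -1).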
*/ 3372 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr))) 3373 { 3374 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); 3375 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr), 3376 loop_nest)) 3377 return false; 3378 compute_subscript_distance (ddr); 3379 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr), 3380 save_v, &init_b, &index_carry)) 3381 return false; 3382 save_dist_v (ddr, save_v); 3383 DDR_REVERSED_P (ddr) = true; 3384 3385 /* In this case there is a dependence forward for all the 3386 outer loops: 3387 3388 | for (k = 1; k < 100; k++) 3389 | for (i = 1; i < 100; i++) 3390 | for (j = 1; j < 100; j++) 3391 | { 3392 | t = T[j+1][i-1]; // A 3393 | T[j][i] = t + 2; // B 3394 | } 3395 3396 the vectors are: 3397 (0, 1, -1) 3398 (1, 1, -1) 3399 (1, -1, 1) 3400 */ 3401 if (DDR_NB_LOOPS (ddr) > 1) 3402 { 3403 add_outer_distances (ddr, save_v, index_carry); 3404 add_outer_distances (ddr, dist_v, index_carry); 3405 } 3406 } 3407 else 3408 { 3409 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); 3410 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr)); 3411 3412 if (DDR_NB_LOOPS (ddr) > 1) 3413 { 3414 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); 3415 3416 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), 3417 DDR_A (ddr), loop_nest)) 3418 return false; 3419 compute_subscript_distance (ddr); 3420 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr), 3421 opposite_v, &init_b, 3422 &index_carry)) 3423 return false; 3424 3425 save_dist_v (ddr, save_v); 3426 add_outer_distances (ddr, dist_v, index_carry); 3427 add_outer_distances (ddr, opposite_v, index_carry); 3428 } 3429 else 3430 save_dist_v (ddr, save_v); 3431 } 3432 } 3433 else 3434 { 3435 /* There is a distance of 1 on all the outer loops: Example: 3436 there is a dependence of distance 1 on loop_1 for the array A. 3437 3438 | loop_1 3439 | A[5] = ... 3440 | endloop 3441 */ 3442 add_outer_distances (ddr, dist_v, 3443 lambda_vector_first_nz (dist_v, 3444 DDR_NB_LOOPS (ddr), 0)); 3445 } 3446 3447 if (dump_file && (dump_flags & TDF_DETAILS)) 3448 { 3449 unsigned i; 3450 3451 fprintf (dump_file, "(build_classic_dist_vector\n"); 3452 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++) 3453 { 3454 fprintf (dump_file, " dist_vector = ("); 3455 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i), 3456 DDR_NB_LOOPS (ddr)); 3457 fprintf (dump_file, " )\n"); 3458 } 3459 fprintf (dump_file, ")\n"); 3460 } 3461 3462 return true; 3463 } 3464 3465 /* Return the direction for a given distance. 3466 FIXME: Computing dir this way is suboptimal, since dir can catch 3467 cases that dist is unable to represent. */ 3468 3469 static inline enum data_dependence_direction 3470 dir_from_dist (int dist) 3471 { 3472 if (dist > 0) 3473 return dir_positive; 3474 else if (dist < 0) 3475 return dir_negative; 3476 else 3477 return dir_equal; 3478 } 3479 3480 /* Compute the classic per loop direction vector. DDR is the data 3481 dependence relation to build a vector from. */ 3482 3483 static void 3484 build_classic_dir_vector (struct data_dependence_relation *ddr) 3485 { 3486 unsigned i, j; 3487 lambda_vector dist_v; 3488 3489 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v) 3490 { 3491 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); 3492 3493 for (j = 0; j < DDR_NB_LOOPS (ddr); j++) 3494 dir_v[j] = dir_from_dist (dist_v[j]); 3495 3496 save_dir_v (ddr, dir_v); 3497 } 3498 } 3499 3500 /* Helper function. 
Returns true when there is a dependence between 3501 data references DRA and DRB. */ 3502 3503 static bool 3504 subscript_dependence_tester_1 (struct data_dependence_relation *ddr, 3505 struct data_reference *dra, 3506 struct data_reference *drb, 3507 struct loop *loop_nest) 3508 { 3509 unsigned int i; 3510 tree last_conflicts; 3511 struct subscript *subscript; 3512 tree res = NULL_TREE; 3513 3514 for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++) 3515 { 3516 conflict_function *overlaps_a, *overlaps_b; 3517 3518 analyze_overlapping_iterations (DR_ACCESS_FN (dra, i), 3519 DR_ACCESS_FN (drb, i), 3520 &overlaps_a, &overlaps_b, 3521 &last_conflicts, loop_nest); 3522 3523 if (SUB_CONFLICTS_IN_A (subscript)) 3524 free_conflict_function (SUB_CONFLICTS_IN_A (subscript)); 3525 if (SUB_CONFLICTS_IN_B (subscript)) 3526 free_conflict_function (SUB_CONFLICTS_IN_B (subscript)); 3527 3528 SUB_CONFLICTS_IN_A (subscript) = overlaps_a; 3529 SUB_CONFLICTS_IN_B (subscript) = overlaps_b; 3530 SUB_LAST_CONFLICT (subscript) = last_conflicts; 3531 3532 /* If there is any undetermined conflict function we have to 3533 give a conservative answer in case we cannot prove that 3534 no dependence exists when analyzing another subscript. */ 3535 if (CF_NOT_KNOWN_P (overlaps_a) 3536 || CF_NOT_KNOWN_P (overlaps_b)) 3537 { 3538 res = chrec_dont_know; 3539 continue; 3540 } 3541 3542 /* When there is a subscript with no dependence we can stop. */ 3543 else if (CF_NO_DEPENDENCE_P (overlaps_a) 3544 || CF_NO_DEPENDENCE_P (overlaps_b)) 3545 { 3546 res = chrec_known; 3547 break; 3548 } 3549 } 3550 3551 if (res == NULL_TREE) 3552 return true; 3553 3554 if (res == chrec_known) 3555 dependence_stats.num_dependence_independent++; 3556 else 3557 dependence_stats.num_dependence_undetermined++; 3558 finalize_ddr_dependent (ddr, res); 3559 return false; 3560 } 3561 3562 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */ 3563 3564 static void 3565 subscript_dependence_tester (struct data_dependence_relation *ddr, 3566 struct loop *loop_nest) 3567 { 3568 if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest)) 3569 dependence_stats.num_dependence_dependent++; 3570 3571 compute_subscript_distance (ddr); 3572 if (build_classic_dist_vector (ddr, loop_nest)) 3573 build_classic_dir_vector (ddr); 3574 } 3575 3576 /* Returns true when all the access functions of A are affine or 3577 constant with respect to LOOP_NEST. */ 3578 3579 static bool 3580 access_functions_are_affine_or_constant_p (const struct data_reference *a, 3581 const struct loop *loop_nest) 3582 { 3583 unsigned int i; 3584 vec<tree> fns = DR_ACCESS_FNS (a); 3585 tree t; 3586 3587 FOR_EACH_VEC_ELT (fns, i, t) 3588 if (!evolution_function_is_invariant_p (t, loop_nest->num) 3589 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num)) 3590 return false; 3591 3592 return true; 3593 } 3594 3595 /* Initializes an equation for an OMEGA problem using the information 3596 contained in the ACCESS_FUN. Returns true when the operation 3597 succeeded. 3598 3599 PB is the omega constraint system. 3600 EQ is the number of the equation to be initialized. 3601 OFFSET is used for shifting the variables names in the constraints: 3602 a constrain is composed of 2 * the number of variables surrounding 3603 dependence accesses. OFFSET is set either to 0 for the first n variables, 3604 then it is set to n. 3605 ACCESS_FUN is expected to be an affine chrec. 
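For illustration (hypothetical values, not from the original comment): with ACCESS_FUN = {3, +, 2}_1, OFFSET = 0, and assuming the chrec's loop has index 0 in DDR_LOOP_NEST, the code below sets eqs[EQ].coef[1] = 2 (the distance variable), adds 2 to coef[DDR_NB_LOOPS + 1] (the corresponding loop variable), and adds 3 to coef[0] (the constant term).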
*/ 3606 3607 static bool 3608 init_omega_eq_with_af (omega_pb pb, unsigned eq, 3609 unsigned int offset, tree access_fun, 3610 struct data_dependence_relation *ddr) 3611 { 3612 switch (TREE_CODE (access_fun)) 3613 { 3614 case POLYNOMIAL_CHREC: 3615 { 3616 tree left = CHREC_LEFT (access_fun); 3617 tree right = CHREC_RIGHT (access_fun); 3618 int var = CHREC_VARIABLE (access_fun); 3619 unsigned var_idx; 3620 3621 if (TREE_CODE (right) != INTEGER_CST) 3622 return false; 3623 3624 var_idx = index_in_loop_nest (var, DDR_LOOP_NEST (ddr)); 3625 pb->eqs[eq].coef[offset + var_idx + 1] = int_cst_value (right); 3626 3627 /* Compute the innermost loop index. */ 3628 DDR_INNER_LOOP (ddr) = MAX (DDR_INNER_LOOP (ddr), var_idx); 3629 3630 if (offset == 0) 3631 pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1] 3632 += int_cst_value (right); 3633 3634 switch (TREE_CODE (left)) 3635 { 3636 case POLYNOMIAL_CHREC: 3637 return init_omega_eq_with_af (pb, eq, offset, left, ddr); 3638 3639 case INTEGER_CST: 3640 pb->eqs[eq].coef[0] += int_cst_value (left); 3641 return true; 3642 3643 default: 3644 return false; 3645 } 3646 } 3647 3648 case INTEGER_CST: 3649 pb->eqs[eq].coef[0] += int_cst_value (access_fun); 3650 return true; 3651 3652 default: 3653 return false; 3654 } 3655 } 3656 3657 /* As explained in the comments preceding init_omega_for_ddr, we have 3658 to set up a system for each loop level, setting outer loops 3659 variation to zero, and current loop variation to positive or zero. 3660 Save each lexico positive distance vector. */ 3661 3662 static void 3663 omega_extract_distance_vectors (omega_pb pb, 3664 struct data_dependence_relation *ddr) 3665 { 3666 int eq, geq; 3667 unsigned i, j; 3668 struct loop *loopi, *loopj; 3669 enum omega_result res; 3670 3671 /* Set a new problem for each loop in the nest. The basis is the 3672 problem that we have initialized until now. On top of this we 3673 add new constraints. */ 3674 for (i = 0; i <= DDR_INNER_LOOP (ddr) 3675 && DDR_LOOP_NEST (ddr).iterate (i, &loopi); i++) 3676 { 3677 int dist = 0; 3678 omega_pb copy = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), 3679 DDR_NB_LOOPS (ddr)); 3680 3681 omega_copy_problem (copy, pb); 3682 3683 /* For all the outer loops "loop_j", add "dj = 0". */ 3684 for (j = 0; j < i && DDR_LOOP_NEST (ddr).iterate (j, &loopj); j++) 3685 { 3686 eq = omega_add_zero_eq (copy, omega_black); 3687 copy->eqs[eq].coef[j + 1] = 1; 3688 } 3689 3690 /* For "loop_i", add "0 <= di". */ 3691 geq = omega_add_zero_geq (copy, omega_black); 3692 copy->geqs[geq].coef[i + 1] = 1; 3693 3694 /* Reduce the constraint system, and test that the current 3695 problem is feasible. */ 3696 res = omega_simplify_problem (copy); 3697 if (res == omega_false 3698 || res == omega_unknown 3699 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr)) 3700 goto next_problem; 3701 3702 for (eq = 0; eq < copy->num_subs; eq++) 3703 if (copy->subs[eq].key == (int) i + 1) 3704 { 3705 dist = copy->subs[eq].coef[0]; 3706 goto found_dist; 3707 } 3708 3709 if (dist == 0) 3710 { 3711 /* Reinitialize problem... */ 3712 omega_copy_problem (copy, pb); 3713 for (j = 0; j < i && DDR_LOOP_NEST (ddr).iterate (j, &loopj); j++) 3714 { 3715 eq = omega_add_zero_eq (copy, omega_black); 3716 copy->eqs[eq].coef[j + 1] = 1; 3717 } 3718 3719 /* ..., but this time "di = 1". 
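(Concretely, the equation built below has coef[i + 1] = 1 and coef[0] = -1, i.e. di - 1 = 0.)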
*/ 3720 eq = omega_add_zero_eq (copy, omega_black); 3721 copy->eqs[eq].coef[i + 1] = 1; 3722 copy->eqs[eq].coef[0] = -1; 3723 3724 res = omega_simplify_problem (copy); 3725 if (res == omega_false 3726 || res == omega_unknown 3727 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr)) 3728 goto next_problem; 3729 3730 for (eq = 0; eq < copy->num_subs; eq++) 3731 if (copy->subs[eq].key == (int) i + 1) 3732 { 3733 dist = copy->subs[eq].coef[0]; 3734 goto found_dist; 3735 } 3736 } 3737 3738 found_dist:; 3739 /* Save the lexicographically positive distance vector. */ 3740 if (dist >= 0) 3741 { 3742 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); 3743 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); 3744 3745 dist_v[i] = dist; 3746 3747 for (eq = 0; eq < copy->num_subs; eq++) 3748 if (copy->subs[eq].key > 0) 3749 { 3750 dist = copy->subs[eq].coef[0]; 3751 dist_v[copy->subs[eq].key - 1] = dist; 3752 } 3753 3754 for (j = 0; j < DDR_NB_LOOPS (ddr); j++) 3755 dir_v[j] = dir_from_dist (dist_v[j]); 3756 3757 save_dist_v (ddr, dist_v); 3758 save_dir_v (ddr, dir_v); 3759 } 3760 3761 next_problem:; 3762 omega_free_problem (copy); 3763 } 3764 } 3765 3766 /* This is called for each subscript of a tuple of data references: 3767 insert an equality for representing the conflicts. */ 3768 3769 static bool 3770 omega_setup_subscript (tree access_fun_a, tree access_fun_b, 3771 struct data_dependence_relation *ddr, 3772 omega_pb pb, bool *maybe_dependent) 3773 { 3774 int eq; 3775 tree type = signed_type_for_types (TREE_TYPE (access_fun_a), 3776 TREE_TYPE (access_fun_b)); 3777 tree fun_a = chrec_convert (type, access_fun_a, NULL); 3778 tree fun_b = chrec_convert (type, access_fun_b, NULL); 3779 tree difference = chrec_fold_minus (type, fun_a, fun_b); 3780 tree minus_one; 3781 3782 /* When the fun_a - fun_b is not constant, the dependence is not 3783 captured by the classic distance vector representation. */ 3784 if (TREE_CODE (difference) != INTEGER_CST) 3785 return false; 3786 3787 /* ZIV test. */ 3788 if (ziv_subscript_p (fun_a, fun_b) && !integer_zerop (difference)) 3789 { 3790 /* There is no dependence. */ 3791 *maybe_dependent = false; 3792 return true; 3793 } 3794 3795 minus_one = build_int_cst (type, -1); 3796 fun_b = chrec_fold_multiply (type, fun_b, minus_one); 3797 3798 eq = omega_add_zero_eq (pb, omega_black); 3799 if (!init_omega_eq_with_af (pb, eq, DDR_NB_LOOPS (ddr), fun_a, ddr) 3800 || !init_omega_eq_with_af (pb, eq, 0, fun_b, ddr)) 3801 /* There is probably a dependence, but the system of 3802 constraints cannot be built: answer "don't know". */ 3803 return false; 3804 3805 /* GCD test. */ 3806 if (DDR_NB_LOOPS (ddr) != 0 && pb->eqs[eq].coef[0] 3807 && !int_divides_p (lambda_vector_gcd 3808 ((lambda_vector) &(pb->eqs[eq].coef[1]), 3809 2 * DDR_NB_LOOPS (ddr)), 3810 pb->eqs[eq].coef[0])) 3811 { 3812 /* There is no dependence. */ 3813 *maybe_dependent = false; 3814 return true; 3815 } 3816 3817 return true; 3818 } 3819 3820 /* Helper function, same as init_omega_for_ddr but specialized for 3821 data references A and B. */ 3822 3823 static bool 3824 init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb, 3825 struct data_dependence_relation *ddr, 3826 omega_pb pb, bool *maybe_dependent) 3827 { 3828 unsigned i; 3829 int ineq; 3830 struct loop *loopi; 3831 unsigned nb_loops = DDR_NB_LOOPS (ddr); 3832 3833 /* Insert an equality per subscript. 
*/ 3834 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++) 3835 { 3836 if (!omega_setup_subscript (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i), 3837 ddr, pb, maybe_dependent)) 3838 return false; 3839 else if (*maybe_dependent == false) 3840 { 3841 /* There is no dependence. */ 3842 DDR_ARE_DEPENDENT (ddr) = chrec_known; 3843 return true; 3844 } 3845 } 3846 3847 /* Insert inequalities: constraints corresponding to the iteration 3848 domain, i.e. the loops surrounding the references "loop_x" and 3849 the distance variables "dx". The layout of the OMEGA 3850 representation is as follows: 3851 - coef[0] is the constant 3852 - coef[1..nb_loops] are the protected variables that will not be 3853 removed by the solver: the "dx" 3854 - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x". 3855 */ 3856 for (i = 0; i <= DDR_INNER_LOOP (ddr) 3857 && DDR_LOOP_NEST (ddr).iterate (i, &loopi); i++) 3858 { 3859 HOST_WIDE_INT nbi = max_stmt_executions_int (loopi); 3860 3861 /* 0 <= loop_x */ 3862 ineq = omega_add_zero_geq (pb, omega_black); 3863 pb->geqs[ineq].coef[i + nb_loops + 1] = 1; 3864 3865 /* 0 <= loop_x + dx */ 3866 ineq = omega_add_zero_geq (pb, omega_black); 3867 pb->geqs[ineq].coef[i + nb_loops + 1] = 1; 3868 pb->geqs[ineq].coef[i + 1] = 1; 3869 3870 if (nbi != -1) 3871 { 3872 /* loop_x <= nb_iters */ 3873 ineq = omega_add_zero_geq (pb, omega_black); 3874 pb->geqs[ineq].coef[i + nb_loops + 1] = -1; 3875 pb->geqs[ineq].coef[0] = nbi; 3876 3877 /* loop_x + dx <= nb_iters */ 3878 ineq = omega_add_zero_geq (pb, omega_black); 3879 pb->geqs[ineq].coef[i + nb_loops + 1] = -1; 3880 pb->geqs[ineq].coef[i + 1] = -1; 3881 pb->geqs[ineq].coef[0] = nbi; 3882 3883 /* A step "dx" bigger than nb_iters is not feasible, so 3884 add "0 <= nb_iters + dx", */ 3885 ineq = omega_add_zero_geq (pb, omega_black); 3886 pb->geqs[ineq].coef[i + 1] = 1; 3887 pb->geqs[ineq].coef[0] = nbi; 3888 /* and "dx <= nb_iters". */ 3889 ineq = omega_add_zero_geq (pb, omega_black); 3890 pb->geqs[ineq].coef[i + 1] = -1; 3891 pb->geqs[ineq].coef[0] = nbi; 3892 } 3893 } 3894 3895 omega_extract_distance_vectors (pb, ddr); 3896 3897 return true; 3898 } 3899 3900 /* Sets up the Omega dependence problem for the data dependence 3901 relation DDR. Returns false when the constraint system cannot be 3902 built, ie. when the test answers "don't know". Returns true 3903 otherwise, and when independence has been proved (using one of the 3904 trivial dependence test), set MAYBE_DEPENDENT to false, otherwise 3905 set MAYBE_DEPENDENT to true. 3906 3907 Example: for setting up the dependence system corresponding to the 3908 conflicting accesses 3909 3910 | loop_i 3911 | loop_j 3912 | A[i, i+1] = ... 3913 | ... A[2*j, 2*(i + j)] 3914 | endloop_j 3915 | endloop_i 3916 3917 the following constraints come from the iteration domain: 3918 3919 0 <= i <= Ni 3920 0 <= i + di <= Ni 3921 0 <= j <= Nj 3922 0 <= j + dj <= Nj 3923 3924 where di, dj are the distance variables. The constraints 3925 representing the conflicting elements are: 3926 3927 i = 2 * (j + dj) 3928 i + 1 = 2 * (i + di + j + dj) 3929 3930 For asking that the resulting distance vector (di, dj) be 3931 lexicographically positive, we insert the constraint "di >= 0". If 3932 "di = 0" in the solution, we fix that component to zero, and we 3933 look at the inner loops: we set a new problem where all the outer 3934 loop distances are zero, and fix this inner component to be 3935 positive. 
When one of the components is positive, we save that 3936 distance, and set a new problem where the distance on this loop is 3937 zero, searching for other distances in the inner loops. Here is 3938 the classic example that illustrates that we have to set for each 3939 inner loop a new problem: 3940 3941 | loop_1 3942 | loop_2 3943 | A[10] 3944 | endloop_2 3945 | endloop_1 3946 3947 we have to save two distances (1, 0) and (0, 1). 3948 3949 Given two array references, refA and refB, we have to set the 3950 dependence problem twice, refA vs. refB and refB vs. refA, and we 3951 cannot do a single test, as refB might occur before refA in the 3952 inner loops, and the contrary when considering outer loops: ex. 3953 3954 | loop_0 3955 | loop_1 3956 | loop_2 3957 | T[{1,+,1}_2][{1,+,1}_1] // refA 3958 | T[{2,+,1}_2][{0,+,1}_1] // refB 3959 | endloop_2 3960 | endloop_1 3961 | endloop_0 3962 3963 refB touches the elements in T before refA, and thus for the same 3964 loop_0 refB precedes refA: ie. the distance vector (0, 1, -1) 3965 but for successive loop_0 iterations, we have (1, -1, 1) 3966 3967 The Omega solver expects the distance variables ("di" in the 3968 previous example) to come first in the constraint system (as 3969 variables to be protected, or "safe" variables), the constraint 3970 system is built using the following layout: 3971 3972 "cst | distance vars | index vars". 3973 */ 3974 3975 static bool 3976 init_omega_for_ddr (struct data_dependence_relation *ddr, 3977 bool *maybe_dependent) 3978 { 3979 omega_pb pb; 3980 bool res = false; 3981 3982 *maybe_dependent = true; 3983 3984 if (same_access_functions (ddr)) 3985 { 3986 unsigned j; 3987 lambda_vector dir_v; 3988 3989 /* Save the 0 vector. */ 3990 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr))); 3991 dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); 3992 for (j = 0; j < DDR_NB_LOOPS (ddr); j++) 3993 dir_v[j] = dir_equal; 3994 save_dir_v (ddr, dir_v); 3995 3996 /* Save the dependences carried by outer loops. */ 3997 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr)); 3998 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb, 3999 maybe_dependent); 4000 omega_free_problem (pb); 4001 return res; 4002 } 4003 4004 /* Omega expects the protected variables (those that have to be kept 4005 after elimination) to appear first in the constraint system. 4006 These variables are the distance variables. In the following 4007 initialization we declare NB_LOOPS safe variables, and the total 4008 number of variables for the constraint system is 2*NB_LOOPS. */ 4009 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr)); 4010 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb, 4011 maybe_dependent); 4012 omega_free_problem (pb); 4013 4014 /* Stop computation if not decidable, or no dependence. */ 4015 if (res == false || *maybe_dependent == false) 4016 return res; 4017 4018 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr)); 4019 res = init_omega_for_ddr_1 (DDR_B (ddr), DDR_A (ddr), ddr, pb, 4020 maybe_dependent); 4021 omega_free_problem (pb); 4022 4023 return res; 4024 } 4025 4026 /* Return true when DDR contains the same information as that stored 4027 in DIR_VECTS and in DIST_VECTS, return false otherwise. */ 4028 4029 static bool 4030 ddr_consistent_p (FILE *file, 4031 struct data_dependence_relation *ddr, 4032 vec<lambda_vector> dist_vects, 4033 vec<lambda_vector> dir_vects) 4034 { 4035 unsigned int i, j; 4036 4037 /* If dump_file is set, output there. 
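 */

  /* A purely illustrative example of what "consistent" means here:
     the Banerjee analyzer producing the distance vectors
     {(1,0), (0,1)} and Omega producing {(0,1), (1,0)} are consistent,
     since the checks below only require the two sets of vectors to be
     equal up to the ordering of their elements.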
*/ 4038 if (dump_file && (dump_flags & TDF_DETAILS)) 4039 file = dump_file; 4040 4041 if (dist_vects.length () != DDR_NUM_DIST_VECTS (ddr)) 4042 { 4043 lambda_vector b_dist_v; 4044 fprintf (file, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n", 4045 dist_vects.length (), 4046 DDR_NUM_DIST_VECTS (ddr)); 4047 4048 fprintf (file, "Banerjee dist vectors:\n"); 4049 FOR_EACH_VEC_ELT (dist_vects, i, b_dist_v) 4050 print_lambda_vector (file, b_dist_v, DDR_NB_LOOPS (ddr)); 4051 4052 fprintf (file, "Omega dist vectors:\n"); 4053 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++) 4054 print_lambda_vector (file, DDR_DIST_VECT (ddr, i), DDR_NB_LOOPS (ddr)); 4055 4056 fprintf (file, "data dependence relation:\n"); 4057 dump_data_dependence_relation (file, ddr); 4058 4059 fprintf (file, ")\n"); 4060 return false; 4061 } 4062 4063 if (dir_vects.length () != DDR_NUM_DIR_VECTS (ddr)) 4064 { 4065 fprintf (file, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n", 4066 dir_vects.length (), 4067 DDR_NUM_DIR_VECTS (ddr)); 4068 return false; 4069 } 4070 4071 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++) 4072 { 4073 lambda_vector a_dist_v; 4074 lambda_vector b_dist_v = DDR_DIST_VECT (ddr, i); 4075 4076 /* Distance vectors are not ordered in the same way in the DDR 4077 and in the DIST_VECTS: search for a matching vector. */ 4078 FOR_EACH_VEC_ELT (dist_vects, j, a_dist_v) 4079 if (lambda_vector_equal (a_dist_v, b_dist_v, DDR_NB_LOOPS (ddr))) 4080 break; 4081 4082 if (j == dist_vects.length ()) 4083 { 4084 fprintf (file, "\n(Dist vectors from the first dependence analyzer:\n"); 4085 print_dist_vectors (file, dist_vects, DDR_NB_LOOPS (ddr)); 4086 fprintf (file, "not found in Omega dist vectors:\n"); 4087 print_dist_vectors (file, DDR_DIST_VECTS (ddr), DDR_NB_LOOPS (ddr)); 4088 fprintf (file, "data dependence relation:\n"); 4089 dump_data_dependence_relation (file, ddr); 4090 fprintf (file, ")\n"); 4091 } 4092 } 4093 4094 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++) 4095 { 4096 lambda_vector a_dir_v; 4097 lambda_vector b_dir_v = DDR_DIR_VECT (ddr, i); 4098 4099 /* Direction vectors are not ordered in the same way in the DDR 4100 and in the DIR_VECTS: search for a matching vector. */ 4101 FOR_EACH_VEC_ELT (dir_vects, j, a_dir_v) 4102 if (lambda_vector_equal (a_dir_v, b_dir_v, DDR_NB_LOOPS (ddr))) 4103 break; 4104 4105 if (j == dist_vects.length ()) 4106 { 4107 fprintf (file, "\n(Dir vectors from the first dependence analyzer:\n"); 4108 print_dir_vectors (file, dir_vects, DDR_NB_LOOPS (ddr)); 4109 fprintf (file, "not found in Omega dir vectors:\n"); 4110 print_dir_vectors (file, DDR_DIR_VECTS (ddr), DDR_NB_LOOPS (ddr)); 4111 fprintf (file, "data dependence relation:\n"); 4112 dump_data_dependence_relation (file, ddr); 4113 fprintf (file, ")\n"); 4114 } 4115 } 4116 4117 return true; 4118 } 4119 4120 /* This computes the affine dependence relation between A and B with 4121 respect to LOOP_NEST. CHREC_KNOWN is used for representing the 4122 independence between two accesses, while CHREC_DONT_KNOW is used 4123 for representing the unknown relation. 4124 4125 Note that it is possible to stop the computation of the dependence 4126 relation the first time we detect a CHREC_KNOWN element for a given 4127 subscript. 
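   For instance (a hypothetical pair of references), for A[i][0] and
   A[i][1] the second subscript pair is (0, 1): the ZIV test proves
   that these two constants can never be equal, that subscript is
   qualified with CHREC_KNOWN, and the whole relation can be declared
   independent without looking at the first subscript at all.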
*/ 4128 4129 void 4130 compute_affine_dependence (struct data_dependence_relation *ddr, 4131 struct loop *loop_nest) 4132 { 4133 struct data_reference *dra = DDR_A (ddr); 4134 struct data_reference *drb = DDR_B (ddr); 4135 4136 if (dump_file && (dump_flags & TDF_DETAILS)) 4137 { 4138 fprintf (dump_file, "(compute_affine_dependence\n"); 4139 fprintf (dump_file, " stmt_a: "); 4140 print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM); 4141 fprintf (dump_file, " stmt_b: "); 4142 print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM); 4143 } 4144 4145 /* Analyze only when the dependence relation is not yet known. */ 4146 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE) 4147 { 4148 dependence_stats.num_dependence_tests++; 4149 4150 if (access_functions_are_affine_or_constant_p (dra, loop_nest) 4151 && access_functions_are_affine_or_constant_p (drb, loop_nest)) 4152 { 4153 subscript_dependence_tester (ddr, loop_nest); 4154 4155 if (flag_check_data_deps) 4156 { 4157 /* Dump the dependences from the first algorithm. */ 4158 if (dump_file && (dump_flags & TDF_DETAILS)) 4159 { 4160 fprintf (dump_file, "\n\nBanerjee Analyzer\n"); 4161 dump_data_dependence_relation (dump_file, ddr); 4162 } 4163 4164 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE) 4165 { 4166 bool maybe_dependent; 4167 vec<lambda_vector> dir_vects, dist_vects; 4168 4169 /* Save the result of the first DD analyzer. */ 4170 dist_vects = DDR_DIST_VECTS (ddr); 4171 dir_vects = DDR_DIR_VECTS (ddr); 4172 4173 /* Reset the information. */ 4174 DDR_DIST_VECTS (ddr).create (0); 4175 DDR_DIR_VECTS (ddr).create (0); 4176 4177 /* Compute the same information using Omega. */ 4178 if (!init_omega_for_ddr (ddr, &maybe_dependent)) 4179 goto csys_dont_know; 4180 4181 if (dump_file && (dump_flags & TDF_DETAILS)) 4182 { 4183 fprintf (dump_file, "Omega Analyzer\n"); 4184 dump_data_dependence_relation (dump_file, ddr); 4185 } 4186 4187 /* Check that we get the same information. */ 4188 if (maybe_dependent) 4189 gcc_assert (ddr_consistent_p (stderr, ddr, dist_vects, 4190 dir_vects)); 4191 } 4192 } 4193 } 4194 4195 /* As a last case, if the dependence cannot be determined, or if 4196 the dependence is considered too difficult to determine, answer 4197 "don't know". */ 4198 else 4199 { 4200 csys_dont_know:; 4201 dependence_stats.num_dependence_undetermined++; 4202 4203 if (dump_file && (dump_flags & TDF_DETAILS)) 4204 { 4205 fprintf (dump_file, "Data ref a:\n"); 4206 dump_data_reference (dump_file, dra); 4207 fprintf (dump_file, "Data ref b:\n"); 4208 dump_data_reference (dump_file, drb); 4209 fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n"); 4210 } 4211 finalize_ddr_dependent (ddr, chrec_dont_know); 4212 } 4213 } 4214 4215 if (dump_file && (dump_flags & TDF_DETAILS)) 4216 { 4217 if (DDR_ARE_DEPENDENT (ddr) == chrec_known) 4218 fprintf (dump_file, ") -> no dependence\n"); 4219 else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know) 4220 fprintf (dump_file, ") -> dependence analysis failed\n"); 4221 else 4222 fprintf (dump_file, ")\n"); 4223 } 4224 } 4225 4226 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all 4227 the data references in DATAREFS, in the LOOP_NEST. When 4228 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self 4229 relations. Return true when successful, i.e. data references number 4230 is small enough to be handled. 
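   A hypothetical usage sketch (illustrative only, mirroring the
   callers in this file):

   | vec<ddr_p> ddrs;
   | ddrs.create (10);
   | if (!compute_all_dependences (datarefs, &ddrs, loop_nest, false))
   |   ...  too many data references: a single chrec_dont_know
   |        relation has been pushed on DDRS  ...

   One relation is built per pair of references in which at least one
   reference is a write (plus the self and read-read relations when
   COMPUTE_SELF_AND_RR is true), so the number of relations grows
   quadratically with the number of data references; this is why that
   number is bounded by PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS.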
*/ 4231 4232 bool 4233 compute_all_dependences (vec<data_reference_p> datarefs, 4234 vec<ddr_p> *dependence_relations, 4235 vec<loop_p> loop_nest, 4236 bool compute_self_and_rr) 4237 { 4238 struct data_dependence_relation *ddr; 4239 struct data_reference *a, *b; 4240 unsigned int i, j; 4241 4242 if ((int) datarefs.length () 4243 > PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS)) 4244 { 4245 struct data_dependence_relation *ddr; 4246 4247 /* Insert a single relation into dependence_relations: 4248 chrec_dont_know. */ 4249 ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest); 4250 dependence_relations->safe_push (ddr); 4251 return false; 4252 } 4253 4254 FOR_EACH_VEC_ELT (datarefs, i, a) 4255 for (j = i + 1; datarefs.iterate (j, &b); j++) 4256 if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr) 4257 { 4258 ddr = initialize_data_dependence_relation (a, b, loop_nest); 4259 dependence_relations->safe_push (ddr); 4260 if (loop_nest.exists ()) 4261 compute_affine_dependence (ddr, loop_nest[0]); 4262 } 4263 4264 if (compute_self_and_rr) 4265 FOR_EACH_VEC_ELT (datarefs, i, a) 4266 { 4267 ddr = initialize_data_dependence_relation (a, a, loop_nest); 4268 dependence_relations->safe_push (ddr); 4269 if (loop_nest.exists ()) 4270 compute_affine_dependence (ddr, loop_nest[0]); 4271 } 4272 4273 return true; 4274 } 4275 4276 /* Describes a location of a memory reference. */ 4277 4278 typedef struct data_ref_loc_d 4279 { 4280 /* Position of the memory reference. */ 4281 tree *pos; 4282 4283 /* True if the memory reference is read. */ 4284 bool is_read; 4285 } data_ref_loc; 4286 4287 4288 /* Stores the locations of memory references in STMT to REFERENCES. Returns 4289 true if STMT clobbers memory, false otherwise. */ 4290 4291 static bool 4292 get_references_in_stmt (gimple stmt, vec<data_ref_loc> *references) 4293 { 4294 bool clobbers_memory = false; 4295 data_ref_loc ref; 4296 tree *op0, *op1; 4297 enum gimple_code stmt_code = gimple_code (stmt); 4298 4299 references->create (0); 4300 4301 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects. 4302 As we cannot model data-references to not spelled out 4303 accesses give up if they may occur. 
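   For instance (hypothetical statement), a call to a function that is
   not declared const, such as "foo (&x)", may read or write memory
   through the escaped address in ways we cannot enumerate, so the
   statement is reported as clobbering memory; the same holds for a
   volatile asm or an asm carrying a virtual use.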
*/ 4304 if ((stmt_code == GIMPLE_CALL 4305 && !(gimple_call_flags (stmt) & ECF_CONST)) 4306 || (stmt_code == GIMPLE_ASM 4307 && (gimple_asm_volatile_p (stmt) || gimple_vuse (stmt)))) 4308 clobbers_memory = true; 4309 4310 if (!gimple_vuse (stmt)) 4311 return clobbers_memory; 4312 4313 if (stmt_code == GIMPLE_ASSIGN) 4314 { 4315 tree base; 4316 op0 = gimple_assign_lhs_ptr (stmt); 4317 op1 = gimple_assign_rhs1_ptr (stmt); 4318 4319 if (DECL_P (*op1) 4320 || (REFERENCE_CLASS_P (*op1) 4321 && (base = get_base_address (*op1)) 4322 && TREE_CODE (base) != SSA_NAME)) 4323 { 4324 ref.pos = op1; 4325 ref.is_read = true; 4326 references->safe_push (ref); 4327 } 4328 } 4329 else if (stmt_code == GIMPLE_CALL) 4330 { 4331 unsigned i, n; 4332 4333 op0 = gimple_call_lhs_ptr (stmt); 4334 n = gimple_call_num_args (stmt); 4335 for (i = 0; i < n; i++) 4336 { 4337 op1 = gimple_call_arg_ptr (stmt, i); 4338 4339 if (DECL_P (*op1) 4340 || (REFERENCE_CLASS_P (*op1) && get_base_address (*op1))) 4341 { 4342 ref.pos = op1; 4343 ref.is_read = true; 4344 references->safe_push (ref); 4345 } 4346 } 4347 } 4348 else 4349 return clobbers_memory; 4350 4351 if (*op0 4352 && (DECL_P (*op0) 4353 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0)))) 4354 { 4355 ref.pos = op0; 4356 ref.is_read = false; 4357 references->safe_push (ref); 4358 } 4359 return clobbers_memory; 4360 } 4361 4362 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable 4363 reference, returns false, otherwise returns true. NEST is the outermost 4364 loop of the loop nest in which the references should be analyzed. */ 4365 4366 bool 4367 find_data_references_in_stmt (struct loop *nest, gimple stmt, 4368 vec<data_reference_p> *datarefs) 4369 { 4370 unsigned i; 4371 vec<data_ref_loc> references; 4372 data_ref_loc *ref; 4373 bool ret = true; 4374 data_reference_p dr; 4375 4376 if (get_references_in_stmt (stmt, &references)) 4377 { 4378 references.release (); 4379 return false; 4380 } 4381 4382 FOR_EACH_VEC_ELT (references, i, ref) 4383 { 4384 dr = create_data_ref (nest, loop_containing_stmt (stmt), 4385 *ref->pos, stmt, ref->is_read); 4386 gcc_assert (dr != NULL); 4387 datarefs->safe_push (dr); 4388 } 4389 references.release (); 4390 return ret; 4391 } 4392 4393 /* Stores the data references in STMT to DATAREFS. If there is an 4394 unanalyzable reference, returns false, otherwise returns true. 4395 NEST is the outermost loop of the loop nest in which the references 4396 should be instantiated, LOOP is the loop in which the references 4397 should be analyzed. */ 4398 4399 bool 4400 graphite_find_data_references_in_stmt (loop_p nest, loop_p loop, gimple stmt, 4401 vec<data_reference_p> *datarefs) 4402 { 4403 unsigned i; 4404 vec<data_ref_loc> references; 4405 data_ref_loc *ref; 4406 bool ret = true; 4407 data_reference_p dr; 4408 4409 if (get_references_in_stmt (stmt, &references)) 4410 { 4411 references.release (); 4412 return false; 4413 } 4414 4415 FOR_EACH_VEC_ELT (references, i, ref) 4416 { 4417 dr = create_data_ref (nest, loop, *ref->pos, stmt, ref->is_read); 4418 gcc_assert (dr != NULL); 4419 datarefs->safe_push (dr); 4420 } 4421 4422 references.release (); 4423 return ret; 4424 } 4425 4426 /* Search the data references in LOOP, and record the information into 4427 DATAREFS. Returns chrec_dont_know when failing to analyze a 4428 difficult case, returns NULL_TREE otherwise. 
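   A "difficult case" is, for instance (hypothetical), a basic block
   containing a call to an arbitrary function or a volatile asm: such
   statements may touch memory that cannot be described by a
   data_reference, so the analysis gives up with chrec_dont_know.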
*/ 4429 4430 tree 4431 find_data_references_in_bb (struct loop *loop, basic_block bb, 4432 vec<data_reference_p> *datarefs) 4433 { 4434 gimple_stmt_iterator bsi; 4435 4436 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi)) 4437 { 4438 gimple stmt = gsi_stmt (bsi); 4439 4440 if (!find_data_references_in_stmt (loop, stmt, datarefs)) 4441 { 4442 struct data_reference *res; 4443 res = XCNEW (struct data_reference); 4444 datarefs->safe_push (res); 4445 4446 return chrec_dont_know; 4447 } 4448 } 4449 4450 return NULL_TREE; 4451 } 4452 4453 /* Search the data references in LOOP, and record the information into 4454 DATAREFS. Returns chrec_dont_know when failing to analyze a 4455 difficult case, returns NULL_TREE otherwise. 4456 4457 TODO: This function should be made smarter so that it can handle address 4458 arithmetic as if they were array accesses, etc. */ 4459 4460 static tree 4461 find_data_references_in_loop (struct loop *loop, 4462 vec<data_reference_p> *datarefs) 4463 { 4464 basic_block bb, *bbs; 4465 unsigned int i; 4466 4467 bbs = get_loop_body_in_dom_order (loop); 4468 4469 for (i = 0; i < loop->num_nodes; i++) 4470 { 4471 bb = bbs[i]; 4472 4473 if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know) 4474 { 4475 free (bbs); 4476 return chrec_dont_know; 4477 } 4478 } 4479 free (bbs); 4480 4481 return NULL_TREE; 4482 } 4483 4484 /* Recursive helper function. */ 4485 4486 static bool 4487 find_loop_nest_1 (struct loop *loop, vec<loop_p> *loop_nest) 4488 { 4489 /* Inner loops of the nest should not contain siblings. Example: 4490 when there are two consecutive loops, 4491 4492 | loop_0 4493 | loop_1 4494 | A[{0, +, 1}_1] 4495 | endloop_1 4496 | loop_2 4497 | A[{0, +, 1}_2] 4498 | endloop_2 4499 | endloop_0 4500 4501 the dependence relation cannot be captured by the distance 4502 abstraction. */ 4503 if (loop->next) 4504 return false; 4505 4506 loop_nest->safe_push (loop); 4507 if (loop->inner) 4508 return find_loop_nest_1 (loop->inner, loop_nest); 4509 return true; 4510 } 4511 4512 /* Return false when the LOOP is not well nested. Otherwise return 4513 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will 4514 contain the loops from the outermost to the innermost, as they will 4515 appear in the classic distance vector. */ 4516 4517 bool 4518 find_loop_nest (struct loop *loop, vec<loop_p> *loop_nest) 4519 { 4520 loop_nest->safe_push (loop); 4521 if (loop->inner) 4522 return find_loop_nest_1 (loop->inner, loop_nest); 4523 return true; 4524 } 4525 4526 /* Returns true when the data dependences have been computed, false otherwise. 4527 Given a loop nest LOOP, the following vectors are returned: 4528 DATAREFS is initialized to all the array elements contained in this loop, 4529 DEPENDENCE_RELATIONS contains the relations between the data references. 4530 Compute read-read and self relations if 4531 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */ 4532 4533 bool 4534 compute_data_dependences_for_loop (struct loop *loop, 4535 bool compute_self_and_read_read_dependences, 4536 vec<loop_p> *loop_nest, 4537 vec<data_reference_p> *datarefs, 4538 vec<ddr_p> *dependence_relations) 4539 { 4540 bool res = true; 4541 4542 memset (&dependence_stats, 0, sizeof (dependence_stats)); 4543 4544 /* If the loop nest is not well formed, or one of the data references 4545 is not computable, give up without spending time to compute other 4546 dependences. 
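     (For example, purely as an illustration: a nest whose outer loop
     contains two sibling inner loops is rejected by find_loop_nest,
     and a loop body containing an arbitrary call makes
     find_data_references_in_loop return chrec_dont_know; in both
     cases RES becomes false before any pair of references is tested.)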
*/ 4547 if (!loop 4548 || !find_loop_nest (loop, loop_nest) 4549 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know 4550 || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest, 4551 compute_self_and_read_read_dependences)) 4552 res = false; 4553 4554 if (dump_file && (dump_flags & TDF_STATS)) 4555 { 4556 fprintf (dump_file, "Dependence tester statistics:\n"); 4557 4558 fprintf (dump_file, "Number of dependence tests: %d\n", 4559 dependence_stats.num_dependence_tests); 4560 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n", 4561 dependence_stats.num_dependence_dependent); 4562 fprintf (dump_file, "Number of dependence tests classified independent: %d\n", 4563 dependence_stats.num_dependence_independent); 4564 fprintf (dump_file, "Number of undetermined dependence tests: %d\n", 4565 dependence_stats.num_dependence_undetermined); 4566 4567 fprintf (dump_file, "Number of subscript tests: %d\n", 4568 dependence_stats.num_subscript_tests); 4569 fprintf (dump_file, "Number of undetermined subscript tests: %d\n", 4570 dependence_stats.num_subscript_undetermined); 4571 fprintf (dump_file, "Number of same subscript function: %d\n", 4572 dependence_stats.num_same_subscript_function); 4573 4574 fprintf (dump_file, "Number of ziv tests: %d\n", 4575 dependence_stats.num_ziv); 4576 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n", 4577 dependence_stats.num_ziv_dependent); 4578 fprintf (dump_file, "Number of ziv tests returning independent: %d\n", 4579 dependence_stats.num_ziv_independent); 4580 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n", 4581 dependence_stats.num_ziv_unimplemented); 4582 4583 fprintf (dump_file, "Number of siv tests: %d\n", 4584 dependence_stats.num_siv); 4585 fprintf (dump_file, "Number of siv tests returning dependent: %d\n", 4586 dependence_stats.num_siv_dependent); 4587 fprintf (dump_file, "Number of siv tests returning independent: %d\n", 4588 dependence_stats.num_siv_independent); 4589 fprintf (dump_file, "Number of siv tests unimplemented: %d\n", 4590 dependence_stats.num_siv_unimplemented); 4591 4592 fprintf (dump_file, "Number of miv tests: %d\n", 4593 dependence_stats.num_miv); 4594 fprintf (dump_file, "Number of miv tests returning dependent: %d\n", 4595 dependence_stats.num_miv_dependent); 4596 fprintf (dump_file, "Number of miv tests returning independent: %d\n", 4597 dependence_stats.num_miv_independent); 4598 fprintf (dump_file, "Number of miv tests unimplemented: %d\n", 4599 dependence_stats.num_miv_unimplemented); 4600 } 4601 4602 return res; 4603 } 4604 4605 /* Returns true when the data dependences for the basic block BB have been 4606 computed, false otherwise. 4607 DATAREFS is initialized to all the array elements contained in this basic 4608 block, DEPENDENCE_RELATIONS contains the relations between the data 4609 references. Compute read-read and self relations if 4610 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */ 4611 bool 4612 compute_data_dependences_for_bb (basic_block bb, 4613 bool compute_self_and_read_read_dependences, 4614 vec<data_reference_p> *datarefs, 4615 vec<ddr_p> *dependence_relations) 4616 { 4617 if (find_data_references_in_bb (NULL, bb, datarefs) == chrec_dont_know) 4618 return false; 4619 4620 return compute_all_dependences (*datarefs, dependence_relations, vNULL, 4621 compute_self_and_read_read_dependences); 4622 } 4623 4624 /* Entry point (for testing only). Analyze all the data references 4625 and the dependence relations in LOOP. 
4626 4627 The data references are computed first. 4628 4629 A relation on these nodes is represented by a complete graph. Some 4630 of the relations could be of no interest, thus the relations can be 4631 computed on demand. 4632 4633 In the following function we compute all the relations. This is 4634 just a first implementation that is here for: 4635 - for showing how to ask for the dependence relations, 4636 - for the debugging the whole dependence graph, 4637 - for the dejagnu testcases and maintenance. 4638 4639 It is possible to ask only for a part of the graph, avoiding to 4640 compute the whole dependence graph. The computed dependences are 4641 stored in a knowledge base (KB) such that later queries don't 4642 recompute the same information. The implementation of this KB is 4643 transparent to the optimizer, and thus the KB can be changed with a 4644 more efficient implementation, or the KB could be disabled. */ 4645 static void 4646 analyze_all_data_dependences (struct loop *loop) 4647 { 4648 unsigned int i; 4649 int nb_data_refs = 10; 4650 vec<data_reference_p> datarefs; 4651 datarefs.create (nb_data_refs); 4652 vec<ddr_p> dependence_relations; 4653 dependence_relations.create (nb_data_refs * nb_data_refs); 4654 vec<loop_p> loop_nest; 4655 loop_nest.create (3); 4656 4657 /* Compute DDs on the whole function. */ 4658 compute_data_dependences_for_loop (loop, false, &loop_nest, &datarefs, 4659 &dependence_relations); 4660 4661 if (dump_file) 4662 { 4663 dump_data_dependence_relations (dump_file, dependence_relations); 4664 fprintf (dump_file, "\n\n"); 4665 4666 if (dump_flags & TDF_DETAILS) 4667 dump_dist_dir_vectors (dump_file, dependence_relations); 4668 4669 if (dump_flags & TDF_STATS) 4670 { 4671 unsigned nb_top_relations = 0; 4672 unsigned nb_bot_relations = 0; 4673 unsigned nb_chrec_relations = 0; 4674 struct data_dependence_relation *ddr; 4675 4676 FOR_EACH_VEC_ELT (dependence_relations, i, ddr) 4677 { 4678 if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr))) 4679 nb_top_relations++; 4680 4681 else if (DDR_ARE_DEPENDENT (ddr) == chrec_known) 4682 nb_bot_relations++; 4683 4684 else 4685 nb_chrec_relations++; 4686 } 4687 4688 gather_stats_on_scev_database (); 4689 } 4690 } 4691 4692 loop_nest.release (); 4693 free_dependence_relations (dependence_relations); 4694 free_data_refs (datarefs); 4695 } 4696 4697 /* Computes all the data dependences and check that the results of 4698 several analyzers are the same. */ 4699 4700 void 4701 tree_check_data_deps (void) 4702 { 4703 loop_iterator li; 4704 struct loop *loop_nest; 4705 4706 FOR_EACH_LOOP (li, loop_nest, 0) 4707 analyze_all_data_dependences (loop_nest); 4708 } 4709 4710 /* Free the memory used by a data dependence relation DDR. */ 4711 4712 void 4713 free_dependence_relation (struct data_dependence_relation *ddr) 4714 { 4715 if (ddr == NULL) 4716 return; 4717 4718 if (DDR_SUBSCRIPTS (ddr).exists ()) 4719 free_subscripts (DDR_SUBSCRIPTS (ddr)); 4720 DDR_DIST_VECTS (ddr).release (); 4721 DDR_DIR_VECTS (ddr).release (); 4722 4723 free (ddr); 4724 } 4725 4726 /* Free the memory used by the data dependence relations from 4727 DEPENDENCE_RELATIONS. */ 4728 4729 void 4730 free_dependence_relations (vec<ddr_p> dependence_relations) 4731 { 4732 unsigned int i; 4733 struct data_dependence_relation *ddr; 4734 4735 FOR_EACH_VEC_ELT (dependence_relations, i, ddr) 4736 if (ddr) 4737 free_dependence_relation (ddr); 4738 4739 dependence_relations.release (); 4740 } 4741 4742 /* Free the memory used by the data references from DATAREFS. 
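   Note that the dependence relations built over these references do
   not own them: a caller that used both (as analyze_all_data_dependences
   above does) releases the relations with free_dependence_relations
   and then the references themselves with this function.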
*/ 4743 4744 void 4745 free_data_refs (vec<data_reference_p> datarefs) 4746 { 4747 unsigned int i; 4748 struct data_reference *dr; 4749 4750 FOR_EACH_VEC_ELT (datarefs, i, dr) 4751 free_data_ref (dr); 4752 datarefs.release (); 4753 } 4754 4755 4756 4757 /* Dump vertex I in RDG to FILE. */ 4758 4759 static void 4760 dump_rdg_vertex (FILE *file, struct graph *rdg, int i) 4761 { 4762 struct vertex *v = &(rdg->vertices[i]); 4763 struct graph_edge *e; 4764 4765 fprintf (file, "(vertex %d: (%s%s) (in:", i, 4766 RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "", 4767 RDG_MEM_READS_STMT (rdg, i) ? "r" : ""); 4768 4769 if (v->pred) 4770 for (e = v->pred; e; e = e->pred_next) 4771 fprintf (file, " %d", e->src); 4772 4773 fprintf (file, ") (out:"); 4774 4775 if (v->succ) 4776 for (e = v->succ; e; e = e->succ_next) 4777 fprintf (file, " %d", e->dest); 4778 4779 fprintf (file, ")\n"); 4780 print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS); 4781 fprintf (file, ")\n"); 4782 } 4783 4784 /* Call dump_rdg_vertex on stderr. */ 4785 4786 DEBUG_FUNCTION void 4787 debug_rdg_vertex (struct graph *rdg, int i) 4788 { 4789 dump_rdg_vertex (stderr, rdg, i); 4790 } 4791 4792 /* Dump component C of RDG to FILE. If DUMPED is non-null, set the 4793 dumped vertices to that bitmap. */ 4794 4795 static void 4796 dump_rdg_component (FILE *file, struct graph *rdg, int c, bitmap dumped) 4797 { 4798 int i; 4799 4800 fprintf (file, "(%d\n", c); 4801 4802 for (i = 0; i < rdg->n_vertices; i++) 4803 if (rdg->vertices[i].component == c) 4804 { 4805 if (dumped) 4806 bitmap_set_bit (dumped, i); 4807 4808 dump_rdg_vertex (file, rdg, i); 4809 } 4810 4811 fprintf (file, ")\n"); 4812 } 4813 4814 /* Call dump_rdg_vertex on stderr. */ 4815 4816 DEBUG_FUNCTION void 4817 debug_rdg_component (struct graph *rdg, int c) 4818 { 4819 dump_rdg_component (stderr, rdg, c, NULL); 4820 } 4821 4822 /* Dump the reduced dependence graph RDG to FILE. */ 4823 4824 void 4825 dump_rdg (FILE *file, struct graph *rdg) 4826 { 4827 int i; 4828 bitmap dumped = BITMAP_ALLOC (NULL); 4829 4830 fprintf (file, "(rdg\n"); 4831 4832 for (i = 0; i < rdg->n_vertices; i++) 4833 if (!bitmap_bit_p (dumped, i)) 4834 dump_rdg_component (file, rdg, rdg->vertices[i].component, dumped); 4835 4836 fprintf (file, ")\n"); 4837 BITMAP_FREE (dumped); 4838 } 4839 4840 /* Call dump_rdg on stderr. */ 4841 4842 DEBUG_FUNCTION void 4843 debug_rdg (struct graph *rdg) 4844 { 4845 dump_rdg (stderr, rdg); 4846 } 4847 4848 static void 4849 dot_rdg_1 (FILE *file, struct graph *rdg) 4850 { 4851 int i; 4852 4853 fprintf (file, "digraph RDG {\n"); 4854 4855 for (i = 0; i < rdg->n_vertices; i++) 4856 { 4857 struct vertex *v = &(rdg->vertices[i]); 4858 struct graph_edge *e; 4859 4860 /* Highlight reads from memory. */ 4861 if (RDG_MEM_READS_STMT (rdg, i)) 4862 fprintf (file, "%d [style=filled, fillcolor=green]\n", i); 4863 4864 /* Highlight stores to memory. */ 4865 if (RDG_MEM_WRITE_STMT (rdg, i)) 4866 fprintf (file, "%d [style=filled, fillcolor=red]\n", i); 4867 4868 if (v->succ) 4869 for (e = v->succ; e; e = e->succ_next) 4870 switch (RDGE_TYPE (e)) 4871 { 4872 case input_dd: 4873 fprintf (file, "%d -> %d [label=input] \n", i, e->dest); 4874 break; 4875 4876 case output_dd: 4877 fprintf (file, "%d -> %d [label=output] \n", i, e->dest); 4878 break; 4879 4880 case flow_dd: 4881 /* These are the most common dependences: don't print these. 
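	       Only the bare edge is emitted, e.g. "3 -> 7" (an
	       illustrative vertex pair), whereas the other dependence
	       kinds carry an explicit label.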
*/ 4882 fprintf (file, "%d -> %d \n", i, e->dest); 4883 break; 4884 4885 case anti_dd: 4886 fprintf (file, "%d -> %d [label=anti] \n", i, e->dest); 4887 break; 4888 4889 default: 4890 gcc_unreachable (); 4891 } 4892 } 4893 4894 fprintf (file, "}\n\n"); 4895 } 4896 4897 /* Display the Reduced Dependence Graph using dotty. */ 4898 extern void dot_rdg (struct graph *); 4899 4900 DEBUG_FUNCTION void 4901 dot_rdg (struct graph *rdg) 4902 { 4903 /* When debugging, enable the following code. This cannot be used 4904 in production compilers because it calls "system". */ 4905 #if 0 4906 FILE *file = fopen ("/tmp/rdg.dot", "w"); 4907 gcc_assert (file != NULL); 4908 4909 dot_rdg_1 (file, rdg); 4910 fclose (file); 4911 4912 system ("dotty /tmp/rdg.dot &"); 4913 #else 4914 dot_rdg_1 (stderr, rdg); 4915 #endif 4916 } 4917 4918 /* Returns the index of STMT in RDG. */ 4919 4920 int 4921 rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple stmt) 4922 { 4923 int index = gimple_uid (stmt); 4924 gcc_checking_assert (index == -1 || RDG_STMT (rdg, index) == stmt); 4925 return index; 4926 } 4927 4928 /* Creates an edge in RDG for each distance vector from DDR. The 4929 order that we keep track of in the RDG is the order in which 4930 statements have to be executed. */ 4931 4932 static void 4933 create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr) 4934 { 4935 struct graph_edge *e; 4936 int va, vb; 4937 data_reference_p dra = DDR_A (ddr); 4938 data_reference_p drb = DDR_B (ddr); 4939 unsigned level = ddr_dependence_level (ddr); 4940 4941 /* For non scalar dependences, when the dependence is REVERSED, 4942 statement B has to be executed before statement A. */ 4943 if (level > 0 4944 && !DDR_REVERSED_P (ddr)) 4945 { 4946 data_reference_p tmp = dra; 4947 dra = drb; 4948 drb = tmp; 4949 } 4950 4951 va = rdg_vertex_for_stmt (rdg, DR_STMT (dra)); 4952 vb = rdg_vertex_for_stmt (rdg, DR_STMT (drb)); 4953 4954 if (va < 0 || vb < 0) 4955 return; 4956 4957 e = add_edge (rdg, va, vb); 4958 e->data = XNEW (struct rdg_edge); 4959 4960 RDGE_LEVEL (e) = level; 4961 RDGE_RELATION (e) = ddr; 4962 4963 /* Determines the type of the data dependence. */ 4964 if (DR_IS_READ (dra) && DR_IS_READ (drb)) 4965 RDGE_TYPE (e) = input_dd; 4966 else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb)) 4967 RDGE_TYPE (e) = output_dd; 4968 else if (DR_IS_WRITE (dra) && DR_IS_READ (drb)) 4969 RDGE_TYPE (e) = flow_dd; 4970 else if (DR_IS_READ (dra) && DR_IS_WRITE (drb)) 4971 RDGE_TYPE (e) = anti_dd; 4972 } 4973 4974 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is 4975 the index of DEF in RDG. */ 4976 4977 static void 4978 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef) 4979 { 4980 use_operand_p imm_use_p; 4981 imm_use_iterator iterator; 4982 4983 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def) 4984 { 4985 struct graph_edge *e; 4986 int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p)); 4987 4988 if (use < 0) 4989 continue; 4990 4991 e = add_edge (rdg, idef, use); 4992 e->data = XNEW (struct rdg_edge); 4993 RDGE_TYPE (e) = flow_dd; 4994 RDGE_RELATION (e) = NULL; 4995 } 4996 } 4997 4998 /* Creates the edges of the reduced dependence graph RDG. 
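   Two kinds of edges are added: one edge per data dependence relation
   that is still considered a dependence (create_rdg_edge_for_ddr), and
   one flow edge per SSA definition/use pair whose statements both have
   a vertex in the RDG (create_rdg_edges_for_scalar).  As a purely
   illustrative example, for

   | a_1 = A[i];
   | A[i+1] = a_1 + 1;

   the scalar a_1 yields a flow edge between the two statements, and
   the accesses to A yield an edge for their dependence relation as
   well.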
*/ 4999 5000 static void 5001 create_rdg_edges (struct graph *rdg, vec<ddr_p> ddrs) 5002 { 5003 int i; 5004 struct data_dependence_relation *ddr; 5005 def_operand_p def_p; 5006 ssa_op_iter iter; 5007 5008 FOR_EACH_VEC_ELT (ddrs, i, ddr) 5009 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE) 5010 create_rdg_edge_for_ddr (rdg, ddr); 5011 5012 for (i = 0; i < rdg->n_vertices; i++) 5013 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i), 5014 iter, SSA_OP_DEF) 5015 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i); 5016 } 5017 5018 /* Build the vertices of the reduced dependence graph RDG. */ 5019 5020 static void 5021 create_rdg_vertices (struct graph *rdg, vec<gimple> stmts, loop_p loop) 5022 { 5023 int i, j; 5024 gimple stmt; 5025 5026 FOR_EACH_VEC_ELT (stmts, i, stmt) 5027 { 5028 vec<data_ref_loc> references; 5029 data_ref_loc *ref; 5030 struct vertex *v = &(rdg->vertices[i]); 5031 5032 /* Record statement to vertex mapping. */ 5033 gimple_set_uid (stmt, i); 5034 5035 v->data = XNEW (struct rdg_vertex); 5036 RDGV_STMT (v) = stmt; 5037 RDGV_DATAREFS (v).create (0); 5038 RDGV_HAS_MEM_WRITE (v) = false; 5039 RDGV_HAS_MEM_READS (v) = false; 5040 if (gimple_code (stmt) == GIMPLE_PHI) 5041 continue; 5042 5043 get_references_in_stmt (stmt, &references); 5044 FOR_EACH_VEC_ELT (references, j, ref) 5045 { 5046 data_reference_p dr; 5047 if (!ref->is_read) 5048 RDGV_HAS_MEM_WRITE (v) = true; 5049 else 5050 RDGV_HAS_MEM_READS (v) = true; 5051 dr = create_data_ref (loop, loop_containing_stmt (stmt), 5052 *ref->pos, stmt, ref->is_read); 5053 if (dr) 5054 RDGV_DATAREFS (v).safe_push (dr); 5055 } 5056 references.release (); 5057 } 5058 } 5059 5060 /* Initialize STMTS with all the statements of LOOP. When 5061 INCLUDE_PHIS is true, include also the PHI nodes. The order in 5062 which we discover statements is important as 5063 generate_loops_for_partition is using the same traversal for 5064 identifying statements. */ 5065 5066 static void 5067 stmts_from_loop (struct loop *loop, vec<gimple> *stmts) 5068 { 5069 unsigned int i; 5070 basic_block *bbs = get_loop_body_in_dom_order (loop); 5071 5072 for (i = 0; i < loop->num_nodes; i++) 5073 { 5074 basic_block bb = bbs[i]; 5075 gimple_stmt_iterator bsi; 5076 gimple stmt; 5077 5078 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi)) 5079 stmts->safe_push (gsi_stmt (bsi)); 5080 5081 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi)) 5082 { 5083 stmt = gsi_stmt (bsi); 5084 if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt)) 5085 stmts->safe_push (stmt); 5086 } 5087 } 5088 5089 free (bbs); 5090 } 5091 5092 /* Returns true when all the dependences are computable. */ 5093 5094 static bool 5095 known_dependences_p (vec<ddr_p> dependence_relations) 5096 { 5097 ddr_p ddr; 5098 unsigned int i; 5099 5100 FOR_EACH_VEC_ELT (dependence_relations, i, ddr) 5101 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know) 5102 return false; 5103 5104 return true; 5105 } 5106 5107 /* Build the Reduced Dependence Graph (RDG) with one vertex per 5108 statement of the loop nest, and one edge per data dependence or 5109 scalar dependence. */ 5110 5111 struct graph * 5112 build_empty_rdg (int n_stmts) 5113 { 5114 struct graph *rdg = new_graph (n_stmts); 5115 return rdg; 5116 } 5117 5118 /* Build the Reduced Dependence Graph (RDG) with one vertex per 5119 statement of the loop nest, and one edge per data dependence or 5120 scalar dependence. 
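   A hypothetical usage sketch (illustrative only), following the
   conventions of the functions above:

   | vec<loop_p> nest;                 nest.create (3);
   | vec<ddr_p> ddrs;                  ddrs.create (10);
   | vec<data_reference_p> datarefs;   datarefs.create (10);
   | struct graph *rdg = build_rdg (loop, &nest, &ddrs, &datarefs);
   | if (rdg)
   |   {
   |     ...  inspect the RDG  ...
   |     free_rdg (rdg);
   |   }
   | free_dependence_relations (ddrs);
   | free_data_refs (datarefs);
   | nest.release ();

   build_rdg returns NULL when some dependence relation could not be
   computed or remains unknown.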
*/ 5121 5122 struct graph * 5123 build_rdg (struct loop *loop, 5124 vec<loop_p> *loop_nest, 5125 vec<ddr_p> *dependence_relations, 5126 vec<data_reference_p> *datarefs) 5127 { 5128 struct graph *rdg = NULL; 5129 5130 if (compute_data_dependences_for_loop (loop, false, loop_nest, datarefs, 5131 dependence_relations) 5132 && known_dependences_p (*dependence_relations)) 5133 { 5134 vec<gimple> stmts; 5135 stmts.create (10); 5136 stmts_from_loop (loop, &stmts); 5137 rdg = build_empty_rdg (stmts.length ()); 5138 create_rdg_vertices (rdg, stmts, loop); 5139 create_rdg_edges (rdg, *dependence_relations); 5140 stmts.release (); 5141 } 5142 5143 return rdg; 5144 } 5145 5146 /* Free the reduced dependence graph RDG. */ 5147 5148 void 5149 free_rdg (struct graph *rdg) 5150 { 5151 int i; 5152 5153 for (i = 0; i < rdg->n_vertices; i++) 5154 { 5155 struct vertex *v = &(rdg->vertices[i]); 5156 struct graph_edge *e; 5157 5158 for (e = v->succ; e; e = e->succ_next) 5159 free (e->data); 5160 5161 gimple_set_uid (RDGV_STMT (v), -1); 5162 free_data_refs (RDGV_DATAREFS (v)); 5163 free (v->data); 5164 } 5165 5166 free_graph (rdg); 5167 } 5168 5169 /* Determines whether the statement from vertex V of the RDG has a 5170 definition used outside the loop that contains this statement. */ 5171 5172 bool 5173 rdg_defs_used_in_other_loops_p (struct graph *rdg, int v) 5174 { 5175 gimple stmt = RDG_STMT (rdg, v); 5176 struct loop *loop = loop_containing_stmt (stmt); 5177 use_operand_p imm_use_p; 5178 imm_use_iterator iterator; 5179 ssa_op_iter it; 5180 def_operand_p def_p; 5181 5182 if (!loop) 5183 return true; 5184 5185 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, it, SSA_OP_DEF) 5186 { 5187 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, DEF_FROM_PTR (def_p)) 5188 { 5189 if (loop_containing_stmt (USE_STMT (imm_use_p)) != loop) 5190 return true; 5191 } 5192 } 5193 5194 return false; 5195 } 5196