/* Loop distribution.
   Copyright (C) 2006-2015 Free Software Foundation, Inc.
   Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
   and Sebastian Pop <sebastian.pop@amd.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass performs loop distribution: for example, the loop

   |DO I = 2, N
   |    A(I) = B(I) + C
   |    D(I) = A(I-1)*E
   |ENDDO

   is transformed to

   |DOALL I = 2, N
   |   A(I) = B(I) + C
   |ENDDO
   |
   |DOALL I = 2, N
   |   D(I) = A(I-1)*E
   |ENDDO

   This pass uses an RDG, Reduced Dependence Graph built on top of the
   data dependence relations.  The RDG is then topologically sorted to
   obtain a map of information producers/consumers based on which it
   generates the new loops.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "hash-set.h"
#include "machmode.h"
#include "vec.h"
#include "double-int.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "options.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "fold-const.h"
#include "predict.h"
#include "tm.h"
#include "hard-reg-set.h"
#include "input.h"
#include "function.h"
#include "dominance.h"
#include "cfg.h"
#include "cfganal.h"
#include "basic-block.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "stor-layout.h"
#include "gimple-ssa.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop.h"
#include "tree-into-ssa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-chrec.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "tree-pass.h"
#include "gimple-pretty-print.h"
#include "tree-vectorizer.h"
#include "real.h"


/* A Reduced Dependence Graph (RDG) vertex representing a statement.  */
typedef struct rdg_vertex
{
  /* The statement represented by this vertex.  */
  gimple stmt;

  /* Vector of data-references in this statement.  */
  vec<data_reference_p> datarefs;

  /* True when the statement contains a write to memory.  */
  bool has_mem_write;

  /* True when the statement contains a read from memory.  */
  bool has_mem_reads;
} *rdg_vertex_p;
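/* Illustrative note (not used by the pass): in C, the file-head example
   corresponds to rewriting

     for (i = 2; i <= n; i++)
       {
         a[i] = b[i] + c;
         d[i] = a[i-1] * e;
       }

   into the two independent loops

     for (i = 2; i <= n; i++)
       a[i] = b[i] + c;

     for (i = 2; i <= n; i++)
       d[i] = a[i-1] * e;

   This is valid because the only dependence between the two statements
   is the loop-carried flow dependence on A, and it still flows from the
   first loop into the second one after distribution.  */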
#define RDGV_STMT(V)     ((struct rdg_vertex *) ((V)->data))->stmt
#define RDGV_DATAREFS(V) ((struct rdg_vertex *) ((V)->data))->datarefs
#define RDGV_HAS_MEM_WRITE(V) ((struct rdg_vertex *) ((V)->data))->has_mem_write
#define RDGV_HAS_MEM_READS(V) ((struct rdg_vertex *) ((V)->data))->has_mem_reads
#define RDG_STMT(RDG, I) RDGV_STMT (&(RDG->vertices[I]))
#define RDG_DATAREFS(RDG, I) RDGV_DATAREFS (&(RDG->vertices[I]))
#define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
#define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I]))

/* Data dependence type.  */

enum rdg_dep_type
{
  /* Read After Write (RAW).  */
  flow_dd = 'f',

  /* Control dependence (execute conditional on).  */
  control_dd = 'c'
};

/* Dependence information attached to an edge of the RDG.  */

typedef struct rdg_edge
{
  /* Type of the dependence.  */
  enum rdg_dep_type type;
} *rdg_edge_p;

#define RDGE_TYPE(E) ((struct rdg_edge *) ((E)->data))->type

/* Dump vertex I in RDG to FILE.  */

static void
dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
{
  struct vertex *v = &(rdg->vertices[i]);
  struct graph_edge *e;

  fprintf (file, "(vertex %d: (%s%s) (in:", i,
           RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
           RDG_MEM_READS_STMT (rdg, i) ? "r" : "");

  if (v->pred)
    for (e = v->pred; e; e = e->pred_next)
      fprintf (file, " %d", e->src);

  fprintf (file, ") (out:");

  if (v->succ)
    for (e = v->succ; e; e = e->succ_next)
      fprintf (file, " %d", e->dest);

  fprintf (file, ")\n");
  print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
  fprintf (file, ")\n");
}

/* Call dump_rdg_vertex on stderr.  */

DEBUG_FUNCTION void
debug_rdg_vertex (struct graph *rdg, int i)
{
  dump_rdg_vertex (stderr, rdg, i);
}

/* Dump the reduced dependence graph RDG to FILE.  */

static void
dump_rdg (FILE *file, struct graph *rdg)
{
  fprintf (file, "(rdg\n");
  for (int i = 0; i < rdg->n_vertices; i++)
    dump_rdg_vertex (file, rdg, i);
  fprintf (file, ")\n");
}

/* Call dump_rdg on stderr.  */

DEBUG_FUNCTION void
debug_rdg (struct graph *rdg)
{
  dump_rdg (stderr, rdg);
}

static void
dot_rdg_1 (FILE *file, struct graph *rdg)
{
  int i;
  pretty_printer buffer;
  pp_needs_newline (&buffer) = false;
  buffer.buffer->stream = file;

  fprintf (file, "digraph RDG {\n");

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      fprintf (file, "%d [label=\"[%d] ", i, i);
      pp_gimple_stmt_1 (&buffer, RDGV_STMT (v), 0, TDF_SLIM);
      pp_flush (&buffer);
      fprintf (file, "\"]\n");

      /* Highlight reads from memory.  */
      if (RDG_MEM_READS_STMT (rdg, i))
        fprintf (file, "%d [style=filled, fillcolor=green]\n", i);

      /* Highlight stores to memory.  */
      if (RDG_MEM_WRITE_STMT (rdg, i))
        fprintf (file, "%d [style=filled, fillcolor=red]\n", i);

      if (v->succ)
        for (e = v->succ; e; e = e->succ_next)
          switch (RDGE_TYPE (e))
            {
            case flow_dd:
              /* These are the most common dependences: print them
                 without a label.  */
              fprintf (file, "%d -> %d \n", i, e->dest);
              break;

            case control_dd:
              fprintf (file, "%d -> %d [label=control] \n", i, e->dest);
              break;

            default:
              gcc_unreachable ();
            }
    }

  fprintf (file, "}\n\n");
}
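/* Illustrative sketch of the dot output produced by dot_rdg_1 for a
   simple copy loop (vertex numbers and statement labels are
   hypothetical):

     digraph RDG {
     0 [label="[0] _1 = b[i_2];"]
     0 [style=filled, fillcolor=green]
     1 [label="[1] a[i_2] = _1;"]
     1 [style=filled, fillcolor=red]
     0 -> 1
     }

   Stores are filled red, loads green, and control dependence edges
   carry a "control" label.  */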
/* Display the Reduced Dependence Graph using dot.  */

DEBUG_FUNCTION void
dot_rdg (struct graph *rdg)
{
  /* When debugging, you may want to enable the following code.  */
#ifdef HAVE_POPEN
  FILE *file = popen ("dot -Tx11", "w");
  if (!file)
    return;
  dot_rdg_1 (file, rdg);
  fflush (file);
  close (fileno (file));
  pclose (file);
#else
  dot_rdg_1 (stderr, rdg);
#endif
}

/* Returns the index of STMT in RDG.  */

static int
rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple stmt)
{
  int index = gimple_uid (stmt);
  gcc_checking_assert (index == -1 || RDG_STMT (rdg, index) == stmt);
  return index;
}

/* Creates dependence edges in RDG for all the uses of DEF.  IDEF is
   the index of DEF in RDG.  */

static void
create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
{
  use_operand_p imm_use_p;
  imm_use_iterator iterator;

  FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
    {
      struct graph_edge *e;
      int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));

      if (use < 0)
        continue;

      e = add_edge (rdg, idef, use);
      e->data = XNEW (struct rdg_edge);
      RDGE_TYPE (e) = flow_dd;
    }
}

/* Creates an edge for the control dependences of BB to the vertex V.  */

static void
create_edge_for_control_dependence (struct graph *rdg, basic_block bb,
                                    int v, control_dependences *cd)
{
  bitmap_iterator bi;
  unsigned edge_n;
  EXECUTE_IF_SET_IN_BITMAP (cd->get_edges_dependent_on (bb->index),
                            0, edge_n, bi)
    {
      basic_block cond_bb = cd->get_edge (edge_n)->src;
      gimple stmt = last_stmt (cond_bb);
      if (stmt && is_ctrl_stmt (stmt))
        {
          struct graph_edge *e;
          int c = rdg_vertex_for_stmt (rdg, stmt);
          if (c < 0)
            continue;

          e = add_edge (rdg, c, v);
          e->data = XNEW (struct rdg_edge);
          RDGE_TYPE (e) = control_dd;
        }
    }
}

/* Creates the flow edges of the reduced dependence graph RDG.  */

static void
create_rdg_flow_edges (struct graph *rdg)
{
  int i;
  def_operand_p def_p;
  ssa_op_iter iter;

  for (i = 0; i < rdg->n_vertices; i++)
    FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
                              iter, SSA_OP_DEF)
      create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
}

/* Creates the control dependence edges of the reduced dependence
   graph RDG.  */

static void
create_rdg_cd_edges (struct graph *rdg, control_dependences *cd)
{
  int i;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      gimple stmt = RDG_STMT (rdg, i);
      if (gimple_code (stmt) == GIMPLE_PHI)
        {
          edge_iterator ei;
          edge e;
          FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds)
            create_edge_for_control_dependence (rdg, e->src, i, cd);
        }
      else
        create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd);
    }
}
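/* Illustrative note: given a loop body like

     if (p_1 != 0)
       a[i_2] = 0;

   the vertex of the store receives a control_dd edge from the vertex
   of the GIMPLE_COND, so a partition that keeps the store also keeps
   the condition it executes under.  */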
/* Build the vertices of the reduced dependence graph RDG.  Return false
   if that failed.  */

static bool
create_rdg_vertices (struct graph *rdg, vec<gimple> stmts, loop_p loop,
                     vec<data_reference_p> *datarefs)
{
  int i;
  gimple stmt;

  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      struct vertex *v = &(rdg->vertices[i]);

      /* Record statement to vertex mapping.  */
      gimple_set_uid (stmt, i);

      v->data = XNEW (struct rdg_vertex);
      RDGV_STMT (v) = stmt;
      RDGV_DATAREFS (v).create (0);
      RDGV_HAS_MEM_WRITE (v) = false;
      RDGV_HAS_MEM_READS (v) = false;
      if (gimple_code (stmt) == GIMPLE_PHI)
        continue;

      unsigned drp = datarefs->length ();
      if (!find_data_references_in_stmt (loop, stmt, datarefs))
        return false;
      for (unsigned j = drp; j < datarefs->length (); ++j)
        {
          data_reference_p dr = (*datarefs)[j];
          if (DR_IS_READ (dr))
            RDGV_HAS_MEM_READS (v) = true;
          else
            RDGV_HAS_MEM_WRITE (v) = true;
          RDGV_DATAREFS (v).safe_push (dr);
        }
    }
  return true;
}

/* Initialize STMTS with all the statements of LOOP.  The order in
   which we discover statements is important as
   generate_loops_for_partition is using the same traversal for
   identifying statements in loop copies.  */

static void
stmts_from_loop (struct loop *loop, vec<gimple> *stmts)
{
  unsigned int i;
  basic_block *bbs = get_loop_body_in_dom_order (loop);

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
           gsi_next (&bsi))
        if (!virtual_operand_p (gimple_phi_result (bsi.phi ())))
          stmts->safe_push (bsi.phi ());

      for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
           gsi_next (&bsi))
        {
          gimple stmt = gsi_stmt (bsi);
          if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
            stmts->safe_push (stmt);
        }
    }

  free (bbs);
}

/* Free the reduced dependence graph RDG.  */

static void
free_rdg (struct graph *rdg)
{
  int i;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      for (e = v->succ; e; e = e->succ_next)
        free (e->data);

      if (v->data)
        {
          gimple_set_uid (RDGV_STMT (v), -1);
          free_data_refs (RDGV_DATAREFS (v));
          free (v->data);
        }
    }

  free_graph (rdg);
}

/* Build the Reduced Dependence Graph (RDG) with one vertex per
   statement of the loop nest LOOP_NEST, and one edge per data dependence or
   scalar dependence.  */

static struct graph *
build_rdg (vec<loop_p> loop_nest, control_dependences *cd)
{
  struct graph *rdg;
  vec<data_reference_p> datarefs;

  /* Create the RDG vertices from the stmts of the loop nest.  */
  auto_vec<gimple, 10> stmts;
  stmts_from_loop (loop_nest[0], &stmts);
  rdg = new_graph (stmts.length ());
  datarefs.create (10);
  if (!create_rdg_vertices (rdg, stmts, loop_nest[0], &datarefs))
    {
      datarefs.release ();
      free_rdg (rdg);
      return NULL;
    }
  stmts.release ();

  create_rdg_flow_edges (rdg);
  if (cd)
    create_rdg_cd_edges (rdg, cd);

  datarefs.release ();

  return rdg;
}



enum partition_kind {
    PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY
};

typedef struct partition_s
{
  bitmap stmts;
  bitmap loops;
  bool reduction_p;
  enum partition_kind kind;
  /* The data references a partition of kind != PKIND_NORMAL is about.  */
  data_reference_p main_dr;
  data_reference_p secondary_dr;
  tree niter;
  bool plus_one;
} *partition_t;
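/* Illustrative note: classify_partition (below) sets KIND to
   PKIND_MEMSET for partitions equivalent to, e.g.,

     for (i = 0; i < n; i++)
       a[i] = 0;

   and to PKIND_MEMCPY for single-store/single-load partitions such as

     for (i = 0; i < n; i++)
       a[i] = b[i];

   In both cases MAIN_DR is the store, SECONDARY_DR the load (if any),
   and NITER the number of latch executions from which the size argument
   of the generated builtin call is computed.  */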
/* Allocate and initialize a partition from BITMAP.  */

static partition_t
partition_alloc (bitmap stmts, bitmap loops)
{
  partition_t partition = XCNEW (struct partition_s);
  partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
  partition->loops = loops ? loops : BITMAP_ALLOC (NULL);
  partition->reduction_p = false;
  partition->kind = PKIND_NORMAL;
  return partition;
}

/* Free PARTITION.  */

static void
partition_free (partition_t partition)
{
  BITMAP_FREE (partition->stmts);
  BITMAP_FREE (partition->loops);
  free (partition);
}

/* Returns true if the partition can be generated as a builtin.  */

static bool
partition_builtin_p (partition_t partition)
{
  return partition->kind != PKIND_NORMAL;
}

/* Returns true if the partition contains a reduction.  */

static bool
partition_reduction_p (partition_t partition)
{
  return partition->reduction_p;
}

/* Merge PARTITION into the partition DEST.  */

static void
partition_merge_into (partition_t dest, partition_t partition)
{
  dest->kind = PKIND_NORMAL;
  bitmap_ior_into (dest->stmts, partition->stmts);
  if (partition_reduction_p (partition))
    dest->reduction_p = true;
}


/* Returns true when DEF is an SSA_NAME defined in LOOP and used after
   the LOOP.  */

static bool
ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
{
  imm_use_iterator imm_iter;
  use_operand_p use_p;

  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
    {
      gimple use_stmt = USE_STMT (use_p);
      if (!is_gimple_debug (use_stmt)
          && loop != loop_containing_stmt (use_stmt))
        return true;
    }

  return false;
}

/* Returns true when STMT defines a scalar variable used after the
   loop LOOP.  */

static bool
stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple stmt)
{
  def_operand_p def_p;
  ssa_op_iter op_iter;

  if (gimple_code (stmt) == GIMPLE_PHI)
    return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop);

  FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
    if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
      return true;

  return false;
}

/* Return a copy of LOOP placed before LOOP.  */

static struct loop *
copy_loop_before (struct loop *loop)
{
  struct loop *res;
  edge preheader = loop_preheader_edge (loop);

  initialize_original_copy_tables ();
  res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, NULL, preheader);
  gcc_assert (res != NULL);
  free_original_copy_tables ();
  delete_update_ssa ();

  return res;
}

/* Creates an empty basic block after LOOP.  */

static void
create_bb_after_loop (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return;

  split_edge (exit);
}
/* Generate code for PARTITION from the code in LOOP.  The loop is
   copied when COPY_P is true.  All the statements not flagged in the
   PARTITION bitmap are removed from the loop or from its copy.  The
   statements are indexed in sequence inside a basic block, and the
   basic blocks of a loop are taken in dom order.  */

static void
generate_loops_for_partition (struct loop *loop, partition_t partition,
                              bool copy_p)
{
  unsigned i;
  basic_block *bbs;

  if (copy_p)
    {
      loop = copy_loop_before (loop);
      gcc_assert (loop != NULL);
      create_preheader (loop, CP_SIMPLE_PREHEADERS);
      create_bb_after_loop (loop);
    }

  /* Remove stmts not in the PARTITION bitmap.  */
  bbs = get_loop_body_in_dom_order (loop);

  if (MAY_HAVE_DEBUG_STMTS)
    for (i = 0; i < loop->num_nodes; i++)
      {
        basic_block bb = bbs[i];

        for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
             gsi_next (&bsi))
          {
            gphi *phi = bsi.phi ();
            if (!virtual_operand_p (gimple_phi_result (phi))
                && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
              reset_debug_uses (phi);
          }

        for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
             gsi_next (&bsi))
          {
            gimple stmt = gsi_stmt (bsi);
            if (gimple_code (stmt) != GIMPLE_LABEL
                && !is_gimple_debug (stmt)
                && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
              reset_debug_uses (stmt);
          }
      }

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
        {
          gphi *phi = bsi.phi ();
          if (!virtual_operand_p (gimple_phi_result (phi))
              && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
            remove_phi_node (&bsi, true);
          else
            gsi_next (&bsi);
        }

      for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
        {
          gimple stmt = gsi_stmt (bsi);
          if (gimple_code (stmt) != GIMPLE_LABEL
              && !is_gimple_debug (stmt)
              && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
            {
              /* Choose an arbitrary path through the empty CFG part
                 that this unnecessary control stmt controls.  */
              if (gcond *cond_stmt = dyn_cast <gcond *> (stmt))
                {
                  gimple_cond_make_false (cond_stmt);
                  update_stmt (stmt);
                }
              else if (gimple_code (stmt) == GIMPLE_SWITCH)
                {
                  gswitch *switch_stmt = as_a <gswitch *> (stmt);
                  gimple_switch_set_index
                    (switch_stmt, CASE_LOW (gimple_switch_label (switch_stmt, 1)));
                  update_stmt (stmt);
                }
              else
                {
                  unlink_stmt_vdef (stmt);
                  gsi_remove (&bsi, true);
                  release_defs (stmt);
                  continue;
                }
            }
          gsi_next (&bsi);
        }
    }

  free (bbs);
}

/* Build the size argument for a memory operation call.  */

static tree
build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter,
                    bool plus_one)
{
  tree size = fold_convert_loc (loc, sizetype, nb_iter);
  if (plus_one)
    size = size_binop (PLUS_EXPR, size, size_one_node);
  size = fold_build2_loc (loc, MULT_EXPR, sizetype, size,
                          TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
  size = fold_convert_loc (loc, size_type_node, size);
  return size;
}
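/* Illustrative note: for a store to an array of 4-byte ints with
   NB_ITER latch executions and PLUS_ONE set (the store also executes on
   the iteration that leaves the loop), the helper above computes

     size = (size_t) (NB_ITER + 1) * 4

   bytes, which is exactly the number of bytes written by the loop.  */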
/* Build an address argument for a memory operation call.  */

static tree
build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes)
{
  tree addr_base;

  addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
  addr_base = fold_convert_loc (loc, sizetype, addr_base);

  /* Test for a negative stride, iterating over every element.  */
  if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
    {
      addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
                                  fold_convert_loc (loc, sizetype, nb_bytes));
      addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
                                  TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
    }

  return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base);
}

/* If VAL memory representation contains the same value in all bytes,
   return that value, otherwise return -1.
   E.g. for 0x24242424 return 0x24, for IEEE double
   747708026454360457216.0 return 0x44, etc.  */

static int
const_with_all_bytes_same (tree val)
{
  unsigned char buf[64];
  int i, len;

  if (integer_zerop (val)
      || (TREE_CODE (val) == CONSTRUCTOR
          && !TREE_CLOBBER_P (val)
          && CONSTRUCTOR_NELTS (val) == 0))
    return 0;

  if (real_zerop (val))
    {
      /* Only return 0 for +0.0, not for -0.0, which doesn't have
         an all bytes same memory representation.  Don't transform
         -0.0 stores into +0.0 even for !HONOR_SIGNED_ZEROS.  */
      switch (TREE_CODE (val))
        {
        case REAL_CST:
          if (!real_isneg (TREE_REAL_CST_PTR (val)))
            return 0;
          break;
        case COMPLEX_CST:
          if (!const_with_all_bytes_same (TREE_REALPART (val))
              && !const_with_all_bytes_same (TREE_IMAGPART (val)))
            return 0;
          break;
        case VECTOR_CST:
          unsigned int j;
          for (j = 0; j < VECTOR_CST_NELTS (val); ++j)
            if (const_with_all_bytes_same (VECTOR_CST_ELT (val, j)))
              break;
          if (j == VECTOR_CST_NELTS (val))
            return 0;
          break;
        default:
          break;
        }
    }

  if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
    return -1;

  len = native_encode_expr (val, buf, sizeof (buf));
  if (len == 0)
    return -1;
  for (i = 1; i < len; i++)
    if (buf[i] != buf[0])
      return -1;
  return buf[0];
}
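/* Illustrative examples for const_with_all_bytes_same: 0x24242424
   yields 0x24 and 0.0f yields 0 (all-zero bytes), while -0.0 yields -1
   because its sign byte differs from the others, and 0x12345678 yields
   -1 because its bytes differ.  */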
/* Generate a call to memset for PARTITION in LOOP.  */

static void
generate_memset_builtin (struct loop *loop, partition_t partition)
{
  gimple_stmt_iterator gsi;
  gimple stmt, fn_call;
  tree mem, fn, nb_bytes;
  location_t loc;
  tree val;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
                                 partition->plus_one);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
                                       false, GSI_CONTINUE_LINKING);
  mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
                                  false, GSI_CONTINUE_LINKING);

  /* This exactly matches the pattern recognition in classify_partition.  */
  val = gimple_assign_rhs1 (stmt);
  /* Handle constants like 0x15151515 and similarly
     floating point constants etc. where all bytes are the same.  */
  int bytev = const_with_all_bytes_same (val);
  if (bytev != -1)
    val = build_int_cst (integer_type_node, bytev);
  else if (TREE_CODE (val) == INTEGER_CST)
    val = fold_convert (integer_type_node, val);
  else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
    {
      tree tem = make_ssa_name (integer_type_node);
      gimple cstmt = gimple_build_assign (tem, NOP_EXPR, val);
      gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
      val = tem;
    }

  fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
  fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "generated memset");
      if (bytev == 0)
        fprintf (dump_file, " zero\n");
      else
        fprintf (dump_file, "\n");
    }
}

/* Generate a call to memcpy for PARTITION in LOOP.  */

static void
generate_memcpy_builtin (struct loop *loop, partition_t partition)
{
  gimple_stmt_iterator gsi;
  gimple stmt, fn_call;
  tree dest, src, fn, nb_bytes;
  location_t loc;
  enum built_in_function kind;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
                                 partition->plus_one);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
                                       false, GSI_CONTINUE_LINKING);
  dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes);
  if (ptr_derefs_may_alias_p (dest, src))
    kind = BUILT_IN_MEMMOVE;
  else
    kind = BUILT_IN_MEMCPY;

  dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
                                   false, GSI_CONTINUE_LINKING);
  src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
                                  false, GSI_CONTINUE_LINKING);
  fn = build_fold_addr_expr (builtin_decl_implicit (kind));
  fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (kind == BUILT_IN_MEMCPY)
        fprintf (dump_file, "generated memcpy\n");
      else
        fprintf (dump_file, "generated memmove\n");
    }
}
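/* Illustrative note: generate_memcpy_builtin falls back to memmove
   whenever the destination and source may overlap, e.g. for a copy
   within the same array such as

     for (i = 0; i < n; i++)
       a[i] = a[i + k];

   classify_partition has already checked that a block copy preserves
   the semantics of such a loop.  */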
/* Remove and destroy the loop LOOP.  */

static void
destroy_loop (struct loop *loop)
{
  unsigned nbbs = loop->num_nodes;
  edge exit = single_exit (loop);
  basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
  basic_block *bbs;
  unsigned i;

  bbs = get_loop_body_in_dom_order (loop);

  redirect_edge_pred (exit, src);
  exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
  exit->flags |= EDGE_FALLTHRU;
  cancel_loop_tree (loop);
  rescan_loop_exit (exit, false, true);

  for (i = 0; i < nbbs; i++)
    {
      /* We have made sure to not leave any dangling uses of SSA
         names defined in the loop.  With the exception of virtuals.
         Make sure we replace all uses of virtual defs that will remain
         outside of the loop with the bare symbol as delete_basic_block
         will release them.  */
      for (gphi_iterator gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi);
           gsi_next (&gsi))
        {
          gphi *phi = gsi.phi ();
          if (virtual_operand_p (gimple_phi_result (phi)))
            mark_virtual_phi_result_for_renaming (phi);
        }
      for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi);
           gsi_next (&gsi))
        {
          gimple stmt = gsi_stmt (gsi);
          tree vdef = gimple_vdef (stmt);
          if (vdef && TREE_CODE (vdef) == SSA_NAME)
            mark_virtual_operand_for_renaming (vdef);
        }
      delete_basic_block (bbs[i]);
    }
  free (bbs);

  set_immediate_dominator (CDI_DOMINATORS, dest,
                           recompute_dominator (CDI_DOMINATORS, dest));
}

/* Generates code for PARTITION.  */

static void
generate_code_for_partition (struct loop *loop,
                             partition_t partition, bool copy_p)
{
  switch (partition->kind)
    {
    case PKIND_NORMAL:
      /* Reductions all have to be in the last partition.  */
      gcc_assert (!partition_reduction_p (partition)
                  || !copy_p);
      generate_loops_for_partition (loop, partition, copy_p);
      return;

    case PKIND_MEMSET:
      generate_memset_builtin (loop, partition);
      break;

    case PKIND_MEMCPY:
      generate_memcpy_builtin (loop, partition);
      break;

    default:
      gcc_unreachable ();
    }

  /* Common tail for partitions we turn into a call.  If this was the last
     partition for which we generate code, we have to destroy the loop.  */
  if (!copy_p)
    destroy_loop (loop);
}


/* Returns a partition with all the statements needed for computing
   the vertex V of the RDG, also including the loop exit conditions.  */

static partition_t
build_rdg_partition_for_vertex (struct graph *rdg, int v)
{
  partition_t partition = partition_alloc (NULL, NULL);
  auto_vec<int, 3> nodes;
  unsigned i;
  int x;

  graphds_dfs (rdg, &v, 1, &nodes, false, NULL);

  FOR_EACH_VEC_ELT (nodes, i, x)
    {
      bitmap_set_bit (partition->stmts, x);
      bitmap_set_bit (partition->loops,
                      loop_containing_stmt (RDG_STMT (rdg, x))->num);
    }

  return partition;
}
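/* Illustrative note: the DFS above runs with FORWARD = false, i.e. it
   walks dependence edges backwards, so the partition seeded by vertex V
   contains V together with every producer it transitively depends on;
   the control_dd edges likewise pull in the loop exit condition.  */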
/* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
   For the moment we detect memset and memcpy patterns only.  */

static void
classify_partition (loop_p loop, struct graph *rdg, partition_t partition)
{
  bitmap_iterator bi;
  unsigned i;
  tree nb_iter;
  data_reference_p single_load, single_store;
  bool volatiles_p = false;
  bool plus_one = false;

  partition->kind = PKIND_NORMAL;
  partition->main_dr = NULL;
  partition->secondary_dr = NULL;
  partition->niter = NULL_TREE;
  partition->plus_one = false;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple stmt = RDG_STMT (rdg, i);

      if (gimple_has_volatile_ops (stmt))
        volatiles_p = true;

      /* If the stmt has uses outside of the loop mark it as reduction.  */
      if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
        {
          partition->reduction_p = true;
          return;
        }
    }

  /* Perform general partition disqualification for builtins.  */
  if (volatiles_p
      || !flag_tree_loop_distribute_patterns)
    return;

  /* Detect memset and memcpy.  */
  single_load = NULL;
  single_store = NULL;
  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple stmt = RDG_STMT (rdg, i);
      data_reference_p dr;
      unsigned j;

      if (gimple_code (stmt) == GIMPLE_PHI)
        continue;

      /* Any scalar stmts are ok.  */
      if (!gimple_vuse (stmt))
        continue;

      /* Otherwise just regular loads/stores.  */
      if (!gimple_assign_single_p (stmt))
        return;

      /* But exactly one store and/or load.  */
      for (j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j)
        {
          if (DR_IS_READ (dr))
            {
              if (single_load != NULL)
                return;
              single_load = dr;
            }
          else
            {
              if (single_store != NULL)
                return;
              single_store = dr;
            }
        }
    }

  if (!single_store)
    return;

  nb_iter = number_of_latch_executions (loop);
  if (!nb_iter || nb_iter == chrec_dont_know)
    return;
  if (dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src,
                      gimple_bb (DR_STMT (single_store))))
    plus_one = true;

  if (single_store && !single_load)
    {
      gimple stmt = DR_STMT (single_store);
      tree rhs = gimple_assign_rhs1 (stmt);
      if (const_with_all_bytes_same (rhs) == -1
          && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs))
              || (TYPE_MODE (TREE_TYPE (rhs))
                  != TYPE_MODE (unsigned_char_type_node))))
        return;
      if (TREE_CODE (rhs) == SSA_NAME
          && !SSA_NAME_IS_DEFAULT_DEF (rhs)
          && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
        return;
      if (!adjacent_dr_p (single_store)
          || !dominated_by_p (CDI_DOMINATORS,
                              loop->latch, gimple_bb (stmt)))
        return;
      partition->kind = PKIND_MEMSET;
      partition->main_dr = single_store;
      partition->niter = nb_iter;
      partition->plus_one = plus_one;
    }
  else if (single_store && single_load)
    {
      gimple store = DR_STMT (single_store);
      gimple load = DR_STMT (single_load);
      /* Direct aggregate copy or via an SSA name temporary.  */
      if (load != store
          && gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
        return;
      if (!adjacent_dr_p (single_store)
          || !adjacent_dr_p (single_load)
          || !operand_equal_p (DR_STEP (single_store),
                               DR_STEP (single_load), 0)
          || !dominated_by_p (CDI_DOMINATORS,
                              loop->latch, gimple_bb (store)))
        return;
      /* Now check that if there is a dependence this dependence is
         of a suitable form for memmove.  */
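      /* Illustrative note: a suitable form means the loop behaves like
         a block copy.  For instance

           for (i = 0; i < n; i++)
             a[i + 1] = a[i];

         smears a[0] across the array element by element and is rejected
         below, whereas

           for (i = 0; i < n; i++)
             a[i] = a[i + 1];

         can be emitted as a memmove.  */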
      vec<loop_p> loops = vNULL;
      ddr_p ddr;
      loops.safe_push (loop);
      ddr = initialize_data_dependence_relation (single_load, single_store,
                                                 loops);
      compute_affine_dependence (ddr, loop);
      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
        {
          free_dependence_relation (ddr);
          loops.release ();
          return;
        }
      if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
        {
          if (DDR_NUM_DIST_VECTS (ddr) == 0)
            {
              free_dependence_relation (ddr);
              loops.release ();
              return;
            }
          lambda_vector dist_v;
          FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
            {
              int dist = dist_v[index_in_loop_nest (loop->num,
                                                    DDR_LOOP_NEST (ddr))];
              if (dist > 0 && !DDR_REVERSED_P (ddr))
                {
                  free_dependence_relation (ddr);
                  loops.release ();
                  return;
                }
            }
        }
      free_dependence_relation (ddr);
      loops.release ();
      partition->kind = PKIND_MEMCPY;
      partition->main_dr = single_store;
      partition->secondary_dr = single_load;
      partition->niter = nb_iter;
      partition->plus_one = plus_one;
    }
}

/* For a data reference REF, return the declaration of its base
   address or NULL_TREE if the base is not determined.  */

static tree
ref_base_address (data_reference_p dr)
{
  tree base_address = DR_BASE_ADDRESS (dr);
  if (base_address
      && TREE_CODE (base_address) == ADDR_EXPR)
    return TREE_OPERAND (base_address, 0);

  return base_address;
}

/* Returns true when PARTITION1 and PARTITION2 have similar memory
   accesses in RDG.  */

static bool
similar_memory_accesses (struct graph *rdg, partition_t partition1,
                         partition_t partition2)
{
  unsigned i, j, k, l;
  bitmap_iterator bi, bj;
  data_reference_p ref1, ref2;

  /* First check whether the intersection of the two partitions
     contains any loads or stores; shared loads are the most common
     case.  */
  EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
        || RDG_MEM_READS_STMT (rdg, i))
      return true;

  /* Then check all data-references against each other.  */
  EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
        || RDG_MEM_READS_STMT (rdg, i))
      EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj)
        if (RDG_MEM_WRITE_STMT (rdg, j)
            || RDG_MEM_READS_STMT (rdg, j))
          {
            FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, i), k, ref1)
              {
                tree base1 = ref_base_address (ref1);
                if (base1)
                  FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, j), l, ref2)
                    if (base1 == ref_base_address (ref2))
                      return true;
              }
          }

  return false;
}
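/* Illustrative note: similar_memory_accesses implements a simple cost
   model.  Two partitions that access the same array, say one writing
   a[i] and another reading a[i], are fused rather than split so that
   the distributed loops do not stream the same memory twice.  */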
/* Aggregate several components into a useful partition that is
   registered in the PARTITIONS vector.  Partitions will be
   distributed in different loops.  */

static void
rdg_build_partitions (struct graph *rdg,
                      vec<gimple> starting_stmts,
                      vec<partition_t> *partitions)
{
  bitmap processed = BITMAP_ALLOC (NULL);
  int i;
  gimple stmt;

  FOR_EACH_VEC_ELT (starting_stmts, i, stmt)
    {
      int v = rdg_vertex_for_stmt (rdg, stmt);

      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "ldist asked to generate code for vertex %d\n", v);

      /* If the vertex is already contained in another partition so
         is the partition rooted at it.  */
      if (bitmap_bit_p (processed, v))
        continue;

      partition_t partition = build_rdg_partition_for_vertex (rdg, v);
      bitmap_ior_into (processed, partition->stmts);

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "ldist useful partition:\n");
          dump_bitmap (dump_file, partition->stmts);
        }

      partitions->safe_push (partition);
    }

  /* All vertices should have been assigned to at least one partition now,
     other than vertices belonging to dead code.  */

  BITMAP_FREE (processed);
}

/* Dump to FILE the PARTITIONS.  */

static void
dump_rdg_partitions (FILE *file, vec<partition_t> partitions)
{
  int i;
  partition_t partition;

  FOR_EACH_VEC_ELT (partitions, i, partition)
    debug_bitmap_file (file, partition->stmts);
}

/* Debug PARTITIONS.  */
extern void debug_rdg_partitions (vec<partition_t> );

DEBUG_FUNCTION void
debug_rdg_partitions (vec<partition_t> partitions)
{
  dump_rdg_partitions (stderr, partitions);
}

/* Returns the number of read and write operations in the RDG.  */

static int
number_of_rw_in_rdg (struct graph *rdg)
{
  int i, res = 0;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      if (RDG_MEM_WRITE_STMT (rdg, i))
        ++res;

      if (RDG_MEM_READS_STMT (rdg, i))
        ++res;
    }

  return res;
}

/* Returns the number of read and write operations in a PARTITION of
   the RDG.  */

static int
number_of_rw_in_partition (struct graph *rdg, partition_t partition)
{
  int res = 0;
  unsigned i;
  bitmap_iterator ii;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii)
    {
      if (RDG_MEM_WRITE_STMT (rdg, i))
        ++res;

      if (RDG_MEM_READS_STMT (rdg, i))
        ++res;
    }

  return res;
}

/* Returns true when one of the PARTITIONS contains all the read or
   write operations of RDG.  */

static bool
partition_contains_all_rw (struct graph *rdg,
                           vec<partition_t> partitions)
{
  int i;
  partition_t partition;
  int nrw = number_of_rw_in_rdg (rdg);

  FOR_EACH_VEC_ELT (partitions, i, partition)
    if (nrw == number_of_rw_in_partition (rdg, partition))
      return true;

  return false;
}
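/* Illustrative note: in pg_add_dependence_edges below, DIR encodes the
   ordering constraint between two partitions.  If, say, partition 1
   writes a[i] and partition 2 reads a[i-1], partition 1 must be emitted
   before partition 2 and DIR becomes 1; conflicting constraints
   collapse to DIR == 2, which later places both partitions in the same
   SCC of the partition graph and thus forces their fusion.  */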
/* Compute partition dependence created by the data references in DRS1
   and DRS2 and modify and return DIR according to that.  */

static int
pg_add_dependence_edges (struct graph *rdg, vec<loop_p> loops, int dir,
                         vec<data_reference_p> drs1,
                         vec<data_reference_p> drs2)
{
  data_reference_p dr1, dr2;

  /* dependence direction - 0 is no dependence, -1 is back,
     1 is forth, 2 is both (we can stop then, merging will occur).  */
  for (int ii = 0; drs1.iterate (ii, &dr1); ++ii)
    for (int jj = 0; drs2.iterate (jj, &dr2); ++jj)
      {
        data_reference_p saved_dr1 = dr1;
        int this_dir = 1;
        ddr_p ddr;
        /* Re-shuffle data-refs to be in dominator order.  */
        if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1))
            > rdg_vertex_for_stmt (rdg, DR_STMT (dr2)))
          {
            data_reference_p tem = dr1;
            dr1 = dr2;
            dr2 = tem;
            this_dir = -this_dir;
          }
        ddr = initialize_data_dependence_relation (dr1, dr2, loops);
        compute_affine_dependence (ddr, loops[0]);
        if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
          this_dir = 2;
        else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
          {
            if (DDR_REVERSED_P (ddr))
              {
                data_reference_p tem = dr1;
                dr1 = dr2;
                dr2 = tem;
                this_dir = -this_dir;
              }
            /* Known dependences can still be unordered throughout the
               iteration space, see gcc.dg/tree-ssa/ldist-16.c.  */
            if (DDR_NUM_DIST_VECTS (ddr) != 1)
              this_dir = 2;
            /* If the overlap is exact preserve stmt order.  */
            else if (lambda_vector_zerop (DDR_DIST_VECT (ddr, 0), 1))
              ;
            else
              {
                /* Else as the distance vector is lexicographic positive
                   swap the dependence direction.  */
                this_dir = -this_dir;
              }
          }
        else
          this_dir = 0;
        free_dependence_relation (ddr);
        if (dir == 0)
          dir = this_dir;
        else if (dir != this_dir)
          return 2;
        /* Shuffle "back" dr1.  */
        dr1 = saved_dr1;
      }
  return dir;
}

/* Compare postorder number of the partition graph vertices V1 and V2.  */

static int
pgcmp (const void *v1_, const void *v2_)
{
  const vertex *v1 = (const vertex *)v1_;
  const vertex *v2 = (const vertex *)v2_;
  return v2->post - v1->post;
}

/* Distributes the code from LOOP in such a way that producer
   statements are placed before consumer statements.  Tries to separate
   only the statements from STMTS into separate loops.
   Returns the number of distributed loops.  */

static int
distribute_loop (struct loop *loop, vec<gimple> stmts,
                 control_dependences *cd, int *nb_calls)
{
  struct graph *rdg;
  partition_t partition;
  bool any_builtin;
  int i, nbp;
  graph *pg = NULL;
  int num_sccs = 1;

  *nb_calls = 0;
  auto_vec<loop_p, 3> loop_nest;
  if (!find_loop_nest (loop, &loop_nest))
    return 0;

  rdg = build_rdg (loop_nest, cd);
  if (!rdg)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "Loop %d not distributed: failed to build the RDG.\n",
                 loop->num);

      return 0;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg (dump_file, rdg);

  auto_vec<partition_t, 3> partitions;
  rdg_build_partitions (rdg, stmts, &partitions);

  any_builtin = false;
  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      classify_partition (loop, rdg, partition);
      any_builtin |= partition_builtin_p (partition);
    }

  /* If we are only distributing patterns but did not detect any,
     simply bail out.  */
  if (!flag_tree_loop_distribution
      && !any_builtin)
    {
      nbp = 0;
      goto ldist_done;
    }
  /* If we are only distributing patterns fuse all partitions that
     were not classified as builtins.  This also avoids chopping
     a loop into pieces, separated by builtin calls.  That is, we
     only want no or a single loop body remaining.  */
  partition_t into;
  if (!flag_tree_loop_distribution)
    {
      for (i = 0; partitions.iterate (i, &into); ++i)
        if (!partition_builtin_p (into))
          break;
      for (++i; partitions.iterate (i, &partition); ++i)
        if (!partition_builtin_p (partition))
          {
            if (dump_file && (dump_flags & TDF_DETAILS))
              {
                fprintf (dump_file, "fusing non-builtin partitions\n");
                dump_bitmap (dump_file, into->stmts);
                dump_bitmap (dump_file, partition->stmts);
              }
            partition_merge_into (into, partition);
            partitions.unordered_remove (i);
            partition_free (partition);
            i--;
          }
    }

  /* Due to limitations in the transform phase we have to fuse all
     reduction partitions into the last partition so the existing
     loop will contain all loop-closed PHI nodes.  */
  for (i = 0; partitions.iterate (i, &into); ++i)
    if (partition_reduction_p (into))
      break;
  for (i = i + 1; partitions.iterate (i, &partition); ++i)
    if (partition_reduction_p (partition))
      {
        if (dump_file && (dump_flags & TDF_DETAILS))
          {
            fprintf (dump_file, "fusing partitions\n");
            dump_bitmap (dump_file, into->stmts);
            dump_bitmap (dump_file, partition->stmts);
            fprintf (dump_file, "because they have reductions\n");
          }
        partition_merge_into (into, partition);
        partitions.unordered_remove (i);
        partition_free (partition);
        i--;
      }

  /* Apply our simple cost model - fuse partitions with similar
     memory accesses.  */
  for (i = 0; partitions.iterate (i, &into); ++i)
    {
      if (partition_builtin_p (into))
        continue;
      for (int j = i + 1;
           partitions.iterate (j, &partition); ++j)
        {
          if (!partition_builtin_p (partition)
              && similar_memory_accesses (rdg, into, partition))
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                {
                  fprintf (dump_file, "fusing partitions\n");
                  dump_bitmap (dump_file, into->stmts);
                  dump_bitmap (dump_file, partition->stmts);
                  fprintf (dump_file, "because they have similar "
                           "memory accesses\n");
                }
              partition_merge_into (into, partition);
              partitions.unordered_remove (j);
              partition_free (partition);
              j--;
            }
        }
    }

  /* Build the partition dependency graph.  */
  if (partitions.length () > 1)
    {
      pg = new_graph (partitions.length ());
      struct pgdata {
          partition_t partition;
          vec<data_reference_p> writes;
          vec<data_reference_p> reads;
      };
#define PGDATA(i) ((pgdata *)(pg->vertices[i].data))
      for (i = 0; partitions.iterate (i, &partition); ++i)
        {
          vertex *v = &pg->vertices[i];
          pgdata *data = new pgdata;
          data_reference_p dr;
          /* FIXME - leaks.  */
          v->data = data;
          bitmap_iterator bi;
          unsigned j;
          data->partition = partition;
          data->reads = vNULL;
          data->writes = vNULL;
          EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, j, bi)
            for (int k = 0; RDG_DATAREFS (rdg, j).iterate (k, &dr); ++k)
              if (DR_IS_READ (dr))
                data->reads.safe_push (dr);
              else
                data->writes.safe_push (dr);
        }
      partition_t partition1, partition2;
      for (i = 0; partitions.iterate (i, &partition1); ++i)
        for (int j = i + 1; partitions.iterate (j, &partition2); ++j)
          {
            /* dependence direction - 0 is no dependence, -1 is back,
               1 is forth, 2 is both (we can stop then, merging will
               occur).  */
            int dir = 0;
            dir = pg_add_dependence_edges (rdg, loop_nest, dir,
                                           PGDATA(i)->writes,
                                           PGDATA(j)->reads);
            if (dir != 2)
              dir = pg_add_dependence_edges (rdg, loop_nest, dir,
                                             PGDATA(i)->reads,
                                             PGDATA(j)->writes);
            if (dir != 2)
              dir = pg_add_dependence_edges (rdg, loop_nest, dir,
                                             PGDATA(i)->writes,
                                             PGDATA(j)->writes);
            if (dir == 1 || dir == 2)
              add_edge (pg, i, j);
            if (dir == -1 || dir == 2)
              add_edge (pg, j, i);
          }

      /* Add edges to the reduction partition (if any) to force it last.  */
      unsigned j;
      for (j = 0; partitions.iterate (j, &partition); ++j)
        if (partition_reduction_p (partition))
          break;
      if (j < partitions.length ())
        {
          for (unsigned i = 0; partitions.iterate (i, &partition); ++i)
            if (i != j)
              add_edge (pg, i, j);
        }

      /* Compute partitions we cannot separate and fuse them.  */
      num_sccs = graphds_scc (pg, NULL);
      for (i = 0; i < num_sccs; ++i)
        {
          partition_t first;
          int j;
          for (j = 0; partitions.iterate (j, &first); ++j)
            if (pg->vertices[j].component == i)
              break;
          for (j = j + 1; partitions.iterate (j, &partition); ++j)
            if (pg->vertices[j].component == i)
              {
                if (dump_file && (dump_flags & TDF_DETAILS))
                  {
                    fprintf (dump_file, "fusing partitions\n");
                    dump_bitmap (dump_file, first->stmts);
                    dump_bitmap (dump_file, partition->stmts);
                    fprintf (dump_file, "because they are in the same "
                             "dependence SCC\n");
                  }
                partition_merge_into (first, partition);
                partitions[j] = NULL;
                partition_free (partition);
                PGDATA (j)->partition = NULL;
              }
        }

      /* Now order the remaining nodes in postorder.  */
      qsort (pg->vertices, pg->n_vertices, sizeof (vertex), pgcmp);
      partitions.truncate (0);
      for (i = 0; i < pg->n_vertices; ++i)
        {
          pgdata *data = PGDATA (i);
          if (data->partition)
            partitions.safe_push (data->partition);
          data->reads.release ();
          data->writes.release ();
          delete data;
        }
      gcc_assert (partitions.length () == (unsigned)num_sccs);
      free_graph (pg);
    }

  nbp = partitions.length ();
  if (nbp == 0
      || (nbp == 1 && !partition_builtin_p (partitions[0]))
      || (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
    {
      nbp = 0;
      goto ldist_done;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg_partitions (dump_file, partitions);

  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      if (partition_builtin_p (partition))
        (*nb_calls)++;
      generate_code_for_partition (loop, partition, i < nbp - 1);
    }

 ldist_done:

  FOR_EACH_VEC_ELT (partitions, i, partition)
    partition_free (partition);

  free_rdg (rdg);
  return nbp - *nb_calls;
}
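/* Illustrative note: the pass below is enabled by the flags
   -ftree-loop-distribution (full distribution) and
   -ftree-loop-distribute-patterns (memset/memcpy pattern detection
   only), matching the two flags tested in the gate function.  */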
/* Distribute all loops in the current function.  */

namespace {

const pass_data pass_data_loop_distribution =
{
  GIMPLE_PASS, /* type */
  "ldist", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_loop_distribution : public gimple_opt_pass
{
public:
  pass_loop_distribution (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_loop_distribution, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return flag_tree_loop_distribution
        || flag_tree_loop_distribute_patterns;
    }

  virtual unsigned int execute (function *);

}; // class pass_loop_distribution

unsigned int
pass_loop_distribution::execute (function *fun)
{
  struct loop *loop;
  bool changed = false;
  basic_block bb;
  control_dependences *cd = NULL;

  FOR_ALL_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        gimple_set_uid (gsi_stmt (gsi), -1);
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        gimple_set_uid (gsi_stmt (gsi), -1);
    }

  /* We can at the moment only distribute non-nested loops, thus restrict
     walking to innermost loops.  */
  FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
    {
      auto_vec<gimple> work_list;
      basic_block *bbs;
      int num = loop->num;
      unsigned int i;

      /* If the loop doesn't have a single exit we will fail anyway,
         so do that early.  */
      if (!single_exit (loop))
        continue;

      /* Only optimize hot loops.  */
      if (!optimize_loop_for_speed_p (loop))
        continue;

      /* Initialize the worklist with stmts we seed the partitions with.  */
      bbs = get_loop_body_in_dom_order (loop);
      for (i = 0; i < loop->num_nodes; ++i)
        {
          for (gphi_iterator gsi = gsi_start_phis (bbs[i]);
               !gsi_end_p (gsi);
               gsi_next (&gsi))
            {
              gphi *phi = gsi.phi ();
              if (virtual_operand_p (gimple_phi_result (phi)))
                continue;
              /* Distribute stmts which have defs that are used outside of
                 the loop.  */
              if (!stmt_has_scalar_dependences_outside_loop (loop, phi))
                continue;
              work_list.safe_push (phi);
            }
          for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]);
               !gsi_end_p (gsi);
               gsi_next (&gsi))
            {
              gimple stmt = gsi_stmt (gsi);

              /* If there is a stmt with side-effects bail out - we
                 cannot and should not distribute this loop.  */
              if (gimple_has_side_effects (stmt))
                {
                  work_list.truncate (0);
                  goto out;
                }

              /* Distribute stmts which have defs that are used outside of
                 the loop.  */
              if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
                ;
              /* Otherwise only distribute stores for now.  */
              else if (!gimple_vdef (stmt))
                continue;

              work_list.safe_push (stmt);
            }
        }
out:
      free (bbs);

      int nb_generated_loops = 0;
      int nb_generated_calls = 0;
      location_t loc = find_loop_location (loop);
      if (work_list.length () > 0)
        {
          if (!cd)
            {
              calculate_dominance_info (CDI_DOMINATORS);
              calculate_dominance_info (CDI_POST_DOMINATORS);
              cd = new control_dependences (create_edge_list ());
              free_dominance_info (CDI_POST_DOMINATORS);
            }
          nb_generated_loops = distribute_loop (loop, work_list, cd,
                                                &nb_generated_calls);
        }

      if (nb_generated_loops + nb_generated_calls > 0)
        {
          changed = true;
          dump_printf_loc (MSG_OPTIMIZED_LOCATIONS,
                           loc, "Loop %d distributed: split to %d loops "
                           "and %d library calls.\n",
                           num, nb_generated_loops, nb_generated_calls);
        }
      else if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Loop %d is the same.\n", num);
    }

  if (cd)
    delete cd;

  if (changed)
    {
      /* Cached scalar evolutions now may refer to wrong or non-existing
         loops.  */
      scev_reset_htab ();
      mark_virtual_operands_for_renaming (fun);
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
    }

#ifdef ENABLE_CHECKING
  verify_loop_structure ();
#endif

  return 0;
}

} // anon namespace

gimple_opt_pass *
make_pass_loop_distribution (gcc::context *ctxt)
{
  return new pass_loop_distribution (ctxt);
}