/* Loop distribution.
   Copyright (C) 2006-2016 Free Software Foundation, Inc.
   Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
   and Sebastian Pop <sebastian.pop@amd.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass performs loop distribution: for example, the loop

   |DO I = 2, N
   |    A(I) = B(I) + C
   |    D(I) = A(I-1)*E
   |ENDDO

   is transformed to

   |DOALL I = 2, N
   |   A(I) = B(I) + C
   |ENDDO
   |
   |DOALL I = 2, N
   |   D(I) = A(I-1)*E
   |ENDDO

   This pass uses an RDG, Reduced Dependence Graph built on top of the
   data dependence relations.  The RDG is then topologically sorted to
   obtain a map of information producers/consumers based on which it
   generates the new loops.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "gimple-pretty-print.h"
#include "fold-const.h"
#include "cfganal.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "stor-layout.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop.h"
#include "tree-into-ssa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"


/* A Reduced Dependence Graph (RDG) vertex representing a statement.  */
struct rdg_vertex
{
  /* The statement represented by this vertex.  */
  gimple *stmt;

  /* Vector of data-references in this statement.  */
  vec<data_reference_p> datarefs;

  /* True when the statement contains a write to memory.  */
  bool has_mem_write;

  /* True when the statement contains a read from memory.  */
  bool has_mem_reads;
};

#define RDGV_STMT(V)     ((struct rdg_vertex *) ((V)->data))->stmt
#define RDGV_DATAREFS(V) ((struct rdg_vertex *) ((V)->data))->datarefs
#define RDGV_HAS_MEM_WRITE(V) ((struct rdg_vertex *) ((V)->data))->has_mem_write
#define RDGV_HAS_MEM_READS(V) ((struct rdg_vertex *) ((V)->data))->has_mem_reads
#define RDG_STMT(RDG, I) RDGV_STMT (&(RDG->vertices[I]))
#define RDG_DATAREFS(RDG, I) RDGV_DATAREFS (&(RDG->vertices[I]))
#define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
#define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I]))

/* Data dependence type.  */

enum rdg_dep_type
{
  /* Read After Write (RAW).  */
  flow_dd = 'f',

  /* Control dependence (execute conditional on).  */
  control_dd = 'c'
};

/* Dependence information attached to an edge of the RDG.  */

struct rdg_edge
{
  /* Type of the dependence.  */
  enum rdg_dep_type type;
};

#define RDGE_TYPE(E)        ((struct rdg_edge *) ((E)->data))->type
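
/* A minimal usage sketch of the accessors above (illustrative only,
   nothing in this file relies on it): walk all vertices of an RDG and
   query their memory behavior.

     for (int i = 0; i < rdg->n_vertices; i++)
       {
	 gimple *stmt = RDG_STMT (rdg, i);
	 bool touches_memory = (RDG_MEM_WRITE_STMT (rdg, i)
				|| RDG_MEM_READS_STMT (rdg, i));
	 ...
       }

   The data-references of vertex I are available as
   RDG_DATAREFS (rdg, i).  */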
/* Dump vertex I in RDG to FILE.  */

static void
dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
{
  struct vertex *v = &(rdg->vertices[i]);
  struct graph_edge *e;

  fprintf (file, "(vertex %d: (%s%s) (in:", i,
	   RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
	   RDG_MEM_READS_STMT (rdg, i) ? "r" : "");

  if (v->pred)
    for (e = v->pred; e; e = e->pred_next)
      fprintf (file, " %d", e->src);

  fprintf (file, ") (out:");

  if (v->succ)
    for (e = v->succ; e; e = e->succ_next)
      fprintf (file, " %d", e->dest);

  fprintf (file, ")\n");
  print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
  fprintf (file, ")\n");
}

/* Call dump_rdg_vertex on stderr.  */

DEBUG_FUNCTION void
debug_rdg_vertex (struct graph *rdg, int i)
{
  dump_rdg_vertex (stderr, rdg, i);
}

/* Dump the reduced dependence graph RDG to FILE.  */

static void
dump_rdg (FILE *file, struct graph *rdg)
{
  fprintf (file, "(rdg\n");
  for (int i = 0; i < rdg->n_vertices; i++)
    dump_rdg_vertex (file, rdg, i);
  fprintf (file, ")\n");
}

/* Call dump_rdg on stderr.  */

DEBUG_FUNCTION void
debug_rdg (struct graph *rdg)
{
  dump_rdg (stderr, rdg);
}

static void
dot_rdg_1 (FILE *file, struct graph *rdg)
{
  int i;
  pretty_printer buffer;
  pp_needs_newline (&buffer) = false;
  buffer.buffer->stream = file;

  fprintf (file, "digraph RDG {\n");

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      fprintf (file, "%d [label=\"[%d] ", i, i);
      pp_gimple_stmt_1 (&buffer, RDGV_STMT (v), 0, TDF_SLIM);
      pp_flush (&buffer);
      fprintf (file, "\"]\n");

      /* Highlight reads from memory.  */
      if (RDG_MEM_READS_STMT (rdg, i))
	fprintf (file, "%d [style=filled, fillcolor=green]\n", i);

      /* Highlight stores to memory.  */
      if (RDG_MEM_WRITE_STMT (rdg, i))
	fprintf (file, "%d [style=filled, fillcolor=red]\n", i);

      if (v->succ)
	for (e = v->succ; e; e = e->succ_next)
	  switch (RDGE_TYPE (e))
	    {
	    case flow_dd:
	      /* These are the most common dependences; print them
		 without a label.  */
	      fprintf (file, "%d -> %d \n", i, e->dest);
	      break;

	    case control_dd:
	      fprintf (file, "%d -> %d [label=control] \n", i, e->dest);
	      break;

	    default:
	      gcc_unreachable ();
	    }
    }

  fprintf (file, "}\n\n");
}

/* Display the Reduced Dependence Graph using dotty.  */

DEBUG_FUNCTION void
dot_rdg (struct graph *rdg)
{
  /* When debugging, you may want to enable the following code.  */
#ifdef HAVE_POPEN
  FILE *file = popen ("dot -Tx11", "w");
  if (!file)
    return;
  dot_rdg_1 (file, rdg);
  fflush (file);
  close (fileno (file));
  pclose (file);
#else
  dot_rdg_1 (stderr, rdg);
#endif
}

/* Returns the index of STMT in RDG.  */

static int
rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple *stmt)
{
  int index = gimple_uid (stmt);
  gcc_checking_assert (index == -1 || RDG_STMT (rdg, index) == stmt);
  return index;
}
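
/* Sample output of the dump_rdg_vertex function above (an illustrative
   sketch, not captured from a real dump):

     (vertex 3: (w) (in: 0 1) (out: 4)
     a[i_1] = _2;
     )

   "w" and "r" flag vertices whose statement writes or reads memory;
   "in:" and "out:" list the vertex numbers at the other end of the
   incoming and outgoing dependence edges.  */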
/* Creates dependence edges in RDG for all the uses of DEF.  IDEF is
   the index of DEF in RDG.  */

static void
create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
{
  use_operand_p imm_use_p;
  imm_use_iterator iterator;

  FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
    {
      struct graph_edge *e;
      int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));

      if (use < 0)
	continue;

      e = add_edge (rdg, idef, use);
      e->data = XNEW (struct rdg_edge);
      RDGE_TYPE (e) = flow_dd;
    }
}

/* Creates an edge for the control dependences of BB to the vertex V.  */

static void
create_edge_for_control_dependence (struct graph *rdg, basic_block bb,
				    int v, control_dependences *cd)
{
  bitmap_iterator bi;
  unsigned edge_n;
  EXECUTE_IF_SET_IN_BITMAP (cd->get_edges_dependent_on (bb->index),
			    0, edge_n, bi)
    {
      basic_block cond_bb = cd->get_edge (edge_n)->src;
      gimple *stmt = last_stmt (cond_bb);
      if (stmt && is_ctrl_stmt (stmt))
	{
	  struct graph_edge *e;
	  int c = rdg_vertex_for_stmt (rdg, stmt);
	  if (c < 0)
	    continue;

	  e = add_edge (rdg, c, v);
	  e->data = XNEW (struct rdg_edge);
	  RDGE_TYPE (e) = control_dd;
	}
    }
}

/* Creates the flow (scalar use-def) edges of the reduced dependence
   graph RDG.  */

static void
create_rdg_flow_edges (struct graph *rdg)
{
  int i;
  def_operand_p def_p;
  ssa_op_iter iter;

  for (i = 0; i < rdg->n_vertices; i++)
    FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
			      iter, SSA_OP_DEF)
      create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
}

/* Creates the control dependence edges of the reduced dependence
   graph RDG.  */

static void
create_rdg_cd_edges (struct graph *rdg, control_dependences *cd)
{
  int i;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      gimple *stmt = RDG_STMT (rdg, i);
      if (gimple_code (stmt) == GIMPLE_PHI)
	{
	  edge_iterator ei;
	  edge e;
	  FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds)
	    create_edge_for_control_dependence (rdg, e->src, i, cd);
	}
      else
	create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd);
    }
}

/* Build the vertices of the reduced dependence graph RDG.  Return false
   if that failed.  */

static bool
create_rdg_vertices (struct graph *rdg, vec<gimple *> stmts, loop_p loop,
		     vec<data_reference_p> *datarefs)
{
  int i;
  gimple *stmt;

  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      struct vertex *v = &(rdg->vertices[i]);

      /* Record statement to vertex mapping.  */
      gimple_set_uid (stmt, i);

      v->data = XNEW (struct rdg_vertex);
      RDGV_STMT (v) = stmt;
      RDGV_DATAREFS (v).create (0);
      RDGV_HAS_MEM_WRITE (v) = false;
      RDGV_HAS_MEM_READS (v) = false;
      if (gimple_code (stmt) == GIMPLE_PHI)
	continue;

      unsigned drp = datarefs->length ();
      if (!find_data_references_in_stmt (loop, stmt, datarefs))
	return false;
      for (unsigned j = drp; j < datarefs->length (); ++j)
	{
	  data_reference_p dr = (*datarefs)[j];
	  if (DR_IS_READ (dr))
	    RDGV_HAS_MEM_READS (v) = true;
	  else
	    RDGV_HAS_MEM_WRITE (v) = true;
	  RDGV_DATAREFS (v).safe_push (dr);
	}
    }
  return true;
}
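
/* The builders above are combined by build_rdg below: stmts_from_loop
   gathers the statements in dominator order, create_rdg_vertices
   numbers them (via gimple uids) and records their data-references,
   and create_rdg_flow_edges / create_rdg_cd_edges add the two edge
   kinds.  */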
/* Initialize STMTS with all the statements of LOOP.  The order in
   which we discover statements is important as
   generate_loops_for_partition is using the same traversal for
   identifying statements in loop copies.  */

static void
stmts_from_loop (struct loop *loop, vec<gimple *> *stmts)
{
  unsigned int i;
  basic_block *bbs = get_loop_body_in_dom_order (loop);

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
	   gsi_next (&bsi))
	if (!virtual_operand_p (gimple_phi_result (bsi.phi ())))
	  stmts->safe_push (bsi.phi ());

      for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
	   gsi_next (&bsi))
	{
	  gimple *stmt = gsi_stmt (bsi);
	  if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
	    stmts->safe_push (stmt);
	}
    }

  free (bbs);
}

/* Free the reduced dependence graph RDG.  */

static void
free_rdg (struct graph *rdg)
{
  int i;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      for (e = v->succ; e; e = e->succ_next)
	free (e->data);

      if (v->data)
	{
	  gimple_set_uid (RDGV_STMT (v), -1);
	  free_data_refs (RDGV_DATAREFS (v));
	  free (v->data);
	}
    }

  free_graph (rdg);
}

/* Build the Reduced Dependence Graph (RDG) with one vertex per
   statement of the loop nest LOOP_NEST, and one edge per data dependence or
   scalar dependence.  */

static struct graph *
build_rdg (vec<loop_p> loop_nest, control_dependences *cd)
{
  struct graph *rdg;
  vec<data_reference_p> datarefs;

  /* Create the RDG vertices from the stmts of the loop nest.  */
  auto_vec<gimple *, 10> stmts;
  stmts_from_loop (loop_nest[0], &stmts);
  rdg = new_graph (stmts.length ());
  datarefs.create (10);
  if (!create_rdg_vertices (rdg, stmts, loop_nest[0], &datarefs))
    {
      datarefs.release ();
      free_rdg (rdg);
      return NULL;
    }
  stmts.release ();

  create_rdg_flow_edges (rdg);
  if (cd)
    create_rdg_cd_edges (rdg, cd);

  datarefs.release ();

  return rdg;
}



enum partition_kind {
    PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY
};

struct partition
{
  bitmap stmts;
  bitmap loops;
  bool reduction_p;
  enum partition_kind kind;
  /* Data-references a kind != PKIND_NORMAL partition is about.  */
  data_reference_p main_dr;
  data_reference_p secondary_dr;
  tree niter;
  bool plus_one;
};


/* Allocate and initialize a partition from the bitmaps STMTS and
   LOOPS, allocating fresh bitmaps when they are NULL.  */

static partition *
partition_alloc (bitmap stmts, bitmap loops)
{
  partition *partition = XCNEW (struct partition);
  partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
  partition->loops = loops ? loops : BITMAP_ALLOC (NULL);
  partition->reduction_p = false;
  partition->kind = PKIND_NORMAL;
  return partition;
}

/* Free PARTITION.  */

static void
partition_free (partition *partition)
{
  BITMAP_FREE (partition->stmts);
  BITMAP_FREE (partition->loops);
  free (partition);
}

/* Returns true if the partition can be generated as a builtin.  */

static bool
partition_builtin_p (partition *partition)
{
  return partition->kind != PKIND_NORMAL;
}

/* Returns true if the partition contains a reduction.  */

static bool
partition_reduction_p (partition *partition)
{
  return partition->reduction_p;
}
/* Merge PARTITION into the partition DEST.  The merged result is not
   known to match any builtin pattern anymore, so reset the kind to
   PKIND_NORMAL.  */

static void
partition_merge_into (partition *dest, partition *partition)
{
  dest->kind = PKIND_NORMAL;
  bitmap_ior_into (dest->stmts, partition->stmts);
  if (partition_reduction_p (partition))
    dest->reduction_p = true;
}


/* Returns true when DEF is an SSA_NAME defined in LOOP and used after
   the LOOP.  */

static bool
ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
{
  imm_use_iterator imm_iter;
  use_operand_p use_p;

  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
    {
      gimple *use_stmt = USE_STMT (use_p);
      if (!is_gimple_debug (use_stmt)
	  && loop != loop_containing_stmt (use_stmt))
	return true;
    }

  return false;
}

/* Returns true when STMT defines a scalar variable used after the
   loop LOOP.  */

static bool
stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple *stmt)
{
  def_operand_p def_p;
  ssa_op_iter op_iter;

  if (gimple_code (stmt) == GIMPLE_PHI)
    return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop);

  FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
    if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
      return true;

  return false;
}

/* Return a copy of LOOP placed before LOOP.  */

static struct loop *
copy_loop_before (struct loop *loop)
{
  struct loop *res;
  edge preheader = loop_preheader_edge (loop);

  initialize_original_copy_tables ();
  res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, NULL, preheader);
  gcc_assert (res != NULL);
  free_original_copy_tables ();
  delete_update_ssa ();

  return res;
}

/* Creates an empty basic block after LOOP.  */

static void
create_bb_after_loop (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return;

  split_edge (exit);
}

/* Generate code for PARTITION from the code in LOOP.  The loop is
   copied when COPY_P is true.  All the statements not flagged in the
   PARTITION bitmap are removed from the loop or from its copy.  The
   statements are indexed in sequence inside a basic block, and the
   basic blocks of a loop are taken in dom order.  */

static void
generate_loops_for_partition (struct loop *loop, partition *partition,
			      bool copy_p)
{
  unsigned i;
  basic_block *bbs;

  if (copy_p)
    {
      loop = copy_loop_before (loop);
      gcc_assert (loop != NULL);
      create_preheader (loop, CP_SIMPLE_PREHEADERS);
      create_bb_after_loop (loop);
    }

  /* Remove stmts not in the PARTITION bitmap.  */
  bbs = get_loop_body_in_dom_order (loop);

  if (MAY_HAVE_DEBUG_STMTS)
    for (i = 0; i < loop->num_nodes; i++)
      {
	basic_block bb = bbs[i];

	for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
	     gsi_next (&bsi))
	  {
	    gphi *phi = bsi.phi ();
	    if (!virtual_operand_p (gimple_phi_result (phi))
		&& !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
	      reset_debug_uses (phi);
	  }

	for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
	     gsi_next (&bsi))
	  {
	    gimple *stmt = gsi_stmt (bsi);
	    if (gimple_code (stmt) != GIMPLE_LABEL
		&& !is_gimple_debug (stmt)
		&& !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
	      reset_debug_uses (stmt);
	  }
      }

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
	{
	  gphi *phi = bsi.phi ();
	  if (!virtual_operand_p (gimple_phi_result (phi))
	      && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
	    remove_phi_node (&bsi, true);
	  else
	    gsi_next (&bsi);
	}

      for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
	{
	  gimple *stmt = gsi_stmt (bsi);
	  if (gimple_code (stmt) != GIMPLE_LABEL
	      && !is_gimple_debug (stmt)
	      && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
	    {
	      /* Choose an arbitrary path through the empty CFG part
		 that this unnecessary control stmt controls.  */
	      if (gcond *cond_stmt = dyn_cast <gcond *> (stmt))
		{
		  gimple_cond_make_false (cond_stmt);
		  update_stmt (stmt);
		}
	      else if (gimple_code (stmt) == GIMPLE_SWITCH)
		{
		  gswitch *switch_stmt = as_a <gswitch *> (stmt);
		  gimple_switch_set_index
		      (switch_stmt, CASE_LOW (gimple_switch_label (switch_stmt, 1)));
		  update_stmt (stmt);
		}
	      else
		{
		  unlink_stmt_vdef (stmt);
		  gsi_remove (&bsi, true);
		  release_defs (stmt);
		  continue;
		}
	    }
	  gsi_next (&bsi);
	}
    }

  free (bbs);
}

/* Build the size argument for a memory operation call.  */

static tree
build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter,
		    bool plus_one)
{
  tree size = fold_convert_loc (loc, sizetype, nb_iter);
  if (plus_one)
    size = size_binop (PLUS_EXPR, size, size_one_node);
  size = fold_build2_loc (loc, MULT_EXPR, sizetype, size,
			  TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
  size = fold_convert_loc (loc, size_type_node, size);
  return size;
}

/* Build an address argument for a memory operation call.  */

static tree
build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes)
{
  tree addr_base;

  addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
  addr_base = fold_convert_loc (loc, sizetype, addr_base);

  /* Test for a negative stride, iterating over every element.  */
  if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
    {
      addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
				  fold_convert_loc (loc, sizetype, nb_bytes));
      addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
				  TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
    }

  return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base);
}
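
/* Worked example for the negative-stride case above (an illustrative
   sketch): for a loop

     for (i = n - 1; i >= 0; i--)
       a[i] = 0;

   DR_STEP is negative and the data-ref describes the highest written
   address first, so the lowest address the memory builtin needs is

     base - nb_bytes + sizeof (a[0])  ==  &a[0].  */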
/* If VAL memory representation contains the same value in all bytes,
   return that value, otherwise return -1.
   E.g. for 0x24242424 return 0x24, for IEEE double
   747708026454360457216.0 return 0x44, etc.  */

static int
const_with_all_bytes_same (tree val)
{
  unsigned char buf[64];
  int i, len;

  if (integer_zerop (val)
      || (TREE_CODE (val) == CONSTRUCTOR
	  && !TREE_CLOBBER_P (val)
	  && CONSTRUCTOR_NELTS (val) == 0))
    return 0;

  if (real_zerop (val))
    {
      /* Only return 0 for +0.0, not for -0.0, which doesn't have
	 an all bytes same memory representation.  Don't transform
	 -0.0 stores into +0.0 even for !HONOR_SIGNED_ZEROS.  */
      switch (TREE_CODE (val))
	{
	case REAL_CST:
	  if (!real_isneg (TREE_REAL_CST_PTR (val)))
	    return 0;
	  break;
	case COMPLEX_CST:
	  if (!const_with_all_bytes_same (TREE_REALPART (val))
	      && !const_with_all_bytes_same (TREE_IMAGPART (val)))
	    return 0;
	  break;
	case VECTOR_CST:
	  unsigned int j;
	  for (j = 0; j < VECTOR_CST_NELTS (val); ++j)
	    if (const_with_all_bytes_same (VECTOR_CST_ELT (val, j)))
	      break;
	  if (j == VECTOR_CST_NELTS (val))
	    return 0;
	  break;
	default:
	  break;
	}
    }

  if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
    return -1;

  len = native_encode_expr (val, buf, sizeof (buf));
  if (len == 0)
    return -1;
  for (i = 1; i < len; i++)
    if (buf[i] != buf[0])
      return -1;
  return buf[0];
}

/* Generate a call to memset for PARTITION in LOOP.  */

static void
generate_memset_builtin (struct loop *loop, partition *partition)
{
  gimple_stmt_iterator gsi;
  gimple *stmt, *fn_call;
  tree mem, fn, nb_bytes;
  location_t loc;
  tree val;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
				 partition->plus_one);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
				       false, GSI_CONTINUE_LINKING);
  mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
				  false, GSI_CONTINUE_LINKING);

  /* This exactly matches the pattern recognition in classify_partition.  */
  val = gimple_assign_rhs1 (stmt);
  /* Handle constants like 0x15151515 and similarly
     floating point constants etc. where all bytes are the same.  */
  int bytev = const_with_all_bytes_same (val);
  if (bytev != -1)
    val = build_int_cst (integer_type_node, bytev);
  else if (TREE_CODE (val) == INTEGER_CST)
    val = fold_convert (integer_type_node, val);
  else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
    {
      tree tem = make_ssa_name (integer_type_node);
      gimple *cstmt = gimple_build_assign (tem, NOP_EXPR, val);
      gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
      val = tem;
    }

  fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
  fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "generated memset");
      if (bytev == 0)
	fprintf (dump_file, " zero\n");
      else
	fprintf (dump_file, "\n");
    }
}
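
/* For instance (an illustrative sketch, not taken from the testsuite),
   a partition classified as PKIND_MEMSET turns

     for (i = 0; i < n; i++)
       a[i] = 0;

   into a single call emitted on the preheader edge of the loop:

     __builtin_memset (&a[0], 0, n * sizeof (a[0]));  */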
/* Generate a call to memcpy for PARTITION in LOOP.  */

static void
generate_memcpy_builtin (struct loop *loop, partition *partition)
{
  gimple_stmt_iterator gsi;
  gimple *stmt, *fn_call;
  tree dest, src, fn, nb_bytes;
  location_t loc;
  enum built_in_function kind;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
				 partition->plus_one);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
				       false, GSI_CONTINUE_LINKING);
  dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes);
  if (ptr_derefs_may_alias_p (dest, src))
    kind = BUILT_IN_MEMMOVE;
  else
    kind = BUILT_IN_MEMCPY;

  dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
				   false, GSI_CONTINUE_LINKING);
  src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
				  false, GSI_CONTINUE_LINKING);
  fn = build_fold_addr_expr (builtin_decl_implicit (kind));
  fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (kind == BUILT_IN_MEMCPY)
	fprintf (dump_file, "generated memcpy\n");
      else
	fprintf (dump_file, "generated memmove\n");
    }
}
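
/* Similarly (an illustrative sketch), a PKIND_MEMCPY partition

     for (i = 0; i < n; i++)
       a[i] = b[i];

   becomes

     __builtin_memcpy (&a[0], &b[0], n * sizeof (a[0]));

   or a __builtin_memmove call when ptr_derefs_may_alias_p cannot rule
   out that the destination and source overlap.  */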
/* Remove and destroy the loop LOOP.  */

static void
destroy_loop (struct loop *loop)
{
  unsigned nbbs = loop->num_nodes;
  edge exit = single_exit (loop);
  basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
  basic_block *bbs;
  unsigned i;

  bbs = get_loop_body_in_dom_order (loop);

  redirect_edge_pred (exit, src);
  exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
  exit->flags |= EDGE_FALLTHRU;
  cancel_loop_tree (loop);
  rescan_loop_exit (exit, false, true);

  i = nbbs;
  do
    {
      /* We have made sure to not leave any dangling uses of SSA
	 names defined in the loop.  With the exception of virtuals.
	 Make sure we replace all uses of virtual defs that will remain
	 outside of the loop with the bare symbol as delete_basic_block
	 will release them.  */
      --i;
      for (gphi_iterator gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gphi *phi = gsi.phi ();
	  if (virtual_operand_p (gimple_phi_result (phi)))
	    mark_virtual_phi_result_for_renaming (phi);
	}
      for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  tree vdef = gimple_vdef (stmt);
	  if (vdef && TREE_CODE (vdef) == SSA_NAME)
	    mark_virtual_operand_for_renaming (vdef);
	}
      delete_basic_block (bbs[i]);
    }
  while (i != 0);

  free (bbs);

  set_immediate_dominator (CDI_DOMINATORS, dest,
			   recompute_dominator (CDI_DOMINATORS, dest));
}

/* Generates code for PARTITION.  */

static void
generate_code_for_partition (struct loop *loop,
			     partition *partition, bool copy_p)
{
  switch (partition->kind)
    {
    case PKIND_NORMAL:
      /* Reductions all have to be in the last partition.  */
      gcc_assert (!partition_reduction_p (partition)
		  || !copy_p);
      generate_loops_for_partition (loop, partition, copy_p);
      return;

    case PKIND_MEMSET:
      generate_memset_builtin (loop, partition);
      break;

    case PKIND_MEMCPY:
      generate_memcpy_builtin (loop, partition);
      break;

    default:
      gcc_unreachable ();
    }

  /* Common tail for partitions we turn into a call.  If this was the last
     partition for which we generate code, we have to destroy the loop.  */
  if (!copy_p)
    destroy_loop (loop);
}


/* Returns a partition with all the statements needed for computing
   the vertex V of the RDG, also including the loop exit conditions.  */

static partition *
build_rdg_partition_for_vertex (struct graph *rdg, int v)
{
  partition *partition = partition_alloc (NULL, NULL);
  auto_vec<int, 3> nodes;
  unsigned i;
  int x;

  graphds_dfs (rdg, &v, 1, &nodes, false, NULL);

  FOR_EACH_VEC_ELT (nodes, i, x)
    {
      bitmap_set_bit (partition->stmts, x);
      bitmap_set_bit (partition->loops,
		      loop_containing_stmt (RDG_STMT (rdg, x))->num);
    }

  return partition;
}
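
/* Note (inferred from the call above, not stated elsewhere in this
   file): passing false for the forward argument of graphds_dfs walks
   RDG edges backwards, so the partition collects every vertex that V
   transitively depends on -- its scalar producers and the control
   statements it executes under.  */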
/* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
   For the moment we detect memset and memcpy/memmove patterns.  */

static void
classify_partition (loop_p loop, struct graph *rdg, partition *partition)
{
  bitmap_iterator bi;
  unsigned i;
  tree nb_iter;
  data_reference_p single_load, single_store;
  bool volatiles_p = false;
  bool plus_one = false;

  partition->kind = PKIND_NORMAL;
  partition->main_dr = NULL;
  partition->secondary_dr = NULL;
  partition->niter = NULL_TREE;
  partition->plus_one = false;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple *stmt = RDG_STMT (rdg, i);

      if (gimple_has_volatile_ops (stmt))
	volatiles_p = true;

      /* If the stmt has uses outside of the loop mark it as reduction.  */
      if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
	{
	  partition->reduction_p = true;
	  return;
	}
    }

  /* Perform general partition disqualification for builtins.  */
  if (volatiles_p
      || !flag_tree_loop_distribute_patterns)
    return;

  /* Detect memset and memcpy.  */
  single_load = NULL;
  single_store = NULL;
  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple *stmt = RDG_STMT (rdg, i);
      data_reference_p dr;
      unsigned j;

      if (gimple_code (stmt) == GIMPLE_PHI)
	continue;

      /* Any scalar stmts are ok.  */
      if (!gimple_vuse (stmt))
	continue;

      /* Otherwise just regular loads/stores.  */
      if (!gimple_assign_single_p (stmt))
	return;

      /* But exactly one store and/or load.  */
      for (j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j)
	{
	  if (DR_IS_READ (dr))
	    {
	      if (single_load != NULL)
		return;
	      single_load = dr;
	    }
	  else
	    {
	      if (single_store != NULL)
		return;
	      single_store = dr;
	    }
	}
    }

  if (!single_store)
    return;

  nb_iter = number_of_latch_executions (loop);
  if (!nb_iter || nb_iter == chrec_dont_know)
    return;
  if (dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src,
		      gimple_bb (DR_STMT (single_store))))
    plus_one = true;

  if (single_store && !single_load)
    {
      gimple *stmt = DR_STMT (single_store);
      tree rhs = gimple_assign_rhs1 (stmt);
      if (const_with_all_bytes_same (rhs) == -1
	  && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs))
	      || (TYPE_MODE (TREE_TYPE (rhs))
		  != TYPE_MODE (unsigned_char_type_node))))
	return;
      if (TREE_CODE (rhs) == SSA_NAME
	  && !SSA_NAME_IS_DEFAULT_DEF (rhs)
	  && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
	return;
      if (!adjacent_dr_p (single_store)
	  || !dominated_by_p (CDI_DOMINATORS,
			      loop->latch, gimple_bb (stmt)))
	return;
      partition->kind = PKIND_MEMSET;
      partition->main_dr = single_store;
      partition->niter = nb_iter;
      partition->plus_one = plus_one;
    }
  else if (single_store && single_load)
    {
      gimple *store = DR_STMT (single_store);
      gimple *load = DR_STMT (single_load);
      /* Direct aggregate copy or via an SSA name temporary.  */
      if (load != store
	  && gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
	return;
      if (!adjacent_dr_p (single_store)
	  || !adjacent_dr_p (single_load)
	  || !operand_equal_p (DR_STEP (single_store),
			       DR_STEP (single_load), 0)
	  || !dominated_by_p (CDI_DOMINATORS,
			      loop->latch, gimple_bb (store)))
	return;
      /* Now check that if there is a dependence this dependence is
	 of a suitable form for memmove.  */
      vec<loop_p> loops = vNULL;
      ddr_p ddr;
      loops.safe_push (loop);
      ddr = initialize_data_dependence_relation (single_load, single_store,
						 loops);
      compute_affine_dependence (ddr, loop);
      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	{
	  free_dependence_relation (ddr);
	  loops.release ();
	  return;
	}
      if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
	{
	  if (DDR_NUM_DIST_VECTS (ddr) == 0)
	    {
	      free_dependence_relation (ddr);
	      loops.release ();
	      return;
	    }
	  lambda_vector dist_v;
	  FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
	    {
	      int dist = dist_v[index_in_loop_nest (loop->num,
						    DDR_LOOP_NEST (ddr))];
	      if (dist > 0 && !DDR_REVERSED_P (ddr))
		{
		  free_dependence_relation (ddr);
		  loops.release ();
		  return;
		}
	    }
	}
      free_dependence_relation (ddr);
      loops.release ();
      partition->kind = PKIND_MEMCPY;
      partition->main_dr = single_store;
      partition->secondary_dr = single_load;
      partition->niter = nb_iter;
      partition->plus_one = plus_one;
    }
}
/* For a data reference REF, return the declaration of its base
   address or NULL_TREE if the base is not determined.  */

static tree
ref_base_address (data_reference_p dr)
{
  tree base_address = DR_BASE_ADDRESS (dr);
  if (base_address
      && TREE_CODE (base_address) == ADDR_EXPR)
    return TREE_OPERAND (base_address, 0);

  return base_address;
}

/* Returns true when PARTITION1 and PARTITION2 have similar memory
   accesses in RDG.  */

static bool
similar_memory_accesses (struct graph *rdg, partition *partition1,
			 partition *partition2)
{
  unsigned i, j, k, l;
  bitmap_iterator bi, bj;
  data_reference_p ref1, ref2;

  /* First check whether in the intersection of the two partitions are
     any loads or stores.  Common loads are the situation that happens
     most often.  */
  EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
	|| RDG_MEM_READS_STMT (rdg, i))
      return true;

  /* Then check all data-references against each other.  */
  EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
	|| RDG_MEM_READS_STMT (rdg, i))
      EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj)
	if (RDG_MEM_WRITE_STMT (rdg, j)
	    || RDG_MEM_READS_STMT (rdg, j))
	  {
	    FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, i), k, ref1)
	      {
		tree base1 = ref_base_address (ref1);
		if (base1)
		  FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, j), l, ref2)
		    if (base1 == ref_base_address (ref2))
		      return true;
	      }
	  }

  return false;
}

/* Aggregate several components into a useful partition that is
   registered in the PARTITIONS vector.  Partitions will be
   distributed in different loops.  */

static void
rdg_build_partitions (struct graph *rdg,
		      vec<gimple *> starting_stmts,
		      vec<partition *> *partitions)
{
  bitmap processed = BITMAP_ALLOC (NULL);
  int i;
  gimple *stmt;

  FOR_EACH_VEC_ELT (starting_stmts, i, stmt)
    {
      int v = rdg_vertex_for_stmt (rdg, stmt);

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "ldist asked to generate code for vertex %d\n", v);

      /* If the vertex is already contained in another partition, so
	 is the partition rooted at it.  */
      if (bitmap_bit_p (processed, v))
	continue;

      partition *partition = build_rdg_partition_for_vertex (rdg, v);
      bitmap_ior_into (processed, partition->stmts);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "ldist useful partition:\n");
	  dump_bitmap (dump_file, partition->stmts);
	}

      partitions->safe_push (partition);
    }

  /* All vertices should have been assigned to at least one partition now,
     other than vertices belonging to dead code.  */

  BITMAP_FREE (processed);
}

/* Dump to FILE the PARTITIONS.  */

static void
dump_rdg_partitions (FILE *file, vec<partition *> partitions)
{
  int i;
  partition *partition;

  FOR_EACH_VEC_ELT (partitions, i, partition)
    debug_bitmap_file (file, partition->stmts);
}

/* Debug PARTITIONS.  */
extern void debug_rdg_partitions (vec<partition *> );

DEBUG_FUNCTION void
debug_rdg_partitions (vec<partition *> partitions)
{
  dump_rdg_partitions (stderr, partitions);
}

/* Returns the number of read and write operations in the RDG.  */
static int
number_of_rw_in_rdg (struct graph *rdg)
{
  int i, res = 0;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      if (RDG_MEM_WRITE_STMT (rdg, i))
	++res;

      if (RDG_MEM_READS_STMT (rdg, i))
	++res;
    }

  return res;
}

/* Returns the number of read and write operations in a PARTITION of
   the RDG.  */

static int
number_of_rw_in_partition (struct graph *rdg, partition *partition)
{
  int res = 0;
  unsigned i;
  bitmap_iterator ii;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii)
    {
      if (RDG_MEM_WRITE_STMT (rdg, i))
	++res;

      if (RDG_MEM_READS_STMT (rdg, i))
	++res;
    }

  return res;
}

/* Returns true when one of the PARTITIONS contains all the read or
   write operations of RDG.  */

static bool
partition_contains_all_rw (struct graph *rdg,
			   vec<partition *> partitions)
{
  int i;
  partition *partition;
  int nrw = number_of_rw_in_rdg (rdg);

  FOR_EACH_VEC_ELT (partitions, i, partition)
    if (nrw == number_of_rw_in_partition (rdg, partition))
      return true;

  return false;
}

/* Compute the partition dependence created by the data references in
   DRS1 and DRS2, combine it with the incoming direction DIR and
   return the result.  */

static int
pg_add_dependence_edges (struct graph *rdg, vec<loop_p> loops, int dir,
			 vec<data_reference_p> drs1,
			 vec<data_reference_p> drs2)
{
  data_reference_p dr1, dr2;

  /* dependence direction - 0 is no dependence, -1 is back,
     1 is forth, 2 is both (we can stop then, merging will occur).  */
  for (int ii = 0; drs1.iterate (ii, &dr1); ++ii)
    for (int jj = 0; drs2.iterate (jj, &dr2); ++jj)
      {
	data_reference_p saved_dr1 = dr1;
	int this_dir = 1;
	ddr_p ddr;
	/* Re-shuffle data-refs to be in dominator order.  */
	if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1))
	    > rdg_vertex_for_stmt (rdg, DR_STMT (dr2)))
	  {
	    std::swap (dr1, dr2);
	    this_dir = -this_dir;
	  }
	ddr = initialize_data_dependence_relation (dr1, dr2, loops);
	compute_affine_dependence (ddr, loops[0]);
	if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	  this_dir = 2;
	else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
	  {
	    if (DDR_REVERSED_P (ddr))
	      {
		std::swap (dr1, dr2);
		this_dir = -this_dir;
	      }
	    /* Known dependences can still be unordered throughout the
	       iteration space, see gcc.dg/tree-ssa/ldist-16.c.  */
	    if (DDR_NUM_DIST_VECTS (ddr) != 1)
	      this_dir = 2;
	    /* If the overlap is exact preserve stmt order.  */
	    else if (lambda_vector_zerop (DDR_DIST_VECT (ddr, 0), 1))
	      ;
	    else
	      {
		/* Else as the distance vector is lexicographic positive
		   swap the dependence direction.  */
		this_dir = -this_dir;
	      }
	  }
	else
	  this_dir = 0;
	free_dependence_relation (ddr);
	if (dir == 0)
	  dir = this_dir;
	else if (dir != this_dir)
	  return 2;
	/* Shuffle "back" dr1.  */
	dr1 = saved_dr1;
      }
  return dir;
}
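
/* A summary of the direction encoding used above (restating the code,
   not adding to it):

     0   no dependence seen yet
     1   all dependences point from DRS1 to DRS2 ("forth")
    -1   all dependences point from DRS2 to DRS1 ("back")
     2   dependences in both directions, or an unanalyzable one;
	 the caller must keep the two partitions together.

   Combining two different non-zero directions collapses to 2.  */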
/* Compare postorder number of the partition graph vertices V1 and V2.  */

static int
pgcmp (const void *v1_, const void *v2_)
{
  const vertex *v1 = (const vertex *)v1_;
  const vertex *v2 = (const vertex *)v2_;
  return v2->post - v1->post;
}

/* Distributes the code from LOOP in such a way that producer
   statements are placed before consumer statements.  Tries to separate
   only the statements from STMTS into separate loops.
   Returns the number of distributed loops.  */

static int
distribute_loop (struct loop *loop, vec<gimple *> stmts,
		 control_dependences *cd, int *nb_calls)
{
  struct graph *rdg;
  partition *partition;
  bool any_builtin;
  int i, nbp;
  graph *pg = NULL;
  int num_sccs = 1;

  *nb_calls = 0;
  auto_vec<loop_p, 3> loop_nest;
  if (!find_loop_nest (loop, &loop_nest))
    return 0;

  rdg = build_rdg (loop_nest, cd);
  if (!rdg)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Loop %d not distributed: failed to build the RDG.\n",
		 loop->num);

      return 0;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg (dump_file, rdg);

  auto_vec<struct partition *, 3> partitions;
  rdg_build_partitions (rdg, stmts, &partitions);

  any_builtin = false;
  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      classify_partition (loop, rdg, partition);
      any_builtin |= partition_builtin_p (partition);
    }

  /* If we are only distributing patterns but did not detect any,
     simply bail out.  */
  if (!flag_tree_loop_distribution
      && !any_builtin)
    {
      nbp = 0;
      goto ldist_done;
    }

  /* If we are only distributing patterns fuse all partitions that
     were not classified as builtins.  This also avoids chopping
     a loop into pieces, separated by builtin calls.  That is, we
     only want no or a single loop body remaining.  */
  struct partition *into;
  if (!flag_tree_loop_distribution)
    {
      for (i = 0; partitions.iterate (i, &into); ++i)
	if (!partition_builtin_p (into))
	  break;
      for (++i; partitions.iterate (i, &partition); ++i)
	if (!partition_builtin_p (partition))
	  {
	    if (dump_file && (dump_flags & TDF_DETAILS))
	      {
		fprintf (dump_file, "fusing non-builtin partitions\n");
		dump_bitmap (dump_file, into->stmts);
		dump_bitmap (dump_file, partition->stmts);
	      }
	    partition_merge_into (into, partition);
	    partitions.unordered_remove (i);
	    partition_free (partition);
	    i--;
	  }
    }

  /* Due to limitations in the transform phase we have to fuse all
     reduction partitions into the last partition so the existing
     loop will contain all loop-closed PHI nodes.  */
  for (i = 0; partitions.iterate (i, &into); ++i)
    if (partition_reduction_p (into))
      break;
  for (i = i + 1; partitions.iterate (i, &partition); ++i)
    if (partition_reduction_p (partition))
      {
	if (dump_file && (dump_flags & TDF_DETAILS))
	  {
	    fprintf (dump_file, "fusing partitions\n");
	    dump_bitmap (dump_file, into->stmts);
	    dump_bitmap (dump_file, partition->stmts);
	    fprintf (dump_file, "because they have reductions\n");
	  }
	partition_merge_into (into, partition);
	partitions.unordered_remove (i);
	partition_free (partition);
	i--;
      }
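
  /* Two further fusing stages follow: the simple cost model just below
     merges partitions with similar memory accesses, and building the
     partition dependence graph merges partitions that fall into the
     same dependence SCC.  */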
  /* Apply our simple cost model - fuse partitions with similar
     memory accesses.  */
  for (i = 0; partitions.iterate (i, &into); ++i)
    {
      if (partition_builtin_p (into))
	continue;
      for (int j = i + 1;
	   partitions.iterate (j, &partition); ++j)
	{
	  if (!partition_builtin_p (partition)
	      && similar_memory_accesses (rdg, into, partition))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, "fusing partitions\n");
		  dump_bitmap (dump_file, into->stmts);
		  dump_bitmap (dump_file, partition->stmts);
		  fprintf (dump_file, "because they have similar "
			   "memory accesses\n");
		}
	      partition_merge_into (into, partition);
	      partitions.unordered_remove (j);
	      partition_free (partition);
	      j--;
	    }
	}
    }

  /* Build the partition dependency graph.  */
  if (partitions.length () > 1)
    {
      pg = new_graph (partitions.length ());
      struct pgdata {
	  struct partition *partition;
	  vec<data_reference_p> writes;
	  vec<data_reference_p> reads;
      };
#define PGDATA(i) ((pgdata *)(pg->vertices[i].data))
      for (i = 0; partitions.iterate (i, &partition); ++i)
	{
	  vertex *v = &pg->vertices[i];
	  pgdata *data = new pgdata;
	  data_reference_p dr;
	  /* FIXME - leaks.  */
	  v->data = data;
	  bitmap_iterator bi;
	  unsigned j;
	  data->partition = partition;
	  data->reads = vNULL;
	  data->writes = vNULL;
	  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, j, bi)
	    for (int k = 0; RDG_DATAREFS (rdg, j).iterate (k, &dr); ++k)
	      if (DR_IS_READ (dr))
		data->reads.safe_push (dr);
	      else
		data->writes.safe_push (dr);
	}
      struct partition *partition1, *partition2;
      for (i = 0; partitions.iterate (i, &partition1); ++i)
	for (int j = i + 1; partitions.iterate (j, &partition2); ++j)
	  {
	    /* dependence direction - 0 is no dependence, -1 is back,
	       1 is forth, 2 is both (we can stop then, merging will
	       occur).  */
	    int dir = 0;
	    dir = pg_add_dependence_edges (rdg, loop_nest, dir,
					   PGDATA(i)->writes,
					   PGDATA(j)->reads);
	    if (dir != 2)
	      dir = pg_add_dependence_edges (rdg, loop_nest, dir,
					     PGDATA(i)->reads,
					     PGDATA(j)->writes);
	    if (dir != 2)
	      dir = pg_add_dependence_edges (rdg, loop_nest, dir,
					     PGDATA(i)->writes,
					     PGDATA(j)->writes);
	    if (dir == 1 || dir == 2)
	      add_edge (pg, i, j);
	    if (dir == -1 || dir == 2)
	      add_edge (pg, j, i);
	  }

      /* Add edges to the reduction partition (if any) to force it last.  */
      unsigned j;
      for (j = 0; partitions.iterate (j, &partition); ++j)
	if (partition_reduction_p (partition))
	  break;
      if (j < partitions.length ())
	{
	  for (unsigned i = 0; partitions.iterate (i, &partition); ++i)
	    if (i != j)
	      add_edge (pg, i, j);
	}

      /* Compute partitions we cannot separate and fuse them.  */
      num_sccs = graphds_scc (pg, NULL);
      for (i = 0; i < num_sccs; ++i)
	{
	  struct partition *first;
	  int j;
	  for (j = 0; partitions.iterate (j, &first); ++j)
	    if (pg->vertices[j].component == i)
	      break;
	  for (j = j + 1; partitions.iterate (j, &partition); ++j)
	    if (pg->vertices[j].component == i)
	      {
		if (dump_file && (dump_flags & TDF_DETAILS))
		  {
		    fprintf (dump_file, "fusing partitions\n");
		    dump_bitmap (dump_file, first->stmts);
		    dump_bitmap (dump_file, partition->stmts);
		    fprintf (dump_file, "because they are in the same "
			     "dependence SCC\n");
		  }
		partition_merge_into (first, partition);
		partitions[j] = NULL;
		partition_free (partition);
		PGDATA (j)->partition = NULL;
	      }
	}

      /* Now order the remaining nodes in postorder.  */
      qsort (pg->vertices, pg->n_vertices, sizeof (vertex), pgcmp);
      partitions.truncate (0);
      for (i = 0; i < pg->n_vertices; ++i)
	{
	  pgdata *data = PGDATA (i);
	  if (data->partition)
	    partitions.safe_push (data->partition);
	  data->reads.release ();
	  data->writes.release ();
	  delete data;
	}
      gcc_assert (partitions.length () == (unsigned)num_sccs);
      free_graph (pg);
    }

  nbp = partitions.length ();
  if (nbp == 0
      || (nbp == 1 && !partition_builtin_p (partitions[0]))
      || (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
    {
      nbp = 0;
      goto ldist_done;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg_partitions (dump_file, partitions);

  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      if (partition_builtin_p (partition))
	(*nb_calls)++;
      generate_code_for_partition (loop, partition, i < nbp - 1);
    }

 ldist_done:

  FOR_EACH_VEC_ELT (partitions, i, partition)
    partition_free (partition);

  free_rdg (rdg);
  return nbp - *nb_calls;
}

/* Distribute all loops in the current function.  */

namespace {

const pass_data pass_data_loop_distribution =
{
  GIMPLE_PASS, /* type */
  "ldist", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_loop_distribution : public gimple_opt_pass
{
public:
  pass_loop_distribution (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_loop_distribution, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return flag_tree_loop_distribution
	|| flag_tree_loop_distribute_patterns;
    }

  virtual unsigned int execute (function *);

}; // class pass_loop_distribution

unsigned int
pass_loop_distribution::execute (function *fun)
{
  struct loop *loop;
  bool changed = false;
  basic_block bb;
  control_dependences *cd = NULL;

  FOR_ALL_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	gimple_set_uid (gsi_stmt (gsi), -1);
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	gimple_set_uid (gsi_stmt (gsi), -1);
    }

  /* We can at the moment only distribute non-nested loops, thus restrict
     walking to innermost loops.  */
  FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
    {
      auto_vec<gimple *> work_list;
      basic_block *bbs;
      int num = loop->num;
      unsigned int i;

      /* If the loop doesn't have a single exit we will fail anyway,
	 so do that early.  */
      if (!single_exit (loop))
	continue;

      /* Only optimize hot loops.  */
      if (!optimize_loop_for_speed_p (loop))
	continue;

      /* Initialize the worklist with stmts we seed the partitions with.  */
      bbs = get_loop_body_in_dom_order (loop);
      for (i = 0; i < loop->num_nodes; ++i)
	{
	  for (gphi_iterator gsi = gsi_start_phis (bbs[i]);
	       !gsi_end_p (gsi);
	       gsi_next (&gsi))
	    {
	      gphi *phi = gsi.phi ();
	      if (virtual_operand_p (gimple_phi_result (phi)))
		continue;
	      /* Distribute stmts which have defs that are used outside of
		 the loop.  */
	      if (!stmt_has_scalar_dependences_outside_loop (loop, phi))
		continue;
	      work_list.safe_push (phi);
	    }
	  for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]);
	       !gsi_end_p (gsi);
	       gsi_next (&gsi))
	    {
	      gimple *stmt = gsi_stmt (gsi);

	      /* If there is a stmt with side-effects bail out - we
		 cannot and should not distribute this loop.  */
	      if (gimple_has_side_effects (stmt))
		{
		  work_list.truncate (0);
		  goto out;
		}

	      /* Distribute stmts which have defs that are used outside of
		 the loop.  */
	      if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
		;
	      /* Otherwise only distribute stores for now.  */
	      else if (!gimple_vdef (stmt))
		continue;

	      work_list.safe_push (stmt);
	    }
	}
out:
      free (bbs);

      int nb_generated_loops = 0;
      int nb_generated_calls = 0;
      location_t loc = find_loop_location (loop);
      if (work_list.length () > 0)
	{
	  if (!cd)
	    {
	      calculate_dominance_info (CDI_DOMINATORS);
	      calculate_dominance_info (CDI_POST_DOMINATORS);
	      cd = new control_dependences (create_edge_list ());
	      free_dominance_info (CDI_POST_DOMINATORS);
	    }
	  nb_generated_loops = distribute_loop (loop, work_list, cd,
						&nb_generated_calls);
	}

      if (nb_generated_loops + nb_generated_calls > 0)
	{
	  changed = true;
	  dump_printf_loc (MSG_OPTIMIZED_LOCATIONS,
			   loc, "Loop %d distributed: split to %d loops "
			   "and %d library calls.\n",
			   num, nb_generated_loops, nb_generated_calls);
	}
      else if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Loop %d is the same.\n", num);
    }

  if (cd)
    delete cd;

  if (changed)
    {
      /* Cached scalar evolutions now may refer to wrong or non-existing
	 loops.  */
      scev_reset_htab ();
      mark_virtual_operands_for_renaming (fun);
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
    }

  checking_verify_loop_structure ();

  return 0;
}

} // anon namespace

gimple_opt_pass *
make_pass_loop_distribution (gcc::context *ctxt)
{
  return new pass_loop_distribution (ctxt);
}