/* Loop distribution.
   Copyright (C) 2006-2017 Free Software Foundation, Inc.
   Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
   and Sebastian Pop <sebastian.pop@amd.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass performs loop distribution: for example, the loop

   |DO I = 2, N
   |    A(I) = B(I) + C
   |    D(I) = A(I-1)*E
   |ENDDO

   is transformed to

   |DOALL I = 2, N
   |   A(I) = B(I) + C
   |ENDDO
   |
   |DOALL I = 2, N
   |   D(I) = A(I-1)*E
   |ENDDO

   This pass uses an RDG, Reduced Dependence Graph built on top of the
   data dependence relations.  The RDG is then topologically sorted to
   obtain a map of information producers/consumers based on which it
   generates the new loops.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "gimple-pretty-print.h"
#include "fold-const.h"
#include "cfganal.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "stor-layout.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop.h"
#include "tree-into-ssa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "tree-eh.h"


/* A Reduced Dependence Graph (RDG) vertex representing a statement.  */
struct rdg_vertex
{
  /* The statement represented by this vertex.  */
  gimple *stmt;

  /* Vector of data-references in this statement.  */
  vec<data_reference_p> datarefs;

  /* True when the statement contains a write to memory.  */
  bool has_mem_write;

  /* True when the statement contains a read from memory.  */
  bool has_mem_reads;
};

#define RDGV_STMT(V)     ((struct rdg_vertex *) ((V)->data))->stmt
#define RDGV_DATAREFS(V) ((struct rdg_vertex *) ((V)->data))->datarefs
#define RDGV_HAS_MEM_WRITE(V) ((struct rdg_vertex *) ((V)->data))->has_mem_write
#define RDGV_HAS_MEM_READS(V) ((struct rdg_vertex *) ((V)->data))->has_mem_reads
#define RDG_STMT(RDG, I) RDGV_STMT (&(RDG->vertices[I]))
#define RDG_DATAREFS(RDG, I) RDGV_DATAREFS (&(RDG->vertices[I]))
#define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
#define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I]))

/* Data dependence type.  */

enum rdg_dep_type
{
  /* Read After Write (RAW).  */
  flow_dd = 'f',

  /* Control dependence (execute conditional on).  */
  control_dd = 'c'
};

/* Dependence information attached to an edge of the RDG.  */

struct rdg_edge
{
  /* Type of the dependence.  */
  enum rdg_dep_type type;
};

#define RDGE_TYPE(E)        ((struct rdg_edge *) ((E)->data))->type
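
/* As an illustrative sketch (not part of the original sources), the
   accessor macros above are used like this to walk all memory-writing
   statements of an RDG:

   |for (int i = 0; i < rdg->n_vertices; i++)
   |  if (RDG_MEM_WRITE_STMT (rdg, i))
   |    print_gimple_stmt (stderr, RDG_STMT (rdg, i), 0, TDF_SLIM);
*/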

/* Dump vertex I in RDG to FILE.  */

static void
dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
{
  struct vertex *v = &(rdg->vertices[i]);
  struct graph_edge *e;

  fprintf (file, "(vertex %d: (%s%s) (in:", i,
           RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
           RDG_MEM_READS_STMT (rdg, i) ? "r" : "");

  if (v->pred)
    for (e = v->pred; e; e = e->pred_next)
      fprintf (file, " %d", e->src);

  fprintf (file, ") (out:");

  if (v->succ)
    for (e = v->succ; e; e = e->succ_next)
      fprintf (file, " %d", e->dest);

  fprintf (file, ")\n");
  print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
  fprintf (file, ")\n");
}

/* Call dump_rdg_vertex on stderr.  */

DEBUG_FUNCTION void
debug_rdg_vertex (struct graph *rdg, int i)
{
  dump_rdg_vertex (stderr, rdg, i);
}

/* Dump the reduced dependence graph RDG to FILE.  */

static void
dump_rdg (FILE *file, struct graph *rdg)
{
  fprintf (file, "(rdg\n");
  for (int i = 0; i < rdg->n_vertices; i++)
    dump_rdg_vertex (file, rdg, i);
  fprintf (file, ")\n");
}

/* Call dump_rdg on stderr.  */

DEBUG_FUNCTION void
debug_rdg (struct graph *rdg)
{
  dump_rdg (stderr, rdg);
}

static void
dot_rdg_1 (FILE *file, struct graph *rdg)
{
  int i;
  pretty_printer buffer;
  pp_needs_newline (&buffer) = false;
  buffer.buffer->stream = file;

  fprintf (file, "digraph RDG {\n");

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      fprintf (file, "%d [label=\"[%d] ", i, i);
      pp_gimple_stmt_1 (&buffer, RDGV_STMT (v), 0, TDF_SLIM);
      pp_flush (&buffer);
      fprintf (file, "\"]\n");

      /* Highlight reads from memory.  */
      if (RDG_MEM_READS_STMT (rdg, i))
        fprintf (file, "%d [style=filled, fillcolor=green]\n", i);

      /* Highlight stores to memory.  */
      if (RDG_MEM_WRITE_STMT (rdg, i))
        fprintf (file, "%d [style=filled, fillcolor=red]\n", i);

      if (v->succ)
        for (e = v->succ; e; e = e->succ_next)
          switch (RDGE_TYPE (e))
            {
            case flow_dd:
              /* These are the most common dependences: don't print these.  */
              fprintf (file, "%d -> %d \n", i, e->dest);
              break;

            case control_dd:
              fprintf (file, "%d -> %d [label=control] \n", i, e->dest);
              break;

            default:
              gcc_unreachable ();
            }
    }

  fprintf (file, "}\n\n");
}

/* Display the Reduced Dependence Graph using dotty.  */

DEBUG_FUNCTION void
dot_rdg (struct graph *rdg)
{
  /* When debugging, you may want to enable the following code.  */
#ifdef HAVE_POPEN
  FILE *file = popen ("dot -Tx11", "w");
  if (!file)
    return;
  dot_rdg_1 (file, rdg);
  fflush (file);
  close (fileno (file));
  pclose (file);
#else
  dot_rdg_1 (stderr, rdg);
#endif
}

/* Returns the index of STMT in RDG.  */

static int
rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple *stmt)
{
  int index = gimple_uid (stmt);
  gcc_checking_assert (index == -1 || RDG_STMT (rdg, index) == stmt);
  return index;
}
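
/* For reference, dot_rdg_1 above emits output of roughly the following
   shape (vertex numbers and statements are made up for illustration;
   green fill marks loads, red fill marks stores):

   |digraph RDG {
   |0 [label="[0] _1 = b[i_2];"]
   |0 [style=filled, fillcolor=green]
   |1 [label="[1] a[i_2] = _1;"]
   |1 [style=filled, fillcolor=red]
   |0 -> 1
   |}
*/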

/* Creates dependence edges in RDG for all the uses of DEF.  IDEF is
   the index of DEF in RDG.  */

static void
create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
{
  use_operand_p imm_use_p;
  imm_use_iterator iterator;

  FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
    {
      struct graph_edge *e;
      int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));

      if (use < 0)
        continue;

      e = add_edge (rdg, idef, use);
      e->data = XNEW (struct rdg_edge);
      RDGE_TYPE (e) = flow_dd;
    }
}

/* Creates an edge for the control dependences of BB to the vertex V.  */

static void
create_edge_for_control_dependence (struct graph *rdg, basic_block bb,
                                    int v, control_dependences *cd)
{
  bitmap_iterator bi;
  unsigned edge_n;
  EXECUTE_IF_SET_IN_BITMAP (cd->get_edges_dependent_on (bb->index),
                            0, edge_n, bi)
    {
      basic_block cond_bb = cd->get_edge_src (edge_n);
      gimple *stmt = last_stmt (cond_bb);
      if (stmt && is_ctrl_stmt (stmt))
        {
          struct graph_edge *e;
          int c = rdg_vertex_for_stmt (rdg, stmt);
          if (c < 0)
            continue;

          e = add_edge (rdg, c, v);
          e->data = XNEW (struct rdg_edge);
          RDGE_TYPE (e) = control_dd;
        }
    }
}

/* Creates the flow dependence edges of the reduced dependence graph RDG.  */

static void
create_rdg_flow_edges (struct graph *rdg)
{
  int i;
  def_operand_p def_p;
  ssa_op_iter iter;

  for (i = 0; i < rdg->n_vertices; i++)
    FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
                              iter, SSA_OP_DEF)
      create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
}

/* Creates the control dependence edges of the reduced dependence
   graph RDG.  */

static void
create_rdg_cd_edges (struct graph *rdg, control_dependences *cd, loop_p loop)
{
  int i;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      gimple *stmt = RDG_STMT (rdg, i);
      if (gimple_code (stmt) == GIMPLE_PHI)
        {
          edge_iterator ei;
          edge e;
          FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds)
            if (flow_bb_inside_loop_p (loop, e->src))
              create_edge_for_control_dependence (rdg, e->src, i, cd);
        }
      else
        create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd);
    }
}
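
/* To illustrate the control dependence edges created above (the
   statements are hypothetical): for

   |if (x_1 > 0)
   |  a[i_2] = 0;

   the vertex of the GIMPLE_COND gets a control_dd edge to the vertex
   of the store, because the store executes conditionally on it.  */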

/* Build the vertices of the reduced dependence graph RDG.  Return false
   if that failed.  */

static bool
create_rdg_vertices (struct graph *rdg, vec<gimple *> stmts, loop_p loop,
                     vec<data_reference_p> *datarefs)
{
  int i;
  gimple *stmt;

  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      struct vertex *v = &(rdg->vertices[i]);

      /* Record statement to vertex mapping.  */
      gimple_set_uid (stmt, i);

      v->data = XNEW (struct rdg_vertex);
      RDGV_STMT (v) = stmt;
      RDGV_DATAREFS (v).create (0);
      RDGV_HAS_MEM_WRITE (v) = false;
      RDGV_HAS_MEM_READS (v) = false;
      if (gimple_code (stmt) == GIMPLE_PHI)
        continue;

      unsigned drp = datarefs->length ();
      if (!find_data_references_in_stmt (loop, stmt, datarefs))
        return false;
      for (unsigned j = drp; j < datarefs->length (); ++j)
        {
          data_reference_p dr = (*datarefs)[j];
          if (DR_IS_READ (dr))
            RDGV_HAS_MEM_READS (v) = true;
          else
            RDGV_HAS_MEM_WRITE (v) = true;
          RDGV_DATAREFS (v).safe_push (dr);
        }
    }
  return true;
}

/* Initialize STMTS with all the statements of LOOP.  The order in
   which we discover statements is important as
   generate_loops_for_partition is using the same traversal for
   identifying statements in loop copies.  */

static void
stmts_from_loop (struct loop *loop, vec<gimple *> *stmts)
{
  unsigned int i;
  basic_block *bbs = get_loop_body_in_dom_order (loop);

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
           gsi_next (&bsi))
        if (!virtual_operand_p (gimple_phi_result (bsi.phi ())))
          stmts->safe_push (bsi.phi ());

      for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
           gsi_next (&bsi))
        {
          gimple *stmt = gsi_stmt (bsi);
          if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
            stmts->safe_push (stmt);
        }
    }

  free (bbs);
}

/* Free the reduced dependence graph RDG.  */

static void
free_rdg (struct graph *rdg)
{
  int i;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      for (e = v->succ; e; e = e->succ_next)
        free (e->data);

      if (v->data)
        {
          gimple_set_uid (RDGV_STMT (v), -1);
          free_data_refs (RDGV_DATAREFS (v));
          free (v->data);
        }
    }

  free_graph (rdg);
}

/* Build the Reduced Dependence Graph (RDG) with one vertex per
   statement of the loop nest LOOP_NEST, and one edge per data dependence or
   scalar dependence.  */

static struct graph *
build_rdg (vec<loop_p> loop_nest, control_dependences *cd)
{
  struct graph *rdg;
  vec<data_reference_p> datarefs;

  /* Create the RDG vertices from the stmts of the loop nest.  */
  auto_vec<gimple *, 10> stmts;
  stmts_from_loop (loop_nest[0], &stmts);
  rdg = new_graph (stmts.length ());
  datarefs.create (10);
  if (!create_rdg_vertices (rdg, stmts, loop_nest[0], &datarefs))
    {
      datarefs.release ();
      free_rdg (rdg);
      return NULL;
    }
  stmts.release ();

  create_rdg_flow_edges (rdg);
  if (cd)
    create_rdg_cd_edges (rdg, cd, loop_nest[0]);

  datarefs.release ();

  return rdg;
}



enum partition_kind {
    PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY, PKIND_MEMMOVE
};

struct partition
{
  bitmap stmts;
  bitmap loops;
  bool reduction_p;
  bool plus_one;
  enum partition_kind kind;
  /* data-references a kind != PKIND_NORMAL partition is about.  */
  data_reference_p main_dr;
  data_reference_p secondary_dr;
  tree niter;
};


/* Allocate and initialize a partition from BITMAP.  */

static partition *
partition_alloc (bitmap stmts, bitmap loops)
{
  partition *partition = XCNEW (struct partition);
  partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
  partition->loops = loops ? loops : BITMAP_ALLOC (NULL);
  partition->reduction_p = false;
  partition->kind = PKIND_NORMAL;
  return partition;
}

/* Free PARTITION.  */

static void
partition_free (partition *partition)
{
  BITMAP_FREE (partition->stmts);
  BITMAP_FREE (partition->loops);
  free (partition);
}

/* Returns true if the partition can be generated as a builtin.  */

static bool
partition_builtin_p (partition *partition)
{
  return partition->kind != PKIND_NORMAL;
}

/* Returns true if the partition contains a reduction.  */

static bool
partition_reduction_p (partition *partition)
{
  return partition->reduction_p;
}
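
/* A minimal usage sketch of the partition API above (illustrative only;
   STMT_INDEX is a hypothetical vertex index):

   |partition *p = partition_alloc (NULL, NULL);
   |bitmap_set_bit (p->stmts, stmt_index);
   |gcc_assert (!partition_builtin_p (p));  /+ kind defaults to PKIND_NORMAL +/
   |partition_free (p);
*/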

/* Merge PARTITION into the partition DEST.  */

static void
partition_merge_into (partition *dest, partition *partition)
{
  dest->kind = PKIND_NORMAL;
  bitmap_ior_into (dest->stmts, partition->stmts);
  if (partition_reduction_p (partition))
    dest->reduction_p = true;
}


/* Returns true when DEF is an SSA_NAME defined in LOOP and used after
   the LOOP.  */

static bool
ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
{
  imm_use_iterator imm_iter;
  use_operand_p use_p;

  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
    {
      gimple *use_stmt = USE_STMT (use_p);
      if (!is_gimple_debug (use_stmt)
          && loop != loop_containing_stmt (use_stmt))
        return true;
    }

  return false;
}

/* Returns true when STMT defines a scalar variable used after the
   loop LOOP.  */

static bool
stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple *stmt)
{
  def_operand_p def_p;
  ssa_op_iter op_iter;

  if (gimple_code (stmt) == GIMPLE_PHI)
    return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop);

  FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
    if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
      return true;

  return false;
}

/* Return a copy of LOOP placed before LOOP.  */

static struct loop *
copy_loop_before (struct loop *loop)
{
  struct loop *res;
  edge preheader = loop_preheader_edge (loop);

  initialize_original_copy_tables ();
  res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, NULL, preheader);
  gcc_assert (res != NULL);
  free_original_copy_tables ();
  delete_update_ssa ();

  return res;
}

/* Creates an empty basic block after LOOP.  */

static void
create_bb_after_loop (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return;

  split_edge (exit);
}
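
/* Note on the code generation below: when a loop is distributed into N
   non-builtin partitions, the first N-1 are emitted into copies of the
   loop created by copy_loop_before on the preheader edge, and the last
   partition reuses the original loop.  For example (illustrative):

   |loop:  S1; S2;

   with partitions {S1} and {S2} becomes

   |copy:  S1;   (copy placed before the original loop)
   |loop:  S2;   (original loop, S1 removed)
*/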

/* Generate code for PARTITION from the code in LOOP.  The loop is
   copied when COPY_P is true.  All the statements not flagged in the
   PARTITION bitmap are removed from the loop or from its copy.  The
   statements are indexed in sequence inside a basic block, and the
   basic blocks of a loop are taken in dom order.  */

static void
generate_loops_for_partition (struct loop *loop, partition *partition,
                              bool copy_p)
{
  unsigned i;
  basic_block *bbs;

  if (copy_p)
    {
      loop = copy_loop_before (loop);
      gcc_assert (loop != NULL);
      create_preheader (loop, CP_SIMPLE_PREHEADERS);
      create_bb_after_loop (loop);
    }

  /* Remove stmts not in the PARTITION bitmap.  */
  bbs = get_loop_body_in_dom_order (loop);

  if (MAY_HAVE_DEBUG_STMTS)
    for (i = 0; i < loop->num_nodes; i++)
      {
        basic_block bb = bbs[i];

        for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
             gsi_next (&bsi))
          {
            gphi *phi = bsi.phi ();
            if (!virtual_operand_p (gimple_phi_result (phi))
                && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
              reset_debug_uses (phi);
          }

        for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
             gsi_next (&bsi))
          {
            gimple *stmt = gsi_stmt (bsi);
            if (gimple_code (stmt) != GIMPLE_LABEL
                && !is_gimple_debug (stmt)
                && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
              reset_debug_uses (stmt);
          }
      }

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
        {
          gphi *phi = bsi.phi ();
          if (!virtual_operand_p (gimple_phi_result (phi))
              && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
            remove_phi_node (&bsi, true);
          else
            gsi_next (&bsi);
        }

      for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
        {
          gimple *stmt = gsi_stmt (bsi);
          if (gimple_code (stmt) != GIMPLE_LABEL
              && !is_gimple_debug (stmt)
              && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
            {
              /* Choose an arbitrary path through the empty CFG part
                 that this unnecessary control stmt controls.  */
              if (gcond *cond_stmt = dyn_cast <gcond *> (stmt))
                {
                  gimple_cond_make_false (cond_stmt);
                  update_stmt (stmt);
                }
              else if (gimple_code (stmt) == GIMPLE_SWITCH)
                {
                  gswitch *switch_stmt = as_a <gswitch *> (stmt);
                  gimple_switch_set_index
                    (switch_stmt, CASE_LOW (gimple_switch_label (switch_stmt, 1)));
                  update_stmt (stmt);
                }
              else
                {
                  unlink_stmt_vdef (stmt);
                  gsi_remove (&bsi, true);
                  release_defs (stmt);
                  continue;
                }
            }
          gsi_next (&bsi);
        }
    }

  free (bbs);
}

/* Build the size argument for a memory operation call.  */

static tree
build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter,
                    bool plus_one)
{
  tree size = fold_convert_loc (loc, sizetype, nb_iter);
  if (plus_one)
    size = size_binop (PLUS_EXPR, size, size_one_node);
  size = fold_build2_loc (loc, MULT_EXPR, sizetype, size,
                          TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
  size = fold_convert_loc (loc, size_type_node, size);
  return size;
}

/* Build an address argument for a memory operation call.  */

static tree
build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes)
{
  tree addr_base;

  addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
  addr_base = fold_convert_loc (loc, sizetype, addr_base);

  /* Test for a negative stride, iterating over every element.  */
  if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
    {
      addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
                                  fold_convert_loc (loc, sizetype, nb_bytes));
      addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
                                  TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
    }

  return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base);
}
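
/* A worked example for the two helpers above (assuming a 4-byte element
   type): for a store to a[i] with NB_ITER == n - 1 and PLUS_ONE set,
   build_size_arg_loc computes

   |nb_bytes = ((n - 1) + 1) * 4

   and, for a positive stride, build_addr_arg_loc yields the address of
   the first accessed element, &a[0].  */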

/* If VAL memory representation contains the same value in all bytes,
   return that value, otherwise return -1.
   E.g. for 0x24242424 return 0x24, for IEEE double
   747708026454360457216.0 return 0x44, etc.  */

static int
const_with_all_bytes_same (tree val)
{
  unsigned char buf[64];
  int i, len;

  if (integer_zerop (val)
      || (TREE_CODE (val) == CONSTRUCTOR
          && !TREE_CLOBBER_P (val)
          && CONSTRUCTOR_NELTS (val) == 0))
    return 0;

  if (real_zerop (val))
    {
      /* Only return 0 for +0.0, not for -0.0, which doesn't have
         an all bytes same memory representation.  Don't transform
         -0.0 stores into +0.0 even for !HONOR_SIGNED_ZEROS.  */
      switch (TREE_CODE (val))
        {
        case REAL_CST:
          if (!real_isneg (TREE_REAL_CST_PTR (val)))
            return 0;
          break;
        case COMPLEX_CST:
          if (!const_with_all_bytes_same (TREE_REALPART (val))
              && !const_with_all_bytes_same (TREE_IMAGPART (val)))
            return 0;
          break;
        case VECTOR_CST:
          unsigned int j;
          for (j = 0; j < VECTOR_CST_NELTS (val); ++j)
            if (const_with_all_bytes_same (VECTOR_CST_ELT (val, j)))
              break;
          if (j == VECTOR_CST_NELTS (val))
            return 0;
          break;
        default:
          break;
        }
    }

  if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
    return -1;

  len = native_encode_expr (val, buf, sizeof (buf));
  if (len == 0)
    return -1;
  for (i = 1; i < len; i++)
    if (buf[i] != buf[0])
      return -1;
  return buf[0];
}

/* Generate a call to memset for PARTITION in LOOP.  */

static void
generate_memset_builtin (struct loop *loop, partition *partition)
{
  gimple_stmt_iterator gsi;
  gimple *stmt, *fn_call;
  tree mem, fn, nb_bytes;
  location_t loc;
  tree val;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
                                 partition->plus_one);
  nb_bytes = rewrite_to_non_trapping_overflow (nb_bytes);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
                                       false, GSI_CONTINUE_LINKING);
  mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
                                  false, GSI_CONTINUE_LINKING);

  /* This exactly matches the pattern recognition in classify_partition.  */
  val = gimple_assign_rhs1 (stmt);
  /* Handle constants like 0x15151515 and similarly
     floating point constants etc. where all bytes are the same.  */
  int bytev = const_with_all_bytes_same (val);
  if (bytev != -1)
    val = build_int_cst (integer_type_node, bytev);
  else if (TREE_CODE (val) == INTEGER_CST)
    val = fold_convert (integer_type_node, val);
  else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
    {
      tree tem = make_ssa_name (integer_type_node);
      gimple *cstmt = gimple_build_assign (tem, NOP_EXPR, val);
      gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
      val = tem;
    }

  fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
  fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "generated memset");
      if (bytev == 0)
        fprintf (dump_file, " zero\n");
      else
        fprintf (dump_file, "\n");
    }
}
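
/* For example (illustrative), generate_memset_builtin transforms the
   partition for

   |for (i = 0; i < n; ++i)
   |  a[i] = 0;

   into a library call placed before the loop:

   |__builtin_memset (&a[0], 0, n * sizeof (a[0]));
*/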

/* Generate a call to memcpy for PARTITION in LOOP.  */

static void
generate_memcpy_builtin (struct loop *loop, partition *partition)
{
  gimple_stmt_iterator gsi;
  gimple *stmt, *fn_call;
  tree dest, src, fn, nb_bytes;
  location_t loc;
  enum built_in_function kind;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
                                 partition->plus_one);
  nb_bytes = rewrite_to_non_trapping_overflow (nb_bytes);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
                                       false, GSI_CONTINUE_LINKING);
  dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes);
  if (partition->kind == PKIND_MEMCPY
      || ! ptr_derefs_may_alias_p (dest, src))
    kind = BUILT_IN_MEMCPY;
  else
    kind = BUILT_IN_MEMMOVE;

  dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
                                   false, GSI_CONTINUE_LINKING);
  src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
                                  false, GSI_CONTINUE_LINKING);
  fn = build_fold_addr_expr (builtin_decl_implicit (kind));
  fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (kind == BUILT_IN_MEMCPY)
        fprintf (dump_file, "generated memcpy\n");
      else
        fprintf (dump_file, "generated memmove\n");
    }
}
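
/* Similarly (illustrative), a copy partition

   |for (i = 0; i < n; ++i)
   |  a[i] = b[i];

   becomes

   |__builtin_memcpy (&a[0], &b[0], n * sizeof (a[0]));

   with memmove used instead when the two references may alias and the
   partition was classified as PKIND_MEMMOVE.  */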

/* Remove and destroy the loop LOOP.  */

static void
destroy_loop (struct loop *loop)
{
  unsigned nbbs = loop->num_nodes;
  edge exit = single_exit (loop);
  basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
  basic_block *bbs;
  unsigned i;

  bbs = get_loop_body_in_dom_order (loop);

  redirect_edge_pred (exit, src);
  exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
  exit->flags |= EDGE_FALLTHRU;
  cancel_loop_tree (loop);
  rescan_loop_exit (exit, false, true);

  i = nbbs;
  do
    {
      /* We have made sure to not leave any dangling uses of SSA
         names defined in the loop.  With the exception of virtuals.
         Make sure we replace all uses of virtual defs that will remain
         outside of the loop with the bare symbol as delete_basic_block
         will release them.  */
      --i;
      for (gphi_iterator gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi);
           gsi_next (&gsi))
        {
          gphi *phi = gsi.phi ();
          if (virtual_operand_p (gimple_phi_result (phi)))
            mark_virtual_phi_result_for_renaming (phi);
        }
      for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi);
           gsi_next (&gsi))
        {
          gimple *stmt = gsi_stmt (gsi);
          tree vdef = gimple_vdef (stmt);
          if (vdef && TREE_CODE (vdef) == SSA_NAME)
            mark_virtual_operand_for_renaming (vdef);
        }
      delete_basic_block (bbs[i]);
    }
  while (i != 0);

  free (bbs);

  set_immediate_dominator (CDI_DOMINATORS, dest,
                           recompute_dominator (CDI_DOMINATORS, dest));
}

/* Generates code for PARTITION.  Return whether LOOP needs to be destroyed.  */

static bool
generate_code_for_partition (struct loop *loop,
                             partition *partition, bool copy_p)
{
  switch (partition->kind)
    {
    case PKIND_NORMAL:
      /* Reductions all have to be in the last partition.  */
      gcc_assert (!partition_reduction_p (partition)
                  || !copy_p);
      generate_loops_for_partition (loop, partition, copy_p);
      return false;

    case PKIND_MEMSET:
      generate_memset_builtin (loop, partition);
      break;

    case PKIND_MEMCPY:
    case PKIND_MEMMOVE:
      generate_memcpy_builtin (loop, partition);
      break;

    default:
      gcc_unreachable ();
    }

  /* Common tail for partitions we turn into a call.  If this was the last
     partition for which we generate code, we have to destroy the loop.  */
  if (!copy_p)
    return true;
  return false;
}


/* Returns a partition with all the statements needed for computing
   the vertex V of the RDG, also including the loop exit conditions.  */

static partition *
build_rdg_partition_for_vertex (struct graph *rdg, int v)
{
  partition *partition = partition_alloc (NULL, NULL);
  auto_vec<int, 3> nodes;
  unsigned i;
  int x;

  graphds_dfs (rdg, &v, 1, &nodes, false, NULL);

  FOR_EACH_VEC_ELT (nodes, i, x)
    {
      bitmap_set_bit (partition->stmts, x);
      bitmap_set_bit (partition->loops,
                      loop_containing_stmt (RDG_STMT (rdg, x))->num);
    }

  return partition;
}
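
/* For instance (hypothetical statements), given

   |S1: x_1 = a[i_2];
   |S2: b[i_2] = x_1;

   the partition built above for the vertex of S2 also contains S1,
   because the DFS walks the RDG edges backwards and so collects every
   producer the seed statement depends on.  */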

/* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
   For the moment we detect only the memset zero pattern.  */

static void
classify_partition (loop_p loop, struct graph *rdg, partition *partition)
{
  bitmap_iterator bi;
  unsigned i;
  tree nb_iter;
  data_reference_p single_load, single_store;
  bool volatiles_p = false;
  bool plus_one = false;

  partition->kind = PKIND_NORMAL;
  partition->main_dr = NULL;
  partition->secondary_dr = NULL;
  partition->niter = NULL_TREE;
  partition->plus_one = false;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple *stmt = RDG_STMT (rdg, i);

      if (gimple_has_volatile_ops (stmt))
        volatiles_p = true;

      /* If the stmt has uses outside of the loop mark it as reduction.  */
      if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
        {
          partition->reduction_p = true;
          return;
        }
    }

  /* Perform general partition disqualification for builtins.  */
  if (volatiles_p
      || !flag_tree_loop_distribute_patterns)
    return;

  /* Detect memset and memcpy.  */
  single_load = NULL;
  single_store = NULL;
  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple *stmt = RDG_STMT (rdg, i);
      data_reference_p dr;
      unsigned j;

      if (gimple_code (stmt) == GIMPLE_PHI)
        continue;

      /* Any scalar stmts are ok.  */
      if (!gimple_vuse (stmt))
        continue;

      /* Otherwise just regular loads/stores.  */
      if (!gimple_assign_single_p (stmt))
        return;

      /* But exactly one store and/or load.  */
      for (j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j)
        {
          tree type = TREE_TYPE (DR_REF (dr));

          /* The memset, memcpy and memmove library calls are only
             able to deal with generic address space.  */
          if (!ADDR_SPACE_GENERIC_P (TYPE_ADDR_SPACE (type)))
            return;

          if (DR_IS_READ (dr))
            {
              if (single_load != NULL)
                return;
              single_load = dr;
            }
          else
            {
              if (single_store != NULL)
                return;
              single_store = dr;
            }
        }
    }

  if (!single_store)
    return;

  nb_iter = number_of_latch_executions (loop);
  if (!nb_iter || nb_iter == chrec_dont_know)
    return;
  if (dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src,
                      gimple_bb (DR_STMT (single_store))))
    plus_one = true;

  if (single_store && !single_load)
    {
      gimple *stmt = DR_STMT (single_store);
      tree rhs = gimple_assign_rhs1 (stmt);
      if (const_with_all_bytes_same (rhs) == -1
          && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs))
              || (TYPE_MODE (TREE_TYPE (rhs))
                  != TYPE_MODE (unsigned_char_type_node))))
        return;
      if (TREE_CODE (rhs) == SSA_NAME
          && !SSA_NAME_IS_DEFAULT_DEF (rhs)
          && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
        return;
      if (!adjacent_dr_p (single_store)
          || !dominated_by_p (CDI_DOMINATORS,
                              loop->latch, gimple_bb (stmt)))
        return;
      partition->kind = PKIND_MEMSET;
      partition->main_dr = single_store;
      partition->niter = nb_iter;
      partition->plus_one = plus_one;
    }
  else if (single_store && single_load)
    {
      gimple *store = DR_STMT (single_store);
      gimple *load = DR_STMT (single_load);
      /* Direct aggregate copy or via an SSA name temporary.  */
      if (load != store
          && gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
        return;
      if (!adjacent_dr_p (single_store)
          || !adjacent_dr_p (single_load)
          || !operand_equal_p (DR_STEP (single_store),
                               DR_STEP (single_load), 0)
          || !dominated_by_p (CDI_DOMINATORS,
                              loop->latch, gimple_bb (store)))
        return;
      /* Now check that if there is a dependence this dependence is
         of a suitable form for memmove.  */
      vec<loop_p> loops = vNULL;
      ddr_p ddr;
      loops.safe_push (loop);
      ddr = initialize_data_dependence_relation (single_load, single_store,
                                                 loops);
      compute_affine_dependence (ddr, loop);
      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
        {
          free_dependence_relation (ddr);
          loops.release ();
          return;
        }
      if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
        {
          if (DDR_NUM_DIST_VECTS (ddr) == 0)
            {
              free_dependence_relation (ddr);
              loops.release ();
              return;
            }
          lambda_vector dist_v;
          FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
            {
              int dist = dist_v[index_in_loop_nest (loop->num,
                                                    DDR_LOOP_NEST (ddr))];
              if (dist > 0 && !DDR_REVERSED_P (ddr))
                {
                  free_dependence_relation (ddr);
                  loops.release ();
                  return;
                }
            }
          partition->kind = PKIND_MEMMOVE;
        }
      else
        partition->kind = PKIND_MEMCPY;
      free_dependence_relation (ddr);
      loops.release ();
      partition->main_dr = single_store;
      partition->secondary_dr = single_load;
      partition->niter = nb_iter;
      partition->plus_one = plus_one;
    }
}
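
/* Roughly, the classification above yields (illustrative examples):

   |a[i] = 0;          PKIND_MEMSET
   |a[i] = b[i];       PKIND_MEMCPY (no dependence between load and store)
   |a[i] = a[i + 1];   PKIND_MEMMOVE (the overlap is compatible with
   |                   memmove semantics)
   |a[i] = a[i - 1];   PKIND_NORMAL (each iteration reads the value
   |                   written by the previous one; no library call
   |                   matches that)
*/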

/* For a data reference REF, return the declaration of its base
   address or NULL_TREE if the base is not determined.  */

static tree
ref_base_address (data_reference_p dr)
{
  tree base_address = DR_BASE_ADDRESS (dr);
  if (base_address
      && TREE_CODE (base_address) == ADDR_EXPR)
    return TREE_OPERAND (base_address, 0);

  return base_address;
}

/* Returns true when PARTITION1 and PARTITION2 have similar memory
   accesses in RDG.  */

static bool
similar_memory_accesses (struct graph *rdg, partition *partition1,
                         partition *partition2)
{
  unsigned i, j, k, l;
  bitmap_iterator bi, bj;
  data_reference_p ref1, ref2;

  /* First check whether in the intersection of the two partitions are
     any loads or stores.  Common loads are the situation that happens
     most often.  */
  EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
        || RDG_MEM_READS_STMT (rdg, i))
      return true;

  /* Then check all data-references against each other.  */
  EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
        || RDG_MEM_READS_STMT (rdg, i))
      EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj)
        if (RDG_MEM_WRITE_STMT (rdg, j)
            || RDG_MEM_READS_STMT (rdg, j))
          {
            FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, i), k, ref1)
              {
                tree base1 = ref_base_address (ref1);
                if (base1)
                  FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, j), l, ref2)
                    if (base1 == ref_base_address (ref2))
                      return true;
              }
          }

  return false;
}
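
/* E.g. (illustrative) two partitions containing

   |a[i_1] = k_2;      and      |x_3 = a[i_1];

   are considered similar by the check above because both reference the
   same base a, so the cost model in distribute_loop fuses them rather
   than producing two loops that stream over the same memory.  */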

/* Aggregate several components into a useful partition that is
   registered in the PARTITIONS vector.  Partitions will be
   distributed in different loops.  */

static void
rdg_build_partitions (struct graph *rdg,
                      vec<gimple *> starting_stmts,
                      vec<partition *> *partitions)
{
  bitmap processed = BITMAP_ALLOC (NULL);
  int i;
  gimple *stmt;

  FOR_EACH_VEC_ELT (starting_stmts, i, stmt)
    {
      int v = rdg_vertex_for_stmt (rdg, stmt);

      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "ldist asked to generate code for vertex %d\n", v);

      /* If the vertex is already contained in another partition so
         is the partition rooted at it.  */
      if (bitmap_bit_p (processed, v))
        continue;

      partition *partition = build_rdg_partition_for_vertex (rdg, v);
      bitmap_ior_into (processed, partition->stmts);

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "ldist useful partition:\n");
          dump_bitmap (dump_file, partition->stmts);
        }

      partitions->safe_push (partition);
    }

  /* All vertices should have been assigned to at least one partition now,
     other than vertices belonging to dead code.  */

  BITMAP_FREE (processed);
}

/* Dump to FILE the PARTITIONS.  */

static void
dump_rdg_partitions (FILE *file, vec<partition *> partitions)
{
  int i;
  partition *partition;

  FOR_EACH_VEC_ELT (partitions, i, partition)
    debug_bitmap_file (file, partition->stmts);
}

/* Debug PARTITIONS.  */
extern void debug_rdg_partitions (vec<partition *> );

DEBUG_FUNCTION void
debug_rdg_partitions (vec<partition *> partitions)
{
  dump_rdg_partitions (stderr, partitions);
}

/* Returns the number of read and write operations in the RDG.  */

static int
number_of_rw_in_rdg (struct graph *rdg)
{
  int i, res = 0;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      if (RDG_MEM_WRITE_STMT (rdg, i))
        ++res;

      if (RDG_MEM_READS_STMT (rdg, i))
        ++res;
    }

  return res;
}

/* Returns the number of read and write operations in a PARTITION of
   the RDG.  */

static int
number_of_rw_in_partition (struct graph *rdg, partition *partition)
{
  int res = 0;
  unsigned i;
  bitmap_iterator ii;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii)
    {
      if (RDG_MEM_WRITE_STMT (rdg, i))
        ++res;

      if (RDG_MEM_READS_STMT (rdg, i))
        ++res;
    }

  return res;
}

/* Returns true when one of the PARTITIONS contains all the read or
   write operations of RDG.  */

static bool
partition_contains_all_rw (struct graph *rdg,
                           vec<partition *> partitions)
{
  int i;
  partition *partition;
  int nrw = number_of_rw_in_rdg (rdg);

  FOR_EACH_VEC_ELT (partitions, i, partition)
    if (nrw == number_of_rw_in_partition (rdg, partition))
      return true;

  return false;
}

/* Compute partition dependence created by the data references in DRS1
   and DRS2 and modify and return DIR according to that.  */

static int
pg_add_dependence_edges (struct graph *rdg, vec<loop_p> loops, int dir,
                         vec<data_reference_p> drs1,
                         vec<data_reference_p> drs2)
{
  data_reference_p dr1, dr2;

  /* dependence direction - 0 is no dependence, -1 is back,
     1 is forth, 2 is both (we can stop then, merging will occur).  */
  for (int ii = 0; drs1.iterate (ii, &dr1); ++ii)
    for (int jj = 0; drs2.iterate (jj, &dr2); ++jj)
      {
        data_reference_p saved_dr1 = dr1;
        int this_dir = 1;
        ddr_p ddr;
        /* Re-shuffle data-refs to be in dominator order.  */
        if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1))
            > rdg_vertex_for_stmt (rdg, DR_STMT (dr2)))
          {
            std::swap (dr1, dr2);
            this_dir = -this_dir;
          }
        ddr = initialize_data_dependence_relation (dr1, dr2, loops);
        compute_affine_dependence (ddr, loops[0]);
        if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
          this_dir = 2;
        else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
          {
            if (DDR_REVERSED_P (ddr))
              {
                std::swap (dr1, dr2);
                this_dir = -this_dir;
              }
            /* Known dependences can still be unordered throughout the
               iteration space, see gcc.dg/tree-ssa/ldist-16.c.  */
            if (DDR_NUM_DIST_VECTS (ddr) != 1)
              this_dir = 2;
            /* If the overlap is exact preserve stmt order.  */
            else if (lambda_vector_zerop (DDR_DIST_VECT (ddr, 0), 1))
              ;
            else
              {
                /* Else as the distance vector is lexicographic positive
                   swap the dependence direction.  */
                this_dir = -this_dir;
              }
          }
        else
          this_dir = 0;
        free_dependence_relation (ddr);
        if (this_dir == 2)
          return 2;
        else if (dir == 0)
          dir = this_dir;
        else if (this_dir != 0 && dir != this_dir)
          return 2;
        /* Shuffle "back" dr1.  */
        dr1 = saved_dr1;
      }
  return dir;
}
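
/* Illustration of the direction summary returned above: if every
   dependence between the data references of two partitions points from
   the first to the second, the result is 1 and distribute_loop below
   adds the edge i -> j to the partition graph, so the producer loop is
   generated before the consumer loop; a pair of references with
   dependences in both directions yields 2 and the partitions end up in
   the same SCC and are fused.  */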

/* Compare postorder number of the partition graph vertices V1 and V2.  */

static int
pgcmp (const void *v1_, const void *v2_)
{
  const vertex *v1 = (const vertex *)v1_;
  const vertex *v2 = (const vertex *)v2_;
  return v2->post - v1->post;
}

/* Distributes the code from LOOP in such a way that producer
   statements are placed before consumer statements.  Tries to separate
   only the statements from STMTS into separate loops.
   Returns the number of distributed loops.  Set *DESTROY_P to whether
   LOOP needs to be destroyed.  */

static int
distribute_loop (struct loop *loop, vec<gimple *> stmts,
                 control_dependences *cd, int *nb_calls, bool *destroy_p)
{
  struct graph *rdg;
  partition *partition;
  bool any_builtin;
  int i, nbp;
  graph *pg = NULL;
  int num_sccs = 1;

  *destroy_p = false;
  *nb_calls = 0;
  auto_vec<loop_p, 3> loop_nest;
  if (!find_loop_nest (loop, &loop_nest))
    return 0;

  rdg = build_rdg (loop_nest, cd);
  if (!rdg)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "Loop %d not distributed: failed to build the RDG.\n",
                 loop->num);

      return 0;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg (dump_file, rdg);

  auto_vec<struct partition *, 3> partitions;
  rdg_build_partitions (rdg, stmts, &partitions);

  any_builtin = false;
  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      classify_partition (loop, rdg, partition);
      any_builtin |= partition_builtin_p (partition);
    }

  /* If we are only distributing patterns but did not detect any,
     simply bail out.  */
  if (!flag_tree_loop_distribution
      && !any_builtin)
    {
      nbp = 0;
      goto ldist_done;
    }

  /* If we are only distributing patterns fuse all partitions that
     were not classified as builtins.  This also avoids chopping
     a loop into pieces, separated by builtin calls.  That is, we
     only want no or a single loop body remaining.  */
  struct partition *into;
  if (!flag_tree_loop_distribution)
    {
      for (i = 0; partitions.iterate (i, &into); ++i)
        if (!partition_builtin_p (into))
          break;
      for (++i; partitions.iterate (i, &partition); ++i)
        if (!partition_builtin_p (partition))
          {
            if (dump_file && (dump_flags & TDF_DETAILS))
              {
                fprintf (dump_file, "fusing non-builtin partitions\n");
                dump_bitmap (dump_file, into->stmts);
                dump_bitmap (dump_file, partition->stmts);
              }
            partition_merge_into (into, partition);
            partitions.unordered_remove (i);
            partition_free (partition);
            i--;
          }
    }

  /* Due to limitations in the transform phase we have to fuse all
     reduction partitions into the last partition so the existing
     loop will contain all loop-closed PHI nodes.  */
  for (i = 0; partitions.iterate (i, &into); ++i)
    if (partition_reduction_p (into))
      break;
  for (i = i + 1; partitions.iterate (i, &partition); ++i)
    if (partition_reduction_p (partition))
      {
        if (dump_file && (dump_flags & TDF_DETAILS))
          {
            fprintf (dump_file, "fusing partitions\n");
            dump_bitmap (dump_file, into->stmts);
            dump_bitmap (dump_file, partition->stmts);
            fprintf (dump_file, "because they have reductions\n");
          }
        partition_merge_into (into, partition);
        partitions.unordered_remove (i);
        partition_free (partition);
        i--;
      }

  /* Apply our simple cost model - fuse partitions with similar
     memory accesses.  */
  for (i = 0; partitions.iterate (i, &into); ++i)
    {
      bool changed = false;
      if (partition_builtin_p (into))
        continue;
      for (int j = i + 1;
           partitions.iterate (j, &partition); ++j)
        {
          if (similar_memory_accesses (rdg, into, partition))
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                {
                  fprintf (dump_file, "fusing partitions\n");
                  dump_bitmap (dump_file, into->stmts);
                  dump_bitmap (dump_file, partition->stmts);
                  fprintf (dump_file, "because they have similar "
                           "memory accesses\n");
                }
              partition_merge_into (into, partition);
              partitions.unordered_remove (j);
              partition_free (partition);
              j--;
              changed = true;
            }
        }
      /* If partitions 0 1 2 were fused in step 1 into 0,2 and 1 because
         0 and 2 have similar accesses but 0 and 1 do not, and 1 has
         accesses similar to 2, then we failed to consider merging 1
         into 0,2 above.  So try again if we did any merging into
         INTO.  */
      if (changed)
        i--;
    }

  /* Build the partition dependency graph.  */
  if (partitions.length () > 1)
    {
      pg = new_graph (partitions.length ());
      struct pgdata {
          struct partition *partition;
          vec<data_reference_p> writes;
          vec<data_reference_p> reads;
      };
#define PGDATA(i) ((pgdata *)(pg->vertices[i].data))
      for (i = 0; partitions.iterate (i, &partition); ++i)
        {
          vertex *v = &pg->vertices[i];
          pgdata *data = new pgdata;
          data_reference_p dr;
          /* FIXME - leaks.  */
          v->data = data;
          bitmap_iterator bi;
          unsigned j;
          data->partition = partition;
          data->reads = vNULL;
          data->writes = vNULL;
          EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, j, bi)
            for (int k = 0; RDG_DATAREFS (rdg, j).iterate (k, &dr); ++k)
              if (DR_IS_READ (dr))
                data->reads.safe_push (dr);
              else
                data->writes.safe_push (dr);
        }
      struct partition *partition1, *partition2;
      for (i = 0; partitions.iterate (i, &partition1); ++i)
        for (int j = i + 1; partitions.iterate (j, &partition2); ++j)
          {
            /* dependence direction - 0 is no dependence, -1 is back,
               1 is forth, 2 is both (we can stop then, merging will
               occur).  */
            int dir = 0;
            dir = pg_add_dependence_edges (rdg, loop_nest, dir,
                                           PGDATA(i)->writes,
                                           PGDATA(j)->reads);
            if (dir != 2)
              dir = pg_add_dependence_edges (rdg, loop_nest, dir,
                                             PGDATA(i)->reads,
                                             PGDATA(j)->writes);
            if (dir != 2)
              dir = pg_add_dependence_edges (rdg, loop_nest, dir,
                                             PGDATA(i)->writes,
                                             PGDATA(j)->writes);
            if (dir == 1 || dir == 2)
              add_edge (pg, i, j);
            if (dir == -1 || dir == 2)
              add_edge (pg, j, i);
          }

      /* Add edges to the reduction partition (if any) to force it last.  */
      unsigned j;
      for (j = 0; partitions.iterate (j, &partition); ++j)
        if (partition_reduction_p (partition))
          break;
      if (j < partitions.length ())
        {
          for (unsigned i = 0; partitions.iterate (i, &partition); ++i)
            if (i != j)
              add_edge (pg, i, j);
        }

      /* Compute partitions we cannot separate and fuse them.  */
      num_sccs = graphds_scc (pg, NULL);
      for (i = 0; i < num_sccs; ++i)
        {
          struct partition *first;
          int j;
          for (j = 0; partitions.iterate (j, &first); ++j)
            if (pg->vertices[j].component == i)
              break;
          for (j = j + 1; partitions.iterate (j, &partition); ++j)
            if (pg->vertices[j].component == i)
              {
                if (dump_file && (dump_flags & TDF_DETAILS))
                  {
                    fprintf (dump_file, "fusing partitions\n");
                    dump_bitmap (dump_file, first->stmts);
                    dump_bitmap (dump_file, partition->stmts);
                    fprintf (dump_file, "because they are in the same "
                             "dependence SCC\n");
                  }
                partition_merge_into (first, partition);
                partitions[j] = NULL;
                partition_free (partition);
                PGDATA (j)->partition = NULL;
              }
        }

      /* Now order the remaining nodes in postorder.  */
      qsort (pg->vertices, pg->n_vertices, sizeof (vertex), pgcmp);
      partitions.truncate (0);
      for (i = 0; i < pg->n_vertices; ++i)
        {
          pgdata *data = PGDATA (i);
          if (data->partition)
            partitions.safe_push (data->partition);
          data->reads.release ();
          data->writes.release ();
          delete data;
        }
      gcc_assert (partitions.length () == (unsigned)num_sccs);
      free_graph (pg);
    }

  nbp = partitions.length ();
  if (nbp == 0
      || (nbp == 1 && !partition_builtin_p (partitions[0]))
      || (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
    {
      nbp = 0;
      goto ldist_done;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg_partitions (dump_file, partitions);

  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      if (partition_builtin_p (partition))
        (*nb_calls)++;
      *destroy_p |= generate_code_for_partition (loop, partition, i < nbp - 1);
    }

 ldist_done:

  FOR_EACH_VEC_ELT (partitions, i, partition)
    partition_free (partition);

  free_rdg (rdg);
  return nbp - *nb_calls;
}

/* Distribute all loops in the current function.  */

namespace {

const pass_data pass_data_loop_distribution =
{
  GIMPLE_PASS, /* type */
  "ldist", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_loop_distribution : public gimple_opt_pass
{
public:
  pass_loop_distribution (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_loop_distribution, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return flag_tree_loop_distribution
        || flag_tree_loop_distribute_patterns;
    }

  virtual unsigned int execute (function *);

}; // class pass_loop_distribution

unsigned int
pass_loop_distribution::execute (function *fun)
{
  struct loop *loop;
  bool changed = false;
  basic_block bb;
  control_dependences *cd = NULL;
  auto_vec<loop_p> loops_to_be_destroyed;

  FOR_ALL_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        gimple_set_uid (gsi_stmt (gsi), -1);
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        gimple_set_uid (gsi_stmt (gsi), -1);
    }

  /* We can at the moment only distribute non-nested loops, thus restrict
     walking to innermost loops.  */
  FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
    {
      auto_vec<gimple *> work_list;
      basic_block *bbs;
      int num = loop->num;
      unsigned int i;

      /* If the loop doesn't have a single exit we will fail anyway,
         so do that early.  */
      if (!single_exit (loop))
        continue;

      /* Only optimize hot loops.  */
      if (!optimize_loop_for_speed_p (loop))
        continue;

      /* Initialize the worklist with stmts we seed the partitions with.  */
      bbs = get_loop_body_in_dom_order (loop);
      for (i = 0; i < loop->num_nodes; ++i)
        {
          for (gphi_iterator gsi = gsi_start_phis (bbs[i]);
               !gsi_end_p (gsi);
               gsi_next (&gsi))
            {
              gphi *phi = gsi.phi ();
              if (virtual_operand_p (gimple_phi_result (phi)))
                continue;
              /* Distribute stmts which have defs that are used outside of
                 the loop.  */
              if (!stmt_has_scalar_dependences_outside_loop (loop, phi))
                continue;
              work_list.safe_push (phi);
            }
          for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]);
               !gsi_end_p (gsi);
               gsi_next (&gsi))
            {
              gimple *stmt = gsi_stmt (gsi);

              /* If there is a stmt with side-effects bail out - we
                 cannot and should not distribute this loop.  */
              if (gimple_has_side_effects (stmt))
                {
                  work_list.truncate (0);
                  goto out;
                }

              /* Distribute stmts which have defs that are used outside of
                 the loop.  */
              if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
                ;
              /* Otherwise only distribute stores for now.  */
              else if (!gimple_vdef (stmt))
                continue;

              work_list.safe_push (stmt);
            }
        }
out:
      free (bbs);

      int nb_generated_loops = 0;
      int nb_generated_calls = 0;
      location_t loc = find_loop_location (loop);
      if (work_list.length () > 0)
        {
          if (!cd)
            {
              calculate_dominance_info (CDI_DOMINATORS);
              calculate_dominance_info (CDI_POST_DOMINATORS);
              cd = new control_dependences ();
              free_dominance_info (CDI_POST_DOMINATORS);
            }
          bool destroy_p;
          nb_generated_loops = distribute_loop (loop, work_list, cd,
                                                &nb_generated_calls,
                                                &destroy_p);
          if (destroy_p)
            loops_to_be_destroyed.safe_push (loop);
        }

      if (nb_generated_loops + nb_generated_calls > 0)
        {
          changed = true;
          dump_printf_loc (MSG_OPTIMIZED_LOCATIONS,
                           loc, "Loop %d distributed: split to %d loops "
                           "and %d library calls.\n",
                           num, nb_generated_loops, nb_generated_calls);
        }
      else if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Loop %d is the same.\n", num);
    }

  if (cd)
    delete cd;

  if (changed)
    {
      /* Destroy loop bodies that could not be reused.  Do this late as we
         otherwise can end up referring to stale data in control
         dependences.  */
      unsigned i;
      FOR_EACH_VEC_ELT (loops_to_be_destroyed, i, loop)
        destroy_loop (loop);

      /* Cached scalar evolutions now may refer to wrong or non-existing
         loops.  */
      scev_reset_htab ();
      mark_virtual_operands_for_renaming (fun);
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
    }

  checking_verify_loop_structure ();

  return 0;
}

} // anon namespace

gimple_opt_pass *
make_pass_loop_distribution (gcc::context *ctxt)
{
  return new pass_loop_distribution (ctxt);
}