/* Loop distribution.
   Copyright (C) 2006-2017 Free Software Foundation, Inc.
   Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
   and Sebastian Pop <sebastian.pop@amd.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass performs loop distribution: for example, the loop

   |DO I = 2, N
   |    A(I) = B(I) + C
   |    D(I) = A(I-1)*E
   |ENDDO

   is transformed to

   |DOALL I = 2, N
   |   A(I) = B(I) + C
   |ENDDO
   |
   |DOALL I = 2, N
   |   D(I) = A(I-1)*E
   |ENDDO

   This pass uses an RDG, Reduced Dependence Graph built on top of the
   data dependence relations.  The RDG is then topologically sorted to
   obtain a map of information producers/consumers based on which it
   generates the new loops.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "gimple-pretty-print.h"
#include "fold-const.h"
#include "cfganal.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "stor-layout.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop.h"
#include "tree-into-ssa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"


/* A Reduced Dependence Graph (RDG) vertex representing a statement.  */
struct rdg_vertex
{
  /* The statement represented by this vertex.  */
  gimple *stmt;

  /* Vector of data-references in this statement.  */
  vec<data_reference_p> datarefs;

  /* True when the statement contains a write to memory.  */
  bool has_mem_write;

  /* True when the statement contains a read from memory.  */
  bool has_mem_reads;
};

#define RDGV_STMT(V)     ((struct rdg_vertex *) ((V)->data))->stmt
#define RDGV_DATAREFS(V) ((struct rdg_vertex *) ((V)->data))->datarefs
#define RDGV_HAS_MEM_WRITE(V) ((struct rdg_vertex *) ((V)->data))->has_mem_write
#define RDGV_HAS_MEM_READS(V) ((struct rdg_vertex *) ((V)->data))->has_mem_reads
#define RDG_STMT(RDG, I) RDGV_STMT (&(RDG->vertices[I]))
#define RDG_DATAREFS(RDG, I) RDGV_DATAREFS (&(RDG->vertices[I]))
#define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
#define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I]))

/* Data dependence type.  */

enum rdg_dep_type
{
  /* Read After Write (RAW).  */
  flow_dd = 'f',

  /* Control dependence (execute conditional on).  */
  control_dd = 'c'
};

/* Dependence information attached to an edge of the RDG.  */

struct rdg_edge
{
  /* Type of the dependence.  */
  enum rdg_dep_type type;
};

#define RDGE_TYPE(E)        ((struct rdg_edge *) ((E)->data))->type

/* Dump vertex I in RDG to FILE.  */
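
/* As an illustration, the dump produced below has roughly the shape

   |(vertex 3: (w) (in: 1 2) (out: 5)
   |a[i_1] = _4;
   |)

   where "w" and "r" mark vertices whose statement writes or reads
   memory, and the in/out lists give the dependence predecessor and
   successor vertex numbers.  */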

static void
dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
{
  struct vertex *v = &(rdg->vertices[i]);
  struct graph_edge *e;

  fprintf (file, "(vertex %d: (%s%s) (in:", i,
	   RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
	   RDG_MEM_READS_STMT (rdg, i) ? "r" : "");

  if (v->pred)
    for (e = v->pred; e; e = e->pred_next)
      fprintf (file, " %d", e->src);

  fprintf (file, ") (out:");

  if (v->succ)
    for (e = v->succ; e; e = e->succ_next)
      fprintf (file, " %d", e->dest);

  fprintf (file, ")\n");
  print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
  fprintf (file, ")\n");
}

/* Call dump_rdg_vertex on stderr.  */

DEBUG_FUNCTION void
debug_rdg_vertex (struct graph *rdg, int i)
{
  dump_rdg_vertex (stderr, rdg, i);
}

/* Dump the reduced dependence graph RDG to FILE.  */

static void
dump_rdg (FILE *file, struct graph *rdg)
{
  fprintf (file, "(rdg\n");
  for (int i = 0; i < rdg->n_vertices; i++)
    dump_rdg_vertex (file, rdg, i);
  fprintf (file, ")\n");
}

/* Call dump_rdg on stderr.  */

DEBUG_FUNCTION void
debug_rdg (struct graph *rdg)
{
  dump_rdg (stderr, rdg);
}

static void
dot_rdg_1 (FILE *file, struct graph *rdg)
{
  int i;
  pretty_printer buffer;
  pp_needs_newline (&buffer) = false;
  buffer.buffer->stream = file;

  fprintf (file, "digraph RDG {\n");

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      fprintf (file, "%d [label=\"[%d] ", i, i);
      pp_gimple_stmt_1 (&buffer, RDGV_STMT (v), 0, TDF_SLIM);
      pp_flush (&buffer);
      fprintf (file, "\"]\n");

      /* Highlight reads from memory.  */
      if (RDG_MEM_READS_STMT (rdg, i))
	fprintf (file, "%d [style=filled, fillcolor=green]\n", i);

      /* Highlight stores to memory.  */
      if (RDG_MEM_WRITE_STMT (rdg, i))
	fprintf (file, "%d [style=filled, fillcolor=red]\n", i);

      if (v->succ)
	for (e = v->succ; e; e = e->succ_next)
	  switch (RDGE_TYPE (e))
	    {
	    case flow_dd:
	      /* These are the most common dependences: don't print a
		 label for these.  */
	      fprintf (file, "%d -> %d \n", i, e->dest);
	      break;

	    case control_dd:
	      fprintf (file, "%d -> %d [label=control] \n", i, e->dest);
	      break;

	    default:
	      gcc_unreachable ();
	    }
    }

  fprintf (file, "}\n\n");
}

/* Display the Reduced Dependence Graph using dotty.  */

DEBUG_FUNCTION void
dot_rdg (struct graph *rdg)
{
  /* When debugging, you may want to enable the following code.  */
#ifdef HAVE_POPEN
  FILE *file = popen ("dot -Tx11", "w");
  if (!file)
    return;
  dot_rdg_1 (file, rdg);
  fflush (file);
  close (fileno (file));
  pclose (file);
#else
  dot_rdg_1 (stderr, rdg);
#endif
}

/* Returns the index of STMT in RDG.  */

static int
rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple *stmt)
{
  int index = gimple_uid (stmt);
  gcc_checking_assert (index == -1 || RDG_STMT (rdg, index) == stmt);
  return index;
}

/* Creates dependence edges in RDG for all the uses of DEF.  IDEF is
   the index of DEF in RDG.  */
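
/* For instance, for

   |x_1 = a_2 + b_3;
   |c_4 = x_1 * 2;

   a flow_dd edge is added from the vertex of the first statement to
   the vertex of the second one, as x_1 flows between them.  */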

static void
create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
{
  use_operand_p imm_use_p;
  imm_use_iterator iterator;

  FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
    {
      struct graph_edge *e;
      int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));

      if (use < 0)
	continue;

      e = add_edge (rdg, idef, use);
      e->data = XNEW (struct rdg_edge);
      RDGE_TYPE (e) = flow_dd;
    }
}

/* Creates an edge for the control dependences of BB to the vertex V.  */

static void
create_edge_for_control_dependence (struct graph *rdg, basic_block bb,
				    int v, control_dependences *cd)
{
  bitmap_iterator bi;
  unsigned edge_n;
  EXECUTE_IF_SET_IN_BITMAP (cd->get_edges_dependent_on (bb->index),
			    0, edge_n, bi)
    {
      basic_block cond_bb = cd->get_edge_src (edge_n);
      gimple *stmt = last_stmt (cond_bb);
      if (stmt && is_ctrl_stmt (stmt))
	{
	  struct graph_edge *e;
	  int c = rdg_vertex_for_stmt (rdg, stmt);
	  if (c < 0)
	    continue;

	  e = add_edge (rdg, c, v);
	  e->data = XNEW (struct rdg_edge);
	  RDGE_TYPE (e) = control_dd;
	}
    }
}

/* Creates the scalar flow dependence edges of the reduced dependence
   graph RDG.  */

static void
create_rdg_flow_edges (struct graph *rdg)
{
  int i;
  def_operand_p def_p;
  ssa_op_iter iter;

  for (i = 0; i < rdg->n_vertices; i++)
    FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
			      iter, SSA_OP_DEF)
      create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
}

/* Creates the control dependence edges of the reduced dependence
   graph RDG.  */

static void
create_rdg_cd_edges (struct graph *rdg, control_dependences *cd, loop_p loop)
{
  int i;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      gimple *stmt = RDG_STMT (rdg, i);
      if (gimple_code (stmt) == GIMPLE_PHI)
	{
	  edge_iterator ei;
	  edge e;
	  FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds)
	    if (flow_bb_inside_loop_p (loop, e->src))
	      create_edge_for_control_dependence (rdg, e->src, i, cd);
	}
      else
	create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd);
    }
}

/* Build the vertices of the reduced dependence graph RDG.  Return false
   if that failed.  */

static bool
create_rdg_vertices (struct graph *rdg, vec<gimple *> stmts, loop_p loop,
		     vec<data_reference_p> *datarefs)
{
  int i;
  gimple *stmt;

  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      struct vertex *v = &(rdg->vertices[i]);

      /* Record statement to vertex mapping.  */
      gimple_set_uid (stmt, i);

      v->data = XNEW (struct rdg_vertex);
      RDGV_STMT (v) = stmt;
      RDGV_DATAREFS (v).create (0);
      RDGV_HAS_MEM_WRITE (v) = false;
      RDGV_HAS_MEM_READS (v) = false;
      if (gimple_code (stmt) == GIMPLE_PHI)
	continue;

      unsigned drp = datarefs->length ();
      if (!find_data_references_in_stmt (loop, stmt, datarefs))
	return false;
      for (unsigned j = drp; j < datarefs->length (); ++j)
	{
	  data_reference_p dr = (*datarefs)[j];
	  if (DR_IS_READ (dr))
	    RDGV_HAS_MEM_READS (v) = true;
	  else
	    RDGV_HAS_MEM_WRITE (v) = true;
	  RDGV_DATAREFS (v).safe_push (dr);
	}
    }
  return true;
}

/* Initialize STMTS with all the statements of LOOP.  The order in
   which we discover statements is important as
   generate_loops_for_partition is using the same traversal for
   identifying statements in loop copies.  */

static void
stmts_from_loop (struct loop *loop, vec<gimple *> *stmts)
{
  unsigned int i;
  basic_block *bbs = get_loop_body_in_dom_order (loop);

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
	   gsi_next (&bsi))
	if (!virtual_operand_p (gimple_phi_result (bsi.phi ())))
	  stmts->safe_push (bsi.phi ());

      for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
	   gsi_next (&bsi))
	{
	  gimple *stmt = gsi_stmt (bsi);
	  if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
	    stmts->safe_push (stmt);
	}
    }

  free (bbs);
}

/* Free the reduced dependence graph RDG.  */

static void
free_rdg (struct graph *rdg)
{
  int i;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      for (e = v->succ; e; e = e->succ_next)
	free (e->data);

      if (v->data)
	{
	  gimple_set_uid (RDGV_STMT (v), -1);
	  free_data_refs (RDGV_DATAREFS (v));
	  free (v->data);
	}
    }

  free_graph (rdg);
}

/* Build the Reduced Dependence Graph (RDG) with one vertex per
   statement of the loop nest LOOP_NEST, and one edge per data dependence or
   scalar dependence.  */

static struct graph *
build_rdg (vec<loop_p> loop_nest, control_dependences *cd)
{
  struct graph *rdg;
  vec<data_reference_p> datarefs;

  /* Create the RDG vertices from the stmts of the loop nest.  */
  auto_vec<gimple *, 10> stmts;
  stmts_from_loop (loop_nest[0], &stmts);
  rdg = new_graph (stmts.length ());
  datarefs.create (10);
  if (!create_rdg_vertices (rdg, stmts, loop_nest[0], &datarefs))
    {
      datarefs.release ();
      free_rdg (rdg);
      return NULL;
    }
  stmts.release ();

  create_rdg_flow_edges (rdg);
  if (cd)
    create_rdg_cd_edges (rdg, cd, loop_nest[0]);

  datarefs.release ();

  return rdg;
}



enum partition_kind {
    PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY, PKIND_MEMMOVE
};

struct partition
{
  bitmap stmts;
  bitmap loops;
  bool reduction_p;
  bool plus_one;
  enum partition_kind kind;
  /* data-references a kind != PKIND_NORMAL partition is about.  */
  data_reference_p main_dr;
  data_reference_p secondary_dr;
  tree niter;
};


/* Allocate and initialize a partition from the STMTS and LOOPS
   bitmaps, allocating empty bitmaps when they are NULL.  */

static partition *
partition_alloc (bitmap stmts, bitmap loops)
{
  partition *partition = XCNEW (struct partition);
  partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
  partition->loops = loops ? loops : BITMAP_ALLOC (NULL);
  partition->reduction_p = false;
  partition->kind = PKIND_NORMAL;
  return partition;
}

/* Free PARTITION.  */

static void
partition_free (partition *partition)
{
  BITMAP_FREE (partition->stmts);
  BITMAP_FREE (partition->loops);
  free (partition);
}

/* Returns true if the partition can be generated as a builtin.  */

static bool
partition_builtin_p (partition *partition)
{
  return partition->kind != PKIND_NORMAL;
}

/* Returns true if the partition contains a reduction.  */

static bool
partition_reduction_p (partition *partition)
{
  return partition->reduction_p;
}

/* Merge PARTITION into the partition DEST.  */
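
/* Note that DEST is demoted to PKIND_NORMAL: a fused partition no
   longer matches a single memset/memcpy/memmove pattern, so it has to
   be emitted as a regular loop.  */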

static void
partition_merge_into (partition *dest, partition *partition)
{
  dest->kind = PKIND_NORMAL;
  bitmap_ior_into (dest->stmts, partition->stmts);
  if (partition_reduction_p (partition))
    dest->reduction_p = true;
}


/* Returns true when DEF is an SSA_NAME defined in LOOP and used after
   the LOOP.  */

static bool
ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
{
  imm_use_iterator imm_iter;
  use_operand_p use_p;

  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
    {
      gimple *use_stmt = USE_STMT (use_p);
      if (!is_gimple_debug (use_stmt)
	  && loop != loop_containing_stmt (use_stmt))
	return true;
    }

  return false;
}

/* Returns true when STMT defines a scalar variable used after the
   loop LOOP.  */

static bool
stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple *stmt)
{
  def_operand_p def_p;
  ssa_op_iter op_iter;

  if (gimple_code (stmt) == GIMPLE_PHI)
    return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop);

  FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
    if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
      return true;

  return false;
}

/* Return a copy of LOOP placed before LOOP.  */

static struct loop *
copy_loop_before (struct loop *loop)
{
  struct loop *res;
  edge preheader = loop_preheader_edge (loop);

  initialize_original_copy_tables ();
  res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, NULL, preheader);
  gcc_assert (res != NULL);
  free_original_copy_tables ();
  delete_update_ssa ();

  return res;
}

/* Creates an empty basic block after LOOP.  */

static void
create_bb_after_loop (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return;

  split_edge (exit);
}

/* Generate code for PARTITION from the code in LOOP.  The loop is
   copied when COPY_P is true.  All the statements not flagged in the
   PARTITION bitmap are removed from the loop or from its copy.  The
   statements are indexed in sequence inside a basic block, and the
   basic blocks of a loop are taken in dom order.  */

static void
generate_loops_for_partition (struct loop *loop, partition *partition,
			      bool copy_p)
{
  unsigned i;
  basic_block *bbs;

  if (copy_p)
    {
      loop = copy_loop_before (loop);
      gcc_assert (loop != NULL);
      create_preheader (loop, CP_SIMPLE_PREHEADERS);
      create_bb_after_loop (loop);
    }

  /* Remove stmts not in the PARTITION bitmap.  */
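  /* This happens in two sweeps: the first sweep only resets debug uses
     of the statements about to be removed, so that no debug statement
     keeps referring to them; the second sweep actually removes the
     statements and PHI nodes.  */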
  bbs = get_loop_body_in_dom_order (loop);

  if (MAY_HAVE_DEBUG_STMTS)
    for (i = 0; i < loop->num_nodes; i++)
      {
	basic_block bb = bbs[i];

	for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
	     gsi_next (&bsi))
	  {
	    gphi *phi = bsi.phi ();
	    if (!virtual_operand_p (gimple_phi_result (phi))
		&& !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
	      reset_debug_uses (phi);
	  }

	for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
	     gsi_next (&bsi))
	  {
	    gimple *stmt = gsi_stmt (bsi);
	    if (gimple_code (stmt) != GIMPLE_LABEL
		&& !is_gimple_debug (stmt)
		&& !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
	      reset_debug_uses (stmt);
	  }
      }

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
	{
	  gphi *phi = bsi.phi ();
	  if (!virtual_operand_p (gimple_phi_result (phi))
	      && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
	    remove_phi_node (&bsi, true);
	  else
	    gsi_next (&bsi);
	}

      for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
	{
	  gimple *stmt = gsi_stmt (bsi);
	  if (gimple_code (stmt) != GIMPLE_LABEL
	      && !is_gimple_debug (stmt)
	      && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
	    {
	      /* Choose an arbitrary path through the empty CFG part
		 that this unnecessary control stmt controls.  */
	      if (gcond *cond_stmt = dyn_cast <gcond *> (stmt))
		{
		  gimple_cond_make_false (cond_stmt);
		  update_stmt (stmt);
		}
	      else if (gimple_code (stmt) == GIMPLE_SWITCH)
		{
		  gswitch *switch_stmt = as_a <gswitch *> (stmt);
		  gimple_switch_set_index
		    (switch_stmt, CASE_LOW (gimple_switch_label (switch_stmt, 1)));
		  update_stmt (stmt);
		}
	      else
		{
		  unlink_stmt_vdef (stmt);
		  gsi_remove (&bsi, true);
		  release_defs (stmt);
		  continue;
		}
	    }
	  gsi_next (&bsi);
	}
    }

  free (bbs);
}

/* Build the size argument for a memory operation call.  */

static tree
build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter,
		    bool plus_one)
{
  tree size = fold_convert_loc (loc, sizetype, nb_iter);
  if (plus_one)
    size = size_binop (PLUS_EXPR, size, size_one_node);
  size = fold_build2_loc (loc, MULT_EXPR, sizetype, size,
			  TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
  size = fold_convert_loc (loc, size_type_node, size);
  return size;
}

/* Build an address argument for a memory operation call.  */

static tree
build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes)
{
  tree addr_base;

  addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
  addr_base = fold_convert_loc (loc, sizetype, addr_base);

  /* Test for a negative stride, iterating over every element.  */
  if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
    {
      addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
				  fold_convert_loc (loc, sizetype, nb_bytes));
      addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
				  TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
    }

  return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base);
}

/* If VAL memory representation contains the same value in all bytes,
   return that value, otherwise return -1.
   E.g. for 0x24242424 return 0x24, for IEEE double
   747708026454360457216.0 return 0x44, etc.  */
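
/* For instance, this is what lets a loop like

   |for (i = 0; i < n; i++)
   |  a[i] = 0x24242424;

   be classified as PKIND_MEMSET below: when all bytes of the stored
   value agree, that byte becomes the value argument of the generated
   memset call.  */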

static int
const_with_all_bytes_same (tree val)
{
  unsigned char buf[64];
  int i, len;

  if (integer_zerop (val)
      || (TREE_CODE (val) == CONSTRUCTOR
	  && !TREE_CLOBBER_P (val)
	  && CONSTRUCTOR_NELTS (val) == 0))
    return 0;

  if (real_zerop (val))
    {
      /* Only return 0 for +0.0, not for -0.0, which doesn't have
	 an all bytes same memory representation.  Don't transform
	 -0.0 stores into +0.0 even for !HONOR_SIGNED_ZEROS.  */
      switch (TREE_CODE (val))
	{
	case REAL_CST:
	  if (!real_isneg (TREE_REAL_CST_PTR (val)))
	    return 0;
	  break;
	case COMPLEX_CST:
	  if (!const_with_all_bytes_same (TREE_REALPART (val))
	      && !const_with_all_bytes_same (TREE_IMAGPART (val)))
	    return 0;
	  break;
	case VECTOR_CST:
	  unsigned int j;
	  for (j = 0; j < VECTOR_CST_NELTS (val); ++j)
	    if (const_with_all_bytes_same (VECTOR_CST_ELT (val, j)))
	      break;
	  if (j == VECTOR_CST_NELTS (val))
	    return 0;
	  break;
	default:
	  break;
	}
    }

  if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
    return -1;

  len = native_encode_expr (val, buf, sizeof (buf));
  if (len == 0)
    return -1;
  for (i = 1; i < len; i++)
    if (buf[i] != buf[0])
      return -1;
  return buf[0];
}

/* Generate a call to memset for PARTITION in LOOP.  */

static void
generate_memset_builtin (struct loop *loop, partition *partition)
{
  gimple_stmt_iterator gsi;
  gimple *stmt, *fn_call;
  tree mem, fn, nb_bytes;
  location_t loc;
  tree val;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
				 partition->plus_one);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
				       false, GSI_CONTINUE_LINKING);
  mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
				  false, GSI_CONTINUE_LINKING);

  /* This exactly matches the pattern recognition in classify_partition.  */
  val = gimple_assign_rhs1 (stmt);
  /* Handle constants like 0x15151515 and similarly
     floating point constants etc. where all bytes are the same.  */
  int bytev = const_with_all_bytes_same (val);
  if (bytev != -1)
    val = build_int_cst (integer_type_node, bytev);
  else if (TREE_CODE (val) == INTEGER_CST)
    val = fold_convert (integer_type_node, val);
  else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
    {
      tree tem = make_ssa_name (integer_type_node);
      gimple *cstmt = gimple_build_assign (tem, NOP_EXPR, val);
      gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
      val = tem;
    }

  fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
  fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "generated memset");
      if (bytev == 0)
	fprintf (dump_file, " zero\n");
      else
	fprintf (dump_file, "\n");
    }
}

/* Generate a call to memcpy for PARTITION in LOOP.  */
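
/* For example, a partition containing just

   |for (i = 0; i < n; i++)
   |  a[i] = b[i];

   is emitted as one library call: memcpy when the source and
   destination are known not to overlap, memmove otherwise (see the
   PKIND_MEMCPY/PKIND_MEMMOVE distinction below).  */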

static void
generate_memcpy_builtin (struct loop *loop, partition *partition)
{
  gimple_stmt_iterator gsi;
  gimple *stmt, *fn_call;
  tree dest, src, fn, nb_bytes;
  location_t loc;
  enum built_in_function kind;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
				 partition->plus_one);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
				       false, GSI_CONTINUE_LINKING);
  dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes);
  if (partition->kind == PKIND_MEMCPY
      || ! ptr_derefs_may_alias_p (dest, src))
    kind = BUILT_IN_MEMCPY;
  else
    kind = BUILT_IN_MEMMOVE;

  dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
				   false, GSI_CONTINUE_LINKING);
  src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
				  false, GSI_CONTINUE_LINKING);
  fn = build_fold_addr_expr (builtin_decl_implicit (kind));
  fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (kind == BUILT_IN_MEMCPY)
	fprintf (dump_file, "generated memcpy\n");
      else
	fprintf (dump_file, "generated memmove\n");
    }
}

/* Remove and destroy the loop LOOP.  */

static void
destroy_loop (struct loop *loop)
{
  unsigned nbbs = loop->num_nodes;
  edge exit = single_exit (loop);
  basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
  basic_block *bbs;
  unsigned i;

  bbs = get_loop_body_in_dom_order (loop);

  redirect_edge_pred (exit, src);
  exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
  exit->flags |= EDGE_FALLTHRU;
  cancel_loop_tree (loop);
  rescan_loop_exit (exit, false, true);

  i = nbbs;
  do
    {
      /* We have made sure to not leave any dangling uses of SSA
	 names defined in the loop.  With the exception of virtuals.
	 Make sure we replace all uses of virtual defs that will remain
	 outside of the loop with the bare symbol as delete_basic_block
	 will release them.  */
      --i;
      for (gphi_iterator gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gphi *phi = gsi.phi ();
	  if (virtual_operand_p (gimple_phi_result (phi)))
	    mark_virtual_phi_result_for_renaming (phi);
	}
      for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  tree vdef = gimple_vdef (stmt);
	  if (vdef && TREE_CODE (vdef) == SSA_NAME)
	    mark_virtual_operand_for_renaming (vdef);
	}
      delete_basic_block (bbs[i]);
    }
  while (i != 0);

  free (bbs);

  set_immediate_dominator (CDI_DOMINATORS, dest,
			   recompute_dominator (CDI_DOMINATORS, dest));
}

/* Generates code for PARTITION.  Return whether LOOP needs to be destroyed.  */

static bool
generate_code_for_partition (struct loop *loop,
			     partition *partition, bool copy_p)
{
  switch (partition->kind)
    {
    case PKIND_NORMAL:
      /* Reductions all have to be in the last partition.  */
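      /* Partitions are generated in order with the last one reusing
	 the original loop (copy_p is false only for the last one,
	 see distribute_loop), and only the original loop keeps the
	 loop-closed PHI nodes of the reductions.  */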
      gcc_assert (!partition_reduction_p (partition)
		  || !copy_p);
      generate_loops_for_partition (loop, partition, copy_p);
      return false;

    case PKIND_MEMSET:
      generate_memset_builtin (loop, partition);
      break;

    case PKIND_MEMCPY:
    case PKIND_MEMMOVE:
      generate_memcpy_builtin (loop, partition);
      break;

    default:
      gcc_unreachable ();
    }

  /* Common tail for partitions we turn into a call.  If this was the last
     partition for which we generate code, we have to destroy the loop.  */
  if (!copy_p)
    return true;
  return false;
}


/* Returns a partition with all the statements needed for computing
   the vertex V of the RDG, also including the loop exit conditions.  */

static partition *
build_rdg_partition_for_vertex (struct graph *rdg, int v)
{
  partition *partition = partition_alloc (NULL, NULL);
  auto_vec<int, 3> nodes;
  unsigned i;
  int x;

  graphds_dfs (rdg, &v, 1, &nodes, false, NULL);

  FOR_EACH_VEC_ELT (nodes, i, x)
    {
      bitmap_set_bit (partition->stmts, x);
      bitmap_set_bit (partition->loops,
		      loop_containing_stmt (RDG_STMT (rdg, x))->num);
    }

  return partition;
}

/* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
   We detect the memset, memcpy and memmove patterns.  */

static void
classify_partition (loop_p loop, struct graph *rdg, partition *partition)
{
  bitmap_iterator bi;
  unsigned i;
  tree nb_iter;
  data_reference_p single_load, single_store;
  bool volatiles_p = false;
  bool plus_one = false;

  partition->kind = PKIND_NORMAL;
  partition->main_dr = NULL;
  partition->secondary_dr = NULL;
  partition->niter = NULL_TREE;
  partition->plus_one = false;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple *stmt = RDG_STMT (rdg, i);

      if (gimple_has_volatile_ops (stmt))
	volatiles_p = true;

      /* If the stmt has uses outside of the loop mark it as reduction.  */
      if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
	{
	  partition->reduction_p = true;
	  return;
	}
    }

  /* Perform general partition disqualification for builtins.  */
  if (volatiles_p
      || !flag_tree_loop_distribute_patterns)
    return;

  /* Detect memset and memcpy.  */
  single_load = NULL;
  single_store = NULL;
  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple *stmt = RDG_STMT (rdg, i);
      data_reference_p dr;
      unsigned j;

      if (gimple_code (stmt) == GIMPLE_PHI)
	continue;

      /* Any scalar stmts are ok.  */
      if (!gimple_vuse (stmt))
	continue;

      /* Otherwise just regular loads/stores.  */
      if (!gimple_assign_single_p (stmt))
	return;

      /* But exactly one store and/or load.  */
      for (j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j)
	{
	  tree type = TREE_TYPE (DR_REF (dr));

	  /* The memset, memcpy and memmove library calls are only
	     able to deal with generic address space.  */
	  if (!ADDR_SPACE_GENERIC_P (TYPE_ADDR_SPACE (type)))
	    return;

	  if (DR_IS_READ (dr))
	    {
	      if (single_load != NULL)
		return;
	      single_load = dr;
	    }
	  else
	    {
	      if (single_store != NULL)
		return;
	      single_store = dr;
	    }
	}
    }

  if (!single_store)
    return;

  nb_iter = number_of_latch_executions (loop);
  if (!nb_iter || nb_iter == chrec_dont_know)
    return;
  if (dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src,
		      gimple_bb (DR_STMT (single_store))))
    plus_one = true;

  if (single_store && !single_load)
    {
      gimple *stmt = DR_STMT (single_store);
      tree rhs = gimple_assign_rhs1 (stmt);
      if (const_with_all_bytes_same (rhs) == -1
	  && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs))
	      || (TYPE_MODE (TREE_TYPE (rhs))
		  != TYPE_MODE (unsigned_char_type_node))))
	return;
      if (TREE_CODE (rhs) == SSA_NAME
	  && !SSA_NAME_IS_DEFAULT_DEF (rhs)
	  && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
	return;
      if (!adjacent_dr_p (single_store)
	  || !dominated_by_p (CDI_DOMINATORS,
			      loop->latch, gimple_bb (stmt)))
	return;
      partition->kind = PKIND_MEMSET;
      partition->main_dr = single_store;
      partition->niter = nb_iter;
      partition->plus_one = plus_one;
    }
  else if (single_store && single_load)
    {
      gimple *store = DR_STMT (single_store);
      gimple *load = DR_STMT (single_load);
      /* Direct aggregate copy or via an SSA name temporary.  */
      if (load != store
	  && gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
	return;
      if (!adjacent_dr_p (single_store)
	  || !adjacent_dr_p (single_load)
	  || !operand_equal_p (DR_STEP (single_store),
			       DR_STEP (single_load), 0)
	  || !dominated_by_p (CDI_DOMINATORS,
			      loop->latch, gimple_bb (store)))
	return;
      /* Now check that if there is a dependence this dependence is
	 of a suitable form for memmove.  */
      vec<loop_p> loops = vNULL;
      ddr_p ddr;
      loops.safe_push (loop);
      ddr = initialize_data_dependence_relation (single_load, single_store,
						 loops);
      compute_affine_dependence (ddr, loop);
      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	{
	  free_dependence_relation (ddr);
	  loops.release ();
	  return;
	}
      if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
	{
	  if (DDR_NUM_DIST_VECTS (ddr) == 0)
	    {
	      free_dependence_relation (ddr);
	      loops.release ();
	      return;
	    }
	  lambda_vector dist_v;
	  FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
	    {
	      int dist = dist_v[index_in_loop_nest (loop->num,
						    DDR_LOOP_NEST (ddr))];
	      if (dist > 0 && !DDR_REVERSED_P (ddr))
		{
		  free_dependence_relation (ddr);
		  loops.release ();
		  return;
		}
	    }
	  partition->kind = PKIND_MEMMOVE;
	}
      else
	partition->kind = PKIND_MEMCPY;
      free_dependence_relation (ddr);
      loops.release ();
      partition->main_dr = single_store;
      partition->secondary_dr = single_load;
      partition->niter = nb_iter;
      partition->plus_one = plus_one;
    }
}

/* For a data reference DR, return the declaration of its base
   address or NULL_TREE if the base is not determined.  */
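
/* E.g. for an access a[i_1] to a declared array "a", DR_BASE_ADDRESS
   is &a and the VAR_DECL "a" is returned; for an access through a
   pointer SSA name, the (non-ADDR_EXPR) base address is returned
   as-is.  */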

static tree
ref_base_address (data_reference_p dr)
{
  tree base_address = DR_BASE_ADDRESS (dr);
  if (base_address
      && TREE_CODE (base_address) == ADDR_EXPR)
    return TREE_OPERAND (base_address, 0);

  return base_address;
}

/* Returns true when PARTITION1 and PARTITION2 have similar memory
   accesses in RDG.  */

static bool
similar_memory_accesses (struct graph *rdg, partition *partition1,
			 partition *partition2)
{
  unsigned i, j, k, l;
  bitmap_iterator bi, bj;
  data_reference_p ref1, ref2;

  /* First check whether in the intersection of the two partitions are
     any loads or stores.  Common loads are the situation that happens
     most often.  */
  EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
	|| RDG_MEM_READS_STMT (rdg, i))
      return true;

  /* Then check all data-references against each other.  */
  EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
	|| RDG_MEM_READS_STMT (rdg, i))
      EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj)
	if (RDG_MEM_WRITE_STMT (rdg, j)
	    || RDG_MEM_READS_STMT (rdg, j))
	  {
	    FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, i), k, ref1)
	      {
		tree base1 = ref_base_address (ref1);
		if (base1)
		  FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, j), l, ref2)
		    if (base1 == ref_base_address (ref2))
		      return true;
	      }
	  }

  return false;
}

/* Aggregate several components into a useful partition that is
   registered in the PARTITIONS vector.  Partitions will be
   distributed in different loops.  */

static void
rdg_build_partitions (struct graph *rdg,
		      vec<gimple *> starting_stmts,
		      vec<partition *> *partitions)
{
  bitmap processed = BITMAP_ALLOC (NULL);
  int i;
  gimple *stmt;

  FOR_EACH_VEC_ELT (starting_stmts, i, stmt)
    {
      int v = rdg_vertex_for_stmt (rdg, stmt);

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "ldist asked to generate code for vertex %d\n", v);

      /* If the vertex is already contained in another partition so
	 is the partition rooted at it.  */
      if (bitmap_bit_p (processed, v))
	continue;

      partition *partition = build_rdg_partition_for_vertex (rdg, v);
      bitmap_ior_into (processed, partition->stmts);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "ldist useful partition:\n");
	  dump_bitmap (dump_file, partition->stmts);
	}

      partitions->safe_push (partition);
    }

  /* All vertices should have been assigned to at least one partition now,
     other than vertices belonging to dead code.  */

  BITMAP_FREE (processed);
}

/* Dump to FILE the PARTITIONS.  */

static void
dump_rdg_partitions (FILE *file, vec<partition *> partitions)
{
  int i;
  partition *partition;

  FOR_EACH_VEC_ELT (partitions, i, partition)
    debug_bitmap_file (file, partition->stmts);
}

/* Debug PARTITIONS.  */
extern void debug_rdg_partitions (vec<partition *> );

DEBUG_FUNCTION void
debug_rdg_partitions (vec<partition *> partitions)
{
  dump_rdg_partitions (stderr, partitions);
}

/* Returns the number of read and write operations in the RDG.  */

static int
number_of_rw_in_rdg (struct graph *rdg)
{
  int i, res = 0;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      if (RDG_MEM_WRITE_STMT (rdg, i))
	++res;

      if (RDG_MEM_READS_STMT (rdg, i))
	++res;
    }

  return res;
}

/* Returns the number of read and write operations in a PARTITION of
   the RDG.  */

static int
number_of_rw_in_partition (struct graph *rdg, partition *partition)
{
  int res = 0;
  unsigned i;
  bitmap_iterator ii;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii)
    {
      if (RDG_MEM_WRITE_STMT (rdg, i))
	++res;

      if (RDG_MEM_READS_STMT (rdg, i))
	++res;
    }

  return res;
}

/* Returns true when one of the PARTITIONS contains all the read or
   write operations of RDG.  */

static bool
partition_contains_all_rw (struct graph *rdg,
			   vec<partition *> partitions)
{
  int i;
  partition *partition;
  int nrw = number_of_rw_in_rdg (rdg);

  FOR_EACH_VEC_ELT (partitions, i, partition)
    if (nrw == number_of_rw_in_partition (rdg, partition))
      return true;

  return false;
}

/* Compute partition dependence created by the data references in DRS1
   and DRS2 and modify and return DIR according to that.  */

static int
pg_add_dependence_edges (struct graph *rdg, vec<loop_p> loops, int dir,
			 vec<data_reference_p> drs1,
			 vec<data_reference_p> drs2)
{
  data_reference_p dr1, dr2;

  /* dependence direction - 0 is no dependence, -1 is back,
     1 is forth, 2 is both (we can stop then, merging will occur).  */
  for (int ii = 0; drs1.iterate (ii, &dr1); ++ii)
    for (int jj = 0; drs2.iterate (jj, &dr2); ++jj)
      {
	data_reference_p saved_dr1 = dr1;
	int this_dir = 1;
	ddr_p ddr;
	/* Re-shuffle data-refs to be in dominator order.  */
	if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1))
	    > rdg_vertex_for_stmt (rdg, DR_STMT (dr2)))
	  {
	    std::swap (dr1, dr2);
	    this_dir = -this_dir;
	  }
	ddr = initialize_data_dependence_relation (dr1, dr2, loops);
	compute_affine_dependence (ddr, loops[0]);
	if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	  this_dir = 2;
	else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
	  {
	    if (DDR_REVERSED_P (ddr))
	      {
		std::swap (dr1, dr2);
		this_dir = -this_dir;
	      }
	    /* Known dependences can still be unordered throughout the
	       iteration space, see gcc.dg/tree-ssa/ldist-16.c.  */
	    if (DDR_NUM_DIST_VECTS (ddr) != 1)
	      this_dir = 2;
	    /* If the overlap is exact preserve stmt order.  */
	    else if (lambda_vector_zerop (DDR_DIST_VECT (ddr, 0), 1))
	      ;
	    else
	      {
		/* Else as the distance vector is lexicographic positive
		   swap the dependence direction.  */
		this_dir = -this_dir;
	      }
	  }
	else
	  this_dir = 0;
	free_dependence_relation (ddr);
	if (this_dir == 2)
	  return 2;
	else if (dir == 0)
	  dir = this_dir;
	else if (this_dir != 0 && dir != this_dir)
	  return 2;
	/* Shuffle "back" dr1.  */
	dr1 = saved_dr1;
      }
  return dir;
}

/* Compare postorder number of the partition graph vertices V1 and V2.  */
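
/* qsort with this comparator orders vertices by decreasing postorder
   number, i.e. in reverse postorder.  For the acyclic graph that
   remains once the dependence SCCs have been fused, this is a
   topological order, so producer partitions end up before their
   consumers.  */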

static int
pgcmp (const void *v1_, const void *v2_)
{
  const vertex *v1 = (const vertex *)v1_;
  const vertex *v2 = (const vertex *)v2_;
  return v2->post - v1->post;
}

/* Distributes the code from LOOP in such a way that producer
   statements are placed before consumer statements.  Tries to separate
   only the statements from STMTS into separate loops.
   Returns the number of distributed loops.  Set *DESTROY_P to whether
   LOOP needs to be destroyed.  */

static int
distribute_loop (struct loop *loop, vec<gimple *> stmts,
		 control_dependences *cd, int *nb_calls, bool *destroy_p)
{
  struct graph *rdg;
  partition *partition;
  bool any_builtin;
  int i, nbp;
  graph *pg = NULL;
  int num_sccs = 1;

  *destroy_p = false;
  *nb_calls = 0;
  auto_vec<loop_p, 3> loop_nest;
  if (!find_loop_nest (loop, &loop_nest))
    return 0;

  rdg = build_rdg (loop_nest, cd);
  if (!rdg)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Loop %d not distributed: failed to build the RDG.\n",
		 loop->num);

      return 0;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg (dump_file, rdg);

  auto_vec<struct partition *, 3> partitions;
  rdg_build_partitions (rdg, stmts, &partitions);

  any_builtin = false;
  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      classify_partition (loop, rdg, partition);
      any_builtin |= partition_builtin_p (partition);
    }

  /* If we are only distributing patterns but did not detect any,
     simply bail out.  */
  if (!flag_tree_loop_distribution
      && !any_builtin)
    {
      nbp = 0;
      goto ldist_done;
    }

  /* If we are only distributing patterns fuse all partitions that
     were not classified as builtins.  This also avoids chopping
     a loop into pieces, separated by builtin calls.  That is, we
     only want no or a single loop body remaining.  */
  struct partition *into;
  if (!flag_tree_loop_distribution)
    {
      for (i = 0; partitions.iterate (i, &into); ++i)
	if (!partition_builtin_p (into))
	  break;
      for (++i; partitions.iterate (i, &partition); ++i)
	if (!partition_builtin_p (partition))
	  {
	    if (dump_file && (dump_flags & TDF_DETAILS))
	      {
		fprintf (dump_file, "fusing non-builtin partitions\n");
		dump_bitmap (dump_file, into->stmts);
		dump_bitmap (dump_file, partition->stmts);
	      }
	    partition_merge_into (into, partition);
	    partitions.unordered_remove (i);
	    partition_free (partition);
	    i--;
	  }
    }

  /* Due to limitations in the transform phase we have to fuse all
     reduction partitions into the last partition so the existing
     loop will contain all loop-closed PHI nodes.  */
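  /* The reduction partition is in addition forced to be last in the
     partition dependence graph built below, so it ends up being the
     partition generated from the original loop.  */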
  for (i = 0; partitions.iterate (i, &into); ++i)
    if (partition_reduction_p (into))
      break;
  for (i = i + 1; partitions.iterate (i, &partition); ++i)
    if (partition_reduction_p (partition))
      {
	if (dump_file && (dump_flags & TDF_DETAILS))
	  {
	    fprintf (dump_file, "fusing partitions\n");
	    dump_bitmap (dump_file, into->stmts);
	    dump_bitmap (dump_file, partition->stmts);
	    fprintf (dump_file, "because they have reductions\n");
	  }
	partition_merge_into (into, partition);
	partitions.unordered_remove (i);
	partition_free (partition);
	i--;
      }

  /* Apply our simple cost model - fuse partitions with similar
     memory accesses.  */
  for (i = 0; partitions.iterate (i, &into); ++i)
    {
      bool changed = false;
      if (partition_builtin_p (into))
	continue;
      for (int j = i + 1;
	   partitions.iterate (j, &partition); ++j)
	{
	  if (similar_memory_accesses (rdg, into, partition))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, "fusing partitions\n");
		  dump_bitmap (dump_file, into->stmts);
		  dump_bitmap (dump_file, partition->stmts);
		  fprintf (dump_file, "because they have similar "
			   "memory accesses\n");
		}
	      partition_merge_into (into, partition);
	      partitions.unordered_remove (j);
	      partition_free (partition);
	      j--;
	      changed = true;
	    }
	}
      /* If we fused 0 1 2 in step 1 to 0,2 1 as 0 and 2 have similar
	 accesses when 1 and 2 have similar accesses but not 0 and 1
	 then in the next iteration we will fail to consider merging
	 1 into 0,2.  So try again if we did any merging into 0.  */
      if (changed)
	i--;
    }

  /* Build the partition dependency graph.  */
  if (partitions.length () > 1)
    {
      pg = new_graph (partitions.length ());
      struct pgdata {
	struct partition *partition;
	vec<data_reference_p> writes;
	vec<data_reference_p> reads;
      };
#define PGDATA(i) ((pgdata *)(pg->vertices[i].data))
      for (i = 0; partitions.iterate (i, &partition); ++i)
	{
	  vertex *v = &pg->vertices[i];
	  pgdata *data = new pgdata;
	  data_reference_p dr;
	  /* FIXME - leaks.  */
	  v->data = data;
	  bitmap_iterator bi;
	  unsigned j;
	  data->partition = partition;
	  data->reads = vNULL;
	  data->writes = vNULL;
	  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, j, bi)
	    for (int k = 0; RDG_DATAREFS (rdg, j).iterate (k, &dr); ++k)
	      if (DR_IS_READ (dr))
		data->reads.safe_push (dr);
	      else
		data->writes.safe_push (dr);
	}
      struct partition *partition1, *partition2;
      for (i = 0; partitions.iterate (i, &partition1); ++i)
	for (int j = i + 1; partitions.iterate (j, &partition2); ++j)
	  {
	    /* dependence direction - 0 is no dependence, -1 is back,
	       1 is forth, 2 is both (we can stop then, merging will occur).  */
	    int dir = 0;
	    dir = pg_add_dependence_edges (rdg, loop_nest, dir,
					   PGDATA(i)->writes,
					   PGDATA(j)->reads);
	    if (dir != 2)
	      dir = pg_add_dependence_edges (rdg, loop_nest, dir,
					     PGDATA(i)->reads,
					     PGDATA(j)->writes);
	    if (dir != 2)
	      dir = pg_add_dependence_edges (rdg, loop_nest, dir,
					     PGDATA(i)->writes,
					     PGDATA(j)->writes);
	    if (dir == 1 || dir == 2)
	      add_edge (pg, i, j);
	    if (dir == -1 || dir == 2)
	      add_edge (pg, j, i);
	  }

      /* Add edges to the reduction partition (if any) to force it last.  */
      unsigned j;
      for (j = 0; partitions.iterate (j, &partition); ++j)
	if (partition_reduction_p (partition))
	  break;
      if (j < partitions.length ())
	{
	  for (unsigned i = 0; partitions.iterate (i, &partition); ++i)
	    if (i != j)
	      add_edge (pg, i, j);
	}

      /* Compute partitions we cannot separate and fuse them.  */
      num_sccs = graphds_scc (pg, NULL);
      for (i = 0; i < num_sccs; ++i)
	{
	  struct partition *first;
	  int j;
	  for (j = 0; partitions.iterate (j, &first); ++j)
	    if (pg->vertices[j].component == i)
	      break;
	  for (j = j + 1; partitions.iterate (j, &partition); ++j)
	    if (pg->vertices[j].component == i)
	      {
		if (dump_file && (dump_flags & TDF_DETAILS))
		  {
		    fprintf (dump_file, "fusing partitions\n");
		    dump_bitmap (dump_file, first->stmts);
		    dump_bitmap (dump_file, partition->stmts);
		    fprintf (dump_file, "because they are in the same "
			     "dependence SCC\n");
		  }
		partition_merge_into (first, partition);
		partitions[j] = NULL;
		partition_free (partition);
		PGDATA (j)->partition = NULL;
	      }
	}

      /* Now order the remaining nodes in postorder.  */
      qsort (pg->vertices, pg->n_vertices, sizeof (vertex), pgcmp);
      partitions.truncate (0);
      for (i = 0; i < pg->n_vertices; ++i)
	{
	  pgdata *data = PGDATA (i);
	  if (data->partition)
	    partitions.safe_push (data->partition);
	  data->reads.release ();
	  data->writes.release ();
	  delete data;
	}
      gcc_assert (partitions.length () == (unsigned)num_sccs);
      free_graph (pg);
    }

  nbp = partitions.length ();
  if (nbp == 0
      || (nbp == 1 && !partition_builtin_p (partitions[0]))
      || (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
    {
      nbp = 0;
      goto ldist_done;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg_partitions (dump_file, partitions);

  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      if (partition_builtin_p (partition))
	(*nb_calls)++;
      *destroy_p |= generate_code_for_partition (loop, partition, i < nbp - 1);
    }

 ldist_done:

  FOR_EACH_VEC_ELT (partitions, i, partition)
    partition_free (partition);

  free_rdg (rdg);
  return nbp - *nb_calls;
}

/* Distribute all loops in the current function.  */
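
/* The pass below runs as "ldist" and is gated on
   -ftree-loop-distribution and/or -ftree-loop-distribute-patterns;
   the former enables full distribution while the latter only splits
   out the recognized memset/memcpy/memmove patterns (see
   distribute_loop above).  */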

namespace {

const pass_data pass_data_loop_distribution =
{
  GIMPLE_PASS, /* type */
  "ldist", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_loop_distribution : public gimple_opt_pass
{
public:
  pass_loop_distribution (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_loop_distribution, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return flag_tree_loop_distribution
	|| flag_tree_loop_distribute_patterns;
    }

  virtual unsigned int execute (function *);

}; // class pass_loop_distribution

unsigned int
pass_loop_distribution::execute (function *fun)
{
  struct loop *loop;
  bool changed = false;
  basic_block bb;
  control_dependences *cd = NULL;
  auto_vec<loop_p> loops_to_be_destroyed;

  FOR_ALL_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	gimple_set_uid (gsi_stmt (gsi), -1);
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	gimple_set_uid (gsi_stmt (gsi), -1);
    }

  /* We can at the moment only distribute non-nested loops, thus restrict
     walking to innermost loops.  */
  FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
    {
      auto_vec<gimple *> work_list;
      basic_block *bbs;
      int num = loop->num;
      unsigned int i;

      /* If the loop doesn't have a single exit we will fail anyway,
	 so do that early.  */
      if (!single_exit (loop))
	continue;

      /* Only optimize hot loops.  */
      if (!optimize_loop_for_speed_p (loop))
	continue;

      /* Initialize the worklist with stmts we seed the partitions with.  */
      bbs = get_loop_body_in_dom_order (loop);
      for (i = 0; i < loop->num_nodes; ++i)
	{
	  for (gphi_iterator gsi = gsi_start_phis (bbs[i]);
	       !gsi_end_p (gsi);
	       gsi_next (&gsi))
	    {
	      gphi *phi = gsi.phi ();
	      if (virtual_operand_p (gimple_phi_result (phi)))
		continue;
	      /* Distribute stmts which have defs that are used outside of
		 the loop.  */
	      if (!stmt_has_scalar_dependences_outside_loop (loop, phi))
		continue;
	      work_list.safe_push (phi);
	    }
	  for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]);
	       !gsi_end_p (gsi);
	       gsi_next (&gsi))
	    {
	      gimple *stmt = gsi_stmt (gsi);

	      /* If there is a stmt with side-effects bail out - we
		 cannot and should not distribute this loop.  */
	      if (gimple_has_side_effects (stmt))
		{
		  work_list.truncate (0);
		  goto out;
		}

	      /* Distribute stmts which have defs that are used outside of
		 the loop.  */
	      if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
		;
	      /* Otherwise only distribute stores for now.  */
	      else if (!gimple_vdef (stmt))
		continue;

	      work_list.safe_push (stmt);
	    }
	}
out:
      free (bbs);

      int nb_generated_loops = 0;
      int nb_generated_calls = 0;
      location_t loc = find_loop_location (loop);
      if (work_list.length () > 0)
	{
	  if (!cd)
	    {
	      calculate_dominance_info (CDI_DOMINATORS);
	      calculate_dominance_info (CDI_POST_DOMINATORS);
	      cd = new control_dependences ();
	      free_dominance_info (CDI_POST_DOMINATORS);
	    }
	  bool destroy_p;
	  nb_generated_loops = distribute_loop (loop, work_list, cd,
						&nb_generated_calls,
						&destroy_p);
	  if (destroy_p)
	    loops_to_be_destroyed.safe_push (loop);
	}

      if (nb_generated_loops + nb_generated_calls > 0)
	{
	  changed = true;
	  dump_printf_loc (MSG_OPTIMIZED_LOCATIONS,
			   loc, "Loop %d distributed: split to %d loops "
			   "and %d library calls.\n",
			   num, nb_generated_loops, nb_generated_calls);
	}
      else if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Loop %d is the same.\n", num);
    }

  if (cd)
    delete cd;

  if (changed)
    {
      /* Destroy loop bodies that could not be reused.  Do this late as we
	 otherwise can end up referring to stale data in control dependences.  */
      unsigned i;
      FOR_EACH_VEC_ELT (loops_to_be_destroyed, i, loop)
	destroy_loop (loop);

      /* Cached scalar evolutions now may refer to wrong or non-existing
	 loops.  */
      scev_reset_htab ();
      mark_virtual_operands_for_renaming (fun);
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
    }

  checking_verify_loop_structure ();

  return 0;
}

} // anon namespace

gimple_opt_pass *
make_pass_loop_distribution (gcc::context *ctxt)
{
  return new pass_loop_distribution (ctxt);
}