/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "cfgloop.h"
#include "cfglayout.h"
#include "expr.h"
#include "recog.h"
#include "optabs.h"
#include "toplev.h"
#include "tree-vectorizer.h"
#include "langhooks.h"


/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt,
                    enum vect_relevant relevant, bool live_p)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);

  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      gimple pattern_stmt;

      /* This is the last stmt in a sequence that was detected as a
         pattern that can potentially be vectorized.  Don't mark the stmt
         as relevant/live because it's not going to be vectorized.
         Instead mark the pattern-stmt that replaces it.  */

      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live.");
      stmt_info = vinfo_for_stmt (pattern_stmt);
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
      save_relevant = STMT_VINFO_RELEVANT (stmt_info);
      save_live_p = STMT_VINFO_LIVE_P (stmt_info);
      stmt = pattern_stmt;
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "already marked relevant/live.");
      return;
    }

  VEC_safe_push (gimple, heap, *worklist, stmt);
}


/* Function vect_stmt_relevant_p.

   Return true if STMT, in the loop represented by LOOP_VINFO, is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - it is a control stmt in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
                      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
         != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt))
      {
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
        *relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
        {
          basic_block bb = gimple_bb (USE_STMT (use_p));
          if (!flow_bb_inside_loop_p (loop, bb))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");

              if (is_gimple_debug (USE_STMT (use_p)))
                continue;

              /* We expect all such uses to be in the loop exit phis
                 (because of loop closed form).  */
              gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
              gcc_assert (bb == single_exit (loop)->dest);

              *live_p = true;
            }
        }
    }

  return (*live_p || *relevant);
}


/* Function exist_non_indexing_operands_for_use_p.

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it's of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    return false;
  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}

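
/* For example (purely illustrative; the statement and SSA names below are
   hypothetical): given the store "a[i_4] = x_5", the use of x_5 is the
   stored value, so exist_non_indexing_operands_for_use_p returns true for
   it, whereas the use of i_4 only serves the address computation, so the
   function returns false and process_use leaves the relevance of the stmt
   defining i_4 unchanged.  */
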
/* Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array
     indexing), which does not need to be directly vectorized, then the
     liveness/relevance of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
     skip DEF_STMT because it has already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant"
     will be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static bool
process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
             enum vect_relevant relevant, VEC(gimple,heap) **worklist)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, loop_vinfo, NULL, &def_stmt, &def, &dt))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "def_stmt is out of loop.");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
        dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
                  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
        outer-loop-header-bb:
                d = def_stmt
        inner-loop:
                stmt # use (d)
        outer-loop-tail-bb:
                ...             */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
                      vect_used_in_scope : vect_unused_in_scope;
          break;

        case vect_used_in_outer_by_reduction:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_by_reduction;
          break;

        case vect_used_in_outer:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_in_scope;
          break;

        case vect_used_in_scope:
          break;

        default:
          gcc_unreachable ();
        }
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
        outer-loop-header-bb:
                ...
        inner-loop:
                d = def_stmt
        outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
                stmt # use (d)          */
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
            || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
                      vect_used_in_outer_by_reduction : vect_unused_in_scope;
          break;

        case vect_used_by_reduction:
          relevant = vect_used_in_outer_by_reduction;
          break;

        case vect_used_in_scope:
          relevant = vect_used_in_outer;
          break;

        default:
          gcc_unreachable ();
        }
    }

  vect_mark_relevant (worklist, def_stmt, relevant, live_p);
  return true;
}


/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmts 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  VEC(gimple,heap) *worklist;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple phi;
  bool live_p;
  enum vect_relevant relevant, tmp_relevant;
  enum vect_def_type def_type;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");

  worklist = VEC_alloc (gimple, heap, 64);

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          phi = gsi_stmt (si);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "init: phi relevant? ");
              print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
            }

          if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, phi, relevant, live_p);
        }
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          stmt = gsi_stmt (si);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "init: stmt relevant? ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, stmt, relevant, live_p);
        }
    }

  /* 2. Process_worklist.  */
  while (VEC_length (gimple, worklist) > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = VEC_pop (gimple, worklist);
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "worklist: examine stmt: ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines
         it (DEF_STMT) as relevant/irrelevant and live/dead according to the
         liveness and relevance properties of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
      live_p = STMT_VINFO_LIVE_P (stmt_vinfo);

      /* Generally, the liveness and relevance properties of STMT are
         propagated as is to the DEF_STMTs of its USEs:
           live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
           relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)

         One exception is when STMT has been identified as defining a
         reduction variable; in this case we set the liveness/relevance
         as follows:
           live_p = false
           relevant = vect_used_by_reduction
         This is because we distinguish between two kinds of relevant stmts -
         those that are used by a reduction computation, and those that are
         (also) used by a regular computation.  This allows us later on to
         identify stmts that are used solely by a reduction, and therefore
         the order of the results that they produce does not have to be
         kept.
      */

      def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
      tmp_relevant = relevant;
      switch (def_type)
        {
        case vect_reduction_def:
          switch (tmp_relevant)
            {
            case vect_unused_in_scope:
              relevant = vect_used_by_reduction;
              break;

            case vect_used_by_reduction:
              if (gimple_code (stmt) == GIMPLE_PHI)
                break;
              /* fall through */

            default:
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "unsupported use of reduction.");

              VEC_free (gimple, heap, worklist);
              return false;
            }

          live_p = false;
          break;

        case vect_nested_cycle:
          if (tmp_relevant != vect_unused_in_scope
              && tmp_relevant != vect_used_in_outer_by_reduction
              && tmp_relevant != vect_used_in_outer)
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "unsupported use of nested cycle.");

              VEC_free (gimple, heap, worklist);
              return false;
            }

          live_p = false;
          break;

        case vect_double_reduction_def:
          if (tmp_relevant != vect_unused_in_scope
              && tmp_relevant != vect_used_by_reduction)
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "unsupported use of double reduction.");

              VEC_free (gimple, heap, worklist);
              return false;
            }

          live_p = false;
          break;

        default:
          break;
        }

      FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
        {
          tree op = USE_FROM_PTR (use_p);
          if (!process_use (stmt, op, loop_vinfo, live_p, relevant, &worklist))
            {
              VEC_free (gimple, heap, worklist);
              return false;
            }
        }
    } /* while worklist */

  VEC_free (gimple, heap, worklist);
  return true;
}


int
cost_for_stmt (gimple stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (STMT_VINFO_TYPE (stmt_info))
    {
    case load_vec_info_type:
      return TARG_SCALAR_LOAD_COST;
    case store_vec_info_type:
      return TARG_SCALAR_STORE_COST;
    case shift_vec_info_type:
    case op_vec_info_type:
    case condition_vec_info_type:
    case assignment_vec_info_type:
    case reduc_vec_info_type:
    case induc_vec_info_type:
    case type_promotion_vec_info_type:
    case type_demotion_vec_info_type:
    case type_conversion_vec_info_type:
    case call_vec_info_type:
      return TARG_SCALAR_STMT_COST;
    case undef_vec_info_type:
    default:
      gcc_unreachable ();
    }
}

/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
                        enum vect_def_type *dt, slp_tree slp_node)
{
  int i;
  int inside_cost = 0, outside_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  inside_cost = ncopies * TARG_VEC_STMT_COST;

  /* FORNOW: Assuming maximum 2 args per stmt.  */
  for (i = 0; i < 2; i++)
    {
      if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
        outside_cost += TARG_SCALAR_TO_VEC_COST;
    }

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
             "outside_cost = %d .", inside_cost, outside_cost);

  /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
  stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
}
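
/* As an illustration (hypothetical statement and operands): for
   "x_1 = y_2 + z_3" vectorized with ncopies == 2, where y_2 is defined
   inside the loop and z_3 outside of it, the model above gives
   inside_cost = 2 * TARG_VEC_STMT_COST and
   outside_cost = TARG_SCALAR_TO_VEC_COST (a single broadcast of z_3 into
   a vector, emitted outside the loop).  */
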

/* Function vect_cost_strided_group_size.

   For strided load or store, return the group_size only if it is the first
   load or store of a group, else return 1.  This ensures that group size is
   only returned once per group.  */

static int
vect_cost_strided_group_size (stmt_vec_info stmt_info)
{
  gimple first_stmt = DR_GROUP_FIRST_DR (stmt_info);

  if (first_stmt == STMT_VINFO_STMT (stmt_info))
    return DR_GROUP_SIZE (stmt_info);

  return 1;
}


/* Function vect_model_store_cost.

   Models cost for stores.  In the case of strided accesses, one access
   has the overhead of the strided access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
                       enum vect_def_type dt, slp_tree slp_node)
{
  int group_size;
  int inside_cost = 0, outside_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (dt == vect_constant_def || dt == vect_external_def)
    outside_cost = TARG_SCALAR_TO_VEC_COST;

  /* Strided access?  */
  if (DR_GROUP_FIRST_DR (stmt_info) && !slp_node)
    group_size = vect_cost_strided_group_size (stmt_info);
  /* Not a strided access.  */
  else
    group_size = 1;

  /* Is this an access in a group of stores, which provide strided access?
     If so, add in the cost of the permutes.  */
  if (group_size > 1)
    {
      /* Uses a high and low interleave operation for each needed permute.  */
      inside_cost = ncopies * exact_log2 (group_size) * group_size
                    * TARG_VEC_STMT_COST;

      if (vect_print_dump_info (REPORT_COST))
        fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
                 group_size);
    }

  /* Costs of the stores.  */
  inside_cost += ncopies * TARG_VEC_STORE_COST;

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
             "outside_cost = %d .", inside_cost, outside_cost);

  /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
  stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
}
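
/* For instance (numbers are only illustrative): an interleaved group of 4
   stores costed with ncopies == 1 adds exact_log2 (4) * 4 == 8 permute
   stmts, i.e. 8 * TARG_VEC_STMT_COST, all attributed to the first store of
   the group; each member of the group additionally gets its own
   ncopies * TARG_VEC_STORE_COST.  */
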

/* Function vect_model_load_cost.

   Models cost for loads.  In the case of strided accesses, one access in
   the group has the overhead of the strided access attributed to it.  Since
   unaligned accesses are supported for loads, we also account for the costs
   of the access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, slp_tree slp_node)
{
  int group_size;
  int alignment_support_scheme;
  gimple first_stmt;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
  int inside_cost = 0, outside_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* Strided accesses?  */
  first_stmt = DR_GROUP_FIRST_DR (stmt_info);
  if (first_stmt && !slp_node)
    {
      group_size = vect_cost_strided_group_size (stmt_info);
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a strided access.  */
  else
    {
      group_size = 1;
      first_dr = dr;
    }

  alignment_support_scheme = vect_supportable_dr_alignment (first_dr);

  /* Is this an access in a group of loads providing strided access?
     If so, add in the cost of the permutes.  */
  if (group_size > 1)
    {
      /* Uses even and odd extract operations for each needed permute.  */
      inside_cost = ncopies * exact_log2 (group_size) * group_size
                    * TARG_VEC_STMT_COST;

      if (vect_print_dump_info (REPORT_COST))
        fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
                 group_size);
    }

  /* The loads themselves.  */
  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        inside_cost += ncopies * TARG_VEC_LOAD_COST;

        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_load_cost: aligned.");

        break;
      }
    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned load.  */
        inside_cost += ncopies * TARG_VEC_UNALIGNED_LOAD_COST;

        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
                   "hardware.");

        break;
      }
    case dr_explicit_realign:
      {
        inside_cost += ncopies * (2 * TARG_VEC_LOAD_COST + TARG_VEC_STMT_COST);

        /* FIXME: If the misalignment remains fixed across the iterations of
           the containing loop, the following cost should be added to the
           outside costs.  */
        if (targetm.vectorize.builtin_mask_for_load)
          inside_cost += TARG_VEC_STMT_COST;

        break;
      }
    case dr_explicit_realign_optimized:
      {
        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_load_cost: unaligned software "
                   "pipelined.");

        /* Unaligned software pipeline has a load of an address, an initial
           load, and possibly a mask operation to "prime" the loop.  However,
           if this is an access in a group of loads, which provide strided
           access, then the above cost should only be considered for one
           access in the group.  Inside the loop, there is a load op
           and a realignment op.  */

        if ((!DR_GROUP_FIRST_DR (stmt_info)) || group_size > 1 || slp_node)
          {
            outside_cost = 2 * TARG_VEC_STMT_COST;
            if (targetm.vectorize.builtin_mask_for_load)
              outside_cost += TARG_VEC_STMT_COST;
          }

        inside_cost += ncopies * (TARG_VEC_LOAD_COST + TARG_VEC_STMT_COST);

        break;
      }

    default:
      gcc_unreachable ();
    }

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
             "outside_cost = %d .", inside_cost, outside_cost);

  /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
  stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
}


/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new vector variable with
   the vector elements of VECTOR_VAR.  Place the initialization at BSI if it
   is not NULL.  Otherwise, place the initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.
*/ 814 815 tree 816 vect_init_vector (gimple stmt, tree vector_var, tree vector_type, 817 gimple_stmt_iterator *gsi) 818 { 819 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt); 820 tree new_var; 821 gimple init_stmt; 822 tree vec_oprnd; 823 edge pe; 824 tree new_temp; 825 basic_block new_bb; 826 827 new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_"); 828 add_referenced_var (new_var); 829 init_stmt = gimple_build_assign (new_var, vector_var); 830 new_temp = make_ssa_name (new_var, init_stmt); 831 gimple_assign_set_lhs (init_stmt, new_temp); 832 833 if (gsi) 834 vect_finish_stmt_generation (stmt, init_stmt, gsi); 835 else 836 { 837 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); 838 839 if (loop_vinfo) 840 { 841 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 842 843 if (nested_in_vect_loop_p (loop, stmt)) 844 loop = loop->inner; 845 846 pe = loop_preheader_edge (loop); 847 new_bb = gsi_insert_on_edge_immediate (pe, init_stmt); 848 gcc_assert (!new_bb); 849 } 850 else 851 { 852 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo); 853 basic_block bb; 854 gimple_stmt_iterator gsi_bb_start; 855 856 gcc_assert (bb_vinfo); 857 bb = BB_VINFO_BB (bb_vinfo); 858 gsi_bb_start = gsi_after_labels (bb); 859 gsi_insert_before (&gsi_bb_start, init_stmt, GSI_SAME_STMT); 860 } 861 } 862 863 if (vect_print_dump_info (REPORT_DETAILS)) 864 { 865 fprintf (vect_dump, "created new init_stmt: "); 866 print_gimple_stmt (vect_dump, init_stmt, 0, TDF_SLIM); 867 } 868 869 vec_oprnd = gimple_assign_lhs (init_stmt); 870 return vec_oprnd; 871 } 872 873 874 /* Function vect_get_vec_def_for_operand. 875 876 OP is an operand in STMT. This function returns a (vector) def that will be 877 used in the vectorized stmt for STMT. 878 879 In the case that OP is an SSA_NAME which is defined in the loop, then 880 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def. 881 882 In case OP is an invariant or constant, a new stmt that creates a vector def 883 needs to be introduced. */ 884 885 tree 886 vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def) 887 { 888 tree vec_oprnd; 889 gimple vec_stmt; 890 gimple def_stmt; 891 stmt_vec_info def_stmt_info = NULL; 892 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt); 893 unsigned int nunits; 894 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); 895 tree vec_inv; 896 tree vec_cst; 897 tree t = NULL_TREE; 898 tree def; 899 int i; 900 enum vect_def_type dt; 901 bool is_simple_use; 902 tree vector_type; 903 904 if (vect_print_dump_info (REPORT_DETAILS)) 905 { 906 fprintf (vect_dump, "vect_get_vec_def_for_operand: "); 907 print_generic_expr (vect_dump, op, TDF_SLIM); 908 } 909 910 is_simple_use = vect_is_simple_use (op, loop_vinfo, NULL, &def_stmt, &def, 911 &dt); 912 gcc_assert (is_simple_use); 913 if (vect_print_dump_info (REPORT_DETAILS)) 914 { 915 if (def) 916 { 917 fprintf (vect_dump, "def = "); 918 print_generic_expr (vect_dump, def, TDF_SLIM); 919 } 920 if (def_stmt) 921 { 922 fprintf (vect_dump, " def_stmt = "); 923 print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM); 924 } 925 } 926 927 switch (dt) 928 { 929 /* Case 1: operand is a constant. */ 930 case vect_constant_def: 931 { 932 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op)); 933 gcc_assert (vector_type); 934 nunits = TYPE_VECTOR_SUBPARTS (vector_type); 935 936 if (scalar_def) 937 *scalar_def = op; 938 939 /* Create 'vect_cst_ = {cst,cst,...,cst}' */ 940 if (vect_print_dump_info (REPORT_DETAILS)) 941 fprintf (vect_dump, "Create vector_cst. 
nunits = %d", nunits); 942 943 for (i = nunits - 1; i >= 0; --i) 944 { 945 t = tree_cons (NULL_TREE, op, t); 946 } 947 vec_cst = build_vector (vector_type, t); 948 return vect_init_vector (stmt, vec_cst, vector_type, NULL); 949 } 950 951 /* Case 2: operand is defined outside the loop - loop invariant. */ 952 case vect_external_def: 953 { 954 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def)); 955 gcc_assert (vector_type); 956 nunits = TYPE_VECTOR_SUBPARTS (vector_type); 957 958 if (scalar_def) 959 *scalar_def = def; 960 961 /* Create 'vec_inv = {inv,inv,..,inv}' */ 962 if (vect_print_dump_info (REPORT_DETAILS)) 963 fprintf (vect_dump, "Create vector_inv."); 964 965 for (i = nunits - 1; i >= 0; --i) 966 { 967 t = tree_cons (NULL_TREE, def, t); 968 } 969 970 /* FIXME: use build_constructor directly. */ 971 vec_inv = build_constructor_from_list (vector_type, t); 972 return vect_init_vector (stmt, vec_inv, vector_type, NULL); 973 } 974 975 /* Case 3: operand is defined inside the loop. */ 976 case vect_internal_def: 977 { 978 if (scalar_def) 979 *scalar_def = NULL/* FIXME tuples: def_stmt*/; 980 981 /* Get the def from the vectorized stmt. */ 982 def_stmt_info = vinfo_for_stmt (def_stmt); 983 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info); 984 gcc_assert (vec_stmt); 985 if (gimple_code (vec_stmt) == GIMPLE_PHI) 986 vec_oprnd = PHI_RESULT (vec_stmt); 987 else if (is_gimple_call (vec_stmt)) 988 vec_oprnd = gimple_call_lhs (vec_stmt); 989 else 990 vec_oprnd = gimple_assign_lhs (vec_stmt); 991 return vec_oprnd; 992 } 993 994 /* Case 4: operand is defined by a loop header phi - reduction */ 995 case vect_reduction_def: 996 case vect_double_reduction_def: 997 case vect_nested_cycle: 998 { 999 struct loop *loop; 1000 1001 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI); 1002 loop = (gimple_bb (def_stmt))->loop_father; 1003 1004 /* Get the def before the loop */ 1005 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop)); 1006 return get_initial_def_for_reduction (stmt, op, scalar_def); 1007 } 1008 1009 /* Case 5: operand is defined by loop-header phi - induction. */ 1010 case vect_induction_def: 1011 { 1012 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI); 1013 1014 /* Get the def from the vectorized stmt. */ 1015 def_stmt_info = vinfo_for_stmt (def_stmt); 1016 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info); 1017 if (gimple_code (vec_stmt) == GIMPLE_PHI) 1018 vec_oprnd = PHI_RESULT (vec_stmt); 1019 else 1020 vec_oprnd = gimple_get_lhs (vec_stmt); 1021 return vec_oprnd; 1022 } 1023 1024 default: 1025 gcc_unreachable (); 1026 } 1027 } 1028 1029 1030 /* Function vect_get_vec_def_for_stmt_copy 1031 1032 Return a vector-def for an operand. This function is used when the 1033 vectorized stmt to be created (by the caller to this function) is a "copy" 1034 created in case the vectorized result cannot fit in one vector, and several 1035 copies of the vector-stmt are required. In this case the vector-def is 1036 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field 1037 of the stmt that defines VEC_OPRND. 1038 DT is the type of the vector def VEC_OPRND. 1039 1040 Context: 1041 In case the vectorization factor (VF) is bigger than the number 1042 of elements that can fit in a vectype (nunits), we have to generate 1043 more than one vector stmt to vectorize the scalar stmt. 
This situation 1044 arises when there are multiple data-types operated upon in the loop; the 1045 smallest data-type determines the VF, and as a result, when vectorizing 1046 stmts operating on wider types we need to create 'VF/nunits' "copies" of the 1047 vector stmt (each computing a vector of 'nunits' results, and together 1048 computing 'VF' results in each iteration). This function is called when 1049 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in 1050 which VF=16 and nunits=4, so the number of copies required is 4): 1051 1052 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT 1053 1054 S1: x = load VS1.0: vx.0 = memref0 VS1.1 1055 VS1.1: vx.1 = memref1 VS1.2 1056 VS1.2: vx.2 = memref2 VS1.3 1057 VS1.3: vx.3 = memref3 1058 1059 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1 1060 VSnew.1: vz1 = vx.1 + ... VSnew.2 1061 VSnew.2: vz2 = vx.2 + ... VSnew.3 1062 VSnew.3: vz3 = vx.3 + ... 1063 1064 The vectorization of S1 is explained in vectorizable_load. 1065 The vectorization of S2: 1066 To create the first vector-stmt out of the 4 copies - VSnew.0 - 1067 the function 'vect_get_vec_def_for_operand' is called to 1068 get the relevant vector-def for each operand of S2. For operand x it 1069 returns the vector-def 'vx.0'. 1070 1071 To create the remaining copies of the vector-stmt (VSnew.j), this 1072 function is called to get the relevant vector-def for each operand. It is 1073 obtained from the respective VS1.j stmt, which is recorded in the 1074 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND. 1075 1076 For example, to obtain the vector-def 'vx.1' in order to create the 1077 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'. 1078 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the 1079 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1', 1080 and return its def ('vx.1'). 1081 Overall, to create the above sequence this function will be called 3 times: 1082 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0); 1083 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1); 1084 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */ 1085 1086 tree 1087 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd) 1088 { 1089 gimple vec_stmt_for_operand; 1090 stmt_vec_info def_stmt_info; 1091 1092 /* Do nothing; can reuse same def. */ 1093 if (dt == vect_external_def || dt == vect_constant_def ) 1094 return vec_oprnd; 1095 1096 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd); 1097 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand); 1098 gcc_assert (def_stmt_info); 1099 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info); 1100 gcc_assert (vec_stmt_for_operand); 1101 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand); 1102 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI) 1103 vec_oprnd = PHI_RESULT (vec_stmt_for_operand); 1104 else 1105 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand); 1106 return vec_oprnd; 1107 } 1108 1109 1110 /* Get vectorized definitions for the operands to create a copy of an original 1111 stmt. See vect_get_vec_def_for_stmt_copy() for details. 
*/ 1112 1113 static void 1114 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt, 1115 VEC(tree,heap) **vec_oprnds0, 1116 VEC(tree,heap) **vec_oprnds1) 1117 { 1118 tree vec_oprnd = VEC_pop (tree, *vec_oprnds0); 1119 1120 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd); 1121 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd); 1122 1123 if (vec_oprnds1 && *vec_oprnds1) 1124 { 1125 vec_oprnd = VEC_pop (tree, *vec_oprnds1); 1126 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd); 1127 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd); 1128 } 1129 } 1130 1131 1132 /* Get vectorized definitions for OP0 and OP1, or SLP_NODE if it is not NULL. */ 1133 1134 static void 1135 vect_get_vec_defs (tree op0, tree op1, gimple stmt, 1136 VEC(tree,heap) **vec_oprnds0, VEC(tree,heap) **vec_oprnds1, 1137 slp_tree slp_node) 1138 { 1139 if (slp_node) 1140 vect_get_slp_defs (op0, op1, slp_node, vec_oprnds0, vec_oprnds1); 1141 else 1142 { 1143 tree vec_oprnd; 1144 1145 *vec_oprnds0 = VEC_alloc (tree, heap, 1); 1146 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL); 1147 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd); 1148 1149 if (op1) 1150 { 1151 *vec_oprnds1 = VEC_alloc (tree, heap, 1); 1152 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL); 1153 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd); 1154 } 1155 } 1156 } 1157 1158 1159 /* Function vect_finish_stmt_generation. 1160 1161 Insert a new stmt. */ 1162 1163 void 1164 vect_finish_stmt_generation (gimple stmt, gimple vec_stmt, 1165 gimple_stmt_iterator *gsi) 1166 { 1167 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 1168 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 1169 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); 1170 1171 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL); 1172 1173 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT); 1174 1175 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo, 1176 bb_vinfo)); 1177 1178 if (vect_print_dump_info (REPORT_DETAILS)) 1179 { 1180 fprintf (vect_dump, "add new stmt: "); 1181 print_gimple_stmt (vect_dump, vec_stmt, 0, TDF_SLIM); 1182 } 1183 1184 gimple_set_location (vec_stmt, gimple_location (gsi_stmt (*gsi))); 1185 } 1186 1187 /* Checks if CALL can be vectorized in type VECTYPE. Returns 1188 a function declaration if the target has a vectorized version 1189 of the function, or NULL_TREE if the function cannot be vectorized. */ 1190 1191 tree 1192 vectorizable_function (gimple call, tree vectype_out, tree vectype_in) 1193 { 1194 tree fndecl = gimple_call_fndecl (call); 1195 1196 /* We only handle functions that do not read or clobber memory -- i.e. 1197 const or novops ones. */ 1198 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS))) 1199 return NULL_TREE; 1200 1201 if (!fndecl 1202 || TREE_CODE (fndecl) != FUNCTION_DECL 1203 || !DECL_BUILT_IN (fndecl)) 1204 return NULL_TREE; 1205 1206 return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out, 1207 vectype_in); 1208 } 1209 1210 /* Function vectorizable_call. 1211 1212 Check if STMT performs a function call that can be vectorized. 1213 If VEC_STMT is also passed, vectorize the STMT: create a vectorized 1214 stmt to replace it, put it in VEC_STMT, and insert it at BSI. 1215 Return FALSE if not a vectorizable STMT, TRUE otherwise. 
*/ 1216 1217 static bool 1218 vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt) 1219 { 1220 tree vec_dest; 1221 tree scalar_dest; 1222 tree op, type; 1223 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE; 1224 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info; 1225 tree vectype_out, vectype_in; 1226 int nunits_in; 1227 int nunits_out; 1228 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 1229 tree fndecl, new_temp, def, rhs_type, lhs_type; 1230 gimple def_stmt; 1231 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type}; 1232 gimple new_stmt = NULL; 1233 int ncopies, j; 1234 VEC(tree, heap) *vargs = NULL; 1235 enum { NARROW, NONE, WIDEN } modifier; 1236 size_t i, nargs; 1237 1238 /* FORNOW: unsupported in basic block SLP. */ 1239 gcc_assert (loop_vinfo); 1240 1241 if (!STMT_VINFO_RELEVANT_P (stmt_info)) 1242 return false; 1243 1244 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def) 1245 return false; 1246 1247 /* FORNOW: SLP not supported. */ 1248 if (STMT_SLP_TYPE (stmt_info)) 1249 return false; 1250 1251 /* Is STMT a vectorizable call? */ 1252 if (!is_gimple_call (stmt)) 1253 return false; 1254 1255 if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME) 1256 return false; 1257 1258 /* Process function arguments. */ 1259 rhs_type = NULL_TREE; 1260 nargs = gimple_call_num_args (stmt); 1261 1262 /* Bail out if the function has more than two arguments, we 1263 do not have interesting builtin functions to vectorize with 1264 more than two arguments. No arguments is also not good. */ 1265 if (nargs == 0 || nargs > 2) 1266 return false; 1267 1268 for (i = 0; i < nargs; i++) 1269 { 1270 op = gimple_call_arg (stmt, i); 1271 1272 /* We can only handle calls with arguments of the same type. */ 1273 if (rhs_type 1274 && !types_compatible_p (rhs_type, TREE_TYPE (op))) 1275 { 1276 if (vect_print_dump_info (REPORT_DETAILS)) 1277 fprintf (vect_dump, "argument types differ."); 1278 return false; 1279 } 1280 rhs_type = TREE_TYPE (op); 1281 1282 if (!vect_is_simple_use (op, loop_vinfo, NULL, &def_stmt, &def, &dt[i])) 1283 { 1284 if (vect_print_dump_info (REPORT_DETAILS)) 1285 fprintf (vect_dump, "use not simple."); 1286 return false; 1287 } 1288 } 1289 1290 vectype_in = get_vectype_for_scalar_type (rhs_type); 1291 if (!vectype_in) 1292 return false; 1293 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in); 1294 1295 lhs_type = TREE_TYPE (gimple_call_lhs (stmt)); 1296 vectype_out = get_vectype_for_scalar_type (lhs_type); 1297 if (!vectype_out) 1298 return false; 1299 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out); 1300 1301 /* FORNOW */ 1302 if (nunits_in == nunits_out / 2) 1303 modifier = NARROW; 1304 else if (nunits_out == nunits_in) 1305 modifier = NONE; 1306 else if (nunits_out == nunits_in / 2) 1307 modifier = WIDEN; 1308 else 1309 return false; 1310 1311 /* For now, we only vectorize functions if a target specific builtin 1312 is available. TODO -- in some cases, it might be profitable to 1313 insert the calls for pieces of the vector, in order to be able 1314 to vectorize other operations in the loop. 
*/ 1315 fndecl = vectorizable_function (stmt, vectype_out, vectype_in); 1316 if (fndecl == NULL_TREE) 1317 { 1318 if (vect_print_dump_info (REPORT_DETAILS)) 1319 fprintf (vect_dump, "function is not vectorizable."); 1320 1321 return false; 1322 } 1323 1324 gcc_assert (!gimple_vuse (stmt)); 1325 1326 if (modifier == NARROW) 1327 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out; 1328 else 1329 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in; 1330 1331 /* Sanity check: make sure that at least one copy of the vectorized stmt 1332 needs to be generated. */ 1333 gcc_assert (ncopies >= 1); 1334 1335 if (!vec_stmt) /* transformation not required. */ 1336 { 1337 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type; 1338 if (vect_print_dump_info (REPORT_DETAILS)) 1339 fprintf (vect_dump, "=== vectorizable_call ==="); 1340 vect_model_simple_cost (stmt_info, ncopies, dt, NULL); 1341 return true; 1342 } 1343 1344 /** Transform. **/ 1345 1346 if (vect_print_dump_info (REPORT_DETAILS)) 1347 fprintf (vect_dump, "transform operation."); 1348 1349 /* Handle def. */ 1350 scalar_dest = gimple_call_lhs (stmt); 1351 vec_dest = vect_create_destination_var (scalar_dest, vectype_out); 1352 1353 prev_stmt_info = NULL; 1354 switch (modifier) 1355 { 1356 case NONE: 1357 for (j = 0; j < ncopies; ++j) 1358 { 1359 /* Build argument list for the vectorized call. */ 1360 if (j == 0) 1361 vargs = VEC_alloc (tree, heap, nargs); 1362 else 1363 VEC_truncate (tree, vargs, 0); 1364 1365 for (i = 0; i < nargs; i++) 1366 { 1367 op = gimple_call_arg (stmt, i); 1368 if (j == 0) 1369 vec_oprnd0 1370 = vect_get_vec_def_for_operand (op, stmt, NULL); 1371 else 1372 { 1373 vec_oprnd0 = gimple_call_arg (new_stmt, i); 1374 vec_oprnd0 1375 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0); 1376 } 1377 1378 VEC_quick_push (tree, vargs, vec_oprnd0); 1379 } 1380 1381 new_stmt = gimple_build_call_vec (fndecl, vargs); 1382 new_temp = make_ssa_name (vec_dest, new_stmt); 1383 gimple_call_set_lhs (new_stmt, new_temp); 1384 1385 vect_finish_stmt_generation (stmt, new_stmt, gsi); 1386 mark_symbols_for_renaming (new_stmt); 1387 1388 if (j == 0) 1389 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; 1390 else 1391 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; 1392 1393 prev_stmt_info = vinfo_for_stmt (new_stmt); 1394 } 1395 1396 break; 1397 1398 case NARROW: 1399 for (j = 0; j < ncopies; ++j) 1400 { 1401 /* Build argument list for the vectorized call. 
*/ 1402 if (j == 0) 1403 vargs = VEC_alloc (tree, heap, nargs * 2); 1404 else 1405 VEC_truncate (tree, vargs, 0); 1406 1407 for (i = 0; i < nargs; i++) 1408 { 1409 op = gimple_call_arg (stmt, i); 1410 if (j == 0) 1411 { 1412 vec_oprnd0 1413 = vect_get_vec_def_for_operand (op, stmt, NULL); 1414 vec_oprnd1 1415 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0); 1416 } 1417 else 1418 { 1419 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1); 1420 vec_oprnd0 1421 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1); 1422 vec_oprnd1 1423 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0); 1424 } 1425 1426 VEC_quick_push (tree, vargs, vec_oprnd0); 1427 VEC_quick_push (tree, vargs, vec_oprnd1); 1428 } 1429 1430 new_stmt = gimple_build_call_vec (fndecl, vargs); 1431 new_temp = make_ssa_name (vec_dest, new_stmt); 1432 gimple_call_set_lhs (new_stmt, new_temp); 1433 1434 vect_finish_stmt_generation (stmt, new_stmt, gsi); 1435 mark_symbols_for_renaming (new_stmt); 1436 1437 if (j == 0) 1438 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt; 1439 else 1440 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; 1441 1442 prev_stmt_info = vinfo_for_stmt (new_stmt); 1443 } 1444 1445 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info); 1446 1447 break; 1448 1449 case WIDEN: 1450 /* No current target implements this case. */ 1451 return false; 1452 } 1453 1454 VEC_free (tree, heap, vargs); 1455 1456 /* Update the exception handling table with the vector stmt if necessary. */ 1457 if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt)) 1458 gimple_purge_dead_eh_edges (gimple_bb (stmt)); 1459 1460 /* The call in STMT might prevent it from being removed in dce. 1461 We however cannot remove it here, due to the way the ssa name 1462 it defines is mapped to the new definition. So just replace 1463 rhs of the statement with something harmless. */ 1464 1465 type = TREE_TYPE (scalar_dest); 1466 new_stmt = gimple_build_assign (gimple_call_lhs (stmt), 1467 fold_convert (type, integer_zero_node)); 1468 set_vinfo_for_stmt (new_stmt, stmt_info); 1469 set_vinfo_for_stmt (stmt, NULL); 1470 STMT_VINFO_STMT (stmt_info) = new_stmt; 1471 gsi_replace (gsi, new_stmt, false); 1472 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt; 1473 1474 return true; 1475 } 1476 1477 1478 /* Function vect_gen_widened_results_half 1479 1480 Create a vector stmt whose code, type, number of arguments, and result 1481 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are 1482 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI. 1483 In the case that CODE is a CALL_EXPR, this means that a call to DECL 1484 needs to be created (DECL is a function-decl of a target-builtin). 1485 STMT is the original scalar stmt that we are vectorizing. 
*/ 1486 1487 static gimple 1488 vect_gen_widened_results_half (enum tree_code code, 1489 tree decl, 1490 tree vec_oprnd0, tree vec_oprnd1, int op_type, 1491 tree vec_dest, gimple_stmt_iterator *gsi, 1492 gimple stmt) 1493 { 1494 gimple new_stmt; 1495 tree new_temp; 1496 1497 /* Generate half of the widened result: */ 1498 if (code == CALL_EXPR) 1499 { 1500 /* Target specific support */ 1501 if (op_type == binary_op) 1502 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1); 1503 else 1504 new_stmt = gimple_build_call (decl, 1, vec_oprnd0); 1505 new_temp = make_ssa_name (vec_dest, new_stmt); 1506 gimple_call_set_lhs (new_stmt, new_temp); 1507 } 1508 else 1509 { 1510 /* Generic support */ 1511 gcc_assert (op_type == TREE_CODE_LENGTH (code)); 1512 if (op_type != binary_op) 1513 vec_oprnd1 = NULL; 1514 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0, 1515 vec_oprnd1); 1516 new_temp = make_ssa_name (vec_dest, new_stmt); 1517 gimple_assign_set_lhs (new_stmt, new_temp); 1518 } 1519 vect_finish_stmt_generation (stmt, new_stmt, gsi); 1520 1521 return new_stmt; 1522 } 1523 1524 1525 /* Check if STMT performs a conversion operation, that can be vectorized. 1526 If VEC_STMT is also passed, vectorize the STMT: create a vectorized 1527 stmt to replace it, put it in VEC_STMT, and insert it at BSI. 1528 Return FALSE if not a vectorizable STMT, TRUE otherwise. */ 1529 1530 static bool 1531 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi, 1532 gimple *vec_stmt, slp_tree slp_node) 1533 { 1534 tree vec_dest; 1535 tree scalar_dest; 1536 tree op0; 1537 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE; 1538 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 1539 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 1540 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK; 1541 tree decl1 = NULL_TREE, decl2 = NULL_TREE; 1542 tree new_temp; 1543 tree def; 1544 gimple def_stmt; 1545 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type}; 1546 gimple new_stmt = NULL; 1547 stmt_vec_info prev_stmt_info; 1548 int nunits_in; 1549 int nunits_out; 1550 tree vectype_out, vectype_in; 1551 int ncopies, j; 1552 tree rhs_type, lhs_type; 1553 tree builtin_decl; 1554 enum { NARROW, NONE, WIDEN } modifier; 1555 int i; 1556 VEC(tree,heap) *vec_oprnds0 = NULL; 1557 tree vop0; 1558 tree integral_type; 1559 VEC(tree,heap) *dummy = NULL; 1560 int dummy_int; 1561 1562 /* Is STMT a vectorizable conversion? */ 1563 1564 /* FORNOW: unsupported in basic block SLP. */ 1565 gcc_assert (loop_vinfo); 1566 1567 if (!STMT_VINFO_RELEVANT_P (stmt_info)) 1568 return false; 1569 1570 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def) 1571 return false; 1572 1573 if (!is_gimple_assign (stmt)) 1574 return false; 1575 1576 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME) 1577 return false; 1578 1579 code = gimple_assign_rhs_code (stmt); 1580 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR) 1581 return false; 1582 1583 /* Check types of lhs and rhs. 
*/ 1584 op0 = gimple_assign_rhs1 (stmt); 1585 rhs_type = TREE_TYPE (op0); 1586 vectype_in = get_vectype_for_scalar_type (rhs_type); 1587 if (!vectype_in) 1588 return false; 1589 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in); 1590 1591 scalar_dest = gimple_assign_lhs (stmt); 1592 lhs_type = TREE_TYPE (scalar_dest); 1593 vectype_out = get_vectype_for_scalar_type (lhs_type); 1594 if (!vectype_out) 1595 return false; 1596 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out); 1597 1598 /* FORNOW */ 1599 if (nunits_in == nunits_out / 2) 1600 modifier = NARROW; 1601 else if (nunits_out == nunits_in) 1602 modifier = NONE; 1603 else if (nunits_out == nunits_in / 2) 1604 modifier = WIDEN; 1605 else 1606 return false; 1607 1608 if (modifier == NONE) 1609 gcc_assert (STMT_VINFO_VECTYPE (stmt_info) == vectype_out); 1610 1611 /* Bail out if the types are both integral or non-integral. */ 1612 if ((INTEGRAL_TYPE_P (rhs_type) && INTEGRAL_TYPE_P (lhs_type)) 1613 || (!INTEGRAL_TYPE_P (rhs_type) && !INTEGRAL_TYPE_P (lhs_type))) 1614 return false; 1615 1616 integral_type = INTEGRAL_TYPE_P (rhs_type) ? vectype_in : vectype_out; 1617 1618 if (modifier == NARROW) 1619 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out; 1620 else 1621 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in; 1622 1623 /* FORNOW: SLP with multiple types is not supported. The SLP analysis verifies 1624 this, so we can safely override NCOPIES with 1 here. */ 1625 if (slp_node) 1626 ncopies = 1; 1627 1628 /* Sanity check: make sure that at least one copy of the vectorized stmt 1629 needs to be generated. */ 1630 gcc_assert (ncopies >= 1); 1631 1632 /* Check the operands of the operation. */ 1633 if (!vect_is_simple_use (op0, loop_vinfo, NULL, &def_stmt, &def, &dt[0])) 1634 { 1635 if (vect_print_dump_info (REPORT_DETAILS)) 1636 fprintf (vect_dump, "use not simple."); 1637 return false; 1638 } 1639 1640 /* Supportable by target? */ 1641 if ((modifier == NONE 1642 && !targetm.vectorize.builtin_conversion (code, integral_type)) 1643 || (modifier == WIDEN 1644 && !supportable_widening_operation (code, stmt, vectype_in, 1645 &decl1, &decl2, 1646 &code1, &code2, 1647 &dummy_int, &dummy)) 1648 || (modifier == NARROW 1649 && !supportable_narrowing_operation (code, stmt, vectype_in, 1650 &code1, &dummy_int, &dummy))) 1651 { 1652 if (vect_print_dump_info (REPORT_DETAILS)) 1653 fprintf (vect_dump, "conversion not supported by target."); 1654 return false; 1655 } 1656 1657 if (modifier != NONE) 1658 { 1659 STMT_VINFO_VECTYPE (stmt_info) = vectype_in; 1660 /* FORNOW: SLP not supported. */ 1661 if (STMT_SLP_TYPE (stmt_info)) 1662 return false; 1663 } 1664 1665 if (!vec_stmt) /* transformation not required. */ 1666 { 1667 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type; 1668 return true; 1669 } 1670 1671 /** Transform. **/ 1672 if (vect_print_dump_info (REPORT_DETAILS)) 1673 fprintf (vect_dump, "transform conversion."); 1674 1675 /* Handle def. 
*/ 1676 vec_dest = vect_create_destination_var (scalar_dest, vectype_out); 1677 1678 if (modifier == NONE && !slp_node) 1679 vec_oprnds0 = VEC_alloc (tree, heap, 1); 1680 1681 prev_stmt_info = NULL; 1682 switch (modifier) 1683 { 1684 case NONE: 1685 for (j = 0; j < ncopies; j++) 1686 { 1687 if (j == 0) 1688 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node); 1689 else 1690 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL); 1691 1692 builtin_decl = 1693 targetm.vectorize.builtin_conversion (code, integral_type); 1694 for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vop0); i++) 1695 { 1696 /* Arguments are ready. create the new vector stmt. */ 1697 new_stmt = gimple_build_call (builtin_decl, 1, vop0); 1698 new_temp = make_ssa_name (vec_dest, new_stmt); 1699 gimple_call_set_lhs (new_stmt, new_temp); 1700 vect_finish_stmt_generation (stmt, new_stmt, gsi); 1701 if (slp_node) 1702 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt); 1703 } 1704 1705 if (j == 0) 1706 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; 1707 else 1708 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; 1709 prev_stmt_info = vinfo_for_stmt (new_stmt); 1710 } 1711 break; 1712 1713 case WIDEN: 1714 /* In case the vectorization factor (VF) is bigger than the number 1715 of elements that we can fit in a vectype (nunits), we have to 1716 generate more than one vector stmt - i.e - we need to "unroll" 1717 the vector stmt by a factor VF/nunits. */ 1718 for (j = 0; j < ncopies; j++) 1719 { 1720 if (j == 0) 1721 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL); 1722 else 1723 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0); 1724 1725 STMT_VINFO_VECTYPE (stmt_info) = vectype_in; 1726 1727 /* Generate first half of the widened result: */ 1728 new_stmt 1729 = vect_gen_widened_results_half (code1, decl1, 1730 vec_oprnd0, vec_oprnd1, 1731 unary_op, vec_dest, gsi, stmt); 1732 if (j == 0) 1733 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt; 1734 else 1735 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; 1736 prev_stmt_info = vinfo_for_stmt (new_stmt); 1737 1738 /* Generate second half of the widened result: */ 1739 new_stmt 1740 = vect_gen_widened_results_half (code2, decl2, 1741 vec_oprnd0, vec_oprnd1, 1742 unary_op, vec_dest, gsi, stmt); 1743 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; 1744 prev_stmt_info = vinfo_for_stmt (new_stmt); 1745 } 1746 break; 1747 1748 case NARROW: 1749 /* In case the vectorization factor (VF) is bigger than the number 1750 of elements that we can fit in a vectype (nunits), we have to 1751 generate more than one vector stmt - i.e - we need to "unroll" 1752 the vector stmt by a factor VF/nunits. */ 1753 for (j = 0; j < ncopies; j++) 1754 { 1755 /* Handle uses. */ 1756 if (j == 0) 1757 { 1758 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL); 1759 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0); 1760 } 1761 else 1762 { 1763 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd1); 1764 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0); 1765 } 1766 1767 /* Arguments are ready. Create the new vector stmt. 
*/ 1768 new_stmt = gimple_build_assign_with_ops (code1, vec_dest, vec_oprnd0, 1769 vec_oprnd1); 1770 new_temp = make_ssa_name (vec_dest, new_stmt); 1771 gimple_assign_set_lhs (new_stmt, new_temp); 1772 vect_finish_stmt_generation (stmt, new_stmt, gsi); 1773 1774 if (j == 0) 1775 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt; 1776 else 1777 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; 1778 1779 prev_stmt_info = vinfo_for_stmt (new_stmt); 1780 } 1781 1782 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info); 1783 } 1784 1785 if (vec_oprnds0) 1786 VEC_free (tree, heap, vec_oprnds0); 1787 1788 return true; 1789 } 1790 /* Function vectorizable_assignment. 1791 1792 Check if STMT performs an assignment (copy) that can be vectorized. 1793 If VEC_STMT is also passed, vectorize the STMT: create a vectorized 1794 stmt to replace it, put it in VEC_STMT, and insert it at BSI. 1795 Return FALSE if not a vectorizable STMT, TRUE otherwise. */ 1796 1797 static bool 1798 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi, 1799 gimple *vec_stmt, slp_tree slp_node) 1800 { 1801 tree vec_dest; 1802 tree scalar_dest; 1803 tree op; 1804 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 1805 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 1806 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 1807 tree new_temp; 1808 tree def; 1809 gimple def_stmt; 1810 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type}; 1811 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype); 1812 int ncopies; 1813 int i, j; 1814 VEC(tree,heap) *vec_oprnds = NULL; 1815 tree vop; 1816 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); 1817 gimple new_stmt = NULL; 1818 stmt_vec_info prev_stmt_info = NULL; 1819 enum tree_code code; 1820 tree vectype_in, vectype_out; 1821 1822 /* Multiple types in SLP are handled by creating the appropriate number of 1823 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in 1824 case of SLP. */ 1825 if (slp_node) 1826 ncopies = 1; 1827 else 1828 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits; 1829 1830 gcc_assert (ncopies >= 1); 1831 1832 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) 1833 return false; 1834 1835 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def) 1836 return false; 1837 1838 /* Is vectorizable assignment? */ 1839 if (!is_gimple_assign (stmt)) 1840 return false; 1841 1842 scalar_dest = gimple_assign_lhs (stmt); 1843 if (TREE_CODE (scalar_dest) != SSA_NAME) 1844 return false; 1845 1846 code = gimple_assign_rhs_code (stmt); 1847 if (gimple_assign_single_p (stmt) 1848 || code == PAREN_EXPR 1849 || CONVERT_EXPR_CODE_P (code)) 1850 op = gimple_assign_rhs1 (stmt); 1851 else 1852 return false; 1853 1854 if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt[0])) 1855 { 1856 if (vect_print_dump_info (REPORT_DETAILS)) 1857 fprintf (vect_dump, "use not simple."); 1858 return false; 1859 } 1860 1861 /* We can handle NOP_EXPR conversions that do not change the number 1862 of elements or the vector size. */ 1863 vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op)); 1864 vectype_out 1865 = get_vectype_for_scalar_type (TREE_TYPE (gimple_assign_lhs (stmt))); 1866 if (CONVERT_EXPR_CODE_P (code) 1867 && (!vectype_in 1868 || !vectype_out 1869 || (TYPE_VECTOR_SUBPARTS (vectype_out) 1870 != TYPE_VECTOR_SUBPARTS (vectype_in)) 1871 || (GET_MODE_SIZE (TYPE_MODE (vectype_out)) 1872 != GET_MODE_SIZE (TYPE_MODE (vectype_in))))) 1873 return false; 1874 1875 if (!vec_stmt) /* transformation not required. 
*/ 1876 { 1877 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type; 1878 if (vect_print_dump_info (REPORT_DETAILS)) 1879 fprintf (vect_dump, "=== vectorizable_assignment ==="); 1880 vect_model_simple_cost (stmt_info, ncopies, dt, NULL); 1881 return true; 1882 } 1883 1884 /** Transform. **/ 1885 if (vect_print_dump_info (REPORT_DETAILS)) 1886 fprintf (vect_dump, "transform assignment."); 1887 1888 /* Handle def. */ 1889 vec_dest = vect_create_destination_var (scalar_dest, vectype); 1890 1891 /* Handle use. */ 1892 for (j = 0; j < ncopies; j++) 1893 { 1894 /* Handle uses. */ 1895 if (j == 0) 1896 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node); 1897 else 1898 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL); 1899 1900 /* Arguments are ready. create the new vector stmt. */ 1901 for (i = 0; VEC_iterate (tree, vec_oprnds, i, vop); i++) 1902 { 1903 if (CONVERT_EXPR_CODE_P (code)) 1904 vop = build1 (VIEW_CONVERT_EXPR, vectype_out, vop); 1905 new_stmt = gimple_build_assign (vec_dest, vop); 1906 new_temp = make_ssa_name (vec_dest, new_stmt); 1907 gimple_assign_set_lhs (new_stmt, new_temp); 1908 vect_finish_stmt_generation (stmt, new_stmt, gsi); 1909 if (slp_node) 1910 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt); 1911 } 1912 1913 if (slp_node) 1914 continue; 1915 1916 if (j == 0) 1917 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; 1918 else 1919 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; 1920 1921 prev_stmt_info = vinfo_for_stmt (new_stmt); 1922 } 1923 1924 VEC_free (tree, heap, vec_oprnds); 1925 return true; 1926 } 1927 1928 1929 /* Function vectorizable_shift. 1930 1931 Check if STMT performs a shift operation that can be vectorized. 1932 If VEC_STMT is also passed, vectorize the STMT: create a vectorized 1933 stmt to replace it, put it in VEC_STMT, and insert it at BSI. 1934 Return FALSE if not a vectorizable STMT, TRUE otherwise. */ 1935 1936 static bool 1937 vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi, 1938 gimple *vec_stmt, slp_tree slp_node) 1939 { 1940 tree vec_dest; 1941 tree scalar_dest; 1942 tree op0, op1 = NULL; 1943 tree vec_oprnd1 = NULL_TREE; 1944 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 1945 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 1946 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 1947 enum tree_code code; 1948 enum machine_mode vec_mode; 1949 tree new_temp; 1950 int op_type; 1951 optab optab; 1952 int icode; 1953 enum machine_mode optab_op2_mode; 1954 tree def; 1955 gimple def_stmt; 1956 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type}; 1957 gimple new_stmt = NULL; 1958 stmt_vec_info prev_stmt_info; 1959 int nunits_in = TYPE_VECTOR_SUBPARTS (vectype); 1960 int nunits_out; 1961 tree vectype_out; 1962 int ncopies; 1963 int j, i; 1964 VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL; 1965 tree vop0, vop1; 1966 unsigned int k; 1967 bool scalar_shift_arg = false; 1968 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); 1969 int vf; 1970 1971 if (loop_vinfo) 1972 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); 1973 else 1974 vf = 1; 1975 1976 /* Multiple types in SLP are handled by creating the appropriate number of 1977 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in 1978 case of SLP. 
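For loop-based vectorization NCOPIES is VF divided by the number of elements in the vectype; e.g. with VF == 8 and a 4-element vector type, two copies of each vector shift stmt are created.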
*/ 1979 if (slp_node) 1980 ncopies = 1; 1981 else 1982 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in; 1983 1984 gcc_assert (ncopies >= 1); 1985 1986 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) 1987 return false; 1988 1989 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def) 1990 return false; 1991 1992 /* Is STMT a vectorizable shift? */ 1993 if (!is_gimple_assign (stmt)) 1994 return false; 1995 1996 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME) 1997 return false; 1998 1999 scalar_dest = gimple_assign_lhs (stmt); 2000 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest)); 2001 if (!vectype_out) 2002 return false; 2003 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out); 2004 if (nunits_out != nunits_in) 2005 return false; 2006 2007 code = gimple_assign_rhs_code (stmt); 2008 2009 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR 2010 || code == RROTATE_EXPR)) 2011 return false; 2012 2013 op_type = TREE_CODE_LENGTH (code); 2014 op0 = gimple_assign_rhs1 (stmt); 2015 if (!vect_is_simple_use (op0, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt[0])) 2016 { 2017 if (vect_print_dump_info (REPORT_DETAILS)) 2018 fprintf (vect_dump, "use not simple."); 2019 return false; 2020 } 2021 2022 op1 = gimple_assign_rhs2 (stmt); 2023 if (!vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt[1])) 2024 { 2025 if (vect_print_dump_info (REPORT_DETAILS)) 2026 fprintf (vect_dump, "use not simple."); 2027 return false; 2028 } 2029 2030 /* Determine whether the shift amount is a vector, or scalar. If the 2031 shift/rotate amount is a vector, use the vector/vector shift optabs. */ 2032 /* vector shifted by vector */ 2033 if (dt[1] == vect_internal_def) 2034 { 2035 optab = optab_for_tree_code (code, vectype, optab_vector); 2036 if (vect_print_dump_info (REPORT_DETAILS)) 2037 fprintf (vect_dump, "vector/vector shift/rotate found."); 2038 } 2039 2040 /* See if the machine has a vector shifted by scalar insn and if not 2041 then see if it has a vector shifted by vector insn */ 2042 else if (dt[1] == vect_constant_def || dt[1] == vect_external_def) 2043 { 2044 optab = optab_for_tree_code (code, vectype, optab_scalar); 2045 if (optab 2046 && (optab_handler (optab, TYPE_MODE (vectype))->insn_code 2047 != CODE_FOR_nothing)) 2048 { 2049 scalar_shift_arg = true; 2050 if (vect_print_dump_info (REPORT_DETAILS)) 2051 fprintf (vect_dump, "vector/scalar shift/rotate found."); 2052 } 2053 else 2054 { 2055 optab = optab_for_tree_code (code, vectype, optab_vector); 2056 if (optab 2057 && (optab_handler (optab, TYPE_MODE (vectype))->insn_code 2058 != CODE_FOR_nothing)) 2059 { 2060 if (vect_print_dump_info (REPORT_DETAILS)) 2061 fprintf (vect_dump, "vector/vector shift/rotate found."); 2062 2063 /* Unlike the other binary operators, shifts/rotates have 2064 the rhs being int, instead of the same type as the lhs, 2065 so make sure the scalar is the right type if we are 2066 dealing with vectors of short/char. */ 2067 if (dt[1] == vect_constant_def) 2068 op1 = fold_convert (TREE_TYPE (vectype), op1); 2069 } 2070 } 2071 } 2072 2073 else 2074 { 2075 if (vect_print_dump_info (REPORT_DETAILS)) 2076 fprintf (vect_dump, "operand mode requires invariant argument."); 2077 return false; 2078 } 2079 2080 /* Supportable by target? 
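That is, check whether the chosen optab has an insn pattern for the vector mode; if not, we may still be able to proceed in word mode when the vector fits in a single word (see below).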
*/ 2081 if (!optab) 2082 { 2083 if (vect_print_dump_info (REPORT_DETAILS)) 2084 fprintf (vect_dump, "no optab."); 2085 return false; 2086 } 2087 vec_mode = TYPE_MODE (vectype); 2088 icode = (int) optab_handler (optab, vec_mode)->insn_code; 2089 if (icode == CODE_FOR_nothing) 2090 { 2091 if (vect_print_dump_info (REPORT_DETAILS)) 2092 fprintf (vect_dump, "op not supported by target."); 2093 /* Check only during analysis. */ 2094 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD 2095 || (vf < vect_min_worthwhile_factor (code) 2096 && !vec_stmt)) 2097 return false; 2098 if (vect_print_dump_info (REPORT_DETAILS)) 2099 fprintf (vect_dump, "proceeding using word mode."); 2100 } 2101 2102 /* Worthwhile without SIMD support? Check only during analysis. */ 2103 if (!VECTOR_MODE_P (TYPE_MODE (vectype)) 2104 && vf < vect_min_worthwhile_factor (code) 2105 && !vec_stmt) 2106 { 2107 if (vect_print_dump_info (REPORT_DETAILS)) 2108 fprintf (vect_dump, "not worthwhile without SIMD support."); 2109 return false; 2110 } 2111 2112 if (!vec_stmt) /* transformation not required. */ 2113 { 2114 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type; 2115 if (vect_print_dump_info (REPORT_DETAILS)) 2116 fprintf (vect_dump, "=== vectorizable_shift ==="); 2117 vect_model_simple_cost (stmt_info, ncopies, dt, NULL); 2118 return true; 2119 } 2120 2121 /** Transform. **/ 2122 2123 if (vect_print_dump_info (REPORT_DETAILS)) 2124 fprintf (vect_dump, "transform shift."); 2125 2126 /* Handle def. */ 2127 vec_dest = vect_create_destination_var (scalar_dest, vectype); 2128 2129 /* Allocate VECs for vector operands. In case of SLP, vector operands are 2130 created in the previous stages of the recursion, so no allocation is 2131 needed, except for the case of shift with scalar shift argument. In that 2132 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to 2133 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE. 2134 In case of loop-based vectorization we allocate VECs of size 1. We 2135 allocate VEC_OPRNDS1 only in case of binary operation. */ 2136 if (!slp_node) 2137 { 2138 vec_oprnds0 = VEC_alloc (tree, heap, 1); 2139 vec_oprnds1 = VEC_alloc (tree, heap, 1); 2140 } 2141 else if (scalar_shift_arg) 2142 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size); 2143 2144 prev_stmt_info = NULL; 2145 for (j = 0; j < ncopies; j++) 2146 { 2147 /* Handle uses. */ 2148 if (j == 0) 2149 { 2150 if (scalar_shift_arg) 2151 { 2152 /* Vector shl and shr insn patterns can be defined with scalar 2153 operand 2 (shift operand). In this case, use constant or loop 2154 invariant op1 directly, without extending it to vector mode 2155 first. */ 2156 optab_op2_mode = insn_data[icode].operand[2].mode; 2157 if (!VECTOR_MODE_P (optab_op2_mode)) 2158 { 2159 if (vect_print_dump_info (REPORT_DETAILS)) 2160 fprintf (vect_dump, "operand 1 using scalar mode."); 2161 vec_oprnd1 = op1; 2162 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1); 2163 if (slp_node) 2164 { 2165 /* Store vec_oprnd1 for every vector stmt to be created 2166 for SLP_NODE. We check during the analysis that all the 2167 shift arguments are the same. 2168 TODO: Allow different constants for different vector 2169 stmts generated for an SLP instance. 
*/ 2170 for (k = 0; k < slp_node->vec_stmts_size - 1; k++) 2171 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1); 2172 } 2173 } 2174 } 2175 2176 /* vec_oprnd1 is available if operand 1 should be of a scalar-type 2177 (a special case for certain kind of vector shifts); otherwise, 2178 operand 1 should be of a vector type (the usual case). */ 2179 if (vec_oprnd1) 2180 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL, 2181 slp_node); 2182 else 2183 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1, 2184 slp_node); 2185 } 2186 else 2187 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1); 2188 2189 /* Arguments are ready. Create the new vector stmt. */ 2190 for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vop0); i++) 2191 { 2192 vop1 = VEC_index (tree, vec_oprnds1, i); 2193 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1); 2194 new_temp = make_ssa_name (vec_dest, new_stmt); 2195 gimple_assign_set_lhs (new_stmt, new_temp); 2196 vect_finish_stmt_generation (stmt, new_stmt, gsi); 2197 if (slp_node) 2198 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt); 2199 } 2200 2201 if (slp_node) 2202 continue; 2203 2204 if (j == 0) 2205 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; 2206 else 2207 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; 2208 prev_stmt_info = vinfo_for_stmt (new_stmt); 2209 } 2210 2211 VEC_free (tree, heap, vec_oprnds0); 2212 VEC_free (tree, heap, vec_oprnds1); 2213 2214 return true; 2215 } 2216 2217 2218 /* Function vectorizable_operation. 2219 2220 Check if STMT performs a binary or unary operation that can be vectorized. 2221 If VEC_STMT is also passed, vectorize the STMT: create a vectorized 2222 stmt to replace it, put it in VEC_STMT, and insert it at BSI. 2223 Return FALSE if not a vectorizable STMT, TRUE otherwise. */ 2224 2225 static bool 2226 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi, 2227 gimple *vec_stmt, slp_tree slp_node) 2228 { 2229 tree vec_dest; 2230 tree scalar_dest; 2231 tree op0, op1 = NULL; 2232 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 2233 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 2234 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 2235 enum tree_code code; 2236 enum machine_mode vec_mode; 2237 tree new_temp; 2238 int op_type; 2239 optab optab; 2240 int icode; 2241 tree def; 2242 gimple def_stmt; 2243 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type}; 2244 gimple new_stmt = NULL; 2245 stmt_vec_info prev_stmt_info; 2246 int nunits_in = TYPE_VECTOR_SUBPARTS (vectype); 2247 int nunits_out; 2248 tree vectype_out; 2249 int ncopies; 2250 int j, i; 2251 VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL; 2252 tree vop0, vop1; 2253 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); 2254 int vf; 2255 2256 if (loop_vinfo) 2257 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); 2258 else 2259 vf = 1; 2260 2261 /* Multiple types in SLP are handled by creating the appropriate number of 2262 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in 2263 case of SLP. */ 2264 if (slp_node) 2265 ncopies = 1; 2266 else 2267 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in; 2268 2269 gcc_assert (ncopies >= 1); 2270 2271 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) 2272 return false; 2273 2274 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def) 2275 return false; 2276 2277 /* Is STMT a vectorizable binary/unary operation? 
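Shifts and rotates are rejected here (they are handled by vectorizable_shift), and POINTER_PLUS_EXPR is mapped to a plain PLUS_EXPR for the vector operation.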
*/ 2278 if (!is_gimple_assign (stmt)) 2279 return false; 2280 2281 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME) 2282 return false; 2283 2284 scalar_dest = gimple_assign_lhs (stmt); 2285 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest)); 2286 if (!vectype_out) 2287 return false; 2288 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out); 2289 if (nunits_out != nunits_in) 2290 return false; 2291 2292 code = gimple_assign_rhs_code (stmt); 2293 2294 /* For pointer addition, we should use the normal plus for 2295 the vector addition. */ 2296 if (code == POINTER_PLUS_EXPR) 2297 code = PLUS_EXPR; 2298 2299 /* Support only unary or binary operations. */ 2300 op_type = TREE_CODE_LENGTH (code); 2301 if (op_type != unary_op && op_type != binary_op) 2302 { 2303 if (vect_print_dump_info (REPORT_DETAILS)) 2304 fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type); 2305 return false; 2306 } 2307 2308 op0 = gimple_assign_rhs1 (stmt); 2309 if (!vect_is_simple_use (op0, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt[0])) 2310 { 2311 if (vect_print_dump_info (REPORT_DETAILS)) 2312 fprintf (vect_dump, "use not simple."); 2313 return false; 2314 } 2315 2316 if (op_type == binary_op) 2317 { 2318 op1 = gimple_assign_rhs2 (stmt); 2319 if (!vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt, &def, 2320 &dt[1])) 2321 { 2322 if (vect_print_dump_info (REPORT_DETAILS)) 2323 fprintf (vect_dump, "use not simple."); 2324 return false; 2325 } 2326 } 2327 2328 /* Shifts are handled in vectorizable_shift (). */ 2329 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR 2330 || code == RROTATE_EXPR) 2331 return false; 2332 2333 optab = optab_for_tree_code (code, vectype, optab_default); 2334 2335 /* Supportable by target? */ 2336 if (!optab) 2337 { 2338 if (vect_print_dump_info (REPORT_DETAILS)) 2339 fprintf (vect_dump, "no optab."); 2340 return false; 2341 } 2342 vec_mode = TYPE_MODE (vectype); 2343 icode = (int) optab_handler (optab, vec_mode)->insn_code; 2344 if (icode == CODE_FOR_nothing) 2345 { 2346 if (vect_print_dump_info (REPORT_DETAILS)) 2347 fprintf (vect_dump, "op not supported by target."); 2348 /* Check only during analysis. */ 2349 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD 2350 || (vf < vect_min_worthwhile_factor (code) 2351 && !vec_stmt)) 2352 return false; 2353 if (vect_print_dump_info (REPORT_DETAILS)) 2354 fprintf (vect_dump, "proceeding using word mode."); 2355 } 2356 2357 /* Worthwhile without SIMD support? Check only during analysis. */ 2358 if (!VECTOR_MODE_P (TYPE_MODE (vectype)) 2359 && vf < vect_min_worthwhile_factor (code) 2360 && !vec_stmt) 2361 { 2362 if (vect_print_dump_info (REPORT_DETAILS)) 2363 fprintf (vect_dump, "not worthwhile without SIMD support."); 2364 return false; 2365 } 2366 2367 if (!vec_stmt) /* transformation not required. */ 2368 { 2369 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type; 2370 if (vect_print_dump_info (REPORT_DETAILS)) 2371 fprintf (vect_dump, "=== vectorizable_operation ==="); 2372 vect_model_simple_cost (stmt_info, ncopies, dt, NULL); 2373 return true; 2374 } 2375 2376 /** Transform. **/ 2377 2378 if (vect_print_dump_info (REPORT_DETAILS)) 2379 fprintf (vect_dump, "transform binary/unary operation."); 2380 2381 /* Handle def. */ 2382 vec_dest = vect_create_destination_var (scalar_dest, vectype); 2383 2384 /* Allocate VECs for vector operands. 
In case of SLP, vector operands are 2385 created in the previous stages of the recursion, so no allocation is 2386 needed, except for the case of shift with scalar shift argument. In that 2387 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to 2388 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE. 2389 In case of loop-based vectorization we allocate VECs of size 1. We 2390 allocate VEC_OPRNDS1 only in case of binary operation. */ 2391 if (!slp_node) 2392 { 2393 vec_oprnds0 = VEC_alloc (tree, heap, 1); 2394 if (op_type == binary_op) 2395 vec_oprnds1 = VEC_alloc (tree, heap, 1); 2396 } 2397 2398 /* In case the vectorization factor (VF) is bigger than the number 2399 of elements that we can fit in a vectype (nunits), we have to generate 2400 more than one vector stmt - i.e - we need to "unroll" the 2401 vector stmt by a factor VF/nunits. In doing so, we record a pointer 2402 from one copy of the vector stmt to the next, in the field 2403 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following 2404 stages to find the correct vector defs to be used when vectorizing 2405 stmts that use the defs of the current stmt. The example below illustrates 2406 the vectorization process when VF=16 and nunits=4 (i.e - we need to create 2407 4 vectorized stmts): 2408 2409 before vectorization: 2410 RELATED_STMT VEC_STMT 2411 S1: x = memref - - 2412 S2: z = x + 1 - - 2413 2414 step 1: vectorize stmt S1 (done in vectorizable_load. See more details 2415 there): 2416 RELATED_STMT VEC_STMT 2417 VS1_0: vx0 = memref0 VS1_1 - 2418 VS1_1: vx1 = memref1 VS1_2 - 2419 VS1_2: vx2 = memref2 VS1_3 - 2420 VS1_3: vx3 = memref3 - - 2421 S1: x = load - VS1_0 2422 S2: z = x + 1 - - 2423 2424 step2: vectorize stmt S2 (done here): 2425 To vectorize stmt S2 we first need to find the relevant vector 2426 def for the first operand 'x'. This is, as usual, obtained from 2427 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt 2428 that defines 'x' (S1). This way we find the stmt VS1_0, and the 2429 relevant vector def 'vx0'. Having found 'vx0' we can generate 2430 the vector stmt VS2_0, and as usual, record it in the 2431 STMT_VINFO_VEC_STMT of stmt S2. 2432 When creating the second copy (VS2_1), we obtain the relevant vector 2433 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of 2434 stmt VS1_0. This way we find the stmt VS1_1 and the relevant 2435 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a 2436 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0. 2437 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting 2438 chain of stmts and pointers: 2439 RELATED_STMT VEC_STMT 2440 VS1_0: vx0 = memref0 VS1_1 - 2441 VS1_1: vx1 = memref1 VS1_2 - 2442 VS1_2: vx2 = memref2 VS1_3 - 2443 VS1_3: vx3 = memref3 - - 2444 S1: x = load - VS1_0 2445 VS2_0: vz0 = vx0 + v1 VS2_1 - 2446 VS2_1: vz1 = vx1 + v1 VS2_2 - 2447 VS2_2: vz2 = vx2 + v1 VS2_3 - 2448 VS2_3: vz3 = vx3 + v1 - - 2449 S2: z = x + 1 - VS2_0 */ 2450 2451 prev_stmt_info = NULL; 2452 for (j = 0; j < ncopies; j++) 2453 { 2454 /* Handle uses. */ 2455 if (j == 0) 2456 { 2457 if (op_type == binary_op) 2458 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1, 2459 slp_node); 2460 else 2461 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL, 2462 slp_node); 2463 } 2464 else 2465 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1); 2466 2467 /* Arguments are ready. Create the new vector stmt. 
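For a binary operation the i-th def in VEC_OPRNDS1 is paired with the i-th def in VEC_OPRNDS0; for a unary operation VOP1 stays NULL.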
*/ 2468 for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vop0); i++) 2469 { 2470 vop1 = ((op_type == binary_op) 2471 ? VEC_index (tree, vec_oprnds1, i) : NULL); 2472 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1); 2473 new_temp = make_ssa_name (vec_dest, new_stmt); 2474 gimple_assign_set_lhs (new_stmt, new_temp); 2475 vect_finish_stmt_generation (stmt, new_stmt, gsi); 2476 if (slp_node) 2477 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt); 2478 } 2479 2480 if (slp_node) 2481 continue; 2482 2483 if (j == 0) 2484 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; 2485 else 2486 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; 2487 prev_stmt_info = vinfo_for_stmt (new_stmt); 2488 } 2489 2490 VEC_free (tree, heap, vec_oprnds0); 2491 if (vec_oprnds1) 2492 VEC_free (tree, heap, vec_oprnds1); 2493 2494 return true; 2495 } 2496 2497 2498 /* Get vectorized definitions for loop-based vectorization. For the first 2499 operand we call vect_get_vec_def_for_operand() (with OPRND containing 2500 scalar operand), and for the rest we get a copy with 2501 vect_get_vec_def_for_stmt_copy() using the previous vector definition 2502 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details. 2503 The vectors are collected into VEC_OPRNDS. */ 2504 2505 static void 2506 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt, 2507 VEC (tree, heap) **vec_oprnds, int multi_step_cvt) 2508 { 2509 tree vec_oprnd; 2510 2511 /* Get first vector operand. */ 2512 /* All the vector operands except the very first one (that is scalar oprnd) 2513 are stmt copies. */ 2514 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE) 2515 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL); 2516 else 2517 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd); 2518 2519 VEC_quick_push (tree, *vec_oprnds, vec_oprnd); 2520 2521 /* Get second vector operand. */ 2522 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd); 2523 VEC_quick_push (tree, *vec_oprnds, vec_oprnd); 2524 2525 *oprnd = vec_oprnd; 2526 2527 /* For conversion in multiple steps, continue to get operands 2528 recursively. */ 2529 if (multi_step_cvt) 2530 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1); 2531 } 2532 2533 2534 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS. 2535 For multi-step conversions store the resulting vectors and call the function 2536 recursively. */ 2537 2538 static void 2539 vect_create_vectorized_demotion_stmts (VEC (tree, heap) **vec_oprnds, 2540 int multi_step_cvt, gimple stmt, 2541 VEC (tree, heap) *vec_dsts, 2542 gimple_stmt_iterator *gsi, 2543 slp_tree slp_node, enum tree_code code, 2544 stmt_vec_info *prev_stmt_info) 2545 { 2546 unsigned int i; 2547 tree vop0, vop1, new_tmp, vec_dest; 2548 gimple new_stmt; 2549 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 2550 2551 vec_dest = VEC_pop (tree, vec_dsts); 2552 2553 for (i = 0; i < VEC_length (tree, *vec_oprnds); i += 2) 2554 { 2555 /* Create demotion operation. */ 2556 vop0 = VEC_index (tree, *vec_oprnds, i); 2557 vop1 = VEC_index (tree, *vec_oprnds, i + 1); 2558 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1); 2559 new_tmp = make_ssa_name (vec_dest, new_stmt); 2560 gimple_assign_set_lhs (new_stmt, new_tmp); 2561 vect_finish_stmt_generation (stmt, new_stmt, gsi); 2562 2563 if (multi_step_cvt) 2564 /* Store the resulting vector for next recursive call. 
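Element i/2 of VEC_OPRNDS receives the narrowed vector, so after this loop only the first half of the VEC holds meaningful defs, ready to be combined pairwise at the next recursion level.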
*/ 2565 VEC_replace (tree, *vec_oprnds, i/2, new_tmp); 2566 else 2567 { 2568 /* This is the last step of the conversion sequence. Store the 2569 vectors in SLP_NODE or in vector info of the scalar statement 2570 (or in STMT_VINFO_RELATED_STMT chain). */ 2571 if (slp_node) 2572 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt); 2573 else 2574 { 2575 if (!*prev_stmt_info) 2576 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt; 2577 else 2578 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt; 2579 2580 *prev_stmt_info = vinfo_for_stmt (new_stmt); 2581 } 2582 } 2583 } 2584 2585 /* For multi-step demotion operations we first generate demotion operations 2586 from the source type to the intermediate types, and then combine the 2587 results (stored in VEC_OPRNDS) in a demotion operation to the destination 2588 type. */ 2589 if (multi_step_cvt) 2590 { 2591 /* At each level of recursion we have half of the operands we had at the 2592 previous level. */ 2593 VEC_truncate (tree, *vec_oprnds, (i+1)/2); 2594 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1, 2595 stmt, vec_dsts, gsi, slp_node, 2596 code, prev_stmt_info); 2597 } 2598 } 2599 2600 2601 /* Function vectorizable_type_demotion 2602 2603 Check if STMT performs a binary or unary operation that involves 2604 type demotion, and if it can be vectorized. 2605 If VEC_STMT is also passed, vectorize the STMT: create a vectorized 2606 stmt to replace it, put it in VEC_STMT, and insert it at BSI. 2607 Return FALSE if not a vectorizable STMT, TRUE otherwise. */ 2608 2609 static bool 2610 vectorizable_type_demotion (gimple stmt, gimple_stmt_iterator *gsi, 2611 gimple *vec_stmt, slp_tree slp_node) 2612 { 2613 tree vec_dest; 2614 tree scalar_dest; 2615 tree op0; 2616 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 2617 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 2618 enum tree_code code, code1 = ERROR_MARK; 2619 tree def; 2620 gimple def_stmt; 2621 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type}; 2622 stmt_vec_info prev_stmt_info; 2623 int nunits_in; 2624 int nunits_out; 2625 tree vectype_out; 2626 int ncopies; 2627 int j, i; 2628 tree vectype_in; 2629 int multi_step_cvt = 0; 2630 VEC (tree, heap) *vec_oprnds0 = NULL; 2631 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL; 2632 tree last_oprnd, intermediate_type; 2633 2634 /* FORNOW: not supported by basic block SLP vectorization. */ 2635 gcc_assert (loop_vinfo); 2636 2637 if (!STMT_VINFO_RELEVANT_P (stmt_info)) 2638 return false; 2639 2640 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def) 2641 return false; 2642 2643 /* Is STMT a vectorizable type-demotion operation? */ 2644 if (!is_gimple_assign (stmt)) 2645 return false; 2646 2647 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME) 2648 return false; 2649 2650 code = gimple_assign_rhs_code (stmt); 2651 if (!CONVERT_EXPR_CODE_P (code)) 2652 return false; 2653 2654 op0 = gimple_assign_rhs1 (stmt); 2655 vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0)); 2656 if (!vectype_in) 2657 return false; 2658 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in); 2659 2660 scalar_dest = gimple_assign_lhs (stmt); 2661 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest)); 2662 if (!vectype_out) 2663 return false; 2664 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out); 2665 if (nunits_in >= nunits_out) 2666 return false; 2667 2668 /* Multiple types in SLP are handled by creating the appropriate number of 2669 vectorized stmts for each SLP node.
Hence, NCOPIES is always 1 in 2670 case of SLP. */ 2671 if (slp_node) 2672 ncopies = 1; 2673 else 2674 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out; 2675 gcc_assert (ncopies >= 1); 2676 2677 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest)) 2678 && INTEGRAL_TYPE_P (TREE_TYPE (op0))) 2679 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest)) 2680 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0)) 2681 && CONVERT_EXPR_CODE_P (code)))) 2682 return false; 2683 2684 /* Check the operands of the operation. */ 2685 if (!vect_is_simple_use (op0, loop_vinfo, NULL, &def_stmt, &def, &dt[0])) 2686 { 2687 if (vect_print_dump_info (REPORT_DETAILS)) 2688 fprintf (vect_dump, "use not simple."); 2689 return false; 2690 } 2691 2692 /* Supportable by target? */ 2693 if (!supportable_narrowing_operation (code, stmt, vectype_in, &code1, 2694 &multi_step_cvt, &interm_types)) 2695 return false; 2696 2697 STMT_VINFO_VECTYPE (stmt_info) = vectype_in; 2698 2699 if (!vec_stmt) /* transformation not required. */ 2700 { 2701 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type; 2702 if (vect_print_dump_info (REPORT_DETAILS)) 2703 fprintf (vect_dump, "=== vectorizable_demotion ==="); 2704 vect_model_simple_cost (stmt_info, ncopies, dt, NULL); 2705 return true; 2706 } 2707 2708 /** Transform. **/ 2709 if (vect_print_dump_info (REPORT_DETAILS)) 2710 fprintf (vect_dump, "transform type demotion operation. ncopies = %d.", 2711 ncopies); 2712 2713 /* In case of multi-step demotion, we first generate demotion operations to 2714 the intermediate types, and then from that types to the final one. 2715 We create vector destinations for the intermediate type (TYPES) received 2716 from supportable_narrowing_operation, and store them in the correct order 2717 for future use in vect_create_vectorized_demotion_stmts(). */ 2718 if (multi_step_cvt) 2719 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1); 2720 else 2721 vec_dsts = VEC_alloc (tree, heap, 1); 2722 2723 vec_dest = vect_create_destination_var (scalar_dest, vectype_out); 2724 VEC_quick_push (tree, vec_dsts, vec_dest); 2725 2726 if (multi_step_cvt) 2727 { 2728 for (i = VEC_length (tree, interm_types) - 1; 2729 VEC_iterate (tree, interm_types, i, intermediate_type); i--) 2730 { 2731 vec_dest = vect_create_destination_var (scalar_dest, 2732 intermediate_type); 2733 VEC_quick_push (tree, vec_dsts, vec_dest); 2734 } 2735 } 2736 2737 /* In case the vectorization factor (VF) is bigger than the number 2738 of elements that we can fit in a vectype (nunits), we have to generate 2739 more than one vector stmt - i.e - we need to "unroll" the 2740 vector stmt by a factor VF/nunits. */ 2741 last_oprnd = op0; 2742 prev_stmt_info = NULL; 2743 for (j = 0; j < ncopies; j++) 2744 { 2745 /* Handle uses. */ 2746 if (slp_node) 2747 vect_get_slp_defs (op0, NULL_TREE, slp_node, &vec_oprnds0, NULL); 2748 else 2749 { 2750 VEC_free (tree, heap, vec_oprnds0); 2751 vec_oprnds0 = VEC_alloc (tree, heap, 2752 (multi_step_cvt ? vect_pow2 (multi_step_cvt) * 2 : 2)); 2753 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0, 2754 vect_pow2 (multi_step_cvt) - 1); 2755 } 2756 2757 /* Arguments are ready. Create the new vector stmts. 
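Note that vect_create_vectorized_demotion_stmts pops destinations off the VEC it is given, so a fresh copy of VEC_DSTS (TMP_VEC_DSTS) is passed on every iteration.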
*/ 2758 tmp_vec_dsts = VEC_copy (tree, heap, vec_dsts); 2759 vect_create_vectorized_demotion_stmts (&vec_oprnds0, 2760 multi_step_cvt, stmt, tmp_vec_dsts, 2761 gsi, slp_node, code1, 2762 &prev_stmt_info); 2763 } 2764 2765 VEC_free (tree, heap, vec_oprnds0); 2766 VEC_free (tree, heap, vec_dsts); 2767 VEC_free (tree, heap, tmp_vec_dsts); 2768 VEC_free (tree, heap, interm_types); 2769 2770 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info); 2771 return true; 2772 } 2773 2774 2775 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0 2776 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store 2777 the resulting vectors and call the function recursively. */ 2778 2779 static void 2780 vect_create_vectorized_promotion_stmts (VEC (tree, heap) **vec_oprnds0, 2781 VEC (tree, heap) **vec_oprnds1, 2782 int multi_step_cvt, gimple stmt, 2783 VEC (tree, heap) *vec_dsts, 2784 gimple_stmt_iterator *gsi, 2785 slp_tree slp_node, enum tree_code code1, 2786 enum tree_code code2, tree decl1, 2787 tree decl2, int op_type, 2788 stmt_vec_info *prev_stmt_info) 2789 { 2790 int i; 2791 tree vop0, vop1, new_tmp1, new_tmp2, vec_dest; 2792 gimple new_stmt1, new_stmt2; 2793 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 2794 VEC (tree, heap) *vec_tmp; 2795 2796 vec_dest = VEC_pop (tree, vec_dsts); 2797 vec_tmp = VEC_alloc (tree, heap, VEC_length (tree, *vec_oprnds0) * 2); 2798 2799 for (i = 0; VEC_iterate (tree, *vec_oprnds0, i, vop0); i++) 2800 { 2801 if (op_type == binary_op) 2802 vop1 = VEC_index (tree, *vec_oprnds1, i); 2803 else 2804 vop1 = NULL_TREE; 2805 2806 /* Generate the two halves of the promotion operation. */ 2807 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1, 2808 op_type, vec_dest, gsi, stmt); 2809 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1, 2810 op_type, vec_dest, gsi, stmt); 2811 if (is_gimple_call (new_stmt1)) 2812 { 2813 new_tmp1 = gimple_call_lhs (new_stmt1); 2814 new_tmp2 = gimple_call_lhs (new_stmt2); 2815 } 2816 else 2817 { 2818 new_tmp1 = gimple_assign_lhs (new_stmt1); 2819 new_tmp2 = gimple_assign_lhs (new_stmt2); 2820 } 2821 2822 if (multi_step_cvt) 2823 { 2824 /* Store the results for the recursive call. */ 2825 VEC_quick_push (tree, vec_tmp, new_tmp1); 2826 VEC_quick_push (tree, vec_tmp, new_tmp2); 2827 } 2828 else 2829 { 2830 /* Last step of the promotion sequence - store the results. */ 2831 if (slp_node) 2832 { 2833 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt1); 2834 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt2); 2835 } 2836 else 2837 { 2838 if (!*prev_stmt_info) 2839 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt1; 2840 else 2841 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt1; 2842 2843 *prev_stmt_info = vinfo_for_stmt (new_stmt1); 2844 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt2; 2845 *prev_stmt_info = vinfo_for_stmt (new_stmt2); 2846 } 2847 } 2848 } 2849 2850 if (multi_step_cvt) 2851 { 2852 /* For a multi-step promotion operation we call the function recursively 2853 for every stage. We start from the input type, 2854 create promotion operations to the intermediate types, and then 2855 create promotions to the output type.
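For instance, a char -> int promotion with one intermediate short type first unpacks each char vector into two short vectors, which are then unpacked again into four int vectors.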
*/ 2856 *vec_oprnds0 = VEC_copy (tree, heap, vec_tmp); 2857 VEC_free (tree, heap, vec_tmp); 2858 vect_create_vectorized_promotion_stmts (vec_oprnds0, vec_oprnds1, 2859 multi_step_cvt - 1, stmt, 2860 vec_dsts, gsi, slp_node, code1, 2861 code2, decl2, decl2, op_type, 2862 prev_stmt_info); 2863 } 2864 } 2865 2866 2867 /* Function vectorizable_type_promotion 2868 2869 Check if STMT performs a binary or unary operation that involves 2870 type promotion, and if it can be vectorized. 2871 If VEC_STMT is also passed, vectorize the STMT: create a vectorized 2872 stmt to replace it, put it in VEC_STMT, and insert it at BSI. 2873 Return FALSE if not a vectorizable STMT, TRUE otherwise. */ 2874 2875 static bool 2876 vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi, 2877 gimple *vec_stmt, slp_tree slp_node) 2878 { 2879 tree vec_dest; 2880 tree scalar_dest; 2881 tree op0, op1 = NULL; 2882 tree vec_oprnd0=NULL, vec_oprnd1=NULL; 2883 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 2884 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 2885 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK; 2886 tree decl1 = NULL_TREE, decl2 = NULL_TREE; 2887 int op_type; 2888 tree def; 2889 gimple def_stmt; 2890 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type}; 2891 stmt_vec_info prev_stmt_info; 2892 int nunits_in; 2893 int nunits_out; 2894 tree vectype_out; 2895 int ncopies; 2896 int j, i; 2897 tree vectype_in; 2898 tree intermediate_type = NULL_TREE; 2899 int multi_step_cvt = 0; 2900 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL; 2901 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL; 2902 2903 /* FORNOW: not supported by basic block SLP vectorization. */ 2904 gcc_assert (loop_vinfo); 2905 2906 if (!STMT_VINFO_RELEVANT_P (stmt_info)) 2907 return false; 2908 2909 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def) 2910 return false; 2911 2912 /* Is STMT a vectorizable type-promotion operation? */ 2913 if (!is_gimple_assign (stmt)) 2914 return false; 2915 2916 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME) 2917 return false; 2918 2919 code = gimple_assign_rhs_code (stmt); 2920 if (!CONVERT_EXPR_CODE_P (code) 2921 && code != WIDEN_MULT_EXPR) 2922 return false; 2923 2924 op0 = gimple_assign_rhs1 (stmt); 2925 vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0)); 2926 if (!vectype_in) 2927 return false; 2928 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in); 2929 2930 scalar_dest = gimple_assign_lhs (stmt); 2931 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest)); 2932 if (!vectype_out) 2933 return false; 2934 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out); 2935 if (nunits_in <= nunits_out) 2936 return false; 2937 2938 /* Multiple types in SLP are handled by creating the appropriate number of 2939 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in 2940 case of SLP. */ 2941 if (slp_node) 2942 ncopies = 1; 2943 else 2944 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in; 2945 2946 gcc_assert (ncopies >= 1); 2947 2948 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest)) 2949 && INTEGRAL_TYPE_P (TREE_TYPE (op0))) 2950 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest)) 2951 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0)) 2952 && CONVERT_EXPR_CODE_P (code)))) 2953 return false; 2954 2955 /* Check the operands of the operation. 
*/ 2956 if (!vect_is_simple_use (op0, loop_vinfo, NULL, &def_stmt, &def, &dt[0])) 2957 { 2958 if (vect_print_dump_info (REPORT_DETAILS)) 2959 fprintf (vect_dump, "use not simple."); 2960 return false; 2961 } 2962 2963 op_type = TREE_CODE_LENGTH (code); 2964 if (op_type == binary_op) 2965 { 2966 op1 = gimple_assign_rhs2 (stmt); 2967 if (!vect_is_simple_use (op1, loop_vinfo, NULL, &def_stmt, &def, &dt[1])) 2968 { 2969 if (vect_print_dump_info (REPORT_DETAILS)) 2970 fprintf (vect_dump, "use not simple."); 2971 return false; 2972 } 2973 } 2974 2975 /* Supportable by target? */ 2976 if (!supportable_widening_operation (code, stmt, vectype_in, 2977 &decl1, &decl2, &code1, &code2, 2978 &multi_step_cvt, &interm_types)) 2979 return false; 2980 2981 /* Binary widening operation can only be supported directly by the 2982 architecture. */ 2983 gcc_assert (!(multi_step_cvt && op_type == binary_op)); 2984 2985 STMT_VINFO_VECTYPE (stmt_info) = vectype_in; 2986 2987 if (!vec_stmt) /* transformation not required. */ 2988 { 2989 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type; 2990 if (vect_print_dump_info (REPORT_DETAILS)) 2991 fprintf (vect_dump, "=== vectorizable_promotion ==="); 2992 vect_model_simple_cost (stmt_info, 2*ncopies, dt, NULL); 2993 return true; 2994 } 2995 2996 /** Transform. **/ 2997 2998 if (vect_print_dump_info (REPORT_DETAILS)) 2999 fprintf (vect_dump, "transform type promotion operation. ncopies = %d.", 3000 ncopies); 3001 3002 /* Handle def. */ 3003 /* In case of multi-step promotion, we first generate promotion operations 3004 to the intermediate types, and then from that types to the final one. 3005 We store vector destination in VEC_DSTS in the correct order for 3006 recursive creation of promotion operations in 3007 vect_create_vectorized_promotion_stmts(). Vector destinations are created 3008 according to TYPES recieved from supportable_widening_operation(). */ 3009 if (multi_step_cvt) 3010 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1); 3011 else 3012 vec_dsts = VEC_alloc (tree, heap, 1); 3013 3014 vec_dest = vect_create_destination_var (scalar_dest, vectype_out); 3015 VEC_quick_push (tree, vec_dsts, vec_dest); 3016 3017 if (multi_step_cvt) 3018 { 3019 for (i = VEC_length (tree, interm_types) - 1; 3020 VEC_iterate (tree, interm_types, i, intermediate_type); i--) 3021 { 3022 vec_dest = vect_create_destination_var (scalar_dest, 3023 intermediate_type); 3024 VEC_quick_push (tree, vec_dsts, vec_dest); 3025 } 3026 } 3027 3028 if (!slp_node) 3029 { 3030 vec_oprnds0 = VEC_alloc (tree, heap, 3031 (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1)); 3032 if (op_type == binary_op) 3033 vec_oprnds1 = VEC_alloc (tree, heap, 1); 3034 } 3035 3036 /* In case the vectorization factor (VF) is bigger than the number 3037 of elements that we can fit in a vectype (nunits), we have to generate 3038 more than one vector stmt - i.e - we need to "unroll" the 3039 vector stmt by a factor VF/nunits. */ 3040 3041 prev_stmt_info = NULL; 3042 for (j = 0; j < ncopies; j++) 3043 { 3044 /* Handle uses. 
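For the first copy (j == 0) the defs are created from the scalar operands; for the following copies they are obtained from the defs of the previous copy via vect_get_vec_def_for_stmt_copy.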
*/ 3045 if (j == 0) 3046 { 3047 if (slp_node) 3048 vect_get_slp_defs (op0, op1, slp_node, &vec_oprnds0, &vec_oprnds1); 3049 else 3050 { 3051 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL); 3052 VEC_quick_push (tree, vec_oprnds0, vec_oprnd0); 3053 if (op_type == binary_op) 3054 { 3055 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL); 3056 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1); 3057 } 3058 } 3059 } 3060 else 3061 { 3062 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0); 3063 VEC_replace (tree, vec_oprnds0, 0, vec_oprnd0); 3064 if (op_type == binary_op) 3065 { 3066 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1); 3067 VEC_replace (tree, vec_oprnds1, 0, vec_oprnd1); 3068 } 3069 } 3070 3071 /* Arguments are ready. Create the new vector stmts. */ 3072 tmp_vec_dsts = VEC_copy (tree, heap, vec_dsts); 3073 vect_create_vectorized_promotion_stmts (&vec_oprnds0, &vec_oprnds1, 3074 multi_step_cvt, stmt, 3075 tmp_vec_dsts, 3076 gsi, slp_node, code1, code2, 3077 decl1, decl2, op_type, 3078 &prev_stmt_info); 3079 } 3080 3081 VEC_free (tree, heap, vec_dsts); 3082 VEC_free (tree, heap, tmp_vec_dsts); 3083 VEC_free (tree, heap, interm_types); 3084 VEC_free (tree, heap, vec_oprnds0); 3085 VEC_free (tree, heap, vec_oprnds1); 3086 3087 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info); 3088 return true; 3089 } 3090 3091 3092 /* Function vectorizable_store. 3093 3094 Check if STMT defines a non scalar data-ref (array/pointer/structure) that 3095 can be vectorized. 3096 If VEC_STMT is also passed, vectorize the STMT: create a vectorized 3097 stmt to replace it, put it in VEC_STMT, and insert it at BSI. 3098 Return FALSE if not a vectorizable STMT, TRUE otherwise. */ 3099 3100 static bool 3101 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, 3102 slp_tree slp_node) 3103 { 3104 tree scalar_dest; 3105 tree data_ref; 3106 tree op; 3107 tree vec_oprnd = NULL_TREE; 3108 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 3109 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL; 3110 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 3111 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 3112 struct loop *loop = NULL; 3113 enum machine_mode vec_mode; 3114 tree dummy; 3115 enum dr_alignment_support alignment_support_scheme; 3116 tree def; 3117 gimple def_stmt; 3118 enum vect_def_type dt; 3119 stmt_vec_info prev_stmt_info = NULL; 3120 tree dataref_ptr = NULL_TREE; 3121 int nunits = TYPE_VECTOR_SUBPARTS (vectype); 3122 int ncopies; 3123 int j; 3124 gimple next_stmt, first_stmt = NULL; 3125 bool strided_store = false; 3126 unsigned int group_size, i; 3127 VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL; 3128 bool inv_p; 3129 VEC(tree,heap) *vec_oprnds = NULL; 3130 bool slp = (slp_node != NULL); 3131 unsigned int vec_num; 3132 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); 3133 3134 if (loop_vinfo) 3135 loop = LOOP_VINFO_LOOP (loop_vinfo); 3136 3137 /* Multiple types in SLP are handled by creating the appropriate number of 3138 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in 3139 case of SLP. */ 3140 if (slp) 3141 ncopies = 1; 3142 else 3143 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits; 3144 3145 gcc_assert (ncopies >= 1); 3146 3147 /* FORNOW. This restriction should be relaxed. 
*/ 3148 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1) 3149 { 3150 if (vect_print_dump_info (REPORT_DETAILS)) 3151 fprintf (vect_dump, "multiple types in nested loop."); 3152 return false; 3153 } 3154 3155 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) 3156 return false; 3157 3158 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def) 3159 return false; 3160 3161 /* Is vectorizable store? */ 3162 3163 if (!is_gimple_assign (stmt)) 3164 return false; 3165 3166 scalar_dest = gimple_assign_lhs (stmt); 3167 if (TREE_CODE (scalar_dest) != ARRAY_REF 3168 && TREE_CODE (scalar_dest) != INDIRECT_REF 3169 && TREE_CODE (scalar_dest) != COMPONENT_REF 3170 && TREE_CODE (scalar_dest) != IMAGPART_EXPR 3171 && TREE_CODE (scalar_dest) != REALPART_EXPR) 3172 return false; 3173 3174 gcc_assert (gimple_assign_single_p (stmt)); 3175 op = gimple_assign_rhs1 (stmt); 3176 if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt)) 3177 { 3178 if (vect_print_dump_info (REPORT_DETAILS)) 3179 fprintf (vect_dump, "use not simple."); 3180 return false; 3181 } 3182 3183 /* The scalar rhs type needs to be trivially convertible to the vector 3184 component type. This should always be the case. */ 3185 if (!useless_type_conversion_p (TREE_TYPE (vectype), TREE_TYPE (op))) 3186 { 3187 if (vect_print_dump_info (REPORT_DETAILS)) 3188 fprintf (vect_dump, "??? operands of different types"); 3189 return false; 3190 } 3191 3192 vec_mode = TYPE_MODE (vectype); 3193 /* FORNOW. In some cases can vectorize even if data-type not supported 3194 (e.g. - array initialization with 0). */ 3195 if (optab_handler (mov_optab, (int)vec_mode)->insn_code == CODE_FOR_nothing) 3196 return false; 3197 3198 if (!STMT_VINFO_DATA_REF (stmt_info)) 3199 return false; 3200 3201 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)) 3202 { 3203 strided_store = true; 3204 first_stmt = DR_GROUP_FIRST_DR (stmt_info); 3205 if (!vect_strided_store_supported (vectype) 3206 && !PURE_SLP_STMT (stmt_info) && !slp) 3207 return false; 3208 3209 if (first_stmt == stmt) 3210 { 3211 /* STMT is the leader of the group. Check the operands of all the 3212 stmts of the group. */ 3213 next_stmt = DR_GROUP_NEXT_DR (stmt_info); 3214 while (next_stmt) 3215 { 3216 gcc_assert (gimple_assign_single_p (next_stmt)); 3217 op = gimple_assign_rhs1 (next_stmt); 3218 if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, 3219 &def, &dt)) 3220 { 3221 if (vect_print_dump_info (REPORT_DETAILS)) 3222 fprintf (vect_dump, "use not simple."); 3223 return false; 3224 } 3225 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt)); 3226 } 3227 } 3228 } 3229 3230 if (!vec_stmt) /* transformation not required. */ 3231 { 3232 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type; 3233 vect_model_store_cost (stmt_info, ncopies, dt, NULL); 3234 return true; 3235 } 3236 3237 /** Transform. **/ 3238 3239 if (strided_store) 3240 { 3241 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)); 3242 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt)); 3243 3244 DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++; 3245 3246 /* FORNOW */ 3247 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt)); 3248 3249 /* We vectorize all the stmts of the interleaving group when we 3250 reach the last stmt in the group. 
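Until the last stmt is seen, the store is only counted in DR_GROUP_STORE_COUNT and we return without generating any code.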
*/ 3251 if (DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt)) 3252 < DR_GROUP_SIZE (vinfo_for_stmt (first_stmt)) 3253 && !slp) 3254 { 3255 *vec_stmt = NULL; 3256 return true; 3257 } 3258 3259 if (slp) 3260 strided_store = false; 3261 3262 /* VEC_NUM is the number of vect stmts to be created for this group. */ 3263 if (slp) 3264 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); 3265 else 3266 vec_num = group_size; 3267 } 3268 else 3269 { 3270 first_stmt = stmt; 3271 first_dr = dr; 3272 group_size = vec_num = 1; 3273 } 3274 3275 if (vect_print_dump_info (REPORT_DETAILS)) 3276 fprintf (vect_dump, "transform store. ncopies = %d",ncopies); 3277 3278 dr_chain = VEC_alloc (tree, heap, group_size); 3279 oprnds = VEC_alloc (tree, heap, group_size); 3280 3281 alignment_support_scheme = vect_supportable_dr_alignment (first_dr); 3282 gcc_assert (alignment_support_scheme); 3283 3284 /* In case the vectorization factor (VF) is bigger than the number 3285 of elements that we can fit in a vectype (nunits), we have to generate 3286 more than one vector stmt - i.e - we need to "unroll" the 3287 vector stmt by a factor VF/nunits. For more details see documentation in 3288 vect_get_vec_def_for_copy_stmt. */ 3289 3290 /* In case of interleaving (non-unit strided access): 3291 3292 S1: &base + 2 = x2 3293 S2: &base = x0 3294 S3: &base + 1 = x1 3295 S4: &base + 3 = x3 3296 3297 We create vectorized stores starting from base address (the access of the 3298 first stmt in the chain (S2 in the above example), when the last store stmt 3299 of the chain (S4) is reached: 3300 3301 VS1: &base = vx2 3302 VS2: &base + vec_size*1 = vx0 3303 VS3: &base + vec_size*2 = vx1 3304 VS4: &base + vec_size*3 = vx3 3305 3306 Then permutation statements are generated: 3307 3308 VS5: vx5 = VEC_INTERLEAVE_HIGH_EXPR < vx0, vx3 > 3309 VS6: vx6 = VEC_INTERLEAVE_LOW_EXPR < vx0, vx3 > 3310 ... 3311 3312 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts 3313 (the order of the data-refs in the output of vect_permute_store_chain 3314 corresponds to the order of scalar stmts in the interleaving chain - see 3315 the documentation of vect_permute_store_chain()). 3316 3317 In case of both multiple types and interleaving, above vector stores and 3318 permutation stmts are created for every copy. The result vector stmts are 3319 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding 3320 STMT_VINFO_RELATED_STMT for the next copies. 3321 */ 3322 3323 prev_stmt_info = NULL; 3324 for (j = 0; j < ncopies; j++) 3325 { 3326 gimple new_stmt; 3327 gimple ptr_incr; 3328 3329 if (j == 0) 3330 { 3331 if (slp) 3332 { 3333 /* Get vectorized arguments for SLP_NODE. */ 3334 vect_get_slp_defs (NULL_TREE, NULL_TREE, slp_node, &vec_oprnds, 3335 NULL); 3336 3337 vec_oprnd = VEC_index (tree, vec_oprnds, 0); 3338 } 3339 else 3340 { 3341 /* For interleaved stores we collect vectorized defs for all the 3342 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then 3343 used as an input to vect_permute_store_chain(), and OPRNDS as 3344 an input to vect_get_vec_def_for_stmt_copy() for the next copy. 3345 3346 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and 3347 OPRNDS are of size 1. */ 3348 next_stmt = first_stmt; 3349 for (i = 0; i < group_size; i++) 3350 { 3351 /* Since gaps are not supported for interleaved stores, 3352 GROUP_SIZE is the exact number of stmts in the chain. 3353 Therefore, NEXT_STMT can't be NULL_TREE. 
In case that 3354 there is no interleaving, GROUP_SIZE is 1, and only one 3355 iteration of the loop will be executed. */ 3356 gcc_assert (next_stmt 3357 && gimple_assign_single_p (next_stmt)); 3358 op = gimple_assign_rhs1 (next_stmt); 3359 3360 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt, 3361 NULL); 3362 VEC_quick_push(tree, dr_chain, vec_oprnd); 3363 VEC_quick_push(tree, oprnds, vec_oprnd); 3364 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt)); 3365 } 3366 } 3367 3368 /* We should have catched mismatched types earlier. */ 3369 gcc_assert (useless_type_conversion_p (vectype, 3370 TREE_TYPE (vec_oprnd))); 3371 dataref_ptr = vect_create_data_ref_ptr (first_stmt, NULL, NULL_TREE, 3372 &dummy, &ptr_incr, false, 3373 &inv_p); 3374 gcc_assert (bb_vinfo || !inv_p); 3375 } 3376 else 3377 { 3378 /* For interleaved stores we created vectorized defs for all the 3379 defs stored in OPRNDS in the previous iteration (previous copy). 3380 DR_CHAIN is then used as an input to vect_permute_store_chain(), 3381 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the 3382 next copy. 3383 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and 3384 OPRNDS are of size 1. */ 3385 for (i = 0; i < group_size; i++) 3386 { 3387 op = VEC_index (tree, oprnds, i); 3388 vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def, 3389 &dt); 3390 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op); 3391 VEC_replace(tree, dr_chain, i, vec_oprnd); 3392 VEC_replace(tree, oprnds, i, vec_oprnd); 3393 } 3394 dataref_ptr = 3395 bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, NULL_TREE); 3396 } 3397 3398 if (strided_store) 3399 { 3400 result_chain = VEC_alloc (tree, heap, group_size); 3401 /* Permute. */ 3402 if (!vect_permute_store_chain (dr_chain, group_size, stmt, gsi, 3403 &result_chain)) 3404 return false; 3405 } 3406 3407 next_stmt = first_stmt; 3408 for (i = 0; i < vec_num; i++) 3409 { 3410 if (i > 0) 3411 /* Bump the vector pointer. */ 3412 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, 3413 NULL_TREE); 3414 3415 if (slp) 3416 vec_oprnd = VEC_index (tree, vec_oprnds, i); 3417 else if (strided_store) 3418 /* For strided stores vectorized defs are interleaved in 3419 vect_permute_store_chain(). */ 3420 vec_oprnd = VEC_index (tree, result_chain, i); 3421 3422 if (aligned_access_p (first_dr)) 3423 data_ref = build_fold_indirect_ref (dataref_ptr); 3424 else 3425 { 3426 int mis = DR_MISALIGNMENT (first_dr); 3427 tree tmis = (mis == -1 ? size_zero_node : size_int (mis)); 3428 tmis = size_binop (MULT_EXPR, tmis, size_int (BITS_PER_UNIT)); 3429 data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, dataref_ptr, tmis); 3430 } 3431 3432 /* If accesses through a pointer to vectype do not alias the original 3433 memory reference we have a problem. This should never happen. */ 3434 gcc_assert (alias_sets_conflict_p (get_alias_set (data_ref), 3435 get_alias_set (gimple_assign_lhs (stmt)))); 3436 3437 /* Arguments are ready. Create the new vector stmt. 
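The vectorized store is simply DATA_REF = VEC_OPRND; the symbols referenced by the new stmt are marked for virtual SSA renaming right after it is emitted.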
*/ 3438 new_stmt = gimple_build_assign (data_ref, vec_oprnd); 3439 vect_finish_stmt_generation (stmt, new_stmt, gsi); 3440 mark_symbols_for_renaming (new_stmt); 3441 3442 if (slp) 3443 continue; 3444 3445 if (j == 0) 3446 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; 3447 else 3448 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; 3449 3450 prev_stmt_info = vinfo_for_stmt (new_stmt); 3451 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt)); 3452 if (!next_stmt) 3453 break; 3454 } 3455 } 3456 3457 VEC_free (tree, heap, dr_chain); 3458 VEC_free (tree, heap, oprnds); 3459 if (result_chain) 3460 VEC_free (tree, heap, result_chain); 3461 3462 return true; 3463 } 3464 3465 /* vectorizable_load. 3466 3467 Check if STMT reads a non scalar data-ref (array/pointer/structure) that 3468 can be vectorized. 3469 If VEC_STMT is also passed, vectorize the STMT: create a vectorized 3470 stmt to replace it, put it in VEC_STMT, and insert it at BSI. 3471 Return FALSE if not a vectorizable STMT, TRUE otherwise. */ 3472 3473 static bool 3474 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, 3475 slp_tree slp_node, slp_instance slp_node_instance) 3476 { 3477 tree scalar_dest; 3478 tree vec_dest = NULL; 3479 tree data_ref = NULL; 3480 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 3481 stmt_vec_info prev_stmt_info; 3482 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 3483 struct loop *loop = NULL; 3484 struct loop *containing_loop = (gimple_bb (stmt))->loop_father; 3485 bool nested_in_vect_loop = false; 3486 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr; 3487 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 3488 tree new_temp; 3489 int mode; 3490 gimple new_stmt = NULL; 3491 tree dummy; 3492 enum dr_alignment_support alignment_support_scheme; 3493 tree dataref_ptr = NULL_TREE; 3494 gimple ptr_incr; 3495 int nunits = TYPE_VECTOR_SUBPARTS (vectype); 3496 int ncopies; 3497 int i, j, group_size; 3498 tree msq = NULL_TREE, lsq; 3499 tree offset = NULL_TREE; 3500 tree realignment_token = NULL_TREE; 3501 gimple phi = NULL; 3502 VEC(tree,heap) *dr_chain = NULL; 3503 bool strided_load = false; 3504 gimple first_stmt; 3505 tree scalar_type; 3506 bool inv_p; 3507 bool compute_in_loop = false; 3508 struct loop *at_loop; 3509 int vec_num; 3510 bool slp = (slp_node != NULL); 3511 bool slp_perm = false; 3512 enum tree_code code; 3513 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); 3514 int vf; 3515 3516 if (loop_vinfo) 3517 { 3518 loop = LOOP_VINFO_LOOP (loop_vinfo); 3519 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt); 3520 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); 3521 } 3522 else 3523 vf = 1; 3524 3525 /* Multiple types in SLP are handled by creating the appropriate number of 3526 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in 3527 case of SLP. */ 3528 if (slp) 3529 ncopies = 1; 3530 else 3531 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits; 3532 3533 gcc_assert (ncopies >= 1); 3534 3535 /* FORNOW. This restriction should be relaxed. */ 3536 if (nested_in_vect_loop && ncopies > 1) 3537 { 3538 if (vect_print_dump_info (REPORT_DETAILS)) 3539 fprintf (vect_dump, "multiple types in nested loop."); 3540 return false; 3541 } 3542 3543 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) 3544 return false; 3545 3546 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def) 3547 return false; 3548 3549 /* Is vectorizable load? 
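That is, an assignment that loads a (possibly strided) memory reference into an SSA name; any other rhs code is rejected below.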
*/ 3550 if (!is_gimple_assign (stmt)) 3551 return false; 3552 3553 scalar_dest = gimple_assign_lhs (stmt); 3554 if (TREE_CODE (scalar_dest) != SSA_NAME) 3555 return false; 3556 3557 code = gimple_assign_rhs_code (stmt); 3558 if (code != ARRAY_REF 3559 && code != INDIRECT_REF 3560 && code != COMPONENT_REF 3561 && code != IMAGPART_EXPR 3562 && code != REALPART_EXPR) 3563 return false; 3564 3565 if (!STMT_VINFO_DATA_REF (stmt_info)) 3566 return false; 3567 3568 scalar_type = TREE_TYPE (DR_REF (dr)); 3569 mode = (int) TYPE_MODE (vectype); 3570 3571 /* FORNOW. In some cases can vectorize even if data-type not supported 3572 (e.g. - data copies). */ 3573 if (optab_handler (mov_optab, mode)->insn_code == CODE_FOR_nothing) 3574 { 3575 if (vect_print_dump_info (REPORT_DETAILS)) 3576 fprintf (vect_dump, "Aligned load, but unsupported type."); 3577 return false; 3578 } 3579 3580 /* The vector component type needs to be trivially convertible to the 3581 scalar lhs. This should always be the case. */ 3582 if (!useless_type_conversion_p (TREE_TYPE (scalar_dest), TREE_TYPE (vectype))) 3583 { 3584 if (vect_print_dump_info (REPORT_DETAILS)) 3585 fprintf (vect_dump, "??? operands of different types"); 3586 return false; 3587 } 3588 3589 /* Check if the load is a part of an interleaving chain. */ 3590 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)) 3591 { 3592 strided_load = true; 3593 /* FORNOW */ 3594 gcc_assert (! nested_in_vect_loop); 3595 3596 /* Check if interleaving is supported. */ 3597 if (!vect_strided_load_supported (vectype) 3598 && !PURE_SLP_STMT (stmt_info) && !slp) 3599 return false; 3600 } 3601 3602 if (!vec_stmt) /* transformation not required. */ 3603 { 3604 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type; 3605 vect_model_load_cost (stmt_info, ncopies, NULL); 3606 return true; 3607 } 3608 3609 if (vect_print_dump_info (REPORT_DETAILS)) 3610 fprintf (vect_dump, "transform load."); 3611 3612 /** Transform. **/ 3613 3614 if (strided_load) 3615 { 3616 first_stmt = DR_GROUP_FIRST_DR (stmt_info); 3617 /* Check if the chain of loads is already vectorized. */ 3618 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))) 3619 { 3620 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info); 3621 return true; 3622 } 3623 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)); 3624 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt)); 3625 3626 /* VEC_NUM is the number of vect stmts to be created for this group. */ 3627 if (slp) 3628 { 3629 strided_load = false; 3630 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); 3631 if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance)) 3632 slp_perm = true; 3633 } 3634 else 3635 vec_num = group_size; 3636 3637 dr_chain = VEC_alloc (tree, heap, vec_num); 3638 } 3639 else 3640 { 3641 first_stmt = stmt; 3642 first_dr = dr; 3643 group_size = vec_num = 1; 3644 } 3645 3646 alignment_support_scheme = vect_supportable_dr_alignment (first_dr); 3647 gcc_assert (alignment_support_scheme); 3648 3649 /* In case the vectorization factor (VF) is bigger than the number 3650 of elements that we can fit in a vectype (nunits), we have to generate 3651 more than one vector stmt - i.e - we need to "unroll" the 3652 vector stmt by a factor VF/nunits. In doing so, we record a pointer 3653 from one copy of the vector stmt to the next, in the field 3654 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following 3655 stages to find the correct vector defs to be used when vectorizing 3656 stmts that use the defs of the current stmt. 
The example below illustrates 3657 the vectorization process when VF=16 and nunits=4 (i.e - we need to create 3658 4 vectorized stmts): 3659 3660 before vectorization: 3661 RELATED_STMT VEC_STMT 3662 S1: x = memref - - 3663 S2: z = x + 1 - - 3664 3665 step 1: vectorize stmt S1: 3666 We first create the vector stmt VS1_0, and, as usual, record a 3667 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1. 3668 Next, we create the vector stmt VS1_1, and record a pointer to 3669 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0. 3670 Similarly, for VS1_2 and VS1_3. This is the resulting chain of 3671 stmts and pointers: 3672 RELATED_STMT VEC_STMT 3673 VS1_0: vx0 = memref0 VS1_1 - 3674 VS1_1: vx1 = memref1 VS1_2 - 3675 VS1_2: vx2 = memref2 VS1_3 - 3676 VS1_3: vx3 = memref3 - - 3677 S1: x = load - VS1_0 3678 S2: z = x + 1 - - 3679 3680 See in documentation in vect_get_vec_def_for_stmt_copy for how the 3681 information we recorded in RELATED_STMT field is used to vectorize 3682 stmt S2. */ 3683 3684 /* In case of interleaving (non-unit strided access): 3685 3686 S1: x2 = &base + 2 3687 S2: x0 = &base 3688 S3: x1 = &base + 1 3689 S4: x3 = &base + 3 3690 3691 Vectorized loads are created in the order of memory accesses 3692 starting from the access of the first stmt of the chain: 3693 3694 VS1: vx0 = &base 3695 VS2: vx1 = &base + vec_size*1 3696 VS3: vx3 = &base + vec_size*2 3697 VS4: vx4 = &base + vec_size*3 3698 3699 Then permutation statements are generated: 3700 3701 VS5: vx5 = VEC_EXTRACT_EVEN_EXPR < vx0, vx1 > 3702 VS6: vx6 = VEC_EXTRACT_ODD_EXPR < vx0, vx1 > 3703 ... 3704 3705 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts 3706 (the order of the data-refs in the output of vect_permute_load_chain 3707 corresponds to the order of scalar stmts in the interleaving chain - see 3708 the documentation of vect_permute_load_chain()). 3709 The generation of permutation stmts and recording them in 3710 STMT_VINFO_VEC_STMT is done in vect_transform_strided_load(). 3711 3712 In case of both multiple types and interleaving, the vector loads and 3713 permutation stmts above are created for every copy. The result vector stmts 3714 are put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding 3715 STMT_VINFO_RELATED_STMT for the next copies. */ 3716 3717 /* If the data reference is aligned (dr_aligned) or potentially unaligned 3718 on a target that supports unaligned accesses (dr_unaligned_supported) 3719 we generate the following code: 3720 p = initial_addr; 3721 indx = 0; 3722 loop { 3723 p = p + indx * vectype_size; 3724 vec_dest = *(p); 3725 indx = indx + 1; 3726 } 3727 3728 Otherwise, the data reference is potentially unaligned on a target that 3729 does not support unaligned accesses (dr_explicit_realign_optimized) - 3730 then generate the following code, in which the data in each iteration is 3731 obtained by two vector loads, one from the previous iteration, and one 3732 from the current iteration: 3733 p1 = initial_addr; 3734 msq_init = *(floor(p1)) 3735 p2 = initial_addr + VS - 1; 3736 realignment_token = call target_builtin; 3737 indx = 0; 3738 loop { 3739 p2 = p2 + indx * vectype_size 3740 lsq = *(floor(p2)) 3741 vec_dest = realign_load (msq, lsq, realignment_token) 3742 indx = indx + 1; 3743 msq = lsq; 3744 } */ 3745 3746 /* If the misalignment remains the same throughout the execution of the 3747 loop, we can create the init_addr and permutation mask at the loop 3748 preheader. Otherwise, it needs to be created inside the loop. 
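   (A typical illustration only: an access a[i][j] in an inner j-loop while the
   enclosing i-loop is the one being vectorized.  The start address of the
   inner-loop access then advances by a whole row per outer-loop iteration, so
   its misalignment can differ from one outer-loop iteration to the next.)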
3749 This can only occur when vectorizing memory accesses in the inner-loop 3750 nested within an outer-loop that is being vectorized. */ 3751 3752 if (loop && nested_in_vect_loop_p (loop, stmt) 3753 && (TREE_INT_CST_LOW (DR_STEP (dr)) 3754 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0)) 3755 { 3756 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized); 3757 compute_in_loop = true; 3758 } 3759 3760 if ((alignment_support_scheme == dr_explicit_realign_optimized 3761 || alignment_support_scheme == dr_explicit_realign) 3762 && !compute_in_loop) 3763 { 3764 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token, 3765 alignment_support_scheme, NULL_TREE, 3766 &at_loop); 3767 if (alignment_support_scheme == dr_explicit_realign_optimized) 3768 { 3769 phi = SSA_NAME_DEF_STMT (msq); 3770 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1); 3771 } 3772 } 3773 else 3774 at_loop = loop; 3775 3776 prev_stmt_info = NULL; 3777 for (j = 0; j < ncopies; j++) 3778 { 3779 /* 1. Create the vector pointer update chain. */ 3780 if (j == 0) 3781 dataref_ptr = vect_create_data_ref_ptr (first_stmt, 3782 at_loop, offset, 3783 &dummy, &ptr_incr, false, 3784 &inv_p); 3785 else 3786 dataref_ptr = 3787 bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, NULL_TREE); 3788 3789 for (i = 0; i < vec_num; i++) 3790 { 3791 if (i > 0) 3792 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, 3793 NULL_TREE); 3794 3795 /* 2. Create the vector-load in the loop. */ 3796 switch (alignment_support_scheme) 3797 { 3798 case dr_aligned: 3799 gcc_assert (aligned_access_p (first_dr)); 3800 data_ref = build_fold_indirect_ref (dataref_ptr); 3801 break; 3802 case dr_unaligned_supported: 3803 { 3804 int mis = DR_MISALIGNMENT (first_dr); 3805 tree tmis = (mis == -1 ? size_zero_node : size_int (mis)); 3806 3807 tmis = size_binop (MULT_EXPR, tmis, size_int(BITS_PER_UNIT)); 3808 data_ref = 3809 build2 (MISALIGNED_INDIRECT_REF, vectype, dataref_ptr, tmis); 3810 break; 3811 } 3812 case dr_explicit_realign: 3813 { 3814 tree ptr, bump; 3815 tree vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1); 3816 3817 if (compute_in_loop) 3818 msq = vect_setup_realignment (first_stmt, gsi, 3819 &realignment_token, 3820 dr_explicit_realign, 3821 dataref_ptr, NULL); 3822 3823 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr); 3824 vec_dest = vect_create_destination_var (scalar_dest, vectype); 3825 new_stmt = gimple_build_assign (vec_dest, data_ref); 3826 new_temp = make_ssa_name (vec_dest, new_stmt); 3827 gimple_assign_set_lhs (new_stmt, new_temp); 3828 gimple_set_vdef (new_stmt, gimple_vdef (stmt)); 3829 gimple_set_vuse (new_stmt, gimple_vuse (stmt)); 3830 vect_finish_stmt_generation (stmt, new_stmt, gsi); 3831 msq = new_temp; 3832 3833 bump = size_binop (MULT_EXPR, vs_minus_1, 3834 TYPE_SIZE_UNIT (scalar_type)); 3835 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump); 3836 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, ptr); 3837 break; 3838 } 3839 case dr_explicit_realign_optimized: 3840 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr); 3841 break; 3842 default: 3843 gcc_unreachable (); 3844 } 3845 /* If accesses through a pointer to vectype do not alias the original 3846 memory reference we have a problem. This should never happen. 
*/ 3847 gcc_assert (alias_sets_conflict_p (get_alias_set (data_ref), 3848 get_alias_set (gimple_assign_rhs1 (stmt)))); 3849 vec_dest = vect_create_destination_var (scalar_dest, vectype); 3850 new_stmt = gimple_build_assign (vec_dest, data_ref); 3851 new_temp = make_ssa_name (vec_dest, new_stmt); 3852 gimple_assign_set_lhs (new_stmt, new_temp); 3853 vect_finish_stmt_generation (stmt, new_stmt, gsi); 3854 mark_symbols_for_renaming (new_stmt); 3855 3856 /* 3. Handle explicit realignment if necessary/supported. Create in 3857 loop: vec_dest = realign_load (msq, lsq, realignment_token) */ 3858 if (alignment_support_scheme == dr_explicit_realign_optimized 3859 || alignment_support_scheme == dr_explicit_realign) 3860 { 3861 tree tmp; 3862 3863 lsq = gimple_assign_lhs (new_stmt); 3864 if (!realignment_token) 3865 realignment_token = dataref_ptr; 3866 vec_dest = vect_create_destination_var (scalar_dest, vectype); 3867 tmp = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, 3868 realignment_token); 3869 new_stmt = gimple_build_assign (vec_dest, tmp); 3870 new_temp = make_ssa_name (vec_dest, new_stmt); 3871 gimple_assign_set_lhs (new_stmt, new_temp); 3872 vect_finish_stmt_generation (stmt, new_stmt, gsi); 3873 3874 if (alignment_support_scheme == dr_explicit_realign_optimized) 3875 { 3876 gcc_assert (phi); 3877 if (i == vec_num - 1 && j == ncopies - 1) 3878 add_phi_arg (phi, lsq, loop_latch_edge (containing_loop), 3879 UNKNOWN_LOCATION); 3880 msq = lsq; 3881 } 3882 } 3883 3884 /* 4. Handle invariant-load. */ 3885 if (inv_p && !bb_vinfo) 3886 { 3887 gcc_assert (!strided_load); 3888 gcc_assert (nested_in_vect_loop_p (loop, stmt)); 3889 if (j == 0) 3890 { 3891 int k; 3892 tree t = NULL_TREE; 3893 tree vec_inv, bitpos, bitsize = TYPE_SIZE (scalar_type); 3894 3895 /* CHECKME: bitpos depends on endianess? */ 3896 bitpos = bitsize_zero_node; 3897 vec_inv = build3 (BIT_FIELD_REF, scalar_type, new_temp, 3898 bitsize, bitpos); 3899 vec_dest = 3900 vect_create_destination_var (scalar_dest, NULL_TREE); 3901 new_stmt = gimple_build_assign (vec_dest, vec_inv); 3902 new_temp = make_ssa_name (vec_dest, new_stmt); 3903 gimple_assign_set_lhs (new_stmt, new_temp); 3904 vect_finish_stmt_generation (stmt, new_stmt, gsi); 3905 3906 for (k = nunits - 1; k >= 0; --k) 3907 t = tree_cons (NULL_TREE, new_temp, t); 3908 /* FIXME: use build_constructor directly. */ 3909 vec_inv = build_constructor_from_list (vectype, t); 3910 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi); 3911 new_stmt = SSA_NAME_DEF_STMT (new_temp); 3912 } 3913 else 3914 gcc_unreachable (); /* FORNOW. */ 3915 } 3916 3917 /* Collect vector loads and later create their permutation in 3918 vect_transform_strided_load (). */ 3919 if (strided_load || slp_perm) 3920 VEC_quick_push (tree, dr_chain, new_temp); 3921 3922 /* Store vector loads in the corresponding SLP_NODE. 
*/ 3923 if (slp && !slp_perm) 3924 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt); 3925 } 3926 3927 if (slp && !slp_perm) 3928 continue; 3929 3930 if (slp_perm) 3931 { 3932 if (!vect_transform_slp_perm_load (stmt, dr_chain, gsi, vf, 3933 slp_node_instance, false)) 3934 { 3935 VEC_free (tree, heap, dr_chain); 3936 return false; 3937 } 3938 } 3939 else 3940 { 3941 if (strided_load) 3942 { 3943 if (!vect_transform_strided_load (stmt, dr_chain, group_size, gsi)) 3944 return false; 3945 3946 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info); 3947 VEC_free (tree, heap, dr_chain); 3948 dr_chain = VEC_alloc (tree, heap, group_size); 3949 } 3950 else 3951 { 3952 if (j == 0) 3953 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; 3954 else 3955 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; 3956 prev_stmt_info = vinfo_for_stmt (new_stmt); 3957 } 3958 } 3959 } 3960 3961 if (dr_chain) 3962 VEC_free (tree, heap, dr_chain); 3963 3964 return true; 3965 } 3966 3967 /* Function vect_is_simple_cond. 3968 3969 Input: 3970 LOOP - the loop that is being vectorized. 3971 COND - Condition that is checked for simple use. 3972 3973 Returns whether a COND can be vectorized. Checks whether 3974 condition operands are supportable using vect_is_simple_use. */ 3975 3976 static bool 3977 vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo) 3978 { 3979 tree lhs, rhs; 3980 tree def; 3981 enum vect_def_type dt; 3982 3983 if (!COMPARISON_CLASS_P (cond)) 3984 return false; 3985 3986 lhs = TREE_OPERAND (cond, 0); 3987 rhs = TREE_OPERAND (cond, 1); 3988 3989 if (TREE_CODE (lhs) == SSA_NAME) 3990 { 3991 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs); 3992 if (!vect_is_simple_use (lhs, loop_vinfo, NULL, &lhs_def_stmt, &def, 3993 &dt)) 3994 return false; 3995 } 3996 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST 3997 && TREE_CODE (lhs) != FIXED_CST) 3998 return false; 3999 4000 if (TREE_CODE (rhs) == SSA_NAME) 4001 { 4002 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs); 4003 if (!vect_is_simple_use (rhs, loop_vinfo, NULL, &rhs_def_stmt, &def, 4004 &dt)) 4005 return false; 4006 } 4007 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST 4008 && TREE_CODE (rhs) != FIXED_CST) 4009 return false; 4010 4011 return true; 4012 } 4013 4014 /* vectorizable_condition. 4015 4016 Check if STMT is a conditional modify expression that can be vectorized. 4017 If VEC_STMT is also passed, vectorize the STMT: create a vectorized 4018 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it 4019 at GSI. 4020 4021 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable 4022 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in 4023 else clause if it is 2). 4024 4025 Return FALSE if not a vectorizable STMT, TRUE otherwise.
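   For example (a sketch), a scalar statement of the form

     x = a < b ? c : d

   is vectorized, when the target supports a vector condition for the chosen
   vector mode, into roughly

     vx = VEC_COND_EXPR <va < vb, vc, vd>

   operating on whole vectors of a, b, c and d at once.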
*/ 4026 4027 bool 4028 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi, 4029 gimple *vec_stmt, tree reduc_def, int reduc_index) 4030 { 4031 tree scalar_dest = NULL_TREE; 4032 tree vec_dest = NULL_TREE; 4033 tree op = NULL_TREE; 4034 tree cond_expr, then_clause, else_clause; 4035 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 4036 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 4037 tree vec_cond_lhs, vec_cond_rhs, vec_then_clause, vec_else_clause; 4038 tree vec_compare, vec_cond_expr; 4039 tree new_temp; 4040 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 4041 enum machine_mode vec_mode; 4042 tree def; 4043 enum vect_def_type dt; 4044 int nunits = TYPE_VECTOR_SUBPARTS (vectype); 4045 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits; 4046 enum tree_code code; 4047 4048 /* FORNOW: unsupported in basic block SLP. */ 4049 gcc_assert (loop_vinfo); 4050 4051 gcc_assert (ncopies >= 1); 4052 if (ncopies > 1) 4053 return false; /* FORNOW */ 4054 4055 if (!STMT_VINFO_RELEVANT_P (stmt_info)) 4056 return false; 4057 4058 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def 4059 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle 4060 && reduc_def)) 4061 return false; 4062 4063 /* FORNOW: SLP not supported. */ 4064 if (STMT_SLP_TYPE (stmt_info)) 4065 return false; 4066 4067 /* FORNOW: not yet supported. */ 4068 if (STMT_VINFO_LIVE_P (stmt_info)) 4069 { 4070 if (vect_print_dump_info (REPORT_DETAILS)) 4071 fprintf (vect_dump, "value used after loop."); 4072 return false; 4073 } 4074 4075 /* Is vectorizable conditional operation? */ 4076 if (!is_gimple_assign (stmt)) 4077 return false; 4078 4079 code = gimple_assign_rhs_code (stmt); 4080 4081 if (code != COND_EXPR) 4082 return false; 4083 4084 gcc_assert (gimple_assign_single_p (stmt)); 4085 op = gimple_assign_rhs1 (stmt); 4086 cond_expr = TREE_OPERAND (op, 0); 4087 then_clause = TREE_OPERAND (op, 1); 4088 else_clause = TREE_OPERAND (op, 2); 4089 4090 if (!vect_is_simple_cond (cond_expr, loop_vinfo)) 4091 return false; 4092 4093 /* We do not handle two different vector types for the condition 4094 and the values. */ 4095 if (!types_compatible_p (TREE_TYPE (TREE_OPERAND (cond_expr, 0)), 4096 TREE_TYPE (vectype))) 4097 return false; 4098 4099 if (TREE_CODE (then_clause) == SSA_NAME) 4100 { 4101 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause); 4102 if (!vect_is_simple_use (then_clause, loop_vinfo, NULL, 4103 &then_def_stmt, &def, &dt)) 4104 return false; 4105 } 4106 else if (TREE_CODE (then_clause) != INTEGER_CST 4107 && TREE_CODE (then_clause) != REAL_CST 4108 && TREE_CODE (then_clause) != FIXED_CST) 4109 return false; 4110 4111 if (TREE_CODE (else_clause) == SSA_NAME) 4112 { 4113 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause); 4114 if (!vect_is_simple_use (else_clause, loop_vinfo, NULL, 4115 &else_def_stmt, &def, &dt)) 4116 return false; 4117 } 4118 else if (TREE_CODE (else_clause) != INTEGER_CST 4119 && TREE_CODE (else_clause) != REAL_CST 4120 && TREE_CODE (else_clause) != FIXED_CST) 4121 return false; 4122 4123 4124 vec_mode = TYPE_MODE (vectype); 4125 4126 if (!vec_stmt) 4127 { 4128 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type; 4129 return expand_vec_cond_expr_p (TREE_TYPE (op), vec_mode); 4130 } 4131 4132 /* Transform */ 4133 4134 /* Handle def. */ 4135 scalar_dest = gimple_assign_lhs (stmt); 4136 vec_dest = vect_create_destination_var (scalar_dest, vectype); 4137 4138 /* Handle cond expr. 
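   Fetch a vector def for each operand of the comparison; for the then/else
   clauses either use the vector defs of the scalar clauses or, when this
   COND_EXPR is part of a nested-cycle reduction, substitute REDUC_DEF for the
   clause selected by REDUC_INDEX.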
*/ 4139 vec_cond_lhs = 4140 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt, NULL); 4141 vec_cond_rhs = 4142 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt, NULL); 4143 if (reduc_index == 1) 4144 vec_then_clause = reduc_def; 4145 else 4146 vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL); 4147 if (reduc_index == 2) 4148 vec_else_clause = reduc_def; 4149 else 4150 vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL); 4151 4152 /* Arguments are ready. Create the new vector stmt. */ 4153 vec_compare = build2 (TREE_CODE (cond_expr), vectype, 4154 vec_cond_lhs, vec_cond_rhs); 4155 vec_cond_expr = build3 (VEC_COND_EXPR, vectype, 4156 vec_compare, vec_then_clause, vec_else_clause); 4157 4158 *vec_stmt = gimple_build_assign (vec_dest, vec_cond_expr); 4159 new_temp = make_ssa_name (vec_dest, *vec_stmt); 4160 gimple_assign_set_lhs (*vec_stmt, new_temp); 4161 vect_finish_stmt_generation (stmt, *vec_stmt, gsi); 4162 4163 return true; 4164 } 4165 4166 4167 /* Make sure the statement is vectorizable. */ 4168 4169 bool 4170 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node) 4171 { 4172 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 4173 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); 4174 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info); 4175 bool ok; 4176 HOST_WIDE_INT dummy; 4177 tree scalar_type, vectype; 4178 4179 if (vect_print_dump_info (REPORT_DETAILS)) 4180 { 4181 fprintf (vect_dump, "==> examining statement: "); 4182 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); 4183 } 4184 4185 if (gimple_has_volatile_ops (stmt)) 4186 { 4187 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) 4188 fprintf (vect_dump, "not vectorized: stmt has volatile operands"); 4189 4190 return false; 4191 } 4192 4193 /* Skip stmts that do not need to be vectorized. In loops this is expected 4194 to include: 4195 - the COND_EXPR which is the loop exit condition 4196 - any LABEL_EXPRs in the loop 4197 - computations that are used only for array indexing or loop control. 4198 In basic blocks we only analyze statements that are a part of some SLP 4199 instance, therefore, all the statements are relevant. 
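   In the loop case, for example, in

     for (i = 0; i < n; i++)
       a[i] = b[i] + 1;

   the index increment and the exit test are used only for loop control and
   array indexing, so they are not marked relevant; only the load, the add and
   the store are vectorized.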
*/ 4200 4201 if (!STMT_VINFO_RELEVANT_P (stmt_info) 4202 && !STMT_VINFO_LIVE_P (stmt_info)) 4203 { 4204 if (vect_print_dump_info (REPORT_DETAILS)) 4205 fprintf (vect_dump, "irrelevant."); 4206 4207 return true; 4208 } 4209 4210 switch (STMT_VINFO_DEF_TYPE (stmt_info)) 4211 { 4212 case vect_internal_def: 4213 break; 4214 4215 case vect_reduction_def: 4216 case vect_nested_cycle: 4217 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer 4218 || relevance == vect_used_in_outer_by_reduction 4219 || relevance == vect_unused_in_scope)); 4220 break; 4221 4222 case vect_induction_def: 4223 case vect_constant_def: 4224 case vect_external_def: 4225 case vect_unknown_def_type: 4226 default: 4227 gcc_unreachable (); 4228 } 4229 4230 if (bb_vinfo) 4231 { 4232 gcc_assert (PURE_SLP_STMT (stmt_info)); 4233 4234 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy); 4235 if (vect_print_dump_info (REPORT_DETAILS)) 4236 { 4237 fprintf (vect_dump, "get vectype for scalar type: "); 4238 print_generic_expr (vect_dump, scalar_type, TDF_SLIM); 4239 } 4240 4241 vectype = get_vectype_for_scalar_type (scalar_type); 4242 if (!vectype) 4243 { 4244 if (vect_print_dump_info (REPORT_DETAILS)) 4245 { 4246 fprintf (vect_dump, "not SLPed: unsupported data-type "); 4247 print_generic_expr (vect_dump, scalar_type, TDF_SLIM); 4248 } 4249 return false; 4250 } 4251 4252 if (vect_print_dump_info (REPORT_DETAILS)) 4253 { 4254 fprintf (vect_dump, "vectype: "); 4255 print_generic_expr (vect_dump, vectype, TDF_SLIM); 4256 } 4257 4258 STMT_VINFO_VECTYPE (stmt_info) = vectype; 4259 } 4260 4261 if (STMT_VINFO_RELEVANT_P (stmt_info)) 4262 { 4263 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt)))); 4264 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)); 4265 *need_to_vectorize = true; 4266 } 4267 4268 ok = true; 4269 if (!bb_vinfo 4270 && (STMT_VINFO_RELEVANT_P (stmt_info) 4271 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)) 4272 ok = (vectorizable_type_promotion (stmt, NULL, NULL, NULL) 4273 || vectorizable_type_demotion (stmt, NULL, NULL, NULL) 4274 || vectorizable_conversion (stmt, NULL, NULL, NULL) 4275 || vectorizable_shift (stmt, NULL, NULL, NULL) 4276 || vectorizable_operation (stmt, NULL, NULL, NULL) 4277 || vectorizable_assignment (stmt, NULL, NULL, NULL) 4278 || vectorizable_load (stmt, NULL, NULL, NULL, NULL) 4279 || vectorizable_call (stmt, NULL, NULL) 4280 || vectorizable_store (stmt, NULL, NULL, NULL) 4281 || vectorizable_reduction (stmt, NULL, NULL) 4282 || vectorizable_condition (stmt, NULL, NULL, NULL, 0)); 4283 else 4284 { 4285 if (bb_vinfo) 4286 ok = (vectorizable_shift (stmt, NULL, NULL, node) 4287 || vectorizable_operation (stmt, NULL, NULL, node) 4288 || vectorizable_assignment (stmt, NULL, NULL, node) 4289 || vectorizable_load (stmt, NULL, NULL, node, NULL) 4290 || vectorizable_store (stmt, NULL, NULL, node)); 4291 } 4292 4293 if (!ok) 4294 { 4295 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) 4296 { 4297 fprintf (vect_dump, "not vectorized: relevant stmt not "); 4298 fprintf (vect_dump, "supported: "); 4299 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); 4300 } 4301 4302 return false; 4303 } 4304 4305 if (bb_vinfo) 4306 return true; 4307 4308 /* Stmts that are (also) "live" (i.e. - that are used out of the loop) 4309 need extra handling, except for vectorizable reductions. 
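   (For example, a value computed inside the loop that is also read after the
   loop - say the last element loaded - is "live" and is checked by
   vectorizable_live_operation below.)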
*/ 4310 if (STMT_VINFO_LIVE_P (stmt_info) 4311 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type) 4312 ok = vectorizable_live_operation (stmt, NULL, NULL); 4313 4314 if (!ok) 4315 { 4316 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) 4317 { 4318 fprintf (vect_dump, "not vectorized: live stmt not "); 4319 fprintf (vect_dump, "supported: "); 4320 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); 4321 } 4322 4323 return false; 4324 } 4325 4326 if (!PURE_SLP_STMT (stmt_info)) 4327 { 4328 /* Groups of strided accesses whose size is not a power of 2 are not 4329 vectorizable yet using loop-vectorization. Therefore, if this stmt 4330 feeds non-SLP-able stmts (i.e., this stmt has to be both SLPed and 4331 loop-based vectorized), the loop cannot be vectorized. */ 4332 if (STMT_VINFO_STRIDED_ACCESS (stmt_info) 4333 && exact_log2 (DR_GROUP_SIZE (vinfo_for_stmt ( 4334 DR_GROUP_FIRST_DR (stmt_info)))) == -1) 4335 { 4336 if (vect_print_dump_info (REPORT_DETAILS)) 4337 { 4338 fprintf (vect_dump, "not vectorized: the size of group " 4339 "of strided accesses is not a power of 2"); 4340 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); 4341 } 4342 4343 return false; 4344 } 4345 } 4346 4347 return true; 4348 } 4349 4350 4351 /* Function vect_transform_stmt. 4352 4353 Create a vectorized stmt to replace STMT, and insert it at BSI. */ 4354 4355 bool 4356 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi, 4357 bool *strided_store, slp_tree slp_node, 4358 slp_instance slp_node_instance) 4359 { 4360 bool is_store = false; 4361 gimple vec_stmt = NULL; 4362 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 4363 gimple orig_stmt_in_pattern; 4364 bool done; 4365 4366 switch (STMT_VINFO_TYPE (stmt_info)) 4367 { 4368 case type_demotion_vec_info_type: 4369 done = vectorizable_type_demotion (stmt, gsi, &vec_stmt, slp_node); 4370 gcc_assert (done); 4371 break; 4372 4373 case type_promotion_vec_info_type: 4374 done = vectorizable_type_promotion (stmt, gsi, &vec_stmt, slp_node); 4375 gcc_assert (done); 4376 break; 4377 4378 case type_conversion_vec_info_type: 4379 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node); 4380 gcc_assert (done); 4381 break; 4382 4383 case induc_vec_info_type: 4384 gcc_assert (!slp_node); 4385 done = vectorizable_induction (stmt, gsi, &vec_stmt); 4386 gcc_assert (done); 4387 break; 4388 4389 case shift_vec_info_type: 4390 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node); 4391 gcc_assert (done); 4392 break; 4393 4394 case op_vec_info_type: 4395 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node); 4396 gcc_assert (done); 4397 break; 4398 4399 case assignment_vec_info_type: 4400 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node); 4401 gcc_assert (done); 4402 break; 4403 4404 case load_vec_info_type: 4405 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node, 4406 slp_node_instance); 4407 gcc_assert (done); 4408 break; 4409 4410 case store_vec_info_type: 4411 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node); 4412 gcc_assert (done); 4413 if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && !slp_node) 4414 { 4415 /* In case of interleaving, the whole chain is vectorized when the 4416 last store in the chain is reached. Store stmts before the last 4417 one are skipped, and there vec_stmt_info shouldn't be freed 4418 meanwhile. 
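   For example, for an interleaved pair

     a[2*i]   = x;
     a[2*i+1] = y;

   nothing is emitted when the first store of the group is transformed; the
   vector stores (and the permutation of their operands) are all generated
   once the last store of the chain is reached.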
*/ 4419 *strided_store = true; 4420 if (STMT_VINFO_VEC_STMT (stmt_info)) 4421 is_store = true; 4422 } 4423 else 4424 is_store = true; 4425 break; 4426 4427 case condition_vec_info_type: 4428 gcc_assert (!slp_node); 4429 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0); 4430 gcc_assert (done); 4431 break; 4432 4433 case call_vec_info_type: 4434 gcc_assert (!slp_node); 4435 done = vectorizable_call (stmt, gsi, &vec_stmt); 4436 break; 4437 4438 case reduc_vec_info_type: 4439 gcc_assert (!slp_node); 4440 done = vectorizable_reduction (stmt, gsi, &vec_stmt); 4441 gcc_assert (done); 4442 break; 4443 4444 default: 4445 if (!STMT_VINFO_LIVE_P (stmt_info)) 4446 { 4447 if (vect_print_dump_info (REPORT_DETAILS)) 4448 fprintf (vect_dump, "stmt not supported."); 4449 gcc_unreachable (); 4450 } 4451 } 4452 4453 /* Handle inner-loop stmts whose DEF is used in the loop-nest that 4454 is being vectorized, but outside the immediately enclosing loop. */ 4455 if (vec_stmt 4456 && STMT_VINFO_LOOP_VINFO (stmt_info) 4457 && nested_in_vect_loop_p (LOOP_VINFO_LOOP ( 4458 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt) 4459 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type 4460 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer 4461 || STMT_VINFO_RELEVANT (stmt_info) == 4462 vect_used_in_outer_by_reduction)) 4463 { 4464 struct loop *innerloop = LOOP_VINFO_LOOP ( 4465 STMT_VINFO_LOOP_VINFO (stmt_info))->inner; 4466 imm_use_iterator imm_iter; 4467 use_operand_p use_p; 4468 tree scalar_dest; 4469 gimple exit_phi; 4470 4471 if (vect_print_dump_info (REPORT_DETAILS)) 4472 fprintf (vect_dump, "Record the vdef for outer-loop vectorization."); 4473 4474 /* Find the relevant loop-exit phi-node, and reord the vec_stmt there 4475 (to be used when vectorizing outer-loop stmts that use the DEF of 4476 STMT). */ 4477 if (gimple_code (stmt) == GIMPLE_PHI) 4478 scalar_dest = PHI_RESULT (stmt); 4479 else 4480 scalar_dest = gimple_assign_lhs (stmt); 4481 4482 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest) 4483 { 4484 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p)))) 4485 { 4486 exit_phi = USE_STMT (use_p); 4487 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt; 4488 } 4489 } 4490 } 4491 4492 /* Handle stmts whose DEF is used outside the loop-nest that is 4493 being vectorized. */ 4494 if (STMT_VINFO_LIVE_P (stmt_info) 4495 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type) 4496 { 4497 done = vectorizable_live_operation (stmt, gsi, &vec_stmt); 4498 gcc_assert (done); 4499 } 4500 4501 if (vec_stmt) 4502 { 4503 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt; 4504 orig_stmt_in_pattern = STMT_VINFO_RELATED_STMT (stmt_info); 4505 if (orig_stmt_in_pattern) 4506 { 4507 stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt_in_pattern); 4508 /* STMT was inserted by the vectorizer to replace a computation idiom. 4509 ORIG_STMT_IN_PATTERN is a stmt in the original sequence that 4510 computed this idiom. We need to record a pointer to VEC_STMT in 4511 the stmt_info of ORIG_STMT_IN_PATTERN. See more details in the 4512 documentation of vect_pattern_recog. */ 4513 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo)) 4514 { 4515 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt); 4516 STMT_VINFO_VEC_STMT (stmt_vinfo) = vec_stmt; 4517 } 4518 } 4519 } 4520 4521 return is_store; 4522 } 4523 4524 4525 /* Remove a group of stores (for SLP or interleaving), free their 4526 stmt_vec_info. 
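   The stores are walked along the DR_GROUP_NEXT_DR chain starting from
   FIRST_STMT; each one is removed from the instruction stream and its
   stmt_vec_info is freed.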
*/ 4527 4528 void 4529 vect_remove_stores (gimple first_stmt) 4530 { 4531 gimple next = first_stmt; 4532 gimple tmp; 4533 gimple_stmt_iterator next_si; 4534 4535 while (next) 4536 { 4537 /* Free the attached stmt_vec_info and remove the stmt. */ 4538 next_si = gsi_for_stmt (next); 4539 gsi_remove (&next_si, true); 4540 tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (next)); 4541 free_stmt_vec_info (next); 4542 next = tmp; 4543 } 4544 } 4545 4546 4547 /* Function new_stmt_vec_info. 4548 4549 Create and initialize a new stmt_vec_info struct for STMT. */ 4550 4551 stmt_vec_info 4552 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo, 4553 bb_vec_info bb_vinfo) 4554 { 4555 stmt_vec_info res; 4556 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info)); 4557 4558 STMT_VINFO_TYPE (res) = undef_vec_info_type; 4559 STMT_VINFO_STMT (res) = stmt; 4560 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo; 4561 STMT_VINFO_BB_VINFO (res) = bb_vinfo; 4562 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope; 4563 STMT_VINFO_LIVE_P (res) = false; 4564 STMT_VINFO_VECTYPE (res) = NULL; 4565 STMT_VINFO_VEC_STMT (res) = NULL; 4566 STMT_VINFO_IN_PATTERN_P (res) = false; 4567 STMT_VINFO_RELATED_STMT (res) = NULL; 4568 STMT_VINFO_DATA_REF (res) = NULL; 4569 4570 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL; 4571 STMT_VINFO_DR_OFFSET (res) = NULL; 4572 STMT_VINFO_DR_INIT (res) = NULL; 4573 STMT_VINFO_DR_STEP (res) = NULL; 4574 STMT_VINFO_DR_ALIGNED_TO (res) = NULL; 4575 4576 if (gimple_code (stmt) == GIMPLE_PHI 4577 && is_loop_header_bb_p (gimple_bb (stmt))) 4578 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type; 4579 else 4580 STMT_VINFO_DEF_TYPE (res) = vect_internal_def; 4581 4582 STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5); 4583 STMT_VINFO_INSIDE_OF_LOOP_COST (res) = 0; 4584 STMT_VINFO_OUTSIDE_OF_LOOP_COST (res) = 0; 4585 STMT_SLP_TYPE (res) = loop_vect; 4586 DR_GROUP_FIRST_DR (res) = NULL; 4587 DR_GROUP_NEXT_DR (res) = NULL; 4588 DR_GROUP_SIZE (res) = 0; 4589 DR_GROUP_STORE_COUNT (res) = 0; 4590 DR_GROUP_GAP (res) = 0; 4591 DR_GROUP_SAME_DR_STMT (res) = NULL; 4592 DR_GROUP_READ_WRITE_DEPENDENCE (res) = false; 4593 4594 return res; 4595 } 4596 4597 4598 /* Create a hash table for stmt_vec_info. */ 4599 4600 void 4601 init_stmt_vec_info_vec (void) 4602 { 4603 gcc_assert (!stmt_vec_info_vec); 4604 stmt_vec_info_vec = VEC_alloc (vec_void_p, heap, 50); 4605 } 4606 4607 4608 /* Free hash table for stmt_vec_info. */ 4609 4610 void 4611 free_stmt_vec_info_vec (void) 4612 { 4613 gcc_assert (stmt_vec_info_vec); 4614 VEC_free (vec_void_p, heap, stmt_vec_info_vec); 4615 } 4616 4617 4618 /* Free stmt vectorization related info. */ 4619 4620 void 4621 free_stmt_vec_info (gimple stmt) 4622 { 4623 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 4624 4625 if (!stmt_info) 4626 return; 4627 4628 VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info)); 4629 set_vinfo_for_stmt (stmt, NULL); 4630 free (stmt_info); 4631 } 4632 4633 4634 /* Function get_vectype_for_scalar_type. 4635 4636 Returns the vector type corresponding to SCALAR_TYPE as supported 4637 by the target. */ 4638 4639 tree 4640 get_vectype_for_scalar_type (tree scalar_type) 4641 { 4642 enum machine_mode inner_mode = TYPE_MODE (scalar_type); 4643 unsigned int nbytes = GET_MODE_SIZE (inner_mode); 4644 int nunits; 4645 tree vectype; 4646 4647 if (nbytes == 0 || nbytes >= UNITS_PER_SIMD_WORD (inner_mode)) 4648 return NULL_TREE; 4649 4650 /* We can't build a vector type of elements with alignment bigger than 4651 their size. 
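   (For example, a 4-byte scalar type that has been over-aligned to 8 bytes is
   rejected here, since the elements of a vector of that type could not all
   keep their declared alignment.)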
*/ 4652 if (nbytes < TYPE_ALIGN_UNIT (scalar_type)) 4653 return NULL_TREE; 4654 4655 /* If we'd build a vector type of elements whose mode precision doesn't 4656 match their types precision we'll get mismatched types on vector 4657 extracts via BIT_FIELD_REFs. This effectively means we disable 4658 vectorization of bool and/or enum types in some languages. */ 4659 if (INTEGRAL_TYPE_P (scalar_type) 4660 && GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)) 4661 return NULL_TREE; 4662 4663 /* FORNOW: Only a single vector size per mode (UNITS_PER_SIMD_WORD) 4664 is expected. */ 4665 nunits = UNITS_PER_SIMD_WORD (inner_mode) / nbytes; 4666 4667 vectype = build_vector_type (scalar_type, nunits); 4668 if (vect_print_dump_info (REPORT_DETAILS)) 4669 { 4670 fprintf (vect_dump, "get vectype with %d units of type ", nunits); 4671 print_generic_expr (vect_dump, scalar_type, TDF_SLIM); 4672 } 4673 4674 if (!vectype) 4675 return NULL_TREE; 4676 4677 if (vect_print_dump_info (REPORT_DETAILS)) 4678 { 4679 fprintf (vect_dump, "vectype: "); 4680 print_generic_expr (vect_dump, vectype, TDF_SLIM); 4681 } 4682 4683 if (!VECTOR_MODE_P (TYPE_MODE (vectype)) 4684 && !INTEGRAL_MODE_P (TYPE_MODE (vectype))) 4685 { 4686 if (vect_print_dump_info (REPORT_DETAILS)) 4687 fprintf (vect_dump, "mode not supported by target."); 4688 return NULL_TREE; 4689 } 4690 4691 return vectype; 4692 } 4693 4694 /* Function vect_is_simple_use. 4695 4696 Input: 4697 LOOP_VINFO - the vect info of the loop that is being vectorized. 4698 BB_VINFO - the vect info of the basic block that is being vectorized. 4699 OPERAND - operand of a stmt in the loop or bb. 4700 DEF - the defining stmt in case OPERAND is an SSA_NAME. 4701 4702 Returns whether a stmt with OPERAND can be vectorized. 4703 For loops, supportable operands are constants, loop invariants, and operands 4704 that are defined by the current iteration of the loop. Unsupportable 4705 operands are those that are defined by a previous iteration of the loop (as 4706 is the case in reduction/induction computations). 4707 For basic blocks, supportable operands are constants and bb invariants. 4708 For now, operands defined outside the basic block are not supported. 
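   For example, in a loop statement a[i] = b[i] + x, the value loaded from
   b[i] (defined in the current iteration) and the loop-invariant x are both
   simple uses, whereas an operand carried around the loop by a phi from a
   previous iteration (as in a reduction s = s + a[i]) is, per the above, not.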
*/ 4709 4710 bool 4711 vect_is_simple_use (tree operand, loop_vec_info loop_vinfo, 4712 bb_vec_info bb_vinfo, gimple *def_stmt, 4713 tree *def, enum vect_def_type *dt) 4714 { 4715 basic_block bb; 4716 stmt_vec_info stmt_vinfo; 4717 struct loop *loop = NULL; 4718 4719 if (loop_vinfo) 4720 loop = LOOP_VINFO_LOOP (loop_vinfo); 4721 4722 *def_stmt = NULL; 4723 *def = NULL_TREE; 4724 4725 if (vect_print_dump_info (REPORT_DETAILS)) 4726 { 4727 fprintf (vect_dump, "vect_is_simple_use: operand "); 4728 print_generic_expr (vect_dump, operand, TDF_SLIM); 4729 } 4730 4731 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST) 4732 { 4733 *dt = vect_constant_def; 4734 return true; 4735 } 4736 4737 if (is_gimple_min_invariant (operand)) 4738 { 4739 *def = operand; 4740 *dt = vect_external_def; 4741 return true; 4742 } 4743 4744 if (TREE_CODE (operand) == PAREN_EXPR) 4745 { 4746 if (vect_print_dump_info (REPORT_DETAILS)) 4747 fprintf (vect_dump, "non-associatable copy."); 4748 operand = TREE_OPERAND (operand, 0); 4749 } 4750 4751 if (TREE_CODE (operand) != SSA_NAME) 4752 { 4753 if (vect_print_dump_info (REPORT_DETAILS)) 4754 fprintf (vect_dump, "not ssa-name."); 4755 return false; 4756 } 4757 4758 *def_stmt = SSA_NAME_DEF_STMT (operand); 4759 if (*def_stmt == NULL) 4760 { 4761 if (vect_print_dump_info (REPORT_DETAILS)) 4762 fprintf (vect_dump, "no def_stmt."); 4763 return false; 4764 } 4765 4766 if (vect_print_dump_info (REPORT_DETAILS)) 4767 { 4768 fprintf (vect_dump, "def_stmt: "); 4769 print_gimple_stmt (vect_dump, *def_stmt, 0, TDF_SLIM); 4770 } 4771 4772 /* Empty stmt is expected only in case of a function argument. 4773 (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN). */ 4774 if (gimple_nop_p (*def_stmt)) 4775 { 4776 *def = operand; 4777 *dt = vect_external_def; 4778 return true; 4779 } 4780 4781 bb = gimple_bb (*def_stmt); 4782 4783 if ((loop && !flow_bb_inside_loop_p (loop, bb)) 4784 || (!loop && bb != BB_VINFO_BB (bb_vinfo)) 4785 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI)) 4786 *dt = vect_external_def; 4787 else 4788 { 4789 stmt_vinfo = vinfo_for_stmt (*def_stmt); 4790 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo); 4791 } 4792 4793 if (*dt == vect_unknown_def_type) 4794 { 4795 if (vect_print_dump_info (REPORT_DETAILS)) 4796 fprintf (vect_dump, "Unsupported pattern."); 4797 return false; 4798 } 4799 4800 if (vect_print_dump_info (REPORT_DETAILS)) 4801 fprintf (vect_dump, "type of def: %d.",*dt); 4802 4803 switch (gimple_code (*def_stmt)) 4804 { 4805 case GIMPLE_PHI: 4806 *def = gimple_phi_result (*def_stmt); 4807 break; 4808 4809 case GIMPLE_ASSIGN: 4810 *def = gimple_assign_lhs (*def_stmt); 4811 break; 4812 4813 case GIMPLE_CALL: 4814 *def = gimple_call_lhs (*def_stmt); 4815 if (*def != NULL) 4816 break; 4817 /* FALLTHRU */ 4818 default: 4819 if (vect_print_dump_info (REPORT_DETAILS)) 4820 fprintf (vect_dump, "unsupported defining stmt: "); 4821 return false; 4822 } 4823 4824 return true; 4825 } 4826 4827 4828 /* Function supportable_widening_operation 4829 4830 Check whether an operation represented by the code CODE is a 4831 widening operation that is supported by the target platform in 4832 vector form (i.e., when operating on arguments of type VECTYPE). 4833 4834 Widening operations we currently support are NOP (CONVERT), FLOAT 4835 and WIDEN_MULT. This function checks if these operations are supported 4836 by the target platform either directly (via vector tree-codes), or via 4837 target builtins. 
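   For example, a widening multiplication of two V8HI operands into V4SI
   results can be supported either directly by the tree-code pair
   {VEC_WIDEN_MULT_HI_EXPR, VEC_WIDEN_MULT_LO_EXPR} or, on targets that
   provide them, by the builtin pair {mult_even, mult_odd}.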
4838 4839 Output: 4840 - CODE1 and CODE2 are codes of vector operations to be used when 4841 vectorizing the operation, if available. 4842 - DECL1 and DECL2 are decls of target builtin functions to be used 4843 when vectorizing the operation, if available. In this case, 4844 CODE1 and CODE2 are CALL_EXPR. 4845 - MULTI_STEP_CVT determines the number of required intermediate steps in 4846 case of multi-step conversion (like char->short->int - in that case 4847 MULTI_STEP_CVT will be 1). 4848 - INTERM_TYPES contains the intermediate type required to perform the 4849 widening operation (short in the above example). */ 4850 4851 bool 4852 supportable_widening_operation (enum tree_code code, gimple stmt, tree vectype, 4853 tree *decl1, tree *decl2, 4854 enum tree_code *code1, enum tree_code *code2, 4855 int *multi_step_cvt, 4856 VEC (tree, heap) **interm_types) 4857 { 4858 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 4859 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info); 4860 struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info); 4861 bool ordered_p; 4862 enum machine_mode vec_mode; 4863 enum insn_code icode1, icode2; 4864 optab optab1, optab2; 4865 tree type = gimple_expr_type (stmt); 4866 tree wide_vectype = get_vectype_for_scalar_type (type); 4867 enum tree_code c1, c2; 4868 4869 /* The result of a vectorized widening operation usually requires two vectors 4870 (because the widened results do not fit in one vector). The generated 4871 vector results would normally be expected to be generated in the same 4872 order as in the original scalar computation, i.e. if 8 results are 4873 generated in each vector iteration, they are to be organized as follows: 4874 vect1: [res1,res2,res3,res4], vect2: [res5,res6,res7,res8]. 4875 4876 However, in the special case that the result of the widening operation is 4877 used in a reduction computation only, the order doesn't matter (because 4878 when vectorizing a reduction we change the order of the computation). 4879 Some targets can take advantage of this and generate more efficient code. 4880 For example, targets like Altivec, which support widen_mult using a sequence 4881 of {mult_even,mult_odd}, generate the following vectors: 4882 vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8]. 4883 4884 When vectorizing outer-loops, we execute the inner-loop sequentially 4885 (each vectorized inner-loop iteration contributes to VF outer-loop 4886 iterations in parallel). We therefore don't allow changing the order 4887 of the computation in the inner-loop during outer-loop vectorization.
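   Hence the unordered (even/odd) form is considered below only when the
   widening result is used solely by a reduction and the stmt is not inside a
   nested loop (and, further down, only for WIDEN_MULT_EXPR when the target
   provides the even/odd builtins).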
*/ 4888 4889 if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction 4890 && !nested_in_vect_loop_p (vect_loop, stmt)) 4891 ordered_p = false; 4892 else 4893 ordered_p = true; 4894 4895 if (!ordered_p 4896 && code == WIDEN_MULT_EXPR 4897 && targetm.vectorize.builtin_mul_widen_even 4898 && targetm.vectorize.builtin_mul_widen_even (vectype) 4899 && targetm.vectorize.builtin_mul_widen_odd 4900 && targetm.vectorize.builtin_mul_widen_odd (vectype)) 4901 { 4902 if (vect_print_dump_info (REPORT_DETAILS)) 4903 fprintf (vect_dump, "Unordered widening operation detected."); 4904 4905 *code1 = *code2 = CALL_EXPR; 4906 *decl1 = targetm.vectorize.builtin_mul_widen_even (vectype); 4907 *decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype); 4908 return true; 4909 } 4910 4911 switch (code) 4912 { 4913 case WIDEN_MULT_EXPR: 4914 if (BYTES_BIG_ENDIAN) 4915 { 4916 c1 = VEC_WIDEN_MULT_HI_EXPR; 4917 c2 = VEC_WIDEN_MULT_LO_EXPR; 4918 } 4919 else 4920 { 4921 c2 = VEC_WIDEN_MULT_HI_EXPR; 4922 c1 = VEC_WIDEN_MULT_LO_EXPR; 4923 } 4924 break; 4925 4926 CASE_CONVERT: 4927 if (BYTES_BIG_ENDIAN) 4928 { 4929 c1 = VEC_UNPACK_HI_EXPR; 4930 c2 = VEC_UNPACK_LO_EXPR; 4931 } 4932 else 4933 { 4934 c2 = VEC_UNPACK_HI_EXPR; 4935 c1 = VEC_UNPACK_LO_EXPR; 4936 } 4937 break; 4938 4939 case FLOAT_EXPR: 4940 if (BYTES_BIG_ENDIAN) 4941 { 4942 c1 = VEC_UNPACK_FLOAT_HI_EXPR; 4943 c2 = VEC_UNPACK_FLOAT_LO_EXPR; 4944 } 4945 else 4946 { 4947 c2 = VEC_UNPACK_FLOAT_HI_EXPR; 4948 c1 = VEC_UNPACK_FLOAT_LO_EXPR; 4949 } 4950 break; 4951 4952 case FIX_TRUNC_EXPR: 4953 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/ 4954 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for 4955 computing the operation. */ 4956 return false; 4957 4958 default: 4959 gcc_unreachable (); 4960 } 4961 4962 if (code == FIX_TRUNC_EXPR) 4963 { 4964 /* The signedness is determined from output operand. */ 4965 optab1 = optab_for_tree_code (c1, type, optab_default); 4966 optab2 = optab_for_tree_code (c2, type, optab_default); 4967 } 4968 else 4969 { 4970 optab1 = optab_for_tree_code (c1, vectype, optab_default); 4971 optab2 = optab_for_tree_code (c2, vectype, optab_default); 4972 } 4973 4974 if (!optab1 || !optab2) 4975 return false; 4976 4977 vec_mode = TYPE_MODE (vectype); 4978 if ((icode1 = optab_handler (optab1, vec_mode)->insn_code) == CODE_FOR_nothing 4979 || (icode2 = optab_handler (optab2, vec_mode)->insn_code) 4980 == CODE_FOR_nothing) 4981 return false; 4982 4983 /* Check if it's a multi-step conversion that can be done using intermediate 4984 types. */ 4985 if (insn_data[icode1].operand[0].mode != TYPE_MODE (wide_vectype) 4986 || insn_data[icode2].operand[0].mode != TYPE_MODE (wide_vectype)) 4987 { 4988 int i; 4989 tree prev_type = vectype, intermediate_type; 4990 enum machine_mode intermediate_mode, prev_mode = vec_mode; 4991 optab optab3, optab4; 4992 4993 if (!CONVERT_EXPR_CODE_P (code)) 4994 return false; 4995 4996 *code1 = c1; 4997 *code2 = c2; 4998 4999 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS 5000 intermediate steps in promotion sequence. We try MAX_INTERM_CVT_STEPS 5001 to get to NARROW_VECTYPE, and fail if we do not. 
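   For example, converting a vector of chars to a vector of ints with 128-bit
   vectors goes V16QI -> V8HI -> V4SI, i.e. one intermediate step
   (char->short->int), so on success MULTI_STEP_CVT is 1 and INTERM_TYPES
   holds the single intermediate vector type.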
*/ 5002 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS); 5003 for (i = 0; i < 3; i++) 5004 { 5005 intermediate_mode = insn_data[icode1].operand[0].mode; 5006 intermediate_type = lang_hooks.types.type_for_mode (intermediate_mode, 5007 TYPE_UNSIGNED (prev_type)); 5008 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default); 5009 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default); 5010 5011 if (!optab3 || !optab4 5012 || (icode1 = optab1->handlers[(int) prev_mode].insn_code) 5013 == CODE_FOR_nothing 5014 || insn_data[icode1].operand[0].mode != intermediate_mode 5015 || (icode2 = optab2->handlers[(int) prev_mode].insn_code) 5016 == CODE_FOR_nothing 5017 || insn_data[icode2].operand[0].mode != intermediate_mode 5018 || (icode1 = optab3->handlers[(int) intermediate_mode].insn_code) 5019 == CODE_FOR_nothing 5020 || (icode2 = optab4->handlers[(int) intermediate_mode].insn_code) 5021 == CODE_FOR_nothing) 5022 return false; 5023 5024 VEC_quick_push (tree, *interm_types, intermediate_type); 5025 (*multi_step_cvt)++; 5026 5027 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype) 5028 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype)) 5029 return true; 5030 5031 prev_type = intermediate_type; 5032 prev_mode = intermediate_mode; 5033 } 5034 5035 return false; 5036 } 5037 5038 *code1 = c1; 5039 *code2 = c2; 5040 return true; 5041 } 5042 5043 5044 /* Function supportable_narrowing_operation 5045 5046 Check whether an operation represented by the code CODE is a 5047 narrowing operation that is supported by the target platform in 5048 vector form (i.e., when operating on arguments of type VECTYPE). 5049 5050 Narrowing operations we currently support are NOP (CONVERT) and 5051 FIX_TRUNC. This function checks if these operations are supported by 5052 the target platform directly via vector tree-codes. 5053 5054 Output: 5055 - CODE1 is the code of a vector operation to be used when 5056 vectorizing the operation, if available. 5057 - MULTI_STEP_CVT determines the number of required intermediate steps in 5058 case of multi-step conversion (like int->short->char - in that case 5059 MULTI_STEP_CVT will be 1). 5060 - INTERM_TYPES contains the intermediate type required to perform the 5061 narrowing operation (short in the above example). */ 5062 5063 bool 5064 supportable_narrowing_operation (enum tree_code code, 5065 const_gimple stmt, tree vectype, 5066 enum tree_code *code1, int *multi_step_cvt, 5067 VEC (tree, heap) **interm_types) 5068 { 5069 enum machine_mode vec_mode; 5070 enum insn_code icode1; 5071 optab optab1, interm_optab; 5072 tree type = gimple_expr_type (stmt); 5073 tree narrow_vectype = get_vectype_for_scalar_type (type); 5074 enum tree_code c1; 5075 tree intermediate_type, prev_type; 5076 int i; 5077 5078 switch (code) 5079 { 5080 CASE_CONVERT: 5081 c1 = VEC_PACK_TRUNC_EXPR; 5082 break; 5083 5084 case FIX_TRUNC_EXPR: 5085 c1 = VEC_PACK_FIX_TRUNC_EXPR; 5086 break; 5087 5088 case FLOAT_EXPR: 5089 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR 5090 tree code and optabs used for computing the operation. */ 5091 return false; 5092 5093 default: 5094 gcc_unreachable (); 5095 } 5096 5097 if (code == FIX_TRUNC_EXPR) 5098 /* The signedness is determined from output operand. 
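   (i.e. the optab is looked up with the scalar result type TYPE rather than
   with VECTYPE below, presumably because the float->integer packing operation
   comes in signed and unsigned variants chosen by the result type.)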
*/ 5099 optab1 = optab_for_tree_code (c1, type, optab_default); 5100 else 5101 optab1 = optab_for_tree_code (c1, vectype, optab_default); 5102 5103 if (!optab1) 5104 return false; 5105 5106 vec_mode = TYPE_MODE (vectype); 5107 if ((icode1 = optab_handler (optab1, vec_mode)->insn_code) 5108 == CODE_FOR_nothing) 5109 return false; 5110 5111 /* Check if it's a multi-step conversion that can be done using intermediate 5112 types. */ 5113 if (insn_data[icode1].operand[0].mode != TYPE_MODE (narrow_vectype)) 5114 { 5115 enum machine_mode intermediate_mode, prev_mode = vec_mode; 5116 5117 *code1 = c1; 5118 prev_type = vectype; 5119 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS 5120 intermediate steps in promotion sequence. We try MAX_INTERM_CVT_STEPS 5121 to get to NARROW_VECTYPE, and fail if we do not. */ 5122 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS); 5123 for (i = 0; i < 3; i++) 5124 { 5125 intermediate_mode = insn_data[icode1].operand[0].mode; 5126 intermediate_type = lang_hooks.types.type_for_mode (intermediate_mode, 5127 TYPE_UNSIGNED (prev_type)); 5128 interm_optab = optab_for_tree_code (c1, intermediate_type, 5129 optab_default); 5130 if (!interm_optab 5131 || (icode1 = optab1->handlers[(int) prev_mode].insn_code) 5132 == CODE_FOR_nothing 5133 || insn_data[icode1].operand[0].mode != intermediate_mode 5134 || (icode1 5135 = interm_optab->handlers[(int) intermediate_mode].insn_code) 5136 == CODE_FOR_nothing) 5137 return false; 5138 5139 VEC_quick_push (tree, *interm_types, intermediate_type); 5140 (*multi_step_cvt)++; 5141 5142 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype)) 5143 return true; 5144 5145 prev_type = intermediate_type; 5146 prev_mode = intermediate_mode; 5147 } 5148 5149 return false; 5150 } 5151 5152 *code1 = c1; 5153 return true; 5154 } 5155