/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2020 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "cgraph.h"
#include "dumpfile.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "cfgloop.h"
#include "explow.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "builtins.h"
#include "internal-fn.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "tree-ssa-loop-niter.h"
#include "gimple-fold.h"
#include "regs.h"
#include "attribs.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"

/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (class _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}

/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */
bool
stmt_in_inner_loop_p (class _stmt_vec_info *stmt_info)
{
  gimple *stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  class loop* loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}

/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
                  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
                  int misalign, enum vect_cost_model_location where)
{
  if ((kind == vector_load || kind == unaligned_load)
      && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    kind = vector_gather_load;
  if ((kind == vector_store || kind == unaligned_store)
      && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    kind = vector_scatter_store;

  stmt_info_for_cost si = { count, kind, where, stmt_info, misalign };
  body_cost_vec->safe_push (si);

  tree vectype = stmt_info ?
    stmt_vectype (stmt_info) : NULL_TREE;
  return (unsigned)
    (builtin_vectorization_cost (kind, vectype, misalign) * count);
}

/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
                         "vect_array");
}

/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT_INFO and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                   tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple *new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);

  return vect_name;
}

/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT_INFO.  */

static void
write_vector_array (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                    tree vect, tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple *new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
}

/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, tree alias_ptr_type)
{
  tree mem_ref;

  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}

/* Add a clobber of variable VAR to the vectorization of STMT_INFO.
   Emit the clobber before *GSI.  */

static void
vect_clobber_variable (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                       tree var)
{
  tree clobber = build_clobber (TREE_TYPE (var));
  gimple *new_stmt = gimple_build_assign (var, clobber);
  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
}

/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST.
   */

static void
vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
                    enum vect_relevant relevant, bool live_p)
{
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "mark relevant %d, live %d: %G", relevant, live_p,
                     stmt_info->stmt);

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      /* This is the last stmt in a sequence that was detected as a
         pattern that can potentially be vectorized.  Don't mark the stmt
         as relevant/live because it's not going to be vectorized.
         Instead mark the pattern-stmt that replaces it.  */

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "last stmt in pattern. don't mark"
                         " relevant/live.\n");
      stmt_vec_info old_stmt_info = stmt_info;
      stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
      save_relevant = STMT_VINFO_RELEVANT (stmt_info);
      save_live_p = STMT_VINFO_LIVE_P (stmt_info);
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt_info);
}


/* Function is_simple_and_all_uses_invariant

   Return true if STMT_INFO is simple and all uses of it are invariant.  */

bool
is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
                                  loop_vec_info loop_vinfo)
{
  tree op;
  ssa_op_iter iter;

  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
  if (!stmt)
    return false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
    {
      enum vect_def_type dt = vect_uninitialized_def;

      if (!vect_is_simple_use (op, loop_vinfo, &dt))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
          return false;
        }

      if (dt != vect_external_def && dt != vect_constant_def)
        return false;
    }
  return true;
}

/* Function vect_stmt_relevant_p.

   Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
   is "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
                      enum vect_relevant *relevant, bool *live_p)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.
     */
  if (is_ctrl_stmt (stmt_info->stmt)
      && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt_info->stmt)
        && !gimple_clobber_p (stmt_info->stmt))
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vec_stmt_relevant_p: stmt has vdefs.\n");
        *relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
        {
          basic_block bb = gimple_bb (USE_STMT (use_p));
          if (!flow_bb_inside_loop_p (loop, bb))
            {
              if (is_gimple_debug (USE_STMT (use_p)))
                continue;

              if (dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location,
                                 "vec_stmt_relevant_p: used out of loop.\n");

              /* We expect all such uses to be in the loop exit phis
                 (because of loop closed form)  */
              gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
              gcc_assert (bb == single_exit (loop)->dest);

              *live_p = true;
            }
        }
    }

  if (*live_p && *relevant == vect_unused_in_scope
      && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vec_stmt_relevant_p: stmt live but not relevant.\n");
      *relevant = vect_used_only_live;
    }

  return (*live_p || *relevant);
}


/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT_INFO.  Check if USE is
   used in STMT_INFO for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
{
  tree operand;

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it's one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.
     */

  gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
  if (!assign || !gimple_assign_copy_p (assign))
    {
      gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
      if (call && gimple_call_internal_p (call))
        {
          internal_fn ifn = gimple_call_internal_fn (call);
          int mask_index = internal_fn_mask_index (ifn);
          if (mask_index >= 0
              && use == gimple_call_arg (call, mask_index))
            return true;
          int stored_value_index = internal_fn_stored_value_index (ifn);
          if (stored_value_index >= 0
              && use == gimple_call_arg (call, stored_value_index))
            return true;
          if (internal_gather_scatter_fn_p (ifn)
              && use == gimple_call_arg (call, 1))
            return true;
        }
      return false;
    }

  if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (assign);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}


/*
   Function process_use.

   Inputs:
   - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
   - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
       STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
     which does not need to be directly vectorized, then the liveness/relevance
     of the respective DEF_STMT is left unchanged.
   - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
     we skip DEF_STMT because it has already been processed.
   - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
     "relevant" will be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static opt_result
process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
             enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
             bool force)
{
  stmt_vec_info dstmt_vinfo;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
    return opt_result::success ();

  if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
    return opt_result::failure_at (stmt_vinfo->stmt,
                                   "not vectorized:"
                                   " unsupported use in stmt.\n");

  if (!dstmt_vinfo)
    return opt_result::success ();

  basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
  basic_block bb = gimple_bb (stmt_vinfo->stmt);

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
     We have to force the stmt live since the epilogue loop needs it to
     continue computing the reduction.
     */
  if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "reduc-stmt defining reduc-phi in the same nest.\n");
      vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
      return opt_result::success ();
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
        outer-loop-header-bb:
                d = dstmt_vinfo
        inner-loop:
                stmt # use (d)
        outer-loop-tail-bb:
                ...		  */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
                      vect_used_in_scope : vect_unused_in_scope;
          break;

        case vect_used_in_outer_by_reduction:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_by_reduction;
          break;

        case vect_used_in_outer:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_in_scope;
          break;

        case vect_used_in_scope:
          break;

        default:
          gcc_unreachable ();
        }
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
        outer-loop-header-bb:
                ...
        inner-loop:
                d = dstmt_vinfo
        outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
                stmt # use (d)		*/
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
                      || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
                      vect_used_in_outer_by_reduction : vect_unused_in_scope;
          break;

        case vect_used_by_reduction:
        case vect_used_only_live:
          relevant = vect_used_in_outer_by_reduction;
          break;

        case vect_used_in_scope:
          relevant = vect_used_in_outer;
          break;

        default:
          gcc_unreachable ();
        }
    }
  /* We are also not interested in uses on loop PHI backedges that are
     inductions.  Otherwise we'll needlessly vectorize the IV increment
     and cause hybrid SLP for SLP inductions.  Unless the PHI is live
     of course.  */
  else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
           && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
           && ! STMT_VINFO_LIVE_P (stmt_vinfo)
           && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
                                      loop_latch_edge (bb->loop_father))
               == use))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "induction value on backedge.\n");
      return opt_result::success ();
    }


  vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
  return opt_result::success ();
}


/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.	 T1 = a[T0]

   3.
         j = j + 1

   Stmts 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

opt_result
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  unsigned int i;
  basic_block bb;
  bool live_p;
  enum vect_relevant relevant;

  DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");

  auto_vec<stmt_vec_info, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
                             phi_info->stmt);

          if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, phi_info, relevant, live_p);
        }
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "init: stmt relevant? %G", stmt_info->stmt);

          if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
        }
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt_vec_info stmt_vinfo = worklist.pop ();
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "worklist: examine stmt: %G", stmt_vinfo->stmt);

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
         (DEF_STMT) as relevant/irrelevant according to the relevance property
         of STMT.  */
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);

      /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
         propagated as is to the DEF_STMTs of its USEs.

         One exception is when STMT has been identified as defining a reduction
         variable; in this case we set the relevance to vect_used_by_reduction.
         This is because we distinguish between two kinds of relevant stmts -
         those that are used by a reduction computation, and those that are
         (also) used by a regular computation.  This allows us later on to
         identify stmts that are used solely by a reduction, and therefore the
         order of the results that they produce does not have to be kept.
       */

      switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
        {
        case vect_reduction_def:
          gcc_assert (relevant != vect_unused_in_scope);
          if (relevant != vect_unused_in_scope
              && relevant != vect_used_in_scope
              && relevant != vect_used_by_reduction
              && relevant != vect_used_only_live)
            return opt_result::failure_at
              (stmt_vinfo->stmt, "unsupported use of reduction.\n");
          break;

        case vect_nested_cycle:
          if (relevant != vect_unused_in_scope
              && relevant != vect_used_in_outer_by_reduction
              && relevant != vect_used_in_outer)
            return opt_result::failure_at
              (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
          break;

        case vect_double_reduction_def:
          if (relevant != vect_unused_in_scope
              && relevant != vect_used_by_reduction
              && relevant != vect_used_only_live)
            return opt_result::failure_at
              (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
          break;

        default:
          break;
        }

      if (is_pattern_stmt_p (stmt_vinfo))
        {
          /* Pattern statements are not inserted into the code, so
             FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
             have to scan the RHS or function arguments instead.  */
          if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
            {
              enum tree_code rhs_code = gimple_assign_rhs_code (assign);
              tree op = gimple_assign_rhs1 (assign);

              i = 1;
              if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
                {
                  opt_result res
                    = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
                                   loop_vinfo, relevant, &worklist, false);
                  if (!res)
                    return res;
                  res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
                                     loop_vinfo, relevant, &worklist, false);
                  if (!res)
                    return res;
                  i = 2;
                }
              for (; i < gimple_num_ops (assign); i++)
                {
                  op = gimple_op (assign, i);
                  if (TREE_CODE (op) == SSA_NAME)
                    {
                      opt_result res
                        = process_use (stmt_vinfo, op, loop_vinfo, relevant,
                                       &worklist, false);
                      if (!res)
                        return res;
                    }
                }
            }
          else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
            {
              for (i = 0; i < gimple_call_num_args (call); i++)
                {
                  tree arg = gimple_call_arg (call, i);
                  opt_result res
                    = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
                                   &worklist, false);
                  if (!res)
                    return res;
                }
            }
        }
      else
        FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
          {
            tree op = USE_FROM_PTR (use_p);
            opt_result res
              = process_use (stmt_vinfo, op, loop_vinfo, relevant,
                             &worklist, false);
            if (!res)
              return res;
          }

      if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
        {
          gather_scatter_info gs_info;
          if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
            gcc_unreachable ();
          opt_result res
            = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
                           &worklist, true);
          if (!res)
            {
              if (fatal)
                *fatal = false;
              return res;
            }
        }
    } /* while worklist */

  return opt_result::success ();
}

/* Compute the prologue cost for invariant or constant operands.
   */

static unsigned
vect_prologue_cost_for_slp_op (slp_tree node, stmt_vec_info stmt_info,
                               unsigned opno, enum vect_def_type dt,
                               stmt_vector_for_cost *cost_vec)
{
  vec_info *vinfo = stmt_info->vinfo;
  gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
  tree op = gimple_op (stmt, opno);
  unsigned prologue_cost = 0;

  /* Without looking at the actual initializer a vector of
     constants can be implemented as a load from the constant pool.
     When all elements are the same we can use a splat.  */
  tree vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), node);
  unsigned group_size = SLP_TREE_SCALAR_STMTS (node).length ();
  unsigned num_vects_to_check;
  unsigned HOST_WIDE_INT const_nunits;
  unsigned nelt_limit;
  if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
      && ! multiple_p (const_nunits, group_size))
    {
      num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
      nelt_limit = const_nunits;
    }
  else
    {
      /* If either the vector has variable length or the vectors
         are composed of repeated whole groups we only need to
         cost construction once.  All vectors will be the same.  */
      num_vects_to_check = 1;
      nelt_limit = group_size;
    }
  tree elt = NULL_TREE;
  unsigned nelt = 0;
  for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
    {
      unsigned si = j % group_size;
      if (nelt == 0)
        elt = gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt, opno);
      /* ??? We're just tracking whether all operands of a single
         vector initializer are the same, ideally we'd check if
         we emitted the same one already.  */
      else if (elt != gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt,
                                 opno))
        elt = NULL_TREE;
      nelt++;
      if (nelt == nelt_limit)
        {
          /* ??? We need to pass down stmt_info for a vector type
             even if it points to the wrong stmt.  */
          prologue_cost += record_stmt_cost
              (cost_vec, 1,
               dt == vect_external_def
               ? (elt ? scalar_to_vec : vec_construct)
               : vector_load,
               stmt_info, 0, vect_prologue);
          nelt = 0;
        }
    }

  return prologue_cost;
}

/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

static void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
                        enum vect_def_type *dt,
                        int ndts,
                        slp_tree node,
                        stmt_vector_for_cost *cost_vec,
                        vect_cost_for_stmt kind = vector_stmt)
{
  int inside_cost = 0, prologue_cost = 0;

  gcc_assert (cost_vec != NULL);

  /* ??? Somehow we need to fix this at the callers.  */
  if (node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);

  if (node)
    {
      /* Scan operands and account for prologue cost of constants/externals.
         ??? This over-estimates cost for multiple uses and should be
         re-engineered.
         */
      gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
      tree lhs = gimple_get_lhs (stmt);
      for (unsigned i = 0; i < gimple_num_ops (stmt); ++i)
        {
          tree op = gimple_op (stmt, i);
          enum vect_def_type dt;
          if (!op || op == lhs)
            continue;
          if (vect_is_simple_use (op, stmt_info->vinfo, &dt)
              && (dt == vect_constant_def || dt == vect_external_def))
            prologue_cost += vect_prologue_cost_for_slp_op (node, stmt_info,
                                                            i, dt, cost_vec);
        }
    }
  else
    /* Cost the "broadcast" of a scalar operand into a vector operand.
       Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
       cost model.  */
    for (int i = 0; i < ndts; i++)
      if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
        prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
                                           stmt_info, 0, vect_prologue);

  /* Adjust for two-operator SLP nodes.  */
  if (node && SLP_TREE_TWO_OPERATORS (node))
    {
      ncopies *= 2;
      inside_cost += record_stmt_cost (cost_vec, ncopies, vec_perm,
                                       stmt_info, 0, vect_body);
    }

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
                                   stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_simple_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}


/* Model cost for type demotion and promotion operations.  PWR is
   normally zero for single-step promotions and demotions.  It will be
   one if two-step promotion/demotion is required, and so on.  NCOPIES
   is the number of vector results (and thus number of instructions)
   for the narrowest end of the operation chain.  Each additional
   step doubles the number of instructions required.  */

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
                                    enum vect_def_type *dt,
                                    unsigned int ncopies, int pwr,
                                    stmt_vector_for_cost *cost_vec)
{
  int i;
  int inside_cost = 0, prologue_cost = 0;

  for (i = 0; i < pwr + 1; i++)
    {
      inside_cost += record_stmt_cost (cost_vec, ncopies, vec_promote_demote,
                                       stmt_info, 0, vect_body);
      ncopies *= 2;
    }

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
                                         stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_promotion_demotion_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}

/* Returns true if the current function returns DECL.  */

static bool
cfun_returns (tree decl)
{
  edge_iterator ei;
  edge e;
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    {
      greturn *ret = safe_dyn_cast <greturn *> (last_stmt (e->src));
      if (!ret)
        continue;
      if (gimple_return_retval (ret) == decl)
        return true;
      /* We often end up with an aggregate copy to the result decl,
         handle that case as well.  First skip intermediate clobbers
         though.
         */
      gimple *def = ret;
      do
        {
          def = SSA_NAME_DEF_STMT (gimple_vuse (def));
        }
      while (gimple_clobber_p (def));
      if (is_a <gassign *> (def)
          && gimple_assign_lhs (def) == gimple_return_retval (ret)
          && gimple_assign_rhs1 (def) == decl)
        return true;
    }
  return false;
}

/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

static void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
                       enum vect_def_type dt,
                       vect_memory_access_type memory_access_type,
                       vec_load_store_type vls_type, slp_tree slp_node,
                       stmt_vector_for_cost *cost_vec)
{
  unsigned int inside_cost = 0, prologue_cost = 0;
  stmt_vec_info first_stmt_info = stmt_info;
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  /* ??? Somehow we need to fix this at the callers.  */
  if (slp_node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  if (vls_type == VLS_STORE_INVARIANT)
    {
      if (slp_node)
        prologue_cost += vect_prologue_cost_for_slp_op (slp_node, stmt_info,
                                                        1, dt, cost_vec);
      else
        prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
                                           stmt_info, 0, vect_prologue);
    }

  /* Grouped stores update all elements in the group at once,
     so we want the DR for the first statement.  */
  if (!slp_node && grouped_access_p)
    first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt_info == stmt_info);

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of DR_GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses high and low interleave or shuffle operations for each
         needed permute.  */
      int group_size = DR_GROUP_SIZE (first_stmt_info);
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
                                      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_store_cost: strided group_size = %d .\n",
                         group_size);
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  /* Costs of the stores.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      /* N scalar stores plus extracting the elements.  */
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (cost_vec,
                                       ncopies * assumed_nunits,
                                       scalar_store, stmt_info, 0, vect_body);
    }
  else
    vect_get_store_cost (stmt_info, ncopies, &inside_cost, cost_vec);

  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    {
      /* N scalar stores plus extracting the elements.
         */
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (cost_vec,
                                       ncopies * assumed_nunits,
                                       vec_to_scalar, stmt_info, 0, vect_body);
    }

  /* When vectorizing a store into the function result assign
     a penalty if the function returns in a multi-register location.
     In this case we assume we'll end up with having to spill the
     vector result and do piecewise loads as a conservative estimate.  */
  tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
  if (base
      && (TREE_CODE (base) == RESULT_DECL
          || (DECL_P (base) && cfun_returns (base)))
      && !aggregate_value_p (base, cfun->decl))
    {
      rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
      /* ??? Handle PARALLEL in some way.  */
      if (REG_P (reg))
        {
          int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
          /* Assume that a single reg-reg move is possible and cheap,
             do not account for vector to gp register move cost.  */
          if (nregs > 1)
            {
              /* Spill.  */
              prologue_cost += record_stmt_cost (cost_vec, ncopies,
                                                 vector_store,
                                                 stmt_info, 0, vect_epilogue);
              /* Loads.  */
              prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
                                                 scalar_load,
                                                 stmt_info, 0, vect_epilogue);
            }
        }
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_store_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}


/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (stmt_vec_info stmt_info, int ncopies,
                     unsigned int *inside_cost,
                     stmt_vector_for_cost *body_cost_vec)
{
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  int alignment_support_scheme
    = vect_supportable_dr_alignment (dr_info, false);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vector_store, stmt_info, 0,
                                          vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: aligned.\n");
        break;
      }

    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned store.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_store, stmt_info,
                                          DR_MISALIGNMENT (dr_info),
                                          vect_body);
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: unaligned supported by "
                           "hardware.\n");
        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_store_cost: unsupported access.\n");
        break;
      }

    default:
      gcc_unreachable ();
    }
}


/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, one access has
   the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.
   */

static void
vect_model_load_cost (stmt_vec_info stmt_info, unsigned ncopies,
                      vect_memory_access_type memory_access_type,
                      slp_instance instance,
                      slp_tree slp_node,
                      stmt_vector_for_cost *cost_vec)
{
  unsigned int inside_cost = 0, prologue_cost = 0;
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  gcc_assert (cost_vec);

  /* ??? Somehow we need to fix this at the callers.  */
  if (slp_node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
    {
      /* If the load is permuted then the alignment is determined by
         the first group element not by the first scalar stmt DR.  */
      stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
      /* Record the cost for the permutation.  */
      unsigned n_perms;
      unsigned assumed_nunits
        = vect_nunits_for_cost (STMT_VINFO_VECTYPE (first_stmt_info));
      unsigned slp_vf = (ncopies * assumed_nunits) / instance->group_size;
      vect_transform_slp_perm_load (slp_node, vNULL, NULL,
                                    slp_vf, instance, true,
                                    &n_perms);
      inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
                                       first_stmt_info, 0, vect_body);
      /* And adjust the number of loads performed.  This handles
         redundancies as well as loads that are later dead.  */
      auto_sbitmap perm (DR_GROUP_SIZE (first_stmt_info));
      bitmap_clear (perm);
      for (unsigned i = 0;
           i < SLP_TREE_LOAD_PERMUTATION (slp_node).length (); ++i)
        bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (slp_node)[i]);
      ncopies = 0;
      bool load_seen = false;
      for (unsigned i = 0; i < DR_GROUP_SIZE (first_stmt_info); ++i)
        {
          if (i % assumed_nunits == 0)
            {
              if (load_seen)
                ncopies++;
              load_seen = false;
            }
          if (bitmap_bit_p (perm, i))
            load_seen = true;
        }
      if (load_seen)
        ncopies++;
      gcc_assert (ncopies
                  <= (DR_GROUP_SIZE (first_stmt_info)
                      - DR_GROUP_GAP (first_stmt_info)
                      + assumed_nunits - 1) / assumed_nunits);
    }

  /* Grouped loads read all elements in the group at once,
     so we want the DR for the first statement.  */
  stmt_vec_info first_stmt_info = stmt_info;
  if (!slp_node && grouped_access_p)
    first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt_info == stmt_info);

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of DR_GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses even and odd extract operations or shuffle operations
         for each needed permute.  */
      int group_size = DR_GROUP_SIZE (first_stmt_info);
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
                                       stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_load_cost: strided group_size = %d .\n",
                         group_size);
    }

  /* The loads themselves.
     */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      /* N scalar loads plus gathering them into a vector.  */
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (cost_vec,
                                       ncopies * assumed_nunits,
                                       scalar_load, stmt_info, 0, vect_body);
    }
  else
    vect_get_load_cost (stmt_info, ncopies, first_stmt_p,
                        &inside_cost, &prologue_cost,
                        cost_vec, cost_vec, true);
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
                                     stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_load_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}


/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (stmt_vec_info stmt_info, int ncopies,
                    bool add_realign_cost, unsigned int *inside_cost,
                    unsigned int *prologue_cost,
                    stmt_vector_for_cost *prologue_cost_vec,
                    stmt_vector_for_cost *body_cost_vec,
                    bool record_prologue_costs)
{
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  int alignment_support_scheme
    = vect_supportable_dr_alignment (dr_info, false);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: aligned.\n");

        break;
      }
    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned load.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_load, stmt_info,
                                          DR_MISALIGNMENT (dr_info),
                                          vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned supported by "
                           "hardware.\n");

        break;
      }
    case dr_explicit_realign:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
                                          vector_load, stmt_info, 0, vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vec_perm, stmt_info, 0, vect_body);

        /* FIXME: If the misalignment remains fixed across the iterations of
           the containing loop, the following cost should be added to the
           prologue costs.  */
        if (targetm.vectorize.builtin_mask_for_load)
          *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
                                            stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign\n");

        break;
      }
    case dr_explicit_realign_optimized:
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned software "
                           "pipelined.\n");

        /* Unaligned software pipeline has a load of an address, an initial
           load, and possibly a mask operation to "prime" the loop.  However,
           if this is an access in a group of loads, which provide grouped
           access, then the above cost should only be considered for one
           access in the group.  Inside the loop, there is a load op
           and a realignment op.
           */

        if (add_realign_cost && record_prologue_costs)
          {
            *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
                                                vector_stmt, stmt_info,
                                                0, vect_prologue);
            if (targetm.vectorize.builtin_mask_for_load)
              *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
                                                  vector_stmt, stmt_info,
                                                  0, vect_prologue);
          }

        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign optimized"
                           "\n");

        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_load_cost: unsupported access.\n");
        break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT_VINFO.  */

static void
vect_init_vector_1 (stmt_vec_info stmt_vinfo, gimple *new_stmt,
                    gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (stmt_vinfo, new_stmt, gsi);
  else
    {
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
        {
          class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
          basic_block new_bb;
          edge pe;

          if (nested_in_vect_loop_p (loop, stmt_vinfo))
            loop = loop->inner;

          pe = loop_preheader_edge (loop);
          new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
          gcc_assert (!new_bb);
        }
      else
        {
          bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
          basic_block bb;
          gimple_stmt_iterator gsi_bb_start;

          gcc_assert (bb_vinfo);
          bb = BB_VINFO_BB (bb_vinfo);
          gsi_bb_start = gsi_after_labels (bb);
          gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
        }
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "created new init_stmt: %G", new_stmt);
}

/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type a vector with all elements equal to VAL is created first.
   Place the initialization at GSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT_INFO.  */

tree
vect_init_vector (stmt_vec_info stmt_info, tree val, tree type,
                  gimple_stmt_iterator *gsi)
{
  gimple *init_stmt;
  tree new_temp;

  /* We abuse this function to push something to an SSA name with
     initial 'val'.  */
  if (! useless_type_conversion_p (type, TREE_TYPE (val)))
    {
      gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
      if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
        {
          /* A scalar boolean value should be transformed into an
             all-zeros or all-ones value before building a vector.
             */
          if (VECTOR_BOOLEAN_TYPE_P (type))
            {
              tree true_val = build_all_ones_cst (TREE_TYPE (type));
              tree false_val = build_zero_cst (TREE_TYPE (type));

              if (CONSTANT_CLASS_P (val))
                val = integer_zerop (val) ? false_val : true_val;
              else
                {
                  new_temp = make_ssa_name (TREE_TYPE (type));
                  init_stmt = gimple_build_assign (new_temp, COND_EXPR,
                                                   val, true_val, false_val);
                  vect_init_vector_1 (stmt_info, init_stmt, gsi);
                  val = new_temp;
                }
            }
          else
            {
              gimple_seq stmts = NULL;
              if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
                val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
                                    TREE_TYPE (type), val);
              else
                /* ??? Condition vectorization expects us to do
                   promotion of invariant/external defs.  */
                val = gimple_convert (&stmts, TREE_TYPE (type), val);
              for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
                   !gsi_end_p (gsi2); )
                {
                  init_stmt = gsi_stmt (gsi2);
                  gsi_remove (&gsi2, false);
                  vect_init_vector_1 (stmt_info, init_stmt, gsi);
                }
            }
        }
      val = build_vector_from_val (type, val);
    }

  new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_temp, val);
  vect_init_vector_1 (stmt_info, init_stmt, gsi);
  return new_temp;
}

/* Function vect_get_vec_def_for_operand_1.

   For a defining stmt DEF_STMT_INFO of a scalar stmt, return a vector def
   with type DT that will be used in the vectorized stmt.  */

tree
vect_get_vec_def_for_operand_1 (stmt_vec_info def_stmt_info,
                                enum vect_def_type dt)
{
  tree vec_oprnd;
  stmt_vec_info vec_stmt_info;

  switch (dt)
    {
    /* operand is a constant or a loop invariant.  */
    case vect_constant_def:
    case vect_external_def:
      /* Code should use vect_get_vec_def_for_operand.  */
      gcc_unreachable ();

    /* Operand is defined by a loop header phi.  In case of nested
       cycles we also may have uses of the backedge def.  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
    case vect_induction_def:
      gcc_assert (gimple_code (def_stmt_info->stmt) == GIMPLE_PHI
                  || dt == vect_nested_cycle);
      /* Fallthru.  */

    /* operand is defined inside the loop.  */
    case vect_internal_def:
      {
        /* Get the def from the vectorized stmt.  */
        vec_stmt_info = STMT_VINFO_VEC_STMT (def_stmt_info);
        /* Get vectorized pattern statement.  */
        if (!vec_stmt_info
            && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
            && !STMT_VINFO_RELEVANT (def_stmt_info))
          vec_stmt_info = (STMT_VINFO_VEC_STMT
                           (STMT_VINFO_RELATED_STMT (def_stmt_info)));
        gcc_assert (vec_stmt_info);
        if (gphi *phi = dyn_cast <gphi *> (vec_stmt_info->stmt))
          vec_oprnd = PHI_RESULT (phi);
        else
          vec_oprnd = gimple_get_lhs (vec_stmt_info->stmt);
        return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}


/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT_VINFO.  This function returns a (vector) def
   that will be used in the vectorized stmt for STMT_VINFO.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.
   VECTYPE may be used to specify a required type for
   vector invariant.  */

tree
vect_get_vec_def_for_operand (tree op, stmt_vec_info stmt_vinfo, tree vectype)
{
  gimple *def_stmt;
  enum vect_def_type dt;
  bool is_simple_use;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_get_vec_def_for_operand: %T\n", op);

  stmt_vec_info def_stmt_info;
  is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
                                      &def_stmt_info, &def_stmt);
  gcc_assert (is_simple_use);
  if (def_stmt && dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt =  %G", def_stmt);

  if (dt == vect_constant_def || dt == vect_external_def)
    {
      tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      tree vector_type;

      if (vectype)
        vector_type = vectype;
      else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
               && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
        vector_type = truth_type_for (stmt_vectype);
      else
        vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));

      gcc_assert (vector_type);
      return vect_init_vector (stmt_vinfo, op, vector_type, NULL);
    }
  else
    return vect_get_vec_def_for_operand_1 (def_stmt_info, dt);
}


/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
   of the stmt that defines VEC_OPRND.  VINFO describes the vectorization.

   Context:
        In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
   vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:        STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
                        VS1.1:  vx.1 = memref1      VS1.2
                        VS1.2:  vx.2 = memref2      VS1.3
                        VS1.3:  vx.3 = memref3

   S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
                        VSnew.1:  vz1 = vx.1 + ...  VSnew.2
                        VSnew.2:  vz2 = vx.2 + ...  VSnew.3
                        VSnew.3:  vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
        To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

        To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.
   It is obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

        For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
        vx.1 = vect_get_vec_def_for_stmt_copy (vinfo, vx.0);
        vx.2 = vect_get_vec_def_for_stmt_copy (vinfo, vx.1);
        vx.3 = vect_get_vec_def_for_stmt_copy (vinfo, vx.2);  */

tree
vect_get_vec_def_for_stmt_copy (vec_info *vinfo, tree vec_oprnd)
{
  stmt_vec_info def_stmt_info = vinfo->lookup_def (vec_oprnd);
  if (!def_stmt_info)
    /* Do nothing; can reuse same def.  */
    return vec_oprnd;

  def_stmt_info = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (def_stmt_info);
  if (gphi *phi = dyn_cast <gphi *> (def_stmt_info->stmt))
    vec_oprnd = PHI_RESULT (phi);
  else
    vec_oprnd = gimple_get_lhs (def_stmt_info->stmt);
  return vec_oprnd;
}


/* Get vectorized definitions for the operands to create a copy of an original
   stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */

void
vect_get_vec_defs_for_stmt_copy (vec_info *vinfo,
                                 vec<tree> *vec_oprnds0,
                                 vec<tree> *vec_oprnds1)
{
  tree vec_oprnd = vec_oprnds0->pop ();

  vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
  vec_oprnds0->quick_push (vec_oprnd);

  if (vec_oprnds1 && vec_oprnds1->length ())
    {
      vec_oprnd = vec_oprnds1->pop ();
      vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
      vec_oprnds1->quick_push (vec_oprnd);
    }
}


/* Get vectorized definitions for OP0 and OP1.  */

void
vect_get_vec_defs (tree op0, tree op1, stmt_vec_info stmt_info,
                   vec<tree> *vec_oprnds0,
                   vec<tree> *vec_oprnds1,
                   slp_tree slp_node)
{
  if (slp_node)
    {
      auto_vec<vec<tree> > vec_defs (SLP_TREE_CHILDREN (slp_node).length ());
      vect_get_slp_defs (slp_node, &vec_defs, op1 ? 2 : 1);
      *vec_oprnds0 = vec_defs[0];
      if (op1)
        *vec_oprnds1 = vec_defs[1];
    }
  else
    {
      tree vec_oprnd;

      vec_oprnds0->create (1);
      vec_oprnd = vect_get_vec_def_for_operand (op0, stmt_info);
      vec_oprnds0->quick_push (vec_oprnd);

      if (op1)
        {
          vec_oprnds1->create (1);
          vec_oprnd = vect_get_vec_def_for_operand (op1, stmt_info);
          vec_oprnds1->quick_push (vec_oprnd);
        }
    }
}

/* Helper function called by vect_finish_replace_stmt and
   vect_finish_stmt_generation.  Set the location of the new
   statement and create and return a stmt_vec_info for it.  */

static stmt_vec_info
vect_finish_stmt_generation_1 (stmt_vec_info stmt_info, gimple *vec_stmt)
{
  vec_info *vinfo = stmt_info->vinfo;

  stmt_vec_info vec_stmt_info = vinfo->add_stmt (vec_stmt);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);

  gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));

  /* While EH edges will generally prevent vectorization, stmt might
     e.g. be in a must-not-throw region.
Ensure newly created stmts 1767 that could throw are part of the same region. */ 1768 int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt); 1769 if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt)) 1770 add_stmt_to_eh_lp (vec_stmt, lp_nr); 1771 1772 return vec_stmt_info; 1773 } 1774 1775 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT, 1776 which sets the same scalar result as STMT_INFO did. Create and return a 1777 stmt_vec_info for VEC_STMT. */ 1778 1779 stmt_vec_info 1780 vect_finish_replace_stmt (stmt_vec_info stmt_info, gimple *vec_stmt) 1781 { 1782 gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt; 1783 gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt)); 1784 1785 gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt); 1786 gsi_replace (&gsi, vec_stmt, true); 1787 1788 return vect_finish_stmt_generation_1 (stmt_info, vec_stmt); 1789 } 1790 1791 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it 1792 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */ 1793 1794 stmt_vec_info 1795 vect_finish_stmt_generation (stmt_vec_info stmt_info, gimple *vec_stmt, 1796 gimple_stmt_iterator *gsi) 1797 { 1798 gcc_assert (gimple_code (stmt_info->stmt) != GIMPLE_LABEL); 1799 1800 if (!gsi_end_p (*gsi) 1801 && gimple_has_mem_ops (vec_stmt)) 1802 { 1803 gimple *at_stmt = gsi_stmt (*gsi); 1804 tree vuse = gimple_vuse (at_stmt); 1805 if (vuse && TREE_CODE (vuse) == SSA_NAME) 1806 { 1807 tree vdef = gimple_vdef (at_stmt); 1808 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt)); 1809 /* If we have an SSA vuse and insert a store, update virtual 1810 SSA form to avoid triggering the renamer. Do so only 1811 if we can easily see all uses - which is what almost always 1812 happens with the way vectorized stmts are inserted. */ 1813 if ((vdef && TREE_CODE (vdef) == SSA_NAME) 1814 && ((is_gimple_assign (vec_stmt) 1815 && !is_gimple_reg (gimple_assign_lhs (vec_stmt))) 1816 || (is_gimple_call (vec_stmt) 1817 && !(gimple_call_flags (vec_stmt) 1818 & (ECF_CONST|ECF_PURE|ECF_NOVOPS))))) 1819 { 1820 tree new_vdef = copy_ssa_name (vuse, vec_stmt); 1821 gimple_set_vdef (vec_stmt, new_vdef); 1822 SET_USE (gimple_vuse_op (at_stmt), new_vdef); 1823 } 1824 } 1825 } 1826 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT); 1827 return vect_finish_stmt_generation_1 (stmt_info, vec_stmt); 1828 } 1829 1830 /* We want to vectorize a call to combined function CFN with function 1831 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN 1832 as the types of all inputs. Check whether this is possible using 1833 an internal function, returning its code if so or IFN_LAST if not. */ 1834 1835 static internal_fn 1836 vectorizable_internal_function (combined_fn cfn, tree fndecl, 1837 tree vectype_out, tree vectype_in) 1838 { 1839 internal_fn ifn; 1840 if (internal_fn_p (cfn)) 1841 ifn = as_internal_fn (cfn); 1842 else 1843 ifn = associated_internal_fn (fndecl); 1844 if (ifn != IFN_LAST && direct_internal_fn_p (ifn)) 1845 { 1846 const direct_internal_fn_info &info = direct_internal_fn (ifn); 1847 if (info.vectorizable) 1848 { 1849 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in); 1850 tree type1 = (info.type1 < 0 ? 
vectype_out : vectype_in); 1851 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1), 1852 OPTIMIZE_FOR_SPEED)) 1853 return ifn; 1854 } 1855 } 1856 return IFN_LAST; 1857 } 1858 1859 1860 static tree permute_vec_elements (tree, tree, tree, stmt_vec_info, 1861 gimple_stmt_iterator *); 1862 1863 /* Check whether a load or store statement in the loop described by 1864 LOOP_VINFO is possible in a fully-masked loop. This is testing 1865 whether the vectorizer pass has the appropriate support, as well as 1866 whether the target does. 1867 1868 VLS_TYPE says whether the statement is a load or store and VECTYPE 1869 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE 1870 says how the load or store is going to be implemented and GROUP_SIZE 1871 is the number of load or store statements in the containing group. 1872 If the access is a gather load or scatter store, GS_INFO describes 1873 its arguments. If the load or store is conditional, SCALAR_MASK is the 1874 condition under which it occurs. 1875 1876 Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not 1877 supported, otherwise record the required mask types. */ 1878 1879 static void 1880 check_load_store_masking (loop_vec_info loop_vinfo, tree vectype, 1881 vec_load_store_type vls_type, int group_size, 1882 vect_memory_access_type memory_access_type, 1883 gather_scatter_info *gs_info, tree scalar_mask) 1884 { 1885 /* Invariant loads need no special support. */ 1886 if (memory_access_type == VMAT_INVARIANT) 1887 return; 1888 1889 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo); 1890 machine_mode vecmode = TYPE_MODE (vectype); 1891 bool is_load = (vls_type == VLS_LOAD); 1892 if (memory_access_type == VMAT_LOAD_STORE_LANES) 1893 { 1894 if (is_load 1895 ? !vect_load_lanes_supported (vectype, group_size, true) 1896 : !vect_store_lanes_supported (vectype, group_size, true)) 1897 { 1898 if (dump_enabled_p ()) 1899 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 1900 "can't use a fully-masked loop because the" 1901 " target doesn't have an appropriate masked" 1902 " load/store-lanes instruction.\n"); 1903 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false; 1904 return; 1905 } 1906 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype); 1907 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask); 1908 return; 1909 } 1910 1911 if (memory_access_type == VMAT_GATHER_SCATTER) 1912 { 1913 internal_fn ifn = (is_load 1914 ? IFN_MASK_GATHER_LOAD 1915 : IFN_MASK_SCATTER_STORE); 1916 if (!internal_gather_scatter_fn_supported_p (ifn, vectype, 1917 gs_info->memory_type, 1918 gs_info->offset_vectype, 1919 gs_info->scale)) 1920 { 1921 if (dump_enabled_p ()) 1922 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 1923 "can't use a fully-masked loop because the" 1924 " target doesn't have an appropriate masked" 1925 " gather load or scatter store instruction.\n"); 1926 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false; 1927 return; 1928 } 1929 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype); 1930 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask); 1931 return; 1932 } 1933 1934 if (memory_access_type != VMAT_CONTIGUOUS 1935 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE) 1936 { 1937 /* Element X of the data must come from iteration i * VF + X of the 1938 scalar loop. We need more work to support other mappings. 
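For example (illustrative numbers only): with VF = 8 and V4SI accesses, element 2 of the first vector must come from scalar iteration i * 8 + 2, which a contiguous access guarantees but an elementwise or strided access does not, so such accesses clear LOOP_VINFO_CAN_FULLY_MASK_P here.  In the contiguous case handled below the number of recorded masks is the round-up of GROUP_SIZE * VF / NUNITS; e.g. GROUP_SIZE = 3, VF = 4 and NUNITS = 4 record ceil (12 / 4) = 3 masks per iteration.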
*/ 1939 if (dump_enabled_p ()) 1940 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 1941 "can't use a fully-masked loop because an access" 1942 " isn't contiguous.\n"); 1943 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false; 1944 return; 1945 } 1946 1947 machine_mode mask_mode; 1948 if (!VECTOR_MODE_P (vecmode) 1949 || !targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode) 1950 || !can_vec_mask_load_store_p (vecmode, mask_mode, is_load)) 1951 { 1952 if (dump_enabled_p ()) 1953 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 1954 "can't use a fully-masked loop because the target" 1955 " doesn't have the appropriate masked load or" 1956 " store.\n"); 1957 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false; 1958 return; 1959 } 1960 /* We might load more scalars than we need for permuting SLP loads. 1961 We checked in get_group_load_store_type that the extra elements 1962 don't leak into a new vector. */ 1963 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); 1964 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); 1965 unsigned int nvectors; 1966 if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors)) 1967 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask); 1968 else 1969 gcc_unreachable (); 1970 } 1971 1972 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized 1973 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask 1974 that needs to be applied to all loads and stores in a vectorized loop. 1975 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK. 1976 1977 MASK_TYPE is the type of both masks. If new statements are needed, 1978 insert them before GSI. */ 1979 1980 static tree 1981 prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask, 1982 gimple_stmt_iterator *gsi) 1983 { 1984 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask))); 1985 if (!loop_mask) 1986 return vec_mask; 1987 1988 gcc_assert (TREE_TYPE (loop_mask) == mask_type); 1989 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and"); 1990 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR, 1991 vec_mask, loop_mask); 1992 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT); 1993 return and_res; 1994 } 1995 1996 /* Determine whether we can use a gather load or scatter store to vectorize 1997 strided load or store STMT_INFO by truncating the current offset to a 1998 smaller width. We need to be able to construct an offset vector: 1999 2000 { 0, X, X*2, X*3, ... } 2001 2002 without loss of precision, where X is STMT_INFO's DR_STEP. 2003 2004 Return true if this is possible, describing the gather load or scatter 2005 store in GS_INFO. MASKED_P is true if the load or store is conditional. */ 2006 2007 static bool 2008 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info, 2009 loop_vec_info loop_vinfo, bool masked_p, 2010 gather_scatter_info *gs_info) 2011 { 2012 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info); 2013 data_reference *dr = dr_info->dr; 2014 tree step = DR_STEP (dr); 2015 if (TREE_CODE (step) != INTEGER_CST) 2016 { 2017 /* ??? Perhaps we could use range information here? */ 2018 if (dump_enabled_p ()) 2019 dump_printf_loc (MSG_NOTE, vect_location, 2020 "cannot truncate variable step.\n"); 2021 return false; 2022 } 2023 2024 /* Get the number of bits in an element. 
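As a worked example of the precision computation below (all values are illustrative): with DR_STEP = 16 bytes, a maximum VF of 16 and SCALE = 1, COUNT is 15 and the largest offset is 15 * 16 = 240, which fits in 8 unsigned bits, so an 8-bit offset type describing { 0, 16, 32, ..., 240 } is tried first; if the target has no gather/scatter with such a narrow offset, the loop retries with SCALE equal to the scalar element size.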
*/ 2025 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 2026 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype)); 2027 unsigned int element_bits = GET_MODE_BITSIZE (element_mode); 2028 2029 /* Set COUNT to the upper limit on the number of elements - 1. 2030 Start with the maximum vectorization factor. */ 2031 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1; 2032 2033 /* Try lowering COUNT to the number of scalar latch iterations. */ 2034 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 2035 widest_int max_iters; 2036 if (max_loop_iterations (loop, &max_iters) 2037 && max_iters < count) 2038 count = max_iters.to_shwi (); 2039 2040 /* Try scales of 1 and the element size. */ 2041 int scales[] = { 1, vect_get_scalar_dr_size (dr_info) }; 2042 wi::overflow_type overflow = wi::OVF_NONE; 2043 for (int i = 0; i < 2; ++i) 2044 { 2045 int scale = scales[i]; 2046 widest_int factor; 2047 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor)) 2048 continue; 2049 2050 /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */ 2051 widest_int range = wi::mul (count, factor, SIGNED, &overflow); 2052 if (overflow) 2053 continue; 2054 signop sign = range >= 0 ? UNSIGNED : SIGNED; 2055 unsigned int min_offset_bits = wi::min_precision (range, sign); 2056 2057 /* Find the narrowest viable offset type. */ 2058 unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits); 2059 tree offset_type = build_nonstandard_integer_type (offset_bits, 2060 sign == UNSIGNED); 2061 2062 /* See whether the target supports the operation with an offset 2063 no narrower than OFFSET_TYPE. */ 2064 tree memory_type = TREE_TYPE (DR_REF (dr)); 2065 if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p, 2066 vectype, memory_type, offset_type, scale, 2067 &gs_info->ifn, &gs_info->offset_vectype)) 2068 continue; 2069 2070 gs_info->decl = NULL_TREE; 2071 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET, 2072 but we don't need to store that here. */ 2073 gs_info->base = NULL_TREE; 2074 gs_info->element_type = TREE_TYPE (vectype); 2075 gs_info->offset = fold_convert (offset_type, step); 2076 gs_info->offset_dt = vect_constant_def; 2077 gs_info->scale = scale; 2078 gs_info->memory_type = memory_type; 2079 return true; 2080 } 2081 2082 if (overflow && dump_enabled_p ()) 2083 dump_printf_loc (MSG_NOTE, vect_location, 2084 "truncating gather/scatter offset to %d bits" 2085 " might change its value.\n", element_bits); 2086 2087 return false; 2088 } 2089 2090 /* Return true if we can use gather/scatter internal functions to 2091 vectorize STMT_INFO, which is a grouped or strided load or store. 2092 MASKED_P is true if load or store is conditional. When returning 2093 true, fill in GS_INFO with the information required to perform the 2094 operation. 
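For example (illustrative): a strided load such as x = a[i * 100] with 4-byte elements has a compile-time step of 400 bytes, so it can be expressed as a gather whose offsets form the invariant series { 0, 400, 800, ... } (or a scaled equivalent), either directly via vect_check_gather_scatter or through the offset-truncation fallback above; an access whose stride is only known at run time has no constant DR_STEP and the fallback rejects it.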
*/ 2095 2096 static bool 2097 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info, 2098 loop_vec_info loop_vinfo, bool masked_p, 2099 gather_scatter_info *gs_info) 2100 { 2101 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info) 2102 || gs_info->decl) 2103 return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo, 2104 masked_p, gs_info); 2105 2106 tree old_offset_type = TREE_TYPE (gs_info->offset); 2107 tree new_offset_type = TREE_TYPE (gs_info->offset_vectype); 2108 2109 gcc_assert (TYPE_PRECISION (new_offset_type) 2110 >= TYPE_PRECISION (old_offset_type)); 2111 gs_info->offset = fold_convert (new_offset_type, gs_info->offset); 2112 2113 if (dump_enabled_p ()) 2114 dump_printf_loc (MSG_NOTE, vect_location, 2115 "using gather/scatter for strided/grouped access," 2116 " scale = %d\n", gs_info->scale); 2117 2118 return true; 2119 } 2120 2121 /* STMT_INFO is a non-strided load or store, meaning that it accesses 2122 elements with a known constant step. Return -1 if that step 2123 is negative, 0 if it is zero, and 1 if it is greater than zero. */ 2124 2125 static int 2126 compare_step_with_zero (stmt_vec_info stmt_info) 2127 { 2128 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info); 2129 return tree_int_cst_compare (vect_dr_behavior (dr_info)->step, 2130 size_zero_node); 2131 } 2132 2133 /* If the target supports a permute mask that reverses the elements in 2134 a vector of type VECTYPE, return that mask, otherwise return null. */ 2135 2136 static tree 2137 perm_mask_for_reverse (tree vectype) 2138 { 2139 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); 2140 2141 /* The encoding has a single stepped pattern. */ 2142 vec_perm_builder sel (nunits, 1, 3); 2143 for (int i = 0; i < 3; ++i) 2144 sel.quick_push (nunits - 1 - i); 2145 2146 vec_perm_indices indices (sel, 1, nunits); 2147 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices)) 2148 return NULL_TREE; 2149 return vect_gen_perm_mask_checked (vectype, indices); 2150 } 2151 2152 /* A subroutine of get_load_store_type, with a subset of the same 2153 arguments. Handle the case where STMT_INFO is a load or store that 2154 accesses consecutive elements with a negative step. 
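For example (illustrative): a load from a[n - i] with 4-byte elements has DR_STEP -4; with a single copy and a supportable alignment it is classified as VMAT_CONTIGUOUS_REVERSE, i.e. one contiguous vector load at the lowest accessed address followed by a VEC_PERM_EXPR using the reversing selector that perm_mask_for_reverse builds ({ 3, 2, 1, 0 } for V4SI).  If the target cannot do that permutation the access falls back to VMAT_ELEMENTWISE, and an invariant-source store simply becomes VMAT_CONTIGUOUS_DOWN with no permute.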
*/ 2155 2156 static vect_memory_access_type 2157 get_negative_load_store_type (stmt_vec_info stmt_info, tree vectype, 2158 vec_load_store_type vls_type, 2159 unsigned int ncopies) 2160 { 2161 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info); 2162 dr_alignment_support alignment_support_scheme; 2163 2164 if (ncopies > 1) 2165 { 2166 if (dump_enabled_p ()) 2167 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2168 "multiple types with negative step.\n"); 2169 return VMAT_ELEMENTWISE; 2170 } 2171 2172 alignment_support_scheme = vect_supportable_dr_alignment (dr_info, false); 2173 if (alignment_support_scheme != dr_aligned 2174 && alignment_support_scheme != dr_unaligned_supported) 2175 { 2176 if (dump_enabled_p ()) 2177 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2178 "negative step but alignment required.\n"); 2179 return VMAT_ELEMENTWISE; 2180 } 2181 2182 if (vls_type == VLS_STORE_INVARIANT) 2183 { 2184 if (dump_enabled_p ()) 2185 dump_printf_loc (MSG_NOTE, vect_location, 2186 "negative step with invariant source;" 2187 " no permute needed.\n"); 2188 return VMAT_CONTIGUOUS_DOWN; 2189 } 2190 2191 if (!perm_mask_for_reverse (vectype)) 2192 { 2193 if (dump_enabled_p ()) 2194 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2195 "negative step and reversing not supported.\n"); 2196 return VMAT_ELEMENTWISE; 2197 } 2198 2199 return VMAT_CONTIGUOUS_REVERSE; 2200 } 2201 2202 /* STMT_INFO is either a masked or unconditional store. Return the value 2203 being stored. */ 2204 2205 tree 2206 vect_get_store_rhs (stmt_vec_info stmt_info) 2207 { 2208 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt)) 2209 { 2210 gcc_assert (gimple_assign_single_p (assign)); 2211 return gimple_assign_rhs1 (assign); 2212 } 2213 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt)) 2214 { 2215 internal_fn ifn = gimple_call_internal_fn (call); 2216 int index = internal_fn_stored_value_index (ifn); 2217 gcc_assert (index >= 0); 2218 return gimple_call_arg (call, index); 2219 } 2220 gcc_unreachable (); 2221 } 2222 2223 /* Function VECTOR_VECTOR_COMPOSITION_TYPE 2224 2225 This function returns a vector type which can be composed with NETLS pieces, 2226 whose type is recorded in PTYPE. VTYPE should be a vector type, and has the 2227 same vector size as the return vector. It checks target whether supports 2228 pieces-size vector mode for construction firstly, if target fails to, check 2229 pieces-size scalar mode for construction further. It returns NULL_TREE if 2230 fails to find the available composition. 2231 2232 For example, for (vtype=V16QI, nelts=4), we can probably get: 2233 - V16QI with PTYPE V4QI. 2234 - V4SI with PTYPE SI. 2235 - NULL_TREE. */ 2236 2237 static tree 2238 vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype) 2239 { 2240 gcc_assert (VECTOR_TYPE_P (vtype)); 2241 gcc_assert (known_gt (nelts, 0U)); 2242 2243 machine_mode vmode = TYPE_MODE (vtype); 2244 if (!VECTOR_MODE_P (vmode)) 2245 return NULL_TREE; 2246 2247 poly_uint64 vbsize = GET_MODE_BITSIZE (vmode); 2248 unsigned int pbsize; 2249 if (constant_multiple_p (vbsize, nelts, &pbsize)) 2250 { 2251 /* First check if vec_init optab supports construction from 2252 vector pieces directly. 
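Continuing the V16QI example from the function comment (the target's optab support is hypothetical): if the target has a vec_init pattern building a V16QI from four V4QI pieces, this first branch returns V16QI with *PTYPE set to V4QI; otherwise the second branch below looks for a 32-bit integer mode and, if a V4SI can be built from four SImode pieces, returns V4SI with *PTYPE set to the 32-bit integer type.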
*/ 2253 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype)); 2254 poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode); 2255 machine_mode rmode; 2256 if (related_vector_mode (vmode, elmode, inelts).exists (&rmode) 2257 && (convert_optab_handler (vec_init_optab, vmode, rmode) 2258 != CODE_FOR_nothing)) 2259 { 2260 *ptype = build_vector_type (TREE_TYPE (vtype), inelts); 2261 return vtype; 2262 } 2263 2264 /* Otherwise check if exists an integer type of the same piece size and 2265 if vec_init optab supports construction from it directly. */ 2266 if (int_mode_for_size (pbsize, 0).exists (&elmode) 2267 && related_vector_mode (vmode, elmode, nelts).exists (&rmode) 2268 && (convert_optab_handler (vec_init_optab, rmode, elmode) 2269 != CODE_FOR_nothing)) 2270 { 2271 *ptype = build_nonstandard_integer_type (pbsize, 1); 2272 return build_vector_type (*ptype, nelts); 2273 } 2274 } 2275 2276 return NULL_TREE; 2277 } 2278 2279 /* A subroutine of get_load_store_type, with a subset of the same 2280 arguments. Handle the case where STMT_INFO is part of a grouped load 2281 or store. 2282 2283 For stores, the statements in the group are all consecutive 2284 and there is no gap at the end. For loads, the statements in the 2285 group might not be consecutive; there can be gaps between statements 2286 as well as at the end. */ 2287 2288 static bool 2289 get_group_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp, 2290 bool masked_p, vec_load_store_type vls_type, 2291 vect_memory_access_type *memory_access_type, 2292 gather_scatter_info *gs_info) 2293 { 2294 vec_info *vinfo = stmt_info->vinfo; 2295 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 2296 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL; 2297 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info); 2298 dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info); 2299 unsigned int group_size = DR_GROUP_SIZE (first_stmt_info); 2300 bool single_element_p = (stmt_info == first_stmt_info 2301 && !DR_GROUP_NEXT_ELEMENT (stmt_info)); 2302 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info); 2303 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); 2304 2305 /* True if the vectorized statements would access beyond the last 2306 statement in the group. */ 2307 bool overrun_p = false; 2308 2309 /* True if we can cope with such overrun by peeling for gaps, so that 2310 there is at least one final scalar iteration after the vector loop. */ 2311 bool can_overrun_p = (!masked_p 2312 && vls_type == VLS_LOAD 2313 && loop_vinfo 2314 && !loop->inner); 2315 2316 /* There can only be a gap at the end of the group if the stride is 2317 known at compile time. */ 2318 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0); 2319 2320 /* Stores can't yet have gaps. */ 2321 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0); 2322 2323 if (slp) 2324 { 2325 if (STMT_VINFO_STRIDED_P (first_stmt_info)) 2326 { 2327 /* Try to use consecutive accesses of DR_GROUP_SIZE elements, 2328 separated by the stride, until we have a complete vector. 2329 Fall back to scalar accesses if that isn't possible. 
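For instance (illustrative): a strided SLP group of 2 loads with V8HI vectors (NUNITS = 8) satisfies the multiple_p check below, so four consecutive pairs, each pair separated by the stride, fill one vector and VMAT_STRIDED_SLP is used; a group of 3 with the same vector type does not divide 8 evenly and falls back to VMAT_ELEMENTWISE.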
*/ 2330 if (multiple_p (nunits, group_size)) 2331 *memory_access_type = VMAT_STRIDED_SLP; 2332 else 2333 *memory_access_type = VMAT_ELEMENTWISE; 2334 } 2335 else 2336 { 2337 overrun_p = loop_vinfo && gap != 0; 2338 if (overrun_p && vls_type != VLS_LOAD) 2339 { 2340 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2341 "Grouped store with gaps requires" 2342 " non-consecutive accesses\n"); 2343 return false; 2344 } 2345 /* An overrun is fine if the trailing elements are smaller 2346 than the alignment boundary B. Every vector access will 2347 be a multiple of B and so we are guaranteed to access a 2348 non-gap element in the same B-sized block. */ 2349 if (overrun_p 2350 && gap < (vect_known_alignment_in_bytes (first_dr_info) 2351 / vect_get_scalar_dr_size (first_dr_info))) 2352 overrun_p = false; 2353 2354 /* If the gap splits the vector in half and the target 2355 can do half-vector operations avoid the epilogue peeling 2356 by simply loading half of the vector only. Usually 2357 the construction with an upper zero half will be elided. */ 2358 dr_alignment_support alignment_support_scheme; 2359 tree half_vtype; 2360 if (overrun_p 2361 && !masked_p 2362 && (((alignment_support_scheme 2363 = vect_supportable_dr_alignment (first_dr_info, false))) 2364 == dr_aligned 2365 || alignment_support_scheme == dr_unaligned_supported) 2366 && known_eq (nunits, (group_size - gap) * 2) 2367 && known_eq (nunits, group_size) 2368 && (vector_vector_composition_type (vectype, 2, &half_vtype) 2369 != NULL_TREE)) 2370 overrun_p = false; 2371 2372 if (overrun_p && !can_overrun_p) 2373 { 2374 if (dump_enabled_p ()) 2375 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2376 "Peeling for outer loop is not supported\n"); 2377 return false; 2378 } 2379 int cmp = compare_step_with_zero (stmt_info); 2380 if (cmp < 0) 2381 *memory_access_type = get_negative_load_store_type 2382 (stmt_info, vectype, vls_type, 1); 2383 else 2384 { 2385 gcc_assert (!loop_vinfo || cmp > 0); 2386 *memory_access_type = VMAT_CONTIGUOUS; 2387 } 2388 } 2389 } 2390 else 2391 { 2392 /* We can always handle this case using elementwise accesses, 2393 but see if something more efficient is available. */ 2394 *memory_access_type = VMAT_ELEMENTWISE; 2395 2396 /* If there is a gap at the end of the group then these optimizations 2397 would access excess elements in the last iteration. */ 2398 bool would_overrun_p = (gap != 0); 2399 /* An overrun is fine if the trailing elements are smaller than the 2400 alignment boundary B. Every vector access will be a multiple of B 2401 and so we are guaranteed to access a non-gap element in the 2402 same B-sized block. */ 2403 if (would_overrun_p 2404 && !masked_p 2405 && gap < (vect_known_alignment_in_bytes (first_dr_info) 2406 / vect_get_scalar_dr_size (first_dr_info))) 2407 would_overrun_p = false; 2408 2409 if (!STMT_VINFO_STRIDED_P (first_stmt_info) 2410 && (can_overrun_p || !would_overrun_p) 2411 && compare_step_with_zero (stmt_info) > 0) 2412 { 2413 /* First cope with the degenerate case of a single-element 2414 vector. */ 2415 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U)) 2416 ; 2417 2418 /* Otherwise try using LOAD/STORE_LANES. */ 2419 else if (vls_type == VLS_LOAD 2420 ? vect_load_lanes_supported (vectype, group_size, masked_p) 2421 : vect_store_lanes_supported (vectype, group_size, 2422 masked_p)) 2423 { 2424 *memory_access_type = VMAT_LOAD_STORE_LANES; 2425 overrun_p = would_overrun_p; 2426 } 2427 2428 /* If that fails, try using permuting loads. 
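For example (illustrative): an interleaved load pair a[2*i] and a[2*i+1] for which the target lacks load-lanes support can instead load two contiguous vectors and separate the even and odd elements with permutations; that is what VMAT_CONTIGUOUS_PERMUTE below stands for, and vect_grouped_load_supported checks that the required permutations exist for this group size.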
*/ 2429 else if (vls_type == VLS_LOAD 2430 ? vect_grouped_load_supported (vectype, single_element_p, 2431 group_size) 2432 : vect_grouped_store_supported (vectype, group_size)) 2433 { 2434 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE; 2435 overrun_p = would_overrun_p; 2436 } 2437 } 2438 2439 /* As a last resort, trying using a gather load or scatter store. 2440 2441 ??? Although the code can handle all group sizes correctly, 2442 it probably isn't a win to use separate strided accesses based 2443 on nearby locations. Or, even if it's a win over scalar code, 2444 it might not be a win over vectorizing at a lower VF, if that 2445 allows us to use contiguous accesses. */ 2446 if (*memory_access_type == VMAT_ELEMENTWISE 2447 && single_element_p 2448 && loop_vinfo 2449 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo, 2450 masked_p, gs_info)) 2451 *memory_access_type = VMAT_GATHER_SCATTER; 2452 } 2453 2454 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info) 2455 { 2456 /* STMT is the leader of the group. Check the operands of all the 2457 stmts of the group. */ 2458 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info); 2459 while (next_stmt_info) 2460 { 2461 tree op = vect_get_store_rhs (next_stmt_info); 2462 enum vect_def_type dt; 2463 if (!vect_is_simple_use (op, vinfo, &dt)) 2464 { 2465 if (dump_enabled_p ()) 2466 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2467 "use not simple.\n"); 2468 return false; 2469 } 2470 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info); 2471 } 2472 } 2473 2474 if (overrun_p) 2475 { 2476 gcc_assert (can_overrun_p); 2477 if (dump_enabled_p ()) 2478 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2479 "Data access with gaps requires scalar " 2480 "epilogue loop\n"); 2481 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true; 2482 } 2483 2484 return true; 2485 } 2486 2487 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true 2488 if there is a memory access type that the vectorized form can use, 2489 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers 2490 or scatters, fill in GS_INFO accordingly. 2491 2492 SLP says whether we're performing SLP rather than loop vectorization. 2493 MASKED_P is true if the statement is conditional on a vectorized mask. 2494 VECTYPE is the vector type that the vectorized statements will use. 2495 NCOPIES is the number of vector statements that will be needed. */ 2496 2497 static bool 2498 get_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp, 2499 bool masked_p, vec_load_store_type vls_type, 2500 unsigned int ncopies, 2501 vect_memory_access_type *memory_access_type, 2502 gather_scatter_info *gs_info) 2503 { 2504 vec_info *vinfo = stmt_info->vinfo; 2505 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 2506 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); 2507 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) 2508 { 2509 *memory_access_type = VMAT_GATHER_SCATTER; 2510 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)) 2511 gcc_unreachable (); 2512 else if (!vect_is_simple_use (gs_info->offset, vinfo, 2513 &gs_info->offset_dt, 2514 &gs_info->offset_vectype)) 2515 { 2516 if (dump_enabled_p ()) 2517 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2518 "%s index use not simple.\n", 2519 vls_type == VLS_LOAD ? 
"gather" : "scatter"); 2520 return false; 2521 } 2522 } 2523 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) 2524 { 2525 if (!get_group_load_store_type (stmt_info, vectype, slp, masked_p, 2526 vls_type, memory_access_type, gs_info)) 2527 return false; 2528 } 2529 else if (STMT_VINFO_STRIDED_P (stmt_info)) 2530 { 2531 gcc_assert (!slp); 2532 if (loop_vinfo 2533 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo, 2534 masked_p, gs_info)) 2535 *memory_access_type = VMAT_GATHER_SCATTER; 2536 else 2537 *memory_access_type = VMAT_ELEMENTWISE; 2538 } 2539 else 2540 { 2541 int cmp = compare_step_with_zero (stmt_info); 2542 if (cmp < 0) 2543 *memory_access_type = get_negative_load_store_type 2544 (stmt_info, vectype, vls_type, ncopies); 2545 else if (cmp == 0) 2546 { 2547 gcc_assert (vls_type == VLS_LOAD); 2548 *memory_access_type = VMAT_INVARIANT; 2549 } 2550 else 2551 *memory_access_type = VMAT_CONTIGUOUS; 2552 } 2553 2554 if ((*memory_access_type == VMAT_ELEMENTWISE 2555 || *memory_access_type == VMAT_STRIDED_SLP) 2556 && !nunits.is_constant ()) 2557 { 2558 if (dump_enabled_p ()) 2559 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2560 "Not using elementwise accesses due to variable " 2561 "vectorization factor.\n"); 2562 return false; 2563 } 2564 2565 /* FIXME: At the moment the cost model seems to underestimate the 2566 cost of using elementwise accesses. This check preserves the 2567 traditional behavior until that can be fixed. */ 2568 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info); 2569 if (!first_stmt_info) 2570 first_stmt_info = stmt_info; 2571 if (*memory_access_type == VMAT_ELEMENTWISE 2572 && !STMT_VINFO_STRIDED_P (first_stmt_info) 2573 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info) 2574 && !DR_GROUP_NEXT_ELEMENT (stmt_info) 2575 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info)))) 2576 { 2577 if (dump_enabled_p ()) 2578 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2579 "not falling back to elementwise accesses\n"); 2580 return false; 2581 } 2582 return true; 2583 } 2584 2585 /* Return true if boolean argument MASK is suitable for vectorizing 2586 conditional operation STMT_INFO. When returning true, store the type 2587 of the definition in *MASK_DT_OUT and the type of the vectorized mask 2588 in *MASK_VECTYPE_OUT. 
*/ 2589 2590 static bool 2591 vect_check_scalar_mask (stmt_vec_info stmt_info, tree mask, 2592 vect_def_type *mask_dt_out, 2593 tree *mask_vectype_out) 2594 { 2595 vec_info *vinfo = stmt_info->vinfo; 2596 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask))) 2597 { 2598 if (dump_enabled_p ()) 2599 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2600 "mask argument is not a boolean.\n"); 2601 return false; 2602 } 2603 2604 if (TREE_CODE (mask) != SSA_NAME) 2605 { 2606 if (dump_enabled_p ()) 2607 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2608 "mask argument is not an SSA name.\n"); 2609 return false; 2610 } 2611 2612 enum vect_def_type mask_dt; 2613 tree mask_vectype; 2614 if (!vect_is_simple_use (mask, stmt_info->vinfo, &mask_dt, &mask_vectype)) 2615 { 2616 if (dump_enabled_p ()) 2617 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2618 "mask use not simple.\n"); 2619 return false; 2620 } 2621 2622 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 2623 if (!mask_vectype) 2624 mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype)); 2625 2626 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype)) 2627 { 2628 if (dump_enabled_p ()) 2629 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2630 "could not find an appropriate vector mask type.\n"); 2631 return false; 2632 } 2633 2634 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype), 2635 TYPE_VECTOR_SUBPARTS (vectype))) 2636 { 2637 if (dump_enabled_p ()) 2638 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2639 "vector mask type %T" 2640 " does not match vector data type %T.\n", 2641 mask_vectype, vectype); 2642 2643 return false; 2644 } 2645 2646 *mask_dt_out = mask_dt; 2647 *mask_vectype_out = mask_vectype; 2648 return true; 2649 } 2650 2651 /* Return true if stored value RHS is suitable for vectorizing store 2652 statement STMT_INFO. When returning true, store the type of the 2653 definition in *RHS_DT_OUT, the type of the vectorized store value in 2654 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */ 2655 2656 static bool 2657 vect_check_store_rhs (stmt_vec_info stmt_info, tree rhs, 2658 vect_def_type *rhs_dt_out, tree *rhs_vectype_out, 2659 vec_load_store_type *vls_type_out) 2660 { 2661 /* In the case this is a store from a constant make sure 2662 native_encode_expr can handle it. */ 2663 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0) 2664 { 2665 if (dump_enabled_p ()) 2666 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2667 "cannot encode constant as a byte sequence.\n"); 2668 return false; 2669 } 2670 2671 enum vect_def_type rhs_dt; 2672 tree rhs_vectype; 2673 if (!vect_is_simple_use (rhs, stmt_info->vinfo, &rhs_dt, &rhs_vectype)) 2674 { 2675 if (dump_enabled_p ()) 2676 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2677 "use not simple.\n"); 2678 return false; 2679 } 2680 2681 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 2682 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype)) 2683 { 2684 if (dump_enabled_p ()) 2685 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2686 "incompatible vector types.\n"); 2687 return false; 2688 } 2689 2690 *rhs_dt_out = rhs_dt; 2691 *rhs_vectype_out = rhs_vectype; 2692 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def) 2693 *vls_type_out = VLS_STORE_INVARIANT; 2694 else 2695 *vls_type_out = VLS_STORE; 2696 return true; 2697 } 2698 2699 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO. 
2700 Note that we support masks with floating-point type, in which case the 2701 floats are interpreted as a bitmask. */ 2702 2703 static tree 2704 vect_build_all_ones_mask (stmt_vec_info stmt_info, tree masktype) 2705 { 2706 if (TREE_CODE (masktype) == INTEGER_TYPE) 2707 return build_int_cst (masktype, -1); 2708 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE) 2709 { 2710 tree mask = build_int_cst (TREE_TYPE (masktype), -1); 2711 mask = build_vector_from_val (masktype, mask); 2712 return vect_init_vector (stmt_info, mask, masktype, NULL); 2713 } 2714 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype))) 2715 { 2716 REAL_VALUE_TYPE r; 2717 long tmp[6]; 2718 for (int j = 0; j < 6; ++j) 2719 tmp[j] = -1; 2720 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype))); 2721 tree mask = build_real (TREE_TYPE (masktype), r); 2722 mask = build_vector_from_val (masktype, mask); 2723 return vect_init_vector (stmt_info, mask, masktype, NULL); 2724 } 2725 gcc_unreachable (); 2726 } 2727 2728 /* Build an all-zero merge value of type VECTYPE while vectorizing 2729 STMT_INFO as a gather load. */ 2730 2731 static tree 2732 vect_build_zero_merge_argument (stmt_vec_info stmt_info, tree vectype) 2733 { 2734 tree merge; 2735 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE) 2736 merge = build_int_cst (TREE_TYPE (vectype), 0); 2737 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype))) 2738 { 2739 REAL_VALUE_TYPE r; 2740 long tmp[6]; 2741 for (int j = 0; j < 6; ++j) 2742 tmp[j] = 0; 2743 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype))); 2744 merge = build_real (TREE_TYPE (vectype), r); 2745 } 2746 else 2747 gcc_unreachable (); 2748 merge = build_vector_from_val (vectype, merge); 2749 return vect_init_vector (stmt_info, merge, vectype, NULL); 2750 } 2751 2752 /* Build a gather load call while vectorizing STMT_INFO. Insert new 2753 instructions before GSI and add them to VEC_STMT. GS_INFO describes 2754 the gather load operation. If the load is conditional, MASK is the 2755 unvectorized condition and MASK_DT is its definition type, otherwise 2756 MASK is null. 
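As an illustration (the exact modes are hypothetical): if VECTYPE is V4DF but the builtin's offset vector type has eight elements, the gather is widening: each call consumes half of the offset vector, so the second copy feeds the builtin the upper half selected by a { 4, 5, 6, 7, ... } permutation.  If instead VECTYPE has twice as many elements as the offset vector, the gather is narrowing: two calls are emitted per copy and their results are combined with a permutation, with the mask operand split between the two calls either by a permutation or by VEC_UNPACK_LO_EXPR/VEC_UNPACK_HI_EXPR, depending on the builtin's mask type.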
*/ 2757 2758 static void 2759 vect_build_gather_load_calls (stmt_vec_info stmt_info, 2760 gimple_stmt_iterator *gsi, 2761 stmt_vec_info *vec_stmt, 2762 gather_scatter_info *gs_info, 2763 tree mask) 2764 { 2765 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 2766 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 2767 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 2768 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); 2769 int ncopies = vect_get_num_copies (loop_vinfo, vectype); 2770 edge pe = loop_preheader_edge (loop); 2771 enum { NARROW, NONE, WIDEN } modifier; 2772 poly_uint64 gather_off_nunits 2773 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype); 2774 2775 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl)); 2776 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl)); 2777 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); 2778 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); 2779 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); 2780 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); 2781 tree scaletype = TREE_VALUE (arglist); 2782 tree real_masktype = masktype; 2783 gcc_checking_assert (types_compatible_p (srctype, rettype) 2784 && (!mask 2785 || TREE_CODE (masktype) == INTEGER_TYPE 2786 || types_compatible_p (srctype, masktype))); 2787 if (mask && TREE_CODE (masktype) == INTEGER_TYPE) 2788 masktype = truth_type_for (srctype); 2789 2790 tree mask_halftype = masktype; 2791 tree perm_mask = NULL_TREE; 2792 tree mask_perm_mask = NULL_TREE; 2793 if (known_eq (nunits, gather_off_nunits)) 2794 modifier = NONE; 2795 else if (known_eq (nunits * 2, gather_off_nunits)) 2796 { 2797 modifier = WIDEN; 2798 2799 /* Currently widening gathers and scatters are only supported for 2800 fixed-length vectors. */ 2801 int count = gather_off_nunits.to_constant (); 2802 vec_perm_builder sel (count, count, 1); 2803 for (int i = 0; i < count; ++i) 2804 sel.quick_push (i | (count / 2)); 2805 2806 vec_perm_indices indices (sel, 1, count); 2807 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype, 2808 indices); 2809 } 2810 else if (known_eq (nunits, gather_off_nunits * 2)) 2811 { 2812 modifier = NARROW; 2813 2814 /* Currently narrowing gathers and scatters are only supported for 2815 fixed-length vectors. */ 2816 int count = nunits.to_constant (); 2817 vec_perm_builder sel (count, count, 1); 2818 sel.quick_grow (count); 2819 for (int i = 0; i < count; ++i) 2820 sel[i] = i < count / 2 ? 
i : i + count / 2; 2821 vec_perm_indices indices (sel, 2, count); 2822 perm_mask = vect_gen_perm_mask_checked (vectype, indices); 2823 2824 ncopies *= 2; 2825 2826 if (mask && masktype == real_masktype) 2827 { 2828 for (int i = 0; i < count; ++i) 2829 sel[i] = i | (count / 2); 2830 indices.new_vector (sel, 2, count); 2831 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices); 2832 } 2833 else if (mask) 2834 mask_halftype = truth_type_for (gs_info->offset_vectype); 2835 } 2836 else 2837 gcc_unreachable (); 2838 2839 tree scalar_dest = gimple_get_lhs (stmt_info->stmt); 2840 tree vec_dest = vect_create_destination_var (scalar_dest, vectype); 2841 2842 tree ptr = fold_convert (ptrtype, gs_info->base); 2843 if (!is_gimple_min_invariant (ptr)) 2844 { 2845 gimple_seq seq; 2846 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE); 2847 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq); 2848 gcc_assert (!new_bb); 2849 } 2850 2851 tree scale = build_int_cst (scaletype, gs_info->scale); 2852 2853 tree vec_oprnd0 = NULL_TREE; 2854 tree vec_mask = NULL_TREE; 2855 tree src_op = NULL_TREE; 2856 tree mask_op = NULL_TREE; 2857 tree prev_res = NULL_TREE; 2858 stmt_vec_info prev_stmt_info = NULL; 2859 2860 if (!mask) 2861 { 2862 src_op = vect_build_zero_merge_argument (stmt_info, rettype); 2863 mask_op = vect_build_all_ones_mask (stmt_info, masktype); 2864 } 2865 2866 for (int j = 0; j < ncopies; ++j) 2867 { 2868 tree op, var; 2869 if (modifier == WIDEN && (j & 1)) 2870 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, 2871 perm_mask, stmt_info, gsi); 2872 else if (j == 0) 2873 op = vec_oprnd0 2874 = vect_get_vec_def_for_operand (gs_info->offset, stmt_info); 2875 else 2876 op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (loop_vinfo, 2877 vec_oprnd0); 2878 2879 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op))) 2880 { 2881 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)), 2882 TYPE_VECTOR_SUBPARTS (idxtype))); 2883 var = vect_get_new_ssa_name (idxtype, vect_simple_var); 2884 op = build1 (VIEW_CONVERT_EXPR, idxtype, op); 2885 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op); 2886 vect_finish_stmt_generation (stmt_info, new_stmt, gsi); 2887 op = var; 2888 } 2889 2890 if (mask) 2891 { 2892 if (mask_perm_mask && (j & 1)) 2893 mask_op = permute_vec_elements (mask_op, mask_op, 2894 mask_perm_mask, stmt_info, gsi); 2895 else 2896 { 2897 if (j == 0) 2898 vec_mask = vect_get_vec_def_for_operand (mask, stmt_info); 2899 else if (modifier != NARROW || (j & 1) == 0) 2900 vec_mask = vect_get_vec_def_for_stmt_copy (loop_vinfo, 2901 vec_mask); 2902 2903 mask_op = vec_mask; 2904 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask))) 2905 { 2906 poly_uint64 sub1 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op)); 2907 poly_uint64 sub2 = TYPE_VECTOR_SUBPARTS (masktype); 2908 gcc_assert (known_eq (sub1, sub2)); 2909 var = vect_get_new_ssa_name (masktype, vect_simple_var); 2910 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op); 2911 gassign *new_stmt 2912 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op); 2913 vect_finish_stmt_generation (stmt_info, new_stmt, gsi); 2914 mask_op = var; 2915 } 2916 } 2917 if (modifier == NARROW && masktype != real_masktype) 2918 { 2919 var = vect_get_new_ssa_name (mask_halftype, vect_simple_var); 2920 gassign *new_stmt 2921 = gimple_build_assign (var, (j & 1) ? 
VEC_UNPACK_HI_EXPR 2922 : VEC_UNPACK_LO_EXPR, 2923 mask_op); 2924 vect_finish_stmt_generation (stmt_info, new_stmt, gsi); 2925 mask_op = var; 2926 } 2927 src_op = mask_op; 2928 } 2929 2930 tree mask_arg = mask_op; 2931 if (masktype != real_masktype) 2932 { 2933 tree utype, optype = TREE_TYPE (mask_op); 2934 if (TYPE_MODE (real_masktype) == TYPE_MODE (optype)) 2935 utype = real_masktype; 2936 else 2937 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1); 2938 var = vect_get_new_ssa_name (utype, vect_scalar_var); 2939 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op); 2940 gassign *new_stmt 2941 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg); 2942 vect_finish_stmt_generation (stmt_info, new_stmt, gsi); 2943 mask_arg = var; 2944 if (!useless_type_conversion_p (real_masktype, utype)) 2945 { 2946 gcc_assert (TYPE_PRECISION (utype) 2947 <= TYPE_PRECISION (real_masktype)); 2948 var = vect_get_new_ssa_name (real_masktype, vect_scalar_var); 2949 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg); 2950 vect_finish_stmt_generation (stmt_info, new_stmt, gsi); 2951 mask_arg = var; 2952 } 2953 src_op = build_zero_cst (srctype); 2954 } 2955 gcall *new_call = gimple_build_call (gs_info->decl, 5, src_op, ptr, op, 2956 mask_arg, scale); 2957 2958 stmt_vec_info new_stmt_info; 2959 if (!useless_type_conversion_p (vectype, rettype)) 2960 { 2961 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 2962 TYPE_VECTOR_SUBPARTS (rettype))); 2963 op = vect_get_new_ssa_name (rettype, vect_simple_var); 2964 gimple_call_set_lhs (new_call, op); 2965 vect_finish_stmt_generation (stmt_info, new_call, gsi); 2966 var = make_ssa_name (vec_dest); 2967 op = build1 (VIEW_CONVERT_EXPR, vectype, op); 2968 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op); 2969 new_stmt_info 2970 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi); 2971 } 2972 else 2973 { 2974 var = make_ssa_name (vec_dest, new_call); 2975 gimple_call_set_lhs (new_call, var); 2976 new_stmt_info 2977 = vect_finish_stmt_generation (stmt_info, new_call, gsi); 2978 } 2979 2980 if (modifier == NARROW) 2981 { 2982 if ((j & 1) == 0) 2983 { 2984 prev_res = var; 2985 continue; 2986 } 2987 var = permute_vec_elements (prev_res, var, perm_mask, 2988 stmt_info, gsi); 2989 new_stmt_info = loop_vinfo->lookup_def (var); 2990 } 2991 2992 if (prev_stmt_info == NULL) 2993 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info; 2994 else 2995 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info; 2996 prev_stmt_info = new_stmt_info; 2997 } 2998 } 2999 3000 /* Prepare the base and offset in GS_INFO for vectorization. 3001 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET 3002 to the vectorized offset argument for the first copy of STMT_INFO. 3003 STMT_INFO is the statement described by GS_INFO and LOOP is the 3004 containing loop. 
*/ 3005 3006 static void 3007 vect_get_gather_scatter_ops (class loop *loop, stmt_vec_info stmt_info, 3008 gather_scatter_info *gs_info, 3009 tree *dataref_ptr, tree *vec_offset) 3010 { 3011 gimple_seq stmts = NULL; 3012 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE); 3013 if (stmts != NULL) 3014 { 3015 basic_block new_bb; 3016 edge pe = loop_preheader_edge (loop); 3017 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); 3018 gcc_assert (!new_bb); 3019 } 3020 *vec_offset = vect_get_vec_def_for_operand (gs_info->offset, stmt_info, 3021 gs_info->offset_vectype); 3022 } 3023 3024 /* Prepare to implement a grouped or strided load or store using 3025 the gather load or scatter store operation described by GS_INFO. 3026 STMT_INFO is the load or store statement. 3027 3028 Set *DATAREF_BUMP to the amount that should be added to the base 3029 address after each copy of the vectorized statement. Set *VEC_OFFSET 3030 to an invariant offset vector in which element I has the value 3031 I * DR_STEP / SCALE. */ 3032 3033 static void 3034 vect_get_strided_load_store_ops (stmt_vec_info stmt_info, 3035 loop_vec_info loop_vinfo, 3036 gather_scatter_info *gs_info, 3037 tree *dataref_bump, tree *vec_offset) 3038 { 3039 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); 3040 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 3041 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 3042 gimple_seq stmts; 3043 3044 tree bump = size_binop (MULT_EXPR, 3045 fold_convert (sizetype, unshare_expr (DR_STEP (dr))), 3046 size_int (TYPE_VECTOR_SUBPARTS (vectype))); 3047 *dataref_bump = force_gimple_operand (bump, &stmts, true, NULL_TREE); 3048 if (stmts) 3049 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts); 3050 3051 /* The offset given in GS_INFO can have pointer type, so use the element 3052 type of the vector instead. */ 3053 tree offset_type = TREE_TYPE (gs_info->offset); 3054 offset_type = TREE_TYPE (gs_info->offset_vectype); 3055 3056 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */ 3057 tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)), 3058 ssize_int (gs_info->scale)); 3059 step = fold_convert (offset_type, step); 3060 step = force_gimple_operand (step, &stmts, true, NULL_TREE); 3061 3062 /* Create {0, X, X*2, X*3, ...}. */ 3063 *vec_offset = gimple_build (&stmts, VEC_SERIES_EXPR, gs_info->offset_vectype, 3064 build_zero_cst (offset_type), step); 3065 if (stmts) 3066 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts); 3067 } 3068 3069 /* Return the amount that should be added to a vector pointer to move 3070 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference 3071 being vectorized and MEMORY_ACCESS_TYPE describes the type of 3072 vectorization. */ 3073 3074 static tree 3075 vect_get_data_ptr_increment (dr_vec_info *dr_info, tree aggr_type, 3076 vect_memory_access_type memory_access_type) 3077 { 3078 if (memory_access_type == VMAT_INVARIANT) 3079 return size_zero_node; 3080 3081 tree iv_step = TYPE_SIZE_UNIT (aggr_type); 3082 tree step = vect_dr_behavior (dr_info)->step; 3083 if (tree_int_cst_sgn (step) == -1) 3084 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step); 3085 return iv_step; 3086 } 3087 3088 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. 
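For example: __builtin_bswap32 on a V4SI vector can be done entirely as a byte permutation.  Viewing the vector as V16QI, WORD_BYTES is 4 and the selector built below is { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }, i.e. the bytes of each 32-bit word are reversed in place; the statement is only vectorizable if the target supports that constant VEC_PERM_EXPR on V16QI.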
*/ 3089 3090 static bool 3091 vectorizable_bswap (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, 3092 stmt_vec_info *vec_stmt, slp_tree slp_node, 3093 tree vectype_in, stmt_vector_for_cost *cost_vec) 3094 { 3095 tree op, vectype; 3096 gcall *stmt = as_a <gcall *> (stmt_info->stmt); 3097 vec_info *vinfo = stmt_info->vinfo; 3098 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 3099 unsigned ncopies; 3100 3101 op = gimple_call_arg (stmt, 0); 3102 vectype = STMT_VINFO_VECTYPE (stmt_info); 3103 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); 3104 3105 /* Multiple types in SLP are handled by creating the appropriate number of 3106 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in 3107 case of SLP. */ 3108 if (slp_node) 3109 ncopies = 1; 3110 else 3111 ncopies = vect_get_num_copies (loop_vinfo, vectype); 3112 3113 gcc_assert (ncopies >= 1); 3114 3115 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in); 3116 if (! char_vectype) 3117 return false; 3118 3119 poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype); 3120 unsigned word_bytes; 3121 if (!constant_multiple_p (num_bytes, nunits, &word_bytes)) 3122 return false; 3123 3124 /* The encoding uses one stepped pattern for each byte in the word. */ 3125 vec_perm_builder elts (num_bytes, word_bytes, 3); 3126 for (unsigned i = 0; i < 3; ++i) 3127 for (unsigned j = 0; j < word_bytes; ++j) 3128 elts.quick_push ((i + 1) * word_bytes - j - 1); 3129 3130 vec_perm_indices indices (elts, 1, num_bytes); 3131 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices)) 3132 return false; 3133 3134 if (! vec_stmt) 3135 { 3136 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type; 3137 DUMP_VECT_SCOPE ("vectorizable_bswap"); 3138 if (! slp_node) 3139 { 3140 record_stmt_cost (cost_vec, 3141 1, vector_stmt, stmt_info, 0, vect_prologue); 3142 record_stmt_cost (cost_vec, 3143 ncopies, vec_perm, stmt_info, 0, vect_body); 3144 } 3145 return true; 3146 } 3147 3148 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices); 3149 3150 /* Transform. */ 3151 vec<tree> vec_oprnds = vNULL; 3152 stmt_vec_info new_stmt_info = NULL; 3153 stmt_vec_info prev_stmt_info = NULL; 3154 for (unsigned j = 0; j < ncopies; j++) 3155 { 3156 /* Handle uses. */ 3157 if (j == 0) 3158 vect_get_vec_defs (op, NULL, stmt_info, &vec_oprnds, NULL, slp_node); 3159 else 3160 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds, NULL); 3161 3162 /* Arguments are ready. create the new vector stmt. 
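For the V4SI bswap32 case the emitted sequence looks like (SSA names are illustrative):
      vect_tmp1 = VIEW_CONVERT_EXPR<vector(16) char>(vect_x);
      vect_tmp2 = VEC_PERM_EXPR <vect_tmp1, vect_tmp1, bswap_vconst>;
      vect_res  = VIEW_CONVERT_EXPR<vector(4) int>(vect_tmp2);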
*/ 3163 unsigned i; 3164 tree vop; 3165 FOR_EACH_VEC_ELT (vec_oprnds, i, vop) 3166 { 3167 gimple *new_stmt; 3168 tree tem = make_ssa_name (char_vectype); 3169 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR, 3170 char_vectype, vop)); 3171 vect_finish_stmt_generation (stmt_info, new_stmt, gsi); 3172 tree tem2 = make_ssa_name (char_vectype); 3173 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR, 3174 tem, tem, bswap_vconst); 3175 vect_finish_stmt_generation (stmt_info, new_stmt, gsi); 3176 tem = make_ssa_name (vectype); 3177 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR, 3178 vectype, tem2)); 3179 new_stmt_info 3180 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi); 3181 if (slp_node) 3182 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info); 3183 } 3184 3185 if (slp_node) 3186 continue; 3187 3188 if (j == 0) 3189 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info; 3190 else 3191 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info; 3192 3193 prev_stmt_info = new_stmt_info; 3194 } 3195 3196 vec_oprnds.release (); 3197 return true; 3198 } 3199 3200 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have 3201 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT 3202 in a single step. On success, store the binary pack code in 3203 *CONVERT_CODE. */ 3204 3205 static bool 3206 simple_integer_narrowing (tree vectype_out, tree vectype_in, 3207 tree_code *convert_code) 3208 { 3209 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out)) 3210 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in))) 3211 return false; 3212 3213 tree_code code; 3214 int multi_step_cvt = 0; 3215 auto_vec <tree, 8> interm_types; 3216 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in, 3217 &code, &multi_step_cvt, &interm_types) 3218 || multi_step_cvt) 3219 return false; 3220 3221 *convert_code = code; 3222 return true; 3223 } 3224 3225 /* Function vectorizable_call. 3226 3227 Check if STMT_INFO performs a function call that can be vectorized. 3228 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized 3229 stmt to replace it, put it in VEC_STMT, and insert it at GSI. 3230 Return true if STMT_INFO is vectorizable in this way. */ 3231 3232 static bool 3233 vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, 3234 stmt_vec_info *vec_stmt, slp_tree slp_node, 3235 stmt_vector_for_cost *cost_vec) 3236 { 3237 gcall *stmt; 3238 tree vec_dest; 3239 tree scalar_dest; 3240 tree op; 3241 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE; 3242 stmt_vec_info prev_stmt_info; 3243 tree vectype_out, vectype_in; 3244 poly_uint64 nunits_in; 3245 poly_uint64 nunits_out; 3246 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 3247 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); 3248 vec_info *vinfo = stmt_info->vinfo; 3249 tree fndecl, new_temp, rhs_type; 3250 enum vect_def_type dt[4] 3251 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type, 3252 vect_unknown_def_type }; 3253 tree vectypes[ARRAY_SIZE (dt)] = {}; 3254 int ndts = ARRAY_SIZE (dt); 3255 int ncopies, j; 3256 auto_vec<tree, 8> vargs; 3257 auto_vec<tree, 8> orig_vargs; 3258 enum { NARROW, NONE, WIDEN } modifier; 3259 size_t i, nargs; 3260 tree lhs; 3261 3262 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) 3263 return false; 3264 3265 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def 3266 && ! vec_stmt) 3267 return false; 3268 3269 /* Is STMT_INFO a vectorizable call? 
*/ 3270 stmt = dyn_cast <gcall *> (stmt_info->stmt); 3271 if (!stmt) 3272 return false; 3273 3274 if (gimple_call_internal_p (stmt) 3275 && (internal_load_fn_p (gimple_call_internal_fn (stmt)) 3276 || internal_store_fn_p (gimple_call_internal_fn (stmt)))) 3277 /* Handled by vectorizable_load and vectorizable_store. */ 3278 return false; 3279 3280 if (gimple_call_lhs (stmt) == NULL_TREE 3281 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME) 3282 return false; 3283 3284 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt)); 3285 3286 vectype_out = STMT_VINFO_VECTYPE (stmt_info); 3287 3288 /* Process function arguments. */ 3289 rhs_type = NULL_TREE; 3290 vectype_in = NULL_TREE; 3291 nargs = gimple_call_num_args (stmt); 3292 3293 /* Bail out if the function has more than three arguments, we do not have 3294 interesting builtin functions to vectorize with more than two arguments 3295 except for fma. No arguments is also not good. */ 3296 if (nargs == 0 || nargs > 4) 3297 return false; 3298 3299 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */ 3300 combined_fn cfn = gimple_call_combined_fn (stmt); 3301 if (cfn == CFN_GOMP_SIMD_LANE) 3302 { 3303 nargs = 0; 3304 rhs_type = unsigned_type_node; 3305 } 3306 3307 int mask_opno = -1; 3308 if (internal_fn_p (cfn)) 3309 mask_opno = internal_fn_mask_index (as_internal_fn (cfn)); 3310 3311 for (i = 0; i < nargs; i++) 3312 { 3313 op = gimple_call_arg (stmt, i); 3314 3315 if ((int) i == mask_opno) 3316 { 3317 if (!vect_check_scalar_mask (stmt_info, op, &dt[i], &vectypes[i])) 3318 return false; 3319 continue; 3320 } 3321 3322 if (!vect_is_simple_use (op, vinfo, &dt[i], &vectypes[i])) 3323 { 3324 if (dump_enabled_p ()) 3325 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 3326 "use not simple.\n"); 3327 return false; 3328 } 3329 3330 /* We can only handle calls with arguments of the same type. */ 3331 if (rhs_type 3332 && !types_compatible_p (rhs_type, TREE_TYPE (op))) 3333 { 3334 if (dump_enabled_p ()) 3335 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 3336 "argument types differ.\n"); 3337 return false; 3338 } 3339 if (!rhs_type) 3340 rhs_type = TREE_TYPE (op); 3341 3342 if (!vectype_in) 3343 vectype_in = vectypes[i]; 3344 else if (vectypes[i] 3345 && !types_compatible_p (vectypes[i], vectype_in)) 3346 { 3347 if (dump_enabled_p ()) 3348 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 3349 "argument vector types differ.\n"); 3350 return false; 3351 } 3352 } 3353 /* If all arguments are external or constant defs, infer the vector type 3354 from the scalar type. */ 3355 if (!vectype_in) 3356 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node); 3357 if (vec_stmt) 3358 gcc_assert (vectype_in); 3359 if (!vectype_in) 3360 { 3361 if (dump_enabled_p ()) 3362 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 3363 "no vectype for scalar type %T\n", rhs_type); 3364 3365 return false; 3366 } 3367 /* FORNOW: we don't yet support mixtures of vector sizes for calls, 3368 just mixtures of nunits. E.g. DI->SI versions of __builtin_ctz* 3369 are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed 3370 by a pack of the two vectors into an SI vector. We would need 3371 separate code to handle direct VnDI->VnSI IFN_CTZs. 
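After the equal-size and mask checks that follow, the MODIFIER classification is purely by element count: for instance (illustrative vector types), V4SF arguments with a V4SI result give NONE, V2DF arguments with a V4SI result give NARROW, and V4SF arguments with a V2DI result give WIDEN; any other ratio makes the call non-vectorizable here.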
*/ 3372 if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype_out)) 3373 { 3374 if (dump_enabled_p ()) 3375 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 3376 "mismatched vector sizes %T and %T\n", 3377 vectype_in, vectype_out); 3378 return false; 3379 } 3380 3381 if (VECTOR_BOOLEAN_TYPE_P (vectype_out) 3382 != VECTOR_BOOLEAN_TYPE_P (vectype_in)) 3383 { 3384 if (dump_enabled_p ()) 3385 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 3386 "mixed mask and nonmask vector types\n"); 3387 return false; 3388 } 3389 3390 /* FORNOW */ 3391 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in); 3392 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out); 3393 if (known_eq (nunits_in * 2, nunits_out)) 3394 modifier = NARROW; 3395 else if (known_eq (nunits_out, nunits_in)) 3396 modifier = NONE; 3397 else if (known_eq (nunits_out * 2, nunits_in)) 3398 modifier = WIDEN; 3399 else 3400 return false; 3401 3402 /* We only handle functions that do not read or clobber memory. */ 3403 if (gimple_vuse (stmt)) 3404 { 3405 if (dump_enabled_p ()) 3406 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 3407 "function reads from or writes to memory.\n"); 3408 return false; 3409 } 3410 3411 /* For now, we only vectorize functions if a target specific builtin 3412 is available. TODO -- in some cases, it might be profitable to 3413 insert the calls for pieces of the vector, in order to be able 3414 to vectorize other operations in the loop. */ 3415 fndecl = NULL_TREE; 3416 internal_fn ifn = IFN_LAST; 3417 tree callee = gimple_call_fndecl (stmt); 3418 3419 /* First try using an internal function. */ 3420 tree_code convert_code = ERROR_MARK; 3421 if (cfn != CFN_LAST 3422 && (modifier == NONE 3423 || (modifier == NARROW 3424 && simple_integer_narrowing (vectype_out, vectype_in, 3425 &convert_code)))) 3426 ifn = vectorizable_internal_function (cfn, callee, vectype_out, 3427 vectype_in); 3428 3429 /* If that fails, try asking for a target-specific built-in function. */ 3430 if (ifn == IFN_LAST) 3431 { 3432 if (cfn != CFN_LAST) 3433 fndecl = targetm.vectorize.builtin_vectorized_function 3434 (cfn, vectype_out, vectype_in); 3435 else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD)) 3436 fndecl = targetm.vectorize.builtin_md_vectorized_function 3437 (callee, vectype_out, vectype_in); 3438 } 3439 3440 if (ifn == IFN_LAST && !fndecl) 3441 { 3442 if (cfn == CFN_GOMP_SIMD_LANE 3443 && !slp_node 3444 && loop_vinfo 3445 && LOOP_VINFO_LOOP (loop_vinfo)->simduid 3446 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME 3447 && LOOP_VINFO_LOOP (loop_vinfo)->simduid 3448 == SSA_NAME_VAR (gimple_call_arg (stmt, 0))) 3449 { 3450 /* We can handle IFN_GOMP_SIMD_LANE by returning a 3451 { 0, 1, 2, ... vf - 1 } vector. 
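   For example, with four elements per vector, copy J is given the constant
   vector { 4*J, 4*J + 1, 4*J + 2, 4*J + 3 }, built via build_index_vector
   in the transform code below.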
*/ 3452 gcc_assert (nargs == 0); 3453 } 3454 else if (modifier == NONE 3455 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16) 3456 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32) 3457 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64))) 3458 return vectorizable_bswap (stmt_info, gsi, vec_stmt, slp_node, 3459 vectype_in, cost_vec); 3460 else 3461 { 3462 if (dump_enabled_p ()) 3463 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 3464 "function is not vectorizable.\n"); 3465 return false; 3466 } 3467 } 3468 3469 if (slp_node) 3470 ncopies = 1; 3471 else if (modifier == NARROW && ifn == IFN_LAST) 3472 ncopies = vect_get_num_copies (loop_vinfo, vectype_out); 3473 else 3474 ncopies = vect_get_num_copies (loop_vinfo, vectype_in); 3475 3476 /* Sanity check: make sure that at least one copy of the vectorized stmt 3477 needs to be generated. */ 3478 gcc_assert (ncopies >= 1); 3479 3480 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL); 3481 if (!vec_stmt) /* transformation not required. */ 3482 { 3483 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type; 3484 DUMP_VECT_SCOPE ("vectorizable_call"); 3485 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec); 3486 if (ifn != IFN_LAST && modifier == NARROW && !slp_node) 3487 record_stmt_cost (cost_vec, ncopies / 2, 3488 vec_promote_demote, stmt_info, 0, vect_body); 3489 3490 if (loop_vinfo && mask_opno >= 0) 3491 { 3492 unsigned int nvectors = (slp_node 3493 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) 3494 : ncopies); 3495 tree scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno); 3496 vect_record_loop_mask (loop_vinfo, masks, nvectors, 3497 vectype_out, scalar_mask); 3498 } 3499 return true; 3500 } 3501 3502 /* Transform. */ 3503 3504 if (dump_enabled_p ()) 3505 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n"); 3506 3507 /* Handle def. */ 3508 scalar_dest = gimple_call_lhs (stmt); 3509 vec_dest = vect_create_destination_var (scalar_dest, vectype_out); 3510 3511 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo); 3512 3513 stmt_vec_info new_stmt_info = NULL; 3514 prev_stmt_info = NULL; 3515 if (modifier == NONE || ifn != IFN_LAST) 3516 { 3517 tree prev_res = NULL_TREE; 3518 vargs.safe_grow (nargs); 3519 orig_vargs.safe_grow (nargs); 3520 for (j = 0; j < ncopies; ++j) 3521 { 3522 /* Build argument list for the vectorized call. */ 3523 if (slp_node) 3524 { 3525 auto_vec<vec<tree> > vec_defs (nargs); 3526 vec<tree> vec_oprnds0; 3527 3528 vect_get_slp_defs (slp_node, &vec_defs); 3529 vec_oprnds0 = vec_defs[0]; 3530 3531 /* Arguments are ready. Create the new vector stmt. */ 3532 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0) 3533 { 3534 size_t k; 3535 for (k = 0; k < nargs; k++) 3536 { 3537 vec<tree> vec_oprndsk = vec_defs[k]; 3538 vargs[k] = vec_oprndsk[i]; 3539 } 3540 if (modifier == NARROW) 3541 { 3542 /* We don't define any narrowing conditional functions 3543 at present. 
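   Because of this, a masked call can never reach the NARROW path, which
   the assert below relies on.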
*/ 3544 gcc_assert (mask_opno < 0); 3545 tree half_res = make_ssa_name (vectype_in); 3546 gcall *call 3547 = gimple_build_call_internal_vec (ifn, vargs); 3548 gimple_call_set_lhs (call, half_res); 3549 gimple_call_set_nothrow (call, true); 3550 vect_finish_stmt_generation (stmt_info, call, gsi); 3551 if ((i & 1) == 0) 3552 { 3553 prev_res = half_res; 3554 continue; 3555 } 3556 new_temp = make_ssa_name (vec_dest); 3557 gimple *new_stmt 3558 = gimple_build_assign (new_temp, convert_code, 3559 prev_res, half_res); 3560 new_stmt_info 3561 = vect_finish_stmt_generation (stmt_info, new_stmt, 3562 gsi); 3563 } 3564 else 3565 { 3566 if (mask_opno >= 0 && masked_loop_p) 3567 { 3568 unsigned int vec_num = vec_oprnds0.length (); 3569 /* Always true for SLP. */ 3570 gcc_assert (ncopies == 1); 3571 tree mask = vect_get_loop_mask (gsi, masks, vec_num, 3572 vectype_out, i); 3573 vargs[mask_opno] = prepare_load_store_mask 3574 (TREE_TYPE (mask), mask, vargs[mask_opno], gsi); 3575 } 3576 3577 gcall *call; 3578 if (ifn != IFN_LAST) 3579 call = gimple_build_call_internal_vec (ifn, vargs); 3580 else 3581 call = gimple_build_call_vec (fndecl, vargs); 3582 new_temp = make_ssa_name (vec_dest, call); 3583 gimple_call_set_lhs (call, new_temp); 3584 gimple_call_set_nothrow (call, true); 3585 new_stmt_info 3586 = vect_finish_stmt_generation (stmt_info, call, gsi); 3587 } 3588 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info); 3589 } 3590 3591 for (i = 0; i < nargs; i++) 3592 { 3593 vec<tree> vec_oprndsi = vec_defs[i]; 3594 vec_oprndsi.release (); 3595 } 3596 continue; 3597 } 3598 3599 for (i = 0; i < nargs; i++) 3600 { 3601 op = gimple_call_arg (stmt, i); 3602 if (j == 0) 3603 vec_oprnd0 3604 = vect_get_vec_def_for_operand (op, stmt_info, vectypes[i]); 3605 else 3606 vec_oprnd0 3607 = vect_get_vec_def_for_stmt_copy (vinfo, orig_vargs[i]); 3608 3609 orig_vargs[i] = vargs[i] = vec_oprnd0; 3610 } 3611 3612 if (mask_opno >= 0 && masked_loop_p) 3613 { 3614 tree mask = vect_get_loop_mask (gsi, masks, ncopies, 3615 vectype_out, j); 3616 vargs[mask_opno] 3617 = prepare_load_store_mask (TREE_TYPE (mask), mask, 3618 vargs[mask_opno], gsi); 3619 } 3620 3621 if (cfn == CFN_GOMP_SIMD_LANE) 3622 { 3623 tree cst = build_index_vector (vectype_out, j * nunits_out, 1); 3624 tree new_var 3625 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_"); 3626 gimple *init_stmt = gimple_build_assign (new_var, cst); 3627 vect_init_vector_1 (stmt_info, init_stmt, NULL); 3628 new_temp = make_ssa_name (vec_dest); 3629 gimple *new_stmt = gimple_build_assign (new_temp, new_var); 3630 new_stmt_info 3631 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi); 3632 } 3633 else if (modifier == NARROW) 3634 { 3635 /* We don't define any narrowing conditional functions at 3636 present. 
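   Copies are processed in pairs here: an even J only records its
   half-width result in PREV_RES, and the following odd J combines
   PREV_RES with its own half using CONVERT_CODE.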
*/ 3637 gcc_assert (mask_opno < 0); 3638 tree half_res = make_ssa_name (vectype_in); 3639 gcall *call = gimple_build_call_internal_vec (ifn, vargs); 3640 gimple_call_set_lhs (call, half_res); 3641 gimple_call_set_nothrow (call, true); 3642 vect_finish_stmt_generation (stmt_info, call, gsi); 3643 if ((j & 1) == 0) 3644 { 3645 prev_res = half_res; 3646 continue; 3647 } 3648 new_temp = make_ssa_name (vec_dest); 3649 gassign *new_stmt = gimple_build_assign (new_temp, convert_code, 3650 prev_res, half_res); 3651 new_stmt_info 3652 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi); 3653 } 3654 else 3655 { 3656 gcall *call; 3657 if (ifn != IFN_LAST) 3658 call = gimple_build_call_internal_vec (ifn, vargs); 3659 else 3660 call = gimple_build_call_vec (fndecl, vargs); 3661 new_temp = make_ssa_name (vec_dest, call); 3662 gimple_call_set_lhs (call, new_temp); 3663 gimple_call_set_nothrow (call, true); 3664 new_stmt_info 3665 = vect_finish_stmt_generation (stmt_info, call, gsi); 3666 } 3667 3668 if (j == (modifier == NARROW ? 1 : 0)) 3669 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info; 3670 else 3671 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info; 3672 3673 prev_stmt_info = new_stmt_info; 3674 } 3675 } 3676 else if (modifier == NARROW) 3677 { 3678 /* We don't define any narrowing conditional functions at present. */ 3679 gcc_assert (mask_opno < 0); 3680 for (j = 0; j < ncopies; ++j) 3681 { 3682 /* Build argument list for the vectorized call. */ 3683 if (j == 0) 3684 vargs.create (nargs * 2); 3685 else 3686 vargs.truncate (0); 3687 3688 if (slp_node) 3689 { 3690 auto_vec<vec<tree> > vec_defs (nargs); 3691 vec<tree> vec_oprnds0; 3692 3693 vect_get_slp_defs (slp_node, &vec_defs); 3694 vec_oprnds0 = vec_defs[0]; 3695 3696 /* Arguments are ready. Create the new vector stmt. 
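   Each new call consumes two consecutive vector defs per argument
   (VEC_OPRNDSK[I] and VEC_OPRNDSK[I + 1]) and produces one narrowed
   result vector.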
*/ 3697 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2) 3698 { 3699 size_t k; 3700 vargs.truncate (0); 3701 for (k = 0; k < nargs; k++) 3702 { 3703 vec<tree> vec_oprndsk = vec_defs[k]; 3704 vargs.quick_push (vec_oprndsk[i]); 3705 vargs.quick_push (vec_oprndsk[i + 1]); 3706 } 3707 gcall *call; 3708 if (ifn != IFN_LAST) 3709 call = gimple_build_call_internal_vec (ifn, vargs); 3710 else 3711 call = gimple_build_call_vec (fndecl, vargs); 3712 new_temp = make_ssa_name (vec_dest, call); 3713 gimple_call_set_lhs (call, new_temp); 3714 gimple_call_set_nothrow (call, true); 3715 new_stmt_info 3716 = vect_finish_stmt_generation (stmt_info, call, gsi); 3717 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info); 3718 } 3719 3720 for (i = 0; i < nargs; i++) 3721 { 3722 vec<tree> vec_oprndsi = vec_defs[i]; 3723 vec_oprndsi.release (); 3724 } 3725 continue; 3726 } 3727 3728 for (i = 0; i < nargs; i++) 3729 { 3730 op = gimple_call_arg (stmt, i); 3731 if (j == 0) 3732 { 3733 vec_oprnd0 3734 = vect_get_vec_def_for_operand (op, stmt_info, 3735 vectypes[i]); 3736 vec_oprnd1 3737 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0); 3738 } 3739 else 3740 { 3741 vec_oprnd1 = gimple_call_arg (new_stmt_info->stmt, 3742 2 * i + 1); 3743 vec_oprnd0 3744 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1); 3745 vec_oprnd1 3746 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0); 3747 } 3748 3749 vargs.quick_push (vec_oprnd0); 3750 vargs.quick_push (vec_oprnd1); 3751 } 3752 3753 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs); 3754 new_temp = make_ssa_name (vec_dest, new_stmt); 3755 gimple_call_set_lhs (new_stmt, new_temp); 3756 new_stmt_info 3757 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi); 3758 3759 if (j == 0) 3760 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info; 3761 else 3762 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info; 3763 3764 prev_stmt_info = new_stmt_info; 3765 } 3766 3767 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info); 3768 } 3769 else 3770 /* No current target implements this case. */ 3771 return false; 3772 3773 vargs.release (); 3774 3775 /* The call in STMT might prevent it from being removed in dce. 3776 We however cannot remove it here, due to the way the ssa name 3777 it defines is mapped to the new definition. So just replace 3778 rhs of the statement with something harmless. */ 3779 3780 if (slp_node) 3781 return true; 3782 3783 stmt_info = vect_orig_stmt (stmt_info); 3784 lhs = gimple_get_lhs (stmt_info->stmt); 3785 3786 gassign *new_stmt 3787 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs))); 3788 vinfo->replace_stmt (gsi, stmt_info, new_stmt); 3789 3790 return true; 3791 } 3792 3793 3794 struct simd_call_arg_info 3795 { 3796 tree vectype; 3797 tree op; 3798 HOST_WIDE_INT linear_step; 3799 enum vect_def_type dt; 3800 unsigned int align; 3801 bool simd_lane_linear; 3802 }; 3803 3804 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME, 3805 is linear within simd lane (but not within whole loop), note it in 3806 *ARGINFO. 
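   For example, an address computed as BASE + IFN_GOMP_SIMD_LANE () * 4
   (possibly with intervening conversions) is recorded as op = BASE,
   linear_step = 4 and simd_lane_linear = true.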
*/ 3807 3808 static void 3809 vect_simd_lane_linear (tree op, class loop *loop, 3810 struct simd_call_arg_info *arginfo) 3811 { 3812 gimple *def_stmt = SSA_NAME_DEF_STMT (op); 3813 3814 if (!is_gimple_assign (def_stmt) 3815 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR 3816 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt))) 3817 return; 3818 3819 tree base = gimple_assign_rhs1 (def_stmt); 3820 HOST_WIDE_INT linear_step = 0; 3821 tree v = gimple_assign_rhs2 (def_stmt); 3822 while (TREE_CODE (v) == SSA_NAME) 3823 { 3824 tree t; 3825 def_stmt = SSA_NAME_DEF_STMT (v); 3826 if (is_gimple_assign (def_stmt)) 3827 switch (gimple_assign_rhs_code (def_stmt)) 3828 { 3829 case PLUS_EXPR: 3830 t = gimple_assign_rhs2 (def_stmt); 3831 if (linear_step || TREE_CODE (t) != INTEGER_CST) 3832 return; 3833 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t); 3834 v = gimple_assign_rhs1 (def_stmt); 3835 continue; 3836 case MULT_EXPR: 3837 t = gimple_assign_rhs2 (def_stmt); 3838 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t)) 3839 return; 3840 linear_step = tree_to_shwi (t); 3841 v = gimple_assign_rhs1 (def_stmt); 3842 continue; 3843 CASE_CONVERT: 3844 t = gimple_assign_rhs1 (def_stmt); 3845 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE 3846 || (TYPE_PRECISION (TREE_TYPE (v)) 3847 < TYPE_PRECISION (TREE_TYPE (t)))) 3848 return; 3849 if (!linear_step) 3850 linear_step = 1; 3851 v = t; 3852 continue; 3853 default: 3854 return; 3855 } 3856 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE) 3857 && loop->simduid 3858 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME 3859 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0)) 3860 == loop->simduid)) 3861 { 3862 if (!linear_step) 3863 linear_step = 1; 3864 arginfo->linear_step = linear_step; 3865 arginfo->op = base; 3866 arginfo->simd_lane_linear = true; 3867 return; 3868 } 3869 } 3870 } 3871 3872 /* Return the number of elements in vector type VECTYPE, which is associated 3873 with a SIMD clone. At present these vectors always have a constant 3874 length. */ 3875 3876 static unsigned HOST_WIDE_INT 3877 simd_clone_subparts (tree vectype) 3878 { 3879 return TYPE_VECTOR_SUBPARTS (vectype).to_constant (); 3880 } 3881 3882 /* Function vectorizable_simd_clone_call. 3883 3884 Check if STMT_INFO performs a function call that can be vectorized 3885 by calling a simd clone of the function. 3886 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized 3887 stmt to replace it, put it in VEC_STMT, and insert it at GSI. 3888 Return true if STMT_INFO is vectorizable in this way. */ 3889 3890 static bool 3891 vectorizable_simd_clone_call (stmt_vec_info stmt_info, 3892 gimple_stmt_iterator *gsi, 3893 stmt_vec_info *vec_stmt, slp_tree slp_node, 3894 stmt_vector_for_cost *) 3895 { 3896 tree vec_dest; 3897 tree scalar_dest; 3898 tree op, type; 3899 tree vec_oprnd0 = NULL_TREE; 3900 stmt_vec_info prev_stmt_info; 3901 tree vectype; 3902 unsigned int nunits; 3903 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 3904 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); 3905 vec_info *vinfo = stmt_info->vinfo; 3906 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL; 3907 tree fndecl, new_temp; 3908 int ncopies, j; 3909 auto_vec<simd_call_arg_info> arginfo; 3910 vec<tree> vargs = vNULL; 3911 size_t i, nargs; 3912 tree lhs, rtype, ratype; 3913 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL; 3914 3915 /* Is STMT a vectorizable call? 
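   Only direct calls to a function that has SIMD clones attached
   (node->simd_clones) are considered here.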
*/ 3916 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt); 3917 if (!stmt) 3918 return false; 3919 3920 fndecl = gimple_call_fndecl (stmt); 3921 if (fndecl == NULL_TREE) 3922 return false; 3923 3924 struct cgraph_node *node = cgraph_node::get (fndecl); 3925 if (node == NULL || node->simd_clones == NULL) 3926 return false; 3927 3928 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) 3929 return false; 3930 3931 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def 3932 && ! vec_stmt) 3933 return false; 3934 3935 if (gimple_call_lhs (stmt) 3936 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME) 3937 return false; 3938 3939 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt)); 3940 3941 vectype = STMT_VINFO_VECTYPE (stmt_info); 3942 3943 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info)) 3944 return false; 3945 3946 /* FORNOW */ 3947 if (slp_node) 3948 return false; 3949 3950 /* Process function arguments. */ 3951 nargs = gimple_call_num_args (stmt); 3952 3953 /* Bail out if the function has zero arguments. */ 3954 if (nargs == 0) 3955 return false; 3956 3957 arginfo.reserve (nargs, true); 3958 3959 for (i = 0; i < nargs; i++) 3960 { 3961 simd_call_arg_info thisarginfo; 3962 affine_iv iv; 3963 3964 thisarginfo.linear_step = 0; 3965 thisarginfo.align = 0; 3966 thisarginfo.op = NULL_TREE; 3967 thisarginfo.simd_lane_linear = false; 3968 3969 op = gimple_call_arg (stmt, i); 3970 if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt, 3971 &thisarginfo.vectype) 3972 || thisarginfo.dt == vect_uninitialized_def) 3973 { 3974 if (dump_enabled_p ()) 3975 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 3976 "use not simple.\n"); 3977 return false; 3978 } 3979 3980 if (thisarginfo.dt == vect_constant_def 3981 || thisarginfo.dt == vect_external_def) 3982 gcc_assert (thisarginfo.vectype == NULL_TREE); 3983 else 3984 { 3985 gcc_assert (thisarginfo.vectype != NULL_TREE); 3986 if (VECTOR_BOOLEAN_TYPE_P (thisarginfo.vectype)) 3987 { 3988 if (dump_enabled_p ()) 3989 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 3990 "vector mask arguments are not supported\n"); 3991 return false; 3992 } 3993 } 3994 3995 /* For linear arguments, the analyze phase should have saved 3996 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */ 3997 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length () 3998 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]) 3999 { 4000 gcc_assert (vec_stmt); 4001 thisarginfo.linear_step 4002 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]); 4003 thisarginfo.op 4004 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1]; 4005 thisarginfo.simd_lane_linear 4006 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3] 4007 == boolean_true_node); 4008 /* If loop has been peeled for alignment, we need to adjust it. */ 4009 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo); 4010 tree n2 = LOOP_VINFO_NITERS (loop_vinfo); 4011 if (n1 != n2 && !thisarginfo.simd_lane_linear) 4012 { 4013 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2); 4014 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]; 4015 tree opt = TREE_TYPE (thisarginfo.op); 4016 bias = fold_convert (TREE_TYPE (step), bias); 4017 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step); 4018 thisarginfo.op 4019 = fold_build2 (POINTER_TYPE_P (opt) 4020 ? 
POINTER_PLUS_EXPR : PLUS_EXPR, opt, 4021 thisarginfo.op, bias); 4022 } 4023 } 4024 else if (!vec_stmt 4025 && thisarginfo.dt != vect_constant_def 4026 && thisarginfo.dt != vect_external_def 4027 && loop_vinfo 4028 && TREE_CODE (op) == SSA_NAME 4029 && simple_iv (loop, loop_containing_stmt (stmt), op, 4030 &iv, false) 4031 && tree_fits_shwi_p (iv.step)) 4032 { 4033 thisarginfo.linear_step = tree_to_shwi (iv.step); 4034 thisarginfo.op = iv.base; 4035 } 4036 else if ((thisarginfo.dt == vect_constant_def 4037 || thisarginfo.dt == vect_external_def) 4038 && POINTER_TYPE_P (TREE_TYPE (op))) 4039 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT; 4040 /* Addresses of array elements indexed by GOMP_SIMD_LANE are 4041 linear too. */ 4042 if (POINTER_TYPE_P (TREE_TYPE (op)) 4043 && !thisarginfo.linear_step 4044 && !vec_stmt 4045 && thisarginfo.dt != vect_constant_def 4046 && thisarginfo.dt != vect_external_def 4047 && loop_vinfo 4048 && !slp_node 4049 && TREE_CODE (op) == SSA_NAME) 4050 vect_simd_lane_linear (op, loop, &thisarginfo); 4051 4052 arginfo.quick_push (thisarginfo); 4053 } 4054 4055 unsigned HOST_WIDE_INT vf; 4056 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf)) 4057 { 4058 if (dump_enabled_p ()) 4059 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 4060 "not considering SIMD clones; not yet supported" 4061 " for variable-width vectors.\n"); 4062 return false; 4063 } 4064 4065 unsigned int badness = 0; 4066 struct cgraph_node *bestn = NULL; 4067 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ()) 4068 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]); 4069 else 4070 for (struct cgraph_node *n = node->simd_clones; n != NULL; 4071 n = n->simdclone->next_clone) 4072 { 4073 unsigned int this_badness = 0; 4074 if (n->simdclone->simdlen > vf 4075 || n->simdclone->nargs != nargs) 4076 continue; 4077 if (n->simdclone->simdlen < vf) 4078 this_badness += (exact_log2 (vf) 4079 - exact_log2 (n->simdclone->simdlen)) * 1024; 4080 if (n->simdclone->inbranch) 4081 this_badness += 2048; 4082 int target_badness = targetm.simd_clone.usable (n); 4083 if (target_badness < 0) 4084 continue; 4085 this_badness += target_badness * 512; 4086 /* FORNOW: Have to add code to add the mask argument. 
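   Until that is done, in-branch (masked) clones are skipped entirely
   below, even though they were only given a badness penalty above.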
*/ 4087 if (n->simdclone->inbranch) 4088 continue; 4089 for (i = 0; i < nargs; i++) 4090 { 4091 switch (n->simdclone->args[i].arg_type) 4092 { 4093 case SIMD_CLONE_ARG_TYPE_VECTOR: 4094 if (!useless_type_conversion_p 4095 (n->simdclone->args[i].orig_type, 4096 TREE_TYPE (gimple_call_arg (stmt, i)))) 4097 i = -1; 4098 else if (arginfo[i].dt == vect_constant_def 4099 || arginfo[i].dt == vect_external_def 4100 || arginfo[i].linear_step) 4101 this_badness += 64; 4102 break; 4103 case SIMD_CLONE_ARG_TYPE_UNIFORM: 4104 if (arginfo[i].dt != vect_constant_def 4105 && arginfo[i].dt != vect_external_def) 4106 i = -1; 4107 break; 4108 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP: 4109 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP: 4110 if (arginfo[i].dt == vect_constant_def 4111 || arginfo[i].dt == vect_external_def 4112 || (arginfo[i].linear_step 4113 != n->simdclone->args[i].linear_step)) 4114 i = -1; 4115 break; 4116 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP: 4117 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP: 4118 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP: 4119 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP: 4120 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP: 4121 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP: 4122 /* FORNOW */ 4123 i = -1; 4124 break; 4125 case SIMD_CLONE_ARG_TYPE_MASK: 4126 gcc_unreachable (); 4127 } 4128 if (i == (size_t) -1) 4129 break; 4130 if (n->simdclone->args[i].alignment > arginfo[i].align) 4131 { 4132 i = -1; 4133 break; 4134 } 4135 if (arginfo[i].align) 4136 this_badness += (exact_log2 (arginfo[i].align) 4137 - exact_log2 (n->simdclone->args[i].alignment)); 4138 } 4139 if (i == (size_t) -1) 4140 continue; 4141 if (bestn == NULL || this_badness < badness) 4142 { 4143 bestn = n; 4144 badness = this_badness; 4145 } 4146 } 4147 4148 if (bestn == NULL) 4149 return false; 4150 4151 for (i = 0; i < nargs; i++) 4152 if ((arginfo[i].dt == vect_constant_def 4153 || arginfo[i].dt == vect_external_def) 4154 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR) 4155 { 4156 tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i)); 4157 arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type, 4158 slp_node); 4159 if (arginfo[i].vectype == NULL 4160 || (simd_clone_subparts (arginfo[i].vectype) 4161 > bestn->simdclone->simdlen)) 4162 return false; 4163 } 4164 4165 fndecl = bestn->decl; 4166 nunits = bestn->simdclone->simdlen; 4167 ncopies = vf / nunits; 4168 4169 /* If the function isn't const, only allow it in simd loops where user 4170 has asserted that at least nunits consecutive iterations can be 4171 performed using SIMD instructions. */ 4172 if ((loop == NULL || (unsigned) loop->safelen < nunits) 4173 && gimple_vuse (stmt)) 4174 return false; 4175 4176 /* Sanity check: make sure that at least one copy of the vectorized stmt 4177 needs to be generated. */ 4178 gcc_assert (ncopies >= 1); 4179 4180 if (!vec_stmt) /* transformation not required. */ 4181 { 4182 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl); 4183 for (i = 0; i < nargs; i++) 4184 if ((bestn->simdclone->args[i].arg_type 4185 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP) 4186 || (bestn->simdclone->args[i].arg_type 4187 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP)) 4188 { 4189 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3 4190 + 1); 4191 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op); 4192 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op)) 4193 ? 
size_type_node : TREE_TYPE (arginfo[i].op); 4194 tree ls = build_int_cst (lst, arginfo[i].linear_step); 4195 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls); 4196 tree sll = arginfo[i].simd_lane_linear 4197 ? boolean_true_node : boolean_false_node; 4198 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll); 4199 } 4200 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type; 4201 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call"); 4202 /* vect_model_simple_cost (stmt_info, ncopies, dt, slp_node, cost_vec); */ 4203 return true; 4204 } 4205 4206 /* Transform. */ 4207 4208 if (dump_enabled_p ()) 4209 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n"); 4210 4211 /* Handle def. */ 4212 scalar_dest = gimple_call_lhs (stmt); 4213 vec_dest = NULL_TREE; 4214 rtype = NULL_TREE; 4215 ratype = NULL_TREE; 4216 if (scalar_dest) 4217 { 4218 vec_dest = vect_create_destination_var (scalar_dest, vectype); 4219 rtype = TREE_TYPE (TREE_TYPE (fndecl)); 4220 if (TREE_CODE (rtype) == ARRAY_TYPE) 4221 { 4222 ratype = rtype; 4223 rtype = TREE_TYPE (ratype); 4224 } 4225 } 4226 4227 prev_stmt_info = NULL; 4228 for (j = 0; j < ncopies; ++j) 4229 { 4230 /* Build argument list for the vectorized call. */ 4231 if (j == 0) 4232 vargs.create (nargs); 4233 else 4234 vargs.truncate (0); 4235 4236 for (i = 0; i < nargs; i++) 4237 { 4238 unsigned int k, l, m, o; 4239 tree atype; 4240 op = gimple_call_arg (stmt, i); 4241 switch (bestn->simdclone->args[i].arg_type) 4242 { 4243 case SIMD_CLONE_ARG_TYPE_VECTOR: 4244 atype = bestn->simdclone->args[i].vector_type; 4245 o = nunits / simd_clone_subparts (atype); 4246 for (m = j * o; m < (j + 1) * o; m++) 4247 { 4248 if (simd_clone_subparts (atype) 4249 < simd_clone_subparts (arginfo[i].vectype)) 4250 { 4251 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype)); 4252 k = (simd_clone_subparts (arginfo[i].vectype) 4253 / simd_clone_subparts (atype)); 4254 gcc_assert ((k & (k - 1)) == 0); 4255 if (m == 0) 4256 vec_oprnd0 4257 = vect_get_vec_def_for_operand (op, stmt_info); 4258 else 4259 { 4260 vec_oprnd0 = arginfo[i].op; 4261 if ((m & (k - 1)) == 0) 4262 vec_oprnd0 4263 = vect_get_vec_def_for_stmt_copy (vinfo, 4264 vec_oprnd0); 4265 } 4266 arginfo[i].op = vec_oprnd0; 4267 vec_oprnd0 4268 = build3 (BIT_FIELD_REF, atype, vec_oprnd0, 4269 bitsize_int (prec), 4270 bitsize_int ((m & (k - 1)) * prec)); 4271 gassign *new_stmt 4272 = gimple_build_assign (make_ssa_name (atype), 4273 vec_oprnd0); 4274 vect_finish_stmt_generation (stmt_info, new_stmt, gsi); 4275 vargs.safe_push (gimple_assign_lhs (new_stmt)); 4276 } 4277 else 4278 { 4279 k = (simd_clone_subparts (atype) 4280 / simd_clone_subparts (arginfo[i].vectype)); 4281 gcc_assert ((k & (k - 1)) == 0); 4282 vec<constructor_elt, va_gc> *ctor_elts; 4283 if (k != 1) 4284 vec_alloc (ctor_elts, k); 4285 else 4286 ctor_elts = NULL; 4287 for (l = 0; l < k; l++) 4288 { 4289 if (m == 0 && l == 0) 4290 vec_oprnd0 4291 = vect_get_vec_def_for_operand (op, stmt_info); 4292 else 4293 vec_oprnd0 4294 = vect_get_vec_def_for_stmt_copy (vinfo, 4295 arginfo[i].op); 4296 arginfo[i].op = vec_oprnd0; 4297 if (k == 1) 4298 break; 4299 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, 4300 vec_oprnd0); 4301 } 4302 if (k == 1) 4303 vargs.safe_push (vec_oprnd0); 4304 else 4305 { 4306 vec_oprnd0 = build_constructor (atype, ctor_elts); 4307 gassign *new_stmt 4308 = gimple_build_assign (make_ssa_name (atype), 4309 vec_oprnd0); 4310 vect_finish_stmt_generation (stmt_info, new_stmt, 4311 gsi); 4312 vargs.safe_push (gimple_assign_lhs (new_stmt)); 4313 } 
4314 } 4315 } 4316 break; 4317 case SIMD_CLONE_ARG_TYPE_UNIFORM: 4318 vargs.safe_push (op); 4319 break; 4320 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP: 4321 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP: 4322 if (j == 0) 4323 { 4324 gimple_seq stmts; 4325 arginfo[i].op 4326 = force_gimple_operand (unshare_expr (arginfo[i].op), 4327 &stmts, true, NULL_TREE); 4328 if (stmts != NULL) 4329 { 4330 basic_block new_bb; 4331 edge pe = loop_preheader_edge (loop); 4332 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); 4333 gcc_assert (!new_bb); 4334 } 4335 if (arginfo[i].simd_lane_linear) 4336 { 4337 vargs.safe_push (arginfo[i].op); 4338 break; 4339 } 4340 tree phi_res = copy_ssa_name (op); 4341 gphi *new_phi = create_phi_node (phi_res, loop->header); 4342 loop_vinfo->add_stmt (new_phi); 4343 add_phi_arg (new_phi, arginfo[i].op, 4344 loop_preheader_edge (loop), UNKNOWN_LOCATION); 4345 enum tree_code code 4346 = POINTER_TYPE_P (TREE_TYPE (op)) 4347 ? POINTER_PLUS_EXPR : PLUS_EXPR; 4348 tree type = POINTER_TYPE_P (TREE_TYPE (op)) 4349 ? sizetype : TREE_TYPE (op); 4350 widest_int cst 4351 = wi::mul (bestn->simdclone->args[i].linear_step, 4352 ncopies * nunits); 4353 tree tcst = wide_int_to_tree (type, cst); 4354 tree phi_arg = copy_ssa_name (op); 4355 gassign *new_stmt 4356 = gimple_build_assign (phi_arg, code, phi_res, tcst); 4357 gimple_stmt_iterator si = gsi_after_labels (loop->header); 4358 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT); 4359 loop_vinfo->add_stmt (new_stmt); 4360 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop), 4361 UNKNOWN_LOCATION); 4362 arginfo[i].op = phi_res; 4363 vargs.safe_push (phi_res); 4364 } 4365 else 4366 { 4367 enum tree_code code 4368 = POINTER_TYPE_P (TREE_TYPE (op)) 4369 ? POINTER_PLUS_EXPR : PLUS_EXPR; 4370 tree type = POINTER_TYPE_P (TREE_TYPE (op)) 4371 ? 
sizetype : TREE_TYPE (op); 4372 widest_int cst 4373 = wi::mul (bestn->simdclone->args[i].linear_step, 4374 j * nunits); 4375 tree tcst = wide_int_to_tree (type, cst); 4376 new_temp = make_ssa_name (TREE_TYPE (op)); 4377 gassign *new_stmt 4378 = gimple_build_assign (new_temp, code, 4379 arginfo[i].op, tcst); 4380 vect_finish_stmt_generation (stmt_info, new_stmt, gsi); 4381 vargs.safe_push (new_temp); 4382 } 4383 break; 4384 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP: 4385 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP: 4386 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP: 4387 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP: 4388 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP: 4389 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP: 4390 default: 4391 gcc_unreachable (); 4392 } 4393 } 4394 4395 gcall *new_call = gimple_build_call_vec (fndecl, vargs); 4396 if (vec_dest) 4397 { 4398 gcc_assert (ratype || simd_clone_subparts (rtype) == nunits); 4399 if (ratype) 4400 new_temp = create_tmp_var (ratype); 4401 else if (simd_clone_subparts (vectype) 4402 == simd_clone_subparts (rtype)) 4403 new_temp = make_ssa_name (vec_dest, new_call); 4404 else 4405 new_temp = make_ssa_name (rtype, new_call); 4406 gimple_call_set_lhs (new_call, new_temp); 4407 } 4408 stmt_vec_info new_stmt_info 4409 = vect_finish_stmt_generation (stmt_info, new_call, gsi); 4410 4411 if (vec_dest) 4412 { 4413 if (simd_clone_subparts (vectype) < nunits) 4414 { 4415 unsigned int k, l; 4416 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype)); 4417 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype)); 4418 k = nunits / simd_clone_subparts (vectype); 4419 gcc_assert ((k & (k - 1)) == 0); 4420 for (l = 0; l < k; l++) 4421 { 4422 tree t; 4423 if (ratype) 4424 { 4425 t = build_fold_addr_expr (new_temp); 4426 t = build2 (MEM_REF, vectype, t, 4427 build_int_cst (TREE_TYPE (t), l * bytes)); 4428 } 4429 else 4430 t = build3 (BIT_FIELD_REF, vectype, new_temp, 4431 bitsize_int (prec), bitsize_int (l * prec)); 4432 gimple *new_stmt 4433 = gimple_build_assign (make_ssa_name (vectype), t); 4434 new_stmt_info 4435 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi); 4436 4437 if (j == 0 && l == 0) 4438 STMT_VINFO_VEC_STMT (stmt_info) 4439 = *vec_stmt = new_stmt_info; 4440 else 4441 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info; 4442 4443 prev_stmt_info = new_stmt_info; 4444 } 4445 4446 if (ratype) 4447 vect_clobber_variable (stmt_info, gsi, new_temp); 4448 continue; 4449 } 4450 else if (simd_clone_subparts (vectype) > nunits) 4451 { 4452 unsigned int k = (simd_clone_subparts (vectype) 4453 / simd_clone_subparts (rtype)); 4454 gcc_assert ((k & (k - 1)) == 0); 4455 if ((j & (k - 1)) == 0) 4456 vec_alloc (ret_ctor_elts, k); 4457 if (ratype) 4458 { 4459 unsigned int m, o = nunits / simd_clone_subparts (rtype); 4460 for (m = 0; m < o; m++) 4461 { 4462 tree tem = build4 (ARRAY_REF, rtype, new_temp, 4463 size_int (m), NULL_TREE, NULL_TREE); 4464 gimple *new_stmt 4465 = gimple_build_assign (make_ssa_name (rtype), tem); 4466 new_stmt_info 4467 = vect_finish_stmt_generation (stmt_info, new_stmt, 4468 gsi); 4469 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, 4470 gimple_assign_lhs (new_stmt)); 4471 } 4472 vect_clobber_variable (stmt_info, gsi, new_temp); 4473 } 4474 else 4475 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp); 4476 if ((j & (k - 1)) != k - 1) 4477 continue; 4478 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts); 4479 gimple *new_stmt 4480 = gimple_build_assign (make_ssa_name (vec_dest), 
vec_oprnd0); 4481 new_stmt_info 4482 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi); 4483 4484 if ((unsigned) j == k - 1) 4485 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info; 4486 else 4487 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info; 4488 4489 prev_stmt_info = new_stmt_info; 4490 continue; 4491 } 4492 else if (ratype) 4493 { 4494 tree t = build_fold_addr_expr (new_temp); 4495 t = build2 (MEM_REF, vectype, t, 4496 build_int_cst (TREE_TYPE (t), 0)); 4497 gimple *new_stmt 4498 = gimple_build_assign (make_ssa_name (vec_dest), t); 4499 new_stmt_info 4500 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi); 4501 vect_clobber_variable (stmt_info, gsi, new_temp); 4502 } 4503 } 4504 4505 if (j == 0) 4506 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info; 4507 else 4508 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info; 4509 4510 prev_stmt_info = new_stmt_info; 4511 } 4512 4513 vargs.release (); 4514 4515 /* The call in STMT might prevent it from being removed in dce. 4516 We however cannot remove it here, due to the way the ssa name 4517 it defines is mapped to the new definition. So just replace 4518 rhs of the statement with something harmless. */ 4519 4520 if (slp_node) 4521 return true; 4522 4523 gimple *new_stmt; 4524 if (scalar_dest) 4525 { 4526 type = TREE_TYPE (scalar_dest); 4527 lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt); 4528 new_stmt = gimple_build_assign (lhs, build_zero_cst (type)); 4529 } 4530 else 4531 new_stmt = gimple_build_nop (); 4532 vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt); 4533 unlink_stmt_vdef (stmt); 4534 4535 return true; 4536 } 4537 4538 4539 /* Function vect_gen_widened_results_half 4540 4541 Create a vector stmt whose code, type, number of arguments, and result 4542 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are 4543 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI. 4544 In the case that CODE is a CALL_EXPR, this means that a call to DECL 4545 needs to be created (DECL is a function-decl of a target-builtin). 4546 STMT_INFO is the original scalar stmt that we are vectorizing. */ 4547 4548 static gimple * 4549 vect_gen_widened_results_half (enum tree_code code, 4550 tree vec_oprnd0, tree vec_oprnd1, int op_type, 4551 tree vec_dest, gimple_stmt_iterator *gsi, 4552 stmt_vec_info stmt_info) 4553 { 4554 gimple *new_stmt; 4555 tree new_temp; 4556 4557 /* Generate half of the widened result: */ 4558 gcc_assert (op_type == TREE_CODE_LENGTH (code)); 4559 if (op_type != binary_op) 4560 vec_oprnd1 = NULL; 4561 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1); 4562 new_temp = make_ssa_name (vec_dest, new_stmt); 4563 gimple_assign_set_lhs (new_stmt, new_temp); 4564 vect_finish_stmt_generation (stmt_info, new_stmt, gsi); 4565 4566 return new_stmt; 4567 } 4568 4569 4570 /* Get vectorized definitions for loop-based vectorization of STMT_INFO. 4571 For the first operand we call vect_get_vec_def_for_operand (with OPRND 4572 containing scalar operand), and for the rest we get a copy with 4573 vect_get_vec_def_for_stmt_copy() using the previous vector definition 4574 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details. 4575 The vectors are collected into VEC_OPRNDS. */ 4576 4577 static void 4578 vect_get_loop_based_defs (tree *oprnd, stmt_vec_info stmt_info, 4579 vec<tree> *vec_oprnds, int multi_step_cvt) 4580 { 4581 vec_info *vinfo = stmt_info->vinfo; 4582 tree vec_oprnd; 4583 4584 /* Get first vector operand. 
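   Each invocation pushes two vector defs onto VEC_OPRNDS and then recurses
   MULTI_STEP_CVT more times, so 2 * (MULTI_STEP_CVT + 1) operands are
   collected in total.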
*/ 4585 /* All the vector operands except the very first one (that is scalar oprnd) 4586 are stmt copies. */ 4587 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE) 4588 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt_info); 4589 else 4590 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, *oprnd); 4591 4592 vec_oprnds->quick_push (vec_oprnd); 4593 4594 /* Get second vector operand. */ 4595 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd); 4596 vec_oprnds->quick_push (vec_oprnd); 4597 4598 *oprnd = vec_oprnd; 4599 4600 /* For conversion in multiple steps, continue to get operands 4601 recursively. */ 4602 if (multi_step_cvt) 4603 vect_get_loop_based_defs (oprnd, stmt_info, vec_oprnds, 4604 multi_step_cvt - 1); 4605 } 4606 4607 4608 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS. 4609 For multi-step conversions store the resulting vectors and call the function 4610 recursively. */ 4611 4612 static void 4613 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds, 4614 int multi_step_cvt, 4615 stmt_vec_info stmt_info, 4616 vec<tree> vec_dsts, 4617 gimple_stmt_iterator *gsi, 4618 slp_tree slp_node, enum tree_code code, 4619 stmt_vec_info *prev_stmt_info) 4620 { 4621 unsigned int i; 4622 tree vop0, vop1, new_tmp, vec_dest; 4623 4624 vec_dest = vec_dsts.pop (); 4625 4626 for (i = 0; i < vec_oprnds->length (); i += 2) 4627 { 4628 /* Create demotion operation. */ 4629 vop0 = (*vec_oprnds)[i]; 4630 vop1 = (*vec_oprnds)[i + 1]; 4631 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1); 4632 new_tmp = make_ssa_name (vec_dest, new_stmt); 4633 gimple_assign_set_lhs (new_stmt, new_tmp); 4634 stmt_vec_info new_stmt_info 4635 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi); 4636 4637 if (multi_step_cvt) 4638 /* Store the resulting vector for next recursive call. */ 4639 (*vec_oprnds)[i/2] = new_tmp; 4640 else 4641 { 4642 /* This is the last step of the conversion sequence. Store the 4643 vectors in SLP_NODE or in vector info of the scalar statement 4644 (or in STMT_VINFO_RELATED_STMT chain). */ 4645 if (slp_node) 4646 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info); 4647 else 4648 { 4649 if (!*prev_stmt_info) 4650 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info; 4651 else 4652 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt_info; 4653 4654 *prev_stmt_info = new_stmt_info; 4655 } 4656 } 4657 } 4658 4659 /* For multi-step demotion operations we first generate demotion operations 4660 from the source type to the intermediate types, and then combine the 4661 results (stored in VEC_OPRNDS) in demotion operation to the destination 4662 type. */ 4663 if (multi_step_cvt) 4664 { 4665 /* At each level of recursion we have half of the operands we had at the 4666 previous level. */ 4667 vec_oprnds->truncate ((i+1)/2); 4668 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1, 4669 stmt_info, vec_dsts, gsi, 4670 slp_node, VEC_PACK_TRUNC_EXPR, 4671 prev_stmt_info); 4672 } 4673 4674 vec_dsts.quick_push (vec_dest); 4675 } 4676 4677 4678 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0 4679 and VEC_OPRNDS1, for a binary operation associated with scalar statement 4680 STMT_INFO. For multi-step conversions store the resulting vectors and 4681 call the function recursively. 
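   Each input vector produces a low/high pair of widened results, so on
   return VEC_OPRNDS0 holds twice as many vectors as it did on entry.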
*/ 4682 4683 static void 4684 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0, 4685 vec<tree> *vec_oprnds1, 4686 stmt_vec_info stmt_info, tree vec_dest, 4687 gimple_stmt_iterator *gsi, 4688 enum tree_code code1, 4689 enum tree_code code2, int op_type) 4690 { 4691 int i; 4692 tree vop0, vop1, new_tmp1, new_tmp2; 4693 gimple *new_stmt1, *new_stmt2; 4694 vec<tree> vec_tmp = vNULL; 4695 4696 vec_tmp.create (vec_oprnds0->length () * 2); 4697 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0) 4698 { 4699 if (op_type == binary_op) 4700 vop1 = (*vec_oprnds1)[i]; 4701 else 4702 vop1 = NULL_TREE; 4703 4704 /* Generate the two halves of promotion operation. */ 4705 new_stmt1 = vect_gen_widened_results_half (code1, vop0, vop1, 4706 op_type, vec_dest, gsi, 4707 stmt_info); 4708 new_stmt2 = vect_gen_widened_results_half (code2, vop0, vop1, 4709 op_type, vec_dest, gsi, 4710 stmt_info); 4711 if (is_gimple_call (new_stmt1)) 4712 { 4713 new_tmp1 = gimple_call_lhs (new_stmt1); 4714 new_tmp2 = gimple_call_lhs (new_stmt2); 4715 } 4716 else 4717 { 4718 new_tmp1 = gimple_assign_lhs (new_stmt1); 4719 new_tmp2 = gimple_assign_lhs (new_stmt2); 4720 } 4721 4722 /* Store the results for the next step. */ 4723 vec_tmp.quick_push (new_tmp1); 4724 vec_tmp.quick_push (new_tmp2); 4725 } 4726 4727 vec_oprnds0->release (); 4728 *vec_oprnds0 = vec_tmp; 4729 } 4730 4731 4732 /* Check if STMT_INFO performs a conversion operation that can be vectorized. 4733 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized 4734 stmt to replace it, put it in VEC_STMT, and insert it at GSI. 4735 Return true if STMT_INFO is vectorizable in this way. */ 4736 4737 static bool 4738 vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, 4739 stmt_vec_info *vec_stmt, slp_tree slp_node, 4740 stmt_vector_for_cost *cost_vec) 4741 { 4742 tree vec_dest; 4743 tree scalar_dest; 4744 tree op0, op1 = NULL_TREE; 4745 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE; 4746 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 4747 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK; 4748 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK; 4749 tree new_temp; 4750 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type}; 4751 int ndts = 2; 4752 stmt_vec_info prev_stmt_info; 4753 poly_uint64 nunits_in; 4754 poly_uint64 nunits_out; 4755 tree vectype_out, vectype_in; 4756 int ncopies, i, j; 4757 tree lhs_type, rhs_type; 4758 enum { NARROW, NONE, WIDEN } modifier; 4759 vec<tree> vec_oprnds0 = vNULL; 4760 vec<tree> vec_oprnds1 = vNULL; 4761 tree vop0; 4762 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); 4763 vec_info *vinfo = stmt_info->vinfo; 4764 int multi_step_cvt = 0; 4765 vec<tree> interm_types = vNULL; 4766 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE; 4767 int op_type; 4768 unsigned short fltsz; 4769 4770 /* Is STMT a vectorizable conversion? */ 4771 4772 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) 4773 return false; 4774 4775 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def 4776 && ! 
vec_stmt) 4777 return false; 4778 4779 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt); 4780 if (!stmt) 4781 return false; 4782 4783 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME) 4784 return false; 4785 4786 code = gimple_assign_rhs_code (stmt); 4787 if (!CONVERT_EXPR_CODE_P (code) 4788 && code != FIX_TRUNC_EXPR 4789 && code != FLOAT_EXPR 4790 && code != WIDEN_MULT_EXPR 4791 && code != WIDEN_LSHIFT_EXPR) 4792 return false; 4793 4794 op_type = TREE_CODE_LENGTH (code); 4795 4796 /* Check types of lhs and rhs. */ 4797 scalar_dest = gimple_assign_lhs (stmt); 4798 lhs_type = TREE_TYPE (scalar_dest); 4799 vectype_out = STMT_VINFO_VECTYPE (stmt_info); 4800 4801 op0 = gimple_assign_rhs1 (stmt); 4802 rhs_type = TREE_TYPE (op0); 4803 4804 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR) 4805 && !((INTEGRAL_TYPE_P (lhs_type) 4806 && INTEGRAL_TYPE_P (rhs_type)) 4807 || (SCALAR_FLOAT_TYPE_P (lhs_type) 4808 && SCALAR_FLOAT_TYPE_P (rhs_type)))) 4809 return false; 4810 4811 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out) 4812 && ((INTEGRAL_TYPE_P (lhs_type) 4813 && !type_has_mode_precision_p (lhs_type)) 4814 || (INTEGRAL_TYPE_P (rhs_type) 4815 && !type_has_mode_precision_p (rhs_type)))) 4816 { 4817 if (dump_enabled_p ()) 4818 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 4819 "type conversion to/from bit-precision unsupported." 4820 "\n"); 4821 return false; 4822 } 4823 4824 /* Check the operands of the operation. */ 4825 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype_in)) 4826 { 4827 if (dump_enabled_p ()) 4828 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 4829 "use not simple.\n"); 4830 return false; 4831 } 4832 if (op_type == binary_op) 4833 { 4834 bool ok; 4835 4836 op1 = gimple_assign_rhs2 (stmt); 4837 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR); 4838 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of 4839 OP1. */ 4840 if (CONSTANT_CLASS_P (op0)) 4841 ok = vect_is_simple_use (op1, vinfo, &dt[1], &vectype_in); 4842 else 4843 ok = vect_is_simple_use (op1, vinfo, &dt[1]); 4844 4845 if (!ok) 4846 { 4847 if (dump_enabled_p ()) 4848 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 4849 "use not simple.\n"); 4850 return false; 4851 } 4852 } 4853 4854 /* If op0 is an external or constant def, infer the vector type 4855 from the scalar type. */ 4856 if (!vectype_in) 4857 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node); 4858 if (vec_stmt) 4859 gcc_assert (vectype_in); 4860 if (!vectype_in) 4861 { 4862 if (dump_enabled_p ()) 4863 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 4864 "no vectype for scalar type %T\n", rhs_type); 4865 4866 return false; 4867 } 4868 4869 if (VECTOR_BOOLEAN_TYPE_P (vectype_out) 4870 && !VECTOR_BOOLEAN_TYPE_P (vectype_in)) 4871 { 4872 if (dump_enabled_p ()) 4873 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 4874 "can't convert between boolean and non " 4875 "boolean vectors %T\n", rhs_type); 4876 4877 return false; 4878 } 4879 4880 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in); 4881 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out); 4882 if (known_eq (nunits_out, nunits_in)) 4883 modifier = NONE; 4884 else if (multiple_p (nunits_out, nunits_in)) 4885 modifier = NARROW; 4886 else 4887 { 4888 gcc_checking_assert (multiple_p (nunits_in, nunits_out)); 4889 modifier = WIDEN; 4890 } 4891 4892 /* Multiple types in SLP are handled by creating the appropriate number of 4893 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in 4894 case of SLP. 
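   For loop vectorization, NCOPIES below is computed from the vector type
   with the larger element count (VECTYPE_OUT for a NARROW conversion,
   VECTYPE_IN otherwise).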
*/ 4895 if (slp_node) 4896 ncopies = 1; 4897 else if (modifier == NARROW) 4898 ncopies = vect_get_num_copies (loop_vinfo, vectype_out); 4899 else 4900 ncopies = vect_get_num_copies (loop_vinfo, vectype_in); 4901 4902 /* Sanity check: make sure that at least one copy of the vectorized stmt 4903 needs to be generated. */ 4904 gcc_assert (ncopies >= 1); 4905 4906 bool found_mode = false; 4907 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type); 4908 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type); 4909 opt_scalar_mode rhs_mode_iter; 4910 4911 /* Supportable by target? */ 4912 switch (modifier) 4913 { 4914 case NONE: 4915 if (code != FIX_TRUNC_EXPR 4916 && code != FLOAT_EXPR 4917 && !CONVERT_EXPR_CODE_P (code)) 4918 return false; 4919 if (supportable_convert_operation (code, vectype_out, vectype_in, &code1)) 4920 break; 4921 /* FALLTHRU */ 4922 unsupported: 4923 if (dump_enabled_p ()) 4924 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 4925 "conversion not supported by target.\n"); 4926 return false; 4927 4928 case WIDEN: 4929 if (supportable_widening_operation (code, stmt_info, vectype_out, 4930 vectype_in, &code1, &code2, 4931 &multi_step_cvt, &interm_types)) 4932 { 4933 /* Binary widening operation can only be supported directly by the 4934 architecture. */ 4935 gcc_assert (!(multi_step_cvt && op_type == binary_op)); 4936 break; 4937 } 4938 4939 if (code != FLOAT_EXPR 4940 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode)) 4941 goto unsupported; 4942 4943 fltsz = GET_MODE_SIZE (lhs_mode); 4944 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode) 4945 { 4946 rhs_mode = rhs_mode_iter.require (); 4947 if (GET_MODE_SIZE (rhs_mode) > fltsz) 4948 break; 4949 4950 cvt_type 4951 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0); 4952 cvt_type = get_same_sized_vectype (cvt_type, vectype_in); 4953 if (cvt_type == NULL_TREE) 4954 goto unsupported; 4955 4956 if (GET_MODE_SIZE (rhs_mode) == fltsz) 4957 { 4958 if (!supportable_convert_operation (code, vectype_out, 4959 cvt_type, &codecvt1)) 4960 goto unsupported; 4961 } 4962 else if (!supportable_widening_operation (code, stmt_info, 4963 vectype_out, cvt_type, 4964 &codecvt1, &codecvt2, 4965 &multi_step_cvt, 4966 &interm_types)) 4967 continue; 4968 else 4969 gcc_assert (multi_step_cvt == 0); 4970 4971 if (supportable_widening_operation (NOP_EXPR, stmt_info, cvt_type, 4972 vectype_in, &code1, &code2, 4973 &multi_step_cvt, &interm_types)) 4974 { 4975 found_mode = true; 4976 break; 4977 } 4978 } 4979 4980 if (!found_mode) 4981 goto unsupported; 4982 4983 if (GET_MODE_SIZE (rhs_mode) == fltsz) 4984 codecvt2 = ERROR_MARK; 4985 else 4986 { 4987 multi_step_cvt++; 4988 interm_types.safe_push (cvt_type); 4989 cvt_type = NULL_TREE; 4990 } 4991 break; 4992 4993 case NARROW: 4994 gcc_assert (op_type == unary_op); 4995 if (supportable_narrowing_operation (code, vectype_out, vectype_in, 4996 &code1, &multi_step_cvt, 4997 &interm_types)) 4998 break; 4999 5000 if (code != FIX_TRUNC_EXPR 5001 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode)) 5002 goto unsupported; 5003 5004 cvt_type 5005 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0); 5006 cvt_type = get_same_sized_vectype (cvt_type, vectype_in); 5007 if (cvt_type == NULL_TREE) 5008 goto unsupported; 5009 if (!supportable_convert_operation (code, cvt_type, vectype_in, 5010 &codecvt1)) 5011 goto unsupported; 5012 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type, 5013 &code1, &multi_step_cvt, 5014 &interm_types)) 5015 break; 5016 goto unsupported; 
5017 5018 default: 5019 gcc_unreachable (); 5020 } 5021 5022 if (!vec_stmt) /* transformation not required. */ 5023 { 5024 DUMP_VECT_SCOPE ("vectorizable_conversion"); 5025 if (modifier == NONE) 5026 { 5027 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type; 5028 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, 5029 cost_vec); 5030 } 5031 else if (modifier == NARROW) 5032 { 5033 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type; 5034 /* The final packing step produces one vector result per copy. */ 5035 unsigned int nvectors 5036 = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies); 5037 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors, 5038 multi_step_cvt, cost_vec); 5039 } 5040 else 5041 { 5042 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type; 5043 /* The initial unpacking step produces two vector results 5044 per copy. MULTI_STEP_CVT is 0 for a single conversion, 5045 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */ 5046 unsigned int nvectors 5047 = (slp_node 5048 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt 5049 : ncopies * 2); 5050 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors, 5051 multi_step_cvt, cost_vec); 5052 } 5053 interm_types.release (); 5054 return true; 5055 } 5056 5057 /* Transform. */ 5058 if (dump_enabled_p ()) 5059 dump_printf_loc (MSG_NOTE, vect_location, 5060 "transform conversion. ncopies = %d.\n", ncopies); 5061 5062 if (op_type == binary_op) 5063 { 5064 if (CONSTANT_CLASS_P (op0)) 5065 op0 = fold_convert (TREE_TYPE (op1), op0); 5066 else if (CONSTANT_CLASS_P (op1)) 5067 op1 = fold_convert (TREE_TYPE (op0), op1); 5068 } 5069 5070 /* In case of multi-step conversion, we first generate conversion operations 5071 to the intermediate types, and then from that types to the final one. 5072 We create vector destinations for the intermediate type (TYPES) received 5073 from supportable_*_operation, and store them in the correct order 5074 for future use in vect_create_vectorized_*_stmts (). */ 5075 auto_vec<tree> vec_dsts (multi_step_cvt + 1); 5076 vec_dest = vect_create_destination_var (scalar_dest, 5077 (cvt_type && modifier == WIDEN) 5078 ? cvt_type : vectype_out); 5079 vec_dsts.quick_push (vec_dest); 5080 5081 if (multi_step_cvt) 5082 { 5083 for (i = interm_types.length () - 1; 5084 interm_types.iterate (i, &intermediate_type); i--) 5085 { 5086 vec_dest = vect_create_destination_var (scalar_dest, 5087 intermediate_type); 5088 vec_dsts.quick_push (vec_dest); 5089 } 5090 } 5091 5092 if (cvt_type) 5093 vec_dest = vect_create_destination_var (scalar_dest, 5094 modifier == WIDEN 5095 ? vectype_out : cvt_type); 5096 5097 if (!slp_node) 5098 { 5099 if (modifier == WIDEN) 5100 { 5101 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1); 5102 if (op_type == binary_op) 5103 vec_oprnds1.create (1); 5104 } 5105 else if (modifier == NARROW) 5106 vec_oprnds0.create ( 5107 2 * (multi_step_cvt ? 
vect_pow2 (multi_step_cvt) : 1)); 5108 } 5109 else if (code == WIDEN_LSHIFT_EXPR) 5110 vec_oprnds1.create (slp_node->vec_stmts_size); 5111 5112 last_oprnd = op0; 5113 prev_stmt_info = NULL; 5114 switch (modifier) 5115 { 5116 case NONE: 5117 for (j = 0; j < ncopies; j++) 5118 { 5119 if (j == 0) 5120 vect_get_vec_defs (op0, NULL, stmt_info, &vec_oprnds0, 5121 NULL, slp_node); 5122 else 5123 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, NULL); 5124 5125 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0) 5126 { 5127 stmt_vec_info new_stmt_info; 5128 /* Arguments are ready, create the new vector stmt. */ 5129 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op); 5130 gassign *new_stmt = gimple_build_assign (vec_dest, code1, vop0); 5131 new_temp = make_ssa_name (vec_dest, new_stmt); 5132 gimple_assign_set_lhs (new_stmt, new_temp); 5133 new_stmt_info 5134 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi); 5135 5136 if (slp_node) 5137 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info); 5138 else 5139 { 5140 if (!prev_stmt_info) 5141 STMT_VINFO_VEC_STMT (stmt_info) 5142 = *vec_stmt = new_stmt_info; 5143 else 5144 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info; 5145 prev_stmt_info = new_stmt_info; 5146 } 5147 } 5148 } 5149 break; 5150 5151 case WIDEN: 5152 /* In case the vectorization factor (VF) is bigger than the number 5153 of elements that we can fit in a vectype (nunits), we have to 5154 generate more than one vector stmt - i.e - we need to "unroll" 5155 the vector stmt by a factor VF/nunits. */ 5156 for (j = 0; j < ncopies; j++) 5157 { 5158 /* Handle uses. */ 5159 if (j == 0) 5160 { 5161 if (slp_node) 5162 { 5163 if (code == WIDEN_LSHIFT_EXPR) 5164 { 5165 unsigned int k; 5166 5167 vec_oprnd1 = op1; 5168 /* Store vec_oprnd1 for every vector stmt to be created 5169 for SLP_NODE. We check during the analysis that all 5170 the shift arguments are the same. */ 5171 for (k = 0; k < slp_node->vec_stmts_size - 1; k++) 5172 vec_oprnds1.quick_push (vec_oprnd1); 5173 5174 vect_get_vec_defs (op0, NULL_TREE, stmt_info, 5175 &vec_oprnds0, NULL, slp_node); 5176 } 5177 else 5178 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0, 5179 &vec_oprnds1, slp_node); 5180 } 5181 else 5182 { 5183 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt_info); 5184 vec_oprnds0.quick_push (vec_oprnd0); 5185 if (op_type == binary_op) 5186 { 5187 if (code == WIDEN_LSHIFT_EXPR) 5188 vec_oprnd1 = op1; 5189 else 5190 vec_oprnd1 5191 = vect_get_vec_def_for_operand (op1, stmt_info); 5192 vec_oprnds1.quick_push (vec_oprnd1); 5193 } 5194 } 5195 } 5196 else 5197 { 5198 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0); 5199 vec_oprnds0.truncate (0); 5200 vec_oprnds0.quick_push (vec_oprnd0); 5201 if (op_type == binary_op) 5202 { 5203 if (code == WIDEN_LSHIFT_EXPR) 5204 vec_oprnd1 = op1; 5205 else 5206 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo, 5207 vec_oprnd1); 5208 vec_oprnds1.truncate (0); 5209 vec_oprnds1.quick_push (vec_oprnd1); 5210 } 5211 } 5212 5213 /* Arguments are ready. Create the new vector stmts. 
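   The loop below walks the multi-step chain using the destination in
   VEC_DSTS[I] for each step; when a separate float conversion is required,
   the last step (I == 0) switches from CODE1/CODE2 to CODECVT1/CODECVT2.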
*/ 5214 for (i = multi_step_cvt; i >= 0; i--) 5215 { 5216 tree this_dest = vec_dsts[i]; 5217 enum tree_code c1 = code1, c2 = code2; 5218 if (i == 0 && codecvt2 != ERROR_MARK) 5219 { 5220 c1 = codecvt1; 5221 c2 = codecvt2; 5222 } 5223 vect_create_vectorized_promotion_stmts (&vec_oprnds0, 5224 &vec_oprnds1, stmt_info, 5225 this_dest, gsi, 5226 c1, c2, op_type); 5227 } 5228 5229 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0) 5230 { 5231 stmt_vec_info new_stmt_info; 5232 if (cvt_type) 5233 { 5234 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op); 5235 new_temp = make_ssa_name (vec_dest); 5236 gassign *new_stmt 5237 = gimple_build_assign (new_temp, codecvt1, vop0); 5238 new_stmt_info 5239 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi); 5240 } 5241 else 5242 new_stmt_info = vinfo->lookup_def (vop0); 5243 5244 if (slp_node) 5245 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info); 5246 else 5247 { 5248 if (!prev_stmt_info) 5249 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info; 5250 else 5251 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info; 5252 prev_stmt_info = new_stmt_info; 5253 } 5254 } 5255 } 5256 5257 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info); 5258 break; 5259 5260 case NARROW: 5261 /* In case the vectorization factor (VF) is bigger than the number 5262 of elements that we can fit in a vectype (nunits), we have to 5263 generate more than one vector stmt - i.e - we need to "unroll" 5264 the vector stmt by a factor VF/nunits. */ 5265 for (j = 0; j < ncopies; j++) 5266 { 5267 /* Handle uses. */ 5268 if (slp_node) 5269 vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL, 5270 slp_node); 5271 else 5272 { 5273 vec_oprnds0.truncate (0); 5274 vect_get_loop_based_defs (&last_oprnd, stmt_info, &vec_oprnds0, 5275 vect_pow2 (multi_step_cvt) - 1); 5276 } 5277 5278 /* Arguments are ready. Create the new vector stmts. */ 5279 if (cvt_type) 5280 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0) 5281 { 5282 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op); 5283 new_temp = make_ssa_name (vec_dest); 5284 gassign *new_stmt 5285 = gimple_build_assign (new_temp, codecvt1, vop0); 5286 vect_finish_stmt_generation (stmt_info, new_stmt, gsi); 5287 vec_oprnds0[i] = new_temp; 5288 } 5289 5290 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt, 5291 stmt_info, vec_dsts, gsi, 5292 slp_node, code1, 5293 &prev_stmt_info); 5294 } 5295 5296 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info); 5297 break; 5298 } 5299 5300 vec_oprnds0.release (); 5301 vec_oprnds1.release (); 5302 interm_types.release (); 5303 5304 return true; 5305 } 5306 5307 /* Return true if we can assume from the scalar form of STMT_INFO that 5308 neither the scalar nor the vector forms will generate code. STMT_INFO 5309 is known not to involve a data reference. */ 5310 5311 bool 5312 vect_nop_conversion_p (stmt_vec_info stmt_info) 5313 { 5314 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt); 5315 if (!stmt) 5316 return false; 5317 5318 tree lhs = gimple_assign_lhs (stmt); 5319 tree_code code = gimple_assign_rhs_code (stmt); 5320 tree rhs = gimple_assign_rhs1 (stmt); 5321 5322 if (code == SSA_NAME || code == VIEW_CONVERT_EXPR) 5323 return true; 5324 5325 if (CONVERT_EXPR_CODE_P (code)) 5326 return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)); 5327 5328 return false; 5329 } 5330 5331 /* Function vectorizable_assignment. 5332 5333 Check if STMT_INFO performs an assignment (copy) that can be vectorized. 
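   A typical candidate (the loop is invented for this comment, not taken
   from this file) is the no-op conversion in

       void
       copy_loop (unsigned int *__restrict dst, int *__restrict src, int n)
       {
	 for (int i = 0; i < n; i++)
	   dst[i] = (unsigned int) src[i];
       }

   which is emitted as one VIEW_CONVERT_EXPR vector copy per NCOPIES,
   because the conversion changes neither the number of vector elements
   nor the vector size.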
5334 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized 5335 stmt to replace it, put it in VEC_STMT, and insert it at GSI. 5336 Return true if STMT_INFO is vectorizable in this way. */ 5337 5338 static bool 5339 vectorizable_assignment (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, 5340 stmt_vec_info *vec_stmt, slp_tree slp_node, 5341 stmt_vector_for_cost *cost_vec) 5342 { 5343 tree vec_dest; 5344 tree scalar_dest; 5345 tree op; 5346 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 5347 tree new_temp; 5348 enum vect_def_type dt[1] = {vect_unknown_def_type}; 5349 int ndts = 1; 5350 int ncopies; 5351 int i, j; 5352 vec<tree> vec_oprnds = vNULL; 5353 tree vop; 5354 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); 5355 vec_info *vinfo = stmt_info->vinfo; 5356 stmt_vec_info prev_stmt_info = NULL; 5357 enum tree_code code; 5358 tree vectype_in; 5359 5360 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) 5361 return false; 5362 5363 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def 5364 && ! vec_stmt) 5365 return false; 5366 5367 /* Is vectorizable assignment? */ 5368 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt); 5369 if (!stmt) 5370 return false; 5371 5372 scalar_dest = gimple_assign_lhs (stmt); 5373 if (TREE_CODE (scalar_dest) != SSA_NAME) 5374 return false; 5375 5376 code = gimple_assign_rhs_code (stmt); 5377 if (gimple_assign_single_p (stmt) 5378 || code == PAREN_EXPR 5379 || CONVERT_EXPR_CODE_P (code)) 5380 op = gimple_assign_rhs1 (stmt); 5381 else 5382 return false; 5383 5384 if (code == VIEW_CONVERT_EXPR) 5385 op = TREE_OPERAND (op, 0); 5386 5387 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 5388 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); 5389 5390 /* Multiple types in SLP are handled by creating the appropriate number of 5391 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in 5392 case of SLP. */ 5393 if (slp_node) 5394 ncopies = 1; 5395 else 5396 ncopies = vect_get_num_copies (loop_vinfo, vectype); 5397 5398 gcc_assert (ncopies >= 1); 5399 5400 if (!vect_is_simple_use (op, vinfo, &dt[0], &vectype_in)) 5401 { 5402 if (dump_enabled_p ()) 5403 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 5404 "use not simple.\n"); 5405 return false; 5406 } 5407 5408 /* We can handle NOP_EXPR conversions that do not change the number 5409 of elements or the vector size. */ 5410 if ((CONVERT_EXPR_CODE_P (code) 5411 || code == VIEW_CONVERT_EXPR) 5412 && (!vectype_in 5413 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits) 5414 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)), 5415 GET_MODE_SIZE (TYPE_MODE (vectype_in))))) 5416 return false; 5417 5418 /* We do not handle bit-precision changes. */ 5419 if ((CONVERT_EXPR_CODE_P (code) 5420 || code == VIEW_CONVERT_EXPR) 5421 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest)) 5422 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)) 5423 || !type_has_mode_precision_p (TREE_TYPE (op))) 5424 /* But a conversion that does not change the bit-pattern is ok. */ 5425 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest)) 5426 > TYPE_PRECISION (TREE_TYPE (op))) 5427 && TYPE_UNSIGNED (TREE_TYPE (op))) 5428 /* Conversion between boolean types of different sizes is 5429 a simple assignment in case their vectypes are same 5430 boolean vectors. 
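     For example (purely illustrative), zero-extending a value of a
     narrower unsigned type only prepends zero bits to each element and so
     falls under the bit-pattern exception above, and a copy between two
     boolean types of different precision is acceptable as long as both
     scalar types map to the same boolean vector type.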
*/ 5431 && (!VECTOR_BOOLEAN_TYPE_P (vectype) 5432 || !VECTOR_BOOLEAN_TYPE_P (vectype_in))) 5433 { 5434 if (dump_enabled_p ()) 5435 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 5436 "type conversion to/from bit-precision " 5437 "unsupported.\n"); 5438 return false; 5439 } 5440 5441 if (!vec_stmt) /* transformation not required. */ 5442 { 5443 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type; 5444 DUMP_VECT_SCOPE ("vectorizable_assignment"); 5445 if (!vect_nop_conversion_p (stmt_info)) 5446 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, 5447 cost_vec); 5448 return true; 5449 } 5450 5451 /* Transform. */ 5452 if (dump_enabled_p ()) 5453 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n"); 5454 5455 /* Handle def. */ 5456 vec_dest = vect_create_destination_var (scalar_dest, vectype); 5457 5458 /* Handle use. */ 5459 for (j = 0; j < ncopies; j++) 5460 { 5461 /* Handle uses. */ 5462 if (j == 0) 5463 vect_get_vec_defs (op, NULL, stmt_info, &vec_oprnds, NULL, slp_node); 5464 else 5465 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds, NULL); 5466 5467 /* Arguments are ready. create the new vector stmt. */ 5468 stmt_vec_info new_stmt_info = NULL; 5469 FOR_EACH_VEC_ELT (vec_oprnds, i, vop) 5470 { 5471 if (CONVERT_EXPR_CODE_P (code) 5472 || code == VIEW_CONVERT_EXPR) 5473 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop); 5474 gassign *new_stmt = gimple_build_assign (vec_dest, vop); 5475 new_temp = make_ssa_name (vec_dest, new_stmt); 5476 gimple_assign_set_lhs (new_stmt, new_temp); 5477 new_stmt_info 5478 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi); 5479 if (slp_node) 5480 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info); 5481 } 5482 5483 if (slp_node) 5484 continue; 5485 5486 if (j == 0) 5487 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info; 5488 else 5489 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info; 5490 5491 prev_stmt_info = new_stmt_info; 5492 } 5493 5494 vec_oprnds.release (); 5495 return true; 5496 } 5497 5498 5499 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE 5500 either as shift by a scalar or by a vector. */ 5501 5502 bool 5503 vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type) 5504 { 5505 5506 machine_mode vec_mode; 5507 optab optab; 5508 int icode; 5509 tree vectype; 5510 5511 vectype = get_vectype_for_scalar_type (vinfo, scalar_type); 5512 if (!vectype) 5513 return false; 5514 5515 optab = optab_for_tree_code (code, vectype, optab_scalar); 5516 if (!optab 5517 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing) 5518 { 5519 optab = optab_for_tree_code (code, vectype, optab_vector); 5520 if (!optab 5521 || (optab_handler (optab, TYPE_MODE (vectype)) 5522 == CODE_FOR_nothing)) 5523 return false; 5524 } 5525 5526 vec_mode = TYPE_MODE (vectype); 5527 icode = (int) optab_handler (optab, vec_mode); 5528 if (icode == CODE_FOR_nothing) 5529 return false; 5530 5531 return true; 5532 } 5533 5534 5535 /* Function vectorizable_shift. 5536 5537 Check if STMT_INFO performs a shift operation that can be vectorized. 5538 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized 5539 stmt to replace it, put it in VEC_STMT, and insert it at GSI. 5540 Return true if STMT_INFO is vectorizable in this way. 
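   As an illustration (both loops are invented for this comment and assume
   the target provides the corresponding shift optabs), the first loop
   below is handled as a vector/scalar shift because the amount is loop
   invariant, while the second needs a vector/vector shift because the
   amount varies per element:

       void
       shift_loops (unsigned int *__restrict a, unsigned int *__restrict b,
		    int s, int n)
       {
	 for (int i = 0; i < n; i++)
	   a[i] = a[i] << s;		// vector shifted by scalar
	 for (int i = 0; i < n; i++)
	   a[i] = a[i] << b[i];		// vector shifted by vector
       }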
*/ 5541 5542 static bool 5543 vectorizable_shift (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, 5544 stmt_vec_info *vec_stmt, slp_tree slp_node, 5545 stmt_vector_for_cost *cost_vec) 5546 { 5547 tree vec_dest; 5548 tree scalar_dest; 5549 tree op0, op1 = NULL; 5550 tree vec_oprnd1 = NULL_TREE; 5551 tree vectype; 5552 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 5553 enum tree_code code; 5554 machine_mode vec_mode; 5555 tree new_temp; 5556 optab optab; 5557 int icode; 5558 machine_mode optab_op2_mode; 5559 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type}; 5560 int ndts = 2; 5561 stmt_vec_info prev_stmt_info; 5562 poly_uint64 nunits_in; 5563 poly_uint64 nunits_out; 5564 tree vectype_out; 5565 tree op1_vectype; 5566 int ncopies; 5567 int j, i; 5568 vec<tree> vec_oprnds0 = vNULL; 5569 vec<tree> vec_oprnds1 = vNULL; 5570 tree vop0, vop1; 5571 unsigned int k; 5572 bool scalar_shift_arg = true; 5573 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); 5574 vec_info *vinfo = stmt_info->vinfo; 5575 bool incompatible_op1_vectype_p = false; 5576 5577 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) 5578 return false; 5579 5580 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def 5581 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle 5582 && ! vec_stmt) 5583 return false; 5584 5585 /* Is STMT a vectorizable binary/unary operation? */ 5586 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt); 5587 if (!stmt) 5588 return false; 5589 5590 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME) 5591 return false; 5592 5593 code = gimple_assign_rhs_code (stmt); 5594 5595 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR 5596 || code == RROTATE_EXPR)) 5597 return false; 5598 5599 scalar_dest = gimple_assign_lhs (stmt); 5600 vectype_out = STMT_VINFO_VECTYPE (stmt_info); 5601 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))) 5602 { 5603 if (dump_enabled_p ()) 5604 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 5605 "bit-precision shifts not supported.\n"); 5606 return false; 5607 } 5608 5609 op0 = gimple_assign_rhs1 (stmt); 5610 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype)) 5611 { 5612 if (dump_enabled_p ()) 5613 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 5614 "use not simple.\n"); 5615 return false; 5616 } 5617 /* If op0 is an external or constant def, infer the vector type 5618 from the scalar type. */ 5619 if (!vectype) 5620 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node); 5621 if (vec_stmt) 5622 gcc_assert (vectype); 5623 if (!vectype) 5624 { 5625 if (dump_enabled_p ()) 5626 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 5627 "no vectype for scalar type\n"); 5628 return false; 5629 } 5630 5631 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out); 5632 nunits_in = TYPE_VECTOR_SUBPARTS (vectype); 5633 if (maybe_ne (nunits_out, nunits_in)) 5634 return false; 5635 5636 op1 = gimple_assign_rhs2 (stmt); 5637 stmt_vec_info op1_def_stmt_info; 5638 if (!vect_is_simple_use (op1, vinfo, &dt[1], &op1_vectype, 5639 &op1_def_stmt_info)) 5640 { 5641 if (dump_enabled_p ()) 5642 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 5643 "use not simple.\n"); 5644 return false; 5645 } 5646 5647 /* Multiple types in SLP are handled by creating the appropriate number of 5648 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in 5649 case of SLP. 
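   In the loop case NCOPIES is the vectorization factor divided by the
   number of elements per vector; e.g. (numbers assumed for illustration)
   VF == 8 with a 4-element VECTYPE gives

       ncopies = 8 / 4 = 2

   so two vector shifts are generated for each scalar shift.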
*/ 5650 if (slp_node) 5651 ncopies = 1; 5652 else 5653 ncopies = vect_get_num_copies (loop_vinfo, vectype); 5654 5655 gcc_assert (ncopies >= 1); 5656 5657 /* Determine whether the shift amount is a vector, or scalar. If the 5658 shift/rotate amount is a vector, use the vector/vector shift optabs. */ 5659 5660 if ((dt[1] == vect_internal_def 5661 || dt[1] == vect_induction_def 5662 || dt[1] == vect_nested_cycle) 5663 && !slp_node) 5664 scalar_shift_arg = false; 5665 else if (dt[1] == vect_constant_def 5666 || dt[1] == vect_external_def 5667 || dt[1] == vect_internal_def) 5668 { 5669 /* In SLP, need to check whether the shift count is the same, 5670 in loops if it is a constant or invariant, it is always 5671 a scalar shift. */ 5672 if (slp_node) 5673 { 5674 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node); 5675 stmt_vec_info slpstmt_info; 5676 5677 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info) 5678 { 5679 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt); 5680 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0)) 5681 scalar_shift_arg = false; 5682 } 5683 5684 /* For internal SLP defs we have to make sure we see scalar stmts 5685 for all vector elements. 5686 ??? For different vectors we could resort to a different 5687 scalar shift operand but code-generation below simply always 5688 takes the first. */ 5689 if (dt[1] == vect_internal_def 5690 && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node), 5691 stmts.length ())) 5692 scalar_shift_arg = false; 5693 } 5694 5695 /* If the shift amount is computed by a pattern stmt we cannot 5696 use the scalar amount directly thus give up and use a vector 5697 shift. */ 5698 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info)) 5699 scalar_shift_arg = false; 5700 } 5701 else 5702 { 5703 if (dump_enabled_p ()) 5704 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 5705 "operand mode requires invariant argument.\n"); 5706 return false; 5707 } 5708 5709 /* Vector shifted by vector. */ 5710 bool was_scalar_shift_arg = scalar_shift_arg; 5711 if (!scalar_shift_arg) 5712 { 5713 optab = optab_for_tree_code (code, vectype, optab_vector); 5714 if (dump_enabled_p ()) 5715 dump_printf_loc (MSG_NOTE, vect_location, 5716 "vector/vector shift/rotate found.\n"); 5717 5718 if (!op1_vectype) 5719 op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1), 5720 slp_node); 5721 incompatible_op1_vectype_p 5722 = (op1_vectype == NULL_TREE 5723 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype), 5724 TYPE_VECTOR_SUBPARTS (vectype)) 5725 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype)); 5726 if (incompatible_op1_vectype_p 5727 && (!slp_node 5728 || SLP_TREE_DEF_TYPE 5729 (SLP_TREE_CHILDREN (slp_node)[1]) != vect_constant_def)) 5730 { 5731 if (dump_enabled_p ()) 5732 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 5733 "unusable type for last operand in" 5734 " vector/vector shift/rotate.\n"); 5735 return false; 5736 } 5737 } 5738 /* See if the machine has a vector shifted by scalar insn and if not 5739 then see if it has a vector shifted by vector insn. 
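     If only the vector/vector form exists (a target property assumed here
     for illustration), an invariant amount is still usable:
     SCALAR_SHIFT_ARG is cleared and the amount is later fold_convert-ed to
     the element type of VECTYPE and broadcast, precisely because of the
     type mismatch described below.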
*/ 5740 else 5741 { 5742 optab = optab_for_tree_code (code, vectype, optab_scalar); 5743 if (optab 5744 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing) 5745 { 5746 if (dump_enabled_p ()) 5747 dump_printf_loc (MSG_NOTE, vect_location, 5748 "vector/scalar shift/rotate found.\n"); 5749 } 5750 else 5751 { 5752 optab = optab_for_tree_code (code, vectype, optab_vector); 5753 if (optab 5754 && (optab_handler (optab, TYPE_MODE (vectype)) 5755 != CODE_FOR_nothing)) 5756 { 5757 scalar_shift_arg = false; 5758 5759 if (dump_enabled_p ()) 5760 dump_printf_loc (MSG_NOTE, vect_location, 5761 "vector/vector shift/rotate found.\n"); 5762 5763 /* Unlike the other binary operators, shifts/rotates have 5764 the rhs being int, instead of the same type as the lhs, 5765 so make sure the scalar is the right type if we are 5766 dealing with vectors of long long/long/short/char. */ 5767 incompatible_op1_vectype_p 5768 = !tree_nop_conversion_p (TREE_TYPE (vectype), 5769 TREE_TYPE (op1)); 5770 } 5771 } 5772 } 5773 5774 /* Supportable by target? */ 5775 if (!optab) 5776 { 5777 if (dump_enabled_p ()) 5778 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 5779 "no optab.\n"); 5780 return false; 5781 } 5782 vec_mode = TYPE_MODE (vectype); 5783 icode = (int) optab_handler (optab, vec_mode); 5784 if (icode == CODE_FOR_nothing) 5785 { 5786 if (dump_enabled_p ()) 5787 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 5788 "op not supported by target.\n"); 5789 /* Check only during analysis. */ 5790 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD) 5791 || (!vec_stmt 5792 && !vect_worthwhile_without_simd_p (vinfo, code))) 5793 return false; 5794 if (dump_enabled_p ()) 5795 dump_printf_loc (MSG_NOTE, vect_location, 5796 "proceeding using word mode.\n"); 5797 } 5798 5799 /* Worthwhile without SIMD support? Check only during analysis. */ 5800 if (!vec_stmt 5801 && !VECTOR_MODE_P (TYPE_MODE (vectype)) 5802 && !vect_worthwhile_without_simd_p (vinfo, code)) 5803 { 5804 if (dump_enabled_p ()) 5805 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 5806 "not worthwhile without SIMD support.\n"); 5807 return false; 5808 } 5809 5810 if (!vec_stmt) /* transformation not required. */ 5811 { 5812 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type; 5813 DUMP_VECT_SCOPE ("vectorizable_shift"); 5814 vect_model_simple_cost (stmt_info, ncopies, dt, 5815 scalar_shift_arg ? 1 : ndts, slp_node, cost_vec); 5816 return true; 5817 } 5818 5819 /* Transform. */ 5820 5821 if (dump_enabled_p ()) 5822 dump_printf_loc (MSG_NOTE, vect_location, 5823 "transform binary/unary operation.\n"); 5824 5825 if (incompatible_op1_vectype_p && !slp_node) 5826 { 5827 op1 = fold_convert (TREE_TYPE (vectype), op1); 5828 if (dt[1] != vect_constant_def) 5829 op1 = vect_init_vector (stmt_info, op1, 5830 TREE_TYPE (vectype), NULL); 5831 } 5832 5833 /* Handle def. */ 5834 vec_dest = vect_create_destination_var (scalar_dest, vectype); 5835 5836 prev_stmt_info = NULL; 5837 for (j = 0; j < ncopies; j++) 5838 { 5839 /* Handle uses. */ 5840 if (j == 0) 5841 { 5842 if (scalar_shift_arg) 5843 { 5844 /* Vector shl and shr insn patterns can be defined with scalar 5845 operand 2 (shift operand). In this case, use constant or loop 5846 invariant op1 directly, without extending it to vector mode 5847 first. 
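	     In other words (the modes are only an example), the decision
	     is driven by the insn's operand 2 mode:

		 operand[2].mode == SImode    ->  keep OP1 as a scalar
		 operand[2].mode == V4SImode  ->  OP1 needs a vector def

	     and in the scalar case VEC_OPRNDS1 is simply filled with OP1
	     itself.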
*/ 5848 optab_op2_mode = insn_data[icode].operand[2].mode; 5849 if (!VECTOR_MODE_P (optab_op2_mode)) 5850 { 5851 if (dump_enabled_p ()) 5852 dump_printf_loc (MSG_NOTE, vect_location, 5853 "operand 1 using scalar mode.\n"); 5854 vec_oprnd1 = op1; 5855 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1); 5856 vec_oprnds1.quick_push (vec_oprnd1); 5857 if (slp_node) 5858 { 5859 /* Store vec_oprnd1 for every vector stmt to be created 5860 for SLP_NODE. We check during the analysis that all 5861 the shift arguments are the same. 5862 TODO: Allow different constants for different vector 5863 stmts generated for an SLP instance. */ 5864 for (k = 0; k < slp_node->vec_stmts_size - 1; k++) 5865 vec_oprnds1.quick_push (vec_oprnd1); 5866 } 5867 } 5868 } 5869 else if (slp_node && incompatible_op1_vectype_p) 5870 { 5871 if (was_scalar_shift_arg) 5872 { 5873 /* If the argument was the same in all lanes create 5874 the correctly typed vector shift amount directly. */ 5875 op1 = fold_convert (TREE_TYPE (vectype), op1); 5876 op1 = vect_init_vector (stmt_info, op1, TREE_TYPE (vectype), 5877 !loop_vinfo ? gsi : NULL); 5878 vec_oprnd1 = vect_init_vector (stmt_info, op1, vectype, 5879 !loop_vinfo ? gsi : NULL); 5880 vec_oprnds1.create (slp_node->vec_stmts_size); 5881 for (k = 0; k < slp_node->vec_stmts_size; k++) 5882 vec_oprnds1.quick_push (vec_oprnd1); 5883 } 5884 else if (dt[1] == vect_constant_def) 5885 { 5886 /* Convert the scalar constant shift amounts in-place. */ 5887 slp_tree shift = SLP_TREE_CHILDREN (slp_node)[1]; 5888 gcc_assert (SLP_TREE_DEF_TYPE (shift) == vect_constant_def); 5889 for (unsigned i = 0; 5890 i < SLP_TREE_SCALAR_OPS (shift).length (); ++i) 5891 { 5892 SLP_TREE_SCALAR_OPS (shift)[i] 5893 = fold_convert (TREE_TYPE (vectype), 5894 SLP_TREE_SCALAR_OPS (shift)[i]); 5895 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (shift)[i]) 5896 == INTEGER_CST)); 5897 } 5898 } 5899 else 5900 gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype)); 5901 } 5902 5903 /* vec_oprnd1 is available if operand 1 should be of a scalar-type 5904 (a special case for certain kind of vector shifts); otherwise, 5905 operand 1 should be of a vector type (the usual case). */ 5906 if (vec_oprnd1) 5907 vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL, 5908 slp_node); 5909 else 5910 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0, &vec_oprnds1, 5911 slp_node); 5912 } 5913 else 5914 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, &vec_oprnds1); 5915 5916 /* Arguments are ready. Create the new vector stmt. */ 5917 stmt_vec_info new_stmt_info = NULL; 5918 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0) 5919 { 5920 vop1 = vec_oprnds1[i]; 5921 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1); 5922 new_temp = make_ssa_name (vec_dest, new_stmt); 5923 gimple_assign_set_lhs (new_stmt, new_temp); 5924 new_stmt_info 5925 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi); 5926 if (slp_node) 5927 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info); 5928 } 5929 5930 if (slp_node) 5931 continue; 5932 5933 if (j == 0) 5934 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info; 5935 else 5936 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info; 5937 prev_stmt_info = new_stmt_info; 5938 } 5939 5940 vec_oprnds0.release (); 5941 vec_oprnds1.release (); 5942 5943 return true; 5944 } 5945 5946 5947 /* Function vectorizable_operation. 5948 5949 Check if STMT_INFO performs a binary, unary or ternary operation that can 5950 be vectorized. 
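   A minimal candidate (invented for this comment) is the PLUS_EXPR in

       void
       add_loop (int *__restrict c, int *__restrict a,
		 int *__restrict b, int n)
       {
	 for (int i = 0; i < n; i++)
	   c[i] = a[i] + b[i];
       }

   which becomes one vector PLUS_EXPR per copy.  Shifts, comparisons and
   COND_EXPRs are rejected below because they have dedicated
   vectorizable_* routines.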
5951 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized 5952 stmt to replace it, put it in VEC_STMT, and insert it at GSI. 5953 Return true if STMT_INFO is vectorizable in this way. */ 5954 5955 static bool 5956 vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, 5957 stmt_vec_info *vec_stmt, slp_tree slp_node, 5958 stmt_vector_for_cost *cost_vec) 5959 { 5960 tree vec_dest; 5961 tree scalar_dest; 5962 tree op0, op1 = NULL_TREE, op2 = NULL_TREE; 5963 tree vectype; 5964 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 5965 enum tree_code code, orig_code; 5966 machine_mode vec_mode; 5967 tree new_temp; 5968 int op_type; 5969 optab optab; 5970 bool target_support_p; 5971 enum vect_def_type dt[3] 5972 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type}; 5973 int ndts = 3; 5974 stmt_vec_info prev_stmt_info; 5975 poly_uint64 nunits_in; 5976 poly_uint64 nunits_out; 5977 tree vectype_out; 5978 int ncopies, vec_num; 5979 int j, i; 5980 vec<tree> vec_oprnds0 = vNULL; 5981 vec<tree> vec_oprnds1 = vNULL; 5982 vec<tree> vec_oprnds2 = vNULL; 5983 tree vop0, vop1, vop2; 5984 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); 5985 vec_info *vinfo = stmt_info->vinfo; 5986 5987 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) 5988 return false; 5989 5990 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def 5991 && ! vec_stmt) 5992 return false; 5993 5994 /* Is STMT a vectorizable binary/unary operation? */ 5995 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt); 5996 if (!stmt) 5997 return false; 5998 5999 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME) 6000 return false; 6001 6002 orig_code = code = gimple_assign_rhs_code (stmt); 6003 6004 /* Shifts are handled in vectorizable_shift. */ 6005 if (code == LSHIFT_EXPR 6006 || code == RSHIFT_EXPR 6007 || code == LROTATE_EXPR 6008 || code == RROTATE_EXPR) 6009 return false; 6010 6011 /* Comparisons are handled in vectorizable_comparison. */ 6012 if (TREE_CODE_CLASS (code) == tcc_comparison) 6013 return false; 6014 6015 /* Conditions are handled in vectorizable_condition. */ 6016 if (code == COND_EXPR) 6017 return false; 6018 6019 /* For pointer addition and subtraction, we should use the normal 6020 plus and minus for the vector operation. */ 6021 if (code == POINTER_PLUS_EXPR) 6022 code = PLUS_EXPR; 6023 if (code == POINTER_DIFF_EXPR) 6024 code = MINUS_EXPR; 6025 6026 /* Support only unary or binary operations. */ 6027 op_type = TREE_CODE_LENGTH (code); 6028 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op) 6029 { 6030 if (dump_enabled_p ()) 6031 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 6032 "num. args = %d (not unary/binary/ternary op).\n", 6033 op_type); 6034 return false; 6035 } 6036 6037 scalar_dest = gimple_assign_lhs (stmt); 6038 vectype_out = STMT_VINFO_VECTYPE (stmt_info); 6039 6040 /* Most operations cannot handle bit-precision types without extra 6041 truncations. */ 6042 bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out); 6043 if (!mask_op_p 6044 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest)) 6045 /* Exception are bitwise binary operations. 
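     Bitwise AND/IOR/XOR never set bits outside the operands' precision,
     so they need no re-truncation; an addition on, say, a 20-bit
     bit-field type (an invented example) could carry into the unused bits
     of the containing mode and is therefore rejected here.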
*/ 6046 && code != BIT_IOR_EXPR 6047 && code != BIT_XOR_EXPR 6048 && code != BIT_AND_EXPR) 6049 { 6050 if (dump_enabled_p ()) 6051 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 6052 "bit-precision arithmetic not supported.\n"); 6053 return false; 6054 } 6055 6056 op0 = gimple_assign_rhs1 (stmt); 6057 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype)) 6058 { 6059 if (dump_enabled_p ()) 6060 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 6061 "use not simple.\n"); 6062 return false; 6063 } 6064 /* If op0 is an external or constant def, infer the vector type 6065 from the scalar type. */ 6066 if (!vectype) 6067 { 6068 /* For boolean type we cannot determine vectype by 6069 invariant value (don't know whether it is a vector 6070 of booleans or vector of integers). We use output 6071 vectype because operations on boolean don't change 6072 type. */ 6073 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0))) 6074 { 6075 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest))) 6076 { 6077 if (dump_enabled_p ()) 6078 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 6079 "not supported operation on bool value.\n"); 6080 return false; 6081 } 6082 vectype = vectype_out; 6083 } 6084 else 6085 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), 6086 slp_node); 6087 } 6088 if (vec_stmt) 6089 gcc_assert (vectype); 6090 if (!vectype) 6091 { 6092 if (dump_enabled_p ()) 6093 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 6094 "no vectype for scalar type %T\n", 6095 TREE_TYPE (op0)); 6096 6097 return false; 6098 } 6099 6100 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out); 6101 nunits_in = TYPE_VECTOR_SUBPARTS (vectype); 6102 if (maybe_ne (nunits_out, nunits_in)) 6103 return false; 6104 6105 tree vectype2 = NULL_TREE, vectype3 = NULL_TREE; 6106 if (op_type == binary_op || op_type == ternary_op) 6107 { 6108 op1 = gimple_assign_rhs2 (stmt); 6109 if (!vect_is_simple_use (op1, vinfo, &dt[1], &vectype2)) 6110 { 6111 if (dump_enabled_p ()) 6112 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 6113 "use not simple.\n"); 6114 return false; 6115 } 6116 } 6117 if (op_type == ternary_op) 6118 { 6119 op2 = gimple_assign_rhs3 (stmt); 6120 if (!vect_is_simple_use (op2, vinfo, &dt[2], &vectype3)) 6121 { 6122 if (dump_enabled_p ()) 6123 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 6124 "use not simple.\n"); 6125 return false; 6126 } 6127 } 6128 6129 /* Multiple types in SLP are handled by creating the appropriate number of 6130 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in 6131 case of SLP. */ 6132 if (slp_node) 6133 { 6134 ncopies = 1; 6135 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); 6136 } 6137 else 6138 { 6139 ncopies = vect_get_num_copies (loop_vinfo, vectype); 6140 vec_num = 1; 6141 } 6142 6143 gcc_assert (ncopies >= 1); 6144 6145 /* Reject attempts to combine mask types with nonmask types, e.g. if 6146 we have an AND between a (nonmask) boolean loaded from memory and 6147 a (mask) boolean result of a comparison. 6148 6149 TODO: We could easily fix these cases up using pattern statements. */ 6150 if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p 6151 || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p) 6152 || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p)) 6153 { 6154 if (dump_enabled_p ()) 6155 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 6156 "mixed mask and nonmask vector types\n"); 6157 return false; 6158 } 6159 6160 /* Supportable by target? 
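     MULT_HIGHPART_EXPR, typically produced by pattern recognition for
     division by an invariant, e.g. (illustrative only)

	 q = x / 3   ->   h = x h* C;  q = h >> s

     is queried through can_mult_highpart_p, which also accounts for
     targets that synthesize the high part from widening multiplies plus a
     permute; every other code is checked through its optab on
     TYPE_MODE (vectype).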
*/ 6161 6162 vec_mode = TYPE_MODE (vectype); 6163 if (code == MULT_HIGHPART_EXPR) 6164 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)); 6165 else 6166 { 6167 optab = optab_for_tree_code (code, vectype, optab_default); 6168 if (!optab) 6169 { 6170 if (dump_enabled_p ()) 6171 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 6172 "no optab.\n"); 6173 return false; 6174 } 6175 target_support_p = (optab_handler (optab, vec_mode) 6176 != CODE_FOR_nothing); 6177 } 6178 6179 if (!target_support_p) 6180 { 6181 if (dump_enabled_p ()) 6182 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 6183 "op not supported by target.\n"); 6184 /* Check only during analysis. */ 6185 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD) 6186 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code))) 6187 return false; 6188 if (dump_enabled_p ()) 6189 dump_printf_loc (MSG_NOTE, vect_location, 6190 "proceeding using word mode.\n"); 6191 } 6192 6193 /* Worthwhile without SIMD support? Check only during analysis. */ 6194 if (!VECTOR_MODE_P (vec_mode) 6195 && !vec_stmt 6196 && !vect_worthwhile_without_simd_p (vinfo, code)) 6197 { 6198 if (dump_enabled_p ()) 6199 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 6200 "not worthwhile without SIMD support.\n"); 6201 return false; 6202 } 6203 6204 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info); 6205 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL); 6206 internal_fn cond_fn = get_conditional_internal_fn (code); 6207 6208 if (!vec_stmt) /* transformation not required. */ 6209 { 6210 /* If this operation is part of a reduction, a fully-masked loop 6211 should only change the active lanes of the reduction chain, 6212 keeping the inactive lanes as-is. */ 6213 if (loop_vinfo 6214 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) 6215 && reduc_idx >= 0) 6216 { 6217 if (cond_fn == IFN_LAST 6218 || !direct_internal_fn_supported_p (cond_fn, vectype, 6219 OPTIMIZE_FOR_SPEED)) 6220 { 6221 if (dump_enabled_p ()) 6222 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 6223 "can't use a fully-masked loop because no" 6224 " conditional operation is available.\n"); 6225 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false; 6226 } 6227 else 6228 vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num, 6229 vectype, NULL); 6230 } 6231 6232 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type; 6233 DUMP_VECT_SCOPE ("vectorizable_operation"); 6234 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec); 6235 return true; 6236 } 6237 6238 /* Transform. */ 6239 6240 if (dump_enabled_p ()) 6241 dump_printf_loc (MSG_NOTE, vect_location, 6242 "transform binary/unary operation.\n"); 6243 6244 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo); 6245 6246 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as 6247 vectors with unsigned elements, but the result is signed. So, we 6248 need to compute the MINUS_EXPR into vectype temporary and 6249 VIEW_CONVERT_EXPR it into the final vectype_out result. */ 6250 tree vec_cvt_dest = NULL_TREE; 6251 if (orig_code == POINTER_DIFF_EXPR) 6252 { 6253 vec_dest = vect_create_destination_var (scalar_dest, vectype); 6254 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out); 6255 } 6256 /* Handle def. 
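     For the POINTER_DIFF_EXPR case above, an illustrative source (types
     invented for this comment) is

	 void
	 diff_loop (long *__restrict d, char **__restrict p,
		    char **__restrict q, int n)
	 {
	   for (int i = 0; i < n; i++)
	     d[i] = p[i] - q[i];
	 }

     where the subtraction is carried out as an unsigned vector MINUS_EXPR
     into VEC_DEST and only then VIEW_CONVERT_EXPRed into the signed
     VECTYPE_OUT result through VEC_CVT_DEST.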
*/ 6257 else 6258 vec_dest = vect_create_destination_var (scalar_dest, vectype_out); 6259 6260 /* In case the vectorization factor (VF) is bigger than the number 6261 of elements that we can fit in a vectype (nunits), we have to generate 6262 more than one vector stmt - i.e - we need to "unroll" the 6263 vector stmt by a factor VF/nunits. In doing so, we record a pointer 6264 from one copy of the vector stmt to the next, in the field 6265 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following 6266 stages to find the correct vector defs to be used when vectorizing 6267 stmts that use the defs of the current stmt. The example below 6268 illustrates the vectorization process when VF=16 and nunits=4 (i.e., 6269 we need to create 4 vectorized stmts): 6270 6271 before vectorization: 6272 RELATED_STMT VEC_STMT 6273 S1: x = memref - - 6274 S2: z = x + 1 - - 6275 6276 step 1: vectorize stmt S1 (done in vectorizable_load. See more details 6277 there): 6278 RELATED_STMT VEC_STMT 6279 VS1_0: vx0 = memref0 VS1_1 - 6280 VS1_1: vx1 = memref1 VS1_2 - 6281 VS1_2: vx2 = memref2 VS1_3 - 6282 VS1_3: vx3 = memref3 - - 6283 S1: x = load - VS1_0 6284 S2: z = x + 1 - - 6285 6286 step2: vectorize stmt S2 (done here): 6287 To vectorize stmt S2 we first need to find the relevant vector 6288 def for the first operand 'x'. This is, as usual, obtained from 6289 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt 6290 that defines 'x' (S1). This way we find the stmt VS1_0, and the 6291 relevant vector def 'vx0'. Having found 'vx0' we can generate 6292 the vector stmt VS2_0, and as usual, record it in the 6293 STMT_VINFO_VEC_STMT of stmt S2. 6294 When creating the second copy (VS2_1), we obtain the relevant vector 6295 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of 6296 stmt VS1_0. This way we find the stmt VS1_1 and the relevant 6297 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a 6298 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0. 6299 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting 6300 chain of stmts and pointers: 6301 RELATED_STMT VEC_STMT 6302 VS1_0: vx0 = memref0 VS1_1 - 6303 VS1_1: vx1 = memref1 VS1_2 - 6304 VS1_2: vx2 = memref2 VS1_3 - 6305 VS1_3: vx3 = memref3 - - 6306 S1: x = load - VS1_0 6307 VS2_0: vz0 = vx0 + v1 VS2_1 - 6308 VS2_1: vz1 = vx1 + v1 VS2_2 - 6309 VS2_2: vz2 = vx2 + v1 VS2_3 - 6310 VS2_3: vz3 = vx3 + v1 - - 6311 S2: z = x + 1 - VS2_0 */ 6312 6313 prev_stmt_info = NULL; 6314 for (j = 0; j < ncopies; j++) 6315 { 6316 /* Handle uses. 
*/ 6317 if (j == 0) 6318 { 6319 if (op_type == binary_op) 6320 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0, &vec_oprnds1, 6321 slp_node); 6322 else if (op_type == ternary_op) 6323 { 6324 if (slp_node) 6325 { 6326 auto_vec<vec<tree> > vec_defs(3); 6327 vect_get_slp_defs (slp_node, &vec_defs); 6328 vec_oprnds0 = vec_defs[0]; 6329 vec_oprnds1 = vec_defs[1]; 6330 vec_oprnds2 = vec_defs[2]; 6331 } 6332 else 6333 { 6334 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0, 6335 &vec_oprnds1, NULL); 6336 vect_get_vec_defs (op2, NULL_TREE, stmt_info, &vec_oprnds2, 6337 NULL, NULL); 6338 } 6339 } 6340 else 6341 vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL, 6342 slp_node); 6343 } 6344 else 6345 { 6346 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, &vec_oprnds1); 6347 if (op_type == ternary_op) 6348 { 6349 tree vec_oprnd = vec_oprnds2.pop (); 6350 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (vinfo, 6351 vec_oprnd)); 6352 } 6353 } 6354 6355 /* Arguments are ready. Create the new vector stmt. */ 6356 stmt_vec_info new_stmt_info = NULL; 6357 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0) 6358 { 6359 vop1 = ((op_type == binary_op || op_type == ternary_op) 6360 ? vec_oprnds1[i] : NULL_TREE); 6361 vop2 = ((op_type == ternary_op) 6362 ? vec_oprnds2[i] : NULL_TREE); 6363 if (masked_loop_p && reduc_idx >= 0) 6364 { 6365 /* Perform the operation on active elements only and take 6366 inactive elements from the reduction chain input. */ 6367 gcc_assert (!vop2); 6368 vop2 = reduc_idx == 1 ? vop1 : vop0; 6369 tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies, 6370 vectype, i * ncopies + j); 6371 gcall *call = gimple_build_call_internal (cond_fn, 4, mask, 6372 vop0, vop1, vop2); 6373 new_temp = make_ssa_name (vec_dest, call); 6374 gimple_call_set_lhs (call, new_temp); 6375 gimple_call_set_nothrow (call, true); 6376 new_stmt_info 6377 = vect_finish_stmt_generation (stmt_info, call, gsi); 6378 } 6379 else 6380 { 6381 gassign *new_stmt = gimple_build_assign (vec_dest, code, 6382 vop0, vop1, vop2); 6383 new_temp = make_ssa_name (vec_dest, new_stmt); 6384 gimple_assign_set_lhs (new_stmt, new_temp); 6385 new_stmt_info 6386 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi); 6387 if (vec_cvt_dest) 6388 { 6389 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp); 6390 gassign *new_stmt 6391 = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR, 6392 new_temp); 6393 new_temp = make_ssa_name (vec_cvt_dest, new_stmt); 6394 gimple_assign_set_lhs (new_stmt, new_temp); 6395 new_stmt_info 6396 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi); 6397 } 6398 } 6399 if (slp_node) 6400 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info); 6401 } 6402 6403 if (slp_node) 6404 continue; 6405 6406 if (j == 0) 6407 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info; 6408 else 6409 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info; 6410 prev_stmt_info = new_stmt_info; 6411 } 6412 6413 vec_oprnds0.release (); 6414 vec_oprnds1.release (); 6415 vec_oprnds2.release (); 6416 6417 return true; 6418 } 6419 6420 /* A helper function to ensure data reference DR_INFO's base alignment. 
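   For example (numbers assumed), if DR_TARGET_ALIGNMENT is 16 bytes but
   the base declaration only carries 8-byte alignment, the declaration is
   re-aligned so that the alignment assumed during analysis actually
   holds:

       decl_in_symtab_p (base)  ->  symtab_node::get (base)->increase_alignment (128)
       otherwise                ->  SET_DECL_ALIGN (base, 128), DECL_USER_ALIGN (base) = 1

   Nothing is done when no misalignment information was computed for this
   DR.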
*/ 6421 6422 static void 6423 ensure_base_align (dr_vec_info *dr_info) 6424 { 6425 if (dr_info->misalignment == DR_MISALIGNMENT_UNINITIALIZED) 6426 return; 6427 6428 if (dr_info->base_misaligned) 6429 { 6430 tree base_decl = dr_info->base_decl; 6431 6432 // We should only be able to increase the alignment of a base object if 6433 // we know what its new alignment should be at compile time. 6434 unsigned HOST_WIDE_INT align_base_to = 6435 DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT; 6436 6437 if (decl_in_symtab_p (base_decl)) 6438 symtab_node::get (base_decl)->increase_alignment (align_base_to); 6439 else if (DECL_ALIGN (base_decl) < align_base_to) 6440 { 6441 SET_DECL_ALIGN (base_decl, align_base_to); 6442 DECL_USER_ALIGN (base_decl) = 1; 6443 } 6444 dr_info->base_misaligned = false; 6445 } 6446 } 6447 6448 6449 /* Function get_group_alias_ptr_type. 6450 6451 Return the alias type for the group starting at FIRST_STMT_INFO. */ 6452 6453 static tree 6454 get_group_alias_ptr_type (stmt_vec_info first_stmt_info) 6455 { 6456 struct data_reference *first_dr, *next_dr; 6457 6458 first_dr = STMT_VINFO_DATA_REF (first_stmt_info); 6459 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info); 6460 while (next_stmt_info) 6461 { 6462 next_dr = STMT_VINFO_DATA_REF (next_stmt_info); 6463 if (get_alias_set (DR_REF (first_dr)) 6464 != get_alias_set (DR_REF (next_dr))) 6465 { 6466 if (dump_enabled_p ()) 6467 dump_printf_loc (MSG_NOTE, vect_location, 6468 "conflicting alias set types.\n"); 6469 return ptr_type_node; 6470 } 6471 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info); 6472 } 6473 return reference_alias_ptr_type (DR_REF (first_dr)); 6474 } 6475 6476 6477 /* Function scan_operand_equal_p. 6478 6479 Helper function for check_scan_store. Compare two references 6480 with .GOMP_SIMD_LANE bases. 
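   For instance (SSA names invented), D.2042[_25] and a form such as
   MEM[(int *) &D.2042 + _25 * 4] are treated as equal: both are
   decomposed with get_inner_reference, a MEM_REF base is looked through
   its POINTER_PLUS_EXPR of an ADDR_EXPR, a MULT_EXPR by the element size
   becomes the step, widening conversions of the lane index are stripped,
   and then bases, bit sizes, offsets and steps are compared.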
*/ 6481 6482 static bool 6483 scan_operand_equal_p (tree ref1, tree ref2) 6484 { 6485 tree ref[2] = { ref1, ref2 }; 6486 poly_int64 bitsize[2], bitpos[2]; 6487 tree offset[2], base[2]; 6488 for (int i = 0; i < 2; ++i) 6489 { 6490 machine_mode mode; 6491 int unsignedp, reversep, volatilep = 0; 6492 base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i], 6493 &offset[i], &mode, &unsignedp, 6494 &reversep, &volatilep); 6495 if (reversep || volatilep || maybe_ne (bitpos[i], 0)) 6496 return false; 6497 if (TREE_CODE (base[i]) == MEM_REF 6498 && offset[i] == NULL_TREE 6499 && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME) 6500 { 6501 gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0)); 6502 if (is_gimple_assign (def_stmt) 6503 && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR 6504 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR 6505 && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME) 6506 { 6507 if (maybe_ne (mem_ref_offset (base[i]), 0)) 6508 return false; 6509 base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0); 6510 offset[i] = gimple_assign_rhs2 (def_stmt); 6511 } 6512 } 6513 } 6514 6515 if (!operand_equal_p (base[0], base[1], 0)) 6516 return false; 6517 if (maybe_ne (bitsize[0], bitsize[1])) 6518 return false; 6519 if (offset[0] != offset[1]) 6520 { 6521 if (!offset[0] || !offset[1]) 6522 return false; 6523 if (!operand_equal_p (offset[0], offset[1], 0)) 6524 { 6525 tree step[2]; 6526 for (int i = 0; i < 2; ++i) 6527 { 6528 step[i] = integer_one_node; 6529 if (TREE_CODE (offset[i]) == SSA_NAME) 6530 { 6531 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]); 6532 if (is_gimple_assign (def_stmt) 6533 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR 6534 && (TREE_CODE (gimple_assign_rhs2 (def_stmt)) 6535 == INTEGER_CST)) 6536 { 6537 step[i] = gimple_assign_rhs2 (def_stmt); 6538 offset[i] = gimple_assign_rhs1 (def_stmt); 6539 } 6540 } 6541 else if (TREE_CODE (offset[i]) == MULT_EXPR) 6542 { 6543 step[i] = TREE_OPERAND (offset[i], 1); 6544 offset[i] = TREE_OPERAND (offset[i], 0); 6545 } 6546 tree rhs1 = NULL_TREE; 6547 if (TREE_CODE (offset[i]) == SSA_NAME) 6548 { 6549 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]); 6550 if (gimple_assign_cast_p (def_stmt)) 6551 rhs1 = gimple_assign_rhs1 (def_stmt); 6552 } 6553 else if (CONVERT_EXPR_P (offset[i])) 6554 rhs1 = TREE_OPERAND (offset[i], 0); 6555 if (rhs1 6556 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1)) 6557 && INTEGRAL_TYPE_P (TREE_TYPE (offset[i])) 6558 && (TYPE_PRECISION (TREE_TYPE (offset[i])) 6559 >= TYPE_PRECISION (TREE_TYPE (rhs1)))) 6560 offset[i] = rhs1; 6561 } 6562 if (!operand_equal_p (offset[0], offset[1], 0) 6563 || !operand_equal_p (step[0], step[1], 0)) 6564 return false; 6565 } 6566 } 6567 return true; 6568 } 6569 6570 6571 enum scan_store_kind { 6572 /* Normal permutation. */ 6573 scan_store_kind_perm, 6574 6575 /* Whole vector left shift permutation with zero init. */ 6576 scan_store_kind_lshift_zero, 6577 6578 /* Whole vector left shift permutation and VEC_COND_EXPR. */ 6579 scan_store_kind_lshift_cond 6580 }; 6581 6582 /* Function check_scan_store. 6583 6584 Verify if we can perform the needed permutations or whole vector shifts. 6585 Return -1 on failure, otherwise exact log2 of vectype's nunits. 6586 USE_WHOLE_VECTOR is a vector of enum scan_store_kind which operation 6587 to do at each step. 
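   For example, with 8 elements per vector (a width assumed for
   illustration) the selectors tried are

       step 0:	{ 0, 8, 9, 10, 11, 12, 13, 14 }
       step 1:	{ 0, 1, 8, 9, 10, 11, 12, 13 }
       step 2:	{ 0, 1, 2, 3, 8, 9, 10, 11 }
       final:	{ 7, 7, 7, 7, 7, 7, 7, 7 }

   i.e. units_log2 merges with the initializer vector plus one broadcast
   of the last element; a step whose permutation the target cannot do may
   instead be recorded as a whole-vector left shift, optionally combined
   with a VEC_COND_EXPR when the initializer is not all zeros.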
*/ 6588 6589 static int 6590 scan_store_can_perm_p (tree vectype, tree init, 6591 vec<enum scan_store_kind> *use_whole_vector = NULL) 6592 { 6593 enum machine_mode vec_mode = TYPE_MODE (vectype); 6594 unsigned HOST_WIDE_INT nunits; 6595 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)) 6596 return -1; 6597 int units_log2 = exact_log2 (nunits); 6598 if (units_log2 <= 0) 6599 return -1; 6600 6601 int i; 6602 enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm; 6603 for (i = 0; i <= units_log2; ++i) 6604 { 6605 unsigned HOST_WIDE_INT j, k; 6606 enum scan_store_kind kind = scan_store_kind_perm; 6607 vec_perm_builder sel (nunits, nunits, 1); 6608 sel.quick_grow (nunits); 6609 if (i == units_log2) 6610 { 6611 for (j = 0; j < nunits; ++j) 6612 sel[j] = nunits - 1; 6613 } 6614 else 6615 { 6616 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j) 6617 sel[j] = j; 6618 for (k = 0; j < nunits; ++j, ++k) 6619 sel[j] = nunits + k; 6620 } 6621 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits); 6622 if (!can_vec_perm_const_p (vec_mode, indices)) 6623 { 6624 if (i == units_log2) 6625 return -1; 6626 6627 if (whole_vector_shift_kind == scan_store_kind_perm) 6628 { 6629 if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing) 6630 return -1; 6631 whole_vector_shift_kind = scan_store_kind_lshift_zero; 6632 /* Whole vector shifts shift in zeros, so if init is all zero 6633 constant, there is no need to do anything further. */ 6634 if ((TREE_CODE (init) != INTEGER_CST 6635 && TREE_CODE (init) != REAL_CST) 6636 || !initializer_zerop (init)) 6637 { 6638 tree masktype = truth_type_for (vectype); 6639 if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST)) 6640 return -1; 6641 whole_vector_shift_kind = scan_store_kind_lshift_cond; 6642 } 6643 } 6644 kind = whole_vector_shift_kind; 6645 } 6646 if (use_whole_vector) 6647 { 6648 if (kind != scan_store_kind_perm && use_whole_vector->is_empty ()) 6649 use_whole_vector->safe_grow_cleared (i); 6650 if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ()) 6651 use_whole_vector->safe_push (kind); 6652 } 6653 } 6654 6655 return units_log2; 6656 } 6657 6658 6659 /* Function check_scan_store. 6660 6661 Check magic stores for #pragma omp scan {in,ex}clusive reductions. 
*/ 6662 6663 static bool 6664 check_scan_store (stmt_vec_info stmt_info, tree vectype, 6665 enum vect_def_type rhs_dt, bool slp, tree mask, 6666 vect_memory_access_type memory_access_type) 6667 { 6668 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 6669 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info); 6670 tree ref_type; 6671 6672 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1); 6673 if (slp 6674 || mask 6675 || memory_access_type != VMAT_CONTIGUOUS 6676 || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR 6677 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0)) 6678 || loop_vinfo == NULL 6679 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo) 6680 || STMT_VINFO_GROUPED_ACCESS (stmt_info) 6681 || !integer_zerop (get_dr_vinfo_offset (dr_info)) 6682 || !integer_zerop (DR_INIT (dr_info->dr)) 6683 || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr))) 6684 || !alias_sets_conflict_p (get_alias_set (vectype), 6685 get_alias_set (TREE_TYPE (ref_type)))) 6686 { 6687 if (dump_enabled_p ()) 6688 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 6689 "unsupported OpenMP scan store.\n"); 6690 return false; 6691 } 6692 6693 /* We need to pattern match code built by OpenMP lowering and simplified 6694 by following optimizations into something we can handle. 6695 #pragma omp simd reduction(inscan,+:r) 6696 for (...) 6697 { 6698 r += something (); 6699 #pragma omp scan inclusive (r) 6700 use (r); 6701 } 6702 shall have body with: 6703 // Initialization for input phase, store the reduction initializer: 6704 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0); 6705 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1); 6706 D.2042[_21] = 0; 6707 // Actual input phase: 6708 ... 6709 r.0_5 = D.2042[_20]; 6710 _6 = _4 + r.0_5; 6711 D.2042[_20] = _6; 6712 // Initialization for scan phase: 6713 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2); 6714 _26 = D.2043[_25]; 6715 _27 = D.2042[_25]; 6716 _28 = _26 + _27; 6717 D.2043[_25] = _28; 6718 D.2042[_25] = _28; 6719 // Actual scan phase: 6720 ... 6721 r.1_8 = D.2042[_20]; 6722 ... 6723 The "omp simd array" variable D.2042 holds the privatized copy used 6724 inside of the loop and D.2043 is another one that holds copies of 6725 the current original list item. The separate GOMP_SIMD_LANE ifn 6726 kinds are there in order to allow optimizing the initializer store 6727 and combiner sequence, e.g. if it is originally some C++ish user 6728 defined reduction, but allow the vectorizer to pattern recognize it 6729 and turn into the appropriate vectorized scan. 6730 6731 For exclusive scan, this is slightly different: 6732 #pragma omp simd reduction(inscan,+:r) 6733 for (...) 6734 { 6735 use (r); 6736 #pragma omp scan exclusive (r) 6737 r += something (); 6738 } 6739 shall have body with: 6740 // Initialization for input phase, store the reduction initializer: 6741 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0); 6742 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1); 6743 D.2042[_21] = 0; 6744 // Actual input phase: 6745 ... 6746 r.0_5 = D.2042[_20]; 6747 _6 = _4 + r.0_5; 6748 D.2042[_20] = _6; 6749 // Initialization for scan phase: 6750 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3); 6751 _26 = D.2043[_25]; 6752 D.2044[_25] = _26; 6753 _27 = D.2042[_25]; 6754 _28 = _26 + _27; 6755 D.2043[_25] = _28; 6756 // Actual scan phase: 6757 ... 6758 r.1_8 = D.2044[_20]; 6759 ... */ 6760 6761 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2) 6762 { 6763 /* Match the D.2042[_21] = 0; store above. Just require that 6764 it is a constant or external definition store. 
*/ 6765 if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def) 6766 { 6767 fail_init: 6768 if (dump_enabled_p ()) 6769 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 6770 "unsupported OpenMP scan initializer store.\n"); 6771 return false; 6772 } 6773 6774 if (! loop_vinfo->scan_map) 6775 loop_vinfo->scan_map = new hash_map<tree, tree>; 6776 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0); 6777 tree &cached = loop_vinfo->scan_map->get_or_insert (var); 6778 if (cached) 6779 goto fail_init; 6780 cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info)); 6781 6782 /* These stores can be vectorized normally. */ 6783 return true; 6784 } 6785 6786 if (rhs_dt != vect_internal_def) 6787 { 6788 fail: 6789 if (dump_enabled_p ()) 6790 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 6791 "unsupported OpenMP scan combiner pattern.\n"); 6792 return false; 6793 } 6794 6795 gimple *stmt = STMT_VINFO_STMT (stmt_info); 6796 tree rhs = gimple_assign_rhs1 (stmt); 6797 if (TREE_CODE (rhs) != SSA_NAME) 6798 goto fail; 6799 6800 gimple *other_store_stmt = NULL; 6801 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0); 6802 bool inscan_var_store 6803 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL; 6804 6805 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4) 6806 { 6807 if (!inscan_var_store) 6808 { 6809 use_operand_p use_p; 6810 imm_use_iterator iter; 6811 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs) 6812 { 6813 gimple *use_stmt = USE_STMT (use_p); 6814 if (use_stmt == stmt || is_gimple_debug (use_stmt)) 6815 continue; 6816 if (gimple_bb (use_stmt) != gimple_bb (stmt) 6817 || !is_gimple_assign (use_stmt) 6818 || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS 6819 || other_store_stmt 6820 || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME) 6821 goto fail; 6822 other_store_stmt = use_stmt; 6823 } 6824 if (other_store_stmt == NULL) 6825 goto fail; 6826 rhs = gimple_assign_lhs (other_store_stmt); 6827 if (!single_imm_use (rhs, &use_p, &other_store_stmt)) 6828 goto fail; 6829 } 6830 } 6831 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3) 6832 { 6833 use_operand_p use_p; 6834 imm_use_iterator iter; 6835 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs) 6836 { 6837 gimple *use_stmt = USE_STMT (use_p); 6838 if (use_stmt == stmt || is_gimple_debug (use_stmt)) 6839 continue; 6840 if (other_store_stmt) 6841 goto fail; 6842 other_store_stmt = use_stmt; 6843 } 6844 } 6845 else 6846 goto fail; 6847 6848 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs); 6849 if (gimple_bb (def_stmt) != gimple_bb (stmt) 6850 || !is_gimple_assign (def_stmt) 6851 || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS) 6852 goto fail; 6853 6854 enum tree_code code = gimple_assign_rhs_code (def_stmt); 6855 /* For pointer addition, we should use the normal plus for the vector 6856 operation. 
*/ 6857 switch (code) 6858 { 6859 case POINTER_PLUS_EXPR: 6860 code = PLUS_EXPR; 6861 break; 6862 case MULT_HIGHPART_EXPR: 6863 goto fail; 6864 default: 6865 break; 6866 } 6867 if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code)) 6868 goto fail; 6869 6870 tree rhs1 = gimple_assign_rhs1 (def_stmt); 6871 tree rhs2 = gimple_assign_rhs2 (def_stmt); 6872 if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME) 6873 goto fail; 6874 6875 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1); 6876 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2); 6877 if (gimple_bb (load1_stmt) != gimple_bb (stmt) 6878 || !gimple_assign_load_p (load1_stmt) 6879 || gimple_bb (load2_stmt) != gimple_bb (stmt) 6880 || !gimple_assign_load_p (load2_stmt)) 6881 goto fail; 6882 6883 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt); 6884 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt); 6885 if (load1_stmt_info == NULL 6886 || load2_stmt_info == NULL 6887 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info) 6888 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)) 6889 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info) 6890 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))) 6891 goto fail; 6892 6893 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store) 6894 { 6895 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info); 6896 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR 6897 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0))) 6898 goto fail; 6899 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0); 6900 tree lrhs; 6901 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1))) 6902 lrhs = rhs1; 6903 else 6904 lrhs = rhs2; 6905 use_operand_p use_p; 6906 imm_use_iterator iter; 6907 FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs) 6908 { 6909 gimple *use_stmt = USE_STMT (use_p); 6910 if (use_stmt == def_stmt || is_gimple_debug (use_stmt)) 6911 continue; 6912 if (other_store_stmt) 6913 goto fail; 6914 other_store_stmt = use_stmt; 6915 } 6916 } 6917 6918 if (other_store_stmt == NULL) 6919 goto fail; 6920 if (gimple_bb (other_store_stmt) != gimple_bb (stmt) 6921 || !gimple_store_p (other_store_stmt)) 6922 goto fail; 6923 6924 stmt_vec_info other_store_stmt_info 6925 = loop_vinfo->lookup_stmt (other_store_stmt); 6926 if (other_store_stmt_info == NULL 6927 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info) 6928 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))) 6929 goto fail; 6930 6931 gimple *stmt1 = stmt; 6932 gimple *stmt2 = other_store_stmt; 6933 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store) 6934 std::swap (stmt1, stmt2); 6935 if (scan_operand_equal_p (gimple_assign_lhs (stmt1), 6936 gimple_assign_rhs1 (load2_stmt))) 6937 { 6938 std::swap (rhs1, rhs2); 6939 std::swap (load1_stmt, load2_stmt); 6940 std::swap (load1_stmt_info, load2_stmt_info); 6941 } 6942 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1), 6943 gimple_assign_rhs1 (load1_stmt))) 6944 goto fail; 6945 6946 tree var3 = NULL_TREE; 6947 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3 6948 && !scan_operand_equal_p (gimple_assign_lhs (stmt2), 6949 gimple_assign_rhs1 (load2_stmt))) 6950 goto fail; 6951 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4) 6952 { 6953 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info); 6954 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR 6955 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0))) 6956 goto fail; 6957 var3 = 
TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0); 6958 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3)) 6959 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3)) 6960 || lookup_attribute ("omp simd inscan exclusive", 6961 DECL_ATTRIBUTES (var3))) 6962 goto fail; 6963 } 6964 6965 dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info); 6966 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR 6967 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0))) 6968 goto fail; 6969 6970 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0); 6971 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0); 6972 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1)) 6973 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2)) 6974 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1))) 6975 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2)))) 6976 goto fail; 6977 6978 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1))) 6979 std::swap (var1, var2); 6980 6981 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4) 6982 { 6983 if (!lookup_attribute ("omp simd inscan exclusive", 6984 DECL_ATTRIBUTES (var1))) 6985 goto fail; 6986 var1 = var3; 6987 } 6988 6989 if (loop_vinfo->scan_map == NULL) 6990 goto fail; 6991 tree *init = loop_vinfo->scan_map->get (var1); 6992 if (init == NULL) 6993 goto fail; 6994 6995 /* The IL is as expected, now check if we can actually vectorize it. 6996 Inclusive scan: 6997 _26 = D.2043[_25]; 6998 _27 = D.2042[_25]; 6999 _28 = _26 + _27; 7000 D.2043[_25] = _28; 7001 D.2042[_25] = _28; 7002 should be vectorized as (where _40 is the vectorized rhs 7003 from the D.2042[_21] = 0; store): 7004 _30 = MEM <vector(8) int> [(int *)&D.2043]; 7005 _31 = MEM <vector(8) int> [(int *)&D.2042]; 7006 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>; 7007 _33 = _31 + _32; 7008 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] }; 7009 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>; 7010 _35 = _33 + _34; 7011 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3], 7012 // _31[1]+.._31[4], ... _31[4]+.._31[7] }; 7013 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>; 7014 _37 = _35 + _36; 7015 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3], 7016 // _31[0]+.._31[4], ... _31[0]+.._31[7] }; 7017 _38 = _30 + _37; 7018 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>; 7019 MEM <vector(8) int> [(int *)&D.2043] = _39; 7020 MEM <vector(8) int> [(int *)&D.2042] = _38; 7021 Exclusive scan: 7022 _26 = D.2043[_25]; 7023 D.2044[_25] = _26; 7024 _27 = D.2042[_25]; 7025 _28 = _26 + _27; 7026 D.2043[_25] = _28; 7027 should be vectorized as (where _40 is the vectorized rhs 7028 from the D.2042[_21] = 0; store): 7029 _30 = MEM <vector(8) int> [(int *)&D.2043]; 7030 _31 = MEM <vector(8) int> [(int *)&D.2042]; 7031 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>; 7032 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>; 7033 _34 = _32 + _33; 7034 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3], 7035 // _31[3]+_31[4], ... _31[5]+.._31[6] }; 7036 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>; 7037 _36 = _34 + _35; 7038 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3], 7039 // _31[1]+.._31[4], ... 
	   _31[3]+.._31[6] };
	 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
	 _38 = _36 + _37;
	 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
	 //	    _31[0]+.._31[4], ... _31[0]+.._31[6] };
	 _39 = _30 + _38;
	 _50 = _31 + _39;
	 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
	 MEM <vector(8) int> [(int *)&D.2044] = _39;
	 MEM <vector(8) int> [(int *)&D.2042] = _51;  */
  enum machine_mode vec_mode = TYPE_MODE (vectype);
  optab optab = optab_for_tree_code (code, vectype, optab_default);
  if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
    goto fail;

  int units_log2 = scan_store_can_perm_p (vectype, *init);
  if (units_log2 == -1)
    goto fail;

  return true;
}


/* Function vectorizable_scan_store.

   Helper of vectorizable_store; takes the same arguments as
   vectorizable_store.  Handle only the transformation, checking is
   done in check_scan_store.  */

static bool
vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
			 stmt_vec_info *vec_stmt, int ncopies)
{
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
  vec_info *vinfo = stmt_info->vinfo;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "transform scan store. ncopies = %d\n", ncopies);

  gimple *stmt = STMT_VINFO_STMT (stmt_info);
  tree rhs = gimple_assign_rhs1 (stmt);
  gcc_assert (TREE_CODE (rhs) == SSA_NAME);

  tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
  bool inscan_var_store
    = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;

  if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
    {
      use_operand_p use_p;
      imm_use_iterator iter;
      FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
	{
	  gimple *use_stmt = USE_STMT (use_p);
	  if (use_stmt == stmt || is_gimple_debug (use_stmt))
	    continue;
	  rhs = gimple_assign_lhs (use_stmt);
	  break;
	}
    }

  gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
  enum tree_code code = gimple_assign_rhs_code (def_stmt);
  if (code == POINTER_PLUS_EXPR)
    code = PLUS_EXPR;
  gcc_assert (TREE_CODE_LENGTH (code) == binary_op
	      && commutative_tree_code (code));
  tree rhs1 = gimple_assign_rhs1 (def_stmt);
  tree rhs2 = gimple_assign_rhs2 (def_stmt);
  gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
  gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
  gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
  stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
  stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
  dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
  dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
  tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
  tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);

  if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
    {
      std::swap (rhs1, rhs2);
      std::swap (var1, var2);
      std::swap (load1_dr_info, load2_dr_info);
    }

  tree *init = loop_vinfo->scan_map->get (var1);
  gcc_assert (init);

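  /* A minimal scalar sketch of the shift-and-add scheme that the
     permutations built below implement.  Illustrative only: it assumes
     eight lanes, PLUS_EXPR and a zero initializer, whereas the code that
     follows handles any commutative CODE, any constant lane count and an
     arbitrary *INIT vector:

	 int v[8] = { ... };		    // loaded input lanes
	 for (int step = 1; step < 8; step *= 2)
	   for (int lane = 7; lane >= step; lane--)
	     v[lane] += v[lane - step];	    // inclusive prefix sum

     Each STEP corresponds to one VEC_PERM_EXPR (shifting the vector up by
     STEP lanes and filling the low lanes from the initializer) followed by
     one vector CODE operation, i.e. units_log2 permute/op pairs.  */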
unsigned HOST_WIDE_INT nunits; 7132 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)) 7133 gcc_unreachable (); 7134 auto_vec<enum scan_store_kind, 16> use_whole_vector; 7135 int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector); 7136 gcc_assert (units_log2 > 0); 7137 auto_vec<tree, 16> perms; 7138 perms.quick_grow (units_log2 + 1); 7139 tree zero_vec = NULL_TREE, masktype = NULL_TREE; 7140 for (int i = 0; i <= units_log2; ++i) 7141 { 7142 unsigned HOST_WIDE_INT j, k; 7143 vec_perm_builder sel (nunits, nunits, 1); 7144 sel.quick_grow (nunits); 7145 if (i == units_log2) 7146 for (j = 0; j < nunits; ++j) 7147 sel[j] = nunits - 1; 7148 else 7149 { 7150 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j) 7151 sel[j] = j; 7152 for (k = 0; j < nunits; ++j, ++k) 7153 sel[j] = nunits + k; 7154 } 7155 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits); 7156 if (!use_whole_vector.is_empty () 7157 && use_whole_vector[i] != scan_store_kind_perm) 7158 { 7159 if (zero_vec == NULL_TREE) 7160 zero_vec = build_zero_cst (vectype); 7161 if (masktype == NULL_TREE 7162 && use_whole_vector[i] == scan_store_kind_lshift_cond) 7163 masktype = truth_type_for (vectype); 7164 perms[i] = vect_gen_perm_mask_any (vectype, indices); 7165 } 7166 else 7167 perms[i] = vect_gen_perm_mask_checked (vectype, indices); 7168 } 7169 7170 stmt_vec_info prev_stmt_info = NULL; 7171 tree vec_oprnd1 = NULL_TREE; 7172 tree vec_oprnd2 = NULL_TREE; 7173 tree vec_oprnd3 = NULL_TREE; 7174 tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr); 7175 tree dataref_offset = build_int_cst (ref_type, 0); 7176 tree bump = vect_get_data_ptr_increment (dr_info, vectype, VMAT_CONTIGUOUS); 7177 tree ldataref_ptr = NULL_TREE; 7178 tree orig = NULL_TREE; 7179 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store) 7180 ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr); 7181 for (int j = 0; j < ncopies; j++) 7182 { 7183 stmt_vec_info new_stmt_info; 7184 if (j == 0) 7185 { 7186 vec_oprnd1 = vect_get_vec_def_for_operand (*init, stmt_info); 7187 if (ldataref_ptr == NULL) 7188 vec_oprnd2 = vect_get_vec_def_for_operand (rhs1, stmt_info); 7189 vec_oprnd3 = vect_get_vec_def_for_operand (rhs2, stmt_info); 7190 orig = vec_oprnd3; 7191 } 7192 else 7193 { 7194 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1); 7195 if (ldataref_ptr == NULL) 7196 vec_oprnd2 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd2); 7197 vec_oprnd3 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd3); 7198 if (!inscan_var_store) 7199 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump); 7200 } 7201 7202 if (ldataref_ptr) 7203 { 7204 vec_oprnd2 = make_ssa_name (vectype); 7205 tree data_ref = fold_build2 (MEM_REF, vectype, 7206 unshare_expr (ldataref_ptr), 7207 dataref_offset); 7208 vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr)); 7209 gimple *g = gimple_build_assign (vec_oprnd2, data_ref); 7210 new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi); 7211 if (prev_stmt_info == NULL) 7212 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info; 7213 else 7214 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info; 7215 prev_stmt_info = new_stmt_info; 7216 } 7217 7218 tree v = vec_oprnd2; 7219 for (int i = 0; i < units_log2; ++i) 7220 { 7221 tree new_temp = make_ssa_name (vectype); 7222 gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR, 7223 (zero_vec 7224 && (use_whole_vector[i] 7225 != scan_store_kind_perm)) 7226 ? 
zero_vec : vec_oprnd1, v, 7227 perms[i]); 7228 new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi); 7229 if (prev_stmt_info == NULL) 7230 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info; 7231 else 7232 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info; 7233 prev_stmt_info = new_stmt_info; 7234 7235 if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond) 7236 { 7237 /* Whole vector shift shifted in zero bits, but if *init 7238 is not initializer_zerop, we need to replace those elements 7239 with elements from vec_oprnd1. */ 7240 tree_vector_builder vb (masktype, nunits, 1); 7241 for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k) 7242 vb.quick_push (k < (HOST_WIDE_INT_1U << i) 7243 ? boolean_false_node : boolean_true_node); 7244 7245 tree new_temp2 = make_ssa_name (vectype); 7246 g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (), 7247 new_temp, vec_oprnd1); 7248 new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi); 7249 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info; 7250 prev_stmt_info = new_stmt_info; 7251 new_temp = new_temp2; 7252 } 7253 7254 /* For exclusive scan, perform the perms[i] permutation once 7255 more. */ 7256 if (i == 0 7257 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 7258 && v == vec_oprnd2) 7259 { 7260 v = new_temp; 7261 --i; 7262 continue; 7263 } 7264 7265 tree new_temp2 = make_ssa_name (vectype); 7266 g = gimple_build_assign (new_temp2, code, v, new_temp); 7267 new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi); 7268 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info; 7269 prev_stmt_info = new_stmt_info; 7270 7271 v = new_temp2; 7272 } 7273 7274 tree new_temp = make_ssa_name (vectype); 7275 gimple *g = gimple_build_assign (new_temp, code, orig, v); 7276 new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi); 7277 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info; 7278 prev_stmt_info = new_stmt_info; 7279 7280 tree last_perm_arg = new_temp; 7281 /* For exclusive scan, new_temp computed above is the exclusive scan 7282 prefix sum. Turn it into inclusive prefix sum for the broadcast 7283 of the last element into orig. 
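	     For example (an assumed 4-lane PLUS_EXPR case with a zero
	     initializer, ignoring the carry from earlier copies): if the
	     loaded lanes are { a, b, c, d }, NEW_TEMP holds the exclusive
	     result { 0, a, a+b, a+b+c }; adding VEC_OPRND2, i.e. the loaded
	     lanes themselves, yields the inclusive result
	     { a, a+b, a+b+c, a+b+c+d }, whose last lane is what the
	     following permutation broadcasts.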
*/ 7284 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4) 7285 { 7286 last_perm_arg = make_ssa_name (vectype); 7287 g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2); 7288 new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi); 7289 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info; 7290 prev_stmt_info = new_stmt_info; 7291 } 7292 7293 orig = make_ssa_name (vectype); 7294 g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg, 7295 last_perm_arg, perms[units_log2]); 7296 new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi); 7297 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info; 7298 prev_stmt_info = new_stmt_info; 7299 7300 if (!inscan_var_store) 7301 { 7302 tree data_ref = fold_build2 (MEM_REF, vectype, 7303 unshare_expr (dataref_ptr), 7304 dataref_offset); 7305 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr)); 7306 g = gimple_build_assign (data_ref, new_temp); 7307 new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi); 7308 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info; 7309 prev_stmt_info = new_stmt_info; 7310 } 7311 } 7312 7313 if (inscan_var_store) 7314 for (int j = 0; j < ncopies; j++) 7315 { 7316 if (j != 0) 7317 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump); 7318 7319 tree data_ref = fold_build2 (MEM_REF, vectype, 7320 unshare_expr (dataref_ptr), 7321 dataref_offset); 7322 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr)); 7323 gimple *g = gimple_build_assign (data_ref, orig); 7324 stmt_vec_info new_stmt_info 7325 = vect_finish_stmt_generation (stmt_info, g, gsi); 7326 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info; 7327 prev_stmt_info = new_stmt_info; 7328 } 7329 return true; 7330 } 7331 7332 7333 /* Function vectorizable_store. 7334 7335 Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure) 7336 that can be vectorized. 7337 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized 7338 stmt to replace it, put it in VEC_STMT, and insert it at GSI. 7339 Return true if STMT_INFO is vectorizable in this way. */ 7340 7341 static bool 7342 vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, 7343 stmt_vec_info *vec_stmt, slp_tree slp_node, 7344 stmt_vector_for_cost *cost_vec) 7345 { 7346 tree data_ref; 7347 tree op; 7348 tree vec_oprnd = NULL_TREE; 7349 tree elem_type; 7350 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 7351 class loop *loop = NULL; 7352 machine_mode vec_mode; 7353 tree dummy; 7354 enum dr_alignment_support alignment_support_scheme; 7355 enum vect_def_type rhs_dt = vect_unknown_def_type; 7356 enum vect_def_type mask_dt = vect_unknown_def_type; 7357 stmt_vec_info prev_stmt_info = NULL; 7358 tree dataref_ptr = NULL_TREE; 7359 tree dataref_offset = NULL_TREE; 7360 gimple *ptr_incr = NULL; 7361 int ncopies; 7362 int j; 7363 stmt_vec_info first_stmt_info; 7364 bool grouped_store; 7365 unsigned int group_size, i; 7366 vec<tree> oprnds = vNULL; 7367 vec<tree> result_chain = vNULL; 7368 tree offset = NULL_TREE; 7369 vec<tree> vec_oprnds = vNULL; 7370 bool slp = (slp_node != NULL); 7371 unsigned int vec_num; 7372 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); 7373 vec_info *vinfo = stmt_info->vinfo; 7374 tree aggr_type; 7375 gather_scatter_info gs_info; 7376 poly_uint64 vf; 7377 vec_load_store_type vls_type; 7378 tree ref_type; 7379 7380 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) 7381 return false; 7382 7383 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def 7384 && ! 
vec_stmt) 7385 return false; 7386 7387 /* Is vectorizable store? */ 7388 7389 tree mask = NULL_TREE, mask_vectype = NULL_TREE; 7390 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt)) 7391 { 7392 tree scalar_dest = gimple_assign_lhs (assign); 7393 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR 7394 && is_pattern_stmt_p (stmt_info)) 7395 scalar_dest = TREE_OPERAND (scalar_dest, 0); 7396 if (TREE_CODE (scalar_dest) != ARRAY_REF 7397 && TREE_CODE (scalar_dest) != BIT_FIELD_REF 7398 && TREE_CODE (scalar_dest) != INDIRECT_REF 7399 && TREE_CODE (scalar_dest) != COMPONENT_REF 7400 && TREE_CODE (scalar_dest) != IMAGPART_EXPR 7401 && TREE_CODE (scalar_dest) != REALPART_EXPR 7402 && TREE_CODE (scalar_dest) != MEM_REF) 7403 return false; 7404 } 7405 else 7406 { 7407 gcall *call = dyn_cast <gcall *> (stmt_info->stmt); 7408 if (!call || !gimple_call_internal_p (call)) 7409 return false; 7410 7411 internal_fn ifn = gimple_call_internal_fn (call); 7412 if (!internal_store_fn_p (ifn)) 7413 return false; 7414 7415 if (slp_node != NULL) 7416 { 7417 if (dump_enabled_p ()) 7418 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 7419 "SLP of masked stores not supported.\n"); 7420 return false; 7421 } 7422 7423 int mask_index = internal_fn_mask_index (ifn); 7424 if (mask_index >= 0) 7425 { 7426 mask = gimple_call_arg (call, mask_index); 7427 if (!vect_check_scalar_mask (stmt_info, mask, &mask_dt, 7428 &mask_vectype)) 7429 return false; 7430 } 7431 } 7432 7433 op = vect_get_store_rhs (stmt_info); 7434 7435 /* Cannot have hybrid store SLP -- that would mean storing to the 7436 same location twice. */ 7437 gcc_assert (slp == PURE_SLP_STMT (stmt_info)); 7438 7439 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE; 7440 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); 7441 7442 if (loop_vinfo) 7443 { 7444 loop = LOOP_VINFO_LOOP (loop_vinfo); 7445 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); 7446 } 7447 else 7448 vf = 1; 7449 7450 /* Multiple types in SLP are handled by creating the appropriate number of 7451 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in 7452 case of SLP. */ 7453 if (slp) 7454 ncopies = 1; 7455 else 7456 ncopies = vect_get_num_copies (loop_vinfo, vectype); 7457 7458 gcc_assert (ncopies >= 1); 7459 7460 /* FORNOW. This restriction should be relaxed. 
*/ 7461 if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1) 7462 { 7463 if (dump_enabled_p ()) 7464 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 7465 "multiple types in nested loop.\n"); 7466 return false; 7467 } 7468 7469 if (!vect_check_store_rhs (stmt_info, op, &rhs_dt, &rhs_vectype, &vls_type)) 7470 return false; 7471 7472 elem_type = TREE_TYPE (vectype); 7473 vec_mode = TYPE_MODE (vectype); 7474 7475 if (!STMT_VINFO_DATA_REF (stmt_info)) 7476 return false; 7477 7478 vect_memory_access_type memory_access_type; 7479 if (!get_load_store_type (stmt_info, vectype, slp, mask, vls_type, ncopies, 7480 &memory_access_type, &gs_info)) 7481 return false; 7482 7483 if (mask) 7484 { 7485 if (memory_access_type == VMAT_CONTIGUOUS) 7486 { 7487 if (!VECTOR_MODE_P (vec_mode) 7488 || !can_vec_mask_load_store_p (vec_mode, 7489 TYPE_MODE (mask_vectype), false)) 7490 return false; 7491 } 7492 else if (memory_access_type != VMAT_LOAD_STORE_LANES 7493 && (memory_access_type != VMAT_GATHER_SCATTER 7494 || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype)))) 7495 { 7496 if (dump_enabled_p ()) 7497 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 7498 "unsupported access type for masked store.\n"); 7499 return false; 7500 } 7501 } 7502 else 7503 { 7504 /* FORNOW. In some cases can vectorize even if data-type not supported 7505 (e.g. - array initialization with 0). */ 7506 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing) 7507 return false; 7508 } 7509 7510 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL; 7511 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info) 7512 && memory_access_type != VMAT_GATHER_SCATTER 7513 && (slp || memory_access_type != VMAT_CONTIGUOUS)); 7514 if (grouped_store) 7515 { 7516 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info); 7517 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info); 7518 group_size = DR_GROUP_SIZE (first_stmt_info); 7519 } 7520 else 7521 { 7522 first_stmt_info = stmt_info; 7523 first_dr_info = dr_info; 7524 group_size = vec_num = 1; 7525 } 7526 7527 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt) 7528 { 7529 if (!check_scan_store (stmt_info, vectype, rhs_dt, slp, mask, 7530 memory_access_type)) 7531 return false; 7532 } 7533 7534 if (!vec_stmt) /* transformation not required. */ 7535 { 7536 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type; 7537 7538 if (loop_vinfo 7539 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)) 7540 check_load_store_masking (loop_vinfo, vectype, vls_type, group_size, 7541 memory_access_type, &gs_info, mask); 7542 7543 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type; 7544 vect_model_store_cost (stmt_info, ncopies, rhs_dt, memory_access_type, 7545 vls_type, slp_node, cost_vec); 7546 return true; 7547 } 7548 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info)); 7549 7550 /* Transform. 
*/ 7551 7552 ensure_base_align (dr_info); 7553 7554 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl) 7555 { 7556 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src; 7557 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl)); 7558 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype; 7559 tree ptr, var, scale, vec_mask; 7560 tree mask_arg = NULL_TREE, mask_op = NULL_TREE, perm_mask = NULL_TREE; 7561 tree mask_halfvectype = mask_vectype; 7562 edge pe = loop_preheader_edge (loop); 7563 gimple_seq seq; 7564 basic_block new_bb; 7565 enum { NARROW, NONE, WIDEN } modifier; 7566 poly_uint64 scatter_off_nunits 7567 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype); 7568 7569 if (known_eq (nunits, scatter_off_nunits)) 7570 modifier = NONE; 7571 else if (known_eq (nunits * 2, scatter_off_nunits)) 7572 { 7573 modifier = WIDEN; 7574 7575 /* Currently gathers and scatters are only supported for 7576 fixed-length vectors. */ 7577 unsigned int count = scatter_off_nunits.to_constant (); 7578 vec_perm_builder sel (count, count, 1); 7579 for (i = 0; i < (unsigned int) count; ++i) 7580 sel.quick_push (i | (count / 2)); 7581 7582 vec_perm_indices indices (sel, 1, count); 7583 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, 7584 indices); 7585 gcc_assert (perm_mask != NULL_TREE); 7586 } 7587 else if (known_eq (nunits, scatter_off_nunits * 2)) 7588 { 7589 modifier = NARROW; 7590 7591 /* Currently gathers and scatters are only supported for 7592 fixed-length vectors. */ 7593 unsigned int count = nunits.to_constant (); 7594 vec_perm_builder sel (count, count, 1); 7595 for (i = 0; i < (unsigned int) count; ++i) 7596 sel.quick_push (i | (count / 2)); 7597 7598 vec_perm_indices indices (sel, 2, count); 7599 perm_mask = vect_gen_perm_mask_checked (vectype, indices); 7600 gcc_assert (perm_mask != NULL_TREE); 7601 ncopies *= 2; 7602 7603 if (mask) 7604 mask_halfvectype = truth_type_for (gs_info.offset_vectype); 7605 } 7606 else 7607 gcc_unreachable (); 7608 7609 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl)); 7610 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); 7611 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); 7612 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); 7613 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); 7614 scaletype = TREE_VALUE (arglist); 7615 7616 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE 7617 && TREE_CODE (rettype) == VOID_TYPE); 7618 7619 ptr = fold_convert (ptrtype, gs_info.base); 7620 if (!is_gimple_min_invariant (ptr)) 7621 { 7622 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE); 7623 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq); 7624 gcc_assert (!new_bb); 7625 } 7626 7627 if (mask == NULL_TREE) 7628 { 7629 mask_arg = build_int_cst (masktype, -1); 7630 mask_arg = vect_init_vector (stmt_info, mask_arg, masktype, NULL); 7631 } 7632 7633 scale = build_int_cst (scaletype, gs_info.scale); 7634 7635 prev_stmt_info = NULL; 7636 for (j = 0; j < ncopies; ++j) 7637 { 7638 if (j == 0) 7639 { 7640 src = vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt_info); 7641 op = vec_oprnd0 = vect_get_vec_def_for_operand (gs_info.offset, 7642 stmt_info); 7643 if (mask) 7644 { 7645 tree mask_vectype = truth_type_for (vectype); 7646 mask_op = vec_mask 7647 = vect_get_vec_def_for_operand (mask, 7648 stmt_info, mask_vectype); 7649 } 7650 } 7651 else if (modifier != NONE && (j & 1)) 7652 { 7653 if (modifier == WIDEN) 7654 { 7655 src 7656 = vec_oprnd1 = vect_get_vec_def_for_stmt_copy 
(vinfo, 7657 vec_oprnd1); 7658 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask, 7659 stmt_info, gsi); 7660 if (mask) 7661 mask_op 7662 = vec_mask = vect_get_vec_def_for_stmt_copy (vinfo, 7663 vec_mask); 7664 } 7665 else if (modifier == NARROW) 7666 { 7667 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask, 7668 stmt_info, gsi); 7669 op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo, 7670 vec_oprnd0); 7671 } 7672 else 7673 gcc_unreachable (); 7674 } 7675 else 7676 { 7677 src = vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo, 7678 vec_oprnd1); 7679 op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo, 7680 vec_oprnd0); 7681 if (mask) 7682 mask_op = vec_mask = vect_get_vec_def_for_stmt_copy (vinfo, 7683 vec_mask); 7684 } 7685 7686 if (!useless_type_conversion_p (srctype, TREE_TYPE (src))) 7687 { 7688 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)), 7689 TYPE_VECTOR_SUBPARTS (srctype))); 7690 var = vect_get_new_ssa_name (srctype, vect_simple_var); 7691 src = build1 (VIEW_CONVERT_EXPR, srctype, src); 7692 gassign *new_stmt 7693 = gimple_build_assign (var, VIEW_CONVERT_EXPR, src); 7694 vect_finish_stmt_generation (stmt_info, new_stmt, gsi); 7695 src = var; 7696 } 7697 7698 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op))) 7699 { 7700 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)), 7701 TYPE_VECTOR_SUBPARTS (idxtype))); 7702 var = vect_get_new_ssa_name (idxtype, vect_simple_var); 7703 op = build1 (VIEW_CONVERT_EXPR, idxtype, op); 7704 gassign *new_stmt 7705 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op); 7706 vect_finish_stmt_generation (stmt_info, new_stmt, gsi); 7707 op = var; 7708 } 7709 7710 if (mask) 7711 { 7712 tree utype; 7713 mask_arg = mask_op; 7714 if (modifier == NARROW) 7715 { 7716 var = vect_get_new_ssa_name (mask_halfvectype, 7717 vect_simple_var); 7718 gassign *new_stmt 7719 = gimple_build_assign (var, (j & 1) ? 
VEC_UNPACK_HI_EXPR 7720 : VEC_UNPACK_LO_EXPR, 7721 mask_op); 7722 vect_finish_stmt_generation (stmt_info, new_stmt, gsi); 7723 mask_arg = var; 7724 } 7725 tree optype = TREE_TYPE (mask_arg); 7726 if (TYPE_MODE (masktype) == TYPE_MODE (optype)) 7727 utype = masktype; 7728 else 7729 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1); 7730 var = vect_get_new_ssa_name (utype, vect_scalar_var); 7731 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg); 7732 gassign *new_stmt 7733 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg); 7734 vect_finish_stmt_generation (stmt_info, new_stmt, gsi); 7735 mask_arg = var; 7736 if (!useless_type_conversion_p (masktype, utype)) 7737 { 7738 gcc_assert (TYPE_PRECISION (utype) 7739 <= TYPE_PRECISION (masktype)); 7740 var = vect_get_new_ssa_name (masktype, vect_scalar_var); 7741 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg); 7742 vect_finish_stmt_generation (stmt_info, new_stmt, gsi); 7743 mask_arg = var; 7744 } 7745 } 7746 7747 gcall *new_stmt 7748 = gimple_build_call (gs_info.decl, 5, ptr, mask_arg, op, src, scale); 7749 stmt_vec_info new_stmt_info 7750 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi); 7751 7752 if (prev_stmt_info == NULL) 7753 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info; 7754 else 7755 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info; 7756 prev_stmt_info = new_stmt_info; 7757 } 7758 return true; 7759 } 7760 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3) 7761 return vectorizable_scan_store (stmt_info, gsi, vec_stmt, ncopies); 7762 7763 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) 7764 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++; 7765 7766 if (grouped_store) 7767 { 7768 /* FORNOW */ 7769 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info)); 7770 7771 /* We vectorize all the stmts of the interleaving group when we 7772 reach the last stmt in the group. */ 7773 if (DR_GROUP_STORE_COUNT (first_stmt_info) 7774 < DR_GROUP_SIZE (first_stmt_info) 7775 && !slp) 7776 { 7777 *vec_stmt = NULL; 7778 return true; 7779 } 7780 7781 if (slp) 7782 { 7783 grouped_store = false; 7784 /* VEC_NUM is the number of vect stmts to be created for this 7785 group. */ 7786 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); 7787 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0]; 7788 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info) 7789 == first_stmt_info); 7790 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info); 7791 op = vect_get_store_rhs (first_stmt_info); 7792 } 7793 else 7794 /* VEC_NUM is the number of vect stmts to be created for this 7795 group. */ 7796 vec_num = group_size; 7797 7798 ref_type = get_group_alias_ptr_type (first_stmt_info); 7799 } 7800 else 7801 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr)); 7802 7803 if (dump_enabled_p ()) 7804 dump_printf_loc (MSG_NOTE, vect_location, 7805 "transform store. ncopies = %d\n", ncopies); 7806 7807 if (memory_access_type == VMAT_ELEMENTWISE 7808 || memory_access_type == VMAT_STRIDED_SLP) 7809 { 7810 gimple_stmt_iterator incr_gsi; 7811 bool insert_after; 7812 gimple *incr; 7813 tree offvar; 7814 tree ivstep; 7815 tree running_off; 7816 tree stride_base, stride_step, alias_off; 7817 tree vec_oprnd; 7818 tree dr_offset; 7819 unsigned int g; 7820 /* Checked by get_load_store_type. 
*/ 7821 unsigned int const_nunits = nunits.to_constant (); 7822 7823 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)); 7824 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info)); 7825 7826 dr_offset = get_dr_vinfo_offset (first_dr_info); 7827 stride_base 7828 = fold_build_pointer_plus 7829 (DR_BASE_ADDRESS (first_dr_info->dr), 7830 size_binop (PLUS_EXPR, 7831 convert_to_ptrofftype (dr_offset), 7832 convert_to_ptrofftype (DR_INIT (first_dr_info->dr)))); 7833 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr)); 7834 7835 /* For a store with loop-invariant (but other than power-of-2) 7836 stride (i.e. not a grouped access) like so: 7837 7838 for (i = 0; i < n; i += stride) 7839 array[i] = ...; 7840 7841 we generate a new induction variable and new stores from 7842 the components of the (vectorized) rhs: 7843 7844 for (j = 0; ; j += VF*stride) 7845 vectemp = ...; 7846 tmp1 = vectemp[0]; 7847 array[j] = tmp1; 7848 tmp2 = vectemp[1]; 7849 array[j + stride] = tmp2; 7850 ... 7851 */ 7852 7853 unsigned nstores = const_nunits; 7854 unsigned lnel = 1; 7855 tree ltype = elem_type; 7856 tree lvectype = vectype; 7857 if (slp) 7858 { 7859 if (group_size < const_nunits 7860 && const_nunits % group_size == 0) 7861 { 7862 nstores = const_nunits / group_size; 7863 lnel = group_size; 7864 ltype = build_vector_type (elem_type, group_size); 7865 lvectype = vectype; 7866 7867 /* First check if vec_extract optab doesn't support extraction 7868 of vector elts directly. */ 7869 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type); 7870 machine_mode vmode; 7871 if (!VECTOR_MODE_P (TYPE_MODE (vectype)) 7872 || !related_vector_mode (TYPE_MODE (vectype), elmode, 7873 group_size).exists (&vmode) 7874 || (convert_optab_handler (vec_extract_optab, 7875 TYPE_MODE (vectype), vmode) 7876 == CODE_FOR_nothing)) 7877 { 7878 /* Try to avoid emitting an extract of vector elements 7879 by performing the extracts using an integer type of the 7880 same size, extracting from a vector of those and then 7881 re-interpreting it as the original vector type if 7882 supported. */ 7883 unsigned lsize 7884 = group_size * GET_MODE_BITSIZE (elmode); 7885 unsigned int lnunits = const_nunits / group_size; 7886 /* If we can't construct such a vector fall back to 7887 element extracts from the original vector type and 7888 element size stores. */ 7889 if (int_mode_for_size (lsize, 0).exists (&elmode) 7890 && VECTOR_MODE_P (TYPE_MODE (vectype)) 7891 && related_vector_mode (TYPE_MODE (vectype), elmode, 7892 lnunits).exists (&vmode) 7893 && (convert_optab_handler (vec_extract_optab, 7894 vmode, elmode) 7895 != CODE_FOR_nothing)) 7896 { 7897 nstores = lnunits; 7898 lnel = group_size; 7899 ltype = build_nonstandard_integer_type (lsize, 1); 7900 lvectype = build_vector_type (ltype, nstores); 7901 } 7902 /* Else fall back to vector extraction anyway. 7903 Fewer stores are more important than avoiding spilling 7904 of the vector we extract from. Compared to the 7905 construction case in vectorizable_load no store-forwarding 7906 issue exists here for reasonable archs. 
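		 As a concrete, assumed example of the integer punning above:
		 storing groups of two SImode elements out of a V8SImode
		 vector gives lsize == 64, so the vector is viewed as
		 V4DImode and four DImode extractions and stores are emitted
		 instead of eight SImode ones.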
*/ 7907 } 7908 } 7909 else if (group_size >= const_nunits 7910 && group_size % const_nunits == 0) 7911 { 7912 nstores = 1; 7913 lnel = const_nunits; 7914 ltype = vectype; 7915 lvectype = vectype; 7916 } 7917 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type)); 7918 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); 7919 } 7920 7921 ivstep = stride_step; 7922 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep, 7923 build_int_cst (TREE_TYPE (ivstep), vf)); 7924 7925 standard_iv_increment_position (loop, &incr_gsi, &insert_after); 7926 7927 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base); 7928 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep); 7929 create_iv (stride_base, ivstep, NULL, 7930 loop, &incr_gsi, insert_after, 7931 &offvar, NULL); 7932 incr = gsi_stmt (incr_gsi); 7933 loop_vinfo->add_stmt (incr); 7934 7935 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step); 7936 7937 prev_stmt_info = NULL; 7938 alias_off = build_int_cst (ref_type, 0); 7939 stmt_vec_info next_stmt_info = first_stmt_info; 7940 for (g = 0; g < group_size; g++) 7941 { 7942 running_off = offvar; 7943 if (g) 7944 { 7945 tree size = TYPE_SIZE_UNIT (ltype); 7946 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g), 7947 size); 7948 tree newoff = copy_ssa_name (running_off, NULL); 7949 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR, 7950 running_off, pos); 7951 vect_finish_stmt_generation (stmt_info, incr, gsi); 7952 running_off = newoff; 7953 } 7954 unsigned int group_el = 0; 7955 unsigned HOST_WIDE_INT 7956 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype))); 7957 for (j = 0; j < ncopies; j++) 7958 { 7959 /* We've set op and dt above, from vect_get_store_rhs, 7960 and first_stmt_info == stmt_info. */ 7961 if (j == 0) 7962 { 7963 if (slp) 7964 { 7965 vect_get_vec_defs (op, NULL_TREE, stmt_info, 7966 &vec_oprnds, NULL, slp_node); 7967 vec_oprnd = vec_oprnds[0]; 7968 } 7969 else 7970 { 7971 op = vect_get_store_rhs (next_stmt_info); 7972 vec_oprnd = vect_get_vec_def_for_operand 7973 (op, next_stmt_info); 7974 } 7975 } 7976 else 7977 { 7978 if (slp) 7979 vec_oprnd = vec_oprnds[j]; 7980 else 7981 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, 7982 vec_oprnd); 7983 } 7984 /* Pun the vector to extract from if necessary. */ 7985 if (lvectype != vectype) 7986 { 7987 tree tem = make_ssa_name (lvectype); 7988 gimple *pun 7989 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR, 7990 lvectype, vec_oprnd)); 7991 vect_finish_stmt_generation (stmt_info, pun, gsi); 7992 vec_oprnd = tem; 7993 } 7994 for (i = 0; i < nstores; i++) 7995 { 7996 tree newref, newoff; 7997 gimple *incr, *assign; 7998 tree size = TYPE_SIZE (ltype); 7999 /* Extract the i'th component. */ 8000 tree pos = fold_build2 (MULT_EXPR, bitsizetype, 8001 bitsize_int (i), size); 8002 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd, 8003 size, pos); 8004 8005 elem = force_gimple_operand_gsi (gsi, elem, true, 8006 NULL_TREE, true, 8007 GSI_SAME_STMT); 8008 8009 tree this_off = build_int_cst (TREE_TYPE (alias_off), 8010 group_el * elsz); 8011 newref = build2 (MEM_REF, ltype, 8012 running_off, this_off); 8013 vect_copy_ref_info (newref, DR_REF (first_dr_info->dr)); 8014 8015 /* And store it to *running_off. */ 8016 assign = gimple_build_assign (newref, elem); 8017 stmt_vec_info assign_info 8018 = vect_finish_stmt_generation (stmt_info, assign, gsi); 8019 8020 group_el += lnel; 8021 if (! 
slp 8022 || group_el == group_size) 8023 { 8024 newoff = copy_ssa_name (running_off, NULL); 8025 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR, 8026 running_off, stride_step); 8027 vect_finish_stmt_generation (stmt_info, incr, gsi); 8028 8029 running_off = newoff; 8030 group_el = 0; 8031 } 8032 if (g == group_size - 1 8033 && !slp) 8034 { 8035 if (j == 0 && i == 0) 8036 STMT_VINFO_VEC_STMT (stmt_info) 8037 = *vec_stmt = assign_info; 8038 else 8039 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign_info; 8040 prev_stmt_info = assign_info; 8041 } 8042 } 8043 } 8044 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info); 8045 if (slp) 8046 break; 8047 } 8048 8049 vec_oprnds.release (); 8050 return true; 8051 } 8052 8053 auto_vec<tree> dr_chain (group_size); 8054 oprnds.create (group_size); 8055 8056 /* Gather-scatter accesses perform only component accesses, alignment 8057 is irrelevant for them. */ 8058 if (memory_access_type == VMAT_GATHER_SCATTER) 8059 alignment_support_scheme = dr_unaligned_supported; 8060 else 8061 alignment_support_scheme 8062 = vect_supportable_dr_alignment (first_dr_info, false); 8063 8064 gcc_assert (alignment_support_scheme); 8065 vec_loop_masks *loop_masks 8066 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo) 8067 ? &LOOP_VINFO_MASKS (loop_vinfo) 8068 : NULL); 8069 /* Targets with store-lane instructions must not require explicit 8070 realignment. vect_supportable_dr_alignment always returns either 8071 dr_aligned or dr_unaligned_supported for masked operations. */ 8072 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES 8073 && !mask 8074 && !loop_masks) 8075 || alignment_support_scheme == dr_aligned 8076 || alignment_support_scheme == dr_unaligned_supported); 8077 8078 if (memory_access_type == VMAT_CONTIGUOUS_DOWN 8079 || memory_access_type == VMAT_CONTIGUOUS_REVERSE) 8080 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1); 8081 8082 tree bump; 8083 tree vec_offset = NULL_TREE; 8084 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) 8085 { 8086 aggr_type = NULL_TREE; 8087 bump = NULL_TREE; 8088 } 8089 else if (memory_access_type == VMAT_GATHER_SCATTER) 8090 { 8091 aggr_type = elem_type; 8092 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info, 8093 &bump, &vec_offset); 8094 } 8095 else 8096 { 8097 if (memory_access_type == VMAT_LOAD_STORE_LANES) 8098 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits); 8099 else 8100 aggr_type = vectype; 8101 bump = vect_get_data_ptr_increment (dr_info, aggr_type, 8102 memory_access_type); 8103 } 8104 8105 if (mask) 8106 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true; 8107 8108 /* In case the vectorization factor (VF) is bigger than the number 8109 of elements that we can fit in a vectype (nunits), we have to generate 8110 more than one vector stmt - i.e - we need to "unroll" the 8111 vector stmt by a factor VF/nunits. For more details see documentation in 8112 vect_get_vec_def_for_copy_stmt. 
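     For example, with purely illustrative numbers: a vectorization factor
     of 8 and a 4-lane vectype give ncopies == 2, so the loop below executes
     twice and the second copy is chained to the first through
     STMT_VINFO_RELATED_STMT.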
  */

  /* In case of interleaving (non-unit grouped access):

	S1:  &base + 2 = x2
	S2:  &base = x0
	S3:  &base + 1 = x1
	S4:  &base + 3 = x3

     We create vectorized stores starting from the base address (the access
     of the first stmt in the chain, S2 in the above example), when the last
     store stmt of the chain (S4) is reached:

	VS1: &base = vx2
	VS2: &base + vec_size*1 = vx0
	VS3: &base + vec_size*2 = vx1
	VS4: &base + vec_size*3 = vx3

     Then permutation statements are generated:

	VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
	VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
	...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_store_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_store_chain()).

     In case of both multiple types and interleaving, the above vector stores
     and permutation stmts are created for every copy.  The result vector
     stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
     corresponding STMT_VINFO_RELATED_STMT for the next copies.
   */

  prev_stmt_info = NULL;
  tree vec_mask = NULL_TREE;
  for (j = 0; j < ncopies; j++)
    {
      stmt_vec_info new_stmt_info;
      if (j == 0)
	{
	  if (slp)
	    {
	      /* Get vectorized arguments for SLP_NODE.  */
	      vect_get_vec_defs (op, NULL_TREE, stmt_info, &vec_oprnds,
				 NULL, slp_node);

	      vec_oprnd = vec_oprnds[0];
	    }
	  else
	    {
	      /* For interleaved stores we collect vectorized defs for all the
		 stores in the group in DR_CHAIN and OPRNDS.  DR_CHAIN is then
		 used as an input to vect_permute_store_chain(), and OPRNDS as
		 an input to vect_get_vec_def_for_stmt_copy() for the next
		 copy.

		 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
		 and OPRNDS are of size 1.  */
	      stmt_vec_info next_stmt_info = first_stmt_info;
	      for (i = 0; i < group_size; i++)
		{
		  /* Since gaps are not supported for interleaved stores,
		     DR_GROUP_SIZE is the exact number of stmts in the chain.
		     Therefore, NEXT_STMT_INFO can't be NULL_TREE.  In case
		     that there is no interleaving, DR_GROUP_SIZE is 1,
		     and only one iteration of the loop will be executed.  */
		  op = vect_get_store_rhs (next_stmt_info);
		  vec_oprnd = vect_get_vec_def_for_operand
				(op, next_stmt_info);
		  dr_chain.quick_push (vec_oprnd);
		  oprnds.quick_push (vec_oprnd);
		  next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
		}
	      if (mask)
		vec_mask = vect_get_vec_def_for_operand (mask, stmt_info,
							 mask_vectype);
	    }

	  /* We should have caught mismatched types earlier.
*/ 8192 gcc_assert (useless_type_conversion_p (vectype, 8193 TREE_TYPE (vec_oprnd))); 8194 bool simd_lane_access_p 8195 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0; 8196 if (simd_lane_access_p 8197 && !loop_masks 8198 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR 8199 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0)) 8200 && integer_zerop (get_dr_vinfo_offset (first_dr_info)) 8201 && integer_zerop (DR_INIT (first_dr_info->dr)) 8202 && alias_sets_conflict_p (get_alias_set (aggr_type), 8203 get_alias_set (TREE_TYPE (ref_type)))) 8204 { 8205 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr)); 8206 dataref_offset = build_int_cst (ref_type, 0); 8207 } 8208 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) 8209 vect_get_gather_scatter_ops (loop, stmt_info, &gs_info, 8210 &dataref_ptr, &vec_offset); 8211 else 8212 dataref_ptr 8213 = vect_create_data_ref_ptr (first_stmt_info, aggr_type, 8214 simd_lane_access_p ? loop : NULL, 8215 offset, &dummy, gsi, &ptr_incr, 8216 simd_lane_access_p, NULL_TREE, bump); 8217 } 8218 else 8219 { 8220 /* For interleaved stores we created vectorized defs for all the 8221 defs stored in OPRNDS in the previous iteration (previous copy). 8222 DR_CHAIN is then used as an input to vect_permute_store_chain(), 8223 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the 8224 next copy. 8225 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and 8226 OPRNDS are of size 1. */ 8227 for (i = 0; i < group_size; i++) 8228 { 8229 op = oprnds[i]; 8230 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, op); 8231 dr_chain[i] = vec_oprnd; 8232 oprnds[i] = vec_oprnd; 8233 } 8234 if (mask) 8235 vec_mask = vect_get_vec_def_for_stmt_copy (vinfo, vec_mask); 8236 if (dataref_offset) 8237 dataref_offset 8238 = int_const_binop (PLUS_EXPR, dataref_offset, bump); 8239 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) 8240 vec_offset = vect_get_vec_def_for_stmt_copy (vinfo, vec_offset); 8241 else 8242 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, 8243 stmt_info, bump); 8244 } 8245 8246 if (memory_access_type == VMAT_LOAD_STORE_LANES) 8247 { 8248 tree vec_array; 8249 8250 /* Get an array into which we can store the individual vectors. */ 8251 vec_array = create_vector_array (vectype, vec_num); 8252 8253 /* Invalidate the current contents of VEC_ARRAY. This should 8254 become an RTL clobber too, which prevents the vector registers 8255 from being upward-exposed. */ 8256 vect_clobber_variable (stmt_info, gsi, vec_array); 8257 8258 /* Store the individual vectors into the array. */ 8259 for (i = 0; i < vec_num; i++) 8260 { 8261 vec_oprnd = dr_chain[i]; 8262 write_vector_array (stmt_info, gsi, vec_oprnd, vec_array, i); 8263 } 8264 8265 tree final_mask = NULL; 8266 if (loop_masks) 8267 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies, 8268 vectype, j); 8269 if (vec_mask) 8270 final_mask = prepare_load_store_mask (mask_vectype, final_mask, 8271 vec_mask, gsi); 8272 8273 gcall *call; 8274 if (final_mask) 8275 { 8276 /* Emit: 8277 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK, 8278 VEC_ARRAY). */ 8279 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype)); 8280 tree alias_ptr = build_int_cst (ref_type, align); 8281 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4, 8282 dataref_ptr, alias_ptr, 8283 final_mask, vec_array); 8284 } 8285 else 8286 { 8287 /* Emit: 8288 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). 
*/ 8289 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type); 8290 call = gimple_build_call_internal (IFN_STORE_LANES, 1, 8291 vec_array); 8292 gimple_call_set_lhs (call, data_ref); 8293 } 8294 gimple_call_set_nothrow (call, true); 8295 new_stmt_info = vect_finish_stmt_generation (stmt_info, call, gsi); 8296 8297 /* Record that VEC_ARRAY is now dead. */ 8298 vect_clobber_variable (stmt_info, gsi, vec_array); 8299 } 8300 else 8301 { 8302 new_stmt_info = NULL; 8303 if (grouped_store) 8304 { 8305 if (j == 0) 8306 result_chain.create (group_size); 8307 /* Permute. */ 8308 vect_permute_store_chain (dr_chain, group_size, stmt_info, gsi, 8309 &result_chain); 8310 } 8311 8312 stmt_vec_info next_stmt_info = first_stmt_info; 8313 for (i = 0; i < vec_num; i++) 8314 { 8315 unsigned misalign; 8316 unsigned HOST_WIDE_INT align; 8317 8318 tree final_mask = NULL_TREE; 8319 if (loop_masks) 8320 final_mask = vect_get_loop_mask (gsi, loop_masks, 8321 vec_num * ncopies, 8322 vectype, vec_num * j + i); 8323 if (vec_mask) 8324 final_mask = prepare_load_store_mask (mask_vectype, final_mask, 8325 vec_mask, gsi); 8326 8327 if (memory_access_type == VMAT_GATHER_SCATTER) 8328 { 8329 tree scale = size_int (gs_info.scale); 8330 gcall *call; 8331 if (loop_masks) 8332 call = gimple_build_call_internal 8333 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset, 8334 scale, vec_oprnd, final_mask); 8335 else 8336 call = gimple_build_call_internal 8337 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset, 8338 scale, vec_oprnd); 8339 gimple_call_set_nothrow (call, true); 8340 new_stmt_info 8341 = vect_finish_stmt_generation (stmt_info, call, gsi); 8342 break; 8343 } 8344 8345 if (i > 0) 8346 /* Bump the vector pointer. */ 8347 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, 8348 stmt_info, bump); 8349 8350 if (slp) 8351 vec_oprnd = vec_oprnds[i]; 8352 else if (grouped_store) 8353 /* For grouped stores vectorized defs are interleaved in 8354 vect_permute_store_chain(). */ 8355 vec_oprnd = result_chain[i]; 8356 8357 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info)); 8358 if (aligned_access_p (first_dr_info)) 8359 misalign = 0; 8360 else if (DR_MISALIGNMENT (first_dr_info) == -1) 8361 { 8362 align = dr_alignment (vect_dr_behavior (first_dr_info)); 8363 misalign = 0; 8364 } 8365 else 8366 misalign = DR_MISALIGNMENT (first_dr_info); 8367 if (dataref_offset == NULL_TREE 8368 && TREE_CODE (dataref_ptr) == SSA_NAME) 8369 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align, 8370 misalign); 8371 align = least_bit_hwi (misalign | align); 8372 8373 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE) 8374 { 8375 tree perm_mask = perm_mask_for_reverse (vectype); 8376 tree perm_dest = vect_create_destination_var 8377 (vect_get_store_rhs (stmt_info), vectype); 8378 tree new_temp = make_ssa_name (perm_dest); 8379 8380 /* Generate the permute statement. */ 8381 gimple *perm_stmt 8382 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd, 8383 vec_oprnd, perm_mask); 8384 vect_finish_stmt_generation (stmt_info, perm_stmt, gsi); 8385 8386 perm_stmt = SSA_NAME_DEF_STMT (new_temp); 8387 vec_oprnd = new_temp; 8388 } 8389 8390 /* Arguments are ready. Create the new vector stmt. 
*/ 8391 if (final_mask) 8392 { 8393 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT); 8394 gcall *call 8395 = gimple_build_call_internal (IFN_MASK_STORE, 4, 8396 dataref_ptr, ptr, 8397 final_mask, vec_oprnd); 8398 gimple_call_set_nothrow (call, true); 8399 new_stmt_info 8400 = vect_finish_stmt_generation (stmt_info, call, gsi); 8401 } 8402 else 8403 { 8404 data_ref = fold_build2 (MEM_REF, vectype, 8405 dataref_ptr, 8406 dataref_offset 8407 ? dataref_offset 8408 : build_int_cst (ref_type, 0)); 8409 if (aligned_access_p (first_dr_info)) 8410 ; 8411 else 8412 TREE_TYPE (data_ref) 8413 = build_aligned_type (TREE_TYPE (data_ref), 8414 align * BITS_PER_UNIT); 8415 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr)); 8416 gassign *new_stmt 8417 = gimple_build_assign (data_ref, vec_oprnd); 8418 new_stmt_info 8419 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi); 8420 } 8421 8422 if (slp) 8423 continue; 8424 8425 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info); 8426 if (!next_stmt_info) 8427 break; 8428 } 8429 } 8430 if (!slp) 8431 { 8432 if (j == 0) 8433 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info; 8434 else 8435 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info; 8436 prev_stmt_info = new_stmt_info; 8437 } 8438 } 8439 8440 oprnds.release (); 8441 result_chain.release (); 8442 vec_oprnds.release (); 8443 8444 return true; 8445 } 8446 8447 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent 8448 VECTOR_CST mask. No checks are made that the target platform supports the 8449 mask, so callers may wish to test can_vec_perm_const_p separately, or use 8450 vect_gen_perm_mask_checked. */ 8451 8452 tree 8453 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel) 8454 { 8455 tree mask_type; 8456 8457 poly_uint64 nunits = sel.length (); 8458 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype))); 8459 8460 mask_type = build_vector_type (ssizetype, nunits); 8461 return vec_perm_indices_to_tree (mask_type, sel); 8462 } 8463 8464 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p, 8465 i.e. that the target supports the pattern _for arbitrary input vectors_. */ 8466 8467 tree 8468 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel) 8469 { 8470 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel)); 8471 return vect_gen_perm_mask_any (vectype, sel); 8472 } 8473 8474 /* Given a vector variable X and Y, that was generated for the scalar 8475 STMT_INFO, generate instructions to permute the vector elements of X and Y 8476 using permutation mask MASK_VEC, insert them at *GSI and return the 8477 permuted vector variable. */ 8478 8479 static tree 8480 permute_vec_elements (tree x, tree y, tree mask_vec, stmt_vec_info stmt_info, 8481 gimple_stmt_iterator *gsi) 8482 { 8483 tree vectype = TREE_TYPE (x); 8484 tree perm_dest, data_ref; 8485 gimple *perm_stmt; 8486 8487 tree scalar_dest = gimple_get_lhs (stmt_info->stmt); 8488 if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME) 8489 perm_dest = vect_create_destination_var (scalar_dest, vectype); 8490 else 8491 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL); 8492 data_ref = make_ssa_name (perm_dest); 8493 8494 /* Generate the permute statement. 
  */
  perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
  vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);

  return data_ref;
}

/* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
   inserting them on the loop's preheader edge.  Returns true if we
   were successful in doing so (and thus STMT_INFO can then be moved),
   otherwise returns false.  */

static bool
hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop)
{
  ssa_op_iter i;
  tree op;
  bool any = false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (op);
      if (!gimple_nop_p (def_stmt)
	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
	{
	  /* Make sure we don't need to recurse.  While we could do
	     so in simple cases, when there are more complex use webs
	     we don't have an easy way to preserve stmt order to fulfil
	     dependencies within them.  */
	  tree op2;
	  ssa_op_iter i2;
	  if (gimple_code (def_stmt) == GIMPLE_PHI)
	    return false;
	  FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
	    {
	      gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
	      if (!gimple_nop_p (def_stmt2)
		  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
		return false;
	    }
	  any = true;
	}
    }

  if (!any)
    return true;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (op);
      if (!gimple_nop_p (def_stmt)
	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
	{
	  gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
	  gsi_remove (&gsi, false);
	  gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
	}
    }

  return true;
}

/* vectorizable_load.

   Check if STMT_INFO reads a non scalar data-ref (array/pointer/structure)
   that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.
*/ 8563 8564 static bool 8565 vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, 8566 stmt_vec_info *vec_stmt, slp_tree slp_node, 8567 slp_instance slp_node_instance, 8568 stmt_vector_for_cost *cost_vec) 8569 { 8570 tree scalar_dest; 8571 tree vec_dest = NULL; 8572 tree data_ref = NULL; 8573 stmt_vec_info prev_stmt_info; 8574 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 8575 class loop *loop = NULL; 8576 class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father; 8577 bool nested_in_vect_loop = false; 8578 tree elem_type; 8579 tree new_temp; 8580 machine_mode mode; 8581 tree dummy; 8582 enum dr_alignment_support alignment_support_scheme; 8583 tree dataref_ptr = NULL_TREE; 8584 tree dataref_offset = NULL_TREE; 8585 gimple *ptr_incr = NULL; 8586 int ncopies; 8587 int i, j; 8588 unsigned int group_size; 8589 poly_uint64 group_gap_adj; 8590 tree msq = NULL_TREE, lsq; 8591 tree offset = NULL_TREE; 8592 tree byte_offset = NULL_TREE; 8593 tree realignment_token = NULL_TREE; 8594 gphi *phi = NULL; 8595 vec<tree> dr_chain = vNULL; 8596 bool grouped_load = false; 8597 stmt_vec_info first_stmt_info; 8598 stmt_vec_info first_stmt_info_for_drptr = NULL; 8599 bool compute_in_loop = false; 8600 class loop *at_loop; 8601 int vec_num; 8602 bool slp = (slp_node != NULL); 8603 bool slp_perm = false; 8604 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); 8605 poly_uint64 vf; 8606 tree aggr_type; 8607 gather_scatter_info gs_info; 8608 vec_info *vinfo = stmt_info->vinfo; 8609 tree ref_type; 8610 enum vect_def_type mask_dt = vect_unknown_def_type; 8611 8612 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) 8613 return false; 8614 8615 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def 8616 && ! vec_stmt) 8617 return false; 8618 8619 tree mask = NULL_TREE, mask_vectype = NULL_TREE; 8620 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt)) 8621 { 8622 scalar_dest = gimple_assign_lhs (assign); 8623 if (TREE_CODE (scalar_dest) != SSA_NAME) 8624 return false; 8625 8626 tree_code code = gimple_assign_rhs_code (assign); 8627 if (code != ARRAY_REF 8628 && code != BIT_FIELD_REF 8629 && code != INDIRECT_REF 8630 && code != COMPONENT_REF 8631 && code != IMAGPART_EXPR 8632 && code != REALPART_EXPR 8633 && code != MEM_REF 8634 && TREE_CODE_CLASS (code) != tcc_declaration) 8635 return false; 8636 } 8637 else 8638 { 8639 gcall *call = dyn_cast <gcall *> (stmt_info->stmt); 8640 if (!call || !gimple_call_internal_p (call)) 8641 return false; 8642 8643 internal_fn ifn = gimple_call_internal_fn (call); 8644 if (!internal_load_fn_p (ifn)) 8645 return false; 8646 8647 scalar_dest = gimple_call_lhs (call); 8648 if (!scalar_dest) 8649 return false; 8650 8651 int mask_index = internal_fn_mask_index (ifn); 8652 if (mask_index >= 0) 8653 { 8654 mask = gimple_call_arg (call, mask_index); 8655 if (!vect_check_scalar_mask (stmt_info, mask, &mask_dt, 8656 &mask_vectype)) 8657 return false; 8658 } 8659 } 8660 8661 if (!STMT_VINFO_DATA_REF (stmt_info)) 8662 return false; 8663 8664 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 8665 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); 8666 8667 if (loop_vinfo) 8668 { 8669 loop = LOOP_VINFO_LOOP (loop_vinfo); 8670 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info); 8671 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); 8672 } 8673 else 8674 vf = 1; 8675 8676 /* Multiple types in SLP are handled by creating the appropriate number of 8677 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in 8678 case of SLP. 
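     (For instance, with assumed numbers: an SLP node covering eight scalar
     loads and a four-lane vectype would be handled by emitting two vector
     statements for that node, recorded in SLP_TREE_NUMBER_OF_VEC_STMTS,
     rather than by raising NCOPIES above 1.)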
*/ 8679 if (slp) 8680 ncopies = 1; 8681 else 8682 ncopies = vect_get_num_copies (loop_vinfo, vectype); 8683 8684 gcc_assert (ncopies >= 1); 8685 8686 /* FORNOW. This restriction should be relaxed. */ 8687 if (nested_in_vect_loop && ncopies > 1) 8688 { 8689 if (dump_enabled_p ()) 8690 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 8691 "multiple types in nested loop.\n"); 8692 return false; 8693 } 8694 8695 /* Invalidate assumptions made by dependence analysis when vectorization 8696 on the unrolled body effectively re-orders stmts. */ 8697 if (ncopies > 1 8698 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0 8699 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo), 8700 STMT_VINFO_MIN_NEG_DIST (stmt_info))) 8701 { 8702 if (dump_enabled_p ()) 8703 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 8704 "cannot perform implicit CSE when unrolling " 8705 "with negative dependence distance\n"); 8706 return false; 8707 } 8708 8709 elem_type = TREE_TYPE (vectype); 8710 mode = TYPE_MODE (vectype); 8711 8712 /* FORNOW. In some cases can vectorize even if data-type not supported 8713 (e.g. - data copies). */ 8714 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing) 8715 { 8716 if (dump_enabled_p ()) 8717 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 8718 "Aligned load, but unsupported type.\n"); 8719 return false; 8720 } 8721 8722 /* Check if the load is a part of an interleaving chain. */ 8723 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) 8724 { 8725 grouped_load = true; 8726 /* FORNOW */ 8727 gcc_assert (!nested_in_vect_loop); 8728 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info)); 8729 8730 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info); 8731 group_size = DR_GROUP_SIZE (first_stmt_info); 8732 8733 /* Refuse non-SLP vectorization of SLP-only groups. */ 8734 if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info)) 8735 { 8736 if (dump_enabled_p ()) 8737 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 8738 "cannot vectorize load in non-SLP mode.\n"); 8739 return false; 8740 } 8741 8742 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()) 8743 slp_perm = true; 8744 8745 /* Invalidate assumptions made by dependence analysis when vectorization 8746 on the unrolled body effectively re-orders stmts. */ 8747 if (!PURE_SLP_STMT (stmt_info) 8748 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0 8749 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo), 8750 STMT_VINFO_MIN_NEG_DIST (stmt_info))) 8751 { 8752 if (dump_enabled_p ()) 8753 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 8754 "cannot perform implicit CSE when performing " 8755 "group loads with negative dependence distance\n"); 8756 return false; 8757 } 8758 } 8759 else 8760 group_size = 1; 8761 8762 vect_memory_access_type memory_access_type; 8763 if (!get_load_store_type (stmt_info, vectype, slp, mask, VLS_LOAD, ncopies, 8764 &memory_access_type, &gs_info)) 8765 return false; 8766 8767 if (mask) 8768 { 8769 if (memory_access_type == VMAT_CONTIGUOUS) 8770 { 8771 machine_mode vec_mode = TYPE_MODE (vectype); 8772 if (!VECTOR_MODE_P (vec_mode) 8773 || !can_vec_mask_load_store_p (vec_mode, 8774 TYPE_MODE (mask_vectype), true)) 8775 return false; 8776 } 8777 else if (memory_access_type != VMAT_LOAD_STORE_LANES 8778 && memory_access_type != VMAT_GATHER_SCATTER) 8779 { 8780 if (dump_enabled_p ()) 8781 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 8782 "unsupported access type for masked load.\n"); 8783 return false; 8784 } 8785 } 8786 8787 if (!vec_stmt) /* transformation not required. 
*/ 8788 { 8789 if (!slp) 8790 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type; 8791 8792 if (loop_vinfo 8793 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)) 8794 check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size, 8795 memory_access_type, &gs_info, mask); 8796 8797 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type; 8798 vect_model_load_cost (stmt_info, ncopies, memory_access_type, 8799 slp_node_instance, slp_node, cost_vec); 8800 return true; 8801 } 8802 8803 if (!slp) 8804 gcc_assert (memory_access_type 8805 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info)); 8806 8807 if (dump_enabled_p ()) 8808 dump_printf_loc (MSG_NOTE, vect_location, 8809 "transform load. ncopies = %d\n", ncopies); 8810 8811 /* Transform. */ 8812 8813 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL; 8814 ensure_base_align (dr_info); 8815 8816 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl) 8817 { 8818 vect_build_gather_load_calls (stmt_info, gsi, vec_stmt, &gs_info, mask); 8819 return true; 8820 } 8821 8822 if (memory_access_type == VMAT_INVARIANT) 8823 { 8824 gcc_assert (!grouped_load && !mask && !bb_vinfo); 8825 /* If we have versioned for aliasing or the loop doesn't 8826 have any data dependencies that would preclude this, 8827 then we are sure this is a loop invariant load and 8828 thus we can insert it on the preheader edge. */ 8829 bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) 8830 && !nested_in_vect_loop 8831 && hoist_defs_of_uses (stmt_info, loop)); 8832 if (hoist_p) 8833 { 8834 gassign *stmt = as_a <gassign *> (stmt_info->stmt); 8835 if (dump_enabled_p ()) 8836 dump_printf_loc (MSG_NOTE, vect_location, 8837 "hoisting out of the vectorized loop: %G", stmt); 8838 scalar_dest = copy_ssa_name (scalar_dest); 8839 tree rhs = unshare_expr (gimple_assign_rhs1 (stmt)); 8840 gsi_insert_on_edge_immediate 8841 (loop_preheader_edge (loop), 8842 gimple_build_assign (scalar_dest, rhs)); 8843 } 8844 /* These copies are all equivalent, but currently the representation 8845 requires a separate STMT_VINFO_VEC_STMT for each one. */ 8846 prev_stmt_info = NULL; 8847 gimple_stmt_iterator gsi2 = *gsi; 8848 gsi_next (&gsi2); 8849 for (j = 0; j < ncopies; j++) 8850 { 8851 stmt_vec_info new_stmt_info; 8852 if (hoist_p) 8853 { 8854 new_temp = vect_init_vector (stmt_info, scalar_dest, 8855 vectype, NULL); 8856 gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp); 8857 new_stmt_info = vinfo->add_stmt (new_stmt); 8858 } 8859 else 8860 { 8861 new_temp = vect_init_vector (stmt_info, scalar_dest, 8862 vectype, &gsi2); 8863 new_stmt_info = vinfo->lookup_def (new_temp); 8864 } 8865 if (slp) 8866 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info); 8867 else if (j == 0) 8868 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info; 8869 else 8870 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info; 8871 prev_stmt_info = new_stmt_info; 8872 } 8873 return true; 8874 } 8875 8876 if (memory_access_type == VMAT_ELEMENTWISE 8877 || memory_access_type == VMAT_STRIDED_SLP) 8878 { 8879 gimple_stmt_iterator incr_gsi; 8880 bool insert_after; 8881 gimple *incr; 8882 tree offvar; 8883 tree ivstep; 8884 tree running_off; 8885 vec<constructor_elt, va_gc> *v = NULL; 8886 tree stride_base, stride_step, alias_off; 8887 /* Checked by get_load_store_type. 
*/ 8888 unsigned int const_nunits = nunits.to_constant (); 8889 unsigned HOST_WIDE_INT cst_offset = 0; 8890 tree dr_offset; 8891 8892 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)); 8893 gcc_assert (!nested_in_vect_loop); 8894 8895 if (grouped_load) 8896 { 8897 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info); 8898 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info); 8899 } 8900 else 8901 { 8902 first_stmt_info = stmt_info; 8903 first_dr_info = dr_info; 8904 } 8905 if (slp && grouped_load) 8906 { 8907 group_size = DR_GROUP_SIZE (first_stmt_info); 8908 ref_type = get_group_alias_ptr_type (first_stmt_info); 8909 } 8910 else 8911 { 8912 if (grouped_load) 8913 cst_offset 8914 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype))) 8915 * vect_get_place_in_interleaving_chain (stmt_info, 8916 first_stmt_info)); 8917 group_size = 1; 8918 ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)); 8919 } 8920 8921 dr_offset = get_dr_vinfo_offset (first_dr_info); 8922 stride_base 8923 = fold_build_pointer_plus 8924 (DR_BASE_ADDRESS (first_dr_info->dr), 8925 size_binop (PLUS_EXPR, 8926 convert_to_ptrofftype (dr_offset), 8927 convert_to_ptrofftype (DR_INIT (first_dr_info->dr)))); 8928 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr)); 8929 8930 /* For a load with loop-invariant (but other than power-of-2) 8931 stride (i.e. not a grouped access) like so: 8932 8933 for (i = 0; i < n; i += stride) 8934 ... = array[i]; 8935 8936 we generate a new induction variable and new accesses to 8937 form a new vector (or vectors, depending on ncopies): 8938 8939 for (j = 0; ; j += VF*stride) 8940 tmp1 = array[j]; 8941 tmp2 = array[j + stride]; 8942 ... 8943 vectemp = {tmp1, tmp2, ...} 8944 */ 8945 8946 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step, 8947 build_int_cst (TREE_TYPE (stride_step), vf)); 8948 8949 standard_iv_increment_position (loop, &incr_gsi, &insert_after); 8950 8951 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base); 8952 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep); 8953 create_iv (stride_base, ivstep, NULL, 8954 loop, &incr_gsi, insert_after, 8955 &offvar, NULL); 8956 incr = gsi_stmt (incr_gsi); 8957 loop_vinfo->add_stmt (incr); 8958 8959 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step); 8960 8961 prev_stmt_info = NULL; 8962 running_off = offvar; 8963 alias_off = build_int_cst (ref_type, 0); 8964 int nloads = const_nunits; 8965 int lnel = 1; 8966 tree ltype = TREE_TYPE (vectype); 8967 tree lvectype = vectype; 8968 auto_vec<tree> dr_chain; 8969 if (memory_access_type == VMAT_STRIDED_SLP) 8970 { 8971 if (group_size < const_nunits) 8972 { 8973 /* First check if vec_init optab supports construction from vector 8974 elts directly. Otherwise avoid emitting a constructor of 8975 vector elements by performing the loads using an integer type 8976 of the same size, constructing a vector of those and then 8977 re-interpreting it as the original vector type. This avoids a 8978 huge runtime penalty due to the general inability to perform 8979 store forwarding from smaller stores to a larger load. 
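   As a purely illustrative example (types assumed, not taken from any
   particular target): for a V8SI vectype with group_size == 2 the code
   below asks for a composition of 8 / 2 == 4 pieces; if the target can
   build a V4DI from DImode pieces, each 64-bit piece is loaded with one
   DImode load (nloads == 4, lnel == 2), the pieces are combined into a
   V4DI and the result is view-converted back to V8SI.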
*/ 8980 tree ptype; 8981 tree vtype 8982 = vector_vector_composition_type (vectype, 8983 const_nunits / group_size, 8984 &ptype); 8985 if (vtype != NULL_TREE) 8986 { 8987 nloads = const_nunits / group_size; 8988 lnel = group_size; 8989 lvectype = vtype; 8990 ltype = ptype; 8991 } 8992 } 8993 else 8994 { 8995 nloads = 1; 8996 lnel = const_nunits; 8997 ltype = vectype; 8998 } 8999 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype))); 9000 } 9001 /* Load vector(1) scalar_type if it's 1 element-wise vectype. */ 9002 else if (nloads == 1) 9003 ltype = vectype; 9004 9005 if (slp) 9006 { 9007 /* For SLP permutation support we need to load the whole group, 9008 not only the number of vector stmts the permutation result 9009 fits in. */ 9010 if (slp_perm) 9011 { 9012 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for 9013 variable VF. */ 9014 unsigned int const_vf = vf.to_constant (); 9015 ncopies = CEIL (group_size * const_vf, const_nunits); 9016 dr_chain.create (ncopies); 9017 } 9018 else 9019 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); 9020 } 9021 unsigned int group_el = 0; 9022 unsigned HOST_WIDE_INT 9023 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype))); 9024 for (j = 0; j < ncopies; j++) 9025 { 9026 if (nloads > 1) 9027 vec_alloc (v, nloads); 9028 stmt_vec_info new_stmt_info = NULL; 9029 for (i = 0; i < nloads; i++) 9030 { 9031 tree this_off = build_int_cst (TREE_TYPE (alias_off), 9032 group_el * elsz + cst_offset); 9033 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off); 9034 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr)); 9035 gassign *new_stmt 9036 = gimple_build_assign (make_ssa_name (ltype), data_ref); 9037 new_stmt_info 9038 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi); 9039 if (nloads > 1) 9040 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, 9041 gimple_assign_lhs (new_stmt)); 9042 9043 group_el += lnel; 9044 if (! 
slp 9045 || group_el == group_size) 9046 { 9047 tree newoff = copy_ssa_name (running_off); 9048 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR, 9049 running_off, stride_step); 9050 vect_finish_stmt_generation (stmt_info, incr, gsi); 9051 9052 running_off = newoff; 9053 group_el = 0; 9054 } 9055 } 9056 if (nloads > 1) 9057 { 9058 tree vec_inv = build_constructor (lvectype, v); 9059 new_temp = vect_init_vector (stmt_info, vec_inv, lvectype, gsi); 9060 new_stmt_info = vinfo->lookup_def (new_temp); 9061 if (lvectype != vectype) 9062 { 9063 gassign *new_stmt 9064 = gimple_build_assign (make_ssa_name (vectype), 9065 VIEW_CONVERT_EXPR, 9066 build1 (VIEW_CONVERT_EXPR, 9067 vectype, new_temp)); 9068 new_stmt_info 9069 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi); 9070 } 9071 } 9072 9073 if (slp) 9074 { 9075 if (slp_perm) 9076 dr_chain.quick_push (gimple_assign_lhs (new_stmt_info->stmt)); 9077 else 9078 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info); 9079 } 9080 else 9081 { 9082 if (j == 0) 9083 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info; 9084 else 9085 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info; 9086 prev_stmt_info = new_stmt_info; 9087 } 9088 } 9089 if (slp_perm) 9090 { 9091 unsigned n_perms; 9092 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf, 9093 slp_node_instance, false, &n_perms); 9094 } 9095 return true; 9096 } 9097 9098 if (memory_access_type == VMAT_GATHER_SCATTER 9099 || (!slp && memory_access_type == VMAT_CONTIGUOUS)) 9100 grouped_load = false; 9101 9102 if (grouped_load) 9103 { 9104 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info); 9105 group_size = DR_GROUP_SIZE (first_stmt_info); 9106 /* For SLP vectorization we directly vectorize a subchain 9107 without permutation. */ 9108 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()) 9109 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0]; 9110 /* For BB vectorization always use the first stmt to base 9111 the data ref pointer on. */ 9112 if (bb_vinfo) 9113 first_stmt_info_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0]; 9114 9115 /* Check if the chain of loads is already vectorized. */ 9116 if (STMT_VINFO_VEC_STMT (first_stmt_info) 9117 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS. 9118 ??? But we can only do so if there is exactly one 9119 as we have no way to get at the rest. Leave the CSE 9120 opportunity alone. 9121 ??? With the group load eventually participating 9122 in multiple different permutations (having multiple 9123 slp nodes which refer to the same group) the CSE 9124 is even wrong code. See PR56270. */ 9125 && !slp) 9126 { 9127 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info); 9128 return true; 9129 } 9130 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info); 9131 group_gap_adj = 0; 9132 9133 /* VEC_NUM is the number of vect stmts to be created for this group. */ 9134 if (slp) 9135 { 9136 grouped_load = false; 9137 /* If an SLP permutation is from N elements to N elements, 9138 and if one vector holds a whole number of N, we can load 9139 the inputs to the permutation in the same way as an 9140 unpermuted sequence. In other cases we need to load the 9141 whole group, not only the number of vector stmts the 9142 permutation result fits in. */ 9143 if (slp_perm 9144 && (group_size != SLP_INSTANCE_GROUP_SIZE (slp_node_instance) 9145 || !multiple_p (nunits, group_size))) 9146 { 9147 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for 9148 variable VF; see vect_transform_slp_perm_load. 
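       As a purely illustrative example: for group_size == 3,
       const_vf == 4 and const_nunits == 4 the permuted SLP load needs
       CEIL (3 * 4, 4) == 3 vectors per unrolled iteration, and
       group_gap_adj == 4 * 3 - 4 * 3 == 0, so no excess elements have
       to be stepped over.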
*/ 9149 unsigned int const_vf = vf.to_constant (); 9150 unsigned int const_nunits = nunits.to_constant (); 9151 vec_num = CEIL (group_size * const_vf, const_nunits); 9152 group_gap_adj = vf * group_size - nunits * vec_num; 9153 } 9154 else 9155 { 9156 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); 9157 group_gap_adj 9158 = group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance); 9159 } 9160 } 9161 else 9162 vec_num = group_size; 9163 9164 ref_type = get_group_alias_ptr_type (first_stmt_info); 9165 } 9166 else 9167 { 9168 first_stmt_info = stmt_info; 9169 first_dr_info = dr_info; 9170 group_size = vec_num = 1; 9171 group_gap_adj = 0; 9172 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr)); 9173 } 9174 9175 /* Gather-scatter accesses perform only component accesses, alignment 9176 is irrelevant for them. */ 9177 if (memory_access_type == VMAT_GATHER_SCATTER) 9178 alignment_support_scheme = dr_unaligned_supported; 9179 else 9180 alignment_support_scheme 9181 = vect_supportable_dr_alignment (first_dr_info, false); 9182 9183 gcc_assert (alignment_support_scheme); 9184 vec_loop_masks *loop_masks 9185 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo) 9186 ? &LOOP_VINFO_MASKS (loop_vinfo) 9187 : NULL); 9188 /* Targets with store-lane instructions must not require explicit 9189 realignment. vect_supportable_dr_alignment always returns either 9190 dr_aligned or dr_unaligned_supported for masked operations. */ 9191 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES 9192 && !mask 9193 && !loop_masks) 9194 || alignment_support_scheme == dr_aligned 9195 || alignment_support_scheme == dr_unaligned_supported); 9196 9197 /* In case the vectorization factor (VF) is bigger than the number 9198 of elements that we can fit in a vectype (nunits), we have to generate 9199 more than one vector stmt - i.e - we need to "unroll" the 9200 vector stmt by a factor VF/nunits. In doing so, we record a pointer 9201 from one copy of the vector stmt to the next, in the field 9202 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following 9203 stages to find the correct vector defs to be used when vectorizing 9204 stmts that use the defs of the current stmt. The example below 9205 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we 9206 need to create 4 vectorized stmts): 9207 9208 before vectorization: 9209 RELATED_STMT VEC_STMT 9210 S1: x = memref - - 9211 S2: z = x + 1 - - 9212 9213 step 1: vectorize stmt S1: 9214 We first create the vector stmt VS1_0, and, as usual, record a 9215 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1. 9216 Next, we create the vector stmt VS1_1, and record a pointer to 9217 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0. 9218 Similarly, for VS1_2 and VS1_3. This is the resulting chain of 9219 stmts and pointers: 9220 RELATED_STMT VEC_STMT 9221 VS1_0: vx0 = memref0 VS1_1 - 9222 VS1_1: vx1 = memref1 VS1_2 - 9223 VS1_2: vx2 = memref2 VS1_3 - 9224 VS1_3: vx3 = memref3 - - 9225 S1: x = load - VS1_0 9226 S2: z = x + 1 - - 9227 9228 See in documentation in vect_get_vec_def_for_stmt_copy for how the 9229 information we recorded in RELATED_STMT field is used to vectorize 9230 stmt S2. 
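   Purely as an illustrative continuation of the example above (vector
   names invented, v1 denoting the vectorized constant 1), stmt S2 would
   then be vectorized by walking that chain, producing:

                                   RELATED_STMT    VEC_STMT
      VS2_0:  vz0 = vx0 + v1       VS2_1           -
      VS2_1:  vz1 = vx1 + v1       VS2_2           -
      VS2_2:  vz2 = vx2 + v1       VS2_3           -
      VS2_3:  vz3 = vx3 + v1       -               -
      S2:     z = x + 1            -               VS2_0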
*/ 9231 9232 /* In case of interleaving (non-unit grouped access): 9233 9234 S1: x2 = &base + 2 9235 S2: x0 = &base 9236 S3: x1 = &base + 1 9237 S4: x3 = &base + 3 9238 9239 Vectorized loads are created in the order of memory accesses 9240 starting from the access of the first stmt of the chain: 9241 9242 VS1: vx0 = &base 9243 VS2: vx1 = &base + vec_size*1 9244 VS3: vx3 = &base + vec_size*2 9245 VS4: vx4 = &base + vec_size*3 9246 9247 Then permutation statements are generated: 9248 9249 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } > 9250 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } > 9251 ... 9252 9253 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts 9254 (the order of the data-refs in the output of vect_permute_load_chain 9255 corresponds to the order of scalar stmts in the interleaving chain - see 9256 the documentation of vect_permute_load_chain()). 9257 The generation of permutation stmts and recording them in 9258 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load(). 9259 9260 In case of both multiple types and interleaving, the vector loads and 9261 permutation stmts above are created for every copy. The result vector 9262 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the 9263 corresponding STMT_VINFO_RELATED_STMT for the next copies. */ 9264 9265 /* If the data reference is aligned (dr_aligned) or potentially unaligned 9266 on a target that supports unaligned accesses (dr_unaligned_supported) 9267 we generate the following code: 9268 p = initial_addr; 9269 indx = 0; 9270 loop { 9271 p = p + indx * vectype_size; 9272 vec_dest = *(p); 9273 indx = indx + 1; 9274 } 9275 9276 Otherwise, the data reference is potentially unaligned on a target that 9277 does not support unaligned accesses (dr_explicit_realign_optimized) - 9278 then generate the following code, in which the data in each iteration is 9279 obtained by two vector loads, one from the previous iteration, and one 9280 from the current iteration: 9281 p1 = initial_addr; 9282 msq_init = *(floor(p1)) 9283 p2 = initial_addr + VS - 1; 9284 realignment_token = call target_builtin; 9285 indx = 0; 9286 loop { 9287 p2 = p2 + indx * vectype_size 9288 lsq = *(floor(p2)) 9289 vec_dest = realign_load (msq, lsq, realignment_token) 9290 indx = indx + 1; 9291 msq = lsq; 9292 } */ 9293 9294 /* If the misalignment remains the same throughout the execution of the 9295 loop, we can create the init_addr and permutation mask at the loop 9296 preheader. Otherwise, it needs to be created inside the loop. 9297 This can only occur when vectorizing memory accesses in the inner-loop 9298 nested within an outer-loop that is being vectorized. */ 9299 9300 if (nested_in_vect_loop 9301 && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr), 9302 GET_MODE_SIZE (TYPE_MODE (vectype)))) 9303 { 9304 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized); 9305 compute_in_loop = true; 9306 } 9307 9308 bool diff_first_stmt_info 9309 = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr; 9310 9311 if ((alignment_support_scheme == dr_explicit_realign_optimized 9312 || alignment_support_scheme == dr_explicit_realign) 9313 && !compute_in_loop) 9314 { 9315 /* If we have different first_stmt_info, we can't set up realignment 9316 here, since we can't guarantee first_stmt_info DR has been 9317 initialized yet, use first_stmt_info_for_drptr DR by bumping the 9318 distance from first_stmt_info DR instead as below. 
*/ 9319 if (!diff_first_stmt_info) 9320 msq = vect_setup_realignment (first_stmt_info, gsi, &realignment_token, 9321 alignment_support_scheme, NULL_TREE, 9322 &at_loop); 9323 if (alignment_support_scheme == dr_explicit_realign_optimized) 9324 { 9325 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq)); 9326 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype), 9327 size_one_node); 9328 gcc_assert (!first_stmt_info_for_drptr); 9329 } 9330 } 9331 else 9332 at_loop = loop; 9333 9334 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE) 9335 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1); 9336 9337 tree bump; 9338 tree vec_offset = NULL_TREE; 9339 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) 9340 { 9341 aggr_type = NULL_TREE; 9342 bump = NULL_TREE; 9343 } 9344 else if (memory_access_type == VMAT_GATHER_SCATTER) 9345 { 9346 aggr_type = elem_type; 9347 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info, 9348 &bump, &vec_offset); 9349 } 9350 else 9351 { 9352 if (memory_access_type == VMAT_LOAD_STORE_LANES) 9353 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits); 9354 else 9355 aggr_type = vectype; 9356 bump = vect_get_data_ptr_increment (dr_info, aggr_type, 9357 memory_access_type); 9358 } 9359 9360 tree vec_mask = NULL_TREE; 9361 prev_stmt_info = NULL; 9362 poly_uint64 group_elt = 0; 9363 for (j = 0; j < ncopies; j++) 9364 { 9365 stmt_vec_info new_stmt_info = NULL; 9366 /* 1. Create the vector or array pointer update chain. */ 9367 if (j == 0) 9368 { 9369 bool simd_lane_access_p 9370 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0; 9371 if (simd_lane_access_p 9372 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR 9373 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0)) 9374 && integer_zerop (get_dr_vinfo_offset (first_dr_info)) 9375 && integer_zerop (DR_INIT (first_dr_info->dr)) 9376 && alias_sets_conflict_p (get_alias_set (aggr_type), 9377 get_alias_set (TREE_TYPE (ref_type))) 9378 && (alignment_support_scheme == dr_aligned 9379 || alignment_support_scheme == dr_unaligned_supported)) 9380 { 9381 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr)); 9382 dataref_offset = build_int_cst (ref_type, 0); 9383 } 9384 else if (diff_first_stmt_info) 9385 { 9386 dataref_ptr 9387 = vect_create_data_ref_ptr (first_stmt_info_for_drptr, 9388 aggr_type, at_loop, offset, &dummy, 9389 gsi, &ptr_incr, simd_lane_access_p, 9390 byte_offset, bump); 9391 /* Adjust the pointer by the difference to first_stmt. 
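	     As a purely illustrative example with invented accesses: if
	     the pointer was created for a scalar stmt that reads a[i + 2]
	     (DR_INIT 8 with 4-byte elements) while first_stmt_info reads
	     a[i] (DR_INIT 0), the difference computed below is
	     0 - 8 == -8 and bump_vector_ptr moves the pointer back to the
	     start of the group.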
*/ 9392 data_reference_p ptrdr 9393 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr); 9394 tree diff 9395 = fold_convert (sizetype, 9396 size_binop (MINUS_EXPR, 9397 DR_INIT (first_dr_info->dr), 9398 DR_INIT (ptrdr))); 9399 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, 9400 stmt_info, diff); 9401 if (alignment_support_scheme == dr_explicit_realign) 9402 { 9403 msq = vect_setup_realignment (first_stmt_info_for_drptr, gsi, 9404 &realignment_token, 9405 alignment_support_scheme, 9406 dataref_ptr, &at_loop); 9407 gcc_assert (!compute_in_loop); 9408 } 9409 } 9410 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) 9411 vect_get_gather_scatter_ops (loop, stmt_info, &gs_info, 9412 &dataref_ptr, &vec_offset); 9413 else 9414 dataref_ptr 9415 = vect_create_data_ref_ptr (first_stmt_info, aggr_type, at_loop, 9416 offset, &dummy, gsi, &ptr_incr, 9417 simd_lane_access_p, 9418 byte_offset, bump); 9419 if (mask) 9420 { 9421 if (slp_node) 9422 { 9423 auto_vec<vec<tree> > vec_defs (1); 9424 vect_get_slp_defs (slp_node, &vec_defs); 9425 vec_mask = vec_defs[0][0]; 9426 } 9427 else 9428 vec_mask = vect_get_vec_def_for_operand (mask, stmt_info, 9429 mask_vectype); 9430 } 9431 } 9432 else 9433 { 9434 if (dataref_offset) 9435 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, 9436 bump); 9437 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) 9438 vec_offset = vect_get_vec_def_for_stmt_copy (vinfo, vec_offset); 9439 else 9440 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, 9441 stmt_info, bump); 9442 if (mask) 9443 vec_mask = vect_get_vec_def_for_stmt_copy (vinfo, vec_mask); 9444 } 9445 9446 if (grouped_load || slp_perm) 9447 dr_chain.create (vec_num); 9448 9449 if (memory_access_type == VMAT_LOAD_STORE_LANES) 9450 { 9451 tree vec_array; 9452 9453 vec_array = create_vector_array (vectype, vec_num); 9454 9455 tree final_mask = NULL_TREE; 9456 if (loop_masks) 9457 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies, 9458 vectype, j); 9459 if (vec_mask) 9460 final_mask = prepare_load_store_mask (mask_vectype, final_mask, 9461 vec_mask, gsi); 9462 9463 gcall *call; 9464 if (final_mask) 9465 { 9466 /* Emit: 9467 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR, 9468 VEC_MASK). */ 9469 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype)); 9470 tree alias_ptr = build_int_cst (ref_type, align); 9471 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3, 9472 dataref_ptr, alias_ptr, 9473 final_mask); 9474 } 9475 else 9476 { 9477 /* Emit: 9478 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */ 9479 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type); 9480 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref); 9481 } 9482 gimple_call_set_lhs (call, vec_array); 9483 gimple_call_set_nothrow (call, true); 9484 new_stmt_info = vect_finish_stmt_generation (stmt_info, call, gsi); 9485 9486 /* Extract each vector into an SSA_NAME. */ 9487 for (i = 0; i < vec_num; i++) 9488 { 9489 new_temp = read_vector_array (stmt_info, gsi, scalar_dest, 9490 vec_array, i); 9491 dr_chain.quick_push (new_temp); 9492 } 9493 9494 /* Record the mapping between SSA_NAMEs and statements. */ 9495 vect_record_grouped_load_vectors (stmt_info, dr_chain); 9496 9497 /* Record that VEC_ARRAY is now dead. 
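	     The clobber emitted below only marks the end of the temporary
	     array's lifetime and expands to no code; it allows later
	     passes to share the array's stack slot with other temporaries.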
*/ 9498 vect_clobber_variable (stmt_info, gsi, vec_array); 9499 } 9500 else 9501 { 9502 for (i = 0; i < vec_num; i++) 9503 { 9504 tree final_mask = NULL_TREE; 9505 if (loop_masks 9506 && memory_access_type != VMAT_INVARIANT) 9507 final_mask = vect_get_loop_mask (gsi, loop_masks, 9508 vec_num * ncopies, 9509 vectype, vec_num * j + i); 9510 if (vec_mask) 9511 final_mask = prepare_load_store_mask (mask_vectype, final_mask, 9512 vec_mask, gsi); 9513 9514 if (i > 0) 9515 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, 9516 stmt_info, bump); 9517 9518 /* 2. Create the vector-load in the loop. */ 9519 gimple *new_stmt = NULL; 9520 switch (alignment_support_scheme) 9521 { 9522 case dr_aligned: 9523 case dr_unaligned_supported: 9524 { 9525 unsigned int misalign; 9526 unsigned HOST_WIDE_INT align; 9527 9528 if (memory_access_type == VMAT_GATHER_SCATTER) 9529 { 9530 tree zero = build_zero_cst (vectype); 9531 tree scale = size_int (gs_info.scale); 9532 gcall *call; 9533 if (loop_masks) 9534 call = gimple_build_call_internal 9535 (IFN_MASK_GATHER_LOAD, 5, dataref_ptr, 9536 vec_offset, scale, zero, final_mask); 9537 else 9538 call = gimple_build_call_internal 9539 (IFN_GATHER_LOAD, 4, dataref_ptr, 9540 vec_offset, scale, zero); 9541 gimple_call_set_nothrow (call, true); 9542 new_stmt = call; 9543 data_ref = NULL_TREE; 9544 break; 9545 } 9546 9547 align = 9548 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info)); 9549 if (alignment_support_scheme == dr_aligned) 9550 { 9551 gcc_assert (aligned_access_p (first_dr_info)); 9552 misalign = 0; 9553 } 9554 else if (DR_MISALIGNMENT (first_dr_info) == -1) 9555 { 9556 align = dr_alignment 9557 (vect_dr_behavior (first_dr_info)); 9558 misalign = 0; 9559 } 9560 else 9561 misalign = DR_MISALIGNMENT (first_dr_info); 9562 if (dataref_offset == NULL_TREE 9563 && TREE_CODE (dataref_ptr) == SSA_NAME) 9564 set_ptr_info_alignment (get_ptr_info (dataref_ptr), 9565 align, misalign); 9566 align = least_bit_hwi (misalign | align); 9567 9568 if (final_mask) 9569 { 9570 tree ptr = build_int_cst (ref_type, 9571 align * BITS_PER_UNIT); 9572 gcall *call 9573 = gimple_build_call_internal (IFN_MASK_LOAD, 3, 9574 dataref_ptr, ptr, 9575 final_mask); 9576 gimple_call_set_nothrow (call, true); 9577 new_stmt = call; 9578 data_ref = NULL_TREE; 9579 } 9580 else 9581 { 9582 tree ltype = vectype; 9583 tree new_vtype = NULL_TREE; 9584 /* If there's no peeling for gaps but we have a gap 9585 with slp loads then load the lower half of the 9586 vector only. See get_group_load_store_type for 9587 when we apply this optimization. */ 9588 if (slp 9589 && loop_vinfo 9590 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) 9591 && DR_GROUP_GAP (first_stmt_info) != 0 9592 && known_eq (nunits, 9593 (group_size 9594 - DR_GROUP_GAP (first_stmt_info)) * 2) 9595 && known_eq (nunits, group_size)) 9596 { 9597 tree half_vtype; 9598 new_vtype 9599 = vector_vector_composition_type (vectype, 2, 9600 &half_vtype); 9601 if (new_vtype != NULL_TREE) 9602 ltype = half_vtype; 9603 } 9604 tree offset 9605 = (dataref_offset ? 
dataref_offset 9606 : build_int_cst (ref_type, 0)); 9607 if (ltype != vectype 9608 && memory_access_type == VMAT_CONTIGUOUS_REVERSE) 9609 { 9610 unsigned HOST_WIDE_INT gap 9611 = DR_GROUP_GAP (first_stmt_info); 9612 gap *= tree_to_uhwi (TYPE_SIZE_UNIT (elem_type)); 9613 tree gapcst = build_int_cst (ref_type, gap); 9614 offset = size_binop (PLUS_EXPR, offset, gapcst); 9615 } 9616 data_ref 9617 = fold_build2 (MEM_REF, ltype, dataref_ptr, offset); 9618 if (alignment_support_scheme == dr_aligned) 9619 ; 9620 else 9621 TREE_TYPE (data_ref) 9622 = build_aligned_type (TREE_TYPE (data_ref), 9623 align * BITS_PER_UNIT); 9624 if (ltype != vectype) 9625 { 9626 vect_copy_ref_info (data_ref, 9627 DR_REF (first_dr_info->dr)); 9628 tree tem = make_ssa_name (ltype); 9629 new_stmt = gimple_build_assign (tem, data_ref); 9630 vect_finish_stmt_generation (stmt_info, new_stmt, 9631 gsi); 9632 data_ref = NULL; 9633 vec<constructor_elt, va_gc> *v; 9634 vec_alloc (v, 2); 9635 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE) 9636 { 9637 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, 9638 build_zero_cst (ltype)); 9639 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem); 9640 } 9641 else 9642 { 9643 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem); 9644 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, 9645 build_zero_cst (ltype)); 9646 } 9647 gcc_assert (new_vtype != NULL_TREE); 9648 if (new_vtype == vectype) 9649 new_stmt = gimple_build_assign ( 9650 vec_dest, build_constructor (vectype, v)); 9651 else 9652 { 9653 tree new_vname = make_ssa_name (new_vtype); 9654 new_stmt = gimple_build_assign ( 9655 new_vname, build_constructor (new_vtype, v)); 9656 vect_finish_stmt_generation (stmt_info, 9657 new_stmt, gsi); 9658 new_stmt = gimple_build_assign ( 9659 vec_dest, build1 (VIEW_CONVERT_EXPR, vectype, 9660 new_vname)); 9661 } 9662 } 9663 } 9664 break; 9665 } 9666 case dr_explicit_realign: 9667 { 9668 tree ptr, bump; 9669 9670 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype)); 9671 9672 if (compute_in_loop) 9673 msq = vect_setup_realignment (first_stmt_info, gsi, 9674 &realignment_token, 9675 dr_explicit_realign, 9676 dataref_ptr, NULL); 9677 9678 if (TREE_CODE (dataref_ptr) == SSA_NAME) 9679 ptr = copy_ssa_name (dataref_ptr); 9680 else 9681 ptr = make_ssa_name (TREE_TYPE (dataref_ptr)); 9682 // For explicit realign the target alignment should be 9683 // known at compile time. 
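	      // As a purely illustrative example: with a 16-byte target
	      // alignment the constant built below is -16 (all bits set
	      // except the low four), so the BIT_AND_EXPR rounds
	      // DATAREF_PTR down to the aligned chunk that contains the
	      // first requested element.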
9684 unsigned HOST_WIDE_INT align = 9685 DR_TARGET_ALIGNMENT (first_dr_info).to_constant (); 9686 new_stmt = gimple_build_assign 9687 (ptr, BIT_AND_EXPR, dataref_ptr, 9688 build_int_cst 9689 (TREE_TYPE (dataref_ptr), 9690 -(HOST_WIDE_INT) align)); 9691 vect_finish_stmt_generation (stmt_info, new_stmt, gsi); 9692 data_ref 9693 = build2 (MEM_REF, vectype, ptr, 9694 build_int_cst (ref_type, 0)); 9695 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr)); 9696 vec_dest = vect_create_destination_var (scalar_dest, 9697 vectype); 9698 new_stmt = gimple_build_assign (vec_dest, data_ref); 9699 new_temp = make_ssa_name (vec_dest, new_stmt); 9700 gimple_assign_set_lhs (new_stmt, new_temp); 9701 gimple_move_vops (new_stmt, stmt_info->stmt); 9702 vect_finish_stmt_generation (stmt_info, new_stmt, gsi); 9703 msq = new_temp; 9704 9705 bump = size_binop (MULT_EXPR, vs, 9706 TYPE_SIZE_UNIT (elem_type)); 9707 bump = size_binop (MINUS_EXPR, bump, size_one_node); 9708 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, 9709 stmt_info, bump); 9710 new_stmt = gimple_build_assign 9711 (NULL_TREE, BIT_AND_EXPR, ptr, 9712 build_int_cst 9713 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align)); 9714 ptr = copy_ssa_name (ptr, new_stmt); 9715 gimple_assign_set_lhs (new_stmt, ptr); 9716 vect_finish_stmt_generation (stmt_info, new_stmt, gsi); 9717 data_ref 9718 = build2 (MEM_REF, vectype, ptr, 9719 build_int_cst (ref_type, 0)); 9720 break; 9721 } 9722 case dr_explicit_realign_optimized: 9723 { 9724 if (TREE_CODE (dataref_ptr) == SSA_NAME) 9725 new_temp = copy_ssa_name (dataref_ptr); 9726 else 9727 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr)); 9728 // We should only be doing this if we know the target 9729 // alignment at compile time. 9730 unsigned HOST_WIDE_INT align = 9731 DR_TARGET_ALIGNMENT (first_dr_info).to_constant (); 9732 new_stmt = gimple_build_assign 9733 (new_temp, BIT_AND_EXPR, dataref_ptr, 9734 build_int_cst (TREE_TYPE (dataref_ptr), 9735 -(HOST_WIDE_INT) align)); 9736 vect_finish_stmt_generation (stmt_info, new_stmt, gsi); 9737 data_ref 9738 = build2 (MEM_REF, vectype, new_temp, 9739 build_int_cst (ref_type, 0)); 9740 break; 9741 } 9742 default: 9743 gcc_unreachable (); 9744 } 9745 vec_dest = vect_create_destination_var (scalar_dest, vectype); 9746 /* DATA_REF is null if we've already built the statement. */ 9747 if (data_ref) 9748 { 9749 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr)); 9750 new_stmt = gimple_build_assign (vec_dest, data_ref); 9751 } 9752 new_temp = make_ssa_name (vec_dest, new_stmt); 9753 gimple_set_lhs (new_stmt, new_temp); 9754 new_stmt_info 9755 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi); 9756 9757 /* 3. Handle explicit realignment if necessary/supported. 
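	     Roughly, MSQ is the aligned vector covering the first scalar
	     element and LSQ the following aligned vector (see the
	     dr_explicit_realign pseudo code earlier); REALIGN_LOAD
	     combines the two so that the misaligned elements line up.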
9758 Create in loop: 9759 vec_dest = realign_load (msq, lsq, realignment_token) */ 9760 if (alignment_support_scheme == dr_explicit_realign_optimized 9761 || alignment_support_scheme == dr_explicit_realign) 9762 { 9763 lsq = gimple_assign_lhs (new_stmt); 9764 if (!realignment_token) 9765 realignment_token = dataref_ptr; 9766 vec_dest = vect_create_destination_var (scalar_dest, vectype); 9767 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR, 9768 msq, lsq, realignment_token); 9769 new_temp = make_ssa_name (vec_dest, new_stmt); 9770 gimple_assign_set_lhs (new_stmt, new_temp); 9771 new_stmt_info 9772 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi); 9773 9774 if (alignment_support_scheme == dr_explicit_realign_optimized) 9775 { 9776 gcc_assert (phi); 9777 if (i == vec_num - 1 && j == ncopies - 1) 9778 add_phi_arg (phi, lsq, 9779 loop_latch_edge (containing_loop), 9780 UNKNOWN_LOCATION); 9781 msq = lsq; 9782 } 9783 } 9784 9785 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE) 9786 { 9787 tree perm_mask = perm_mask_for_reverse (vectype); 9788 new_temp = permute_vec_elements (new_temp, new_temp, 9789 perm_mask, stmt_info, gsi); 9790 new_stmt_info = vinfo->lookup_def (new_temp); 9791 } 9792 9793 /* Collect vector loads and later create their permutation in 9794 vect_transform_grouped_load (). */ 9795 if (grouped_load || slp_perm) 9796 dr_chain.quick_push (new_temp); 9797 9798 /* Store vector loads in the corresponding SLP_NODE. */ 9799 if (slp && !slp_perm) 9800 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info); 9801 9802 /* With SLP permutation we load the gaps as well, without 9803 we need to skip the gaps after we manage to fully load 9804 all elements. group_gap_adj is DR_GROUP_SIZE here. */ 9805 group_elt += nunits; 9806 if (maybe_ne (group_gap_adj, 0U) 9807 && !slp_perm 9808 && known_eq (group_elt, group_size - group_gap_adj)) 9809 { 9810 poly_wide_int bump_val 9811 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type)) 9812 * group_gap_adj); 9813 tree bump = wide_int_to_tree (sizetype, bump_val); 9814 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, 9815 stmt_info, bump); 9816 group_elt = 0; 9817 } 9818 } 9819 /* Bump the vector pointer to account for a gap or for excess 9820 elements loaded for a permuted SLP load. */ 9821 if (maybe_ne (group_gap_adj, 0U) && slp_perm) 9822 { 9823 poly_wide_int bump_val 9824 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type)) 9825 * group_gap_adj); 9826 tree bump = wide_int_to_tree (sizetype, bump_val); 9827 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, 9828 stmt_info, bump); 9829 } 9830 } 9831 9832 if (slp && !slp_perm) 9833 continue; 9834 9835 if (slp_perm) 9836 { 9837 unsigned n_perms; 9838 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf, 9839 slp_node_instance, false, 9840 &n_perms)) 9841 { 9842 dr_chain.release (); 9843 return false; 9844 } 9845 } 9846 else 9847 { 9848 if (grouped_load) 9849 { 9850 if (memory_access_type != VMAT_LOAD_STORE_LANES) 9851 vect_transform_grouped_load (stmt_info, dr_chain, 9852 group_size, gsi); 9853 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info); 9854 } 9855 else 9856 { 9857 if (j == 0) 9858 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info; 9859 else 9860 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info; 9861 prev_stmt_info = new_stmt_info; 9862 } 9863 } 9864 dr_chain.release (); 9865 } 9866 9867 return true; 9868 } 9869 9870 /* Function vect_is_simple_cond. 9871 9872 Input: 9873 LOOP - the loop that is being vectorized. 
9874 COND - Condition that is checked for simple use.
9875
9876 Output:
9877 *COMP_VECTYPE - the vector type for the comparison.
9878 *DTS - The def types for the arguments of the comparison.
9879
9880 Returns whether COND can be vectorized. Checks whether the
9881 condition operands are supportable using vect_is_simple_use. */
9882
9883 static bool
9884 vect_is_simple_cond (tree cond, vec_info *vinfo, slp_tree slp_node,
9885 tree *comp_vectype, enum vect_def_type *dts,
9886 tree vectype)
9887 {
9888 tree lhs, rhs;
9889 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9890
9891 /* Mask case. */
9892 if (TREE_CODE (cond) == SSA_NAME
9893 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
9894 {
9895 if (!vect_is_simple_use (cond, vinfo, &dts[0], comp_vectype)
9896 || !*comp_vectype
9897 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
9898 return false;
9899 return true;
9900 }
9901
9902 if (!COMPARISON_CLASS_P (cond))
9903 return false;
9904
9905 lhs = TREE_OPERAND (cond, 0);
9906 rhs = TREE_OPERAND (cond, 1);
9907
9908 if (TREE_CODE (lhs) == SSA_NAME)
9909 {
9910 if (!vect_is_simple_use (lhs, vinfo, &dts[0], &vectype1))
9911 return false;
9912 }
9913 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
9914 || TREE_CODE (lhs) == FIXED_CST)
9915 dts[0] = vect_constant_def;
9916 else
9917 return false;
9918
9919 if (TREE_CODE (rhs) == SSA_NAME)
9920 {
9921 if (!vect_is_simple_use (rhs, vinfo, &dts[1], &vectype2))
9922 return false;
9923 }
9924 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
9925 || TREE_CODE (rhs) == FIXED_CST)
9926 dts[1] = vect_constant_def;
9927 else
9928 return false;
9929
9930 if (vectype1 && vectype2
9931 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
9932 TYPE_VECTOR_SUBPARTS (vectype2)))
9933 return false;
9934
9935 *comp_vectype = vectype1 ? vectype1 : vectype2;
9936 /* Invariant comparison. */
9937 if (! *comp_vectype)
9938 {
9939 tree scalar_type = TREE_TYPE (lhs);
9940 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
9941 *comp_vectype = truth_type_for (vectype);
9942 else
9943 {
9944 /* If we can widen the comparison to match vectype do so. */
9945 if (INTEGRAL_TYPE_P (scalar_type)
9946 && !slp_node
9947 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
9948 TYPE_SIZE (TREE_TYPE (vectype))))
9949 scalar_type = build_nonstandard_integer_type
9950 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
9951 TYPE_UNSIGNED (scalar_type));
9952 *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
9953 slp_node);
9954 }
9955 }
9956
9957 return true;
9958 }
9959
9960 /* vectorizable_condition.
9961
9962 Check if STMT_INFO is a conditional modify expression that can be vectorized.
9963 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
9964 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
9965 at GSI.
9966
9967 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
9968
9969 Return true if STMT_INFO is vectorizable in this way.
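   As a purely illustrative example (SSA names invented): a scalar
   statement like x_1 = a_2 < b_3 ? c_4 : d_5 is vectorized as
   vx_6 = VEC_COND_EXPR <va_7 < vb_8, vc_9, vd_10> in the unmasked,
   non-reduction case.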
*/ 9970 9971 static bool 9972 vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, 9973 stmt_vec_info *vec_stmt, 9974 slp_tree slp_node, stmt_vector_for_cost *cost_vec) 9975 { 9976 vec_info *vinfo = stmt_info->vinfo; 9977 tree scalar_dest = NULL_TREE; 9978 tree vec_dest = NULL_TREE; 9979 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE; 9980 tree then_clause, else_clause; 9981 tree comp_vectype = NULL_TREE; 9982 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE; 9983 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE; 9984 tree vec_compare; 9985 tree new_temp; 9986 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 9987 enum vect_def_type dts[4] 9988 = {vect_unknown_def_type, vect_unknown_def_type, 9989 vect_unknown_def_type, vect_unknown_def_type}; 9990 int ndts = 4; 9991 int ncopies; 9992 int vec_num; 9993 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR; 9994 stmt_vec_info prev_stmt_info = NULL; 9995 int i, j; 9996 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); 9997 vec<tree> vec_oprnds0 = vNULL; 9998 vec<tree> vec_oprnds1 = vNULL; 9999 vec<tree> vec_oprnds2 = vNULL; 10000 vec<tree> vec_oprnds3 = vNULL; 10001 tree vec_cmp_type; 10002 bool masked = false; 10003 10004 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) 10005 return false; 10006 10007 /* Is vectorizable conditional operation? */ 10008 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt); 10009 if (!stmt) 10010 return false; 10011 10012 code = gimple_assign_rhs_code (stmt); 10013 if (code != COND_EXPR) 10014 return false; 10015 10016 stmt_vec_info reduc_info = NULL; 10017 int reduc_index = -1; 10018 vect_reduction_type reduction_type = TREE_CODE_REDUCTION; 10019 bool for_reduction 10020 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL; 10021 if (for_reduction) 10022 { 10023 if (STMT_SLP_TYPE (stmt_info)) 10024 return false; 10025 reduc_info = info_for_reduction (stmt_info); 10026 reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info); 10027 reduc_index = STMT_VINFO_REDUC_IDX (stmt_info); 10028 gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION 10029 || reduc_index != -1); 10030 } 10031 else 10032 { 10033 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def) 10034 return false; 10035 10036 /* FORNOW: only supported as part of a reduction. 
*/ 10037 if (STMT_VINFO_LIVE_P (stmt_info)) 10038 { 10039 if (dump_enabled_p ()) 10040 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 10041 "value used after loop.\n"); 10042 return false; 10043 } 10044 } 10045 10046 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 10047 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE; 10048 10049 if (slp_node) 10050 { 10051 ncopies = 1; 10052 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); 10053 } 10054 else 10055 { 10056 ncopies = vect_get_num_copies (loop_vinfo, vectype); 10057 vec_num = 1; 10058 } 10059 10060 gcc_assert (ncopies >= 1); 10061 if (for_reduction && ncopies > 1) 10062 return false; /* FORNOW */ 10063 10064 cond_expr = gimple_assign_rhs1 (stmt); 10065 then_clause = gimple_assign_rhs2 (stmt); 10066 else_clause = gimple_assign_rhs3 (stmt); 10067 10068 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo, slp_node, 10069 &comp_vectype, &dts[0], vectype) 10070 || !comp_vectype) 10071 return false; 10072 10073 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &dts[2], &vectype1)) 10074 return false; 10075 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &dts[3], &vectype2)) 10076 return false; 10077 10078 if (vectype1 && !useless_type_conversion_p (vectype, vectype1)) 10079 return false; 10080 10081 if (vectype2 && !useless_type_conversion_p (vectype, vectype2)) 10082 return false; 10083 10084 masked = !COMPARISON_CLASS_P (cond_expr); 10085 vec_cmp_type = truth_type_for (comp_vectype); 10086 10087 if (vec_cmp_type == NULL_TREE) 10088 return false; 10089 10090 cond_code = TREE_CODE (cond_expr); 10091 if (!masked) 10092 { 10093 cond_expr0 = TREE_OPERAND (cond_expr, 0); 10094 cond_expr1 = TREE_OPERAND (cond_expr, 1); 10095 } 10096 10097 /* For conditional reductions, the "then" value needs to be the candidate 10098 value calculated by this iteration while the "else" value needs to be 10099 the result carried over from previous iterations. If the COND_EXPR 10100 is the other way around, we need to swap it. */ 10101 bool must_invert_cmp_result = false; 10102 if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1) 10103 { 10104 if (masked) 10105 must_invert_cmp_result = true; 10106 else 10107 { 10108 bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0)); 10109 tree_code new_code = invert_tree_comparison (cond_code, honor_nans); 10110 if (new_code == ERROR_MARK) 10111 must_invert_cmp_result = true; 10112 else 10113 { 10114 cond_code = new_code; 10115 /* Make sure we don't accidentally use the old condition. */ 10116 cond_expr = NULL_TREE; 10117 } 10118 } 10119 std::swap (then_clause, else_clause); 10120 } 10121 10122 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype)) 10123 { 10124 /* Boolean values may have another representation in vectors 10125 and therefore we prefer bit operations over comparison for 10126 them (which also works for scalar masks). We store opcodes 10127 to use in bitop1 and bitop2. Statement is vectorized as 10128 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2) 10129 depending on bitop1 and bitop2 arity. 
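	 As a purely illustrative example: for GE_EXPR the switch below
	 selects bitop1 == BIT_NOT_EXPR and bitop2 == BIT_IOR_EXPR, so
	 a >= b on mask values is emitted as a | ~b (the second form
	 above, with BITOP1 applied to rhs2).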
*/ 10130 switch (cond_code) 10131 { 10132 case GT_EXPR: 10133 bitop1 = BIT_NOT_EXPR; 10134 bitop2 = BIT_AND_EXPR; 10135 break; 10136 case GE_EXPR: 10137 bitop1 = BIT_NOT_EXPR; 10138 bitop2 = BIT_IOR_EXPR; 10139 break; 10140 case LT_EXPR: 10141 bitop1 = BIT_NOT_EXPR; 10142 bitop2 = BIT_AND_EXPR; 10143 std::swap (cond_expr0, cond_expr1); 10144 break; 10145 case LE_EXPR: 10146 bitop1 = BIT_NOT_EXPR; 10147 bitop2 = BIT_IOR_EXPR; 10148 std::swap (cond_expr0, cond_expr1); 10149 break; 10150 case NE_EXPR: 10151 bitop1 = BIT_XOR_EXPR; 10152 break; 10153 case EQ_EXPR: 10154 bitop1 = BIT_XOR_EXPR; 10155 bitop2 = BIT_NOT_EXPR; 10156 break; 10157 default: 10158 return false; 10159 } 10160 cond_code = SSA_NAME; 10161 } 10162 10163 if (TREE_CODE_CLASS (cond_code) == tcc_comparison 10164 && reduction_type == EXTRACT_LAST_REDUCTION 10165 && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code)) 10166 { 10167 if (dump_enabled_p ()) 10168 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 10169 "reduction comparison operation not supported.\n"); 10170 return false; 10171 } 10172 10173 if (!vec_stmt) 10174 { 10175 if (bitop1 != NOP_EXPR) 10176 { 10177 machine_mode mode = TYPE_MODE (comp_vectype); 10178 optab optab; 10179 10180 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default); 10181 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing) 10182 return false; 10183 10184 if (bitop2 != NOP_EXPR) 10185 { 10186 optab = optab_for_tree_code (bitop2, comp_vectype, 10187 optab_default); 10188 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing) 10189 return false; 10190 } 10191 } 10192 10193 if (loop_vinfo 10194 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) 10195 && reduction_type == EXTRACT_LAST_REDUCTION) 10196 vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo), 10197 ncopies * vec_num, vectype, NULL); 10198 10199 vect_cost_for_stmt kind = vector_stmt; 10200 if (reduction_type == EXTRACT_LAST_REDUCTION) 10201 /* Count one reduction-like operation per vector. */ 10202 kind = vec_to_scalar; 10203 else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code)) 10204 return false; 10205 10206 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type; 10207 vect_model_simple_cost (stmt_info, ncopies, dts, ndts, slp_node, 10208 cost_vec, kind); 10209 return true; 10210 } 10211 10212 /* Transform. */ 10213 10214 if (!slp_node) 10215 { 10216 vec_oprnds0.create (1); 10217 vec_oprnds1.create (1); 10218 vec_oprnds2.create (1); 10219 vec_oprnds3.create (1); 10220 } 10221 10222 /* Handle def. */ 10223 scalar_dest = gimple_assign_lhs (stmt); 10224 if (reduction_type != EXTRACT_LAST_REDUCTION) 10225 vec_dest = vect_create_destination_var (scalar_dest, vectype); 10226 10227 /* Handle cond expr. */ 10228 for (j = 0; j < ncopies; j++) 10229 { 10230 bool swap_cond_operands = false; 10231 10232 /* See whether another part of the vectorized code applies a loop 10233 mask to the condition, or to its inverse. 
*/ 10234 10235 vec_loop_masks *masks = NULL; 10236 if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)) 10237 { 10238 if (reduction_type == EXTRACT_LAST_REDUCTION) 10239 masks = &LOOP_VINFO_MASKS (loop_vinfo); 10240 else 10241 { 10242 scalar_cond_masked_key cond (cond_expr, ncopies); 10243 if (loop_vinfo->scalar_cond_masked_set.contains (cond)) 10244 masks = &LOOP_VINFO_MASKS (loop_vinfo); 10245 else 10246 { 10247 bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0)); 10248 cond.code = invert_tree_comparison (cond.code, honor_nans); 10249 if (loop_vinfo->scalar_cond_masked_set.contains (cond)) 10250 { 10251 masks = &LOOP_VINFO_MASKS (loop_vinfo); 10252 cond_code = cond.code; 10253 swap_cond_operands = true; 10254 } 10255 } 10256 } 10257 } 10258 10259 stmt_vec_info new_stmt_info = NULL; 10260 if (j == 0) 10261 { 10262 if (slp_node) 10263 { 10264 auto_vec<vec<tree>, 4> vec_defs; 10265 vect_get_slp_defs (slp_node, &vec_defs); 10266 vec_oprnds3 = vec_defs.pop (); 10267 vec_oprnds2 = vec_defs.pop (); 10268 if (!masked) 10269 vec_oprnds1 = vec_defs.pop (); 10270 vec_oprnds0 = vec_defs.pop (); 10271 } 10272 else 10273 { 10274 if (masked) 10275 { 10276 vec_cond_lhs 10277 = vect_get_vec_def_for_operand (cond_expr, stmt_info, 10278 comp_vectype); 10279 } 10280 else 10281 { 10282 vec_cond_lhs 10283 = vect_get_vec_def_for_operand (cond_expr0, 10284 stmt_info, comp_vectype); 10285 vec_cond_rhs 10286 = vect_get_vec_def_for_operand (cond_expr1, 10287 stmt_info, comp_vectype); 10288 } 10289 vec_then_clause = vect_get_vec_def_for_operand (then_clause, 10290 stmt_info); 10291 if (reduction_type != EXTRACT_LAST_REDUCTION) 10292 vec_else_clause = vect_get_vec_def_for_operand (else_clause, 10293 stmt_info); 10294 } 10295 } 10296 else 10297 { 10298 vec_cond_lhs 10299 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnds0.pop ()); 10300 if (!masked) 10301 vec_cond_rhs 10302 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnds1.pop ()); 10303 10304 vec_then_clause = vect_get_vec_def_for_stmt_copy (vinfo, 10305 vec_oprnds2.pop ()); 10306 vec_else_clause = vect_get_vec_def_for_stmt_copy (vinfo, 10307 vec_oprnds3.pop ()); 10308 } 10309 10310 if (!slp_node) 10311 { 10312 vec_oprnds0.quick_push (vec_cond_lhs); 10313 if (!masked) 10314 vec_oprnds1.quick_push (vec_cond_rhs); 10315 vec_oprnds2.quick_push (vec_then_clause); 10316 vec_oprnds3.quick_push (vec_else_clause); 10317 } 10318 10319 /* Arguments are ready. Create the new vector stmt. */ 10320 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs) 10321 { 10322 vec_then_clause = vec_oprnds2[i]; 10323 vec_else_clause = vec_oprnds3[i]; 10324 10325 if (swap_cond_operands) 10326 std::swap (vec_then_clause, vec_else_clause); 10327 10328 if (masked) 10329 vec_compare = vec_cond_lhs; 10330 else 10331 { 10332 vec_cond_rhs = vec_oprnds1[i]; 10333 if (bitop1 == NOP_EXPR) 10334 vec_compare = build2 (cond_code, vec_cmp_type, 10335 vec_cond_lhs, vec_cond_rhs); 10336 else 10337 { 10338 new_temp = make_ssa_name (vec_cmp_type); 10339 gassign *new_stmt; 10340 if (bitop1 == BIT_NOT_EXPR) 10341 new_stmt = gimple_build_assign (new_temp, bitop1, 10342 vec_cond_rhs); 10343 else 10344 new_stmt 10345 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs, 10346 vec_cond_rhs); 10347 vect_finish_stmt_generation (stmt_info, new_stmt, gsi); 10348 if (bitop2 == NOP_EXPR) 10349 vec_compare = new_temp; 10350 else if (bitop2 == BIT_NOT_EXPR) 10351 { 10352 /* Instead of doing ~x ? y : z do x ? z : y. 
*/
10353 vec_compare = new_temp;
10354 std::swap (vec_then_clause, vec_else_clause);
10355 }
10356 else
10357 {
10358 vec_compare = make_ssa_name (vec_cmp_type);
10359 new_stmt
10360 = gimple_build_assign (vec_compare, bitop2,
10361 vec_cond_lhs, new_temp);
10362 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10363 }
10364 }
10365 }
10366
10367 /* If we decided to apply a loop mask to the result of the vector
10368 comparison, AND the comparison with the mask now. Later passes
10369 should then be able to reuse the AND results between multiple
10370 vector statements.
10371
10372 For example:
10373 for (int i = 0; i < 100; ++i)
10374 x[i] = y[i] ? z[i] : 10;
10375
10376 results in the following optimized GIMPLE:
10377
10378 mask__35.8_43 = vect__4.7_41 != { 0, ... };
10379 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
10380 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
10381 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
10382 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
10383 vect_iftmp.11_47, { 10, ... }>;
10384
10385 instead of using masked and unmasked forms of
10386 vec != { 0, ... } (masked in the MASK_LOAD,
10387 unmasked in the VEC_COND_EXPR). */
10388
10389 /* Force vec_compare to be an SSA_NAME rather than a comparison,
10390 in cases where that's necessary. */
10391
10392 if (masks || reduction_type == EXTRACT_LAST_REDUCTION)
10393 {
10394 if (!is_gimple_val (vec_compare))
10395 {
10396 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10397 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10398 vec_compare);
10399 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10400 vec_compare = vec_compare_name;
10401 }
10402
10403 if (must_invert_cmp_result)
10404 {
10405 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10406 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10407 BIT_NOT_EXPR,
10408 vec_compare);
10409 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10410 vec_compare = vec_compare_name;
10411 }
10412
10413 if (masks)
10414 {
10415 unsigned vec_num = vec_oprnds0.length ();
10416 tree loop_mask
10417 = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
10418 vectype, vec_num * j + i);
10419 tree tmp2 = make_ssa_name (vec_cmp_type);
10420 gassign *g
10421 = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
10422 loop_mask);
10423 vect_finish_stmt_generation (stmt_info, g, gsi);
10424 vec_compare = tmp2;
10425 }
10426 }
10427
10428 if (reduction_type == EXTRACT_LAST_REDUCTION)
10429 {
10430 gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
10431 tree lhs = gimple_get_lhs (old_stmt);
10432 gcall *new_stmt = gimple_build_call_internal
10433 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
10434 vec_then_clause);
10435 gimple_call_set_lhs (new_stmt, lhs);
10436 SSA_NAME_DEF_STMT (lhs) = new_stmt;
10437 if (old_stmt == gsi_stmt (*gsi))
10438 new_stmt_info = vect_finish_replace_stmt (stmt_info, new_stmt);
10439 else
10440 {
10441 /* In this case we're moving the definition to later in the
10442 block. That doesn't matter because the only uses of the
10443 lhs are in phi statements.
*/ 10444 gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt); 10445 gsi_remove (&old_gsi, true); 10446 new_stmt_info 10447 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi); 10448 } 10449 } 10450 else 10451 { 10452 new_temp = make_ssa_name (vec_dest); 10453 gassign *new_stmt 10454 = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare, 10455 vec_then_clause, vec_else_clause); 10456 new_stmt_info 10457 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi); 10458 } 10459 if (slp_node) 10460 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info); 10461 } 10462 10463 if (slp_node) 10464 continue; 10465 10466 if (j == 0) 10467 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info; 10468 else 10469 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info; 10470 10471 prev_stmt_info = new_stmt_info; 10472 } 10473 10474 vec_oprnds0.release (); 10475 vec_oprnds1.release (); 10476 vec_oprnds2.release (); 10477 vec_oprnds3.release (); 10478 10479 return true; 10480 } 10481 10482 /* vectorizable_comparison. 10483 10484 Check if STMT_INFO is comparison expression that can be vectorized. 10485 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized 10486 comparison, put it in VEC_STMT, and insert it at GSI. 10487 10488 Return true if STMT_INFO is vectorizable in this way. */ 10489 10490 static bool 10491 vectorizable_comparison (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, 10492 stmt_vec_info *vec_stmt, 10493 slp_tree slp_node, stmt_vector_for_cost *cost_vec) 10494 { 10495 vec_info *vinfo = stmt_info->vinfo; 10496 tree lhs, rhs1, rhs2; 10497 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE; 10498 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 10499 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE; 10500 tree new_temp; 10501 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 10502 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type}; 10503 int ndts = 2; 10504 poly_uint64 nunits; 10505 int ncopies; 10506 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR; 10507 stmt_vec_info prev_stmt_info = NULL; 10508 int i, j; 10509 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); 10510 vec<tree> vec_oprnds0 = vNULL; 10511 vec<tree> vec_oprnds1 = vNULL; 10512 tree mask_type; 10513 tree mask; 10514 10515 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) 10516 return false; 10517 10518 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype)) 10519 return false; 10520 10521 mask_type = vectype; 10522 nunits = TYPE_VECTOR_SUBPARTS (vectype); 10523 10524 if (slp_node) 10525 ncopies = 1; 10526 else 10527 ncopies = vect_get_num_copies (loop_vinfo, vectype); 10528 10529 gcc_assert (ncopies >= 1); 10530 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def) 10531 return false; 10532 10533 if (STMT_VINFO_LIVE_P (stmt_info)) 10534 { 10535 if (dump_enabled_p ()) 10536 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 10537 "value used after loop.\n"); 10538 return false; 10539 } 10540 10541 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt); 10542 if (!stmt) 10543 return false; 10544 10545 code = gimple_assign_rhs_code (stmt); 10546 10547 if (TREE_CODE_CLASS (code) != tcc_comparison) 10548 return false; 10549 10550 rhs1 = gimple_assign_rhs1 (stmt); 10551 rhs2 = gimple_assign_rhs2 (stmt); 10552 10553 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &dts[0], &vectype1)) 10554 return false; 10555 10556 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &dts[1], &vectype2)) 10557 return false; 10558 10559 if (vectype1 && vectype2 10560 
&& maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1), 10561 TYPE_VECTOR_SUBPARTS (vectype2))) 10562 return false; 10563 10564 vectype = vectype1 ? vectype1 : vectype2; 10565 10566 /* Invariant comparison. */ 10567 if (!vectype) 10568 { 10569 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1))) 10570 vectype = mask_type; 10571 else 10572 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1), 10573 slp_node); 10574 if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits)) 10575 return false; 10576 } 10577 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype))) 10578 return false; 10579 10580 /* Can't compare mask and non-mask types. */ 10581 if (vectype1 && vectype2 10582 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2))) 10583 return false; 10584 10585 /* Boolean values may have another representation in vectors 10586 and therefore we prefer bit operations over comparison for 10587 them (which also works for scalar masks). We store opcodes 10588 to use in bitop1 and bitop2. Statement is vectorized as 10589 BITOP2 (rhs1 BITOP1 rhs2) or 10590 rhs1 BITOP2 (BITOP1 rhs2) 10591 depending on bitop1 and bitop2 arity. */ 10592 bool swap_p = false; 10593 if (VECTOR_BOOLEAN_TYPE_P (vectype)) 10594 { 10595 if (code == GT_EXPR) 10596 { 10597 bitop1 = BIT_NOT_EXPR; 10598 bitop2 = BIT_AND_EXPR; 10599 } 10600 else if (code == GE_EXPR) 10601 { 10602 bitop1 = BIT_NOT_EXPR; 10603 bitop2 = BIT_IOR_EXPR; 10604 } 10605 else if (code == LT_EXPR) 10606 { 10607 bitop1 = BIT_NOT_EXPR; 10608 bitop2 = BIT_AND_EXPR; 10609 swap_p = true; 10610 } 10611 else if (code == LE_EXPR) 10612 { 10613 bitop1 = BIT_NOT_EXPR; 10614 bitop2 = BIT_IOR_EXPR; 10615 swap_p = true; 10616 } 10617 else 10618 { 10619 bitop1 = BIT_XOR_EXPR; 10620 if (code == EQ_EXPR) 10621 bitop2 = BIT_NOT_EXPR; 10622 } 10623 } 10624 10625 if (!vec_stmt) 10626 { 10627 if (bitop1 == NOP_EXPR) 10628 { 10629 if (!expand_vec_cmp_expr_p (vectype, mask_type, code)) 10630 return false; 10631 } 10632 else 10633 { 10634 machine_mode mode = TYPE_MODE (vectype); 10635 optab optab; 10636 10637 optab = optab_for_tree_code (bitop1, vectype, optab_default); 10638 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing) 10639 return false; 10640 10641 if (bitop2 != NOP_EXPR) 10642 { 10643 optab = optab_for_tree_code (bitop2, vectype, optab_default); 10644 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing) 10645 return false; 10646 } 10647 } 10648 10649 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type; 10650 vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)), 10651 dts, ndts, slp_node, cost_vec); 10652 return true; 10653 } 10654 10655 /* Transform. */ 10656 if (!slp_node) 10657 { 10658 vec_oprnds0.create (1); 10659 vec_oprnds1.create (1); 10660 } 10661 10662 /* Handle def. */ 10663 lhs = gimple_assign_lhs (stmt); 10664 mask = vect_create_destination_var (lhs, mask_type); 10665 10666 /* Handle cmp expr. 
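
     As noted above, when the operands are boolean vectors the comparison
     itself is lowered to bit operations; concretely (an illustrative
     summary, with A and B standing for the two mask operands):

       A >  B  ->  A & ~B
       A >= B  ->  A | ~B
       A <  B  ->  B & ~A
       A <= B  ->  B | ~A
       A == B  ->  ~(A ^ B)
       A != B  ->  A ^ B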
*/ 10667 for (j = 0; j < ncopies; j++) 10668 { 10669 stmt_vec_info new_stmt_info = NULL; 10670 if (j == 0) 10671 { 10672 if (slp_node) 10673 { 10674 auto_vec<vec<tree>, 2> vec_defs; 10675 vect_get_slp_defs (slp_node, &vec_defs); 10676 vec_oprnds1 = vec_defs.pop (); 10677 vec_oprnds0 = vec_defs.pop (); 10678 if (swap_p) 10679 std::swap (vec_oprnds0, vec_oprnds1); 10680 } 10681 else 10682 { 10683 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt_info, 10684 vectype); 10685 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt_info, 10686 vectype); 10687 } 10688 } 10689 else 10690 { 10691 vec_rhs1 = vect_get_vec_def_for_stmt_copy (vinfo, 10692 vec_oprnds0.pop ()); 10693 vec_rhs2 = vect_get_vec_def_for_stmt_copy (vinfo, 10694 vec_oprnds1.pop ()); 10695 } 10696 10697 if (!slp_node) 10698 { 10699 if (swap_p && j == 0) 10700 std::swap (vec_rhs1, vec_rhs2); 10701 vec_oprnds0.quick_push (vec_rhs1); 10702 vec_oprnds1.quick_push (vec_rhs2); 10703 } 10704 10705 /* Arguments are ready. Create the new vector stmt. */ 10706 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1) 10707 { 10708 vec_rhs2 = vec_oprnds1[i]; 10709 10710 new_temp = make_ssa_name (mask); 10711 if (bitop1 == NOP_EXPR) 10712 { 10713 gassign *new_stmt = gimple_build_assign (new_temp, code, 10714 vec_rhs1, vec_rhs2); 10715 new_stmt_info 10716 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi); 10717 } 10718 else 10719 { 10720 gassign *new_stmt; 10721 if (bitop1 == BIT_NOT_EXPR) 10722 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2); 10723 else 10724 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1, 10725 vec_rhs2); 10726 new_stmt_info 10727 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi); 10728 if (bitop2 != NOP_EXPR) 10729 { 10730 tree res = make_ssa_name (mask); 10731 if (bitop2 == BIT_NOT_EXPR) 10732 new_stmt = gimple_build_assign (res, bitop2, new_temp); 10733 else 10734 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1, 10735 new_temp); 10736 new_stmt_info 10737 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi); 10738 } 10739 } 10740 if (slp_node) 10741 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info); 10742 } 10743 10744 if (slp_node) 10745 continue; 10746 10747 if (j == 0) 10748 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info; 10749 else 10750 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info; 10751 10752 prev_stmt_info = new_stmt_info; 10753 } 10754 10755 vec_oprnds0.release (); 10756 vec_oprnds1.release (); 10757 10758 return true; 10759 } 10760 10761 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation 10762 can handle all live statements in the node. Otherwise return true 10763 if STMT_INFO is not live or if vectorizable_live_operation can handle it. 10764 GSI and VEC_STMT_P are as for vectorizable_live_operation. 
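
   For example (an illustrative case), in

     for (i = 0; i < n; ++i)
       last = a[i];
     use (last);

   the definition of LAST is "live": its value is needed after the loop,
   so besides the vector statement itself we must be able to extract the
   final scalar lane once the loop is done.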
*/ 10765 10766 static bool 10767 can_vectorize_live_stmts (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, 10768 slp_tree slp_node, slp_instance slp_node_instance, 10769 bool vec_stmt_p, 10770 stmt_vector_for_cost *cost_vec) 10771 { 10772 if (slp_node) 10773 { 10774 stmt_vec_info slp_stmt_info; 10775 unsigned int i; 10776 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info) 10777 { 10778 if (STMT_VINFO_LIVE_P (slp_stmt_info) 10779 && !vectorizable_live_operation (slp_stmt_info, gsi, slp_node, 10780 slp_node_instance, i, 10781 vec_stmt_p, cost_vec)) 10782 return false; 10783 } 10784 } 10785 else if (STMT_VINFO_LIVE_P (stmt_info) 10786 && !vectorizable_live_operation (stmt_info, gsi, slp_node, 10787 slp_node_instance, -1, 10788 vec_stmt_p, cost_vec)) 10789 return false; 10790 10791 return true; 10792 } 10793 10794 /* Make sure the statement is vectorizable. */ 10795 10796 opt_result 10797 vect_analyze_stmt (stmt_vec_info stmt_info, bool *need_to_vectorize, 10798 slp_tree node, slp_instance node_instance, 10799 stmt_vector_for_cost *cost_vec) 10800 { 10801 vec_info *vinfo = stmt_info->vinfo; 10802 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); 10803 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info); 10804 bool ok; 10805 gimple_seq pattern_def_seq; 10806 10807 if (dump_enabled_p ()) 10808 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G", 10809 stmt_info->stmt); 10810 10811 if (gimple_has_volatile_ops (stmt_info->stmt)) 10812 return opt_result::failure_at (stmt_info->stmt, 10813 "not vectorized:" 10814 " stmt has volatile operands: %G\n", 10815 stmt_info->stmt); 10816 10817 if (STMT_VINFO_IN_PATTERN_P (stmt_info) 10818 && node == NULL 10819 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info))) 10820 { 10821 gimple_stmt_iterator si; 10822 10823 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si)) 10824 { 10825 stmt_vec_info pattern_def_stmt_info 10826 = vinfo->lookup_stmt (gsi_stmt (si)); 10827 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info) 10828 || STMT_VINFO_LIVE_P (pattern_def_stmt_info)) 10829 { 10830 /* Analyze def stmt of STMT if it's a pattern stmt. */ 10831 if (dump_enabled_p ()) 10832 dump_printf_loc (MSG_NOTE, vect_location, 10833 "==> examining pattern def statement: %G", 10834 pattern_def_stmt_info->stmt); 10835 10836 opt_result res 10837 = vect_analyze_stmt (pattern_def_stmt_info, 10838 need_to_vectorize, node, node_instance, 10839 cost_vec); 10840 if (!res) 10841 return res; 10842 } 10843 } 10844 } 10845 10846 /* Skip stmts that do not need to be vectorized. In loops this is expected 10847 to include: 10848 - the COND_EXPR which is the loop exit condition 10849 - any LABEL_EXPRs in the loop 10850 - computations that are used only for array indexing or loop control. 10851 In basic blocks we only analyze statements that are a part of some SLP 10852 instance, therefore, all the statements are relevant. 10853 10854 Pattern statement needs to be analyzed instead of the original statement 10855 if the original statement is not relevant. Otherwise, we analyze both 10856 statements. In basic blocks we are called from some SLP instance 10857 traversal, don't analyze pattern stmts instead, the pattern stmts 10858 already will be part of SLP instance. 
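
   For example (an illustrative case), for

     short a, b;
     int c = (int) a * (int) b;

   pattern recognition may have replaced the multiplication by a pattern
   statement along the lines of  c' = WIDEN_MULT_EXPR <a, b>;  when the
   original statement is not relevant, it is that pattern statement that
   is analyzed below.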
*/ 10859 10860 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info); 10861 if (!STMT_VINFO_RELEVANT_P (stmt_info) 10862 && !STMT_VINFO_LIVE_P (stmt_info)) 10863 { 10864 if (STMT_VINFO_IN_PATTERN_P (stmt_info) 10865 && pattern_stmt_info 10866 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info) 10867 || STMT_VINFO_LIVE_P (pattern_stmt_info))) 10868 { 10869 /* Analyze PATTERN_STMT instead of the original stmt. */ 10870 stmt_info = pattern_stmt_info; 10871 if (dump_enabled_p ()) 10872 dump_printf_loc (MSG_NOTE, vect_location, 10873 "==> examining pattern statement: %G", 10874 stmt_info->stmt); 10875 } 10876 else 10877 { 10878 if (dump_enabled_p ()) 10879 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n"); 10880 10881 return opt_result::success (); 10882 } 10883 } 10884 else if (STMT_VINFO_IN_PATTERN_P (stmt_info) 10885 && node == NULL 10886 && pattern_stmt_info 10887 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info) 10888 || STMT_VINFO_LIVE_P (pattern_stmt_info))) 10889 { 10890 /* Analyze PATTERN_STMT too. */ 10891 if (dump_enabled_p ()) 10892 dump_printf_loc (MSG_NOTE, vect_location, 10893 "==> examining pattern statement: %G", 10894 pattern_stmt_info->stmt); 10895 10896 opt_result res 10897 = vect_analyze_stmt (pattern_stmt_info, need_to_vectorize, node, 10898 node_instance, cost_vec); 10899 if (!res) 10900 return res; 10901 } 10902 10903 switch (STMT_VINFO_DEF_TYPE (stmt_info)) 10904 { 10905 case vect_internal_def: 10906 break; 10907 10908 case vect_reduction_def: 10909 case vect_nested_cycle: 10910 gcc_assert (!bb_vinfo 10911 && (relevance == vect_used_in_outer 10912 || relevance == vect_used_in_outer_by_reduction 10913 || relevance == vect_used_by_reduction 10914 || relevance == vect_unused_in_scope 10915 || relevance == vect_used_only_live)); 10916 break; 10917 10918 case vect_induction_def: 10919 gcc_assert (!bb_vinfo); 10920 break; 10921 10922 case vect_constant_def: 10923 case vect_external_def: 10924 case vect_unknown_def_type: 10925 default: 10926 gcc_unreachable (); 10927 } 10928 10929 if (STMT_VINFO_RELEVANT_P (stmt_info)) 10930 { 10931 tree type = gimple_expr_type (stmt_info->stmt); 10932 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (type))); 10933 gcall *call = dyn_cast <gcall *> (stmt_info->stmt); 10934 gcc_assert (STMT_VINFO_VECTYPE (stmt_info) 10935 || (call && gimple_call_lhs (call) == NULL_TREE)); 10936 *need_to_vectorize = true; 10937 } 10938 10939 if (PURE_SLP_STMT (stmt_info) && !node) 10940 { 10941 if (dump_enabled_p ()) 10942 dump_printf_loc (MSG_NOTE, vect_location, 10943 "handled only by SLP analysis\n"); 10944 return opt_result::success (); 10945 } 10946 10947 ok = true; 10948 if (!bb_vinfo 10949 && (STMT_VINFO_RELEVANT_P (stmt_info) 10950 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)) 10951 /* Prefer vectorizable_call over vectorizable_simd_clone_call so 10952 -mveclibabi= takes preference over library functions with 10953 the simd attribute. 
*/ 10954 ok = (vectorizable_call (stmt_info, NULL, NULL, node, cost_vec) 10955 || vectorizable_simd_clone_call (stmt_info, NULL, NULL, node, 10956 cost_vec) 10957 || vectorizable_conversion (stmt_info, NULL, NULL, node, cost_vec) 10958 || vectorizable_operation (stmt_info, NULL, NULL, node, cost_vec) 10959 || vectorizable_assignment (stmt_info, NULL, NULL, node, cost_vec) 10960 || vectorizable_load (stmt_info, NULL, NULL, node, node_instance, 10961 cost_vec) 10962 || vectorizable_store (stmt_info, NULL, NULL, node, cost_vec) 10963 || vectorizable_reduction (stmt_info, node, node_instance, cost_vec) 10964 || vectorizable_induction (stmt_info, NULL, NULL, node, cost_vec) 10965 || vectorizable_shift (stmt_info, NULL, NULL, node, cost_vec) 10966 || vectorizable_condition (stmt_info, NULL, NULL, node, cost_vec) 10967 || vectorizable_comparison (stmt_info, NULL, NULL, node, 10968 cost_vec) 10969 || vectorizable_lc_phi (stmt_info, NULL, node)); 10970 else 10971 { 10972 if (bb_vinfo) 10973 ok = (vectorizable_call (stmt_info, NULL, NULL, node, cost_vec) 10974 || vectorizable_simd_clone_call (stmt_info, NULL, NULL, node, 10975 cost_vec) 10976 || vectorizable_conversion (stmt_info, NULL, NULL, node, 10977 cost_vec) 10978 || vectorizable_shift (stmt_info, NULL, NULL, node, cost_vec) 10979 || vectorizable_operation (stmt_info, NULL, NULL, node, cost_vec) 10980 || vectorizable_assignment (stmt_info, NULL, NULL, node, 10981 cost_vec) 10982 || vectorizable_load (stmt_info, NULL, NULL, node, node_instance, 10983 cost_vec) 10984 || vectorizable_store (stmt_info, NULL, NULL, node, cost_vec) 10985 || vectorizable_condition (stmt_info, NULL, NULL, node, cost_vec) 10986 || vectorizable_comparison (stmt_info, NULL, NULL, node, 10987 cost_vec)); 10988 } 10989 10990 if (!ok) 10991 return opt_result::failure_at (stmt_info->stmt, 10992 "not vectorized:" 10993 " relevant stmt not supported: %G", 10994 stmt_info->stmt); 10995 10996 /* Stmts that are (also) "live" (i.e. - that are used out of the loop) 10997 need extra handling, except for vectorizable reductions. */ 10998 if (!bb_vinfo 10999 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type 11000 && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type 11001 && !can_vectorize_live_stmts (stmt_info, NULL, node, node_instance, 11002 false, cost_vec)) 11003 return opt_result::failure_at (stmt_info->stmt, 11004 "not vectorized:" 11005 " live stmt not supported: %G", 11006 stmt_info->stmt); 11007 11008 return opt_result::success (); 11009 } 11010 11011 11012 /* Function vect_transform_stmt. 11013 11014 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. 
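
   For example (a sketch), when vectorizing

     tmp = b[i] + c[i];

   with a vectorization factor of VF, the transformation emits a single
   vector addition that computes VF sums at once and records it in
   STMT_VINFO_VEC_STMT (or in SLP_TREE_VEC_STMTS for SLP).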
*/
11015
11016 bool
11017 vect_transform_stmt (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
11018 slp_tree slp_node, slp_instance slp_node_instance)
11019 {
11020 vec_info *vinfo = stmt_info->vinfo;
11021 bool is_store = false;
11022 stmt_vec_info vec_stmt = NULL;
11023 bool done;
11024
11025 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
11026 stmt_vec_info old_vec_stmt_info = STMT_VINFO_VEC_STMT (stmt_info);
11027
11028 bool nested_p = (STMT_VINFO_LOOP_VINFO (stmt_info)
11029 && nested_in_vect_loop_p
11030 (LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info)),
11031 stmt_info));
11032
11033 gimple *stmt = stmt_info->stmt;
11034 switch (STMT_VINFO_TYPE (stmt_info))
11035 {
11036 case type_demotion_vec_info_type:
11037 case type_promotion_vec_info_type:
11038 case type_conversion_vec_info_type:
11039 done = vectorizable_conversion (stmt_info, gsi, &vec_stmt, slp_node,
11040 NULL);
11041 gcc_assert (done);
11042 break;
11043
11044 case induc_vec_info_type:
11045 done = vectorizable_induction (stmt_info, gsi, &vec_stmt, slp_node,
11046 NULL);
11047 gcc_assert (done);
11048 break;
11049
11050 case shift_vec_info_type:
11051 done = vectorizable_shift (stmt_info, gsi, &vec_stmt, slp_node, NULL);
11052 gcc_assert (done);
11053 break;
11054
11055 case op_vec_info_type:
11056 done = vectorizable_operation (stmt_info, gsi, &vec_stmt, slp_node,
11057 NULL);
11058 gcc_assert (done);
11059 break;
11060
11061 case assignment_vec_info_type:
11062 done = vectorizable_assignment (stmt_info, gsi, &vec_stmt, slp_node,
11063 NULL);
11064 gcc_assert (done);
11065 break;
11066
11067 case load_vec_info_type:
11068 done = vectorizable_load (stmt_info, gsi, &vec_stmt, slp_node,
11069 slp_node_instance, NULL);
11070 gcc_assert (done);
11071 break;
11072
11073 case store_vec_info_type:
11074 done = vectorizable_store (stmt_info, gsi, &vec_stmt, slp_node, NULL);
11075 gcc_assert (done);
11076 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
11077 {
11078 /* In case of interleaving, the whole chain is vectorized when the
11079 last store in the chain is reached. Store stmts before the last
11080 one are skipped, and their vec_stmt_info shouldn't be freed
11081 meanwhile.
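
	      For example (illustrative), for the interleaved group

		a[2*i]   = x;
		a[2*i+1] = y;

	      the vector stores and the interleaving permutation are
	      emitted only when the second (last) statement of the group
	      is transformed; the first one is skipped here.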
*/ 11082 stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info); 11083 if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info)) 11084 is_store = true; 11085 } 11086 else 11087 is_store = true; 11088 break; 11089 11090 case condition_vec_info_type: 11091 done = vectorizable_condition (stmt_info, gsi, &vec_stmt, slp_node, NULL); 11092 gcc_assert (done); 11093 break; 11094 11095 case comparison_vec_info_type: 11096 done = vectorizable_comparison (stmt_info, gsi, &vec_stmt, 11097 slp_node, NULL); 11098 gcc_assert (done); 11099 break; 11100 11101 case call_vec_info_type: 11102 done = vectorizable_call (stmt_info, gsi, &vec_stmt, slp_node, NULL); 11103 stmt = gsi_stmt (*gsi); 11104 break; 11105 11106 case call_simd_clone_vec_info_type: 11107 done = vectorizable_simd_clone_call (stmt_info, gsi, &vec_stmt, 11108 slp_node, NULL); 11109 stmt = gsi_stmt (*gsi); 11110 break; 11111 11112 case reduc_vec_info_type: 11113 done = vect_transform_reduction (stmt_info, gsi, &vec_stmt, slp_node); 11114 gcc_assert (done); 11115 break; 11116 11117 case cycle_phi_info_type: 11118 done = vect_transform_cycle_phi (stmt_info, &vec_stmt, slp_node, 11119 slp_node_instance); 11120 gcc_assert (done); 11121 break; 11122 11123 case lc_phi_info_type: 11124 done = vectorizable_lc_phi (stmt_info, &vec_stmt, slp_node); 11125 gcc_assert (done); 11126 break; 11127 11128 default: 11129 if (!STMT_VINFO_LIVE_P (stmt_info)) 11130 { 11131 if (dump_enabled_p ()) 11132 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 11133 "stmt not supported.\n"); 11134 gcc_unreachable (); 11135 } 11136 } 11137 11138 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT. 11139 This would break hybrid SLP vectorization. */ 11140 if (slp_node) 11141 gcc_assert (!vec_stmt 11142 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt_info); 11143 11144 /* Handle inner-loop stmts whose DEF is used in the loop-nest that 11145 is being vectorized, but outside the immediately enclosing loop. */ 11146 if (vec_stmt 11147 && nested_p 11148 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type 11149 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer 11150 || STMT_VINFO_RELEVANT (stmt_info) == 11151 vect_used_in_outer_by_reduction)) 11152 { 11153 class loop *innerloop = LOOP_VINFO_LOOP ( 11154 STMT_VINFO_LOOP_VINFO (stmt_info))->inner; 11155 imm_use_iterator imm_iter; 11156 use_operand_p use_p; 11157 tree scalar_dest; 11158 11159 if (dump_enabled_p ()) 11160 dump_printf_loc (MSG_NOTE, vect_location, 11161 "Record the vdef for outer-loop vectorization.\n"); 11162 11163 /* Find the relevant loop-exit phi-node, and reord the vec_stmt there 11164 (to be used when vectorizing outer-loop stmts that use the DEF of 11165 STMT). */ 11166 if (gimple_code (stmt) == GIMPLE_PHI) 11167 scalar_dest = PHI_RESULT (stmt); 11168 else 11169 scalar_dest = gimple_get_lhs (stmt); 11170 11171 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest) 11172 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p)))) 11173 { 11174 stmt_vec_info exit_phi_info 11175 = vinfo->lookup_stmt (USE_STMT (use_p)); 11176 STMT_VINFO_VEC_STMT (exit_phi_info) = vec_stmt; 11177 } 11178 } 11179 11180 if (vec_stmt) 11181 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt; 11182 11183 if (STMT_VINFO_TYPE (stmt_info) == store_vec_info_type) 11184 return is_store; 11185 11186 /* Handle stmts whose DEF is used outside the loop-nest that is 11187 being vectorized. 
*/
11188 done = can_vectorize_live_stmts (stmt_info, gsi, slp_node,
11189 slp_node_instance, true, NULL);
11190 gcc_assert (done);
11191
11192 return false;
11193 }
11194
11195
11196 /* Remove a group of stores (for SLP or interleaving), free their
11197 stmt_vec_info. */
11198
11199 void
11200 vect_remove_stores (stmt_vec_info first_stmt_info)
11201 {
11202 vec_info *vinfo = first_stmt_info->vinfo;
11203 stmt_vec_info next_stmt_info = first_stmt_info;
11204
11205 while (next_stmt_info)
11206 {
11207 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
11208 next_stmt_info = vect_orig_stmt (next_stmt_info);
11209 /* Free the attached stmt_vec_info and remove the stmt. */
11210 vinfo->remove_stmt (next_stmt_info);
11211 next_stmt_info = tmp;
11212 }
11213 }
11214
11215 /* If NUNITS is nonzero, return a vector type that contains NUNITS
11216 elements of type SCALAR_TYPE, or null if the target doesn't support
11217 such a type.
11218
11219 If NUNITS is zero, return a vector type that contains elements of
11220 type SCALAR_TYPE, choosing whichever vector size the target prefers.
11221
11222 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
11223 for this vectorization region and want to "autodetect" the best choice.
11224 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
11225 and we want the new type to be interoperable with it. PREVAILING_MODE
11226 in this case can be a scalar integer mode or a vector mode; when it
11227 is a vector mode, the function acts like a tree-level version of
11228 related_vector_mode. */
11229
11230 tree
11231 get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
11232 tree scalar_type, poly_uint64 nunits)
11233 {
11234 tree orig_scalar_type = scalar_type;
11235 scalar_mode inner_mode;
11236 machine_mode simd_mode;
11237 tree vectype;
11238
11239 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
11240 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
11241 return NULL_TREE;
11242
11243 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
11244
11245 /* For vector types of elements whose mode precision doesn't
11246 match their type's precision we use an element type of mode
11247 precision. The vectorization routines will have to make sure
11248 they support the proper result truncation/extension.
11249 We also make sure to build vector types with INTEGER_TYPE
11250 component type only. */
11251 if (INTEGRAL_TYPE_P (scalar_type)
11252 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
11253 || TREE_CODE (scalar_type) != INTEGER_TYPE))
11254 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
11255 TYPE_UNSIGNED (scalar_type));
11256
11257 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
11258 When the component mode passes the above test simply use a type
11259 corresponding to that mode. The theory is that any use that
11260 would cause problems with this will disable vectorization anyway. */
11261 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
11262 && !INTEGRAL_TYPE_P (scalar_type))
11263 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
11264
11265 /* We can't build a vector type of elements with alignment bigger than
11266 their size. */
11267 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
11268 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
11269 TYPE_UNSIGNED (scalar_type));
11270
11271 /* If we fell back to using the mode, fail if there was
11272 no scalar type for it.
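
     For instance (an illustrative case), a 1-bit _Bool whose mode is
     QImode is canonicalized above to the 8-bit integer type of that
     mode; the check below only triggers when no scalar type exists for
     the mode we fell back to.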
*/ 11273 if (scalar_type == NULL_TREE) 11274 return NULL_TREE; 11275 11276 /* If no prevailing mode was supplied, use the mode the target prefers. 11277 Otherwise lookup a vector mode based on the prevailing mode. */ 11278 if (prevailing_mode == VOIDmode) 11279 { 11280 gcc_assert (known_eq (nunits, 0U)); 11281 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode); 11282 if (SCALAR_INT_MODE_P (simd_mode)) 11283 { 11284 /* Traditional behavior is not to take the integer mode 11285 literally, but simply to use it as a way of determining 11286 the vector size. It is up to mode_for_vector to decide 11287 what the TYPE_MODE should be. 11288 11289 Note that nunits == 1 is allowed in order to support single 11290 element vector types. */ 11291 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits) 11292 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode)) 11293 return NULL_TREE; 11294 } 11295 } 11296 else if (SCALAR_INT_MODE_P (prevailing_mode) 11297 || !related_vector_mode (prevailing_mode, 11298 inner_mode, nunits).exists (&simd_mode)) 11299 { 11300 /* Fall back to using mode_for_vector, mostly in the hope of being 11301 able to use an integer mode. */ 11302 if (known_eq (nunits, 0U) 11303 && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits)) 11304 return NULL_TREE; 11305 11306 if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode)) 11307 return NULL_TREE; 11308 } 11309 11310 vectype = build_vector_type_for_mode (scalar_type, simd_mode); 11311 11312 /* In cases where the mode was chosen by mode_for_vector, check that 11313 the target actually supports the chosen mode, or that it at least 11314 allows the vector mode to be replaced by a like-sized integer. */ 11315 if (!VECTOR_MODE_P (TYPE_MODE (vectype)) 11316 && !INTEGRAL_MODE_P (TYPE_MODE (vectype))) 11317 return NULL_TREE; 11318 11319 /* Re-attach the address-space qualifier if we canonicalized the scalar 11320 type. */ 11321 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype)) 11322 return build_qualified_type 11323 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type))); 11324 11325 return vectype; 11326 } 11327 11328 /* Function get_vectype_for_scalar_type. 11329 11330 Returns the vector type corresponding to SCALAR_TYPE as supported 11331 by the target. If GROUP_SIZE is nonzero and we're performing BB 11332 vectorization, make sure that the number of elements in the vector 11333 is no bigger than GROUP_SIZE. */ 11334 11335 tree 11336 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, 11337 unsigned int group_size) 11338 { 11339 /* For BB vectorization, we should always have a group size once we've 11340 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs 11341 are tentative requests during things like early data reference 11342 analysis and pattern recognition. */ 11343 if (is_a <bb_vec_info> (vinfo)) 11344 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0); 11345 else 11346 group_size = 0; 11347 11348 tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode, 11349 scalar_type); 11350 if (vectype && vinfo->vector_mode == VOIDmode) 11351 vinfo->vector_mode = TYPE_MODE (vectype); 11352 11353 /* Register the natural choice of vector type, before the group size 11354 has been applied. */ 11355 if (vectype) 11356 vinfo->used_vector_modes.add (TYPE_MODE (vectype)); 11357 11358 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE, 11359 try again with an explicit number of elements. 
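
   For example (illustrative numbers), with GROUP_SIZE == 6 the loop
   below starts from 1 << floor_log2 (6) == 4 lanes and, if no vector
   type with 4 lanes is supported, retries with 2 lanes before giving
   up.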
*/ 11360 if (vectype 11361 && group_size 11362 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size)) 11363 { 11364 /* Start with the biggest number of units that fits within 11365 GROUP_SIZE and halve it until we find a valid vector type. 11366 Usually either the first attempt will succeed or all will 11367 fail (in the latter case because GROUP_SIZE is too small 11368 for the target), but it's possible that a target could have 11369 a hole between supported vector types. 11370 11371 If GROUP_SIZE is not a power of 2, this has the effect of 11372 trying the largest power of 2 that fits within the group, 11373 even though the group is not a multiple of that vector size. 11374 The BB vectorizer will then try to carve up the group into 11375 smaller pieces. */ 11376 unsigned int nunits = 1 << floor_log2 (group_size); 11377 do 11378 { 11379 vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode, 11380 scalar_type, nunits); 11381 nunits /= 2; 11382 } 11383 while (nunits > 1 && !vectype); 11384 } 11385 11386 return vectype; 11387 } 11388 11389 /* Return the vector type corresponding to SCALAR_TYPE as supported 11390 by the target. NODE, if nonnull, is the SLP tree node that will 11391 use the returned vector type. */ 11392 11393 tree 11394 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node) 11395 { 11396 unsigned int group_size = 0; 11397 if (node) 11398 { 11399 group_size = SLP_TREE_SCALAR_OPS (node).length (); 11400 if (group_size == 0) 11401 group_size = SLP_TREE_SCALAR_STMTS (node).length (); 11402 } 11403 return get_vectype_for_scalar_type (vinfo, scalar_type, group_size); 11404 } 11405 11406 /* Function get_mask_type_for_scalar_type. 11407 11408 Returns the mask type corresponding to a result of comparison 11409 of vectors of specified SCALAR_TYPE as supported by target. 11410 If GROUP_SIZE is nonzero and we're performing BB vectorization, 11411 make sure that the number of elements in the vector is no bigger 11412 than GROUP_SIZE. */ 11413 11414 tree 11415 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type, 11416 unsigned int group_size) 11417 { 11418 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size); 11419 11420 if (!vectype) 11421 return NULL; 11422 11423 return truth_type_for (vectype); 11424 } 11425 11426 /* Function get_same_sized_vectype 11427 11428 Returns a vector type corresponding to SCALAR_TYPE of size 11429 VECTOR_TYPE if supported by the target. */ 11430 11431 tree 11432 get_same_sized_vectype (tree scalar_type, tree vector_type) 11433 { 11434 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type)) 11435 return truth_type_for (vector_type); 11436 11437 poly_uint64 nunits; 11438 if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)), 11439 GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits)) 11440 return NULL_TREE; 11441 11442 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type), 11443 scalar_type, nunits); 11444 } 11445 11446 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE 11447 would not change the chosen vector modes. */ 11448 11449 bool 11450 vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode) 11451 { 11452 for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin (); 11453 i != vinfo->used_vector_modes.end (); ++i) 11454 if (!VECTOR_MODE_P (*i) 11455 || related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i) 11456 return false; 11457 return true; 11458 } 11459 11460 /* Function vect_is_simple_use. 
11461 11462 Input: 11463 VINFO - the vect info of the loop or basic block that is being vectorized. 11464 OPERAND - operand in the loop or bb. 11465 Output: 11466 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in 11467 case OPERAND is an SSA_NAME that is defined in the vectorizable region 11468 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME; 11469 the definition could be anywhere in the function 11470 DT - the type of definition 11471 11472 Returns whether a stmt with OPERAND can be vectorized. 11473 For loops, supportable operands are constants, loop invariants, and operands 11474 that are defined by the current iteration of the loop. Unsupportable 11475 operands are those that are defined by a previous iteration of the loop (as 11476 is the case in reduction/induction computations). 11477 For basic blocks, supportable operands are constants and bb invariants. 11478 For now, operands defined outside the basic block are not supported. */ 11479 11480 bool 11481 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt, 11482 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out) 11483 { 11484 if (def_stmt_info_out) 11485 *def_stmt_info_out = NULL; 11486 if (def_stmt_out) 11487 *def_stmt_out = NULL; 11488 *dt = vect_unknown_def_type; 11489 11490 if (dump_enabled_p ()) 11491 { 11492 dump_printf_loc (MSG_NOTE, vect_location, 11493 "vect_is_simple_use: operand "); 11494 if (TREE_CODE (operand) == SSA_NAME 11495 && !SSA_NAME_IS_DEFAULT_DEF (operand)) 11496 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0); 11497 else 11498 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand); 11499 } 11500 11501 if (CONSTANT_CLASS_P (operand)) 11502 *dt = vect_constant_def; 11503 else if (is_gimple_min_invariant (operand)) 11504 *dt = vect_external_def; 11505 else if (TREE_CODE (operand) != SSA_NAME) 11506 *dt = vect_unknown_def_type; 11507 else if (SSA_NAME_IS_DEFAULT_DEF (operand)) 11508 *dt = vect_external_def; 11509 else 11510 { 11511 gimple *def_stmt = SSA_NAME_DEF_STMT (operand); 11512 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand); 11513 if (!stmt_vinfo) 11514 *dt = vect_external_def; 11515 else 11516 { 11517 stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo); 11518 def_stmt = stmt_vinfo->stmt; 11519 switch (gimple_code (def_stmt)) 11520 { 11521 case GIMPLE_PHI: 11522 case GIMPLE_ASSIGN: 11523 case GIMPLE_CALL: 11524 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo); 11525 break; 11526 default: 11527 *dt = vect_unknown_def_type; 11528 break; 11529 } 11530 if (def_stmt_info_out) 11531 *def_stmt_info_out = stmt_vinfo; 11532 } 11533 if (def_stmt_out) 11534 *def_stmt_out = def_stmt; 11535 } 11536 11537 if (dump_enabled_p ()) 11538 { 11539 dump_printf (MSG_NOTE, ", type of def: "); 11540 switch (*dt) 11541 { 11542 case vect_uninitialized_def: 11543 dump_printf (MSG_NOTE, "uninitialized\n"); 11544 break; 11545 case vect_constant_def: 11546 dump_printf (MSG_NOTE, "constant\n"); 11547 break; 11548 case vect_external_def: 11549 dump_printf (MSG_NOTE, "external\n"); 11550 break; 11551 case vect_internal_def: 11552 dump_printf (MSG_NOTE, "internal\n"); 11553 break; 11554 case vect_induction_def: 11555 dump_printf (MSG_NOTE, "induction\n"); 11556 break; 11557 case vect_reduction_def: 11558 dump_printf (MSG_NOTE, "reduction\n"); 11559 break; 11560 case vect_double_reduction_def: 11561 dump_printf (MSG_NOTE, "double reduction\n"); 11562 break; 11563 case vect_nested_cycle: 11564 dump_printf (MSG_NOTE, "nested cycle\n"); 11565 break; 11566 case 
vect_unknown_def_type: 11567 dump_printf (MSG_NOTE, "unknown\n"); 11568 break; 11569 } 11570 } 11571 11572 if (*dt == vect_unknown_def_type) 11573 { 11574 if (dump_enabled_p ()) 11575 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 11576 "Unsupported pattern.\n"); 11577 return false; 11578 } 11579 11580 return true; 11581 } 11582 11583 /* Function vect_is_simple_use. 11584 11585 Same as vect_is_simple_use but also determines the vector operand 11586 type of OPERAND and stores it to *VECTYPE. If the definition of 11587 OPERAND is vect_uninitialized_def, vect_constant_def or 11588 vect_external_def *VECTYPE will be set to NULL_TREE and the caller 11589 is responsible to compute the best suited vector type for the 11590 scalar operand. */ 11591 11592 bool 11593 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt, 11594 tree *vectype, stmt_vec_info *def_stmt_info_out, 11595 gimple **def_stmt_out) 11596 { 11597 stmt_vec_info def_stmt_info; 11598 gimple *def_stmt; 11599 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt)) 11600 return false; 11601 11602 if (def_stmt_out) 11603 *def_stmt_out = def_stmt; 11604 if (def_stmt_info_out) 11605 *def_stmt_info_out = def_stmt_info; 11606 11607 /* Now get a vector type if the def is internal, otherwise supply 11608 NULL_TREE and leave it up to the caller to figure out a proper 11609 type for the use stmt. */ 11610 if (*dt == vect_internal_def 11611 || *dt == vect_induction_def 11612 || *dt == vect_reduction_def 11613 || *dt == vect_double_reduction_def 11614 || *dt == vect_nested_cycle) 11615 { 11616 *vectype = STMT_VINFO_VECTYPE (def_stmt_info); 11617 gcc_assert (*vectype != NULL_TREE); 11618 if (dump_enabled_p ()) 11619 dump_printf_loc (MSG_NOTE, vect_location, 11620 "vect_is_simple_use: vectype %T\n", *vectype); 11621 } 11622 else if (*dt == vect_uninitialized_def 11623 || *dt == vect_constant_def 11624 || *dt == vect_external_def) 11625 *vectype = NULL_TREE; 11626 else 11627 gcc_unreachable (); 11628 11629 return true; 11630 } 11631 11632 11633 /* Function supportable_widening_operation 11634 11635 Check whether an operation represented by the code CODE is a 11636 widening operation that is supported by the target platform in 11637 vector form (i.e., when operating on arguments of type VECTYPE_IN 11638 producing a result of type VECTYPE_OUT). 11639 11640 Widening operations we currently support are NOP (CONVERT), FLOAT, 11641 FIX_TRUNC and WIDEN_MULT. This function checks if these operations 11642 are supported by the target platform either directly (via vector 11643 tree-codes), or via target builtins. 11644 11645 Output: 11646 - CODE1 and CODE2 are codes of vector operations to be used when 11647 vectorizing the operation, if available. 11648 - MULTI_STEP_CVT determines the number of required intermediate steps in 11649 case of multi-step conversion (like char->short->int - in that case 11650 MULTI_STEP_CVT will be 1). 11651 - INTERM_TYPES contains the intermediate type required to perform the 11652 widening operation (short in the above example). 
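
   For example (illustrative, assuming 128-bit vectors), converting
   V16QImode chars to V4SImode ints goes via V8HImode shorts:
   MULTI_STEP_CVT is 1 and INTERM_TYPES contains the V8HImode vector
   type; each input vector is unpacked into two short vectors, each of
   which is in turn unpacked into two int vectors.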
*/ 11653 11654 bool 11655 supportable_widening_operation (enum tree_code code, stmt_vec_info stmt_info, 11656 tree vectype_out, tree vectype_in, 11657 enum tree_code *code1, enum tree_code *code2, 11658 int *multi_step_cvt, 11659 vec<tree> *interm_types) 11660 { 11661 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info); 11662 class loop *vect_loop = NULL; 11663 machine_mode vec_mode; 11664 enum insn_code icode1, icode2; 11665 optab optab1, optab2; 11666 tree vectype = vectype_in; 11667 tree wide_vectype = vectype_out; 11668 enum tree_code c1, c2; 11669 int i; 11670 tree prev_type, intermediate_type; 11671 machine_mode intermediate_mode, prev_mode; 11672 optab optab3, optab4; 11673 11674 *multi_step_cvt = 0; 11675 if (loop_info) 11676 vect_loop = LOOP_VINFO_LOOP (loop_info); 11677 11678 switch (code) 11679 { 11680 case WIDEN_MULT_EXPR: 11681 /* The result of a vectorized widening operation usually requires 11682 two vectors (because the widened results do not fit into one vector). 11683 The generated vector results would normally be expected to be 11684 generated in the same order as in the original scalar computation, 11685 i.e. if 8 results are generated in each vector iteration, they are 11686 to be organized as follows: 11687 vect1: [res1,res2,res3,res4], 11688 vect2: [res5,res6,res7,res8]. 11689 11690 However, in the special case that the result of the widening 11691 operation is used in a reduction computation only, the order doesn't 11692 matter (because when vectorizing a reduction we change the order of 11693 the computation). Some targets can take advantage of this and 11694 generate more efficient code. For example, targets like Altivec, 11695 that support widen_mult using a sequence of {mult_even,mult_odd} 11696 generate the following vectors: 11697 vect1: [res1,res3,res5,res7], 11698 vect2: [res2,res4,res6,res8]. 11699 11700 When vectorizing outer-loops, we execute the inner-loop sequentially 11701 (each vectorized inner-loop iteration contributes to VF outer-loop 11702 iterations in parallel). We therefore don't allow to change the 11703 order of the computation in the inner-loop during outer-loop 11704 vectorization. */ 11705 /* TODO: Another case in which order doesn't *really* matter is when we 11706 widen and then contract again, e.g. (short)((int)x * y >> 8). 11707 Normally, pack_trunc performs an even/odd permute, whereas the 11708 repack from an even/odd expansion would be an interleave, which 11709 would be significantly simpler for e.g. AVX2. */ 11710 /* In any case, in order to avoid duplicating the code below, recurse 11711 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values 11712 are properly set up for the caller. If we fail, we'll continue with 11713 a VEC_WIDEN_MULT_LO/HI_EXPR check. */ 11714 if (vect_loop 11715 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction 11716 && !nested_in_vect_loop_p (vect_loop, stmt_info) 11717 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR, 11718 stmt_info, vectype_out, 11719 vectype_in, code1, code2, 11720 multi_step_cvt, interm_types)) 11721 { 11722 /* Elements in a vector with vect_used_by_reduction property cannot 11723 be reordered if the use chain with this property does not have the 11724 same operation. One such an example is s += a * b, where elements 11725 in a and b cannot be reordered. Here we check if the vector defined 11726 by STMT is only directly used in the reduction statement. 
*/ 11727 tree lhs = gimple_assign_lhs (stmt_info->stmt); 11728 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs); 11729 if (use_stmt_info 11730 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def) 11731 return true; 11732 } 11733 c1 = VEC_WIDEN_MULT_LO_EXPR; 11734 c2 = VEC_WIDEN_MULT_HI_EXPR; 11735 break; 11736 11737 case DOT_PROD_EXPR: 11738 c1 = DOT_PROD_EXPR; 11739 c2 = DOT_PROD_EXPR; 11740 break; 11741 11742 case SAD_EXPR: 11743 c1 = SAD_EXPR; 11744 c2 = SAD_EXPR; 11745 break; 11746 11747 case VEC_WIDEN_MULT_EVEN_EXPR: 11748 /* Support the recursion induced just above. */ 11749 c1 = VEC_WIDEN_MULT_EVEN_EXPR; 11750 c2 = VEC_WIDEN_MULT_ODD_EXPR; 11751 break; 11752 11753 case WIDEN_LSHIFT_EXPR: 11754 c1 = VEC_WIDEN_LSHIFT_LO_EXPR; 11755 c2 = VEC_WIDEN_LSHIFT_HI_EXPR; 11756 break; 11757 11758 CASE_CONVERT: 11759 c1 = VEC_UNPACK_LO_EXPR; 11760 c2 = VEC_UNPACK_HI_EXPR; 11761 break; 11762 11763 case FLOAT_EXPR: 11764 c1 = VEC_UNPACK_FLOAT_LO_EXPR; 11765 c2 = VEC_UNPACK_FLOAT_HI_EXPR; 11766 break; 11767 11768 case FIX_TRUNC_EXPR: 11769 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR; 11770 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR; 11771 break; 11772 11773 default: 11774 gcc_unreachable (); 11775 } 11776 11777 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR) 11778 std::swap (c1, c2); 11779 11780 if (code == FIX_TRUNC_EXPR) 11781 { 11782 /* The signedness is determined from output operand. */ 11783 optab1 = optab_for_tree_code (c1, vectype_out, optab_default); 11784 optab2 = optab_for_tree_code (c2, vectype_out, optab_default); 11785 } 11786 else if (CONVERT_EXPR_CODE_P (code) 11787 && VECTOR_BOOLEAN_TYPE_P (wide_vectype) 11788 && VECTOR_BOOLEAN_TYPE_P (vectype) 11789 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype) 11790 && SCALAR_INT_MODE_P (TYPE_MODE (vectype))) 11791 { 11792 /* If the input and result modes are the same, a different optab 11793 is needed where we pass in the number of units in vectype. */ 11794 optab1 = vec_unpacks_sbool_lo_optab; 11795 optab2 = vec_unpacks_sbool_hi_optab; 11796 } 11797 else 11798 { 11799 optab1 = optab_for_tree_code (c1, vectype, optab_default); 11800 optab2 = optab_for_tree_code (c2, vectype, optab_default); 11801 } 11802 11803 if (!optab1 || !optab2) 11804 return false; 11805 11806 vec_mode = TYPE_MODE (vectype); 11807 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing 11808 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing) 11809 return false; 11810 11811 *code1 = c1; 11812 *code2 = c2; 11813 11814 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype) 11815 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype)) 11816 { 11817 if (!VECTOR_BOOLEAN_TYPE_P (vectype)) 11818 return true; 11819 /* For scalar masks we may have different boolean 11820 vector types having the same QImode. Thus we 11821 add additional check for elements number. */ 11822 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 11823 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2)) 11824 return true; 11825 } 11826 11827 /* Check if it's a multi-step conversion that can be done using intermediate 11828 types. */ 11829 11830 prev_type = vectype; 11831 prev_mode = vec_mode; 11832 11833 if (!CONVERT_EXPR_CODE_P (code)) 11834 return false; 11835 11836 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS 11837 intermediate steps in promotion sequence. We try 11838 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do 11839 not. 
*/ 11840 interm_types->create (MAX_INTERM_CVT_STEPS); 11841 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++) 11842 { 11843 intermediate_mode = insn_data[icode1].operand[0].mode; 11844 if (VECTOR_BOOLEAN_TYPE_P (prev_type)) 11845 intermediate_type 11846 = vect_halve_mask_nunits (prev_type, intermediate_mode); 11847 else 11848 intermediate_type 11849 = lang_hooks.types.type_for_mode (intermediate_mode, 11850 TYPE_UNSIGNED (prev_type)); 11851 11852 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type) 11853 && VECTOR_BOOLEAN_TYPE_P (prev_type) 11854 && intermediate_mode == prev_mode 11855 && SCALAR_INT_MODE_P (prev_mode)) 11856 { 11857 /* If the input and result modes are the same, a different optab 11858 is needed where we pass in the number of units in vectype. */ 11859 optab3 = vec_unpacks_sbool_lo_optab; 11860 optab4 = vec_unpacks_sbool_hi_optab; 11861 } 11862 else 11863 { 11864 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default); 11865 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default); 11866 } 11867 11868 if (!optab3 || !optab4 11869 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing 11870 || insn_data[icode1].operand[0].mode != intermediate_mode 11871 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing 11872 || insn_data[icode2].operand[0].mode != intermediate_mode 11873 || ((icode1 = optab_handler (optab3, intermediate_mode)) 11874 == CODE_FOR_nothing) 11875 || ((icode2 = optab_handler (optab4, intermediate_mode)) 11876 == CODE_FOR_nothing)) 11877 break; 11878 11879 interm_types->quick_push (intermediate_type); 11880 (*multi_step_cvt)++; 11881 11882 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype) 11883 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype)) 11884 { 11885 if (!VECTOR_BOOLEAN_TYPE_P (vectype)) 11886 return true; 11887 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type), 11888 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2)) 11889 return true; 11890 } 11891 11892 prev_type = intermediate_type; 11893 prev_mode = intermediate_mode; 11894 } 11895 11896 interm_types->release (); 11897 return false; 11898 } 11899 11900 11901 /* Function supportable_narrowing_operation 11902 11903 Check whether an operation represented by the code CODE is a 11904 narrowing operation that is supported by the target platform in 11905 vector form (i.e., when operating on arguments of type VECTYPE_IN 11906 and producing a result of type VECTYPE_OUT). 11907 11908 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC 11909 and FLOAT. This function checks if these operations are supported by 11910 the target platform directly via vector tree-codes. 11911 11912 Output: 11913 - CODE1 is the code of a vector operation to be used when 11914 vectorizing the operation, if available. 11915 - MULTI_STEP_CVT determines the number of required intermediate steps in 11916 case of multi-step conversion (like int->short->char - in that case 11917 MULTI_STEP_CVT will be 1). 11918 - INTERM_TYPES contains the intermediate type required to perform the 11919 narrowing operation (short in the above example). 
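
   For example (illustrative, assuming 128-bit vectors), converting
   V4SImode ints to V16QImode chars goes via V8HImode shorts:
   MULTI_STEP_CVT is 1 and INTERM_TYPES contains the V8HImode vector
   type; pairs of int vectors are packed into short vectors, and pairs
   of those are then packed into char vectors.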
*/ 11920 11921 bool 11922 supportable_narrowing_operation (enum tree_code code, 11923 tree vectype_out, tree vectype_in, 11924 enum tree_code *code1, int *multi_step_cvt, 11925 vec<tree> *interm_types) 11926 { 11927 machine_mode vec_mode; 11928 enum insn_code icode1; 11929 optab optab1, interm_optab; 11930 tree vectype = vectype_in; 11931 tree narrow_vectype = vectype_out; 11932 enum tree_code c1; 11933 tree intermediate_type, prev_type; 11934 machine_mode intermediate_mode, prev_mode; 11935 int i; 11936 bool uns; 11937 11938 *multi_step_cvt = 0; 11939 switch (code) 11940 { 11941 CASE_CONVERT: 11942 c1 = VEC_PACK_TRUNC_EXPR; 11943 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype) 11944 && VECTOR_BOOLEAN_TYPE_P (vectype) 11945 && TYPE_MODE (narrow_vectype) == TYPE_MODE (vectype) 11946 && SCALAR_INT_MODE_P (TYPE_MODE (vectype))) 11947 optab1 = vec_pack_sbool_trunc_optab; 11948 else 11949 optab1 = optab_for_tree_code (c1, vectype, optab_default); 11950 break; 11951 11952 case FIX_TRUNC_EXPR: 11953 c1 = VEC_PACK_FIX_TRUNC_EXPR; 11954 /* The signedness is determined from output operand. */ 11955 optab1 = optab_for_tree_code (c1, vectype_out, optab_default); 11956 break; 11957 11958 case FLOAT_EXPR: 11959 c1 = VEC_PACK_FLOAT_EXPR; 11960 optab1 = optab_for_tree_code (c1, vectype, optab_default); 11961 break; 11962 11963 default: 11964 gcc_unreachable (); 11965 } 11966 11967 if (!optab1) 11968 return false; 11969 11970 vec_mode = TYPE_MODE (vectype); 11971 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing) 11972 return false; 11973 11974 *code1 = c1; 11975 11976 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype)) 11977 { 11978 if (!VECTOR_BOOLEAN_TYPE_P (vectype)) 11979 return true; 11980 /* For scalar masks we may have different boolean 11981 vector types having the same QImode. Thus we 11982 add additional check for elements number. */ 11983 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2, 11984 TYPE_VECTOR_SUBPARTS (narrow_vectype))) 11985 return true; 11986 } 11987 11988 if (code == FLOAT_EXPR) 11989 return false; 11990 11991 /* Check if it's a multi-step conversion that can be done using intermediate 11992 types. */ 11993 prev_mode = vec_mode; 11994 prev_type = vectype; 11995 if (code == FIX_TRUNC_EXPR) 11996 uns = TYPE_UNSIGNED (vectype_out); 11997 else 11998 uns = TYPE_UNSIGNED (vectype); 11999 12000 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer 12001 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more 12002 costly than signed. */ 12003 if (code == FIX_TRUNC_EXPR && uns) 12004 { 12005 enum insn_code icode2; 12006 12007 intermediate_type 12008 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0); 12009 interm_optab 12010 = optab_for_tree_code (c1, intermediate_type, optab_default); 12011 if (interm_optab != unknown_optab 12012 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing 12013 && insn_data[icode1].operand[0].mode 12014 == insn_data[icode2].operand[0].mode) 12015 { 12016 uns = false; 12017 optab1 = interm_optab; 12018 icode1 = icode2; 12019 } 12020 } 12021 12022 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS 12023 intermediate steps in promotion sequence. We try 12024 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. 
*/
12025 interm_types->create (MAX_INTERM_CVT_STEPS);
12026 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
12027 {
12028 intermediate_mode = insn_data[icode1].operand[0].mode;
12029 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
12030 intermediate_type
12031 = vect_double_mask_nunits (prev_type, intermediate_mode);
12032 else
12033 intermediate_type
12034 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
12035 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
12036 && VECTOR_BOOLEAN_TYPE_P (prev_type)
12037 && intermediate_mode == prev_mode
12038 && SCALAR_INT_MODE_P (prev_mode))
12039 interm_optab = vec_pack_sbool_trunc_optab;
12040 else
12041 interm_optab
12042 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
12043 optab_default);
12044 if (!interm_optab
12045 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
12046 || insn_data[icode1].operand[0].mode != intermediate_mode
12047 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
12048 == CODE_FOR_nothing))
12049 break;
12050
12051 interm_types->quick_push (intermediate_type);
12052 (*multi_step_cvt)++;
12053
12054 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
12055 {
12056 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12057 return true;
12058 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
12059 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
12060 return true;
12061 }
12062
12063 prev_mode = intermediate_mode;
12064 prev_type = intermediate_type;
12065 optab1 = interm_optab;
12066 }
12067
12068 interm_types->release ();
12069 return false;
12070 }
12071
12072 /* Generate and return a statement that sets vector mask MASK such that
12073 MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I. */
12074
12075 gcall *
12076 vect_gen_while (tree mask, tree start_index, tree end_index)
12077 {
12078 tree cmp_type = TREE_TYPE (start_index);
12079 tree mask_type = TREE_TYPE (mask);
12080 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
12081 cmp_type, mask_type,
12082 OPTIMIZE_FOR_SPEED));
12083 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
12084 start_index, end_index,
12085 build_zero_cst (mask_type));
12086 gimple_call_set_lhs (call, mask);
12087 return call;
12088 }
12089
12090 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
12091 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
12092
12093 tree
12094 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
12095 tree end_index)
12096 {
12097 tree tmp = make_ssa_name (mask_type);
12098 gcall *call = vect_gen_while (tmp, start_index, end_index);
12099 gimple_seq_add_stmt (seq, call);
12100 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
12101 }
12102
12103 /* Try to compute the vector types required to vectorize STMT_INFO,
12104 returning true on success and false if vectorization isn't possible.
12105 If GROUP_SIZE is nonzero and we're performing BB vectorization,
12106 make sure that the number of elements in the vectors is no bigger
12107 than GROUP_SIZE.
12108
12109 On success:
12110
12111 - Set *STMT_VECTYPE_OUT to:
12112 - NULL_TREE if the statement doesn't need to be vectorized;
12113 - the equivalent of STMT_VINFO_VECTYPE otherwise.
12114
12115 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
12116 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
12117 statement does not help to determine the overall number of units.
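
   For example (illustrative, assuming 128-bit vectors), for the
   widening conversion

     int_x = (int) char_y;

   *STMT_VECTYPE_OUT is the V4SImode vector type of the result, while
   the number of units is taken from the narrower char operand, so
   *NUNITS_VECTYPE_OUT is the V16QImode vector type.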
*/ 12118 12119 opt_result 12120 vect_get_vector_types_for_stmt (stmt_vec_info stmt_info, 12121 tree *stmt_vectype_out, 12122 tree *nunits_vectype_out, 12123 unsigned int group_size) 12124 { 12125 vec_info *vinfo = stmt_info->vinfo; 12126 gimple *stmt = stmt_info->stmt; 12127 12128 /* For BB vectorization, we should always have a group size once we've 12129 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs 12130 are tentative requests during things like early data reference 12131 analysis and pattern recognition. */ 12132 if (is_a <bb_vec_info> (vinfo)) 12133 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0); 12134 else 12135 group_size = 0; 12136 12137 *stmt_vectype_out = NULL_TREE; 12138 *nunits_vectype_out = NULL_TREE; 12139 12140 if (gimple_get_lhs (stmt) == NULL_TREE 12141 /* MASK_STORE has no lhs, but is ok. */ 12142 && !gimple_call_internal_p (stmt, IFN_MASK_STORE)) 12143 { 12144 if (is_a <gcall *> (stmt)) 12145 { 12146 /* Ignore calls with no lhs. These must be calls to 12147 #pragma omp simd functions, and what vectorization factor 12148 it really needs can't be determined until 12149 vectorizable_simd_clone_call. */ 12150 if (dump_enabled_p ()) 12151 dump_printf_loc (MSG_NOTE, vect_location, 12152 "defer to SIMD clone analysis.\n"); 12153 return opt_result::success (); 12154 } 12155 12156 return opt_result::failure_at (stmt, 12157 "not vectorized: irregular stmt.%G", stmt); 12158 } 12159 12160 if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt)))) 12161 return opt_result::failure_at (stmt, 12162 "not vectorized: vector stmt in loop:%G", 12163 stmt); 12164 12165 tree vectype; 12166 tree scalar_type = NULL_TREE; 12167 if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info)) 12168 { 12169 vectype = STMT_VINFO_VECTYPE (stmt_info); 12170 if (dump_enabled_p ()) 12171 dump_printf_loc (MSG_NOTE, vect_location, 12172 "precomputed vectype: %T\n", vectype); 12173 } 12174 else if (vect_use_mask_type_p (stmt_info)) 12175 { 12176 unsigned int precision = stmt_info->mask_precision; 12177 scalar_type = build_nonstandard_integer_type (precision, 1); 12178 vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size); 12179 if (!vectype) 12180 return opt_result::failure_at (stmt, "not vectorized: unsupported" 12181 " data-type %T\n", scalar_type); 12182 if (dump_enabled_p ()) 12183 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype); 12184 } 12185 else 12186 { 12187 if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info)) 12188 scalar_type = TREE_TYPE (DR_REF (dr)); 12189 else if (gimple_call_internal_p (stmt, IFN_MASK_STORE)) 12190 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3)); 12191 else 12192 scalar_type = TREE_TYPE (gimple_get_lhs (stmt)); 12193 12194 if (dump_enabled_p ()) 12195 { 12196 if (group_size) 12197 dump_printf_loc (MSG_NOTE, vect_location, 12198 "get vectype for scalar type (group size %d):" 12199 " %T\n", group_size, scalar_type); 12200 else 12201 dump_printf_loc (MSG_NOTE, vect_location, 12202 "get vectype for scalar type: %T\n", scalar_type); 12203 } 12204 vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size); 12205 if (!vectype) 12206 return opt_result::failure_at (stmt, 12207 "not vectorized:" 12208 " unsupported data-type %T\n", 12209 scalar_type); 12210 12211 if (dump_enabled_p ()) 12212 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype); 12213 } 12214 *stmt_vectype_out = vectype; 12215 12216 /* Don't try to compute scalar types if the stmt produces a boolean 12217 vector; use 
the existing vector type instead. */ 12218 tree nunits_vectype = vectype; 12219 if (!VECTOR_BOOLEAN_TYPE_P (vectype)) 12220 { 12221 /* The number of units is set according to the smallest scalar 12222 type (or the largest vector size, but we only support one 12223 vector size per vectorization). */ 12224 HOST_WIDE_INT dummy; 12225 scalar_type = vect_get_smallest_scalar_type (stmt_info, &dummy, &dummy); 12226 if (scalar_type != TREE_TYPE (vectype)) 12227 { 12228 if (dump_enabled_p ()) 12229 dump_printf_loc (MSG_NOTE, vect_location, 12230 "get vectype for smallest scalar type: %T\n", 12231 scalar_type); 12232 nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type, 12233 group_size); 12234 if (!nunits_vectype) 12235 return opt_result::failure_at 12236 (stmt, "not vectorized: unsupported data-type %T\n", 12237 scalar_type); 12238 if (dump_enabled_p ()) 12239 dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n", 12240 nunits_vectype); 12241 } 12242 } 12243 12244 if (!multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype), 12245 TYPE_VECTOR_SUBPARTS (*stmt_vectype_out))) 12246 return opt_result::failure_at (stmt, 12247 "Not vectorized: Incompatible number " 12248 "of vector subparts between %T and %T\n", 12249 nunits_vectype, *stmt_vectype_out); 12250 12251 if (dump_enabled_p ()) 12252 { 12253 dump_printf_loc (MSG_NOTE, vect_location, "nunits = "); 12254 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype)); 12255 dump_printf (MSG_NOTE, "\n"); 12256 } 12257 12258 *nunits_vectype_out = nunits_vectype; 12259 return opt_result::success (); 12260 } 12261