/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2018 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "cgraph.h"
#include "dumpfile.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "cfgloop.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "builtins.h"
#include "internal-fn.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "tree-ssa-loop-niter.h"
#include "gimple-fold.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"

/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (struct _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}

/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */
bool
stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
{
  gimple *stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}

/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
		  int misalign, enum vect_cost_model_location where)
{
  if ((kind == vector_load || kind == unaligned_load)
      && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    kind = vector_gather_load;
  if ((kind == vector_store || kind == unaligned_store)
      && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    kind = vector_scatter_store;
  if (body_cost_vec)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      stmt_info_for_cost si = { count, kind,
				stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
				misalign };
      body_cost_vec->safe_push (si);
      return (unsigned)
	(builtin_vectorization_cost (kind, vectype, misalign) * count);
    }
  else
    return add_stmt_cost (stmt_info->vinfo->target_cost_data,
			  count, kind, stmt_info, misalign, where);
}
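/* Illustrative note (editor's addition, not part of the original GCC
   sources): record_stmt_cost has two modes.  When BODY_COST_VEC is
   non-null the cost is only queued for later processing, e.g. a caller
   holding a previously created stmt_vector_for_cost COST_VEC might do

     record_stmt_cost (&cost_vec, 1, vector_stmt, stmt_info, 0, vect_body);

   whereas passing a null BODY_COST_VEC hands the cost straight to the
   target hook through add_stmt_cost.  This is a sketch of typical
   usage, not code taken from this file.  */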
/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
			 "vect_array");
}

/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
		   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple *new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return vect_name;
}

/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
		    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple *new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
}

/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, tree alias_ptr_type)
{
  tree mem_ref;

  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}

/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
		    enum vect_relevant relevant, bool live_p)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  gimple *pattern_stmt;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "mark relevant %d, live %d: ", relevant, live_p);
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
    }

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      /* This is the last stmt in a sequence that was detected as a
	 pattern that can potentially be vectorized.  Don't mark the stmt
	 as relevant/live because it's not going to be vectorized.
	 Instead mark the pattern-stmt that replaces it.  */

      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "last stmt in pattern. don't mark"
			 " relevant/live.\n");
      stmt_info = vinfo_for_stmt (pattern_stmt);
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
      save_relevant = STMT_VINFO_RELEVANT (stmt_info);
      save_live_p = STMT_VINFO_LIVE_P (stmt_info);
      stmt = pattern_stmt;
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt);
}


/* Function is_simple_and_all_uses_invariant

   Return true if STMT is simple and all uses of it are invariant.  */

bool
is_simple_and_all_uses_invariant (gimple *stmt, loop_vec_info loop_vinfo)
{
  tree op;
  gimple *def_stmt;
  ssa_op_iter iter;

  if (!is_gimple_assign (stmt))
    return false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
    {
      enum vect_def_type dt = vect_uninitialized_def;

      if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}

      if (dt != vect_external_def && dt != vect_constant_def)
	return false;
    }
  return true;
}

/* Function vect_stmt_relevant_p.

   Return true if STMT in loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - it is a control stmt in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
		      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
	 != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt)
	&& !gimple_clobber_p (stmt))
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vec_stmt_relevant_p: stmt has vdefs.\n");
	*relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
	{
	  basic_block bb = gimple_bb (USE_STMT (use_p));
	  if (!flow_bb_inside_loop_p (loop, bb))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "vec_stmt_relevant_p: used out of loop.\n");

	      if (is_gimple_debug (USE_STMT (use_p)))
		continue;

	      /* We expect all such uses to be in the loop exit phis
		 (because of loop closed form)   */
	      gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
	      gcc_assert (bb == single_exit (loop)->dest);

	      *live_p = true;
	    }
	}
    }

  if (*live_p && *relevant == vect_unused_in_scope
      && !is_simple_and_all_uses_invariant (stmt, loop_vinfo))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vec_stmt_relevant_p: stmt live but not relevant.\n");
      *relevant = vect_used_only_live;
    }

  return (*live_p || *relevant);
}
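/* Illustrative example (editor's addition, not part of the original
   sources): in a loop such as

     for (i = 0; i < n; i++)
       {
	 a[i] = b[i] + 1;   // has a vdef -> relevant (vect_used_in_scope)
	 last = b[i];	    // def used after the loop -> live_p
       }

   vect_stmt_relevant_p marks the store as relevant because it alters
   memory, and marks the assignment to LAST as live because its value is
   used by a loop-closed PHI outside the loop.  */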
*/ 389 390 if (!gimple_assign_copy_p (stmt)) 391 { 392 if (is_gimple_call (stmt) 393 && gimple_call_internal_p (stmt)) 394 { 395 internal_fn ifn = gimple_call_internal_fn (stmt); 396 int mask_index = internal_fn_mask_index (ifn); 397 if (mask_index >= 0 398 && use == gimple_call_arg (stmt, mask_index)) 399 return true; 400 int stored_value_index = internal_fn_stored_value_index (ifn); 401 if (stored_value_index >= 0 402 && use == gimple_call_arg (stmt, stored_value_index)) 403 return true; 404 if (internal_gather_scatter_fn_p (ifn) 405 && use == gimple_call_arg (stmt, 1)) 406 return true; 407 } 408 return false; 409 } 410 411 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME) 412 return false; 413 operand = gimple_assign_rhs1 (stmt); 414 if (TREE_CODE (operand) != SSA_NAME) 415 return false; 416 417 if (operand == use) 418 return true; 419 420 return false; 421 } 422 423 424 /* 425 Function process_use. 426 427 Inputs: 428 - a USE in STMT in a loop represented by LOOP_VINFO 429 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt 430 that defined USE. This is done by calling mark_relevant and passing it 431 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant). 432 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't 433 be performed. 434 435 Outputs: 436 Generally, LIVE_P and RELEVANT are used to define the liveness and 437 relevance info of the DEF_STMT of this USE: 438 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p 439 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant 440 Exceptions: 441 - case 1: If USE is used only for address computations (e.g. array indexing), 442 which does not need to be directly vectorized, then the liveness/relevance 443 of the respective DEF_STMT is left unchanged. 444 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we 445 skip DEF_STMT cause it had already been processed. 446 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will 447 be modified accordingly. 448 449 Return true if everything is as expected. Return false otherwise. */ 450 451 static bool 452 process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo, 453 enum vect_relevant relevant, vec<gimple *> *worklist, 454 bool force) 455 { 456 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 457 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt); 458 stmt_vec_info dstmt_vinfo; 459 basic_block bb, def_bb; 460 gimple *def_stmt; 461 enum vect_def_type dt; 462 463 /* case 1: we are only interested in uses that need to be vectorized. Uses 464 that are used for address computation are not considered relevant. */ 465 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt)) 466 return true; 467 468 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &dt)) 469 { 470 if (dump_enabled_p ()) 471 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 472 "not vectorized: unsupported use in stmt.\n"); 473 return false; 474 } 475 476 if (!def_stmt || gimple_nop_p (def_stmt)) 477 return true; 478 479 def_bb = gimple_bb (def_stmt); 480 if (!flow_bb_inside_loop_p (loop, def_bb)) 481 { 482 if (dump_enabled_p ()) 483 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n"); 484 return true; 485 } 486 487 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT). 488 DEF_STMT must have already been processed, because this should be the 489 only way that STMT, which is a reduction-phi, was put in the worklist, 490 as there should be no other uses for DEF_STMT in the loop. 
So we just 491 check that everything is as expected, and we are done. */ 492 dstmt_vinfo = vinfo_for_stmt (def_stmt); 493 bb = gimple_bb (stmt); 494 if (gimple_code (stmt) == GIMPLE_PHI 495 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def 496 && gimple_code (def_stmt) != GIMPLE_PHI 497 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def 498 && bb->loop_father == def_bb->loop_father) 499 { 500 if (dump_enabled_p ()) 501 dump_printf_loc (MSG_NOTE, vect_location, 502 "reduc-stmt defining reduc-phi in the same nest.\n"); 503 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo)) 504 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo)); 505 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction); 506 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo) 507 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope); 508 return true; 509 } 510 511 /* case 3a: outer-loop stmt defining an inner-loop stmt: 512 outer-loop-header-bb: 513 d = def_stmt 514 inner-loop: 515 stmt # use (d) 516 outer-loop-tail-bb: 517 ... */ 518 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father)) 519 { 520 if (dump_enabled_p ()) 521 dump_printf_loc (MSG_NOTE, vect_location, 522 "outer-loop def-stmt defining inner-loop stmt.\n"); 523 524 switch (relevant) 525 { 526 case vect_unused_in_scope: 527 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ? 528 vect_used_in_scope : vect_unused_in_scope; 529 break; 530 531 case vect_used_in_outer_by_reduction: 532 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def); 533 relevant = vect_used_by_reduction; 534 break; 535 536 case vect_used_in_outer: 537 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def); 538 relevant = vect_used_in_scope; 539 break; 540 541 case vect_used_in_scope: 542 break; 543 544 default: 545 gcc_unreachable (); 546 } 547 } 548 549 /* case 3b: inner-loop stmt defining an outer-loop stmt: 550 outer-loop-header-bb: 551 ... 552 inner-loop: 553 d = def_stmt 554 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction): 555 stmt # use (d) */ 556 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father)) 557 { 558 if (dump_enabled_p ()) 559 dump_printf_loc (MSG_NOTE, vect_location, 560 "inner-loop def-stmt defining outer-loop stmt.\n"); 561 562 switch (relevant) 563 { 564 case vect_unused_in_scope: 565 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def 566 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ? 567 vect_used_in_outer_by_reduction : vect_unused_in_scope; 568 break; 569 570 case vect_used_by_reduction: 571 case vect_used_only_live: 572 relevant = vect_used_in_outer_by_reduction; 573 break; 574 575 case vect_used_in_scope: 576 relevant = vect_used_in_outer; 577 break; 578 579 default: 580 gcc_unreachable (); 581 } 582 } 583 /* We are also not interested in uses on loop PHI backedges that are 584 inductions. Otherwise we'll needlessly vectorize the IV increment 585 and cause hybrid SLP for SLP inductions. Unless the PHI is live 586 of course. */ 587 else if (gimple_code (stmt) == GIMPLE_PHI 588 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def 589 && ! 
STMT_VINFO_LIVE_P (stmt_vinfo) 590 && (PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (bb->loop_father)) 591 == use)) 592 { 593 if (dump_enabled_p ()) 594 dump_printf_loc (MSG_NOTE, vect_location, 595 "induction value on backedge.\n"); 596 return true; 597 } 598 599 600 vect_mark_relevant (worklist, def_stmt, relevant, false); 601 return true; 602 } 603 604 605 /* Function vect_mark_stmts_to_be_vectorized. 606 607 Not all stmts in the loop need to be vectorized. For example: 608 609 for i... 610 for j... 611 1. T0 = i + j 612 2. T1 = a[T0] 613 614 3. j = j + 1 615 616 Stmt 1 and 3 do not need to be vectorized, because loop control and 617 addressing of vectorized data-refs are handled differently. 618 619 This pass detects such stmts. */ 620 621 bool 622 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo) 623 { 624 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 625 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); 626 unsigned int nbbs = loop->num_nodes; 627 gimple_stmt_iterator si; 628 gimple *stmt; 629 unsigned int i; 630 stmt_vec_info stmt_vinfo; 631 basic_block bb; 632 gimple *phi; 633 bool live_p; 634 enum vect_relevant relevant; 635 636 if (dump_enabled_p ()) 637 dump_printf_loc (MSG_NOTE, vect_location, 638 "=== vect_mark_stmts_to_be_vectorized ===\n"); 639 640 auto_vec<gimple *, 64> worklist; 641 642 /* 1. Init worklist. */ 643 for (i = 0; i < nbbs; i++) 644 { 645 bb = bbs[i]; 646 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si)) 647 { 648 phi = gsi_stmt (si); 649 if (dump_enabled_p ()) 650 { 651 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? "); 652 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0); 653 } 654 655 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p)) 656 vect_mark_relevant (&worklist, phi, relevant, live_p); 657 } 658 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) 659 { 660 stmt = gsi_stmt (si); 661 if (dump_enabled_p ()) 662 { 663 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? "); 664 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0); 665 } 666 667 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p)) 668 vect_mark_relevant (&worklist, stmt, relevant, live_p); 669 } 670 } 671 672 /* 2. Process_worklist */ 673 while (worklist.length () > 0) 674 { 675 use_operand_p use_p; 676 ssa_op_iter iter; 677 678 stmt = worklist.pop (); 679 if (dump_enabled_p ()) 680 { 681 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: "); 682 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0); 683 } 684 685 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it 686 (DEF_STMT) as relevant/irrelevant according to the relevance property 687 of STMT. */ 688 stmt_vinfo = vinfo_for_stmt (stmt); 689 relevant = STMT_VINFO_RELEVANT (stmt_vinfo); 690 691 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is 692 propagated as is to the DEF_STMTs of its USEs. 693 694 One exception is when STMT has been identified as defining a reduction 695 variable; in this case we set the relevance to vect_used_by_reduction. 696 This is because we distinguish between two kinds of relevant stmts - 697 those that are used by a reduction computation, and those that are 698 (also) used by a regular computation. This allows us later on to 699 identify stmts that are used solely by a reduction, and therefore the 700 order of the results that they produce does not have to be kept. 
*/ 701 702 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo)) 703 { 704 case vect_reduction_def: 705 gcc_assert (relevant != vect_unused_in_scope); 706 if (relevant != vect_unused_in_scope 707 && relevant != vect_used_in_scope 708 && relevant != vect_used_by_reduction 709 && relevant != vect_used_only_live) 710 { 711 if (dump_enabled_p ()) 712 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 713 "unsupported use of reduction.\n"); 714 return false; 715 } 716 break; 717 718 case vect_nested_cycle: 719 if (relevant != vect_unused_in_scope 720 && relevant != vect_used_in_outer_by_reduction 721 && relevant != vect_used_in_outer) 722 { 723 if (dump_enabled_p ()) 724 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 725 "unsupported use of nested cycle.\n"); 726 727 return false; 728 } 729 break; 730 731 case vect_double_reduction_def: 732 if (relevant != vect_unused_in_scope 733 && relevant != vect_used_by_reduction 734 && relevant != vect_used_only_live) 735 { 736 if (dump_enabled_p ()) 737 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 738 "unsupported use of double reduction.\n"); 739 740 return false; 741 } 742 break; 743 744 default: 745 break; 746 } 747 748 if (is_pattern_stmt_p (stmt_vinfo)) 749 { 750 /* Pattern statements are not inserted into the code, so 751 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we 752 have to scan the RHS or function arguments instead. */ 753 if (is_gimple_assign (stmt)) 754 { 755 enum tree_code rhs_code = gimple_assign_rhs_code (stmt); 756 tree op = gimple_assign_rhs1 (stmt); 757 758 i = 1; 759 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op)) 760 { 761 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo, 762 relevant, &worklist, false) 763 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo, 764 relevant, &worklist, false)) 765 return false; 766 i = 2; 767 } 768 for (; i < gimple_num_ops (stmt); i++) 769 { 770 op = gimple_op (stmt, i); 771 if (TREE_CODE (op) == SSA_NAME 772 && !process_use (stmt, op, loop_vinfo, relevant, 773 &worklist, false)) 774 return false; 775 } 776 } 777 else if (is_gimple_call (stmt)) 778 { 779 for (i = 0; i < gimple_call_num_args (stmt); i++) 780 { 781 tree arg = gimple_call_arg (stmt, i); 782 if (!process_use (stmt, arg, loop_vinfo, relevant, 783 &worklist, false)) 784 return false; 785 } 786 } 787 } 788 else 789 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE) 790 { 791 tree op = USE_FROM_PTR (use_p); 792 if (!process_use (stmt, op, loop_vinfo, relevant, 793 &worklist, false)) 794 return false; 795 } 796 797 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo)) 798 { 799 gather_scatter_info gs_info; 800 if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info)) 801 gcc_unreachable (); 802 if (!process_use (stmt, gs_info.offset, loop_vinfo, relevant, 803 &worklist, true)) 804 return false; 805 } 806 } /* while worklist */ 807 808 return true; 809 } 810 811 812 /* Function vect_model_simple_cost. 813 814 Models cost for simple operations, i.e. those that only emit ncopies of a 815 single op. Right now, this does not account for multiple insns that could 816 be generated for the single vector op. We will handle that shortly. */ 817 818 void 819 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies, 820 enum vect_def_type *dt, 821 int ndts, 822 stmt_vector_for_cost *prologue_cost_vec, 823 stmt_vector_for_cost *body_cost_vec) 824 { 825 int i; 826 int inside_cost = 0, prologue_cost = 0; 827 828 /* The SLP costs were already calculated during SLP tree build. 
/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
			enum vect_def_type *dt,
			int ndts,
			stmt_vector_for_cost *prologue_cost_vec,
			stmt_vector_for_cost *body_cost_vec)
{
  int i;
  int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  gcc_assert (!PURE_SLP_STMT (stmt_info));

  /* Cost the "broadcast" of a scalar operand into a vector operand.
     Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
     cost model.  */
  for (i = 0; i < ndts; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
					 stmt_info, 0, vect_prologue);

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
				  stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_simple_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}


/* Model cost for type demotion and promotion operations.  PWR is normally
   zero for single-step promotions and demotions.  It will be one if
   two-step promotion/demotion is required, and so on.  Each additional
   step doubles the number of instructions required.  */

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
				    enum vect_def_type *dt, int pwr)
{
  int i, tmp;
  int inside_cost = 0, prologue_cost = 0;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  void *target_cost_data;

  /* The SLP costs were already calculated during SLP tree build.  */
  gcc_assert (!PURE_SLP_STMT (stmt_info));

  if (loop_vinfo)
    target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
  else
    target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

  for (i = 0; i < pwr + 1; i++)
    {
      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
	(i + 1) : i;
      inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
				    vec_promote_demote, stmt_info, 0,
				    vect_body);
    }

  /* FORNOW: Assuming maximum 2 args per stmt.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
				      stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_promotion_demotion_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}

/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
		       vect_memory_access_type memory_access_type,
		       vec_load_store_type vls_type, slp_tree slp_node,
		       stmt_vector_for_cost *prologue_cost_vec,
		       stmt_vector_for_cost *body_cost_vec)
{
  unsigned int inside_cost = 0, prologue_cost = 0;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  if (vls_type == VLS_STORE_INVARIANT)
    prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
				       stmt_info, 0, vect_prologue);

  /* Grouped stores update all elements in the group at once,
     so we want the DR for the first statement.  */
  if (!slp_node && grouped_access_p)
    {
      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses high and low interleave or shuffle operations for each
	 needed permute.  */
      int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
				      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_store_cost: strided group_size = %d .\n",
			 group_size);
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  /* Costs of the stores.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      /* N scalar stores plus extracting the elements.  */
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (body_cost_vec,
				       ncopies * assumed_nunits,
				       scalar_store, stmt_info, 0, vect_body);
    }
  else
    vect_get_store_cost (dr, ncopies, &inside_cost, body_cost_vec);

  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    {
      /* N scalar stores plus extracting the elements.  */
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (body_cost_vec,
				       ncopies * assumed_nunits,
				       vec_to_scalar, stmt_info, 0, vect_body);
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_store_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
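/* Worked example (editor's addition, not part of the original sources):
   for a contiguous-permute store group with GROUP_SIZE == 4 vectorized
   with NCOPIES == 2, the permute cost recorded by vect_model_store_cost
   is

     nstmts = ncopies * ceil_log2 (group_size) * group_size
	    = 2 * 2 * 4 = 16

   vec_perm statements, i.e. one high/low interleave step per
   log2(group_size) level for each of the group's vectors.  */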
/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (struct data_reference *dr, int ncopies,
		     unsigned int *inside_cost,
		     stmt_vector_for_cost *body_cost_vec)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple *stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vector_store, stmt_info, 0,
					  vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: aligned.\n");
	break;
      }

    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned store.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_store, stmt_info,
					  DR_MISALIGNMENT (dr), vect_body);
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: unaligned supported by "
			   "hardware.\n");
	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_store_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}


/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, one access has
   the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
		      vect_memory_access_type memory_access_type,
		      slp_tree slp_node,
		      stmt_vector_for_cost *prologue_cost_vec,
		      stmt_vector_for_cost *body_cost_vec)
{
  gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  unsigned int inside_cost = 0, prologue_cost = 0;
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  /* Grouped loads read all elements in the group at once,
     so we want the DR for the first statement.  */
  if (!slp_node && grouped_access_p)
    {
      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses even and odd extract operations or shuffle operations
	 for each needed permute.  */
      int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
				      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_load_cost: strided group_size = %d .\n",
			 group_size);
    }

  /* The loads themselves.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      /* N scalar loads plus gathering them into a vector.  */
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (body_cost_vec,
				       ncopies * assumed_nunits,
				       scalar_load, stmt_info, 0, vect_body);
    }
  else
    vect_get_load_cost (dr, ncopies, first_stmt_p,
			&inside_cost, &prologue_cost,
			prologue_cost_vec, body_cost_vec, true);
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
				     stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_load_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}


/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (struct data_reference *dr, int ncopies,
		    bool add_realign_cost, unsigned int *inside_cost,
		    unsigned int *prologue_cost,
		    stmt_vector_for_cost *prologue_cost_vec,
		    stmt_vector_for_cost *body_cost_vec,
		    bool record_prologue_costs)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple *stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: aligned.\n");

	break;
      }
    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned load.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_load, stmt_info,
					  DR_MISALIGNMENT (dr), vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned supported by "
			   "hardware.\n");

	break;
      }
    case dr_explicit_realign:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
					  vector_load, stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vec_perm, stmt_info, 0, vect_body);

	/* FIXME: If the misalignment remains fixed across the iterations of
	   the containing loop, the following cost should be added to the
	   prologue costs.  */
	if (targetm.vectorize.builtin_mask_for_load)
	  *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
					    stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign\n");

	break;
      }
    case dr_explicit_realign_optimized:
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned software "
			   "pipelined.\n");

	/* Unaligned software pipeline has a load of an address, an initial
	   load, and possibly a mask operation to "prime" the loop.  However,
	   if this is an access in a group of loads, which provide grouped
	   access, then the above cost should only be considered for one
	   access in the group.  Inside the loop, there is a load op
	   and a realignment op.  */

	if (add_realign_cost && record_prologue_costs)
	  {
	    *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
						vector_stmt, stmt_info,
						0, vect_prologue);
	    if (targetm.vectorize.builtin_mask_for_load)
	      *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
						  vector_stmt, stmt_info,
						  0, vect_prologue);
	  }

	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign optimized"
			   "\n");

	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_load_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT.  */

static void
vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (stmt, new_stmt, gsi);
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
	{
	  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
	  basic_block new_bb;
	  edge pe;

	  if (nested_in_vect_loop_p (loop, stmt))
	    loop = loop->inner;

	  pe = loop_preheader_edge (loop);
	  new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
	  gcc_assert (!new_bb);
	}
      else
	{
	  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
	  basic_block bb;
	  gimple_stmt_iterator gsi_bb_start;

	  gcc_assert (bb_vinfo);
	  bb = BB_VINFO_BB (bb_vinfo);
	  gsi_bb_start = gsi_after_labels (bb);
	  gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
	}
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "created new init_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
    }
}

/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type a vector with all elements equal to VAL is created first.
   Place the initialization at BSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */

tree
vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
{
  gimple *init_stmt;
  tree new_temp;

  /* We abuse this function to push something to an SSA name with
     initial value VAL.  */
  if (! useless_type_conversion_p (type, TREE_TYPE (val)))
    {
      gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
      if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
	{
	  /* Scalar boolean value should be transformed into
	     all zeros or all ones value before building a vector.  */
	  if (VECTOR_BOOLEAN_TYPE_P (type))
	    {
	      tree true_val = build_all_ones_cst (TREE_TYPE (type));
	      tree false_val = build_zero_cst (TREE_TYPE (type));

	      if (CONSTANT_CLASS_P (val))
		val = integer_zerop (val) ? false_val : true_val;
	      else
		{
		  new_temp = make_ssa_name (TREE_TYPE (type));
		  init_stmt = gimple_build_assign (new_temp, COND_EXPR,
						   val, true_val, false_val);
		  vect_init_vector_1 (stmt, init_stmt, gsi);
		  val = new_temp;
		}
	    }
	  else if (CONSTANT_CLASS_P (val))
	    val = fold_convert (TREE_TYPE (type), val);
	  else
	    {
	      new_temp = make_ssa_name (TREE_TYPE (type));
	      if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
		init_stmt = gimple_build_assign (new_temp,
						 fold_build1 (VIEW_CONVERT_EXPR,
							      TREE_TYPE (type),
							      val));
	      else
		init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
	      vect_init_vector_1 (stmt, init_stmt, gsi);
	      val = new_temp;
	    }
	}
      val = build_vector_from_val (type, val);
    }

  new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_temp, val);
  vect_init_vector_1 (stmt, init_stmt, gsi);
  return new_temp;
}
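/* Illustrative sketch (editor's addition, not part of the original
   sources): when TYPE is a vector-boolean type, a scalar condition
   value VAL is first normalized to all-ones/all-zero form, roughly

     tmp = VAL ? true_val : false_val;   // COND_EXPR built above
     cst = { tmp, tmp, ..., tmp };       // build_vector_from_val

   so that the resulting mask vector has the element representation the
   target expects.  */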
/* Function vect_get_vec_def_for_operand_1.

   For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
   DT that will be used in the vectorized stmt.  */

tree
vect_get_vec_def_for_operand_1 (gimple *def_stmt, enum vect_def_type dt)
{
  tree vec_oprnd;
  gimple *vec_stmt;
  stmt_vec_info def_stmt_info = NULL;

  switch (dt)
    {
    /* operand is a constant or a loop invariant.  */
    case vect_constant_def:
    case vect_external_def:
      /* Code should use vect_get_vec_def_for_operand.  */
      gcc_unreachable ();

    /* operand is defined inside the loop.  */
    case vect_internal_def:
      {
	/* Get the def from the vectorized stmt.  */
	def_stmt_info = vinfo_for_stmt (def_stmt);

	vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
	/* Get vectorized pattern statement.  */
	if (!vec_stmt
	    && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
	    && !STMT_VINFO_RELEVANT (def_stmt_info))
	  vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
		       STMT_VINFO_RELATED_STMT (def_stmt_info)));
	gcc_assert (vec_stmt);
	if (gimple_code (vec_stmt) == GIMPLE_PHI)
	  vec_oprnd = PHI_RESULT (vec_stmt);
	else if (is_gimple_call (vec_stmt))
	  vec_oprnd = gimple_call_lhs (vec_stmt);
	else
	  vec_oprnd = gimple_assign_lhs (vec_stmt);
	return vec_oprnd;
      }

    /* operand is defined by a loop header phi.  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
    case vect_induction_def:
      {
	gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

	/* Get the def from the vectorized stmt.  */
	def_stmt_info = vinfo_for_stmt (def_stmt);
	vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
	if (gimple_code (vec_stmt) == GIMPLE_PHI)
	  vec_oprnd = PHI_RESULT (vec_stmt);
	else
	  vec_oprnd = gimple_get_lhs (vec_stmt);
	return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}


/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will be
   used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  VECTYPE may be used to specify a required type for
   the vector invariant.  */

tree
vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
{
  gimple *def_stmt;
  enum vect_def_type dt;
  bool is_simple_use;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_get_vec_def_for_operand: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
      dump_printf (MSG_NOTE, "\n");
    }

  is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt);
  gcc_assert (is_simple_use);
  if (def_stmt && dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt =  ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
    }

  if (dt == vect_constant_def || dt == vect_external_def)
    {
      tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      tree vector_type;

      if (vectype)
	vector_type = vectype;
      else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
	       && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
	vector_type = build_same_sized_truth_vector_type (stmt_vectype);
      else
	vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));

      gcc_assert (vector_type);
      return vect_init_vector (stmt, op, vector_type, NULL);
    }
  else
    return vect_get_vec_def_for_operand_1 (def_stmt, dt);
}


/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
   of the stmt that defines VEC_OPRND.
   DT is the type of the vector def VEC_OPRND.

   Context:
	In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
   vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:        STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
			VS1.1:  vx.1 = memref1      VS1.2
			VS1.2:  vx.2 = memref2      VS1.3
			VS1.3:  vx.3 = memref3

   S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
			VSnew.1:  vz1 = vx.1 + ...  VSnew.2
			VSnew.2:  vz2 = vx.2 + ...  VSnew.3
			VSnew.3:  vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
	To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

	To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

	For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
	vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
	vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
	vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */

tree
vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
{
  gimple *vec_stmt_for_operand;
  stmt_vec_info def_stmt_info;

  /* Do nothing; can reuse same def.  */
  if (dt == vect_external_def || dt == vect_constant_def )
    return vec_oprnd;

  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
  gcc_assert (def_stmt_info);
  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (vec_stmt_for_operand);
  if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
    vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
  else
    vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  return vec_oprnd;
}


/* Get vectorized definitions for the operands to create a copy of an original
   stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */

void
vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
				 vec<tree> *vec_oprnds0,
				 vec<tree> *vec_oprnds1)
{
  tree vec_oprnd = vec_oprnds0->pop ();

  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
  vec_oprnds0->quick_push (vec_oprnd);

  if (vec_oprnds1 && vec_oprnds1->length ())
    {
      vec_oprnd = vec_oprnds1->pop ();
      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
      vec_oprnds1->quick_push (vec_oprnd);
    }
}


/* Get vectorized definitions for OP0 and OP1.  */

void
vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
		   vec<tree> *vec_oprnds0,
		   vec<tree> *vec_oprnds1,
		   slp_tree slp_node)
{
  if (slp_node)
    {
      int nops = (op1 == NULL_TREE) ? 1 : 2;
      auto_vec<tree> ops (nops);
      auto_vec<vec<tree> > vec_defs (nops);

      ops.quick_push (op0);
      if (op1)
	ops.quick_push (op1);

      vect_get_slp_defs (ops, slp_node, &vec_defs);

      *vec_oprnds0 = vec_defs[0];
      if (op1)
	*vec_oprnds1 = vec_defs[1];
    }
  else
    {
      tree vec_oprnd;

      vec_oprnds0->create (1);
      vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
      vec_oprnds0->quick_push (vec_oprnd);

      if (op1)
	{
	  vec_oprnds1->create (1);
	  vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
	  vec_oprnds1->quick_push (vec_oprnd);
	}
    }
}
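/* Usage sketch (editor's addition, not part of the original sources):
   a typical vectorizable_* routine obtains the defs for the first
   vector statement and then for each additional copy, e.g.

     vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1, slp_node);
     for (j = 1; j < ncopies; j++)
       vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);

   mirroring the VS1.0 ... VS1.3 chain described in the comment before
   vect_get_vec_def_for_stmt_copy.  */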
*/ 1668 if ((vdef && TREE_CODE (vdef) == SSA_NAME) 1669 && ((is_gimple_assign (vec_stmt) 1670 && !is_gimple_reg (gimple_assign_lhs (vec_stmt))) 1671 || (is_gimple_call (vec_stmt) 1672 && !(gimple_call_flags (vec_stmt) 1673 & (ECF_CONST|ECF_PURE|ECF_NOVOPS))))) 1674 { 1675 tree new_vdef = copy_ssa_name (vuse, vec_stmt); 1676 gimple_set_vdef (vec_stmt, new_vdef); 1677 SET_USE (gimple_vuse_op (at_stmt), new_vdef); 1678 } 1679 } 1680 } 1681 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT); 1682 vect_finish_stmt_generation_1 (stmt, vec_stmt); 1683 } 1684 1685 /* We want to vectorize a call to combined function CFN with function 1686 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN 1687 as the types of all inputs. Check whether this is possible using 1688 an internal function, returning its code if so or IFN_LAST if not. */ 1689 1690 static internal_fn 1691 vectorizable_internal_function (combined_fn cfn, tree fndecl, 1692 tree vectype_out, tree vectype_in) 1693 { 1694 internal_fn ifn; 1695 if (internal_fn_p (cfn)) 1696 ifn = as_internal_fn (cfn); 1697 else 1698 ifn = associated_internal_fn (fndecl); 1699 if (ifn != IFN_LAST && direct_internal_fn_p (ifn)) 1700 { 1701 const direct_internal_fn_info &info = direct_internal_fn (ifn); 1702 if (info.vectorizable) 1703 { 1704 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in); 1705 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in); 1706 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1), 1707 OPTIMIZE_FOR_SPEED)) 1708 return ifn; 1709 } 1710 } 1711 return IFN_LAST; 1712 } 1713 1714 1715 static tree permute_vec_elements (tree, tree, tree, gimple *, 1716 gimple_stmt_iterator *); 1717 1718 /* Check whether a load or store statement in the loop described by 1719 LOOP_VINFO is possible in a fully-masked loop. This is testing 1720 whether the vectorizer pass has the appropriate support, as well as 1721 whether the target does. 1722 1723 VLS_TYPE says whether the statement is a load or store and VECTYPE 1724 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE 1725 says how the load or store is going to be implemented and GROUP_SIZE 1726 is the number of load or store statements in the containing group. 1727 If the access is a gather load or scatter store, GS_INFO describes 1728 its arguments. 1729 1730 Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not 1731 supported, otherwise record the required mask types. */ 1732 1733 static void 1734 check_load_store_masking (loop_vec_info loop_vinfo, tree vectype, 1735 vec_load_store_type vls_type, int group_size, 1736 vect_memory_access_type memory_access_type, 1737 gather_scatter_info *gs_info) 1738 { 1739 /* Invariant loads need no special support. */ 1740 if (memory_access_type == VMAT_INVARIANT) 1741 return; 1742 1743 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo); 1744 machine_mode vecmode = TYPE_MODE (vectype); 1745 bool is_load = (vls_type == VLS_LOAD); 1746 if (memory_access_type == VMAT_LOAD_STORE_LANES) 1747 { 1748 if (is_load 1749 ? 
!vect_load_lanes_supported (vectype, group_size, true) 1750 : !vect_store_lanes_supported (vectype, group_size, true)) 1751 { 1752 if (dump_enabled_p ()) 1753 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 1754 "can't use a fully-masked loop because the" 1755 " target doesn't have an appropriate masked" 1756 " load/store-lanes instruction.\n"); 1757 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false; 1758 return; 1759 } 1760 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype); 1761 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype); 1762 return; 1763 } 1764 1765 if (memory_access_type == VMAT_GATHER_SCATTER) 1766 { 1767 internal_fn ifn = (is_load 1768 ? IFN_MASK_GATHER_LOAD 1769 : IFN_MASK_SCATTER_STORE); 1770 tree offset_type = TREE_TYPE (gs_info->offset); 1771 if (!internal_gather_scatter_fn_supported_p (ifn, vectype, 1772 gs_info->memory_type, 1773 TYPE_SIGN (offset_type), 1774 gs_info->scale)) 1775 { 1776 if (dump_enabled_p ()) 1777 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 1778 "can't use a fully-masked loop because the" 1779 " target doesn't have an appropriate masked" 1780 " gather load or scatter store instruction.\n"); 1781 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false; 1782 return; 1783 } 1784 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype); 1785 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype); 1786 return; 1787 } 1788 1789 if (memory_access_type != VMAT_CONTIGUOUS 1790 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE) 1791 { 1792 /* Element X of the data must come from iteration i * VF + X of the 1793 scalar loop. We need more work to support other mappings. */ 1794 if (dump_enabled_p ()) 1795 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 1796 "can't use a fully-masked loop because an access" 1797 " isn't contiguous.\n"); 1798 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false; 1799 return; 1800 } 1801 1802 machine_mode mask_mode; 1803 if (!(targetm.vectorize.get_mask_mode 1804 (GET_MODE_NUNITS (vecmode), 1805 GET_MODE_SIZE (vecmode)).exists (&mask_mode)) 1806 || !can_vec_mask_load_store_p (vecmode, mask_mode, is_load)) 1807 { 1808 if (dump_enabled_p ()) 1809 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 1810 "can't use a fully-masked loop because the target" 1811 " doesn't have the appropriate masked load or" 1812 " store.\n"); 1813 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false; 1814 return; 1815 } 1816 /* We might load more scalars than we need for permuting SLP loads. 1817 We checked in get_group_load_store_type that the extra elements 1818 don't leak into a new vector. */ 1819 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); 1820 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); 1821 unsigned int nvectors; 1822 if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors)) 1823 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype); 1824 else 1825 gcc_unreachable (); 1826 } 1827 1828 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized 1829 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask 1830 that needs to be applied to all loads and stores in a vectorized loop. 1831 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK. 1832 1833 MASK_TYPE is the type of both masks. If new statements are needed, 1834 insert them before GSI. 
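The conjunction is emitted as a single BIT_AND_EXPR assignment to a fresh SSA name (prefixed "vec_mask_and") inserted before GSI.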
*/ 1835 1836 static tree 1837 prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask, 1838 gimple_stmt_iterator *gsi) 1839 { 1840 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask))); 1841 if (!loop_mask) 1842 return vec_mask; 1843 1844 gcc_assert (TREE_TYPE (loop_mask) == mask_type); 1845 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and"); 1846 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR, 1847 vec_mask, loop_mask); 1848 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT); 1849 return and_res; 1850 } 1851 1852 /* Determine whether we can use a gather load or scatter store to vectorize 1853 strided load or store STMT by truncating the current offset to a smaller 1854 width. We need to be able to construct an offset vector: 1855 1856 { 0, X, X*2, X*3, ... } 1857 1858 without loss of precision, where X is STMT's DR_STEP. 1859 1860 Return true if this is possible, describing the gather load or scatter 1861 store in GS_INFO. MASKED_P is true if the load or store is conditional. */ 1862 1863 static bool 1864 vect_truncate_gather_scatter_offset (gimple *stmt, loop_vec_info loop_vinfo, 1865 bool masked_p, 1866 gather_scatter_info *gs_info) 1867 { 1868 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 1869 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); 1870 tree step = DR_STEP (dr); 1871 if (TREE_CODE (step) != INTEGER_CST) 1872 { 1873 /* ??? Perhaps we could use range information here? */ 1874 if (dump_enabled_p ()) 1875 dump_printf_loc (MSG_NOTE, vect_location, 1876 "cannot truncate variable step.\n"); 1877 return false; 1878 } 1879 1880 /* Get the number of bits in an element. */ 1881 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 1882 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype)); 1883 unsigned int element_bits = GET_MODE_BITSIZE (element_mode); 1884 1885 /* Set COUNT to the upper limit on the number of elements - 1. 1886 Start with the maximum vectorization factor. */ 1887 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1; 1888 1889 /* Try lowering COUNT to the number of scalar latch iterations. */ 1890 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 1891 widest_int max_iters; 1892 if (max_loop_iterations (loop, &max_iters) 1893 && max_iters < count) 1894 count = max_iters.to_shwi (); 1895 1896 /* Try scales of 1 and the element size. */ 1897 int scales[] = { 1, vect_get_scalar_dr_size (dr) }; 1898 bool overflow_p = false; 1899 for (int i = 0; i < 2; ++i) 1900 { 1901 int scale = scales[i]; 1902 widest_int factor; 1903 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor)) 1904 continue; 1905 1906 /* See whether we can calculate (COUNT - 1) * STEP / SCALE 1907 in OFFSET_BITS bits. */ 1908 widest_int range = wi::mul (count, factor, SIGNED, &overflow_p); 1909 if (overflow_p) 1910 continue; 1911 signop sign = range >= 0 ? UNSIGNED : SIGNED; 1912 if (wi::min_precision (range, sign) > element_bits) 1913 { 1914 overflow_p = true; 1915 continue; 1916 } 1917 1918 /* See whether the target supports the operation. */ 1919 tree memory_type = TREE_TYPE (DR_REF (dr)); 1920 if (!vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p, vectype, 1921 memory_type, element_bits, sign, scale, 1922 &gs_info->ifn, &gs_info->element_type)) 1923 continue; 1924 1925 tree offset_type = build_nonstandard_integer_type (element_bits, 1926 sign == UNSIGNED); 1927 1928 gs_info->decl = NULL_TREE; 1929 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET, 1930 but we don't need to store that here. 
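When the access is vectorized, the base address is taken from the data reference itself instead.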
*/ 1931 gs_info->base = NULL_TREE; 1932 gs_info->offset = fold_convert (offset_type, step); 1933 gs_info->offset_dt = vect_constant_def; 1934 gs_info->offset_vectype = NULL_TREE; 1935 gs_info->scale = scale; 1936 gs_info->memory_type = memory_type; 1937 return true; 1938 } 1939 1940 if (overflow_p && dump_enabled_p ()) 1941 dump_printf_loc (MSG_NOTE, vect_location, 1942 "truncating gather/scatter offset to %d bits" 1943 " might change its value.\n", element_bits); 1944 1945 return false; 1946 } 1947 1948 /* Return true if we can use gather/scatter internal functions to 1949 vectorize STMT, which is a grouped or strided load or store. 1950 MASKED_P is true if load or store is conditional. When returning 1951 true, fill in GS_INFO with the information required to perform the 1952 operation. */ 1953 1954 static bool 1955 vect_use_strided_gather_scatters_p (gimple *stmt, loop_vec_info loop_vinfo, 1956 bool masked_p, 1957 gather_scatter_info *gs_info) 1958 { 1959 if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info) 1960 || gs_info->decl) 1961 return vect_truncate_gather_scatter_offset (stmt, loop_vinfo, 1962 masked_p, gs_info); 1963 1964 scalar_mode element_mode = SCALAR_TYPE_MODE (gs_info->element_type); 1965 unsigned int element_bits = GET_MODE_BITSIZE (element_mode); 1966 tree offset_type = TREE_TYPE (gs_info->offset); 1967 unsigned int offset_bits = TYPE_PRECISION (offset_type); 1968 1969 /* Enforced by vect_check_gather_scatter. */ 1970 gcc_assert (element_bits >= offset_bits); 1971 1972 /* If the elements are wider than the offset, convert the offset to the 1973 same width, without changing its sign. */ 1974 if (element_bits > offset_bits) 1975 { 1976 bool unsigned_p = TYPE_UNSIGNED (offset_type); 1977 offset_type = build_nonstandard_integer_type (element_bits, unsigned_p); 1978 gs_info->offset = fold_convert (offset_type, gs_info->offset); 1979 } 1980 1981 if (dump_enabled_p ()) 1982 dump_printf_loc (MSG_NOTE, vect_location, 1983 "using gather/scatter for strided/grouped access," 1984 " scale = %d\n", gs_info->scale); 1985 1986 return true; 1987 } 1988 1989 /* STMT is a non-strided load or store, meaning that it accesses 1990 elements with a known constant step. Return -1 if that step 1991 is negative, 0 if it is zero, and 1 if it is greater than zero. */ 1992 1993 static int 1994 compare_step_with_zero (gimple *stmt) 1995 { 1996 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 1997 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); 1998 return tree_int_cst_compare (vect_dr_behavior (dr)->step, 1999 size_zero_node); 2000 } 2001 2002 /* If the target supports a permute mask that reverses the elements in 2003 a vector of type VECTYPE, return that mask, otherwise return null. */ 2004 2005 static tree 2006 perm_mask_for_reverse (tree vectype) 2007 { 2008 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); 2009 2010 /* The encoding has a single stepped pattern. */ 2011 vec_perm_builder sel (nunits, 1, 3); 2012 for (int i = 0; i < 3; ++i) 2013 sel.quick_push (nunits - 1 - i); 2014 2015 vec_perm_indices indices (sel, 1, nunits); 2016 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices)) 2017 return NULL_TREE; 2018 return vect_gen_perm_mask_checked (vectype, indices); 2019 } 2020 2021 /* A subroutine of get_load_store_type, with a subset of the same 2022 arguments. Handle the case where STMT is a load or store that 2023 accesses consecutive elements with a negative step. 
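For example, an access whose DR_STEP is minus the element size visits the elements in reverse order; it can remain contiguous only if the alignment is supported and either the stored value is invariant or the target provides a reversing permutation, otherwise we fall back to elementwise accesses.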
*/ 2024 2025 static vect_memory_access_type 2026 get_negative_load_store_type (gimple *stmt, tree vectype, 2027 vec_load_store_type vls_type, 2028 unsigned int ncopies) 2029 { 2030 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 2031 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); 2032 dr_alignment_support alignment_support_scheme; 2033 2034 if (ncopies > 1) 2035 { 2036 if (dump_enabled_p ()) 2037 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2038 "multiple types with negative step.\n"); 2039 return VMAT_ELEMENTWISE; 2040 } 2041 2042 alignment_support_scheme = vect_supportable_dr_alignment (dr, false); 2043 if (alignment_support_scheme != dr_aligned 2044 && alignment_support_scheme != dr_unaligned_supported) 2045 { 2046 if (dump_enabled_p ()) 2047 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2048 "negative step but alignment required.\n"); 2049 return VMAT_ELEMENTWISE; 2050 } 2051 2052 if (vls_type == VLS_STORE_INVARIANT) 2053 { 2054 if (dump_enabled_p ()) 2055 dump_printf_loc (MSG_NOTE, vect_location, 2056 "negative step with invariant source;" 2057 " no permute needed.\n"); 2058 return VMAT_CONTIGUOUS_DOWN; 2059 } 2060 2061 if (!perm_mask_for_reverse (vectype)) 2062 { 2063 if (dump_enabled_p ()) 2064 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2065 "negative step and reversing not supported.\n"); 2066 return VMAT_ELEMENTWISE; 2067 } 2068 2069 return VMAT_CONTIGUOUS_REVERSE; 2070 } 2071 2072 /* STMT is either a masked or unconditional store. Return the value 2073 being stored. */ 2074 2075 tree 2076 vect_get_store_rhs (gimple *stmt) 2077 { 2078 if (gassign *assign = dyn_cast <gassign *> (stmt)) 2079 { 2080 gcc_assert (gimple_assign_single_p (assign)); 2081 return gimple_assign_rhs1 (assign); 2082 } 2083 if (gcall *call = dyn_cast <gcall *> (stmt)) 2084 { 2085 internal_fn ifn = gimple_call_internal_fn (call); 2086 int index = internal_fn_stored_value_index (ifn); 2087 gcc_assert (index >= 0); 2088 return gimple_call_arg (stmt, index); 2089 } 2090 gcc_unreachable (); 2091 } 2092 2093 /* A subroutine of get_load_store_type, with a subset of the same 2094 arguments. Handle the case where STMT is part of a grouped load 2095 or store. 2096 2097 For stores, the statements in the group are all consecutive 2098 and there is no gap at the end. For loads, the statements in the 2099 group might not be consecutive; there can be gaps between statements 2100 as well as at the end. */ 2101 2102 static bool 2103 get_group_load_store_type (gimple *stmt, tree vectype, bool slp, 2104 bool masked_p, vec_load_store_type vls_type, 2105 vect_memory_access_type *memory_access_type, 2106 gather_scatter_info *gs_info) 2107 { 2108 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 2109 vec_info *vinfo = stmt_info->vinfo; 2110 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 2111 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL; 2112 gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info); 2113 data_reference *first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)); 2114 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt)); 2115 bool single_element_p = (stmt == first_stmt 2116 && !GROUP_NEXT_ELEMENT (stmt_info)); 2117 unsigned HOST_WIDE_INT gap = GROUP_GAP (vinfo_for_stmt (first_stmt)); 2118 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); 2119 2120 /* True if the vectorized statements would access beyond the last 2121 statement in the group. 
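For example, if the scalar code reads only the first two elements of each three-element group, the vectorized contiguous loads also read the unused third element, and for the final group that element lies beyond the last scalar access.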
*/ 2122 bool overrun_p = false; 2123 2124 /* True if we can cope with such overrun by peeling for gaps, so that 2125 there is at least one final scalar iteration after the vector loop. */ 2126 bool can_overrun_p = (!masked_p 2127 && vls_type == VLS_LOAD 2128 && loop_vinfo 2129 && !loop->inner); 2130 2131 /* There can only be a gap at the end of the group if the stride is 2132 known at compile time. */ 2133 gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info) || gap == 0); 2134 2135 /* Stores can't yet have gaps. */ 2136 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0); 2137 2138 if (slp) 2139 { 2140 if (STMT_VINFO_STRIDED_P (stmt_info)) 2141 { 2142 /* Try to use consecutive accesses of GROUP_SIZE elements, 2143 separated by the stride, until we have a complete vector. 2144 Fall back to scalar accesses if that isn't possible. */ 2145 if (multiple_p (nunits, group_size)) 2146 *memory_access_type = VMAT_STRIDED_SLP; 2147 else 2148 *memory_access_type = VMAT_ELEMENTWISE; 2149 } 2150 else 2151 { 2152 overrun_p = loop_vinfo && gap != 0; 2153 if (overrun_p && vls_type != VLS_LOAD) 2154 { 2155 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2156 "Grouped store with gaps requires" 2157 " non-consecutive accesses\n"); 2158 return false; 2159 } 2160 /* An overrun is fine if the trailing elements are smaller 2161 than the alignment boundary B. Every vector access will 2162 be a multiple of B and so we are guaranteed to access a 2163 non-gap element in the same B-sized block. */ 2164 if (overrun_p 2165 && gap < (vect_known_alignment_in_bytes (first_dr) 2166 / vect_get_scalar_dr_size (first_dr))) 2167 overrun_p = false; 2168 if (overrun_p && !can_overrun_p) 2169 { 2170 if (dump_enabled_p ()) 2171 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2172 "Peeling for outer loop is not supported\n"); 2173 return false; 2174 } 2175 int cmp = compare_step_with_zero (stmt); 2176 if (cmp < 0) 2177 *memory_access_type = get_negative_load_store_type 2178 (stmt, vectype, vls_type, 1); 2179 else 2180 { 2181 gcc_assert (!loop_vinfo || cmp > 0); 2182 *memory_access_type = VMAT_CONTIGUOUS; 2183 } 2184 } 2185 } 2186 else 2187 { 2188 /* We can always handle this case using elementwise accesses, 2189 but see if something more efficient is available. */ 2190 *memory_access_type = VMAT_ELEMENTWISE; 2191 2192 /* If there is a gap at the end of the group then these optimizations 2193 would access excess elements in the last iteration. */ 2194 bool would_overrun_p = (gap != 0); 2195 /* An overrun is fine if the trailing elements are smaller than the 2196 alignment boundary B. Every vector access will be a multiple of B 2197 and so we are guaranteed to access a non-gap element in the 2198 same B-sized block. */ 2199 if (would_overrun_p 2200 && !masked_p 2201 && gap < (vect_known_alignment_in_bytes (first_dr) 2202 / vect_get_scalar_dr_size (first_dr))) 2203 would_overrun_p = false; 2204 2205 if (!STMT_VINFO_STRIDED_P (stmt_info) 2206 && (can_overrun_p || !would_overrun_p) 2207 && compare_step_with_zero (stmt) > 0) 2208 { 2209 /* First cope with the degenerate case of a single-element 2210 vector. */ 2211 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U)) 2212 *memory_access_type = VMAT_CONTIGUOUS; 2213 2214 /* Otherwise try using LOAD/STORE_LANES. */ 2215 if (*memory_access_type == VMAT_ELEMENTWISE 2216 && (vls_type == VLS_LOAD 2217 ? 
vect_load_lanes_supported (vectype, group_size, masked_p) 2218 : vect_store_lanes_supported (vectype, group_size, 2219 masked_p))) 2220 { 2221 *memory_access_type = VMAT_LOAD_STORE_LANES; 2222 overrun_p = would_overrun_p; 2223 } 2224 2225 /* If that fails, try using permuting loads. */ 2226 if (*memory_access_type == VMAT_ELEMENTWISE 2227 && (vls_type == VLS_LOAD 2228 ? vect_grouped_load_supported (vectype, single_element_p, 2229 group_size) 2230 : vect_grouped_store_supported (vectype, group_size))) 2231 { 2232 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE; 2233 overrun_p = would_overrun_p; 2234 } 2235 } 2236 2237 /* As a last resort, trying using a gather load or scatter store. 2238 2239 ??? Although the code can handle all group sizes correctly, 2240 it probably isn't a win to use separate strided accesses based 2241 on nearby locations. Or, even if it's a win over scalar code, 2242 it might not be a win over vectorizing at a lower VF, if that 2243 allows us to use contiguous accesses. */ 2244 if (*memory_access_type == VMAT_ELEMENTWISE 2245 && single_element_p 2246 && loop_vinfo 2247 && vect_use_strided_gather_scatters_p (stmt, loop_vinfo, 2248 masked_p, gs_info)) 2249 *memory_access_type = VMAT_GATHER_SCATTER; 2250 } 2251 2252 if (vls_type != VLS_LOAD && first_stmt == stmt) 2253 { 2254 /* STMT is the leader of the group. Check the operands of all the 2255 stmts of the group. */ 2256 gimple *next_stmt = GROUP_NEXT_ELEMENT (stmt_info); 2257 while (next_stmt) 2258 { 2259 tree op = vect_get_store_rhs (next_stmt); 2260 gimple *def_stmt; 2261 enum vect_def_type dt; 2262 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt)) 2263 { 2264 if (dump_enabled_p ()) 2265 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2266 "use not simple.\n"); 2267 return false; 2268 } 2269 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt)); 2270 } 2271 } 2272 2273 if (overrun_p) 2274 { 2275 gcc_assert (can_overrun_p); 2276 if (dump_enabled_p ()) 2277 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2278 "Data access with gaps requires scalar " 2279 "epilogue loop\n"); 2280 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true; 2281 } 2282 2283 return true; 2284 } 2285 2286 /* Analyze load or store statement STMT of type VLS_TYPE. Return true 2287 if there is a memory access type that the vectorized form can use, 2288 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers 2289 or scatters, fill in GS_INFO accordingly. 2290 2291 SLP says whether we're performing SLP rather than loop vectorization. 2292 MASKED_P is true if the statement is conditional on a vectorized mask. 2293 VECTYPE is the vector type that the vectorized statements will use. 2294 NCOPIES is the number of vector statements that will be needed. 
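On success, *MEMORY_ACCESS_TYPE is one of the vect_memory_access_type values, e.g. VMAT_CONTIGUOUS, VMAT_ELEMENTWISE, VMAT_STRIDED_SLP, VMAT_LOAD_STORE_LANES, VMAT_GATHER_SCATTER or VMAT_INVARIANT.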
*/ 2295 2296 static bool 2297 get_load_store_type (gimple *stmt, tree vectype, bool slp, bool masked_p, 2298 vec_load_store_type vls_type, unsigned int ncopies, 2299 vect_memory_access_type *memory_access_type, 2300 gather_scatter_info *gs_info) 2301 { 2302 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 2303 vec_info *vinfo = stmt_info->vinfo; 2304 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 2305 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); 2306 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) 2307 { 2308 *memory_access_type = VMAT_GATHER_SCATTER; 2309 gimple *def_stmt; 2310 if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info)) 2311 gcc_unreachable (); 2312 else if (!vect_is_simple_use (gs_info->offset, vinfo, &def_stmt, 2313 &gs_info->offset_dt, 2314 &gs_info->offset_vectype)) 2315 { 2316 if (dump_enabled_p ()) 2317 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2318 "%s index use not simple.\n", 2319 vls_type == VLS_LOAD ? "gather" : "scatter"); 2320 return false; 2321 } 2322 } 2323 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) 2324 { 2325 if (!get_group_load_store_type (stmt, vectype, slp, masked_p, vls_type, 2326 memory_access_type, gs_info)) 2327 return false; 2328 } 2329 else if (STMT_VINFO_STRIDED_P (stmt_info)) 2330 { 2331 gcc_assert (!slp); 2332 if (loop_vinfo 2333 && vect_use_strided_gather_scatters_p (stmt, loop_vinfo, 2334 masked_p, gs_info)) 2335 *memory_access_type = VMAT_GATHER_SCATTER; 2336 else 2337 *memory_access_type = VMAT_ELEMENTWISE; 2338 } 2339 else 2340 { 2341 int cmp = compare_step_with_zero (stmt); 2342 if (cmp < 0) 2343 *memory_access_type = get_negative_load_store_type 2344 (stmt, vectype, vls_type, ncopies); 2345 else if (cmp == 0) 2346 { 2347 gcc_assert (vls_type == VLS_LOAD); 2348 *memory_access_type = VMAT_INVARIANT; 2349 } 2350 else 2351 *memory_access_type = VMAT_CONTIGUOUS; 2352 } 2353 2354 if ((*memory_access_type == VMAT_ELEMENTWISE 2355 || *memory_access_type == VMAT_STRIDED_SLP) 2356 && !nunits.is_constant ()) 2357 { 2358 if (dump_enabled_p ()) 2359 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2360 "Not using elementwise accesses due to variable " 2361 "vectorization factor.\n"); 2362 return false; 2363 } 2364 2365 /* FIXME: At the moment the cost model seems to underestimate the 2366 cost of using elementwise accesses. This check preserves the 2367 traditional behavior until that can be fixed. */ 2368 if (*memory_access_type == VMAT_ELEMENTWISE 2369 && !STMT_VINFO_STRIDED_P (stmt_info) 2370 && !(stmt == GROUP_FIRST_ELEMENT (stmt_info) 2371 && !GROUP_NEXT_ELEMENT (stmt_info) 2372 && !pow2p_hwi (GROUP_SIZE (stmt_info)))) 2373 { 2374 if (dump_enabled_p ()) 2375 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2376 "not falling back to elementwise accesses\n"); 2377 return false; 2378 } 2379 return true; 2380 } 2381 2382 /* Return true if boolean argument MASK is suitable for vectorizing 2383 conditional load or store STMT. When returning true, store the type 2384 of the definition in *MASK_DT_OUT and the type of the vectorized mask 2385 in *MASK_VECTYPE_OUT. 
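The mask must be a scalar boolean SSA name whose vector mask type has the same number of elements as the vector type of the data being loaded or stored.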
*/ 2386 2387 static bool 2388 vect_check_load_store_mask (gimple *stmt, tree mask, 2389 vect_def_type *mask_dt_out, 2390 tree *mask_vectype_out) 2391 { 2392 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask))) 2393 { 2394 if (dump_enabled_p ()) 2395 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2396 "mask argument is not a boolean.\n"); 2397 return false; 2398 } 2399 2400 if (TREE_CODE (mask) != SSA_NAME) 2401 { 2402 if (dump_enabled_p ()) 2403 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2404 "mask argument is not an SSA name.\n"); 2405 return false; 2406 } 2407 2408 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 2409 gimple *def_stmt; 2410 enum vect_def_type mask_dt; 2411 tree mask_vectype; 2412 if (!vect_is_simple_use (mask, stmt_info->vinfo, &def_stmt, &mask_dt, 2413 &mask_vectype)) 2414 { 2415 if (dump_enabled_p ()) 2416 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2417 "mask use not simple.\n"); 2418 return false; 2419 } 2420 2421 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 2422 if (!mask_vectype) 2423 mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype)); 2424 2425 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype)) 2426 { 2427 if (dump_enabled_p ()) 2428 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2429 "could not find an appropriate vector mask type.\n"); 2430 return false; 2431 } 2432 2433 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype), 2434 TYPE_VECTOR_SUBPARTS (vectype))) 2435 { 2436 if (dump_enabled_p ()) 2437 { 2438 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2439 "vector mask type "); 2440 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, mask_vectype); 2441 dump_printf (MSG_MISSED_OPTIMIZATION, 2442 " does not match vector data type "); 2443 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, vectype); 2444 dump_printf (MSG_MISSED_OPTIMIZATION, ".\n"); 2445 } 2446 return false; 2447 } 2448 2449 *mask_dt_out = mask_dt; 2450 *mask_vectype_out = mask_vectype; 2451 return true; 2452 } 2453 2454 /* Return true if stored value RHS is suitable for vectorizing store 2455 statement STMT. When returning true, store the type of the 2456 definition in *RHS_DT_OUT, the type of the vectorized store value in 2457 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */ 2458 2459 static bool 2460 vect_check_store_rhs (gimple *stmt, tree rhs, vect_def_type *rhs_dt_out, 2461 tree *rhs_vectype_out, vec_load_store_type *vls_type_out) 2462 { 2463 /* In the case this is a store from a constant make sure 2464 native_encode_expr can handle it. 
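(A return value of 0 from native_encode_expr means the constant cannot be encoded as a byte sequence.)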
*/ 2465 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0) 2466 { 2467 if (dump_enabled_p ()) 2468 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2469 "cannot encode constant as a byte sequence.\n"); 2470 return false; 2471 } 2472 2473 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 2474 gimple *def_stmt; 2475 enum vect_def_type rhs_dt; 2476 tree rhs_vectype; 2477 if (!vect_is_simple_use (rhs, stmt_info->vinfo, &def_stmt, &rhs_dt, 2478 &rhs_vectype)) 2479 { 2480 if (dump_enabled_p ()) 2481 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2482 "use not simple.\n"); 2483 return false; 2484 } 2485 2486 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 2487 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype)) 2488 { 2489 if (dump_enabled_p ()) 2490 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2491 "incompatible vector types.\n"); 2492 return false; 2493 } 2494 2495 *rhs_dt_out = rhs_dt; 2496 *rhs_vectype_out = rhs_vectype; 2497 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def) 2498 *vls_type_out = VLS_STORE_INVARIANT; 2499 else 2500 *vls_type_out = VLS_STORE; 2501 return true; 2502 } 2503 2504 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT. 2505 Note that we support masks with floating-point type, in which case the 2506 floats are interpreted as a bitmask. */ 2507 2508 static tree 2509 vect_build_all_ones_mask (gimple *stmt, tree masktype) 2510 { 2511 if (TREE_CODE (masktype) == INTEGER_TYPE) 2512 return build_int_cst (masktype, -1); 2513 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE) 2514 { 2515 tree mask = build_int_cst (TREE_TYPE (masktype), -1); 2516 mask = build_vector_from_val (masktype, mask); 2517 return vect_init_vector (stmt, mask, masktype, NULL); 2518 } 2519 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype))) 2520 { 2521 REAL_VALUE_TYPE r; 2522 long tmp[6]; 2523 for (int j = 0; j < 6; ++j) 2524 tmp[j] = -1; 2525 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype))); 2526 tree mask = build_real (TREE_TYPE (masktype), r); 2527 mask = build_vector_from_val (masktype, mask); 2528 return vect_init_vector (stmt, mask, masktype, NULL); 2529 } 2530 gcc_unreachable (); 2531 } 2532 2533 /* Build an all-zero merge value of type VECTYPE while vectorizing 2534 STMT as a gather load. */ 2535 2536 static tree 2537 vect_build_zero_merge_argument (gimple *stmt, tree vectype) 2538 { 2539 tree merge; 2540 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE) 2541 merge = build_int_cst (TREE_TYPE (vectype), 0); 2542 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype))) 2543 { 2544 REAL_VALUE_TYPE r; 2545 long tmp[6]; 2546 for (int j = 0; j < 6; ++j) 2547 tmp[j] = 0; 2548 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype))); 2549 merge = build_real (TREE_TYPE (vectype), r); 2550 } 2551 else 2552 gcc_unreachable (); 2553 merge = build_vector_from_val (vectype, merge); 2554 return vect_init_vector (stmt, merge, vectype, NULL); 2555 } 2556 2557 /* Build a gather load call while vectorizing STMT. Insert new instructions 2558 before GSI and add them to VEC_STMT. GS_INFO describes the gather load 2559 operation. If the load is conditional, MASK is the unvectorized 2560 condition and MASK_DT is its definition type, otherwise MASK is null. 
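This path is used for builtin-based gathers, i.e. when GS_INFO->decl is set; the builtin takes five arguments: the merge (source) value, the base pointer, the index vector, the mask and the scale.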
*/ 2561 2562 static void 2563 vect_build_gather_load_calls (gimple *stmt, gimple_stmt_iterator *gsi, 2564 gimple **vec_stmt, gather_scatter_info *gs_info, 2565 tree mask, vect_def_type mask_dt) 2566 { 2567 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 2568 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 2569 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 2570 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 2571 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); 2572 int ncopies = vect_get_num_copies (loop_vinfo, vectype); 2573 edge pe = loop_preheader_edge (loop); 2574 enum { NARROW, NONE, WIDEN } modifier; 2575 poly_uint64 gather_off_nunits 2576 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype); 2577 2578 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl)); 2579 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl)); 2580 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); 2581 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); 2582 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); 2583 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); 2584 tree scaletype = TREE_VALUE (arglist); 2585 gcc_checking_assert (types_compatible_p (srctype, rettype) 2586 && (!mask || types_compatible_p (srctype, masktype))); 2587 2588 tree perm_mask = NULL_TREE; 2589 tree mask_perm_mask = NULL_TREE; 2590 if (known_eq (nunits, gather_off_nunits)) 2591 modifier = NONE; 2592 else if (known_eq (nunits * 2, gather_off_nunits)) 2593 { 2594 modifier = WIDEN; 2595 2596 /* Currently widening gathers and scatters are only supported for 2597 fixed-length vectors. */ 2598 int count = gather_off_nunits.to_constant (); 2599 vec_perm_builder sel (count, count, 1); 2600 for (int i = 0; i < count; ++i) 2601 sel.quick_push (i | (count / 2)); 2602 2603 vec_perm_indices indices (sel, 1, count); 2604 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype, 2605 indices); 2606 } 2607 else if (known_eq (nunits, gather_off_nunits * 2)) 2608 { 2609 modifier = NARROW; 2610 2611 /* Currently narrowing gathers and scatters are only supported for 2612 fixed-length vectors. */ 2613 int count = nunits.to_constant (); 2614 vec_perm_builder sel (count, count, 1); 2615 sel.quick_grow (count); 2616 for (int i = 0; i < count; ++i) 2617 sel[i] = i < count / 2 ? 
i : i + count / 2; 2618 vec_perm_indices indices (sel, 2, count); 2619 perm_mask = vect_gen_perm_mask_checked (vectype, indices); 2620 2621 ncopies *= 2; 2622 2623 if (mask) 2624 { 2625 for (int i = 0; i < count; ++i) 2626 sel[i] = i | (count / 2); 2627 indices.new_vector (sel, 2, count); 2628 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices); 2629 } 2630 } 2631 else 2632 gcc_unreachable (); 2633 2634 tree vec_dest = vect_create_destination_var (gimple_get_lhs (stmt), 2635 vectype); 2636 2637 tree ptr = fold_convert (ptrtype, gs_info->base); 2638 if (!is_gimple_min_invariant (ptr)) 2639 { 2640 gimple_seq seq; 2641 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE); 2642 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq); 2643 gcc_assert (!new_bb); 2644 } 2645 2646 tree scale = build_int_cst (scaletype, gs_info->scale); 2647 2648 tree vec_oprnd0 = NULL_TREE; 2649 tree vec_mask = NULL_TREE; 2650 tree src_op = NULL_TREE; 2651 tree mask_op = NULL_TREE; 2652 tree prev_res = NULL_TREE; 2653 stmt_vec_info prev_stmt_info = NULL; 2654 2655 if (!mask) 2656 { 2657 src_op = vect_build_zero_merge_argument (stmt, rettype); 2658 mask_op = vect_build_all_ones_mask (stmt, masktype); 2659 } 2660 2661 for (int j = 0; j < ncopies; ++j) 2662 { 2663 tree op, var; 2664 gimple *new_stmt; 2665 if (modifier == WIDEN && (j & 1)) 2666 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, 2667 perm_mask, stmt, gsi); 2668 else if (j == 0) 2669 op = vec_oprnd0 2670 = vect_get_vec_def_for_operand (gs_info->offset, stmt); 2671 else 2672 op = vec_oprnd0 2673 = vect_get_vec_def_for_stmt_copy (gs_info->offset_dt, vec_oprnd0); 2674 2675 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op))) 2676 { 2677 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)), 2678 TYPE_VECTOR_SUBPARTS (idxtype))); 2679 var = vect_get_new_ssa_name (idxtype, vect_simple_var); 2680 op = build1 (VIEW_CONVERT_EXPR, idxtype, op); 2681 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op); 2682 vect_finish_stmt_generation (stmt, new_stmt, gsi); 2683 op = var; 2684 } 2685 2686 if (mask) 2687 { 2688 if (mask_perm_mask && (j & 1)) 2689 mask_op = permute_vec_elements (mask_op, mask_op, 2690 mask_perm_mask, stmt, gsi); 2691 else 2692 { 2693 if (j == 0) 2694 vec_mask = vect_get_vec_def_for_operand (mask, stmt); 2695 else 2696 vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask); 2697 2698 mask_op = vec_mask; 2699 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask))) 2700 { 2701 gcc_assert 2702 (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op)), 2703 TYPE_VECTOR_SUBPARTS (masktype))); 2704 var = vect_get_new_ssa_name (masktype, vect_simple_var); 2705 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op); 2706 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, 2707 mask_op); 2708 vect_finish_stmt_generation (stmt, new_stmt, gsi); 2709 mask_op = var; 2710 } 2711 } 2712 src_op = mask_op; 2713 } 2714 2715 new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op, 2716 mask_op, scale); 2717 2718 if (!useless_type_conversion_p (vectype, rettype)) 2719 { 2720 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 2721 TYPE_VECTOR_SUBPARTS (rettype))); 2722 op = vect_get_new_ssa_name (rettype, vect_simple_var); 2723 gimple_call_set_lhs (new_stmt, op); 2724 vect_finish_stmt_generation (stmt, new_stmt, gsi); 2725 var = make_ssa_name (vec_dest); 2726 op = build1 (VIEW_CONVERT_EXPR, vectype, op); 2727 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op); 2728 } 2729 else 2730 { 2731 var 
= make_ssa_name (vec_dest, new_stmt); 2732 gimple_call_set_lhs (new_stmt, var); 2733 } 2734 2735 vect_finish_stmt_generation (stmt, new_stmt, gsi); 2736 2737 if (modifier == NARROW) 2738 { 2739 if ((j & 1) == 0) 2740 { 2741 prev_res = var; 2742 continue; 2743 } 2744 var = permute_vec_elements (prev_res, var, perm_mask, stmt, gsi); 2745 new_stmt = SSA_NAME_DEF_STMT (var); 2746 } 2747 2748 if (prev_stmt_info == NULL) 2749 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; 2750 else 2751 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; 2752 prev_stmt_info = vinfo_for_stmt (new_stmt); 2753 } 2754 } 2755 2756 /* Prepare the base and offset in GS_INFO for vectorization. 2757 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET 2758 to the vectorized offset argument for the first copy of STMT. STMT 2759 is the statement described by GS_INFO and LOOP is the containing loop. */ 2760 2761 static void 2762 vect_get_gather_scatter_ops (struct loop *loop, gimple *stmt, 2763 gather_scatter_info *gs_info, 2764 tree *dataref_ptr, tree *vec_offset) 2765 { 2766 gimple_seq stmts = NULL; 2767 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE); 2768 if (stmts != NULL) 2769 { 2770 basic_block new_bb; 2771 edge pe = loop_preheader_edge (loop); 2772 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); 2773 gcc_assert (!new_bb); 2774 } 2775 tree offset_type = TREE_TYPE (gs_info->offset); 2776 tree offset_vectype = get_vectype_for_scalar_type (offset_type); 2777 *vec_offset = vect_get_vec_def_for_operand (gs_info->offset, stmt, 2778 offset_vectype); 2779 } 2780 2781 /* Prepare to implement a grouped or strided load or store using 2782 the gather load or scatter store operation described by GS_INFO. 2783 STMT is the load or store statement. 2784 2785 Set *DATAREF_BUMP to the amount that should be added to the base 2786 address after each copy of the vectorized statement. Set *VEC_OFFSET 2787 to an invariant offset vector in which element I has the value 2788 I * DR_STEP / SCALE. */ 2789 2790 static void 2791 vect_get_strided_load_store_ops (gimple *stmt, loop_vec_info loop_vinfo, 2792 gather_scatter_info *gs_info, 2793 tree *dataref_bump, tree *vec_offset) 2794 { 2795 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 2796 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); 2797 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 2798 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 2799 gimple_seq stmts; 2800 2801 tree bump = size_binop (MULT_EXPR, 2802 fold_convert (sizetype, DR_STEP (dr)), 2803 size_int (TYPE_VECTOR_SUBPARTS (vectype))); 2804 *dataref_bump = force_gimple_operand (bump, &stmts, true, NULL_TREE); 2805 if (stmts) 2806 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts); 2807 2808 /* The offset given in GS_INFO can have pointer type, so use the element 2809 type of the vector instead. */ 2810 tree offset_type = TREE_TYPE (gs_info->offset); 2811 tree offset_vectype = get_vectype_for_scalar_type (offset_type); 2812 offset_type = TREE_TYPE (offset_vectype); 2813 2814 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */ 2815 tree step = size_binop (EXACT_DIV_EXPR, DR_STEP (dr), 2816 ssize_int (gs_info->scale)); 2817 step = fold_convert (offset_type, step); 2818 step = force_gimple_operand (step, &stmts, true, NULL_TREE); 2819 2820 /* Create {0, X, X*2, X*3, ...}. 
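This is built as a single VEC_SERIES_EXPR with base zero and step X.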
*/ 2821 *vec_offset = gimple_build (&stmts, VEC_SERIES_EXPR, offset_vectype, 2822 build_zero_cst (offset_type), step); 2823 if (stmts) 2824 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts); 2825 } 2826 2827 /* Return the amount that should be added to a vector pointer to move 2828 to the next or previous copy of AGGR_TYPE. DR is the data reference 2829 being vectorized and MEMORY_ACCESS_TYPE describes the type of 2830 vectorization. */ 2831 2832 static tree 2833 vect_get_data_ptr_increment (data_reference *dr, tree aggr_type, 2834 vect_memory_access_type memory_access_type) 2835 { 2836 if (memory_access_type == VMAT_INVARIANT) 2837 return size_zero_node; 2838 2839 tree iv_step = TYPE_SIZE_UNIT (aggr_type); 2840 tree step = vect_dr_behavior (dr)->step; 2841 if (tree_int_cst_sgn (step) == -1) 2842 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step); 2843 return iv_step; 2844 } 2845 2846 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */ 2847 2848 static bool 2849 vectorizable_bswap (gimple *stmt, gimple_stmt_iterator *gsi, 2850 gimple **vec_stmt, slp_tree slp_node, 2851 tree vectype_in, enum vect_def_type *dt) 2852 { 2853 tree op, vectype; 2854 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 2855 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 2856 unsigned ncopies; 2857 unsigned HOST_WIDE_INT nunits, num_bytes; 2858 2859 op = gimple_call_arg (stmt, 0); 2860 vectype = STMT_VINFO_VECTYPE (stmt_info); 2861 2862 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)) 2863 return false; 2864 2865 /* Multiple types in SLP are handled by creating the appropriate number of 2866 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in 2867 case of SLP. */ 2868 if (slp_node) 2869 ncopies = 1; 2870 else 2871 ncopies = vect_get_num_copies (loop_vinfo, vectype); 2872 2873 gcc_assert (ncopies >= 1); 2874 2875 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in); 2876 if (! char_vectype) 2877 return false; 2878 2879 if (!TYPE_VECTOR_SUBPARTS (char_vectype).is_constant (&num_bytes)) 2880 return false; 2881 2882 unsigned word_bytes = num_bytes / nunits; 2883 2884 /* The encoding uses one stepped pattern for each byte in the word. */ 2885 vec_perm_builder elts (num_bytes, word_bytes, 3); 2886 for (unsigned i = 0; i < 3; ++i) 2887 for (unsigned j = 0; j < word_bytes; ++j) 2888 elts.quick_push ((i + 1) * word_bytes - j - 1); 2889 2890 vec_perm_indices indices (elts, 1, num_bytes); 2891 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices)) 2892 return false; 2893 2894 if (! vec_stmt) 2895 { 2896 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type; 2897 if (dump_enabled_p ()) 2898 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_bswap ===" 2899 "\n"); 2900 if (! slp_node) 2901 { 2902 add_stmt_cost (stmt_info->vinfo->target_cost_data, 2903 1, vector_stmt, stmt_info, 0, vect_prologue); 2904 add_stmt_cost (stmt_info->vinfo->target_cost_data, 2905 ncopies, vec_perm, stmt_info, 0, vect_body); 2906 } 2907 return true; 2908 } 2909 2910 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices); 2911 2912 /* Transform. */ 2913 vec<tree> vec_oprnds = vNULL; 2914 gimple *new_stmt = NULL; 2915 stmt_vec_info prev_stmt_info = NULL; 2916 for (unsigned j = 0; j < ncopies; j++) 2917 { 2918 /* Handle uses. */ 2919 if (j == 0) 2920 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node); 2921 else 2922 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL); 2923 2924 /* Arguments are ready. 
create the new vector stmt. */ 2925 unsigned i; 2926 tree vop; 2927 FOR_EACH_VEC_ELT (vec_oprnds, i, vop) 2928 { 2929 tree tem = make_ssa_name (char_vectype); 2930 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR, 2931 char_vectype, vop)); 2932 vect_finish_stmt_generation (stmt, new_stmt, gsi); 2933 tree tem2 = make_ssa_name (char_vectype); 2934 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR, 2935 tem, tem, bswap_vconst); 2936 vect_finish_stmt_generation (stmt, new_stmt, gsi); 2937 tem = make_ssa_name (vectype); 2938 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR, 2939 vectype, tem2)); 2940 vect_finish_stmt_generation (stmt, new_stmt, gsi); 2941 if (slp_node) 2942 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt); 2943 } 2944 2945 if (slp_node) 2946 continue; 2947 2948 if (j == 0) 2949 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; 2950 else 2951 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; 2952 2953 prev_stmt_info = vinfo_for_stmt (new_stmt); 2954 } 2955 2956 vec_oprnds.release (); 2957 return true; 2958 } 2959 2960 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have 2961 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT 2962 in a single step. On success, store the binary pack code in 2963 *CONVERT_CODE. */ 2964 2965 static bool 2966 simple_integer_narrowing (tree vectype_out, tree vectype_in, 2967 tree_code *convert_code) 2968 { 2969 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out)) 2970 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in))) 2971 return false; 2972 2973 tree_code code; 2974 int multi_step_cvt = 0; 2975 auto_vec <tree, 8> interm_types; 2976 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in, 2977 &code, &multi_step_cvt, 2978 &interm_types) 2979 || multi_step_cvt) 2980 return false; 2981 2982 *convert_code = code; 2983 return true; 2984 } 2985 2986 /* Function vectorizable_call. 2987 2988 Check if GS performs a function call that can be vectorized. 2989 If VEC_STMT is also passed, vectorize the STMT: create a vectorized 2990 stmt to replace it, put it in VEC_STMT, and insert it at BSI. 2991 Return FALSE if not a vectorizable STMT, TRUE otherwise. */ 2992 2993 static bool 2994 vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt, 2995 slp_tree slp_node) 2996 { 2997 gcall *stmt; 2998 tree vec_dest; 2999 tree scalar_dest; 3000 tree op, type; 3001 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE; 3002 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info; 3003 tree vectype_out, vectype_in; 3004 poly_uint64 nunits_in; 3005 poly_uint64 nunits_out; 3006 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 3007 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); 3008 vec_info *vinfo = stmt_info->vinfo; 3009 tree fndecl, new_temp, rhs_type; 3010 gimple *def_stmt; 3011 enum vect_def_type dt[3] 3012 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type}; 3013 int ndts = 3; 3014 gimple *new_stmt = NULL; 3015 int ncopies, j; 3016 vec<tree> vargs = vNULL; 3017 enum { NARROW, NONE, WIDEN } modifier; 3018 size_t i, nargs; 3019 tree lhs; 3020 3021 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) 3022 return false; 3023 3024 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def 3025 && ! vec_stmt) 3026 return false; 3027 3028 /* Is GS a vectorizable call? 
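It must be a gcall with an SSA_NAME result that does not read or clobber memory; calls to internal load and store functions are instead handled by vectorizable_load and vectorizable_store.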
*/ 3029 stmt = dyn_cast <gcall *> (gs); 3030 if (!stmt) 3031 return false; 3032 3033 if (gimple_call_internal_p (stmt) 3034 && (internal_load_fn_p (gimple_call_internal_fn (stmt)) 3035 || internal_store_fn_p (gimple_call_internal_fn (stmt)))) 3036 /* Handled by vectorizable_load and vectorizable_store. */ 3037 return false; 3038 3039 if (gimple_call_lhs (stmt) == NULL_TREE 3040 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME) 3041 return false; 3042 3043 gcc_checking_assert (!stmt_can_throw_internal (stmt)); 3044 3045 vectype_out = STMT_VINFO_VECTYPE (stmt_info); 3046 3047 /* Process function arguments. */ 3048 rhs_type = NULL_TREE; 3049 vectype_in = NULL_TREE; 3050 nargs = gimple_call_num_args (stmt); 3051 3052 /* Bail out if the function has more than three arguments, we do not have 3053 interesting builtin functions to vectorize with more than two arguments 3054 except for fma. No arguments is also not good. */ 3055 if (nargs == 0 || nargs > 3) 3056 return false; 3057 3058 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */ 3059 if (gimple_call_internal_p (stmt) 3060 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE) 3061 { 3062 nargs = 0; 3063 rhs_type = unsigned_type_node; 3064 } 3065 3066 for (i = 0; i < nargs; i++) 3067 { 3068 tree opvectype; 3069 3070 op = gimple_call_arg (stmt, i); 3071 3072 /* We can only handle calls with arguments of the same type. */ 3073 if (rhs_type 3074 && !types_compatible_p (rhs_type, TREE_TYPE (op))) 3075 { 3076 if (dump_enabled_p ()) 3077 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 3078 "argument types differ.\n"); 3079 return false; 3080 } 3081 if (!rhs_type) 3082 rhs_type = TREE_TYPE (op); 3083 3084 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[i], &opvectype)) 3085 { 3086 if (dump_enabled_p ()) 3087 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 3088 "use not simple.\n"); 3089 return false; 3090 } 3091 3092 if (!vectype_in) 3093 vectype_in = opvectype; 3094 else if (opvectype 3095 && opvectype != vectype_in) 3096 { 3097 if (dump_enabled_p ()) 3098 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 3099 "argument vector types differ.\n"); 3100 return false; 3101 } 3102 } 3103 /* If all arguments are external or constant defs use a vector type with 3104 the same size as the output vector type. */ 3105 if (!vectype_in) 3106 vectype_in = get_same_sized_vectype (rhs_type, vectype_out); 3107 if (vec_stmt) 3108 gcc_assert (vectype_in); 3109 if (!vectype_in) 3110 { 3111 if (dump_enabled_p ()) 3112 { 3113 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 3114 "no vectype for scalar type "); 3115 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type); 3116 dump_printf (MSG_MISSED_OPTIMIZATION, "\n"); 3117 } 3118 3119 return false; 3120 } 3121 3122 /* FORNOW */ 3123 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in); 3124 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out); 3125 if (known_eq (nunits_in * 2, nunits_out)) 3126 modifier = NARROW; 3127 else if (known_eq (nunits_out, nunits_in)) 3128 modifier = NONE; 3129 else if (known_eq (nunits_out * 2, nunits_in)) 3130 modifier = WIDEN; 3131 else 3132 return false; 3133 3134 /* We only handle functions that do not read or clobber memory. */ 3135 if (gimple_vuse (stmt)) 3136 { 3137 if (dump_enabled_p ()) 3138 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 3139 "function reads from or writes to memory.\n"); 3140 return false; 3141 } 3142 3143 /* For now, we only vectorize functions if a target specific builtin 3144 is available. 
TODO -- in some cases, it might be profitable to 3145 insert the calls for pieces of the vector, in order to be able 3146 to vectorize other operations in the loop. */ 3147 fndecl = NULL_TREE; 3148 internal_fn ifn = IFN_LAST; 3149 combined_fn cfn = gimple_call_combined_fn (stmt); 3150 tree callee = gimple_call_fndecl (stmt); 3151 3152 /* First try using an internal function. */ 3153 tree_code convert_code = ERROR_MARK; 3154 if (cfn != CFN_LAST 3155 && (modifier == NONE 3156 || (modifier == NARROW 3157 && simple_integer_narrowing (vectype_out, vectype_in, 3158 &convert_code)))) 3159 ifn = vectorizable_internal_function (cfn, callee, vectype_out, 3160 vectype_in); 3161 3162 /* If that fails, try asking for a target-specific built-in function. */ 3163 if (ifn == IFN_LAST) 3164 { 3165 if (cfn != CFN_LAST) 3166 fndecl = targetm.vectorize.builtin_vectorized_function 3167 (cfn, vectype_out, vectype_in); 3168 else if (callee) 3169 fndecl = targetm.vectorize.builtin_md_vectorized_function 3170 (callee, vectype_out, vectype_in); 3171 } 3172 3173 if (ifn == IFN_LAST && !fndecl) 3174 { 3175 if (cfn == CFN_GOMP_SIMD_LANE 3176 && !slp_node 3177 && loop_vinfo 3178 && LOOP_VINFO_LOOP (loop_vinfo)->simduid 3179 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME 3180 && LOOP_VINFO_LOOP (loop_vinfo)->simduid 3181 == SSA_NAME_VAR (gimple_call_arg (stmt, 0))) 3182 { 3183 /* We can handle IFN_GOMP_SIMD_LANE by returning a 3184 { 0, 1, 2, ... vf - 1 } vector. */ 3185 gcc_assert (nargs == 0); 3186 } 3187 else if (modifier == NONE 3188 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16) 3189 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32) 3190 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64))) 3191 return vectorizable_bswap (stmt, gsi, vec_stmt, slp_node, 3192 vectype_in, dt); 3193 else 3194 { 3195 if (dump_enabled_p ()) 3196 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 3197 "function is not vectorizable.\n"); 3198 return false; 3199 } 3200 } 3201 3202 if (slp_node) 3203 ncopies = 1; 3204 else if (modifier == NARROW && ifn == IFN_LAST) 3205 ncopies = vect_get_num_copies (loop_vinfo, vectype_out); 3206 else 3207 ncopies = vect_get_num_copies (loop_vinfo, vectype_in); 3208 3209 /* Sanity check: make sure that at least one copy of the vectorized stmt 3210 needs to be generated. */ 3211 gcc_assert (ncopies >= 1); 3212 3213 if (!vec_stmt) /* transformation not required. */ 3214 { 3215 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type; 3216 if (dump_enabled_p ()) 3217 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ===" 3218 "\n"); 3219 if (!slp_node) 3220 { 3221 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL); 3222 if (ifn != IFN_LAST && modifier == NARROW && !slp_node) 3223 add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2, 3224 vec_promote_demote, stmt_info, 0, vect_body); 3225 } 3226 3227 return true; 3228 } 3229 3230 /* Transform. */ 3231 3232 if (dump_enabled_p ()) 3233 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n"); 3234 3235 /* Handle def. */ 3236 scalar_dest = gimple_call_lhs (stmt); 3237 vec_dest = vect_create_destination_var (scalar_dest, vectype_out); 3238 3239 prev_stmt_info = NULL; 3240 if (modifier == NONE || ifn != IFN_LAST) 3241 { 3242 tree prev_res = NULL_TREE; 3243 for (j = 0; j < ncopies; ++j) 3244 { 3245 /* Build argument list for the vectorized call. 
*/ 3246 if (j == 0) 3247 vargs.create (nargs); 3248 else 3249 vargs.truncate (0); 3250 3251 if (slp_node) 3252 { 3253 auto_vec<vec<tree> > vec_defs (nargs); 3254 vec<tree> vec_oprnds0; 3255 3256 for (i = 0; i < nargs; i++) 3257 vargs.quick_push (gimple_call_arg (stmt, i)); 3258 vect_get_slp_defs (vargs, slp_node, &vec_defs); 3259 vec_oprnds0 = vec_defs[0]; 3260 3261 /* Arguments are ready. Create the new vector stmt. */ 3262 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0) 3263 { 3264 size_t k; 3265 for (k = 0; k < nargs; k++) 3266 { 3267 vec<tree> vec_oprndsk = vec_defs[k]; 3268 vargs[k] = vec_oprndsk[i]; 3269 } 3270 if (modifier == NARROW) 3271 { 3272 tree half_res = make_ssa_name (vectype_in); 3273 gcall *call 3274 = gimple_build_call_internal_vec (ifn, vargs); 3275 gimple_call_set_lhs (call, half_res); 3276 gimple_call_set_nothrow (call, true); 3277 new_stmt = call; 3278 vect_finish_stmt_generation (stmt, new_stmt, gsi); 3279 if ((i & 1) == 0) 3280 { 3281 prev_res = half_res; 3282 continue; 3283 } 3284 new_temp = make_ssa_name (vec_dest); 3285 new_stmt = gimple_build_assign (new_temp, convert_code, 3286 prev_res, half_res); 3287 } 3288 else 3289 { 3290 gcall *call; 3291 if (ifn != IFN_LAST) 3292 call = gimple_build_call_internal_vec (ifn, vargs); 3293 else 3294 call = gimple_build_call_vec (fndecl, vargs); 3295 new_temp = make_ssa_name (vec_dest, call); 3296 gimple_call_set_lhs (call, new_temp); 3297 gimple_call_set_nothrow (call, true); 3298 new_stmt = call; 3299 } 3300 vect_finish_stmt_generation (stmt, new_stmt, gsi); 3301 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt); 3302 } 3303 3304 for (i = 0; i < nargs; i++) 3305 { 3306 vec<tree> vec_oprndsi = vec_defs[i]; 3307 vec_oprndsi.release (); 3308 } 3309 continue; 3310 } 3311 3312 for (i = 0; i < nargs; i++) 3313 { 3314 op = gimple_call_arg (stmt, i); 3315 if (j == 0) 3316 vec_oprnd0 3317 = vect_get_vec_def_for_operand (op, stmt); 3318 else 3319 { 3320 vec_oprnd0 = gimple_call_arg (new_stmt, i); 3321 vec_oprnd0 3322 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0); 3323 } 3324 3325 vargs.quick_push (vec_oprnd0); 3326 } 3327 3328 if (gimple_call_internal_p (stmt) 3329 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE) 3330 { 3331 tree cst = build_index_vector (vectype_out, j * nunits_out, 1); 3332 tree new_var 3333 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_"); 3334 gimple *init_stmt = gimple_build_assign (new_var, cst); 3335 vect_init_vector_1 (stmt, init_stmt, NULL); 3336 new_temp = make_ssa_name (vec_dest); 3337 new_stmt = gimple_build_assign (new_temp, new_var); 3338 } 3339 else if (modifier == NARROW) 3340 { 3341 tree half_res = make_ssa_name (vectype_in); 3342 gcall *call = gimple_build_call_internal_vec (ifn, vargs); 3343 gimple_call_set_lhs (call, half_res); 3344 gimple_call_set_nothrow (call, true); 3345 new_stmt = call; 3346 vect_finish_stmt_generation (stmt, new_stmt, gsi); 3347 if ((j & 1) == 0) 3348 { 3349 prev_res = half_res; 3350 continue; 3351 } 3352 new_temp = make_ssa_name (vec_dest); 3353 new_stmt = gimple_build_assign (new_temp, convert_code, 3354 prev_res, half_res); 3355 } 3356 else 3357 { 3358 gcall *call; 3359 if (ifn != IFN_LAST) 3360 call = gimple_build_call_internal_vec (ifn, vargs); 3361 else 3362 call = gimple_build_call_vec (fndecl, vargs); 3363 new_temp = make_ssa_name (vec_dest, new_stmt); 3364 gimple_call_set_lhs (call, new_temp); 3365 gimple_call_set_nothrow (call, true); 3366 new_stmt = call; 3367 } 3368 vect_finish_stmt_generation (stmt, new_stmt, gsi); 3369 3370 if (j == 
(modifier == NARROW ? 1 : 0)) 3371 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; 3372 else 3373 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; 3374 3375 prev_stmt_info = vinfo_for_stmt (new_stmt); 3376 } 3377 } 3378 else if (modifier == NARROW) 3379 { 3380 for (j = 0; j < ncopies; ++j) 3381 { 3382 /* Build argument list for the vectorized call. */ 3383 if (j == 0) 3384 vargs.create (nargs * 2); 3385 else 3386 vargs.truncate (0); 3387 3388 if (slp_node) 3389 { 3390 auto_vec<vec<tree> > vec_defs (nargs); 3391 vec<tree> vec_oprnds0; 3392 3393 for (i = 0; i < nargs; i++) 3394 vargs.quick_push (gimple_call_arg (stmt, i)); 3395 vect_get_slp_defs (vargs, slp_node, &vec_defs); 3396 vec_oprnds0 = vec_defs[0]; 3397 3398 /* Arguments are ready. Create the new vector stmt. */ 3399 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2) 3400 { 3401 size_t k; 3402 vargs.truncate (0); 3403 for (k = 0; k < nargs; k++) 3404 { 3405 vec<tree> vec_oprndsk = vec_defs[k]; 3406 vargs.quick_push (vec_oprndsk[i]); 3407 vargs.quick_push (vec_oprndsk[i + 1]); 3408 } 3409 gcall *call; 3410 if (ifn != IFN_LAST) 3411 call = gimple_build_call_internal_vec (ifn, vargs); 3412 else 3413 call = gimple_build_call_vec (fndecl, vargs); 3414 new_temp = make_ssa_name (vec_dest, call); 3415 gimple_call_set_lhs (call, new_temp); 3416 gimple_call_set_nothrow (call, true); 3417 new_stmt = call; 3418 vect_finish_stmt_generation (stmt, new_stmt, gsi); 3419 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt); 3420 } 3421 3422 for (i = 0; i < nargs; i++) 3423 { 3424 vec<tree> vec_oprndsi = vec_defs[i]; 3425 vec_oprndsi.release (); 3426 } 3427 continue; 3428 } 3429 3430 for (i = 0; i < nargs; i++) 3431 { 3432 op = gimple_call_arg (stmt, i); 3433 if (j == 0) 3434 { 3435 vec_oprnd0 3436 = vect_get_vec_def_for_operand (op, stmt); 3437 vec_oprnd1 3438 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0); 3439 } 3440 else 3441 { 3442 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1); 3443 vec_oprnd0 3444 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1); 3445 vec_oprnd1 3446 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0); 3447 } 3448 3449 vargs.quick_push (vec_oprnd0); 3450 vargs.quick_push (vec_oprnd1); 3451 } 3452 3453 new_stmt = gimple_build_call_vec (fndecl, vargs); 3454 new_temp = make_ssa_name (vec_dest, new_stmt); 3455 gimple_call_set_lhs (new_stmt, new_temp); 3456 vect_finish_stmt_generation (stmt, new_stmt, gsi); 3457 3458 if (j == 0) 3459 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt; 3460 else 3461 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; 3462 3463 prev_stmt_info = vinfo_for_stmt (new_stmt); 3464 } 3465 3466 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info); 3467 } 3468 else 3469 /* No current target implements this case. */ 3470 return false; 3471 3472 vargs.release (); 3473 3474 /* The call in STMT might prevent it from being removed in dce. 3475 We however cannot remove it here, due to the way the ssa name 3476 it defines is mapped to the new definition. So just replace 3477 rhs of the statement with something harmless. 
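A zero constant of the scalar result type serves as that harmless replacement.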
*/ 3478 3479 if (slp_node) 3480 return true; 3481 3482 type = TREE_TYPE (scalar_dest); 3483 if (is_pattern_stmt_p (stmt_info)) 3484 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info)); 3485 else 3486 lhs = gimple_call_lhs (stmt); 3487 3488 new_stmt = gimple_build_assign (lhs, build_zero_cst (type)); 3489 set_vinfo_for_stmt (new_stmt, stmt_info); 3490 set_vinfo_for_stmt (stmt, NULL); 3491 STMT_VINFO_STMT (stmt_info) = new_stmt; 3492 gsi_replace (gsi, new_stmt, false); 3493 3494 return true; 3495 } 3496 3497 3498 struct simd_call_arg_info 3499 { 3500 tree vectype; 3501 tree op; 3502 HOST_WIDE_INT linear_step; 3503 enum vect_def_type dt; 3504 unsigned int align; 3505 bool simd_lane_linear; 3506 }; 3507 3508 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME, 3509 is linear within simd lane (but not within whole loop), note it in 3510 *ARGINFO. */ 3511 3512 static void 3513 vect_simd_lane_linear (tree op, struct loop *loop, 3514 struct simd_call_arg_info *arginfo) 3515 { 3516 gimple *def_stmt = SSA_NAME_DEF_STMT (op); 3517 3518 if (!is_gimple_assign (def_stmt) 3519 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR 3520 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt))) 3521 return; 3522 3523 tree base = gimple_assign_rhs1 (def_stmt); 3524 HOST_WIDE_INT linear_step = 0; 3525 tree v = gimple_assign_rhs2 (def_stmt); 3526 while (TREE_CODE (v) == SSA_NAME) 3527 { 3528 tree t; 3529 def_stmt = SSA_NAME_DEF_STMT (v); 3530 if (is_gimple_assign (def_stmt)) 3531 switch (gimple_assign_rhs_code (def_stmt)) 3532 { 3533 case PLUS_EXPR: 3534 t = gimple_assign_rhs2 (def_stmt); 3535 if (linear_step || TREE_CODE (t) != INTEGER_CST) 3536 return; 3537 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t); 3538 v = gimple_assign_rhs1 (def_stmt); 3539 continue; 3540 case MULT_EXPR: 3541 t = gimple_assign_rhs2 (def_stmt); 3542 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t)) 3543 return; 3544 linear_step = tree_to_shwi (t); 3545 v = gimple_assign_rhs1 (def_stmt); 3546 continue; 3547 CASE_CONVERT: 3548 t = gimple_assign_rhs1 (def_stmt); 3549 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE 3550 || (TYPE_PRECISION (TREE_TYPE (v)) 3551 < TYPE_PRECISION (TREE_TYPE (t)))) 3552 return; 3553 if (!linear_step) 3554 linear_step = 1; 3555 v = t; 3556 continue; 3557 default: 3558 return; 3559 } 3560 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE) 3561 && loop->simduid 3562 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME 3563 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0)) 3564 == loop->simduid)) 3565 { 3566 if (!linear_step) 3567 linear_step = 1; 3568 arginfo->linear_step = linear_step; 3569 arginfo->op = base; 3570 arginfo->simd_lane_linear = true; 3571 return; 3572 } 3573 } 3574 } 3575 3576 /* Return the number of elements in vector type VECTYPE, which is associated 3577 with a SIMD clone. At present these vectors always have a constant 3578 length. */ 3579 3580 static unsigned HOST_WIDE_INT 3581 simd_clone_subparts (tree vectype) 3582 { 3583 return TYPE_VECTOR_SUBPARTS (vectype).to_constant (); 3584 } 3585 3586 /* Function vectorizable_simd_clone_call. 3587 3588 Check if STMT performs a function call that can be vectorized 3589 by calling a simd clone of the function. 3590 If VEC_STMT is also passed, vectorize the STMT: create a vectorized 3591 stmt to replace it, put it in VEC_STMT, and insert it at BSI. 3592 Return FALSE if not a vectorizable STMT, TRUE otherwise. 
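   For example (an illustrative sketch only), for a function declared with
   "#pragma omp declare simd" and called in a vectorizable loop as
     y = foo (x);
   this routine picks the best matching SIMD clone (e.g. a clone mangled
   along the lines of _ZGVbN4v_foo with simdlen 4) and emits a call to it
   with vector arguments built from the vectorized operands.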
*/ 3593 3594 static bool 3595 vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi, 3596 gimple **vec_stmt, slp_tree slp_node) 3597 { 3598 tree vec_dest; 3599 tree scalar_dest; 3600 tree op, type; 3601 tree vec_oprnd0 = NULL_TREE; 3602 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info; 3603 tree vectype; 3604 unsigned int nunits; 3605 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 3606 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); 3607 vec_info *vinfo = stmt_info->vinfo; 3608 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL; 3609 tree fndecl, new_temp; 3610 gimple *def_stmt; 3611 gimple *new_stmt = NULL; 3612 int ncopies, j; 3613 auto_vec<simd_call_arg_info> arginfo; 3614 vec<tree> vargs = vNULL; 3615 size_t i, nargs; 3616 tree lhs, rtype, ratype; 3617 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL; 3618 3619 /* Is STMT a vectorizable call? */ 3620 if (!is_gimple_call (stmt)) 3621 return false; 3622 3623 fndecl = gimple_call_fndecl (stmt); 3624 if (fndecl == NULL_TREE) 3625 return false; 3626 3627 struct cgraph_node *node = cgraph_node::get (fndecl); 3628 if (node == NULL || node->simd_clones == NULL) 3629 return false; 3630 3631 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) 3632 return false; 3633 3634 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def 3635 && ! vec_stmt) 3636 return false; 3637 3638 if (gimple_call_lhs (stmt) 3639 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME) 3640 return false; 3641 3642 gcc_checking_assert (!stmt_can_throw_internal (stmt)); 3643 3644 vectype = STMT_VINFO_VECTYPE (stmt_info); 3645 3646 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt)) 3647 return false; 3648 3649 /* FORNOW */ 3650 if (slp_node) 3651 return false; 3652 3653 /* Process function arguments. */ 3654 nargs = gimple_call_num_args (stmt); 3655 3656 /* Bail out if the function has zero arguments. */ 3657 if (nargs == 0) 3658 return false; 3659 3660 arginfo.reserve (nargs, true); 3661 3662 for (i = 0; i < nargs; i++) 3663 { 3664 simd_call_arg_info thisarginfo; 3665 affine_iv iv; 3666 3667 thisarginfo.linear_step = 0; 3668 thisarginfo.align = 0; 3669 thisarginfo.op = NULL_TREE; 3670 thisarginfo.simd_lane_linear = false; 3671 3672 op = gimple_call_arg (stmt, i); 3673 if (!vect_is_simple_use (op, vinfo, &def_stmt, &thisarginfo.dt, 3674 &thisarginfo.vectype) 3675 || thisarginfo.dt == vect_uninitialized_def) 3676 { 3677 if (dump_enabled_p ()) 3678 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 3679 "use not simple.\n"); 3680 return false; 3681 } 3682 3683 if (thisarginfo.dt == vect_constant_def 3684 || thisarginfo.dt == vect_external_def) 3685 gcc_assert (thisarginfo.vectype == NULL_TREE); 3686 else 3687 gcc_assert (thisarginfo.vectype != NULL_TREE); 3688 3689 /* For linear arguments, the analyze phase should have saved 3690 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */ 3691 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length () 3692 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]) 3693 { 3694 gcc_assert (vec_stmt); 3695 thisarginfo.linear_step 3696 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]); 3697 thisarginfo.op 3698 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1]; 3699 thisarginfo.simd_lane_linear 3700 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3] 3701 == boolean_true_node); 3702 /* If loop has been peeled for alignment, we need to adjust it. 
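	     For example (illustrative), if NITERS_UNCHANGED minus NITERS
	     is 3 (three iterations peeled off) and the linear step is 4,
	     the recorded base is biased by 3 * 4 = 12 below.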
*/ 3703 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo); 3704 tree n2 = LOOP_VINFO_NITERS (loop_vinfo); 3705 if (n1 != n2 && !thisarginfo.simd_lane_linear) 3706 { 3707 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2); 3708 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]; 3709 tree opt = TREE_TYPE (thisarginfo.op); 3710 bias = fold_convert (TREE_TYPE (step), bias); 3711 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step); 3712 thisarginfo.op 3713 = fold_build2 (POINTER_TYPE_P (opt) 3714 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt, 3715 thisarginfo.op, bias); 3716 } 3717 } 3718 else if (!vec_stmt 3719 && thisarginfo.dt != vect_constant_def 3720 && thisarginfo.dt != vect_external_def 3721 && loop_vinfo 3722 && TREE_CODE (op) == SSA_NAME 3723 && simple_iv (loop, loop_containing_stmt (stmt), op, 3724 &iv, false) 3725 && tree_fits_shwi_p (iv.step)) 3726 { 3727 thisarginfo.linear_step = tree_to_shwi (iv.step); 3728 thisarginfo.op = iv.base; 3729 } 3730 else if ((thisarginfo.dt == vect_constant_def 3731 || thisarginfo.dt == vect_external_def) 3732 && POINTER_TYPE_P (TREE_TYPE (op))) 3733 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT; 3734 /* Addresses of array elements indexed by GOMP_SIMD_LANE are 3735 linear too. */ 3736 if (POINTER_TYPE_P (TREE_TYPE (op)) 3737 && !thisarginfo.linear_step 3738 && !vec_stmt 3739 && thisarginfo.dt != vect_constant_def 3740 && thisarginfo.dt != vect_external_def 3741 && loop_vinfo 3742 && !slp_node 3743 && TREE_CODE (op) == SSA_NAME) 3744 vect_simd_lane_linear (op, loop, &thisarginfo); 3745 3746 arginfo.quick_push (thisarginfo); 3747 } 3748 3749 unsigned HOST_WIDE_INT vf; 3750 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf)) 3751 { 3752 if (dump_enabled_p ()) 3753 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 3754 "not considering SIMD clones; not yet supported" 3755 " for variable-width vectors.\n"); 3756 return false; 3757 } 3758 3759 unsigned int badness = 0; 3760 struct cgraph_node *bestn = NULL; 3761 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ()) 3762 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]); 3763 else 3764 for (struct cgraph_node *n = node->simd_clones; n != NULL; 3765 n = n->simdclone->next_clone) 3766 { 3767 unsigned int this_badness = 0; 3768 if (n->simdclone->simdlen > vf 3769 || n->simdclone->nargs != nargs) 3770 continue; 3771 if (n->simdclone->simdlen < vf) 3772 this_badness += (exact_log2 (vf) 3773 - exact_log2 (n->simdclone->simdlen)) * 1024; 3774 if (n->simdclone->inbranch) 3775 this_badness += 2048; 3776 int target_badness = targetm.simd_clone.usable (n); 3777 if (target_badness < 0) 3778 continue; 3779 this_badness += target_badness * 512; 3780 /* FORNOW: Have to add code to add the mask argument. 
*/ 3781 if (n->simdclone->inbranch) 3782 continue; 3783 for (i = 0; i < nargs; i++) 3784 { 3785 switch (n->simdclone->args[i].arg_type) 3786 { 3787 case SIMD_CLONE_ARG_TYPE_VECTOR: 3788 if (!useless_type_conversion_p 3789 (n->simdclone->args[i].orig_type, 3790 TREE_TYPE (gimple_call_arg (stmt, i)))) 3791 i = -1; 3792 else if (arginfo[i].dt == vect_constant_def 3793 || arginfo[i].dt == vect_external_def 3794 || arginfo[i].linear_step) 3795 this_badness += 64; 3796 break; 3797 case SIMD_CLONE_ARG_TYPE_UNIFORM: 3798 if (arginfo[i].dt != vect_constant_def 3799 && arginfo[i].dt != vect_external_def) 3800 i = -1; 3801 break; 3802 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP: 3803 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP: 3804 if (arginfo[i].dt == vect_constant_def 3805 || arginfo[i].dt == vect_external_def 3806 || (arginfo[i].linear_step 3807 != n->simdclone->args[i].linear_step)) 3808 i = -1; 3809 break; 3810 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP: 3811 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP: 3812 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP: 3813 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP: 3814 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP: 3815 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP: 3816 /* FORNOW */ 3817 i = -1; 3818 break; 3819 case SIMD_CLONE_ARG_TYPE_MASK: 3820 gcc_unreachable (); 3821 } 3822 if (i == (size_t) -1) 3823 break; 3824 if (n->simdclone->args[i].alignment > arginfo[i].align) 3825 { 3826 i = -1; 3827 break; 3828 } 3829 if (arginfo[i].align) 3830 this_badness += (exact_log2 (arginfo[i].align) 3831 - exact_log2 (n->simdclone->args[i].alignment)); 3832 } 3833 if (i == (size_t) -1) 3834 continue; 3835 if (bestn == NULL || this_badness < badness) 3836 { 3837 bestn = n; 3838 badness = this_badness; 3839 } 3840 } 3841 3842 if (bestn == NULL) 3843 return false; 3844 3845 for (i = 0; i < nargs; i++) 3846 if ((arginfo[i].dt == vect_constant_def 3847 || arginfo[i].dt == vect_external_def) 3848 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR) 3849 { 3850 arginfo[i].vectype 3851 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt, 3852 i))); 3853 if (arginfo[i].vectype == NULL 3854 || (simd_clone_subparts (arginfo[i].vectype) 3855 > bestn->simdclone->simdlen)) 3856 return false; 3857 } 3858 3859 fndecl = bestn->decl; 3860 nunits = bestn->simdclone->simdlen; 3861 ncopies = vf / nunits; 3862 3863 /* If the function isn't const, only allow it in simd loops where user 3864 has asserted that at least nunits consecutive iterations can be 3865 performed using SIMD instructions. */ 3866 if ((loop == NULL || (unsigned) loop->safelen < nunits) 3867 && gimple_vuse (stmt)) 3868 return false; 3869 3870 /* Sanity check: make sure that at least one copy of the vectorized stmt 3871 needs to be generated. */ 3872 gcc_assert (ncopies >= 1); 3873 3874 if (!vec_stmt) /* transformation not required. */ 3875 { 3876 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl); 3877 for (i = 0; i < nargs; i++) 3878 if ((bestn->simdclone->args[i].arg_type 3879 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP) 3880 || (bestn->simdclone->args[i].arg_type 3881 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP)) 3882 { 3883 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3 3884 + 1); 3885 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op); 3886 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op)) 3887 ? 
size_type_node : TREE_TYPE (arginfo[i].op); 3888 tree ls = build_int_cst (lst, arginfo[i].linear_step); 3889 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls); 3890 tree sll = arginfo[i].simd_lane_linear 3891 ? boolean_true_node : boolean_false_node; 3892 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll); 3893 } 3894 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type; 3895 if (dump_enabled_p ()) 3896 dump_printf_loc (MSG_NOTE, vect_location, 3897 "=== vectorizable_simd_clone_call ===\n"); 3898 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */ 3899 return true; 3900 } 3901 3902 /* Transform. */ 3903 3904 if (dump_enabled_p ()) 3905 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n"); 3906 3907 /* Handle def. */ 3908 scalar_dest = gimple_call_lhs (stmt); 3909 vec_dest = NULL_TREE; 3910 rtype = NULL_TREE; 3911 ratype = NULL_TREE; 3912 if (scalar_dest) 3913 { 3914 vec_dest = vect_create_destination_var (scalar_dest, vectype); 3915 rtype = TREE_TYPE (TREE_TYPE (fndecl)); 3916 if (TREE_CODE (rtype) == ARRAY_TYPE) 3917 { 3918 ratype = rtype; 3919 rtype = TREE_TYPE (ratype); 3920 } 3921 } 3922 3923 prev_stmt_info = NULL; 3924 for (j = 0; j < ncopies; ++j) 3925 { 3926 /* Build argument list for the vectorized call. */ 3927 if (j == 0) 3928 vargs.create (nargs); 3929 else 3930 vargs.truncate (0); 3931 3932 for (i = 0; i < nargs; i++) 3933 { 3934 unsigned int k, l, m, o; 3935 tree atype; 3936 op = gimple_call_arg (stmt, i); 3937 switch (bestn->simdclone->args[i].arg_type) 3938 { 3939 case SIMD_CLONE_ARG_TYPE_VECTOR: 3940 atype = bestn->simdclone->args[i].vector_type; 3941 o = nunits / simd_clone_subparts (atype); 3942 for (m = j * o; m < (j + 1) * o; m++) 3943 { 3944 if (simd_clone_subparts (atype) 3945 < simd_clone_subparts (arginfo[i].vectype)) 3946 { 3947 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype)); 3948 k = (simd_clone_subparts (arginfo[i].vectype) 3949 / simd_clone_subparts (atype)); 3950 gcc_assert ((k & (k - 1)) == 0); 3951 if (m == 0) 3952 vec_oprnd0 3953 = vect_get_vec_def_for_operand (op, stmt); 3954 else 3955 { 3956 vec_oprnd0 = arginfo[i].op; 3957 if ((m & (k - 1)) == 0) 3958 vec_oprnd0 3959 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt, 3960 vec_oprnd0); 3961 } 3962 arginfo[i].op = vec_oprnd0; 3963 vec_oprnd0 3964 = build3 (BIT_FIELD_REF, atype, vec_oprnd0, 3965 bitsize_int (prec), 3966 bitsize_int ((m & (k - 1)) * prec)); 3967 new_stmt 3968 = gimple_build_assign (make_ssa_name (atype), 3969 vec_oprnd0); 3970 vect_finish_stmt_generation (stmt, new_stmt, gsi); 3971 vargs.safe_push (gimple_assign_lhs (new_stmt)); 3972 } 3973 else 3974 { 3975 k = (simd_clone_subparts (atype) 3976 / simd_clone_subparts (arginfo[i].vectype)); 3977 gcc_assert ((k & (k - 1)) == 0); 3978 vec<constructor_elt, va_gc> *ctor_elts; 3979 if (k != 1) 3980 vec_alloc (ctor_elts, k); 3981 else 3982 ctor_elts = NULL; 3983 for (l = 0; l < k; l++) 3984 { 3985 if (m == 0 && l == 0) 3986 vec_oprnd0 3987 = vect_get_vec_def_for_operand (op, stmt); 3988 else 3989 vec_oprnd0 3990 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt, 3991 arginfo[i].op); 3992 arginfo[i].op = vec_oprnd0; 3993 if (k == 1) 3994 break; 3995 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, 3996 vec_oprnd0); 3997 } 3998 if (k == 1) 3999 vargs.safe_push (vec_oprnd0); 4000 else 4001 { 4002 vec_oprnd0 = build_constructor (atype, ctor_elts); 4003 new_stmt 4004 = gimple_build_assign (make_ssa_name (atype), 4005 vec_oprnd0); 4006 vect_finish_stmt_generation (stmt, new_stmt, gsi); 4007 vargs.safe_push 
(gimple_assign_lhs (new_stmt)); 4008 } 4009 } 4010 } 4011 break; 4012 case SIMD_CLONE_ARG_TYPE_UNIFORM: 4013 vargs.safe_push (op); 4014 break; 4015 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP: 4016 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP: 4017 if (j == 0) 4018 { 4019 gimple_seq stmts; 4020 arginfo[i].op 4021 = force_gimple_operand (arginfo[i].op, &stmts, true, 4022 NULL_TREE); 4023 if (stmts != NULL) 4024 { 4025 basic_block new_bb; 4026 edge pe = loop_preheader_edge (loop); 4027 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); 4028 gcc_assert (!new_bb); 4029 } 4030 if (arginfo[i].simd_lane_linear) 4031 { 4032 vargs.safe_push (arginfo[i].op); 4033 break; 4034 } 4035 tree phi_res = copy_ssa_name (op); 4036 gphi *new_phi = create_phi_node (phi_res, loop->header); 4037 set_vinfo_for_stmt (new_phi, 4038 new_stmt_vec_info (new_phi, loop_vinfo)); 4039 add_phi_arg (new_phi, arginfo[i].op, 4040 loop_preheader_edge (loop), UNKNOWN_LOCATION); 4041 enum tree_code code 4042 = POINTER_TYPE_P (TREE_TYPE (op)) 4043 ? POINTER_PLUS_EXPR : PLUS_EXPR; 4044 tree type = POINTER_TYPE_P (TREE_TYPE (op)) 4045 ? sizetype : TREE_TYPE (op); 4046 widest_int cst 4047 = wi::mul (bestn->simdclone->args[i].linear_step, 4048 ncopies * nunits); 4049 tree tcst = wide_int_to_tree (type, cst); 4050 tree phi_arg = copy_ssa_name (op); 4051 new_stmt 4052 = gimple_build_assign (phi_arg, code, phi_res, tcst); 4053 gimple_stmt_iterator si = gsi_after_labels (loop->header); 4054 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT); 4055 set_vinfo_for_stmt (new_stmt, 4056 new_stmt_vec_info (new_stmt, loop_vinfo)); 4057 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop), 4058 UNKNOWN_LOCATION); 4059 arginfo[i].op = phi_res; 4060 vargs.safe_push (phi_res); 4061 } 4062 else 4063 { 4064 enum tree_code code 4065 = POINTER_TYPE_P (TREE_TYPE (op)) 4066 ? POINTER_PLUS_EXPR : PLUS_EXPR; 4067 tree type = POINTER_TYPE_P (TREE_TYPE (op)) 4068 ? 
sizetype : TREE_TYPE (op); 4069 widest_int cst 4070 = wi::mul (bestn->simdclone->args[i].linear_step, 4071 j * nunits); 4072 tree tcst = wide_int_to_tree (type, cst); 4073 new_temp = make_ssa_name (TREE_TYPE (op)); 4074 new_stmt = gimple_build_assign (new_temp, code, 4075 arginfo[i].op, tcst); 4076 vect_finish_stmt_generation (stmt, new_stmt, gsi); 4077 vargs.safe_push (new_temp); 4078 } 4079 break; 4080 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP: 4081 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP: 4082 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP: 4083 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP: 4084 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP: 4085 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP: 4086 default: 4087 gcc_unreachable (); 4088 } 4089 } 4090 4091 new_stmt = gimple_build_call_vec (fndecl, vargs); 4092 if (vec_dest) 4093 { 4094 gcc_assert (ratype || simd_clone_subparts (rtype) == nunits); 4095 if (ratype) 4096 new_temp = create_tmp_var (ratype); 4097 else if (simd_clone_subparts (vectype) 4098 == simd_clone_subparts (rtype)) 4099 new_temp = make_ssa_name (vec_dest, new_stmt); 4100 else 4101 new_temp = make_ssa_name (rtype, new_stmt); 4102 gimple_call_set_lhs (new_stmt, new_temp); 4103 } 4104 vect_finish_stmt_generation (stmt, new_stmt, gsi); 4105 4106 if (vec_dest) 4107 { 4108 if (simd_clone_subparts (vectype) < nunits) 4109 { 4110 unsigned int k, l; 4111 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype)); 4112 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype)); 4113 k = nunits / simd_clone_subparts (vectype); 4114 gcc_assert ((k & (k - 1)) == 0); 4115 for (l = 0; l < k; l++) 4116 { 4117 tree t; 4118 if (ratype) 4119 { 4120 t = build_fold_addr_expr (new_temp); 4121 t = build2 (MEM_REF, vectype, t, 4122 build_int_cst (TREE_TYPE (t), l * bytes)); 4123 } 4124 else 4125 t = build3 (BIT_FIELD_REF, vectype, new_temp, 4126 bitsize_int (prec), bitsize_int (l * prec)); 4127 new_stmt 4128 = gimple_build_assign (make_ssa_name (vectype), t); 4129 vect_finish_stmt_generation (stmt, new_stmt, gsi); 4130 if (j == 0 && l == 0) 4131 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; 4132 else 4133 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; 4134 4135 prev_stmt_info = vinfo_for_stmt (new_stmt); 4136 } 4137 4138 if (ratype) 4139 { 4140 tree clobber = build_constructor (ratype, NULL); 4141 TREE_THIS_VOLATILE (clobber) = 1; 4142 new_stmt = gimple_build_assign (new_temp, clobber); 4143 vect_finish_stmt_generation (stmt, new_stmt, gsi); 4144 } 4145 continue; 4146 } 4147 else if (simd_clone_subparts (vectype) > nunits) 4148 { 4149 unsigned int k = (simd_clone_subparts (vectype) 4150 / simd_clone_subparts (rtype)); 4151 gcc_assert ((k & (k - 1)) == 0); 4152 if ((j & (k - 1)) == 0) 4153 vec_alloc (ret_ctor_elts, k); 4154 if (ratype) 4155 { 4156 unsigned int m, o = nunits / simd_clone_subparts (rtype); 4157 for (m = 0; m < o; m++) 4158 { 4159 tree tem = build4 (ARRAY_REF, rtype, new_temp, 4160 size_int (m), NULL_TREE, NULL_TREE); 4161 new_stmt 4162 = gimple_build_assign (make_ssa_name (rtype), tem); 4163 vect_finish_stmt_generation (stmt, new_stmt, gsi); 4164 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, 4165 gimple_assign_lhs (new_stmt)); 4166 } 4167 tree clobber = build_constructor (ratype, NULL); 4168 TREE_THIS_VOLATILE (clobber) = 1; 4169 new_stmt = gimple_build_assign (new_temp, clobber); 4170 vect_finish_stmt_generation (stmt, new_stmt, gsi); 4171 } 4172 else 4173 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp); 4174 if ((j & (k 
- 1)) != k - 1) 4175 continue; 4176 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts); 4177 new_stmt 4178 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0); 4179 vect_finish_stmt_generation (stmt, new_stmt, gsi); 4180 4181 if ((unsigned) j == k - 1) 4182 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; 4183 else 4184 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; 4185 4186 prev_stmt_info = vinfo_for_stmt (new_stmt); 4187 continue; 4188 } 4189 else if (ratype) 4190 { 4191 tree t = build_fold_addr_expr (new_temp); 4192 t = build2 (MEM_REF, vectype, t, 4193 build_int_cst (TREE_TYPE (t), 0)); 4194 new_stmt 4195 = gimple_build_assign (make_ssa_name (vec_dest), t); 4196 vect_finish_stmt_generation (stmt, new_stmt, gsi); 4197 tree clobber = build_constructor (ratype, NULL); 4198 TREE_THIS_VOLATILE (clobber) = 1; 4199 vect_finish_stmt_generation (stmt, 4200 gimple_build_assign (new_temp, 4201 clobber), gsi); 4202 } 4203 } 4204 4205 if (j == 0) 4206 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; 4207 else 4208 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; 4209 4210 prev_stmt_info = vinfo_for_stmt (new_stmt); 4211 } 4212 4213 vargs.release (); 4214 4215 /* The call in STMT might prevent it from being removed in dce. 4216 We however cannot remove it here, due to the way the ssa name 4217 it defines is mapped to the new definition. So just replace 4218 rhs of the statement with something harmless. */ 4219 4220 if (slp_node) 4221 return true; 4222 4223 if (scalar_dest) 4224 { 4225 type = TREE_TYPE (scalar_dest); 4226 if (is_pattern_stmt_p (stmt_info)) 4227 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info)); 4228 else 4229 lhs = gimple_call_lhs (stmt); 4230 new_stmt = gimple_build_assign (lhs, build_zero_cst (type)); 4231 } 4232 else 4233 new_stmt = gimple_build_nop (); 4234 set_vinfo_for_stmt (new_stmt, stmt_info); 4235 set_vinfo_for_stmt (stmt, NULL); 4236 STMT_VINFO_STMT (stmt_info) = new_stmt; 4237 gsi_replace (gsi, new_stmt, true); 4238 unlink_stmt_vdef (stmt); 4239 4240 return true; 4241 } 4242 4243 4244 /* Function vect_gen_widened_results_half 4245 4246 Create a vector stmt whose code, type, number of arguments, and result 4247 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are 4248 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI. 4249 In the case that CODE is a CALL_EXPR, this means that a call to DECL 4250 needs to be created (DECL is a function-decl of a target-builtin). 4251 STMT is the original scalar stmt that we are vectorizing. 
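   For example (illustrative), a widening multiplication is vectorized as
   two such halves:
     vect_lo = VEC_WIDEN_MULT_LO_EXPR <vect_b, vect_c>;
     vect_hi = VEC_WIDEN_MULT_HI_EXPR <vect_b, vect_c>;
   where each call to this function emits one of the two statements.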
*/ 4252 4253 static gimple * 4254 vect_gen_widened_results_half (enum tree_code code, 4255 tree decl, 4256 tree vec_oprnd0, tree vec_oprnd1, int op_type, 4257 tree vec_dest, gimple_stmt_iterator *gsi, 4258 gimple *stmt) 4259 { 4260 gimple *new_stmt; 4261 tree new_temp; 4262 4263 /* Generate half of the widened result: */ 4264 if (code == CALL_EXPR) 4265 { 4266 /* Target specific support */ 4267 if (op_type == binary_op) 4268 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1); 4269 else 4270 new_stmt = gimple_build_call (decl, 1, vec_oprnd0); 4271 new_temp = make_ssa_name (vec_dest, new_stmt); 4272 gimple_call_set_lhs (new_stmt, new_temp); 4273 } 4274 else 4275 { 4276 /* Generic support */ 4277 gcc_assert (op_type == TREE_CODE_LENGTH (code)); 4278 if (op_type != binary_op) 4279 vec_oprnd1 = NULL; 4280 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1); 4281 new_temp = make_ssa_name (vec_dest, new_stmt); 4282 gimple_assign_set_lhs (new_stmt, new_temp); 4283 } 4284 vect_finish_stmt_generation (stmt, new_stmt, gsi); 4285 4286 return new_stmt; 4287 } 4288 4289 4290 /* Get vectorized definitions for loop-based vectorization. For the first 4291 operand we call vect_get_vec_def_for_operand() (with OPRND containing 4292 scalar operand), and for the rest we get a copy with 4293 vect_get_vec_def_for_stmt_copy() using the previous vector definition 4294 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details. 4295 The vectors are collected into VEC_OPRNDS. */ 4296 4297 static void 4298 vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt, 4299 vec<tree> *vec_oprnds, int multi_step_cvt) 4300 { 4301 tree vec_oprnd; 4302 4303 /* Get first vector operand. */ 4304 /* All the vector operands except the very first one (that is scalar oprnd) 4305 are stmt copies. */ 4306 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE) 4307 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt); 4308 else 4309 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd); 4310 4311 vec_oprnds->quick_push (vec_oprnd); 4312 4313 /* Get second vector operand. */ 4314 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd); 4315 vec_oprnds->quick_push (vec_oprnd); 4316 4317 *oprnd = vec_oprnd; 4318 4319 /* For conversion in multiple steps, continue to get operands 4320 recursively. */ 4321 if (multi_step_cvt) 4322 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1); 4323 } 4324 4325 4326 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS. 4327 For multi-step conversions store the resulting vectors and call the function 4328 recursively. */ 4329 4330 static void 4331 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds, 4332 int multi_step_cvt, gimple *stmt, 4333 vec<tree> vec_dsts, 4334 gimple_stmt_iterator *gsi, 4335 slp_tree slp_node, enum tree_code code, 4336 stmt_vec_info *prev_stmt_info) 4337 { 4338 unsigned int i; 4339 tree vop0, vop1, new_tmp, vec_dest; 4340 gimple *new_stmt; 4341 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 4342 4343 vec_dest = vec_dsts.pop (); 4344 4345 for (i = 0; i < vec_oprnds->length (); i += 2) 4346 { 4347 /* Create demotion operation. 
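	 For example (illustrative), two V4SI operands can be packed into
	 a single V8HI result:
	   vect_res = VEC_PACK_TRUNC_EXPR <vop0, vop1>;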
*/ 4348 vop0 = (*vec_oprnds)[i]; 4349 vop1 = (*vec_oprnds)[i + 1]; 4350 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1); 4351 new_tmp = make_ssa_name (vec_dest, new_stmt); 4352 gimple_assign_set_lhs (new_stmt, new_tmp); 4353 vect_finish_stmt_generation (stmt, new_stmt, gsi); 4354 4355 if (multi_step_cvt) 4356 /* Store the resulting vector for next recursive call. */ 4357 (*vec_oprnds)[i/2] = new_tmp; 4358 else 4359 { 4360 /* This is the last step of the conversion sequence. Store the 4361 vectors in SLP_NODE or in vector info of the scalar statement 4362 (or in STMT_VINFO_RELATED_STMT chain). */ 4363 if (slp_node) 4364 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt); 4365 else 4366 { 4367 if (!*prev_stmt_info) 4368 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt; 4369 else 4370 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt; 4371 4372 *prev_stmt_info = vinfo_for_stmt (new_stmt); 4373 } 4374 } 4375 } 4376 4377 /* For multi-step demotion operations we first generate demotion operations 4378 from the source type to the intermediate types, and then combine the 4379 results (stored in VEC_OPRNDS) in demotion operation to the destination 4380 type. */ 4381 if (multi_step_cvt) 4382 { 4383 /* At each level of recursion we have half of the operands we had at the 4384 previous level. */ 4385 vec_oprnds->truncate ((i+1)/2); 4386 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1, 4387 stmt, vec_dsts, gsi, slp_node, 4388 VEC_PACK_TRUNC_EXPR, 4389 prev_stmt_info); 4390 } 4391 4392 vec_dsts.quick_push (vec_dest); 4393 } 4394 4395 4396 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0 4397 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store 4398 the resulting vectors and call the function recursively. */ 4399 4400 static void 4401 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0, 4402 vec<tree> *vec_oprnds1, 4403 gimple *stmt, tree vec_dest, 4404 gimple_stmt_iterator *gsi, 4405 enum tree_code code1, 4406 enum tree_code code2, tree decl1, 4407 tree decl2, int op_type) 4408 { 4409 int i; 4410 tree vop0, vop1, new_tmp1, new_tmp2; 4411 gimple *new_stmt1, *new_stmt2; 4412 vec<tree> vec_tmp = vNULL; 4413 4414 vec_tmp.create (vec_oprnds0->length () * 2); 4415 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0) 4416 { 4417 if (op_type == binary_op) 4418 vop1 = (*vec_oprnds1)[i]; 4419 else 4420 vop1 = NULL_TREE; 4421 4422 /* Generate the two halves of promotion operation. */ 4423 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1, 4424 op_type, vec_dest, gsi, stmt); 4425 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1, 4426 op_type, vec_dest, gsi, stmt); 4427 if (is_gimple_call (new_stmt1)) 4428 { 4429 new_tmp1 = gimple_call_lhs (new_stmt1); 4430 new_tmp2 = gimple_call_lhs (new_stmt2); 4431 } 4432 else 4433 { 4434 new_tmp1 = gimple_assign_lhs (new_stmt1); 4435 new_tmp2 = gimple_assign_lhs (new_stmt2); 4436 } 4437 4438 /* Store the results for the next step. */ 4439 vec_tmp.quick_push (new_tmp1); 4440 vec_tmp.quick_push (new_tmp2); 4441 } 4442 4443 vec_oprnds0->release (); 4444 *vec_oprnds0 = vec_tmp; 4445 } 4446 4447 4448 /* Check if STMT performs a conversion operation, that can be vectorized. 4449 If VEC_STMT is also passed, vectorize the STMT: create a vectorized 4450 stmt to replace it, put it in VEC_STMT, and insert it at GSI. 4451 Return FALSE if not a vectorizable STMT, TRUE otherwise. 
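   For example (illustrative), a direct int -> float conversion of a V4SI
   operand becomes a single vector FLOAT_EXPR, a float -> double widening
   produces two V2DF results per V4SF operand, and a multi-step conversion
   such as char -> float goes through an intermediate integer type chosen
   from the supported widening steps.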
*/ 4452 4453 static bool 4454 vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi, 4455 gimple **vec_stmt, slp_tree slp_node) 4456 { 4457 tree vec_dest; 4458 tree scalar_dest; 4459 tree op0, op1 = NULL_TREE; 4460 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE; 4461 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 4462 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 4463 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK; 4464 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK; 4465 tree decl1 = NULL_TREE, decl2 = NULL_TREE; 4466 tree new_temp; 4467 gimple *def_stmt; 4468 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type}; 4469 int ndts = 2; 4470 gimple *new_stmt = NULL; 4471 stmt_vec_info prev_stmt_info; 4472 poly_uint64 nunits_in; 4473 poly_uint64 nunits_out; 4474 tree vectype_out, vectype_in; 4475 int ncopies, i, j; 4476 tree lhs_type, rhs_type; 4477 enum { NARROW, NONE, WIDEN } modifier; 4478 vec<tree> vec_oprnds0 = vNULL; 4479 vec<tree> vec_oprnds1 = vNULL; 4480 tree vop0; 4481 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); 4482 vec_info *vinfo = stmt_info->vinfo; 4483 int multi_step_cvt = 0; 4484 vec<tree> interm_types = vNULL; 4485 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE; 4486 int op_type; 4487 unsigned short fltsz; 4488 4489 /* Is STMT a vectorizable conversion? */ 4490 4491 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) 4492 return false; 4493 4494 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def 4495 && ! vec_stmt) 4496 return false; 4497 4498 if (!is_gimple_assign (stmt)) 4499 return false; 4500 4501 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME) 4502 return false; 4503 4504 code = gimple_assign_rhs_code (stmt); 4505 if (!CONVERT_EXPR_CODE_P (code) 4506 && code != FIX_TRUNC_EXPR 4507 && code != FLOAT_EXPR 4508 && code != WIDEN_MULT_EXPR 4509 && code != WIDEN_LSHIFT_EXPR) 4510 return false; 4511 4512 op_type = TREE_CODE_LENGTH (code); 4513 4514 /* Check types of lhs and rhs. */ 4515 scalar_dest = gimple_assign_lhs (stmt); 4516 lhs_type = TREE_TYPE (scalar_dest); 4517 vectype_out = STMT_VINFO_VECTYPE (stmt_info); 4518 4519 op0 = gimple_assign_rhs1 (stmt); 4520 rhs_type = TREE_TYPE (op0); 4521 4522 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR) 4523 && !((INTEGRAL_TYPE_P (lhs_type) 4524 && INTEGRAL_TYPE_P (rhs_type)) 4525 || (SCALAR_FLOAT_TYPE_P (lhs_type) 4526 && SCALAR_FLOAT_TYPE_P (rhs_type)))) 4527 return false; 4528 4529 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out) 4530 && ((INTEGRAL_TYPE_P (lhs_type) 4531 && !type_has_mode_precision_p (lhs_type)) 4532 || (INTEGRAL_TYPE_P (rhs_type) 4533 && !type_has_mode_precision_p (rhs_type)))) 4534 { 4535 if (dump_enabled_p ()) 4536 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 4537 "type conversion to/from bit-precision unsupported." 4538 "\n"); 4539 return false; 4540 } 4541 4542 /* Check the operands of the operation. */ 4543 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype_in)) 4544 { 4545 if (dump_enabled_p ()) 4546 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 4547 "use not simple.\n"); 4548 return false; 4549 } 4550 if (op_type == binary_op) 4551 { 4552 bool ok; 4553 4554 op1 = gimple_assign_rhs2 (stmt); 4555 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR); 4556 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of 4557 OP1. 
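     For example (illustrative), in WIDEN_MULT_EXPR <2, b_7> the constant
     2 is given the type of b_7, both when looking up the vector type
     here and when folding the constant operand before the transform.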
*/ 4558 if (CONSTANT_CLASS_P (op0)) 4559 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &vectype_in); 4560 else 4561 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]); 4562 4563 if (!ok) 4564 { 4565 if (dump_enabled_p ()) 4566 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 4567 "use not simple.\n"); 4568 return false; 4569 } 4570 } 4571 4572 /* If op0 is an external or constant defs use a vector type of 4573 the same size as the output vector type. */ 4574 if (!vectype_in) 4575 vectype_in = get_same_sized_vectype (rhs_type, vectype_out); 4576 if (vec_stmt) 4577 gcc_assert (vectype_in); 4578 if (!vectype_in) 4579 { 4580 if (dump_enabled_p ()) 4581 { 4582 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 4583 "no vectype for scalar type "); 4584 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type); 4585 dump_printf (MSG_MISSED_OPTIMIZATION, "\n"); 4586 } 4587 4588 return false; 4589 } 4590 4591 if (VECTOR_BOOLEAN_TYPE_P (vectype_out) 4592 && !VECTOR_BOOLEAN_TYPE_P (vectype_in)) 4593 { 4594 if (dump_enabled_p ()) 4595 { 4596 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 4597 "can't convert between boolean and non " 4598 "boolean vectors"); 4599 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type); 4600 dump_printf (MSG_MISSED_OPTIMIZATION, "\n"); 4601 } 4602 4603 return false; 4604 } 4605 4606 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in); 4607 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out); 4608 if (known_eq (nunits_out, nunits_in)) 4609 modifier = NONE; 4610 else if (multiple_p (nunits_out, nunits_in)) 4611 modifier = NARROW; 4612 else 4613 { 4614 gcc_checking_assert (multiple_p (nunits_in, nunits_out)); 4615 modifier = WIDEN; 4616 } 4617 4618 /* Multiple types in SLP are handled by creating the appropriate number of 4619 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in 4620 case of SLP. */ 4621 if (slp_node) 4622 ncopies = 1; 4623 else if (modifier == NARROW) 4624 ncopies = vect_get_num_copies (loop_vinfo, vectype_out); 4625 else 4626 ncopies = vect_get_num_copies (loop_vinfo, vectype_in); 4627 4628 /* Sanity check: make sure that at least one copy of the vectorized stmt 4629 needs to be generated. */ 4630 gcc_assert (ncopies >= 1); 4631 4632 bool found_mode = false; 4633 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type); 4634 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type); 4635 opt_scalar_mode rhs_mode_iter; 4636 4637 /* Supportable by target? */ 4638 switch (modifier) 4639 { 4640 case NONE: 4641 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR) 4642 return false; 4643 if (supportable_convert_operation (code, vectype_out, vectype_in, 4644 &decl1, &code1)) 4645 break; 4646 /* FALLTHRU */ 4647 unsupported: 4648 if (dump_enabled_p ()) 4649 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 4650 "conversion not supported by target.\n"); 4651 return false; 4652 4653 case WIDEN: 4654 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in, 4655 &code1, &code2, &multi_step_cvt, 4656 &interm_types)) 4657 { 4658 /* Binary widening operation can only be supported directly by the 4659 architecture. 
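	     That is, for WIDEN_MULT_EXPR or WIDEN_LSHIFT_EXPR the target
	     must provide the widening patterns itself (e.g. standard
	     names along the lines of vec_widen_smult_lo/hi); no
	     intermediate-type chain is attempted, so multi_step_cvt is
	     zero for binary widening operations.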
*/ 4660 gcc_assert (!(multi_step_cvt && op_type == binary_op)); 4661 break; 4662 } 4663 4664 if (code != FLOAT_EXPR 4665 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode)) 4666 goto unsupported; 4667 4668 fltsz = GET_MODE_SIZE (lhs_mode); 4669 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode) 4670 { 4671 rhs_mode = rhs_mode_iter.require (); 4672 if (GET_MODE_SIZE (rhs_mode) > fltsz) 4673 break; 4674 4675 cvt_type 4676 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0); 4677 cvt_type = get_same_sized_vectype (cvt_type, vectype_in); 4678 if (cvt_type == NULL_TREE) 4679 goto unsupported; 4680 4681 if (GET_MODE_SIZE (rhs_mode) == fltsz) 4682 { 4683 if (!supportable_convert_operation (code, vectype_out, 4684 cvt_type, &decl1, &codecvt1)) 4685 goto unsupported; 4686 } 4687 else if (!supportable_widening_operation (code, stmt, vectype_out, 4688 cvt_type, &codecvt1, 4689 &codecvt2, &multi_step_cvt, 4690 &interm_types)) 4691 continue; 4692 else 4693 gcc_assert (multi_step_cvt == 0); 4694 4695 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type, 4696 vectype_in, &code1, &code2, 4697 &multi_step_cvt, &interm_types)) 4698 { 4699 found_mode = true; 4700 break; 4701 } 4702 } 4703 4704 if (!found_mode) 4705 goto unsupported; 4706 4707 if (GET_MODE_SIZE (rhs_mode) == fltsz) 4708 codecvt2 = ERROR_MARK; 4709 else 4710 { 4711 multi_step_cvt++; 4712 interm_types.safe_push (cvt_type); 4713 cvt_type = NULL_TREE; 4714 } 4715 break; 4716 4717 case NARROW: 4718 gcc_assert (op_type == unary_op); 4719 if (supportable_narrowing_operation (code, vectype_out, vectype_in, 4720 &code1, &multi_step_cvt, 4721 &interm_types)) 4722 break; 4723 4724 if (code != FIX_TRUNC_EXPR 4725 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode)) 4726 goto unsupported; 4727 4728 cvt_type 4729 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0); 4730 cvt_type = get_same_sized_vectype (cvt_type, vectype_in); 4731 if (cvt_type == NULL_TREE) 4732 goto unsupported; 4733 if (!supportable_convert_operation (code, cvt_type, vectype_in, 4734 &decl1, &codecvt1)) 4735 goto unsupported; 4736 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type, 4737 &code1, &multi_step_cvt, 4738 &interm_types)) 4739 break; 4740 goto unsupported; 4741 4742 default: 4743 gcc_unreachable (); 4744 } 4745 4746 if (!vec_stmt) /* transformation not required. */ 4747 { 4748 if (dump_enabled_p ()) 4749 dump_printf_loc (MSG_NOTE, vect_location, 4750 "=== vectorizable_conversion ===\n"); 4751 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR) 4752 { 4753 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type; 4754 if (!slp_node) 4755 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL); 4756 } 4757 else if (modifier == NARROW) 4758 { 4759 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type; 4760 if (!slp_node) 4761 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt); 4762 } 4763 else 4764 { 4765 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type; 4766 if (!slp_node) 4767 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt); 4768 } 4769 interm_types.release (); 4770 return true; 4771 } 4772 4773 /* Transform. */ 4774 if (dump_enabled_p ()) 4775 dump_printf_loc (MSG_NOTE, vect_location, 4776 "transform conversion. 
ncopies = %d.\n", ncopies); 4777 4778 if (op_type == binary_op) 4779 { 4780 if (CONSTANT_CLASS_P (op0)) 4781 op0 = fold_convert (TREE_TYPE (op1), op0); 4782 else if (CONSTANT_CLASS_P (op1)) 4783 op1 = fold_convert (TREE_TYPE (op0), op1); 4784 } 4785 4786 /* In case of multi-step conversion, we first generate conversion operations 4787 to the intermediate types, and then from that types to the final one. 4788 We create vector destinations for the intermediate type (TYPES) received 4789 from supportable_*_operation, and store them in the correct order 4790 for future use in vect_create_vectorized_*_stmts (). */ 4791 auto_vec<tree> vec_dsts (multi_step_cvt + 1); 4792 vec_dest = vect_create_destination_var (scalar_dest, 4793 (cvt_type && modifier == WIDEN) 4794 ? cvt_type : vectype_out); 4795 vec_dsts.quick_push (vec_dest); 4796 4797 if (multi_step_cvt) 4798 { 4799 for (i = interm_types.length () - 1; 4800 interm_types.iterate (i, &intermediate_type); i--) 4801 { 4802 vec_dest = vect_create_destination_var (scalar_dest, 4803 intermediate_type); 4804 vec_dsts.quick_push (vec_dest); 4805 } 4806 } 4807 4808 if (cvt_type) 4809 vec_dest = vect_create_destination_var (scalar_dest, 4810 modifier == WIDEN 4811 ? vectype_out : cvt_type); 4812 4813 if (!slp_node) 4814 { 4815 if (modifier == WIDEN) 4816 { 4817 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1); 4818 if (op_type == binary_op) 4819 vec_oprnds1.create (1); 4820 } 4821 else if (modifier == NARROW) 4822 vec_oprnds0.create ( 4823 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1)); 4824 } 4825 else if (code == WIDEN_LSHIFT_EXPR) 4826 vec_oprnds1.create (slp_node->vec_stmts_size); 4827 4828 last_oprnd = op0; 4829 prev_stmt_info = NULL; 4830 switch (modifier) 4831 { 4832 case NONE: 4833 for (j = 0; j < ncopies; j++) 4834 { 4835 if (j == 0) 4836 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node); 4837 else 4838 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL); 4839 4840 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0) 4841 { 4842 /* Arguments are ready, create the new vector stmt. */ 4843 if (code1 == CALL_EXPR) 4844 { 4845 new_stmt = gimple_build_call (decl1, 1, vop0); 4846 new_temp = make_ssa_name (vec_dest, new_stmt); 4847 gimple_call_set_lhs (new_stmt, new_temp); 4848 } 4849 else 4850 { 4851 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op); 4852 new_stmt = gimple_build_assign (vec_dest, code1, vop0); 4853 new_temp = make_ssa_name (vec_dest, new_stmt); 4854 gimple_assign_set_lhs (new_stmt, new_temp); 4855 } 4856 4857 vect_finish_stmt_generation (stmt, new_stmt, gsi); 4858 if (slp_node) 4859 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt); 4860 else 4861 { 4862 if (!prev_stmt_info) 4863 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; 4864 else 4865 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; 4866 prev_stmt_info = vinfo_for_stmt (new_stmt); 4867 } 4868 } 4869 } 4870 break; 4871 4872 case WIDEN: 4873 /* In case the vectorization factor (VF) is bigger than the number 4874 of elements that we can fit in a vectype (nunits), we have to 4875 generate more than one vector stmt - i.e - we need to "unroll" 4876 the vector stmt by a factor VF/nunits. */ 4877 for (j = 0; j < ncopies; j++) 4878 { 4879 /* Handle uses. */ 4880 if (j == 0) 4881 { 4882 if (slp_node) 4883 { 4884 if (code == WIDEN_LSHIFT_EXPR) 4885 { 4886 unsigned int k; 4887 4888 vec_oprnd1 = op1; 4889 /* Store vec_oprnd1 for every vector stmt to be created 4890 for SLP_NODE. 
We check during the analysis that all 4891 the shift arguments are the same. */ 4892 for (k = 0; k < slp_node->vec_stmts_size - 1; k++) 4893 vec_oprnds1.quick_push (vec_oprnd1); 4894 4895 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL, 4896 slp_node); 4897 } 4898 else 4899 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, 4900 &vec_oprnds1, slp_node); 4901 } 4902 else 4903 { 4904 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt); 4905 vec_oprnds0.quick_push (vec_oprnd0); 4906 if (op_type == binary_op) 4907 { 4908 if (code == WIDEN_LSHIFT_EXPR) 4909 vec_oprnd1 = op1; 4910 else 4911 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt); 4912 vec_oprnds1.quick_push (vec_oprnd1); 4913 } 4914 } 4915 } 4916 else 4917 { 4918 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0); 4919 vec_oprnds0.truncate (0); 4920 vec_oprnds0.quick_push (vec_oprnd0); 4921 if (op_type == binary_op) 4922 { 4923 if (code == WIDEN_LSHIFT_EXPR) 4924 vec_oprnd1 = op1; 4925 else 4926 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], 4927 vec_oprnd1); 4928 vec_oprnds1.truncate (0); 4929 vec_oprnds1.quick_push (vec_oprnd1); 4930 } 4931 } 4932 4933 /* Arguments are ready. Create the new vector stmts. */ 4934 for (i = multi_step_cvt; i >= 0; i--) 4935 { 4936 tree this_dest = vec_dsts[i]; 4937 enum tree_code c1 = code1, c2 = code2; 4938 if (i == 0 && codecvt2 != ERROR_MARK) 4939 { 4940 c1 = codecvt1; 4941 c2 = codecvt2; 4942 } 4943 vect_create_vectorized_promotion_stmts (&vec_oprnds0, 4944 &vec_oprnds1, 4945 stmt, this_dest, gsi, 4946 c1, c2, decl1, decl2, 4947 op_type); 4948 } 4949 4950 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0) 4951 { 4952 if (cvt_type) 4953 { 4954 if (codecvt1 == CALL_EXPR) 4955 { 4956 new_stmt = gimple_build_call (decl1, 1, vop0); 4957 new_temp = make_ssa_name (vec_dest, new_stmt); 4958 gimple_call_set_lhs (new_stmt, new_temp); 4959 } 4960 else 4961 { 4962 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op); 4963 new_temp = make_ssa_name (vec_dest); 4964 new_stmt = gimple_build_assign (new_temp, codecvt1, 4965 vop0); 4966 } 4967 4968 vect_finish_stmt_generation (stmt, new_stmt, gsi); 4969 } 4970 else 4971 new_stmt = SSA_NAME_DEF_STMT (vop0); 4972 4973 if (slp_node) 4974 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt); 4975 else 4976 { 4977 if (!prev_stmt_info) 4978 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt; 4979 else 4980 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; 4981 prev_stmt_info = vinfo_for_stmt (new_stmt); 4982 } 4983 } 4984 } 4985 4986 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info); 4987 break; 4988 4989 case NARROW: 4990 /* In case the vectorization factor (VF) is bigger than the number 4991 of elements that we can fit in a vectype (nunits), we have to 4992 generate more than one vector stmt - i.e - we need to "unroll" 4993 the vector stmt by a factor VF/nunits. */ 4994 for (j = 0; j < ncopies; j++) 4995 { 4996 /* Handle uses. */ 4997 if (slp_node) 4998 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL, 4999 slp_node); 5000 else 5001 { 5002 vec_oprnds0.truncate (0); 5003 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0, 5004 vect_pow2 (multi_step_cvt) - 1); 5005 } 5006 5007 /* Arguments are ready. Create the new vector stmts. 
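	 For example (illustrative), a double -> short conversion first
	 applies FIX_TRUNC_EXPR to each operand, producing integer vectors
	 of the same width (CVT_TYPE), and then packs pairs of those
	 vectors down to the narrower destination vectors.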
*/ 5008 if (cvt_type) 5009 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0) 5010 { 5011 if (codecvt1 == CALL_EXPR) 5012 { 5013 new_stmt = gimple_build_call (decl1, 1, vop0); 5014 new_temp = make_ssa_name (vec_dest, new_stmt); 5015 gimple_call_set_lhs (new_stmt, new_temp); 5016 } 5017 else 5018 { 5019 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op); 5020 new_temp = make_ssa_name (vec_dest); 5021 new_stmt = gimple_build_assign (new_temp, codecvt1, 5022 vop0); 5023 } 5024 5025 vect_finish_stmt_generation (stmt, new_stmt, gsi); 5026 vec_oprnds0[i] = new_temp; 5027 } 5028 5029 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt, 5030 stmt, vec_dsts, gsi, 5031 slp_node, code1, 5032 &prev_stmt_info); 5033 } 5034 5035 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info); 5036 break; 5037 } 5038 5039 vec_oprnds0.release (); 5040 vec_oprnds1.release (); 5041 interm_types.release (); 5042 5043 return true; 5044 } 5045 5046 5047 /* Function vectorizable_assignment. 5048 5049 Check if STMT performs an assignment (copy) that can be vectorized. 5050 If VEC_STMT is also passed, vectorize the STMT: create a vectorized 5051 stmt to replace it, put it in VEC_STMT, and insert it at BSI. 5052 Return FALSE if not a vectorizable STMT, TRUE otherwise. */ 5053 5054 static bool 5055 vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi, 5056 gimple **vec_stmt, slp_tree slp_node) 5057 { 5058 tree vec_dest; 5059 tree scalar_dest; 5060 tree op; 5061 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 5062 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 5063 tree new_temp; 5064 gimple *def_stmt; 5065 enum vect_def_type dt[1] = {vect_unknown_def_type}; 5066 int ndts = 1; 5067 int ncopies; 5068 int i, j; 5069 vec<tree> vec_oprnds = vNULL; 5070 tree vop; 5071 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); 5072 vec_info *vinfo = stmt_info->vinfo; 5073 gimple *new_stmt = NULL; 5074 stmt_vec_info prev_stmt_info = NULL; 5075 enum tree_code code; 5076 tree vectype_in; 5077 5078 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) 5079 return false; 5080 5081 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def 5082 && ! vec_stmt) 5083 return false; 5084 5085 /* Is vectorizable assignment? */ 5086 if (!is_gimple_assign (stmt)) 5087 return false; 5088 5089 scalar_dest = gimple_assign_lhs (stmt); 5090 if (TREE_CODE (scalar_dest) != SSA_NAME) 5091 return false; 5092 5093 code = gimple_assign_rhs_code (stmt); 5094 if (gimple_assign_single_p (stmt) 5095 || code == PAREN_EXPR 5096 || CONVERT_EXPR_CODE_P (code)) 5097 op = gimple_assign_rhs1 (stmt); 5098 else 5099 return false; 5100 5101 if (code == VIEW_CONVERT_EXPR) 5102 op = TREE_OPERAND (op, 0); 5103 5104 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 5105 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); 5106 5107 /* Multiple types in SLP are handled by creating the appropriate number of 5108 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in 5109 case of SLP. */ 5110 if (slp_node) 5111 ncopies = 1; 5112 else 5113 ncopies = vect_get_num_copies (loop_vinfo, vectype); 5114 5115 gcc_assert (ncopies >= 1); 5116 5117 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[0], &vectype_in)) 5118 { 5119 if (dump_enabled_p ()) 5120 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 5121 "use not simple.\n"); 5122 return false; 5123 } 5124 5125 /* We can handle NOP_EXPR conversions that do not change the number 5126 of elements or the vector size. 
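     For example (illustrative), a copy such as b_2 = (unsigned int) a_1,
     or a VIEW_CONVERT_EXPR between V4SI and V4SF, is vectorized as a
     plain vector copy (with a VIEW_CONVERT_EXPR wrapped around the
     operand), since neither the number of elements nor the vector size
     changes.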
*/ 5127 if ((CONVERT_EXPR_CODE_P (code) 5128 || code == VIEW_CONVERT_EXPR) 5129 && (!vectype_in 5130 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits) 5131 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)), 5132 GET_MODE_SIZE (TYPE_MODE (vectype_in))))) 5133 return false; 5134 5135 /* We do not handle bit-precision changes. */ 5136 if ((CONVERT_EXPR_CODE_P (code) 5137 || code == VIEW_CONVERT_EXPR) 5138 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest)) 5139 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)) 5140 || !type_has_mode_precision_p (TREE_TYPE (op))) 5141 /* But a conversion that does not change the bit-pattern is ok. */ 5142 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest)) 5143 > TYPE_PRECISION (TREE_TYPE (op))) 5144 && TYPE_UNSIGNED (TREE_TYPE (op))) 5145 /* Conversion between boolean types of different sizes is 5146 a simple assignment in case their vectypes are same 5147 boolean vectors. */ 5148 && (!VECTOR_BOOLEAN_TYPE_P (vectype) 5149 || !VECTOR_BOOLEAN_TYPE_P (vectype_in))) 5150 { 5151 if (dump_enabled_p ()) 5152 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 5153 "type conversion to/from bit-precision " 5154 "unsupported.\n"); 5155 return false; 5156 } 5157 5158 if (!vec_stmt) /* transformation not required. */ 5159 { 5160 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type; 5161 if (dump_enabled_p ()) 5162 dump_printf_loc (MSG_NOTE, vect_location, 5163 "=== vectorizable_assignment ===\n"); 5164 if (!slp_node) 5165 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL); 5166 return true; 5167 } 5168 5169 /* Transform. */ 5170 if (dump_enabled_p ()) 5171 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n"); 5172 5173 /* Handle def. */ 5174 vec_dest = vect_create_destination_var (scalar_dest, vectype); 5175 5176 /* Handle use. */ 5177 for (j = 0; j < ncopies; j++) 5178 { 5179 /* Handle uses. */ 5180 if (j == 0) 5181 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node); 5182 else 5183 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL); 5184 5185 /* Arguments are ready. create the new vector stmt. */ 5186 FOR_EACH_VEC_ELT (vec_oprnds, i, vop) 5187 { 5188 if (CONVERT_EXPR_CODE_P (code) 5189 || code == VIEW_CONVERT_EXPR) 5190 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop); 5191 new_stmt = gimple_build_assign (vec_dest, vop); 5192 new_temp = make_ssa_name (vec_dest, new_stmt); 5193 gimple_assign_set_lhs (new_stmt, new_temp); 5194 vect_finish_stmt_generation (stmt, new_stmt, gsi); 5195 if (slp_node) 5196 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt); 5197 } 5198 5199 if (slp_node) 5200 continue; 5201 5202 if (j == 0) 5203 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; 5204 else 5205 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; 5206 5207 prev_stmt_info = vinfo_for_stmt (new_stmt); 5208 } 5209 5210 vec_oprnds.release (); 5211 return true; 5212 } 5213 5214 5215 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE 5216 either as shift by a scalar or by a vector. 
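   For example (illustrative), a shift like x[i] << 3 only needs the
   vector-shift-by-scalar optab, while x[i] << y[i] needs the
   vector-shift-by-vector optab; this function returns true if either
   form is implemented for the vector mode chosen for SCALAR_TYPE.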
*/ 5217 5218 bool 5219 vect_supportable_shift (enum tree_code code, tree scalar_type) 5220 { 5221 5222 machine_mode vec_mode; 5223 optab optab; 5224 int icode; 5225 tree vectype; 5226 5227 vectype = get_vectype_for_scalar_type (scalar_type); 5228 if (!vectype) 5229 return false; 5230 5231 optab = optab_for_tree_code (code, vectype, optab_scalar); 5232 if (!optab 5233 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing) 5234 { 5235 optab = optab_for_tree_code (code, vectype, optab_vector); 5236 if (!optab 5237 || (optab_handler (optab, TYPE_MODE (vectype)) 5238 == CODE_FOR_nothing)) 5239 return false; 5240 } 5241 5242 vec_mode = TYPE_MODE (vectype); 5243 icode = (int) optab_handler (optab, vec_mode); 5244 if (icode == CODE_FOR_nothing) 5245 return false; 5246 5247 return true; 5248 } 5249 5250 5251 /* Function vectorizable_shift. 5252 5253 Check if STMT performs a shift operation that can be vectorized. 5254 If VEC_STMT is also passed, vectorize the STMT: create a vectorized 5255 stmt to replace it, put it in VEC_STMT, and insert it at BSI. 5256 Return FALSE if not a vectorizable STMT, TRUE otherwise. */ 5257 5258 static bool 5259 vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi, 5260 gimple **vec_stmt, slp_tree slp_node) 5261 { 5262 tree vec_dest; 5263 tree scalar_dest; 5264 tree op0, op1 = NULL; 5265 tree vec_oprnd1 = NULL_TREE; 5266 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 5267 tree vectype; 5268 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 5269 enum tree_code code; 5270 machine_mode vec_mode; 5271 tree new_temp; 5272 optab optab; 5273 int icode; 5274 machine_mode optab_op2_mode; 5275 gimple *def_stmt; 5276 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type}; 5277 int ndts = 2; 5278 gimple *new_stmt = NULL; 5279 stmt_vec_info prev_stmt_info; 5280 poly_uint64 nunits_in; 5281 poly_uint64 nunits_out; 5282 tree vectype_out; 5283 tree op1_vectype; 5284 int ncopies; 5285 int j, i; 5286 vec<tree> vec_oprnds0 = vNULL; 5287 vec<tree> vec_oprnds1 = vNULL; 5288 tree vop0, vop1; 5289 unsigned int k; 5290 bool scalar_shift_arg = true; 5291 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); 5292 vec_info *vinfo = stmt_info->vinfo; 5293 5294 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) 5295 return false; 5296 5297 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def 5298 && ! vec_stmt) 5299 return false; 5300 5301 /* Is STMT a vectorizable binary/unary operation? */ 5302 if (!is_gimple_assign (stmt)) 5303 return false; 5304 5305 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME) 5306 return false; 5307 5308 code = gimple_assign_rhs_code (stmt); 5309 5310 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR 5311 || code == RROTATE_EXPR)) 5312 return false; 5313 5314 scalar_dest = gimple_assign_lhs (stmt); 5315 vectype_out = STMT_VINFO_VECTYPE (stmt_info); 5316 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))) 5317 { 5318 if (dump_enabled_p ()) 5319 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 5320 "bit-precision shifts not supported.\n"); 5321 return false; 5322 } 5323 5324 op0 = gimple_assign_rhs1 (stmt); 5325 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype)) 5326 { 5327 if (dump_enabled_p ()) 5328 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 5329 "use not simple.\n"); 5330 return false; 5331 } 5332 /* If op0 is an external or constant def use a vector type with 5333 the same size as the output vector type. 
*/ 5334 if (!vectype) 5335 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out); 5336 if (vec_stmt) 5337 gcc_assert (vectype); 5338 if (!vectype) 5339 { 5340 if (dump_enabled_p ()) 5341 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 5342 "no vectype for scalar type\n"); 5343 return false; 5344 } 5345 5346 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out); 5347 nunits_in = TYPE_VECTOR_SUBPARTS (vectype); 5348 if (maybe_ne (nunits_out, nunits_in)) 5349 return false; 5350 5351 op1 = gimple_assign_rhs2 (stmt); 5352 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &op1_vectype)) 5353 { 5354 if (dump_enabled_p ()) 5355 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 5356 "use not simple.\n"); 5357 return false; 5358 } 5359 5360 /* Multiple types in SLP are handled by creating the appropriate number of 5361 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in 5362 case of SLP. */ 5363 if (slp_node) 5364 ncopies = 1; 5365 else 5366 ncopies = vect_get_num_copies (loop_vinfo, vectype); 5367 5368 gcc_assert (ncopies >= 1); 5369 5370 /* Determine whether the shift amount is a vector, or scalar. If the 5371 shift/rotate amount is a vector, use the vector/vector shift optabs. */ 5372 5373 if ((dt[1] == vect_internal_def 5374 || dt[1] == vect_induction_def) 5375 && !slp_node) 5376 scalar_shift_arg = false; 5377 else if (dt[1] == vect_constant_def 5378 || dt[1] == vect_external_def 5379 || dt[1] == vect_internal_def) 5380 { 5381 /* In SLP, need to check whether the shift count is the same, 5382 in loops if it is a constant or invariant, it is always 5383 a scalar shift. */ 5384 if (slp_node) 5385 { 5386 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node); 5387 gimple *slpstmt; 5388 5389 FOR_EACH_VEC_ELT (stmts, k, slpstmt) 5390 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0)) 5391 scalar_shift_arg = false; 5392 5393 /* For internal SLP defs we have to make sure we see scalar stmts 5394 for all vector elements. 5395 ??? For different vectors we could resort to a different 5396 scalar shift operand but code-generation below simply always 5397 takes the first. */ 5398 if (dt[1] == vect_internal_def 5399 && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node), stmts.length ())) 5400 scalar_shift_arg = false; 5401 } 5402 5403 /* If the shift amount is computed by a pattern stmt we cannot 5404 use the scalar amount directly thus give up and use a vector 5405 shift. */ 5406 if (dt[1] == vect_internal_def) 5407 { 5408 gimple *def = SSA_NAME_DEF_STMT (op1); 5409 if (is_pattern_stmt_p (vinfo_for_stmt (def))) 5410 scalar_shift_arg = false; 5411 } 5412 } 5413 else 5414 { 5415 if (dump_enabled_p ()) 5416 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 5417 "operand mode requires invariant argument.\n"); 5418 return false; 5419 } 5420 5421 /* Vector shifted by vector. 
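   For example (illustration only):

     for (i = 0; i < n; i++)
       a[i] = b[i] << c[i];

   has a per-element shift amount (an internal def), so
   SCALAR_SHIFT_ARG is false and the vector/vector form is needed,
   whereas  a[i] = b[i] << k  with a loop-invariant K can use the
   vector/scalar form that the code below tries first.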
*/ 5422 if (!scalar_shift_arg) 5423 { 5424 optab = optab_for_tree_code (code, vectype, optab_vector); 5425 if (dump_enabled_p ()) 5426 dump_printf_loc (MSG_NOTE, vect_location, 5427 "vector/vector shift/rotate found.\n"); 5428 5429 if (!op1_vectype) 5430 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out); 5431 if (op1_vectype == NULL_TREE 5432 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype)) 5433 { 5434 if (dump_enabled_p ()) 5435 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 5436 "unusable type for last operand in" 5437 " vector/vector shift/rotate.\n"); 5438 return false; 5439 } 5440 } 5441 /* See if the machine has a vector shifted by scalar insn and if not 5442 then see if it has a vector shifted by vector insn. */ 5443 else 5444 { 5445 optab = optab_for_tree_code (code, vectype, optab_scalar); 5446 if (optab 5447 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing) 5448 { 5449 if (dump_enabled_p ()) 5450 dump_printf_loc (MSG_NOTE, vect_location, 5451 "vector/scalar shift/rotate found.\n"); 5452 } 5453 else 5454 { 5455 optab = optab_for_tree_code (code, vectype, optab_vector); 5456 if (optab 5457 && (optab_handler (optab, TYPE_MODE (vectype)) 5458 != CODE_FOR_nothing)) 5459 { 5460 scalar_shift_arg = false; 5461 5462 if (dump_enabled_p ()) 5463 dump_printf_loc (MSG_NOTE, vect_location, 5464 "vector/vector shift/rotate found.\n"); 5465 5466 /* Unlike the other binary operators, shifts/rotates have 5467 the rhs being int, instead of the same type as the lhs, 5468 so make sure the scalar is the right type if we are 5469 dealing with vectors of long long/long/short/char. */ 5470 if (dt[1] == vect_constant_def) 5471 op1 = fold_convert (TREE_TYPE (vectype), op1); 5472 else if (!useless_type_conversion_p (TREE_TYPE (vectype), 5473 TREE_TYPE (op1))) 5474 { 5475 if (slp_node 5476 && TYPE_MODE (TREE_TYPE (vectype)) 5477 != TYPE_MODE (TREE_TYPE (op1))) 5478 { 5479 if (dump_enabled_p ()) 5480 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 5481 "unusable type for last operand in" 5482 " vector/vector shift/rotate.\n"); 5483 return false; 5484 } 5485 if (vec_stmt && !slp_node) 5486 { 5487 op1 = fold_convert (TREE_TYPE (vectype), op1); 5488 op1 = vect_init_vector (stmt, op1, 5489 TREE_TYPE (vectype), NULL); 5490 } 5491 } 5492 } 5493 } 5494 } 5495 5496 /* Supportable by target? */ 5497 if (!optab) 5498 { 5499 if (dump_enabled_p ()) 5500 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 5501 "no optab.\n"); 5502 return false; 5503 } 5504 vec_mode = TYPE_MODE (vectype); 5505 icode = (int) optab_handler (optab, vec_mode); 5506 if (icode == CODE_FOR_nothing) 5507 { 5508 if (dump_enabled_p ()) 5509 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 5510 "op not supported by target.\n"); 5511 /* Check only during analysis. */ 5512 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD) 5513 || (!vec_stmt 5514 && !vect_worthwhile_without_simd_p (vinfo, code))) 5515 return false; 5516 if (dump_enabled_p ()) 5517 dump_printf_loc (MSG_NOTE, vect_location, 5518 "proceeding using word mode.\n"); 5519 } 5520 5521 /* Worthwhile without SIMD support? Check only during analysis. */ 5522 if (!vec_stmt 5523 && !VECTOR_MODE_P (TYPE_MODE (vectype)) 5524 && !vect_worthwhile_without_simd_p (vinfo, code)) 5525 { 5526 if (dump_enabled_p ()) 5527 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 5528 "not worthwhile without SIMD support.\n"); 5529 return false; 5530 } 5531 5532 if (!vec_stmt) /* transformation not required. 
*/ 5533 { 5534 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type; 5535 if (dump_enabled_p ()) 5536 dump_printf_loc (MSG_NOTE, vect_location, 5537 "=== vectorizable_shift ===\n"); 5538 if (!slp_node) 5539 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL); 5540 return true; 5541 } 5542 5543 /* Transform. */ 5544 5545 if (dump_enabled_p ()) 5546 dump_printf_loc (MSG_NOTE, vect_location, 5547 "transform binary/unary operation.\n"); 5548 5549 /* Handle def. */ 5550 vec_dest = vect_create_destination_var (scalar_dest, vectype); 5551 5552 prev_stmt_info = NULL; 5553 for (j = 0; j < ncopies; j++) 5554 { 5555 /* Handle uses. */ 5556 if (j == 0) 5557 { 5558 if (scalar_shift_arg) 5559 { 5560 /* Vector shl and shr insn patterns can be defined with scalar 5561 operand 2 (shift operand). In this case, use constant or loop 5562 invariant op1 directly, without extending it to vector mode 5563 first. */ 5564 optab_op2_mode = insn_data[icode].operand[2].mode; 5565 if (!VECTOR_MODE_P (optab_op2_mode)) 5566 { 5567 if (dump_enabled_p ()) 5568 dump_printf_loc (MSG_NOTE, vect_location, 5569 "operand 1 using scalar mode.\n"); 5570 vec_oprnd1 = op1; 5571 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1); 5572 vec_oprnds1.quick_push (vec_oprnd1); 5573 if (slp_node) 5574 { 5575 /* Store vec_oprnd1 for every vector stmt to be created 5576 for SLP_NODE. We check during the analysis that all 5577 the shift arguments are the same. 5578 TODO: Allow different constants for different vector 5579 stmts generated for an SLP instance. */ 5580 for (k = 0; k < slp_node->vec_stmts_size - 1; k++) 5581 vec_oprnds1.quick_push (vec_oprnd1); 5582 } 5583 } 5584 } 5585 5586 /* vec_oprnd1 is available if operand 1 should be of a scalar-type 5587 (a special case for certain kind of vector shifts); otherwise, 5588 operand 1 should be of a vector type (the usual case). */ 5589 if (vec_oprnd1) 5590 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL, 5591 slp_node); 5592 else 5593 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1, 5594 slp_node); 5595 } 5596 else 5597 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1); 5598 5599 /* Arguments are ready. Create the new vector stmt. */ 5600 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0) 5601 { 5602 vop1 = vec_oprnds1[i]; 5603 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1); 5604 new_temp = make_ssa_name (vec_dest, new_stmt); 5605 gimple_assign_set_lhs (new_stmt, new_temp); 5606 vect_finish_stmt_generation (stmt, new_stmt, gsi); 5607 if (slp_node) 5608 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt); 5609 } 5610 5611 if (slp_node) 5612 continue; 5613 5614 if (j == 0) 5615 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; 5616 else 5617 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; 5618 prev_stmt_info = vinfo_for_stmt (new_stmt); 5619 } 5620 5621 vec_oprnds0.release (); 5622 vec_oprnds1.release (); 5623 5624 return true; 5625 } 5626 5627 5628 /* Function vectorizable_operation. 5629 5630 Check if STMT performs a binary, unary or ternary operation that can 5631 be vectorized. 5632 If VEC_STMT is also passed, vectorize the STMT: create a vectorized 5633 stmt to replace it, put it in VEC_STMT, and insert it at BSI. 5634 Return FALSE if not a vectorizable STMT, TRUE otherwise. 
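   For example (illustration only), assignments such as

     S1: z_1 = x_2 + y_3
     S2: w_4 = ~v_5

   are handled here.  Shifts and rotates are deliberately rejected
   below and left to vectorizable_shift, and POINTER_PLUS_EXPR and
   POINTER_DIFF_EXPR are mapped to the ordinary PLUS_EXPR and
   MINUS_EXPR for the vector operation.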
*/ 5635 5636 static bool 5637 vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi, 5638 gimple **vec_stmt, slp_tree slp_node) 5639 { 5640 tree vec_dest; 5641 tree scalar_dest; 5642 tree op0, op1 = NULL_TREE, op2 = NULL_TREE; 5643 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 5644 tree vectype; 5645 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 5646 enum tree_code code, orig_code; 5647 machine_mode vec_mode; 5648 tree new_temp; 5649 int op_type; 5650 optab optab; 5651 bool target_support_p; 5652 gimple *def_stmt; 5653 enum vect_def_type dt[3] 5654 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type}; 5655 int ndts = 3; 5656 gimple *new_stmt = NULL; 5657 stmt_vec_info prev_stmt_info; 5658 poly_uint64 nunits_in; 5659 poly_uint64 nunits_out; 5660 tree vectype_out; 5661 int ncopies; 5662 int j, i; 5663 vec<tree> vec_oprnds0 = vNULL; 5664 vec<tree> vec_oprnds1 = vNULL; 5665 vec<tree> vec_oprnds2 = vNULL; 5666 tree vop0, vop1, vop2; 5667 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); 5668 vec_info *vinfo = stmt_info->vinfo; 5669 5670 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) 5671 return false; 5672 5673 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def 5674 && ! vec_stmt) 5675 return false; 5676 5677 /* Is STMT a vectorizable binary/unary operation? */ 5678 if (!is_gimple_assign (stmt)) 5679 return false; 5680 5681 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME) 5682 return false; 5683 5684 orig_code = code = gimple_assign_rhs_code (stmt); 5685 5686 /* For pointer addition and subtraction, we should use the normal 5687 plus and minus for the vector operation. */ 5688 if (code == POINTER_PLUS_EXPR) 5689 code = PLUS_EXPR; 5690 if (code == POINTER_DIFF_EXPR) 5691 code = MINUS_EXPR; 5692 5693 /* Support only unary or binary operations. */ 5694 op_type = TREE_CODE_LENGTH (code); 5695 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op) 5696 { 5697 if (dump_enabled_p ()) 5698 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 5699 "num. args = %d (not unary/binary/ternary op).\n", 5700 op_type); 5701 return false; 5702 } 5703 5704 scalar_dest = gimple_assign_lhs (stmt); 5705 vectype_out = STMT_VINFO_VECTYPE (stmt_info); 5706 5707 /* Most operations cannot handle bit-precision types without extra 5708 truncations. */ 5709 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out) 5710 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest)) 5711 /* Exception are bitwise binary operations. */ 5712 && code != BIT_IOR_EXPR 5713 && code != BIT_XOR_EXPR 5714 && code != BIT_AND_EXPR) 5715 { 5716 if (dump_enabled_p ()) 5717 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 5718 "bit-precision arithmetic not supported.\n"); 5719 return false; 5720 } 5721 5722 op0 = gimple_assign_rhs1 (stmt); 5723 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype)) 5724 { 5725 if (dump_enabled_p ()) 5726 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 5727 "use not simple.\n"); 5728 return false; 5729 } 5730 /* If op0 is an external or constant def use a vector type with 5731 the same size as the output vector type. */ 5732 if (!vectype) 5733 { 5734 /* For boolean type we cannot determine vectype by 5735 invariant value (don't know whether it is a vector 5736 of booleans or vector of integers). We use output 5737 vectype because operations on boolean don't change 5738 type. 
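   For example (illustration only): in  res_1 = flag_2 & val_3,  where
   all three are _Bool and FLAG_2 is loop-invariant, the invariant
   operand on its own could be represented either as a vector mask or
   as a vector of integer 0/1 values; taking VECTYPE_OUT resolves the
   ambiguity, because the result has already been assigned the boolean
   vector type in use.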
*/ 5739 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0))) 5740 { 5741 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest))) 5742 { 5743 if (dump_enabled_p ()) 5744 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 5745 "not supported operation on bool value.\n"); 5746 return false; 5747 } 5748 vectype = vectype_out; 5749 } 5750 else 5751 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out); 5752 } 5753 if (vec_stmt) 5754 gcc_assert (vectype); 5755 if (!vectype) 5756 { 5757 if (dump_enabled_p ()) 5758 { 5759 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 5760 "no vectype for scalar type "); 5761 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, 5762 TREE_TYPE (op0)); 5763 dump_printf (MSG_MISSED_OPTIMIZATION, "\n"); 5764 } 5765 5766 return false; 5767 } 5768 5769 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out); 5770 nunits_in = TYPE_VECTOR_SUBPARTS (vectype); 5771 if (maybe_ne (nunits_out, nunits_in)) 5772 return false; 5773 5774 if (op_type == binary_op || op_type == ternary_op) 5775 { 5776 op1 = gimple_assign_rhs2 (stmt); 5777 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1])) 5778 { 5779 if (dump_enabled_p ()) 5780 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 5781 "use not simple.\n"); 5782 return false; 5783 } 5784 } 5785 if (op_type == ternary_op) 5786 { 5787 op2 = gimple_assign_rhs3 (stmt); 5788 if (!vect_is_simple_use (op2, vinfo, &def_stmt, &dt[2])) 5789 { 5790 if (dump_enabled_p ()) 5791 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 5792 "use not simple.\n"); 5793 return false; 5794 } 5795 } 5796 5797 /* Multiple types in SLP are handled by creating the appropriate number of 5798 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in 5799 case of SLP. */ 5800 if (slp_node) 5801 ncopies = 1; 5802 else 5803 ncopies = vect_get_num_copies (loop_vinfo, vectype); 5804 5805 gcc_assert (ncopies >= 1); 5806 5807 /* Shifts are handled in vectorizable_shift (). */ 5808 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR 5809 || code == RROTATE_EXPR) 5810 return false; 5811 5812 /* Supportable by target? */ 5813 5814 vec_mode = TYPE_MODE (vectype); 5815 if (code == MULT_HIGHPART_EXPR) 5816 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)); 5817 else 5818 { 5819 optab = optab_for_tree_code (code, vectype, optab_default); 5820 if (!optab) 5821 { 5822 if (dump_enabled_p ()) 5823 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 5824 "no optab.\n"); 5825 return false; 5826 } 5827 target_support_p = (optab_handler (optab, vec_mode) 5828 != CODE_FOR_nothing); 5829 } 5830 5831 if (!target_support_p) 5832 { 5833 if (dump_enabled_p ()) 5834 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 5835 "op not supported by target.\n"); 5836 /* Check only during analysis. */ 5837 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD) 5838 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code))) 5839 return false; 5840 if (dump_enabled_p ()) 5841 dump_printf_loc (MSG_NOTE, vect_location, 5842 "proceeding using word mode.\n"); 5843 } 5844 5845 /* Worthwhile without SIMD support? Check only during analysis. */ 5846 if (!VECTOR_MODE_P (vec_mode) 5847 && !vec_stmt 5848 && !vect_worthwhile_without_simd_p (vinfo, code)) 5849 { 5850 if (dump_enabled_p ()) 5851 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 5852 "not worthwhile without SIMD support.\n"); 5853 return false; 5854 } 5855 5856 if (!vec_stmt) /* transformation not required. 
*/ 5857 { 5858 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type; 5859 if (dump_enabled_p ()) 5860 dump_printf_loc (MSG_NOTE, vect_location, 5861 "=== vectorizable_operation ===\n"); 5862 if (!slp_node) 5863 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL); 5864 return true; 5865 } 5866 5867 /* Transform. */ 5868 5869 if (dump_enabled_p ()) 5870 dump_printf_loc (MSG_NOTE, vect_location, 5871 "transform binary/unary operation.\n"); 5872 5873 /* Handle def. */ 5874 vec_dest = vect_create_destination_var (scalar_dest, vectype); 5875 5876 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as 5877 vectors with unsigned elements, but the result is signed. So, we 5878 need to compute the MINUS_EXPR into vectype temporary and 5879 VIEW_CONVERT_EXPR it into the final vectype_out result. */ 5880 tree vec_cvt_dest = NULL_TREE; 5881 if (orig_code == POINTER_DIFF_EXPR) 5882 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out); 5883 5884 /* In case the vectorization factor (VF) is bigger than the number 5885 of elements that we can fit in a vectype (nunits), we have to generate 5886 more than one vector stmt - i.e - we need to "unroll" the 5887 vector stmt by a factor VF/nunits. In doing so, we record a pointer 5888 from one copy of the vector stmt to the next, in the field 5889 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following 5890 stages to find the correct vector defs to be used when vectorizing 5891 stmts that use the defs of the current stmt. The example below 5892 illustrates the vectorization process when VF=16 and nunits=4 (i.e., 5893 we need to create 4 vectorized stmts): 5894 5895 before vectorization: 5896 RELATED_STMT VEC_STMT 5897 S1: x = memref - - 5898 S2: z = x + 1 - - 5899 5900 step 1: vectorize stmt S1 (done in vectorizable_load. See more details 5901 there): 5902 RELATED_STMT VEC_STMT 5903 VS1_0: vx0 = memref0 VS1_1 - 5904 VS1_1: vx1 = memref1 VS1_2 - 5905 VS1_2: vx2 = memref2 VS1_3 - 5906 VS1_3: vx3 = memref3 - - 5907 S1: x = load - VS1_0 5908 S2: z = x + 1 - - 5909 5910 step2: vectorize stmt S2 (done here): 5911 To vectorize stmt S2 we first need to find the relevant vector 5912 def for the first operand 'x'. This is, as usual, obtained from 5913 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt 5914 that defines 'x' (S1). This way we find the stmt VS1_0, and the 5915 relevant vector def 'vx0'. Having found 'vx0' we can generate 5916 the vector stmt VS2_0, and as usual, record it in the 5917 STMT_VINFO_VEC_STMT of stmt S2. 5918 When creating the second copy (VS2_1), we obtain the relevant vector 5919 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of 5920 stmt VS1_0. This way we find the stmt VS1_1 and the relevant 5921 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a 5922 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0. 5923 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting 5924 chain of stmts and pointers: 5925 RELATED_STMT VEC_STMT 5926 VS1_0: vx0 = memref0 VS1_1 - 5927 VS1_1: vx1 = memref1 VS1_2 - 5928 VS1_2: vx2 = memref2 VS1_3 - 5929 VS1_3: vx3 = memref3 - - 5930 S1: x = load - VS1_0 5931 VS2_0: vz0 = vx0 + v1 VS2_1 - 5932 VS2_1: vz1 = vx1 + v1 VS2_2 - 5933 VS2_2: vz2 = vx2 + v1 VS2_3 - 5934 VS2_3: vz3 = vx3 + v1 - - 5935 S2: z = x + 1 - VS2_0 */ 5936 5937 prev_stmt_info = NULL; 5938 for (j = 0; j < ncopies; j++) 5939 { 5940 /* Handle uses. 
*/ 5941 if (j == 0) 5942 { 5943 if (op_type == binary_op) 5944 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1, 5945 slp_node); 5946 else if (op_type == ternary_op) 5947 { 5948 if (slp_node) 5949 { 5950 auto_vec<tree> ops(3); 5951 ops.quick_push (op0); 5952 ops.quick_push (op1); 5953 ops.quick_push (op2); 5954 auto_vec<vec<tree> > vec_defs(3); 5955 vect_get_slp_defs (ops, slp_node, &vec_defs); 5956 vec_oprnds0 = vec_defs[0]; 5957 vec_oprnds1 = vec_defs[1]; 5958 vec_oprnds2 = vec_defs[2]; 5959 } 5960 else 5961 { 5962 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1, 5963 NULL); 5964 vect_get_vec_defs (op2, NULL_TREE, stmt, &vec_oprnds2, NULL, 5965 NULL); 5966 } 5967 } 5968 else 5969 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL, 5970 slp_node); 5971 } 5972 else 5973 { 5974 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1); 5975 if (op_type == ternary_op) 5976 { 5977 tree vec_oprnd = vec_oprnds2.pop (); 5978 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2], 5979 vec_oprnd)); 5980 } 5981 } 5982 5983 /* Arguments are ready. Create the new vector stmt. */ 5984 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0) 5985 { 5986 vop1 = ((op_type == binary_op || op_type == ternary_op) 5987 ? vec_oprnds1[i] : NULL_TREE); 5988 vop2 = ((op_type == ternary_op) 5989 ? vec_oprnds2[i] : NULL_TREE); 5990 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2); 5991 new_temp = make_ssa_name (vec_dest, new_stmt); 5992 gimple_assign_set_lhs (new_stmt, new_temp); 5993 vect_finish_stmt_generation (stmt, new_stmt, gsi); 5994 if (vec_cvt_dest) 5995 { 5996 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp); 5997 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR, 5998 new_temp); 5999 new_temp = make_ssa_name (vec_cvt_dest, new_stmt); 6000 gimple_assign_set_lhs (new_stmt, new_temp); 6001 vect_finish_stmt_generation (stmt, new_stmt, gsi); 6002 } 6003 if (slp_node) 6004 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt); 6005 } 6006 6007 if (slp_node) 6008 continue; 6009 6010 if (j == 0) 6011 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; 6012 else 6013 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; 6014 prev_stmt_info = vinfo_for_stmt (new_stmt); 6015 } 6016 6017 vec_oprnds0.release (); 6018 vec_oprnds1.release (); 6019 vec_oprnds2.release (); 6020 6021 return true; 6022 } 6023 6024 /* A helper function to ensure data reference DR's base alignment. */ 6025 6026 static void 6027 ensure_base_align (struct data_reference *dr) 6028 { 6029 if (!dr->aux) 6030 return; 6031 6032 if (DR_VECT_AUX (dr)->base_misaligned) 6033 { 6034 tree base_decl = DR_VECT_AUX (dr)->base_decl; 6035 6036 unsigned int align_base_to = DR_TARGET_ALIGNMENT (dr) * BITS_PER_UNIT; 6037 6038 if (decl_in_symtab_p (base_decl)) 6039 symtab_node::get (base_decl)->increase_alignment (align_base_to); 6040 else 6041 { 6042 SET_DECL_ALIGN (base_decl, align_base_to); 6043 DECL_USER_ALIGN (base_decl) = 1; 6044 } 6045 DR_VECT_AUX (dr)->base_misaligned = false; 6046 } 6047 } 6048 6049 6050 /* Function get_group_alias_ptr_type. 6051 6052 Return the alias type for the group starting at FIRST_STMT. 
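   If the members of the group do not all share one alias set --
   e.g. (illustration only) an interleaved store group built from
   p->i = ...; p->f = ...;  with an int and a float field -- the
   function returns ptr_type_node, an alias pointer type that
   conflicts with everything, so the single vector access emitted
   for the whole group stays correct.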
*/ 6053 6054 static tree 6055 get_group_alias_ptr_type (gimple *first_stmt) 6056 { 6057 struct data_reference *first_dr, *next_dr; 6058 gimple *next_stmt; 6059 6060 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)); 6061 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt)); 6062 while (next_stmt) 6063 { 6064 next_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt)); 6065 if (get_alias_set (DR_REF (first_dr)) 6066 != get_alias_set (DR_REF (next_dr))) 6067 { 6068 if (dump_enabled_p ()) 6069 dump_printf_loc (MSG_NOTE, vect_location, 6070 "conflicting alias set types.\n"); 6071 return ptr_type_node; 6072 } 6073 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt)); 6074 } 6075 return reference_alias_ptr_type (DR_REF (first_dr)); 6076 } 6077 6078 6079 /* Function vectorizable_store. 6080 6081 Check if STMT defines a non scalar data-ref (array/pointer/structure) that 6082 can be vectorized. 6083 If VEC_STMT is also passed, vectorize the STMT: create a vectorized 6084 stmt to replace it, put it in VEC_STMT, and insert it at BSI. 6085 Return FALSE if not a vectorizable STMT, TRUE otherwise. */ 6086 6087 static bool 6088 vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt, 6089 slp_tree slp_node) 6090 { 6091 tree data_ref; 6092 tree op; 6093 tree vec_oprnd = NULL_TREE; 6094 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 6095 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL; 6096 tree elem_type; 6097 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 6098 struct loop *loop = NULL; 6099 machine_mode vec_mode; 6100 tree dummy; 6101 enum dr_alignment_support alignment_support_scheme; 6102 gimple *def_stmt; 6103 enum vect_def_type rhs_dt = vect_unknown_def_type; 6104 enum vect_def_type mask_dt = vect_unknown_def_type; 6105 stmt_vec_info prev_stmt_info = NULL; 6106 tree dataref_ptr = NULL_TREE; 6107 tree dataref_offset = NULL_TREE; 6108 gimple *ptr_incr = NULL; 6109 int ncopies; 6110 int j; 6111 gimple *next_stmt, *first_stmt; 6112 bool grouped_store; 6113 unsigned int group_size, i; 6114 vec<tree> oprnds = vNULL; 6115 vec<tree> result_chain = vNULL; 6116 bool inv_p; 6117 tree offset = NULL_TREE; 6118 vec<tree> vec_oprnds = vNULL; 6119 bool slp = (slp_node != NULL); 6120 unsigned int vec_num; 6121 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); 6122 vec_info *vinfo = stmt_info->vinfo; 6123 tree aggr_type; 6124 gather_scatter_info gs_info; 6125 gimple *new_stmt; 6126 poly_uint64 vf; 6127 vec_load_store_type vls_type; 6128 tree ref_type; 6129 6130 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) 6131 return false; 6132 6133 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def 6134 && ! vec_stmt) 6135 return false; 6136 6137 /* Is vectorizable store? 
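   That is, STMT must either be an assignment whose LHS is one of the
   memory reference forms listed below (ARRAY_REF, MEM_REF,
   COMPONENT_REF, ...), or a call to an internal store function such
   as a masked or scatter store; everything else is rejected.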
*/ 6138 6139 tree mask = NULL_TREE, mask_vectype = NULL_TREE; 6140 if (is_gimple_assign (stmt)) 6141 { 6142 tree scalar_dest = gimple_assign_lhs (stmt); 6143 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR 6144 && is_pattern_stmt_p (stmt_info)) 6145 scalar_dest = TREE_OPERAND (scalar_dest, 0); 6146 if (TREE_CODE (scalar_dest) != ARRAY_REF 6147 && TREE_CODE (scalar_dest) != BIT_FIELD_REF 6148 && TREE_CODE (scalar_dest) != INDIRECT_REF 6149 && TREE_CODE (scalar_dest) != COMPONENT_REF 6150 && TREE_CODE (scalar_dest) != IMAGPART_EXPR 6151 && TREE_CODE (scalar_dest) != REALPART_EXPR 6152 && TREE_CODE (scalar_dest) != MEM_REF) 6153 return false; 6154 } 6155 else 6156 { 6157 gcall *call = dyn_cast <gcall *> (stmt); 6158 if (!call || !gimple_call_internal_p (call)) 6159 return false; 6160 6161 internal_fn ifn = gimple_call_internal_fn (call); 6162 if (!internal_store_fn_p (ifn)) 6163 return false; 6164 6165 if (slp_node != NULL) 6166 { 6167 if (dump_enabled_p ()) 6168 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 6169 "SLP of masked stores not supported.\n"); 6170 return false; 6171 } 6172 6173 int mask_index = internal_fn_mask_index (ifn); 6174 if (mask_index >= 0) 6175 { 6176 mask = gimple_call_arg (call, mask_index); 6177 if (!vect_check_load_store_mask (stmt, mask, &mask_dt, 6178 &mask_vectype)) 6179 return false; 6180 } 6181 } 6182 6183 op = vect_get_store_rhs (stmt); 6184 6185 /* Cannot have hybrid store SLP -- that would mean storing to the 6186 same location twice. */ 6187 gcc_assert (slp == PURE_SLP_STMT (stmt_info)); 6188 6189 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE; 6190 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); 6191 6192 if (loop_vinfo) 6193 { 6194 loop = LOOP_VINFO_LOOP (loop_vinfo); 6195 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); 6196 } 6197 else 6198 vf = 1; 6199 6200 /* Multiple types in SLP are handled by creating the appropriate number of 6201 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in 6202 case of SLP. */ 6203 if (slp) 6204 ncopies = 1; 6205 else 6206 ncopies = vect_get_num_copies (loop_vinfo, vectype); 6207 6208 gcc_assert (ncopies >= 1); 6209 6210 /* FORNOW. This restriction should be relaxed. */ 6211 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1) 6212 { 6213 if (dump_enabled_p ()) 6214 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 6215 "multiple types in nested loop.\n"); 6216 return false; 6217 } 6218 6219 if (!vect_check_store_rhs (stmt, op, &rhs_dt, &rhs_vectype, &vls_type)) 6220 return false; 6221 6222 elem_type = TREE_TYPE (vectype); 6223 vec_mode = TYPE_MODE (vectype); 6224 6225 if (!STMT_VINFO_DATA_REF (stmt_info)) 6226 return false; 6227 6228 vect_memory_access_type memory_access_type; 6229 if (!get_load_store_type (stmt, vectype, slp, mask, vls_type, ncopies, 6230 &memory_access_type, &gs_info)) 6231 return false; 6232 6233 if (mask) 6234 { 6235 if (memory_access_type == VMAT_CONTIGUOUS) 6236 { 6237 if (!VECTOR_MODE_P (vec_mode) 6238 || !can_vec_mask_load_store_p (vec_mode, 6239 TYPE_MODE (mask_vectype), false)) 6240 return false; 6241 } 6242 else if (memory_access_type != VMAT_LOAD_STORE_LANES 6243 && (memory_access_type != VMAT_GATHER_SCATTER || gs_info.decl)) 6244 { 6245 if (dump_enabled_p ()) 6246 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 6247 "unsupported access type for masked store.\n"); 6248 return false; 6249 } 6250 } 6251 else 6252 { 6253 /* FORNOW. In some cases can vectorize even if data-type not supported 6254 (e.g. 
- array initialization with 0). */ 6255 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing) 6256 return false; 6257 } 6258 6259 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info) 6260 && memory_access_type != VMAT_GATHER_SCATTER 6261 && (slp || memory_access_type != VMAT_CONTIGUOUS)); 6262 if (grouped_store) 6263 { 6264 first_stmt = GROUP_FIRST_ELEMENT (stmt_info); 6265 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)); 6266 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt)); 6267 } 6268 else 6269 { 6270 first_stmt = stmt; 6271 first_dr = dr; 6272 group_size = vec_num = 1; 6273 } 6274 6275 if (!vec_stmt) /* transformation not required. */ 6276 { 6277 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type; 6278 6279 if (loop_vinfo 6280 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)) 6281 check_load_store_masking (loop_vinfo, vectype, vls_type, group_size, 6282 memory_access_type, &gs_info); 6283 6284 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type; 6285 /* The SLP costs are calculated during SLP analysis. */ 6286 if (!slp_node) 6287 vect_model_store_cost (stmt_info, ncopies, memory_access_type, 6288 vls_type, NULL, NULL, NULL); 6289 return true; 6290 } 6291 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info)); 6292 6293 /* Transform. */ 6294 6295 ensure_base_align (dr); 6296 6297 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl) 6298 { 6299 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src; 6300 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl)); 6301 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype; 6302 tree ptr, mask, var, scale, perm_mask = NULL_TREE; 6303 edge pe = loop_preheader_edge (loop); 6304 gimple_seq seq; 6305 basic_block new_bb; 6306 enum { NARROW, NONE, WIDEN } modifier; 6307 poly_uint64 scatter_off_nunits 6308 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype); 6309 6310 if (known_eq (nunits, scatter_off_nunits)) 6311 modifier = NONE; 6312 else if (known_eq (nunits * 2, scatter_off_nunits)) 6313 { 6314 modifier = WIDEN; 6315 6316 /* Currently gathers and scatters are only supported for 6317 fixed-length vectors. */ 6318 unsigned int count = scatter_off_nunits.to_constant (); 6319 vec_perm_builder sel (count, count, 1); 6320 for (i = 0; i < (unsigned int) count; ++i) 6321 sel.quick_push (i | (count / 2)); 6322 6323 vec_perm_indices indices (sel, 1, count); 6324 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, 6325 indices); 6326 gcc_assert (perm_mask != NULL_TREE); 6327 } 6328 else if (known_eq (nunits, scatter_off_nunits * 2)) 6329 { 6330 modifier = NARROW; 6331 6332 /* Currently gathers and scatters are only supported for 6333 fixed-length vectors. 
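   In the NARROW case the offset vector has half as many elements as
   the data vector -- e.g. (purely illustrative) V8SI data addressed
   through 64-bit offsets held in a V4DI vector -- so each data
   vector is split over two scatter calls, the permutation built
   below exposing its high half, and NCOPIES is doubled accordingly.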
*/ 6334 unsigned int count = nunits.to_constant (); 6335 vec_perm_builder sel (count, count, 1); 6336 for (i = 0; i < (unsigned int) count; ++i) 6337 sel.quick_push (i | (count / 2)); 6338 6339 vec_perm_indices indices (sel, 2, count); 6340 perm_mask = vect_gen_perm_mask_checked (vectype, indices); 6341 gcc_assert (perm_mask != NULL_TREE); 6342 ncopies *= 2; 6343 } 6344 else 6345 gcc_unreachable (); 6346 6347 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl)); 6348 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); 6349 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); 6350 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); 6351 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); 6352 scaletype = TREE_VALUE (arglist); 6353 6354 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE 6355 && TREE_CODE (rettype) == VOID_TYPE); 6356 6357 ptr = fold_convert (ptrtype, gs_info.base); 6358 if (!is_gimple_min_invariant (ptr)) 6359 { 6360 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE); 6361 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq); 6362 gcc_assert (!new_bb); 6363 } 6364 6365 /* Currently we support only unconditional scatter stores, 6366 so mask should be all ones. */ 6367 mask = build_int_cst (masktype, -1); 6368 mask = vect_init_vector (stmt, mask, masktype, NULL); 6369 6370 scale = build_int_cst (scaletype, gs_info.scale); 6371 6372 prev_stmt_info = NULL; 6373 for (j = 0; j < ncopies; ++j) 6374 { 6375 if (j == 0) 6376 { 6377 src = vec_oprnd1 6378 = vect_get_vec_def_for_operand (op, stmt); 6379 op = vec_oprnd0 6380 = vect_get_vec_def_for_operand (gs_info.offset, stmt); 6381 } 6382 else if (modifier != NONE && (j & 1)) 6383 { 6384 if (modifier == WIDEN) 6385 { 6386 src = vec_oprnd1 6387 = vect_get_vec_def_for_stmt_copy (rhs_dt, vec_oprnd1); 6388 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask, 6389 stmt, gsi); 6390 } 6391 else if (modifier == NARROW) 6392 { 6393 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask, 6394 stmt, gsi); 6395 op = vec_oprnd0 6396 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt, 6397 vec_oprnd0); 6398 } 6399 else 6400 gcc_unreachable (); 6401 } 6402 else 6403 { 6404 src = vec_oprnd1 6405 = vect_get_vec_def_for_stmt_copy (rhs_dt, vec_oprnd1); 6406 op = vec_oprnd0 6407 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt, 6408 vec_oprnd0); 6409 } 6410 6411 if (!useless_type_conversion_p (srctype, TREE_TYPE (src))) 6412 { 6413 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)), 6414 TYPE_VECTOR_SUBPARTS (srctype))); 6415 var = vect_get_new_ssa_name (srctype, vect_simple_var); 6416 src = build1 (VIEW_CONVERT_EXPR, srctype, src); 6417 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src); 6418 vect_finish_stmt_generation (stmt, new_stmt, gsi); 6419 src = var; 6420 } 6421 6422 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op))) 6423 { 6424 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)), 6425 TYPE_VECTOR_SUBPARTS (idxtype))); 6426 var = vect_get_new_ssa_name (idxtype, vect_simple_var); 6427 op = build1 (VIEW_CONVERT_EXPR, idxtype, op); 6428 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op); 6429 vect_finish_stmt_generation (stmt, new_stmt, gsi); 6430 op = var; 6431 } 6432 6433 new_stmt 6434 = gimple_build_call (gs_info.decl, 5, ptr, mask, op, src, scale); 6435 6436 vect_finish_stmt_generation (stmt, new_stmt, gsi); 6437 6438 if (prev_stmt_info == NULL) 6439 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; 6440 else 6441 
STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; 6442 prev_stmt_info = vinfo_for_stmt (new_stmt); 6443 } 6444 return true; 6445 } 6446 6447 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) 6448 { 6449 gimple *group_stmt = GROUP_FIRST_ELEMENT (stmt_info); 6450 GROUP_STORE_COUNT (vinfo_for_stmt (group_stmt))++; 6451 } 6452 6453 if (grouped_store) 6454 { 6455 /* FORNOW */ 6456 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt)); 6457 6458 /* We vectorize all the stmts of the interleaving group when we 6459 reach the last stmt in the group. */ 6460 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt)) 6461 < GROUP_SIZE (vinfo_for_stmt (first_stmt)) 6462 && !slp) 6463 { 6464 *vec_stmt = NULL; 6465 return true; 6466 } 6467 6468 if (slp) 6469 { 6470 grouped_store = false; 6471 /* VEC_NUM is the number of vect stmts to be created for this 6472 group. */ 6473 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); 6474 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0]; 6475 gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)) == first_stmt); 6476 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)); 6477 op = vect_get_store_rhs (first_stmt); 6478 } 6479 else 6480 /* VEC_NUM is the number of vect stmts to be created for this 6481 group. */ 6482 vec_num = group_size; 6483 6484 ref_type = get_group_alias_ptr_type (first_stmt); 6485 } 6486 else 6487 ref_type = reference_alias_ptr_type (DR_REF (first_dr)); 6488 6489 if (dump_enabled_p ()) 6490 dump_printf_loc (MSG_NOTE, vect_location, 6491 "transform store. ncopies = %d\n", ncopies); 6492 6493 if (memory_access_type == VMAT_ELEMENTWISE 6494 || memory_access_type == VMAT_STRIDED_SLP) 6495 { 6496 gimple_stmt_iterator incr_gsi; 6497 bool insert_after; 6498 gimple *incr; 6499 tree offvar; 6500 tree ivstep; 6501 tree running_off; 6502 tree stride_base, stride_step, alias_off; 6503 tree vec_oprnd; 6504 unsigned int g; 6505 /* Checked by get_load_store_type. */ 6506 unsigned int const_nunits = nunits.to_constant (); 6507 6508 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)); 6509 gcc_assert (!nested_in_vect_loop_p (loop, stmt)); 6510 6511 stride_base 6512 = fold_build_pointer_plus 6513 (DR_BASE_ADDRESS (first_dr), 6514 size_binop (PLUS_EXPR, 6515 convert_to_ptrofftype (DR_OFFSET (first_dr)), 6516 convert_to_ptrofftype (DR_INIT (first_dr)))); 6517 stride_step = fold_convert (sizetype, DR_STEP (first_dr)); 6518 6519 /* For a store with loop-invariant (but other than power-of-2) 6520 stride (i.e. not a grouped access) like so: 6521 6522 for (i = 0; i < n; i += stride) 6523 array[i] = ...; 6524 6525 we generate a new induction variable and new stores from 6526 the components of the (vectorized) rhs: 6527 6528 for (j = 0; ; j += VF*stride) 6529 vectemp = ...; 6530 tmp1 = vectemp[0]; 6531 array[j] = tmp1; 6532 tmp2 = vectemp[1]; 6533 array[j + stride] = tmp2; 6534 ... 6535 */ 6536 6537 unsigned nstores = const_nunits; 6538 unsigned lnel = 1; 6539 tree ltype = elem_type; 6540 tree lvectype = vectype; 6541 if (slp) 6542 { 6543 if (group_size < const_nunits 6544 && const_nunits % group_size == 0) 6545 { 6546 nstores = const_nunits / group_size; 6547 lnel = group_size; 6548 ltype = build_vector_type (elem_type, group_size); 6549 lvectype = vectype; 6550 6551 /* First check if vec_extract optab doesn't support extraction 6552 of vector elts directly. 
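   Worked example (illustration only): storing groups of four chars
   out of a V16QI vector on a target without a direct four-element
   QImode vec_extract.  Then LSIZE = 4 * 8 = 32 bits, ELMODE becomes
   SImode and LNUNITS = 16 / 4 = 4, so if SImode extraction from a
   V4SI vector is supported we reinterpret the V16QI vector as V4SI
   and emit four SImode stores per vector, each covering one group
   of four chars.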
*/ 6553 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type); 6554 machine_mode vmode; 6555 if (!mode_for_vector (elmode, group_size).exists (&vmode) 6556 || !VECTOR_MODE_P (vmode) 6557 || !targetm.vector_mode_supported_p (vmode) 6558 || (convert_optab_handler (vec_extract_optab, 6559 TYPE_MODE (vectype), vmode) 6560 == CODE_FOR_nothing)) 6561 { 6562 /* Try to avoid emitting an extract of vector elements 6563 by performing the extracts using an integer type of the 6564 same size, extracting from a vector of those and then 6565 re-interpreting it as the original vector type if 6566 supported. */ 6567 unsigned lsize 6568 = group_size * GET_MODE_BITSIZE (elmode); 6569 elmode = int_mode_for_size (lsize, 0).require (); 6570 unsigned int lnunits = const_nunits / group_size; 6571 /* If we can't construct such a vector fall back to 6572 element extracts from the original vector type and 6573 element size stores. */ 6574 if (mode_for_vector (elmode, lnunits).exists (&vmode) 6575 && VECTOR_MODE_P (vmode) 6576 && targetm.vector_mode_supported_p (vmode) 6577 && (convert_optab_handler (vec_extract_optab, 6578 vmode, elmode) 6579 != CODE_FOR_nothing)) 6580 { 6581 nstores = lnunits; 6582 lnel = group_size; 6583 ltype = build_nonstandard_integer_type (lsize, 1); 6584 lvectype = build_vector_type (ltype, nstores); 6585 } 6586 /* Else fall back to vector extraction anyway. 6587 Fewer stores are more important than avoiding spilling 6588 of the vector we extract from. Compared to the 6589 construction case in vectorizable_load no store-forwarding 6590 issue exists here for reasonable archs. */ 6591 } 6592 } 6593 else if (group_size >= const_nunits 6594 && group_size % const_nunits == 0) 6595 { 6596 nstores = 1; 6597 lnel = const_nunits; 6598 ltype = vectype; 6599 lvectype = vectype; 6600 } 6601 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type)); 6602 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); 6603 } 6604 6605 ivstep = stride_step; 6606 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep, 6607 build_int_cst (TREE_TYPE (ivstep), vf)); 6608 6609 standard_iv_increment_position (loop, &incr_gsi, &insert_after); 6610 6611 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base); 6612 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep); 6613 create_iv (stride_base, ivstep, NULL, 6614 loop, &incr_gsi, insert_after, 6615 &offvar, NULL); 6616 incr = gsi_stmt (incr_gsi); 6617 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo)); 6618 6619 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step); 6620 6621 prev_stmt_info = NULL; 6622 alias_off = build_int_cst (ref_type, 0); 6623 next_stmt = first_stmt; 6624 for (g = 0; g < group_size; g++) 6625 { 6626 running_off = offvar; 6627 if (g) 6628 { 6629 tree size = TYPE_SIZE_UNIT (ltype); 6630 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g), 6631 size); 6632 tree newoff = copy_ssa_name (running_off, NULL); 6633 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR, 6634 running_off, pos); 6635 vect_finish_stmt_generation (stmt, incr, gsi); 6636 running_off = newoff; 6637 } 6638 unsigned int group_el = 0; 6639 unsigned HOST_WIDE_INT 6640 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype))); 6641 for (j = 0; j < ncopies; j++) 6642 { 6643 /* We've set op and dt above, from vect_get_store_rhs, 6644 and first_stmt == stmt. 
*/ 6645 if (j == 0) 6646 { 6647 if (slp) 6648 { 6649 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL, 6650 slp_node); 6651 vec_oprnd = vec_oprnds[0]; 6652 } 6653 else 6654 { 6655 op = vect_get_store_rhs (next_stmt); 6656 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt); 6657 } 6658 } 6659 else 6660 { 6661 if (slp) 6662 vec_oprnd = vec_oprnds[j]; 6663 else 6664 { 6665 vect_is_simple_use (op, vinfo, &def_stmt, &rhs_dt); 6666 vec_oprnd = vect_get_vec_def_for_stmt_copy (rhs_dt, 6667 vec_oprnd); 6668 } 6669 } 6670 /* Pun the vector to extract from if necessary. */ 6671 if (lvectype != vectype) 6672 { 6673 tree tem = make_ssa_name (lvectype); 6674 gimple *pun 6675 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR, 6676 lvectype, vec_oprnd)); 6677 vect_finish_stmt_generation (stmt, pun, gsi); 6678 vec_oprnd = tem; 6679 } 6680 for (i = 0; i < nstores; i++) 6681 { 6682 tree newref, newoff; 6683 gimple *incr, *assign; 6684 tree size = TYPE_SIZE (ltype); 6685 /* Extract the i'th component. */ 6686 tree pos = fold_build2 (MULT_EXPR, bitsizetype, 6687 bitsize_int (i), size); 6688 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd, 6689 size, pos); 6690 6691 elem = force_gimple_operand_gsi (gsi, elem, true, 6692 NULL_TREE, true, 6693 GSI_SAME_STMT); 6694 6695 tree this_off = build_int_cst (TREE_TYPE (alias_off), 6696 group_el * elsz); 6697 newref = build2 (MEM_REF, ltype, 6698 running_off, this_off); 6699 vect_copy_ref_info (newref, DR_REF (first_dr)); 6700 6701 /* And store it to *running_off. */ 6702 assign = gimple_build_assign (newref, elem); 6703 vect_finish_stmt_generation (stmt, assign, gsi); 6704 6705 group_el += lnel; 6706 if (! slp 6707 || group_el == group_size) 6708 { 6709 newoff = copy_ssa_name (running_off, NULL); 6710 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR, 6711 running_off, stride_step); 6712 vect_finish_stmt_generation (stmt, incr, gsi); 6713 6714 running_off = newoff; 6715 group_el = 0; 6716 } 6717 if (g == group_size - 1 6718 && !slp) 6719 { 6720 if (j == 0 && i == 0) 6721 STMT_VINFO_VEC_STMT (stmt_info) 6722 = *vec_stmt = assign; 6723 else 6724 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign; 6725 prev_stmt_info = vinfo_for_stmt (assign); 6726 } 6727 } 6728 } 6729 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt)); 6730 if (slp) 6731 break; 6732 } 6733 6734 vec_oprnds.release (); 6735 return true; 6736 } 6737 6738 auto_vec<tree> dr_chain (group_size); 6739 oprnds.create (group_size); 6740 6741 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false); 6742 gcc_assert (alignment_support_scheme); 6743 vec_loop_masks *loop_masks 6744 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo) 6745 ? &LOOP_VINFO_MASKS (loop_vinfo) 6746 : NULL); 6747 /* Targets with store-lane instructions must not require explicit 6748 realignment. vect_supportable_dr_alignment always returns either 6749 dr_aligned or dr_unaligned_supported for masked operations. 
*/ 6750 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES 6751 && !mask 6752 && !loop_masks) 6753 || alignment_support_scheme == dr_aligned 6754 || alignment_support_scheme == dr_unaligned_supported); 6755 6756 if (memory_access_type == VMAT_CONTIGUOUS_DOWN 6757 || memory_access_type == VMAT_CONTIGUOUS_REVERSE) 6758 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1); 6759 6760 tree bump; 6761 tree vec_offset = NULL_TREE; 6762 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) 6763 { 6764 aggr_type = NULL_TREE; 6765 bump = NULL_TREE; 6766 } 6767 else if (memory_access_type == VMAT_GATHER_SCATTER) 6768 { 6769 aggr_type = elem_type; 6770 vect_get_strided_load_store_ops (stmt, loop_vinfo, &gs_info, 6771 &bump, &vec_offset); 6772 } 6773 else 6774 { 6775 if (memory_access_type == VMAT_LOAD_STORE_LANES) 6776 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits); 6777 else 6778 aggr_type = vectype; 6779 bump = vect_get_data_ptr_increment (dr, aggr_type, memory_access_type); 6780 } 6781 6782 if (mask) 6783 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true; 6784 6785 /* In case the vectorization factor (VF) is bigger than the number 6786 of elements that we can fit in a vectype (nunits), we have to generate 6787 more than one vector stmt - i.e - we need to "unroll" the 6788 vector stmt by a factor VF/nunits. For more details see documentation in 6789 vect_get_vec_def_for_copy_stmt. */ 6790 6791 /* In case of interleaving (non-unit grouped access): 6792 6793 S1: &base + 2 = x2 6794 S2: &base = x0 6795 S3: &base + 1 = x1 6796 S4: &base + 3 = x3 6797 6798 We create vectorized stores starting from base address (the access of the 6799 first stmt in the chain (S2 in the above example), when the last store stmt 6800 of the chain (S4) is reached: 6801 6802 VS1: &base = vx2 6803 VS2: &base + vec_size*1 = vx0 6804 VS3: &base + vec_size*2 = vx1 6805 VS4: &base + vec_size*3 = vx3 6806 6807 Then permutation statements are generated: 6808 6809 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} > 6810 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} > 6811 ... 6812 6813 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts 6814 (the order of the data-refs in the output of vect_permute_store_chain 6815 corresponds to the order of scalar stmts in the interleaving chain - see 6816 the documentation of vect_permute_store_chain()). 6817 6818 In case of both multiple types and interleaving, above vector stores and 6819 permutation stmts are created for every copy. The result vector stmts are 6820 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding 6821 STMT_VINFO_RELATED_STMT for the next copies. 6822 */ 6823 6824 prev_stmt_info = NULL; 6825 tree vec_mask = NULL_TREE; 6826 for (j = 0; j < ncopies; j++) 6827 { 6828 6829 if (j == 0) 6830 { 6831 if (slp) 6832 { 6833 /* Get vectorized arguments for SLP_NODE. */ 6834 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, 6835 NULL, slp_node); 6836 6837 vec_oprnd = vec_oprnds[0]; 6838 } 6839 else 6840 { 6841 /* For interleaved stores we collect vectorized defs for all the 6842 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then 6843 used as an input to vect_permute_store_chain(), and OPRNDS as 6844 an input to vect_get_vec_def_for_stmt_copy() for the next copy. 6845 6846 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and 6847 OPRNDS are of size 1. 
*/
6848 next_stmt = first_stmt;
6849 for (i = 0; i < group_size; i++)
6850 {
6851 /* Since gaps are not supported for interleaved stores,
6852 GROUP_SIZE is the exact number of stmts in the chain.
6853 Therefore, NEXT_STMT can't be NULL_TREE. If
6854 there is no interleaving, GROUP_SIZE is 1, and only one
6855 iteration of the loop will be executed. */
6856 op = vect_get_store_rhs (next_stmt);
6857 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
6858 dr_chain.quick_push (vec_oprnd);
6859 oprnds.quick_push (vec_oprnd);
6860 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6861 }
6862 if (mask)
6863 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
6864 mask_vectype);
6865 }
6866
6867 /* We should have caught mismatched types earlier. */
6868 gcc_assert (useless_type_conversion_p (vectype,
6869 TREE_TYPE (vec_oprnd)));
6870 bool simd_lane_access_p
6871 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6872 if (simd_lane_access_p
6873 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6874 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6875 && integer_zerop (DR_OFFSET (first_dr))
6876 && integer_zerop (DR_INIT (first_dr))
6877 && alias_sets_conflict_p (get_alias_set (aggr_type),
6878 get_alias_set (TREE_TYPE (ref_type))))
6879 {
6880 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
6881 dataref_offset = build_int_cst (ref_type, 0);
6882 inv_p = false;
6883 }
6884 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6885 {
6886 vect_get_gather_scatter_ops (loop, stmt, &gs_info,
6887 &dataref_ptr, &vec_offset);
6888 inv_p = false;
6889 }
6890 else
6891 dataref_ptr
6892 = vect_create_data_ref_ptr (first_stmt, aggr_type,
6893 simd_lane_access_p ? loop : NULL,
6894 offset, &dummy, gsi, &ptr_incr,
6895 simd_lane_access_p, &inv_p,
6896 NULL_TREE, bump);
6897 gcc_assert (bb_vinfo || !inv_p);
6898 }
6899 else
6900 {
6901 /* For interleaved stores we created vectorized defs for all the
6902 defs stored in OPRNDS in the previous iteration (previous copy).
6903 DR_CHAIN is then used as an input to vect_permute_store_chain(),
6904 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
6905 next copy.
6906 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6907 OPRNDS are of size 1. */
6908 for (i = 0; i < group_size; i++)
6909 {
6910 op = oprnds[i];
6911 vect_is_simple_use (op, vinfo, &def_stmt, &rhs_dt);
6912 vec_oprnd = vect_get_vec_def_for_stmt_copy (rhs_dt, op);
6913 dr_chain[i] = vec_oprnd;
6914 oprnds[i] = vec_oprnd;
6915 }
6916 if (mask)
6917 vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);
6918 if (dataref_offset)
6919 dataref_offset
6920 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
6921 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6922 vec_offset = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
6923 vec_offset);
6924 else
6925 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6926 bump);
6927 }
6928
6929 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6930 {
6931 tree vec_array;
6932
6933 /* Combine all the vectors into an array.
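   For example (illustration only), with two V4SI vectors in DR_CHAIN
   the array has type vector(4) int[2], and the IFN_STORE_LANES or
   IFN_MASK_STORE_LANES call built below stores the whole array with
   the lanes of the two vectors interleaved in memory (the kind of
   access a two-register structure store instruction provides).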
*/ 6934 vec_array = create_vector_array (vectype, vec_num); 6935 for (i = 0; i < vec_num; i++) 6936 { 6937 vec_oprnd = dr_chain[i]; 6938 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i); 6939 } 6940 6941 tree final_mask = NULL; 6942 if (loop_masks) 6943 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies, 6944 vectype, j); 6945 if (vec_mask) 6946 final_mask = prepare_load_store_mask (mask_vectype, final_mask, 6947 vec_mask, gsi); 6948 6949 gcall *call; 6950 if (final_mask) 6951 { 6952 /* Emit: 6953 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK, 6954 VEC_ARRAY). */ 6955 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype)); 6956 tree alias_ptr = build_int_cst (ref_type, align); 6957 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4, 6958 dataref_ptr, alias_ptr, 6959 final_mask, vec_array); 6960 } 6961 else 6962 { 6963 /* Emit: 6964 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */ 6965 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type); 6966 call = gimple_build_call_internal (IFN_STORE_LANES, 1, 6967 vec_array); 6968 gimple_call_set_lhs (call, data_ref); 6969 } 6970 gimple_call_set_nothrow (call, true); 6971 new_stmt = call; 6972 vect_finish_stmt_generation (stmt, new_stmt, gsi); 6973 } 6974 else 6975 { 6976 new_stmt = NULL; 6977 if (grouped_store) 6978 { 6979 if (j == 0) 6980 result_chain.create (group_size); 6981 /* Permute. */ 6982 vect_permute_store_chain (dr_chain, group_size, stmt, gsi, 6983 &result_chain); 6984 } 6985 6986 next_stmt = first_stmt; 6987 for (i = 0; i < vec_num; i++) 6988 { 6989 unsigned align, misalign; 6990 6991 tree final_mask = NULL_TREE; 6992 if (loop_masks) 6993 final_mask = vect_get_loop_mask (gsi, loop_masks, 6994 vec_num * ncopies, 6995 vectype, vec_num * j + i); 6996 if (vec_mask) 6997 final_mask = prepare_load_store_mask (mask_vectype, final_mask, 6998 vec_mask, gsi); 6999 7000 if (memory_access_type == VMAT_GATHER_SCATTER) 7001 { 7002 tree scale = size_int (gs_info.scale); 7003 gcall *call; 7004 if (loop_masks) 7005 call = gimple_build_call_internal 7006 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset, 7007 scale, vec_oprnd, final_mask); 7008 else 7009 call = gimple_build_call_internal 7010 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset, 7011 scale, vec_oprnd); 7012 gimple_call_set_nothrow (call, true); 7013 new_stmt = call; 7014 vect_finish_stmt_generation (stmt, new_stmt, gsi); 7015 break; 7016 } 7017 7018 if (i > 0) 7019 /* Bump the vector pointer. */ 7020 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, 7021 stmt, bump); 7022 7023 if (slp) 7024 vec_oprnd = vec_oprnds[i]; 7025 else if (grouped_store) 7026 /* For grouped stores vectorized defs are interleaved in 7027 vect_permute_store_chain(). */ 7028 vec_oprnd = result_chain[i]; 7029 7030 align = DR_TARGET_ALIGNMENT (first_dr); 7031 if (aligned_access_p (first_dr)) 7032 misalign = 0; 7033 else if (DR_MISALIGNMENT (first_dr) == -1) 7034 { 7035 align = dr_alignment (vect_dr_behavior (first_dr)); 7036 misalign = 0; 7037 } 7038 else 7039 misalign = DR_MISALIGNMENT (first_dr); 7040 if (dataref_offset == NULL_TREE 7041 && TREE_CODE (dataref_ptr) == SSA_NAME) 7042 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align, 7043 misalign); 7044 7045 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE) 7046 { 7047 tree perm_mask = perm_mask_for_reverse (vectype); 7048 tree perm_dest 7049 = vect_create_destination_var (vect_get_store_rhs (stmt), 7050 vectype); 7051 tree new_temp = make_ssa_name (perm_dest); 7052 7053 /* Generate the permute statement. 
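   For example (illustration only), for V4SI the mask returned by
   perm_mask_for_reverse is { 3, 2, 1, 0 }, so the VEC_PERM_EXPR
   built here reverses the element order to match the negative-step
   data reference being stored to.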
*/ 7054 gimple *perm_stmt 7055 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd, 7056 vec_oprnd, perm_mask); 7057 vect_finish_stmt_generation (stmt, perm_stmt, gsi); 7058 7059 perm_stmt = SSA_NAME_DEF_STMT (new_temp); 7060 vec_oprnd = new_temp; 7061 } 7062 7063 /* Arguments are ready. Create the new vector stmt. */ 7064 if (final_mask) 7065 { 7066 align = least_bit_hwi (misalign | align); 7067 tree ptr = build_int_cst (ref_type, align); 7068 gcall *call 7069 = gimple_build_call_internal (IFN_MASK_STORE, 4, 7070 dataref_ptr, ptr, 7071 final_mask, vec_oprnd); 7072 gimple_call_set_nothrow (call, true); 7073 new_stmt = call; 7074 } 7075 else 7076 { 7077 data_ref = fold_build2 (MEM_REF, vectype, 7078 dataref_ptr, 7079 dataref_offset 7080 ? dataref_offset 7081 : build_int_cst (ref_type, 0)); 7082 if (aligned_access_p (first_dr)) 7083 ; 7084 else if (DR_MISALIGNMENT (first_dr) == -1) 7085 TREE_TYPE (data_ref) 7086 = build_aligned_type (TREE_TYPE (data_ref), 7087 align * BITS_PER_UNIT); 7088 else 7089 TREE_TYPE (data_ref) 7090 = build_aligned_type (TREE_TYPE (data_ref), 7091 TYPE_ALIGN (elem_type)); 7092 vect_copy_ref_info (data_ref, DR_REF (first_dr)); 7093 new_stmt = gimple_build_assign (data_ref, vec_oprnd); 7094 } 7095 vect_finish_stmt_generation (stmt, new_stmt, gsi); 7096 7097 if (slp) 7098 continue; 7099 7100 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt)); 7101 if (!next_stmt) 7102 break; 7103 } 7104 } 7105 if (!slp) 7106 { 7107 if (j == 0) 7108 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; 7109 else 7110 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; 7111 prev_stmt_info = vinfo_for_stmt (new_stmt); 7112 } 7113 } 7114 7115 oprnds.release (); 7116 result_chain.release (); 7117 vec_oprnds.release (); 7118 7119 return true; 7120 } 7121 7122 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent 7123 VECTOR_CST mask. No checks are made that the target platform supports the 7124 mask, so callers may wish to test can_vec_perm_const_p separately, or use 7125 vect_gen_perm_mask_checked. */ 7126 7127 tree 7128 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel) 7129 { 7130 tree mask_type; 7131 7132 poly_uint64 nunits = sel.length (); 7133 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype))); 7134 7135 mask_type = build_vector_type (ssizetype, nunits); 7136 return vec_perm_indices_to_tree (mask_type, sel); 7137 } 7138 7139 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p, 7140 i.e. that the target supports the pattern _for arbitrary input vectors_. */ 7141 7142 tree 7143 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel) 7144 { 7145 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel)); 7146 return vect_gen_perm_mask_any (vectype, sel); 7147 } 7148 7149 /* Given a vector variable X and Y, that was generated for the scalar 7150 STMT, generate instructions to permute the vector elements of X and Y 7151 using permutation mask MASK_VEC, insert them at *GSI and return the 7152 permuted vector variable. 
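   For example (illustration only), with V4SI inputs
   X = { x0, x1, x2, x3 }, Y = { y0, y1, y2, y3 } and
   MASK_VEC = { 0, 4, 1, 5 }, the generated VEC_PERM_EXPR produces
   { x0, y0, x1, y1 }, i.e. the low halves of X and Y interleaved.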
*/ 7153 7154 static tree 7155 permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt, 7156 gimple_stmt_iterator *gsi) 7157 { 7158 tree vectype = TREE_TYPE (x); 7159 tree perm_dest, data_ref; 7160 gimple *perm_stmt; 7161 7162 tree scalar_dest = gimple_get_lhs (stmt); 7163 if (TREE_CODE (scalar_dest) == SSA_NAME) 7164 perm_dest = vect_create_destination_var (scalar_dest, vectype); 7165 else 7166 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL); 7167 data_ref = make_ssa_name (perm_dest); 7168 7169 /* Generate the permute statement. */ 7170 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec); 7171 vect_finish_stmt_generation (stmt, perm_stmt, gsi); 7172 7173 return data_ref; 7174 } 7175 7176 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP, 7177 inserting them on the loops preheader edge. Returns true if we 7178 were successful in doing so (and thus STMT can be moved then), 7179 otherwise returns false. */ 7180 7181 static bool 7182 hoist_defs_of_uses (gimple *stmt, struct loop *loop) 7183 { 7184 ssa_op_iter i; 7185 tree op; 7186 bool any = false; 7187 7188 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE) 7189 { 7190 gimple *def_stmt = SSA_NAME_DEF_STMT (op); 7191 if (!gimple_nop_p (def_stmt) 7192 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))) 7193 { 7194 /* Make sure we don't need to recurse. While we could do 7195 so in simple cases when there are more complex use webs 7196 we don't have an easy way to preserve stmt order to fulfil 7197 dependencies within them. */ 7198 tree op2; 7199 ssa_op_iter i2; 7200 if (gimple_code (def_stmt) == GIMPLE_PHI) 7201 return false; 7202 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE) 7203 { 7204 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2); 7205 if (!gimple_nop_p (def_stmt2) 7206 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2))) 7207 return false; 7208 } 7209 any = true; 7210 } 7211 } 7212 7213 if (!any) 7214 return true; 7215 7216 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE) 7217 { 7218 gimple *def_stmt = SSA_NAME_DEF_STMT (op); 7219 if (!gimple_nop_p (def_stmt) 7220 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))) 7221 { 7222 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt); 7223 gsi_remove (&gsi, false); 7224 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt); 7225 } 7226 } 7227 7228 return true; 7229 } 7230 7231 /* vectorizable_load. 7232 7233 Check if STMT reads a non scalar data-ref (array/pointer/structure) that 7234 can be vectorized. 7235 If VEC_STMT is also passed, vectorize the STMT: create a vectorized 7236 stmt to replace it, put it in VEC_STMT, and insert it at BSI. 7237 Return FALSE if not a vectorizable STMT, TRUE otherwise. 
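SLP_NODE and SLP_NODE_INSTANCE describe the SLP instance the load belongs to, if any.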
*/ 7238 7239 static bool 7240 vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt, 7241 slp_tree slp_node, slp_instance slp_node_instance) 7242 { 7243 tree scalar_dest; 7244 tree vec_dest = NULL; 7245 tree data_ref = NULL; 7246 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 7247 stmt_vec_info prev_stmt_info; 7248 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 7249 struct loop *loop = NULL; 7250 struct loop *containing_loop = (gimple_bb (stmt))->loop_father; 7251 bool nested_in_vect_loop = false; 7252 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL; 7253 tree elem_type; 7254 tree new_temp; 7255 machine_mode mode; 7256 gimple *new_stmt = NULL; 7257 tree dummy; 7258 enum dr_alignment_support alignment_support_scheme; 7259 tree dataref_ptr = NULL_TREE; 7260 tree dataref_offset = NULL_TREE; 7261 gimple *ptr_incr = NULL; 7262 int ncopies; 7263 int i, j; 7264 unsigned int group_size; 7265 poly_uint64 group_gap_adj; 7266 tree msq = NULL_TREE, lsq; 7267 tree offset = NULL_TREE; 7268 tree byte_offset = NULL_TREE; 7269 tree realignment_token = NULL_TREE; 7270 gphi *phi = NULL; 7271 vec<tree> dr_chain = vNULL; 7272 bool grouped_load = false; 7273 gimple *first_stmt; 7274 gimple *first_stmt_for_drptr = NULL; 7275 bool inv_p; 7276 bool compute_in_loop = false; 7277 struct loop *at_loop; 7278 int vec_num; 7279 bool slp = (slp_node != NULL); 7280 bool slp_perm = false; 7281 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); 7282 poly_uint64 vf; 7283 tree aggr_type; 7284 gather_scatter_info gs_info; 7285 vec_info *vinfo = stmt_info->vinfo; 7286 tree ref_type; 7287 enum vect_def_type mask_dt = vect_unknown_def_type; 7288 7289 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) 7290 return false; 7291 7292 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def 7293 && ! 
vec_stmt) 7294 return false; 7295 7296 tree mask = NULL_TREE, mask_vectype = NULL_TREE; 7297 if (is_gimple_assign (stmt)) 7298 { 7299 scalar_dest = gimple_assign_lhs (stmt); 7300 if (TREE_CODE (scalar_dest) != SSA_NAME) 7301 return false; 7302 7303 tree_code code = gimple_assign_rhs_code (stmt); 7304 if (code != ARRAY_REF 7305 && code != BIT_FIELD_REF 7306 && code != INDIRECT_REF 7307 && code != COMPONENT_REF 7308 && code != IMAGPART_EXPR 7309 && code != REALPART_EXPR 7310 && code != MEM_REF 7311 && TREE_CODE_CLASS (code) != tcc_declaration) 7312 return false; 7313 } 7314 else 7315 { 7316 gcall *call = dyn_cast <gcall *> (stmt); 7317 if (!call || !gimple_call_internal_p (call)) 7318 return false; 7319 7320 internal_fn ifn = gimple_call_internal_fn (call); 7321 if (!internal_load_fn_p (ifn)) 7322 return false; 7323 7324 scalar_dest = gimple_call_lhs (call); 7325 if (!scalar_dest) 7326 return false; 7327 7328 if (slp_node != NULL) 7329 { 7330 if (dump_enabled_p ()) 7331 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 7332 "SLP of masked loads not supported.\n"); 7333 return false; 7334 } 7335 7336 int mask_index = internal_fn_mask_index (ifn); 7337 if (mask_index >= 0) 7338 { 7339 mask = gimple_call_arg (call, mask_index); 7340 if (!vect_check_load_store_mask (stmt, mask, &mask_dt, 7341 &mask_vectype)) 7342 return false; 7343 } 7344 } 7345 7346 if (!STMT_VINFO_DATA_REF (stmt_info)) 7347 return false; 7348 7349 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 7350 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); 7351 7352 if (loop_vinfo) 7353 { 7354 loop = LOOP_VINFO_LOOP (loop_vinfo); 7355 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt); 7356 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); 7357 } 7358 else 7359 vf = 1; 7360 7361 /* Multiple types in SLP are handled by creating the appropriate number of 7362 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in 7363 case of SLP. */ 7364 if (slp) 7365 ncopies = 1; 7366 else 7367 ncopies = vect_get_num_copies (loop_vinfo, vectype); 7368 7369 gcc_assert (ncopies >= 1); 7370 7371 /* FORNOW. This restriction should be relaxed. */ 7372 if (nested_in_vect_loop && ncopies > 1) 7373 { 7374 if (dump_enabled_p ()) 7375 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 7376 "multiple types in nested loop.\n"); 7377 return false; 7378 } 7379 7380 /* Invalidate assumptions made by dependence analysis when vectorization 7381 on the unrolled body effectively re-orders stmts. */ 7382 if (ncopies > 1 7383 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0 7384 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo), 7385 STMT_VINFO_MIN_NEG_DIST (stmt_info))) 7386 { 7387 if (dump_enabled_p ()) 7388 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 7389 "cannot perform implicit CSE when unrolling " 7390 "with negative dependence distance\n"); 7391 return false; 7392 } 7393 7394 elem_type = TREE_TYPE (vectype); 7395 mode = TYPE_MODE (vectype); 7396 7397 /* FORNOW. In some cases can vectorize even if data-type not supported 7398 (e.g. - data copies). */ 7399 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing) 7400 { 7401 if (dump_enabled_p ()) 7402 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 7403 "Aligned load, but unsupported type.\n"); 7404 return false; 7405 } 7406 7407 /* Check if the load is a part of an interleaving chain. 
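If it is, the whole group is handled together: FIRST_STMT and GROUP_SIZE describe the chain, and an existing SLP load permutation is noted in SLP_PERM.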
*/ 7408 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) 7409 { 7410 grouped_load = true; 7411 /* FORNOW */ 7412 gcc_assert (!nested_in_vect_loop); 7413 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info)); 7414 7415 first_stmt = GROUP_FIRST_ELEMENT (stmt_info); 7416 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt)); 7417 7418 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()) 7419 slp_perm = true; 7420 7421 /* Invalidate assumptions made by dependence analysis when vectorization 7422 on the unrolled body effectively re-orders stmts. */ 7423 if (!PURE_SLP_STMT (stmt_info) 7424 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0 7425 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo), 7426 STMT_VINFO_MIN_NEG_DIST (stmt_info))) 7427 { 7428 if (dump_enabled_p ()) 7429 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 7430 "cannot perform implicit CSE when performing " 7431 "group loads with negative dependence distance\n"); 7432 return false; 7433 } 7434 7435 /* Similarly when the stmt is a load that is both part of a SLP 7436 instance and a loop vectorized stmt via the same-dr mechanism 7437 we have to give up. */ 7438 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info) 7439 && (STMT_SLP_TYPE (stmt_info) 7440 != STMT_SLP_TYPE (vinfo_for_stmt 7441 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info))))) 7442 { 7443 if (dump_enabled_p ()) 7444 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 7445 "conflicting SLP types for CSEd load\n"); 7446 return false; 7447 } 7448 } 7449 else 7450 group_size = 1; 7451 7452 vect_memory_access_type memory_access_type; 7453 if (!get_load_store_type (stmt, vectype, slp, mask, VLS_LOAD, ncopies, 7454 &memory_access_type, &gs_info)) 7455 return false; 7456 7457 if (mask) 7458 { 7459 if (memory_access_type == VMAT_CONTIGUOUS) 7460 { 7461 machine_mode vec_mode = TYPE_MODE (vectype); 7462 if (!VECTOR_MODE_P (vec_mode) 7463 || !can_vec_mask_load_store_p (vec_mode, 7464 TYPE_MODE (mask_vectype), true)) 7465 return false; 7466 } 7467 else if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl) 7468 { 7469 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl)); 7470 tree masktype 7471 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist)))); 7472 if (TREE_CODE (masktype) == INTEGER_TYPE) 7473 { 7474 if (dump_enabled_p ()) 7475 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 7476 "masked gather with integer mask not" 7477 " supported."); 7478 return false; 7479 } 7480 } 7481 else if (memory_access_type != VMAT_LOAD_STORE_LANES 7482 && memory_access_type != VMAT_GATHER_SCATTER) 7483 { 7484 if (dump_enabled_p ()) 7485 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 7486 "unsupported access type for masked load.\n"); 7487 return false; 7488 } 7489 } 7490 7491 if (!vec_stmt) /* transformation not required. */ 7492 { 7493 if (!slp) 7494 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type; 7495 7496 if (loop_vinfo 7497 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)) 7498 check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size, 7499 memory_access_type, &gs_info); 7500 7501 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type; 7502 /* The SLP costs are calculated during SLP analysis. */ 7503 if (! slp_node) 7504 vect_model_load_cost (stmt_info, ncopies, memory_access_type, 7505 NULL, NULL, NULL); 7506 return true; 7507 } 7508 7509 if (!slp) 7510 gcc_assert (memory_access_type 7511 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info)); 7512 7513 if (dump_enabled_p ()) 7514 dump_printf_loc (MSG_NOTE, vect_location, 7515 "transform load. 
ncopies = %d\n", ncopies); 7516 7517 /* Transform. */ 7518 7519 ensure_base_align (dr); 7520 7521 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl) 7522 { 7523 vect_build_gather_load_calls (stmt, gsi, vec_stmt, &gs_info, mask, 7524 mask_dt); 7525 return true; 7526 } 7527 7528 if (memory_access_type == VMAT_ELEMENTWISE 7529 || memory_access_type == VMAT_STRIDED_SLP) 7530 { 7531 gimple_stmt_iterator incr_gsi; 7532 bool insert_after; 7533 gimple *incr; 7534 tree offvar; 7535 tree ivstep; 7536 tree running_off; 7537 vec<constructor_elt, va_gc> *v = NULL; 7538 tree stride_base, stride_step, alias_off; 7539 /* Checked by get_load_store_type. */ 7540 unsigned int const_nunits = nunits.to_constant (); 7541 unsigned HOST_WIDE_INT cst_offset = 0; 7542 7543 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)); 7544 gcc_assert (!nested_in_vect_loop); 7545 7546 if (grouped_load) 7547 { 7548 first_stmt = GROUP_FIRST_ELEMENT (stmt_info); 7549 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)); 7550 } 7551 else 7552 { 7553 first_stmt = stmt; 7554 first_dr = dr; 7555 } 7556 if (slp && grouped_load) 7557 { 7558 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt)); 7559 ref_type = get_group_alias_ptr_type (first_stmt); 7560 } 7561 else 7562 { 7563 if (grouped_load) 7564 cst_offset 7565 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype))) 7566 * vect_get_place_in_interleaving_chain (stmt, first_stmt)); 7567 group_size = 1; 7568 ref_type = reference_alias_ptr_type (DR_REF (dr)); 7569 } 7570 7571 stride_base 7572 = fold_build_pointer_plus 7573 (DR_BASE_ADDRESS (first_dr), 7574 size_binop (PLUS_EXPR, 7575 convert_to_ptrofftype (DR_OFFSET (first_dr)), 7576 convert_to_ptrofftype (DR_INIT (first_dr)))); 7577 stride_step = fold_convert (sizetype, DR_STEP (first_dr)); 7578 7579 /* For a load with loop-invariant (but other than power-of-2) 7580 stride (i.e. not a grouped access) like so: 7581 7582 for (i = 0; i < n; i += stride) 7583 ... = array[i]; 7584 7585 we generate a new induction variable and new accesses to 7586 form a new vector (or vectors, depending on ncopies): 7587 7588 for (j = 0; ; j += VF*stride) 7589 tmp1 = array[j]; 7590 tmp2 = array[j + stride]; 7591 ... 7592 vectemp = {tmp1, tmp2, ...} 7593 */ 7594 7595 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step, 7596 build_int_cst (TREE_TYPE (stride_step), vf)); 7597 7598 standard_iv_increment_position (loop, &incr_gsi, &insert_after); 7599 7600 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base); 7601 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep); 7602 create_iv (stride_base, ivstep, NULL, 7603 loop, &incr_gsi, insert_after, 7604 &offvar, NULL); 7605 incr = gsi_stmt (incr_gsi); 7606 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo)); 7607 7608 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step); 7609 7610 prev_stmt_info = NULL; 7611 running_off = offvar; 7612 alias_off = build_int_cst (ref_type, 0); 7613 int nloads = const_nunits; 7614 int lnel = 1; 7615 tree ltype = TREE_TYPE (vectype); 7616 tree lvectype = vectype; 7617 auto_vec<tree> dr_chain; 7618 if (memory_access_type == VMAT_STRIDED_SLP) 7619 { 7620 if (group_size < const_nunits) 7621 { 7622 /* First check if vec_init optab supports construction from 7623 vector elts directly. 
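If it does, each load reads GROUP_SIZE elements as a small vector and the full vectype is then built from NLOADS such sub-vectors.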
*/ 7624 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype)); 7625 machine_mode vmode; 7626 if (mode_for_vector (elmode, group_size).exists (&vmode) 7627 && VECTOR_MODE_P (vmode) 7628 && targetm.vector_mode_supported_p (vmode) 7629 && (convert_optab_handler (vec_init_optab, 7630 TYPE_MODE (vectype), vmode) 7631 != CODE_FOR_nothing)) 7632 { 7633 nloads = const_nunits / group_size; 7634 lnel = group_size; 7635 ltype = build_vector_type (TREE_TYPE (vectype), group_size); 7636 } 7637 else 7638 { 7639 /* Otherwise avoid emitting a constructor of vector elements 7640 by performing the loads using an integer type of the same 7641 size, constructing a vector of those and then 7642 re-interpreting it as the original vector type. 7643 This avoids a huge runtime penalty due to the general 7644 inability to perform store forwarding from smaller stores 7645 to a larger load. */ 7646 unsigned lsize 7647 = group_size * TYPE_PRECISION (TREE_TYPE (vectype)); 7648 elmode = int_mode_for_size (lsize, 0).require (); 7649 unsigned int lnunits = const_nunits / group_size; 7650 /* If we can't construct such a vector fall back to 7651 element loads of the original vector type. */ 7652 if (mode_for_vector (elmode, lnunits).exists (&vmode) 7653 && VECTOR_MODE_P (vmode) 7654 && targetm.vector_mode_supported_p (vmode) 7655 && (convert_optab_handler (vec_init_optab, vmode, elmode) 7656 != CODE_FOR_nothing)) 7657 { 7658 nloads = lnunits; 7659 lnel = group_size; 7660 ltype = build_nonstandard_integer_type (lsize, 1); 7661 lvectype = build_vector_type (ltype, nloads); 7662 } 7663 } 7664 } 7665 else 7666 { 7667 nloads = 1; 7668 lnel = const_nunits; 7669 ltype = vectype; 7670 } 7671 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype))); 7672 } 7673 /* Load vector(1) scalar_type if it's 1 element-wise vectype. */ 7674 else if (nloads == 1) 7675 ltype = vectype; 7676 7677 if (slp) 7678 { 7679 /* For SLP permutation support we need to load the whole group, 7680 not only the number of vector stmts the permutation result 7681 fits in. */ 7682 if (slp_perm) 7683 { 7684 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for 7685 variable VF. */ 7686 unsigned int const_vf = vf.to_constant (); 7687 ncopies = CEIL (group_size * const_vf, const_nunits); 7688 dr_chain.create (ncopies); 7689 } 7690 else 7691 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); 7692 } 7693 unsigned int group_el = 0; 7694 unsigned HOST_WIDE_INT 7695 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype))); 7696 for (j = 0; j < ncopies; j++) 7697 { 7698 if (nloads > 1) 7699 vec_alloc (v, nloads); 7700 for (i = 0; i < nloads; i++) 7701 { 7702 tree this_off = build_int_cst (TREE_TYPE (alias_off), 7703 group_el * elsz + cst_offset); 7704 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off); 7705 vect_copy_ref_info (data_ref, DR_REF (first_dr)); 7706 new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref); 7707 vect_finish_stmt_generation (stmt, new_stmt, gsi); 7708 if (nloads > 1) 7709 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, 7710 gimple_assign_lhs (new_stmt)); 7711 7712 group_el += lnel; 7713 if (! 
slp 7714 || group_el == group_size) 7715 { 7716 tree newoff = copy_ssa_name (running_off); 7717 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR, 7718 running_off, stride_step); 7719 vect_finish_stmt_generation (stmt, incr, gsi); 7720 7721 running_off = newoff; 7722 group_el = 0; 7723 } 7724 } 7725 if (nloads > 1) 7726 { 7727 tree vec_inv = build_constructor (lvectype, v); 7728 new_temp = vect_init_vector (stmt, vec_inv, lvectype, gsi); 7729 new_stmt = SSA_NAME_DEF_STMT (new_temp); 7730 if (lvectype != vectype) 7731 { 7732 new_stmt = gimple_build_assign (make_ssa_name (vectype), 7733 VIEW_CONVERT_EXPR, 7734 build1 (VIEW_CONVERT_EXPR, 7735 vectype, new_temp)); 7736 vect_finish_stmt_generation (stmt, new_stmt, gsi); 7737 } 7738 } 7739 7740 if (slp) 7741 { 7742 if (slp_perm) 7743 dr_chain.quick_push (gimple_assign_lhs (new_stmt)); 7744 else 7745 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt); 7746 } 7747 else 7748 { 7749 if (j == 0) 7750 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; 7751 else 7752 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; 7753 prev_stmt_info = vinfo_for_stmt (new_stmt); 7754 } 7755 } 7756 if (slp_perm) 7757 { 7758 unsigned n_perms; 7759 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf, 7760 slp_node_instance, false, &n_perms); 7761 } 7762 return true; 7763 } 7764 7765 if (memory_access_type == VMAT_GATHER_SCATTER 7766 || (!slp && memory_access_type == VMAT_CONTIGUOUS)) 7767 grouped_load = false; 7768 7769 if (grouped_load) 7770 { 7771 first_stmt = GROUP_FIRST_ELEMENT (stmt_info); 7772 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt)); 7773 /* For SLP vectorization we directly vectorize a subchain 7774 without permutation. */ 7775 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()) 7776 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0]; 7777 /* For BB vectorization always use the first stmt to base 7778 the data ref pointer on. */ 7779 if (bb_vinfo) 7780 first_stmt_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0]; 7781 7782 /* Check if the chain of loads is already vectorized. */ 7783 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)) 7784 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS. 7785 ??? But we can only do so if there is exactly one 7786 as we have no way to get at the rest. Leave the CSE 7787 opportunity alone. 7788 ??? With the group load eventually participating 7789 in multiple different permutations (having multiple 7790 slp nodes which refer to the same group) the CSE 7791 is even wrong code. See PR56270. */ 7792 && !slp) 7793 { 7794 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info); 7795 return true; 7796 } 7797 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)); 7798 group_gap_adj = 0; 7799 7800 /* VEC_NUM is the number of vect stmts to be created for this group. */ 7801 if (slp) 7802 { 7803 grouped_load = false; 7804 /* For SLP permutation support we need to load the whole group, 7805 not only the number of vector stmts the permutation result 7806 fits in. */ 7807 if (slp_perm) 7808 { 7809 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for 7810 variable VF. 
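so calling to_constant () on VF below is safe.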
*/ 7811 unsigned int const_vf = vf.to_constant (); 7812 unsigned int const_nunits = nunits.to_constant (); 7813 vec_num = CEIL (group_size * const_vf, const_nunits); 7814 group_gap_adj = vf * group_size - nunits * vec_num; 7815 } 7816 else 7817 { 7818 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); 7819 group_gap_adj 7820 = group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance); 7821 } 7822 } 7823 else 7824 vec_num = group_size; 7825 7826 ref_type = get_group_alias_ptr_type (first_stmt); 7827 } 7828 else 7829 { 7830 first_stmt = stmt; 7831 first_dr = dr; 7832 group_size = vec_num = 1; 7833 group_gap_adj = 0; 7834 ref_type = reference_alias_ptr_type (DR_REF (first_dr)); 7835 } 7836 7837 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false); 7838 gcc_assert (alignment_support_scheme); 7839 vec_loop_masks *loop_masks 7840 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo) 7841 ? &LOOP_VINFO_MASKS (loop_vinfo) 7842 : NULL); 7843 /* Targets with store-lane instructions must not require explicit 7844 realignment. vect_supportable_dr_alignment always returns either 7845 dr_aligned or dr_unaligned_supported for masked operations. */ 7846 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES 7847 && !mask 7848 && !loop_masks) 7849 || alignment_support_scheme == dr_aligned 7850 || alignment_support_scheme == dr_unaligned_supported); 7851 7852 /* In case the vectorization factor (VF) is bigger than the number 7853 of elements that we can fit in a vectype (nunits), we have to generate 7854 more than one vector stmt - i.e - we need to "unroll" the 7855 vector stmt by a factor VF/nunits. In doing so, we record a pointer 7856 from one copy of the vector stmt to the next, in the field 7857 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following 7858 stages to find the correct vector defs to be used when vectorizing 7859 stmts that use the defs of the current stmt. The example below 7860 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we 7861 need to create 4 vectorized stmts): 7862 7863 before vectorization: 7864 RELATED_STMT VEC_STMT 7865 S1: x = memref - - 7866 S2: z = x + 1 - - 7867 7868 step 1: vectorize stmt S1: 7869 We first create the vector stmt VS1_0, and, as usual, record a 7870 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1. 7871 Next, we create the vector stmt VS1_1, and record a pointer to 7872 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0. 7873 Similarly, for VS1_2 and VS1_3. This is the resulting chain of 7874 stmts and pointers: 7875 RELATED_STMT VEC_STMT 7876 VS1_0: vx0 = memref0 VS1_1 - 7877 VS1_1: vx1 = memref1 VS1_2 - 7878 VS1_2: vx2 = memref2 VS1_3 - 7879 VS1_3: vx3 = memref3 - - 7880 S1: x = load - VS1_0 7881 S2: z = x + 1 - - 7882 7883 See in documentation in vect_get_vec_def_for_stmt_copy for how the 7884 information we recorded in RELATED_STMT field is used to vectorize 7885 stmt S2. 
*/ 7886 7887 /* In case of interleaving (non-unit grouped access): 7888 7889 S1: x2 = &base + 2 7890 S2: x0 = &base 7891 S3: x1 = &base + 1 7892 S4: x3 = &base + 3 7893 7894 Vectorized loads are created in the order of memory accesses 7895 starting from the access of the first stmt of the chain: 7896 7897 VS1: vx0 = &base 7898 VS2: vx1 = &base + vec_size*1 7899 VS3: vx3 = &base + vec_size*2 7900 VS4: vx4 = &base + vec_size*3 7901 7902 Then permutation statements are generated: 7903 7904 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } > 7905 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } > 7906 ... 7907 7908 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts 7909 (the order of the data-refs in the output of vect_permute_load_chain 7910 corresponds to the order of scalar stmts in the interleaving chain - see 7911 the documentation of vect_permute_load_chain()). 7912 The generation of permutation stmts and recording them in 7913 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load(). 7914 7915 In case of both multiple types and interleaving, the vector loads and 7916 permutation stmts above are created for every copy. The result vector 7917 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the 7918 corresponding STMT_VINFO_RELATED_STMT for the next copies. */ 7919 7920 /* If the data reference is aligned (dr_aligned) or potentially unaligned 7921 on a target that supports unaligned accesses (dr_unaligned_supported) 7922 we generate the following code: 7923 p = initial_addr; 7924 indx = 0; 7925 loop { 7926 p = p + indx * vectype_size; 7927 vec_dest = *(p); 7928 indx = indx + 1; 7929 } 7930 7931 Otherwise, the data reference is potentially unaligned on a target that 7932 does not support unaligned accesses (dr_explicit_realign_optimized) - 7933 then generate the following code, in which the data in each iteration is 7934 obtained by two vector loads, one from the previous iteration, and one 7935 from the current iteration: 7936 p1 = initial_addr; 7937 msq_init = *(floor(p1)) 7938 p2 = initial_addr + VS - 1; 7939 realignment_token = call target_builtin; 7940 indx = 0; 7941 loop { 7942 p2 = p2 + indx * vectype_size 7943 lsq = *(floor(p2)) 7944 vec_dest = realign_load (msq, lsq, realignment_token) 7945 indx = indx + 1; 7946 msq = lsq; 7947 } */ 7948 7949 /* If the misalignment remains the same throughout the execution of the 7950 loop, we can create the init_addr and permutation mask at the loop 7951 preheader. Otherwise, it needs to be created inside the loop. 7952 This can only occur when vectorizing memory accesses in the inner-loop 7953 nested within an outer-loop that is being vectorized. 
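The check below detects this case: if DR_STEP_ALIGNMENT is not a multiple of the vector size, the misalignment may change between outer-loop iterations and the realignment data must be computed inside the loop.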
*/ 7954 7955 if (nested_in_vect_loop 7956 && !multiple_p (DR_STEP_ALIGNMENT (dr), 7957 GET_MODE_SIZE (TYPE_MODE (vectype)))) 7958 { 7959 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized); 7960 compute_in_loop = true; 7961 } 7962 7963 if ((alignment_support_scheme == dr_explicit_realign_optimized 7964 || alignment_support_scheme == dr_explicit_realign) 7965 && !compute_in_loop) 7966 { 7967 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token, 7968 alignment_support_scheme, NULL_TREE, 7969 &at_loop); 7970 if (alignment_support_scheme == dr_explicit_realign_optimized) 7971 { 7972 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq)); 7973 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype), 7974 size_one_node); 7975 } 7976 } 7977 else 7978 at_loop = loop; 7979 7980 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE) 7981 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1); 7982 7983 tree bump; 7984 tree vec_offset = NULL_TREE; 7985 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) 7986 { 7987 aggr_type = NULL_TREE; 7988 bump = NULL_TREE; 7989 } 7990 else if (memory_access_type == VMAT_GATHER_SCATTER) 7991 { 7992 aggr_type = elem_type; 7993 vect_get_strided_load_store_ops (stmt, loop_vinfo, &gs_info, 7994 &bump, &vec_offset); 7995 } 7996 else 7997 { 7998 if (memory_access_type == VMAT_LOAD_STORE_LANES) 7999 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits); 8000 else 8001 aggr_type = vectype; 8002 bump = vect_get_data_ptr_increment (dr, aggr_type, memory_access_type); 8003 } 8004 8005 tree vec_mask = NULL_TREE; 8006 prev_stmt_info = NULL; 8007 poly_uint64 group_elt = 0; 8008 for (j = 0; j < ncopies; j++) 8009 { 8010 /* 1. Create the vector or array pointer update chain. */ 8011 if (j == 0) 8012 { 8013 bool simd_lane_access_p 8014 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info); 8015 if (simd_lane_access_p 8016 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR 8017 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0)) 8018 && integer_zerop (DR_OFFSET (first_dr)) 8019 && integer_zerop (DR_INIT (first_dr)) 8020 && alias_sets_conflict_p (get_alias_set (aggr_type), 8021 get_alias_set (TREE_TYPE (ref_type))) 8022 && (alignment_support_scheme == dr_aligned 8023 || alignment_support_scheme == dr_unaligned_supported)) 8024 { 8025 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr)); 8026 dataref_offset = build_int_cst (ref_type, 0); 8027 inv_p = false; 8028 } 8029 else if (first_stmt_for_drptr 8030 && first_stmt != first_stmt_for_drptr) 8031 { 8032 dataref_ptr 8033 = vect_create_data_ref_ptr (first_stmt_for_drptr, aggr_type, 8034 at_loop, offset, &dummy, gsi, 8035 &ptr_incr, simd_lane_access_p, 8036 &inv_p, byte_offset, bump); 8037 /* Adjust the pointer by the difference to first_stmt. 
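The difference is DR_INIT (first_dr) - DR_INIT (ptrdr), converted to sizetype and applied via bump_vector_ptr.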
*/ 8038 data_reference_p ptrdr 8039 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr)); 8040 tree diff = fold_convert (sizetype, 8041 size_binop (MINUS_EXPR, 8042 DR_INIT (first_dr), 8043 DR_INIT (ptrdr))); 8044 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, 8045 stmt, diff); 8046 } 8047 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) 8048 { 8049 vect_get_gather_scatter_ops (loop, stmt, &gs_info, 8050 &dataref_ptr, &vec_offset); 8051 inv_p = false; 8052 } 8053 else 8054 dataref_ptr 8055 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop, 8056 offset, &dummy, gsi, &ptr_incr, 8057 simd_lane_access_p, &inv_p, 8058 byte_offset, bump); 8059 if (mask) 8060 vec_mask = vect_get_vec_def_for_operand (mask, stmt, 8061 mask_vectype); 8062 } 8063 else 8064 { 8065 if (dataref_offset) 8066 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, 8067 bump); 8068 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) 8069 vec_offset = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt, 8070 vec_offset); 8071 else 8072 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, 8073 stmt, bump); 8074 if (mask) 8075 vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask); 8076 } 8077 8078 if (grouped_load || slp_perm) 8079 dr_chain.create (vec_num); 8080 8081 if (memory_access_type == VMAT_LOAD_STORE_LANES) 8082 { 8083 tree vec_array; 8084 8085 vec_array = create_vector_array (vectype, vec_num); 8086 8087 tree final_mask = NULL_TREE; 8088 if (loop_masks) 8089 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies, 8090 vectype, j); 8091 if (vec_mask) 8092 final_mask = prepare_load_store_mask (mask_vectype, final_mask, 8093 vec_mask, gsi); 8094 8095 gcall *call; 8096 if (final_mask) 8097 { 8098 /* Emit: 8099 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR, 8100 VEC_MASK). */ 8101 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype)); 8102 tree alias_ptr = build_int_cst (ref_type, align); 8103 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3, 8104 dataref_ptr, alias_ptr, 8105 final_mask); 8106 } 8107 else 8108 { 8109 /* Emit: 8110 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */ 8111 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type); 8112 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref); 8113 } 8114 gimple_call_set_lhs (call, vec_array); 8115 gimple_call_set_nothrow (call, true); 8116 new_stmt = call; 8117 vect_finish_stmt_generation (stmt, new_stmt, gsi); 8118 8119 /* Extract each vector into an SSA_NAME. */ 8120 for (i = 0; i < vec_num; i++) 8121 { 8122 new_temp = read_vector_array (stmt, gsi, scalar_dest, 8123 vec_array, i); 8124 dr_chain.quick_push (new_temp); 8125 } 8126 8127 /* Record the mapping between SSA_NAMEs and statements. */ 8128 vect_record_grouped_load_vectors (stmt, dr_chain); 8129 } 8130 else 8131 { 8132 for (i = 0; i < vec_num; i++) 8133 { 8134 tree final_mask = NULL_TREE; 8135 if (loop_masks 8136 && memory_access_type != VMAT_INVARIANT) 8137 final_mask = vect_get_loop_mask (gsi, loop_masks, 8138 vec_num * ncopies, 8139 vectype, vec_num * j + i); 8140 if (vec_mask) 8141 final_mask = prepare_load_store_mask (mask_vectype, final_mask, 8142 vec_mask, gsi); 8143 8144 if (i > 0) 8145 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, 8146 stmt, bump); 8147 8148 /* 2. Create the vector-load in the loop. 
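The code emitted depends on ALIGNMENT_SUPPORT_SCHEME: a plain (possibly unaligned or masked) vector load, an explicit realignment sequence, or the optimized realignment scheme that reuses the previous iteration's load.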
*/ 8149 switch (alignment_support_scheme) 8150 { 8151 case dr_aligned: 8152 case dr_unaligned_supported: 8153 { 8154 unsigned int align, misalign; 8155 8156 if (memory_access_type == VMAT_GATHER_SCATTER) 8157 { 8158 tree scale = size_int (gs_info.scale); 8159 gcall *call; 8160 if (loop_masks) 8161 call = gimple_build_call_internal 8162 (IFN_MASK_GATHER_LOAD, 4, dataref_ptr, 8163 vec_offset, scale, final_mask); 8164 else 8165 call = gimple_build_call_internal 8166 (IFN_GATHER_LOAD, 3, dataref_ptr, 8167 vec_offset, scale); 8168 gimple_call_set_nothrow (call, true); 8169 new_stmt = call; 8170 data_ref = NULL_TREE; 8171 break; 8172 } 8173 8174 align = DR_TARGET_ALIGNMENT (dr); 8175 if (alignment_support_scheme == dr_aligned) 8176 { 8177 gcc_assert (aligned_access_p (first_dr)); 8178 misalign = 0; 8179 } 8180 else if (DR_MISALIGNMENT (first_dr) == -1) 8181 { 8182 align = dr_alignment (vect_dr_behavior (first_dr)); 8183 misalign = 0; 8184 } 8185 else 8186 misalign = DR_MISALIGNMENT (first_dr); 8187 if (dataref_offset == NULL_TREE 8188 && TREE_CODE (dataref_ptr) == SSA_NAME) 8189 set_ptr_info_alignment (get_ptr_info (dataref_ptr), 8190 align, misalign); 8191 8192 if (final_mask) 8193 { 8194 align = least_bit_hwi (misalign | align); 8195 tree ptr = build_int_cst (ref_type, align); 8196 gcall *call 8197 = gimple_build_call_internal (IFN_MASK_LOAD, 3, 8198 dataref_ptr, ptr, 8199 final_mask); 8200 gimple_call_set_nothrow (call, true); 8201 new_stmt = call; 8202 data_ref = NULL_TREE; 8203 } 8204 else 8205 { 8206 data_ref 8207 = fold_build2 (MEM_REF, vectype, dataref_ptr, 8208 dataref_offset 8209 ? dataref_offset 8210 : build_int_cst (ref_type, 0)); 8211 if (alignment_support_scheme == dr_aligned) 8212 ; 8213 else if (DR_MISALIGNMENT (first_dr) == -1) 8214 TREE_TYPE (data_ref) 8215 = build_aligned_type (TREE_TYPE (data_ref), 8216 align * BITS_PER_UNIT); 8217 else 8218 TREE_TYPE (data_ref) 8219 = build_aligned_type (TREE_TYPE (data_ref), 8220 TYPE_ALIGN (elem_type)); 8221 } 8222 break; 8223 } 8224 case dr_explicit_realign: 8225 { 8226 tree ptr, bump; 8227 8228 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype)); 8229 8230 if (compute_in_loop) 8231 msq = vect_setup_realignment (first_stmt, gsi, 8232 &realignment_token, 8233 dr_explicit_realign, 8234 dataref_ptr, NULL); 8235 8236 if (TREE_CODE (dataref_ptr) == SSA_NAME) 8237 ptr = copy_ssa_name (dataref_ptr); 8238 else 8239 ptr = make_ssa_name (TREE_TYPE (dataref_ptr)); 8240 unsigned int align = DR_TARGET_ALIGNMENT (first_dr); 8241 new_stmt = gimple_build_assign 8242 (ptr, BIT_AND_EXPR, dataref_ptr, 8243 build_int_cst 8244 (TREE_TYPE (dataref_ptr), 8245 -(HOST_WIDE_INT) align)); 8246 vect_finish_stmt_generation (stmt, new_stmt, gsi); 8247 data_ref 8248 = build2 (MEM_REF, vectype, ptr, 8249 build_int_cst (ref_type, 0)); 8250 vect_copy_ref_info (data_ref, DR_REF (first_dr)); 8251 vec_dest = vect_create_destination_var (scalar_dest, 8252 vectype); 8253 new_stmt = gimple_build_assign (vec_dest, data_ref); 8254 new_temp = make_ssa_name (vec_dest, new_stmt); 8255 gimple_assign_set_lhs (new_stmt, new_temp); 8256 gimple_set_vdef (new_stmt, gimple_vdef (stmt)); 8257 gimple_set_vuse (new_stmt, gimple_vuse (stmt)); 8258 vect_finish_stmt_generation (stmt, new_stmt, gsi); 8259 msq = new_temp; 8260 8261 bump = size_binop (MULT_EXPR, vs, 8262 TYPE_SIZE_UNIT (elem_type)); 8263 bump = size_binop (MINUS_EXPR, bump, size_one_node); 8264 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump); 8265 new_stmt = gimple_build_assign 8266 (NULL_TREE, BIT_AND_EXPR, ptr, 8267 
build_int_cst 8268 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align)); 8269 ptr = copy_ssa_name (ptr, new_stmt); 8270 gimple_assign_set_lhs (new_stmt, ptr); 8271 vect_finish_stmt_generation (stmt, new_stmt, gsi); 8272 data_ref 8273 = build2 (MEM_REF, vectype, ptr, 8274 build_int_cst (ref_type, 0)); 8275 break; 8276 } 8277 case dr_explicit_realign_optimized: 8278 { 8279 if (TREE_CODE (dataref_ptr) == SSA_NAME) 8280 new_temp = copy_ssa_name (dataref_ptr); 8281 else 8282 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr)); 8283 unsigned int align = DR_TARGET_ALIGNMENT (first_dr); 8284 new_stmt = gimple_build_assign 8285 (new_temp, BIT_AND_EXPR, dataref_ptr, 8286 build_int_cst (TREE_TYPE (dataref_ptr), 8287 -(HOST_WIDE_INT) align)); 8288 vect_finish_stmt_generation (stmt, new_stmt, gsi); 8289 data_ref 8290 = build2 (MEM_REF, vectype, new_temp, 8291 build_int_cst (ref_type, 0)); 8292 break; 8293 } 8294 default: 8295 gcc_unreachable (); 8296 } 8297 vec_dest = vect_create_destination_var (scalar_dest, vectype); 8298 /* DATA_REF is null if we've already built the statement. */ 8299 if (data_ref) 8300 { 8301 vect_copy_ref_info (data_ref, DR_REF (first_dr)); 8302 new_stmt = gimple_build_assign (vec_dest, data_ref); 8303 } 8304 new_temp = make_ssa_name (vec_dest, new_stmt); 8305 gimple_set_lhs (new_stmt, new_temp); 8306 vect_finish_stmt_generation (stmt, new_stmt, gsi); 8307 8308 /* 3. Handle explicit realignment if necessary/supported. 8309 Create in loop: 8310 vec_dest = realign_load (msq, lsq, realignment_token) */ 8311 if (alignment_support_scheme == dr_explicit_realign_optimized 8312 || alignment_support_scheme == dr_explicit_realign) 8313 { 8314 lsq = gimple_assign_lhs (new_stmt); 8315 if (!realignment_token) 8316 realignment_token = dataref_ptr; 8317 vec_dest = vect_create_destination_var (scalar_dest, vectype); 8318 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR, 8319 msq, lsq, realignment_token); 8320 new_temp = make_ssa_name (vec_dest, new_stmt); 8321 gimple_assign_set_lhs (new_stmt, new_temp); 8322 vect_finish_stmt_generation (stmt, new_stmt, gsi); 8323 8324 if (alignment_support_scheme == dr_explicit_realign_optimized) 8325 { 8326 gcc_assert (phi); 8327 if (i == vec_num - 1 && j == ncopies - 1) 8328 add_phi_arg (phi, lsq, 8329 loop_latch_edge (containing_loop), 8330 UNKNOWN_LOCATION); 8331 msq = lsq; 8332 } 8333 } 8334 8335 /* 4. Handle invariant-load. */ 8336 if (inv_p && !bb_vinfo) 8337 { 8338 gcc_assert (!grouped_load); 8339 /* If we have versioned for aliasing or the loop doesn't 8340 have any data dependencies that would preclude this, 8341 then we are sure this is a loop invariant load and 8342 thus we can insert it on the preheader edge. 
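Otherwise the scalar load stays where it is and the vector is built from its result just after the current statement.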
*/ 8343 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) 8344 && !nested_in_vect_loop 8345 && hoist_defs_of_uses (stmt, loop)) 8346 { 8347 if (dump_enabled_p ()) 8348 { 8349 dump_printf_loc (MSG_NOTE, vect_location, 8350 "hoisting out of the vectorized " 8351 "loop: "); 8352 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0); 8353 } 8354 tree tem = copy_ssa_name (scalar_dest); 8355 gsi_insert_on_edge_immediate 8356 (loop_preheader_edge (loop), 8357 gimple_build_assign (tem, 8358 unshare_expr 8359 (gimple_assign_rhs1 (stmt)))); 8360 new_temp = vect_init_vector (stmt, tem, vectype, NULL); 8361 new_stmt = SSA_NAME_DEF_STMT (new_temp); 8362 set_vinfo_for_stmt (new_stmt, 8363 new_stmt_vec_info (new_stmt, vinfo)); 8364 } 8365 else 8366 { 8367 gimple_stmt_iterator gsi2 = *gsi; 8368 gsi_next (&gsi2); 8369 new_temp = vect_init_vector (stmt, scalar_dest, 8370 vectype, &gsi2); 8371 new_stmt = SSA_NAME_DEF_STMT (new_temp); 8372 } 8373 } 8374 8375 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE) 8376 { 8377 tree perm_mask = perm_mask_for_reverse (vectype); 8378 new_temp = permute_vec_elements (new_temp, new_temp, 8379 perm_mask, stmt, gsi); 8380 new_stmt = SSA_NAME_DEF_STMT (new_temp); 8381 } 8382 8383 /* Collect vector loads and later create their permutation in 8384 vect_transform_grouped_load (). */ 8385 if (grouped_load || slp_perm) 8386 dr_chain.quick_push (new_temp); 8387 8388 /* Store vector loads in the corresponding SLP_NODE. */ 8389 if (slp && !slp_perm) 8390 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt); 8391 8392 /* With SLP permutation we load the gaps as well, without 8393 we need to skip the gaps after we manage to fully load 8394 all elements. group_gap_adj is GROUP_SIZE here. */ 8395 group_elt += nunits; 8396 if (maybe_ne (group_gap_adj, 0U) 8397 && !slp_perm 8398 && known_eq (group_elt, group_size - group_gap_adj)) 8399 { 8400 poly_wide_int bump_val 8401 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type)) 8402 * group_gap_adj); 8403 tree bump = wide_int_to_tree (sizetype, bump_val); 8404 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, 8405 stmt, bump); 8406 group_elt = 0; 8407 } 8408 } 8409 /* Bump the vector pointer to account for a gap or for excess 8410 elements loaded for a permuted SLP load. */ 8411 if (maybe_ne (group_gap_adj, 0U) && slp_perm) 8412 { 8413 poly_wide_int bump_val 8414 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type)) 8415 * group_gap_adj); 8416 tree bump = wide_int_to_tree (sizetype, bump_val); 8417 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, 8418 stmt, bump); 8419 } 8420 } 8421 8422 if (slp && !slp_perm) 8423 continue; 8424 8425 if (slp_perm) 8426 { 8427 unsigned n_perms; 8428 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf, 8429 slp_node_instance, false, 8430 &n_perms)) 8431 { 8432 dr_chain.release (); 8433 return false; 8434 } 8435 } 8436 else 8437 { 8438 if (grouped_load) 8439 { 8440 if (memory_access_type != VMAT_LOAD_STORE_LANES) 8441 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi); 8442 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info); 8443 } 8444 else 8445 { 8446 if (j == 0) 8447 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; 8448 else 8449 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; 8450 prev_stmt_info = vinfo_for_stmt (new_stmt); 8451 } 8452 } 8453 dr_chain.release (); 8454 } 8455 8456 return true; 8457 } 8458 8459 /* Function vect_is_simple_cond. 8460 8461 Input: 8462 LOOP - the loop that is being vectorized. 8463 COND - Condition that is checked for simple use. 
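VECTYPE - the vector type of the statement using COND, if known; used to choose a vector type for an invariant comparison.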
8464 8465 Output: 8466 *COMP_VECTYPE - the vector type for the comparison. 8467 *DTS - The def types for the arguments of the comparison 8468 8469 Returns whether a COND can be vectorized. Checks whether 8470 condition operands are supportable using vec_is_simple_use. */ 8471 8472 static bool 8473 vect_is_simple_cond (tree cond, vec_info *vinfo, 8474 tree *comp_vectype, enum vect_def_type *dts, 8475 tree vectype) 8476 { 8477 tree lhs, rhs; 8478 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE; 8479 8480 /* Mask case. */ 8481 if (TREE_CODE (cond) == SSA_NAME 8482 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond))) 8483 { 8484 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (cond); 8485 if (!vect_is_simple_use (cond, vinfo, &lhs_def_stmt, 8486 &dts[0], comp_vectype) 8487 || !*comp_vectype 8488 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype)) 8489 return false; 8490 return true; 8491 } 8492 8493 if (!COMPARISON_CLASS_P (cond)) 8494 return false; 8495 8496 lhs = TREE_OPERAND (cond, 0); 8497 rhs = TREE_OPERAND (cond, 1); 8498 8499 if (TREE_CODE (lhs) == SSA_NAME) 8500 { 8501 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (lhs); 8502 if (!vect_is_simple_use (lhs, vinfo, &lhs_def_stmt, &dts[0], &vectype1)) 8503 return false; 8504 } 8505 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST 8506 || TREE_CODE (lhs) == FIXED_CST) 8507 dts[0] = vect_constant_def; 8508 else 8509 return false; 8510 8511 if (TREE_CODE (rhs) == SSA_NAME) 8512 { 8513 gimple *rhs_def_stmt = SSA_NAME_DEF_STMT (rhs); 8514 if (!vect_is_simple_use (rhs, vinfo, &rhs_def_stmt, &dts[1], &vectype2)) 8515 return false; 8516 } 8517 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST 8518 || TREE_CODE (rhs) == FIXED_CST) 8519 dts[1] = vect_constant_def; 8520 else 8521 return false; 8522 8523 if (vectype1 && vectype2 8524 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1), 8525 TYPE_VECTOR_SUBPARTS (vectype2))) 8526 return false; 8527 8528 *comp_vectype = vectype1 ? vectype1 : vectype2; 8529 /* Invariant comparison. */ 8530 if (! *comp_vectype && vectype) 8531 { 8532 tree scalar_type = TREE_TYPE (lhs); 8533 /* If we can widen the comparison to match vectype do so. */ 8534 if (INTEGRAL_TYPE_P (scalar_type) 8535 && tree_int_cst_lt (TYPE_SIZE (scalar_type), 8536 TYPE_SIZE (TREE_TYPE (vectype)))) 8537 scalar_type = build_nonstandard_integer_type 8538 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))), 8539 TYPE_UNSIGNED (scalar_type)); 8540 *comp_vectype = get_vectype_for_scalar_type (scalar_type); 8541 } 8542 8543 return true; 8544 } 8545 8546 /* vectorizable_condition. 8547 8548 Check if STMT is conditional modify expression that can be vectorized. 8549 If VEC_STMT is also passed, vectorize the STMT: create a vectorized 8550 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it 8551 at GSI. 8552 8553 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable 8554 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in 8555 else clause if it is 2). 8556 8557 Return FALSE if not a vectorizable STMT, TRUE otherwise. 
*/ 8558 8559 bool 8560 vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi, 8561 gimple **vec_stmt, tree reduc_def, int reduc_index, 8562 slp_tree slp_node) 8563 { 8564 tree scalar_dest = NULL_TREE; 8565 tree vec_dest = NULL_TREE; 8566 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE; 8567 tree then_clause, else_clause; 8568 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 8569 tree comp_vectype = NULL_TREE; 8570 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE; 8571 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE; 8572 tree vec_compare; 8573 tree new_temp; 8574 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 8575 enum vect_def_type dts[4] 8576 = {vect_unknown_def_type, vect_unknown_def_type, 8577 vect_unknown_def_type, vect_unknown_def_type}; 8578 int ndts = 4; 8579 int ncopies; 8580 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR; 8581 stmt_vec_info prev_stmt_info = NULL; 8582 int i, j; 8583 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); 8584 vec<tree> vec_oprnds0 = vNULL; 8585 vec<tree> vec_oprnds1 = vNULL; 8586 vec<tree> vec_oprnds2 = vNULL; 8587 vec<tree> vec_oprnds3 = vNULL; 8588 tree vec_cmp_type; 8589 bool masked = false; 8590 8591 if (reduc_index && STMT_SLP_TYPE (stmt_info)) 8592 return false; 8593 8594 vect_reduction_type reduction_type 8595 = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info); 8596 if (reduction_type == TREE_CODE_REDUCTION) 8597 { 8598 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) 8599 return false; 8600 8601 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def 8602 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle 8603 && reduc_def)) 8604 return false; 8605 8606 /* FORNOW: not yet supported. */ 8607 if (STMT_VINFO_LIVE_P (stmt_info)) 8608 { 8609 if (dump_enabled_p ()) 8610 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 8611 "value used after loop.\n"); 8612 return false; 8613 } 8614 } 8615 8616 /* Is vectorizable conditional operation? */ 8617 if (!is_gimple_assign (stmt)) 8618 return false; 8619 8620 code = gimple_assign_rhs_code (stmt); 8621 8622 if (code != COND_EXPR) 8623 return false; 8624 8625 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 8626 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE; 8627 8628 if (slp_node) 8629 ncopies = 1; 8630 else 8631 ncopies = vect_get_num_copies (loop_vinfo, vectype); 8632 8633 gcc_assert (ncopies >= 1); 8634 if (reduc_index && ncopies > 1) 8635 return false; /* FORNOW */ 8636 8637 cond_expr = gimple_assign_rhs1 (stmt); 8638 then_clause = gimple_assign_rhs2 (stmt); 8639 else_clause = gimple_assign_rhs3 (stmt); 8640 8641 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo, 8642 &comp_vectype, &dts[0], slp_node ? 
NULL : vectype) 8643 || !comp_vectype) 8644 return false; 8645 8646 gimple *def_stmt; 8647 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &def_stmt, &dts[2], 8648 &vectype1)) 8649 return false; 8650 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &def_stmt, &dts[3], 8651 &vectype2)) 8652 return false; 8653 8654 if (vectype1 && !useless_type_conversion_p (vectype, vectype1)) 8655 return false; 8656 8657 if (vectype2 && !useless_type_conversion_p (vectype, vectype2)) 8658 return false; 8659 8660 masked = !COMPARISON_CLASS_P (cond_expr); 8661 vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype); 8662 8663 if (vec_cmp_type == NULL_TREE) 8664 return false; 8665 8666 cond_code = TREE_CODE (cond_expr); 8667 if (!masked) 8668 { 8669 cond_expr0 = TREE_OPERAND (cond_expr, 0); 8670 cond_expr1 = TREE_OPERAND (cond_expr, 1); 8671 } 8672 8673 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype)) 8674 { 8675 /* Boolean values may have another representation in vectors 8676 and therefore we prefer bit operations over comparison for 8677 them (which also works for scalar masks). We store opcodes 8678 to use in bitop1 and bitop2. Statement is vectorized as 8679 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2) 8680 depending on bitop1 and bitop2 arity. */ 8681 switch (cond_code) 8682 { 8683 case GT_EXPR: 8684 bitop1 = BIT_NOT_EXPR; 8685 bitop2 = BIT_AND_EXPR; 8686 break; 8687 case GE_EXPR: 8688 bitop1 = BIT_NOT_EXPR; 8689 bitop2 = BIT_IOR_EXPR; 8690 break; 8691 case LT_EXPR: 8692 bitop1 = BIT_NOT_EXPR; 8693 bitop2 = BIT_AND_EXPR; 8694 std::swap (cond_expr0, cond_expr1); 8695 break; 8696 case LE_EXPR: 8697 bitop1 = BIT_NOT_EXPR; 8698 bitop2 = BIT_IOR_EXPR; 8699 std::swap (cond_expr0, cond_expr1); 8700 break; 8701 case NE_EXPR: 8702 bitop1 = BIT_XOR_EXPR; 8703 break; 8704 case EQ_EXPR: 8705 bitop1 = BIT_XOR_EXPR; 8706 bitop2 = BIT_NOT_EXPR; 8707 break; 8708 default: 8709 return false; 8710 } 8711 cond_code = SSA_NAME; 8712 } 8713 8714 if (!vec_stmt) 8715 { 8716 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type; 8717 if (bitop1 != NOP_EXPR) 8718 { 8719 machine_mode mode = TYPE_MODE (comp_vectype); 8720 optab optab; 8721 8722 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default); 8723 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing) 8724 return false; 8725 8726 if (bitop2 != NOP_EXPR) 8727 { 8728 optab = optab_for_tree_code (bitop2, comp_vectype, 8729 optab_default); 8730 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing) 8731 return false; 8732 } 8733 } 8734 if (expand_vec_cond_expr_p (vectype, comp_vectype, 8735 cond_code)) 8736 { 8737 if (!slp_node) 8738 vect_model_simple_cost (stmt_info, ncopies, dts, ndts, NULL, NULL); 8739 return true; 8740 } 8741 return false; 8742 } 8743 8744 /* Transform. */ 8745 8746 if (!slp_node) 8747 { 8748 vec_oprnds0.create (1); 8749 vec_oprnds1.create (1); 8750 vec_oprnds2.create (1); 8751 vec_oprnds3.create (1); 8752 } 8753 8754 /* Handle def. */ 8755 scalar_dest = gimple_assign_lhs (stmt); 8756 if (reduction_type != EXTRACT_LAST_REDUCTION) 8757 vec_dest = vect_create_destination_var (scalar_dest, vectype); 8758 8759 /* Handle cond expr. 
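For each copy J we gather vector defs for the comparison operands and the then/else clauses (from the SLP node, or via vect_get_vec_def_for_operand and vect_get_vec_def_for_stmt_copy), then emit a VEC_COND_EXPR, or an IFN_FOLD_EXTRACT_LAST call for extract-last reductions.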
*/ 8760 for (j = 0; j < ncopies; j++) 8761 { 8762 gimple *new_stmt = NULL; 8763 if (j == 0) 8764 { 8765 if (slp_node) 8766 { 8767 auto_vec<tree, 4> ops; 8768 auto_vec<vec<tree>, 4> vec_defs; 8769 8770 if (masked) 8771 ops.safe_push (cond_expr); 8772 else 8773 { 8774 ops.safe_push (cond_expr0); 8775 ops.safe_push (cond_expr1); 8776 } 8777 ops.safe_push (then_clause); 8778 ops.safe_push (else_clause); 8779 vect_get_slp_defs (ops, slp_node, &vec_defs); 8780 vec_oprnds3 = vec_defs.pop (); 8781 vec_oprnds2 = vec_defs.pop (); 8782 if (!masked) 8783 vec_oprnds1 = vec_defs.pop (); 8784 vec_oprnds0 = vec_defs.pop (); 8785 } 8786 else 8787 { 8788 gimple *gtemp; 8789 if (masked) 8790 { 8791 vec_cond_lhs 8792 = vect_get_vec_def_for_operand (cond_expr, stmt, 8793 comp_vectype); 8794 vect_is_simple_use (cond_expr, stmt_info->vinfo, 8795 >emp, &dts[0]); 8796 } 8797 else 8798 { 8799 vec_cond_lhs 8800 = vect_get_vec_def_for_operand (cond_expr0, 8801 stmt, comp_vectype); 8802 vect_is_simple_use (cond_expr0, loop_vinfo, >emp, &dts[0]); 8803 8804 vec_cond_rhs 8805 = vect_get_vec_def_for_operand (cond_expr1, 8806 stmt, comp_vectype); 8807 vect_is_simple_use (cond_expr1, loop_vinfo, >emp, &dts[1]); 8808 } 8809 if (reduc_index == 1) 8810 vec_then_clause = reduc_def; 8811 else 8812 { 8813 vec_then_clause = vect_get_vec_def_for_operand (then_clause, 8814 stmt); 8815 vect_is_simple_use (then_clause, loop_vinfo, 8816 >emp, &dts[2]); 8817 } 8818 if (reduc_index == 2) 8819 vec_else_clause = reduc_def; 8820 else 8821 { 8822 vec_else_clause = vect_get_vec_def_for_operand (else_clause, 8823 stmt); 8824 vect_is_simple_use (else_clause, loop_vinfo, >emp, &dts[3]); 8825 } 8826 } 8827 } 8828 else 8829 { 8830 vec_cond_lhs 8831 = vect_get_vec_def_for_stmt_copy (dts[0], 8832 vec_oprnds0.pop ()); 8833 if (!masked) 8834 vec_cond_rhs 8835 = vect_get_vec_def_for_stmt_copy (dts[1], 8836 vec_oprnds1.pop ()); 8837 8838 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2], 8839 vec_oprnds2.pop ()); 8840 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3], 8841 vec_oprnds3.pop ()); 8842 } 8843 8844 if (!slp_node) 8845 { 8846 vec_oprnds0.quick_push (vec_cond_lhs); 8847 if (!masked) 8848 vec_oprnds1.quick_push (vec_cond_rhs); 8849 vec_oprnds2.quick_push (vec_then_clause); 8850 vec_oprnds3.quick_push (vec_else_clause); 8851 } 8852 8853 /* Arguments are ready. Create the new vector stmt. */ 8854 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs) 8855 { 8856 vec_then_clause = vec_oprnds2[i]; 8857 vec_else_clause = vec_oprnds3[i]; 8858 8859 if (masked) 8860 vec_compare = vec_cond_lhs; 8861 else 8862 { 8863 vec_cond_rhs = vec_oprnds1[i]; 8864 if (bitop1 == NOP_EXPR) 8865 vec_compare = build2 (cond_code, vec_cmp_type, 8866 vec_cond_lhs, vec_cond_rhs); 8867 else 8868 { 8869 new_temp = make_ssa_name (vec_cmp_type); 8870 if (bitop1 == BIT_NOT_EXPR) 8871 new_stmt = gimple_build_assign (new_temp, bitop1, 8872 vec_cond_rhs); 8873 else 8874 new_stmt 8875 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs, 8876 vec_cond_rhs); 8877 vect_finish_stmt_generation (stmt, new_stmt, gsi); 8878 if (bitop2 == NOP_EXPR) 8879 vec_compare = new_temp; 8880 else if (bitop2 == BIT_NOT_EXPR) 8881 { 8882 /* Instead of doing ~x ? y : z do x ? z : y. 
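Swapping the then and else operands gives the same result and avoids materializing the BIT_NOT_EXPR.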
*/ 8883 vec_compare = new_temp; 8884 std::swap (vec_then_clause, vec_else_clause); 8885 } 8886 else 8887 { 8888 vec_compare = make_ssa_name (vec_cmp_type); 8889 new_stmt 8890 = gimple_build_assign (vec_compare, bitop2, 8891 vec_cond_lhs, new_temp); 8892 vect_finish_stmt_generation (stmt, new_stmt, gsi); 8893 } 8894 } 8895 } 8896 if (reduction_type == EXTRACT_LAST_REDUCTION) 8897 { 8898 if (!is_gimple_val (vec_compare)) 8899 { 8900 tree vec_compare_name = make_ssa_name (vec_cmp_type); 8901 new_stmt = gimple_build_assign (vec_compare_name, 8902 vec_compare); 8903 vect_finish_stmt_generation (stmt, new_stmt, gsi); 8904 vec_compare = vec_compare_name; 8905 } 8906 gcc_assert (reduc_index == 2); 8907 new_stmt = gimple_build_call_internal 8908 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare, 8909 vec_then_clause); 8910 gimple_call_set_lhs (new_stmt, scalar_dest); 8911 SSA_NAME_DEF_STMT (scalar_dest) = new_stmt; 8912 if (stmt == gsi_stmt (*gsi)) 8913 vect_finish_replace_stmt (stmt, new_stmt); 8914 else 8915 { 8916 /* In this case we're moving the definition to later in the 8917 block. That doesn't matter because the only uses of the 8918 lhs are in phi statements. */ 8919 gimple_stmt_iterator old_gsi = gsi_for_stmt (stmt); 8920 gsi_remove (&old_gsi, true); 8921 vect_finish_stmt_generation (stmt, new_stmt, gsi); 8922 } 8923 } 8924 else 8925 { 8926 new_temp = make_ssa_name (vec_dest); 8927 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR, 8928 vec_compare, vec_then_clause, 8929 vec_else_clause); 8930 vect_finish_stmt_generation (stmt, new_stmt, gsi); 8931 } 8932 if (slp_node) 8933 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt); 8934 } 8935 8936 if (slp_node) 8937 continue; 8938 8939 if (j == 0) 8940 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; 8941 else 8942 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; 8943 8944 prev_stmt_info = vinfo_for_stmt (new_stmt); 8945 } 8946 8947 vec_oprnds0.release (); 8948 vec_oprnds1.release (); 8949 vec_oprnds2.release (); 8950 vec_oprnds3.release (); 8951 8952 return true; 8953 } 8954 8955 /* vectorizable_comparison. 8956 8957 Check if STMT is comparison expression that can be vectorized. 8958 If VEC_STMT is also passed, vectorize the STMT: create a vectorized 8959 comparison, put it in VEC_STMT, and insert it at GSI. 8960 8961 Return FALSE if not a vectorizable STMT, TRUE otherwise. 
*/ 8962 8963 static bool 8964 vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi, 8965 gimple **vec_stmt, tree reduc_def, 8966 slp_tree slp_node) 8967 { 8968 tree lhs, rhs1, rhs2; 8969 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 8970 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE; 8971 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 8972 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE; 8973 tree new_temp; 8974 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 8975 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type}; 8976 int ndts = 2; 8977 poly_uint64 nunits; 8978 int ncopies; 8979 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR; 8980 stmt_vec_info prev_stmt_info = NULL; 8981 int i, j; 8982 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); 8983 vec<tree> vec_oprnds0 = vNULL; 8984 vec<tree> vec_oprnds1 = vNULL; 8985 gimple *def_stmt; 8986 tree mask_type; 8987 tree mask; 8988 8989 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) 8990 return false; 8991 8992 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype)) 8993 return false; 8994 8995 mask_type = vectype; 8996 nunits = TYPE_VECTOR_SUBPARTS (vectype); 8997 8998 if (slp_node) 8999 ncopies = 1; 9000 else 9001 ncopies = vect_get_num_copies (loop_vinfo, vectype); 9002 9003 gcc_assert (ncopies >= 1); 9004 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def 9005 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle 9006 && reduc_def)) 9007 return false; 9008 9009 if (STMT_VINFO_LIVE_P (stmt_info)) 9010 { 9011 if (dump_enabled_p ()) 9012 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 9013 "value used after loop.\n"); 9014 return false; 9015 } 9016 9017 if (!is_gimple_assign (stmt)) 9018 return false; 9019 9020 code = gimple_assign_rhs_code (stmt); 9021 9022 if (TREE_CODE_CLASS (code) != tcc_comparison) 9023 return false; 9024 9025 rhs1 = gimple_assign_rhs1 (stmt); 9026 rhs2 = gimple_assign_rhs2 (stmt); 9027 9028 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &def_stmt, 9029 &dts[0], &vectype1)) 9030 return false; 9031 9032 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &def_stmt, 9033 &dts[1], &vectype2)) 9034 return false; 9035 9036 if (vectype1 && vectype2 9037 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1), 9038 TYPE_VECTOR_SUBPARTS (vectype2))) 9039 return false; 9040 9041 vectype = vectype1 ? vectype1 : vectype2; 9042 9043 /* Invariant comparison. */ 9044 if (!vectype) 9045 { 9046 vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1)); 9047 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits)) 9048 return false; 9049 } 9050 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype))) 9051 return false; 9052 9053 /* Can't compare mask and non-mask types. */ 9054 if (vectype1 && vectype2 9055 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2))) 9056 return false; 9057 9058 /* Boolean values may have another representation in vectors 9059 and therefore we prefer bit operations over comparison for 9060 them (which also works for scalar masks). We store opcodes 9061 to use in bitop1 and bitop2. Statement is vectorized as 9062 BITOP2 (rhs1 BITOP1 rhs2) or 9063 rhs1 BITOP2 (BITOP1 rhs2) 9064 depending on bitop1 and bitop2 arity. 
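For example (boolean operands only), a > b is emitted as a & ~b
(bitop1 = BIT_NOT_EXPR on rhs2, bitop2 = BIT_AND_EXPR), a >= b as
a | ~b, a == b as ~(a ^ b), and a != b as plain a ^ b; the < and <=
cases reuse the > and >= forms with the operands swapped.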
*/ 9065 if (VECTOR_BOOLEAN_TYPE_P (vectype)) 9066 { 9067 if (code == GT_EXPR) 9068 { 9069 bitop1 = BIT_NOT_EXPR; 9070 bitop2 = BIT_AND_EXPR; 9071 } 9072 else if (code == GE_EXPR) 9073 { 9074 bitop1 = BIT_NOT_EXPR; 9075 bitop2 = BIT_IOR_EXPR; 9076 } 9077 else if (code == LT_EXPR) 9078 { 9079 bitop1 = BIT_NOT_EXPR; 9080 bitop2 = BIT_AND_EXPR; 9081 std::swap (rhs1, rhs2); 9082 std::swap (dts[0], dts[1]); 9083 } 9084 else if (code == LE_EXPR) 9085 { 9086 bitop1 = BIT_NOT_EXPR; 9087 bitop2 = BIT_IOR_EXPR; 9088 std::swap (rhs1, rhs2); 9089 std::swap (dts[0], dts[1]); 9090 } 9091 else 9092 { 9093 bitop1 = BIT_XOR_EXPR; 9094 if (code == EQ_EXPR) 9095 bitop2 = BIT_NOT_EXPR; 9096 } 9097 } 9098 9099 if (!vec_stmt) 9100 { 9101 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type; 9102 if (!slp_node) 9103 vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)), 9104 dts, ndts, NULL, NULL); 9105 if (bitop1 == NOP_EXPR) 9106 return expand_vec_cmp_expr_p (vectype, mask_type, code); 9107 else 9108 { 9109 machine_mode mode = TYPE_MODE (vectype); 9110 optab optab; 9111 9112 optab = optab_for_tree_code (bitop1, vectype, optab_default); 9113 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing) 9114 return false; 9115 9116 if (bitop2 != NOP_EXPR) 9117 { 9118 optab = optab_for_tree_code (bitop2, vectype, optab_default); 9119 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing) 9120 return false; 9121 } 9122 return true; 9123 } 9124 } 9125 9126 /* Transform. */ 9127 if (!slp_node) 9128 { 9129 vec_oprnds0.create (1); 9130 vec_oprnds1.create (1); 9131 } 9132 9133 /* Handle def. */ 9134 lhs = gimple_assign_lhs (stmt); 9135 mask = vect_create_destination_var (lhs, mask_type); 9136 9137 /* Handle cmp expr. */ 9138 for (j = 0; j < ncopies; j++) 9139 { 9140 gassign *new_stmt = NULL; 9141 if (j == 0) 9142 { 9143 if (slp_node) 9144 { 9145 auto_vec<tree, 2> ops; 9146 auto_vec<vec<tree>, 2> vec_defs; 9147 9148 ops.safe_push (rhs1); 9149 ops.safe_push (rhs2); 9150 vect_get_slp_defs (ops, slp_node, &vec_defs); 9151 vec_oprnds1 = vec_defs.pop (); 9152 vec_oprnds0 = vec_defs.pop (); 9153 } 9154 else 9155 { 9156 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype); 9157 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype); 9158 } 9159 } 9160 else 9161 { 9162 vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0], 9163 vec_oprnds0.pop ()); 9164 vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1], 9165 vec_oprnds1.pop ()); 9166 } 9167 9168 if (!slp_node) 9169 { 9170 vec_oprnds0.quick_push (vec_rhs1); 9171 vec_oprnds1.quick_push (vec_rhs2); 9172 } 9173 9174 /* Arguments are ready. Create the new vector stmt. 
*/ 9175 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1) 9176 { 9177 vec_rhs2 = vec_oprnds1[i]; 9178 9179 new_temp = make_ssa_name (mask); 9180 if (bitop1 == NOP_EXPR) 9181 { 9182 new_stmt = gimple_build_assign (new_temp, code, 9183 vec_rhs1, vec_rhs2); 9184 vect_finish_stmt_generation (stmt, new_stmt, gsi); 9185 } 9186 else 9187 { 9188 if (bitop1 == BIT_NOT_EXPR) 9189 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2); 9190 else 9191 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1, 9192 vec_rhs2); 9193 vect_finish_stmt_generation (stmt, new_stmt, gsi); 9194 if (bitop2 != NOP_EXPR) 9195 { 9196 tree res = make_ssa_name (mask); 9197 if (bitop2 == BIT_NOT_EXPR) 9198 new_stmt = gimple_build_assign (res, bitop2, new_temp); 9199 else 9200 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1, 9201 new_temp); 9202 vect_finish_stmt_generation (stmt, new_stmt, gsi); 9203 } 9204 } 9205 if (slp_node) 9206 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt); 9207 } 9208 9209 if (slp_node) 9210 continue; 9211 9212 if (j == 0) 9213 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; 9214 else 9215 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; 9216 9217 prev_stmt_info = vinfo_for_stmt (new_stmt); 9218 } 9219 9220 vec_oprnds0.release (); 9221 vec_oprnds1.release (); 9222 9223 return true; 9224 } 9225 9226 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation 9227 can handle all live statements in the node. Otherwise return true 9228 if STMT is not live or if vectorizable_live_operation can handle it. 9229 GSI and VEC_STMT are as for vectorizable_live_operation. */ 9230 9231 static bool 9232 can_vectorize_live_stmts (gimple *stmt, gimple_stmt_iterator *gsi, 9233 slp_tree slp_node, gimple **vec_stmt) 9234 { 9235 if (slp_node) 9236 { 9237 gimple *slp_stmt; 9238 unsigned int i; 9239 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt) 9240 { 9241 stmt_vec_info slp_stmt_info = vinfo_for_stmt (slp_stmt); 9242 if (STMT_VINFO_LIVE_P (slp_stmt_info) 9243 && !vectorizable_live_operation (slp_stmt, gsi, slp_node, i, 9244 vec_stmt)) 9245 return false; 9246 } 9247 } 9248 else if (STMT_VINFO_LIVE_P (vinfo_for_stmt (stmt)) 9249 && !vectorizable_live_operation (stmt, gsi, slp_node, -1, vec_stmt)) 9250 return false; 9251 9252 return true; 9253 } 9254 9255 /* Make sure the statement is vectorizable. */ 9256 9257 bool 9258 vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node, 9259 slp_instance node_instance) 9260 { 9261 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 9262 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); 9263 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info); 9264 bool ok; 9265 gimple *pattern_stmt; 9266 gimple_seq pattern_def_seq; 9267 9268 if (dump_enabled_p ()) 9269 { 9270 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: "); 9271 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0); 9272 } 9273 9274 if (gimple_has_volatile_ops (stmt)) 9275 { 9276 if (dump_enabled_p ()) 9277 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 9278 "not vectorized: stmt has volatile operands\n"); 9279 9280 return false; 9281 } 9282 9283 /* Skip stmts that do not need to be vectorized. In loops this is expected 9284 to include: 9285 - the COND_EXPR which is the loop exit condition 9286 - any LABEL_EXPRs in the loop 9287 - computations that are used only for array indexing or loop control. 
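For instance, the bump of the loop counter that only feeds the exit
test falls into the last category and is not vectorized on its own.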
9288 In basic blocks we only analyze statements that are part of some SLP
9289 instance; therefore, all the statements are relevant.
9290
9291 A pattern statement needs to be analyzed instead of the original statement
9292 if the original statement is not relevant. Otherwise, we analyze both
9293 statements. In basic blocks we are called from some SLP instance
9294 traversal; don't analyze pattern stmts separately, since the pattern stmts
9295 will already be part of the SLP instance. */
9296
9297 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
9298 if (!STMT_VINFO_RELEVANT_P (stmt_info)
9299 && !STMT_VINFO_LIVE_P (stmt_info))
9300 {
9301 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9302 && pattern_stmt
9303 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
9304 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
9305 {
9306 /* Analyze PATTERN_STMT instead of the original stmt. */
9307 stmt = pattern_stmt;
9308 stmt_info = vinfo_for_stmt (pattern_stmt);
9309 if (dump_enabled_p ())
9310 {
9311 dump_printf_loc (MSG_NOTE, vect_location,
9312 "==> examining pattern statement: ");
9313 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
9314 }
9315 }
9316 else
9317 {
9318 if (dump_enabled_p ())
9319 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
9320
9321 return true;
9322 }
9323 }
9324 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9325 && node == NULL
9326 && pattern_stmt
9327 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
9328 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
9329 {
9330 /* Analyze PATTERN_STMT too. */
9331 if (dump_enabled_p ())
9332 {
9333 dump_printf_loc (MSG_NOTE, vect_location,
9334 "==> examining pattern statement: ");
9335 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
9336 }
9337
9338 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node,
9339 node_instance))
9340 return false;
9341 }
9342
9343 if (is_pattern_stmt_p (stmt_info)
9344 && node == NULL
9345 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
9346 {
9347 gimple_stmt_iterator si;
9348
9349 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
9350 {
9351 gimple *pattern_def_stmt = gsi_stmt (si);
9352 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
9353 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
9354 {
9355 /* Analyze def stmt of STMT if it's a pattern stmt.
*/ 9356 if (dump_enabled_p ()) 9357 { 9358 dump_printf_loc (MSG_NOTE, vect_location, 9359 "==> examining pattern def statement: "); 9360 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0); 9361 } 9362 9363 if (!vect_analyze_stmt (pattern_def_stmt, 9364 need_to_vectorize, node, node_instance)) 9365 return false; 9366 } 9367 } 9368 } 9369 9370 switch (STMT_VINFO_DEF_TYPE (stmt_info)) 9371 { 9372 case vect_internal_def: 9373 break; 9374 9375 case vect_reduction_def: 9376 case vect_nested_cycle: 9377 gcc_assert (!bb_vinfo 9378 && (relevance == vect_used_in_outer 9379 || relevance == vect_used_in_outer_by_reduction 9380 || relevance == vect_used_by_reduction 9381 || relevance == vect_unused_in_scope 9382 || relevance == vect_used_only_live)); 9383 break; 9384 9385 case vect_induction_def: 9386 gcc_assert (!bb_vinfo); 9387 break; 9388 9389 case vect_constant_def: 9390 case vect_external_def: 9391 case vect_unknown_def_type: 9392 default: 9393 gcc_unreachable (); 9394 } 9395 9396 if (STMT_VINFO_RELEVANT_P (stmt_info)) 9397 { 9398 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt)))); 9399 gcc_assert (STMT_VINFO_VECTYPE (stmt_info) 9400 || (is_gimple_call (stmt) 9401 && gimple_call_lhs (stmt) == NULL_TREE)); 9402 *need_to_vectorize = true; 9403 } 9404 9405 if (PURE_SLP_STMT (stmt_info) && !node) 9406 { 9407 dump_printf_loc (MSG_NOTE, vect_location, 9408 "handled only by SLP analysis\n"); 9409 return true; 9410 } 9411 9412 ok = true; 9413 if (!bb_vinfo 9414 && (STMT_VINFO_RELEVANT_P (stmt_info) 9415 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)) 9416 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node) 9417 || vectorizable_conversion (stmt, NULL, NULL, node) 9418 || vectorizable_shift (stmt, NULL, NULL, node) 9419 || vectorizable_operation (stmt, NULL, NULL, node) 9420 || vectorizable_assignment (stmt, NULL, NULL, node) 9421 || vectorizable_load (stmt, NULL, NULL, node, NULL) 9422 || vectorizable_call (stmt, NULL, NULL, node) 9423 || vectorizable_store (stmt, NULL, NULL, node) 9424 || vectorizable_reduction (stmt, NULL, NULL, node, node_instance) 9425 || vectorizable_induction (stmt, NULL, NULL, node) 9426 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node) 9427 || vectorizable_comparison (stmt, NULL, NULL, NULL, node)); 9428 else 9429 { 9430 if (bb_vinfo) 9431 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node) 9432 || vectorizable_conversion (stmt, NULL, NULL, node) 9433 || vectorizable_shift (stmt, NULL, NULL, node) 9434 || vectorizable_operation (stmt, NULL, NULL, node) 9435 || vectorizable_assignment (stmt, NULL, NULL, node) 9436 || vectorizable_load (stmt, NULL, NULL, node, NULL) 9437 || vectorizable_call (stmt, NULL, NULL, node) 9438 || vectorizable_store (stmt, NULL, NULL, node) 9439 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node) 9440 || vectorizable_comparison (stmt, NULL, NULL, NULL, node)); 9441 } 9442 9443 if (!ok) 9444 { 9445 if (dump_enabled_p ()) 9446 { 9447 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 9448 "not vectorized: relevant stmt not "); 9449 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: "); 9450 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0); 9451 } 9452 9453 return false; 9454 } 9455 9456 if (bb_vinfo) 9457 return true; 9458 9459 /* Stmts that are (also) "live" (i.e. - that are used out of the loop) 9460 need extra handling, except for vectorizable reductions. 
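For example, when a value computed inside the loop is also read after
the loop, vectorizable_live_operation must be able to produce the
required scalar value from the vector result.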
*/
9461 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9462 && !can_vectorize_live_stmts (stmt, NULL, node, NULL))
9463 {
9464 if (dump_enabled_p ())
9465 {
9466 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9467 "not vectorized: live stmt not supported: ");
9468 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
9469 }
9470
9471 return false;
9472 }
9473
9474 return true;
9475 }
9476
9477
9478 /* Function vect_transform_stmt.
9479
9480 Create a vectorized stmt to replace STMT, and insert it at BSI. */
9481
9482 bool
9483 vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
9484 bool *grouped_store, slp_tree slp_node,
9485 slp_instance slp_node_instance)
9486 {
9487 bool is_store = false;
9488 gimple *vec_stmt = NULL;
9489 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9490 bool done;
9491
9492 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
9493 gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
9494
9495 bool nested_p = (STMT_VINFO_LOOP_VINFO (stmt_info)
9496 && nested_in_vect_loop_p
9497 (LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info)),
9498 stmt));
9499
9500 switch (STMT_VINFO_TYPE (stmt_info))
9501 {
9502 case type_demotion_vec_info_type:
9503 case type_promotion_vec_info_type:
9504 case type_conversion_vec_info_type:
9505 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
9506 gcc_assert (done);
9507 break;
9508
9509 case induc_vec_info_type:
9510 done = vectorizable_induction (stmt, gsi, &vec_stmt, slp_node);
9511 gcc_assert (done);
9512 break;
9513
9514 case shift_vec_info_type:
9515 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
9516 gcc_assert (done);
9517 break;
9518
9519 case op_vec_info_type:
9520 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
9521 gcc_assert (done);
9522 break;
9523
9524 case assignment_vec_info_type:
9525 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
9526 gcc_assert (done);
9527 break;
9528
9529 case load_vec_info_type:
9530 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
9531 slp_node_instance);
9532 gcc_assert (done);
9533 break;
9534
9535 case store_vec_info_type:
9536 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
9537 gcc_assert (done);
9538 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
9539 {
9540 /* In case of interleaving, the whole chain is vectorized when the
9541 last store in the chain is reached. Store stmts before the last
9542 one are skipped, and their vec_stmt_info shouldn't be freed
9543 meanwhile.
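For example, for an interleaved group of two stores a[2*i] and
a[2*i+1] (illustrative), nothing is emitted when the first store is
reached; the whole chain is generated only once the last store of the
group is transformed.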
*/
9544 *grouped_store = true;
9545 stmt_vec_info group_info
9546 = vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info));
9547 if (GROUP_STORE_COUNT (group_info) == GROUP_SIZE (group_info))
9548 is_store = true;
9549 }
9550 else
9551 is_store = true;
9552 break;
9553
9554 case condition_vec_info_type:
9555 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
9556 gcc_assert (done);
9557 break;
9558
9559 case comparison_vec_info_type:
9560 done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node);
9561 gcc_assert (done);
9562 break;
9563
9564 case call_vec_info_type:
9565 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
9566 stmt = gsi_stmt (*gsi);
9567 break;
9568
9569 case call_simd_clone_vec_info_type:
9570 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
9571 stmt = gsi_stmt (*gsi);
9572 break;
9573
9574 case reduc_vec_info_type:
9575 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node,
9576 slp_node_instance);
9577 gcc_assert (done);
9578 break;
9579
9580 default:
9581 if (!STMT_VINFO_LIVE_P (stmt_info))
9582 {
9583 if (dump_enabled_p ())
9584 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9585 "stmt not supported.\n");
9586 gcc_unreachable ();
9587 }
9588 }
9589
9590 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
9591 This would break hybrid SLP vectorization. */
9592 if (slp_node)
9593 gcc_assert (!vec_stmt
9594 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);
9595
9596 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
9597 is being vectorized, but outside the immediately enclosing loop. */
9598 if (vec_stmt
9599 && nested_p
9600 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9601 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
9602 || STMT_VINFO_RELEVANT (stmt_info) ==
9603 vect_used_in_outer_by_reduction))
9604 {
9605 struct loop *innerloop = LOOP_VINFO_LOOP (
9606 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
9607 imm_use_iterator imm_iter;
9608 use_operand_p use_p;
9609 tree scalar_dest;
9610 gimple *exit_phi;
9611
9612 if (dump_enabled_p ())
9613 dump_printf_loc (MSG_NOTE, vect_location,
9614 "Record the vdef for outer-loop vectorization.\n");
9615
9616 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
9617 (to be used when vectorizing outer-loop stmts that use the DEF of
9618 STMT). */
9619 if (gimple_code (stmt) == GIMPLE_PHI)
9620 scalar_dest = PHI_RESULT (stmt);
9621 else
9622 scalar_dest = gimple_get_lhs (stmt);
9623
9624 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
9625 {
9626 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
9627 {
9628 exit_phi = USE_STMT (use_p);
9629 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
9630 }
9631 }
9632 }
9633
9634 /* Handle stmts whose DEF is used outside the loop-nest that is
9635 being vectorized. */
9636 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
9637 {
9638 done = can_vectorize_live_stmts (stmt, gsi, slp_node, &vec_stmt);
9639 gcc_assert (done);
9640 }
9641
9642 if (vec_stmt)
9643 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
9644
9645 return is_store;
9646 }
9647
9648
9649 /* Remove a group of stores (for SLP or interleaving), free their
9650 stmt_vec_info.
*/ 9651 9652 void 9653 vect_remove_stores (gimple *first_stmt) 9654 { 9655 gimple *next = first_stmt; 9656 gimple *tmp; 9657 gimple_stmt_iterator next_si; 9658 9659 while (next) 9660 { 9661 stmt_vec_info stmt_info = vinfo_for_stmt (next); 9662 9663 tmp = GROUP_NEXT_ELEMENT (stmt_info); 9664 if (is_pattern_stmt_p (stmt_info)) 9665 next = STMT_VINFO_RELATED_STMT (stmt_info); 9666 /* Free the attached stmt_vec_info and remove the stmt. */ 9667 next_si = gsi_for_stmt (next); 9668 unlink_stmt_vdef (next); 9669 gsi_remove (&next_si, true); 9670 release_defs (next); 9671 free_stmt_vec_info (next); 9672 next = tmp; 9673 } 9674 } 9675 9676 9677 /* Function new_stmt_vec_info. 9678 9679 Create and initialize a new stmt_vec_info struct for STMT. */ 9680 9681 stmt_vec_info 9682 new_stmt_vec_info (gimple *stmt, vec_info *vinfo) 9683 { 9684 stmt_vec_info res; 9685 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info)); 9686 9687 STMT_VINFO_TYPE (res) = undef_vec_info_type; 9688 STMT_VINFO_STMT (res) = stmt; 9689 res->vinfo = vinfo; 9690 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope; 9691 STMT_VINFO_LIVE_P (res) = false; 9692 STMT_VINFO_VECTYPE (res) = NULL; 9693 STMT_VINFO_VEC_STMT (res) = NULL; 9694 STMT_VINFO_VECTORIZABLE (res) = true; 9695 STMT_VINFO_IN_PATTERN_P (res) = false; 9696 STMT_VINFO_RELATED_STMT (res) = NULL; 9697 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL; 9698 STMT_VINFO_DATA_REF (res) = NULL; 9699 STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION; 9700 STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK; 9701 9702 if (gimple_code (stmt) == GIMPLE_PHI 9703 && is_loop_header_bb_p (gimple_bb (stmt))) 9704 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type; 9705 else 9706 STMT_VINFO_DEF_TYPE (res) = vect_internal_def; 9707 9708 STMT_VINFO_SAME_ALIGN_REFS (res).create (0); 9709 STMT_SLP_TYPE (res) = loop_vect; 9710 STMT_VINFO_NUM_SLP_USES (res) = 0; 9711 9712 GROUP_FIRST_ELEMENT (res) = NULL; 9713 GROUP_NEXT_ELEMENT (res) = NULL; 9714 GROUP_SIZE (res) = 0; 9715 GROUP_STORE_COUNT (res) = 0; 9716 GROUP_GAP (res) = 0; 9717 GROUP_SAME_DR_STMT (res) = NULL; 9718 9719 return res; 9720 } 9721 9722 9723 /* Create a hash table for stmt_vec_info. */ 9724 9725 void 9726 init_stmt_vec_info_vec (void) 9727 { 9728 gcc_assert (!stmt_vec_info_vec.exists ()); 9729 stmt_vec_info_vec.create (50); 9730 } 9731 9732 9733 /* Free hash table for stmt_vec_info. */ 9734 9735 void 9736 free_stmt_vec_info_vec (void) 9737 { 9738 unsigned int i; 9739 stmt_vec_info info; 9740 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info) 9741 if (info != NULL) 9742 free_stmt_vec_info (STMT_VINFO_STMT (info)); 9743 gcc_assert (stmt_vec_info_vec.exists ()); 9744 stmt_vec_info_vec.release (); 9745 } 9746 9747 9748 /* Free stmt vectorization related info. */ 9749 9750 void 9751 free_stmt_vec_info (gimple *stmt) 9752 { 9753 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 9754 9755 if (!stmt_info) 9756 return; 9757 9758 /* Check if this statement has a related "pattern stmt" 9759 (introduced by the vectorizer during the pattern recognition 9760 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info 9761 too. 
*/
9762 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
9763 {
9764 stmt_vec_info patt_info
9765 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
9766 if (patt_info)
9767 {
9768 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
9769 gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
9770 gimple_set_bb (patt_stmt, NULL);
9771 tree lhs = gimple_get_lhs (patt_stmt);
9772 if (lhs && TREE_CODE (lhs) == SSA_NAME)
9773 release_ssa_name (lhs);
9774 if (seq)
9775 {
9776 gimple_stmt_iterator si;
9777 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
9778 {
9779 gimple *seq_stmt = gsi_stmt (si);
9780 gimple_set_bb (seq_stmt, NULL);
9781 lhs = gimple_get_lhs (seq_stmt);
9782 if (lhs && TREE_CODE (lhs) == SSA_NAME)
9783 release_ssa_name (lhs);
9784 free_stmt_vec_info (seq_stmt);
9785 }
9786 }
9787 free_stmt_vec_info (patt_stmt);
9788 }
9789 }
9790
9791 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
9792 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
9793 set_vinfo_for_stmt (stmt, NULL);
9794 free (stmt_info);
9795 }
9796
9797
9798 /* Function get_vectype_for_scalar_type_and_size.
9799
9800 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
9801 by the target. */
9802
9803 tree
9804 get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)
9805 {
9806 tree orig_scalar_type = scalar_type;
9807 scalar_mode inner_mode;
9808 machine_mode simd_mode;
9809 poly_uint64 nunits;
9810 tree vectype;
9811
9812 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
9813 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
9814 return NULL_TREE;
9815
9816 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
9817
9818 /* For vector types of elements whose mode precision doesn't
9819 match their type's precision we use an element type of mode
9820 precision. The vectorization routines will have to make sure
9821 they support the proper result truncation/extension.
9822 We also make sure to build vector types with INTEGER_TYPE
9823 component type only. */
9824 if (INTEGRAL_TYPE_P (scalar_type)
9825 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
9826 || TREE_CODE (scalar_type) != INTEGER_TYPE))
9827 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
9828 TYPE_UNSIGNED (scalar_type));
9829
9830 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
9831 When the component mode passes the above test simply use a type
9832 corresponding to that mode. The theory is that any use that
9833 would cause problems with this will disable vectorization anyway. */
9834 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
9835 && !INTEGRAL_TYPE_P (scalar_type))
9836 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
9837
9838 /* We can't build a vector type of elements with alignment bigger than
9839 their size. */
9840 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
9841 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
9842 TYPE_UNSIGNED (scalar_type));
9843
9844 /* If we fell back to using the mode, fail if there was
9845 no scalar type for it. */
9846 if (scalar_type == NULL_TREE)
9847 return NULL_TREE;
9848
9849 /* If no size was supplied use the mode the target prefers. Otherwise
9850 lookup a vector mode of the specified size.
*/ 9851 if (known_eq (size, 0U)) 9852 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode); 9853 else if (!multiple_p (size, nbytes, &nunits) 9854 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode)) 9855 return NULL_TREE; 9856 /* NOTE: nunits == 1 is allowed to support single element vector types. */ 9857 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)) 9858 return NULL_TREE; 9859 9860 vectype = build_vector_type (scalar_type, nunits); 9861 9862 if (!VECTOR_MODE_P (TYPE_MODE (vectype)) 9863 && !INTEGRAL_MODE_P (TYPE_MODE (vectype))) 9864 return NULL_TREE; 9865 9866 /* Re-attach the address-space qualifier if we canonicalized the scalar 9867 type. */ 9868 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype)) 9869 return build_qualified_type 9870 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type))); 9871 9872 return vectype; 9873 } 9874 9875 poly_uint64 current_vector_size; 9876 9877 /* Function get_vectype_for_scalar_type. 9878 9879 Returns the vector type corresponding to SCALAR_TYPE as supported 9880 by the target. */ 9881 9882 tree 9883 get_vectype_for_scalar_type (tree scalar_type) 9884 { 9885 tree vectype; 9886 vectype = get_vectype_for_scalar_type_and_size (scalar_type, 9887 current_vector_size); 9888 if (vectype 9889 && known_eq (current_vector_size, 0U)) 9890 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype)); 9891 return vectype; 9892 } 9893 9894 /* Function get_mask_type_for_scalar_type. 9895 9896 Returns the mask type corresponding to a result of comparison 9897 of vectors of specified SCALAR_TYPE as supported by target. */ 9898 9899 tree 9900 get_mask_type_for_scalar_type (tree scalar_type) 9901 { 9902 tree vectype = get_vectype_for_scalar_type (scalar_type); 9903 9904 if (!vectype) 9905 return NULL; 9906 9907 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype), 9908 current_vector_size); 9909 } 9910 9911 /* Function get_same_sized_vectype 9912 9913 Returns a vector type corresponding to SCALAR_TYPE of size 9914 VECTOR_TYPE if supported by the target. */ 9915 9916 tree 9917 get_same_sized_vectype (tree scalar_type, tree vector_type) 9918 { 9919 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type)) 9920 return build_same_sized_truth_vector_type (vector_type); 9921 9922 return get_vectype_for_scalar_type_and_size 9923 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type))); 9924 } 9925 9926 /* Function vect_is_simple_use. 9927 9928 Input: 9929 VINFO - the vect info of the loop or basic block that is being vectorized. 9930 OPERAND - operand in the loop or bb. 9931 Output: 9932 DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME. 9933 DT - the type of definition 9934 9935 Returns whether a stmt with OPERAND can be vectorized. 9936 For loops, supportable operands are constants, loop invariants, and operands 9937 that are defined by the current iteration of the loop. Unsupportable 9938 operands are those that are defined by a previous iteration of the loop (as 9939 is the case in reduction/induction computations). 9940 For basic blocks, supportable operands are constants and bb invariants. 9941 For now, operands defined outside the basic block are not supported. 
*/ 9942 9943 bool 9944 vect_is_simple_use (tree operand, vec_info *vinfo, 9945 gimple **def_stmt, enum vect_def_type *dt) 9946 { 9947 *def_stmt = NULL; 9948 *dt = vect_unknown_def_type; 9949 9950 if (dump_enabled_p ()) 9951 { 9952 dump_printf_loc (MSG_NOTE, vect_location, 9953 "vect_is_simple_use: operand "); 9954 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand); 9955 dump_printf (MSG_NOTE, "\n"); 9956 } 9957 9958 if (CONSTANT_CLASS_P (operand)) 9959 { 9960 *dt = vect_constant_def; 9961 return true; 9962 } 9963 9964 if (is_gimple_min_invariant (operand)) 9965 { 9966 *dt = vect_external_def; 9967 return true; 9968 } 9969 9970 if (TREE_CODE (operand) != SSA_NAME) 9971 { 9972 if (dump_enabled_p ()) 9973 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 9974 "not ssa-name.\n"); 9975 return false; 9976 } 9977 9978 if (SSA_NAME_IS_DEFAULT_DEF (operand)) 9979 { 9980 *dt = vect_external_def; 9981 return true; 9982 } 9983 9984 *def_stmt = SSA_NAME_DEF_STMT (operand); 9985 if (dump_enabled_p ()) 9986 { 9987 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: "); 9988 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0); 9989 } 9990 9991 if (! vect_stmt_in_region_p (vinfo, *def_stmt)) 9992 *dt = vect_external_def; 9993 else 9994 { 9995 stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt); 9996 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo); 9997 } 9998 9999 if (dump_enabled_p ()) 10000 { 10001 dump_printf_loc (MSG_NOTE, vect_location, "type of def: "); 10002 switch (*dt) 10003 { 10004 case vect_uninitialized_def: 10005 dump_printf (MSG_NOTE, "uninitialized\n"); 10006 break; 10007 case vect_constant_def: 10008 dump_printf (MSG_NOTE, "constant\n"); 10009 break; 10010 case vect_external_def: 10011 dump_printf (MSG_NOTE, "external\n"); 10012 break; 10013 case vect_internal_def: 10014 dump_printf (MSG_NOTE, "internal\n"); 10015 break; 10016 case vect_induction_def: 10017 dump_printf (MSG_NOTE, "induction\n"); 10018 break; 10019 case vect_reduction_def: 10020 dump_printf (MSG_NOTE, "reduction\n"); 10021 break; 10022 case vect_double_reduction_def: 10023 dump_printf (MSG_NOTE, "double reduction\n"); 10024 break; 10025 case vect_nested_cycle: 10026 dump_printf (MSG_NOTE, "nested cycle\n"); 10027 break; 10028 case vect_unknown_def_type: 10029 dump_printf (MSG_NOTE, "unknown\n"); 10030 break; 10031 } 10032 } 10033 10034 if (*dt == vect_unknown_def_type) 10035 { 10036 if (dump_enabled_p ()) 10037 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 10038 "Unsupported pattern.\n"); 10039 return false; 10040 } 10041 10042 switch (gimple_code (*def_stmt)) 10043 { 10044 case GIMPLE_PHI: 10045 case GIMPLE_ASSIGN: 10046 case GIMPLE_CALL: 10047 break; 10048 default: 10049 if (dump_enabled_p ()) 10050 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 10051 "unsupported defining stmt:\n"); 10052 return false; 10053 } 10054 10055 return true; 10056 } 10057 10058 /* Function vect_is_simple_use. 10059 10060 Same as vect_is_simple_use but also determines the vector operand 10061 type of OPERAND and stores it to *VECTYPE. If the definition of 10062 OPERAND is vect_uninitialized_def, vect_constant_def or 10063 vect_external_def *VECTYPE will be set to NULL_TREE and the caller 10064 is responsible to compute the best suited vector type for the 10065 scalar operand. 
*/ 10066 10067 bool 10068 vect_is_simple_use (tree operand, vec_info *vinfo, 10069 gimple **def_stmt, enum vect_def_type *dt, tree *vectype) 10070 { 10071 if (!vect_is_simple_use (operand, vinfo, def_stmt, dt)) 10072 return false; 10073 10074 /* Now get a vector type if the def is internal, otherwise supply 10075 NULL_TREE and leave it up to the caller to figure out a proper 10076 type for the use stmt. */ 10077 if (*dt == vect_internal_def 10078 || *dt == vect_induction_def 10079 || *dt == vect_reduction_def 10080 || *dt == vect_double_reduction_def 10081 || *dt == vect_nested_cycle) 10082 { 10083 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt); 10084 10085 if (STMT_VINFO_IN_PATTERN_P (stmt_info) 10086 && !STMT_VINFO_RELEVANT (stmt_info) 10087 && !STMT_VINFO_LIVE_P (stmt_info)) 10088 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info)); 10089 10090 *vectype = STMT_VINFO_VECTYPE (stmt_info); 10091 gcc_assert (*vectype != NULL_TREE); 10092 } 10093 else if (*dt == vect_uninitialized_def 10094 || *dt == vect_constant_def 10095 || *dt == vect_external_def) 10096 *vectype = NULL_TREE; 10097 else 10098 gcc_unreachable (); 10099 10100 return true; 10101 } 10102 10103 10104 /* Function supportable_widening_operation 10105 10106 Check whether an operation represented by the code CODE is a 10107 widening operation that is supported by the target platform in 10108 vector form (i.e., when operating on arguments of type VECTYPE_IN 10109 producing a result of type VECTYPE_OUT). 10110 10111 Widening operations we currently support are NOP (CONVERT), FLOAT 10112 and WIDEN_MULT. This function checks if these operations are supported 10113 by the target platform either directly (via vector tree-codes), or via 10114 target builtins. 10115 10116 Output: 10117 - CODE1 and CODE2 are codes of vector operations to be used when 10118 vectorizing the operation, if available. 10119 - MULTI_STEP_CVT determines the number of required intermediate steps in 10120 case of multi-step conversion (like char->short->int - in that case 10121 MULTI_STEP_CVT will be 1). 10122 - INTERM_TYPES contains the intermediate type required to perform the 10123 widening operation (short in the above example). */ 10124 10125 bool 10126 supportable_widening_operation (enum tree_code code, gimple *stmt, 10127 tree vectype_out, tree vectype_in, 10128 enum tree_code *code1, enum tree_code *code2, 10129 int *multi_step_cvt, 10130 vec<tree> *interm_types) 10131 { 10132 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 10133 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info); 10134 struct loop *vect_loop = NULL; 10135 machine_mode vec_mode; 10136 enum insn_code icode1, icode2; 10137 optab optab1, optab2; 10138 tree vectype = vectype_in; 10139 tree wide_vectype = vectype_out; 10140 enum tree_code c1, c2; 10141 int i; 10142 tree prev_type, intermediate_type; 10143 machine_mode intermediate_mode, prev_mode; 10144 optab optab3, optab4; 10145 10146 *multi_step_cvt = 0; 10147 if (loop_info) 10148 vect_loop = LOOP_VINFO_LOOP (loop_info); 10149 10150 switch (code) 10151 { 10152 case WIDEN_MULT_EXPR: 10153 /* The result of a vectorized widening operation usually requires 10154 two vectors (because the widened results do not fit into one vector). 10155 The generated vector results would normally be expected to be 10156 generated in the same order as in the original scalar computation, 10157 i.e. 
if 8 results are generated in each vector iteration, they are
10158 to be organized as follows:
10159 vect1: [res1,res2,res3,res4],
10160 vect2: [res5,res6,res7,res8].
10161
10162 However, in the special case that the result of the widening
10163 operation is used in a reduction computation only, the order doesn't
10164 matter (because when vectorizing a reduction we change the order of
10165 the computation). Some targets can take advantage of this and
10166 generate more efficient code. For example, targets like Altivec,
10167 that support widen_mult using a sequence of {mult_even,mult_odd}
10168 generate the following vectors:
10169 vect1: [res1,res3,res5,res7],
10170 vect2: [res2,res4,res6,res8].
10171
10172 When vectorizing outer-loops, we execute the inner-loop sequentially
10173 (each vectorized inner-loop iteration contributes to VF outer-loop
10174 iterations in parallel). We therefore don't allow changing the
10175 order of the computation in the inner-loop during outer-loop
10176 vectorization. */
10177 /* TODO: Another case in which order doesn't *really* matter is when we
10178 widen and then contract again, e.g. (short)((int)x * y >> 8).
10179 Normally, pack_trunc performs an even/odd permute, whereas the
10180 repack from an even/odd expansion would be an interleave, which
10181 would be significantly simpler for e.g. AVX2. */
10182 /* In any case, in order to avoid duplicating the code below, recurse
10183 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
10184 are properly set up for the caller. If we fail, we'll continue with
10185 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
10186 if (vect_loop
10187 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
10188 && !nested_in_vect_loop_p (vect_loop, stmt)
10189 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
10190 stmt, vectype_out, vectype_in,
10191 code1, code2, multi_step_cvt,
10192 interm_types))
10193 {
10194 /* Elements in a vector with vect_used_by_reduction property cannot
10195 be reordered if the use chain with this property does not have the
10196 same operation. One such example is s += a * b, where elements
10197 in a and b cannot be reordered. Here we check if the vector defined
10198 by STMT is only directly used in the reduction statement. */
10199 tree lhs = gimple_assign_lhs (stmt);
10200 use_operand_p dummy;
10201 gimple *use_stmt;
10202 stmt_vec_info use_stmt_info = NULL;
10203 if (single_imm_use (lhs, &dummy, &use_stmt)
10204 && (use_stmt_info = vinfo_for_stmt (use_stmt))
10205 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
10206 return true;
10207 }
10208 c1 = VEC_WIDEN_MULT_LO_EXPR;
10209 c2 = VEC_WIDEN_MULT_HI_EXPR;
10210 break;
10211
10212 case DOT_PROD_EXPR:
10213 c1 = DOT_PROD_EXPR;
10214 c2 = DOT_PROD_EXPR;
10215 break;
10216
10217 case SAD_EXPR:
10218 c1 = SAD_EXPR;
10219 c2 = SAD_EXPR;
10220 break;
10221
10222 case VEC_WIDEN_MULT_EVEN_EXPR:
10223 /* Support the recursion induced just above. */
10224 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
10225 c2 = VEC_WIDEN_MULT_ODD_EXPR;
10226 break;
10227
10228 case WIDEN_LSHIFT_EXPR:
10229 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
10230 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
10231 break;
10232
10233 CASE_CONVERT:
10234 c1 = VEC_UNPACK_LO_EXPR;
10235 c2 = VEC_UNPACK_HI_EXPR;
10236 break;
10237
10238 case FLOAT_EXPR:
10239 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
10240 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
10241 break;
10242
10243 case FIX_TRUNC_EXPR:
10244 /* ???
Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
10245 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
10246 computing the operation. */
10247 return false;
10248
10249 default:
10250 gcc_unreachable ();
10251 }
10252
10253 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
10254 std::swap (c1, c2);
10255
10256 if (code == FIX_TRUNC_EXPR)
10257 {
10258 /* The signedness is determined from output operand. */
10259 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
10260 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
10261 }
10262 else
10263 {
10264 optab1 = optab_for_tree_code (c1, vectype, optab_default);
10265 optab2 = optab_for_tree_code (c2, vectype, optab_default);
10266 }
10267
10268 if (!optab1 || !optab2)
10269 return false;
10270
10271 vec_mode = TYPE_MODE (vectype);
10272 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
10273 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
10274 return false;
10275
10276 *code1 = c1;
10277 *code2 = c2;
10278
10279 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
10280 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
10281 /* For scalar masks we may have different boolean
10282 vector types having the same QImode. Thus we
10283 add additional check for elements number. */
10284 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10285 || known_eq (TYPE_VECTOR_SUBPARTS (vectype),
10286 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));
10287
10288 /* Check if it's a multi-step conversion that can be done using intermediate
10289 types. */
10290
10291 prev_type = vectype;
10292 prev_mode = vec_mode;
10293
10294 if (!CONVERT_EXPR_CODE_P (code))
10295 return false;
10296
10297 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
10298 intermediate steps in promotion sequence. We try
10299 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
10300 not.
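For example, widening from a vector of chars to a vector of ints may
go through a vector of shorts; the loop below then records the short
vector type in INTERM_TYPES and sets *MULTI_STEP_CVT to 1.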
*/ 10301 interm_types->create (MAX_INTERM_CVT_STEPS); 10302 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++) 10303 { 10304 intermediate_mode = insn_data[icode1].operand[0].mode; 10305 if (VECTOR_BOOLEAN_TYPE_P (prev_type)) 10306 { 10307 intermediate_type = vect_halve_mask_nunits (prev_type); 10308 if (intermediate_mode != TYPE_MODE (intermediate_type)) 10309 return false; 10310 } 10311 else 10312 intermediate_type 10313 = lang_hooks.types.type_for_mode (intermediate_mode, 10314 TYPE_UNSIGNED (prev_type)); 10315 10316 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default); 10317 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default); 10318 10319 if (!optab3 || !optab4 10320 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing 10321 || insn_data[icode1].operand[0].mode != intermediate_mode 10322 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing 10323 || insn_data[icode2].operand[0].mode != intermediate_mode 10324 || ((icode1 = optab_handler (optab3, intermediate_mode)) 10325 == CODE_FOR_nothing) 10326 || ((icode2 = optab_handler (optab4, intermediate_mode)) 10327 == CODE_FOR_nothing)) 10328 break; 10329 10330 interm_types->quick_push (intermediate_type); 10331 (*multi_step_cvt)++; 10332 10333 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype) 10334 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype)) 10335 return (!VECTOR_BOOLEAN_TYPE_P (vectype) 10336 || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type), 10337 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2)); 10338 10339 prev_type = intermediate_type; 10340 prev_mode = intermediate_mode; 10341 } 10342 10343 interm_types->release (); 10344 return false; 10345 } 10346 10347 10348 /* Function supportable_narrowing_operation 10349 10350 Check whether an operation represented by the code CODE is a 10351 narrowing operation that is supported by the target platform in 10352 vector form (i.e., when operating on arguments of type VECTYPE_IN 10353 and producing a result of type VECTYPE_OUT). 10354 10355 Narrowing operations we currently support are NOP (CONVERT) and 10356 FIX_TRUNC. This function checks if these operations are supported by 10357 the target platform directly via vector tree-codes. 10358 10359 Output: 10360 - CODE1 is the code of a vector operation to be used when 10361 vectorizing the operation, if available. 10362 - MULTI_STEP_CVT determines the number of required intermediate steps in 10363 case of multi-step conversion (like int->short->char - in that case 10364 MULTI_STEP_CVT will be 1). 10365 - INTERM_TYPES contains the intermediate type required to perform the 10366 narrowing operation (short in the above example). */ 10367 10368 bool 10369 supportable_narrowing_operation (enum tree_code code, 10370 tree vectype_out, tree vectype_in, 10371 enum tree_code *code1, int *multi_step_cvt, 10372 vec<tree> *interm_types) 10373 { 10374 machine_mode vec_mode; 10375 enum insn_code icode1; 10376 optab optab1, interm_optab; 10377 tree vectype = vectype_in; 10378 tree narrow_vectype = vectype_out; 10379 enum tree_code c1; 10380 tree intermediate_type, prev_type; 10381 machine_mode intermediate_mode, prev_mode; 10382 int i; 10383 bool uns; 10384 10385 *multi_step_cvt = 0; 10386 switch (code) 10387 { 10388 CASE_CONVERT: 10389 c1 = VEC_PACK_TRUNC_EXPR; 10390 break; 10391 10392 case FIX_TRUNC_EXPR: 10393 c1 = VEC_PACK_FIX_TRUNC_EXPR; 10394 break; 10395 10396 case FLOAT_EXPR: 10397 /* ??? 
Not yet implemented due to missing VEC_PACK_FLOAT_EXPR 10398 tree code and optabs used for computing the operation. */ 10399 return false; 10400 10401 default: 10402 gcc_unreachable (); 10403 } 10404 10405 if (code == FIX_TRUNC_EXPR) 10406 /* The signedness is determined from output operand. */ 10407 optab1 = optab_for_tree_code (c1, vectype_out, optab_default); 10408 else 10409 optab1 = optab_for_tree_code (c1, vectype, optab_default); 10410 10411 if (!optab1) 10412 return false; 10413 10414 vec_mode = TYPE_MODE (vectype); 10415 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing) 10416 return false; 10417 10418 *code1 = c1; 10419 10420 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype)) 10421 /* For scalar masks we may have different boolean 10422 vector types having the same QImode. Thus we 10423 add additional check for elements number. */ 10424 return (!VECTOR_BOOLEAN_TYPE_P (vectype) 10425 || known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2, 10426 TYPE_VECTOR_SUBPARTS (narrow_vectype))); 10427 10428 /* Check if it's a multi-step conversion that can be done using intermediate 10429 types. */ 10430 prev_mode = vec_mode; 10431 prev_type = vectype; 10432 if (code == FIX_TRUNC_EXPR) 10433 uns = TYPE_UNSIGNED (vectype_out); 10434 else 10435 uns = TYPE_UNSIGNED (vectype); 10436 10437 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer 10438 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more 10439 costly than signed. */ 10440 if (code == FIX_TRUNC_EXPR && uns) 10441 { 10442 enum insn_code icode2; 10443 10444 intermediate_type 10445 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0); 10446 interm_optab 10447 = optab_for_tree_code (c1, intermediate_type, optab_default); 10448 if (interm_optab != unknown_optab 10449 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing 10450 && insn_data[icode1].operand[0].mode 10451 == insn_data[icode2].operand[0].mode) 10452 { 10453 uns = false; 10454 optab1 = interm_optab; 10455 icode1 = icode2; 10456 } 10457 } 10458 10459 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS 10460 intermediate steps in promotion sequence. We try 10461 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. 
*/ 10462 interm_types->create (MAX_INTERM_CVT_STEPS); 10463 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++) 10464 { 10465 intermediate_mode = insn_data[icode1].operand[0].mode; 10466 if (VECTOR_BOOLEAN_TYPE_P (prev_type)) 10467 { 10468 intermediate_type = vect_double_mask_nunits (prev_type); 10469 if (intermediate_mode != TYPE_MODE (intermediate_type)) 10470 return false; 10471 } 10472 else 10473 intermediate_type 10474 = lang_hooks.types.type_for_mode (intermediate_mode, uns); 10475 interm_optab 10476 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type, 10477 optab_default); 10478 if (!interm_optab 10479 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing) 10480 || insn_data[icode1].operand[0].mode != intermediate_mode 10481 || ((icode1 = optab_handler (interm_optab, intermediate_mode)) 10482 == CODE_FOR_nothing)) 10483 break; 10484 10485 interm_types->quick_push (intermediate_type); 10486 (*multi_step_cvt)++; 10487 10488 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype)) 10489 return (!VECTOR_BOOLEAN_TYPE_P (vectype) 10490 || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2, 10491 TYPE_VECTOR_SUBPARTS (narrow_vectype))); 10492 10493 prev_mode = intermediate_mode; 10494 prev_type = intermediate_type; 10495 optab1 = interm_optab; 10496 } 10497 10498 interm_types->release (); 10499 return false; 10500 } 10501 10502 /* Generate and return a statement that sets vector mask MASK such that 10503 MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I. */ 10504 10505 gcall * 10506 vect_gen_while (tree mask, tree start_index, tree end_index) 10507 { 10508 tree cmp_type = TREE_TYPE (start_index); 10509 tree mask_type = TREE_TYPE (mask); 10510 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT, 10511 cmp_type, mask_type, 10512 OPTIMIZE_FOR_SPEED)); 10513 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3, 10514 start_index, end_index, 10515 build_zero_cst (mask_type)); 10516 gimple_call_set_lhs (call, mask); 10517 return call; 10518 } 10519 10520 /* Generate a vector mask of type MASK_TYPE for which index I is false iff 10521 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */ 10522 10523 tree 10524 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index, 10525 tree end_index) 10526 { 10527 tree tmp = make_ssa_name (mask_type); 10528 gcall *call = vect_gen_while (tmp, start_index, end_index); 10529 gimple_seq_add_stmt (seq, call); 10530 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp); 10531 } 10532