/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006-2017 Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "ssa.h"
#include "expmed.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "dumpfile.h"
#include "builtins.h"
#include "internal-fn.h"
#include "case-cfn-macros.h"

/* Pattern recognition functions  */
static gimple *vect_recog_widen_sum_pattern (vec<gimple *> *, tree *,
					     tree *);
static gimple *vect_recog_widen_mult_pattern (vec<gimple *> *, tree *,
					      tree *);
static gimple *vect_recog_dot_prod_pattern (vec<gimple *> *, tree *,
					    tree *);
static gimple *vect_recog_sad_pattern (vec<gimple *> *, tree *,
				       tree *);
static gimple *vect_recog_pow_pattern (vec<gimple *> *, tree *, tree *);
static gimple *vect_recog_over_widening_pattern (vec<gimple *> *, tree *,
						 tree *);
static gimple *vect_recog_widen_shift_pattern (vec<gimple *> *,
					       tree *, tree *);
static gimple *vect_recog_rotate_pattern (vec<gimple *> *, tree *, tree *);
static gimple *vect_recog_vector_vector_shift_pattern (vec<gimple *> *,
						       tree *, tree *);
static gimple *vect_recog_divmod_pattern (vec<gimple *> *,
					  tree *, tree *);

static gimple *vect_recog_mult_pattern (vec<gimple *> *,
					tree *, tree *);

static gimple *vect_recog_mixed_size_cond_pattern (vec<gimple *> *,
						   tree *, tree *);
static gimple *vect_recog_bool_pattern (vec<gimple *> *, tree *, tree *);
static gimple *vect_recog_mask_conversion_pattern (vec<gimple *> *, tree *, tree *);

struct vect_recog_func
{
  vect_recog_func_ptr fn;
  const char *name;
};

/* Note that ordering matters - the first pattern matching on a stmt
   is taken which means usually the more complex one needs to precede
   the less complex ones (widen_sum only after dot_prod or sad for example).
*/ 82 static vect_recog_func vect_vect_recog_func_ptrs[NUM_PATTERNS] = { 83 { vect_recog_widen_mult_pattern, "widen_mult" }, 84 { vect_recog_dot_prod_pattern, "dot_prod" }, 85 { vect_recog_sad_pattern, "sad" }, 86 { vect_recog_widen_sum_pattern, "widen_sum" }, 87 { vect_recog_pow_pattern, "pow" }, 88 { vect_recog_widen_shift_pattern, "widen_shift" }, 89 { vect_recog_over_widening_pattern, "over_widening" }, 90 { vect_recog_rotate_pattern, "rotate" }, 91 { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" }, 92 { vect_recog_divmod_pattern, "divmod" }, 93 { vect_recog_mult_pattern, "mult" }, 94 { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" }, 95 { vect_recog_bool_pattern, "bool" }, 96 { vect_recog_mask_conversion_pattern, "mask_conversion" } 97 }; 98 99 static inline void 100 append_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt) 101 { 102 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info), 103 stmt); 104 } 105 106 static inline void 107 new_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt) 108 { 109 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL; 110 append_pattern_def_seq (stmt_info, stmt); 111 } 112 113 /* Check whether STMT2 is in the same loop or basic block as STMT1. 114 Which of the two applies depends on whether we're currently doing 115 loop-based or basic-block-based vectorization, as determined by 116 the vinfo_for_stmt for STMT1 (which must be defined). 117 118 If this returns true, vinfo_for_stmt for STMT2 is guaranteed 119 to be defined as well. */ 120 121 static bool 122 vect_same_loop_or_bb_p (gimple *stmt1, gimple *stmt2) 123 { 124 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1); 125 return vect_stmt_in_region_p (stmt_vinfo->vinfo, stmt2); 126 } 127 128 /* If the LHS of DEF_STMT has a single use, and that statement is 129 in the same loop or basic block, return it. */ 130 131 static gimple * 132 vect_single_imm_use (gimple *def_stmt) 133 { 134 tree lhs = gimple_assign_lhs (def_stmt); 135 use_operand_p use_p; 136 gimple *use_stmt; 137 138 if (!single_imm_use (lhs, &use_p, &use_stmt)) 139 return NULL; 140 141 if (!vect_same_loop_or_bb_p (def_stmt, use_stmt)) 142 return NULL; 143 144 return use_stmt; 145 } 146 147 /* Check whether NAME, an ssa-name used in USE_STMT, 148 is a result of a type promotion, such that: 149 DEF_STMT: NAME = NOP (name0) 150 If CHECK_SIGN is TRUE, check that either both types are signed or both are 151 unsigned. 
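
   For illustration only (this example is not part of the original
   comment): given the scalar source

     short s;
     int i = (int) s;

   calling type_conversion_p on the ssa-name for 'i' with CHECK_SIGN true
   is expected to return true with *ORIG_TYPE == short and
   *PROMOTION == true, because int has at least twice the precision of
   short.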
 */

static bool
type_conversion_p (tree name, gimple *use_stmt, bool check_sign,
		   tree *orig_type, gimple **def_stmt, bool *promotion)
{
  gimple *dummy_gimple;
  stmt_vec_info stmt_vinfo;
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;

  stmt_vinfo = vinfo_for_stmt (use_stmt);
  if (!vect_is_simple_use (name, stmt_vinfo->vinfo, def_stmt, &dt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (!*def_stmt)
    return false;

  if (dt == vect_internal_def)
    {
      stmt_vec_info def_vinfo = vinfo_for_stmt (*def_stmt);
      if (STMT_VINFO_IN_PATTERN_P (def_vinfo))
	return false;
    }

  if (!is_gimple_assign (*def_stmt))
    return false;

  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *orig_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
    return false;

  if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
    *promotion = true;
  else
    *promotion = false;

  if (!vect_is_simple_use (oprnd0, stmt_vinfo->vinfo, &dummy_gimple, &dt))
    return false;

  return true;
}

/* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
   is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var.  */

static tree
vect_recog_temp_ssa_var (tree type, gimple *stmt)
{
  return make_temp_ssa_name (type, stmt, "patt");
}

/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE1 prod;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
   same size of 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).
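
   For illustration only (this example is not part of the original
   comment), a typical source loop that feeds this pattern is:

     int
     dot_prod (short *x, short *y, int n)
     {
       int sum = 0;
       for (int i = 0; i < n; i++)
         sum += (int) x[i] * (int) y[i];
       return sum;
     }

   Here 'type' is short, TYPE1 and TYPE2 are int, and the multiply-add in
   the loop body becomes DOT_PROD_EXPR <x[i], y[i], sum_0>.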
*/ 258 259 static gimple * 260 vect_recog_dot_prod_pattern (vec<gimple *> *stmts, tree *type_in, 261 tree *type_out) 262 { 263 gimple *stmt, *last_stmt = (*stmts)[0]; 264 tree oprnd0, oprnd1; 265 tree oprnd00, oprnd01; 266 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); 267 tree type, half_type; 268 gimple *pattern_stmt; 269 tree prod_type; 270 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo); 271 struct loop *loop; 272 tree var; 273 bool promotion; 274 275 if (!loop_info) 276 return NULL; 277 278 loop = LOOP_VINFO_LOOP (loop_info); 279 280 /* We don't allow changing the order of the computation in the inner-loop 281 when doing outer-loop vectorization. */ 282 if (loop && nested_in_vect_loop_p (loop, last_stmt)) 283 return NULL; 284 285 if (!is_gimple_assign (last_stmt)) 286 return NULL; 287 288 type = gimple_expr_type (last_stmt); 289 290 /* Look for the following pattern 291 DX = (TYPE1) X; 292 DY = (TYPE1) Y; 293 DPROD = DX * DY; 294 DDPROD = (TYPE2) DPROD; 295 sum_1 = DDPROD + sum_0; 296 In which 297 - DX is double the size of X 298 - DY is double the size of Y 299 - DX, DY, DPROD all have the same type 300 - sum is the same size of DPROD or bigger 301 - sum has been recognized as a reduction variable. 302 303 This is equivalent to: 304 DPROD = X w* Y; #widen mult 305 sum_1 = DPROD w+ sum_0; #widen summation 306 or 307 DPROD = X w* Y; #widen mult 308 sum_1 = DPROD + sum_0; #summation 309 */ 310 311 /* Starting from LAST_STMT, follow the defs of its uses in search 312 of the above pattern. */ 313 314 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR) 315 return NULL; 316 317 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo)) 318 { 319 /* Has been detected as widening-summation? */ 320 321 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo); 322 type = gimple_expr_type (stmt); 323 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR) 324 return NULL; 325 oprnd0 = gimple_assign_rhs1 (stmt); 326 oprnd1 = gimple_assign_rhs2 (stmt); 327 half_type = TREE_TYPE (oprnd0); 328 } 329 else 330 { 331 gimple *def_stmt; 332 333 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def 334 && ! STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_vinfo)) 335 return NULL; 336 oprnd0 = gimple_assign_rhs1 (last_stmt); 337 oprnd1 = gimple_assign_rhs2 (last_stmt); 338 if (!types_compatible_p (TREE_TYPE (oprnd0), type) 339 || !types_compatible_p (TREE_TYPE (oprnd1), type)) 340 return NULL; 341 stmt = last_stmt; 342 343 if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt, 344 &promotion) 345 && promotion) 346 { 347 stmt = def_stmt; 348 oprnd0 = gimple_assign_rhs1 (stmt); 349 } 350 else 351 half_type = type; 352 } 353 354 /* So far so good. Since last_stmt was detected as a (summation) reduction, 355 we know that oprnd1 is the reduction variable (defined by a loop-header 356 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body. 357 Left to check that oprnd0 is defined by a (widen_)mult_expr */ 358 if (TREE_CODE (oprnd0) != SSA_NAME) 359 return NULL; 360 361 prod_type = half_type; 362 stmt = SSA_NAME_DEF_STMT (oprnd0); 363 364 /* It could not be the dot_prod pattern if the stmt is outside the loop. */ 365 if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt))) 366 return NULL; 367 368 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi 369 inside the loop (in case we are analyzing an outer-loop). 
*/ 370 if (!is_gimple_assign (stmt)) 371 return NULL; 372 stmt_vinfo = vinfo_for_stmt (stmt); 373 gcc_assert (stmt_vinfo); 374 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def) 375 return NULL; 376 if (gimple_assign_rhs_code (stmt) != MULT_EXPR) 377 return NULL; 378 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo)) 379 { 380 /* Has been detected as a widening multiplication? */ 381 382 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo); 383 if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR) 384 return NULL; 385 stmt_vinfo = vinfo_for_stmt (stmt); 386 gcc_assert (stmt_vinfo); 387 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def); 388 oprnd00 = gimple_assign_rhs1 (stmt); 389 oprnd01 = gimple_assign_rhs2 (stmt); 390 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (last_stmt)) 391 = STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo); 392 } 393 else 394 { 395 tree half_type0, half_type1; 396 gimple *def_stmt; 397 tree oprnd0, oprnd1; 398 399 oprnd0 = gimple_assign_rhs1 (stmt); 400 oprnd1 = gimple_assign_rhs2 (stmt); 401 if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type) 402 || !types_compatible_p (TREE_TYPE (oprnd1), prod_type)) 403 return NULL; 404 if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt, 405 &promotion) 406 || !promotion) 407 return NULL; 408 oprnd00 = gimple_assign_rhs1 (def_stmt); 409 if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt, 410 &promotion) 411 || !promotion) 412 return NULL; 413 oprnd01 = gimple_assign_rhs1 (def_stmt); 414 if (!types_compatible_p (half_type0, half_type1)) 415 return NULL; 416 if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2) 417 return NULL; 418 } 419 420 half_type = TREE_TYPE (oprnd00); 421 *type_in = half_type; 422 *type_out = type; 423 424 /* Pattern detected. Create a stmt to be used to replace the pattern: */ 425 var = vect_recog_temp_ssa_var (type, NULL); 426 pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR, 427 oprnd00, oprnd01, oprnd1); 428 429 if (dump_enabled_p ()) 430 { 431 dump_printf_loc (MSG_NOTE, vect_location, 432 "vect_recog_dot_prod_pattern: detected: "); 433 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0); 434 } 435 436 return pattern_stmt; 437 } 438 439 440 /* Function vect_recog_sad_pattern 441 442 Try to find the following Sum of Absolute Difference (SAD) pattern: 443 444 type x_t, y_t; 445 signed TYPE1 diff, abs_diff; 446 TYPE2 sum = init; 447 loop: 448 sum_0 = phi <init, sum_1> 449 S1 x_t = ... 450 S2 y_t = ... 451 S3 x_T = (TYPE1) x_t; 452 S4 y_T = (TYPE1) y_t; 453 S5 diff = x_T - y_T; 454 S6 abs_diff = ABS_EXPR <diff>; 455 [S7 abs_diff = (TYPE2) abs_diff; #optional] 456 S8 sum_1 = abs_diff + sum_0; 457 458 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the 459 same size of 'TYPE1' or bigger. This is a special case of a reduction 460 computation. 461 462 Input: 463 464 * STMTS: Contains a stmt from which the pattern search begins. In the 465 example, when this function is called with S8, the pattern 466 {S3,S4,S5,S6,S7,S8} will be detected. 467 468 Output: 469 470 * TYPE_IN: The type of the input arguments to the pattern. 471 472 * TYPE_OUT: The type of the output of this pattern. 473 474 * Return value: A new stmt that will be used to replace the sequence of 475 stmts that constitute the pattern. 
In this case it will be: 476 SAD_EXPR <x_t, y_t, sum_0> 477 */ 478 479 static gimple * 480 vect_recog_sad_pattern (vec<gimple *> *stmts, tree *type_in, 481 tree *type_out) 482 { 483 gimple *last_stmt = (*stmts)[0]; 484 tree sad_oprnd0, sad_oprnd1; 485 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); 486 tree half_type; 487 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo); 488 struct loop *loop; 489 bool promotion; 490 491 if (!loop_info) 492 return NULL; 493 494 loop = LOOP_VINFO_LOOP (loop_info); 495 496 /* We don't allow changing the order of the computation in the inner-loop 497 when doing outer-loop vectorization. */ 498 if (loop && nested_in_vect_loop_p (loop, last_stmt)) 499 return NULL; 500 501 if (!is_gimple_assign (last_stmt)) 502 return NULL; 503 504 tree sum_type = gimple_expr_type (last_stmt); 505 506 /* Look for the following pattern 507 DX = (TYPE1) X; 508 DY = (TYPE1) Y; 509 DDIFF = DX - DY; 510 DAD = ABS_EXPR <DDIFF>; 511 DDPROD = (TYPE2) DPROD; 512 sum_1 = DAD + sum_0; 513 In which 514 - DX is at least double the size of X 515 - DY is at least double the size of Y 516 - DX, DY, DDIFF, DAD all have the same type 517 - sum is the same size of DAD or bigger 518 - sum has been recognized as a reduction variable. 519 520 This is equivalent to: 521 DDIFF = X w- Y; #widen sub 522 DAD = ABS_EXPR <DDIFF>; 523 sum_1 = DAD w+ sum_0; #widen summation 524 or 525 DDIFF = X w- Y; #widen sub 526 DAD = ABS_EXPR <DDIFF>; 527 sum_1 = DAD + sum_0; #summation 528 */ 529 530 /* Starting from LAST_STMT, follow the defs of its uses in search 531 of the above pattern. */ 532 533 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR) 534 return NULL; 535 536 tree plus_oprnd0, plus_oprnd1; 537 538 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo)) 539 { 540 /* Has been detected as widening-summation? */ 541 542 gimple *stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo); 543 sum_type = gimple_expr_type (stmt); 544 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR) 545 return NULL; 546 plus_oprnd0 = gimple_assign_rhs1 (stmt); 547 plus_oprnd1 = gimple_assign_rhs2 (stmt); 548 half_type = TREE_TYPE (plus_oprnd0); 549 } 550 else 551 { 552 gimple *def_stmt; 553 554 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def 555 && ! STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_vinfo)) 556 return NULL; 557 plus_oprnd0 = gimple_assign_rhs1 (last_stmt); 558 plus_oprnd1 = gimple_assign_rhs2 (last_stmt); 559 if (!types_compatible_p (TREE_TYPE (plus_oprnd0), sum_type) 560 || !types_compatible_p (TREE_TYPE (plus_oprnd1), sum_type)) 561 return NULL; 562 563 /* The type conversion could be promotion, demotion, 564 or just signed -> unsigned. */ 565 if (type_conversion_p (plus_oprnd0, last_stmt, false, 566 &half_type, &def_stmt, &promotion)) 567 plus_oprnd0 = gimple_assign_rhs1 (def_stmt); 568 else 569 half_type = sum_type; 570 } 571 572 /* So far so good. Since last_stmt was detected as a (summation) reduction, 573 we know that plus_oprnd1 is the reduction variable (defined by a loop-header 574 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body. 575 Then check that plus_oprnd0 is defined by an abs_expr. */ 576 577 if (TREE_CODE (plus_oprnd0) != SSA_NAME) 578 return NULL; 579 580 tree abs_type = half_type; 581 gimple *abs_stmt = SSA_NAME_DEF_STMT (plus_oprnd0); 582 583 /* It could not be the sad pattern if the abs_stmt is outside the loop. */ 584 if (!gimple_bb (abs_stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (abs_stmt))) 585 return NULL; 586 587 /* FORNOW. 
Can continue analyzing the def-use chain when this stmt in a phi 588 inside the loop (in case we are analyzing an outer-loop). */ 589 if (!is_gimple_assign (abs_stmt)) 590 return NULL; 591 592 stmt_vec_info abs_stmt_vinfo = vinfo_for_stmt (abs_stmt); 593 gcc_assert (abs_stmt_vinfo); 594 if (STMT_VINFO_DEF_TYPE (abs_stmt_vinfo) != vect_internal_def) 595 return NULL; 596 if (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR) 597 return NULL; 598 599 tree abs_oprnd = gimple_assign_rhs1 (abs_stmt); 600 if (!types_compatible_p (TREE_TYPE (abs_oprnd), abs_type)) 601 return NULL; 602 if (TYPE_UNSIGNED (abs_type)) 603 return NULL; 604 605 /* We then detect if the operand of abs_expr is defined by a minus_expr. */ 606 607 if (TREE_CODE (abs_oprnd) != SSA_NAME) 608 return NULL; 609 610 gimple *diff_stmt = SSA_NAME_DEF_STMT (abs_oprnd); 611 612 /* It could not be the sad pattern if the diff_stmt is outside the loop. */ 613 if (!gimple_bb (diff_stmt) 614 || !flow_bb_inside_loop_p (loop, gimple_bb (diff_stmt))) 615 return NULL; 616 617 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi 618 inside the loop (in case we are analyzing an outer-loop). */ 619 if (!is_gimple_assign (diff_stmt)) 620 return NULL; 621 622 stmt_vec_info diff_stmt_vinfo = vinfo_for_stmt (diff_stmt); 623 gcc_assert (diff_stmt_vinfo); 624 if (STMT_VINFO_DEF_TYPE (diff_stmt_vinfo) != vect_internal_def) 625 return NULL; 626 if (gimple_assign_rhs_code (diff_stmt) != MINUS_EXPR) 627 return NULL; 628 629 tree half_type0, half_type1; 630 gimple *def_stmt; 631 632 tree minus_oprnd0 = gimple_assign_rhs1 (diff_stmt); 633 tree minus_oprnd1 = gimple_assign_rhs2 (diff_stmt); 634 635 if (!types_compatible_p (TREE_TYPE (minus_oprnd0), abs_type) 636 || !types_compatible_p (TREE_TYPE (minus_oprnd1), abs_type)) 637 return NULL; 638 if (!type_conversion_p (minus_oprnd0, diff_stmt, false, 639 &half_type0, &def_stmt, &promotion) 640 || !promotion) 641 return NULL; 642 sad_oprnd0 = gimple_assign_rhs1 (def_stmt); 643 644 if (!type_conversion_p (minus_oprnd1, diff_stmt, false, 645 &half_type1, &def_stmt, &promotion) 646 || !promotion) 647 return NULL; 648 sad_oprnd1 = gimple_assign_rhs1 (def_stmt); 649 650 if (!types_compatible_p (half_type0, half_type1)) 651 return NULL; 652 if (TYPE_PRECISION (abs_type) < TYPE_PRECISION (half_type0) * 2 653 || TYPE_PRECISION (sum_type) < TYPE_PRECISION (half_type0) * 2) 654 return NULL; 655 656 *type_in = TREE_TYPE (sad_oprnd0); 657 *type_out = sum_type; 658 659 /* Pattern detected. Create a stmt to be used to replace the pattern: */ 660 tree var = vect_recog_temp_ssa_var (sum_type, NULL); 661 gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd0, 662 sad_oprnd1, plus_oprnd1); 663 664 if (dump_enabled_p ()) 665 { 666 dump_printf_loc (MSG_NOTE, vect_location, 667 "vect_recog_sad_pattern: detected: "); 668 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0); 669 } 670 671 return pattern_stmt; 672 } 673 674 675 /* Handle widening operation by a constant. At the moment we support MULT_EXPR 676 and LSHIFT_EXPR. 677 678 For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR 679 we check that CONST_OPRND is less or equal to the size of HALF_TYPE. 
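
   For illustration only (these numbers are not part of the original
   comment): with HALF_TYPE = short and TYPE = int,

     int res = (int) a_short * 30000;

   is handled directly because 30000 fits in short, whereas a left shift
   by 20 is rejected since it exceeds the 16-bit precision of short.
   With HALF_TYPE = char and TYPE = int, a shift by 12 is too large for
   char but fits the 16-bit intermediate type described below.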

   Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
   HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
   that satisfies the above restrictions, we can perform a widening operation
   from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
   with a_it = (interm_type) a_t;  Store such operation in *WSTMT.  */

static bool
vect_handle_widen_op_by_const (gimple *stmt, enum tree_code code,
			       tree const_oprnd, tree *oprnd,
			       gimple **wstmt, tree type,
			       tree *half_type, gimple *def_stmt)
{
  tree new_type, new_oprnd;

  if (code != MULT_EXPR && code != LSHIFT_EXPR)
    return false;

  if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
	|| (code == LSHIFT_EXPR
	    && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
		!= 1))
      && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
    {
      /* CONST_OPRND is a constant of HALF_TYPE.  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      return true;
    }

  if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
    return false;

  if (!vect_same_loop_or_bb_p (stmt, def_stmt))
    return false;

  /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
     a type 2 times bigger than HALF_TYPE.  */
  new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
					     TYPE_UNSIGNED (type));
  if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
      || (code == LSHIFT_EXPR
	  && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
    return false;

  /* Use NEW_TYPE for widening operation and create a_T = (NEW_TYPE) a_t;  */
  *oprnd = gimple_assign_rhs1 (def_stmt);
  new_oprnd = make_ssa_name (new_type);
  *wstmt = gimple_build_assign (new_oprnd, NOP_EXPR, *oprnd);
  *oprnd = new_oprnd;

  *half_type = new_type;
  return true;
}


/* Function vect_recog_widen_mult_pattern

   Try to find the following pattern:

     type1 a_t;
     type2 b_t;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;

   where type 'TYPE' is at least double the size of type 'type1' and 'type2'.

   Also detect unsigned cases:

     unsigned type1 a_t;
     unsigned type2 b_t;
     unsigned TYPE u_prod_T;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;
     S6  u_prod_T = (unsigned TYPE) prod_T;

   and multiplication by constants:

     type a_t;
     TYPE a_T, prod_T;

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
     S5  prod_T = a_T * CONST;

   A special case of multiplication by constants is when 'TYPE' is 4 times
   bigger than 'type', but CONST fits an intermediate type 2 times smaller
   than 'TYPE'.  In that case we create an additional pattern stmt for S3
   to create a variable of the intermediate type, and perform widen-mult
   on the intermediate type as well:

     type a_t;
     interm_type a_it;
     TYPE a_T, prod_T, prod_T';

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
           '--> a_it = (interm_type) a_t;
     S5  prod_T = a_T * CONST;
           '--> prod_T' = a_it w* CONST;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
   is detected.
In case of unsigned widen-mult, the original stmt (S5) is 795 replaced with S6 in STMTS. In case of multiplication by a constant 796 of an intermediate type (the last case above), STMTS also contains S3 797 (inserted before S5). 798 799 Output: 800 801 * TYPE_IN: The type of the input arguments to the pattern. 802 803 * TYPE_OUT: The type of the output of this pattern. 804 805 * Return value: A new stmt that will be used to replace the sequence of 806 stmts that constitute the pattern. In this case it will be: 807 WIDEN_MULT <a_t, b_t> 808 If the result of WIDEN_MULT needs to be converted to a larger type, the 809 returned stmt will be this type conversion stmt. 810 */ 811 812 static gimple * 813 vect_recog_widen_mult_pattern (vec<gimple *> *stmts, 814 tree *type_in, tree *type_out) 815 { 816 gimple *last_stmt = stmts->pop (); 817 gimple *def_stmt0, *def_stmt1; 818 tree oprnd0, oprnd1; 819 tree type, half_type0, half_type1; 820 gimple *new_stmt = NULL, *pattern_stmt = NULL; 821 tree vectype, vecitype; 822 tree var; 823 enum tree_code dummy_code; 824 int dummy_int; 825 vec<tree> dummy_vec; 826 bool op1_ok; 827 bool promotion; 828 829 if (!is_gimple_assign (last_stmt)) 830 return NULL; 831 832 type = gimple_expr_type (last_stmt); 833 834 /* Starting from LAST_STMT, follow the defs of its uses in search 835 of the above pattern. */ 836 837 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR) 838 return NULL; 839 840 oprnd0 = gimple_assign_rhs1 (last_stmt); 841 oprnd1 = gimple_assign_rhs2 (last_stmt); 842 if (!types_compatible_p (TREE_TYPE (oprnd0), type) 843 || !types_compatible_p (TREE_TYPE (oprnd1), type)) 844 return NULL; 845 846 /* Check argument 0. */ 847 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0, 848 &promotion) 849 || !promotion) 850 return NULL; 851 /* Check argument 1. */ 852 op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1, 853 &def_stmt1, &promotion); 854 855 if (op1_ok && promotion) 856 { 857 oprnd0 = gimple_assign_rhs1 (def_stmt0); 858 oprnd1 = gimple_assign_rhs1 (def_stmt1); 859 } 860 else 861 { 862 if (TREE_CODE (oprnd1) == INTEGER_CST 863 && TREE_CODE (half_type0) == INTEGER_TYPE 864 && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1, 865 &oprnd0, &new_stmt, type, 866 &half_type0, def_stmt0)) 867 { 868 half_type1 = half_type0; 869 oprnd1 = fold_convert (half_type1, oprnd1); 870 } 871 else 872 return NULL; 873 } 874 875 /* If the two arguments have different sizes, convert the one with 876 the smaller type into the larger type. */ 877 if (TYPE_PRECISION (half_type0) != TYPE_PRECISION (half_type1)) 878 { 879 /* If we already used up the single-stmt slot give up. */ 880 if (new_stmt) 881 return NULL; 882 883 tree* oprnd = NULL; 884 gimple *def_stmt = NULL; 885 886 if (TYPE_PRECISION (half_type0) < TYPE_PRECISION (half_type1)) 887 { 888 def_stmt = def_stmt0; 889 half_type0 = half_type1; 890 oprnd = &oprnd0; 891 } 892 else 893 { 894 def_stmt = def_stmt1; 895 half_type1 = half_type0; 896 oprnd = &oprnd1; 897 } 898 899 tree old_oprnd = gimple_assign_rhs1 (def_stmt); 900 tree new_oprnd = make_ssa_name (half_type0); 901 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, old_oprnd); 902 *oprnd = new_oprnd; 903 } 904 905 /* Handle unsigned case. Look for 906 S6 u_prod_T = (unsigned TYPE) prod_T; 907 Use unsigned TYPE as the type for WIDEN_MULT_EXPR. 
 */
  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
    {
      gimple *use_stmt;
      tree use_lhs;
      tree use_type;

      if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
	return NULL;

      use_stmt = vect_single_imm_use (last_stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt)
	  || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
	return NULL;

      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      if (!INTEGRAL_TYPE_P (use_type)
	  || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
	  || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
	return NULL;

      type = use_type;
      last_stmt = use_stmt;
    }

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;

  /* If TYPE is more than twice larger than HALF_TYPE, we use WIDEN_MULT
     to get an intermediate result of type ITYPE.  In this case we need
     to build a statement to convert this intermediate result to type TYPE.  */
  tree itype = type;
  if (TYPE_PRECISION (type) > TYPE_PRECISION (half_type0) * 2)
    itype = build_nonstandard_integer_type
	      (GET_MODE_BITSIZE (TYPE_MODE (half_type0)) * 2,
	       TYPE_UNSIGNED (type));

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_recog_widen_mult_pattern: detected:\n");

  /* Check target support  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vecitype = get_vectype_for_scalar_type (itype);
  if (!vectype
      || !vecitype
      || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
					  vecitype, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = get_vectype_for_scalar_type (type);

  /* Pattern supported.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (itype, NULL);
  pattern_stmt = gimple_build_assign (var, WIDEN_MULT_EXPR, oprnd0, oprnd1);

  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;

  /* If the original two operands have different sizes, we may need to convert
     the smaller one into the larger type.  If this is the case, at this point
     the new stmt is already built.  */
  if (new_stmt)
    {
      append_pattern_def_seq (stmt_vinfo, new_stmt);
      stmt_vec_info new_stmt_info
	= new_stmt_vec_info (new_stmt, stmt_vinfo->vinfo);
      set_vinfo_for_stmt (new_stmt, new_stmt_info);
      STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
    }

  /* If ITYPE is not TYPE, we need to build a type conversion stmt to convert
     the result of the widen-mult operation into type TYPE.
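     For illustration only (this example is not part of the original
     comment): for

       signed char a, b;
       int prod = (int) a * (int) b;

     HALF_TYPE is signed char and TYPE is int, so ITYPE is a 16-bit
     integer type; the WIDEN_MULT_EXPR built above produces a 16-bit
     result and the conversion to int is added below.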
*/ 985 if (itype != type) 986 { 987 append_pattern_def_seq (stmt_vinfo, pattern_stmt); 988 stmt_vec_info pattern_stmt_info 989 = new_stmt_vec_info (pattern_stmt, stmt_vinfo->vinfo); 990 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info); 991 STMT_VINFO_VECTYPE (pattern_stmt_info) = vecitype; 992 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL), 993 NOP_EXPR, 994 gimple_assign_lhs (pattern_stmt)); 995 } 996 997 if (dump_enabled_p ()) 998 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0); 999 1000 stmts->safe_push (last_stmt); 1001 return pattern_stmt; 1002 } 1003 1004 1005 /* Function vect_recog_pow_pattern 1006 1007 Try to find the following pattern: 1008 1009 x = POW (y, N); 1010 1011 with POW being one of pow, powf, powi, powif and N being 1012 either 2 or 0.5. 1013 1014 Input: 1015 1016 * LAST_STMT: A stmt from which the pattern search begins. 1017 1018 Output: 1019 1020 * TYPE_IN: The type of the input arguments to the pattern. 1021 1022 * TYPE_OUT: The type of the output of this pattern. 1023 1024 * Return value: A new stmt that will be used to replace the sequence of 1025 stmts that constitute the pattern. In this case it will be: 1026 x = x * x 1027 or 1028 x = sqrt (x) 1029 */ 1030 1031 static gimple * 1032 vect_recog_pow_pattern (vec<gimple *> *stmts, tree *type_in, 1033 tree *type_out) 1034 { 1035 gimple *last_stmt = (*stmts)[0]; 1036 tree base, exp = NULL; 1037 gimple *stmt; 1038 tree var; 1039 1040 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL) 1041 return NULL; 1042 1043 switch (gimple_call_combined_fn (last_stmt)) 1044 { 1045 CASE_CFN_POW: 1046 CASE_CFN_POWI: 1047 base = gimple_call_arg (last_stmt, 0); 1048 exp = gimple_call_arg (last_stmt, 1); 1049 if (TREE_CODE (exp) != REAL_CST 1050 && TREE_CODE (exp) != INTEGER_CST) 1051 return NULL; 1052 break; 1053 1054 default: 1055 return NULL; 1056 } 1057 1058 /* We now have a pow or powi builtin function call with a constant 1059 exponent. */ 1060 1061 *type_out = NULL_TREE; 1062 1063 /* Catch squaring. */ 1064 if ((tree_fits_shwi_p (exp) 1065 && tree_to_shwi (exp) == 2) 1066 || (TREE_CODE (exp) == REAL_CST 1067 && real_equal (&TREE_REAL_CST (exp), &dconst2))) 1068 { 1069 *type_in = TREE_TYPE (base); 1070 1071 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL); 1072 stmt = gimple_build_assign (var, MULT_EXPR, base, base); 1073 return stmt; 1074 } 1075 1076 /* Catch square root. */ 1077 if (TREE_CODE (exp) == REAL_CST 1078 && real_equal (&TREE_REAL_CST (exp), &dconsthalf)) 1079 { 1080 *type_in = get_vectype_for_scalar_type (TREE_TYPE (base)); 1081 if (*type_in 1082 && direct_internal_fn_supported_p (IFN_SQRT, *type_in, 1083 OPTIMIZE_FOR_SPEED)) 1084 { 1085 gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base); 1086 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt); 1087 gimple_call_set_lhs (stmt, var); 1088 return stmt; 1089 } 1090 } 1091 1092 return NULL; 1093 } 1094 1095 1096 /* Function vect_recog_widen_sum_pattern 1097 1098 Try to find the following pattern: 1099 1100 type x_t; 1101 TYPE x_T, sum = init; 1102 loop: 1103 sum_0 = phi <init, sum_1> 1104 S1 x_t = *p; 1105 S2 x_T = (TYPE) x_t; 1106 S3 sum_1 = x_T + sum_0; 1107 1108 where type 'TYPE' is at least double the size of type 'type', i.e - we're 1109 summing elements of type 'type' into an accumulator of type 'TYPE'. This is 1110 a special case of a reduction computation. 1111 1112 Input: 1113 1114 * LAST_STMT: A stmt from which the pattern search begins. 
In the example,
   when this function is called with S3, the pattern {S2,S3} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_SUM <x_t, sum_0>

   Note: The widening-sum idiom is a widening reduction pattern that is
	 vectorized without preserving all the intermediate results.  It
	 produces only N/2 (widened) results (by summing up pairs of
	 intermediate results) rather than all N results.  Therefore, we
	 cannot allow this pattern when we want to get all the results and in
	 the correct order (as is the case when this computation is in an
	 inner-loop nested in an outer-loop that is being vectorized).  */

static gimple *
vect_recog_widen_sum_pattern (vec<gimple *> *stmts, tree *type_in,
			      tree *type_out)
{
  gimple *stmt, *last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple *pattern_stmt;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  if (loop && nested_in_vect_loop_p (loop, last_stmt))
    return NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE) X;
          sum_1 = DX + sum_0;
     In which DX is at least double the size of X, and sum_1 has been
     recognized as a reduction variable.
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def
      && ! STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_vinfo))
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a cast from type 'type' to type
     'TYPE'.  */

  if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
			  &promotion)
      || !promotion)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (stmt);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.
Create a stmt to be used to replace the pattern: */ 1203 var = vect_recog_temp_ssa_var (type, NULL); 1204 pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, oprnd0, oprnd1); 1205 1206 if (dump_enabled_p ()) 1207 { 1208 dump_printf_loc (MSG_NOTE, vect_location, 1209 "vect_recog_widen_sum_pattern: detected: "); 1210 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0); 1211 } 1212 1213 return pattern_stmt; 1214 } 1215 1216 1217 /* Return TRUE if the operation in STMT can be performed on a smaller type. 1218 1219 Input: 1220 STMT - a statement to check. 1221 DEF - we support operations with two operands, one of which is constant. 1222 The other operand can be defined by a demotion operation, or by a 1223 previous statement in a sequence of over-promoted operations. In the 1224 later case DEF is used to replace that operand. (It is defined by a 1225 pattern statement we created for the previous statement in the 1226 sequence). 1227 1228 Input/output: 1229 NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not 1230 NULL, it's the type of DEF. 1231 STMTS - additional pattern statements. If a pattern statement (type 1232 conversion) is created in this function, its original statement is 1233 added to STMTS. 1234 1235 Output: 1236 OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new 1237 operands to use in the new pattern statement for STMT (will be created 1238 in vect_recog_over_widening_pattern ()). 1239 NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern 1240 statements for STMT: the first one is a type promotion and the second 1241 one is the operation itself. We return the type promotion statement 1242 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of 1243 the second pattern statement. */ 1244 1245 static bool 1246 vect_operation_fits_smaller_type (gimple *stmt, tree def, tree *new_type, 1247 tree *op0, tree *op1, gimple **new_def_stmt, 1248 vec<gimple *> *stmts) 1249 { 1250 enum tree_code code; 1251 tree const_oprnd, oprnd; 1252 tree interm_type = NULL_TREE, half_type, new_oprnd, type; 1253 gimple *def_stmt, *new_stmt; 1254 bool first = false; 1255 bool promotion; 1256 1257 *op0 = NULL_TREE; 1258 *op1 = NULL_TREE; 1259 *new_def_stmt = NULL; 1260 1261 if (!is_gimple_assign (stmt)) 1262 return false; 1263 1264 code = gimple_assign_rhs_code (stmt); 1265 if (code != LSHIFT_EXPR && code != RSHIFT_EXPR 1266 && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR) 1267 return false; 1268 1269 oprnd = gimple_assign_rhs1 (stmt); 1270 const_oprnd = gimple_assign_rhs2 (stmt); 1271 type = gimple_expr_type (stmt); 1272 1273 if (TREE_CODE (oprnd) != SSA_NAME 1274 || TREE_CODE (const_oprnd) != INTEGER_CST) 1275 return false; 1276 1277 /* If oprnd has other uses besides that in stmt we cannot mark it 1278 as being part of a pattern only. */ 1279 if (!has_single_use (oprnd)) 1280 return false; 1281 1282 /* If we are in the middle of a sequence, we use DEF from a previous 1283 statement. Otherwise, OPRND has to be a result of type promotion. */ 1284 if (*new_type) 1285 { 1286 half_type = *new_type; 1287 oprnd = def; 1288 } 1289 else 1290 { 1291 first = true; 1292 if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt, 1293 &promotion) 1294 || !promotion 1295 || !vect_same_loop_or_bb_p (stmt, def_stmt)) 1296 return false; 1297 } 1298 1299 /* Can we perform the operation on a smaller type? 
*/ 1300 switch (code) 1301 { 1302 case BIT_IOR_EXPR: 1303 case BIT_XOR_EXPR: 1304 case BIT_AND_EXPR: 1305 if (!int_fits_type_p (const_oprnd, half_type)) 1306 { 1307 /* HALF_TYPE is not enough. Try a bigger type if possible. */ 1308 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4)) 1309 return false; 1310 1311 interm_type = build_nonstandard_integer_type ( 1312 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type)); 1313 if (!int_fits_type_p (const_oprnd, interm_type)) 1314 return false; 1315 } 1316 1317 break; 1318 1319 case LSHIFT_EXPR: 1320 /* Try intermediate type - HALF_TYPE is not enough for sure. */ 1321 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4)) 1322 return false; 1323 1324 /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size. 1325 (e.g., if the original value was char, the shift amount is at most 8 1326 if we want to use short). */ 1327 if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1) 1328 return false; 1329 1330 interm_type = build_nonstandard_integer_type ( 1331 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type)); 1332 1333 if (!vect_supportable_shift (code, interm_type)) 1334 return false; 1335 1336 break; 1337 1338 case RSHIFT_EXPR: 1339 if (vect_supportable_shift (code, half_type)) 1340 break; 1341 1342 /* Try intermediate type - HALF_TYPE is not supported. */ 1343 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4)) 1344 return false; 1345 1346 interm_type = build_nonstandard_integer_type ( 1347 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type)); 1348 1349 if (!vect_supportable_shift (code, interm_type)) 1350 return false; 1351 1352 break; 1353 1354 default: 1355 gcc_unreachable (); 1356 } 1357 1358 /* There are four possible cases: 1359 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's 1360 the first statement in the sequence) 1361 a. The original, HALF_TYPE, is not enough - we replace the promotion 1362 from HALF_TYPE to TYPE with a promotion to INTERM_TYPE. 1363 b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original 1364 promotion. 1365 2. OPRND is defined by a pattern statement we created. 1366 a. Its type is not sufficient for the operation, we create a new stmt: 1367 a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store 1368 this statement in NEW_DEF_STMT, and it is later put in 1369 STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT. 1370 b. OPRND is good to use in the new statement. */ 1371 if (first) 1372 { 1373 if (interm_type) 1374 { 1375 /* Replace the original type conversion HALF_TYPE->TYPE with 1376 HALF_TYPE->INTERM_TYPE. */ 1377 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt))) 1378 { 1379 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)); 1380 /* Check if the already created pattern stmt is what we need. */ 1381 if (!is_gimple_assign (new_stmt) 1382 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (new_stmt)) 1383 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type) 1384 return false; 1385 1386 stmts->safe_push (def_stmt); 1387 oprnd = gimple_assign_lhs (new_stmt); 1388 } 1389 else 1390 { 1391 /* Create NEW_OPRND = (INTERM_TYPE) OPRND. 
*/ 1392 oprnd = gimple_assign_rhs1 (def_stmt); 1393 new_oprnd = make_ssa_name (interm_type); 1394 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd); 1395 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt; 1396 stmts->safe_push (def_stmt); 1397 oprnd = new_oprnd; 1398 } 1399 } 1400 else 1401 { 1402 /* Retrieve the operand before the type promotion. */ 1403 oprnd = gimple_assign_rhs1 (def_stmt); 1404 } 1405 } 1406 else 1407 { 1408 if (interm_type) 1409 { 1410 /* Create a type conversion HALF_TYPE->INTERM_TYPE. */ 1411 new_oprnd = make_ssa_name (interm_type); 1412 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd); 1413 oprnd = new_oprnd; 1414 *new_def_stmt = new_stmt; 1415 } 1416 1417 /* Otherwise, OPRND is already set. */ 1418 } 1419 1420 if (interm_type) 1421 *new_type = interm_type; 1422 else 1423 *new_type = half_type; 1424 1425 *op0 = oprnd; 1426 *op1 = fold_convert (*new_type, const_oprnd); 1427 1428 return true; 1429 } 1430 1431 1432 /* Try to find a statement or a sequence of statements that can be performed 1433 on a smaller type: 1434 1435 type x_t; 1436 TYPE x_T, res0_T, res1_T; 1437 loop: 1438 S1 x_t = *p; 1439 S2 x_T = (TYPE) x_t; 1440 S3 res0_T = op (x_T, C0); 1441 S4 res1_T = op (res0_T, C1); 1442 S5 ... = () res1_T; - type demotion 1443 1444 where type 'TYPE' is at least double the size of type 'type', C0 and C1 are 1445 constants. 1446 Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either 1447 be 'type' or some intermediate type. For now, we expect S5 to be a type 1448 demotion operation. We also check that S3 and S4 have only one use. */ 1449 1450 static gimple * 1451 vect_recog_over_widening_pattern (vec<gimple *> *stmts, 1452 tree *type_in, tree *type_out) 1453 { 1454 gimple *stmt = stmts->pop (); 1455 gimple *pattern_stmt = NULL, *new_def_stmt, *prev_stmt = NULL, 1456 *use_stmt = NULL; 1457 tree op0, op1, vectype = NULL_TREE, use_lhs, use_type; 1458 tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd; 1459 bool first; 1460 tree type = NULL; 1461 1462 first = true; 1463 while (1) 1464 { 1465 if (!vinfo_for_stmt (stmt) 1466 || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt))) 1467 return NULL; 1468 1469 new_def_stmt = NULL; 1470 if (!vect_operation_fits_smaller_type (stmt, var, &new_type, 1471 &op0, &op1, &new_def_stmt, 1472 stmts)) 1473 { 1474 if (first) 1475 return NULL; 1476 else 1477 break; 1478 } 1479 1480 /* STMT can be performed on a smaller type. Check its uses. */ 1481 use_stmt = vect_single_imm_use (stmt); 1482 if (!use_stmt || !is_gimple_assign (use_stmt)) 1483 return NULL; 1484 1485 /* Create pattern statement for STMT. */ 1486 vectype = get_vectype_for_scalar_type (new_type); 1487 if (!vectype) 1488 return NULL; 1489 1490 /* We want to collect all the statements for which we create pattern 1491 statetments, except for the case when the last statement in the 1492 sequence doesn't have a corresponding pattern statement. In such 1493 case we associate the last pattern statement with the last statement 1494 in the sequence. Therefore, we only add the original statement to 1495 the list if we know that it is not the last. 
*/ 1496 if (prev_stmt) 1497 stmts->safe_push (prev_stmt); 1498 1499 var = vect_recog_temp_ssa_var (new_type, NULL); 1500 pattern_stmt 1501 = gimple_build_assign (var, gimple_assign_rhs_code (stmt), op0, op1); 1502 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt; 1503 new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt); 1504 1505 if (dump_enabled_p ()) 1506 { 1507 dump_printf_loc (MSG_NOTE, vect_location, 1508 "created pattern stmt: "); 1509 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0); 1510 } 1511 1512 type = gimple_expr_type (stmt); 1513 prev_stmt = stmt; 1514 stmt = use_stmt; 1515 1516 first = false; 1517 } 1518 1519 /* We got a sequence. We expect it to end with a type demotion operation. 1520 Otherwise, we quit (for now). There are three possible cases: the 1521 conversion is to NEW_TYPE (we don't do anything), the conversion is to 1522 a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and 1523 NEW_TYPE differs (we create a new conversion statement). */ 1524 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))) 1525 { 1526 use_lhs = gimple_assign_lhs (use_stmt); 1527 use_type = TREE_TYPE (use_lhs); 1528 /* Support only type demotion or signedess change. */ 1529 if (!INTEGRAL_TYPE_P (use_type) 1530 || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type)) 1531 return NULL; 1532 1533 /* Check that NEW_TYPE is not bigger than the conversion result. */ 1534 if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type)) 1535 return NULL; 1536 1537 if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type) 1538 || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type)) 1539 { 1540 /* Create NEW_TYPE->USE_TYPE conversion. */ 1541 new_oprnd = make_ssa_name (use_type); 1542 pattern_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, var); 1543 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt; 1544 1545 *type_in = get_vectype_for_scalar_type (new_type); 1546 *type_out = get_vectype_for_scalar_type (use_type); 1547 1548 /* We created a pattern statement for the last statement in the 1549 sequence, so we don't need to associate it with the pattern 1550 statement created for PREV_STMT. Therefore, we add PREV_STMT 1551 to the list in order to mark it later in vect_pattern_recog_1. */ 1552 if (prev_stmt) 1553 stmts->safe_push (prev_stmt); 1554 } 1555 else 1556 { 1557 if (prev_stmt) 1558 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt)) 1559 = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt)); 1560 1561 *type_in = vectype; 1562 *type_out = NULL_TREE; 1563 } 1564 1565 stmts->safe_push (use_stmt); 1566 } 1567 else 1568 /* TODO: support general case, create a conversion to the correct type. */ 1569 return NULL; 1570 1571 /* Pattern detected. */ 1572 if (dump_enabled_p ()) 1573 { 1574 dump_printf_loc (MSG_NOTE, vect_location, 1575 "vect_recog_over_widening_pattern: detected: "); 1576 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0); 1577 } 1578 1579 return pattern_stmt; 1580 } 1581 1582 /* Detect widening shift pattern: 1583 1584 type a_t; 1585 TYPE a_T, res_T; 1586 1587 S1 a_t = ; 1588 S2 a_T = (TYPE) a_t; 1589 S3 res_T = a_T << CONST; 1590 1591 where type 'TYPE' is at least double the size of type 'type'. 1592 1593 Also detect cases where the shift result is immediately converted 1594 to another type 'result_type' that is no larger in size than 'TYPE'. 
1595 In those cases we perform a widen-shift that directly results in 1596 'result_type', to avoid a possible over-widening situation: 1597 1598 type a_t; 1599 TYPE a_T, res_T; 1600 result_type res_result; 1601 1602 S1 a_t = ; 1603 S2 a_T = (TYPE) a_t; 1604 S3 res_T = a_T << CONST; 1605 S4 res_result = (result_type) res_T; 1606 '--> res_result' = a_t w<< CONST; 1607 1608 And a case when 'TYPE' is 4 times bigger than 'type'. In that case we 1609 create an additional pattern stmt for S2 to create a variable of an 1610 intermediate type, and perform widen-shift on the intermediate type: 1611 1612 type a_t; 1613 interm_type a_it; 1614 TYPE a_T, res_T, res_T'; 1615 1616 S1 a_t = ; 1617 S2 a_T = (TYPE) a_t; 1618 '--> a_it = (interm_type) a_t; 1619 S3 res_T = a_T << CONST; 1620 '--> res_T' = a_it <<* CONST; 1621 1622 Input/Output: 1623 1624 * STMTS: Contains a stmt from which the pattern search begins. 1625 In case of unsigned widen-shift, the original stmt (S3) is replaced with S4 1626 in STMTS. When an intermediate type is used and a pattern statement is 1627 created for S2, we also put S2 here (before S3). 1628 1629 Output: 1630 1631 * TYPE_IN: The type of the input arguments to the pattern. 1632 1633 * TYPE_OUT: The type of the output of this pattern. 1634 1635 * Return value: A new stmt that will be used to replace the sequence of 1636 stmts that constitute the pattern. In this case it will be: 1637 WIDEN_LSHIFT_EXPR <a_t, CONST>. */ 1638 1639 static gimple * 1640 vect_recog_widen_shift_pattern (vec<gimple *> *stmts, 1641 tree *type_in, tree *type_out) 1642 { 1643 gimple *last_stmt = stmts->pop (); 1644 gimple *def_stmt0; 1645 tree oprnd0, oprnd1; 1646 tree type, half_type0; 1647 gimple *pattern_stmt; 1648 tree vectype, vectype_out = NULL_TREE; 1649 tree var; 1650 enum tree_code dummy_code; 1651 int dummy_int; 1652 vec<tree> dummy_vec; 1653 gimple *use_stmt; 1654 bool promotion; 1655 1656 if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt)) 1657 return NULL; 1658 1659 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt))) 1660 return NULL; 1661 1662 if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR) 1663 return NULL; 1664 1665 oprnd0 = gimple_assign_rhs1 (last_stmt); 1666 oprnd1 = gimple_assign_rhs2 (last_stmt); 1667 if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST) 1668 return NULL; 1669 1670 /* Check operand 0: it has to be defined by a type promotion. */ 1671 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0, 1672 &promotion) 1673 || !promotion) 1674 return NULL; 1675 1676 /* Check operand 1: has to be positive. We check that it fits the type 1677 in vect_handle_widen_op_by_const (). */ 1678 if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0) 1679 return NULL; 1680 1681 oprnd0 = gimple_assign_rhs1 (def_stmt0); 1682 type = gimple_expr_type (last_stmt); 1683 1684 /* Check for subsequent conversion to another type. */ 1685 use_stmt = vect_single_imm_use (last_stmt); 1686 if (use_stmt && is_gimple_assign (use_stmt) 1687 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)) 1688 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt))) 1689 { 1690 tree use_lhs = gimple_assign_lhs (use_stmt); 1691 tree use_type = TREE_TYPE (use_lhs); 1692 1693 if (INTEGRAL_TYPE_P (use_type) 1694 && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type)) 1695 { 1696 last_stmt = use_stmt; 1697 type = use_type; 1698 } 1699 } 1700 1701 /* Check if this a widening operation. 
*/ 1702 gimple *wstmt = NULL; 1703 if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1, 1704 &oprnd0, &wstmt, 1705 type, &half_type0, def_stmt0)) 1706 return NULL; 1707 1708 /* Pattern detected. */ 1709 if (dump_enabled_p ()) 1710 dump_printf_loc (MSG_NOTE, vect_location, 1711 "vect_recog_widen_shift_pattern: detected:\n"); 1712 1713 /* Check target support. */ 1714 vectype = get_vectype_for_scalar_type (half_type0); 1715 vectype_out = get_vectype_for_scalar_type (type); 1716 1717 if (!vectype 1718 || !vectype_out 1719 || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt, 1720 vectype_out, vectype, 1721 &dummy_code, &dummy_code, 1722 &dummy_int, &dummy_vec)) 1723 return NULL; 1724 1725 *type_in = vectype; 1726 *type_out = vectype_out; 1727 1728 /* Pattern supported. Create a stmt to be used to replace the pattern. */ 1729 var = vect_recog_temp_ssa_var (type, NULL); 1730 pattern_stmt = 1731 gimple_build_assign (var, WIDEN_LSHIFT_EXPR, oprnd0, oprnd1); 1732 if (wstmt) 1733 { 1734 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); 1735 new_pattern_def_seq (stmt_vinfo, wstmt); 1736 stmt_vec_info new_stmt_info 1737 = new_stmt_vec_info (wstmt, stmt_vinfo->vinfo); 1738 set_vinfo_for_stmt (wstmt, new_stmt_info); 1739 STMT_VINFO_VECTYPE (new_stmt_info) = vectype; 1740 } 1741 1742 if (dump_enabled_p ()) 1743 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0); 1744 1745 stmts->safe_push (last_stmt); 1746 return pattern_stmt; 1747 } 1748 1749 /* Detect a rotate pattern wouldn't be otherwise vectorized: 1750 1751 type a_t, b_t, c_t; 1752 1753 S0 a_t = b_t r<< c_t; 1754 1755 Input/Output: 1756 1757 * STMTS: Contains a stmt from which the pattern search begins, 1758 i.e. the shift/rotate stmt. The original stmt (S0) is replaced 1759 with a sequence: 1760 1761 S1 d_t = -c_t; 1762 S2 e_t = d_t & (B - 1); 1763 S3 f_t = b_t << c_t; 1764 S4 g_t = b_t >> e_t; 1765 S0 a_t = f_t | g_t; 1766 1767 where B is element bitsize of type. 1768 1769 Output: 1770 1771 * TYPE_IN: The type of the input arguments to the pattern. 1772 1773 * TYPE_OUT: The type of the output of this pattern. 1774 1775 * Return value: A new stmt that will be used to replace the rotate 1776 S0 stmt. 
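
   For illustration only (this example is not part of the original
   comment), a scalar rotate idiom that typically reaches this pattern is:

     unsigned int
     rotl (unsigned int b, unsigned int c)
     {
       return (b << c) | (b >> ((32 - c) & 31));
     }

   which earlier passes usually canonicalize to b r<< c; when the target
   has no vector rotate instruction, the pattern expands it back into the
   shift/or sequence S1-S4/S0 above so that it can still be vectorized.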
*/ 1777 1778 static gimple * 1779 vect_recog_rotate_pattern (vec<gimple *> *stmts, tree *type_in, tree *type_out) 1780 { 1781 gimple *last_stmt = stmts->pop (); 1782 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2; 1783 gimple *pattern_stmt, *def_stmt; 1784 enum tree_code rhs_code; 1785 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); 1786 vec_info *vinfo = stmt_vinfo->vinfo; 1787 enum vect_def_type dt; 1788 optab optab1, optab2; 1789 edge ext_def = NULL; 1790 1791 if (!is_gimple_assign (last_stmt)) 1792 return NULL; 1793 1794 rhs_code = gimple_assign_rhs_code (last_stmt); 1795 switch (rhs_code) 1796 { 1797 case LROTATE_EXPR: 1798 case RROTATE_EXPR: 1799 break; 1800 default: 1801 return NULL; 1802 } 1803 1804 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo)) 1805 return NULL; 1806 1807 lhs = gimple_assign_lhs (last_stmt); 1808 oprnd0 = gimple_assign_rhs1 (last_stmt); 1809 type = TREE_TYPE (oprnd0); 1810 oprnd1 = gimple_assign_rhs2 (last_stmt); 1811 if (TREE_CODE (oprnd0) != SSA_NAME 1812 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type) 1813 || !INTEGRAL_TYPE_P (type) 1814 || !TYPE_UNSIGNED (type)) 1815 return NULL; 1816 1817 if (!vect_is_simple_use (oprnd1, vinfo, &def_stmt, &dt)) 1818 return NULL; 1819 1820 if (dt != vect_internal_def 1821 && dt != vect_constant_def 1822 && dt != vect_external_def) 1823 return NULL; 1824 1825 vectype = get_vectype_for_scalar_type (type); 1826 if (vectype == NULL_TREE) 1827 return NULL; 1828 1829 /* If vector/vector or vector/scalar rotate is supported by the target, 1830 don't do anything here. */ 1831 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector); 1832 if (optab1 1833 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing) 1834 return NULL; 1835 1836 if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def) 1837 { 1838 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar); 1839 if (optab2 1840 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing) 1841 return NULL; 1842 } 1843 1844 /* If vector/vector or vector/scalar shifts aren't supported by the target, 1845 don't do anything here either. */ 1846 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector); 1847 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector); 1848 if (!optab1 1849 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing 1850 || !optab2 1851 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing) 1852 { 1853 if (! 
is_a <bb_vec_info> (vinfo) && dt == vect_internal_def) 1854 return NULL; 1855 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar); 1856 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar); 1857 if (!optab1 1858 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing 1859 || !optab2 1860 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing) 1861 return NULL; 1862 } 1863 1864 *type_in = vectype; 1865 *type_out = vectype; 1866 if (*type_in == NULL_TREE) 1867 return NULL; 1868 1869 if (dt == vect_external_def 1870 && TREE_CODE (oprnd1) == SSA_NAME 1871 && is_a <loop_vec_info> (vinfo)) 1872 { 1873 struct loop *loop = as_a <loop_vec_info> (vinfo)->loop; 1874 ext_def = loop_preheader_edge (loop); 1875 if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1)) 1876 { 1877 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1)); 1878 if (bb == NULL 1879 || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb)) 1880 ext_def = NULL; 1881 } 1882 } 1883 1884 def = NULL_TREE; 1885 if (TREE_CODE (oprnd1) == INTEGER_CST 1886 || TYPE_MODE (TREE_TYPE (oprnd1)) == TYPE_MODE (type)) 1887 def = oprnd1; 1888 else if (def_stmt && gimple_assign_cast_p (def_stmt)) 1889 { 1890 tree rhs1 = gimple_assign_rhs1 (def_stmt); 1891 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (type) 1892 && TYPE_PRECISION (TREE_TYPE (rhs1)) 1893 == TYPE_PRECISION (type)) 1894 def = rhs1; 1895 } 1896 1897 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL; 1898 if (def == NULL_TREE) 1899 { 1900 def = vect_recog_temp_ssa_var (type, NULL); 1901 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1); 1902 if (ext_def) 1903 { 1904 basic_block new_bb 1905 = gsi_insert_on_edge_immediate (ext_def, def_stmt); 1906 gcc_assert (!new_bb); 1907 } 1908 else 1909 append_pattern_def_seq (stmt_vinfo, def_stmt); 1910 } 1911 stype = TREE_TYPE (def); 1912 1913 if (TREE_CODE (def) == INTEGER_CST) 1914 { 1915 if (!tree_fits_uhwi_p (def) 1916 || tree_to_uhwi (def) >= GET_MODE_PRECISION (TYPE_MODE (type)) 1917 || integer_zerop (def)) 1918 return NULL; 1919 def2 = build_int_cst (stype, 1920 GET_MODE_PRECISION (TYPE_MODE (type)) 1921 - tree_to_uhwi (def)); 1922 } 1923 else 1924 { 1925 tree vecstype = get_vectype_for_scalar_type (stype); 1926 stmt_vec_info def_stmt_vinfo; 1927 1928 if (vecstype == NULL_TREE) 1929 return NULL; 1930 def2 = vect_recog_temp_ssa_var (stype, NULL); 1931 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def); 1932 if (ext_def) 1933 { 1934 basic_block new_bb 1935 = gsi_insert_on_edge_immediate (ext_def, def_stmt); 1936 gcc_assert (!new_bb); 1937 } 1938 else 1939 { 1940 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo); 1941 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo); 1942 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype; 1943 append_pattern_def_seq (stmt_vinfo, def_stmt); 1944 } 1945 1946 def2 = vect_recog_temp_ssa_var (stype, NULL); 1947 tree mask 1948 = build_int_cst (stype, GET_MODE_PRECISION (TYPE_MODE (stype)) - 1); 1949 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR, 1950 gimple_assign_lhs (def_stmt), mask); 1951 if (ext_def) 1952 { 1953 basic_block new_bb 1954 = gsi_insert_on_edge_immediate (ext_def, def_stmt); 1955 gcc_assert (!new_bb); 1956 } 1957 else 1958 { 1959 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo); 1960 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo); 1961 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype; 1962 append_pattern_def_seq (stmt_vinfo, def_stmt); 1963 } 1964 } 1965 1966 var1 = vect_recog_temp_ssa_var (type, NULL); 1967 def_stmt = gimple_build_assign (var1, rhs_code == 
LROTATE_EXPR 1968 ? LSHIFT_EXPR : RSHIFT_EXPR, 1969 oprnd0, def); 1970 append_pattern_def_seq (stmt_vinfo, def_stmt); 1971 1972 var2 = vect_recog_temp_ssa_var (type, NULL); 1973 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR 1974 ? RSHIFT_EXPR : LSHIFT_EXPR, 1975 oprnd0, def2); 1976 append_pattern_def_seq (stmt_vinfo, def_stmt); 1977 1978 /* Pattern detected. */ 1979 if (dump_enabled_p ()) 1980 dump_printf_loc (MSG_NOTE, vect_location, 1981 "vect_recog_rotate_pattern: detected:\n"); 1982 1983 /* Pattern supported. Create a stmt to be used to replace the pattern. */ 1984 var = vect_recog_temp_ssa_var (type, NULL); 1985 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2); 1986 1987 if (dump_enabled_p ()) 1988 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0); 1989 1990 stmts->safe_push (last_stmt); 1991 return pattern_stmt; 1992 } 1993 1994 /* Detect a vector by vector shift pattern that wouldn't be otherwise 1995 vectorized: 1996 1997 type a_t; 1998 TYPE b_T, res_T; 1999 2000 S1 a_t = ; 2001 S2 b_T = ; 2002 S3 res_T = b_T op a_t; 2003 2004 where type 'TYPE' is a type with different size than 'type', 2005 and op is <<, >> or rotate. 2006 2007 Also detect cases: 2008 2009 type a_t; 2010 TYPE b_T, c_T, res_T; 2011 2012 S0 c_T = ; 2013 S1 a_t = (type) c_T; 2014 S2 b_T = ; 2015 S3 res_T = b_T op a_t; 2016 2017 Input/Output: 2018 2019 * STMTS: Contains a stmt from which the pattern search begins, 2020 i.e. the shift/rotate stmt. The original stmt (S3) is replaced 2021 with a shift/rotate which has same type on both operands, in the 2022 second case just b_T op c_T, in the first case with added cast 2023 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ. 2024 2025 Output: 2026 2027 * TYPE_IN: The type of the input arguments to the pattern. 2028 2029 * TYPE_OUT: The type of the output of this pattern. 2030 2031 * Return value: A new stmt that will be used to replace the shift/rotate 2032 S3 stmt. 
*/ 2033 2034 static gimple * 2035 vect_recog_vector_vector_shift_pattern (vec<gimple *> *stmts, 2036 tree *type_in, tree *type_out) 2037 { 2038 gimple *last_stmt = stmts->pop (); 2039 tree oprnd0, oprnd1, lhs, var; 2040 gimple *pattern_stmt, *def_stmt; 2041 enum tree_code rhs_code; 2042 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); 2043 vec_info *vinfo = stmt_vinfo->vinfo; 2044 enum vect_def_type dt; 2045 2046 if (!is_gimple_assign (last_stmt)) 2047 return NULL; 2048 2049 rhs_code = gimple_assign_rhs_code (last_stmt); 2050 switch (rhs_code) 2051 { 2052 case LSHIFT_EXPR: 2053 case RSHIFT_EXPR: 2054 case LROTATE_EXPR: 2055 case RROTATE_EXPR: 2056 break; 2057 default: 2058 return NULL; 2059 } 2060 2061 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo)) 2062 return NULL; 2063 2064 lhs = gimple_assign_lhs (last_stmt); 2065 oprnd0 = gimple_assign_rhs1 (last_stmt); 2066 oprnd1 = gimple_assign_rhs2 (last_stmt); 2067 if (TREE_CODE (oprnd0) != SSA_NAME 2068 || TREE_CODE (oprnd1) != SSA_NAME 2069 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1)) 2070 || TYPE_PRECISION (TREE_TYPE (oprnd1)) 2071 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1))) 2072 || TYPE_PRECISION (TREE_TYPE (lhs)) 2073 != TYPE_PRECISION (TREE_TYPE (oprnd0))) 2074 return NULL; 2075 2076 if (!vect_is_simple_use (oprnd1, vinfo, &def_stmt, &dt)) 2077 return NULL; 2078 2079 if (dt != vect_internal_def) 2080 return NULL; 2081 2082 *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0)); 2083 *type_out = *type_in; 2084 if (*type_in == NULL_TREE) 2085 return NULL; 2086 2087 tree def = NULL_TREE; 2088 stmt_vec_info def_vinfo = vinfo_for_stmt (def_stmt); 2089 if (!STMT_VINFO_IN_PATTERN_P (def_vinfo) && gimple_assign_cast_p (def_stmt)) 2090 { 2091 tree rhs1 = gimple_assign_rhs1 (def_stmt); 2092 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0)) 2093 && TYPE_PRECISION (TREE_TYPE (rhs1)) 2094 == TYPE_PRECISION (TREE_TYPE (oprnd0))) 2095 { 2096 if (TYPE_PRECISION (TREE_TYPE (oprnd1)) 2097 >= TYPE_PRECISION (TREE_TYPE (rhs1))) 2098 def = rhs1; 2099 else 2100 { 2101 tree mask 2102 = build_low_bits_mask (TREE_TYPE (rhs1), 2103 TYPE_PRECISION (TREE_TYPE (oprnd1))); 2104 def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL); 2105 def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask); 2106 new_pattern_def_seq (stmt_vinfo, def_stmt); 2107 } 2108 } 2109 } 2110 2111 if (def == NULL_TREE) 2112 { 2113 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL); 2114 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1); 2115 new_pattern_def_seq (stmt_vinfo, def_stmt); 2116 } 2117 2118 /* Pattern detected. */ 2119 if (dump_enabled_p ()) 2120 dump_printf_loc (MSG_NOTE, vect_location, 2121 "vect_recog_vector_vector_shift_pattern: detected:\n"); 2122 2123 /* Pattern supported. Create a stmt to be used to replace the pattern. */ 2124 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL); 2125 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def); 2126 2127 if (dump_enabled_p ()) 2128 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0); 2129 2130 stmts->safe_push (last_stmt); 2131 return pattern_stmt; 2132 } 2133 2134 /* Return true iff the target has a vector optab implementing the operation 2135 CODE on type VECTYPE. 
*/ 2136 2137 static bool 2138 target_has_vecop_for_code (tree_code code, tree vectype) 2139 { 2140 optab voptab = optab_for_tree_code (code, vectype, optab_vector); 2141 return voptab 2142 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing; 2143 } 2144 2145 /* Verify that the target has optabs of VECTYPE to perform all the steps 2146 needed by the multiplication-by-immediate synthesis algorithm described by 2147 ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is 2148 present. Return true iff the target supports all the steps. */ 2149 2150 static bool 2151 target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var, 2152 tree vectype, bool synth_shift_p) 2153 { 2154 if (alg->op[0] != alg_zero && alg->op[0] != alg_m) 2155 return false; 2156 2157 bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype); 2158 bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype); 2159 2160 if (var == negate_variant 2161 && !target_has_vecop_for_code (NEGATE_EXPR, vectype)) 2162 return false; 2163 2164 /* If we must synthesize shifts with additions make sure that vector 2165 addition is available. */ 2166 if ((var == add_variant || synth_shift_p) && !supports_vplus) 2167 return false; 2168 2169 for (int i = 1; i < alg->ops; i++) 2170 { 2171 switch (alg->op[i]) 2172 { 2173 case alg_shift: 2174 break; 2175 case alg_add_t_m2: 2176 case alg_add_t2_m: 2177 case alg_add_factor: 2178 if (!supports_vplus) 2179 return false; 2180 break; 2181 case alg_sub_t_m2: 2182 case alg_sub_t2_m: 2183 case alg_sub_factor: 2184 if (!supports_vminus) 2185 return false; 2186 break; 2187 case alg_unknown: 2188 case alg_m: 2189 case alg_zero: 2190 case alg_impossible: 2191 return false; 2192 default: 2193 gcc_unreachable (); 2194 } 2195 } 2196 2197 return true; 2198 } 2199 2200 /* Synthesize a left shift of OP by AMNT bits using a series of additions and 2201 putting the final result in DEST. Append all statements but the last into 2202 VINFO. Return the last statement. */ 2203 2204 static gimple * 2205 synth_lshift_by_additions (tree dest, tree op, HOST_WIDE_INT amnt, 2206 stmt_vec_info vinfo) 2207 { 2208 HOST_WIDE_INT i; 2209 tree itype = TREE_TYPE (op); 2210 tree prev_res = op; 2211 gcc_assert (amnt >= 0); 2212 for (i = 0; i < amnt; i++) 2213 { 2214 tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL) 2215 : dest; 2216 gimple *stmt 2217 = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res); 2218 prev_res = tmp_var; 2219 if (i < amnt - 1) 2220 append_pattern_def_seq (vinfo, stmt); 2221 else 2222 return stmt; 2223 } 2224 gcc_unreachable (); 2225 return NULL; 2226 } 2227 2228 /* Helper for vect_synth_mult_by_constant. Apply a binary operation 2229 CODE to operands OP1 and OP2, creating a new temporary SSA var in 2230 the process if necessary. Append the resulting assignment statements 2231 to the sequence in STMT_VINFO. Return the SSA variable that holds the 2232 result of the binary operation. If SYNTH_SHIFT_P is true synthesize 2233 left shifts using additions. 
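   For example, with SYNTH_SHIFT_P a left shift OP1 << 2 is emitted as the
   two self-additions t = OP1 + OP1; t = t + t (via synth_lshift_by_additions
   above).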
*/ 2234 2235 static tree 2236 apply_binop_and_append_stmt (tree_code code, tree op1, tree op2, 2237 stmt_vec_info stmt_vinfo, bool synth_shift_p) 2238 { 2239 if (integer_zerop (op2) 2240 && (code == LSHIFT_EXPR 2241 || code == PLUS_EXPR)) 2242 { 2243 gcc_assert (TREE_CODE (op1) == SSA_NAME); 2244 return op1; 2245 } 2246 2247 gimple *stmt; 2248 tree itype = TREE_TYPE (op1); 2249 tree tmp_var = vect_recog_temp_ssa_var (itype, NULL); 2250 2251 if (code == LSHIFT_EXPR 2252 && synth_shift_p) 2253 { 2254 stmt = synth_lshift_by_additions (tmp_var, op1, TREE_INT_CST_LOW (op2), 2255 stmt_vinfo); 2256 append_pattern_def_seq (stmt_vinfo, stmt); 2257 return tmp_var; 2258 } 2259 2260 stmt = gimple_build_assign (tmp_var, code, op1, op2); 2261 append_pattern_def_seq (stmt_vinfo, stmt); 2262 return tmp_var; 2263 } 2264 2265 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts 2266 and simple arithmetic operations to be vectorized. Record the statements 2267 produced in STMT_VINFO and return the last statement in the sequence or 2268 NULL if it's not possible to synthesize such a multiplication. 2269 This function mirrors the behavior of expand_mult_const in expmed.c but 2270 works on tree-ssa form. */ 2271 2272 static gimple * 2273 vect_synth_mult_by_constant (tree op, tree val, 2274 stmt_vec_info stmt_vinfo) 2275 { 2276 tree itype = TREE_TYPE (op); 2277 machine_mode mode = TYPE_MODE (itype); 2278 struct algorithm alg; 2279 mult_variant variant; 2280 if (!tree_fits_shwi_p (val)) 2281 return NULL; 2282 2283 /* Multiplication synthesis by shifts, adds and subs can introduce 2284 signed overflow where the original operation didn't. Perform the 2285 operations on an unsigned type and cast back to avoid this. 2286 In the future we may want to relax this for synthesis algorithms 2287 that we can prove do not cause unexpected overflow. */ 2288 bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype); 2289 2290 tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype; 2291 2292 /* Targets that don't support vector shifts but support vector additions 2293 can synthesize shifts that way. */ 2294 bool synth_shift_p = !vect_supportable_shift (LSHIFT_EXPR, multtype); 2295 2296 HOST_WIDE_INT hwval = tree_to_shwi (val); 2297 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs. 2298 The vectorizer's benefit analysis will decide whether it's beneficial 2299 to do this. */ 2300 bool possible = choose_mult_variant (mode, hwval, &alg, 2301 &variant, MAX_COST); 2302 if (!possible) 2303 return NULL; 2304 2305 tree vectype = get_vectype_for_scalar_type (multtype); 2306 2307 if (!vectype 2308 || !target_supports_mult_synth_alg (&alg, variant, 2309 vectype, synth_shift_p)) 2310 return NULL; 2311 2312 tree accumulator; 2313 2314 /* Clear out the sequence of statements so we can populate it below. 
*/ 2315 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL; 2316 gimple *stmt = NULL; 2317 2318 if (cast_to_unsigned_p) 2319 { 2320 tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL); 2321 stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op); 2322 append_pattern_def_seq (stmt_vinfo, stmt); 2323 op = tmp_op; 2324 } 2325 2326 if (alg.op[0] == alg_zero) 2327 accumulator = build_int_cst (multtype, 0); 2328 else 2329 accumulator = op; 2330 2331 bool needs_fixup = (variant == negate_variant) 2332 || (variant == add_variant); 2333 2334 for (int i = 1; i < alg.ops; i++) 2335 { 2336 tree shft_log = build_int_cst (multtype, alg.log[i]); 2337 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL); 2338 tree tmp_var = NULL_TREE; 2339 2340 switch (alg.op[i]) 2341 { 2342 case alg_shift: 2343 if (synth_shift_p) 2344 stmt 2345 = synth_lshift_by_additions (accum_tmp, accumulator, alg.log[i], 2346 stmt_vinfo); 2347 else 2348 stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator, 2349 shft_log); 2350 break; 2351 case alg_add_t_m2: 2352 tmp_var 2353 = apply_binop_and_append_stmt (LSHIFT_EXPR, op, shft_log, 2354 stmt_vinfo, synth_shift_p); 2355 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, 2356 tmp_var); 2357 break; 2358 case alg_sub_t_m2: 2359 tmp_var = apply_binop_and_append_stmt (LSHIFT_EXPR, op, 2360 shft_log, stmt_vinfo, 2361 synth_shift_p); 2362 /* In some algorithms the first step involves zeroing the 2363 accumulator. If subtracting from such an accumulator 2364 just emit the negation directly. */ 2365 if (integer_zerop (accumulator)) 2366 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var); 2367 else 2368 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator, 2369 tmp_var); 2370 break; 2371 case alg_add_t2_m: 2372 tmp_var 2373 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log, 2374 stmt_vinfo, synth_shift_p); 2375 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op); 2376 break; 2377 case alg_sub_t2_m: 2378 tmp_var 2379 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log, 2380 stmt_vinfo, synth_shift_p); 2381 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op); 2382 break; 2383 case alg_add_factor: 2384 tmp_var 2385 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log, 2386 stmt_vinfo, synth_shift_p); 2387 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, 2388 tmp_var); 2389 break; 2390 case alg_sub_factor: 2391 tmp_var 2392 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log, 2393 stmt_vinfo, synth_shift_p); 2394 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, 2395 accumulator); 2396 break; 2397 default: 2398 gcc_unreachable (); 2399 } 2400 /* We don't want to append the last stmt in the sequence to stmt_vinfo 2401 but rather return it directly. 
*/ 2402 2403 if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p) 2404 append_pattern_def_seq (stmt_vinfo, stmt); 2405 accumulator = accum_tmp; 2406 } 2407 if (variant == negate_variant) 2408 { 2409 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL); 2410 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator); 2411 accumulator = accum_tmp; 2412 if (cast_to_unsigned_p) 2413 append_pattern_def_seq (stmt_vinfo, stmt); 2414 } 2415 else if (variant == add_variant) 2416 { 2417 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL); 2418 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op); 2419 accumulator = accum_tmp; 2420 if (cast_to_unsigned_p) 2421 append_pattern_def_seq (stmt_vinfo, stmt); 2422 } 2423 /* Move back to a signed if needed. */ 2424 if (cast_to_unsigned_p) 2425 { 2426 tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL); 2427 stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator); 2428 } 2429 2430 return stmt; 2431 } 2432 2433 /* Detect multiplication by constant and convert it into a sequence of 2434 shifts and additions, subtractions, negations. We reuse the 2435 choose_mult_variant algorithms from expmed.c 2436 2437 Input/Output: 2438 2439 STMTS: Contains a stmt from which the pattern search begins, 2440 i.e. the mult stmt. 2441 2442 Output: 2443 2444 * TYPE_IN: The type of the input arguments to the pattern. 2445 2446 * TYPE_OUT: The type of the output of this pattern. 2447 2448 * Return value: A new stmt that will be used to replace 2449 the multiplication. */ 2450 2451 static gimple * 2452 vect_recog_mult_pattern (vec<gimple *> *stmts, 2453 tree *type_in, tree *type_out) 2454 { 2455 gimple *last_stmt = stmts->pop (); 2456 tree oprnd0, oprnd1, vectype, itype; 2457 gimple *pattern_stmt; 2458 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); 2459 2460 if (!is_gimple_assign (last_stmt)) 2461 return NULL; 2462 2463 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR) 2464 return NULL; 2465 2466 oprnd0 = gimple_assign_rhs1 (last_stmt); 2467 oprnd1 = gimple_assign_rhs2 (last_stmt); 2468 itype = TREE_TYPE (oprnd0); 2469 2470 if (TREE_CODE (oprnd0) != SSA_NAME 2471 || TREE_CODE (oprnd1) != INTEGER_CST 2472 || !INTEGRAL_TYPE_P (itype) 2473 || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype))) 2474 return NULL; 2475 2476 vectype = get_vectype_for_scalar_type (itype); 2477 if (vectype == NULL_TREE) 2478 return NULL; 2479 2480 /* If the target can handle vectorized multiplication natively, 2481 don't attempt to optimize this. */ 2482 optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default); 2483 if (mul_optab != unknown_optab) 2484 { 2485 machine_mode vec_mode = TYPE_MODE (vectype); 2486 int icode = (int) optab_handler (mul_optab, vec_mode); 2487 if (icode != CODE_FOR_nothing) 2488 return NULL; 2489 } 2490 2491 pattern_stmt = vect_synth_mult_by_constant (oprnd0, oprnd1, stmt_vinfo); 2492 if (!pattern_stmt) 2493 return NULL; 2494 2495 /* Pattern detected. 
*/ 2496 if (dump_enabled_p ()) 2497 dump_printf_loc (MSG_NOTE, vect_location, 2498 "vect_recog_mult_pattern: detected:\n"); 2499 2500 if (dump_enabled_p ()) 2501 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, 2502 pattern_stmt,0); 2503 2504 stmts->safe_push (last_stmt); 2505 *type_in = vectype; 2506 *type_out = vectype; 2507 2508 return pattern_stmt; 2509 } 2510 2511 /* Detect a signed division by a constant that wouldn't be 2512 otherwise vectorized: 2513 2514 type a_t, b_t; 2515 2516 S1 a_t = b_t / N; 2517 2518 where type 'type' is an integral type and N is a constant. 2519 2520 Similarly handle modulo by a constant: 2521 2522 S4 a_t = b_t % N; 2523 2524 Input/Output: 2525 2526 * STMTS: Contains a stmt from which the pattern search begins, 2527 i.e. the division stmt. S1 is replaced by if N is a power 2528 of two constant and type is signed: 2529 S3 y_t = b_t < 0 ? N - 1 : 0; 2530 S2 x_t = b_t + y_t; 2531 S1' a_t = x_t >> log2 (N); 2532 2533 S4 is replaced if N is a power of two constant and 2534 type is signed by (where *_T temporaries have unsigned type): 2535 S9 y_T = b_t < 0 ? -1U : 0U; 2536 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N)); 2537 S7 z_t = (type) z_T; 2538 S6 w_t = b_t + z_t; 2539 S5 x_t = w_t & (N - 1); 2540 S4' a_t = x_t - z_t; 2541 2542 Output: 2543 2544 * TYPE_IN: The type of the input arguments to the pattern. 2545 2546 * TYPE_OUT: The type of the output of this pattern. 2547 2548 * Return value: A new stmt that will be used to replace the division 2549 S1 or modulo S4 stmt. */ 2550 2551 static gimple * 2552 vect_recog_divmod_pattern (vec<gimple *> *stmts, 2553 tree *type_in, tree *type_out) 2554 { 2555 gimple *last_stmt = stmts->pop (); 2556 tree oprnd0, oprnd1, vectype, itype, cond; 2557 gimple *pattern_stmt, *def_stmt; 2558 enum tree_code rhs_code; 2559 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); 2560 vec_info *vinfo = stmt_vinfo->vinfo; 2561 optab optab; 2562 tree q; 2563 int dummy_int, prec; 2564 stmt_vec_info def_stmt_vinfo; 2565 2566 if (!is_gimple_assign (last_stmt)) 2567 return NULL; 2568 2569 rhs_code = gimple_assign_rhs_code (last_stmt); 2570 switch (rhs_code) 2571 { 2572 case TRUNC_DIV_EXPR: 2573 case TRUNC_MOD_EXPR: 2574 break; 2575 default: 2576 return NULL; 2577 } 2578 2579 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo)) 2580 return NULL; 2581 2582 oprnd0 = gimple_assign_rhs1 (last_stmt); 2583 oprnd1 = gimple_assign_rhs2 (last_stmt); 2584 itype = TREE_TYPE (oprnd0); 2585 if (TREE_CODE (oprnd0) != SSA_NAME 2586 || TREE_CODE (oprnd1) != INTEGER_CST 2587 || TREE_CODE (itype) != INTEGER_TYPE 2588 || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype))) 2589 return NULL; 2590 2591 vectype = get_vectype_for_scalar_type (itype); 2592 if (vectype == NULL_TREE) 2593 return NULL; 2594 2595 /* If the target can handle vectorized division or modulo natively, 2596 don't attempt to optimize this. */ 2597 optab = optab_for_tree_code (rhs_code, vectype, optab_default); 2598 if (optab != unknown_optab) 2599 { 2600 machine_mode vec_mode = TYPE_MODE (vectype); 2601 int icode = (int) optab_handler (optab, vec_mode); 2602 if (icode != CODE_FOR_nothing) 2603 return NULL; 2604 } 2605 2606 prec = TYPE_PRECISION (itype); 2607 if (integer_pow2p (oprnd1)) 2608 { 2609 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1) 2610 return NULL; 2611 2612 /* Pattern detected. 
*/ 2613 if (dump_enabled_p ()) 2614 dump_printf_loc (MSG_NOTE, vect_location, 2615 "vect_recog_divmod_pattern: detected:\n"); 2616 2617 cond = build2 (LT_EXPR, boolean_type_node, oprnd0, 2618 build_int_cst (itype, 0)); 2619 if (rhs_code == TRUNC_DIV_EXPR) 2620 { 2621 tree var = vect_recog_temp_ssa_var (itype, NULL); 2622 tree shift; 2623 def_stmt 2624 = gimple_build_assign (var, COND_EXPR, cond, 2625 fold_build2 (MINUS_EXPR, itype, oprnd1, 2626 build_int_cst (itype, 1)), 2627 build_int_cst (itype, 0)); 2628 new_pattern_def_seq (stmt_vinfo, def_stmt); 2629 var = vect_recog_temp_ssa_var (itype, NULL); 2630 def_stmt 2631 = gimple_build_assign (var, PLUS_EXPR, oprnd0, 2632 gimple_assign_lhs (def_stmt)); 2633 append_pattern_def_seq (stmt_vinfo, def_stmt); 2634 2635 shift = build_int_cst (itype, tree_log2 (oprnd1)); 2636 pattern_stmt 2637 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL), 2638 RSHIFT_EXPR, var, shift); 2639 } 2640 else 2641 { 2642 tree signmask; 2643 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL; 2644 if (compare_tree_int (oprnd1, 2) == 0) 2645 { 2646 signmask = vect_recog_temp_ssa_var (itype, NULL); 2647 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond, 2648 build_int_cst (itype, 1), 2649 build_int_cst (itype, 0)); 2650 append_pattern_def_seq (stmt_vinfo, def_stmt); 2651 } 2652 else 2653 { 2654 tree utype 2655 = build_nonstandard_integer_type (prec, 1); 2656 tree vecutype = get_vectype_for_scalar_type (utype); 2657 tree shift 2658 = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype)) 2659 - tree_log2 (oprnd1)); 2660 tree var = vect_recog_temp_ssa_var (utype, NULL); 2661 2662 def_stmt = gimple_build_assign (var, COND_EXPR, cond, 2663 build_int_cst (utype, -1), 2664 build_int_cst (utype, 0)); 2665 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo); 2666 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo); 2667 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype; 2668 append_pattern_def_seq (stmt_vinfo, def_stmt); 2669 var = vect_recog_temp_ssa_var (utype, NULL); 2670 def_stmt = gimple_build_assign (var, RSHIFT_EXPR, 2671 gimple_assign_lhs (def_stmt), 2672 shift); 2673 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo); 2674 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo); 2675 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype; 2676 append_pattern_def_seq (stmt_vinfo, def_stmt); 2677 signmask = vect_recog_temp_ssa_var (itype, NULL); 2678 def_stmt 2679 = gimple_build_assign (signmask, NOP_EXPR, var); 2680 append_pattern_def_seq (stmt_vinfo, def_stmt); 2681 } 2682 def_stmt 2683 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL), 2684 PLUS_EXPR, oprnd0, signmask); 2685 append_pattern_def_seq (stmt_vinfo, def_stmt); 2686 def_stmt 2687 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL), 2688 BIT_AND_EXPR, gimple_assign_lhs (def_stmt), 2689 fold_build2 (MINUS_EXPR, itype, oprnd1, 2690 build_int_cst (itype, 1))); 2691 append_pattern_def_seq (stmt_vinfo, def_stmt); 2692 2693 pattern_stmt 2694 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL), 2695 MINUS_EXPR, gimple_assign_lhs (def_stmt), 2696 signmask); 2697 } 2698 2699 if (dump_enabled_p ()) 2700 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 2701 0); 2702 2703 stmts->safe_push (last_stmt); 2704 2705 *type_in = vectype; 2706 *type_out = vectype; 2707 return pattern_stmt; 2708 } 2709 2710 if (prec > HOST_BITS_PER_WIDE_INT 2711 || integer_zerop (oprnd1)) 2712 return NULL; 2713 2714 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype))) 2715 return NULL; 
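  /* What follows rewrites the division as a highpart multiplication plus
     shifts, using the same choose_multiplier machinery as expand_divmod in
     expmed.c (the Granlund-Montgomery technique).  As an illustrative
     sketch only: for unsigned 32-bit division by 7, choose_multiplier picks
     ml = 0x24924925 with post_shift = 3 (the mh != 0 case below), so the
     emitted pattern corresponds to

       t1 = oprnd0 h* 0x24924925;
       t2 = oprnd0 - t1;
       t3 = t2 >> 1;
       t4 = t1 + t3;
       q  = t4 >> 2;

     e.g. oprnd0 = 100 gives t1 = 14, t2 = 86, t3 = 43, t4 = 57, q = 14.  */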
2716 2717 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL; 2718 2719 if (TYPE_UNSIGNED (itype)) 2720 { 2721 unsigned HOST_WIDE_INT mh, ml; 2722 int pre_shift, post_shift; 2723 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1) 2724 & GET_MODE_MASK (TYPE_MODE (itype))); 2725 tree t1, t2, t3, t4; 2726 2727 if (d >= (HOST_WIDE_INT_1U << (prec - 1))) 2728 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */ 2729 return NULL; 2730 2731 /* Find a suitable multiplier and right shift count 2732 instead of multiplying with D. */ 2733 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int); 2734 2735 /* If the suggested multiplier is more than SIZE bits, we can do better 2736 for even divisors, using an initial right shift. */ 2737 if (mh != 0 && (d & 1) == 0) 2738 { 2739 pre_shift = ctz_or_zero (d); 2740 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift, 2741 &ml, &post_shift, &dummy_int); 2742 gcc_assert (!mh); 2743 } 2744 else 2745 pre_shift = 0; 2746 2747 if (mh != 0) 2748 { 2749 if (post_shift - 1 >= prec) 2750 return NULL; 2751 2752 /* t1 = oprnd0 h* ml; 2753 t2 = oprnd0 - t1; 2754 t3 = t2 >> 1; 2755 t4 = t1 + t3; 2756 q = t4 >> (post_shift - 1); */ 2757 t1 = vect_recog_temp_ssa_var (itype, NULL); 2758 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0, 2759 build_int_cst (itype, ml)); 2760 append_pattern_def_seq (stmt_vinfo, def_stmt); 2761 2762 t2 = vect_recog_temp_ssa_var (itype, NULL); 2763 def_stmt 2764 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1); 2765 append_pattern_def_seq (stmt_vinfo, def_stmt); 2766 2767 t3 = vect_recog_temp_ssa_var (itype, NULL); 2768 def_stmt 2769 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node); 2770 append_pattern_def_seq (stmt_vinfo, def_stmt); 2771 2772 t4 = vect_recog_temp_ssa_var (itype, NULL); 2773 def_stmt 2774 = gimple_build_assign (t4, PLUS_EXPR, t1, t3); 2775 2776 if (post_shift != 1) 2777 { 2778 append_pattern_def_seq (stmt_vinfo, def_stmt); 2779 2780 q = vect_recog_temp_ssa_var (itype, NULL); 2781 pattern_stmt 2782 = gimple_build_assign (q, RSHIFT_EXPR, t4, 2783 build_int_cst (itype, post_shift - 1)); 2784 } 2785 else 2786 { 2787 q = t4; 2788 pattern_stmt = def_stmt; 2789 } 2790 } 2791 else 2792 { 2793 if (pre_shift >= prec || post_shift >= prec) 2794 return NULL; 2795 2796 /* t1 = oprnd0 >> pre_shift; 2797 t2 = t1 h* ml; 2798 q = t2 >> post_shift; */ 2799 if (pre_shift) 2800 { 2801 t1 = vect_recog_temp_ssa_var (itype, NULL); 2802 def_stmt 2803 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0, 2804 build_int_cst (NULL, pre_shift)); 2805 append_pattern_def_seq (stmt_vinfo, def_stmt); 2806 } 2807 else 2808 t1 = oprnd0; 2809 2810 t2 = vect_recog_temp_ssa_var (itype, NULL); 2811 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1, 2812 build_int_cst (itype, ml)); 2813 2814 if (post_shift) 2815 { 2816 append_pattern_def_seq (stmt_vinfo, def_stmt); 2817 2818 q = vect_recog_temp_ssa_var (itype, NULL); 2819 def_stmt 2820 = gimple_build_assign (q, RSHIFT_EXPR, t2, 2821 build_int_cst (itype, post_shift)); 2822 } 2823 else 2824 q = t2; 2825 2826 pattern_stmt = def_stmt; 2827 } 2828 } 2829 else 2830 { 2831 unsigned HOST_WIDE_INT ml; 2832 int post_shift; 2833 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1); 2834 unsigned HOST_WIDE_INT abs_d; 2835 bool add = false; 2836 tree t1, t2, t3, t4; 2837 2838 /* Give up for -1. */ 2839 if (d == -1) 2840 return NULL; 2841 2842 /* Since d might be INT_MIN, we have to cast to 2843 unsigned HOST_WIDE_INT before negating to avoid 2844 undefined signed overflow. 
*/ 2845 abs_d = (d >= 0 2846 ? (unsigned HOST_WIDE_INT) d 2847 : - (unsigned HOST_WIDE_INT) d); 2848 2849 /* n rem d = n rem -d */ 2850 if (rhs_code == TRUNC_MOD_EXPR && d < 0) 2851 { 2852 d = abs_d; 2853 oprnd1 = build_int_cst (itype, abs_d); 2854 } 2855 else if (HOST_BITS_PER_WIDE_INT >= prec 2856 && abs_d == HOST_WIDE_INT_1U << (prec - 1)) 2857 /* This case is not handled correctly below. */ 2858 return NULL; 2859 2860 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int); 2861 if (ml >= HOST_WIDE_INT_1U << (prec - 1)) 2862 { 2863 add = true; 2864 ml |= HOST_WIDE_INT_M1U << (prec - 1); 2865 } 2866 if (post_shift >= prec) 2867 return NULL; 2868 2869 /* t1 = oprnd0 h* ml; */ 2870 t1 = vect_recog_temp_ssa_var (itype, NULL); 2871 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0, 2872 build_int_cst (itype, ml)); 2873 2874 if (add) 2875 { 2876 /* t2 = t1 + oprnd0; */ 2877 append_pattern_def_seq (stmt_vinfo, def_stmt); 2878 t2 = vect_recog_temp_ssa_var (itype, NULL); 2879 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0); 2880 } 2881 else 2882 t2 = t1; 2883 2884 if (post_shift) 2885 { 2886 /* t3 = t2 >> post_shift; */ 2887 append_pattern_def_seq (stmt_vinfo, def_stmt); 2888 t3 = vect_recog_temp_ssa_var (itype, NULL); 2889 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2, 2890 build_int_cst (itype, post_shift)); 2891 } 2892 else 2893 t3 = t2; 2894 2895 wide_int oprnd0_min, oprnd0_max; 2896 int msb = 1; 2897 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE) 2898 { 2899 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype))) 2900 msb = 0; 2901 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype))) 2902 msb = -1; 2903 } 2904 2905 if (msb == 0 && d >= 0) 2906 { 2907 /* q = t3; */ 2908 q = t3; 2909 pattern_stmt = def_stmt; 2910 } 2911 else 2912 { 2913 /* t4 = oprnd0 >> (prec - 1); 2914 or if we know from VRP that oprnd0 >= 0 2915 t4 = 0; 2916 or if we know from VRP that oprnd0 < 0 2917 t4 = -1; */ 2918 append_pattern_def_seq (stmt_vinfo, def_stmt); 2919 t4 = vect_recog_temp_ssa_var (itype, NULL); 2920 if (msb != 1) 2921 def_stmt = gimple_build_assign (t4, INTEGER_CST, 2922 build_int_cst (itype, msb)); 2923 else 2924 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0, 2925 build_int_cst (itype, prec - 1)); 2926 append_pattern_def_seq (stmt_vinfo, def_stmt); 2927 2928 /* q = t3 - t4; or q = t4 - t3; */ 2929 q = vect_recog_temp_ssa_var (itype, NULL); 2930 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3, 2931 d < 0 ? t3 : t4); 2932 } 2933 } 2934 2935 if (rhs_code == TRUNC_MOD_EXPR) 2936 { 2937 tree r, t1; 2938 2939 /* We divided. Now finish by: 2940 t1 = q * oprnd1; 2941 r = oprnd0 - t1; */ 2942 append_pattern_def_seq (stmt_vinfo, pattern_stmt); 2943 2944 t1 = vect_recog_temp_ssa_var (itype, NULL); 2945 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1); 2946 append_pattern_def_seq (stmt_vinfo, def_stmt); 2947 2948 r = vect_recog_temp_ssa_var (itype, NULL); 2949 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1); 2950 } 2951 2952 /* Pattern detected. 
*/ 2953 if (dump_enabled_p ()) 2954 { 2955 dump_printf_loc (MSG_NOTE, vect_location, 2956 "vect_recog_divmod_pattern: detected: "); 2957 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0); 2958 } 2959 2960 stmts->safe_push (last_stmt); 2961 2962 *type_in = vectype; 2963 *type_out = vectype; 2964 return pattern_stmt; 2965 } 2966 2967 /* Function vect_recog_mixed_size_cond_pattern 2968 2969 Try to find the following pattern: 2970 2971 type x_t, y_t; 2972 TYPE a_T, b_T, c_T; 2973 loop: 2974 S1 a_T = x_t CMP y_t ? b_T : c_T; 2975 2976 where type 'TYPE' is an integral type which has different size 2977 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider 2978 than 'type', the constants need to fit into an integer type 2979 with the same width as 'type') or results of conversion from 'type'. 2980 2981 Input: 2982 2983 * LAST_STMT: A stmt from which the pattern search begins. 2984 2985 Output: 2986 2987 * TYPE_IN: The type of the input arguments to the pattern. 2988 2989 * TYPE_OUT: The type of the output of this pattern. 2990 2991 * Return value: A new stmt that will be used to replace the pattern. 2992 Additionally a def_stmt is added. 2993 2994 a_it = x_t CMP y_t ? b_it : c_it; 2995 a_T = (TYPE) a_it; */ 2996 2997 static gimple * 2998 vect_recog_mixed_size_cond_pattern (vec<gimple *> *stmts, tree *type_in, 2999 tree *type_out) 3000 { 3001 gimple *last_stmt = (*stmts)[0]; 3002 tree cond_expr, then_clause, else_clause; 3003 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info; 3004 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype; 3005 gimple *pattern_stmt, *def_stmt; 3006 vec_info *vinfo = stmt_vinfo->vinfo; 3007 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE; 3008 gimple *def_stmt0 = NULL, *def_stmt1 = NULL; 3009 bool promotion; 3010 tree comp_scalar_type; 3011 3012 if (!is_gimple_assign (last_stmt) 3013 || gimple_assign_rhs_code (last_stmt) != COND_EXPR 3014 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def) 3015 return NULL; 3016 3017 cond_expr = gimple_assign_rhs1 (last_stmt); 3018 then_clause = gimple_assign_rhs2 (last_stmt); 3019 else_clause = gimple_assign_rhs3 (last_stmt); 3020 3021 if (!COMPARISON_CLASS_P (cond_expr)) 3022 return NULL; 3023 3024 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0)); 3025 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type); 3026 if (comp_vectype == NULL_TREE) 3027 return NULL; 3028 3029 type = gimple_expr_type (last_stmt); 3030 if (types_compatible_p (type, comp_scalar_type) 3031 || ((TREE_CODE (then_clause) != INTEGER_CST 3032 || TREE_CODE (else_clause) != INTEGER_CST) 3033 && !INTEGRAL_TYPE_P (comp_scalar_type)) 3034 || !INTEGRAL_TYPE_P (type)) 3035 return NULL; 3036 3037 if ((TREE_CODE (then_clause) != INTEGER_CST 3038 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0, 3039 &def_stmt0, &promotion)) 3040 || (TREE_CODE (else_clause) != INTEGER_CST 3041 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1, 3042 &def_stmt1, &promotion))) 3043 return NULL; 3044 3045 if (orig_type0 && orig_type1 3046 && !types_compatible_p (orig_type0, orig_type1)) 3047 return NULL; 3048 3049 if (orig_type0) 3050 { 3051 if (!types_compatible_p (orig_type0, comp_scalar_type)) 3052 return NULL; 3053 then_clause = gimple_assign_rhs1 (def_stmt0); 3054 itype = orig_type0; 3055 } 3056 3057 if (orig_type1) 3058 { 3059 if (!types_compatible_p (orig_type1, comp_scalar_type)) 3060 return NULL; 3061 else_clause = gimple_assign_rhs1 (def_stmt1); 3062 itype = orig_type1; 3063 } 
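  /* A hypothetical illustration of the transformation: with 8-bit 'char'
     comparison operands and a 32-bit 'int' result,

       i_4 = c_1 < c_2 ? 3 : 7;

     is rewritten below into a char-width COND_EXPR followed by a separate
     widening conversion,

       t_5 = c_1 < c_2 ? 3 : 7;
       i_4 = (int) t_5;

     so the VEC_COND_EXPR itself works on a single element size.  */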
3064 3065 3066 HOST_WIDE_INT cmp_mode_size 3067 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype)); 3068 3069 if (GET_MODE_BITSIZE (TYPE_MODE (type)) == cmp_mode_size) 3070 return NULL; 3071 3072 vectype = get_vectype_for_scalar_type (type); 3073 if (vectype == NULL_TREE) 3074 return NULL; 3075 3076 if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr))) 3077 return NULL; 3078 3079 if (itype == NULL_TREE) 3080 itype = build_nonstandard_integer_type (cmp_mode_size, 3081 TYPE_UNSIGNED (type)); 3082 3083 if (itype == NULL_TREE 3084 || GET_MODE_BITSIZE (TYPE_MODE (itype)) != cmp_mode_size) 3085 return NULL; 3086 3087 vecitype = get_vectype_for_scalar_type (itype); 3088 if (vecitype == NULL_TREE) 3089 return NULL; 3090 3091 if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr))) 3092 return NULL; 3093 3094 if (GET_MODE_BITSIZE (TYPE_MODE (type)) > cmp_mode_size) 3095 { 3096 if ((TREE_CODE (then_clause) == INTEGER_CST 3097 && !int_fits_type_p (then_clause, itype)) 3098 || (TREE_CODE (else_clause) == INTEGER_CST 3099 && !int_fits_type_p (else_clause, itype))) 3100 return NULL; 3101 } 3102 3103 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL), 3104 COND_EXPR, unshare_expr (cond_expr), 3105 fold_convert (itype, then_clause), 3106 fold_convert (itype, else_clause)); 3107 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL), 3108 NOP_EXPR, gimple_assign_lhs (def_stmt)); 3109 3110 new_pattern_def_seq (stmt_vinfo, def_stmt); 3111 def_stmt_info = new_stmt_vec_info (def_stmt, vinfo); 3112 set_vinfo_for_stmt (def_stmt, def_stmt_info); 3113 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype; 3114 *type_in = vecitype; 3115 *type_out = vectype; 3116 3117 if (dump_enabled_p ()) 3118 dump_printf_loc (MSG_NOTE, vect_location, 3119 "vect_recog_mixed_size_cond_pattern: detected:\n"); 3120 3121 return pattern_stmt; 3122 } 3123 3124 3125 /* Helper function of vect_recog_bool_pattern. Called recursively, return 3126 true if bool VAR can and should be optimized that way. Assume it shouldn't 3127 in case it's a result of a comparison which can be directly vectorized into 3128 a vector comparison. Fills in STMTS with all stmts visited during the 3129 walk. */ 3130 3131 static bool 3132 check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts) 3133 { 3134 gimple *def_stmt; 3135 enum vect_def_type dt; 3136 tree rhs1; 3137 enum tree_code rhs_code; 3138 3139 if (!vect_is_simple_use (var, vinfo, &def_stmt, &dt)) 3140 return false; 3141 3142 if (dt != vect_internal_def) 3143 return false; 3144 3145 if (!is_gimple_assign (def_stmt)) 3146 return false; 3147 3148 if (stmts.contains (def_stmt)) 3149 return true; 3150 3151 rhs1 = gimple_assign_rhs1 (def_stmt); 3152 rhs_code = gimple_assign_rhs_code (def_stmt); 3153 switch (rhs_code) 3154 { 3155 case SSA_NAME: 3156 if (! check_bool_pattern (rhs1, vinfo, stmts)) 3157 return false; 3158 break; 3159 3160 CASE_CONVERT: 3161 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1))) 3162 return false; 3163 if (! check_bool_pattern (rhs1, vinfo, stmts)) 3164 return false; 3165 break; 3166 3167 case BIT_NOT_EXPR: 3168 if (! check_bool_pattern (rhs1, vinfo, stmts)) 3169 return false; 3170 break; 3171 3172 case BIT_AND_EXPR: 3173 case BIT_IOR_EXPR: 3174 case BIT_XOR_EXPR: 3175 if (! check_bool_pattern (rhs1, vinfo, stmts) 3176 || ! 
check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts)) 3177 return false; 3178 break; 3179 3180 default: 3181 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison) 3182 { 3183 tree vecitype, comp_vectype; 3184 3185 /* If the comparison can throw, then is_gimple_condexpr will be 3186 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */ 3187 if (stmt_could_throw_p (def_stmt)) 3188 return false; 3189 3190 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1)); 3191 if (comp_vectype == NULL_TREE) 3192 return false; 3193 3194 tree mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1)); 3195 if (mask_type 3196 && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code)) 3197 return false; 3198 3199 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE) 3200 { 3201 machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1)); 3202 tree itype 3203 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1); 3204 vecitype = get_vectype_for_scalar_type (itype); 3205 if (vecitype == NULL_TREE) 3206 return false; 3207 } 3208 else 3209 vecitype = comp_vectype; 3210 if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code)) 3211 return false; 3212 } 3213 else 3214 return false; 3215 break; 3216 } 3217 3218 bool res = stmts.add (def_stmt); 3219 /* We can't end up recursing when just visiting SSA defs but not PHIs. */ 3220 gcc_assert (!res); 3221 3222 return true; 3223 } 3224 3225 3226 /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous 3227 stmt (SSA_NAME_DEF_STMT of VAR) adding a cast to STMT_INFOs 3228 pattern sequence. */ 3229 3230 static tree 3231 adjust_bool_pattern_cast (tree type, tree var, stmt_vec_info stmt_info) 3232 { 3233 gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL), 3234 NOP_EXPR, var); 3235 stmt_vec_info patt_vinfo = new_stmt_vec_info (cast_stmt, stmt_info->vinfo); 3236 set_vinfo_for_stmt (cast_stmt, patt_vinfo); 3237 STMT_VINFO_VECTYPE (patt_vinfo) = get_vectype_for_scalar_type (type); 3238 append_pattern_def_seq (stmt_info, cast_stmt); 3239 return gimple_assign_lhs (cast_stmt); 3240 } 3241 3242 /* Helper function of vect_recog_bool_pattern. Do the actual transformations. 3243 VAR is an SSA_NAME that should be transformed from bool to a wider integer 3244 type, OUT_TYPE is the desired final integer type of the whole pattern. 3245 STMT_INFO is the info of the pattern root and is where pattern stmts should 3246 be associated with. DEFS is a map of pattern defs. 
*/ 3247 3248 static void 3249 adjust_bool_pattern (tree var, tree out_type, 3250 stmt_vec_info stmt_info, hash_map <tree, tree> &defs) 3251 { 3252 gimple *stmt = SSA_NAME_DEF_STMT (var); 3253 enum tree_code rhs_code, def_rhs_code; 3254 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2; 3255 location_t loc; 3256 gimple *pattern_stmt, *def_stmt; 3257 tree trueval = NULL_TREE; 3258 3259 rhs1 = gimple_assign_rhs1 (stmt); 3260 rhs2 = gimple_assign_rhs2 (stmt); 3261 rhs_code = gimple_assign_rhs_code (stmt); 3262 loc = gimple_location (stmt); 3263 switch (rhs_code) 3264 { 3265 case SSA_NAME: 3266 CASE_CONVERT: 3267 irhs1 = *defs.get (rhs1); 3268 itype = TREE_TYPE (irhs1); 3269 pattern_stmt 3270 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL), 3271 SSA_NAME, irhs1); 3272 break; 3273 3274 case BIT_NOT_EXPR: 3275 irhs1 = *defs.get (rhs1); 3276 itype = TREE_TYPE (irhs1); 3277 pattern_stmt 3278 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL), 3279 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1)); 3280 break; 3281 3282 case BIT_AND_EXPR: 3283 /* Try to optimize x = y & (a < b ? 1 : 0); into 3284 x = (a < b ? y : 0); 3285 3286 E.g. for: 3287 bool a_b, b_b, c_b; 3288 TYPE d_T; 3289 3290 S1 a_b = x1 CMP1 y1; 3291 S2 b_b = x2 CMP2 y2; 3292 S3 c_b = a_b & b_b; 3293 S4 d_T = (TYPE) c_b; 3294 3295 we would normally emit: 3296 3297 S1' a_T = x1 CMP1 y1 ? 1 : 0; 3298 S2' b_T = x2 CMP2 y2 ? 1 : 0; 3299 S3' c_T = a_T & b_T; 3300 S4' d_T = c_T; 3301 3302 but we can save one stmt by using the 3303 result of one of the COND_EXPRs in the other COND_EXPR and leave 3304 BIT_AND_EXPR stmt out: 3305 3306 S1' a_T = x1 CMP1 y1 ? 1 : 0; 3307 S3' c_T = x2 CMP2 y2 ? a_T : 0; 3308 S4' f_T = c_T; 3309 3310 At least when VEC_COND_EXPR is implemented using masks 3311 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it 3312 computes the comparison masks and ands it, in one case with 3313 all ones vector, in the other case with a vector register. 3314 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is 3315 often more expensive. 
*/ 3316 def_stmt = SSA_NAME_DEF_STMT (rhs2); 3317 def_rhs_code = gimple_assign_rhs_code (def_stmt); 3318 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison) 3319 { 3320 irhs1 = *defs.get (rhs1); 3321 tree def_rhs1 = gimple_assign_rhs1 (def_stmt); 3322 if (TYPE_PRECISION (TREE_TYPE (irhs1)) 3323 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1)))) 3324 { 3325 rhs_code = def_rhs_code; 3326 rhs1 = def_rhs1; 3327 rhs2 = gimple_assign_rhs2 (def_stmt); 3328 trueval = irhs1; 3329 goto do_compare; 3330 } 3331 else 3332 irhs2 = *defs.get (rhs2); 3333 goto and_ior_xor; 3334 } 3335 def_stmt = SSA_NAME_DEF_STMT (rhs1); 3336 def_rhs_code = gimple_assign_rhs_code (def_stmt); 3337 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison) 3338 { 3339 irhs2 = *defs.get (rhs2); 3340 tree def_rhs1 = gimple_assign_rhs1 (def_stmt); 3341 if (TYPE_PRECISION (TREE_TYPE (irhs2)) 3342 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1)))) 3343 { 3344 rhs_code = def_rhs_code; 3345 rhs1 = def_rhs1; 3346 rhs2 = gimple_assign_rhs2 (def_stmt); 3347 trueval = irhs2; 3348 goto do_compare; 3349 } 3350 else 3351 irhs1 = *defs.get (rhs1); 3352 goto and_ior_xor; 3353 } 3354 /* FALLTHRU */ 3355 case BIT_IOR_EXPR: 3356 case BIT_XOR_EXPR: 3357 irhs1 = *defs.get (rhs1); 3358 irhs2 = *defs.get (rhs2); 3359 and_ior_xor: 3360 if (TYPE_PRECISION (TREE_TYPE (irhs1)) 3361 != TYPE_PRECISION (TREE_TYPE (irhs2))) 3362 { 3363 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1)); 3364 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2)); 3365 int out_prec = TYPE_PRECISION (out_type); 3366 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2)) 3367 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), irhs2, 3368 stmt_info); 3369 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2)) 3370 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), irhs1, 3371 stmt_info); 3372 else 3373 { 3374 irhs1 = adjust_bool_pattern_cast (out_type, irhs1, stmt_info); 3375 irhs2 = adjust_bool_pattern_cast (out_type, irhs2, stmt_info); 3376 } 3377 } 3378 itype = TREE_TYPE (irhs1); 3379 pattern_stmt 3380 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL), 3381 rhs_code, irhs1, irhs2); 3382 break; 3383 3384 default: 3385 do_compare: 3386 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison); 3387 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE 3388 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)) 3389 || (TYPE_PRECISION (TREE_TYPE (rhs1)) 3390 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1))))) 3391 { 3392 machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1)); 3393 itype 3394 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1); 3395 } 3396 else 3397 itype = TREE_TYPE (rhs1); 3398 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2); 3399 if (trueval == NULL_TREE) 3400 trueval = build_int_cst (itype, 1); 3401 else 3402 gcc_checking_assert (useless_type_conversion_p (itype, 3403 TREE_TYPE (trueval))); 3404 pattern_stmt 3405 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL), 3406 COND_EXPR, cond_expr, trueval, 3407 build_int_cst (itype, 0)); 3408 break; 3409 } 3410 3411 gimple_set_location (pattern_stmt, loc); 3412 /* ??? Why does vect_mark_pattern_stmts set the vector type on all 3413 pattern def seq stmts instead of just letting auto-detection do 3414 its work? 
*/ 3415 stmt_vec_info patt_vinfo = new_stmt_vec_info (pattern_stmt, stmt_info->vinfo); 3416 set_vinfo_for_stmt (pattern_stmt, patt_vinfo); 3417 STMT_VINFO_VECTYPE (patt_vinfo) = get_vectype_for_scalar_type (itype); 3418 append_pattern_def_seq (stmt_info, pattern_stmt); 3419 defs.put (var, gimple_assign_lhs (pattern_stmt)); 3420 } 3421 3422 /* Comparison function to qsort a vector of gimple stmts after UID. */ 3423 3424 static int 3425 sort_after_uid (const void *p1, const void *p2) 3426 { 3427 const gimple *stmt1 = *(const gimple * const *)p1; 3428 const gimple *stmt2 = *(const gimple * const *)p2; 3429 return gimple_uid (stmt1) - gimple_uid (stmt2); 3430 } 3431 3432 /* Create pattern stmts for all stmts participating in the bool pattern 3433 specified by BOOL_STMT_SET and its root STMT with the desired type 3434 OUT_TYPE. Return the def of the pattern root. */ 3435 3436 static tree 3437 adjust_bool_stmts (hash_set <gimple *> &bool_stmt_set, 3438 tree out_type, gimple *stmt) 3439 { 3440 /* Gather original stmts in the bool pattern in their order of appearance 3441 in the IL. */ 3442 auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ()); 3443 for (hash_set <gimple *>::iterator i = bool_stmt_set.begin (); 3444 i != bool_stmt_set.end (); ++i) 3445 bool_stmts.quick_push (*i); 3446 bool_stmts.qsort (sort_after_uid); 3447 3448 /* Now process them in that order, producing pattern stmts. */ 3449 hash_map <tree, tree> defs; 3450 for (unsigned i = 0; i < bool_stmts.length (); ++i) 3451 adjust_bool_pattern (gimple_assign_lhs (bool_stmts[i]), 3452 out_type, vinfo_for_stmt (stmt), defs); 3453 3454 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */ 3455 gimple *pattern_stmt 3456 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (stmt))); 3457 return gimple_assign_lhs (pattern_stmt); 3458 } 3459 3460 /* Helper for search_type_for_mask. 
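   Results for each visited def stmt are memoized in CACHE so that shared
   subexpressions are walked only once.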
*/ 3461 3462 static tree 3463 search_type_for_mask_1 (tree var, vec_info *vinfo, 3464 hash_map<gimple *, tree> &cache) 3465 { 3466 gimple *def_stmt; 3467 enum vect_def_type dt; 3468 tree rhs1; 3469 enum tree_code rhs_code; 3470 tree res = NULL_TREE, res2; 3471 3472 if (TREE_CODE (var) != SSA_NAME) 3473 return NULL_TREE; 3474 3475 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var))) 3476 return NULL_TREE; 3477 3478 if (!vect_is_simple_use (var, vinfo, &def_stmt, &dt)) 3479 return NULL_TREE; 3480 3481 if (dt != vect_internal_def) 3482 return NULL_TREE; 3483 3484 if (!is_gimple_assign (def_stmt)) 3485 return NULL_TREE; 3486 3487 tree *c = cache.get (def_stmt); 3488 if (c) 3489 return *c; 3490 3491 rhs_code = gimple_assign_rhs_code (def_stmt); 3492 rhs1 = gimple_assign_rhs1 (def_stmt); 3493 3494 switch (rhs_code) 3495 { 3496 case SSA_NAME: 3497 case BIT_NOT_EXPR: 3498 CASE_CONVERT: 3499 res = search_type_for_mask_1 (rhs1, vinfo, cache); 3500 break; 3501 3502 case BIT_AND_EXPR: 3503 case BIT_IOR_EXPR: 3504 case BIT_XOR_EXPR: 3505 res = search_type_for_mask_1 (rhs1, vinfo, cache); 3506 res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt), vinfo, 3507 cache); 3508 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2))) 3509 res = res2; 3510 break; 3511 3512 default: 3513 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison) 3514 { 3515 tree comp_vectype, mask_type; 3516 3517 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1))) 3518 { 3519 res = search_type_for_mask_1 (rhs1, vinfo, cache); 3520 res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt), 3521 vinfo, cache); 3522 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2))) 3523 res = res2; 3524 break; 3525 } 3526 3527 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1)); 3528 if (comp_vectype == NULL_TREE) 3529 { 3530 res = NULL_TREE; 3531 break; 3532 } 3533 3534 mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1)); 3535 if (!mask_type 3536 || !expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code)) 3537 { 3538 res = NULL_TREE; 3539 break; 3540 } 3541 3542 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE 3543 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))) 3544 { 3545 machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1)); 3546 res = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1); 3547 } 3548 else 3549 res = TREE_TYPE (rhs1); 3550 } 3551 } 3552 3553 cache.put (def_stmt, res); 3554 return res; 3555 } 3556 3557 /* Return the proper type for converting bool VAR into 3558 an integer value or NULL_TREE if no such type exists. 3559 The type is chosen so that converted value has the 3560 same number of elements as VAR's vector type. */ 3561 3562 static tree 3563 search_type_for_mask (tree var, vec_info *vinfo) 3564 { 3565 hash_map<gimple *, tree> cache; 3566 return search_type_for_mask_1 (var, vinfo, cache); 3567 } 3568 3569 /* Function vect_recog_bool_pattern 3570 3571 Try to find pattern like following: 3572 3573 bool a_b, b_b, c_b, d_b, e_b; 3574 TYPE f_T; 3575 loop: 3576 S1 a_b = x1 CMP1 y1; 3577 S2 b_b = x2 CMP2 y2; 3578 S3 c_b = a_b & b_b; 3579 S4 d_b = x3 CMP3 y3; 3580 S5 e_b = c_b | d_b; 3581 S6 f_T = (TYPE) e_b; 3582 3583 where type 'TYPE' is an integral type. Or a similar pattern 3584 ending in 3585 3586 S6 f_Y = e_b ? r_Y : s_Y; 3587 3588 as results from if-conversion of a complex condition. 3589 3590 Input: 3591 3592 * LAST_STMT: A stmt at the end from which the pattern 3593 search begins, i.e. cast of a bool to 3594 an integer type. 
3595 3596 Output: 3597 3598 * TYPE_IN: The type of the input arguments to the pattern. 3599 3600 * TYPE_OUT: The type of the output of this pattern. 3601 3602 * Return value: A new stmt that will be used to replace the pattern. 3603 3604 Assuming size of TYPE is the same as size of all comparisons 3605 (otherwise some casts would be added where needed), the above 3606 sequence we create related pattern stmts: 3607 S1' a_T = x1 CMP1 y1 ? 1 : 0; 3608 S3' c_T = x2 CMP2 y2 ? a_T : 0; 3609 S4' d_T = x3 CMP3 y3 ? 1 : 0; 3610 S5' e_T = c_T | d_T; 3611 S6' f_T = e_T; 3612 3613 Instead of the above S3' we could emit: 3614 S2' b_T = x2 CMP2 y2 ? 1 : 0; 3615 S3' c_T = a_T | b_T; 3616 but the above is more efficient. */ 3617 3618 static gimple * 3619 vect_recog_bool_pattern (vec<gimple *> *stmts, tree *type_in, 3620 tree *type_out) 3621 { 3622 gimple *last_stmt = stmts->pop (); 3623 enum tree_code rhs_code; 3624 tree var, lhs, rhs, vectype; 3625 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); 3626 stmt_vec_info new_stmt_info; 3627 vec_info *vinfo = stmt_vinfo->vinfo; 3628 gimple *pattern_stmt; 3629 3630 if (!is_gimple_assign (last_stmt)) 3631 return NULL; 3632 3633 var = gimple_assign_rhs1 (last_stmt); 3634 lhs = gimple_assign_lhs (last_stmt); 3635 3636 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var))) 3637 return NULL; 3638 3639 hash_set<gimple *> bool_stmts; 3640 3641 rhs_code = gimple_assign_rhs_code (last_stmt); 3642 if (CONVERT_EXPR_CODE_P (rhs_code)) 3643 { 3644 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs)) 3645 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1) 3646 return NULL; 3647 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs)); 3648 if (vectype == NULL_TREE) 3649 return NULL; 3650 3651 if (check_bool_pattern (var, vinfo, bool_stmts)) 3652 { 3653 rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (lhs), last_stmt); 3654 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL); 3655 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs))) 3656 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs); 3657 else 3658 pattern_stmt 3659 = gimple_build_assign (lhs, NOP_EXPR, rhs); 3660 } 3661 else 3662 { 3663 tree type = search_type_for_mask (var, vinfo); 3664 tree cst0, cst1, tmp; 3665 3666 if (!type) 3667 return NULL; 3668 3669 /* We may directly use cond with narrowed type to avoid 3670 multiple cond exprs with following result packing and 3671 perform single cond with packed mask instead. In case 3672 of widening we better make cond first and then extract 3673 results. 
*/ 3674 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs))) 3675 type = TREE_TYPE (lhs); 3676 3677 cst0 = build_int_cst (type, 0); 3678 cst1 = build_int_cst (type, 1); 3679 tmp = vect_recog_temp_ssa_var (type, NULL); 3680 pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0); 3681 3682 if (!useless_type_conversion_p (type, TREE_TYPE (lhs))) 3683 { 3684 tree new_vectype = get_vectype_for_scalar_type (type); 3685 new_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo); 3686 set_vinfo_for_stmt (pattern_stmt, new_stmt_info); 3687 STMT_VINFO_VECTYPE (new_stmt_info) = new_vectype; 3688 new_pattern_def_seq (stmt_vinfo, pattern_stmt); 3689 3690 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL); 3691 pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp); 3692 } 3693 } 3694 3695 *type_out = vectype; 3696 *type_in = vectype; 3697 stmts->safe_push (last_stmt); 3698 if (dump_enabled_p ()) 3699 dump_printf_loc (MSG_NOTE, vect_location, 3700 "vect_recog_bool_pattern: detected:\n"); 3701 3702 return pattern_stmt; 3703 } 3704 else if (rhs_code == COND_EXPR 3705 && TREE_CODE (var) == SSA_NAME) 3706 { 3707 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs)); 3708 if (vectype == NULL_TREE) 3709 return NULL; 3710 3711 /* Build a scalar type for the boolean result that when 3712 vectorized matches the vector type of the result in 3713 size and number of elements. */ 3714 unsigned prec 3715 = wi::udiv_trunc (TYPE_SIZE (vectype), 3716 TYPE_VECTOR_SUBPARTS (vectype)).to_uhwi (); 3717 tree type 3718 = build_nonstandard_integer_type (prec, 3719 TYPE_UNSIGNED (TREE_TYPE (var))); 3720 if (get_vectype_for_scalar_type (type) == NULL_TREE) 3721 return NULL; 3722 3723 if (!check_bool_pattern (var, vinfo, bool_stmts)) 3724 return NULL; 3725 3726 rhs = adjust_bool_stmts (bool_stmts, type, last_stmt); 3727 3728 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL); 3729 pattern_stmt 3730 = gimple_build_assign (lhs, COND_EXPR, 3731 build2 (NE_EXPR, boolean_type_node, 3732 rhs, build_int_cst (type, 0)), 3733 gimple_assign_rhs2 (last_stmt), 3734 gimple_assign_rhs3 (last_stmt)); 3735 *type_out = vectype; 3736 *type_in = vectype; 3737 stmts->safe_push (last_stmt); 3738 if (dump_enabled_p ()) 3739 dump_printf_loc (MSG_NOTE, vect_location, 3740 "vect_recog_bool_pattern: detected:\n"); 3741 3742 return pattern_stmt; 3743 } 3744 else if (rhs_code == SSA_NAME 3745 && STMT_VINFO_DATA_REF (stmt_vinfo)) 3746 { 3747 stmt_vec_info pattern_stmt_info; 3748 vectype = STMT_VINFO_VECTYPE (stmt_vinfo); 3749 gcc_assert (vectype != NULL_TREE); 3750 if (!VECTOR_MODE_P (TYPE_MODE (vectype))) 3751 return NULL; 3752 3753 if (check_bool_pattern (var, vinfo, bool_stmts)) 3754 rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (vectype), last_stmt); 3755 else 3756 { 3757 tree type = search_type_for_mask (var, vinfo); 3758 tree cst0, cst1, new_vectype; 3759 3760 if (!type) 3761 return NULL; 3762 3763 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype))) 3764 type = TREE_TYPE (vectype); 3765 3766 cst0 = build_int_cst (type, 0); 3767 cst1 = build_int_cst (type, 1); 3768 new_vectype = get_vectype_for_scalar_type (type); 3769 3770 rhs = vect_recog_temp_ssa_var (type, NULL); 3771 pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0); 3772 3773 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo); 3774 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info); 3775 STMT_VINFO_VECTYPE (pattern_stmt_info) = new_vectype; 3776 append_pattern_def_seq (stmt_vinfo, pattern_stmt); 3777 } 3778 3779 lhs = build1 
(VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
3780 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3781 {
3782 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3783 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
3784 append_pattern_def_seq (stmt_vinfo, cast_stmt);
3785 rhs = rhs2;
3786 }
3787 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3788 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3789 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3790 STMT_VINFO_DATA_REF (pattern_stmt_info)
3791 = STMT_VINFO_DATA_REF (stmt_vinfo);
3792 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
3793 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
3794 STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
3795 STMT_VINFO_DR_OFFSET (pattern_stmt_info)
3796 = STMT_VINFO_DR_OFFSET (stmt_vinfo);
3797 STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
3798 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
3799 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
3800 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
3801 *type_out = vectype;
3802 *type_in = vectype;
3803 stmts->safe_push (last_stmt);
3804 if (dump_enabled_p ())
3805 dump_printf_loc (MSG_NOTE, vect_location,
3806 "vect_recog_bool_pattern: detected:\n");
3807 return pattern_stmt;
3808 }
3809 else
3810 return NULL;
3811 }
3812
3813
3814 /* A helper for vect_recog_mask_conversion_pattern. Build
3815 conversion of MASK to a type suitable for masking VECTYPE.
3816 The built statement gets the required vectype and is appended to
3817 the pattern def sequence of STMT_VINFO.
3818
3819 Return converted mask. */
3820
3821 static tree
3822 build_mask_conversion (tree mask, tree vectype, stmt_vec_info stmt_vinfo,
3823 vec_info *vinfo)
3824 {
3825 gimple *stmt;
3826 tree masktype, tmp;
3827 stmt_vec_info new_stmt_info;
3828
3829 masktype = build_same_sized_truth_vector_type (vectype);
3830 tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
3831 stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
3832 new_stmt_info = new_stmt_vec_info (stmt, vinfo);
3833 set_vinfo_for_stmt (stmt, new_stmt_info);
3834 STMT_VINFO_VECTYPE (new_stmt_info) = masktype;
3835 append_pattern_def_seq (stmt_vinfo, stmt);
3836
3837 return tmp;
3838 }
3839
3840
3841 /* Function vect_recog_mask_conversion_pattern
3842
3843 Try to find statements which require boolean type
3844 conversion. Additional conversion statements are
3845 added to handle such cases. For example:
3846
3847 bool m_1, m_2, m_3;
3848 int i_4, i_5;
3849 double d_6, d_7;
3850 char c_1, c_2, c_3;
3851
3852 S1 m_1 = i_4 > i_5;
3853 S2 m_2 = d_6 < d_7;
3854 S3 m_3 = m_1 & m_2;
3855 S4 c_1 = m_3 ? c_2 : c_3;
3856
3857 Will be transformed into:
3858
3859 S1 m_1 = i_4 > i_5;
3860 S2 m_2 = d_6 < d_7;
3861 S3'' m_2' = (_Bool[bitsize=32])m_2
3862 S3' m_3' = m_1 & m_2';
3863 S4'' m_3'' = (_Bool[bitsize=8])m_3'
3864 S4' c_1' = m_3'' ? c_2 : c_3; */
3865
3866 static gimple *
3867 vect_recog_mask_conversion_pattern (vec<gimple *> *stmts, tree *type_in,
3868 tree *type_out)
3869 {
3870 gimple *last_stmt = stmts->pop ();
3871 enum tree_code rhs_code;
3872 tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
3873 tree vectype1, vectype2;
3874 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
3875 stmt_vec_info pattern_stmt_info;
3876 vec_info *vinfo = stmt_vinfo->vinfo;
3877 gimple *pattern_stmt;
3878
3879 /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion.
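   For instance (a hypothetical sketch with made-up names), a masked load

     lhs_5 = MASK_LOAD (ptr_1, align_2, mask_3);

   whose mask was computed by comparing ints while lhs_5 is a double gets
   its mask converted to the mask type matching the double vector type,
   and the call is rebuilt to use the converted mask:

     mask_4 = (new mask type) mask_3;
     lhs_6 = MASK_LOAD (ptr_1, align_2, mask_4);

   MASK_STORE is handled the same way, with the vector type taken from
   the stored value rather than from a lhs.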
*/ 3880 if (is_gimple_call (last_stmt) 3881 && gimple_call_internal_p (last_stmt) 3882 && (gimple_call_internal_fn (last_stmt) == IFN_MASK_STORE 3883 || gimple_call_internal_fn (last_stmt) == IFN_MASK_LOAD)) 3884 { 3885 bool load = (gimple_call_internal_fn (last_stmt) == IFN_MASK_LOAD); 3886 3887 if (load) 3888 { 3889 lhs = gimple_call_lhs (last_stmt); 3890 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs)); 3891 } 3892 else 3893 { 3894 rhs2 = gimple_call_arg (last_stmt, 3); 3895 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (rhs2)); 3896 } 3897 3898 rhs1 = gimple_call_arg (last_stmt, 2); 3899 rhs1_type = search_type_for_mask (rhs1, vinfo); 3900 if (!rhs1_type) 3901 return NULL; 3902 vectype2 = get_mask_type_for_scalar_type (rhs1_type); 3903 3904 if (!vectype1 || !vectype2 3905 || TYPE_VECTOR_SUBPARTS (vectype1) == TYPE_VECTOR_SUBPARTS (vectype2)) 3906 return NULL; 3907 3908 tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo); 3909 3910 if (load) 3911 { 3912 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL); 3913 pattern_stmt 3914 = gimple_build_call_internal (IFN_MASK_LOAD, 3, 3915 gimple_call_arg (last_stmt, 0), 3916 gimple_call_arg (last_stmt, 1), 3917 tmp); 3918 gimple_call_set_lhs (pattern_stmt, lhs); 3919 } 3920 else 3921 pattern_stmt 3922 = gimple_build_call_internal (IFN_MASK_STORE, 4, 3923 gimple_call_arg (last_stmt, 0), 3924 gimple_call_arg (last_stmt, 1), 3925 tmp, 3926 gimple_call_arg (last_stmt, 3)); 3927 3928 3929 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo); 3930 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info); 3931 STMT_VINFO_DATA_REF (pattern_stmt_info) 3932 = STMT_VINFO_DATA_REF (stmt_vinfo); 3933 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info) 3934 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo); 3935 STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo); 3936 STMT_VINFO_DR_OFFSET (pattern_stmt_info) 3937 = STMT_VINFO_DR_OFFSET (stmt_vinfo); 3938 STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo); 3939 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info) 3940 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo); 3941 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt; 3942 3943 *type_out = vectype1; 3944 *type_in = vectype1; 3945 stmts->safe_push (last_stmt); 3946 if (dump_enabled_p ()) 3947 dump_printf_loc (MSG_NOTE, vect_location, 3948 "vect_recog_mask_conversion_pattern: detected:\n"); 3949 3950 return pattern_stmt; 3951 } 3952 3953 if (!is_gimple_assign (last_stmt)) 3954 return NULL; 3955 3956 lhs = gimple_assign_lhs (last_stmt); 3957 rhs1 = gimple_assign_rhs1 (last_stmt); 3958 rhs_code = gimple_assign_rhs_code (last_stmt); 3959 3960 /* Check for cond expression requiring mask conversion. */ 3961 if (rhs_code == COND_EXPR) 3962 { 3963 /* vect_recog_mixed_size_cond_pattern could apply. 3964 Do nothing then. */ 3965 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo)) 3966 return NULL; 3967 3968 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs)); 3969 3970 if (TREE_CODE (rhs1) == SSA_NAME) 3971 { 3972 rhs1_type = search_type_for_mask (rhs1, vinfo); 3973 if (!rhs1_type) 3974 return NULL; 3975 } 3976 else if (COMPARISON_CLASS_P (rhs1)) 3977 rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0)); 3978 else 3979 return NULL; 3980 3981 vectype2 = get_mask_type_for_scalar_type (rhs1_type); 3982 3983 if (!vectype1 || !vectype2 3984 || TYPE_VECTOR_SUBPARTS (vectype1) == TYPE_VECTOR_SUBPARTS (vectype2)) 3985 return NULL; 3986 3987 /* If rhs1 is a comparison we need to move it into a 3988 separate statement. 
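     For instance (illustrative names), for

       c_1 = (i_4 > i_5) ? c_2 : c_3;

     the comparison is first split out as

       tmp_6 = i_4 > i_5;

     so that the mask conversion built below is applied to an SSA name:

       tmp_7 = (mask type for c_1's vector type) tmp_6;
       c_8 = tmp_7 ? c_2 : c_3;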
*/ 3989 if (TREE_CODE (rhs1) != SSA_NAME) 3990 { 3991 tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL); 3992 pattern_stmt = gimple_build_assign (tmp, rhs1); 3993 rhs1 = tmp; 3994 3995 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo); 3996 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info); 3997 STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype2; 3998 append_pattern_def_seq (stmt_vinfo, pattern_stmt); 3999 } 4000 4001 tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo); 4002 4003 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL); 4004 pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp, 4005 gimple_assign_rhs2 (last_stmt), 4006 gimple_assign_rhs3 (last_stmt)); 4007 4008 *type_out = vectype1; 4009 *type_in = vectype1; 4010 stmts->safe_push (last_stmt); 4011 if (dump_enabled_p ()) 4012 dump_printf_loc (MSG_NOTE, vect_location, 4013 "vect_recog_mask_conversion_pattern: detected:\n"); 4014 4015 return pattern_stmt; 4016 } 4017 4018 /* Now check for binary boolean operations requiring conversion for 4019 one of operands. */ 4020 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs))) 4021 return NULL; 4022 4023 if (rhs_code != BIT_IOR_EXPR 4024 && rhs_code != BIT_XOR_EXPR 4025 && rhs_code != BIT_AND_EXPR 4026 && TREE_CODE_CLASS (rhs_code) != tcc_comparison) 4027 return NULL; 4028 4029 rhs2 = gimple_assign_rhs2 (last_stmt); 4030 4031 rhs1_type = search_type_for_mask (rhs1, vinfo); 4032 rhs2_type = search_type_for_mask (rhs2, vinfo); 4033 4034 if (!rhs1_type || !rhs2_type 4035 || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type)) 4036 return NULL; 4037 4038 if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type)) 4039 { 4040 vectype1 = get_mask_type_for_scalar_type (rhs1_type); 4041 if (!vectype1) 4042 return NULL; 4043 rhs2 = build_mask_conversion (rhs2, vectype1, stmt_vinfo, vinfo); 4044 } 4045 else 4046 { 4047 vectype1 = get_mask_type_for_scalar_type (rhs2_type); 4048 if (!vectype1) 4049 return NULL; 4050 rhs1 = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo); 4051 } 4052 4053 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL); 4054 pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2); 4055 4056 *type_out = vectype1; 4057 *type_in = vectype1; 4058 stmts->safe_push (last_stmt); 4059 if (dump_enabled_p ()) 4060 dump_printf_loc (MSG_NOTE, vect_location, 4061 "vect_recog_mask_conversion_pattern: detected:\n"); 4062 4063 return pattern_stmt; 4064 } 4065 4066 4067 /* Mark statements that are involved in a pattern. 
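   ORIG_STMT and PATTERN_STMT are linked to each other through their
   STMT_VINFO_RELATED_STMT fields, ORIG_STMT is flagged as being in a
   pattern, PATTERN_STMT gets PATTERN_VECTYPE, and each stmt in the
   pattern def sequence gets a stmt_vec_info if it does not have one
   already, and PATTERN_VECTYPE if its vectype is not set yet.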
*/
4068
4069 static inline void
4070 vect_mark_pattern_stmts (gimple *orig_stmt, gimple *pattern_stmt,
4071 tree pattern_vectype)
4072 {
4073 stmt_vec_info pattern_stmt_info, def_stmt_info;
4074 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
4075 vec_info *vinfo = orig_stmt_info->vinfo;
4076 gimple *def_stmt;
4077
4078 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
4079 if (pattern_stmt_info == NULL)
4080 {
4081 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
4082 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
4083 }
4084 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
4085
4086 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
4087 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
4088 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
4089 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
4090 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
4091 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
4092 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
4093 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
4094 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
4095 {
4096 gimple_stmt_iterator si;
4097 for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
4098 !gsi_end_p (si); gsi_next (&si))
4099 {
4100 def_stmt = gsi_stmt (si);
4101 def_stmt_info = vinfo_for_stmt (def_stmt);
4102 if (def_stmt_info == NULL)
4103 {
4104 def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
4105 set_vinfo_for_stmt (def_stmt, def_stmt_info);
4106 }
4107 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
4108 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
4109 STMT_VINFO_DEF_TYPE (def_stmt_info) = vect_internal_def;
4110 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
4111 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
4112 }
4113 }
4114 }
4115
4116 /* Function vect_pattern_recog_1
4117
4118 Input:
4119 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
4120 computation pattern.
4121 STMT: A stmt from which the pattern search should start.
4122
4123 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
4124 expression that computes the same functionality and can be used to
4125 replace the sequence of stmts that are involved in the pattern.
4126
4127 Output:
4128 This function checks if the expression returned by PATTERN_RECOG_FUNC is
4129 supported in vector form by the target. We use 'TYPE_IN' to obtain the
4130 relevant vector type. If 'TYPE_IN' is already a vector type, then this
4131 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
4132 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
4133 the available target pattern.
4134
4135 This function also does some bookkeeping, as explained in the documentation
4136 for vect_pattern_recog.
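   For instance, when the returned pattern stmt is a gimple assignment,
   its rhs code is used to look up the optab for the vector form of
   TYPE_IN, and the pattern is accepted only if the target provides an
   instruction for that mode whose result mode matches the vector form
   of TYPE_OUT.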
*/ 4137 4138 static bool 4139 vect_pattern_recog_1 (vect_recog_func *recog_func, 4140 gimple_stmt_iterator si, 4141 vec<gimple *> *stmts_to_replace) 4142 { 4143 gimple *stmt = gsi_stmt (si), *pattern_stmt; 4144 stmt_vec_info stmt_info; 4145 loop_vec_info loop_vinfo; 4146 tree pattern_vectype; 4147 tree type_in, type_out; 4148 enum tree_code code; 4149 int i; 4150 gimple *next; 4151 4152 stmts_to_replace->truncate (0); 4153 stmts_to_replace->quick_push (stmt); 4154 pattern_stmt = recog_func->fn (stmts_to_replace, &type_in, &type_out); 4155 if (!pattern_stmt) 4156 return false; 4157 4158 stmt = stmts_to_replace->last (); 4159 stmt_info = vinfo_for_stmt (stmt); 4160 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 4161 4162 if (VECTOR_BOOLEAN_TYPE_P (type_in) 4163 || VECTOR_MODE_P (TYPE_MODE (type_in))) 4164 { 4165 /* No need to check target support (already checked by the pattern 4166 recognition function). */ 4167 pattern_vectype = type_out ? type_out : type_in; 4168 } 4169 else 4170 { 4171 machine_mode vec_mode; 4172 enum insn_code icode; 4173 optab optab; 4174 4175 /* Check target support */ 4176 type_in = get_vectype_for_scalar_type (type_in); 4177 if (!type_in) 4178 return false; 4179 if (type_out) 4180 type_out = get_vectype_for_scalar_type (type_out); 4181 else 4182 type_out = type_in; 4183 if (!type_out) 4184 return false; 4185 pattern_vectype = type_out; 4186 4187 if (is_gimple_assign (pattern_stmt)) 4188 code = gimple_assign_rhs_code (pattern_stmt); 4189 else 4190 { 4191 gcc_assert (is_gimple_call (pattern_stmt)); 4192 code = CALL_EXPR; 4193 } 4194 4195 optab = optab_for_tree_code (code, type_in, optab_default); 4196 vec_mode = TYPE_MODE (type_in); 4197 if (!optab 4198 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing 4199 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out))) 4200 return false; 4201 } 4202 4203 /* Found a vectorizable pattern. */ 4204 if (dump_enabled_p ()) 4205 { 4206 dump_printf_loc (MSG_NOTE, vect_location, 4207 "%s pattern recognized: ", recog_func->name); 4208 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0); 4209 } 4210 4211 /* Mark the stmts that are involved in the pattern. */ 4212 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype); 4213 4214 /* Patterns cannot be vectorized using SLP, because they change the order of 4215 computation. */ 4216 if (loop_vinfo) 4217 FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next) 4218 if (next == stmt) 4219 LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i); 4220 4221 /* It is possible that additional pattern stmts are created and inserted in 4222 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the 4223 relevant statements. */ 4224 for (i = 0; stmts_to_replace->iterate (i, &stmt) 4225 && (unsigned) i < (stmts_to_replace->length () - 1); 4226 i++) 4227 { 4228 stmt_info = vinfo_for_stmt (stmt); 4229 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info); 4230 if (dump_enabled_p ()) 4231 { 4232 dump_printf_loc (MSG_NOTE, vect_location, 4233 "additional pattern stmt: "); 4234 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0); 4235 } 4236 4237 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE); 4238 } 4239 4240 return true; 4241 } 4242 4243 4244 /* Function vect_pattern_recog 4245 4246 Input: 4247 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for 4248 computation idioms. 4249 4250 Output - for each computation idiom that is detected we create a new stmt 4251 that provides the same functionality and that can be vectorized. 
We 4252 also record some information in the struct_stmt_info of the relevant 4253 stmts, as explained below: 4254 4255 At the entry to this function we have the following stmts, with the 4256 following initial value in the STMT_VINFO fields: 4257 4258 stmt in_pattern_p related_stmt vec_stmt 4259 S1: a_i = .... - - - 4260 S2: a_2 = ..use(a_i).. - - - 4261 S3: a_1 = ..use(a_2).. - - - 4262 S4: a_0 = ..use(a_1).. - - - 4263 S5: ... = ..use(a_0).. - - - 4264 4265 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be 4266 represented by a single stmt. We then: 4267 - create a new stmt S6 equivalent to the pattern (the stmt is not 4268 inserted into the code) 4269 - fill in the STMT_VINFO fields as follows: 4270 4271 in_pattern_p related_stmt vec_stmt 4272 S1: a_i = .... - - - 4273 S2: a_2 = ..use(a_i).. - - - 4274 S3: a_1 = ..use(a_2).. - - - 4275 S4: a_0 = ..use(a_1).. true S6 - 4276 '---> S6: a_new = .... - S4 - 4277 S5: ... = ..use(a_0).. - - - 4278 4279 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point 4280 to each other through the RELATED_STMT field). 4281 4282 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead 4283 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will 4284 remain irrelevant unless used by stmts other than S4. 4285 4286 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3} 4287 (because they are marked as irrelevant). It will vectorize S6, and record 4288 a pointer to the new vector stmt VS6 from S6 (as usual). 4289 S4 will be skipped, and S5 will be vectorized as usual: 4290 4291 in_pattern_p related_stmt vec_stmt 4292 S1: a_i = .... - - - 4293 S2: a_2 = ..use(a_i).. - - - 4294 S3: a_1 = ..use(a_2).. - - - 4295 > VS6: va_new = .... - - - 4296 S4: a_0 = ..use(a_1).. true S6 VS6 4297 '---> S6: a_new = .... - S4 VS6 4298 > VS5: ... = ..vuse(va_new).. - - - 4299 S5: ... = ..use(a_0).. - - - 4300 4301 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used 4302 elsewhere), and we'll end up with: 4303 4304 VS6: va_new = .... 4305 VS5: ... = ..vuse(va_new).. 4306 4307 In case of more than one pattern statements, e.g., widen-mult with 4308 intermediate type: 4309 4310 S1 a_t = ; 4311 S2 a_T = (TYPE) a_t; 4312 '--> S3: a_it = (interm_type) a_t; 4313 S4 prod_T = a_T * CONST; 4314 '--> S5: prod_T' = a_it w* CONST; 4315 4316 there may be other users of a_T outside the pattern. In that case S2 will 4317 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed 4318 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will 4319 be recorded in S3. */ 4320 4321 void 4322 vect_pattern_recog (vec_info *vinfo) 4323 { 4324 struct loop *loop; 4325 basic_block *bbs; 4326 unsigned int nbbs; 4327 gimple_stmt_iterator si; 4328 unsigned int i, j; 4329 auto_vec<gimple *, 1> stmts_to_replace; 4330 gimple *stmt; 4331 4332 if (dump_enabled_p ()) 4333 dump_printf_loc (MSG_NOTE, vect_location, 4334 "=== vect_pattern_recog ===\n"); 4335 4336 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo)) 4337 { 4338 loop = LOOP_VINFO_LOOP (loop_vinfo); 4339 bbs = LOOP_VINFO_BBS (loop_vinfo); 4340 nbbs = loop->num_nodes; 4341 4342 /* Scan through the loop stmts, applying the pattern recognition 4343 functions starting at each stmt visited: */ 4344 for (i = 0; i < nbbs; i++) 4345 { 4346 basic_block bb = bbs[i]; 4347 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) 4348 { 4349 /* Scan over all generic vect_recog_xxx_pattern functions. 
*/ 4350 for (j = 0; j < NUM_PATTERNS; j++) 4351 if (vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], si, 4352 &stmts_to_replace)) 4353 break; 4354 } 4355 } 4356 } 4357 else 4358 { 4359 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo); 4360 for (si = bb_vinfo->region_begin; 4361 gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si)) 4362 { 4363 if ((stmt = gsi_stmt (si)) 4364 && vinfo_for_stmt (stmt) 4365 && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt))) 4366 continue; 4367 4368 /* Scan over all generic vect_recog_xxx_pattern functions. */ 4369 for (j = 0; j < NUM_PATTERNS; j++) 4370 if (vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], si, 4371 &stmts_to_replace)) 4372 break; 4373 } 4374 } 4375 } 4376