1 /* Lower vector operations to scalar operations. 2 Copyright (C) 2004-2017 Free Software Foundation, Inc. 3 4 This file is part of GCC. 5 6 GCC is free software; you can redistribute it and/or modify it 7 under the terms of the GNU General Public License as published by the 8 Free Software Foundation; either version 3, or (at your option) any 9 later version. 10 11 GCC is distributed in the hope that it will be useful, but WITHOUT 12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with GCC; see the file COPYING3. If not see 18 <http://www.gnu.org/licenses/>. */ 19 20 #include "config.h" 21 #include "system.h" 22 #include "coretypes.h" 23 #include "backend.h" 24 #include "rtl.h" 25 #include "tree.h" 26 #include "gimple.h" 27 #include "tree-pass.h" 28 #include "ssa.h" 29 #include "expmed.h" 30 #include "optabs-tree.h" 31 #include "diagnostic.h" 32 #include "fold-const.h" 33 #include "stor-layout.h" 34 #include "langhooks.h" 35 #include "tree-eh.h" 36 #include "gimple-iterator.h" 37 #include "gimplify-me.h" 38 #include "gimplify.h" 39 #include "tree-cfg.h" 40 41 42 static void expand_vector_operations_1 (gimple_stmt_iterator *); 43 44 45 /* Build a constant of type TYPE, made of VALUE's bits replicated 46 every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision. 
*/ 47 static tree 48 build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value) 49 { 50 int width = tree_to_uhwi (TYPE_SIZE (inner_type)); 51 int n = (TYPE_PRECISION (type) + HOST_BITS_PER_WIDE_INT - 1) 52 / HOST_BITS_PER_WIDE_INT; 53 unsigned HOST_WIDE_INT low, mask; 54 HOST_WIDE_INT a[WIDE_INT_MAX_ELTS]; 55 int i; 56 57 gcc_assert (n && n <= WIDE_INT_MAX_ELTS); 58 59 if (width == HOST_BITS_PER_WIDE_INT) 60 low = value; 61 else 62 { 63 mask = ((HOST_WIDE_INT)1 << width) - 1; 64 low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask); 65 } 66 67 for (i = 0; i < n; i++) 68 a[i] = low; 69 70 gcc_assert (TYPE_PRECISION (type) <= MAX_BITSIZE_MODE_ANY_INT); 71 return wide_int_to_tree 72 (type, wide_int::from_array (a, n, TYPE_PRECISION (type))); 73 } 74 75 static GTY(()) tree vector_inner_type; 76 static GTY(()) tree vector_last_type; 77 static GTY(()) int vector_last_nunits; 78 79 /* Return a suitable vector types made of SUBPARTS units each of mode 80 "word_mode" (the global variable). 
*/ 81 static tree 82 build_word_mode_vector_type (int nunits) 83 { 84 if (!vector_inner_type) 85 vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1); 86 else if (vector_last_nunits == nunits) 87 { 88 gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE); 89 return vector_last_type; 90 } 91 92 vector_last_nunits = nunits; 93 vector_last_type = build_vector_type (vector_inner_type, nunits); 94 return vector_last_type; 95 } 96 97 typedef tree (*elem_op_func) (gimple_stmt_iterator *, 98 tree, tree, tree, tree, tree, enum tree_code, 99 tree); 100 101 static inline tree 102 tree_vec_extract (gimple_stmt_iterator *gsi, tree type, 103 tree t, tree bitsize, tree bitpos) 104 { 105 if (TREE_CODE (t) == SSA_NAME) 106 { 107 gimple *def_stmt = SSA_NAME_DEF_STMT (t); 108 if (is_gimple_assign (def_stmt) 109 && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST 110 || (bitpos 111 && gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR))) 112 t = gimple_assign_rhs1 (def_stmt); 113 } 114 if (bitpos) 115 { 116 if (TREE_CODE (type) == BOOLEAN_TYPE) 117 { 118 tree itype 119 = build_nonstandard_integer_type (tree_to_uhwi (bitsize), 0); 120 tree field = gimplify_build3 (gsi, BIT_FIELD_REF, itype, t, 121 bitsize, bitpos); 122 return gimplify_build2 (gsi, NE_EXPR, type, field, 123 build_zero_cst (itype)); 124 } 125 else 126 return gimplify_build3 (gsi, BIT_FIELD_REF, type, t, bitsize, bitpos); 127 } 128 else 129 return gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t); 130 } 131 132 static tree 133 do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a, 134 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize, 135 enum tree_code code, tree type ATTRIBUTE_UNUSED) 136 { 137 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos); 138 return gimplify_build1 (gsi, code, inner_type, a); 139 } 140 141 static tree 142 do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b, 143 tree bitpos, tree bitsize, enum tree_code code, 144 tree type ATTRIBUTE_UNUSED) 145 { 146 
if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE) 147 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos); 148 if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE) 149 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos); 150 return gimplify_build2 (gsi, code, inner_type, a, b); 151 } 152 153 /* Construct expression (A[BITPOS] code B[BITPOS]) ? -1 : 0 154 155 INNER_TYPE is the type of A and B elements 156 157 returned expression is of signed integer type with the 158 size equal to the size of INNER_TYPE. */ 159 static tree 160 do_compare (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b, 161 tree bitpos, tree bitsize, enum tree_code code, tree type) 162 { 163 tree stype = TREE_TYPE (type); 164 tree cst_false = build_zero_cst (stype); 165 tree cst_true = build_all_ones_cst (stype); 166 tree cmp; 167 168 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos); 169 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos); 170 171 cmp = build2 (code, boolean_type_node, a, b); 172 return gimplify_build3 (gsi, COND_EXPR, stype, cmp, cst_true, cst_false); 173 } 174 175 /* Expand vector addition to scalars. This does bit twiddling 176 in order to increase parallelism: 177 178 a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^ 179 (a ^ b) & 0x80808080 180 181 a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^ 182 (a ^ ~b) & 0x80808080 183 184 -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080) 185 186 This optimization should be done only if 4 vector items or more 187 fit into a word. 
*/ 188 static tree 189 do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b, 190 tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED, 191 enum tree_code code, tree type ATTRIBUTE_UNUSED) 192 { 193 tree inner_type = TREE_TYPE (TREE_TYPE (a)); 194 unsigned HOST_WIDE_INT max; 195 tree low_bits, high_bits, a_low, b_low, result_low, signs; 196 197 max = GET_MODE_MASK (TYPE_MODE (inner_type)); 198 low_bits = build_replicated_const (word_type, inner_type, max >> 1); 199 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1)); 200 201 a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos); 202 b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos); 203 204 signs = gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, a, b); 205 b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits); 206 if (code == PLUS_EXPR) 207 a_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, a, low_bits); 208 else 209 { 210 a_low = gimplify_build2 (gsi, BIT_IOR_EXPR, word_type, a, high_bits); 211 signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, signs); 212 } 213 214 signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits); 215 result_low = gimplify_build2 (gsi, code, word_type, a_low, b_low); 216 return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs); 217 } 218 219 static tree 220 do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b, 221 tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED, 222 tree bitsize ATTRIBUTE_UNUSED, 223 enum tree_code code ATTRIBUTE_UNUSED, 224 tree type ATTRIBUTE_UNUSED) 225 { 226 tree inner_type = TREE_TYPE (TREE_TYPE (b)); 227 HOST_WIDE_INT max; 228 tree low_bits, high_bits, b_low, result_low, signs; 229 230 max = GET_MODE_MASK (TYPE_MODE (inner_type)); 231 low_bits = build_replicated_const (word_type, inner_type, max >> 1); 232 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1)); 233 234 b = tree_vec_extract (gsi, word_type, b, 
bitsize, bitpos); 235 236 b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits); 237 signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, b); 238 signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits); 239 result_low = gimplify_build2 (gsi, MINUS_EXPR, word_type, high_bits, b_low); 240 return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs); 241 } 242 243 /* Expand a vector operation to scalars, by using many operations 244 whose type is the vector type's inner type. */ 245 static tree 246 expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f, 247 tree type, tree inner_type, 248 tree a, tree b, enum tree_code code) 249 { 250 vec<constructor_elt, va_gc> *v; 251 tree part_width = TYPE_SIZE (inner_type); 252 tree index = bitsize_int (0); 253 int nunits = TYPE_VECTOR_SUBPARTS (type); 254 int delta = tree_to_uhwi (part_width) 255 / tree_to_uhwi (TYPE_SIZE (TREE_TYPE (type))); 256 int i; 257 location_t loc = gimple_location (gsi_stmt (*gsi)); 258 259 if (types_compatible_p (gimple_expr_type (gsi_stmt (*gsi)), type)) 260 warning_at (loc, OPT_Wvector_operation_performance, 261 "vector operation will be expanded piecewise"); 262 else 263 warning_at (loc, OPT_Wvector_operation_performance, 264 "vector operation will be expanded in parallel"); 265 266 vec_alloc (v, (nunits + delta - 1) / delta); 267 for (i = 0; i < nunits; 268 i += delta, index = int_const_binop (PLUS_EXPR, index, part_width)) 269 { 270 tree result = f (gsi, inner_type, a, b, index, part_width, code, type); 271 constructor_elt ce = {NULL_TREE, result}; 272 v->quick_push (ce); 273 } 274 275 return build_constructor (type, v); 276 } 277 278 /* Expand a vector operation to scalars with the freedom to use 279 a scalar integer type, or to use a different size for the items 280 in the vector type. 
*/ 281 static tree 282 expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type, 283 tree a, tree b, 284 enum tree_code code) 285 { 286 tree result, compute_type; 287 machine_mode mode; 288 int n_words = tree_to_uhwi (TYPE_SIZE_UNIT (type)) / UNITS_PER_WORD; 289 location_t loc = gimple_location (gsi_stmt (*gsi)); 290 291 /* We have three strategies. If the type is already correct, just do 292 the operation an element at a time. Else, if the vector is wider than 293 one word, do it a word at a time; finally, if the vector is smaller 294 than one word, do it as a scalar. */ 295 if (TYPE_MODE (TREE_TYPE (type)) == word_mode) 296 return expand_vector_piecewise (gsi, f, 297 type, TREE_TYPE (type), 298 a, b, code); 299 else if (n_words > 1) 300 { 301 tree word_type = build_word_mode_vector_type (n_words); 302 result = expand_vector_piecewise (gsi, f, 303 word_type, TREE_TYPE (word_type), 304 a, b, code); 305 result = force_gimple_operand_gsi (gsi, result, true, NULL, true, 306 GSI_SAME_STMT); 307 } 308 else 309 { 310 /* Use a single scalar operation with a mode no wider than word_mode. */ 311 mode = mode_for_size (tree_to_uhwi (TYPE_SIZE (type)), MODE_INT, 0); 312 compute_type = lang_hooks.types.type_for_mode (mode, 1); 313 result = f (gsi, compute_type, a, b, NULL_TREE, NULL_TREE, code, type); 314 warning_at (loc, OPT_Wvector_operation_performance, 315 "vector operation will be expanded with a " 316 "single scalar operation"); 317 } 318 319 return result; 320 } 321 322 /* Expand a vector operation to scalars; for integer types we can use 323 special bit twiddling tricks to do the sums a word at a time, using 324 function F_PARALLEL instead of F. These tricks are done only if 325 they can process at least four items, that is, only if the vector 326 holds at least four items and if a word can hold four items. 
*/ 327 static tree 328 expand_vector_addition (gimple_stmt_iterator *gsi, 329 elem_op_func f, elem_op_func f_parallel, 330 tree type, tree a, tree b, enum tree_code code) 331 { 332 int parts_per_word = UNITS_PER_WORD 333 / tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))); 334 335 if (INTEGRAL_TYPE_P (TREE_TYPE (type)) 336 && parts_per_word >= 4 337 && TYPE_VECTOR_SUBPARTS (type) >= 4) 338 return expand_vector_parallel (gsi, f_parallel, 339 type, a, b, code); 340 else 341 return expand_vector_piecewise (gsi, f, 342 type, TREE_TYPE (type), 343 a, b, code); 344 } 345 346 /* Try to expand vector comparison expression OP0 CODE OP1 by 347 querying optab if the following expression: 348 VEC_COND_EXPR< OP0 CODE OP1, {-1,...}, {0,...}> 349 can be expanded. */ 350 static tree 351 expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0, 352 tree op1, enum tree_code code) 353 { 354 tree t; 355 if (!expand_vec_cmp_expr_p (TREE_TYPE (op0), type, code) 356 && !expand_vec_cond_expr_p (type, TREE_TYPE (op0), code)) 357 t = expand_vector_piecewise (gsi, do_compare, type, 358 TREE_TYPE (TREE_TYPE (op0)), op0, op1, code); 359 else 360 t = NULL_TREE; 361 362 return t; 363 } 364 365 /* Helper function of expand_vector_divmod. Gimplify a RSHIFT_EXPR in type 366 of OP0 with shift counts in SHIFTCNTS array and return the temporary holding 367 the result if successful, otherwise return NULL_TREE. 
*/
static tree
add_rshift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts)
{
  unsigned int nelts = TYPE_VECTOR_SUBPARTS (type);

  /* Is every element shifted by the same amount?  */
  bool uniform = true;
  for (unsigned int k = 1; k < nelts; k++)
    if (shiftcnts[k] != shiftcnts[0])
      {
	uniform = false;
	break;
      }

  /* A uniform shift by zero is a no-op.  */
  if (uniform && shiftcnts[0] == 0)
    return op0;

  if (uniform)
    {
      /* Prefer a vector-by-scalar shift when the target has one.  */
      optab sop = optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar);
      if (sop != unknown_optab
	  && optab_handler (sop, TYPE_MODE (type)) != CODE_FOR_nothing)
	return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
				build_int_cst (NULL_TREE, shiftcnts[0]));
    }

  /* Otherwise fall back to a vector-by-vector shift.  */
  optab vop = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
  if (vop != unknown_optab
      && optab_handler (vop, TYPE_MODE (type)) != CODE_FOR_nothing)
    {
      tree *elts = XALLOCAVEC (tree, nelts);
      for (unsigned int k = 0; k < nelts; k++)
	elts[k] = build_int_cst (TREE_TYPE (type), shiftcnts[k]);
      return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
			      build_vector (type, elts));
    }

  return NULL_TREE;
}

/* Try to expand integer vector division by constant using
   widening multiply, shifts and additions.
*/
static tree
expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
		      tree op1, enum tree_code code)
{
  bool use_pow2 = true;
  bool has_vector_shift = true;
  /* MODE encodes the chosen multiplication strategy (see the switches at
     the bottom): -1 = not yet chosen, -2 = give up; 0/1 = the two unsigned
     sequences, 2..5 = the four signed sequences.  All elements must agree
     on a single mode.  */
  int mode = -1, this_mode;
  int pre_shift = -1, post_shift;
  unsigned int nunits = TYPE_VECTOR_SUBPARTS (type);
  /* One allocation carved into four per-element scratch arrays.  */
  int *shifts = XALLOCAVEC (int, nunits * 4);
  int *pre_shifts = shifts + nunits;
  int *post_shifts = pre_shifts + nunits;
  int *shift_temps = post_shifts + nunits;
  unsigned HOST_WIDE_INT *mulc = XALLOCAVEC (unsigned HOST_WIDE_INT, nunits);
  int prec = TYPE_PRECISION (TREE_TYPE (type));
  int dummy_int;
  unsigned int i;
  signop sign_p = TYPE_SIGN (TREE_TYPE (type));
  unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type)));
  tree *vec;
  tree cur_op, mulcst, tem;
  optab op;

  /* The multiplier arithmetic below is done in a HOST_WIDE_INT.  */
  if (prec > HOST_BITS_PER_WIDE_INT)
    return NULL_TREE;

  op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
  if (op == unknown_optab
      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    has_vector_shift = false;

  /* Analysis phase.  Determine if all op1 elements are either power
     of two and it is possible to expand it using shifts (or for remainder
     using masking).  Additionally compute the multiplicative constants
     and pre and post shifts if the division is to be expanded using
     widening or high part multiplication plus shifts.  */
  for (i = 0; i < nunits; i++)
    {
      tree cst = VECTOR_CST_ELT (op1, i);
      unsigned HOST_WIDE_INT ml;

      if (TREE_CODE (cst) != INTEGER_CST || integer_zerop (cst))
	return NULL_TREE;
      pre_shifts[i] = 0;
      post_shifts[i] = 0;
      mulc[i] = 0;
      if (use_pow2
	  && (!integer_pow2p (cst) || tree_int_cst_sgn (cst) != 1))
	use_pow2 = false;
      if (use_pow2)
	{
	  shifts[i] = tree_log2 (cst);
	  /* Without a vector shift, a TRUNC_DIV_EXPR needs all shift
	     counts equal.  */
	  if (shifts[i] != shifts[0]
	      && code == TRUNC_DIV_EXPR
	      && !has_vector_shift)
	    use_pow2 = false;
	}
      /* Once the multiply strategy is dead, keep scanning only for the
	 power-of-two information above.  */
      if (mode == -2)
	continue;
      if (sign_p == UNSIGNED)
	{
	  unsigned HOST_WIDE_INT mh;
	  unsigned HOST_WIDE_INT d = TREE_INT_CST_LOW (cst) & mask;

	  if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
	    /* FIXME: Can transform this into op0 >= op1 ? 1 : 0.  */
	    return NULL_TREE;

	  if (d <= 1)
	    {
	      mode = -2;
	      continue;
	    }

	  /* Find a suitable multiplier and right shift count
	     instead of multiplying with D.  */
	  mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);

	  /* If the suggested multiplier is more than SIZE bits, we can
	     do better for even divisors, using an initial right shift.  */
	  if ((mh != 0 && (d & 1) == 0)
	      || (!has_vector_shift && pre_shift != -1))
	    {
	      if (has_vector_shift)
		pre_shift = ctz_or_zero (d);
	      else if (pre_shift == -1)
		{
		  /* No vector shift: a single shared pre-shift must work
		     for every element; use the smallest trailing-zero
		     count over all divisors.  */
		  unsigned int j;
		  for (j = 0; j < nunits; j++)
		    {
		      tree cst2 = VECTOR_CST_ELT (op1, j);
		      unsigned HOST_WIDE_INT d2;
		      int this_pre_shift;

		      if (!tree_fits_uhwi_p (cst2))
			return NULL_TREE;
		      d2 = tree_to_uhwi (cst2) & mask;
		      if (d2 == 0)
			return NULL_TREE;
		      this_pre_shift = floor_log2 (d2 & -d2);
		      if (pre_shift == -1 || this_pre_shift < pre_shift)
			pre_shift = this_pre_shift;
		    }
		  if (i != 0 && pre_shift != 0)
		    {
		      /* Restart.  */
		      i = -1U;
		      mode = -1;
		      continue;
		    }
		}
	      if (pre_shift != 0)
		{
		  if ((d >> pre_shift) <= 1)
		    {
		      mode = -2;
		      continue;
		    }
		  mh = choose_multiplier (d >> pre_shift, prec,
					  prec - pre_shift,
					  &ml, &post_shift, &dummy_int);
		  gcc_assert (!mh);
		  pre_shifts[i] = pre_shift;
		}
	    }
	  if (!mh)
	    this_mode = 0;
	  else
	    this_mode = 1;
	}
      else
	{
	  HOST_WIDE_INT d = TREE_INT_CST_LOW (cst);
	  unsigned HOST_WIDE_INT abs_d;

	  if (d == -1)
	    return NULL_TREE;

	  /* Since d might be INT_MIN, we have to cast to
	     unsigned HOST_WIDE_INT before negating to avoid
	     undefined signed overflow.  */
	  abs_d = (d >= 0
		   ? (unsigned HOST_WIDE_INT) d
		   : - (unsigned HOST_WIDE_INT) d);

	  /* n rem d = n rem -d */
	  if (code == TRUNC_MOD_EXPR && d < 0)
	    d = abs_d;
	  else if (abs_d == HOST_WIDE_INT_1U << (prec - 1))
	    {
	      /* This case is not handled correctly below.  */
	      mode = -2;
	      continue;
	    }
	  if (abs_d <= 1)
	    {
	      mode = -2;
	      continue;
	    }

	  choose_multiplier (abs_d, prec, prec - 1, &ml,
			     &post_shift, &dummy_int);
	  if (ml >= HOST_WIDE_INT_1U << (prec - 1))
	    {
	      /* Multiplier doesn't fit in prec-1 bits: use the
		 add/subtract sequence (modes 4/5); sign-extend ML.  */
	      this_mode = 4 + (d < 0);
	      ml |= HOST_WIDE_INT_M1U << (prec - 1);
	    }
	  else
	    this_mode = 2 + (d < 0);
	}
      mulc[i] = ml;
      post_shifts[i] = post_shift;
      if ((i && !has_vector_shift && post_shifts[0] != post_shift)
	  || post_shift >= prec
	  || pre_shifts[i] >= prec)
	this_mode = -2;

      if (i == 0)
	mode = this_mode;
      else if (mode != this_mode)
	mode = -2;
    }

  vec = XALLOCAVEC (tree, nunits);

  if (use_pow2)
    {
      /* All divisors are positive powers of two: use shifts/masks.  */
      tree addend = NULL_TREE;
      if (sign_p == SIGNED)
	{
	  tree uns_type;

	  /* Both division and remainder sequences need
	     op0 < 0 ? mask : 0 computed.  It can be either computed as
	     (type) (((uns_type) (op0 >> (prec - 1))) >> (prec - shifts[i]))
	     if none of the shifts is 0, or as the conditional.  */
	  for (i = 0; i < nunits; i++)
	    if (shifts[i] == 0)
	      break;
	  uns_type
	    = build_vector_type (build_nonstandard_integer_type (prec, 1),
				 nunits);
	  if (i == nunits && TYPE_MODE (uns_type) == TYPE_MODE (type))
	    {
	      for (i = 0; i < nunits; i++)
		shift_temps[i] = prec - 1;
	      cur_op = add_rshift (gsi, type, op0, shift_temps);
	      if (cur_op != NULL_TREE)
		{
		  cur_op = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
					    uns_type, cur_op);
		  for (i = 0; i < nunits; i++)
		    shift_temps[i] = prec - shifts[i];
		  cur_op = add_rshift (gsi, uns_type, cur_op, shift_temps);
		  if (cur_op != NULL_TREE)
		    addend = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
					      type, cur_op);
		}
	    }
	  /* Fall back to a VEC_COND_EXPR if the shift sequence failed.  */
	  if (addend == NULL_TREE
	      && expand_vec_cond_expr_p (type, type, LT_EXPR))
	    {
	      tree zero, cst, cond, mask_type;
	      gimple *stmt;

	      mask_type = build_same_sized_truth_vector_type (type);
	      zero = build_zero_cst (type);
	      cond = build2 (LT_EXPR, mask_type, op0, zero);
	      for (i = 0; i < nunits; i++)
		vec[i] = build_int_cst (TREE_TYPE (type),
					(HOST_WIDE_INT_1U
					 << shifts[i]) - 1);
	      cst = build_vector (type, vec);
	      addend = make_ssa_name (type);
	      stmt = gimple_build_assign (addend, VEC_COND_EXPR, cond,
					  cst, zero);
	      gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
	    }
	}
      if (code == TRUNC_DIV_EXPR)
	{
	  if (sign_p == UNSIGNED)
	    {
	      /* q = op0 >> shift;  */
	      cur_op = add_rshift (gsi, type, op0, shifts);
	      if (cur_op != NULL_TREE)
		return cur_op;
	    }
	  else if (addend != NULL_TREE)
	    {
	      /* t1 = op0 + addend;
		 q = t1 >> shift;  */
	      op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
	      if (op != unknown_optab
		  && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
		{
		  cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0, addend);
		  cur_op = add_rshift (gsi, type, cur_op, shifts);
		  if (cur_op != NULL_TREE)
		    return cur_op;
		}
	    }
	}
      else
	{
	  /* TRUNC_MOD_EXPR by a power of two: mask out the low bits.  */
	  tree mask;
	  for (i = 0; i < nunits; i++)
	    vec[i] = build_int_cst (TREE_TYPE (type),
				    (HOST_WIDE_INT_1U
				     << shifts[i]) - 1);
	  mask = build_vector (type, vec);
	  op = optab_for_tree_code (BIT_AND_EXPR, type, optab_default);
	  if (op != unknown_optab
	      && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
	    {
	      if (sign_p == UNSIGNED)
		/* r = op0 & mask;  */
		return gimplify_build2 (gsi, BIT_AND_EXPR, type, op0, mask);
	      else if (addend != NULL_TREE)
		{
		  /* t1 = op0 + addend;
		     t2 = t1 & mask;
		     r = t2 - addend;  */
		  op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
		  if (op != unknown_optab
		      && optab_handler (op, TYPE_MODE (type))
			 != CODE_FOR_nothing)
		    {
		      cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0,
						addend);
		      cur_op = gimplify_build2 (gsi, BIT_AND_EXPR, type,
						cur_op, mask);
		      op = optab_for_tree_code (MINUS_EXPR, type,
						optab_default);
		      if (op != unknown_optab
			  && optab_handler (op, TYPE_MODE (type))
			     != CODE_FOR_nothing)
			return gimplify_build2 (gsi, MINUS_EXPR, type,
						cur_op, addend);
		    }
		}
	    }
	}
    }

  if (mode == -2 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
    return NULL_TREE;

  /* The multiply strategy needs a highpart-multiply in this mode.  */
  if (!can_mult_highpart_p (TYPE_MODE (type), TYPE_UNSIGNED (type)))
    return NULL_TREE;

  cur_op = op0;

  /* First switch: emit anything needed before the highpart multiply
     and set up SHIFT_TEMPS/POST_SHIFTS for the second switch.  */
  switch (mode)
    {
    case 0:
      gcc_assert (sign_p == UNSIGNED);
      /* t1 = oprnd0 >> pre_shift;
	 t2 = t1 h* ml;
	 q = t2 >> post_shift;  */
      cur_op = add_rshift (gsi, type, cur_op, pre_shifts);
      if (cur_op == NULL_TREE)
	return NULL_TREE;
      break;
    case 1:
      gcc_assert (sign_p == UNSIGNED);
      for (i = 0; i < nunits; i++)
	{
	  shift_temps[i] = 1;
	  post_shifts[i]--;
	}
      break;
    case 2:
    case 3:
    case 4:
    case 5:
      gcc_assert (sign_p == SIGNED);
      for (i = 0; i < nunits; i++)
	shift_temps[i] = prec - 1;
      break;
    default:
      return NULL_TREE;
    }

  for (i = 0; i < nunits; i++)
    vec[i] = build_int_cst (TREE_TYPE (type), mulc[i]);
  mulcst = build_vector (type, vec);

  cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst);

  /* Second switch: finish the quotient computation after the multiply.  */
  switch (mode)
    {
    case 0:
      /* t1 = oprnd0 >> pre_shift;
	 t2 = t1 h* ml;
	 q = t2 >> post_shift;  */
      cur_op = add_rshift (gsi, type, cur_op, post_shifts);
      break;
    case 1:
      /* t1 = oprnd0 h* ml;
	 t2 = oprnd0 - t1;
	 t3 = t2 >> 1;
	 t4 = t1 + t3;
	 q = t4 >> (post_shift - 1);  */
      op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
      if (op == unknown_optab
	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
	return NULL_TREE;
      tem = gimplify_build2 (gsi, MINUS_EXPR, type, op0, cur_op);
      tem = add_rshift (gsi, type, tem, shift_temps);
      op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
      if (op == unknown_optab
	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
	return NULL_TREE;
      tem = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, tem);
      cur_op = add_rshift (gsi, type, tem, post_shifts);
      if (cur_op == NULL_TREE)
	return NULL_TREE;
      break;
    case 2:
    case 3:
    case 4:
    case 5:
      /* t1 = oprnd0 h* ml;
	 t2 = t1; [ iff (mode & 2) != 0 ]
	 t2 = t1 + oprnd0; [ iff (mode & 2) == 0 ]
	 t3 = t2 >> post_shift;
	 t4 = oprnd0 >> (prec - 1);
	 q = t3 - t4; [ iff (mode & 1) == 0 ]
	 q = t4 - t3; [ iff (mode & 1) != 0 ]  */
      if ((mode & 2) == 0)
	{
	  op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
	  if (op == unknown_optab
	      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
	    return NULL_TREE;
	  cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, op0);
	}
      cur_op = add_rshift (gsi, type, cur_op, post_shifts);
      if (cur_op == NULL_TREE)
	return NULL_TREE;
      tem = add_rshift (gsi, type, op0, shift_temps);
      if (tem == NULL_TREE)
	return NULL_TREE;
      op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
      if (op == unknown_optab
	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
	return NULL_TREE;
      if ((mode & 1) == 0)
	cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, cur_op, tem);
      else
	cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, tem, cur_op);
      break;
    default:
      gcc_unreachable ();
    }

  if (code == TRUNC_DIV_EXPR)
    return cur_op;

  /* We divided.  Now finish by:
     t1 = q * oprnd1;
     r = oprnd0 - t1;  */
  op = optab_for_tree_code (MULT_EXPR, type, optab_default);
  if (op == unknown_optab
      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    return NULL_TREE;
  tem = gimplify_build2 (gsi, MULT_EXPR, type, cur_op, op1);
  op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
  if (op == unknown_optab
      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    return NULL_TREE;
  return gimplify_build2 (gsi, MINUS_EXPR, type, op0, tem);
}

/* Expand a vector condition to scalars, by using many conditions
   on the vector's elements.
*/
static void
expand_vector_condition (gimple_stmt_iterator *gsi)
{
  gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
  tree type = gimple_expr_type (stmt);
  tree a = gimple_assign_rhs1 (stmt);
  tree a1 = a;
  tree a2 = NULL_TREE;
  bool a_is_comparison = false;
  tree b = gimple_assign_rhs2 (stmt);
  tree c = gimple_assign_rhs3 (stmt);
  vec<constructor_elt, va_gc> *v;
  tree constr;
  tree inner_type = TREE_TYPE (type);
  tree cond_type = TREE_TYPE (TREE_TYPE (a));
  tree comp_inner_type = cond_type;
  tree width = TYPE_SIZE (inner_type);
  tree index = bitsize_int (0);
  /* The comparison operands may have a different element width than the
     data operands, so they get their own width/index counters.  */
  tree comp_width = width;
  tree comp_index = index;
  int nunits = TYPE_VECTOR_SUBPARTS (type);
  int i;
  location_t loc = gimple_location (gsi_stmt (*gsi));

  /* If the condition is an embedded comparison rather than an SSA name,
     split out its two operands.  */
  if (!is_gimple_val (a))
    {
      gcc_assert (COMPARISON_CLASS_P (a));
      a_is_comparison = true;
      a1 = TREE_OPERAND (a, 0);
      a2 = TREE_OPERAND (a, 1);
      comp_inner_type = TREE_TYPE (TREE_TYPE (a1));
      comp_width = TYPE_SIZE (comp_inner_type);
    }

  /* Nothing to do if the target can expand the VEC_COND_EXPR itself.  */
  if (expand_vec_cond_expr_p (type, TREE_TYPE (a1), TREE_CODE (a)))
    return;

  /* Handle vector boolean types with bitmasks.  If there is a comparison
     and we can expand the comparison into the vector boolean bitmask,
     or otherwise if it is compatible with type, we can transform
      vbfld_1 = x_2 < y_3 ? vbfld_4 : vbfld_5;
     into
      tmp_6 = x_2 < y_3;
      tmp_7 = tmp_6 & vbfld_4;
      tmp_8 = ~tmp_6;
      tmp_9 = tmp_8 & vbfld_5;
      vbfld_1 = tmp_7 | tmp_9;
     Similarly for vbfld_10 instead of x_2 < y_3.  */
  if (VECTOR_BOOLEAN_TYPE_P (type)
      && SCALAR_INT_MODE_P (TYPE_MODE (type))
      && (GET_MODE_BITSIZE (TYPE_MODE (type))
	  < (TYPE_VECTOR_SUBPARTS (type)
	     * GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (type)))))
      && (a_is_comparison
	  ? useless_type_conversion_p (type, TREE_TYPE (a))
	  : expand_vec_cmp_expr_p (TREE_TYPE (a1), type, TREE_CODE (a))))
    {
      if (a_is_comparison)
	a = gimplify_build2 (gsi, TREE_CODE (a), type, a1, a2);
      a1 = gimplify_build2 (gsi, BIT_AND_EXPR, type, a, b);
      a2 = gimplify_build1 (gsi, BIT_NOT_EXPR, type, a);
      a2 = gimplify_build2 (gsi, BIT_AND_EXPR, type, a2, c);
      a = gimplify_build2 (gsi, BIT_IOR_EXPR, type, a1, a2);
      gimple_assign_set_rhs_from_tree (gsi, a);
      update_stmt (gsi_stmt (*gsi));
      return;
    }

  /* TODO: try and find a smaller vector type.  */

  warning_at (loc, OPT_Wvector_operation_performance,
	      "vector condition will be expanded piecewise");

  /* Build one scalar COND_EXPR per element and collect the results in a
     CONSTRUCTOR that replaces the statement's RHS.  */
  vec_alloc (v, nunits);
  for (i = 0; i < nunits; i++)
    {
      tree aa, result;
      tree bb = tree_vec_extract (gsi, inner_type, b, width, index);
      tree cc = tree_vec_extract (gsi, inner_type, c, width, index);
      if (a_is_comparison)
	{
	  tree aa1 = tree_vec_extract (gsi, comp_inner_type, a1,
				       comp_width, comp_index);
	  tree aa2 = tree_vec_extract (gsi, comp_inner_type, a2,
				       comp_width, comp_index);
	  aa = fold_build2 (TREE_CODE (a), cond_type, aa1, aa2);
	}
      else
	aa = tree_vec_extract (gsi, cond_type, a, width, index);
      result = gimplify_build3 (gsi, COND_EXPR, inner_type, aa, bb, cc);
      constructor_elt ce = {NULL_TREE, result};
      v->quick_push (ce);
      index = int_const_binop (PLUS_EXPR, index, width);
      if (width == comp_width)
	comp_index = index;
      else
	comp_index = int_const_binop (PLUS_EXPR, comp_index, comp_width);
    }

  constr = build_constructor (type, v);
  gimple_assign_set_rhs_from_tree (gsi, constr);
  update_stmt (gsi_stmt (*gsi));
}

/* Expand the vector operation CODE of statement ASSIGN, whose result has
   vector type TYPE, into operations on COMPUTE_TYPE.  Dispatch to the
   specialized expanders above where they apply, falling back to a
   piecewise element-by-element expansion.  */

static tree
expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type,
			 gassign *assign, enum tree_code code)
{
  machine_mode compute_mode = TYPE_MODE (compute_type);

  /* If the compute mode is not a vector mode (hence we are not decomposing
     a BLKmode vector to smaller, hardware-supported vectors), we may want
     to expand the operations in parallel.  */
  if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FRACT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UFRACT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_ACCUM
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM)
    switch (code)
      {
      case PLUS_EXPR:
      case MINUS_EXPR:
	/* The word-parallel bit-twiddling only works when overflow
	   wraps, not when it traps.  */
	if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
	  return expand_vector_addition (gsi, do_binop, do_plus_minus, type,
					 gimple_assign_rhs1 (assign),
					 gimple_assign_rhs2 (assign), code);
	break;

      case NEGATE_EXPR:
	if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
	  return expand_vector_addition (gsi, do_unop, do_negate, type,
					 gimple_assign_rhs1 (assign),
					 NULL_TREE, code);
	break;

      case BIT_AND_EXPR:
      case BIT_IOR_EXPR:
      case BIT_XOR_EXPR:
	/* Bitwise operations are safe to do a whole word at a time.  */
	return expand_vector_parallel (gsi, do_binop, type,
				       gimple_assign_rhs1 (assign),
				       gimple_assign_rhs2 (assign), code);

      case BIT_NOT_EXPR:
	return expand_vector_parallel (gsi, do_unop, type,
				       gimple_assign_rhs1 (assign),
				       NULL_TREE, code);
      case EQ_EXPR:
      case NE_EXPR:
      case GT_EXPR:
      case LT_EXPR:
      case GE_EXPR:
      case LE_EXPR:
      case UNEQ_EXPR:
      case UNGT_EXPR:
      case UNLT_EXPR:
      case UNGE_EXPR:
      case UNLE_EXPR:
      case LTGT_EXPR:
      case ORDERED_EXPR:
      case UNORDERED_EXPR:
	{
	  tree rhs1 = gimple_assign_rhs1 (assign);
	  tree rhs2 = gimple_assign_rhs2 (assign);

	  return expand_vector_comparison (gsi, type, rhs1, rhs2, code);
	}

      case TRUNC_DIV_EXPR:
      case TRUNC_MOD_EXPR:
	{
	  tree rhs1 = gimple_assign_rhs1 (assign);
	  tree rhs2 = gimple_assign_rhs2 (assign);
	  tree ret;

	  /* The divmod-by-constant trick only applies when optimizing,
	     with an integer vector divided by a constant vector, in a
	     hardware vector mode.  */
	  if (!optimize
	      || !VECTOR_INTEGER_TYPE_P (type)
	      || TREE_CODE (rhs2) != VECTOR_CST
	      || !VECTOR_MODE_P (TYPE_MODE (type)))
	    break;

	  ret = expand_vector_divmod (gsi, type, rhs1, rhs2, code);
	  if (ret != NULL_TREE)
	    return ret;
	  break;
	}

      default:
	break;
      }

  if (TREE_CODE_CLASS (code) == tcc_unary)
    return expand_vector_piecewise (gsi, do_unop, type, compute_type,
				    gimple_assign_rhs1 (assign),
				    NULL_TREE, code);
  else
    return expand_vector_piecewise (gsi, do_binop, type, compute_type,
				    gimple_assign_rhs1 (assign),
				    gimple_assign_rhs2 (assign), code);
}

/* Try to optimize
   a_5 = { b_7, b_7 + 3, b_7 + 6, b_7 + 9 };
   style stmts into:
   _9 = { b_7, b_7, b_7, b_7 };
   a_5 = _9 + { 0, 3, 6, 9 };
   because vector splat operation is usually more efficient
   than piecewise initialization of the vector.  */

static void
optimize_vector_constructor (gimple_stmt_iterator *gsi)
{
  gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
  tree lhs = gimple_assign_lhs (stmt);
  tree rhs = gimple_assign_rhs1 (stmt);
  tree type = TREE_TYPE (rhs);
  unsigned int i, j, nelts = TYPE_VECTOR_SUBPARTS (type);
  bool all_same = true;
  constructor_elt *elt;
  tree *cst;
  gimple *g;
  tree base = NULL_TREE;
  optab op;

  if (nelts <= 2 || CONSTRUCTOR_NELTS (rhs) != nelts)
    return;
  /* The rewrite needs a vector addition the target supports.  */
  op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
  if (op == unknown_optab
      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    return;
  FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs), i, elt)
    if (TREE_CODE (elt->value) != SSA_NAME
	|| TREE_CODE (TREE_TYPE (elt->value)) == VECTOR_TYPE)
      return;
    else
      {
	tree this_base = elt->value;
	if (this_base != CONSTRUCTOR_ELT (rhs, 0)->value)
	  all_same = false;
	for (j = 0; j < nelts + 1; j++)
	  {
g = SSA_NAME_DEF_STMT (this_base); 1088 if (is_gimple_assign (g) 1089 && gimple_assign_rhs_code (g) == PLUS_EXPR 1090 && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST 1091 && TREE_CODE (gimple_assign_rhs1 (g)) == SSA_NAME 1092 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g))) 1093 this_base = gimple_assign_rhs1 (g); 1094 else 1095 break; 1096 } 1097 if (i == 0) 1098 base = this_base; 1099 else if (this_base != base) 1100 return; 1101 } 1102 if (all_same) 1103 return; 1104 cst = XALLOCAVEC (tree, nelts); 1105 for (i = 0; i < nelts; i++) 1106 { 1107 tree this_base = CONSTRUCTOR_ELT (rhs, i)->value;; 1108 cst[i] = build_zero_cst (TREE_TYPE (base)); 1109 while (this_base != base) 1110 { 1111 g = SSA_NAME_DEF_STMT (this_base); 1112 cst[i] = fold_binary (PLUS_EXPR, TREE_TYPE (base), 1113 cst[i], gimple_assign_rhs2 (g)); 1114 if (cst[i] == NULL_TREE 1115 || TREE_CODE (cst[i]) != INTEGER_CST 1116 || TREE_OVERFLOW (cst[i])) 1117 return; 1118 this_base = gimple_assign_rhs1 (g); 1119 } 1120 } 1121 for (i = 0; i < nelts; i++) 1122 CONSTRUCTOR_ELT (rhs, i)->value = base; 1123 g = gimple_build_assign (make_ssa_name (type), rhs); 1124 gsi_insert_before (gsi, g, GSI_SAME_STMT); 1125 g = gimple_build_assign (lhs, PLUS_EXPR, gimple_assign_lhs (g), 1126 build_vector (type, cst)); 1127 gsi_replace (gsi, g, false); 1128 } 1129 1130 /* Return a type for the widest vector mode whose components are of type 1131 TYPE, or NULL_TREE if none is found. 
*/

static tree
type_for_widest_vector_mode (tree type, optab op)
{
  machine_mode inner_mode = TYPE_MODE (type);
  machine_mode best_mode = VOIDmode, mode;
  int best_nunits = 0;

  /* Start the scan at the first vector mode class matching the class
     of the scalar INNER_MODE.  */
  if (SCALAR_FLOAT_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_FLOAT;
  else if (SCALAR_FRACT_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_FRACT;
  else if (SCALAR_UFRACT_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_UFRACT;
  else if (SCALAR_ACCUM_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_ACCUM;
  else if (SCALAR_UACCUM_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_UACCUM;
  else
    mode = MIN_MODE_VECTOR_INT;

  /* Walk the wider-mode chain, remembering the candidate with the
     most units whose element mode matches and for which the target
     implements OP.  */
  for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
    {
      if (GET_MODE_INNER (mode) != inner_mode)
	continue;
      if (GET_MODE_NUNITS (mode) <= best_nunits)
	continue;
      if (optab_handler (op, mode) == CODE_FOR_nothing)
	continue;
      best_mode = mode;
      best_nunits = GET_MODE_NUNITS (mode);
    }

  if (best_mode == VOIDmode)
    return NULL_TREE;
  return build_vector_type_for_mode (type, best_mode);
}


/* Build a reference to the element of the vector VECT.  Function
   returns either the element itself, either BIT_FIELD_REF, or an
   ARRAY_REF expression.

   GSI is required to insert temporary variables while building a
   reference to the element of the vector VECT.

   PTMPVEC is a pointer to the temporary variable for caching
   purposes.  In case when PTMPVEC is NULL new temporary variable
   will be created.
*/
static tree
vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec)
{
  tree vect_type, vect_elt_type;
  gimple *asgn;
  tree tmpvec;
  tree arraytype;
  bool need_asgn = true;
  unsigned int elements;

  vect_type = TREE_TYPE (vect);
  vect_elt_type = TREE_TYPE (vect_type);
  elements = TYPE_VECTOR_SUBPARTS (vect_type);

  /* Constant index: try to resolve the element at compile time.  */
  if (TREE_CODE (idx) == INTEGER_CST)
    {
      unsigned HOST_WIDE_INT index;

      /* Given that we're about to compute a binary modulus,
	 we don't care about the high bits of the value.  */
      index = TREE_INT_CST_LOW (idx);
      if (!tree_fits_uhwi_p (idx) || index >= elements)
	{
	  /* Out-of-range selectors wrap around; ELEMENTS is a power
	     of two so the AND is a modulus.  */
	  index &= elements - 1;
	  idx = build_int_cst (TREE_TYPE (idx), index);
	}

      /* When lowering a vector statement sequence do some easy
         simplification by looking through intermediate vector results.  */
      if (TREE_CODE (vect) == SSA_NAME)
	{
	  gimple *def_stmt = SSA_NAME_DEF_STMT (vect);
	  if (is_gimple_assign (def_stmt)
	      && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
		  || gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR))
	    vect = gimple_assign_rhs1 (def_stmt);
	}

      if (TREE_CODE (vect) == VECTOR_CST)
	return VECTOR_CST_ELT (vect, index);
      else if (TREE_CODE (vect) == CONSTRUCTOR
	       && (CONSTRUCTOR_NELTS (vect) == 0
		   || TREE_CODE (TREE_TYPE (CONSTRUCTOR_ELT (vect, 0)->value))
		      != VECTOR_TYPE))
	{
	  /* Scalar-element CONSTRUCTOR: elements beyond those listed
	     are implicitly zero.  */
	  if (index < CONSTRUCTOR_NELTS (vect))
	    return CONSTRUCTOR_ELT (vect, index)->value;
	  return build_zero_cst (vect_elt_type);
	}
      else
	{
	  /* Otherwise extract the element via a BIT_FIELD_REF at the
	     element's bit position.  */
	  tree size = TYPE_SIZE (vect_elt_type);
	  tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (index),
				  size);
	  return fold_build3 (BIT_FIELD_REF, vect_elt_type, vect, size, pos);
	}
    }

  /* Variable index: spill the vector to an addressable temporary and
     index it as an array.  */
  if (!ptmpvec)
    tmpvec = create_tmp_var (vect_type, "vectmp");
  else if (!*ptmpvec)
    tmpvec = *ptmpvec = create_tmp_var (vect_type, "vectmp");
  else
    {
      /* The caller's cached temporary already holds VECT.  */
      tmpvec = *ptmpvec;
      need_asgn = false;
    }

  if (need_asgn)
    {
      TREE_ADDRESSABLE (tmpvec) = 1;
      asgn = gimple_build_assign (tmpvec, vect);
      gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
    }

  arraytype = build_array_type_nelts (vect_elt_type, elements);
  return build4 (ARRAY_REF, vect_elt_type,
                 build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec),
                 idx, NULL_TREE, NULL_TREE);
}

/* Check if VEC_PERM_EXPR within the given setting is supported
   by hardware, or lower it piecewise.

   When VEC_PERM_EXPR has the same first and second operands:
   VEC_PERM_EXPR <v0, v0, mask> the lowered version would be
   {v0[mask[0]], v0[mask[1]], ...}
   MASK and V0 must have the same number of elements.

   Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to
   {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0]], ...}
   V0 and V1 must have the same type.  MASK, V0, V1 must have the
   same number of arguments.
*/

static void
lower_vec_perm (gimple_stmt_iterator *gsi)
{
  gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
  tree mask = gimple_assign_rhs3 (stmt);
  tree vec0 = gimple_assign_rhs1 (stmt);
  tree vec1 = gimple_assign_rhs2 (stmt);
  tree vect_type = TREE_TYPE (vec0);
  tree mask_type = TREE_TYPE (mask);
  tree vect_elt_type = TREE_TYPE (vect_type);
  tree mask_elt_type = TREE_TYPE (mask_type);
  unsigned int elements = TYPE_VECTOR_SUBPARTS (vect_type);
  vec<constructor_elt, va_gc> *v;
  tree constr, t, si, i_val;
  tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE;
  bool two_operand_p = !operand_equal_p (vec0, vec1, 0);
  location_t loc = gimple_location (gsi_stmt (*gsi));
  unsigned i;

  /* Look through an SSA name defined by a VECTOR_CST to get at a
     constant selector.  */
  if (TREE_CODE (mask) == SSA_NAME)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (mask);
      if (is_gimple_assign (def_stmt)
	  && gimple_assign_rhs_code (def_stmt) == VECTOR_CST)
	mask = gimple_assign_rhs1 (def_stmt);
    }

  if (TREE_CODE (mask) == VECTOR_CST)
    {
      unsigned char *sel_int = XALLOCAVEC (unsigned char, elements);

      /* Canonicalize each selector value modulo 2 * ELEMENTS.  */
      for (i = 0; i < elements; ++i)
	sel_int[i] = (TREE_INT_CST_LOW (VECTOR_CST_ELT (mask, i))
		      & (2 * elements - 1));

      /* If the target can do this exact constant permutation, keep the
	 VEC_PERM_EXPR (with the canonicalized mask) for RTL expansion.  */
      if (can_vec_perm_p (TYPE_MODE (vect_type), false, sel_int))
	{
	  gimple_assign_set_rhs3 (stmt, mask);
	  update_stmt (stmt);
	  return;
	}
      /* Also detect vec_shr pattern - VEC_PERM_EXPR with zero
	 vector as VEC1 and a right element shift MASK.  */
      if (optab_handler (vec_shr_optab, TYPE_MODE (vect_type))
	  != CODE_FOR_nothing
	  && TREE_CODE (vec1) == VECTOR_CST
	  && initializer_zerop (vec1)
	  && sel_int[0]
	  && sel_int[0] < elements)
	{
	  for (i = 1; i < elements; ++i)
	    {
	      unsigned int expected = i + sel_int[0];
	      /* Indices into the second vector are all equivalent.  */
	      if (MIN (elements, (unsigned) sel_int[i])
		  != MIN (elements, expected))
		break;
	    }
	  if (i == elements)
	    {
	      gimple_assign_set_rhs3 (stmt, mask);
	      update_stmt (stmt);
	      return;
	    }
	}
    }
  else if (can_vec_perm_p (TYPE_MODE (vect_type), true, NULL))
    /* Variable mask, but the target handles arbitrary permutations.  */
    return;

  warning_at (loc, OPT_Wvector_operation_performance,
	      "vector shuffling operation will be expanded piecewise");

  /* Lower the permutation element by element into a CONSTRUCTOR.  */
  vec_alloc (v, elements);
  for (i = 0; i < elements; i++)
    {
      si = size_int (i);
      i_val = vector_element (gsi, mask, si, &masktmp);

      if (TREE_CODE (i_val) == INTEGER_CST)
	{
	  unsigned HOST_WIDE_INT index;

	  index = TREE_INT_CST_LOW (i_val);
	  if (!tree_fits_uhwi_p (i_val) || index >= elements)
	    i_val = build_int_cst (mask_elt_type, index & (elements - 1));

	  /* Bit ELEMENTS of the selector picks the second vector.  */
	  if (two_operand_p && (index & elements) != 0)
	    t = vector_element (gsi, vec1, i_val, &vec1tmp);
	  else
	    t = vector_element (gsi, vec0, i_val, &vec0tmp);

	  t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
					true, GSI_SAME_STMT);
	}
      else
	{
	  /* Non-constant selector element: choose between VEC0 and
	     VEC1 at run time with a COND_EXPR.  */
	  tree cond = NULL_TREE, v0_val;

	  if (two_operand_p)
	    {
	      cond = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
			          build_int_cst (mask_elt_type, elements));
	      cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
					       true, GSI_SAME_STMT);
	    }

	  /* Reduce the selector modulo ELEMENTS for the in-vector index.  */
	  i_val = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
			       build_int_cst (mask_elt_type, elements - 1));
	  i_val = force_gimple_operand_gsi (gsi, i_val, true, NULL_TREE,
					    true, GSI_SAME_STMT);

	  v0_val = vector_element (gsi, vec0, i_val, &vec0tmp);
	  v0_val = force_gimple_operand_gsi (gsi, v0_val, true, NULL_TREE,
					     true, GSI_SAME_STMT);

	  if (two_operand_p)
	    {
	      tree v1_val;

	      v1_val = vector_element (gsi, vec1, i_val, &vec1tmp);
	      v1_val = force_gimple_operand_gsi (gsi, v1_val, true, NULL_TREE,
						 true, GSI_SAME_STMT);

	      cond = fold_build2 (EQ_EXPR, boolean_type_node,
				  cond, build_zero_cst (mask_elt_type));
	      cond = fold_build3 (COND_EXPR, vect_elt_type,
				  cond, v0_val, v1_val);
	      t = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
					    true, GSI_SAME_STMT);
	    }
	  else
	    t = v0_val;
	}

      CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, t);
    }

  constr = build_constructor (vect_type, v);
  gimple_assign_set_rhs_from_tree (gsi, constr);
  update_stmt (gsi_stmt (*gsi));
}

/* If OP is a uniform vector return the element it is a splat from.  */

static tree
ssa_uniform_vector_p (tree op)
{
  if (TREE_CODE (op) == VECTOR_CST
      || TREE_CODE (op) == CONSTRUCTOR)
    return uniform_vector_p (op);
  /* Also look through a single-rhs SSA definition.  */
  if (TREE_CODE (op) == SSA_NAME)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (op);
      if (gimple_assign_single_p (def_stmt))
	return uniform_vector_p (gimple_assign_rhs1 (def_stmt));
    }
  return NULL_TREE;
}

/* Return type in which CODE operation with optab OP can be
   computed.  */

static tree
get_compute_type (enum tree_code code, optab op, tree type)
{
  /* For very wide vectors, try using a smaller vector mode.
*/
  tree compute_type = type;
  if (op
      && (!VECTOR_MODE_P (TYPE_MODE (type))
	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing))
    {
      /* Use a narrower vector type only if it is a strict sub-vector
	 with more than one element and the target implements OP on it.  */
      tree vector_compute_type
	= type_for_widest_vector_mode (TREE_TYPE (type), op);
      if (vector_compute_type != NULL_TREE
	  && (TYPE_VECTOR_SUBPARTS (vector_compute_type)
	      < TYPE_VECTOR_SUBPARTS (compute_type))
	  && TYPE_VECTOR_SUBPARTS (vector_compute_type) > 1
	  && (optab_handler (op, TYPE_MODE (vector_compute_type))
	      != CODE_FOR_nothing))
	compute_type = vector_compute_type;
    }

  /* If we are breaking a BLKmode vector into smaller pieces,
     type_for_widest_vector_mode has already looked into the optab,
     so skip these checks.  */
  if (compute_type == type)
    {
      machine_mode compute_mode = TYPE_MODE (compute_type);
      if (VECTOR_MODE_P (compute_mode))
	{
	  if (op && optab_handler (op, compute_mode) != CODE_FOR_nothing)
	    return compute_type;
	  if (code == MULT_HIGHPART_EXPR
	      && can_mult_highpart_p (compute_mode,
				      TYPE_UNSIGNED (compute_type)))
	    return compute_type;
	}
      /* There is no operation in hardware, so fall back to scalars.  */
      compute_type = TREE_TYPE (type);
    }

  return compute_type;
}

/* Helper function of expand_vector_operations_1.  Return number of
   vector elements for vector types or 1 for other types.  */

static inline int
count_type_subparts (tree type)
{
  return VECTOR_TYPE_P (type) ?
	 TYPE_VECTOR_SUBPARTS (type) : 1;
}

/* Helper for expand_vector_scalar_condition: build one scalar
   COND_EXPR for the piece of A and B at BITPOS/BITSIZE, reusing the
   scalar condition of the statement currently at *GSI.  */

static tree
do_cond (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
	 tree bitpos, tree bitsize, enum tree_code code,
	 tree type ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
    a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
    b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
  tree cond = gimple_assign_rhs1 (gsi_stmt (*gsi));
  /* Unshare the condition: it is emitted once per extracted piece.  */
  return gimplify_build3 (gsi, code, inner_type, unshare_expr (cond), a, b);
}

/* Expand a vector COND_EXPR to scalars, piecewise.  */
static void
expand_vector_scalar_condition (gimple_stmt_iterator *gsi)
{
  gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
  tree type = gimple_expr_type (stmt);
  tree compute_type = get_compute_type (COND_EXPR, mov_optab, type);
  machine_mode compute_mode = TYPE_MODE (compute_type);
  gcc_assert (compute_mode != BLKmode);
  tree lhs = gimple_assign_lhs (stmt);
  tree rhs2 = gimple_assign_rhs2 (stmt);
  tree rhs3 = gimple_assign_rhs3 (stmt);
  tree new_rhs;

  /* If the compute mode is not a vector mode (hence we are not decomposing
     a BLKmode vector to smaller, hardware-supported vectors), we may want
     to expand the operations in parallel.  */
  if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FRACT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UFRACT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_ACCUM
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM)
    new_rhs = expand_vector_parallel (gsi, do_cond, type, rhs2, rhs3,
				      COND_EXPR);
  else
    new_rhs = expand_vector_piecewise (gsi, do_cond, type, compute_type,
				       rhs2, rhs3, COND_EXPR);
  if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
    new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
			       new_rhs);

  /* NOTE:  We should avoid using gimple_assign_set_rhs_from_tree. One
     way to do it is change expand_vector_operation and its callees to
     return a tree_code, RHS1 and RHS2 instead of a tree.  */
  gimple_assign_set_rhs_from_tree (gsi, new_rhs);
  update_stmt (gsi_stmt (*gsi));
}

/* Process one statement.  If we identify a vector operation, expand it.  */

static void
expand_vector_operations_1 (gimple_stmt_iterator *gsi)
{
  tree lhs, rhs1, rhs2 = NULL, type, compute_type = NULL_TREE;
  enum tree_code code;
  optab op = unknown_optab;
  enum gimple_rhs_class rhs_class;
  tree new_rhs;

  /* Only consider code == GIMPLE_ASSIGN.
*/
  gassign *stmt = dyn_cast <gassign *> (gsi_stmt (*gsi));
  if (!stmt)
    return;

  code = gimple_assign_rhs_code (stmt);
  rhs_class = get_gimple_rhs_class (code);
  lhs = gimple_assign_lhs (stmt);

  /* Dispatch the few RHS codes that get dedicated lowering.  */
  if (code == VEC_PERM_EXPR)
    {
      lower_vec_perm (gsi);
      return;
    }

  if (code == VEC_COND_EXPR)
    {
      expand_vector_condition (gsi);
      return;
    }

  /* A COND_EXPR with a scalar condition but a vector value, when the
     vector does not fit a hardware mode.  */
  if (code == COND_EXPR
      && TREE_CODE (TREE_TYPE (gimple_assign_lhs (stmt))) == VECTOR_TYPE
      && TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt))) == BLKmode)
    {
      expand_vector_scalar_condition (gsi);
      return;
    }

  if (code == CONSTRUCTOR
      && TREE_CODE (lhs) == SSA_NAME
      && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (lhs)))
      && !gimple_clobber_p (stmt)
      && optimize)
    {
      optimize_vector_constructor (gsi);
      return;
    }

  if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS)
    return;

  rhs1 = gimple_assign_rhs1 (stmt);
  type = gimple_expr_type (stmt);
  if (rhs_class == GIMPLE_BINARY_RHS)
    rhs2 = gimple_assign_rhs2 (stmt);

  if (TREE_CODE (type) != VECTOR_TYPE)
    return;

  /* If the vector operation is operating on all same vector elements
     implement it with a scalar operation and a splat if the target
     supports the scalar operation.  */
  tree srhs1, srhs2 = NULL_TREE;
  if ((srhs1 = ssa_uniform_vector_p (rhs1)) != NULL_TREE
      && (rhs2 == NULL_TREE
	  || (! VECTOR_TYPE_P (TREE_TYPE (rhs2))
	      && (srhs2 = rhs2))
	  || (srhs2 = ssa_uniform_vector_p (rhs2)) != NULL_TREE)
      /* As we query direct optabs restrict to non-convert operations.  */
      && TYPE_MODE (TREE_TYPE (type)) == TYPE_MODE (TREE_TYPE (srhs1)))
    {
      op = optab_for_tree_code (code, TREE_TYPE (type), optab_scalar);
      if (op >= FIRST_NORM_OPTAB && op <= LAST_NORM_OPTAB
	  && optab_handler (op, TYPE_MODE (TREE_TYPE (type))) != CODE_FOR_nothing)
	{
	  tree slhs = make_ssa_name (TREE_TYPE (srhs1));
	  gimple *repl = gimple_build_assign (slhs, code, srhs1, srhs2);
	  gsi_insert_before (gsi, repl, GSI_SAME_STMT);
	  gimple_assign_set_rhs_from_tree (gsi,
					   build_vector_from_val (type, slhs));
	  update_stmt (stmt);
	  return;
	}
    }

  /* A scalar operation pretending to be a vector one.  */
  if (VECTOR_BOOLEAN_TYPE_P (type)
      && !VECTOR_MODE_P (TYPE_MODE (type))
      && TYPE_MODE (type) != BLKmode)
    return;

  /* Conversions are not lowered here.  */
  if (CONVERT_EXPR_CODE_P (code)
      || code == FLOAT_EXPR
      || code == FIX_TRUNC_EXPR
      || code == VIEW_CONVERT_EXPR)
    return;

  /* The signedness is determined from input argument.  */
  if (code == VEC_UNPACK_FLOAT_HI_EXPR
      || code == VEC_UNPACK_FLOAT_LO_EXPR)
    type = TREE_TYPE (rhs1);

  /* For widening/narrowing vector operations, the relevant type is of the
     arguments, not the widened result.  VEC_UNPACK_FLOAT_*_EXPR is
     calculated in the same way above.  */
  if (code == WIDEN_SUM_EXPR
      || code == VEC_WIDEN_MULT_HI_EXPR
      || code == VEC_WIDEN_MULT_LO_EXPR
      || code == VEC_WIDEN_MULT_EVEN_EXPR
      || code == VEC_WIDEN_MULT_ODD_EXPR
      || code == VEC_UNPACK_HI_EXPR
      || code == VEC_UNPACK_LO_EXPR
      || code == VEC_PACK_TRUNC_EXPR
      || code == VEC_PACK_SAT_EXPR
      || code == VEC_PACK_FIX_TRUNC_EXPR
      || code == VEC_WIDEN_LSHIFT_HI_EXPR
      || code == VEC_WIDEN_LSHIFT_LO_EXPR)
    type = TREE_TYPE (rhs1);

  /* Choose between vector shift/rotate by vector and vector shift/rotate by
     scalar */
  if (code == LSHIFT_EXPR
      || code == RSHIFT_EXPR
      || code == LROTATE_EXPR
      || code == RROTATE_EXPR)
    {
      optab opv;

      /* Check whether we have vector <op> {x,x,x,x} where x
         could be a scalar variable or a constant.  Transform
         vector <op> {x,x,x,x} ==> vector <op> scalar.  */
      if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
	{
	  tree first;

	  if ((first = ssa_uniform_vector_p (rhs2)) != NULL_TREE)
	    {
	      gimple_assign_set_rhs2 (stmt, first);
	      update_stmt (stmt);
	      rhs2 = first;
	    }
	}

      opv = optab_for_tree_code (code, type, optab_vector);
      if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
	op = opv;
      else
	{
	  op = optab_for_tree_code (code, type, optab_scalar);

	  compute_type = get_compute_type (code, op, type);
	  if (compute_type == type)
	    return;
	  /* The rtl expander will expand vector/scalar as vector/vector
	     if necessary.  Pick one with wider vector type.  */
	  tree compute_vtype = get_compute_type (code, opv, type);
	  if (count_type_subparts (compute_vtype)
	      > count_type_subparts (compute_type))
	    {
	      compute_type = compute_vtype;
	      op = opv;
	    }
	}

      if (code == LROTATE_EXPR || code == RROTATE_EXPR)
	{
	  if (compute_type == NULL_TREE)
	    compute_type = get_compute_type (code, op, type);
	  if (compute_type == type)
	    return;
	  /* Before splitting vector rotates into scalar rotates,
	     see if we can't use vector shifts and BIT_IOR_EXPR
	     instead.  For vector by vector rotates we'd also
	     need to check BIT_AND_EXPR and NEGATE_EXPR, punt there
	     for now, fold doesn't seem to create such rotates anyway.  */
	  if (compute_type == TREE_TYPE (type)
	      && !VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
	    {
	      optab oplv = vashl_optab, opl = ashl_optab;
	      optab oprv = vlshr_optab, opr = lshr_optab, opo = ior_optab;
	      tree compute_lvtype = get_compute_type (LSHIFT_EXPR, oplv, type);
	      tree compute_rvtype = get_compute_type (RSHIFT_EXPR, oprv, type);
	      tree compute_otype = get_compute_type (BIT_IOR_EXPR, opo, type);
	      tree compute_ltype = get_compute_type (LSHIFT_EXPR, opl, type);
	      tree compute_rtype = get_compute_type (RSHIFT_EXPR, opr, type);
	      /* The rtl expander will expand vector/scalar as vector/vector
		 if necessary.  Pick one with wider vector type.  */
	      if (count_type_subparts (compute_lvtype)
		  > count_type_subparts (compute_ltype))
		{
		  compute_ltype = compute_lvtype;
		  opl = oplv;
		}
	      if (count_type_subparts (compute_rvtype)
		  > count_type_subparts (compute_rtype))
		{
		  compute_rtype = compute_rvtype;
		  opr = oprv;
		}
	      /* Pick the narrowest type from LSHIFT_EXPR, RSHIFT_EXPR and
		 BIT_IOR_EXPR.  */
	      compute_type = compute_ltype;
	      if (count_type_subparts (compute_type)
		  > count_type_subparts (compute_rtype))
		compute_type = compute_rtype;
	      if (count_type_subparts (compute_type)
		  > count_type_subparts (compute_otype))
		compute_type = compute_otype;
	      /* Verify all 3 operations can be performed in that type.  */
	      if (compute_type != TREE_TYPE (type))
		{
		  if (optab_handler (opl, TYPE_MODE (compute_type))
		      == CODE_FOR_nothing
		      || optab_handler (opr, TYPE_MODE (compute_type))
		      == CODE_FOR_nothing
		      || optab_handler (opo, TYPE_MODE (compute_type))
		      == CODE_FOR_nothing)
		    compute_type = TREE_TYPE (type);
		}
	    }
	}
    }
  else
    op = optab_for_tree_code (code, type, optab_default);

  /* Optabs will try converting a negation into a subtraction, so
     look for it as well.  TODO: negation of floating-point vectors
     might be turned into an exclusive OR toggling the sign bit.  */
  if (op == unknown_optab
      && code == NEGATE_EXPR
      && INTEGRAL_TYPE_P (TREE_TYPE (type)))
    op = optab_for_tree_code (MINUS_EXPR, type, optab_default);

  if (compute_type == NULL_TREE)
    compute_type = get_compute_type (code, op, type);
  if (compute_type == type)
    return;

  new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code);

  /* Leave expression untouched for later expansion.  */
  if (new_rhs == NULL_TREE)
    return;

  if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
    new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
			       new_rhs);

  /* NOTE:  We should avoid using gimple_assign_set_rhs_from_tree. One
     way to do it is change expand_vector_operation and its callees to
     return a tree_code, RHS1 and RHS2 instead of a tree.
*/
  gimple_assign_set_rhs_from_tree (gsi, new_rhs);
  update_stmt (gsi_stmt (*gsi));
}

/* Use this to lower vector operations introduced by the vectorizer,
   if it may need the bit-twiddling tricks implemented in this file.  */

static unsigned int
expand_vector_operations (void)
{
  gimple_stmt_iterator gsi;
  basic_block bb;
  bool cfg_changed = false;

  /* Walk every statement of every block and lower vector operations
     that the target cannot handle directly.  */
  FOR_EACH_BB_FN (bb, cfun)
    {
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  expand_vector_operations_1 (&gsi);
	  /* ???  If we do not cleanup EH then we will ICE in
	     verification.  But in reality we have created wrong-code
	     as we did not properly transition EH info and edges to
	     the piecewise computations.  */
	  if (maybe_clean_eh_stmt (gsi_stmt (gsi))
	      && gimple_purge_dead_eh_edges (bb))
	    cfg_changed = true;
	}
    }

  return cfg_changed ? TODO_cleanup_cfg : 0;
}

namespace {

/* Pass descriptor for the first ("veclower") lowering pass.  */

const pass_data pass_data_lower_vector =
{
  GIMPLE_PASS, /* type */
  "veclower", /* name */
  OPTGROUP_VEC, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg, /* properties_required */
  PROP_gimple_lvec, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};

class pass_lower_vector : public gimple_opt_pass
{
public:
  pass_lower_vector (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_lower_vector, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only if vector lowering has not already been performed.  */
  virtual bool gate (function *fun)
    {
      return !(fun->curr_properties & PROP_gimple_lvec);
    }

  virtual unsigned int execute (function *)
    {
      return expand_vector_operations ();
    }

}; // class pass_lower_vector

} // anon namespace

gimple_opt_pass *
make_pass_lower_vector (gcc::context *ctxt)
{
  return new
    pass_lower_vector (ctxt);
}

namespace {

/* Pass descriptor for the second ("veclower2") lowering pass; unlike
   "veclower" its finish TODO flags also request CFG cleanup.  */

const pass_data pass_data_lower_vector_ssa =
{
  GIMPLE_PASS, /* type */
  "veclower2", /* name */
  OPTGROUP_VEC, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg, /* properties_required */
  PROP_gimple_lvec, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_update_ssa
    | TODO_cleanup_cfg ), /* todo_flags_finish */
};

class pass_lower_vector_ssa : public gimple_opt_pass
{
public:
  pass_lower_vector_ssa (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_lower_vector_ssa, ctxt)
  {}

  /* opt_pass methods: */
  /* This pass is instantiated more than once in the pipeline, hence
     clone is provided.  */
  opt_pass * clone () { return new pass_lower_vector_ssa (m_ctxt); }
  virtual unsigned int execute (function *)
    {
      return expand_vector_operations ();
    }

}; // class pass_lower_vector_ssa

} // anon namespace

gimple_opt_pass *
make_pass_lower_vector_ssa (gcc::context *ctxt)
{
  return new pass_lower_vector_ssa (ctxt);
}

#include "gt-tree-vect-generic.h"