/* Lower vector operations to scalar operations.
   Copyright (C) 2004-2013 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tree.h"
#include "tm.h"
#include "langhooks.h"
#include "tree-flow.h"
#include "gimple.h"
#include "tree-iterator.h"
#include "tree-pass.h"
#include "flags.h"
#include "ggc.h"
#include "diagnostic.h"
#include "target.h"

/* Need to include rtl.h, expr.h, etc. for optabs.  */
#include "expr.h"
#include "optabs.h"


static void expand_vector_operations_1 (gimple_stmt_iterator *);


/* Build a constant of type TYPE, made of VALUE's bits replicated
   every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision.  */
static tree
build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value)
{
  int width = tree_low_cst (TYPE_SIZE (inner_type), 1);
  int n = HOST_BITS_PER_WIDE_INT / width;
  unsigned HOST_WIDE_INT low, high, mask;
  tree ret;

  gcc_assert (n);

  if (width == HOST_BITS_PER_WIDE_INT)
    low = value;
  else
    {
      mask = ((HOST_WIDE_INT)1 << width) - 1;
      low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
    }

  if (TYPE_PRECISION (type) < HOST_BITS_PER_WIDE_INT)
    low &= ((HOST_WIDE_INT)1 << TYPE_PRECISION (type)) - 1, high = 0;
  else if (TYPE_PRECISION (type) == HOST_BITS_PER_WIDE_INT)
    high = 0;
  else if (TYPE_PRECISION (type) == HOST_BITS_PER_DOUBLE_INT)
    high = low;
  else
    gcc_unreachable ();

  ret = build_int_cst_wide (type, low, high);
  return ret;
}

static GTY(()) tree vector_inner_type;
static GTY(()) tree vector_last_type;
static GTY(()) int vector_last_nunits;

/* Return a suitable vector type made of NUNITS units, each of mode
   "word_mode" (the global variable).  */
static tree
build_word_mode_vector_type (int nunits)
{
  if (!vector_inner_type)
    vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
  else if (vector_last_nunits == nunits)
    {
      gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
      return vector_last_type;
    }

  /* We build a new type, but we canonicalize it nevertheless,
     because it still saves some memory.  */
  vector_last_nunits = nunits;
  vector_last_type = type_hash_canon (nunits,
                                      build_vector_type (vector_inner_type,
                                                         nunits));
  return vector_last_type;
}

typedef tree (*elem_op_func) (gimple_stmt_iterator *,
                              tree, tree, tree, tree, tree, enum tree_code);

static inline tree
tree_vec_extract (gimple_stmt_iterator *gsi, tree type,
                  tree t, tree bitsize, tree bitpos)
{
  if (bitpos)
    return gimplify_build3 (gsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
  else
    return gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
}

static tree
do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a,
         tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
         enum tree_code code)
{
  a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  return gimplify_build1 (gsi, code, inner_type, a);
}

static tree
do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
          tree bitpos, tree bitsize, enum tree_code code)
{
  if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
    a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
    b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
  return gimplify_build2 (gsi, code, inner_type, a, b);
}

/* Construct the expression (A[BITPOS] code B[BITPOS]) ? -1 : 0.

   INNER_TYPE is the type of the elements of A and B.

   The returned expression is of a signed integer type whose size
   equals the size of INNER_TYPE.  */
static tree
do_compare (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
            tree bitpos, tree bitsize, enum tree_code code)
{
  tree comp_type;

  a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);

  comp_type = build_nonstandard_integer_type
                (GET_MODE_BITSIZE (TYPE_MODE (inner_type)), 0);

  return gimplify_build3 (gsi, COND_EXPR, comp_type,
                          fold_build2 (code, boolean_type_node, a, b),
                          build_int_cst (comp_type, -1),
                          build_int_cst (comp_type, 0));
}

/* Expand vector addition to scalars.  This does bit twiddling
   in order to increase parallelism:

   a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
           (a ^ b) & 0x80808080

   a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
           (a ^ ~b) & 0x80808080

   -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)

   This optimization should be done only if 4 vector items or more
   fit into a word.  */
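
/* A concrete illustration of the formulas above (example values chosen
   here, not taken from the surrounding code): with QImode elements
   packed into a 32-bit word, the low mask is 0x7f7f7f7f and the high
   mask is 0x80808080.  Clearing bit 7 of every byte before the
   word-wide addition guarantees that no carry can propagate from one
   byte lane into the next; the correct value of each byte's bit 7 is
   then restored by XORing in (a ^ b) & 0x80808080.  For a single lane
   with a = 0x85 and b = 0x93, the byte sum modulo 256 is 0x18, and
   indeed (0x05 + 0x13) ^ ((0x85 ^ 0x93) & 0x80) == 0x18.  */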
static tree
do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b,
               tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
               enum tree_code code)
{
  tree inner_type = TREE_TYPE (TREE_TYPE (a));
  unsigned HOST_WIDE_INT max;
  tree low_bits, high_bits, a_low, b_low, result_low, signs;

  max = GET_MODE_MASK (TYPE_MODE (inner_type));
  low_bits = build_replicated_const (word_type, inner_type, max >> 1);
  high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));

  a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos);
  b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);

  signs = gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, a, b);
  b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
  if (code == PLUS_EXPR)
    a_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, a, low_bits);
  else
    {
      a_low = gimplify_build2 (gsi, BIT_IOR_EXPR, word_type, a, high_bits);
      signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, signs);
    }

  signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
  result_low = gimplify_build2 (gsi, code, word_type, a_low, b_low);
  return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
}

static tree
do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b,
           tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
           tree bitsize ATTRIBUTE_UNUSED,
           enum tree_code code ATTRIBUTE_UNUSED)
{
  tree inner_type = TREE_TYPE (TREE_TYPE (b));
  HOST_WIDE_INT max;
  tree low_bits, high_bits, b_low, result_low, signs;

  max = GET_MODE_MASK (TYPE_MODE (inner_type));
  low_bits = build_replicated_const (word_type, inner_type, max >> 1);
  high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));

  b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);

  b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
  signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, b);
  signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
  result_low = gimplify_build2 (gsi, MINUS_EXPR, word_type, high_bits, b_low);
  return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
}

/* Expand a vector operation to scalars, by using many operations
   whose type is the vector type's inner type.  */
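
/* For instance (illustrative GIMPLE, the temporary names are invented
   here), a V4SI addition "x = a + b" with no hardware support is
   rewritten as four scalar additions over BIT_FIELD_REF extractions,
   collected into a CONSTRUCTOR:

     x_1 = BIT_FIELD_REF <a, 32, 0>  + BIT_FIELD_REF <b, 32, 0>;
     x_2 = BIT_FIELD_REF <a, 32, 32> + BIT_FIELD_REF <b, 32, 32>;
     ...
     x = {x_1, x_2, x_3, x_4};  */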
static tree
expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f,
                         tree type, tree inner_type,
                         tree a, tree b, enum tree_code code)
{
  vec<constructor_elt, va_gc> *v;
  tree part_width = TYPE_SIZE (inner_type);
  tree index = bitsize_int (0);
  int nunits = TYPE_VECTOR_SUBPARTS (type);
  int delta = tree_low_cst (part_width, 1)
              / tree_low_cst (TYPE_SIZE (TREE_TYPE (type)), 1);
  int i;
  location_t loc = gimple_location (gsi_stmt (*gsi));

  if (types_compatible_p (gimple_expr_type (gsi_stmt (*gsi)), type))
    warning_at (loc, OPT_Wvector_operation_performance,
                "vector operation will be expanded piecewise");
  else
    warning_at (loc, OPT_Wvector_operation_performance,
                "vector operation will be expanded in parallel");

  vec_alloc (v, (nunits + delta - 1) / delta);
  for (i = 0; i < nunits;
       i += delta, index = int_const_binop (PLUS_EXPR, index, part_width))
    {
      tree result = f (gsi, inner_type, a, b, index, part_width, code);
      constructor_elt ce = {NULL_TREE, result};
      v->quick_push (ce);
    }

  return build_constructor (type, v);
}

/* Expand a vector operation to scalars with the freedom to use
   a scalar integer type, or to use a different size for the items
   in the vector type.  */
static tree
expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type,
                        tree a, tree b,
                        enum tree_code code)
{
  tree result, compute_type;
  enum machine_mode mode;
  int n_words = tree_low_cst (TYPE_SIZE_UNIT (type), 1) / UNITS_PER_WORD;
  location_t loc = gimple_location (gsi_stmt (*gsi));

  /* We have three strategies.  If the type is already correct, just do
     the operation an element at a time.  Else, if the vector is wider than
     one word, do it a word at a time; finally, if the vector is smaller
     than one word, do it as a scalar.  */
  if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
    return expand_vector_piecewise (gsi, f,
                                    type, TREE_TYPE (type),
                                    a, b, code);
  else if (n_words > 1)
    {
      tree word_type = build_word_mode_vector_type (n_words);
      result = expand_vector_piecewise (gsi, f,
                                        word_type, TREE_TYPE (word_type),
                                        a, b, code);
      result = force_gimple_operand_gsi (gsi, result, true, NULL, true,
                                         GSI_SAME_STMT);
    }
  else
    {
      /* Use a single scalar operation with a mode no wider than word_mode.  */
      mode = mode_for_size (tree_low_cst (TYPE_SIZE (type), 1), MODE_INT, 0);
      compute_type = lang_hooks.types.type_for_mode (mode, 1);
      result = f (gsi, compute_type, a, b, NULL_TREE, NULL_TREE, code);
      warning_at (loc, OPT_Wvector_operation_performance,
                  "vector operation will be expanded with a "
                  "single scalar operation");
    }

  return result;
}

/* Expand a vector operation to scalars; for integer types we can use
   special bit twiddling tricks to do the sums a word at a time, using
   function F_PARALLEL instead of F.  These tricks are done only if
   they can process at least four items, that is, only if the vector
   holds at least four items and if a word can hold four items.  */
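
/* For example (hypothetical target, chosen only to illustrate the
   conditions below): on a 32-bit machine a V8QI addition qualifies,
   because four QImode parts fit in a word and the vector has at least
   four elements, so it goes through F_PARALLEL and the word-wide bit
   twiddling above.  A V2SI addition qualifies on neither count and is
   expanded element by element through F.  */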
static tree
expand_vector_addition (gimple_stmt_iterator *gsi,
                        elem_op_func f, elem_op_func f_parallel,
                        tree type, tree a, tree b, enum tree_code code)
{
  int parts_per_word = UNITS_PER_WORD
                       / tree_low_cst (TYPE_SIZE_UNIT (TREE_TYPE (type)), 1);

  if (INTEGRAL_TYPE_P (TREE_TYPE (type))
      && parts_per_word >= 4
      && TYPE_VECTOR_SUBPARTS (type) >= 4)
    return expand_vector_parallel (gsi, f_parallel,
                                   type, a, b, code);
  else
    return expand_vector_piecewise (gsi, f,
                                    type, TREE_TYPE (type),
                                    a, b, code);
}

/* Check if vector VEC consists of all equal elements and that the
   number of elements corresponds to the type of VEC.  Return the
   first element of the vector, or NULL_TREE if the vector is not
   uniform.  */
static tree
uniform_vector_p (tree vec)
{
  tree first, t;
  unsigned i;

  if (vec == NULL_TREE)
    return NULL_TREE;

  if (TREE_CODE (vec) == VECTOR_CST)
    {
      first = VECTOR_CST_ELT (vec, 0);
      for (i = 1; i < VECTOR_CST_NELTS (vec); ++i)
        if (!operand_equal_p (first, VECTOR_CST_ELT (vec, i), 0))
          return NULL_TREE;

      return first;
    }

  else if (TREE_CODE (vec) == CONSTRUCTOR)
    {
      first = error_mark_node;

      FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (vec), i, t)
        {
          if (i == 0)
            {
              first = t;
              continue;
            }
          if (!operand_equal_p (first, t, 0))
            return NULL_TREE;
        }
      if (i != TYPE_VECTOR_SUBPARTS (TREE_TYPE (vec)))
        return NULL_TREE;

      return first;
    }

  return NULL_TREE;
}

/* Try to expand the vector comparison expression OP0 CODE OP1 by
   querying the optab to see whether the expression
     VEC_COND_EXPR< OP0 CODE OP1, {-1,...}, {0,...}>
   can be expanded.  */
static tree
expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0,
                          tree op1, enum tree_code code)
{
  tree t;
  if (! expand_vec_cond_expr_p (type, TREE_TYPE (op0)))
    t = expand_vector_piecewise (gsi, do_compare, type,
                                 TREE_TYPE (TREE_TYPE (op0)), op0, op1, code);
  else
    t = NULL_TREE;

  return t;
}

/* Helper function of expand_vector_divmod.  Gimplify an RSHIFT_EXPR in
   the type of OP0 with shift counts in the SHIFTCNTS array and return
   the temporary holding the result if successful, otherwise return
   NULL_TREE.  */
static tree
add_rshift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts)
{
  optab op;
  unsigned int i, nunits = TYPE_VECTOR_SUBPARTS (type);
  bool scalar_shift = true;

  for (i = 1; i < nunits; i++)
    {
      if (shiftcnts[i] != shiftcnts[0])
        scalar_shift = false;
    }

  if (scalar_shift && shiftcnts[0] == 0)
    return op0;

  if (scalar_shift)
    {
      op = optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar);
      if (op != unknown_optab
          && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
        return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
                                build_int_cst (NULL_TREE, shiftcnts[0]));
    }

  op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
  if (op != unknown_optab
      && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
    {
      tree *vec = XALLOCAVEC (tree, nunits);
      for (i = 0; i < nunits; i++)
        vec[i] = build_int_cst (TREE_TYPE (type), shiftcnts[i]);
      return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
                              build_vector (type, vec));
    }

  return NULL_TREE;
}

/* Try to expand integer vector division by constant using
   widening multiply, shifts and additions.  */
static tree
expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
                      tree op1, enum tree_code code)
{
  bool use_pow2 = true;
  bool has_vector_shift = true;
  int mode = -1, this_mode;
  int pre_shift = -1, post_shift;
  unsigned int nunits = TYPE_VECTOR_SUBPARTS (type);
  int *shifts = XALLOCAVEC (int, nunits * 4);
  int *pre_shifts = shifts + nunits;
  int *post_shifts = pre_shifts + nunits;
  int *shift_temps = post_shifts + nunits;
  unsigned HOST_WIDE_INT *mulc = XALLOCAVEC (unsigned HOST_WIDE_INT, nunits);
  int prec = TYPE_PRECISION (TREE_TYPE (type));
  int dummy_int;
  unsigned int i, unsignedp = TYPE_UNSIGNED (TREE_TYPE (type));
  unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type)));
  tree *vec;
  tree cur_op, mulcst, tem;
  optab op;

  if (prec > HOST_BITS_PER_WIDE_INT)
    return NULL_TREE;

  op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
  if (op == unknown_optab
      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    has_vector_shift = false;

  /* Analysis phase.  Determine whether all op1 elements are powers of
     two, in which case the division can be expanded using shifts (or,
     for remainder, using masking).  Additionally compute the
     multiplicative constants and pre and post shifts if the division
     is to be expanded using widening or high part multiplication plus
     shifts.  */
  for (i = 0; i < nunits; i++)
    {
      tree cst = VECTOR_CST_ELT (op1, i);
      unsigned HOST_WIDE_INT ml;

      if (!host_integerp (cst, unsignedp) || integer_zerop (cst))
        return NULL_TREE;
      pre_shifts[i] = 0;
      post_shifts[i] = 0;
      mulc[i] = 0;
      if (use_pow2
          && (!integer_pow2p (cst) || tree_int_cst_sgn (cst) != 1))
        use_pow2 = false;
      if (use_pow2)
        {
          shifts[i] = tree_log2 (cst);
          if (shifts[i] != shifts[0]
              && code == TRUNC_DIV_EXPR
              && !has_vector_shift)
            use_pow2 = false;
        }
      if (mode == -2)
        continue;
      if (unsignedp)
        {
          unsigned HOST_WIDE_INT mh;
          unsigned HOST_WIDE_INT d = tree_low_cst (cst, 1) & mask;

          if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
            /* FIXME: Can transform this into op0 >= op1 ? 1 : 0.  */
            return NULL_TREE;

          if (d <= 1)
            {
              mode = -2;
              continue;
            }

          /* Find a suitable multiplier and right shift count
             instead of multiplying with D.  */
          mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);

          /* If the suggested multiplier is more than SIZE bits, we can
             do better for even divisors, using an initial right shift.  */
          if ((mh != 0 && (d & 1) == 0)
              || (!has_vector_shift && pre_shift != -1))
            {
              if (has_vector_shift)
                pre_shift = floor_log2 (d & -d);
              else if (pre_shift == -1)
                {
                  unsigned int j;
                  for (j = 0; j < nunits; j++)
                    {
                      tree cst2 = VECTOR_CST_ELT (op1, j);
                      unsigned HOST_WIDE_INT d2;
                      int this_pre_shift;

                      if (!host_integerp (cst2, 1))
                        return NULL_TREE;
                      d2 = tree_low_cst (cst2, 1) & mask;
                      if (d2 == 0)
                        return NULL_TREE;
                      this_pre_shift = floor_log2 (d2 & -d2);
                      if (pre_shift == -1 || this_pre_shift < pre_shift)
                        pre_shift = this_pre_shift;
                    }
                  if (i != 0 && pre_shift != 0)
                    {
                      /* Restart.  */
                      i = -1U;
                      mode = -1;
                      continue;
                    }
                }
              if (pre_shift != 0)
                {
                  if ((d >> pre_shift) <= 1)
                    {
                      mode = -2;
                      continue;
                    }
                  mh = choose_multiplier (d >> pre_shift, prec,
                                          prec - pre_shift,
                                          &ml, &post_shift, &dummy_int);
                  gcc_assert (!mh);
                  pre_shifts[i] = pre_shift;
                }
            }
          if (!mh)
            this_mode = 0;
          else
            this_mode = 1;
        }
      else
        {
          HOST_WIDE_INT d = tree_low_cst (cst, 0);
          unsigned HOST_WIDE_INT abs_d;

          if (d == -1)
            return NULL_TREE;

          /* Since d might be INT_MIN, we have to cast to
             unsigned HOST_WIDE_INT before negating to avoid
             undefined signed overflow.  */
          abs_d = (d >= 0
                   ? (unsigned HOST_WIDE_INT) d
                   : - (unsigned HOST_WIDE_INT) d);

          /* n rem d = n rem -d */
          if (code == TRUNC_MOD_EXPR && d < 0)
            d = abs_d;
          else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
            {
              /* This case is not handled correctly below.  */
              mode = -2;
              continue;
            }
          if (abs_d <= 1)
            {
              mode = -2;
              continue;
            }

          choose_multiplier (abs_d, prec, prec - 1, &ml,
                             &post_shift, &dummy_int);
          if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
            {
              this_mode = 4 + (d < 0);
              ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
            }
          else
            this_mode = 2 + (d < 0);
        }
      mulc[i] = ml;
      post_shifts[i] = post_shift;
      if ((i && !has_vector_shift && post_shifts[0] != post_shift)
          || post_shift >= prec
          || pre_shifts[i] >= prec)
        this_mode = -2;

      if (i == 0)
        mode = this_mode;
      else if (mode != this_mode)
        mode = -2;
    }

  vec = XALLOCAVEC (tree, nunits);

  if (use_pow2)
    {
      tree addend = NULL_TREE;
      if (!unsignedp)
        {
          tree uns_type;

          /* Both division and remainder sequences need
             op0 < 0 ? mask : 0 computed.  It can be either computed as
             (type) (((uns_type) (op0 >> (prec - 1))) >> (prec - shifts[i]))
             if none of the shifts is 0, or as the conditional.  */
          for (i = 0; i < nunits; i++)
            if (shifts[i] == 0)
              break;
          uns_type
            = build_vector_type (build_nonstandard_integer_type (prec, 1),
                                 nunits);
          if (i == nunits && TYPE_MODE (uns_type) == TYPE_MODE (type))
            {
              for (i = 0; i < nunits; i++)
                shift_temps[i] = prec - 1;
              cur_op = add_rshift (gsi, type, op0, shift_temps);
              if (cur_op != NULL_TREE)
                {
                  cur_op = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
                                            uns_type, cur_op);
                  for (i = 0; i < nunits; i++)
                    shift_temps[i] = prec - shifts[i];
                  cur_op = add_rshift (gsi, uns_type, cur_op, shift_temps);
                  if (cur_op != NULL_TREE)
                    addend = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
                                              type, cur_op);
                }
            }
          if (addend == NULL_TREE
              && expand_vec_cond_expr_p (type, type))
            {
              tree zero, cst, cond;
              gimple stmt;

              zero = build_zero_cst (type);
              cond = build2 (LT_EXPR, type, op0, zero);
              for (i = 0; i < nunits; i++)
                vec[i] = build_int_cst (TREE_TYPE (type),
                                        ((unsigned HOST_WIDE_INT) 1
                                         << shifts[i]) - 1);
              cst = build_vector (type, vec);
              addend = make_ssa_name (type, NULL);
              stmt = gimple_build_assign_with_ops (VEC_COND_EXPR, addend,
                                                   cond, cst, zero);
              gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
            }
        }
      if (code == TRUNC_DIV_EXPR)
        {
          if (unsignedp)
            {
              /* q = op0 >> shift;  */
              cur_op = add_rshift (gsi, type, op0, shifts);
              if (cur_op != NULL_TREE)
                return cur_op;
            }
          else if (addend != NULL_TREE)
            {
              /* t1 = op0 + addend;
                 q = t1 >> shift;  */
              op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
              if (op != unknown_optab
                  && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
                {
                  cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0, addend);
                  cur_op = add_rshift (gsi, type, cur_op, shifts);
                  if (cur_op != NULL_TREE)
                    return cur_op;
                }
            }
        }
      else
        {
          tree mask;
          for (i = 0; i < nunits; i++)
            vec[i] = build_int_cst (TREE_TYPE (type),
                                    ((unsigned HOST_WIDE_INT) 1
                                     << shifts[i]) - 1);
          mask = build_vector (type, vec);
          op = optab_for_tree_code (BIT_AND_EXPR, type, optab_default);
          if (op != unknown_optab
              && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
            {
              if (unsignedp)
                /* r = op0 & mask;  */
                return gimplify_build2 (gsi, BIT_AND_EXPR, type, op0, mask);
              else if (addend != NULL_TREE)
                {
                  /* t1 = op0 + addend;
                     t2 = t1 & mask;
                     r = t2 - addend;  */
                  op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
                  if (op != unknown_optab
                      && optab_handler (op, TYPE_MODE (type))
                         != CODE_FOR_nothing)
                    {
                      cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0,
                                                addend);
                      cur_op = gimplify_build2 (gsi, BIT_AND_EXPR, type,
                                                cur_op, mask);
                      op = optab_for_tree_code (MINUS_EXPR, type,
                                                optab_default);
                      if (op != unknown_optab
                          && optab_handler (op, TYPE_MODE (type))
                             != CODE_FOR_nothing)
                        return gimplify_build2 (gsi, MINUS_EXPR, type,
                                                cur_op, addend);
                    }
                }
            }
        }
    }

  if (mode == -2 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
    return NULL_TREE;

  if (!can_mult_highpart_p (TYPE_MODE (type), TYPE_UNSIGNED (type)))
    return NULL_TREE;

  cur_op = op0;

  switch (mode)
    {
    case 0:
      gcc_assert (unsignedp);
      /* t1 = oprnd0 >> pre_shift;
         t2 = t1 h* ml;
         q = t2 >> post_shift;  */
      cur_op = add_rshift (gsi, type, cur_op, pre_shifts);
      if (cur_op == NULL_TREE)
        return NULL_TREE;
      break;
    case 1:
      gcc_assert (unsignedp);
      for (i = 0; i < nunits; i++)
        {
          shift_temps[i] = 1;
          post_shifts[i]--;
        }
      break;
    case 2:
    case 3:
    case 4:
    case 5:
      gcc_assert (!unsignedp);
      for (i = 0; i < nunits; i++)
        shift_temps[i] = prec - 1;
      break;
    default:
      return NULL_TREE;
    }

  for (i = 0; i < nunits; i++)
    vec[i] = build_int_cst (TREE_TYPE (type), mulc[i]);
  mulcst = build_vector (type, vec);

  cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst);

  switch (mode)
    {
    case 0:
      /* t1 = oprnd0 >> pre_shift;
         t2 = t1 h* ml;
         q = t2 >> post_shift;  */
      cur_op = add_rshift (gsi, type, cur_op, post_shifts);
      break;
    case 1:
      /* t1 = oprnd0 h* ml;
         t2 = oprnd0 - t1;
         t3 = t2 >> 1;
         t4 = t1 + t3;
         q = t4 >> (post_shift - 1);  */
      op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
      if (op == unknown_optab
          || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
        return NULL_TREE;
      tem = gimplify_build2 (gsi, MINUS_EXPR, type, op0, cur_op);
      tem = add_rshift (gsi, type, tem, shift_temps);
      op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
      if (op == unknown_optab
          || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
        return NULL_TREE;
      tem = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, tem);
      cur_op = add_rshift (gsi, type, tem, post_shifts);
      if (cur_op == NULL_TREE)
        return NULL_TREE;
      break;
    case 2:
    case 3:
    case 4:
    case 5:
      /* t1 = oprnd0 h* ml;
         t2 = t1; [ iff (mode & 2) != 0 ]
         t2 = t1 + oprnd0; [ iff (mode & 2) == 0 ]
         t3 = t2 >> post_shift;
         t4 = oprnd0 >> (prec - 1);
         q = t3 - t4; [ iff (mode & 1) == 0 ]
         q = t4 - t3; [ iff (mode & 1) != 0 ]  */
      if ((mode & 2) == 0)
        {
          op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
          if (op == unknown_optab
              || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
            return NULL_TREE;
          cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, op0);
        }
      cur_op = add_rshift (gsi, type, cur_op, post_shifts);
      if (cur_op == NULL_TREE)
        return NULL_TREE;
      tem = add_rshift (gsi, type, op0, shift_temps);
      if (tem == NULL_TREE)
        return NULL_TREE;
      op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
      if (op == unknown_optab
          || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
        return NULL_TREE;
      if ((mode & 1) == 0)
        cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, cur_op, tem);
      else
        cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, tem, cur_op);
      break;
    default:
      gcc_unreachable ();
    }

  if (code == TRUNC_DIV_EXPR)
    return cur_op;

  /* We divided.  Now finish by:
     t1 = q * oprnd1;
     r = oprnd0 - t1;  */
  op = optab_for_tree_code (MULT_EXPR, type, optab_default);
  if (op == unknown_optab
      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    return NULL_TREE;
  tem = gimplify_build2 (gsi, MULT_EXPR, type, cur_op, op1);
  op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
  if (op == unknown_optab
      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    return NULL_TREE;
  return gimplify_build2 (gsi, MINUS_EXPR, type, op0, tem);
}

/* Expand a vector condition to scalars, by using many conditions
   on the vector's elements.  */
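
/* As a sketch (operand names follow the code below, temporaries are
   invented for illustration): a V4SI "m = VEC_COND_EXPR <a1 < a2, b, c>"
   with no vec_cond support becomes four scalar conditionals, one per
   element,

     m_i = a1_i < a2_i ? b_i : c_i;   for i = 0..3

   with the four results gathered into a CONSTRUCTOR for m.  */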
static void
expand_vector_condition (gimple_stmt_iterator *gsi)
{
  gimple stmt = gsi_stmt (*gsi);
  tree type = gimple_expr_type (stmt);
  tree a = gimple_assign_rhs1 (stmt);
  tree a1 = a;
  tree a2;
  bool a_is_comparison = false;
  tree b = gimple_assign_rhs2 (stmt);
  tree c = gimple_assign_rhs3 (stmt);
  vec<constructor_elt, va_gc> *v;
  tree constr;
  tree inner_type = TREE_TYPE (type);
  tree cond_type = TREE_TYPE (TREE_TYPE (a));
  tree comp_inner_type = cond_type;
  tree width = TYPE_SIZE (inner_type);
  tree index = bitsize_int (0);
  int nunits = TYPE_VECTOR_SUBPARTS (type);
  int i;
  location_t loc = gimple_location (gsi_stmt (*gsi));

  if (!is_gimple_val (a))
    {
      gcc_assert (COMPARISON_CLASS_P (a));
      a_is_comparison = true;
      a1 = TREE_OPERAND (a, 0);
      a2 = TREE_OPERAND (a, 1);
      comp_inner_type = TREE_TYPE (TREE_TYPE (a1));
    }

  if (expand_vec_cond_expr_p (type, TREE_TYPE (a1)))
    return;

  /* TODO: try and find a smaller vector type.  */

  warning_at (loc, OPT_Wvector_operation_performance,
              "vector condition will be expanded piecewise");

  vec_alloc (v, nunits);
  for (i = 0; i < nunits;
       i++, index = int_const_binop (PLUS_EXPR, index, width))
    {
      tree aa, result;
      tree bb = tree_vec_extract (gsi, inner_type, b, width, index);
      tree cc = tree_vec_extract (gsi, inner_type, c, width, index);
      if (a_is_comparison)
        {
          tree aa1 = tree_vec_extract (gsi, comp_inner_type, a1, width, index);
          tree aa2 = tree_vec_extract (gsi, comp_inner_type, a2, width, index);
          aa = build2 (TREE_CODE (a), cond_type, aa1, aa2);
        }
      else
        aa = tree_vec_extract (gsi, cond_type, a, width, index);
      result = gimplify_build3 (gsi, COND_EXPR, inner_type, aa, bb, cc);
      constructor_elt ce = {NULL_TREE, result};
      v->quick_push (ce);
    }

  constr = build_constructor (type, v);
  gimple_assign_set_rhs_from_tree (gsi, constr);
  update_stmt (gsi_stmt (*gsi));
}

static tree
expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type,
                         gimple assign, enum tree_code code)
{
  enum machine_mode compute_mode = TYPE_MODE (compute_type);

  /* If the compute mode is not a vector mode (hence we are not decomposing
     a BLKmode vector to smaller, hardware-supported vectors), we may want
     to expand the operations in parallel.  */
  if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FRACT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UFRACT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_ACCUM
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM)
    switch (code)
      {
      case PLUS_EXPR:
      case MINUS_EXPR:
        if (!TYPE_OVERFLOW_TRAPS (type))
          return expand_vector_addition (gsi, do_binop, do_plus_minus, type,
                                         gimple_assign_rhs1 (assign),
                                         gimple_assign_rhs2 (assign), code);
        break;

      case NEGATE_EXPR:
        if (!TYPE_OVERFLOW_TRAPS (type))
          return expand_vector_addition (gsi, do_unop, do_negate, type,
                                         gimple_assign_rhs1 (assign),
                                         NULL_TREE, code);
        break;

      case BIT_AND_EXPR:
      case BIT_IOR_EXPR:
      case BIT_XOR_EXPR:
        return expand_vector_parallel (gsi, do_binop, type,
                                       gimple_assign_rhs1 (assign),
                                       gimple_assign_rhs2 (assign), code);

      case BIT_NOT_EXPR:
        return expand_vector_parallel (gsi, do_unop, type,
                                       gimple_assign_rhs1 (assign),
                                       NULL_TREE, code);
      case EQ_EXPR:
      case NE_EXPR:
      case GT_EXPR:
      case LT_EXPR:
      case GE_EXPR:
      case LE_EXPR:
      case UNEQ_EXPR:
      case UNGT_EXPR:
      case UNLT_EXPR:
      case UNGE_EXPR:
      case UNLE_EXPR:
      case LTGT_EXPR:
      case ORDERED_EXPR:
      case UNORDERED_EXPR:
        {
          tree rhs1 = gimple_assign_rhs1 (assign);
          tree rhs2 = gimple_assign_rhs2 (assign);

          return expand_vector_comparison (gsi, type, rhs1, rhs2, code);
        }

      case TRUNC_DIV_EXPR:
      case TRUNC_MOD_EXPR:
        {
          tree rhs1 = gimple_assign_rhs1 (assign);
          tree rhs2 = gimple_assign_rhs2 (assign);
          tree ret;

          if (!optimize
              || !VECTOR_INTEGER_TYPE_P (type)
              || TREE_CODE (rhs2) != VECTOR_CST
              || !VECTOR_MODE_P (TYPE_MODE (type)))
            break;

          ret = expand_vector_divmod (gsi, type, rhs1, rhs2, code);
          if (ret != NULL_TREE)
            return ret;
          break;
        }

      default:
        break;
      }

  if (TREE_CODE_CLASS (code) == tcc_unary)
    return expand_vector_piecewise (gsi, do_unop, type, compute_type,
                                    gimple_assign_rhs1 (assign),
                                    NULL_TREE, code);
  else
    return expand_vector_piecewise (gsi, do_binop, type, compute_type,
                                    gimple_assign_rhs1 (assign),
                                    gimple_assign_rhs2 (assign), code);
}

/* Return a type for the widest vector mode whose components are of type
   TYPE, or NULL_TREE if none is found.  */
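
/* For example (hypothetical target, used only to illustrate the
   search): when lowering an operation on a 32-byte vector type that
   has no machine mode of its own, if the target provides the optab
   handler for V16QImode this returns the corresponding 16-byte vector
   type, so the caller can compute the result in two halves.  */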
static tree
type_for_widest_vector_mode (tree type, optab op)
{
  enum machine_mode inner_mode = TYPE_MODE (type);
  enum machine_mode best_mode = VOIDmode, mode;
  int best_nunits = 0;

  if (SCALAR_FLOAT_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_FLOAT;
  else if (SCALAR_FRACT_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_FRACT;
  else if (SCALAR_UFRACT_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_UFRACT;
  else if (SCALAR_ACCUM_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_ACCUM;
  else if (SCALAR_UACCUM_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_UACCUM;
  else
    mode = MIN_MODE_VECTOR_INT;

  for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
    if (GET_MODE_INNER (mode) == inner_mode
        && GET_MODE_NUNITS (mode) > best_nunits
        && optab_handler (op, mode) != CODE_FOR_nothing)
      best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);

  if (best_mode == VOIDmode)
    return NULL_TREE;
  else
    return build_vector_type_for_mode (type, best_mode);
}


/* Build a reference to the element of the vector VECT.  The function
   returns either the element itself, a BIT_FIELD_REF, or an
   ARRAY_REF expression.

   GSI is required to insert temporary variables while building a
   reference to the element of the vector VECT.

   PTMPVEC is a pointer to the temporary variable for caching
   purposes.  If PTMPVEC is NULL, a new temporary variable will
   be created.  */
static tree
vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec)
{
  tree vect_type, vect_elt_type;
  gimple asgn;
  tree tmpvec;
  tree arraytype;
  bool need_asgn = true;
  unsigned int elements;

  vect_type = TREE_TYPE (vect);
  vect_elt_type = TREE_TYPE (vect_type);
  elements = TYPE_VECTOR_SUBPARTS (vect_type);

  if (TREE_CODE (idx) == INTEGER_CST)
    {
      unsigned HOST_WIDE_INT index;

      /* Given that we're about to compute a binary modulus,
         we don't care about the high bits of the value.  */
      index = TREE_INT_CST_LOW (idx);
      if (!host_integerp (idx, 1) || index >= elements)
        {
          index &= elements - 1;
          idx = build_int_cst (TREE_TYPE (idx), index);
        }

      /* When lowering a vector statement sequence do some easy
         simplification by looking through intermediate vector results.  */
      if (TREE_CODE (vect) == SSA_NAME)
        {
          gimple def_stmt = SSA_NAME_DEF_STMT (vect);
          if (is_gimple_assign (def_stmt)
              && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
                  || gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR))
            vect = gimple_assign_rhs1 (def_stmt);
        }

      if (TREE_CODE (vect) == VECTOR_CST)
        return VECTOR_CST_ELT (vect, index);
      else if (TREE_CODE (vect) == CONSTRUCTOR
               && (CONSTRUCTOR_NELTS (vect) == 0
                   || TREE_CODE (TREE_TYPE (CONSTRUCTOR_ELT (vect, 0)->value))
                      != VECTOR_TYPE))
        {
          if (index < CONSTRUCTOR_NELTS (vect))
            return CONSTRUCTOR_ELT (vect, index)->value;
          return build_zero_cst (vect_elt_type);
        }
      else
        {
          tree size = TYPE_SIZE (vect_elt_type);
          tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (index),
                                  size);
          return fold_build3 (BIT_FIELD_REF, vect_elt_type, vect, size, pos);
        }
    }

  if (!ptmpvec)
    tmpvec = create_tmp_var (vect_type, "vectmp");
  else if (!*ptmpvec)
    tmpvec = *ptmpvec = create_tmp_var (vect_type, "vectmp");
  else
    {
      tmpvec = *ptmpvec;
      need_asgn = false;
    }

  if (need_asgn)
    {
      TREE_ADDRESSABLE (tmpvec) = 1;
      asgn = gimple_build_assign (tmpvec, vect);
      gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
    }

  arraytype = build_array_type_nelts (vect_elt_type, elements);
  return build4 (ARRAY_REF, vect_elt_type,
                 build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec),
                 idx, NULL_TREE, NULL_TREE);
}

/* Check if VEC_PERM_EXPR within the given setting is supported
   by hardware, or lower it piecewise.

   When VEC_PERM_EXPR has the same first and second operands:
   VEC_PERM_EXPR <v0, v0, mask> the lowered version would be
   {v0[mask[0]], v0[mask[1]], ...}
   MASK and V0 must have the same number of elements.

   Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to
   {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0]], ...}
   V0 and V1 must have the same type.  MASK, V0, V1 must have the
   same number of arguments.  */
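
/* A small worked example (values chosen here for illustration): with
   v0 = {10, 11, 12, 13}, v1 = {20, 21, 22, 23} and mask = {0, 5, 2, 7},
   VEC_PERM_EXPR <v0, v1, mask> selects element mask[i] from the
   8-element concatenation of v0 and v1, yielding {10, 21, 12, 23}.
   Mask elements are taken modulo 2 * len(v0), so a mask element of 13
   behaves like 5.  */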
static void
lower_vec_perm (gimple_stmt_iterator *gsi)
{
  gimple stmt = gsi_stmt (*gsi);
  tree mask = gimple_assign_rhs3 (stmt);
  tree vec0 = gimple_assign_rhs1 (stmt);
  tree vec1 = gimple_assign_rhs2 (stmt);
  tree vect_type = TREE_TYPE (vec0);
  tree mask_type = TREE_TYPE (mask);
  tree vect_elt_type = TREE_TYPE (vect_type);
  tree mask_elt_type = TREE_TYPE (mask_type);
  unsigned int elements = TYPE_VECTOR_SUBPARTS (vect_type);
  vec<constructor_elt, va_gc> *v;
  tree constr, t, si, i_val;
  tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE;
  bool two_operand_p = !operand_equal_p (vec0, vec1, 0);
  location_t loc = gimple_location (gsi_stmt (*gsi));
  unsigned i;

  if (TREE_CODE (mask) == SSA_NAME)
    {
      gimple def_stmt = SSA_NAME_DEF_STMT (mask);
      if (is_gimple_assign (def_stmt)
          && gimple_assign_rhs_code (def_stmt) == VECTOR_CST)
        mask = gimple_assign_rhs1 (def_stmt);
    }

  if (TREE_CODE (mask) == VECTOR_CST)
    {
      unsigned char *sel_int = XALLOCAVEC (unsigned char, elements);

      for (i = 0; i < elements; ++i)
        sel_int[i] = (TREE_INT_CST_LOW (VECTOR_CST_ELT (mask, i))
                      & (2 * elements - 1));

      if (can_vec_perm_p (TYPE_MODE (vect_type), false, sel_int))
        {
          gimple_assign_set_rhs3 (stmt, mask);
          update_stmt (stmt);
          return;
        }
    }
  else if (can_vec_perm_p (TYPE_MODE (vect_type), true, NULL))
    return;

  warning_at (loc, OPT_Wvector_operation_performance,
              "vector shuffling operation will be expanded piecewise");

  vec_alloc (v, elements);
  for (i = 0; i < elements; i++)
    {
      si = size_int (i);
      i_val = vector_element (gsi, mask, si, &masktmp);

      if (TREE_CODE (i_val) == INTEGER_CST)
        {
          unsigned HOST_WIDE_INT index;

          index = TREE_INT_CST_LOW (i_val);
          if (!host_integerp (i_val, 1) || index >= elements)
            i_val = build_int_cst (mask_elt_type, index & (elements - 1));

          if (two_operand_p && (index & elements) != 0)
            t = vector_element (gsi, vec1, i_val, &vec1tmp);
          else
            t = vector_element (gsi, vec0, i_val, &vec0tmp);

          t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
                                        true, GSI_SAME_STMT);
        }
      else
        {
          tree cond = NULL_TREE, v0_val;

          if (two_operand_p)
            {
              cond = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
                                  build_int_cst (mask_elt_type, elements));
              cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
                                               true, GSI_SAME_STMT);
            }

          i_val = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
                               build_int_cst (mask_elt_type, elements - 1));
          i_val = force_gimple_operand_gsi (gsi, i_val, true, NULL_TREE,
                                            true, GSI_SAME_STMT);

          v0_val = vector_element (gsi, vec0, i_val, &vec0tmp);
          v0_val = force_gimple_operand_gsi (gsi, v0_val, true, NULL_TREE,
                                             true, GSI_SAME_STMT);

          if (two_operand_p)
            {
              tree v1_val;

              v1_val = vector_element (gsi, vec1, i_val, &vec1tmp);
              v1_val = force_gimple_operand_gsi (gsi, v1_val, true, NULL_TREE,
                                                 true, GSI_SAME_STMT);

              cond = fold_build2 (EQ_EXPR, boolean_type_node,
                                  cond, build_zero_cst (mask_elt_type));
              cond = fold_build3 (COND_EXPR, vect_elt_type,
                                  cond, v0_val, v1_val);
              t = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
                                            true, GSI_SAME_STMT);
            }
          else
            t = v0_val;
        }

      CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, t);
    }

  constr = build_constructor (vect_type, v);
  gimple_assign_set_rhs_from_tree (gsi, constr);
  update_stmt (gsi_stmt (*gsi));
}

/* Process one statement.  If we identify a vector operation, expand it.  */

static void
expand_vector_operations_1 (gimple_stmt_iterator *gsi)
{
  gimple stmt = gsi_stmt (*gsi);
  tree lhs, rhs1, rhs2 = NULL, type, compute_type;
  enum tree_code code;
  enum machine_mode compute_mode;
  optab op = unknown_optab;
  enum gimple_rhs_class rhs_class;
  tree new_rhs;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return;

  code = gimple_assign_rhs_code (stmt);
  rhs_class = get_gimple_rhs_class (code);
  lhs = gimple_assign_lhs (stmt);

  if (code == VEC_PERM_EXPR)
    {
      lower_vec_perm (gsi);
      return;
    }

  if (code == VEC_COND_EXPR)
    {
      expand_vector_condition (gsi);
      return;
    }
  if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS)
    return;

  rhs1 = gimple_assign_rhs1 (stmt);
  type = gimple_expr_type (stmt);
  if (rhs_class == GIMPLE_BINARY_RHS)
    rhs2 = gimple_assign_rhs2 (stmt);

  if (TREE_CODE (type) != VECTOR_TYPE)
    return;

  if (code == NOP_EXPR
      || code == FLOAT_EXPR
      || code == FIX_TRUNC_EXPR
      || code == VIEW_CONVERT_EXPR)
    return;

  gcc_assert (code != CONVERT_EXPR);

  /* The signedness is determined from the input argument.  */
  if (code == VEC_UNPACK_FLOAT_HI_EXPR
      || code == VEC_UNPACK_FLOAT_LO_EXPR)
    type = TREE_TYPE (rhs1);

  /* For widening/narrowing vector operations, the relevant type is of the
     arguments, not the widened result.  VEC_UNPACK_FLOAT_*_EXPR is
     calculated in the same way above.  */
  if (code == WIDEN_SUM_EXPR
      || code == VEC_WIDEN_MULT_HI_EXPR
      || code == VEC_WIDEN_MULT_LO_EXPR
      || code == VEC_WIDEN_MULT_EVEN_EXPR
      || code == VEC_WIDEN_MULT_ODD_EXPR
      || code == VEC_UNPACK_HI_EXPR
      || code == VEC_UNPACK_LO_EXPR
      || code == VEC_PACK_TRUNC_EXPR
      || code == VEC_PACK_SAT_EXPR
      || code == VEC_PACK_FIX_TRUNC_EXPR
      || code == VEC_WIDEN_LSHIFT_HI_EXPR
      || code == VEC_WIDEN_LSHIFT_LO_EXPR)
    type = TREE_TYPE (rhs1);

  /* Choose between vector shift/rotate by vector and vector shift/rotate by
     scalar.  */
  if (code == LSHIFT_EXPR
      || code == RSHIFT_EXPR
      || code == LROTATE_EXPR
      || code == RROTATE_EXPR)
    {
      optab opv;

      /* Check whether we have vector <op> {x,x,x,x} where x
         could be a scalar variable or a constant.  Transform
         vector <op> {x,x,x,x} ==> vector <op> scalar.  */
      if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
        {
          tree first;
          gimple def_stmt;

          if ((TREE_CODE (rhs2) == VECTOR_CST
               && (first = uniform_vector_p (rhs2)) != NULL_TREE)
              || (TREE_CODE (rhs2) == SSA_NAME
                  && (def_stmt = SSA_NAME_DEF_STMT (rhs2))
                  && gimple_assign_single_p (def_stmt)
                  && (first = uniform_vector_p
                      (gimple_assign_rhs1 (def_stmt))) != NULL_TREE))
            {
              gimple_assign_set_rhs2 (stmt, first);
              update_stmt (stmt);
              rhs2 = first;
            }
        }

      opv = optab_for_tree_code (code, type, optab_vector);
      if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
        op = opv;
      else
        {
          op = optab_for_tree_code (code, type, optab_scalar);

          /* The rtl expander will expand vector/scalar as vector/vector
             if necessary.  Don't bother converting the stmt here.  */
          if (optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing
              && optab_handler (opv, TYPE_MODE (type)) != CODE_FOR_nothing)
            return;
        }
    }
  else
    op = optab_for_tree_code (code, type, optab_default);

  /* Optabs will try converting a negation into a subtraction, so
     look for it as well.  TODO: negation of floating-point vectors
     might be turned into an exclusive OR toggling the sign bit.  */
  if (op == unknown_optab
      && code == NEGATE_EXPR
      && INTEGRAL_TYPE_P (TREE_TYPE (type)))
    op = optab_for_tree_code (MINUS_EXPR, type, optab_default);

  /* For very wide vectors, try using a smaller vector mode.  */
  compute_type = type;
  if (!VECTOR_MODE_P (TYPE_MODE (type)) && op)
    {
      tree vector_compute_type
        = type_for_widest_vector_mode (TREE_TYPE (type), op);
      if (vector_compute_type != NULL_TREE
          && (TYPE_VECTOR_SUBPARTS (vector_compute_type)
              < TYPE_VECTOR_SUBPARTS (compute_type))
          && (optab_handler (op, TYPE_MODE (vector_compute_type))
              != CODE_FOR_nothing))
        compute_type = vector_compute_type;
    }

  /* If we are breaking a BLKmode vector into smaller pieces,
     type_for_widest_vector_mode has already looked into the optab,
     so skip these checks.  */
  if (compute_type == type)
    {
      compute_mode = TYPE_MODE (compute_type);
      if (VECTOR_MODE_P (compute_mode))
        {
          if (op && optab_handler (op, compute_mode) != CODE_FOR_nothing)
            return;
          if (code == MULT_HIGHPART_EXPR
              && can_mult_highpart_p (compute_mode,
                                      TYPE_UNSIGNED (compute_type)))
            return;
        }
      /* There is no operation in hardware, so fall back to scalars.  */
      compute_type = TREE_TYPE (type);
    }

  gcc_assert (code != VEC_LSHIFT_EXPR && code != VEC_RSHIFT_EXPR);
  new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code);

  /* Leave expression untouched for later expansion.  */
  if (new_rhs == NULL_TREE)
    return;

  if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
    new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
                               new_rhs);

  /* NOTE:  We should avoid using gimple_assign_set_rhs_from_tree.  One
     way to do it is change expand_vector_operation and its callees to
     return a tree_code, RHS1 and RHS2 instead of a tree.  */
  gimple_assign_set_rhs_from_tree (gsi, new_rhs);
  update_stmt (gsi_stmt (*gsi));
}

/* Use this to lower vector operations introduced by the vectorizer,
   if it may need the bit-twiddling tricks implemented in this file.  */
static bool
gate_expand_vector_operations_ssa (void)
{
  return optimize == 0;
}

static unsigned int
expand_vector_operations (void)
{
  gimple_stmt_iterator gsi;
  basic_block bb;
  bool cfg_changed = false;

  FOR_EACH_BB (bb)
    {
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          expand_vector_operations_1 (&gsi);
          /* ???  If we do not cleanup EH then we will ICE in
             verification.  But in reality we have created wrong-code
             as we did not properly transition EH info and edges to
             the piecewise computations.  */
          if (maybe_clean_eh_stmt (gsi_stmt (gsi))
              && gimple_purge_dead_eh_edges (bb))
            cfg_changed = true;
        }
    }

  return cfg_changed ? TODO_cleanup_cfg : 0;
}

struct gimple_opt_pass pass_lower_vector =
{
 {
  GIMPLE_PASS,
  "veclower",                           /* name */
  OPTGROUP_VEC,                         /* optinfo_flags */
  gate_expand_vector_operations_ssa,    /* gate */
  expand_vector_operations,             /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_NONE,                              /* tv_id */
  PROP_cfg,                             /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_update_ssa                       /* todo_flags_finish */
    | TODO_verify_ssa
    | TODO_verify_stmts | TODO_verify_flow
    | TODO_cleanup_cfg
 }
};

struct gimple_opt_pass pass_lower_vector_ssa =
{
 {
  GIMPLE_PASS,
  "veclower2",                          /* name */
  OPTGROUP_VEC,                         /* optinfo_flags */
  0,                                    /* gate */
  expand_vector_operations,             /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_NONE,                              /* tv_id */
  PROP_cfg,                             /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_update_ssa                       /* todo_flags_finish */
    | TODO_verify_ssa
    | TODO_verify_stmts | TODO_verify_flow
    | TODO_cleanup_cfg
 }
};

#include "gt-tree-vect-generic.h"