/* Subroutines used to remove unnecessary doubleword swaps
   for p8 little-endian VSX code.
   Copyright (C) 1991-2020 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "df.h"
#include "tm_p.h"
#include "ira.h"
#include "print-tree.h"
#include "varasm.h"
#include "explow.h"
#include "expr.h"
#include "output.h"
#include "tree-pass.h"
#include "rtx-vector-builder.h"

/* Analyze vector computations and remove unnecessary doubleword
   swaps (xxswapdi instructions).  This pass is performed only
   for little-endian VSX code generation.

   For this specific case, loads and stores of 4x32 and 2x64 vectors
   are inefficient.  These are implemented using the lxvd2x and
   stxvd2x instructions, which invert the order of doublewords in
   a vector register.  Thus the code generation inserts an xxswapdi
   after each such load, and prior to each such store.  (For spill
   code after register assignment, an additional xxswapdi is inserted
   following each store in order to return a hard register to its
   unpermuted value.)

   The extra xxswapdi instructions reduce performance.  This can be
   particularly bad for vectorized code.  The purpose of this pass
   is to reduce the number of xxswapdi instructions required for
   correctness.

   The primary insight is that much code that operates on vectors
   does not care about the relative order of elements in a register,
   so long as the correct memory order is preserved.  If we have
   a computation where all input values are provided by lxvd2x/xxswapdi
   sequences, all outputs are stored using xxswapdi/stxvd2x sequences,
   and all intermediate computations are pure SIMD (independent of
   element order), then all the xxswapdi's associated with the loads
   and stores may be removed.

   This pass uses some of the infrastructure and logical ideas from
   the "web" pass in web.c.  We create maximal webs of computations
   fitting the description above using union-find.  Each such web is
   then optimized by removing its unnecessary xxswapdi instructions.

   The pass is placed prior to global optimization so that we can
   perform the optimization in the safest and simplest way possible;
   that is, by replacing each xxswapdi insn with a register copy insn.
   Subsequent forward propagation will remove copies where possible.

   There are some operations sensitive to element order for which we
   can still allow the operation, provided we modify those operations.
   These include CONST_VECTORs, for which we must swap the first and
   second halves of the constant vector; and SUBREGs, for which we
   must adjust the byte offset to account for the swapped doublewords.
   A remaining opportunity would be non-immediate-form splats, for
   which we should adjust the selected lane of the input.  We should
   also make code generation adjustments for sum-across operations,
   since this is a common vectorizer reduction.

   Because we run prior to the first split, we can see loads and stores
   here that match *vsx_le_perm_{load,store}_<mode>.  These are vanilla
   vector loads and stores that have not yet been split into a permuting
   load/store and a swap.  (One way this can happen is with a builtin
   call to vec_vsx_{ld,st}.)  We can handle these as well, but rather
   than deleting a swap, we convert the load/store into a permuting
   load/store (which effectively removes the swap).  */

/* Notes on Permutes

   We do not currently handle computations that contain permutes.  There
   is a general transformation that can be performed correctly, but it
   may introduce more expensive code than it replaces.  To handle these
   would require a cost model to determine when to perform the optimization.
   This commentary records how this could be done if desired.

   The most general permute is something like this (example for V16QI):

     (vec_select:V16QI (vec_concat:V32QI (op1:V16QI) (op2:V16QI))
                       (parallel [(const_int a0) (const_int a1)
                                    ...
                                  (const_int a14) (const_int a15)]))

   where a0,...,a15 are in [0,31] and select elements from op1 and op2
   to produce in the result.

   Regardless of mode, we can convert the PARALLEL to a mask of 16
   byte-element selectors.  Let's call this M, with M[i] representing
   the ith byte-element selector value.  Then if we swap doublewords
   throughout the computation, we can get correct behavior by replacing
   M with M' as follows:

     M'[i] = { (M[i]+8)%16      : M[i] in [0,15]
             { ((M[i]+8)%16)+16 : M[i] in [16,31]

   This seems promising at first, since we are just replacing one mask
   with another.  But certain masks are preferable to others.  If M
   is a mask that matches a vmrghh pattern, for example, M' certainly
   will not.  Instead of a single vmrghh, we would generate a load of
   M' and a vperm.  So we would need to know how many xxswapd's we can
   remove as a result of this transformation to determine if it's
   profitable; and preferably the logic would need to be aware of all
   the special preferable masks.

   Another form of permute is an UNSPEC_VPERM, in which the mask is
   already in a register.  In some cases, this mask may be a constant
   that we can discover with ud-chains, in which case the above
   transformation is ok.  However, the common usage here is for the
   mask to be produced by an UNSPEC_LVSL, in which case the mask
   cannot be known at compile time.  In such a case we would have to
   generate several instructions to compute M' as above at run time,
   and a cost model is needed again.

   However, when the mask M for an UNSPEC_VPERM is loaded from the
   constant pool, we can replace M with M' as above at no cost
   beyond adding a constant pool entry.  */
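/* Illustration only (this transformation is described above but not
   implemented by the pass): applying the M -> M' rule to the identity
   mask

     M  = {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 }

   yields

     M' = {  8,  9, 10, 11, 12, 13, 14, 15,  0,  1,  2,  3,  4,  5,  6,  7 },

   i.e. the two doublewords of the selection trade places.  A selector
   that indexes the second input, say M[i] = 16, maps to
   ((16+8)%16)+16 = 24, so it still indexes the second input but in the
   opposite doubleword.  */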
/* This is based on the union-find logic in web.c.  web_entry_base is
   defined in df.h.  */
class swap_web_entry : public web_entry_base
{
 public:
  /* Pointer to the insn.  */
  rtx_insn *insn;
  /* Set if insn contains a mention of a vector register.  All other
     fields are undefined if this field is unset.  */
  unsigned int is_relevant : 1;
  /* Set if insn is a load.  */
  unsigned int is_load : 1;
  /* Set if insn is a store.  */
  unsigned int is_store : 1;
  /* Set if insn is a doubleword swap.  This can either be a register swap
     or a permuting load or store (test is_load and is_store for this).  */
  unsigned int is_swap : 1;
  /* Set if the insn has a live-in use of a parameter register.  */
  unsigned int is_live_in : 1;
  /* Set if the insn has a live-out def of a return register.  */
  unsigned int is_live_out : 1;
  /* Set if the insn contains a subreg reference of a vector register.  */
  unsigned int contains_subreg : 1;
  /* Set if the insn contains a 128-bit integer operand.  */
  unsigned int is_128_int : 1;
  /* Set if this is a call-insn.  */
  unsigned int is_call : 1;
  /* Set if this insn does not perform a vector operation for which
     element order matters, or if we know how to fix it up if it does.
     Undefined if is_swap is set.  */
  unsigned int is_swappable : 1;
  /* A nonzero value indicates what kind of special handling for this
     insn is required if doublewords are swapped.  Undefined if
     is_swappable is not set.  */
  unsigned int special_handling : 4;
  /* Set if the web represented by this entry cannot be optimized.  */
  unsigned int web_not_optimizable : 1;
  /* Set if this insn should be deleted.  */
  unsigned int will_delete : 1;
};

enum special_handling_values {
  SH_NONE = 0,
  SH_CONST_VECTOR,
  SH_SUBREG,
  SH_NOSWAP_LD,
  SH_NOSWAP_ST,
  SH_EXTRACT,
  SH_SPLAT,
  SH_XXPERMDI,
  SH_CONCAT,
  SH_VPERM
};

/* Union INSN with all insns containing definitions that reach USE.
   Detect whether USE is live-in to the current function.  */
static void
union_defs (swap_web_entry *insn_entry, rtx insn, df_ref use)
{
  struct df_link *link = DF_REF_CHAIN (use);

  if (!link)
    insn_entry[INSN_UID (insn)].is_live_in = 1;

  while (link)
    {
      if (DF_REF_IS_ARTIFICIAL (link->ref))
        insn_entry[INSN_UID (insn)].is_live_in = 1;

      if (DF_REF_INSN_INFO (link->ref))
        {
          rtx def_insn = DF_REF_INSN (link->ref);
          (void)unionfind_union (insn_entry + INSN_UID (insn),
                                 insn_entry + INSN_UID (def_insn));
        }

      link = link->next;
    }
}

/* Union INSN with all insns containing uses reached from DEF.
   Detect whether DEF is live-out from the current function.  */
static void
union_uses (swap_web_entry *insn_entry, rtx insn, df_ref def)
{
  struct df_link *link = DF_REF_CHAIN (def);

  if (!link)
    insn_entry[INSN_UID (insn)].is_live_out = 1;

  while (link)
    {
      /* This could be an eh use or some other artificial use;
         we treat these all the same (killing the optimization).  */
      if (DF_REF_IS_ARTIFICIAL (link->ref))
        insn_entry[INSN_UID (insn)].is_live_out = 1;

      if (DF_REF_INSN_INFO (link->ref))
        {
          rtx use_insn = DF_REF_INSN (link->ref);
          (void)unionfind_union (insn_entry + INSN_UID (insn),
                                 insn_entry + INSN_UID (use_insn));
        }

      link = link->next;
    }
}

/* Return 1 iff INSN is a load insn, including permuting loads that
   represent an lxvd2x instruction; else return 0.  */
static unsigned int
insn_is_load_p (rtx insn)
{
  rtx body = PATTERN (insn);

  if (GET_CODE (body) == SET)
    {
      if (MEM_P (SET_SRC (body)))
        return 1;

      if (GET_CODE (SET_SRC (body)) == VEC_SELECT
          && MEM_P (XEXP (SET_SRC (body), 0)))
        return 1;

      return 0;
    }

  if (GET_CODE (body) != PARALLEL)
    return 0;

  rtx set = XVECEXP (body, 0, 0);

  if (GET_CODE (set) == SET && MEM_P (SET_SRC (set)))
    return 1;

  return 0;
}

/* Return 1 iff INSN is a store insn, including permuting stores that
   represent a stxvd2x instruction; else return 0.  */
static unsigned int
insn_is_store_p (rtx insn)
{
  rtx body = PATTERN (insn);
  if (GET_CODE (body) == SET && MEM_P (SET_DEST (body)))
    return 1;
  if (GET_CODE (body) != PARALLEL)
    return 0;
  rtx set = XVECEXP (body, 0, 0);
  if (GET_CODE (set) == SET && MEM_P (SET_DEST (set)))
    return 1;
  return 0;
}

/* Return 1 iff INSN swaps doublewords.  This may be a reg-reg swap,
   a permuting load, or a permuting store.  */
static unsigned int
insn_is_swap_p (rtx insn)
{
  rtx body = PATTERN (insn);
  if (GET_CODE (body) != SET)
    return 0;
  rtx rhs = SET_SRC (body);
  if (GET_CODE (rhs) != VEC_SELECT)
    return 0;
  rtx parallel = XEXP (rhs, 1);
  if (GET_CODE (parallel) != PARALLEL)
    return 0;
  unsigned int len = XVECLEN (parallel, 0);
  if (len != 2 && len != 4 && len != 8 && len != 16)
    return 0;
  for (unsigned int i = 0; i < len / 2; ++i)
    {
      rtx op = XVECEXP (parallel, 0, i);
      if (!CONST_INT_P (op) || INTVAL (op) != len / 2 + i)
        return 0;
    }
  for (unsigned int i = len / 2; i < len; ++i)
    {
      rtx op = XVECEXP (parallel, 0, i);
      if (!CONST_INT_P (op) || INTVAL (op) != i - len / 2)
        return 0;
    }
  return 1;
}
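/* For reference, a register-to-register doubleword swap accepted by
   insn_is_swap_p has a selection vector whose first half indexes the
   second half of the source and vice versa.  For V4SI, for example,
   the body looks like this (register numbers are illustrative only):

     (set (reg:V4SI 125)
          (vec_select:V4SI (reg:V4SI 124)
                           (parallel [(const_int 2) (const_int 3)
                                      (const_int 0) (const_int 1)])))

   Permuting loads and stores match the same shape, with a MEM as the
   vec_select source or as the destination of the set.  */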
/* Return true iff EXPR represents the sum of two registers.  */
bool
rs6000_sum_of_two_registers_p (const_rtx expr)
{
  if (GET_CODE (expr) == PLUS)
    {
      const_rtx operand1 = XEXP (expr, 0);
      const_rtx operand2 = XEXP (expr, 1);
      return (REG_P (operand1) && REG_P (operand2));
    }
  return false;
}

/* Return true iff EXPR represents an address expression that masks off
   the low-order 4 bits in the style of an lvx or stvx rtl pattern.  */
bool
rs6000_quadword_masked_address_p (const_rtx expr)
{
  if (GET_CODE (expr) == AND)
    {
      const_rtx operand1 = XEXP (expr, 0);
      const_rtx operand2 = XEXP (expr, 1);
      if ((REG_P (operand1) || rs6000_sum_of_two_registers_p (operand1))
          && CONST_SCALAR_INT_P (operand2) && INTVAL (operand2) == -16)
        return true;
    }
  return false;
}

/* Return TRUE if INSN represents a swap of a swapped load from memory
   and the memory address is quad-word aligned.  */
static bool
quad_aligned_load_p (swap_web_entry *insn_entry, rtx_insn *insn)
{
  unsigned uid = INSN_UID (insn);
  if (!insn_entry[uid].is_swap || insn_entry[uid].is_load)
    return false;

  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);

  /* Since insn is known to represent a swap instruction, we know it
     "uses" only one input variable.  */
  df_ref use = DF_INSN_INFO_USES (insn_info);

  /* Figure out where this input variable is defined.  */
  struct df_link *def_link = DF_REF_CHAIN (use);

  /* If there is no definition or the definition is artificial or there are
     multiple definitions, punt.  */
  if (!def_link || !def_link->ref || DF_REF_IS_ARTIFICIAL (def_link->ref)
      || def_link->next)
    return false;

  rtx def_insn = DF_REF_INSN (def_link->ref);
  unsigned uid2 = INSN_UID (def_insn);
  /* We're looking for a load-with-swap insn.  If this is not that,
     return false.  */
  if (!insn_entry[uid2].is_load || !insn_entry[uid2].is_swap)
    return false;

  /* If the source of the rtl def is not a set from memory, return
     false.  */
  rtx body = PATTERN (def_insn);
  if (GET_CODE (body) != SET
      || GET_CODE (SET_SRC (body)) != VEC_SELECT
      || !MEM_P (XEXP (SET_SRC (body), 0)))
    return false;

  rtx mem = XEXP (SET_SRC (body), 0);
  rtx base_reg = XEXP (mem, 0);
  return ((REG_P (base_reg) || rs6000_sum_of_two_registers_p (base_reg))
          && MEM_ALIGN (mem) >= 128) ? true : false;
}

/* Return TRUE if INSN represents a store-with-swap of a swapped value
   and the memory address is quad-word aligned.  */
static bool
quad_aligned_store_p (swap_web_entry *insn_entry, rtx_insn *insn)
{
  unsigned uid = INSN_UID (insn);
  if (!insn_entry[uid].is_swap || !insn_entry[uid].is_store)
    return false;

  rtx body = PATTERN (insn);
  rtx dest_address = XEXP (SET_DEST (body), 0);
  rtx swap_reg = XEXP (SET_SRC (body), 0);

  /* If the base address for the memory expression is not represented
     by a single register and is not the sum of two registers, punt.  */
  if (!REG_P (dest_address) && !rs6000_sum_of_two_registers_p (dest_address))
    return false;

  /* Confirm that the value to be stored is produced by a swap
     instruction.  */
  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
  df_ref use;
  FOR_EACH_INSN_INFO_USE (use, insn_info)
    {
      struct df_link *def_link = DF_REF_CHAIN (use);

      /* If this is not the definition of the candidate swap register,
         then skip it.  I am interested in a different definition.  */
      if (!rtx_equal_p (DF_REF_REG (use), swap_reg))
        continue;

      /* If there is no def or the def is artificial or there are
         multiple defs, punt.  */
      if (!def_link || !def_link->ref || DF_REF_IS_ARTIFICIAL (def_link->ref)
          || def_link->next)
        return false;

      rtx def_insn = DF_REF_INSN (def_link->ref);
      unsigned uid2 = INSN_UID (def_insn);

      /* If this source value is not a simple swap, return false.  */
      if (!insn_entry[uid2].is_swap || insn_entry[uid2].is_load
          || insn_entry[uid2].is_store)
        return false;

      /* I've processed the use that I care about, so break out of
         this loop.  */
      break;
    }

  /* At this point, we know the source data comes from a swap.  The
     remaining question is whether the memory address is aligned.  */
  rtx set = single_set (insn);
  if (set)
    {
      rtx dest = SET_DEST (set);
      if (MEM_P (dest))
        return (MEM_ALIGN (dest) >= 128);
    }
  return false;
}

/* Return 1 iff UID, known to reference a swap, is both fed by a load
   and a feeder of a store.  */
static unsigned int
swap_feeds_both_load_and_store (swap_web_entry *insn_entry)
{
  rtx insn = insn_entry->insn;
  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
  df_ref def, use;
  struct df_link *link = 0;
  rtx_insn *load = 0, *store = 0;
  bool fed_by_load = 0;
  bool feeds_store = 0;

  FOR_EACH_INSN_INFO_USE (use, insn_info)
    {
      link = DF_REF_CHAIN (use);
      load = DF_REF_INSN (link->ref);
      if (insn_is_load_p (load) && insn_is_swap_p (load))
        fed_by_load = 1;
    }

  FOR_EACH_INSN_INFO_DEF (def, insn_info)
    {
      link = DF_REF_CHAIN (def);
      store = DF_REF_INSN (link->ref);
      if (insn_is_store_p (store) && insn_is_swap_p (store))
        feeds_store = 1;
    }

  return fed_by_load && feeds_store;
}

/* Return TRUE if insn is a swap fed by a load from the constant pool.  */
static bool
const_load_sequence_p (swap_web_entry *insn_entry, rtx insn)
{
  unsigned uid = INSN_UID (insn);
  if (!insn_entry[uid].is_swap || insn_entry[uid].is_load)
    return false;

  const_rtx tocrel_base;

  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
  df_ref use;

  /* Iterate over the definitions that are used by this insn.  Since
     this is known to be a swap insn, expect only one used definition.  */
  FOR_EACH_INSN_INFO_USE (use, insn_info)
    {
      struct df_link *def_link = DF_REF_CHAIN (use);

      /* If there is no def or the def is artificial or there are
         multiple defs, punt.  */
      if (!def_link || !def_link->ref || DF_REF_IS_ARTIFICIAL (def_link->ref)
          || def_link->next)
        return false;

      rtx def_insn = DF_REF_INSN (def_link->ref);
      unsigned uid2 = INSN_UID (def_insn);
      /* If this is not a load or is not a swap, return false.  */
      if (!insn_entry[uid2].is_load || !insn_entry[uid2].is_swap)
        return false;

      /* If the source of the rtl def is not a set from memory, return
         false.  */
      rtx body = PATTERN (def_insn);
      if (GET_CODE (body) != SET
          || GET_CODE (SET_SRC (body)) != VEC_SELECT
          || !MEM_P (XEXP (SET_SRC (body), 0)))
        return false;

      rtx mem = XEXP (SET_SRC (body), 0);
      rtx base_reg = XEXP (mem, 0);
      /* If the base address for the memory expression is not
         represented by a register, punt.  */
      if (!REG_P (base_reg))
        return false;

      df_ref base_use;
      insn_info = DF_INSN_INFO_GET (def_insn);
      FOR_EACH_INSN_INFO_USE (base_use, insn_info)
        {
          /* If base_use does not represent base_reg, look for another
             use.  */
          if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
            continue;

          struct df_link *base_def_link = DF_REF_CHAIN (base_use);
          if (!base_def_link || base_def_link->next)
            return false;

          /* Constants held on the stack are not "true" constants
             because their values are not part of the static load
             image.  If this constant's base reference is a stack
             or frame pointer, it is seen as an artificial
             reference.  */
          if (DF_REF_IS_ARTIFICIAL (base_def_link->ref))
            return false;

          rtx tocrel_insn = DF_REF_INSN (base_def_link->ref);
          rtx tocrel_body = PATTERN (tocrel_insn);
          rtx base, offset;
          if (GET_CODE (tocrel_body) != SET)
            return false;
          /* There is an extra level of indirection for small/large
             code models.  */
          rtx tocrel_expr = SET_SRC (tocrel_body);
          if (MEM_P (tocrel_expr))
            tocrel_expr = XEXP (tocrel_expr, 0);
          if (!toc_relative_expr_p (tocrel_expr, false, &tocrel_base, NULL))
            return false;
          split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);

          if (!SYMBOL_REF_P (base) || !CONSTANT_POOL_ADDRESS_P (base))
            return false;
          else
            {
              /* FIXME: The conditions under which
                   (SYMBOL_REF_P (const_vector)
                    && !CONSTANT_POOL_ADDRESS_P (const_vector))
                 are not well understood.  This code prevents
                 an internal compiler error which will occur in
                 replace_swapped_load_constant () if we were to return
                 true.  Some day, we should figure out how to properly
                 handle this condition in
                 replace_swapped_load_constant () and then we can
                 remove this special test.  */
              rtx const_vector = get_pool_constant (base);
              if (SYMBOL_REF_P (const_vector)
                  && CONSTANT_POOL_ADDRESS_P (const_vector))
                const_vector = get_pool_constant (const_vector);
              if (GET_CODE (const_vector) != CONST_VECTOR)
                return false;
            }
        }
    }
  return true;
}

/* Return TRUE iff OP matches a V2DF reduction pattern.  See the
   definition of vsx_reduc_<VEC_reduc_name>_v2df in vsx.md.  */
static bool
v2df_reduction_p (rtx op)
{
  if (GET_MODE (op) != V2DFmode)
    return false;

  enum rtx_code code = GET_CODE (op);
  if (code != PLUS && code != SMIN && code != SMAX)
    return false;

  rtx concat = XEXP (op, 0);
  if (GET_CODE (concat) != VEC_CONCAT)
    return false;

  rtx select0 = XEXP (concat, 0);
  rtx select1 = XEXP (concat, 1);
  if (GET_CODE (select0) != VEC_SELECT || GET_CODE (select1) != VEC_SELECT)
    return false;

  rtx reg0 = XEXP (select0, 0);
  rtx reg1 = XEXP (select1, 0);
  if (!rtx_equal_p (reg0, reg1) || !REG_P (reg0))
    return false;

  rtx parallel0 = XEXP (select0, 1);
  rtx parallel1 = XEXP (select1, 1);
  if (GET_CODE (parallel0) != PARALLEL || GET_CODE (parallel1) != PARALLEL)
    return false;

  if (!rtx_equal_p (XVECEXP (parallel0, 0, 0), const1_rtx)
      || !rtx_equal_p (XVECEXP (parallel1, 0, 0), const0_rtx))
    return false;

  return true;
}
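/* Illustration only: the checks above accept a shape roughly like

     (plus:V2DF (vec_concat:V2DF
                  (vec_select:DF (reg:V2DF x) (parallel [(const_int 1)]))
                  (vec_select:DF (reg:V2DF x) (parallel [(const_int 0)])))
                ...)

   with SMIN or SMAX allowed in place of PLUS; the second operand of
   the arithmetic code is not examined here.  See vsx.md for the exact
   insn pattern.  */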
/* Return 1 iff OP is an operand that will not be affected by having
   vector doublewords swapped in memory.  */
static unsigned int
rtx_is_swappable_p (rtx op, unsigned int *special)
{
  enum rtx_code code = GET_CODE (op);
  int i, j;
  rtx parallel;

  switch (code)
    {
    case LABEL_REF:
    case SYMBOL_REF:
    case CLOBBER:
    case REG:
      return 1;

    case VEC_CONCAT:
    case ASM_INPUT:
    case ASM_OPERANDS:
      return 0;

    case CONST_VECTOR:
      {
        *special = SH_CONST_VECTOR;
        return 1;
      }

    case VEC_DUPLICATE:
      /* Opportunity: If XEXP (op, 0) has the same mode as the result,
         and XEXP (op, 1) is a PARALLEL with a single QImode const int,
         it represents a vector splat for which we can do special
         handling.  */
      if (CONST_INT_P (XEXP (op, 0)))
        return 1;
      else if (REG_P (XEXP (op, 0))
               && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
        /* This catches V2DF and V2DI splat, at a minimum.  */
        return 1;
      else if (GET_CODE (XEXP (op, 0)) == TRUNCATE
               && REG_P (XEXP (XEXP (op, 0), 0))
               && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
        /* This catches splat of a truncated value.  */
        return 1;
      else if (GET_CODE (XEXP (op, 0)) == VEC_SELECT)
        /* If the duplicated item is from a select, defer to the select
           processing to see if we can change the lane for the splat.  */
        return rtx_is_swappable_p (XEXP (op, 0), special);
      else
        return 0;

    case VEC_SELECT:
      /* A vec_extract operation is ok if we change the lane.  */
      if (REG_P (XEXP (op, 0))
          && GET_MODE_INNER (GET_MODE (XEXP (op, 0))) == GET_MODE (op)
          && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
          && XVECLEN (parallel, 0) == 1
          && CONST_INT_P (XVECEXP (parallel, 0, 0)))
        {
          *special = SH_EXTRACT;
          return 1;
        }
      /* An XXPERMDI is ok if we adjust the lanes.  Note that if the
         XXPERMDI is a swap operation, it will be identified by
         insn_is_swap_p and therefore we won't get here.  */
      else if (GET_CODE (XEXP (op, 0)) == VEC_CONCAT
               && (GET_MODE (XEXP (op, 0)) == V4DFmode
                   || GET_MODE (XEXP (op, 0)) == V4DImode)
               && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
               && XVECLEN (parallel, 0) == 2
               && CONST_INT_P (XVECEXP (parallel, 0, 0))
               && CONST_INT_P (XVECEXP (parallel, 0, 1)))
        {
          *special = SH_XXPERMDI;
          return 1;
        }
      else if (v2df_reduction_p (op))
        return 1;
      else
        return 0;

    case UNSPEC:
      {
        /* Various operations are unsafe for this optimization, at least
           without significant additional work.  Permutes are obviously
           problematic, as both the permute control vector and the ordering
           of the target values are invalidated by doubleword swapping.
           Vector pack and unpack modify the number of vector lanes.
           Merge-high/low will not operate correctly on swapped operands.
           Vector shifts across element boundaries are clearly uncool,
           as are vector select and concatenate operations.  Vector
           sum-across instructions define one operand with a specific
           order-dependent element, so additional fixup code would be
           needed to make those work.  Vector set and non-immediate-form
           vector splat are element-order sensitive.  A few of these
           cases might be workable with special handling if required.
           Adding cost modeling would be appropriate in some cases.  */
        int val = XINT (op, 1);
        switch (val)
          {
          default:
            break;
          case UNSPEC_VBPERMQ:
          case UNSPEC_VMRGH_DIRECT:
          case UNSPEC_VMRGL_DIRECT:
          case UNSPEC_VPACK_SIGN_SIGN_SAT:
          case UNSPEC_VPACK_SIGN_UNS_SAT:
          case UNSPEC_VPACK_UNS_UNS_MOD:
          case UNSPEC_VPACK_UNS_UNS_MOD_DIRECT:
          case UNSPEC_VPACK_UNS_UNS_SAT:
          case UNSPEC_VPERM:
          case UNSPEC_VPERM_UNS:
          case UNSPEC_VPERMHI:
          case UNSPEC_VPERMSI:
          case UNSPEC_VPERMXOR:
          case UNSPEC_VPKPX:
          case UNSPEC_VSLDOI:
          case UNSPEC_VSLO:
          case UNSPEC_VSRO:
          case UNSPEC_VSUM2SWS:
          case UNSPEC_VSUM4S:
          case UNSPEC_VSUM4UBS:
          case UNSPEC_VSUMSWS:
          case UNSPEC_VSUMSWS_DIRECT:
          case UNSPEC_VSX_CONCAT:
          case UNSPEC_VSX_CVDPSPN:
          case UNSPEC_VSX_CVSPDP:
          case UNSPEC_VSX_CVSPDPN:
          case UNSPEC_VSX_EXTRACT:
          case UNSPEC_VSX_SET:
          case UNSPEC_VSX_SLDWI:
          case UNSPEC_VSX_VSLO:
          case UNSPEC_VUNPACK_HI_SIGN:
          case UNSPEC_VUNPACK_HI_SIGN_DIRECT:
          case UNSPEC_VUNPACK_LO_SIGN:
          case UNSPEC_VUNPACK_LO_SIGN_DIRECT:
          case UNSPEC_VUPKHPX:
          case UNSPEC_VUPKHS_V4SF:
          case UNSPEC_VUPKHU_V4SF:
          case UNSPEC_VUPKLPX:
          case UNSPEC_VUPKLS_V4SF:
          case UNSPEC_VUPKLU_V4SF:
            return 0;
          case UNSPEC_VSPLT_DIRECT:
          case UNSPEC_VSX_XXSPLTD:
            *special = SH_SPLAT;
            return 1;
          case UNSPEC_REDUC_PLUS:
          case UNSPEC_REDUC:
            return 1;
          case UNSPEC_VPMSUM:
            /* vpmsumd is not swappable, but vpmsum[bhw] are.  */
            if (GET_MODE (op) == V2DImode)
              return 0;
            break;
          }
      }

    default:
      break;
    }

  const char *fmt = GET_RTX_FORMAT (code);
  int ok = 1;

  for (i = 0; i < GET_RTX_LENGTH (code); ++i)
    if (fmt[i] == 'e' || fmt[i] == 'u')
      {
        unsigned int special_op = SH_NONE;
        ok &= rtx_is_swappable_p (XEXP (op, i), &special_op);
        if (special_op == SH_NONE)
          continue;
        /* Ensure we never have two kinds of special handling
           for the same insn.  */
        if (*special != SH_NONE && *special != special_op)
          return 0;
        *special = special_op;
      }
    else if (fmt[i] == 'E')
      for (j = 0; j < XVECLEN (op, i); ++j)
        {
          unsigned int special_op = SH_NONE;
          ok &= rtx_is_swappable_p (XVECEXP (op, i, j), &special_op);
          if (special_op == SH_NONE)
            continue;
          /* Ensure we never have two kinds of special handling
             for the same insn.  */
          if (*special != SH_NONE && *special != special_op)
            return 0;
          *special = special_op;
        }

  return ok;
}
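/* Illustration only: a single-lane extract such as

     (vec_select:DF (reg:V2DF x) (parallel [(const_int 0)]))

   is reported as swappable with *SPECIAL set to SH_EXTRACT; the fixup
   in adjust_extract below later flips the selected lane (0 <-> 1 for
   V2DF/V2DI, and in general lane plus or minus half the element
   count).  */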
/* Return 1 iff INSN is an operand that will not be affected by
   having vector doublewords swapped in memory (in which case
   *SPECIAL is unchanged), or that can be modified to be correct
   if vector doublewords are swapped in memory (in which case
   *SPECIAL is changed to a value indicating how).  */
static unsigned int
insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn,
                     unsigned int *special)
{
  /* Calls are always bad.  */
  if (GET_CODE (insn) == CALL_INSN)
    return 0;

  /* Loads and stores seen here are not permuting, but we can still
     fix them up by converting them to permuting ones.  Exceptions:
     UNSPEC_LVE, UNSPEC_LVX, and UNSPEC_STVX, which have a PARALLEL
     body instead of a SET; and UNSPEC_STVE, which has an UNSPEC
     for the SET source.  Also we must now make an exception for lvx
     and stvx when they are not in the UNSPEC_LVX/STVX form (with the
     explicit "& -16") since this leads to unrecognizable insns.  */
  rtx body = PATTERN (insn);
  int i = INSN_UID (insn);

  if (insn_entry[i].is_load)
    {
      if (GET_CODE (body) == SET)
        {
          rtx rhs = SET_SRC (body);
          /* Even without a swap, the RHS might be a vec_select for, say,
             a byte-reversing load.  */
          if (!MEM_P (rhs))
            return 0;
          if (GET_CODE (XEXP (rhs, 0)) == AND)
            return 0;

          *special = SH_NOSWAP_LD;
          return 1;
        }
      else
        return 0;
    }

  if (insn_entry[i].is_store)
    {
      if (GET_CODE (body) == SET
          && GET_CODE (SET_SRC (body)) != UNSPEC
          && GET_CODE (SET_SRC (body)) != VEC_SELECT)
        {
          rtx lhs = SET_DEST (body);
          /* Even without a swap, the LHS might be a vec_select for, say,
             a byte-reversing store.  */
          if (!MEM_P (lhs))
            return 0;
          if (GET_CODE (XEXP (lhs, 0)) == AND)
            return 0;

          *special = SH_NOSWAP_ST;
          return 1;
        }
      else
        return 0;
    }

  /* A convert to single precision can be left as is provided that
     all of its uses are in xxspltw instructions that splat BE element
     zero.  */
  if (GET_CODE (body) == SET
      && GET_CODE (SET_SRC (body)) == UNSPEC
      && XINT (SET_SRC (body), 1) == UNSPEC_VSX_CVDPSPN)
    {
      df_ref def;
      struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);

      FOR_EACH_INSN_INFO_DEF (def, insn_info)
        {
          struct df_link *link = DF_REF_CHAIN (def);
          if (!link)
            return 0;

          for (; link; link = link->next)
            {
              rtx use_insn = DF_REF_INSN (link->ref);
              rtx use_body = PATTERN (use_insn);
              if (GET_CODE (use_body) != SET
                  || GET_CODE (SET_SRC (use_body)) != UNSPEC
                  || XINT (SET_SRC (use_body), 1) != UNSPEC_VSX_XXSPLTW
                  || XVECEXP (SET_SRC (use_body), 0, 1) != const0_rtx)
                return 0;
            }
        }

      return 1;
    }

  /* A concatenation of two doublewords is ok if we reverse the
     order of the inputs.  */
  if (GET_CODE (body) == SET
      && GET_CODE (SET_SRC (body)) == VEC_CONCAT
      && (GET_MODE (SET_SRC (body)) == V2DFmode
          || GET_MODE (SET_SRC (body)) == V2DImode))
    {
      *special = SH_CONCAT;
      return 1;
    }

  /* V2DF reductions are always swappable.  */
  if (GET_CODE (body) == PARALLEL)
    {
      rtx expr = XVECEXP (body, 0, 0);
      if (GET_CODE (expr) == SET
          && v2df_reduction_p (SET_SRC (expr)))
        return 1;
    }

  /* An UNSPEC_VPERM is ok if the mask operand is loaded from the
     constant pool.  */
  if (GET_CODE (body) == SET
      && GET_CODE (SET_SRC (body)) == UNSPEC
      && XINT (SET_SRC (body), 1) == UNSPEC_VPERM
      && XVECLEN (SET_SRC (body), 0) == 3
      && REG_P (XVECEXP (SET_SRC (body), 0, 2)))
    {
      rtx mask_reg = XVECEXP (SET_SRC (body), 0, 2);
      struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
      df_ref use;
      FOR_EACH_INSN_INFO_USE (use, insn_info)
        if (rtx_equal_p (DF_REF_REG (use), mask_reg))
          {
            struct df_link *def_link = DF_REF_CHAIN (use);
            /* Punt if multiple definitions for this reg.  */
            if (def_link && !def_link->next
                && const_load_sequence_p (insn_entry,
                                          DF_REF_INSN (def_link->ref)))
              {
                *special = SH_VPERM;
                return 1;
              }
          }
    }

  /* Otherwise check the operands for vector lane violations.  */
  return rtx_is_swappable_p (body, special);
}

enum chain_purpose { FOR_LOADS, FOR_STORES };
/* Return true if the UD or DU chain headed by LINK is non-empty,
   and every entry on the chain references an insn that is a
   register swap.  Furthermore, if PURPOSE is FOR_LOADS, each such
   register swap must have only permuting loads as reaching defs.
   If PURPOSE is FOR_STORES, each such register swap must have only
   register swaps or permuting stores as reached uses.  */
static bool
chain_contains_only_swaps (swap_web_entry *insn_entry, struct df_link *link,
                           enum chain_purpose purpose)
{
  if (!link)
    return false;

  for (; link; link = link->next)
    {
      if (!ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (DF_REF_REG (link->ref))))
        continue;

      if (DF_REF_IS_ARTIFICIAL (link->ref))
        return false;

      rtx reached_insn = DF_REF_INSN (link->ref);
      unsigned uid = INSN_UID (reached_insn);
      struct df_insn_info *insn_info = DF_INSN_INFO_GET (reached_insn);

      if (!insn_entry[uid].is_swap || insn_entry[uid].is_load
          || insn_entry[uid].is_store)
        return false;

      if (purpose == FOR_LOADS)
        {
          df_ref use;
          FOR_EACH_INSN_INFO_USE (use, insn_info)
            {
              struct df_link *swap_link = DF_REF_CHAIN (use);

              while (swap_link)
                {
                  if (DF_REF_IS_ARTIFICIAL (link->ref))
                    return false;

                  rtx swap_def_insn = DF_REF_INSN (swap_link->ref);
                  unsigned uid2 = INSN_UID (swap_def_insn);

                  /* Only permuting loads are allowed.  */
                  if (!insn_entry[uid2].is_swap || !insn_entry[uid2].is_load)
                    return false;

                  swap_link = swap_link->next;
                }
            }
        }
      else if (purpose == FOR_STORES)
        {
          df_ref def;
          FOR_EACH_INSN_INFO_DEF (def, insn_info)
            {
              struct df_link *swap_link = DF_REF_CHAIN (def);

              while (swap_link)
                {
                  if (DF_REF_IS_ARTIFICIAL (link->ref))
                    return false;

                  rtx swap_use_insn = DF_REF_INSN (swap_link->ref);
                  unsigned uid2 = INSN_UID (swap_use_insn);

                  /* Permuting stores or register swaps are allowed.  */
                  if (!insn_entry[uid2].is_swap || insn_entry[uid2].is_load)
                    return false;

                  swap_link = swap_link->next;
                }
            }
        }
    }

  return true;
}

/* Mark the xxswapdi instructions associated with permuting loads and
   stores for removal.  Note that we only flag them for deletion here,
   as there is a possibility of a swap being reached from multiple
   loads, etc.  */
static void
mark_swaps_for_removal (swap_web_entry *insn_entry, unsigned int i)
{
  rtx insn = insn_entry[i].insn;
  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);

  if (insn_entry[i].is_load)
    {
      df_ref def;
      FOR_EACH_INSN_INFO_DEF (def, insn_info)
        {
          struct df_link *link = DF_REF_CHAIN (def);

          /* We know by now that these are swaps, so we can delete
             them confidently.  */
          while (link)
            {
              rtx use_insn = DF_REF_INSN (link->ref);
              insn_entry[INSN_UID (use_insn)].will_delete = 1;
              link = link->next;
            }
        }
    }
  else if (insn_entry[i].is_store)
    {
      df_ref use;
      FOR_EACH_INSN_INFO_USE (use, insn_info)
        {
          /* Ignore uses for addressability.  */
          machine_mode mode = GET_MODE (DF_REF_REG (use));
          if (!ALTIVEC_OR_VSX_VECTOR_MODE (mode))
            continue;

          struct df_link *link = DF_REF_CHAIN (use);

          /* We know by now that these are swaps, so we can delete
             them confidently.  */
          while (link)
            {
              rtx def_insn = DF_REF_INSN (link->ref);
              insn_entry[INSN_UID (def_insn)].will_delete = 1;
              link = link->next;
            }
        }
    }
}

/* *OP_PTR is either a CONST_VECTOR or an expression containing one.
   Swap the first half of the vector with the second in the first
   case.  Recurse to find it in the second.  */
static void
swap_const_vector_halves (rtx *op_ptr)
{
  int i;
  rtx op = *op_ptr;
  enum rtx_code code = GET_CODE (op);
  if (GET_CODE (op) == CONST_VECTOR)
    {
      int units = GET_MODE_NUNITS (GET_MODE (op));
      rtx_vector_builder builder (GET_MODE (op), units, 1);
      for (i = 0; i < units / 2; ++i)
        builder.quick_push (CONST_VECTOR_ELT (op, i + units / 2));
      for (i = 0; i < units / 2; ++i)
        builder.quick_push (CONST_VECTOR_ELT (op, i));
      *op_ptr = builder.build ();
    }
  else
    {
      int j;
      const char *fmt = GET_RTX_FORMAT (code);
      for (i = 0; i < GET_RTX_LENGTH (code); ++i)
        if (fmt[i] == 'e' || fmt[i] == 'u')
          swap_const_vector_halves (&XEXP (op, i));
        else if (fmt[i] == 'E')
          for (j = 0; j < XVECLEN (op, i); ++j)
            swap_const_vector_halves (&XVECEXP (op, i, j));
    }
}

/* Find all subregs of a vector expression that perform a narrowing,
   and adjust the subreg index to account for doubleword swapping.  */
static void
adjust_subreg_index (rtx op)
{
  enum rtx_code code = GET_CODE (op);
  if (code == SUBREG
      && (GET_MODE_SIZE (GET_MODE (op))
          < GET_MODE_SIZE (GET_MODE (XEXP (op, 0)))))
    {
      unsigned int index = SUBREG_BYTE (op);
      if (index < 8)
        index += 8;
      else
        index -= 8;
      SUBREG_BYTE (op) = index;
    }

  const char *fmt = GET_RTX_FORMAT (code);
  int i, j;
  for (i = 0; i < GET_RTX_LENGTH (code); ++i)
    if (fmt[i] == 'e' || fmt[i] == 'u')
      adjust_subreg_index (XEXP (op, i));
    else if (fmt[i] == 'E')
      for (j = 0; j < XVECLEN (op, i); ++j)
        adjust_subreg_index (XVECEXP (op, i, j));
}

/* Convert the non-permuting load INSN to a permuting one.  */
static void
permute_load (rtx_insn *insn)
{
  rtx body = PATTERN (insn);
  rtx mem_op = SET_SRC (body);
  rtx tgt_reg = SET_DEST (body);
  machine_mode mode = GET_MODE (tgt_reg);
  int n_elts = GET_MODE_NUNITS (mode);
  int half_elts = n_elts / 2;
  rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
  int i, j;
  for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
    XVECEXP (par, 0, i) = GEN_INT (j);
  for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
    XVECEXP (par, 0, i) = GEN_INT (j);
  rtx sel = gen_rtx_VEC_SELECT (mode, mem_op, par);
  SET_SRC (body) = sel;
  INSN_CODE (insn) = -1; /* Force re-recognition.  */
  df_insn_rescan (insn);

  if (dump_file)
    fprintf (dump_file, "Replacing load %d with permuted load\n",
             INSN_UID (insn));
}
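/* Illustration only: for a V4SI load, the loops above build the
   selection vector [2 3 0 1], so

     (set (reg:V4SI r) (mem:V4SI addr))

   becomes

     (set (reg:V4SI r)
          (vec_select:V4SI (mem:V4SI addr)
                           (parallel [(const_int 2) (const_int 3)
                                      (const_int 0) (const_int 1)])))

   which is the same doubleword-permuting shape that insn_is_swap_p
   recognizes for a load-and-swap sequence.  */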
/* Convert the non-permuting store INSN to a permuting one.  */
static void
permute_store (rtx_insn *insn)
{
  rtx body = PATTERN (insn);
  rtx src_reg = SET_SRC (body);
  machine_mode mode = GET_MODE (src_reg);
  int n_elts = GET_MODE_NUNITS (mode);
  int half_elts = n_elts / 2;
  rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
  int i, j;
  for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
    XVECEXP (par, 0, i) = GEN_INT (j);
  for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
    XVECEXP (par, 0, i) = GEN_INT (j);
  rtx sel = gen_rtx_VEC_SELECT (mode, src_reg, par);
  SET_SRC (body) = sel;
  INSN_CODE (insn) = -1; /* Force re-recognition.  */
  df_insn_rescan (insn);

  if (dump_file)
    fprintf (dump_file, "Replacing store %d with permuted store\n",
             INSN_UID (insn));
}

/* Given OP that contains a vector extract operation, adjust the index
   of the extracted lane to account for the doubleword swap.  */
static void
adjust_extract (rtx_insn *insn)
{
  rtx pattern = PATTERN (insn);
  if (GET_CODE (pattern) == PARALLEL)
    pattern = XVECEXP (pattern, 0, 0);
  rtx src = SET_SRC (pattern);
  /* The vec_select may be wrapped in a vec_duplicate for a splat, so
     account for that.  */
  rtx sel = GET_CODE (src) == VEC_DUPLICATE ? XEXP (src, 0) : src;
  rtx par = XEXP (sel, 1);
  int half_elts = GET_MODE_NUNITS (GET_MODE (XEXP (sel, 0))) >> 1;
  int lane = INTVAL (XVECEXP (par, 0, 0));
  lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
  XVECEXP (par, 0, 0) = GEN_INT (lane);
  INSN_CODE (insn) = -1; /* Force re-recognition.  */
  df_insn_rescan (insn);

  if (dump_file)
    fprintf (dump_file, "Changing lane for extract %d\n", INSN_UID (insn));
}

/* Given OP that contains a vector direct-splat operation, adjust the index
   of the source lane to account for the doubleword swap.  */
static void
adjust_splat (rtx_insn *insn)
{
  rtx body = PATTERN (insn);
  rtx unspec = XEXP (body, 1);
  int half_elts = GET_MODE_NUNITS (GET_MODE (unspec)) >> 1;
  int lane = INTVAL (XVECEXP (unspec, 0, 1));
  lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
  XVECEXP (unspec, 0, 1) = GEN_INT (lane);
  INSN_CODE (insn) = -1; /* Force re-recognition.  */
  df_insn_rescan (insn);

  if (dump_file)
    fprintf (dump_file, "Changing lane for splat %d\n", INSN_UID (insn));
}

/* Given OP that contains an XXPERMDI operation (that is not a doubleword
   swap), reverse the order of the source operands and adjust the indices
   of the source lanes to account for doubleword reversal.  */
static void
adjust_xxpermdi (rtx_insn *insn)
{
  rtx set = PATTERN (insn);
  rtx select = XEXP (set, 1);
  rtx concat = XEXP (select, 0);
  rtx src0 = XEXP (concat, 0);
  XEXP (concat, 0) = XEXP (concat, 1);
  XEXP (concat, 1) = src0;
  rtx parallel = XEXP (select, 1);
  int lane0 = INTVAL (XVECEXP (parallel, 0, 0));
  int lane1 = INTVAL (XVECEXP (parallel, 0, 1));
  int new_lane0 = 3 - lane1;
  int new_lane1 = 3 - lane0;
  XVECEXP (parallel, 0, 0) = GEN_INT (new_lane0);
  XVECEXP (parallel, 0, 1) = GEN_INT (new_lane1);
  INSN_CODE (insn) = -1; /* Force re-recognition.  */
  df_insn_rescan (insn);

  if (dump_file)
    fprintf (dump_file, "Changing lanes for xxpermdi %d\n", INSN_UID (insn));
}
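/* Illustration only: the rewrite above exchanges the two concatenated
   sources and maps the selected lanes (lane0, lane1) to
   (3 - lane1, 3 - lane0).  For instance, a select of lanes (0, 2)
   from (vec_concat A B) becomes a select of lanes (1, 3) from
   (vec_concat B A), which picks out the same doublewords once A and B
   are both held in doubleword-swapped form.  */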
/* Given OP that contains a VEC_CONCAT operation of two doublewords,
   reverse the order of those inputs.  */
static void
adjust_concat (rtx_insn *insn)
{
  rtx set = PATTERN (insn);
  rtx concat = XEXP (set, 1);
  rtx src0 = XEXP (concat, 0);
  XEXP (concat, 0) = XEXP (concat, 1);
  XEXP (concat, 1) = src0;
  INSN_CODE (insn) = -1; /* Force re-recognition.  */
  df_insn_rescan (insn);

  if (dump_file)
    fprintf (dump_file, "Reversing inputs for concat %d\n", INSN_UID (insn));
}

/* Given an UNSPEC_VPERM insn, modify the mask loaded from the
   constant pool to reflect swapped doublewords.  */
static void
adjust_vperm (rtx_insn *insn)
{
  /* We previously determined that the UNSPEC_VPERM was fed by a
     swap of a swapping load of a TOC-relative constant pool symbol.
     Find the MEM in the swapping load and replace it with a MEM for
     the adjusted mask constant.  */
  rtx set = PATTERN (insn);
  rtx mask_reg = XVECEXP (SET_SRC (set), 0, 2);

  /* Find the swap.  */
  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
  df_ref use;
  rtx_insn *swap_insn = 0;
  FOR_EACH_INSN_INFO_USE (use, insn_info)
    if (rtx_equal_p (DF_REF_REG (use), mask_reg))
      {
        struct df_link *def_link = DF_REF_CHAIN (use);
        gcc_assert (def_link && !def_link->next);
        swap_insn = DF_REF_INSN (def_link->ref);
        break;
      }
  gcc_assert (swap_insn);

  /* Find the load.  */
  insn_info = DF_INSN_INFO_GET (swap_insn);
  rtx_insn *load_insn = 0;
  FOR_EACH_INSN_INFO_USE (use, insn_info)
    {
      struct df_link *def_link = DF_REF_CHAIN (use);
      gcc_assert (def_link && !def_link->next);
      load_insn = DF_REF_INSN (def_link->ref);
      break;
    }
  gcc_assert (load_insn);

  /* Find the TOC-relative symbol access.  */
  insn_info = DF_INSN_INFO_GET (load_insn);
  rtx_insn *tocrel_insn = 0;
  FOR_EACH_INSN_INFO_USE (use, insn_info)
    {
      struct df_link *def_link = DF_REF_CHAIN (use);
      gcc_assert (def_link && !def_link->next);
      tocrel_insn = DF_REF_INSN (def_link->ref);
      break;
    }
  gcc_assert (tocrel_insn);

  /* Find the embedded CONST_VECTOR.  We have to call toc_relative_expr_p
     to set tocrel_base; otherwise it would be unnecessary as we've
     already established it will return true.  */
  rtx base, offset;
  const_rtx tocrel_base;
  rtx tocrel_expr = SET_SRC (PATTERN (tocrel_insn));
  /* There is an extra level of indirection for small/large code models.  */
  if (MEM_P (tocrel_expr))
    tocrel_expr = XEXP (tocrel_expr, 0);
  if (!toc_relative_expr_p (tocrel_expr, false, &tocrel_base, NULL))
    gcc_unreachable ();
  split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
  rtx const_vector = get_pool_constant (base);
  /* With the extra indirection, get_pool_constant will produce the
     real constant from the reg_equal expression, so get the real
     constant.  */
  if (SYMBOL_REF_P (const_vector))
    const_vector = get_pool_constant (const_vector);
  gcc_assert (GET_CODE (const_vector) == CONST_VECTOR);

  /* Create an adjusted mask from the initial mask.  */
  unsigned int new_mask[16], i, val;
  for (i = 0; i < 16; ++i)
    {
      val = INTVAL (XVECEXP (const_vector, 0, i));
      if (val < 16)
        new_mask[i] = (val + 8) % 16;
      else
        new_mask[i] = ((val + 8) % 16) + 16;
    }

  /* Create a new CONST_VECTOR and a MEM that references it.  */
  rtx vals = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
  for (i = 0; i < 16; ++i)
    XVECEXP (vals, 0, i) = GEN_INT (new_mask[i]);
  rtx new_const_vector = gen_rtx_CONST_VECTOR (V16QImode, XVEC (vals, 0));
  rtx new_mem = force_const_mem (V16QImode, new_const_vector);
  /* This gives us a MEM whose base operand is a SYMBOL_REF, which we
     can't recognize.  Force the SYMBOL_REF into a register.  */
  if (!REG_P (XEXP (new_mem, 0)))
    {
      rtx base_reg = force_reg (Pmode, XEXP (new_mem, 0));
      XEXP (new_mem, 0) = base_reg;
      /* Move the newly created insn ahead of the load insn.  */
      rtx_insn *force_insn = get_last_insn ();
      remove_insn (force_insn);
      rtx_insn *before_load_insn = PREV_INSN (load_insn);
      add_insn_after (force_insn, before_load_insn, BLOCK_FOR_INSN (load_insn));
      df_insn_rescan (before_load_insn);
      df_insn_rescan (force_insn);
    }

  /* Replace the MEM in the load instruction and rescan it.  */
  XEXP (SET_SRC (PATTERN (load_insn)), 0) = new_mem;
  INSN_CODE (load_insn) = -1; /* Force re-recognition.  */
  df_insn_rescan (load_insn);

  if (dump_file)
    fprintf (dump_file, "Adjusting mask for vperm %d\n", INSN_UID (insn));
}

/* The insn described by INSN_ENTRY[I] can be swapped, but only
   with special handling.  Take care of that here.  */
static void
handle_special_swappables (swap_web_entry *insn_entry, unsigned i)
{
  rtx_insn *insn = insn_entry[i].insn;
  rtx body = PATTERN (insn);

  switch (insn_entry[i].special_handling)
    {
    default:
      gcc_unreachable ();
    case SH_CONST_VECTOR:
      {
        /* A CONST_VECTOR will only show up somewhere in the RHS of a SET.  */
        gcc_assert (GET_CODE (body) == SET);
        swap_const_vector_halves (&SET_SRC (body));
        if (dump_file)
          fprintf (dump_file, "Swapping constant halves in insn %d\n", i);
        break;
      }
    case SH_SUBREG:
      /* A subreg of the same size is already safe.  For subregs that
         select a smaller portion of a reg, adjust the index for
         swapped doublewords.  */
      adjust_subreg_index (body);
      if (dump_file)
        fprintf (dump_file, "Adjusting subreg in insn %d\n", i);
      break;
    case SH_NOSWAP_LD:
      /* Convert a non-permuting load to a permuting one.  */
      permute_load (insn);
      break;
    case SH_NOSWAP_ST:
      /* Convert a non-permuting store to a permuting one.  */
      permute_store (insn);
      break;
    case SH_EXTRACT:
      /* Change the lane on an extract operation.  */
      adjust_extract (insn);
      break;
    case SH_SPLAT:
      /* Change the lane on a direct-splat operation.  */
      adjust_splat (insn);
      break;
    case SH_XXPERMDI:
      /* Change the lanes on an XXPERMDI operation.  */
      adjust_xxpermdi (insn);
      break;
    case SH_CONCAT:
      /* Reverse the order of a concatenation operation.  */
      adjust_concat (insn);
      break;
    case SH_VPERM:
      /* Change the mask loaded from the constant pool for a VPERM.  */
      adjust_vperm (insn);
      break;
    }
}

/* Find the insn from the Ith table entry, which is known to be a
   register swap Y = SWAP(X).  Replace it with a copy Y = X.  */
static void
replace_swap_with_copy (swap_web_entry *insn_entry, unsigned i)
{
  rtx_insn *insn = insn_entry[i].insn;
  rtx body = PATTERN (insn);
  rtx src_reg = XEXP (SET_SRC (body), 0);
  rtx copy = gen_rtx_SET (SET_DEST (body), src_reg);
  rtx_insn *new_insn = emit_insn_before (copy, insn);
  set_block_for_insn (new_insn, BLOCK_FOR_INSN (insn));
  df_insn_rescan (new_insn);

  if (dump_file)
    {
      unsigned int new_uid = INSN_UID (new_insn);
      fprintf (dump_file, "Replacing swap %d with copy %d\n", i, new_uid);
    }

  df_insn_delete (insn);
  remove_insn (insn);
  insn->set_deleted ();
}
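/* Illustration only: a register swap such as

     (set (reg:V2DI y)
          (vec_select:V2DI (reg:V2DI x)
                           (parallel [(const_int 1) (const_int 0)])))

   is replaced by the plain copy (set (reg:V2DI y) (reg:V2DI x));
   forward propagation later removes the copy where it can.  */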
/* INSN is known to contain a SUBREG, which we can normally handle,
   but if the SUBREG itself contains a MULT then we need to leave it alone
   to avoid turning a mult_hipart into a mult_lopart, for example.  */
static bool
has_part_mult (rtx_insn *insn)
{
  rtx body = PATTERN (insn);
  if (GET_CODE (body) != SET)
    return false;
  rtx src = SET_SRC (body);
  if (GET_CODE (src) != SUBREG)
    return false;
  rtx inner = XEXP (src, 0);
  return (GET_CODE (inner) == MULT);
}

/* Make NEW_MEM_EXP's attributes and flags resemble those of
   ORIGINAL_MEM_EXP.  */
static void
mimic_memory_attributes_and_flags (rtx new_mem_exp, const_rtx original_mem_exp)
{
  RTX_FLAG (new_mem_exp, jump) = RTX_FLAG (original_mem_exp, jump);
  RTX_FLAG (new_mem_exp, call) = RTX_FLAG (original_mem_exp, call);
  RTX_FLAG (new_mem_exp, unchanging) = RTX_FLAG (original_mem_exp, unchanging);
  RTX_FLAG (new_mem_exp, volatil) = RTX_FLAG (original_mem_exp, volatil);
  RTX_FLAG (new_mem_exp, frame_related) =
    RTX_FLAG (original_mem_exp, frame_related);

  /* The following fields may not be used with MEM subexpressions.  */
  RTX_FLAG (new_mem_exp, in_struct) = RTX_FLAG (original_mem_exp, in_struct);
  RTX_FLAG (new_mem_exp, return_val) = RTX_FLAG (original_mem_exp, return_val);

  struct mem_attrs original_attrs = *get_mem_attrs (original_mem_exp);

  alias_set_type set = original_attrs.alias;
  set_mem_alias_set (new_mem_exp, set);

  addr_space_t addrspace = original_attrs.addrspace;
  set_mem_addr_space (new_mem_exp, addrspace);

  unsigned int align = original_attrs.align;
  set_mem_align (new_mem_exp, align);

  tree expr = original_attrs.expr;
  set_mem_expr (new_mem_exp, expr);

  if (original_attrs.offset_known_p)
    {
      HOST_WIDE_INT offset = original_attrs.offset;
      set_mem_offset (new_mem_exp, offset);
    }
  else
    clear_mem_offset (new_mem_exp);

  if (original_attrs.size_known_p)
    {
      HOST_WIDE_INT size = original_attrs.size;
      set_mem_size (new_mem_exp, size);
    }
  else
    clear_mem_size (new_mem_exp);
}

/* Generate an rtx expression to represent use of the stvx insn to store
   the value represented by register SRC_EXP into the memory at address
   DEST_EXP, with vector mode MODE.  */
rtx
rs6000_gen_stvx (enum machine_mode mode, rtx dest_exp, rtx src_exp)
{
  rtx stvx;

  if (mode == V16QImode)
    stvx = gen_altivec_stvx_v16qi (src_exp, dest_exp);
  else if (mode == V8HImode)
    stvx = gen_altivec_stvx_v8hi (src_exp, dest_exp);
#ifdef HAVE_V8HFmode
  else if (mode == V8HFmode)
    stvx = gen_altivec_stvx_v8hf (src_exp, dest_exp);
#endif
  else if (mode == V4SImode)
    stvx = gen_altivec_stvx_v4si (src_exp, dest_exp);
  else if (mode == V4SFmode)
    stvx = gen_altivec_stvx_v4sf (src_exp, dest_exp);
  else if (mode == V2DImode)
    stvx = gen_altivec_stvx_v2di (src_exp, dest_exp);
  else if (mode == V2DFmode)
    stvx = gen_altivec_stvx_v2df (src_exp, dest_exp);
  else if (mode == V1TImode)
    stvx = gen_altivec_stvx_v1ti (src_exp, dest_exp);
  else
    /* KFmode, TFmode, other modes not expected in this context.  */
    gcc_unreachable ();

  rtx new_mem_exp = SET_DEST (PATTERN (stvx));
  mimic_memory_attributes_and_flags (new_mem_exp, dest_exp);
  return stvx;
}

/* Given that STORE_INSN represents an aligned store-with-swap of a
   swapped value, replace the store with an aligned store (without
   swap) and replace the swap with a copy insn.  */
static void
replace_swapped_aligned_store (swap_web_entry *insn_entry,
                               rtx_insn *store_insn)
{
  unsigned uid = INSN_UID (store_insn);
  gcc_assert (insn_entry[uid].is_swap && insn_entry[uid].is_store);

  rtx body = PATTERN (store_insn);
  rtx dest_address = XEXP (SET_DEST (body), 0);
  rtx swap_reg = XEXP (SET_SRC (body), 0);
  gcc_assert (REG_P (dest_address)
              || rs6000_sum_of_two_registers_p (dest_address));

  /* Find the swap instruction that provides the value to be stored by
     this store-with-swap instruction.  */
  struct df_insn_info *insn_info = DF_INSN_INFO_GET (store_insn);
  df_ref use;
  rtx_insn *swap_insn = NULL;
  unsigned uid2 = 0;
  FOR_EACH_INSN_INFO_USE (use, insn_info)
    {
      struct df_link *def_link = DF_REF_CHAIN (use);

      /* If this is not the definition of the candidate swap register,
         then skip it.  I am only interested in the swap insn.  */
      if (!rtx_equal_p (DF_REF_REG (use), swap_reg))
        continue;

      /* If there is no def or the def is artificial or there are
         multiple defs, we should not be here.  */
      gcc_assert (def_link && def_link->ref && !def_link->next
                  && !DF_REF_IS_ARTIFICIAL (def_link->ref));

      swap_insn = DF_REF_INSN (def_link->ref);
      uid2 = INSN_UID (swap_insn);

      /* If this source value is not a simple swap, we should not be here.  */
      gcc_assert (insn_entry[uid2].is_swap && !insn_entry[uid2].is_load
                  && !insn_entry[uid2].is_store);

      /* We've processed the use we care about, so break out of
         this loop.  */
      break;
    }

  /* At this point, swap_insn and uid2 represent the swap instruction
     that feeds the store.  */
  gcc_assert (swap_insn);
  rtx set = single_set (store_insn);
  gcc_assert (set);
  rtx dest_exp = SET_DEST (set);
  rtx src_exp = XEXP (SET_SRC (body), 0);
  enum machine_mode mode = GET_MODE (dest_exp);
  gcc_assert (MEM_P (dest_exp));
  gcc_assert (MEM_ALIGN (dest_exp) >= 128);

  /* Replace the store with a new insn.  */
  rtx stvx;
  stvx = rs6000_gen_stvx (mode, dest_exp, src_exp);

  rtx_insn *new_insn = emit_insn_before (stvx, store_insn);
  rtx new_body = PATTERN (new_insn);

  gcc_assert ((GET_CODE (new_body) == SET)
              && MEM_P (SET_DEST (new_body)));

  set_block_for_insn (new_insn, BLOCK_FOR_INSN (store_insn));
  df_insn_rescan (new_insn);

  df_insn_delete (store_insn);
  remove_insn (store_insn);
  store_insn->set_deleted ();

  /* Replace the swap with a copy.  */
  uid2 = INSN_UID (swap_insn);
  mark_swaps_for_removal (insn_entry, uid2);
  replace_swap_with_copy (insn_entry, uid2);
}

/* Generate an rtx expression to represent use of the lvx insn to load
   from memory SRC_EXP into register DEST_EXP with vector mode MODE.  */
rtx
rs6000_gen_lvx (enum machine_mode mode, rtx dest_exp, rtx src_exp)
{
  rtx lvx;

  if (mode == V16QImode)
    lvx = gen_altivec_lvx_v16qi (dest_exp, src_exp);
  else if (mode == V8HImode)
    lvx = gen_altivec_lvx_v8hi (dest_exp, src_exp);
#ifdef HAVE_V8HFmode
  else if (mode == V8HFmode)
    lvx = gen_altivec_lvx_v8hf (dest_exp, src_exp);
#endif
  else if (mode == V4SImode)
    lvx = gen_altivec_lvx_v4si (dest_exp, src_exp);
  else if (mode == V4SFmode)
    lvx = gen_altivec_lvx_v4sf (dest_exp, src_exp);
  else if (mode == V2DImode)
    lvx = gen_altivec_lvx_v2di (dest_exp, src_exp);
  else if (mode == V2DFmode)
    lvx = gen_altivec_lvx_v2df (dest_exp, src_exp);
  else if (mode == V1TImode)
    lvx = gen_altivec_lvx_v1ti (dest_exp, src_exp);
  else
    /* KFmode, TFmode, other modes not expected in this context.  */
    gcc_unreachable ();

  rtx new_mem_exp = SET_SRC (PATTERN (lvx));
  mimic_memory_attributes_and_flags (new_mem_exp, src_exp);

  return lvx;
}

/* Given that SWAP_INSN represents a swap of an aligned
   load-with-swap, replace the load with an aligned load (without
   swap) and replace the swap with a copy insn.  */
static void
replace_swapped_aligned_load (swap_web_entry *insn_entry, rtx swap_insn)
{
  /* Find the load.  */
  unsigned uid = INSN_UID (swap_insn);
  /* Only call this if quad_aligned_load_p (swap_insn).  */
  gcc_assert (insn_entry[uid].is_swap && !insn_entry[uid].is_load);
  struct df_insn_info *insn_info = DF_INSN_INFO_GET (swap_insn);

  /* Since insn is known to represent a swap instruction, we know it
     "uses" only one input variable.  */
  df_ref use = DF_INSN_INFO_USES (insn_info);

  /* Figure out where this input variable is defined.  */
  struct df_link *def_link = DF_REF_CHAIN (use);
  gcc_assert (def_link && !def_link->next);
  gcc_assert (def_link && def_link->ref
              && !DF_REF_IS_ARTIFICIAL (def_link->ref) && !def_link->next);

  rtx_insn *def_insn = DF_REF_INSN (def_link->ref);
  unsigned uid2 = INSN_UID (def_insn);

  /* We're expecting a load-with-swap insn.  */
  gcc_assert (insn_entry[uid2].is_load && insn_entry[uid2].is_swap);

  /* We expect this to be a set from memory, with source representing a
     swap (indicated by code VEC_SELECT).  */
/* Given that SWAP_INSN represents a swap of an aligned
   load-with-swap, replace the load with an aligned load (without
   swap) and replace the swap with a copy insn.  */
static void
replace_swapped_aligned_load (swap_web_entry *insn_entry, rtx swap_insn)
{
  /* Find the load.  */
  unsigned uid = INSN_UID (swap_insn);
  /* Only call this if quad_aligned_load_p (swap_insn).  */
  gcc_assert (insn_entry[uid].is_swap && !insn_entry[uid].is_load);
  struct df_insn_info *insn_info = DF_INSN_INFO_GET (swap_insn);

  /* Since insn is known to represent a swap instruction, we know it
     "uses" only one input variable.  */
  df_ref use = DF_INSN_INFO_USES (insn_info);

  /* Figure out where this input variable is defined.  */
  struct df_link *def_link = DF_REF_CHAIN (use);
  gcc_assert (def_link && !def_link->next);
  gcc_assert (def_link && def_link->ref
	      && !DF_REF_IS_ARTIFICIAL (def_link->ref) && !def_link->next);

  rtx_insn *def_insn = DF_REF_INSN (def_link->ref);
  unsigned uid2 = INSN_UID (def_insn);

  /* We're expecting a load-with-swap insn.  */
  gcc_assert (insn_entry[uid2].is_load && insn_entry[uid2].is_swap);

  /* We expect this to be a set from memory, with the source
     representing a swap (indicated by code VEC_SELECT).  */
  rtx body = PATTERN (def_insn);
  gcc_assert ((GET_CODE (body) == SET)
	      && (GET_CODE (SET_SRC (body)) == VEC_SELECT)
	      && MEM_P (XEXP (SET_SRC (body), 0)));

  rtx src_exp = XEXP (SET_SRC (body), 0);
  enum machine_mode mode = GET_MODE (src_exp);
  rtx lvx = rs6000_gen_lvx (mode, SET_DEST (body), src_exp);

  rtx_insn *new_insn = emit_insn_before (lvx, def_insn);
  rtx new_body = PATTERN (new_insn);

  gcc_assert ((GET_CODE (new_body) == SET)
	      && MEM_P (SET_SRC (new_body)));

  set_block_for_insn (new_insn, BLOCK_FOR_INSN (def_insn));
  df_insn_rescan (new_insn);

  df_insn_delete (def_insn);
  remove_insn (def_insn);
  def_insn->set_deleted ();

  /* Replace the swap with a copy.  */
  mark_swaps_for_removal (insn_entry, uid);
  replace_swap_with_copy (insn_entry, uid);
}

/* Given that SWAP_INSN represents a swap of a load of a constant
   vector value, replace with a single instruction that loads a
   swapped variant of the original constant.

   The "natural" representation of a byte array in memory is the same
   for big endian and little endian.

   unsigned char byte_array[] =
     { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f };

   However, when loaded into a vector register, the representation
   depends on endian conventions.

   In big-endian mode, the register holds:

     MSB                                            LSB
     [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f ]

   In little-endian mode, the register holds:

     MSB                                            LSB
     [ f, e, d, c, b, a, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 ]

   Word arrays require different handling.  Consider the word array:

   unsigned int word_array[] =
     { 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f };

   The in-memory representation depends on endian configuration.  The
   equivalent array, declared as a byte array, in memory would be:

   unsigned char big_endian_word_array_data[] =
     { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f }

   unsigned char little_endian_word_array_data[] =
     { 3, 2, 1, 0, 7, 6, 5, 4, b, a, 9, 8, f, e, d, c }

   In big-endian mode, the register holds:

     MSB                                            LSB
     [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f ]

   In little-endian mode, the register holds:

     MSB                                            LSB
     [ c, d, e, f, 8, 9, a, b, 4, 5, 6, 7, 0, 1, 2, 3 ]

   Similar transformations apply to the vector of half-word and vector
   of double-word representations.

   For now, don't handle vectors of quad-precision values.  Just return.
   A better solution is to fix the code generator to emit lvx/stvx for
   those.  */
static void
replace_swapped_load_constant (swap_web_entry *insn_entry, rtx swap_insn)
{
  /* Find the load.  */
  struct df_insn_info *insn_info = DF_INSN_INFO_GET (swap_insn);
  rtx_insn *load_insn;
  df_ref use = DF_INSN_INFO_USES (insn_info);
  struct df_link *def_link = DF_REF_CHAIN (use);
  gcc_assert (def_link && !def_link->next);

  load_insn = DF_REF_INSN (def_link->ref);
  gcc_assert (load_insn);

  /* Find the TOC-relative symbol access.  */
  insn_info = DF_INSN_INFO_GET (load_insn);
  use = DF_INSN_INFO_USES (insn_info);

  def_link = DF_REF_CHAIN (use);
  gcc_assert (def_link && !def_link->next);

  rtx_insn *tocrel_insn = DF_REF_INSN (def_link->ref);
  gcc_assert (tocrel_insn);
  /* Find the embedded CONST_VECTOR.  We have to call toc_relative_expr_p
     to set tocrel_base; otherwise it would be unnecessary as we've
     already established it will return true.  */
  rtx base, offset;
  rtx tocrel_expr = SET_SRC (PATTERN (tocrel_insn));
  const_rtx tocrel_base;

  /* There is an extra level of indirection for small/large code models.  */
  if (MEM_P (tocrel_expr))
    tocrel_expr = XEXP (tocrel_expr, 0);

  if (!toc_relative_expr_p (tocrel_expr, false, &tocrel_base, NULL))
    gcc_unreachable ();

  split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
  rtx const_vector = get_pool_constant (base);

  /* With the extra indirection, get_pool_constant will produce the
     real constant from the reg_equal expression, so get the real
     constant.  */
  if (SYMBOL_REF_P (const_vector))
    const_vector = get_pool_constant (const_vector);
  gcc_assert (GET_CODE (const_vector) == CONST_VECTOR);

  rtx new_mem;
  enum machine_mode mode = GET_MODE (const_vector);

  /* Create an adjusted constant from the original constant.  */
  if (mode == V1TImode)
    /* Leave V1TImode loads unchanged; see the function comment.  */
    return;
  else if (mode == V16QImode)
    {
      rtx vals = gen_rtx_PARALLEL (mode, rtvec_alloc (16));
      int i;

      for (i = 0; i < 16; i++)
	XVECEXP (vals, 0, ((i+8) % 16)) = XVECEXP (const_vector, 0, i);
      rtx new_const_vector = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
      new_mem = force_const_mem (mode, new_const_vector);
    }
  else if ((mode == V8HImode)
#ifdef HAVE_V8HFmode
	   || (mode == V8HFmode)
#endif
	   )
    {
      rtx vals = gen_rtx_PARALLEL (mode, rtvec_alloc (8));
      int i;

      for (i = 0; i < 8; i++)
	XVECEXP (vals, 0, ((i+4) % 8)) = XVECEXP (const_vector, 0, i);
      rtx new_const_vector = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
      new_mem = force_const_mem (mode, new_const_vector);
    }
  else if ((mode == V4SImode) || (mode == V4SFmode))
    {
      rtx vals = gen_rtx_PARALLEL (mode, rtvec_alloc (4));
      int i;

      for (i = 0; i < 4; i++)
	XVECEXP (vals, 0, ((i+2) % 4)) = XVECEXP (const_vector, 0, i);
      rtx new_const_vector = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
      new_mem = force_const_mem (mode, new_const_vector);
    }
  else if ((mode == V2DImode) || (mode == V2DFmode))
    {
      rtx vals = gen_rtx_PARALLEL (mode, rtvec_alloc (2));
      int i;

      for (i = 0; i < 2; i++)
	XVECEXP (vals, 0, ((i+1) % 2)) = XVECEXP (const_vector, 0, i);
      rtx new_const_vector = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
      new_mem = force_const_mem (mode, new_const_vector);
    }
  else
    {
      /* We do not expect other modes to be constant-load-swapped.  */
      gcc_unreachable ();
    }

  /* This gives us a MEM whose base operand is a SYMBOL_REF, which we
     can't recognize.  Force the SYMBOL_REF into a register.  */
  if (!REG_P (XEXP (new_mem, 0)))
    {
      rtx base_reg = force_reg (Pmode, XEXP (new_mem, 0));
      XEXP (new_mem, 0) = base_reg;

      /* Move the newly created insn ahead of the load insn.  */
      /* The last insn is the insn that forced new_mem into a register.  */
      rtx_insn *force_insn = get_last_insn ();
      /* Remove this insn from the end of the instruction sequence.  */
      remove_insn (force_insn);
      rtx_insn *before_load_insn = PREV_INSN (load_insn);

      /* And insert this insn back into the sequence before the previous
	 load insn so this new expression will be available when the
	 existing load is modified to load the swapped constant.  */
      add_insn_after (force_insn, before_load_insn, BLOCK_FOR_INSN (load_insn));
      df_insn_rescan (before_load_insn);
      df_insn_rescan (force_insn);
    }

  /* Replace the MEM in the load instruction and rescan it.  */
  XEXP (SET_SRC (PATTERN (load_insn)), 0) = new_mem;
  INSN_CODE (load_insn) = -1; /* Force re-recognition.  */
  df_insn_rescan (load_insn);

  unsigned int uid = INSN_UID (swap_insn);
  mark_swaps_for_removal (insn_entry, uid);
  replace_swap_with_copy (insn_entry, uid);
}
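
/* For illustration, the element remapping used above, assuming a
   constant vector of NELTS elements (nelts, old_elt and new_elt are
   invented names): element I of the original constant is placed at
   position (I + NELTS/2) % NELTS of the new constant, so the two
   doubleword halves trade places.

     for (int i = 0; i < nelts; i++)
       new_elt[(i + nelts / 2) % nelts] = old_elt[i];

   For example, a V4SI constant { e0, e1, e2, e3 } becomes
   { e2, e3, e0, e1 }.  */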
/* Dump the swap table to DUMP_FILE.  */
static void
dump_swap_insn_table (swap_web_entry *insn_entry)
{
  int e = get_max_uid ();
  fprintf (dump_file, "\nRelevant insns with their flag settings\n\n");

  for (int i = 0; i < e; ++i)
    if (insn_entry[i].is_relevant)
      {
	swap_web_entry *pred_entry = (swap_web_entry *)insn_entry[i].pred ();
	fprintf (dump_file, "%6d %6d ", i,
		 pred_entry && pred_entry->insn
		 ? INSN_UID (pred_entry->insn) : 0);
	if (insn_entry[i].is_load)
	  fputs ("load ", dump_file);
	if (insn_entry[i].is_store)
	  fputs ("store ", dump_file);
	if (insn_entry[i].is_swap)
	  fputs ("swap ", dump_file);
	if (insn_entry[i].is_live_in)
	  fputs ("live-in ", dump_file);
	if (insn_entry[i].is_live_out)
	  fputs ("live-out ", dump_file);
	if (insn_entry[i].contains_subreg)
	  fputs ("subreg ", dump_file);
	if (insn_entry[i].is_128_int)
	  fputs ("int128 ", dump_file);
	if (insn_entry[i].is_call)
	  fputs ("call ", dump_file);
	if (insn_entry[i].is_swappable)
	  {
	    fputs ("swappable ", dump_file);
	    if (insn_entry[i].special_handling == SH_CONST_VECTOR)
	      fputs ("special:constvec ", dump_file);
	    else if (insn_entry[i].special_handling == SH_SUBREG)
	      fputs ("special:subreg ", dump_file);
	    else if (insn_entry[i].special_handling == SH_NOSWAP_LD)
	      fputs ("special:load ", dump_file);
	    else if (insn_entry[i].special_handling == SH_NOSWAP_ST)
	      fputs ("special:store ", dump_file);
	    else if (insn_entry[i].special_handling == SH_EXTRACT)
	      fputs ("special:extract ", dump_file);
	    else if (insn_entry[i].special_handling == SH_SPLAT)
	      fputs ("special:splat ", dump_file);
	    else if (insn_entry[i].special_handling == SH_XXPERMDI)
	      fputs ("special:xxpermdi ", dump_file);
	    else if (insn_entry[i].special_handling == SH_CONCAT)
	      fputs ("special:concat ", dump_file);
	    else if (insn_entry[i].special_handling == SH_VPERM)
	      fputs ("special:vperm ", dump_file);
	  }
	if (insn_entry[i].web_not_optimizable)
	  fputs ("unoptimizable ", dump_file);
	if (insn_entry[i].will_delete)
	  fputs ("delete ", dump_file);
	fputs ("\n", dump_file);
      }
  fputs ("\n", dump_file);
}

/* Return RTX with its address canonicalized to (reg) or (+ reg reg).
   Here RTX is an (& addr (const_int -16)).  Always return a new copy
   to avoid problems with combine.  */
static rtx
alignment_with_canonical_addr (rtx align)
{
  rtx canon;
  rtx addr = XEXP (align, 0);

  if (REG_P (addr))
    canon = addr;

  else if (GET_CODE (addr) == PLUS)
    {
      rtx addrop0 = XEXP (addr, 0);
      rtx addrop1 = XEXP (addr, 1);

      if (!REG_P (addrop0))
	addrop0 = force_reg (GET_MODE (addrop0), addrop0);

      if (!REG_P (addrop1))
	addrop1 = force_reg (GET_MODE (addrop1), addrop1);

      canon = gen_rtx_PLUS (GET_MODE (addr), addrop0, addrop1);
    }

  else
    canon = force_reg (GET_MODE (addr), addr);

  return gen_rtx_AND (GET_MODE (align), canon, GEN_INT (-16));
}

/* Check whether an rtx is an alignment mask, and if so, return
   a fully-expanded rtx for the masking operation.  */
static rtx
alignment_mask (rtx_insn *insn)
{
  rtx body = PATTERN (insn);

  if (GET_CODE (body) != SET
      || GET_CODE (SET_SRC (body)) != AND
      || !REG_P (XEXP (SET_SRC (body), 0)))
    return 0;

  rtx mask = XEXP (SET_SRC (body), 1);

  if (CONST_INT_P (mask))
    {
      if (INTVAL (mask) == -16)
	return alignment_with_canonical_addr (SET_SRC (body));
      else
	return 0;
    }

  if (!REG_P (mask))
    return 0;

  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
  df_ref use;
  rtx real_mask = 0;

  FOR_EACH_INSN_INFO_USE (use, insn_info)
    {
      if (!rtx_equal_p (DF_REF_REG (use), mask))
	continue;

      struct df_link *def_link = DF_REF_CHAIN (use);
      if (!def_link || def_link->next)
	return 0;

      rtx_insn *const_insn = DF_REF_INSN (def_link->ref);
      rtx const_body = PATTERN (const_insn);
      if (GET_CODE (const_body) != SET)
	return 0;

      real_mask = SET_SRC (const_body);

      if (!CONST_INT_P (real_mask)
	  || INTVAL (real_mask) != -16)
	return 0;
    }

  if (real_mask == 0)
    return 0;

  return alignment_with_canonical_addr (SET_SRC (body));
}

/* Given INSN that's a load or store based at BASE_REG, look for a
   feeding computation that aligns its address on a 16-byte boundary.
   If one is found, return the canonicalized masking operation and
   record the insn that performs it in *AND_INSN.  */
static rtx
find_alignment_op (rtx_insn *insn, rtx base_reg, rtx_insn **and_insn)
{
  df_ref base_use;
  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
  rtx and_operation = 0;

  FOR_EACH_INSN_INFO_USE (base_use, insn_info)
    {
      if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
	continue;

      struct df_link *base_def_link = DF_REF_CHAIN (base_use);
      if (!base_def_link || base_def_link->next)
	break;

      /* With stack-protector code enabled, and possibly in other
	 circumstances, there may not be an associated insn for
	 the def.  */
      if (DF_REF_IS_ARTIFICIAL (base_def_link->ref))
	break;

      *and_insn = DF_REF_INSN (base_def_link->ref);
      and_operation = alignment_mask (*and_insn);
      if (and_operation != 0)
	break;
    }

  return and_operation;
}

struct del_info { bool replace; rtx_insn *replace_insn; };
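
/* A note for illustration: the -16 mask recognized by alignment_mask
   above clears the low four bits of the address, because -16 in two's
   complement is ...11110000.  For example,

     0x10009 & -16  ==  0x10000

   so the masked address is 16-byte aligned, matching the behavior of
   the lvx/stvx instructions, which ignore the low four bits of the
   effective address.  */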
/* If INSN is the load for an lvx pattern, put it in canonical form.  */
static void
recombine_lvx_pattern (rtx_insn *insn, del_info *to_delete)
{
  rtx body = PATTERN (insn);
  gcc_assert (GET_CODE (body) == SET
	      && GET_CODE (SET_SRC (body)) == VEC_SELECT
	      && MEM_P (XEXP (SET_SRC (body), 0)));

  rtx mem = XEXP (SET_SRC (body), 0);
  rtx base_reg = XEXP (mem, 0);

  rtx_insn *and_insn;
  rtx and_operation = find_alignment_op (insn, base_reg, &and_insn);

  if (and_operation != 0)
    {
      df_ref def;
      struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
      FOR_EACH_INSN_INFO_DEF (def, insn_info)
	{
	  struct df_link *link = DF_REF_CHAIN (def);
	  if (!link || link->next)
	    break;

	  rtx_insn *swap_insn = DF_REF_INSN (link->ref);
	  if (!insn_is_swap_p (swap_insn)
	      || insn_is_load_p (swap_insn)
	      || insn_is_store_p (swap_insn))
	    break;

	  /* Expected lvx pattern found.  Change the swap to
	     a copy, and propagate the AND operation into the
	     load.  */
	  to_delete[INSN_UID (swap_insn)].replace = true;
	  to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;

	  /* However, first we must be sure that we make the
	     base register from the AND operation available
	     in case the register has been overwritten.  Copy
	     the base register to a new pseudo and use that
	     as the base register of the AND operation in
	     the new LVX instruction.  */
	  rtx and_base = XEXP (and_operation, 0);
	  rtx new_reg = gen_reg_rtx (GET_MODE (and_base));
	  rtx copy = gen_rtx_SET (new_reg, and_base);
	  rtx_insn *new_insn = emit_insn_after (copy, and_insn);
	  set_block_for_insn (new_insn, BLOCK_FOR_INSN (and_insn));
	  df_insn_rescan (new_insn);

	  XEXP (mem, 0) = gen_rtx_AND (GET_MODE (and_base), new_reg,
				       XEXP (and_operation, 1));
	  SET_SRC (body) = mem;
	  INSN_CODE (insn) = -1; /* Force re-recognition.  */
	  df_insn_rescan (insn);

	  if (dump_file)
	    fprintf (dump_file, "lvx opportunity found at %d\n",
		     INSN_UID (insn));
	}
    }
}
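
/* For illustration only, the shape of the canonicalization performed
   above, in schematic RTL (register numbers are invented and
   element-selection details elided):

     before:  (set (reg:DI 200) (and:DI (reg:DI 3) (const_int -16)))
	      (set (reg:V4SI 210)
		   (vec_select:V4SI (mem:V4SI (reg:DI 200)) ...))
	      (set (reg:V4SI 211) (vec_select:V4SI (reg:V4SI 210) ...))

     after:   (set (reg:DI 200) (and:DI (reg:DI 3) (const_int -16)))
	      (set (reg:DI 220) (reg:DI 3))    ; fresh copy of the AND base
	      (set (reg:V4SI 210)
		   (mem:V4SI (and:DI (reg:DI 220) (const_int -16))))
	      (set (reg:V4SI 211) (reg:V4SI 210))

   The swap itself is only recorded in TO_DELETE here; it is turned
   into the register copy by recombine_lvx_stvx_patterns below.  */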
/* If INSN is the store for an stvx pattern, put it in canonical form.  */
static void
recombine_stvx_pattern (rtx_insn *insn, del_info *to_delete)
{
  rtx body = PATTERN (insn);
  gcc_assert (GET_CODE (body) == SET
	      && MEM_P (SET_DEST (body))
	      && GET_CODE (SET_SRC (body)) == VEC_SELECT);
  rtx mem = SET_DEST (body);
  rtx base_reg = XEXP (mem, 0);

  rtx_insn *and_insn;
  rtx and_operation = find_alignment_op (insn, base_reg, &and_insn);

  if (and_operation != 0)
    {
      rtx src_reg = XEXP (SET_SRC (body), 0);
      df_ref src_use;
      struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
      FOR_EACH_INSN_INFO_USE (src_use, insn_info)
	{
	  if (!rtx_equal_p (DF_REF_REG (src_use), src_reg))
	    continue;

	  struct df_link *link = DF_REF_CHAIN (src_use);
	  if (!link || link->next)
	    break;

	  rtx_insn *swap_insn = DF_REF_INSN (link->ref);
	  if (!insn_is_swap_p (swap_insn)
	      || insn_is_load_p (swap_insn)
	      || insn_is_store_p (swap_insn))
	    break;

	  /* Expected stvx pattern found.  Change the swap to
	     a copy, and propagate the AND operation into the
	     store.  */
	  to_delete[INSN_UID (swap_insn)].replace = true;
	  to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;

	  /* However, first we must be sure that we make the
	     base register from the AND operation available
	     in case the register has been overwritten.  Copy
	     the base register to a new pseudo and use that
	     as the base register of the AND operation in
	     the new STVX instruction.  */
	  rtx and_base = XEXP (and_operation, 0);
	  rtx new_reg = gen_reg_rtx (GET_MODE (and_base));
	  rtx copy = gen_rtx_SET (new_reg, and_base);
	  rtx_insn *new_insn = emit_insn_after (copy, and_insn);
	  set_block_for_insn (new_insn, BLOCK_FOR_INSN (and_insn));
	  df_insn_rescan (new_insn);

	  XEXP (mem, 0) = gen_rtx_AND (GET_MODE (and_base), new_reg,
				       XEXP (and_operation, 1));
	  SET_SRC (body) = src_reg;
	  INSN_CODE (insn) = -1; /* Force re-recognition.  */
	  df_insn_rescan (insn);

	  if (dump_file)
	    fprintf (dump_file, "stvx opportunity found at %d\n",
		     INSN_UID (insn));
	}
    }
}

/* Look for patterns created from builtin lvx and stvx calls, and
   canonicalize them to be properly recognized as such.  */
static void
recombine_lvx_stvx_patterns (function *fun)
{
  int i;
  basic_block bb;
  rtx_insn *insn;

  int num_insns = get_max_uid ();
  del_info *to_delete = XCNEWVEC (del_info, num_insns);

  FOR_ALL_BB_FN (bb, fun)
    FOR_BB_INSNS (bb, insn)
      {
	if (!NONDEBUG_INSN_P (insn))
	  continue;

	if (insn_is_load_p (insn) && insn_is_swap_p (insn))
	  recombine_lvx_pattern (insn, to_delete);
	else if (insn_is_store_p (insn) && insn_is_swap_p (insn))
	  recombine_stvx_pattern (insn, to_delete);
      }

  /* Turning swaps into copies is delayed until now, to avoid problems
     with deleting instructions during the insn walk.  */
  for (i = 0; i < num_insns; i++)
    if (to_delete[i].replace)
      {
	rtx swap_body = PATTERN (to_delete[i].replace_insn);
	rtx src_reg = XEXP (SET_SRC (swap_body), 0);
	rtx copy = gen_rtx_SET (SET_DEST (swap_body), src_reg);
	rtx_insn *new_insn = emit_insn_before (copy,
					       to_delete[i].replace_insn);
	set_block_for_insn (new_insn,
			    BLOCK_FOR_INSN (to_delete[i].replace_insn));
	df_insn_rescan (new_insn);
	df_insn_delete (to_delete[i].replace_insn);
	remove_insn (to_delete[i].replace_insn);
	to_delete[i].replace_insn->set_deleted ();
      }

  free (to_delete);
}
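
/* The entry point below makes three passes over the function's RTL:
   first it builds webs of vector computations and converts removable
   xxswapdi insns into register copies; second, it rewrites swaps of
   quad-word aligned loads and stores to use plain lvx/stvx; third, it
   folds a swap of a load of a constant vector into a load of a
   pre-swapped constant.  */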
/* Main entry point for this pass.  */
unsigned int
rs6000_analyze_swaps (function *fun)
{
  swap_web_entry *insn_entry;
  basic_block bb;
  rtx_insn *insn, *curr_insn = 0;

  /* Dataflow analysis for use-def chains.  */
  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
  df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
  df_analyze ();
  df_set_flags (DF_DEFER_INSN_RESCAN);

  /* Pre-pass to recombine lvx and stvx patterns so we don't lose info.  */
  recombine_lvx_stvx_patterns (fun);

  /* Rebuild ud- and du-chains.  */
  df_remove_problem (df_chain);
  df_process_deferred_rescans ();
  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
  df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
  df_analyze ();
  df_set_flags (DF_DEFER_INSN_RESCAN);

  /* Allocate structure to represent webs of insns.  */
  insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());

  /* Walk the insns to gather basic data.  */
  FOR_ALL_BB_FN (bb, fun)
    FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
      {
	unsigned int uid = INSN_UID (insn);
	if (NONDEBUG_INSN_P (insn))
	  {
	    insn_entry[uid].insn = insn;

	    if (GET_CODE (insn) == CALL_INSN)
	      insn_entry[uid].is_call = 1;

	    /* Walk the uses and defs to see if we mention vector regs.
	       Record any constraints on optimization of such mentions.  */
	    struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
	    df_ref mention;
	    FOR_EACH_INSN_INFO_USE (mention, insn_info)
	      {
		/* We use DF_REF_REAL_REG here to get inside any subregs.  */
		machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));

		/* If a use gets its value from a call insn, it will be
		   a hard register and will look like (reg:V4SI 3 3).
		   The df analysis creates two mentions for GPR3 and GPR4,
		   both DImode.  We must recognize this and treat it as a
		   vector mention to ensure the call is unioned with this
		   use.  */
		if (mode == DImode && DF_REF_INSN_INFO (mention))
		  {
		    rtx feeder = DF_REF_INSN (mention);
		    /* FIXME: It is pretty hard to get from the df mention
		       to the mode of the use in the insn.  We arbitrarily
		       pick a vector mode here, even though the use might
		       be a real DImode.  We can be too conservative
		       (create a web larger than necessary) because of
		       this, so consider eventually fixing this.  */
		    if (GET_CODE (feeder) == CALL_INSN)
		      mode = V4SImode;
		  }

		if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
		  {
		    insn_entry[uid].is_relevant = 1;
		    if (mode == TImode || mode == V1TImode
			|| FLOAT128_VECTOR_P (mode))
		      insn_entry[uid].is_128_int = 1;
		    if (DF_REF_INSN_INFO (mention))
		      insn_entry[uid].contains_subreg
			= !rtx_equal_p (DF_REF_REG (mention),
					DF_REF_REAL_REG (mention));
		    union_defs (insn_entry, insn, mention);
		  }
	      }
	    FOR_EACH_INSN_INFO_DEF (mention, insn_info)
	      {
		/* We use DF_REF_REAL_REG here to get inside any subregs.  */
		machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));

		/* If we're loading up a hard vector register for a call,
		   it looks like (set (reg:V4SI 9 9) (...)).  The df
		   analysis creates two mentions for GPR9 and GPR10, both
		   DImode.  So relying on the mode from the mentions
		   isn't sufficient to ensure we union the call into the
		   web with the parameter setup code.  */
		if (mode == DImode && GET_CODE (insn) == SET
		    && ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (SET_DEST (insn))))
		  mode = GET_MODE (SET_DEST (insn));

		if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
		  {
		    insn_entry[uid].is_relevant = 1;
		    if (mode == TImode || mode == V1TImode
			|| FLOAT128_VECTOR_P (mode))
		      insn_entry[uid].is_128_int = 1;
		    if (DF_REF_INSN_INFO (mention))
		      insn_entry[uid].contains_subreg
			= !rtx_equal_p (DF_REF_REG (mention),
					DF_REF_REAL_REG (mention));
		    /* REG_FUNCTION_VALUE_P is not valid for subregs.  */
		    else if (REG_FUNCTION_VALUE_P (DF_REF_REG (mention)))
		      insn_entry[uid].is_live_out = 1;
		    union_uses (insn_entry, insn, mention);
		  }
	      }

	    if (insn_entry[uid].is_relevant)
	      {
		/* Determine if this is a load or store.  */
		insn_entry[uid].is_load = insn_is_load_p (insn);
		insn_entry[uid].is_store = insn_is_store_p (insn);

		/* Determine if this is a doubleword swap.  If not,
		   determine whether it can legally be swapped.  */
		if (insn_is_swap_p (insn))
		  insn_entry[uid].is_swap = 1;
		else
		  {
		    unsigned int special = SH_NONE;
		    insn_entry[uid].is_swappable
		      = insn_is_swappable_p (insn_entry, insn, &special);
		    if (special != SH_NONE && insn_entry[uid].contains_subreg)
		      insn_entry[uid].is_swappable = 0;
		    else if (special != SH_NONE)
		      insn_entry[uid].special_handling = special;
		    else if (insn_entry[uid].contains_subreg
			     && has_part_mult (insn))
		      insn_entry[uid].is_swappable = 0;
		    else if (insn_entry[uid].contains_subreg)
		      insn_entry[uid].special_handling = SH_SUBREG;
		  }
	      }
	  }
      }

  if (dump_file)
    {
      fprintf (dump_file, "\nSwap insn entry table when first built\n");
      dump_swap_insn_table (insn_entry);
    }

  /* Record unoptimizable webs.  */
  unsigned e = get_max_uid (), i;
  for (i = 0; i < e; ++i)
    {
      if (!insn_entry[i].is_relevant)
	continue;

      swap_web_entry *root
	= (swap_web_entry*)(&insn_entry[i])->unionfind_root ();

      if (insn_entry[i].is_live_in || insn_entry[i].is_live_out
	  || (insn_entry[i].contains_subreg
	      && insn_entry[i].special_handling != SH_SUBREG)
	  || insn_entry[i].is_128_int || insn_entry[i].is_call
	  || !(insn_entry[i].is_swappable || insn_entry[i].is_swap))
	root->web_not_optimizable = 1;

      /* If we have loads or stores that aren't permuting then the
	 optimization isn't appropriate.  */
      else if ((insn_entry[i].is_load || insn_entry[i].is_store)
	       && !insn_entry[i].is_swap && !insn_entry[i].is_swappable)
	root->web_not_optimizable = 1;

      /* If we have a swap that is both fed by a permuting load
	 and a feeder of a permuting store, then the optimization
	 isn't appropriate.  (Consider vec_xl followed by vec_xst_be.)  */
      else if (insn_entry[i].is_swap && !insn_entry[i].is_load
	       && !insn_entry[i].is_store
	       && swap_feeds_both_load_and_store (&insn_entry[i]))
	root->web_not_optimizable = 1;

      /* If we have permuting loads or stores that are not accompanied
	 by a register swap, the optimization isn't appropriate.  */
      else if (insn_entry[i].is_load && insn_entry[i].is_swap)
	{
	  rtx insn = insn_entry[i].insn;
	  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
	  df_ref def;

	  FOR_EACH_INSN_INFO_DEF (def, insn_info)
	    {
	      struct df_link *link = DF_REF_CHAIN (def);

	      if (!chain_contains_only_swaps (insn_entry, link, FOR_LOADS))
		{
		  root->web_not_optimizable = 1;
		  break;
		}
	    }
	}
      else if (insn_entry[i].is_store && insn_entry[i].is_swap)
	{
	  rtx insn = insn_entry[i].insn;
	  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
	  df_ref use;

	  FOR_EACH_INSN_INFO_USE (use, insn_info)
	    {
	      struct df_link *link = DF_REF_CHAIN (use);

	      if (!chain_contains_only_swaps (insn_entry, link, FOR_STORES))
		{
		  root->web_not_optimizable = 1;
		  break;
		}
	    }
	}
    }

  if (dump_file)
    {
      fprintf (dump_file, "\nSwap insn entry table after web analysis\n");
      dump_swap_insn_table (insn_entry);
    }

  /* For each load and store in an optimizable web (which implies
     the loads and stores are permuting), find the associated
     register swaps and mark them for removal.  Due to various
     optimizations we may mark the same swap more than once.  Also
     perform special handling for swappable insns that require it.  */
  for (i = 0; i < e; ++i)
    if ((insn_entry[i].is_load || insn_entry[i].is_store)
	&& insn_entry[i].is_swap)
      {
	swap_web_entry* root_entry
	  = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
	if (!root_entry->web_not_optimizable)
	  mark_swaps_for_removal (insn_entry, i);
      }
    else if (insn_entry[i].is_swappable && insn_entry[i].special_handling)
      {
	swap_web_entry* root_entry
	  = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
	if (!root_entry->web_not_optimizable)
	  handle_special_swappables (insn_entry, i);
      }

  /* Now delete the swaps marked for removal.  */
  for (i = 0; i < e; ++i)
    if (insn_entry[i].will_delete)
      replace_swap_with_copy (insn_entry, i);

  /* Clean up.  */
  free (insn_entry);

  /* Use a second pass over rtl to detect that certain vector values
     fetched from or stored to memory on quad-word aligned addresses
     can use lvx/stvx without swaps.  */

  /* First, rebuild ud chains.  */
  df_remove_problem (df_chain);
  df_process_deferred_rescans ();
  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
  df_chain_add_problem (DF_UD_CHAIN);
  df_analyze ();

  swap_web_entry *pass2_insn_entry;
  pass2_insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());

  /* Walk the insns to gather basic data.  */
  FOR_ALL_BB_FN (bb, fun)
    FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
      {
	unsigned int uid = INSN_UID (insn);
	if (NONDEBUG_INSN_P (insn))
	  {
	    pass2_insn_entry[uid].insn = insn;

	    pass2_insn_entry[uid].is_relevant = 1;
	    pass2_insn_entry[uid].is_load = insn_is_load_p (insn);
	    pass2_insn_entry[uid].is_store = insn_is_store_p (insn);

	    /* Determine if this is a doubleword swap.  If not,
	       determine whether it can legally be swapped.  */
	    if (insn_is_swap_p (insn))
	      pass2_insn_entry[uid].is_swap = 1;
	  }
      }

  e = get_max_uid ();
  for (unsigned i = 0; i < e; ++i)
    if (pass2_insn_entry[i].is_swap && !pass2_insn_entry[i].is_load
	&& !pass2_insn_entry[i].is_store)
      {
	/* Replace swap of aligned load-swap with aligned unswapped
	   load.  */
	rtx_insn *rtx_insn = pass2_insn_entry[i].insn;
	if (quad_aligned_load_p (pass2_insn_entry, rtx_insn))
	  replace_swapped_aligned_load (pass2_insn_entry, rtx_insn);
      }
    else if (pass2_insn_entry[i].is_swap && pass2_insn_entry[i].is_store)
      {
	/* Replace aligned store-swap of swapped value with aligned
	   unswapped store.  */
	rtx_insn *rtx_insn = pass2_insn_entry[i].insn;
	if (quad_aligned_store_p (pass2_insn_entry, rtx_insn))
	  replace_swapped_aligned_store (pass2_insn_entry, rtx_insn);
      }

  /* Clean up.  */
  free (pass2_insn_entry);

  /* Use a third pass over rtl to replace swap(load(vector constant))
     with load(swapped vector constant).  */

  /* First, rebuild ud chains.  */
  df_remove_problem (df_chain);
  df_process_deferred_rescans ();
  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
  df_chain_add_problem (DF_UD_CHAIN);
  df_analyze ();

  swap_web_entry *pass3_insn_entry;
  pass3_insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());

  /* Walk the insns to gather basic data.  */
  FOR_ALL_BB_FN (bb, fun)
    FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
      {
	unsigned int uid = INSN_UID (insn);
	if (NONDEBUG_INSN_P (insn))
	  {
	    pass3_insn_entry[uid].insn = insn;

	    pass3_insn_entry[uid].is_relevant = 1;
	    pass3_insn_entry[uid].is_load = insn_is_load_p (insn);
	    pass3_insn_entry[uid].is_store = insn_is_store_p (insn);

	    /* Determine if this is a doubleword swap.  If not,
	       determine whether it can legally be swapped.  */
	    if (insn_is_swap_p (insn))
	      pass3_insn_entry[uid].is_swap = 1;
	  }
      }

  e = get_max_uid ();
  for (unsigned i = 0; i < e; ++i)
    if (pass3_insn_entry[i].is_swap && !pass3_insn_entry[i].is_load
	&& !pass3_insn_entry[i].is_store)
      {
	insn = pass3_insn_entry[i].insn;
	if (const_load_sequence_p (pass3_insn_entry, insn))
	  replace_swapped_load_constant (pass3_insn_entry, insn);
      }

  /* Clean up.  */
  free (pass3_insn_entry);
  return 0;
}

const pass_data pass_data_analyze_swaps =
{
  RTL_PASS, /* type */
  "swaps", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

class pass_analyze_swaps : public rtl_opt_pass
{
public:
  pass_analyze_swaps (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_analyze_swaps, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
  {
    return (optimize > 0 && !BYTES_BIG_ENDIAN && TARGET_VSX
	    && !TARGET_P9_VECTOR && rs6000_optimize_swaps);
  }

  virtual unsigned int execute (function *fun)
  {
    return rs6000_analyze_swaps (fun);
  }

  opt_pass *clone ()
  {
    return new pass_analyze_swaps (m_ctxt);
  }

}; // class pass_analyze_swaps

rtl_opt_pass *
make_pass_analyze_swaps (gcc::context *ctxt)
{
  return new pass_analyze_swaps (ctxt);
}