1 /* Subroutines used to remove unnecessary doubleword swaps
2 for p8 little-endian VSX code.
3 Copyright (C) 1991-2020 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published
9 by the Free Software Foundation; either version 3, or (at your
10 option) any later version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #define IN_TARGET_CODE 1
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "memmodel.h"
30 #include "df.h"
31 #include "tm_p.h"
32 #include "ira.h"
33 #include "print-tree.h"
34 #include "varasm.h"
35 #include "explow.h"
36 #include "expr.h"
37 #include "output.h"
38 #include "tree-pass.h"
39 #include "rtx-vector-builder.h"
40
41 /* Analyze vector computations and remove unnecessary doubleword
42 swaps (xxswapdi instructions). This pass is performed only
43 for little-endian VSX code generation.
44
45 For this specific case, loads and stores of 4x32 and 2x64 vectors
46 are inefficient. These are implemented using the lvxd2x and
47 stvxd2x instructions, which invert the order of doublewords in
48 a vector register. Thus the code generation inserts an xxswapdi
49 after each such load, and prior to each such store. (For spill
50 code after register assignment, an additional xxswapdi is inserted
51 following each store in order to return a hard register to its
52 unpermuted value.)
53
54 The extra xxswapdi instructions reduce performance. This can be
55 particularly bad for vectorized code. The purpose of this pass
56 is to reduce the number of xxswapdi instructions required for
57 correctness.
58
59 The primary insight is that much code that operates on vectors
60 does not care about the relative order of elements in a register,
61 so long as the correct memory order is preserved. If we have
62 a computation where all input values are provided by lvxd2x/xxswapdi
63 sequences, all outputs are stored using xxswapdi/stvxd2x sequences,
64 and all intermediate computations are pure SIMD (independent of
65 element order), then all the xxswapdi's associated with the loads
66 and stores may be removed.
67
68 This pass uses some of the infrastructure and logical ideas from
69 the "web" pass in web.c. We create maximal webs of computations
70 fitting the description above using union-find. Each such web is
71 then optimized by removing its unnecessary xxswapdi instructions.
72
73 The pass is placed prior to global optimization so that we can
74 perform the optimization in the safest and simplest way possible;
75 that is, by replacing each xxswapdi insn with a register copy insn.
76 Subsequent forward propagation will remove copies where possible.
77
78 There are some operations sensitive to element order for which we
79 can still allow the operation, provided we modify those operations.
80 These include CONST_VECTORs, for which we must swap the first and
81 second halves of the constant vector; and SUBREGs, for which we
82 must adjust the byte offset to account for the swapped doublewords.
83 A remaining opportunity would be non-immediate-form splats, for
84 which we should adjust the selected lane of the input. We should
85 also make code generation adjustments for sum-across operations,
86 since this is a common vectorizer reduction.
87
88 Because we run prior to the first split, we can see loads and stores
89 here that match *vsx_le_perm_{load,store}_<mode>. These are vanilla
90 vector loads and stores that have not yet been split into a permuting
91 load/store and a swap. (One way this can happen is with a builtin
92 call to vec_vsx_{ld,st}.) We can handle these as well, but rather
93 than deleting a swap, we convert the load/store into a permuting
94 load/store (which effectively removes the swap). */
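
/* Illustrative sketch only (register numbers and the address register
   are invented for the example): on a little-endian target, a V4SI
   load is emitted as a permuting load followed by a doubleword swap,
   roughly

     (set (reg:V4SI 100)
          (vec_select:V4SI (mem:V4SI (reg:DI 3))
                           (parallel [(const_int 2) (const_int 3)
                                      (const_int 0) (const_int 1)])))
     (set (reg:V4SI 101)
          (vec_select:V4SI (reg:V4SI 100)
                           (parallel [(const_int 2) (const_int 3)
                                      (const_int 0) (const_int 1)])))

   with the mirror-image sequence for stores.  When a whole web of
   computation is element-order independent, this pass replaces each
   such xxswapdi with a simple register copy.  */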
95
96 /* Notes on Permutes
97
98 We do not currently handle computations that contain permutes. There
99 is a general transformation that can be performed correctly, but it
100 may introduce more expensive code than it replaces. To handle these
101 would require a cost model to determine when to perform the optimization.
102 This commentary records how this could be done if desired.
103
104 The most general permute is something like this (example for V16QI):
105
106 (vec_select:V16QI (vec_concat:V32QI (op1:V16QI) (op2:V16QI))
107 (parallel [(const_int a0) (const_int a1)
108 ...
109 (const_int a14) (const_int a15)]))
110
111 where a0,...,a15 are in [0,31] and select elements from op1 and op2
112 to place in the result.
113
114 Regardless of mode, we can convert the PARALLEL to a mask of 16
115 byte-element selectors. Let's call this M, with M[i] representing
116 the ith byte-element selector value. Then if we swap doublewords
117 throughout the computation, we can get correct behavior by replacing
118 M with M' as follows:
119
120 M'[i] = { (M[i]+8)%16 : M[i] in [0,15]
121 { ((M[i]+8)%16)+16 : M[i] in [16,31]
122
123 This seems promising at first, since we are just replacing one mask
124 with another. But certain masks are preferable to others. If M
125 is a mask that matches a vmrghh pattern, for example, M' certainly
126 will not. Instead of a single vmrghh, we would generate a load of
127 M' and a vperm. So we would need to know how many xxswapd's we can
128 remove as a result of this transformation to determine if it's
129 profitable; and preferably the logic would need to be aware of all
130 the special preferable masks.
131
132 Another form of permute is an UNSPEC_VPERM, in which the mask is
133 already in a register. In some cases, this mask may be a constant
134 that we can discover with ud-chains, in which case the above
135 transformation is ok. However, the common usage here is for the
136 mask to be produced by an UNSPEC_LVSL, in which case the mask
137 cannot be known at compile time. In such a case we would have to
138 generate several instructions to compute M' as above at run time,
139 and a cost model is needed again.
140
141 However, when the mask M for an UNSPEC_VPERM is loaded from the
142 constant pool, we can replace M with M' as above at no cost
143 beyond adding a constant pool entry. */
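
/* A minimal sketch, not used by this pass, of the mask adjustment
   described above, assuming the permute mask is available as an array
   of sixteen byte-element selectors (selectors 0..15 pick bytes from
   op1, 16..31 from op2):

     static void
     swap_perm_mask (unsigned int mask[16])
     {
       for (unsigned int i = 0; i < 16; ++i)
         {
           unsigned int val = mask[i];
           mask[i] = (val < 16 ? (val + 8) % 16 : ((val + 8) % 16) + 16);
         }
     }

   adjust_vperm below performs this same remapping when it rewrites a
   mask loaded from the constant pool.  */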
144
145 /* This is based on the union-find logic in web.c. web_entry_base is
146 defined in df.h. */
147 class swap_web_entry : public web_entry_base
148 {
149 public:
150 /* Pointer to the insn. */
151 rtx_insn *insn;
152 /* Set if insn contains a mention of a vector register. All other
153 fields are undefined if this field is unset. */
154 unsigned int is_relevant : 1;
155 /* Set if insn is a load. */
156 unsigned int is_load : 1;
157 /* Set if insn is a store. */
158 unsigned int is_store : 1;
159 /* Set if insn is a doubleword swap. This can either be a register swap
160 or a permuting load or store (test is_load and is_store for this). */
161 unsigned int is_swap : 1;
162 /* Set if the insn has a live-in use of a parameter register. */
163 unsigned int is_live_in : 1;
164 /* Set if the insn has a live-out def of a return register. */
165 unsigned int is_live_out : 1;
166 /* Set if the insn contains a subreg reference of a vector register. */
167 unsigned int contains_subreg : 1;
168 /* Set if the insn contains a 128-bit integer operand. */
169 unsigned int is_128_int : 1;
170 /* Set if this is a call-insn. */
171 unsigned int is_call : 1;
172 /* Set if this insn does not perform a vector operation for which
173 element order matters, or if we know how to fix it up if it does.
174 Undefined if is_swap is set. */
175 unsigned int is_swappable : 1;
176 /* A nonzero value indicates what kind of special handling for this
177 insn is required if doublewords are swapped. Undefined if
178 is_swappable is not set. */
179 unsigned int special_handling : 4;
180 /* Set if the web represented by this entry cannot be optimized. */
181 unsigned int web_not_optimizable : 1;
182 /* Set if this insn should be deleted. */
183 unsigned int will_delete : 1;
184 };
185
186 enum special_handling_values {
187 SH_NONE = 0,
188 SH_CONST_VECTOR,
189 SH_SUBREG,
190 SH_NOSWAP_LD,
191 SH_NOSWAP_ST,
192 SH_EXTRACT,
193 SH_SPLAT,
194 SH_XXPERMDI,
195 SH_CONCAT,
196 SH_VPERM
197 };
198
199 /* Union INSN with all insns containing definitions that reach USE.
200 Detect whether USE is live-in to the current function. */
201 static void
202 union_defs (swap_web_entry *insn_entry, rtx insn, df_ref use)
203 {
204 struct df_link *link = DF_REF_CHAIN (use);
205
206 if (!link)
207 insn_entry[INSN_UID (insn)].is_live_in = 1;
208
209 while (link)
210 {
211 if (DF_REF_IS_ARTIFICIAL (link->ref))
212 insn_entry[INSN_UID (insn)].is_live_in = 1;
213
214 if (DF_REF_INSN_INFO (link->ref))
215 {
216 rtx def_insn = DF_REF_INSN (link->ref);
217 (void)unionfind_union (insn_entry + INSN_UID (insn),
218 insn_entry + INSN_UID (def_insn));
219 }
220
221 link = link->next;
222 }
223 }
224
225 /* Union INSN with all insns containing uses reached from DEF.
226 Detect whether DEF is live-out from the current function. */
227 static void
228 union_uses (swap_web_entry *insn_entry, rtx insn, df_ref def)
229 {
230 struct df_link *link = DF_REF_CHAIN (def);
231
232 if (!link)
233 insn_entry[INSN_UID (insn)].is_live_out = 1;
234
235 while (link)
236 {
237 /* This could be an eh use or some other artificial use;
238 we treat these all the same (killing the optimization). */
239 if (DF_REF_IS_ARTIFICIAL (link->ref))
240 insn_entry[INSN_UID (insn)].is_live_out = 1;
241
242 if (DF_REF_INSN_INFO (link->ref))
243 {
244 rtx use_insn = DF_REF_INSN (link->ref);
245 (void)unionfind_union (insn_entry + INSN_UID (insn),
246 insn_entry + INSN_UID (use_insn));
247 }
248
249 link = link->next;
250 }
251 }
252
253 /* Return 1 iff INSN is a load insn, including permuting loads that
254 represent an lvxd2x instruction; else return 0. */
255 static unsigned int
256 insn_is_load_p (rtx insn)
257 {
258 rtx body = PATTERN (insn);
259
260 if (GET_CODE (body) == SET)
261 {
262 if (MEM_P (SET_SRC (body)))
263 return 1;
264
265 if (GET_CODE (SET_SRC (body)) == VEC_SELECT
266 && MEM_P (XEXP (SET_SRC (body), 0)))
267 return 1;
268
269 return 0;
270 }
271
272 if (GET_CODE (body) != PARALLEL)
273 return 0;
274
275 rtx set = XVECEXP (body, 0, 0);
276
277 if (GET_CODE (set) == SET && MEM_P (SET_SRC (set)))
278 return 1;
279
280 return 0;
281 }
282
283 /* Return 1 iff INSN is a store insn, including permuting stores that
284 represent an stvxd2x instruction; else return 0. */
285 static unsigned int
286 insn_is_store_p (rtx insn)
287 {
288 rtx body = PATTERN (insn);
289 if (GET_CODE (body) == SET && MEM_P (SET_DEST (body)))
290 return 1;
291 if (GET_CODE (body) != PARALLEL)
292 return 0;
293 rtx set = XVECEXP (body, 0, 0);
294 if (GET_CODE (set) == SET && MEM_P (SET_DEST (set)))
295 return 1;
296 return 0;
297 }
298
299 /* Return 1 iff INSN swaps doublewords. This may be a reg-reg swap,
300 a permuting load, or a permuting store. */
301 static unsigned int
302 insn_is_swap_p (rtx insn)
303 {
304 rtx body = PATTERN (insn);
305 if (GET_CODE (body) != SET)
306 return 0;
307 rtx rhs = SET_SRC (body);
308 if (GET_CODE (rhs) != VEC_SELECT)
309 return 0;
310 rtx parallel = XEXP (rhs, 1);
311 if (GET_CODE (parallel) != PARALLEL)
312 return 0;
313 unsigned int len = XVECLEN (parallel, 0);
314 if (len != 2 && len != 4 && len != 8 && len != 16)
315 return 0;
316 for (unsigned int i = 0; i < len / 2; ++i)
317 {
318 rtx op = XVECEXP (parallel, 0, i);
319 if (!CONST_INT_P (op) || INTVAL (op) != len / 2 + i)
320 return 0;
321 }
322 for (unsigned int i = len / 2; i < len; ++i)
323 {
324 rtx op = XVECEXP (parallel, 0, i);
325 if (!CONST_INT_P (op) || INTVAL (op) != i - len / 2)
326 return 0;
327 }
328 return 1;
329 }
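
/* For example, a V2DI register swap recognized by insn_is_swap_p looks
   like

     (set (reg:V2DI 102)
          (vec_select:V2DI (reg:V2DI 103)
                           (parallel [(const_int 1) (const_int 0)])))

   (register numbers invented for illustration); the same selection
   applied to or from a MEM is a permuting load or store.  */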
330
331 /* Return true iff EXPR represents the sum of two registers. */
332 bool
333 rs6000_sum_of_two_registers_p (const_rtx expr)
334 {
335 if (GET_CODE (expr) == PLUS)
336 {
337 const_rtx operand1 = XEXP (expr, 0);
338 const_rtx operand2 = XEXP (expr, 1);
339 return (REG_P (operand1) && REG_P (operand2));
340 }
341 return false;
342 }
343
344 /* Return true iff EXPR represents an address expression that masks off
345 the low-order 4 bits in the style of an lvx or stvx rtl pattern. */
346 bool
347 rs6000_quadword_masked_address_p (const_rtx expr)
348 {
349 if (GET_CODE (expr) == AND)
350 {
351 const_rtx operand1 = XEXP (expr, 0);
352 const_rtx operand2 = XEXP (expr, 1);
353 if ((REG_P (operand1) || rs6000_sum_of_two_registers_p (operand1))
354 && CONST_SCALAR_INT_P (operand2) && INTVAL (operand2) == -16)
355 return true;
356 }
357 return false;
358 }
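
/* For example, either of the following address expressions satisfies
   the test above (register numbers invented for illustration):

     (and:DI (reg:DI 3) (const_int -16))
     (and:DI (plus:DI (reg:DI 3) (reg:DI 4)) (const_int -16))  */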
359
360 /* Return TRUE if INSN represents a swap of a swapped load from memory
361 and the memory address is quad-word aligned. */
362 static bool
363 quad_aligned_load_p (swap_web_entry *insn_entry, rtx_insn *insn)
364 {
365 unsigned uid = INSN_UID (insn);
366 if (!insn_entry[uid].is_swap || insn_entry[uid].is_load)
367 return false;
368
369 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
370
371 /* Since insn is known to represent a swap instruction, we know it
372 "uses" only one input variable. */
373 df_ref use = DF_INSN_INFO_USES (insn_info);
374
375 /* Figure out where this input variable is defined. */
376 struct df_link *def_link = DF_REF_CHAIN (use);
377
378 /* If there is no definition or the definition is artificial or there are
379 multiple definitions, punt. */
380 if (!def_link || !def_link->ref || DF_REF_IS_ARTIFICIAL (def_link->ref)
381 || def_link->next)
382 return false;
383
384 rtx def_insn = DF_REF_INSN (def_link->ref);
385 unsigned uid2 = INSN_UID (def_insn);
386 /* We're looking for a load-with-swap insn. If this is not that,
387 return false. */
388 if (!insn_entry[uid2].is_load || !insn_entry[uid2].is_swap)
389 return false;
390
391 /* If the source of the rtl def is not a set from memory, return
392 false. */
393 rtx body = PATTERN (def_insn);
394 if (GET_CODE (body) != SET
395 || GET_CODE (SET_SRC (body)) != VEC_SELECT
396 || !MEM_P (XEXP (SET_SRC (body), 0)))
397 return false;
398
399 rtx mem = XEXP (SET_SRC (body), 0);
400 rtx base_reg = XEXP (mem, 0);
401 return ((REG_P (base_reg) || rs6000_sum_of_two_registers_p (base_reg))
402 && MEM_ALIGN (mem) >= 128);
403 }
404
405 /* Return TRUE if INSN represents a store-with-swap of a swapped value
406 and the memory address is quad-word aligned. */
407 static bool
408 quad_aligned_store_p (swap_web_entry *insn_entry, rtx_insn *insn)
409 {
410 unsigned uid = INSN_UID (insn);
411 if (!insn_entry[uid].is_swap || !insn_entry[uid].is_store)
412 return false;
413
414 rtx body = PATTERN (insn);
415 rtx dest_address = XEXP (SET_DEST (body), 0);
416 rtx swap_reg = XEXP (SET_SRC (body), 0);
417
418 /* If the base address for the memory expression is not represented
419 by a single register and is not the sum of two registers, punt. */
420 if (!REG_P (dest_address) && !rs6000_sum_of_two_registers_p (dest_address))
421 return false;
422
423 /* Confirm that the value to be stored is produced by a swap
424 instruction. */
425 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
426 df_ref use;
427 FOR_EACH_INSN_INFO_USE (use, insn_info)
428 {
429 struct df_link *def_link = DF_REF_CHAIN (use);
430
431 /* If this is not the definition of the candidate swap register,
432 then skip it. I am interested in a different definition. */
433 if (!rtx_equal_p (DF_REF_REG (use), swap_reg))
434 continue;
435
436 /* If there is no def or the def is artificial or there are
437 multiple defs, punt. */
438 if (!def_link || !def_link->ref || DF_REF_IS_ARTIFICIAL (def_link->ref)
439 || def_link->next)
440 return false;
441
442 rtx def_insn = DF_REF_INSN (def_link->ref);
443 unsigned uid2 = INSN_UID (def_insn);
444
445 /* If this source value is not a simple swap, return false */
446 if (!insn_entry[uid2].is_swap || insn_entry[uid2].is_load
447 || insn_entry[uid2].is_store)
448 return false;
449
450 /* I've processed the use that I care about, so break out of
451 this loop. */
452 break;
453 }
454
455 /* At this point, we know the source data comes from a swap. The
456 remaining question is whether the memory address is aligned. */
457 rtx set = single_set (insn);
458 if (set)
459 {
460 rtx dest = SET_DEST (set);
461 if (MEM_P (dest))
462 return (MEM_ALIGN (dest) >= 128);
463 }
464 return false;
465 }
466
467 /* Return 1 iff UID, known to reference a swap, is both fed by a load
468 and a feeder of a store. */
469 static unsigned int
470 swap_feeds_both_load_and_store (swap_web_entry *insn_entry)
471 {
472 rtx insn = insn_entry->insn;
473 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
474 df_ref def, use;
475 struct df_link *link = 0;
476 rtx_insn *load = 0, *store = 0;
477 bool fed_by_load = 0;
478 bool feeds_store = 0;
479
480 FOR_EACH_INSN_INFO_USE (use, insn_info)
481 {
482 link = DF_REF_CHAIN (use);
483 load = DF_REF_INSN (link->ref);
484 if (insn_is_load_p (load) && insn_is_swap_p (load))
485 fed_by_load = 1;
486 }
487
488 FOR_EACH_INSN_INFO_DEF (def, insn_info)
489 {
490 link = DF_REF_CHAIN (def);
491 store = DF_REF_INSN (link->ref);
492 if (insn_is_store_p (store) && insn_is_swap_p (store))
493 feeds_store = 1;
494 }
495
496 return fed_by_load && feeds_store;
497 }
498
499 /* Return TRUE if insn is a swap fed by a load from the constant pool. */
500 static bool
501 const_load_sequence_p (swap_web_entry *insn_entry, rtx insn)
502 {
503 unsigned uid = INSN_UID (insn);
504 if (!insn_entry[uid].is_swap || insn_entry[uid].is_load)
505 return false;
506
507 const_rtx tocrel_base;
508
509 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
510 df_ref use;
511
512 /* Iterate over the definitions that are used by this insn. Since
513 this is known to be a swap insn, expect only one used definition. */
514 FOR_EACH_INSN_INFO_USE (use, insn_info)
515 {
516 struct df_link *def_link = DF_REF_CHAIN (use);
517
518 /* If there is no def or the def is artificial or there are
519 multiple defs, punt. */
520 if (!def_link || !def_link->ref || DF_REF_IS_ARTIFICIAL (def_link->ref)
521 || def_link->next)
522 return false;
523
524 rtx def_insn = DF_REF_INSN (def_link->ref);
525 unsigned uid2 = INSN_UID (def_insn);
526 /* If this is not a load or is not a swap, return false. */
527 if (!insn_entry[uid2].is_load || !insn_entry[uid2].is_swap)
528 return false;
529
530 /* If the source of the rtl def is not a set from memory, return
531 false. */
532 rtx body = PATTERN (def_insn);
533 if (GET_CODE (body) != SET
534 || GET_CODE (SET_SRC (body)) != VEC_SELECT
535 || !MEM_P (XEXP (SET_SRC (body), 0)))
536 return false;
537
538 rtx mem = XEXP (SET_SRC (body), 0);
539 rtx base_reg = XEXP (mem, 0);
540 /* If the base address for the memory expression is not
541 represented by a register, punt. */
542 if (!REG_P (base_reg))
543 return false;
544
545 df_ref base_use;
546 insn_info = DF_INSN_INFO_GET (def_insn);
547 FOR_EACH_INSN_INFO_USE (base_use, insn_info)
548 {
549 /* If base_use does not represent base_reg, look for another
550 use. */
551 if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
552 continue;
553
554 struct df_link *base_def_link = DF_REF_CHAIN (base_use);
555 if (!base_def_link || base_def_link->next)
556 return false;
557
558 /* Constants held on the stack are not "true" constants
559 because their values are not part of the static load
560 image. If this constant's base reference is a stack
561 or frame pointer, it is seen as an artificial
562 reference. */
563 if (DF_REF_IS_ARTIFICIAL (base_def_link->ref))
564 return false;
565
566 rtx tocrel_insn = DF_REF_INSN (base_def_link->ref);
567 rtx tocrel_body = PATTERN (tocrel_insn);
568 rtx base, offset;
569 if (GET_CODE (tocrel_body) != SET)
570 return false;
571 /* There is an extra level of indirection for small/large
572 code models. */
573 rtx tocrel_expr = SET_SRC (tocrel_body);
574 if (MEM_P (tocrel_expr))
575 tocrel_expr = XEXP (tocrel_expr, 0);
576 if (!toc_relative_expr_p (tocrel_expr, false, &tocrel_base, NULL))
577 return false;
578 split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
579
580 if (!SYMBOL_REF_P (base) || !CONSTANT_POOL_ADDRESS_P (base))
581 return false;
582 else
583 {
584 /* FIXME: The conditions under which
585 (SYMBOL_REF_P (const_vector)
586 && !CONSTANT_POOL_ADDRESS_P (const_vector))
587 are not well understood. This code prevents
588 an internal compiler error which will occur in
589 replace_swapped_load_constant () if we were to return
590 true. Some day, we should figure out how to properly
591 handle this condition in
592 replace_swapped_load_constant () and then we can
593 remove this special test. */
594 rtx const_vector = get_pool_constant (base);
595 if (SYMBOL_REF_P (const_vector)
596 && CONSTANT_POOL_ADDRESS_P (const_vector))
597 const_vector = get_pool_constant (const_vector);
598 if (GET_CODE (const_vector) != CONST_VECTOR)
599 return false;
600 }
601 }
602 }
603 return true;
604 }
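
/* Schematically, the sequence recognized by const_load_sequence_p is
   (details such as the exact TOC-relative form vary by code model, and
   register numbers are invented for illustration):

     (set (reg:DI 3) (...TOC-relative reference to a constant pool
                      symbol...))
     (set (reg:V16QI 104)
          (vec_select:V16QI (mem:V16QI (reg:DI 3)) (parallel [...])))
     (set (reg:V16QI 105)
          (vec_select:V16QI (reg:V16QI 104) (parallel [...])))

   where INSN is the final swap and the middle insn is the permuting
   load of the constant.  */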
605
606 /* Return TRUE iff OP matches a V2DF reduction pattern. See the
607 definition of vsx_reduc_<VEC_reduc_name>_v2df in vsx.md. */
608 static bool
609 v2df_reduction_p (rtx op)
610 {
611 if (GET_MODE (op) != V2DFmode)
612 return false;
613
614 enum rtx_code code = GET_CODE (op);
615 if (code != PLUS && code != SMIN && code != SMAX)
616 return false;
617
618 rtx concat = XEXP (op, 0);
619 if (GET_CODE (concat) != VEC_CONCAT)
620 return false;
621
622 rtx select0 = XEXP (concat, 0);
623 rtx select1 = XEXP (concat, 1);
624 if (GET_CODE (select0) != VEC_SELECT || GET_CODE (select1) != VEC_SELECT)
625 return false;
626
627 rtx reg0 = XEXP (select0, 0);
628 rtx reg1 = XEXP (select1, 0);
629 if (!rtx_equal_p (reg0, reg1) || !REG_P (reg0))
630 return false;
631
632 rtx parallel0 = XEXP (select0, 1);
633 rtx parallel1 = XEXP (select1, 1);
634 if (GET_CODE (parallel0) != PARALLEL || GET_CODE (parallel1) != PARALLEL)
635 return false;
636
637 if (!rtx_equal_p (XVECEXP (parallel0, 0, 0), const1_rtx)
638 || !rtx_equal_p (XVECEXP (parallel1, 0, 0), const0_rtx))
639 return false;
640
641 return true;
642 }
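
/* Roughly, the V2DF add reduction recognized above has the form
   (register number invented for illustration):

     (plus:V2DF
       (vec_concat:V2DF
         (vec_select:DF (reg:V2DF 106) (parallel [(const_int 1)]))
         (vec_select:DF (reg:V2DF 106) (parallel [(const_int 0)])))
       (reg:V2DF 106))

   i.e. the vector added to a copy of itself with its two doublewords
   exchanged, which leaves the sum of the elements in both lanes.  */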
643
644 /* Return 1 iff OP is an operand that will not be affected by having
645 vector doublewords swapped in memory. */
646 static unsigned int
647 rtx_is_swappable_p (rtx op, unsigned int *special)
648 {
649 enum rtx_code code = GET_CODE (op);
650 int i, j;
651 rtx parallel;
652
653 switch (code)
654 {
655 case LABEL_REF:
656 case SYMBOL_REF:
657 case CLOBBER:
658 case REG:
659 return 1;
660
661 case VEC_CONCAT:
662 case ASM_INPUT:
663 case ASM_OPERANDS:
664 return 0;
665
666 case CONST_VECTOR:
667 {
668 *special = SH_CONST_VECTOR;
669 return 1;
670 }
671
672 case VEC_DUPLICATE:
673 /* Opportunity: If XEXP (op, 0) has the same mode as the result,
674 and XEXP (op, 1) is a PARALLEL with a single QImode const int,
675 it represents a vector splat for which we can do special
676 handling. */
677 if (CONST_INT_P (XEXP (op, 0)))
678 return 1;
679 else if (REG_P (XEXP (op, 0))
680 && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
681 /* This catches V2DF and V2DI splat, at a minimum. */
682 return 1;
683 else if (GET_CODE (XEXP (op, 0)) == TRUNCATE
684 && REG_P (XEXP (XEXP (op, 0), 0))
685 && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
686 /* This catches splat of a truncated value. */
687 return 1;
688 else if (GET_CODE (XEXP (op, 0)) == VEC_SELECT)
689 /* If the duplicated item is from a select, defer to the select
690 processing to see if we can change the lane for the splat. */
691 return rtx_is_swappable_p (XEXP (op, 0), special);
692 else
693 return 0;
694
695 case VEC_SELECT:
696 /* A vec_extract operation is ok if we change the lane. */
697 if (REG_P (XEXP (op, 0))
698 && GET_MODE_INNER (GET_MODE (XEXP (op, 0))) == GET_MODE (op)
699 && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
700 && XVECLEN (parallel, 0) == 1
701 && CONST_INT_P (XVECEXP (parallel, 0, 0)))
702 {
703 *special = SH_EXTRACT;
704 return 1;
705 }
706 /* An XXPERMDI is ok if we adjust the lanes. Note that if the
707 XXPERMDI is a swap operation, it will be identified by
708 insn_is_swap_p and therefore we won't get here. */
709 else if (GET_CODE (XEXP (op, 0)) == VEC_CONCAT
710 && (GET_MODE (XEXP (op, 0)) == V4DFmode
711 || GET_MODE (XEXP (op, 0)) == V4DImode)
712 && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
713 && XVECLEN (parallel, 0) == 2
714 && CONST_INT_P (XVECEXP (parallel, 0, 0))
715 && CONST_INT_P (XVECEXP (parallel, 0, 1)))
716 {
717 *special = SH_XXPERMDI;
718 return 1;
719 }
720 else if (v2df_reduction_p (op))
721 return 1;
722 else
723 return 0;
724
725 case UNSPEC:
726 {
727 /* Various operations are unsafe for this optimization, at least
728 without significant additional work. Permutes are obviously
729 problematic, as both the permute control vector and the ordering
730 of the target values are invalidated by doubleword swapping.
731 Vector pack and unpack modify the number of vector lanes.
732 Merge-high/low will not operate correctly on swapped operands.
733 Vector shifts across element boundaries are clearly uncool,
734 as are vector select and concatenate operations. Vector
735 sum-across instructions define one operand with a specific
736 order-dependent element, so additional fixup code would be
737 needed to make those work. Vector set and non-immediate-form
738 vector splat are element-order sensitive. A few of these
739 cases might be workable with special handling if required.
740 Adding cost modeling would be appropriate in some cases. */
741 int val = XINT (op, 1);
742 switch (val)
743 {
744 default:
745 break;
746 case UNSPEC_VBPERMQ:
747 case UNSPEC_VMRGH_DIRECT:
748 case UNSPEC_VMRGL_DIRECT:
749 case UNSPEC_VPACK_SIGN_SIGN_SAT:
750 case UNSPEC_VPACK_SIGN_UNS_SAT:
751 case UNSPEC_VPACK_UNS_UNS_MOD:
752 case UNSPEC_VPACK_UNS_UNS_MOD_DIRECT:
753 case UNSPEC_VPACK_UNS_UNS_SAT:
754 case UNSPEC_VPERM:
755 case UNSPEC_VPERM_UNS:
756 case UNSPEC_VPERMHI:
757 case UNSPEC_VPERMSI:
758 case UNSPEC_VPERMXOR:
759 case UNSPEC_VPKPX:
760 case UNSPEC_VSLDOI:
761 case UNSPEC_VSLO:
762 case UNSPEC_VSRO:
763 case UNSPEC_VSUM2SWS:
764 case UNSPEC_VSUM4S:
765 case UNSPEC_VSUM4UBS:
766 case UNSPEC_VSUMSWS:
767 case UNSPEC_VSUMSWS_DIRECT:
768 case UNSPEC_VSX_CONCAT:
769 case UNSPEC_VSX_CVDPSPN:
770 case UNSPEC_VSX_CVSPDP:
771 case UNSPEC_VSX_CVSPDPN:
772 case UNSPEC_VSX_EXTRACT:
773 case UNSPEC_VSX_SET:
774 case UNSPEC_VSX_SLDWI:
775 case UNSPEC_VSX_VSLO:
776 case UNSPEC_VUNPACK_HI_SIGN:
777 case UNSPEC_VUNPACK_HI_SIGN_DIRECT:
778 case UNSPEC_VUNPACK_LO_SIGN:
779 case UNSPEC_VUNPACK_LO_SIGN_DIRECT:
780 case UNSPEC_VUPKHPX:
781 case UNSPEC_VUPKHS_V4SF:
782 case UNSPEC_VUPKHU_V4SF:
783 case UNSPEC_VUPKLPX:
784 case UNSPEC_VUPKLS_V4SF:
785 case UNSPEC_VUPKLU_V4SF:
786 return 0;
787 case UNSPEC_VSPLT_DIRECT:
788 case UNSPEC_VSX_XXSPLTD:
789 *special = SH_SPLAT;
790 return 1;
791 case UNSPEC_REDUC_PLUS:
792 case UNSPEC_REDUC:
793 return 1;
794 case UNSPEC_VPMSUM:
795 /* vpmsumd is not swappable, but vpmsum[bhw] are. */
796 if (GET_MODE (op) == V2DImode)
797 return 0;
798 break;
799 }
800 }
801
802 default:
803 break;
804 }
805
806 const char *fmt = GET_RTX_FORMAT (code);
807 int ok = 1;
808
809 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
810 if (fmt[i] == 'e' || fmt[i] == 'u')
811 {
812 unsigned int special_op = SH_NONE;
813 ok &= rtx_is_swappable_p (XEXP (op, i), &special_op);
814 if (special_op == SH_NONE)
815 continue;
816 /* Ensure we never have two kinds of special handling
817 for the same insn. */
818 if (*special != SH_NONE && *special != special_op)
819 return 0;
820 *special = special_op;
821 }
822 else if (fmt[i] == 'E')
823 for (j = 0; j < XVECLEN (op, i); ++j)
824 {
825 unsigned int special_op = SH_NONE;
826 ok &= rtx_is_swappable_p (XVECEXP (op, i, j), &special_op);
827 if (special_op == SH_NONE)
828 continue;
829 /* Ensure we never have two kinds of special handling
830 for the same insn. */
831 if (*special != SH_NONE && *special != special_op)
832 return 0;
833 *special = special_op;
834 }
835
836 return ok;
837 }
838
839 /* Return 1 iff INSN is an operand that will not be affected by
840 having vector doublewords swapped in memory (in which case
841 *SPECIAL is unchanged), or that can be modified to be correct
842 if vector doublewords are swapped in memory (in which case
843 *SPECIAL is changed to a value indicating how). */
844 static unsigned int
845 insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn,
846 unsigned int *special)
847 {
848 /* Calls are always bad. */
849 if (GET_CODE (insn) == CALL_INSN)
850 return 0;
851
852 /* Loads and stores seen here are not permuting, but we can still
853 fix them up by converting them to permuting ones. Exceptions:
854 UNSPEC_LVE, UNSPEC_LVX, and UNSPEC_STVX, which have a PARALLEL
855 body instead of a SET; and UNSPEC_STVE, which has an UNSPEC
856 for the SET source. Also we must now make an exception for lvx
857 and stvx when they are not in the UNSPEC_LVX/STVX form (with the
858 explicit "& -16") since this leads to unrecognizable insns. */
859 rtx body = PATTERN (insn);
860 int i = INSN_UID (insn);
861
862 if (insn_entry[i].is_load)
863 {
864 if (GET_CODE (body) == SET)
865 {
866 rtx rhs = SET_SRC (body);
867 /* Even without a swap, the RHS might be a vec_select for, say,
868 a byte-reversing load. */
869 if (!MEM_P (rhs))
870 return 0;
871 if (GET_CODE (XEXP (rhs, 0)) == AND)
872 return 0;
873
874 *special = SH_NOSWAP_LD;
875 return 1;
876 }
877 else
878 return 0;
879 }
880
881 if (insn_entry[i].is_store)
882 {
883 if (GET_CODE (body) == SET
884 && GET_CODE (SET_SRC (body)) != UNSPEC
885 && GET_CODE (SET_SRC (body)) != VEC_SELECT)
886 {
887 rtx lhs = SET_DEST (body);
888 /* Even without a swap, the RHS might be a vec_select for, say,
889 a byte-reversing store. */
890 if (!MEM_P (lhs))
891 return 0;
892 if (GET_CODE (XEXP (lhs, 0)) == AND)
893 return 0;
894
895 *special = SH_NOSWAP_ST;
896 return 1;
897 }
898 else
899 return 0;
900 }
901
902 /* A convert to single precision can be left as is provided that
903 all of its uses are in xxspltw instructions that splat BE element
904 zero. */
905 if (GET_CODE (body) == SET
906 && GET_CODE (SET_SRC (body)) == UNSPEC
907 && XINT (SET_SRC (body), 1) == UNSPEC_VSX_CVDPSPN)
908 {
909 df_ref def;
910 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
911
912 FOR_EACH_INSN_INFO_DEF (def, insn_info)
913 {
914 struct df_link *link = DF_REF_CHAIN (def);
915 if (!link)
916 return 0;
917
918 for (; link; link = link->next) {
919 rtx use_insn = DF_REF_INSN (link->ref);
920 rtx use_body = PATTERN (use_insn);
921 if (GET_CODE (use_body) != SET
922 || GET_CODE (SET_SRC (use_body)) != UNSPEC
923 || XINT (SET_SRC (use_body), 1) != UNSPEC_VSX_XXSPLTW
924 || XVECEXP (SET_SRC (use_body), 0, 1) != const0_rtx)
925 return 0;
926 }
927 }
928
929 return 1;
930 }
931
932 /* A concatenation of two doublewords is ok if we reverse the
933 order of the inputs. */
934 if (GET_CODE (body) == SET
935 && GET_CODE (SET_SRC (body)) == VEC_CONCAT
936 && (GET_MODE (SET_SRC (body)) == V2DFmode
937 || GET_MODE (SET_SRC (body)) == V2DImode))
938 {
939 *special = SH_CONCAT;
940 return 1;
941 }
942
943 /* V2DF reductions are always swappable. */
944 if (GET_CODE (body) == PARALLEL)
945 {
946 rtx expr = XVECEXP (body, 0, 0);
947 if (GET_CODE (expr) == SET
948 && v2df_reduction_p (SET_SRC (expr)))
949 return 1;
950 }
951
952 /* An UNSPEC_VPERM is ok if the mask operand is loaded from the
953 constant pool. */
954 if (GET_CODE (body) == SET
955 && GET_CODE (SET_SRC (body)) == UNSPEC
956 && XINT (SET_SRC (body), 1) == UNSPEC_VPERM
957 && XVECLEN (SET_SRC (body), 0) == 3
958 && REG_P (XVECEXP (SET_SRC (body), 0, 2)))
959 {
960 rtx mask_reg = XVECEXP (SET_SRC (body), 0, 2);
961 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
962 df_ref use;
963 FOR_EACH_INSN_INFO_USE (use, insn_info)
964 if (rtx_equal_p (DF_REF_REG (use), mask_reg))
965 {
966 struct df_link *def_link = DF_REF_CHAIN (use);
967 /* Punt if multiple definitions for this reg. */
968 if (def_link && !def_link->next &&
969 const_load_sequence_p (insn_entry,
970 DF_REF_INSN (def_link->ref)))
971 {
972 *special = SH_VPERM;
973 return 1;
974 }
975 }
976 }
977
978 /* Otherwise check the operands for vector lane violations. */
979 return rtx_is_swappable_p (body, special);
980 }
981
982 enum chain_purpose { FOR_LOADS, FOR_STORES };
983
984 /* Return true if the UD or DU chain headed by LINK is non-empty,
985 and every entry on the chain references an insn that is a
986 register swap. Furthermore, if PURPOSE is FOR_LOADS, each such
987 register swap must have only permuting loads as reaching defs.
988 If PURPOSE is FOR_STORES, each such register swap must have only
989 register swaps or permuting stores as reached uses. */
990 static bool
991 chain_contains_only_swaps (swap_web_entry *insn_entry, struct df_link *link,
992 enum chain_purpose purpose)
993 {
994 if (!link)
995 return false;
996
997 for (; link; link = link->next)
998 {
999 if (!ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (DF_REF_REG (link->ref))))
1000 continue;
1001
1002 if (DF_REF_IS_ARTIFICIAL (link->ref))
1003 return false;
1004
1005 rtx reached_insn = DF_REF_INSN (link->ref);
1006 unsigned uid = INSN_UID (reached_insn);
1007 struct df_insn_info *insn_info = DF_INSN_INFO_GET (reached_insn);
1008
1009 if (!insn_entry[uid].is_swap || insn_entry[uid].is_load
1010 || insn_entry[uid].is_store)
1011 return false;
1012
1013 if (purpose == FOR_LOADS)
1014 {
1015 df_ref use;
1016 FOR_EACH_INSN_INFO_USE (use, insn_info)
1017 {
1018 struct df_link *swap_link = DF_REF_CHAIN (use);
1019
1020 while (swap_link)
1021 {
1022 if (DF_REF_IS_ARTIFICIAL (swap_link->ref))
1023 return false;
1024
1025 rtx swap_def_insn = DF_REF_INSN (swap_link->ref);
1026 unsigned uid2 = INSN_UID (swap_def_insn);
1027
1028 /* Only permuting loads are allowed. */
1029 if (!insn_entry[uid2].is_swap || !insn_entry[uid2].is_load)
1030 return false;
1031
1032 swap_link = swap_link->next;
1033 }
1034 }
1035 }
1036 else if (purpose == FOR_STORES)
1037 {
1038 df_ref def;
1039 FOR_EACH_INSN_INFO_DEF (def, insn_info)
1040 {
1041 struct df_link *swap_link = DF_REF_CHAIN (def);
1042
1043 while (swap_link)
1044 {
1045 if (DF_REF_IS_ARTIFICIAL (swap_link->ref))
1046 return false;
1047
1048 rtx swap_use_insn = DF_REF_INSN (swap_link->ref);
1049 unsigned uid2 = INSN_UID (swap_use_insn);
1050
1051 /* Permuting stores or register swaps are allowed. */
1052 if (!insn_entry[uid2].is_swap || insn_entry[uid2].is_load)
1053 return false;
1054
1055 swap_link = swap_link->next;
1056 }
1057 }
1058 }
1059 }
1060
1061 return true;
1062 }
1063
1064 /* Mark the xxswapdi instructions associated with permuting loads and
1065 stores for removal. Note that we only flag them for deletion here,
1066 as there is a possibility of a swap being reached from multiple
1067 loads, etc. */
1068 static void
1069 mark_swaps_for_removal (swap_web_entry *insn_entry, unsigned int i)
1070 {
1071 rtx insn = insn_entry[i].insn;
1072 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
1073
1074 if (insn_entry[i].is_load)
1075 {
1076 df_ref def;
1077 FOR_EACH_INSN_INFO_DEF (def, insn_info)
1078 {
1079 struct df_link *link = DF_REF_CHAIN (def);
1080
1081 /* We know by now that these are swaps, so we can delete
1082 them confidently. */
1083 while (link)
1084 {
1085 rtx use_insn = DF_REF_INSN (link->ref);
1086 insn_entry[INSN_UID (use_insn)].will_delete = 1;
1087 link = link->next;
1088 }
1089 }
1090 }
1091 else if (insn_entry[i].is_store)
1092 {
1093 df_ref use;
1094 FOR_EACH_INSN_INFO_USE (use, insn_info)
1095 {
1096 /* Ignore uses for addressability. */
1097 machine_mode mode = GET_MODE (DF_REF_REG (use));
1098 if (!ALTIVEC_OR_VSX_VECTOR_MODE (mode))
1099 continue;
1100
1101 struct df_link *link = DF_REF_CHAIN (use);
1102
1103 /* We know by now that these are swaps, so we can delete
1104 them confidently. */
1105 while (link)
1106 {
1107 rtx def_insn = DF_REF_INSN (link->ref);
1108 insn_entry[INSN_UID (def_insn)].will_delete = 1;
1109 link = link->next;
1110 }
1111 }
1112 }
1113 }
1114
1115 /* *OP_PTR is either a CONST_VECTOR or an expression containing one.
1116 Swap the first half of the vector with the second in the first
1117 case. Recurse to find it in the second. */
1118 static void
1119 swap_const_vector_halves (rtx *op_ptr)
1120 {
1121 int i;
1122 rtx op = *op_ptr;
1123 enum rtx_code code = GET_CODE (op);
1124 if (GET_CODE (op) == CONST_VECTOR)
1125 {
1126 int units = GET_MODE_NUNITS (GET_MODE (op));
1127 rtx_vector_builder builder (GET_MODE (op), units, 1);
1128 for (i = 0; i < units / 2; ++i)
1129 builder.quick_push (CONST_VECTOR_ELT (op, i + units / 2));
1130 for (i = 0; i < units / 2; ++i)
1131 builder.quick_push (CONST_VECTOR_ELT (op, i));
1132 *op_ptr = builder.build ();
1133 }
1134 else
1135 {
1136 int j;
1137 const char *fmt = GET_RTX_FORMAT (code);
1138 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
1139 if (fmt[i] == 'e' || fmt[i] == 'u')
1140 swap_const_vector_halves (&XEXP (op, i));
1141 else if (fmt[i] == 'E')
1142 for (j = 0; j < XVECLEN (op, i); ++j)
1143 swap_const_vector_halves (&XVECEXP (op, i, j));
1144 }
1145 }
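
/* For example, the V4SI constant { 0, 1, 2, 3 } becomes { 2, 3, 0, 1 }:
   the two doubleword halves trade places while the order of elements
   within each half is preserved.  */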
1146
1147 /* Find all subregs of a vector expression that perform a narrowing,
1148 and adjust the subreg index to account for doubleword swapping. */
1149 static void
1150 adjust_subreg_index (rtx op)
1151 {
1152 enum rtx_code code = GET_CODE (op);
1153 if (code == SUBREG
1154 && (GET_MODE_SIZE (GET_MODE (op))
1155 < GET_MODE_SIZE (GET_MODE (XEXP (op, 0)))))
1156 {
1157 unsigned int index = SUBREG_BYTE (op);
1158 if (index < 8)
1159 index += 8;
1160 else
1161 index -= 8;
1162 SUBREG_BYTE (op) = index;
1163 }
1164
1165 const char *fmt = GET_RTX_FORMAT (code);
1166 int i,j;
1167 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
1168 if (fmt[i] == 'e' || fmt[i] == 'u')
1169 adjust_subreg_index (XEXP (op, i));
1170 else if (fmt[i] == 'E')
1171 for (j = 0; j < XVECLEN (op, i); ++j)
1172 adjust_subreg_index (XVECEXP (op, i, j));
1173 }
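
/* For example, (subreg:DI (reg:V2DI 107) 0) becomes
   (subreg:DI (reg:V2DI 107) 8), and byte offset 8 becomes 0, so that a
   narrowing subreg still names the same doubleword once the register's
   doublewords have been swapped (register number invented).  */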
1174
1175 /* Convert the non-permuting load INSN to a permuting one. */
1176 static void
1177 permute_load (rtx_insn *insn)
1178 {
1179 rtx body = PATTERN (insn);
1180 rtx mem_op = SET_SRC (body);
1181 rtx tgt_reg = SET_DEST (body);
1182 machine_mode mode = GET_MODE (tgt_reg);
1183 int n_elts = GET_MODE_NUNITS (mode);
1184 int half_elts = n_elts / 2;
1185 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
1186 int i, j;
1187 for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
1188 XVECEXP (par, 0, i) = GEN_INT (j);
1189 for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
1190 XVECEXP (par, 0, i) = GEN_INT (j);
1191 rtx sel = gen_rtx_VEC_SELECT (mode, mem_op, par);
1192 SET_SRC (body) = sel;
1193 INSN_CODE (insn) = -1; /* Force re-recognition. */
1194 df_insn_rescan (insn);
1195
1196 if (dump_file)
1197 fprintf (dump_file, "Replacing load %d with permuted load\n",
1198 INSN_UID (insn));
1199 }
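
/* A sketch of the rewrite performed by permute_load for a V4SI load
   (register numbers invented for illustration):

     before:  (set (reg:V4SI 108) (mem:V4SI (reg:DI 3)))
     after:   (set (reg:V4SI 108)
                   (vec_select:V4SI (mem:V4SI (reg:DI 3))
                                    (parallel [(const_int 2) (const_int 3)
                                               (const_int 0) (const_int 1)])))

   permute_store below applies the mirror-image rewrite to the source
   of a vector store.  */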
1200
1201 /* Convert the non-permuting store INSN to a permuting one. */
1202 static void
1203 permute_store (rtx_insn *insn)
1204 {
1205 rtx body = PATTERN (insn);
1206 rtx src_reg = SET_SRC (body);
1207 machine_mode mode = GET_MODE (src_reg);
1208 int n_elts = GET_MODE_NUNITS (mode);
1209 int half_elts = n_elts / 2;
1210 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
1211 int i, j;
1212 for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
1213 XVECEXP (par, 0, i) = GEN_INT (j);
1214 for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
1215 XVECEXP (par, 0, i) = GEN_INT (j);
1216 rtx sel = gen_rtx_VEC_SELECT (mode, src_reg, par);
1217 SET_SRC (body) = sel;
1218 INSN_CODE (insn) = -1; /* Force re-recognition. */
1219 df_insn_rescan (insn);
1220
1221 if (dump_file)
1222 fprintf (dump_file, "Replacing store %d with permuted store\n",
1223 INSN_UID (insn));
1224 }
1225
1226 /* Given INSN that contains a vector extract operation, adjust the index
1227 of the extracted lane to account for the doubleword swap. */
1228 static void
1229 adjust_extract (rtx_insn *insn)
1230 {
1231 rtx pattern = PATTERN (insn);
1232 if (GET_CODE (pattern) == PARALLEL)
1233 pattern = XVECEXP (pattern, 0, 0);
1234 rtx src = SET_SRC (pattern);
1235 /* The vec_select may be wrapped in a vec_duplicate for a splat, so
1236 account for that. */
1237 rtx sel = GET_CODE (src) == VEC_DUPLICATE ? XEXP (src, 0) : src;
1238 rtx par = XEXP (sel, 1);
1239 int half_elts = GET_MODE_NUNITS (GET_MODE (XEXP (sel, 0))) >> 1;
1240 int lane = INTVAL (XVECEXP (par, 0, 0));
1241 lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
1242 XVECEXP (par, 0, 0) = GEN_INT (lane);
1243 INSN_CODE (insn) = -1; /* Force re-recognition. */
1244 df_insn_rescan (insn);
1245
1246 if (dump_file)
1247 fprintf (dump_file, "Changing lane for extract %d\n", INSN_UID (insn));
1248 }
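
/* For example, extracting lane 0 of a V4SI vector becomes extracting
   lane 2, and lane 3 becomes lane 1, mirroring the doubleword swap.  */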
1249
1250 /* Given INSN that contains a vector direct-splat operation, adjust the index
1251 of the source lane to account for the doubleword swap. */
1252 static void
1253 adjust_splat (rtx_insn *insn)
1254 {
1255 rtx body = PATTERN (insn);
1256 rtx unspec = XEXP (body, 1);
1257 int half_elts = GET_MODE_NUNITS (GET_MODE (unspec)) >> 1;
1258 int lane = INTVAL (XVECEXP (unspec, 0, 1));
1259 lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
1260 XVECEXP (unspec, 0, 1) = GEN_INT (lane);
1261 INSN_CODE (insn) = -1; /* Force re-recognition. */
1262 df_insn_rescan (insn);
1263
1264 if (dump_file)
1265 fprintf (dump_file, "Changing lane for splat %d\n", INSN_UID (insn));
1266 }
1267
1268 /* Given INSN that contains an XXPERMDI operation (that is not a doubleword
1269 swap), reverse the order of the source operands and adjust the indices
1270 of the source lanes to account for doubleword reversal. */
1271 static void
1272 adjust_xxpermdi (rtx_insn *insn)
1273 {
1274 rtx set = PATTERN (insn);
1275 rtx select = XEXP (set, 1);
1276 rtx concat = XEXP (select, 0);
1277 rtx src0 = XEXP (concat, 0);
1278 XEXP (concat, 0) = XEXP (concat, 1);
1279 XEXP (concat, 1) = src0;
1280 rtx parallel = XEXP (select, 1);
1281 int lane0 = INTVAL (XVECEXP (parallel, 0, 0));
1282 int lane1 = INTVAL (XVECEXP (parallel, 0, 1));
1283 int new_lane0 = 3 - lane1;
1284 int new_lane1 = 3 - lane0;
1285 XVECEXP (parallel, 0, 0) = GEN_INT (new_lane0);
1286 XVECEXP (parallel, 0, 1) = GEN_INT (new_lane1);
1287 INSN_CODE (insn) = -1; /* Force re-recognition. */
1288 df_insn_rescan (insn);
1289
1290 if (dump_file)
1291 fprintf (dump_file, "Changing lanes for xxpermdi %d\n", INSN_UID (insn));
1292 }
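
/* For example, an xxpermdi selecting lanes (0, 2) of the concatenation
   (op1 || op2) is rewritten to select lanes (1, 3) of (op2 || op1):
   the two source operands are exchanged and each new lane index is
   3 minus the other original index, as computed above.  */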
1293
1294 /* Given INSN that contains a VEC_CONCAT operation of two doublewords,
1295 reverse the order of those inputs. */
1296 static void
1297 adjust_concat (rtx_insn *insn)
1298 {
1299 rtx set = PATTERN (insn);
1300 rtx concat = XEXP (set, 1);
1301 rtx src0 = XEXP (concat, 0);
1302 XEXP (concat, 0) = XEXP (concat, 1);
1303 XEXP (concat, 1) = src0;
1304 INSN_CODE (insn) = -1; /* Force re-recognition. */
1305 df_insn_rescan (insn);
1306
1307 if (dump_file)
1308 fprintf (dump_file, "Reversing inputs for concat %d\n", INSN_UID (insn));
1309 }
1310
1311 /* Given an UNSPEC_VPERM insn, modify the mask loaded from the
1312 constant pool to reflect swapped doublewords. */
1313 static void
1314 adjust_vperm (rtx_insn *insn)
1315 {
1316 /* We previously determined that the UNSPEC_VPERM was fed by a
1317 swap of a swapping load of a TOC-relative constant pool symbol.
1318 Find the MEM in the swapping load and replace it with a MEM for
1319 the adjusted mask constant. */
1320 rtx set = PATTERN (insn);
1321 rtx mask_reg = XVECEXP (SET_SRC (set), 0, 2);
1322
1323 /* Find the swap. */
1324 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
1325 df_ref use;
1326 rtx_insn *swap_insn = 0;
1327 FOR_EACH_INSN_INFO_USE (use, insn_info)
1328 if (rtx_equal_p (DF_REF_REG (use), mask_reg))
1329 {
1330 struct df_link *def_link = DF_REF_CHAIN (use);
1331 gcc_assert (def_link && !def_link->next);
1332 swap_insn = DF_REF_INSN (def_link->ref);
1333 break;
1334 }
1335 gcc_assert (swap_insn);
1336
1337 /* Find the load. */
1338 insn_info = DF_INSN_INFO_GET (swap_insn);
1339 rtx_insn *load_insn = 0;
1340 FOR_EACH_INSN_INFO_USE (use, insn_info)
1341 {
1342 struct df_link *def_link = DF_REF_CHAIN (use);
1343 gcc_assert (def_link && !def_link->next);
1344 load_insn = DF_REF_INSN (def_link->ref);
1345 break;
1346 }
1347 gcc_assert (load_insn);
1348
1349 /* Find the TOC-relative symbol access. */
1350 insn_info = DF_INSN_INFO_GET (load_insn);
1351 rtx_insn *tocrel_insn = 0;
1352 FOR_EACH_INSN_INFO_USE (use, insn_info)
1353 {
1354 struct df_link *def_link = DF_REF_CHAIN (use);
1355 gcc_assert (def_link && !def_link->next);
1356 tocrel_insn = DF_REF_INSN (def_link->ref);
1357 break;
1358 }
1359 gcc_assert (tocrel_insn);
1360
1361 /* Find the embedded CONST_VECTOR. We have to call toc_relative_expr_p
1362 to set tocrel_base; otherwise it would be unnecessary as we've
1363 already established it will return true. */
1364 rtx base, offset;
1365 const_rtx tocrel_base;
1366 rtx tocrel_expr = SET_SRC (PATTERN (tocrel_insn));
1367 /* There is an extra level of indirection for small/large code models. */
1368 if (MEM_P (tocrel_expr))
1369 tocrel_expr = XEXP (tocrel_expr, 0);
1370 if (!toc_relative_expr_p (tocrel_expr, false, &tocrel_base, NULL))
1371 gcc_unreachable ();
1372 split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
1373 rtx const_vector = get_pool_constant (base);
1374 /* With the extra indirection, get_pool_constant will produce the
1375 real constant from the reg_equal expression, so get the real
1376 constant. */
1377 if (SYMBOL_REF_P (const_vector))
1378 const_vector = get_pool_constant (const_vector);
1379 gcc_assert (GET_CODE (const_vector) == CONST_VECTOR);
1380
1381 /* Create an adjusted mask from the initial mask. */
1382 unsigned int new_mask[16], i, val;
1383 for (i = 0; i < 16; ++i) {
1384 val = INTVAL (XVECEXP (const_vector, 0, i));
1385 if (val < 16)
1386 new_mask[i] = (val + 8) % 16;
1387 else
1388 new_mask[i] = ((val + 8) % 16) + 16;
1389 }
1390
1391 /* Create a new CONST_VECTOR and a MEM that references it. */
1392 rtx vals = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
1393 for (i = 0; i < 16; ++i)
1394 XVECEXP (vals, 0, i) = GEN_INT (new_mask[i]);
1395 rtx new_const_vector = gen_rtx_CONST_VECTOR (V16QImode, XVEC (vals, 0));
1396 rtx new_mem = force_const_mem (V16QImode, new_const_vector);
1397 /* This gives us a MEM whose base operand is a SYMBOL_REF, which we
1398 can't recognize. Force the SYMBOL_REF into a register. */
1399 if (!REG_P (XEXP (new_mem, 0))) {
1400 rtx base_reg = force_reg (Pmode, XEXP (new_mem, 0));
1401 XEXP (new_mem, 0) = base_reg;
1402 /* Move the newly created insn ahead of the load insn. */
1403 rtx_insn *force_insn = get_last_insn ();
1404 remove_insn (force_insn);
1405 rtx_insn *before_load_insn = PREV_INSN (load_insn);
1406 add_insn_after (force_insn, before_load_insn, BLOCK_FOR_INSN (load_insn));
1407 df_insn_rescan (before_load_insn);
1408 df_insn_rescan (force_insn);
1409 }
1410
1411 /* Replace the MEM in the load instruction and rescan it. */
1412 XEXP (SET_SRC (PATTERN (load_insn)), 0) = new_mem;
1413 INSN_CODE (load_insn) = -1; /* Force re-recognition. */
1414 df_insn_rescan (load_insn);
1415
1416 if (dump_file)
1417 fprintf (dump_file, "Adjusting mask for vperm %d\n", INSN_UID (insn));
1418 }
1419
1420 /* The insn described by INSN_ENTRY[I] can be swapped, but only
1421 with special handling. Take care of that here. */
1422 static void
1423 handle_special_swappables (swap_web_entry *insn_entry, unsigned i)
1424 {
1425 rtx_insn *insn = insn_entry[i].insn;
1426 rtx body = PATTERN (insn);
1427
1428 switch (insn_entry[i].special_handling)
1429 {
1430 default:
1431 gcc_unreachable ();
1432 case SH_CONST_VECTOR:
1433 {
1434 /* A CONST_VECTOR will only show up somewhere in the RHS of a SET. */
1435 gcc_assert (GET_CODE (body) == SET);
1436 swap_const_vector_halves (&SET_SRC (body));
1437 if (dump_file)
1438 fprintf (dump_file, "Swapping constant halves in insn %d\n", i);
1439 break;
1440 }
1441 case SH_SUBREG:
1442 /* A subreg of the same size is already safe. For subregs that
1443 select a smaller portion of a reg, adjust the index for
1444 swapped doublewords. */
1445 adjust_subreg_index (body);
1446 if (dump_file)
1447 fprintf (dump_file, "Adjusting subreg in insn %d\n", i);
1448 break;
1449 case SH_NOSWAP_LD:
1450 /* Convert a non-permuting load to a permuting one. */
1451 permute_load (insn);
1452 break;
1453 case SH_NOSWAP_ST:
1454 /* Convert a non-permuting store to a permuting one. */
1455 permute_store (insn);
1456 break;
1457 case SH_EXTRACT:
1458 /* Change the lane on an extract operation. */
1459 adjust_extract (insn);
1460 break;
1461 case SH_SPLAT:
1462 /* Change the lane on a direct-splat operation. */
1463 adjust_splat (insn);
1464 break;
1465 case SH_XXPERMDI:
1466 /* Change the lanes on an XXPERMDI operation. */
1467 adjust_xxpermdi (insn);
1468 break;
1469 case SH_CONCAT:
1470 /* Reverse the order of a concatenation operation. */
1471 adjust_concat (insn);
1472 break;
1473 case SH_VPERM:
1474 /* Change the mask loaded from the constant pool for a VPERM. */
1475 adjust_vperm (insn);
1476 break;
1477 }
1478 }
1479
1480 /* Find the insn from the Ith table entry, which is known to be a
1481 register swap Y = SWAP(X). Replace it with a copy Y = X. */
1482 static void
1483 replace_swap_with_copy (swap_web_entry *insn_entry, unsigned i)
1484 {
1485 rtx_insn *insn = insn_entry[i].insn;
1486 rtx body = PATTERN (insn);
1487 rtx src_reg = XEXP (SET_SRC (body), 0);
1488 rtx copy = gen_rtx_SET (SET_DEST (body), src_reg);
1489 rtx_insn *new_insn = emit_insn_before (copy, insn);
1490 set_block_for_insn (new_insn, BLOCK_FOR_INSN (insn));
1491 df_insn_rescan (new_insn);
1492
1493 if (dump_file)
1494 {
1495 unsigned int new_uid = INSN_UID (new_insn);
1496 fprintf (dump_file, "Replacing swap %d with copy %d\n", i, new_uid);
1497 }
1498
1499 df_insn_delete (insn);
1500 remove_insn (insn);
1501 insn->set_deleted ();
1502 }
1503
1504 /* INSN is known to contain a SUBREG, which we can normally handle,
1505 but if the SUBREG itself contains a MULT then we need to leave it alone
1506 to avoid turning a mult_hipart into a mult_lopart, for example. */
1507 static bool
1508 has_part_mult (rtx_insn *insn)
1509 {
1510 rtx body = PATTERN (insn);
1511 if (GET_CODE (body) != SET)
1512 return false;
1513 rtx src = SET_SRC (body);
1514 if (GET_CODE (src) != SUBREG)
1515 return false;
1516 rtx inner = XEXP (src, 0);
1517 return (GET_CODE (inner) == MULT);
1518 }
1519
1520 /* Make NEW_MEM_EXP's attributes and flags resemble those of
1521 ORIGINAL_MEM_EXP. */
1522 static void
1523 mimic_memory_attributes_and_flags (rtx new_mem_exp, const_rtx original_mem_exp)
1524 {
1525 RTX_FLAG (new_mem_exp, jump) = RTX_FLAG (original_mem_exp, jump);
1526 RTX_FLAG (new_mem_exp, call) = RTX_FLAG (original_mem_exp, call);
1527 RTX_FLAG (new_mem_exp, unchanging) = RTX_FLAG (original_mem_exp, unchanging);
1528 RTX_FLAG (new_mem_exp, volatil) = RTX_FLAG (original_mem_exp, volatil);
1529 RTX_FLAG (new_mem_exp, frame_related) =
1530 RTX_FLAG (original_mem_exp, frame_related);
1531
1532 /* The following fields may not be used with MEM subexpressions */
1533 RTX_FLAG (new_mem_exp, in_struct) = RTX_FLAG (original_mem_exp, in_struct);
1534 RTX_FLAG (new_mem_exp, return_val) = RTX_FLAG (original_mem_exp, return_val);
1535
1536 struct mem_attrs original_attrs = *get_mem_attrs(original_mem_exp);
1537
1538 alias_set_type set = original_attrs.alias;
1539 set_mem_alias_set (new_mem_exp, set);
1540
1541 addr_space_t addrspace = original_attrs.addrspace;
1542 set_mem_addr_space (new_mem_exp, addrspace);
1543
1544 unsigned int align = original_attrs.align;
1545 set_mem_align (new_mem_exp, align);
1546
1547 tree expr = original_attrs.expr;
1548 set_mem_expr (new_mem_exp, expr);
1549
1550 if (original_attrs.offset_known_p)
1551 {
1552 HOST_WIDE_INT offset = original_attrs.offset;
1553 set_mem_offset (new_mem_exp, offset);
1554 }
1555 else
1556 clear_mem_offset (new_mem_exp);
1557
1558 if (original_attrs.size_known_p)
1559 {
1560 HOST_WIDE_INT size = original_attrs.size;
1561 set_mem_size (new_mem_exp, size);
1562 }
1563 else
1564 clear_mem_size (new_mem_exp);
1565 }
1566
1567 /* Generate an rtx expression to represent use of the stvx insn to store
1568 the value represented by register SRC_EXP into the memory at address
1569 DEST_EXP, with vector mode MODE. */
1570 rtx
1571 rs6000_gen_stvx (enum machine_mode mode, rtx dest_exp, rtx src_exp)
1572 {
1573 rtx stvx;
1574
1575 if (mode == V16QImode)
1576 stvx = gen_altivec_stvx_v16qi (src_exp, dest_exp);
1577 else if (mode == V8HImode)
1578 stvx = gen_altivec_stvx_v8hi (src_exp, dest_exp);
1579 #ifdef HAVE_V8HFmode
1580 else if (mode == V8HFmode)
1581 stvx = gen_altivec_stvx_v8hf (src_exp, dest_exp);
1582 #endif
1583 else if (mode == V4SImode)
1584 stvx = gen_altivec_stvx_v4si (src_exp, dest_exp);
1585 else if (mode == V4SFmode)
1586 stvx = gen_altivec_stvx_v4sf (src_exp, dest_exp);
1587 else if (mode == V2DImode)
1588 stvx = gen_altivec_stvx_v2di (src_exp, dest_exp);
1589 else if (mode == V2DFmode)
1590 stvx = gen_altivec_stvx_v2df (src_exp, dest_exp);
1591 else if (mode == V1TImode)
1592 stvx = gen_altivec_stvx_v1ti (src_exp, dest_exp);
1593 else
1594 /* KFmode, TFmode, other modes not expected in this context. */
1595 gcc_unreachable ();
1596
1597 rtx new_mem_exp = SET_DEST (PATTERN (stvx));
1598 mimic_memory_attributes_and_flags (new_mem_exp, dest_exp);
1599 return stvx;
1600 }
1601
1602 /* Given that STORE_INSN represents an aligned store-with-swap of a
1603 swapped value, replace the store with an aligned store (without
1604 swap) and replace the swap with a copy insn. */
1605 static void
1606 replace_swapped_aligned_store (swap_web_entry *insn_entry,
1607 rtx_insn *store_insn)
1608 {
1609 unsigned uid = INSN_UID (store_insn);
1610 gcc_assert (insn_entry[uid].is_swap && insn_entry[uid].is_store);
1611
1612 rtx body = PATTERN (store_insn);
1613 rtx dest_address = XEXP (SET_DEST (body), 0);
1614 rtx swap_reg = XEXP (SET_SRC (body), 0);
1615 gcc_assert (REG_P (dest_address)
1616 || rs6000_sum_of_two_registers_p (dest_address));
1617
1618 /* Find the swap instruction that provides the value to be stored by
1619 this store-with-swap instruction. */
1620 struct df_insn_info *insn_info = DF_INSN_INFO_GET (store_insn);
1621 df_ref use;
1622 rtx_insn *swap_insn = NULL;
1623 unsigned uid2 = 0;
1624 FOR_EACH_INSN_INFO_USE (use, insn_info)
1625 {
1626 struct df_link *def_link = DF_REF_CHAIN (use);
1627
1628 /* If this is not the definition of the candidate swap register,
1629 then skip it. I am only interested in the swap insn. */
1630 if (!rtx_equal_p (DF_REF_REG (use), swap_reg))
1631 continue;
1632
1633 /* If there is no def or the def is artificial or there are
1634 multiple defs, we should not be here. */
1635 gcc_assert (def_link && def_link->ref && !def_link->next
1636 && !DF_REF_IS_ARTIFICIAL (def_link->ref));
1637
1638 swap_insn = DF_REF_INSN (def_link->ref);
1639 uid2 = INSN_UID (swap_insn);
1640
1641 /* If this source value is not a simple swap, we should not be here. */
1642 gcc_assert (insn_entry[uid2].is_swap && !insn_entry[uid2].is_load
1643 && !insn_entry[uid2].is_store);
1644
1645 /* We've processed the use we care about, so break out of
1646 this loop. */
1647 break;
1648 }
1649
1650 /* At this point, swap_insn and uid2 represent the swap instruction
1651 that feeds the store. */
1652 gcc_assert (swap_insn);
1653 rtx set = single_set (store_insn);
1654 gcc_assert (set);
1655 rtx dest_exp = SET_DEST (set);
1656 rtx src_exp = XEXP (SET_SRC (body), 0);
1657 enum machine_mode mode = GET_MODE (dest_exp);
1658 gcc_assert (MEM_P (dest_exp));
1659 gcc_assert (MEM_ALIGN (dest_exp) >= 128);
1660
1661   /* Replace the store with a new stvx insn.  */
1662 rtx stvx;
1663 stvx = rs6000_gen_stvx (mode, dest_exp, src_exp);
1664
1665 rtx_insn *new_insn = emit_insn_before (stvx, store_insn);
1666 rtx new_body = PATTERN (new_insn);
1667
1668 gcc_assert ((GET_CODE (new_body) == SET)
1669 && MEM_P (SET_DEST (new_body)));
1670
1671 basic_block bb = BLOCK_FOR_INSN (store_insn);
1672 set_block_for_insn (new_insn, bb);
1673 /* Handle REG_EH_REGION note. */
1674 if (cfun->can_throw_non_call_exceptions && BB_END (bb) == store_insn)
1675 {
1676 rtx note = find_reg_note (store_insn, REG_EH_REGION, NULL_RTX);
1677 if (note)
1678 add_reg_note (new_insn, REG_EH_REGION, XEXP (note, 0));
1679 }
1680 df_insn_rescan (new_insn);
1681
1682 df_insn_delete (store_insn);
1683 remove_insn (store_insn);
1684 store_insn->set_deleted ();
1685
1686 /* Replace the swap with a copy. */
1687 uid2 = INSN_UID (swap_insn);
1688 mark_swaps_for_removal (insn_entry, uid2);
1689 replace_swap_with_copy (insn_entry, uid2);
1690 }
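
/* Rough sketch of the transformation performed above (register numbers
   and modes are illustrative):

     swap_insn:   (set (reg:V4SI T) (vec_select:V4SI (reg:V4SI S) ...))
     store_insn:  (set (mem:V4SI A) (vec_select:V4SI (reg:V4SI T) ...))

   where A is known to be 128-bit aligned, becomes

     swap_insn:   (set (reg:V4SI T) (reg:V4SI S))    ; now a plain copy
     new stvx:    (set (mem:V4SI A') (reg:V4SI T))   ; store without swap

   with A' carrying the memory attributes copied from A.  */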
1691
1692 /* Generate an rtx expression to represent use of the lvx insn to load
1693 from memory SRC_EXP into register DEST_EXP with vector mode MODE. */
1694 rtx
1695 rs6000_gen_lvx (enum machine_mode mode, rtx dest_exp, rtx src_exp)
1696 {
1697 rtx lvx;
1698
1699 if (mode == V16QImode)
1700 lvx = gen_altivec_lvx_v16qi (dest_exp, src_exp);
1701 else if (mode == V8HImode)
1702 lvx = gen_altivec_lvx_v8hi (dest_exp, src_exp);
1703 #ifdef HAVE_V8HFmode
1704 else if (mode == V8HFmode)
1705 lvx = gen_altivec_lvx_v8hf (dest_exp, src_exp);
1706 #endif
1707 else if (mode == V4SImode)
1708 lvx = gen_altivec_lvx_v4si (dest_exp, src_exp);
1709 else if (mode == V4SFmode)
1710 lvx = gen_altivec_lvx_v4sf (dest_exp, src_exp);
1711 else if (mode == V2DImode)
1712 lvx = gen_altivec_lvx_v2di (dest_exp, src_exp);
1713 else if (mode == V2DFmode)
1714 lvx = gen_altivec_lvx_v2df (dest_exp, src_exp);
1715 else if (mode == V1TImode)
1716 lvx = gen_altivec_lvx_v1ti (dest_exp, src_exp);
1717 else
1718 /* KFmode, TFmode, other modes not expected in this context. */
1719 gcc_unreachable ();
1720
1721 rtx new_mem_exp = SET_SRC (PATTERN (lvx));
1722 mimic_memory_attributes_and_flags (new_mem_exp, src_exp);
1723
1724 return lvx;
1725 }
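
/* As with rs6000_gen_stvx above, the insn returned here is expected to be
   a plain (set (reg) (mem)) with no doubleword swap, and its MEM operand
   is given the attributes of the original memory reference.  */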
1726
1727 /* Given that SWAP_INSN represents a swap of an aligned
1728 load-with-swap, replace the load with an aligned load (without
1729 swap) and replace the swap with a copy insn. */
1730 static void
1731 replace_swapped_aligned_load (swap_web_entry *insn_entry, rtx swap_insn)
1732 {
1733 /* Find the load. */
1734 unsigned uid = INSN_UID (swap_insn);
1735 /* Only call this if quad_aligned_load_p (swap_insn). */
1736 gcc_assert (insn_entry[uid].is_swap && !insn_entry[uid].is_load);
1737 struct df_insn_info *insn_info = DF_INSN_INFO_GET (swap_insn);
1738
1739 /* Since insn is known to represent a swap instruction, we know it
1740 "uses" only one input variable. */
1741 df_ref use = DF_INSN_INFO_USES (insn_info);
1742
1743 /* Figure out where this input variable is defined. */
1744 struct df_link *def_link = DF_REF_CHAIN (use);
1745 gcc_assert (def_link && !def_link->next);
1746 gcc_assert (def_link && def_link->ref &&
1747 !DF_REF_IS_ARTIFICIAL (def_link->ref) && !def_link->next);
1748
1749 rtx_insn *def_insn = DF_REF_INSN (def_link->ref);
1750 unsigned uid2 = INSN_UID (def_insn);
1751
1752 /* We're expecting a load-with-swap insn. */
1753 gcc_assert (insn_entry[uid2].is_load && insn_entry[uid2].is_swap);
1754
1755   /* We expect this to be a set of a register from memory, with the
1756      source wrapped in a VEC_SELECT representing the swap.  */
1757 rtx body = PATTERN (def_insn);
1758 gcc_assert ((GET_CODE (body) == SET)
1759 && (GET_CODE (SET_SRC (body)) == VEC_SELECT)
1760 && MEM_P (XEXP (SET_SRC (body), 0)));
1761
1762 rtx src_exp = XEXP (SET_SRC (body), 0);
1763 enum machine_mode mode = GET_MODE (src_exp);
1764 rtx lvx = rs6000_gen_lvx (mode, SET_DEST (body), src_exp);
1765
1766 rtx_insn *new_insn = emit_insn_before (lvx, def_insn);
1767 rtx new_body = PATTERN (new_insn);
1768
1769 gcc_assert ((GET_CODE (new_body) == SET)
1770 && MEM_P (SET_SRC (new_body)));
1771
1772 basic_block bb = BLOCK_FOR_INSN (def_insn);
1773 set_block_for_insn (new_insn, bb);
1774 /* Handle REG_EH_REGION note. */
1775 if (cfun->can_throw_non_call_exceptions && BB_END (bb) == def_insn)
1776 {
1777 rtx note = find_reg_note (def_insn, REG_EH_REGION, NULL_RTX);
1778 if (note)
1779 add_reg_note (new_insn, REG_EH_REGION, XEXP (note, 0));
1780 }
1781 df_insn_rescan (new_insn);
1782
1783 df_insn_delete (def_insn);
1784 remove_insn (def_insn);
1785 def_insn->set_deleted ();
1786
1787 /* Replace the swap with a copy. */
1788 mark_swaps_for_removal (insn_entry, uid);
1789 replace_swap_with_copy (insn_entry, uid);
1790 }
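
/* Rough sketch of the transformation performed above (illustrative):

     def_insn:   (set (reg:V4SI X) (vec_select:V4SI (mem:V4SI A) ...))
     swap_insn:  (set (reg:V4SI Y) (vec_select:V4SI (reg:V4SI X) ...))

   where A is known to be 128-bit aligned, becomes

     new lvx:    (set (reg:V4SI X) (mem:V4SI A'))    ; load without swap
     swap_insn:  (set (reg:V4SI Y) (reg:V4SI X))     ; now a plain copy

   with A' carrying the memory attributes copied from A.  */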
1791
1792 /* Given that SWAP_INSN represents a swap of a load of a constant
1793 vector value, replace with a single instruction that loads a
1794 swapped variant of the original constant.
1795
1796 The "natural" representation of a byte array in memory is the same
1797 for big endian and little endian.
1798
1799 unsigned char byte_array[] =
1800 { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f };
1801
1802 However, when loaded into a vector register, the representation
1803 depends on endian conventions.
1804
1805 In big-endian mode, the register holds:
1806
1807 MSB LSB
1808 [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f ]
1809
1810 In little-endian mode, the register holds:
1811
1812 MSB LSB
1813 [ f, e, d, c, b, a, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 ]
1814
1815 Word arrays require different handling. Consider the word array:
1816
1817 unsigned int word_array[] =
1818 { 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f };
1819
1820 The in-memory representation depends on endian configuration. The
1821 equivalent array, declared as a byte array, in memory would be:
1822
1823 unsigned char big_endian_word_array_data[] =
1824 { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f }
1825
1826 unsigned char little_endian_word_array_data[] =
1827 { 3, 2, 1, 0, 7, 6, 5, 4, b, a, 9, 8, f, e, d, c }
1828
1829 In big-endian mode, the register holds:
1830
1831 MSB LSB
1832 [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f ]
1833
1834 In little-endian mode, the register holds:
1835
1836 MSB LSB
1837 [ c, d, e, f, 8, 9, a, b, 4, 5, 6, 7, 0, 1, 2, 3 ]
1838
1839
1840 Similar transformations apply to the vector of half-word and vector
1841 of double-word representations.
1842
1843    For now, don't handle V1TImode (quadword) vectors.  Just return.
1844 A better solution is to fix the code generator to emit lvx/stvx for
1845 those. */
1846 static void
1847 replace_swapped_load_constant (swap_web_entry *insn_entry, rtx swap_insn)
1848 {
1849 /* Find the load. */
1850 struct df_insn_info *insn_info = DF_INSN_INFO_GET (swap_insn);
1851 rtx_insn *load_insn;
1852 df_ref use = DF_INSN_INFO_USES (insn_info);
1853 struct df_link *def_link = DF_REF_CHAIN (use);
1854 gcc_assert (def_link && !def_link->next);
1855
1856 load_insn = DF_REF_INSN (def_link->ref);
1857 gcc_assert (load_insn);
1858
1859 /* Find the TOC-relative symbol access. */
1860 insn_info = DF_INSN_INFO_GET (load_insn);
1861 use = DF_INSN_INFO_USES (insn_info);
1862
1863 def_link = DF_REF_CHAIN (use);
1864 gcc_assert (def_link && !def_link->next);
1865
1866 rtx_insn *tocrel_insn = DF_REF_INSN (def_link->ref);
1867 gcc_assert (tocrel_insn);
1868
1869 /* Find the embedded CONST_VECTOR. We have to call toc_relative_expr_p
1870 to set tocrel_base; otherwise it would be unnecessary as we've
1871 already established it will return true. */
1872 rtx base, offset;
1873 rtx tocrel_expr = SET_SRC (PATTERN (tocrel_insn));
1874 const_rtx tocrel_base;
1875
1876 /* There is an extra level of indirection for small/large code models. */
1877 if (MEM_P (tocrel_expr))
1878 tocrel_expr = XEXP (tocrel_expr, 0);
1879
1880 if (!toc_relative_expr_p (tocrel_expr, false, &tocrel_base, NULL))
1881 gcc_unreachable ();
1882
1883 split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
1884 rtx const_vector = get_pool_constant (base);
1885
1886 /* With the extra indirection, get_pool_constant will produce the
1887 real constant from the reg_equal expression, so get the real
1888 constant. */
1889 if (SYMBOL_REF_P (const_vector))
1890 const_vector = get_pool_constant (const_vector);
1891 gcc_assert (GET_CODE (const_vector) == CONST_VECTOR);
1892
1893 rtx new_mem;
1894 enum machine_mode mode = GET_MODE (const_vector);
1895
1896 /* Create an adjusted constant from the original constant. */
1897 if (mode == V1TImode)
1898     /* V1TImode is not handled; leave this sequence unchanged.  */
1899 return;
1900 else if (mode == V16QImode)
1901 {
1902 rtx vals = gen_rtx_PARALLEL (mode, rtvec_alloc (16));
1903 int i;
1904
1905 for (i = 0; i < 16; i++)
1906 XVECEXP (vals, 0, ((i+8) % 16)) = XVECEXP (const_vector, 0, i);
1907 rtx new_const_vector = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
1908 new_mem = force_const_mem (mode, new_const_vector);
1909 }
1910 else if ((mode == V8HImode)
1911 #ifdef HAVE_V8HFmode
1912 || (mode == V8HFmode)
1913 #endif
1914 )
1915 {
1916 rtx vals = gen_rtx_PARALLEL (mode, rtvec_alloc (8));
1917 int i;
1918
1919 for (i = 0; i < 8; i++)
1920 XVECEXP (vals, 0, ((i+4) % 8)) = XVECEXP (const_vector, 0, i);
1921 rtx new_const_vector = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
1922 new_mem = force_const_mem (mode, new_const_vector);
1923 }
1924 else if ((mode == V4SImode) || (mode == V4SFmode))
1925 {
1926 rtx vals = gen_rtx_PARALLEL (mode, rtvec_alloc (4));
1927 int i;
1928
1929 for (i = 0; i < 4; i++)
1930 XVECEXP (vals, 0, ((i+2) % 4)) = XVECEXP (const_vector, 0, i);
1931 rtx new_const_vector = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
1932 new_mem = force_const_mem (mode, new_const_vector);
1933 }
1934 else if ((mode == V2DImode) || (mode == V2DFmode))
1935 {
1936 rtx vals = gen_rtx_PARALLEL (mode, rtvec_alloc (2));
1937 int i;
1938
1939 for (i = 0; i < 2; i++)
1940 XVECEXP (vals, 0, ((i+1) % 2)) = XVECEXP (const_vector, 0, i);
1941 rtx new_const_vector = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
1942 new_mem = force_const_mem (mode, new_const_vector);
1943 }
1944 else
1945 {
1946 /* We do not expect other modes to be constant-load-swapped. */
1947 gcc_unreachable ();
1948 }
1949
1950 /* This gives us a MEM whose base operand is a SYMBOL_REF, which we
1951 can't recognize. Force the SYMBOL_REF into a register. */
1952 if (!REG_P (XEXP (new_mem, 0))) {
1953 rtx base_reg = force_reg (Pmode, XEXP (new_mem, 0));
1954 XEXP (new_mem, 0) = base_reg;
1955
1956 /* Move the newly created insn ahead of the load insn. */
1957 /* The last insn is the insn that forced new_mem into a register. */
1958 rtx_insn *force_insn = get_last_insn ();
1959 /* Remove this insn from the end of the instruction sequence. */
1960 remove_insn (force_insn);
1961 rtx_insn *before_load_insn = PREV_INSN (load_insn);
1962
1963     /* And insert this insn back into the sequence immediately before
1964        the load insn so this new expression will be available when the
1965        existing load is modified to load the swapped constant.  */
1966 add_insn_after (force_insn, before_load_insn, BLOCK_FOR_INSN (load_insn));
1967 df_insn_rescan (before_load_insn);
1968 df_insn_rescan (force_insn);
1969 }
1970
1971 /* Replace the MEM in the load instruction and rescan it. */
1972 XEXP (SET_SRC (PATTERN (load_insn)), 0) = new_mem;
1973 INSN_CODE (load_insn) = -1; /* Force re-recognition. */
1974 df_insn_rescan (load_insn);
1975
1976 unsigned int uid = INSN_UID (swap_insn);
1977 mark_swaps_for_removal (insn_entry, uid);
1978 replace_swap_with_copy (insn_entry, uid);
1979 }
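
/* For example, if the original constant pool entry holds the V4SImode
   vector { A, B, C, D }, the adjusted constant built above is
   { C, D, A, B }: the two doublewords are exchanged.  A permuting load
   of the adjusted constant, with the following xxswapdi turned into a
   copy, leaves the register holding exactly what the original
   load-and-swap sequence produced.  */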
1980
1981 /* Dump the swap table to DUMP_FILE. */
1982 static void
1983 dump_swap_insn_table (swap_web_entry *insn_entry)
1984 {
1985 int e = get_max_uid ();
1986 fprintf (dump_file, "\nRelevant insns with their flag settings\n\n");
1987
1988 for (int i = 0; i < e; ++i)
1989 if (insn_entry[i].is_relevant)
1990 {
1991 swap_web_entry *pred_entry = (swap_web_entry *)insn_entry[i].pred ();
1992 fprintf (dump_file, "%6d %6d ", i,
1993 pred_entry && pred_entry->insn
1994 ? INSN_UID (pred_entry->insn) : 0);
1995 if (insn_entry[i].is_load)
1996 fputs ("load ", dump_file);
1997 if (insn_entry[i].is_store)
1998 fputs ("store ", dump_file);
1999 if (insn_entry[i].is_swap)
2000 fputs ("swap ", dump_file);
2001 if (insn_entry[i].is_live_in)
2002 fputs ("live-in ", dump_file);
2003 if (insn_entry[i].is_live_out)
2004 fputs ("live-out ", dump_file);
2005 if (insn_entry[i].contains_subreg)
2006 fputs ("subreg ", dump_file);
2007 if (insn_entry[i].is_128_int)
2008 fputs ("int128 ", dump_file);
2009 if (insn_entry[i].is_call)
2010 fputs ("call ", dump_file);
2011 if (insn_entry[i].is_swappable)
2012 {
2013 fputs ("swappable ", dump_file);
2014 if (insn_entry[i].special_handling == SH_CONST_VECTOR)
2015 fputs ("special:constvec ", dump_file);
2016 else if (insn_entry[i].special_handling == SH_SUBREG)
2017 fputs ("special:subreg ", dump_file);
2018 else if (insn_entry[i].special_handling == SH_NOSWAP_LD)
2019 fputs ("special:load ", dump_file);
2020 else if (insn_entry[i].special_handling == SH_NOSWAP_ST)
2021 fputs ("special:store ", dump_file);
2022 else if (insn_entry[i].special_handling == SH_EXTRACT)
2023 fputs ("special:extract ", dump_file);
2024 else if (insn_entry[i].special_handling == SH_SPLAT)
2025 fputs ("special:splat ", dump_file);
2026 else if (insn_entry[i].special_handling == SH_XXPERMDI)
2027 fputs ("special:xxpermdi ", dump_file);
2028 else if (insn_entry[i].special_handling == SH_CONCAT)
2029 fputs ("special:concat ", dump_file);
2030 else if (insn_entry[i].special_handling == SH_VPERM)
2031 fputs ("special:vperm ", dump_file);
2032 }
2033 if (insn_entry[i].web_not_optimizable)
2034 fputs ("unoptimizable ", dump_file);
2035 if (insn_entry[i].will_delete)
2036 fputs ("delete ", dump_file);
2037 fputs ("\n", dump_file);
2038 }
2039 fputs ("\n", dump_file);
2040 }
2041
2042 /* Return RTX with its address canonicalized to (reg) or (+ reg reg).
2043 Here RTX is an (& addr (const_int -16)). Always return a new copy
2044 to avoid problems with combine. */
2045 static rtx
2046 alignment_with_canonical_addr (rtx align)
2047 {
2048 rtx canon;
2049 rtx addr = XEXP (align, 0);
2050
2051 if (REG_P (addr))
2052 canon = addr;
2053
2054 else if (GET_CODE (addr) == PLUS)
2055 {
2056 rtx addrop0 = XEXP (addr, 0);
2057 rtx addrop1 = XEXP (addr, 1);
2058
2059 if (!REG_P (addrop0))
2060 addrop0 = force_reg (GET_MODE (addrop0), addrop0);
2061
2062 if (!REG_P (addrop1))
2063 addrop1 = force_reg (GET_MODE (addrop1), addrop1);
2064
2065 canon = gen_rtx_PLUS (GET_MODE (addr), addrop0, addrop1);
2066 }
2067
2068 else
2069 canon = force_reg (GET_MODE (addr), addr);
2070
2071 return gen_rtx_AND (GET_MODE (align), canon, GEN_INT (-16));
2072 }
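
/* For example (illustrative), given ALIGN of the form

     (and:DI (plus:DI (reg:DI base) (const_int 32)) (const_int -16))

   the (const_int 32) is forced into a fresh register and the result is

     (and:DI (plus:DI (reg:DI base) (reg:DI tmp)) (const_int -16))

   so that the address under the AND is always (reg) or (plus reg reg).  */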
2073
2074 /* Check whether INSN sets a register to an alignment mask (an AND with
2075    -16); if so, return a fully-expanded rtx for the masking operation.  */
2076 static rtx
2077 alignment_mask (rtx_insn *insn)
2078 {
2079 rtx body = PATTERN (insn);
2080
2081 if (GET_CODE (body) != SET
2082 || GET_CODE (SET_SRC (body)) != AND
2083 || !REG_P (XEXP (SET_SRC (body), 0)))
2084 return 0;
2085
2086 rtx mask = XEXP (SET_SRC (body), 1);
2087
2088 if (CONST_INT_P (mask))
2089 {
2090 if (INTVAL (mask) == -16)
2091 return alignment_with_canonical_addr (SET_SRC (body));
2092 else
2093 return 0;
2094 }
2095
2096 if (!REG_P (mask))
2097 return 0;
2098
2099 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
2100 df_ref use;
2101 rtx real_mask = 0;
2102
2103 FOR_EACH_INSN_INFO_USE (use, insn_info)
2104 {
2105 if (!rtx_equal_p (DF_REF_REG (use), mask))
2106 continue;
2107
2108 struct df_link *def_link = DF_REF_CHAIN (use);
2109 if (!def_link || def_link->next)
2110 return 0;
2111
2112 rtx_insn *const_insn = DF_REF_INSN (def_link->ref);
2113 rtx const_body = PATTERN (const_insn);
2114 if (GET_CODE (const_body) != SET)
2115 return 0;
2116
2117 real_mask = SET_SRC (const_body);
2118
2119 if (!CONST_INT_P (real_mask)
2120 || INTVAL (real_mask) != -16)
2121 return 0;
2122 }
2123
2124 if (real_mask == 0)
2125 return 0;
2126
2127 return alignment_with_canonical_addr (SET_SRC (body));
2128 }
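
/* The two shapes recognized above are, roughly,

     (set (reg T) (and (reg addr) (const_int -16)))

   and

     (set (reg M) (const_int -16))
     ...
     (set (reg T) (and (reg addr) (reg M)))

   where M must have exactly one reaching definition, the -16 constant.  */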
2129
2130 /* Given INSN that's a load or store based at BASE_REG, look for a
2131    feeding computation that aligns its address on a 16-byte boundary.
2132    Return the masking operation rtx and store its insn in *AND_INSN.  */
2133 static rtx
2134 find_alignment_op (rtx_insn *insn, rtx base_reg, rtx_insn **and_insn)
2135 {
2136 df_ref base_use;
2137 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
2138 rtx and_operation = 0;
2139
2140 FOR_EACH_INSN_INFO_USE (base_use, insn_info)
2141 {
2142 if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
2143 continue;
2144
2145 struct df_link *base_def_link = DF_REF_CHAIN (base_use);
2146 if (!base_def_link || base_def_link->next)
2147 break;
2148
2149 /* With stack-protector code enabled, and possibly in other
2150 circumstances, there may not be an associated insn for
2151 the def. */
2152 if (DF_REF_IS_ARTIFICIAL (base_def_link->ref))
2153 break;
2154
2155 *and_insn = DF_REF_INSN (base_def_link->ref);
2156 and_operation = alignment_mask (*and_insn);
2157 if (and_operation != 0)
2158 break;
2159 }
2160
2161 return and_operation;
2162 }
2163
2164 struct del_info { bool replace; rtx_insn *replace_insn; };
2165
2166 /* If INSN is the load for an lvx pattern, put it in canonical form. */
2167 static void
2168 recombine_lvx_pattern (rtx_insn *insn, del_info *to_delete)
2169 {
2170 rtx body = PATTERN (insn);
2171 gcc_assert (GET_CODE (body) == SET
2172 && GET_CODE (SET_SRC (body)) == VEC_SELECT
2173 && MEM_P (XEXP (SET_SRC (body), 0)));
2174
2175 rtx mem = XEXP (SET_SRC (body), 0);
2176 rtx base_reg = XEXP (mem, 0);
2177
2178 rtx_insn *and_insn;
2179 rtx and_operation = find_alignment_op (insn, base_reg, &and_insn);
2180
2181 if (and_operation != 0)
2182 {
2183 df_ref def;
2184 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
2185 FOR_EACH_INSN_INFO_DEF (def, insn_info)
2186 {
2187 struct df_link *link = DF_REF_CHAIN (def);
2188 if (!link || link->next)
2189 break;
2190
2191 rtx_insn *swap_insn = DF_REF_INSN (link->ref);
2192 if (!insn_is_swap_p (swap_insn)
2193 || insn_is_load_p (swap_insn)
2194 || insn_is_store_p (swap_insn))
2195 break;
2196
2197 /* Expected lvx pattern found. Change the swap to
2198 a copy, and propagate the AND operation into the
2199 load. */
2200 to_delete[INSN_UID (swap_insn)].replace = true;
2201 to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;
2202
2203 /* However, first we must be sure that we make the
2204 base register from the AND operation available
2205 in case the register has been overwritten. Copy
2206 the base register to a new pseudo and use that
2207 as the base register of the AND operation in
2208 the new LVX instruction. */
2209 rtx and_base = XEXP (and_operation, 0);
2210 rtx new_reg = gen_reg_rtx (GET_MODE (and_base));
2211 rtx copy = gen_rtx_SET (new_reg, and_base);
2212 rtx_insn *new_insn = emit_insn_after (copy, and_insn);
2213 set_block_for_insn (new_insn, BLOCK_FOR_INSN (and_insn));
2214 df_insn_rescan (new_insn);
2215
2216 XEXP (mem, 0) = gen_rtx_AND (GET_MODE (and_base), new_reg,
2217 XEXP (and_operation, 1));
2218 SET_SRC (body) = mem;
2219 INSN_CODE (insn) = -1; /* Force re-recognition. */
2220 df_insn_rescan (insn);
2221
2222 if (dump_file)
2223 fprintf (dump_file, "lvx opportunity found at %d\n",
2224 INSN_UID (insn));
2225 }
2226 }
2227 }
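
/* Sketch of the lvx recombination (illustrative register names):

     and_insn:  (set (reg B) (and (reg A) (const_int -16)))
     insn:      (set (reg V) (vec_select (mem (reg B)) ...))
     swap:      (set (reg W) (vec_select (reg V) ...))

   becomes

     and_insn:  unchanged
     copy:      (set (reg A') (reg A))      ; preserve the AND base reg
     insn:      (set (reg V) (mem (and (reg A') (const_int -16))))
     swap:      rewritten later into (set (reg W) (reg V))

   which is the canonical form recognized as an lvx.  */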
2228
2229 /* If INSN is the store for an stvx pattern, put it in canonical form. */
2230 static void
2231 recombine_stvx_pattern (rtx_insn *insn, del_info *to_delete)
2232 {
2233 rtx body = PATTERN (insn);
2234 gcc_assert (GET_CODE (body) == SET
2235 && MEM_P (SET_DEST (body))
2236 && GET_CODE (SET_SRC (body)) == VEC_SELECT);
2237 rtx mem = SET_DEST (body);
2238 rtx base_reg = XEXP (mem, 0);
2239
2240 rtx_insn *and_insn;
2241 rtx and_operation = find_alignment_op (insn, base_reg, &and_insn);
2242
2243 if (and_operation != 0)
2244 {
2245 rtx src_reg = XEXP (SET_SRC (body), 0);
2246 df_ref src_use;
2247 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
2248 FOR_EACH_INSN_INFO_USE (src_use, insn_info)
2249 {
2250 if (!rtx_equal_p (DF_REF_REG (src_use), src_reg))
2251 continue;
2252
2253 struct df_link *link = DF_REF_CHAIN (src_use);
2254 if (!link || link->next)
2255 break;
2256
2257 rtx_insn *swap_insn = DF_REF_INSN (link->ref);
2258 if (!insn_is_swap_p (swap_insn)
2259 || insn_is_load_p (swap_insn)
2260 || insn_is_store_p (swap_insn))
2261 break;
2262
2263 /* Expected stvx pattern found. Change the swap to
2264 a copy, and propagate the AND operation into the
2265 store. */
2266 to_delete[INSN_UID (swap_insn)].replace = true;
2267 to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;
2268
2269 /* However, first we must be sure that we make the
2270 base register from the AND operation available
2271 in case the register has been overwritten. Copy
2272 the base register to a new pseudo and use that
2273 as the base register of the AND operation in
2274 the new STVX instruction. */
2275 rtx and_base = XEXP (and_operation, 0);
2276 rtx new_reg = gen_reg_rtx (GET_MODE (and_base));
2277 rtx copy = gen_rtx_SET (new_reg, and_base);
2278 rtx_insn *new_insn = emit_insn_after (copy, and_insn);
2279 set_block_for_insn (new_insn, BLOCK_FOR_INSN (and_insn));
2280 df_insn_rescan (new_insn);
2281
2282 XEXP (mem, 0) = gen_rtx_AND (GET_MODE (and_base), new_reg,
2283 XEXP (and_operation, 1));
2284 SET_SRC (body) = src_reg;
2285 INSN_CODE (insn) = -1; /* Force re-recognition. */
2286 df_insn_rescan (insn);
2287
2288 if (dump_file)
2289 fprintf (dump_file, "stvx opportunity found at %d\n",
2290 INSN_UID (insn));
2291 }
2292 }
2293 }
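
/* The stvx case is the mirror image of the lvx case above: the swap that
   feeds the stored value is queued to become a copy, and the store's
   address is rewritten to (and (reg A') (const_int -16)), where A' is a
   fresh copy of the AND base register, giving the canonical stvx form.  */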
2294
2295 /* Look for patterns created from builtin lvx and stvx calls, and
2296 canonicalize them to be properly recognized as such. */
2297 static void
2298 recombine_lvx_stvx_patterns (function *fun)
2299 {
2300 int i;
2301 basic_block bb;
2302 rtx_insn *insn;
2303
2304 int num_insns = get_max_uid ();
2305 del_info *to_delete = XCNEWVEC (del_info, num_insns);
2306
2307 FOR_ALL_BB_FN (bb, fun)
2308 FOR_BB_INSNS (bb, insn)
2309 {
2310 if (!NONDEBUG_INSN_P (insn))
2311 continue;
2312
2313 if (insn_is_load_p (insn) && insn_is_swap_p (insn))
2314 recombine_lvx_pattern (insn, to_delete);
2315 else if (insn_is_store_p (insn) && insn_is_swap_p (insn))
2316 recombine_stvx_pattern (insn, to_delete);
2317 }
2318
2319 /* Turning swaps into copies is delayed until now, to avoid problems
2320 with deleting instructions during the insn walk. */
2321 for (i = 0; i < num_insns; i++)
2322 if (to_delete[i].replace)
2323 {
2324 rtx swap_body = PATTERN (to_delete[i].replace_insn);
2325 rtx src_reg = XEXP (SET_SRC (swap_body), 0);
2326 rtx copy = gen_rtx_SET (SET_DEST (swap_body), src_reg);
2327 rtx_insn *new_insn = emit_insn_before (copy,
2328 to_delete[i].replace_insn);
2329 set_block_for_insn (new_insn,
2330 BLOCK_FOR_INSN (to_delete[i].replace_insn));
2331 df_insn_rescan (new_insn);
2332 df_insn_delete (to_delete[i].replace_insn);
2333 remove_insn (to_delete[i].replace_insn);
2334 to_delete[i].replace_insn->set_deleted ();
2335 }
2336
2337 free (to_delete);
2338 }
2339
2340 /* Main entry point for this pass. */
2341 unsigned int
2342 rs6000_analyze_swaps (function *fun)
2343 {
2344 swap_web_entry *insn_entry;
2345 basic_block bb;
2346 rtx_insn *insn, *curr_insn = 0;
2347
2348 /* Dataflow analysis for use-def chains. */
2349 df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
2350 df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
2351 df_analyze ();
2352 df_set_flags (DF_DEFER_INSN_RESCAN);
2353
2354 /* Pre-pass to recombine lvx and stvx patterns so we don't lose info. */
2355 recombine_lvx_stvx_patterns (fun);
2356
2357 /* Rebuild ud- and du-chains. */
2358 df_remove_problem (df_chain);
2359 df_process_deferred_rescans ();
2360 df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
2361 df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
2362 df_analyze ();
2363 df_set_flags (DF_DEFER_INSN_RESCAN);
2364
2365 /* Allocate structure to represent webs of insns. */
2366 insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());
2367
2368 /* Walk the insns to gather basic data. */
2369 FOR_ALL_BB_FN (bb, fun)
2370 FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
2371 {
2372 unsigned int uid = INSN_UID (insn);
2373 if (NONDEBUG_INSN_P (insn))
2374 {
2375 insn_entry[uid].insn = insn;
2376
2377 if (GET_CODE (insn) == CALL_INSN)
2378 insn_entry[uid].is_call = 1;
2379
2380 /* Walk the uses and defs to see if we mention vector regs.
2381 Record any constraints on optimization of such mentions. */
2382 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
2383 df_ref mention;
2384 FOR_EACH_INSN_INFO_USE (mention, insn_info)
2385 {
2386 /* We use DF_REF_REAL_REG here to get inside any subregs. */
2387 machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
2388
2389 /* If a use gets its value from a call insn, it will be
2390 a hard register and will look like (reg:V4SI 3 3).
2391 The df analysis creates two mentions for GPR3 and GPR4,
2392 both DImode. We must recognize this and treat it as a
2393 vector mention to ensure the call is unioned with this
2394 use. */
2395 if (mode == DImode && DF_REF_INSN_INFO (mention))
2396 {
2397 rtx feeder = DF_REF_INSN (mention);
2398 /* FIXME: It is pretty hard to get from the df mention
2399 to the mode of the use in the insn. We arbitrarily
2400 pick a vector mode here, even though the use might
2401 be a real DImode. We can be too conservative
2402 (create a web larger than necessary) because of
2403 this, so consider eventually fixing this. */
2404 if (GET_CODE (feeder) == CALL_INSN)
2405 mode = V4SImode;
2406 }
2407
2408 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
2409 {
2410 insn_entry[uid].is_relevant = 1;
2411 if (mode == TImode || mode == V1TImode
2412 || FLOAT128_VECTOR_P (mode))
2413 insn_entry[uid].is_128_int = 1;
2414 if (DF_REF_INSN_INFO (mention))
2415 insn_entry[uid].contains_subreg
2416 = !rtx_equal_p (DF_REF_REG (mention),
2417 DF_REF_REAL_REG (mention));
2418 union_defs (insn_entry, insn, mention);
2419 }
2420 }
2421 FOR_EACH_INSN_INFO_DEF (mention, insn_info)
2422 {
2423 /* We use DF_REF_REAL_REG here to get inside any subregs. */
2424 machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
2425
2426 /* If we're loading up a hard vector register for a call,
2427 it looks like (set (reg:V4SI 9 9) (...)). The df
2428 analysis creates two mentions for GPR9 and GPR10, both
2429 DImode. So relying on the mode from the mentions
2430 isn't sufficient to ensure we union the call into the
2431 web with the parameter setup code. */
2432 	      if (mode == DImode && GET_CODE (PATTERN (insn)) == SET
2433 		  && ALTIVEC_OR_VSX_VECTOR_MODE
2434 		       (GET_MODE (SET_DEST (PATTERN (insn)))))
		mode = GET_MODE (SET_DEST (PATTERN (insn)));
2435
2436 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
2437 {
2438 insn_entry[uid].is_relevant = 1;
2439 if (mode == TImode || mode == V1TImode
2440 || FLOAT128_VECTOR_P (mode))
2441 insn_entry[uid].is_128_int = 1;
2442 if (DF_REF_INSN_INFO (mention))
2443 insn_entry[uid].contains_subreg
2444 = !rtx_equal_p (DF_REF_REG (mention),
2445 DF_REF_REAL_REG (mention));
2446 /* REG_FUNCTION_VALUE_P is not valid for subregs. */
2447 else if (REG_FUNCTION_VALUE_P (DF_REF_REG (mention)))
2448 insn_entry[uid].is_live_out = 1;
2449 union_uses (insn_entry, insn, mention);
2450 }
2451 }
2452
2453 if (insn_entry[uid].is_relevant)
2454 {
2455 /* Determine if this is a load or store. */
2456 insn_entry[uid].is_load = insn_is_load_p (insn);
2457 insn_entry[uid].is_store = insn_is_store_p (insn);
2458
2459 /* Determine if this is a doubleword swap. If not,
2460 determine whether it can legally be swapped. */
2461 if (insn_is_swap_p (insn))
2462 insn_entry[uid].is_swap = 1;
2463 else
2464 {
2465 unsigned int special = SH_NONE;
2466 insn_entry[uid].is_swappable
2467 = insn_is_swappable_p (insn_entry, insn, &special);
2468 if (special != SH_NONE && insn_entry[uid].contains_subreg)
2469 insn_entry[uid].is_swappable = 0;
2470 else if (special != SH_NONE)
2471 insn_entry[uid].special_handling = special;
2472 else if (insn_entry[uid].contains_subreg
2473 && has_part_mult (insn))
2474 insn_entry[uid].is_swappable = 0;
2475 else if (insn_entry[uid].contains_subreg)
2476 insn_entry[uid].special_handling = SH_SUBREG;
2477 }
2478 }
2479 }
2480 }
2481
2482 if (dump_file)
2483 {
2484 fprintf (dump_file, "\nSwap insn entry table when first built\n");
2485 dump_swap_insn_table (insn_entry);
2486 }
2487
2488 /* Record unoptimizable webs. */
2489 unsigned e = get_max_uid (), i;
2490 for (i = 0; i < e; ++i)
2491 {
2492 if (!insn_entry[i].is_relevant)
2493 continue;
2494
2495 swap_web_entry *root
2496 = (swap_web_entry*)(&insn_entry[i])->unionfind_root ();
2497
2498 if (insn_entry[i].is_live_in || insn_entry[i].is_live_out
2499 || (insn_entry[i].contains_subreg
2500 && insn_entry[i].special_handling != SH_SUBREG)
2501 || insn_entry[i].is_128_int || insn_entry[i].is_call
2502 || !(insn_entry[i].is_swappable || insn_entry[i].is_swap))
2503 root->web_not_optimizable = 1;
2504
2505 /* If we have loads or stores that aren't permuting then the
2506 optimization isn't appropriate. */
2507 else if ((insn_entry[i].is_load || insn_entry[i].is_store)
2508 && !insn_entry[i].is_swap && !insn_entry[i].is_swappable)
2509 root->web_not_optimizable = 1;
2510
2511 /* If we have a swap that is both fed by a permuting load
2512 and a feeder of a permuting store, then the optimization
2513 isn't appropriate. (Consider vec_xl followed by vec_xst_be.) */
2514 else if (insn_entry[i].is_swap && !insn_entry[i].is_load
2515 && !insn_entry[i].is_store
2516 && swap_feeds_both_load_and_store (&insn_entry[i]))
2517 root->web_not_optimizable = 1;
2518
2519 /* If we have permuting loads or stores that are not accompanied
2520 by a register swap, the optimization isn't appropriate. */
2521 else if (insn_entry[i].is_load && insn_entry[i].is_swap)
2522 {
2523 rtx insn = insn_entry[i].insn;
2524 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
2525 df_ref def;
2526
2527 FOR_EACH_INSN_INFO_DEF (def, insn_info)
2528 {
2529 struct df_link *link = DF_REF_CHAIN (def);
2530
2531 if (!chain_contains_only_swaps (insn_entry, link, FOR_LOADS))
2532 {
2533 root->web_not_optimizable = 1;
2534 break;
2535 }
2536 }
2537 }
2538 else if (insn_entry[i].is_store && insn_entry[i].is_swap)
2539 {
2540 rtx insn = insn_entry[i].insn;
2541 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
2542 df_ref use;
2543
2544 FOR_EACH_INSN_INFO_USE (use, insn_info)
2545 {
2546 struct df_link *link = DF_REF_CHAIN (use);
2547
2548 if (!chain_contains_only_swaps (insn_entry, link, FOR_STORES))
2549 {
2550 root->web_not_optimizable = 1;
2551 break;
2552 }
2553 }
2554 }
2555 }
2556
2557 if (dump_file)
2558 {
2559 fprintf (dump_file, "\nSwap insn entry table after web analysis\n");
2560 dump_swap_insn_table (insn_entry);
2561 }
2562
2563 /* For each load and store in an optimizable web (which implies
2564 the loads and stores are permuting), find the associated
2565 register swaps and mark them for removal. Due to various
2566 optimizations we may mark the same swap more than once. Also
2567 perform special handling for swappable insns that require it. */
2568 for (i = 0; i < e; ++i)
2569 if ((insn_entry[i].is_load || insn_entry[i].is_store)
2570 && insn_entry[i].is_swap)
2571 {
2572 swap_web_entry* root_entry
2573 = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
2574 if (!root_entry->web_not_optimizable)
2575 mark_swaps_for_removal (insn_entry, i);
2576 }
2577 else if (insn_entry[i].is_swappable && insn_entry[i].special_handling)
2578 {
2579 swap_web_entry* root_entry
2580 = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
2581 if (!root_entry->web_not_optimizable)
2582 handle_special_swappables (insn_entry, i);
2583 }
2584
2585 /* Now delete the swaps marked for removal. */
2586 for (i = 0; i < e; ++i)
2587 if (insn_entry[i].will_delete)
2588 replace_swap_with_copy (insn_entry, i);
2589
2590 /* Clean up. */
2591 free (insn_entry);
2592
2593 /* Use a second pass over rtl to detect that certain vector values
2594 fetched from or stored to memory on quad-word aligned addresses
2595 can use lvx/stvx without swaps. */
2596
2597 /* First, rebuild ud chains. */
2598 df_remove_problem (df_chain);
2599 df_process_deferred_rescans ();
2600 df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
2601 df_chain_add_problem (DF_UD_CHAIN);
2602 df_analyze ();
2603
2604 swap_web_entry *pass2_insn_entry;
2605 pass2_insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());
2606
2607 /* Walk the insns to gather basic data. */
2608 FOR_ALL_BB_FN (bb, fun)
2609 FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
2610 {
2611 unsigned int uid = INSN_UID (insn);
2612 if (NONDEBUG_INSN_P (insn))
2613 {
2614 pass2_insn_entry[uid].insn = insn;
2615
2616 pass2_insn_entry[uid].is_relevant = 1;
2617 pass2_insn_entry[uid].is_load = insn_is_load_p (insn);
2618 pass2_insn_entry[uid].is_store = insn_is_store_p (insn);
2619
2620 	  /* Determine if this is a doubleword swap.  */
2622 if (insn_is_swap_p (insn))
2623 pass2_insn_entry[uid].is_swap = 1;
2624 }
2625 }
2626
2627 e = get_max_uid ();
2628 for (unsigned i = 0; i < e; ++i)
2629 if (pass2_insn_entry[i].is_swap && !pass2_insn_entry[i].is_load
2630 && !pass2_insn_entry[i].is_store)
2631 {
2632 /* Replace swap of aligned load-swap with aligned unswapped
2633 load. */
2634 	rtx_insn *swap_insn = pass2_insn_entry[i].insn;
2635 	if (quad_aligned_load_p (pass2_insn_entry, swap_insn))
2636 	  replace_swapped_aligned_load (pass2_insn_entry, swap_insn);
2637 }
2638 else if (pass2_insn_entry[i].is_swap && pass2_insn_entry[i].is_store)
2639 {
2640 /* Replace aligned store-swap of swapped value with aligned
2641 unswapped store. */
2642 	rtx_insn *swap_insn = pass2_insn_entry[i].insn;
2643 	if (quad_aligned_store_p (pass2_insn_entry, swap_insn))
2644 	  replace_swapped_aligned_store (pass2_insn_entry, swap_insn);
2645 }
2646
2647 /* Clean up. */
2648 free (pass2_insn_entry);
2649
2650 /* Use a third pass over rtl to replace swap(load(vector constant))
2651 with load(swapped vector constant). */
2652
2653 /* First, rebuild ud chains. */
2654 df_remove_problem (df_chain);
2655 df_process_deferred_rescans ();
2656 df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
2657 df_chain_add_problem (DF_UD_CHAIN);
2658 df_analyze ();
2659
2660 swap_web_entry *pass3_insn_entry;
2661 pass3_insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());
2662
2663 /* Walk the insns to gather basic data. */
2664 FOR_ALL_BB_FN (bb, fun)
2665 FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
2666 {
2667 unsigned int uid = INSN_UID (insn);
2668 if (NONDEBUG_INSN_P (insn))
2669 {
2670 pass3_insn_entry[uid].insn = insn;
2671
2672 pass3_insn_entry[uid].is_relevant = 1;
2673 pass3_insn_entry[uid].is_load = insn_is_load_p (insn);
2674 pass3_insn_entry[uid].is_store = insn_is_store_p (insn);
2675
2676 	  /* Determine if this is a doubleword swap.  */
2678 if (insn_is_swap_p (insn))
2679 pass3_insn_entry[uid].is_swap = 1;
2680 }
2681 }
2682
2683 e = get_max_uid ();
2684 for (unsigned i = 0; i < e; ++i)
2685 if (pass3_insn_entry[i].is_swap && !pass3_insn_entry[i].is_load
2686 && !pass3_insn_entry[i].is_store)
2687 {
2688 insn = pass3_insn_entry[i].insn;
2689 if (const_load_sequence_p (pass3_insn_entry, insn))
2690 replace_swapped_load_constant (pass3_insn_entry, insn);
2691 }
2692
2693 /* Clean up. */
2694 free (pass3_insn_entry);
2695 return 0;
2696 }
2697
2698 const pass_data pass_data_analyze_swaps =
2699 {
2700 RTL_PASS, /* type */
2701 "swaps", /* name */
2702 OPTGROUP_NONE, /* optinfo_flags */
2703 TV_NONE, /* tv_id */
2704 0, /* properties_required */
2705 0, /* properties_provided */
2706 0, /* properties_destroyed */
2707 0, /* todo_flags_start */
2708 TODO_df_finish, /* todo_flags_finish */
2709 };
2710
2711 class pass_analyze_swaps : public rtl_opt_pass
2712 {
2713 public:
2714   pass_analyze_swaps (gcc::context *ctxt)
2715 : rtl_opt_pass(pass_data_analyze_swaps, ctxt)
2716 {}
2717
2718 /* opt_pass methods: */
2719   virtual bool gate (function *)
2720 {
2721 return (optimize > 0 && !BYTES_BIG_ENDIAN && TARGET_VSX
2722 && !TARGET_P9_VECTOR && rs6000_optimize_swaps);
2723 }
2724
2725   virtual unsigned int execute (function *fun)
2726 {
2727 return rs6000_analyze_swaps (fun);
2728 }
2729
2730   opt_pass *clone ()
2731 {
2732 return new pass_analyze_swaps (m_ctxt);
2733 }
2734
2735 }; // class pass_analyze_swaps
2736
2737 rtl_opt_pass *
2738 make_pass_analyze_swaps (gcc::context *ctxt)
2739 {
2740 return new pass_analyze_swaps (ctxt);
2741 }
2742
2743