1 /* Subroutines used to remove unnecessary doubleword swaps
2    for p8 little-endian VSX code.
3    Copyright (C) 1991-2020 Free Software Foundation, Inc.
4 
5    This file is part of GCC.
6 
7    GCC is free software; you can redistribute it and/or modify it
8    under the terms of the GNU General Public License as published
9    by the Free Software Foundation; either version 3, or (at your
10    option) any later version.
11 
12    GCC is distributed in the hope that it will be useful, but WITHOUT
13    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
15    License for more details.
16 
17    You should have received a copy of the GNU General Public License
18    along with GCC; see the file COPYING3.  If not see
19    <http://www.gnu.org/licenses/>.  */
20 
21 #define IN_TARGET_CODE 1
22 
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "memmodel.h"
30 #include "df.h"
31 #include "tm_p.h"
32 #include "ira.h"
33 #include "print-tree.h"
34 #include "varasm.h"
35 #include "explow.h"
36 #include "expr.h"
37 #include "output.h"
38 #include "tree-pass.h"
39 #include "rtx-vector-builder.h"
40 
41 /* Analyze vector computations and remove unnecessary doubleword
42    swaps (xxswapdi instructions).  This pass is performed only
43    for little-endian VSX code generation.
44 
45    For this specific case, loads and stores of 4x32 and 2x64 vectors
46    are inefficient.  These are implemented using the lxvd2x and
47    stxvd2x instructions, which invert the order of doublewords in
48    a vector register.  Thus the code generation inserts an xxswapdi
49    after each such load, and prior to each such store.  (For spill
50    code after register assignment, an additional xxswapdi is inserted
51    following each store in order to return a hard register to its
52    unpermuted value.)
53 
54    The extra xxswapdi instructions reduce performance.  This can be
55    particularly bad for vectorized code.  The purpose of this pass
56    is to reduce the number of xxswapdi instructions required for
57    correctness.
58 
59    The primary insight is that much code that operates on vectors
60    does not care about the relative order of elements in a register,
61    so long as the correct memory order is preserved.  If we have
62    a computation where all input values are provided by lxvd2x/xxswapdi
63    sequences, all outputs are stored using xxswapdi/stxvd2x sequences,
64    and all intermediate computations are pure SIMD (independent of
65    element order), then all the xxswapdi's associated with the loads
66    and stores may be removed.
67 
68    This pass uses some of the infrastructure and logical ideas from
69    the "web" pass in web.c.  We create maximal webs of computations
70    fitting the description above using union-find.  Each such web is
71    then optimized by removing its unnecessary xxswapdi instructions.
72 
73    The pass is placed prior to global optimization so that we can
74    perform the optimization in the safest and simplest way possible;
75    that is, by replacing each xxswapdi insn with a register copy insn.
76    Subsequent forward propagation will remove copies where possible.
77 
78    There are some operations sensitive to element order for which we
79    can still allow the operation, provided we modify those operations.
80    These include CONST_VECTORs, for which we must swap the first and
81    second halves of the constant vector; and SUBREGs, for which we
82    must adjust the byte offset to account for the swapped doublewords.
83    A remaining opportunity would be non-immediate-form splats, for
84    which we should adjust the selected lane of the input.  We should
85    also make code generation adjustments for sum-across operations,
86    since this is a common vectorizer reduction.
87 
88    Because we run prior to the first split, we can see loads and stores
89    here that match *vsx_le_perm_{load,store}_<mode>.  These are vanilla
90    vector loads and stores that have not yet been split into a permuting
91    load/store and a swap.  (One way this can happen is with a builtin
92    call to vec_vsx_{ld,st}.)  We can handle these as well, but rather
93    than deleting a swap, we convert the load/store into a permuting
94    load/store (which effectively removes the swap).  */
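
/* Illustrative example (a sketch, not taken from the pass itself): a
   simple copy loop over vector doubles.  On P8 little-endian, prior to
   this pass each iteration would otherwise contain a doubleword-permuting
   load plus an xxswapdi and an xxswapdi plus a doubleword-permuting
   store; because the web contains only such feeders and sinks, the
   xxswapdi insns are replaced by register copies (or the unsplit
   loads/stores are converted to permuting ones), and later passes
   remove the copies.

     void
     copy (vector double *dst, const vector double *src, int n)
     {
       for (int i = 0; i < n; i++)
         dst[i] = src[i];
     }
*/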
95 
96 /* Notes on Permutes
97 
98    We do not currently handle computations that contain permutes.  There
99    is a general transformation that can be performed correctly, but it
100    may introduce more expensive code than it replaces.  To handle these
101    would require a cost model to determine when to perform the optimization.
102    This commentary records how this could be done if desired.
103 
104    The most general permute is something like this (example for V16QI):
105 
106    (vec_select:V16QI (vec_concat:V32QI (op1:V16QI) (op2:V16QI))
107                      (parallel [(const_int a0) (const_int a1)
108                                  ...
109                                 (const_int a14) (const_int a15)]))
110 
111    where a0,...,a15 are in [0,31] and select which elements from op1
112    and op2 appear in the result.
113 
114    Regardless of mode, we can convert the PARALLEL to a mask of 16
115    byte-element selectors.  Let's call this M, with M[i] representing
116    the ith byte-element selector value.  Then if we swap doublewords
117    throughout the computation, we can get correct behavior by replacing
118    M with M' as follows:
119 
120     M'[i] = { (M[i]+8)%16      : M[i] in [0,15]
121             { ((M[i]+8)%16)+16 : M[i] in [16,31]
122 
123    This seems promising at first, since we are just replacing one mask
124    with another.  But certain masks are preferable to others.  If M
125    is a mask that matches a vmrghh pattern, for example, M' certainly
126    will not.  Instead of a single vmrghh, we would generate a load of
127    M' and a vperm.  So we would need to know how many xxswapd's we can
128    remove as a result of this transformation to determine if it's
129    profitable; and preferably the logic would need to be aware of all
130    the special preferable masks.
131 
132    Another form of permute is an UNSPEC_VPERM, in which the mask is
133    already in a register.  In some cases, this mask may be a constant
134    that we can discover with ud-chains, in which case the above
135    transformation is ok.  However, the common usage here is for the
136    mask to be produced by an UNSPEC_LVSL, in which case the mask
137    cannot be known at compile time.  In such a case we would have to
138    generate several instructions to compute M' as above at run time,
139    and a cost model is needed again.
140 
141    However, when the mask M for an UNSPEC_VPERM is loaded from the
142    constant pool, we can replace M with M' as above at no cost
143    beyond adding a constant pool entry.  */
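
/* A minimal sketch (illustrative only; the pass applies the same
   remapping in adjust_vperm below) of the M -> M' byte-selector
   transformation described above:

     static void
     remap_permute_mask (const unsigned char m[16], unsigned char m_prime[16])
     {
       for (int i = 0; i < 16; i++)
         if (m[i] < 16)
           m_prime[i] = (m[i] + 8) % 16;          /* selects from op1 */
         else
           m_prime[i] = ((m[i] + 8) % 16) + 16;   /* selects from op2 */
     }

   For example, the identity mask M = {0,...,15} becomes
   M' = {8,...,15,0,...,7}, i.e. a doubleword swap of the identity.  */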
144 
145 /* This is based on the union-find logic in web.c.  web_entry_base is
146    defined in df.h.  */
147 class swap_web_entry : public web_entry_base
148 {
149  public:
150   /* Pointer to the insn.  */
151   rtx_insn *insn;
152   /* Set if insn contains a mention of a vector register.  All other
153      fields are undefined if this field is unset.  */
154   unsigned int is_relevant : 1;
155   /* Set if insn is a load.  */
156   unsigned int is_load : 1;
157   /* Set if insn is a store.  */
158   unsigned int is_store : 1;
159   /* Set if insn is a doubleword swap.  This can either be a register swap
160      or a permuting load or store (test is_load and is_store for this).  */
161   unsigned int is_swap : 1;
162   /* Set if the insn has a live-in use of a parameter register.  */
163   unsigned int is_live_in : 1;
164   /* Set if the insn has a live-out def of a return register.  */
165   unsigned int is_live_out : 1;
166   /* Set if the insn contains a subreg reference of a vector register.  */
167   unsigned int contains_subreg : 1;
168   /* Set if the insn contains a 128-bit integer operand.  */
169   unsigned int is_128_int : 1;
170   /* Set if this is a call-insn.  */
171   unsigned int is_call : 1;
172   /* Set if this insn does not perform a vector operation for which
173      element order matters, or if we know how to fix it up if it does.
174      Undefined if is_swap is set.  */
175   unsigned int is_swappable : 1;
176   /* A nonzero value indicates what kind of special handling for this
177      insn is required if doublewords are swapped.  Undefined if
178      is_swappable is not set.  */
179   unsigned int special_handling : 4;
180   /* Set if the web represented by this entry cannot be optimized.  */
181   unsigned int web_not_optimizable : 1;
182   /* Set if this insn should be deleted.  */
183   unsigned int will_delete : 1;
184 };
185 
186 enum special_handling_values {
187   SH_NONE = 0,
188   SH_CONST_VECTOR,
189   SH_SUBREG,
190   SH_NOSWAP_LD,
191   SH_NOSWAP_ST,
192   SH_EXTRACT,
193   SH_SPLAT,
194   SH_XXPERMDI,
195   SH_CONCAT,
196   SH_VPERM
197 };
198 
199 /* Union INSN with all insns containing definitions that reach USE.
200    Detect whether USE is live-in to the current function.  */
201 static void
202 union_defs (swap_web_entry *insn_entry, rtx insn, df_ref use)
203 {
204   struct df_link *link = DF_REF_CHAIN (use);
205 
206   if (!link)
207     insn_entry[INSN_UID (insn)].is_live_in = 1;
208 
209   while (link)
210     {
211       if (DF_REF_IS_ARTIFICIAL (link->ref))
212 	insn_entry[INSN_UID (insn)].is_live_in = 1;
213 
214       if (DF_REF_INSN_INFO (link->ref))
215 	{
216 	  rtx def_insn = DF_REF_INSN (link->ref);
217 	  (void)unionfind_union (insn_entry + INSN_UID (insn),
218 				 insn_entry + INSN_UID (def_insn));
219 	}
220 
221       link = link->next;
222     }
223 }
224 
225 /* Union INSN with all insns containing uses reached from DEF.
226    Detect whether DEF is live-out from the current function.  */
227 static void
228 union_uses (swap_web_entry *insn_entry, rtx insn, df_ref def)
229 {
230   struct df_link *link = DF_REF_CHAIN (def);
231 
232   if (!link)
233     insn_entry[INSN_UID (insn)].is_live_out = 1;
234 
235   while (link)
236     {
237       /* This could be an eh use or some other artificial use;
238 	 we treat these all the same (killing the optimization).  */
239       if (DF_REF_IS_ARTIFICIAL (link->ref))
240 	insn_entry[INSN_UID (insn)].is_live_out = 1;
241 
242       if (DF_REF_INSN_INFO (link->ref))
243 	{
244 	  rtx use_insn = DF_REF_INSN (link->ref);
245 	  (void)unionfind_union (insn_entry + INSN_UID (insn),
246 				 insn_entry + INSN_UID (use_insn));
247 	}
248 
249       link = link->next;
250     }
251 }
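
/* A minimal sketch of how the two helpers above are intended to be
   driven (the real driver, rs6000_analyze_swaps, appears later in this
   file; the filtering shown here is an assumption for illustration):
   each relevant insn is unioned with the insns that define its uses and
   with the insns that use its defs, so every maximal web ends up under
   a single union-find root.

     df_ref use, def;
     struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
     FOR_EACH_INSN_INFO_USE (use, insn_info)
       if (ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (DF_REF_REG (use))))
         union_defs (insn_entry, insn, use);
     FOR_EACH_INSN_INFO_DEF (def, insn_info)
       if (ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (DF_REF_REG (def))))
         union_uses (insn_entry, insn, def);
*/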
252 
253 /* Return 1 iff INSN is a load insn, including permuting loads that
254    represent an lxvd2x instruction; else return 0.  */
255 static unsigned int
256 insn_is_load_p (rtx insn)
257 {
258   rtx body = PATTERN (insn);
259 
260   if (GET_CODE (body) == SET)
261     {
262       if (MEM_P (SET_SRC (body)))
263 	return 1;
264 
265       if (GET_CODE (SET_SRC (body)) == VEC_SELECT
266 	  && MEM_P (XEXP (SET_SRC (body), 0)))
267 	return 1;
268 
269       return 0;
270     }
271 
272   if (GET_CODE (body) != PARALLEL)
273     return 0;
274 
275   rtx set = XVECEXP (body, 0, 0);
276 
277   if (GET_CODE (set) == SET && MEM_P (SET_SRC (set)))
278     return 1;
279 
280   return 0;
281 }
282 
283 /* Return 1 iff INSN is a store insn, including permuting stores that
284    represent an stxvd2x instruction; else return 0.  */
285 static unsigned int
286 insn_is_store_p (rtx insn)
287 {
288   rtx body = PATTERN (insn);
289   if (GET_CODE (body) == SET && MEM_P (SET_DEST (body)))
290     return 1;
291   if (GET_CODE (body) != PARALLEL)
292     return 0;
293   rtx set = XVECEXP (body, 0, 0);
294   if (GET_CODE (set) == SET && MEM_P (SET_DEST (set)))
295     return 1;
296   return 0;
297 }
298 
299 /* Return 1 iff INSN swaps doublewords.  This may be a reg-reg swap,
300    a permuting load, or a permuting store.  */
301 static unsigned int
302 insn_is_swap_p (rtx insn)
303 {
304   rtx body = PATTERN (insn);
305   if (GET_CODE (body) != SET)
306     return 0;
307   rtx rhs = SET_SRC (body);
308   if (GET_CODE (rhs) != VEC_SELECT)
309     return 0;
310   rtx parallel = XEXP (rhs, 1);
311   if (GET_CODE (parallel) != PARALLEL)
312     return 0;
313   unsigned int len = XVECLEN (parallel, 0);
314   if (len != 2 && len != 4 && len != 8 && len != 16)
315     return 0;
316   for (unsigned int i = 0; i < len / 2; ++i)
317     {
318       rtx op = XVECEXP (parallel, 0, i);
319       if (!CONST_INT_P (op) || INTVAL (op) != len / 2 + i)
320 	return 0;
321     }
322   for (unsigned int i = len / 2; i < len; ++i)
323     {
324       rtx op = XVECEXP (parallel, 0, i);
325       if (!CONST_INT_P (op) || INTVAL (op) != i - len / 2)
326 	return 0;
327     }
328   return 1;
329 }
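
/* For reference, a V2DI register swap (xxswapdi) as recognized by
   insn_is_swap_p above looks like this in RTL:

     (set (reg:V2DI 123)
          (vec_select:V2DI (reg:V2DI 124)
                           (parallel [(const_int 1) (const_int 0)])))

   The same shape with 4, 8, or 16 selector elements, rotated by half
   the vector length, covers the other vector modes.  */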
330 
331 /* Return true iff EXPR represents the sum of two registers.  */
332 bool
333 rs6000_sum_of_two_registers_p (const_rtx expr)
334 {
335   if (GET_CODE (expr) == PLUS)
336     {
337       const_rtx operand1 = XEXP (expr, 0);
338       const_rtx operand2 = XEXP (expr, 1);
339       return (REG_P (operand1) && REG_P (operand2));
340     }
341   return false;
342 }
343 
344 /* Return true iff EXPR represents an address expression that masks off
345    the low-order 4 bits in the style of an lvx or stvx rtl pattern.  */
346 bool
347 rs6000_quadword_masked_address_p (const_rtx expr)
348 {
349   if (GET_CODE (expr) == AND)
350     {
351       const_rtx operand1 = XEXP (expr, 0);
352       const_rtx operand2 = XEXP (expr, 1);
353       if ((REG_P (operand1) || rs6000_sum_of_two_registers_p (operand1))
354 	  && CONST_SCALAR_INT_P (operand2) && INTVAL (operand2) == -16)
355 	return true;
356     }
357   return false;
358 }
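
/* For reference, the address shape accepted above is the one produced
   by the lvx/stvx patterns, e.g.

     (and:DI (plus:DI (reg:DI 3) (reg:DI 4))
             (const_int -16))

   which clears the low-order four bits of the effective address.  */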
359 
360 /* Return TRUE if INSN represents a swap of a swapped load from memory
361    and the memory address is quad-word aligned.  */
362 static bool
363 quad_aligned_load_p (swap_web_entry *insn_entry, rtx_insn *insn)
364 {
365   unsigned uid = INSN_UID (insn);
366   if (!insn_entry[uid].is_swap || insn_entry[uid].is_load)
367     return false;
368 
369   struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
370 
371   /* Since insn is known to represent a swap instruction, we know it
372      "uses" only one input variable.  */
373   df_ref use = DF_INSN_INFO_USES (insn_info);
374 
375   /* Figure out where this input variable is defined.  */
376   struct df_link *def_link = DF_REF_CHAIN (use);
377 
378   /* If there is no definition or the definition is artificial or there are
379      multiple definitions, punt.  */
380   if (!def_link || !def_link->ref || DF_REF_IS_ARTIFICIAL (def_link->ref)
381       || def_link->next)
382     return false;
383 
384   rtx def_insn = DF_REF_INSN (def_link->ref);
385   unsigned uid2 = INSN_UID (def_insn);
386   /* We're looking for a load-with-swap insn.  If this is not that,
387      return false.  */
388   if (!insn_entry[uid2].is_load || !insn_entry[uid2].is_swap)
389     return false;
390 
391   /* If the source of the rtl def is not a set from memory, return
392      false.  */
393   rtx body = PATTERN (def_insn);
394   if (GET_CODE (body) != SET
395       || GET_CODE (SET_SRC (body)) != VEC_SELECT
396       || !MEM_P (XEXP (SET_SRC (body), 0)))
397     return false;
398 
399   rtx mem = XEXP (SET_SRC (body), 0);
400   rtx base_reg = XEXP (mem, 0);
401   return ((REG_P (base_reg) || rs6000_sum_of_two_registers_p (base_reg))
402 	  && MEM_ALIGN (mem) >= 128) ? true : false;
403 }
404 
405 /* Return TRUE if INSN represents a store-with-swap of a swapped value
406    and the memory address is quad-word aligned.  */
407 static bool
408 quad_aligned_store_p (swap_web_entry *insn_entry, rtx_insn *insn)
409 {
410   unsigned uid = INSN_UID (insn);
411   if (!insn_entry[uid].is_swap || !insn_entry[uid].is_store)
412     return false;
413 
414   rtx body = PATTERN (insn);
415   rtx dest_address = XEXP (SET_DEST (body), 0);
416   rtx swap_reg = XEXP (SET_SRC (body), 0);
417 
418   /* If the base address for the memory expression is not represented
419      by a single register and is not the sum of two registers, punt.  */
420   if (!REG_P (dest_address) && !rs6000_sum_of_two_registers_p (dest_address))
421     return false;
422 
423   /* Confirm that the value to be stored is produced by a swap
424      instruction.  */
425   struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
426   df_ref use;
427   FOR_EACH_INSN_INFO_USE (use, insn_info)
428     {
429       struct df_link *def_link = DF_REF_CHAIN (use);
430 
431       /* If this is not the definition of the candidate swap register,
432 	 then skip it.  I am interested in a different definition.  */
433       if (!rtx_equal_p (DF_REF_REG (use), swap_reg))
434 	continue;
435 
436       /* If there is no def or the def is artificial or there are
437 	 multiple defs, punt.  */
438       if (!def_link || !def_link->ref || DF_REF_IS_ARTIFICIAL (def_link->ref)
439 	  || def_link->next)
440 	return false;
441 
442       rtx def_insn = DF_REF_INSN (def_link->ref);
443       unsigned uid2 = INSN_UID (def_insn);
444 
445       /* If this source value is not a simple swap, return false.  */
446       if (!insn_entry[uid2].is_swap || insn_entry[uid2].is_load
447 	  || insn_entry[uid2].is_store)
448 	return false;
449 
450       /* I've processed the use that I care about, so break out of
451 	 this loop.  */
452       break;
453     }
454 
455   /* At this point, we know the source data comes from a swap.  The
456      remaining question is whether the memory address is aligned.  */
457   rtx set = single_set (insn);
458   if (set)
459     {
460       rtx dest = SET_DEST (set);
461       if (MEM_P (dest))
462 	return (MEM_ALIGN (dest) >= 128);
463     }
464   return false;
465 }
466 
467 /* Return 1 iff UID, known to reference a swap, is both fed by a load
468    and a feeder of a store.  */
469 static unsigned int
470 swap_feeds_both_load_and_store (swap_web_entry *insn_entry)
471 {
472   rtx insn = insn_entry->insn;
473   struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
474   df_ref def, use;
475   struct df_link *link = 0;
476   rtx_insn *load = 0, *store = 0;
477   bool fed_by_load = 0;
478   bool feeds_store = 0;
479 
480   FOR_EACH_INSN_INFO_USE (use, insn_info)
481     {
482       link = DF_REF_CHAIN (use);
483       load = DF_REF_INSN (link->ref);
484       if (insn_is_load_p (load) && insn_is_swap_p (load))
485 	fed_by_load = 1;
486     }
487 
488   FOR_EACH_INSN_INFO_DEF (def, insn_info)
489     {
490       link = DF_REF_CHAIN (def);
491       store = DF_REF_INSN (link->ref);
492       if (insn_is_store_p (store) && insn_is_swap_p (store))
493 	feeds_store = 1;
494     }
495 
496   return fed_by_load && feeds_store;
497 }
498 
499 /* Return TRUE if insn is a swap fed by a load from the constant pool.  */
500 static bool
501 const_load_sequence_p (swap_web_entry *insn_entry, rtx insn)
502 {
503   unsigned uid = INSN_UID (insn);
504   if (!insn_entry[uid].is_swap || insn_entry[uid].is_load)
505     return false;
506 
507   const_rtx tocrel_base;
508 
509   struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
510   df_ref use;
511 
512   /* Iterate over the definitions that are used by this insn.  Since
513      this is known to be a swap insn, expect only one used definition.  */
514   FOR_EACH_INSN_INFO_USE (use, insn_info)
515     {
516       struct df_link *def_link = DF_REF_CHAIN (use);
517 
518       /* If there is no def or the def is artificial or there are
519 	 multiple defs, punt.  */
520       if (!def_link || !def_link->ref || DF_REF_IS_ARTIFICIAL (def_link->ref)
521 	  || def_link->next)
522 	return false;
523 
524       rtx def_insn = DF_REF_INSN (def_link->ref);
525       unsigned uid2 = INSN_UID (def_insn);
526       /* If this is not a load or is not a swap, return false.  */
527       if (!insn_entry[uid2].is_load || !insn_entry[uid2].is_swap)
528 	return false;
529 
530       /* If the source of the rtl def is not a set from memory, return
531 	 false.  */
532       rtx body = PATTERN (def_insn);
533       if (GET_CODE (body) != SET
534 	  || GET_CODE (SET_SRC (body)) != VEC_SELECT
535 	  || !MEM_P (XEXP (SET_SRC (body), 0)))
536 	return false;
537 
538       rtx mem = XEXP (SET_SRC (body), 0);
539       rtx base_reg = XEXP (mem, 0);
540       /* If the base address for the memory expression is not
541 	 represented by a register, punt.  */
542       if (!REG_P (base_reg))
543 	return false;
544 
545       df_ref base_use;
546       insn_info = DF_INSN_INFO_GET (def_insn);
547       FOR_EACH_INSN_INFO_USE (base_use, insn_info)
548 	{
549 	  /* If base_use does not represent base_reg, look for another
550 	     use.  */
551 	  if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
552 	    continue;
553 
554 	  struct df_link *base_def_link = DF_REF_CHAIN (base_use);
555 	  if (!base_def_link || base_def_link->next)
556 	    return false;
557 
558 	  /* Constants held on the stack are not "true" constants
559 	     because their values are not part of the static load
560 	     image.  If this constant's base reference is a stack
561 	     or frame pointer, it is seen as an artificial
562 	     reference.  */
563 	  if (DF_REF_IS_ARTIFICIAL (base_def_link->ref))
564 	    return false;
565 
566 	  rtx tocrel_insn = DF_REF_INSN (base_def_link->ref);
567 	  rtx tocrel_body = PATTERN (tocrel_insn);
568 	  rtx base, offset;
569 	  if (GET_CODE (tocrel_body) != SET)
570 	    return false;
571 	  /* There is an extra level of indirection for small/large
572 	     code models.  */
573 	  rtx tocrel_expr = SET_SRC (tocrel_body);
574 	  if (MEM_P (tocrel_expr))
575 	    tocrel_expr = XEXP (tocrel_expr, 0);
576 	  if (!toc_relative_expr_p (tocrel_expr, false, &tocrel_base, NULL))
577 	    return false;
578 	  split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
579 
580 	  if (!SYMBOL_REF_P (base) || !CONSTANT_POOL_ADDRESS_P (base))
581 	    return false;
582 	  else
583 	    {
584 	      /* FIXME: The conditions under which
585 	          (SYMBOL_REF_P (const_vector)
586 	           && !CONSTANT_POOL_ADDRESS_P (const_vector))
587 	         are not well understood.  This code prevents
588 	         an internal compiler error which will occur in
589 	         replace_swapped_load_constant () if we were to return
590 	         true.  Some day, we should figure out how to properly
591 	         handle this condition in
592 	         replace_swapped_load_constant () and then we can
593 	         remove this special test.  */
594 	      rtx const_vector = get_pool_constant (base);
595 	      if (SYMBOL_REF_P (const_vector)
596 		  && CONSTANT_POOL_ADDRESS_P (const_vector))
597 		const_vector = get_pool_constant (const_vector);
598 	      if (GET_CODE (const_vector) != CONST_VECTOR)
599 		return false;
600 	    }
601 	}
602     }
603   return true;
604 }
605 
606 /* Return TRUE iff OP matches a V2DF reduction pattern.  See the
607    definition of vsx_reduc_<VEC_reduc_name>_v2df in vsx.md.  */
608 static bool
609 v2df_reduction_p (rtx op)
610 {
611   if (GET_MODE (op) != V2DFmode)
612     return false;
613 
614   enum rtx_code code = GET_CODE (op);
615   if (code != PLUS && code != SMIN && code != SMAX)
616     return false;
617 
618   rtx concat = XEXP (op, 0);
619   if (GET_CODE (concat) != VEC_CONCAT)
620     return false;
621 
622   rtx select0 = XEXP (concat, 0);
623   rtx select1 = XEXP (concat, 1);
624   if (GET_CODE (select0) != VEC_SELECT || GET_CODE (select1) != VEC_SELECT)
625     return false;
626 
627   rtx reg0 = XEXP (select0, 0);
628   rtx reg1 = XEXP (select1, 0);
629   if (!rtx_equal_p (reg0, reg1) || !REG_P (reg0))
630     return false;
631 
632   rtx parallel0 = XEXP (select0, 1);
633   rtx parallel1 = XEXP (select1, 1);
634   if (GET_CODE (parallel0) != PARALLEL || GET_CODE (parallel1) != PARALLEL)
635     return false;
636 
637   if (!rtx_equal_p (XVECEXP (parallel0, 0, 0), const1_rtx)
638       || !rtx_equal_p (XVECEXP (parallel1, 0, 0), const0_rtx))
639     return false;
640 
641   return true;
642 }
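
/* For reference, the reduction shape recognized above (see
   vsx_reduc_<VEC_reduc_name>_v2df in vsx.md) combines a vector with a
   doubleword-rotated copy of itself, e.g. for a sum reduction:

     (plus:V2DF
       (vec_concat:V2DF
         (vec_select:DF (reg:V2DF 123) (parallel [(const_int 1)]))
         (vec_select:DF (reg:V2DF 123) (parallel [(const_int 0)])))
       (reg:V2DF 123))

   Both lanes of the result hold the same sum, so doubleword order does
   not affect the value.  */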
643 
644 /* Return 1 iff OP is an operand that will not be affected by having
645    vector doublewords swapped in memory.  */
646 static unsigned int
647 rtx_is_swappable_p (rtx op, unsigned int *special)
648 {
649   enum rtx_code code = GET_CODE (op);
650   int i, j;
651   rtx parallel;
652 
653   switch (code)
654     {
655     case LABEL_REF:
656     case SYMBOL_REF:
657     case CLOBBER:
658     case REG:
659       return 1;
660 
661     case VEC_CONCAT:
662     case ASM_INPUT:
663     case ASM_OPERANDS:
664       return 0;
665 
666     case CONST_VECTOR:
667       {
668 	*special = SH_CONST_VECTOR;
669 	return 1;
670       }
671 
672     case VEC_DUPLICATE:
673       /* Opportunity: If XEXP (op, 0) has the same mode as the result,
674 	 and XEXP (op, 1) is a PARALLEL with a single QImode const int,
675 	 it represents a vector splat for which we can do special
676 	 handling.  */
677       if (CONST_INT_P (XEXP (op, 0)))
678 	return 1;
679       else if (REG_P (XEXP (op, 0))
680 	       && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
681 	/* This catches V2DF and V2DI splat, at a minimum.  */
682 	return 1;
683       else if (GET_CODE (XEXP (op, 0)) == TRUNCATE
684 	       && REG_P (XEXP (XEXP (op, 0), 0))
685 	       && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
686 	/* This catches splat of a truncated value.  */
687 	return 1;
688       else if (GET_CODE (XEXP (op, 0)) == VEC_SELECT)
689 	/* If the duplicated item is from a select, defer to the select
690 	   processing to see if we can change the lane for the splat.  */
691 	return rtx_is_swappable_p (XEXP (op, 0), special);
692       else
693 	return 0;
694 
695     case VEC_SELECT:
696       /* A vec_extract operation is ok if we change the lane.  */
697       if (REG_P (XEXP (op, 0))
698 	  && GET_MODE_INNER (GET_MODE (XEXP (op, 0))) == GET_MODE (op)
699 	  && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
700 	  && XVECLEN (parallel, 0) == 1
701 	  && CONST_INT_P (XVECEXP (parallel, 0, 0)))
702 	{
703 	  *special = SH_EXTRACT;
704 	  return 1;
705 	}
706       /* An XXPERMDI is ok if we adjust the lanes.  Note that if the
707 	 XXPERMDI is a swap operation, it will be identified by
708 	 insn_is_swap_p and therefore we won't get here.  */
709       else if (GET_CODE (XEXP (op, 0)) == VEC_CONCAT
710 	       && (GET_MODE (XEXP (op, 0)) == V4DFmode
711 		   || GET_MODE (XEXP (op, 0)) == V4DImode)
712 	       && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
713 	       && XVECLEN (parallel, 0) == 2
714 	       && CONST_INT_P (XVECEXP (parallel, 0, 0))
715 	       && CONST_INT_P (XVECEXP (parallel, 0, 1)))
716 	{
717 	  *special = SH_XXPERMDI;
718 	  return 1;
719 	}
720       else if (v2df_reduction_p (op))
721 	return 1;
722       else
723 	return 0;
724 
725     case UNSPEC:
726       {
727 	/* Various operations are unsafe for this optimization, at least
728 	   without significant additional work.  Permutes are obviously
729 	   problematic, as both the permute control vector and the ordering
730 	   of the target values are invalidated by doubleword swapping.
731 	   Vector pack and unpack modify the number of vector lanes.
732 	   Merge-high/low will not operate correctly on swapped operands.
733 	   Vector shifts across element boundaries are clearly uncool,
734 	   as are vector select and concatenate operations.  Vector
735 	   sum-across instructions define one operand with a specific
736 	   order-dependent element, so additional fixup code would be
737 	   needed to make those work.  Vector set and non-immediate-form
738 	   vector splat are element-order sensitive.  A few of these
739 	   cases might be workable with special handling if required.
740 	   Adding cost modeling would be appropriate in some cases.  */
741 	int val = XINT (op, 1);
742 	switch (val)
743 	  {
744 	  default:
745 	    break;
746 	  case UNSPEC_VBPERMQ:
747 	  case UNSPEC_VMRGH_DIRECT:
748 	  case UNSPEC_VMRGL_DIRECT:
749 	  case UNSPEC_VPACK_SIGN_SIGN_SAT:
750 	  case UNSPEC_VPACK_SIGN_UNS_SAT:
751 	  case UNSPEC_VPACK_UNS_UNS_MOD:
752 	  case UNSPEC_VPACK_UNS_UNS_MOD_DIRECT:
753 	  case UNSPEC_VPACK_UNS_UNS_SAT:
754 	  case UNSPEC_VPERM:
755 	  case UNSPEC_VPERM_UNS:
756 	  case UNSPEC_VPERMHI:
757 	  case UNSPEC_VPERMSI:
758 	  case UNSPEC_VPERMXOR:
759 	  case UNSPEC_VPKPX:
760 	  case UNSPEC_VSLDOI:
761 	  case UNSPEC_VSLO:
762 	  case UNSPEC_VSRO:
763 	  case UNSPEC_VSUM2SWS:
764 	  case UNSPEC_VSUM4S:
765 	  case UNSPEC_VSUM4UBS:
766 	  case UNSPEC_VSUMSWS:
767 	  case UNSPEC_VSUMSWS_DIRECT:
768 	  case UNSPEC_VSX_CONCAT:
769 	  case UNSPEC_VSX_CVDPSPN:
770 	  case UNSPEC_VSX_CVSPDP:
771 	  case UNSPEC_VSX_CVSPDPN:
772 	  case UNSPEC_VSX_EXTRACT:
773 	  case UNSPEC_VSX_SET:
774 	  case UNSPEC_VSX_SLDWI:
775 	  case UNSPEC_VSX_VSLO:
776 	  case UNSPEC_VUNPACK_HI_SIGN:
777 	  case UNSPEC_VUNPACK_HI_SIGN_DIRECT:
778 	  case UNSPEC_VUNPACK_LO_SIGN:
779 	  case UNSPEC_VUNPACK_LO_SIGN_DIRECT:
780 	  case UNSPEC_VUPKHPX:
781 	  case UNSPEC_VUPKHS_V4SF:
782 	  case UNSPEC_VUPKHU_V4SF:
783 	  case UNSPEC_VUPKLPX:
784 	  case UNSPEC_VUPKLS_V4SF:
785 	  case UNSPEC_VUPKLU_V4SF:
786 	    return 0;
787 	  case UNSPEC_VSPLT_DIRECT:
788 	  case UNSPEC_VSX_XXSPLTD:
789 	    *special = SH_SPLAT;
790 	    return 1;
791 	  case UNSPEC_REDUC_PLUS:
792 	  case UNSPEC_REDUC:
793 	    return 1;
794 	  case UNSPEC_VPMSUM:
795 	    /* vpmsumd is not swappable, but vpmsum[bhw] are.  */
796 	    if (GET_MODE (op) == V2DImode)
797 	      return 0;
798 	    break;
799 	  }
800       }
801 
802     default:
803       break;
804     }
805 
806   const char *fmt = GET_RTX_FORMAT (code);
807   int ok = 1;
808 
809   for (i = 0; i < GET_RTX_LENGTH (code); ++i)
810     if (fmt[i] == 'e' || fmt[i] == 'u')
811       {
812 	unsigned int special_op = SH_NONE;
813 	ok &= rtx_is_swappable_p (XEXP (op, i), &special_op);
814 	if (special_op == SH_NONE)
815 	  continue;
816 	/* Ensure we never have two kinds of special handling
817 	   for the same insn.  */
818 	if (*special != SH_NONE && *special != special_op)
819 	  return 0;
820 	*special = special_op;
821       }
822     else if (fmt[i] == 'E')
823       for (j = 0; j < XVECLEN (op, i); ++j)
824 	{
825 	  unsigned int special_op = SH_NONE;
826 	  ok &= rtx_is_swappable_p (XVECEXP (op, i, j), &special_op);
827 	  if (special_op == SH_NONE)
828 	    continue;
829 	  /* Ensure we never have two kinds of special handling
830 	     for the same insn.  */
831 	  if (*special != SH_NONE && *special != special_op)
832 	    return 0;
833 	  *special = special_op;
834 	}
835 
836   return ok;
837 }
838 
839 /* Return 1 iff INSN is an operand that will not be affected by
840    having vector doublewords swapped in memory (in which case
841    *SPECIAL is unchanged), or that can be modified to be correct
842    if vector doublewords are swapped in memory (in which case
843    *SPECIAL is changed to a value indicating how).  */
844 static unsigned int
845 insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn,
846 		     unsigned int *special)
847 {
848   /* Calls are always bad.  */
849   if (GET_CODE (insn) == CALL_INSN)
850     return 0;
851 
852   /* Loads and stores seen here are not permuting, but we can still
853      fix them up by converting them to permuting ones.  Exceptions:
854      UNSPEC_LVE, UNSPEC_LVX, and UNSPEC_STVX, which have a PARALLEL
855      body instead of a SET; and UNSPEC_STVE, which has an UNSPEC
856      for the SET source.  Also we must now make an exception for lvx
857      and stvx when they are not in the UNSPEC_LVX/STVX form (with the
858      explicit "& -16") since this leads to unrecognizable insns.  */
859   rtx body = PATTERN (insn);
860   int i = INSN_UID (insn);
861 
862   if (insn_entry[i].is_load)
863     {
864       if (GET_CODE (body) == SET)
865 	{
866 	  rtx rhs = SET_SRC (body);
867 	  /* Even without a swap, the RHS might be a vec_select for, say,
868 	     a byte-reversing load.  */
869 	  if (!MEM_P (rhs))
870 	    return 0;
871 	  if (GET_CODE (XEXP (rhs, 0)) == AND)
872 	    return 0;
873 
874 	  *special = SH_NOSWAP_LD;
875 	  return 1;
876 	}
877       else
878 	return 0;
879     }
880 
881   if (insn_entry[i].is_store)
882     {
883       if (GET_CODE (body) == SET
884 	  && GET_CODE (SET_SRC (body)) != UNSPEC
885 	  && GET_CODE (SET_SRC (body)) != VEC_SELECT)
886 	{
887 	  rtx lhs = SET_DEST (body);
888 	  /* Even without a swap, the RHS might be a vec_select for, say,
889 	     a byte-reversing store.  */
890 	  if (!MEM_P (lhs))
891 	    return 0;
892 	  if (GET_CODE (XEXP (lhs, 0)) == AND)
893 	    return 0;
894 
895 	  *special = SH_NOSWAP_ST;
896 	  return 1;
897 	}
898       else
899 	return 0;
900     }
901 
902   /* A convert to single precision can be left as is provided that
903      all of its uses are in xxspltw instructions that splat BE element
904      zero.  */
905   if (GET_CODE (body) == SET
906       && GET_CODE (SET_SRC (body)) == UNSPEC
907       && XINT (SET_SRC (body), 1) == UNSPEC_VSX_CVDPSPN)
908     {
909       df_ref def;
910       struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
911 
912       FOR_EACH_INSN_INFO_DEF (def, insn_info)
913 	{
914 	  struct df_link *link = DF_REF_CHAIN (def);
915 	  if (!link)
916 	    return 0;
917 
918 	  for (; link; link = link->next) {
919 	    rtx use_insn = DF_REF_INSN (link->ref);
920 	    rtx use_body = PATTERN (use_insn);
921 	    if (GET_CODE (use_body) != SET
922 		|| GET_CODE (SET_SRC (use_body)) != UNSPEC
923 		|| XINT (SET_SRC (use_body), 1) != UNSPEC_VSX_XXSPLTW
924 		|| XVECEXP (SET_SRC (use_body), 0, 1) != const0_rtx)
925 	      return 0;
926 	  }
927 	}
928 
929       return 1;
930     }
931 
932   /* A concatenation of two doublewords is ok if we reverse the
933      order of the inputs.  */
934   if (GET_CODE (body) == SET
935       && GET_CODE (SET_SRC (body)) == VEC_CONCAT
936       && (GET_MODE (SET_SRC (body)) == V2DFmode
937 	  || GET_MODE (SET_SRC (body)) == V2DImode))
938     {
939       *special = SH_CONCAT;
940       return 1;
941     }
942 
943   /* V2DF reductions are always swappable.  */
944   if (GET_CODE (body) == PARALLEL)
945     {
946       rtx expr = XVECEXP (body, 0, 0);
947       if (GET_CODE (expr) == SET
948 	  && v2df_reduction_p (SET_SRC (expr)))
949 	return 1;
950     }
951 
952   /* An UNSPEC_VPERM is ok if the mask operand is loaded from the
953      constant pool.  */
954   if (GET_CODE (body) == SET
955       && GET_CODE (SET_SRC (body)) == UNSPEC
956       && XINT (SET_SRC (body), 1) == UNSPEC_VPERM
957       && XVECLEN (SET_SRC (body), 0) == 3
958       && REG_P (XVECEXP (SET_SRC (body), 0, 2)))
959     {
960       rtx mask_reg = XVECEXP (SET_SRC (body), 0, 2);
961       struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
962       df_ref use;
963       FOR_EACH_INSN_INFO_USE (use, insn_info)
964 	if (rtx_equal_p (DF_REF_REG (use), mask_reg))
965 	  {
966 	    struct df_link *def_link = DF_REF_CHAIN (use);
967 	    /* Punt if multiple definitions for this reg.  */
968 	    if (def_link && !def_link->next &&
969 		const_load_sequence_p (insn_entry,
970 				       DF_REF_INSN (def_link->ref)))
971 	      {
972 		*special = SH_VPERM;
973 		return 1;
974 	      }
975 	  }
976     }
977 
978   /* Otherwise check the operands for vector lane violations.  */
979   return rtx_is_swappable_p (body, special);
980 }
981 
982 enum chain_purpose { FOR_LOADS, FOR_STORES };
983 
984 /* Return true if the UD or DU chain headed by LINK is non-empty,
985    and every entry on the chain references an insn that is a
986    register swap.  Furthermore, if PURPOSE is FOR_LOADS, each such
987    register swap must have only permuting loads as reaching defs.
988    If PURPOSE is FOR_STORES, each such register swap must have only
989    register swaps or permuting stores as reached uses.  */
990 static bool
991 chain_contains_only_swaps (swap_web_entry *insn_entry, struct df_link *link,
992 			   enum chain_purpose purpose)
993 {
994   if (!link)
995     return false;
996 
997   for (; link; link = link->next)
998     {
999       if (!ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (DF_REF_REG (link->ref))))
1000 	continue;
1001 
1002       if (DF_REF_IS_ARTIFICIAL (link->ref))
1003 	return false;
1004 
1005       rtx reached_insn = DF_REF_INSN (link->ref);
1006       unsigned uid = INSN_UID (reached_insn);
1007       struct df_insn_info *insn_info = DF_INSN_INFO_GET (reached_insn);
1008 
1009       if (!insn_entry[uid].is_swap || insn_entry[uid].is_load
1010 	  || insn_entry[uid].is_store)
1011 	return false;
1012 
1013       if (purpose == FOR_LOADS)
1014 	{
1015 	  df_ref use;
1016 	  FOR_EACH_INSN_INFO_USE (use, insn_info)
1017 	    {
1018 	      struct df_link *swap_link = DF_REF_CHAIN (use);
1019 
1020 	      while (swap_link)
1021 		{
1022 		  if (DF_REF_IS_ARTIFICIAL (link->ref))
1023 		    return false;
1024 
1025 		  rtx swap_def_insn = DF_REF_INSN (swap_link->ref);
1026 		  unsigned uid2 = INSN_UID (swap_def_insn);
1027 
1028 		  /* Only permuting loads are allowed.  */
1029 		  if (!insn_entry[uid2].is_swap || !insn_entry[uid2].is_load)
1030 		    return false;
1031 
1032 		  swap_link = swap_link->next;
1033 		}
1034 	    }
1035 	}
1036       else if (purpose == FOR_STORES)
1037 	{
1038 	  df_ref def;
1039 	  FOR_EACH_INSN_INFO_DEF (def, insn_info)
1040 	    {
1041 	      struct df_link *swap_link = DF_REF_CHAIN (def);
1042 
1043 	      while (swap_link)
1044 		{
1045 		  if (DF_REF_IS_ARTIFICIAL (link->ref))
1046 		    return false;
1047 
1048 		  rtx swap_use_insn = DF_REF_INSN (swap_link->ref);
1049 		  unsigned uid2 = INSN_UID (swap_use_insn);
1050 
1051 		  /* Permuting stores or register swaps are allowed.  */
1052 		  if (!insn_entry[uid2].is_swap || insn_entry[uid2].is_load)
1053 		    return false;
1054 
1055 		  swap_link = swap_link->next;
1056 		}
1057 	    }
1058 	}
1059     }
1060 
1061   return true;
1062 }
1063 
1064 /* Mark the xxswapdi instructions associated with permuting loads and
1065    stores for removal.  Note that we only flag them for deletion here,
1066    as there is a possibility of a swap being reached from multiple
1067    loads, etc.  */
1068 static void
1069 mark_swaps_for_removal (swap_web_entry *insn_entry, unsigned int i)
1070 {
1071   rtx insn = insn_entry[i].insn;
1072   struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
1073 
1074   if (insn_entry[i].is_load)
1075     {
1076       df_ref def;
1077       FOR_EACH_INSN_INFO_DEF (def, insn_info)
1078 	{
1079 	  struct df_link *link = DF_REF_CHAIN (def);
1080 
1081 	  /* We know by now that these are swaps, so we can delete
1082 	     them confidently.  */
1083 	  while (link)
1084 	    {
1085 	      rtx use_insn = DF_REF_INSN (link->ref);
1086 	      insn_entry[INSN_UID (use_insn)].will_delete = 1;
1087 	      link = link->next;
1088 	    }
1089 	}
1090     }
1091   else if (insn_entry[i].is_store)
1092     {
1093       df_ref use;
1094       FOR_EACH_INSN_INFO_USE (use, insn_info)
1095 	{
1096 	  /* Ignore uses for addressability.  */
1097 	  machine_mode mode = GET_MODE (DF_REF_REG (use));
1098 	  if (!ALTIVEC_OR_VSX_VECTOR_MODE (mode))
1099 	    continue;
1100 
1101 	  struct df_link *link = DF_REF_CHAIN (use);
1102 
1103 	  /* We know by now that these are swaps, so we can delete
1104 	     them confidently.  */
1105 	  while (link)
1106 	    {
1107 	      rtx def_insn = DF_REF_INSN (link->ref);
1108 	      insn_entry[INSN_UID (def_insn)].will_delete = 1;
1109 	      link = link->next;
1110 	    }
1111 	}
1112     }
1113 }
1114 
1115 /* *OP_PTR is either a CONST_VECTOR or an expression containing one.
1116    Swap the first half of the vector with the second in the first
1117    case.  Recurse to find it in the second.  */
1118 static void
1119 swap_const_vector_halves (rtx *op_ptr)
1120 {
1121   int i;
1122   rtx op = *op_ptr;
1123   enum rtx_code code = GET_CODE (op);
1124   if (GET_CODE (op) == CONST_VECTOR)
1125     {
1126       int units = GET_MODE_NUNITS (GET_MODE (op));
1127       rtx_vector_builder builder (GET_MODE (op), units, 1);
1128       for (i = 0; i < units / 2; ++i)
1129 	builder.quick_push (CONST_VECTOR_ELT (op, i + units / 2));
1130       for (i = 0; i < units / 2; ++i)
1131 	builder.quick_push (CONST_VECTOR_ELT (op, i));
1132       *op_ptr = builder.build ();
1133     }
1134   else
1135     {
1136       int j;
1137       const char *fmt = GET_RTX_FORMAT (code);
1138       for (i = 0; i < GET_RTX_LENGTH (code); ++i)
1139 	if (fmt[i] == 'e' || fmt[i] == 'u')
1140 	  swap_const_vector_halves (&XEXP (op, i));
1141 	else if (fmt[i] == 'E')
1142 	  for (j = 0; j < XVECLEN (op, i); ++j)
1143 	    swap_const_vector_halves (&XVECEXP (op, i, j));
1144     }
1145 }
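
/* For example, a V4SI constant {0, 1, 2, 3} becomes {2, 3, 0, 1}:
   the two 64-bit halves trade places while the element order within
   each half is preserved.  */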
1146 
1147 /* Find all subregs of a vector expression that perform a narrowing,
1148    and adjust the subreg index to account for doubleword swapping.  */
1149 static void
1150 adjust_subreg_index (rtx op)
1151 {
1152   enum rtx_code code = GET_CODE (op);
1153   if (code == SUBREG
1154       && (GET_MODE_SIZE (GET_MODE (op))
1155 	  < GET_MODE_SIZE (GET_MODE (XEXP (op, 0)))))
1156     {
1157       unsigned int index = SUBREG_BYTE (op);
1158       if (index < 8)
1159 	index += 8;
1160       else
1161 	index -= 8;
1162       SUBREG_BYTE (op) = index;
1163     }
1164 
1165   const char *fmt = GET_RTX_FORMAT (code);
1166   int i,j;
1167   for (i = 0; i < GET_RTX_LENGTH (code); ++i)
1168     if (fmt[i] == 'e' || fmt[i] == 'u')
1169       adjust_subreg_index (XEXP (op, i));
1170     else if (fmt[i] == 'E')
1171       for (j = 0; j < XVECLEN (op, i); ++j)
1172 	adjust_subreg_index (XVECEXP (op, i, j));
1173 }
1174 
1175 /* Convert the non-permuting load INSN to a permuting one.  */
1176 static void
1177 permute_load (rtx_insn *insn)
1178 {
1179   rtx body = PATTERN (insn);
1180   rtx mem_op = SET_SRC (body);
1181   rtx tgt_reg = SET_DEST (body);
1182   machine_mode mode = GET_MODE (tgt_reg);
1183   int n_elts = GET_MODE_NUNITS (mode);
1184   int half_elts = n_elts / 2;
1185   rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
1186   int i, j;
1187   for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
1188     XVECEXP (par, 0, i) = GEN_INT (j);
1189   for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
1190     XVECEXP (par, 0, i) = GEN_INT (j);
1191   rtx sel = gen_rtx_VEC_SELECT (mode, mem_op, par);
1192   SET_SRC (body) = sel;
1193   INSN_CODE (insn) = -1; /* Force re-recognition.  */
1194   df_insn_rescan (insn);
1195 
1196   if (dump_file)
1197     fprintf (dump_file, "Replacing load %d with permuted load\n",
1198 	     INSN_UID (insn));
1199 }
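
/* For a V4SI load, the selector built above is (parallel [2 3 0 1]),
   so the converted insn has the form of a little-endian permuting
   load (the behavior an lxvd2x provides) rather than a plain load.  */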
1200 
1201 /* Convert the non-permuting store INSN to a permuting one.  */
1202 static void
1203 permute_store (rtx_insn *insn)
1204 {
1205   rtx body = PATTERN (insn);
1206   rtx src_reg = SET_SRC (body);
1207   machine_mode mode = GET_MODE (src_reg);
1208   int n_elts = GET_MODE_NUNITS (mode);
1209   int half_elts = n_elts / 2;
1210   rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
1211   int i, j;
1212   for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
1213     XVECEXP (par, 0, i) = GEN_INT (j);
1214   for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
1215     XVECEXP (par, 0, i) = GEN_INT (j);
1216   rtx sel = gen_rtx_VEC_SELECT (mode, src_reg, par);
1217   SET_SRC (body) = sel;
1218   INSN_CODE (insn) = -1; /* Force re-recognition.  */
1219   df_insn_rescan (insn);
1220 
1221   if (dump_file)
1222     fprintf (dump_file, "Replacing store %d with permuted store\n",
1223 	     INSN_UID (insn));
1224 }
1225 
1226 /* Given OP that contains a vector extract operation, adjust the index
1227    of the extracted lane to account for the doubleword swap.  */
1228 static void
1229 adjust_extract (rtx_insn *insn)
1230 {
1231   rtx pattern = PATTERN (insn);
1232   if (GET_CODE (pattern) == PARALLEL)
1233     pattern = XVECEXP (pattern, 0, 0);
1234   rtx src = SET_SRC (pattern);
1235   /* The vec_select may be wrapped in a vec_duplicate for a splat, so
1236      account for that.  */
1237   rtx sel = GET_CODE (src) == VEC_DUPLICATE ? XEXP (src, 0) : src;
1238   rtx par = XEXP (sel, 1);
1239   int half_elts = GET_MODE_NUNITS (GET_MODE (XEXP (sel, 0))) >> 1;
1240   int lane = INTVAL (XVECEXP (par, 0, 0));
1241   lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
1242   XVECEXP (par, 0, 0) = GEN_INT (lane);
1243   INSN_CODE (insn) = -1; /* Force re-recognition.  */
1244   df_insn_rescan (insn);
1245 
1246   if (dump_file)
1247     fprintf (dump_file, "Changing lane for extract %d\n", INSN_UID (insn));
1248 }
1249 
1250 /* Given OP that contains a vector direct-splat operation, adjust the index
1251    of the source lane to account for the doubleword swap.  */
1252 static void
1253 adjust_splat (rtx_insn *insn)
1254 {
1255   rtx body = PATTERN (insn);
1256   rtx unspec = XEXP (body, 1);
1257   int half_elts = GET_MODE_NUNITS (GET_MODE (unspec)) >> 1;
1258   int lane = INTVAL (XVECEXP (unspec, 0, 1));
1259   lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
1260   XVECEXP (unspec, 0, 1) = GEN_INT (lane);
1261   INSN_CODE (insn) = -1; /* Force re-recognition.  */
1262   df_insn_rescan (insn);
1263 
1264   if (dump_file)
1265     fprintf (dump_file, "Changing lane for splat %d\n", INSN_UID (insn));
1266 }
1267 
1268 /* Given OP that contains an XXPERMDI operation (that is not a doubleword
1269    swap), reverse the order of the source operands and adjust the indices
1270    of the source lanes to account for doubleword reversal.  */
1271 static void
1272 adjust_xxpermdi (rtx_insn *insn)
1273 {
1274   rtx set = PATTERN (insn);
1275   rtx select = XEXP (set, 1);
1276   rtx concat = XEXP (select, 0);
1277   rtx src0 = XEXP (concat, 0);
1278   XEXP (concat, 0) = XEXP (concat, 1);
1279   XEXP (concat, 1) = src0;
1280   rtx parallel = XEXP (select, 1);
1281   int lane0 = INTVAL (XVECEXP (parallel, 0, 0));
1282   int lane1 = INTVAL (XVECEXP (parallel, 0, 1));
1283   int new_lane0 = 3 - lane1;
1284   int new_lane1 = 3 - lane0;
1285   XVECEXP (parallel, 0, 0) = GEN_INT (new_lane0);
1286   XVECEXP (parallel, 0, 1) = GEN_INT (new_lane1);
1287   INSN_CODE (insn) = -1; /* Force re-recognition.  */
1288   df_insn_rescan (insn);
1289 
1290   if (dump_file)
1291     fprintf (dump_file, "Changing lanes for xxpermdi %d\n", INSN_UID (insn));
1292 }
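
/* For example, an xxpermdi with lanes [0 2], selecting doublewords
   {A0, B0} from (vec_concat A B), is rewritten to use (vec_concat B A)
   with lanes [1 3].  Since the swapped input registers hold (A1,A0)
   and (B1,B0), the rewritten insn produces (B0, A0), which is exactly
   the doubleword-swapped form of the original result.  */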
1293 
1294 /* Given OP that contains a VEC_CONCAT operation of two doublewords,
1295    reverse the order of those inputs.  */
1296 static void
1297 adjust_concat (rtx_insn *insn)
1298 {
1299   rtx set = PATTERN (insn);
1300   rtx concat = XEXP (set, 1);
1301   rtx src0 = XEXP (concat, 0);
1302   XEXP (concat, 0) = XEXP (concat, 1);
1303   XEXP (concat, 1) = src0;
1304   INSN_CODE (insn) = -1; /* Force re-recognition.  */
1305   df_insn_rescan (insn);
1306 
1307   if (dump_file)
1308     fprintf (dump_file, "Reversing inputs for concat %d\n", INSN_UID (insn));
1309 }
1310 
1311 /* Given an UNSPEC_VPERM insn, modify the mask loaded from the
1312    constant pool to reflect swapped doublewords.  */
1313 static void
1314 adjust_vperm (rtx_insn *insn)
1315 {
1316   /* We previously determined that the UNSPEC_VPERM was fed by a
1317      swap of a swapping load of a TOC-relative constant pool symbol.
1318      Find the MEM in the swapping load and replace it with a MEM for
1319      the adjusted mask constant.  */
1320   rtx set = PATTERN (insn);
1321   rtx mask_reg = XVECEXP (SET_SRC (set), 0, 2);
1322 
1323   /* Find the swap.  */
1324   struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
1325   df_ref use;
1326   rtx_insn *swap_insn = 0;
1327   FOR_EACH_INSN_INFO_USE (use, insn_info)
1328     if (rtx_equal_p (DF_REF_REG (use), mask_reg))
1329       {
1330 	struct df_link *def_link = DF_REF_CHAIN (use);
1331 	gcc_assert (def_link && !def_link->next);
1332 	swap_insn = DF_REF_INSN (def_link->ref);
1333 	break;
1334       }
1335   gcc_assert (swap_insn);
1336 
1337   /* Find the load.  */
1338   insn_info = DF_INSN_INFO_GET (swap_insn);
1339   rtx_insn *load_insn = 0;
1340   FOR_EACH_INSN_INFO_USE (use, insn_info)
1341     {
1342       struct df_link *def_link = DF_REF_CHAIN (use);
1343       gcc_assert (def_link && !def_link->next);
1344       load_insn = DF_REF_INSN (def_link->ref);
1345       break;
1346     }
1347   gcc_assert (load_insn);
1348 
1349   /* Find the TOC-relative symbol access.  */
1350   insn_info = DF_INSN_INFO_GET (load_insn);
1351   rtx_insn *tocrel_insn = 0;
1352   FOR_EACH_INSN_INFO_USE (use, insn_info)
1353     {
1354       struct df_link *def_link = DF_REF_CHAIN (use);
1355       gcc_assert (def_link && !def_link->next);
1356       tocrel_insn = DF_REF_INSN (def_link->ref);
1357       break;
1358     }
1359   gcc_assert (tocrel_insn);
1360 
1361   /* Find the embedded CONST_VECTOR.  We have to call toc_relative_expr_p
1362      to set tocrel_base; otherwise it would be unnecessary as we've
1363      already established it will return true.  */
1364   rtx base, offset;
1365   const_rtx tocrel_base;
1366   rtx tocrel_expr = SET_SRC (PATTERN (tocrel_insn));
1367   /* There is an extra level of indirection for small/large code models.  */
1368   if (MEM_P (tocrel_expr))
1369     tocrel_expr = XEXP (tocrel_expr, 0);
1370   if (!toc_relative_expr_p (tocrel_expr, false, &tocrel_base, NULL))
1371     gcc_unreachable ();
1372   split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
1373   rtx const_vector = get_pool_constant (base);
1374   /* With the extra indirection, get_pool_constant will produce the
1375      real constant from the reg_equal expression, so get the real
1376      constant.  */
1377   if (SYMBOL_REF_P (const_vector))
1378     const_vector = get_pool_constant (const_vector);
1379   gcc_assert (GET_CODE (const_vector) == CONST_VECTOR);
1380 
1381   /* Create an adjusted mask from the initial mask.  */
1382   unsigned int new_mask[16], i, val;
1383   for (i = 0; i < 16; ++i) {
1384     val = INTVAL (XVECEXP (const_vector, 0, i));
1385     if (val < 16)
1386       new_mask[i] = (val + 8) % 16;
1387     else
1388       new_mask[i] = ((val + 8) % 16) + 16;
1389   }
1390 
1391   /* Create a new CONST_VECTOR and a MEM that references it.  */
1392   rtx vals = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
1393   for (i = 0; i < 16; ++i)
1394     XVECEXP (vals, 0, i) = GEN_INT (new_mask[i]);
1395   rtx new_const_vector = gen_rtx_CONST_VECTOR (V16QImode, XVEC (vals, 0));
1396   rtx new_mem = force_const_mem (V16QImode, new_const_vector);
1397   /* This gives us a MEM whose base operand is a SYMBOL_REF, which we
1398      can't recognize.  Force the SYMBOL_REF into a register.  */
1399   if (!REG_P (XEXP (new_mem, 0))) {
1400     rtx base_reg = force_reg (Pmode, XEXP (new_mem, 0));
1401     XEXP (new_mem, 0) = base_reg;
1402     /* Move the newly created insn ahead of the load insn.  */
1403     rtx_insn *force_insn = get_last_insn ();
1404     remove_insn (force_insn);
1405     rtx_insn *before_load_insn = PREV_INSN (load_insn);
1406     add_insn_after (force_insn, before_load_insn, BLOCK_FOR_INSN (load_insn));
1407     df_insn_rescan (before_load_insn);
1408     df_insn_rescan (force_insn);
1409   }
1410 
1411   /* Replace the MEM in the load instruction and rescan it.  */
1412   XEXP (SET_SRC (PATTERN (load_insn)), 0) = new_mem;
1413   INSN_CODE (load_insn) = -1; /* Force re-recognition.  */
1414   df_insn_rescan (load_insn);
1415 
1416   if (dump_file)
1417     fprintf (dump_file, "Adjusting mask for vperm %d\n", INSN_UID (insn));
1418 }
1419 
1420 /* The insn described by INSN_ENTRY[I] can be swapped, but only
1421    with special handling.  Take care of that here.  */
1422 static void
1423 handle_special_swappables (swap_web_entry *insn_entry, unsigned i)
1424 {
1425   rtx_insn *insn = insn_entry[i].insn;
1426   rtx body = PATTERN (insn);
1427 
1428   switch (insn_entry[i].special_handling)
1429     {
1430     default:
1431       gcc_unreachable ();
1432     case SH_CONST_VECTOR:
1433       {
1434 	/* A CONST_VECTOR will only show up somewhere in the RHS of a SET.  */
1435 	gcc_assert (GET_CODE (body) == SET);
1436 	swap_const_vector_halves (&SET_SRC (body));
1437 	if (dump_file)
1438 	  fprintf (dump_file, "Swapping constant halves in insn %d\n", i);
1439 	break;
1440       }
1441     case SH_SUBREG:
1442       /* A subreg of the same size is already safe.  For subregs that
1443 	 select a smaller portion of a reg, adjust the index for
1444 	 swapped doublewords.  */
1445       adjust_subreg_index (body);
1446       if (dump_file)
1447 	fprintf (dump_file, "Adjusting subreg in insn %d\n", i);
1448       break;
1449     case SH_NOSWAP_LD:
1450       /* Convert a non-permuting load to a permuting one.  */
1451       permute_load (insn);
1452       break;
1453     case SH_NOSWAP_ST:
1454       /* Convert a non-permuting store to a permuting one.  */
1455       permute_store (insn);
1456       break;
1457     case SH_EXTRACT:
1458       /* Change the lane on an extract operation.  */
1459       adjust_extract (insn);
1460       break;
1461     case SH_SPLAT:
1462       /* Change the lane on a direct-splat operation.  */
1463       adjust_splat (insn);
1464       break;
1465     case SH_XXPERMDI:
1466       /* Change the lanes on an XXPERMDI operation.  */
1467       adjust_xxpermdi (insn);
1468       break;
1469     case SH_CONCAT:
1470       /* Reverse the order of a concatenation operation.  */
1471       adjust_concat (insn);
1472       break;
1473     case SH_VPERM:
1474       /* Change the mask loaded from the constant pool for a VPERM.  */
1475       adjust_vperm (insn);
1476       break;
1477     }
1478 }
1479 
1480 /* Find the insn from the Ith table entry, which is known to be a
1481    register swap Y = SWAP(X).  Replace it with a copy Y = X.  */
1482 static void
1483 replace_swap_with_copy (swap_web_entry *insn_entry, unsigned i)
1484 {
1485   rtx_insn *insn = insn_entry[i].insn;
1486   rtx body = PATTERN (insn);
1487   rtx src_reg = XEXP (SET_SRC (body), 0);
1488   rtx copy = gen_rtx_SET (SET_DEST (body), src_reg);
1489   rtx_insn *new_insn = emit_insn_before (copy, insn);
1490   set_block_for_insn (new_insn, BLOCK_FOR_INSN (insn));
1491   df_insn_rescan (new_insn);
1492 
1493   if (dump_file)
1494     {
1495       unsigned int new_uid = INSN_UID (new_insn);
1496       fprintf (dump_file, "Replacing swap %d with copy %d\n", i, new_uid);
1497     }
1498 
1499   df_insn_delete (insn);
1500   remove_insn (insn);
1501   insn->set_deleted ();
1502 }
1503 
1504 /* INSN is known to contain a SUBREG, which we can normally handle,
1505    but if the SUBREG itself contains a MULT then we need to leave it alone
1506    to avoid turning a mult_hipart into a mult_lopart, for example.  */
1507 static bool
1508 has_part_mult (rtx_insn *insn)
1509 {
1510   rtx body = PATTERN (insn);
1511   if (GET_CODE (body) != SET)
1512     return false;
1513   rtx src = SET_SRC (body);
1514   if (GET_CODE (src) != SUBREG)
1515     return false;
1516   rtx inner = XEXP (src, 0);
1517   return (GET_CODE (inner) == MULT);
1518 }
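
/* For example (illustrative; exact operands depend on the high-part
   multiply patterns in use), an insn such as

     (set (reg:DI 123)
          (subreg:DI (mult:TI (...) (...)) 8))

   extracts the high half of the 128-bit product on little-endian;
   flipping the subreg byte as adjust_subreg_index does for other
   subregs would silently take the low half instead, so such insns
   are left untouched.  */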
1519 
1520 /* Make NEW_MEM_EXP's attributes and flags resemble those of
1521    ORIGINAL_MEM_EXP.  */
1522 static void
1523 mimic_memory_attributes_and_flags (rtx new_mem_exp, const_rtx original_mem_exp)
1524 {
1525   RTX_FLAG (new_mem_exp, jump) = RTX_FLAG (original_mem_exp, jump);
1526   RTX_FLAG (new_mem_exp, call) = RTX_FLAG (original_mem_exp, call);
1527   RTX_FLAG (new_mem_exp, unchanging) = RTX_FLAG (original_mem_exp, unchanging);
1528   RTX_FLAG (new_mem_exp, volatil) = RTX_FLAG (original_mem_exp, volatil);
1529   RTX_FLAG (new_mem_exp, frame_related) =
1530     RTX_FLAG (original_mem_exp, frame_related);
1531 
1532   /* The following fields may not be used with MEM subexpressions.  */
1533   RTX_FLAG (new_mem_exp, in_struct) = RTX_FLAG (original_mem_exp, in_struct);
1534   RTX_FLAG (new_mem_exp, return_val) = RTX_FLAG (original_mem_exp, return_val);
1535 
1536   struct mem_attrs original_attrs = *get_mem_attrs(original_mem_exp);
1537 
1538   alias_set_type set = original_attrs.alias;
1539   set_mem_alias_set (new_mem_exp, set);
1540 
1541   addr_space_t addrspace = original_attrs.addrspace;
1542   set_mem_addr_space (new_mem_exp, addrspace);
1543 
1544   unsigned int align = original_attrs.align;
1545   set_mem_align (new_mem_exp, align);
1546 
1547   tree expr = original_attrs.expr;
1548   set_mem_expr (new_mem_exp, expr);
1549 
1550   if (original_attrs.offset_known_p)
1551     {
1552       HOST_WIDE_INT offset = original_attrs.offset;
1553       set_mem_offset (new_mem_exp, offset);
1554     }
1555   else
1556     clear_mem_offset (new_mem_exp);
1557 
1558   if (original_attrs.size_known_p)
1559     {
1560       HOST_WIDE_INT size = original_attrs.size;
1561       set_mem_size (new_mem_exp, size);
1562     }
1563   else
1564     clear_mem_size (new_mem_exp);
1565 }
1566 
1567 /* Generate an rtx expression to represent use of the stvx insn to store
1568    the value in register SRC_EXP into the memory represented by DEST_EXP,
1569    with vector mode MODE.  */
1570 rtx
1571 rs6000_gen_stvx (enum machine_mode mode, rtx dest_exp, rtx src_exp)
1572 {
1573   rtx stvx;
1574 
1575   if (mode == V16QImode)
1576     stvx = gen_altivec_stvx_v16qi (src_exp, dest_exp);
1577   else if (mode == V8HImode)
1578     stvx = gen_altivec_stvx_v8hi (src_exp, dest_exp);
1579 #ifdef HAVE_V8HFmode
1580   else if (mode == V8HFmode)
1581     stvx = gen_altivec_stvx_v8hf (src_exp, dest_exp);
1582 #endif
1583   else if (mode == V4SImode)
1584     stvx = gen_altivec_stvx_v4si (src_exp, dest_exp);
1585   else if (mode == V4SFmode)
1586     stvx = gen_altivec_stvx_v4sf (src_exp, dest_exp);
1587   else if (mode == V2DImode)
1588     stvx = gen_altivec_stvx_v2di (src_exp, dest_exp);
1589   else if (mode == V2DFmode)
1590     stvx = gen_altivec_stvx_v2df (src_exp, dest_exp);
1591   else if (mode == V1TImode)
1592     stvx = gen_altivec_stvx_v1ti (src_exp, dest_exp);
1593   else
1594     /* KFmode, TFmode, other modes not expected in this context.  */
1595     gcc_unreachable ();
1596 
1597   rtx new_mem_exp = SET_DEST (PATTERN (stvx));
1598   mimic_memory_attributes_and_flags (new_mem_exp, dest_exp);
1599   return stvx;
1600 }
1601 
1602 /* Given that STORE_INSN represents an aligned store-with-swap of a
1603    swapped value, replace the store with an aligned store (without
1604    swap) and replace the swap with a copy insn.  */
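/* Illustrative sketch: the input is a pair of insns of roughly the form

     vr2 = vec_select (vr1, [1 0])        ; doubleword swap
     mem[addr] = vec_select (vr2, [1 0])  ; store-with-swap (stxvd2x)

   Because ADDR is known to be 16-byte aligned, the store can become a
   plain stvx of vr2 and the feeding swap a simple copy vr2 = vr1.  */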
1605 static void
1606 replace_swapped_aligned_store (swap_web_entry *insn_entry,
1607 			       rtx_insn *store_insn)
1608 {
1609   unsigned uid = INSN_UID (store_insn);
1610   gcc_assert (insn_entry[uid].is_swap && insn_entry[uid].is_store);
1611 
1612   rtx body = PATTERN (store_insn);
1613   rtx dest_address = XEXP (SET_DEST (body), 0);
1614   rtx swap_reg = XEXP (SET_SRC (body), 0);
1615   gcc_assert (REG_P (dest_address)
1616 	      || rs6000_sum_of_two_registers_p (dest_address));
1617 
1618   /* Find the swap instruction that provides the value to be stored by
1619      this store-with-swap instruction.  */
1620   struct df_insn_info *insn_info = DF_INSN_INFO_GET (store_insn);
1621   df_ref use;
1622   rtx_insn *swap_insn = NULL;
1623   unsigned uid2 = 0;
1624   FOR_EACH_INSN_INFO_USE (use, insn_info)
1625     {
1626       struct df_link *def_link = DF_REF_CHAIN (use);
1627 
1628       /* If this is not the definition of the candidate swap register,
1629 	 then skip it.  We are only interested in the swap insn.  */
1630       if (!rtx_equal_p (DF_REF_REG (use), swap_reg))
1631 	continue;
1632 
1633       /* If there is no def or the def is artificial or there are
1634 	 multiple defs, we should not be here.  */
1635       gcc_assert (def_link && def_link->ref && !def_link->next
1636 		  && !DF_REF_IS_ARTIFICIAL (def_link->ref));
1637 
1638       swap_insn = DF_REF_INSN (def_link->ref);
1639       uid2 = INSN_UID (swap_insn);
1640 
1641       /* If this source value is not a simple swap, we should not be here.  */
1642       gcc_assert (insn_entry[uid2].is_swap && !insn_entry[uid2].is_load
1643 		  && !insn_entry[uid2].is_store);
1644 
1645       /* We've processed the use we care about, so break out of
1646 	 this loop.  */
1647       break;
1648     }
1649 
1650   /* At this point, swap_insn and uid2 represent the swap instruction
1651      that feeds the store.  */
1652   gcc_assert (swap_insn);
1653   rtx set = single_set (store_insn);
1654   gcc_assert (set);
1655   rtx dest_exp = SET_DEST (set);
1656   rtx src_exp = XEXP (SET_SRC (body), 0);
1657   enum machine_mode mode = GET_MODE (dest_exp);
1658   gcc_assert (MEM_P (dest_exp));
1659   gcc_assert (MEM_ALIGN (dest_exp) >= 128);
1660 
1661   /* Replace the store-with-swap with an aligned stvx store.  */
1662   rtx stvx;
1663   stvx = rs6000_gen_stvx (mode, dest_exp, src_exp);
1664 
1665   rtx_insn *new_insn = emit_insn_before (stvx, store_insn);
1666   rtx new_body = PATTERN (new_insn);
1667 
1668   gcc_assert ((GET_CODE (new_body) == SET)
1669 	      && MEM_P (SET_DEST (new_body)));
1670 
1671   basic_block bb = BLOCK_FOR_INSN (store_insn);
1672   set_block_for_insn (new_insn, bb);
1673   /* Handle REG_EH_REGION note.  */
1674   if (cfun->can_throw_non_call_exceptions && BB_END (bb) == store_insn)
1675     {
1676       rtx note = find_reg_note (store_insn, REG_EH_REGION, NULL_RTX);
1677       if (note)
1678 	add_reg_note (new_insn, REG_EH_REGION, XEXP (note, 0));
1679     }
1680   df_insn_rescan (new_insn);
1681 
1682   df_insn_delete (store_insn);
1683   remove_insn (store_insn);
1684   store_insn->set_deleted ();
1685 
1686   /* Replace the swap with a copy.  */
1687   uid2 = INSN_UID (swap_insn);
1688   mark_swaps_for_removal (insn_entry, uid2);
1689   replace_swap_with_copy (insn_entry, uid2);
1690 }
1691 
1692 /* Generate an rtx expression to represent use of the lvx insn to load
1693    from memory SRC_EXP into register DEST_EXP with vector mode MODE. */
1694 rtx
1695 rs6000_gen_lvx (enum machine_mode mode, rtx dest_exp, rtx src_exp)
1696 {
1697   rtx lvx;
1698 
1699   if (mode == V16QImode)
1700     lvx = gen_altivec_lvx_v16qi (dest_exp, src_exp);
1701   else if (mode == V8HImode)
1702     lvx = gen_altivec_lvx_v8hi (dest_exp, src_exp);
1703 #ifdef HAVE_V8HFmode
1704   else if (mode == V8HFmode)
1705     lvx = gen_altivec_lvx_v8hf (dest_exp, src_exp);
1706 #endif
1707   else if (mode == V4SImode)
1708     lvx = gen_altivec_lvx_v4si (dest_exp, src_exp);
1709   else if (mode == V4SFmode)
1710     lvx = gen_altivec_lvx_v4sf (dest_exp, src_exp);
1711   else if (mode == V2DImode)
1712     lvx = gen_altivec_lvx_v2di (dest_exp, src_exp);
1713   else if (mode == V2DFmode)
1714     lvx = gen_altivec_lvx_v2df (dest_exp, src_exp);
1715   else if (mode == V1TImode)
1716     lvx = gen_altivec_lvx_v1ti (dest_exp, src_exp);
1717   else
1718     /* KFmode, TFmode, other modes not expected in this context.  */
1719     gcc_unreachable ();
1720 
1721   rtx new_mem_exp = SET_SRC (PATTERN (lvx));
1722   mimic_memory_attributes_and_flags (new_mem_exp, src_exp);
1723 
1724   return lvx;
1725 }
1726 
1727 /* Given that SWAP_INSN represents a swap of an aligned
1728    load-with-swap, replace the load with an aligned load (without
1729    swap) and replace the swap with a copy insn.  */
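/* Illustrative sketch: the input is a pair of insns of roughly the form

     vr1 = vec_select (mem[addr], [1 0])  ; load-with-swap (lxvd2x)
     vr2 = vec_select (vr1, [1 0])        ; doubleword swap

   Because ADDR is known to be 16-byte aligned, the load can become a
   plain lvx into vr1 and the swap a simple copy vr2 = vr1.  */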
1730 static void
1731 replace_swapped_aligned_load (swap_web_entry *insn_entry, rtx swap_insn)
1732 {
1733   /* Find the load.  */
1734   unsigned uid = INSN_UID (swap_insn);
1735   /* Only call this if quad_aligned_load_p (swap_insn).  */
1736   gcc_assert (insn_entry[uid].is_swap && !insn_entry[uid].is_load);
1737   struct df_insn_info *insn_info = DF_INSN_INFO_GET (swap_insn);
1738 
1739   /* Since insn is known to represent a swap instruction, we know it
1740      "uses" only one input variable.  */
1741   df_ref use = DF_INSN_INFO_USES (insn_info);
1742 
1743   /* Figure out where this input variable is defined.  */
1744   struct df_link *def_link = DF_REF_CHAIN (use);
1745   gcc_assert (def_link && !def_link->next);
1746   gcc_assert (def_link && def_link->ref &&
1747 	      !DF_REF_IS_ARTIFICIAL (def_link->ref) && !def_link->next);
1748 
1749   rtx_insn *def_insn = DF_REF_INSN (def_link->ref);
1750   unsigned uid2 = INSN_UID (def_insn);
1751 
1752   /* We're expecting a load-with-swap insn.  */
1753   gcc_assert (insn_entry[uid2].is_load && insn_entry[uid2].is_swap);
1754 
1755   /* We expect this to be a set from memory, with the source representing
1756      a swap (indicated by code VEC_SELECT).  */
1757   rtx body = PATTERN (def_insn);
1758   gcc_assert ((GET_CODE (body) == SET)
1759 	      && (GET_CODE (SET_SRC (body)) == VEC_SELECT)
1760 	      && MEM_P (XEXP (SET_SRC (body), 0)));
1761 
1762   rtx src_exp = XEXP (SET_SRC (body), 0);
1763   enum machine_mode mode = GET_MODE (src_exp);
1764   rtx lvx = rs6000_gen_lvx (mode, SET_DEST (body), src_exp);
1765 
1766   rtx_insn *new_insn = emit_insn_before (lvx, def_insn);
1767   rtx new_body = PATTERN (new_insn);
1768 
1769   gcc_assert ((GET_CODE (new_body) == SET)
1770 	      && MEM_P (SET_SRC (new_body)));
1771 
1772   basic_block bb = BLOCK_FOR_INSN (def_insn);
1773   set_block_for_insn (new_insn, bb);
1774   /* Handle REG_EH_REGION note.  */
1775   if (cfun->can_throw_non_call_exceptions && BB_END (bb) == def_insn)
1776     {
1777       rtx note = find_reg_note (def_insn, REG_EH_REGION, NULL_RTX);
1778       if (note)
1779 	add_reg_note (new_insn, REG_EH_REGION, XEXP (note, 0));
1780     }
1781   df_insn_rescan (new_insn);
1782 
1783   df_insn_delete (def_insn);
1784   remove_insn (def_insn);
1785   def_insn->set_deleted ();
1786 
1787   /* Replace the swap with a copy.  */
1788   mark_swaps_for_removal (insn_entry, uid);
1789   replace_swap_with_copy (insn_entry, uid);
1790 }
1791 
1792 /* Given that SWAP_INSN represents a swap of a load of a constant
1793    vector value, replace with a single instruction that loads a
1794    swapped variant of the original constant.
1795 
1796    The "natural" representation of a byte array in memory is the same
1797    for big endian and little endian.
1798 
1799    unsigned char byte_array[] =
1800      { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f };
1801 
1802    However, when loaded into a vector register, the representation
1803    depends on endian conventions.
1804 
1805    In big-endian mode, the register holds:
1806 
1807      MSB                                            LSB
1808      [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f ]
1809 
1810    In little-endian mode, the register holds:
1811 
1812      MSB                                            LSB
1813      [ f, e, d, c, b, a, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 ]
1814 
1815    Word arrays require different handling.  Consider the word array:
1816 
1817    unsigned int word_array[] =
1818      { 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f };
1819 
1820    The in-memory representation depends on endian configuration.  The
1821    equivalent array, declared as a byte array, in memory would be:
1822 
1823    unsigned char big_endian_word_array_data[] =
1824      { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f }
1825 
1826    unsigned char little_endian_word_array_data[] =
1827      { 3, 2, 1, 0, 7, 6, 5, 4, b, a, 9, 8, f, e, d, c }
1828 
1829    In big-endian mode, the register holds:
1830 
1831      MSB                                            LSB
1832      [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f ]
1833 
1834    In little-endian mode, the register holds:
1835 
1836      MSB                                            LSB
1837      [ c, d, e, f, 8, 9, a, b, 4, 5, 6, 7, 0, 1, 2, 3 ]
1838 
1839 
1840   Similar transformations apply to the half-word and double-word vector
1841   representations.
1842 
1843   For now, don't handle vectors of quad-precision values.  Just return.
1844   A better solution is to fix the code generator to emit lvx/stvx for
1845   those.  */
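/* For example (illustrative values), a V2DImode constant

     { 0x0001020304050607, 0x08090a0b0c0d0e0f }

   in the constant pool is replaced by

     { 0x08090a0b0c0d0e0f, 0x0001020304050607 }

   so that loading it with lxvd2x and omitting the following xxswapdi
   still leaves the intended image in the register.  */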
1846 static void
1847 replace_swapped_load_constant (swap_web_entry *insn_entry, rtx swap_insn)
1848 {
1849   /* Find the load.  */
1850   struct df_insn_info *insn_info = DF_INSN_INFO_GET (swap_insn);
1851   rtx_insn *load_insn;
1852   df_ref use = DF_INSN_INFO_USES (insn_info);
1853   struct df_link *def_link = DF_REF_CHAIN (use);
1854   gcc_assert (def_link && !def_link->next);
1855 
1856   load_insn = DF_REF_INSN (def_link->ref);
1857   gcc_assert (load_insn);
1858 
1859   /* Find the TOC-relative symbol access.  */
1860   insn_info = DF_INSN_INFO_GET (load_insn);
1861   use = DF_INSN_INFO_USES (insn_info);
1862 
1863   def_link = DF_REF_CHAIN (use);
1864   gcc_assert (def_link && !def_link->next);
1865 
1866   rtx_insn *tocrel_insn = DF_REF_INSN (def_link->ref);
1867   gcc_assert (tocrel_insn);
1868 
1869   /* Find the embedded CONST_VECTOR.  We have to call toc_relative_expr_p
1870      to set tocrel_base; otherwise it would be unnecessary as we've
1871      already established it will return true.  */
1872   rtx base, offset;
1873   rtx tocrel_expr = SET_SRC (PATTERN (tocrel_insn));
1874   const_rtx tocrel_base;
1875 
1876   /* There is an extra level of indirection for small/large code models.  */
1877   if (MEM_P (tocrel_expr))
1878     tocrel_expr = XEXP (tocrel_expr, 0);
1879 
1880   if (!toc_relative_expr_p (tocrel_expr, false, &tocrel_base, NULL))
1881     gcc_unreachable ();
1882 
1883   split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
1884   rtx const_vector = get_pool_constant (base);
1885 
1886   /* With the extra indirection, get_pool_constant will produce the
1887      real constant from the reg_equal expression, so get the real
1888      constant.  */
1889   if (SYMBOL_REF_P (const_vector))
1890     const_vector = get_pool_constant (const_vector);
1891   gcc_assert (GET_CODE (const_vector) == CONST_VECTOR);
1892 
1893   rtx new_mem;
1894   enum machine_mode mode = GET_MODE (const_vector);
1895 
1896   /* Create an adjusted constant from the original constant.  */
1897   if (mode == V1TImode)
1898     /* Leave this code as is.  */
1899     return;
1900   else if (mode == V16QImode)
1901     {
1902       rtx vals = gen_rtx_PARALLEL (mode, rtvec_alloc (16));
1903       int i;
1904 
1905       for (i = 0; i < 16; i++)
1906 	XVECEXP (vals, 0, ((i+8) % 16)) = XVECEXP (const_vector, 0, i);
1907       rtx new_const_vector = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
1908       new_mem = force_const_mem (mode, new_const_vector);
1909     }
1910   else if ((mode == V8HImode)
1911 #ifdef HAVE_V8HFmode
1912 	   || (mode == V8HFmode)
1913 #endif
1914 	   )
1915     {
1916       rtx vals = gen_rtx_PARALLEL (mode, rtvec_alloc (8));
1917       int i;
1918 
1919       for (i = 0; i < 8; i++)
1920 	XVECEXP (vals, 0, ((i+4) % 8)) = XVECEXP (const_vector, 0, i);
1921       rtx new_const_vector = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
1922       new_mem = force_const_mem (mode, new_const_vector);
1923     }
1924   else if ((mode == V4SImode) || (mode == V4SFmode))
1925     {
1926       rtx vals = gen_rtx_PARALLEL (mode, rtvec_alloc (4));
1927       int i;
1928 
1929       for (i = 0; i < 4; i++)
1930 	XVECEXP (vals, 0, ((i+2) % 4)) = XVECEXP (const_vector, 0, i);
1931       rtx new_const_vector = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
1932       new_mem = force_const_mem (mode, new_const_vector);
1933     }
1934   else if ((mode == V2DImode) || (mode == V2DFmode))
1935     {
1936       rtx vals = gen_rtx_PARALLEL (mode, rtvec_alloc (2));
1937       int i;
1938 
1939       for (i = 0; i < 2; i++)
1940 	XVECEXP (vals, 0, ((i+1) % 2)) = XVECEXP (const_vector, 0, i);
1941       rtx new_const_vector = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
1942       new_mem = force_const_mem (mode, new_const_vector);
1943     }
1944   else
1945     {
1946       /* We do not expect other modes to be constant-load-swapped.  */
1947       gcc_unreachable ();
1948     }
1949 
1950   /* This gives us a MEM whose base operand is a SYMBOL_REF, which we
1951      can't recognize.  Force the SYMBOL_REF into a register.  */
1952   if (!REG_P (XEXP (new_mem, 0))) {
1953     rtx base_reg = force_reg (Pmode, XEXP (new_mem, 0));
1954     XEXP (new_mem, 0) = base_reg;
1955 
1956     /* Move the newly created insn ahead of the load insn.  */
1957     /* The last insn is the insn that forced new_mem into a register.  */
1958     rtx_insn *force_insn = get_last_insn ();
1959     /* Remove this insn from the end of the instruction sequence.  */
1960     remove_insn (force_insn);
1961     rtx_insn *before_load_insn = PREV_INSN (load_insn);
1962 
1963     /* And insert this insn back into the sequence immediately before the
1964        load insn so the new expression will be available when the existing
1965        load is modified to load the swapped constant.  */
1966     add_insn_after (force_insn, before_load_insn, BLOCK_FOR_INSN (load_insn));
1967     df_insn_rescan (before_load_insn);
1968     df_insn_rescan (force_insn);
1969   }
1970 
1971   /* Replace the MEM in the load instruction and rescan it.  */
1972   XEXP (SET_SRC (PATTERN (load_insn)), 0) = new_mem;
1973   INSN_CODE (load_insn) = -1; /* Force re-recognition.  */
1974   df_insn_rescan (load_insn);
1975 
1976   unsigned int uid = INSN_UID (swap_insn);
1977   mark_swaps_for_removal (insn_entry, uid);
1978   replace_swap_with_copy (insn_entry, uid);
1979 }
1980 
1981 /* Dump the swap table to DUMP_FILE.  */
1982 static void
1983 dump_swap_insn_table (swap_web_entry *insn_entry)
1984 {
1985   int e = get_max_uid ();
1986   fprintf (dump_file, "\nRelevant insns with their flag settings\n\n");
1987 
1988   for (int i = 0; i < e; ++i)
1989     if (insn_entry[i].is_relevant)
1990       {
1991 	swap_web_entry *pred_entry = (swap_web_entry *)insn_entry[i].pred ();
1992 	fprintf (dump_file, "%6d %6d  ", i,
1993 		 pred_entry && pred_entry->insn
1994 		 ? INSN_UID (pred_entry->insn) : 0);
1995 	if (insn_entry[i].is_load)
1996 	  fputs ("load ", dump_file);
1997 	if (insn_entry[i].is_store)
1998 	  fputs ("store ", dump_file);
1999 	if (insn_entry[i].is_swap)
2000 	  fputs ("swap ", dump_file);
2001 	if (insn_entry[i].is_live_in)
2002 	  fputs ("live-in ", dump_file);
2003 	if (insn_entry[i].is_live_out)
2004 	  fputs ("live-out ", dump_file);
2005 	if (insn_entry[i].contains_subreg)
2006 	  fputs ("subreg ", dump_file);
2007 	if (insn_entry[i].is_128_int)
2008 	  fputs ("int128 ", dump_file);
2009 	if (insn_entry[i].is_call)
2010 	  fputs ("call ", dump_file);
2011 	if (insn_entry[i].is_swappable)
2012 	  {
2013 	    fputs ("swappable ", dump_file);
2014 	    if (insn_entry[i].special_handling == SH_CONST_VECTOR)
2015 	      fputs ("special:constvec ", dump_file);
2016 	    else if (insn_entry[i].special_handling == SH_SUBREG)
2017 	      fputs ("special:subreg ", dump_file);
2018 	    else if (insn_entry[i].special_handling == SH_NOSWAP_LD)
2019 	      fputs ("special:load ", dump_file);
2020 	    else if (insn_entry[i].special_handling == SH_NOSWAP_ST)
2021 	      fputs ("special:store ", dump_file);
2022 	    else if (insn_entry[i].special_handling == SH_EXTRACT)
2023 	      fputs ("special:extract ", dump_file);
2024 	    else if (insn_entry[i].special_handling == SH_SPLAT)
2025 	      fputs ("special:splat ", dump_file);
2026 	    else if (insn_entry[i].special_handling == SH_XXPERMDI)
2027 	      fputs ("special:xxpermdi ", dump_file);
2028 	    else if (insn_entry[i].special_handling == SH_CONCAT)
2029 	      fputs ("special:concat ", dump_file);
2030 	    else if (insn_entry[i].special_handling == SH_VPERM)
2031 	      fputs ("special:vperm ", dump_file);
2032 	  }
2033 	if (insn_entry[i].web_not_optimizable)
2034 	  fputs ("unoptimizable ", dump_file);
2035 	if (insn_entry[i].will_delete)
2036 	  fputs ("delete ", dump_file);
2037 	fputs ("\n", dump_file);
2038       }
2039   fputs ("\n", dump_file);
2040 }
2041 
2042 /* Return RTX with its address canonicalized to (reg) or (+ reg reg).
2043    Here RTX is an (& addr (const_int -16)).  Always return a new copy
2044    to avoid problems with combine.  */
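/* For example (illustrative), an alignment operation such as

     (and:DI (plus:DI (reg:DI 3) (const_int 48)) (const_int -16))

   has the non-register operand of the PLUS forced into a new pseudo,
   yielding

     (and:DI (plus:DI (reg:DI 3) (reg:DI 130)) (const_int -16)).  */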
2045 static rtx
2046 alignment_with_canonical_addr (rtx align)
2047 {
2048   rtx canon;
2049   rtx addr = XEXP (align, 0);
2050 
2051   if (REG_P (addr))
2052     canon = addr;
2053 
2054   else if (GET_CODE (addr) == PLUS)
2055     {
2056       rtx addrop0 = XEXP (addr, 0);
2057       rtx addrop1 = XEXP (addr, 1);
2058 
2059       if (!REG_P (addrop0))
2060 	addrop0 = force_reg (GET_MODE (addrop0), addrop0);
2061 
2062       if (!REG_P (addrop1))
2063 	addrop1 = force_reg (GET_MODE (addrop1), addrop1);
2064 
2065       canon = gen_rtx_PLUS (GET_MODE (addr), addrop0, addrop1);
2066     }
2067 
2068   else
2069     canon = force_reg (GET_MODE (addr), addr);
2070 
2071   return gen_rtx_AND (GET_MODE (align), canon, GEN_INT (-16));
2072 }
2073 
2074 /* Check whether an rtx is an alignment mask, and if so, return
2075    a fully-expanded rtx for the masking operation.  */
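/* Note: ANDing an address with -16 clears its low four bits, which is
   the same 16-byte truncation that lvx/stvx apply to their effective
   address.  The mask may appear either as an immediate -16 or as a
   register whose single reaching definition is the constant -16.  */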
2076 static rtx
2077 alignment_mask (rtx_insn *insn)
2078 {
2079   rtx body = PATTERN (insn);
2080 
2081   if (GET_CODE (body) != SET
2082       || GET_CODE (SET_SRC (body)) != AND
2083       || !REG_P (XEXP (SET_SRC (body), 0)))
2084     return 0;
2085 
2086   rtx mask = XEXP (SET_SRC (body), 1);
2087 
2088   if (CONST_INT_P (mask))
2089     {
2090       if (INTVAL (mask) == -16)
2091 	return alignment_with_canonical_addr (SET_SRC (body));
2092       else
2093 	return 0;
2094     }
2095 
2096   if (!REG_P (mask))
2097     return 0;
2098 
2099   struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
2100   df_ref use;
2101   rtx real_mask = 0;
2102 
2103   FOR_EACH_INSN_INFO_USE (use, insn_info)
2104     {
2105       if (!rtx_equal_p (DF_REF_REG (use), mask))
2106 	continue;
2107 
2108       struct df_link *def_link = DF_REF_CHAIN (use);
2109       if (!def_link || def_link->next)
2110 	return 0;
2111 
2112       rtx_insn *const_insn = DF_REF_INSN (def_link->ref);
2113       rtx const_body = PATTERN (const_insn);
2114       if (GET_CODE (const_body) != SET)
2115 	return 0;
2116 
2117       real_mask = SET_SRC (const_body);
2118 
2119       if (!CONST_INT_P (real_mask)
2120 	  || INTVAL (real_mask) != -16)
2121 	return 0;
2122     }
2123 
2124   if (real_mask == 0)
2125     return 0;
2126 
2127   return alignment_with_canonical_addr (SET_SRC (body));
2128 }
2129 
2130 /* Given INSN that's a load or store based at BASE_REG, look for a
2131    feeding computation that aligns its address on a 16-byte boundary.
2132    Return the masking operation, and set AND_INSN to its containing insn.  */
2133 static rtx
2134 find_alignment_op (rtx_insn *insn, rtx base_reg, rtx_insn **and_insn)
2135 {
2136   df_ref base_use;
2137   struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
2138   rtx and_operation = 0;
2139 
2140   FOR_EACH_INSN_INFO_USE (base_use, insn_info)
2141     {
2142       if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
2143 	continue;
2144 
2145       struct df_link *base_def_link = DF_REF_CHAIN (base_use);
2146       if (!base_def_link || base_def_link->next)
2147 	break;
2148 
2149       /* With stack-protector code enabled, and possibly in other
2150 	 circumstances, there may not be an associated insn for
2151 	 the def.  */
2152       if (DF_REF_IS_ARTIFICIAL (base_def_link->ref))
2153 	break;
2154 
2155       *and_insn = DF_REF_INSN (base_def_link->ref);
2156       and_operation = alignment_mask (*and_insn);
2157       if (and_operation != 0)
2158 	break;
2159     }
2160 
2161   return and_operation;
2162 }
2163 
2164 struct del_info { bool replace; rtx_insn *replace_insn; };
2165 
2166 /* If INSN is the load for an lvx pattern, put it in canonical form.  */
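/* Illustrative sketch of the expected three-insn shape (from expansion
   of the lvx built-in, register numbers arbitrary):

     tmp = addr & -16                      ; alignment mask
     vr1 = vec_select (mem[tmp], [1 0])    ; load-with-swap
     vr2 = vec_select (vr1, [1 0])         ; swap of the loaded value

   The AND is propagated into the load's address so the insn matches the
   lvx pattern, and the trailing swap is recorded in TO_DELETE so that it
   can later be replaced by a plain copy.  */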
2167 static void
2168 recombine_lvx_pattern (rtx_insn *insn, del_info *to_delete)
2169 {
2170   rtx body = PATTERN (insn);
2171   gcc_assert (GET_CODE (body) == SET
2172 	      && GET_CODE (SET_SRC (body)) == VEC_SELECT
2173 	      && MEM_P (XEXP (SET_SRC (body), 0)));
2174 
2175   rtx mem = XEXP (SET_SRC (body), 0);
2176   rtx base_reg = XEXP (mem, 0);
2177 
2178   rtx_insn *and_insn;
2179   rtx and_operation = find_alignment_op (insn, base_reg, &and_insn);
2180 
2181   if (and_operation != 0)
2182     {
2183       df_ref def;
2184       struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
2185       FOR_EACH_INSN_INFO_DEF (def, insn_info)
2186 	{
2187 	  struct df_link *link = DF_REF_CHAIN (def);
2188 	  if (!link || link->next)
2189 	    break;
2190 
2191 	  rtx_insn *swap_insn = DF_REF_INSN (link->ref);
2192 	  if (!insn_is_swap_p (swap_insn)
2193 	      || insn_is_load_p (swap_insn)
2194 	      || insn_is_store_p (swap_insn))
2195 	    break;
2196 
2197 	  /* Expected lvx pattern found.  Change the swap to
2198 	     a copy, and propagate the AND operation into the
2199 	     load.  */
2200 	  to_delete[INSN_UID (swap_insn)].replace = true;
2201 	  to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;
2202 
2203 	  /* However, first we must be sure that we make the
2204 	     base register from the AND operation available
2205 	     in case the register has been overwritten.  Copy
2206 	     the base register to a new pseudo and use that
2207 	     as the base register of the AND operation in
2208 	     the new LVX instruction.  */
2209 	  rtx and_base = XEXP (and_operation, 0);
2210 	  rtx new_reg = gen_reg_rtx (GET_MODE (and_base));
2211 	  rtx copy = gen_rtx_SET (new_reg, and_base);
2212 	  rtx_insn *new_insn = emit_insn_after (copy, and_insn);
2213 	  set_block_for_insn (new_insn, BLOCK_FOR_INSN (and_insn));
2214 	  df_insn_rescan (new_insn);
2215 
2216 	  XEXP (mem, 0) = gen_rtx_AND (GET_MODE (and_base), new_reg,
2217 				       XEXP (and_operation, 1));
2218 	  SET_SRC (body) = mem;
2219 	  INSN_CODE (insn) = -1; /* Force re-recognition.  */
2220 	  df_insn_rescan (insn);
2221 
2222 	  if (dump_file)
2223 	    fprintf (dump_file, "lvx opportunity found at %d\n",
2224 		     INSN_UID (insn));
2225 	}
2226     }
2227 }
2228 
2229 /* If INSN is the store for an stvx pattern, put it in canonical form.  */
2230 static void
2231 recombine_stvx_pattern (rtx_insn *insn, del_info *to_delete)
2232 {
2233   rtx body = PATTERN (insn);
2234   gcc_assert (GET_CODE (body) == SET
2235 	      && MEM_P (SET_DEST (body))
2236 	      && GET_CODE (SET_SRC (body)) == VEC_SELECT);
2237   rtx mem = SET_DEST (body);
2238   rtx base_reg = XEXP (mem, 0);
2239 
2240   rtx_insn *and_insn;
2241   rtx and_operation = find_alignment_op (insn, base_reg, &and_insn);
2242 
2243   if (and_operation != 0)
2244     {
2245       rtx src_reg = XEXP (SET_SRC (body), 0);
2246       df_ref src_use;
2247       struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
2248       FOR_EACH_INSN_INFO_USE (src_use, insn_info)
2249 	{
2250 	  if (!rtx_equal_p (DF_REF_REG (src_use), src_reg))
2251 	    continue;
2252 
2253 	  struct df_link *link = DF_REF_CHAIN (src_use);
2254 	  if (!link || link->next)
2255 	    break;
2256 
2257 	  rtx_insn *swap_insn = DF_REF_INSN (link->ref);
2258 	  if (!insn_is_swap_p (swap_insn)
2259 	      || insn_is_load_p (swap_insn)
2260 	      || insn_is_store_p (swap_insn))
2261 	    break;
2262 
2263 	  /* Expected stvx pattern found.  Change the swap to
2264 	     a copy, and propagate the AND operation into the
2265 	     store.  */
2266 	  to_delete[INSN_UID (swap_insn)].replace = true;
2267 	  to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;
2268 
2269 	  /* However, first we must be sure that we make the
2270 	     base register from the AND operation available
2271 	     in case the register has been overwritten.  Copy
2272 	     the base register to a new pseudo and use that
2273 	     as the base register of the AND operation in
2274 	     the new STVX instruction.  */
2275 	  rtx and_base = XEXP (and_operation, 0);
2276 	  rtx new_reg = gen_reg_rtx (GET_MODE (and_base));
2277 	  rtx copy = gen_rtx_SET (new_reg, and_base);
2278 	  rtx_insn *new_insn = emit_insn_after (copy, and_insn);
2279 	  set_block_for_insn (new_insn, BLOCK_FOR_INSN (and_insn));
2280 	  df_insn_rescan (new_insn);
2281 
2282 	  XEXP (mem, 0) = gen_rtx_AND (GET_MODE (and_base), new_reg,
2283 				       XEXP (and_operation, 1));
2284 	  SET_SRC (body) = src_reg;
2285 	  INSN_CODE (insn) = -1; /* Force re-recognition.  */
2286 	  df_insn_rescan (insn);
2287 
2288 	  if (dump_file)
2289 	    fprintf (dump_file, "stvx opportunity found at %d\n",
2290 		     INSN_UID (insn));
2291 	}
2292     }
2293 }
2294 
2295 /* Look for patterns created from builtin lvx and stvx calls, and
2296    canonicalize them to be properly recognized as such.  */
2297 static void
2298 recombine_lvx_stvx_patterns (function *fun)
2299 {
2300   int i;
2301   basic_block bb;
2302   rtx_insn *insn;
2303 
2304   int num_insns = get_max_uid ();
2305   del_info *to_delete = XCNEWVEC (del_info, num_insns);
2306 
2307   FOR_ALL_BB_FN (bb, fun)
2308     FOR_BB_INSNS (bb, insn)
2309     {
2310       if (!NONDEBUG_INSN_P (insn))
2311 	continue;
2312 
2313       if (insn_is_load_p (insn) && insn_is_swap_p (insn))
2314 	recombine_lvx_pattern (insn, to_delete);
2315       else if (insn_is_store_p (insn) && insn_is_swap_p (insn))
2316 	recombine_stvx_pattern (insn, to_delete);
2317     }
2318 
2319   /* Turning swaps into copies is delayed until now, to avoid problems
2320      with deleting instructions during the insn walk.  */
2321   for (i = 0; i < num_insns; i++)
2322     if (to_delete[i].replace)
2323       {
2324 	rtx swap_body = PATTERN (to_delete[i].replace_insn);
2325 	rtx src_reg = XEXP (SET_SRC (swap_body), 0);
2326 	rtx copy = gen_rtx_SET (SET_DEST (swap_body), src_reg);
2327 	rtx_insn *new_insn = emit_insn_before (copy,
2328 					       to_delete[i].replace_insn);
2329 	set_block_for_insn (new_insn,
2330 			    BLOCK_FOR_INSN (to_delete[i].replace_insn));
2331 	df_insn_rescan (new_insn);
2332 	df_insn_delete (to_delete[i].replace_insn);
2333 	remove_insn (to_delete[i].replace_insn);
2334 	to_delete[i].replace_insn->set_deleted ();
2335       }
2336 
2337   free (to_delete);
2338 }
2339 
2340 /* Main entry point for this pass.  */
2341 unsigned int
2342 rs6000_analyze_swaps (function *fun)
2343 {
2344   swap_web_entry *insn_entry;
2345   basic_block bb;
2346   rtx_insn *insn, *curr_insn = 0;
2347 
2348   /* Dataflow analysis for use-def chains.  */
2349   df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
2350   df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
2351   df_analyze ();
2352   df_set_flags (DF_DEFER_INSN_RESCAN);
2353 
2354   /* Pre-pass to recombine lvx and stvx patterns so we don't lose info.  */
2355   recombine_lvx_stvx_patterns (fun);
2356 
2357   /* Rebuild ud- and du-chains.  */
2358   df_remove_problem (df_chain);
2359   df_process_deferred_rescans ();
2360   df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
2361   df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
2362   df_analyze ();
2363   df_set_flags (DF_DEFER_INSN_RESCAN);
2364 
2365   /* Allocate structure to represent webs of insns.  */
2366   insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());
2367 
2368   /* Walk the insns to gather basic data.  */
2369   FOR_ALL_BB_FN (bb, fun)
2370     FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
2371     {
2372       unsigned int uid = INSN_UID (insn);
2373       if (NONDEBUG_INSN_P (insn))
2374 	{
2375 	  insn_entry[uid].insn = insn;
2376 
2377 	  if (GET_CODE (insn) == CALL_INSN)
2378 	    insn_entry[uid].is_call = 1;
2379 
2380 	  /* Walk the uses and defs to see if we mention vector regs.
2381 	     Record any constraints on optimization of such mentions.  */
2382 	  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
2383 	  df_ref mention;
2384 	  FOR_EACH_INSN_INFO_USE (mention, insn_info)
2385 	    {
2386 	      /* We use DF_REF_REAL_REG here to get inside any subregs.  */
2387 	      machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
2388 
2389 	      /* If a use gets its value from a call insn, it will be
2390 		 a hard register and will look like (reg:V4SI 3 3).
2391 		 The df analysis creates two mentions for GPR3 and GPR4,
2392 		 both DImode.  We must recognize this and treat it as a
2393 		 vector mention to ensure the call is unioned with this
2394 		 use.  */
2395 	      if (mode == DImode && DF_REF_INSN_INFO (mention))
2396 		{
2397 		  rtx feeder = DF_REF_INSN (mention);
2398 		  /* FIXME:  It is pretty hard to get from the df mention
2399 		     to the mode of the use in the insn.  We arbitrarily
2400 		     pick a vector mode here, even though the use might
2401 		     be a real DImode.  We can be too conservative
2402 		     (create a web larger than necessary) because of
2403 		     this, so consider eventually fixing this.  */
2404 		  if (GET_CODE (feeder) == CALL_INSN)
2405 		    mode = V4SImode;
2406 		}
2407 
2408 	      if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
2409 		{
2410 		  insn_entry[uid].is_relevant = 1;
2411 		  if (mode == TImode || mode == V1TImode
2412 		      || FLOAT128_VECTOR_P (mode))
2413 		    insn_entry[uid].is_128_int = 1;
2414 		  if (DF_REF_INSN_INFO (mention))
2415 		    insn_entry[uid].contains_subreg
2416 		      = !rtx_equal_p (DF_REF_REG (mention),
2417 				      DF_REF_REAL_REG (mention));
2418 		  union_defs (insn_entry, insn, mention);
2419 		}
2420 	    }
2421 	  FOR_EACH_INSN_INFO_DEF (mention, insn_info)
2422 	    {
2423 	      /* We use DF_REF_REAL_REG here to get inside any subregs.  */
2424 	      machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
2425 
2426 	      /* If we're loading up a hard vector register for a call,
2427 		 it looks like (set (reg:V4SI 9 9) (...)).  The df
2428 		 analysis creates two mentions for GPR9 and GPR10, both
2429 		 DImode.  So relying on the mode from the mentions
2430 		 isn't sufficient to ensure we union the call into the
2431 		 web with the parameter setup code.  */
2432 	      if (mode == DImode && GET_CODE (PATTERN (insn)) == SET
2433 		  && ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (SET_DEST (PATTERN (insn)))))
2434 		mode = GET_MODE (SET_DEST (PATTERN (insn)));
2435 
2436 	      if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
2437 		{
2438 		  insn_entry[uid].is_relevant = 1;
2439 		  if (mode == TImode || mode == V1TImode
2440 		      || FLOAT128_VECTOR_P (mode))
2441 		    insn_entry[uid].is_128_int = 1;
2442 		  if (DF_REF_INSN_INFO (mention))
2443 		    insn_entry[uid].contains_subreg
2444 		      = !rtx_equal_p (DF_REF_REG (mention),
2445 				      DF_REF_REAL_REG (mention));
2446 		  /* REG_FUNCTION_VALUE_P is not valid for subregs. */
2447 		  else if (REG_FUNCTION_VALUE_P (DF_REF_REG (mention)))
2448 		    insn_entry[uid].is_live_out = 1;
2449 		  union_uses (insn_entry, insn, mention);
2450 		}
2451 	    }
2452 
2453 	  if (insn_entry[uid].is_relevant)
2454 	    {
2455 	      /* Determine if this is a load or store.  */
2456 	      insn_entry[uid].is_load = insn_is_load_p (insn);
2457 	      insn_entry[uid].is_store = insn_is_store_p (insn);
2458 
2459 	      /* Determine if this is a doubleword swap.  If not,
2460 		 determine whether it can legally be swapped.  */
2461 	      if (insn_is_swap_p (insn))
2462 		insn_entry[uid].is_swap = 1;
2463 	      else
2464 		{
2465 		  unsigned int special = SH_NONE;
2466 		  insn_entry[uid].is_swappable
2467 		    = insn_is_swappable_p (insn_entry, insn, &special);
2468 		  if (special != SH_NONE && insn_entry[uid].contains_subreg)
2469 		    insn_entry[uid].is_swappable = 0;
2470 		  else if (special != SH_NONE)
2471 		    insn_entry[uid].special_handling = special;
2472 		  else if (insn_entry[uid].contains_subreg
2473 			   && has_part_mult (insn))
2474 		    insn_entry[uid].is_swappable = 0;
2475 		  else if (insn_entry[uid].contains_subreg)
2476 		    insn_entry[uid].special_handling = SH_SUBREG;
2477 		}
2478 	    }
2479 	}
2480     }
2481 
2482   if (dump_file)
2483     {
2484       fprintf (dump_file, "\nSwap insn entry table when first built\n");
2485       dump_swap_insn_table (insn_entry);
2486     }
2487 
2488   /* Record unoptimizable webs.  */
2489   unsigned e = get_max_uid (), i;
2490   for (i = 0; i < e; ++i)
2491     {
2492       if (!insn_entry[i].is_relevant)
2493 	continue;
2494 
2495       swap_web_entry *root
2496 	= (swap_web_entry*)(&insn_entry[i])->unionfind_root ();
2497 
2498       if (insn_entry[i].is_live_in || insn_entry[i].is_live_out
2499 	  || (insn_entry[i].contains_subreg
2500 	      && insn_entry[i].special_handling != SH_SUBREG)
2501 	  || insn_entry[i].is_128_int || insn_entry[i].is_call
2502 	  || !(insn_entry[i].is_swappable || insn_entry[i].is_swap))
2503 	root->web_not_optimizable = 1;
2504 
2505       /* If we have loads or stores that aren't permuting then the
2506 	 optimization isn't appropriate.  */
2507       else if ((insn_entry[i].is_load || insn_entry[i].is_store)
2508 	  && !insn_entry[i].is_swap && !insn_entry[i].is_swappable)
2509 	root->web_not_optimizable = 1;
2510 
2511       /* If we have a swap that is both fed by a permuting load
2512 	 and a feeder of a permuting store, then the optimization
2513 	 isn't appropriate.  (Consider vec_xl followed by vec_xst_be.)  */
2514       else if (insn_entry[i].is_swap && !insn_entry[i].is_load
2515 	       && !insn_entry[i].is_store
2516 	       && swap_feeds_both_load_and_store (&insn_entry[i]))
2517 	root->web_not_optimizable = 1;
2518 
2519       /* If we have permuting loads or stores that are not accompanied
2520 	 by a register swap, the optimization isn't appropriate.  */
2521       else if (insn_entry[i].is_load && insn_entry[i].is_swap)
2522 	{
2523 	  rtx insn = insn_entry[i].insn;
2524 	  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
2525 	  df_ref def;
2526 
2527 	  FOR_EACH_INSN_INFO_DEF (def, insn_info)
2528 	    {
2529 	      struct df_link *link = DF_REF_CHAIN (def);
2530 
2531 	      if (!chain_contains_only_swaps (insn_entry, link, FOR_LOADS))
2532 		{
2533 		  root->web_not_optimizable = 1;
2534 		  break;
2535 		}
2536 	    }
2537 	}
2538       else if (insn_entry[i].is_store && insn_entry[i].is_swap)
2539 	{
2540 	  rtx insn = insn_entry[i].insn;
2541 	  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
2542 	  df_ref use;
2543 
2544 	  FOR_EACH_INSN_INFO_USE (use, insn_info)
2545 	    {
2546 	      struct df_link *link = DF_REF_CHAIN (use);
2547 
2548 	      if (!chain_contains_only_swaps (insn_entry, link, FOR_STORES))
2549 		{
2550 		  root->web_not_optimizable = 1;
2551 		  break;
2552 		}
2553 	    }
2554 	}
2555     }
2556 
2557   if (dump_file)
2558     {
2559       fprintf (dump_file, "\nSwap insn entry table after web analysis\n");
2560       dump_swap_insn_table (insn_entry);
2561     }
2562 
2563   /* For each load and store in an optimizable web (which implies
2564      the loads and stores are permuting), find the associated
2565      register swaps and mark them for removal.  Due to various
2566      optimizations we may mark the same swap more than once.  Also
2567      perform special handling for swappable insns that require it.  */
2568   for (i = 0; i < e; ++i)
2569     if ((insn_entry[i].is_load || insn_entry[i].is_store)
2570 	&& insn_entry[i].is_swap)
2571       {
2572 	swap_web_entry* root_entry
2573 	  = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
2574 	if (!root_entry->web_not_optimizable)
2575 	  mark_swaps_for_removal (insn_entry, i);
2576       }
2577     else if (insn_entry[i].is_swappable && insn_entry[i].special_handling)
2578       {
2579 	swap_web_entry* root_entry
2580 	  = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
2581 	if (!root_entry->web_not_optimizable)
2582 	  handle_special_swappables (insn_entry, i);
2583       }
2584 
2585   /* Now delete the swaps marked for removal.  */
2586   for (i = 0; i < e; ++i)
2587     if (insn_entry[i].will_delete)
2588       replace_swap_with_copy (insn_entry, i);
2589 
2590   /* Clean up.  */
2591   free (insn_entry);
2592 
2593   /* Use a second pass over rtl to detect that certain vector values
2594      fetched from or stored to memory on quad-word aligned addresses
2595      can use lvx/stvx without swaps.  */
2596 
2597   /* First, rebuild ud chains.  */
2598   df_remove_problem (df_chain);
2599   df_process_deferred_rescans ();
2600   df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
2601   df_chain_add_problem (DF_UD_CHAIN);
2602   df_analyze ();
2603 
2604   swap_web_entry *pass2_insn_entry;
2605   pass2_insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());
2606 
2607   /* Walk the insns to gather basic data.  */
2608   FOR_ALL_BB_FN (bb, fun)
2609     FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
2610     {
2611       unsigned int uid = INSN_UID (insn);
2612       if (NONDEBUG_INSN_P (insn))
2613 	{
2614 	  pass2_insn_entry[uid].insn = insn;
2615 
2616 	  pass2_insn_entry[uid].is_relevant = 1;
2617 	  pass2_insn_entry[uid].is_load = insn_is_load_p (insn);
2618 	  pass2_insn_entry[uid].is_store = insn_is_store_p (insn);
2619 
2620 	  /* Determine if this is a doubleword swap.  If not,
2621 	     determine whether it can legally be swapped.  */
2622 	  if (insn_is_swap_p (insn))
2623 	    pass2_insn_entry[uid].is_swap = 1;
2624 	}
2625     }
2626 
2627   e = get_max_uid ();
2628   for (unsigned i = 0; i < e; ++i)
2629     if (pass2_insn_entry[i].is_swap && !pass2_insn_entry[i].is_load
2630 	&& !pass2_insn_entry[i].is_store)
2631       {
2632 	/* Replace swap of aligned load-swap with aligned unswapped
2633 	   load.  */
2634 	rtx_insn *rtx_insn = pass2_insn_entry[i].insn;
2635 	if (quad_aligned_load_p (pass2_insn_entry, rtx_insn))
2636 	  replace_swapped_aligned_load (pass2_insn_entry, rtx_insn);
2637       }
2638     else if (pass2_insn_entry[i].is_swap && pass2_insn_entry[i].is_store)
2639       {
2640 	/* Replace aligned store-swap of swapped value with aligned
2641 	   unswapped store.  */
2642 	rtx_insn *rtx_insn = pass2_insn_entry[i].insn;
2643 	if (quad_aligned_store_p (pass2_insn_entry, rtx_insn))
2644 	  replace_swapped_aligned_store (pass2_insn_entry, rtx_insn);
2645       }
2646 
2647   /* Clean up.  */
2648   free (pass2_insn_entry);
2649 
2650   /* Use a third pass over rtl to replace swap(load(vector constant))
2651      with load(swapped vector constant).  */
2652 
2653   /* First, rebuild ud chains.  */
2654   df_remove_problem (df_chain);
2655   df_process_deferred_rescans ();
2656   df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
2657   df_chain_add_problem (DF_UD_CHAIN);
2658   df_analyze ();
2659 
2660   swap_web_entry *pass3_insn_entry;
2661   pass3_insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());
2662 
2663   /* Walk the insns to gather basic data.  */
2664   FOR_ALL_BB_FN (bb, fun)
2665     FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
2666     {
2667       unsigned int uid = INSN_UID (insn);
2668       if (NONDEBUG_INSN_P (insn))
2669 	{
2670 	  pass3_insn_entry[uid].insn = insn;
2671 
2672 	  pass3_insn_entry[uid].is_relevant = 1;
2673 	  pass3_insn_entry[uid].is_load = insn_is_load_p (insn);
2674 	  pass3_insn_entry[uid].is_store = insn_is_store_p (insn);
2675 
2676 	  /* Determine if this is a doubleword swap.  If not,
2677 	     determine whether it can legally be swapped.  */
2678 	  if (insn_is_swap_p (insn))
2679 	    pass3_insn_entry[uid].is_swap = 1;
2680 	}
2681     }
2682 
2683   e = get_max_uid ();
2684   for (unsigned i = 0; i < e; ++i)
2685     if (pass3_insn_entry[i].is_swap && !pass3_insn_entry[i].is_load
2686 	&& !pass3_insn_entry[i].is_store)
2687       {
2688 	insn = pass3_insn_entry[i].insn;
2689 	if (const_load_sequence_p (pass3_insn_entry, insn))
2690 	  replace_swapped_load_constant (pass3_insn_entry, insn);
2691       }
2692 
2693   /* Clean up.  */
2694   free (pass3_insn_entry);
2695   return 0;
2696 }
2697 
2698 const pass_data pass_data_analyze_swaps =
2699 {
2700   RTL_PASS, /* type */
2701   "swaps", /* name */
2702   OPTGROUP_NONE, /* optinfo_flags */
2703   TV_NONE, /* tv_id */
2704   0, /* properties_required */
2705   0, /* properties_provided */
2706   0, /* properties_destroyed */
2707   0, /* todo_flags_start */
2708   TODO_df_finish, /* todo_flags_finish */
2709 };
2710 
2711 class pass_analyze_swaps : public rtl_opt_pass
2712 {
2713 public:
2714   pass_analyze_swaps(gcc::context *ctxt)
2715     : rtl_opt_pass(pass_data_analyze_swaps, ctxt)
2716   {}
2717 
2718   /* opt_pass methods: */
2719   virtual bool gate (function *)
2720     {
2721       return (optimize > 0 && !BYTES_BIG_ENDIAN && TARGET_VSX
2722 	      && !TARGET_P9_VECTOR && rs6000_optimize_swaps);
2723     }
2724 
2725   virtual unsigned int execute (function *fun)
2726     {
2727       return rs6000_analyze_swaps (fun);
2728     }
2729 
2730   opt_pass *clone ()
2731     {
2732       return new pass_analyze_swaps (m_ctxt);
2733     }
2734 
2735 }; // class pass_analyze_swaps
2736 
2737 rtl_opt_pass *
2738 make_pass_analyze_swaps (gcc::context *ctxt)
2739 {
2740   return new pass_analyze_swaps (ctxt);
2741 }
2742 
2743