1 /* Subroutines used to remove unnecessary doubleword swaps
2    for p8 little-endian VSX code.
3    Copyright (C) 1991-2022 Free Software Foundation, Inc.
4 
5    This file is part of GCC.
6 
7    GCC is free software; you can redistribute it and/or modify it
8    under the terms of the GNU General Public License as published
9    by the Free Software Foundation; either version 3, or (at your
10    option) any later version.
11 
12    GCC is distributed in the hope that it will be useful, but WITHOUT
13    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
15    License for more details.
16 
17    You should have received a copy of the GNU General Public License
18    along with GCC; see the file COPYING3.  If not see
19    <http://www.gnu.org/licenses/>.  */
20 
21 #define IN_TARGET_CODE 1
22 
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "memmodel.h"
30 #include "df.h"
31 #include "tm_p.h"
32 #include "ira.h"
33 #include "print-tree.h"
34 #include "varasm.h"
35 #include "explow.h"
36 #include "expr.h"
37 #include "output.h"
38 #include "tree-pass.h"
39 #include "rtx-vector-builder.h"
40 
41 /* Analyze vector computations and remove unnecessary doubleword
42    swaps (xxswapdi instructions).  This pass is performed only
43    for little-endian VSX code generation.
44 
45    For this specific case, loads and stores of 4x32 and 2x64 vectors
46    are inefficient.  These are implemented using the lxvd2x and
47    stxvd2x instructions, which invert the order of doublewords in
48    a vector register.  Thus the code generation inserts an xxswapdi
49    after each such load, and prior to each such store.  (For spill
50    code after register assignment, an additional xxswapdi is inserted
51    following each store in order to return a hard register to its
52    unpermuted value.)
53 
54    The extra xxswapdi instructions reduce performance.  This can be
55    particularly bad for vectorized code.  The purpose of this pass
56    is to reduce the number of xxswapdi instructions required for
57    correctness.
58 
59    The primary insight is that much code that operates on vectors
60    does not care about the relative order of elements in a register,
61    so long as the correct memory order is preserved.  If we have
62    a computation where all input values are provided by lxvd2x/xxswapdi
63    sequences, all outputs are stored using xxswapdi/stxvd2x sequences,
64    and all intermediate computations are pure SIMD (independent of
65    element order), then all the xxswapdi's associated with the loads
66    and stores may be removed.
67 
68    This pass uses some of the infrastructure and logical ideas from
69    the "web" pass in web.cc.  We create maximal webs of computations
70    fitting the description above using union-find.  Each such web is
71    then optimized by removing its unnecessary xxswapdi instructions.
72 
73    The pass is placed prior to global optimization so that we can
74    perform the optimization in the safest and simplest way possible;
75    that is, by replacing each xxswapdi insn with a register copy insn.
76    Subsequent forward propagation will remove copies where possible.
77 
78    There are some operations sensitive to element order for which we
79    can still allow the operation, provided we modify those operations.
80    These include CONST_VECTORs, for which we must swap the first and
81    second halves of the constant vector; and SUBREGs, for which we
82    must adjust the byte offset to account for the swapped doublewords.
83    A remaining opportunity would be non-immediate-form splats, for
84    which we should adjust the selected lane of the input.  We should
85    also make code generation adjustments for sum-across operations,
86    since this is a common vectorizer reduction.
87 
88    Because we run prior to the first split, we can see loads and stores
89    here that match *vsx_le_perm_{load,store}_<mode>.  These are vanilla
90    vector loads and stores that have not yet been split into a permuting
91    load/store and a swap.  (One way this can happen is with a builtin
92    call to vec_vsx_{ld,st}.)  We can handle these as well, but rather
93    than deleting a swap, we convert the load/store into a permuting
94    load/store (which effectively removes the swap).  */
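
/* As a concrete (purely schematic) illustration of such a web,
   little-endian code that loads two vectors, adds them, and stores
   the result looks roughly like this before the pass:

       lxvd2x  vs0,0,r8      # load A, doublewords reversed
       xxswapd vs0,vs0
       lxvd2x  vs1,0,r9      # load B, doublewords reversed
       xxswapd vs1,vs1
       xvadddp vs2,vs0,vs1   # element order does not matter here
       xxswapd vs2,vs2
       stxvd2x vs2,0,r10     # store, doublewords reversed

   Every computation in the web is order-insensitive, so each xxswapd
   is replaced by a register copy, which forward propagation later
   removes, leaving only the lxvd2x/xvadddp/stxvd2x instructions.
   (Register numbers and the exact mnemonics are illustrative only.)  */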
95 
96 /* Notes on Permutes
97 
98    We do not currently handle computations that contain permutes.  There
99    is a general transformation that can be performed correctly, but it
100    may introduce more expensive code than it replaces.  To handle these
101    would require a cost model to determine when to perform the optimization.
102    This commentary records how this could be done if desired.
103 
104    The most general permute is something like this (example for V16QI):
105 
106    (vec_select:V16QI (vec_concat:V32QI (op1:V16QI) (op2:V16QI))
107                      (parallel [(const_int a0) (const_int a1)
108                                  ...
109                                 (const_int a14) (const_int a15)]))
110 
111    where a0,...,a15 are in [0,31] and select the elements from op1 and
112    op2 that are placed in the result.
113 
114    Regardless of mode, we can convert the PARALLEL to a mask of 16
115    byte-element selectors.  Let's call this M, with M[i] representing
116    the ith byte-element selector value.  Then if we swap doublewords
117    throughout the computation, we can get correct behavior by replacing
118    M with M' as follows:
119 
120     M'[i] = { (M[i]+8)%16      : M[i] in [0,15]
121             { ((M[i]+8)%16)+16 : M[i] in [16,31]
122 
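   As an illustrative instance of this mapping, M[i] = 3 becomes
   M'[i] = 11, M[i] = 12 becomes M'[i] = 4, and M[i] = 20 becomes
   M'[i] = 28; the doubleword halves of each input are exchanged
   while the byte order within each doubleword is preserved.
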
123    This seems promising at first, since we are just replacing one mask
124    with another.  But certain masks are preferable to others.  If M
125    is a mask that matches a vmrghh pattern, for example, M' certainly
126    will not.  Instead of a single vmrghh, we would generate a load of
127    M' and a vperm.  So we would need to know how many xxswapd's we can
128    remove as a result of this transformation to determine if it's
129    profitable; and preferably the logic would need to be aware of all
130    the special preferable masks.
131 
132    Another form of permute is an UNSPEC_VPERM, in which the mask is
133    already in a register.  In some cases, this mask may be a constant
134    that we can discover with ud-chains, in which case the above
135    transformation is ok.  However, the common usage here is for the
136    mask to be produced by an UNSPEC_LVSL, in which case the mask
137    cannot be known at compile time.  In such a case we would have to
138    generate several instructions to compute M' as above at run time,
139    and a cost model is needed again.
140 
141    However, when the mask M for an UNSPEC_VPERM is loaded from the
142    constant pool, we can replace M with M' as above at no cost
143    beyond adding a constant pool entry.  */
144 
145 /* This is based on the union-find logic in web.cc.  web_entry_base is
146    defined in df.h.  */
147 class swap_web_entry : public web_entry_base
148 {
149  public:
150   /* Pointer to the insn.  */
151   rtx_insn *insn;
152   /* Set if insn contains a mention of a vector register.  All other
153      fields are undefined if this field is unset.  */
154   unsigned int is_relevant : 1;
155   /* Set if insn is a load.  */
156   unsigned int is_load : 1;
157   /* Set if insn is a store.  */
158   unsigned int is_store : 1;
159   /* Set if insn is a doubleword swap.  This can either be a register swap
160      or a permuting load or store (test is_load and is_store for this).  */
161   unsigned int is_swap : 1;
162   /* Set if the insn has a live-in use of a parameter register.  */
163   unsigned int is_live_in : 1;
164   /* Set if the insn has a live-out def of a return register.  */
165   unsigned int is_live_out : 1;
166   /* Set if the insn contains a subreg reference of a vector register.  */
167   unsigned int contains_subreg : 1;
168   /* Set if the insn contains a 128-bit integer operand.  */
169   unsigned int is_128_int : 1;
170   /* Set if this is a call-insn.  */
171   unsigned int is_call : 1;
172   /* Set if this insn does not perform a vector operation for which
173      element order matters, or if we know how to fix it up if it does.
174      Undefined if is_swap is set.  */
175   unsigned int is_swappable : 1;
176   /* A nonzero value indicates what kind of special handling for this
177      insn is required if doublewords are swapped.  Undefined if
178      is_swappable is not set.  */
179   unsigned int special_handling : 4;
180   /* Set if the web represented by this entry cannot be optimized.  */
181   unsigned int web_not_optimizable : 1;
182   /* Set if this insn should be deleted.  */
183   unsigned int will_delete : 1;
184 };
185 
186 enum special_handling_values {
187   SH_NONE = 0,
188   SH_CONST_VECTOR,
189   SH_SUBREG,
190   SH_NOSWAP_LD,
191   SH_NOSWAP_ST,
192   SH_EXTRACT,
193   SH_SPLAT,
194   SH_XXPERMDI,
195   SH_CONCAT,
196   SH_VPERM
197 };
198 
199 /* Union INSN with all insns containing definitions that reach USE.
200    Detect whether USE is live-in to the current function.  */
201 static void
202 union_defs (swap_web_entry *insn_entry, rtx insn, df_ref use)
203 {
204   struct df_link *link = DF_REF_CHAIN (use);
205 
206   if (!link)
207     insn_entry[INSN_UID (insn)].is_live_in = 1;
208 
209   while (link)
210     {
211       if (DF_REF_IS_ARTIFICIAL (link->ref))
212 	insn_entry[INSN_UID (insn)].is_live_in = 1;
213 
214       if (DF_REF_INSN_INFO (link->ref))
215 	{
216 	  rtx def_insn = DF_REF_INSN (link->ref);
217 	  (void)unionfind_union (insn_entry + INSN_UID (insn),
218 				 insn_entry + INSN_UID (def_insn));
219 	}
220 
221       link = link->next;
222     }
223 }
224 
225 /* Union INSN with all insns containing uses reached from DEF.
226    Detect whether DEF is live-out from the current function.  */
227 static void
228 union_uses (swap_web_entry *insn_entry, rtx insn, df_ref def)
229 {
230   struct df_link *link = DF_REF_CHAIN (def);
231 
232   if (!link)
233     insn_entry[INSN_UID (insn)].is_live_out = 1;
234 
235   while (link)
236     {
237       /* This could be an eh use or some other artificial use;
238 	 we treat these all the same (killing the optimization).  */
239       if (DF_REF_IS_ARTIFICIAL (link->ref))
240 	insn_entry[INSN_UID (insn)].is_live_out = 1;
241 
242       if (DF_REF_INSN_INFO (link->ref))
243 	{
244 	  rtx use_insn = DF_REF_INSN (link->ref);
245 	  (void)unionfind_union (insn_entry + INSN_UID (insn),
246 				 insn_entry + INSN_UID (use_insn));
247 	}
248 
249       link = link->next;
250     }
251 }
252 
253 /* Return true iff PAT (a SINGLE_SET) rotates its source by 64 bits;
254    else return false.  */
255 
256 static bool
257 pattern_is_rotate64 (rtx pat)
258 {
259   rtx rot = SET_SRC (pat);
260 
261   if (GET_CODE (rot) == ROTATE && CONST_INT_P (XEXP (rot, 1))
262       && INTVAL (XEXP (rot, 1)) == 64)
263     return true;
264 
265   return false;
266 }
267 
268 /* Return 1 iff INSN is a load insn, including permuting loads that
269    represent an lxvd2x instruction; else return 0.  */
270 static unsigned int
271 insn_is_load_p (rtx insn)
272 {
273   rtx body = PATTERN (insn);
274 
275   if (GET_CODE (body) == SET)
276     {
277       if (MEM_P (SET_SRC (body)))
278 	return 1;
279 
280       if (GET_CODE (SET_SRC (body)) == VEC_SELECT
281 	  && MEM_P (XEXP (SET_SRC (body), 0)))
282 	return 1;
283 
284       if (pattern_is_rotate64 (body) && MEM_P (XEXP (SET_SRC (body), 0)))
285 	return 1;
286 
287       return 0;
288     }
289 
290   if (GET_CODE (body) != PARALLEL)
291     return 0;
292 
293   rtx set = XVECEXP (body, 0, 0);
294 
295   if (GET_CODE (set) == SET && MEM_P (SET_SRC (set)))
296     return 1;
297 
298   return 0;
299 }
300 
301 /* Return 1 iff INSN is a store insn, including permuting stores that
302    represent an stxvd2x instruction; else return 0.  */
303 static unsigned int
304 insn_is_store_p (rtx insn)
305 {
306   rtx body = PATTERN (insn);
307   if (GET_CODE (body) == SET && MEM_P (SET_DEST (body)))
308     return 1;
309   if (GET_CODE (body) != PARALLEL)
310     return 0;
311   rtx set = XVECEXP (body, 0, 0);
312   if (GET_CODE (set) == SET && MEM_P (SET_DEST (set)))
313     return 1;
314   return 0;
315 }
316 
317 /* Return 1 iff INSN swaps doublewords.  This may be a reg-reg swap,
318    a permuting load, or a permuting store.  */
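/* For example (schematic RTL, V4SI case), a source of the form

     (vec_select:V4SI (reg:V4SI x)
                      (parallel [(const_int 2) (const_int 3)
                                 (const_int 0) (const_int 1)]))

   is recognized as a doubleword swap, as is, e.g., a
   (rotate:V1TI (reg:V1TI x) (const_int 64)) source.  */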
319 static unsigned int
320 insn_is_swap_p (rtx insn)
321 {
322   rtx body = PATTERN (insn);
323   if (GET_CODE (body) != SET)
324     return 0;
325   rtx rhs = SET_SRC (body);
326   if (pattern_is_rotate64 (body))
327     return 1;
328   if (GET_CODE (rhs) != VEC_SELECT)
329     return 0;
330   rtx parallel = XEXP (rhs, 1);
331   if (GET_CODE (parallel) != PARALLEL)
332     return 0;
333   unsigned int len = XVECLEN (parallel, 0);
334   if (len != 2 && len != 4 && len != 8 && len != 16)
335     return 0;
336   for (unsigned int i = 0; i < len / 2; ++i)
337     {
338       rtx op = XVECEXP (parallel, 0, i);
339       if (!CONST_INT_P (op) || INTVAL (op) != len / 2 + i)
340 	return 0;
341     }
342   for (unsigned int i = len / 2; i < len; ++i)
343     {
344       rtx op = XVECEXP (parallel, 0, i);
345       if (!CONST_INT_P (op) || INTVAL (op) != i - len / 2)
346 	return 0;
347     }
348   return 1;
349 }
350 
351 /* Return true iff EXPR represents the sum of two registers.  */
352 bool
353 rs6000_sum_of_two_registers_p (const_rtx expr)
354 {
355   if (GET_CODE (expr) == PLUS)
356     {
357       const_rtx operand1 = XEXP (expr, 0);
358       const_rtx operand2 = XEXP (expr, 1);
359       return (REG_P (operand1) && REG_P (operand2));
360     }
361   return false;
362 }
363 
364 /* Return true iff EXPR represents an address expression that masks off
365    the low-order 4 bits in the style of an lvx or stvx rtl pattern.  */
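/* For example (schematic), both of these address forms match:
     (and:DI (reg:DI rA) (const_int -16))
     (and:DI (plus:DI (reg:DI rA) (reg:DI rB)) (const_int -16))  */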
366 bool
367 rs6000_quadword_masked_address_p (const_rtx expr)
368 {
369   if (GET_CODE (expr) == AND)
370     {
371       const_rtx operand1 = XEXP (expr, 0);
372       const_rtx operand2 = XEXP (expr, 1);
373       if ((REG_P (operand1) || rs6000_sum_of_two_registers_p (operand1))
374 	  && CONST_SCALAR_INT_P (operand2) && INTVAL (operand2) == -16)
375 	return true;
376     }
377   return false;
378 }
379 
380 /* Return TRUE if INSN represents a swap of a swapped load from memory
381    and the memory address is quad-word aligned.  */
382 static bool
383 quad_aligned_load_p (swap_web_entry *insn_entry, rtx_insn *insn)
384 {
385   unsigned uid = INSN_UID (insn);
386   if (!insn_entry[uid].is_swap || insn_entry[uid].is_load)
387     return false;
388 
389   struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
390 
391   /* Since insn is known to represent a swap instruction, we know it
392      "uses" only one input variable.  */
393   df_ref use = DF_INSN_INFO_USES (insn_info);
394 
395   /* Figure out where this input variable is defined.  */
396   struct df_link *def_link = DF_REF_CHAIN (use);
397 
398   /* If there is no definition or the definition is artificial or there are
399      multiple definitions, punt.  */
400   if (!def_link || !def_link->ref || DF_REF_IS_ARTIFICIAL (def_link->ref)
401       || def_link->next)
402     return false;
403 
404   rtx def_insn = DF_REF_INSN (def_link->ref);
405   unsigned uid2 = INSN_UID (def_insn);
406   /* We're looking for a load-with-swap insn.  If this is not that,
407      return false.  */
408   if (!insn_entry[uid2].is_load || !insn_entry[uid2].is_swap)
409     return false;
410 
411   /* If the source of the rtl def is not a set from memory, return
412      false.  */
413   rtx body = PATTERN (def_insn);
414   if (GET_CODE (body) != SET
415       || !(GET_CODE (SET_SRC (body)) == VEC_SELECT
416 	   || pattern_is_rotate64 (body))
417       || !MEM_P (XEXP (SET_SRC (body), 0)))
418     return false;
419 
420   rtx mem = XEXP (SET_SRC (body), 0);
421   rtx base_reg = XEXP (mem, 0);
422   return ((REG_P (base_reg) || rs6000_sum_of_two_registers_p (base_reg))
423 	  && MEM_ALIGN (mem) >= 128);
424 }
425 
426 /* Return TRUE if INSN represents a store-with-swap of a swapped value
427    and the memory address is quad-word aligned.  */
428 static bool
429 quad_aligned_store_p (swap_web_entry *insn_entry, rtx_insn *insn)
430 {
431   unsigned uid = INSN_UID (insn);
432   if (!insn_entry[uid].is_swap || !insn_entry[uid].is_store)
433     return false;
434 
435   rtx body = PATTERN (insn);
436   rtx dest_address = XEXP (SET_DEST (body), 0);
437   rtx swap_reg = XEXP (SET_SRC (body), 0);
438 
439   /* If the base address for the memory expression is not represented
440      by a single register and is not the sum of two registers, punt.  */
441   if (!REG_P (dest_address) && !rs6000_sum_of_two_registers_p (dest_address))
442     return false;
443 
444   /* Confirm that the value to be stored is produced by a swap
445      instruction.  */
446   struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
447   df_ref use;
448   FOR_EACH_INSN_INFO_USE (use, insn_info)
449     {
450       struct df_link *def_link = DF_REF_CHAIN (use);
451 
452       /* If this is not the definition of the candidate swap register,
453 	 then skip it.  I am interested in a different definition.  */
454       if (!rtx_equal_p (DF_REF_REG (use), swap_reg))
455 	continue;
456 
457       /* If there is no def or the def is artificial or there are
458 	 multiple defs, punt.  */
459       if (!def_link || !def_link->ref || DF_REF_IS_ARTIFICIAL (def_link->ref)
460 	  || def_link->next)
461 	return false;
462 
463       rtx def_insn = DF_REF_INSN (def_link->ref);
464       unsigned uid2 = INSN_UID (def_insn);
465 
466       /* If this source value is not a simple swap, return false.  */
467       if (!insn_entry[uid2].is_swap || insn_entry[uid2].is_load
468 	  || insn_entry[uid2].is_store)
469 	return false;
470 
471       /* I've processed the use that I care about, so break out of
472 	 this loop.  */
473       break;
474     }
475 
476   /* At this point, we know the source data comes from a swap.  The
477      remaining question is whether the memory address is aligned.  */
478   rtx set = single_set (insn);
479   if (set)
480     {
481       rtx dest = SET_DEST (set);
482       if (MEM_P (dest))
483 	return (MEM_ALIGN (dest) >= 128);
484     }
485   return false;
486 }
487 
488 /* Return 1 iff the insn described by INSN_ENTRY, known to be a register
489    swap, is both fed by a permuting load and feeds a permuting store.  */
490 static unsigned int
491 swap_feeds_both_load_and_store (swap_web_entry *insn_entry)
492 {
493   rtx insn = insn_entry->insn;
494   struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
495   df_ref def, use;
496   struct df_link *link = 0;
497   rtx_insn *load = 0, *store = 0;
498   bool fed_by_load = false;
499   bool feeds_store = false;
500 
501   FOR_EACH_INSN_INFO_USE (use, insn_info)
502     {
503       link = DF_REF_CHAIN (use);
504       load = DF_REF_INSN (link->ref);
505       if (insn_is_load_p (load) && insn_is_swap_p (load))
506 	fed_by_load = 1;
507     }
508 
509   FOR_EACH_INSN_INFO_DEF (def, insn_info)
510     {
511       link = DF_REF_CHAIN (def);
512       store = DF_REF_INSN (link->ref);
513       if (insn_is_store_p (store) && insn_is_swap_p (store))
514 	feeds_store = 1;
515     }
516 
517   return fed_by_load && feeds_store;
518 }
519 
520 /* Return TRUE if INSN is a swap fed by a load from the constant pool.  */
521 static bool
522 const_load_sequence_p (swap_web_entry *insn_entry, rtx insn)
523 {
524   unsigned uid = INSN_UID (insn);
525   if (!insn_entry[uid].is_swap || insn_entry[uid].is_load)
526     return false;
527 
528   const_rtx tocrel_base;
529 
530   struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
531   df_ref use;
532 
533   /* Iterate over the definitions that are used by this insn.  Since
534      this is known to be a swap insn, expect only one used definition.  */
535   FOR_EACH_INSN_INFO_USE (use, insn_info)
536     {
537       struct df_link *def_link = DF_REF_CHAIN (use);
538 
539       /* If there is no def or the def is artificial or there are
540 	 multiple defs, punt.  */
541       if (!def_link || !def_link->ref || DF_REF_IS_ARTIFICIAL (def_link->ref)
542 	  || def_link->next)
543 	return false;
544 
545       rtx def_insn = DF_REF_INSN (def_link->ref);
546       unsigned uid2 = INSN_UID (def_insn);
547       /* If this is not a load or is not a swap, return false.  */
548       if (!insn_entry[uid2].is_load || !insn_entry[uid2].is_swap)
549 	return false;
550 
551       /* If the source of the rtl def is not a set from memory, return
552 	 false.  */
553       rtx body = PATTERN (def_insn);
554       if (GET_CODE (body) != SET
555 	  || !(GET_CODE (SET_SRC (body)) == VEC_SELECT
556 	       || pattern_is_rotate64 (body))
557 	  || !MEM_P (XEXP (SET_SRC (body), 0)))
558 	return false;
559 
560       rtx mem = XEXP (SET_SRC (body), 0);
561       rtx base_reg = XEXP (mem, 0);
562       /* If the base address for the memory expression is not
563 	 represented by a register, punt.  */
564       if (!REG_P (base_reg))
565 	return false;
566 
567       df_ref base_use;
568       insn_info = DF_INSN_INFO_GET (def_insn);
569       FOR_EACH_INSN_INFO_USE (base_use, insn_info)
570 	{
571 	  /* If base_use does not represent base_reg, look for another
572 	     use.  */
573 	  if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
574 	    continue;
575 
576 	  struct df_link *base_def_link = DF_REF_CHAIN (base_use);
577 	  if (!base_def_link || base_def_link->next)
578 	    return false;
579 
580 	  /* Constants held on the stack are not "true" constants
581 	     because their values are not part of the static load
582 	     image.  If this constant's base reference is a stack
583 	     or frame pointer, it is seen as an artificial
584 	     reference.  */
585 	  if (DF_REF_IS_ARTIFICIAL (base_def_link->ref))
586 	    return false;
587 
588 	  rtx tocrel_insn = DF_REF_INSN (base_def_link->ref);
589 	  rtx tocrel_body = PATTERN (tocrel_insn);
590 	  rtx base, offset;
591 	  if (GET_CODE (tocrel_body) != SET)
592 	    return false;
593 	  /* There is an extra level of indirection for small/large
594 	     code models.  */
595 	  rtx tocrel_expr = SET_SRC (tocrel_body);
596 	  if (MEM_P (tocrel_expr))
597 	    tocrel_expr = XEXP (tocrel_expr, 0);
598 	  if (!toc_relative_expr_p (tocrel_expr, false, &tocrel_base, NULL))
599 	    return false;
600 	  split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
601 
602 	  if (!SYMBOL_REF_P (base) || !CONSTANT_POOL_ADDRESS_P (base))
603 	    return false;
604 	  else
605 	    {
606 	      /* FIXME: The conditions under which
607 	          (SYMBOL_REF_P (const_vector)
608 	           && !CONSTANT_POOL_ADDRESS_P (const_vector))
609 	         are not well understood.  This code prevents
610 	         an internal compiler error which will occur in
611 	         replace_swapped_load_constant () if we were to return
612 	         true.  Some day, we should figure out how to properly
613 	         handle this condition in
614 	         replace_swapped_load_constant () and then we can
615 	         remove this special test.  */
616 	      rtx const_vector = get_pool_constant (base);
617 	      if (SYMBOL_REF_P (const_vector)
618 		  && CONSTANT_POOL_ADDRESS_P (const_vector))
619 		const_vector = get_pool_constant (const_vector);
620 	      if (GET_CODE (const_vector) != CONST_VECTOR)
621 		return false;
622 	    }
623 	}
624     }
625   return true;
626 }
627 
628 /* Return TRUE iff OP matches a V2DF reduction pattern.  See the
629    definition of vsx_reduc_<VEC_reduc_name>_v2df in vsx.md.  */
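/* Roughly, this matches sources of the form

     (plus:V2DF
       (vec_concat:V2DF
	 (vec_select:DF (reg:V2DF x) (parallel [(const_int 1)]))
	 (vec_select:DF (reg:V2DF x) (parallel [(const_int 0)])))
       ...)

   and likewise for smin/smax, i.e. the element-exchanged copy of the
   input that the vsx_reduc_* patterns use.  */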
630 static bool
631 v2df_reduction_p (rtx op)
632 {
633   if (GET_MODE (op) != V2DFmode)
634     return false;
635 
636   enum rtx_code code = GET_CODE (op);
637   if (code != PLUS && code != SMIN && code != SMAX)
638     return false;
639 
640   rtx concat = XEXP (op, 0);
641   if (GET_CODE (concat) != VEC_CONCAT)
642     return false;
643 
644   rtx select0 = XEXP (concat, 0);
645   rtx select1 = XEXP (concat, 1);
646   if (GET_CODE (select0) != VEC_SELECT || GET_CODE (select1) != VEC_SELECT)
647     return false;
648 
649   rtx reg0 = XEXP (select0, 0);
650   rtx reg1 = XEXP (select1, 0);
651   if (!rtx_equal_p (reg0, reg1) || !REG_P (reg0))
652     return false;
653 
654   rtx parallel0 = XEXP (select0, 1);
655   rtx parallel1 = XEXP (select1, 1);
656   if (GET_CODE (parallel0) != PARALLEL || GET_CODE (parallel1) != PARALLEL)
657     return false;
658 
659   if (!rtx_equal_p (XVECEXP (parallel0, 0, 0), const1_rtx)
660       || !rtx_equal_p (XVECEXP (parallel1, 0, 0), const0_rtx))
661     return false;
662 
663   return true;
664 }
665 
666 /* Return 1 iff OP is an operand that will not be affected by having
667    vector doublewords swapped in memory.  */
668 static unsigned int
669 rtx_is_swappable_p (rtx op, unsigned int *special)
670 {
671   enum rtx_code code = GET_CODE (op);
672   int i, j;
673   rtx parallel;
674 
675   switch (code)
676     {
677     case LABEL_REF:
678     case SYMBOL_REF:
679     case CLOBBER:
680     case REG:
681       return 1;
682 
683     case VEC_CONCAT:
684     case ASM_INPUT:
685     case ASM_OPERANDS:
686       return 0;
687 
688     case CONST_VECTOR:
689       {
690 	*special = SH_CONST_VECTOR;
691 	return 1;
692       }
693 
694     case VEC_DUPLICATE:
695       /* Opportunity: If XEXP (op, 0) has the same mode as the result,
696 	 and XEXP (op, 1) is a PARALLEL with a single QImode const int,
697 	 it represents a vector splat for which we can do special
698 	 handling.  */
699       if (CONST_INT_P (XEXP (op, 0)))
700 	return 1;
701       else if (REG_P (XEXP (op, 0))
702 	       && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
703 	/* This catches V2DF and V2DI splat, at a minimum.  */
704 	return 1;
705       else if (GET_CODE (XEXP (op, 0)) == TRUNCATE
706 	       && REG_P (XEXP (XEXP (op, 0), 0))
707 	       && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
708 	/* This catches splat of a truncated value.  */
709 	return 1;
710       else if (GET_CODE (XEXP (op, 0)) == VEC_SELECT)
711 	/* If the duplicated item is from a select, defer to the select
712 	   processing to see if we can change the lane for the splat.  */
713 	return rtx_is_swappable_p (XEXP (op, 0), special);
714       else
715 	return 0;
716 
717     case VEC_SELECT:
718       /* A vec_extract operation is ok if we change the lane.  */
719       if (REG_P (XEXP (op, 0))
720 	  && GET_MODE_INNER (GET_MODE (XEXP (op, 0))) == GET_MODE (op)
721 	  && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
722 	  && XVECLEN (parallel, 0) == 1
723 	  && CONST_INT_P (XVECEXP (parallel, 0, 0)))
724 	{
725 	  *special = SH_EXTRACT;
726 	  return 1;
727 	}
728       /* An XXPERMDI is ok if we adjust the lanes.  Note that if the
729 	 XXPERMDI is a swap operation, it will be identified by
730 	 insn_is_swap_p and therefore we won't get here.  */
731       else if (GET_CODE (XEXP (op, 0)) == VEC_CONCAT
732 	       && (GET_MODE (XEXP (op, 0)) == V4DFmode
733 		   || GET_MODE (XEXP (op, 0)) == V4DImode)
734 	       && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
735 	       && XVECLEN (parallel, 0) == 2
736 	       && CONST_INT_P (XVECEXP (parallel, 0, 0))
737 	       && CONST_INT_P (XVECEXP (parallel, 0, 1)))
738 	{
739 	  *special = SH_XXPERMDI;
740 	  return 1;
741 	}
742       else if (v2df_reduction_p (op))
743 	return 1;
744       else
745 	return 0;
746 
747     case UNSPEC:
748       {
749 	/* Various operations are unsafe for this optimization, at least
750 	   without significant additional work.  Permutes are obviously
751 	   problematic, as both the permute control vector and the ordering
752 	   of the target values are invalidated by doubleword swapping.
753 	   Vector pack and unpack modify the number of vector lanes.
754 	   Merge-high/low will not operate correctly on swapped operands.
755 	   Vector shifts across element boundaries are clearly uncool,
756 	   as are vector select and concatenate operations.  Vector
757 	   sum-across instructions define one operand with a specific
758 	   order-dependent element, so additional fixup code would be
759 	   needed to make those work.  Vector set and non-immediate-form
760 	   vector splat are element-order sensitive.  A few of these
761 	   cases might be workable with special handling if required.
762 	   Adding cost modeling would be appropriate in some cases.  */
763 	int val = XINT (op, 1);
764 	switch (val)
765 	  {
766 	  default:
767 	    break;
768 	  case UNSPEC_VBPERMQ:
769 	  case UNSPEC_VPACK_SIGN_SIGN_SAT:
770 	  case UNSPEC_VPACK_SIGN_UNS_SAT:
771 	  case UNSPEC_VPACK_UNS_UNS_MOD:
772 	  case UNSPEC_VPACK_UNS_UNS_MOD_DIRECT:
773 	  case UNSPEC_VPACK_UNS_UNS_SAT:
774 	  case UNSPEC_VPERM:
775 	  case UNSPEC_VPERM_UNS:
776 	  case UNSPEC_VPERMHI:
777 	  case UNSPEC_VPERMSI:
778 	  case UNSPEC_VPERMXOR:
779 	  case UNSPEC_VPKPX:
780 	  case UNSPEC_VSLDOI:
781 	  case UNSPEC_VSLO:
782 	  case UNSPEC_VSRO:
783 	  case UNSPEC_VSUM2SWS:
784 	  case UNSPEC_VSUM4S:
785 	  case UNSPEC_VSUM4UBS:
786 	  case UNSPEC_VSUMSWS:
787 	  case UNSPEC_VSUMSWS_DIRECT:
788 	  case UNSPEC_VSX_CONCAT:
789 	  case UNSPEC_VSX_CVDPSPN:
790 	  case UNSPEC_VSX_CVSPDP:
791 	  case UNSPEC_VSX_CVSPDPN:
792 	  case UNSPEC_VSX_EXTRACT:
793 	  case UNSPEC_VSX_SET:
794 	  case UNSPEC_VSX_SLDWI:
795 	  case UNSPEC_VSX_VSLO:
796 	  case UNSPEC_VUNPACK_HI_SIGN:
797 	  case UNSPEC_VUNPACK_HI_SIGN_DIRECT:
798 	  case UNSPEC_VUNPACK_LO_SIGN:
799 	  case UNSPEC_VUNPACK_LO_SIGN_DIRECT:
800 	  case UNSPEC_VUPKHPX:
801 	  case UNSPEC_VUPKHS_V4SF:
802 	  case UNSPEC_VUPKHU_V4SF:
803 	  case UNSPEC_VUPKLPX:
804 	  case UNSPEC_VUPKLS_V4SF:
805 	  case UNSPEC_VUPKLU_V4SF:
806 	    return 0;
807 	  case UNSPEC_VSPLT_DIRECT:
808 	  case UNSPEC_VSX_XXSPLTD:
809 	    *special = SH_SPLAT;
810 	    return 1;
811 	  case UNSPEC_REDUC_PLUS:
812 	  case UNSPEC_REDUC:
813 	    return 1;
814 	  case UNSPEC_VPMSUM:
815 	    /* vpmsumd is not swappable, but vpmsum[bhw] are.  */
816 	    if (GET_MODE (op) == V2DImode)
817 	      return 0;
818 	    break;
819 	  }
820       }
821 
822     default:
823       break;
824     }
825 
826   const char *fmt = GET_RTX_FORMAT (code);
827   int ok = 1;
828 
829   for (i = 0; i < GET_RTX_LENGTH (code); ++i)
830     if (fmt[i] == 'e' || fmt[i] == 'u')
831       {
832 	unsigned int special_op = SH_NONE;
833 	ok &= rtx_is_swappable_p (XEXP (op, i), &special_op);
834 	if (special_op == SH_NONE)
835 	  continue;
836 	/* Ensure we never have two kinds of special handling
837 	   for the same insn.  */
838 	if (*special != SH_NONE && *special != special_op)
839 	  return 0;
840 	*special = special_op;
841       }
842     else if (fmt[i] == 'E')
843       for (j = 0; j < XVECLEN (op, i); ++j)
844 	{
845 	  unsigned int special_op = SH_NONE;
846 	  ok &= rtx_is_swappable_p (XVECEXP (op, i, j), &special_op);
847 	  if (special_op == SH_NONE)
848 	    continue;
849 	  /* Ensure we never have two kinds of special handling
850 	     for the same insn.  */
851 	  if (*special != SH_NONE && *special != special_op)
852 	    return 0;
853 	  *special = special_op;
854 	}
855 
856   return ok;
857 }
858 
859 /* Return 1 iff INSN is an operand that will not be affected by
860    having vector doublewords swapped in memory (in which case
861    *SPECIAL is unchanged), or that can be modified to be correct
862    if vector doublewords are swapped in memory (in which case
863    *SPECIAL is changed to a value indicating how).  */
864 static unsigned int
865 insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn,
866 		     unsigned int *special)
867 {
868   /* Calls are always bad.  */
869   if (GET_CODE (insn) == CALL_INSN)
870     return 0;
871 
872   /* Loads and stores seen here are not permuting, but we can still
873      fix them up by converting them to permuting ones.  Exceptions:
874      UNSPEC_LVE, UNSPEC_LVX, and UNSPEC_STVX, which have a PARALLEL
875      body instead of a SET; and UNSPEC_STVE, which has an UNSPEC
876      for the SET source.  Also we must now make an exception for lvx
877      and stvx when they are not in the UNSPEC_LVX/STVX form (with the
878      explicit "& -16") since this leads to unrecognizable insns.  */
879   rtx body = PATTERN (insn);
880   int i = INSN_UID (insn);
881 
882   if (insn_entry[i].is_load)
883     {
884       if (GET_CODE (body) == SET)
885 	{
886 	  rtx rhs = SET_SRC (body);
887 	  /* Even without a swap, the RHS might be a vec_select for, say,
888 	     a byte-reversing load.  */
889 	  if (!MEM_P (rhs))
890 	    return 0;
891 	  if (GET_CODE (XEXP (rhs, 0)) == AND)
892 	    return 0;
893 
894 	  *special = SH_NOSWAP_LD;
895 	  return 1;
896 	}
897       else
898 	return 0;
899     }
900 
901   if (insn_entry[i].is_store)
902     {
903       if (GET_CODE (body) == SET
904 	  && GET_CODE (SET_SRC (body)) != UNSPEC
905 	  && GET_CODE (SET_SRC (body)) != VEC_SELECT)
906 	{
907 	  rtx lhs = SET_DEST (body);
908 	  /* Even without a swap, the RHS might be a vec_select for, say,
909 	     a byte-reversing store.  */
910 	  if (!MEM_P (lhs))
911 	    return 0;
912 	  if (GET_CODE (XEXP (lhs, 0)) == AND)
913 	    return 0;
914 
915 	  *special = SH_NOSWAP_ST;
916 	  return 1;
917 	}
918       else
919 	return 0;
920     }
921 
922   /* A convert to single precision can be left as is provided that
923      all of its uses are in xxspltw instructions that splat BE element
924      zero.  */
925   if (GET_CODE (body) == SET
926       && GET_CODE (SET_SRC (body)) == UNSPEC
927       && XINT (SET_SRC (body), 1) == UNSPEC_VSX_CVDPSPN)
928     {
929       df_ref def;
930       struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
931 
932       FOR_EACH_INSN_INFO_DEF (def, insn_info)
933 	{
934 	  struct df_link *link = DF_REF_CHAIN (def);
935 	  if (!link)
936 	    return 0;
937 
938 	  for (; link; link = link->next) {
939 	    rtx use_insn = DF_REF_INSN (link->ref);
940 	    rtx use_body = PATTERN (use_insn);
941 	    if (GET_CODE (use_body) != SET
942 		|| GET_CODE (SET_SRC (use_body)) != UNSPEC
943 		|| XINT (SET_SRC (use_body), 1) != UNSPEC_VSX_XXSPLTW
944 		|| XVECEXP (SET_SRC (use_body), 0, 1) != const0_rtx)
945 	      return 0;
946 	  }
947 	}
948 
949       return 1;
950     }
951 
952   /* A concatenation of two doublewords is ok if we reverse the
953      order of the inputs.  */
954   if (GET_CODE (body) == SET
955       && GET_CODE (SET_SRC (body)) == VEC_CONCAT
956       && (GET_MODE (SET_SRC (body)) == V2DFmode
957 	  || GET_MODE (SET_SRC (body)) == V2DImode))
958     {
959       *special = SH_CONCAT;
960       return 1;
961     }
962 
963   /* V2DF reductions are always swappable.  */
964   if (GET_CODE (body) == PARALLEL)
965     {
966       rtx expr = XVECEXP (body, 0, 0);
967       if (GET_CODE (expr) == SET
968 	  && v2df_reduction_p (SET_SRC (expr)))
969 	return 1;
970     }
971 
972   /* An UNSPEC_VPERM is ok if the mask operand is loaded from the
973      constant pool.  */
974   if (GET_CODE (body) == SET
975       && GET_CODE (SET_SRC (body)) == UNSPEC
976       && XINT (SET_SRC (body), 1) == UNSPEC_VPERM
977       && XVECLEN (SET_SRC (body), 0) == 3
978       && REG_P (XVECEXP (SET_SRC (body), 0, 2)))
979     {
980       rtx mask_reg = XVECEXP (SET_SRC (body), 0, 2);
981       struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
982       df_ref use;
983       FOR_EACH_INSN_INFO_USE (use, insn_info)
984 	if (rtx_equal_p (DF_REF_REG (use), mask_reg))
985 	  {
986 	    struct df_link *def_link = DF_REF_CHAIN (use);
987 	    /* Punt if multiple definitions for this reg.  */
988 	    if (def_link && !def_link->next &&
989 		const_load_sequence_p (insn_entry,
990 				       DF_REF_INSN (def_link->ref)))
991 	      {
992 		*special = SH_VPERM;
993 		return 1;
994 	      }
995 	  }
996     }
997 
998   /* Otherwise check the operands for vector lane violations.  */
999   return rtx_is_swappable_p (body, special);
1000 }
1001 
1002 enum chain_purpose { FOR_LOADS, FOR_STORES };
1003 
1004 /* Return true if the UD or DU chain headed by LINK is non-empty,
1005    and every entry on the chain references an insn that is a
1006    register swap.  Furthermore, if PURPOSE is FOR_LOADS, each such
1007    register swap must have only permuting loads as reaching defs.
1008    If PURPOSE is FOR_STORES, each such register swap must have only
1009    register swaps or permuting stores as reached uses.  */
1010 static bool
1011 chain_contains_only_swaps (swap_web_entry *insn_entry, struct df_link *link,
1012 			   enum chain_purpose purpose)
1013 {
1014   if (!link)
1015     return false;
1016 
1017   for (; link; link = link->next)
1018     {
1019       if (!ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (DF_REF_REG (link->ref))))
1020 	continue;
1021 
1022       if (DF_REF_IS_ARTIFICIAL (link->ref))
1023 	return false;
1024 
1025       rtx reached_insn = DF_REF_INSN (link->ref);
1026       unsigned uid = INSN_UID (reached_insn);
1027       struct df_insn_info *insn_info = DF_INSN_INFO_GET (reached_insn);
1028 
1029       if (!insn_entry[uid].is_swap || insn_entry[uid].is_load
1030 	  || insn_entry[uid].is_store)
1031 	return false;
1032 
1033       if (purpose == FOR_LOADS)
1034 	{
1035 	  df_ref use;
1036 	  FOR_EACH_INSN_INFO_USE (use, insn_info)
1037 	    {
1038 	      struct df_link *swap_link = DF_REF_CHAIN (use);
1039 
1040 	      while (swap_link)
1041 		{
1042 		  if (DF_REF_IS_ARTIFICIAL (link->ref))
1043 		    return false;
1044 
1045 		  rtx swap_def_insn = DF_REF_INSN (swap_link->ref);
1046 		  unsigned uid2 = INSN_UID (swap_def_insn);
1047 
1048 		  /* Only permuting loads are allowed.  */
1049 		  if (!insn_entry[uid2].is_swap || !insn_entry[uid2].is_load)
1050 		    return false;
1051 
1052 		  swap_link = swap_link->next;
1053 		}
1054 	    }
1055 	}
1056       else if (purpose == FOR_STORES)
1057 	{
1058 	  df_ref def;
1059 	  FOR_EACH_INSN_INFO_DEF (def, insn_info)
1060 	    {
1061 	      struct df_link *swap_link = DF_REF_CHAIN (def);
1062 
1063 	      while (swap_link)
1064 		{
1065 		  if (DF_REF_IS_ARTIFICIAL (link->ref))
1066 		    return false;
1067 
1068 		  rtx swap_use_insn = DF_REF_INSN (swap_link->ref);
1069 		  unsigned uid2 = INSN_UID (swap_use_insn);
1070 
1071 		  /* Permuting stores or register swaps are allowed.  */
1072 		  if (!insn_entry[uid2].is_swap || insn_entry[uid2].is_load)
1073 		    return false;
1074 
1075 		  swap_link = swap_link->next;
1076 		}
1077 	    }
1078 	}
1079     }
1080 
1081   return true;
1082 }
1083 
1084 /* Mark the xxswapdi instructions associated with permuting loads and
1085    stores for removal.  Note that we only flag them for deletion here,
1086    as there is a possibility of a swap being reached from multiple
1087    loads, etc.  */
1088 static void
1089 mark_swaps_for_removal (swap_web_entry *insn_entry, unsigned int i)
1090 {
1091   rtx insn = insn_entry[i].insn;
1092   struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
1093 
1094   if (insn_entry[i].is_load)
1095     {
1096       df_ref def;
1097       FOR_EACH_INSN_INFO_DEF (def, insn_info)
1098 	{
1099 	  struct df_link *link = DF_REF_CHAIN (def);
1100 
1101 	  /* We know by now that these are swaps, so we can delete
1102 	     them confidently.  */
1103 	  while (link)
1104 	    {
1105 	      rtx use_insn = DF_REF_INSN (link->ref);
1106 	      insn_entry[INSN_UID (use_insn)].will_delete = 1;
1107 	      link = link->next;
1108 	    }
1109 	}
1110     }
1111   else if (insn_entry[i].is_store)
1112     {
1113       df_ref use;
1114       FOR_EACH_INSN_INFO_USE (use, insn_info)
1115 	{
1116 	  /* Ignore uses for addressability.  */
1117 	  machine_mode mode = GET_MODE (DF_REF_REG (use));
1118 	  if (!ALTIVEC_OR_VSX_VECTOR_MODE (mode))
1119 	    continue;
1120 
1121 	  struct df_link *link = DF_REF_CHAIN (use);
1122 
1123 	  /* We know by now that these are swaps, so we can delete
1124 	     them confidently.  */
1125 	  while (link)
1126 	    {
1127 	      rtx def_insn = DF_REF_INSN (link->ref);
1128 	      insn_entry[INSN_UID (def_insn)].will_delete = 1;
1129 	      link = link->next;
1130 	    }
1131 	}
1132     }
1133 }
1134 
1135 /* *OP_PTR is either a CONST_VECTOR or an expression containing one.
1136    Swap the first half of the vector with the second in the first
1137    case.  Recurse to find it in the second.  */
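/* For example, the V4SImode constant { 1, 2, 3, 4 } becomes
   { 3, 4, 1, 2 }: the two doubleword halves change places while the
   elements within each half keep their order.  */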
1138 static void
1139 swap_const_vector_halves (rtx *op_ptr)
1140 {
1141   int i;
1142   rtx op = *op_ptr;
1143   enum rtx_code code = GET_CODE (op);
1144   if (GET_CODE (op) == CONST_VECTOR)
1145     {
1146       int units = GET_MODE_NUNITS (GET_MODE (op));
1147       rtx_vector_builder builder (GET_MODE (op), units, 1);
1148       for (i = 0; i < units / 2; ++i)
1149 	builder.quick_push (CONST_VECTOR_ELT (op, i + units / 2));
1150       for (i = 0; i < units / 2; ++i)
1151 	builder.quick_push (CONST_VECTOR_ELT (op, i));
1152       *op_ptr = builder.build ();
1153     }
1154   else
1155     {
1156       int j;
1157       const char *fmt = GET_RTX_FORMAT (code);
1158       for (i = 0; i < GET_RTX_LENGTH (code); ++i)
1159 	if (fmt[i] == 'e' || fmt[i] == 'u')
1160 	  swap_const_vector_halves (&XEXP (op, i));
1161 	else if (fmt[i] == 'E')
1162 	  for (j = 0; j < XVECLEN (op, i); ++j)
1163 	    swap_const_vector_halves (&XVECEXP (op, i, j));
1164     }
1165 }
1166 
1167 /* Find all subregs of a vector expression that perform a narrowing,
1168    and adjust the subreg index to account for doubleword swapping.  */
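/* For example, (subreg:DI (reg:V2DI x) 0) becomes
   (subreg:DI (reg:V2DI x) 8) and vice versa, since the doubleword
   formerly at byte offset 0 now lives at byte offset 8.  */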
1169 static void
1170 adjust_subreg_index (rtx op)
1171 {
1172   enum rtx_code code = GET_CODE (op);
1173   if (code == SUBREG
1174       && (GET_MODE_SIZE (GET_MODE (op))
1175 	  < GET_MODE_SIZE (GET_MODE (XEXP (op, 0)))))
1176     {
1177       unsigned int index = SUBREG_BYTE (op);
1178       if (index < 8)
1179 	index += 8;
1180       else
1181 	index -= 8;
1182       SUBREG_BYTE (op) = index;
1183     }
1184 
1185   const char *fmt = GET_RTX_FORMAT (code);
1186   int i,j;
1187   for (i = 0; i < GET_RTX_LENGTH (code); ++i)
1188     if (fmt[i] == 'e' || fmt[i] == 'u')
1189       adjust_subreg_index (XEXP (op, i));
1190     else if (fmt[i] == 'E')
1191       for (j = 0; j < XVECLEN (op, i); ++j)
1192 	adjust_subreg_index (XVECEXP (op, i, j));
1193 }
1194 
1195 /* Convert the non-permuting load INSN to a permuting one.  */
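/* Schematically, for V4SI this turns
     (set (reg:V4SI x) (mem:V4SI addr))
   into
     (set (reg:V4SI x)
          (vec_select:V4SI (mem:V4SI addr)
                           (parallel [(const_int 2) (const_int 3)
                                      (const_int 0) (const_int 1)]))).  */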
1196 static void
1197 permute_load (rtx_insn *insn)
1198 {
1199   rtx body = PATTERN (insn);
1200   rtx mem_op = SET_SRC (body);
1201   rtx tgt_reg = SET_DEST (body);
1202   machine_mode mode = GET_MODE (tgt_reg);
1203   int n_elts = GET_MODE_NUNITS (mode);
1204   int half_elts = n_elts / 2;
1205   rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
1206   int i, j;
1207   for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
1208     XVECEXP (par, 0, i) = GEN_INT (j);
1209   for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
1210     XVECEXP (par, 0, i) = GEN_INT (j);
1211   rtx sel = gen_rtx_VEC_SELECT (mode, mem_op, par);
1212   SET_SRC (body) = sel;
1213   INSN_CODE (insn) = -1; /* Force re-recognition.  */
1214   df_insn_rescan (insn);
1215 
1216   if (dump_file)
1217     fprintf (dump_file, "Replacing load %d with permuted load\n",
1218 	     INSN_UID (insn));
1219 }
1220 
1221 /* Convert the non-permuting store INSN to a permuting one.  */
1222 static void
1223 permute_store (rtx_insn *insn)
1224 {
1225   rtx body = PATTERN (insn);
1226   rtx src_reg = SET_SRC (body);
1227   machine_mode mode = GET_MODE (src_reg);
1228   int n_elts = GET_MODE_NUNITS (mode);
1229   int half_elts = n_elts / 2;
1230   rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
1231   int i, j;
1232   for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
1233     XVECEXP (par, 0, i) = GEN_INT (j);
1234   for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
1235     XVECEXP (par, 0, i) = GEN_INT (j);
1236   rtx sel = gen_rtx_VEC_SELECT (mode, src_reg, par);
1237   SET_SRC (body) = sel;
1238   INSN_CODE (insn) = -1; /* Force re-recognition.  */
1239   df_insn_rescan (insn);
1240 
1241   if (dump_file)
1242     fprintf (dump_file, "Replacing store %d with permuted store\n",
1243 	     INSN_UID (insn));
1244 }
1245 
1246 /* Given INSN that contains a vector extract operation, adjust the index
1247    of the extracted lane to account for the doubleword swap.  */
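/* For example, with a V4SImode source (half_elts == 2), an extract of
   lane 1 becomes an extract of lane 3 and vice versa.  */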
1248 static void
1249 adjust_extract (rtx_insn *insn)
1250 {
1251   rtx pattern = PATTERN (insn);
1252   if (GET_CODE (pattern) == PARALLEL)
1253     pattern = XVECEXP (pattern, 0, 0);
1254   rtx src = SET_SRC (pattern);
1255   /* The vec_select may be wrapped in a vec_duplicate for a splat, so
1256      account for that.  */
1257   rtx sel = GET_CODE (src) == VEC_DUPLICATE ? XEXP (src, 0) : src;
1258   rtx par = XEXP (sel, 1);
1259   int half_elts = GET_MODE_NUNITS (GET_MODE (XEXP (sel, 0))) >> 1;
1260   int lane = INTVAL (XVECEXP (par, 0, 0));
1261   lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
1262   XVECEXP (par, 0, 0) = GEN_INT (lane);
1263   INSN_CODE (insn) = -1; /* Force re-recognition.  */
1264   df_insn_rescan (insn);
1265 
1266   if (dump_file)
1267     fprintf (dump_file, "Changing lane for extract %d\n", INSN_UID (insn));
1268 }
1269 
1270 /* Given INSN that contains a vector direct-splat operation, adjust the index
1271    of the source lane to account for the doubleword swap.  */
1272 static void
1273 adjust_splat (rtx_insn *insn)
1274 {
1275   rtx body = PATTERN (insn);
1276   rtx unspec = XEXP (body, 1);
1277   int half_elts = GET_MODE_NUNITS (GET_MODE (unspec)) >> 1;
1278   int lane = INTVAL (XVECEXP (unspec, 0, 1));
1279   lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
1280   XVECEXP (unspec, 0, 1) = GEN_INT (lane);
1281   INSN_CODE (insn) = -1; /* Force re-recognition.  */
1282   df_insn_rescan (insn);
1283 
1284   if (dump_file)
1285     fprintf (dump_file, "Changing lane for splat %d\n", INSN_UID (insn));
1286 }
1287 
1288 /* Given INSN that contains an XXPERMDI operation (that is not a doubleword
1289    swap), reverse the order of the source operands and adjust the indices
1290    of the source lanes to account for doubleword reversal.  */
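/* For example, a select of lanes {0, 2} from (vec_concat A B) becomes
   a select of lanes {1, 3} from (vec_concat B A): the two sources are
   exchanged, and lane indices L0 and L1 are rewritten as 3-L1 and
   3-L0 respectively.  */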
1291 static void
1292 adjust_xxpermdi (rtx_insn *insn)
1293 {
1294   rtx set = PATTERN (insn);
1295   rtx select = XEXP (set, 1);
1296   rtx concat = XEXP (select, 0);
1297   rtx src0 = XEXP (concat, 0);
1298   XEXP (concat, 0) = XEXP (concat, 1);
1299   XEXP (concat, 1) = src0;
1300   rtx parallel = XEXP (select, 1);
1301   int lane0 = INTVAL (XVECEXP (parallel, 0, 0));
1302   int lane1 = INTVAL (XVECEXP (parallel, 0, 1));
1303   int new_lane0 = 3 - lane1;
1304   int new_lane1 = 3 - lane0;
1305   XVECEXP (parallel, 0, 0) = GEN_INT (new_lane0);
1306   XVECEXP (parallel, 0, 1) = GEN_INT (new_lane1);
1307   INSN_CODE (insn) = -1; /* Force re-recognition.  */
1308   df_insn_rescan (insn);
1309 
1310   if (dump_file)
1311     fprintf (dump_file, "Changing lanes for xxpermdi %d\n", INSN_UID (insn));
1312 }
1313 
1314 /* Given INSN that contains a VEC_CONCAT operation of two doublewords,
1315    reverse the order of those inputs.  */
1316 static void
1317 adjust_concat (rtx_insn *insn)
1318 {
1319   rtx set = PATTERN (insn);
1320   rtx concat = XEXP (set, 1);
1321   rtx src0 = XEXP (concat, 0);
1322   XEXP (concat, 0) = XEXP (concat, 1);
1323   XEXP (concat, 1) = src0;
1324   INSN_CODE (insn) = -1; /* Force re-recognition.  */
1325   df_insn_rescan (insn);
1326 
1327   if (dump_file)
1328     fprintf (dump_file, "Reversing inputs for concat %d\n", INSN_UID (insn));
1329 }
1330 
1331 /* Given an UNSPEC_VPERM insn, modify the mask loaded from the
1332    constant pool to reflect swapped doublewords.  */
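/* For example, a mask byte of 0 becomes 8, 5 becomes 13, and 17
   becomes 25, following the M -> M' mapping described in the "Notes
   on Permutes" comment near the top of this file.  */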
1333 static void
1334 adjust_vperm (rtx_insn *insn)
1335 {
1336   /* We previously determined that the UNSPEC_VPERM was fed by a
1337      swap of a swapping load of a TOC-relative constant pool symbol.
1338      Find the MEM in the swapping load and replace it with a MEM for
1339      the adjusted mask constant.  */
1340   rtx set = PATTERN (insn);
1341   rtx mask_reg = XVECEXP (SET_SRC (set), 0, 2);
1342 
1343   /* Find the swap.  */
1344   struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
1345   df_ref use;
1346   rtx_insn *swap_insn = 0;
1347   FOR_EACH_INSN_INFO_USE (use, insn_info)
1348     if (rtx_equal_p (DF_REF_REG (use), mask_reg))
1349       {
1350 	struct df_link *def_link = DF_REF_CHAIN (use);
1351 	gcc_assert (def_link && !def_link->next);
1352 	swap_insn = DF_REF_INSN (def_link->ref);
1353 	break;
1354       }
1355   gcc_assert (swap_insn);
1356 
1357   /* Find the load.  */
1358   insn_info = DF_INSN_INFO_GET (swap_insn);
1359   rtx_insn *load_insn = 0;
1360   FOR_EACH_INSN_INFO_USE (use, insn_info)
1361     {
1362       struct df_link *def_link = DF_REF_CHAIN (use);
1363       gcc_assert (def_link && !def_link->next);
1364       load_insn = DF_REF_INSN (def_link->ref);
1365       break;
1366     }
1367   gcc_assert (load_insn);
1368 
1369   /* Find the TOC-relative symbol access.  */
1370   insn_info = DF_INSN_INFO_GET (load_insn);
1371   rtx_insn *tocrel_insn = 0;
1372   FOR_EACH_INSN_INFO_USE (use, insn_info)
1373     {
1374       struct df_link *def_link = DF_REF_CHAIN (use);
1375       gcc_assert (def_link && !def_link->next);
1376       tocrel_insn = DF_REF_INSN (def_link->ref);
1377       break;
1378     }
1379   gcc_assert (tocrel_insn);
1380 
1381   /* Find the embedded CONST_VECTOR.  We have to call toc_relative_expr_p
1382      to set tocrel_base; otherwise it would be unnecessary as we've
1383      already established it will return true.  */
1384   rtx base, offset;
1385   const_rtx tocrel_base;
1386   rtx tocrel_expr = SET_SRC (PATTERN (tocrel_insn));
1387   /* There is an extra level of indirection for small/large code models.  */
1388   if (MEM_P (tocrel_expr))
1389     tocrel_expr = XEXP (tocrel_expr, 0);
1390   if (!toc_relative_expr_p (tocrel_expr, false, &tocrel_base, NULL))
1391     gcc_unreachable ();
1392   split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
1393   rtx const_vector = get_pool_constant (base);
1394   /* With the extra indirection, get_pool_constant will produce the
1395      real constant from the reg_equal expression, so get the real
1396      constant.  */
1397   if (SYMBOL_REF_P (const_vector))
1398     const_vector = get_pool_constant (const_vector);
1399   gcc_assert (GET_CODE (const_vector) == CONST_VECTOR);
1400 
1401   /* Create an adjusted mask from the initial mask.  */
1402   unsigned int new_mask[16], i, val;
1403   for (i = 0; i < 16; ++i) {
1404     val = INTVAL (XVECEXP (const_vector, 0, i));
1405     if (val < 16)
1406       new_mask[i] = (val + 8) % 16;
1407     else
1408       new_mask[i] = ((val + 8) % 16) + 16;
1409   }
1410 
1411   /* Create a new CONST_VECTOR and a MEM that references it.  */
1412   rtx vals = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
1413   for (i = 0; i < 16; ++i)
1414     XVECEXP (vals, 0, i) = GEN_INT (new_mask[i]);
1415   rtx new_const_vector = gen_rtx_CONST_VECTOR (V16QImode, XVEC (vals, 0));
1416   rtx new_mem = force_const_mem (V16QImode, new_const_vector);
1417   /* This gives us a MEM whose base operand is a SYMBOL_REF, which we
1418      can't recognize.  Force the SYMBOL_REF into a register.  */
1419   if (!REG_P (XEXP (new_mem, 0))) {
1420     rtx base_reg = force_reg (Pmode, XEXP (new_mem, 0));
1421     XEXP (new_mem, 0) = base_reg;
1422     /* Move the newly created insn ahead of the load insn.  */
1423     rtx_insn *force_insn = get_last_insn ();
1424     remove_insn (force_insn);
1425     rtx_insn *before_load_insn = PREV_INSN (load_insn);
1426     add_insn_after (force_insn, before_load_insn, BLOCK_FOR_INSN (load_insn));
1427     df_insn_rescan (before_load_insn);
1428     df_insn_rescan (force_insn);
1429   }
1430 
1431   /* Replace the MEM in the load instruction and rescan it.  */
1432   XEXP (SET_SRC (PATTERN (load_insn)), 0) = new_mem;
1433   INSN_CODE (load_insn) = -1; /* Force re-recognition.  */
1434   df_insn_rescan (load_insn);
1435 
1436   if (dump_file)
1437     fprintf (dump_file, "Adjusting mask for vperm %d\n", INSN_UID (insn));
1438 }
1439 
1440 /* The insn described by INSN_ENTRY[I] can be swapped, but only
1441    with special handling.  Take care of that here.  */
1442 static void
1443 handle_special_swappables (swap_web_entry *insn_entry, unsigned i)
1444 {
1445   rtx_insn *insn = insn_entry[i].insn;
1446   rtx body = PATTERN (insn);
1447 
1448   switch (insn_entry[i].special_handling)
1449     {
1450     default:
1451       gcc_unreachable ();
1452     case SH_CONST_VECTOR:
1453       {
1454 	/* A CONST_VECTOR will only show up somewhere in the RHS of a SET.  */
1455 	gcc_assert (GET_CODE (body) == SET);
1456 	swap_const_vector_halves (&SET_SRC (body));
1457 	if (dump_file)
1458 	  fprintf (dump_file, "Swapping constant halves in insn %d\n", i);
1459 	break;
1460       }
1461     case SH_SUBREG:
1462       /* A subreg of the same size is already safe.  For subregs that
1463 	 select a smaller portion of a reg, adjust the index for
1464 	 swapped doublewords.  */
1465       adjust_subreg_index (body);
1466       if (dump_file)
1467 	fprintf (dump_file, "Adjusting subreg in insn %d\n", i);
1468       break;
1469     case SH_NOSWAP_LD:
1470       /* Convert a non-permuting load to a permuting one.  */
1471       permute_load (insn);
1472       break;
1473     case SH_NOSWAP_ST:
1474       /* Convert a non-permuting store to a permuting one.  */
1475       permute_store (insn);
1476       break;
1477     case SH_EXTRACT:
1478       /* Change the lane on an extract operation.  */
1479       adjust_extract (insn);
1480       break;
1481     case SH_SPLAT:
1482       /* Change the lane on a direct-splat operation.  */
1483       adjust_splat (insn);
1484       break;
1485     case SH_XXPERMDI:
1486       /* Change the lanes on an XXPERMDI operation.  */
1487       adjust_xxpermdi (insn);
1488       break;
1489     case SH_CONCAT:
1490       /* Reverse the order of a concatenation operation.  */
1491       adjust_concat (insn);
1492       break;
1493     case SH_VPERM:
1494       /* Change the mask loaded from the constant pool for a VPERM.  */
1495       adjust_vperm (insn);
1496       break;
1497     }
1498 }
1499 
1500 /* Find the insn from the Ith table entry, which is known to be a
1501    register swap Y = SWAP(X).  Replace it with a copy Y = X.  */
1502 static void
1503 replace_swap_with_copy (swap_web_entry *insn_entry, unsigned i)
1504 {
1505   rtx_insn *insn = insn_entry[i].insn;
1506   rtx body = PATTERN (insn);
1507   rtx src_reg = XEXP (SET_SRC (body), 0);
1508   rtx copy = gen_rtx_SET (SET_DEST (body), src_reg);
1509   rtx_insn *new_insn = emit_insn_before (copy, insn);
1510   set_block_for_insn (new_insn, BLOCK_FOR_INSN (insn));
1511   df_insn_rescan (new_insn);
1512 
1513   if (dump_file)
1514     {
1515       unsigned int new_uid = INSN_UID (new_insn);
1516       fprintf (dump_file, "Replacing swap %d with copy %d\n", i, new_uid);
1517     }
1518 
1519   df_insn_delete (insn);
1520   remove_insn (insn);
1521   insn->set_deleted ();
1522 }
1523 
1524 /* INSN is known to contain a SUBREG, which we can normally handle,
1525    but if the SUBREG itself contains a MULT then we need to leave it alone
1526    to avoid turning a mult_hipart into a mult_lopart, for example.  */
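/* As a rough illustration (not an exact machine-description pattern), a
   high-part multiply may appear as a subreg selecting half of a
   double-width product, e.g.

     (set (reg:DI 123)
          (subreg:DI (mult:TI (reg:TI 124) (reg:TI 125)) 8))

   Flipping the subreg byte offset from 8 to 0 here would silently select
   the other half of the product, so such insns are left alone.  */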
1527 static bool
1528 has_part_mult (rtx_insn *insn)
1529 {
1530   rtx body = PATTERN (insn);
1531   if (GET_CODE (body) != SET)
1532     return false;
1533   rtx src = SET_SRC (body);
1534   if (GET_CODE (src) != SUBREG)
1535     return false;
1536   rtx inner = XEXP (src, 0);
1537   return (GET_CODE (inner) == MULT);
1538 }
1539 
1540 /* Make NEW_MEM_EXP's attributes and flags resemble those of
1541    ORIGINAL_MEM_EXP.  */
1542 static void
1543 mimic_memory_attributes_and_flags (rtx new_mem_exp, const_rtx original_mem_exp)
1544 {
1545   RTX_FLAG (new_mem_exp, jump) = RTX_FLAG (original_mem_exp, jump);
1546   RTX_FLAG (new_mem_exp, call) = RTX_FLAG (original_mem_exp, call);
1547   RTX_FLAG (new_mem_exp, unchanging) = RTX_FLAG (original_mem_exp, unchanging);
1548   RTX_FLAG (new_mem_exp, volatil) = RTX_FLAG (original_mem_exp, volatil);
1549   RTX_FLAG (new_mem_exp, frame_related) =
1550     RTX_FLAG (original_mem_exp, frame_related);
1551 
1552   /* The following fields may not be used with MEM subexpressions.  */
1553   RTX_FLAG (new_mem_exp, in_struct) = RTX_FLAG (original_mem_exp, in_struct);
1554   RTX_FLAG (new_mem_exp, return_val) = RTX_FLAG (original_mem_exp, return_val);
1555 
1556   struct mem_attrs original_attrs = *get_mem_attrs (original_mem_exp);
1557 
1558   alias_set_type set = original_attrs.alias;
1559   set_mem_alias_set (new_mem_exp, set);
1560 
1561   addr_space_t addrspace = original_attrs.addrspace;
1562   set_mem_addr_space (new_mem_exp, addrspace);
1563 
1564   unsigned int align = original_attrs.align;
1565   set_mem_align (new_mem_exp, align);
1566 
1567   tree expr = original_attrs.expr;
1568   set_mem_expr (new_mem_exp, expr);
1569 
1570   if (original_attrs.offset_known_p)
1571     {
1572       HOST_WIDE_INT offset = original_attrs.offset;
1573       set_mem_offset (new_mem_exp, offset);
1574     }
1575   else
1576     clear_mem_offset (new_mem_exp);
1577 
1578   if (original_attrs.size_known_p)
1579     {
1580       HOST_WIDE_INT size = original_attrs.size;
1581       set_mem_size (new_mem_exp, size);
1582     }
1583   else
1584     clear_mem_size (new_mem_exp);
1585 }
1586 
1587 /* Generate an rtx expression to represent use of the stvx insn to store
1588    the value represented by register SRC_EXP into the memory represented
1589    by DEST_EXP, with vector mode MODE.  */
1590 rtx
1591 rs6000_gen_stvx (enum machine_mode mode, rtx dest_exp, rtx src_exp)
1592 {
1593   rtx stvx;
1594 
1595   if (mode == V16QImode)
1596     stvx = gen_altivec_stvx_v16qi (src_exp, dest_exp);
1597   else if (mode == V8HImode)
1598     stvx = gen_altivec_stvx_v8hi (src_exp, dest_exp);
1599 #ifdef HAVE_V8HFmode
1600   else if (mode == V8HFmode)
1601     stvx = gen_altivec_stvx_v8hf (src_exp, dest_exp);
1602 #endif
1603   else if (mode == V4SImode)
1604     stvx = gen_altivec_stvx_v4si (src_exp, dest_exp);
1605   else if (mode == V4SFmode)
1606     stvx = gen_altivec_stvx_v4sf (src_exp, dest_exp);
1607   else if (mode == V2DImode)
1608     stvx = gen_altivec_stvx_v2di (src_exp, dest_exp);
1609   else if (mode == V2DFmode)
1610     stvx = gen_altivec_stvx_v2df (src_exp, dest_exp);
1611   else if (mode == V1TImode)
1612     stvx = gen_altivec_stvx_v1ti (src_exp, dest_exp);
1613   else
1614     /* KFmode, TFmode, other modes not expected in this context.  */
1615     gcc_unreachable ();
1616 
1617   rtx new_mem_exp = SET_DEST (PATTERN (stvx));
1618   mimic_memory_attributes_and_flags (new_mem_exp, dest_exp);
1619   return stvx;
1620 }
1621 
1622 /* Given that STORE_INSN represents an aligned store-with-swap of a
1623    swapped value, replace the store with an aligned store (without
1624    swap) and replace the swap with a copy insn.  */
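/* Sketch of the transformation, with illustrative operands:

     (set (reg:V4SI T) (vec_select:V4SI (reg:V4SI S) ...))          ; swap
     (set (mem:V4SI (reg A)) (vec_select:V4SI (reg:V4SI T) ...))    ; store-with-swap

   where the MEM is known to be at least 128-bit aligned, becomes

     (set (reg:V4SI T) (reg:V4SI S))                                ; copy
     <altivec stvx pattern storing T at A>

   which produces the same memory image without either swap.  */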
1625 static void
1626 replace_swapped_aligned_store (swap_web_entry *insn_entry,
1627 			       rtx_insn *store_insn)
1628 {
1629   unsigned uid = INSN_UID (store_insn);
1630   gcc_assert (insn_entry[uid].is_swap && insn_entry[uid].is_store);
1631 
1632   rtx body = PATTERN (store_insn);
1633   rtx dest_address = XEXP (SET_DEST (body), 0);
1634   rtx swap_reg = XEXP (SET_SRC (body), 0);
1635   gcc_assert (REG_P (dest_address)
1636 	      || rs6000_sum_of_two_registers_p (dest_address));
1637 
1638   /* Find the swap instruction that provides the value to be stored by
1639      this store-with-swap instruction.  */
1640   struct df_insn_info *insn_info = DF_INSN_INFO_GET (store_insn);
1641   df_ref use;
1642   rtx_insn *swap_insn = NULL;
1643   unsigned uid2 = 0;
1644   FOR_EACH_INSN_INFO_USE (use, insn_info)
1645     {
1646       struct df_link *def_link = DF_REF_CHAIN (use);
1647 
1648       /* If this is not the definition of the candidate swap register,
1649 	 then skip it.  We are only interested in the swap insn.  */
1650       if (!rtx_equal_p (DF_REF_REG (use), swap_reg))
1651 	continue;
1652 
1653       /* If there is no def or the def is artificial or there are
1654 	 multiple defs, we should not be here.  */
1655       gcc_assert (def_link && def_link->ref && !def_link->next
1656 		  && !DF_REF_IS_ARTIFICIAL (def_link->ref));
1657 
1658       swap_insn = DF_REF_INSN (def_link->ref);
1659       uid2 = INSN_UID (swap_insn);
1660 
1661       /* If this source value is not a simple swap, we should not be here.  */
1662       gcc_assert (insn_entry[uid2].is_swap && !insn_entry[uid2].is_load
1663 		  && !insn_entry[uid2].is_store);
1664 
1665       /* We've processed the use we care about, so break out of
1666 	 this loop.  */
1667       break;
1668     }
1669 
1670   /* At this point, swap_insn and uid2 represent the swap instruction
1671      that feeds the store.  */
1672   gcc_assert (swap_insn);
1673   rtx set = single_set (store_insn);
1674   gcc_assert (set);
1675   rtx dest_exp = SET_DEST (set);
1676   rtx src_exp = XEXP (SET_SRC (body), 0);
1677   enum machine_mode mode = GET_MODE (dest_exp);
1678   gcc_assert (MEM_P (dest_exp));
1679   gcc_assert (MEM_ALIGN (dest_exp) >= 128);
1680 
1681   /* Replace the store with a new stvx insn.  */
1682   rtx stvx;
1683   stvx = rs6000_gen_stvx (mode, dest_exp, src_exp);
1684 
1685   rtx_insn *new_insn = emit_insn_before (stvx, store_insn);
1686   rtx new_body = PATTERN (new_insn);
1687 
1688   gcc_assert ((GET_CODE (new_body) == SET)
1689 	      && MEM_P (SET_DEST (new_body)));
1690 
1691   basic_block bb = BLOCK_FOR_INSN (store_insn);
1692   set_block_for_insn (new_insn, bb);
1693   /* Handle REG_EH_REGION note.  */
1694   if (cfun->can_throw_non_call_exceptions && BB_END (bb) == store_insn)
1695     {
1696       rtx note = find_reg_note (store_insn, REG_EH_REGION, NULL_RTX);
1697       if (note)
1698 	add_reg_note (new_insn, REG_EH_REGION, XEXP (note, 0));
1699     }
1700   df_insn_rescan (new_insn);
1701 
1702   df_insn_delete (store_insn);
1703   remove_insn (store_insn);
1704   store_insn->set_deleted ();
1705 
1706   /* Replace the swap with a copy.  */
1707   uid2 = INSN_UID (swap_insn);
1708   mark_swaps_for_removal (insn_entry, uid2);
1709   replace_swap_with_copy (insn_entry, uid2);
1710 }
1711 
1712 /* Generate an rtx expression to represent use of the lvx insn to load
1713    from memory SRC_EXP into register DEST_EXP with vector mode MODE.  */
1714 rtx
1715 rs6000_gen_lvx (enum machine_mode mode, rtx dest_exp, rtx src_exp)
1716 {
1717   rtx lvx;
1718 
1719   if (mode == V16QImode)
1720     lvx = gen_altivec_lvx_v16qi (dest_exp, src_exp);
1721   else if (mode == V8HImode)
1722     lvx = gen_altivec_lvx_v8hi (dest_exp, src_exp);
1723 #ifdef HAVE_V8HFmode
1724   else if (mode == V8HFmode)
1725     lvx = gen_altivec_lvx_v8hf (dest_exp, src_exp);
1726 #endif
1727   else if (mode == V4SImode)
1728     lvx = gen_altivec_lvx_v4si (dest_exp, src_exp);
1729   else if (mode == V4SFmode)
1730     lvx = gen_altivec_lvx_v4sf (dest_exp, src_exp);
1731   else if (mode == V2DImode)
1732     lvx = gen_altivec_lvx_v2di (dest_exp, src_exp);
1733   else if (mode == V2DFmode)
1734     lvx = gen_altivec_lvx_v2df (dest_exp, src_exp);
1735   else if (mode == V1TImode)
1736     lvx = gen_altivec_lvx_v1ti (dest_exp, src_exp);
1737   else
1738     /* KFmode, TFmode, other modes not expected in this context.  */
1739     gcc_unreachable ();
1740 
1741   rtx new_mem_exp = SET_SRC (PATTERN (lvx));
1742   mimic_memory_attributes_and_flags (new_mem_exp, src_exp);
1743 
1744   return lvx;
1745 }
1746 
1747 /* Given that SWAP_INSN represents a swap of an aligned
1748    load-with-swap, replace the load with an aligned load (without
1749    swap) and replace the swap with a copy insn.  */
1750 static void
1751 replace_swapped_aligned_load (swap_web_entry *insn_entry, rtx swap_insn)
1752 {
1753   /* Find the load.  */
1754   unsigned uid = INSN_UID (swap_insn);
1755   /* Only call this if quad_aligned_load_p (swap_insn).  */
1756   gcc_assert (insn_entry[uid].is_swap && !insn_entry[uid].is_load);
1757   struct df_insn_info *insn_info = DF_INSN_INFO_GET (swap_insn);
1758 
1759   /* Since insn is known to represent a swap instruction, we know it
1760      "uses" only one input variable.  */
1761   df_ref use = DF_INSN_INFO_USES (insn_info);
1762 
1763   /* Figure out where this input variable is defined.  */
1764   struct df_link *def_link = DF_REF_CHAIN (use);
1765   gcc_assert (def_link && !def_link->next);
1766   gcc_assert (def_link->ref
1767 	      && !DF_REF_IS_ARTIFICIAL (def_link->ref));
1768 
1769   rtx_insn *def_insn = DF_REF_INSN (def_link->ref);
1770   unsigned uid2 = INSN_UID (def_insn);
1771 
1772   /* We're expecting a load-with-swap insn.  */
1773   gcc_assert (insn_entry[uid2].is_load && insn_entry[uid2].is_swap);
1774 
1775   /* We expect this to be a set whose source represents a swapped load
1776      from memory (indicated by a VEC_SELECT or a doubleword rotate).  */
1777   rtx body = PATTERN (def_insn);
1778   gcc_assert ((GET_CODE (body) == SET)
1779 	      && (GET_CODE (SET_SRC (body)) == VEC_SELECT
1780 		  || pattern_is_rotate64 (body))
1781 	      && MEM_P (XEXP (SET_SRC (body), 0)));
1782 
1783   rtx src_exp = XEXP (SET_SRC (body), 0);
1784   enum machine_mode mode = GET_MODE (src_exp);
1785   rtx lvx = rs6000_gen_lvx (mode, SET_DEST (body), src_exp);
1786 
1787   rtx_insn *new_insn = emit_insn_before (lvx, def_insn);
1788   rtx new_body = PATTERN (new_insn);
1789 
1790   gcc_assert ((GET_CODE (new_body) == SET)
1791 	      && MEM_P (SET_SRC (new_body)));
1792 
1793   basic_block bb = BLOCK_FOR_INSN (def_insn);
1794   set_block_for_insn (new_insn, bb);
1795   /* Handle REG_EH_REGION note.  */
1796   if (cfun->can_throw_non_call_exceptions && BB_END (bb) == def_insn)
1797     {
1798       rtx note = find_reg_note (def_insn, REG_EH_REGION, NULL_RTX);
1799       if (note)
1800 	add_reg_note (new_insn, REG_EH_REGION, XEXP (note, 0));
1801     }
1802   df_insn_rescan (new_insn);
1803 
1804   df_insn_delete (def_insn);
1805   remove_insn (def_insn);
1806   def_insn->set_deleted ();
1807 
1808   /* Replace the swap with a copy.  */
1809   mark_swaps_for_removal (insn_entry, uid);
1810   replace_swap_with_copy (insn_entry, uid);
1811 }
1812 
1813 /* Given that SWAP_INSN represents a swap of a load of a constant
1814    vector value, replace with a single instruction that loads a
1815    swapped variant of the original constant.
1816 
1817    The "natural" representation of a byte array in memory is the same
1818    for big endian and little endian.
1819 
1820    unsigned char byte_array[] =
1821      { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f };
1822 
1823    However, when loaded into a vector register, the representation
1824    depends on endian conventions.
1825 
1826    In big-endian mode, the register holds:
1827 
1828      MSB                                            LSB
1829      [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f ]
1830 
1831    In little-endian mode, the register holds:
1832 
1833      MSB                                            LSB
1834      [ f, e, d, c, b, a, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 ]
1835 
1836    Word arrays require different handling.  Consider the word array:
1837 
1838    unsigned int word_array[] =
1839      { 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f };
1840 
1841    The in-memory representation depends on endian configuration.  The
1842    equivalent array, declared as a byte array, in memory would be:
1843 
1844    unsigned char big_endian_word_array_data[] =
1845      { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f }
1846 
1847    unsigned char little_endian_word_array_data[] =
1848      { 3, 2, 1, 0, 7, 6, 5, 4, b, a, 9, 8, f, e, d, c }
1849 
1850    In big-endian mode, the register holds:
1851 
1852      MSB                                            LSB
1853      [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f ]
1854 
1855    In little-endian mode, the register holds:
1856 
1857      MSB                                            LSB
1858      [ c, d, e, f, 8, 9, a, b, 4, 5, 6, 7, 0, 1, 2, 3 ]
1859 
1860 
1861   Similar transformations apply to the vector of half-word and vector
1862   of double-word representations.
1863 
1864   For now, don't handle vectors of quad-precision values.  Just return.
1865   A better solution is to fix the code generator to emit lvx/stvx for
1866   those.  */
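/* For example (purely illustrative values), a V4SI constant

     { 1, 2, 3, 4 }

   is replaced by the doubleword-rotated constant

     { 3, 4, 1, 2 }

   The doubleword swap performed by the permuting load then restores the
   original element order in the register, so the trailing swap insn can
   be converted into a simple copy.  */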
1867 static void
1868 replace_swapped_load_constant (swap_web_entry *insn_entry, rtx swap_insn)
1869 {
1870   /* Find the load.  */
1871   struct df_insn_info *insn_info = DF_INSN_INFO_GET (swap_insn);
1872   rtx_insn *load_insn;
1873   df_ref use  = DF_INSN_INFO_USES (insn_info);
1874   struct df_link *def_link = DF_REF_CHAIN (use);
1875   gcc_assert (def_link && !def_link->next);
1876 
1877   load_insn = DF_REF_INSN (def_link->ref);
1878   gcc_assert (load_insn);
1879 
1880   /* Find the TOC-relative symbol access.  */
1881   insn_info = DF_INSN_INFO_GET (load_insn);
1882   use = DF_INSN_INFO_USES (insn_info);
1883 
1884   def_link = DF_REF_CHAIN (use);
1885   gcc_assert (def_link && !def_link->next);
1886 
1887   rtx_insn *tocrel_insn = DF_REF_INSN (def_link->ref);
1888   gcc_assert (tocrel_insn);
1889 
1890   /* Find the embedded CONST_VECTOR.  We have to call toc_relative_expr_p
1891      to set tocrel_base; otherwise it would be unnecessary as we've
1892      already established it will return true.  */
1893   rtx base, offset;
1894   rtx tocrel_expr = SET_SRC (PATTERN (tocrel_insn));
1895   const_rtx tocrel_base;
1896 
1897   /* There is an extra level of indirection for small/large code models.  */
1898   if (MEM_P (tocrel_expr))
1899     tocrel_expr = XEXP (tocrel_expr, 0);
1900 
1901   if (!toc_relative_expr_p (tocrel_expr, false, &tocrel_base, NULL))
1902     gcc_unreachable ();
1903 
1904   split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
1905   rtx const_vector = get_pool_constant (base);
1906 
1907   /* With the extra indirection, get_pool_constant will produce the
1908      real constant from the reg_equal expression, so get the real
1909      constant.  */
1910   if (SYMBOL_REF_P (const_vector))
1911     const_vector = get_pool_constant (const_vector);
1912   gcc_assert (GET_CODE (const_vector) == CONST_VECTOR);
1913 
1914   rtx new_mem;
1915   enum machine_mode mode = GET_MODE (const_vector);
1916 
1917   /* Create an adjusted constant from the original constant.  */
1918   if (mode == V1TImode)
1919     /* V1TImode is not handled (see above); leave the sequence unchanged.  */
1920     return;
1921   else if (mode == V16QImode)
1922     {
1923       rtx vals = gen_rtx_PARALLEL (mode, rtvec_alloc (16));
1924       int i;
1925 
1926       for (i = 0; i < 16; i++)
1927 	XVECEXP (vals, 0, ((i+8) % 16)) = XVECEXP (const_vector, 0, i);
1928       rtx new_const_vector = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
1929       new_mem = force_const_mem (mode, new_const_vector);
1930     }
1931   else if ((mode == V8HImode)
1932 #ifdef HAVE_V8HFmode
1933 	   || (mode == V8HFmode)
1934 #endif
1935 	   )
1936     {
1937       rtx vals = gen_rtx_PARALLEL (mode, rtvec_alloc (8));
1938       int i;
1939 
1940       for (i = 0; i < 8; i++)
1941 	XVECEXP (vals, 0, ((i+4) % 8)) = XVECEXP (const_vector, 0, i);
1942       rtx new_const_vector = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
1943       new_mem = force_const_mem (mode, new_const_vector);
1944     }
1945   else if ((mode == V4SImode) || (mode == V4SFmode))
1946     {
1947       rtx vals = gen_rtx_PARALLEL (mode, rtvec_alloc (4));
1948       int i;
1949 
1950       for (i = 0; i < 4; i++)
1951 	XVECEXP (vals, 0, ((i+2) % 4)) = XVECEXP (const_vector, 0, i);
1952       rtx new_const_vector = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
1953       new_mem = force_const_mem (mode, new_const_vector);
1954     }
1955   else if ((mode == V2DImode) || (mode == V2DFmode))
1956     {
1957       rtx vals = gen_rtx_PARALLEL (mode, rtvec_alloc (2));
1958       int i;
1959 
1960       for (i = 0; i < 2; i++)
1961 	XVECEXP (vals, 0, ((i+1) % 2)) = XVECEXP (const_vector, 0, i);
1962       rtx new_const_vector = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
1963       new_mem = force_const_mem (mode, new_const_vector);
1964     }
1965   else
1966     {
1967       /* We do not expect other modes to be constant-load-swapped.  */
1968       gcc_unreachable ();
1969     }
1970 
1971   /* This gives us a MEM whose base operand is a SYMBOL_REF, which we
1972      can't recognize.  Force the SYMBOL_REF into a register.  */
1973   if (!REG_P (XEXP (new_mem, 0))) {
1974     rtx base_reg = force_reg (Pmode, XEXP (new_mem, 0));
1975     XEXP (new_mem, 0) = base_reg;
1976 
1977     /* Move the newly created insn ahead of the load insn.  */
1978     /* The last insn is the insn that forced new_mem into a register.  */
1979     rtx_insn *force_insn = get_last_insn ();
1980     /* Remove this insn from the end of the instruction sequence.  */
1981     remove_insn (force_insn);
1982     rtx_insn *before_load_insn = PREV_INSN (load_insn);
1983 
1984     /* And insert this insn back into the sequence before the previous
1985        load insn so this new expression will be available when the
1986        existing load is modified to load the swapped constant.  */
1987     add_insn_after (force_insn, before_load_insn, BLOCK_FOR_INSN (load_insn));
1988     df_insn_rescan (before_load_insn);
1989     df_insn_rescan (force_insn);
1990   }
1991 
1992   /* Replace the MEM in the load instruction and rescan it.  */
1993   XEXP (SET_SRC (PATTERN (load_insn)), 0) = new_mem;
1994   INSN_CODE (load_insn) = -1; /* Force re-recognition.  */
1995   df_insn_rescan (load_insn);
1996 
1997   unsigned int uid = INSN_UID (swap_insn);
1998   mark_swaps_for_removal (insn_entry, uid);
1999   replace_swap_with_copy (insn_entry, uid);
2000 }
2001 
2002 /* Dump the swap table to DUMP_FILE.  */
2003 static void
2004 dump_swap_insn_table (swap_web_entry *insn_entry)
2005 {
2006   int e = get_max_uid ();
2007   fprintf (dump_file, "\nRelevant insns with their flag settings\n\n");
2008 
2009   for (int i = 0; i < e; ++i)
2010     if (insn_entry[i].is_relevant)
2011       {
2012 	swap_web_entry *pred_entry = (swap_web_entry *)insn_entry[i].pred ();
2013 	fprintf (dump_file, "%6d %6d  ", i,
2014 		 pred_entry && pred_entry->insn
2015 		 ? INSN_UID (pred_entry->insn) : 0);
2016 	if (insn_entry[i].is_load)
2017 	  fputs ("load ", dump_file);
2018 	if (insn_entry[i].is_store)
2019 	  fputs ("store ", dump_file);
2020 	if (insn_entry[i].is_swap)
2021 	  fputs ("swap ", dump_file);
2022 	if (insn_entry[i].is_live_in)
2023 	  fputs ("live-in ", dump_file);
2024 	if (insn_entry[i].is_live_out)
2025 	  fputs ("live-out ", dump_file);
2026 	if (insn_entry[i].contains_subreg)
2027 	  fputs ("subreg ", dump_file);
2028 	if (insn_entry[i].is_128_int)
2029 	  fputs ("int128 ", dump_file);
2030 	if (insn_entry[i].is_call)
2031 	  fputs ("call ", dump_file);
2032 	if (insn_entry[i].is_swappable)
2033 	  {
2034 	    fputs ("swappable ", dump_file);
2035 	    if (insn_entry[i].special_handling == SH_CONST_VECTOR)
2036 	      fputs ("special:constvec ", dump_file);
2037 	    else if (insn_entry[i].special_handling == SH_SUBREG)
2038 	      fputs ("special:subreg ", dump_file);
2039 	    else if (insn_entry[i].special_handling == SH_NOSWAP_LD)
2040 	      fputs ("special:load ", dump_file);
2041 	    else if (insn_entry[i].special_handling == SH_NOSWAP_ST)
2042 	      fputs ("special:store ", dump_file);
2043 	    else if (insn_entry[i].special_handling == SH_EXTRACT)
2044 	      fputs ("special:extract ", dump_file);
2045 	    else if (insn_entry[i].special_handling == SH_SPLAT)
2046 	      fputs ("special:splat ", dump_file);
2047 	    else if (insn_entry[i].special_handling == SH_XXPERMDI)
2048 	      fputs ("special:xxpermdi ", dump_file);
2049 	    else if (insn_entry[i].special_handling == SH_CONCAT)
2050 	      fputs ("special:concat ", dump_file);
2051 	    else if (insn_entry[i].special_handling == SH_VPERM)
2052 	      fputs ("special:vperm ", dump_file);
2053 	  }
2054 	if (insn_entry[i].web_not_optimizable)
2055 	  fputs ("unoptimizable ", dump_file);
2056 	if (insn_entry[i].will_delete)
2057 	  fputs ("delete ", dump_file);
2058 	fputs ("\n", dump_file);
2059       }
2060   fputs ("\n", dump_file);
2061 }
2062 
2063 /* Return RTX with its address canonicalized to (reg) or (+ reg reg).
2064    Here RTX is an (& addr (const_int -16)).  Always return a new copy
2065    to avoid problems with combine.  */
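/* For instance (illustrative operands), given

     (and (plus (reg:DI 3) (const_int 32)) (const_int -16))

   the constant term of the PLUS is forced into a fresh register, and a new

     (and (plus (reg:DI 3) (reg:DI <new>)) (const_int -16))

   is returned.  An address that is neither a REG nor a PLUS is forced
   into a register as a whole.  */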
2066 static rtx
2067 alignment_with_canonical_addr (rtx align)
2068 {
2069   rtx canon;
2070   rtx addr = XEXP (align, 0);
2071 
2072   if (REG_P (addr))
2073     canon = addr;
2074 
2075   else if (GET_CODE (addr) == PLUS)
2076     {
2077       rtx addrop0 = XEXP (addr, 0);
2078       rtx addrop1 = XEXP (addr, 1);
2079 
2080       if (!REG_P (addrop0))
2081 	addrop0 = force_reg (GET_MODE (addrop0), addrop0);
2082 
2083       if (!REG_P (addrop1))
2084 	addrop1 = force_reg (GET_MODE (addrop1), addrop1);
2085 
2086       canon = gen_rtx_PLUS (GET_MODE (addr), addrop0, addrop1);
2087     }
2088 
2089   else
2090     canon = force_reg (GET_MODE (addr), addr);
2091 
2092   return gen_rtx_AND (GET_MODE (align), canon, GEN_INT (-16));
2093 }
2094 
2095 /* Check whether an rtx is an alignment mask, and if so, return
2096    a fully-expanded rtx for the masking operation.  */
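/* Typical shapes accepted here (illustrative operands) are

     (set (reg:DI 200) (and:DI (reg:DI 100) (const_int -16)))

   and the same AND where the mask operand is instead a register whose
   single reaching definition sets it to the constant -16.  In both cases
   the result is the canonicalized masking operation built by
   alignment_with_canonical_addr.  */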
2097 static rtx
2098 alignment_mask (rtx_insn *insn)
2099 {
2100   rtx body = PATTERN (insn);
2101 
2102   if (GET_CODE (body) != SET
2103       || GET_CODE (SET_SRC (body)) != AND
2104       || !REG_P (XEXP (SET_SRC (body), 0)))
2105     return 0;
2106 
2107   rtx mask = XEXP (SET_SRC (body), 1);
2108 
2109   if (CONST_INT_P (mask))
2110     {
2111       if (INTVAL (mask) == -16)
2112 	return alignment_with_canonical_addr (SET_SRC (body));
2113       else
2114 	return 0;
2115     }
2116 
2117   if (!REG_P (mask))
2118     return 0;
2119 
2120   struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
2121   df_ref use;
2122   rtx real_mask = 0;
2123 
2124   FOR_EACH_INSN_INFO_USE (use, insn_info)
2125     {
2126       if (!rtx_equal_p (DF_REF_REG (use), mask))
2127 	continue;
2128 
2129       struct df_link *def_link = DF_REF_CHAIN (use);
2130       if (!def_link || def_link->next)
2131 	return 0;
2132 
2133       rtx_insn *const_insn = DF_REF_INSN (def_link->ref);
2134       rtx const_body = PATTERN (const_insn);
2135       if (GET_CODE (const_body) != SET)
2136 	return 0;
2137 
2138       real_mask = SET_SRC (const_body);
2139 
2140       if (!CONST_INT_P (real_mask)
2141 	  || INTVAL (real_mask) != -16)
2142 	return 0;
2143     }
2144 
2145   if (real_mask == 0)
2146     return 0;
2147 
2148   return alignment_with_canonical_addr (SET_SRC (body));
2149 }
2150 
2151 /* Given INSN that's a load or store based at BASE_REG, check if
2152    all of its feeding computations align its address on a 16-byte
2153    boundary.  If so, return true and add all definition insns into
2154    AND_INSNS and their corresponding fully-expanded rtxes for the
2155    masking operations into AND_OPS.  */
2156 
2157 static bool
2158 find_alignment_op (rtx_insn *insn, rtx base_reg, vec<rtx_insn *> *and_insns,
2159 		   vec<rtx> *and_ops)
2160 {
2161   df_ref base_use;
2162   struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
2163   rtx and_operation = 0;
2164 
2165   FOR_EACH_INSN_INFO_USE (base_use, insn_info)
2166     {
2167       if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
2168 	continue;
2169 
2170       struct df_link *base_def_link = DF_REF_CHAIN (base_use);
2171       if (!base_def_link)
2172 	return false;
2173 
2174       while (base_def_link)
2175 	{
2176 	  /* With stack-protector code enabled, and possibly in other
2177 	     circumstances, there may not be an associated insn for
2178 	     the def.  */
2179 	  if (DF_REF_IS_ARTIFICIAL (base_def_link->ref))
2180 	    return false;
2181 
2182 	  rtx_insn *and_insn = DF_REF_INSN (base_def_link->ref);
2183 	  and_operation = alignment_mask (and_insn);
2184 
2185 	  /* Stop if we find any one which doesn't align.  */
2186 	  if (!and_operation)
2187 	    return false;
2188 
2189 	  and_insns->safe_push (and_insn);
2190 	  and_ops->safe_push (and_operation);
2191 	  base_def_link = base_def_link->next;
2192 	}
2193     }
2194 
2195   return and_operation;
2196 }
2197 
2198 struct del_info { bool replace; rtx_insn *replace_insn; };
2199 
2200 /* If INSN is the load for an lvx pattern, put it in canonical form.  */
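/* Canonicalization sketch, with illustrative operands: a sequence such as

     (set (reg:DI A) (and:DI (reg:DI B) (const_int -16)))
     (set (reg:V4SI X) (vec_select:V4SI (mem:V4SI (reg:DI A)) ...))  ; load-with-swap
     (set (reg:V4SI Y) (vec_select:V4SI (reg:V4SI X) ...))           ; swap

   is rewritten so that the load uses (and:DI (reg:DI B') (const_int -16))
   directly as its address, where B' is a fresh copy of B made next to the
   original AND, and the trailing swap becomes a plain copy.  The load can
   then be recognized as an altivec lvx pattern.  */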
2201 static void
2202 recombine_lvx_pattern (rtx_insn *insn, del_info *to_delete)
2203 {
2204   rtx body = PATTERN (insn);
2205   gcc_assert (GET_CODE (body) == SET
2206 	      && (GET_CODE (SET_SRC (body)) == VEC_SELECT
2207 		  || pattern_is_rotate64 (body))
2208 	      && MEM_P (XEXP (SET_SRC (body), 0)));
2209 
2210   rtx mem = XEXP (SET_SRC (body), 0);
2211   rtx base_reg = XEXP (mem, 0);
2212 
2213   auto_vec<rtx_insn *> and_insns;
2214   auto_vec<rtx> and_ops;
2215   bool is_any_def_and
2216     = find_alignment_op (insn, base_reg, &and_insns, &and_ops);
2217 
2218   if (is_any_def_and)
2219     {
2220       gcc_assert (and_insns.length () == and_ops.length ());
2221       df_ref def;
2222       struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
2223       FOR_EACH_INSN_INFO_DEF (def, insn_info)
2224 	{
2225 	  struct df_link *link = DF_REF_CHAIN (def);
2226 	  if (!link || link->next)
2227 	    break;
2228 
2229 	  rtx_insn *swap_insn = DF_REF_INSN (link->ref);
2230 	  if (!insn_is_swap_p (swap_insn)
2231 	      || insn_is_load_p (swap_insn)
2232 	      || insn_is_store_p (swap_insn))
2233 	    break;
2234 
2235 	  /* Expected lvx pattern found.  Change the swap to
2236 	     a copy, and propagate the AND operation into the
2237 	     load.  */
2238 	  to_delete[INSN_UID (swap_insn)].replace = true;
2239 	  to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;
2240 
2241 	  rtx new_reg = 0;
2242 	  rtx and_mask = 0;
2243 	  for (unsigned i = 0; i < and_insns.length (); i++)
2244 	    {
2245 	      /* However, first we must be sure that we make the
2246 		 base register from the AND operation available
2247 		 in case the register has been overwritten.  Copy
2248 		 the base register to a new pseudo and use that
2249 		 as the base register of the AND operation in
2250 		 the new LVX instruction.  */
2251 	      rtx_insn *and_insn = and_insns[i];
2252 	      rtx and_op = and_ops[i];
2253 	      rtx and_base = XEXP (and_op, 0);
2254 	      if (!new_reg)
2255 		{
2256 		  new_reg = gen_reg_rtx (GET_MODE (and_base));
2257 		  and_mask = XEXP (and_op, 1);
2258 		}
2259 	      rtx copy = gen_rtx_SET (new_reg, and_base);
2260 	      rtx_insn *new_insn = emit_insn_after (copy, and_insn);
2261 	      set_block_for_insn (new_insn, BLOCK_FOR_INSN (and_insn));
2262 	      df_insn_rescan (new_insn);
2263 	    }
2264 
2265 	  XEXP (mem, 0) = gen_rtx_AND (GET_MODE (new_reg), new_reg, and_mask);
2266 	  SET_SRC (body) = mem;
2267 	  INSN_CODE (insn) = -1; /* Force re-recognition.  */
2268 	  df_insn_rescan (insn);
2269 
2270 	  if (dump_file)
2271 	    fprintf (dump_file, "lvx opportunity found at %d\n",
2272 		     INSN_UID (insn));
2273 	}
2274     }
2275 }
2276 
2277 /* If INSN is the store for an stvx pattern, put it in canonical form.  */
2278 static void
2279 recombine_stvx_pattern (rtx_insn *insn, del_info *to_delete)
2280 {
2281   rtx body = PATTERN (insn);
2282   gcc_assert (GET_CODE (body) == SET
2283 	      && MEM_P (SET_DEST (body))
2284 	      && (GET_CODE (SET_SRC (body)) == VEC_SELECT
2285 		  || pattern_is_rotate64 (body)));
2286   rtx mem = SET_DEST (body);
2287   rtx base_reg = XEXP (mem, 0);
2288 
2289   auto_vec<rtx_insn *> and_insns;
2290   auto_vec<rtx> and_ops;
2291   bool is_any_def_and
2292     = find_alignment_op (insn, base_reg, &and_insns, &and_ops);
2293 
2294   if (is_any_def_and)
2295     {
2296       gcc_assert (and_insns.length () == and_ops.length ());
2297       rtx src_reg = XEXP (SET_SRC (body), 0);
2298       df_ref src_use;
2299       struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
2300       FOR_EACH_INSN_INFO_USE (src_use, insn_info)
2301 	{
2302 	  if (!rtx_equal_p (DF_REF_REG (src_use), src_reg))
2303 	    continue;
2304 
2305 	  struct df_link *link = DF_REF_CHAIN (src_use);
2306 	  if (!link || link->next)
2307 	    break;
2308 
2309 	  rtx_insn *swap_insn = DF_REF_INSN (link->ref);
2310 	  if (!insn_is_swap_p (swap_insn)
2311 	      || insn_is_load_p (swap_insn)
2312 	      || insn_is_store_p (swap_insn))
2313 	    break;
2314 
2315 	  /* Expected stvx pattern found.  Change the swap to
2316 	     a copy, and propagate the AND operation into the
2317 	     store.  */
2318 	  to_delete[INSN_UID (swap_insn)].replace = true;
2319 	  to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;
2320 
2321 	  rtx new_reg = 0;
2322 	  rtx and_mask = 0;
2323 	  for (unsigned i = 0; i < and_insns.length (); i++)
2324 	    {
2325 	      /* However, first we must be sure that we make the
2326 		 base register from the AND operation available
2327 		 in case the register has been overwritten.  Copy
2328 		 the base register to a new pseudo and use that
2329 		 as the base register of the AND operation in
2330 		 the new STVX instruction.  */
2331 	      rtx_insn *and_insn = and_insns[i];
2332 	      rtx and_op = and_ops[i];
2333 	      rtx and_base = XEXP (and_op, 0);
2334 	      if (!new_reg)
2335 		{
2336 		  new_reg = gen_reg_rtx (GET_MODE (and_base));
2337 		  and_mask = XEXP (and_op, 1);
2338 		}
2339 	      rtx copy = gen_rtx_SET (new_reg, and_base);
2340 	      rtx_insn *new_insn = emit_insn_after (copy, and_insn);
2341 	      set_block_for_insn (new_insn, BLOCK_FOR_INSN (and_insn));
2342 	      df_insn_rescan (new_insn);
2343 	    }
2344 
2345 	  XEXP (mem, 0) = gen_rtx_AND (GET_MODE (new_reg), new_reg, and_mask);
2346 	  SET_SRC (body) = src_reg;
2347 	  INSN_CODE (insn) = -1; /* Force re-recognition.  */
2348 	  df_insn_rescan (insn);
2349 
2350 	  if (dump_file)
2351 	    fprintf (dump_file, "stvx opportunity found at %d\n",
2352 		     INSN_UID (insn));
2353 	}
2354     }
2355 }
2356 
2357 /* Look for patterns created from builtin lvx and stvx calls, and
2358    canonicalize them to be properly recognized as such.  */
2359 static void
2360 recombine_lvx_stvx_patterns (function *fun)
2361 {
2362   int i;
2363   basic_block bb;
2364   rtx_insn *insn;
2365 
2366   int num_insns = get_max_uid ();
2367   del_info *to_delete = XCNEWVEC (del_info, num_insns);
2368 
2369   FOR_ALL_BB_FN (bb, fun)
2370     FOR_BB_INSNS (bb, insn)
2371     {
2372       if (!NONDEBUG_INSN_P (insn))
2373 	continue;
2374 
2375       if (insn_is_load_p (insn) && insn_is_swap_p (insn))
2376 	recombine_lvx_pattern (insn, to_delete);
2377       else if (insn_is_store_p (insn) && insn_is_swap_p (insn))
2378 	recombine_stvx_pattern (insn, to_delete);
2379     }
2380 
2381   /* Turning swaps into copies is delayed until now, to avoid problems
2382      with deleting instructions during the insn walk.  */
2383   for (i = 0; i < num_insns; i++)
2384     if (to_delete[i].replace)
2385       {
2386 	rtx swap_body = PATTERN (to_delete[i].replace_insn);
2387 	rtx src_reg = XEXP (SET_SRC (swap_body), 0);
2388 	rtx copy = gen_rtx_SET (SET_DEST (swap_body), src_reg);
2389 	rtx_insn *new_insn = emit_insn_before (copy,
2390 					       to_delete[i].replace_insn);
2391 	set_block_for_insn (new_insn,
2392 			    BLOCK_FOR_INSN (to_delete[i].replace_insn));
2393 	df_insn_rescan (new_insn);
2394 	df_insn_delete (to_delete[i].replace_insn);
2395 	remove_insn (to_delete[i].replace_insn);
2396 	to_delete[i].replace_insn->set_deleted ();
2397       }
2398 
2399   free (to_delete);
2400 }
2401 
2402 /* Main entry point for this pass.  */
2403 unsigned int
2404 rs6000_analyze_swaps (function *fun)
2405 {
2406   swap_web_entry *insn_entry;
2407   basic_block bb;
2408   rtx_insn *insn, *curr_insn = 0;
2409 
2410   /* Dataflow analysis for use-def chains.  */
2411   df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
2412   df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
2413   df_analyze ();
2414   df_set_flags (DF_DEFER_INSN_RESCAN);
2415 
2416   /* Pre-pass to recombine lvx and stvx patterns so we don't lose info.  */
2417   recombine_lvx_stvx_patterns (fun);
2418 
2419   /* Rebuild ud- and du-chains.  */
2420   df_remove_problem (df_chain);
2421   df_process_deferred_rescans ();
2422   df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
2423   df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
2424   df_analyze ();
2425   df_set_flags (DF_DEFER_INSN_RESCAN);
2426 
2427   /* Allocate structure to represent webs of insns.  */
2428   insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());
2429 
2430   /* Walk the insns to gather basic data.  */
2431   FOR_ALL_BB_FN (bb, fun)
2432     FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
2433     {
2434       unsigned int uid = INSN_UID (insn);
2435       if (NONDEBUG_INSN_P (insn))
2436 	{
2437 	  insn_entry[uid].insn = insn;
2438 
2439 	  if (GET_CODE (insn) == CALL_INSN)
2440 	    insn_entry[uid].is_call = 1;
2441 
2442 	  /* Walk the uses and defs to see if we mention vector regs.
2443 	     Record any constraints on optimization of such mentions.  */
2444 	  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
2445 	  df_ref mention;
2446 	  FOR_EACH_INSN_INFO_USE (mention, insn_info)
2447 	    {
2448 	      /* We use DF_REF_REAL_REG here to get inside any subregs.  */
2449 	      machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
2450 
2451 	      /* If a use gets its value from a call insn, it will be
2452 		 a hard register and will look like (reg:V4SI 3 3).
2453 		 The df analysis creates two mentions for GPR3 and GPR4,
2454 		 both DImode.  We must recognize this and treat it as a
2455 		 vector mention to ensure the call is unioned with this
2456 		 use.  */
2457 	      if (mode == DImode && DF_REF_INSN_INFO (mention))
2458 		{
2459 		  rtx feeder = DF_REF_INSN (mention);
2460 		  /* FIXME:  It is pretty hard to get from the df mention
2461 		     to the mode of the use in the insn.  We arbitrarily
2462 		     pick a vector mode here, even though the use might
2463 		     be a real DImode.  We can be too conservative
2464 		     (create a web larger than necessary) because of
2465 		     this, so consider eventually fixing this.  */
2466 		  if (GET_CODE (feeder) == CALL_INSN)
2467 		    mode = V4SImode;
2468 		}
2469 
2470 	      if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
2471 		{
2472 		  insn_entry[uid].is_relevant = 1;
2473 		  if (mode == TImode || mode == V1TImode
2474 		      || FLOAT128_VECTOR_P (mode))
2475 		    insn_entry[uid].is_128_int = 1;
2476 		  if (DF_REF_INSN_INFO (mention))
2477 		    insn_entry[uid].contains_subreg
2478 		      = !rtx_equal_p (DF_REF_REG (mention),
2479 				      DF_REF_REAL_REG (mention));
2480 		  union_defs (insn_entry, insn, mention);
2481 		}
2482 	    }
2483 	  FOR_EACH_INSN_INFO_DEF (mention, insn_info)
2484 	    {
2485 	      /* We use DF_REF_REAL_REG here to get inside any subregs.  */
2486 	      machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
2487 
2488 	      /* If we're loading up a hard vector register for a call,
2489 		 it looks like (set (reg:V4SI 9 9) (...)).  The df
2490 		 analysis creates two mentions for GPR9 and GPR10, both
2491 		 DImode.  So relying on the mode from the mentions
2492 		 isn't sufficient to ensure we union the call into the
2493 		 web with the parameter setup code.  */
2494 	      if (mode == DImode && GET_CODE (PATTERN (insn)) == SET
2495 		  && ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (SET_DEST (PATTERN (insn)))))
2496 		mode = GET_MODE (SET_DEST (PATTERN (insn)));
2497 
2498 	      if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
2499 		{
2500 		  insn_entry[uid].is_relevant = 1;
2501 		  if (mode == TImode || mode == V1TImode
2502 		      || FLOAT128_VECTOR_P (mode))
2503 		    insn_entry[uid].is_128_int = 1;
2504 		  if (DF_REF_INSN_INFO (mention))
2505 		    insn_entry[uid].contains_subreg
2506 		      = !rtx_equal_p (DF_REF_REG (mention),
2507 				      DF_REF_REAL_REG (mention));
2508 		  /* REG_FUNCTION_VALUE_P is not valid for subregs. */
2509 		  else if (REG_FUNCTION_VALUE_P (DF_REF_REG (mention)))
2510 		    insn_entry[uid].is_live_out = 1;
2511 		  union_uses (insn_entry, insn, mention);
2512 		}
2513 	    }
2514 
2515 	  if (insn_entry[uid].is_relevant)
2516 	    {
2517 	      /* Determine if this is a load or store.  */
2518 	      insn_entry[uid].is_load = insn_is_load_p (insn);
2519 	      insn_entry[uid].is_store = insn_is_store_p (insn);
2520 
2521 	      /* Determine if this is a doubleword swap.  If not,
2522 		 determine whether it can legally be swapped.  */
2523 	      if (insn_is_swap_p (insn))
2524 		insn_entry[uid].is_swap = 1;
2525 	      else
2526 		{
2527 		  unsigned int special = SH_NONE;
2528 		  insn_entry[uid].is_swappable
2529 		    = insn_is_swappable_p (insn_entry, insn, &special);
2530 		  if (special != SH_NONE && insn_entry[uid].contains_subreg)
2531 		    insn_entry[uid].is_swappable = 0;
2532 		  else if (special != SH_NONE)
2533 		    insn_entry[uid].special_handling = special;
2534 		  else if (insn_entry[uid].contains_subreg
2535 			   && has_part_mult (insn))
2536 		    insn_entry[uid].is_swappable = 0;
2537 		  else if (insn_entry[uid].contains_subreg)
2538 		    insn_entry[uid].special_handling = SH_SUBREG;
2539 		}
2540 	    }
2541 	}
2542     }
2543 
2544   if (dump_file)
2545     {
2546       fprintf (dump_file, "\nSwap insn entry table when first built\n");
2547       dump_swap_insn_table (insn_entry);
2548     }
2549 
2550   /* Record unoptimizable webs.  */
2551   unsigned e = get_max_uid (), i;
2552   for (i = 0; i < e; ++i)
2553     {
2554       if (!insn_entry[i].is_relevant)
2555 	continue;
2556 
2557       swap_web_entry *root
2558 	= (swap_web_entry*)(&insn_entry[i])->unionfind_root ();
2559 
2560       if (insn_entry[i].is_live_in || insn_entry[i].is_live_out
2561 	  || (insn_entry[i].contains_subreg
2562 	      && insn_entry[i].special_handling != SH_SUBREG)
2563 	  || insn_entry[i].is_128_int || insn_entry[i].is_call
2564 	  || !(insn_entry[i].is_swappable || insn_entry[i].is_swap))
2565 	root->web_not_optimizable = 1;
2566 
2567       /* If we have loads or stores that aren't permuting then the
2568 	 optimization isn't appropriate.  */
2569       else if ((insn_entry[i].is_load || insn_entry[i].is_store)
2570 	  && !insn_entry[i].is_swap && !insn_entry[i].is_swappable)
2571 	root->web_not_optimizable = 1;
2572 
2573       /* If we have a swap that is both fed by a permuting load
2574 	 and a feeder of a permuting store, then the optimization
2575 	 isn't appropriate.  (Consider vec_xl followed by vec_xst_be.)  */
2576       else if (insn_entry[i].is_swap && !insn_entry[i].is_load
2577 	       && !insn_entry[i].is_store
2578 	       && swap_feeds_both_load_and_store (&insn_entry[i]))
2579 	root->web_not_optimizable = 1;
2580 
2581       /* If we have permuting loads or stores that are not accompanied
2582 	 by a register swap, the optimization isn't appropriate.  */
2583       else if (insn_entry[i].is_load && insn_entry[i].is_swap)
2584 	{
2585 	  rtx insn = insn_entry[i].insn;
2586 	  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
2587 	  df_ref def;
2588 
2589 	  FOR_EACH_INSN_INFO_DEF (def, insn_info)
2590 	    {
2591 	      struct df_link *link = DF_REF_CHAIN (def);
2592 
2593 	      if (!chain_contains_only_swaps (insn_entry, link, FOR_LOADS))
2594 		{
2595 		  root->web_not_optimizable = 1;
2596 		  break;
2597 		}
2598 	    }
2599 	}
2600       else if (insn_entry[i].is_store && insn_entry[i].is_swap)
2601 	{
2602 	  rtx insn = insn_entry[i].insn;
2603 	  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
2604 	  df_ref use;
2605 
2606 	  FOR_EACH_INSN_INFO_USE (use, insn_info)
2607 	    {
2608 	      struct df_link *link = DF_REF_CHAIN (use);
2609 
2610 	      if (!chain_contains_only_swaps (insn_entry, link, FOR_STORES))
2611 		{
2612 		  root->web_not_optimizable = 1;
2613 		  break;
2614 		}
2615 	    }
2616 	}
2617     }
2618 
2619   if (dump_file)
2620     {
2621       fprintf (dump_file, "\nSwap insn entry table after web analysis\n");
2622       dump_swap_insn_table (insn_entry);
2623     }
2624 
2625   /* For each load and store in an optimizable web (which implies
2626      the loads and stores are permuting), find the associated
2627      register swaps and mark them for removal.  Due to various
2628      optimizations we may mark the same swap more than once.  Also
2629      perform special handling for swappable insns that require it.  */
2630   for (i = 0; i < e; ++i)
2631     if ((insn_entry[i].is_load || insn_entry[i].is_store)
2632 	&& insn_entry[i].is_swap)
2633       {
2634 	swap_web_entry* root_entry
2635 	  = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
2636 	if (!root_entry->web_not_optimizable)
2637 	  mark_swaps_for_removal (insn_entry, i);
2638       }
2639     else if (insn_entry[i].is_swappable && insn_entry[i].special_handling)
2640       {
2641 	swap_web_entry* root_entry
2642 	  = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
2643 	if (!root_entry->web_not_optimizable)
2644 	  handle_special_swappables (insn_entry, i);
2645       }
2646 
2647   /* Now delete the swaps marked for removal.  */
2648   for (i = 0; i < e; ++i)
2649     if (insn_entry[i].will_delete)
2650       replace_swap_with_copy (insn_entry, i);
2651 
2652   /* Clean up.  */
2653   free (insn_entry);
2654 
2655   /* Use a second pass over rtl to detect that certain vector values
2656      fetched from or stored to memory on quad-word aligned addresses
2657      can use lvx/stvx without swaps.  */
2658 
2659   /* First, rebuild ud chains.  */
2660   df_remove_problem (df_chain);
2661   df_process_deferred_rescans ();
2662   df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
2663   df_chain_add_problem (DF_UD_CHAIN);
2664   df_analyze ();
2665 
2666   swap_web_entry *pass2_insn_entry;
2667   pass2_insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());
2668 
2669   /* Walk the insns to gather basic data.  */
2670   FOR_ALL_BB_FN (bb, fun)
2671     FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
2672     {
2673       unsigned int uid = INSN_UID (insn);
2674       if (NONDEBUG_INSN_P (insn))
2675 	{
2676 	  pass2_insn_entry[uid].insn = insn;
2677 
2678 	  pass2_insn_entry[uid].is_relevant = 1;
2679 	  pass2_insn_entry[uid].is_load = insn_is_load_p (insn);
2680 	  pass2_insn_entry[uid].is_store = insn_is_store_p (insn);
2681 
2682 	  /* Determine if this is a doubleword swap.  If not,
2683 	     determine whether it can legally be swapped.  */
2684 	  if (insn_is_swap_p (insn))
2685 	    pass2_insn_entry[uid].is_swap = 1;
2686 	}
2687     }
2688 
2689   e = get_max_uid ();
2690   for (unsigned i = 0; i < e; ++i)
2691     if (pass2_insn_entry[i].is_swap && !pass2_insn_entry[i].is_load
2692 	&& !pass2_insn_entry[i].is_store)
2693       {
2694 	/* Replace swap of aligned load-swap with aligned unswapped
2695 	   load.  */
2696 	rtx_insn *swap_insn = pass2_insn_entry[i].insn;
2697 	if (quad_aligned_load_p (pass2_insn_entry, swap_insn))
2698 	  replace_swapped_aligned_load (pass2_insn_entry, swap_insn);
2699       }
2700     else if (pass2_insn_entry[i].is_swap && pass2_insn_entry[i].is_store)
2701       {
2702 	/* Replace aligned store-swap of swapped value with aligned
2703 	   unswapped store.  */
2704 	rtx_insn *swap_insn = pass2_insn_entry[i].insn;
2705 	if (quad_aligned_store_p (pass2_insn_entry, swap_insn))
2706 	  replace_swapped_aligned_store (pass2_insn_entry, swap_insn);
2707       }
2708 
2709   /* Clean up.  */
2710   free (pass2_insn_entry);
2711 
2712   /* Use a third pass over rtl to replace swap(load(vector constant))
2713      with load(swapped vector constant).  */
2714 
2715   /* First, rebuild ud chains.  */
2716   df_remove_problem (df_chain);
2717   df_process_deferred_rescans ();
2718   df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
2719   df_chain_add_problem (DF_UD_CHAIN);
2720   df_analyze ();
2721 
2722   swap_web_entry *pass3_insn_entry;
2723   pass3_insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());
2724 
2725   /* Walk the insns to gather basic data.  */
2726   FOR_ALL_BB_FN (bb, fun)
2727     FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
2728     {
2729       unsigned int uid = INSN_UID (insn);
2730       if (NONDEBUG_INSN_P (insn))
2731 	{
2732 	  pass3_insn_entry[uid].insn = insn;
2733 
2734 	  pass3_insn_entry[uid].is_relevant = 1;
2735 	  pass3_insn_entry[uid].is_load = insn_is_load_p (insn);
2736 	  pass3_insn_entry[uid].is_store = insn_is_store_p (insn);
2737 
2738 	  /* Determine if this is a doubleword swap.  If not,
2739 	     determine whether it can legally be swapped.  */
2740 	  if (insn_is_swap_p (insn))
2741 	    pass3_insn_entry[uid].is_swap = 1;
2742 	}
2743     }
2744 
2745   e = get_max_uid ();
2746   for (unsigned i = 0; i < e; ++i)
2747     if (pass3_insn_entry[i].is_swap && !pass3_insn_entry[i].is_load
2748 	&& !pass3_insn_entry[i].is_store)
2749       {
2750 	insn = pass3_insn_entry[i].insn;
2751 	if (const_load_sequence_p (pass3_insn_entry, insn))
2752 	  replace_swapped_load_constant (pass3_insn_entry, insn);
2753       }
2754 
2755   /* Clean up.  */
2756   free (pass3_insn_entry);
2757   return 0;
2758 }
2759 
2760 const pass_data pass_data_analyze_swaps =
2761 {
2762   RTL_PASS, /* type */
2763   "swaps", /* name */
2764   OPTGROUP_NONE, /* optinfo_flags */
2765   TV_NONE, /* tv_id */
2766   0, /* properties_required */
2767   0, /* properties_provided */
2768   0, /* properties_destroyed */
2769   0, /* todo_flags_start */
2770   TODO_df_finish, /* todo_flags_finish */
2771 };
2772 
2773 class pass_analyze_swaps : public rtl_opt_pass
2774 {
2775 public:
2776   pass_analyze_swaps (gcc::context *ctxt)
2777     : rtl_opt_pass (pass_data_analyze_swaps, ctxt)
2778   {}
2779 
2780   /* opt_pass methods: */
2781   virtual bool gate (function *)
2782     {
2783       return (optimize > 0 && !BYTES_BIG_ENDIAN && TARGET_VSX
2784 	      && !TARGET_P9_VECTOR && rs6000_optimize_swaps);
2785     }
2786 
2787   virtual unsigned int execute (function *fun)
2788     {
2789       return rs6000_analyze_swaps (fun);
2790     }
2791 
2792   opt_pass *clone ()
2793     {
2794       return new pass_analyze_swaps (m_ctxt);
2795     }
2796 
2797 }; // class pass_analyze_swaps
2798 
2799 rtl_opt_pass *
2800 make_pass_analyze_swaps (gcc::context *ctxt)
2801 {
2802   return new pass_analyze_swaps (ctxt);
2803 }
2804 
2805