xref: /dflybsd-src/contrib/gcc-8.0/gcc/ree.c (revision 38fd149817dfbff97799f62fcb70be98c4e32523)
1*38fd1498Szrj /* Redundant Extension Elimination pass for the GNU compiler.
2*38fd1498Szrj    Copyright (C) 2010-2018 Free Software Foundation, Inc.
3*38fd1498Szrj    Contributed by Ilya Enkovich (ilya.enkovich@intel.com)
4*38fd1498Szrj 
5*38fd1498Szrj    Based on the Redundant Zero-extension elimination pass contributed by
6*38fd1498Szrj    Sriraman Tallam (tmsriram@google.com) and Silvius Rus (rus@google.com).
7*38fd1498Szrj 
8*38fd1498Szrj This file is part of GCC.
9*38fd1498Szrj 
10*38fd1498Szrj GCC is free software; you can redistribute it and/or modify it under
11*38fd1498Szrj the terms of the GNU General Public License as published by the Free
12*38fd1498Szrj Software Foundation; either version 3, or (at your option) any later
13*38fd1498Szrj version.
14*38fd1498Szrj 
15*38fd1498Szrj GCC is distributed in the hope that it will be useful, but WITHOUT ANY
16*38fd1498Szrj WARRANTY; without even the implied warranty of MERCHANTABILITY or
17*38fd1498Szrj FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
18*38fd1498Szrj for more details.
19*38fd1498Szrj 
20*38fd1498Szrj You should have received a copy of the GNU General Public License
21*38fd1498Szrj along with GCC; see the file COPYING3.  If not see
22*38fd1498Szrj <http://www.gnu.org/licenses/>.  */
23*38fd1498Szrj 
24*38fd1498Szrj 
25*38fd1498Szrj /* Problem Description :
26*38fd1498Szrj    --------------------
27*38fd1498Szrj    This pass is intended to remove redundant extension instructions.
28*38fd1498Szrj    Such instructions appear for different reasons.  We expect some of
29*38fd1498Szrj    them due to implicit zero-extension in 64-bit registers after writing
30*38fd1498Szrj    to their lower 32-bit half (e.g. for the x86-64 architecture).
31*38fd1498Szrj    Another possible reason is a type cast which follows a load (for
32*38fd1498Szrj    instance a register restore) and which can be combined into a single
33*38fd1498Szrj    instruction, and for which earlier local passes, e.g. the combiner,
34*38fd1498Szrj    weren't able to optimize.
35*38fd1498Szrj 
36*38fd1498Szrj    How does this pass work  ?
37*38fd1498Szrj    --------------------------
38*38fd1498Szrj 
39*38fd1498Szrj    This pass is run after register allocation.  Hence, all registers that
40*38fd1498Szrj    this pass deals with are hard registers.  This pass first looks for an
41*38fd1498Szrj    extension instruction that could possibly be redundant.  Such extension
42*38fd1498Szrj    instructions show up in RTL with the pattern  :
43*38fd1498Szrj    (set (reg:<SWI248> x) (any_extend:<SWI248> (reg:<SWI124> x))),
44*38fd1498Szrj    where x can be any hard register.
45*38fd1498Szrj    Now, this pass tries to eliminate this instruction by merging the
46*38fd1498Szrj    extension with the definitions of register x.  For instance, if
47*38fd1498Szrj    one of the definitions of register x was  :
48*38fd1498Szrj    (set (reg:SI x) (plus:SI (reg:SI z1) (reg:SI z2))),
49*38fd1498Szrj    followed by extension  :
50*38fd1498Szrj    (set (reg:DI x) (zero_extend:DI (reg:SI x)))
51*38fd1498Szrj    then the combination converts this into :
52*38fd1498Szrj    (set (reg:DI x) (zero_extend:DI (plus:SI (reg:SI z1) (reg:SI z2)))).
53*38fd1498Szrj    If all the merged definitions are recognizable assembly instructions,
54*38fd1498Szrj    the extension is effectively eliminated.
55*38fd1498Szrj 
56*38fd1498Szrj    For example, for the x86-64 architecture, implicit zero-extensions
57*38fd1498Szrj    are captured with appropriate patterns in the i386.md file.  Hence,
58*38fd1498Szrj    these merged definitions can be matched to a single assembly instruction.
59*38fd1498Szrj    The original extension instruction is then deleted if all the
60*38fd1498Szrj    definitions can be merged.
61*38fd1498Szrj 
62*38fd1498Szrj    However, there are cases where the definition instruction cannot be
63*38fd1498Szrj    merged with an extension.  Examples are CALL instructions.  In such
64*38fd1498Szrj    cases, the original extension is not redundant and this pass does
65*38fd1498Szrj    not delete it.
66*38fd1498Szrj 
67*38fd1498Szrj    Handling conditional moves :
68*38fd1498Szrj    ----------------------------
69*38fd1498Szrj 
70*38fd1498Szrj    Architectures like x86-64 support conditional moves whose semantics for
71*38fd1498Szrj    extension differ from the other instructions.  For instance, the
72*38fd1498Szrj    instruction *cmov ebx, eax*
73*38fd1498Szrj    zero-extends eax onto rax only when the move from ebx to eax happens.
74*38fd1498Szrj    Otherwise, eax may not be zero-extended.  Consider conditional moves as
75*38fd1498Szrj    RTL instructions of the form
76*38fd1498Szrj    (set (reg:SI x) (if_then_else (cond) (reg:SI y) (reg:SI z))).
77*38fd1498Szrj    This pass tries to merge an extension with a conditional move by
78*38fd1498Szrj    actually merging the definitions of y and z with an extension and then
79*38fd1498Szrj    converting the conditional move into :
80*38fd1498Szrj    (set (reg:DI x) (if_then_else (cond) (reg:DI y) (reg:DI z))).
81*38fd1498Szrj    Since registers y and z are extended, register x will also be extended
82*38fd1498Szrj    after the conditional move.  Note that this step has to be done
83*38fd1498Szrj    transitively since the definition of a conditional copy can be
84*38fd1498Szrj    another conditional copy.
85*38fd1498Szrj 
86*38fd1498Szrj    Motivating Example I :
87*38fd1498Szrj    ---------------------
88*38fd1498Szrj    For this program :
89*38fd1498Szrj    **********************************************
90*38fd1498Szrj    bad_code.c
91*38fd1498Szrj 
92*38fd1498Szrj    int mask[1000];
93*38fd1498Szrj 
94*38fd1498Szrj    int foo(unsigned x)
95*38fd1498Szrj    {
96*38fd1498Szrj      if (x < 10)
97*38fd1498Szrj        x = x * 45;
98*38fd1498Szrj      else
99*38fd1498Szrj        x = x * 78;
100*38fd1498Szrj      return mask[x];
101*38fd1498Szrj    }
102*38fd1498Szrj    **********************************************
103*38fd1498Szrj 
104*38fd1498Szrj    $ gcc -O2 bad_code.c
105*38fd1498Szrj      ........
106*38fd1498Szrj      400315:       b8 4e 00 00 00          mov    $0x4e,%eax
107*38fd1498Szrj      40031a:       0f af f8                imul   %eax,%edi
108*38fd1498Szrj      40031d:       89 ff                   mov    %edi,%edi - useless extension
109*38fd1498Szrj      40031f:       8b 04 bd 60 19 40 00    mov    0x401960(,%rdi,4),%eax
110*38fd1498Szrj      400326:       c3                      retq
111*38fd1498Szrj      ......
112*38fd1498Szrj      400330:       ba 2d 00 00 00          mov    $0x2d,%edx
113*38fd1498Szrj      400335:       0f af fa                imul   %edx,%edi
114*38fd1498Szrj      400338:       89 ff                   mov    %edi,%edi - useless extension
115*38fd1498Szrj      40033a:       8b 04 bd 60 19 40 00    mov    0x401960(,%rdi,4),%eax
116*38fd1498Szrj      400341:       c3                      retq
117*38fd1498Szrj 
118*38fd1498Szrj    $ gcc -O2 -free bad_code.c
119*38fd1498Szrj      ......
120*38fd1498Szrj      400315:       6b ff 4e                imul   $0x4e,%edi,%edi
121*38fd1498Szrj      400318:       8b 04 bd 40 19 40 00    mov    0x401940(,%rdi,4),%eax
122*38fd1498Szrj      40031f:       c3                      retq
123*38fd1498Szrj      400320:       6b ff 2d                imul   $0x2d,%edi,%edi
124*38fd1498Szrj      400323:       8b 04 bd 40 19 40 00    mov    0x401940(,%rdi,4),%eax
125*38fd1498Szrj      40032a:       c3                      retq
126*38fd1498Szrj 
127*38fd1498Szrj    Motivating Example II :
128*38fd1498Szrj    ---------------------
129*38fd1498Szrj 
130*38fd1498Szrj    Here is an example with a conditional move.
131*38fd1498Szrj 
132*38fd1498Szrj    For this program :
133*38fd1498Szrj    **********************************************
134*38fd1498Szrj 
135*38fd1498Szrj    unsigned long long foo(unsigned x , unsigned y)
136*38fd1498Szrj    {
137*38fd1498Szrj      unsigned z;
138*38fd1498Szrj      if (x > 100)
139*38fd1498Szrj        z = x + y;
140*38fd1498Szrj      else
141*38fd1498Szrj        z = x - y;
142*38fd1498Szrj      return (unsigned long long)(z);
143*38fd1498Szrj    }
144*38fd1498Szrj 
145*38fd1498Szrj    $ gcc -O2 bad_code.c
146*38fd1498Szrj      ............
147*38fd1498Szrj      400360:       8d 14 3e                lea    (%rsi,%rdi,1),%edx
148*38fd1498Szrj      400363:       89 f8                   mov    %edi,%eax
149*38fd1498Szrj      400365:       29 f0                   sub    %esi,%eax
150*38fd1498Szrj      400367:       83 ff 65                cmp    $0x65,%edi
151*38fd1498Szrj      40036a:       0f 43 c2                cmovae %edx,%eax
152*38fd1498Szrj      40036d:       89 c0                   mov    %eax,%eax - useless extension
153*38fd1498Szrj      40036f:       c3                      retq
154*38fd1498Szrj 
155*38fd1498Szrj    $ gcc -O2 -free bad_code.c
156*38fd1498Szrj      .............
157*38fd1498Szrj      400360:       89 fa                   mov    %edi,%edx
158*38fd1498Szrj      400362:       8d 04 3e                lea    (%rsi,%rdi,1),%eax
159*38fd1498Szrj      400365:       29 f2                   sub    %esi,%edx
160*38fd1498Szrj      400367:       83 ff 65                cmp    $0x65,%edi
161*38fd1498Szrj      40036a:       89 d6                   mov    %edx,%esi
162*38fd1498Szrj      40036c:       48 0f 42 c6             cmovb  %rsi,%rax
163*38fd1498Szrj      400370:       c3                      retq
164*38fd1498Szrj 
165*38fd1498Szrj   Motivating Example III :
166*38fd1498Szrj   ---------------------
167*38fd1498Szrj 
168*38fd1498Szrj   Here is an example with a type cast.
169*38fd1498Szrj 
170*38fd1498Szrj   For this program :
171*38fd1498Szrj   **********************************************
172*38fd1498Szrj 
173*38fd1498Szrj   void test(int size, unsigned char *in, unsigned char *out)
174*38fd1498Szrj   {
175*38fd1498Szrj     int i;
176*38fd1498Szrj     unsigned char xr, xg, xy=0;
177*38fd1498Szrj 
178*38fd1498Szrj     for (i = 0; i < size; i++) {
179*38fd1498Szrj       xr = *in++;
180*38fd1498Szrj       xg = *in++;
181*38fd1498Szrj       xy = (unsigned char) ((19595*xr + 38470*xg) >> 16);
182*38fd1498Szrj       *out++ = xy;
183*38fd1498Szrj     }
184*38fd1498Szrj   }
185*38fd1498Szrj 
186*38fd1498Szrj   $ gcc -O2 bad_code.c
187*38fd1498Szrj     ............
188*38fd1498Szrj     10:   0f b6 0e                movzbl (%rsi),%ecx
189*38fd1498Szrj     13:   0f b6 46 01             movzbl 0x1(%rsi),%eax
190*38fd1498Szrj     17:   48 83 c6 02             add    $0x2,%rsi
191*38fd1498Szrj     1b:   0f b6 c9                movzbl %cl,%ecx - useless extension
192*38fd1498Szrj     1e:   0f b6 c0                movzbl %al,%eax - useless extension
193*38fd1498Szrj     21:   69 c9 8b 4c 00 00       imul   $0x4c8b,%ecx,%ecx
194*38fd1498Szrj     27:   69 c0 46 96 00 00       imul   $0x9646,%eax,%eax
195*38fd1498Szrj 
196*38fd1498Szrj    $ gcc -O2 -free bad_code.c
197*38fd1498Szrj      .............
198*38fd1498Szrj     10:   0f b6 0e                movzbl (%rsi),%ecx
199*38fd1498Szrj     13:   0f b6 46 01             movzbl 0x1(%rsi),%eax
200*38fd1498Szrj     17:   48 83 c6 02             add    $0x2,%rsi
201*38fd1498Szrj     1b:   69 c9 8b 4c 00 00       imul   $0x4c8b,%ecx,%ecx
202*38fd1498Szrj     21:   69 c0 46 96 00 00       imul   $0x9646,%eax,%eax
203*38fd1498Szrj 
204*38fd1498Szrj    Usefulness :
205*38fd1498Szrj    ----------
206*38fd1498Szrj 
207*38fd1498Szrj    The original redundant zero-extension elimination pass reported reduction
208*38fd1498Szrj    of the dynamic instruction count of a compression benchmark by 2.8% and
209*38fd1498Szrj    improvement of its run time by about 1%.
210*38fd1498Szrj 
211*38fd1498Szrj    The additional performance gain with the enhanced pass is mostly expected
212*38fd1498Szrj    on in-order architectures where redundancy cannot be compensated by out of
213*38fd1498Szrj    order execution.  Measurements showed up to 10% performance gain (reduced
214*38fd1498Szrj    run time) on EEMBC 2.0 benchmarks on Atom processor with geomean performance
215*38fd1498Szrj    gain 1%.  */
216*38fd1498Szrj 
217*38fd1498Szrj 
218*38fd1498Szrj #include "config.h"
219*38fd1498Szrj #include "system.h"
220*38fd1498Szrj #include "coretypes.h"
221*38fd1498Szrj #include "backend.h"
222*38fd1498Szrj #include "target.h"
223*38fd1498Szrj #include "rtl.h"
224*38fd1498Szrj #include "tree.h"
225*38fd1498Szrj #include "df.h"
226*38fd1498Szrj #include "memmodel.h"
227*38fd1498Szrj #include "tm_p.h"
228*38fd1498Szrj #include "optabs.h"
229*38fd1498Szrj #include "regs.h"
230*38fd1498Szrj #include "emit-rtl.h"
231*38fd1498Szrj #include "recog.h"
232*38fd1498Szrj #include "cfgrtl.h"
233*38fd1498Szrj #include "expr.h"
234*38fd1498Szrj #include "tree-pass.h"
235*38fd1498Szrj 
236*38fd1498Szrj /* This structure represents a candidate for elimination.  */
237*38fd1498Szrj 
238*38fd1498Szrj struct ext_cand
239*38fd1498Szrj {
240*38fd1498Szrj   /* The expression.  */
241*38fd1498Szrj   const_rtx expr;
242*38fd1498Szrj 
243*38fd1498Szrj   /* The kind of extension.  */
244*38fd1498Szrj   enum rtx_code code;
245*38fd1498Szrj 
246*38fd1498Szrj   /* The destination mode.  */
247*38fd1498Szrj   machine_mode mode;
248*38fd1498Szrj 
249*38fd1498Szrj   /* The instruction where it lives.  */
250*38fd1498Szrj   rtx_insn *insn;
251*38fd1498Szrj };
252*38fd1498Szrj 
253*38fd1498Szrj 
/* Exclusive upper bound on the INSN_UID values this pass expects to see.
   make_defs_and_copies_lists uses it to size its per-insn "visited" array
   and asserts that every definition's UID is below it.  It is not
   assigned in this part of the file.  */
static int max_insn_uid;
255*38fd1498Szrj 
256*38fd1498Szrj /* Update or remove REG_EQUAL or REG_EQUIV notes for INSN.  */
257*38fd1498Szrj 
258*38fd1498Szrj static bool
update_reg_equal_equiv_notes(rtx_insn * insn,machine_mode new_mode,machine_mode old_mode,enum rtx_code code)259*38fd1498Szrj update_reg_equal_equiv_notes (rtx_insn *insn, machine_mode new_mode,
260*38fd1498Szrj 			      machine_mode old_mode, enum rtx_code code)
261*38fd1498Szrj {
262*38fd1498Szrj   rtx *loc = &REG_NOTES (insn);
263*38fd1498Szrj   while (*loc)
264*38fd1498Szrj     {
265*38fd1498Szrj       enum reg_note kind = REG_NOTE_KIND (*loc);
266*38fd1498Szrj       if (kind == REG_EQUAL || kind == REG_EQUIV)
267*38fd1498Szrj 	{
268*38fd1498Szrj 	  rtx orig_src = XEXP (*loc, 0);
269*38fd1498Szrj 	  /* Update equivalency constants.  Recall that RTL constants are
270*38fd1498Szrj 	     sign-extended.  */
271*38fd1498Szrj 	  if (GET_CODE (orig_src) == CONST_INT
272*38fd1498Szrj 	      && HWI_COMPUTABLE_MODE_P (new_mode))
273*38fd1498Szrj 	    {
274*38fd1498Szrj 	      if (INTVAL (orig_src) >= 0 || code == SIGN_EXTEND)
275*38fd1498Szrj 		/* Nothing needed.  */;
276*38fd1498Szrj 	      else
277*38fd1498Szrj 		{
278*38fd1498Szrj 		  /* Zero-extend the negative constant by masking out the
279*38fd1498Szrj 		     bits outside the source mode.  */
280*38fd1498Szrj 		  rtx new_const_int
281*38fd1498Szrj 		    = gen_int_mode (INTVAL (orig_src)
282*38fd1498Szrj 				    & GET_MODE_MASK (old_mode),
283*38fd1498Szrj 				    new_mode);
284*38fd1498Szrj 		  if (!validate_change (insn, &XEXP (*loc, 0),
285*38fd1498Szrj 					new_const_int, true))
286*38fd1498Szrj 		    return false;
287*38fd1498Szrj 		}
288*38fd1498Szrj 	      loc = &XEXP (*loc, 1);
289*38fd1498Szrj 	    }
290*38fd1498Szrj 	  /* Drop all other notes, they assume a wrong mode.  */
291*38fd1498Szrj 	  else if (!validate_change (insn, loc, XEXP (*loc, 1), true))
292*38fd1498Szrj 	    return false;
293*38fd1498Szrj 	}
294*38fd1498Szrj       else
295*38fd1498Szrj 	loc = &XEXP (*loc, 1);
296*38fd1498Szrj     }
297*38fd1498Szrj   return true;
298*38fd1498Szrj }
299*38fd1498Szrj 
300*38fd1498Szrj /* Given a insn (CURR_INSN), an extension candidate for removal (CAND)
301*38fd1498Szrj    and a pointer to the SET rtx (ORIG_SET) that needs to be modified,
302*38fd1498Szrj    this code modifies the SET rtx to a new SET rtx that extends the
303*38fd1498Szrj    right hand expression into a register on the left hand side.  Note
304*38fd1498Szrj    that multiple assumptions are made about the nature of the set that
305*38fd1498Szrj    needs to be true for this to work and is called from merge_def_and_ext.
306*38fd1498Szrj 
307*38fd1498Szrj    Original :
308*38fd1498Szrj    (set (reg a) (expression))
309*38fd1498Szrj 
310*38fd1498Szrj    Transform :
311*38fd1498Szrj    (set (reg a) (any_extend (expression)))
312*38fd1498Szrj 
313*38fd1498Szrj    Special Cases :
314*38fd1498Szrj    If the expression is a constant or another extension, then directly
315*38fd1498Szrj    assign it to the register.  */
316*38fd1498Szrj 
317*38fd1498Szrj static bool
combine_set_extension(ext_cand * cand,rtx_insn * curr_insn,rtx * orig_set)318*38fd1498Szrj combine_set_extension (ext_cand *cand, rtx_insn *curr_insn, rtx *orig_set)
319*38fd1498Szrj {
320*38fd1498Szrj   rtx orig_src = SET_SRC (*orig_set);
321*38fd1498Szrj   machine_mode orig_mode = GET_MODE (SET_DEST (*orig_set));
322*38fd1498Szrj   rtx new_set;
323*38fd1498Szrj   rtx cand_pat = PATTERN (cand->insn);
324*38fd1498Szrj 
325*38fd1498Szrj   /* If the extension's source/destination registers are not the same
326*38fd1498Szrj      then we need to change the original load to reference the destination
327*38fd1498Szrj      of the extension.  Then we need to emit a copy from that destination
328*38fd1498Szrj      to the original destination of the load.  */
329*38fd1498Szrj   rtx new_reg;
330*38fd1498Szrj   bool copy_needed
331*38fd1498Szrj     = (REGNO (SET_DEST (cand_pat)) != REGNO (XEXP (SET_SRC (cand_pat), 0)));
332*38fd1498Szrj   if (copy_needed)
333*38fd1498Szrj     new_reg = gen_rtx_REG (cand->mode, REGNO (SET_DEST (cand_pat)));
334*38fd1498Szrj   else
335*38fd1498Szrj     new_reg = gen_rtx_REG (cand->mode, REGNO (SET_DEST (*orig_set)));
336*38fd1498Szrj 
337*38fd1498Szrj   /* Merge constants by directly moving the constant into the register under
338*38fd1498Szrj      some conditions.  Recall that RTL constants are sign-extended.  */
339*38fd1498Szrj   if (GET_CODE (orig_src) == CONST_INT
340*38fd1498Szrj       && HWI_COMPUTABLE_MODE_P (cand->mode))
341*38fd1498Szrj     {
342*38fd1498Szrj       if (INTVAL (orig_src) >= 0 || cand->code == SIGN_EXTEND)
343*38fd1498Szrj 	new_set = gen_rtx_SET (new_reg, orig_src);
344*38fd1498Szrj       else
345*38fd1498Szrj 	{
346*38fd1498Szrj 	  /* Zero-extend the negative constant by masking out the bits outside
347*38fd1498Szrj 	     the source mode.  */
348*38fd1498Szrj 	  rtx new_const_int
349*38fd1498Szrj 	    = gen_int_mode (INTVAL (orig_src) & GET_MODE_MASK (orig_mode),
350*38fd1498Szrj 			    GET_MODE (new_reg));
351*38fd1498Szrj 	  new_set = gen_rtx_SET (new_reg, new_const_int);
352*38fd1498Szrj 	}
353*38fd1498Szrj     }
354*38fd1498Szrj   else if (GET_MODE (orig_src) == VOIDmode)
355*38fd1498Szrj     {
356*38fd1498Szrj       /* This is mostly due to a call insn that should not be optimized.  */
357*38fd1498Szrj       return false;
358*38fd1498Szrj     }
359*38fd1498Szrj   else if (GET_CODE (orig_src) == cand->code)
360*38fd1498Szrj     {
361*38fd1498Szrj       /* Here is a sequence of two extensions.  Try to merge them.  */
362*38fd1498Szrj       rtx temp_extension
363*38fd1498Szrj 	= gen_rtx_fmt_e (cand->code, cand->mode, XEXP (orig_src, 0));
364*38fd1498Szrj       rtx simplified_temp_extension = simplify_rtx (temp_extension);
365*38fd1498Szrj       if (simplified_temp_extension)
366*38fd1498Szrj         temp_extension = simplified_temp_extension;
367*38fd1498Szrj       new_set = gen_rtx_SET (new_reg, temp_extension);
368*38fd1498Szrj     }
369*38fd1498Szrj   else if (GET_CODE (orig_src) == IF_THEN_ELSE)
370*38fd1498Szrj     {
371*38fd1498Szrj       /* Only IF_THEN_ELSE of phi-type copies are combined.  Otherwise,
372*38fd1498Szrj          in general, IF_THEN_ELSE should not be combined.  */
373*38fd1498Szrj       return false;
374*38fd1498Szrj     }
375*38fd1498Szrj   else
376*38fd1498Szrj     {
377*38fd1498Szrj       /* This is the normal case.  */
378*38fd1498Szrj       rtx temp_extension
379*38fd1498Szrj 	= gen_rtx_fmt_e (cand->code, cand->mode, orig_src);
380*38fd1498Szrj       rtx simplified_temp_extension = simplify_rtx (temp_extension);
381*38fd1498Szrj       if (simplified_temp_extension)
382*38fd1498Szrj         temp_extension = simplified_temp_extension;
383*38fd1498Szrj       new_set = gen_rtx_SET (new_reg, temp_extension);
384*38fd1498Szrj     }
385*38fd1498Szrj 
386*38fd1498Szrj   /* This change is a part of a group of changes.  Hence,
387*38fd1498Szrj      validate_change will not try to commit the change.  */
388*38fd1498Szrj   if (validate_change (curr_insn, orig_set, new_set, true)
389*38fd1498Szrj       && update_reg_equal_equiv_notes (curr_insn, cand->mode, orig_mode,
390*38fd1498Szrj 				       cand->code))
391*38fd1498Szrj     {
392*38fd1498Szrj       if (dump_file)
393*38fd1498Szrj         {
394*38fd1498Szrj           fprintf (dump_file,
395*38fd1498Szrj 		   "Tentatively merged extension with definition %s:\n",
396*38fd1498Szrj 		   (copy_needed) ? "(copy needed)" : "");
397*38fd1498Szrj           print_rtl_single (dump_file, curr_insn);
398*38fd1498Szrj         }
399*38fd1498Szrj       return true;
400*38fd1498Szrj     }
401*38fd1498Szrj 
402*38fd1498Szrj   return false;
403*38fd1498Szrj }
404*38fd1498Szrj 
405*38fd1498Szrj /* Treat if_then_else insns, where the operands of both branches
406*38fd1498Szrj    are registers, as copies.  For instance,
407*38fd1498Szrj    Original :
408*38fd1498Szrj    (set (reg:SI a) (if_then_else (cond) (reg:SI b) (reg:SI c)))
409*38fd1498Szrj    Transformed :
410*38fd1498Szrj    (set (reg:DI a) (if_then_else (cond) (reg:DI b) (reg:DI c)))
411*38fd1498Szrj    DEF_INSN is the if_then_else insn.  */
412*38fd1498Szrj 
413*38fd1498Szrj static bool
transform_ifelse(ext_cand * cand,rtx_insn * def_insn)414*38fd1498Szrj transform_ifelse (ext_cand *cand, rtx_insn *def_insn)
415*38fd1498Szrj {
416*38fd1498Szrj   rtx set_insn = PATTERN (def_insn);
417*38fd1498Szrj   rtx srcreg, dstreg, srcreg2;
418*38fd1498Szrj   rtx map_srcreg, map_dstreg, map_srcreg2;
419*38fd1498Szrj   rtx ifexpr;
420*38fd1498Szrj   rtx cond;
421*38fd1498Szrj   rtx new_set;
422*38fd1498Szrj 
423*38fd1498Szrj   gcc_assert (GET_CODE (set_insn) == SET);
424*38fd1498Szrj 
425*38fd1498Szrj   cond = XEXP (SET_SRC (set_insn), 0);
426*38fd1498Szrj   dstreg = SET_DEST (set_insn);
427*38fd1498Szrj   srcreg = XEXP (SET_SRC (set_insn), 1);
428*38fd1498Szrj   srcreg2 = XEXP (SET_SRC (set_insn), 2);
429*38fd1498Szrj   /* If the conditional move already has the right or wider mode,
430*38fd1498Szrj      there is nothing to do.  */
431*38fd1498Szrj   if (GET_MODE_UNIT_SIZE (GET_MODE (dstreg))
432*38fd1498Szrj       >= GET_MODE_UNIT_SIZE (cand->mode))
433*38fd1498Szrj     return true;
434*38fd1498Szrj 
435*38fd1498Szrj   map_srcreg = gen_rtx_REG (cand->mode, REGNO (srcreg));
436*38fd1498Szrj   map_srcreg2 = gen_rtx_REG (cand->mode, REGNO (srcreg2));
437*38fd1498Szrj   map_dstreg = gen_rtx_REG (cand->mode, REGNO (dstreg));
438*38fd1498Szrj   ifexpr = gen_rtx_IF_THEN_ELSE (cand->mode, cond, map_srcreg, map_srcreg2);
439*38fd1498Szrj   new_set = gen_rtx_SET (map_dstreg, ifexpr);
440*38fd1498Szrj 
441*38fd1498Szrj   if (validate_change (def_insn, &PATTERN (def_insn), new_set, true)
442*38fd1498Szrj       && update_reg_equal_equiv_notes (def_insn, cand->mode, GET_MODE (dstreg),
443*38fd1498Szrj 				       cand->code))
444*38fd1498Szrj     {
445*38fd1498Szrj       if (dump_file)
446*38fd1498Szrj         {
447*38fd1498Szrj           fprintf (dump_file,
448*38fd1498Szrj 		   "Mode of conditional move instruction extended:\n");
449*38fd1498Szrj           print_rtl_single (dump_file, def_insn);
450*38fd1498Szrj         }
451*38fd1498Szrj       return true;
452*38fd1498Szrj     }
453*38fd1498Szrj 
454*38fd1498Szrj   return false;
455*38fd1498Szrj }
456*38fd1498Szrj 
457*38fd1498Szrj /* Get all the reaching definitions of an instruction.  The definitions are
458*38fd1498Szrj    desired for REG used in INSN.  Return the definition list or NULL if a
459*38fd1498Szrj    definition is missing.  If DEST is non-NULL, additionally push the INSN
460*38fd1498Szrj    of the definitions onto DEST.  */
461*38fd1498Szrj 
462*38fd1498Szrj static struct df_link *
get_defs(rtx_insn * insn,rtx reg,vec<rtx_insn * > * dest)463*38fd1498Szrj get_defs (rtx_insn *insn, rtx reg, vec<rtx_insn *> *dest)
464*38fd1498Szrj {
465*38fd1498Szrj   df_ref use;
466*38fd1498Szrj   struct df_link *ref_chain, *ref_link;
467*38fd1498Szrj 
468*38fd1498Szrj   FOR_EACH_INSN_USE (use, insn)
469*38fd1498Szrj     {
470*38fd1498Szrj       if (GET_CODE (DF_REF_REG (use)) == SUBREG)
471*38fd1498Szrj         return NULL;
472*38fd1498Szrj       if (REGNO (DF_REF_REG (use)) == REGNO (reg))
473*38fd1498Szrj 	break;
474*38fd1498Szrj     }
475*38fd1498Szrj 
476*38fd1498Szrj   gcc_assert (use != NULL);
477*38fd1498Szrj 
478*38fd1498Szrj   ref_chain = DF_REF_CHAIN (use);
479*38fd1498Szrj 
480*38fd1498Szrj   for (ref_link = ref_chain; ref_link; ref_link = ref_link->next)
481*38fd1498Szrj     {
482*38fd1498Szrj       /* Problem getting some definition for this instruction.  */
483*38fd1498Szrj       if (ref_link->ref == NULL)
484*38fd1498Szrj         return NULL;
485*38fd1498Szrj       if (DF_REF_INSN_INFO (ref_link->ref) == NULL)
486*38fd1498Szrj         return NULL;
487*38fd1498Szrj       /* As global regs are assumed to be defined at each function call
488*38fd1498Szrj 	 dataflow can report a call_insn as being a definition of REG.
489*38fd1498Szrj 	 But we can't do anything with that in this pass so proceed only
490*38fd1498Szrj 	 if the instruction really sets REG in a way that can be deduced
491*38fd1498Szrj 	 from the RTL structure.  */
492*38fd1498Szrj       if (global_regs[REGNO (reg)]
493*38fd1498Szrj 	  && !set_of (reg, DF_REF_INSN (ref_link->ref)))
494*38fd1498Szrj 	return NULL;
495*38fd1498Szrj     }
496*38fd1498Szrj 
497*38fd1498Szrj   if (dest)
498*38fd1498Szrj     for (ref_link = ref_chain; ref_link; ref_link = ref_link->next)
499*38fd1498Szrj       dest->safe_push (DF_REF_INSN (ref_link->ref));
500*38fd1498Szrj 
501*38fd1498Szrj   return ref_chain;
502*38fd1498Szrj }
503*38fd1498Szrj 
504*38fd1498Szrj /* Get all the reaching uses of an instruction.  The uses are desired for REG
505*38fd1498Szrj    set in INSN.  Return use list or NULL if a use is missing or irregular.  */
506*38fd1498Szrj 
507*38fd1498Szrj static struct df_link *
get_uses(rtx_insn * insn,rtx reg)508*38fd1498Szrj get_uses (rtx_insn *insn, rtx reg)
509*38fd1498Szrj {
510*38fd1498Szrj   df_ref def;
511*38fd1498Szrj   struct df_link *ref_chain, *ref_link;
512*38fd1498Szrj 
513*38fd1498Szrj   FOR_EACH_INSN_DEF (def, insn)
514*38fd1498Szrj     if (REGNO (DF_REF_REG (def)) == REGNO (reg))
515*38fd1498Szrj       break;
516*38fd1498Szrj 
517*38fd1498Szrj   gcc_assert (def != NULL);
518*38fd1498Szrj 
519*38fd1498Szrj   ref_chain = DF_REF_CHAIN (def);
520*38fd1498Szrj 
521*38fd1498Szrj   for (ref_link = ref_chain; ref_link; ref_link = ref_link->next)
522*38fd1498Szrj     {
523*38fd1498Szrj       /* Problem getting some use for this instruction.  */
524*38fd1498Szrj       if (ref_link->ref == NULL)
525*38fd1498Szrj         return NULL;
526*38fd1498Szrj       if (DF_REF_CLASS (ref_link->ref) != DF_REF_REGULAR)
527*38fd1498Szrj 	return NULL;
528*38fd1498Szrj     }
529*38fd1498Szrj 
530*38fd1498Szrj   return ref_chain;
531*38fd1498Szrj }
532*38fd1498Szrj 
533*38fd1498Szrj /* Return true if INSN is
534*38fd1498Szrj      (SET (reg REGNO (def_reg)) (if_then_else (cond) (REG x1) (REG x2)))
535*38fd1498Szrj    and store x1 and x2 in REG_1 and REG_2.  */
536*38fd1498Szrj 
537*38fd1498Szrj static bool
is_cond_copy_insn(rtx_insn * insn,rtx * reg1,rtx * reg2)538*38fd1498Szrj is_cond_copy_insn (rtx_insn *insn, rtx *reg1, rtx *reg2)
539*38fd1498Szrj {
540*38fd1498Szrj   rtx expr = single_set (insn);
541*38fd1498Szrj 
542*38fd1498Szrj   if (expr != NULL_RTX
543*38fd1498Szrj       && GET_CODE (expr) == SET
544*38fd1498Szrj       && GET_CODE (SET_DEST (expr)) == REG
545*38fd1498Szrj       && GET_CODE (SET_SRC (expr))  == IF_THEN_ELSE
546*38fd1498Szrj       && GET_CODE (XEXP (SET_SRC (expr), 1)) == REG
547*38fd1498Szrj       && GET_CODE (XEXP (SET_SRC (expr), 2)) == REG)
548*38fd1498Szrj     {
549*38fd1498Szrj       *reg1 = XEXP (SET_SRC (expr), 1);
550*38fd1498Szrj       *reg2 = XEXP (SET_SRC (expr), 2);
551*38fd1498Szrj       return true;
552*38fd1498Szrj     }
553*38fd1498Szrj 
554*38fd1498Szrj   return false;
555*38fd1498Szrj }
556*38fd1498Szrj 
/* How, if at all, the ree pass has rewritten a given insn.  */
enum ext_modified_kind
{
  /* The pass has not touched the insn so far.  */
  EXT_MODIFIED_NONE,
  /* The insn was turned into a zero extension.  */
  EXT_MODIFIED_ZEXT,
  /* The insn was turned into a sign extension.  */
  EXT_MODIFIED_SEXT
};
566*38fd1498Szrj 
567*38fd1498Szrj struct ATTRIBUTE_PACKED ext_modified
568*38fd1498Szrj {
569*38fd1498Szrj   /* Mode from which ree has zero or sign extended the destination.  */
570*38fd1498Szrj   ENUM_BITFIELD(machine_mode) mode : 8;
571*38fd1498Szrj 
572*38fd1498Szrj   /* Kind of modification of the insn.  */
573*38fd1498Szrj   ENUM_BITFIELD(ext_modified_kind) kind : 2;
574*38fd1498Szrj 
575*38fd1498Szrj   unsigned int do_not_reextend : 1;
576*38fd1498Szrj 
577*38fd1498Szrj   /* True if the insn is scheduled to be deleted.  */
578*38fd1498Szrj   unsigned int deleted : 1;
579*38fd1498Szrj };
580*38fd1498Szrj 
581*38fd1498Szrj /* Vectors used by combine_reaching_defs and its helpers.  */
582*38fd1498Szrj struct ext_state
583*38fd1498Szrj {
584*38fd1498Szrj   /* In order to avoid constant alloc/free, we keep these
585*38fd1498Szrj      4 vectors live through the entire find_and_remove_re and just
586*38fd1498Szrj      truncate them each time.  */
587*38fd1498Szrj   auto_vec<rtx_insn *> defs_list;
588*38fd1498Szrj   auto_vec<rtx_insn *> copies_list;
589*38fd1498Szrj   auto_vec<rtx_insn *> modified_list;
590*38fd1498Szrj   auto_vec<rtx_insn *> work_list;
591*38fd1498Szrj 
592*38fd1498Szrj   /* For instructions that have been successfully modified, this is
593*38fd1498Szrj      the original mode from which the insn is extending and
594*38fd1498Szrj      kind of extension.  */
595*38fd1498Szrj   struct ext_modified *modified;
596*38fd1498Szrj };
597*38fd1498Szrj 
598*38fd1498Szrj /* Reaching Definitions of the extended register could be conditional copies
599*38fd1498Szrj    or regular definitions.  This function separates the two types into two
600*38fd1498Szrj    lists, STATE->DEFS_LIST and STATE->COPIES_LIST.  This is necessary because,
601*38fd1498Szrj    if a reaching definition is a conditional copy, merging the extension with
602*38fd1498Szrj    this definition is wrong.  Conditional copies are merged by transitively
603*38fd1498Szrj    merging their definitions.  The defs_list is populated with all the reaching
604*38fd1498Szrj    definitions of the extension instruction (EXTEND_INSN) which must be merged
605*38fd1498Szrj    with an extension.  The copies_list contains all the conditional moves that
606*38fd1498Szrj    will later be extended into a wider mode conditional move if all the merges
607*38fd1498Szrj    are successful.  The function returns false upon failure, true upon
608*38fd1498Szrj    success.  */
609*38fd1498Szrj 
610*38fd1498Szrj static bool
make_defs_and_copies_lists(rtx_insn * extend_insn,const_rtx set_pat,ext_state * state)611*38fd1498Szrj make_defs_and_copies_lists (rtx_insn *extend_insn, const_rtx set_pat,
612*38fd1498Szrj 			    ext_state *state)
613*38fd1498Szrj {
614*38fd1498Szrj   rtx src_reg = XEXP (SET_SRC (set_pat), 0);
615*38fd1498Szrj   bool *is_insn_visited;
616*38fd1498Szrj   bool ret = true;
617*38fd1498Szrj 
618*38fd1498Szrj   state->work_list.truncate (0);
619*38fd1498Szrj 
620*38fd1498Szrj   /* Initialize the work list.  */
621*38fd1498Szrj   if (!get_defs (extend_insn, src_reg, &state->work_list))
622*38fd1498Szrj     return false;
623*38fd1498Szrj 
624*38fd1498Szrj   is_insn_visited = XCNEWVEC (bool, max_insn_uid);
625*38fd1498Szrj 
626*38fd1498Szrj   /* Perform transitive closure for conditional copies.  */
627*38fd1498Szrj   while (!state->work_list.is_empty ())
628*38fd1498Szrj     {
629*38fd1498Szrj       rtx_insn *def_insn = state->work_list.pop ();
630*38fd1498Szrj       rtx reg1, reg2;
631*38fd1498Szrj 
632*38fd1498Szrj       gcc_assert (INSN_UID (def_insn) < max_insn_uid);
633*38fd1498Szrj 
634*38fd1498Szrj       if (is_insn_visited[INSN_UID (def_insn)])
635*38fd1498Szrj 	continue;
636*38fd1498Szrj       is_insn_visited[INSN_UID (def_insn)] = true;
637*38fd1498Szrj 
638*38fd1498Szrj       if (is_cond_copy_insn (def_insn, &reg1, &reg2))
639*38fd1498Szrj 	{
640*38fd1498Szrj 	  /* Push it onto the copy list first.  */
641*38fd1498Szrj 	  state->copies_list.safe_push (def_insn);
642*38fd1498Szrj 
643*38fd1498Szrj 	  /* Now perform the transitive closure.  */
644*38fd1498Szrj 	  if (!get_defs (def_insn, reg1, &state->work_list)
645*38fd1498Szrj 	      || !get_defs (def_insn, reg2, &state->work_list))
646*38fd1498Szrj 	    {
647*38fd1498Szrj 	      ret = false;
648*38fd1498Szrj 	      break;
649*38fd1498Szrj 	    }
650*38fd1498Szrj         }
651*38fd1498Szrj       else
652*38fd1498Szrj 	state->defs_list.safe_push (def_insn);
653*38fd1498Szrj     }
654*38fd1498Szrj 
655*38fd1498Szrj   XDELETEVEC (is_insn_visited);
656*38fd1498Szrj 
657*38fd1498Szrj   return ret;
658*38fd1498Szrj }
659*38fd1498Szrj 
660*38fd1498Szrj /* If DEF_INSN has single SET expression, possibly buried inside
661*38fd1498Szrj    a PARALLEL, return the address of the SET expression, else
662*38fd1498Szrj    return NULL.  This is similar to single_set, except that
663*38fd1498Szrj    single_set allows multiple SETs when all but one is dead.  */
664*38fd1498Szrj static rtx *
get_sub_rtx(rtx_insn * def_insn)665*38fd1498Szrj get_sub_rtx (rtx_insn *def_insn)
666*38fd1498Szrj {
667*38fd1498Szrj   enum rtx_code code = GET_CODE (PATTERN (def_insn));
668*38fd1498Szrj   rtx *sub_rtx = NULL;
669*38fd1498Szrj 
670*38fd1498Szrj   if (code == PARALLEL)
671*38fd1498Szrj     {
672*38fd1498Szrj       for (int i = 0; i < XVECLEN (PATTERN (def_insn), 0); i++)
673*38fd1498Szrj         {
674*38fd1498Szrj           rtx s_expr = XVECEXP (PATTERN (def_insn), 0, i);
675*38fd1498Szrj           if (GET_CODE (s_expr) != SET)
676*38fd1498Szrj             continue;
677*38fd1498Szrj 
678*38fd1498Szrj           if (sub_rtx == NULL)
679*38fd1498Szrj             sub_rtx = &XVECEXP (PATTERN (def_insn), 0, i);
680*38fd1498Szrj           else
681*38fd1498Szrj             {
682*38fd1498Szrj               /* PARALLEL with multiple SETs.  */
683*38fd1498Szrj               return NULL;
684*38fd1498Szrj             }
685*38fd1498Szrj         }
686*38fd1498Szrj     }
687*38fd1498Szrj   else if (code == SET)
688*38fd1498Szrj     sub_rtx = &PATTERN (def_insn);
689*38fd1498Szrj   else
690*38fd1498Szrj     {
691*38fd1498Szrj       /* It is not a PARALLEL or a SET, what could it be ? */
692*38fd1498Szrj       return NULL;
693*38fd1498Szrj     }
694*38fd1498Szrj 
695*38fd1498Szrj   gcc_assert (sub_rtx != NULL);
696*38fd1498Szrj   return sub_rtx;
697*38fd1498Szrj }
698*38fd1498Szrj 
699*38fd1498Szrj /* Merge the DEF_INSN with an extension.  Calls combine_set_extension
700*38fd1498Szrj    on the SET pattern.  */
701*38fd1498Szrj 
702*38fd1498Szrj static bool
merge_def_and_ext(ext_cand * cand,rtx_insn * def_insn,ext_state * state)703*38fd1498Szrj merge_def_and_ext (ext_cand *cand, rtx_insn *def_insn, ext_state *state)
704*38fd1498Szrj {
705*38fd1498Szrj   machine_mode ext_src_mode;
706*38fd1498Szrj   rtx *sub_rtx;
707*38fd1498Szrj 
708*38fd1498Szrj   ext_src_mode = GET_MODE (XEXP (SET_SRC (cand->expr), 0));
709*38fd1498Szrj   sub_rtx = get_sub_rtx (def_insn);
710*38fd1498Szrj 
711*38fd1498Szrj   if (sub_rtx == NULL)
712*38fd1498Szrj     return false;
713*38fd1498Szrj 
714*38fd1498Szrj   if (REG_P (SET_DEST (*sub_rtx))
715*38fd1498Szrj       && (GET_MODE (SET_DEST (*sub_rtx)) == ext_src_mode
716*38fd1498Szrj 	  || ((state->modified[INSN_UID (def_insn)].kind
717*38fd1498Szrj 	       == (cand->code == ZERO_EXTEND
718*38fd1498Szrj 		   ? EXT_MODIFIED_ZEXT : EXT_MODIFIED_SEXT))
719*38fd1498Szrj 	      && state->modified[INSN_UID (def_insn)].mode
720*38fd1498Szrj 		 == ext_src_mode)))
721*38fd1498Szrj     {
722*38fd1498Szrj       if (GET_MODE_UNIT_SIZE (GET_MODE (SET_DEST (*sub_rtx)))
723*38fd1498Szrj 	  >= GET_MODE_UNIT_SIZE (cand->mode))
724*38fd1498Szrj 	return true;
725*38fd1498Szrj       /* If def_insn is already scheduled to be deleted, don't attempt
726*38fd1498Szrj 	 to modify it.  */
727*38fd1498Szrj       if (state->modified[INSN_UID (def_insn)].deleted)
728*38fd1498Szrj 	return false;
729*38fd1498Szrj       if (combine_set_extension (cand, def_insn, sub_rtx))
730*38fd1498Szrj 	{
731*38fd1498Szrj 	  if (state->modified[INSN_UID (def_insn)].kind == EXT_MODIFIED_NONE)
732*38fd1498Szrj 	    state->modified[INSN_UID (def_insn)].mode = ext_src_mode;
733*38fd1498Szrj 	  return true;
734*38fd1498Szrj 	}
735*38fd1498Szrj     }
736*38fd1498Szrj 
737*38fd1498Szrj   return false;
738*38fd1498Szrj }
739*38fd1498Szrj 
740*38fd1498Szrj /* Given SRC, which should be one or more extensions of a REG, strip
741*38fd1498Szrj    away the extensions and return the REG.  */
742*38fd1498Szrj 
743*38fd1498Szrj static inline rtx
get_extended_src_reg(rtx src)744*38fd1498Szrj get_extended_src_reg (rtx src)
745*38fd1498Szrj {
746*38fd1498Szrj   while (GET_CODE (src) == SIGN_EXTEND || GET_CODE (src) == ZERO_EXTEND)
747*38fd1498Szrj     src = XEXP (src, 0);
748*38fd1498Szrj   gcc_assert (REG_P (src));
749*38fd1498Szrj   return src;
750*38fd1498Szrj }
751*38fd1498Szrj 
752*38fd1498Szrj /* This function goes through all reaching defs of the source
753*38fd1498Szrj    of the candidate for elimination (CAND) and tries to combine
754*38fd1498Szrj    the extension with the definition instruction.  The changes
755*38fd1498Szrj    are made as a group so that even if one definition cannot be
756*38fd1498Szrj    merged, all reaching definitions end up not being merged.
757*38fd1498Szrj    When a conditional copy is encountered, merging is attempted
758*38fd1498Szrj    transitively on its definitions.  It returns true upon success
759*38fd1498Szrj    and false upon failure.  */
760*38fd1498Szrj 
761*38fd1498Szrj static bool
combine_reaching_defs(ext_cand * cand,const_rtx set_pat,ext_state * state)762*38fd1498Szrj combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state)
763*38fd1498Szrj {
764*38fd1498Szrj   rtx_insn *def_insn;
765*38fd1498Szrj   bool merge_successful = true;
766*38fd1498Szrj   int i;
767*38fd1498Szrj   int defs_ix;
768*38fd1498Szrj   bool outcome;
769*38fd1498Szrj 
770*38fd1498Szrj   state->defs_list.truncate (0);
771*38fd1498Szrj   state->copies_list.truncate (0);
772*38fd1498Szrj 
773*38fd1498Szrj   outcome = make_defs_and_copies_lists (cand->insn, set_pat, state);
774*38fd1498Szrj 
775*38fd1498Szrj   if (!outcome)
776*38fd1498Szrj     return false;
777*38fd1498Szrj 
778*38fd1498Szrj   /* If the destination operand of the extension is a different
779*38fd1498Szrj      register than the source operand, then additional restrictions
780*38fd1498Szrj      are needed.  Note we have to handle cases where we have nested
781*38fd1498Szrj      extensions in the source operand.  */
782*38fd1498Szrj   bool copy_needed
783*38fd1498Szrj     = (REGNO (SET_DEST (PATTERN (cand->insn)))
784*38fd1498Szrj        != REGNO (get_extended_src_reg (SET_SRC (PATTERN (cand->insn)))));
785*38fd1498Szrj   if (copy_needed)
786*38fd1498Szrj     {
787*38fd1498Szrj       /* Considering transformation of
788*38fd1498Szrj 	 (set (reg1) (expression))
789*38fd1498Szrj 	 ...
790*38fd1498Szrj 	 (set (reg2) (any_extend (reg1)))
791*38fd1498Szrj 
792*38fd1498Szrj 	 into
793*38fd1498Szrj 
794*38fd1498Szrj 	 (set (reg2) (any_extend (expression)))
795*38fd1498Szrj 	 (set (reg1) (reg2))
796*38fd1498Szrj 	 ...  */
797*38fd1498Szrj 
798*38fd1498Szrj       /* In theory we could handle more than one reaching def, it
799*38fd1498Szrj 	 just makes the code to update the insn stream more complex.  */
800*38fd1498Szrj       if (state->defs_list.length () != 1)
801*38fd1498Szrj 	return false;
802*38fd1498Szrj 
803*38fd1498Szrj       /* We don't have the structure described above if there are
804*38fd1498Szrj 	 conditional moves in between the def and the candidate,
805*38fd1498Szrj 	 and we will not handle them correctly.  See PR68194.  */
806*38fd1498Szrj       if (state->copies_list.length () > 0)
807*38fd1498Szrj 	return false;
808*38fd1498Szrj 
809*38fd1498Szrj       /* We require the candidate not already be modified.  It may,
810*38fd1498Szrj 	 for example have been changed from a (sign_extend (reg))
811*38fd1498Szrj 	 into (zero_extend (sign_extend (reg))).
812*38fd1498Szrj 
813*38fd1498Szrj 	 Handling that case shouldn't be terribly difficult, but the code
814*38fd1498Szrj 	 here and the code to emit copies would need auditing.  Until
815*38fd1498Szrj 	 we see a need, this is the safe thing to do.  */
816*38fd1498Szrj       if (state->modified[INSN_UID (cand->insn)].kind != EXT_MODIFIED_NONE)
817*38fd1498Szrj 	return false;
818*38fd1498Szrj 
819*38fd1498Szrj       machine_mode dst_mode = GET_MODE (SET_DEST (PATTERN (cand->insn)));
820*38fd1498Szrj       rtx src_reg = get_extended_src_reg (SET_SRC (PATTERN (cand->insn)));
821*38fd1498Szrj 
822*38fd1498Szrj       /* Ensure we can use the src_reg in dst_mode (needed for
823*38fd1498Szrj 	 the (set (reg1) (reg2)) insn mentioned above).  */
824*38fd1498Szrj       if (!targetm.hard_regno_mode_ok (REGNO (src_reg), dst_mode))
825*38fd1498Szrj 	return false;
826*38fd1498Szrj 
827*38fd1498Szrj       /* Ensure the number of hard registers of the copy match.  */
828*38fd1498Szrj       if (hard_regno_nregs (REGNO (src_reg), dst_mode) != REG_NREGS (src_reg))
829*38fd1498Szrj 	return false;
830*38fd1498Szrj 
831*38fd1498Szrj       /* There's only one reaching def.  */
832*38fd1498Szrj       rtx_insn *def_insn = state->defs_list[0];
833*38fd1498Szrj 
834*38fd1498Szrj       /* The defining statement must not have been modified either.  */
835*38fd1498Szrj       if (state->modified[INSN_UID (def_insn)].kind != EXT_MODIFIED_NONE)
836*38fd1498Szrj 	return false;
837*38fd1498Szrj 
838*38fd1498Szrj       /* The defining statement and candidate insn must be in the same block.
839*38fd1498Szrj 	 This is merely to keep the test for safety and updating the insn
840*38fd1498Szrj 	 stream simple.  Also ensure that within the block the candidate
841*38fd1498Szrj 	 follows the defining insn.  */
842*38fd1498Szrj       basic_block bb = BLOCK_FOR_INSN (cand->insn);
843*38fd1498Szrj       if (bb != BLOCK_FOR_INSN (def_insn)
844*38fd1498Szrj 	  || DF_INSN_LUID (def_insn) > DF_INSN_LUID (cand->insn))
845*38fd1498Szrj 	return false;
846*38fd1498Szrj 
847*38fd1498Szrj       /* If there is an overlap between the destination of DEF_INSN and
848*38fd1498Szrj 	 CAND->insn, then this transformation is not safe.  Note we have
849*38fd1498Szrj 	 to test in the widened mode.  */
850*38fd1498Szrj       rtx *dest_sub_rtx = get_sub_rtx (def_insn);
851*38fd1498Szrj       if (dest_sub_rtx == NULL
852*38fd1498Szrj 	  || !REG_P (SET_DEST (*dest_sub_rtx)))
853*38fd1498Szrj 	return false;
854*38fd1498Szrj 
855*38fd1498Szrj       rtx tmp_reg = gen_rtx_REG (GET_MODE (SET_DEST (PATTERN (cand->insn))),
856*38fd1498Szrj 				 REGNO (SET_DEST (*dest_sub_rtx)));
857*38fd1498Szrj       if (reg_overlap_mentioned_p (tmp_reg, SET_DEST (PATTERN (cand->insn))))
858*38fd1498Szrj 	return false;
859*38fd1498Szrj 
860*38fd1498Szrj       /* On RISC machines we must make sure that changing the mode of SRC_REG
861*38fd1498Szrj 	 as destination register will not affect its reaching uses, which may
862*38fd1498Szrj 	 read its value in a larger mode because DEF_INSN implicitly sets it
863*38fd1498Szrj 	 in word mode.  */
864*38fd1498Szrj       poly_int64 prec
865*38fd1498Szrj 	= GET_MODE_PRECISION (GET_MODE (SET_DEST (*dest_sub_rtx)));
866*38fd1498Szrj       if (WORD_REGISTER_OPERATIONS && known_lt (prec, BITS_PER_WORD))
867*38fd1498Szrj 	{
868*38fd1498Szrj 	  struct df_link *uses = get_uses (def_insn, src_reg);
869*38fd1498Szrj 	  if (!uses)
870*38fd1498Szrj 	    return false;
871*38fd1498Szrj 
872*38fd1498Szrj 	  for (df_link *use = uses; use; use = use->next)
873*38fd1498Szrj 	    if (paradoxical_subreg_p (GET_MODE (*DF_REF_LOC (use->ref)),
874*38fd1498Szrj 				      GET_MODE (SET_DEST (*dest_sub_rtx))))
875*38fd1498Szrj 	      return false;
876*38fd1498Szrj 	}
877*38fd1498Szrj 
878*38fd1498Szrj       /* The destination register of the extension insn must not be
879*38fd1498Szrj 	 used or set between the def_insn and cand->insn exclusive.  */
880*38fd1498Szrj       if (reg_used_between_p (SET_DEST (PATTERN (cand->insn)),
881*38fd1498Szrj 			      def_insn, cand->insn)
882*38fd1498Szrj 	  || reg_set_between_p (SET_DEST (PATTERN (cand->insn)),
883*38fd1498Szrj 				def_insn, cand->insn))
884*38fd1498Szrj 	return false;
885*38fd1498Szrj 
886*38fd1498Szrj       /* We must be able to copy between the two registers.   Generate,
887*38fd1498Szrj 	 recognize and verify constraints of the copy.  Also fail if this
888*38fd1498Szrj 	 generated more than one insn.
889*38fd1498Szrj 
890*38fd1498Szrj          This generates garbage since we throw away the insn when we're
891*38fd1498Szrj 	 done, only to recreate it later if this test was successful.
892*38fd1498Szrj 
893*38fd1498Szrj 	 Make sure to get the mode from the extension (cand->insn).  This
894*38fd1498Szrj 	 is different than in the code to emit the copy as we have not
895*38fd1498Szrj 	 modified the defining insn yet.  */
896*38fd1498Szrj       start_sequence ();
897*38fd1498Szrj       rtx pat = PATTERN (cand->insn);
898*38fd1498Szrj       rtx new_dst = gen_rtx_REG (GET_MODE (SET_DEST (pat)),
899*38fd1498Szrj                                  REGNO (get_extended_src_reg (SET_SRC (pat))));
900*38fd1498Szrj       rtx new_src = gen_rtx_REG (GET_MODE (SET_DEST (pat)),
901*38fd1498Szrj                                  REGNO (SET_DEST (pat)));
902*38fd1498Szrj       emit_move_insn (new_dst, new_src);
903*38fd1498Szrj 
904*38fd1498Szrj       rtx_insn *insn = get_insns ();
905*38fd1498Szrj       end_sequence ();
906*38fd1498Szrj       if (NEXT_INSN (insn))
907*38fd1498Szrj 	return false;
908*38fd1498Szrj       if (recog_memoized (insn) == -1)
909*38fd1498Szrj 	return false;
910*38fd1498Szrj       extract_insn (insn);
911*38fd1498Szrj       if (!constrain_operands (1, get_preferred_alternatives (insn, bb)))
912*38fd1498Szrj 	return false;
913*38fd1498Szrj 
914*38fd1498Szrj       while (REG_P (SET_SRC (*dest_sub_rtx))
915*38fd1498Szrj 	     && (REGNO (SET_SRC (*dest_sub_rtx)) == REGNO (SET_DEST (pat))))
916*38fd1498Szrj 	{
917*38fd1498Szrj 	  /* Considering transformation of
918*38fd1498Szrj 	     (set (reg2) (expression))
919*38fd1498Szrj 	     ...
920*38fd1498Szrj 	     (set (reg1) (reg2))
921*38fd1498Szrj 	     ...
922*38fd1498Szrj 	     (set (reg2) (any_extend (reg1)))
923*38fd1498Szrj 
924*38fd1498Szrj 	     into
925*38fd1498Szrj 
926*38fd1498Szrj 	     (set (reg2) (any_extend (expression)))
927*38fd1498Szrj 	     (set (reg1) (reg2))
928*38fd1498Szrj 	     ...  */
929*38fd1498Szrj 	  struct df_link *defs
930*38fd1498Szrj 	    = get_defs (def_insn, SET_SRC (*dest_sub_rtx), NULL);
931*38fd1498Szrj 	  if (defs == NULL || defs->next)
932*38fd1498Szrj 	    break;
933*38fd1498Szrj 
934*38fd1498Szrj 	  /* There is only one reaching def.  */
935*38fd1498Szrj 	  rtx_insn *def_insn2 = DF_REF_INSN (defs->ref);
936*38fd1498Szrj 
937*38fd1498Szrj 	  /* The defining statement must not have been modified either.  */
938*38fd1498Szrj 	  if (state->modified[INSN_UID (def_insn2)].kind != EXT_MODIFIED_NONE)
939*38fd1498Szrj 	    break;
940*38fd1498Szrj 
941*38fd1498Szrj 	  /* The def_insn2 and candidate insn must be in the same
942*38fd1498Szrj 	     block and def_insn follows def_insn2.  */
943*38fd1498Szrj 	  if (bb != BLOCK_FOR_INSN (def_insn2)
944*38fd1498Szrj 	      || DF_INSN_LUID (def_insn2) > DF_INSN_LUID (def_insn))
945*38fd1498Szrj 	    break;
946*38fd1498Szrj 
947*38fd1498Szrj 	  rtx *dest_sub_rtx2 = get_sub_rtx (def_insn2);
948*38fd1498Szrj 	  if (dest_sub_rtx2 == NULL
949*38fd1498Szrj 	      || !REG_P (SET_DEST (*dest_sub_rtx2)))
950*38fd1498Szrj 	    break;
951*38fd1498Szrj 
952*38fd1498Szrj 	  /* On RISC machines we must make sure that changing the mode of
953*38fd1498Szrj 	     SRC_REG as destination register will not affect its reaching
954*38fd1498Szrj 	     uses, which may read its value in a larger mode because DEF_INSN
955*38fd1498Szrj 	     implicitly sets it in word mode.  */
956*38fd1498Szrj 	  if (WORD_REGISTER_OPERATIONS && known_lt (prec, BITS_PER_WORD))
957*38fd1498Szrj 	    {
958*38fd1498Szrj 	      struct df_link *uses = get_uses (def_insn2, SET_DEST (pat));
959*38fd1498Szrj 	      if (!uses)
960*38fd1498Szrj 		break;
961*38fd1498Szrj 
962*38fd1498Szrj 	      df_link *use;
963*38fd1498Szrj 	      rtx dest2 = SET_DEST (*dest_sub_rtx2);
964*38fd1498Szrj 	      for (use = uses; use; use = use->next)
965*38fd1498Szrj 		if (paradoxical_subreg_p (GET_MODE (*DF_REF_LOC (use->ref)),
966*38fd1498Szrj 					  GET_MODE (dest2)))
967*38fd1498Szrj 		  break;
968*38fd1498Szrj 	      if (use)
969*38fd1498Szrj 		break;
970*38fd1498Szrj 	    }
971*38fd1498Szrj 
972*38fd1498Szrj 	  /* The destination register of the extension insn must not be
973*38fd1498Szrj 	     used or set between the def_insn2 and def_insn exclusive.
974*38fd1498Szrj 	     Likewise for the other reg, i.e. check both reg1 and reg2
975*38fd1498Szrj 	     in the above comment.  */
976*38fd1498Szrj 	  if (reg_used_between_p (SET_DEST (PATTERN (cand->insn)),
977*38fd1498Szrj 				  def_insn2, def_insn)
978*38fd1498Szrj 	      || reg_set_between_p (SET_DEST (PATTERN (cand->insn)),
979*38fd1498Szrj 				    def_insn2, def_insn)
980*38fd1498Szrj 	      || reg_used_between_p (src_reg, def_insn2, def_insn)
981*38fd1498Szrj 	      || reg_set_between_p (src_reg, def_insn2, def_insn))
982*38fd1498Szrj 	    break;
983*38fd1498Szrj 
984*38fd1498Szrj 	  state->defs_list[0] = def_insn2;
985*38fd1498Szrj 	  break;
986*38fd1498Szrj 	}
987*38fd1498Szrj     }
988*38fd1498Szrj 
989*38fd1498Szrj   /* If cand->insn has been already modified, update cand->mode to a wider
990*38fd1498Szrj      mode if possible, or punt.  */
991*38fd1498Szrj   if (state->modified[INSN_UID (cand->insn)].kind != EXT_MODIFIED_NONE)
992*38fd1498Szrj     {
993*38fd1498Szrj       machine_mode mode;
994*38fd1498Szrj       rtx set;
995*38fd1498Szrj 
996*38fd1498Szrj       if (state->modified[INSN_UID (cand->insn)].kind
997*38fd1498Szrj 	  != (cand->code == ZERO_EXTEND
998*38fd1498Szrj 	      ? EXT_MODIFIED_ZEXT : EXT_MODIFIED_SEXT)
999*38fd1498Szrj 	  || state->modified[INSN_UID (cand->insn)].mode != cand->mode
1000*38fd1498Szrj 	  || (set = single_set (cand->insn)) == NULL_RTX)
1001*38fd1498Szrj 	return false;
1002*38fd1498Szrj       mode = GET_MODE (SET_DEST (set));
1003*38fd1498Szrj       gcc_assert (GET_MODE_UNIT_SIZE (mode)
1004*38fd1498Szrj 		  >= GET_MODE_UNIT_SIZE (cand->mode));
1005*38fd1498Szrj       cand->mode = mode;
1006*38fd1498Szrj     }
1007*38fd1498Szrj 
1008*38fd1498Szrj   merge_successful = true;
1009*38fd1498Szrj 
1010*38fd1498Szrj   /* Go through the defs vector and try to merge all the definitions
1011*38fd1498Szrj      in this vector.  */
1012*38fd1498Szrj   state->modified_list.truncate (0);
1013*38fd1498Szrj   FOR_EACH_VEC_ELT (state->defs_list, defs_ix, def_insn)
1014*38fd1498Szrj     {
1015*38fd1498Szrj       if (merge_def_and_ext (cand, def_insn, state))
1016*38fd1498Szrj 	state->modified_list.safe_push (def_insn);
1017*38fd1498Szrj       else
1018*38fd1498Szrj         {
1019*38fd1498Szrj           merge_successful = false;
1020*38fd1498Szrj           break;
1021*38fd1498Szrj         }
1022*38fd1498Szrj     }
1023*38fd1498Szrj 
1024*38fd1498Szrj   /* Now go through the conditional copies vector and try to merge all
1025*38fd1498Szrj      the copies in this vector.  */
1026*38fd1498Szrj   if (merge_successful)
1027*38fd1498Szrj     {
1028*38fd1498Szrj       FOR_EACH_VEC_ELT (state->copies_list, i, def_insn)
1029*38fd1498Szrj         {
1030*38fd1498Szrj           if (transform_ifelse (cand, def_insn))
1031*38fd1498Szrj 	    state->modified_list.safe_push (def_insn);
1032*38fd1498Szrj           else
1033*38fd1498Szrj             {
1034*38fd1498Szrj               merge_successful = false;
1035*38fd1498Szrj               break;
1036*38fd1498Szrj             }
1037*38fd1498Szrj         }
1038*38fd1498Szrj     }
1039*38fd1498Szrj 
1040*38fd1498Szrj   if (merge_successful)
1041*38fd1498Szrj     {
1042*38fd1498Szrj       /* Commit the changes here if possible
1043*38fd1498Szrj 	 FIXME: It's an all-or-nothing scenario.  Even if only one definition
1044*38fd1498Szrj 	 cannot be merged, we entirely give up.  In the future, we should allow
1045*38fd1498Szrj 	 extensions to be partially eliminated along those paths where the
1046*38fd1498Szrj 	 definitions could be merged.  */
1047*38fd1498Szrj       if (apply_change_group ())
1048*38fd1498Szrj         {
1049*38fd1498Szrj           if (dump_file)
1050*38fd1498Szrj             fprintf (dump_file, "All merges were successful.\n");
1051*38fd1498Szrj 
1052*38fd1498Szrj 	  FOR_EACH_VEC_ELT (state->modified_list, i, def_insn)
1053*38fd1498Szrj 	    {
1054*38fd1498Szrj 	      ext_modified *modified = &state->modified[INSN_UID (def_insn)];
1055*38fd1498Szrj 	      if (modified->kind == EXT_MODIFIED_NONE)
1056*38fd1498Szrj 		modified->kind = (cand->code == ZERO_EXTEND ? EXT_MODIFIED_ZEXT
1057*38fd1498Szrj 						            : EXT_MODIFIED_SEXT);
1058*38fd1498Szrj 
1059*38fd1498Szrj 	      if (copy_needed)
1060*38fd1498Szrj 		modified->do_not_reextend = 1;
1061*38fd1498Szrj 	    }
1062*38fd1498Szrj           return true;
1063*38fd1498Szrj         }
1064*38fd1498Szrj       else
1065*38fd1498Szrj         {
1066*38fd1498Szrj           /* Changes need not be cancelled explicitly as apply_change_group
1067*38fd1498Szrj              does it.  Print list of definitions in the dump_file for debug
1068*38fd1498Szrj              purposes.  This extension cannot be deleted.  */
1069*38fd1498Szrj           if (dump_file)
1070*38fd1498Szrj             {
1071*38fd1498Szrj 	      fprintf (dump_file,
1072*38fd1498Szrj 		       "Merge cancelled, non-mergeable definitions:\n");
1073*38fd1498Szrj 	      FOR_EACH_VEC_ELT (state->modified_list, i, def_insn)
1074*38fd1498Szrj 	        print_rtl_single (dump_file, def_insn);
1075*38fd1498Szrj             }
1076*38fd1498Szrj         }
1077*38fd1498Szrj     }
1078*38fd1498Szrj   else
1079*38fd1498Szrj     {
1080*38fd1498Szrj       /* Cancel any changes that have been made so far.  */
1081*38fd1498Szrj       cancel_changes (0);
1082*38fd1498Szrj     }
1083*38fd1498Szrj 
1084*38fd1498Szrj   return false;
1085*38fd1498Szrj }
1086*38fd1498Szrj 
1087*38fd1498Szrj /* Add an extension pattern that could be eliminated.  */
1088*38fd1498Szrj 
1089*38fd1498Szrj static void
add_removable_extension(const_rtx expr,rtx_insn * insn,vec<ext_cand> * insn_list,unsigned * def_map,bitmap init_regs)1090*38fd1498Szrj add_removable_extension (const_rtx expr, rtx_insn *insn,
1091*38fd1498Szrj 			 vec<ext_cand> *insn_list,
1092*38fd1498Szrj 			 unsigned *def_map,
1093*38fd1498Szrj 			 bitmap init_regs)
1094*38fd1498Szrj {
1095*38fd1498Szrj   enum rtx_code code;
1096*38fd1498Szrj   machine_mode mode;
1097*38fd1498Szrj   unsigned int idx;
1098*38fd1498Szrj   rtx src, dest;
1099*38fd1498Szrj 
1100*38fd1498Szrj   /* We are looking for SET (REG N) (ANY_EXTEND (REG N)).  */
1101*38fd1498Szrj   if (GET_CODE (expr) != SET)
1102*38fd1498Szrj     return;
1103*38fd1498Szrj 
1104*38fd1498Szrj   src = SET_SRC (expr);
1105*38fd1498Szrj   code = GET_CODE (src);
1106*38fd1498Szrj   dest = SET_DEST (expr);
1107*38fd1498Szrj   mode = GET_MODE (dest);
1108*38fd1498Szrj 
1109*38fd1498Szrj   if (REG_P (dest)
1110*38fd1498Szrj       && (code == SIGN_EXTEND || code == ZERO_EXTEND)
1111*38fd1498Szrj       && REG_P (XEXP (src, 0)))
1112*38fd1498Szrj     {
1113*38fd1498Szrj       rtx reg = XEXP (src, 0);
1114*38fd1498Szrj       struct df_link *defs, *def;
1115*38fd1498Szrj       ext_cand *cand;
1116*38fd1498Szrj 
1117*38fd1498Szrj       /* Zero-extension of an undefined value is partly defined (it's
1118*38fd1498Szrj 	 completely undefined for sign-extension, though).  So if there exists
1119*38fd1498Szrj 	 a path from the entry to this zero-extension that leaves this register
1120*38fd1498Szrj 	 uninitialized, removing the extension could change the behavior of
1121*38fd1498Szrj 	 correct programs.  So first, check it is not the case.  */
1122*38fd1498Szrj       if (code == ZERO_EXTEND && !bitmap_bit_p (init_regs, REGNO (reg)))
1123*38fd1498Szrj 	{
1124*38fd1498Szrj 	  if (dump_file)
1125*38fd1498Szrj 	    {
1126*38fd1498Szrj 	      fprintf (dump_file, "Cannot eliminate extension:\n");
1127*38fd1498Szrj 	      print_rtl_single (dump_file, insn);
1128*38fd1498Szrj 	      fprintf (dump_file, " because it can operate on uninitialized"
1129*38fd1498Szrj 			          " data\n");
1130*38fd1498Szrj 	    }
1131*38fd1498Szrj 	  return;
1132*38fd1498Szrj 	}
1133*38fd1498Szrj 
1134*38fd1498Szrj       /* Second, make sure we can get all the reaching definitions.  */
1135*38fd1498Szrj       defs = get_defs (insn, reg, NULL);
1136*38fd1498Szrj       if (!defs)
1137*38fd1498Szrj 	{
1138*38fd1498Szrj 	  if (dump_file)
1139*38fd1498Szrj 	    {
1140*38fd1498Szrj 	      fprintf (dump_file, "Cannot eliminate extension:\n");
1141*38fd1498Szrj 	      print_rtl_single (dump_file, insn);
1142*38fd1498Szrj 	      fprintf (dump_file, " because of missing definition(s)\n");
1143*38fd1498Szrj 	    }
1144*38fd1498Szrj 	  return;
1145*38fd1498Szrj 	}
1146*38fd1498Szrj 
1147*38fd1498Szrj       /* Third, make sure the reaching definitions don't feed another and
1148*38fd1498Szrj 	 different extension.  FIXME: this obviously can be improved.  */
1149*38fd1498Szrj       for (def = defs; def; def = def->next)
1150*38fd1498Szrj 	if ((idx = def_map[INSN_UID (DF_REF_INSN (def->ref))])
1151*38fd1498Szrj 	    && idx != -1U
1152*38fd1498Szrj 	    && (cand = &(*insn_list)[idx - 1])
1153*38fd1498Szrj 	    && cand->code != code)
1154*38fd1498Szrj 	  {
1155*38fd1498Szrj 	    if (dump_file)
1156*38fd1498Szrj 	      {
1157*38fd1498Szrj 	        fprintf (dump_file, "Cannot eliminate extension:\n");
1158*38fd1498Szrj 		print_rtl_single (dump_file, insn);
1159*38fd1498Szrj 	        fprintf (dump_file, " because of other extension\n");
1160*38fd1498Szrj 	      }
1161*38fd1498Szrj 	    return;
1162*38fd1498Szrj 	  }
1163*38fd1498Szrj 	/* For vector mode extensions, ensure that all uses of the
1164*38fd1498Szrj 	   XEXP (src, 0) register are in insn or debug insns, as unlike
1165*38fd1498Szrj 	   integral extensions lowpart subreg of the sign/zero extended
1166*38fd1498Szrj 	   register are not equal to the original register, so we have
1167*38fd1498Szrj 	   to change all uses or none and the current code isn't able
1168*38fd1498Szrj 	   to change them all at once in one transaction.  */
1169*38fd1498Szrj 	else if (VECTOR_MODE_P (GET_MODE (XEXP (src, 0))))
1170*38fd1498Szrj 	  {
1171*38fd1498Szrj 	    if (idx == 0)
1172*38fd1498Szrj 	      {
1173*38fd1498Szrj 		struct df_link *ref_chain, *ref_link;
1174*38fd1498Szrj 
1175*38fd1498Szrj 		ref_chain = DF_REF_CHAIN (def->ref);
1176*38fd1498Szrj 		for (ref_link = ref_chain; ref_link; ref_link = ref_link->next)
1177*38fd1498Szrj 		  {
1178*38fd1498Szrj 		    if (ref_link->ref == NULL
1179*38fd1498Szrj 			|| DF_REF_INSN_INFO (ref_link->ref) == NULL)
1180*38fd1498Szrj 		      {
1181*38fd1498Szrj 			idx = -1U;
1182*38fd1498Szrj 			break;
1183*38fd1498Szrj 		      }
1184*38fd1498Szrj 		    rtx_insn *use_insn = DF_REF_INSN (ref_link->ref);
1185*38fd1498Szrj 		    if (use_insn != insn && !DEBUG_INSN_P (use_insn))
1186*38fd1498Szrj 		      {
1187*38fd1498Szrj 			idx = -1U;
1188*38fd1498Szrj 			break;
1189*38fd1498Szrj 		      }
1190*38fd1498Szrj 		  }
1191*38fd1498Szrj 		if (idx == -1U)
1192*38fd1498Szrj 		  def_map[INSN_UID (DF_REF_INSN (def->ref))] = idx;
1193*38fd1498Szrj 	      }
1194*38fd1498Szrj 	    if (idx == -1U)
1195*38fd1498Szrj 	      {
1196*38fd1498Szrj 		if (dump_file)
1197*38fd1498Szrj 		  {
1198*38fd1498Szrj 		    fprintf (dump_file, "Cannot eliminate extension:\n");
1199*38fd1498Szrj 		    print_rtl_single (dump_file, insn);
1200*38fd1498Szrj 		    fprintf (dump_file,
1201*38fd1498Szrj 			     " because some vector uses aren't extension\n");
1202*38fd1498Szrj 		  }
1203*38fd1498Szrj 		return;
1204*38fd1498Szrj 	      }
1205*38fd1498Szrj 	  }
1206*38fd1498Szrj 
1207*38fd1498Szrj       /* Fourth, if the extended version occupies more registers than the
1208*38fd1498Szrj 	 original and the source of the extension is the same hard register
1209*38fd1498Szrj 	 as the destination of the extension, then we can not eliminate
1210*38fd1498Szrj 	 the extension without deep analysis, so just punt.
1211*38fd1498Szrj 
1212*38fd1498Szrj 	 We allow this when the registers are different because the
1213*38fd1498Szrj 	 code in combine_reaching_defs will handle that case correctly.  */
1214*38fd1498Szrj       if (hard_regno_nregs (REGNO (dest), mode) != REG_NREGS (reg)
1215*38fd1498Szrj 	  && reg_overlap_mentioned_p (dest, reg))
1216*38fd1498Szrj 	return;
1217*38fd1498Szrj 
1218*38fd1498Szrj       /* Then add the candidate to the list and insert the reaching definitions
1219*38fd1498Szrj          into the definition map.  */
1220*38fd1498Szrj       ext_cand e = {expr, code, mode, insn};
1221*38fd1498Szrj       insn_list->safe_push (e);
1222*38fd1498Szrj       idx = insn_list->length ();
1223*38fd1498Szrj 
1224*38fd1498Szrj       for (def = defs; def; def = def->next)
1225*38fd1498Szrj 	def_map[INSN_UID (DF_REF_INSN (def->ref))] = idx;
1226*38fd1498Szrj     }
1227*38fd1498Szrj }
1228*38fd1498Szrj 
1229*38fd1498Szrj /* Traverse the instruction stream looking for extensions and return the
1230*38fd1498Szrj    list of candidates.  */
1231*38fd1498Szrj 
1232*38fd1498Szrj static vec<ext_cand>
find_removable_extensions(void)1233*38fd1498Szrj find_removable_extensions (void)
1234*38fd1498Szrj {
1235*38fd1498Szrj   vec<ext_cand> insn_list = vNULL;
1236*38fd1498Szrj   basic_block bb;
1237*38fd1498Szrj   rtx_insn *insn;
1238*38fd1498Szrj   rtx set;
1239*38fd1498Szrj   unsigned *def_map = XCNEWVEC (unsigned, max_insn_uid);
1240*38fd1498Szrj   bitmap_head init, kill, gen, tmp;
1241*38fd1498Szrj 
1242*38fd1498Szrj   bitmap_initialize (&init, NULL);
1243*38fd1498Szrj   bitmap_initialize (&kill, NULL);
1244*38fd1498Szrj   bitmap_initialize (&gen, NULL);
1245*38fd1498Szrj   bitmap_initialize (&tmp, NULL);
1246*38fd1498Szrj 
1247*38fd1498Szrj   FOR_EACH_BB_FN (bb, cfun)
1248*38fd1498Szrj     {
1249*38fd1498Szrj       bitmap_copy (&init, DF_MIR_IN (bb));
1250*38fd1498Szrj       bitmap_clear (&kill);
1251*38fd1498Szrj       bitmap_clear (&gen);
1252*38fd1498Szrj 
1253*38fd1498Szrj       FOR_BB_INSNS (bb, insn)
1254*38fd1498Szrj 	{
1255*38fd1498Szrj 	  if (NONDEBUG_INSN_P (insn))
1256*38fd1498Szrj 	    {
1257*38fd1498Szrj 	      set = single_set (insn);
1258*38fd1498Szrj 	      if (set != NULL_RTX)
1259*38fd1498Szrj 		add_removable_extension (set, insn, &insn_list, def_map,
1260*38fd1498Szrj 					 &init);
1261*38fd1498Szrj 	      df_mir_simulate_one_insn (bb, insn, &kill, &gen);
1262*38fd1498Szrj 	      bitmap_ior_and_compl (&tmp, &gen, &init, &kill);
1263*38fd1498Szrj 	      bitmap_copy (&init, &tmp);
1264*38fd1498Szrj 	    }
1265*38fd1498Szrj 	}
1266*38fd1498Szrj     }
1267*38fd1498Szrj 
1268*38fd1498Szrj   XDELETEVEC (def_map);
1269*38fd1498Szrj 
1270*38fd1498Szrj   return insn_list;
1271*38fd1498Szrj }
1272*38fd1498Szrj 
1273*38fd1498Szrj /* This is the main function that checks the insn stream for redundant
1274*38fd1498Szrj    extensions and tries to remove them if possible.  */
1275*38fd1498Szrj 
1276*38fd1498Szrj static void
find_and_remove_re(void)1277*38fd1498Szrj find_and_remove_re (void)
1278*38fd1498Szrj {
1279*38fd1498Szrj   ext_cand *curr_cand;
1280*38fd1498Szrj   rtx_insn *curr_insn = NULL;
1281*38fd1498Szrj   int num_re_opportunities = 0, num_realized = 0, i;
1282*38fd1498Szrj   vec<ext_cand> reinsn_list;
1283*38fd1498Szrj   auto_vec<rtx_insn *> reinsn_del_list;
1284*38fd1498Szrj   auto_vec<rtx_insn *> reinsn_copy_list;
1285*38fd1498Szrj 
1286*38fd1498Szrj   /* Construct DU chain to get all reaching definitions of each
1287*38fd1498Szrj      extension instruction.  */
1288*38fd1498Szrj   df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
1289*38fd1498Szrj   df_chain_add_problem (DF_UD_CHAIN + DF_DU_CHAIN);
1290*38fd1498Szrj   df_mir_add_problem ();
1291*38fd1498Szrj   df_analyze ();
1292*38fd1498Szrj   df_set_flags (DF_DEFER_INSN_RESCAN);
1293*38fd1498Szrj 
1294*38fd1498Szrj   max_insn_uid = get_max_uid ();
1295*38fd1498Szrj   reinsn_list = find_removable_extensions ();
1296*38fd1498Szrj 
1297*38fd1498Szrj   ext_state state;
1298*38fd1498Szrj   if (reinsn_list.is_empty ())
1299*38fd1498Szrj     state.modified = NULL;
1300*38fd1498Szrj   else
1301*38fd1498Szrj     state.modified = XCNEWVEC (struct ext_modified, max_insn_uid);
1302*38fd1498Szrj 
1303*38fd1498Szrj   FOR_EACH_VEC_ELT (reinsn_list, i, curr_cand)
1304*38fd1498Szrj     {
1305*38fd1498Szrj       num_re_opportunities++;
1306*38fd1498Szrj 
1307*38fd1498Szrj       /* Try to combine the extension with the definition.  */
1308*38fd1498Szrj       if (dump_file)
1309*38fd1498Szrj         {
1310*38fd1498Szrj           fprintf (dump_file, "Trying to eliminate extension:\n");
1311*38fd1498Szrj           print_rtl_single (dump_file, curr_cand->insn);
1312*38fd1498Szrj         }
1313*38fd1498Szrj 
1314*38fd1498Szrj       if (combine_reaching_defs (curr_cand, curr_cand->expr, &state))
1315*38fd1498Szrj         {
1316*38fd1498Szrj           if (dump_file)
1317*38fd1498Szrj             fprintf (dump_file, "Eliminated the extension.\n");
1318*38fd1498Szrj           num_realized++;
1319*38fd1498Szrj 	  /* If the RHS of the current candidate is not (extend (reg)), then
1320*38fd1498Szrj 	     we do not allow the optimization of extensions where
1321*38fd1498Szrj 	     the source and destination registers do not match.  Thus
1322*38fd1498Szrj 	     checking REG_P here is correct.  */
1323*38fd1498Szrj 	  if (REG_P (XEXP (SET_SRC (PATTERN (curr_cand->insn)), 0))
1324*38fd1498Szrj 	      && (REGNO (SET_DEST (PATTERN (curr_cand->insn)))
1325*38fd1498Szrj 		  != REGNO (XEXP (SET_SRC (PATTERN (curr_cand->insn)), 0))))
1326*38fd1498Szrj 	    {
1327*38fd1498Szrj               reinsn_copy_list.safe_push (curr_cand->insn);
1328*38fd1498Szrj               reinsn_copy_list.safe_push (state.defs_list[0]);
1329*38fd1498Szrj 	    }
1330*38fd1498Szrj 	  reinsn_del_list.safe_push (curr_cand->insn);
1331*38fd1498Szrj 	  state.modified[INSN_UID (curr_cand->insn)].deleted = 1;
1332*38fd1498Szrj         }
1333*38fd1498Szrj     }
1334*38fd1498Szrj 
1335*38fd1498Szrj   /* The copy list contains pairs of insns which describe copies we
1336*38fd1498Szrj      need to insert into the INSN stream.
1337*38fd1498Szrj 
1338*38fd1498Szrj      The first insn in each pair is the extension insn, from which
1339*38fd1498Szrj      we derive the source and destination of the copy.
1340*38fd1498Szrj 
1341*38fd1498Szrj      The second insn in each pair is the memory reference where the
1342*38fd1498Szrj      extension will ultimately happen.  We emit the new copy
1343*38fd1498Szrj      immediately after this insn.
1344*38fd1498Szrj 
1345*38fd1498Szrj      It may first appear that the arguments for the copy are reversed.
1346*38fd1498Szrj      Remember that the memory reference will be changed to refer to the
1347*38fd1498Szrj      destination of the extention.  So we're actually emitting a copy
1348*38fd1498Szrj      from the new destination to the old destination.  */
1349*38fd1498Szrj   for (unsigned int i = 0; i < reinsn_copy_list.length (); i += 2)
1350*38fd1498Szrj     {
1351*38fd1498Szrj       rtx_insn *curr_insn = reinsn_copy_list[i];
1352*38fd1498Szrj       rtx_insn *def_insn = reinsn_copy_list[i + 1];
1353*38fd1498Szrj 
1354*38fd1498Szrj       /* Use the mode of the destination of the defining insn
1355*38fd1498Szrj 	 for the mode of the copy.  This is necessary if the
1356*38fd1498Szrj 	 defining insn was used to eliminate a second extension
1357*38fd1498Szrj 	 that was wider than the first.  */
1358*38fd1498Szrj       rtx sub_rtx = *get_sub_rtx (def_insn);
1359*38fd1498Szrj       rtx pat = PATTERN (curr_insn);
1360*38fd1498Szrj       rtx new_dst = gen_rtx_REG (GET_MODE (SET_DEST (sub_rtx)),
1361*38fd1498Szrj 				 REGNO (XEXP (SET_SRC (pat), 0)));
1362*38fd1498Szrj       rtx new_src = gen_rtx_REG (GET_MODE (SET_DEST (sub_rtx)),
1363*38fd1498Szrj 				 REGNO (SET_DEST (pat)));
1364*38fd1498Szrj       rtx set = gen_rtx_SET (new_dst, new_src);
1365*38fd1498Szrj       emit_insn_after (set, def_insn);
1366*38fd1498Szrj     }
1367*38fd1498Szrj 
1368*38fd1498Szrj   /* Delete all useless extensions here in one sweep.  */
1369*38fd1498Szrj   FOR_EACH_VEC_ELT (reinsn_del_list, i, curr_insn)
1370*38fd1498Szrj     delete_insn (curr_insn);
1371*38fd1498Szrj 
1372*38fd1498Szrj   reinsn_list.release ();
1373*38fd1498Szrj   XDELETEVEC (state.modified);
1374*38fd1498Szrj 
1375*38fd1498Szrj   if (dump_file && num_re_opportunities > 0)
1376*38fd1498Szrj     fprintf (dump_file, "Elimination opportunities = %d realized = %d\n",
1377*38fd1498Szrj 	     num_re_opportunities, num_realized);
1378*38fd1498Szrj }
1379*38fd1498Szrj 
1380*38fd1498Szrj /* Find and remove redundant extensions.  */
1381*38fd1498Szrj 
1382*38fd1498Szrj static unsigned int
rest_of_handle_ree(void)1383*38fd1498Szrj rest_of_handle_ree (void)
1384*38fd1498Szrj {
1385*38fd1498Szrj   find_and_remove_re ();
1386*38fd1498Szrj   return 0;
1387*38fd1498Szrj }
1388*38fd1498Szrj 
1389*38fd1498Szrj namespace {
1390*38fd1498Szrj 
1391*38fd1498Szrj const pass_data pass_data_ree =
1392*38fd1498Szrj {
1393*38fd1498Szrj   RTL_PASS, /* type */
1394*38fd1498Szrj   "ree", /* name */
1395*38fd1498Szrj   OPTGROUP_NONE, /* optinfo_flags */
1396*38fd1498Szrj   TV_REE, /* tv_id */
1397*38fd1498Szrj   0, /* properties_required */
1398*38fd1498Szrj   0, /* properties_provided */
1399*38fd1498Szrj   0, /* properties_destroyed */
1400*38fd1498Szrj   0, /* todo_flags_start */
1401*38fd1498Szrj   TODO_df_finish, /* todo_flags_finish */
1402*38fd1498Szrj };
1403*38fd1498Szrj 
1404*38fd1498Szrj class pass_ree : public rtl_opt_pass
1405*38fd1498Szrj {
1406*38fd1498Szrj public:
pass_ree(gcc::context * ctxt)1407*38fd1498Szrj   pass_ree (gcc::context *ctxt)
1408*38fd1498Szrj     : rtl_opt_pass (pass_data_ree, ctxt)
1409*38fd1498Szrj   {}
1410*38fd1498Szrj 
1411*38fd1498Szrj   /* opt_pass methods: */
gate(function *)1412*38fd1498Szrj   virtual bool gate (function *) { return (optimize > 0 && flag_ree); }
execute(function *)1413*38fd1498Szrj   virtual unsigned int execute (function *) { return rest_of_handle_ree (); }
1414*38fd1498Szrj 
1415*38fd1498Szrj }; // class pass_ree
1416*38fd1498Szrj 
1417*38fd1498Szrj } // anon namespace
1418*38fd1498Szrj 
1419*38fd1498Szrj rtl_opt_pass *
make_pass_ree(gcc::context * ctxt)1420*38fd1498Szrj make_pass_ree (gcc::context *ctxt)
1421*38fd1498Szrj {
1422*38fd1498Szrj   return new pass_ree (ctxt);
1423*38fd1498Szrj }
1424